MemoryOS 0.2.2__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MemoryOS might be problematic. Click here for more details.
- {memoryos-0.2.2.dist-info → memoryos-1.0.1.dist-info}/METADATA +7 -1
- {memoryos-0.2.2.dist-info → memoryos-1.0.1.dist-info}/RECORD +81 -66
- memos/__init__.py +1 -1
- memos/api/config.py +31 -8
- memos/api/context/context.py +1 -1
- memos/api/context/context_thread.py +96 -0
- memos/api/middleware/request_context.py +94 -0
- memos/api/product_api.py +5 -1
- memos/api/product_models.py +16 -0
- memos/api/routers/product_router.py +39 -3
- memos/api/start_api.py +3 -0
- memos/configs/internet_retriever.py +13 -0
- memos/configs/mem_scheduler.py +38 -16
- memos/configs/memory.py +13 -0
- memos/configs/reranker.py +18 -0
- memos/graph_dbs/base.py +33 -4
- memos/graph_dbs/nebular.py +631 -236
- memos/graph_dbs/neo4j.py +18 -7
- memos/graph_dbs/neo4j_community.py +6 -3
- memos/llms/vllm.py +2 -0
- memos/log.py +125 -8
- memos/mem_os/core.py +49 -11
- memos/mem_os/main.py +1 -1
- memos/mem_os/product.py +392 -215
- memos/mem_os/utils/default_config.py +1 -1
- memos/mem_os/utils/format_utils.py +11 -47
- memos/mem_os/utils/reference_utils.py +153 -0
- memos/mem_reader/simple_struct.py +112 -43
- memos/mem_scheduler/base_scheduler.py +58 -55
- memos/mem_scheduler/{modules → general_modules}/base.py +1 -2
- memos/mem_scheduler/{modules → general_modules}/dispatcher.py +54 -15
- memos/mem_scheduler/{modules → general_modules}/rabbitmq_service.py +4 -4
- memos/mem_scheduler/{modules → general_modules}/redis_service.py +1 -1
- memos/mem_scheduler/{modules → general_modules}/retriever.py +19 -5
- memos/mem_scheduler/{modules → general_modules}/scheduler_logger.py +10 -4
- memos/mem_scheduler/general_scheduler.py +110 -67
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +305 -0
- memos/mem_scheduler/{modules/monitor.py → monitors/general_monitor.py} +57 -19
- memos/mem_scheduler/mos_for_test_scheduler.py +7 -1
- memos/mem_scheduler/schemas/general_schemas.py +3 -2
- memos/mem_scheduler/schemas/message_schemas.py +2 -1
- memos/mem_scheduler/schemas/monitor_schemas.py +10 -2
- memos/mem_scheduler/utils/misc_utils.py +43 -2
- memos/mem_user/mysql_user_manager.py +4 -2
- memos/memories/activation/item.py +1 -1
- memos/memories/activation/kv.py +20 -8
- memos/memories/textual/base.py +1 -1
- memos/memories/textual/general.py +1 -1
- memos/memories/textual/item.py +1 -1
- memos/memories/textual/tree.py +31 -1
- memos/memories/textual/tree_text_memory/organize/{conflict.py → handler.py} +30 -48
- memos/memories/textual/tree_text_memory/organize/manager.py +8 -96
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +2 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +102 -140
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +231 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +9 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +67 -10
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +1 -1
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +246 -134
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +7 -2
- memos/memories/textual/tree_text_memory/retrieve/utils.py +7 -5
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_utils.py +46 -0
- memos/memos_tools/thread_safe_dict.py +288 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +24 -0
- memos/reranker/cosine_local.py +95 -0
- memos/reranker/factory.py +43 -0
- memos/reranker/http_bge.py +99 -0
- memos/reranker/noop.py +16 -0
- memos/templates/mem_reader_prompts.py +290 -39
- memos/templates/mem_scheduler_prompts.py +23 -10
- memos/templates/mos_prompts.py +133 -31
- memos/templates/tree_reorganize_prompts.py +24 -17
- memos/utils.py +19 -0
- memos/memories/textual/tree_text_memory/organize/redundancy.py +0 -193
- {memoryos-0.2.2.dist-info → memoryos-1.0.1.dist-info}/LICENSE +0 -0
- {memoryos-0.2.2.dist-info → memoryos-1.0.1.dist-info}/WHEEL +0 -0
- {memoryos-0.2.2.dist-info → memoryos-1.0.1.dist-info}/entry_points.txt +0 -0
- /memos/mem_scheduler/{modules → general_modules}/__init__.py +0 -0
- /memos/mem_scheduler/{modules → general_modules}/misc.py +0 -0
memos/mem_os/product.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
import json
|
|
2
3
|
import os
|
|
3
4
|
import random
|
|
5
|
+
import threading
|
|
4
6
|
import time
|
|
5
7
|
|
|
6
8
|
from collections.abc import Generator
|
|
@@ -22,7 +24,10 @@ from memos.mem_os.utils.format_utils import (
|
|
|
22
24
|
filter_nodes_by_tree_ids,
|
|
23
25
|
remove_embedding_recursive,
|
|
24
26
|
sort_children_by_memory_type,
|
|
25
|
-
|
|
27
|
+
)
|
|
28
|
+
from memos.mem_os.utils.reference_utils import (
|
|
29
|
+
prepare_reference_data,
|
|
30
|
+
process_streaming_references_complete,
|
|
26
31
|
)
|
|
27
32
|
from memos.mem_scheduler.schemas.general_schemas import (
|
|
28
33
|
ANSWER_LABEL,
|
|
@@ -34,7 +39,12 @@ from memos.mem_user.user_manager import UserRole
|
|
|
34
39
|
from memos.memories.textual.item import (
|
|
35
40
|
TextualMemoryItem,
|
|
36
41
|
)
|
|
37
|
-
from memos.templates.mos_prompts import
|
|
42
|
+
from memos.templates.mos_prompts import (
|
|
43
|
+
FURTHER_SUGGESTION_PROMPT,
|
|
44
|
+
SUGGESTION_QUERY_PROMPT_EN,
|
|
45
|
+
SUGGESTION_QUERY_PROMPT_ZH,
|
|
46
|
+
get_memos_prompt,
|
|
47
|
+
)
|
|
38
48
|
from memos.types import MessageList
|
|
39
49
|
|
|
40
50
|
|
|
@@ -45,6 +55,39 @@ load_dotenv()
|
|
|
45
55
|
CUBE_PATH = os.getenv("MOS_CUBE_PATH", "/tmp/data/")
|
|
46
56
|
|
|
47
57
|
|
|
58
|
+
def _short_id(mem_id: str) -> str:
|
|
59
|
+
return (mem_id or "").split("-")[0] if mem_id else ""
|
|
60
|
+
|
|
61
|
+
|
|
62
|
+
def _format_mem_block(memories_all, max_items: int = 20, max_chars_each: int = 320) -> str:
|
|
63
|
+
"""
|
|
64
|
+
Modify TextualMemoryItem Format:
|
|
65
|
+
1:abcd :: [P] text...
|
|
66
|
+
2:ef01 :: [O] text...
|
|
67
|
+
sequence is [i:memId] i; [P]=PersonalMemory / [O]=OuterMemory
|
|
68
|
+
"""
|
|
69
|
+
if not memories_all:
|
|
70
|
+
return "(none)", "(none)"
|
|
71
|
+
|
|
72
|
+
lines_o = []
|
|
73
|
+
lines_p = []
|
|
74
|
+
for idx, m in enumerate(memories_all[:max_items], 1):
|
|
75
|
+
mid = _short_id(getattr(m, "id", "") or "")
|
|
76
|
+
mtype = getattr(getattr(m, "metadata", {}), "memory_type", None) or getattr(
|
|
77
|
+
m, "metadata", {}
|
|
78
|
+
).get("memory_type", "")
|
|
79
|
+
tag = "O" if "Outer" in str(mtype) else "P"
|
|
80
|
+
txt = (getattr(m, "memory", "") or "").replace("\n", " ").strip()
|
|
81
|
+
if len(txt) > max_chars_each:
|
|
82
|
+
txt = txt[: max_chars_each - 1] + "…"
|
|
83
|
+
mid = mid or f"mem_{idx}"
|
|
84
|
+
if tag == "O":
|
|
85
|
+
lines_o.append(f"[{idx}:{mid}] :: [{tag}] {txt}\n")
|
|
86
|
+
elif tag == "P":
|
|
87
|
+
lines_p.append(f"[{idx}:{mid}] :: [{tag}] {txt}")
|
|
88
|
+
return "\n".join(lines_o), "\n".join(lines_p)
|
|
89
|
+
|
|
90
|
+
|
|
48
91
|
class MOSProduct(MOSCore):
|
|
49
92
|
"""
|
|
50
93
|
The MOSProduct class inherits from MOSCore and manages multiple users.
|
|
@@ -348,7 +391,11 @@ class MOSProduct(MOSCore):
|
|
|
348
391
|
return self._create_user_config(user_id, user_config)
|
|
349
392
|
|
|
350
393
|
def _build_system_prompt(
|
|
351
|
-
self,
|
|
394
|
+
self,
|
|
395
|
+
memories_all: list[TextualMemoryItem],
|
|
396
|
+
base_prompt: str | None = None,
|
|
397
|
+
tone: str = "friendly",
|
|
398
|
+
verbosity: str = "mid",
|
|
352
399
|
) -> str:
|
|
353
400
|
"""
|
|
354
401
|
Build custom system prompt for the user with memory references.
|
|
@@ -360,116 +407,46 @@ class MOSProduct(MOSCore):
|
|
|
360
407
|
Returns:
|
|
361
408
|
str: The custom system prompt.
|
|
362
409
|
"""
|
|
363
|
-
|
|
364
410
|
# Build base prompt
|
|
365
411
|
# Add memory context if available
|
|
366
|
-
|
|
367
|
-
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
412
|
+
now = datetime.now()
|
|
413
|
+
formatted_date = now.strftime("%Y-%m-%d (%A)")
|
|
414
|
+
sys_body = get_memos_prompt(
|
|
415
|
+
date=formatted_date, tone=tone, verbosity=verbosity, mode="base"
|
|
416
|
+
)
|
|
417
|
+
mem_block_o, mem_block_p = _format_mem_block(memories_all)
|
|
418
|
+
mem_block = mem_block_o + "\n" + mem_block_p
|
|
419
|
+
prefix = (base_prompt.strip() + "\n\n") if base_prompt else ""
|
|
420
|
+
return (
|
|
421
|
+
prefix
|
|
422
|
+
+ sys_body
|
|
423
|
+
+ "\n\n# Memories\n## PersonalMemory & OuterMemory (ordered)\n"
|
|
424
|
+
+ mem_block
|
|
425
|
+
)
|
|
377
426
|
|
|
378
427
|
def _build_enhance_system_prompt(
|
|
379
|
-
self,
|
|
428
|
+
self,
|
|
429
|
+
user_id: str,
|
|
430
|
+
memories_all: list[TextualMemoryItem],
|
|
431
|
+
tone: str = "friendly",
|
|
432
|
+
verbosity: str = "mid",
|
|
380
433
|
) -> str:
|
|
381
434
|
"""
|
|
382
435
|
Build enhance prompt for the user with memory references.
|
|
383
436
|
"""
|
|
384
|
-
|
|
385
|
-
|
|
386
|
-
|
|
387
|
-
|
|
388
|
-
|
|
389
|
-
|
|
390
|
-
|
|
391
|
-
|
|
392
|
-
|
|
393
|
-
|
|
394
|
-
|
|
395
|
-
|
|
396
|
-
|
|
397
|
-
else:
|
|
398
|
-
memory_id = (
|
|
399
|
-
f"{memory.id.split('-')[0]}" if hasattr(memory, "id") else f"mem_{i}"
|
|
400
|
-
)
|
|
401
|
-
memory_content = (
|
|
402
|
-
memory.memory[:500] if hasattr(memory, "memory") else str(memory)
|
|
403
|
-
)
|
|
404
|
-
memory_content = memory_content.replace("\n", " ")
|
|
405
|
-
outer_memory_context += f"{memory_id}: {memory_content}\n"
|
|
406
|
-
return MEMOS_PRODUCT_ENHANCE_PROMPT + personal_memory_context + outer_memory_context
|
|
407
|
-
return MEMOS_PRODUCT_ENHANCE_PROMPT
|
|
408
|
-
|
|
409
|
-
def _process_streaming_references_complete(self, text_buffer: str) -> tuple[str, str]:
|
|
410
|
-
"""
|
|
411
|
-
Complete streaming reference processing to ensure reference tags are never split.
|
|
412
|
-
|
|
413
|
-
Args:
|
|
414
|
-
text_buffer (str): The accumulated text buffer.
|
|
415
|
-
|
|
416
|
-
Returns:
|
|
417
|
-
tuple[str, str]: (processed_text, remaining_buffer)
|
|
418
|
-
"""
|
|
419
|
-
import re
|
|
420
|
-
|
|
421
|
-
# Pattern to match complete reference tags: [refid:memoriesID]
|
|
422
|
-
complete_pattern = r"\[\d+:[^\]]+\]"
|
|
423
|
-
|
|
424
|
-
# Find all complete reference tags
|
|
425
|
-
complete_matches = list(re.finditer(complete_pattern, text_buffer))
|
|
426
|
-
|
|
427
|
-
if complete_matches:
|
|
428
|
-
# Find the last complete tag
|
|
429
|
-
last_match = complete_matches[-1]
|
|
430
|
-
end_pos = last_match.end()
|
|
431
|
-
|
|
432
|
-
# Get text up to the end of the last complete tag
|
|
433
|
-
processed_text = text_buffer[:end_pos]
|
|
434
|
-
remaining_buffer = text_buffer[end_pos:]
|
|
435
|
-
|
|
436
|
-
# Apply reference splitting to the processed text
|
|
437
|
-
processed_text = split_continuous_references(processed_text)
|
|
438
|
-
|
|
439
|
-
return processed_text, remaining_buffer
|
|
440
|
-
|
|
441
|
-
# Check for incomplete reference tags
|
|
442
|
-
# Look for opening bracket with number and colon
|
|
443
|
-
opening_pattern = r"\[\d+:"
|
|
444
|
-
opening_matches = list(re.finditer(opening_pattern, text_buffer))
|
|
445
|
-
|
|
446
|
-
if opening_matches:
|
|
447
|
-
# Find the last opening tag
|
|
448
|
-
last_opening = opening_matches[-1]
|
|
449
|
-
opening_start = last_opening.start()
|
|
450
|
-
|
|
451
|
-
# Check if we have a complete opening pattern
|
|
452
|
-
if last_opening.end() <= len(text_buffer):
|
|
453
|
-
# We have a complete opening pattern, keep everything in buffer
|
|
454
|
-
return "", text_buffer
|
|
455
|
-
else:
|
|
456
|
-
# Incomplete opening pattern, return text before it
|
|
457
|
-
processed_text = text_buffer[:opening_start]
|
|
458
|
-
# Apply reference splitting to the processed text
|
|
459
|
-
processed_text = split_continuous_references(processed_text)
|
|
460
|
-
return processed_text, text_buffer[opening_start:]
|
|
461
|
-
|
|
462
|
-
# Check for partial opening pattern (starts with [ but not complete)
|
|
463
|
-
if "[" in text_buffer:
|
|
464
|
-
ref_start = text_buffer.find("[")
|
|
465
|
-
processed_text = text_buffer[:ref_start]
|
|
466
|
-
# Apply reference splitting to the processed text
|
|
467
|
-
processed_text = split_continuous_references(processed_text)
|
|
468
|
-
return processed_text, text_buffer[ref_start:]
|
|
469
|
-
|
|
470
|
-
# No reference tags found, apply reference splitting and return all text
|
|
471
|
-
processed_text = split_continuous_references(text_buffer)
|
|
472
|
-
return processed_text, ""
|
|
437
|
+
now = datetime.now()
|
|
438
|
+
formatted_date = now.strftime("%Y-%m-%d (%A)")
|
|
439
|
+
sys_body = get_memos_prompt(
|
|
440
|
+
date=formatted_date, tone=tone, verbosity=verbosity, mode="enhance"
|
|
441
|
+
)
|
|
442
|
+
mem_block_o, mem_block_p = _format_mem_block(memories_all)
|
|
443
|
+
return (
|
|
444
|
+
sys_body
|
|
445
|
+
+ "\n\n# Memories\n## PersonalMemory (ordered)\n"
|
|
446
|
+
+ mem_block_p
|
|
447
|
+
+ "\n## OuterMemory (ordered)\n"
|
|
448
|
+
+ mem_block_o
|
|
449
|
+
)
|
|
473
450
|
|
|
474
451
|
def _extract_references_from_response(self, response: str) -> tuple[str, list[dict]]:
|
|
475
452
|
"""
|
|
@@ -554,17 +531,212 @@ class MOSProduct(MOSCore):
|
|
|
554
531
|
mem_cube=self.mem_cubes[mem_cube_id],
|
|
555
532
|
label=label,
|
|
556
533
|
content=query,
|
|
557
|
-
timestamp=datetime.
|
|
534
|
+
timestamp=datetime.utcnow(),
|
|
558
535
|
)
|
|
559
536
|
self.mem_scheduler.submit_messages(messages=[message_item])
|
|
560
537
|
|
|
538
|
+
async def _post_chat_processing(
|
|
539
|
+
self,
|
|
540
|
+
user_id: str,
|
|
541
|
+
cube_id: str,
|
|
542
|
+
query: str,
|
|
543
|
+
full_response: str,
|
|
544
|
+
system_prompt: str,
|
|
545
|
+
time_start: float,
|
|
546
|
+
time_end: float,
|
|
547
|
+
speed_improvement: float,
|
|
548
|
+
current_messages: list,
|
|
549
|
+
) -> None:
|
|
550
|
+
"""
|
|
551
|
+
Asynchronous processing of logs, notifications and memory additions
|
|
552
|
+
"""
|
|
553
|
+
try:
|
|
554
|
+
logger.info(
|
|
555
|
+
f"user_id: {user_id}, cube_id: {cube_id}, current_messages: {current_messages}"
|
|
556
|
+
)
|
|
557
|
+
logger.info(f"user_id: {user_id}, cube_id: {cube_id}, full_response: {full_response}")
|
|
558
|
+
|
|
559
|
+
clean_response, extracted_references = self._extract_references_from_response(
|
|
560
|
+
full_response
|
|
561
|
+
)
|
|
562
|
+
logger.info(f"Extracted {len(extracted_references)} references from response")
|
|
563
|
+
|
|
564
|
+
# Send chat report notifications asynchronously
|
|
565
|
+
if self.online_bot:
|
|
566
|
+
try:
|
|
567
|
+
from memos.memos_tools.notification_utils import (
|
|
568
|
+
send_online_bot_notification_async,
|
|
569
|
+
)
|
|
570
|
+
|
|
571
|
+
# Prepare notification data
|
|
572
|
+
chat_data = {
|
|
573
|
+
"query": query,
|
|
574
|
+
"user_id": user_id,
|
|
575
|
+
"cube_id": cube_id,
|
|
576
|
+
"system_prompt": system_prompt,
|
|
577
|
+
"full_response": full_response,
|
|
578
|
+
}
|
|
579
|
+
|
|
580
|
+
system_data = {
|
|
581
|
+
"references": extracted_references,
|
|
582
|
+
"time_start": time_start,
|
|
583
|
+
"time_end": time_end,
|
|
584
|
+
"speed_improvement": speed_improvement,
|
|
585
|
+
}
|
|
586
|
+
|
|
587
|
+
emoji_config = {"chat": "💬", "system_info": "📊"}
|
|
588
|
+
|
|
589
|
+
await send_online_bot_notification_async(
|
|
590
|
+
online_bot=self.online_bot,
|
|
591
|
+
header_name="MemOS Chat Report",
|
|
592
|
+
sub_title_name="chat_with_references",
|
|
593
|
+
title_color="#00956D",
|
|
594
|
+
other_data1=chat_data,
|
|
595
|
+
other_data2=system_data,
|
|
596
|
+
emoji=emoji_config,
|
|
597
|
+
)
|
|
598
|
+
except Exception as e:
|
|
599
|
+
logger.warning(f"Failed to send chat notification (async): {e}")
|
|
600
|
+
|
|
601
|
+
self._send_message_to_scheduler(
|
|
602
|
+
user_id=user_id, mem_cube_id=cube_id, query=clean_response, label=ANSWER_LABEL
|
|
603
|
+
)
|
|
604
|
+
|
|
605
|
+
self.add(
|
|
606
|
+
user_id=user_id,
|
|
607
|
+
messages=[
|
|
608
|
+
{
|
|
609
|
+
"role": "user",
|
|
610
|
+
"content": query,
|
|
611
|
+
"chat_time": str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
|
|
612
|
+
},
|
|
613
|
+
{
|
|
614
|
+
"role": "assistant",
|
|
615
|
+
"content": clean_response, # Store clean text without reference markers
|
|
616
|
+
"chat_time": str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
|
|
617
|
+
},
|
|
618
|
+
],
|
|
619
|
+
mem_cube_id=cube_id,
|
|
620
|
+
)
|
|
621
|
+
|
|
622
|
+
logger.info(f"Post-chat processing completed for user {user_id}")
|
|
623
|
+
|
|
624
|
+
except Exception as e:
|
|
625
|
+
logger.error(f"Error in post-chat processing for user {user_id}: {e}", exc_info=True)
|
|
626
|
+
|
|
627
|
+
def _start_post_chat_processing(
|
|
628
|
+
self,
|
|
629
|
+
user_id: str,
|
|
630
|
+
cube_id: str,
|
|
631
|
+
query: str,
|
|
632
|
+
full_response: str,
|
|
633
|
+
system_prompt: str,
|
|
634
|
+
time_start: float,
|
|
635
|
+
time_end: float,
|
|
636
|
+
speed_improvement: float,
|
|
637
|
+
current_messages: list,
|
|
638
|
+
) -> None:
|
|
639
|
+
"""
|
|
640
|
+
Asynchronous processing of logs, notifications and memory additions, handle synchronous and asynchronous environments
|
|
641
|
+
"""
|
|
642
|
+
|
|
643
|
+
def run_async_in_thread():
|
|
644
|
+
"""Running asynchronous tasks in a new thread"""
|
|
645
|
+
try:
|
|
646
|
+
loop = asyncio.new_event_loop()
|
|
647
|
+
asyncio.set_event_loop(loop)
|
|
648
|
+
try:
|
|
649
|
+
loop.run_until_complete(
|
|
650
|
+
self._post_chat_processing(
|
|
651
|
+
user_id=user_id,
|
|
652
|
+
cube_id=cube_id,
|
|
653
|
+
query=query,
|
|
654
|
+
full_response=full_response,
|
|
655
|
+
system_prompt=system_prompt,
|
|
656
|
+
time_start=time_start,
|
|
657
|
+
time_end=time_end,
|
|
658
|
+
speed_improvement=speed_improvement,
|
|
659
|
+
current_messages=current_messages,
|
|
660
|
+
)
|
|
661
|
+
)
|
|
662
|
+
finally:
|
|
663
|
+
loop.close()
|
|
664
|
+
except Exception as e:
|
|
665
|
+
logger.error(
|
|
666
|
+
f"Error in thread-based post-chat processing for user {user_id}: {e}",
|
|
667
|
+
exc_info=True,
|
|
668
|
+
)
|
|
669
|
+
|
|
670
|
+
try:
|
|
671
|
+
# Try to get the current event loop
|
|
672
|
+
asyncio.get_running_loop()
|
|
673
|
+
# Create task and store reference to prevent garbage collection
|
|
674
|
+
task = asyncio.create_task(
|
|
675
|
+
self._post_chat_processing(
|
|
676
|
+
user_id=user_id,
|
|
677
|
+
cube_id=cube_id,
|
|
678
|
+
query=query,
|
|
679
|
+
full_response=full_response,
|
|
680
|
+
system_prompt=system_prompt,
|
|
681
|
+
time_start=time_start,
|
|
682
|
+
time_end=time_end,
|
|
683
|
+
speed_improvement=speed_improvement,
|
|
684
|
+
current_messages=current_messages,
|
|
685
|
+
)
|
|
686
|
+
)
|
|
687
|
+
# Add exception handling for the background task
|
|
688
|
+
task.add_done_callback(
|
|
689
|
+
lambda t: logger.error(
|
|
690
|
+
f"Error in background post-chat processing for user {user_id}: {t.exception()}",
|
|
691
|
+
exc_info=True,
|
|
692
|
+
)
|
|
693
|
+
if t.exception()
|
|
694
|
+
else None
|
|
695
|
+
)
|
|
696
|
+
except RuntimeError:
|
|
697
|
+
# No event loop, run in a new thread
|
|
698
|
+
thread = threading.Thread(
|
|
699
|
+
target=run_async_in_thread,
|
|
700
|
+
name=f"PostChatProcessing-{user_id}",
|
|
701
|
+
# Set as a daemon thread to avoid blocking program exit
|
|
702
|
+
daemon=True,
|
|
703
|
+
)
|
|
704
|
+
thread.start()
|
|
705
|
+
|
|
561
706
|
def _filter_memories_by_threshold(
|
|
562
|
-
self,
|
|
707
|
+
self,
|
|
708
|
+
memories: list[TextualMemoryItem],
|
|
709
|
+
threshold: float = 0.30,
|
|
710
|
+
min_num: int = 3,
|
|
711
|
+
memory_type: Literal["OuterMemory"] = "OuterMemory",
|
|
563
712
|
) -> list[TextualMemoryItem]:
|
|
564
713
|
"""
|
|
565
|
-
Filter memories by threshold.
|
|
714
|
+
Filter memories by threshold and type, at least min_num memories for Non-OuterMemory.
|
|
715
|
+
Args:
|
|
716
|
+
memories: list[TextualMemoryItem],
|
|
717
|
+
threshold: float,
|
|
718
|
+
min_num: int,
|
|
719
|
+
memory_type: Literal["OuterMemory"],
|
|
720
|
+
Returns:
|
|
721
|
+
list[TextualMemoryItem]
|
|
566
722
|
"""
|
|
567
|
-
|
|
723
|
+
sorted_memories = sorted(memories, key=lambda m: m.metadata.relativity, reverse=True)
|
|
724
|
+
filtered_person = [m for m in memories if m.metadata.memory_type != memory_type]
|
|
725
|
+
filtered_outer = [m for m in memories if m.metadata.memory_type == memory_type]
|
|
726
|
+
filtered = []
|
|
727
|
+
per_memory_count = 0
|
|
728
|
+
for m in sorted_memories:
|
|
729
|
+
if m.metadata.relativity >= threshold:
|
|
730
|
+
if m.metadata.memory_type != memory_type:
|
|
731
|
+
per_memory_count += 1
|
|
732
|
+
filtered.append(m)
|
|
733
|
+
if len(filtered) < min_num:
|
|
734
|
+
filtered = filtered_person[:min_num] + filtered_outer[:min_num]
|
|
735
|
+
else:
|
|
736
|
+
if per_memory_count < min_num:
|
|
737
|
+
filtered += filtered_person[per_memory_count:min_num]
|
|
738
|
+
filtered_memory = sorted(filtered, key=lambda m: m.metadata.relativity, reverse=True)
|
|
739
|
+
return filtered_memory
|
|
568
740
|
|
|
569
741
|
def register_mem_cube(
|
|
570
742
|
self,
|
|
@@ -662,7 +834,7 @@ class MOSProduct(MOSCore):
|
|
|
662
834
|
|
|
663
835
|
# Create a default cube for the user using MOSCore's methods
|
|
664
836
|
default_cube_name = f"{user_name}_{user_id}_default_cube"
|
|
665
|
-
mem_cube_name_or_path =
|
|
837
|
+
mem_cube_name_or_path = os.path.join(CUBE_PATH, default_cube_name)
|
|
666
838
|
default_cube_id = self.create_cube_for_user(
|
|
667
839
|
cube_name=default_cube_name, owner_id=user_id, cube_path=mem_cube_name_or_path
|
|
668
840
|
)
|
|
@@ -696,7 +868,23 @@ class MOSProduct(MOSCore):
|
|
|
696
868
|
except Exception as e:
|
|
697
869
|
return {"status": "error", "message": f"Failed to register user: {e!s}"}
|
|
698
870
|
|
|
699
|
-
def
|
|
871
|
+
def _get_further_suggestion(self, message: MessageList | None = None) -> list[str]:
|
|
872
|
+
"""Get further suggestion prompt."""
|
|
873
|
+
try:
|
|
874
|
+
dialogue_info = "\n".join([f"{msg['role']}: {msg['content']}" for msg in message[-2:]])
|
|
875
|
+
further_suggestion_prompt = FURTHER_SUGGESTION_PROMPT.format(dialogue=dialogue_info)
|
|
876
|
+
message_list = [{"role": "system", "content": further_suggestion_prompt}]
|
|
877
|
+
response = self.chat_llm.generate(message_list)
|
|
878
|
+
clean_response = clean_json_response(response)
|
|
879
|
+
response_json = json.loads(clean_response)
|
|
880
|
+
return response_json["query"]
|
|
881
|
+
except Exception as e:
|
|
882
|
+
logger.error(f"Error getting further suggestion: {e}", exc_info=True)
|
|
883
|
+
return []
|
|
884
|
+
|
|
885
|
+
def get_suggestion_query(
|
|
886
|
+
self, user_id: str, language: str = "zh", message: MessageList | None = None
|
|
887
|
+
) -> list[str]:
|
|
700
888
|
"""Get suggestion query from LLM.
|
|
701
889
|
Args:
|
|
702
890
|
user_id (str): User ID.
|
|
@@ -705,37 +893,13 @@ class MOSProduct(MOSCore):
|
|
|
705
893
|
Returns:
|
|
706
894
|
list[str]: The suggestion query list.
|
|
707
895
|
"""
|
|
708
|
-
|
|
896
|
+
if message:
|
|
897
|
+
further_suggestion = self._get_further_suggestion(message)
|
|
898
|
+
return further_suggestion
|
|
709
899
|
if language == "zh":
|
|
710
|
-
suggestion_prompt =
|
|
711
|
-
你是一个有用的助手,可以帮助用户生成建议查询。
|
|
712
|
-
我将获取用户最近的一些记忆,
|
|
713
|
-
你应该生成一些建议查询,这些查询应该是用户想要查询的内容,
|
|
714
|
-
用户最近的记忆是:
|
|
715
|
-
{memories}
|
|
716
|
-
请生成3个建议查询用中文,
|
|
717
|
-
输出应该是json格式,键是"query",值是一个建议查询列表。
|
|
718
|
-
|
|
719
|
-
示例:
|
|
720
|
-
{{
|
|
721
|
-
"query": ["查询1", "查询2", "查询3"]
|
|
722
|
-
}}
|
|
723
|
-
"""
|
|
900
|
+
suggestion_prompt = SUGGESTION_QUERY_PROMPT_ZH
|
|
724
901
|
else: # English
|
|
725
|
-
suggestion_prompt =
|
|
726
|
-
You are a helpful assistant that can help users to generate suggestion query.
|
|
727
|
-
I will get some user recently memories,
|
|
728
|
-
you should generate some suggestion query, the query should be user what to query,
|
|
729
|
-
user recently memories is:
|
|
730
|
-
{memories}
|
|
731
|
-
if the user recently memories is empty, please generate 3 suggestion query in English,
|
|
732
|
-
output should be a json format, the key is "query", the value is a list of suggestion query.
|
|
733
|
-
|
|
734
|
-
example:
|
|
735
|
-
{{
|
|
736
|
-
"query": ["query1", "query2", "query3"]
|
|
737
|
-
}}
|
|
738
|
-
"""
|
|
902
|
+
suggestion_prompt = SUGGESTION_QUERY_PROMPT_EN
|
|
739
903
|
text_mem_result = super().search("my recently memories", user_id=user_id, top_k=3)[
|
|
740
904
|
"text_mem"
|
|
741
905
|
]
|
|
@@ -749,14 +913,75 @@ class MOSProduct(MOSCore):
|
|
|
749
913
|
response_json = json.loads(clean_response)
|
|
750
914
|
return response_json["query"]
|
|
751
915
|
|
|
752
|
-
def
|
|
916
|
+
def chat(
|
|
753
917
|
self,
|
|
754
918
|
query: str,
|
|
755
919
|
user_id: str,
|
|
756
920
|
cube_id: str | None = None,
|
|
757
921
|
history: MessageList | None = None,
|
|
922
|
+
base_prompt: str | None = None,
|
|
923
|
+
internet_search: bool = False,
|
|
924
|
+
moscube: bool = False,
|
|
758
925
|
top_k: int = 10,
|
|
926
|
+
threshold: float = 0.5,
|
|
927
|
+
) -> str:
|
|
928
|
+
"""
|
|
929
|
+
Chat with LLM with memory references and complete response.
|
|
930
|
+
"""
|
|
931
|
+
self._load_user_cubes(user_id, self.default_cube_config)
|
|
932
|
+
time_start = time.time()
|
|
933
|
+
memories_result = super().search(
|
|
934
|
+
query,
|
|
935
|
+
user_id,
|
|
936
|
+
install_cube_ids=[cube_id] if cube_id else None,
|
|
937
|
+
top_k=top_k,
|
|
938
|
+
mode="fine",
|
|
939
|
+
internet_search=internet_search,
|
|
940
|
+
moscube=moscube,
|
|
941
|
+
)["text_mem"]
|
|
942
|
+
|
|
943
|
+
memories_list = []
|
|
944
|
+
if memories_result:
|
|
945
|
+
memories_list = memories_result[0]["memories"]
|
|
946
|
+
memories_list = self._filter_memories_by_threshold(memories_list, threshold)
|
|
947
|
+
new_memories_list = []
|
|
948
|
+
for m in memories_list:
|
|
949
|
+
m.metadata.embedding = []
|
|
950
|
+
new_memories_list.append(m)
|
|
951
|
+
memories_list = new_memories_list
|
|
952
|
+
system_prompt = super()._build_system_prompt(memories_list, base_prompt)
|
|
953
|
+
history_info = []
|
|
954
|
+
if history:
|
|
955
|
+
history_info = history[-20:]
|
|
956
|
+
current_messages = [
|
|
957
|
+
{"role": "system", "content": system_prompt},
|
|
958
|
+
*history_info,
|
|
959
|
+
{"role": "user", "content": query},
|
|
960
|
+
]
|
|
961
|
+
response = self.chat_llm.generate(current_messages)
|
|
962
|
+
time_end = time.time()
|
|
963
|
+
self._start_post_chat_processing(
|
|
964
|
+
user_id=user_id,
|
|
965
|
+
cube_id=cube_id,
|
|
966
|
+
query=query,
|
|
967
|
+
full_response=response,
|
|
968
|
+
system_prompt=system_prompt,
|
|
969
|
+
time_start=time_start,
|
|
970
|
+
time_end=time_end,
|
|
971
|
+
speed_improvement=0.0,
|
|
972
|
+
current_messages=current_messages,
|
|
973
|
+
)
|
|
974
|
+
return response, memories_list
|
|
975
|
+
|
|
976
|
+
def chat_with_references(
|
|
977
|
+
self,
|
|
978
|
+
query: str,
|
|
979
|
+
user_id: str,
|
|
980
|
+
cube_id: str | None = None,
|
|
981
|
+
history: MessageList | None = None,
|
|
982
|
+
top_k: int = 20,
|
|
759
983
|
internet_search: bool = False,
|
|
984
|
+
moscube: bool = False,
|
|
760
985
|
) -> Generator[str, None, None]:
|
|
761
986
|
"""
|
|
762
987
|
Chat with LLM with memory references and streaming output.
|
|
@@ -782,7 +1007,9 @@ class MOSProduct(MOSCore):
|
|
|
782
1007
|
top_k=top_k,
|
|
783
1008
|
mode="fine",
|
|
784
1009
|
internet_search=internet_search,
|
|
1010
|
+
moscube=moscube,
|
|
785
1011
|
)["text_mem"]
|
|
1012
|
+
|
|
786
1013
|
yield f"data: {json.dumps({'type': 'status', 'data': '1'})}\n\n"
|
|
787
1014
|
search_time_end = time.time()
|
|
788
1015
|
logger.info(
|
|
@@ -794,6 +1021,9 @@ class MOSProduct(MOSCore):
|
|
|
794
1021
|
if memories_result:
|
|
795
1022
|
memories_list = memories_result[0]["memories"]
|
|
796
1023
|
memories_list = self._filter_memories_by_threshold(memories_list)
|
|
1024
|
+
|
|
1025
|
+
reference = prepare_reference_data(memories_list)
|
|
1026
|
+
yield f"data: {json.dumps({'type': 'reference', 'data': reference})}\n\n"
|
|
797
1027
|
# Build custom system prompt with relevant memories)
|
|
798
1028
|
system_prompt = self._build_enhance_system_prompt(user_id, memories_list)
|
|
799
1029
|
# Get chat history
|
|
@@ -802,7 +1032,7 @@ class MOSProduct(MOSCore):
|
|
|
802
1032
|
|
|
803
1033
|
chat_history = self.chat_history_manager[user_id]
|
|
804
1034
|
if history:
|
|
805
|
-
chat_history.chat_history = history[-
|
|
1035
|
+
chat_history.chat_history = history[-20:]
|
|
806
1036
|
current_messages = [
|
|
807
1037
|
{"role": "system", "content": system_prompt},
|
|
808
1038
|
*chat_history.chat_history,
|
|
@@ -835,7 +1065,7 @@ class MOSProduct(MOSCore):
|
|
|
835
1065
|
elif self.config.chat_model.backend == "vllm":
|
|
836
1066
|
response_stream = self.chat_llm.generate_stream(current_messages)
|
|
837
1067
|
else:
|
|
838
|
-
if self.config.chat_model.backend in ["huggingface", "vllm"]:
|
|
1068
|
+
if self.config.chat_model.backend in ["huggingface", "vllm", "openai"]:
|
|
839
1069
|
response_stream = self.chat_llm.generate_stream(current_messages)
|
|
840
1070
|
else:
|
|
841
1071
|
response_stream = self.chat_llm.generate(current_messages)
|
|
@@ -852,7 +1082,7 @@ class MOSProduct(MOSCore):
|
|
|
852
1082
|
full_response = ""
|
|
853
1083
|
token_count = 0
|
|
854
1084
|
# Use tiktoken for proper token-based chunking
|
|
855
|
-
if self.config.chat_model.backend not in ["huggingface", "vllm"]:
|
|
1085
|
+
if self.config.chat_model.backend not in ["huggingface", "vllm", "openai"]:
|
|
856
1086
|
# For non-huggingface backends, we need to collect the full response first
|
|
857
1087
|
full_response_text = ""
|
|
858
1088
|
for chunk in response_stream:
|
|
@@ -868,7 +1098,7 @@ class MOSProduct(MOSCore):
|
|
|
868
1098
|
full_response += chunk
|
|
869
1099
|
|
|
870
1100
|
# Process buffer to ensure complete reference tags
|
|
871
|
-
processed_chunk, remaining_buffer =
|
|
1101
|
+
processed_chunk, remaining_buffer = process_streaming_references_complete(buffer)
|
|
872
1102
|
|
|
873
1103
|
if processed_chunk:
|
|
874
1104
|
chunk_data = f"data: {json.dumps({'type': 'text', 'data': processed_chunk}, ensure_ascii=False)}\n\n"
|
|
@@ -877,88 +1107,34 @@ class MOSProduct(MOSCore):
|
|
|
877
1107
|
|
|
878
1108
|
# Process any remaining buffer
|
|
879
1109
|
if buffer:
|
|
880
|
-
processed_chunk, remaining_buffer =
|
|
1110
|
+
processed_chunk, remaining_buffer = process_streaming_references_complete(buffer)
|
|
881
1111
|
if processed_chunk:
|
|
882
1112
|
chunk_data = f"data: {json.dumps({'type': 'text', 'data': processed_chunk}, ensure_ascii=False)}\n\n"
|
|
883
1113
|
yield chunk_data
|
|
884
1114
|
|
|
885
|
-
# Prepare reference data
|
|
886
|
-
reference = []
|
|
887
|
-
for memories in memories_list:
|
|
888
|
-
memories_json = memories.model_dump()
|
|
889
|
-
memories_json["metadata"]["ref_id"] = f"{memories.id.split('-')[0]}"
|
|
890
|
-
memories_json["metadata"]["embedding"] = []
|
|
891
|
-
memories_json["metadata"]["sources"] = []
|
|
892
|
-
memories_json["metadata"]["memory"] = memories.memory
|
|
893
|
-
memories_json["metadata"]["id"] = memories.id
|
|
894
|
-
reference.append({"metadata": memories_json["metadata"]})
|
|
895
|
-
|
|
896
|
-
yield f"data: {json.dumps({'type': 'reference', 'data': reference})}\n\n"
|
|
897
1115
|
# set kvcache improve speed
|
|
898
1116
|
speed_improvement = round(float((len(system_prompt) / 2) * 0.0048 + 44.5), 1)
|
|
899
1117
|
total_time = round(float(time_end - time_start), 1)
|
|
900
1118
|
|
|
901
1119
|
yield f"data: {json.dumps({'type': 'time', 'data': {'total_time': total_time, 'speed_improvement': f'{speed_improvement}%'}})}\n\n"
|
|
1120
|
+
# get further suggestion
|
|
1121
|
+
current_messages.append({"role": "assistant", "content": full_response})
|
|
1122
|
+
further_suggestion = self._get_further_suggestion(current_messages)
|
|
1123
|
+
logger.info(f"further_suggestion: {further_suggestion}")
|
|
1124
|
+
yield f"data: {json.dumps({'type': 'suggestion', 'data': further_suggestion})}\n\n"
|
|
902
1125
|
yield f"data: {json.dumps({'type': 'end'})}\n\n"
|
|
903
1126
|
|
|
904
|
-
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
clean_response, extracted_references = self._extract_references_from_response(full_response)
|
|
908
|
-
logger.info(f"Extracted {len(extracted_references)} references from response")
|
|
909
|
-
|
|
910
|
-
# Send chat report if online_bot is available
|
|
911
|
-
try:
|
|
912
|
-
from memos.memos_tools.notification_utils import send_online_bot_notification
|
|
913
|
-
|
|
914
|
-
# Prepare data for online_bot
|
|
915
|
-
chat_data = {
|
|
916
|
-
"query": query,
|
|
917
|
-
"user_id": user_id,
|
|
918
|
-
"cube_id": cube_id,
|
|
919
|
-
"system_prompt": system_prompt,
|
|
920
|
-
"full_response": full_response,
|
|
921
|
-
}
|
|
922
|
-
|
|
923
|
-
system_data = {
|
|
924
|
-
"references": extracted_references,
|
|
925
|
-
"time_start": time_start,
|
|
926
|
-
"time_end": time_end,
|
|
927
|
-
"speed_improvement": speed_improvement,
|
|
928
|
-
}
|
|
929
|
-
|
|
930
|
-
emoji_config = {"chat": "💬", "system_info": "📊"}
|
|
931
|
-
|
|
932
|
-
send_online_bot_notification(
|
|
933
|
-
online_bot=self.online_bot,
|
|
934
|
-
header_name="MemOS Chat Report",
|
|
935
|
-
sub_title_name="chat_with_references",
|
|
936
|
-
title_color="#00956D",
|
|
937
|
-
other_data1=chat_data,
|
|
938
|
-
other_data2=system_data,
|
|
939
|
-
emoji=emoji_config,
|
|
940
|
-
)
|
|
941
|
-
except Exception as e:
|
|
942
|
-
logger.warning(f"Failed to send chat notification: {e}")
|
|
943
|
-
|
|
944
|
-
self._send_message_to_scheduler(
|
|
945
|
-
user_id=user_id, mem_cube_id=cube_id, query=clean_response, label=ANSWER_LABEL
|
|
946
|
-
)
|
|
947
|
-
self.add(
|
|
1127
|
+
# Asynchronous processing of logs, notifications and memory additions
|
|
1128
|
+
self._start_post_chat_processing(
|
|
948
1129
|
user_id=user_id,
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
"content": clean_response, # Store clean text without reference markers
|
|
958
|
-
"chat_time": str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
|
|
959
|
-
},
|
|
960
|
-
],
|
|
961
|
-
mem_cube_id=cube_id,
|
|
1130
|
+
cube_id=cube_id,
|
|
1131
|
+
query=query,
|
|
1132
|
+
full_response=full_response,
|
|
1133
|
+
system_prompt=system_prompt,
|
|
1134
|
+
time_start=time_start,
|
|
1135
|
+
time_end=time_end,
|
|
1136
|
+
speed_improvement=speed_improvement,
|
|
1137
|
+
current_messages=current_messages,
|
|
962
1138
|
)
|
|
963
1139
|
|
|
964
1140
|
def get_all(
|
|
@@ -1149,6 +1325,7 @@ class MOSProduct(MOSCore):
|
|
|
1149
1325
|
memories["metadata"]["memory"] = memories["memory"]
|
|
1150
1326
|
memories_list.append(memories)
|
|
1151
1327
|
reformat_memory_list.append({"cube_id": memory["cube_id"], "memories": memories_list})
|
|
1328
|
+
logger.info(f"search memory list is : {reformat_memory_list}")
|
|
1152
1329
|
search_result["text_mem"] = reformat_memory_list
|
|
1153
1330
|
time_end = time.time()
|
|
1154
1331
|
logger.info(
|