MemoryOS 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MemoryOS might be problematic. Click here for more details.
- {memoryos-1.0.0.dist-info → memoryos-1.0.1.dist-info}/METADATA +2 -1
- {memoryos-1.0.0.dist-info → memoryos-1.0.1.dist-info}/RECORD +42 -33
- memos/__init__.py +1 -1
- memos/api/config.py +25 -0
- memos/api/context/context_thread.py +96 -0
- memos/api/context/dependencies.py +0 -11
- memos/api/middleware/request_context.py +94 -0
- memos/api/product_api.py +5 -1
- memos/api/product_models.py +16 -0
- memos/api/routers/product_router.py +39 -3
- memos/api/start_api.py +3 -0
- memos/configs/memory.py +13 -0
- memos/configs/reranker.py +18 -0
- memos/graph_dbs/base.py +4 -2
- memos/graph_dbs/nebular.py +215 -68
- memos/graph_dbs/neo4j.py +14 -12
- memos/graph_dbs/neo4j_community.py +6 -3
- memos/llms/vllm.py +2 -0
- memos/log.py +120 -8
- memos/mem_os/core.py +30 -2
- memos/mem_os/product.py +386 -146
- memos/mem_os/utils/reference_utils.py +20 -0
- memos/mem_reader/simple_struct.py +112 -43
- memos/mem_user/mysql_user_manager.py +4 -2
- memos/memories/textual/item.py +1 -1
- memos/memories/textual/tree.py +31 -1
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +3 -1
- memos/memories/textual/tree_text_memory/retrieve/recall.py +53 -3
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +74 -14
- memos/memories/textual/tree_text_memory/retrieve/utils.py +6 -4
- memos/memos_tools/notification_utils.py +46 -0
- memos/reranker/__init__.py +4 -0
- memos/reranker/base.py +24 -0
- memos/reranker/cosine_local.py +95 -0
- memos/reranker/factory.py +43 -0
- memos/reranker/http_bge.py +99 -0
- memos/reranker/noop.py +16 -0
- memos/templates/mem_reader_prompts.py +289 -40
- memos/templates/mos_prompts.py +133 -60
- {memoryos-1.0.0.dist-info → memoryos-1.0.1.dist-info}/LICENSE +0 -0
- {memoryos-1.0.0.dist-info → memoryos-1.0.1.dist-info}/WHEEL +0 -0
- {memoryos-1.0.0.dist-info → memoryos-1.0.1.dist-info}/entry_points.txt +0 -0
memos/mem_os/product.py
CHANGED
|
@@ -1,6 +1,8 @@
|
|
|
1
|
+
import asyncio
|
|
1
2
|
import json
|
|
2
3
|
import os
|
|
3
4
|
import random
|
|
5
|
+
import threading
|
|
4
6
|
import time
|
|
5
7
|
|
|
6
8
|
from collections.abc import Generator
|
|
@@ -24,6 +26,7 @@ from memos.mem_os.utils.format_utils import (
|
|
|
24
26
|
sort_children_by_memory_type,
|
|
25
27
|
)
|
|
26
28
|
from memos.mem_os.utils.reference_utils import (
|
|
29
|
+
prepare_reference_data,
|
|
27
30
|
process_streaming_references_complete,
|
|
28
31
|
)
|
|
29
32
|
from memos.mem_scheduler.schemas.general_schemas import (
|
|
@@ -36,7 +39,12 @@ from memos.mem_user.user_manager import UserRole
|
|
|
36
39
|
from memos.memories.textual.item import (
|
|
37
40
|
TextualMemoryItem,
|
|
38
41
|
)
|
|
39
|
-
from memos.templates.mos_prompts import
|
|
42
|
+
from memos.templates.mos_prompts import (
|
|
43
|
+
FURTHER_SUGGESTION_PROMPT,
|
|
44
|
+
SUGGESTION_QUERY_PROMPT_EN,
|
|
45
|
+
SUGGESTION_QUERY_PROMPT_ZH,
|
|
46
|
+
get_memos_prompt,
|
|
47
|
+
)
|
|
40
48
|
from memos.types import MessageList
|
|
41
49
|
|
|
42
50
|
|
|
@@ -47,6 +55,39 @@ load_dotenv()
|
|
|
47
55
|
CUBE_PATH = os.getenv("MOS_CUBE_PATH", "/tmp/data/")
|
|
48
56
|
|
|
49
57
|
|
|
58
|
+
def _short_id(mem_id: str) -> str:
    """Return the part of ``mem_id`` before its first ``-``.

    An empty or ``None`` id yields an empty string.
    """
    return mem_id.split("-")[0] if mem_id else ""


def _format_mem_block(
    memories_all, max_items: int = 20, max_chars_each: int = 320
) -> tuple[str, str]:
    """Render memories as numbered reference lines, split by memory type.

    Every rendered line has the form::

        [<index>:<short id>] :: [<tag>] <text>

    ``index`` is the 1-based position of the item in ``memories_all`` and is
    shared by both blocks, ``short id`` is the id segment before the first
    ``-`` (``mem_<index>`` when the item has no id), and ``tag`` is ``O`` when
    the item's ``memory_type`` contains ``"Outer"`` and ``P`` otherwise.

    Args:
        memories_all: Items exposing ``id``, ``memory`` and ``metadata``.
            ``metadata`` may be an attribute-style object or a mapping.
        max_items: Only the first ``max_items`` entries are rendered.
        max_chars_each: Texts longer than this are cut and end with ``…``.

    Returns:
        ``(outer_block, personal_block)``, each a newline-joined string.
        Both are ``"(none)"`` when ``memories_all`` is empty.
    """
    if not memories_all:
        return "(none)", "(none)"

    outer_lines: list[str] = []
    personal_lines: list[str] = []
    for idx, mem in enumerate(memories_all[:max_items], 1):
        metadata = getattr(mem, "metadata", None)
        mem_type = getattr(metadata, "memory_type", None)
        # Only fall back to key lookup when metadata really supports it;
        # an attribute-style object with an empty memory_type has no .get().
        if not mem_type and hasattr(metadata, "get"):
            mem_type = metadata.get("memory_type", "")

        text = (getattr(mem, "memory", "") or "").replace("\n", " ").strip()
        if len(text) > max_chars_each:
            text = text[: max_chars_each - 1] + "…"

        mem_id = _short_id(getattr(mem, "id", "") or "") or f"mem_{idx}"
        if "Outer" in str(mem_type):
            # No trailing newline here: the join below supplies the separator,
            # so both blocks use the same one-line-per-memory layout.
            outer_lines.append(f"[{idx}:{mem_id}] :: [O] {text}")
        else:
            personal_lines.append(f"[{idx}:{mem_id}] :: [P] {text}")
    return "\n".join(outer_lines), "\n".join(personal_lines)
|
|
89
|
+
|
|
90
|
+
|
|
50
91
|
class MOSProduct(MOSCore):
|
|
51
92
|
"""
|
|
52
93
|
The MOSProduct class inherits from MOSCore and manages multiple users.
|
|
@@ -350,7 +391,11 @@ class MOSProduct(MOSCore):
|
|
|
350
391
|
return self._create_user_config(user_id, user_config)
|
|
351
392
|
|
|
352
393
|
def _build_system_prompt(
|
|
353
|
-
self,
|
|
394
|
+
self,
|
|
395
|
+
memories_all: list[TextualMemoryItem],
|
|
396
|
+
base_prompt: str | None = None,
|
|
397
|
+
tone: str = "friendly",
|
|
398
|
+
verbosity: str = "mid",
|
|
354
399
|
) -> str:
|
|
355
400
|
"""
|
|
356
401
|
Build custom system prompt for the user with memory references.
|
|
@@ -362,51 +407,46 @@ class MOSProduct(MOSCore):
|
|
|
362
407
|
Returns:
|
|
363
408
|
str: The custom system prompt.
|
|
364
409
|
"""
|
|
365
|
-
|
|
366
410
|
# Build base prompt
|
|
367
411
|
# Add memory context if available
|
|
368
|
-
|
|
369
|
-
|
|
370
|
-
|
|
371
|
-
|
|
372
|
-
|
|
373
|
-
|
|
374
|
-
|
|
375
|
-
|
|
376
|
-
|
|
377
|
-
|
|
378
|
-
|
|
412
|
+
now = datetime.now()
|
|
413
|
+
formatted_date = now.strftime("%Y-%m-%d (%A)")
|
|
414
|
+
sys_body = get_memos_prompt(
|
|
415
|
+
date=formatted_date, tone=tone, verbosity=verbosity, mode="base"
|
|
416
|
+
)
|
|
417
|
+
mem_block_o, mem_block_p = _format_mem_block(memories_all)
|
|
418
|
+
mem_block = mem_block_o + "\n" + mem_block_p
|
|
419
|
+
prefix = (base_prompt.strip() + "\n\n") if base_prompt else ""
|
|
420
|
+
return (
|
|
421
|
+
prefix
|
|
422
|
+
+ sys_body
|
|
423
|
+
+ "\n\n# Memories\n## PersonalMemory & OuterMemory (ordered)\n"
|
|
424
|
+
+ mem_block
|
|
425
|
+
)
|
|
379
426
|
|
|
380
427
|
def _build_enhance_system_prompt(
    self,
    user_id: str,
    memories_all: list[TextualMemoryItem],
    tone: str = "friendly",
    verbosity: str = "mid",
) -> str:
    """
    Assemble the "enhance"-mode system prompt followed by the memory sections.

    The prompt body comes from ``get_memos_prompt`` (mode ``"enhance"``) and is
    stamped with today's date. The personal memories are appended first and the
    outer memories second, each under its own heading.

    Args:
        user_id: Accepted for interface compatibility; not read by this method.
        memories_all: Memories to list in the prompt.
        tone: Forwarded to ``get_memos_prompt``.
        verbosity: Forwarded to ``get_memos_prompt``.

    Returns:
        str: The complete system prompt.
    """
    today = datetime.now().strftime("%Y-%m-%d (%A)")
    prompt_head = get_memos_prompt(date=today, tone=tone, verbosity=verbosity, mode="enhance")
    outer_block, personal_block = _format_mem_block(memories_all)
    sections = (
        prompt_head,
        "\n\n# Memories\n## PersonalMemory (ordered)\n",
        personal_block,
        "\n## OuterMemory (ordered)\n",
        outer_block,
    )
    return "".join(sections)
|
|
410
450
|
|
|
411
451
|
def _extract_references_from_response(self, response: str) -> tuple[str, list[dict]]:
|
|
412
452
|
"""
|
|
@@ -495,13 +535,208 @@ class MOSProduct(MOSCore):
|
|
|
495
535
|
)
|
|
496
536
|
self.mem_scheduler.submit_messages(messages=[message_item])
|
|
497
537
|
|
|
538
|
+
async def _post_chat_processing(
    self,
    user_id: str,
    cube_id: str,
    query: str,
    full_response: str,
    system_prompt: str,
    time_start: float,
    time_end: float,
    speed_improvement: float,
    current_messages: list,
) -> None:
    """
    Run the follow-up work for a finished chat turn.

    Steps, in order: log the exchange, strip reference markers from the
    response, send the chat report through ``self.online_bot`` when one is
    configured, forward the cleaned answer to the scheduler, and store the
    question/answer pair as new memories. Any failure is logged and swallowed
    so this method never raises to its caller.
    """
    try:
        logger.info(
            f"user_id: {user_id}, cube_id: {cube_id}, current_messages: {current_messages}"
        )
        logger.info(f"user_id: {user_id}, cube_id: {cube_id}, full_response: {full_response}")

        clean_response, extracted_references = self._extract_references_from_response(
            full_response
        )
        logger.info(f"Extracted {len(extracted_references)} references from response")

        # The notification is best-effort: a failure here must not prevent the
        # scheduler message and the memory write below.
        if self.online_bot:
            try:
                from memos.memos_tools.notification_utils import (
                    send_online_bot_notification_async,
                )

                await send_online_bot_notification_async(
                    online_bot=self.online_bot,
                    header_name="MemOS Chat Report",
                    sub_title_name="chat_with_references",
                    title_color="#00956D",
                    other_data1={
                        "query": query,
                        "user_id": user_id,
                        "cube_id": cube_id,
                        "system_prompt": system_prompt,
                        "full_response": full_response,
                    },
                    other_data2={
                        "references": extracted_references,
                        "time_start": time_start,
                        "time_end": time_end,
                        "speed_improvement": speed_improvement,
                    },
                    emoji={"chat": "💬", "system_info": "📊"},
                )
            except Exception as notify_error:
                logger.warning(f"Failed to send chat notification (async): {notify_error}")

        self._send_message_to_scheduler(
            user_id=user_id, mem_cube_id=cube_id, query=clean_response, label=ANSWER_LABEL
        )

        def _stamp() -> str:
            return str(datetime.now().strftime("%Y-%m-%d %H:%M:%S"))

        user_turn = {"role": "user", "content": query, "chat_time": _stamp()}
        # The stored answer is the cleaned text, without reference markers.
        assistant_turn = {
            "role": "assistant",
            "content": clean_response,
            "chat_time": _stamp(),
        }
        self.add(
            user_id=user_id,
            messages=[user_turn, assistant_turn],
            mem_cube_id=cube_id,
        )

        logger.info(f"Post-chat processing completed for user {user_id}")

    except Exception as e:
        logger.error(f"Error in post-chat processing for user {user_id}: {e}", exc_info=True)
|
|
626
|
+
|
|
627
|
+
def _start_post_chat_processing(
    self,
    user_id: str,
    cube_id: str,
    query: str,
    full_response: str,
    system_prompt: str,
    time_start: float,
    time_end: float,
    speed_improvement: float,
    current_messages: list,
) -> None:
    """
    Start ``_post_chat_processing`` in the background without blocking the caller.

    When called from inside a running event loop, the coroutine is scheduled as
    a task on that loop. Otherwise it is executed to completion on a private
    event loop inside a daemon thread.

    All arguments are forwarded unchanged to ``_post_chat_processing``.
    """
    payload = {
        "user_id": user_id,
        "cube_id": cube_id,
        "query": query,
        "full_response": full_response,
        "system_prompt": system_prompt,
        "time_start": time_start,
        "time_end": time_end,
        "speed_improvement": speed_improvement,
        "current_messages": current_messages,
    }

    def run_async_in_thread() -> None:
        """Run the coroutine on a fresh event loop owned by this thread."""
        try:
            # asyncio.run creates the loop, runs the coroutine and closes the loop.
            asyncio.run(self._post_chat_processing(**payload))
        except Exception as e:
            logger.error(
                f"Error in thread-based post-chat processing for user {user_id}: {e}",
                exc_info=True,
            )

    try:
        loop = asyncio.get_running_loop()
    except RuntimeError:
        loop = None

    if loop is None:
        # No event loop in this thread: use a daemon thread so the work never
        # blocks interpreter exit.
        thread = threading.Thread(
            target=run_async_in_thread,
            name=f"PostChatProcessing-{user_id}",
            daemon=True,
        )
        thread.start()
        return

    # The event loop only keeps weak references to tasks, so hold a strong
    # reference on the instance until the task finishes; otherwise it may be
    # garbage-collected before it completes.
    pending = getattr(self, "_post_chat_tasks", None)
    if pending is None:
        pending = set()
        self._post_chat_tasks = pending

    def on_done(finished: asyncio.Task) -> None:
        pending.discard(finished)
        # Task.exception() raises CancelledError on a cancelled task.
        if finished.cancelled():
            return
        error = finished.exception()
        if error is not None:
            # Pass the exception itself: there is no active "except" context
            # inside a done-callback for exc_info=True to pick up.
            logger.error(
                f"Error in background post-chat processing for user {user_id}: {error}",
                exc_info=error,
            )

    task = loop.create_task(self._post_chat_processing(**payload))
    pending.add(task)
    task.add_done_callback(on_done)
|
|
705
|
+
|
|
498
706
|
def _filter_memories_by_threshold(
    self,
    memories: list[TextualMemoryItem],
    threshold: float = 0.30,
    min_num: int = 3,
    memory_type: Literal["OuterMemory"] = "OuterMemory",
) -> list[TextualMemoryItem]:
    """
    Keep the memories whose relevance reaches ``threshold``, with a floor on
    how many memories of a type other than ``memory_type`` are returned.

    Rules, applied to the memories ranked by ``metadata.relativity``
    (highest first):

    * If fewer than ``min_num`` memories reach the threshold, the result is
      the top ``min_num`` memories not of ``memory_type`` plus the top
      ``min_num`` memories of ``memory_type``, regardless of threshold.
    * Otherwise all memories reaching the threshold are kept, and if fewer
      than ``min_num`` of them are not of ``memory_type``, the next-best such
      memories are added until ``min_num`` is reached (or none are left).

    Args:
        memories: Candidate memories; the input order does not matter.
        threshold: Minimum ``metadata.relativity`` for a memory to be kept.
        min_num: Minimum number of memories not of ``memory_type`` to return
            when available.
        memory_type: The type that is exempt from the minimum-count top-up.

    Returns:
        list[TextualMemoryItem]: Selected memories, highest relevance first.
    """

    def relevance(m):
        return m.metadata.relativity

    ranked = sorted(memories, key=relevance, reverse=True)
    # Partition the *ranked* list: the top-up slice below relies on the
    # above-threshold items being the leading elements of ``personal``.
    personal = [m for m in ranked if m.metadata.memory_type != memory_type]
    outer = [m for m in ranked if m.metadata.memory_type == memory_type]

    selected = [m for m in ranked if relevance(m) >= threshold]
    if len(selected) < min_num:
        selected = personal[:min_num] + outer[:min_num]
    else:
        kept_personal = sum(1 for m in selected if m.metadata.memory_type != memory_type)
        if kept_personal < min_num:
            selected += personal[kept_personal:min_num]
    return sorted(selected, key=relevance, reverse=True)
|
|
505
740
|
|
|
506
741
|
def register_mem_cube(
|
|
507
742
|
self,
|
|
@@ -599,7 +834,7 @@ class MOSProduct(MOSCore):
|
|
|
599
834
|
|
|
600
835
|
# Create a default cube for the user using MOSCore's methods
|
|
601
836
|
default_cube_name = f"{user_name}_{user_id}_default_cube"
|
|
602
|
-
mem_cube_name_or_path =
|
|
837
|
+
mem_cube_name_or_path = os.path.join(CUBE_PATH, default_cube_name)
|
|
603
838
|
default_cube_id = self.create_cube_for_user(
|
|
604
839
|
cube_name=default_cube_name, owner_id=user_id, cube_path=mem_cube_name_or_path
|
|
605
840
|
)
|
|
@@ -633,7 +868,23 @@ class MOSProduct(MOSCore):
|
|
|
633
868
|
except Exception as e:
|
|
634
869
|
return {"status": "error", "message": f"Failed to register user: {e!s}"}
|
|
635
870
|
|
|
636
|
-
def
|
|
871
|
+
def _get_further_suggestion(self, message: MessageList | None = None) -> list[str]:
    """Ask the chat LLM for follow-up query suggestions based on a dialogue.

    Only the last two entries of ``message`` are put into the prompt.

    Args:
        message: Conversation messages, each with ``role`` and ``content``.

    Returns:
        list[str]: The value under ``"query"`` in the LLM's JSON answer, or an
        empty list when there is no dialogue or when generation/parsing fails.
    """
    # Without a dialogue there is nothing to suggest from; return early instead
    # of failing on ``None[-2:]`` or sending an empty prompt to the LLM.
    if not message:
        return []
    try:
        dialogue_info = "\n".join(f"{msg['role']}: {msg['content']}" for msg in message[-2:])
        further_suggestion_prompt = FURTHER_SUGGESTION_PROMPT.format(dialogue=dialogue_info)
        message_list = [{"role": "system", "content": further_suggestion_prompt}]
        response = self.chat_llm.generate(message_list)
        clean_response = clean_json_response(response)
        response_json = json.loads(clean_response)
        return response_json["query"]
    except Exception as e:
        # Suggestions are optional: log the failure and fall back to none.
        logger.error(f"Error getting further suggestion: {e}", exc_info=True)
        return []
|
|
884
|
+
|
|
885
|
+
def get_suggestion_query(
|
|
886
|
+
self, user_id: str, language: str = "zh", message: MessageList | None = None
|
|
887
|
+
) -> list[str]:
|
|
637
888
|
"""Get suggestion query from LLM.
|
|
638
889
|
Args:
|
|
639
890
|
user_id (str): User ID.
|
|
@@ -642,37 +893,13 @@ class MOSProduct(MOSCore):
|
|
|
642
893
|
Returns:
|
|
643
894
|
list[str]: The suggestion query list.
|
|
644
895
|
"""
|
|
645
|
-
|
|
896
|
+
if message:
|
|
897
|
+
further_suggestion = self._get_further_suggestion(message)
|
|
898
|
+
return further_suggestion
|
|
646
899
|
if language == "zh":
|
|
647
|
-
suggestion_prompt =
|
|
648
|
-
你是一个有用的助手,可以帮助用户生成建议查询。
|
|
649
|
-
我将获取用户最近的一些记忆,
|
|
650
|
-
你应该生成一些建议查询,这些查询应该是用户想要查询的内容,
|
|
651
|
-
用户最近的记忆是:
|
|
652
|
-
{memories}
|
|
653
|
-
请生成3个建议查询用中文,
|
|
654
|
-
输出应该是json格式,键是"query",值是一个建议查询列表。
|
|
655
|
-
|
|
656
|
-
示例:
|
|
657
|
-
{{
|
|
658
|
-
"query": ["查询1", "查询2", "查询3"]
|
|
659
|
-
}}
|
|
660
|
-
"""
|
|
900
|
+
suggestion_prompt = SUGGESTION_QUERY_PROMPT_ZH
|
|
661
901
|
else: # English
|
|
662
|
-
suggestion_prompt =
|
|
663
|
-
You are a helpful assistant that can help users to generate suggestion query.
|
|
664
|
-
I will get some user recently memories,
|
|
665
|
-
you should generate some suggestion query, the query should be user what to query,
|
|
666
|
-
user recently memories is:
|
|
667
|
-
{memories}
|
|
668
|
-
if the user recently memories is empty, please generate 3 suggestion query in English,
|
|
669
|
-
output should be a json format, the key is "query", the value is a list of suggestion query.
|
|
670
|
-
|
|
671
|
-
example:
|
|
672
|
-
{{
|
|
673
|
-
"query": ["query1", "query2", "query3"]
|
|
674
|
-
}}
|
|
675
|
-
"""
|
|
902
|
+
suggestion_prompt = SUGGESTION_QUERY_PROMPT_EN
|
|
676
903
|
text_mem_result = super().search("my recently memories", user_id=user_id, top_k=3)[
|
|
677
904
|
"text_mem"
|
|
678
905
|
]
|
|
@@ -686,14 +913,75 @@ class MOSProduct(MOSCore):
|
|
|
686
913
|
response_json = json.loads(clean_response)
|
|
687
914
|
return response_json["query"]
|
|
688
915
|
|
|
689
|
-
def
|
|
916
|
+
def chat(
    self,
    query: str,
    user_id: str,
    cube_id: str | None = None,
    history: MessageList | None = None,
    base_prompt: str | None = None,
    internet_search: bool = False,
    moscube: bool = False,
    top_k: int = 10,
    threshold: float = 0.5,
) -> tuple[str, list[TextualMemoryItem]]:
    """
    Answer ``query`` with a single (non-streaming) LLM call grounded in the
    user's memories.

    Args:
        query: The user's question.
        user_id: The user whose cubes are loaded and searched.
        cube_id: Restrict the search to this cube when given.
        history: Earlier messages; only the last 20 are sent to the LLM.
        base_prompt: Optional text passed on to the system-prompt builder.
        internet_search: Forwarded to the memory search.
        moscube: Forwarded to the memory search.
        top_k: Number of memories requested from the search.
        threshold: Relevance threshold used to filter the search results.

    Returns:
        tuple[str, list[TextualMemoryItem]]: The LLM response and the memories
        that were placed in the system prompt (their embeddings are cleared).
    """
    self._load_user_cubes(user_id, self.default_cube_config)
    time_start = time.time()
    memories_result = super().search(
        query,
        user_id,
        install_cube_ids=[cube_id] if cube_id else None,
        top_k=top_k,
        mode="fine",
        internet_search=internet_search,
        moscube=moscube,
    )["text_mem"]

    memories_list = []
    if memories_result:
        memories_list = memories_result[0]["memories"]
        memories_list = self._filter_memories_by_threshold(memories_list, threshold)
    # Clear the embedding on every memory that is returned to the caller.
    for memory in memories_list:
        memory.metadata.embedding = []

    # NOTE(review): this calls the parent-class prompt builder via super(),
    # not the one defined on this class — confirm that is intended.
    system_prompt = super()._build_system_prompt(memories_list, base_prompt)
    history_info = history[-20:] if history else []
    current_messages = [
        {"role": "system", "content": system_prompt},
        *history_info,
        {"role": "user", "content": query},
    ]
    response = self.chat_llm.generate(current_messages)
    time_end = time.time()
    # Logging, notification and memory write-back happen in the background.
    self._start_post_chat_processing(
        user_id=user_id,
        cube_id=cube_id,
        query=query,
        full_response=response,
        system_prompt=system_prompt,
        time_start=time_start,
        time_end=time_end,
        speed_improvement=0.0,
        current_messages=current_messages,
    )
    return response, memories_list
|
|
975
|
+
|
|
976
|
+
def chat_with_references(
|
|
977
|
+
self,
|
|
978
|
+
query: str,
|
|
979
|
+
user_id: str,
|
|
980
|
+
cube_id: str | None = None,
|
|
981
|
+
history: MessageList | None = None,
|
|
982
|
+
top_k: int = 20,
|
|
696
983
|
internet_search: bool = False,
|
|
984
|
+
moscube: bool = False,
|
|
697
985
|
) -> Generator[str, None, None]:
|
|
698
986
|
"""
|
|
699
987
|
Chat with LLM with memory references and streaming output.
|
|
@@ -719,7 +1007,9 @@ class MOSProduct(MOSCore):
|
|
|
719
1007
|
top_k=top_k,
|
|
720
1008
|
mode="fine",
|
|
721
1009
|
internet_search=internet_search,
|
|
1010
|
+
moscube=moscube,
|
|
722
1011
|
)["text_mem"]
|
|
1012
|
+
|
|
723
1013
|
yield f"data: {json.dumps({'type': 'status', 'data': '1'})}\n\n"
|
|
724
1014
|
search_time_end = time.time()
|
|
725
1015
|
logger.info(
|
|
@@ -731,6 +1021,9 @@ class MOSProduct(MOSCore):
|
|
|
731
1021
|
if memories_result:
|
|
732
1022
|
memories_list = memories_result[0]["memories"]
|
|
733
1023
|
memories_list = self._filter_memories_by_threshold(memories_list)
|
|
1024
|
+
|
|
1025
|
+
reference = prepare_reference_data(memories_list)
|
|
1026
|
+
yield f"data: {json.dumps({'type': 'reference', 'data': reference})}\n\n"
|
|
734
1027
|
# Build custom system prompt with relevant memories)
|
|
735
1028
|
system_prompt = self._build_enhance_system_prompt(user_id, memories_list)
|
|
736
1029
|
# Get chat history
|
|
@@ -739,7 +1032,7 @@ class MOSProduct(MOSCore):
|
|
|
739
1032
|
|
|
740
1033
|
chat_history = self.chat_history_manager[user_id]
|
|
741
1034
|
if history:
|
|
742
|
-
chat_history.chat_history = history[-
|
|
1035
|
+
chat_history.chat_history = history[-20:]
|
|
743
1036
|
current_messages = [
|
|
744
1037
|
{"role": "system", "content": system_prompt},
|
|
745
1038
|
*chat_history.chat_history,
|
|
@@ -772,7 +1065,7 @@ class MOSProduct(MOSCore):
|
|
|
772
1065
|
elif self.config.chat_model.backend == "vllm":
|
|
773
1066
|
response_stream = self.chat_llm.generate_stream(current_messages)
|
|
774
1067
|
else:
|
|
775
|
-
if self.config.chat_model.backend in ["huggingface", "vllm"]:
|
|
1068
|
+
if self.config.chat_model.backend in ["huggingface", "vllm", "openai"]:
|
|
776
1069
|
response_stream = self.chat_llm.generate_stream(current_messages)
|
|
777
1070
|
else:
|
|
778
1071
|
response_stream = self.chat_llm.generate(current_messages)
|
|
@@ -789,7 +1082,7 @@ class MOSProduct(MOSCore):
|
|
|
789
1082
|
full_response = ""
|
|
790
1083
|
token_count = 0
|
|
791
1084
|
# Use tiktoken for proper token-based chunking
|
|
792
|
-
if self.config.chat_model.backend not in ["huggingface", "vllm"]:
|
|
1085
|
+
if self.config.chat_model.backend not in ["huggingface", "vllm", "openai"]:
|
|
793
1086
|
# For non-huggingface backends, we need to collect the full response first
|
|
794
1087
|
full_response_text = ""
|
|
795
1088
|
for chunk in response_stream:
|
|
@@ -819,83 +1112,29 @@ class MOSProduct(MOSCore):
|
|
|
819
1112
|
chunk_data = f"data: {json.dumps({'type': 'text', 'data': processed_chunk}, ensure_ascii=False)}\n\n"
|
|
820
1113
|
yield chunk_data
|
|
821
1114
|
|
|
822
|
-
# Prepare reference data
|
|
823
|
-
reference = []
|
|
824
|
-
for memories in memories_list:
|
|
825
|
-
memories_json = memories.model_dump()
|
|
826
|
-
memories_json["metadata"]["ref_id"] = f"{memories.id.split('-')[0]}"
|
|
827
|
-
memories_json["metadata"]["embedding"] = []
|
|
828
|
-
memories_json["metadata"]["sources"] = []
|
|
829
|
-
memories_json["metadata"]["memory"] = memories.memory
|
|
830
|
-
memories_json["metadata"]["id"] = memories.id
|
|
831
|
-
reference.append({"metadata": memories_json["metadata"]})
|
|
832
|
-
|
|
833
|
-
yield f"data: {json.dumps({'type': 'reference', 'data': reference})}\n\n"
|
|
834
1115
|
# set kvcache improve speed
|
|
835
1116
|
speed_improvement = round(float((len(system_prompt) / 2) * 0.0048 + 44.5), 1)
|
|
836
1117
|
total_time = round(float(time_end - time_start), 1)
|
|
837
1118
|
|
|
838
1119
|
yield f"data: {json.dumps({'type': 'time', 'data': {'total_time': total_time, 'speed_improvement': f'{speed_improvement}%'}})}\n\n"
|
|
1120
|
+
# get further suggestion
|
|
1121
|
+
current_messages.append({"role": "assistant", "content": full_response})
|
|
1122
|
+
further_suggestion = self._get_further_suggestion(current_messages)
|
|
1123
|
+
logger.info(f"further_suggestion: {further_suggestion}")
|
|
1124
|
+
yield f"data: {json.dumps({'type': 'suggestion', 'data': further_suggestion})}\n\n"
|
|
839
1125
|
yield f"data: {json.dumps({'type': 'end'})}\n\n"
|
|
840
1126
|
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
clean_response, extracted_references = self._extract_references_from_response(full_response)
|
|
845
|
-
logger.info(f"Extracted {len(extracted_references)} references from response")
|
|
846
|
-
|
|
847
|
-
# Send chat report if online_bot is available
|
|
848
|
-
try:
|
|
849
|
-
from memos.memos_tools.notification_utils import send_online_bot_notification
|
|
850
|
-
|
|
851
|
-
# Prepare data for online_bot
|
|
852
|
-
chat_data = {
|
|
853
|
-
"query": query,
|
|
854
|
-
"user_id": user_id,
|
|
855
|
-
"cube_id": cube_id,
|
|
856
|
-
"system_prompt": system_prompt,
|
|
857
|
-
"full_response": full_response,
|
|
858
|
-
}
|
|
859
|
-
|
|
860
|
-
system_data = {
|
|
861
|
-
"references": extracted_references,
|
|
862
|
-
"time_start": time_start,
|
|
863
|
-
"time_end": time_end,
|
|
864
|
-
"speed_improvement": speed_improvement,
|
|
865
|
-
}
|
|
866
|
-
|
|
867
|
-
emoji_config = {"chat": "💬", "system_info": "📊"}
|
|
868
|
-
|
|
869
|
-
send_online_bot_notification(
|
|
870
|
-
online_bot=self.online_bot,
|
|
871
|
-
header_name="MemOS Chat Report",
|
|
872
|
-
sub_title_name="chat_with_references",
|
|
873
|
-
title_color="#00956D",
|
|
874
|
-
other_data1=chat_data,
|
|
875
|
-
other_data2=system_data,
|
|
876
|
-
emoji=emoji_config,
|
|
877
|
-
)
|
|
878
|
-
except Exception as e:
|
|
879
|
-
logger.warning(f"Failed to send chat notification: {e}")
|
|
880
|
-
|
|
881
|
-
self._send_message_to_scheduler(
|
|
882
|
-
user_id=user_id, mem_cube_id=cube_id, query=clean_response, label=ANSWER_LABEL
|
|
883
|
-
)
|
|
884
|
-
self.add(
|
|
1127
|
+
# Asynchronous processing of logs, notifications and memory additions
|
|
1128
|
+
self._start_post_chat_processing(
|
|
885
1129
|
user_id=user_id,
|
|
886
|
-
|
|
887
|
-
|
|
888
|
-
|
|
889
|
-
|
|
890
|
-
|
|
891
|
-
|
|
892
|
-
|
|
893
|
-
|
|
894
|
-
"content": clean_response, # Store clean text without reference markers
|
|
895
|
-
"chat_time": str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
|
|
896
|
-
},
|
|
897
|
-
],
|
|
898
|
-
mem_cube_id=cube_id,
|
|
1130
|
+
cube_id=cube_id,
|
|
1131
|
+
query=query,
|
|
1132
|
+
full_response=full_response,
|
|
1133
|
+
system_prompt=system_prompt,
|
|
1134
|
+
time_start=time_start,
|
|
1135
|
+
time_end=time_end,
|
|
1136
|
+
speed_improvement=speed_improvement,
|
|
1137
|
+
current_messages=current_messages,
|
|
899
1138
|
)
|
|
900
1139
|
|
|
901
1140
|
def get_all(
|
|
@@ -1086,6 +1325,7 @@ class MOSProduct(MOSCore):
|
|
|
1086
1325
|
memories["metadata"]["memory"] = memories["memory"]
|
|
1087
1326
|
memories_list.append(memories)
|
|
1088
1327
|
reformat_memory_list.append({"cube_id": memory["cube_id"], "memories": memories_list})
|
|
1328
|
+
logger.info(f"search memory list is : {reformat_memory_list}")
|
|
1089
1329
|
search_result["text_mem"] = reformat_memory_list
|
|
1090
1330
|
time_end = time.time()
|
|
1091
1331
|
logger.info(
|