MemoryOS 1.0.0__py3-none-any.whl → 1.1.1__py3-none-any.whl

This diff compares the contents of two publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MemoryOS might be problematic. Click here for more details.

Files changed (94)
  1. {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info}/METADATA +8 -2
  2. {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info}/RECORD +92 -69
  3. {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info}/WHEEL +1 -1
  4. memos/__init__.py +1 -1
  5. memos/api/client.py +109 -0
  6. memos/api/config.py +35 -8
  7. memos/api/context/dependencies.py +15 -66
  8. memos/api/middleware/request_context.py +63 -0
  9. memos/api/product_api.py +5 -2
  10. memos/api/product_models.py +107 -16
  11. memos/api/routers/product_router.py +62 -19
  12. memos/api/start_api.py +13 -0
  13. memos/configs/graph_db.py +4 -0
  14. memos/configs/mem_scheduler.py +38 -3
  15. memos/configs/memory.py +13 -0
  16. memos/configs/reranker.py +18 -0
  17. memos/context/context.py +255 -0
  18. memos/embedders/factory.py +2 -0
  19. memos/graph_dbs/base.py +4 -2
  20. memos/graph_dbs/nebular.py +368 -223
  21. memos/graph_dbs/neo4j.py +49 -13
  22. memos/graph_dbs/neo4j_community.py +13 -3
  23. memos/llms/factory.py +2 -0
  24. memos/llms/openai.py +74 -2
  25. memos/llms/vllm.py +2 -0
  26. memos/log.py +128 -4
  27. memos/mem_cube/general.py +3 -1
  28. memos/mem_os/core.py +89 -23
  29. memos/mem_os/main.py +3 -6
  30. memos/mem_os/product.py +418 -154
  31. memos/mem_os/utils/reference_utils.py +20 -0
  32. memos/mem_reader/factory.py +2 -0
  33. memos/mem_reader/simple_struct.py +204 -82
  34. memos/mem_scheduler/analyzer/__init__.py +0 -0
  35. memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +569 -0
  36. memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
  37. memos/mem_scheduler/base_scheduler.py +126 -56
  38. memos/mem_scheduler/general_modules/dispatcher.py +2 -2
  39. memos/mem_scheduler/general_modules/misc.py +99 -1
  40. memos/mem_scheduler/general_modules/scheduler_logger.py +17 -11
  41. memos/mem_scheduler/general_scheduler.py +40 -88
  42. memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
  43. memos/mem_scheduler/memory_manage_modules/memory_filter.py +308 -0
  44. memos/mem_scheduler/{general_modules → memory_manage_modules}/retriever.py +34 -7
  45. memos/mem_scheduler/monitors/dispatcher_monitor.py +9 -8
  46. memos/mem_scheduler/monitors/general_monitor.py +119 -39
  47. memos/mem_scheduler/optimized_scheduler.py +124 -0
  48. memos/mem_scheduler/orm_modules/__init__.py +0 -0
  49. memos/mem_scheduler/orm_modules/base_model.py +635 -0
  50. memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
  51. memos/mem_scheduler/scheduler_factory.py +2 -0
  52. memos/mem_scheduler/schemas/monitor_schemas.py +96 -29
  53. memos/mem_scheduler/utils/config_utils.py +100 -0
  54. memos/mem_scheduler/utils/db_utils.py +33 -0
  55. memos/mem_scheduler/utils/filter_utils.py +1 -1
  56. memos/mem_scheduler/webservice_modules/__init__.py +0 -0
  57. memos/mem_user/mysql_user_manager.py +4 -2
  58. memos/memories/activation/kv.py +2 -1
  59. memos/memories/textual/item.py +96 -17
  60. memos/memories/textual/naive.py +1 -1
  61. memos/memories/textual/tree.py +57 -3
  62. memos/memories/textual/tree_text_memory/organize/handler.py +4 -2
  63. memos/memories/textual/tree_text_memory/organize/manager.py +28 -14
  64. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +1 -2
  65. memos/memories/textual/tree_text_memory/organize/reorganizer.py +75 -23
  66. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +10 -6
  67. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -2
  68. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +2 -0
  69. memos/memories/textual/tree_text_memory/retrieve/recall.py +119 -21
  70. memos/memories/textual/tree_text_memory/retrieve/searcher.py +172 -44
  71. memos/memories/textual/tree_text_memory/retrieve/utils.py +6 -4
  72. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +5 -4
  73. memos/memos_tools/notification_utils.py +46 -0
  74. memos/memos_tools/singleton.py +174 -0
  75. memos/memos_tools/thread_safe_dict.py +22 -0
  76. memos/memos_tools/thread_safe_dict_segment.py +382 -0
  77. memos/parsers/factory.py +2 -0
  78. memos/reranker/__init__.py +4 -0
  79. memos/reranker/base.py +24 -0
  80. memos/reranker/concat.py +59 -0
  81. memos/reranker/cosine_local.py +96 -0
  82. memos/reranker/factory.py +48 -0
  83. memos/reranker/http_bge.py +312 -0
  84. memos/reranker/noop.py +16 -0
  85. memos/templates/mem_reader_prompts.py +289 -40
  86. memos/templates/mem_scheduler_prompts.py +242 -0
  87. memos/templates/mos_prompts.py +133 -60
  88. memos/types.py +4 -1
  89. memos/api/context/context.py +0 -147
  90. memos/mem_scheduler/mos_for_test_scheduler.py +0 -146
  91. {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info}/entry_points.txt +0 -0
  92. {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info/licenses}/LICENSE +0 -0
  93. memos/mem_scheduler/{general_modules → webservice_modules}/rabbitmq_service.py +0 -0
  94. memos/mem_scheduler/{general_modules → webservice_modules}/redis_service.py +0 -0
memos/mem_os/product.py CHANGED
@@ -1,3 +1,4 @@
1
+ import asyncio
1
2
  import json
2
3
  import os
3
4
  import random
@@ -12,6 +13,7 @@ from transformers import AutoTokenizer
12
13
 
13
14
  from memos.configs.mem_cube import GeneralMemCubeConfig
14
15
  from memos.configs.mem_os import MOSConfig
16
+ from memos.context.context import ContextThread
15
17
  from memos.log import get_logger
16
18
  from memos.mem_cube.general import GeneralMemCube
17
19
  from memos.mem_os.core import MOSCore
@@ -24,6 +26,7 @@ from memos.mem_os.utils.format_utils import (
24
26
  sort_children_by_memory_type,
25
27
  )
26
28
  from memos.mem_os.utils.reference_utils import (
29
+ prepare_reference_data,
27
30
  process_streaming_references_complete,
28
31
  )
29
32
  from memos.mem_scheduler.schemas.general_schemas import (
@@ -36,8 +39,14 @@ from memos.mem_user.user_manager import UserRole
36
39
  from memos.memories.textual.item import (
37
40
  TextualMemoryItem,
38
41
  )
39
- from memos.templates.mos_prompts import MEMOS_PRODUCT_BASE_PROMPT, MEMOS_PRODUCT_ENHANCE_PROMPT
42
+ from memos.templates.mos_prompts import (
43
+ FURTHER_SUGGESTION_PROMPT,
44
+ SUGGESTION_QUERY_PROMPT_EN,
45
+ SUGGESTION_QUERY_PROMPT_ZH,
46
+ get_memos_prompt,
47
+ )
40
48
  from memos.types import MessageList
49
+ from memos.utils import timed
41
50
 
42
51
 
43
52
  logger = get_logger(__name__)
@@ -47,6 +56,39 @@ load_dotenv()
47
56
  CUBE_PATH = os.getenv("MOS_CUBE_PATH", "/tmp/data/")
48
57
 
49
58
 
59
+ def _short_id(mem_id: str) -> str:
60
+ return (mem_id or "").split("-")[0] if mem_id else ""
61
+
62
+
63
+ def _format_mem_block(memories_all, max_items: int = 20, max_chars_each: int = 320) -> str:
64
+ """
65
+ Modify TextualMemoryItem Format:
66
+ 1:abcd :: [P] text...
67
+ 2:ef01 :: [O] text...
68
+ sequence is [i:memId] i; [P]=PersonalMemory / [O]=OuterMemory
69
+ """
70
+ if not memories_all:
71
+ return "(none)", "(none)"
72
+
73
+ lines_o = []
74
+ lines_p = []
75
+ for idx, m in enumerate(memories_all[:max_items], 1):
76
+ mid = _short_id(getattr(m, "id", "") or "")
77
+ mtype = getattr(getattr(m, "metadata", {}), "memory_type", None) or getattr(
78
+ m, "metadata", {}
79
+ ).get("memory_type", "")
80
+ tag = "O" if "Outer" in str(mtype) else "P"
81
+ txt = (getattr(m, "memory", "") or "").replace("\n", " ").strip()
82
+ if len(txt) > max_chars_each:
83
+ txt = txt[: max_chars_each - 1] + "…"
84
+ mid = mid or f"mem_{idx}"
85
+ if tag == "O":
86
+ lines_o.append(f"[{idx}:{mid}] :: [{tag}] {txt}\n")
87
+ elif tag == "P":
88
+ lines_p.append(f"[{idx}:{mid}] :: [{tag}] {txt}")
89
+ return "\n".join(lines_o), "\n".join(lines_p)
90
+
91
+
50
92
  class MOSProduct(MOSCore):
51
93
  """
52
94
  The MOSProduct class inherits from MOSCore and manages multiple users.
@@ -216,6 +258,7 @@ class MOSProduct(MOSCore):
216
258
  except Exception as e:
217
259
  logger.error(f"Error pre-loading cubes for user {user_id}: {e}", exc_info=True)
218
260
 
261
+ @timed
219
262
  def _load_user_cubes(
220
263
  self, user_id: str, default_cube_config: GeneralMemCubeConfig | None = None
221
264
  ) -> None:
@@ -247,6 +290,7 @@ class MOSProduct(MOSCore):
247
290
  )
248
291
  except Exception as e:
249
292
  logger.error(f"Failed to load cube {cube.cube_id} for user {user_id}: {e}")
293
+ logger.info(f"load user {user_id} cubes successfully")
250
294
 
251
295
  def _ensure_user_instance(self, user_id: str, max_instances: int | None = None) -> None:
252
296
  """
@@ -350,7 +394,11 @@ class MOSProduct(MOSCore):
350
394
  return self._create_user_config(user_id, user_config)
351
395
 
352
396
  def _build_system_prompt(
353
- self, memories_all: list[TextualMemoryItem], base_prompt: str | None = None
397
+ self,
398
+ memories_all: list[TextualMemoryItem],
399
+ base_prompt: str | None = None,
400
+ tone: str = "friendly",
401
+ verbosity: str = "mid",
354
402
  ) -> str:
355
403
  """
356
404
  Build custom system prompt for the user with memory references.
@@ -362,51 +410,46 @@ class MOSProduct(MOSCore):
362
410
  Returns:
363
411
  str: The custom system prompt.
364
412
  """
365
-
366
413
  # Build base prompt
367
414
  # Add memory context if available
368
- if memories_all:
369
- memory_context = "\n\n## Available ID Memories:\n"
370
- for i, memory in enumerate(memories_all, 1):
371
- # Format: [memory_id]: memory_content
372
- memory_id = f"{memory.id.split('-')[0]}" if hasattr(memory, "id") else f"mem_{i}"
373
- memory_content = memory.memory[:500] if hasattr(memory, "memory") else str(memory)
374
- memory_content = memory_content.replace("\n", " ")
375
- memory_context += f"{memory_id}: {memory_content}\n"
376
- return MEMOS_PRODUCT_BASE_PROMPT + memory_context
377
-
378
- return MEMOS_PRODUCT_BASE_PROMPT
415
+ now = datetime.now()
416
+ formatted_date = now.strftime("%Y-%m-%d (%A)")
417
+ sys_body = get_memos_prompt(
418
+ date=formatted_date, tone=tone, verbosity=verbosity, mode="base"
419
+ )
420
+ mem_block_o, mem_block_p = _format_mem_block(memories_all)
421
+ mem_block = mem_block_o + "\n" + mem_block_p
422
+ prefix = (base_prompt.strip() + "\n\n") if base_prompt else ""
423
+ return (
424
+ prefix
425
+ + sys_body
426
+ + "\n\n# Memories\n## PersonalMemory & OuterMemory (ordered)\n"
427
+ + mem_block
428
+ )
379
429
 
380
430
  def _build_enhance_system_prompt(
381
- self, user_id: str, memories_all: list[TextualMemoryItem]
431
+ self,
432
+ user_id: str,
433
+ memories_all: list[TextualMemoryItem],
434
+ tone: str = "friendly",
435
+ verbosity: str = "mid",
382
436
  ) -> str:
383
437
  """
384
438
  Build enhance prompt for the user with memory references.
385
439
  """
386
- if memories_all:
387
- personal_memory_context = "\n\n## Available ID and PersonalMemory Memories:\n"
388
- outer_memory_context = "\n\n## Available ID and OuterMemory Memories:\n"
389
- for i, memory in enumerate(memories_all, 1):
390
- # Format: [memory_id]: memory_content
391
- if memory.metadata.memory_type != "OuterMemory":
392
- memory_id = (
393
- f"{memory.id.split('-')[0]}" if hasattr(memory, "id") else f"mem_{i}"
394
- )
395
- memory_content = (
396
- memory.memory[:500] if hasattr(memory, "memory") else str(memory)
397
- )
398
- personal_memory_context += f"{memory_id}: {memory_content}\n"
399
- else:
400
- memory_id = (
401
- f"{memory.id.split('-')[0]}" if hasattr(memory, "id") else f"mem_{i}"
402
- )
403
- memory_content = (
404
- memory.memory[:500] if hasattr(memory, "memory") else str(memory)
405
- )
406
- memory_content = memory_content.replace("\n", " ")
407
- outer_memory_context += f"{memory_id}: {memory_content}\n"
408
- return MEMOS_PRODUCT_ENHANCE_PROMPT + personal_memory_context + outer_memory_context
409
- return MEMOS_PRODUCT_ENHANCE_PROMPT
440
+ now = datetime.now()
441
+ formatted_date = now.strftime("%Y-%m-%d (%A)")
442
+ sys_body = get_memos_prompt(
443
+ date=formatted_date, tone=tone, verbosity=verbosity, mode="enhance"
444
+ )
445
+ mem_block_o, mem_block_p = _format_mem_block(memories_all)
446
+ return (
447
+ sys_body
448
+ + "\n\n# Memories\n## PersonalMemory (ordered)\n"
449
+ + mem_block_p
450
+ + "\n## OuterMemory (ordered)\n"
451
+ + mem_block_o
452
+ )
410
453
 
411
454
  def _extract_references_from_response(self, response: str) -> tuple[str, list[dict]]:
412
455
  """
@@ -495,13 +538,208 @@ class MOSProduct(MOSCore):
495
538
  )
496
539
  self.mem_scheduler.submit_messages(messages=[message_item])
497
540
 
541
+ async def _post_chat_processing(
542
+ self,
543
+ user_id: str,
544
+ cube_id: str,
545
+ query: str,
546
+ full_response: str,
547
+ system_prompt: str,
548
+ time_start: float,
549
+ time_end: float,
550
+ speed_improvement: float,
551
+ current_messages: list,
552
+ ) -> None:
553
+ """
554
+ Asynchronous processing of logs, notifications and memory additions
555
+ """
556
+ try:
557
+ logger.info(
558
+ f"user_id: {user_id}, cube_id: {cube_id}, current_messages: {current_messages}"
559
+ )
560
+ logger.info(f"user_id: {user_id}, cube_id: {cube_id}, full_response: {full_response}")
561
+
562
+ clean_response, extracted_references = self._extract_references_from_response(
563
+ full_response
564
+ )
565
+ logger.info(f"Extracted {len(extracted_references)} references from response")
566
+
567
+ # Send chat report notifications asynchronously
568
+ if self.online_bot:
569
+ try:
570
+ from memos.memos_tools.notification_utils import (
571
+ send_online_bot_notification_async,
572
+ )
573
+
574
+ # Prepare notification data
575
+ chat_data = {
576
+ "query": query,
577
+ "user_id": user_id,
578
+ "cube_id": cube_id,
579
+ "system_prompt": system_prompt,
580
+ "full_response": full_response,
581
+ }
582
+
583
+ system_data = {
584
+ "references": extracted_references,
585
+ "time_start": time_start,
586
+ "time_end": time_end,
587
+ "speed_improvement": speed_improvement,
588
+ }
589
+
590
+ emoji_config = {"chat": "💬", "system_info": "📊"}
591
+
592
+ await send_online_bot_notification_async(
593
+ online_bot=self.online_bot,
594
+ header_name="MemOS Chat Report",
595
+ sub_title_name="chat_with_references",
596
+ title_color="#00956D",
597
+ other_data1=chat_data,
598
+ other_data2=system_data,
599
+ emoji=emoji_config,
600
+ )
601
+ except Exception as e:
602
+ logger.warning(f"Failed to send chat notification (async): {e}")
603
+
604
+ self._send_message_to_scheduler(
605
+ user_id=user_id, mem_cube_id=cube_id, query=clean_response, label=ANSWER_LABEL
606
+ )
607
+
608
+ self.add(
609
+ user_id=user_id,
610
+ messages=[
611
+ {
612
+ "role": "user",
613
+ "content": query,
614
+ "chat_time": str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
615
+ },
616
+ {
617
+ "role": "assistant",
618
+ "content": clean_response, # Store clean text without reference markers
619
+ "chat_time": str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
620
+ },
621
+ ],
622
+ mem_cube_id=cube_id,
623
+ )
624
+
625
+ logger.info(f"Post-chat processing completed for user {user_id}")
626
+
627
+ except Exception as e:
628
+ logger.error(f"Error in post-chat processing for user {user_id}: {e}", exc_info=True)
629
+
630
+ def _start_post_chat_processing(
631
+ self,
632
+ user_id: str,
633
+ cube_id: str,
634
+ query: str,
635
+ full_response: str,
636
+ system_prompt: str,
637
+ time_start: float,
638
+ time_end: float,
639
+ speed_improvement: float,
640
+ current_messages: list,
641
+ ) -> None:
642
+ """
643
+ Asynchronous processing of logs, notifications and memory additions, handle synchronous and asynchronous environments
644
+ """
645
+
646
+ def run_async_in_thread():
647
+ """Running asynchronous tasks in a new thread"""
648
+ try:
649
+ loop = asyncio.new_event_loop()
650
+ asyncio.set_event_loop(loop)
651
+ try:
652
+ loop.run_until_complete(
653
+ self._post_chat_processing(
654
+ user_id=user_id,
655
+ cube_id=cube_id,
656
+ query=query,
657
+ full_response=full_response,
658
+ system_prompt=system_prompt,
659
+ time_start=time_start,
660
+ time_end=time_end,
661
+ speed_improvement=speed_improvement,
662
+ current_messages=current_messages,
663
+ )
664
+ )
665
+ finally:
666
+ loop.close()
667
+ except Exception as e:
668
+ logger.error(
669
+ f"Error in thread-based post-chat processing for user {user_id}: {e}",
670
+ exc_info=True,
671
+ )
672
+
673
+ try:
674
+ # Try to get the current event loop
675
+ asyncio.get_running_loop()
676
+ # Create task and store reference to prevent garbage collection
677
+ task = asyncio.create_task(
678
+ self._post_chat_processing(
679
+ user_id=user_id,
680
+ cube_id=cube_id,
681
+ query=query,
682
+ full_response=full_response,
683
+ system_prompt=system_prompt,
684
+ time_start=time_start,
685
+ time_end=time_end,
686
+ speed_improvement=speed_improvement,
687
+ current_messages=current_messages,
688
+ )
689
+ )
690
+ # Add exception handling for the background task
691
+ task.add_done_callback(
692
+ lambda t: logger.error(
693
+ f"Error in background post-chat processing for user {user_id}: {t.exception()}",
694
+ exc_info=True,
695
+ )
696
+ if t.exception()
697
+ else None
698
+ )
699
+ except RuntimeError:
700
+ # No event loop, run in a new thread with context propagation
701
+ thread = ContextThread(
702
+ target=run_async_in_thread,
703
+ name=f"PostChatProcessing-{user_id}",
704
+ # Set as a daemon thread to avoid blocking program exit
705
+ daemon=True,
706
+ )
707
+ thread.start()
708
+
498
709
  def _filter_memories_by_threshold(
499
- self, memories: list[TextualMemoryItem], threshold: float = 0.20
710
+ self,
711
+ memories: list[TextualMemoryItem],
712
+ threshold: float = 0.30,
713
+ min_num: int = 3,
714
+ memory_type: Literal["OuterMemory"] = "OuterMemory",
500
715
  ) -> list[TextualMemoryItem]:
501
716
  """
502
- Filter memories by threshold.
717
+ Filter memories by threshold and type, at least min_num memories for Non-OuterMemory.
718
+ Args:
719
+ memories: list[TextualMemoryItem],
720
+ threshold: float,
721
+ min_num: int,
722
+ memory_type: Literal["OuterMemory"],
723
+ Returns:
724
+ list[TextualMemoryItem]
503
725
  """
504
- return [memory for memory in memories if memory.metadata.relativity >= threshold]
726
+ sorted_memories = sorted(memories, key=lambda m: m.metadata.relativity, reverse=True)
727
+ filtered_person = [m for m in memories if m.metadata.memory_type != memory_type]
728
+ filtered_outer = [m for m in memories if m.metadata.memory_type == memory_type]
729
+ filtered = []
730
+ per_memory_count = 0
731
+ for m in sorted_memories:
732
+ if m.metadata.relativity >= threshold:
733
+ if m.metadata.memory_type != memory_type:
734
+ per_memory_count += 1
735
+ filtered.append(m)
736
+ if len(filtered) < min_num:
737
+ filtered = filtered_person[:min_num] + filtered_outer[:min_num]
738
+ else:
739
+ if per_memory_count < min_num:
740
+ filtered += filtered_person[per_memory_count:min_num]
741
+ filtered_memory = sorted(filtered, key=lambda m: m.metadata.relativity, reverse=True)
742
+ return filtered_memory
505
743
 
506
744
  def register_mem_cube(
507
745
  self,
@@ -540,10 +778,14 @@ class MOSProduct(MOSCore):
540
778
  return
541
779
 
542
780
  # Create MemCube from path
781
+ time_start = time.time()
543
782
  if os.path.exists(mem_cube_name_or_path):
544
783
  mem_cube = GeneralMemCube.init_from_dir(
545
784
  mem_cube_name_or_path, memory_types, default_config
546
785
  )
786
+ logger.info(
787
+ f"time register_mem_cube: init_from_dir time is: {time.time() - time_start}"
788
+ )
547
789
  else:
548
790
  logger.warning(
549
791
  f"MemCube {mem_cube_name_or_path} does not exist, try to init from remote repo."
@@ -556,7 +798,10 @@ class MOSProduct(MOSCore):
556
798
  logger.info(
557
799
  f"Registering MemCube {mem_cube_id} with cube config {mem_cube.config.model_dump(mode='json')}"
558
800
  )
801
+ time_start = time.time()
559
802
  self.mem_cubes[mem_cube_id] = mem_cube
803
+ time_end = time.time()
804
+ logger.info(f"time register_mem_cube: add mem_cube time is: {time_end - time_start}")
560
805
 
561
806
  def user_register(
562
807
  self,
@@ -566,6 +811,7 @@ class MOSProduct(MOSCore):
566
811
  interests: str | None = None,
567
812
  default_mem_cube: GeneralMemCube | None = None,
568
813
  default_cube_config: GeneralMemCubeConfig | None = None,
814
+ mem_cube_id: str | None = None,
569
815
  ) -> dict[str, str]:
570
816
  """Register a new user with configuration and default cube.
571
817
 
@@ -599,17 +845,21 @@ class MOSProduct(MOSCore):
599
845
 
600
846
  # Create a default cube for the user using MOSCore's methods
601
847
  default_cube_name = f"{user_name}_{user_id}_default_cube"
602
- mem_cube_name_or_path = f"{CUBE_PATH}/{default_cube_name}"
848
+ mem_cube_name_or_path = os.path.join(CUBE_PATH, default_cube_name)
603
849
  default_cube_id = self.create_cube_for_user(
604
- cube_name=default_cube_name, owner_id=user_id, cube_path=mem_cube_name_or_path
850
+ cube_name=default_cube_name,
851
+ owner_id=user_id,
852
+ cube_path=mem_cube_name_or_path,
853
+ cube_id=mem_cube_id,
605
854
  )
606
-
855
+ time_start = time.time()
607
856
  if default_mem_cube:
608
857
  try:
609
- default_mem_cube.dump(mem_cube_name_or_path)
858
+ default_mem_cube.dump(mem_cube_name_or_path, memory_types=[])
610
859
  except Exception as e:
611
860
  logger.error(f"Failed to dump default cube: {e}")
612
-
861
+ time_end = time.time()
862
+ logger.info(f"time user_register: dump default cube time is: {time_end - time_start}")
613
863
  # Register the default cube with MOS
614
864
  self.register_mem_cube(
615
865
  mem_cube_name_or_path_or_object=default_mem_cube,
@@ -633,7 +883,23 @@ class MOSProduct(MOSCore):
633
883
  except Exception as e:
634
884
  return {"status": "error", "message": f"Failed to register user: {e!s}"}
635
885
 
636
- def get_suggestion_query(self, user_id: str, language: str = "zh") -> list[str]:
886
+ def _get_further_suggestion(self, message: MessageList | None = None) -> list[str]:
887
+ """Get further suggestion prompt."""
888
+ try:
889
+ dialogue_info = "\n".join([f"{msg['role']}: {msg['content']}" for msg in message[-2:]])
890
+ further_suggestion_prompt = FURTHER_SUGGESTION_PROMPT.format(dialogue=dialogue_info)
891
+ message_list = [{"role": "system", "content": further_suggestion_prompt}]
892
+ response = self.chat_llm.generate(message_list)
893
+ clean_response = clean_json_response(response)
894
+ response_json = json.loads(clean_response)
895
+ return response_json["query"]
896
+ except Exception as e:
897
+ logger.error(f"Error getting further suggestion: {e}", exc_info=True)
898
+ return []
899
+
900
+ def get_suggestion_query(
901
+ self, user_id: str, language: str = "zh", message: MessageList | None = None
902
+ ) -> list[str]:
637
903
  """Get suggestion query from LLM.
638
904
  Args:
639
905
  user_id (str): User ID.
@@ -642,37 +908,13 @@ class MOSProduct(MOSCore):
642
908
  Returns:
643
909
  list[str]: The suggestion query list.
644
910
  """
645
-
911
+ if message:
912
+ further_suggestion = self._get_further_suggestion(message)
913
+ return further_suggestion
646
914
  if language == "zh":
647
- suggestion_prompt = """
648
- 你是一个有用的助手,可以帮助用户生成建议查询。
649
- 我将获取用户最近的一些记忆,
650
- 你应该生成一些建议查询,这些查询应该是用户想要查询的内容,
651
- 用户最近的记忆是:
652
- {memories}
653
- 请生成3个建议查询用中文,
654
- 输出应该是json格式,键是"query",值是一个建议查询列表。
655
-
656
- 示例:
657
- {{
658
- "query": ["查询1", "查询2", "查询3"]
659
- }}
660
- """
915
+ suggestion_prompt = SUGGESTION_QUERY_PROMPT_ZH
661
916
  else: # English
662
- suggestion_prompt = """
663
- You are a helpful assistant that can help users to generate suggestion query.
664
- I will get some user recently memories,
665
- you should generate some suggestion query, the query should be user what to query,
666
- user recently memories is:
667
- {memories}
668
- if the user recently memories is empty, please generate 3 suggestion query in English,
669
- output should be a json format, the key is "query", the value is a list of suggestion query.
670
-
671
- example:
672
- {{
673
- "query": ["query1", "query2", "query3"]
674
- }}
675
- """
917
+ suggestion_prompt = SUGGESTION_QUERY_PROMPT_EN
676
918
  text_mem_result = super().search("my recently memories", user_id=user_id, top_k=3)[
677
919
  "text_mem"
678
920
  ]
@@ -686,14 +928,78 @@ class MOSProduct(MOSCore):
686
928
  response_json = json.loads(clean_response)
687
929
  return response_json["query"]
688
930
 
689
- def chat_with_references(
931
+ def chat(
690
932
  self,
691
933
  query: str,
692
934
  user_id: str,
693
935
  cube_id: str | None = None,
694
936
  history: MessageList | None = None,
937
+ base_prompt: str | None = None,
938
+ internet_search: bool = False,
939
+ moscube: bool = False,
695
940
  top_k: int = 10,
941
+ threshold: float = 0.5,
942
+ session_id: str | None = None,
943
+ ) -> str:
944
+ """
945
+ Chat with LLM with memory references and complete response.
946
+ """
947
+ self._load_user_cubes(user_id, self.default_cube_config)
948
+ time_start = time.time()
949
+ memories_result = super().search(
950
+ query,
951
+ user_id,
952
+ install_cube_ids=[cube_id] if cube_id else None,
953
+ top_k=top_k,
954
+ mode="fine",
955
+ internet_search=internet_search,
956
+ moscube=moscube,
957
+ session_id=session_id,
958
+ )["text_mem"]
959
+
960
+ memories_list = []
961
+ if memories_result:
962
+ memories_list = memories_result[0]["memories"]
963
+ memories_list = self._filter_memories_by_threshold(memories_list, threshold)
964
+ new_memories_list = []
965
+ for m in memories_list:
966
+ m.metadata.embedding = []
967
+ new_memories_list.append(m)
968
+ memories_list = new_memories_list
969
+ system_prompt = super()._build_system_prompt(memories_list, base_prompt)
970
+ history_info = []
971
+ if history:
972
+ history_info = history[-20:]
973
+ current_messages = [
974
+ {"role": "system", "content": system_prompt},
975
+ *history_info,
976
+ {"role": "user", "content": query},
977
+ ]
978
+ response = self.chat_llm.generate(current_messages)
979
+ time_end = time.time()
980
+ self._start_post_chat_processing(
981
+ user_id=user_id,
982
+ cube_id=cube_id,
983
+ query=query,
984
+ full_response=response,
985
+ system_prompt=system_prompt,
986
+ time_start=time_start,
987
+ time_end=time_end,
988
+ speed_improvement=0.0,
989
+ current_messages=current_messages,
990
+ )
991
+ return response, memories_list
992
+
993
+ def chat_with_references(
994
+ self,
995
+ query: str,
996
+ user_id: str,
997
+ cube_id: str | None = None,
998
+ history: MessageList | None = None,
999
+ top_k: int = 20,
696
1000
  internet_search: bool = False,
1001
+ moscube: bool = False,
1002
+ session_id: str | None = None,
697
1003
  ) -> Generator[str, None, None]:
698
1004
  """
699
1005
  Chat with LLM with memory references and streaming output.
@@ -719,7 +1025,10 @@ class MOSProduct(MOSCore):
719
1025
  top_k=top_k,
720
1026
  mode="fine",
721
1027
  internet_search=internet_search,
1028
+ moscube=moscube,
1029
+ session_id=session_id,
722
1030
  )["text_mem"]
1031
+
723
1032
  yield f"data: {json.dumps({'type': 'status', 'data': '1'})}\n\n"
724
1033
  search_time_end = time.time()
725
1034
  logger.info(
@@ -731,15 +1040,18 @@ class MOSProduct(MOSCore):
731
1040
  if memories_result:
732
1041
  memories_list = memories_result[0]["memories"]
733
1042
  memories_list = self._filter_memories_by_threshold(memories_list)
1043
+
1044
+ reference = prepare_reference_data(memories_list)
1045
+ yield f"data: {json.dumps({'type': 'reference', 'data': reference})}\n\n"
734
1046
  # Build custom system prompt with relevant memories)
735
1047
  system_prompt = self._build_enhance_system_prompt(user_id, memories_list)
736
1048
  # Get chat history
737
1049
  if user_id not in self.chat_history_manager:
738
- self._register_chat_history(user_id)
1050
+ self._register_chat_history(user_id, session_id)
739
1051
 
740
1052
  chat_history = self.chat_history_manager[user_id]
741
1053
  if history:
742
- chat_history.chat_history = history[-10:]
1054
+ chat_history.chat_history = history[-20:]
743
1055
  current_messages = [
744
1056
  {"role": "system", "content": system_prompt},
745
1057
  *chat_history.chat_history,
@@ -772,7 +1084,7 @@ class MOSProduct(MOSCore):
772
1084
  elif self.config.chat_model.backend == "vllm":
773
1085
  response_stream = self.chat_llm.generate_stream(current_messages)
774
1086
  else:
775
- if self.config.chat_model.backend in ["huggingface", "vllm"]:
1087
+ if self.config.chat_model.backend in ["huggingface", "vllm", "openai"]:
776
1088
  response_stream = self.chat_llm.generate_stream(current_messages)
777
1089
  else:
778
1090
  response_stream = self.chat_llm.generate(current_messages)
@@ -789,7 +1101,7 @@ class MOSProduct(MOSCore):
789
1101
  full_response = ""
790
1102
  token_count = 0
791
1103
  # Use tiktoken for proper token-based chunking
792
- if self.config.chat_model.backend not in ["huggingface", "vllm"]:
1104
+ if self.config.chat_model.backend not in ["huggingface", "vllm", "openai"]:
793
1105
  # For non-huggingface backends, we need to collect the full response first
794
1106
  full_response_text = ""
795
1107
  for chunk in response_stream:
@@ -819,83 +1131,29 @@ class MOSProduct(MOSCore):
819
1131
  chunk_data = f"data: {json.dumps({'type': 'text', 'data': processed_chunk}, ensure_ascii=False)}\n\n"
820
1132
  yield chunk_data
821
1133
 
822
- # Prepare reference data
823
- reference = []
824
- for memories in memories_list:
825
- memories_json = memories.model_dump()
826
- memories_json["metadata"]["ref_id"] = f"{memories.id.split('-')[0]}"
827
- memories_json["metadata"]["embedding"] = []
828
- memories_json["metadata"]["sources"] = []
829
- memories_json["metadata"]["memory"] = memories.memory
830
- memories_json["metadata"]["id"] = memories.id
831
- reference.append({"metadata": memories_json["metadata"]})
832
-
833
- yield f"data: {json.dumps({'type': 'reference', 'data': reference})}\n\n"
834
1134
  # set kvcache improve speed
835
1135
  speed_improvement = round(float((len(system_prompt) / 2) * 0.0048 + 44.5), 1)
836
1136
  total_time = round(float(time_end - time_start), 1)
837
1137
 
838
1138
  yield f"data: {json.dumps({'type': 'time', 'data': {'total_time': total_time, 'speed_improvement': f'{speed_improvement}%'}})}\n\n"
1139
+ # get further suggestion
1140
+ current_messages.append({"role": "assistant", "content": full_response})
1141
+ further_suggestion = self._get_further_suggestion(current_messages)
1142
+ logger.info(f"further_suggestion: {further_suggestion}")
1143
+ yield f"data: {json.dumps({'type': 'suggestion', 'data': further_suggestion})}\n\n"
839
1144
  yield f"data: {json.dumps({'type': 'end'})}\n\n"
840
1145
 
841
- logger.info(f"user_id: {user_id}, cube_id: {cube_id}, current_messages: {current_messages}")
842
- logger.info(f"user_id: {user_id}, cube_id: {cube_id}, full_response: {full_response}")
843
-
844
- clean_response, extracted_references = self._extract_references_from_response(full_response)
845
- logger.info(f"Extracted {len(extracted_references)} references from response")
846
-
847
- # Send chat report if online_bot is available
848
- try:
849
- from memos.memos_tools.notification_utils import send_online_bot_notification
850
-
851
- # Prepare data for online_bot
852
- chat_data = {
853
- "query": query,
854
- "user_id": user_id,
855
- "cube_id": cube_id,
856
- "system_prompt": system_prompt,
857
- "full_response": full_response,
858
- }
859
-
860
- system_data = {
861
- "references": extracted_references,
862
- "time_start": time_start,
863
- "time_end": time_end,
864
- "speed_improvement": speed_improvement,
865
- }
866
-
867
- emoji_config = {"chat": "💬", "system_info": "📊"}
868
-
869
- send_online_bot_notification(
870
- online_bot=self.online_bot,
871
- header_name="MemOS Chat Report",
872
- sub_title_name="chat_with_references",
873
- title_color="#00956D",
874
- other_data1=chat_data,
875
- other_data2=system_data,
876
- emoji=emoji_config,
877
- )
878
- except Exception as e:
879
- logger.warning(f"Failed to send chat notification: {e}")
880
-
881
- self._send_message_to_scheduler(
882
- user_id=user_id, mem_cube_id=cube_id, query=clean_response, label=ANSWER_LABEL
883
- )
884
- self.add(
1146
+ # Asynchronous processing of logs, notifications and memory additions
1147
+ self._start_post_chat_processing(
885
1148
  user_id=user_id,
886
- messages=[
887
- {
888
- "role": "user",
889
- "content": query,
890
- "chat_time": str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
891
- },
892
- {
893
- "role": "assistant",
894
- "content": clean_response, # Store clean text without reference markers
895
- "chat_time": str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
896
- },
897
- ],
898
- mem_cube_id=cube_id,
1149
+ cube_id=cube_id,
1150
+ query=query,
1151
+ full_response=full_response,
1152
+ system_prompt=system_prompt,
1153
+ time_start=time_start,
1154
+ time_end=time_end,
1155
+ speed_improvement=speed_improvement,
1156
+ current_messages=current_messages,
899
1157
  )
900
1158
 
901
1159
  def get_all(
@@ -1057,6 +1315,7 @@ class MOSProduct(MOSCore):
1057
1315
  install_cube_ids: list[str] | None = None,
1058
1316
  top_k: int = 10,
1059
1317
  mode: Literal["fast", "fine"] = "fast",
1318
+ session_id: str | None = None,
1060
1319
  ):
1061
1320
  """Search memories for a specific user."""
1062
1321
 
@@ -1067,7 +1326,9 @@ class MOSProduct(MOSCore):
1067
1326
  logger.info(
1068
1327
  f"time search: load_user_cubes time user_id: {user_id} time is: {load_user_cubes_time_end - time_start}"
1069
1328
  )
1070
- search_result = super().search(query, user_id, install_cube_ids, top_k, mode=mode)
1329
+ search_result = super().search(
1330
+ query, user_id, install_cube_ids, top_k, mode=mode, session_id=session_id
1331
+ )
1071
1332
  search_time_end = time.time()
1072
1333
  logger.info(
1073
1334
  f"time search: search text_mem time user_id: {user_id} time is: {search_time_end - load_user_cubes_time_end}"
@@ -1086,6 +1347,7 @@ class MOSProduct(MOSCore):
1086
1347
  memories["metadata"]["memory"] = memories["memory"]
1087
1348
  memories_list.append(memories)
1088
1349
  reformat_memory_list.append({"cube_id": memory["cube_id"], "memories": memories_list})
1350
+ logger.info(f"search memory list is : {reformat_memory_list}")
1089
1351
  search_result["text_mem"] = reformat_memory_list
1090
1352
  time_end = time.time()
1091
1353
  logger.info(
@@ -1102,13 +1364,15 @@ class MOSProduct(MOSCore):
1102
1364
  mem_cube_id: str | None = None,
1103
1365
  source: str | None = None,
1104
1366
  user_profile: bool = False,
1367
+ session_id: str | None = None,
1105
1368
  ):
1106
1369
  """Add memory for a specific user."""
1107
1370
 
1108
1371
  # Load user cubes if not already loaded
1109
1372
  self._load_user_cubes(user_id, self.default_cube_config)
1110
-
1111
- result = super().add(messages, memory_content, doc_path, mem_cube_id, user_id)
1373
+ result = super().add(
1374
+ messages, memory_content, doc_path, mem_cube_id, user_id, session_id=session_id
1375
+ )
1112
1376
  if user_profile:
1113
1377
  try:
1114
1378
  user_interests = memory_content.split("'userInterests': '")[1].split("', '")[0]