MemoryOS 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MemoryOS might be problematic. Click here for more details.

Files changed (42) hide show
  1. {memoryos-1.0.0.dist-info → memoryos-1.0.1.dist-info}/METADATA +2 -1
  2. {memoryos-1.0.0.dist-info → memoryos-1.0.1.dist-info}/RECORD +42 -33
  3. memos/__init__.py +1 -1
  4. memos/api/config.py +25 -0
  5. memos/api/context/context_thread.py +96 -0
  6. memos/api/context/dependencies.py +0 -11
  7. memos/api/middleware/request_context.py +94 -0
  8. memos/api/product_api.py +5 -1
  9. memos/api/product_models.py +16 -0
  10. memos/api/routers/product_router.py +39 -3
  11. memos/api/start_api.py +3 -0
  12. memos/configs/memory.py +13 -0
  13. memos/configs/reranker.py +18 -0
  14. memos/graph_dbs/base.py +4 -2
  15. memos/graph_dbs/nebular.py +215 -68
  16. memos/graph_dbs/neo4j.py +14 -12
  17. memos/graph_dbs/neo4j_community.py +6 -3
  18. memos/llms/vllm.py +2 -0
  19. memos/log.py +120 -8
  20. memos/mem_os/core.py +30 -2
  21. memos/mem_os/product.py +386 -146
  22. memos/mem_os/utils/reference_utils.py +20 -0
  23. memos/mem_reader/simple_struct.py +112 -43
  24. memos/mem_user/mysql_user_manager.py +4 -2
  25. memos/memories/textual/item.py +1 -1
  26. memos/memories/textual/tree.py +31 -1
  27. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +3 -1
  28. memos/memories/textual/tree_text_memory/retrieve/recall.py +53 -3
  29. memos/memories/textual/tree_text_memory/retrieve/searcher.py +74 -14
  30. memos/memories/textual/tree_text_memory/retrieve/utils.py +6 -4
  31. memos/memos_tools/notification_utils.py +46 -0
  32. memos/reranker/__init__.py +4 -0
  33. memos/reranker/base.py +24 -0
  34. memos/reranker/cosine_local.py +95 -0
  35. memos/reranker/factory.py +43 -0
  36. memos/reranker/http_bge.py +99 -0
  37. memos/reranker/noop.py +16 -0
  38. memos/templates/mem_reader_prompts.py +289 -40
  39. memos/templates/mos_prompts.py +133 -60
  40. {memoryos-1.0.0.dist-info → memoryos-1.0.1.dist-info}/LICENSE +0 -0
  41. {memoryos-1.0.0.dist-info → memoryos-1.0.1.dist-info}/WHEEL +0 -0
  42. {memoryos-1.0.0.dist-info → memoryos-1.0.1.dist-info}/entry_points.txt +0 -0
memos/mem_os/product.py CHANGED
@@ -1,6 +1,8 @@
1
+ import asyncio
1
2
  import json
2
3
  import os
3
4
  import random
5
+ import threading
4
6
  import time
5
7
 
6
8
  from collections.abc import Generator
@@ -24,6 +26,7 @@ from memos.mem_os.utils.format_utils import (
24
26
  sort_children_by_memory_type,
25
27
  )
26
28
  from memos.mem_os.utils.reference_utils import (
29
+ prepare_reference_data,
27
30
  process_streaming_references_complete,
28
31
  )
29
32
  from memos.mem_scheduler.schemas.general_schemas import (
@@ -36,7 +39,12 @@ from memos.mem_user.user_manager import UserRole
36
39
  from memos.memories.textual.item import (
37
40
  TextualMemoryItem,
38
41
  )
39
- from memos.templates.mos_prompts import MEMOS_PRODUCT_BASE_PROMPT, MEMOS_PRODUCT_ENHANCE_PROMPT
42
+ from memos.templates.mos_prompts import (
43
+ FURTHER_SUGGESTION_PROMPT,
44
+ SUGGESTION_QUERY_PROMPT_EN,
45
+ SUGGESTION_QUERY_PROMPT_ZH,
46
+ get_memos_prompt,
47
+ )
40
48
  from memos.types import MessageList
41
49
 
42
50
 
@@ -47,6 +55,39 @@ load_dotenv()
47
55
  CUBE_PATH = os.getenv("MOS_CUBE_PATH", "/tmp/data/")
48
56
 
49
57
 
58
+ def _short_id(mem_id: str) -> str:
59
+ return (mem_id or "").split("-")[0] if mem_id else ""
60
+
61
+
62
+ def _format_mem_block(memories_all, max_items: int = 20, max_chars_each: int = 320) -> str:
63
+ """
64
+ Modify TextualMemoryItem Format:
65
+ 1:abcd :: [P] text...
66
+ 2:ef01 :: [O] text...
67
+ sequence is [i:memId] i; [P]=PersonalMemory / [O]=OuterMemory
68
+ """
69
+ if not memories_all:
70
+ return "(none)", "(none)"
71
+
72
+ lines_o = []
73
+ lines_p = []
74
+ for idx, m in enumerate(memories_all[:max_items], 1):
75
+ mid = _short_id(getattr(m, "id", "") or "")
76
+ mtype = getattr(getattr(m, "metadata", {}), "memory_type", None) or getattr(
77
+ m, "metadata", {}
78
+ ).get("memory_type", "")
79
+ tag = "O" if "Outer" in str(mtype) else "P"
80
+ txt = (getattr(m, "memory", "") or "").replace("\n", " ").strip()
81
+ if len(txt) > max_chars_each:
82
+ txt = txt[: max_chars_each - 1] + "…"
83
+ mid = mid or f"mem_{idx}"
84
+ if tag == "O":
85
+ lines_o.append(f"[{idx}:{mid}] :: [{tag}] {txt}\n")
86
+ elif tag == "P":
87
+ lines_p.append(f"[{idx}:{mid}] :: [{tag}] {txt}")
88
+ return "\n".join(lines_o), "\n".join(lines_p)
89
+
90
+
50
91
  class MOSProduct(MOSCore):
51
92
  """
52
93
  The MOSProduct class inherits from MOSCore and manages multiple users.
@@ -350,7 +391,11 @@ class MOSProduct(MOSCore):
350
391
  return self._create_user_config(user_id, user_config)
351
392
 
352
393
  def _build_system_prompt(
353
- self, memories_all: list[TextualMemoryItem], base_prompt: str | None = None
394
+ self,
395
+ memories_all: list[TextualMemoryItem],
396
+ base_prompt: str | None = None,
397
+ tone: str = "friendly",
398
+ verbosity: str = "mid",
354
399
  ) -> str:
355
400
  """
356
401
  Build custom system prompt for the user with memory references.
@@ -362,51 +407,46 @@ class MOSProduct(MOSCore):
362
407
  Returns:
363
408
  str: The custom system prompt.
364
409
  """
365
-
366
410
  # Build base prompt
367
411
  # Add memory context if available
368
- if memories_all:
369
- memory_context = "\n\n## Available ID Memories:\n"
370
- for i, memory in enumerate(memories_all, 1):
371
- # Format: [memory_id]: memory_content
372
- memory_id = f"{memory.id.split('-')[0]}" if hasattr(memory, "id") else f"mem_{i}"
373
- memory_content = memory.memory[:500] if hasattr(memory, "memory") else str(memory)
374
- memory_content = memory_content.replace("\n", " ")
375
- memory_context += f"{memory_id}: {memory_content}\n"
376
- return MEMOS_PRODUCT_BASE_PROMPT + memory_context
377
-
378
- return MEMOS_PRODUCT_BASE_PROMPT
412
+ now = datetime.now()
413
+ formatted_date = now.strftime("%Y-%m-%d (%A)")
414
+ sys_body = get_memos_prompt(
415
+ date=formatted_date, tone=tone, verbosity=verbosity, mode="base"
416
+ )
417
+ mem_block_o, mem_block_p = _format_mem_block(memories_all)
418
+ mem_block = mem_block_o + "\n" + mem_block_p
419
+ prefix = (base_prompt.strip() + "\n\n") if base_prompt else ""
420
+ return (
421
+ prefix
422
+ + sys_body
423
+ + "\n\n# Memories\n## PersonalMemory & OuterMemory (ordered)\n"
424
+ + mem_block
425
+ )
379
426
 
380
427
  def _build_enhance_system_prompt(
381
- self, user_id: str, memories_all: list[TextualMemoryItem]
428
+ self,
429
+ user_id: str,
430
+ memories_all: list[TextualMemoryItem],
431
+ tone: str = "friendly",
432
+ verbosity: str = "mid",
382
433
  ) -> str:
383
434
  """
384
435
  Build enhance prompt for the user with memory references.
385
436
  """
386
- if memories_all:
387
- personal_memory_context = "\n\n## Available ID and PersonalMemory Memories:\n"
388
- outer_memory_context = "\n\n## Available ID and OuterMemory Memories:\n"
389
- for i, memory in enumerate(memories_all, 1):
390
- # Format: [memory_id]: memory_content
391
- if memory.metadata.memory_type != "OuterMemory":
392
- memory_id = (
393
- f"{memory.id.split('-')[0]}" if hasattr(memory, "id") else f"mem_{i}"
394
- )
395
- memory_content = (
396
- memory.memory[:500] if hasattr(memory, "memory") else str(memory)
397
- )
398
- personal_memory_context += f"{memory_id}: {memory_content}\n"
399
- else:
400
- memory_id = (
401
- f"{memory.id.split('-')[0]}" if hasattr(memory, "id") else f"mem_{i}"
402
- )
403
- memory_content = (
404
- memory.memory[:500] if hasattr(memory, "memory") else str(memory)
405
- )
406
- memory_content = memory_content.replace("\n", " ")
407
- outer_memory_context += f"{memory_id}: {memory_content}\n"
408
- return MEMOS_PRODUCT_ENHANCE_PROMPT + personal_memory_context + outer_memory_context
409
- return MEMOS_PRODUCT_ENHANCE_PROMPT
437
+ now = datetime.now()
438
+ formatted_date = now.strftime("%Y-%m-%d (%A)")
439
+ sys_body = get_memos_prompt(
440
+ date=formatted_date, tone=tone, verbosity=verbosity, mode="enhance"
441
+ )
442
+ mem_block_o, mem_block_p = _format_mem_block(memories_all)
443
+ return (
444
+ sys_body
445
+ + "\n\n# Memories\n## PersonalMemory (ordered)\n"
446
+ + mem_block_p
447
+ + "\n## OuterMemory (ordered)\n"
448
+ + mem_block_o
449
+ )
410
450
 
411
451
  def _extract_references_from_response(self, response: str) -> tuple[str, list[dict]]:
412
452
  """
@@ -495,13 +535,208 @@ class MOSProduct(MOSCore):
495
535
  )
496
536
  self.mem_scheduler.submit_messages(messages=[message_item])
497
537
 
538
async def _post_chat_processing(
    self,
    user_id: str,
    cube_id: str,
    query: str,
    full_response: str,
    system_prompt: str,
    time_start: float,
    time_end: float,
    speed_improvement: float,
    current_messages: list,
) -> None:
    """
    Run the bookkeeping that follows a chat turn.

    In order: log the exchange, strip reference markers from the response,
    send a chat report through ``self.online_bot`` when one is configured,
    submit the cleaned answer to the scheduler, and store the user/assistant
    pair via ``self.add``. Every failure is logged and swallowed so this
    coroutine never raises to its caller.
    """
    try:
        logger.info(
            f"user_id: {user_id}, cube_id: {cube_id}, current_messages: {current_messages}"
        )
        logger.info(f"user_id: {user_id}, cube_id: {cube_id}, full_response: {full_response}")

        answer_text, found_refs = self._extract_references_from_response(full_response)
        logger.info(f"Extracted {len(found_refs)} references from response")

        if self.online_bot:
            # Notification is best-effort: a failure here must not prevent
            # the scheduler message and the memory write below.
            try:
                from memos.memos_tools.notification_utils import (
                    send_online_bot_notification_async,
                )

                await send_online_bot_notification_async(
                    online_bot=self.online_bot,
                    header_name="MemOS Chat Report",
                    sub_title_name="chat_with_references",
                    title_color="#00956D",
                    other_data1={
                        "query": query,
                        "user_id": user_id,
                        "cube_id": cube_id,
                        "system_prompt": system_prompt,
                        "full_response": full_response,
                    },
                    other_data2={
                        "references": found_refs,
                        "time_start": time_start,
                        "time_end": time_end,
                        "speed_improvement": speed_improvement,
                    },
                    emoji={"chat": "💬", "system_info": "📊"},
                )
            except Exception as notify_err:
                logger.warning(f"Failed to send chat notification (async): {notify_err}")

        self._send_message_to_scheduler(
            user_id=user_id, mem_cube_id=cube_id, query=answer_text, label=ANSWER_LABEL
        )

        user_turn = {
            "role": "user",
            "content": query,
            "chat_time": str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
        }
        assistant_turn = {
            "role": "assistant",
            # Stored without the reference markers.
            "content": answer_text,
            "chat_time": str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
        }
        self.add(
            user_id=user_id,
            messages=[user_turn, assistant_turn],
            mem_cube_id=cube_id,
        )

        logger.info(f"Post-chat processing completed for user {user_id}")

    except Exception as err:
        logger.error(f"Error in post-chat processing for user {user_id}: {err}", exc_info=True)
626
+
627
+ def _start_post_chat_processing(
628
+ self,
629
+ user_id: str,
630
+ cube_id: str,
631
+ query: str,
632
+ full_response: str,
633
+ system_prompt: str,
634
+ time_start: float,
635
+ time_end: float,
636
+ speed_improvement: float,
637
+ current_messages: list,
638
+ ) -> None:
639
+ """
640
+ Asynchronous processing of logs, notifications and memory additions, handle synchronous and asynchronous environments
641
+ """
642
+
643
+ def run_async_in_thread():
644
+ """Running asynchronous tasks in a new thread"""
645
+ try:
646
+ loop = asyncio.new_event_loop()
647
+ asyncio.set_event_loop(loop)
648
+ try:
649
+ loop.run_until_complete(
650
+ self._post_chat_processing(
651
+ user_id=user_id,
652
+ cube_id=cube_id,
653
+ query=query,
654
+ full_response=full_response,
655
+ system_prompt=system_prompt,
656
+ time_start=time_start,
657
+ time_end=time_end,
658
+ speed_improvement=speed_improvement,
659
+ current_messages=current_messages,
660
+ )
661
+ )
662
+ finally:
663
+ loop.close()
664
+ except Exception as e:
665
+ logger.error(
666
+ f"Error in thread-based post-chat processing for user {user_id}: {e}",
667
+ exc_info=True,
668
+ )
669
+
670
+ try:
671
+ # Try to get the current event loop
672
+ asyncio.get_running_loop()
673
+ # Create task and store reference to prevent garbage collection
674
+ task = asyncio.create_task(
675
+ self._post_chat_processing(
676
+ user_id=user_id,
677
+ cube_id=cube_id,
678
+ query=query,
679
+ full_response=full_response,
680
+ system_prompt=system_prompt,
681
+ time_start=time_start,
682
+ time_end=time_end,
683
+ speed_improvement=speed_improvement,
684
+ current_messages=current_messages,
685
+ )
686
+ )
687
+ # Add exception handling for the background task
688
+ task.add_done_callback(
689
+ lambda t: logger.error(
690
+ f"Error in background post-chat processing for user {user_id}: {t.exception()}",
691
+ exc_info=True,
692
+ )
693
+ if t.exception()
694
+ else None
695
+ )
696
+ except RuntimeError:
697
+ # No event loop, run in a new thread
698
+ thread = threading.Thread(
699
+ target=run_async_in_thread,
700
+ name=f"PostChatProcessing-{user_id}",
701
+ # Set as a daemon thread to avoid blocking program exit
702
+ daemon=True,
703
+ )
704
+ thread.start()
705
+
498
706
  def _filter_memories_by_threshold(
499
- self, memories: list[TextualMemoryItem], threshold: float = 0.20
707
+ self,
708
+ memories: list[TextualMemoryItem],
709
+ threshold: float = 0.30,
710
+ min_num: int = 3,
711
+ memory_type: Literal["OuterMemory"] = "OuterMemory",
500
712
  ) -> list[TextualMemoryItem]:
501
713
  """
502
- Filter memories by threshold.
714
+ Filter memories by threshold and type, at least min_num memories for Non-OuterMemory.
715
+ Args:
716
+ memories: list[TextualMemoryItem],
717
+ threshold: float,
718
+ min_num: int,
719
+ memory_type: Literal["OuterMemory"],
720
+ Returns:
721
+ list[TextualMemoryItem]
503
722
  """
504
- return [memory for memory in memories if memory.metadata.relativity >= threshold]
723
+ sorted_memories = sorted(memories, key=lambda m: m.metadata.relativity, reverse=True)
724
+ filtered_person = [m for m in memories if m.metadata.memory_type != memory_type]
725
+ filtered_outer = [m for m in memories if m.metadata.memory_type == memory_type]
726
+ filtered = []
727
+ per_memory_count = 0
728
+ for m in sorted_memories:
729
+ if m.metadata.relativity >= threshold:
730
+ if m.metadata.memory_type != memory_type:
731
+ per_memory_count += 1
732
+ filtered.append(m)
733
+ if len(filtered) < min_num:
734
+ filtered = filtered_person[:min_num] + filtered_outer[:min_num]
735
+ else:
736
+ if per_memory_count < min_num:
737
+ filtered += filtered_person[per_memory_count:min_num]
738
+ filtered_memory = sorted(filtered, key=lambda m: m.metadata.relativity, reverse=True)
739
+ return filtered_memory
505
740
 
506
741
  def register_mem_cube(
507
742
  self,
@@ -599,7 +834,7 @@ class MOSProduct(MOSCore):
599
834
 
600
835
  # Create a default cube for the user using MOSCore's methods
601
836
  default_cube_name = f"{user_name}_{user_id}_default_cube"
602
- mem_cube_name_or_path = f"{CUBE_PATH}/{default_cube_name}"
837
+ mem_cube_name_or_path = os.path.join(CUBE_PATH, default_cube_name)
603
838
  default_cube_id = self.create_cube_for_user(
604
839
  cube_name=default_cube_name, owner_id=user_id, cube_path=mem_cube_name_or_path
605
840
  )
@@ -633,7 +868,23 @@ class MOSProduct(MOSCore):
633
868
  except Exception as e:
634
869
  return {"status": "error", "message": f"Failed to register user: {e!s}"}
635
870
 
636
- def get_suggestion_query(self, user_id: str, language: str = "zh") -> list[str]:
871
+ def _get_further_suggestion(self, message: MessageList | None = None) -> list[str]:
872
+ """Get further suggestion prompt."""
873
+ try:
874
+ dialogue_info = "\n".join([f"{msg['role']}: {msg['content']}" for msg in message[-2:]])
875
+ further_suggestion_prompt = FURTHER_SUGGESTION_PROMPT.format(dialogue=dialogue_info)
876
+ message_list = [{"role": "system", "content": further_suggestion_prompt}]
877
+ response = self.chat_llm.generate(message_list)
878
+ clean_response = clean_json_response(response)
879
+ response_json = json.loads(clean_response)
880
+ return response_json["query"]
881
+ except Exception as e:
882
+ logger.error(f"Error getting further suggestion: {e}", exc_info=True)
883
+ return []
884
+
885
+ def get_suggestion_query(
886
+ self, user_id: str, language: str = "zh", message: MessageList | None = None
887
+ ) -> list[str]:
637
888
  """Get suggestion query from LLM.
638
889
  Args:
639
890
  user_id (str): User ID.
@@ -642,37 +893,13 @@ class MOSProduct(MOSCore):
642
893
  Returns:
643
894
  list[str]: The suggestion query list.
644
895
  """
645
-
896
+ if message:
897
+ further_suggestion = self._get_further_suggestion(message)
898
+ return further_suggestion
646
899
  if language == "zh":
647
- suggestion_prompt = """
648
- 你是一个有用的助手,可以帮助用户生成建议查询。
649
- 我将获取用户最近的一些记忆,
650
- 你应该生成一些建议查询,这些查询应该是用户想要查询的内容,
651
- 用户最近的记忆是:
652
- {memories}
653
- 请生成3个建议查询用中文,
654
- 输出应该是json格式,键是"query",值是一个建议查询列表。
655
-
656
- 示例:
657
- {{
658
- "query": ["查询1", "查询2", "查询3"]
659
- }}
660
- """
900
+ suggestion_prompt = SUGGESTION_QUERY_PROMPT_ZH
661
901
  else: # English
662
- suggestion_prompt = """
663
- You are a helpful assistant that can help users to generate suggestion query.
664
- I will get some user recently memories,
665
- you should generate some suggestion query, the query should be user what to query,
666
- user recently memories is:
667
- {memories}
668
- if the user recently memories is empty, please generate 3 suggestion query in English,
669
- output should be a json format, the key is "query", the value is a list of suggestion query.
670
-
671
- example:
672
- {{
673
- "query": ["query1", "query2", "query3"]
674
- }}
675
- """
902
+ suggestion_prompt = SUGGESTION_QUERY_PROMPT_EN
676
903
  text_mem_result = super().search("my recently memories", user_id=user_id, top_k=3)[
677
904
  "text_mem"
678
905
  ]
@@ -686,14 +913,75 @@ class MOSProduct(MOSCore):
686
913
  response_json = json.loads(clean_response)
687
914
  return response_json["query"]
688
915
 
689
- def chat_with_references(
916
def chat(
    self,
    query: str,
    user_id: str,
    cube_id: str | None = None,
    history: MessageList | None = None,
    base_prompt: str | None = None,
    internet_search: bool = False,
    moscube: bool = False,
    top_k: int = 10,
    threshold: float = 0.5,
) -> tuple[str, list[TextualMemoryItem]]:
    """
    Chat with the LLM using retrieved memories and return the full response.

    Memories are searched in "fine" mode, filtered with
    ``_filter_memories_by_threshold`` and placed in the system prompt; the
    last 20 ``history`` entries are sent along with the query. Logging,
    notification and memory storage are started in the background.

    Args:
        query: The user's message.
        user_id: User whose cubes are loaded and searched.
        cube_id: Restrict the search to this cube when given.
        history: Earlier messages of the conversation.
        base_prompt: Passed through to the system-prompt builder.
        internet_search: Forwarded to the memory search.
        moscube: Forwarded to the memory search.
        top_k: Number of memories requested from the search.
        threshold: Relativity threshold used for filtering.

    Returns:
        ``(response, memories_list)``: the LLM response and the memories
        used for the prompt, with their ``metadata.embedding`` cleared.
    """
    self._load_user_cubes(user_id, self.default_cube_config)
    time_start = time.time()
    memories_result = super().search(
        query,
        user_id,
        install_cube_ids=[cube_id] if cube_id else None,
        top_k=top_k,
        mode="fine",
        internet_search=internet_search,
        moscube=moscube,
    )["text_mem"]

    memories_list = []
    if memories_result:
        memories_list = self._filter_memories_by_threshold(
            memories_result[0]["memories"], threshold
        )
        # Clear the embeddings on the items that are used and returned.
        for m in memories_list:
            m.metadata.embedding = []
    # NOTE(review): this calls the parent's _build_system_prompt, not the
    # one defined on this class; confirm that is intended.
    system_prompt = super()._build_system_prompt(memories_list, base_prompt)
    history_info = history[-20:] if history else []
    current_messages = [
        {"role": "system", "content": system_prompt},
        *history_info,
        {"role": "user", "content": query},
    ]
    response = self.chat_llm.generate(current_messages)
    time_end = time.time()
    self._start_post_chat_processing(
        user_id=user_id,
        cube_id=cube_id,
        query=query,
        full_response=response,
        system_prompt=system_prompt,
        time_start=time_start,
        time_end=time_end,
        speed_improvement=0.0,
        current_messages=current_messages,
    )
    return response, memories_list
975
+
976
+ def chat_with_references(
977
+ self,
978
+ query: str,
979
+ user_id: str,
980
+ cube_id: str | None = None,
981
+ history: MessageList | None = None,
982
+ top_k: int = 20,
696
983
  internet_search: bool = False,
984
+ moscube: bool = False,
697
985
  ) -> Generator[str, None, None]:
698
986
  """
699
987
  Chat with LLM with memory references and streaming output.
@@ -719,7 +1007,9 @@ class MOSProduct(MOSCore):
719
1007
  top_k=top_k,
720
1008
  mode="fine",
721
1009
  internet_search=internet_search,
1010
+ moscube=moscube,
722
1011
  )["text_mem"]
1012
+
723
1013
  yield f"data: {json.dumps({'type': 'status', 'data': '1'})}\n\n"
724
1014
  search_time_end = time.time()
725
1015
  logger.info(
@@ -731,6 +1021,9 @@ class MOSProduct(MOSCore):
731
1021
  if memories_result:
732
1022
  memories_list = memories_result[0]["memories"]
733
1023
  memories_list = self._filter_memories_by_threshold(memories_list)
1024
+
1025
+ reference = prepare_reference_data(memories_list)
1026
+ yield f"data: {json.dumps({'type': 'reference', 'data': reference})}\n\n"
734
1027
  # Build custom system prompt with relevant memories)
735
1028
  system_prompt = self._build_enhance_system_prompt(user_id, memories_list)
736
1029
  # Get chat history
@@ -739,7 +1032,7 @@ class MOSProduct(MOSCore):
739
1032
 
740
1033
  chat_history = self.chat_history_manager[user_id]
741
1034
  if history:
742
- chat_history.chat_history = history[-10:]
1035
+ chat_history.chat_history = history[-20:]
743
1036
  current_messages = [
744
1037
  {"role": "system", "content": system_prompt},
745
1038
  *chat_history.chat_history,
@@ -772,7 +1065,7 @@ class MOSProduct(MOSCore):
772
1065
  elif self.config.chat_model.backend == "vllm":
773
1066
  response_stream = self.chat_llm.generate_stream(current_messages)
774
1067
  else:
775
- if self.config.chat_model.backend in ["huggingface", "vllm"]:
1068
+ if self.config.chat_model.backend in ["huggingface", "vllm", "openai"]:
776
1069
  response_stream = self.chat_llm.generate_stream(current_messages)
777
1070
  else:
778
1071
  response_stream = self.chat_llm.generate(current_messages)
@@ -789,7 +1082,7 @@ class MOSProduct(MOSCore):
789
1082
  full_response = ""
790
1083
  token_count = 0
791
1084
  # Use tiktoken for proper token-based chunking
792
- if self.config.chat_model.backend not in ["huggingface", "vllm"]:
1085
+ if self.config.chat_model.backend not in ["huggingface", "vllm", "openai"]:
793
1086
  # For non-huggingface backends, we need to collect the full response first
794
1087
  full_response_text = ""
795
1088
  for chunk in response_stream:
@@ -819,83 +1112,29 @@ class MOSProduct(MOSCore):
819
1112
  chunk_data = f"data: {json.dumps({'type': 'text', 'data': processed_chunk}, ensure_ascii=False)}\n\n"
820
1113
  yield chunk_data
821
1114
 
822
- # Prepare reference data
823
- reference = []
824
- for memories in memories_list:
825
- memories_json = memories.model_dump()
826
- memories_json["metadata"]["ref_id"] = f"{memories.id.split('-')[0]}"
827
- memories_json["metadata"]["embedding"] = []
828
- memories_json["metadata"]["sources"] = []
829
- memories_json["metadata"]["memory"] = memories.memory
830
- memories_json["metadata"]["id"] = memories.id
831
- reference.append({"metadata": memories_json["metadata"]})
832
-
833
- yield f"data: {json.dumps({'type': 'reference', 'data': reference})}\n\n"
834
1115
  # set kvcache improve speed
835
1116
  speed_improvement = round(float((len(system_prompt) / 2) * 0.0048 + 44.5), 1)
836
1117
  total_time = round(float(time_end - time_start), 1)
837
1118
 
838
1119
  yield f"data: {json.dumps({'type': 'time', 'data': {'total_time': total_time, 'speed_improvement': f'{speed_improvement}%'}})}\n\n"
1120
+ # get further suggestion
1121
+ current_messages.append({"role": "assistant", "content": full_response})
1122
+ further_suggestion = self._get_further_suggestion(current_messages)
1123
+ logger.info(f"further_suggestion: {further_suggestion}")
1124
+ yield f"data: {json.dumps({'type': 'suggestion', 'data': further_suggestion})}\n\n"
839
1125
  yield f"data: {json.dumps({'type': 'end'})}\n\n"
840
1126
 
841
- logger.info(f"user_id: {user_id}, cube_id: {cube_id}, current_messages: {current_messages}")
842
- logger.info(f"user_id: {user_id}, cube_id: {cube_id}, full_response: {full_response}")
843
-
844
- clean_response, extracted_references = self._extract_references_from_response(full_response)
845
- logger.info(f"Extracted {len(extracted_references)} references from response")
846
-
847
- # Send chat report if online_bot is available
848
- try:
849
- from memos.memos_tools.notification_utils import send_online_bot_notification
850
-
851
- # Prepare data for online_bot
852
- chat_data = {
853
- "query": query,
854
- "user_id": user_id,
855
- "cube_id": cube_id,
856
- "system_prompt": system_prompt,
857
- "full_response": full_response,
858
- }
859
-
860
- system_data = {
861
- "references": extracted_references,
862
- "time_start": time_start,
863
- "time_end": time_end,
864
- "speed_improvement": speed_improvement,
865
- }
866
-
867
- emoji_config = {"chat": "💬", "system_info": "📊"}
868
-
869
- send_online_bot_notification(
870
- online_bot=self.online_bot,
871
- header_name="MemOS Chat Report",
872
- sub_title_name="chat_with_references",
873
- title_color="#00956D",
874
- other_data1=chat_data,
875
- other_data2=system_data,
876
- emoji=emoji_config,
877
- )
878
- except Exception as e:
879
- logger.warning(f"Failed to send chat notification: {e}")
880
-
881
- self._send_message_to_scheduler(
882
- user_id=user_id, mem_cube_id=cube_id, query=clean_response, label=ANSWER_LABEL
883
- )
884
- self.add(
1127
+ # Asynchronous processing of logs, notifications and memory additions
1128
+ self._start_post_chat_processing(
885
1129
  user_id=user_id,
886
- messages=[
887
- {
888
- "role": "user",
889
- "content": query,
890
- "chat_time": str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
891
- },
892
- {
893
- "role": "assistant",
894
- "content": clean_response, # Store clean text without reference markers
895
- "chat_time": str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
896
- },
897
- ],
898
- mem_cube_id=cube_id,
1130
+ cube_id=cube_id,
1131
+ query=query,
1132
+ full_response=full_response,
1133
+ system_prompt=system_prompt,
1134
+ time_start=time_start,
1135
+ time_end=time_end,
1136
+ speed_improvement=speed_improvement,
1137
+ current_messages=current_messages,
899
1138
  )
900
1139
 
901
1140
  def get_all(
@@ -1086,6 +1325,7 @@ class MOSProduct(MOSCore):
1086
1325
  memories["metadata"]["memory"] = memories["memory"]
1087
1326
  memories_list.append(memories)
1088
1327
  reformat_memory_list.append({"cube_id": memory["cube_id"], "memories": memories_list})
1328
+ logger.info(f"search memory list is : {reformat_memory_list}")
1089
1329
  search_result["text_mem"] = reformat_memory_list
1090
1330
  time_end = time.time()
1091
1331
  logger.info(