MemoryOS 0.2.2 (py3-none-any wheel) → 1.0.1 (py3-none-any wheel)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MemoryOS might be problematic. Click here for more details.

Files changed (82)
  1. {memoryos-0.2.2.dist-info → memoryos-1.0.1.dist-info}/METADATA +7 -1
  2. {memoryos-0.2.2.dist-info → memoryos-1.0.1.dist-info}/RECORD +81 -66
  3. memos/__init__.py +1 -1
  4. memos/api/config.py +31 -8
  5. memos/api/context/context.py +1 -1
  6. memos/api/context/context_thread.py +96 -0
  7. memos/api/middleware/request_context.py +94 -0
  8. memos/api/product_api.py +5 -1
  9. memos/api/product_models.py +16 -0
  10. memos/api/routers/product_router.py +39 -3
  11. memos/api/start_api.py +3 -0
  12. memos/configs/internet_retriever.py +13 -0
  13. memos/configs/mem_scheduler.py +38 -16
  14. memos/configs/memory.py +13 -0
  15. memos/configs/reranker.py +18 -0
  16. memos/graph_dbs/base.py +33 -4
  17. memos/graph_dbs/nebular.py +631 -236
  18. memos/graph_dbs/neo4j.py +18 -7
  19. memos/graph_dbs/neo4j_community.py +6 -3
  20. memos/llms/vllm.py +2 -0
  21. memos/log.py +125 -8
  22. memos/mem_os/core.py +49 -11
  23. memos/mem_os/main.py +1 -1
  24. memos/mem_os/product.py +392 -215
  25. memos/mem_os/utils/default_config.py +1 -1
  26. memos/mem_os/utils/format_utils.py +11 -47
  27. memos/mem_os/utils/reference_utils.py +153 -0
  28. memos/mem_reader/simple_struct.py +112 -43
  29. memos/mem_scheduler/base_scheduler.py +58 -55
  30. memos/mem_scheduler/{modules → general_modules}/base.py +1 -2
  31. memos/mem_scheduler/{modules → general_modules}/dispatcher.py +54 -15
  32. memos/mem_scheduler/{modules → general_modules}/rabbitmq_service.py +4 -4
  33. memos/mem_scheduler/{modules → general_modules}/redis_service.py +1 -1
  34. memos/mem_scheduler/{modules → general_modules}/retriever.py +19 -5
  35. memos/mem_scheduler/{modules → general_modules}/scheduler_logger.py +10 -4
  36. memos/mem_scheduler/general_scheduler.py +110 -67
  37. memos/mem_scheduler/monitors/__init__.py +0 -0
  38. memos/mem_scheduler/monitors/dispatcher_monitor.py +305 -0
  39. memos/mem_scheduler/{modules/monitor.py → monitors/general_monitor.py} +57 -19
  40. memos/mem_scheduler/mos_for_test_scheduler.py +7 -1
  41. memos/mem_scheduler/schemas/general_schemas.py +3 -2
  42. memos/mem_scheduler/schemas/message_schemas.py +2 -1
  43. memos/mem_scheduler/schemas/monitor_schemas.py +10 -2
  44. memos/mem_scheduler/utils/misc_utils.py +43 -2
  45. memos/mem_user/mysql_user_manager.py +4 -2
  46. memos/memories/activation/item.py +1 -1
  47. memos/memories/activation/kv.py +20 -8
  48. memos/memories/textual/base.py +1 -1
  49. memos/memories/textual/general.py +1 -1
  50. memos/memories/textual/item.py +1 -1
  51. memos/memories/textual/tree.py +31 -1
  52. memos/memories/textual/tree_text_memory/organize/{conflict.py → handler.py} +30 -48
  53. memos/memories/textual/tree_text_memory/organize/manager.py +8 -96
  54. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +2 -0
  55. memos/memories/textual/tree_text_memory/organize/reorganizer.py +102 -140
  56. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +231 -0
  57. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +9 -0
  58. memos/memories/textual/tree_text_memory/retrieve/recall.py +67 -10
  59. memos/memories/textual/tree_text_memory/retrieve/reranker.py +1 -1
  60. memos/memories/textual/tree_text_memory/retrieve/searcher.py +246 -134
  61. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +7 -2
  62. memos/memories/textual/tree_text_memory/retrieve/utils.py +7 -5
  63. memos/memos_tools/lockfree_dict.py +120 -0
  64. memos/memos_tools/notification_utils.py +46 -0
  65. memos/memos_tools/thread_safe_dict.py +288 -0
  66. memos/reranker/__init__.py +4 -0
  67. memos/reranker/base.py +24 -0
  68. memos/reranker/cosine_local.py +95 -0
  69. memos/reranker/factory.py +43 -0
  70. memos/reranker/http_bge.py +99 -0
  71. memos/reranker/noop.py +16 -0
  72. memos/templates/mem_reader_prompts.py +290 -39
  73. memos/templates/mem_scheduler_prompts.py +23 -10
  74. memos/templates/mos_prompts.py +133 -31
  75. memos/templates/tree_reorganize_prompts.py +24 -17
  76. memos/utils.py +19 -0
  77. memos/memories/textual/tree_text_memory/organize/redundancy.py +0 -193
  78. {memoryos-0.2.2.dist-info → memoryos-1.0.1.dist-info}/LICENSE +0 -0
  79. {memoryos-0.2.2.dist-info → memoryos-1.0.1.dist-info}/WHEEL +0 -0
  80. {memoryos-0.2.2.dist-info → memoryos-1.0.1.dist-info}/entry_points.txt +0 -0
  81. memos/mem_scheduler/{modules → general_modules}/__init__.py +0 -0
  82. memos/mem_scheduler/{modules → general_modules}/misc.py +0 -0
memos/mem_os/product.py CHANGED
@@ -1,6 +1,8 @@
1
+ import asyncio
1
2
  import json
2
3
  import os
3
4
  import random
5
+ import threading
4
6
  import time
5
7
 
6
8
  from collections.abc import Generator
@@ -22,7 +24,10 @@ from memos.mem_os.utils.format_utils import (
22
24
  filter_nodes_by_tree_ids,
23
25
  remove_embedding_recursive,
24
26
  sort_children_by_memory_type,
25
- split_continuous_references,
27
+ )
28
+ from memos.mem_os.utils.reference_utils import (
29
+ prepare_reference_data,
30
+ process_streaming_references_complete,
26
31
  )
27
32
  from memos.mem_scheduler.schemas.general_schemas import (
28
33
  ANSWER_LABEL,
@@ -34,7 +39,12 @@ from memos.mem_user.user_manager import UserRole
34
39
  from memos.memories.textual.item import (
35
40
  TextualMemoryItem,
36
41
  )
37
- from memos.templates.mos_prompts import MEMOS_PRODUCT_BASE_PROMPT, MEMOS_PRODUCT_ENHANCE_PROMPT
42
+ from memos.templates.mos_prompts import (
43
+ FURTHER_SUGGESTION_PROMPT,
44
+ SUGGESTION_QUERY_PROMPT_EN,
45
+ SUGGESTION_QUERY_PROMPT_ZH,
46
+ get_memos_prompt,
47
+ )
38
48
  from memos.types import MessageList
39
49
 
40
50
 
@@ -45,6 +55,39 @@ load_dotenv()
45
55
  CUBE_PATH = os.getenv("MOS_CUBE_PATH", "/tmp/data/")
46
56
 
47
57
 
58
+ def _short_id(mem_id: str) -> str:
59
+ return (mem_id or "").split("-")[0] if mem_id else ""
60
+
61
+
62
+ def _format_mem_block(memories_all, max_items: int = 20, max_chars_each: int = 320) -> str:
63
+ """
64
+ Modify TextualMemoryItem Format:
65
+ 1:abcd :: [P] text...
66
+ 2:ef01 :: [O] text...
67
+ sequence is [i:memId] i; [P]=PersonalMemory / [O]=OuterMemory
68
+ """
69
+ if not memories_all:
70
+ return "(none)", "(none)"
71
+
72
+ lines_o = []
73
+ lines_p = []
74
+ for idx, m in enumerate(memories_all[:max_items], 1):
75
+ mid = _short_id(getattr(m, "id", "") or "")
76
+ mtype = getattr(getattr(m, "metadata", {}), "memory_type", None) or getattr(
77
+ m, "metadata", {}
78
+ ).get("memory_type", "")
79
+ tag = "O" if "Outer" in str(mtype) else "P"
80
+ txt = (getattr(m, "memory", "") or "").replace("\n", " ").strip()
81
+ if len(txt) > max_chars_each:
82
+ txt = txt[: max_chars_each - 1] + "…"
83
+ mid = mid or f"mem_{idx}"
84
+ if tag == "O":
85
+ lines_o.append(f"[{idx}:{mid}] :: [{tag}] {txt}\n")
86
+ elif tag == "P":
87
+ lines_p.append(f"[{idx}:{mid}] :: [{tag}] {txt}")
88
+ return "\n".join(lines_o), "\n".join(lines_p)
89
+
90
+
48
91
  class MOSProduct(MOSCore):
49
92
  """
50
93
  The MOSProduct class inherits from MOSCore and manages multiple users.
@@ -348,7 +391,11 @@ class MOSProduct(MOSCore):
348
391
  return self._create_user_config(user_id, user_config)
349
392
 
350
393
  def _build_system_prompt(
351
- self, memories_all: list[TextualMemoryItem], base_prompt: str | None = None
394
+ self,
395
+ memories_all: list[TextualMemoryItem],
396
+ base_prompt: str | None = None,
397
+ tone: str = "friendly",
398
+ verbosity: str = "mid",
352
399
  ) -> str:
353
400
  """
354
401
  Build custom system prompt for the user with memory references.
@@ -360,116 +407,46 @@ class MOSProduct(MOSCore):
360
407
  Returns:
361
408
  str: The custom system prompt.
362
409
  """
363
-
364
410
  # Build base prompt
365
411
  # Add memory context if available
366
- if memories_all:
367
- memory_context = "\n\n## Available ID Memories:\n"
368
- for i, memory in enumerate(memories_all, 1):
369
- # Format: [memory_id]: memory_content
370
- memory_id = f"{memory.id.split('-')[0]}" if hasattr(memory, "id") else f"mem_{i}"
371
- memory_content = memory.memory[:500] if hasattr(memory, "memory") else str(memory)
372
- memory_content = memory_content.replace("\n", " ")
373
- memory_context += f"{memory_id}: {memory_content}\n"
374
- return MEMOS_PRODUCT_BASE_PROMPT + memory_context
375
-
376
- return MEMOS_PRODUCT_BASE_PROMPT
412
+ now = datetime.now()
413
+ formatted_date = now.strftime("%Y-%m-%d (%A)")
414
+ sys_body = get_memos_prompt(
415
+ date=formatted_date, tone=tone, verbosity=verbosity, mode="base"
416
+ )
417
+ mem_block_o, mem_block_p = _format_mem_block(memories_all)
418
+ mem_block = mem_block_o + "\n" + mem_block_p
419
+ prefix = (base_prompt.strip() + "\n\n") if base_prompt else ""
420
+ return (
421
+ prefix
422
+ + sys_body
423
+ + "\n\n# Memories\n## PersonalMemory & OuterMemory (ordered)\n"
424
+ + mem_block
425
+ )
377
426
 
378
427
  def _build_enhance_system_prompt(
379
- self, user_id: str, memories_all: list[TextualMemoryItem]
428
+ self,
429
+ user_id: str,
430
+ memories_all: list[TextualMemoryItem],
431
+ tone: str = "friendly",
432
+ verbosity: str = "mid",
380
433
  ) -> str:
381
434
  """
382
435
  Build enhance prompt for the user with memory references.
383
436
  """
384
- if memories_all:
385
- personal_memory_context = "\n\n## Available ID and PersonalMemory Memories:\n"
386
- outer_memory_context = "\n\n## Available ID and OuterMemory Memories:\n"
387
- for i, memory in enumerate(memories_all, 1):
388
- # Format: [memory_id]: memory_content
389
- if memory.metadata.memory_type != "OuterMemory":
390
- memory_id = (
391
- f"{memory.id.split('-')[0]}" if hasattr(memory, "id") else f"mem_{i}"
392
- )
393
- memory_content = (
394
- memory.memory[:500] if hasattr(memory, "memory") else str(memory)
395
- )
396
- personal_memory_context += f"{memory_id}: {memory_content}\n"
397
- else:
398
- memory_id = (
399
- f"{memory.id.split('-')[0]}" if hasattr(memory, "id") else f"mem_{i}"
400
- )
401
- memory_content = (
402
- memory.memory[:500] if hasattr(memory, "memory") else str(memory)
403
- )
404
- memory_content = memory_content.replace("\n", " ")
405
- outer_memory_context += f"{memory_id}: {memory_content}\n"
406
- return MEMOS_PRODUCT_ENHANCE_PROMPT + personal_memory_context + outer_memory_context
407
- return MEMOS_PRODUCT_ENHANCE_PROMPT
408
-
409
- def _process_streaming_references_complete(self, text_buffer: str) -> tuple[str, str]:
410
- """
411
- Complete streaming reference processing to ensure reference tags are never split.
412
-
413
- Args:
414
- text_buffer (str): The accumulated text buffer.
415
-
416
- Returns:
417
- tuple[str, str]: (processed_text, remaining_buffer)
418
- """
419
- import re
420
-
421
- # Pattern to match complete reference tags: [refid:memoriesID]
422
- complete_pattern = r"\[\d+:[^\]]+\]"
423
-
424
- # Find all complete reference tags
425
- complete_matches = list(re.finditer(complete_pattern, text_buffer))
426
-
427
- if complete_matches:
428
- # Find the last complete tag
429
- last_match = complete_matches[-1]
430
- end_pos = last_match.end()
431
-
432
- # Get text up to the end of the last complete tag
433
- processed_text = text_buffer[:end_pos]
434
- remaining_buffer = text_buffer[end_pos:]
435
-
436
- # Apply reference splitting to the processed text
437
- processed_text = split_continuous_references(processed_text)
438
-
439
- return processed_text, remaining_buffer
440
-
441
- # Check for incomplete reference tags
442
- # Look for opening bracket with number and colon
443
- opening_pattern = r"\[\d+:"
444
- opening_matches = list(re.finditer(opening_pattern, text_buffer))
445
-
446
- if opening_matches:
447
- # Find the last opening tag
448
- last_opening = opening_matches[-1]
449
- opening_start = last_opening.start()
450
-
451
- # Check if we have a complete opening pattern
452
- if last_opening.end() <= len(text_buffer):
453
- # We have a complete opening pattern, keep everything in buffer
454
- return "", text_buffer
455
- else:
456
- # Incomplete opening pattern, return text before it
457
- processed_text = text_buffer[:opening_start]
458
- # Apply reference splitting to the processed text
459
- processed_text = split_continuous_references(processed_text)
460
- return processed_text, text_buffer[opening_start:]
461
-
462
- # Check for partial opening pattern (starts with [ but not complete)
463
- if "[" in text_buffer:
464
- ref_start = text_buffer.find("[")
465
- processed_text = text_buffer[:ref_start]
466
- # Apply reference splitting to the processed text
467
- processed_text = split_continuous_references(processed_text)
468
- return processed_text, text_buffer[ref_start:]
469
-
470
- # No reference tags found, apply reference splitting and return all text
471
- processed_text = split_continuous_references(text_buffer)
472
- return processed_text, ""
437
+ now = datetime.now()
438
+ formatted_date = now.strftime("%Y-%m-%d (%A)")
439
+ sys_body = get_memos_prompt(
440
+ date=formatted_date, tone=tone, verbosity=verbosity, mode="enhance"
441
+ )
442
+ mem_block_o, mem_block_p = _format_mem_block(memories_all)
443
+ return (
444
+ sys_body
445
+ + "\n\n# Memories\n## PersonalMemory (ordered)\n"
446
+ + mem_block_p
447
+ + "\n## OuterMemory (ordered)\n"
448
+ + mem_block_o
449
+ )
473
450
 
474
451
  def _extract_references_from_response(self, response: str) -> tuple[str, list[dict]]:
475
452
  """
@@ -554,17 +531,212 @@ class MOSProduct(MOSCore):
554
531
  mem_cube=self.mem_cubes[mem_cube_id],
555
532
  label=label,
556
533
  content=query,
557
- timestamp=datetime.now(),
534
+ timestamp=datetime.utcnow(),
558
535
  )
559
536
  self.mem_scheduler.submit_messages(messages=[message_item])
560
537
 
538
+ async def _post_chat_processing(
539
+ self,
540
+ user_id: str,
541
+ cube_id: str,
542
+ query: str,
543
+ full_response: str,
544
+ system_prompt: str,
545
+ time_start: float,
546
+ time_end: float,
547
+ speed_improvement: float,
548
+ current_messages: list,
549
+ ) -> None:
550
+ """
551
+ Asynchronous processing of logs, notifications and memory additions
552
+ """
553
+ try:
554
+ logger.info(
555
+ f"user_id: {user_id}, cube_id: {cube_id}, current_messages: {current_messages}"
556
+ )
557
+ logger.info(f"user_id: {user_id}, cube_id: {cube_id}, full_response: {full_response}")
558
+
559
+ clean_response, extracted_references = self._extract_references_from_response(
560
+ full_response
561
+ )
562
+ logger.info(f"Extracted {len(extracted_references)} references from response")
563
+
564
+ # Send chat report notifications asynchronously
565
+ if self.online_bot:
566
+ try:
567
+ from memos.memos_tools.notification_utils import (
568
+ send_online_bot_notification_async,
569
+ )
570
+
571
+ # Prepare notification data
572
+ chat_data = {
573
+ "query": query,
574
+ "user_id": user_id,
575
+ "cube_id": cube_id,
576
+ "system_prompt": system_prompt,
577
+ "full_response": full_response,
578
+ }
579
+
580
+ system_data = {
581
+ "references": extracted_references,
582
+ "time_start": time_start,
583
+ "time_end": time_end,
584
+ "speed_improvement": speed_improvement,
585
+ }
586
+
587
+ emoji_config = {"chat": "💬", "system_info": "📊"}
588
+
589
+ await send_online_bot_notification_async(
590
+ online_bot=self.online_bot,
591
+ header_name="MemOS Chat Report",
592
+ sub_title_name="chat_with_references",
593
+ title_color="#00956D",
594
+ other_data1=chat_data,
595
+ other_data2=system_data,
596
+ emoji=emoji_config,
597
+ )
598
+ except Exception as e:
599
+ logger.warning(f"Failed to send chat notification (async): {e}")
600
+
601
+ self._send_message_to_scheduler(
602
+ user_id=user_id, mem_cube_id=cube_id, query=clean_response, label=ANSWER_LABEL
603
+ )
604
+
605
+ self.add(
606
+ user_id=user_id,
607
+ messages=[
608
+ {
609
+ "role": "user",
610
+ "content": query,
611
+ "chat_time": str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
612
+ },
613
+ {
614
+ "role": "assistant",
615
+ "content": clean_response, # Store clean text without reference markers
616
+ "chat_time": str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
617
+ },
618
+ ],
619
+ mem_cube_id=cube_id,
620
+ )
621
+
622
+ logger.info(f"Post-chat processing completed for user {user_id}")
623
+
624
+ except Exception as e:
625
+ logger.error(f"Error in post-chat processing for user {user_id}: {e}", exc_info=True)
626
+
627
+ def _start_post_chat_processing(
628
+ self,
629
+ user_id: str,
630
+ cube_id: str,
631
+ query: str,
632
+ full_response: str,
633
+ system_prompt: str,
634
+ time_start: float,
635
+ time_end: float,
636
+ speed_improvement: float,
637
+ current_messages: list,
638
+ ) -> None:
639
+ """
640
+ Asynchronous processing of logs, notifications and memory additions, handle synchronous and asynchronous environments
641
+ """
642
+
643
+ def run_async_in_thread():
644
+ """Running asynchronous tasks in a new thread"""
645
+ try:
646
+ loop = asyncio.new_event_loop()
647
+ asyncio.set_event_loop(loop)
648
+ try:
649
+ loop.run_until_complete(
650
+ self._post_chat_processing(
651
+ user_id=user_id,
652
+ cube_id=cube_id,
653
+ query=query,
654
+ full_response=full_response,
655
+ system_prompt=system_prompt,
656
+ time_start=time_start,
657
+ time_end=time_end,
658
+ speed_improvement=speed_improvement,
659
+ current_messages=current_messages,
660
+ )
661
+ )
662
+ finally:
663
+ loop.close()
664
+ except Exception as e:
665
+ logger.error(
666
+ f"Error in thread-based post-chat processing for user {user_id}: {e}",
667
+ exc_info=True,
668
+ )
669
+
670
+ try:
671
+ # Try to get the current event loop
672
+ asyncio.get_running_loop()
673
+ # Create task and store reference to prevent garbage collection
674
+ task = asyncio.create_task(
675
+ self._post_chat_processing(
676
+ user_id=user_id,
677
+ cube_id=cube_id,
678
+ query=query,
679
+ full_response=full_response,
680
+ system_prompt=system_prompt,
681
+ time_start=time_start,
682
+ time_end=time_end,
683
+ speed_improvement=speed_improvement,
684
+ current_messages=current_messages,
685
+ )
686
+ )
687
+ # Add exception handling for the background task
688
+ task.add_done_callback(
689
+ lambda t: logger.error(
690
+ f"Error in background post-chat processing for user {user_id}: {t.exception()}",
691
+ exc_info=True,
692
+ )
693
+ if t.exception()
694
+ else None
695
+ )
696
+ except RuntimeError:
697
+ # No event loop, run in a new thread
698
+ thread = threading.Thread(
699
+ target=run_async_in_thread,
700
+ name=f"PostChatProcessing-{user_id}",
701
+ # Set as a daemon thread to avoid blocking program exit
702
+ daemon=True,
703
+ )
704
+ thread.start()
705
+
561
706
  def _filter_memories_by_threshold(
562
- self, memories: list[TextualMemoryItem], threshold: float = 0.20
707
+ self,
708
+ memories: list[TextualMemoryItem],
709
+ threshold: float = 0.30,
710
+ min_num: int = 3,
711
+ memory_type: Literal["OuterMemory"] = "OuterMemory",
563
712
  ) -> list[TextualMemoryItem]:
564
713
  """
565
- Filter memories by threshold.
714
+ Filter memories by threshold and type, at least min_num memories for Non-OuterMemory.
715
+ Args:
716
+ memories: list[TextualMemoryItem],
717
+ threshold: float,
718
+ min_num: int,
719
+ memory_type: Literal["OuterMemory"],
720
+ Returns:
721
+ list[TextualMemoryItem]
566
722
  """
567
- return [memory for memory in memories if memory.metadata.relativity >= threshold]
723
+ sorted_memories = sorted(memories, key=lambda m: m.metadata.relativity, reverse=True)
724
+ filtered_person = [m for m in memories if m.metadata.memory_type != memory_type]
725
+ filtered_outer = [m for m in memories if m.metadata.memory_type == memory_type]
726
+ filtered = []
727
+ per_memory_count = 0
728
+ for m in sorted_memories:
729
+ if m.metadata.relativity >= threshold:
730
+ if m.metadata.memory_type != memory_type:
731
+ per_memory_count += 1
732
+ filtered.append(m)
733
+ if len(filtered) < min_num:
734
+ filtered = filtered_person[:min_num] + filtered_outer[:min_num]
735
+ else:
736
+ if per_memory_count < min_num:
737
+ filtered += filtered_person[per_memory_count:min_num]
738
+ filtered_memory = sorted(filtered, key=lambda m: m.metadata.relativity, reverse=True)
739
+ return filtered_memory
568
740
 
569
741
  def register_mem_cube(
570
742
  self,
@@ -662,7 +834,7 @@ class MOSProduct(MOSCore):
662
834
 
663
835
  # Create a default cube for the user using MOSCore's methods
664
836
  default_cube_name = f"{user_name}_{user_id}_default_cube"
665
- mem_cube_name_or_path = f"{CUBE_PATH}/{default_cube_name}"
837
+ mem_cube_name_or_path = os.path.join(CUBE_PATH, default_cube_name)
666
838
  default_cube_id = self.create_cube_for_user(
667
839
  cube_name=default_cube_name, owner_id=user_id, cube_path=mem_cube_name_or_path
668
840
  )
@@ -696,7 +868,23 @@ class MOSProduct(MOSCore):
696
868
  except Exception as e:
697
869
  return {"status": "error", "message": f"Failed to register user: {e!s}"}
698
870
 
699
- def get_suggestion_query(self, user_id: str, language: str = "zh") -> list[str]:
871
+ def _get_further_suggestion(self, message: MessageList | None = None) -> list[str]:
872
+ """Get further suggestion prompt."""
873
+ try:
874
+ dialogue_info = "\n".join([f"{msg['role']}: {msg['content']}" for msg in message[-2:]])
875
+ further_suggestion_prompt = FURTHER_SUGGESTION_PROMPT.format(dialogue=dialogue_info)
876
+ message_list = [{"role": "system", "content": further_suggestion_prompt}]
877
+ response = self.chat_llm.generate(message_list)
878
+ clean_response = clean_json_response(response)
879
+ response_json = json.loads(clean_response)
880
+ return response_json["query"]
881
+ except Exception as e:
882
+ logger.error(f"Error getting further suggestion: {e}", exc_info=True)
883
+ return []
884
+
885
+ def get_suggestion_query(
886
+ self, user_id: str, language: str = "zh", message: MessageList | None = None
887
+ ) -> list[str]:
700
888
  """Get suggestion query from LLM.
701
889
  Args:
702
890
  user_id (str): User ID.
@@ -705,37 +893,13 @@ class MOSProduct(MOSCore):
705
893
  Returns:
706
894
  list[str]: The suggestion query list.
707
895
  """
708
-
896
+ if message:
897
+ further_suggestion = self._get_further_suggestion(message)
898
+ return further_suggestion
709
899
  if language == "zh":
710
- suggestion_prompt = """
711
- 你是一个有用的助手,可以帮助用户生成建议查询。
712
- 我将获取用户最近的一些记忆,
713
- 你应该生成一些建议查询,这些查询应该是用户想要查询的内容,
714
- 用户最近的记忆是:
715
- {memories}
716
- 请生成3个建议查询用中文,
717
- 输出应该是json格式,键是"query",值是一个建议查询列表。
718
-
719
- 示例:
720
- {{
721
- "query": ["查询1", "查询2", "查询3"]
722
- }}
723
- """
900
+ suggestion_prompt = SUGGESTION_QUERY_PROMPT_ZH
724
901
  else: # English
725
- suggestion_prompt = """
726
- You are a helpful assistant that can help users to generate suggestion query.
727
- I will get some user recently memories,
728
- you should generate some suggestion query, the query should be user what to query,
729
- user recently memories is:
730
- {memories}
731
- if the user recently memories is empty, please generate 3 suggestion query in English,
732
- output should be a json format, the key is "query", the value is a list of suggestion query.
733
-
734
- example:
735
- {{
736
- "query": ["query1", "query2", "query3"]
737
- }}
738
- """
902
+ suggestion_prompt = SUGGESTION_QUERY_PROMPT_EN
739
903
  text_mem_result = super().search("my recently memories", user_id=user_id, top_k=3)[
740
904
  "text_mem"
741
905
  ]
@@ -749,14 +913,75 @@ class MOSProduct(MOSCore):
749
913
  response_json = json.loads(clean_response)
750
914
  return response_json["query"]
751
915
 
752
- def chat_with_references(
916
+ def chat(
753
917
  self,
754
918
  query: str,
755
919
  user_id: str,
756
920
  cube_id: str | None = None,
757
921
  history: MessageList | None = None,
922
+ base_prompt: str | None = None,
923
+ internet_search: bool = False,
924
+ moscube: bool = False,
758
925
  top_k: int = 10,
926
+ threshold: float = 0.5,
927
+ ) -> str:
928
+ """
929
+ Chat with LLM with memory references and complete response.
930
+ """
931
+ self._load_user_cubes(user_id, self.default_cube_config)
932
+ time_start = time.time()
933
+ memories_result = super().search(
934
+ query,
935
+ user_id,
936
+ install_cube_ids=[cube_id] if cube_id else None,
937
+ top_k=top_k,
938
+ mode="fine",
939
+ internet_search=internet_search,
940
+ moscube=moscube,
941
+ )["text_mem"]
942
+
943
+ memories_list = []
944
+ if memories_result:
945
+ memories_list = memories_result[0]["memories"]
946
+ memories_list = self._filter_memories_by_threshold(memories_list, threshold)
947
+ new_memories_list = []
948
+ for m in memories_list:
949
+ m.metadata.embedding = []
950
+ new_memories_list.append(m)
951
+ memories_list = new_memories_list
952
+ system_prompt = super()._build_system_prompt(memories_list, base_prompt)
953
+ history_info = []
954
+ if history:
955
+ history_info = history[-20:]
956
+ current_messages = [
957
+ {"role": "system", "content": system_prompt},
958
+ *history_info,
959
+ {"role": "user", "content": query},
960
+ ]
961
+ response = self.chat_llm.generate(current_messages)
962
+ time_end = time.time()
963
+ self._start_post_chat_processing(
964
+ user_id=user_id,
965
+ cube_id=cube_id,
966
+ query=query,
967
+ full_response=response,
968
+ system_prompt=system_prompt,
969
+ time_start=time_start,
970
+ time_end=time_end,
971
+ speed_improvement=0.0,
972
+ current_messages=current_messages,
973
+ )
974
+ return response, memories_list
975
+
976
+ def chat_with_references(
977
+ self,
978
+ query: str,
979
+ user_id: str,
980
+ cube_id: str | None = None,
981
+ history: MessageList | None = None,
982
+ top_k: int = 20,
759
983
  internet_search: bool = False,
984
+ moscube: bool = False,
760
985
  ) -> Generator[str, None, None]:
761
986
  """
762
987
  Chat with LLM with memory references and streaming output.
@@ -782,7 +1007,9 @@ class MOSProduct(MOSCore):
782
1007
  top_k=top_k,
783
1008
  mode="fine",
784
1009
  internet_search=internet_search,
1010
+ moscube=moscube,
785
1011
  )["text_mem"]
1012
+
786
1013
  yield f"data: {json.dumps({'type': 'status', 'data': '1'})}\n\n"
787
1014
  search_time_end = time.time()
788
1015
  logger.info(
@@ -794,6 +1021,9 @@ class MOSProduct(MOSCore):
794
1021
  if memories_result:
795
1022
  memories_list = memories_result[0]["memories"]
796
1023
  memories_list = self._filter_memories_by_threshold(memories_list)
1024
+
1025
+ reference = prepare_reference_data(memories_list)
1026
+ yield f"data: {json.dumps({'type': 'reference', 'data': reference})}\n\n"
797
1027
  # Build custom system prompt with relevant memories)
798
1028
  system_prompt = self._build_enhance_system_prompt(user_id, memories_list)
799
1029
  # Get chat history
@@ -802,7 +1032,7 @@ class MOSProduct(MOSCore):
802
1032
 
803
1033
  chat_history = self.chat_history_manager[user_id]
804
1034
  if history:
805
- chat_history.chat_history = history[-10:]
1035
+ chat_history.chat_history = history[-20:]
806
1036
  current_messages = [
807
1037
  {"role": "system", "content": system_prompt},
808
1038
  *chat_history.chat_history,
@@ -835,7 +1065,7 @@ class MOSProduct(MOSCore):
835
1065
  elif self.config.chat_model.backend == "vllm":
836
1066
  response_stream = self.chat_llm.generate_stream(current_messages)
837
1067
  else:
838
- if self.config.chat_model.backend in ["huggingface", "vllm"]:
1068
+ if self.config.chat_model.backend in ["huggingface", "vllm", "openai"]:
839
1069
  response_stream = self.chat_llm.generate_stream(current_messages)
840
1070
  else:
841
1071
  response_stream = self.chat_llm.generate(current_messages)
@@ -852,7 +1082,7 @@ class MOSProduct(MOSCore):
852
1082
  full_response = ""
853
1083
  token_count = 0
854
1084
  # Use tiktoken for proper token-based chunking
855
- if self.config.chat_model.backend not in ["huggingface", "vllm"]:
1085
+ if self.config.chat_model.backend not in ["huggingface", "vllm", "openai"]:
856
1086
  # For non-huggingface backends, we need to collect the full response first
857
1087
  full_response_text = ""
858
1088
  for chunk in response_stream:
@@ -868,7 +1098,7 @@ class MOSProduct(MOSCore):
868
1098
  full_response += chunk
869
1099
 
870
1100
  # Process buffer to ensure complete reference tags
871
- processed_chunk, remaining_buffer = self._process_streaming_references_complete(buffer)
1101
+ processed_chunk, remaining_buffer = process_streaming_references_complete(buffer)
872
1102
 
873
1103
  if processed_chunk:
874
1104
  chunk_data = f"data: {json.dumps({'type': 'text', 'data': processed_chunk}, ensure_ascii=False)}\n\n"
@@ -877,88 +1107,34 @@ class MOSProduct(MOSCore):
877
1107
 
878
1108
  # Process any remaining buffer
879
1109
  if buffer:
880
- processed_chunk, remaining_buffer = self._process_streaming_references_complete(buffer)
1110
+ processed_chunk, remaining_buffer = process_streaming_references_complete(buffer)
881
1111
  if processed_chunk:
882
1112
  chunk_data = f"data: {json.dumps({'type': 'text', 'data': processed_chunk}, ensure_ascii=False)}\n\n"
883
1113
  yield chunk_data
884
1114
 
885
- # Prepare reference data
886
- reference = []
887
- for memories in memories_list:
888
- memories_json = memories.model_dump()
889
- memories_json["metadata"]["ref_id"] = f"{memories.id.split('-')[0]}"
890
- memories_json["metadata"]["embedding"] = []
891
- memories_json["metadata"]["sources"] = []
892
- memories_json["metadata"]["memory"] = memories.memory
893
- memories_json["metadata"]["id"] = memories.id
894
- reference.append({"metadata": memories_json["metadata"]})
895
-
896
- yield f"data: {json.dumps({'type': 'reference', 'data': reference})}\n\n"
897
1115
  # set kvcache improve speed
898
1116
  speed_improvement = round(float((len(system_prompt) / 2) * 0.0048 + 44.5), 1)
899
1117
  total_time = round(float(time_end - time_start), 1)
900
1118
 
901
1119
  yield f"data: {json.dumps({'type': 'time', 'data': {'total_time': total_time, 'speed_improvement': f'{speed_improvement}%'}})}\n\n"
1120
+ # get further suggestion
1121
+ current_messages.append({"role": "assistant", "content": full_response})
1122
+ further_suggestion = self._get_further_suggestion(current_messages)
1123
+ logger.info(f"further_suggestion: {further_suggestion}")
1124
+ yield f"data: {json.dumps({'type': 'suggestion', 'data': further_suggestion})}\n\n"
902
1125
  yield f"data: {json.dumps({'type': 'end'})}\n\n"
903
1126
 
904
- logger.info(f"user_id: {user_id}, cube_id: {cube_id}, current_messages: {current_messages}")
905
- logger.info(f"user_id: {user_id}, cube_id: {cube_id}, full_response: {full_response}")
906
-
907
- clean_response, extracted_references = self._extract_references_from_response(full_response)
908
- logger.info(f"Extracted {len(extracted_references)} references from response")
909
-
910
- # Send chat report if online_bot is available
911
- try:
912
- from memos.memos_tools.notification_utils import send_online_bot_notification
913
-
914
- # Prepare data for online_bot
915
- chat_data = {
916
- "query": query,
917
- "user_id": user_id,
918
- "cube_id": cube_id,
919
- "system_prompt": system_prompt,
920
- "full_response": full_response,
921
- }
922
-
923
- system_data = {
924
- "references": extracted_references,
925
- "time_start": time_start,
926
- "time_end": time_end,
927
- "speed_improvement": speed_improvement,
928
- }
929
-
930
- emoji_config = {"chat": "💬", "system_info": "📊"}
931
-
932
- send_online_bot_notification(
933
- online_bot=self.online_bot,
934
- header_name="MemOS Chat Report",
935
- sub_title_name="chat_with_references",
936
- title_color="#00956D",
937
- other_data1=chat_data,
938
- other_data2=system_data,
939
- emoji=emoji_config,
940
- )
941
- except Exception as e:
942
- logger.warning(f"Failed to send chat notification: {e}")
943
-
944
- self._send_message_to_scheduler(
945
- user_id=user_id, mem_cube_id=cube_id, query=clean_response, label=ANSWER_LABEL
946
- )
947
- self.add(
1127
+ # Asynchronous processing of logs, notifications and memory additions
1128
+ self._start_post_chat_processing(
948
1129
  user_id=user_id,
949
- messages=[
950
- {
951
- "role": "user",
952
- "content": query,
953
- "chat_time": str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
954
- },
955
- {
956
- "role": "assistant",
957
- "content": clean_response, # Store clean text without reference markers
958
- "chat_time": str(datetime.now().strftime("%Y-%m-%d %H:%M:%S")),
959
- },
960
- ],
961
- mem_cube_id=cube_id,
1130
+ cube_id=cube_id,
1131
+ query=query,
1132
+ full_response=full_response,
1133
+ system_prompt=system_prompt,
1134
+ time_start=time_start,
1135
+ time_end=time_end,
1136
+ speed_improvement=speed_improvement,
1137
+ current_messages=current_messages,
962
1138
  )
963
1139
 
964
1140
  def get_all(
@@ -1149,6 +1325,7 @@ class MOSProduct(MOSCore):
1149
1325
  memories["metadata"]["memory"] = memories["memory"]
1150
1326
  memories_list.append(memories)
1151
1327
  reformat_memory_list.append({"cube_id": memory["cube_id"], "memories": memories_list})
1328
+ logger.info(f"search memory list is : {reformat_memory_list}")
1152
1329
  search_result["text_mem"] = reformat_memory_list
1153
1330
  time_end = time.time()
1154
1331
  logger.info(