MemoryOS 0.2.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff shows the differences between publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MemoryOS might be problematic. Click here for more details.

Files changed (62)
  1. {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/METADATA +6 -1
  2. {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/RECORD +61 -55
  3. memos/__init__.py +1 -1
  4. memos/api/config.py +6 -8
  5. memos/api/context/context.py +1 -1
  6. memos/api/context/dependencies.py +11 -0
  7. memos/configs/internet_retriever.py +13 -0
  8. memos/configs/mem_scheduler.py +38 -16
  9. memos/graph_dbs/base.py +30 -3
  10. memos/graph_dbs/nebular.py +442 -194
  11. memos/graph_dbs/neo4j.py +14 -5
  12. memos/log.py +5 -0
  13. memos/mem_os/core.py +19 -9
  14. memos/mem_os/main.py +1 -1
  15. memos/mem_os/product.py +6 -69
  16. memos/mem_os/utils/default_config.py +1 -1
  17. memos/mem_os/utils/format_utils.py +11 -47
  18. memos/mem_os/utils/reference_utils.py +133 -0
  19. memos/mem_scheduler/base_scheduler.py +58 -55
  20. memos/mem_scheduler/{modules → general_modules}/base.py +1 -2
  21. memos/mem_scheduler/{modules → general_modules}/dispatcher.py +54 -15
  22. memos/mem_scheduler/{modules → general_modules}/rabbitmq_service.py +4 -4
  23. memos/mem_scheduler/{modules → general_modules}/redis_service.py +1 -1
  24. memos/mem_scheduler/{modules → general_modules}/retriever.py +19 -5
  25. memos/mem_scheduler/{modules → general_modules}/scheduler_logger.py +10 -4
  26. memos/mem_scheduler/general_scheduler.py +110 -67
  27. memos/mem_scheduler/monitors/__init__.py +0 -0
  28. memos/mem_scheduler/monitors/dispatcher_monitor.py +305 -0
  29. memos/mem_scheduler/{modules/monitor.py → monitors/general_monitor.py} +57 -19
  30. memos/mem_scheduler/mos_for_test_scheduler.py +7 -1
  31. memos/mem_scheduler/schemas/general_schemas.py +3 -2
  32. memos/mem_scheduler/schemas/message_schemas.py +2 -1
  33. memos/mem_scheduler/schemas/monitor_schemas.py +10 -2
  34. memos/mem_scheduler/utils/misc_utils.py +43 -2
  35. memos/memories/activation/item.py +1 -1
  36. memos/memories/activation/kv.py +20 -8
  37. memos/memories/textual/base.py +1 -1
  38. memos/memories/textual/general.py +1 -1
  39. memos/memories/textual/tree_text_memory/organize/{conflict.py → handler.py} +30 -48
  40. memos/memories/textual/tree_text_memory/organize/manager.py +8 -96
  41. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +2 -0
  42. memos/memories/textual/tree_text_memory/organize/reorganizer.py +102 -140
  43. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +229 -0
  44. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +9 -0
  45. memos/memories/textual/tree_text_memory/retrieve/recall.py +15 -8
  46. memos/memories/textual/tree_text_memory/retrieve/reranker.py +1 -1
  47. memos/memories/textual/tree_text_memory/retrieve/searcher.py +177 -125
  48. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +7 -2
  49. memos/memories/textual/tree_text_memory/retrieve/utils.py +1 -1
  50. memos/memos_tools/lockfree_dict.py +120 -0
  51. memos/memos_tools/thread_safe_dict.py +288 -0
  52. memos/templates/mem_reader_prompts.py +2 -0
  53. memos/templates/mem_scheduler_prompts.py +23 -10
  54. memos/templates/mos_prompts.py +40 -11
  55. memos/templates/tree_reorganize_prompts.py +24 -17
  56. memos/utils.py +19 -0
  57. memos/memories/textual/tree_text_memory/organize/redundancy.py +0 -193
  58. {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/LICENSE +0 -0
  59. {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/WHEEL +0 -0
  60. {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/entry_points.txt +0 -0
  61. /memos/mem_scheduler/{modules → general_modules}/__init__.py +0 -0
  62. /memos/mem_scheduler/{modules → general_modules}/misc.py +0 -0
@@ -1,11 +1,12 @@
1
1
  from datetime import datetime
2
+ from threading import Lock
2
3
  from typing import Any
3
4
 
4
5
  from memos.configs.mem_scheduler import BaseSchedulerConfig
5
6
  from memos.llms.base import BaseLLM
6
7
  from memos.log import get_logger
7
8
  from memos.mem_cube.general import GeneralMemCube
8
- from memos.mem_scheduler.modules.base import BaseSchedulerModule
9
+ from memos.mem_scheduler.general_modules.base import BaseSchedulerModule
9
10
  from memos.mem_scheduler.schemas.general_schemas import (
10
11
  DEFAULT_ACTIVATION_MEM_MONITOR_SIZE_LIMIT,
11
12
  DEFAULT_WEIGHT_VECTOR_FOR_RANKING,
@@ -28,7 +29,7 @@ from memos.memories.textual.tree import TreeTextMemory
28
29
  logger = get_logger(__name__)
29
30
 
30
31
 
31
- class SchedulerMonitor(BaseSchedulerModule):
32
+ class SchedulerGeneralMonitor(BaseSchedulerModule):
32
33
  """Monitors and manages scheduling operations with LLM integration."""
33
34
 
34
35
  def __init__(self, process_llm: BaseLLM, config: BaseSchedulerConfig):
@@ -41,14 +42,16 @@ class SchedulerMonitor(BaseSchedulerModule):
41
42
 
42
43
  # Partial Retention Strategy
43
44
  self.partial_retention_number = 2
44
- self.working_mem_monitor_capacity = DEFAULT_WORKING_MEM_MONITOR_SIZE_LIMIT
45
- self.activation_mem_monitor_capacity = DEFAULT_ACTIVATION_MEM_MONITOR_SIZE_LIMIT
45
+ self.working_mem_monitor_capacity = self.config.get(
46
+ "working_mem_monitor_capacity", DEFAULT_WORKING_MEM_MONITOR_SIZE_LIMIT
47
+ )
48
+ self.activation_mem_monitor_capacity = self.config.get(
49
+ "activation_mem_monitor_capacity", DEFAULT_ACTIVATION_MEM_MONITOR_SIZE_LIMIT
50
+ )
46
51
 
47
52
  # attributes
48
53
  # recording query_messages
49
- self.query_monitors: QueryMonitorQueue[QueryMonitorItem] = QueryMonitorQueue(
50
- maxsize=self.config.context_window_size
51
- )
54
+ self.query_monitors: dict[UserID, dict[MemCubeID, QueryMonitorQueue[QueryMonitorItem]]] = {}
52
55
 
53
56
  self.working_memory_monitors: dict[UserID, dict[MemCubeID, MemoryMonitorManager]] = {}
54
57
  self.activation_memory_monitors: dict[UserID, dict[MemCubeID, MemoryMonitorManager]] = {}
@@ -57,6 +60,7 @@ class SchedulerMonitor(BaseSchedulerModule):
57
60
  self.last_activation_mem_update_time = datetime.min
58
61
  self.last_query_consume_time = datetime.min
59
62
 
63
+ self._register_lock = Lock()
60
64
  self._process_llm = process_llm
61
65
 
62
66
  def extract_query_keywords(self, query: str) -> list:
@@ -78,15 +82,34 @@ class SchedulerMonitor(BaseSchedulerModule):
78
82
  keywords = [query]
79
83
  return keywords
80
84
 
85
+ def register_query_monitor_if_not_exists(
86
+ self,
87
+ user_id: UserID | str,
88
+ mem_cube_id: MemCubeID | str,
89
+ ) -> None:
90
+ # First check (lock-free, fast path)
91
+ if user_id in self.query_monitors and mem_cube_id in self.query_monitors[user_id]:
92
+ return
93
+
94
+ # Second check (with lock, ensures uniqueness)
95
+ with self._register_lock:
96
+ if user_id not in self.query_monitors:
97
+ self.query_monitors[user_id] = {}
98
+ if mem_cube_id not in self.query_monitors[user_id]:
99
+ self.query_monitors[user_id][mem_cube_id] = QueryMonitorQueue(
100
+ maxsize=self.config.context_window_size
101
+ )
102
+
81
103
  def register_memory_manager_if_not_exists(
82
104
  self,
83
- user_id: str,
84
- mem_cube_id: str,
105
+ user_id: UserID | str,
106
+ mem_cube_id: MemCubeID | str,
85
107
  memory_monitors: dict[UserID, dict[MemCubeID, MemoryMonitorManager]],
86
108
  max_capacity: int,
87
109
  ) -> None:
88
110
  """
89
111
  Register a new MemoryMonitorManager for the given user and memory cube if it doesn't exist.
112
+ Thread-safe implementation using double-checked locking pattern.
90
113
 
91
114
  Checks if a MemoryMonitorManager already exists for the specified user_id and mem_cube_id.
92
115
  If not, creates a new MemoryMonitorManager with appropriate capacity settings and registers it.
@@ -94,14 +117,34 @@ class SchedulerMonitor(BaseSchedulerModule):
94
117
  Args:
95
118
  user_id: The ID of the user to associate with the memory manager
96
119
  mem_cube_id: The ID of the memory cube to monitor
120
+ memory_monitors: Dictionary storing existing memory monitor managers
121
+ max_capacity: Maximum capacity for the new memory monitor manager
122
+ lock: Threading lock to ensure safe concurrent access
97
123
 
98
124
  Note:
99
125
  This function will update the loose_max_working_memory_capacity based on the current
100
126
  WorkingMemory size plus partial retention number before creating a new manager.
101
127
  """
102
- # Check if a MemoryMonitorManager already exists for the current user_id and mem_cube_id
103
- # If doesn't exist, create and register a new one
104
- if (user_id not in memory_monitors) or (mem_cube_id not in memory_monitors[user_id]):
128
+ # First check (lock-free, fast path)
129
+ # Quickly verify existence without lock overhead
130
+ if user_id in memory_monitors and mem_cube_id in memory_monitors[user_id]:
131
+ logger.info(
132
+ f"MemoryMonitorManager already exists for user_id={user_id}, "
133
+ f"mem_cube_id={mem_cube_id} in the provided memory_monitors dictionary"
134
+ )
135
+ return
136
+
137
+ # Second check (with lock, ensures uniqueness)
138
+ # Acquire lock before modification and verify again to prevent race conditions
139
+ with self._register_lock:
140
+ # Re-check after acquiring lock, as another thread might have created it
141
+ if user_id in memory_monitors and mem_cube_id in memory_monitors[user_id]:
142
+ logger.info(
143
+ f"MemoryMonitorManager already exists for user_id={user_id}, "
144
+ f"mem_cube_id={mem_cube_id} in the provided memory_monitors dictionary"
145
+ )
146
+ return
147
+
105
148
  # Initialize MemoryMonitorManager with user ID, memory cube ID, and max capacity
106
149
  monitor_manager = MemoryMonitorManager(
107
150
  user_id=user_id, mem_cube_id=mem_cube_id, max_capacity=max_capacity
@@ -113,11 +156,6 @@ class SchedulerMonitor(BaseSchedulerModule):
113
156
  f"Registered new MemoryMonitorManager for user_id={user_id},"
114
157
  f" mem_cube_id={mem_cube_id} with max_capacity={max_capacity}"
115
158
  )
116
- else:
117
- logger.info(
118
- f"MemoryMonitorManager already exists for user_id={user_id}, "
119
- f"mem_cube_id={mem_cube_id} in the provided memory_monitors dictionary"
120
- )
121
159
 
122
160
  def update_working_memory_monitors(
123
161
  self,
@@ -174,11 +212,11 @@ class SchedulerMonitor(BaseSchedulerModule):
174
212
  )
175
213
 
176
214
  def timed_trigger(self, last_time: datetime, interval_seconds: float) -> bool:
177
- now = datetime.now()
215
+ now = datetime.utcnow()
178
216
  elapsed = (now - last_time).total_seconds()
179
217
  if elapsed >= interval_seconds:
180
218
  return True
181
- logger.debug(f"Time trigger not ready, {elapsed:.1f}s elapsed (needs {interval_seconds}s)")
219
+ logger.info(f"Time trigger not ready, {elapsed:.1f}s elapsed (needs {interval_seconds}s)")
182
220
  return False
183
221
 
184
222
  def get_monitor_memories(
@@ -81,7 +81,13 @@ class MOSForTestScheduler(MOS):
81
81
 
82
82
  # from mem_cube
83
83
  memories = mem_cube.text_mem.search(
84
- query, top_k=self.config.top_k - topk_for_scheduler
84
+ query,
85
+ top_k=self.config.top_k - topk_for_scheduler,
86
+ info={
87
+ "user_id": target_user_id,
88
+ "session_id": self.session_id,
89
+ "chat_history": chat_history.chat_history,
90
+ },
85
91
  )
86
92
  text_memories = [m.memory for m in memories]
87
93
  print(f"Search results with new working memories: {text_memories}")
@@ -10,11 +10,12 @@ ANSWER_LABEL = "answer"
10
10
  ADD_LABEL = "add"
11
11
 
12
12
  TreeTextMemory_SEARCH_METHOD = "tree_text_memory_search"
13
+ TreeTextMemory_FINE_SEARCH_METHOD = "tree_text_memory_fine_search"
13
14
  TextMemory_SEARCH_METHOD = "text_memory_search"
14
15
  DIRECT_EXCHANGE_TYPE = "direct"
15
16
  FANOUT_EXCHANGE_TYPE = "fanout"
16
- DEFAULT_WORKING_MEM_MONITOR_SIZE_LIMIT = 20
17
- DEFAULT_ACTIVATION_MEM_MONITOR_SIZE_LIMIT = 5
17
+ DEFAULT_WORKING_MEM_MONITOR_SIZE_LIMIT = 30
18
+ DEFAULT_ACTIVATION_MEM_MONITOR_SIZE_LIMIT = 20
18
19
  DEFAULT_ACT_MEM_DUMP_PATH = f"{BASE_DIR}/outputs/mem_scheduler/mem_cube_scheduler_test.kv_cache"
19
20
  DEFAULT_THREAD__POOL_MAX_WORKERS = 5
20
21
  DEFAULT_CONSUME_INTERVAL_SECONDS = 3
@@ -7,7 +7,7 @@ from typing_extensions import TypedDict
7
7
 
8
8
  from memos.log import get_logger
9
9
  from memos.mem_cube.general import GeneralMemCube
10
- from memos.mem_scheduler.modules.misc import DictConversionMixin
10
+ from memos.mem_scheduler.general_modules.misc import DictConversionMixin
11
11
 
12
12
  from .general_schemas import NOT_INITIALIZED
13
13
 
@@ -138,6 +138,7 @@ class ScheduleLogForWebItem(BaseModel, DictConversionMixin):
138
138
  def debug_info(self) -> dict[str, Any]:
139
139
  """Return structured debug information for logging purposes."""
140
140
  return {
141
+ "content_preview:": self.log_content[:50],
141
142
  "log_id": self.item_id,
142
143
  "user_id": self.user_id,
143
144
  "mem_cube_id": self.mem_cube_id,
@@ -1,3 +1,5 @@
1
+ import threading
2
+
1
3
  from collections import Counter
2
4
  from datetime import datetime
3
5
  from pathlib import Path
@@ -7,7 +9,7 @@ from uuid import uuid4
7
9
  from pydantic import BaseModel, Field, computed_field, field_validator
8
10
 
9
11
  from memos.log import get_logger
10
- from memos.mem_scheduler.modules.misc import AutoDroppingQueue, DictConversionMixin
12
+ from memos.mem_scheduler.general_modules.misc import AutoDroppingQueue, DictConversionMixin
11
13
  from memos.mem_scheduler.schemas.general_schemas import (
12
14
  DEFAULT_MAX_QUERY_KEY_WORDS,
13
15
  DEFAULT_WEIGHT_VECTOR_FOR_RANKING,
@@ -76,7 +78,7 @@ class QueryMonitorQueue(AutoDroppingQueue[QueryMonitorItem]):
76
78
  Each item is expected to be a dictionary containing:
77
79
  """
78
80
 
79
- def put(self, item: QueryMonitorItem, block: bool = True, timeout: float | None = None) -> None:
81
+ def put(self, item: QueryMonitorItem, block: bool = True, timeout: float | None = 5.0) -> None:
80
82
  """
81
83
  Add a query item to the queue. Ensures the item is of correct type.
82
84
 
@@ -85,6 +87,9 @@ class QueryMonitorQueue(AutoDroppingQueue[QueryMonitorItem]):
85
87
  """
86
88
  if not isinstance(item, QueryMonitorItem):
87
89
  raise ValueError("Item must be an instance of QueryMonitorItem")
90
+ logger.debug(
91
+ f"Thread {threading.get_ident()} acquired mutex. Timeout is set to {timeout} seconds"
92
+ )
88
93
  super().put(item, block, timeout)
89
94
 
90
95
  def get_queries_by_timestamp(
@@ -94,6 +99,7 @@ class QueryMonitorQueue(AutoDroppingQueue[QueryMonitorItem]):
94
99
  Retrieve queries added between the specified time range.
95
100
  """
96
101
  with self.mutex:
102
+ logger.debug(f"Thread {threading.get_ident()} acquired mutex.")
97
103
  return [item for item in self.queue if start_time <= item.timestamp <= end_time]
98
104
 
99
105
  def get_keywords_collections(self) -> Counter:
@@ -104,6 +110,7 @@ class QueryMonitorQueue(AutoDroppingQueue[QueryMonitorItem]):
104
110
  Counter object with keyword counts
105
111
  """
106
112
  with self.mutex:
113
+ logger.debug(f"Thread {threading.get_ident()} acquired mutex.")
107
114
  all_keywords = [kw for item in self.queue for kw in item.keywords]
108
115
  return Counter(all_keywords)
109
116
 
@@ -119,6 +126,7 @@ class QueryMonitorQueue(AutoDroppingQueue[QueryMonitorItem]):
119
126
  List of query items sorted by timestamp
120
127
  """
121
128
  with self.mutex:
129
+ logger.debug(f"Thread {threading.get_ident()} acquired mutex.")
122
130
  return [
123
131
  monitor.query_text
124
132
  for monitor in sorted(self.queue, key=lambda x: x.timestamp, reverse=reverse)
@@ -1,4 +1,5 @@
1
1
  import json
2
+ import re
2
3
 
3
4
  from functools import wraps
4
5
  from pathlib import Path
@@ -12,12 +13,52 @@ logger = get_logger(__name__)
12
13
 
13
14
 
14
15
  def extract_json_dict(text: str):
16
+ """
17
+ Safely extracts JSON from LLM response text with robust error handling.
18
+
19
+ Args:
20
+ text: Raw text response from LLM that may contain JSON
21
+
22
+ Returns:
23
+ Parsed JSON data (dict or list)
24
+
25
+ Raises:
26
+ ValueError: If no valid JSON can be extracted
27
+ """
28
+ if not text:
29
+ raise ValueError("Empty input text")
30
+
31
+ # Normalize the text
15
32
  text = text.strip()
33
+
34
+ # Remove common code block markers
16
35
  patterns_to_remove = ["json```", "```python", "```json", "latex```", "```latex", "```"]
17
36
  for pattern in patterns_to_remove:
18
37
  text = text.replace(pattern, "")
19
- res = json.loads(text.strip())
20
- return res
38
+
39
+ # Try: direct JSON parse first
40
+ try:
41
+ return json.loads(text.strip())
42
+ except json.JSONDecodeError as e:
43
+ logger.error(f"Failed to parse JSON from text: {text}. Error: {e!s}", exc_info=True)
44
+
45
+ # Fallback 1: Extract JSON using regex
46
+ json_pattern = r"\{[\s\S]*\}|\[[\s\S]*\]"
47
+ matches = re.findall(json_pattern, text)
48
+ if matches:
49
+ try:
50
+ return json.loads(matches[0])
51
+ except json.JSONDecodeError as e:
52
+ logger.error(f"Failed to parse JSON from text: {text}. Error: {e!s}", exc_info=True)
53
+
54
+ # Fallback 2: Handle malformed JSON (common LLM issues)
55
+ try:
56
+ # Try adding missing quotes around keys
57
+ text = re.sub(r"([\{\s,])(\w+)(:)", r'\1"\2"\3', text)
58
+ return json.loads(text)
59
+ except json.JSONDecodeError as e:
60
+ logger.error(f"Failed to parse JSON from text: {text}. Error: {e!s}", exc_info=True)
61
+ raise ValueError(text) from e
21
62
 
22
63
 
23
64
  def parse_yaml(yaml_file: str | Path):
@@ -23,7 +23,7 @@ class KVCacheRecords(BaseModel):
23
23
  description="Single string combining all text_memories using assembly template",
24
24
  )
25
25
  timestamp: datetime = Field(
26
- default_factory=datetime.now, description="submit time for schedule_messages"
26
+ default_factory=datetime.utcnow, description="submit time for schedule_messages"
27
27
  )
28
28
 
29
29
 
@@ -1,7 +1,8 @@
1
1
  import os
2
2
  import pickle
3
-
4
3
  from datetime import datetime
4
+ from importlib.metadata import version
5
+ from packaging.version import Version
5
6
 
6
7
  from transformers import DynamicCache
7
8
 
@@ -211,13 +212,24 @@ class KVCacheMemory(BaseActMemory):
211
212
  merged = DynamicCache()
212
213
  num_layers = len(caches[0].key_cache)
213
214
 
214
- for layer in range(num_layers):
215
- # gather all K and V for this layer
216
- keys = [c.key_cache[layer] for c in caches]
217
- vals = [c.value_cache[layer] for c in caches]
218
- # single concat per layer
219
- merged.key_cache.append(torch.cat(keys, dim=-2))
220
- merged.value_cache.append(torch.cat(vals, dim=-2))
215
+ if Version(version("transformers")) >= Version("4.54.0"):
216
+ merged.append_new_layers(num_layers - 1)
217
+ for layer in range(num_layers):
218
+ # gather all K and V for this layer
219
+ keys = [c.layers[layer].keys for c in caches]
220
+ vals = [c.layers[layer].values for c in caches]
221
+ # single concat per layer
222
+ merged.layers[layer].keys = torch.cat(keys, dim=-2)
223
+ merged.layers[layer].values = torch.cat(vals, dim=-2)
224
+
225
+ else:
226
+ for layer in range(num_layers):
227
+ # gather all K and V for this layer
228
+ keys = [c.key_cache[layer] for c in caches]
229
+ vals = [c.value_cache[layer] for c in caches]
230
+ # single concat per layer
231
+ merged.key_cache.append(torch.cat(keys, dim=-2))
232
+ merged.value_cache.append(torch.cat(vals, dim=-2))
221
233
 
222
234
  return merged
223
235
 
@@ -36,7 +36,7 @@ class BaseTextMemory(BaseMemory):
36
36
  """Update a memory by memory_id."""
37
37
 
38
38
  @abstractmethod
39
- def search(self, query: str, top_k: int, info=None) -> list[TextualMemoryItem]:
39
+ def search(self, query: str, top_k: int, info=None, **kwargs) -> list[TextualMemoryItem]:
40
40
  """Search for memories based on a query.
41
41
  Args:
42
42
  query (str): The query to search for.
@@ -114,7 +114,7 @@ class GeneralTextMemory(BaseTextMemory):
114
114
 
115
115
  self.vector_db.update(memory_id, vec_db_item)
116
116
 
117
- def search(self, query: str, top_k: int) -> list[TextualMemoryItem]:
117
+ def search(self, query: str, top_k: int, info=None, **kwargs) -> list[TextualMemoryItem]:
118
118
  """Search for memories based on a query.
119
119
  Args:
120
120
  query (str): The query to search for.
@@ -1,6 +1,5 @@
1
1
  import json
2
2
  import re
3
-
4
3
  from datetime import datetime
5
4
 
6
5
  from dateutil import parser
@@ -11,15 +10,14 @@ from memos.llms.base import BaseLLM
11
10
  from memos.log import get_logger
12
11
  from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
13
12
  from memos.templates.tree_reorganize_prompts import (
14
- CONFLICT_DETECTOR_PROMPT,
15
- CONFLICT_RESOLVER_PROMPT,
13
+ MEMORY_RELATION_DETECTOR_PROMPT,
14
+ MEMORY_RELATION_RESOLVER_PROMPT,
16
15
  )
17
16
 
18
-
19
17
  logger = get_logger(__name__)
20
18
 
21
19
 
22
- class ConflictHandler:
20
+ class NodeHandler:
23
21
  EMBEDDING_THRESHOLD: float = 0.8 # Threshold for embedding similarity to consider conflict
24
22
 
25
23
  def __init__(self, graph_store: Neo4jGraphDB, llm: BaseLLM, embedder: BaseEmbedder):
@@ -27,66 +25,53 @@ class ConflictHandler:
27
25
  self.llm = llm
28
26
  self.embedder = embedder
29
27
 
30
- def detect(
31
- self, memory: TextualMemoryItem, top_k: int = 5, scope: str | None = None
32
- ) -> list[tuple[TextualMemoryItem, TextualMemoryItem]]:
33
- """
34
- Detect conflicts by finding the most similar items in the graph database based on embedding, then use LLM to judge conflict.
35
- Args:
36
- memory: The memory item (should have an embedding attribute or field).
37
- top_k: Number of top similar nodes to retrieve.
38
- scope: Optional memory type filter.
39
- Returns:
40
- List of conflict pairs (each pair is a tuple: (memory, candidate)).
41
- """
28
+ def detect(self, memory, top_k: int = 5, scope=None):
42
29
  # 1. Search for similar memories based on embedding
43
30
  embedding = memory.metadata.embedding
44
31
  embedding_candidates_info = self.graph_store.search_by_embedding(
45
- embedding, top_k=top_k, scope=scope
32
+ embedding, top_k=top_k, scope=scope, threshold=self.EMBEDDING_THRESHOLD
46
33
  )
47
34
  # 2. Filter based on similarity threshold
48
35
  embedding_candidates_ids = [
49
- info["id"]
50
- for info in embedding_candidates_info
51
- if info["score"] >= self.EMBEDDING_THRESHOLD and info["id"] != memory.id
36
+ info["id"] for info in embedding_candidates_info if info["id"] != memory.id
52
37
  ]
53
38
  # 3. Judge conflicts using LLM
54
39
  embedding_candidates = self.graph_store.get_nodes(embedding_candidates_ids)
55
- conflict_pairs = []
40
+ detected_relationships = []
56
41
  for embedding_candidate in embedding_candidates:
57
42
  embedding_candidate = TextualMemoryItem.from_dict(embedding_candidate)
58
43
  prompt = [
59
- {
60
- "role": "system",
61
- "content": "You are a conflict detector for memory items.",
62
- },
63
44
  {
64
45
  "role": "user",
65
- "content": CONFLICT_DETECTOR_PROMPT.format(
66
- statement_1=memory.memory,
67
- statement_2=embedding_candidate.memory,
46
+ "content": MEMORY_RELATION_DETECTOR_PROMPT.format(
47
+ statement_1=memory.memory, statement_2=embedding_candidate.memory
68
48
  ),
69
- },
49
+ }
70
50
  ]
71
51
  result = self.llm.generate(prompt).strip()
72
- if "yes" in result.lower():
73
- conflict_pairs.append([memory, embedding_candidate])
74
- if len(conflict_pairs):
75
- conflict_text = "\n".join(
76
- f'"{pair[0].memory!s}" <==CONFLICT==> "{pair[1].memory!s}"'
77
- for pair in conflict_pairs
78
- )
79
- logger.warning(
80
- f"Detected {len(conflict_pairs)} conflicts for memory {memory.id}\n {conflict_text}"
81
- )
82
- return conflict_pairs
52
+ if result == "contradictory":
53
+ logger.warning(
54
+ f'detected "{memory.memory}" <==CONFLICT==> "{embedding_candidate.memory}"'
55
+ )
56
+ detected_relationships.append([memory, embedding_candidate, "contradictory"])
57
+ elif result == "redundant":
58
+ logger.warning(
59
+ f'detected "{memory.memory}" <==REDUNDANT==> "{embedding_candidate.memory}"'
60
+ )
61
+ detected_relationships.append([memory, embedding_candidate, "redundant"])
62
+ elif result == "independent":
63
+ pass
64
+ else:
65
+ pass
66
+ return detected_relationships
83
67
 
84
- def resolve(self, memory_a: TextualMemoryItem, memory_b: TextualMemoryItem) -> None:
68
+ def resolve(self, memory_a: TextualMemoryItem, memory_b: TextualMemoryItem, relation) -> None:
85
69
  """
86
70
  Resolve detected conflicts between two memory items using LLM fusion.
87
71
  Args:
88
72
  memory_a: The first conflicting memory item.
89
73
  memory_b: The second conflicting memory item.
74
+ relation: relation
90
75
  Returns:
91
76
  A fused TextualMemoryItem representing the resolved memory.
92
77
  """
@@ -96,13 +81,10 @@ class ConflictHandler:
96
81
  metadata_1 = memory_a.metadata.model_dump_json(include=metadata_for_resolve)
97
82
  metadata_2 = memory_b.metadata.model_dump_json(include=metadata_for_resolve)
98
83
  prompt = [
99
- {
100
- "role": "system",
101
- "content": "",
102
- },
103
84
  {
104
85
  "role": "user",
105
- "content": CONFLICT_RESOLVER_PROMPT.format(
86
+ "content": MEMORY_RELATION_RESOLVER_PROMPT.format(
87
+ relation=relation,
106
88
  statement_1=memory_a.memory,
107
89
  metadata_1=metadata_1,
108
90
  statement_2=memory_b.memory,
@@ -119,7 +101,7 @@ class ConflictHandler:
119
101
  # —————— 2.1 Can't resolve conflict, hard update by comparing timestamp ————
120
102
  if len(answer) <= 10 and "no" in answer.lower():
121
103
  logger.warning(
122
- f"Conflict between {memory_a.id} and {memory_b.id} could not be resolved. "
104
+ f"{relation} between {memory_a.id} and {memory_b.id} could not be resolved. "
123
105
  )
124
106
  self._hard_update(memory_a, memory_b)
125
107
  # —————— 2.2 Conflict resolved, update metadata and memory ————
@@ -39,8 +39,8 @@ class MemoryManager:
39
39
  if not memory_size:
40
40
  self.memory_size = {
41
41
  "WorkingMemory": 20,
42
- "LongTermMemory": 10000,
43
- "UserMemory": 10000,
42
+ "LongTermMemory": 1500,
43
+ "UserMemory": 480,
44
44
  }
45
45
  self._threshold = threshold
46
46
  self.is_reorganize = is_reorganize
@@ -158,106 +158,18 @@ class MemoryManager:
158
158
  - topic_summary_prefix: summary node id prefix if applicable
159
159
  - enable_summary_link: whether to auto-link to a summary node
160
160
  """
161
- embedding = memory.metadata.embedding
162
-
163
- # Step 1: Find similar nodes for possible merging
164
- similar_nodes = self.graph_store.search_by_embedding(
165
- vector=embedding,
166
- top_k=3,
167
- scope=memory_type,
168
- threshold=self._threshold,
169
- status="activated",
170
- )
171
-
172
- if similar_nodes and similar_nodes[0]["score"] > self._merged_threshold:
173
- return self._merge(memory, similar_nodes)
174
- else:
175
- node_id = str(uuid.uuid4())
176
- # Step 2: Add new node to graph
177
- self.graph_store.add_node(
178
- node_id, memory.memory, memory.metadata.model_dump(exclude_none=True)
179
- )
180
- self.reorganizer.add_message(
181
- QueueMessage(
182
- op="add",
183
- after_node=[node_id],
184
- )
185
- )
186
- return node_id
187
-
188
- def _merge(self, source_node: TextualMemoryItem, similar_nodes: list[dict]) -> str:
189
- """
190
- TODO: Add node traceability support by optionally preserving source nodes and linking them with MERGED_FROM edges.
191
-
192
- Merge the source memory into the most similar existing node (only one),
193
- and establish a MERGED_FROM edge in the graph.
194
-
195
- Parameters:
196
- source_node: The new memory item (not yet in the graph)
197
- similar_nodes: A list of dicts returned by search_by_embedding(), ordered by similarity
198
- """
199
- original_node = similar_nodes[0]
200
- original_id = original_node["id"]
201
- original_data = self.graph_store.get_node(original_id)
202
-
203
- target_text = original_data.get("memory", "")
204
- merged_text = f"{target_text}\n⟵MERGED⟶\n{source_node.memory}"
205
-
206
- original_meta = TreeNodeTextualMemoryMetadata(**original_data["metadata"])
207
- source_meta = source_node.metadata
208
-
209
- merged_key = source_meta.key or original_meta.key
210
- merged_tags = list(set((original_meta.tags or []) + (source_meta.tags or [])))
211
- merged_sources = list(set((original_meta.sources or []) + (source_meta.sources or [])))
212
- merged_background = f"{original_meta.background}\n⟵MERGED⟶\n{source_meta.background}"
213
- merged_embedding = self.embedder.embed([merged_text])[0]
214
-
215
- original_conf = original_meta.confidence or 0.0
216
- source_conf = source_meta.confidence or 0.0
217
- merged_confidence = float((original_conf + source_conf) / 2)
218
- merged_usage = list(set((original_meta.usage or []) + (source_meta.usage or [])))
219
-
220
- # Create new merged node
221
- merged_id = str(uuid.uuid4())
222
- merged_metadata = source_meta.model_copy(
223
- update={
224
- "embedding": merged_embedding,
225
- "updated_at": datetime.now().isoformat(),
226
- "key": merged_key,
227
- "tags": merged_tags,
228
- "sources": merged_sources,
229
- "background": merged_background,
230
- "confidence": merged_confidence,
231
- "usage": merged_usage,
232
- }
233
- )
234
-
161
+ node_id = str(uuid.uuid4())
162
+ # Step 2: Add new node to graph
235
163
  self.graph_store.add_node(
236
- merged_id, merged_text, merged_metadata.model_dump(exclude_none=True)
164
+ node_id, memory.memory, memory.metadata.model_dump(exclude_none=True)
237
165
  )
238
-
239
- # Add traceability edges: both original and new point to merged node
240
- self.graph_store.add_edge(original_id, merged_id, type="MERGED_TO")
241
- self.graph_store.update_node(original_id, {"status": "archived"})
242
- source_id = str(uuid.uuid4())
243
- source_metadata = source_node.metadata.model_copy(update={"status": "archived"})
244
- self.graph_store.add_node(source_id, source_node.memory, source_metadata.model_dump())
245
- self.graph_store.add_edge(source_id, merged_id, type="MERGED_TO")
246
- # After creating merged node and tracing lineage
247
- self._inherit_edges(original_id, merged_id)
248
-
249
- # log to reorganizer before updating the graph
250
166
  self.reorganizer.add_message(
251
167
  QueueMessage(
252
- op="merge",
253
- before_node=[
254
- original_id,
255
- source_node.id,
256
- ],
257
- after_node=[merged_id],
168
+ op="add",
169
+ after_node=[node_id],
258
170
  )
259
171
  )
260
- return merged_id
172
+ return node_id
261
173
 
262
174
  def _inherit_edges(self, from_id: str, to_id: str) -> None:
263
175
  """