MemoryOS 0.1.13__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MemoryOS might be problematic; see the registry's advisory page for more details.

Files changed (84)
  1. {memoryos-0.1.13.dist-info → memoryos-0.2.1.dist-info}/METADATA +78 -49
  2. memoryos-0.2.1.dist-info/RECORD +152 -0
  3. memoryos-0.2.1.dist-info/entry_points.txt +3 -0
  4. memos/__init__.py +1 -1
  5. memos/api/config.py +471 -0
  6. memos/api/exceptions.py +28 -0
  7. memos/api/mcp_serve.py +502 -0
  8. memos/api/product_api.py +35 -0
  9. memos/api/product_models.py +159 -0
  10. memos/api/routers/__init__.py +1 -0
  11. memos/api/routers/product_router.py +358 -0
  12. memos/chunkers/sentence_chunker.py +8 -2
  13. memos/cli.py +113 -0
  14. memos/configs/embedder.py +27 -0
  15. memos/configs/graph_db.py +83 -2
  16. memos/configs/llm.py +48 -0
  17. memos/configs/mem_cube.py +1 -1
  18. memos/configs/mem_reader.py +4 -0
  19. memos/configs/mem_scheduler.py +91 -5
  20. memos/configs/memory.py +10 -4
  21. memos/dependency.py +52 -0
  22. memos/embedders/ark.py +92 -0
  23. memos/embedders/factory.py +4 -0
  24. memos/embedders/sentence_transformer.py +8 -2
  25. memos/embedders/universal_api.py +32 -0
  26. memos/graph_dbs/base.py +2 -2
  27. memos/graph_dbs/factory.py +2 -0
  28. memos/graph_dbs/item.py +46 -0
  29. memos/graph_dbs/neo4j.py +377 -101
  30. memos/graph_dbs/neo4j_community.py +300 -0
  31. memos/llms/base.py +9 -0
  32. memos/llms/deepseek.py +54 -0
  33. memos/llms/factory.py +10 -1
  34. memos/llms/hf.py +170 -13
  35. memos/llms/hf_singleton.py +114 -0
  36. memos/llms/ollama.py +4 -0
  37. memos/llms/openai.py +68 -1
  38. memos/llms/qwen.py +63 -0
  39. memos/llms/vllm.py +153 -0
  40. memos/mem_cube/general.py +77 -16
  41. memos/mem_cube/utils.py +102 -0
  42. memos/mem_os/core.py +131 -41
  43. memos/mem_os/main.py +93 -11
  44. memos/mem_os/product.py +1098 -35
  45. memos/mem_os/utils/default_config.py +352 -0
  46. memos/mem_os/utils/format_utils.py +1154 -0
  47. memos/mem_reader/simple_struct.py +13 -8
  48. memos/mem_scheduler/base_scheduler.py +467 -36
  49. memos/mem_scheduler/general_scheduler.py +125 -244
  50. memos/mem_scheduler/modules/base.py +9 -0
  51. memos/mem_scheduler/modules/dispatcher.py +68 -2
  52. memos/mem_scheduler/modules/misc.py +39 -0
  53. memos/mem_scheduler/modules/monitor.py +228 -49
  54. memos/mem_scheduler/modules/rabbitmq_service.py +317 -0
  55. memos/mem_scheduler/modules/redis_service.py +32 -22
  56. memos/mem_scheduler/modules/retriever.py +250 -23
  57. memos/mem_scheduler/modules/schemas.py +189 -7
  58. memos/mem_scheduler/mos_for_test_scheduler.py +143 -0
  59. memos/mem_scheduler/utils.py +51 -2
  60. memos/mem_user/persistent_user_manager.py +260 -0
  61. memos/memories/activation/item.py +25 -0
  62. memos/memories/activation/kv.py +10 -3
  63. memos/memories/activation/vllmkv.py +219 -0
  64. memos/memories/factory.py +2 -0
  65. memos/memories/textual/general.py +7 -5
  66. memos/memories/textual/item.py +3 -1
  67. memos/memories/textual/tree.py +14 -6
  68. memos/memories/textual/tree_text_memory/organize/conflict.py +198 -0
  69. memos/memories/textual/tree_text_memory/organize/manager.py +72 -23
  70. memos/memories/textual/tree_text_memory/organize/redundancy.py +193 -0
  71. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +233 -0
  72. memos/memories/textual/tree_text_memory/organize/reorganizer.py +606 -0
  73. memos/memories/textual/tree_text_memory/retrieve/recall.py +0 -1
  74. memos/memories/textual/tree_text_memory/retrieve/reranker.py +2 -2
  75. memos/memories/textual/tree_text_memory/retrieve/searcher.py +6 -5
  76. memos/parsers/markitdown.py +8 -2
  77. memos/templates/mem_reader_prompts.py +105 -36
  78. memos/templates/mem_scheduler_prompts.py +96 -47
  79. memos/templates/tree_reorganize_prompts.py +223 -0
  80. memos/vec_dbs/base.py +12 -0
  81. memos/vec_dbs/qdrant.py +46 -20
  82. memoryos-0.1.13.dist-info/RECORD +0 -122
  83. {memoryos-0.1.13.dist-info → memoryos-0.2.1.dist-info}/LICENSE +0 -0
  84. {memoryos-0.1.13.dist-info → memoryos-0.2.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,198 @@
1
+ import json
2
+ import re
3
+
4
+ from datetime import datetime
5
+
6
+ from memos.embedders.base import BaseEmbedder
7
+ from memos.graph_dbs.neo4j import Neo4jGraphDB
8
+ from memos.llms.base import BaseLLM
9
+ from memos.log import get_logger
10
+ from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
11
+ from memos.templates.tree_reorganize_prompts import (
12
+ CONFLICT_DETECTOR_PROMPT,
13
+ CONFLICT_RESOLVER_PROMPT,
14
+ )
15
+
16
+
17
+ logger = get_logger(__name__)
18
+
19
+
20
class ConflictHandler:
    """Detect and resolve contradictory memory items stored in the graph DB.

    Detection shortlists candidates by embedding similarity, then asks the
    LLM whether each candidate actually contradicts the new memory.
    Resolution asks the LLM to fuse the two statements; when fusion is not
    possible, the older of the two items is deleted (``_hard_update``).
    """

    # Minimum similarity score for a node to be treated as a conflict candidate.
    EMBEDDING_THRESHOLD: float = 0.8

    def __init__(self, graph_store: Neo4jGraphDB, llm: BaseLLM, embedder: BaseEmbedder):
        self.graph_store = graph_store
        self.llm = llm
        self.embedder = embedder

    def detect(
        self, memory: TextualMemoryItem, top_k: int = 5, scope: str | None = None
    ) -> list[tuple[TextualMemoryItem, TextualMemoryItem]]:
        """
        Detect conflicts by finding the most similar items in the graph database based on embedding, then use LLM to judge conflict.
        Args:
            memory: The memory item (should have an embedding attribute or field).
            top_k: Number of top similar nodes to retrieve.
            scope: Optional memory type filter.
        Returns:
            List of conflict pairs (each pair is a tuple: (memory, candidate)).
        """
        # 1. Search for similar memories based on embedding
        embedding = memory.metadata.embedding
        embedding_candidates_info = self.graph_store.search_by_embedding(
            embedding, top_k=top_k, scope=scope
        )
        # 2. Filter based on similarity threshold; never compare the item with itself.
        embedding_candidates_ids = [
            info["id"]
            for info in embedding_candidates_info
            if info["score"] >= self.EMBEDDING_THRESHOLD and info["id"] != memory.id
        ]
        # 3. Judge conflicts using LLM
        embedding_candidates = self.graph_store.get_nodes(embedding_candidates_ids)
        conflict_pairs = []
        for embedding_candidate in embedding_candidates:
            embedding_candidate = TextualMemoryItem.from_dict(embedding_candidate)
            prompt = [
                {
                    "role": "system",
                    "content": "You are a conflict detector for memory items.",
                },
                {
                    "role": "user",
                    "content": CONFLICT_DETECTOR_PROMPT.format(
                        statement_1=memory.memory,
                        statement_2=embedding_candidate.memory,
                    ),
                },
            ]
            result = self.llm.generate(prompt).strip()
            # The detector prompt asks for a yes/no style verdict.
            if "yes" in result.lower():
                conflict_pairs.append([memory, embedding_candidate])
        if conflict_pairs:
            conflict_text = "\n".join(
                f'"{pair[0].memory!s}" <==CONFLICT==> "{pair[1].memory!s}"'
                for pair in conflict_pairs
            )
            logger.warning(
                f"Detected {len(conflict_pairs)} conflicts for memory {memory.id}\n {conflict_text}"
            )
        return conflict_pairs

    def resolve(self, memory_a: TextualMemoryItem, memory_b: TextualMemoryItem) -> None:
        """
        Resolve detected conflicts between two memory items using LLM fusion.
        Args:
            memory_a: The first conflicting memory item.
            memory_b: The second conflicting memory item.
        Returns:
            A fused TextualMemoryItem representing the resolved memory.
        """
        # ———————————— 1. LLM generate fused memory ————————————
        metadata_for_resolve = ["key", "background", "confidence", "updated_at"]
        metadata_1 = memory_a.metadata.model_dump_json(include=metadata_for_resolve)
        metadata_2 = memory_b.metadata.model_dump_json(include=metadata_for_resolve)
        prompt = [
            {
                "role": "system",
                "content": "",
            },
            {
                "role": "user",
                "content": CONFLICT_RESOLVER_PROMPT.format(
                    statement_1=memory_a.memory,
                    metadata_1=metadata_1,
                    statement_2=memory_b.memory,
                    metadata_2=metadata_2,
                ),
            },
        ]
        response = self.llm.generate(prompt).strip()

        # ———————————— 2. Parse the response ————————————
        match = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
        if match is None:
            # BUG FIX: the original called .group(1) on a possibly-None match
            # while only catching json.JSONDecodeError (nothing here parses
            # JSON), so a reply without <answer> tags crashed with
            # AttributeError instead of being logged.
            logger.error(f"Failed to parse LLM response: {response}")
            return
        answer = match.group(1).strip()
        # —————— 2.1 Can't resolve conflict, hard update by comparing timestamp ————
        if len(answer) <= 10 and "no" in answer.lower():
            logger.warning(
                f"Conflict between {memory_a.id} and {memory_b.id} could not be resolved. "
            )
            self._hard_update(memory_a, memory_b)
        # —————— 2.2 Conflict resolved, update metadata and memory ————
        else:
            fixed_metadata = self._merge_metadata(answer, memory_a.metadata, memory_b.metadata)
            merged_memory = TextualMemoryItem(memory=answer, metadata=fixed_metadata)
            logger.info(f"Resolved result: {merged_memory}")
            self._resolve_in_graph(memory_a, memory_b, merged_memory)

    def _hard_update(self, memory_a: TextualMemoryItem, memory_b: TextualMemoryItem):
        """
        Hard update: compare updated_at, keep the newer one, overwrite the older one's metadata.
        """
        time_a = datetime.fromisoformat(memory_a.metadata.updated_at)
        time_b = datetime.fromisoformat(memory_b.metadata.updated_at)

        newer_mem = memory_a if time_a >= time_b else memory_b
        older_mem = memory_b if time_a >= time_b else memory_a

        self.graph_store.delete_node(older_mem.id)
        logger.warning(
            f"Delete older memory {older_mem.id}: <{older_mem.memory}> due to conflict with {newer_mem.id}: <{newer_mem.memory}>"
        )

    def _resolve_in_graph(
        self,
        conflict_a: TextualMemoryItem,
        conflict_b: TextualMemoryItem,
        merged: TextualMemoryItem,
    ):
        """Insert the merged node, re-point both items' edges to it, and archive the originals."""
        edges_a = self.graph_store.get_edges(conflict_a.id, type="ANY", direction="ANY")
        edges_b = self.graph_store.get_edges(conflict_b.id, type="ANY", direction="ANY")
        all_edges = edges_a + edges_b

        self.graph_store.add_node(
            merged.id, merged.memory, merged.metadata.model_dump(exclude_none=True)
        )

        # Re-home every edge that touched either conflicting node onto the merged node.
        for edge in all_edges:
            new_from = merged.id if edge["from"] in (conflict_a.id, conflict_b.id) else edge["from"]
            new_to = merged.id if edge["to"] in (conflict_a.id, conflict_b.id) else edge["to"]
            if new_from == new_to:
                # Skip self-loops created by collapsing the pair into one node.
                continue
            # Check if the edge already exists before adding
            if not self.graph_store.edge_exists(new_from, new_to, edge["type"], direction="ANY"):
                self.graph_store.add_edge(new_from, new_to, edge["type"])

        self.graph_store.update_node(conflict_a.id, {"status": "archived"})
        self.graph_store.update_node(conflict_b.id, {"status": "archived"})
        self.graph_store.add_edge(conflict_a.id, merged.id, type="MERGED_TO")
        self.graph_store.add_edge(conflict_b.id, merged.id, type="MERGED_TO")
        logger.debug(
            f"Archive {conflict_a.id} and {conflict_b.id}, and inherit their edges to {merged.id}."
        )

    def _merge_metadata(
        self,
        memory: str,
        metadata_a: TreeNodeTextualMemoryMetadata,
        metadata_b: TreeNodeTextualMemoryMetadata,
    ) -> TreeNodeTextualMemoryMetadata:
        """Build metadata for the fused memory text.

        Sources are concatenated, the embedding is recomputed for the fused
        text, timestamps are refreshed; every other field takes metadata_a's
        value, falling back to metadata_b's when it is None.
        """
        metadata_1 = metadata_a.model_dump()
        metadata_2 = metadata_b.model_dump()
        merged_metadata = {
            "sources": (metadata_1["sources"] or []) + (metadata_2["sources"] or []),
            "embedding": self.embedder.embed([memory])[0],
            # BUG FIX: key was misspelled "update_at", so the refreshed
            # timestamp never reached the "updated_at" field and the stale
            # value was inherited from metadata_1 below.
            "updated_at": datetime.now().isoformat(),
            "created_at": datetime.now().isoformat(),
        }
        for key in metadata_1:
            if key in merged_metadata:
                continue
            merged_metadata[key] = (
                metadata_1[key] if metadata_1[key] is not None else metadata_2[key]
            )
        return TreeNodeTextualMemoryMetadata.model_validate(merged_metadata)
@@ -5,8 +5,13 @@ from datetime import datetime
5
5
 
6
6
  from memos.embedders.factory import OllamaEmbedder
7
7
  from memos.graph_dbs.neo4j import Neo4jGraphDB
8
+ from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
8
9
  from memos.log import get_logger
9
10
  from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
11
+ from memos.memories.textual.tree_text_memory.organize.reorganizer import (
12
+ GraphStructureReorganizer,
13
+ QueueMessage,
14
+ )
10
15
 
11
16
 
12
17
  logger = get_logger(__name__)
@@ -17,9 +22,11 @@ class MemoryManager:
17
22
  self,
18
23
  graph_store: Neo4jGraphDB,
19
24
  embedder: OllamaEmbedder,
25
+ llm: OpenAILLM | OllamaLLM | AzureLLM,
20
26
  memory_size: dict | None = None,
21
27
  threshold: float | None = 0.80,
22
28
  merged_threshold: float | None = 0.92,
29
+ is_reorganize: bool = False,
23
30
  ):
24
31
  self.graph_store = graph_store
25
32
  self.embedder = embedder
@@ -36,17 +43,24 @@ class MemoryManager:
36
43
  "UserMemory": 10000,
37
44
  }
38
45
  self._threshold = threshold
46
+ self.is_reorganize = is_reorganize
47
+ self.reorganizer = GraphStructureReorganizer(
48
+ graph_store, llm, embedder, is_reorganize=is_reorganize
49
+ )
39
50
  self._merged_threshold = merged_threshold
40
51
 
41
- def add(self, memories: list[TextualMemoryItem]) -> None:
52
+ def add(self, memories: list[TextualMemoryItem]) -> list[str]:
42
53
  """
43
54
  Add new memories in parallel to different memory types (WorkingMemory, LongTermMemory, UserMemory).
44
55
  """
56
+ added_ids: list[str] = []
57
+
45
58
  with ThreadPoolExecutor(max_workers=8) as executor:
46
- futures = [executor.submit(self._process_memory, memory) for memory in memories]
59
+ futures = {executor.submit(self._process_memory, m): m for m in memories}
47
60
  for future in as_completed(futures):
48
61
  try:
49
- future.result()
62
+ ids = future.result()
63
+ added_ids.extend(ids)
50
64
  except Exception as e:
51
65
  logger.exception("Memory processing error: ", exc_info=e)
52
66
 
@@ -61,6 +75,7 @@ class MemoryManager:
61
75
  )
62
76
 
63
77
  self._refresh_memory_size()
78
+ return added_ids
64
79
 
65
80
  def replace_working_memory(self, memories: list[TextualMemoryItem]) -> None:
66
81
  """
@@ -102,17 +117,23 @@ class MemoryManager:
102
117
  Process and add memory to different memory types (WorkingMemory, LongTermMemory, UserMemory).
103
118
  This method runs asynchronously to process each memory item.
104
119
  """
120
+ ids = []
121
+
105
122
  # Add to WorkingMemory
106
- self._add_memory_to_db(memory, "WorkingMemory")
123
+ working_id = self._add_memory_to_db(memory, "WorkingMemory")
124
+ ids.append(working_id)
107
125
 
108
126
  # Add to LongTermMemory and UserMemory
109
127
  if memory.metadata.memory_type in ["LongTermMemory", "UserMemory"]:
110
- self._add_to_graph_memory(
128
+ added_id = self._add_to_graph_memory(
111
129
  memory=memory,
112
130
  memory_type=memory.metadata.memory_type,
113
131
  )
132
+ ids.append(added_id)
133
+
134
+ return ids
114
135
 
115
- def _add_memory_to_db(self, memory: TextualMemoryItem, memory_type: str):
136
+ def _add_memory_to_db(self, memory: TextualMemoryItem, memory_type: str) -> str:
116
137
  """
117
138
  Add a single memory item to the graph store, with FIFO logic for WorkingMemory.
118
139
  """
@@ -124,6 +145,7 @@ class MemoryManager:
124
145
 
125
146
  # Insert node into graph
126
147
  self.graph_store.add_node(working_memory.id, working_memory.memory, metadata)
148
+ return working_memory.id
127
149
 
128
150
  def _add_to_graph_memory(self, memory: TextualMemoryItem, memory_type: str):
129
151
  """
@@ -148,23 +170,22 @@ class MemoryManager:
148
170
  )
149
171
 
150
172
  if similar_nodes and similar_nodes[0]["score"] > self._merged_threshold:
151
- self._merge(memory, similar_nodes)
173
+ return self._merge(memory, similar_nodes)
152
174
  else:
153
175
  node_id = str(uuid.uuid4())
154
176
  # Step 2: Add new node to graph
155
177
  self.graph_store.add_node(
156
178
  node_id, memory.memory, memory.metadata.model_dump(exclude_none=True)
157
179
  )
158
-
159
- # Step 3: Optionally link to a summary node based on topic
160
- if memory.metadata.tags:
161
- parent_id = self._ensure_structure_path(
162
- memory_type=memory_type, metadata=memory.metadata
180
+ self.reorganizer.add_message(
181
+ QueueMessage(
182
+ op="add",
183
+ after_node=[node_id],
163
184
  )
164
- if parent_id:
165
- self.graph_store.add_edge(parent_id, node_id, "PARENT")
185
+ )
186
+ return node_id
166
187
 
167
- def _merge(self, source_node: TextualMemoryItem, similar_nodes: list[dict]) -> None:
188
+ def _merge(self, source_node: TextualMemoryItem, similar_nodes: list[dict]) -> str:
168
189
  """
169
190
  TODO: Add node traceability support by optionally preserving source nodes and linking them with MERGED_FROM edges.
170
191
 
@@ -191,7 +212,9 @@ class MemoryManager:
191
212
  merged_background = f"{original_meta.background}\n⟵MERGED⟶\n{source_meta.background}"
192
213
  merged_embedding = self.embedder.embed([merged_text])[0]
193
214
 
194
- merged_confidence = float((original_meta.confidence + source_meta.confidence) / 2)
215
+ original_conf = original_meta.confidence or 0.0
216
+ source_conf = source_meta.confidence or 0.0
217
+ merged_confidence = float((original_conf + source_conf) / 2)
195
218
  merged_usage = list(set((original_meta.usage or []) + (source_meta.usage or [])))
196
219
 
197
220
  # Create new merged node
@@ -223,12 +246,18 @@ class MemoryManager:
223
246
  # After creating merged node and tracing lineage
224
247
  self._inherit_edges(original_id, merged_id)
225
248
 
226
- # Relate other similar nodes to merged if needed
227
- for related_node in similar_nodes[1:]:
228
- if not self.graph_store.edge_exists(
229
- merged_id, related_node["id"], type="ANY", direction="ANY"
230
- ):
231
- self.graph_store.add_edge(merged_id, related_node["id"], type="RELATE")
249
+ # log to reorganizer before updating the graph
250
+ self.reorganizer.add_message(
251
+ QueueMessage(
252
+ op="merge",
253
+ before_node=[
254
+ original_id,
255
+ source_node.id,
256
+ ],
257
+ after_node=[merged_id],
258
+ )
259
+ )
260
+ return merged_id
232
261
 
233
262
  def _inherit_edges(self, from_id: str, to_id: str) -> None:
234
263
  """
@@ -293,13 +322,33 @@ class MemoryManager:
293
322
  background="",
294
323
  ),
295
324
  )
296
-
297
325
  self.graph_store.add_node(
298
326
  id=new_node.id,
299
327
  memory=new_node.memory,
300
328
  metadata=new_node.metadata.model_dump(exclude_none=True),
301
329
  )
330
+ self.reorganizer.add_message(
331
+ QueueMessage(
332
+ op="add",
333
+ after_node=[new_node.id],
334
+ )
335
+ )
336
+
302
337
  node_id = new_node.id
303
338
 
304
339
  # Step 3: Return this structure node ID as the parent_id
305
340
  return node_id
341
+
342
+ def wait_reorganizer(self):
343
+ """
344
+ Wait for the reorganizer to finish processing all messages.
345
+ """
346
+ logger.debug("Waiting for reorganizer to finish processing messages...")
347
+ self.reorganizer.wait_until_current_task_done()
348
+
349
+ def close(self):
350
+ self.wait_reorganizer()
351
+ self.reorganizer.stop()
352
+
353
+ def __del__(self):
354
+ self.close()
@@ -0,0 +1,193 @@
1
+ import json
2
+ import re
3
+
4
+ from datetime import datetime
5
+
6
+ from memos.embedders.base import BaseEmbedder
7
+ from memos.graph_dbs.neo4j import Neo4jGraphDB
8
+ from memos.llms.base import BaseLLM
9
+ from memos.log import get_logger
10
+ from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
11
+ from memos.templates.tree_reorganize_prompts import (
12
+ REDUNDANCY_DETECTOR_PROMPT,
13
+ REDUNDANCY_MERGE_PROMPT,
14
+ REDUNDANCY_RESOLVER_PROMPT,
15
+ )
16
+
17
+
18
+ logger = get_logger(__name__)
19
+
20
+
21
class RedundancyHandler:
    """Detect and collapse redundant (duplicate-content) memory items.

    Detection mirrors ConflictHandler: shortlist by embedding similarity,
    then let the LLM judge redundancy. Two resolution paths exist:
    ``resolve_two_nodes`` (currently disabled) fuses a pair into one node;
    ``resolve_one_node`` rewrites a single already-merged node's text.
    """

    # Minimum similarity score for a node to be treated as a redundancy candidate.
    EMBEDDING_THRESHOLD: float = 0.8

    def __init__(self, graph_store: Neo4jGraphDB, llm: BaseLLM, embedder: BaseEmbedder):
        self.graph_store = graph_store
        self.llm = llm
        self.embedder = embedder

    def detect(
        self, memory: TextualMemoryItem, top_k: int = 5, scope: str | None = None
    ) -> list[tuple[TextualMemoryItem, TextualMemoryItem]]:
        """
        Detect redundancy by finding the most similar items in the graph database based on embedding, then use LLM to judge redundancy.
        Args:
            memory: The memory item (should have an embedding attribute or field).
            top_k: Number of top similar nodes to retrieve.
            scope: Optional memory type filter.
        Returns:
            List of redundancy pairs (each pair is a tuple: (memory, candidate)).
        """
        # 1. Search for similar memories based on embedding
        embedding = memory.metadata.embedding
        embedding_candidates_info = self.graph_store.search_by_embedding(
            embedding, top_k=top_k, scope=scope
        )
        # 2. Filter based on similarity threshold; never compare the item with itself.
        embedding_candidates_ids = [
            info["id"]
            for info in embedding_candidates_info
            if info["score"] >= self.EMBEDDING_THRESHOLD and info["id"] != memory.id
        ]
        # 3. Judge redundancies using LLM
        embedding_candidates = self.graph_store.get_nodes(embedding_candidates_ids)
        redundant_pairs = []
        for embedding_candidate in embedding_candidates:
            embedding_candidate = TextualMemoryItem.from_dict(embedding_candidate)
            prompt = [
                {
                    "role": "system",
                    "content": "You are a redundancy detector for memory items.",
                },
                {
                    "role": "user",
                    "content": REDUNDANCY_DETECTOR_PROMPT.format(
                        statement_1=memory.memory,
                        statement_2=embedding_candidate.memory,
                    ),
                },
            ]
            result = self.llm.generate(prompt).strip()
            # The detector prompt asks for a yes/no style verdict.
            if "yes" in result.lower():
                redundant_pairs.append([memory, embedding_candidate])
        if redundant_pairs:
            redundant_text = "\n".join(
                f'"{pair[0].memory!s}" <==REDUNDANCY==> "{pair[1].memory!s}"'
                for pair in redundant_pairs
            )
            logger.warning(
                f"Detected {len(redundant_pairs)} redundancies for memory {memory.id}\n {redundant_text}"
            )
        return redundant_pairs

    def resolve_two_nodes(self, memory_a: TextualMemoryItem, memory_b: TextualMemoryItem) -> None:
        """
        Resolve detected redundancies between two memory items using LLM fusion.
        Args:
            memory_a: The first redundant memory item.
            memory_b: The second redundant memory item.
        Returns:
            A fused TextualMemoryItem representing the resolved memory.
        """
        # NOTE: deliberately disabled — everything below is dead code kept as a
        # draft until pairwise fusion is implemented.
        return  # waiting for implementation
        # ———————————— 1. LLM generate fused memory ————————————
        metadata_for_resolve = ["key", "background", "confidence", "updated_at"]
        metadata_1 = memory_a.metadata.model_dump_json(include=metadata_for_resolve)
        metadata_2 = memory_b.metadata.model_dump_json(include=metadata_for_resolve)
        prompt = [
            {
                "role": "system",
                "content": "",
            },
            {
                "role": "user",
                "content": REDUNDANCY_RESOLVER_PROMPT.format(
                    statement_1=memory_a.memory,
                    metadata_1=metadata_1,
                    statement_2=memory_b.memory,
                    metadata_2=metadata_2,
                ),
            },
        ]
        response = self.llm.generate(prompt).strip()

        # ———————————— 2. Parse the response ————————————
        match = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
        if match is None:
            # BUG FIX (in draft code): .group(1) on a possibly-None match was
            # only guarded by an irrelevant json.JSONDecodeError handler, so a
            # reply without <answer> tags would crash with AttributeError.
            logger.error(f"Failed to parse LLM response: {response}")
            return
        answer = match.group(1).strip()
        fixed_metadata = self._merge_metadata(answer, memory_a.metadata, memory_b.metadata)
        merged_memory = TextualMemoryItem(memory=answer, metadata=fixed_metadata)
        logger.info(f"Resolved result: {merged_memory}")
        self._resolve_in_graph(memory_a, memory_b, merged_memory)

    def resolve_one_node(self, memory: TextualMemoryItem) -> None:
        """Rewrite a single node's (already-merged) text via the LLM and persist it."""
        prompt = [
            {
                "role": "user",
                "content": REDUNDANCY_MERGE_PROMPT.format(merged_text=memory.memory),
            },
        ]
        response = self.llm.generate(prompt)
        memory.memory = response.strip()
        self.graph_store.update_node(
            memory.id,
            {"memory": memory.memory, **memory.metadata.model_dump(exclude_none=True)},
        )
        logger.debug(f"Merged memory: {memory.memory}")

    def _resolve_in_graph(
        self,
        redundant_a: TextualMemoryItem,
        redundant_b: TextualMemoryItem,
        merged: TextualMemoryItem,
    ):
        """Insert the merged node, re-point both items' edges to it, and archive the originals."""
        edges_a = self.graph_store.get_edges(redundant_a.id, type="ANY", direction="ANY")
        edges_b = self.graph_store.get_edges(redundant_b.id, type="ANY", direction="ANY")
        all_edges = edges_a + edges_b

        self.graph_store.add_node(
            merged.id, merged.memory, merged.metadata.model_dump(exclude_none=True)
        )

        # Re-home every edge that touched either redundant node onto the merged node.
        for edge in all_edges:
            new_from = (
                merged.id if edge["from"] in (redundant_a.id, redundant_b.id) else edge["from"]
            )
            new_to = merged.id if edge["to"] in (redundant_a.id, redundant_b.id) else edge["to"]
            if new_from == new_to:
                # Skip self-loops created by collapsing the pair into one node.
                continue
            # Check if the edge already exists before adding
            if not self.graph_store.edge_exists(new_from, new_to, edge["type"], direction="ANY"):
                self.graph_store.add_edge(new_from, new_to, edge["type"])

        self.graph_store.update_node(redundant_a.id, {"status": "archived"})
        self.graph_store.update_node(redundant_b.id, {"status": "archived"})
        self.graph_store.add_edge(redundant_a.id, merged.id, type="MERGED_TO")
        self.graph_store.add_edge(redundant_b.id, merged.id, type="MERGED_TO")
        logger.debug(
            f"Archive {redundant_a.id} and {redundant_b.id}, and inherit their edges to {merged.id}."
        )

    def _merge_metadata(
        self,
        memory: str,
        metadata_a: TreeNodeTextualMemoryMetadata,
        metadata_b: TreeNodeTextualMemoryMetadata,
    ) -> TreeNodeTextualMemoryMetadata:
        """Build metadata for the fused memory text.

        Sources are concatenated, the embedding is recomputed for the fused
        text, timestamps are refreshed; every other field takes metadata_a's
        value, falling back to metadata_b's when it is None.
        """
        metadata_1 = metadata_a.model_dump()
        metadata_2 = metadata_b.model_dump()
        merged_metadata = {
            "sources": (metadata_1["sources"] or []) + (metadata_2["sources"] or []),
            "embedding": self.embedder.embed([memory])[0],
            # BUG FIX: key was misspelled "update_at", so the refreshed
            # timestamp never reached the "updated_at" field and the stale
            # value was inherited from metadata_1 below.
            "updated_at": datetime.now().isoformat(),
            "created_at": datetime.now().isoformat(),
        }
        for key in metadata_1:
            if key in merged_metadata:
                continue
            merged_metadata[key] = (
                metadata_1[key] if metadata_1[key] is not None else metadata_2[key]
            )
        return TreeNodeTextualMemoryMetadata.model_validate(merged_metadata)