MemoryOS 0.1.13__py3-none-any.whl → 0.2.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -0,0 +1,196 @@
+ import json
+ import re
+
+ from datetime import datetime
+
+ from memos.embedders.base import BaseEmbedder
+ from memos.graph_dbs.neo4j import Neo4jGraphDB
+ from memos.llms.base import BaseLLM
+ from memos.log import get_logger
+ from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
+ from memos.templates.tree_reorganize_prompts import (
+     CONFLICT_DETECTOR_PROMPT,
+     CONFLICT_RESOLVER_PROMPT,
+ )
+
+
+ logger = get_logger(__name__)
+
+
+ class ConflictHandler:
+     EMBEDDING_THRESHOLD: float = 0.8  # Embedding similarity above which a candidate is checked for conflict
+
+     def __init__(self, graph_store: Neo4jGraphDB, llm: BaseLLM, embedder: BaseEmbedder):
+         self.graph_store = graph_store
+         self.llm = llm
+         self.embedder = embedder
+
+     def detect(
+         self, memory: TextualMemoryItem, top_k: int = 5, scope: str | None = None
+     ) -> list[tuple[TextualMemoryItem, TextualMemoryItem]]:
+         """
+         Detect conflicts by retrieving the most similar items from the graph database by embedding, then asking the LLM to judge each candidate.
+         Args:
+             memory: The memory item (its metadata must carry an embedding).
+             top_k: Number of top similar nodes to retrieve.
+             scope: Optional memory type filter.
+         Returns:
+             List of conflict pairs (each pair is a tuple: (memory, candidate)).
+         """
+         # 1. Search for similar memories based on embedding
+         embedding = memory.metadata.embedding
+         embedding_candidates_info = self.graph_store.search_by_embedding(
+             embedding, top_k=top_k, scope=scope
+         )
+         # 2. Filter by similarity threshold, excluding the query item itself
+         embedding_candidates_ids = [
+             info["id"]
+             for info in embedding_candidates_info
+             if info["score"] >= self.EMBEDDING_THRESHOLD and info["id"] != memory.id
+         ]
+         # 3. Judge conflicts using LLM
+         embedding_candidates = self.graph_store.get_nodes(embedding_candidates_ids)
+         conflict_pairs = []
+         for embedding_candidate in embedding_candidates:
+             embedding_candidate = TextualMemoryItem.from_dict(embedding_candidate)
+             prompt = [
+                 {
+                     "role": "system",
+                     "content": "You are a conflict detector for memory items.",
+                 },
+                 {
+                     "role": "user",
+                     "content": CONFLICT_DETECTOR_PROMPT.format(
+                         statement_1=memory.memory,
+                         statement_2=embedding_candidate.memory,
+                     ),
+                 },
+             ]
+             result = self.llm.generate(prompt).strip()
+             if "yes" in result.lower():
+                 conflict_pairs.append((memory, embedding_candidate))
+         if conflict_pairs:
+             conflict_text = "\n".join(
+                 f'"{pair[0].memory!s}" <==CONFLICT==> "{pair[1].memory!s}"'
+                 for pair in conflict_pairs
+             )
+             logger.warning(
+                 f"Detected {len(conflict_pairs)} conflicts for memory {memory.id}\n {conflict_text}"
+             )
+         return conflict_pairs
+
+     def resolve(self, memory_a: TextualMemoryItem, memory_b: TextualMemoryItem) -> None:
+         """
+         Resolve a detected conflict between two memory items using LLM fusion.
+         Args:
+             memory_a: The first conflicting memory item.
+             memory_b: The second conflicting memory item.
+         Returns:
+             None. The fused item (or the surviving newer item) is written to the graph store.
+         """
+
+         # ———————————— 1. LLM generates the fused memory ————————————
+         metadata_for_resolve = ["key", "background", "confidence", "updated_at"]
+         metadata_1 = memory_a.metadata.model_dump_json(include=metadata_for_resolve)
+         metadata_2 = memory_b.metadata.model_dump_json(include=metadata_for_resolve)
+         prompt = [
+             {
+                 "role": "system",
+                 "content": "",
+             },
+             {
+                 "role": "user",
+                 "content": CONFLICT_RESOLVER_PROMPT.format(
+                     statement_1=memory_a.memory,
+                     metadata_1=metadata_1,
+                     statement_2=memory_b.memory,
+                     metadata_2=metadata_2,
+                 ),
+             },
+         ]
+         response = self.llm.generate(prompt).strip()
+
+         # ———————————— 2. Parse the response ————————————
+         try:
+             answer = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
+             answer = answer.group(1).strip()
+             # —————— 2.1 Conflict could not be resolved: hard-update by timestamp ——————
+             if len(answer) <= 10 and "no" in answer.lower():
+                 logger.warning(
+                     f"Conflict between {memory_a.id} and {memory_b.id} could not be resolved."
+                 )
+                 self._hard_update(memory_a, memory_b)
+             # —————— 2.2 Conflict resolved: update metadata and memory ——————
+             else:
+                 fixed_metadata = self._merge_metadata(answer, memory_a.metadata, memory_b.metadata)
+                 merged_memory = TextualMemoryItem(memory=answer, metadata=fixed_metadata)
+                 logger.info(f"Resolved result: {merged_memory}")
+                 self._resolve_in_graph(memory_a, memory_b, merged_memory)
+         except (AttributeError, json.decoder.JSONDecodeError):  # AttributeError: no <answer> block in the response
+             logger.error(f"Failed to parse LLM response: {response}")
+
+     def _hard_update(self, memory_a: TextualMemoryItem, memory_b: TextualMemoryItem):
+         """
+         Hard update: compare updated_at and delete the older of the two items.
+         """
+         time_a = datetime.fromisoformat(memory_a.metadata.updated_at)
+         time_b = datetime.fromisoformat(memory_b.metadata.updated_at)
+
+         newer_mem = memory_a if time_a >= time_b else memory_b
+         older_mem = memory_b if time_a >= time_b else memory_a
+
+         self.graph_store.delete_node(older_mem.id)
+         logger.warning(
+             f"Deleted older memory {older_mem.id}: <{older_mem.memory}> due to conflict with {newer_mem.id}: <{newer_mem.memory}>"
+         )
+
+     def _resolve_in_graph(
+         self,
+         conflict_a: TextualMemoryItem,
+         conflict_b: TextualMemoryItem,
+         merged: TextualMemoryItem,
+     ):
+         edges_a = self.graph_store.get_edges(conflict_a.id, type="ANY", direction="ANY")
+         edges_b = self.graph_store.get_edges(conflict_b.id, type="ANY", direction="ANY")
+         all_edges = edges_a + edges_b
+
+         self.graph_store.add_node(
+             merged.id, merged.memory, merged.metadata.model_dump(exclude_none=True)
+         )
+
+         for edge in all_edges:  # re-point every inherited edge at the merged node
+             new_from = merged.id if edge["from"] in (conflict_a.id, conflict_b.id) else edge["from"]
+             new_to = merged.id if edge["to"] in (conflict_a.id, conflict_b.id) else edge["to"]
+             if new_from == new_to:
+                 continue
+             # Check if the edge already exists before adding
+             if not self.graph_store.edge_exists(new_from, new_to, edge["type"], direction="ANY"):
+                 self.graph_store.add_edge(new_from, new_to, edge["type"])
+
+         self.graph_store.delete_node(conflict_a.id)
+         self.graph_store.delete_node(conflict_b.id)
+         logger.debug(
+             f"Remove {conflict_a.id} and {conflict_b.id}, and inherit their edges to {merged.id}."
+         )
+
+     def _merge_metadata(
+         self,
+         memory: str,
+         metadata_a: TreeNodeTextualMemoryMetadata,
+         metadata_b: TreeNodeTextualMemoryMetadata,
+     ) -> TreeNodeTextualMemoryMetadata:
+         metadata_1 = metadata_a.model_dump()
+         metadata_2 = metadata_b.model_dump()
+         merged_metadata = {
+             "sources": (metadata_1["sources"] or []) + (metadata_2["sources"] or []),
+             "embedding": self.embedder.embed([memory])[0],
+             "updated_at": datetime.now().isoformat(),
+             "created_at": datetime.now().isoformat(),
+         }
+         for key in metadata_1:  # fill the remaining fields, preferring metadata_1
+             if key in merged_metadata:
+                 continue
+             merged_metadata[key] = (
+                 metadata_1[key] if metadata_1[key] is not None else metadata_2[key]
+             )
+         return TreeNodeTextualMemoryMetadata.model_validate(merged_metadata)
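
For orientation, here is a minimal usage sketch of the new handler. The class, methods, and threshold are as shown in the hunk above; the import path and the pre-configured `graph_store`, `llm`, and `embedder` objects are assumptions, since the diff does not show file names or wiring:

```python
# Sketch only. The module path is a guess; graph_store, llm, and embedder
# stand in for configured Neo4jGraphDB / BaseLLM / BaseEmbedder instances.
from memos.memories.textual.tree_text_memory.organize.conflict import ConflictHandler

handler = ConflictHandler(graph_store=graph_store, llm=llm, embedder=embedder)

# detect() retrieves embedding-similar nodes, then asks the LLM which ones
# actually contradict the new item.
pairs = handler.detect(new_memory, top_k=5, scope="UserMemory")

# resolve() fuses each pair into one node in the graph store, falling back
# to keeping only the newer item when the LLM cannot reconcile them.
for memory_a, memory_b in pairs:
    handler.resolve(memory_a, memory_b)
```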
@@ -5,8 +5,13 @@ from datetime import datetime

  from memos.embedders.factory import OllamaEmbedder
  from memos.graph_dbs.neo4j import Neo4jGraphDB
+ from memos.llms.factory import OllamaLLM, OpenAILLM
  from memos.log import get_logger
  from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
+ from memos.memories.textual.tree_text_memory.organize.reorganizer import (
+     GraphStructureReorganizer,
+     QueueMessage,
+ )


  logger = get_logger(__name__)
@@ -17,9 +22,11 @@ class MemoryManager:
          self,
          graph_store: Neo4jGraphDB,
          embedder: OllamaEmbedder,
+         llm: OpenAILLM | OllamaLLM,
          memory_size: dict | None = None,
          threshold: float | None = 0.80,
          merged_threshold: float | None = 0.92,
+         is_reorganize: bool = False,
      ):
          self.graph_store = graph_store
          self.embedder = embedder
@@ -36,6 +43,10 @@
              "UserMemory": 10000,
          }
          self._threshold = threshold
+         self.is_reorganize = is_reorganize
+         self.reorganizer = GraphStructureReorganizer(
+             graph_store, llm, embedder, is_reorganize=is_reorganize
+         )
          self._merged_threshold = merged_threshold

      def add(self, memories: list[TextualMemoryItem]) -> None:
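
Taken together, the constructor now requires an LLM and can spin up a background graph reorganizer. A sketch of how a 0.2.0 caller might construct the manager, assuming pre-built store/embedder/LLM objects (the variable names here are placeholders):

```python
# Sketch only: graph_store, embedder, and llm stand in for configured
# Neo4jGraphDB, OllamaEmbedder, and OpenAILLM/OllamaLLM instances.
manager = MemoryManager(
    graph_store=graph_store,
    embedder=embedder,
    llm=llm,                 # new in 0.2.0: required by the reorganizer
    threshold=0.80,
    merged_threshold=0.92,
    is_reorganize=True,      # new in 0.2.0: opt in to background reorganization
)
```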
@@ -155,14 +166,12 @@
          self.graph_store.add_node(
              node_id, memory.memory, memory.metadata.model_dump(exclude_none=True)
          )
-
-         # Step 3: Optionally link to a summary node based on topic
-         if memory.metadata.tags:
-             parent_id = self._ensure_structure_path(
-                 memory_type=memory_type, metadata=memory.metadata
+         self.reorganizer.add_message(
+             QueueMessage(
+                 op="add",
+                 after_node=[node_id],
              )
-         if parent_id:
-             self.graph_store.add_edge(parent_id, node_id, "PARENT")
+         )

      def _merge(self, source_node: TextualMemoryItem, similar_nodes: list[dict]) -> None:
          """
@@ -230,6 +239,18 @@
          ):
              self.graph_store.add_edge(merged_id, related_node["id"], type="RELATE")

+         # Log the merge to the reorganizer before updating the graph
+         self.reorganizer.add_message(
+             QueueMessage(
+                 op="merge",
+                 before_node=[
+                     original_id,
+                     source_node.id,
+                 ],
+                 after_node=[merged_id],
+             )
+         )
+
      def _inherit_edges(self, from_id: str, to_id: str) -> None:
          """
          Migrate all non-lineage edges from `from_id` to `to_id`,
@@ -293,13 +314,33 @@
                  background="",
              ),
          )
-
          self.graph_store.add_node(
              id=new_node.id,
              memory=new_node.memory,
              metadata=new_node.metadata.model_dump(exclude_none=True),
          )
+         self.reorganizer.add_message(
+             QueueMessage(
+                 op="add",
+                 after_node=[new_node.id],
+             )
+         )
+
          node_id = new_node.id

          # Step 3: Return this structure node ID as the parent_id
          return node_id
+
+     def wait_reorganizer(self):
+         """
+         Wait for the reorganizer to finish processing all queued messages.
+         """
+         logger.debug("Waiting for reorganizer to finish processing messages...")
+         self.reorganizer.wait_until_current_task_done()
+
+     def close(self):
+         self.wait_reorganizer()
+         self.reorganizer.stop()
+
+     def __del__(self):  # best-effort cleanup; callers should prefer an explicit close()
+         self.close()
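
Note that `__del__` only delegates to `close()`, and CPython gives no timing guarantee for finalizers, particularly at interpreter shutdown. Callers that need the reorganizer queue drained at a known point would presumably shut down explicitly, roughly:

```python
# Sketch only: explicit shutdown instead of relying on garbage collection.
try:
    manager.add(memories)           # queues "add" messages for the reorganizer
    manager.wait_reorganizer()      # block until queued messages are processed
finally:
    manager.close()                 # drain the queue, then stop the worker
```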
@@ -0,0 +1,212 @@
+ import json
+ import re
+
+ from datetime import datetime
+
+ from memos.embedders.base import BaseEmbedder
+ from memos.graph_dbs.neo4j import Neo4jGraphDB
+ from memos.llms.base import BaseLLM
+ from memos.log import get_logger
+ from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
+ from memos.templates.tree_reorganize_prompts import (
+     REDUNDANCY_DETECTOR_PROMPT,
+     REDUNDANCY_MERGE_PROMPT,
+     REDUNDANCY_RESOLVER_PROMPT,
+ )
+
+
+ logger = get_logger(__name__)
+
+
+ class RedundancyHandler:
+     EMBEDDING_THRESHOLD: float = 0.8  # Embedding similarity above which a candidate is checked for redundancy
+
+     def __init__(self, graph_store: Neo4jGraphDB, llm: BaseLLM, embedder: BaseEmbedder):
+         self.graph_store = graph_store
+         self.llm = llm
+         self.embedder = embedder
+
+     def detect(
+         self, memory: TextualMemoryItem, top_k: int = 5, scope: str | None = None
+     ) -> list[tuple[TextualMemoryItem, TextualMemoryItem]]:
+         """
+         Detect redundancy by retrieving the most similar items from the graph database by embedding, then asking the LLM to judge each candidate.
+         Args:
+             memory: The memory item (its metadata must carry an embedding).
+             top_k: Number of top similar nodes to retrieve.
+             scope: Optional memory type filter.
+         Returns:
+             List of redundancy pairs (each pair is a tuple: (memory, candidate)).
+         """
+         # 1. Search for similar memories based on embedding
+         embedding = memory.metadata.embedding
+         embedding_candidates_info = self.graph_store.search_by_embedding(
+             embedding, top_k=top_k, scope=scope
+         )
+         # 2. Filter by similarity threshold, excluding the query item itself
+         embedding_candidates_ids = [
+             info["id"]
+             for info in embedding_candidates_info
+             if info["score"] >= self.EMBEDDING_THRESHOLD and info["id"] != memory.id
+         ]
+         # 3. Judge redundancy using LLM
+         embedding_candidates = self.graph_store.get_nodes(embedding_candidates_ids)
+         redundant_pairs = []
+         for embedding_candidate in embedding_candidates:
+             embedding_candidate = TextualMemoryItem.from_dict(embedding_candidate)
+             prompt = [
+                 {
+                     "role": "system",
+                     "content": "You are a redundancy detector for memory items.",
+                 },
+                 {
+                     "role": "user",
+                     "content": REDUNDANCY_DETECTOR_PROMPT.format(
+                         statement_1=memory.memory,
+                         statement_2=embedding_candidate.memory,
+                     ),
+                 },
+             ]
+             result = self.llm.generate(prompt).strip()
+             if "yes" in result.lower():
+                 redundant_pairs.append((memory, embedding_candidate))
+         if redundant_pairs:
+             redundancy_text = "\n".join(
+                 f'"{pair[0].memory!s}" <==REDUNDANCY==> "{pair[1].memory!s}"'
+                 for pair in redundant_pairs
+             )
+             logger.warning(
+                 f"Detected {len(redundant_pairs)} redundancies for memory {memory.id}\n {redundancy_text}"
+             )
+         return redundant_pairs
+
+     def resolve_two_nodes(self, memory_a: TextualMemoryItem, memory_b: TextualMemoryItem) -> None:
+         """
+         Resolve a detected redundancy between two memory items using LLM fusion.
+         Args:
+             memory_a: The first redundant memory item.
+             memory_b: The second redundant memory item.
+         Returns:
+             None. The fused item (or the surviving newer item) is written to the graph store.
+         """
+
+         # ———————————— 1. LLM generates the fused memory ————————————
+         metadata_for_resolve = ["key", "background", "confidence", "updated_at"]
+         metadata_1 = memory_a.metadata.model_dump_json(include=metadata_for_resolve)
+         metadata_2 = memory_b.metadata.model_dump_json(include=metadata_for_resolve)
+         prompt = [
+             {
+                 "role": "system",
+                 "content": "",
+             },
+             {
+                 "role": "user",
+                 "content": REDUNDANCY_RESOLVER_PROMPT.format(
+                     statement_1=memory_a.memory,
+                     metadata_1=metadata_1,
+                     statement_2=memory_b.memory,
+                     metadata_2=metadata_2,
+                 ),
+             },
+         ]
+         response = self.llm.generate(prompt).strip()
+
+         # ———————————— 2. Parse the response ————————————
+         try:
+             answer = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
+             answer = answer.group(1).strip()
+             # —————— 2.1 Redundancy could not be resolved: hard-update by timestamp ——————
+             if len(answer) <= 10 and "no" in answer.lower():
+                 logger.warning(
+                     f"Redundancy between {memory_a.id} and {memory_b.id} could not be resolved."
+                 )
+                 self._hard_update(memory_a, memory_b)
+             # —————— 2.2 Redundancy resolved: update metadata and memory ——————
+             else:
+                 fixed_metadata = self._merge_metadata(answer, memory_a.metadata, memory_b.metadata)
+                 merged_memory = TextualMemoryItem(memory=answer, metadata=fixed_metadata)
+                 logger.info(f"Resolved result: {merged_memory}")
+                 self._resolve_in_graph(memory_a, memory_b, merged_memory)
+         except (AttributeError, json.decoder.JSONDecodeError):  # AttributeError: no <answer> block in the response
+             logger.error(f"Failed to parse LLM response: {response}")
+
+     def resolve_one_node(self, memory: TextualMemoryItem) -> None:  # compact one redundant item's text in place
+         prompt = [
+             {
+                 "role": "user",
+                 "content": REDUNDANCY_MERGE_PROMPT.format(merged_text=memory.memory),
+             },
+         ]
+         response = self.llm.generate(prompt)
+         memory.memory = response.strip()
+         self.graph_store.update_node(
+             memory.id,
+             {"memory": memory.memory, **memory.metadata.model_dump(exclude_none=True)},
+         )
+         logger.debug(f"Merged memory: {memory.memory}")
+
+     def _hard_update(self, memory_a: TextualMemoryItem, memory_b: TextualMemoryItem):
+         """
+         Hard update: compare updated_at and delete the older of the two items.
+         """
+         time_a = datetime.fromisoformat(memory_a.metadata.updated_at)
+         time_b = datetime.fromisoformat(memory_b.metadata.updated_at)
+
+         newer_mem = memory_a if time_a >= time_b else memory_b
+         older_mem = memory_b if time_a >= time_b else memory_a
+
+         self.graph_store.delete_node(older_mem.id)
+         logger.warning(
+             f"Deleted older memory {older_mem.id}: <{older_mem.memory}> due to redundancy with {newer_mem.id}: <{newer_mem.memory}>"
+         )
+
+     def _resolve_in_graph(
+         self,
+         conflict_a: TextualMemoryItem,
+         conflict_b: TextualMemoryItem,
+         merged: TextualMemoryItem,
+     ):
+         edges_a = self.graph_store.get_edges(conflict_a.id, type="ANY", direction="ANY")
+         edges_b = self.graph_store.get_edges(conflict_b.id, type="ANY", direction="ANY")
+         all_edges = edges_a + edges_b
+
+         self.graph_store.add_node(
+             merged.id, merged.memory, merged.metadata.model_dump(exclude_none=True)
+         )
+
+         for edge in all_edges:  # re-point every inherited edge at the merged node
+             new_from = merged.id if edge["from"] in (conflict_a.id, conflict_b.id) else edge["from"]
+             new_to = merged.id if edge["to"] in (conflict_a.id, conflict_b.id) else edge["to"]
+             if new_from == new_to:
+                 continue
+             # Check if the edge already exists before adding
+             if not self.graph_store.edge_exists(new_from, new_to, edge["type"], direction="ANY"):
+                 self.graph_store.add_edge(new_from, new_to, edge["type"])
+
+         self.graph_store.delete_node(conflict_a.id)
+         self.graph_store.delete_node(conflict_b.id)
+         logger.debug(
+             f"Remove {conflict_a.id} and {conflict_b.id}, and inherit their edges to {merged.id}."
+         )
+
+     def _merge_metadata(
+         self,
+         memory: str,
+         metadata_a: TreeNodeTextualMemoryMetadata,
+         metadata_b: TreeNodeTextualMemoryMetadata,
+     ) -> TreeNodeTextualMemoryMetadata:
+         metadata_1 = metadata_a.model_dump()
+         metadata_2 = metadata_b.model_dump()
+         merged_metadata = {
+             "sources": (metadata_1["sources"] or []) + (metadata_2["sources"] or []),
+             "embedding": self.embedder.embed([memory])[0],
+             "updated_at": datetime.now().isoformat(),
+             "created_at": datetime.now().isoformat(),
+         }
+         for key in metadata_1:  # fill the remaining fields, preferring metadata_1
+             if key in merged_metadata:
+                 continue
+             merged_metadata[key] = (
+                 metadata_1[key] if metadata_1[key] is not None else metadata_2[key]
+             )
+         return TreeNodeTextualMemoryMetadata.model_validate(merged_metadata)
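
The redundancy handler mirrors the conflict handler's API, adding a single-node merge path. A sketch with the same caveats as before (guessed import path, placeholder `graph_store`/`llm`/`embedder`):

```python
# Sketch only. The module path is a guess; the store/LLM/embedder objects
# stand in for configured instances.
from memos.memories.textual.tree_text_memory.organize.redundancy import RedundancyHandler

handler = RedundancyHandler(graph_store=graph_store, llm=llm, embedder=embedder)

# Pairwise: find near-duplicates of a new item, then fuse each pair in the graph.
for memory_a, memory_b in handler.detect(new_memory, top_k=5, scope="UserMemory"):
    handler.resolve_two_nodes(memory_a, memory_b)

# Single-node: have the LLM compact one item's text and update it in place.
handler.resolve_one_node(new_memory)
```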