MemoryOS 0.1.13__py3-none-any.whl → 0.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of MemoryOS might be problematic.
- {memoryos-0.1.13.dist-info → memoryos-0.2.1.dist-info}/METADATA +78 -49
- memoryos-0.2.1.dist-info/RECORD +152 -0
- memoryos-0.2.1.dist-info/entry_points.txt +3 -0
- memos/__init__.py +1 -1
- memos/api/config.py +471 -0
- memos/api/exceptions.py +28 -0
- memos/api/mcp_serve.py +502 -0
- memos/api/product_api.py +35 -0
- memos/api/product_models.py +159 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +358 -0
- memos/chunkers/sentence_chunker.py +8 -2
- memos/cli.py +113 -0
- memos/configs/embedder.py +27 -0
- memos/configs/graph_db.py +83 -2
- memos/configs/llm.py +48 -0
- memos/configs/mem_cube.py +1 -1
- memos/configs/mem_reader.py +4 -0
- memos/configs/mem_scheduler.py +91 -5
- memos/configs/memory.py +10 -4
- memos/dependency.py +52 -0
- memos/embedders/ark.py +92 -0
- memos/embedders/factory.py +4 -0
- memos/embedders/sentence_transformer.py +8 -2
- memos/embedders/universal_api.py +32 -0
- memos/graph_dbs/base.py +2 -2
- memos/graph_dbs/factory.py +2 -0
- memos/graph_dbs/item.py +46 -0
- memos/graph_dbs/neo4j.py +377 -101
- memos/graph_dbs/neo4j_community.py +300 -0
- memos/llms/base.py +9 -0
- memos/llms/deepseek.py +54 -0
- memos/llms/factory.py +10 -1
- memos/llms/hf.py +170 -13
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +4 -0
- memos/llms/openai.py +68 -1
- memos/llms/qwen.py +63 -0
- memos/llms/vllm.py +153 -0
- memos/mem_cube/general.py +77 -16
- memos/mem_cube/utils.py +102 -0
- memos/mem_os/core.py +131 -41
- memos/mem_os/main.py +93 -11
- memos/mem_os/product.py +1098 -35
- memos/mem_os/utils/default_config.py +352 -0
- memos/mem_os/utils/format_utils.py +1154 -0
- memos/mem_reader/simple_struct.py +13 -8
- memos/mem_scheduler/base_scheduler.py +467 -36
- memos/mem_scheduler/general_scheduler.py +125 -244
- memos/mem_scheduler/modules/base.py +9 -0
- memos/mem_scheduler/modules/dispatcher.py +68 -2
- memos/mem_scheduler/modules/misc.py +39 -0
- memos/mem_scheduler/modules/monitor.py +228 -49
- memos/mem_scheduler/modules/rabbitmq_service.py +317 -0
- memos/mem_scheduler/modules/redis_service.py +32 -22
- memos/mem_scheduler/modules/retriever.py +250 -23
- memos/mem_scheduler/modules/schemas.py +189 -7
- memos/mem_scheduler/mos_for_test_scheduler.py +143 -0
- memos/mem_scheduler/utils.py +51 -2
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/memories/activation/item.py +25 -0
- memos/memories/activation/kv.py +10 -3
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/factory.py +2 -0
- memos/memories/textual/general.py +7 -5
- memos/memories/textual/item.py +3 -1
- memos/memories/textual/tree.py +14 -6
- memos/memories/textual/tree_text_memory/organize/conflict.py +198 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +72 -23
- memos/memories/textual/tree_text_memory/organize/redundancy.py +193 -0
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +233 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +606 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +0 -1
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +2 -2
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +6 -5
- memos/parsers/markitdown.py +8 -2
- memos/templates/mem_reader_prompts.py +105 -36
- memos/templates/mem_scheduler_prompts.py +96 -47
- memos/templates/tree_reorganize_prompts.py +223 -0
- memos/vec_dbs/base.py +12 -0
- memos/vec_dbs/qdrant.py +46 -20
- memoryos-0.1.13.dist-info/RECORD +0 -122
- {memoryos-0.1.13.dist-info → memoryos-0.2.1.dist-info}/LICENSE +0 -0
- {memoryos-0.1.13.dist-info → memoryos-0.2.1.dist-info}/WHEEL +0 -0
memos/memories/textual/tree_text_memory/organize/conflict.py (new file)

@@ -0,0 +1,198 @@

```python
import json
import re

from datetime import datetime

from memos.embedders.base import BaseEmbedder
from memos.graph_dbs.neo4j import Neo4jGraphDB
from memos.llms.base import BaseLLM
from memos.log import get_logger
from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
from memos.templates.tree_reorganize_prompts import (
    CONFLICT_DETECTOR_PROMPT,
    CONFLICT_RESOLVER_PROMPT,
)


logger = get_logger(__name__)


class ConflictHandler:
    EMBEDDING_THRESHOLD: float = 0.8  # Threshold for embedding similarity to consider conflict

    def __init__(self, graph_store: Neo4jGraphDB, llm: BaseLLM, embedder: BaseEmbedder):
        self.graph_store = graph_store
        self.llm = llm
        self.embedder = embedder

    def detect(
        self, memory: TextualMemoryItem, top_k: int = 5, scope: str | None = None
    ) -> list[tuple[TextualMemoryItem, TextualMemoryItem]]:
        """
        Detect conflicts by finding the most similar items in the graph database based on embedding, then use LLM to judge conflict.
        Args:
            memory: The memory item (should have an embedding attribute or field).
            top_k: Number of top similar nodes to retrieve.
            scope: Optional memory type filter.
        Returns:
            List of conflict pairs (each pair is a tuple: (memory, candidate)).
        """
        # 1. Search for similar memories based on embedding
        embedding = memory.metadata.embedding
        embedding_candidates_info = self.graph_store.search_by_embedding(
            embedding, top_k=top_k, scope=scope
        )
        # 2. Filter based on similarity threshold
        embedding_candidates_ids = [
            info["id"]
            for info in embedding_candidates_info
            if info["score"] >= self.EMBEDDING_THRESHOLD and info["id"] != memory.id
        ]
        # 3. Judge conflicts using LLM
        embedding_candidates = self.graph_store.get_nodes(embedding_candidates_ids)
        conflict_pairs = []
        for embedding_candidate in embedding_candidates:
            embedding_candidate = TextualMemoryItem.from_dict(embedding_candidate)
            prompt = [
                {
                    "role": "system",
                    "content": "You are a conflict detector for memory items.",
                },
                {
                    "role": "user",
                    "content": CONFLICT_DETECTOR_PROMPT.format(
                        statement_1=memory.memory,
                        statement_2=embedding_candidate.memory,
                    ),
                },
            ]
            result = self.llm.generate(prompt).strip()
            if "yes" in result.lower():
                conflict_pairs.append([memory, embedding_candidate])
        if len(conflict_pairs):
            conflict_text = "\n".join(
                f'"{pair[0].memory!s}" <==CONFLICT==> "{pair[1].memory!s}"'
                for pair in conflict_pairs
            )
            logger.warning(
                f"Detected {len(conflict_pairs)} conflicts for memory {memory.id}\n {conflict_text}"
            )
        return conflict_pairs

    def resolve(self, memory_a: TextualMemoryItem, memory_b: TextualMemoryItem) -> None:
        """
        Resolve detected conflicts between two memory items using LLM fusion.
        Args:
            memory_a: The first conflicting memory item.
            memory_b: The second conflicting memory item.
        Returns:
            A fused TextualMemoryItem representing the resolved memory.
        """

        # ———————————— 1. LLM generate fused memory ————————————
        metadata_for_resolve = ["key", "background", "confidence", "updated_at"]
        metadata_1 = memory_a.metadata.model_dump_json(include=metadata_for_resolve)
        metadata_2 = memory_b.metadata.model_dump_json(include=metadata_for_resolve)
        prompt = [
            {
                "role": "system",
                "content": "",
            },
            {
                "role": "user",
                "content": CONFLICT_RESOLVER_PROMPT.format(
                    statement_1=memory_a.memory,
                    metadata_1=metadata_1,
                    statement_2=memory_b.memory,
                    metadata_2=metadata_2,
                ),
            },
        ]
        response = self.llm.generate(prompt).strip()

        # ———————————— 2. Parse the response ————————————
        try:
            answer = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
            answer = answer.group(1).strip()
            # —————— 2.1 Can't resolve conflict, hard update by comparing timestamp ————
            if len(answer) <= 10 and "no" in answer.lower():
                logger.warning(
                    f"Conflict between {memory_a.id} and {memory_b.id} could not be resolved. "
                )
                self._hard_update(memory_a, memory_b)
            # —————— 2.2 Conflict resolved, update metadata and memory ————
            else:
                fixed_metadata = self._merge_metadata(answer, memory_a.metadata, memory_b.metadata)
                merged_memory = TextualMemoryItem(memory=answer, metadata=fixed_metadata)
                logger.info(f"Resolved result: {merged_memory}")
                self._resolve_in_graph(memory_a, memory_b, merged_memory)
        except json.decoder.JSONDecodeError:
            logger.error(f"Failed to parse LLM response: {response}")

    def _hard_update(self, memory_a: TextualMemoryItem, memory_b: TextualMemoryItem):
        """
        Hard update: compare updated_at, keep the newer one, overwrite the older one's metadata.
        """
        time_a = datetime.fromisoformat(memory_a.metadata.updated_at)
        time_b = datetime.fromisoformat(memory_b.metadata.updated_at)

        newer_mem = memory_a if time_a >= time_b else memory_b
        older_mem = memory_b if time_a >= time_b else memory_a

        self.graph_store.delete_node(older_mem.id)
        logger.warning(
            f"Delete older memory {older_mem.id}: <{older_mem.memory}> due to conflict with {newer_mem.id}: <{newer_mem.memory}>"
        )

    def _resolve_in_graph(
        self,
        conflict_a: TextualMemoryItem,
        conflict_b: TextualMemoryItem,
        merged: TextualMemoryItem,
    ):
        edges_a = self.graph_store.get_edges(conflict_a.id, type="ANY", direction="ANY")
        edges_b = self.graph_store.get_edges(conflict_b.id, type="ANY", direction="ANY")
        all_edges = edges_a + edges_b

        self.graph_store.add_node(
            merged.id, merged.memory, merged.metadata.model_dump(exclude_none=True)
        )

        for edge in all_edges:
            new_from = merged.id if edge["from"] in (conflict_a.id, conflict_b.id) else edge["from"]
            new_to = merged.id if edge["to"] in (conflict_a.id, conflict_b.id) else edge["to"]
            if new_from == new_to:
                continue
            # Check if the edge already exists before adding
            if not self.graph_store.edge_exists(new_from, new_to, edge["type"], direction="ANY"):
                self.graph_store.add_edge(new_from, new_to, edge["type"])

        self.graph_store.update_node(conflict_a.id, {"status": "archived"})
        self.graph_store.update_node(conflict_b.id, {"status": "archived"})
        self.graph_store.add_edge(conflict_a.id, merged.id, type="MERGED_TO")
        self.graph_store.add_edge(conflict_b.id, merged.id, type="MERGED_TO")
        logger.debug(
            f"Archive {conflict_a.id} and {conflict_b.id}, and inherit their edges to {merged.id}."
        )

    def _merge_metadata(
        self,
        memory: str,
        metadata_a: TreeNodeTextualMemoryMetadata,
        metadata_b: TreeNodeTextualMemoryMetadata,
    ) -> TreeNodeTextualMemoryMetadata:
        metadata_1 = metadata_a.model_dump()
        metadata_2 = metadata_b.model_dump()
        merged_metadata = {
            "sources": (metadata_1["sources"] or []) + (metadata_2["sources"] or []),
            "embedding": self.embedder.embed([memory])[0],
            "update_at": datetime.now().isoformat(),
            "created_at": datetime.now().isoformat(),
        }
        for key in metadata_1:
            if key in merged_metadata:
                continue
            merged_metadata[key] = (
                metadata_1[key] if metadata_1[key] is not None else metadata_2[key]
            )
        return TreeNodeTextualMemoryMetadata.model_validate(merged_metadata)
```
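The detect/resolve flow above is easier to see end to end with its collaborators stubbed out. Below is a minimal, self-contained sketch of the `detect()` pipeline, with the graph store, LLM, and embedder replaced by in-memory fakes; `StubGraphStore`, `StubLLM`, the toy `cosine()` helper, and the sample memories are illustrative assumptions, not part of memos. The sketch also makes one behavior visible: any "yes" substring in the LLM reply counts as a conflict verdict. Note separately that in `resolve()` above, a missing `<answer>` tag makes `answer.group(1)` raise `AttributeError`, which the `json.decoder.JSONDecodeError` handler does not catch.

```python
from dataclasses import dataclass, field

EMBEDDING_THRESHOLD = 0.8


@dataclass
class Memory:
    id: str
    memory: str
    embedding: list[float] = field(default_factory=list)


def cosine(a: list[float], b: list[float]) -> float:
    num = sum(x * y for x, y in zip(a, b))
    norm_a = sum(x * x for x in a) ** 0.5
    norm_b = sum(y * y for y in b) ** 0.5
    return num / (norm_a * norm_b) if norm_a and norm_b else 0.0


class StubGraphStore:
    def __init__(self, memories: list[Memory]):
        self._memories = {m.id: m for m in memories}

    def search_by_embedding(self, embedding, top_k=5, scope=None):
        scored = [
            {"id": m.id, "score": cosine(embedding, m.embedding)}
            for m in self._memories.values()
        ]
        return sorted(scored, key=lambda d: -d["score"])[:top_k]

    def get_nodes(self, ids):
        return [self._memories[i] for i in ids]


class StubLLM:
    """Says 'yes' when one statement negates the other -- a stand-in for the prompt-based judge."""

    def generate(self, prompt) -> str:
        return "yes" if "not" in prompt[-1]["content"] else "no"


def detect(store, llm, memory: Memory, top_k: int = 5):
    # Mirror of ConflictHandler.detect(): embedding recall, threshold filter, LLM verdict.
    candidates = store.search_by_embedding(memory.embedding, top_k=top_k)
    ids = [
        c["id"]
        for c in candidates
        if c["score"] >= EMBEDDING_THRESHOLD and c["id"] != memory.id
    ]
    pairs = []
    for cand in store.get_nodes(ids):
        verdict = llm.generate(
            [{"role": "user", "content": f"Do these conflict? 1) {memory.memory} 2) {cand.memory}"}]
        )
        if "yes" in verdict.lower():
            pairs.append((memory, cand))
    return pairs


store = StubGraphStore(
    [
        Memory("a", "Alice lives in Paris", [1.0, 0.0]),
        Memory("b", "Alice does not live in Paris", [0.97, 0.1]),
    ]
)
new_memory = Memory("c", "Alice lives in Paris", [1.0, 0.05])
print(detect(store, StubLLM(), new_memory))  # one conflict pair: ("c", "b")
```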
memos/memories/textual/tree_text_memory/organize/manager.py

```diff
@@ -5,8 +5,13 @@ from datetime import datetime

 from memos.embedders.factory import OllamaEmbedder
 from memos.graph_dbs.neo4j import Neo4jGraphDB
+from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
 from memos.log import get_logger
 from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
+from memos.memories.textual.tree_text_memory.organize.reorganizer import (
+    GraphStructureReorganizer,
+    QueueMessage,
+)


 logger = get_logger(__name__)
@@ -17,9 +22,11 @@ class MemoryManager:
         self,
         graph_store: Neo4jGraphDB,
         embedder: OllamaEmbedder,
+        llm: OpenAILLM | OllamaLLM | AzureLLM,
         memory_size: dict | None = None,
         threshold: float | None = 0.80,
         merged_threshold: float | None = 0.92,
+        is_reorganize: bool = False,
     ):
         self.graph_store = graph_store
         self.embedder = embedder
@@ -36,17 +43,24 @@ class MemoryManager:
             "UserMemory": 10000,
         }
         self._threshold = threshold
+        self.is_reorganize = is_reorganize
+        self.reorganizer = GraphStructureReorganizer(
+            graph_store, llm, embedder, is_reorganize=is_reorganize
+        )
         self._merged_threshold = merged_threshold

-    def add(self, memories: list[TextualMemoryItem]) ->
+    def add(self, memories: list[TextualMemoryItem]) -> list[str]:
         """
         Add new memories in parallel to different memory types (WorkingMemory, LongTermMemory, UserMemory).
         """
+        added_ids: list[str] = []
+
         with ThreadPoolExecutor(max_workers=8) as executor:
-            futures =
+            futures = {executor.submit(self._process_memory, m): m for m in memories}
             for future in as_completed(futures):
                 try:
-                    future.result()
+                    ids = future.result()
+                    added_ids.extend(ids)
                 except Exception as e:
                     logger.exception("Memory processing error: ", exc_info=e)

@@ -61,6 +75,7 @@ class MemoryManager:
         )

         self._refresh_memory_size()
+        return added_ids

     def replace_working_memory(self, memories: list[TextualMemoryItem]) -> None:
         """
@@ -102,17 +117,23 @@ class MemoryManager:
         Process and add memory to different memory types (WorkingMemory, LongTermMemory, UserMemory).
         This method runs asynchronously to process each memory item.
         """
+        ids = []
+
         # Add to WorkingMemory
-        self._add_memory_to_db(memory, "WorkingMemory")
+        working_id = self._add_memory_to_db(memory, "WorkingMemory")
+        ids.append(working_id)

         # Add to LongTermMemory and UserMemory
         if memory.metadata.memory_type in ["LongTermMemory", "UserMemory"]:
-            self._add_to_graph_memory(
+            added_id = self._add_to_graph_memory(
                 memory=memory,
                 memory_type=memory.metadata.memory_type,
             )
+            ids.append(added_id)
+
+        return ids

-    def _add_memory_to_db(self, memory: TextualMemoryItem, memory_type: str):
+    def _add_memory_to_db(self, memory: TextualMemoryItem, memory_type: str) -> str:
         """
         Add a single memory item to the graph store, with FIFO logic for WorkingMemory.
         """
@@ -124,6 +145,7 @@ class MemoryManager:

         # Insert node into graph
         self.graph_store.add_node(working_memory.id, working_memory.memory, metadata)
+        return working_memory.id

     def _add_to_graph_memory(self, memory: TextualMemoryItem, memory_type: str):
         """
@@ -148,23 +170,22 @@ class MemoryManager:
         )

         if similar_nodes and similar_nodes[0]["score"] > self._merged_threshold:
-            self._merge(memory, similar_nodes)
+            return self._merge(memory, similar_nodes)
         else:
             node_id = str(uuid.uuid4())
             # Step 2: Add new node to graph
             self.graph_store.add_node(
                 node_id, memory.memory, memory.metadata.model_dump(exclude_none=True)
             )
-
-
-
-
-                memory_type=memory_type, metadata=memory.metadata
+            self.reorganizer.add_message(
+                QueueMessage(
+                    op="add",
+                    after_node=[node_id],
                 )
-
-
+            )
+            return node_id

-    def _merge(self, source_node: TextualMemoryItem, similar_nodes: list[dict]) ->
+    def _merge(self, source_node: TextualMemoryItem, similar_nodes: list[dict]) -> str:
         """
         TODO: Add node traceability support by optionally preserving source nodes and linking them with MERGED_FROM edges.

@@ -191,7 +212,9 @@ class MemoryManager:
         merged_background = f"{original_meta.background}\n⟵MERGED⟶\n{source_meta.background}"
         merged_embedding = self.embedder.embed([merged_text])[0]

-
+        original_conf = original_meta.confidence or 0.0
+        source_conf = source_meta.confidence or 0.0
+        merged_confidence = float((original_conf + source_conf) / 2)
         merged_usage = list(set((original_meta.usage or []) + (source_meta.usage or [])))

         # Create new merged node
@@ -223,12 +246,18 @@ class MemoryManager:
         # After creating merged node and tracing lineage
         self._inherit_edges(original_id, merged_id)

-        #
-
-
-
-
-
+        # log to reorganizer before updating the graph
+        self.reorganizer.add_message(
+            QueueMessage(
+                op="merge",
+                before_node=[
+                    original_id,
+                    source_node.id,
+                ],
+                after_node=[merged_id],
+            )
+        )
+        return merged_id

     def _inherit_edges(self, from_id: str, to_id: str) -> None:
         """
@@ -293,13 +322,33 @@ class MemoryManager:
                 background="",
             ),
         )
-
         self.graph_store.add_node(
             id=new_node.id,
             memory=new_node.memory,
             metadata=new_node.metadata.model_dump(exclude_none=True),
         )
+        self.reorganizer.add_message(
+            QueueMessage(
+                op="add",
+                after_node=[new_node.id],
+            )
+        )
+
         node_id = new_node.id

         # Step 3: Return this structure node ID as the parent_id
         return node_id
+
+    def wait_reorganizer(self):
+        """
+        Wait for the reorganizer to finish processing all messages.
+        """
+        logger.debug("Waiting for reorganizer to finish processing messages...")
+        self.reorganizer.wait_until_current_task_done()
+
+    def close(self):
+        self.wait_reorganizer()
+        self.reorganizer.stop()
+
+    def __del__(self):
+        self.close()
```
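MemoryManager now reports every structural change to a GraphStructureReorganizer through QueueMessage objects, and wait_reorganizer()/close() drain that queue. The reorganizer itself lands in reorganizer.py (+606 lines, not part of this hunk), so the sketch below only illustrates the queue-worker contract the manager relies on (add_message, wait_until_current_task_done, stop), assuming a single background thread draining a standard queue.Queue; all names and behavior here are assumptions for illustration, not the memos implementation.

```python
import queue
import threading
from dataclasses import dataclass, field


@dataclass
class QueueMessage:
    op: str                                  # e.g. "add" or "merge"
    before_node: list[str] = field(default_factory=list)
    after_node: list[str] = field(default_factory=list)


class ReorganizerSketch:
    def __init__(self):
        self._queue = queue.Queue()          # holds QueueMessage, or None as a stop sentinel
        self._worker = threading.Thread(target=self._run, daemon=True)
        self._worker.start()

    def add_message(self, msg: QueueMessage) -> None:
        self._queue.put(msg)

    def _run(self) -> None:
        while True:
            msg = self._queue.get()
            try:
                if msg is None:              # stop sentinel
                    return
                # A real reorganizer would restructure the graph here.
                print(f"reorganize {msg.op}: {msg.before_node} -> {msg.after_node}")
            finally:
                self._queue.task_done()

    def wait_until_current_task_done(self) -> None:
        self._queue.join()                   # blocks until every queued message is processed

    def stop(self) -> None:
        self._queue.put(None)
        self._worker.join()


r = ReorganizerSketch()
r.add_message(QueueMessage(op="add", after_node=["n1"]))
r.add_message(QueueMessage(op="merge", before_node=["n1", "n2"], after_node=["n3"]))
r.wait_until_current_task_done()             # mirrors MemoryManager.wait_reorganizer()
r.stop()                                     # mirrors MemoryManager.close()
```

One design note: the diff relies on `__del__` to call `close()`, but finalizers are not guaranteed to run promptly (or at all) at interpreter shutdown, so callers should prefer an explicit `close()`.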
memos/memories/textual/tree_text_memory/organize/redundancy.py (new file)

@@ -0,0 +1,193 @@

```python
import json
import re

from datetime import datetime

from memos.embedders.base import BaseEmbedder
from memos.graph_dbs.neo4j import Neo4jGraphDB
from memos.llms.base import BaseLLM
from memos.log import get_logger
from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
from memos.templates.tree_reorganize_prompts import (
    REDUNDANCY_DETECTOR_PROMPT,
    REDUNDANCY_MERGE_PROMPT,
    REDUNDANCY_RESOLVER_PROMPT,
)


logger = get_logger(__name__)


class RedundancyHandler:
    EMBEDDING_THRESHOLD: float = 0.8  # Threshold for embedding similarity to consider redundancy

    def __init__(self, graph_store: Neo4jGraphDB, llm: BaseLLM, embedder: BaseEmbedder):
        self.graph_store = graph_store
        self.llm = llm
        self.embedder = embedder

    def detect(
        self, memory: TextualMemoryItem, top_k: int = 5, scope: str | None = None
    ) -> list[tuple[TextualMemoryItem, TextualMemoryItem]]:
        """
        Detect redundancy by finding the most similar items in the graph database based on embedding, then use LLM to judge redundancy.
        Args:
            memory: The memory item (should have an embedding attribute or field).
            top_k: Number of top similar nodes to retrieve.
            scope: Optional memory type filter.
        Returns:
            List of redundancy pairs (each pair is a tuple: (memory, candidate)).
        """
        # 1. Search for similar memories based on embedding
        embedding = memory.metadata.embedding
        embedding_candidates_info = self.graph_store.search_by_embedding(
            embedding, top_k=top_k, scope=scope
        )
        # 2. Filter based on similarity threshold
        embedding_candidates_ids = [
            info["id"]
            for info in embedding_candidates_info
            if info["score"] >= self.EMBEDDING_THRESHOLD and info["id"] != memory.id
        ]
        # 3. Judge redundancies using LLM
        embedding_candidates = self.graph_store.get_nodes(embedding_candidates_ids)
        redundant_pairs = []
        for embedding_candidate in embedding_candidates:
            embedding_candidate = TextualMemoryItem.from_dict(embedding_candidate)
            prompt = [
                {
                    "role": "system",
                    "content": "You are a redundancy detector for memory items.",
                },
                {
                    "role": "user",
                    "content": REDUNDANCY_DETECTOR_PROMPT.format(
                        statement_1=memory.memory,
                        statement_2=embedding_candidate.memory,
                    ),
                },
            ]
            result = self.llm.generate(prompt).strip()
            if "yes" in result.lower():
                redundant_pairs.append([memory, embedding_candidate])
        if len(redundant_pairs):
            redundant_text = "\n".join(
                f'"{pair[0].memory!s}" <==REDUNDANCY==> "{pair[1].memory!s}"'
                for pair in redundant_pairs
            )
            logger.warning(
                f"Detected {len(redundant_pairs)} redundancies for memory {memory.id}\n {redundant_text}"
            )
        return redundant_pairs

    def resolve_two_nodes(self, memory_a: TextualMemoryItem, memory_b: TextualMemoryItem) -> None:
        """
        Resolve detected redundancies between two memory items using LLM fusion.
        Args:
            memory_a: The first redundant memory item.
            memory_b: The second redundant memory item.
        Returns:
            A fused TextualMemoryItem representing the resolved memory.
        """
        return  # waiting for implementation
        # ———————————— 1. LLM generate fused memory ————————————
        metadata_for_resolve = ["key", "background", "confidence", "updated_at"]
        metadata_1 = memory_a.metadata.model_dump_json(include=metadata_for_resolve)
        metadata_2 = memory_b.metadata.model_dump_json(include=metadata_for_resolve)
        prompt = [
            {
                "role": "system",
                "content": "",
            },
            {
                "role": "user",
                "content": REDUNDANCY_RESOLVER_PROMPT.format(
                    statement_1=memory_a.memory,
                    metadata_1=metadata_1,
                    statement_2=memory_b.memory,
                    metadata_2=metadata_2,
                ),
            },
        ]
        response = self.llm.generate(prompt).strip()

        # ———————————— 2. Parse the response ————————————
        try:
            answer = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
            answer = answer.group(1).strip()
            fixed_metadata = self._merge_metadata(answer, memory_a.metadata, memory_b.metadata)
            merged_memory = TextualMemoryItem(memory=answer, metadata=fixed_metadata)
            logger.info(f"Resolved result: {merged_memory}")
            self._resolve_in_graph(memory_a, memory_b, merged_memory)
        except json.decoder.JSONDecodeError:
            logger.error(f"Failed to parse LLM response: {response}")

    def resolve_one_node(self, memory: TextualMemoryItem) -> None:
        prompt = [
            {
                "role": "user",
                "content": REDUNDANCY_MERGE_PROMPT.format(merged_text=memory.memory),
            },
        ]
        response = self.llm.generate(prompt)
        memory.memory = response.strip()
        self.graph_store.update_node(
            memory.id,
            {"memory": memory.memory, **memory.metadata.model_dump(exclude_none=True)},
        )
        logger.debug(f"Merged memory: {memory.memory}")

    def _resolve_in_graph(
        self,
        redundant_a: TextualMemoryItem,
        redundant_b: TextualMemoryItem,
        merged: TextualMemoryItem,
    ):
        edges_a = self.graph_store.get_edges(redundant_a.id, type="ANY", direction="ANY")
        edges_b = self.graph_store.get_edges(redundant_b.id, type="ANY", direction="ANY")
        all_edges = edges_a + edges_b

        self.graph_store.add_node(
            merged.id, merged.memory, merged.metadata.model_dump(exclude_none=True)
        )

        for edge in all_edges:
            new_from = (
                merged.id if edge["from"] in (redundant_a.id, redundant_b.id) else edge["from"]
            )
            new_to = merged.id if edge["to"] in (redundant_a.id, redundant_b.id) else edge["to"]
            if new_from == new_to:
                continue
            # Check if the edge already exists before adding
            if not self.graph_store.edge_exists(new_from, new_to, edge["type"], direction="ANY"):
                self.graph_store.add_edge(new_from, new_to, edge["type"])

        self.graph_store.update_node(redundant_a.id, {"status": "archived"})
        self.graph_store.update_node(redundant_b.id, {"status": "archived"})
        self.graph_store.add_edge(redundant_a.id, merged.id, type="MERGED_TO")
        self.graph_store.add_edge(redundant_b.id, merged.id, type="MERGED_TO")
        logger.debug(
            f"Archive {redundant_a.id} and {redundant_b.id}, and inherit their edges to {merged.id}."
        )

    def _merge_metadata(
        self,
        memory: str,
        metadata_a: TreeNodeTextualMemoryMetadata,
        metadata_b: TreeNodeTextualMemoryMetadata,
    ) -> TreeNodeTextualMemoryMetadata:
        metadata_1 = metadata_a.model_dump()
        metadata_2 = metadata_b.model_dump()
        merged_metadata = {
            "sources": (metadata_1["sources"] or []) + (metadata_2["sources"] or []),
            "embedding": self.embedder.embed([memory])[0],
            "update_at": datetime.now().isoformat(),
            "created_at": datetime.now().isoformat(),
        }
        for key in metadata_1:
            if key in merged_metadata:
                continue
            merged_metadata[key] = (
                metadata_1[key] if metadata_1[key] is not None else metadata_2[key]
            )
        return TreeNodeTextualMemoryMetadata.model_validate(merged_metadata)
```
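RedundancyHandler._merge_metadata duplicates the rule from ConflictHandler: sources are concatenated, the embedding is recomputed from the fused text, timestamps are reset, and every remaining field takes metadata_1's value unless it is None, in which case metadata_2's value fills in. (Both copies write an "update_at" key while the rest of the code reads updated_at, which looks like a typo, and resolve_two_nodes() returns immediately, marked "waiting for implementation", so its fusion body is currently unreachable.) Here is a self-contained illustration of the precedence rule with plain dicts; the field names are examples, not the full TreeNodeTextualMemoryMetadata schema:

```python
def merge_metadata(meta_1: dict, meta_2: dict) -> dict:
    # sources concatenate; every other field: meta_1 wins unless it is None
    merged = {"sources": (meta_1["sources"] or []) + (meta_2["sources"] or [])}
    for key in meta_1:
        if key in merged:
            continue
        merged[key] = meta_1[key] if meta_1[key] is not None else meta_2[key]
    return merged


a = {"sources": ["chat-1"], "key": None, "confidence": 0.9}
b = {"sources": ["doc-7"], "key": "residence", "confidence": 0.4}
print(merge_metadata(a, b))
# {'sources': ['chat-1', 'doc-7'], 'key': 'residence', 'confidence': 0.9}
```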