MemoryOS 0.2.0-py3-none-any.whl → 0.2.1-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release. This version of MemoryOS might be problematic.
- {memoryos-0.2.0.dist-info → memoryos-0.2.1.dist-info}/METADATA +66 -26
- {memoryos-0.2.0.dist-info → memoryos-0.2.1.dist-info}/RECORD +80 -56
- memoryos-0.2.1.dist-info/entry_points.txt +3 -0
- memos/__init__.py +1 -1
- memos/api/config.py +471 -0
- memos/api/exceptions.py +28 -0
- memos/api/mcp_serve.py +502 -0
- memos/api/product_api.py +35 -0
- memos/api/product_models.py +159 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +358 -0
- memos/chunkers/sentence_chunker.py +8 -2
- memos/cli.py +113 -0
- memos/configs/embedder.py +27 -0
- memos/configs/graph_db.py +83 -2
- memos/configs/llm.py +47 -0
- memos/configs/mem_cube.py +1 -1
- memos/configs/mem_scheduler.py +91 -5
- memos/configs/memory.py +5 -4
- memos/dependency.py +52 -0
- memos/embedders/ark.py +92 -0
- memos/embedders/factory.py +4 -0
- memos/embedders/sentence_transformer.py +8 -2
- memos/embedders/universal_api.py +32 -0
- memos/graph_dbs/base.py +2 -2
- memos/graph_dbs/factory.py +2 -0
- memos/graph_dbs/neo4j.py +331 -122
- memos/graph_dbs/neo4j_community.py +300 -0
- memos/llms/base.py +9 -0
- memos/llms/deepseek.py +54 -0
- memos/llms/factory.py +10 -1
- memos/llms/hf.py +170 -13
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +4 -0
- memos/llms/openai.py +67 -1
- memos/llms/qwen.py +63 -0
- memos/llms/vllm.py +153 -0
- memos/mem_cube/general.py +77 -16
- memos/mem_cube/utils.py +102 -0
- memos/mem_os/core.py +131 -41
- memos/mem_os/main.py +93 -11
- memos/mem_os/product.py +1098 -35
- memos/mem_os/utils/default_config.py +352 -0
- memos/mem_os/utils/format_utils.py +1154 -0
- memos/mem_reader/simple_struct.py +5 -5
- memos/mem_scheduler/base_scheduler.py +467 -36
- memos/mem_scheduler/general_scheduler.py +125 -244
- memos/mem_scheduler/modules/base.py +9 -0
- memos/mem_scheduler/modules/dispatcher.py +68 -2
- memos/mem_scheduler/modules/misc.py +39 -0
- memos/mem_scheduler/modules/monitor.py +228 -49
- memos/mem_scheduler/modules/rabbitmq_service.py +317 -0
- memos/mem_scheduler/modules/redis_service.py +32 -22
- memos/mem_scheduler/modules/retriever.py +250 -23
- memos/mem_scheduler/modules/schemas.py +189 -7
- memos/mem_scheduler/mos_for_test_scheduler.py +143 -0
- memos/mem_scheduler/utils.py +51 -2
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/memories/activation/item.py +25 -0
- memos/memories/activation/kv.py +10 -3
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/factory.py +2 -0
- memos/memories/textual/general.py +7 -5
- memos/memories/textual/tree.py +9 -5
- memos/memories/textual/tree_text_memory/organize/conflict.py +5 -3
- memos/memories/textual/tree_text_memory/organize/manager.py +26 -18
- memos/memories/textual/tree_text_memory/organize/redundancy.py +25 -44
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +11 -13
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +73 -51
- memos/memories/textual/tree_text_memory/retrieve/recall.py +0 -1
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +2 -2
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +6 -5
- memos/parsers/markitdown.py +8 -2
- memos/templates/mem_reader_prompts.py +65 -23
- memos/templates/mem_scheduler_prompts.py +96 -47
- memos/templates/tree_reorganize_prompts.py +85 -30
- memos/vec_dbs/base.py +12 -0
- memos/vec_dbs/qdrant.py +46 -20
- {memoryos-0.2.0.dist-info → memoryos-0.2.1.dist-info}/LICENSE +0 -0
- {memoryos-0.2.0.dist-info → memoryos-0.2.1.dist-info}/WHEEL +0 -0
memos/memories/textual/tree_text_memory/organize/manager.py (+26 -18):

````diff
@@ -5,7 +5,7 @@ from datetime import datetime
 
 from memos.embedders.factory import OllamaEmbedder
 from memos.graph_dbs.neo4j import Neo4jGraphDB
-from memos.llms.factory import OllamaLLM, OpenAILLM
+from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
 from memos.log import get_logger
 from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
 from memos.memories.textual.tree_text_memory.organize.reorganizer import (
@@ -22,7 +22,7 @@ class MemoryManager:
         self,
         graph_store: Neo4jGraphDB,
         embedder: OllamaEmbedder,
-        llm: OpenAILLM | OllamaLLM,
+        llm: OpenAILLM | OllamaLLM | AzureLLM,
         memory_size: dict | None = None,
         threshold: float | None = 0.80,
         merged_threshold: float | None = 0.92,
@@ -49,15 +49,18 @@ class MemoryManager:
         )
         self._merged_threshold = merged_threshold
 
-    def add(self, memories: list[TextualMemoryItem]) ->
+    def add(self, memories: list[TextualMemoryItem]) -> list[str]:
         """
         Add new memories in parallel to different memory types (WorkingMemory, LongTermMemory, UserMemory).
         """
+        added_ids: list[str] = []
+
         with ThreadPoolExecutor(max_workers=8) as executor:
-            futures =
+            futures = {executor.submit(self._process_memory, m): m for m in memories}
             for future in as_completed(futures):
                 try:
-                    future.result()
+                    ids = future.result()
+                    added_ids.extend(ids)
                 except Exception as e:
                     logger.exception("Memory processing error: ", exc_info=e)
@@ -72,6 +75,7 @@ class MemoryManager:
         )
 
         self._refresh_memory_size()
+        return added_ids
 
     def replace_working_memory(self, memories: list[TextualMemoryItem]) -> None:
         """
@@ -113,17 +117,23 @@ class MemoryManager:
         Process and add memory to different memory types (WorkingMemory, LongTermMemory, UserMemory).
         This method runs asynchronously to process each memory item.
         """
+        ids = []
+
         # Add to WorkingMemory
-        self._add_memory_to_db(memory, "WorkingMemory")
+        working_id = self._add_memory_to_db(memory, "WorkingMemory")
+        ids.append(working_id)
 
         # Add to LongTermMemory and UserMemory
         if memory.metadata.memory_type in ["LongTermMemory", "UserMemory"]:
-            self._add_to_graph_memory(
+            added_id = self._add_to_graph_memory(
                 memory=memory,
                 memory_type=memory.metadata.memory_type,
             )
+            ids.append(added_id)
 
-
+        return ids
+
+    def _add_memory_to_db(self, memory: TextualMemoryItem, memory_type: str) -> str:
         """
         Add a single memory item to the graph store, with FIFO logic for WorkingMemory.
         """
@@ -135,6 +145,7 @@ class MemoryManager:
 
         # Insert node into graph
         self.graph_store.add_node(working_memory.id, working_memory.memory, metadata)
+        return working_memory.id
 
     def _add_to_graph_memory(self, memory: TextualMemoryItem, memory_type: str):
         """
@@ -159,7 +170,7 @@ class MemoryManager:
         )
 
         if similar_nodes and similar_nodes[0]["score"] > self._merged_threshold:
-            self._merge(memory, similar_nodes)
+            return self._merge(memory, similar_nodes)
         else:
             node_id = str(uuid.uuid4())
             # Step 2: Add new node to graph
@@ -172,8 +183,9 @@ class MemoryManager:
                 after_node=[node_id],
             )
         )
+        return node_id
 
-    def _merge(self, source_node: TextualMemoryItem, similar_nodes: list[dict]) ->
+    def _merge(self, source_node: TextualMemoryItem, similar_nodes: list[dict]) -> str:
         """
         TODO: Add node traceability support by optionally preserving source nodes and linking them with MERGED_FROM edges.
 
@@ -200,7 +212,9 @@ class MemoryManager:
         merged_background = f"{original_meta.background}\n⟵MERGED⟶\n{source_meta.background}"
         merged_embedding = self.embedder.embed([merged_text])[0]
 
-
+        original_conf = original_meta.confidence or 0.0
+        source_conf = source_meta.confidence or 0.0
+        merged_confidence = float((original_conf + source_conf) / 2)
         merged_usage = list(set((original_meta.usage or []) + (source_meta.usage or [])))
 
         # Create new merged node
@@ -232,13 +246,6 @@ class MemoryManager:
         # After creating merged node and tracing lineage
         self._inherit_edges(original_id, merged_id)
 
-        # Relate other similar nodes to merged if needed
-        for related_node in similar_nodes[1:]:
-            if not self.graph_store.edge_exists(
-                merged_id, related_node["id"], type="ANY", direction="ANY"
-            ):
-                self.graph_store.add_edge(merged_id, related_node["id"], type="RELATE")
-
         # log to reorganizer before updating the graph
         self.reorganizer.add_message(
             QueueMessage(
@@ -250,6 +257,7 @@ class MemoryManager:
                 after_node=[merged_id],
             )
         )
+        return merged_id
 
     def _inherit_edges(self, from_id: str, to_id: str) -> None:
         """
````
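The `add()` change above is the main API shift in this file: it now returns the IDs of every node it stored, collected from parallel workers through a futures dict. A minimal, self-contained sketch of that pattern follows; `process_item` and the string inputs are hypothetical stand-ins for `_process_memory` and real memory items:

```python
import uuid
from concurrent.futures import ThreadPoolExecutor, as_completed


def process_item(item: str) -> list[str]:
    # Hypothetical stand-in for MemoryManager._process_memory: returns
    # the IDs created while storing one input item.
    return [str(uuid.uuid4())]


def add_all(items: list[str]) -> list[str]:
    added_ids: list[str] = []
    with ThreadPoolExecutor(max_workers=8) as executor:
        # Keying the dict by future keeps a future -> input mapping,
        # so a failure can be attributed to the item that caused it.
        futures = {executor.submit(process_item, item): item for item in items}
        for future in as_completed(futures):
            try:
                added_ids.extend(future.result())
            except Exception as exc:
                print(f"processing {futures[future]!r} failed: {exc}")
    return added_ids


print(len(add_all(["a", "b", "c"])))  # 3
```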
memos/memories/textual/tree_text_memory/organize/redundancy.py (+25 -44):

````diff
@@ -30,7 +30,7 @@ class RedundancyHandler:
         self, memory: TextualMemoryItem, top_k: int = 5, scope: str | None = None
     ) -> list[tuple[TextualMemoryItem, TextualMemoryItem]]:
         """
-        Detect redundancy by finding the most similar items in the graph database based on embedding, then use LLM to judge
+        Detect redundancy by finding the most similar items in the graph database based on embedding, then use LLM to judge redundancy.
         Args:
             memory: The memory item (should have an embedding attribute or field).
             top_k: Number of top similar nodes to retrieve.
@@ -49,7 +49,7 @@ class RedundancyHandler:
             for info in embedding_candidates_info
             if info["score"] >= self.EMBEDDING_THRESHOLD and info["id"] != memory.id
         ]
-        # 3. Judge
+        # 3. Judge redundancys using LLM
         embedding_candidates = self.graph_store.get_nodes(embedding_candidates_ids)
         redundant_pairs = []
         for embedding_candidate in embedding_candidates:
@@ -57,7 +57,7 @@ class RedundancyHandler:
             prompt = [
                 {
                     "role": "system",
-                    "content": "You are a
+                    "content": "You are a redundancy detector for memory items.",
                 },
                 {
                     "role": "user",
@@ -71,12 +71,12 @@ class RedundancyHandler:
             if "yes" in result.lower():
                 redundant_pairs.append([memory, embedding_candidate])
         if len(redundant_pairs):
-
+            redundant_text = "\n".join(
                 f'"{pair[0].memory!s}" <==REDUNDANCY==> "{pair[1].memory!s}"'
                 for pair in redundant_pairs
             )
             logger.warning(
-                f"Detected {len(redundant_pairs)} redundancies for memory {memory.id}\n {
+                f"Detected {len(redundant_pairs)} redundancies for memory {memory.id}\n {redundant_text}"
             )
         return redundant_pairs
 
@@ -84,12 +84,12 @@ class RedundancyHandler:
         """
         Resolve detected redundancies between two memory items using LLM fusion.
         Args:
-            memory_a: The first
-            memory_b: The second
+            memory_a: The first redundant memory item.
+            memory_b: The second redundant memory item.
         Returns:
             A fused TextualMemoryItem representing the resolved memory.
         """
-
+        return # waiting for implementation
         # ———————————— 1. LLM generate fused memory ————————————
         metadata_for_resolve = ["key", "background", "confidence", "updated_at"]
         metadata_1 = memory_a.metadata.model_dump_json(include=metadata_for_resolve)
@@ -115,18 +115,10 @@ class RedundancyHandler:
         try:
             answer = re.search(r"<answer>(.*?)</answer>", response, re.DOTALL)
             answer = answer.group(1).strip()
-
-
-
-
-            )
-            self._hard_update(memory_a, memory_b)
-            # —————— 2.2 Conflict resolved, update metadata and memory ————
-            else:
-                fixed_metadata = self._merge_metadata(answer, memory_a.metadata, memory_b.metadata)
-                merged_memory = TextualMemoryItem(memory=answer, metadata=fixed_metadata)
-                logger.info(f"Resolved result: {merged_memory}")
-                self._resolve_in_graph(memory_a, memory_b, merged_memory)
+            fixed_metadata = self._merge_metadata(answer, memory_a.metadata, memory_b.metadata)
+            merged_memory = TextualMemoryItem(memory=answer, metadata=fixed_metadata)
+            logger.info(f"Resolved result: {merged_memory}")
+            self._resolve_in_graph(memory_a, memory_b, merged_memory)
         except json.decoder.JSONDecodeError:
             logger.error(f"Failed to parse LLM response: {response}")
 
@@ -145,29 +137,14 @@ class RedundancyHandler:
         )
         logger.debug(f"Merged memory: {memory.memory}")
 
-    def _hard_update(self, memory_a: TextualMemoryItem, memory_b: TextualMemoryItem):
-        """
-        Hard update: compare updated_at, keep the newer one, overwrite the older one's metadata.
-        """
-        time_a = datetime.fromisoformat(memory_a.metadata.updated_at)
-        time_b = datetime.fromisoformat(memory_b.metadata.updated_at)
-
-        newer_mem = memory_a if time_a >= time_b else memory_b
-        older_mem = memory_b if time_a >= time_b else memory_a
-
-        self.graph_store.delete_node(older_mem.id)
-        logger.warning(
-            f"Delete older memory {older_mem.id}: <{older_mem.memory}> due to conflict with {newer_mem.id}: <{newer_mem.memory}>"
-        )
-
     def _resolve_in_graph(
         self,
-
-
+        redundant_a: TextualMemoryItem,
+        redundant_b: TextualMemoryItem,
         merged: TextualMemoryItem,
     ):
-        edges_a = self.graph_store.get_edges(
-        edges_b = self.graph_store.get_edges(
+        edges_a = self.graph_store.get_edges(redundant_a.id, type="ANY", direction="ANY")
+        edges_b = self.graph_store.get_edges(redundant_b.id, type="ANY", direction="ANY")
         all_edges = edges_a + edges_b
 
         self.graph_store.add_node(
@@ -175,18 +152,22 @@ class RedundancyHandler:
         )
 
         for edge in all_edges:
-            new_from =
-
+            new_from = (
+                merged.id if edge["from"] in (redundant_a.id, redundant_b.id) else edge["from"]
+            )
+            new_to = merged.id if edge["to"] in (redundant_a.id, redundant_b.id) else edge["to"]
             if new_from == new_to:
                 continue
            # Check if the edge already exists before adding
             if not self.graph_store.edge_exists(new_from, new_to, edge["type"], direction="ANY"):
                 self.graph_store.add_edge(new_from, new_to, edge["type"])
 
-        self.graph_store.
-        self.graph_store.
+        self.graph_store.update_node(redundant_a.id, {"status": "archived"})
+        self.graph_store.update_node(redundant_b.id, {"status": "archived"})
+        self.graph_store.add_edge(redundant_a.id, merged.id, type="MERGED_TO")
+        self.graph_store.add_edge(redundant_b.id, merged.id, type="MERGED_TO")
         logger.debug(
-            f"
+            f"Archive {redundant_a.id} and {redundant_b.id}, and inherit their edges to {merged.id}."
         )
 
     def _merge_metadata(
````
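The rewritten `_resolve_in_graph` no longer deletes anything: both redundant nodes are archived, their edges are redirected to the merged node, and `MERGED_TO` edges preserve lineage (the removed `_hard_update` used to delete the older node outright). A minimal sketch of the edge-rewiring step, using a plain list of edge dicts as a stand-in for the Neo4j-backed store API:

```python
def rewire_edges(edges: list[dict], old_ids: set[str], merged_id: str) -> list[dict]:
    """Redirect every edge touching an archived node onto the merged node,
    dropping self-loops and duplicates (the edge_exists() check in the diff)."""
    seen: set[tuple] = set()
    rewired = []
    for edge in edges:
        new_from = merged_id if edge["from"] in old_ids else edge["from"]
        new_to = merged_id if edge["to"] in old_ids else edge["to"]
        if new_from == new_to:  # edge between the two merged nodes
            continue
        key = (new_from, new_to, edge["type"])
        if key in seen:
            continue
        seen.add(key)
        rewired.append({"from": new_from, "to": new_to, "type": edge["type"]})
    return rewired


edges = [
    {"from": "a", "to": "c", "type": "RELATE"},
    {"from": "b", "to": "c", "type": "RELATE"},  # duplicate after rewiring
    {"from": "a", "to": "b", "type": "RELATE"},  # becomes a self-loop
]
print(rewire_edges(edges, {"a", "b"}, "m"))
# [{'from': 'm', 'to': 'c', 'type': 'RELATE'}]
```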
memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py (+11 -13):

````diff
@@ -54,20 +54,26 @@ class RelationAndReasoningDetector:
         )
         nearest = [GraphDBNode(**cand_data) for cand_data in nearest]
 
+        """
         # 1) Pairwise relations (including CAUSE/CONDITION/CONFLICT)
         pairwise = self._detect_pairwise_causal_condition_relations(node, nearest)
         results["relations"].extend(pairwise["relations"])
+        """
 
+        """
         # 2) Inferred nodes (from causal/condition)
         inferred = self._infer_fact_nodes_from_relations(pairwise)
         results["inferred_nodes"].extend(inferred)
+        """
 
-
+        """
+        3) Sequence (optional, if you have timestamps)
         seq = self._detect_sequence_links(node, nearest)
         results["sequence_links"].extend(seq)
+        """
 
         # 4) Aggregate
-        agg = self._detect_aggregate_node_for_group(node, nearest, min_group_size=
+        agg = self._detect_aggregate_node_for_group(node, nearest, min_group_size=5)
         if agg:
             results["aggregate_nodes"].append(agg)
 
@@ -80,7 +86,7 @@ class RelationAndReasoningDetector:
         Vector/tag search ➜ For each candidate, use LLM to decide:
         - CAUSE
         - CONDITION
-        -
+        - RELATE
         - CONFLICT
         """
         results = {"relations": []}
@@ -168,7 +174,7 @@ class RelationAndReasoningDetector:
         combined_nodes = [node, *nearest_nodes]
 
         joined = "\n".join(f"- {n.memory}" for n in combined_nodes)
-        prompt = AGGREGATE_PROMPT.
+        prompt = AGGREGATE_PROMPT.replace("{joined}", joined)
         response_text = self._call_llm(prompt)
         response_json = self._parse_json_result(response_text)
         if not response_json:
@@ -205,14 +211,6 @@ class RelationAndReasoningDetector:
             logger.warning(f"[LLM Error] {e}")
             return ""
 
-    def _parse_relation_result(self, response_text: str) -> str:
-        relation = response_text.strip().upper()
-        valid = {"CAUSE", "CONDITION", "RELATE_TO", "CONFLICT", "NONE"}
-        if relation not in valid:
-            logger.warning(f"[RelationDetector] Unexpected relation: {relation}. Fallback NONE.")
-            return "NONE"
-        return relation
-
     def _parse_json_result(self, response_text):
         try:
             response_text = response_text.replace("```", "").replace("json", "")
@@ -226,7 +224,7 @@ class RelationAndReasoningDetector:
         Normalize and validate the LLM relation type output.
         """
         relation = response_text.strip().upper()
-        valid = {"CAUSE", "CONDITION", "
+        valid = {"CAUSE", "CONDITION", "RELATE", "CONFLICT", "NONE"}
         if relation not in valid:
             logger.warning(
                 f"[RelationDetector] Unexpected relation type: {relation}. Fallback to NONE."
````
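The deleted `_parse_relation_result` duplicated the validator that survives at the bottom of the file, and the valid vocabulary changes from `RELATE_TO` to `RELATE`, matching the updated docstring. A standalone restatement of the surviving normalizer, showing that the old label now falls back to `NONE`:

```python
import logging

logger = logging.getLogger(__name__)

VALID_RELATIONS = {"CAUSE", "CONDITION", "RELATE", "CONFLICT", "NONE"}


def parse_relation(response_text: str) -> str:
    """Normalize an LLM-produced relation label; anything outside the
    allowed vocabulary falls back to NONE."""
    relation = response_text.strip().upper()
    if relation not in VALID_RELATIONS:
        logger.warning("Unexpected relation type: %s. Fallback to NONE.", relation)
        return "NONE"
    return relation


assert parse_relation(" relate \n") == "RELATE"
assert parse_relation("RELATE_TO") == "NONE"  # the pre-0.2.1 label no longer validates
```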
memos/memories/textual/tree_text_memory/organize/reorganizer.py (+73 -51):

````diff
@@ -3,15 +3,14 @@ import threading
 import time
 import traceback
 
+from collections import Counter, defaultdict
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from queue import PriorityQueue
 from typing import Literal
 
 import numpy as np
-import schedule
-
-from sklearn.cluster import MiniBatchKMeans
 
+from memos.dependency import require_python_package
 from memos.embedders.factory import OllamaEmbedder
 from memos.graph_dbs.item import GraphDBEdge, GraphDBNode
 from memos.graph_dbs.neo4j import Neo4jGraphDB
@@ -32,7 +31,7 @@ logger = get_logger(__name__)
 class QueueMessage:
     def __init__(
         self,
-        op: Literal["add", "remove", "merge", "update"],
+        op: Literal["add", "remove", "merge", "update", "end"],
         # `str` for node and edge IDs, `GraphDBNode` and `GraphDBEdge` for actual objects
         before_node: list[str] | list[GraphDBNode] | None = None,
         before_edge: list[str] | list[GraphDBEdge] | None = None,
@@ -49,7 +48,7 @@ class QueueMessage:
         return f"QueueMessage(op={self.op}, before_node={self.before_node if self.before_node is None else len(self.before_node)}, after_node={self.after_node if self.after_node is None else len(self.after_node)})"
 
     def __lt__(self, other: "QueueMessage") -> bool:
-        op_priority = {"add": 2, "remove": 2, "merge": 1}
+        op_priority = {"add": 2, "remove": 2, "merge": 1, "end": 0}
         return op_priority[self.op] < op_priority[other.op]
 
 
@@ -104,7 +103,7 @@ class GraphStructureReorganizer:
     def _run_message_consumer_loop(self):
         while True:
             message = self.queue.get()
-            if message
+            if message.op == "end":
                 break
 
             try:
@@ -114,11 +113,18 @@ class GraphStructureReorganizer:
                 logger.error(traceback.format_exc())
             self.queue.task_done()
 
+    @require_python_package(
+        import_name="schedule",
+        install_command="pip install schedule",
+        install_link="https://schedule.readthedocs.io/en/stable/installation.html",
+    )
     def _run_structure_organizer_loop(self):
         """
         Use schedule library to periodically trigger structure optimization.
         This runs until the stop flag is set.
         """
+        import schedule
+
         schedule.every(20).seconds.do(self.optimize_structure, scope="LongTermMemory")
         schedule.every(20).seconds.do(self.optimize_structure, scope="UserMemory")
 
@@ -134,7 +140,7 @@ class GraphStructureReorganizer:
         if not self.is_reorganize:
             return
 
-        self.add_message(
+        self.add_message(QueueMessage(op="end"))
         self.thread.join()
         logger.info("Reorganize thread stopped.")
         self._stop_scheduler = True
@@ -152,9 +158,6 @@ class GraphStructureReorganizer:
 
     def handle_add(self, message: QueueMessage):
         logger.debug(f"Handling add operation: {str(message)[:500]}")
-        assert message.before_node is None and message.before_edge is None, (
-            "Before node and edge should be None for `add` operation."
-        )
         # ———————— 1. check for conflicts ————————
         added_node = message.after_node[0]
         conflicts = self.conflict.detect(added_node, scope=added_node.metadata.memory_type)
@@ -164,9 +167,9 @@ class GraphStructureReorganizer:
             logger.info(f"Resolved conflict between {added_node.id} and {existing_node.id}.")
 
         # ———————— 2. check for redundancy ————————
-
-        if
-        for added_node, existing_node in
+        redundancies = self.redundancy.detect(added_node, scope=added_node.metadata.memory_type)
+        if redundancies:
+            for added_node, existing_node in redundancies:
                 self.redundancy.resolve_two_nodes(added_node, existing_node)
                 logger.info(f"Resolved redundancy between {added_node.id} and {existing_node.id}.")
 
@@ -176,14 +179,14 @@ class GraphStructureReorganizer:
     def handle_merge(self, message: QueueMessage):
         after_node = message.after_node[0]
         logger.debug(f"Handling merge operation: <{after_node.memory}>")
-        self.
+        self.redundancy.resolve_one_node(after_node)
 
     def optimize_structure(
         self,
         scope: str = "LongTermMemory",
         local_tree_threshold: int = 10,
         min_cluster_size: int = 3,
-        min_group_size: int =
+        min_group_size: int = 5,
     ):
         """
         Periodically reorganize the graph:
````
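The module-level `schedule` and `sklearn` imports are gone from this file: both are now imported lazily inside the methods that need them, guarded by the new `@require_python_package` decorator, so importing the package no longer hard-requires these optional dependencies. `memos/dependency.py` is new in 0.2.1 but its body is not shown in this diff; a decorator with the signature used here might look roughly like the following sketch (an assumption, not the actual implementation):

```python
import functools
import importlib


def require_python_package(import_name: str, install_command: str, install_link: str = ""):
    """Sketch only: raise an actionable error when an optional dependency is
    missing. The real memos.dependency.require_python_package may differ."""

    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            try:
                importlib.import_module(import_name)
            except ImportError as exc:
                hint = f" See {install_link}" if install_link else ""
                raise ImportError(
                    f"{func.__qualname__} requires '{import_name}'. "
                    f"Install it with `{install_command}`.{hint}"
                ) from exc
            return func(*args, **kwargs)

        return wrapper

    return decorator


@require_python_package(
    import_name="schedule",
    install_command="pip install schedule",
    install_link="https://schedule.readthedocs.io/en/stable/installation.html",
)
def run_loop():
    import schedule  # deferred import, as in the patched reorganizer
```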
memos/memories/textual/tree_text_memory/organize/reorganizer.py (continued):

````diff
@@ -358,7 +361,7 @@ class GraphStructureReorganizer:
             scene_lines.append(line)
 
         joined_scene = "\n".join(scene_lines)
-        prompt = LOCAL_SUBCLUSTER_PROMPT.
+        prompt = LOCAL_SUBCLUSTER_PROMPT.replace("{joined_scene}", joined_scene)
 
         messages = [{"role": "user", "content": prompt}]
         response_text = self.llm.generate(messages)
@@ -378,9 +381,12 @@ class GraphStructureReorganizer:
 
         return result_subclusters
 
-
-
-
+    @require_python_package(
+        import_name="sklearn",
+        install_command="pip install scikit-learn",
+        install_link="https://scikit-learn.org/stable/install.html",
+    )
+    def _partition(self, nodes, min_cluster_size: int = 3, max_cluster_size: int = 20):
         """
         Partition nodes by:
         1) Frequent tags (top N & above threshold)
@@ -394,7 +400,7 @@ class GraphStructureReorganizer:
         Returns:
             List of clusters, each as a list of GraphDBNode
         """
-        from
+        from sklearn.cluster import MiniBatchKMeans
 
         # 1) Count all tags
         tag_counter = Counter()
@@ -407,7 +413,7 @@ class GraphStructureReorganizer:
         threshold_tags = {tag for tag, count in tag_counter.items() if count >= 50}
         frequent_tags = top_n_tags | threshold_tags
 
-        # Group nodes by tags
+        # Group nodes by tags
         tag_groups = defaultdict(list)
 
         for node in nodes:
@@ -420,48 +426,67 @@ class GraphStructureReorganizer:
         assigned_ids = set()
         for tag, group in tag_groups.items():
             if len(group) >= min_cluster_size:
-
-
+                # Split large groups into chunks of at most max_cluster_size
+                for i in range(0, len(group), max_cluster_size):
+                    sub_group = group[i : i + max_cluster_size]
+                    filtered_tag_clusters.append(sub_group)
+                    assigned_ids.update(n.id for n in sub_group)
             else:
-                logger.info(f"... dropped {tag} ...")
+                logger.info(f"... dropped tag {tag} due to low size ...")
 
         logger.info(
             f"[MixedPartition] Created {len(filtered_tag_clusters)} clusters from tags. "
             f"Nodes grouped by tags: {len(assigned_ids)} / {len(nodes)}"
         )
 
-        #
+        # Remaining nodes -> embedding clustering
         remaining_nodes = [n for n in nodes if n.id not in assigned_ids]
         logger.info(
             f"[MixedPartition] Remaining nodes for embedding clustering: {len(remaining_nodes)}"
         )
 
         embedding_clusters = []
-        if remaining_nodes:
-            x = np.array([n.metadata.embedding for n in remaining_nodes if n.metadata.embedding])
-            k = max(1, min(len(remaining_nodes) // min_cluster_size, 20))
-            if len(x) < k:
-                k = len(x)
 
-
+        def recursive_clustering(nodes_list):
+            """Recursively split clusters until each is <= max_cluster_size."""
+            if len(nodes_list) <= max_cluster_size:
+                return [nodes_list]
+
+            # Try kmeans with k = ceil(len(nodes) / max_cluster_size)
+            x = np.array([n.metadata.embedding for n in nodes_list if n.metadata.embedding])
+            if len(x) < 2:
+                return [nodes_list]
+
+            k = min(len(x), (len(nodes_list) + max_cluster_size - 1) // max_cluster_size)
+            k = max(1, min(k, len(x)))
+
+            try:
                 kmeans = MiniBatchKMeans(n_clusters=k, batch_size=256, random_state=42)
                 labels = kmeans.fit_predict(x)
 
                 label_groups = defaultdict(list)
-                for node, label in zip(
+                for node, label in zip(nodes_list, labels, strict=False):
                     label_groups[label].append(node)
 
-
-
-
-
-
-
+                result = []
+                for sub_group in label_groups.values():
+                    result.extend(recursive_clustering(sub_group))
+                return result
+            except Exception as e:
+                logger.warning(f"Clustering failed: {e}, falling back to single cluster.")
+                return [nodes_list]
+
+        if remaining_nodes:
+            clusters = recursive_clustering(remaining_nodes)
+            embedding_clusters.extend(clusters)
+            logger.info(
+                f"[MixedPartition] Created {len(embedding_clusters)} clusters from embeddings."
+            )
 
-        # Merge all
+        # Merge all clusters
         all_clusters = filtered_tag_clusters + embedding_clusters
 
-        #
+        # Handle small clusters (< min_cluster_size)
         final_clusters = []
         small_nodes = []
         for group in all_clusters:
@@ -484,18 +509,15 @@ class GraphStructureReorganizer:
         if not cluster_nodes:
             raise ValueError("Cluster nodes cannot be empty.")
 
-
-
-
-
+        memories_items_text = "\n\n".join(
+            [
+                f"{i}. key: {n.metadata.key}\nvalue: {n.memory}\nsummary:{n.metadata.background}"
+                for i, n in enumerate(cluster_nodes)
+            ]
         )
 
         # Build prompt
-        prompt = REORGANIZE_PROMPT.
-            joined_keys=joined_keys,
-            joined_values=joined_values,
-            joined_backgrounds=joined_backgrounds,
-        )
+        prompt = REORGANIZE_PROMPT.replace("{memory_items_text}", memories_items_text)
 
         messages = [{"role": "user", "content": prompt}]
         response_text = self.llm.generate(messages)
@@ -505,7 +527,7 @@ class GraphStructureReorganizer:
         parent_key = response_json.get("key", "").strip()
         parent_value = response_json.get("value", "").strip()
         parent_tags = response_json.get("tags", [])
-        parent_background = response_json.get("
+        parent_background = response_json.get("summary", "").strip()
 
         embedding = self.embedder.embed([parent_value])[0]
 
@@ -561,7 +583,7 @@ class GraphStructureReorganizer:
 
     def _preprocess_message(self, message: QueueMessage) -> bool:
         message = self._convert_id_to_node(message)
-        if None in message.after_node:
+        if message.after_node is None or None in message.after_node:
             logger.debug(
                 f"Found non-existent node in after_node in message: {message}, skip this message."
             )
````
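Several prompt templates in this file (and `AGGREGATE_PROMPT` in `relation_reason_detector.py` above) switch from `str.format`-style interpolation to a plain `str.replace` on a single placeholder. A likely motivation, shown below: `str.format` treats any literal braces in the template as format fields and raises, and prompts that embed JSON examples are full of literal braces.

```python
TEMPLATE = 'Return JSON like {"key": "..."} for these items:\n{joined}'

# str.format parses the JSON braces as format fields and raises,
# so substituting the single placeholder directly is safer.
try:
    TEMPLATE.format(joined="- item A")
except (KeyError, IndexError) as exc:
    print("format() fails on literal braces:", repr(exc))

print(TEMPLATE.replace("{joined}", "- item A"))
```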
memos/memories/textual/tree_text_memory/retrieve/recall.py (+0 -1):

````diff
@@ -56,7 +56,6 @@ class GraphMemoryRetriever:
         # Step 3: Merge and deduplicate results
         combined = {item.id: item for item in graph_results + vector_results}
 
-        # Debug: 打印在 graph_results 中但不在 combined 中的 id
         graph_ids = {item.id for item in graph_results}
         combined_ids = set(combined.keys())
         lost_ids = graph_ids - combined_ids
````
memos/memories/textual/tree_text_memory/retrieve/reranker.py (+2 -2):

````diff
@@ -1,7 +1,7 @@
 import numpy as np
 
 from memos.embedders.factory import OllamaEmbedder
-from memos.llms.factory import OllamaLLM, OpenAILLM
+from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
 from memos.memories.textual.item import TextualMemoryItem
 from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
 
@@ -41,7 +41,7 @@ class MemoryReranker:
     Rank retrieved memory cards by structural priority and contextual similarity.
     """
 
-    def __init__(self, llm: OpenAILLM | OllamaLLM, embedder: OllamaEmbedder):
+    def __init__(self, llm: OpenAILLM | OllamaLLM | AzureLLM, embedder: OllamaEmbedder):
         self.llm = llm
         self.embedder = embedder
 
````