MemoryOS 0.2.0-py3-none-any.whl → 0.2.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MemoryOS might be problematic.
- {memoryos-0.2.0.dist-info → memoryos-0.2.2.dist-info}/METADATA +67 -26
- memoryos-0.2.2.dist-info/RECORD +169 -0
- memoryos-0.2.2.dist-info/entry_points.txt +3 -0
- memos/__init__.py +1 -1
- memos/api/config.py +562 -0
- memos/api/context/context.py +147 -0
- memos/api/context/dependencies.py +90 -0
- memos/api/exceptions.py +28 -0
- memos/api/mcp_serve.py +502 -0
- memos/api/product_api.py +35 -0
- memos/api/product_models.py +163 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +386 -0
- memos/chunkers/sentence_chunker.py +8 -2
- memos/cli.py +113 -0
- memos/configs/embedder.py +27 -0
- memos/configs/graph_db.py +132 -3
- memos/configs/internet_retriever.py +6 -0
- memos/configs/llm.py +47 -0
- memos/configs/mem_cube.py +1 -1
- memos/configs/mem_os.py +5 -0
- memos/configs/mem_reader.py +9 -0
- memos/configs/mem_scheduler.py +107 -7
- memos/configs/mem_user.py +58 -0
- memos/configs/memory.py +5 -4
- memos/dependency.py +52 -0
- memos/embedders/ark.py +92 -0
- memos/embedders/factory.py +4 -0
- memos/embedders/sentence_transformer.py +8 -2
- memos/embedders/universal_api.py +32 -0
- memos/graph_dbs/base.py +11 -3
- memos/graph_dbs/factory.py +4 -0
- memos/graph_dbs/nebular.py +1364 -0
- memos/graph_dbs/neo4j.py +333 -124
- memos/graph_dbs/neo4j_community.py +300 -0
- memos/llms/base.py +9 -0
- memos/llms/deepseek.py +54 -0
- memos/llms/factory.py +10 -1
- memos/llms/hf.py +170 -13
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +4 -0
- memos/llms/openai.py +67 -1
- memos/llms/qwen.py +63 -0
- memos/llms/vllm.py +153 -0
- memos/log.py +1 -1
- memos/mem_cube/general.py +77 -16
- memos/mem_cube/utils.py +109 -0
- memos/mem_os/core.py +251 -51
- memos/mem_os/main.py +94 -12
- memos/mem_os/product.py +1220 -43
- memos/mem_os/utils/default_config.py +352 -0
- memos/mem_os/utils/format_utils.py +1401 -0
- memos/mem_reader/simple_struct.py +18 -10
- memos/mem_scheduler/base_scheduler.py +441 -40
- memos/mem_scheduler/general_scheduler.py +249 -248
- memos/mem_scheduler/modules/base.py +14 -5
- memos/mem_scheduler/modules/dispatcher.py +67 -4
- memos/mem_scheduler/modules/misc.py +104 -0
- memos/mem_scheduler/modules/monitor.py +240 -50
- memos/mem_scheduler/modules/rabbitmq_service.py +319 -0
- memos/mem_scheduler/modules/redis_service.py +32 -22
- memos/mem_scheduler/modules/retriever.py +167 -23
- memos/mem_scheduler/modules/scheduler_logger.py +255 -0
- memos/mem_scheduler/mos_for_test_scheduler.py +140 -0
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/general_schemas.py +43 -0
- memos/mem_scheduler/{modules/schemas.py → schemas/message_schemas.py} +63 -61
- memos/mem_scheduler/schemas/monitor_schemas.py +329 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/misc_utils.py +61 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +500 -0
- memos/mem_user/persistent_factory.py +96 -0
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/mem_user/user_manager.py +4 -4
- memos/memories/activation/item.py +29 -0
- memos/memories/activation/kv.py +10 -3
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/factory.py +2 -0
- memos/memories/textual/base.py +1 -1
- memos/memories/textual/general.py +43 -97
- memos/memories/textual/item.py +5 -33
- memos/memories/textual/tree.py +22 -12
- memos/memories/textual/tree_text_memory/organize/conflict.py +9 -5
- memos/memories/textual/tree_text_memory/organize/manager.py +26 -18
- memos/memories/textual/tree_text_memory/organize/redundancy.py +25 -44
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +50 -48
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +81 -56
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -3
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +2 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +0 -1
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +2 -2
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +2 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +52 -28
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +42 -15
- memos/memories/textual/tree_text_memory/retrieve/utils.py +11 -7
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +62 -58
- memos/memos_tools/dinding_report_bot.py +422 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +96 -0
- memos/parsers/markitdown.py +8 -2
- memos/settings.py +3 -1
- memos/templates/mem_reader_prompts.py +66 -23
- memos/templates/mem_scheduler_prompts.py +126 -43
- memos/templates/mos_prompts.py +87 -0
- memos/templates/tree_reorganize_prompts.py +85 -30
- memos/vec_dbs/base.py +12 -0
- memos/vec_dbs/qdrant.py +46 -20
- memoryos-0.2.0.dist-info/RECORD +0 -128
- memos/mem_scheduler/utils.py +0 -26
- {memoryos-0.2.0.dist-info → memoryos-0.2.2.dist-info}/LICENSE +0 -0
- {memoryos-0.2.0.dist-info → memoryos-0.2.2.dist-info}/WHEEL +0 -0
memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py

@@ -1,4 +1,5 @@
 import json
+import traceback

 from memos.embedders.factory import OllamaEmbedder
 from memos.graph_dbs.item import GraphDBNode

@@ -30,47 +31,57 @@ class RelationAndReasoningDetector:
         3) Sequence links
         4) Aggregate concepts
         """
-        if node.metadata.type == "reasoning":
-            logger.info(f"Skip reasoning for inferred node {node.id}")
-            return {
-                "relations": [],
-                "inferred_nodes": [],
-                "sequence_links": [],
-                "aggregate_nodes": [],
-            }
-
         results = {
             "relations": [],
             "inferred_nodes": [],
             "sequence_links": [],
             "aggregate_nodes": [],
         }
+        try:
+            if node.metadata.type == "reasoning":
+                logger.info(f"Skip reasoning for inferred node {node.id}")
+                return {
+                    "relations": [],
+                    "inferred_nodes": [],
+                    "sequence_links": [],
+                    "aggregate_nodes": [],
+                }
+
+            nearest = self.graph_store.get_neighbors_by_tag(
+                tags=node.metadata.tags,
+                exclude_ids=exclude_ids,
+                top_k=top_k,
+                min_overlap=2,
+            )
+            nearest = [GraphDBNode(**cand_data) for cand_data in nearest]
+
+            """
+            # 1) Pairwise relations (including CAUSE/CONDITION/CONFLICT)
+            pairwise = self._detect_pairwise_causal_condition_relations(node, nearest)
+            results["relations"].extend(pairwise["relations"])
+            """
+
+            """
+            # 2) Inferred nodes (from causal/condition)
+            inferred = self._infer_fact_nodes_from_relations(pairwise)
+            results["inferred_nodes"].extend(inferred)
+            """
+
+            """
+            3) Sequence (optional, if you have timestamps)
+            seq = self._detect_sequence_links(node, nearest)
+            results["sequence_links"].extend(seq)
+            """
+
+            # 4) Aggregate
+            agg = self._detect_aggregate_node_for_group(node, nearest, min_group_size=5)
+            if agg:
+                results["aggregate_nodes"].append(agg)

-
-
-
-
-            min_overlap=2,
-        )
-        nearest = [GraphDBNode(**cand_data) for cand_data in nearest]
-
-        # 1) Pairwise relations (including CAUSE/CONDITION/CONFLICT)
-        pairwise = self._detect_pairwise_causal_condition_relations(node, nearest)
-        results["relations"].extend(pairwise["relations"])
-
-        # 2) Inferred nodes (from causal/condition)
-        inferred = self._infer_fact_nodes_from_relations(pairwise)
-        results["inferred_nodes"].extend(inferred)
-
-        # 3) Sequence (optional, if you have timestamps)
-        seq = self._detect_sequence_links(node, nearest)
-        results["sequence_links"].extend(seq)
-
-        # 4) Aggregate
-        agg = self._detect_aggregate_node_for_group(node, nearest, min_group_size=3)
-        if agg:
-            results["aggregate_nodes"].append(agg)
-
+        except Exception as e:
+            logger.error(
+                f"Error {e} while process struct reorganize: trace: {traceback.format_exc()}"
+            )
         return results

     def _detect_pairwise_causal_condition_relations(

@@ -80,7 +91,7 @@ class RelationAndReasoningDetector:
         Vector/tag search ➜ For each candidate, use LLM to decide:
         - CAUSE
         - CONDITION
-        -
+        - RELATE
        - CONFLICT
         """
         results = {"relations": []}

@@ -168,12 +179,11 @@ class RelationAndReasoningDetector:
         combined_nodes = [node, *nearest_nodes]

         joined = "\n".join(f"- {n.memory}" for n in combined_nodes)
-        prompt = AGGREGATE_PROMPT.
+        prompt = AGGREGATE_PROMPT.replace("{joined}", joined)
         response_text = self._call_llm(prompt)
-
-        if not
+        summary = self._parse_json_result(response_text)
+        if not summary:
             return None
-        summary = json.loads(response_text)
         embedding = self.embedder.embed([summary["value"]])[0]

         parent_node = GraphDBNode(

@@ -205,14 +215,6 @@ class RelationAndReasoningDetector:
             logger.warning(f"[LLM Error] {e}")
             return ""

-    def _parse_relation_result(self, response_text: str) -> str:
-        relation = response_text.strip().upper()
-        valid = {"CAUSE", "CONDITION", "RELATE_TO", "CONFLICT", "NONE"}
-        if relation not in valid:
-            logger.warning(f"[RelationDetector] Unexpected relation: {relation}. Fallback NONE.")
-            return "NONE"
-        return relation
-
     def _parse_json_result(self, response_text):
         try:
             response_text = response_text.replace("```", "").replace("json", "")

@@ -226,7 +228,7 @@ class RelationAndReasoningDetector:
         Normalize and validate the LLM relation type output.
         """
         relation = response_text.strip().upper()
-        valid = {"CAUSE", "CONDITION", "
+        valid = {"CAUSE", "CONDITION", "RELATE", "CONFLICT", "NONE"}
         if relation not in valid:
             logger.warning(
                 f"[RelationDetector] Unexpected relation type: {relation}. Fallback to NONE."
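Two smaller changes in this file are worth calling out: the aggregate-summary path now goes through `_parse_json_result` (which strips Markdown code fences before `json.loads`) instead of parsing the raw LLM reply, and the accepted relation label set swaps `RELATE_TO` for `RELATE`. A minimal standalone sketch of that parse-and-validate pattern; the helper names here are illustrative, not the package's API:

```python
import json
import logging

logger = logging.getLogger(__name__)

VALID_RELATIONS = {"CAUSE", "CONDITION", "RELATE", "CONFLICT", "NONE"}


def parse_json_result(response_text: str) -> dict:
    """Strip Markdown fences the LLM may add, then parse JSON; return {} on failure."""
    try:
        cleaned = response_text.replace("```", "").replace("json", "")
        return json.loads(cleaned)
    except json.JSONDecodeError:
        logger.warning("Could not parse LLM response as JSON: %r", response_text[:200])
        return {}


def parse_relation_type(response_text: str) -> str:
    """Normalize the LLM relation label and fall back to NONE on anything unexpected."""
    relation = response_text.strip().upper()
    if relation not in VALID_RELATIONS:
        logger.warning("Unexpected relation type: %s. Fallback to NONE.", relation)
        return "NONE"
    return relation
```

One caveat of the fence-stripping shown in the hunk: replacing the literal substring "json" also removes it from values inside the payload, so a stricter variant would only strip a leading "```json" fence.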
memos/memories/textual/tree_text_memory/organize/reorganizer.py

@@ -3,15 +3,14 @@ import threading
 import time
 import traceback

+from collections import Counter, defaultdict
 from concurrent.futures import ThreadPoolExecutor, as_completed
 from queue import PriorityQueue
 from typing import Literal

 import numpy as np
-import schedule
-
-from sklearn.cluster import MiniBatchKMeans

+from memos.dependency import require_python_package
 from memos.embedders.factory import OllamaEmbedder
 from memos.graph_dbs.item import GraphDBEdge, GraphDBNode
 from memos.graph_dbs.neo4j import Neo4jGraphDB

@@ -32,7 +31,7 @@ logger = get_logger(__name__)
 class QueueMessage:
     def __init__(
         self,
-        op: Literal["add", "remove", "merge", "update"],
+        op: Literal["add", "remove", "merge", "update", "end"],
         # `str` for node and edge IDs, `GraphDBNode` and `GraphDBEdge` for actual objects
         before_node: list[str] | list[GraphDBNode] | None = None,
         before_edge: list[str] | list[GraphDBEdge] | None = None,

@@ -49,7 +48,7 @@ class QueueMessage:
         return f"QueueMessage(op={self.op}, before_node={self.before_node if self.before_node is None else len(self.before_node)}, after_node={self.after_node if self.after_node is None else len(self.after_node)})"

     def __lt__(self, other: "QueueMessage") -> bool:
-        op_priority = {"add": 2, "remove": 2, "merge": 1}
+        op_priority = {"add": 2, "remove": 2, "merge": 1, "end": 0}
         return op_priority[self.op] < op_priority[other.op]

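The new `end` operation lets the reorganizer shut its consumer thread down through the same `PriorityQueue` it processes: `stop()` now enqueues `QueueMessage(op="end")` (see the `@@ -134,7 +140,7 @@` hunk below) and the consumer loop breaks on it. With priority 0 the sentinel is also dequeued ahead of pending `add`/`merge` work, since `PriorityQueue` pops the smallest item first. A rough standalone sketch of the pattern, using simplified names (`Message`, `consumer_loop`) rather than the package's own classes:

```python
import threading
from queue import PriorityQueue

OP_PRIORITY = {"add": 2, "remove": 2, "merge": 1, "end": 0}


class Message:
    def __init__(self, op: str, payload=None):
        self.op = op
        self.payload = payload

    def __lt__(self, other: "Message") -> bool:
        # PriorityQueue pops the smallest item first, so "end" (0) preempts pending work.
        return OP_PRIORITY[self.op] < OP_PRIORITY[other.op]


def consumer_loop(queue: PriorityQueue) -> None:
    while True:
        message = queue.get()
        if message.op == "end":   # sentinel: stop the worker
            queue.task_done()
            break
        try:
            print(f"handling {message.op}: {message.payload}")
        finally:
            queue.task_done()


queue: PriorityQueue = PriorityQueue()
worker = threading.Thread(target=consumer_loop, args=(queue,), daemon=True)
worker.start()
queue.put(Message("add", {"id": "n1"}))
queue.put(Message("merge", {"id": "n2"}))
queue.put(Message("end"))
worker.join()
```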
@@ -104,7 +103,7 @@ class GraphStructureReorganizer:
     def _run_message_consumer_loop(self):
         while True:
             message = self.queue.get()
-            if message
+            if message.op == "end":
                 break

             try:

@@ -114,13 +113,20 @@ class GraphStructureReorganizer:
                 logger.error(traceback.format_exc())
             self.queue.task_done()

+    @require_python_package(
+        import_name="schedule",
+        install_command="pip install schedule",
+        install_link="https://schedule.readthedocs.io/en/stable/installation.html",
+    )
     def _run_structure_organizer_loop(self):
         """
         Use schedule library to periodically trigger structure optimization.
         This runs until the stop flag is set.
         """
-        schedule
-
+        import schedule
+
+        schedule.every(600).seconds.do(self.optimize_structure, scope="LongTermMemory")
+        schedule.every(600).seconds.do(self.optimize_structure, scope="UserMemory")

         logger.info("Structure optimizer schedule started.")
         while not getattr(self, "_stop_scheduler", False):
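`_run_structure_organizer_loop` now imports `schedule` lazily inside the method, guarded by the new `@require_python_package` decorator (the decorator lives in the newly added `memos/dependency.py`), which appears to turn the hard dependency into an optional one with an install hint. A minimal sketch of the underlying `schedule` usage, with a plain flag standing in for the class's `_stop_scheduler` attribute:

```python
import time

import schedule  # third-party: pip install schedule


def optimize_structure(scope: str) -> None:
    print(f"optimizing {scope} ...")


# Trigger both scopes every 600 seconds, mirroring the hunk above.
schedule.every(600).seconds.do(optimize_structure, scope="LongTermMemory")
schedule.every(600).seconds.do(optimize_structure, scope="UserMemory")

stop_scheduler = False
while not stop_scheduler:
    schedule.run_pending()  # run any job whose interval has elapsed
    time.sleep(1)
```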
@@ -134,7 +140,7 @@ class GraphStructureReorganizer:
         if not self.is_reorganize:
             return

-        self.add_message(
+        self.add_message(QueueMessage(op="end"))
         self.thread.join()
         logger.info("Reorganize thread stopped.")
         self._stop_scheduler = True

@@ -152,9 +158,6 @@ class GraphStructureReorganizer:

     def handle_add(self, message: QueueMessage):
         logger.debug(f"Handling add operation: {str(message)[:500]}")
-        assert message.before_node is None and message.before_edge is None, (
-            "Before node and edge should be None for `add` operation."
-        )
         # ———————— 1. check for conflicts ————————
         added_node = message.after_node[0]
         conflicts = self.conflict.detect(added_node, scope=added_node.metadata.memory_type)

@@ -164,9 +167,9 @@ class GraphStructureReorganizer:
             logger.info(f"Resolved conflict between {added_node.id} and {existing_node.id}.")

         # ———————— 2. check for redundancy ————————
-
-        if
-        for added_node, existing_node in
+        redundancies = self.redundancy.detect(added_node, scope=added_node.metadata.memory_type)
+        if redundancies:
+            for added_node, existing_node in redundancies:
                self.redundancy.resolve_two_nodes(added_node, existing_node)
                logger.info(f"Resolved redundancy between {added_node.id} and {existing_node.id}.")

@@ -176,14 +179,14 @@ class GraphStructureReorganizer:
     def handle_merge(self, message: QueueMessage):
         after_node = message.after_node[0]
         logger.debug(f"Handling merge operation: <{after_node.memory}>")
-        self.
+        self.redundancy.resolve_one_node(after_node)

     def optimize_structure(
         self,
         scope: str = "LongTermMemory",
         local_tree_threshold: int = 10,
         min_cluster_size: int = 3,
-        min_group_size: int =
+        min_group_size: int = 5,
     ):
         """
         Periodically reorganize the graph:

@@ -195,7 +198,7 @@ class GraphStructureReorganizer:
             logger.info(f"Already optimizing for {scope}. Skipping.")
             return

-        if self.graph_store.
+        if self.graph_store.node_not_exist(scope):
             logger.debug(f"[GraphStructureReorganize] No nodes for scope={scope}. Skip.")
             return

@@ -248,7 +251,10 @@ class GraphStructureReorganizer:
                 try:
                     f.result()
                 except Exception as e:
-                    logger.warning(
+                    logger.warning(
+                        f"[Reorganize] Cluster processing "
+                        f"failed: {e}, trace: {traceback.format_exc()}"
+                    )
             logger.info("[GraphStructure Reorganize] Structure optimization finished.")

         finally:

@@ -340,7 +346,7 @@ class GraphStructureReorganizer:
                 agg_node.metadata.model_dump(exclude_none=True),
             )
             for child_id in agg_node.metadata.sources:
-                self.graph_store.add_edge(agg_node.id, child_id, "
+                self.graph_store.add_edge(agg_node.id, child_id, "AGGREGATE_TO")

         logger.info("[Reorganizer] Cluster relation/reasoning done.")

@@ -358,7 +364,7 @@ class GraphStructureReorganizer:
             scene_lines.append(line)

         joined_scene = "\n".join(scene_lines)
-        prompt = LOCAL_SUBCLUSTER_PROMPT.
+        prompt = LOCAL_SUBCLUSTER_PROMPT.replace("{joined_scene}", joined_scene)

         messages = [{"role": "user", "content": prompt}]
         response_text = self.llm.generate(messages)

@@ -378,9 +384,12 @@ class GraphStructureReorganizer:

         return result_subclusters

-
-
-
+    @require_python_package(
+        import_name="sklearn",
+        install_command="pip install scikit-learn",
+        install_link="https://scikit-learn.org/stable/install.html",
+    )
+    def _partition(self, nodes, min_cluster_size: int = 3, max_cluster_size: int = 20):
         """
         Partition nodes by:
         1) Frequent tags (top N & above threshold)

@@ -394,7 +403,7 @@ class GraphStructureReorganizer:
         Returns:
             List of clusters, each as a list of GraphDBNode
         """
-        from
+        from sklearn.cluster import MiniBatchKMeans

         # 1) Count all tags
         tag_counter = Counter()

@@ -407,7 +416,7 @@ class GraphStructureReorganizer:
         threshold_tags = {tag for tag, count in tag_counter.items() if count >= 50}
         frequent_tags = top_n_tags | threshold_tags

-        # Group nodes by tags
+        # Group nodes by tags
         tag_groups = defaultdict(list)

         for node in nodes:
@@ -420,48 +429,67 @@ class GraphStructureReorganizer:
         assigned_ids = set()
         for tag, group in tag_groups.items():
             if len(group) >= min_cluster_size:
-
-
+                # Split large groups into chunks of at most max_cluster_size
+                for i in range(0, len(group), max_cluster_size):
+                    sub_group = group[i : i + max_cluster_size]
+                    filtered_tag_clusters.append(sub_group)
+                    assigned_ids.update(n.id for n in sub_group)
             else:
-                logger.info(f"... dropped {tag} ...")
+                logger.info(f"... dropped tag {tag} due to low size ...")

         logger.info(
             f"[MixedPartition] Created {len(filtered_tag_clusters)} clusters from tags. "
             f"Nodes grouped by tags: {len(assigned_ids)} / {len(nodes)}"
         )

-        #
+        # Remaining nodes -> embedding clustering
         remaining_nodes = [n for n in nodes if n.id not in assigned_ids]
         logger.info(
             f"[MixedPartition] Remaining nodes for embedding clustering: {len(remaining_nodes)}"
         )

         embedding_clusters = []
-        if remaining_nodes:
-            x = np.array([n.metadata.embedding for n in remaining_nodes if n.metadata.embedding])
-            k = max(1, min(len(remaining_nodes) // min_cluster_size, 20))
-            if len(x) < k:
-                k = len(x)

-
+        def recursive_clustering(nodes_list):
+            """Recursively split clusters until each is <= max_cluster_size."""
+            if len(nodes_list) <= max_cluster_size:
+                return [nodes_list]
+
+            # Try kmeans with k = ceil(len(nodes) / max_cluster_size)
+            x = np.array([n.metadata.embedding for n in nodes_list if n.metadata.embedding])
+            if len(x) < 2:
+                return [nodes_list]
+
+            k = min(len(x), (len(nodes_list) + max_cluster_size - 1) // max_cluster_size)
+            k = max(1, min(k, len(x)))
+
+            try:
                 kmeans = MiniBatchKMeans(n_clusters=k, batch_size=256, random_state=42)
                 labels = kmeans.fit_predict(x)

                 label_groups = defaultdict(list)
-                for node, label in zip(
+                for node, label in zip(nodes_list, labels, strict=False):
                     label_groups[label].append(node)

-
-
-
-
-
-
+                result = []
+                for sub_group in label_groups.values():
+                    result.extend(recursive_clustering(sub_group))
+                return result
+            except Exception as e:
+                logger.warning(f"Clustering failed: {e}, falling back to single cluster.")
+                return [nodes_list]
+
+        if remaining_nodes:
+            clusters = recursive_clustering(remaining_nodes)
+            embedding_clusters.extend(clusters)
+            logger.info(
+                f"[MixedPartition] Created {len(embedding_clusters)} clusters from embeddings."
+            )

-        # Merge all
+        # Merge all clusters
         all_clusters = filtered_tag_clusters + embedding_clusters

-        #
+        # Handle small clusters (< min_cluster_size)
         final_clusters = []
         small_nodes = []
         for group in all_clusters:
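`_partition` thus replaces the single `MiniBatchKMeans` pass over the leftover nodes with a recursive split: any cluster larger than `max_cluster_size` is re-clustered until every cluster fits, and tag groups are likewise chunked to at most `max_cluster_size`. A self-contained sketch of that recursion over plain embedding dicts (node objects, tags, and logging omitted; the no-progress guard is an extra safety check, not part of the hunk above):

```python
import numpy as np
from sklearn.cluster import MiniBatchKMeans  # pip install scikit-learn


def recursive_clustering(items: list[dict], max_cluster_size: int = 20) -> list[list[dict]]:
    """Split `items` (each carrying an 'embedding' vector) until every cluster fits."""
    if len(items) <= max_cluster_size:
        return [items]

    x = np.array([it["embedding"] for it in items if it.get("embedding")])
    if len(x) < 2:
        return [items]

    # k ~= ceil(len(items) / max_cluster_size), clamped to the number of vectors.
    k = max(1, min((len(items) + max_cluster_size - 1) // max_cluster_size, len(x)))
    labels = MiniBatchKMeans(n_clusters=k, batch_size=256, random_state=42).fit_predict(x)

    groups: dict[int, list[dict]] = {}
    for item, label in zip(items, labels):
        groups.setdefault(int(label), []).append(item)

    clusters: list[list[dict]] = []
    for group in groups.values():
        if len(group) == len(items):  # no progress; stop rather than recurse forever
            return [items]
        clusters.extend(recursive_clustering(group, max_cluster_size))
    return clusters


rng = np.random.default_rng(0)
demo = [{"embedding": rng.random(8).tolist()} for _ in range(100)]
print([len(c) for c in recursive_clustering(demo)])
```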
@@ -484,18 +512,15 @@ class GraphStructureReorganizer:
         if not cluster_nodes:
             raise ValueError("Cluster nodes cannot be empty.")

-
-
-
-
+        memories_items_text = "\n\n".join(
+            [
+                f"{i}. key: {n.metadata.key}\nvalue: {n.memory}\nsummary:{n.metadata.background}"
+                for i, n in enumerate(cluster_nodes)
+            ]
         )

         # Build prompt
-        prompt = REORGANIZE_PROMPT.
-            joined_keys=joined_keys,
-            joined_values=joined_values,
-            joined_backgrounds=joined_backgrounds,
-        )
+        prompt = REORGANIZE_PROMPT.replace("{memory_items_text}", memories_items_text)

         messages = [{"role": "user", "content": prompt}]
         response_text = self.llm.generate(messages)
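Note that this hunk and the earlier `AGGREGATE_PROMPT` / `LOCAL_SUBCLUSTER_PROMPT` changes all fill their templates with a targeted `str.replace` on a single placeholder rather than `str.format(...)`. A likely reason (my reading, not stated in the diff) is that these prompt templates contain literal braces, which `str.format` would try to interpret. A small illustration with a made-up template:

```python
REORGANIZE_PROMPT = (
    "You will be given a numbered list of memory items:\n"
    "{memory_items_text}\n"
    'Answer with JSON exactly like {"key": "...", "value": "...", "tags": [], "summary": "..."}.'
)

cluster_nodes = [
    {"key": "trip", "memory": "Planned a trip to Kyoto", "background": "travel"},
    {"key": "diet", "memory": "Prefers vegetarian food", "background": "preferences"},
]

memory_items_text = "\n\n".join(
    f"{i}. key: {n['key']}\nvalue: {n['memory']}\nsummary:{n['background']}"
    for i, n in enumerate(cluster_nodes)
)

# REORGANIZE_PROMPT.format(memory_items_text=...) would raise KeyError('"key"')
# because of the literal JSON braces in the instructions; replace() only touches
# the one placeholder.
prompt = REORGANIZE_PROMPT.replace("{memory_items_text}", memory_items_text)
print(prompt)
```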
@@ -505,7 +530,7 @@ class GraphStructureReorganizer:
         parent_key = response_json.get("key", "").strip()
         parent_value = response_json.get("value", "").strip()
         parent_tags = response_json.get("tags", [])
-        parent_background = response_json.get("
+        parent_background = response_json.get("summary", "").strip()

         embedding = self.embedder.embed([parent_value])[0]

@@ -561,7 +586,7 @@ class GraphStructureReorganizer:

     def _preprocess_message(self, message: QueueMessage) -> bool:
         message = self._convert_id_to_node(message)
-        if None in message.after_node:
+        if message.after_node is None or None in message.after_node:
             logger.debug(
                 f"Found non-existent node in after_node in message: {message}, skip this message."
             )
memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py

@@ -127,7 +127,7 @@ class InternetGoogleRetriever:
         self.embedder = embedder

     def retrieve_from_internet(
-        self, query: str, top_k: int = 10, parsed_goal=None
+        self, query: str, top_k: int = 10, parsed_goal=None, info=None
     ) -> list[TextualMemoryItem]:
         """
         Retrieve information from the internet and convert to TextualMemoryItem format

@@ -136,10 +136,13 @@ class InternetGoogleRetriever:
             query: Search query
             top_k: Number of results to return
             parsed_goal: Parsed task goal (optional)
+            info (dict): Leave a record of memory consumption.

         Returns:
             List of TextualMemoryItem
         """
+        if not info:
+            info = {"user_id": "", "session_id": ""}
         # Get search results
         search_results = self.google_api.get_all_results(query, max_results=top_k)

@@ -157,8 +160,8 @@ class InternetGoogleRetriever:
             memory_content = f"Title: {title}\nSummary: {snippet}\nSource: {link}"
             # Create metadata
             metadata = TreeNodeTextualMemoryMetadata(
-                user_id=
-                session_id=
+                user_id=info.get("user_id", ""),
+                session_id=info.get("session_id", ""),
                 status="activated",
                 type="fact",  # Internet search results are usually factual information
                 memory_time=datetime.now().strftime("%Y-%m-%d"),
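The new `info` argument threads the caller's identifiers into internet-sourced memories so they can be attributed to a user and session; when nothing is passed, empty strings keep the metadata fields populated. A trimmed sketch of the defaulting and lookup, with a plain dict standing in for `TreeNodeTextualMemoryMetadata`:

```python
from datetime import datetime


def build_result_metadata(info: dict | None = None) -> dict:
    # Default to empty identifiers rather than None so downstream fields stay strings.
    if not info:
        info = {"user_id": "", "session_id": ""}
    return {
        "user_id": info.get("user_id", ""),
        "session_id": info.get("session_id", ""),
        "status": "activated",
        "type": "fact",  # internet search results are treated as factual
        "memory_time": datetime.now().strftime("%Y-%m-%d"),
    }


print(build_result_metadata({"user_id": "u-123", "session_id": "s-456"}))
print(build_result_metadata())
```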
memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py

@@ -4,6 +4,7 @@ from typing import Any, ClassVar

 from memos.configs.internet_retriever import InternetRetrieverConfigFactory
 from memos.embedders.base import BaseEmbedder
+from memos.mem_reader.factory import MemReaderFactory
 from memos.memories.textual.tree_text_memory.retrieve.internet_retriever import (
     InternetGoogleRetriever,
 )

@@ -66,6 +67,7 @@ class InternetRetrieverFactory:
                 access_key=config.api_key,  # Use api_key as access_key for xinyu
                 search_engine_id=config.search_engine_id,
                 embedder=embedder,
+                reader=MemReaderFactory.from_config(config.reader),
                 max_results=config.max_results,
             )
         else:
memos/memories/textual/tree_text_memory/retrieve/recall.py

@@ -56,7 +56,6 @@ class GraphMemoryRetriever:
         # Step 3: Merge and deduplicate results
         combined = {item.id: item for item in graph_results + vector_results}

-        # Debug: 打印在 graph_results 中但不在 combined 中的 id
         graph_ids = {item.id for item in graph_results}
         combined_ids = set(combined.keys())
         lost_ids = graph_ids - combined_ids
memos/memories/textual/tree_text_memory/retrieve/reranker.py

@@ -1,7 +1,7 @@
 import numpy as np

 from memos.embedders.factory import OllamaEmbedder
-from memos.llms.factory import OllamaLLM, OpenAILLM
+from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
 from memos.memories.textual.item import TextualMemoryItem
 from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal

@@ -41,7 +41,7 @@ class MemoryReranker:
     Rank retrieved memory cards by structural priority and contextual similarity.
     """

-    def __init__(self, llm: OpenAILLM | OllamaLLM, embedder: OllamaEmbedder):
+    def __init__(self, llm: OpenAILLM | OllamaLLM | AzureLLM, embedder: OllamaEmbedder):
         self.llm = llm
         self.embedder = embedder

memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py

@@ -10,4 +10,6 @@ class ParsedTaskGoal:
     memories: list[str] = field(default_factory=list)
     keys: list[str] = field(default_factory=list)
     tags: list[str] = field(default_factory=list)
+    rephrased_query: str | None = None
+    internet_search: bool = False
     goal_type: str | None = None  # e.g., 'default', 'explanation', etc.