MemoryOS 0.2.0-py3-none-any.whl → 0.2.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MemoryOS might be problematic.

Files changed (114)
  1. {memoryos-0.2.0.dist-info → memoryos-0.2.2.dist-info}/METADATA +67 -26
  2. memoryos-0.2.2.dist-info/RECORD +169 -0
  3. memoryos-0.2.2.dist-info/entry_points.txt +3 -0
  4. memos/__init__.py +1 -1
  5. memos/api/config.py +562 -0
  6. memos/api/context/context.py +147 -0
  7. memos/api/context/dependencies.py +90 -0
  8. memos/api/exceptions.py +28 -0
  9. memos/api/mcp_serve.py +502 -0
  10. memos/api/product_api.py +35 -0
  11. memos/api/product_models.py +163 -0
  12. memos/api/routers/__init__.py +1 -0
  13. memos/api/routers/product_router.py +386 -0
  14. memos/chunkers/sentence_chunker.py +8 -2
  15. memos/cli.py +113 -0
  16. memos/configs/embedder.py +27 -0
  17. memos/configs/graph_db.py +132 -3
  18. memos/configs/internet_retriever.py +6 -0
  19. memos/configs/llm.py +47 -0
  20. memos/configs/mem_cube.py +1 -1
  21. memos/configs/mem_os.py +5 -0
  22. memos/configs/mem_reader.py +9 -0
  23. memos/configs/mem_scheduler.py +107 -7
  24. memos/configs/mem_user.py +58 -0
  25. memos/configs/memory.py +5 -4
  26. memos/dependency.py +52 -0
  27. memos/embedders/ark.py +92 -0
  28. memos/embedders/factory.py +4 -0
  29. memos/embedders/sentence_transformer.py +8 -2
  30. memos/embedders/universal_api.py +32 -0
  31. memos/graph_dbs/base.py +11 -3
  32. memos/graph_dbs/factory.py +4 -0
  33. memos/graph_dbs/nebular.py +1364 -0
  34. memos/graph_dbs/neo4j.py +333 -124
  35. memos/graph_dbs/neo4j_community.py +300 -0
  36. memos/llms/base.py +9 -0
  37. memos/llms/deepseek.py +54 -0
  38. memos/llms/factory.py +10 -1
  39. memos/llms/hf.py +170 -13
  40. memos/llms/hf_singleton.py +114 -0
  41. memos/llms/ollama.py +4 -0
  42. memos/llms/openai.py +67 -1
  43. memos/llms/qwen.py +63 -0
  44. memos/llms/vllm.py +153 -0
  45. memos/log.py +1 -1
  46. memos/mem_cube/general.py +77 -16
  47. memos/mem_cube/utils.py +109 -0
  48. memos/mem_os/core.py +251 -51
  49. memos/mem_os/main.py +94 -12
  50. memos/mem_os/product.py +1220 -43
  51. memos/mem_os/utils/default_config.py +352 -0
  52. memos/mem_os/utils/format_utils.py +1401 -0
  53. memos/mem_reader/simple_struct.py +18 -10
  54. memos/mem_scheduler/base_scheduler.py +441 -40
  55. memos/mem_scheduler/general_scheduler.py +249 -248
  56. memos/mem_scheduler/modules/base.py +14 -5
  57. memos/mem_scheduler/modules/dispatcher.py +67 -4
  58. memos/mem_scheduler/modules/misc.py +104 -0
  59. memos/mem_scheduler/modules/monitor.py +240 -50
  60. memos/mem_scheduler/modules/rabbitmq_service.py +319 -0
  61. memos/mem_scheduler/modules/redis_service.py +32 -22
  62. memos/mem_scheduler/modules/retriever.py +167 -23
  63. memos/mem_scheduler/modules/scheduler_logger.py +255 -0
  64. memos/mem_scheduler/mos_for_test_scheduler.py +140 -0
  65. memos/mem_scheduler/schemas/__init__.py +0 -0
  66. memos/mem_scheduler/schemas/general_schemas.py +43 -0
  67. memos/mem_scheduler/{modules/schemas.py → schemas/message_schemas.py} +63 -61
  68. memos/mem_scheduler/schemas/monitor_schemas.py +329 -0
  69. memos/mem_scheduler/utils/__init__.py +0 -0
  70. memos/mem_scheduler/utils/filter_utils.py +176 -0
  71. memos/mem_scheduler/utils/misc_utils.py +61 -0
  72. memos/mem_user/factory.py +94 -0
  73. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  74. memos/mem_user/mysql_user_manager.py +500 -0
  75. memos/mem_user/persistent_factory.py +96 -0
  76. memos/mem_user/persistent_user_manager.py +260 -0
  77. memos/mem_user/user_manager.py +4 -4
  78. memos/memories/activation/item.py +29 -0
  79. memos/memories/activation/kv.py +10 -3
  80. memos/memories/activation/vllmkv.py +219 -0
  81. memos/memories/factory.py +2 -0
  82. memos/memories/textual/base.py +1 -1
  83. memos/memories/textual/general.py +43 -97
  84. memos/memories/textual/item.py +5 -33
  85. memos/memories/textual/tree.py +22 -12
  86. memos/memories/textual/tree_text_memory/organize/conflict.py +9 -5
  87. memos/memories/textual/tree_text_memory/organize/manager.py +26 -18
  88. memos/memories/textual/tree_text_memory/organize/redundancy.py +25 -44
  89. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +50 -48
  90. memos/memories/textual/tree_text_memory/organize/reorganizer.py +81 -56
  91. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -3
  92. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +2 -0
  93. memos/memories/textual/tree_text_memory/retrieve/recall.py +0 -1
  94. memos/memories/textual/tree_text_memory/retrieve/reranker.py +2 -2
  95. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +2 -0
  96. memos/memories/textual/tree_text_memory/retrieve/searcher.py +52 -28
  97. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +42 -15
  98. memos/memories/textual/tree_text_memory/retrieve/utils.py +11 -7
  99. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +62 -58
  100. memos/memos_tools/dinding_report_bot.py +422 -0
  101. memos/memos_tools/notification_service.py +44 -0
  102. memos/memos_tools/notification_utils.py +96 -0
  103. memos/parsers/markitdown.py +8 -2
  104. memos/settings.py +3 -1
  105. memos/templates/mem_reader_prompts.py +66 -23
  106. memos/templates/mem_scheduler_prompts.py +126 -43
  107. memos/templates/mos_prompts.py +87 -0
  108. memos/templates/tree_reorganize_prompts.py +85 -30
  109. memos/vec_dbs/base.py +12 -0
  110. memos/vec_dbs/qdrant.py +46 -20
  111. memoryos-0.2.0.dist-info/RECORD +0 -128
  112. memos/mem_scheduler/utils.py +0 -26
  113. {memoryos-0.2.0.dist-info → memoryos-0.2.2.dist-info}/LICENSE +0 -0
  114. {memoryos-0.2.0.dist-info → memoryos-0.2.2.dist-info}/WHEEL +0 -0
@@ -1,4 +1,5 @@
  import json
+ import traceback

  from memos.embedders.factory import OllamaEmbedder
  from memos.graph_dbs.item import GraphDBNode
@@ -30,47 +31,57 @@ class RelationAndReasoningDetector:
  3) Sequence links
  4) Aggregate concepts
  """
- if node.metadata.type == "reasoning":
- logger.info(f"Skip reasoning for inferred node {node.id}")
- return {
- "relations": [],
- "inferred_nodes": [],
- "sequence_links": [],
- "aggregate_nodes": [],
- }
-
  results = {
  "relations": [],
  "inferred_nodes": [],
  "sequence_links": [],
  "aggregate_nodes": [],
  }
+ try:
+ if node.metadata.type == "reasoning":
+ logger.info(f"Skip reasoning for inferred node {node.id}")
+ return {
+ "relations": [],
+ "inferred_nodes": [],
+ "sequence_links": [],
+ "aggregate_nodes": [],
+ }
+
+ nearest = self.graph_store.get_neighbors_by_tag(
+ tags=node.metadata.tags,
+ exclude_ids=exclude_ids,
+ top_k=top_k,
+ min_overlap=2,
+ )
+ nearest = [GraphDBNode(**cand_data) for cand_data in nearest]
+
+ """
+ # 1) Pairwise relations (including CAUSE/CONDITION/CONFLICT)
+ pairwise = self._detect_pairwise_causal_condition_relations(node, nearest)
+ results["relations"].extend(pairwise["relations"])
+ """
+
+ """
+ # 2) Inferred nodes (from causal/condition)
+ inferred = self._infer_fact_nodes_from_relations(pairwise)
+ results["inferred_nodes"].extend(inferred)
+ """
+
+ """
+ 3) Sequence (optional, if you have timestamps)
+ seq = self._detect_sequence_links(node, nearest)
+ results["sequence_links"].extend(seq)
+ """
+
+ # 4) Aggregate
+ agg = self._detect_aggregate_node_for_group(node, nearest, min_group_size=5)
+ if agg:
+ results["aggregate_nodes"].append(agg)

- nearest = self.graph_store.get_neighbors_by_tag(
- tags=node.metadata.tags,
- exclude_ids=exclude_ids,
- top_k=top_k,
- min_overlap=2,
- )
- nearest = [GraphDBNode(**cand_data) for cand_data in nearest]
-
- # 1) Pairwise relations (including CAUSE/CONDITION/CONFLICT)
- pairwise = self._detect_pairwise_causal_condition_relations(node, nearest)
- results["relations"].extend(pairwise["relations"])
-
- # 2) Inferred nodes (from causal/condition)
- inferred = self._infer_fact_nodes_from_relations(pairwise)
- results["inferred_nodes"].extend(inferred)
-
- # 3) Sequence (optional, if you have timestamps)
- seq = self._detect_sequence_links(node, nearest)
- results["sequence_links"].extend(seq)
-
- # 4) Aggregate
- agg = self._detect_aggregate_node_for_group(node, nearest, min_group_size=3)
- if agg:
- results["aggregate_nodes"].append(agg)
-
+ except Exception as e:
+ logger.error(
+ f"Error {e} while process struct reorganize: trace: {traceback.format_exc()}"
+ )
  return results

  def _detect_pairwise_causal_condition_relations(
@@ -80,7 +91,7 @@ class RelationAndReasoningDetector:
  Vector/tag search ➜ For each candidate, use LLM to decide:
  - CAUSE
  - CONDITION
- - RELATE_TO
+ - RELATE
  - CONFLICT
  """
  results = {"relations": []}
@@ -168,12 +179,11 @@ class RelationAndReasoningDetector:
  combined_nodes = [node, *nearest_nodes]

  joined = "\n".join(f"- {n.memory}" for n in combined_nodes)
- prompt = AGGREGATE_PROMPT.format(joined=joined)
+ prompt = AGGREGATE_PROMPT.replace("{joined}", joined)
  response_text = self._call_llm(prompt)
- response_json = self._parse_json_result(response_text)
- if not response_json:
+ summary = self._parse_json_result(response_text)
+ if not summary:
  return None
- summary = json.loads(response_text)
  embedding = self.embedder.embed([summary["value"]])[0]

  parent_node = GraphDBNode(
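Note: the move from str.format to str.replace in the hunk above avoids format-parsing errors when a prompt template contains literal braces (for example an embedded JSON example), since format treats every brace pair as a placeholder. A minimal illustration of the failure mode, using a hypothetical template string rather than the real AGGREGATE_PROMPT:

# Hypothetical template; the actual AGGREGATE_PROMPT lives in memos/templates/.
template = 'Summarize:\n{joined}\nReturn JSON like {"key": "...", "value": "..."}'

joined = "- fact A\n- fact B"

# str.format() treats the literal JSON braces as placeholders and fails.
try:
    template.format(joined=joined)
except (KeyError, ValueError, IndexError) as err:
    print(f"format() failed: {err!r}")

# Plain substitution only touches the named placeholder.
prompt = template.replace("{joined}", joined)
print(prompt)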
@@ -205,14 +215,6 @@ class RelationAndReasoningDetector:
  logger.warning(f"[LLM Error] {e}")
  return ""

- def _parse_relation_result(self, response_text: str) -> str:
- relation = response_text.strip().upper()
- valid = {"CAUSE", "CONDITION", "RELATE_TO", "CONFLICT", "NONE"}
- if relation not in valid:
- logger.warning(f"[RelationDetector] Unexpected relation: {relation}. Fallback NONE.")
- return "NONE"
- return relation
-
  def _parse_json_result(self, response_text):
  try:
  response_text = response_text.replace("```", "").replace("json", "")
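Note: with the raw json.loads(response_text) call dropped earlier in this file, _parse_json_result alone is responsible for turning the LLM reply into a dict. A rough, self-contained sketch of the fence-stripping parse this implies; the exact error handling in 0.2.2 may differ:

import json
import logging

logger = logging.getLogger(__name__)

def parse_json_result(response_text: str) -> dict:
    """Strip Markdown code fences and parse the remaining JSON, returning {} on failure."""
    cleaned = response_text.replace("```", "").replace("json", "").strip()
    try:
        return json.loads(cleaned)
    except json.JSONDecodeError as err:
        logger.warning("[JSONParser] Could not parse LLM output: %s", err)
        return {}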
@@ -226,7 +228,7 @@ class RelationAndReasoningDetector:
  Normalize and validate the LLM relation type output.
  """
  relation = response_text.strip().upper()
- valid = {"CAUSE", "CONDITION", "RELATE_TO", "CONFLICT", "NONE"}
+ valid = {"CAUSE", "CONDITION", "RELATE", "CONFLICT", "NONE"}
  if relation not in valid:
  logger.warning(
  f"[RelationDetector] Unexpected relation type: {relation}. Fallback to NONE."
@@ -3,15 +3,14 @@ import threading
  import time
  import traceback

+ from collections import Counter, defaultdict
  from concurrent.futures import ThreadPoolExecutor, as_completed
  from queue import PriorityQueue
  from typing import Literal

  import numpy as np
- import schedule
-
- from sklearn.cluster import MiniBatchKMeans

+ from memos.dependency import require_python_package
  from memos.embedders.factory import OllamaEmbedder
  from memos.graph_dbs.item import GraphDBEdge, GraphDBNode
  from memos.graph_dbs.neo4j import Neo4jGraphDB
@@ -32,7 +31,7 @@ logger = get_logger(__name__)
  class QueueMessage:
  def __init__(
  self,
- op: Literal["add", "remove", "merge", "update"],
+ op: Literal["add", "remove", "merge", "update", "end"],
  # `str` for node and edge IDs, `GraphDBNode` and `GraphDBEdge` for actual objects
  before_node: list[str] | list[GraphDBNode] | None = None,
  before_edge: list[str] | list[GraphDBEdge] | None = None,
@@ -49,7 +48,7 @@ class QueueMessage:
  return f"QueueMessage(op={self.op}, before_node={self.before_node if self.before_node is None else len(self.before_node)}, after_node={self.after_node if self.after_node is None else len(self.after_node)})"

  def __lt__(self, other: "QueueMessage") -> bool:
- op_priority = {"add": 2, "remove": 2, "merge": 1}
+ op_priority = {"add": 2, "remove": 2, "merge": 1, "end": 0}
  return op_priority[self.op] < op_priority[other.op]

@@ -104,7 +103,7 @@ class GraphStructureReorganizer:
  def _run_message_consumer_loop(self):
  while True:
  message = self.queue.get()
- if message is None:
+ if message.op == "end":
  break

  try:
@@ -114,13 +113,20 @@
  logger.error(traceback.format_exc())
  self.queue.task_done()

+ @require_python_package(
+ import_name="schedule",
+ install_command="pip install schedule",
+ install_link="https://schedule.readthedocs.io/en/stable/installation.html",
+ )
  def _run_structure_organizer_loop(self):
  """
  Use schedule library to periodically trigger structure optimization.
  This runs until the stop flag is set.
  """
- schedule.every(20).seconds.do(self.optimize_structure, scope="LongTermMemory")
- schedule.every(20).seconds.do(self.optimize_structure, scope="UserMemory")
+ import schedule
+
+ schedule.every(600).seconds.do(self.optimize_structure, scope="LongTermMemory")
+ schedule.every(600).seconds.do(self.optimize_structure, scope="UserMemory")

  logger.info("Structure optimizer schedule started.")
  while not getattr(self, "_stop_scheduler", False):
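Note: memos/dependency.py (new in this release, +52 lines) supplies the require_python_package decorator used above and again on _partition below, so optional libraries such as schedule and scikit-learn are only needed when the decorated method actually runs. Its real implementation is not shown in this diff; a minimal sketch of what such a guard typically looks like, keeping the parameter names from the call sites:

import functools
import importlib.util

def require_python_package(import_name: str, install_command: str, install_link: str = ""):
    """Illustrative only: fail with an actionable message if an optional dependency is missing."""
    def decorator(func):
        @functools.wraps(func)
        def wrapper(*args, **kwargs):
            if importlib.util.find_spec(import_name) is None:
                raise ImportError(
                    f"`{import_name}` is required by {func.__qualname__}. "
                    f"Install it with `{install_command}`"
                    + (f" (see {install_link})." if install_link else ".")
                )
            return func(*args, **kwargs)
        return wrapper
    return decorator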
@@ -134,7 +140,7 @@
  if not self.is_reorganize:
  return

- self.add_message(None)
+ self.add_message(QueueMessage(op="end"))
  self.thread.join()
  logger.info("Reorganize thread stopped.")
  self._stop_scheduler = True
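Note: the reorganizer's queue is a PriorityQueue whose items are ordered via QueueMessage.__lt__, so pushing a bare None sentinel can fail as soon as it is compared against a real message; that is presumably why 0.2.2 shuts down with a typed QueueMessage(op="end") instead. A stripped-down sketch of the same sentinel-shutdown pattern using a plain Queue (priorities omitted for brevity):

import threading
from dataclasses import dataclass
from queue import Queue

@dataclass
class Msg:
    op: str              # "add", "remove", "merge", "update", or "end"
    payload: str = ""

def consumer(queue: Queue) -> None:
    while True:
        msg = queue.get()
        if msg.op == "end":          # typed sentinel instead of None
            queue.task_done()
            break
        print(f"processing {msg.op}: {msg.payload}")
        queue.task_done()

work_queue: Queue = Queue()
worker = threading.Thread(target=consumer, args=(work_queue,), daemon=True)
worker.start()
work_queue.put(Msg("add", "node-1"))
work_queue.put(Msg("merge", "node-2"))
work_queue.put(Msg("end"))           # graceful shutdown, same idea as stop() above
worker.join()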
@@ -152,9 +158,6 @@

  def handle_add(self, message: QueueMessage):
  logger.debug(f"Handling add operation: {str(message)[:500]}")
- assert message.before_node is None and message.before_edge is None, (
- "Before node and edge should be None for `add` operation."
- )
  # ———————— 1. check for conflicts ————————
  added_node = message.after_node[0]
  conflicts = self.conflict.detect(added_node, scope=added_node.metadata.memory_type)
@@ -164,9 +167,9 @@
  logger.info(f"Resolved conflict between {added_node.id} and {existing_node.id}.")

  # ———————— 2. check for redundancy ————————
- redundancy = self.redundancy.detect(added_node, scope=added_node.metadata.memory_type)
- if redundancy:
- for added_node, existing_node in redundancy:
+ redundancies = self.redundancy.detect(added_node, scope=added_node.metadata.memory_type)
+ if redundancies:
+ for added_node, existing_node in redundancies:
  self.redundancy.resolve_two_nodes(added_node, existing_node)
  logger.info(f"Resolved redundancy between {added_node.id} and {existing_node.id}.")

@@ -176,14 +179,14 @@
  def handle_merge(self, message: QueueMessage):
  after_node = message.after_node[0]
  logger.debug(f"Handling merge operation: <{after_node.memory}>")
- self.redundancy_resolver.resolve_one_node(after_node)
+ self.redundancy.resolve_one_node(after_node)

  def optimize_structure(
  self,
  scope: str = "LongTermMemory",
  local_tree_threshold: int = 10,
  min_cluster_size: int = 3,
- min_group_size: int = 10,
+ min_group_size: int = 5,
  ):
  """
  Periodically reorganize the graph:
@@ -195,7 +198,7 @@
  logger.info(f"Already optimizing for {scope}. Skipping.")
  return

- if self.graph_store.count_nodes(scope) == 0:
+ if self.graph_store.node_not_exist(scope):
  logger.debug(f"[GraphStructureReorganize] No nodes for scope={scope}. Skip.")
  return

@@ -248,7 +251,10 @@
  try:
  f.result()
  except Exception as e:
- logger.warning(f"[Reorganize] Cluster processing failed: {e}")
+ logger.warning(
+ f"[Reorganize] Cluster processing "
+ f"failed: {e}, trace: {traceback.format_exc()}"
+ )
  logger.info("[GraphStructure Reorganize] Structure optimization finished.")

  finally:
@@ -340,7 +346,7 @@
  agg_node.metadata.model_dump(exclude_none=True),
  )
  for child_id in agg_node.metadata.sources:
- self.graph_store.add_edge(agg_node.id, child_id, "AGGREGATES")
+ self.graph_store.add_edge(agg_node.id, child_id, "AGGREGATE_TO")

  logger.info("[Reorganizer] Cluster relation/reasoning done.")

@@ -358,7 +364,7 @@
  scene_lines.append(line)

  joined_scene = "\n".join(scene_lines)
- prompt = LOCAL_SUBCLUSTER_PROMPT.format(joined_scene=joined_scene)
+ prompt = LOCAL_SUBCLUSTER_PROMPT.replace("{joined_scene}", joined_scene)

  messages = [{"role": "user", "content": prompt}]
  response_text = self.llm.generate(messages)
@@ -378,9 +384,12 @@

  return result_subclusters

- def _partition(
- self, nodes: list[GraphDBNode], min_cluster_size: int = 3
- ) -> list[list[GraphDBNode]]:
+ @require_python_package(
+ import_name="sklearn",
+ install_command="pip install scikit-learn",
+ install_link="https://scikit-learn.org/stable/install.html",
+ )
+ def _partition(self, nodes, min_cluster_size: int = 3, max_cluster_size: int = 20):
  """
  Partition nodes by:
  1) Frequent tags (top N & above threshold)
@@ -394,7 +403,7 @@
  Returns:
  List of clusters, each as a list of GraphDBNode
  """
- from collections import Counter, defaultdict
+ from sklearn.cluster import MiniBatchKMeans

  # 1) Count all tags
  tag_counter = Counter()
@@ -407,7 +416,7 @@
  threshold_tags = {tag for tag, count in tag_counter.items() if count >= 50}
  frequent_tags = top_n_tags | threshold_tags

- # Group nodes by tags, ensure each group is unique internally
+ # Group nodes by tags
  tag_groups = defaultdict(list)

  for node in nodes:
@@ -420,48 +429,67 @@
  assigned_ids = set()
  for tag, group in tag_groups.items():
  if len(group) >= min_cluster_size:
- filtered_tag_clusters.append(group)
- assigned_ids.update(n.id for n in group)
+ # Split large groups into chunks of at most max_cluster_size
+ for i in range(0, len(group), max_cluster_size):
+ sub_group = group[i : i + max_cluster_size]
+ filtered_tag_clusters.append(sub_group)
+ assigned_ids.update(n.id for n in sub_group)
  else:
- logger.info(f"... dropped {tag} ...")
+ logger.info(f"... dropped tag {tag} due to low size ...")

  logger.info(
  f"[MixedPartition] Created {len(filtered_tag_clusters)} clusters from tags. "
  f"Nodes grouped by tags: {len(assigned_ids)} / {len(nodes)}"
  )

- # 5) Remaining nodes -> embedding clustering
+ # Remaining nodes -> embedding clustering
  remaining_nodes = [n for n in nodes if n.id not in assigned_ids]
  logger.info(
  f"[MixedPartition] Remaining nodes for embedding clustering: {len(remaining_nodes)}"
  )

  embedding_clusters = []
- if remaining_nodes:
- x = np.array([n.metadata.embedding for n in remaining_nodes if n.metadata.embedding])
- k = max(1, min(len(remaining_nodes) // min_cluster_size, 20))
- if len(x) < k:
- k = len(x)

- if 1 < k <= len(x):
+ def recursive_clustering(nodes_list):
+ """Recursively split clusters until each is <= max_cluster_size."""
+ if len(nodes_list) <= max_cluster_size:
+ return [nodes_list]
+
+ # Try kmeans with k = ceil(len(nodes) / max_cluster_size)
+ x = np.array([n.metadata.embedding for n in nodes_list if n.metadata.embedding])
+ if len(x) < 2:
+ return [nodes_list]
+
+ k = min(len(x), (len(nodes_list) + max_cluster_size - 1) // max_cluster_size)
+ k = max(1, min(k, len(x)))
+
+ try:
  kmeans = MiniBatchKMeans(n_clusters=k, batch_size=256, random_state=42)
  labels = kmeans.fit_predict(x)

  label_groups = defaultdict(list)
- for node, label in zip(remaining_nodes, labels, strict=False):
+ for node, label in zip(nodes_list, labels, strict=False):
  label_groups[label].append(node)

- embedding_clusters = list(label_groups.values())
- logger.info(
- f"[MixedPartition] Created {len(embedding_clusters)} clusters from embedding."
- )
- else:
- embedding_clusters = [remaining_nodes]
+ result = []
+ for sub_group in label_groups.values():
+ result.extend(recursive_clustering(sub_group))
+ return result
+ except Exception as e:
+ logger.warning(f"Clustering failed: {e}, falling back to single cluster.")
+ return [nodes_list]
+
+ if remaining_nodes:
+ clusters = recursive_clustering(remaining_nodes)
+ embedding_clusters.extend(clusters)
+ logger.info(
+ f"[MixedPartition] Created {len(embedding_clusters)} clusters from embeddings."
+ )

- # Merge all & handle small clusters
+ # Merge all clusters
  all_clusters = filtered_tag_clusters + embedding_clusters

- # Optional: merge tiny clusters
+ # Handle small clusters (< min_cluster_size)
  final_clusters = []
  small_nodes = []
  for group in all_clusters:
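Note: the new _partition path recursively re-clusters any embedding group larger than max_cluster_size, choosing k as the ceiling of group size over the cap. A self-contained sketch of that splitting logic on plain vectors (the real method works on GraphDBNode metadata embeddings and its exact guards may differ):

import numpy as np
from sklearn.cluster import MiniBatchKMeans

def recursive_clustering(vectors: np.ndarray, max_cluster_size: int = 20) -> list[np.ndarray]:
    """Split `vectors` with MiniBatchKMeans until every cluster has at most max_cluster_size rows."""
    if len(vectors) <= max_cluster_size or len(vectors) < 2:
        return [vectors]

    # k = ceil(n / max_cluster_size), capped by the number of points.
    k = min(len(vectors), -(-len(vectors) // max_cluster_size))
    labels = MiniBatchKMeans(n_clusters=k, batch_size=256, random_state=42).fit_predict(vectors)

    groups = [vectors[labels == label] for label in np.unique(labels)]
    if len(groups) == 1:  # degenerate split; stop to avoid infinite recursion
        return groups

    clusters: list[np.ndarray] = []
    for group in groups:
        clusters.extend(recursive_clustering(group, max_cluster_size))
    return clusters

if __name__ == "__main__":
    rng = np.random.default_rng(0)
    points = rng.normal(size=(130, 8))   # 130 fake 8-dim embeddings
    sizes = [len(g) for g in recursive_clustering(points)]
    print(sizes)                          # every group has at most 20 members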
@@ -484,18 +512,15 @@
  if not cluster_nodes:
  raise ValueError("Cluster nodes cannot be empty.")

- joined_keys = "\n".join(f"- {n.metadata.key}" for n in cluster_nodes if n.metadata.key)
- joined_values = "\n".join(f"- {n.memory}" for n in cluster_nodes)
- joined_backgrounds = "\n".join(
- f"- {n.metadata.background}" for n in cluster_nodes if n.metadata.background
+ memories_items_text = "\n\n".join(
+ [
+ f"{i}. key: {n.metadata.key}\nvalue: {n.memory}\nsummary:{n.metadata.background}"
+ for i, n in enumerate(cluster_nodes)
+ ]
  )

  # Build prompt
- prompt = REORGANIZE_PROMPT.format(
- joined_keys=joined_keys,
- joined_values=joined_values,
- joined_backgrounds=joined_backgrounds,
- )
+ prompt = REORGANIZE_PROMPT.replace("{memory_items_text}", memories_items_text)

  messages = [{"role": "user", "content": prompt}]
  response_text = self.llm.generate(messages)
@@ -505,7 +530,7 @@
  parent_key = response_json.get("key", "").strip()
  parent_value = response_json.get("value", "").strip()
  parent_tags = response_json.get("tags", [])
- parent_background = response_json.get("background", "").strip()
+ parent_background = response_json.get("summary", "").strip()

  embedding = self.embedder.embed([parent_value])[0]

@@ -561,7 +586,7 @@

  def _preprocess_message(self, message: QueueMessage) -> bool:
  message = self._convert_id_to_node(message)
- if None in message.after_node:
+ if message.after_node is None or None in message.after_node:
  logger.debug(
  f"Found non-existent node in after_node in message: {message}, skip this message."
  )
@@ -127,7 +127,7 @@ class InternetGoogleRetriever:
  self.embedder = embedder

  def retrieve_from_internet(
- self, query: str, top_k: int = 10, parsed_goal=None
+ self, query: str, top_k: int = 10, parsed_goal=None, info=None
  ) -> list[TextualMemoryItem]:
  """
  Retrieve information from the internet and convert to TextualMemoryItem format
@@ -136,10 +136,13 @@
  query: Search query
  top_k: Number of results to return
  parsed_goal: Parsed task goal (optional)
+ info (dict): Leave a record of memory consumption.

  Returns:
  List of TextualMemoryItem
  """
+ if not info:
+ info = {"user_id": "", "session_id": ""}
  # Get search results
  search_results = self.google_api.get_all_results(query, max_results=top_k)

@@ -157,8 +160,8 @@
  memory_content = f"Title: {title}\nSummary: {snippet}\nSource: {link}"
  # Create metadata
  metadata = TreeNodeTextualMemoryMetadata(
- user_id=None,
- session_id=None,
+ user_id=info.get("user_id", ""),
+ session_id=info.get("session_id", ""),
  status="activated",
  type="fact", # Internet search results are usually factual information
  memory_time=datetime.now().strftime("%Y-%m-%d"),
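Note: with the optional info argument added above, callers can stamp internet-sourced memories with the requesting user and session instead of leaving those metadata fields None. A hypothetical call site (the retriever construction is elided; argument names follow the new signature, and the identifiers are made up for illustration):

# Hypothetical usage; `retriever` is an already-configured InternetGoogleRetriever.
items = retriever.retrieve_from_internet(
    query="recent advances in memory-augmented LLM agents",
    top_k=5,
    info={"user_id": "user-123", "session_id": "session-abc"},
)
for item in items:
    # Each TextualMemoryItem now carries the caller's identity in its metadata.
    print(item.metadata.user_id, item.metadata.session_id)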
@@ -4,6 +4,7 @@ from typing import Any, ClassVar

  from memos.configs.internet_retriever import InternetRetrieverConfigFactory
  from memos.embedders.base import BaseEmbedder
+ from memos.mem_reader.factory import MemReaderFactory
  from memos.memories.textual.tree_text_memory.retrieve.internet_retriever import (
  InternetGoogleRetriever,
  )
@@ -66,6 +67,7 @@ class InternetRetrieverFactory:
  access_key=config.api_key, # Use api_key as access_key for xinyu
  search_engine_id=config.search_engine_id,
  embedder=embedder,
+ reader=MemReaderFactory.from_config(config.reader),
  max_results=config.max_results,
  )
  else:
@@ -56,7 +56,6 @@ class GraphMemoryRetriever:
  # Step 3: Merge and deduplicate results
  combined = {item.id: item for item in graph_results + vector_results}

- # Debug: print the ids that are in graph_results but not in combined
  graph_ids = {item.id for item in graph_results}
  combined_ids = set(combined.keys())
  lost_ids = graph_ids - combined_ids
@@ -1,7 +1,7 @@
  import numpy as np

  from memos.embedders.factory import OllamaEmbedder
- from memos.llms.factory import OllamaLLM, OpenAILLM
+ from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
  from memos.memories.textual.item import TextualMemoryItem
  from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal

@@ -41,7 +41,7 @@ class MemoryReranker:
  Rank retrieved memory cards by structural priority and contextual similarity.
  """

- def __init__(self, llm: OpenAILLM | OllamaLLM, embedder: OllamaEmbedder):
+ def __init__(self, llm: OpenAILLM | OllamaLLM | AzureLLM, embedder: OllamaEmbedder):
  self.llm = llm
  self.embedder = embedder

@@ -10,4 +10,6 @@ class ParsedTaskGoal:
  memories: list[str] = field(default_factory=list)
  keys: list[str] = field(default_factory=list)
  tags: list[str] = field(default_factory=list)
+ rephrased_query: str | None = None
+ internet_search: bool = False
  goal_type: str | None = None # e.g., 'default', 'explanation', etc.
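Note: ParsedTaskGoal gains two fields in 0.2.2: a rephrased_query that the goal parser can hand to downstream retrievers, and an internet_search flag. A small, self-contained illustration of the dataclass shape with the new defaults (field names and types taken from the hunk above; the example values are made up):

from dataclasses import dataclass, field

@dataclass
class ParsedTaskGoal:
    memories: list[str] = field(default_factory=list)
    keys: list[str] = field(default_factory=list)
    tags: list[str] = field(default_factory=list)
    rephrased_query: str | None = None
    internet_search: bool = False
    goal_type: str | None = None  # e.g., 'default', 'explanation', etc.

# The new fields default to "no rephrasing, no web search", so callers written
# against 0.2.0 keep working unchanged.
goal = ParsedTaskGoal(keys=["solar panels"], tags=["energy"])
assert goal.rephrased_query is None and goal.internet_search is False

# A 0.2.2 task-goal parser can now signal that the searcher should also query the internet.
web_goal = ParsedTaskGoal(
    keys=["GPU prices"],
    rephrased_query="current market prices for consumer GPUs",
    internet_search=True,
)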