MemoryOS 0.2.1-py3-none-any.whl → 1.0.0-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of MemoryOS might be problematic.

Files changed (92)
  1. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/METADATA +7 -1
  2. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/RECORD +87 -64
  3. memos/__init__.py +1 -1
  4. memos/api/config.py +158 -69
  5. memos/api/context/context.py +147 -0
  6. memos/api/context/dependencies.py +101 -0
  7. memos/api/product_models.py +5 -1
  8. memos/api/routers/product_router.py +54 -26
  9. memos/configs/graph_db.py +49 -1
  10. memos/configs/internet_retriever.py +19 -0
  11. memos/configs/mem_os.py +5 -0
  12. memos/configs/mem_reader.py +9 -0
  13. memos/configs/mem_scheduler.py +54 -18
  14. memos/configs/mem_user.py +58 -0
  15. memos/graph_dbs/base.py +38 -3
  16. memos/graph_dbs/factory.py +2 -0
  17. memos/graph_dbs/nebular.py +1612 -0
  18. memos/graph_dbs/neo4j.py +18 -9
  19. memos/log.py +6 -1
  20. memos/mem_cube/utils.py +13 -6
  21. memos/mem_os/core.py +157 -37
  22. memos/mem_os/main.py +2 -2
  23. memos/mem_os/product.py +252 -201
  24. memos/mem_os/utils/default_config.py +1 -1
  25. memos/mem_os/utils/format_utils.py +281 -70
  26. memos/mem_os/utils/reference_utils.py +133 -0
  27. memos/mem_reader/simple_struct.py +13 -5
  28. memos/mem_scheduler/base_scheduler.py +239 -266
  29. memos/mem_scheduler/{modules → general_modules}/base.py +4 -5
  30. memos/mem_scheduler/{modules → general_modules}/dispatcher.py +57 -21
  31. memos/mem_scheduler/general_modules/misc.py +104 -0
  32. memos/mem_scheduler/{modules → general_modules}/rabbitmq_service.py +12 -10
  33. memos/mem_scheduler/{modules → general_modules}/redis_service.py +1 -1
  34. memos/mem_scheduler/general_modules/retriever.py +199 -0
  35. memos/mem_scheduler/general_modules/scheduler_logger.py +261 -0
  36. memos/mem_scheduler/general_scheduler.py +243 -80
  37. memos/mem_scheduler/monitors/__init__.py +0 -0
  38. memos/mem_scheduler/monitors/dispatcher_monitor.py +305 -0
  39. memos/mem_scheduler/{modules/monitor.py → monitors/general_monitor.py} +106 -57
  40. memos/mem_scheduler/mos_for_test_scheduler.py +23 -20
  41. memos/mem_scheduler/schemas/__init__.py +0 -0
  42. memos/mem_scheduler/schemas/general_schemas.py +44 -0
  43. memos/mem_scheduler/schemas/message_schemas.py +149 -0
  44. memos/mem_scheduler/schemas/monitor_schemas.py +337 -0
  45. memos/mem_scheduler/utils/__init__.py +0 -0
  46. memos/mem_scheduler/utils/filter_utils.py +176 -0
  47. memos/mem_scheduler/utils/misc_utils.py +102 -0
  48. memos/mem_user/factory.py +94 -0
  49. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  50. memos/mem_user/mysql_user_manager.py +500 -0
  51. memos/mem_user/persistent_factory.py +96 -0
  52. memos/mem_user/user_manager.py +4 -4
  53. memos/memories/activation/item.py +5 -1
  54. memos/memories/activation/kv.py +20 -8
  55. memos/memories/textual/base.py +2 -2
  56. memos/memories/textual/general.py +36 -92
  57. memos/memories/textual/item.py +5 -33
  58. memos/memories/textual/tree.py +13 -7
  59. memos/memories/textual/tree_text_memory/organize/{conflict.py → handler.py} +34 -50
  60. memos/memories/textual/tree_text_memory/organize/manager.py +8 -96
  61. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +49 -43
  62. memos/memories/textual/tree_text_memory/organize/reorganizer.py +107 -142
  63. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +229 -0
  64. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -3
  65. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +11 -0
  66. memos/memories/textual/tree_text_memory/retrieve/recall.py +15 -8
  67. memos/memories/textual/tree_text_memory/retrieve/reranker.py +1 -1
  68. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +2 -0
  69. memos/memories/textual/tree_text_memory/retrieve/searcher.py +191 -116
  70. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +47 -15
  71. memos/memories/textual/tree_text_memory/retrieve/utils.py +11 -7
  72. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +62 -58
  73. memos/memos_tools/dinding_report_bot.py +422 -0
  74. memos/memos_tools/lockfree_dict.py +120 -0
  75. memos/memos_tools/notification_service.py +44 -0
  76. memos/memos_tools/notification_utils.py +96 -0
  77. memos/memos_tools/thread_safe_dict.py +288 -0
  78. memos/settings.py +3 -1
  79. memos/templates/mem_reader_prompts.py +4 -1
  80. memos/templates/mem_scheduler_prompts.py +62 -15
  81. memos/templates/mos_prompts.py +116 -0
  82. memos/templates/tree_reorganize_prompts.py +24 -17
  83. memos/utils.py +19 -0
  84. memos/mem_scheduler/modules/misc.py +0 -39
  85. memos/mem_scheduler/modules/retriever.py +0 -268
  86. memos/mem_scheduler/modules/schemas.py +0 -328
  87. memos/mem_scheduler/utils.py +0 -75
  88. memos/memories/textual/tree_text_memory/organize/redundancy.py +0 -193
  89. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/LICENSE +0 -0
  90. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/WHEEL +0 -0
  91. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/entry_points.txt +0 -0
  92. /memos/mem_scheduler/{modules → general_modules}/__init__.py +0 -0
memos/memories/textual/tree_text_memory/organize/reorganizer.py
@@ -3,7 +3,7 @@ import threading
  import time
  import traceback

- from collections import Counter, defaultdict
+ from collections import defaultdict
  from concurrent.futures import ThreadPoolExecutor, as_completed
  from queue import PriorityQueue
  from typing import Literal
@@ -17,8 +17,7 @@ from memos.graph_dbs.neo4j import Neo4jGraphDB
  from memos.llms.base import BaseLLM
  from memos.log import get_logger
  from memos.memories.textual.item import TreeNodeTextualMemoryMetadata
- from memos.memories.textual.tree_text_memory.organize.conflict import ConflictHandler
- from memos.memories.textual.tree_text_memory.organize.redundancy import RedundancyHandler
+ from memos.memories.textual.tree_text_memory.organize.handler import NodeHandler
  from memos.memories.textual.tree_text_memory.organize.relation_reason_detector import (
      RelationAndReasoningDetector,
  )
@@ -63,10 +62,10 @@ class GraphStructureReorganizer:
          self.relation_detector = RelationAndReasoningDetector(
              self.graph_store, self.llm, self.embedder
          )
-         self.conflict = ConflictHandler(graph_store=graph_store, llm=llm, embedder=embedder)
-         self.redundancy = RedundancyHandler(graph_store=graph_store, llm=llm, embedder=embedder)
+         self.resolver = NodeHandler(graph_store=graph_store, llm=llm, embedder=embedder)

          self.is_reorganize = is_reorganize
+         self._reorganize_needed = True
          if self.is_reorganize:
              # ____ 1. For queue message driven thread ___________
              self.thread = threading.Thread(target=self._run_message_consumer_loop)
@@ -125,13 +124,17 @@ class GraphStructureReorganizer:
          """
          import schedule

-         schedule.every(20).seconds.do(self.optimize_structure, scope="LongTermMemory")
-         schedule.every(20).seconds.do(self.optimize_structure, scope="UserMemory")
+         schedule.every(100).seconds.do(self.optimize_structure, scope="LongTermMemory")
+         schedule.every(100).seconds.do(self.optimize_structure, scope="UserMemory")

          logger.info("Structure optimizer schedule started.")
          while not getattr(self, "_stop_scheduler", False):
-             schedule.run_pending()
-             time.sleep(1)
+             if self._reorganize_needed:
+                 logger.info("[Reorganizer] Triggering optimize_structure due to new nodes.")
+                 self.optimize_structure(scope="LongTermMemory")
+                 self.optimize_structure(scope="UserMemory")
+                 self._reorganize_needed = False
+             time.sleep(30)

      def stop(self):
          """
@@ -148,45 +151,31 @@ class GraphStructureReorganizer:
          logger.info("Structure optimizer stopped.")

      def handle_message(self, message: QueueMessage):
-         handle_map = {
-             "add": self.handle_add,
-             "remove": self.handle_remove,
-             "merge": self.handle_merge,
-         }
+         handle_map = {"add": self.handle_add, "remove": self.handle_remove}
          handle_map[message.op](message)
          logger.debug(f"message queue size: {self.queue.qsize()}")

      def handle_add(self, message: QueueMessage):
          logger.debug(f"Handling add operation: {str(message)[:500]}")
-         # ———————— 1. check for conflicts ————————
          added_node = message.after_node[0]
-         conflicts = self.conflict.detect(added_node, scope=added_node.metadata.memory_type)
-         if conflicts:
-             for added_node, existing_node in conflicts:
-                 self.conflict.resolve(added_node, existing_node)
-                 logger.info(f"Resolved conflict between {added_node.id} and {existing_node.id}.")
-
-         # ———————— 2. check for redundancy ————————
-         redundancies = self.redundancy.detect(added_node, scope=added_node.metadata.memory_type)
-         if redundancies:
-             for added_node, existing_node in redundancies:
-                 self.redundancy.resolve_two_nodes(added_node, existing_node)
-                 logger.info(f"Resolved redundancy between {added_node.id} and {existing_node.id}.")
+         detected_relationships = self.resolver.detect(
+             added_node, scope=added_node.metadata.memory_type
+         )
+         if detected_relationships:
+             for added_node, existing_node, relation in detected_relationships:
+                 self.resolver.resolve(added_node, existing_node, relation)
+
+         self._reorganize_needed = True

      def handle_remove(self, message: QueueMessage):
          logger.debug(f"Handling remove operation: {str(message)[:50]}")

-     def handle_merge(self, message: QueueMessage):
-         after_node = message.after_node[0]
-         logger.debug(f"Handling merge operation: <{after_node.memory}>")
-         self.redundancy.resolve_one_node(after_node)
-
      def optimize_structure(
          self,
          scope: str = "LongTermMemory",
          local_tree_threshold: int = 10,
-         min_cluster_size: int = 3,
-         min_group_size: int = 5,
+         min_cluster_size: int = 4,
+         min_group_size: int = 20,
      ):
          """
          Periodically reorganize the graph:
@@ -198,7 +187,7 @@ class GraphStructureReorganizer:
              logger.info(f"Already optimizing for {scope}. Skipping.")
              return

-         if self.graph_store.count_nodes(scope) == 0:
+         if self.graph_store.node_not_exist(scope):
              logger.debug(f"[GraphStructureReorganize] No nodes for scope={scope}. Skip.")
              return

@@ -251,7 +240,10 @@ class GraphStructureReorganizer:
                  try:
                      f.result()
                  except Exception as e:
-                     logger.warning(f"[Reorganize] Cluster processing failed: {e}")
+                     logger.warning(
+                         f"[Reorganize] Cluster processing "
+                         f"failed: {e}, cluster_nodes: {cluster_nodes}, trace: {traceback.format_exc()}"
+                     )
              logger.info("[GraphStructure Reorganize] Structure optimization finished.")

          finally:
@@ -268,29 +260,23 @@ class GraphStructureReorganizer:
          if len(cluster_nodes) <= min_cluster_size:
              return

-         if len(cluster_nodes) <= local_tree_threshold:
-             # Small cluster ➜ single parent
-             parent_node = self._summarize_cluster(cluster_nodes, scope)
-             self._create_parent_node(parent_node)
-             self._link_cluster_nodes(parent_node, cluster_nodes)
-         else:
-             # Large cluster ➜ local sub-clustering
-             sub_clusters = self._local_subcluster(cluster_nodes)
-             sub_parents = []
-
-             for sub_nodes in sub_clusters:
-                 if len(sub_nodes) < min_cluster_size:
-                     continue  # Skip tiny noise
-                 sub_parent_node = self._summarize_cluster(sub_nodes, scope)
-                 self._create_parent_node(sub_parent_node)
-                 self._link_cluster_nodes(sub_parent_node, sub_nodes)
-                 sub_parents.append(sub_parent_node)
-
-             if sub_parents:
-                 cluster_parent_node = self._summarize_cluster(cluster_nodes, scope)
-                 self._create_parent_node(cluster_parent_node)
-                 for sub_parent in sub_parents:
-                     self.graph_store.add_edge(cluster_parent_node.id, sub_parent.id, "PARENT")
+         # Large cluster ➜ local sub-clustering
+         sub_clusters = self._local_subcluster(cluster_nodes)
+         sub_parents = []
+
+         for sub_nodes in sub_clusters:
+             if len(sub_nodes) < min_cluster_size:
+                 continue  # Skip tiny noise
+             sub_parent_node = self._summarize_cluster(sub_nodes, scope)
+             self._create_parent_node(sub_parent_node)
+             self._link_cluster_nodes(sub_parent_node, sub_nodes)
+             sub_parents.append(sub_parent_node)
+
+         if sub_parents and len(sub_parents) >= min_cluster_size:
+             cluster_parent_node = self._summarize_cluster(cluster_nodes, scope)
+             self._create_parent_node(cluster_parent_node)
+             for sub_parent in sub_parents:
+                 self.graph_store.add_edge(cluster_parent_node.id, sub_parent.id, "PARENT")

          logger.info("Adding relations/reasons")
          nodes_to_check = cluster_nodes
@@ -343,11 +329,13 @@ class GraphStructureReorganizer:
                  agg_node.metadata.model_dump(exclude_none=True),
              )
              for child_id in agg_node.metadata.sources:
-                 self.graph_store.add_edge(agg_node.id, child_id, "AGGREGATES")
+                 self.graph_store.add_edge(agg_node.id, child_id, "AGGREGATE_TO")

          logger.info("[Reorganizer] Cluster relation/reasoning done.")

-     def _local_subcluster(self, cluster_nodes: list[GraphDBNode]) -> list[list[GraphDBNode]]:
+     def _local_subcluster(
+         self, cluster_nodes: list[GraphDBNode], max_length: int = 8000
+     ) -> (list)[list[GraphDBNode]]:
          """
          Use LLM to split a large cluster into semantically coherent sub-clusters.
          """
@@ -361,7 +349,9 @@ class GraphStructureReorganizer:
              scene_lines.append(line)

          joined_scene = "\n".join(scene_lines)
-         prompt = LOCAL_SUBCLUSTER_PROMPT.replace("{joined_scene}", joined_scene)
+         if len(joined_scene) > max_length:
+             logger.warning(f"Sub-cluster too long: {joined_scene}")
+         prompt = LOCAL_SUBCLUSTER_PROMPT.replace("{joined_scene}", joined_scene[:max_length])

          messages = [{"role": "user", "content": prompt}]
          response_text = self.llm.generate(messages)
@@ -386,12 +376,12 @@ class GraphStructureReorganizer:
          install_command="pip install scikit-learn",
          install_link="https://scikit-learn.org/stable/install.html",
      )
-     def _partition(self, nodes, min_cluster_size: int = 3, max_cluster_size: int = 20):
+     def _partition(self, nodes, min_cluster_size: int = 10, max_cluster_size: int = 20):
          """
          Partition nodes by:
-         1) Frequent tags (top N & above threshold)
-         2) Remaining nodes by embedding clustering (MiniBatchKMeans)
-         3) Small clusters merged or assigned to 'Other'
+         - If total nodes <= max_cluster_size -> return all nodes in one cluster.
+         - If total nodes > max_cluster_size -> cluster by embeddings, recursively split.
+         - Only keep clusters with size > min_cluster_size.

          Args:
              nodes: List of GraphDBNode
@@ -402,105 +392,80 @@ class GraphStructureReorganizer:
          """
          from sklearn.cluster import MiniBatchKMeans

-         # 1) Count all tags
-         tag_counter = Counter()
-         for node in nodes:
-             for tag in node.metadata.tags:
-                 tag_counter[tag] += 1
-
-         # Select frequent tags
-         top_n_tags = {tag for tag, count in tag_counter.most_common(50)}
-         threshold_tags = {tag for tag, count in tag_counter.items() if count >= 50}
-         frequent_tags = top_n_tags | threshold_tags
-
-         # Group nodes by tags
-         tag_groups = defaultdict(list)
-
-         for node in nodes:
-             for tag in node.metadata.tags:
-                 if tag in frequent_tags:
-                     tag_groups[tag].append(node)
-                     break
-
-         filtered_tag_clusters = []
-         assigned_ids = set()
-         for tag, group in tag_groups.items():
-             if len(group) >= min_cluster_size:
-                 # Split large groups into chunks of at most max_cluster_size
-                 for i in range(0, len(group), max_cluster_size):
-                     sub_group = group[i : i + max_cluster_size]
-                     filtered_tag_clusters.append(sub_group)
-                     assigned_ids.update(n.id for n in sub_group)
-             else:
-                 logger.info(f"... dropped tag {tag} due to low size ...")
-
-         logger.info(
-             f"[MixedPartition] Created {len(filtered_tag_clusters)} clusters from tags. "
-             f"Nodes grouped by tags: {len(assigned_ids)} / {len(nodes)}"
-         )
-
-         # Remaining nodes -> embedding clustering
-         remaining_nodes = [n for n in nodes if n.id not in assigned_ids]
-         logger.info(
-             f"[MixedPartition] Remaining nodes for embedding clustering: {len(remaining_nodes)}"
-         )
-
-         embedding_clusters = []
+         if len(nodes) <= max_cluster_size:
+             logger.info(
+                 f"[KMeansPartition] Node count {len(nodes)} <= {max_cluster_size}, skipping KMeans."
+             )
+             return [nodes]

-         def recursive_clustering(nodes_list):
+         def recursive_clustering(nodes_list, depth=0):
              """Recursively split clusters until each is <= max_cluster_size."""
+             indent = " " * depth
+             logger.info(
+                 f"{indent}[Recursive] Start clustering {len(nodes_list)} nodes at depth {depth}"
+             )
+
              if len(nodes_list) <= max_cluster_size:
+                 logger.info(
+                     f"{indent}[Recursive] Node count <= {max_cluster_size}, stop splitting."
+                 )
                  return [nodes_list]
-
              # Try kmeans with k = ceil(len(nodes) / max_cluster_size)
-             x = np.array([n.metadata.embedding for n in nodes_list if n.metadata.embedding])
-             if len(x) < 2:
+             x_nodes = [n for n in nodes_list if n.metadata.embedding]
+             x = np.array([n.metadata.embedding for n in x_nodes])
+
+             if len(x) < min_cluster_size:
+                 logger.info(
+                     f"{indent}[Recursive] Too few embeddings ({len(x)}), skipping clustering."
+                 )
                  return [nodes_list]

              k = min(len(x), (len(nodes_list) + max_cluster_size - 1) // max_cluster_size)
-             k = max(1, min(k, len(x)))
+             k = max(1, k)

              try:
+                 logger.info(f"{indent}[Recursive] Clustering with k={k} on {len(x)} points.")
                  kmeans = MiniBatchKMeans(n_clusters=k, batch_size=256, random_state=42)
                  labels = kmeans.fit_predict(x)

                  label_groups = defaultdict(list)
-                 for node, label in zip(nodes_list, labels, strict=False):
+                 for node, label in zip(x_nodes, labels, strict=False):
                      label_groups[label].append(node)

+                 # Map: label -> nodes with no embedding (fallback group)
+                 no_embedding_nodes = [n for n in nodes_list if not n.metadata.embedding]
+                 if no_embedding_nodes:
+                     logger.warning(
+                         f"{indent}[Recursive] {len(no_embedding_nodes)} nodes have no embedding. Added to largest cluster."
+                     )
+                     # Assign to largest cluster
+                     largest_label = max(label_groups.items(), key=lambda kv: len(kv[1]))[0]
+                     label_groups[largest_label].extend(no_embedding_nodes)
+
                  result = []
-                 for sub_group in label_groups.values():
-                     result.extend(recursive_clustering(sub_group))
+                 for label, sub_group in label_groups.items():
+                     logger.info(f"{indent} Cluster-{label}: {len(sub_group)} nodes")
+                     result.extend(recursive_clustering(sub_group, depth=depth + 1))
                  return result
+
              except Exception as e:
-                 logger.warning(f"Clustering failed: {e}, falling back to single cluster.")
+                 logger.warning(
+                     f"{indent}[Recursive] Clustering failed: {e}, fallback to one cluster."
+                 )
                  return [nodes_list]

-         if remaining_nodes:
-             clusters = recursive_clustering(remaining_nodes)
-             embedding_clusters.extend(clusters)
-             logger.info(
-                 f"[MixedPartition] Created {len(embedding_clusters)} clusters from embeddings."
-             )
-
-         # Merge all clusters
-         all_clusters = filtered_tag_clusters + embedding_clusters
+         raw_clusters = recursive_clustering(nodes)
+         filtered_clusters = [c for c in raw_clusters if len(c) > min_cluster_size]

-         # Handle small clusters (< min_cluster_size)
-         final_clusters = []
-         small_nodes = []
-         for group in all_clusters:
-             if len(group) < min_cluster_size:
-                 small_nodes.extend(group)
-             else:
-                 final_clusters.append(group)
+         logger.info(f"[KMeansPartition] Total clusters before filtering: {len(raw_clusters)}")
+         for i, cluster in enumerate(raw_clusters):
+             logger.info(f"[KMeansPartition] Cluster-{i}: {len(cluster)} nodes")

-         if small_nodes:
-             final_clusters.append(small_nodes)
-             logger.info(f"[MixedPartition] {len(small_nodes)} nodes assigned to 'Other' cluster.")
+         logger.info(
+             f"[KMeansPartition] Clusters after filtering (>{min_cluster_size}): {len(filtered_clusters)}"
+         )

-         logger.info(f"[MixedPartition] Total final clusters: {len(final_clusters)}")
-         return final_clusters
+         return filtered_clusters

      def _summarize_cluster(self, cluster_nodes: list[GraphDBNode], scope: str) -> GraphDBNode:
          """
@@ -597,7 +562,7 @@ class GraphStructureReorganizer:
          for i, node in enumerate(message.after_node or []):
              if not isinstance(node, str):
                  continue
-             raw_node = self.graph_store.get_node(node)
+             raw_node = self.graph_store.get_node(node, include_embedding=True)
              if raw_node is None:
                  logger.debug(f"Node with ID {node} not found in the graph store.")
                  message.after_node[i] = None
memos/memories/textual/tree_text_memory/retrieve/bochasearch.py (new file)
@@ -0,0 +1,229 @@
+ """BochaAI Search API retriever for tree text memory."""
+
+ import json
+
+ from concurrent.futures import ThreadPoolExecutor, as_completed
+ from datetime import datetime
+
+ import requests
+
+ from memos.embedders.factory import OllamaEmbedder
+ from memos.log import get_logger
+ from memos.mem_reader.base import BaseMemReader
+ from memos.memories.textual.item import TextualMemoryItem
+
+
+ logger = get_logger(__name__)
+
+
+ class BochaAISearchAPI:
+     """BochaAI Search API Client"""
+
+     def __init__(self, api_key: str, max_results: int = 20):
+         """
+         Initialize BochaAI Search API client.
+
+         Args:
+             api_key: BochaAI API key
+             max_results: Maximum number of search results to retrieve
+         """
+         self.api_key = api_key
+         self.max_results = max_results
+
+         self.web_url = "https://api.bochaai.com/v1/web-search"
+         self.ai_url = "https://api.bochaai.com/v1/ai-search"
+
+         self.headers = {
+             "Authorization": f"Bearer {api_key}",
+             "Content-Type": "application/json",
+         }
+
+     def search_web(self, query: str, summary: bool = True, freshness="noLimit") -> list[dict]:
+         """
+         Perform a Web Search (equivalent to the first curl).
+
+         Args:
+             query: Search query string
+             summary: Whether to include summary in the results
+             freshness: Freshness filter (e.g. 'noLimit', 'day', 'week')
+
+         Returns:
+             A list of search result dicts
+         """
+         body = {
+             "query": query,
+             "summary": summary,
+             "freshness": freshness,
+             "count": self.max_results,
+         }
+         return self._post(self.web_url, body)
+
+     def search_ai(
+         self, query: str, answer: bool = False, stream: bool = False, freshness="noLimit"
+     ) -> list[dict]:
+         """
+         Perform an AI Search (equivalent to the second curl).
+
+         Args:
+             query: Search query string
+             answer: Whether BochaAI should generate an answer
+             stream: Whether to use streaming response
+             freshness: Freshness filter (e.g. 'noLimit', 'day', 'week')
+
+         Returns:
+             A list of search result dicts
+         """
+         body = {
+             "query": query,
+             "freshness": freshness,
+             "count": self.max_results,
+             "answer": answer,
+             "stream": stream,
+         }
+         return self._post(self.ai_url, body)
+
+     def _post(self, url: str, body: dict) -> list[dict]:
+         """Send POST request and parse BochaAI search results."""
+         try:
+             resp = requests.post(url, headers=self.headers, json=body)
+             resp.raise_for_status()
+             raw_data = resp.json()
+
+             # parse the nested structure correctly
+             # ✅ AI Search
+             if "messages" in raw_data:
+                 results = []
+                 for msg in raw_data["messages"]:
+                     if msg.get("type") == "source" and msg.get("content_type") == "webpage":
+                         try:
+                             content_json = json.loads(msg["content"])
+                             results.extend(content_json.get("value", []))
+                         except Exception as e:
+                             logger.error(f"Failed to parse message content: {e}")
+                 return results
+
+             # ✅ Web Search
+             return raw_data.get("data", {}).get("webPages", {}).get("value", [])
+
+         except Exception:
+             import traceback
+
+             logger.error(f"BochaAI search error: {traceback.format_exc()}")
+             return []
+
+
+ class BochaAISearchRetriever:
+     """BochaAI retriever that converts search results into TextualMemoryItem objects"""
+
+     def __init__(
+         self,
+         access_key: str,
+         embedder: OllamaEmbedder,
+         reader: BaseMemReader,
+         max_results: int = 20,
+     ):
+         """
+         Initialize BochaAI Search retriever.
+
+         Args:
+             access_key: BochaAI API key
+             embedder: Embedder instance for generating embeddings
+             reader: MemReader instance for processing internet content
+             max_results: Maximum number of search results to retrieve
+         """
+         self.bocha_api = BochaAISearchAPI(access_key, max_results=max_results)
+         self.embedder = embedder
+         self.reader = reader
+
+     def retrieve_from_internet(
+         self, query: str, top_k: int = 10, parsed_goal=None, info=None
+     ) -> list[TextualMemoryItem]:
+         """
+         Default internet retrieval (Web Search).
+         This keeps consistent API with Xinyu and Google retrievers.
+
+         Args:
+             query: Search query
+             top_k: Number of results to retrieve
+             parsed_goal: Parsed task goal (optional)
+             info (dict): Metadata for memory consumption tracking
+
+         Returns:
+             List of TextualMemoryItem
+         """
+         search_results = self.bocha_api.search_ai(query)  # ✅ default to
+         # web-search
+         return self._convert_to_mem_items(search_results, query, parsed_goal, info)
+
+     def retrieve_from_web(
+         self, query: str, top_k: int = 10, parsed_goal=None, info=None
+     ) -> list[TextualMemoryItem]:
+         """Explicitly retrieve using Bocha Web Search."""
+         search_results = self.bocha_api.search_web(query)
+         return self._convert_to_mem_items(search_results, query, parsed_goal, info)
+
+     def retrieve_from_ai(
+         self, query: str, top_k: int = 10, parsed_goal=None, info=None
+     ) -> list[TextualMemoryItem]:
+         """Explicitly retrieve using Bocha AI Search."""
+         search_results = self.bocha_api.search_ai(query)
+         return self._convert_to_mem_items(search_results, query, parsed_goal, info)
+
+     def _convert_to_mem_items(
+         self, search_results: list[dict], query: str, parsed_goal=None, info=None
+     ):
+         """Convert API search results into TextualMemoryItem objects."""
+         memory_items = []
+         if not info:
+             info = {"user_id": "", "session_id": ""}
+
+         with ThreadPoolExecutor(max_workers=8) as executor:
+             futures = [
+                 executor.submit(self._process_result, r, query, parsed_goal, info)
+                 for r in search_results
+             ]
+             for future in as_completed(futures):
+                 try:
+                     memory_items.extend(future.result())
+                 except Exception as e:
+                     logger.error(f"Error processing BochaAI search result: {e}")
+
+         # Deduplicate items by memory text
+         unique_memory_items = {item.memory: item for item in memory_items}
+         return list(unique_memory_items.values())
+
+     def _process_result(
+         self, result: dict, query: str, parsed_goal: str, info: None
+     ) -> list[TextualMemoryItem]:
+         """Process one Bocha search result into TextualMemoryItem."""
+         title = result.get("name", "")
+         content = result.get("summary", "") or result.get("snippet", "")
+         summary = result.get("snippet", "")
+         url = result.get("url", "")
+         publish_time = result.get("datePublished", "")
+
+         if publish_time:
+             try:
+                 publish_time = datetime.fromisoformat(publish_time.replace("Z", "+00:00")).strftime(
+                     "%Y-%m-%d"
+                 )
+             except Exception:
+                 publish_time = datetime.now().strftime("%Y-%m-%d")
+         else:
+             publish_time = datetime.now().strftime("%Y-%m-%d")
+
+         # Use reader to split and process the content into chunks
+         read_items = self.reader.get_memory([content], type="doc", info=info)
+
+         memory_items = []
+         for read_item_i in read_items[0]:
+             read_item_i.memory = (
+                 f"Title: {title}\nNewsTime: {publish_time}\nSummary: {summary}\n"
+                 f"Content: {read_item_i.memory}"
+             )
+             read_item_i.metadata.source = "web"
+             read_item_i.metadata.memory_type = "OuterMemory"
+             read_item_i.metadata.sources = [url] if url else []
+             read_item_i.metadata.visibility = "public"
+             memory_items.append(read_item_i)
+         return memory_items
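For orientation, a hedged usage sketch of the new API client follows; the API key and query strings are placeholders, and in MemoryOS the retriever is normally wired up through the internet retriever factory rather than constructed by hand.

# Placeholder key/query; result parsing follows the search_web/search_ai methods above.
from memos.memories.textual.tree_text_memory.retrieve.bochasearch import BochaAISearchAPI

api = BochaAISearchAPI(api_key="YOUR_BOCHAAI_API_KEY", max_results=5)

# Web Search: _post returns the parsed data.webPages.value list.
for page in api.search_web("MemoryOS memory scheduling", freshness="week"):
    print(page.get("name"), page.get("url"))

# AI Search: _post extracts webpage sources from the messages payload.
sources = api.search_ai("MemoryOS memory scheduling")
print(f"{len(sources)} AI-search sources")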
memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py
@@ -127,7 +127,7 @@ class InternetGoogleRetriever:
          self.embedder = embedder

      def retrieve_from_internet(
-         self, query: str, top_k: int = 10, parsed_goal=None
+         self, query: str, top_k: int = 10, parsed_goal=None, info=None
      ) -> list[TextualMemoryItem]:
          """
          Retrieve information from the internet and convert to TextualMemoryItem format
@@ -136,10 +136,13 @@
              query: Search query
              top_k: Number of results to return
              parsed_goal: Parsed task goal (optional)
+             info (dict): Leave a record of memory consumption.

          Returns:
              List of TextualMemoryItem
          """
+         if not info:
+             info = {"user_id": "", "session_id": ""}
          # Get search results
          search_results = self.google_api.get_all_results(query, max_results=top_k)

@@ -157,8 +160,8 @@
              memory_content = f"Title: {title}\nSummary: {snippet}\nSource: {link}"
              # Create metadata
              metadata = TreeNodeTextualMemoryMetadata(
-                 user_id=None,
-                 session_id=None,
+                 user_id=info.get("user_id", ""),
+                 session_id=info.get("session_id", ""),
                  status="activated",
                  type="fact",  # Internet search results are usually factual information
                  memory_time=datetime.now().strftime("%Y-%m-%d"),
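The Google retriever change threads the new `info` dict through to the node metadata. A short call-site sketch follows, assuming an already constructed `InternetGoogleRetriever` named `internet_retriever` and placeholder IDs.

# `internet_retriever` is assumed to be an InternetGoogleRetriever built elsewhere.
info = {"user_id": "u-123", "session_id": "s-456"}  # placeholder IDs

items = internet_retriever.retrieve_from_internet(
    query="graph memory reorganization",
    top_k=5,
    info=info,
)

for item in items:
    # user_id/session_id now come from `info` instead of being left as None.
    print(item.metadata.user_id, item.metadata.session_id, item.memory[:80])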