MemoryOS 0.1.13__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MemoryOS might be problematic. Click here for more details.

Files changed (84) hide show
  1. {memoryos-0.1.13.dist-info → memoryos-0.2.1.dist-info}/METADATA +78 -49
  2. memoryos-0.2.1.dist-info/RECORD +152 -0
  3. memoryos-0.2.1.dist-info/entry_points.txt +3 -0
  4. memos/__init__.py +1 -1
  5. memos/api/config.py +471 -0
  6. memos/api/exceptions.py +28 -0
  7. memos/api/mcp_serve.py +502 -0
  8. memos/api/product_api.py +35 -0
  9. memos/api/product_models.py +159 -0
  10. memos/api/routers/__init__.py +1 -0
  11. memos/api/routers/product_router.py +358 -0
  12. memos/chunkers/sentence_chunker.py +8 -2
  13. memos/cli.py +113 -0
  14. memos/configs/embedder.py +27 -0
  15. memos/configs/graph_db.py +83 -2
  16. memos/configs/llm.py +48 -0
  17. memos/configs/mem_cube.py +1 -1
  18. memos/configs/mem_reader.py +4 -0
  19. memos/configs/mem_scheduler.py +91 -5
  20. memos/configs/memory.py +10 -4
  21. memos/dependency.py +52 -0
  22. memos/embedders/ark.py +92 -0
  23. memos/embedders/factory.py +4 -0
  24. memos/embedders/sentence_transformer.py +8 -2
  25. memos/embedders/universal_api.py +32 -0
  26. memos/graph_dbs/base.py +2 -2
  27. memos/graph_dbs/factory.py +2 -0
  28. memos/graph_dbs/item.py +46 -0
  29. memos/graph_dbs/neo4j.py +377 -101
  30. memos/graph_dbs/neo4j_community.py +300 -0
  31. memos/llms/base.py +9 -0
  32. memos/llms/deepseek.py +54 -0
  33. memos/llms/factory.py +10 -1
  34. memos/llms/hf.py +170 -13
  35. memos/llms/hf_singleton.py +114 -0
  36. memos/llms/ollama.py +4 -0
  37. memos/llms/openai.py +68 -1
  38. memos/llms/qwen.py +63 -0
  39. memos/llms/vllm.py +153 -0
  40. memos/mem_cube/general.py +77 -16
  41. memos/mem_cube/utils.py +102 -0
  42. memos/mem_os/core.py +131 -41
  43. memos/mem_os/main.py +93 -11
  44. memos/mem_os/product.py +1098 -35
  45. memos/mem_os/utils/default_config.py +352 -0
  46. memos/mem_os/utils/format_utils.py +1154 -0
  47. memos/mem_reader/simple_struct.py +13 -8
  48. memos/mem_scheduler/base_scheduler.py +467 -36
  49. memos/mem_scheduler/general_scheduler.py +125 -244
  50. memos/mem_scheduler/modules/base.py +9 -0
  51. memos/mem_scheduler/modules/dispatcher.py +68 -2
  52. memos/mem_scheduler/modules/misc.py +39 -0
  53. memos/mem_scheduler/modules/monitor.py +228 -49
  54. memos/mem_scheduler/modules/rabbitmq_service.py +317 -0
  55. memos/mem_scheduler/modules/redis_service.py +32 -22
  56. memos/mem_scheduler/modules/retriever.py +250 -23
  57. memos/mem_scheduler/modules/schemas.py +189 -7
  58. memos/mem_scheduler/mos_for_test_scheduler.py +143 -0
  59. memos/mem_scheduler/utils.py +51 -2
  60. memos/mem_user/persistent_user_manager.py +260 -0
  61. memos/memories/activation/item.py +25 -0
  62. memos/memories/activation/kv.py +10 -3
  63. memos/memories/activation/vllmkv.py +219 -0
  64. memos/memories/factory.py +2 -0
  65. memos/memories/textual/general.py +7 -5
  66. memos/memories/textual/item.py +3 -1
  67. memos/memories/textual/tree.py +14 -6
  68. memos/memories/textual/tree_text_memory/organize/conflict.py +198 -0
  69. memos/memories/textual/tree_text_memory/organize/manager.py +72 -23
  70. memos/memories/textual/tree_text_memory/organize/redundancy.py +193 -0
  71. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +233 -0
  72. memos/memories/textual/tree_text_memory/organize/reorganizer.py +606 -0
  73. memos/memories/textual/tree_text_memory/retrieve/recall.py +0 -1
  74. memos/memories/textual/tree_text_memory/retrieve/reranker.py +2 -2
  75. memos/memories/textual/tree_text_memory/retrieve/searcher.py +6 -5
  76. memos/parsers/markitdown.py +8 -2
  77. memos/templates/mem_reader_prompts.py +105 -36
  78. memos/templates/mem_scheduler_prompts.py +96 -47
  79. memos/templates/tree_reorganize_prompts.py +223 -0
  80. memos/vec_dbs/base.py +12 -0
  81. memos/vec_dbs/qdrant.py +46 -20
  82. memoryos-0.1.13.dist-info/RECORD +0 -122
  83. {memoryos-0.1.13.dist-info → memoryos-0.2.1.dist-info}/LICENSE +0 -0
  84. {memoryos-0.1.13.dist-info → memoryos-0.2.1.dist-info}/WHEEL +0 -0
@@ -0,0 +1,606 @@
1
+ import json
2
+ import threading
3
+ import time
4
+ import traceback
5
+
6
+ from collections import Counter, defaultdict
7
+ from concurrent.futures import ThreadPoolExecutor, as_completed
8
+ from queue import PriorityQueue
9
+ from typing import Literal
10
+
11
+ import numpy as np
12
+
13
+ from memos.dependency import require_python_package
14
+ from memos.embedders.factory import OllamaEmbedder
15
+ from memos.graph_dbs.item import GraphDBEdge, GraphDBNode
16
+ from memos.graph_dbs.neo4j import Neo4jGraphDB
17
+ from memos.llms.base import BaseLLM
18
+ from memos.log import get_logger
19
+ from memos.memories.textual.item import TreeNodeTextualMemoryMetadata
20
+ from memos.memories.textual.tree_text_memory.organize.conflict import ConflictHandler
21
+ from memos.memories.textual.tree_text_memory.organize.redundancy import RedundancyHandler
22
+ from memos.memories.textual.tree_text_memory.organize.relation_reason_detector import (
23
+ RelationAndReasoningDetector,
24
+ )
25
+ from memos.templates.tree_reorganize_prompts import LOCAL_SUBCLUSTER_PROMPT, REORGANIZE_PROMPT
26
+
27
+
28
+ logger = get_logger(__name__)
29
+
30
+
31
class QueueMessage:
    """A prioritized work item consumed by the graph-reorganization thread.

    Nodes/edges may be passed either as IDs (``str``) or as materialized
    ``GraphDBNode`` / ``GraphDBEdge`` objects; bare IDs are resolved lazily by
    the consumer before handling (see ``_convert_id_to_node``).
    """

    def __init__(
        self,
        op: Literal["add", "remove", "merge", "update", "end"],
        # `str` for node and edge IDs, `GraphDBNode` and `GraphDBEdge` for actual objects
        before_node: list[str] | list[GraphDBNode] | None = None,
        before_edge: list[str] | list[GraphDBEdge] | None = None,
        after_node: list[str] | list[GraphDBNode] | None = None,
        after_edge: list[str] | list[GraphDBEdge] | None = None,
    ):
        self.op = op
        self.before_node = before_node
        self.before_edge = before_edge
        self.after_node = after_node
        self.after_edge = after_edge

    def __str__(self) -> str:
        return f"QueueMessage(op={self.op}, before_node={self.before_node if self.before_node is None else len(self.before_node)}, after_node={self.after_node if self.after_node is None else len(self.after_node)})"

    def __lt__(self, other: "QueueMessage") -> bool:
        """Order messages for the min-heap PriorityQueue (lower = sooner).

        BUG FIX: the priority map previously omitted "update" even though the
        ``op`` Literal allows it, so enqueueing an op="update" message could
        raise KeyError when PriorityQueue compared it against another entry.
        "update" is ranked with "add"/"remove" as a normal-priority mutation.
        """
        op_priority = {"add": 2, "remove": 2, "update": 2, "merge": 1, "end": 0}
        return op_priority[self.op] < op_priority[other.op]
53
+
54
+
55
class GraphStructureReorganizer:
    """Background maintainer of the memory graph's structure.

    Runs two optional daemon-style threads (enabled by ``is_reorganize``):
      1. a queue consumer that reacts to add/remove/merge messages
         (conflict + redundancy resolution), and
      2. a `schedule`-driven loop that periodically clusters candidate nodes
         and builds local PARENT trees via LLM summarization.
    """

    def __init__(
        self, graph_store: Neo4jGraphDB, llm: BaseLLM, embedder: OllamaEmbedder, is_reorganize: bool
    ):
        self.queue = PriorityQueue()  # Min-heap; ordering defined by QueueMessage.__lt__
        self.graph_store = graph_store
        self.llm = llm
        self.embedder = embedder
        self.relation_detector = RelationAndReasoningDetector(
            self.graph_store, self.llm, self.embedder
        )
        self.conflict = ConflictHandler(graph_store=graph_store, llm=llm, embedder=embedder)
        self.redundancy = RedundancyHandler(graph_store=graph_store, llm=llm, embedder=embedder)

        self.is_reorganize = is_reorganize
        if self.is_reorganize:
            # ____ 1. For queue message driven thread ___________
            self.thread = threading.Thread(target=self._run_message_consumer_loop)
            self.thread.start()
            # ____ 2. For periodic structure optimization _______
            self._stop_scheduler = False
            # Per-scope re-entrancy guard so overlapping schedule ticks skip
            # instead of running two optimizations for the same scope.
            self._is_optimizing = {"LongTermMemory": False, "UserMemory": False}
            self.structure_optimizer_thread = threading.Thread(
                target=self._run_structure_organizer_loop
            )
            self.structure_optimizer_thread.start()

    def add_message(self, message: QueueMessage):
        """Enqueue a message for the consumer thread (non-blocking)."""
        self.queue.put_nowait(message)

    def wait_until_current_task_done(self):
        """
        Wait until:
        1) queue is empty
        2) any running structure optimization is done
        """
        if not self.is_reorganize:
            return

        if not self.queue.empty():
            # NOTE(review): queue.join() blocks until task_done() has been
            # called for every item; the "end" sentinel is consumed without
            # task_done (see _run_message_consumer_loop), so joining after
            # stop() could hang — confirm call ordering with callers.
            self.queue.join()
            logger.debug("Queue is now empty.")

        while any(self._is_optimizing.values()):
            logger.debug(f"Waiting for structure optimizer to finish... {self._is_optimizing}")
            time.sleep(1)
        logger.debug("Structure optimizer is now idle.")

    def _run_message_consumer_loop(self):
        """Consume queue messages until an op="end" sentinel is received."""
        while True:
            message = self.queue.get()
            if message.op == "end":
                # NOTE(review): breaks without queue.task_done() for the
                # sentinel, which leaves the queue's unfinished-task counter
                # non-zero after shutdown.
                break

            try:
                if self._preprocess_message(message):
                    self.handle_message(message)
            except Exception:
                # Never let a single bad message kill the consumer thread.
                logger.error(traceback.format_exc())
            self.queue.task_done()

    @require_python_package(
        import_name="schedule",
        install_command="pip install schedule",
        install_link="https://schedule.readthedocs.io/en/stable/installation.html",
    )
    def _run_structure_organizer_loop(self):
        """
        Use schedule library to periodically trigger structure optimization.
        This runs until the stop flag is set.
        """
        import schedule

        # Each scope is optimized independently every 20 seconds.
        schedule.every(20).seconds.do(self.optimize_structure, scope="LongTermMemory")
        schedule.every(20).seconds.do(self.optimize_structure, scope="UserMemory")

        logger.info("Structure optimizer schedule started.")
        while not getattr(self, "_stop_scheduler", False):
            schedule.run_pending()
            time.sleep(1)

    def stop(self):
        """
        Stop the reorganizer thread.
        """
        if not self.is_reorganize:
            return

        # The "end" sentinel has the lowest priority, so queued work drains first.
        self.add_message(QueueMessage(op="end"))
        self.thread.join()
        logger.info("Reorganize thread stopped.")
        self._stop_scheduler = True
        self.structure_optimizer_thread.join()
        logger.info("Structure optimizer stopped.")

    def handle_message(self, message: QueueMessage):
        """Dispatch a preprocessed message to its op-specific handler."""
        # NOTE(review): no entry for op="update" although QueueMessage's
        # Literal allows it — an "update" message would raise KeyError here
        # (caught and logged by the consumer loop). Confirm whether "update"
        # is ever enqueued.
        handle_map = {
            "add": self.handle_add,
            "remove": self.handle_remove,
            "merge": self.handle_merge,
        }
        handle_map[message.op](message)
        logger.debug(f"message queue size: {self.queue.qsize()}")

    def handle_add(self, message: QueueMessage):
        """Resolve conflicts and redundancies introduced by a newly added node."""
        logger.debug(f"Handling add operation: {str(message)[:500]}")
        # ———————— 1. check for conflicts ————————
        added_node = message.after_node[0]
        conflicts = self.conflict.detect(added_node, scope=added_node.metadata.memory_type)
        if conflicts:
            for added_node, existing_node in conflicts:
                self.conflict.resolve(added_node, existing_node)
                logger.info(f"Resolved conflict between {added_node.id} and {existing_node.id}.")

        # ———————— 2. check for redundancy ————————
        redundancies = self.redundancy.detect(added_node, scope=added_node.metadata.memory_type)
        if redundancies:
            for added_node, existing_node in redundancies:
                self.redundancy.resolve_two_nodes(added_node, existing_node)
                logger.info(f"Resolved redundancy between {added_node.id} and {existing_node.id}.")

    def handle_remove(self, message: QueueMessage):
        """Currently a no-op: removals are only logged."""
        logger.debug(f"Handling remove operation: {str(message)[:50]}")

    def handle_merge(self, message: QueueMessage):
        """Re-check the merged node for internal redundancy."""
        after_node = message.after_node[0]
        logger.debug(f"Handling merge operation: <{after_node.memory}>")
        self.redundancy.resolve_one_node(after_node)

    def optimize_structure(
        self,
        scope: str = "LongTermMemory",
        local_tree_threshold: int = 10,
        min_cluster_size: int = 3,
        min_group_size: int = 5,
    ):
        """
        Periodically reorganize the graph:
        1. Weakly partition nodes into clusters.
        2. Summarize each cluster.
        3. Create parent nodes and build local PARENT trees.

        Args:
            scope: memory partition to optimize ("LongTermMemory" or "UserMemory").
            local_tree_threshold: max cluster size handled by a single parent;
                larger clusters are sub-clustered first.
            min_cluster_size: clusters at or below this size are left untouched.
            min_group_size: minimum number of candidate nodes required to run at all.
        """
        if self._is_optimizing[scope]:
            logger.info(f"Already optimizing for {scope}. Skipping.")
            return

        if self.graph_store.count_nodes(scope) == 0:
            logger.debug(f"[GraphStructureReorganize] No nodes for scope={scope}. Skip.")
            return

        self._is_optimizing[scope] = True
        try:
            logger.debug(
                f"[GraphStructureReorganize] 🔍 Starting structure optimization for scope: {scope}"
            )

            logger.debug(
                f"Num of scope in self.graph_store is {self.graph_store.get_memory_count(scope)}"
            )
            # Load candidate nodes
            raw_nodes = self.graph_store.get_structure_optimization_candidates(scope)
            nodes = [GraphDBNode(**n) for n in raw_nodes]

            if not nodes:
                logger.info("[GraphStructureReorganize] No nodes to optimize. Skipping.")
                return

            if len(nodes) < min_group_size:
                logger.info(
                    f"[GraphStructureReorganize] Only {len(nodes)} candidate nodes found. Not enough to reorganize. Skipping."
                )
                return

            logger.info(f"[GraphStructureReorganize] Loaded {len(nodes)} nodes.")

            # Step 2: Partition nodes
            partitioned_groups = self._partition(nodes)

            logger.info(
                f"[GraphStructureReorganize] Partitioned into {len(partitioned_groups)} clusters."
            )

            # Each cluster is summarized/written independently; failures are
            # isolated per cluster and only logged.
            with ThreadPoolExecutor(max_workers=4) as executor:
                futures = []
                for cluster_nodes in partitioned_groups:
                    futures.append(
                        executor.submit(
                            self._process_cluster_and_write,
                            cluster_nodes,
                            scope,
                            local_tree_threshold,
                            min_cluster_size,
                        )
                    )

                for f in as_completed(futures):
                    try:
                        f.result()
                    except Exception as e:
                        logger.warning(f"[Reorganize] Cluster processing failed: {e}")
            # NOTE(review): "finished" is logged both here and in the finally
            # block below, so a successful run logs it twice.
            logger.info("[GraphStructure Reorganize] Structure optimization finished.")

        finally:
            self._is_optimizing[scope] = False
            logger.info("[GraphStructureReorganize] Structure optimization finished.")

    def _process_cluster_and_write(
        self,
        cluster_nodes: list[GraphDBNode],
        scope: str,
        local_tree_threshold: int,
        min_cluster_size: int,
    ):
        """Summarize one cluster, write parent node(s)/edges, then enrich relations.

        Small clusters get a single summarized parent; large ones are LLM
        sub-clustered first and get a two-level PARENT tree.
        """
        if len(cluster_nodes) <= min_cluster_size:
            return

        if len(cluster_nodes) <= local_tree_threshold:
            # Small cluster ➜ single parent
            parent_node = self._summarize_cluster(cluster_nodes, scope)
            self._create_parent_node(parent_node)
            self._link_cluster_nodes(parent_node, cluster_nodes)
        else:
            # Large cluster ➜ local sub-clustering
            sub_clusters = self._local_subcluster(cluster_nodes)
            sub_parents = []

            for sub_nodes in sub_clusters:
                if len(sub_nodes) < min_cluster_size:
                    continue  # Skip tiny noise
                sub_parent_node = self._summarize_cluster(sub_nodes, scope)
                self._create_parent_node(sub_parent_node)
                self._link_cluster_nodes(sub_parent_node, sub_nodes)
                sub_parents.append(sub_parent_node)

            if sub_parents:
                cluster_parent_node = self._summarize_cluster(cluster_nodes, scope)
                self._create_parent_node(cluster_parent_node)
                for sub_parent in sub_parents:
                    self.graph_store.add_edge(cluster_parent_node.id, sub_parent.id, "PARENT")

        logger.info("Adding relations/reasons")
        nodes_to_check = cluster_nodes
        # Exclude intra-cluster nodes so the detector only proposes
        # relations to nodes outside this cluster.
        exclude_ids = [n.id for n in nodes_to_check]

        with ThreadPoolExecutor(max_workers=4) as executor:
            futures = []
            for node in nodes_to_check:
                futures.append(
                    executor.submit(
                        self.relation_detector.process_node,
                        node,
                        exclude_ids,
                        10,  # top_k
                    )
                )

            for f in as_completed(futures):
                results = f.result()

                # 1) Add pairwise relations
                for rel in results["relations"]:
                    if not self.graph_store.edge_exists(
                        rel["source_id"], rel["target_id"], rel["relation_type"]
                    ):
                        self.graph_store.add_edge(
                            rel["source_id"], rel["target_id"], rel["relation_type"]
                        )

                # 2) Add inferred nodes and link to sources
                for inf_node in results["inferred_nodes"]:
                    self.graph_store.add_node(
                        inf_node.id,
                        inf_node.memory,
                        inf_node.metadata.model_dump(exclude_none=True),
                    )
                    for src_id in inf_node.metadata.sources:
                        self.graph_store.add_edge(src_id, inf_node.id, "INFERS")

                # 3) Add sequence links
                for seq in results["sequence_links"]:
                    if not self.graph_store.edge_exists(seq["from_id"], seq["to_id"], "FOLLOWS"):
                        self.graph_store.add_edge(seq["from_id"], seq["to_id"], "FOLLOWS")

                # 4) Add aggregate concept nodes
                for agg_node in results["aggregate_nodes"]:
                    self.graph_store.add_node(
                        agg_node.id,
                        agg_node.memory,
                        agg_node.metadata.model_dump(exclude_none=True),
                    )
                    for child_id in agg_node.metadata.sources:
                        self.graph_store.add_edge(agg_node.id, child_id, "AGGREGATES")

        logger.info("[Reorganizer] Cluster relation/reasoning done.")

    def _local_subcluster(self, cluster_nodes: list[GraphDBNode]) -> list[list[GraphDBNode]]:
        """
        Use LLM to split a large cluster into semantically coherent sub-clusters.

        Each node appears in at most one sub-cluster (first assignment wins);
        sub-clusters with fewer than 2 resolved nodes are dropped.
        """
        if not cluster_nodes:
            return []

        # Prepare conversation-like input: ID + key + value
        scene_lines = []
        for node in cluster_nodes:
            line = f"- ID: {node.id} | Key: {node.metadata.key} | Value: {node.memory}"
            scene_lines.append(line)

        joined_scene = "\n".join(scene_lines)
        prompt = LOCAL_SUBCLUSTER_PROMPT.replace("{joined_scene}", joined_scene)

        messages = [{"role": "user", "content": prompt}]
        response_text = self.llm.generate(messages)
        response_json = self._parse_json_result(response_text)
        assigned_ids = set()
        result_subclusters = []

        for cluster in response_json.get("clusters", []):
            ids = []
            for nid in cluster.get("ids", []):
                if nid not in assigned_ids:
                    ids.append(nid)
                    assigned_ids.add(nid)
            sub_nodes = [node for node in cluster_nodes if node.id in ids]
            if len(sub_nodes) >= 2:
                result_subclusters.append(sub_nodes)

        return result_subclusters

    @require_python_package(
        import_name="sklearn",
        install_command="pip install scikit-learn",
        install_link="https://scikit-learn.org/stable/install.html",
    )
    def _partition(self, nodes, min_cluster_size: int = 3, max_cluster_size: int = 20):
        """
        Partition nodes by:
        1) Frequent tags (top N & above threshold)
        2) Remaining nodes by embedding clustering (MiniBatchKMeans)
        3) Small clusters merged or assigned to 'Other'

        Args:
            nodes: List of GraphDBNode
            min_cluster_size: Min size to keep a cluster as-is
            max_cluster_size: Max size of any emitted cluster; larger groups
                are chunked (tag path) or recursively re-clustered (embedding path)

        Returns:
            List of clusters, each as a list of GraphDBNode
        """
        from sklearn.cluster import MiniBatchKMeans

        # 1) Count all tags
        tag_counter = Counter()
        for node in nodes:
            for tag in node.metadata.tags:
                tag_counter[tag] += 1

        # Select frequent tags: union of the 50 most common tags and any tag
        # appearing at least 50 times.
        top_n_tags = {tag for tag, count in tag_counter.most_common(50)}
        threshold_tags = {tag for tag, count in tag_counter.items() if count >= 50}
        frequent_tags = top_n_tags | threshold_tags

        # Group nodes by tags (each node joins at most one tag group — the
        # first frequent tag encountered wins).
        tag_groups = defaultdict(list)

        for node in nodes:
            for tag in node.metadata.tags:
                if tag in frequent_tags:
                    tag_groups[tag].append(node)
                    break

        filtered_tag_clusters = []
        assigned_ids = set()
        for tag, group in tag_groups.items():
            if len(group) >= min_cluster_size:
                # Split large groups into chunks of at most max_cluster_size
                for i in range(0, len(group), max_cluster_size):
                    sub_group = group[i : i + max_cluster_size]
                    filtered_tag_clusters.append(sub_group)
                    assigned_ids.update(n.id for n in sub_group)
            else:
                # Nodes from dropped tags fall through to embedding clustering.
                logger.info(f"... dropped tag {tag} due to low size ...")

        logger.info(
            f"[MixedPartition] Created {len(filtered_tag_clusters)} clusters from tags. "
            f"Nodes grouped by tags: {len(assigned_ids)} / {len(nodes)}"
        )

        # Remaining nodes -> embedding clustering
        remaining_nodes = [n for n in nodes if n.id not in assigned_ids]
        logger.info(
            f"[MixedPartition] Remaining nodes for embedding clustering: {len(remaining_nodes)}"
        )

        embedding_clusters = []

        def recursive_clustering(nodes_list):
            """Recursively split clusters until each is <= max_cluster_size."""
            if len(nodes_list) <= max_cluster_size:
                return [nodes_list]

            # Try kmeans with k = ceil(len(nodes) / max_cluster_size)
            # NOTE(review): nodes without an embedding are filtered out of x
            # but still zipped against labels below (strict=False), so label
            # alignment assumes embeddings are present for all nodes — verify.
            x = np.array([n.metadata.embedding for n in nodes_list if n.metadata.embedding])
            if len(x) < 2:
                return [nodes_list]

            k = min(len(x), (len(nodes_list) + max_cluster_size - 1) // max_cluster_size)
            k = max(1, min(k, len(x)))

            try:
                kmeans = MiniBatchKMeans(n_clusters=k, batch_size=256, random_state=42)
                labels = kmeans.fit_predict(x)

                label_groups = defaultdict(list)
                for node, label in zip(nodes_list, labels, strict=False):
                    label_groups[label].append(node)

                result = []
                for sub_group in label_groups.values():
                    result.extend(recursive_clustering(sub_group))
                return result
            except Exception as e:
                logger.warning(f"Clustering failed: {e}, falling back to single cluster.")
                return [nodes_list]

        if remaining_nodes:
            clusters = recursive_clustering(remaining_nodes)
            embedding_clusters.extend(clusters)
            logger.info(
                f"[MixedPartition] Created {len(embedding_clusters)} clusters from embeddings."
            )

        # Merge all clusters
        all_clusters = filtered_tag_clusters + embedding_clusters

        # Handle small clusters (< min_cluster_size)
        final_clusters = []
        small_nodes = []
        for group in all_clusters:
            if len(group) < min_cluster_size:
                small_nodes.extend(group)
            else:
                final_clusters.append(group)

        if small_nodes:
            # All leftovers are pooled into a single catch-all cluster.
            final_clusters.append(small_nodes)
            logger.info(f"[MixedPartition] {len(small_nodes)} nodes assigned to 'Other' cluster.")

        logger.info(f"[MixedPartition] Total final clusters: {len(final_clusters)}")
        return final_clusters

    def _summarize_cluster(self, cluster_nodes: list[GraphDBNode], scope: str) -> GraphDBNode:
        """
        Generate a cluster label using LLM, based on top keys in the cluster.

        Returns a new parent GraphDBNode (type="topic") whose sources list the
        summarized children. Raises ValueError on an empty cluster.
        """
        if not cluster_nodes:
            raise ValueError("Cluster nodes cannot be empty.")

        memories_items_text = "\n\n".join(
            [
                f"{i}. key: {n.metadata.key}\nvalue: {n.memory}\nsummary:{n.metadata.background}"
                for i, n in enumerate(cluster_nodes)
            ]
        )

        # Build prompt
        prompt = REORGANIZE_PROMPT.replace("{memory_items_text}", memories_items_text)

        messages = [{"role": "user", "content": prompt}]
        response_text = self.llm.generate(messages)
        response_json = self._parse_json_result(response_text)

        # Extract fields (defaults keep the node constructible when the LLM
        # response was unparseable and _parse_json_result returned {}).
        parent_key = response_json.get("key", "").strip()
        parent_value = response_json.get("value", "").strip()
        parent_tags = response_json.get("tags", [])
        parent_background = response_json.get("summary", "").strip()

        embedding = self.embedder.embed([parent_value])[0]

        parent_node = GraphDBNode(
            memory=parent_value,
            metadata=TreeNodeTextualMemoryMetadata(
                user_id="",  # TODO: summarized node: no user_id
                session_id="",  # TODO: summarized node: no session_id
                memory_type=scope,
                status="activated",
                key=parent_key,
                tags=parent_tags,
                embedding=embedding,
                usage=[],
                sources=[n.id for n in cluster_nodes],
                background=parent_background,
                confidence=0.99,
                type="topic",
            ),
        )
        return parent_node

    def _parse_json_result(self, response_text):
        """Parse an LLM response as JSON, stripping markdown code fences.

        Returns {} (and logs a warning) if the response is not valid JSON.
        """
        try:
            # NOTE(review): this also removes the literal substring "json"
            # anywhere in the payload, not just the ```json fence marker.
            response_text = response_text.replace("```", "").replace("json", "")
            response_json = json.loads(response_text)
            return response_json
        except json.JSONDecodeError as e:
            logger.warning(
                f"Failed to parse LLM response as JSON: {e}\nRaw response:\n{response_text}"
            )
            return {}

    def _create_parent_node(self, parent_node: GraphDBNode) -> None:
        """
        Create a new parent node for the cluster.
        """
        self.graph_store.add_node(
            parent_node.id,
            parent_node.memory,
            parent_node.metadata.model_dump(exclude_none=True),
        )

    def _link_cluster_nodes(self, parent_node: GraphDBNode, child_nodes: list[GraphDBNode]):
        """
        Add PARENT edges from the parent node to all nodes in the cluster.
        """
        for child in child_nodes:
            if not self.graph_store.edge_exists(
                parent_node.id, child.id, "PARENT", direction="OUTGOING"
            ):
                self.graph_store.add_edge(parent_node.id, child.id, "PARENT")

    def _preprocess_message(self, message: QueueMessage) -> bool:
        """Resolve node IDs to objects; return False if any node is missing."""
        message = self._convert_id_to_node(message)
        if message.after_node is None or None in message.after_node:
            logger.debug(
                f"Found non-existent node in after_node in message: {message}, skip this message."
            )
            return False
        return True

    def _convert_id_to_node(self, message: QueueMessage) -> QueueMessage:
        """
        Convert IDs in the message.after_node to GraphDBNode objects.

        Unresolvable IDs are replaced with None (in place) so the caller can
        detect and skip the message.
        """
        for i, node in enumerate(message.after_node or []):
            if not isinstance(node, str):
                continue
            raw_node = self.graph_store.get_node(node)
            if raw_node is None:
                logger.debug(f"Node with ID {node} not found in the graph store.")
                message.after_node[i] = None
            else:
                message.after_node[i] = GraphDBNode(**raw_node)
        return message
@@ -56,7 +56,6 @@ class GraphMemoryRetriever:
56
56
  # Step 3: Merge and deduplicate results
57
57
  combined = {item.id: item for item in graph_results + vector_results}
58
58
 
59
- # Debug: 打印在 graph_results 中但不在 combined 中的 id
60
59
  graph_ids = {item.id for item in graph_results}
61
60
  combined_ids = set(combined.keys())
62
61
  lost_ids = graph_ids - combined_ids
@@ -1,7 +1,7 @@
1
1
  import numpy as np
2
2
 
3
3
  from memos.embedders.factory import OllamaEmbedder
4
- from memos.llms.factory import OllamaLLM, OpenAILLM
4
+ from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
5
5
  from memos.memories.textual.item import TextualMemoryItem
6
6
  from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
7
7
 
@@ -41,7 +41,7 @@ class MemoryReranker:
41
41
  Rank retrieved memory cards by structural priority and contextual similarity.
42
42
  """
43
43
 
44
- def __init__(self, llm: OpenAILLM | OllamaLLM, embedder: OllamaEmbedder):
44
+ def __init__(self, llm: OpenAILLM | OllamaLLM | AzureLLM, embedder: OllamaEmbedder):
45
45
  self.llm = llm
46
46
  self.embedder = embedder
47
47
 
@@ -5,7 +5,7 @@ from datetime import datetime
5
5
 
6
6
  from memos.embedders.factory import OllamaEmbedder
7
7
  from memos.graph_dbs.factory import Neo4jGraphDB
8
- from memos.llms.factory import OllamaLLM, OpenAILLM
8
+ from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
9
9
  from memos.memories.textual.item import SearchedTreeNodeTextualMemoryMetadata, TextualMemoryItem
10
10
 
11
11
  from .internet_retriever_factory import InternetRetrieverFactory
@@ -18,7 +18,7 @@ from .task_goal_parser import TaskGoalParser
18
18
  class Searcher:
19
19
  def __init__(
20
20
  self,
21
- dispatcher_llm: OpenAILLM | OllamaLLM,
21
+ dispatcher_llm: OpenAILLM | OllamaLLM | AzureLLM,
22
22
  graph_store: Neo4jGraphDB,
23
23
  embedder: OllamaEmbedder,
24
24
  internet_retriever: InternetRetrieverFactory | None = None,
@@ -176,9 +176,10 @@ class Searcher:
176
176
  for item, score in sorted(deduped_result.values(), key=lambda pair: pair[1], reverse=True)[
177
177
  :top_k
178
178
  ]:
179
- new_meta = SearchedTreeNodeTextualMemoryMetadata(
180
- **item.metadata.model_dump(), relativity=score
181
- )
179
+ meta_data = item.metadata.model_dump()
180
+ if "relativity" not in meta_data:
181
+ meta_data["relativity"] = score
182
+ new_meta = SearchedTreeNodeTextualMemoryMetadata(**meta_data)
182
183
  searched_res.append(
183
184
  TextualMemoryItem(id=item.id, memory=item.memory, metadata=new_meta)
184
185
  )
@@ -1,6 +1,5 @@
1
- from markitdown import MarkItDown
2
-
3
1
  from memos.configs.parser import MarkItDownParserConfig
2
+ from memos.dependency import require_python_package
4
3
  from memos.log import get_logger
5
4
  from memos.parsers.base import BaseParser
6
5
 
@@ -14,7 +13,14 @@ class MarkItDownParser(BaseParser):
14
13
  def __init__(self, config: MarkItDownParserConfig):
15
14
  self.config = config
16
15
 
16
+ @require_python_package(
17
+ import_name="markitdown",
18
+ install_command="pip install markitdown[all]",
19
+ install_link="https://github.com/microsoft/markitdown",
20
+ )
17
21
  def parse(self, file_path: str) -> str:
22
+ from markitdown import MarkItDown
23
+
18
24
  """Parse the file at the given path and return its content as a MarkDown string."""
19
25
  md = MarkItDown(enable_plugins=False)
20
26
  result = md.convert(file_path)