MemoryOS 2.0.3 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (315)
  1. memoryos-2.0.3.dist-info/METADATA +418 -0
  2. memoryos-2.0.3.dist-info/RECORD +315 -0
  3. memoryos-2.0.3.dist-info/WHEEL +4 -0
  4. memoryos-2.0.3.dist-info/entry_points.txt +3 -0
  5. memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
  6. memos/__init__.py +20 -0
  7. memos/api/client.py +571 -0
  8. memos/api/config.py +1018 -0
  9. memos/api/context/dependencies.py +50 -0
  10. memos/api/exceptions.py +53 -0
  11. memos/api/handlers/__init__.py +62 -0
  12. memos/api/handlers/add_handler.py +158 -0
  13. memos/api/handlers/base_handler.py +194 -0
  14. memos/api/handlers/chat_handler.py +1401 -0
  15. memos/api/handlers/component_init.py +388 -0
  16. memos/api/handlers/config_builders.py +190 -0
  17. memos/api/handlers/feedback_handler.py +93 -0
  18. memos/api/handlers/formatters_handler.py +237 -0
  19. memos/api/handlers/memory_handler.py +316 -0
  20. memos/api/handlers/scheduler_handler.py +497 -0
  21. memos/api/handlers/search_handler.py +222 -0
  22. memos/api/handlers/suggestion_handler.py +117 -0
  23. memos/api/mcp_serve.py +614 -0
  24. memos/api/middleware/request_context.py +101 -0
  25. memos/api/product_api.py +38 -0
  26. memos/api/product_models.py +1206 -0
  27. memos/api/routers/__init__.py +1 -0
  28. memos/api/routers/product_router.py +477 -0
  29. memos/api/routers/server_router.py +394 -0
  30. memos/api/server_api.py +44 -0
  31. memos/api/start_api.py +433 -0
  32. memos/chunkers/__init__.py +4 -0
  33. memos/chunkers/base.py +24 -0
  34. memos/chunkers/charactertext_chunker.py +41 -0
  35. memos/chunkers/factory.py +24 -0
  36. memos/chunkers/markdown_chunker.py +62 -0
  37. memos/chunkers/sentence_chunker.py +54 -0
  38. memos/chunkers/simple_chunker.py +50 -0
  39. memos/cli.py +113 -0
  40. memos/configs/__init__.py +0 -0
  41. memos/configs/base.py +82 -0
  42. memos/configs/chunker.py +59 -0
  43. memos/configs/embedder.py +88 -0
  44. memos/configs/graph_db.py +236 -0
  45. memos/configs/internet_retriever.py +100 -0
  46. memos/configs/llm.py +151 -0
  47. memos/configs/mem_agent.py +54 -0
  48. memos/configs/mem_chat.py +81 -0
  49. memos/configs/mem_cube.py +105 -0
  50. memos/configs/mem_os.py +83 -0
  51. memos/configs/mem_reader.py +91 -0
  52. memos/configs/mem_scheduler.py +385 -0
  53. memos/configs/mem_user.py +70 -0
  54. memos/configs/memory.py +324 -0
  55. memos/configs/parser.py +38 -0
  56. memos/configs/reranker.py +18 -0
  57. memos/configs/utils.py +8 -0
  58. memos/configs/vec_db.py +80 -0
  59. memos/context/context.py +355 -0
  60. memos/dependency.py +52 -0
  61. memos/deprecation.py +262 -0
  62. memos/embedders/__init__.py +0 -0
  63. memos/embedders/ark.py +95 -0
  64. memos/embedders/base.py +106 -0
  65. memos/embedders/factory.py +29 -0
  66. memos/embedders/ollama.py +77 -0
  67. memos/embedders/sentence_transformer.py +49 -0
  68. memos/embedders/universal_api.py +51 -0
  69. memos/exceptions.py +30 -0
  70. memos/graph_dbs/__init__.py +0 -0
  71. memos/graph_dbs/base.py +274 -0
  72. memos/graph_dbs/factory.py +27 -0
  73. memos/graph_dbs/item.py +46 -0
  74. memos/graph_dbs/nebular.py +1794 -0
  75. memos/graph_dbs/neo4j.py +1942 -0
  76. memos/graph_dbs/neo4j_community.py +1058 -0
  77. memos/graph_dbs/polardb.py +5446 -0
  78. memos/hello_world.py +97 -0
  79. memos/llms/__init__.py +0 -0
  80. memos/llms/base.py +25 -0
  81. memos/llms/deepseek.py +13 -0
  82. memos/llms/factory.py +38 -0
  83. memos/llms/hf.py +443 -0
  84. memos/llms/hf_singleton.py +114 -0
  85. memos/llms/ollama.py +135 -0
  86. memos/llms/openai.py +222 -0
  87. memos/llms/openai_new.py +198 -0
  88. memos/llms/qwen.py +13 -0
  89. memos/llms/utils.py +14 -0
  90. memos/llms/vllm.py +218 -0
  91. memos/log.py +237 -0
  92. memos/mem_agent/base.py +19 -0
  93. memos/mem_agent/deepsearch_agent.py +391 -0
  94. memos/mem_agent/factory.py +36 -0
  95. memos/mem_chat/__init__.py +0 -0
  96. memos/mem_chat/base.py +30 -0
  97. memos/mem_chat/factory.py +21 -0
  98. memos/mem_chat/simple.py +200 -0
  99. memos/mem_cube/__init__.py +0 -0
  100. memos/mem_cube/base.py +30 -0
  101. memos/mem_cube/general.py +240 -0
  102. memos/mem_cube/navie.py +172 -0
  103. memos/mem_cube/utils.py +169 -0
  104. memos/mem_feedback/base.py +15 -0
  105. memos/mem_feedback/feedback.py +1192 -0
  106. memos/mem_feedback/simple_feedback.py +40 -0
  107. memos/mem_feedback/utils.py +230 -0
  108. memos/mem_os/client.py +5 -0
  109. memos/mem_os/core.py +1203 -0
  110. memos/mem_os/main.py +582 -0
  111. memos/mem_os/product.py +1608 -0
  112. memos/mem_os/product_server.py +455 -0
  113. memos/mem_os/utils/default_config.py +359 -0
  114. memos/mem_os/utils/format_utils.py +1403 -0
  115. memos/mem_os/utils/reference_utils.py +162 -0
  116. memos/mem_reader/__init__.py +0 -0
  117. memos/mem_reader/base.py +47 -0
  118. memos/mem_reader/factory.py +53 -0
  119. memos/mem_reader/memory.py +298 -0
  120. memos/mem_reader/multi_modal_struct.py +965 -0
  121. memos/mem_reader/read_multi_modal/__init__.py +43 -0
  122. memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
  123. memos/mem_reader/read_multi_modal/base.py +273 -0
  124. memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
  125. memos/mem_reader/read_multi_modal/image_parser.py +359 -0
  126. memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
  127. memos/mem_reader/read_multi_modal/string_parser.py +139 -0
  128. memos/mem_reader/read_multi_modal/system_parser.py +327 -0
  129. memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
  130. memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
  131. memos/mem_reader/read_multi_modal/user_parser.py +218 -0
  132. memos/mem_reader/read_multi_modal/utils.py +358 -0
  133. memos/mem_reader/simple_struct.py +912 -0
  134. memos/mem_reader/strategy_struct.py +163 -0
  135. memos/mem_reader/utils.py +157 -0
  136. memos/mem_scheduler/__init__.py +0 -0
  137. memos/mem_scheduler/analyzer/__init__.py +0 -0
  138. memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
  139. memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
  140. memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
  141. memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
  142. memos/mem_scheduler/base_scheduler.py +1319 -0
  143. memos/mem_scheduler/general_modules/__init__.py +0 -0
  144. memos/mem_scheduler/general_modules/api_misc.py +137 -0
  145. memos/mem_scheduler/general_modules/base.py +80 -0
  146. memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
  147. memos/mem_scheduler/general_modules/misc.py +313 -0
  148. memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
  149. memos/mem_scheduler/general_modules/task_threads.py +315 -0
  150. memos/mem_scheduler/general_scheduler.py +1495 -0
  151. memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
  152. memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
  153. memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
  154. memos/mem_scheduler/monitors/__init__.py +0 -0
  155. memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
  156. memos/mem_scheduler/monitors/general_monitor.py +394 -0
  157. memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
  158. memos/mem_scheduler/optimized_scheduler.py +410 -0
  159. memos/mem_scheduler/orm_modules/__init__.py +0 -0
  160. memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
  161. memos/mem_scheduler/orm_modules/base_model.py +729 -0
  162. memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
  163. memos/mem_scheduler/orm_modules/redis_model.py +699 -0
  164. memos/mem_scheduler/scheduler_factory.py +23 -0
  165. memos/mem_scheduler/schemas/__init__.py +0 -0
  166. memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
  167. memos/mem_scheduler/schemas/api_schemas.py +233 -0
  168. memos/mem_scheduler/schemas/general_schemas.py +55 -0
  169. memos/mem_scheduler/schemas/message_schemas.py +173 -0
  170. memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
  171. memos/mem_scheduler/schemas/task_schemas.py +132 -0
  172. memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
  173. memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
  174. memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
  175. memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
  176. memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
  177. memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
  178. memos/mem_scheduler/utils/__init__.py +0 -0
  179. memos/mem_scheduler/utils/api_utils.py +77 -0
  180. memos/mem_scheduler/utils/config_utils.py +100 -0
  181. memos/mem_scheduler/utils/db_utils.py +50 -0
  182. memos/mem_scheduler/utils/filter_utils.py +176 -0
  183. memos/mem_scheduler/utils/metrics.py +125 -0
  184. memos/mem_scheduler/utils/misc_utils.py +290 -0
  185. memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
  186. memos/mem_scheduler/utils/status_tracker.py +229 -0
  187. memos/mem_scheduler/webservice_modules/__init__.py +0 -0
  188. memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
  189. memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
  190. memos/mem_user/factory.py +94 -0
  191. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  192. memos/mem_user/mysql_user_manager.py +502 -0
  193. memos/mem_user/persistent_factory.py +98 -0
  194. memos/mem_user/persistent_user_manager.py +260 -0
  195. memos/mem_user/redis_persistent_user_manager.py +225 -0
  196. memos/mem_user/user_manager.py +488 -0
  197. memos/memories/__init__.py +0 -0
  198. memos/memories/activation/__init__.py +0 -0
  199. memos/memories/activation/base.py +42 -0
  200. memos/memories/activation/item.py +56 -0
  201. memos/memories/activation/kv.py +292 -0
  202. memos/memories/activation/vllmkv.py +219 -0
  203. memos/memories/base.py +19 -0
  204. memos/memories/factory.py +42 -0
  205. memos/memories/parametric/__init__.py +0 -0
  206. memos/memories/parametric/base.py +19 -0
  207. memos/memories/parametric/item.py +11 -0
  208. memos/memories/parametric/lora.py +41 -0
  209. memos/memories/textual/__init__.py +0 -0
  210. memos/memories/textual/base.py +92 -0
  211. memos/memories/textual/general.py +236 -0
  212. memos/memories/textual/item.py +304 -0
  213. memos/memories/textual/naive.py +187 -0
  214. memos/memories/textual/prefer_text_memory/__init__.py +0 -0
  215. memos/memories/textual/prefer_text_memory/adder.py +504 -0
  216. memos/memories/textual/prefer_text_memory/config.py +106 -0
  217. memos/memories/textual/prefer_text_memory/extractor.py +221 -0
  218. memos/memories/textual/prefer_text_memory/factory.py +85 -0
  219. memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
  220. memos/memories/textual/prefer_text_memory/spliter.py +132 -0
  221. memos/memories/textual/prefer_text_memory/utils.py +93 -0
  222. memos/memories/textual/preference.py +344 -0
  223. memos/memories/textual/simple_preference.py +161 -0
  224. memos/memories/textual/simple_tree.py +69 -0
  225. memos/memories/textual/tree.py +459 -0
  226. memos/memories/textual/tree_text_memory/__init__.py +0 -0
  227. memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
  228. memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
  229. memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
  230. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
  231. memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
  232. memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
  233. memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
  234. memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
  235. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
  236. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
  237. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
  238. memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
  239. memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
  240. memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
  241. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
  242. memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
  243. memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
  244. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
  245. memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
  246. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
  247. memos/memos_tools/dinding_report_bot.py +453 -0
  248. memos/memos_tools/lockfree_dict.py +120 -0
  249. memos/memos_tools/notification_service.py +44 -0
  250. memos/memos_tools/notification_utils.py +142 -0
  251. memos/memos_tools/singleton.py +174 -0
  252. memos/memos_tools/thread_safe_dict.py +310 -0
  253. memos/memos_tools/thread_safe_dict_segment.py +382 -0
  254. memos/multi_mem_cube/__init__.py +0 -0
  255. memos/multi_mem_cube/composite_cube.py +86 -0
  256. memos/multi_mem_cube/single_cube.py +874 -0
  257. memos/multi_mem_cube/views.py +54 -0
  258. memos/parsers/__init__.py +0 -0
  259. memos/parsers/base.py +15 -0
  260. memos/parsers/factory.py +21 -0
  261. memos/parsers/markitdown.py +28 -0
  262. memos/reranker/__init__.py +4 -0
  263. memos/reranker/base.py +25 -0
  264. memos/reranker/concat.py +103 -0
  265. memos/reranker/cosine_local.py +102 -0
  266. memos/reranker/factory.py +72 -0
  267. memos/reranker/http_bge.py +324 -0
  268. memos/reranker/http_bge_strategy.py +327 -0
  269. memos/reranker/noop.py +19 -0
  270. memos/reranker/strategies/__init__.py +4 -0
  271. memos/reranker/strategies/base.py +61 -0
  272. memos/reranker/strategies/concat_background.py +94 -0
  273. memos/reranker/strategies/concat_docsource.py +110 -0
  274. memos/reranker/strategies/dialogue_common.py +109 -0
  275. memos/reranker/strategies/factory.py +31 -0
  276. memos/reranker/strategies/single_turn.py +107 -0
  277. memos/reranker/strategies/singleturn_outmem.py +98 -0
  278. memos/settings.py +10 -0
  279. memos/templates/__init__.py +0 -0
  280. memos/templates/advanced_search_prompts.py +211 -0
  281. memos/templates/cloud_service_prompt.py +107 -0
  282. memos/templates/instruction_completion.py +66 -0
  283. memos/templates/mem_agent_prompts.py +85 -0
  284. memos/templates/mem_feedback_prompts.py +822 -0
  285. memos/templates/mem_reader_prompts.py +1096 -0
  286. memos/templates/mem_reader_strategy_prompts.py +238 -0
  287. memos/templates/mem_scheduler_prompts.py +626 -0
  288. memos/templates/mem_search_prompts.py +93 -0
  289. memos/templates/mos_prompts.py +403 -0
  290. memos/templates/prefer_complete_prompt.py +735 -0
  291. memos/templates/tool_mem_prompts.py +139 -0
  292. memos/templates/tree_reorganize_prompts.py +230 -0
  293. memos/types/__init__.py +34 -0
  294. memos/types/general_types.py +151 -0
  295. memos/types/openai_chat_completion_types/__init__.py +15 -0
  296. memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
  297. memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
  298. memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
  299. memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
  300. memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
  301. memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
  302. memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
  303. memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
  304. memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
  305. memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
  306. memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
  307. memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
  308. memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
  309. memos/utils.py +123 -0
  310. memos/vec_dbs/__init__.py +0 -0
  311. memos/vec_dbs/base.py +117 -0
  312. memos/vec_dbs/factory.py +23 -0
  313. memos/vec_dbs/item.py +50 -0
  314. memos/vec_dbs/milvus.py +654 -0
  315. memos/vec_dbs/qdrant.py +355 -0
memos/memories/textual/tree_text_memory/organize/reorganizer.py
@@ -0,0 +1,622 @@
import json
import time
import traceback

from collections import defaultdict
from concurrent.futures import as_completed
from queue import PriorityQueue
from typing import Literal

import numpy as np

from memos.context.context import ContextThread, ContextThreadPoolExecutor
from memos.dependency import require_python_package
from memos.embedders.factory import OllamaEmbedder
from memos.graph_dbs.item import GraphDBEdge, GraphDBNode
from memos.graph_dbs.neo4j import Neo4jGraphDB
from memos.llms.base import BaseLLM
from memos.log import get_logger
from memos.memories.textual.item import SourceMessage, TreeNodeTextualMemoryMetadata
from memos.memories.textual.tree_text_memory.organize.handler import NodeHandler
from memos.memories.textual.tree_text_memory.organize.relation_reason_detector import (
    RelationAndReasoningDetector,
)
from memos.templates.tree_reorganize_prompts import LOCAL_SUBCLUSTER_PROMPT, REORGANIZE_PROMPT


logger = get_logger(__name__)


def build_summary_parent_node(cluster_nodes):
    normalized_sources = []
    for n in cluster_nodes:
        sm = SourceMessage(
            type="chat",
            role=None,
            chat_time=None,
            message_id=None,
            content=n.memory,
            # extra
            node_id=n.id,
        )
        normalized_sources.append(sm)
    return normalized_sources


class QueueMessage:
    def __init__(
        self,
        op: Literal["add", "remove", "merge", "update", "end"],
        # `str` for node and edge IDs, `GraphDBNode` and `GraphDBEdge` for actual objects
        before_node: list[str] | list[GraphDBNode] | None = None,
        before_edge: list[str] | list[GraphDBEdge] | None = None,
        after_node: list[str] | list[GraphDBNode] | None = None,
        after_edge: list[str] | list[GraphDBEdge] | None = None,
    ):
        self.op = op
        self.before_node = before_node
        self.before_edge = before_edge
        self.after_node = after_node
        self.after_edge = after_edge

    def __str__(self) -> str:
        return f"QueueMessage(op={self.op}, before_node={self.before_node if self.before_node is None else len(self.before_node)}, after_node={self.after_node if self.after_node is None else len(self.after_node)})"

    def __lt__(self, other: "QueueMessage") -> bool:
        op_priority = {"add": 2, "remove": 2, "merge": 1, "end": 0}
        return op_priority[self.op] < op_priority[other.op]

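QueueMessage instances are ordered by __lt__ while they sit in the reorganizer's PriorityQueue, so "end" (priority 0) is retrieved before "merge" (1), which is retrieved before "add" and "remove" (2). A minimal ordering sketch, assuming MemoryOS and its graph-db/LLM dependencies are installed so the module imports cleanly; note that "update" is accepted by __init__ but has no entry in op_priority, so comparing such a message would raise a KeyError as written.

# Illustration only, not part of the packaged file.
from queue import PriorityQueue

from memos.memories.textual.tree_text_memory.organize.reorganizer import QueueMessage

q = PriorityQueue()
q.put_nowait(QueueMessage(op="add", after_node=["node-1"]))
q.put_nowait(QueueMessage(op="merge"))
q.put_nowait(QueueMessage(op="end"))

# Lower op_priority values win in the min-heap: end -> merge -> add
print([q.get_nowait().op for _ in range(3)])  # ['end', 'merge', 'add']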
def extract_first_to_last_brace(text: str):
    start = text.find("{")
    end = text.rfind("}")
    if start == -1 or end == -1 or end < start:
        return "", None
    json_str = text[start : end + 1]
    return json_str, json.loads(json_str)

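This helper is how the reorganizer pulls a JSON object out of a chatty LLM reply: it slices from the first "{" to the last "}" and parses that span, ignoring any prose around the object. A small usage sketch with a hypothetical reply string:

# Illustration only, not part of the packaged file.
reply = 'Sure, here you go: {"key": "topic", "value": "travel plans"} Let me know!'
json_str, parsed = extract_first_to_last_brace(reply)
print(json_str)  # {"key": "topic", "value": "travel plans"}
print(parsed)    # {'key': 'topic', 'value': 'travel plans'}

# Malformed content between the braces raises json.JSONDecodeError,
# which _parse_json_result below catches and turns into an empty dict.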
class GraphStructureReorganizer:
    def __init__(
        self, graph_store: Neo4jGraphDB, llm: BaseLLM, embedder: OllamaEmbedder, is_reorganize: bool
    ):
        self.queue = PriorityQueue()  # Min-heap
        self.graph_store = graph_store
        self.llm = llm
        self.embedder = embedder
        self.relation_detector = RelationAndReasoningDetector(
            self.graph_store, self.llm, self.embedder
        )
        self.resolver = NodeHandler(graph_store=graph_store, llm=llm, embedder=embedder)

        self.is_reorganize = is_reorganize
        self._reorganize_needed = True
        if self.is_reorganize:
            # ____ 1. For queue message driven thread ___________
            self.thread = ContextThread(target=self._run_message_consumer_loop)
            self.thread.start()
            # ____ 2. For periodic structure optimization _______
            self._stop_scheduler = False
            self._is_optimizing = {"LongTermMemory": False, "UserMemory": False}
            self.structure_optimizer_thread = ContextThread(
                target=self._run_structure_organizer_loop
            )
            self.structure_optimizer_thread.start()

    def add_message(self, message: QueueMessage):
        self.queue.put_nowait(message)

    def wait_until_current_task_done(self):
        """
        Wait until:
        1) queue is empty
        2) any running structure optimization is done
        """
        deadline = time.time() + 600
        if not self.is_reorganize:
            return

        if not self.queue.empty():
            self.queue.join()
            logger.debug("Queue is now empty.")

        while any(self._is_optimizing.values()):
            logger.debug(f"Waiting for structure optimizer to finish... {self._is_optimizing}")
            if time.time() > deadline:
                logger.error(f"Wait timed out; flags={self._is_optimizing}")
                break
            time.sleep(1)
        logger.debug("Structure optimizer is now idle.")

    def _run_message_consumer_loop(self):
        while True:
            message = self.queue.get()
            if message.op == "end":
                break

            try:
                if self._preprocess_message(message):
                    self.handle_message(message)
            except Exception:
                logger.error(traceback.format_exc())
            self.queue.task_done()

    @require_python_package(
        import_name="schedule",
        install_command="pip install schedule",
        install_link="https://schedule.readthedocs.io/en/stable/installation.html",
    )
    def _run_structure_organizer_loop(self):
        """
        Use the schedule library to periodically trigger structure optimization.
        This runs until the stop flag is set.
        """
        import schedule

        schedule.every(100).seconds.do(self.optimize_structure, scope="LongTermMemory")
        schedule.every(100).seconds.do(self.optimize_structure, scope="UserMemory")

        logger.info("Structure optimizer schedule started.")
        while not getattr(self, "_stop_scheduler", False):
            if any(self._is_optimizing.values()):
                time.sleep(1)
                continue
            if self._reorganize_needed:
                logger.info("[Reorganizer] Triggering optimize_structure due to new nodes.")
                self.optimize_structure(scope="LongTermMemory")
                self.optimize_structure(scope="UserMemory")
                self._reorganize_needed = False
            time.sleep(30)

    def stop(self):
        """
        Stop the reorganizer threads.
        """
        if not self.is_reorganize:
            return

        self.add_message(QueueMessage(op="end"))
        self.thread.join()
        logger.info("Reorganize thread stopped.")
        self._stop_scheduler = True
        self.structure_optimizer_thread.join()
        logger.info("Structure optimizer stopped.")

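Taken together, __init__, add_message, wait_until_current_task_done, and stop define the reorganizer's lifecycle: construction starts the consumer and scheduler threads, messages are fed in by node ID, and stop drains both threads. A hedged usage sketch, assuming graph_store, llm, and embedder are already-configured Neo4jGraphDB, BaseLLM, and OllamaEmbedder instances:

# Illustration only, not part of the packaged file.
reorganizer = GraphStructureReorganizer(graph_store, llm, embedder, is_reorganize=True)

# Enqueue a newly written memory node by ID (placeholder value); the consumer
# thread resolves it into a GraphDBNode and detects relations against existing memories.
reorganizer.add_message(QueueMessage(op="add", after_node=["<memory-node-id>"]))

reorganizer.wait_until_current_task_done()  # block until the queue drains and optimization is idle
reorganizer.stop()  # send an "end" message and join both background threads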
    def handle_message(self, message: QueueMessage):
        handle_map = {"add": self.handle_add, "remove": self.handle_remove}
        handle_map[message.op](message)
        logger.debug(f"message queue size: {self.queue.qsize()}")

    def handle_add(self, message: QueueMessage):
        logger.debug(f"Handling add operation: {str(message)[:500]}")
        added_node = message.after_node[0]
        detected_relationships = self.resolver.detect(
            added_node, scope=added_node.metadata.memory_type
        )
        if detected_relationships:
            for added_node, existing_node, relation in detected_relationships:
                self.resolver.resolve(added_node, existing_node, relation)

        self._reorganize_needed = True

    def handle_remove(self, message: QueueMessage):
        logger.debug(f"Handling remove operation: {str(message)[:50]}")

    def optimize_structure(
        self,
        scope: str = "LongTermMemory",
        local_tree_threshold: int = 10,
        min_cluster_size: int = 4,
        min_group_size: int = 20,
        max_duration_sec: int = 600,
    ):
        """
        Periodically reorganize the graph:
        1. Weakly partition nodes into clusters.
        2. Summarize each cluster.
        3. Create parent nodes and build local PARENT trees.
        """
        # --- Total-time watchdog: deadline check helper ---
        start_ts = time.time()

        def _check_deadline(where: str):
            if time.time() - start_ts > max_duration_sec:
                logger.error(
                    f"[GraphStructureReorganize] {scope} exceeded {max_duration_sec}s, "
                    f"timed out at {where}"
                )
                return True
            return False

        if self._is_optimizing[scope]:
            logger.info(f"[GraphStructureReorganize] Already optimizing for {scope}. Skipping.")
            return

        if self.graph_store.node_not_exist(scope):
            logger.debug(f"[GraphStructureReorganize] No nodes for scope={scope}. Skip.")
            return

        self._is_optimizing[scope] = True
        try:
            logger.debug(
                f"[GraphStructureReorganize] 🔍 Starting structure optimization for scope: {scope}"
            )

            logger.debug(
                f"[GraphStructureReorganize] Number of {scope} nodes in self.graph_store is"
                f" {self.graph_store.get_memory_count(scope)}"
            )
            # Load candidate nodes
            if _check_deadline("[GraphStructureReorganize] Before loading candidates"):
                return
            raw_nodes = self.graph_store.get_structure_optimization_candidates(scope)
            nodes = [GraphDBNode(**n) for n in raw_nodes]

            if not nodes:
                logger.info("[GraphStructureReorganize] No nodes to optimize. Skipping.")
                return
            if len(nodes) < min_group_size:
                logger.info(
                    f"[GraphStructureReorganize] Only {len(nodes)} candidate nodes found. Not enough to reorganize. Skipping."
                )
                return

            # Step 2: Partition nodes
            if _check_deadline("[GraphStructureReorganize] Before partition"):
                return
            partitioned_groups = self._partition(nodes)
            logger.info(
                f"[GraphStructureReorganize] Partitioned into {len(partitioned_groups)} clusters."
            )

            if _check_deadline("[GraphStructureReorganize] Before submit partition task"):
                return
            with ContextThreadPoolExecutor(max_workers=4) as executor:
                futures = []
                for cluster_nodes in partitioned_groups:
                    futures.append(
                        executor.submit(
                            self._process_cluster_and_write,
                            cluster_nodes,
                            scope,
                            local_tree_threshold,
                            min_cluster_size,
                        )
                    )

                for f in as_completed(futures):
                    if _check_deadline("[GraphStructureReorganize] Waiting clusters..."):
                        for x in futures:
                            x.cancel()
                        return
                    try:
                        f.result()
                    except Exception as e:
                        logger.warning(
                            f"[GraphStructureReorganize] Cluster processing failed: {e}, trace: {traceback.format_exc()}"
                        )
            logger.info("[GraphStructureReorganize] Structure optimization finished.")

        finally:
            self._is_optimizing[scope] = False
            logger.info("[GraphStructureReorganize] Structure optimization finished.")

    def _process_cluster_and_write(
        self,
        cluster_nodes: list[GraphDBNode],
        scope: str,
        local_tree_threshold: int,
        min_cluster_size: int,
    ):
        if len(cluster_nodes) <= min_cluster_size:
            return

        # Large cluster ➜ local sub-clustering
        sub_clusters = self._local_subcluster(cluster_nodes)
        sub_parents = []

        for sub_nodes in sub_clusters:
            if len(sub_nodes) < min_cluster_size:
                continue  # Skip tiny noise
            sub_parent_node = self._summarize_cluster(sub_nodes, scope)
            self._create_parent_node(sub_parent_node)
            self._link_cluster_nodes(sub_parent_node, sub_nodes)
            sub_parents.append(sub_parent_node)

        if sub_parents and len(sub_parents) >= min_cluster_size:
            cluster_parent_node = self._summarize_cluster(cluster_nodes, scope)
            self._create_parent_node(cluster_parent_node)
            for sub_parent in sub_parents:
                self.graph_store.add_edge(cluster_parent_node.id, sub_parent.id, "PARENT")

        logger.info("Adding relations/reasons")
        nodes_to_check = cluster_nodes
        exclude_ids = [n.id for n in nodes_to_check]

        with ContextThreadPoolExecutor(max_workers=4) as executor:
            futures = []
            for node in nodes_to_check:
                futures.append(
                    executor.submit(
                        self.relation_detector.process_node,
                        node,
                        exclude_ids,
                        10,  # top_k
                    )
                )

            for f in as_completed(futures, timeout=300):
                results = f.result()

                # 1) Add pairwise relations
                for rel in results["relations"]:
                    if not self.graph_store.edge_exists(
                        rel["source_id"], rel["target_id"], rel["relation_type"]
                    ):
                        self.graph_store.add_edge(
                            rel["source_id"], rel["target_id"], rel["relation_type"]
                        )

                # 2) Add inferred nodes and link to sources
                for inf_node in results["inferred_nodes"]:
                    self.graph_store.add_node(
                        inf_node.id,
                        inf_node.memory,
                        inf_node.metadata.model_dump(exclude_none=True),
                    )
                    for src_id in inf_node.metadata.sources:
                        self.graph_store.add_edge(src_id, inf_node.id, "INFERS")

                # 3) Add sequence links
                for seq in results["sequence_links"]:
                    if not self.graph_store.edge_exists(seq["from_id"], seq["to_id"], "FOLLOWS"):
                        self.graph_store.add_edge(seq["from_id"], seq["to_id"], "FOLLOWS")

                # 4) Add aggregate concept nodes
                for agg_node in results["aggregate_nodes"]:
                    self.graph_store.add_node(
                        agg_node.id,
                        agg_node.memory,
                        agg_node.metadata.model_dump(exclude_none=True),
                    )
                    for child_id in agg_node.metadata.sources:
                        self.graph_store.add_edge(agg_node.id, child_id, "AGGREGATE_TO")

        logger.info("[Reorganizer] Cluster relation/reasoning done.")

    def _local_subcluster(
        self, cluster_nodes: list[GraphDBNode], max_length: int = 15000
    ) -> list[list[GraphDBNode]]:
        """
        Use LLM to split a large cluster into semantically coherent sub-clusters.
        """
        if not cluster_nodes:
            return []

        # Prepare conversation-like input: ID + key + value
        scene_lines = []
        for node in cluster_nodes:
            line = f"- ID: {node.id} | Key: {node.metadata.key} | Value: {node.memory}"
            scene_lines.append(line)

        joined_scene = "\n".join(scene_lines)
        if len(joined_scene) > max_length:
            logger.warning("Sub-cluster too long")
        prompt = LOCAL_SUBCLUSTER_PROMPT.replace("{joined_scene}", joined_scene[:max_length])

        messages = [{"role": "user", "content": prompt}]
        response_text = self.llm.generate(messages)
        response_json = self._parse_json_result(response_text)
        assigned_ids = set()
        result_subclusters = []

        for cluster in response_json.get("clusters", []):
            ids = []
            for nid in cluster.get("ids", []):
                if nid not in assigned_ids:
                    ids.append(nid)
                    assigned_ids.add(nid)
            sub_nodes = [node for node in cluster_nodes if node.id in ids]
            if len(sub_nodes) >= 2:
                result_subclusters.append(sub_nodes)

        return result_subclusters

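For reference, _local_subcluster only reads two things from the parsed LLM reply: a top-level "clusters" list whose items each carry an "ids" list. Duplicate IDs are dropped, and sub-clusters that match fewer than two candidate nodes are discarded. A sketch of the expected shape, with hypothetical node IDs:

# Illustration only, not part of the packaged file.
example_subcluster_response = {
    "clusters": [
        {"ids": ["node-a", "node-b", "node-c"]},  # kept: matches at least 2 nodes
        {"ids": ["node-d"]},                      # dropped: fewer than 2 nodes
    ]
}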
    @require_python_package(
        import_name="sklearn",
        install_command="pip install scikit-learn",
        install_link="https://scikit-learn.org/stable/install.html",
    )
    def _partition(self, nodes, min_cluster_size: int = 10, max_cluster_size: int = 20):
        """
        Partition nodes by:
        - If total nodes <= max_cluster_size -> return all nodes in one cluster.
        - If total nodes > max_cluster_size -> cluster by embeddings, recursively split.
        - Only keep clusters with size > min_cluster_size.

        Args:
            nodes: List of GraphDBNode
            min_cluster_size: Min size to keep a cluster as-is

        Returns:
            List of clusters, each as a list of GraphDBNode
        """
        from sklearn.cluster import MiniBatchKMeans

        if len(nodes) <= max_cluster_size:
            logger.info(
                f"[KMeansPartition] Node count {len(nodes)} <= {max_cluster_size}, skipping KMeans."
            )
            return [nodes]

        def recursive_clustering(nodes_list, depth=0):
            """Recursively split clusters until each is <= max_cluster_size."""
            indent = " " * depth
            logger.info(
                f"{indent}[Recursive] Start clustering {len(nodes_list)} nodes at depth {depth}"
            )

            if len(nodes_list) <= max_cluster_size:
                logger.info(
                    f"{indent}[Recursive] Node count <= {max_cluster_size}, stop splitting."
                )
                return [nodes_list]
            # Try kmeans with k = ceil(len(nodes) / max_cluster_size)
            x_nodes = [n for n in nodes_list if n.metadata.embedding]
            x = np.array([n.metadata.embedding for n in x_nodes])

            if len(x) < min_cluster_size:
                logger.info(
                    f"{indent}[Recursive] Too few embeddings ({len(x)}), skipping clustering."
                )
                return [nodes_list]

            k = min(len(x), (len(nodes_list) + max_cluster_size - 1) // max_cluster_size)
            k = max(1, k)

            try:
                logger.info(f"{indent}[Recursive] Clustering with k={k} on {len(x)} points.")
                kmeans = MiniBatchKMeans(n_clusters=k, batch_size=256, random_state=42)
                labels = kmeans.fit_predict(x)

                label_groups = defaultdict(list)
                for node, label in zip(x_nodes, labels, strict=False):
                    label_groups[label].append(node)

                # Map: label -> nodes with no embedding (fallback group)
                no_embedding_nodes = [n for n in nodes_list if not n.metadata.embedding]
                if no_embedding_nodes:
                    logger.warning(
                        f"{indent}[Recursive] {len(no_embedding_nodes)} nodes have no embedding. Added to largest cluster."
                    )
                    # Assign to largest cluster
                    largest_label = max(label_groups.items(), key=lambda kv: len(kv[1]))[0]
                    label_groups[largest_label].extend(no_embedding_nodes)

                result = []
                for label, sub_group in label_groups.items():
                    logger.info(f"{indent} Cluster-{label}: {len(sub_group)} nodes")
                    result.extend(recursive_clustering(sub_group, depth=depth + 1))
                return result

            except Exception as e:
                logger.warning(
                    f"{indent}[Recursive] Clustering failed: {e}, fallback to one cluster."
                )
                return [nodes_list]

        raw_clusters = recursive_clustering(nodes)
        filtered_clusters = [c for c in raw_clusters if len(c) > min_cluster_size]

        logger.info(f"[KMeansPartition] Total clusters before filtering: {len(raw_clusters)}")
        for i, cluster in enumerate(raw_clusters):
            logger.info(f"[KMeansPartition] Cluster-{i}: {len(cluster)} nodes")

        logger.info(
            f"[KMeansPartition] Clusters after filtering (>{min_cluster_size}): {len(filtered_clusters)}"
        )

        return filtered_clusters

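The recursion above picks the number of k-means clusters as the ceiling of len(nodes_list) / max_cluster_size, caps it by the number of embedded nodes, floors it at 1, and then re-splits any resulting cluster that is still larger than max_cluster_size. A quick arithmetic sketch of the first split, with hypothetical counts:

# Illustration only, not part of the packaged file.
nodes_count, max_cluster_size = 53, 20

# Integer ceiling division, as written in recursive_clustering:
k = (nodes_count + max_cluster_size - 1) // max_cluster_size
print(k)  # 3: MiniBatchKMeans splits 53 nodes into 3 clusters, and any cluster
# still larger than 20 nodes is clustered again at depth + 1.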
    def _summarize_cluster(self, cluster_nodes: list[GraphDBNode], scope: str) -> GraphDBNode:
        """
        Generate a cluster label using LLM, based on top keys in the cluster.
        """
        if not cluster_nodes:
            raise ValueError("Cluster nodes cannot be empty.")

        memories_items_text = "\n\n".join(
            [
                f"{i}. key: {n.metadata.key}\nvalue: {n.memory}\nsummary:{n.metadata.background}"
                for i, n in enumerate(cluster_nodes)
            ]
        )

        # Build prompt
        prompt = REORGANIZE_PROMPT.replace("{memory_items_text}", memories_items_text)

        messages = [{"role": "user", "content": prompt}]
        response_text = self.llm.generate(messages)
        response_json = self._parse_json_result(response_text)

        # Extract fields
        parent_key = response_json.get("key", "").strip()
        parent_value = response_json.get("value", "").strip()
        parent_tags = response_json.get("tags", [])
        parent_background = response_json.get("summary", "").strip()

        embedding = self.embedder.embed([parent_value])[0]

        parent_node = GraphDBNode(
            memory=parent_value,
            metadata=TreeNodeTextualMemoryMetadata(
                user_id=None,
                session_id=None,
                memory_type=scope,
                status="activated",
                key=parent_key,
                tags=parent_tags,
                embedding=embedding,
                usage=[],
                sources=build_summary_parent_node(cluster_nodes),
                background=parent_background,
                confidence=0.66,
                type="topic",
            ),
        )
        return parent_node

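_summarize_cluster reads four fields from the parsed LLM reply ("key", "value", "tags", "summary") and folds them into the parent node's memory text and metadata; "value" is also the text that gets embedded. A hypothetical response of that shape:

# Illustration only, not part of the packaged file.
example_summary_response = {
    "key": "Trip planning",
    "value": "The user is organizing a multi-city trip and tracking bookings and budgets.",
    "tags": ["travel", "planning"],
    "summary": "Aggregated memories about the user's upcoming trip preparations.",
}
# -> parent_key, parent_value, parent_tags, parent_background in the code above.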
    def _parse_json_result(self, response_text):
        try:
            response_text = response_text.replace("```", "").replace("json", "")
            response_json = extract_first_to_last_brace(response_text)[1]
            return response_json
        except json.JSONDecodeError as e:
            logger.warning(
                f"Failed to parse LLM response as JSON: {e}\nRaw response:\n{response_text}"
            )
            return {}

    def _create_parent_node(self, parent_node: GraphDBNode) -> None:
        """
        Create a new parent node for the cluster.
        """
        self.graph_store.add_node(
            parent_node.id,
            parent_node.memory,
            parent_node.metadata.model_dump(exclude_none=True),
        )

    def _link_cluster_nodes(self, parent_node: GraphDBNode, child_nodes: list[GraphDBNode]):
        """
        Add PARENT edges from the parent node to all nodes in the cluster.
        """
        for child in child_nodes:
            if not self.graph_store.edge_exists(
                parent_node.id, child.id, "PARENT", direction="OUTGOING"
            ):
                self.graph_store.add_edge(parent_node.id, child.id, "PARENT")

    def _preprocess_message(self, message: QueueMessage) -> bool:
        message = self._convert_id_to_node(message)
        if message.after_node is None or None in message.after_node:
            logger.debug(
                f"Found non-existent node in after_node in message: {message}, skip this message."
            )
            return False
        return True

    def _convert_id_to_node(self, message: QueueMessage) -> QueueMessage:
        """
        Convert IDs in the message.after_node to GraphDBNode objects.
        """
        for i, node in enumerate(message.after_node or []):
            if not isinstance(node, str):
                continue
            raw_node = self.graph_store.get_node(node, include_embedding=True)
            if raw_node is None:
                logger.debug(f"Node with ID {node} not found in the graph store.")
                message.after_node[i] = None
            else:
                message.after_node[i] = GraphDBNode(**raw_node)
        return message