MemoryOS 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (315) hide show
  1. memoryos-2.0.3.dist-info/METADATA +418 -0
  2. memoryos-2.0.3.dist-info/RECORD +315 -0
  3. memoryos-2.0.3.dist-info/WHEEL +4 -0
  4. memoryos-2.0.3.dist-info/entry_points.txt +3 -0
  5. memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
  6. memos/__init__.py +20 -0
  7. memos/api/client.py +571 -0
  8. memos/api/config.py +1018 -0
  9. memos/api/context/dependencies.py +50 -0
  10. memos/api/exceptions.py +53 -0
  11. memos/api/handlers/__init__.py +62 -0
  12. memos/api/handlers/add_handler.py +158 -0
  13. memos/api/handlers/base_handler.py +194 -0
  14. memos/api/handlers/chat_handler.py +1401 -0
  15. memos/api/handlers/component_init.py +388 -0
  16. memos/api/handlers/config_builders.py +190 -0
  17. memos/api/handlers/feedback_handler.py +93 -0
  18. memos/api/handlers/formatters_handler.py +237 -0
  19. memos/api/handlers/memory_handler.py +316 -0
  20. memos/api/handlers/scheduler_handler.py +497 -0
  21. memos/api/handlers/search_handler.py +222 -0
  22. memos/api/handlers/suggestion_handler.py +117 -0
  23. memos/api/mcp_serve.py +614 -0
  24. memos/api/middleware/request_context.py +101 -0
  25. memos/api/product_api.py +38 -0
  26. memos/api/product_models.py +1206 -0
  27. memos/api/routers/__init__.py +1 -0
  28. memos/api/routers/product_router.py +477 -0
  29. memos/api/routers/server_router.py +394 -0
  30. memos/api/server_api.py +44 -0
  31. memos/api/start_api.py +433 -0
  32. memos/chunkers/__init__.py +4 -0
  33. memos/chunkers/base.py +24 -0
  34. memos/chunkers/charactertext_chunker.py +41 -0
  35. memos/chunkers/factory.py +24 -0
  36. memos/chunkers/markdown_chunker.py +62 -0
  37. memos/chunkers/sentence_chunker.py +54 -0
  38. memos/chunkers/simple_chunker.py +50 -0
  39. memos/cli.py +113 -0
  40. memos/configs/__init__.py +0 -0
  41. memos/configs/base.py +82 -0
  42. memos/configs/chunker.py +59 -0
  43. memos/configs/embedder.py +88 -0
  44. memos/configs/graph_db.py +236 -0
  45. memos/configs/internet_retriever.py +100 -0
  46. memos/configs/llm.py +151 -0
  47. memos/configs/mem_agent.py +54 -0
  48. memos/configs/mem_chat.py +81 -0
  49. memos/configs/mem_cube.py +105 -0
  50. memos/configs/mem_os.py +83 -0
  51. memos/configs/mem_reader.py +91 -0
  52. memos/configs/mem_scheduler.py +385 -0
  53. memos/configs/mem_user.py +70 -0
  54. memos/configs/memory.py +324 -0
  55. memos/configs/parser.py +38 -0
  56. memos/configs/reranker.py +18 -0
  57. memos/configs/utils.py +8 -0
  58. memos/configs/vec_db.py +80 -0
  59. memos/context/context.py +355 -0
  60. memos/dependency.py +52 -0
  61. memos/deprecation.py +262 -0
  62. memos/embedders/__init__.py +0 -0
  63. memos/embedders/ark.py +95 -0
  64. memos/embedders/base.py +106 -0
  65. memos/embedders/factory.py +29 -0
  66. memos/embedders/ollama.py +77 -0
  67. memos/embedders/sentence_transformer.py +49 -0
  68. memos/embedders/universal_api.py +51 -0
  69. memos/exceptions.py +30 -0
  70. memos/graph_dbs/__init__.py +0 -0
  71. memos/graph_dbs/base.py +274 -0
  72. memos/graph_dbs/factory.py +27 -0
  73. memos/graph_dbs/item.py +46 -0
  74. memos/graph_dbs/nebular.py +1794 -0
  75. memos/graph_dbs/neo4j.py +1942 -0
  76. memos/graph_dbs/neo4j_community.py +1058 -0
  77. memos/graph_dbs/polardb.py +5446 -0
  78. memos/hello_world.py +97 -0
  79. memos/llms/__init__.py +0 -0
  80. memos/llms/base.py +25 -0
  81. memos/llms/deepseek.py +13 -0
  82. memos/llms/factory.py +38 -0
  83. memos/llms/hf.py +443 -0
  84. memos/llms/hf_singleton.py +114 -0
  85. memos/llms/ollama.py +135 -0
  86. memos/llms/openai.py +222 -0
  87. memos/llms/openai_new.py +198 -0
  88. memos/llms/qwen.py +13 -0
  89. memos/llms/utils.py +14 -0
  90. memos/llms/vllm.py +218 -0
  91. memos/log.py +237 -0
  92. memos/mem_agent/base.py +19 -0
  93. memos/mem_agent/deepsearch_agent.py +391 -0
  94. memos/mem_agent/factory.py +36 -0
  95. memos/mem_chat/__init__.py +0 -0
  96. memos/mem_chat/base.py +30 -0
  97. memos/mem_chat/factory.py +21 -0
  98. memos/mem_chat/simple.py +200 -0
  99. memos/mem_cube/__init__.py +0 -0
  100. memos/mem_cube/base.py +30 -0
  101. memos/mem_cube/general.py +240 -0
  102. memos/mem_cube/navie.py +172 -0
  103. memos/mem_cube/utils.py +169 -0
  104. memos/mem_feedback/base.py +15 -0
  105. memos/mem_feedback/feedback.py +1192 -0
  106. memos/mem_feedback/simple_feedback.py +40 -0
  107. memos/mem_feedback/utils.py +230 -0
  108. memos/mem_os/client.py +5 -0
  109. memos/mem_os/core.py +1203 -0
  110. memos/mem_os/main.py +582 -0
  111. memos/mem_os/product.py +1608 -0
  112. memos/mem_os/product_server.py +455 -0
  113. memos/mem_os/utils/default_config.py +359 -0
  114. memos/mem_os/utils/format_utils.py +1403 -0
  115. memos/mem_os/utils/reference_utils.py +162 -0
  116. memos/mem_reader/__init__.py +0 -0
  117. memos/mem_reader/base.py +47 -0
  118. memos/mem_reader/factory.py +53 -0
  119. memos/mem_reader/memory.py +298 -0
  120. memos/mem_reader/multi_modal_struct.py +965 -0
  121. memos/mem_reader/read_multi_modal/__init__.py +43 -0
  122. memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
  123. memos/mem_reader/read_multi_modal/base.py +273 -0
  124. memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
  125. memos/mem_reader/read_multi_modal/image_parser.py +359 -0
  126. memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
  127. memos/mem_reader/read_multi_modal/string_parser.py +139 -0
  128. memos/mem_reader/read_multi_modal/system_parser.py +327 -0
  129. memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
  130. memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
  131. memos/mem_reader/read_multi_modal/user_parser.py +218 -0
  132. memos/mem_reader/read_multi_modal/utils.py +358 -0
  133. memos/mem_reader/simple_struct.py +912 -0
  134. memos/mem_reader/strategy_struct.py +163 -0
  135. memos/mem_reader/utils.py +157 -0
  136. memos/mem_scheduler/__init__.py +0 -0
  137. memos/mem_scheduler/analyzer/__init__.py +0 -0
  138. memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
  139. memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
  140. memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
  141. memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
  142. memos/mem_scheduler/base_scheduler.py +1319 -0
  143. memos/mem_scheduler/general_modules/__init__.py +0 -0
  144. memos/mem_scheduler/general_modules/api_misc.py +137 -0
  145. memos/mem_scheduler/general_modules/base.py +80 -0
  146. memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
  147. memos/mem_scheduler/general_modules/misc.py +313 -0
  148. memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
  149. memos/mem_scheduler/general_modules/task_threads.py +315 -0
  150. memos/mem_scheduler/general_scheduler.py +1495 -0
  151. memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
  152. memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
  153. memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
  154. memos/mem_scheduler/monitors/__init__.py +0 -0
  155. memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
  156. memos/mem_scheduler/monitors/general_monitor.py +394 -0
  157. memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
  158. memos/mem_scheduler/optimized_scheduler.py +410 -0
  159. memos/mem_scheduler/orm_modules/__init__.py +0 -0
  160. memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
  161. memos/mem_scheduler/orm_modules/base_model.py +729 -0
  162. memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
  163. memos/mem_scheduler/orm_modules/redis_model.py +699 -0
  164. memos/mem_scheduler/scheduler_factory.py +23 -0
  165. memos/mem_scheduler/schemas/__init__.py +0 -0
  166. memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
  167. memos/mem_scheduler/schemas/api_schemas.py +233 -0
  168. memos/mem_scheduler/schemas/general_schemas.py +55 -0
  169. memos/mem_scheduler/schemas/message_schemas.py +173 -0
  170. memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
  171. memos/mem_scheduler/schemas/task_schemas.py +132 -0
  172. memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
  173. memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
  174. memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
  175. memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
  176. memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
  177. memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
  178. memos/mem_scheduler/utils/__init__.py +0 -0
  179. memos/mem_scheduler/utils/api_utils.py +77 -0
  180. memos/mem_scheduler/utils/config_utils.py +100 -0
  181. memos/mem_scheduler/utils/db_utils.py +50 -0
  182. memos/mem_scheduler/utils/filter_utils.py +176 -0
  183. memos/mem_scheduler/utils/metrics.py +125 -0
  184. memos/mem_scheduler/utils/misc_utils.py +290 -0
  185. memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
  186. memos/mem_scheduler/utils/status_tracker.py +229 -0
  187. memos/mem_scheduler/webservice_modules/__init__.py +0 -0
  188. memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
  189. memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
  190. memos/mem_user/factory.py +94 -0
  191. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  192. memos/mem_user/mysql_user_manager.py +502 -0
  193. memos/mem_user/persistent_factory.py +98 -0
  194. memos/mem_user/persistent_user_manager.py +260 -0
  195. memos/mem_user/redis_persistent_user_manager.py +225 -0
  196. memos/mem_user/user_manager.py +488 -0
  197. memos/memories/__init__.py +0 -0
  198. memos/memories/activation/__init__.py +0 -0
  199. memos/memories/activation/base.py +42 -0
  200. memos/memories/activation/item.py +56 -0
  201. memos/memories/activation/kv.py +292 -0
  202. memos/memories/activation/vllmkv.py +219 -0
  203. memos/memories/base.py +19 -0
  204. memos/memories/factory.py +42 -0
  205. memos/memories/parametric/__init__.py +0 -0
  206. memos/memories/parametric/base.py +19 -0
  207. memos/memories/parametric/item.py +11 -0
  208. memos/memories/parametric/lora.py +41 -0
  209. memos/memories/textual/__init__.py +0 -0
  210. memos/memories/textual/base.py +92 -0
  211. memos/memories/textual/general.py +236 -0
  212. memos/memories/textual/item.py +304 -0
  213. memos/memories/textual/naive.py +187 -0
  214. memos/memories/textual/prefer_text_memory/__init__.py +0 -0
  215. memos/memories/textual/prefer_text_memory/adder.py +504 -0
  216. memos/memories/textual/prefer_text_memory/config.py +106 -0
  217. memos/memories/textual/prefer_text_memory/extractor.py +221 -0
  218. memos/memories/textual/prefer_text_memory/factory.py +85 -0
  219. memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
  220. memos/memories/textual/prefer_text_memory/spliter.py +132 -0
  221. memos/memories/textual/prefer_text_memory/utils.py +93 -0
  222. memos/memories/textual/preference.py +344 -0
  223. memos/memories/textual/simple_preference.py +161 -0
  224. memos/memories/textual/simple_tree.py +69 -0
  225. memos/memories/textual/tree.py +459 -0
  226. memos/memories/textual/tree_text_memory/__init__.py +0 -0
  227. memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
  228. memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
  229. memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
  230. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
  231. memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
  232. memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
  233. memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
  234. memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
  235. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
  236. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
  237. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
  238. memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
  239. memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
  240. memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
  241. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
  242. memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
  243. memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
  244. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
  245. memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
  246. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
  247. memos/memos_tools/dinding_report_bot.py +453 -0
  248. memos/memos_tools/lockfree_dict.py +120 -0
  249. memos/memos_tools/notification_service.py +44 -0
  250. memos/memos_tools/notification_utils.py +142 -0
  251. memos/memos_tools/singleton.py +174 -0
  252. memos/memos_tools/thread_safe_dict.py +310 -0
  253. memos/memos_tools/thread_safe_dict_segment.py +382 -0
  254. memos/multi_mem_cube/__init__.py +0 -0
  255. memos/multi_mem_cube/composite_cube.py +86 -0
  256. memos/multi_mem_cube/single_cube.py +874 -0
  257. memos/multi_mem_cube/views.py +54 -0
  258. memos/parsers/__init__.py +0 -0
  259. memos/parsers/base.py +15 -0
  260. memos/parsers/factory.py +21 -0
  261. memos/parsers/markitdown.py +28 -0
  262. memos/reranker/__init__.py +4 -0
  263. memos/reranker/base.py +25 -0
  264. memos/reranker/concat.py +103 -0
  265. memos/reranker/cosine_local.py +102 -0
  266. memos/reranker/factory.py +72 -0
  267. memos/reranker/http_bge.py +324 -0
  268. memos/reranker/http_bge_strategy.py +327 -0
  269. memos/reranker/noop.py +19 -0
  270. memos/reranker/strategies/__init__.py +4 -0
  271. memos/reranker/strategies/base.py +61 -0
  272. memos/reranker/strategies/concat_background.py +94 -0
  273. memos/reranker/strategies/concat_docsource.py +110 -0
  274. memos/reranker/strategies/dialogue_common.py +109 -0
  275. memos/reranker/strategies/factory.py +31 -0
  276. memos/reranker/strategies/single_turn.py +107 -0
  277. memos/reranker/strategies/singleturn_outmem.py +98 -0
  278. memos/settings.py +10 -0
  279. memos/templates/__init__.py +0 -0
  280. memos/templates/advanced_search_prompts.py +211 -0
  281. memos/templates/cloud_service_prompt.py +107 -0
  282. memos/templates/instruction_completion.py +66 -0
  283. memos/templates/mem_agent_prompts.py +85 -0
  284. memos/templates/mem_feedback_prompts.py +822 -0
  285. memos/templates/mem_reader_prompts.py +1096 -0
  286. memos/templates/mem_reader_strategy_prompts.py +238 -0
  287. memos/templates/mem_scheduler_prompts.py +626 -0
  288. memos/templates/mem_search_prompts.py +93 -0
  289. memos/templates/mos_prompts.py +403 -0
  290. memos/templates/prefer_complete_prompt.py +735 -0
  291. memos/templates/tool_mem_prompts.py +139 -0
  292. memos/templates/tree_reorganize_prompts.py +230 -0
  293. memos/types/__init__.py +34 -0
  294. memos/types/general_types.py +151 -0
  295. memos/types/openai_chat_completion_types/__init__.py +15 -0
  296. memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
  297. memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
  298. memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
  299. memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
  300. memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
  301. memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
  302. memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
  303. memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
  304. memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
  305. memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
  306. memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
  307. memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
  308. memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
  309. memos/utils.py +123 -0
  310. memos/vec_dbs/__init__.py +0 -0
  311. memos/vec_dbs/base.py +117 -0
  312. memos/vec_dbs/factory.py +23 -0
  313. memos/vec_dbs/item.py +50 -0
  314. memos/vec_dbs/milvus.py +654 -0
  315. memos/vec_dbs/qdrant.py +355 -0
@@ -0,0 +1,364 @@
1
+ import copy
2
+ import time
3
+
4
+ from typing import Any
5
+
6
+ from memos.embedders.factory import OllamaEmbedder
7
+ from memos.graph_dbs.factory import Neo4jGraphDB
8
+ from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
9
+ from memos.log import get_logger
10
+ from memos.memories.textual.item import TextualMemoryItem, TextualMemoryMetadata
11
+ from memos.memories.textual.tree_text_memory.retrieve.bm25_util import EnhancedBM25
12
+ from memos.memories.textual.tree_text_memory.retrieve.retrieve_utils import (
13
+ FastTokenizer,
14
+ parse_structured_output,
15
+ )
16
+ from memos.memories.textual.tree_text_memory.retrieve.searcher import Searcher
17
+ from memos.reranker.base import BaseReranker
18
+ from memos.templates.advanced_search_prompts import PROMPT_MAPPING
19
+ from memos.types.general_types import SearchMode
20
+
21
+
22
+ logger = get_logger(__name__)
23
+
24
+
25
class AdvancedSearcher(Searcher):
    """Searcher that adds an LLM-guided, multi-stage "deep search" on top of `Searcher`.

    Each stage asks `process_llm` whether the memories gathered so far are enough
    to answer the query. If not, the LLM proposes extra retrieval phrases, those
    phrases are searched, and the merged memories are rewritten by the LLM before
    the next stage starts.
    """

    def __init__(
        self,
        dispatcher_llm: OpenAILLM | OllamaLLM | AzureLLM,
        graph_store: Neo4jGraphDB,
        embedder: OllamaEmbedder,
        reranker: BaseReranker,
        bm25_retriever: EnhancedBM25 | None = None,
        internet_retriever: None = None,
        search_strategy: dict | None = None,
        manual_close_internet: bool = True,
        process_llm: Any | None = None,
        tokenizer: FastTokenizer | None = None,
        include_embedding: bool = False,
    ):
        """Forward the retrieval components to `Searcher` and set deep-search knobs.

        Args:
            process_llm: LLM used for the stage prompts; must expose
                `generate(messages) -> str`. All other arguments are passed
                unchanged to `Searcher.__init__`.
        """
        super().__init__(
            dispatcher_llm=dispatcher_llm,
            graph_store=graph_store,
            embedder=embedder,
            reranker=reranker,
            bm25_retriever=bm25_retriever,
            internet_retriever=internet_retriever,
            search_strategy=search_strategy,
            manual_close_internet=manual_close_internet,
            tokenizer=tokenizer,
            include_embedding=include_embedding,
        )

        # top_k used for every complementary retrieval triggered by a phrase
        self.stage_retrieve_top = 3
        self.process_llm = process_llm
        # number of expansion stages before the final judgement stage
        self.thinking_stages = 3
        # retries on top of the first attempt for every LLM call
        self.max_retry_times = 2
        self.deep_search_top_k_bar = 2

    def load_template(self, template_name: str) -> str:
        """Return the prompt template registered under `template_name`.

        Returns an empty string (after logging an error) when the template is
        unknown, so that `build_prompt` can raise its `FileNotFoundError`.
        """
        if template_name not in PROMPT_MAPPING:
            logger.error("Prompt template is not found!")
            return ""
        return PROMPT_MAPPING[template_name]

    def build_prompt(self, template_name: str, **kwargs) -> str:
        """Load a template and fill its placeholders with `kwargs`.

        Raises:
            FileNotFoundError: if the template is unknown or empty.
        """
        template = self.load_template(template_name)
        if not template:
            raise FileNotFoundError(f"Prompt template `{template_name}` not found.")
        return template.format(**kwargs)

    def stage_retrieve(
        self,
        stage_id: int,
        query: str,
        previous_retrieval_phrases: list[str],
        text_memories: str,
    ) -> tuple[bool, str, list[str]]:
        """Run a retrieval-expansion stage and parse structured LLM output.

        Returns a tuple of:
        - can_answer: whether current memories suffice to answer
        - reason: brief reasoning or hypotheses
        - retrieval_phrases: list of phrases to retrieve next

        Raises:
            FileNotFoundError: if the stage template does not exist.
            Exception: the last LLM/parsing error once all attempts have failed.
        """

        # Format previous phrases as bullet list to align with prompt expectations
        prev_phrases_text = (
            "- " + "\n- ".join(previous_retrieval_phrases) if previous_retrieval_phrases else ""
        )

        prompt = self.build_prompt(
            template_name=f"stage{stage_id}_expand_retrieve",
            query=query,
            previous_retrieval_phrases=prev_phrases_text,
            memories=text_memories,
        )

        max_attempts = max(0, self.max_retry_times) + 1
        for attempt in range(1, max_attempts + 1):
            try:
                llm_response = self.process_llm.generate(
                    [{"role": "user", "content": prompt}]
                ).strip()
                result = parse_structured_output(content=llm_response)

                # Parse booleans and fallbacks robustly
                can_answer_str = str(result.get("can_answer", "")).strip().lower()
                can_answer = can_answer_str in {"true", "yes", "y", "1"}

                reason = result.get("reason", "")

                # Accept the misspelled key as well, since LLM output is not guaranteed
                phrases_val = result.get("retrieval_phrases", result.get("retrival_phrases", []))
                if isinstance(phrases_val, list):
                    retrieval_phrases = [str(p).strip() for p in phrases_val if str(p).strip()]
                elif isinstance(phrases_val, str) and phrases_val.strip():
                    retrieval_phrases = [p.strip() for p in phrases_val.splitlines() if p.strip()]
                else:
                    retrieval_phrases = []

                return can_answer, reason, retrieval_phrases

            except Exception as e:
                if attempt < max_attempts:
                    logger.debug(f"[stage_retrieve]🔁 retry {attempt}/{max_attempts} failed: {e!s}")
                    time.sleep(1)
                else:
                    logger.error(
                        f"[stage_retrieve]❌ all {max_attempts} attempts failed: {e!s}; \nprompt: {prompt}",
                        exc_info=True,
                    )
                    raise

    def judge_memories(self, query: str, text_memories: str):
        """Ask the LLM whether `text_memories` are sufficient to answer `query`.

        Returns:
            `(reason, can_answer)` exactly as found in the parsed LLM output
            (`can_answer` is not normalised to a bool here).

        Raises:
            FileNotFoundError: if the `memory_judgement` template does not exist.
            Exception: the last LLM/parsing error once all attempts have failed.
        """
        prompt = self.build_prompt(
            template_name="memory_judgement",
            query=query,
            memories=text_memories,
        )

        max_attempts = max(0, self.max_retry_times) + 1
        for attempt in range(1, max_attempts + 1):
            try:
                llm_response = self.process_llm.generate([{"role": "user", "content": prompt}])
                result = parse_structured_output(content=llm_response)
                return result["reason"], result["can_answer"]
            except Exception as e:
                if attempt < max_attempts:
                    logger.debug(f"[judge_memories]🔁 retry {attempt}/{max_attempts} failed: {e!s}")
                    time.sleep(1)
                else:
                    logger.error(
                        f"[judge_memories]❌ all {max_attempts} attempts failed: {e!s}; \nprompt: {prompt}",
                        exc_info=True,
                    )
                    raise

    def tree_memories_to_text_memories(self, memories: list[TextualMemoryItem]):
        """Flatten memory items into de-duplicated text lists.

        Returns:
            `(mem_list, source_documents)`: the unique memory texts and the unique
            "(chat_time) content" strings of their sources. Both lists keep
            first-seen order so the ranking of `memories` is preserved and the
            prompts built from them are deterministic.
        """
        mem_list = []
        source_documents = []
        for mem in memories:
            # `sources` may be unset on a memory; treat that as "no sources"
            source_documents.extend(
                f"({one.chat_time}) {one.content}" for one in (mem.metadata.sources or [])
            )
            mem_list.append(mem.memory)
        # dict.fromkeys de-duplicates while keeping insertion order (set() would not)
        return list(dict.fromkeys(mem_list)), list(dict.fromkeys(source_documents))

    def get_final_memories(self, user_id: str, top_k: int, mem_list: list[str]):
        """Wrap plain memory texts into `TextualMemoryItem`s, capped at `top_k`."""
        enhanced_memories = [
            TextualMemoryItem(memory=new_mem, metadata=TextualMemoryMetadata(user_id=user_id))
            for new_mem in mem_list
        ]
        if len(enhanced_memories) > top_k:
            logger.info(
                f"Result count {len(enhanced_memories)} exceeds requested top_k {top_k}, truncating to top {top_k} memories"
            )
        return enhanced_memories[:top_k]

    def memory_recreate_enhancement(
        self,
        query: str,
        top_k: int,
        text_memories: list[str],
        retries: int,
    ) -> list:
        """Let the LLM rewrite/condense `text_memories` with respect to `query`.

        Args:
            query: the user query the memories should serve.
            top_k: forwarded to the prompt template.
            text_memories: memory texts to rewrite.
            retries: retries on top of the first attempt (`retries + 1` calls at most).

        Returns:
            The `answer` field of the parsed LLM output as a list. A string answer
            is split into its non-empty lines.

        Raises:
            FileNotFoundError: if the prompt template does not exist.
            ValueError: when every attempt failed.
        """
        formatted_memories = "\n".join(f"- [{i}] {mem}" for i, mem in enumerate(text_memories))
        prompt = self.build_prompt(
            template_name="memory_recreate_enhancement",
            query=query,
            top_k=top_k,
            memories=formatted_memories,
        )

        max_attempts = max(0, retries) + 1
        llm_response = None
        last_error = None
        for attempt in range(1, max_attempts + 1):
            try:
                llm_response = self.process_llm.generate([{"role": "user", "content": prompt}])
                processed_text_memories = parse_structured_output(content=llm_response)
                answer = processed_text_memories["answer"]
                if isinstance(answer, str):
                    # A bare string would be iterated character by character downstream
                    answer = [line.strip() for line in answer.splitlines() if line.strip()]
                elif not isinstance(answer, list):
                    raise TypeError(f"`answer` must be a list, got {type(answer).__name__}")
                logger.debug(
                    f"[memory_recreate_enhancement]\n "
                    f"- original memories: \n"
                    f"{formatted_memories}\n"
                    f"- final memories: \n"
                    f"{answer}"
                )
                return answer
            except Exception as e:
                last_error = e
                logger.debug(
                    f"[memory_recreate_enhancement] 🔁 retry {attempt}/{max_attempts} failed: {e}"
                )
                # No point in sleeping after the last attempt
                if attempt < max_attempts:
                    time.sleep(1)
        # Pass the exception explicitly: we are outside the `except` block here
        logger.error(
            f"Fail to run memory enhancement; prompt: {prompt};\n llm_response: {llm_response}",
            exc_info=last_error,
        )
        raise ValueError("Fail to run memory enhancement") from last_error

    @staticmethod
    def _format_memory_bullets(mem_list: list[str]) -> str:
        """Render memory texts as the bullet list expected by the stage prompts."""
        return "- " + "\n- ".join(mem_list) + "\n"

    def _select_final_memories(self, rewritten, memories, mem_list, user_id: str, top_k: int):
        """Pick the rewritten memories if any stage rewrote them, else the originals."""
        if rewritten:
            return self.get_final_memories(user_id=user_id, top_k=top_k, mem_list=mem_list)[:top_k]
        return memories[:top_k]

    def deep_search(
        self,
        query: str,
        top_k: int,
        info=None,
        memory_type="All",
        search_filter: dict | None = None,
        user_name: str | None = None,
        **kwargs,
    ):
        """Iteratively expand retrieval until the LLM judges the memories sufficient.

        Args:
            query: user query.
            top_k: maximum number of memories to return.
            info, memory_type, search_filter, user_name: forwarded to the inherited
                `retrieve` / `post_retrieve`.
            **kwargs: accepted for interface compatibility; not used here.

        Returns:
            At most `top_k` memory items. These are the originally retrieved items
            when no stage rewrote them (or when every stage failed), otherwise
            fresh `TextualMemoryItem`s built from the LLM-rewritten texts.
        """
        previous_retrieval_phrases = [query]
        retrieved_memories = self.retrieve(
            query=query,
            user_name=user_name,
            top_k=top_k,
            mode=SearchMode.FAST,
            memory_type=memory_type,
            search_filter=search_filter,
            info=info,
        )
        memories = self.post_retrieve(
            retrieved_results=retrieved_memories,
            top_k=top_k,
            user_name=user_name,
            info=info,
        )
        if not memories:
            logger.warning("Requirements not met; returning memories as-is.")
            return memories

        user_id = memories[0].metadata.user_id

        mem_list, _ = self.tree_memories_to_text_memories(memories=memories)
        # Work on a private copy: it is re-fed to post_retrieve in every stage
        retrieved_memories = copy.deepcopy(retrieved_memories)
        rewritten_flag = False
        for current_stage_id in range(self.thinking_stages + 1):
            try:
                # at last
                if current_stage_id == self.thinking_stages:
                    # eval to finish; the verdict is only logged
                    reason, can_answer = self.judge_memories(
                        query=query,
                        text_memories=self._format_memory_bullets(mem_list),
                    )

                    logger.info(
                        f"Final Stage: Stage {current_stage_id}; "
                        f"previous retrieval phrases have been tried: {previous_retrieval_phrases}; "
                        f"final can_answer: {can_answer}; reason: {reason}"
                    )
                    return self._select_final_memories(
                        rewritten_flag, memories, mem_list, user_id, top_k
                    )

                # Stage templates are numbered from 1
                can_answer, reason, retrieval_phrases = self.stage_retrieve(
                    stage_id=current_stage_id + 1,
                    query=query,
                    previous_retrieval_phrases=previous_retrieval_phrases,
                    text_memories=self._format_memory_bullets(mem_list),
                )
                if can_answer:
                    logger.info(
                        f"Stage {current_stage_id}: determined answer can be provided, creating enhanced memories; reason: {reason}",
                    )
                    return self._select_final_memories(
                        rewritten_flag, memories, mem_list, user_id, top_k
                    )

                previous_retrieval_phrases.extend(retrieval_phrases)
                logger.info(
                    f"Start complementary retrieval for Stage {current_stage_id}; "
                    f"previous retrieval phrases have been tried: {previous_retrieval_phrases}; "
                    f"can_answer: {can_answer}; reason: {reason}"
                )
                logger.info(
                    "Stage %d - Found %d new retrieval phrases",
                    current_stage_id,
                    len(retrieval_phrases),
                )
                # Search for additional memories based on retrieval phrases
                additional_retrieved_memories = []
                for phrase in retrieval_phrases:
                    _retrieved_memories = self.retrieve(
                        query=phrase,
                        user_name=user_name,
                        top_k=self.stage_retrieve_top,
                        mode=SearchMode.FAST,
                        memory_type=memory_type,
                        search_filter=search_filter,
                        info=info,
                    )
                    logger.info(
                        "Found %d additional memories for phrase: '%s'",
                        len(_retrieved_memories),
                        phrase[:30] + "..." if len(phrase) > 30 else phrase,
                    )
                    additional_retrieved_memories.extend(_retrieved_memories)
                merged_memories = self.post_retrieve(
                    retrieved_results=retrieved_memories + additional_retrieved_memories,
                    top_k=top_k * 2,
                    user_name=user_name,
                    info=info,
                )
                rewritten_flag = True
                # Already de-duplicated (order-preserving) by the helper
                mem_list, _ = self.tree_memories_to_text_memories(memories=merged_memories)
                mem_list = self.memory_recreate_enhancement(
                    query=query,
                    top_k=top_k,
                    text_memories=mem_list,
                    retries=self.max_retry_times,
                )
                logger.info(
                    "After stage %d, total memories in list: %d",
                    current_stage_id,
                    len(mem_list),
                )

            except Exception as e:
                logger.error("Error in stage %d: %s", current_stage_id, str(e), exc_info=True)
                # Continue to next stage instead of failing completely
                continue
        logger.error("Deep search failed, returning original memories")
        return memories
@@ -0,0 +1,186 @@
1
+ import threading
2
+
3
+ import numpy as np
4
+
5
+ from sklearn.feature_extraction.text import TfidfVectorizer
6
+
7
+ from memos.dependency import require_python_package
8
+ from memos.log import get_logger
9
+ from memos.memories.textual.tree_text_memory.retrieve.retrieve_utils import FastTokenizer
10
+ from memos.utils import timed
11
+
12
+
13
+ logger = get_logger(__name__)
14
+ # Global model cache
15
+ _CACHE_LOCK = threading.Lock()
16
+
17
+
18
class EnhancedBM25:
    """BM25 retrieval over tokenized text with optional TF-IDF reranking.

    Built BM25 models are kept in a process-wide LRU cache (`_BM25_CACHE`) that
    is shared by all instances and guarded by `_CACHE_LOCK`.
    """

    # Capacity of the shared LRU cache of BM25 models
    _CACHE_MAXSIZE = 100

    @require_python_package(import_name="cachetools", install_command="pip install cachetools")
    def __init__(self, tokenizer=None, en_model="en_core_web_sm", zh_model="zh_core_web_sm"):
        """
        Initialize Enhanced BM25 with memory management

        Args:
            tokenizer: object exposing `tokenize_mixed(text, lang=...)`; a
                `FastTokenizer` is created when omitted.
            en_model, zh_model: kept for backward compatibility; not used here.
        """
        self.tokenizer = FastTokenizer() if tokenizer is None else tokenizer
        self._current_tfidf = None

        global _BM25_CACHE
        from cachetools import LRUCache

        # Create the shared cache only once: re-creating it per instance would
        # silently drop the models cached by every other instance.
        with _CACHE_LOCK:
            if globals().get("_BM25_CACHE") is None:
                _BM25_CACHE = LRUCache(maxsize=self._CACHE_MAXSIZE)

    def _tokenize_doc(self, text):
        """
        Tokenize a single document with the configured tokenizer
        """
        return self.tokenizer.tokenize_mixed(text, lang="auto")

    @require_python_package(import_name="rank_bm25", install_command="pip install rank_bm25")
    def _prepare_corpus_data(self, corpus, corpus_name="default"):
        """Return a BM25 model for `corpus`, reusing the cached one when still valid.

        The cache entry for `corpus_name` is only reused if it was built from the
        very same documents with the same tokenizer type; otherwise the model is
        rebuilt, because scores of a stale model would not line up with `corpus`.
        """
        from rank_bm25 import BM25Okapi

        fingerprint = (type(self.tokenizer).__qualname__, len(corpus), hash(tuple(corpus)))
        with _CACHE_LOCK:
            if corpus_name in _BM25_CACHE:
                cached_fingerprint, cached_model = _BM25_CACHE[corpus_name]
                if cached_fingerprint == fingerprint:
                    logger.debug(f"BM25 cache hit: {corpus_name}")
                    return cached_model
            logger.debug(f"BM25 cache miss: {corpus_name}")

            tokenized_corpus = [self._tokenize_doc(doc) for doc in corpus]
            bm25_model = BM25Okapi(tokenized_corpus)
            _BM25_CACHE[corpus_name] = (fingerprint, bm25_model)
            return bm25_model

    def _cleanup_memory(self):
        """Drop per-search state held on the instance (cached models are kept)."""
        self._current_tfidf = None

    def clear_cache(self, corpus_name=None):
        """Clear cache for specific corpus or clear all cache"""
        with _CACHE_LOCK:
            if corpus_name:
                if corpus_name in _BM25_CACHE:
                    del _BM25_CACHE[corpus_name]
            else:
                _BM25_CACHE.clear()

    def get_cache_info(self):
        """Get current cache information"""
        with _CACHE_LOCK:
            return {
                "cache_size": len(_BM25_CACHE),
                "max_cache_size": self._CACHE_MAXSIZE,
                "cached_corpora": list(_BM25_CACHE.keys()),
            }

    def _search_docs(
        self,
        query: str,
        corpus: list[str],
        corpus_name="test",
        top_k=50,
        use_tfidf=False,
        rerank_candidates_multiplier=2,
        cleanup=False,
    ):
        """
        Rank `corpus` against `query` with BM25, optionally blended with TF-IDF.

        Args:
            query: Search query string
            corpus: List of document texts
            corpus_name: Cache key for the BM25 model built from `corpus`
            top_k: Number of top results to return
            use_tfidf: Blend BM25 (0.7) with TF-IDF similarity (0.3) when True
            rerank_candidates_multiplier: Multiplier for candidate selection
            cleanup: Whether to cleanup memory after search (default: False)

        Returns:
            List of dicts with keys `text`, `bm25_score`, `tfidf_score`,
            `combined_score`, `rank` (1-based) and `doc_index`, best first.
            An empty list is returned on any failure.
        """
        if not corpus:
            return []

        logger.info(f"Searching {len(corpus)} documents for query: '{query}'")

        try:
            # Prepare BM25 model
            bm25_model = self._prepare_corpus_data(corpus, corpus_name=corpus_name)
            # De-duplicate query tokens while keeping their order
            tokenized_query = list(dict.fromkeys(self._tokenize_doc(query)))

            # Get BM25 scores
            bm25_scores = bm25_model.get_scores(tokenized_query)

            # Select candidates
            candidate_count = min(top_k * rerank_candidates_multiplier, len(corpus))
            if candidate_count <= 0:
                # `[-0:]` would select the whole array instead of nothing
                return []
            candidate_indices = np.argsort(bm25_scores)[-candidate_count:][::-1]
            candidate_bm25 = bm25_scores[candidate_indices]

            if use_tfidf:
                # Create TF-IDF for this search
                tfidf = TfidfVectorizer(
                    tokenizer=self._tokenize_doc, lowercase=False, token_pattern=None
                )
                tfidf_matrix = tfidf.fit_transform(corpus)

                # TF-IDF reranking
                query_vec = tfidf.transform([query])
                tfidf_similarities = (
                    (tfidf_matrix[candidate_indices] * query_vec.T).toarray().flatten()
                )

                # Combine scores
                combined_scores = 0.7 * candidate_bm25 + 0.3 * tfidf_similarities
            else:
                tfidf_similarities = np.zeros(len(candidate_indices))
                combined_scores = candidate_bm25

            # Positions (within the candidate arrays) of the best `top_k` entries
            best_positions = np.argsort(combined_scores)[::-1][:top_k]

            # build result list
            bm25_recalled_results = []
            for rank, pos in enumerate(best_positions, 1):
                doc_idx = int(candidate_indices[pos])
                bm25_recalled_results.append(
                    {
                        "text": corpus[doc_idx],
                        "bm25_score": float(bm25_scores[doc_idx]),
                        "tfidf_score": float(tfidf_similarities[pos]),
                        "combined_score": float(combined_scores[pos]),
                        "rank": rank,
                        "doc_index": doc_idx,
                    }
                )

            logger.debug(f"Search completed: found {len(bm25_recalled_results)} results")
            return bm25_recalled_results

        except Exception as e:
            logger.error(f"BM25 search failed: {e}", exc_info=True)
            return []
        finally:
            # Always cleanup if requested
            if cleanup:
                self._cleanup_memory()

    @timed
    def search(self, query: str, node_dicts: list[dict], corpus_name="default", **kwargs):
        """
        Search with BM25 and optional TF-IDF reranking

        Each node is indexed by its `metadata.key` joined with its `metadata.tags`.
        Nodes lacking either field are indexed with what they have, so that one
        incomplete node does not void the whole search.

        Args:
            query: Search query string
            node_dicts: Nodes to rank; each is expected to carry a `metadata` dict
            corpus_name: Cache key for the BM25 model
            **kwargs: Forwarded to `_search_docs` (e.g. `top_k`, `use_tfidf`)

        Returns:
            The matching entries of `node_dicts`, best first; `[]` on failure.
        """
        try:
            corpus_list = []
            for node_dict in node_dicts:
                metadata = node_dict.get("metadata") or {}
                key = metadata.get("key") or ""
                tags = metadata.get("tags") or []
                # One corpus entry per node keeps `doc_index` aligned with `node_dicts`
                corpus_list.append(" ".join([key, *tags]))

            recalled_results = self._search_docs(
                query, corpus_list, corpus_name=corpus_name, **kwargs
            )
            return [node_dicts[item["doc_index"]] for item in recalled_results]
        except Exception as e:
            logger.error(f"Error in bm25 search: {e}", exc_info=True)
            return []