MemoryOS 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (315)
  1. memoryos-2.0.3.dist-info/METADATA +418 -0
  2. memoryos-2.0.3.dist-info/RECORD +315 -0
  3. memoryos-2.0.3.dist-info/WHEEL +4 -0
  4. memoryos-2.0.3.dist-info/entry_points.txt +3 -0
  5. memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
  6. memos/__init__.py +20 -0
  7. memos/api/client.py +571 -0
  8. memos/api/config.py +1018 -0
  9. memos/api/context/dependencies.py +50 -0
  10. memos/api/exceptions.py +53 -0
  11. memos/api/handlers/__init__.py +62 -0
  12. memos/api/handlers/add_handler.py +158 -0
  13. memos/api/handlers/base_handler.py +194 -0
  14. memos/api/handlers/chat_handler.py +1401 -0
  15. memos/api/handlers/component_init.py +388 -0
  16. memos/api/handlers/config_builders.py +190 -0
  17. memos/api/handlers/feedback_handler.py +93 -0
  18. memos/api/handlers/formatters_handler.py +237 -0
  19. memos/api/handlers/memory_handler.py +316 -0
  20. memos/api/handlers/scheduler_handler.py +497 -0
  21. memos/api/handlers/search_handler.py +222 -0
  22. memos/api/handlers/suggestion_handler.py +117 -0
  23. memos/api/mcp_serve.py +614 -0
  24. memos/api/middleware/request_context.py +101 -0
  25. memos/api/product_api.py +38 -0
  26. memos/api/product_models.py +1206 -0
  27. memos/api/routers/__init__.py +1 -0
  28. memos/api/routers/product_router.py +477 -0
  29. memos/api/routers/server_router.py +394 -0
  30. memos/api/server_api.py +44 -0
  31. memos/api/start_api.py +433 -0
  32. memos/chunkers/__init__.py +4 -0
  33. memos/chunkers/base.py +24 -0
  34. memos/chunkers/charactertext_chunker.py +41 -0
  35. memos/chunkers/factory.py +24 -0
  36. memos/chunkers/markdown_chunker.py +62 -0
  37. memos/chunkers/sentence_chunker.py +54 -0
  38. memos/chunkers/simple_chunker.py +50 -0
  39. memos/cli.py +113 -0
  40. memos/configs/__init__.py +0 -0
  41. memos/configs/base.py +82 -0
  42. memos/configs/chunker.py +59 -0
  43. memos/configs/embedder.py +88 -0
  44. memos/configs/graph_db.py +236 -0
  45. memos/configs/internet_retriever.py +100 -0
  46. memos/configs/llm.py +151 -0
  47. memos/configs/mem_agent.py +54 -0
  48. memos/configs/mem_chat.py +81 -0
  49. memos/configs/mem_cube.py +105 -0
  50. memos/configs/mem_os.py +83 -0
  51. memos/configs/mem_reader.py +91 -0
  52. memos/configs/mem_scheduler.py +385 -0
  53. memos/configs/mem_user.py +70 -0
  54. memos/configs/memory.py +324 -0
  55. memos/configs/parser.py +38 -0
  56. memos/configs/reranker.py +18 -0
  57. memos/configs/utils.py +8 -0
  58. memos/configs/vec_db.py +80 -0
  59. memos/context/context.py +355 -0
  60. memos/dependency.py +52 -0
  61. memos/deprecation.py +262 -0
  62. memos/embedders/__init__.py +0 -0
  63. memos/embedders/ark.py +95 -0
  64. memos/embedders/base.py +106 -0
  65. memos/embedders/factory.py +29 -0
  66. memos/embedders/ollama.py +77 -0
  67. memos/embedders/sentence_transformer.py +49 -0
  68. memos/embedders/universal_api.py +51 -0
  69. memos/exceptions.py +30 -0
  70. memos/graph_dbs/__init__.py +0 -0
  71. memos/graph_dbs/base.py +274 -0
  72. memos/graph_dbs/factory.py +27 -0
  73. memos/graph_dbs/item.py +46 -0
  74. memos/graph_dbs/nebular.py +1794 -0
  75. memos/graph_dbs/neo4j.py +1942 -0
  76. memos/graph_dbs/neo4j_community.py +1058 -0
  77. memos/graph_dbs/polardb.py +5446 -0
  78. memos/hello_world.py +97 -0
  79. memos/llms/__init__.py +0 -0
  80. memos/llms/base.py +25 -0
  81. memos/llms/deepseek.py +13 -0
  82. memos/llms/factory.py +38 -0
  83. memos/llms/hf.py +443 -0
  84. memos/llms/hf_singleton.py +114 -0
  85. memos/llms/ollama.py +135 -0
  86. memos/llms/openai.py +222 -0
  87. memos/llms/openai_new.py +198 -0
  88. memos/llms/qwen.py +13 -0
  89. memos/llms/utils.py +14 -0
  90. memos/llms/vllm.py +218 -0
  91. memos/log.py +237 -0
  92. memos/mem_agent/base.py +19 -0
  93. memos/mem_agent/deepsearch_agent.py +391 -0
  94. memos/mem_agent/factory.py +36 -0
  95. memos/mem_chat/__init__.py +0 -0
  96. memos/mem_chat/base.py +30 -0
  97. memos/mem_chat/factory.py +21 -0
  98. memos/mem_chat/simple.py +200 -0
  99. memos/mem_cube/__init__.py +0 -0
  100. memos/mem_cube/base.py +30 -0
  101. memos/mem_cube/general.py +240 -0
  102. memos/mem_cube/navie.py +172 -0
  103. memos/mem_cube/utils.py +169 -0
  104. memos/mem_feedback/base.py +15 -0
  105. memos/mem_feedback/feedback.py +1192 -0
  106. memos/mem_feedback/simple_feedback.py +40 -0
  107. memos/mem_feedback/utils.py +230 -0
  108. memos/mem_os/client.py +5 -0
  109. memos/mem_os/core.py +1203 -0
  110. memos/mem_os/main.py +582 -0
  111. memos/mem_os/product.py +1608 -0
  112. memos/mem_os/product_server.py +455 -0
  113. memos/mem_os/utils/default_config.py +359 -0
  114. memos/mem_os/utils/format_utils.py +1403 -0
  115. memos/mem_os/utils/reference_utils.py +162 -0
  116. memos/mem_reader/__init__.py +0 -0
  117. memos/mem_reader/base.py +47 -0
  118. memos/mem_reader/factory.py +53 -0
  119. memos/mem_reader/memory.py +298 -0
  120. memos/mem_reader/multi_modal_struct.py +965 -0
  121. memos/mem_reader/read_multi_modal/__init__.py +43 -0
  122. memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
  123. memos/mem_reader/read_multi_modal/base.py +273 -0
  124. memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
  125. memos/mem_reader/read_multi_modal/image_parser.py +359 -0
  126. memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
  127. memos/mem_reader/read_multi_modal/string_parser.py +139 -0
  128. memos/mem_reader/read_multi_modal/system_parser.py +327 -0
  129. memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
  130. memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
  131. memos/mem_reader/read_multi_modal/user_parser.py +218 -0
  132. memos/mem_reader/read_multi_modal/utils.py +358 -0
  133. memos/mem_reader/simple_struct.py +912 -0
  134. memos/mem_reader/strategy_struct.py +163 -0
  135. memos/mem_reader/utils.py +157 -0
  136. memos/mem_scheduler/__init__.py +0 -0
  137. memos/mem_scheduler/analyzer/__init__.py +0 -0
  138. memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
  139. memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
  140. memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
  141. memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
  142. memos/mem_scheduler/base_scheduler.py +1319 -0
  143. memos/mem_scheduler/general_modules/__init__.py +0 -0
  144. memos/mem_scheduler/general_modules/api_misc.py +137 -0
  145. memos/mem_scheduler/general_modules/base.py +80 -0
  146. memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
  147. memos/mem_scheduler/general_modules/misc.py +313 -0
  148. memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
  149. memos/mem_scheduler/general_modules/task_threads.py +315 -0
  150. memos/mem_scheduler/general_scheduler.py +1495 -0
  151. memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
  152. memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
  153. memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
  154. memos/mem_scheduler/monitors/__init__.py +0 -0
  155. memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
  156. memos/mem_scheduler/monitors/general_monitor.py +394 -0
  157. memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
  158. memos/mem_scheduler/optimized_scheduler.py +410 -0
  159. memos/mem_scheduler/orm_modules/__init__.py +0 -0
  160. memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
  161. memos/mem_scheduler/orm_modules/base_model.py +729 -0
  162. memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
  163. memos/mem_scheduler/orm_modules/redis_model.py +699 -0
  164. memos/mem_scheduler/scheduler_factory.py +23 -0
  165. memos/mem_scheduler/schemas/__init__.py +0 -0
  166. memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
  167. memos/mem_scheduler/schemas/api_schemas.py +233 -0
  168. memos/mem_scheduler/schemas/general_schemas.py +55 -0
  169. memos/mem_scheduler/schemas/message_schemas.py +173 -0
  170. memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
  171. memos/mem_scheduler/schemas/task_schemas.py +132 -0
  172. memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
  173. memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
  174. memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
  175. memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
  176. memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
  177. memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
  178. memos/mem_scheduler/utils/__init__.py +0 -0
  179. memos/mem_scheduler/utils/api_utils.py +77 -0
  180. memos/mem_scheduler/utils/config_utils.py +100 -0
  181. memos/mem_scheduler/utils/db_utils.py +50 -0
  182. memos/mem_scheduler/utils/filter_utils.py +176 -0
  183. memos/mem_scheduler/utils/metrics.py +125 -0
  184. memos/mem_scheduler/utils/misc_utils.py +290 -0
  185. memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
  186. memos/mem_scheduler/utils/status_tracker.py +229 -0
  187. memos/mem_scheduler/webservice_modules/__init__.py +0 -0
  188. memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
  189. memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
  190. memos/mem_user/factory.py +94 -0
  191. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  192. memos/mem_user/mysql_user_manager.py +502 -0
  193. memos/mem_user/persistent_factory.py +98 -0
  194. memos/mem_user/persistent_user_manager.py +260 -0
  195. memos/mem_user/redis_persistent_user_manager.py +225 -0
  196. memos/mem_user/user_manager.py +488 -0
  197. memos/memories/__init__.py +0 -0
  198. memos/memories/activation/__init__.py +0 -0
  199. memos/memories/activation/base.py +42 -0
  200. memos/memories/activation/item.py +56 -0
  201. memos/memories/activation/kv.py +292 -0
  202. memos/memories/activation/vllmkv.py +219 -0
  203. memos/memories/base.py +19 -0
  204. memos/memories/factory.py +42 -0
  205. memos/memories/parametric/__init__.py +0 -0
  206. memos/memories/parametric/base.py +19 -0
  207. memos/memories/parametric/item.py +11 -0
  208. memos/memories/parametric/lora.py +41 -0
  209. memos/memories/textual/__init__.py +0 -0
  210. memos/memories/textual/base.py +92 -0
  211. memos/memories/textual/general.py +236 -0
  212. memos/memories/textual/item.py +304 -0
  213. memos/memories/textual/naive.py +187 -0
  214. memos/memories/textual/prefer_text_memory/__init__.py +0 -0
  215. memos/memories/textual/prefer_text_memory/adder.py +504 -0
  216. memos/memories/textual/prefer_text_memory/config.py +106 -0
  217. memos/memories/textual/prefer_text_memory/extractor.py +221 -0
  218. memos/memories/textual/prefer_text_memory/factory.py +85 -0
  219. memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
  220. memos/memories/textual/prefer_text_memory/spliter.py +132 -0
  221. memos/memories/textual/prefer_text_memory/utils.py +93 -0
  222. memos/memories/textual/preference.py +344 -0
  223. memos/memories/textual/simple_preference.py +161 -0
  224. memos/memories/textual/simple_tree.py +69 -0
  225. memos/memories/textual/tree.py +459 -0
  226. memos/memories/textual/tree_text_memory/__init__.py +0 -0
  227. memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
  228. memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
  229. memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
  230. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
  231. memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
  232. memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
  233. memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
  234. memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
  235. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
  236. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
  237. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
  238. memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
  239. memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
  240. memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
  241. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
  242. memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
  243. memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
  244. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
  245. memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
  246. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
  247. memos/memos_tools/dinding_report_bot.py +453 -0
  248. memos/memos_tools/lockfree_dict.py +120 -0
  249. memos/memos_tools/notification_service.py +44 -0
  250. memos/memos_tools/notification_utils.py +142 -0
  251. memos/memos_tools/singleton.py +174 -0
  252. memos/memos_tools/thread_safe_dict.py +310 -0
  253. memos/memos_tools/thread_safe_dict_segment.py +382 -0
  254. memos/multi_mem_cube/__init__.py +0 -0
  255. memos/multi_mem_cube/composite_cube.py +86 -0
  256. memos/multi_mem_cube/single_cube.py +874 -0
  257. memos/multi_mem_cube/views.py +54 -0
  258. memos/parsers/__init__.py +0 -0
  259. memos/parsers/base.py +15 -0
  260. memos/parsers/factory.py +21 -0
  261. memos/parsers/markitdown.py +28 -0
  262. memos/reranker/__init__.py +4 -0
  263. memos/reranker/base.py +25 -0
  264. memos/reranker/concat.py +103 -0
  265. memos/reranker/cosine_local.py +102 -0
  266. memos/reranker/factory.py +72 -0
  267. memos/reranker/http_bge.py +324 -0
  268. memos/reranker/http_bge_strategy.py +327 -0
  269. memos/reranker/noop.py +19 -0
  270. memos/reranker/strategies/__init__.py +4 -0
  271. memos/reranker/strategies/base.py +61 -0
  272. memos/reranker/strategies/concat_background.py +94 -0
  273. memos/reranker/strategies/concat_docsource.py +110 -0
  274. memos/reranker/strategies/dialogue_common.py +109 -0
  275. memos/reranker/strategies/factory.py +31 -0
  276. memos/reranker/strategies/single_turn.py +107 -0
  277. memos/reranker/strategies/singleturn_outmem.py +98 -0
  278. memos/settings.py +10 -0
  279. memos/templates/__init__.py +0 -0
  280. memos/templates/advanced_search_prompts.py +211 -0
  281. memos/templates/cloud_service_prompt.py +107 -0
  282. memos/templates/instruction_completion.py +66 -0
  283. memos/templates/mem_agent_prompts.py +85 -0
  284. memos/templates/mem_feedback_prompts.py +822 -0
  285. memos/templates/mem_reader_prompts.py +1096 -0
  286. memos/templates/mem_reader_strategy_prompts.py +238 -0
  287. memos/templates/mem_scheduler_prompts.py +626 -0
  288. memos/templates/mem_search_prompts.py +93 -0
  289. memos/templates/mos_prompts.py +403 -0
  290. memos/templates/prefer_complete_prompt.py +735 -0
  291. memos/templates/tool_mem_prompts.py +139 -0
  292. memos/templates/tree_reorganize_prompts.py +230 -0
  293. memos/types/__init__.py +34 -0
  294. memos/types/general_types.py +151 -0
  295. memos/types/openai_chat_completion_types/__init__.py +15 -0
  296. memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
  297. memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
  298. memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
  299. memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
  300. memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
  301. memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
  302. memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
  303. memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
  304. memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
  305. memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
  306. memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
  307. memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
  308. memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
  309. memos/utils.py +123 -0
  310. memos/vec_dbs/__init__.py +0 -0
  311. memos/vec_dbs/base.py +117 -0
  312. memos/vec_dbs/factory.py +23 -0
  313. memos/vec_dbs/item.py +50 -0
  314. memos/vec_dbs/milvus.py +654 -0
  315. memos/vec_dbs/qdrant.py +355 -0
@@ -0,0 +1,419 @@
1
+ """BochaAI Search API retriever for tree text memory."""
2
+
3
+ import json
4
+
5
+ from concurrent.futures import as_completed
6
+ from datetime import datetime
7
+ from typing import Any
8
+
9
+ import requests
10
+
11
+ from memos.context.context import ContextThreadPoolExecutor
12
+ from memos.dependency import require_python_package
13
+ from memos.embedders.factory import OllamaEmbedder
14
+ from memos.log import get_logger
15
+ from memos.mem_reader.base import BaseMemReader
16
+ from memos.mem_reader.read_multi_modal import detect_lang
17
+ from memos.memories.textual.item import (
18
+ SearchedTreeNodeTextualMemoryMetadata,
19
+ SourceMessage,
20
+ TextualMemoryItem,
21
+ )
22
+
23
+
24
+ logger = get_logger(__name__)
25
+
26
+
27
class BochaAISearchAPI:
    """BochaAI Search API Client"""

    def __init__(self, api_key: str, max_results: int = 20, timeout: float = 30.0):
        """
        Initialize BochaAI Search API client.

        Args:
            api_key: BochaAI API key
            max_results: Maximum number of search results to retrieve
            timeout: Per-request timeout in seconds. Prevents a stalled
                connection from hanging the caller indefinitely.
        """
        self.api_key = api_key
        self.max_results = max_results
        self.timeout = timeout

        self.web_url = "https://api.bochaai.com/v1/web-search"
        self.ai_url = "https://api.bochaai.com/v1/ai-search"

        self.headers = {
            "Authorization": f"Bearer {api_key}",
            "Content-Type": "application/json",
        }

    def search_web(
        self, query: str, summary: bool = True, freshness="noLimit", max_results=None
    ) -> list[dict]:
        """
        Perform a Web Search (equivalent to the first curl).

        Args:
            query: Search query string
            summary: Whether to include summary in the results
            freshness: Freshness filter (e.g. 'noLimit', 'day', 'week')
            max_results: Maximum number of results to retrieve, bocha is limited to 50

        Returns:
            A list of search result dicts
        """
        body = {
            "query": query,
            "summary": summary,
            "freshness": freshness,
            "count": max_results or self.max_results,
        }
        return self._post(self.web_url, body)

    def search_ai(
        self,
        query: str,
        answer: bool = False,
        stream: bool = False,
        freshness="noLimit",
        max_results=None,
    ) -> list[dict]:
        """
        Perform an AI Search (equivalent to the second curl).

        Args:
            query: Search query string
            answer: Whether BochaAI should generate an answer
            stream: Whether to use streaming response
            freshness: Freshness filter (e.g. 'noLimit', 'day', 'week')
            max_results: Maximum number of results to retrieve, bocha is limited to 50

        Returns:
            A list of search result dicts
        """
        body = {
            "query": query,
            "freshness": freshness,
            "count": max_results or self.max_results,
            "answer": answer,
            "stream": stream,
        }
        return self._post(self.ai_url, body)

    def _post(self, url: str, body: dict) -> list[dict]:
        """Send POST request and parse BochaAI search results.

        Returns an empty list on any request/parse failure (best-effort:
        internet retrieval must never crash the memory pipeline).
        """
        try:
            # timeout= fixes a potential indefinite hang on a stalled connection.
            resp = requests.post(url, headers=self.headers, json=body, timeout=self.timeout)
            resp.raise_for_status()
            raw_data = resp.json()

            # parse the nested structure correctly
            # AI Search: results are embedded as JSON strings inside "messages"
            if "messages" in raw_data:
                results = []
                for msg in raw_data["messages"]:
                    if msg.get("type") == "source" and msg.get("content_type") == "webpage":
                        try:
                            content_json = json.loads(msg["content"])
                            results.extend(content_json.get("value", []))
                        except Exception as e:
                            logger.error(f"Failed to parse message content: {e}")
                return results

            # Web Search: results live under data.webPages.value
            return raw_data.get("data", {}).get("webPages", {}).get("value", [])

        except Exception:
            import traceback

            logger.error(f"BochaAI search error: {traceback.format_exc()}")
            return []
130
+
131
+
132
class BochaAISearchRetriever:
    """BochaAI retriever that converts search results into TextualMemoryItem objects"""

    @require_python_package(
        import_name="jieba",
        install_command="pip install jieba",
        install_link="https://github.com/fxsjy/jieba",
    )
    def __init__(
        self,
        access_key: str,
        embedder: OllamaEmbedder,
        reader: BaseMemReader,
        max_results: int = 20,
    ):
        """
        Initialize BochaAI Search retriever.

        Args:
            access_key: BochaAI API key
            embedder: Embedder instance for generating embeddings
            reader: MemReader instance for processing internet content
            max_results: Maximum number of search results to retrieve
        """

        from jieba.analyse import TextRank

        self.bocha_api = BochaAISearchAPI(access_key, max_results=max_results)
        self.embedder = embedder
        self.reader = reader
        # Fast keyword extraction path for Chinese text (English falls back to
        # the keyword-category tagger in _extract_tags).
        self.zh_fast_keywords_extractor = TextRank()

    def _extract_tags(self, title: str, content: str, summary: str, parsed_goal=None) -> list[str]:
        """
        Extract tags from title, content and summary

        Args:
            title: Article title
            content: Article content
            summary: Article summary
            parsed_goal: Parsed task goal (optional)

        Returns:
            List of extracted tags (deduplicated, at most 15)
        """
        tags = []

        # Add source-based tags
        tags.append("bocha_search")
        tags.append("news")

        # Add content-based tags
        text = f"{title} {content} {summary}".lower()

        # Simple keyword-based tagging
        keywords = {
            "economy": [
                "economy",
                "GDP",
                "growth",
                "production",
                "industry",
                "investment",
                "consumption",
                "market",
                "trade",
                "finance",
            ],
            "politics": [
                "politics",
                "government",
                "policy",
                "meeting",
                "leader",
                "election",
                "parliament",
                "ministry",
            ],
            "technology": [
                "technology",
                "tech",
                "innovation",
                "digital",
                "internet",
                "AI",
                "artificial intelligence",
                "software",
                "hardware",
            ],
            "sports": [
                "sports",
                "game",
                "athlete",
                "olympic",
                "championship",
                "tournament",
                "team",
                "player",
            ],
            "culture": [
                "culture",
                "education",
                "art",
                "history",
                "literature",
                "music",
                "film",
                "museum",
            ],
            "health": [
                "health",
                "medical",
                "pandemic",
                "hospital",
                "doctor",
                "medicine",
                "disease",
                "treatment",
            ],
            "environment": [
                "environment",
                "ecology",
                "pollution",
                "green",
                "climate",
                "sustainability",
                "renewable",
            ],
        }

        for category, words in keywords.items():
            if any(word in text for word in words):
                tags.append(category)

        # Add goal-based tags if available
        if parsed_goal and hasattr(parsed_goal, "tags"):
            tags.extend(parsed_goal.tags)

        return list(set(tags))[:15]  # Limit to 15 tags

    def retrieve_from_internet(
        self, query: str, top_k: int = 10, parsed_goal=None, info=None, mode="fast"
    ) -> list[TextualMemoryItem]:
        """
        Default internet retrieval (AI Search).
        This keeps consistent API with Xinyu and Google retrievers.

        Args:
            query: Search query
            top_k: Number of results to retrieve
            parsed_goal: Parsed task goal (optional)
            info (dict): Metadata for memory consumption tracking

        Returns:
            List of TextualMemoryItem
        """
        # NOTE: the default path uses Bocha AI Search; use retrieve_from_web
        # for the plain Web Search endpoint.
        search_results = self.bocha_api.search_ai(query, max_results=top_k)
        return self._convert_to_mem_items(search_results, query, parsed_goal, info, mode=mode)

    def retrieve_from_web(
        self, query: str, top_k: int = 10, parsed_goal=None, info=None, mode="fast"
    ) -> list[TextualMemoryItem]:
        """Explicitly retrieve using Bocha Web Search."""
        # Fix: forward top_k so the caller's requested result count is honored
        # (previously top_k was accepted but silently ignored).
        search_results = self.bocha_api.search_web(query, max_results=top_k)
        return self._convert_to_mem_items(search_results, query, parsed_goal, info, mode=mode)

    def retrieve_from_ai(
        self, query: str, top_k: int = 10, parsed_goal=None, info=None, mode="fast"
    ) -> list[TextualMemoryItem]:
        """Explicitly retrieve using Bocha AI Search."""
        # Fix: forward top_k (was previously ignored).
        search_results = self.bocha_api.search_ai(query, max_results=top_k)
        return self._convert_to_mem_items(search_results, query, parsed_goal, info, mode=mode)

    def _convert_to_mem_items(
        self, search_results: list[dict], query: str, parsed_goal=None, info=None, mode="fast"
    ):
        """Convert API search results into TextualMemoryItem objects.

        Results are processed concurrently; failures on individual results are
        logged and skipped so one bad page cannot sink the whole retrieval.
        """
        memory_items = []
        if not info:
            info = {"user_id": "", "session_id": ""}

        with ContextThreadPoolExecutor(max_workers=8) as executor:
            futures = [
                executor.submit(self._process_result, r, query, parsed_goal, info, mode=mode)
                for r in search_results
            ]
            for future in as_completed(futures):
                try:
                    memory_items.extend(future.result())
                except Exception as e:
                    logger.error(f"Error processing BochaAI search result: {e}")

        # Deduplicate items by memory text
        unique_memory_items = {item.memory: item for item in memory_items}
        return list(unique_memory_items.values())

    def _process_result(
        self, result: dict, query: str, parsed_goal: str, info: dict[str, Any], mode="fast"
    ) -> list[TextualMemoryItem]:
        """Process one Bocha search result into TextualMemoryItem.

        In "fast" mode the summary becomes a single item with lightweight tags;
        otherwise the reader splits the content into chunked memory items.
        """
        title = result.get("name", "")
        content = result.get("summary", "") or result.get("snippet", "")
        summary = result.get("summary", "") or result.get("snippet", "")
        url = result.get("url", "")
        publish_time = result.get("datePublished", "")
        site_name = result.get("siteName", "")
        site_icon = result.get("siteIcon")

        # Normalize publish time to YYYY-MM-DD; fall back to today when the
        # field is missing or not ISO-8601.
        if publish_time:
            try:
                publish_time = datetime.fromisoformat(publish_time.replace("Z", "+00:00")).strftime(
                    "%Y-%m-%d"
                )
            except Exception:
                publish_time = datetime.now().strftime("%Y-%m-%d")
        else:
            publish_time = datetime.now().strftime("%Y-%m-%d")

        if mode == "fast":
            info_ = info.copy()
            user_id = info_.pop("user_id", "")
            session_id = info_.pop("session_id", "")
            lang = detect_lang(summary)
            tags = (
                self.zh_fast_keywords_extractor.textrank(summary, topK=3)[:3]
                if lang == "zh"
                else self._extract_tags(title, content, summary)[:3]
            )

            return [
                TextualMemoryItem(
                    memory=(
                        f"[Outer internet view] Title: {title}\nNewsTime:"
                        f" {publish_time}\nSummary:"
                        f" {summary}\n"
                    ),
                    metadata=SearchedTreeNodeTextualMemoryMetadata(
                        user_id=user_id,
                        session_id=session_id,
                        memory_type="OuterMemory",
                        status="activated",
                        type="fact",
                        source="web",
                        sources=[SourceMessage(type="web", url=url)] if url else [],
                        visibility="public",
                        info=info_,
                        background="",
                        confidence=0.99,
                        usage=[],
                        tags=tags,
                        key=title,
                        embedding=self.embedder.embed([content])[0],
                        internet_info={
                            "title": title,
                            "url": url,
                            "site_name": site_name,
                            "site_icon": site_icon,
                            "summary": summary,
                        },
                    ),
                )
            ]
        else:
            # Use reader to split and process the content into chunks
            read_items = self.reader.get_memory([content], type="doc", info=info)

            memory_items = []
            for read_item_i in read_items[0]:
                read_item_i.memory = (
                    f"[Outer internet view] Title: {title}\nNewsTime:"
                    f" {publish_time}\nSummary:"
                    f" {summary}\n"
                    f"Content: {read_item_i.memory}"
                )
                read_item_i.metadata.source = "web"
                read_item_i.metadata.memory_type = "OuterMemory"
                read_item_i.metadata.sources = [SourceMessage(type="web", url=url)] if url else []
                read_item_i.metadata.visibility = "public"
                read_item_i.metadata.internet_info = {
                    "title": title,
                    "url": url,
                    "site_name": site_name,
                    "site_icon": site_icon,
                    "summary": summary,
                }
                memory_items.append(read_item_i)
            return memory_items
@@ -0,0 +1,270 @@
1
+ """Internet retrieval module for tree text memory."""
2
+
3
+ import uuid
4
+
5
+ from datetime import datetime
6
+
7
+ import requests
8
+
9
+ from memos.embedders.factory import OllamaEmbedder
10
+ from memos.memories.textual.item import (
11
+ SourceMessage,
12
+ TextualMemoryItem,
13
+ TreeNodeTextualMemoryMetadata,
14
+ )
15
+
16
+
17
+ class GoogleCustomSearchAPI:
18
+ """Google Custom Search API Client"""
19
+
20
+ def __init__(
21
+ self, api_key: str, search_engine_id: str, max_results: int = 20, num_per_request: int = 10
22
+ ):
23
+ """
24
+ Initialize Google Custom Search API client
25
+
26
+ Args:
27
+ api_key: Google API key
28
+ search_engine_id: Search engine ID (cx parameter)
29
+ max_results: Maximum number of results to retrieve
30
+ num_per_request: Number of results per API request
31
+ """
32
+ self.api_key = api_key
33
+ self.search_engine_id = search_engine_id
34
+ self.max_results = max_results
35
+ self.num_per_request = min(num_per_request, 10) # Google API limits to 10
36
+ self.base_url = "https://www.googleapis.com/customsearch/v1"
37
+
38
+ def search(self, query: str, num_results: int | None = None, start_index: int = 1) -> dict:
39
+ """
40
+ Execute search request
41
+
42
+ Args:
43
+ query: Search query
44
+ num_results: Number of results to return (uses config default if None)
45
+ start_index: Starting index (default 1)
46
+
47
+ Returns:
48
+ Dictionary containing search results
49
+ """
50
+ if num_results is None:
51
+ num_results = self.num_per_request
52
+
53
+ params = {
54
+ "key": self.api_key,
55
+ "cx": self.search_engine_id,
56
+ "q": query,
57
+ "num": min(num_results, self.num_per_request),
58
+ "start": start_index,
59
+ }
60
+
61
+ try:
62
+ response = requests.get(self.base_url, params=params)
63
+ response.raise_for_status()
64
+ return response.json()
65
+ except requests.exceptions.RequestException as e:
66
+ print(f"Google search request failed: {e}")
67
+ return {}
68
+
69
+ def get_all_results(self, query: str, max_results: int | None = None) -> list[dict]:
70
+ """
71
+ Get all search results (with pagination)
72
+
73
+ Args:
74
+ query: Search query
75
+ max_results: Maximum number of results (uses config default if None)
76
+
77
+ Returns:
78
+ List of all search results
79
+ """
80
+ if max_results is None:
81
+ max_results = self.max_results
82
+
83
+ all_results = []
84
+ start_index = 1
85
+
86
+ while len(all_results) < max_results:
87
+ search_data = self.search(query, start_index=start_index)
88
+
89
+ if not search_data or "items" not in search_data:
90
+ break
91
+
92
+ all_results.extend(search_data["items"])
93
+
94
+ # Check if there are more results
95
+ if len(search_data["items"]) < self.num_per_request:
96
+ break
97
+
98
+ start_index += self.num_per_request
99
+
100
+ # Avoid infinite loop
101
+ if start_index > 100:
102
+ break
103
+
104
+ return all_results[:max_results]
105
+
106
+
107
class InternetGoogleRetriever:
    """Internet retriever that converts search results to TextualMemoryItem format"""

    def __init__(
        self,
        api_key: str,
        search_engine_id: str,
        embedder: OllamaEmbedder,
        max_results: int = 20,
        num_per_request: int = 10,
    ):
        """
        Initialize internet retriever

        Args:
            api_key: Google API key
            search_engine_id: Search engine ID
            embedder: Embedder instance for generating embeddings
            max_results: Maximum number of results to retrieve
            num_per_request: Number of results per API request
        """
        self.google_api = GoogleCustomSearchAPI(
            api_key, search_engine_id, max_results=max_results, num_per_request=num_per_request
        )
        self.embedder = embedder

    def retrieve_from_internet(
        self, query: str, top_k: int = 10, parsed_goal=None, info=None
    ) -> list[TextualMemoryItem]:
        """
        Retrieve information from the internet and convert to TextualMemoryItem format

        Args:
            query: Search query
            top_k: Number of results to return
            parsed_goal: Parsed task goal (optional)
            info (dict): Leave a record of memory consumption.

        Returns:
            List of TextualMemoryItem
        """
        if not info:
            info = {"user_id": "", "session_id": ""}
        # Get search results
        search_results = self.google_api.get_all_results(query, max_results=top_k)

        # Convert to TextualMemoryItem format
        memory_items = []

        # Fix: the original `for _, result in enumerate(...)` discarded the index.
        for result in search_results:
            # Extract basic information
            title = result.get("title", "")
            snippet = result.get("snippet", "")
            link = result.get("link", "")
            display_link = result.get("displayLink", "")

            # Combine memory content
            memory_content = f"Title: {title}\nSummary: {snippet}\nSource: {link}"
            # Create metadata
            metadata = TreeNodeTextualMemoryMetadata(
                user_id=info.get("user_id", ""),
                session_id=info.get("session_id", ""),
                status="activated",
                type="fact",  # Internet search results are usually factual information
                memory_time=datetime.now().strftime("%Y-%m-%d"),
                source="web",
                confidence=85.0,  # Confidence level for internet information
                entities=self._extract_entities(title, snippet),
                tags=self._extract_tags(title, snippet, parsed_goal),
                visibility="public",
                memory_type="LongTermMemory",  # Internet search results stored as long-term memory
                key=title,
                sources=[SourceMessage(type="web", url=link)] if link else [],
                embedding=self.embedder.embed([memory_content])[0],
                created_at=datetime.now().isoformat(),
                usage=[],
                background=f"Internet search result from {display_link}",
            )

            # Create TextualMemoryItem
            memory_item = TextualMemoryItem(
                id=str(uuid.uuid4()), memory=memory_content, metadata=metadata
            )

            memory_items.append(memory_item)

        return memory_items

    def _extract_entities(self, title: str, snippet: str) -> list[str]:
        """
        Extract entities from title and snippet

        Simple heuristics: organization names recognized by common suffixes,
        plus date strings matched by regex. Can be improved as needed.

        Args:
            title: Title
            snippet: Snippet

        Returns:
            List of entities (at most 5)
        """
        import re

        text = f"{title} {snippet}"
        entities = []

        # Extract possible organization names (with common suffixes)
        org_suffixes = ["Inc", "Corp", "LLC", "Ltd", "Company", "University", "Institute"]
        words = text.split()
        for i, word in enumerate(words):
            if word in org_suffixes and i > 0:
                entities.append(f"{words[i - 1]} {word}")

        # Extract possible dates
        date_pattern = r"\d{4}-\d{2}-\d{2}|\d{1,2}/\d{1,2}/\d{4}|\w+ \d{1,2}, \d{4}"
        dates = re.findall(date_pattern, text)
        entities.extend(dates)

        return entities[:5]  # Limit number of entities

    def _extract_tags(self, title: str, snippet: str, parsed_goal=None) -> list[str]:
        """
        Extract tags from title and snippet

        Args:
            title: Title
            snippet: Snippet
            parsed_goal: Parsed task goal

        Returns:
            List of tags (deduplicated, first-seen order, at most 10)
        """
        tags = []

        # Extract tags from parsed goal
        if parsed_goal:
            if hasattr(parsed_goal, "topic") and parsed_goal.topic:
                tags.append(parsed_goal.topic)
            if hasattr(parsed_goal, "concept") and parsed_goal.concept:
                tags.append(parsed_goal.concept)

        # Extract keywords from text
        text = f"{title} {snippet}".lower()

        # Simple keyword extraction
        keywords = [
            "news",
            "report",
            "article",
            "study",
            "research",
            "analysis",
            "update",
            "announcement",
            "policy",
            "memo",
            "document",
        ]

        for keyword in keywords:
            if keyword in text:
                tags.append(keyword)

        # Fix: list(set(tags)) produced a nondeterministic order across runs
        # (string hashing is randomized), so identical inputs could yield
        # different tag lists after truncation. dict.fromkeys deduplicates
        # while preserving first-seen order.
        return list(dict.fromkeys(tags))[:10]