MemoryOS 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (315) hide show
  1. memoryos-2.0.3.dist-info/METADATA +418 -0
  2. memoryos-2.0.3.dist-info/RECORD +315 -0
  3. memoryos-2.0.3.dist-info/WHEEL +4 -0
  4. memoryos-2.0.3.dist-info/entry_points.txt +3 -0
  5. memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
  6. memos/__init__.py +20 -0
  7. memos/api/client.py +571 -0
  8. memos/api/config.py +1018 -0
  9. memos/api/context/dependencies.py +50 -0
  10. memos/api/exceptions.py +53 -0
  11. memos/api/handlers/__init__.py +62 -0
  12. memos/api/handlers/add_handler.py +158 -0
  13. memos/api/handlers/base_handler.py +194 -0
  14. memos/api/handlers/chat_handler.py +1401 -0
  15. memos/api/handlers/component_init.py +388 -0
  16. memos/api/handlers/config_builders.py +190 -0
  17. memos/api/handlers/feedback_handler.py +93 -0
  18. memos/api/handlers/formatters_handler.py +237 -0
  19. memos/api/handlers/memory_handler.py +316 -0
  20. memos/api/handlers/scheduler_handler.py +497 -0
  21. memos/api/handlers/search_handler.py +222 -0
  22. memos/api/handlers/suggestion_handler.py +117 -0
  23. memos/api/mcp_serve.py +614 -0
  24. memos/api/middleware/request_context.py +101 -0
  25. memos/api/product_api.py +38 -0
  26. memos/api/product_models.py +1206 -0
  27. memos/api/routers/__init__.py +1 -0
  28. memos/api/routers/product_router.py +477 -0
  29. memos/api/routers/server_router.py +394 -0
  30. memos/api/server_api.py +44 -0
  31. memos/api/start_api.py +433 -0
  32. memos/chunkers/__init__.py +4 -0
  33. memos/chunkers/base.py +24 -0
  34. memos/chunkers/charactertext_chunker.py +41 -0
  35. memos/chunkers/factory.py +24 -0
  36. memos/chunkers/markdown_chunker.py +62 -0
  37. memos/chunkers/sentence_chunker.py +54 -0
  38. memos/chunkers/simple_chunker.py +50 -0
  39. memos/cli.py +113 -0
  40. memos/configs/__init__.py +0 -0
  41. memos/configs/base.py +82 -0
  42. memos/configs/chunker.py +59 -0
  43. memos/configs/embedder.py +88 -0
  44. memos/configs/graph_db.py +236 -0
  45. memos/configs/internet_retriever.py +100 -0
  46. memos/configs/llm.py +151 -0
  47. memos/configs/mem_agent.py +54 -0
  48. memos/configs/mem_chat.py +81 -0
  49. memos/configs/mem_cube.py +105 -0
  50. memos/configs/mem_os.py +83 -0
  51. memos/configs/mem_reader.py +91 -0
  52. memos/configs/mem_scheduler.py +385 -0
  53. memos/configs/mem_user.py +70 -0
  54. memos/configs/memory.py +324 -0
  55. memos/configs/parser.py +38 -0
  56. memos/configs/reranker.py +18 -0
  57. memos/configs/utils.py +8 -0
  58. memos/configs/vec_db.py +80 -0
  59. memos/context/context.py +355 -0
  60. memos/dependency.py +52 -0
  61. memos/deprecation.py +262 -0
  62. memos/embedders/__init__.py +0 -0
  63. memos/embedders/ark.py +95 -0
  64. memos/embedders/base.py +106 -0
  65. memos/embedders/factory.py +29 -0
  66. memos/embedders/ollama.py +77 -0
  67. memos/embedders/sentence_transformer.py +49 -0
  68. memos/embedders/universal_api.py +51 -0
  69. memos/exceptions.py +30 -0
  70. memos/graph_dbs/__init__.py +0 -0
  71. memos/graph_dbs/base.py +274 -0
  72. memos/graph_dbs/factory.py +27 -0
  73. memos/graph_dbs/item.py +46 -0
  74. memos/graph_dbs/nebular.py +1794 -0
  75. memos/graph_dbs/neo4j.py +1942 -0
  76. memos/graph_dbs/neo4j_community.py +1058 -0
  77. memos/graph_dbs/polardb.py +5446 -0
  78. memos/hello_world.py +97 -0
  79. memos/llms/__init__.py +0 -0
  80. memos/llms/base.py +25 -0
  81. memos/llms/deepseek.py +13 -0
  82. memos/llms/factory.py +38 -0
  83. memos/llms/hf.py +443 -0
  84. memos/llms/hf_singleton.py +114 -0
  85. memos/llms/ollama.py +135 -0
  86. memos/llms/openai.py +222 -0
  87. memos/llms/openai_new.py +198 -0
  88. memos/llms/qwen.py +13 -0
  89. memos/llms/utils.py +14 -0
  90. memos/llms/vllm.py +218 -0
  91. memos/log.py +237 -0
  92. memos/mem_agent/base.py +19 -0
  93. memos/mem_agent/deepsearch_agent.py +391 -0
  94. memos/mem_agent/factory.py +36 -0
  95. memos/mem_chat/__init__.py +0 -0
  96. memos/mem_chat/base.py +30 -0
  97. memos/mem_chat/factory.py +21 -0
  98. memos/mem_chat/simple.py +200 -0
  99. memos/mem_cube/__init__.py +0 -0
  100. memos/mem_cube/base.py +30 -0
  101. memos/mem_cube/general.py +240 -0
  102. memos/mem_cube/navie.py +172 -0
  103. memos/mem_cube/utils.py +169 -0
  104. memos/mem_feedback/base.py +15 -0
  105. memos/mem_feedback/feedback.py +1192 -0
  106. memos/mem_feedback/simple_feedback.py +40 -0
  107. memos/mem_feedback/utils.py +230 -0
  108. memos/mem_os/client.py +5 -0
  109. memos/mem_os/core.py +1203 -0
  110. memos/mem_os/main.py +582 -0
  111. memos/mem_os/product.py +1608 -0
  112. memos/mem_os/product_server.py +455 -0
  113. memos/mem_os/utils/default_config.py +359 -0
  114. memos/mem_os/utils/format_utils.py +1403 -0
  115. memos/mem_os/utils/reference_utils.py +162 -0
  116. memos/mem_reader/__init__.py +0 -0
  117. memos/mem_reader/base.py +47 -0
  118. memos/mem_reader/factory.py +53 -0
  119. memos/mem_reader/memory.py +298 -0
  120. memos/mem_reader/multi_modal_struct.py +965 -0
  121. memos/mem_reader/read_multi_modal/__init__.py +43 -0
  122. memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
  123. memos/mem_reader/read_multi_modal/base.py +273 -0
  124. memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
  125. memos/mem_reader/read_multi_modal/image_parser.py +359 -0
  126. memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
  127. memos/mem_reader/read_multi_modal/string_parser.py +139 -0
  128. memos/mem_reader/read_multi_modal/system_parser.py +327 -0
  129. memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
  130. memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
  131. memos/mem_reader/read_multi_modal/user_parser.py +218 -0
  132. memos/mem_reader/read_multi_modal/utils.py +358 -0
  133. memos/mem_reader/simple_struct.py +912 -0
  134. memos/mem_reader/strategy_struct.py +163 -0
  135. memos/mem_reader/utils.py +157 -0
  136. memos/mem_scheduler/__init__.py +0 -0
  137. memos/mem_scheduler/analyzer/__init__.py +0 -0
  138. memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
  139. memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
  140. memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
  141. memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
  142. memos/mem_scheduler/base_scheduler.py +1319 -0
  143. memos/mem_scheduler/general_modules/__init__.py +0 -0
  144. memos/mem_scheduler/general_modules/api_misc.py +137 -0
  145. memos/mem_scheduler/general_modules/base.py +80 -0
  146. memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
  147. memos/mem_scheduler/general_modules/misc.py +313 -0
  148. memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
  149. memos/mem_scheduler/general_modules/task_threads.py +315 -0
  150. memos/mem_scheduler/general_scheduler.py +1495 -0
  151. memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
  152. memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
  153. memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
  154. memos/mem_scheduler/monitors/__init__.py +0 -0
  155. memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
  156. memos/mem_scheduler/monitors/general_monitor.py +394 -0
  157. memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
  158. memos/mem_scheduler/optimized_scheduler.py +410 -0
  159. memos/mem_scheduler/orm_modules/__init__.py +0 -0
  160. memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
  161. memos/mem_scheduler/orm_modules/base_model.py +729 -0
  162. memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
  163. memos/mem_scheduler/orm_modules/redis_model.py +699 -0
  164. memos/mem_scheduler/scheduler_factory.py +23 -0
  165. memos/mem_scheduler/schemas/__init__.py +0 -0
  166. memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
  167. memos/mem_scheduler/schemas/api_schemas.py +233 -0
  168. memos/mem_scheduler/schemas/general_schemas.py +55 -0
  169. memos/mem_scheduler/schemas/message_schemas.py +173 -0
  170. memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
  171. memos/mem_scheduler/schemas/task_schemas.py +132 -0
  172. memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
  173. memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
  174. memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
  175. memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
  176. memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
  177. memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
  178. memos/mem_scheduler/utils/__init__.py +0 -0
  179. memos/mem_scheduler/utils/api_utils.py +77 -0
  180. memos/mem_scheduler/utils/config_utils.py +100 -0
  181. memos/mem_scheduler/utils/db_utils.py +50 -0
  182. memos/mem_scheduler/utils/filter_utils.py +176 -0
  183. memos/mem_scheduler/utils/metrics.py +125 -0
  184. memos/mem_scheduler/utils/misc_utils.py +290 -0
  185. memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
  186. memos/mem_scheduler/utils/status_tracker.py +229 -0
  187. memos/mem_scheduler/webservice_modules/__init__.py +0 -0
  188. memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
  189. memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
  190. memos/mem_user/factory.py +94 -0
  191. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  192. memos/mem_user/mysql_user_manager.py +502 -0
  193. memos/mem_user/persistent_factory.py +98 -0
  194. memos/mem_user/persistent_user_manager.py +260 -0
  195. memos/mem_user/redis_persistent_user_manager.py +225 -0
  196. memos/mem_user/user_manager.py +488 -0
  197. memos/memories/__init__.py +0 -0
  198. memos/memories/activation/__init__.py +0 -0
  199. memos/memories/activation/base.py +42 -0
  200. memos/memories/activation/item.py +56 -0
  201. memos/memories/activation/kv.py +292 -0
  202. memos/memories/activation/vllmkv.py +219 -0
  203. memos/memories/base.py +19 -0
  204. memos/memories/factory.py +42 -0
  205. memos/memories/parametric/__init__.py +0 -0
  206. memos/memories/parametric/base.py +19 -0
  207. memos/memories/parametric/item.py +11 -0
  208. memos/memories/parametric/lora.py +41 -0
  209. memos/memories/textual/__init__.py +0 -0
  210. memos/memories/textual/base.py +92 -0
  211. memos/memories/textual/general.py +236 -0
  212. memos/memories/textual/item.py +304 -0
  213. memos/memories/textual/naive.py +187 -0
  214. memos/memories/textual/prefer_text_memory/__init__.py +0 -0
  215. memos/memories/textual/prefer_text_memory/adder.py +504 -0
  216. memos/memories/textual/prefer_text_memory/config.py +106 -0
  217. memos/memories/textual/prefer_text_memory/extractor.py +221 -0
  218. memos/memories/textual/prefer_text_memory/factory.py +85 -0
  219. memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
  220. memos/memories/textual/prefer_text_memory/spliter.py +132 -0
  221. memos/memories/textual/prefer_text_memory/utils.py +93 -0
  222. memos/memories/textual/preference.py +344 -0
  223. memos/memories/textual/simple_preference.py +161 -0
  224. memos/memories/textual/simple_tree.py +69 -0
  225. memos/memories/textual/tree.py +459 -0
  226. memos/memories/textual/tree_text_memory/__init__.py +0 -0
  227. memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
  228. memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
  229. memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
  230. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
  231. memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
  232. memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
  233. memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
  234. memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
  235. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
  236. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
  237. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
  238. memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
  239. memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
  240. memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
  241. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
  242. memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
  243. memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
  244. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
  245. memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
  246. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
  247. memos/memos_tools/dinding_report_bot.py +453 -0
  248. memos/memos_tools/lockfree_dict.py +120 -0
  249. memos/memos_tools/notification_service.py +44 -0
  250. memos/memos_tools/notification_utils.py +142 -0
  251. memos/memos_tools/singleton.py +174 -0
  252. memos/memos_tools/thread_safe_dict.py +310 -0
  253. memos/memos_tools/thread_safe_dict_segment.py +382 -0
  254. memos/multi_mem_cube/__init__.py +0 -0
  255. memos/multi_mem_cube/composite_cube.py +86 -0
  256. memos/multi_mem_cube/single_cube.py +874 -0
  257. memos/multi_mem_cube/views.py +54 -0
  258. memos/parsers/__init__.py +0 -0
  259. memos/parsers/base.py +15 -0
  260. memos/parsers/factory.py +21 -0
  261. memos/parsers/markitdown.py +28 -0
  262. memos/reranker/__init__.py +4 -0
  263. memos/reranker/base.py +25 -0
  264. memos/reranker/concat.py +103 -0
  265. memos/reranker/cosine_local.py +102 -0
  266. memos/reranker/factory.py +72 -0
  267. memos/reranker/http_bge.py +324 -0
  268. memos/reranker/http_bge_strategy.py +327 -0
  269. memos/reranker/noop.py +19 -0
  270. memos/reranker/strategies/__init__.py +4 -0
  271. memos/reranker/strategies/base.py +61 -0
  272. memos/reranker/strategies/concat_background.py +94 -0
  273. memos/reranker/strategies/concat_docsource.py +110 -0
  274. memos/reranker/strategies/dialogue_common.py +109 -0
  275. memos/reranker/strategies/factory.py +31 -0
  276. memos/reranker/strategies/single_turn.py +107 -0
  277. memos/reranker/strategies/singleturn_outmem.py +98 -0
  278. memos/settings.py +10 -0
  279. memos/templates/__init__.py +0 -0
  280. memos/templates/advanced_search_prompts.py +211 -0
  281. memos/templates/cloud_service_prompt.py +107 -0
  282. memos/templates/instruction_completion.py +66 -0
  283. memos/templates/mem_agent_prompts.py +85 -0
  284. memos/templates/mem_feedback_prompts.py +822 -0
  285. memos/templates/mem_reader_prompts.py +1096 -0
  286. memos/templates/mem_reader_strategy_prompts.py +238 -0
  287. memos/templates/mem_scheduler_prompts.py +626 -0
  288. memos/templates/mem_search_prompts.py +93 -0
  289. memos/templates/mos_prompts.py +403 -0
  290. memos/templates/prefer_complete_prompt.py +735 -0
  291. memos/templates/tool_mem_prompts.py +139 -0
  292. memos/templates/tree_reorganize_prompts.py +230 -0
  293. memos/types/__init__.py +34 -0
  294. memos/types/general_types.py +151 -0
  295. memos/types/openai_chat_completion_types/__init__.py +15 -0
  296. memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
  297. memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
  298. memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
  299. memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
  300. memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
  301. memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
  302. memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
  303. memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
  304. memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
  305. memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
  306. memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
  307. memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
  308. memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
  309. memos/utils.py +123 -0
  310. memos/vec_dbs/__init__.py +0 -0
  311. memos/vec_dbs/base.py +117 -0
  312. memos/vec_dbs/factory.py +23 -0
  313. memos/vec_dbs/item.py +50 -0
  314. memos/vec_dbs/milvus.py +654 -0
  315. memos/vec_dbs/qdrant.py +355 -0
@@ -0,0 +1,111 @@
1
+ import numpy as np
2
+
3
+ from memos.embedders.factory import OllamaEmbedder
4
+ from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
5
+ from memos.memories.textual.item import TextualMemoryItem
6
+ from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
7
+
8
+
9
def batch_cosine_similarity(
    query_vec: list[float], candidate_vecs: list[list[float]]
) -> list[float]:
    """
    Compute cosine similarity between a single query vector and multiple candidate vectors using NumPy.

    Args:
        query_vec (list[float]): The query embedding.
        candidate_vecs (list[list[float]]): A list of memory embeddings, one row per candidate.

    Returns:
        list[float]: Cosine similarity scores for each candidate, in input order.
        An empty candidate list yields an empty list.
    """
    # An empty candidate list would become a 1-D array and make the axis=1 norm raise.
    if len(candidate_vecs) == 0:
        return []

    query = np.asarray(query_vec, dtype=float)
    candidates = np.asarray(candidate_vecs, dtype=float)

    # Vector lengths used to normalize the dot products
    query_norm = np.linalg.norm(query)
    candidates_norm = np.linalg.norm(candidates, axis=1)

    # One dot product per candidate row
    dot_products = candidates @ query

    # Small epsilon keeps zero-length vectors from dividing by zero (their score becomes 0.0)
    eps = 1e-10
    similarities = dot_products / (candidates_norm * query_norm + eps)

    return similarities.tolist()
37
+
38
+
39
class MemoryReranker:
    """
    Order retrieved memory items by cosine similarity to the query embedding,
    scaled by a per-level weight.
    """

    def __init__(self, llm: OpenAILLM | OllamaLLM | AzureLLM, embedder: OllamaEmbedder):
        # Both collaborators are stored on the instance; rerank() below does not call them.
        self.llm = llm
        self.embedder = embedder

        # Structural priority weights; unknown levels fall back to 1.0 in rerank().
        self.level_weights = {"topic": 1.0, "concept": 1.0, "fact": 1.0}

    def rerank(
        self,
        query: str,
        query_embedding: list[float],
        graph_results: list,
        top_k: int,
        parsed_goal: ParsedTaskGoal,
    ) -> list[tuple[TextualMemoryItem, float]]:
        """
        Rank memory items by weighted similarity to the query embedding.

        Args:
            query (str): Original task (not read by this method).
            query_embedding (list[float]): Embedding of the query.
            graph_results (list): Combined retrieval results.
            top_k (int): Number of top results to return.
            parsed_goal (ParsedTaskGoal): Structured task representation (not read by this method).

        Returns:
            list(tuple): (item, score) pairs, best first. Items without an embedding
            are only used as padding, scored -1.0; when no item has an embedding at
            all, the first top_k inputs are returned with score 0.5.
        """
        # Step 1: keep only the candidates that carry an embedding
        embedded_items = []
        vectors = []
        for candidate in graph_results:
            if candidate.metadata.embedding:
                embedded_items.append(candidate)
                vectors.append(candidate.metadata.embedding)

        if not vectors:
            return [(candidate, 0.5) for candidate in graph_results[:top_k]]

        # Step 2: cosine similarity of each kept candidate against the query
        raw_scores = batch_cosine_similarity(query_embedding, vectors)

        # Step 3: scale by the weight looked up from the item's metadata.background
        scored = []
        for score, candidate in zip(raw_scores, embedded_items, strict=False):
            weight = self.level_weights.get(candidate.metadata.background, 1.0)
            scored.append((candidate, score * weight))

        # Step 4: highest weighted score first
        scored.sort(key=lambda pair: pair[1], reverse=True)

        # Step 5: cut to top_k, padding with unranked items when there are too few
        ranked = scored[:top_k]
        shortfall = top_k - len(ranked)
        if shortfall > 0:
            already_chosen = [candidate for candidate, _ in ranked]
            padding = [
                (candidate, -1.0)
                for candidate in graph_results
                if candidate not in already_chosen
            ]
            ranked.extend(padding[:shortfall])

        return ranked  # list of (item, score)
@@ -0,0 +1,16 @@
1
+ from dataclasses import dataclass, field
2
+
3
+
4
@dataclass
class ParsedTaskGoal:
    """
    Goal structure for both Fast & LLM.

    Every field has a default, so an instance can be created with no arguments.
    """

    # List-valued fields use default_factory so each instance gets its own list.
    memories: list[str] = field(default_factory=list)
    keys: list[str] = field(default_factory=list)
    tags: list[str] = field(default_factory=list)
    rephrased_query: str | None = None  # None until a rephrased query is provided
    internet_search: bool = False
    goal_type: str | None = None  # e.g., 'default', 'explanation', etc.
    context: str = ""
@@ -0,0 +1,472 @@
1
+ import json
2
+ import re
3
+
4
+ from pathlib import Path
5
+ from typing import Any
6
+
7
+ import numpy as np
8
+
9
+ from memos.dependency import require_python_package
10
+ from memos.log import get_logger
11
+
12
+
13
# Module-level logger obtained from the project's get_logger helper for this module's name.
logger = get_logger(__name__)
14
+
15
+
16
+ def parse_structured_output(content: str) -> dict[str, str | list[str]]:
17
+ """
18
+ Parse structured text containing arbitrary XML-like tags in the format <tag_name>content</tag_name>.
19
+
20
+ This function extracts all tagged content and automatically determines whether each tag's content
21
+ should be returned as a string or a list of strings based on its format:
22
+
23
+ - If the content consists of multiple non-empty lines, and each line starts with "- ",
24
+ it is interpreted as a list (e.g., a bullet-point list of phrases).
25
+ - Otherwise, the entire content is returned as a single string.
26
+
27
+ The function is generic and supports any tag name (e.g., <can_answer>, <reason>, <missing_phrases>).
28
+
29
+ Args:
30
+ content (str): Raw text containing one or more <tag_name>...</tag_name> blocks.
31
+
32
+ Returns:
33
+ Dict[str, Union[str, List[str]]]: A dictionary where keys are tag names and values are either:
34
+ - a string (for single-line or non-list content)
35
+ - a list of strings (for content formatted as bullet points with "- " prefix)
36
+
37
+ Example:
38
+ Input:
39
+ <can_answer>
40
+ true
41
+ </can_answer>
42
+ <missing_phrases>
43
+ - phrase 1
44
+ - phrase 2
45
+ </missing_phrases>
46
+
47
+ Output:
48
+ {
49
+ 'can_answer': 'true',
50
+ 'missing_phrases': ['phrase 1', 'phrase 2']
51
+ }
52
+ """
53
+ result = {}
54
+
55
+ # Regex pattern to match any tag with name and content (supports multi-line content via DOTALL)
56
+ # Pattern explanation:
57
+ # <([a-zA-Z_][a-zA-Z0-9_]*)> : Captures valid tag name (letter/underscore + alphanumeric)
58
+ # (.*?) : Non-greedy capture of content (including newlines)
59
+ # </\1> : Closing tag matching the captured name
60
+ tag_pattern = r"<([a-zA-Z_][a-zA-Z0-9_]*)>(.*?)</\1>"
61
+ matches = re.findall(tag_pattern, content, re.DOTALL)
62
+
63
+ for tag_name, raw_content in matches:
64
+ content = raw_content.strip() # Remove leading/trailing whitespace
65
+
66
+ # If content is empty, store as empty string
67
+ if not content:
68
+ result[tag_name] = ""
69
+ continue
70
+
71
+ # Split content into lines and filter out empty ones
72
+ lines = [line.strip() for line in content.splitlines() if line.strip()]
73
+
74
+ # Check if content is formatted as a bullet list: all non-empty lines start with "- "
75
+ if lines and all(line.startswith("-") for line in lines):
76
+ # Extract the text after the "- " prefix from each line
77
+ items = [line[1:].strip() for line in lines]
78
+ result[tag_name] = items
79
+ else:
80
+ # Treat as plain string (preserve original formatting if multi-line)
81
+ result[tag_name] = content
82
+
83
+ return result
84
+
85
+
86
def find_project_root(marker=".git"):
    """
    Locate the project root by walking upwards from this file's resolved path.

    The first path that contains an entry named `marker` is returned. The
    filesystem root itself is never tested; if nothing matches before reaching
    it, the relative path "." is returned.
    """
    here = Path(__file__).resolve()
    for candidate in (here, *here.parents):
        # A path equal to its own parent is the filesystem root: stop there.
        if candidate == candidate.parent:
            break
        if (candidate / marker).exists():
            return candidate
    return Path(".")
94
+
95
+
96
class StopwordManager:
    """
    Class-level, lazily built stopword set shared by every caller.

    The set is the union of Chinese and English stop words, punctuation,
    digits / Chinese numerals and whitespace characters. It is built on first
    use and cached on the class.
    """

    # Cached stopword set; None until the first lookup.
    _stopwords = None

    @classmethod
    def _load_stopwords(cls):
        """Build the stopword set once and return the cached set afterwards."""
        if cls._stopwords is None:
            cls._stopwords = cls._load_default_stopwords()
        return cls._stopwords

    @classmethod
    def _load_default_stopwords(cls):
        """Return a fresh set containing all built-in stopwords."""
        chinese_stop_words = {
            "的", "了", "在", "是", "我", "有", "和", "就", "不", "人",
            "都", "一", "一个", "上", "也", "很", "到", "说", "要", "去",
            "你", "会", "着", "没有", "看", "好", "自己", "这", "那", "他",
            "她", "它", "我们", "你们", "他们", "这个", "那个", "这些", "那些", "怎么",
            "什么", "为什么", "如何", "哪里", "谁", "几", "多少", "这样", "那样", "这么",
            "那么",
        }
        english_stop_words = {
            "the", "a", "an", "and", "or", "but", "in", "on", "at", "to",
            "for", "of", "with", "by", "as", "is", "are", "was", "were", "be",
            "been", "have", "has", "had", "do", "does", "did", "will", "would", "could",
            "should", "may", "might", "must", "this", "that", "these", "those", "i", "you",
            "he", "she", "it", "we", "they", "me", "him", "her", "us", "them",
            "my", "your", "his", "its", "our", "their", "mine", "yours", "hers", "ours",
            "theirs",
        }
        # NOTE(review): several entries below are plain ASCII punctuation that also
        # appears in english_punctuation; confirm whether full-width forms were intended.
        chinese_punctuation = {
            ",", "。", "!", "?", ";", ":", "「", "」", "『", "』",
            "【", "】", "(", ")", "《", "》", "—", "…", "~", "·",
            "、", "“", "”", "‘", "’", "〈", "〉", "〖", "〗", "〝",
            "〞", "{", "}", "〔", "〕", "¡", "¿",
        }
        english_punctuation = {
            ",", ".", "!", "?", ";", ":", '"', "'", "(", ")",
            "[", "]", "{", "}", "<", ">", "/", "\\", "|", "-",
            "_", "=", "+", "@", "#", "$", "%", "^", "&", "*",
            "~", "`", "¡", "¿",
        }
        numbers = {
            "0", "1", "2", "3", "4", "5", "6", "7", "8", "9",
            "零", "一", "二", "三", "四", "五", "六", "七", "八", "九",
            "十", "百", "千", "万", "亿",
        }
        whitespace = {" ", "\t", "\n", "\r", "\f", "\v"}

        return (
            chinese_stop_words
            | english_stop_words
            | chinese_punctuation
            | english_punctuation
            | numbers
            | whitespace
        )

    @classmethod
    def get_stopwords(cls):
        """Return the cached stopword set, building it on first call."""
        return cls._load_stopwords()

    @classmethod
    def filter_words(cls, words):
        """Return the words that are neither stopwords nor blank after stripping."""
        stopwords = cls._load_stopwords()
        return [word for word in words if word not in stopwords and word.strip()]

    @classmethod
    def is_stopword(cls, word):
        """Return True when `word` is in the stopword set."""
        return word in cls._load_stopwords()
360
+
361
+
362
class FastTokenizer:
    """
    Lightweight tokenizer that routes text to a Chinese or an English splitter.

    Text counts as Chinese when more than 30% of its characters fall in the
    U+4E00..U+9FFF range.
    """

    def __init__(self, use_jieba=True, use_stopwords=True):
        self.use_jieba = use_jieba
        self.use_stopwords = use_stopwords
        # The stopword manager is attached only when stopword filtering is enabled.
        if self.use_stopwords:
            self.stopword_manager = StopwordManager

    def tokenize_mixed(self, text, **kwargs):
        """Tokenize `text` with the splitter that matches its dominant script.

        Extra keyword arguments are accepted but not used.
        """
        splitter = self._tokenize_chinese if self._is_chinese(text) else self._tokenize_english
        return splitter(text)

    def _is_chinese(self, text):
        """Return True when more than 30% of the characters are in U+4E00..U+9FFF."""
        cjk_count = 0
        for ch in text:
            if "\u4e00" <= ch <= "\u9fff":
                cjk_count += 1
        # max(..., 1) keeps empty text from dividing by zero
        return cjk_count / max(len(text), 1) > 0.3

    @require_python_package(
        import_name="jieba",
        install_command="pip install jieba",
        install_link="https://github.com/fxsjy/jieba",
    )
    def _tokenize_chinese(self, text):
        """Split Chinese text with jieba, or character by character when use_jieba is off."""
        import jieba

        pieces = jieba.lcut(text) if self.use_jieba else list(text)

        cleaned = []
        for piece in pieces:
            trimmed = piece.strip()
            if trimmed:
                cleaned.append(trimmed)

        if not self.use_stopwords:
            return cleaned
        return self.stopword_manager.filter_words(cleaned)

    def _tokenize_english(self, text):
        """Split lower-cased text into runs of ASCII letters and digits."""
        words = re.findall(r"\b[a-zA-Z0-9]+\b", text.lower())
        if not self.use_stopwords:
            return words
        return self.stopword_manager.filter_words(words)
403
+
404
+
405
def parse_json_result(response_text):
    """
    Best-effort extraction of a JSON object from free-form response text.

    Everything before the first "{" is discarded, "```" fence markers are
    removed, and a closing "}" is appended when the text does not already end
    with one. If that repaired text does not parse, the first complete JSON
    object in the text is decoded instead, so trailing commentary after the
    object no longer makes the call fail.

    Args:
        response_text: Raw text expected to contain a JSON object.

    Returns:
        The decoded JSON value, or {} when nothing could be decoded. Errors are
        logged and never raised.
    """
    try:
        json_start = response_text.find("{")
        if json_start == -1:
            # Without this guard the -1 index would silently slice off the last character.
            logger.error(f"[JSONParse] No JSON object found\nRaw:\n{response_text}")
            return {}

        candidate = response_text[json_start:].replace("```", "").strip()
        repaired = candidate if candidate.endswith("}") else candidate + "}"

        try:
            return json.loads(repaired)
        except json.JSONDecodeError as first_error:
            # Fallback: decode just the leading object and ignore what follows it.
            try:
                parsed, _ = json.JSONDecoder().raw_decode(candidate)
            except json.JSONDecodeError:
                # Report the error from the primary attempt, as before.
                raise first_error from None
            return parsed
    except json.JSONDecodeError as e:
        logger.error(f"[JSONParse] Failed to decode JSON: {e}\nRaw:\n{response_text}")
        return {}
    except Exception as e:
        logger.error(f"[JSONParse] Unexpected error: {e}")
        return {}
419
+
420
+
421
def detect_lang(text):
    """
    Classify text as Chinese ("zh") or English ("en").

    The text is "zh" when Chinese characters make up more than 30% of its
    significant characters, i.e. those left after removing whitespace, digits
    and non-word characters. Non-string input, empty input, and input with no
    significant characters all return "en".

    Args:
        text: Value to classify; anything that is not a non-empty str yields "en".

    Returns:
        str: "zh" or "en".
    """
    if not isinstance(text, str) or not text:
        return "en"

    chinese_pattern = r"[\u4e00-\u9fff\u3400-\u4dbf\U00020000-\U0002a6df\U0002a700-\U0002b73f\U0002b740-\U0002b81f\U0002b820-\U0002ceaf\uf900-\ufaff]"
    chinese_count = len(re.findall(chinese_pattern, text))

    significant_count = len(re.sub(r"[\s\d\W]", "", text))
    # Only digits / whitespace / punctuation: nothing to measure, so default to "en"
    # explicitly instead of relying on a swallowed ZeroDivisionError.
    if significant_count == 0:
        return "en"

    return "zh" if chinese_count / significant_count > 0.3 else "en"
432
+
433
+
434
def format_memory_item(memory_data: Any) -> dict[str, Any]:
    """
    Dump a memory object to a dict and prepare it for output.

    The result of `memory_data.model_dump()` gets a bracketed `ref_id` built
    from the part of its id before the first "-". Inside `metadata`, the
    `embedding`, `sources` and `usage` entries are replaced with empty lists,
    and `ref_id`, `id` and `memory` are copied in from the top level.

    Args:
        memory_data: Object exposing `model_dump()` that returns a dict with
            "id", "memory" and "metadata" keys.

    Returns:
        dict[str, Any]: The dumped and adjusted dictionary.
    """
    dumped = memory_data.model_dump()
    full_id = dumped["id"]
    short_ref = f"[{full_id.split('-')[0]}]"

    dumped["ref_id"] = short_ref
    dumped["metadata"].update(
        {
            # Bulky fields are blanked out
            "embedding": [],
            "sources": [],
            "usage": [],
            # Identifying fields are mirrored into the metadata
            "ref_id": short_ref,
            "id": full_id,
            "memory": dumped["memory"],
        }
    )
    return dumped
448
+
449
+
450
def find_best_unrelated_subgroup(sentences: list, similarity_matrix: list, bar: float = 0.8):
    """
    Greedily pick sentences that are not too similar to any sentence already picked.

    Sentences are visited in input order. A sentence is kept when its similarity
    to every previously kept sentence is at most `bar`, so the first sentence is
    always kept.

    Args:
        sentences (list): Candidate sentences.
        similarity_matrix (list): Square pairwise similarities, indexed as
            similarity_matrix[i][j]; must have one row per sentence.
        bar (float): Similarity above which two sentences count as related.

    Returns:
        tuple[list, list]: The kept sentences and their indices in `sentences`.
    """
    assert len(sentences) == len(similarity_matrix), (
        "sentences and similarity_matrix must have the same length"
    )

    selected_sentences = []
    selected_indices = []
    for i, sentence in enumerate(sentences):
        too_similar = any(similarity_matrix[i][j] > bar for j in selected_indices)
        if too_similar:
            continue
        # Store the sentence itself; the previous code appended the index here,
        # so both returned lists contained indices.
        selected_sentences.append(sentence)
        selected_indices.append(i)
    return selected_sentences, selected_indices
466
+
467
+
468
def cosine_similarity_matrix(embeddings: list[list[float]]) -> np.ndarray:
    """
    Compute the pairwise cosine similarity between all rows of `embeddings`.

    Args:
        embeddings (list[list[float]]): One embedding per row, all of equal length.

    Returns:
        np.ndarray: Square matrix where entry [i][j] is the cosine similarity of
        rows i and j. Rows with zero length get similarity 0.0 against every row
        (instead of NaN), and empty input yields an empty (0, 0) matrix.
    """
    matrix = np.asarray(embeddings, dtype=float)
    if matrix.size == 0:
        # The axis=1 norm below would raise on an empty 1-D array.
        return np.zeros((0, 0))

    norms = np.linalg.norm(matrix, axis=1, keepdims=True)
    # Divide zero-length rows by 1 so they stay all-zero rather than becoming NaN.
    safe_norms = np.where(norms == 0, 1.0, norms)
    x_normalized = matrix / safe_norms

    return x_normalized @ x_normalized.T