MemoryOS 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (315) hide show
  1. memoryos-2.0.3.dist-info/METADATA +418 -0
  2. memoryos-2.0.3.dist-info/RECORD +315 -0
  3. memoryos-2.0.3.dist-info/WHEEL +4 -0
  4. memoryos-2.0.3.dist-info/entry_points.txt +3 -0
  5. memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
  6. memos/__init__.py +20 -0
  7. memos/api/client.py +571 -0
  8. memos/api/config.py +1018 -0
  9. memos/api/context/dependencies.py +50 -0
  10. memos/api/exceptions.py +53 -0
  11. memos/api/handlers/__init__.py +62 -0
  12. memos/api/handlers/add_handler.py +158 -0
  13. memos/api/handlers/base_handler.py +194 -0
  14. memos/api/handlers/chat_handler.py +1401 -0
  15. memos/api/handlers/component_init.py +388 -0
  16. memos/api/handlers/config_builders.py +190 -0
  17. memos/api/handlers/feedback_handler.py +93 -0
  18. memos/api/handlers/formatters_handler.py +237 -0
  19. memos/api/handlers/memory_handler.py +316 -0
  20. memos/api/handlers/scheduler_handler.py +497 -0
  21. memos/api/handlers/search_handler.py +222 -0
  22. memos/api/handlers/suggestion_handler.py +117 -0
  23. memos/api/mcp_serve.py +614 -0
  24. memos/api/middleware/request_context.py +101 -0
  25. memos/api/product_api.py +38 -0
  26. memos/api/product_models.py +1206 -0
  27. memos/api/routers/__init__.py +1 -0
  28. memos/api/routers/product_router.py +477 -0
  29. memos/api/routers/server_router.py +394 -0
  30. memos/api/server_api.py +44 -0
  31. memos/api/start_api.py +433 -0
  32. memos/chunkers/__init__.py +4 -0
  33. memos/chunkers/base.py +24 -0
  34. memos/chunkers/charactertext_chunker.py +41 -0
  35. memos/chunkers/factory.py +24 -0
  36. memos/chunkers/markdown_chunker.py +62 -0
  37. memos/chunkers/sentence_chunker.py +54 -0
  38. memos/chunkers/simple_chunker.py +50 -0
  39. memos/cli.py +113 -0
  40. memos/configs/__init__.py +0 -0
  41. memos/configs/base.py +82 -0
  42. memos/configs/chunker.py +59 -0
  43. memos/configs/embedder.py +88 -0
  44. memos/configs/graph_db.py +236 -0
  45. memos/configs/internet_retriever.py +100 -0
  46. memos/configs/llm.py +151 -0
  47. memos/configs/mem_agent.py +54 -0
  48. memos/configs/mem_chat.py +81 -0
  49. memos/configs/mem_cube.py +105 -0
  50. memos/configs/mem_os.py +83 -0
  51. memos/configs/mem_reader.py +91 -0
  52. memos/configs/mem_scheduler.py +385 -0
  53. memos/configs/mem_user.py +70 -0
  54. memos/configs/memory.py +324 -0
  55. memos/configs/parser.py +38 -0
  56. memos/configs/reranker.py +18 -0
  57. memos/configs/utils.py +8 -0
  58. memos/configs/vec_db.py +80 -0
  59. memos/context/context.py +355 -0
  60. memos/dependency.py +52 -0
  61. memos/deprecation.py +262 -0
  62. memos/embedders/__init__.py +0 -0
  63. memos/embedders/ark.py +95 -0
  64. memos/embedders/base.py +106 -0
  65. memos/embedders/factory.py +29 -0
  66. memos/embedders/ollama.py +77 -0
  67. memos/embedders/sentence_transformer.py +49 -0
  68. memos/embedders/universal_api.py +51 -0
  69. memos/exceptions.py +30 -0
  70. memos/graph_dbs/__init__.py +0 -0
  71. memos/graph_dbs/base.py +274 -0
  72. memos/graph_dbs/factory.py +27 -0
  73. memos/graph_dbs/item.py +46 -0
  74. memos/graph_dbs/nebular.py +1794 -0
  75. memos/graph_dbs/neo4j.py +1942 -0
  76. memos/graph_dbs/neo4j_community.py +1058 -0
  77. memos/graph_dbs/polardb.py +5446 -0
  78. memos/hello_world.py +97 -0
  79. memos/llms/__init__.py +0 -0
  80. memos/llms/base.py +25 -0
  81. memos/llms/deepseek.py +13 -0
  82. memos/llms/factory.py +38 -0
  83. memos/llms/hf.py +443 -0
  84. memos/llms/hf_singleton.py +114 -0
  85. memos/llms/ollama.py +135 -0
  86. memos/llms/openai.py +222 -0
  87. memos/llms/openai_new.py +198 -0
  88. memos/llms/qwen.py +13 -0
  89. memos/llms/utils.py +14 -0
  90. memos/llms/vllm.py +218 -0
  91. memos/log.py +237 -0
  92. memos/mem_agent/base.py +19 -0
  93. memos/mem_agent/deepsearch_agent.py +391 -0
  94. memos/mem_agent/factory.py +36 -0
  95. memos/mem_chat/__init__.py +0 -0
  96. memos/mem_chat/base.py +30 -0
  97. memos/mem_chat/factory.py +21 -0
  98. memos/mem_chat/simple.py +200 -0
  99. memos/mem_cube/__init__.py +0 -0
  100. memos/mem_cube/base.py +30 -0
  101. memos/mem_cube/general.py +240 -0
  102. memos/mem_cube/navie.py +172 -0
  103. memos/mem_cube/utils.py +169 -0
  104. memos/mem_feedback/base.py +15 -0
  105. memos/mem_feedback/feedback.py +1192 -0
  106. memos/mem_feedback/simple_feedback.py +40 -0
  107. memos/mem_feedback/utils.py +230 -0
  108. memos/mem_os/client.py +5 -0
  109. memos/mem_os/core.py +1203 -0
  110. memos/mem_os/main.py +582 -0
  111. memos/mem_os/product.py +1608 -0
  112. memos/mem_os/product_server.py +455 -0
  113. memos/mem_os/utils/default_config.py +359 -0
  114. memos/mem_os/utils/format_utils.py +1403 -0
  115. memos/mem_os/utils/reference_utils.py +162 -0
  116. memos/mem_reader/__init__.py +0 -0
  117. memos/mem_reader/base.py +47 -0
  118. memos/mem_reader/factory.py +53 -0
  119. memos/mem_reader/memory.py +298 -0
  120. memos/mem_reader/multi_modal_struct.py +965 -0
  121. memos/mem_reader/read_multi_modal/__init__.py +43 -0
  122. memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
  123. memos/mem_reader/read_multi_modal/base.py +273 -0
  124. memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
  125. memos/mem_reader/read_multi_modal/image_parser.py +359 -0
  126. memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
  127. memos/mem_reader/read_multi_modal/string_parser.py +139 -0
  128. memos/mem_reader/read_multi_modal/system_parser.py +327 -0
  129. memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
  130. memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
  131. memos/mem_reader/read_multi_modal/user_parser.py +218 -0
  132. memos/mem_reader/read_multi_modal/utils.py +358 -0
  133. memos/mem_reader/simple_struct.py +912 -0
  134. memos/mem_reader/strategy_struct.py +163 -0
  135. memos/mem_reader/utils.py +157 -0
  136. memos/mem_scheduler/__init__.py +0 -0
  137. memos/mem_scheduler/analyzer/__init__.py +0 -0
  138. memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
  139. memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
  140. memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
  141. memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
  142. memos/mem_scheduler/base_scheduler.py +1319 -0
  143. memos/mem_scheduler/general_modules/__init__.py +0 -0
  144. memos/mem_scheduler/general_modules/api_misc.py +137 -0
  145. memos/mem_scheduler/general_modules/base.py +80 -0
  146. memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
  147. memos/mem_scheduler/general_modules/misc.py +313 -0
  148. memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
  149. memos/mem_scheduler/general_modules/task_threads.py +315 -0
  150. memos/mem_scheduler/general_scheduler.py +1495 -0
  151. memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
  152. memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
  153. memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
  154. memos/mem_scheduler/monitors/__init__.py +0 -0
  155. memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
  156. memos/mem_scheduler/monitors/general_monitor.py +394 -0
  157. memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
  158. memos/mem_scheduler/optimized_scheduler.py +410 -0
  159. memos/mem_scheduler/orm_modules/__init__.py +0 -0
  160. memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
  161. memos/mem_scheduler/orm_modules/base_model.py +729 -0
  162. memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
  163. memos/mem_scheduler/orm_modules/redis_model.py +699 -0
  164. memos/mem_scheduler/scheduler_factory.py +23 -0
  165. memos/mem_scheduler/schemas/__init__.py +0 -0
  166. memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
  167. memos/mem_scheduler/schemas/api_schemas.py +233 -0
  168. memos/mem_scheduler/schemas/general_schemas.py +55 -0
  169. memos/mem_scheduler/schemas/message_schemas.py +173 -0
  170. memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
  171. memos/mem_scheduler/schemas/task_schemas.py +132 -0
  172. memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
  173. memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
  174. memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
  175. memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
  176. memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
  177. memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
  178. memos/mem_scheduler/utils/__init__.py +0 -0
  179. memos/mem_scheduler/utils/api_utils.py +77 -0
  180. memos/mem_scheduler/utils/config_utils.py +100 -0
  181. memos/mem_scheduler/utils/db_utils.py +50 -0
  182. memos/mem_scheduler/utils/filter_utils.py +176 -0
  183. memos/mem_scheduler/utils/metrics.py +125 -0
  184. memos/mem_scheduler/utils/misc_utils.py +290 -0
  185. memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
  186. memos/mem_scheduler/utils/status_tracker.py +229 -0
  187. memos/mem_scheduler/webservice_modules/__init__.py +0 -0
  188. memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
  189. memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
  190. memos/mem_user/factory.py +94 -0
  191. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  192. memos/mem_user/mysql_user_manager.py +502 -0
  193. memos/mem_user/persistent_factory.py +98 -0
  194. memos/mem_user/persistent_user_manager.py +260 -0
  195. memos/mem_user/redis_persistent_user_manager.py +225 -0
  196. memos/mem_user/user_manager.py +488 -0
  197. memos/memories/__init__.py +0 -0
  198. memos/memories/activation/__init__.py +0 -0
  199. memos/memories/activation/base.py +42 -0
  200. memos/memories/activation/item.py +56 -0
  201. memos/memories/activation/kv.py +292 -0
  202. memos/memories/activation/vllmkv.py +219 -0
  203. memos/memories/base.py +19 -0
  204. memos/memories/factory.py +42 -0
  205. memos/memories/parametric/__init__.py +0 -0
  206. memos/memories/parametric/base.py +19 -0
  207. memos/memories/parametric/item.py +11 -0
  208. memos/memories/parametric/lora.py +41 -0
  209. memos/memories/textual/__init__.py +0 -0
  210. memos/memories/textual/base.py +92 -0
  211. memos/memories/textual/general.py +236 -0
  212. memos/memories/textual/item.py +304 -0
  213. memos/memories/textual/naive.py +187 -0
  214. memos/memories/textual/prefer_text_memory/__init__.py +0 -0
  215. memos/memories/textual/prefer_text_memory/adder.py +504 -0
  216. memos/memories/textual/prefer_text_memory/config.py +106 -0
  217. memos/memories/textual/prefer_text_memory/extractor.py +221 -0
  218. memos/memories/textual/prefer_text_memory/factory.py +85 -0
  219. memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
  220. memos/memories/textual/prefer_text_memory/spliter.py +132 -0
  221. memos/memories/textual/prefer_text_memory/utils.py +93 -0
  222. memos/memories/textual/preference.py +344 -0
  223. memos/memories/textual/simple_preference.py +161 -0
  224. memos/memories/textual/simple_tree.py +69 -0
  225. memos/memories/textual/tree.py +459 -0
  226. memos/memories/textual/tree_text_memory/__init__.py +0 -0
  227. memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
  228. memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
  229. memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
  230. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
  231. memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
  232. memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
  233. memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
  234. memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
  235. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
  236. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
  237. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
  238. memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
  239. memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
  240. memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
  241. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
  242. memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
  243. memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
  244. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
  245. memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
  246. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
  247. memos/memos_tools/dinding_report_bot.py +453 -0
  248. memos/memos_tools/lockfree_dict.py +120 -0
  249. memos/memos_tools/notification_service.py +44 -0
  250. memos/memos_tools/notification_utils.py +142 -0
  251. memos/memos_tools/singleton.py +174 -0
  252. memos/memos_tools/thread_safe_dict.py +310 -0
  253. memos/memos_tools/thread_safe_dict_segment.py +382 -0
  254. memos/multi_mem_cube/__init__.py +0 -0
  255. memos/multi_mem_cube/composite_cube.py +86 -0
  256. memos/multi_mem_cube/single_cube.py +874 -0
  257. memos/multi_mem_cube/views.py +54 -0
  258. memos/parsers/__init__.py +0 -0
  259. memos/parsers/base.py +15 -0
  260. memos/parsers/factory.py +21 -0
  261. memos/parsers/markitdown.py +28 -0
  262. memos/reranker/__init__.py +4 -0
  263. memos/reranker/base.py +25 -0
  264. memos/reranker/concat.py +103 -0
  265. memos/reranker/cosine_local.py +102 -0
  266. memos/reranker/factory.py +72 -0
  267. memos/reranker/http_bge.py +324 -0
  268. memos/reranker/http_bge_strategy.py +327 -0
  269. memos/reranker/noop.py +19 -0
  270. memos/reranker/strategies/__init__.py +4 -0
  271. memos/reranker/strategies/base.py +61 -0
  272. memos/reranker/strategies/concat_background.py +94 -0
  273. memos/reranker/strategies/concat_docsource.py +110 -0
  274. memos/reranker/strategies/dialogue_common.py +109 -0
  275. memos/reranker/strategies/factory.py +31 -0
  276. memos/reranker/strategies/single_turn.py +107 -0
  277. memos/reranker/strategies/singleturn_outmem.py +98 -0
  278. memos/settings.py +10 -0
  279. memos/templates/__init__.py +0 -0
  280. memos/templates/advanced_search_prompts.py +211 -0
  281. memos/templates/cloud_service_prompt.py +107 -0
  282. memos/templates/instruction_completion.py +66 -0
  283. memos/templates/mem_agent_prompts.py +85 -0
  284. memos/templates/mem_feedback_prompts.py +822 -0
  285. memos/templates/mem_reader_prompts.py +1096 -0
  286. memos/templates/mem_reader_strategy_prompts.py +238 -0
  287. memos/templates/mem_scheduler_prompts.py +626 -0
  288. memos/templates/mem_search_prompts.py +93 -0
  289. memos/templates/mos_prompts.py +403 -0
  290. memos/templates/prefer_complete_prompt.py +735 -0
  291. memos/templates/tool_mem_prompts.py +139 -0
  292. memos/templates/tree_reorganize_prompts.py +230 -0
  293. memos/types/__init__.py +34 -0
  294. memos/types/general_types.py +151 -0
  295. memos/types/openai_chat_completion_types/__init__.py +15 -0
  296. memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
  297. memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
  298. memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
  299. memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
  300. memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
  301. memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
  302. memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
  303. memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
  304. memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
  305. memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
  306. memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
  307. memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
  308. memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
  309. memos/utils.py +123 -0
  310. memos/vec_dbs/__init__.py +0 -0
  311. memos/vec_dbs/base.py +117 -0
  312. memos/vec_dbs/factory.py +23 -0
  313. memos/vec_dbs/item.py +50 -0
  314. memos/vec_dbs/milvus.py +654 -0
  315. memos/vec_dbs/qdrant.py +355 -0
@@ -0,0 +1,43 @@
1
+ """Multimodal message parsers for different message types.
2
+
3
+ This package provides parsers for different message types in both fast and fine modes:
4
+ - String messages
5
+ - System messages
6
+ - User messages
7
+ - Assistant messages
8
+ - Tool messages
9
+ - Text content parts
10
+ - File content parts
11
+
12
+ Each parser supports both "fast" mode (quick processing without LLM) and
13
+ "fine" mode (with LLM for better understanding).
14
+ """
15
+
16
+ from .assistant_parser import AssistantParser
17
+ from .base import BaseMessageParser
18
+ from .file_content_parser import FileContentParser
19
+ from .image_parser import ImageParser
20
+ from .multi_modal_parser import MultiModalParser
21
+ from .string_parser import StringParser
22
+ from .system_parser import SystemParser
23
+ from .text_content_parser import TextContentParser
24
+ from .tool_parser import ToolParser
25
+ from .user_parser import UserParser
26
+ from .utils import coerce_scene_data, detect_lang, extract_role
27
+
28
+
29
# Public API of this package: every parser class and utility helper imported
# above is re-exported here, listed in alphabetical order (classes first,
# then the lowercase helper functions).
__all__ = [
    "AssistantParser",
    "BaseMessageParser",
    "FileContentParser",
    "ImageParser",
    "MultiModalParser",
    "StringParser",
    "SystemParser",
    "TextContentParser",
    "ToolParser",
    "UserParser",
    "coerce_scene_data",
    "detect_lang",
    "extract_role",
]
@@ -0,0 +1,311 @@
1
+ """Parser for assistant messages."""
2
+
3
+ import json
4
+
5
+ from typing import Any
6
+
7
+ from memos.embedders.base import BaseEmbedder
8
+ from memos.llms.base import BaseLLM
9
+ from memos.log import get_logger
10
+ from memos.memories.textual.item import (
11
+ SourceMessage,
12
+ TextualMemoryItem,
13
+ TreeNodeTextualMemoryMetadata,
14
+ )
15
+ from memos.types.openai_chat_completion_types import ChatCompletionAssistantMessageParam
16
+
17
+ from .base import BaseMessageParser, _add_lang_to_source, _derive_key, _extract_text_from_content
18
+ from .utils import detect_lang
19
+
20
+
21
+ logger = get_logger(__name__)
22
+
23
+
24
class AssistantParser(BaseMessageParser):
    """Parser for assistant messages.

    Handles multimodal assistant messages by creating one SourceMessage per content part.
    Supports text and refusal content parts, as well as the top-level
    ``refusal``, ``tool_calls`` and ``audio`` fields of an assistant message.
    """

    def __init__(self, embedder: BaseEmbedder, llm: BaseLLM | None = None):
        """
        Initialize AssistantParser.

        Args:
            embedder: Embedder for generating embeddings
            llm: Optional LLM for fine mode processing
        """
        super().__init__(embedder, llm)

    @staticmethod
    def _stringify_tool_calls(tool_calls: Any) -> str:
        """Render ``tool_calls`` as text: JSON for list/dict, ``str()`` otherwise.

        Falls back to ``str()`` when the payload is not JSON-serialisable so a
        malformed tool call cannot abort parsing of the whole message.
        """
        if isinstance(tool_calls, list | dict):
            try:
                return json.dumps(tool_calls, ensure_ascii=False)
            except (TypeError, ValueError):
                return str(tool_calls)
        return str(tool_calls)

    @staticmethod
    def _audio_id(audio: Any) -> str:
        """Return the ``id`` of a dict-shaped audio payload, else ``str(audio)``."""
        return audio.get("id", "") if isinstance(audio, dict) else str(audio)

    @staticmethod
    def _apply_lang(
        source: SourceMessage,
        sources: list[SourceMessage],
        fallback_text: str | None,
    ) -> SourceMessage:
        """Give ``source`` the language of the first existing source, if any.

        When no earlier source carries a ``lang`` attribute, the language is
        derived via ``_add_lang_to_source(source, fallback_text)`` instead.
        """
        if sources and hasattr(sources[0], "lang"):
            source.lang = sources[0].lang
            return source
        return _add_lang_to_source(source, fallback_text)

    def create_source(
        self,
        message: ChatCompletionAssistantMessageParam,
        info: dict[str, Any],
    ) -> SourceMessage | list[SourceMessage]:
        """
        Create SourceMessage(s) from assistant message.

        Handles:
        - content: str | list of content parts (text/refusal) | None
        - refusal: str | None (top-level refusal message)
        - tool_calls: list of tool calls (when content is None)
        - audio: Audio | None (audio response data)

        For multimodal messages (content is a list), creates one SourceMessage per part.
        For simple messages (content is str), creates a single SourceMessage.

        Args:
            message: Assistant message dict.
            info: Request metadata; not used by this method.

        Returns:
            An empty list when ``message`` is not a dict; a single
            SourceMessage when exactly one (or no) source was produced;
            otherwise the list of SourceMessages in the order
            content parts, refusal, tool_calls, audio.
        """
        if not isinstance(message, dict):
            return []

        role = message.get("role", "assistant")
        raw_content = message.get("content")
        refusal = message.get("refusal")
        tool_calls = message.get("tool_calls")
        audio = message.get("audio")
        chat_time = message.get("chat_time")
        message_id = message.get("message_id")

        def make_source(source_type: str, content: Any) -> SourceMessage:
            # All sources of one message share role / chat_time / message_id.
            return SourceMessage(
                type=source_type,
                role=role,
                chat_time=chat_time,
                message_id=message_id,
                content=content,
            )

        sources = []

        if isinstance(raw_content, list):
            # Multimodal: walk the parts once, remembering what to emit and
            # collecting all text so one overall language can be detected.
            pending = []  # (source type, content) in original part order
            text_contents = []
            for part in raw_content:
                if not isinstance(part, dict):
                    continue
                part_type = part.get("type", "")
                if part_type == "text":
                    # `or ""` guards against an explicit None, which would
                    # otherwise make the join below raise TypeError.
                    text_content = part.get("text") or ""
                    text_contents.append(text_content)
                    pending.append(("chat", text_content))
                elif part_type == "refusal":
                    refusal_content = part.get("refusal") or ""
                    text_contents.append(refusal_content)
                    pending.append(("refusal", refusal_content))
                else:
                    # Unknown part type - log warning but still create SourceMessage
                    logger.warning(
                        f"[AssistantParser] Unknown part type `{part_type}`. "
                        f"Expected `text` or `refusal`. Creating SourceMessage with placeholder content."
                    )
                    pending.append(("chat", f"[{part_type}]"))

            # Detect overall language from all text content ("en" by default)
            overall_lang = detect_lang(" ".join(text_contents)) if text_contents else "en"

            for source_type, part_content in pending:
                source = make_source(source_type, part_content)
                source.lang = overall_lang
                sources.append(source)
        elif raw_content is not None:
            # Simple message: single SourceMessage
            content = _extract_text_from_content(raw_content)
            if content:
                sources.append(_add_lang_to_source(make_source("chat", content), content))

        # Handle top-level refusal field; language is inherited from the
        # content sources when present, otherwise detected from the refusal.
        if refusal:
            sources.append(self._apply_lang(make_source("refusal", refusal), sources, refusal))

        # Handle tool_calls (when content is None or empty)
        if tool_calls:
            rendered = f"[tool_calls]: {self._stringify_tool_calls(tool_calls)}"
            sources.append(self._apply_lang(make_source("tool_calls", rendered), sources, None))

        # Handle audio (optional)
        if audio:
            rendered = f"[audio]: {self._audio_id(audio)}"
            sources.append(self._apply_lang(make_source("audio", rendered), sources, None))

        if not sources:
            # Nothing usable in the message: return a bare placeholder source.
            return _add_lang_to_source(SourceMessage(type="chat", role=role), None)
        if len(sources) > 1:
            return sources
        return sources[0]

    def rebuild_from_source(
        self,
        source: SourceMessage,
    ) -> ChatCompletionAssistantMessageParam:
        """We only need rebuild from specific multimodal source.

        Intentionally left without a body for assistant messages, so calling
        it returns ``None``.
        """

    def parse_fast(
        self,
        message: ChatCompletionAssistantMessageParam,
        info: dict[str, Any],
        **kwargs,
    ) -> list[TextualMemoryItem]:
        """Build a single LongTermMemory item from an assistant message without an LLM.

        The memory text has the form ``"assistant: [<chat_time>]: <content>\\n"``
        (the chat-time segment is omitted when absent), where ``<content>`` is
        the space-joined extracted content, top-level refusal, tool calls and
        audio id.

        Args:
            message: Assistant message dict; anything else yields ``[]``.
            info: Request metadata. ``user_id`` and ``session_id`` are moved
                into the metadata fields; the remainder is stored as ``info``.
                The caller's dict is not mutated.
            **kwargs: Accepted for interface compatibility; unused.

        Returns:
            A one-element list with the memory item, or ``[]`` when the
            message is not an assistant dict or carries no content at all.
        """
        if not isinstance(message, dict):
            logger.warning(f"[AssistantParser] Expected dict, got {type(message)}")
            return []

        role = message.get("role", "")
        if role != "assistant":
            logger.warning(f"[AssistantParser] Expected role is `assistant`, got {role}")
            return []

        raw_content = message.get("content")
        refusal = message.get("refusal")
        tool_calls = message.get("tool_calls")
        audio = message.get("audio")
        chat_time = message.get("chat_time", None)

        # Build content string from various sources
        content_parts = []

        # Extract content (can be str, list, or None).
        # NOTE(review): list-form content is flattened by
        # `_extract_text_from_content`, which renders every part other than
        # `text`/`file` (including `refusal` parts) as a `[type]` placeholder
        # - confirm this is intended for refusal parts.
        if raw_content is not None:
            extracted_content = _extract_text_from_content(raw_content)
            if extracted_content:
                content_parts.append(extracted_content)

        # Add top-level refusal if present
        if refusal:
            content_parts.append(f"[refusal]: {refusal}")

        # Add tool_calls if present (when content is None or empty)
        if tool_calls:
            content_parts.append(f"[tool_calls]: {self._stringify_tool_calls(tool_calls)}")

        # Add audio if present
        if audio:
            content_parts.append(f"[audio]: {self._audio_id(audio)}")

        # Every appended part is non-empty, so an empty list means the message
        # has no content, refusal, tool calls or audio: nothing to remember.
        if not content_parts:
            return []
        content = " ".join(content_parts)

        prefix = f"{role}: "
        if chat_time:
            prefix += f"[{chat_time}]: "
        line = f"{prefix}{content}\n"
        memory_type = "LongTermMemory"

        # Create source(s) using parser's create_source method
        sources = self.create_source(message, info)
        if isinstance(sources, SourceMessage):
            sources = [sources]
        elif not sources:
            return []

        # Extract info fields from a shallow copy so the caller's dict is
        # untouched; tolerate a missing info mapping.
        info_ = dict(info) if info else {}
        user_id = info_.pop("user_id", "")
        session_id = info_.pop("session_id", "")

        # Create memory item (equivalent to _make_memory_item)
        memory_item = TextualMemoryItem(
            memory=line,
            metadata=TreeNodeTextualMemoryMetadata(
                user_id=user_id,
                session_id=session_id,
                memory_type=memory_type,
                status="activated",
                tags=["mode:fast"],
                key=_derive_key(line),
                embedding=self.embedder.embed([line])[0],
                usage=[],
                sources=sources,
                background="",
                confidence=0.99,
                type="fact",
                info=info_,
            ),
        )

        return [memory_item]

    def parse_fine(
        self,
        message: ChatCompletionAssistantMessageParam,
        info: dict[str, Any],
        **kwargs,
    ) -> list[TextualMemoryItem]:
        """Fine-mode parsing produces no items for assistant messages; always ``[]``."""
        return []
@@ -0,0 +1,273 @@
1
+ """Base parser interface for multi-modal message parsing.
2
+
3
+ This module defines the base interface for parsing different message types
4
+ in both fast and fine modes.
5
+ """
6
+
7
+ import re
8
+
9
+ from abc import ABC, abstractmethod
10
+ from typing import Any
11
+
12
+ from memos import log
13
+ from memos.memories.textual.item import (
14
+ SourceMessage,
15
+ TextualMemoryItem,
16
+ TreeNodeTextualMemoryMetadata,
17
+ )
18
+
19
+ from .utils import detect_lang, get_text_splitter
20
+
21
+
22
+ logger = log.get_logger(__name__)
23
+
24
+
25
+ def _derive_key(text: str, max_len: int = 80) -> str:
26
+ """Default key when without LLM: first max_len words."""
27
+ if not text:
28
+ return ""
29
+ sent = re.split(r"[。!?!?]\s*|\n", text.strip())[0]
30
+ return (sent[:max_len]).strip()
31
+
32
+
33
+ def _extract_text_from_content(content: Any) -> str:
34
+ """
35
+ Extract text from message content.
36
+ Handles str, list of parts, or None.
37
+ """
38
+ if content is None:
39
+ return ""
40
+ if isinstance(content, str):
41
+ return content
42
+ if isinstance(content, list):
43
+ texts = []
44
+ for part in content:
45
+ if isinstance(part, dict):
46
+ part_type = part.get("type", "")
47
+ if part_type == "text":
48
+ texts.append(part.get("text", ""))
49
+ elif part_type == "file":
50
+ file_info = part.get("file", {})
51
+ texts.append(file_info.get("file_data") or file_info.get("filename", "[file]"))
52
+ else:
53
+ texts.append(f"[{part_type}]")
54
+ else:
55
+ texts.append(str(part))
56
+ return " ".join(texts)
57
+ return str(content)
58
+
59
+
60
+ def _add_lang_to_source(source: SourceMessage, content: str | None = None) -> SourceMessage:
61
+ """
62
+ Add lang field to SourceMessage based on content.
63
+
64
+ Args:
65
+ source: SourceMessage to add lang field to
66
+ content: Optional content text for language detection.
67
+ If None, uses source.content
68
+
69
+ Returns:
70
+ SourceMessage with lang field added
71
+ """
72
+ if not hasattr(source, "lang") or getattr(source, "lang", None) is None:
73
+ text_for_detection = content or getattr(source, "content", None) or ""
74
+ lang = detect_lang(text_for_detection)
75
+ source.lang = lang
76
+ return source
77
+
78
+
79
class BaseMessageParser(ABC):
    """Base interface for message type parsers.

    Subclasses must implement ``create_source``, ``rebuild_from_source`` and
    ``parse_fine``. ``parse_fast`` has a default implementation that embeds the
    extracted text without any LLM call, and ``parse`` dispatches between the
    two modes.
    """

    def __init__(self, embedder, llm=None):
        """
        Initialize BaseMessageParser.

        Args:
            embedder: Embedder for generating embeddings; ``parse_fast`` calls
                ``embedder.embed([text])`` and uses the first result.
            llm: Optional LLM for fine mode processing
        """
        self.embedder = embedder
        self.llm = llm

    @abstractmethod
    def create_source(
        self,
        message: Any,
        info: dict[str, Any],
    ) -> SourceMessage | list[SourceMessage]:
        """
        Create SourceMessage(s) from the message.

        Each parser decides how to create sources:
        - Simple messages: return single SourceMessage
        - Multimodal messages: return list of SourceMessage (one per part)

        Args:
            message: The message to create source from
            info: Dictionary containing user_id and session_id

        Returns:
            SourceMessage or list of SourceMessage
        """

    @abstractmethod
    def rebuild_from_source(
        self,
        source: SourceMessage,
    ) -> Any:
        """
        Rebuild original message from SourceMessage.

        Each parser knows how to reconstruct its own message type.

        Args:
            source: SourceMessage to rebuild from

        Returns:
            Rebuilt message in original format
        """

    def parse_fast(
        self,
        message: Any,
        info: dict[str, Any],
        **kwargs,
    ) -> list[TextualMemoryItem]:
        """
        Default parse_fast implementation (equivalent to simple_struct fast mode).

        Fast mode logic:
        - Extract text content from message
        - Determine memory_type based on role (UserMemory for user, LongTermMemory otherwise)
        - Create TextualMemoryItem with tags=["mode:fast"]
        - No LLM calls, quick processing

        Subclasses can override this method for custom behavior.

        Args:
            message: The message to parse; anything other than a dict is
                logged and ignored.
            info: Dictionary containing user_id and session_id. Remaining keys
                are stored on the memory metadata as ``info``. The caller's
                dict is not mutated.
            **kwargs: Additional parameters (unused by the default implementation)

        Returns:
            A one-element list with the created TextualMemoryItem, or an empty
            list when the message is not a dict, has no text content, or
            ``create_source`` returns nothing.
        """
        if not isinstance(message, dict):
            logger.warning(f"[BaseParser] Expected dict, got {type(message)}")
            return []

        # Extract text content
        content = _extract_text_from_content(message.get("content"))
        if not content:
            return []

        # Determine memory_type based on role (equivalent to simple_struct logic).
        # ``role`` may be missing, explicitly None, or not a string; normalise
        # before calling str methods so such messages do not crash.
        role = str(message.get("role") or "").strip().lower()
        memory_type = "UserMemory" if role == "user" else "LongTermMemory"

        # Create source(s) using parser's create_source method
        sources = self.create_source(message, info)
        if isinstance(sources, SourceMessage):
            sources = [sources]
        elif not sources:
            return []

        # Work on a copy so popping the id fields never mutates the caller's
        # dict; tolerate a missing/None info.
        info_ = dict(info) if info else {}
        user_id = info_.pop("user_id", "")
        session_id = info_.pop("session_id", "")

        # Create memory item (equivalent to _make_memory_item)
        memory_item = TextualMemoryItem(
            memory=content,
            metadata=TreeNodeTextualMemoryMetadata(
                user_id=user_id,
                session_id=session_id,
                memory_type=memory_type,
                status="activated",
                tags=["mode:fast"],
                key=_derive_key(content),
                embedding=self.embedder.embed([content])[0],
                usage=[],
                sources=sources,
                background="",
                confidence=0.99,
                type="fact",
                info=info_,
            ),
        )

        return [memory_item]

    @abstractmethod
    def parse_fine(
        self,
        message: Any,
        info: dict[str, Any],
        **kwargs,
    ) -> list[TextualMemoryItem]:
        """
        Parse message in fine mode (with LLM calls for better understanding).

        Args:
            message: The message to parse
            info: Dictionary containing user_id and session_id
            **kwargs: Additional parameters (e.g., llm, embedder)

        Returns:
            List of TextualMemoryItem objects
        """

    def parse(
        self,
        message: Any,
        info: dict[str, Any],
        mode: str = "fast",
        **kwargs,
    ) -> list[TextualMemoryItem]:
        """
        Parse message in the specified mode.

        Args:
            message: The message to parse
            info: Dictionary containing user_id and session_id
            mode: "fast" or "fine"
            **kwargs: Additional parameters forwarded to the mode-specific parser

        Returns:
            List of TextualMemoryItem objects

        Raises:
            ValueError: If ``mode`` is neither "fast" nor "fine".
        """
        if mode == "fast":
            return self.parse_fast(message, info, **kwargs)
        if mode == "fine":
            return self.parse_fine(message, info, **kwargs)
        raise ValueError(f"Unknown mode: {mode}. Must be 'fast' or 'fine'")

    def _split_text(self, text: str, is_markdown: bool = False) -> list[str]:
        """
        Split text into chunks using text splitter from utils.

        Args:
            text: Text to split
            is_markdown: Forwarded to ``get_text_splitter`` to select the
                splitter variant.

        Returns:
            List of text chunks. Empty or whitespace-only text yields ``[]``.
            If no splitter is available, or splitting raises, the whole text
            is returned as a single chunk.
        """
        if not text or not text.strip():
            return []

        splitter = get_text_splitter(is_markdown=is_markdown)
        if not splitter:
            # If text splitter is not available, return text as single chunk.
            # The guard above already ensured the text is non-blank.
            return [text]

        # Tag log lines with the concrete parser class: this helper lives on
        # the base class and is shared by every subclass.
        tag = type(self).__name__
        try:
            chunks = splitter.chunk(text)
            logger.debug(f"[{tag}] Split text into {len(chunks)} chunks")
            return chunks
        except Exception as e:
            # Best-effort: chunking is optional, so fall back to single chunk.
            logger.error(f"[{tag}] Error splitting text: {e}")
            return [text]