MemoryOS 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (315) hide show
  1. memoryos-2.0.3.dist-info/METADATA +418 -0
  2. memoryos-2.0.3.dist-info/RECORD +315 -0
  3. memoryos-2.0.3.dist-info/WHEEL +4 -0
  4. memoryos-2.0.3.dist-info/entry_points.txt +3 -0
  5. memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
  6. memos/__init__.py +20 -0
  7. memos/api/client.py +571 -0
  8. memos/api/config.py +1018 -0
  9. memos/api/context/dependencies.py +50 -0
  10. memos/api/exceptions.py +53 -0
  11. memos/api/handlers/__init__.py +62 -0
  12. memos/api/handlers/add_handler.py +158 -0
  13. memos/api/handlers/base_handler.py +194 -0
  14. memos/api/handlers/chat_handler.py +1401 -0
  15. memos/api/handlers/component_init.py +388 -0
  16. memos/api/handlers/config_builders.py +190 -0
  17. memos/api/handlers/feedback_handler.py +93 -0
  18. memos/api/handlers/formatters_handler.py +237 -0
  19. memos/api/handlers/memory_handler.py +316 -0
  20. memos/api/handlers/scheduler_handler.py +497 -0
  21. memos/api/handlers/search_handler.py +222 -0
  22. memos/api/handlers/suggestion_handler.py +117 -0
  23. memos/api/mcp_serve.py +614 -0
  24. memos/api/middleware/request_context.py +101 -0
  25. memos/api/product_api.py +38 -0
  26. memos/api/product_models.py +1206 -0
  27. memos/api/routers/__init__.py +1 -0
  28. memos/api/routers/product_router.py +477 -0
  29. memos/api/routers/server_router.py +394 -0
  30. memos/api/server_api.py +44 -0
  31. memos/api/start_api.py +433 -0
  32. memos/chunkers/__init__.py +4 -0
  33. memos/chunkers/base.py +24 -0
  34. memos/chunkers/charactertext_chunker.py +41 -0
  35. memos/chunkers/factory.py +24 -0
  36. memos/chunkers/markdown_chunker.py +62 -0
  37. memos/chunkers/sentence_chunker.py +54 -0
  38. memos/chunkers/simple_chunker.py +50 -0
  39. memos/cli.py +113 -0
  40. memos/configs/__init__.py +0 -0
  41. memos/configs/base.py +82 -0
  42. memos/configs/chunker.py +59 -0
  43. memos/configs/embedder.py +88 -0
  44. memos/configs/graph_db.py +236 -0
  45. memos/configs/internet_retriever.py +100 -0
  46. memos/configs/llm.py +151 -0
  47. memos/configs/mem_agent.py +54 -0
  48. memos/configs/mem_chat.py +81 -0
  49. memos/configs/mem_cube.py +105 -0
  50. memos/configs/mem_os.py +83 -0
  51. memos/configs/mem_reader.py +91 -0
  52. memos/configs/mem_scheduler.py +385 -0
  53. memos/configs/mem_user.py +70 -0
  54. memos/configs/memory.py +324 -0
  55. memos/configs/parser.py +38 -0
  56. memos/configs/reranker.py +18 -0
  57. memos/configs/utils.py +8 -0
  58. memos/configs/vec_db.py +80 -0
  59. memos/context/context.py +355 -0
  60. memos/dependency.py +52 -0
  61. memos/deprecation.py +262 -0
  62. memos/embedders/__init__.py +0 -0
  63. memos/embedders/ark.py +95 -0
  64. memos/embedders/base.py +106 -0
  65. memos/embedders/factory.py +29 -0
  66. memos/embedders/ollama.py +77 -0
  67. memos/embedders/sentence_transformer.py +49 -0
  68. memos/embedders/universal_api.py +51 -0
  69. memos/exceptions.py +30 -0
  70. memos/graph_dbs/__init__.py +0 -0
  71. memos/graph_dbs/base.py +274 -0
  72. memos/graph_dbs/factory.py +27 -0
  73. memos/graph_dbs/item.py +46 -0
  74. memos/graph_dbs/nebular.py +1794 -0
  75. memos/graph_dbs/neo4j.py +1942 -0
  76. memos/graph_dbs/neo4j_community.py +1058 -0
  77. memos/graph_dbs/polardb.py +5446 -0
  78. memos/hello_world.py +97 -0
  79. memos/llms/__init__.py +0 -0
  80. memos/llms/base.py +25 -0
  81. memos/llms/deepseek.py +13 -0
  82. memos/llms/factory.py +38 -0
  83. memos/llms/hf.py +443 -0
  84. memos/llms/hf_singleton.py +114 -0
  85. memos/llms/ollama.py +135 -0
  86. memos/llms/openai.py +222 -0
  87. memos/llms/openai_new.py +198 -0
  88. memos/llms/qwen.py +13 -0
  89. memos/llms/utils.py +14 -0
  90. memos/llms/vllm.py +218 -0
  91. memos/log.py +237 -0
  92. memos/mem_agent/base.py +19 -0
  93. memos/mem_agent/deepsearch_agent.py +391 -0
  94. memos/mem_agent/factory.py +36 -0
  95. memos/mem_chat/__init__.py +0 -0
  96. memos/mem_chat/base.py +30 -0
  97. memos/mem_chat/factory.py +21 -0
  98. memos/mem_chat/simple.py +200 -0
  99. memos/mem_cube/__init__.py +0 -0
  100. memos/mem_cube/base.py +30 -0
  101. memos/mem_cube/general.py +240 -0
  102. memos/mem_cube/navie.py +172 -0
  103. memos/mem_cube/utils.py +169 -0
  104. memos/mem_feedback/base.py +15 -0
  105. memos/mem_feedback/feedback.py +1192 -0
  106. memos/mem_feedback/simple_feedback.py +40 -0
  107. memos/mem_feedback/utils.py +230 -0
  108. memos/mem_os/client.py +5 -0
  109. memos/mem_os/core.py +1203 -0
  110. memos/mem_os/main.py +582 -0
  111. memos/mem_os/product.py +1608 -0
  112. memos/mem_os/product_server.py +455 -0
  113. memos/mem_os/utils/default_config.py +359 -0
  114. memos/mem_os/utils/format_utils.py +1403 -0
  115. memos/mem_os/utils/reference_utils.py +162 -0
  116. memos/mem_reader/__init__.py +0 -0
  117. memos/mem_reader/base.py +47 -0
  118. memos/mem_reader/factory.py +53 -0
  119. memos/mem_reader/memory.py +298 -0
  120. memos/mem_reader/multi_modal_struct.py +965 -0
  121. memos/mem_reader/read_multi_modal/__init__.py +43 -0
  122. memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
  123. memos/mem_reader/read_multi_modal/base.py +273 -0
  124. memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
  125. memos/mem_reader/read_multi_modal/image_parser.py +359 -0
  126. memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
  127. memos/mem_reader/read_multi_modal/string_parser.py +139 -0
  128. memos/mem_reader/read_multi_modal/system_parser.py +327 -0
  129. memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
  130. memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
  131. memos/mem_reader/read_multi_modal/user_parser.py +218 -0
  132. memos/mem_reader/read_multi_modal/utils.py +358 -0
  133. memos/mem_reader/simple_struct.py +912 -0
  134. memos/mem_reader/strategy_struct.py +163 -0
  135. memos/mem_reader/utils.py +157 -0
  136. memos/mem_scheduler/__init__.py +0 -0
  137. memos/mem_scheduler/analyzer/__init__.py +0 -0
  138. memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
  139. memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
  140. memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
  141. memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
  142. memos/mem_scheduler/base_scheduler.py +1319 -0
  143. memos/mem_scheduler/general_modules/__init__.py +0 -0
  144. memos/mem_scheduler/general_modules/api_misc.py +137 -0
  145. memos/mem_scheduler/general_modules/base.py +80 -0
  146. memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
  147. memos/mem_scheduler/general_modules/misc.py +313 -0
  148. memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
  149. memos/mem_scheduler/general_modules/task_threads.py +315 -0
  150. memos/mem_scheduler/general_scheduler.py +1495 -0
  151. memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
  152. memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
  153. memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
  154. memos/mem_scheduler/monitors/__init__.py +0 -0
  155. memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
  156. memos/mem_scheduler/monitors/general_monitor.py +394 -0
  157. memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
  158. memos/mem_scheduler/optimized_scheduler.py +410 -0
  159. memos/mem_scheduler/orm_modules/__init__.py +0 -0
  160. memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
  161. memos/mem_scheduler/orm_modules/base_model.py +729 -0
  162. memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
  163. memos/mem_scheduler/orm_modules/redis_model.py +699 -0
  164. memos/mem_scheduler/scheduler_factory.py +23 -0
  165. memos/mem_scheduler/schemas/__init__.py +0 -0
  166. memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
  167. memos/mem_scheduler/schemas/api_schemas.py +233 -0
  168. memos/mem_scheduler/schemas/general_schemas.py +55 -0
  169. memos/mem_scheduler/schemas/message_schemas.py +173 -0
  170. memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
  171. memos/mem_scheduler/schemas/task_schemas.py +132 -0
  172. memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
  173. memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
  174. memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
  175. memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
  176. memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
  177. memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
  178. memos/mem_scheduler/utils/__init__.py +0 -0
  179. memos/mem_scheduler/utils/api_utils.py +77 -0
  180. memos/mem_scheduler/utils/config_utils.py +100 -0
  181. memos/mem_scheduler/utils/db_utils.py +50 -0
  182. memos/mem_scheduler/utils/filter_utils.py +176 -0
  183. memos/mem_scheduler/utils/metrics.py +125 -0
  184. memos/mem_scheduler/utils/misc_utils.py +290 -0
  185. memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
  186. memos/mem_scheduler/utils/status_tracker.py +229 -0
  187. memos/mem_scheduler/webservice_modules/__init__.py +0 -0
  188. memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
  189. memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
  190. memos/mem_user/factory.py +94 -0
  191. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  192. memos/mem_user/mysql_user_manager.py +502 -0
  193. memos/mem_user/persistent_factory.py +98 -0
  194. memos/mem_user/persistent_user_manager.py +260 -0
  195. memos/mem_user/redis_persistent_user_manager.py +225 -0
  196. memos/mem_user/user_manager.py +488 -0
  197. memos/memories/__init__.py +0 -0
  198. memos/memories/activation/__init__.py +0 -0
  199. memos/memories/activation/base.py +42 -0
  200. memos/memories/activation/item.py +56 -0
  201. memos/memories/activation/kv.py +292 -0
  202. memos/memories/activation/vllmkv.py +219 -0
  203. memos/memories/base.py +19 -0
  204. memos/memories/factory.py +42 -0
  205. memos/memories/parametric/__init__.py +0 -0
  206. memos/memories/parametric/base.py +19 -0
  207. memos/memories/parametric/item.py +11 -0
  208. memos/memories/parametric/lora.py +41 -0
  209. memos/memories/textual/__init__.py +0 -0
  210. memos/memories/textual/base.py +92 -0
  211. memos/memories/textual/general.py +236 -0
  212. memos/memories/textual/item.py +304 -0
  213. memos/memories/textual/naive.py +187 -0
  214. memos/memories/textual/prefer_text_memory/__init__.py +0 -0
  215. memos/memories/textual/prefer_text_memory/adder.py +504 -0
  216. memos/memories/textual/prefer_text_memory/config.py +106 -0
  217. memos/memories/textual/prefer_text_memory/extractor.py +221 -0
  218. memos/memories/textual/prefer_text_memory/factory.py +85 -0
  219. memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
  220. memos/memories/textual/prefer_text_memory/spliter.py +132 -0
  221. memos/memories/textual/prefer_text_memory/utils.py +93 -0
  222. memos/memories/textual/preference.py +344 -0
  223. memos/memories/textual/simple_preference.py +161 -0
  224. memos/memories/textual/simple_tree.py +69 -0
  225. memos/memories/textual/tree.py +459 -0
  226. memos/memories/textual/tree_text_memory/__init__.py +0 -0
  227. memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
  228. memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
  229. memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
  230. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
  231. memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
  232. memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
  233. memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
  234. memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
  235. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
  236. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
  237. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
  238. memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
  239. memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
  240. memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
  241. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
  242. memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
  243. memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
  244. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
  245. memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
  246. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
  247. memos/memos_tools/dinding_report_bot.py +453 -0
  248. memos/memos_tools/lockfree_dict.py +120 -0
  249. memos/memos_tools/notification_service.py +44 -0
  250. memos/memos_tools/notification_utils.py +142 -0
  251. memos/memos_tools/singleton.py +174 -0
  252. memos/memos_tools/thread_safe_dict.py +310 -0
  253. memos/memos_tools/thread_safe_dict_segment.py +382 -0
  254. memos/multi_mem_cube/__init__.py +0 -0
  255. memos/multi_mem_cube/composite_cube.py +86 -0
  256. memos/multi_mem_cube/single_cube.py +874 -0
  257. memos/multi_mem_cube/views.py +54 -0
  258. memos/parsers/__init__.py +0 -0
  259. memos/parsers/base.py +15 -0
  260. memos/parsers/factory.py +21 -0
  261. memos/parsers/markitdown.py +28 -0
  262. memos/reranker/__init__.py +4 -0
  263. memos/reranker/base.py +25 -0
  264. memos/reranker/concat.py +103 -0
  265. memos/reranker/cosine_local.py +102 -0
  266. memos/reranker/factory.py +72 -0
  267. memos/reranker/http_bge.py +324 -0
  268. memos/reranker/http_bge_strategy.py +327 -0
  269. memos/reranker/noop.py +19 -0
  270. memos/reranker/strategies/__init__.py +4 -0
  271. memos/reranker/strategies/base.py +61 -0
  272. memos/reranker/strategies/concat_background.py +94 -0
  273. memos/reranker/strategies/concat_docsource.py +110 -0
  274. memos/reranker/strategies/dialogue_common.py +109 -0
  275. memos/reranker/strategies/factory.py +31 -0
  276. memos/reranker/strategies/single_turn.py +107 -0
  277. memos/reranker/strategies/singleturn_outmem.py +98 -0
  278. memos/settings.py +10 -0
  279. memos/templates/__init__.py +0 -0
  280. memos/templates/advanced_search_prompts.py +211 -0
  281. memos/templates/cloud_service_prompt.py +107 -0
  282. memos/templates/instruction_completion.py +66 -0
  283. memos/templates/mem_agent_prompts.py +85 -0
  284. memos/templates/mem_feedback_prompts.py +822 -0
  285. memos/templates/mem_reader_prompts.py +1096 -0
  286. memos/templates/mem_reader_strategy_prompts.py +238 -0
  287. memos/templates/mem_scheduler_prompts.py +626 -0
  288. memos/templates/mem_search_prompts.py +93 -0
  289. memos/templates/mos_prompts.py +403 -0
  290. memos/templates/prefer_complete_prompt.py +735 -0
  291. memos/templates/tool_mem_prompts.py +139 -0
  292. memos/templates/tree_reorganize_prompts.py +230 -0
  293. memos/types/__init__.py +34 -0
  294. memos/types/general_types.py +151 -0
  295. memos/types/openai_chat_completion_types/__init__.py +15 -0
  296. memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
  297. memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
  298. memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
  299. memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
  300. memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
  301. memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
  302. memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
  303. memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
  304. memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
  305. memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
  306. memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
  307. memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
  308. memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
  309. memos/utils.py +123 -0
  310. memos/vec_dbs/__init__.py +0 -0
  311. memos/vec_dbs/base.py +117 -0
  312. memos/vec_dbs/factory.py +23 -0
  313. memos/vec_dbs/item.py +50 -0
  314. memos/vec_dbs/milvus.py +654 -0
  315. memos/vec_dbs/qdrant.py +355 -0
@@ -0,0 +1,163 @@
1
+ import os
2
+
3
+ from abc import ABC
4
+
5
+ from memos import log
6
+ from memos.configs.mem_reader import StrategyStructMemReaderConfig
7
+ from memos.configs.parser import ParserConfigFactory
8
+ from memos.mem_reader.read_multi_modal import detect_lang
9
+ from memos.mem_reader.simple_struct import SimpleStructMemReader
10
+ from memos.parsers.factory import ParserFactory
11
+ from memos.templates.mem_reader_prompts import (
12
+ CUSTOM_TAGS_INSTRUCTION,
13
+ CUSTOM_TAGS_INSTRUCTION_ZH,
14
+ SIMPLE_STRUCT_DOC_READER_PROMPT,
15
+ SIMPLE_STRUCT_DOC_READER_PROMPT_ZH,
16
+ SIMPLE_STRUCT_MEM_READER_EXAMPLE,
17
+ SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH,
18
+ )
19
+ from memos.templates.mem_reader_strategy_prompts import (
20
+ STRATEGY_STRUCT_MEM_READER_PROMPT,
21
+ STRATEGY_STRUCT_MEM_READER_PROMPT_ZH,
22
+ )
23
+
24
+
25
# Module-level logger used by the reader defined in this file.
logger = log.get_logger(__name__)

# Prompt lookup table: first key is the scene kind, second key is the language
# code. The "chat" entry additionally stores the few-shot example text under
# "<lang>_example" so it can be located inside the prompt.
STRATEGY_PROMPT_DICT = {
    "chat": {
        "en": STRATEGY_STRUCT_MEM_READER_PROMPT,
        "zh": STRATEGY_STRUCT_MEM_READER_PROMPT_ZH,
        "en_example": SIMPLE_STRUCT_MEM_READER_EXAMPLE,
        "zh_example": SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH,
    },
    "doc": {
        "en": SIMPLE_STRUCT_DOC_READER_PROMPT,
        "zh": SIMPLE_STRUCT_DOC_READER_PROMPT_ZH,
    },
    "custom_tags": {
        "en": CUSTOM_TAGS_INSTRUCTION,
        "zh": CUSTOM_TAGS_INSTRUCTION_ZH,
    },
}
36
+
37
+
38
class StrategyStructMemReader(SimpleStructMemReader, ABC):
    """Memory reader that chunks chat sessions according to a configured strategy.

    Builds on ``SimpleStructMemReader`` and overrides two steps:

    * ``_get_llm_response`` builds the extraction prompt from
      ``STRATEGY_PROMPT_DICT`` and asks ``self.llm`` for structured memories.
    * ``get_scene_data_info`` splits chat sessions either by accumulated
      content length or by a sliding window over messages, depending on
      ``config.chat_chunker["config"]``.
    """

    def __init__(self, config: StrategyStructMemReaderConfig):
        """Initialise the parent reader and keep the chat chunker settings.

        Args:
            config: Reader configuration; ``config.chat_chunker["config"]`` is
                stored as ``self.chat_chunker``.
        """
        super().__init__(config)
        self.chat_chunker = config.chat_chunker["config"]

    @staticmethod
    def _content_length(item) -> int:
        """Return the length used for chunk budgeting of one chat item.

        Dict items are measured by their ``"content"`` value (a missing or
        ``None`` content counts as 0); anything else by its ``str()`` form.
        """
        if isinstance(item, dict):
            return len(item.get("content") or "")
        return len(str(item))

    def _get_llm_response(self, mem_str: str, custom_tags: list[str] | None) -> dict:
        """Ask the LLM to extract memories from ``mem_str``.

        Args:
            mem_str: Conversation text inserted into the prompt template.
            custom_tags: Optional tags; when non-empty, the custom-tags
                instruction for the detected language is added to the prompt.

        Returns:
            The parsed JSON response. If generation or parsing raises, a
            fallback dict holding a single ``UserMemory`` entry with the raw
            text is returned instead.
        """
        lang = detect_lang(mem_str)
        template = STRATEGY_PROMPT_DICT["chat"][lang]
        examples = STRATEGY_PROMPT_DICT["chat"][f"{lang}_example"]
        prompt = template.replace("${conversation}", mem_str)

        custom_tags_prompt = (
            STRATEGY_PROMPT_DICT["custom_tags"][lang].replace("{custom_tags}", str(custom_tags))
            if custom_tags
            else ""
        )
        prompt = prompt.replace("${custom_tags_prompt}", custom_tags_prompt)

        if self.config.remove_prompt_example:  # TODO unused
            prompt = prompt.replace(examples, "")
        messages = [{"role": "user", "content": prompt}]
        try:
            response_text = self.llm.generate(messages)
            response_json = self.parse_json_result(response_text)
        except Exception as e:
            # Best effort: keep the raw text as one memory rather than lose it.
            logger.error(f"[LLM] Exception during chat generation: {e}")
            response_json = {
                "memory list": [
                    {
                        "key": mem_str[:10],
                        "memory_type": "UserMemory",
                        "value": mem_str,
                        "tags": [],
                    }
                ],
                "summary": mem_str,
            }
        return response_json

    def get_scene_data_info(self, scene_data: list, type: str) -> list[str]:
        """Turn raw scene data into chunks ready for memory extraction.

        Args:
            scene_data: For ``"chat"``, a list of sessions, each a list of
                messages. For ``"doc"``, a list of file paths or plain texts.
            type: Type of scene data: ``"doc"`` or ``"chat"``.

        Returns:
            For ``"chat"``: a list of message lists (chunks). Consecutive
            chunks of one session overlap.
            For ``"doc"``: a list of ``{"file": ..., "text": ...}`` dicts,
            where ``file`` is the path or ``"pure_text"`` for non-path input.
            Any other ``type`` yields an empty list.
            NOTE(review): the ``list[str]`` annotation is kept for interface
            compatibility but does not describe these shapes.
        """
        results = []

        if type == "chat":
            if self.chat_chunker["chunk_type"] == "content_length":
                max_length = self.chat_chunker["chunk_length"]
                for items in scene_data:
                    if not items:
                        continue

                    chunk = []
                    results.append(chunk)
                    current_length = 0

                    for item in items:
                        content_length = self._content_length(item)
                        if not chunk:
                            # First message always opens the chunk, even if it
                            # alone exceeds the budget.
                            chunk.append(item)
                            current_length = content_length
                            continue

                        if current_length + content_length <= max_length:
                            chunk.append(item)
                            current_length += content_length
                        else:
                            # Start a new chunk and repeat the previous message
                            # so neighbouring chunks share one item of context.
                            overlap_item = chunk[-1]
                            chunk = [overlap_item, item]
                            results.append(chunk)
                            current_length = self._content_length(overlap_item) + content_length
            else:
                cut_size = self.chat_chunker["chunk_session"]
                cut_overlap = self.chat_chunker["chunk_overlap"]
                step = cut_size - cut_overlap
                if step <= 0:
                    # A non-positive step would crash range() (step == 0) or
                    # silently drop every message (step < 0); keep sessions whole.
                    logger.warning(
                        f"[SceneParser] chunk_overlap ({cut_overlap}) must be smaller than "
                        f"chunk_session ({cut_size}); sessions are kept unsplit"
                    )
                for items in scene_data:
                    end = len(items) - cut_overlap
                    if step <= 0 or end <= 0:
                        results.append(items[:])
                    else:
                        results.extend(items[i : i + cut_size] for i in range(0, end, step))

        elif type == "doc":
            parser_config = ParserConfigFactory.model_validate(
                {
                    "backend": "markitdown",
                    "config": {},
                }
            )
            parser = ParserFactory.from_config(parser_config)
            for item in scene_data:
                try:
                    is_file = os.path.exists(item)
                except Exception as e:
                    # e.g. TypeError when the item is not path-like.
                    logger.error(f"[SceneParser] Error parsing file {item}: {e!s}")
                    continue

                if not is_file:
                    # Not an existing path: treat the item itself as the text.
                    results.append({"file": "pure_text", "text": item})
                    continue

                try:
                    parsed_text = parser.parse(item)
                except Exception as e:
                    logger.error(f"[SceneParser] Error parsing {item}: {e}")
                    continue
                results.append({"file": item, "text": parsed_text})

        return results
@@ -0,0 +1,157 @@
1
+ import json
2
+ import re
3
+
4
+ from memos import log
5
+
6
+
7
+ logger = log.get_logger(__name__)
8
+
9
# Token counting: use tiktoken when it can be imported and an encoding can be
# loaded; otherwise fall back to a character-based estimate.
try:
    import tiktoken

    try:
        _ENC = tiktoken.encoding_for_model("gpt-4o-mini")
    except Exception:
        _ENC = tiktoken.get_encoding("cl100k_base")

    def count_tokens_text(s: str) -> int:
        """Return the number of tokens in ``s`` according to ``_ENC``.

        ``None`` or empty input counts as 0. Special-token text is encoded as
        ordinary text (``disallowed_special=()``) instead of raising.
        """
        return len(_ENC.encode(s or "", disallowed_special=()))
except Exception:
    # Matches characters in the CJK Unified Ideographs range U+4E00..U+9FFF.
    _ZH_CHAR_RE = re.compile(r"[\u4e00-\u9fff]")

    # Heuristic fallback: zh chars ~1 token, others ~1 token per ~4 chars
    def count_tokens_text(s: str) -> int:
        """Estimate the number of tokens in ``s`` without a tokenizer.

        Each character in U+4E00..U+9FFF counts as one token. The remaining
        characters count as one token per four characters, with a minimum of
        one token when any such character is present.
        """
        if not s:
            return 0
        zh = len(_ZH_CHAR_RE.findall(s))
        rest = len(s) - zh
        if rest == 0:
            # No other characters: do not add the minimum-one token.
            return zh
        return zh + max(1, rest // 4)
28
+
29
+
30
def derive_key(text: str, max_len: int = 80) -> str:
    """Derive a default key without an LLM: the start of the first sentence.

    The stripped text is cut at the first sentence terminator or newline, and
    the result is truncated to ``max_len`` characters (not words) and stripped
    again.

    Args:
        text: Source text; ``None`` or empty yields ``""``.
        max_len: Maximum number of characters kept.

    Returns:
        The derived key, possibly empty.
    """
    if not text:
        return ""
    # Terminators: ideographic full stop (U+3002), full-width "!" (U+FF01) and
    # "?" (U+FF1F), plus ASCII "!" and "?". Written as escapes so the
    # full-width forms survive width normalisation of the source text.
    first_sentence = re.split(r"[\u3002\uff01\uff1f!?]\s*|\n", text.strip(), maxsplit=1)[0]
    return first_sentence[:max_len].strip()
36
+
37
+
38
# Characters that may legally follow a backslash inside a JSON string.
_VALID_JSON_ESCAPE_CHARS = '"\\/bfnrtu'


def _close_open_brackets(text: str) -> str:
    """Append whatever is needed to close a truncated JSON document.

    Tracks open ``{``/``[`` outside of string literals and closes them in
    reverse order of opening; an unterminated string is closed first.
    """
    closers = []
    in_string = False
    escaped = False
    for ch in text:
        if in_string:
            if escaped:
                escaped = False
            elif ch == "\\":
                escaped = True
            elif ch == '"':
                in_string = False
        elif ch == '"':
            in_string = True
        elif ch == "{":
            closers.append("}")
        elif ch == "[":
            closers.append("]")
        elif ch in "}]" and closers and closers[-1] == ch:
            closers.pop()

    if in_string:
        if escaped:
            # Drop a dangling backslash so it cannot escape the closing quote.
            text = text[:-1]
        text += '"'
    return text + "".join(reversed(closers))


def _escape_stray_backslashes(text: str) -> str:
    """Double only the backslashes that do not start a valid JSON escape."""

    def _fix(match):
        pair = match.group(0)
        return pair if match.group(1) in _VALID_JSON_ESCAPE_CHARS else "\\" + pair

    # Matching backslash+char pairs left to right keeps "\\\\" pairs intact.
    return re.sub(r"\\(.)", _fix, text, flags=re.S)


def parse_json_result(response_text: str) -> dict:
    """Extract a JSON object from an LLM response, tolerating common damage.

    Steps, in order:
      1. Take the content of the first fenced code block, if any; otherwise
         strip stray code-fence markers.
      2. Start at the first ``{``; return ``{}`` if there is none.
      3. Try to decode the text as is, then cut after the last ``}``/``]``,
         then with missing closers appended.
      4. If decoding hit an invalid backslash escape, retry the same
         candidates with stray backslashes escaped.

    Args:
        response_text: Raw model output; ``None`` is treated as empty.

    Returns:
        The decoded object, or ``{}`` when nothing could be decoded (a warning
        is logged in that case).
    """
    s = (response_text or "").strip()

    m = re.search(r"```(?:json)?\s*([\s\S]*?)```", s, flags=re.I)
    s = (m.group(1) if m else s.replace("```", "")).strip()

    i = s.find("{")
    if i == -1:
        return {}
    s = s[i:].strip()

    candidates = [s]
    j = max(s.rfind("}"), s.rfind("]"))
    if j != -1:
        # Drops trailing chatter after the JSON payload.
        candidates.append(s[: j + 1])
    candidates.append(_close_open_brackets(s))

    last_error = None
    saw_invalid_escape = False
    for candidate in candidates:
        try:
            return json.loads(candidate)
        except json.JSONDecodeError as e:
            last_error = e
            if "Invalid \\escape" in str(e):
                saw_invalid_escape = True

    if saw_invalid_escape:
        for candidate in candidates:
            try:
                return json.loads(_escape_stray_backslashes(candidate))
            except json.JSONDecodeError as e:
                last_error = e

    logger.warning(
        f"[JSONParse] Failed to decode JSON: {last_error}\nTail: Raw {response_text} json: {s}"
    )
    return {}
78
+
79
+
80
def parse_rewritten_response(text: str) -> tuple[bool, dict[int, dict]]:
    """Parse index-keyed JSON from hallucination filter response.

    Expected shape: { "0": {"need_rewrite": bool, "rewritten": str, "reason": str}, ... }

    The payload is taken from the first fenced code block when present,
    otherwise from the whole text. Entries are skipped when the key is not an
    integer string, the value is not an object, ``need_rewrite`` is not a
    bool, or ``rewritten``/``reason`` (default ``""``) are not strings.

    Args:
        text: Raw model output.

    Returns:
        ``(success, parsed_dict)`` with int keys; ``success`` is True only
        when at least one valid entry was found.
    """
    try:
        fenced = re.search(r"```(?:json)?\s*([\s\S]*?)```", text, flags=re.I)
        payload = (fenced.group(1) if fenced else text).strip()
        data = json.loads(payload)
    except Exception:
        # Covers non-string input as well as malformed JSON.
        return False, {}

    if not isinstance(data, dict):
        return False, {}

    result: dict[int, dict] = {}
    for k, v in data.items():
        try:
            idx = int(k)
        except (TypeError, ValueError):
            # Not an index key; ignore the entry.
            continue
        if not isinstance(v, dict):
            continue
        need_rewrite = v.get("need_rewrite")
        rewritten = v.get("rewritten", "")
        reason = v.get("reason", "")
        if (
            isinstance(need_rewrite, bool)
            and isinstance(rewritten, str)
            and isinstance(reason, str)
        ):
            result[idx] = {
                "need_rewrite": need_rewrite,
                "rewritten": rewritten,
                "reason": reason,
            }

    return bool(result), result
122
+
123
+
124
def parse_keep_filter_response(text: str) -> tuple[bool, dict[int, dict]]:
    """Parse index-keyed JSON from keep filter response.

    Expected shape: { "0": {"keep": bool, "reason": str}, ... }

    The payload is taken from the first fenced code block when present,
    otherwise from the whole text. Entries are skipped when the key is not an
    integer string, the value is not an object, or ``keep`` is not a bool.
    ``reason`` defaults to ``""`` and is passed through without type checks.

    Args:
        text: Raw model output.

    Returns:
        ``(success, parsed_dict)`` with int keys; ``success`` is True only
        when at least one valid entry was found.
    """
    try:
        fenced = re.search(r"```(?:json)?\s*([\s\S]*?)```", text, flags=re.I)
        payload = (fenced.group(1) if fenced else text).strip()
        data = json.loads(payload)
    except Exception:
        # Covers non-string input as well as malformed JSON.
        return False, {}

    if not isinstance(data, dict):
        return False, {}

    result: dict[int, dict] = {}
    for k, v in data.items():
        try:
            idx = int(k)
        except (TypeError, ValueError):
            # Not an index key; ignore the entry.
            continue
        if not isinstance(v, dict):
            continue
        keep = v.get("keep")
        reason = v.get("reason", "")
        if isinstance(keep, bool):
            result[idx] = {
                "keep": keep,
                "reason": reason,
            }
    return bool(result), result
File without changes
File without changes