MemoryOS 2.0.3__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (315)
  1. memoryos-2.0.3.dist-info/METADATA +418 -0
  2. memoryos-2.0.3.dist-info/RECORD +315 -0
  3. memoryos-2.0.3.dist-info/WHEEL +4 -0
  4. memoryos-2.0.3.dist-info/entry_points.txt +3 -0
  5. memoryos-2.0.3.dist-info/licenses/LICENSE +201 -0
  6. memos/__init__.py +20 -0
  7. memos/api/client.py +571 -0
  8. memos/api/config.py +1018 -0
  9. memos/api/context/dependencies.py +50 -0
  10. memos/api/exceptions.py +53 -0
  11. memos/api/handlers/__init__.py +62 -0
  12. memos/api/handlers/add_handler.py +158 -0
  13. memos/api/handlers/base_handler.py +194 -0
  14. memos/api/handlers/chat_handler.py +1401 -0
  15. memos/api/handlers/component_init.py +388 -0
  16. memos/api/handlers/config_builders.py +190 -0
  17. memos/api/handlers/feedback_handler.py +93 -0
  18. memos/api/handlers/formatters_handler.py +237 -0
  19. memos/api/handlers/memory_handler.py +316 -0
  20. memos/api/handlers/scheduler_handler.py +497 -0
  21. memos/api/handlers/search_handler.py +222 -0
  22. memos/api/handlers/suggestion_handler.py +117 -0
  23. memos/api/mcp_serve.py +614 -0
  24. memos/api/middleware/request_context.py +101 -0
  25. memos/api/product_api.py +38 -0
  26. memos/api/product_models.py +1206 -0
  27. memos/api/routers/__init__.py +1 -0
  28. memos/api/routers/product_router.py +477 -0
  29. memos/api/routers/server_router.py +394 -0
  30. memos/api/server_api.py +44 -0
  31. memos/api/start_api.py +433 -0
  32. memos/chunkers/__init__.py +4 -0
  33. memos/chunkers/base.py +24 -0
  34. memos/chunkers/charactertext_chunker.py +41 -0
  35. memos/chunkers/factory.py +24 -0
  36. memos/chunkers/markdown_chunker.py +62 -0
  37. memos/chunkers/sentence_chunker.py +54 -0
  38. memos/chunkers/simple_chunker.py +50 -0
  39. memos/cli.py +113 -0
  40. memos/configs/__init__.py +0 -0
  41. memos/configs/base.py +82 -0
  42. memos/configs/chunker.py +59 -0
  43. memos/configs/embedder.py +88 -0
  44. memos/configs/graph_db.py +236 -0
  45. memos/configs/internet_retriever.py +100 -0
  46. memos/configs/llm.py +151 -0
  47. memos/configs/mem_agent.py +54 -0
  48. memos/configs/mem_chat.py +81 -0
  49. memos/configs/mem_cube.py +105 -0
  50. memos/configs/mem_os.py +83 -0
  51. memos/configs/mem_reader.py +91 -0
  52. memos/configs/mem_scheduler.py +385 -0
  53. memos/configs/mem_user.py +70 -0
  54. memos/configs/memory.py +324 -0
  55. memos/configs/parser.py +38 -0
  56. memos/configs/reranker.py +18 -0
  57. memos/configs/utils.py +8 -0
  58. memos/configs/vec_db.py +80 -0
  59. memos/context/context.py +355 -0
  60. memos/dependency.py +52 -0
  61. memos/deprecation.py +262 -0
  62. memos/embedders/__init__.py +0 -0
  63. memos/embedders/ark.py +95 -0
  64. memos/embedders/base.py +106 -0
  65. memos/embedders/factory.py +29 -0
  66. memos/embedders/ollama.py +77 -0
  67. memos/embedders/sentence_transformer.py +49 -0
  68. memos/embedders/universal_api.py +51 -0
  69. memos/exceptions.py +30 -0
  70. memos/graph_dbs/__init__.py +0 -0
  71. memos/graph_dbs/base.py +274 -0
  72. memos/graph_dbs/factory.py +27 -0
  73. memos/graph_dbs/item.py +46 -0
  74. memos/graph_dbs/nebular.py +1794 -0
  75. memos/graph_dbs/neo4j.py +1942 -0
  76. memos/graph_dbs/neo4j_community.py +1058 -0
  77. memos/graph_dbs/polardb.py +5446 -0
  78. memos/hello_world.py +97 -0
  79. memos/llms/__init__.py +0 -0
  80. memos/llms/base.py +25 -0
  81. memos/llms/deepseek.py +13 -0
  82. memos/llms/factory.py +38 -0
  83. memos/llms/hf.py +443 -0
  84. memos/llms/hf_singleton.py +114 -0
  85. memos/llms/ollama.py +135 -0
  86. memos/llms/openai.py +222 -0
  87. memos/llms/openai_new.py +198 -0
  88. memos/llms/qwen.py +13 -0
  89. memos/llms/utils.py +14 -0
  90. memos/llms/vllm.py +218 -0
  91. memos/log.py +237 -0
  92. memos/mem_agent/base.py +19 -0
  93. memos/mem_agent/deepsearch_agent.py +391 -0
  94. memos/mem_agent/factory.py +36 -0
  95. memos/mem_chat/__init__.py +0 -0
  96. memos/mem_chat/base.py +30 -0
  97. memos/mem_chat/factory.py +21 -0
  98. memos/mem_chat/simple.py +200 -0
  99. memos/mem_cube/__init__.py +0 -0
  100. memos/mem_cube/base.py +30 -0
  101. memos/mem_cube/general.py +240 -0
  102. memos/mem_cube/navie.py +172 -0
  103. memos/mem_cube/utils.py +169 -0
  104. memos/mem_feedback/base.py +15 -0
  105. memos/mem_feedback/feedback.py +1192 -0
  106. memos/mem_feedback/simple_feedback.py +40 -0
  107. memos/mem_feedback/utils.py +230 -0
  108. memos/mem_os/client.py +5 -0
  109. memos/mem_os/core.py +1203 -0
  110. memos/mem_os/main.py +582 -0
  111. memos/mem_os/product.py +1608 -0
  112. memos/mem_os/product_server.py +455 -0
  113. memos/mem_os/utils/default_config.py +359 -0
  114. memos/mem_os/utils/format_utils.py +1403 -0
  115. memos/mem_os/utils/reference_utils.py +162 -0
  116. memos/mem_reader/__init__.py +0 -0
  117. memos/mem_reader/base.py +47 -0
  118. memos/mem_reader/factory.py +53 -0
  119. memos/mem_reader/memory.py +298 -0
  120. memos/mem_reader/multi_modal_struct.py +965 -0
  121. memos/mem_reader/read_multi_modal/__init__.py +43 -0
  122. memos/mem_reader/read_multi_modal/assistant_parser.py +311 -0
  123. memos/mem_reader/read_multi_modal/base.py +273 -0
  124. memos/mem_reader/read_multi_modal/file_content_parser.py +826 -0
  125. memos/mem_reader/read_multi_modal/image_parser.py +359 -0
  126. memos/mem_reader/read_multi_modal/multi_modal_parser.py +252 -0
  127. memos/mem_reader/read_multi_modal/string_parser.py +139 -0
  128. memos/mem_reader/read_multi_modal/system_parser.py +327 -0
  129. memos/mem_reader/read_multi_modal/text_content_parser.py +131 -0
  130. memos/mem_reader/read_multi_modal/tool_parser.py +210 -0
  131. memos/mem_reader/read_multi_modal/user_parser.py +218 -0
  132. memos/mem_reader/read_multi_modal/utils.py +358 -0
  133. memos/mem_reader/simple_struct.py +912 -0
  134. memos/mem_reader/strategy_struct.py +163 -0
  135. memos/mem_reader/utils.py +157 -0
  136. memos/mem_scheduler/__init__.py +0 -0
  137. memos/mem_scheduler/analyzer/__init__.py +0 -0
  138. memos/mem_scheduler/analyzer/api_analyzer.py +714 -0
  139. memos/mem_scheduler/analyzer/eval_analyzer.py +219 -0
  140. memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +571 -0
  141. memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
  142. memos/mem_scheduler/base_scheduler.py +1319 -0
  143. memos/mem_scheduler/general_modules/__init__.py +0 -0
  144. memos/mem_scheduler/general_modules/api_misc.py +137 -0
  145. memos/mem_scheduler/general_modules/base.py +80 -0
  146. memos/mem_scheduler/general_modules/init_components_for_scheduler.py +425 -0
  147. memos/mem_scheduler/general_modules/misc.py +313 -0
  148. memos/mem_scheduler/general_modules/scheduler_logger.py +389 -0
  149. memos/mem_scheduler/general_modules/task_threads.py +315 -0
  150. memos/mem_scheduler/general_scheduler.py +1495 -0
  151. memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
  152. memos/mem_scheduler/memory_manage_modules/memory_filter.py +306 -0
  153. memos/mem_scheduler/memory_manage_modules/retriever.py +547 -0
  154. memos/mem_scheduler/monitors/__init__.py +0 -0
  155. memos/mem_scheduler/monitors/dispatcher_monitor.py +366 -0
  156. memos/mem_scheduler/monitors/general_monitor.py +394 -0
  157. memos/mem_scheduler/monitors/task_schedule_monitor.py +254 -0
  158. memos/mem_scheduler/optimized_scheduler.py +410 -0
  159. memos/mem_scheduler/orm_modules/__init__.py +0 -0
  160. memos/mem_scheduler/orm_modules/api_redis_model.py +518 -0
  161. memos/mem_scheduler/orm_modules/base_model.py +729 -0
  162. memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
  163. memos/mem_scheduler/orm_modules/redis_model.py +699 -0
  164. memos/mem_scheduler/scheduler_factory.py +23 -0
  165. memos/mem_scheduler/schemas/__init__.py +0 -0
  166. memos/mem_scheduler/schemas/analyzer_schemas.py +52 -0
  167. memos/mem_scheduler/schemas/api_schemas.py +233 -0
  168. memos/mem_scheduler/schemas/general_schemas.py +55 -0
  169. memos/mem_scheduler/schemas/message_schemas.py +173 -0
  170. memos/mem_scheduler/schemas/monitor_schemas.py +406 -0
  171. memos/mem_scheduler/schemas/task_schemas.py +132 -0
  172. memos/mem_scheduler/task_schedule_modules/__init__.py +0 -0
  173. memos/mem_scheduler/task_schedule_modules/dispatcher.py +740 -0
  174. memos/mem_scheduler/task_schedule_modules/local_queue.py +247 -0
  175. memos/mem_scheduler/task_schedule_modules/orchestrator.py +74 -0
  176. memos/mem_scheduler/task_schedule_modules/redis_queue.py +1385 -0
  177. memos/mem_scheduler/task_schedule_modules/task_queue.py +162 -0
  178. memos/mem_scheduler/utils/__init__.py +0 -0
  179. memos/mem_scheduler/utils/api_utils.py +77 -0
  180. memos/mem_scheduler/utils/config_utils.py +100 -0
  181. memos/mem_scheduler/utils/db_utils.py +50 -0
  182. memos/mem_scheduler/utils/filter_utils.py +176 -0
  183. memos/mem_scheduler/utils/metrics.py +125 -0
  184. memos/mem_scheduler/utils/misc_utils.py +290 -0
  185. memos/mem_scheduler/utils/monitor_event_utils.py +67 -0
  186. memos/mem_scheduler/utils/status_tracker.py +229 -0
  187. memos/mem_scheduler/webservice_modules/__init__.py +0 -0
  188. memos/mem_scheduler/webservice_modules/rabbitmq_service.py +485 -0
  189. memos/mem_scheduler/webservice_modules/redis_service.py +380 -0
  190. memos/mem_user/factory.py +94 -0
  191. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  192. memos/mem_user/mysql_user_manager.py +502 -0
  193. memos/mem_user/persistent_factory.py +98 -0
  194. memos/mem_user/persistent_user_manager.py +260 -0
  195. memos/mem_user/redis_persistent_user_manager.py +225 -0
  196. memos/mem_user/user_manager.py +488 -0
  197. memos/memories/__init__.py +0 -0
  198. memos/memories/activation/__init__.py +0 -0
  199. memos/memories/activation/base.py +42 -0
  200. memos/memories/activation/item.py +56 -0
  201. memos/memories/activation/kv.py +292 -0
  202. memos/memories/activation/vllmkv.py +219 -0
  203. memos/memories/base.py +19 -0
  204. memos/memories/factory.py +42 -0
  205. memos/memories/parametric/__init__.py +0 -0
  206. memos/memories/parametric/base.py +19 -0
  207. memos/memories/parametric/item.py +11 -0
  208. memos/memories/parametric/lora.py +41 -0
  209. memos/memories/textual/__init__.py +0 -0
  210. memos/memories/textual/base.py +92 -0
  211. memos/memories/textual/general.py +236 -0
  212. memos/memories/textual/item.py +304 -0
  213. memos/memories/textual/naive.py +187 -0
  214. memos/memories/textual/prefer_text_memory/__init__.py +0 -0
  215. memos/memories/textual/prefer_text_memory/adder.py +504 -0
  216. memos/memories/textual/prefer_text_memory/config.py +106 -0
  217. memos/memories/textual/prefer_text_memory/extractor.py +221 -0
  218. memos/memories/textual/prefer_text_memory/factory.py +85 -0
  219. memos/memories/textual/prefer_text_memory/retrievers.py +177 -0
  220. memos/memories/textual/prefer_text_memory/spliter.py +132 -0
  221. memos/memories/textual/prefer_text_memory/utils.py +93 -0
  222. memos/memories/textual/preference.py +344 -0
  223. memos/memories/textual/simple_preference.py +161 -0
  224. memos/memories/textual/simple_tree.py +69 -0
  225. memos/memories/textual/tree.py +459 -0
  226. memos/memories/textual/tree_text_memory/__init__.py +0 -0
  227. memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
  228. memos/memories/textual/tree_text_memory/organize/handler.py +184 -0
  229. memos/memories/textual/tree_text_memory/organize/manager.py +518 -0
  230. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +238 -0
  231. memos/memories/textual/tree_text_memory/organize/reorganizer.py +622 -0
  232. memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
  233. memos/memories/textual/tree_text_memory/retrieve/advanced_searcher.py +364 -0
  234. memos/memories/textual/tree_text_memory/retrieve/bm25_util.py +186 -0
  235. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +419 -0
  236. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +270 -0
  237. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +102 -0
  238. memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
  239. memos/memories/textual/tree_text_memory/retrieve/recall.py +497 -0
  240. memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
  241. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +16 -0
  242. memos/memories/textual/tree_text_memory/retrieve/retrieve_utils.py +472 -0
  243. memos/memories/textual/tree_text_memory/retrieve/searcher.py +848 -0
  244. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +135 -0
  245. memos/memories/textual/tree_text_memory/retrieve/utils.py +54 -0
  246. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +387 -0
  247. memos/memos_tools/dinding_report_bot.py +453 -0
  248. memos/memos_tools/lockfree_dict.py +120 -0
  249. memos/memos_tools/notification_service.py +44 -0
  250. memos/memos_tools/notification_utils.py +142 -0
  251. memos/memos_tools/singleton.py +174 -0
  252. memos/memos_tools/thread_safe_dict.py +310 -0
  253. memos/memos_tools/thread_safe_dict_segment.py +382 -0
  254. memos/multi_mem_cube/__init__.py +0 -0
  255. memos/multi_mem_cube/composite_cube.py +86 -0
  256. memos/multi_mem_cube/single_cube.py +874 -0
  257. memos/multi_mem_cube/views.py +54 -0
  258. memos/parsers/__init__.py +0 -0
  259. memos/parsers/base.py +15 -0
  260. memos/parsers/factory.py +21 -0
  261. memos/parsers/markitdown.py +28 -0
  262. memos/reranker/__init__.py +4 -0
  263. memos/reranker/base.py +25 -0
  264. memos/reranker/concat.py +103 -0
  265. memos/reranker/cosine_local.py +102 -0
  266. memos/reranker/factory.py +72 -0
  267. memos/reranker/http_bge.py +324 -0
  268. memos/reranker/http_bge_strategy.py +327 -0
  269. memos/reranker/noop.py +19 -0
  270. memos/reranker/strategies/__init__.py +4 -0
  271. memos/reranker/strategies/base.py +61 -0
  272. memos/reranker/strategies/concat_background.py +94 -0
  273. memos/reranker/strategies/concat_docsource.py +110 -0
  274. memos/reranker/strategies/dialogue_common.py +109 -0
  275. memos/reranker/strategies/factory.py +31 -0
  276. memos/reranker/strategies/single_turn.py +107 -0
  277. memos/reranker/strategies/singleturn_outmem.py +98 -0
  278. memos/settings.py +10 -0
  279. memos/templates/__init__.py +0 -0
  280. memos/templates/advanced_search_prompts.py +211 -0
  281. memos/templates/cloud_service_prompt.py +107 -0
  282. memos/templates/instruction_completion.py +66 -0
  283. memos/templates/mem_agent_prompts.py +85 -0
  284. memos/templates/mem_feedback_prompts.py +822 -0
  285. memos/templates/mem_reader_prompts.py +1096 -0
  286. memos/templates/mem_reader_strategy_prompts.py +238 -0
  287. memos/templates/mem_scheduler_prompts.py +626 -0
  288. memos/templates/mem_search_prompts.py +93 -0
  289. memos/templates/mos_prompts.py +403 -0
  290. memos/templates/prefer_complete_prompt.py +735 -0
  291. memos/templates/tool_mem_prompts.py +139 -0
  292. memos/templates/tree_reorganize_prompts.py +230 -0
  293. memos/types/__init__.py +34 -0
  294. memos/types/general_types.py +151 -0
  295. memos/types/openai_chat_completion_types/__init__.py +15 -0
  296. memos/types/openai_chat_completion_types/chat_completion_assistant_message_param.py +56 -0
  297. memos/types/openai_chat_completion_types/chat_completion_content_part_image_param.py +27 -0
  298. memos/types/openai_chat_completion_types/chat_completion_content_part_input_audio_param.py +23 -0
  299. memos/types/openai_chat_completion_types/chat_completion_content_part_param.py +43 -0
  300. memos/types/openai_chat_completion_types/chat_completion_content_part_refusal_param.py +16 -0
  301. memos/types/openai_chat_completion_types/chat_completion_content_part_text_param.py +16 -0
  302. memos/types/openai_chat_completion_types/chat_completion_message_custom_tool_call_param.py +27 -0
  303. memos/types/openai_chat_completion_types/chat_completion_message_function_tool_call_param.py +32 -0
  304. memos/types/openai_chat_completion_types/chat_completion_message_param.py +18 -0
  305. memos/types/openai_chat_completion_types/chat_completion_message_tool_call_union_param.py +15 -0
  306. memos/types/openai_chat_completion_types/chat_completion_system_message_param.py +36 -0
  307. memos/types/openai_chat_completion_types/chat_completion_tool_message_param.py +30 -0
  308. memos/types/openai_chat_completion_types/chat_completion_user_message_param.py +34 -0
  309. memos/utils.py +123 -0
  310. memos/vec_dbs/__init__.py +0 -0
  311. memos/vec_dbs/base.py +117 -0
  312. memos/vec_dbs/factory.py +23 -0
  313. memos/vec_dbs/item.py +50 -0
  314. memos/vec_dbs/milvus.py +654 -0
  315. memos/vec_dbs/qdrant.py +355 -0
@@ -0,0 +1,132 @@
1
+ import copy
2
+
3
+ from memos.chunkers import ChunkerFactory
4
+ from memos.configs.chunker import ChunkerConfigFactory
5
+ from memos.configs.parser import ParserConfigFactory
6
+ from memos.parsers.factory import ParserFactory
7
+ from memos.types import MessageList
8
+
9
+
10
class Splitter:
    """Split chat histories or raw text into chunks.

    Message lists are split on conversational structure (see
    ``_split_with_lookback`` and ``_split_with_overlap``); any other input is
    parsed and chunked with the parser/chunker built in ``__init__``.
    """

    # Window used by ``_split_with_overlap``: 5 turns (Q + A = 10 messages) per
    # chunk, with 1 turn (Q + A = 2 messages) repeated at the start of the next.
    _OVERLAP_CHUNK_MESSAGES = 10
    _OVERLAP_CONTEXT_MESSAGES = 2

    def __init__(
        self,
        lookback_turns: int = 1,
        chunk_size: int = 256,
        chunk_overlap: int = 128,
        min_sentences_per_chunk: int = 1,
        tokenizer: str = "gpt2",
        parser_backend: str = "markitdown",
        chunker_backend: str = "sentence",
    ):
        """Initialize the splitter and build its parser and chunker.

        Args:
            lookback_turns: Number of QA pairs each lookback chunk may contain.
            chunk_size: Passed to the chunker config as ``chunk_size``.
            chunk_overlap: Passed to the chunker config as ``chunk_overlap``.
            min_sentences_per_chunk: Passed to the chunker config as
                ``min_sentences_per_chunk``.
            tokenizer: Passed to the chunker config as
                ``tokenizer_or_token_counter``.
            parser_backend: Backend name handed to ``ParserConfigFactory``.
            chunker_backend: Backend name handed to ``ChunkerConfigFactory``.
        """
        self.lookback_turns = lookback_turns
        self.chunk_size = chunk_size
        self.chunk_overlap = chunk_overlap
        self.min_sentences_per_chunk = min_sentences_per_chunk
        self.tokenizer = tokenizer
        self.chunker_backend = chunker_backend
        self.parser_backend = parser_backend

        # Initialize parser
        parser_config = ParserConfigFactory.model_validate(
            {
                "backend": self.parser_backend,
                "config": {},
            }
        )
        self.parser = ParserFactory.from_config(parser_config)

        # Initialize chunker
        chunker_config = ChunkerConfigFactory.model_validate(
            {
                "backend": self.chunker_backend,
                "config": {
                    "tokenizer_or_token_counter": self.tokenizer,
                    "chunk_size": self.chunk_size,
                    "chunk_overlap": self.chunk_overlap,
                    "min_sentences_per_chunk": self.min_sentences_per_chunk,
                },
            }
        )
        self.chunker = ChunkerFactory.from_config(chunker_config)

    def _split_with_lookback(self, data: MessageList) -> list[MessageList]:
        """Emit one chunk per QA pair, each including the preceding pairs.

        Chunk ``i`` holds the QA pairs ending at pair ``i`` and reaching back at
        most ``lookback_turns`` pairs, flattened into a single message list.
        With the default ``lookback_turns=1`` every chunk is exactly one pair;
        larger values make adjacent chunks share most of their messages.

        Args:
            data: Chat history to split.

        Returns:
            One flattened message list per QA pair found in ``data``.
        """
        pairs = self.build_qa_pairs(data)
        chunks = []
        for end in range(1, len(pairs) + 1):
            start = max(0, end - self.lookback_turns)
            # Flatten list[list[dict]] into a single MessageList.
            chunks.append([message for pair in pairs[start:end] for message in pair])
        return chunks

    def _split_with_overlap(self, data: MessageList) -> list[MessageList]:
        """Cut the message list into fixed-size windows with a small overlap.

        A chunk is closed once it holds ``_OVERLAP_CHUNK_MESSAGES`` messages;
        if more input follows, the next chunk starts with deep copies of the
        last ``_OVERLAP_CONTEXT_MESSAGES`` messages. Any remaining messages
        form a final, shorter chunk.

        Args:
            data: Chat history to split.

        Returns:
            The list of message chunks, in input order.
        """
        chunks = []
        chunk = []
        total = len(data)
        for i, item in enumerate(data):
            chunk.append(item)
            if len(chunk) >= self._OVERLAP_CHUNK_MESSAGES:
                chunks.append(chunk)
                # Only carry context forward when there is more input; otherwise
                # the trailing chunk would consist of nothing but repeated messages.
                has_more = i + 1 < total
                chunk = (
                    copy.deepcopy(chunk[-self._OVERLAP_CONTEXT_MESSAGES :]) if has_more else []
                )
        if chunk:
            chunks.append(chunk)

        return chunks

    def split_chunks(self, data: MessageList | str, **kwargs) -> list[MessageList] | list[str]:
        """Split the messages or files into chunks.

        Args:
            data: MessageList or string to split.
            **kwargs: For list input, ``split_type`` selects the strategy and
                must be ``"lookback"`` or ``"overlap"``.

        Returns:
            List of MessageList chunks for list input, or a list of the text of
            each chunk produced by the chunker for any other input.

        Raises:
            ValueError: If ``data`` is a list and ``split_type`` is missing or
                not one of the supported values.
        """
        if isinstance(data, list):
            split_type = kwargs.get("split_type")
            if split_type == "lookback":
                return self._split_with_lookback(data)
            if split_type == "overlap":
                return self._split_with_overlap(data)
            # Fail with a clear message instead of falling through to an
            # unbound local variable.
            raise ValueError(
                f"Unsupported split_type for message data: {split_type!r}; "
                "expected 'lookback' or 'overlap'"
            )

        # Parse and chunk the string data using pre-initialized components
        text = self.parser.parse(data)
        return [chunk.text for chunk in self.chunker.chunk(text)]

    def build_qa_pairs(self, chat_history: MessageList) -> list[MessageList]:
        """Group a chat history into question/answer pairs.

        User messages accumulate until an assistant message arrives, which
        closes the pair. An assistant message with no pending user message is
        skipped, messages with any other role are ignored, and user messages
        left without an assistant reply at the end are dropped.

        Args:
            chat_history: Messages to group; each must have a ``"role"`` key.

        Returns:
            A list of pairs, each a list of the user message(s) followed by the
            assistant message that answered them.
        """
        qa_pairs = []
        pending = []

        for message in chat_history:
            role = message["role"]
            if role == "user":
                pending.append(message)
            elif role == "assistant":
                if not pending:
                    continue
                pending.append(message)
                qa_pairs.append(pending)
                pending = []  # start collecting the next pair

        return qa_pairs
@@ -0,0 +1,93 @@
1
+ import json
2
+ import re
3
+
4
+ from memos.dependency import require_python_package
5
+ from memos.memories.textual.item import TextualMemoryItem
6
+ from memos.types import MessageList
7
+
8
+
9
def convert_messages_to_string(messages: MessageList) -> str:
    """Render a chat history as plain text, one ``Role: content`` line per message.

    System messages and messages with an unrecognised role are left out.
    String content is stripped; non-string content (for example a list of
    content parts) is serialised with ``json.dumps``. User messages with empty
    content and assistant messages with neither content nor tool calls produce
    no line.

    Args:
        messages: Messages to render; each must have a ``"role"`` key.

    Returns:
        The rendered lines joined by newlines, with surrounding whitespace removed.
    """
    lines = []
    for message in messages:
        role = message["role"]
        if role == "system":
            continue

        raw_content = message.get("content", "")
        if raw_content is None:
            # An explicit null content (for example on an assistant message
            # that only carries tool calls) would otherwise be serialised as
            # the literal text "null".
            content = ""
        elif isinstance(raw_content, str):
            content = raw_content.strip()
        else:
            content = json.dumps(raw_content, ensure_ascii=False).strip()

        if role == "user":
            if content:
                lines.append(f"User: {content}")
        elif role == "assistant":
            tool_calls = message.get("tool_calls", [])
            tool_calls_str = (
                f"[tool_calls]: {json.dumps(tool_calls, ensure_ascii=False)}" if tool_calls else ""
            )
            if content or tool_calls_str:
                lines.append(f"Assistant: {content} {tool_calls_str}".strip())
        elif role == "tool":
            tool_call_id = message.get("tool_call_id", "")
            if tool_call_id:
                lines.append(f"Tool: {content} [tool_call_id]: {tool_call_id}".strip())
            else:
                lines.append(f"Tool: {content}".strip())

    # Collect-then-join avoids repeated string concatenation in the loop.
    return "\n".join(lines).strip()
43
+
44
+
45
@require_python_package(
    import_name="datasketch",
    install_command="pip install datasketch",
    install_link="https://github.com/ekzhu/datasketch",
)
def deduplicate_preferences(
    prefs: list[TextualMemoryItem], similarity_threshold: float = 0.6, num_perm: int = 256
) -> list[TextualMemoryItem]:
    """
    Drop near-duplicate preference items using MinHash signatures and an LSH index.

    Items are visited in order; an item is kept only when the index reports no
    similar item among those already kept, so the first occurrence wins.

    Args:
        prefs: List of preference memory items to deduplicate
        similarity_threshold: Jaccard similarity threshold (0.0-1.0) given to the
            LSH index, default 0.6
        num_perm: Number of permutations used for each MinHash and for the index,
            default 256

    Returns:
        Deduplicated list of preference items (the input itself when it is empty)
    """
    from datasketch import MinHash, MinHashLSH

    if not prefs:
        return prefs

    index = MinHashLSH(threshold=similarity_threshold, num_perm=num_perm)
    kept = []

    for position, item in enumerate(prefs):
        # Prefer the extracted preference text; fall back to the raw memory.
        text = getattr(item.metadata, "preference", None) or item.memory

        # Signature over the lower-cased word tokens of the text.
        signature = MinHash(num_perm=num_perm)
        for word in re.findall(r"\w+", text.lower()):
            signature.update(word.encode("utf8"))

        if index.query(signature):
            # Something similar has already been kept.
            continue

        index.insert(position, signature)
        kept.append(item)

    return kept
@@ -0,0 +1,344 @@
1
+ import json
2
+ import os
3
+
4
+ from datetime import datetime
5
+ from typing import Any
6
+
7
+ from memos.configs.memory import PreferenceTextMemoryConfig
8
+ from memos.embedders.factory import (
9
+ ArkEmbedder,
10
+ EmbedderFactory,
11
+ OllamaEmbedder,
12
+ SenTranEmbedder,
13
+ UniversalAPIEmbedder,
14
+ )
15
+ from memos.llms.factory import AzureLLM, LLMFactory, OllamaLLM, OpenAILLM
16
+ from memos.log import get_logger
17
+ from memos.memories.textual.base import BaseTextMemory
18
+ from memos.memories.textual.item import PreferenceTextualMemoryMetadata, TextualMemoryItem
19
+ from memos.memories.textual.prefer_text_memory.factory import (
20
+ AdderFactory,
21
+ ExtractorFactory,
22
+ RetrieverFactory,
23
+ )
24
+ from memos.reranker.factory import RerankerFactory
25
+ from memos.types import MessageList
26
+ from memos.vec_dbs.factory import MilvusVecDB, QdrantVecDB, VecDBFactory
27
+ from memos.vec_dbs.item import VecDBItem
28
+
29
+
30
+ logger = get_logger(__name__)
31
+
32
+
33
+ class PreferenceTextMemory(BaseTextMemory):
34
+ """Preference textual memory implementation for storing and retrieving memories."""
35
+
36
def __init__(self, config: PreferenceTextMemoryConfig):
    """Build every component of the preference memory from ``config``.

    The LLM, vector database, embedder and reranker are created first; the
    extractor, adder and retriever are then created with those instances
    injected.

    Args:
        config: Configuration whose ``extractor_llm``, ``vector_db``,
            ``embedder``, ``reranker``, ``extractor``, ``adder`` and
            ``retriever`` sections are passed to the matching factories.
    """
    self.config: PreferenceTextMemoryConfig = config

    # Backends shared by the higher-level components below.
    self.extractor_llm: OpenAILLM | OllamaLLM | AzureLLM = LLMFactory.from_config(
        config.extractor_llm
    )
    self.vector_db: MilvusVecDB | QdrantVecDB = VecDBFactory.from_config(config.vector_db)
    self.embedder: OllamaEmbedder | ArkEmbedder | SenTranEmbedder | UniversalAPIEmbedder = (
        EmbedderFactory.from_config(config.embedder)
    )
    self.reranker = RerankerFactory.from_config(config.reranker)

    shared_backends = {
        "llm_provider": self.extractor_llm,
        "embedder": self.embedder,
        "vector_db": self.vector_db,
    }

    self.extractor = ExtractorFactory.from_config(config.extractor, **shared_backends)
    self.adder = AdderFactory.from_config(config.adder, **shared_backends)
    # The retriever is the only component that also receives the reranker.
    self.retriever = RetrieverFactory.from_config(
        config.retriever, reranker=self.reranker, **shared_backends
    )
68
+
69
def get_memory(
    self, messages: list[MessageList], type: str, info: dict[str, Any]
) -> list[TextualMemoryItem]:
    """Extract memory items from message chunks by delegating to the extractor.

    Args:
        messages (list[MessageList]): The message chunks to get memory from.
        type (str): The type of memory to get.
        info (dict[str, Any]): Extra information forwarded to the extractor.
    Returns:
        list[TextualMemoryItem]: Whatever ``self.extractor.extract`` returns.
    """
    extracted = self.extractor.extract(messages, type, info)
    return extracted
79
+
80
def search(
    self, query: str, top_k: int, info=None, search_filter=None, **kwargs
) -> list[TextualMemoryItem]:
    """Search for memories based on a query.

    The filter handed to the retriever always contains
    ``"status": "activated"``; a ``search_filter`` that is not a dict is
    replaced by an empty one. The caller's dict is never modified.

    Args:
        query (str): The query to search for.
        top_k (int): The number of top results to return.
        info (dict): Leave a record of memory consumption.
        search_filter (dict | None): Extra filter conditions for the retriever.
        **kwargs: Accepted for call compatibility; not used here.
    Returns:
        list[TextualMemoryItem]: List of matching memories.
    """
    base_filter = search_filter if isinstance(search_filter, dict) else {}
    # Build a new dict rather than updating in place, so a filter object the
    # caller reuses does not silently gain the status constraint.
    effective_filter = {**base_filter, "status": "activated"}
    logger.info(f"search_filter for preference memory: {effective_filter}")
    return self.retriever.retrieve(query, top_k, info, effective_filter)
96
+
97
def load(self, dir: str) -> None:
    """Load dumped memories from a directory into the vector database.

    Reads ``self.config.memory_filename`` inside ``dir``. The JSON payload is
    expected to map each collection name to a list of serialized items, which
    are added to that collection. Every failure is logged and swallowed;
    nothing is raised to the caller.

    Args:
        dir (str): The directory containing the memory file.
    """
    try:
        memory_file = os.path.join(dir, self.config.memory_filename)

        if os.path.exists(memory_file):
            with open(memory_file, encoding="utf-8") as f:
                memories = json.load(f)

            for collection_name, items in memories.items():
                self.vector_db.add(collection_name, [VecDBItem.from_dict(m) for m in items])
                logger.info(f"Loaded {len(items)} memories from {collection_name} in {memory_file}")
        else:
            logger.warning(f"Memory file not found: {memory_file}")

    except FileNotFoundError:
        logger.error(f"Memory file not found in directory: {dir}")
    except json.JSONDecodeError as e:
        # A decode failure at position 0 means there was nothing to parse,
        # which is reported less severely than a malformed file.
        is_blank_file = e.pos == 0 and "Expecting value" in str(e)
        if is_blank_file:
            logger.warning(f"Memory file is empty or contains only whitespace: {memory_file}")
        else:
            logger.error(f"Error decoding JSON from memory file: {e}")
    except Exception as e:
        logger.error(f"An error occurred while loading memories: {e}")
127
+
128
def dump(self, dir: str) -> None:
    """Write every configured collection to a JSON file in a directory.

    For each name in ``self.vector_db.config.collection_name`` all items are
    fetched and serialized with ``to_dict``. The result is written to
    ``self.config.memory_filename`` inside ``dir``, which is created if needed.
    Errors are logged and then re-raised.

    Args:
        dir (str): The directory where the memory file will be saved.
    """
    try:
        json_memories = {
            collection_name: [
                memory.to_dict() for memory in self.vector_db.get_all(collection_name)
            ]
            for collection_name in self.vector_db.config.collection_name
        }

        os.makedirs(dir, exist_ok=True)
        memory_file = os.path.join(dir, self.config.memory_filename)
        with open(memory_file, "w", encoding="utf-8") as f:
            json.dump(json_memories, f, indent=4, ensure_ascii=False)

        collection_count = len(json_memories)
        memory_count = sum(len(items) for items in json_memories.values())
        logger.info(
            f"Dumped {collection_count} collections, {memory_count} memories to {memory_file}"
        )

    except Exception as e:
        logger.error(f"An error occurred while dumping memories: {e}")
        raise
153
+
154
def extract(self, messages: MessageList) -> list[TextualMemoryItem]:
    """Extract memories from messages (not implemented for this memory type).

    Args:
        messages (MessageList): The messages to extract memories from.
    Returns:
        list[TextualMemoryItem]: List of extracted memory items.
    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError()
162
+
163
def add(self, memories: list[TextualMemoryItem | dict[str, Any]]) -> list[str]:
    """Add memories by delegating to the configured adder.

    Args:
        memories: List of TextualMemoryItem objects or dictionaries to add.

    Returns:
        Whatever ``self.adder.add`` returns for ``memories``.
    """
    added = self.adder.add(memories)
    return added
170
+
171
def update(self, memory_id: str, new_memory: TextualMemoryItem | dict[str, Any]) -> None:
    """Update a memory by memory_id (not implemented for this memory type).

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError()
174
+
175
def get(self, memory_id: str, user_name: str | None = None) -> TextualMemoryItem:
    """Get a memory by its ID (not implemented for this memory type).

    Args:
        memory_id (str): The ID of the memory to retrieve.
        user_name (str | None): Unused.
    Returns:
        TextualMemoryItem: The memory with the given ID.
    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError()
183
+
184
def get_with_collection_name(
    self, collection_name: str, memory_id: str
) -> TextualMemoryItem | None:
    """Fetch one memory from a specific collection of the vector database.

    Args:
        collection_name (str): The name of the collection to retrieve the memory from.
        memory_id (str): The ID of the memory to retrieve.
    Returns:
        TextualMemoryItem | None: The memory built from the stored record, or
        ``None`` when the vector database returns no record.
    Raises:
        ValueError: If the lookup or the conversion of the record fails for
            any reason; the original exception is chained as the cause.
    """
    try:
        record = self.vector_db.get_by_id(collection_name, memory_id)
        if record is not None:
            return TextualMemoryItem(
                id=record.id,
                memory=record.memory,
                metadata=PreferenceTextualMemoryMetadata(**record.payload),
            )
        return None
    except Exception as e:
        # Every failure is surfaced as ValueError so callers handle one type.
        raise ValueError(
            f"Memory with ID {memory_id} not found in collection {collection_name}: {e}"
        ) from e
208
+
209
def get_by_ids(self, memory_ids: list[str]) -> list[TextualMemoryItem]:
    """Retrieve several memories by their IDs.

    This operation is not implemented here; calling it always raises.

    Args:
        memory_ids (list[str]): List of memory IDs to retrieve.

    Returns:
        list[TextualMemoryItem]: List of memories with the specified IDs.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError
217
+
218
def get_by_ids_with_collection_name(
    self, collection_name: str, memory_ids: list[str]
) -> list[TextualMemoryItem]:
    """Look up several memories in a specific collection.

    Args:
        collection_name (str): The name of the collection to read from.
        memory_ids (list[str]): List of memory IDs to retrieve.

    Returns:
        list[TextualMemoryItem]: The memories returned by the vector DB, or
            an empty list when the DB returns nothing.

    Raises:
        ValueError: If the lookup or the item construction raises; the
            original exception is chained as the cause.
    """
    try:
        records = self.vector_db.get_by_ids(collection_name, memory_ids)
        found = []
        if records:
            for record in records:
                found.append(
                    TextualMemoryItem(
                        id=record.id,
                        memory=record.memory,
                        metadata=PreferenceTextualMemoryMetadata(**record.payload),
                    )
                )
        return found
    except Exception as e:
        # Every failure is surfaced as ValueError so callers handle one type.
        raise ValueError(
            f"Memory with IDs {memory_ids} not found in collection {collection_name}: {e}"
        ) from e
245
+
246
def get_all(self) -> dict[str, list[TextualMemoryItem]]:
    """Get all preference memories, grouped by collection.

    Reads every item from the "explicit_preference" and
    "implicit_preference" collections of the vector DB.

    Returns:
        dict[str, list[TextualMemoryItem]]: Mapping from collection name to
            the memories stored in that collection. Note this is a dict,
            not a flat list.
    """
    all_collections = ["explicit_preference", "implicit_preference"]
    all_memories = {}
    for collection_name in all_collections:
        items = self.vector_db.get_all(collection_name)
        all_memories[collection_name] = [
            TextualMemoryItem(
                id=memo.id,
                memory=memo.memory,
                metadata=PreferenceTextualMemoryMetadata(**memo.payload),
            )
            for memo in items
        ]
    return all_memories
264
+
265
def get_memory_by_filter(
    self,
    filter: dict[str, Any] | None = None,
    page: int | None = None,
    page_size: int | None = None,
) -> tuple[list[TextualMemoryItem], int]:
    """Get memories matching a filter, newest first, optionally paginated.

    Every collection listed in ``self.vector_db.config.collection_name`` is
    queried and the results are merged, then sorted by
    ``metadata.created_at`` in descending order.

    Args:
        filter (dict[str, Any] | None): Filter criteria passed to the vector DB.
        page (int | None): 1-based page number. Values below 1 are treated as 1.
        page_size (int | None): Items per page. Values below 1 are treated as 10.

    Returns:
        tuple[list[TextualMemoryItem], int]: The selected memories and the
            total number of matches across all collections. Pagination is
            applied only when both ``page`` and ``page_size`` are truthy;
            otherwise (None or 0) every match is returned.
    """
    memories = []
    for collection_name in self.vector_db.config.collection_name:
        db_items = self.vector_db.get_by_filter(collection_name=collection_name, filter=filter)
        memories.extend(
            TextualMemoryItem(
                id=memo.id,
                memory=memo.memory,
                metadata=PreferenceTextualMemoryMetadata(**memo.payload),
            )
            for memo in db_items
        )

    # Newest first; created_at is parsed with datetime.fromisoformat
    # (assumes an ISO-8601 string -- confirm against the metadata model).
    sorted_memories = sorted(
        memories,
        key=lambda item: datetime.fromisoformat(item.metadata.created_at),
        reverse=True,
    )
    total = len(sorted_memories)

    if page and page_size:
        # Clamp out-of-range (negative) values to usable defaults.
        if page < 1:
            page = 1
        if page_size < 1:
            page_size = 10
        start = (page - 1) * page_size
        return sorted_memories[start : start + page_size], total

    return sorted_memories, total
307
+
308
def delete(self, memory_ids: list[str]) -> None:
    """Delete the given memory IDs from every configured collection.

    Args:
        memory_ids (list[str]): List of memory IDs to delete.
    """
    vector_db = self.vector_db
    for name in vector_db.config.collection_name:
        vector_db.delete(name, memory_ids)
316
+
317
def delete_by_filter(self, filter: dict[str, Any]) -> None:
    """Delete memories matching a filter from every configured collection.

    Args:
        filter (dict[str, Any]): Filter criteria passed to the vector DB.
    """
    vector_db = self.vector_db
    for name in vector_db.config.collection_name:
        vector_db.delete_by_filter(collection_name=name, filter=filter)
325
+
326
def delete_with_collection_name(self, collection_name: str, memory_ids: list[str]) -> None:
    """Delete the given memory IDs from a single collection.

    Args:
        collection_name (str): The name of the collection to delete from.
        memory_ids (list[str]): List of memory IDs to delete.
    """
    vector_db = self.vector_db
    vector_db.delete(collection_name, memory_ids)
333
+
334
def delete_all(self) -> None:
    """Delete all memories by dropping every configured collection."""
    vector_db = self.vector_db
    for name in vector_db.config.collection_name:
        vector_db.delete_collection(name)
    # NOTE(review): presumably recreates the configured collections so the
    # store stays usable; called once after all drops -- confirm placement
    # against the upstream file.
    vector_db.create_collection()
339
+
340
def drop(self) -> None:
    """Drop all databases.

    This operation is not implemented here; calling it always raises.

    Raises:
        NotImplementedError: Always.
    """
    raise NotImplementedError