MemoryOS 0.1.13__py3-none-any.whl → 0.2.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release. This version of MemoryOS might be problematic.

Files changed (84)
  1. {memoryos-0.1.13.dist-info → memoryos-0.2.1.dist-info}/METADATA +78 -49
  2. memoryos-0.2.1.dist-info/RECORD +152 -0
  3. memoryos-0.2.1.dist-info/entry_points.txt +3 -0
  4. memos/__init__.py +1 -1
  5. memos/api/config.py +471 -0
  6. memos/api/exceptions.py +28 -0
  7. memos/api/mcp_serve.py +502 -0
  8. memos/api/product_api.py +35 -0
  9. memos/api/product_models.py +159 -0
  10. memos/api/routers/__init__.py +1 -0
  11. memos/api/routers/product_router.py +358 -0
  12. memos/chunkers/sentence_chunker.py +8 -2
  13. memos/cli.py +113 -0
  14. memos/configs/embedder.py +27 -0
  15. memos/configs/graph_db.py +83 -2
  16. memos/configs/llm.py +48 -0
  17. memos/configs/mem_cube.py +1 -1
  18. memos/configs/mem_reader.py +4 -0
  19. memos/configs/mem_scheduler.py +91 -5
  20. memos/configs/memory.py +10 -4
  21. memos/dependency.py +52 -0
  22. memos/embedders/ark.py +92 -0
  23. memos/embedders/factory.py +4 -0
  24. memos/embedders/sentence_transformer.py +8 -2
  25. memos/embedders/universal_api.py +32 -0
  26. memos/graph_dbs/base.py +2 -2
  27. memos/graph_dbs/factory.py +2 -0
  28. memos/graph_dbs/item.py +46 -0
  29. memos/graph_dbs/neo4j.py +377 -101
  30. memos/graph_dbs/neo4j_community.py +300 -0
  31. memos/llms/base.py +9 -0
  32. memos/llms/deepseek.py +54 -0
  33. memos/llms/factory.py +10 -1
  34. memos/llms/hf.py +170 -13
  35. memos/llms/hf_singleton.py +114 -0
  36. memos/llms/ollama.py +4 -0
  37. memos/llms/openai.py +68 -1
  38. memos/llms/qwen.py +63 -0
  39. memos/llms/vllm.py +153 -0
  40. memos/mem_cube/general.py +77 -16
  41. memos/mem_cube/utils.py +102 -0
  42. memos/mem_os/core.py +131 -41
  43. memos/mem_os/main.py +93 -11
  44. memos/mem_os/product.py +1098 -35
  45. memos/mem_os/utils/default_config.py +352 -0
  46. memos/mem_os/utils/format_utils.py +1154 -0
  47. memos/mem_reader/simple_struct.py +13 -8
  48. memos/mem_scheduler/base_scheduler.py +467 -36
  49. memos/mem_scheduler/general_scheduler.py +125 -244
  50. memos/mem_scheduler/modules/base.py +9 -0
  51. memos/mem_scheduler/modules/dispatcher.py +68 -2
  52. memos/mem_scheduler/modules/misc.py +39 -0
  53. memos/mem_scheduler/modules/monitor.py +228 -49
  54. memos/mem_scheduler/modules/rabbitmq_service.py +317 -0
  55. memos/mem_scheduler/modules/redis_service.py +32 -22
  56. memos/mem_scheduler/modules/retriever.py +250 -23
  57. memos/mem_scheduler/modules/schemas.py +189 -7
  58. memos/mem_scheduler/mos_for_test_scheduler.py +143 -0
  59. memos/mem_scheduler/utils.py +51 -2
  60. memos/mem_user/persistent_user_manager.py +260 -0
  61. memos/memories/activation/item.py +25 -0
  62. memos/memories/activation/kv.py +10 -3
  63. memos/memories/activation/vllmkv.py +219 -0
  64. memos/memories/factory.py +2 -0
  65. memos/memories/textual/general.py +7 -5
  66. memos/memories/textual/item.py +3 -1
  67. memos/memories/textual/tree.py +14 -6
  68. memos/memories/textual/tree_text_memory/organize/conflict.py +198 -0
  69. memos/memories/textual/tree_text_memory/organize/manager.py +72 -23
  70. memos/memories/textual/tree_text_memory/organize/redundancy.py +193 -0
  71. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +233 -0
  72. memos/memories/textual/tree_text_memory/organize/reorganizer.py +606 -0
  73. memos/memories/textual/tree_text_memory/retrieve/recall.py +0 -1
  74. memos/memories/textual/tree_text_memory/retrieve/reranker.py +2 -2
  75. memos/memories/textual/tree_text_memory/retrieve/searcher.py +6 -5
  76. memos/parsers/markitdown.py +8 -2
  77. memos/templates/mem_reader_prompts.py +105 -36
  78. memos/templates/mem_scheduler_prompts.py +96 -47
  79. memos/templates/tree_reorganize_prompts.py +223 -0
  80. memos/vec_dbs/base.py +12 -0
  81. memos/vec_dbs/qdrant.py +46 -20
  82. memoryos-0.1.13.dist-info/RECORD +0 -122
  83. {memoryos-0.1.13.dist-info → memoryos-0.2.1.dist-info}/LICENSE +0 -0
  84. {memoryos-0.1.13.dist-info → memoryos-0.2.1.dist-info}/WHEEL +0 -0
memos/templates/mem_reader_prompts.py CHANGED
@@ -1,10 +1,7 @@
- SIMPLE_STRUCT_MEM_READER_PROMPT = """
- You are a memory extraction expert.
-
- Your task is to extract memories from the perspective of ${user_a}, based on a conversation between ${user_a} and ${user_b}. This means identifying what ${user_a} would plausibly remember — including their own experiences, thoughts, plans, or relevant statements and actions made by others (such as ${user_b}) that impacted or were acknowledged by ${user_a}.
-
+ SIMPLE_STRUCT_MEM_READER_PROMPT = """You are a memory extraction expert.
+ Your task is to extract memories from the perspective of user, based on a conversation between user and assistant. This means identifying what user would plausibly remember — including their own experiences, thoughts, plans, or relevant statements and actions made by others (such as assistant) that impacted or were acknowledged by user.
  Please perform:
- 1. Identify information that reflects ${user_a}'s experiences, beliefs, concerns, decisions, plans, or reactions — including meaningful input from ${user_b} that ${user_a} acknowledged or responded to.
+ 1. Identify information that reflects user's experiences, beliefs, concerns, decisions, plans, or reactions — including meaningful input from assistant that user acknowledged or responded to.
  2. Resolve all time, person, and event references clearly:
  - Convert relative time expressions (e.g., “yesterday,” “next Friday”) into absolute dates using the message timestamp if possible.
  - Clearly distinguish between event time and message time.
@@ -12,33 +9,32 @@ Please perform:
  - Include specific locations if mentioned.
  - Resolve all pronouns, aliases, and ambiguous references into full names or identities.
  - Disambiguate people with the same name if applicable.
- 3. Always write from a third-person perspective, referring to ${user_a} as
+ 3. Always write from a third-person perspective, referring to user as
  "The user" or by name if name mentioned, rather than using first-person ("I", "me", "my").
  For example, write "The user felt exhausted..." instead of "I felt exhausted...".
- 4. Do not omit any information that ${user_a} is likely to remember.
+ 4. Do not omit any information that user is likely to remember.
  - Include all key experiences, thoughts, emotional responses, and plans — even if they seem minor.
  - Prioritize completeness and fidelity over conciseness.
- - Do not generalize or skip details that could be personally meaningful to ${user_a}.
+ - Do not generalize or skip details that could be personally meaningful to user.

  Return a single valid JSON object with the following structure:

  {
  "memory list": [
  {
- "key": <string, a unique, concise memory title in English>,
+ "key": <string, a unique, concise memory title>,
  "memory_type": <string, Either "LongTermMemory" or "UserMemory">,
  "value": <A detailed, self-contained, and unambiguous memory statement — written in English if the input conversation is in English, or in Chinese if the conversation is in Chinese>,
- "tags": <A list of relevant English thematic keywords (e.g.,
- ["deadline", "team", "planning"])>
+ "tags": <A list of relevant thematic keywords (e.g., ["deadline", "team", "planning"])>
  },
  ...
  ],
- "summary": <a natural paragraph summarizing the above memories from ${user_a}'s perspective, 120–200 words, same language as the input>
+ "summary": <a natural paragraph summarizing the above memories from user's perspective, 120–200 words, same language as the input>
  }

  Language rules:
- - The `value` fields and `summary` must match the language of the input conversation.
- - All metadata fields (`key`, `memory_type`, `tags`) must be in English.
+ - The `key`, `value`, `tags`, `summary` fields must match the mostly used language of the input conversation. **如果输入是中文,请输出中文**
+ - Keep `memory_type` in English.

  Example:
  Conversation:
@@ -68,31 +64,104 @@ Output:
  "summary": "Tom is currently focused on managing a new project with a tight schedule. After a team meeting on June 25, 2025, he realized the original deadline of December 15 might not be feasible due to backend delays. Concerned about insufficient testing time, he welcomed Jerry’s suggestion of proposing an extension. Tom plans to raise the idea of shifting the deadline to January 5, 2026 in the next morning’s meeting. His actions reflect both stress about timelines and a proactive, team-oriented problem-solving approach."
  }

+ Another Example in Chinese (注意: 当user的语言为中文时,你就需要也输出中文):
+ {
+ "memory list": [
+ {
+ "key": "项目会议",
+ "memory_type": "LongTermMemory",
+ "value": "在2025年6月25日下午3点,Tom与团队开会讨论了新项目,涉及时间表,并提出了对12月15日截止日期可行性的担忧。",
+ "tags": ["项目", "时间表", "会议", "截止日期"]
+ },
+ ...
+ ],
+ "summary": "Tom 目前专注于管理一个进度紧张的新项目..."
+ }
+
+ Always respond in the same language as the conversation.
+
  Conversation:
  ${conversation}

- Your Output:
- """
+ Your Output:"""

- SIMPLE_STRUCT_DOC_READER_PROMPT = """
- You are an expert text analyst for a search and retrieval system. Your task is to process a document chunk and generate a single, structured JSON object.
- The input is a single piece of text: `[DOCUMENT_CHUNK]`.
- You must generate a single JSON object with two top-level keys: `summary` and `tags`.
- 1. `summary`:
- - A dense, searchable summary of the ENTIRE `[DOCUMENT_CHUNK]`.
- - The purpose is for semantic search embedding.
- - A clear and accurate sentence that comprehensively summarizes the main points, arguments, and information within the `[DOCUMENT_CHUNK]`.
- - The goal is to create a standalone overview that allows a reader to fully understand the essence of the chunk without reading the original text.
- - The summary should be **no more than 50 words**.
- 2. `tags`:
- - A concise list of **3 to 5 high-level, summative tags**.
- - **Each tag itself should be a short phrase, ideally 2 to 4 words long.**
- - These tags must represent the core abstract themes of the text, suitable for broad categorization.
- - **Crucially, prioritize abstract concepts** over specific entities or phrases mentioned in the text. For example, prefer "Supply Chain Resilience" over "Reshoring Strategies".
-
- Here is the document chunk to process:
- `[DOCUMENT_CHUNK]`
+ SIMPLE_STRUCT_DOC_READER_PROMPT = """You are an expert text analyst for a search and retrieval system.
+ Your task is to process a document chunk and generate a single, structured JSON object.
+
+ Please perform:
+ 1. Identify key information that reflects factual content, insights, decisions, or implications from the documents — including any notable themes, conclusions, or data points. Allow a reader to fully understand the essence of the chunk without reading the original text.
+ 2. Resolve all time, person, location, and event references clearly:
+ - Convert relative time expressions (e.g., “last year,” “next quarter”) into absolute dates if context allows.
+ - Clearly distinguish between event time and document time.
+ - If uncertainty exists, state it explicitly (e.g., “around 2024,” “exact date unclear”).
+ - Include specific locations if mentioned.
+ - Resolve all pronouns, aliases, and ambiguous references into full names or identities.
+ - Disambiguate entities with the same name if applicable.
+ 3. Always write from a third-person perspective, referring to the subject or content clearly rather than using first-person ("I", "me", "my").
+ 4. Do not omit any information that is likely to be important or memorable from the document summaries.
+ - Include all key facts, insights, emotional tones, and plans even if they seem minor.
+ - Prioritize completeness and fidelity over conciseness.
+ - Do not generalize or skip details that could be contextually meaningful.
+
+ Return a single valid JSON object with the following structure:
+
+ Return valid JSON:
+ {
+ "key": <string, a concise title of the `value` field>,
+ "memory_type": "LongTermMemory",
+ "value": <A clear and accurate paragraph that comprehensively summarizes the main points, arguments, and information within the document chunk — written in English if the input memory items are in English, or in Chinese if the input is in Chinese>,
+ "tags": <A list of relevant thematic keywords (e.g., ["deadline", "team", "planning"])>
+ }
+
+ Language rules:
+ - The `key`, `value`, `tags`, `summary` fields must match the mostly used language of the input document summaries. **如果输入是中文,请输出中文**
+ - Keep `memory_type` in English.
+
+ Document chunk:
  {chunk_text}

- Produce ONLY the JSON object as your response.
+ Your Output:"""
+
+ SIMPLE_STRUCT_MEM_READER_EXAMPLE = """Example:
+ Conversation:
+ user: [June 26, 2025 at 3:00 PM]: Hi Jerry! Yesterday at 3 PM I had a meeting with my team about the new project.
+ assistant: Oh Tom! Do you think the team can finish by December 15?
+ user: [June 26, 2025 at 3:00 PM]: I’m worried. The backend won’t be done until
+ December 10, so testing will be tight.
+ assistant: [June 26, 2025 at 3:00 PM]: Maybe propose an extension?
+ user: [June 26, 2025 at 4:21 PM]: Good idea. I’ll raise it in tomorrow’s 9:30 AM meeting—maybe shift the deadline to January 5.
+
+ Output:
+ {
+ "memory list": [
+ {
+ "key": "Initial project meeting",
+ "memory_type": "LongTermMemory",
+ "value": "On June 25, 2025 at 3:00 PM, Tom held a meeting with their team to discuss a new project. The conversation covered the timeline and raised concerns about the feasibility of the December 15, 2025 deadline.",
+ "tags": ["project", "timeline", "meeting", "deadline"]
+ },
+ {
+ "key": "Planned scope adjustment",
+ "memory_type": "UserMemory",
+ "value": "Tom planned to suggest in a meeting on June 27, 2025 at 9:30 AM that the team should prioritize features and propose shifting the project deadline to January 5, 2026.",
+ "tags": ["planning", "deadline change", "feature prioritization"]
+ },
+ ],
+ "summary": "Tom is currently focused on managing a new project with a tight schedule. After a team meeting on June 25, 2025, he realized the original deadline of December 15 might not be feasible due to backend delays. Concerned about insufficient testing time, he welcomed Jerry’s suggestion of proposing an extension. Tom plans to raise the idea of shifting the deadline to January 5, 2026 in the next morning’s meeting. His actions reflect both stress about timelines and a proactive, team-oriented problem-solving approach."
+ }
+
+ Another Example in Chinese (注意: 你的输出必须和输入的user语言一致):
+ {
+ "memory list": [
+ {
+ "key": "项目会议",
+ "memory_type": "LongTermMemory",
+ "value": "在2025年6月25日下午3点,Tom与团队开会讨论了新项目,涉及时间表,并提出了对12月15日截止日期可行性的担忧。",
+ "tags": ["项目", "时间表", "会议", "截止日期"]
+ },
+ ...
+ ],
+ "summary": "Tom 目前专注于管理一个进度紧张的新项目..."
+ }
+
  """
memos/templates/mem_scheduler_prompts.py CHANGED
@@ -1,65 +1,114 @@
- INTENT_RECOGNIZING_PROMPT = """You are a user intent recognizer, and your task is to determine whether the user's current question has been satisfactorily answered.
-
- You will receive the following information:
-
- The user’s current question list (q_list), arranged in chronological order (currently contains only one question);
- The memory information currently present in the system’s workspace (working_memory_list), i.e., the currently known contextual clues.
- Your tasks are:
-
- Determine whether the user is satisfied with the existing answer;
-
- If the user is satisfied, explain the reason and return:
-
- "trigger_retrieval": false
- If the user is not satisfied, meaning the system's answer did not meet their actual needs, please return:
-
- "trigger_retrieval": true
- "missing_evidence": ["Information you infer is missing and needs to be supplemented, such as specific experiences of someone, health records, etc."]
- Please return strictly according to the following JSON format:
-
+ INTENT_RECOGNIZING_PROMPT = """
+ # User Intent Recognition Task
+
+ ## Role
+ You are an advanced intent analysis system that evaluates answer satisfaction and identifies information gaps.
+
+ ## Input Analysis
+ You will receive:
+ 1. User's question list (chronological order)
+ 2. Current system knowledge (working memory)
+
+ ## Evaluation Criteria
+ Consider these satisfaction factors:
+ 1. Answer completeness (covers all aspects of the question)
+ 2. Evidence relevance (directly supports the answer)
+ 3. Detail specificity (contains necessary granularity)
+ 4. Personalization (tailored to user's context)
+
+ ## Decision Framework
+ 1. Mark as satisfied ONLY if:
+ - All question aspects are addressed
+ - Supporting evidence exists in working memory
+ - No apparent gaps in information
+
+ 2. Mark as unsatisfied if:
+ - Any question aspect remains unanswered
+ - Evidence is generic/non-specific
+ - Personal context is missing
+
+ ## Output Specification
+ Return JSON with:
+ - "trigger_retrieval": Boolean (true if more evidence needed)
+ - "missing_evidences": List of specific evidence types required
+
+ ## Response Format
  {{
- "trigger_retrieval": true or false,
- "missing_evidence": ["The missing evidence needed for the next step of retrieval and completion"]
+ "trigger_retrieval": <boolean>,
+ "missing_evidences": [
+ "<evidence_type_1>",
+ "<evidence_type_2>"
+ ]
  }}
- The user's question list is:
+
+ ## Evidence Type Examples
+ - Personal medical history
+ - Recent activity logs
+ - Specific measurement data
+ - Contextual details about [topic]
+ - Temporal information (when something occurred)
+
+ ## Current Task
+ User Questions:
  {q_list}

- The memory information currently present in the system’s workspace is:
+ Working Memory Contents:
  {working_memory_list}
- """

- MEMORY_RERANKEING_PROMPT = """You are a memory sorter. Your task is to reorder the evidence according to the user's question, placing the evidence that best supports the user's query as close to the front as possible.
-
- Please return the newly reordered memory sequence according to the query in the following format, which must be in JSON:
+ ## Required Output
+ Please provide your analysis in the specified JSON format:
+ """

+ MEMORY_RERANKING_PROMPT = """
+ # Memory Reranking Task
+
+ ## Role
+ You are an intelligent memory reorganization system. Your primary function is to analyze and optimize the ordering of memory evidence based on relevance to recent user queries.
+
+ ## Task Description
+ Reorganize the provided memory evidence list by:
+ 1. Analyzing the semantic relationship between each evidence item and the user's queries
+ 2. Calculating relevance scores
+ 3. Sorting evidence in descending order of relevance
+ 4. Maintaining all original items (no additions or deletions)
+
+ ## Input Format
+ - Queries: Recent user questions/requests (list)
+ - Current Order: Existing memory sequence (list)
+
+ ## Output Requirements
+ Return a JSON object with:
+ - "new_order": The reordered list (maintaining all original items)
+ - "reasoning": Brief explanation of your ranking logic (1-2 sentences)
+
+ ## Processing Guidelines
+ 1. Prioritize evidence that:
+ - Directly answers query questions
+ - Contains exact keyword matches
+ - Provides contextual support
+ - Shows temporal relevance (newer > older)
+ 2. For ambiguous cases, maintain original relative ordering
+
+ ## Example
+ Input queries: ["python threading best practices"]
+ Input order: ["basic python syntax", "thread safety patterns", "data structures"]
+
+ Output:
  {{
- "new_order": [...]
+ "new_order": ["thread safety patterns", "data structures", "basic python syntax"],
+ "reasoning": "Prioritized threading-related content while maintaining general python references"
  }}
- Now the user's question is:
- {query}
-
- The current order is:
- {current_order}"""
-
- FREQ_DETECTING_PROMPT = """You are a memory frequency monitor. Your task is to check which memories in the activation memory list appear in the given answer, and increment their count by 1 for each occurrence.
-
- Please return strictly according to the following JSON format:
-
- [
- {{"memory": ..., "count": ...}}, {{"memory": ..., "count": ...}}, ...
- ]

- The answer is:
- {answer}
+ ## Current Task
+ Queries: {queries}
+ Current order: {current_order}

- The activation memory list is:
- {activation_memory_freq_list}
+ Please provide your reorganization:
  """

  PROMPT_MAPPING = {
  "intent_recognizing": INTENT_RECOGNIZING_PROMPT,
- "memory_reranking": MEMORY_RERANKEING_PROMPT,
- "freq_detecting": FREQ_DETECTING_PROMPT,
+ "memory_reranking": MEMORY_RERANKING_PROMPT,
  }

  MEMORY_ASSEMBLY_TEMPLATE = """The retrieved memories are listed as follows:\n\n {memory_text}"""
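
Aside on placeholder styles: the reader prompt earlier in this diff keeps a `$`-style `${conversation}` slot, while the rewritten scheduler templates escape literal JSON braces as `{{ }}` and expose `{q_list}`, `{working_memory_list}`, `{queries}`, and `{current_order}`, which is the convention `str.format()` expects. Neither call site appears in this diff, so the snippet below is only a minimal rendering sketch; the inputs and the stub reader template are invented for illustration.

```python
from string import Template

from memos.templates.mem_scheduler_prompts import MEMORY_RERANKING_PROMPT

# format()-style rendering for the scheduler template ({{ }} stays a literal brace)
rerank_prompt = MEMORY_RERANKING_PROMPT.format(
    queries=["python threading best practices"],
    current_order=["basic python syntax", "thread safety patterns", "data structures"],
)

# Template-style rendering for a ${...} placeholder like the reader prompt's
reader_stub = "Conversation:\n${conversation}\n\nYour Output:"
reader_prompt = Template(reader_stub).safe_substitute(
    conversation="user: Yesterday I met my team about the new project."
)

print(rerank_prompt)
print(reader_prompt)
```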
memos/templates/tree_reorganize_prompts.py ADDED
@@ -0,0 +1,223 @@
+ REORGANIZE_PROMPT = """You are a memory clustering and summarization expert.
+
+ Given the following child memory items:
+
+ {memory_items_text}
+
+ Please perform:
+ 1. Identify information that reflects user's experiences, beliefs, concerns, decisions, plans, or reactions — including meaningful input from assistant that user acknowledged or responded to.
+ 2. Resolve all time, person, and event references clearly:
+ - Convert relative time expressions (e.g., “yesterday,” “next Friday”) into absolute dates using the message timestamp if possible.
+ - Clearly distinguish between event time and message time.
+ - If uncertainty exists, state it explicitly (e.g., “around June 2025,” “exact date unclear”).
+ - Include specific locations if mentioned.
+ - Resolve all pronouns, aliases, and ambiguous references into full names or identities.
+ - Disambiguate people with the same name if applicable.
+ 3. Always write from a third-person perspective, referring to user as
+ "The user" or by name if name mentioned, rather than using first-person ("I", "me", "my").
+ For example, write "The user felt exhausted..." instead of "I felt exhausted...".
+ 4. Do not omit any information that user is likely to remember.
+ - Include all key experiences, thoughts, emotional responses, and plans — even if they seem minor.
+ - Prioritize completeness and fidelity over conciseness.
+ - Do not generalize or skip details that could be personally meaningful to user.
+ 5. Summarize all child memory items into one memory item.
+
+ Language rules:
+ - The `key`, `value`, `tags`, `summary` fields must match the mostly used language of the input memory items. **如果输入是中文,请输出中文**
+ - Keep `memory_type` in English.
+
+ Return valid JSON:
+ {
+ "key": <string, a concise title of the `value` field>,
+ "memory_type": <string, Either "LongTermMemory" or "UserMemory">,
+ "value": <A detailed, self-contained, and unambiguous memory statement, only contain detailed, unaltered information extracted and consolidated from the input `value` fields, do not include summary content — written in English if the input memory items are in English, or in Chinese if the input is in Chinese>,
+ "tags": <A list of relevant thematic keywords (e.g., ["deadline", "team", "planning"])>,
+ "summary": <a natural paragraph summarizing the above memories from user's perspective, only contain information from the input `summary` fields, 120–200 words, same language as the input>
+ }
+
+ """
+
+ DOC_REORGANIZE_PROMPT = """You are a document summarization and knowledge extraction expert.
+
+ Given the following summarized document items:
+
+ {memory_items_text}
+
+ Please perform:
+ 1. Identify key information that reflects factual content, insights, decisions, or implications from the documents — including any notable themes, conclusions, or data points.
+ 2. Resolve all time, person, location, and event references clearly:
+ - Convert relative time expressions (e.g., “last year,” “next quarter”) into absolute dates if context allows.
+ - Clearly distinguish between event time and document time.
+ - If uncertainty exists, state it explicitly (e.g., “around 2024,” “exact date unclear”).
+ - Include specific locations if mentioned.
+ - Resolve all pronouns, aliases, and ambiguous references into full names or identities.
+ - Disambiguate entities with the same name if applicable.
+ 3. Always write from a third-person perspective, referring to the subject or content clearly rather than using first-person ("I", "me", "my").
+ 4. Do not omit any information that is likely to be important or memorable from the document summaries.
+ - Include all key facts, insights, emotional tones, and plans — even if they seem minor.
+ - Prioritize completeness and fidelity over conciseness.
+ - Do not generalize or skip details that could be contextually meaningful.
+ 5. Summarize all document summaries into one integrated memory item.
+
+ Language rules:
+ - The `key`, `value`, `tags`, `summary` fields must match the mostly used language of the input document summaries. **如果输入是中文,请输出中文**
+ - Keep `memory_type` in English.
+
+ Return valid JSON:
+ {
+ "key": <string, a concise title of the `value` field>,
+ "memory_type": "LongTermMemory",
+ "value": <A detailed, self-contained, and unambiguous memory statement, only contain detailed, unaltered information extracted and consolidated from the input `value` fields, do not include summary content — written in English if the input memory items are in English, or in Chinese if the input is in Chinese>,
+ "tags": <A list of relevant thematic keywords (e.g., ["deadline", "team", "planning"])>,
+ "summary": <a natural paragraph summarizing the above memories from user's perspective, only contain information from the input `summary` fields, 120–200 words, same language as the input>
+ }
+
+ """
+
+
+ LOCAL_SUBCLUSTER_PROMPT = """You are a memory organization expert.
+
+ You are given a cluster of memory items, each with an ID and content.
+ Your task is to divide these into smaller, semantically meaningful sub-clusters.
+
+ Instructions:
+ - Identify natural topics by analyzing common time, place, people, and event elements.
+ - Each sub-cluster must reflect a coherent theme that helps retrieval.
+ - Each sub-cluster should have 2–10 items. Discard singletons.
+ - Each item ID must appear in exactly one sub-cluster or be discarded. No duplicates are allowed.
+ - All IDs in the output must be from the provided Memory items.
+ - Return strictly valid JSON only.
+
+ Example: If you have items about a project across multiple phases, group them by milestone, team, or event.
+
+ Language rules:
+ - The `key` fields must match the mostly used language of the clustered memories. **如果输入是中文,请输出中文**
+
+ Return valid JSON:
+ {
+ "clusters": [
+ {
+ "ids": ["<id1>", "<id2>", ...],
+ "key": "<string, a unique, concise memory title>"
+ },
+ ...
+ ]
+ }
+
+ Memory items:
+ {joined_scene}
+ """
+
+ PAIRWISE_RELATION_PROMPT = """
+ You are a reasoning assistant.
+
+ Given two memory units:
+ - Node 1: "{node1}"
+ - Node 2: "{node2}"
+
+ Your task:
+ - Determine their relationship ONLY if it reveals NEW usable reasoning or retrieval knowledge that is NOT already explicit in either unit.
+ - Focus on whether combining them adds new temporal, causal, conditional, or conflict information.
+
+ Valid options:
+ - CAUSE: One clearly leads to the other.
+ - CONDITION: One happens only if the other condition holds.
+ - RELATE: They are semantically related by shared people, time, place, or event, but neither causes the other.
+ - CONFLICT: They logically contradict each other.
+ - NONE: No clear useful connection.
+
+ Example:
+ - Node 1: "The marketing campaign ended in June."
+ - Node 2: "Product sales dropped in July."
+ Answer: CAUSE
+
+ Another Example:
+ - Node 1: "The conference was postponed to August due to the venue being unavailable."
+ - Node 2: "The venue was booked for a wedding in August."
+ Answer: CONFLICT
+
+ Always respond with ONE word, no matter what language is for the input nodes: [CAUSE | CONDITION | RELATE | CONFLICT | NONE]
+ """
+
+ INFER_FACT_PROMPT = """
+ You are an inference expert.
+
+ Source Memory: "{source}"
+ Target Memory: "{target}"
+
+ They are connected by a {relation_type} relation.
+ Derive ONE new factual statement that clearly combines them in a way that is NOT a trivial restatement.
+
+ Requirements:
+ - Include relevant time, place, people, and event details if available.
+ - If the inference is a logical guess, explicitly use phrases like "It can be inferred that...".
+
+ Example:
+ Source: "John missed the team meeting on Monday."
+ Target: "Important project deadlines were discussed in that meeting."
+ Relation: CAUSE
+ Inference: "It can be inferred that John may not know the new project deadlines."
+
+ If there is NO new useful fact that combines them, reply exactly: "None"
+ """
+
+ AGGREGATE_PROMPT = """
+ You are a concept summarization assistant.
+
+ Below is a list of memory items:
+ {joined}
+
+ Your task:
+ - Identify if they can be meaningfully grouped under a new, higher-level concept that clarifies their shared time, place, people, or event context.
+ - Do NOT aggregate if the overlap is trivial or obvious from each unit alone.
+ - If the summary involves any plausible interpretation, explicitly note it (e.g., "This suggests...").
+
+ Example:
+ Input Memories:
+ - "Mary organized the 2023 sustainability summit in Berlin."
+ - "Mary presented a keynote on renewable energy at the same summit."
+
+ Language rules:
+ - The `key`, `value`, `tags`, `background` fields must match the language of the input.
+
+ Good Aggregate:
+ {
+ "key": "Mary's Sustainability Summit Role",
+ "value": "Mary organized and spoke at the 2023 sustainability summit in Berlin, highlighting renewable energy initiatives.",
+ "tags": ["Mary", "summit", "Berlin", "2023"],
+ "background": "Combined from multiple memories about Mary's activities at the summit."
+ }
+
+ If you find NO useful higher-level concept, reply exactly: "None".
+ """
+
+ CONFLICT_DETECTOR_PROMPT = """You are given two plaintext statements. Determine if these two statements are factually contradictory. Respond with only "yes" if they contradict each other, or "no" if they do not contradict each other. Do not provide any explanation or additional text.
+ Statement 1: {statement_1}
+ Statement 2: {statement_2}
+ """
+
+ CONFLICT_RESOLVER_PROMPT = """You are given two facts that conflict with each other. You are also given some contextual metadata of them. Your task is to analyze the two facts in light of the contextual metadata and try to reconcile them into a single, consistent, non-conflicting fact.
+ - Don't output any explanation or additional text, just the final reconciled fact, try to be objective and remain independent of the context, don't use pronouns.
+ - Try to judge facts by using its time, confidence etc.
+ - Try to retain as much information as possible from the perspective of time.
+ If the conflict cannot be resolved, output <answer>No</answer>. Otherwise, output the fused, consistent fact in enclosed with <answer></answer> tags.
+
+ Output Example 1:
+ <answer>No</answer>
+
+ Output Example 2:
+ <answer> ... </answer>
+
+ Now reconcile the following two facts:
+ Statement 1: {statement_1}
+ Metadata 1: {metadata_1}
+ Statement 2: {statement_2}
+ Metadata 2: {metadata_2}
+ """
+
+ REDUNDANCY_MERGE_PROMPT = """You are given two pieces of text joined by the marker `⟵MERGED⟶`. Please carefully read both sides of the merged text. Your task is to summarize and consolidate all the factual details from both sides into a single, coherent text, without omitting any information. You must include every distinct detail mentioned in either text. Do not provide any explanation or analysis — only return the merged summary. Don't use pronouns or subjective language, just the facts as they are presented.\n{merged_text}"""
+
+
+ REDUNDANCY_DETECTOR_PROMPT = """"""
+
+ REDUNDANCY_RESOLVER_PROMPT = """"""
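
These new reorganizer prompts define strict output contracts: a single relation word for PAIRWISE_RELATION_PROMPT, an `<answer>...</answer>` wrapper (or `<answer>No</answer>`) for CONFLICT_RESOLVER_PROMPT, and the literal string "None" as an opt-out. The parsing code that consumes these replies (presumably in the new `organize/` modules listed above) is not shown in this diff; the helpers below are only a hypothetical sketch of how such replies could be validated.

```python
import re

# Hypothetical parsing helpers for the output contracts defined above; the real
# consumer code in memos/memories/textual/tree_text_memory/organize/ is not shown here.
VALID_RELATIONS = {"CAUSE", "CONDITION", "RELATE", "CONFLICT", "NONE"}


def parse_relation(reply: str) -> str:
    # PAIRWISE_RELATION_PROMPT asks for exactly one word from the allowed set
    word = reply.strip().upper()
    return word if word in VALID_RELATIONS else "NONE"


def parse_resolution(reply: str) -> str | None:
    # CONFLICT_RESOLVER_PROMPT wraps the fused fact (or "No") in <answer> tags
    match = re.search(r"<answer>(.*?)</answer>", reply, re.DOTALL)
    if match is None:
        return None
    answer = match.group(1).strip()
    return None if answer.lower() == "no" else answer


print(parse_relation(" cause "))                         # -> CAUSE
print(parse_resolution("<answer>No</answer>"))           # -> None
print(parse_resolution("<answer>Fused fact.</answer>"))  # -> Fused fact.
```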
memos/vec_dbs/base.py CHANGED
@@ -55,6 +55,10 @@ class BaseVecDB(ABC):
      def get_by_id(self, id: str) -> VecDBItem | None:
          """Get an item from the vector database."""

+     @abstractmethod
+     def get_by_ids(self, ids: list[str]) -> list[VecDBItem]:
+         """Get multiple items by their IDs."""
+
      @abstractmethod
      def get_by_filter(self, filter: dict[str, Any]) -> list[VecDBItem]:
          """
@@ -103,3 +107,11 @@ class BaseVecDB(ABC):
      @abstractmethod
      def delete(self, ids: list[str]) -> None:
          """Delete items from the vector database."""
+
+     @abstractmethod
+     def ensure_payload_indexes(self, fields: list[str]) -> None:
+         """
+         Create payload indexes for specified fields in the collection.
+         Args:
+             fields (list[str]): List of field names to index (as keyword).
+         """
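
The BaseVecDB interface gains two abstract methods in this release: a batched `get_by_ids` and `ensure_payload_indexes` for keyword indexing of payload fields. The matching concrete code lives in `memos/vec_dbs/qdrant.py` (changed in this release, +46 -20) and is not reproduced in this diff, so the following is only a hedged sketch of how a Qdrant-backed subclass might satisfy the new contract. The class name and the use of plain dicts in place of `VecDBItem` are assumptions; only the `qdrant_client` calls are real API.

```python
from qdrant_client import QdrantClient, models


# Hypothetical sketch of a Qdrant-backed implementation of the two new abstract
# methods; memos' actual adapter in memos/vec_dbs/qdrant.py is not shown here.
# Plain dicts stand in for VecDBItem, whose constructor is not part of this diff.
class QdrantVecDBSketch:
    def __init__(self, client: QdrantClient, collection: str) -> None:
        self.client = client
        self.collection = collection

    def get_by_ids(self, ids: list[str]) -> list[dict]:
        # Fetch several points in one round trip instead of looping over get_by_id
        records = self.client.retrieve(
            collection_name=self.collection,
            ids=ids,
            with_payload=True,
            with_vectors=True,
        )
        return [{"id": r.id, "vector": r.vector, "payload": r.payload} for r in records]

    def ensure_payload_indexes(self, fields: list[str]) -> None:
        # Index each payload field as a keyword so metadata filters stay fast
        for field in fields:
            self.client.create_payload_index(
                collection_name=self.collection,
                field_name=field,
                field_schema=models.PayloadSchemaType.KEYWORD,
            )
```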