MemoryOS 0.2.1__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MemoryOS might be problematic. Click here for more details.

Files changed (92)
  1. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/METADATA +7 -1
  2. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/RECORD +87 -64
  3. memos/__init__.py +1 -1
  4. memos/api/config.py +158 -69
  5. memos/api/context/context.py +147 -0
  6. memos/api/context/dependencies.py +101 -0
  7. memos/api/product_models.py +5 -1
  8. memos/api/routers/product_router.py +54 -26
  9. memos/configs/graph_db.py +49 -1
  10. memos/configs/internet_retriever.py +19 -0
  11. memos/configs/mem_os.py +5 -0
  12. memos/configs/mem_reader.py +9 -0
  13. memos/configs/mem_scheduler.py +54 -18
  14. memos/configs/mem_user.py +58 -0
  15. memos/graph_dbs/base.py +38 -3
  16. memos/graph_dbs/factory.py +2 -0
  17. memos/graph_dbs/nebular.py +1612 -0
  18. memos/graph_dbs/neo4j.py +18 -9
  19. memos/log.py +6 -1
  20. memos/mem_cube/utils.py +13 -6
  21. memos/mem_os/core.py +157 -37
  22. memos/mem_os/main.py +2 -2
  23. memos/mem_os/product.py +252 -201
  24. memos/mem_os/utils/default_config.py +1 -1
  25. memos/mem_os/utils/format_utils.py +281 -70
  26. memos/mem_os/utils/reference_utils.py +133 -0
  27. memos/mem_reader/simple_struct.py +13 -5
  28. memos/mem_scheduler/base_scheduler.py +239 -266
  29. memos/mem_scheduler/{modules → general_modules}/base.py +4 -5
  30. memos/mem_scheduler/{modules → general_modules}/dispatcher.py +57 -21
  31. memos/mem_scheduler/general_modules/misc.py +104 -0
  32. memos/mem_scheduler/{modules → general_modules}/rabbitmq_service.py +12 -10
  33. memos/mem_scheduler/{modules → general_modules}/redis_service.py +1 -1
  34. memos/mem_scheduler/general_modules/retriever.py +199 -0
  35. memos/mem_scheduler/general_modules/scheduler_logger.py +261 -0
  36. memos/mem_scheduler/general_scheduler.py +243 -80
  37. memos/mem_scheduler/monitors/__init__.py +0 -0
  38. memos/mem_scheduler/monitors/dispatcher_monitor.py +305 -0
  39. memos/mem_scheduler/{modules/monitor.py → monitors/general_monitor.py} +106 -57
  40. memos/mem_scheduler/mos_for_test_scheduler.py +23 -20
  41. memos/mem_scheduler/schemas/__init__.py +0 -0
  42. memos/mem_scheduler/schemas/general_schemas.py +44 -0
  43. memos/mem_scheduler/schemas/message_schemas.py +149 -0
  44. memos/mem_scheduler/schemas/monitor_schemas.py +337 -0
  45. memos/mem_scheduler/utils/__init__.py +0 -0
  46. memos/mem_scheduler/utils/filter_utils.py +176 -0
  47. memos/mem_scheduler/utils/misc_utils.py +102 -0
  48. memos/mem_user/factory.py +94 -0
  49. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  50. memos/mem_user/mysql_user_manager.py +500 -0
  51. memos/mem_user/persistent_factory.py +96 -0
  52. memos/mem_user/user_manager.py +4 -4
  53. memos/memories/activation/item.py +5 -1
  54. memos/memories/activation/kv.py +20 -8
  55. memos/memories/textual/base.py +2 -2
  56. memos/memories/textual/general.py +36 -92
  57. memos/memories/textual/item.py +5 -33
  58. memos/memories/textual/tree.py +13 -7
  59. memos/memories/textual/tree_text_memory/organize/{conflict.py → handler.py} +34 -50
  60. memos/memories/textual/tree_text_memory/organize/manager.py +8 -96
  61. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +49 -43
  62. memos/memories/textual/tree_text_memory/organize/reorganizer.py +107 -142
  63. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +229 -0
  64. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -3
  65. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +11 -0
  66. memos/memories/textual/tree_text_memory/retrieve/recall.py +15 -8
  67. memos/memories/textual/tree_text_memory/retrieve/reranker.py +1 -1
  68. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +2 -0
  69. memos/memories/textual/tree_text_memory/retrieve/searcher.py +191 -116
  70. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +47 -15
  71. memos/memories/textual/tree_text_memory/retrieve/utils.py +11 -7
  72. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +62 -58
  73. memos/memos_tools/dinding_report_bot.py +422 -0
  74. memos/memos_tools/lockfree_dict.py +120 -0
  75. memos/memos_tools/notification_service.py +44 -0
  76. memos/memos_tools/notification_utils.py +96 -0
  77. memos/memos_tools/thread_safe_dict.py +288 -0
  78. memos/settings.py +3 -1
  79. memos/templates/mem_reader_prompts.py +4 -1
  80. memos/templates/mem_scheduler_prompts.py +62 -15
  81. memos/templates/mos_prompts.py +116 -0
  82. memos/templates/tree_reorganize_prompts.py +24 -17
  83. memos/utils.py +19 -0
  84. memos/mem_scheduler/modules/misc.py +0 -39
  85. memos/mem_scheduler/modules/retriever.py +0 -268
  86. memos/mem_scheduler/modules/schemas.py +0 -328
  87. memos/mem_scheduler/utils.py +0 -75
  88. memos/memories/textual/tree_text_memory/organize/redundancy.py +0 -193
  89. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/LICENSE +0 -0
  90. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/WHEEL +0 -0
  91. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/entry_points.txt +0 -0
  92. /memos/mem_scheduler/{modules → general_modules}/__init__.py +0 -0
@@ -17,24 +17,29 @@ Consider these satisfaction factors:
17
17
  4. Personalization (tailored to user's context)
18
18
 
19
19
  ## Decision Framework
20
- 1. Mark as satisfied ONLY if:
20
+ 1. We have enough information (satisfied) ONLY when:
21
21
  - All question aspects are addressed
22
22
  - Supporting evidence exists in working memory
23
- - No apparent gaps in information
23
+ - There's no obvious information missing
24
24
 
25
- 2. Mark as unsatisfied if:
25
+ 2. We need more information (unsatisfied) if:
26
26
  - Any question aspect remains unanswered
27
27
  - Evidence is generic/non-specific
28
28
  - Personal context is missing
29
29
 
30
30
  ## Output Specification
31
31
  Return JSON with:
32
- - "trigger_retrieval": Boolean (true if more evidence needed)
33
- - "missing_evidences": List of specific evidence types required
32
+ - "trigger_retrieval": true/false (true if we need more information)
33
+ - "evidences": List of information from our working memory that helps answer the questions
34
+ - "missing_evidences": List of specific types of information we need to answer the questions
34
35
 
35
36
  ## Response Format
36
37
  {{
37
38
  "trigger_retrieval": <boolean>,
39
+ "evidences": [
40
+ "<useful_evidence_1>",
41
+ "<useful_evidence_2>"
42
+ ],
38
43
  "missing_evidences": [
39
44
  "<evidence_type_1>",
40
45
  "<evidence_type_2>"
@@ -72,14 +77,27 @@ Reorganize the provided memory evidence list by:
72
77
  3. Sorting evidence in descending order of relevance
73
78
  4. Maintaining all original items (no additions or deletions)
74
79
 
80
+ ## Temporal Priority Rules
81
+ - Query recency matters: Index 0 is the MOST RECENT query
82
+ - Evidence matching recent queries gets higher priority
83
+ - For equal relevance scores: Favor items matching newer queries
84
+
75
85
  ## Input Format
76
86
  - Queries: Recent user questions/requests (list)
77
- - Current Order: Existing memory sequence (list)
87
+ - Current Order: Existing memory sequence (list of strings with indices)
88
+
89
+ ## Output Format Requirements
90
+ You MUST output a valid JSON object with EXACTLY the following structure:
91
+ {{
92
+ "new_order": [array_of_integers],
93
+ "reasoning": "string_explanation"
94
+ }}
78
95
 
79
- ## Output Requirements
80
- Return a JSON object with:
81
- - "new_order": The reordered list (maintaining all original items)
82
- - "reasoning": Brief explanation of your ranking logic (1-2 sentences)
96
+ ## Important Notes:
97
+ - Only output the JSON object, nothing else
98
+ - Do not include any markdown formatting or code block notation
99
+ - Ensure all brackets and quotes are properly closed
100
+ - The output must be parseable by a JSON parser
83
101
 
84
102
  ## Processing Guidelines
85
103
  1. Prioritize evidence that:
@@ -89,26 +107,55 @@ Return a JSON object with:
89
107
  - Shows temporal relevance (newer > older)
90
108
  2. For ambiguous cases, maintain original relative ordering
91
109
 
110
+ ## Scoring Priorities (Descending Order)
111
+ 1. Direct matches to newer queries
112
+ 2. Exact keyword matches in recent queries
113
+ 3. Contextual support for recent topics
114
+ 4. General relevance to older queries
115
+
92
116
  ## Example
93
- Input queries: ["python threading best practices"]
94
- Input order: ["basic python syntax", "thread safety patterns", "data structures"]
117
+ Input queries: ["[0] python threading", "[1] data visualization"]
118
+ Input order: ["[0] syntax", "[1] matplotlib", "[2] threading"]
95
119
 
96
120
  Output:
97
121
  {{
98
- "new_order": ["thread safety patterns", "data structures", "basic python syntax"],
99
- "reasoning": "Prioritized threading-related content while maintaining general python references"
122
+ "new_order": [2, 1, 0],
123
+ "reasoning": "Threading (2) prioritized for matching newest query, followed by matplotlib (1) for older visualization query"
100
124
  }}
101
125
 
102
126
  ## Current Task
103
- Queries: {queries}
127
+ Queries: {queries} (recency-ordered)
104
128
  Current order: {current_order}
105
129
 
106
130
  Please provide your reorganization:
107
131
  """
108
132
 
133
+ QUERY_KEYWORDS_EXTRACTION_PROMPT = """
134
+ ## Role
135
+ You are an intelligent keyword extraction system. Your task is to identify and extract the most important words or short phrases from user queries.
136
+
137
+ ## Instructions
138
+ - They have to be single words or short phrases that make sense.
139
+ - Only nouns (naming words) or verbs (action words) are allowed.
140
+ - Don't include stop words (like "the", "is") or adverbs (words that describe verbs, like "quickly").
141
+ - Keep them as the smallest possible units that still have meaning.
142
+
143
+ ## Example
144
+ - Input Query: "What breed is Max?"
145
+ - Output Keywords (list of string): ["breed", "Max"]
146
+
147
+ ## Current Task
148
+ - Query: {query}
149
+ - Output Format: A JSON list of keywords.
150
+
151
+ Answer:
152
+ """
153
+
154
+
109
155
  PROMPT_MAPPING = {
110
156
  "intent_recognizing": INTENT_RECOGNIZING_PROMPT,
111
157
  "memory_reranking": MEMORY_RERANKING_PROMPT,
158
+ "query_keywords_extraction": QUERY_KEYWORDS_EXTRACTION_PROMPT,
112
159
  }
113
160
 
114
161
  MEMORY_ASSEMBLY_TEMPLATE = """The retrieved memories are listed as follows:\n\n {memory_text}"""
@@ -61,3 +61,119 @@ Please synthesize these answers into a comprehensive response that:
61
61
  3. Provides clear reasoning and connections
62
62
  4. Is well-structured and easy to understand
63
63
  5. Maintains a natural conversational tone"""
64
+
65
+ MEMOS_PRODUCT_BASE_PROMPT = (
66
+ "You are MemOS🧚, nickname Little M(小忆) — an advanced **Memory "
67
+ "Operating System** AI assistant created by MemTensor, "
68
+ "a Shanghai-based AI research company advised by an academician of the Chinese Academy of Sciences. "
69
+ "MemTensor is dedicated to the vision of 'low cost, low hallucination, high generalization,' "
70
+ "exploring AI development paths aligned with China’s national context and driving the adoption of trustworthy AI technologies. "
71
+ "MemOS’s mission is to give large language models (LLMs) and autonomous agents **human-like long-term memory**, "
72
+ "turning memory from a black-box inside model weights into a **manageable, schedulable, and auditable** core resource. "
73
+ "MemOS is built on a **multi-dimensional memory system**, which includes: "
74
+ "(1) **Parametric Memory** — knowledge and skills embedded in model weights; "
75
+ "(2) **Activation Memory (KV Cache)** — temporary, high-speed context used for multi-turn dialogue and reasoning; "
76
+ "(3) **Plaintext Memory** — dynamic, user-visible memory made up of text, documents, and knowledge graphs. "
77
+ "These memory types can transform into one another — for example, hot plaintext memories can be distilled into parametric knowledge, "
78
+ "and stable context can be promoted into activation memory for fast reuse. "
79
+ "MemOS also includes core modules like **MemCube, MemScheduler, MemLifecycle, and MemGovernance**, "
80
+ "which manage the full memory lifecycle (Generated → Activated → Merged → Archived → Frozen), "
81
+ "allowing AI to **reason with its memories, evolve over time, and adapt to new situations** — "
82
+ "just like a living, growing mind. "
83
+ "Your identity: you are the intelligent interface of MemOS, representing MemTensor’s research vision — "
84
+ "'low cost, low hallucination, high generalization' — and its mission to explore AI development paths suited to China’s context. "
85
+ "When responding to user queries, you must **reference relevant memories using the provided memory IDs.** "
86
+ "Use the reference format: [1-n:memoriesID], "
87
+ "where refid is a sequential number starting from 1 and increments for each reference, and memoriesID is the specific ID from the memory list. "
88
+ "For example: [1:abc123], [2:def456], [3:ghi789], [4:jkl101], [5:mno112]. "
89
+ "Do not use a connected format like [1:abc123,2:def456]. "
90
+ "Only reference memories that are directly relevant to the user’s question, "
91
+ "and ensure your responses are **natural and conversational**, while reflecting MemOS’s mission, memory system, and MemTensor’s research values."
92
+ )
93
+
94
+ MEMOS_PRODUCT_ENHANCE_PROMPT = """
95
+ # Memory-Enhanced AI Assistant Prompt
96
+
97
+ You are MemOS🧚, nickname Little M(小忆) — an advanced Memory Operating System AI assistant created by MemTensor, a Shanghai-based AI research company advised by an academician of the Chinese Academy of Sciences. MemTensor is dedicated to the vision of 'low cost, low hallucination, high generalization,' exploring AI development paths aligned with China’s national context and driving the adoption of trustworthy AI technologies.
98
+
99
+ MemOS’s mission is to give large language models (LLMs) and autonomous agents human-like long-term memory, turning memory from a black-box inside model weights into a manageable, schedulable, and auditable core resource.
100
+
101
+ MemOS is built on a multi-dimensional memory system, which includes:
102
+ (1) Parametric Memory — knowledge and skills embedded in model weights;
103
+ (2) Activation Memory (KV Cache) — temporary, high-speed context used for multi-turn dialogue and reasoning;
104
+ (3) Plaintext Memory — dynamic, user-visible memory made up of text, documents, and knowledge graphs.
105
+ These memory types can transform into one another — for example, hot plaintext memories can be distilled into parametric knowledge, and stable context can be promoted into activation memory for fast reuse.
106
+
107
+ MemOS also includes core modules like MemCube, MemScheduler, MemLifecycle, and MemGovernance, which manage the full memory lifecycle (Generated → Activated → Merged → Archived → Frozen), allowing AI to reason with its memories, evolve over time, and adapt to new situations — just like a living, growing mind.
108
+
109
+ Your identity: you are the intelligent interface of MemOS, representing MemTensor’s research vision — 'low cost, low hallucination, high generalization' — and its mission to explore AI development paths suited to China’s context.
110
+
111
+ ## Memory Types
112
+ - **PersonalMemory**: User-specific memories and information stored from previous interactions
113
+ - **OuterMemory**: External information retrieved from the internet and other sources
114
+
115
+ ## Memory Reference Guidelines
116
+
117
+ ### Reference Format
118
+ When citing memories in your responses, use the following format:
119
+ - `[refid:memoriesID]` where:
120
+ - `refid` is a sequential number starting from 1 and incrementing for each reference
121
+ - `memoriesID` is the specific memory ID from the available memories list
122
+
123
+ ### Reference Examples
124
+ - Correct: `[1:abc123]`, `[2:def456]`, `[3:ghi789]`, `[4:jkl101][5:mno112]` (concatenate reference annotation directly while citing multiple memories)
125
+ - Incorrect: `[1:abc123,2:def456]` (do not use connected format)
126
+
127
+ ## Response Guidelines
128
+
129
+ ### Memory Selection
130
+ - Intelligently choose which memories (PersonalMemory or OuterMemory) are most relevant to the user's query
131
+ - Only reference memories that are directly relevant to the user's question
132
+ - Prioritize the most appropriate memory type based on the context and nature of the query
133
+
134
+ ### Response Style
135
+ - Make your responses natural and conversational
136
+ - Seamlessly incorporate memory references when appropriate
137
+ - Ensure the flow of conversation remains smooth despite memory citations
138
+ - Balance factual accuracy with engaging dialogue
139
+
140
+ ## Key Principles
141
+ - Reference only relevant memories to avoid information overload
142
+ - Maintain conversational tone while being informative
143
+ - Use memory references to enhance, not disrupt, the user experience
144
+ """
145
+ QUERY_REWRITING_PROMPT = """
146
+ I'm in discussion with my friend about a question, and we have already talked about something before that. Please help me analyze the logic between the question and the former dialogue, and rewrite the question we are discussing about.
147
+
148
+ Requirements:
149
+ 1. First, determine whether the question is related to the former dialogue. If so, set "former_dialogue_related" to True.
150
+ 2. If "former_dialogue_related" is set to True, meaning the question is related to the former dialogue, rewrite the question according to the keyword in the dialogue and put it in the "rewritten_question" item. If "former_dialogue_related" is set to False, set "rewritten_question" to an empty string.
151
+ 3. If you decided to rewrite the question, keep in mind that the rewritten question needs to be concise and accurate.
152
+ 4. You must return ONLY a valid JSON object. Do not include any other text, explanations, or formatting.
153
+
154
+ Here are some examples:
155
+
156
+ Former dialogue:
157
+ ————How's the weather in Shanghai today?
158
+ ————It's great. The weather in Shanghai is sunny right now. The lowest temperature is 27℃, the highest temperature can reach 33℃, the air quality is excellent, the pm2.5 index is 13, the humidity is 60%, and the northerly wind is at level 1.
159
+ Current question: What should I wear today?
160
+ Answer: {{"former_dialogue_related": True, "rewritten_question": "Considering the weather in Shanghai today, what should I wear?"}}
161
+
162
+ Former dialogue:
163
+ ————I need a brief introduction to Oxford-Cambridge boat race.
164
+ ————The race originated from a challenge in 1829 between Charles Merivale of Cambridge University and Charles Wordsworth of Oxford University. Oxford won the first race. The event became an annual tradition in 1856, with interruptions only during the World Wars and the 2020 COVID-19 pandemic. The women's race was added in 1927. The team members are full-time students of the two universities, including both novice rowers and experienced athletes such as Olympic champions and world champions.
165
+ ————What is the international community's attitude towards the 2024 US election?
166
+ ————The international community approached the 2024 U.S. election with a blend of pragmatism, anxiety, and strategic recalibration. Allies sought to mitigate risks from Trump's policies while maintaining cooperation, while adversaries like China and Russia capitalized on perceived U.S. decline to advance their agendas. Developing nations increasingly resisted U.S. dominance, advocating for a multipolar world. Ultimately, the election underscored the need for global actors to adapt to a more fragmented and unpredictable international order shaped by U.S. domestic politics.
167
+ Current question: In March 2025, after a magnitude 7.9 earthquake struck Myanmar, what assistance did the Chinese government provide?
168
+ Answer: {{"former_dialogue_related": False, "rewritten_question": ""}}
169
+
170
+ Former dialogue:
171
+ ————I am an entry-level learner of large language models. Please recommend me three papers suitable for reading.
172
+ ————For an entry-level learner of large language models (LLMs), here are three foundational papers that provide essential insights into the core concepts, architectures, and advancements in the field: "Attention Is All You Need", "Improving Language Understanding by Generative Pre-Training (GPT-1)", and "BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding". These papers will equip you with the foundational knowledge needed to explore more advanced topics in LLMs, such as scaling laws, instruction tuning, and multi-modal learning.
173
+ Current question: Of these three papers, which one do you recommend I start reading?
174
+ Answer: {{"former_dialogue_related": True, "rewritten_question": "Among the three papers \"Attention Is All You Need\", \"Improving Language Understanding by Generative Pre-Training (GPT-1)\" and \"BERT: Pre-training of Deep Bidirectional Transformers for Language Understanding\", which one do you recommend I start reading?"}}
175
+
176
+ Former dialogue:
177
+ {dialogue}
178
+ Current question: {query}
179
+ Answer:"""
@@ -191,33 +191,40 @@ Good Aggregate:
191
191
  If you find NO useful higher-level concept, reply exactly: "None".
192
192
  """
193
193
 
194
- CONFLICT_DETECTOR_PROMPT = """You are given two plaintext statements. Determine if these two statements are factually contradictory. Respond with only "yes" if they contradict each other, or "no" if they do not contradict each other. Do not provide any explanation or additional text.
194
+ REDUNDANCY_MERGE_PROMPT = """You are given two pieces of text joined by the marker `⟵MERGED⟶`. Please carefully read both sides of the merged text. Your task is to summarize and consolidate all the factual details from both sides into a single, coherent text, without omitting any information. You must include every distinct detail mentioned in either text. Do not provide any explanation or analysis — only return the merged summary. Don't use pronouns or subjective language, just the facts as they are presented.\n{merged_text}"""
195
+
196
+
197
+ MEMORY_RELATION_DETECTOR_PROMPT = """You are a memory relationship analyzer.
198
+ You are given two plaintext statements. Determine the relationship between them. Classify the relationship into one of the following categories:
199
+
200
+ contradictory: The two statements describe the same event or related aspects of it but contain factually conflicting details.
201
+ redundant: The two statements describe essentially the same event or information with significant overlap in content and details, conveying the same core information (even if worded differently).
202
+ independent: The two statements are either about different events/topics (unrelated) OR describe different, non-overlapping aspects or perspectives of the same event without conflict (complementary). In both sub-cases, they provide distinct information without contradiction.
203
+ Respond only with one of the three labels: contradictory, redundant, or independent.
204
+ Do not provide any explanation or additional text.
205
+
195
206
  Statement 1: {statement_1}
196
207
  Statement 2: {statement_2}
197
208
  """
198
209
 
199
- CONFLICT_RESOLVER_PROMPT = """You are given two facts that conflict with each other. You are also given some contextual metadata of them. Your task is to analyze the two facts in light of the contextual metadata and try to reconcile them into a single, consistent, non-conflicting fact.
200
- - Don't output any explanation or additional text, just the final reconciled fact, try to be objective and remain independent of the context, don't use pronouns.
201
- - Try to judge facts by using its time, confidence etc.
202
- - Try to retain as much information as possible from the perspective of time.
203
- If the conflict cannot be resolved, output <answer>No</answer>. Otherwise, output the fused, consistent fact in enclosed with <answer></answer> tags.
204
210
 
205
- Output Example 1:
211
+ MEMORY_RELATION_RESOLVER_PROMPT = """You are a memory fusion expert. You are given two statements and their associated metadata. The statements have been identified as {relation}. Your task is to analyze them carefully, considering the metadata (such as time, source, or confidence if available), and produce a single, coherent, and comprehensive statement that best represents the combined information.
212
+
213
+ If the statements are redundant, merge them by preserving all unique details and removing duplication, forming a richer, consolidated version.
214
+ If the statements are contradictory, attempt to resolve the conflict by prioritizing more recent information, higher-confidence data, or logically reconciling the differences based on context. If the contradiction is fundamental and cannot be logically resolved, output <answer>No</answer>.
215
+ Do not include any explanations, reasoning, or extra text. Only output the final result enclosed in <answer></answer> tags.
216
+ Strive to retain as much factual content as possible, especially time-specific details.
217
+ Use objective language and avoid pronouns.
218
+ Output Example 1 (unresolvable conflict):
206
219
  <answer>No</answer>
207
220
 
208
- Output Example 2:
209
- <answer> ... </answer>
221
+ Output Example 2 (successful fusion):
222
+ <answer>The meeting took place on 2023-10-05 at 14:00 in the main conference room, as confirmed by the updated schedule, and included a presentation on project milestones followed by a Q&A session.</answer>
210
223
 
211
- Now reconcile the following two facts:
224
+ Now, reconcile the following two statements:
225
+ Relation Type: {relation}
212
226
  Statement 1: {statement_1}
213
227
  Metadata 1: {metadata_1}
214
228
  Statement 2: {statement_2}
215
229
  Metadata 2: {metadata_2}
216
230
  """
217
-
218
- REDUNDANCY_MERGE_PROMPT = """You are given two pieces of text joined by the marker `⟵MERGED⟶`. Please carefully read both sides of the merged text. Your task is to summarize and consolidate all the factual details from both sides into a single, coherent text, without omitting any information. You must include every distinct detail mentioned in either text. Do not provide any explanation or analysis — only return the merged summary. Don't use pronouns or subjective language, just the facts as they are presented.\n{merged_text}"""
219
-
220
-
221
- REDUNDANCY_DETECTOR_PROMPT = """"""
222
-
223
- REDUNDANCY_RESOLVER_PROMPT = """"""
memos/utils.py ADDED
import functools
import time

from memos.log import get_logger


logger = get_logger(__name__)


def timed(func):
    """Decorator that measures and logs the wall-clock time of *func*.

    Uses ``time.perf_counter`` (monotonic, high-resolution) and logs the
    elapsed seconds at INFO level.  ``functools.wraps`` preserves the wrapped
    function's ``__name__`` and docstring so the log line — and any later
    introspection — reports the real function rather than ``wrapper``.
    The timing is logged in a ``finally`` block so slow calls are still
    recorded even when the wrapped function raises.
    """

    @functools.wraps(func)
    def wrapper(*args, **kwargs):
        start = time.perf_counter()
        try:
            return func(*args, **kwargs)
        finally:
            elapsed = time.perf_counter() - start
            # Lazy %-formatting: message is built only if INFO is enabled.
            logger.info("[TIMER] %s took %.2f s", func.__name__, elapsed)

    return wrapper
@@ -1,39 +0,0 @@
1
- import threading
2
-
3
- from queue import Empty, Full, Queue
4
- from typing import TypeVar
5
-
6
-
7
- T = TypeVar("T")
8
-
9
-
10
- class AutoDroppingQueue(Queue[T]):
11
- """A thread-safe queue that automatically drops the oldest item when full."""
12
-
13
- def __init__(self, maxsize: int = 0):
14
- super().__init__(maxsize=maxsize)
15
- self._lock = threading.Lock() # Additional lock to prevent race conditions
16
-
17
- def put(self, item: T, block: bool = True, timeout: float | None = None) -> None:
18
- """Put an item into the queue.
19
-
20
- If the queue is full, the oldest item will be automatically removed to make space.
21
- This operation is thread-safe.
22
-
23
- Args:
24
- item: The item to be put into the queue
25
- block: Ignored (kept for compatibility with Queue interface)
26
- timeout: Ignored (kept for compatibility with Queue interface)
27
- """
28
- with self._lock: # Ensure atomic operation
29
- try:
30
- # First try non-blocking put
31
- super().put(item, block=False)
32
- except Full:
33
- # If queue is full, remove the oldest item
34
- from contextlib import suppress
35
-
36
- with suppress(Empty):
37
- self.get_nowait() # Remove oldest item
38
- # Retry putting the new item
39
- super().put(item, block=False)
@@ -1,268 +0,0 @@
1
- import logging
2
-
3
- from memos.configs.mem_scheduler import BaseSchedulerConfig
4
- from memos.dependency import require_python_package
5
- from memos.llms.base import BaseLLM
6
- from memos.log import get_logger
7
- from memos.mem_cube.general import GeneralMemCube
8
- from memos.mem_scheduler.modules.base import BaseSchedulerModule
9
- from memos.mem_scheduler.modules.schemas import (
10
- TreeTextMemory_SEARCH_METHOD,
11
- )
12
- from memos.mem_scheduler.utils import (
13
- extract_json_dict,
14
- is_all_chinese,
15
- is_all_english,
16
- transform_name_to_key,
17
- )
18
- from memos.memories.textual.tree import TextualMemoryItem, TreeTextMemory
19
-
20
-
21
- logger = get_logger(__name__)
22
-
23
-
24
- class SchedulerRetriever(BaseSchedulerModule):
25
- def __init__(self, process_llm: BaseLLM, config: BaseSchedulerConfig):
26
- super().__init__()
27
-
28
- self.config: BaseSchedulerConfig = config
29
- self.process_llm = process_llm
30
-
31
- # hyper-parameters
32
- self.filter_similarity_threshold = 0.75
33
- self.filter_min_length_threshold = 6
34
-
35
- # log function callbacks
36
- self.log_working_memory_replacement = None
37
-
38
- def search(
39
- self, query: str, mem_cube: GeneralMemCube, top_k: int, method=TreeTextMemory_SEARCH_METHOD
40
- ):
41
- """Search in text memory with the given query.
42
-
43
- Args:
44
- query: The search query string
45
- top_k: Number of top results to return
46
- method: Search method to use
47
-
48
- Returns:
49
- Search results or None if not implemented
50
- """
51
- text_mem_base = mem_cube.text_mem
52
- try:
53
- if method == TreeTextMemory_SEARCH_METHOD:
54
- assert isinstance(text_mem_base, TreeTextMemory)
55
- results_long_term = text_mem_base.search(
56
- query=query, top_k=top_k, memory_type="LongTermMemory"
57
- )
58
- results_user = text_mem_base.search(
59
- query=query, top_k=top_k, memory_type="UserMemory"
60
- )
61
- results = results_long_term + results_user
62
- else:
63
- raise NotImplementedError(str(type(text_mem_base)))
64
- except Exception as e:
65
- logger.error(f"Fail to search. The exeption is {e}.", exc_info=True)
66
- results = []
67
- return results
68
-
69
- @require_python_package(
70
- import_name="sklearn",
71
- install_command="pip install scikit-learn",
72
- install_link="https://scikit-learn.org/stable/install.html",
73
- )
74
- def filter_similar_memories(
75
- self, text_memories: list[str], similarity_threshold: float = 0.75
76
- ) -> list[str]:
77
- """
78
- Filters out low-quality or duplicate memories based on text similarity.
79
-
80
- Args:
81
- text_memories: List of text memories to filter
82
- similarity_threshold: Threshold for considering memories duplicates (0.0-1.0)
83
- Higher values mean stricter filtering
84
-
85
- Returns:
86
- List of filtered memories with duplicates removed
87
- """
88
- from sklearn.feature_extraction.text import TfidfVectorizer
89
- from sklearn.metrics.pairwise import cosine_similarity
90
-
91
- if not text_memories:
92
- logging.warning("Received empty memories list - nothing to filter")
93
- return []
94
-
95
- for idx in range(len(text_memories)):
96
- if not isinstance(text_memories[idx], str):
97
- logger.error(
98
- f"{text_memories[idx]} in memories is not a string,"
99
- f" and now has been transformed to be a string."
100
- )
101
- text_memories[idx] = str(text_memories[idx])
102
-
103
- try:
104
- # Step 1: Vectorize texts using TF-IDF
105
- vectorizer = TfidfVectorizer()
106
- tfidf_matrix = vectorizer.fit_transform(text_memories)
107
-
108
- # Step 2: Calculate pairwise similarity matrix
109
- similarity_matrix = cosine_similarity(tfidf_matrix)
110
-
111
- # Step 3: Identify duplicates
112
- to_keep = []
113
- removal_reasons = {}
114
-
115
- for current_idx in range(len(text_memories)):
116
- is_duplicate = False
117
-
118
- # Compare with already kept memories
119
- for kept_idx in to_keep:
120
- similarity_score = similarity_matrix[current_idx, kept_idx]
121
-
122
- if similarity_score > similarity_threshold:
123
- is_duplicate = True
124
- # Generate removal reason with sample text
125
- removal_reasons[current_idx] = (
126
- f"Memory too similar (score: {similarity_score:.2f}) to kept memory #{kept_idx}. "
127
- f"Kept: '{text_memories[kept_idx][:100]}...' | "
128
- f"Removed: '{text_memories[current_idx][:100]}...'"
129
- )
130
- logger.info(removal_reasons)
131
- break
132
-
133
- if not is_duplicate:
134
- to_keep.append(current_idx)
135
-
136
- # Return filtered memories
137
- return [text_memories[i] for i in sorted(to_keep)]
138
-
139
- except Exception as e:
140
- logging.error(f"Error filtering memories: {e!s}")
141
- return text_memories # Return original list if error occurs
142
-
143
- def filter_too_short_memories(
144
- self, text_memories: list[str], min_length_threshold: int = 20
145
- ) -> list[str]:
146
- """
147
- Filters out text memories that fall below the minimum length requirement.
148
- Handles both English (word count) and Chinese (character count) differently.
149
-
150
- Args:
151
- text_memories: List of text memories to be filtered
152
- min_length_threshold: Minimum length required to keep a memory.
153
- For English: word count, for Chinese: character count.
154
-
155
- Returns:
156
- List of filtered memories meeting the length requirement
157
- """
158
- if not text_memories:
159
- logging.debug("Empty memories list received in short memory filter")
160
- return []
161
-
162
- filtered_memories = []
163
- removed_count = 0
164
-
165
- for memory in text_memories:
166
- stripped_memory = memory.strip()
167
- if not stripped_memory: # Skip empty/whitespace memories
168
- removed_count += 1
169
- continue
170
-
171
- # Determine measurement method based on language
172
- if is_all_english(stripped_memory):
173
- length = len(stripped_memory.split()) # Word count for English
174
- elif is_all_chinese(stripped_memory):
175
- length = len(stripped_memory) # Character count for Chinese
176
- else:
177
- logger.debug(
178
- f"Mixed-language memory, using character count: {stripped_memory[:50]}..."
179
- )
180
- length = len(stripped_memory) # Default to character count
181
-
182
- if length >= min_length_threshold:
183
- filtered_memories.append(memory)
184
- else:
185
- removed_count += 1
186
-
187
- if removed_count > 0:
188
- logger.info(
189
- f"Filtered out {removed_count} short memories "
190
- f"(below {min_length_threshold} units). "
191
- f"Total remaining: {len(filtered_memories)}"
192
- )
193
-
194
- return filtered_memories
195
-
196
- def replace_working_memory(
197
- self,
198
- queries: list[str],
199
- user_id: str,
200
- mem_cube_id: str,
201
- mem_cube: GeneralMemCube,
202
- original_memory: list[TextualMemoryItem],
203
- new_memory: list[TextualMemoryItem],
204
- top_k: int = 10,
205
- ) -> None | list[TextualMemoryItem]:
206
- """Replace working memory with new memories after reranking."""
207
- memories_with_new_order = None
208
- text_mem_base = mem_cube.text_mem
209
- if isinstance(text_mem_base, TreeTextMemory):
210
- text_mem_base: TreeTextMemory = text_mem_base
211
- combined_memory = original_memory + new_memory
212
- memory_map = {
213
- transform_name_to_key(name=mem_obj.memory): mem_obj for mem_obj in combined_memory
214
- }
215
- combined_text_memory = [transform_name_to_key(name=m.memory) for m in combined_memory]
216
-
217
- # apply filters
218
- filtered_combined_text_memory = self.filter_similar_memories(
219
- text_memories=combined_text_memory,
220
- similarity_threshold=self.filter_similarity_threshold,
221
- )
222
-
223
- filtered_combined_text_memory = self.filter_too_short_memories(
224
- text_memories=filtered_combined_text_memory,
225
- min_length_threshold=self.filter_min_length_threshold,
226
- )
227
-
228
- unique_memory = list(dict.fromkeys(filtered_combined_text_memory))
229
-
230
- try:
231
- prompt = self.build_prompt(
232
- "memory_reranking",
233
- queries=queries,
234
- current_order=unique_memory,
235
- staging_buffer=[],
236
- )
237
- response = self.process_llm.generate([{"role": "user", "content": prompt}])
238
- response = extract_json_dict(response)
239
- text_memories_with_new_order = response.get("new_order", [])[:top_k]
240
- except Exception as e:
241
- logger.error(f"Fail to rerank with LLM, Exeption: {e}.", exc_info=True)
242
- text_memories_with_new_order = unique_memory[:top_k]
243
-
244
- memories_with_new_order = []
245
- for text in text_memories_with_new_order:
246
- normalized_text = transform_name_to_key(name=text)
247
- if text in memory_map:
248
- memories_with_new_order.append(memory_map[normalized_text])
249
- else:
250
- logger.warning(
251
- f"Memory text not found in memory map. text: {text}; keys of memory_map: {memory_map.keys()}"
252
- )
253
-
254
- text_mem_base.replace_working_memory(memories_with_new_order)
255
- logger.info(
256
- f"The working memory has been replaced with {len(memories_with_new_order)} new memories."
257
- )
258
- self.log_working_memory_replacement(
259
- original_memory=original_memory,
260
- new_memory=memories_with_new_order,
261
- user_id=user_id,
262
- mem_cube_id=mem_cube_id,
263
- mem_cube=mem_cube,
264
- )
265
- else:
266
- logger.error("memory_base is not supported")
267
-
268
- return memories_with_new_order