MemoryOS 0.0.1__py3-none-any.whl → 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MemoryOS might be problematic. Click here for more details.
- memoryos-0.1.13.dist-info/METADATA +288 -0
- memoryos-0.1.13.dist-info/RECORD +122 -0
- memos/__init__.py +20 -1
- memos/api/start_api.py +420 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/factory.py +22 -0
- memos/chunkers/sentence_chunker.py +35 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +45 -0
- memos/configs/embedder.py +53 -0
- memos/configs/graph_db.py +45 -0
- memos/configs/internet_retriever.py +81 -0
- memos/configs/llm.py +71 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +89 -0
- memos/configs/mem_os.py +74 -0
- memos/configs/mem_reader.py +53 -0
- memos/configs/mem_scheduler.py +78 -0
- memos/configs/memory.py +195 -0
- memos/configs/parser.py +38 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +64 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/base.py +15 -0
- memos/embedders/factory.py +23 -0
- memos/embedders/ollama.py +74 -0
- memos/embedders/sentence_transformer.py +40 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +215 -0
- memos/graph_dbs/factory.py +21 -0
- memos/graph_dbs/neo4j.py +827 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +16 -0
- memos/llms/factory.py +25 -0
- memos/llms/hf.py +231 -0
- memos/llms/ollama.py +82 -0
- memos/llms/openai.py +34 -0
- memos/llms/utils.py +14 -0
- memos/log.py +78 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +29 -0
- memos/mem_cube/general.py +146 -0
- memos/mem_cube/utils.py +24 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +819 -0
- memos/mem_os/main.py +503 -0
- memos/mem_os/product.py +89 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +27 -0
- memos/mem_reader/factory.py +21 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/simple_struct.py +241 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/base_scheduler.py +164 -0
- memos/mem_scheduler/general_scheduler.py +305 -0
- memos/mem_scheduler/modules/__init__.py +0 -0
- memos/mem_scheduler/modules/base.py +74 -0
- memos/mem_scheduler/modules/dispatcher.py +103 -0
- memos/mem_scheduler/modules/monitor.py +82 -0
- memos/mem_scheduler/modules/redis_service.py +146 -0
- memos/mem_scheduler/modules/retriever.py +41 -0
- memos/mem_scheduler/modules/schemas.py +146 -0
- memos/mem_scheduler/scheduler_factory.py +21 -0
- memos/mem_scheduler/utils.py +26 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +25 -0
- memos/memories/activation/kv.py +232 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +34 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +89 -0
- memos/memories/textual/general.py +286 -0
- memos/memories/textual/item.py +167 -0
- memos/memories/textual/naive.py +185 -0
- memos/memories/textual/tree.py +321 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +305 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +263 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +89 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +158 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +13 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +208 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +68 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +48 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +335 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +19 -0
- memos/parsers/markitdown.py +22 -0
- memos/settings.py +8 -0
- memos/templates/__init__.py +0 -0
- memos/templates/mem_reader_prompts.py +98 -0
- memos/templates/mem_scheduler_prompts.py +65 -0
- memos/templates/mos_prompts.py +63 -0
- memos/types.py +55 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +105 -0
- memos/vec_dbs/factory.py +21 -0
- memos/vec_dbs/item.py +43 -0
- memos/vec_dbs/qdrant.py +292 -0
- memoryos-0.0.1.dist-info/METADATA +0 -53
- memoryos-0.0.1.dist-info/RECORD +0 -5
- {memoryos-0.0.1.dist-info → memoryos-0.1.13.dist-info}/LICENSE +0 -0
- {memoryos-0.0.1.dist-info → memoryos-0.1.13.dist-info}/WHEEL +0 -0
|
@@ -0,0 +1,286 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from tenacity import retry, retry_if_exception_type, stop_after_attempt
|
|
8
|
+
|
|
9
|
+
from memos.configs.memory import GeneralTextMemoryConfig
|
|
10
|
+
from memos.embedders.factory import EmbedderFactory, OllamaEmbedder
|
|
11
|
+
from memos.llms.factory import LLMFactory, OllamaLLM, OpenAILLM
|
|
12
|
+
from memos.log import get_logger
|
|
13
|
+
from memos.memories.textual.base import BaseTextMemory
|
|
14
|
+
from memos.memories.textual.item import TextualMemoryItem
|
|
15
|
+
from memos.types import MessageList
|
|
16
|
+
from memos.vec_dbs.factory import QdrantVecDB, VecDBFactory
|
|
17
|
+
from memos.vec_dbs.item import VecDBItem
|
|
18
|
+
|
|
19
|
+
|
|
20
|
+
# Module-level logger, named after this module via ``__name__``.
logger = get_logger(__name__)
|
|
21
|
+
|
|
22
|
+
|
|
23
|
+
class GeneralTextMemory(BaseTextMemory):
    """General textual memory implementation for storing and retrieving memories.

    Memories are extracted from chat messages by an LLM, embedded with the
    configured embedder and stored as ``VecDBItem`` records in the configured
    vector database.
    """

    def __init__(self, config: GeneralTextMemoryConfig):
        """Initialize memory with the given configuration.

        Args:
            config: Configuration providing the ``extractor_llm``, ``vector_db``
                and ``embedder`` sub-configurations used to build the backends.
        """
        self.config: GeneralTextMemoryConfig = config
        self.extractor_llm: OpenAILLM | OllamaLLM = LLMFactory.from_config(config.extractor_llm)
        self.vector_db: QdrantVecDB = VecDBFactory.from_config(config.vector_db)
        self.embedder: OllamaEmbedder = EmbedderFactory.from_config(config.embedder)

    # EXTRACTION_RETRY_LOG is defined further down in this module; that is fine
    # because the lambda only looks the name up when a retry actually happens.
    @retry(
        stop=stop_after_attempt(3),
        retry=retry_if_exception_type(json.JSONDecodeError),
        before_sleep=lambda retry_state: logger.warning(
            EXTRACTION_RETRY_LOG.format(
                error=retry_state.outcome.exception(),
                attempt_number=retry_state.attempt_number,
                max_attempt_number=3,
            )
        ),
    )
    def extract(self, messages: MessageList) -> list[TextualMemoryItem]:
        """Extract memories based on the messages.

        The messages are serialized to JSON, appended to the extraction prompt
        and sent to the extractor LLM; the reply is parsed as a JSON list. The
        call is attempted up to three times when the reply is not valid JSON.

        Args:
            messages: List of message dictionaries to extract memories from.

        Returns:
            List of TextualMemoryItem objects representing the extracted memories.
        """
        str_messages = json.dumps(messages)
        user_query = EXTRACTION_PROMPT_PART_1 + EXTRACTION_PROMPT_PART_2.format(
            messages=str_messages
        )
        response = self.extractor_llm.generate([{"role": "user", "content": user_query}])
        raw_extracted_memories = json.loads(response)
        extracted_memories = [
            TextualMemoryItem(**memory_dict) for memory_dict in raw_extracted_memories
        ]

        return extracted_memories

    def add(self, memories: list[TextualMemoryItem | dict[str, Any]]) -> None:
        """Add memories.

        Args:
            memories: List of TextualMemoryItem objects or dictionaries to add.
        """
        memory_items = [TextualMemoryItem(**m) if isinstance(m, dict) else m for m in memories]

        # Memory encode (one batch call for all memories)
        embed_memories = self.embedder.embed([m.memory for m in memory_items])

        # Create vector db items; strict=True fails loudly if the embedder did
        # not return exactly one vector per memory.
        vec_db_items = [
            VecDBItem(id=item.id, payload=item.model_dump(), vector=emb)
            for item, emb in zip(memory_items, embed_memories, strict=True)
        ]

        # Add to vector db
        self.vector_db.add(vec_db_items)

    def update(self, memory_id: str, new_memory: TextualMemoryItem | dict[str, Any]) -> None:
        """Update a memory by memory_id.

        Args:
            memory_id: ID of the memory to overwrite.
            new_memory: Replacement memory, as an item or a dictionary. Its ``id``
                is set to ``memory_id`` before it is stored.
        """
        memory_item = (
            TextualMemoryItem(**new_memory) if isinstance(new_memory, dict) else new_memory
        )
        memory_item.id = memory_id

        vec_db_item = VecDBItem(
            id=memory_item.id,
            payload=memory_item.model_dump(),
            vector=self._embed_one_sentence(memory_item.memory),
        )

        self.vector_db.update(memory_id, vec_db_item)

    def search(self, query: str, top_k: int) -> list[TextualMemoryItem]:
        """Search for memories based on a query.

        Args:
            query (str): The query to search for.
            top_k (int): The number of top results to return.

        Returns:
            list[TextualMemoryItem]: List of matching memories, highest score first.
        """
        query_vector = self._embed_one_sentence(query)
        search_results = self.vector_db.search(query_vector, top_k)
        search_results = sorted(  # make higher score first
            search_results, key=lambda x: x.score, reverse=True
        )
        result_memories = [
            TextualMemoryItem(**search_item.payload) for search_item in search_results
        ]
        return result_memories

    def get(self, memory_id: str) -> TextualMemoryItem:
        """Get a memory by its ID.

        Raises:
            ValueError: If the vector database returns ``None`` for ``memory_id``.
        """
        result = self.vector_db.get_by_id(memory_id)
        if result is None:
            raise ValueError(f"Memory with ID {memory_id} not found")
        return TextualMemoryItem(**result.payload)

    def get_by_ids(self, memory_ids: list[str]) -> list[TextualMemoryItem]:
        """Get memories by their IDs.

        Args:
            memory_ids (list[str]): List of memory IDs to retrieve.

        Returns:
            list[TextualMemoryItem]: List of memories with the specified IDs.
        """
        db_items = self.vector_db.get_by_ids(memory_ids)
        memories = [TextualMemoryItem(**db_item.payload) for db_item in db_items]
        return memories

    def get_all(self) -> list[TextualMemoryItem]:
        """Get all memories.

        Returns:
            list[TextualMemoryItem]: List of all memories.
        """
        all_items = self.vector_db.get_all()
        all_memories = [TextualMemoryItem(**memo.payload) for memo in all_items]
        return all_memories

    def delete(self, memory_ids: list[str]) -> None:
        """Delete the memories with the given IDs."""
        self.vector_db.delete(memory_ids)

    def delete_all(self) -> None:
        """Delete all memories by dropping and re-creating the collection."""
        self.vector_db.delete_collection(self.vector_db.config.collection_name)
        self.vector_db.create_collection()

    def load(self, dir: str) -> None:
        """Load memories from ``os.path.join(dir, self.config.memory_filename)``.

        A missing file only produces a warning. Any error raised while reading,
        decoding or inserting is logged and not re-raised.

        Args:
            dir: Directory containing the memory file.
        """
        try:
            memory_file = os.path.join(dir, self.config.memory_filename)

            if not os.path.exists(memory_file):
                logger.warning(f"Memory file not found: {memory_file}")
                return

            with open(memory_file, encoding="utf-8") as f:
                memories = json.load(f)

            vec_db_items = [VecDBItem.from_dict(m) for m in memories]
            self.vector_db.add(vec_db_items)
            logger.info(f"Loaded {len(memories)} memories from {memory_file}")

        except FileNotFoundError:
            logger.error(f"Memory file not found in directory: {dir}")
        except json.JSONDecodeError as e:
            logger.error(f"Error decoding JSON from memory file: {e}")
        except Exception as e:
            logger.error(f"An error occurred while loading memories: {e}")

    def dump(self, dir: str) -> None:
        """Dump memories to os.path.join(dir, self.config.memory_filename).

        The directory is created if needed. Unlike ``load``, errors are logged
        and then re-raised.
        """
        try:
            all_vec_db_items = self.vector_db.get_all()
            json_memories = [memory.to_dict() for memory in all_vec_db_items]

            os.makedirs(dir, exist_ok=True)
            memory_file = os.path.join(dir, self.config.memory_filename)
            with open(memory_file, "w", encoding="utf-8") as f:
                json.dump(json_memories, f, indent=4, ensure_ascii=False)

            logger.info(f"Dumped {len(all_vec_db_items)} memories to {memory_file}")

        except Exception as e:
            logger.error(f"An error occurred while dumping memories: {e}")
            raise

    def drop(
        self,
    ) -> None:
        """Do nothing; this implementation has no drop behavior."""
        pass

    def _embed_one_sentence(self, sentence: str) -> list[float]:
        """Embed a single sentence and return its vector."""
        # ``add`` calls the embedder with a list and gets one vector per input,
        # so wrap the sentence in a one-element batch and unwrap the single
        # result instead of passing a bare string.
        return self.embedder.embed([sentence])[0]
|
|
206
|
+
|
|
207
|
+
|
|
208
|
+
EXTRACTION_PROMPT_PART_1 = f"""You are a memory extractor. Your task is to extract memories from the given messages.
|
|
209
|
+
* You will receive a list of messages, each with a role (user or assistant) and content.
|
|
210
|
+
* Your job is to extract memories related to the user's long-term goals, interests, and emotional states.
|
|
211
|
+
* Each memory should be a dictionary with the following keys:
|
|
212
|
+
- "memory": The content of the memory (string). Rephrase the content if necessary.
|
|
213
|
+
- "metadata": A dictionary containing additional information about the memory.
|
|
214
|
+
* The metadata dictionary should include:
|
|
215
|
+
- "type": The type of memory (string), e.g., "procedure", "fact", "event", "opinion", etc.
|
|
216
|
+
- "memory_time": The time the memory occurred or refers to (string). Must be in standard `YYYY-MM-DD` format. Relative expressions such as "yesterday" or "tomorrow" are not allowed.
|
|
217
|
+
- "source": The origin of the memory (string), e.g., `"conversation"`, `"retrieved"`, `"web"`, `"file"`.
|
|
218
|
+
- "confidence": A numeric score (float between 0 and 100) indicating how certain you are about the accuracy or reliability of the memory.
|
|
219
|
+
- "entities": A list of key entities (array of strings) mentioned in the memory, e.g., people, places, organizations, e.g., `["Alice", "Paris", "OpenAI"]`.
|
|
220
|
+
- "tags": A list of keywords or thematic labels (array of strings) associated with the memory for categorization or retrieval, e.g., `["travel", "health", "project-x"]`.
|
|
221
|
+
- "visibility": The accessibility scope of the memory (string), e.g., `"private"`, `"public"`, `"session"`, determining who or what contexts can access it.
|
|
222
|
+
- "updated_at": The timestamp of the last modification to the memory (string). Useful for tracking memory freshness or change history. Format: ISO 8601 or natural language.
|
|
223
|
+
* Current date and time is {datetime.now().isoformat()}.
|
|
224
|
+
* Only return the list of memories in JSON format.
|
|
225
|
+
* Do not include any explanations
|
|
226
|
+
* Do not include any extra text
|
|
227
|
+
* Do not include code blocks (```json```)
|
|
228
|
+
|
|
229
|
+
## Example
|
|
230
|
+
|
|
231
|
+
### Input
|
|
232
|
+
|
|
233
|
+
[
|
|
234
|
+
{{"role": "user", "content": "I plan to visit Paris next week."}},
|
|
235
|
+
{{"role": "assistant", "content": "Paris is a beautiful city with many attractions."}},
|
|
236
|
+
{{"role": "user", "content": "I love the Eiffel Tower."}},
|
|
237
|
+
{{"role": "assistant", "content": "The Eiffel Tower is a must-see landmark in Paris."}}
|
|
238
|
+
]
|
|
239
|
+
|
|
240
|
+
### Output
|
|
241
|
+
|
|
242
|
+
[
|
|
243
|
+
{{
|
|
244
|
+
"memory": "The user plans to visit Paris on 05-26-2025.",
|
|
245
|
+
"metadata": {{
|
|
246
|
+
"type": "event",
|
|
247
|
+
"memory_time": "2025-05-26",
|
|
248
|
+
"source": "conversation",
|
|
249
|
+
"confidence": 90.0,
|
|
250
|
+
"entities": ["Paris"],
|
|
251
|
+
"tags": ["travel", "plans"],
|
|
252
|
+
"visibility": "private",
|
|
253
|
+
"updated_at": "2025-05-19T00:00:00"
|
|
254
|
+
}}
|
|
255
|
+
}},
|
|
256
|
+
{{
|
|
257
|
+
"memory": "The user loves the Eiffel Tower.",
|
|
258
|
+
"metadata": {{
|
|
259
|
+
"type": "opinion",
|
|
260
|
+
"memory_time": "2025-05-19",
|
|
261
|
+
"source": "conversation",
|
|
262
|
+
"confidence": 100.0,
|
|
263
|
+
"entities": ["Eiffel Tower"],
|
|
264
|
+
"tags": ["opinions", "landmarks"],
|
|
265
|
+
"visibility": "session",
|
|
266
|
+
"updated_at": "2025-05-19T00:00:00"
|
|
267
|
+
}}
|
|
268
|
+
}}
|
|
269
|
+
]
|
|
270
|
+
|
|
271
|
+
"""
|
|
272
|
+
|
|
273
|
+
EXTRACTION_PROMPT_PART_2 = """
|
|
274
|
+
## Query
|
|
275
|
+
|
|
276
|
+
### Input
|
|
277
|
+
|
|
278
|
+
{messages}
|
|
279
|
+
|
|
280
|
+
### Output
|
|
281
|
+
|
|
282
|
+
"""
|
|
283
|
+
|
|
284
|
+
EXTRACTION_RETRY_LOG = """Extracting memory failed due to JSON decode error: {error},
|
|
285
|
+
Attempt retry: {attempt_number} / {max_attempt_number}
|
|
286
|
+
"""
|
|
@@ -0,0 +1,167 @@
|
|
|
1
|
+
"""Defines memory item types for textual memory."""
|
|
2
|
+
|
|
3
|
+
import uuid
|
|
4
|
+
|
|
5
|
+
from datetime import datetime
|
|
6
|
+
from typing import Literal
|
|
7
|
+
|
|
8
|
+
from pydantic import BaseModel, ConfigDict, Field, field_validator
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class TextualMemoryMetadata(BaseModel):
    """Descriptive metadata attached to a textual memory.

    Every field is optional. It records who the memory belongs to, what kind
    of memory it is, when it refers to, where it came from and how it may be
    accessed. Keys that are not declared here are accepted as well
    (``extra="allow"``).
    """

    # --- ownership / lifecycle -------------------------------------------
    user_id: str | None = Field(
        default=None,
        description="The ID of the user associated with the memory. Useful for multi-user systems.",
    )
    session_id: str | None = Field(
        default=None,
        description="The ID of the session during which the memory was created. Useful for tracking context in conversations.",
    )
    status: Literal["activated", "archived", "deleted"] | None = Field(
        default="activated",
        description="The status of the memory, e.g., 'activated', 'archived', 'deleted'.",
    )

    # --- content description ----------------------------------------------
    type: Literal["procedure", "fact", "event", "opinion", "topic"] | None = Field(default=None)
    memory_time: str | None = Field(
        default=None,
        description='The time the memory occurred or refers to. Must be in standard `YYYY-MM-DD` format. Relative expressions such as "yesterday" or "tomorrow" are not allowed.',
    )
    source: Literal["conversation", "retrieved", "web", "file"] | None = Field(
        default=None,
        description="The origin of the memory",
    )
    confidence: float | None = Field(
        default=None,
        description="A numeric score (float between 0 and 100) indicating how certain you are about the accuracy or reliability of the memory.",
    )
    entities: list[str] | None = Field(
        default=None,
        description='A list of key entities mentioned in the memory, e.g., people, places, organizations, e.g., `["Alice", "Paris", "OpenAI"]`.',
    )
    tags: list[str] | None = Field(
        default=None,
        description='A list of keywords or thematic labels associated with the memory for categorization or retrieval, e.g., `["travel", "health", "project-x"]`.',
    )

    # --- access / freshness -------------------------------------------------
    visibility: Literal["private", "public", "session"] | None = Field(
        default=None,
        description="e.g., 'private', 'public', 'session'",
    )
    updated_at: str | None = Field(
        default_factory=lambda: datetime.now().isoformat(),
        description="The timestamp of the last modification to the memory. Useful for tracking memory freshness or change history. Format: ISO 8601.",
    )

    model_config = ConfigDict(extra="allow")

    @field_validator("memory_time")
    @classmethod
    def validate_memory_time(cls, v):
        """Reject a non-empty ``memory_time`` that is not ``YYYY-MM-DD``."""
        # None and the empty string are passed through without parsing.
        if not v:
            return v
        try:
            datetime.strptime(v, "%Y-%m-%d")
        except ValueError as exc:
            raise ValueError("Invalid date format. Use YYYY-MM-DD.") from exc
        return v

    @field_validator("confidence")
    @classmethod
    def validate_confidence(cls, v):
        """Reject a ``confidence`` below 0 or above 100; ``None`` is allowed."""
        if v is None:
            return v
        if v < 0 or v > 100:
            raise ValueError("Confidence must be between 0 and 100.")
        return v

    def __str__(self) -> str:
        """Render the non-``None`` fields as ``key=value`` pairs joined by ``", "``."""
        populated = self.model_dump(exclude_none=True)
        pairs = [f"{name}={value}" for name, value in populated.items()]
        return ", ".join(pairs)
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
class TreeNodeTextualMemoryMetadata(TextualMemoryMetadata):
    """Metadata for tree-structured memory nodes.

    Extends ``TextualMemoryMetadata`` with the memory lifecycle type, a key or
    title, multiple sources, the content embedding, a creation timestamp, a
    usage history and a background text.
    """

    memory_type: Literal["WorkingMemory", "LongTermMemory", "UserMemory"] = Field(
        default="WorkingMemory",
        description="Memory lifecycle type.",
    )
    key: str | None = Field(default=None, description="Memory key or title.")
    sources: list[str] | None = Field(
        default=None,
        description="Multiple origins of the memory (e.g., URLs, notes).",
    )
    embedding: list[float] | None = Field(
        default=None,
        description="The vector embedding of the memory content, used for semantic search or clustering.",
    )
    created_at: str | None = Field(
        default_factory=lambda: datetime.now().isoformat(),
        description="The timestamp of the first creation to the memory. Useful "
        "for tracking memory initialization. Format: ISO 8601.",
    )
    usage: list[str] | None = Field(default=[], description="Usage history of this node")
    background: str | None = Field(default="", description="background of this node")

    @field_validator("sources")
    @classmethod
    def validate_sources(cls, v):
        """Accept ``None`` or a list for ``sources``; reject anything else."""
        if v is None or isinstance(v, list):
            return v
        raise ValueError("Sources must be a list of strings.")

    def __str__(self) -> str:
        """Render the non-``None`` fields as ``key=value`` pairs, omitting ``embedding``."""
        populated = self.model_dump(exclude_none=True)
        # The embedding is left out of the human-readable form.
        populated.pop("embedding", None)
        return ", ".join([f"{name}={value}" for name, value in populated.items()])
|
|
122
|
+
|
|
123
|
+
|
|
124
|
+
class SearchedTreeNodeTextualMemoryMetadata(TreeNodeTextualMemoryMetadata):
    """Tree-node metadata for search results, with an added similarity score."""

    # Optional score of the node with respect to the query.
    relativity: float | None = Field(
        default=None,
        description="Similarity score with respect to the query, 0 ~ 1.",
    )
|
|
130
|
+
|
|
131
|
+
|
|
132
|
+
class TextualMemoryItem(BaseModel):
    """A single textual memory: an ID, the memory text and its metadata.

    This is the common item format shared by the textual memory
    implementations. Undeclared keys are rejected (``extra="forbid"``).
    """

    # Defaults to a freshly generated UUID4 string.
    id: str = Field(default_factory=lambda: str(uuid.uuid4()))
    memory: str
    metadata: (
        TextualMemoryMetadata | TreeNodeTextualMemoryMetadata | SearchedTreeNodeTextualMemoryMetadata
    ) = Field(default_factory=TextualMemoryMetadata)

    model_config = ConfigDict(extra="forbid")

    @field_validator("id")
    @classmethod
    def validate_id(cls, v):
        """Ensure ``id`` parses as a UUID; raise ``ValueError`` otherwise."""
        try:
            uuid.UUID(v)
        except ValueError as exc:
            raise ValueError("Invalid UUID format") from exc
        else:
            return v

    @classmethod
    def from_dict(cls, data: dict) -> "TextualMemoryItem":
        """Build an item from a dictionary of field values."""
        return cls(**data)

    def to_dict(self) -> dict:
        """Return the item as a dictionary, omitting ``None`` values."""
        return self.model_dump(exclude_none=True)

    def __str__(self) -> str:
        """Return a one-line ``<ID | Memory | Metadata>`` summary of the item."""
        metadata_text = str(self.metadata)
        return f"<ID: {self.id} | Memory: {self.memory} | Metadata: {metadata_text}>"
|
|
@@ -0,0 +1,185 @@
|
|
|
1
|
+
import json
|
|
2
|
+
import os
|
|
3
|
+
|
|
4
|
+
from datetime import datetime
|
|
5
|
+
from typing import Any
|
|
6
|
+
|
|
7
|
+
from memos.configs.memory import NaiveTextMemoryConfig
|
|
8
|
+
from memos.llms.factory import LLMFactory
|
|
9
|
+
from memos.log import get_logger
|
|
10
|
+
from memos.memories.textual.base import BaseTextMemory
|
|
11
|
+
from memos.memories.textual.item import TextualMemoryItem, TextualMemoryMetadata
|
|
12
|
+
from memos.types import MessageList
|
|
13
|
+
|
|
14
|
+
|
|
15
|
+
# Module-level logger, named after this module via ``__name__``.
logger = get_logger(__name__)
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
# First half of the extraction prompt: task instructions followed by one worked
# input/output example. It is concatenated with EXTRACTION_PROMPT_PART_2 in
# ``NaiveTextMemory.extract``, which parses the model's reply with ``json.loads``
# and reads the "memory" and "metadata" keys of each entry.
#
# Two things are kept consistent with that parser on purpose:
#   * the key list describes "type" as living inside "metadata", because a
#     top-level "type" is ignored by ``extract``;
#   * the example output is strictly valid JSON (no trailing comma), so that a
#     model imitating it produces something ``json.loads`` accepts.
#
# This is an f-string, so ``datetime.now()` is evaluated once, at import time,
# and literal JSON braces are doubled ({{ }}).
# NOTE(review): in a long-running process the "current date" in the prompt will
# therefore be the import time, not the call time — confirm this is intended.
EXTRACTION_PROMPT_PART_1 = f"""You are a memory extractor. Your task is to extract memories from the given messages.
* You will receive a list of messages, each with a role (user or assistant) and content.
* Your job is to extract the memories from these messages.
* Each memory should be a dictionary with the following keys:
- "memory": The content of the memory (string). Rephrase the content if necessary.
- "metadata": A dictionary containing additional information about the memory.
* The metadata dictionary should include:
- "type": The type of memory (string), e.g., "procedure", "fact", "event", "opinion", etc.
* Current date and time is {datetime.now().isoformat()}.
* Only return the list of memories in JSON format.
* Do not include any other text or explanation.

## Example

### Input

[
{{"role": "user", "content": "I plan to visit Paris next week."}},
{{"role": "assistant", "content": "Paris is a beautiful city with many attractions."}},
{{"role": "user", "content": "I love the Eiffel Tower."}},
{{"role": "assistant", "content": "The Eiffel Tower is a must-see landmark in Paris."}}
]

### Output

[
{{"memory": "User plans to visit Paris next week.", "metadata": {{"type": "event"}}}},
{{"memory": "User loves the Eiffel Tower.", "metadata": {{"type": "opinion"}}}}
]
"""
|
|
46
|
+
|
|
47
|
+
EXTRACTION_PROMPT_PART_2 = """
|
|
48
|
+
## Query
|
|
49
|
+
|
|
50
|
+
### Input
|
|
51
|
+
|
|
52
|
+
{messages}
|
|
53
|
+
|
|
54
|
+
### Output
|
|
55
|
+
|
|
56
|
+
"""
|
|
57
|
+
|
|
58
|
+
|
|
59
|
+
class NaiveTextMemory(BaseTextMemory):
    """Naive textual memory implementation for storing and retrieving memories.

    Memories are kept in an in-process list of dictionaries (``self.memories``)
    and searched by counting whitespace-separated words shared with the query.
    """

    def __init__(self, config: NaiveTextMemoryConfig):
        """Initialize memory with the given configuration.

        Args:
            config: Configuration providing the ``extractor_llm`` sub-configuration
                and the ``memory_filename`` used by ``load`` and ``dump``.
        """
        self.config = config
        self.extractor_llm = LLMFactory.from_config(config.extractor_llm)
        self.memories = []

    def extract(self, messages: MessageList) -> list[TextualMemoryItem]:
        """Extract memories based on the messages.

        The messages are serialized to JSON, appended to the extraction prompt
        and sent to the extractor LLM; the reply is parsed as a JSON list.

        Args:
            messages: List of message dictionaries to extract memories from.

        Returns:
            One TextualMemoryItem per entry of the LLM's reply.
        """
        str_messages = json.dumps(messages)
        user_query = EXTRACTION_PROMPT_PART_1 + EXTRACTION_PROMPT_PART_2.format(
            messages=str_messages
        )
        response = self.extractor_llm.generate([{"role": "user", "content": user_query}])
        raw_extracted_memories = json.loads(response)

        # Convert raw dictionaries to TextualMemoryItem objects. Only the
        # "memory" and "metadata" keys are used; missing ones fall back to an
        # empty string / empty metadata.
        extracted_memories = []
        for memory_dict in raw_extracted_memories:
            memory_content = memory_dict.get("memory", "")
            metadata_dict = memory_dict.get("metadata", {})

            memory_item = TextualMemoryItem(memory=memory_content, metadata=metadata_dict)
            extracted_memories.append(memory_item)

        return extracted_memories

    def add(self, memories: list[TextualMemoryItem | dict[str, Any]]) -> None:
        """Add memories, skipping any whose ID is already stored.

        Args:
            memories: List of TextualMemoryItem objects or dictionaries to add.
        """
        # Build the set of stored IDs once instead of rescanning the whole
        # store for every incoming memory.
        known_ids = {stored["id"] for stored in self.memories}

        for m in memories:
            # Convert dict to TextualMemoryItem if needed
            memory_item = TextualMemoryItem(**m) if isinstance(m, dict) else m

            # Convert to dictionary for storage
            memory_dict = memory_item.model_dump()

            if memory_dict["id"] not in known_ids:
                # Track the new ID so duplicates inside this batch are skipped too.
                known_ids.add(memory_dict["id"])
                self.memories.append(memory_dict)

    def update(self, memory_id: str, new_memory: TextualMemoryItem | dict[str, Any]) -> None:
        """Update a memory by memory_id.

        Nothing happens if no stored memory has ``memory_id``.

        Args:
            memory_id: ID of the memory to overwrite.
            new_memory: Replacement memory, as an item or a dictionary. Its ``id``
                is set to ``memory_id`` before it is stored.
        """
        # Convert dict to TextualMemoryItem if needed
        memory_item = (
            TextualMemoryItem(**new_memory) if isinstance(new_memory, dict) else new_memory
        )

        # Ensure the memory item has the correct ID
        memory_item.id = memory_id
        memory_dict = memory_item.model_dump()

        for i, memory in enumerate(self.memories):
            if memory["id"] == memory_id:
                self.memories[i] = memory_dict
                break

    def search(self, query: str, top_k: int) -> list[TextualMemoryItem]:
        """Search for memories based on a query.

        The score of a memory is the number of distinct whitespace-separated
        words it shares with the query.

        Args:
            query: The query to search for.
            top_k: The number of top results to return.

        Returns:
            Up to ``top_k`` memories, highest score first.
        """
        # The query tokens do not change per memory, so split them only once.
        query_words = set(query.split())
        sims = [
            (memory, len(query_words & set(memory["memory"].split())))
            for memory in self.memories
        ]
        sims.sort(key=lambda x: x[1], reverse=True)
        # Convert search results to TextualMemoryItem objects
        return [TextualMemoryItem(**memory) for memory, _ in sims[:top_k]]

    def get(self, memory_id: str) -> TextualMemoryItem:
        """Get a memory by its ID.

        Returns:
            The stored memory, or an item with empty text and default metadata
            carrying ``memory_id`` when no stored memory matches.
        """
        for memory in self.memories:
            if memory["id"] == memory_id:
                return TextualMemoryItem(**memory)
        # Return empty memory item if not found
        return TextualMemoryItem(id=memory_id, memory="", metadata=TextualMemoryMetadata())

    def get_all(self) -> list[TextualMemoryItem]:
        """Get all memories."""
        return [TextualMemoryItem(**memory) for memory in self.memories]

    def get_by_ids(self, memory_ids: list[str]) -> list[TextualMemoryItem]:
        """Get memories by their IDs.

        Args:
            memory_ids (list[str]): List of memory IDs to retrieve.

        Returns:
            list[TextualMemoryItem]: List of memories with the specified IDs.
        """
        return [self.get(memory_id) for memory_id in memory_ids]

    def delete(self, memory_ids: list[str]) -> None:
        """Delete memories.

        Args:
            memory_ids (list[str]): List of memory IDs to delete.
        """
        # Set membership keeps the filter linear in the number of stored memories.
        ids_to_delete = set(memory_ids)
        self.memories = [m for m in self.memories if m["id"] not in ids_to_delete]

    def delete_all(self) -> None:
        """Delete all memories."""
        self.memories = []

    def load(self, dir: str) -> None:
        """Load memories from ``os.path.join(dir, self.config.memory_filename)``.

        The loaded entries are passed to ``add``. Any error raised while
        reading, decoding or adding is logged and not re-raised.

        Args:
            dir: Directory containing the memory file.
        """
        try:
            with open(os.path.join(dir, self.config.memory_filename), encoding="utf-8") as file:
                raw_memories = json.load(file)
            self.add(raw_memories)
        except FileNotFoundError:
            logger.error(f"Directory not found: {dir}")
        except json.JSONDecodeError:
            logger.error(f"Error decoding JSON from file in directory: {dir}")
        except Exception as e:
            logger.error(f"An error occurred while loading memories: {e}")

    def dump(self, dir: str) -> None:
        """Dump memories to ``os.path.join(dir, self.config.memory_filename)``.

        The directory is created if needed. Unlike ``load``, errors are logged
        and then re-raised.
        """
        try:
            os.makedirs(dir, exist_ok=True)
            memory_file = os.path.join(dir, self.config.memory_filename)
            with open(memory_file, "w", encoding="utf-8") as file:
                json.dump(self.memories, file, indent=4, ensure_ascii=False)
        except Exception as e:
            logger.error(f"An error occurred while dumping memories: {e}")
            raise

    def drop(
        self,
    ) -> None:
        """Do nothing; this implementation has no drop behavior."""
        pass
|