MemoryOS 0.0.1__py3-none-any.whl → 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MemoryOS might be problematic.
- memoryos-0.1.13.dist-info/METADATA +288 -0
- memoryos-0.1.13.dist-info/RECORD +122 -0
- memos/__init__.py +20 -1
- memos/api/start_api.py +420 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/factory.py +22 -0
- memos/chunkers/sentence_chunker.py +35 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +45 -0
- memos/configs/embedder.py +53 -0
- memos/configs/graph_db.py +45 -0
- memos/configs/internet_retriever.py +81 -0
- memos/configs/llm.py +71 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +89 -0
- memos/configs/mem_os.py +74 -0
- memos/configs/mem_reader.py +53 -0
- memos/configs/mem_scheduler.py +78 -0
- memos/configs/memory.py +195 -0
- memos/configs/parser.py +38 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +64 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/base.py +15 -0
- memos/embedders/factory.py +23 -0
- memos/embedders/ollama.py +74 -0
- memos/embedders/sentence_transformer.py +40 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +215 -0
- memos/graph_dbs/factory.py +21 -0
- memos/graph_dbs/neo4j.py +827 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +16 -0
- memos/llms/factory.py +25 -0
- memos/llms/hf.py +231 -0
- memos/llms/ollama.py +82 -0
- memos/llms/openai.py +34 -0
- memos/llms/utils.py +14 -0
- memos/log.py +78 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +29 -0
- memos/mem_cube/general.py +146 -0
- memos/mem_cube/utils.py +24 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +819 -0
- memos/mem_os/main.py +503 -0
- memos/mem_os/product.py +89 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +27 -0
- memos/mem_reader/factory.py +21 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/simple_struct.py +241 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/base_scheduler.py +164 -0
- memos/mem_scheduler/general_scheduler.py +305 -0
- memos/mem_scheduler/modules/__init__.py +0 -0
- memos/mem_scheduler/modules/base.py +74 -0
- memos/mem_scheduler/modules/dispatcher.py +103 -0
- memos/mem_scheduler/modules/monitor.py +82 -0
- memos/mem_scheduler/modules/redis_service.py +146 -0
- memos/mem_scheduler/modules/retriever.py +41 -0
- memos/mem_scheduler/modules/schemas.py +146 -0
- memos/mem_scheduler/scheduler_factory.py +21 -0
- memos/mem_scheduler/utils.py +26 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +25 -0
- memos/memories/activation/kv.py +232 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +34 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +89 -0
- memos/memories/textual/general.py +286 -0
- memos/memories/textual/item.py +167 -0
- memos/memories/textual/naive.py +185 -0
- memos/memories/textual/tree.py +321 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +305 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +263 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +89 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +158 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +13 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +208 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +68 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +48 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +335 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +19 -0
- memos/parsers/markitdown.py +22 -0
- memos/settings.py +8 -0
- memos/templates/__init__.py +0 -0
- memos/templates/mem_reader_prompts.py +98 -0
- memos/templates/mem_scheduler_prompts.py +65 -0
- memos/templates/mos_prompts.py +63 -0
- memos/types.py +55 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +105 -0
- memos/vec_dbs/factory.py +21 -0
- memos/vec_dbs/item.py +43 -0
- memos/vec_dbs/qdrant.py +292 -0
- memoryos-0.0.1.dist-info/METADATA +0 -53
- memoryos-0.0.1.dist-info/RECORD +0 -5
- {memoryos-0.0.1.dist-info → memoryos-0.1.13.dist-info}/LICENSE +0 -0
- {memoryos-0.0.1.dist-info → memoryos-0.1.13.dist-info}/WHEEL +0 -0
memos/memories/textual/tree.py
@@ -0,0 +1,321 @@
import json
import os
import shutil
import tempfile

from datetime import datetime
from pathlib import Path
from typing import Any

from memos.configs.memory import TreeTextMemoryConfig
from memos.embedders.factory import EmbedderFactory, OllamaEmbedder
from memos.graph_dbs.factory import GraphStoreFactory, Neo4jGraphDB
from memos.llms.factory import LLMFactory, OllamaLLM, OpenAILLM
from memos.log import get_logger
from memos.memories.textual.base import BaseTextMemory
from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
from memos.memories.textual.tree_text_memory.organize.manager import MemoryManager
from memos.memories.textual.tree_text_memory.retrieve.internet_retriever_factory import (
    InternetRetrieverFactory,
)
from memos.memories.textual.tree_text_memory.retrieve.searcher import Searcher
from memos.types import MessageList


logger = get_logger(__name__)

class TreeTextMemory(BaseTextMemory):
    """Tree-structured textual memory implementation for storing and retrieving memories."""

    def __init__(self, config: TreeTextMemoryConfig):
        """Initialize memory with the given configuration."""
        self.config: TreeTextMemoryConfig = config
        self.extractor_llm: OpenAILLM | OllamaLLM = LLMFactory.from_config(config.extractor_llm)
        self.dispatcher_llm: OpenAILLM | OllamaLLM = LLMFactory.from_config(config.dispatcher_llm)
        self.embedder: OllamaEmbedder = EmbedderFactory.from_config(config.embedder)
        self.graph_store: Neo4jGraphDB = GraphStoreFactory.from_config(config.graph_db)
        self.memory_manager: MemoryManager = MemoryManager(self.graph_store, self.embedder)

        # Create internet retriever if configured
        self.internet_retriever = None
        if config.internet_retriever is not None:
            self.internet_retriever = InternetRetrieverFactory.from_config(
                config.internet_retriever, self.embedder
            )
            logger.info(
                f"Internet retriever initialized with backend: {config.internet_retriever.backend}"
            )
        else:
            logger.info("No internet retriever configured")

    def add(self, memories: list[TextualMemoryItem | dict[str, Any]]) -> None:
        """Add memories.

        Args:
            memories: List of TextualMemoryItem objects or dictionaries to add.

        Planned pipeline (not yet implemented here):
            memory_items = [TextualMemoryItem(**m) if isinstance(m, dict) else m for m in memories]
            metadata = extract_metadata(memory_items, self.extractor_llm)
            plan = plan_memory_operations(memory_items, metadata, self.graph_store)
            execute_plan(memory_items, metadata, plan, self.graph_store)
        """
        self.memory_manager.add(memories)

    def replace_working_memory(self, memories: list[TextualMemoryItem]) -> None:
        self.memory_manager.replace_working_memory(memories)

    def get_working_memory(self) -> list[TextualMemoryItem]:
        working_memories = self.graph_store.get_all_memory_items(scope="WorkingMemory")
        items = [TextualMemoryItem.from_dict(record) for record in working_memories]
        # Sort by updated_at in descending order
        sorted_items = sorted(
            items, key=lambda x: x.metadata.updated_at or datetime.min, reverse=True
        )
        return sorted_items

    def get_current_memory_size(self) -> dict[str, int]:
        """
        Get the current size of each memory type.
        This delegates to the MemoryManager.
        """
        return self.memory_manager.get_current_memory_size()

    def search(
        self,
        query: str,
        top_k: int,
        info=None,
        mode: str = "fast",
        memory_type: str = "All",
        manual_close_internet: bool = False,
    ) -> list[TextualMemoryItem]:
        """Search for memories based on a query.
        User query -> TaskGoalParser -> MemoryPathResolver ->
        GraphMemoryRetriever -> MemoryReranker -> MemoryReasoner -> Final output

        Args:
            query (str): The query to search for.
            top_k (int): The number of top results to return.
            info (dict): Leave a record of memory consumption.
            mode (str, optional): The mode of the search.
                - 'fast': Uses a faster search process, sacrificing some precision for speed.
                - 'fine': Uses a more detailed search process that invokes large models for higher precision, at the cost of speed.
            memory_type (str): Type restriction for the search.
                ['All', 'WorkingMemory', 'LongTermMemory', 'UserMemory']
            manual_close_internet (bool): If True, the internet retriever is disabled for this search; this takes priority over the config.
        Returns:
            list[TextualMemoryItem]: List of matching memories.
        """
        if (self.internet_retriever is not None) and manual_close_internet:
            logger.warning(
                "Internet retriever was initialized from config, but manual_close_internet=True, so it will be disabled"
            )
            # Note: this clears the retriever on the instance, so it stays disabled
            # for subsequent searches as well.
            self.internet_retriever = None
        searcher = Searcher(
            self.dispatcher_llm,
            self.graph_store,
            self.embedder,
            internet_retriever=self.internet_retriever,
        )
        return searcher.search(query, top_k, info, mode, memory_type)

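    # Example call (editor's illustration, not part of the package; the query
    # string and argument values are made up):
    #   results = tree_memory.search(
    #       "what are the user's hobbies?", top_k=5, mode="fast", memory_type="UserMemory"
    #   )
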
    def get_relevant_subgraph(
        self, query: str, top_k: int = 5, depth: int = 2, center_status: str = "activated"
    ) -> dict[str, Any]:
        """
        Find and merge the local neighborhood sub-graphs of the top-k
        nodes most relevant to the query.
        Process:
            1. Embed the user query into a vector representation.
            2. Use vector similarity search to find the top-k similar nodes.
            3. For each similar node:
                - Ensure its status matches `center_status` (e.g., 'activated').
                - Retrieve its local subgraph up to `depth` hops.
                - Collect the center node, its neighbors, and connecting edges.
            4. Merge all retrieved subgraphs into a single unified subgraph.
            5. Return the merged subgraph structure.

        Args:
            query (str): The user input or concept to find relevant memories for.
            top_k (int, optional): How many top similar nodes to retrieve. Default is 5.
            depth (int, optional): The neighborhood depth (number of hops). Default is 2.
            center_status (str, optional): Status condition the center node must satisfy (e.g., 'activated').

        Returns:
            dict[str, Any]: A subgraph dict with:
                - 'core_id': ID of the top matching core node, or None if none found.
                - 'nodes': List of unique nodes (core + neighbors) in the merged subgraph.
                - 'edges': List of unique edges (as dicts with 'source', 'target', 'type') in the merged subgraph.
        """
        # Step 1: Embed query
        query_embedding = self.embedder.embed([query])[0]

        # Step 2: Get the top-k similar nodes
        similar_nodes = self.graph_store.search_by_embedding(query_embedding, top_k=top_k)
        if not similar_nodes:
            logger.info("No similar nodes found for query embedding.")
            return {"core_id": None, "nodes": [], "edges": []}

        # Step 3: Fetch neighborhoods
        all_nodes = {}
        all_edges = set()
        cores = []

        for node in similar_nodes:
            core_id = node["id"]
            score = node["score"]

            subgraph = self.graph_store.get_subgraph(
                center_id=core_id, depth=depth, center_status=center_status
            )

            if not subgraph["core_node"]:
                logger.info(f"Skipping node {core_id} (inactive or not found).")
                continue

            core_node = subgraph["core_node"]
            neighbors = subgraph["neighbors"]
            edges = subgraph["edges"]

            # Collect nodes
            all_nodes[core_node["id"]] = core_node
            for n in neighbors:
                all_nodes[n["id"]] = n

            # Collect edges
            for e in edges:
                all_edges.add((e["source"], e["target"], e["type"]))

            cores.append(
                {"id": core_id, "score": score, "core_node": core_node, "neighbors": neighbors}
            )

        # Guard: if every candidate was skipped, cores[0] below would raise IndexError
        if not cores:
            logger.info("No subgraphs matched the required center status.")
            return {"core_id": None, "nodes": [], "edges": []}

        top_core = cores[0]
        return {
            "core_id": top_core["id"],
            "nodes": list(all_nodes.values()),
            "edges": [{"source": f, "target": t, "type": ty} for (f, t, ty) in all_edges],
        }

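    # Example return value (editor's illustration; IDs are made up):
    #   {
    #       "core_id": "6f1e...",
    #       "nodes": [{"id": "6f1e...", "memory": "...", ...}, {"id": "a2b3...", ...}],
    #       "edges": [{"source": "6f1e...", "target": "a2b3...", "type": "PARENT"}],
    #   }
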
    def extract(self, messages: MessageList) -> list[TextualMemoryItem]:
        raise NotImplementedError

    def update(self, memory_id: str, new_memory: TextualMemoryItem | dict[str, Any]) -> None:
        raise NotImplementedError

    def get(self, memory_id: str) -> TextualMemoryItem:
        """Get a memory by its ID."""
        result = self.graph_store.get_node(memory_id)
        if result is None:
            raise ValueError(f"Memory with ID {memory_id} not found")
        metadata_dict = result.get("metadata", {})
        return TextualMemoryItem(
            id=result["id"],
            memory=result["memory"],
            metadata=TreeNodeTextualMemoryMetadata(**metadata_dict),
        )

    def get_by_ids(self, memory_ids: list[str]) -> list[TextualMemoryItem]:
        raise NotImplementedError

    def get_all(self) -> dict:
        """Get all memories.

        Returns:
            dict: The exported graph (nodes and edges) containing all memories.
        """
        all_items = self.graph_store.export_graph()
        return all_items

    def delete(self, memory_ids: list[str]) -> None:
        raise NotImplementedError

    def delete_all(self) -> None:
        """Delete all memories and their relationships from the graph store."""
        try:
            self.graph_store.clear()
            logger.info("All memories and edges have been deleted from the graph.")
        except Exception as e:
            logger.error(f"An error occurred while deleting all memories: {e}")
            raise

    def load(self, dir: str) -> None:
        """Load memories from os.path.join(dir, self.config.memory_filename)."""
        try:
            memory_file = os.path.join(dir, self.config.memory_filename)

            if not os.path.exists(memory_file):
                logger.warning(f"Memory file not found: {memory_file}")
                return

            with open(memory_file, encoding="utf-8") as f:
                memories = json.load(f)

            self.graph_store.import_graph(memories)
            logger.info(f"Loaded {len(memories)} memories from {memory_file}")

        except FileNotFoundError:
            logger.error(f"Memory file not found in directory: {dir}")
        except json.JSONDecodeError as e:
            logger.error(f"Error decoding JSON from memory file: {e}")
        except Exception as e:
            logger.error(f"An error occurred while loading memories: {e}")

    def dump(self, dir: str) -> None:
        """Dump memories to os.path.join(dir, self.config.memory_filename)"""
        try:
            json_memories = self.graph_store.export_graph()

            os.makedirs(dir, exist_ok=True)
            memory_file = os.path.join(dir, self.config.memory_filename)
            with open(memory_file, "w", encoding="utf-8") as f:
                json.dump(json_memories, f, indent=4, ensure_ascii=False)

            logger.info(f"Dumped {len(json_memories.get('nodes', []))} memories to {memory_file}")

        except Exception as e:
            logger.error(f"An error occurred while dumping memories: {e}")
            raise

    def drop(self, keep_last_n: int = 30) -> None:
        """
        Export all memory data to a versioned backup dir and drop the Neo4j database.
        Only the latest `keep_last_n` backups will be retained.
        """
        try:
            backup_root = Path(tempfile.gettempdir()) / "memos_backups"
            backup_root.mkdir(parents=True, exist_ok=True)

            timestamp = datetime.now().strftime("%Y%m%d_%H%M%S")
            backup_dir = backup_root / f"memos_backup_{timestamp}"
            backup_dir.mkdir()

            logger.info(f"Exporting memory to backup dir: {backup_dir}")
            self.dump(str(backup_dir))

            # Clean up old backups
            self._cleanup_old_backups(backup_root, keep_last_n)

            self.graph_store.drop_database()
            logger.info(f"Database '{self.graph_store.db_name}' dropped after backup.")

        except Exception as e:
            logger.error(f"Error in drop(): {e}")
            raise

    @staticmethod
    def _cleanup_old_backups(root_dir: Path, keep_last_n: int) -> None:
        """
        Keep only the latest `keep_last_n` backup directories under `root_dir`.
        Older ones will be deleted.
        """
        backups = sorted(
            [d for d in root_dir.iterdir() if d.is_dir() and d.name.startswith("memos_backup_")],
            key=lambda p: p.name,  # name includes timestamp
            reverse=True,
        )

        to_delete = backups[keep_last_n:]
        for old_dir in to_delete:
            try:
                shutil.rmtree(old_dir)
                logger.info(f"Deleted old backup directory: {old_dir}")
            except Exception as e:
                logger.warning(f"Failed to delete backup {old_dir}: {e}")

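Taken together, tree.py exposes the full lifecycle of a tree-structured memory: construct from config, add, search, dump/load, and drop with backups. A minimal lifecycle sketch follows (editor's illustration, not code shipped in the package; the TreeTextMemoryConfig fields live in memos/configs/memory.py, +195 lines in this release, and are elided with `...` placeholders, so this skeleton is not runnable as-is):

# Editor's sketch: how the TreeTextMemory API above fits together.
from memos.configs.memory import TreeTextMemoryConfig
from memos.memories.textual.item import TextualMemoryItem
from memos.memories.textual.tree import TreeTextMemory

config = TreeTextMemoryConfig(...)  # extractor_llm, dispatcher_llm, embedder, graph_db, ...
memory = TreeTextMemory(config)

# add() delegates to MemoryManager (see the organize/manager.py hunk below),
# which fans items out to WorkingMemory / LongTermMemory / UserMemory.
memory.add([TextualMemoryItem(memory="User enjoys film photography", metadata=...)])

# 'fast' trades some precision for speed; 'fine' invokes large models to rerank.
results = memory.search("what are the user's hobbies?", top_k=5, mode="fast")

# Persist the whole graph as JSON under dir/<memory_filename>, then restore it.
memory.dump("./backup")
memory.load("./backup")
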
File without changes: memos/memories/textual/tree_text_memory/__init__.py
File without changes: memos/memories/textual/tree_text_memory/organize/__init__.py
memos/memories/textual/tree_text_memory/organize/manager.py
@@ -0,0 +1,305 @@
import uuid

from concurrent.futures import ThreadPoolExecutor, as_completed
from datetime import datetime

from memos.embedders.factory import OllamaEmbedder
from memos.graph_dbs.neo4j import Neo4jGraphDB
from memos.log import get_logger
from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata


logger = get_logger(__name__)

class MemoryManager:
    def __init__(
        self,
        graph_store: Neo4jGraphDB,
        embedder: OllamaEmbedder,
        memory_size: dict | None = None,
        threshold: float | None = 0.80,
        merged_threshold: float | None = 0.92,
    ):
        self.graph_store = graph_store
        self.embedder = embedder
        self.memory_size = memory_size
        self.current_memory_size = {
            "WorkingMemory": 0,
            "LongTermMemory": 0,
            "UserMemory": 0,
        }
        if not memory_size:
            self.memory_size = {
                "WorkingMemory": 20,
                "LongTermMemory": 10000,
                "UserMemory": 10000,
            }
        self._threshold = threshold
        self._merged_threshold = merged_threshold

    def add(self, memories: list[TextualMemoryItem]) -> None:
        """
        Add new memories in parallel to the different memory types (WorkingMemory, LongTermMemory, UserMemory).
        """
        with ThreadPoolExecutor(max_workers=8) as executor:
            futures = [executor.submit(self._process_memory, memory) for memory in memories]
            for future in as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    logger.exception("Memory processing error: ", exc_info=e)

        self.graph_store.remove_oldest_memory(
            memory_type="WorkingMemory", keep_latest=self.memory_size["WorkingMemory"]
        )
        self.graph_store.remove_oldest_memory(
            memory_type="LongTermMemory", keep_latest=self.memory_size["LongTermMemory"]
        )
        self.graph_store.remove_oldest_memory(
            memory_type="UserMemory", keep_latest=self.memory_size["UserMemory"]
        )

        self._refresh_memory_size()

    def replace_working_memory(self, memories: list[TextualMemoryItem]) -> None:
        """
        Replace the WorkingMemory contents with the top-k given memories.
        """
        working_memory_top_k = memories[: self.memory_size["WorkingMemory"]]
        with ThreadPoolExecutor(max_workers=8) as executor:
            futures = [
                executor.submit(self._add_memory_to_db, memory, "WorkingMemory")
                for memory in working_memory_top_k
            ]
            for future in as_completed(futures):
                try:
                    future.result()
                except Exception as e:
                    logger.exception("Memory processing error: ", exc_info=e)

        self.graph_store.remove_oldest_memory(
            memory_type="WorkingMemory", keep_latest=self.memory_size["WorkingMemory"]
        )
        self._refresh_memory_size()

    def get_current_memory_size(self) -> dict[str, int]:
        """
        Return the cached memory type counts.
        """
        return self.current_memory_size

    def _refresh_memory_size(self) -> None:
        """
        Query the latest counts from the graph store and update internal state.
        """
        results = self.graph_store.get_grouped_counts(group_fields=["memory_type"])
        self.current_memory_size = {record["memory_type"]: record["count"] for record in results}
        logger.info(f"[MemoryManager] Refreshed memory sizes: {self.current_memory_size}")

    def _process_memory(self, memory: TextualMemoryItem):
        """
        Process a memory item and route it to the relevant memory types (WorkingMemory, LongTermMemory, UserMemory).
        This method is executed in a worker thread for each memory item.
        """
        # Add to WorkingMemory
        self._add_memory_to_db(memory, "WorkingMemory")

        # Add to LongTermMemory and UserMemory
        if memory.metadata.memory_type in ["LongTermMemory", "UserMemory"]:
            self._add_to_graph_memory(
                memory=memory,
                memory_type=memory.metadata.memory_type,
            )

    def _add_memory_to_db(self, memory: TextualMemoryItem, memory_type: str):
        """
        Add a single memory item to the graph store, with FIFO logic for WorkingMemory.
        """
        metadata = memory.metadata.model_copy(update={"memory_type": memory_type}).model_dump(
            exclude_none=True
        )
        metadata["updated_at"] = datetime.now().isoformat()
        working_memory = TextualMemoryItem(memory=memory.memory, metadata=metadata)

        # Insert node into graph
        self.graph_store.add_node(working_memory.id, working_memory.memory, metadata)

    def _add_to_graph_memory(self, memory: TextualMemoryItem, memory_type: str):
        """
        Generalized method to add memory to a graph-based memory type (e.g., LongTermMemory, UserMemory).

        Parameters:
        - memory: memory item to insert
        - memory_type: "LongTermMemory" | "UserMemory"

        Deduplication thresholds come from the instance: `self._threshold` for
        candidate retrieval and `self._merged_threshold` for merging.
        """
        embedding = memory.metadata.embedding

        # Step 1: Find similar nodes for possible merging
        similar_nodes = self.graph_store.search_by_embedding(
            vector=embedding,
            top_k=3,
            scope=memory_type,
            threshold=self._threshold,
            status="activated",
        )

        if similar_nodes and similar_nodes[0]["score"] > self._merged_threshold:
            self._merge(memory, similar_nodes)
        else:
            node_id = str(uuid.uuid4())
            # Step 2: Add new node to graph
            self.graph_store.add_node(
                node_id, memory.memory, memory.metadata.model_dump(exclude_none=True)
            )

            # Step 3: Optionally link to a summary node based on topic
            if memory.metadata.tags:
                parent_id = self._ensure_structure_path(
                    memory_type=memory_type, metadata=memory.metadata
                )
                if parent_id:
                    self.graph_store.add_edge(parent_id, node_id, "PARENT")

    def _merge(self, source_node: TextualMemoryItem, similar_nodes: list[dict]) -> None:
        """
        TODO: Extend node traceability by optionally preserving source nodes and linking them with MERGED_FROM edges.

        Merge the source memory into the most similar existing node (only one),
        and establish MERGED_TO lineage edges in the graph.

        Parameters:
            source_node: The new memory item (not yet in the graph)
            similar_nodes: A list of dicts returned by search_by_embedding(), ordered by similarity
        """
        original_node = similar_nodes[0]
        original_id = original_node["id"]
        original_data = self.graph_store.get_node(original_id)

        target_text = original_data.get("memory", "")
        merged_text = f"{target_text}\n⟵MERGED⟶\n{source_node.memory}"

        original_meta = TreeNodeTextualMemoryMetadata(**original_data["metadata"])
        source_meta = source_node.metadata

        merged_key = source_meta.key or original_meta.key
        merged_tags = list(set((original_meta.tags or []) + (source_meta.tags or [])))
        merged_sources = list(set((original_meta.sources or []) + (source_meta.sources or [])))
        merged_background = f"{original_meta.background}\n⟵MERGED⟶\n{source_meta.background}"
        merged_embedding = self.embedder.embed([merged_text])[0]

        merged_confidence = float((original_meta.confidence + source_meta.confidence) / 2)
        merged_usage = list(set((original_meta.usage or []) + (source_meta.usage or [])))

        # Create new merged node
        merged_id = str(uuid.uuid4())
        merged_metadata = source_meta.model_copy(
            update={
                "embedding": merged_embedding,
                "updated_at": datetime.now().isoformat(),
                "key": merged_key,
                "tags": merged_tags,
                "sources": merged_sources,
                "background": merged_background,
                "confidence": merged_confidence,
                "usage": merged_usage,
            }
        )

        self.graph_store.add_node(
            merged_id, merged_text, merged_metadata.model_dump(exclude_none=True)
        )

        # Add traceability edges: both original and new point to merged node
        self.graph_store.add_edge(original_id, merged_id, type="MERGED_TO")
        self.graph_store.update_node(original_id, {"status": "archived"})
        source_id = str(uuid.uuid4())
        source_metadata = source_node.metadata.model_copy(update={"status": "archived"})
        self.graph_store.add_node(source_id, source_node.memory, source_metadata.model_dump())
        self.graph_store.add_edge(source_id, merged_id, type="MERGED_TO")
        # After creating the merged node and tracing lineage, migrate edges
        self._inherit_edges(original_id, merged_id)

        # Relate other similar nodes to the merged node if needed
        for related_node in similar_nodes[1:]:
            if not self.graph_store.edge_exists(
                merged_id, related_node["id"], type="ANY", direction="ANY"
            ):
                self.graph_store.add_edge(merged_id, related_node["id"], type="RELATE")

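    # Editor's note (illustration, not package code): after a merge, the graph
    # holds three nodes: the archived original, an archived copy of the new
    # source memory, and the active merged node, with MERGED_TO edges running
    # from both archived nodes into the merged one.
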
    def _inherit_edges(self, from_id: str, to_id: str) -> None:
        """
        Migrate all non-lineage edges from `from_id` to `to_id`,
        and remove them from `from_id` after copying.
        """
        edges = self.graph_store.get_edges(from_id, type="ANY", direction="ANY")

        for edge in edges:
            if edge["type"] == "MERGED_TO":
                continue  # Keep lineage edges

            new_from = to_id if edge["from"] == from_id else edge["from"]
            new_to = to_id if edge["to"] == from_id else edge["to"]

            if new_from == new_to:
                continue

            # Add edge to merged node if it doesn't already exist
            if not self.graph_store.edge_exists(new_from, new_to, edge["type"], direction="ANY"):
                self.graph_store.add_edge(new_from, new_to, edge["type"])

            # Remove original edge if it involved the archived node
            self.graph_store.delete_edge(edge["from"], edge["to"], edge["type"])

    def _ensure_structure_path(
        self, memory_type: str, metadata: TreeNodeTextualMemoryMetadata
    ) -> str:
        """
        Ensure the structural path exists (ROOT → ... → final node) and return the last node's ID.

        Args:
            memory_type: "LongTermMemory" | "UserMemory"
            metadata: metadata of the memory being inserted; its `key` names the structure node

        Returns:
            Final node ID of the structure path.
        """
        # Step 1: Try to find an existing memory node with content == key
        existing = self.graph_store.get_by_metadata(
            [
                {"field": "memory", "op": "=", "value": metadata.key},
                {"field": "memory_type", "op": "=", "value": memory_type},
            ]
        )
        if existing:
            node_id = existing[0]  # Use the first match
        else:
            # Step 2: If not found, create a new structure node
            new_node = TextualMemoryItem(
                memory=metadata.key,
                metadata=TreeNodeTextualMemoryMetadata(
                    user_id=metadata.user_id,
                    session_id=metadata.session_id,
                    memory_type=memory_type,
                    status="activated",
                    tags=[],
                    key=metadata.key,
                    embedding=self.embedder.embed([metadata.key])[0],
                    usage=[],
                    sources=[],
                    confidence=0.99,
                    background="",
                ),
            )

            self.graph_store.add_node(
                id=new_node.id,
                memory=new_node.memory,
                metadata=new_node.metadata.model_dump(exclude_none=True),
            )
            node_id = new_node.id

        # Step 3: Return this structure node ID as the parent_id
        return node_id

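The insert-or-merge policy in `_add_to_graph_memory` is the core deduplication mechanism added in this release. Below is a small standalone sketch of that decision (editor's illustration; the constant and function names are hypothetical, and the scores stand in for the similarity values returned by search_by_embedding):

RETRIEVAL_THRESHOLD = 0.80  # mirrors MemoryManager(threshold=0.80)
MERGE_THRESHOLD = 0.92      # mirrors MemoryManager(merged_threshold=0.92)


def decide_action(candidate_scores: list[float]) -> str:
    """Return what MemoryManager would do with a new LongTermMemory/UserMemory item."""
    # search_by_embedding(top_k=3, threshold=...) only returns candidates above
    # the retrieval threshold, so we filter the same way here.
    candidates = sorted((s for s in candidate_scores if s > RETRIEVAL_THRESHOLD), reverse=True)
    if candidates and candidates[0] > MERGE_THRESHOLD:
        return "merge into the best match (archive both, add MERGED_TO lineage edges)"
    return "insert as a new node (optionally under a structure-path PARENT)"


print(decide_action([0.95, 0.81]))  # merge into the best match ...
print(decide_action([0.85, 0.83]))  # insert as a new node ...
print(decide_action([0.40]))        # insert as a new node ...
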
File without changes: memos/memories/textual/tree_text_memory/retrieve/__init__.py