MemoryOS 0.2.2__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MemoryOS might be problematic. Click here for more details.
- {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/METADATA +6 -1
- {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/RECORD +61 -55
- memos/__init__.py +1 -1
- memos/api/config.py +6 -8
- memos/api/context/context.py +1 -1
- memos/api/context/dependencies.py +11 -0
- memos/configs/internet_retriever.py +13 -0
- memos/configs/mem_scheduler.py +38 -16
- memos/graph_dbs/base.py +30 -3
- memos/graph_dbs/nebular.py +442 -194
- memos/graph_dbs/neo4j.py +14 -5
- memos/log.py +5 -0
- memos/mem_os/core.py +19 -9
- memos/mem_os/main.py +1 -1
- memos/mem_os/product.py +6 -69
- memos/mem_os/utils/default_config.py +1 -1
- memos/mem_os/utils/format_utils.py +11 -47
- memos/mem_os/utils/reference_utils.py +133 -0
- memos/mem_scheduler/base_scheduler.py +58 -55
- memos/mem_scheduler/{modules → general_modules}/base.py +1 -2
- memos/mem_scheduler/{modules → general_modules}/dispatcher.py +54 -15
- memos/mem_scheduler/{modules → general_modules}/rabbitmq_service.py +4 -4
- memos/mem_scheduler/{modules → general_modules}/redis_service.py +1 -1
- memos/mem_scheduler/{modules → general_modules}/retriever.py +19 -5
- memos/mem_scheduler/{modules → general_modules}/scheduler_logger.py +10 -4
- memos/mem_scheduler/general_scheduler.py +110 -67
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +305 -0
- memos/mem_scheduler/{modules/monitor.py → monitors/general_monitor.py} +57 -19
- memos/mem_scheduler/mos_for_test_scheduler.py +7 -1
- memos/mem_scheduler/schemas/general_schemas.py +3 -2
- memos/mem_scheduler/schemas/message_schemas.py +2 -1
- memos/mem_scheduler/schemas/monitor_schemas.py +10 -2
- memos/mem_scheduler/utils/misc_utils.py +43 -2
- memos/memories/activation/item.py +1 -1
- memos/memories/activation/kv.py +20 -8
- memos/memories/textual/base.py +1 -1
- memos/memories/textual/general.py +1 -1
- memos/memories/textual/tree_text_memory/organize/{conflict.py → handler.py} +30 -48
- memos/memories/textual/tree_text_memory/organize/manager.py +8 -96
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +2 -0
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +102 -140
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +229 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +9 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +15 -8
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +1 -1
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +177 -125
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +7 -2
- memos/memories/textual/tree_text_memory/retrieve/utils.py +1 -1
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/thread_safe_dict.py +288 -0
- memos/templates/mem_reader_prompts.py +2 -0
- memos/templates/mem_scheduler_prompts.py +23 -10
- memos/templates/mos_prompts.py +40 -11
- memos/templates/tree_reorganize_prompts.py +24 -17
- memos/utils.py +19 -0
- memos/memories/textual/tree_text_memory/organize/redundancy.py +0 -193
- {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/LICENSE +0 -0
- {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/WHEEL +0 -0
- {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/entry_points.txt +0 -0
- /memos/mem_scheduler/{modules → general_modules}/__init__.py +0 -0
- /memos/mem_scheduler/{modules → general_modules}/misc.py +0 -0
|
@@ -44,16 +44,23 @@ class GraphMemoryRetriever:
|
|
|
44
44
|
|
|
45
45
|
if memory_scope == "WorkingMemory":
|
|
46
46
|
# For working memory, retrieve all entries (no filtering)
|
|
47
|
-
working_memories = self.graph_store.get_all_memory_items(
|
|
47
|
+
working_memories = self.graph_store.get_all_memory_items(
|
|
48
|
+
scope="WorkingMemory", include_embedding=True
|
|
49
|
+
)
|
|
48
50
|
return [TextualMemoryItem.from_dict(record) for record in working_memories]
|
|
49
51
|
|
|
50
|
-
|
|
51
|
-
|
|
52
|
+
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
|
|
53
|
+
# Structured graph-based retrieval
|
|
54
|
+
future_graph = executor.submit(self._graph_recall, parsed_goal, memory_scope)
|
|
55
|
+
# Vector similarity search
|
|
56
|
+
future_vector = executor.submit(
|
|
57
|
+
self._vector_recall, query_embedding, memory_scope, top_k
|
|
58
|
+
)
|
|
52
59
|
|
|
53
|
-
|
|
54
|
-
|
|
60
|
+
graph_results = future_graph.result()
|
|
61
|
+
vector_results = future_vector.result()
|
|
55
62
|
|
|
56
|
-
#
|
|
63
|
+
# Merge and deduplicate by ID
|
|
57
64
|
combined = {item.id: item for item in graph_results + vector_results}
|
|
58
65
|
|
|
59
66
|
graph_ids = {item.id for item in graph_results}
|
|
@@ -101,7 +108,7 @@ class GraphMemoryRetriever:
|
|
|
101
108
|
return []
|
|
102
109
|
|
|
103
110
|
# Load nodes and post-filter
|
|
104
|
-
node_dicts = self.graph_store.get_nodes(list(candidate_ids))
|
|
111
|
+
node_dicts = self.graph_store.get_nodes(list(candidate_ids), include_embedding=True)
|
|
105
112
|
|
|
106
113
|
final_nodes = []
|
|
107
114
|
for node in node_dicts:
|
|
@@ -152,6 +159,6 @@ class GraphMemoryRetriever:
|
|
|
152
159
|
|
|
153
160
|
# Step 3: Extract matched IDs and retrieve full nodes
|
|
154
161
|
unique_ids = set({r["id"] for r in all_matches})
|
|
155
|
-
node_dicts = self.graph_store.get_nodes(list(unique_ids))
|
|
162
|
+
node_dicts = self.graph_store.get_nodes(list(unique_ids), include_embedding=True)
|
|
156
163
|
|
|
157
164
|
return [TextualMemoryItem.from_dict(record) for record in node_dicts]
|
|
@@ -78,7 +78,7 @@ class MemoryReranker:
|
|
|
78
78
|
embeddings = [item.metadata.embedding for item in items_with_embeddings]
|
|
79
79
|
|
|
80
80
|
if not embeddings:
|
|
81
|
-
return graph_results[:top_k]
|
|
81
|
+
return [(item, 0.5) for item in graph_results[:top_k]]
|
|
82
82
|
|
|
83
83
|
# Step 2: Compute cosine similarities
|
|
84
84
|
similarity_scores = batch_cosine_similarity(query_embedding, embeddings)
|
|
@@ -8,6 +8,7 @@ from memos.graph_dbs.factory import Neo4jGraphDB
|
|
|
8
8
|
from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
|
|
9
9
|
from memos.log import get_logger
|
|
10
10
|
from memos.memories.textual.item import SearchedTreeNodeTextualMemoryMetadata, TextualMemoryItem
|
|
11
|
+
from memos.utils import timed
|
|
11
12
|
|
|
12
13
|
from .internet_retriever_factory import InternetRetrieverFactory
|
|
13
14
|
from .reasoner import MemoryReasoner
|
|
@@ -38,8 +39,9 @@ class Searcher:
|
|
|
38
39
|
# Create internet retriever from config if provided
|
|
39
40
|
self.internet_retriever = internet_retriever
|
|
40
41
|
|
|
42
|
+
@timed
|
|
41
43
|
def search(
|
|
42
|
-
self, query: str, top_k: int, info=None, mode
|
|
44
|
+
self, query: str, top_k: int, info=None, mode="fast", memory_type="All"
|
|
43
45
|
) -> list[TextualMemoryItem]:
|
|
44
46
|
"""
|
|
45
47
|
Search for memories based on a query.
|
|
@@ -57,25 +59,53 @@ class Searcher:
|
|
|
57
59
|
Returns:
|
|
58
60
|
list[TextualMemoryItem]: List of matching memories.
|
|
59
61
|
"""
|
|
62
|
+
logger.info(
|
|
63
|
+
f"[SEARCH] Start query='{query}', top_k={top_k}, mode={mode}, memory_type={memory_type}"
|
|
64
|
+
)
|
|
60
65
|
if not info:
|
|
61
66
|
logger.warning(
|
|
62
67
|
"Please input 'info' when use tree.search so that "
|
|
63
68
|
"the database would store the consume history."
|
|
64
69
|
)
|
|
65
70
|
info = {"user_id": "", "session_id": ""}
|
|
66
|
-
|
|
71
|
+
else:
|
|
72
|
+
logger.debug(f"[SEARCH] Received info dict: {info}")
|
|
73
|
+
|
|
74
|
+
parsed_goal, query_embedding, context, query = self._parse_task(query, info, mode)
|
|
75
|
+
results = self._retrieve_paths(
|
|
76
|
+
query, parsed_goal, query_embedding, info, top_k, mode, memory_type
|
|
77
|
+
)
|
|
78
|
+
deduped = self._deduplicate_results(results)
|
|
79
|
+
final_results = self._sort_and_trim(deduped, top_k)
|
|
80
|
+
self._update_usage_history(final_results, info)
|
|
81
|
+
|
|
82
|
+
logger.info(f"[SEARCH] Done. Total {len(final_results)} results.")
|
|
83
|
+
return final_results
|
|
84
|
+
|
|
85
|
+
@timed
|
|
86
|
+
def _parse_task(self, query, info, mode, top_k=5):
|
|
87
|
+
"""Parse user query, do embedding search and create context"""
|
|
67
88
|
context = []
|
|
89
|
+
query_embedding = None
|
|
90
|
+
|
|
91
|
+
# fine mode will trigger initial embedding search
|
|
68
92
|
if mode == "fine":
|
|
93
|
+
logger.info("[SEARCH] Fine mode: embedding search")
|
|
69
94
|
query_embedding = self.embedder.embed([query])[0]
|
|
70
|
-
|
|
95
|
+
|
|
96
|
+
# retrieve related nodes by embedding
|
|
71
97
|
related_nodes = [
|
|
72
|
-
self.graph_store.get_node(
|
|
98
|
+
self.graph_store.get_node(n["id"])
|
|
99
|
+
for n in self.graph_store.search_by_embedding(query_embedding, top_k=top_k)
|
|
73
100
|
]
|
|
101
|
+
context = list({node["memory"] for node in related_nodes})
|
|
74
102
|
|
|
75
|
-
|
|
76
|
-
|
|
103
|
+
# optional: supplement context with internet knowledge
|
|
104
|
+
if self.internet_retriever:
|
|
105
|
+
extra = self.internet_retriever.retrieve_from_internet(query=query, top_k=3)
|
|
106
|
+
context.extend(item.memory.partition("\nContent: ")[-1] for item in extra)
|
|
77
107
|
|
|
78
|
-
#
|
|
108
|
+
# parse goal using LLM
|
|
79
109
|
parsed_goal = self.task_goal_parser.parse(
|
|
80
110
|
task_description=query,
|
|
81
111
|
context="\n".join(context),
|
|
@@ -83,145 +113,168 @@ class Searcher:
|
|
|
83
113
|
mode=mode,
|
|
84
114
|
)
|
|
85
115
|
|
|
86
|
-
query =
|
|
87
|
-
|
|
88
|
-
if parsed_goal.rephrased_query and len(parsed_goal.rephrased_query) > 0
|
|
89
|
-
else query
|
|
90
|
-
)
|
|
91
|
-
|
|
116
|
+
query = parsed_goal.rephrased_query or query
|
|
117
|
+
# if goal has extra memories, embed them too
|
|
92
118
|
if parsed_goal.memories:
|
|
93
119
|
query_embedding = self.embedder.embed(list({query, *parsed_goal.memories}))
|
|
94
120
|
|
|
95
|
-
|
|
96
|
-
def retrieve_from_working_memory():
|
|
97
|
-
"""
|
|
98
|
-
Direct structure-based retrieval from working memory.
|
|
99
|
-
"""
|
|
100
|
-
if memory_type not in ["All", "WorkingMemory"]:
|
|
101
|
-
return []
|
|
121
|
+
return parsed_goal, query_embedding, context, query
|
|
102
122
|
|
|
103
|
-
|
|
104
|
-
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
123
|
+
@timed
|
|
124
|
+
def _retrieve_paths(self, query, parsed_goal, query_embedding, info, top_k, mode, memory_type):
|
|
125
|
+
"""Run A/B/C retrieval paths in parallel"""
|
|
126
|
+
tasks = []
|
|
127
|
+
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
|
|
128
|
+
tasks.append(
|
|
129
|
+
executor.submit(
|
|
130
|
+
self._retrieve_from_working_memory,
|
|
131
|
+
query,
|
|
132
|
+
parsed_goal,
|
|
133
|
+
query_embedding,
|
|
134
|
+
top_k,
|
|
135
|
+
memory_type,
|
|
136
|
+
)
|
|
113
137
|
)
|
|
114
|
-
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
self.graph_retriever.retrieve(
|
|
123
|
-
query=query,
|
|
124
|
-
query_embedding=query_embedding,
|
|
125
|
-
parsed_goal=parsed_goal,
|
|
126
|
-
top_k=top_k * 2,
|
|
127
|
-
memory_scope="LongTermMemory",
|
|
138
|
+
tasks.append(
|
|
139
|
+
executor.submit(
|
|
140
|
+
self._retrieve_from_long_term_and_user,
|
|
141
|
+
query,
|
|
142
|
+
parsed_goal,
|
|
143
|
+
query_embedding,
|
|
144
|
+
top_k,
|
|
145
|
+
memory_type,
|
|
128
146
|
)
|
|
129
|
-
if memory_type in ["All", "LongTermMemory"]
|
|
130
|
-
else []
|
|
131
147
|
)
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
parsed_goal
|
|
137
|
-
|
|
138
|
-
|
|
148
|
+
tasks.append(
|
|
149
|
+
executor.submit(
|
|
150
|
+
self._retrieve_from_internet,
|
|
151
|
+
query,
|
|
152
|
+
parsed_goal,
|
|
153
|
+
query_embedding,
|
|
154
|
+
top_k,
|
|
155
|
+
info,
|
|
156
|
+
mode,
|
|
157
|
+
memory_type,
|
|
139
158
|
)
|
|
140
|
-
if memory_type in ["All", "UserMemory"]
|
|
141
|
-
else []
|
|
142
159
|
)
|
|
143
160
|
|
|
144
|
-
|
|
145
|
-
|
|
161
|
+
results = []
|
|
162
|
+
for t in tasks:
|
|
163
|
+
results.extend(t.result())
|
|
164
|
+
|
|
165
|
+
logger.info(f"[SEARCH] Total raw results: {len(results)}")
|
|
166
|
+
return results
|
|
167
|
+
|
|
168
|
+
# --- Path A
|
|
169
|
+
@timed
|
|
170
|
+
def _retrieve_from_working_memory(
|
|
171
|
+
self, query, parsed_goal, query_embedding, top_k, memory_type
|
|
172
|
+
):
|
|
173
|
+
"""Retrieve and rerank from WorkingMemory"""
|
|
174
|
+
if memory_type not in ["All", "WorkingMemory"]:
|
|
175
|
+
logger.info(f"[PATH-A] '{query}'Skipped (memory_type does not match)")
|
|
176
|
+
return []
|
|
177
|
+
items = self.graph_retriever.retrieve(
|
|
178
|
+
query=query, parsed_goal=parsed_goal, top_k=top_k, memory_scope="WorkingMemory"
|
|
179
|
+
)
|
|
180
|
+
return self.reranker.rerank(
|
|
181
|
+
query=query,
|
|
182
|
+
query_embedding=query_embedding[0],
|
|
183
|
+
graph_results=items,
|
|
184
|
+
top_k=top_k,
|
|
185
|
+
parsed_goal=parsed_goal,
|
|
186
|
+
)
|
|
187
|
+
|
|
188
|
+
# --- Path B
|
|
189
|
+
@timed
|
|
190
|
+
def _retrieve_from_long_term_and_user(
|
|
191
|
+
self, query, parsed_goal, query_embedding, top_k, memory_type
|
|
192
|
+
):
|
|
193
|
+
"""Retrieve and rerank from LongTermMemory and UserMemory"""
|
|
194
|
+
results = []
|
|
195
|
+
if memory_type in ["All", "LongTermMemory"]:
|
|
196
|
+
results += self.graph_retriever.retrieve(
|
|
146
197
|
query=query,
|
|
147
|
-
query_embedding=query_embedding[0],
|
|
148
|
-
graph_results=long_term_items + user_items,
|
|
149
|
-
top_k=top_k * 2,
|
|
150
198
|
parsed_goal=parsed_goal,
|
|
199
|
+
query_embedding=query_embedding,
|
|
200
|
+
top_k=top_k * 2,
|
|
201
|
+
memory_scope="LongTermMemory",
|
|
151
202
|
)
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
# Step 2c: Internet retrieval (Path C)
|
|
155
|
-
def retrieve_from_internet():
|
|
156
|
-
"""
|
|
157
|
-
Retrieve information from the internet using Google Custom Search API.
|
|
158
|
-
"""
|
|
159
|
-
if not self.internet_retriever or mode == "fast" or not parsed_goal.internet_search:
|
|
160
|
-
return []
|
|
161
|
-
if memory_type not in ["All"]:
|
|
162
|
-
return []
|
|
163
|
-
internet_items = self.internet_retriever.retrieve_from_internet(
|
|
164
|
-
query=query, top_k=top_k, parsed_goal=parsed_goal, info=info
|
|
165
|
-
)
|
|
166
|
-
|
|
167
|
-
# Convert to the format expected by reranker
|
|
168
|
-
ranked_memories = self.reranker.rerank(
|
|
203
|
+
if memory_type in ["All", "UserMemory"]:
|
|
204
|
+
results += self.graph_retriever.retrieve(
|
|
169
205
|
query=query,
|
|
170
|
-
query_embedding=query_embedding[0],
|
|
171
|
-
graph_results=internet_items,
|
|
172
|
-
top_k=min(top_k, 5),
|
|
173
206
|
parsed_goal=parsed_goal,
|
|
207
|
+
query_embedding=query_embedding,
|
|
208
|
+
top_k=top_k * 2,
|
|
209
|
+
memory_scope="UserMemory",
|
|
174
210
|
)
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
187
|
-
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
|
|
191
|
-
|
|
192
|
-
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
205
|
-
|
|
206
|
-
|
|
207
|
-
|
|
211
|
+
return self.reranker.rerank(
|
|
212
|
+
query=query,
|
|
213
|
+
query_embedding=query_embedding[0],
|
|
214
|
+
graph_results=results,
|
|
215
|
+
top_k=top_k * 2,
|
|
216
|
+
parsed_goal=parsed_goal,
|
|
217
|
+
)
|
|
218
|
+
|
|
219
|
+
# --- Path C
|
|
220
|
+
@timed
|
|
221
|
+
def _retrieve_from_internet(
|
|
222
|
+
self, query, parsed_goal, query_embedding, top_k, info, mode, memory_type
|
|
223
|
+
):
|
|
224
|
+
"""Retrieve and rerank from Internet source"""
|
|
225
|
+
if not self.internet_retriever or mode == "fast":
|
|
226
|
+
logger.info(f"[PATH-C] '{query}' Skipped (no retriever, fast mode)")
|
|
227
|
+
return []
|
|
228
|
+
if memory_type not in ["All"]:
|
|
229
|
+
return []
|
|
230
|
+
logger.info(f"[PATH-C] '{query}' Retrieving from internet...")
|
|
231
|
+
items = self.internet_retriever.retrieve_from_internet(
|
|
232
|
+
query=query, top_k=top_k, parsed_goal=parsed_goal, info=info
|
|
233
|
+
)
|
|
234
|
+
logger.info(f"[PATH-C] '{query}' Retrieved from internet {len(items)} items: {items}")
|
|
235
|
+
return self.reranker.rerank(
|
|
236
|
+
query=query,
|
|
237
|
+
query_embedding=query_embedding[0],
|
|
238
|
+
graph_results=items,
|
|
239
|
+
top_k=min(top_k, 5),
|
|
240
|
+
parsed_goal=parsed_goal,
|
|
241
|
+
)
|
|
242
|
+
|
|
243
|
+
@timed
|
|
244
|
+
def _deduplicate_results(self, results):
|
|
245
|
+
"""Deduplicate results by memory text"""
|
|
246
|
+
deduped = {}
|
|
247
|
+
for item, score in results:
|
|
248
|
+
if item.memory not in deduped or score > deduped[item.memory][1]:
|
|
249
|
+
deduped[item.memory] = (item, score)
|
|
250
|
+
return list(deduped.values())
|
|
251
|
+
|
|
252
|
+
@timed
|
|
253
|
+
def _sort_and_trim(self, results, top_k):
|
|
254
|
+
"""Sort results by score and trim to top_k"""
|
|
255
|
+
sorted_results = sorted(results, key=lambda pair: pair[1], reverse=True)[:top_k]
|
|
256
|
+
final_items = []
|
|
257
|
+
for item, score in sorted_results:
|
|
208
258
|
meta_data = item.metadata.model_dump()
|
|
209
259
|
if "relativity" not in meta_data:
|
|
210
260
|
meta_data["relativity"] = score
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
261
|
+
final_items.append(
|
|
262
|
+
TextualMemoryItem(
|
|
263
|
+
id=item.id,
|
|
264
|
+
memory=item.memory,
|
|
265
|
+
metadata=SearchedTreeNodeTextualMemoryMetadata(**meta_data),
|
|
266
|
+
)
|
|
214
267
|
)
|
|
268
|
+
return final_items
|
|
215
269
|
|
|
216
|
-
|
|
270
|
+
@timed
|
|
271
|
+
def _update_usage_history(self, items, info):
|
|
272
|
+
"""Update usage history in graph DB"""
|
|
217
273
|
now_time = datetime.now().isoformat()
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
usage_record = json.dumps(
|
|
221
|
-
|
|
222
|
-
) # `info` should be a serializable dict or string
|
|
223
|
-
|
|
224
|
-
for item in searched_res:
|
|
274
|
+
info.pop("chat_history", None)
|
|
275
|
+
# `info` should be a serializable dict or string
|
|
276
|
+
usage_record = json.dumps({"time": now_time, "info": info})
|
|
277
|
+
for item in items:
|
|
225
278
|
if (
|
|
226
279
|
hasattr(item, "id")
|
|
227
280
|
and hasattr(item, "metadata")
|
|
@@ -229,4 +282,3 @@ class Searcher:
|
|
|
229
282
|
):
|
|
230
283
|
item.metadata.usage.append(usage_record)
|
|
231
284
|
self.graph_store.update_node(item.id, {"usage": item.metadata.usage})
|
|
232
|
-
return searched_res
|
|
@@ -1,13 +1,16 @@
|
|
|
1
|
-
import logging
|
|
2
1
|
import traceback
|
|
3
2
|
|
|
4
3
|
from string import Template
|
|
5
4
|
|
|
6
5
|
from memos.llms.base import BaseLLM
|
|
6
|
+
from memos.log import get_logger
|
|
7
7
|
from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
|
|
8
8
|
from memos.memories.textual.tree_text_memory.retrieve.utils import TASK_PARSE_PROMPT
|
|
9
9
|
|
|
10
10
|
|
|
11
|
+
logger = get_logger(__name__)
|
|
12
|
+
|
|
13
|
+
|
|
11
14
|
class TaskGoalParser:
|
|
12
15
|
"""
|
|
13
16
|
Unified TaskGoalParser:
|
|
@@ -70,10 +73,12 @@ class TaskGoalParser:
|
|
|
70
73
|
prompt = Template(TASK_PARSE_PROMPT).substitute(
|
|
71
74
|
task=query.strip(), context=context, conversation=conversation_prompt
|
|
72
75
|
)
|
|
76
|
+
logger.info(f"Parsing Goal... LLM input is {prompt}")
|
|
73
77
|
response = self.llm.generate(messages=[{"role": "user", "content": prompt}])
|
|
78
|
+
logger.info(f"Parsing Goal... LLM Response is {response}")
|
|
74
79
|
return self._parse_response(response)
|
|
75
80
|
except Exception:
|
|
76
|
-
|
|
81
|
+
logger.warning(f"Fail to fine-parse query {query}: {traceback.format_exc()}")
|
|
77
82
|
return self._parse_fast(query)
|
|
78
83
|
|
|
79
84
|
def _parse_response(self, response: str) -> ParsedTaskGoal:
|
|
@@ -5,7 +5,7 @@ You are a task parsing expert. Given a user task instruction, optional former co
|
|
|
5
5
|
2. Tags: thematic tags to help categorize and retrieve related memories.
|
|
6
6
|
3. Goal Type: retrieval | qa | generation
|
|
7
7
|
4. Rephrased instruction: Give a rephrased task instruction based on the former conversation to make it less confusing to look alone. If you think the task instruction is easy enough to understand, or there is no former conversation, set "rephrased_instruction" to an empty string.
|
|
8
|
-
5. Need for internet search: If
|
|
8
|
+
5. Need for internet search: If the user's task instruction only involves objective facts or can be completed without introducing external knowledge, set "internet_search" to False. Otherwise, set it to True.
|
|
9
9
|
6. Memories: Provide 2–5 short semantic expansions or rephrasings of the rephrased/original user task instruction. These are used for improved embedding search coverage. Each should be clear, concise, and meaningful for retrieval.
|
|
10
10
|
|
|
11
11
|
Task description:
|
|
@@ -0,0 +1,120 @@
|
|
|
1
|
+
"""
|
|
2
|
+
Lock-free dictionary implementation using copy-on-write strategy.
|
|
3
|
+
This provides better performance but uses more memory.
|
|
4
|
+
"""
|
|
5
|
+
|
|
6
|
+
import threading
|
|
7
|
+
|
|
8
|
+
from collections.abc import ItemsView, Iterator, KeysView, ValuesView
|
|
9
|
+
from typing import Generic, TypeVar
|
|
10
|
+
|
|
11
|
+
|
|
12
|
+
K = TypeVar("K")
|
|
13
|
+
V = TypeVar("V")
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class CopyOnWriteDict(Generic[K, V]):
|
|
17
|
+
"""
|
|
18
|
+
A lock-free dictionary using copy-on-write strategy.
|
|
19
|
+
|
|
20
|
+
Reads are completely lock-free and very fast.
|
|
21
|
+
Writes create a new copy of the dictionary.
|
|
22
|
+
Uses more memory but provides excellent read performance.
|
|
23
|
+
"""
|
|
24
|
+
|
|
25
|
+
def __init__(self, initial_dict: dict[K, V] | None = None):
|
|
26
|
+
"""Initialize with optional initial dictionary."""
|
|
27
|
+
self._dict = initial_dict.copy() if initial_dict else {}
|
|
28
|
+
self._write_lock = threading.Lock() # Only for writes
|
|
29
|
+
|
|
30
|
+
def __getitem__(self, key: K) -> V:
|
|
31
|
+
"""Get item by key - completely lock-free."""
|
|
32
|
+
return self._dict[key]
|
|
33
|
+
|
|
34
|
+
def __setitem__(self, key: K, value: V) -> None:
|
|
35
|
+
"""Set item by key - uses copy-on-write."""
|
|
36
|
+
with self._write_lock:
|
|
37
|
+
# Create a new dictionary with the update
|
|
38
|
+
new_dict = self._dict.copy()
|
|
39
|
+
new_dict[key] = value
|
|
40
|
+
# Atomic replacement
|
|
41
|
+
self._dict = new_dict
|
|
42
|
+
|
|
43
|
+
def __delitem__(self, key: K) -> None:
|
|
44
|
+
"""Delete item by key - uses copy-on-write."""
|
|
45
|
+
with self._write_lock:
|
|
46
|
+
new_dict = self._dict.copy()
|
|
47
|
+
del new_dict[key]
|
|
48
|
+
self._dict = new_dict
|
|
49
|
+
|
|
50
|
+
def __contains__(self, key: K) -> bool:
|
|
51
|
+
"""Check if key exists - completely lock-free."""
|
|
52
|
+
return key in self._dict
|
|
53
|
+
|
|
54
|
+
def __len__(self) -> int:
|
|
55
|
+
"""Get length - completely lock-free."""
|
|
56
|
+
return len(self._dict)
|
|
57
|
+
|
|
58
|
+
def __bool__(self) -> bool:
|
|
59
|
+
"""Check if not empty - completely lock-free."""
|
|
60
|
+
return bool(self._dict)
|
|
61
|
+
|
|
62
|
+
def __iter__(self) -> Iterator[K]:
|
|
63
|
+
"""Iterate over keys - completely lock-free."""
|
|
64
|
+
return iter(self._dict.keys())
|
|
65
|
+
|
|
66
|
+
def get(self, key: K, default: V | None = None) -> V:
|
|
67
|
+
"""Get with default - completely lock-free."""
|
|
68
|
+
return self._dict.get(key, default)
|
|
69
|
+
|
|
70
|
+
def keys(self) -> KeysView[K]:
|
|
71
|
+
"""Get keys - completely lock-free."""
|
|
72
|
+
return self._dict.keys()
|
|
73
|
+
|
|
74
|
+
def values(self) -> ValuesView[V]:
|
|
75
|
+
"""Get values - completely lock-free."""
|
|
76
|
+
return self._dict.values()
|
|
77
|
+
|
|
78
|
+
def items(self) -> ItemsView[K, V]:
|
|
79
|
+
"""Get items - completely lock-free."""
|
|
80
|
+
return self._dict.items()
|
|
81
|
+
|
|
82
|
+
def copy(self) -> dict[K, V]:
|
|
83
|
+
"""Create a copy - completely lock-free."""
|
|
84
|
+
return self._dict.copy()
|
|
85
|
+
|
|
86
|
+
def update(self, *args, **kwargs) -> None:
|
|
87
|
+
"""Update dictionary - uses copy-on-write."""
|
|
88
|
+
with self._write_lock:
|
|
89
|
+
new_dict = self._dict.copy()
|
|
90
|
+
new_dict.update(*args, **kwargs)
|
|
91
|
+
self._dict = new_dict
|
|
92
|
+
|
|
93
|
+
def clear(self) -> None:
|
|
94
|
+
"""Clear all items."""
|
|
95
|
+
with self._write_lock:
|
|
96
|
+
self._dict = {}
|
|
97
|
+
|
|
98
|
+
def pop(self, key: K, *args) -> V:
|
|
99
|
+
"""Pop item by key."""
|
|
100
|
+
with self._write_lock:
|
|
101
|
+
new_dict = self._dict.copy()
|
|
102
|
+
result = new_dict.pop(key, *args)
|
|
103
|
+
self._dict = new_dict
|
|
104
|
+
return result
|
|
105
|
+
|
|
106
|
+
def setdefault(self, key: K, default: V | None = None) -> V:
|
|
107
|
+
"""Set default value for key if not exists."""
|
|
108
|
+
# Fast path for existing keys
|
|
109
|
+
if key in self._dict:
|
|
110
|
+
return self._dict[key]
|
|
111
|
+
|
|
112
|
+
with self._write_lock:
|
|
113
|
+
# Double-check after acquiring lock
|
|
114
|
+
if key in self._dict:
|
|
115
|
+
return self._dict[key]
|
|
116
|
+
|
|
117
|
+
new_dict = self._dict.copy()
|
|
118
|
+
result = new_dict.setdefault(key, default)
|
|
119
|
+
self._dict = new_dict
|
|
120
|
+
return result
|