MemoryOS 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MemoryOS might be problematic. Click here for more details.
- {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/METADATA +2 -1
- {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/RECORD +72 -55
- memos/__init__.py +1 -1
- memos/api/config.py +156 -65
- memos/api/context/context.py +147 -0
- memos/api/context/dependencies.py +90 -0
- memos/api/product_models.py +5 -1
- memos/api/routers/product_router.py +54 -26
- memos/configs/graph_db.py +49 -1
- memos/configs/internet_retriever.py +6 -0
- memos/configs/mem_os.py +5 -0
- memos/configs/mem_reader.py +9 -0
- memos/configs/mem_scheduler.py +18 -4
- memos/configs/mem_user.py +58 -0
- memos/graph_dbs/base.py +9 -1
- memos/graph_dbs/factory.py +2 -0
- memos/graph_dbs/nebular.py +1364 -0
- memos/graph_dbs/neo4j.py +4 -4
- memos/log.py +1 -1
- memos/mem_cube/utils.py +13 -6
- memos/mem_os/core.py +140 -30
- memos/mem_os/main.py +1 -1
- memos/mem_os/product.py +266 -152
- memos/mem_os/utils/format_utils.py +314 -67
- memos/mem_reader/simple_struct.py +13 -5
- memos/mem_scheduler/base_scheduler.py +220 -250
- memos/mem_scheduler/general_scheduler.py +193 -73
- memos/mem_scheduler/modules/base.py +5 -5
- memos/mem_scheduler/modules/dispatcher.py +6 -9
- memos/mem_scheduler/modules/misc.py +81 -16
- memos/mem_scheduler/modules/monitor.py +52 -41
- memos/mem_scheduler/modules/rabbitmq_service.py +9 -7
- memos/mem_scheduler/modules/retriever.py +108 -191
- memos/mem_scheduler/modules/scheduler_logger.py +255 -0
- memos/mem_scheduler/mos_for_test_scheduler.py +16 -19
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/general_schemas.py +43 -0
- memos/mem_scheduler/schemas/message_schemas.py +148 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +329 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/misc_utils.py +61 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +500 -0
- memos/mem_user/persistent_factory.py +96 -0
- memos/mem_user/user_manager.py +4 -4
- memos/memories/activation/item.py +4 -0
- memos/memories/textual/base.py +1 -1
- memos/memories/textual/general.py +35 -91
- memos/memories/textual/item.py +5 -33
- memos/memories/textual/tree.py +13 -7
- memos/memories/textual/tree_text_memory/organize/conflict.py +4 -2
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +47 -43
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +8 -5
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -3
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +2 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +2 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +46 -23
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +42 -15
- memos/memories/textual/tree_text_memory/retrieve/utils.py +11 -7
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +62 -58
- memos/memos_tools/dinding_report_bot.py +422 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +96 -0
- memos/settings.py +3 -1
- memos/templates/mem_reader_prompts.py +2 -1
- memos/templates/mem_scheduler_prompts.py +41 -7
- memos/templates/mos_prompts.py +87 -0
- memos/mem_scheduler/modules/schemas.py +0 -328
- memos/mem_scheduler/utils.py +0 -75
- {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/LICENSE +0 -0
- {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/WHEEL +0 -0
- {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/entry_points.txt +0 -0
|
@@ -12,6 +12,7 @@ from memos.llms.factory import AzureLLM, LLMFactory, OllamaLLM, OpenAILLM
|
|
|
12
12
|
from memos.log import get_logger
|
|
13
13
|
from memos.memories.textual.base import BaseTextMemory
|
|
14
14
|
from memos.memories.textual.item import TextualMemoryItem
|
|
15
|
+
from memos.templates.mem_reader_prompts import SIMPLE_STRUCT_MEM_READER_PROMPT
|
|
15
16
|
from memos.types import MessageList
|
|
16
17
|
from memos.vec_dbs.factory import QdrantVecDB, VecDBFactory
|
|
17
18
|
from memos.vec_dbs.item import VecDBItem
|
|
@@ -36,11 +37,7 @@ class GeneralTextMemory(BaseTextMemory):
|
|
|
36
37
|
stop=stop_after_attempt(3),
|
|
37
38
|
retry=retry_if_exception_type(json.JSONDecodeError),
|
|
38
39
|
before_sleep=lambda retry_state: logger.warning(
|
|
39
|
-
|
|
40
|
-
error=retry_state.outcome.exception(),
|
|
41
|
-
attempt_number=retry_state.attempt_number,
|
|
42
|
-
max_attempt_number=3,
|
|
43
|
-
)
|
|
40
|
+
f"Extracting memory failed due to JSON decode error: {retry_state.outcome.exception()}, Attempt retry: {retry_state.attempt_number} / {3}"
|
|
44
41
|
),
|
|
45
42
|
)
|
|
46
43
|
def extract(self, messages: MessageList) -> list[TextualMemoryItem]:
|
|
@@ -52,14 +49,27 @@ class GeneralTextMemory(BaseTextMemory):
|
|
|
52
49
|
Returns:
|
|
53
50
|
List of TextualMemoryItem objects representing the extracted memories.
|
|
54
51
|
"""
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
messages
|
|
52
|
+
|
|
53
|
+
str_messages = "\n".join(
|
|
54
|
+
[message["role"] + ":" + message["content"] for message in messages]
|
|
58
55
|
)
|
|
59
|
-
|
|
60
|
-
|
|
56
|
+
|
|
57
|
+
prompt = SIMPLE_STRUCT_MEM_READER_PROMPT.replace("${conversation}", str_messages)
|
|
58
|
+
messages = [{"role": "user", "content": prompt}]
|
|
59
|
+
response_text = self.extractor_llm.generate(messages)
|
|
60
|
+
response_json = self.parse_json_result(response_text)
|
|
61
|
+
|
|
61
62
|
extracted_memories = [
|
|
62
|
-
TextualMemoryItem(
|
|
63
|
+
TextualMemoryItem(
|
|
64
|
+
memory=memory_dict["value"],
|
|
65
|
+
metadata={
|
|
66
|
+
"key": memory_dict["key"],
|
|
67
|
+
"source": "conversation",
|
|
68
|
+
"tags": memory_dict["tags"],
|
|
69
|
+
"updated_at": datetime.now().isoformat(),
|
|
70
|
+
},
|
|
71
|
+
)
|
|
72
|
+
for memory_dict in response_json["memory list"]
|
|
63
73
|
]
|
|
64
74
|
|
|
65
75
|
return extracted_memories
|
|
@@ -206,83 +216,17 @@ class GeneralTextMemory(BaseTextMemory):
|
|
|
206
216
|
"""Embed a single sentence."""
|
|
207
217
|
return self.embedder.embed([sentence])[0]
|
|
208
218
|
|
|
209
|
-
|
|
210
|
-
|
|
211
|
-
|
|
212
|
-
|
|
213
|
-
|
|
214
|
-
|
|
215
|
-
|
|
216
|
-
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
223
|
-
- "visibility": The accessibility scope of the memory (string), e.g., `"private"`, `"public"`, `"session"`, determining who or what contexts can access it.
|
|
224
|
-
- "updated_at": The timestamp of the last modification to the memory (string). Useful for tracking memory freshness or change history. Format: ISO 8601 or natural language.
|
|
225
|
-
* Current date and time is {datetime.now().isoformat()}.
|
|
226
|
-
* Only return the list of memories in JSON format.
|
|
227
|
-
* Do not include any explanations
|
|
228
|
-
* Do not include any extra text
|
|
229
|
-
* Do not include code blocks (```json```)
|
|
230
|
-
|
|
231
|
-
## Example
|
|
232
|
-
|
|
233
|
-
### Input
|
|
234
|
-
|
|
235
|
-
[
|
|
236
|
-
{{"role": "user", "content": "I plan to visit Paris next week."}},
|
|
237
|
-
{{"role": "assistant", "content": "Paris is a beautiful city with many attractions."}},
|
|
238
|
-
{{"role": "user", "content": "I love the Eiffel Tower."}},
|
|
239
|
-
{{"role": "assistant", "content": "The Eiffel Tower is a must-see landmark in Paris."}}
|
|
240
|
-
]
|
|
241
|
-
|
|
242
|
-
### Output
|
|
243
|
-
|
|
244
|
-
[
|
|
245
|
-
{{
|
|
246
|
-
"memory": "The user plans to visit Paris on 05-26-2025.",
|
|
247
|
-
"metadata": {{
|
|
248
|
-
"type": "event",
|
|
249
|
-
"memory_time": "2025-05-26",
|
|
250
|
-
"source": "conversation",
|
|
251
|
-
"confidence": 90.0,
|
|
252
|
-
"entities": ["Paris"],
|
|
253
|
-
"tags": ["travel", "plans"],
|
|
254
|
-
"visibility": "private",
|
|
255
|
-
"updated_at": "2025-05-19T00:00:00"
|
|
256
|
-
}}
|
|
257
|
-
}},
|
|
258
|
-
{{
|
|
259
|
-
"memory": "The user loves the Eiffel Tower.",
|
|
260
|
-
"metadata": {{
|
|
261
|
-
"type": "opinion",
|
|
262
|
-
"memory_time": "2025-05-19",
|
|
263
|
-
"source": "conversation",
|
|
264
|
-
"confidence": 100.0,
|
|
265
|
-
"entities": ["Eiffel Tower"],
|
|
266
|
-
"tags": ["opinions", "landmarks"],
|
|
267
|
-
"visibility": "session",
|
|
268
|
-
"updated_at": "2025-05-19T00:00:00"
|
|
269
|
-
}}
|
|
270
|
-
}}
|
|
271
|
-
]
|
|
272
|
-
|
|
273
|
-
"""
|
|
274
|
-
|
|
275
|
-
EXTRACTION_PROMPT_PART_2 = """
|
|
276
|
-
## Query
|
|
277
|
-
|
|
278
|
-
### Input
|
|
279
|
-
|
|
280
|
-
{messages}
|
|
281
|
-
|
|
282
|
-
### Output
|
|
283
|
-
|
|
284
|
-
"""
|
|
285
|
-
|
|
286
|
-
EXTRACTION_RETRY_LOG = """Extracting memory failed due to JSON decode error: {error},
|
|
287
|
-
Attempt retry: {attempt_number} / {max_attempt_number}
|
|
288
|
-
"""
|
|
219
|
+
def parse_json_result(self, response_text):
|
|
220
|
+
try:
|
|
221
|
+
json_start = response_text.find("{")
|
|
222
|
+
response_text = response_text[json_start:]
|
|
223
|
+
response_text = response_text.replace("```", "").strip()
|
|
224
|
+
if response_text[-1] != "}":
|
|
225
|
+
response_text += "}"
|
|
226
|
+
response_json = json.loads(response_text)
|
|
227
|
+
return response_json
|
|
228
|
+
except json.JSONDecodeError as e:
|
|
229
|
+
logger.warning(
|
|
230
|
+
f"Failed to parse LLM response as JSON: {e}\nRaw response:\n{response_text}"
|
|
231
|
+
)
|
|
232
|
+
return {}
|
memos/memories/textual/item.py
CHANGED
|
@@ -27,23 +27,14 @@ class TextualMemoryMetadata(BaseModel):
|
|
|
27
27
|
default="activated",
|
|
28
28
|
description="The status of the memory, e.g., 'activated', 'archived', 'deleted'.",
|
|
29
29
|
)
|
|
30
|
-
type:
|
|
31
|
-
|
|
32
|
-
)
|
|
33
|
-
memory_time: str | None = Field(
|
|
34
|
-
default=None,
|
|
35
|
-
description='The time the memory occurred or refers to. Must be in standard `YYYY-MM-DD` format. Relative expressions such as "yesterday" or "tomorrow" are not allowed.',
|
|
36
|
-
)
|
|
37
|
-
source: Literal["conversation", "retrieved", "web", "file"] | None = Field(
|
|
38
|
-
default=None, description="The origin of the memory"
|
|
39
|
-
)
|
|
30
|
+
type: str | None = Field(default=None)
|
|
31
|
+
key: str | None = Field(default=None, description="Memory key or title.")
|
|
40
32
|
confidence: float | None = Field(
|
|
41
33
|
default=None,
|
|
42
34
|
description="A numeric score (float between 0 and 100) indicating how certain you are about the accuracy or reliability of the memory.",
|
|
43
35
|
)
|
|
44
|
-
|
|
45
|
-
default=None,
|
|
46
|
-
description='A list of key entities mentioned in the memory, e.g., people, places, organizations, e.g., `["Alice", "Paris", "OpenAI"]`.',
|
|
36
|
+
source: Literal["conversation", "retrieved", "web", "file"] | None = Field(
|
|
37
|
+
default=None, description="The origin of the memory"
|
|
47
38
|
)
|
|
48
39
|
tags: list[str] | None = Field(
|
|
49
40
|
default=None,
|
|
@@ -59,23 +50,6 @@ class TextualMemoryMetadata(BaseModel):
|
|
|
59
50
|
|
|
60
51
|
model_config = ConfigDict(extra="allow")
|
|
61
52
|
|
|
62
|
-
@field_validator("memory_time")
|
|
63
|
-
@classmethod
|
|
64
|
-
def validate_memory_time(cls, v):
|
|
65
|
-
try:
|
|
66
|
-
if v:
|
|
67
|
-
datetime.strptime(v, "%Y-%m-%d")
|
|
68
|
-
except ValueError as e:
|
|
69
|
-
raise ValueError("Invalid date format. Use YYYY-MM-DD.") from e
|
|
70
|
-
return v
|
|
71
|
-
|
|
72
|
-
@field_validator("confidence")
|
|
73
|
-
@classmethod
|
|
74
|
-
def validate_confidence(cls, v):
|
|
75
|
-
if v is not None and (v < 0 or v > 100):
|
|
76
|
-
raise ValueError("Confidence must be between 0 and 100.")
|
|
77
|
-
return v
|
|
78
|
-
|
|
79
53
|
def __str__(self) -> str:
|
|
80
54
|
"""Pretty string representation of the metadata."""
|
|
81
55
|
meta = self.model_dump(exclude_none=True)
|
|
@@ -85,10 +59,9 @@ class TextualMemoryMetadata(BaseModel):
|
|
|
85
59
|
class TreeNodeTextualMemoryMetadata(TextualMemoryMetadata):
|
|
86
60
|
"""Extended metadata for structured memory, layered retrieval, and lifecycle tracking."""
|
|
87
61
|
|
|
88
|
-
memory_type: Literal["WorkingMemory", "LongTermMemory", "UserMemory"] = Field(
|
|
62
|
+
memory_type: Literal["WorkingMemory", "LongTermMemory", "UserMemory", "OuterMemory"] = Field(
|
|
89
63
|
default="WorkingMemory", description="Memory lifecycle type."
|
|
90
64
|
)
|
|
91
|
-
key: str | None = Field(default=None, description="Memory key or title.")
|
|
92
65
|
sources: list[str] | None = Field(
|
|
93
66
|
default=None, description="Multiple origins of the memory (e.g., URLs, notes)."
|
|
94
67
|
)
|
|
@@ -148,7 +121,6 @@ class TextualMemoryItem(BaseModel):
|
|
|
148
121
|
|
|
149
122
|
model_config = ConfigDict(extra="forbid")
|
|
150
123
|
|
|
151
|
-
@field_validator("id")
|
|
152
124
|
@classmethod
|
|
153
125
|
def validate_id(cls, v):
|
|
154
126
|
try:
|
memos/memories/textual/tree.py
CHANGED
|
@@ -117,13 +117,19 @@ class TreeTextMemory(BaseTextMemory):
|
|
|
117
117
|
logger.warning(
|
|
118
118
|
"Internet retriever is init by config , but this search set manual_close_internet is True and will close it"
|
|
119
119
|
)
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
|
|
120
|
+
searcher = Searcher(
|
|
121
|
+
self.dispatcher_llm,
|
|
122
|
+
self.graph_store,
|
|
123
|
+
self.embedder,
|
|
124
|
+
internet_retriever=None,
|
|
125
|
+
)
|
|
126
|
+
else:
|
|
127
|
+
searcher = Searcher(
|
|
128
|
+
self.dispatcher_llm,
|
|
129
|
+
self.graph_store,
|
|
130
|
+
self.embedder,
|
|
131
|
+
internet_retriever=self.internet_retriever,
|
|
132
|
+
)
|
|
127
133
|
return searcher.search(query, top_k, info, mode, memory_type)
|
|
128
134
|
|
|
129
135
|
def get_relevant_subgraph(
|
|
@@ -3,6 +3,8 @@ import re
|
|
|
3
3
|
|
|
4
4
|
from datetime import datetime
|
|
5
5
|
|
|
6
|
+
from dateutil import parser
|
|
7
|
+
|
|
6
8
|
from memos.embedders.base import BaseEmbedder
|
|
7
9
|
from memos.graph_dbs.neo4j import Neo4jGraphDB
|
|
8
10
|
from memos.llms.base import BaseLLM
|
|
@@ -133,8 +135,8 @@ class ConflictHandler:
|
|
|
133
135
|
"""
|
|
134
136
|
Hard update: compare updated_at, keep the newer one, overwrite the older one's metadata.
|
|
135
137
|
"""
|
|
136
|
-
time_a =
|
|
137
|
-
time_b =
|
|
138
|
+
time_a = parser.isoparse(memory_a.metadata.updated_at)
|
|
139
|
+
time_b = parser.isoparse(memory_b.metadata.updated_at)
|
|
138
140
|
|
|
139
141
|
newer_mem = memory_a if time_a >= time_b else memory_b
|
|
140
142
|
older_mem = memory_b if time_a >= time_b else memory_a
|
|
@@ -1,4 +1,5 @@
|
|
|
1
1
|
import json
|
|
2
|
+
import traceback
|
|
2
3
|
|
|
3
4
|
from memos.embedders.factory import OllamaEmbedder
|
|
4
5
|
from memos.graph_dbs.item import GraphDBNode
|
|
@@ -30,53 +31,57 @@ class RelationAndReasoningDetector:
|
|
|
30
31
|
3) Sequence links
|
|
31
32
|
4) Aggregate concepts
|
|
32
33
|
"""
|
|
33
|
-
if node.metadata.type == "reasoning":
|
|
34
|
-
logger.info(f"Skip reasoning for inferred node {node.id}")
|
|
35
|
-
return {
|
|
36
|
-
"relations": [],
|
|
37
|
-
"inferred_nodes": [],
|
|
38
|
-
"sequence_links": [],
|
|
39
|
-
"aggregate_nodes": [],
|
|
40
|
-
}
|
|
41
|
-
|
|
42
34
|
results = {
|
|
43
35
|
"relations": [],
|
|
44
36
|
"inferred_nodes": [],
|
|
45
37
|
"sequence_links": [],
|
|
46
38
|
"aggregate_nodes": [],
|
|
47
39
|
}
|
|
40
|
+
try:
|
|
41
|
+
if node.metadata.type == "reasoning":
|
|
42
|
+
logger.info(f"Skip reasoning for inferred node {node.id}")
|
|
43
|
+
return {
|
|
44
|
+
"relations": [],
|
|
45
|
+
"inferred_nodes": [],
|
|
46
|
+
"sequence_links": [],
|
|
47
|
+
"aggregate_nodes": [],
|
|
48
|
+
}
|
|
49
|
+
|
|
50
|
+
nearest = self.graph_store.get_neighbors_by_tag(
|
|
51
|
+
tags=node.metadata.tags,
|
|
52
|
+
exclude_ids=exclude_ids,
|
|
53
|
+
top_k=top_k,
|
|
54
|
+
min_overlap=2,
|
|
55
|
+
)
|
|
56
|
+
nearest = [GraphDBNode(**cand_data) for cand_data in nearest]
|
|
57
|
+
|
|
58
|
+
"""
|
|
59
|
+
# 1) Pairwise relations (including CAUSE/CONDITION/CONFLICT)
|
|
60
|
+
pairwise = self._detect_pairwise_causal_condition_relations(node, nearest)
|
|
61
|
+
results["relations"].extend(pairwise["relations"])
|
|
62
|
+
"""
|
|
63
|
+
|
|
64
|
+
"""
|
|
65
|
+
# 2) Inferred nodes (from causal/condition)
|
|
66
|
+
inferred = self._infer_fact_nodes_from_relations(pairwise)
|
|
67
|
+
results["inferred_nodes"].extend(inferred)
|
|
68
|
+
"""
|
|
69
|
+
|
|
70
|
+
"""
|
|
71
|
+
3) Sequence (optional, if you have timestamps)
|
|
72
|
+
seq = self._detect_sequence_links(node, nearest)
|
|
73
|
+
results["sequence_links"].extend(seq)
|
|
74
|
+
"""
|
|
75
|
+
|
|
76
|
+
# 4) Aggregate
|
|
77
|
+
agg = self._detect_aggregate_node_for_group(node, nearest, min_group_size=5)
|
|
78
|
+
if agg:
|
|
79
|
+
results["aggregate_nodes"].append(agg)
|
|
48
80
|
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
53
|
-
min_overlap=2,
|
|
54
|
-
)
|
|
55
|
-
nearest = [GraphDBNode(**cand_data) for cand_data in nearest]
|
|
56
|
-
|
|
57
|
-
"""
|
|
58
|
-
# 1) Pairwise relations (including CAUSE/CONDITION/CONFLICT)
|
|
59
|
-
pairwise = self._detect_pairwise_causal_condition_relations(node, nearest)
|
|
60
|
-
results["relations"].extend(pairwise["relations"])
|
|
61
|
-
"""
|
|
62
|
-
|
|
63
|
-
"""
|
|
64
|
-
# 2) Inferred nodes (from causal/condition)
|
|
65
|
-
inferred = self._infer_fact_nodes_from_relations(pairwise)
|
|
66
|
-
results["inferred_nodes"].extend(inferred)
|
|
67
|
-
"""
|
|
68
|
-
|
|
69
|
-
"""
|
|
70
|
-
3) Sequence (optional, if you have timestamps)
|
|
71
|
-
seq = self._detect_sequence_links(node, nearest)
|
|
72
|
-
results["sequence_links"].extend(seq)
|
|
73
|
-
"""
|
|
74
|
-
|
|
75
|
-
# 4) Aggregate
|
|
76
|
-
agg = self._detect_aggregate_node_for_group(node, nearest, min_group_size=5)
|
|
77
|
-
if agg:
|
|
78
|
-
results["aggregate_nodes"].append(agg)
|
|
79
|
-
|
|
81
|
+
except Exception as e:
|
|
82
|
+
logger.error(
|
|
83
|
+
f"Error {e} while process struct reorganize: trace: {traceback.format_exc()}"
|
|
84
|
+
)
|
|
80
85
|
return results
|
|
81
86
|
|
|
82
87
|
def _detect_pairwise_causal_condition_relations(
|
|
@@ -176,10 +181,9 @@ class RelationAndReasoningDetector:
|
|
|
176
181
|
joined = "\n".join(f"- {n.memory}" for n in combined_nodes)
|
|
177
182
|
prompt = AGGREGATE_PROMPT.replace("{joined}", joined)
|
|
178
183
|
response_text = self._call_llm(prompt)
|
|
179
|
-
|
|
180
|
-
if not
|
|
184
|
+
summary = self._parse_json_result(response_text)
|
|
185
|
+
if not summary:
|
|
181
186
|
return None
|
|
182
|
-
summary = json.loads(response_text)
|
|
183
187
|
embedding = self.embedder.embed([summary["value"]])[0]
|
|
184
188
|
|
|
185
189
|
parent_node = GraphDBNode(
|
|
@@ -125,8 +125,8 @@ class GraphStructureReorganizer:
|
|
|
125
125
|
"""
|
|
126
126
|
import schedule
|
|
127
127
|
|
|
128
|
-
schedule.every(
|
|
129
|
-
schedule.every(
|
|
128
|
+
schedule.every(600).seconds.do(self.optimize_structure, scope="LongTermMemory")
|
|
129
|
+
schedule.every(600).seconds.do(self.optimize_structure, scope="UserMemory")
|
|
130
130
|
|
|
131
131
|
logger.info("Structure optimizer schedule started.")
|
|
132
132
|
while not getattr(self, "_stop_scheduler", False):
|
|
@@ -198,7 +198,7 @@ class GraphStructureReorganizer:
|
|
|
198
198
|
logger.info(f"Already optimizing for {scope}. Skipping.")
|
|
199
199
|
return
|
|
200
200
|
|
|
201
|
-
if self.graph_store.
|
|
201
|
+
if self.graph_store.node_not_exist(scope):
|
|
202
202
|
logger.debug(f"[GraphStructureReorganize] No nodes for scope={scope}. Skip.")
|
|
203
203
|
return
|
|
204
204
|
|
|
@@ -251,7 +251,10 @@ class GraphStructureReorganizer:
|
|
|
251
251
|
try:
|
|
252
252
|
f.result()
|
|
253
253
|
except Exception as e:
|
|
254
|
-
logger.warning(
|
|
254
|
+
logger.warning(
|
|
255
|
+
f"[Reorganize] Cluster processing "
|
|
256
|
+
f"failed: {e}, trace: {traceback.format_exc()}"
|
|
257
|
+
)
|
|
255
258
|
logger.info("[GraphStructure Reorganize] Structure optimization finished.")
|
|
256
259
|
|
|
257
260
|
finally:
|
|
@@ -343,7 +346,7 @@ class GraphStructureReorganizer:
|
|
|
343
346
|
agg_node.metadata.model_dump(exclude_none=True),
|
|
344
347
|
)
|
|
345
348
|
for child_id in agg_node.metadata.sources:
|
|
346
|
-
self.graph_store.add_edge(agg_node.id, child_id, "
|
|
349
|
+
self.graph_store.add_edge(agg_node.id, child_id, "AGGREGATE_TO")
|
|
347
350
|
|
|
348
351
|
logger.info("[Reorganizer] Cluster relation/reasoning done.")
|
|
349
352
|
|
|
@@ -127,7 +127,7 @@ class InternetGoogleRetriever:
|
|
|
127
127
|
self.embedder = embedder
|
|
128
128
|
|
|
129
129
|
def retrieve_from_internet(
|
|
130
|
-
self, query: str, top_k: int = 10, parsed_goal=None
|
|
130
|
+
self, query: str, top_k: int = 10, parsed_goal=None, info=None
|
|
131
131
|
) -> list[TextualMemoryItem]:
|
|
132
132
|
"""
|
|
133
133
|
Retrieve information from the internet and convert to TextualMemoryItem format
|
|
@@ -136,10 +136,13 @@ class InternetGoogleRetriever:
|
|
|
136
136
|
query: Search query
|
|
137
137
|
top_k: Number of results to return
|
|
138
138
|
parsed_goal: Parsed task goal (optional)
|
|
139
|
+
info (dict): Caller context (e.g. "user_id", "session_id") recorded in the metadata of each retrieved memory.
|
|
139
140
|
|
|
140
141
|
Returns:
|
|
141
142
|
List of TextualMemoryItem
|
|
142
143
|
"""
|
|
144
|
+
if not info:
|
|
145
|
+
info = {"user_id": "", "session_id": ""}
|
|
143
146
|
# Get search results
|
|
144
147
|
search_results = self.google_api.get_all_results(query, max_results=top_k)
|
|
145
148
|
|
|
@@ -157,8 +160,8 @@ class InternetGoogleRetriever:
|
|
|
157
160
|
memory_content = f"Title: {title}\nSummary: {snippet}\nSource: {link}"
|
|
158
161
|
# Create metadata
|
|
159
162
|
metadata = TreeNodeTextualMemoryMetadata(
|
|
160
|
-
user_id=
|
|
161
|
-
session_id=
|
|
163
|
+
user_id=info.get("user_id", ""),
|
|
164
|
+
session_id=info.get("session_id", ""),
|
|
162
165
|
status="activated",
|
|
163
166
|
type="fact", # Internet search results are usually factual information
|
|
164
167
|
memory_time=datetime.now().strftime("%Y-%m-%d"),
|
|
@@ -4,6 +4,7 @@ from typing import Any, ClassVar
|
|
|
4
4
|
|
|
5
5
|
from memos.configs.internet_retriever import InternetRetrieverConfigFactory
|
|
6
6
|
from memos.embedders.base import BaseEmbedder
|
|
7
|
+
from memos.mem_reader.factory import MemReaderFactory
|
|
7
8
|
from memos.memories.textual.tree_text_memory.retrieve.internet_retriever import (
|
|
8
9
|
InternetGoogleRetriever,
|
|
9
10
|
)
|
|
@@ -66,6 +67,7 @@ class InternetRetrieverFactory:
|
|
|
66
67
|
access_key=config.api_key, # Use api_key as access_key for xinyu
|
|
67
68
|
search_engine_id=config.search_engine_id,
|
|
68
69
|
embedder=embedder,
|
|
70
|
+
reader=MemReaderFactory.from_config(config.reader),
|
|
69
71
|
max_results=config.max_results,
|
|
70
72
|
)
|
|
71
73
|
else:
|
|
@@ -10,4 +10,6 @@ class ParsedTaskGoal:
|
|
|
10
10
|
memories: list[str] = field(default_factory=list)
|
|
11
11
|
keys: list[str] = field(default_factory=list)
|
|
12
12
|
tags: list[str] = field(default_factory=list)
|
|
13
|
+
rephrased_query: str | None = None
|
|
14
|
+
internet_search: bool = False
|
|
13
15
|
goal_type: str | None = None # e.g., 'default', 'explanation', etc.
|
|
@@ -6,6 +6,7 @@ from datetime import datetime
|
|
|
6
6
|
from memos.embedders.factory import OllamaEmbedder
|
|
7
7
|
from memos.graph_dbs.factory import Neo4jGraphDB
|
|
8
8
|
from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
|
|
9
|
+
from memos.log import get_logger
|
|
9
10
|
from memos.memories.textual.item import SearchedTreeNodeTextualMemoryMetadata, TextualMemoryItem
|
|
10
11
|
|
|
11
12
|
from .internet_retriever_factory import InternetRetrieverFactory
|
|
@@ -15,6 +16,9 @@ from .reranker import MemoryReranker
|
|
|
15
16
|
from .task_goal_parser import TaskGoalParser
|
|
16
17
|
|
|
17
18
|
|
|
19
|
+
logger = get_logger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
18
22
|
class Searcher:
|
|
19
23
|
def __init__(
|
|
20
24
|
self,
|
|
@@ -53,7 +57,12 @@ class Searcher:
|
|
|
53
57
|
Returns:
|
|
54
58
|
list[TextualMemoryItem]: List of matching memories.
|
|
55
59
|
"""
|
|
56
|
-
|
|
60
|
+
if not info:
|
|
61
|
+
logger.warning(
|
|
62
|
+
"Please input 'info' when use tree.search so that "
|
|
63
|
+
"the database would store the consume history."
|
|
64
|
+
)
|
|
65
|
+
info = {"user_id": "", "session_id": ""}
|
|
57
66
|
# Step 1: Parse task structure into topic, concept, and fact levels
|
|
58
67
|
context = []
|
|
59
68
|
if mode == "fine":
|
|
@@ -67,7 +76,18 @@ class Searcher:
|
|
|
67
76
|
context = list(set(context))
|
|
68
77
|
|
|
69
78
|
# Step 1a: Parse task structure into topic, concept, and fact levels
|
|
70
|
-
parsed_goal = self.task_goal_parser.parse(
|
|
79
|
+
parsed_goal = self.task_goal_parser.parse(
|
|
80
|
+
task_description=query,
|
|
81
|
+
context="\n".join(context),
|
|
82
|
+
conversation=info.get("chat_history", []),
|
|
83
|
+
mode=mode,
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
query = (
|
|
87
|
+
parsed_goal.rephrased_query
|
|
88
|
+
if parsed_goal.rephrased_query and len(parsed_goal.rephrased_query) > 0
|
|
89
|
+
else query
|
|
90
|
+
)
|
|
71
91
|
|
|
72
92
|
if parsed_goal.memories:
|
|
73
93
|
query_embedding = self.embedder.embed(list({query, *parsed_goal.memories}))
|
|
@@ -136,12 +156,12 @@ class Searcher:
|
|
|
136
156
|
"""
|
|
137
157
|
Retrieve information from the internet using Google Custom Search API.
|
|
138
158
|
"""
|
|
139
|
-
if not self.internet_retriever:
|
|
159
|
+
if not self.internet_retriever or mode == "fast" or not parsed_goal.internet_search:
|
|
140
160
|
return []
|
|
141
161
|
if memory_type not in ["All"]:
|
|
142
162
|
return []
|
|
143
163
|
internet_items = self.internet_retriever.retrieve_from_internet(
|
|
144
|
-
query=query, top_k=top_k, parsed_goal=parsed_goal
|
|
164
|
+
query=query, top_k=top_k, parsed_goal=parsed_goal, info=info
|
|
145
165
|
)
|
|
146
166
|
|
|
147
167
|
# Convert to the format expected by reranker
|
|
@@ -149,21 +169,30 @@ class Searcher:
|
|
|
149
169
|
query=query,
|
|
150
170
|
query_embedding=query_embedding[0],
|
|
151
171
|
graph_results=internet_items,
|
|
152
|
-
top_k=top_k
|
|
172
|
+
top_k=min(top_k, 5),
|
|
153
173
|
parsed_goal=parsed_goal,
|
|
154
174
|
)
|
|
155
175
|
return ranked_memories
|
|
156
176
|
|
|
157
|
-
# Step 3: Parallel execution of all paths
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
177
|
+
# Step 3: Parallel execution of all paths (enable internet search according to the parameter in the parsed goal)
|
|
178
|
+
if parsed_goal.internet_search:
|
|
179
|
+
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
|
|
180
|
+
future_working = executor.submit(retrieve_from_working_memory)
|
|
181
|
+
future_hybrid = executor.submit(retrieve_ranked_long_term_and_user)
|
|
182
|
+
future_internet = executor.submit(retrieve_from_internet)
|
|
183
|
+
|
|
184
|
+
working_results = future_working.result()
|
|
185
|
+
hybrid_results = future_hybrid.result()
|
|
186
|
+
internet_results = future_internet.result()
|
|
187
|
+
searched_res = working_results + hybrid_results + internet_results
|
|
188
|
+
else:
|
|
189
|
+
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
|
|
190
|
+
future_working = executor.submit(retrieve_from_working_memory)
|
|
191
|
+
future_hybrid = executor.submit(retrieve_ranked_long_term_and_user)
|
|
192
|
+
|
|
193
|
+
working_results = future_working.result()
|
|
194
|
+
hybrid_results = future_hybrid.result()
|
|
195
|
+
searched_res = working_results + hybrid_results
|
|
167
196
|
|
|
168
197
|
# Deduplicate by item.memory, keep higher score
|
|
169
198
|
deduped_result = {}
|
|
@@ -184,16 +213,10 @@ class Searcher:
|
|
|
184
213
|
TextualMemoryItem(id=item.id, memory=item.memory, metadata=new_meta)
|
|
185
214
|
)
|
|
186
215
|
|
|
187
|
-
# Step 4: Reasoning over all retrieved and ranked memory
|
|
188
|
-
if mode == "fine":
|
|
189
|
-
searched_res = self.reasoner.reason(
|
|
190
|
-
query=query,
|
|
191
|
-
ranked_memories=searched_res,
|
|
192
|
-
parsed_goal=parsed_goal,
|
|
193
|
-
)
|
|
194
|
-
|
|
195
216
|
# Step 5: Update usage history with current timestamp
|
|
196
217
|
now_time = datetime.now().isoformat()
|
|
218
|
+
if "chat_history" in info:
|
|
219
|
+
info.pop("chat_history")
|
|
197
220
|
usage_record = json.dumps(
|
|
198
221
|
{"time": now_time, "info": info}
|
|
199
222
|
) # `info` should be a serializable dict or string
|