MemoryOS 0.2.0__py3-none-any.whl → 0.2.2__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MemoryOS might be problematic. Click here for more details.
- {memoryos-0.2.0.dist-info → memoryos-0.2.2.dist-info}/METADATA +67 -26
- memoryos-0.2.2.dist-info/RECORD +169 -0
- memoryos-0.2.2.dist-info/entry_points.txt +3 -0
- memos/__init__.py +1 -1
- memos/api/config.py +562 -0
- memos/api/context/context.py +147 -0
- memos/api/context/dependencies.py +90 -0
- memos/api/exceptions.py +28 -0
- memos/api/mcp_serve.py +502 -0
- memos/api/product_api.py +35 -0
- memos/api/product_models.py +163 -0
- memos/api/routers/__init__.py +1 -0
- memos/api/routers/product_router.py +386 -0
- memos/chunkers/sentence_chunker.py +8 -2
- memos/cli.py +113 -0
- memos/configs/embedder.py +27 -0
- memos/configs/graph_db.py +132 -3
- memos/configs/internet_retriever.py +6 -0
- memos/configs/llm.py +47 -0
- memos/configs/mem_cube.py +1 -1
- memos/configs/mem_os.py +5 -0
- memos/configs/mem_reader.py +9 -0
- memos/configs/mem_scheduler.py +107 -7
- memos/configs/mem_user.py +58 -0
- memos/configs/memory.py +5 -4
- memos/dependency.py +52 -0
- memos/embedders/ark.py +92 -0
- memos/embedders/factory.py +4 -0
- memos/embedders/sentence_transformer.py +8 -2
- memos/embedders/universal_api.py +32 -0
- memos/graph_dbs/base.py +11 -3
- memos/graph_dbs/factory.py +4 -0
- memos/graph_dbs/nebular.py +1364 -0
- memos/graph_dbs/neo4j.py +333 -124
- memos/graph_dbs/neo4j_community.py +300 -0
- memos/llms/base.py +9 -0
- memos/llms/deepseek.py +54 -0
- memos/llms/factory.py +10 -1
- memos/llms/hf.py +170 -13
- memos/llms/hf_singleton.py +114 -0
- memos/llms/ollama.py +4 -0
- memos/llms/openai.py +67 -1
- memos/llms/qwen.py +63 -0
- memos/llms/vllm.py +153 -0
- memos/log.py +1 -1
- memos/mem_cube/general.py +77 -16
- memos/mem_cube/utils.py +109 -0
- memos/mem_os/core.py +251 -51
- memos/mem_os/main.py +94 -12
- memos/mem_os/product.py +1220 -43
- memos/mem_os/utils/default_config.py +352 -0
- memos/mem_os/utils/format_utils.py +1401 -0
- memos/mem_reader/simple_struct.py +18 -10
- memos/mem_scheduler/base_scheduler.py +441 -40
- memos/mem_scheduler/general_scheduler.py +249 -248
- memos/mem_scheduler/modules/base.py +14 -5
- memos/mem_scheduler/modules/dispatcher.py +67 -4
- memos/mem_scheduler/modules/misc.py +104 -0
- memos/mem_scheduler/modules/monitor.py +240 -50
- memos/mem_scheduler/modules/rabbitmq_service.py +319 -0
- memos/mem_scheduler/modules/redis_service.py +32 -22
- memos/mem_scheduler/modules/retriever.py +167 -23
- memos/mem_scheduler/modules/scheduler_logger.py +255 -0
- memos/mem_scheduler/mos_for_test_scheduler.py +140 -0
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/general_schemas.py +43 -0
- memos/mem_scheduler/{modules/schemas.py → schemas/message_schemas.py} +63 -61
- memos/mem_scheduler/schemas/monitor_schemas.py +329 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/misc_utils.py +61 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +500 -0
- memos/mem_user/persistent_factory.py +96 -0
- memos/mem_user/persistent_user_manager.py +260 -0
- memos/mem_user/user_manager.py +4 -4
- memos/memories/activation/item.py +29 -0
- memos/memories/activation/kv.py +10 -3
- memos/memories/activation/vllmkv.py +219 -0
- memos/memories/factory.py +2 -0
- memos/memories/textual/base.py +1 -1
- memos/memories/textual/general.py +43 -97
- memos/memories/textual/item.py +5 -33
- memos/memories/textual/tree.py +22 -12
- memos/memories/textual/tree_text_memory/organize/conflict.py +9 -5
- memos/memories/textual/tree_text_memory/organize/manager.py +26 -18
- memos/memories/textual/tree_text_memory/organize/redundancy.py +25 -44
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +50 -48
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +81 -56
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -3
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +2 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +0 -1
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +2 -2
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +2 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +52 -28
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +42 -15
- memos/memories/textual/tree_text_memory/retrieve/utils.py +11 -7
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +62 -58
- memos/memos_tools/dinding_report_bot.py +422 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +96 -0
- memos/parsers/markitdown.py +8 -2
- memos/settings.py +3 -1
- memos/templates/mem_reader_prompts.py +66 -23
- memos/templates/mem_scheduler_prompts.py +126 -43
- memos/templates/mos_prompts.py +87 -0
- memos/templates/tree_reorganize_prompts.py +85 -30
- memos/vec_dbs/base.py +12 -0
- memos/vec_dbs/qdrant.py +46 -20
- memoryos-0.2.0.dist-info/RECORD +0 -128
- memos/mem_scheduler/utils.py +0 -26
- {memoryos-0.2.0.dist-info → memoryos-0.2.2.dist-info}/LICENSE +0 -0
- {memoryos-0.2.0.dist-info → memoryos-0.2.2.dist-info}/WHEEL +0 -0
|
@@ -5,7 +5,8 @@ from datetime import datetime
|
|
|
5
5
|
|
|
6
6
|
from memos.embedders.factory import OllamaEmbedder
|
|
7
7
|
from memos.graph_dbs.factory import Neo4jGraphDB
|
|
8
|
-
from memos.llms.factory import OllamaLLM, OpenAILLM
|
|
8
|
+
from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
|
|
9
|
+
from memos.log import get_logger
|
|
9
10
|
from memos.memories.textual.item import SearchedTreeNodeTextualMemoryMetadata, TextualMemoryItem
|
|
10
11
|
|
|
11
12
|
from .internet_retriever_factory import InternetRetrieverFactory
|
|
@@ -15,10 +16,13 @@ from .reranker import MemoryReranker
|
|
|
15
16
|
from .task_goal_parser import TaskGoalParser
|
|
16
17
|
|
|
17
18
|
|
|
19
|
+
logger = get_logger(__name__)
|
|
20
|
+
|
|
21
|
+
|
|
18
22
|
class Searcher:
|
|
19
23
|
def __init__(
|
|
20
24
|
self,
|
|
21
|
-
dispatcher_llm: OpenAILLM | OllamaLLM,
|
|
25
|
+
dispatcher_llm: OpenAILLM | OllamaLLM | AzureLLM,
|
|
22
26
|
graph_store: Neo4jGraphDB,
|
|
23
27
|
embedder: OllamaEmbedder,
|
|
24
28
|
internet_retriever: InternetRetrieverFactory | None = None,
|
|
@@ -53,7 +57,12 @@ class Searcher:
|
|
|
53
57
|
Returns:
|
|
54
58
|
list[TextualMemoryItem]: List of matching memories.
|
|
55
59
|
"""
|
|
56
|
-
|
|
60
|
+
if not info:
|
|
61
|
+
logger.warning(
|
|
62
|
+
"Please input 'info' when use tree.search so that "
|
|
63
|
+
"the database would store the consume history."
|
|
64
|
+
)
|
|
65
|
+
info = {"user_id": "", "session_id": ""}
|
|
57
66
|
# Step 1: Parse task structure into topic, concept, and fact levels
|
|
58
67
|
context = []
|
|
59
68
|
if mode == "fine":
|
|
@@ -67,7 +76,18 @@ class Searcher:
|
|
|
67
76
|
context = list(set(context))
|
|
68
77
|
|
|
69
78
|
# Step 1a: Parse task structure into topic, concept, and fact levels
|
|
70
|
-
parsed_goal = self.task_goal_parser.parse(
|
|
79
|
+
parsed_goal = self.task_goal_parser.parse(
|
|
80
|
+
task_description=query,
|
|
81
|
+
context="\n".join(context),
|
|
82
|
+
conversation=info.get("chat_history", []),
|
|
83
|
+
mode=mode,
|
|
84
|
+
)
|
|
85
|
+
|
|
86
|
+
query = (
|
|
87
|
+
parsed_goal.rephrased_query
|
|
88
|
+
if parsed_goal.rephrased_query and len(parsed_goal.rephrased_query) > 0
|
|
89
|
+
else query
|
|
90
|
+
)
|
|
71
91
|
|
|
72
92
|
if parsed_goal.memories:
|
|
73
93
|
query_embedding = self.embedder.embed(list({query, *parsed_goal.memories}))
|
|
@@ -136,12 +156,12 @@ class Searcher:
|
|
|
136
156
|
"""
|
|
137
157
|
Retrieve information from the internet using Google Custom Search API.
|
|
138
158
|
"""
|
|
139
|
-
if not self.internet_retriever:
|
|
159
|
+
if not self.internet_retriever or mode == "fast" or not parsed_goal.internet_search:
|
|
140
160
|
return []
|
|
141
161
|
if memory_type not in ["All"]:
|
|
142
162
|
return []
|
|
143
163
|
internet_items = self.internet_retriever.retrieve_from_internet(
|
|
144
|
-
query=query, top_k=top_k, parsed_goal=parsed_goal
|
|
164
|
+
query=query, top_k=top_k, parsed_goal=parsed_goal, info=info
|
|
145
165
|
)
|
|
146
166
|
|
|
147
167
|
# Convert to the format expected by reranker
|
|
@@ -149,21 +169,30 @@ class Searcher:
|
|
|
149
169
|
query=query,
|
|
150
170
|
query_embedding=query_embedding[0],
|
|
151
171
|
graph_results=internet_items,
|
|
152
|
-
top_k=top_k
|
|
172
|
+
top_k=min(top_k, 5),
|
|
153
173
|
parsed_goal=parsed_goal,
|
|
154
174
|
)
|
|
155
175
|
return ranked_memories
|
|
156
176
|
|
|
157
|
-
# Step 3: Parallel execution of all paths
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
|
|
165
|
-
|
|
166
|
-
|
|
177
|
+
# Step 3: Parallel execution of all paths (enable internet search according to the parameter in the parsed goal)
|
|
178
|
+
if parsed_goal.internet_search:
|
|
179
|
+
with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
|
|
180
|
+
future_working = executor.submit(retrieve_from_working_memory)
|
|
181
|
+
future_hybrid = executor.submit(retrieve_ranked_long_term_and_user)
|
|
182
|
+
future_internet = executor.submit(retrieve_from_internet)
|
|
183
|
+
|
|
184
|
+
working_results = future_working.result()
|
|
185
|
+
hybrid_results = future_hybrid.result()
|
|
186
|
+
internet_results = future_internet.result()
|
|
187
|
+
searched_res = working_results + hybrid_results + internet_results
|
|
188
|
+
else:
|
|
189
|
+
with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
|
|
190
|
+
future_working = executor.submit(retrieve_from_working_memory)
|
|
191
|
+
future_hybrid = executor.submit(retrieve_ranked_long_term_and_user)
|
|
192
|
+
|
|
193
|
+
working_results = future_working.result()
|
|
194
|
+
hybrid_results = future_hybrid.result()
|
|
195
|
+
searched_res = working_results + hybrid_results
|
|
167
196
|
|
|
168
197
|
# Deduplicate by item.memory, keep higher score
|
|
169
198
|
deduped_result = {}
|
|
@@ -176,23 +205,18 @@ class Searcher:
|
|
|
176
205
|
for item, score in sorted(deduped_result.values(), key=lambda pair: pair[1], reverse=True)[
|
|
177
206
|
:top_k
|
|
178
207
|
]:
|
|
179
|
-
|
|
180
|
-
|
|
181
|
-
|
|
208
|
+
meta_data = item.metadata.model_dump()
|
|
209
|
+
if "relativity" not in meta_data:
|
|
210
|
+
meta_data["relativity"] = score
|
|
211
|
+
new_meta = SearchedTreeNodeTextualMemoryMetadata(**meta_data)
|
|
182
212
|
searched_res.append(
|
|
183
213
|
TextualMemoryItem(id=item.id, memory=item.memory, metadata=new_meta)
|
|
184
214
|
)
|
|
185
215
|
|
|
186
|
-
# Step 4: Reasoning over all retrieved and ranked memory
|
|
187
|
-
if mode == "fine":
|
|
188
|
-
searched_res = self.reasoner.reason(
|
|
189
|
-
query=query,
|
|
190
|
-
ranked_memories=searched_res,
|
|
191
|
-
parsed_goal=parsed_goal,
|
|
192
|
-
)
|
|
193
|
-
|
|
194
216
|
# Step 5: Update usage history with current timestamp
|
|
195
217
|
now_time = datetime.now().isoformat()
|
|
218
|
+
if "chat_history" in info:
|
|
219
|
+
info.pop("chat_history")
|
|
196
220
|
usage_record = json.dumps(
|
|
197
221
|
{"time": now_time, "info": info}
|
|
198
222
|
) # `info` should be a serializable dict or string
|
|
@@ -1,4 +1,5 @@
|
|
|
1
|
-
import
|
|
1
|
+
import logging
|
|
2
|
+
import traceback
|
|
2
3
|
|
|
3
4
|
from string import Template
|
|
4
5
|
|
|
@@ -14,11 +15,16 @@ class TaskGoalParser:
|
|
|
14
15
|
- mode == 'fine': use LLM to parse structured topic/keys/tags
|
|
15
16
|
"""
|
|
16
17
|
|
|
17
|
-
def __init__(self, llm=BaseLLM
|
|
18
|
+
def __init__(self, llm=BaseLLM):
|
|
18
19
|
self.llm = llm
|
|
19
|
-
self.mode = mode
|
|
20
20
|
|
|
21
|
-
def parse(
|
|
21
|
+
def parse(
|
|
22
|
+
self,
|
|
23
|
+
task_description: str,
|
|
24
|
+
context: str = "",
|
|
25
|
+
conversation: list[dict] | None = None,
|
|
26
|
+
mode: str = "fast",
|
|
27
|
+
) -> ParsedTaskGoal:
|
|
22
28
|
"""
|
|
23
29
|
Parse user input into structured semantic layers.
|
|
24
30
|
Returns:
|
|
@@ -26,42 +32,63 @@ class TaskGoalParser:
|
|
|
26
32
|
- mode == 'fast': use jieba to split words only
|
|
27
33
|
- mode == 'fine': use LLM to parse structured topic/keys/tags
|
|
28
34
|
"""
|
|
29
|
-
if
|
|
35
|
+
if mode == "fast":
|
|
30
36
|
return self._parse_fast(task_description)
|
|
31
|
-
elif
|
|
37
|
+
elif mode == "fine":
|
|
32
38
|
if not self.llm:
|
|
33
39
|
raise ValueError("LLM not provided for slow mode.")
|
|
34
|
-
return self._parse_fine(task_description, context)
|
|
40
|
+
return self._parse_fine(task_description, context, conversation)
|
|
35
41
|
else:
|
|
36
|
-
raise ValueError(f"Unknown mode: {
|
|
42
|
+
raise ValueError(f"Unknown mode: {mode}")
|
|
37
43
|
|
|
38
44
|
def _parse_fast(self, task_description: str, limit_num: int = 5) -> ParsedTaskGoal:
|
|
39
45
|
"""
|
|
40
46
|
Fast mode: simple jieba word split.
|
|
41
47
|
"""
|
|
42
48
|
return ParsedTaskGoal(
|
|
43
|
-
memories=[task_description],
|
|
49
|
+
memories=[task_description],
|
|
50
|
+
keys=[task_description],
|
|
51
|
+
tags=[],
|
|
52
|
+
goal_type="default",
|
|
53
|
+
rephrased_query=task_description,
|
|
54
|
+
internet_search=False,
|
|
44
55
|
)
|
|
45
56
|
|
|
46
|
-
def _parse_fine(
|
|
57
|
+
def _parse_fine(
|
|
58
|
+
self, query: str, context: str = "", conversation: list[dict] | None = None
|
|
59
|
+
) -> ParsedTaskGoal:
|
|
47
60
|
"""
|
|
48
61
|
Slow mode: LLM structured parse.
|
|
49
62
|
"""
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
|
|
63
|
+
try:
|
|
64
|
+
if conversation:
|
|
65
|
+
conversation_prompt = "\n".join(
|
|
66
|
+
[f"{each['role']}: {each['content']}" for each in conversation]
|
|
67
|
+
)
|
|
68
|
+
else:
|
|
69
|
+
conversation_prompt = ""
|
|
70
|
+
prompt = Template(TASK_PARSE_PROMPT).substitute(
|
|
71
|
+
task=query.strip(), context=context, conversation=conversation_prompt
|
|
72
|
+
)
|
|
73
|
+
response = self.llm.generate(messages=[{"role": "user", "content": prompt}])
|
|
74
|
+
return self._parse_response(response)
|
|
75
|
+
except Exception:
|
|
76
|
+
logging.warning(f"Fail to fine-parse query {query}: {traceback.format_exc()}")
|
|
77
|
+
return self._parse_fast(query)
|
|
53
78
|
|
|
54
79
|
def _parse_response(self, response: str) -> ParsedTaskGoal:
|
|
55
80
|
"""
|
|
56
81
|
Parse LLM JSON output safely.
|
|
57
82
|
"""
|
|
58
83
|
try:
|
|
59
|
-
response = response.replace("```", "").replace("json", "")
|
|
60
|
-
response_json =
|
|
84
|
+
response = response.replace("```", "").replace("json", "").strip()
|
|
85
|
+
response_json = eval(response)
|
|
61
86
|
return ParsedTaskGoal(
|
|
62
87
|
memories=response_json.get("memories", []),
|
|
63
88
|
keys=response_json.get("keys", []),
|
|
64
89
|
tags=response_json.get("tags", []),
|
|
90
|
+
rephrased_query=response_json.get("rephrased_instruction", None),
|
|
91
|
+
internet_search=response_json.get("internet_search", False),
|
|
65
92
|
goal_type=response_json.get("goal_type", "default"),
|
|
66
93
|
)
|
|
67
94
|
except Exception as e:
|
|
@@ -1,19 +1,21 @@
|
|
|
1
1
|
# Prompt for task parsing
|
|
2
2
|
TASK_PARSE_PROMPT = """
|
|
3
|
-
You are a task parsing expert. Given a user
|
|
4
|
-
|
|
5
|
-
Given a user task instruction and optional related memory context,
|
|
6
|
-
extract the following structured information:
|
|
3
|
+
You are a task parsing expert. Given a user task instruction, optional former conversation and optional related memory context,extract the following structured information:
|
|
7
4
|
1. Keys: the high-level keywords directly relevant to the user’s task.
|
|
8
5
|
2. Tags: thematic tags to help categorize and retrieve related memories.
|
|
9
6
|
3. Goal Type: retrieval | qa | generation
|
|
10
|
-
4.
|
|
11
|
-
|
|
12
|
-
|
|
7
|
+
4. Rephrased instruction: Give a rephrased task instruction based on the former conversation to make it less confusing to look alone. If you think the task instruction is easy enough to understand, or there is no former conversation, set "rephrased_instruction" to an empty string.
|
|
8
|
+
5. Need for internet search: If you think you need to search the internet to finish the rephrased/original user task instruction, set "internet_search" to True. Otherwise, set it to False.
|
|
9
|
+
6. Memories: Provide 2–5 short semantic expansions or rephrasings of the rephrased/original user task instruction. These are used for improved embedding search coverage. Each should be clear, concise, and meaningful for retrieval.
|
|
13
10
|
|
|
14
11
|
Task description:
|
|
15
12
|
\"\"\"$task\"\"\"
|
|
16
13
|
|
|
14
|
+
Former conversation (if any):
|
|
15
|
+
\"\"\"
|
|
16
|
+
$conversation
|
|
17
|
+
\"\"\"
|
|
18
|
+
|
|
17
19
|
Context (if any):
|
|
18
20
|
\"\"\"$context\"\"\"
|
|
19
21
|
|
|
@@ -22,6 +24,8 @@ Return strictly in this JSON format:
|
|
|
22
24
|
"keys": [...],
|
|
23
25
|
"tags": [...],
|
|
24
26
|
"goal_type": "retrieval | qa | generation",
|
|
27
|
+
"rephrased_instruction": "...", # return an empty string if the original instruction is easy enough to understand
|
|
28
|
+
"internet_search": True/False,
|
|
25
29
|
"memories": ["...", "...", ...]
|
|
26
30
|
}
|
|
27
31
|
"""
|
|
@@ -3,13 +3,15 @@
|
|
|
3
3
|
import json
|
|
4
4
|
import uuid
|
|
5
5
|
|
|
6
|
+
from concurrent.futures import ThreadPoolExecutor, as_completed
|
|
6
7
|
from datetime import datetime
|
|
7
8
|
|
|
8
9
|
import requests
|
|
9
10
|
|
|
10
11
|
from memos.embedders.factory import OllamaEmbedder
|
|
11
12
|
from memos.log import get_logger
|
|
12
|
-
from memos.
|
|
13
|
+
from memos.mem_reader.base import BaseMemReader
|
|
14
|
+
from memos.memories.textual.item import TextualMemoryItem
|
|
13
15
|
|
|
14
16
|
|
|
15
17
|
logger = get_logger(__name__)
|
|
@@ -93,8 +95,8 @@ class XinyuSearchAPI:
|
|
|
93
95
|
"online_search": {
|
|
94
96
|
"max_entries": max_results,
|
|
95
97
|
"cache_switch": False,
|
|
96
|
-
"baidu_field": {"switch":
|
|
97
|
-
"bing_field": {"switch":
|
|
98
|
+
"baidu_field": {"switch": False, "mode": "relevance", "type": "page"},
|
|
99
|
+
"bing_field": {"switch": True, "mode": "relevance", "type": "page"},
|
|
98
100
|
"sogou_field": {"switch": False, "mode": "relevance", "type": "page"},
|
|
99
101
|
},
|
|
100
102
|
"request_id": "memos" + str(uuid.uuid4()),
|
|
@@ -112,6 +114,7 @@ class XinyuSearchRetriever:
|
|
|
112
114
|
access_key: str,
|
|
113
115
|
search_engine_id: str,
|
|
114
116
|
embedder: OllamaEmbedder,
|
|
117
|
+
reader: BaseMemReader,
|
|
115
118
|
max_results: int = 20,
|
|
116
119
|
):
|
|
117
120
|
"""
|
|
@@ -121,12 +124,14 @@ class XinyuSearchRetriever:
|
|
|
121
124
|
access_key: Xinyu API access key
|
|
122
125
|
embedder: Embedder instance for generating embeddings
|
|
123
126
|
max_results: Maximum number of results to retrieve
|
|
127
|
+
reader: MemReader module to deal with internet contents
|
|
124
128
|
"""
|
|
125
129
|
self.xinyu_api = XinyuSearchAPI(access_key, search_engine_id, max_results=max_results)
|
|
126
130
|
self.embedder = embedder
|
|
131
|
+
self.reader = reader
|
|
127
132
|
|
|
128
133
|
def retrieve_from_internet(
|
|
129
|
-
self, query: str, top_k: int = 10, parsed_goal=None
|
|
134
|
+
self, query: str, top_k: int = 10, parsed_goal=None, info=None
|
|
130
135
|
) -> list[TextualMemoryItem]:
|
|
131
136
|
"""
|
|
132
137
|
Retrieve information from Xinyu search and convert to TextualMemoryItem format
|
|
@@ -135,7 +140,7 @@ class XinyuSearchRetriever:
|
|
|
135
140
|
query: Search query
|
|
136
141
|
top_k: Number of results to return
|
|
137
142
|
parsed_goal: Parsed task goal (optional)
|
|
138
|
-
|
|
143
|
+
info (dict): Leave a record of memory consumption.
|
|
139
144
|
Returns:
|
|
140
145
|
List of TextualMemoryItem
|
|
141
146
|
"""
|
|
@@ -143,63 +148,25 @@ class XinyuSearchRetriever:
|
|
|
143
148
|
search_results = self.xinyu_api.search(query, max_results=top_k)
|
|
144
149
|
|
|
145
150
|
# Convert to TextualMemoryItem format
|
|
146
|
-
memory_items = []
|
|
147
|
-
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
publish_time = result.get("publish_time", "")
|
|
155
|
-
if publish_time:
|
|
151
|
+
memory_items: list[TextualMemoryItem] = []
|
|
152
|
+
|
|
153
|
+
with ThreadPoolExecutor(max_workers=8) as executor:
|
|
154
|
+
futures = [
|
|
155
|
+
executor.submit(self._process_result, result, query, parsed_goal, info)
|
|
156
|
+
for result in search_results
|
|
157
|
+
]
|
|
158
|
+
for future in as_completed(futures):
|
|
156
159
|
try:
|
|
157
|
-
|
|
158
|
-
"%Y-%m-%d"
|
|
159
|
-
)
|
|
160
|
+
memory_items.extend(future.result())
|
|
160
161
|
except Exception as e:
|
|
161
|
-
logger.error(f"
|
|
162
|
-
publish_time = datetime.now().strftime("%Y-%m-%d")
|
|
163
|
-
else:
|
|
164
|
-
publish_time = datetime.now().strftime("%Y-%m-%d")
|
|
165
|
-
source = result.get("source", "")
|
|
166
|
-
site = result.get("site", "")
|
|
167
|
-
if site:
|
|
168
|
-
site = site.split("|")[0]
|
|
169
|
-
|
|
170
|
-
# Combine memory content
|
|
171
|
-
memory_content = (
|
|
172
|
-
f"Title: {title}\nSummary: {summary}\nContent: {content[:200]}...\nSource: {url}"
|
|
173
|
-
)
|
|
162
|
+
logger.error(f"Error processing search result: {e}")
|
|
174
163
|
|
|
175
|
-
|
|
176
|
-
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
status="activated",
|
|
180
|
-
type="fact", # Search results are usually factual information
|
|
181
|
-
memory_time=publish_time,
|
|
182
|
-
source="web",
|
|
183
|
-
confidence=85.0, # Confidence level for search information
|
|
184
|
-
entities=self._extract_entities(title, content, summary),
|
|
185
|
-
tags=self._extract_tags(title, content, summary, parsed_goal),
|
|
186
|
-
visibility="public",
|
|
187
|
-
memory_type="LongTermMemory", # Search results as working memory
|
|
188
|
-
key=title,
|
|
189
|
-
sources=[url] if url else [],
|
|
190
|
-
embedding=self.embedder.embed([memory_content])[0],
|
|
191
|
-
created_at=datetime.now().isoformat(),
|
|
192
|
-
usage=[],
|
|
193
|
-
background=f"Xinyu search result from {site or source}",
|
|
194
|
-
)
|
|
195
|
-
# Create TextualMemoryItem
|
|
196
|
-
memory_item = TextualMemoryItem(
|
|
197
|
-
id=str(uuid.uuid4()), memory=memory_content, metadata=metadata
|
|
198
|
-
)
|
|
164
|
+
unique_memory_items = {}
|
|
165
|
+
for item in memory_items:
|
|
166
|
+
if item.memory not in unique_memory_items:
|
|
167
|
+
unique_memory_items[item.memory] = item
|
|
199
168
|
|
|
200
|
-
|
|
201
|
-
|
|
202
|
-
return memory_items
|
|
169
|
+
return list(unique_memory_items.values())
|
|
203
170
|
|
|
204
171
|
def _extract_entities(self, title: str, content: str, summary: str) -> list[str]:
|
|
205
172
|
"""
|
|
@@ -333,3 +300,40 @@ class XinyuSearchRetriever:
|
|
|
333
300
|
tags.extend(parsed_goal.tags)
|
|
334
301
|
|
|
335
302
|
return list(set(tags))[:15] # Limit to 15 tags
|
|
303
|
+
|
|
304
|
+
def _process_result(
|
|
305
|
+
self, result: dict, query: str, parsed_goal: str, info: None
|
|
306
|
+
) -> list[TextualMemoryItem]:
|
|
307
|
+
if not info:
|
|
308
|
+
info = {"user_id": "", "session_id": ""}
|
|
309
|
+
title = result.get("title", "")
|
|
310
|
+
content = result.get("content", "")
|
|
311
|
+
summary = result.get("summary", "")
|
|
312
|
+
url = result.get("url", "")
|
|
313
|
+
publish_time = result.get("publish_time", "")
|
|
314
|
+
if publish_time:
|
|
315
|
+
try:
|
|
316
|
+
publish_time = datetime.strptime(publish_time, "%Y-%m-%d %H:%M:%S").strftime(
|
|
317
|
+
"%Y-%m-%d"
|
|
318
|
+
)
|
|
319
|
+
except Exception as e:
|
|
320
|
+
logger.error(f"xinyu search error: {e}")
|
|
321
|
+
publish_time = datetime.now().strftime("%Y-%m-%d")
|
|
322
|
+
else:
|
|
323
|
+
publish_time = datetime.now().strftime("%Y-%m-%d")
|
|
324
|
+
|
|
325
|
+
read_items = self.reader.get_memory([content], type="doc", info=info)
|
|
326
|
+
|
|
327
|
+
memory_items = []
|
|
328
|
+
for read_item_i in read_items[0]:
|
|
329
|
+
read_item_i.memory = (
|
|
330
|
+
f"Title: {title}\nNewsTime: {publish_time}\nSummary: {summary}\n"
|
|
331
|
+
f"Content: {read_item_i.memory}"
|
|
332
|
+
)
|
|
333
|
+
read_item_i.metadata.source = "web"
|
|
334
|
+
read_item_i.metadata.memory_type = "OuterMemory"
|
|
335
|
+
read_item_i.metadata.sources = [url] if url else []
|
|
336
|
+
read_item_i.metadata.visibility = "public"
|
|
337
|
+
|
|
338
|
+
memory_items.append(read_item_i)
|
|
339
|
+
return memory_items
|