MemoryOS 0.0.1__py3-none-any.whl → 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MemoryOS might be problematic.
- memoryos-0.1.13.dist-info/METADATA +288 -0
- memoryos-0.1.13.dist-info/RECORD +122 -0
- memos/__init__.py +20 -1
- memos/api/start_api.py +420 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/factory.py +22 -0
- memos/chunkers/sentence_chunker.py +35 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +45 -0
- memos/configs/embedder.py +53 -0
- memos/configs/graph_db.py +45 -0
- memos/configs/internet_retriever.py +81 -0
- memos/configs/llm.py +71 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +89 -0
- memos/configs/mem_os.py +74 -0
- memos/configs/mem_reader.py +53 -0
- memos/configs/mem_scheduler.py +78 -0
- memos/configs/memory.py +195 -0
- memos/configs/parser.py +38 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +64 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/base.py +15 -0
- memos/embedders/factory.py +23 -0
- memos/embedders/ollama.py +74 -0
- memos/embedders/sentence_transformer.py +40 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +215 -0
- memos/graph_dbs/factory.py +21 -0
- memos/graph_dbs/neo4j.py +827 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +16 -0
- memos/llms/factory.py +25 -0
- memos/llms/hf.py +231 -0
- memos/llms/ollama.py +82 -0
- memos/llms/openai.py +34 -0
- memos/llms/utils.py +14 -0
- memos/log.py +78 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +29 -0
- memos/mem_cube/general.py +146 -0
- memos/mem_cube/utils.py +24 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +819 -0
- memos/mem_os/main.py +503 -0
- memos/mem_os/product.py +89 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +27 -0
- memos/mem_reader/factory.py +21 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/simple_struct.py +241 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/base_scheduler.py +164 -0
- memos/mem_scheduler/general_scheduler.py +305 -0
- memos/mem_scheduler/modules/__init__.py +0 -0
- memos/mem_scheduler/modules/base.py +74 -0
- memos/mem_scheduler/modules/dispatcher.py +103 -0
- memos/mem_scheduler/modules/monitor.py +82 -0
- memos/mem_scheduler/modules/redis_service.py +146 -0
- memos/mem_scheduler/modules/retriever.py +41 -0
- memos/mem_scheduler/modules/schemas.py +146 -0
- memos/mem_scheduler/scheduler_factory.py +21 -0
- memos/mem_scheduler/utils.py +26 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +25 -0
- memos/memories/activation/kv.py +232 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +34 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +89 -0
- memos/memories/textual/general.py +286 -0
- memos/memories/textual/item.py +167 -0
- memos/memories/textual/naive.py +185 -0
- memos/memories/textual/tree.py +321 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +305 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +263 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +89 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +158 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +13 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +208 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +68 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +48 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +335 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +19 -0
- memos/parsers/markitdown.py +22 -0
- memos/settings.py +8 -0
- memos/templates/__init__.py +0 -0
- memos/templates/mem_reader_prompts.py +98 -0
- memos/templates/mem_scheduler_prompts.py +65 -0
- memos/templates/mos_prompts.py +63 -0
- memos/types.py +55 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +105 -0
- memos/vec_dbs/factory.py +21 -0
- memos/vec_dbs/item.py +43 -0
- memos/vec_dbs/qdrant.py +292 -0
- memoryos-0.0.1.dist-info/METADATA +0 -53
- memoryos-0.0.1.dist-info/RECORD +0 -5
- {memoryos-0.0.1.dist-info → memoryos-0.1.13.dist-info}/LICENSE +0 -0
- {memoryos-0.0.1.dist-info → memoryos-0.1.13.dist-info}/WHEEL +0 -0
memos/memories/textual/tree_text_memory/retrieve/searcher.py ADDED

@@ -0,0 +1,208 @@
+import concurrent.futures
+import json
+
+from datetime import datetime
+
+from memos.embedders.factory import OllamaEmbedder
+from memos.graph_dbs.factory import Neo4jGraphDB
+from memos.llms.factory import OllamaLLM, OpenAILLM
+from memos.memories.textual.item import SearchedTreeNodeTextualMemoryMetadata, TextualMemoryItem
+
+from .internet_retriever_factory import InternetRetrieverFactory
+from .reasoner import MemoryReasoner
+from .recall import GraphMemoryRetriever
+from .reranker import MemoryReranker
+from .task_goal_parser import TaskGoalParser
+
+
+class Searcher:
+    def __init__(
+        self,
+        dispatcher_llm: OpenAILLM | OllamaLLM,
+        graph_store: Neo4jGraphDB,
+        embedder: OllamaEmbedder,
+        internet_retriever: InternetRetrieverFactory | None = None,
+    ):
+        self.graph_store = graph_store
+        self.embedder = embedder
+
+        self.task_goal_parser = TaskGoalParser(dispatcher_llm)
+        self.graph_retriever = GraphMemoryRetriever(self.graph_store, self.embedder)
+        self.reranker = MemoryReranker(dispatcher_llm, self.embedder)
+        self.reasoner = MemoryReasoner(dispatcher_llm)
+
+        # Internet retriever, created from config if provided
+        self.internet_retriever = internet_retriever
+
+    def search(
+        self, query: str, top_k: int, info=None, mode: str = "fast", memory_type: str = "All"
+    ) -> list[TextualMemoryItem]:
+        """
+        Search for memories based on a query.
+        User query -> TaskGoalParser -> GraphMemoryRetriever ->
+        MemoryReranker -> MemoryReasoner -> Final output
+
+        Args:
+            query (str): The query to search for.
+            top_k (int): The number of top results to return.
+            info (dict, optional): Caller metadata recorded into each result's usage history.
+            mode (str, optional): The mode of the search.
+                - 'fast': a faster search process, sacrificing some precision for speed.
+                - 'fine': a more detailed search process that invokes large models for
+                  higher precision, at the cost of slower performance.
+            memory_type (str): Type restriction for the search.
+                ['All', 'WorkingMemory', 'LongTermMemory', 'UserMemory']
+
+        Returns:
+            list[TextualMemoryItem]: List of matching memories.
+        """
+        # Step 1 (fine mode only): build extra context via embedding-based recall
+        context = []
+        if mode == "fine":
+            query_embedding = self.embedder.embed([query])[0]
+            related_node_ids = self.graph_store.search_by_embedding(query_embedding, top_k=top_k)
+            related_nodes = [
+                self.graph_store.get_node(related_node["id"]) for related_node in related_node_ids
+            ]
+            context = [related_node["memory"] for related_node in related_nodes]
+            context = list(set(context))
+
+        # Step 1a: Parse task structure into topic, concept, and fact levels
+        parsed_goal = self.task_goal_parser.parse(query, "\n".join(context))
+
+        if parsed_goal.memories:
+            query_embedding = self.embedder.embed(list({query, *parsed_goal.memories}))
+
+        # Step 2a: Working memory retrieval (Path A)
+        def retrieve_from_working_memory():
+            """Direct structure-based retrieval from working memory."""
+            if memory_type not in ["All", "WorkingMemory"]:
+                return []
+
+            working_memory = self.graph_retriever.retrieve(
+                query=query, parsed_goal=parsed_goal, top_k=top_k, memory_scope="WorkingMemory"
+            )
+            # Rerank working-memory results
+            ranked_memories = self.reranker.rerank(
+                query=query,
+                query_embedding=query_embedding[0],
+                graph_results=working_memory,
+                top_k=top_k,
+                parsed_goal=parsed_goal,
+            )
+            return ranked_memories
+
+        # Step 2b: Parallel long-term and user memory retrieval (Path B)
+        def retrieve_ranked_long_term_and_user():
+            """Retrieve from both long-term and user memory, then rank and merge results."""
+            long_term_items = (
+                self.graph_retriever.retrieve(
+                    query=query,
+                    query_embedding=query_embedding,
+                    parsed_goal=parsed_goal,
+                    top_k=top_k * 2,
+                    memory_scope="LongTermMemory",
+                )
+                if memory_type in ["All", "LongTermMemory"]
+                else []
+            )
+            user_items = (
+                self.graph_retriever.retrieve(
+                    query=query,
+                    query_embedding=query_embedding,
+                    parsed_goal=parsed_goal,
+                    top_k=top_k * 2,
+                    memory_scope="UserMemory",
+                )
+                if memory_type in ["All", "UserMemory"]
+                else []
+            )
+
+            # Rerank combined results
+            ranked_memories = self.reranker.rerank(
+                query=query,
+                query_embedding=query_embedding[0],
+                graph_results=long_term_items + user_items,
+                top_k=top_k * 2,
+                parsed_goal=parsed_goal,
+            )
+            return ranked_memories
+
+        # Step 2c: Internet retrieval (Path C)
+        def retrieve_from_internet():
+            """Retrieve information from the internet via the configured retriever."""
+            if not self.internet_retriever:
+                return []
+            if memory_type not in ["All"]:
+                return []
+            internet_items = self.internet_retriever.retrieve_from_internet(
+                query=query, top_k=top_k, parsed_goal=parsed_goal
+            )
+
+            # Convert to the format expected by the reranker
+            ranked_memories = self.reranker.rerank(
+                query=query,
+                query_embedding=query_embedding[0],
+                graph_results=internet_items,
+                top_k=top_k * 2,
+                parsed_goal=parsed_goal,
+            )
+            return ranked_memories
+
+        # Step 3: Parallel execution of all paths
+        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
+            future_working = executor.submit(retrieve_from_working_memory)
+            future_hybrid = executor.submit(retrieve_ranked_long_term_and_user)
+            future_internet = executor.submit(retrieve_from_internet)
+
+            working_results = future_working.result()
+            hybrid_results = future_hybrid.result()
+            internet_results = future_internet.result()
+        searched_res = working_results + hybrid_results + internet_results
+
+        # Deduplicate by item.memory, keeping the higher score
+        deduped_result = {}
+        for item, score in searched_res:
+            mem_key = item.memory
+            if mem_key not in deduped_result or score > deduped_result[mem_key][1]:
+                deduped_result[mem_key] = (item, score)
+
+        searched_res = []
+        for item, score in sorted(deduped_result.values(), key=lambda pair: pair[1], reverse=True)[
+            :top_k
+        ]:
+            new_meta = SearchedTreeNodeTextualMemoryMetadata(
+                **item.metadata.model_dump(), relativity=score
+            )
+            searched_res.append(
+                TextualMemoryItem(id=item.id, memory=item.memory, metadata=new_meta)
+            )
+
+        # Step 4: Reasoning over all retrieved and ranked memory (fine mode only)
+        if mode == "fine":
+            searched_res = self.reasoner.reason(
+                query=query,
+                ranked_memories=searched_res,
+                parsed_goal=parsed_goal,
+            )
+
+        # Step 5: Update usage history with the current timestamp
+        now_time = datetime.now().isoformat()
+        usage_record = json.dumps(
+            {"time": now_time, "info": info}
+        )  # `info` should be a serializable dict or string
+
+        for item in searched_res:
+            if (
+                hasattr(item, "id")
+                and hasattr(item, "metadata")
+                and hasattr(item.metadata, "usage")
+            ):
+                item.metadata.usage.append(usage_record)
+                self.graph_store.update_node(item.id, {"usage": item.metadata.usage})
+        return searched_res
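For orientation, a minimal usage sketch of the new three-path search. Nothing below ships in this diff: the arguments are assumed to be pre-built component instances, and search_memories is a hypothetical helper.

    from memos.memories.textual.tree_text_memory.retrieve.searcher import Searcher


    def search_memories(llm, graph_store, embedder, query: str):
        """Drive the three-path search with already-constructed components."""
        searcher = Searcher(
            dispatcher_llm=llm,       # an OpenAILLM or OllamaLLM instance (assumed)
            graph_store=graph_store,  # a Neo4jGraphDB instance (assumed)
            embedder=embedder,        # an OllamaEmbedder instance (assumed)
            internet_retriever=None,  # optional; enables Path C when set
        )
        # 'fast' skips the embedding-context pass and the final reasoning step;
        # 'fine' runs both at the cost of extra LLM calls.
        return searcher.search(
            query=query,
            top_k=5,
            info={"user_id": "u1"},  # serialized into each hit's usage history
            mode="fast",
            memory_type="All",
        )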
memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py ADDED

@@ -0,0 +1,68 @@
+import json
+
+from string import Template
+
+from memos.llms.base import BaseLLM
+from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
+from memos.memories.textual.tree_text_memory.retrieve.utils import TASK_PARSE_PROMPT
+
+
+class TaskGoalParser:
+    """
+    Unified TaskGoalParser:
+    - mode == 'fast': directly use the original task_description
+    - mode == 'fine': use an LLM to parse structured topic/keys/tags
+    """
+
+    def __init__(self, llm: BaseLLM, mode: str = "fast"):
+        self.llm = llm
+        self.mode = mode
+
+    def parse(self, task_description: str, context: str = "") -> ParsedTaskGoal:
+        """
+        Parse user input into structured semantic layers.
+
+        Returns:
+            ParsedTaskGoal: object containing topic/concept/fact levels and optional metadata
+            - mode == 'fast': wrap the original task description, no LLM call
+            - mode == 'fine': use an LLM to parse structured topic/keys/tags
+        """
+        if self.mode == "fast":
+            return self._parse_fast(task_description)
+        elif self.mode == "fine":
+            if not self.llm:
+                raise ValueError("LLM not provided for fine mode.")
+            return self._parse_fine(task_description, context)
+        else:
+            raise ValueError(f"Unknown mode: {self.mode}")
+
+    def _parse_fast(self, task_description: str, limit_num: int = 5) -> ParsedTaskGoal:
+        """Fast mode: wrap the task description as-is, without any LLM call."""
+        return ParsedTaskGoal(
+            memories=[task_description], keys=[task_description], tags=[], goal_type="default"
+        )
+
+    def _parse_fine(self, query: str, context: str = "") -> ParsedTaskGoal:
+        """Fine (slow) mode: structured parse via the LLM."""
+        prompt = Template(TASK_PARSE_PROMPT).substitute(task=query.strip(), context=context)
+        response = self.llm.generate(messages=[{"role": "user", "content": prompt}])
+        return self._parse_response(response)
+
+    def _parse_response(self, response: str) -> ParsedTaskGoal:
+        """Parse the LLM JSON output safely."""
+        try:
+            response = response.replace("```", "").replace("json", "")
+            response_json = json.loads(response.strip())
+            return ParsedTaskGoal(
+                memories=response_json.get("memories", []),
+                keys=response_json.get("keys", []),
+                tags=response_json.get("tags", []),
+                goal_type=response_json.get("goal_type", "default"),
+            )
+        except Exception as e:
+            raise ValueError(f"Failed to parse LLM output: {e}\nRaw response:\n{response}") from e
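A small sketch of the parser's two modes. FakeLLM is a stand-in for any BaseLLM implementation, and its canned JSON reply is illustrative only:

    from memos.memories.textual.tree_text_memory.retrieve.task_goal_parser import TaskGoalParser


    class FakeLLM:
        # Stand-in LLM returning the JSON shape that TASK_PARSE_PROMPT asks for.
        def generate(self, messages):
            return (
                '{"keys": ["quarterly goals"], "tags": ["work"], '
                '"goal_type": "retrieval", "memories": ["Q3 targets discussion"]}'
            )


    fast = TaskGoalParser(FakeLLM(), mode="fast").parse("quarterly goals")
    print(fast.memories)  # ['quarterly goals'] -- the raw query, no LLM call

    fine = TaskGoalParser(FakeLLM(), mode="fine").parse("quarterly goals")
    print(fine.keys, fine.goal_type)  # ['quarterly goals'] retrieval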
memos/memories/textual/tree_text_memory/retrieve/utils.py ADDED

@@ -0,0 +1,48 @@
+# Prompt for task parsing
+TASK_PARSE_PROMPT = """
+You are a task parsing expert. Given a user's task instruction, extract the following structured information:
+
+Given a user task instruction and optional related memory context,
+extract the following structured information:
+1. Keys: the high-level keywords directly relevant to the user’s task.
+2. Tags: thematic tags to help categorize and retrieve related memories.
+3. Goal Type: retrieval | qa | generation
+4. Memories: Provide 2–5 short semantic expansions or rephrasings of the task instruction.
+   These are used for improved embedding search coverage.
+   Each should be clear, concise, and meaningful for retrieval.
+
+Task description:
+\"\"\"$task\"\"\"
+
+Context (if any):
+\"\"\"$context\"\"\"
+
+Return strictly in this JSON format:
+{
+  "keys": [...],
+  "tags": [...],
+  "goal_type": "retrieval | qa | generation",
+  "memories": ["...", "...", ...]
+}
+"""
+
+
+REASON_PROMPT = """
+You are a reasoning agent working with a memory system. You will synthesize knowledge from multiple memory cards to construct a meaningful response to the task below.
+
+Task: ${task}
+
+Memory cards (with metadata):
+${detailed_memory_list}
+
+Please perform:
+1. Clustering by theme (topic/concept/fact)
+2. Identify useful chains or connections
+3. Return a curated list of memory card IDs with reasons.
+
+Output in JSON:
+{
+  "selected_ids": [...],
+  "explanation": "..."
+}
+"""
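Both prompts are plain string.Template strings, so filling one is a single substitute call. A quick sketch with made-up values:

    from string import Template

    from memos.memories.textual.tree_text_memory.retrieve.utils import TASK_PARSE_PROMPT

    prompt = Template(TASK_PARSE_PROMPT).substitute(
        task="Find my notes on the Q3 roadmap",
        context="",  # optional related-memory context; empty is allowed
    )
    print(prompt)  # ready to send as a single user message to the LLM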
memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py ADDED

@@ -0,0 +1,335 @@
+"""Xinyu Search API retriever for tree text memory."""
+
+import json
+import uuid
+
+from datetime import datetime
+
+import requests
+
+from memos.embedders.factory import OllamaEmbedder
+from memos.log import get_logger
+from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
+
+
+logger = get_logger(__name__)
+
+
+class XinyuSearchAPI:
+    """Xinyu Search API Client"""
+
+    def __init__(self, access_key: str, search_engine_id: str, max_results: int = 20):
+        """
+        Initialize Xinyu Search API client
+
+        Args:
+            access_key: Xinyu API access key
+            search_engine_id: Xinyu search endpoint URL
+            max_results: Maximum number of results to retrieve
+        """
+        self.access_key = access_key
+        self.max_results = max_results
+
+        # API configuration
+        self.config = {"url": search_engine_id}
+
+        self.headers = {
+            "User-Agent": "PostmanRuntime/7.39.0",
+            "Content-Type": "application/json",
+            "Accept": "*/*",
+            "Accept-Encoding": "gzip, deflate, br",
+            "Connection": "keep-alive",
+            "token": access_key,
+        }
+
+    def query_detail(self, body: dict | None = None, detail: bool = True) -> list[dict]:
+        """
+        Query Xinyu search API for detailed results
+
+        Args:
+            body: Search parameters
+            detail: Whether to get detailed results
+
+        Returns:
+            List of search results
+        """
+        res = []
+        try:
+            url = self.config["url"]
+
+            params = json.dumps(body)
+            resp = requests.request("POST", url, headers=self.headers, data=params)
+            res = json.loads(resp.text)["results"]
+
+            # If the detail interface is used, return the online part
+            if "search_type" in body:
+                res = res["online"]
+
+            if not detail:
+                for res_i in res:
+                    res_i["summary"] = "「SUMMARY」" + res_i.get("summary", "")
+
+        except Exception:
+            import traceback
+
+            logger.error(f"xinyu search error: {traceback.format_exc()}")
+        return res
+
+    def search(self, query: str, max_results: int | None = None) -> list[dict]:
+        """
+        Execute search request
+
+        Args:
+            query: Search query
+            max_results: Maximum number of results to return
+
+        Returns:
+            List of search results
+        """
+        if max_results is None:
+            max_results = self.max_results
+
+        body = {
+            "search_type": ["online"],
+            "online_search": {
+                "max_entries": max_results,
+                "cache_switch": False,
+                "baidu_field": {"switch": True, "mode": "relevance", "type": "page"},
+                "bing_field": {"switch": False, "mode": "relevance", "type": "page_web"},
+                "sogou_field": {"switch": False, "mode": "relevance", "type": "page"},
+            },
+            "request_id": "memos" + str(uuid.uuid4()),
+            "queries": query,
+        }
+
+        return self.query_detail(body)
+
+
+class XinyuSearchRetriever:
+    """Xinyu Search retriever that converts search results to TextualMemoryItem format"""
+
+    def __init__(
+        self,
+        access_key: str,
+        search_engine_id: str,
+        embedder: OllamaEmbedder,
+        max_results: int = 20,
+    ):
+        """
+        Initialize Xinyu search retriever
+
+        Args:
+            access_key: Xinyu API access key
+            search_engine_id: Xinyu search endpoint URL
+            embedder: Embedder instance for generating embeddings
+            max_results: Maximum number of results to retrieve
+        """
+        self.xinyu_api = XinyuSearchAPI(access_key, search_engine_id, max_results=max_results)
+        self.embedder = embedder
+
+    def retrieve_from_internet(
+        self, query: str, top_k: int = 10, parsed_goal=None
+    ) -> list[TextualMemoryItem]:
+        """
+        Retrieve information from Xinyu search and convert it to TextualMemoryItem format
+
+        Args:
+            query: Search query
+            top_k: Number of results to return
+            parsed_goal: Parsed task goal (optional)
+
+        Returns:
+            List of TextualMemoryItem
+        """
+        # Get search results
+        search_results = self.xinyu_api.search(query, max_results=top_k)
+
+        # Convert to TextualMemoryItem format
+        memory_items = []
+
+        for _, result in enumerate(search_results):
+            # Extract basic information from the Xinyu response format
+            title = result.get("title", "")
+            content = result.get("content", "")
+            summary = result.get("summary", "")
+            url = result.get("url", "")
+            publish_time = result.get("publish_time", "")
+            if publish_time:
+                try:
+                    publish_time = datetime.strptime(publish_time, "%Y-%m-%d %H:%M:%S").strftime(
+                        "%Y-%m-%d"
+                    )
+                except Exception as e:
+                    logger.error(f"xinyu search error: {e}")
+                    publish_time = datetime.now().strftime("%Y-%m-%d")
+            else:
+                publish_time = datetime.now().strftime("%Y-%m-%d")
+            source = result.get("source", "")
+            site = result.get("site", "")
+            if site:
+                site = site.split("|")[0]
+
+            # Combine memory content
+            memory_content = (
+                f"Title: {title}\nSummary: {summary}\nContent: {content[:200]}...\nSource: {url}"
+            )
+
+            # Create metadata
+            metadata = TreeNodeTextualMemoryMetadata(
+                user_id=None,
+                session_id=None,
+                status="activated",
+                type="fact",  # Search results are usually factual information
+                memory_time=publish_time,
+                source="web",
+                confidence=85.0,  # Confidence level for search information
+                entities=self._extract_entities(title, content, summary),
+                tags=self._extract_tags(title, content, summary, parsed_goal),
+                visibility="public",
+                memory_type="LongTermMemory",  # Store search results as long-term memory
+                key=title,
+                sources=[url] if url else [],
+                embedding=self.embedder.embed([memory_content])[0],
+                created_at=datetime.now().isoformat(),
+                usage=[],
+                background=f"Xinyu search result from {site or source}",
+            )
+            # Create TextualMemoryItem
+            memory_item = TextualMemoryItem(
+                id=str(uuid.uuid4()), memory=memory_content, metadata=metadata
+            )
+
+            memory_items.append(memory_item)
+
+        return memory_items
+
+    def _extract_entities(self, title: str, content: str, summary: str) -> list[str]:
+        """
+        Extract entities from title, content and summary
+
+        Args:
+            title: Article title
+            content: Article content
+            summary: Article summary
+
+        Returns:
+            List of extracted entities
+        """
+        # Simple entity extraction - can be enhanced with NER
+        text = f"{title} {content} {summary}"
+        entities = []
+
+        # Extract potential entities (simple approach)
+        # This can be enhanced with proper NER models
+        words = text.split()
+        for word in words:
+            if len(word) > 2 and word[0].isupper():
+                entities.append(word)
+
+        return list(set(entities))[:10]  # Limit to 10 entities
+
+    def _extract_tags(self, title: str, content: str, summary: str, parsed_goal=None) -> list[str]:
+        """
+        Extract tags from title, content and summary
+
+        Args:
+            title: Article title
+            content: Article content
+            summary: Article summary
+            parsed_goal: Parsed task goal (optional)
+
+        Returns:
+            List of extracted tags
+        """
+        tags = []
+
+        # Add source-based tags
+        tags.append("xinyu_search")
+        tags.append("news")
+
+        # Add content-based tags
+        text = f"{title} {content} {summary}".lower()
+
+        # Simple keyword-based tagging
+        keywords = {
+            "economy": ["economy", "GDP", "growth", "production", "industry",
+                        "investment", "consumption", "market", "trade", "finance"],
+            "politics": ["politics", "government", "policy", "meeting", "leader",
+                         "election", "parliament", "ministry"],
+            "technology": ["technology", "tech", "innovation", "digital", "internet",
+                           "AI", "artificial intelligence", "software", "hardware"],
+            "sports": ["sports", "game", "athlete", "olympic", "championship",
+                       "tournament", "team", "player"],
+            "culture": ["culture", "education", "art", "history", "literature",
+                        "music", "film", "museum"],
+            "health": ["health", "medical", "pandemic", "hospital", "doctor",
+                       "medicine", "disease", "treatment"],
+            "environment": ["environment", "ecology", "pollution", "green", "climate",
+                            "sustainability", "renewable"],
+        }
+
+        for category, words in keywords.items():
+            if any(word in text for word in words):
+                tags.append(category)
+
+        # Add goal-based tags if available
+        if parsed_goal and hasattr(parsed_goal, "tags"):
+            tags.extend(parsed_goal.tags)
+
+        return list(set(tags))[:15]  # Limit to 15 tags
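A wiring sketch for the retriever. The token and endpoint values are placeholders (note that search_engine_id is used verbatim as the POST URL), and the embedder is any pre-built OllamaEmbedder:

    from memos.memories.textual.tree_text_memory.retrieve.xinyusearch import XinyuSearchRetriever


    def fetch_web_memories(embedder, query: str):
        retriever = XinyuSearchRetriever(
            access_key="XINYU_TOKEN_PLACEHOLDER",             # sent as the `token` header
            search_engine_id="https://xinyu.example/search",  # POST endpoint (placeholder)
            embedder=embedder,
            max_results=20,
        )
        # Each returned item is a TextualMemoryItem tagged memory_type="LongTermMemory",
        # with the article title as its key and the page URL in its sources.
        return retriever.retrieve_from_internet(query, top_k=5)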
memos/parsers/base.py ADDED

@@ -0,0 +1,15 @@
+from abc import ABC, abstractmethod
+
+from memos.configs.parser import BaseParserConfig
+
+
+class BaseParser(ABC):
+    """Base class for all parsers."""
+
+    @abstractmethod
+    def __init__(self, config: BaseParserConfig):
+        """Initialize the parser with the given configuration."""
+
+    @abstractmethod
+    def parse(self, file_path: str) -> str:
+        """Parse the file at the given path and return its content as a string."""
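A concrete parser only has to fill in the two abstract methods. A minimal sketch; PlainTextParser is hypothetical and not part of this release:

    from memos.configs.parser import BaseParserConfig
    from memos.parsers.base import BaseParser


    class PlainTextParser(BaseParser):
        # Hypothetical parser: returns the file verbatim as UTF-8 text.
        def __init__(self, config: BaseParserConfig):
            self.config = config

        def parse(self, file_path: str) -> str:
            with open(file_path, encoding="utf-8") as f:
                return f.read()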
memos/parsers/factory.py ADDED

@@ -0,0 +1,19 @@
+from typing import Any, ClassVar
+
+from memos.configs.parser import ParserConfigFactory
+from memos.parsers.base import BaseParser
+from memos.parsers.markitdown import MarkItDownParser
+
+
+class ParserFactory(BaseParser):
+    """Factory class for creating Parser instances."""
+
+    backend_to_class: ClassVar[dict[str, Any]] = {"markitdown": MarkItDownParser}
+
+    @classmethod
+    def from_config(cls, config_factory: ParserConfigFactory) -> BaseParser:
+        backend = config_factory.backend
+        if backend not in cls.backend_to_class:
+            raise ValueError(f"Invalid backend: {backend}")
+        parser_class = cls.backend_to_class[backend]
+        return parser_class(config_factory.config)