MemoryOS 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MemoryOS might be problematic.
- {memoryos-0.1.12.dist-info → memoryos-0.1.13.dist-info}/METADATA +42 -11
- {memoryos-0.1.12.dist-info → memoryos-0.1.13.dist-info}/RECORD +18 -13
- memos/__init__.py +1 -1
- memos/configs/internet_retriever.py +81 -0
- memos/configs/mem_os.py +4 -0
- memos/configs/memory.py +6 -1
- memos/mem_os/main.py +491 -0
- memos/mem_user/user_manager.py +10 -0
- memos/memories/textual/tree.py +34 -2
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +263 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +89 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +1 -4
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +46 -4
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +3 -3
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +335 -0
- memos/templates/mos_prompts.py +63 -0
- {memoryos-0.1.12.dist-info → memoryos-0.1.13.dist-info}/LICENSE +0 -0
- {memoryos-0.1.12.dist-info → memoryos-0.1.13.dist-info}/WHEEL +0 -0
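
For orientation before the diff hunks below: the sketch that follows shows how the new Xinyu retriever could be driven directly. It is a minimal illustration based only on the signatures in this diff; the access key, endpoint URL, and embedder instance are placeholders, not values shipped with the package.

    # Hypothetical usage sketch; only XinyuSearchRetriever's signature is taken from this diff.
    from memos.memories.textual.tree_text_memory.retrieve.xinyusearch import XinyuSearchRetriever

    retriever = XinyuSearchRetriever(
        access_key="YOUR_XINYU_ACCESS_KEY",                   # placeholder credential
        search_engine_id="https://example.com/xinyu/search",  # placeholder endpoint URL
        embedder=embedder,  # an OllamaEmbedder instance, constructed elsewhere
        max_results=20,
    )
    items = retriever.retrieve_from_internet("MemOS 0.1.13 release", top_k=5)
    for item in items:
        print(item.metadata.key)  # the result title is used as the memory key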
memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py
@@ -0,0 +1,335 @@
+"""Xinyu Search API retriever for tree text memory."""
+
+import json
+import uuid
+
+from datetime import datetime
+
+import requests
+
+from memos.embedders.factory import OllamaEmbedder
+from memos.log import get_logger
+from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
+
+
+logger = get_logger(__name__)
+
+
+class XinyuSearchAPI:
+    """Xinyu Search API Client"""
+
+    def __init__(self, access_key: str, search_engine_id: str, max_results: int = 20):
+        """
+        Initialize Xinyu Search API client
+
+        Args:
+            access_key: Xinyu API access key, sent as the "token" header
+            search_engine_id: URL of the Xinyu search endpoint
+            max_results: Maximum number of results to retrieve
+        """
+        self.access_key = access_key
+        self.max_results = max_results
+
+        # API configuration
+        self.config = {"url": search_engine_id}
+
+        self.headers = {
+            "User-Agent": "PostmanRuntime/7.39.0",
+            "Content-Type": "application/json",
+            "Accept": "*/*",
+            "Accept-Encoding": "gzip, deflate, br",
+            "Connection": "keep-alive",
+            "token": access_key,
+        }
+
+    def query_detail(self, body: dict | None = None, detail: bool = True) -> list[dict]:
+        """
+        Query Xinyu search API for detailed results
+
+        Args:
+            body: Search parameters
+            detail: Whether to get detailed results
+
+        Returns:
+            List of search results
+        """
+        res = []
+        try:
+            url = self.config["url"]
+
+            params = json.dumps(body)
+            resp = requests.request("POST", url, headers=self.headers, data=params)
+            res = json.loads(resp.text)["results"]
+
+            # The detail interface nests results; return only the online part
+            if body and "search_type" in body:
+                res = res["online"]
+
+            if not detail:
+                for res_i in res:
+                    res_i["summary"] = "「SUMMARY」" + res_i.get("summary", "")
+
+        except Exception:
+            import traceback
+
+            logger.error(f"xinyu search error: {traceback.format_exc()}")
+        return res
+
+    def search(self, query: str, max_results: int | None = None) -> list[dict]:
+        """
+        Execute search request
+
+        Args:
+            query: Search query
+            max_results: Maximum number of results to return
+
+        Returns:
+            List of search results
+        """
+        if max_results is None:
+            max_results = self.max_results
+
+        body = {
+            "search_type": ["online"],
+            "online_search": {
+                "max_entries": max_results,
+                "cache_switch": False,
+                # Only Baidu is enabled by default; Bing and Sogou are switched off
+                "baidu_field": {"switch": True, "mode": "relevance", "type": "page"},
+                "bing_field": {"switch": False, "mode": "relevance", "type": "page_web"},
+                "sogou_field": {"switch": False, "mode": "relevance", "type": "page"},
+            },
+            "request_id": "memos" + str(uuid.uuid4()),
+            "queries": query,
+        }
+
+        return self.query_detail(body)
+
+
+class XinyuSearchRetriever:
+    """Xinyu Search retriever that converts search results to TextualMemoryItem format"""
+
+    def __init__(
+        self,
+        access_key: str,
+        search_engine_id: str,
+        embedder: OllamaEmbedder,
+        max_results: int = 20,
+    ):
+        """
+        Initialize Xinyu search retriever
+
+        Args:
+            access_key: Xinyu API access key
+            search_engine_id: URL of the Xinyu search endpoint
+            embedder: Embedder instance for generating embeddings
+            max_results: Maximum number of results to retrieve
+        """
+        self.xinyu_api = XinyuSearchAPI(access_key, search_engine_id, max_results=max_results)
+        self.embedder = embedder
+
+    def retrieve_from_internet(
+        self, query: str, top_k: int = 10, parsed_goal=None
+    ) -> list[TextualMemoryItem]:
+        """
+        Retrieve information from Xinyu search and convert to TextualMemoryItem format
+
+        Args:
+            query: Search query
+            top_k: Number of results to return
+            parsed_goal: Parsed task goal (optional)
+
+        Returns:
+            List of TextualMemoryItem
+        """
+        # Get search results
+        search_results = self.xinyu_api.search(query, max_results=top_k)
+
+        # Convert to TextualMemoryItem format
+        memory_items = []
+
+        for result in search_results:
+            # Extract basic information from Xinyu response format
+            title = result.get("title", "")
+            content = result.get("content", "")
+            summary = result.get("summary", "")
+            url = result.get("url", "")
+            # Normalize publish_time to YYYY-MM-DD, falling back to today on parse failure
+            publish_time = result.get("publish_time", "")
+            if publish_time:
+                try:
+                    publish_time = datetime.strptime(publish_time, "%Y-%m-%d %H:%M:%S").strftime(
+                        "%Y-%m-%d"
+                    )
+                except Exception as e:
+                    logger.error(f"xinyu search error: {e}")
+                    publish_time = datetime.now().strftime("%Y-%m-%d")
+            else:
+                publish_time = datetime.now().strftime("%Y-%m-%d")
+            source = result.get("source", "")
+            site = result.get("site", "")
+            if site:
+                site = site.split("|")[0]
+
+            # Combine memory content
+            memory_content = (
+                f"Title: {title}\nSummary: {summary}\nContent: {content[:200]}...\nSource: {url}"
+            )
+
+            # Create metadata
+            metadata = TreeNodeTextualMemoryMetadata(
+                user_id=None,
+                session_id=None,
+                status="activated",
+                type="fact",  # Search results are usually factual information
+                memory_time=publish_time,
+                source="web",
+                confidence=85.0,  # Confidence level for search information
+                entities=self._extract_entities(title, content, summary),
+                tags=self._extract_tags(title, content, summary, parsed_goal),
+                visibility="public",
+                memory_type="LongTermMemory",  # Search results are stored as long-term memory
+                key=title,
+                sources=[url] if url else [],
+                embedding=self.embedder.embed([memory_content])[0],
+                created_at=datetime.now().isoformat(),
+                usage=[],
+                background=f"Xinyu search result from {site or source}",
+            )
+            # Create TextualMemoryItem
+            memory_item = TextualMemoryItem(
+                id=str(uuid.uuid4()), memory=memory_content, metadata=metadata
+            )
+
+            memory_items.append(memory_item)
+
+        return memory_items
+
+    def _extract_entities(self, title: str, content: str, summary: str) -> list[str]:
+        """
+        Extract entities from title, content and summary
+
+        Args:
+            title: Article title
+            content: Article content
+            summary: Article summary
+
+        Returns:
+            List of extracted entities
+        """
+        # Simple heuristic entity extraction - can be enhanced with proper NER models
+        text = f"{title} {content} {summary}"
+        entities = []
+
+        # Treat capitalized words longer than two characters as candidate entities
+        words = text.split()
+        for word in words:
+            if len(word) > 2 and word[0].isupper():
+                entities.append(word)
+
+        return list(set(entities))[:10]  # Limit to 10 entities
+
+    def _extract_tags(self, title: str, content: str, summary: str, parsed_goal=None) -> list[str]:
+        """
+        Extract tags from title, content and summary
+
+        Args:
+            title: Article title
+            content: Article content
+            summary: Article summary
+            parsed_goal: Parsed task goal (optional)
+
+        Returns:
+            List of extracted tags
+        """
+        tags = []
+
+        # Add source-based tags
+        tags.append("xinyu_search")
+        tags.append("news")
+
+        # Add content-based tags
+        text = f"{title} {content} {summary}".lower()
+
+        # Simple keyword-based tagging
+        keywords = {
+            "economy": ["economy", "GDP", "growth", "production", "industry", "investment",
+                        "consumption", "market", "trade", "finance"],
+            "politics": ["politics", "government", "policy", "meeting", "leader", "election",
+                         "parliament", "ministry"],
+            "technology": ["technology", "tech", "innovation", "digital", "internet", "AI",
+                           "artificial intelligence", "software", "hardware"],
+            "sports": ["sports", "game", "athlete", "olympic", "championship", "tournament",
+                       "team", "player"],
+            "culture": ["culture", "education", "art", "history", "literature", "music",
+                        "film", "museum"],
+            "health": ["health", "medical", "pandemic", "hospital", "doctor", "medicine",
+                       "disease", "treatment"],
+            "environment": ["environment", "ecology", "pollution", "green", "climate",
+                            "sustainability", "renewable"],
+        }
+
+        # Compare case-insensitively: text was lowercased above, but the keyword
+        # list mixes cases ("GDP", "AI"), which would otherwise never match
+        for category, words in keywords.items():
+            if any(word.lower() in text for word in words):
+                tags.append(category)
+
+        # Add goal-based tags if available
+        if parsed_goal and hasattr(parsed_goal, "tags"):
+            tags.extend(parsed_goal.tags)
+
+        return list(set(tags))[:15]  # Limit to 15 tags
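
Since the client requires a live endpoint and credentials, a quick offline sanity check can stub out the network call. The sketch below is illustrative only: the fake embedder, canned result, and endpoint URL are placeholders, and only the signatures from the diff above are assumed.

    # Offline sanity-check sketch -- all values below are placeholders, not package defaults.
    from unittest.mock import patch

    from memos.memories.textual.tree_text_memory.retrieve.xinyusearch import (
        XinyuSearchAPI,
        XinyuSearchRetriever,
    )

    class FakeEmbedder:
        """Stands in for OllamaEmbedder: returns one vector per input string."""
        def embed(self, texts):
            return [[0.0] * 8 for _ in texts]

    canned = {
        "title": "MemOS 0.1.13 released",
        "content": "MemoryOS adds an internet retriever in this release.",
        "summary": "New internet retriever",
        "url": "https://example.com/memos",
        "publish_time": "2024-07-01 09:30:00",
        "site": "example|news",
    }

    with patch.object(XinyuSearchAPI, "search", return_value=[canned]):
        retriever = XinyuSearchRetriever("key", "https://example.com/search", FakeEmbedder())
        items = retriever.retrieve_from_internet("memos release", top_k=1)
        assert items[0].metadata.memory_time == "2024-07-01"  # normalized publish_time
        assert "xinyu_search" in items[0].metadata.tags       # source tag always added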
memos/templates/mos_prompts.py
@@ -0,0 +1,63 @@
+COT_DECOMPOSE_PROMPT = """
+I am an 8-year-old student who needs help analyzing and breaking down complex questions. Your task is to help me understand whether a question is complex enough to be broken down into smaller parts.
+
+Requirements:
+1. First, determine whether the question is decomposable. If it is, set 'is_complex' to true.
+2. If the question needs to be decomposed, break it down into 1-3 sub-questions; choose the number based on the complexity of the question.
+3. For decomposable questions, put the sub-questions in the 'sub_questions' list. Each sub-question should contain only the question itself, without any additional notes.
+4. If the question is not decomposable, set 'is_complex' to false and 'sub_questions' to an empty list.
+5. You must return ONLY a valid JSON object. Do not include any other text, explanations, or formatting.
+
+Here are some examples:
+
+Question: Who is the current head coach of the gymnastics team in the capital of the country that Lang Ping represents?
+Answer: {{"is_complex": true, "sub_questions": ["Which country does Lang Ping represent in volleyball?", "What is the capital of this country?", "Who is the current head coach of the gymnastics team in this capital?"]}}
+
+Question: Which country's cultural heritage is the Great Wall?
+Answer: {{"is_complex": false, "sub_questions": []}}
+
+Question: How did the trade relationship between Madagascar and China develop, and how does this relationship affect the market expansion of the essential oil industry on Nosy Be Island?
+Answer: {{"is_complex": true, "sub_questions": ["How did the trade relationship between Madagascar and China develop?", "How does this trade relationship affect the market expansion of the essential oil industry on Nosy Be Island?"]}}
+
+Please analyze the following question and respond with ONLY a valid JSON object:
+Question: {query}
+Answer:"""
+
+PRO_MODE_WELCOME_MESSAGE = """
+============================================================
+🚀 MemOS PRO Mode Activated!
+============================================================
+✅ Chain of Thought (CoT) enhancement is now enabled by default
+✅ Complex queries will be automatically decomposed and enhanced
+
+🌐 To enable Internet search capabilities:
+1. Go to your cube's textual memory configuration
+2. Set the backend to 'google' in the internet_retriever section
+3. Configure the following parameters:
+   - api_key: Your Google Search API key
+   - cse_id: Your Custom Search Engine ID
+   - num_results: Number of search results (default: 5)
+
+📝 Example configuration in the cube config for tree_text_memory:
+internet_retriever:
+  backend: 'google'
+  config:
+    api_key: 'your_google_api_key_here'
+    cse_id: 'your_custom_search_engine_id'
+    num_results: 5
+Details: https://github.com/memos-ai/memos/blob/main/examples/core_memories/tree_textual_w_internet_memoy.py
+============================================================
+"""
+
+SYNTHESIS_PROMPT = """
+You are an expert at synthesizing information from multiple sources into comprehensive answers, without relying on stored memory information.
+I will give you the chain of thought for sub-questions and their answers.
+Sub-questions and their answers:
+{qa_text}
+
+Please synthesize these answers into a comprehensive response that:
+1. Addresses the original question completely
+2. Integrates information from all sub-questions
+3. Provides clear reasoning and connections
+4. Is well-structured and easy to understand
+5. Maintains a natural conversational tone"""