MemoryOS 0.1.12__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MemoryOS might be problematic.

@@ -0,0 +1,335 @@
+ """Xinyu Search API retriever for tree text memory."""
+
+ import json
+ import uuid
+
+ from datetime import datetime
+
+ import requests
+
+ from memos.embedders.factory import OllamaEmbedder
+ from memos.log import get_logger
+ from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
+
+
+ logger = get_logger(__name__)
+
+
+ class XinyuSearchAPI:
+     """Xinyu Search API Client"""
+
+     def __init__(self, access_key: str, search_engine_id: str, max_results: int = 20):
+         """
+         Initialize Xinyu Search API client
+
+         Args:
+             access_key: Xinyu API access key
+             search_engine_id: URL of the Xinyu search endpoint
+             max_results: Maximum number of results to retrieve
+         """
+         self.access_key = access_key
+         self.max_results = max_results
+
+         # API configuration
+         self.config = {"url": search_engine_id}
+
+         self.headers = {
+             "User-Agent": "PostmanRuntime/7.39.0",
+             "Content-Type": "application/json",
+             "Accept": "*/*",
+             "Accept-Encoding": "gzip, deflate, br",
+             "Connection": "keep-alive",
+             "token": access_key,
+         }
+
+     def query_detail(self, body: dict | None = None, detail: bool = True) -> list[dict]:
+         """
+         Query Xinyu search API for detailed results
+
+         Args:
+             body: Search parameters
+             detail: Whether to get detailed results
+
+         Returns:
+             List of search results
+         """
+         res = []
+         try:
+             url = self.config["url"]
+
+             params = json.dumps(body)
+             resp = requests.post(url, headers=self.headers, data=params)
+             res = resp.json()["results"]
+
+             # When search_type is specified, results are nested under "online"
+             if body and "search_type" in body:
+                 res = res["online"]
+
+             if not detail:
+                 for res_i in res:
+                     res_i["summary"] = "「SUMMARY」" + res_i.get("summary", "")
+
+         except Exception:
+             import traceback
+
+             logger.error(f"xinyu search error: {traceback.format_exc()}")
+         return res
+
+     def search(self, query: str, max_results: int | None = None) -> list[dict]:
+         """
+         Execute search request
+
+         Args:
+             query: Search query
+             max_results: Maximum number of results to return
+
+         Returns:
+             List of search results
+         """
+         if max_results is None:
+             max_results = self.max_results
+
+         body = {
+             "search_type": ["online"],
+             "online_search": {
+                 "max_entries": max_results,
+                 "cache_switch": False,
+                 "baidu_field": {"switch": True, "mode": "relevance", "type": "page"},
+                 "bing_field": {"switch": False, "mode": "relevance", "type": "page_web"},
+                 "sogou_field": {"switch": False, "mode": "relevance", "type": "page"},
+             },
+             "request_id": "memos" + str(uuid.uuid4()),
+             "queries": query,
+         }
+
+         return self.query_detail(body)
+
+
+ class XinyuSearchRetriever:
+     """Xinyu Search retriever that converts search results to TextualMemoryItem format"""
+
+     def __init__(
+         self,
+         access_key: str,
+         search_engine_id: str,
+         embedder: OllamaEmbedder,
+         max_results: int = 20,
+     ):
+         """
+         Initialize Xinyu search retriever
+
+         Args:
+             access_key: Xinyu API access key
+             search_engine_id: URL of the Xinyu search endpoint
+             embedder: Embedder instance for generating embeddings
+             max_results: Maximum number of results to retrieve
+         """
+         self.xinyu_api = XinyuSearchAPI(access_key, search_engine_id, max_results=max_results)
+         self.embedder = embedder
+
+     def retrieve_from_internet(
+         self, query: str, top_k: int = 10, parsed_goal=None
+     ) -> list[TextualMemoryItem]:
+         """
+         Retrieve information from Xinyu search and convert to TextualMemoryItem format
+
+         Args:
+             query: Search query
+             top_k: Number of results to return
+             parsed_goal: Parsed task goal (optional)
+
+         Returns:
+             List of TextualMemoryItem
+         """
+         # Get search results
+         search_results = self.xinyu_api.search(query, max_results=top_k)
+
+         # Convert to TextualMemoryItem format
+         memory_items = []
+
+         for result in search_results:
+             # Extract basic information from Xinyu response format
+             title = result.get("title", "")
+             content = result.get("content", "")
+             summary = result.get("summary", "")
+             url = result.get("url", "")
+             publish_time = result.get("publish_time", "")
+             if publish_time:
+                 try:
+                     publish_time = datetime.strptime(publish_time, "%Y-%m-%d %H:%M:%S").strftime(
+                         "%Y-%m-%d"
+                     )
+                 except Exception as e:
+                     logger.error(f"xinyu publish_time parse error: {e}")
+                     publish_time = datetime.now().strftime("%Y-%m-%d")
+             else:
+                 publish_time = datetime.now().strftime("%Y-%m-%d")
+             source = result.get("source", "")
+             site = result.get("site", "")
+             if site:
+                 site = site.split("|")[0]
+
+             # Combine memory content
+             memory_content = (
+                 f"Title: {title}\nSummary: {summary}\nContent: {content[:200]}...\nSource: {url}"
+             )
+
+             # Create metadata
+             metadata = TreeNodeTextualMemoryMetadata(
+                 user_id=None,
+                 session_id=None,
+                 status="activated",
+                 type="fact",  # Search results are usually factual information
+                 memory_time=publish_time,
+                 source="web",
+                 confidence=85.0,  # Confidence level for search information
+                 entities=self._extract_entities(title, content, summary),
+                 tags=self._extract_tags(title, content, summary, parsed_goal),
+                 visibility="public",
+                 memory_type="LongTermMemory",  # Store search results as long-term memory
+                 key=title,
+                 sources=[url] if url else [],
+                 embedding=self.embedder.embed([memory_content])[0],
+                 created_at=datetime.now().isoformat(),
+                 usage=[],
+                 background=f"Xinyu search result from {site or source}",
+             )
+             # Create TextualMemoryItem
+             memory_item = TextualMemoryItem(
+                 id=str(uuid.uuid4()), memory=memory_content, metadata=metadata
+             )
+
+             memory_items.append(memory_item)
+
+         return memory_items
+
+     def _extract_entities(self, title: str, content: str, summary: str) -> list[str]:
+         """
+         Extract entities from title, content and summary
+
+         Args:
+             title: Article title
+             content: Article content
+             summary: Article summary
+
+         Returns:
+             List of extracted entities
+         """
+         # Simple entity extraction - can be enhanced with NER
+         text = f"{title} {content} {summary}"
+         entities = []
+
+         # Extract potential entities (simple approach)
+         # This can be enhanced with proper NER models
+         words = text.split()
+         for word in words:
+             if len(word) > 2 and word[0].isupper():
+                 entities.append(word)
+
+         return list(set(entities))[:10]  # Limit to 10 entities
+
+     def _extract_tags(self, title: str, content: str, summary: str, parsed_goal=None) -> list[str]:
+         """
+         Extract tags from title, content and summary
+
+         Args:
+             title: Article title
+             content: Article content
+             summary: Article summary
+             parsed_goal: Parsed task goal (optional)
+
+         Returns:
+             List of extracted tags
+         """
+         tags = []
+
+         # Add source-based tags
+         tags.append("xinyu_search")
+         tags.append("news")
+
+         # Add content-based tags
+         text = f"{title} {content} {summary}".lower()
+
+         # Simple keyword-based tagging
+         # Keywords must be lowercase to match the lowercased text
+         keywords = {
+             "economy": [
+                 "economy",
+                 "gdp",
+                 "growth",
+                 "production",
+                 "industry",
+                 "investment",
+                 "consumption",
+                 "market",
+                 "trade",
+                 "finance",
+             ],
+             "politics": [
+                 "politics",
+                 "government",
+                 "policy",
+                 "meeting",
+                 "leader",
+                 "election",
+                 "parliament",
+                 "ministry",
+             ],
+             "technology": [
+                 "technology",
+                 "tech",
+                 "innovation",
+                 "digital",
+                 "internet",
+                 "ai",
+                 "artificial intelligence",
+                 "software",
+                 "hardware",
+             ],
+             "sports": [
+                 "sports",
+                 "game",
+                 "athlete",
+                 "olympic",
+                 "championship",
+                 "tournament",
+                 "team",
+                 "player",
+             ],
+             "culture": [
+                 "culture",
+                 "education",
+                 "art",
+                 "history",
+                 "literature",
+                 "music",
+                 "film",
+                 "museum",
+             ],
+             "health": [
+                 "health",
+                 "medical",
+                 "pandemic",
+                 "hospital",
+                 "doctor",
+                 "medicine",
+                 "disease",
+                 "treatment",
+             ],
+             "environment": [
+                 "environment",
+                 "ecology",
+                 "pollution",
+                 "green",
+                 "climate",
+                 "sustainability",
+                 "renewable",
+             ],
+         }
+
+         for category, words in keywords.items():
+             if any(word in text for word in words):
+                 tags.append(category)
+
+         # Add goal-based tags if available
+         if parsed_goal and hasattr(parsed_goal, "tags"):
+             tags.extend(parsed_goal.tags)
+
+         return list(set(tags))[:15]  # Limit to 15 tags
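
For context, here is a minimal usage sketch of the retriever added above, based only on the signatures visible in this hunk. The module path for XinyuSearchRetriever, the endpoint URL, the token value, and the embedder setup are assumptions for illustration, not part of the release.

    # Hypothetical usage sketch (not part of the package diff).
    from memos.embedders.factory import OllamaEmbedder
    from memos.memories.textual.item import TextualMemoryItem
    # Import of XinyuSearchRetriever omitted: its module path is not shown in this diff.

    def fetch_web_memories(embedder: OllamaEmbedder, query: str) -> list[TextualMemoryItem]:
        retriever = XinyuSearchRetriever(
            access_key="your_xinyu_token",  # sent verbatim as the "token" request header
            search_engine_id="https://xinyu.example.com/search",  # used as the POST URL
            embedder=embedder,
            max_results=20,
        )
        # Each hit is embedded once and stored with memory_type="LongTermMemory".
        return retriever.retrieve_from_internet(query, top_k=5)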
@@ -0,0 +1,63 @@
+ COT_DECOMPOSE_PROMPT = """
+ I am an 8-year-old student who needs help analyzing and breaking down complex questions. Your task is to help me understand whether a question is complex enough to be broken down into smaller parts.
+
+ Requirements:
+ 1. First, determine if the question is a decomposable problem. If it is, set 'is_complex' to True.
+ 2. If the question needs to be decomposed, break it down into 1-3 sub-questions; choose the number based on the complexity of the question.
+ 3. For decomposable questions, put the sub-questions in the 'sub_questions' list. Each sub-question should contain only the question itself, without any additional notes.
+ 4. If the question is not decomposable, set 'is_complex' to False and 'sub_questions' to an empty list.
+ 5. You must return ONLY a valid JSON object. Do not include any other text, explanations, or formatting.
+
+ Here are some examples:
+
+ Question: Who is the current head coach of the gymnastics team in the capital of the country that Lang Ping represents?
+ Answer: {{"is_complex": true, "sub_questions": ["Which country does Lang Ping represent in volleyball?", "What is the capital of this country?", "Who is the current head coach of the gymnastics team in this capital?"]}}
+
+ Question: Which country's cultural heritage is the Great Wall?
+ Answer: {{"is_complex": false, "sub_questions": []}}
+
+ Question: How did the trade relationship between Madagascar and China develop, and how does this relationship affect the market expansion of the essential oil industry on Nosy Be Island?
+ Answer: {{"is_complex": true, "sub_questions": ["How did the trade relationship between Madagascar and China develop?", "How does this trade relationship affect the market expansion of the essential oil industry on Nosy Be Island?"]}}
+
+ Please analyze the following question and respond with ONLY a valid JSON object:
+ Question: {query}
+ Answer:"""
+
+ PRO_MODE_WELCOME_MESSAGE = """
+ ============================================================
+ 🚀 MemOS PRO Mode Activated!
+ ============================================================
+ ✅ Chain of Thought (CoT) enhancement is now enabled by default
+ ✅ Complex queries will be automatically decomposed and enhanced
+
+ 🌐 To enable Internet search capabilities:
+ 1. Go to your cube's textual memory configuration
+ 2. Set the backend to 'google' in the internet_retriever section
+ 3. Configure the following parameters:
+    - api_key: Your Google Search API key
+    - cse_id: Your Custom Search Engine ID
+    - num_results: Number of search results (default: 5)
+
+ 📝 Example configuration in the cube config for tree_text_memory:
+    internet_retriever:
+      backend: 'google'
+      config:
+        api_key: 'your_google_api_key_here'
+        cse_id: 'your_custom_search_engine_id'
+        num_results: 5
+ details: https://github.com/memos-ai/memos/blob/main/examples/core_memories/tree_textual_w_internet_memoy.py
+ ============================================================
+ """
+
+ SYNTHESIS_PROMPT = """
+ Excluding memory information, synthesize information from multiple sources to provide a comprehensive answer.
+ I will give you the chain of thought for the sub-questions and their answers.
+ Sub-questions and their answers:
+ {qa_text}
+
+ Please synthesize these answers into a comprehensive response that:
+ 1. Addresses the original question completely
+ 2. Integrates information from all sub-questions
+ 3. Provides clear reasoning and connections
+ 4. Is well-structured and easy to understand
+ 5. Maintains a natural conversational tone"""
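
Taken together, these constants suggest a three-step flow: decompose the query, answer each sub-question, then synthesize. Below is a rough sketch under that assumption; the `chat` callable and the `answer_with_cot` helper are stand-ins for the package's actual LLM interface, which this diff does not show.

    # Hypothetical glue code (not part of the package diff).
    import json
    from collections.abc import Callable

    def answer_with_cot(query: str, chat: Callable[[str], str]) -> str:
        # Step 1: COT_DECOMPOSE_PROMPT instructs the model to return only JSON,
        # so the reply can be parsed directly. The doubled braces in the prompt's
        # examples make str.format safe here.
        plan = json.loads(chat(COT_DECOMPOSE_PROMPT.format(query=query)))
        if not plan.get("is_complex"):
            return chat(query)  # simple questions skip decomposition

        # Step 2: answer each sub-question independently.
        qa_text = "\n".join(
            f"Q: {sub_q}\nA: {chat(sub_q)}" for sub_q in plan["sub_questions"]
        )

        # Step 3: merge the partial answers into one response.
        return chat(SYNTHESIS_PROMPT.format(qa_text=qa_text))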