memoryos-0.1.12-py3-none-any.whl → memoryos-0.2.0-py3-none-any.whl

This diff covers publicly available package versions released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the two versions as they appear in their public registry.

Potentially problematic release: this version of MemoryOS might be problematic.

Files changed (32)
  1. {memoryos-0.1.12.dist-info → memoryos-0.2.0.dist-info}/METADATA +51 -31
  2. {memoryos-0.1.12.dist-info → memoryos-0.2.0.dist-info}/RECORD +32 -21
  3. memos/__init__.py +1 -1
  4. memos/configs/internet_retriever.py +81 -0
  5. memos/configs/llm.py +1 -0
  6. memos/configs/mem_os.py +4 -0
  7. memos/configs/mem_reader.py +4 -0
  8. memos/configs/memory.py +11 -1
  9. memos/graph_dbs/item.py +46 -0
  10. memos/graph_dbs/neo4j.py +72 -5
  11. memos/llms/openai.py +1 -0
  12. memos/mem_os/main.py +491 -0
  13. memos/mem_reader/simple_struct.py +11 -6
  14. memos/mem_user/user_manager.py +10 -0
  15. memos/memories/textual/item.py +3 -1
  16. memos/memories/textual/tree.py +39 -3
  17. memos/memories/textual/tree_text_memory/organize/conflict.py +196 -0
  18. memos/memories/textual/tree_text_memory/organize/manager.py +49 -8
  19. memos/memories/textual/tree_text_memory/organize/redundancy.py +212 -0
  20. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +235 -0
  21. memos/memories/textual/tree_text_memory/organize/reorganizer.py +584 -0
  22. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +263 -0
  23. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +89 -0
  24. memos/memories/textual/tree_text_memory/retrieve/reasoner.py +1 -4
  25. memos/memories/textual/tree_text_memory/retrieve/searcher.py +46 -4
  26. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +3 -3
  27. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +335 -0
  28. memos/templates/mem_reader_prompts.py +42 -15
  29. memos/templates/mos_prompts.py +63 -0
  30. memos/templates/tree_reorganize_prompts.py +168 -0
  31. {memoryos-0.1.12.dist-info → memoryos-0.2.0.dist-info}/LICENSE +0 -0
  32. {memoryos-0.1.12.dist-info → memoryos-0.2.0.dist-info}/WHEEL +0 -0
memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py
@@ -0,0 +1,263 @@
+"""Internet retrieval module for tree text memory."""
+
+import uuid
+
+from datetime import datetime
+
+import requests
+
+from memos.embedders.factory import OllamaEmbedder
+from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
+
+
+class GoogleCustomSearchAPI:
+    """Google Custom Search API Client"""
+
+    def __init__(
+        self, api_key: str, search_engine_id: str, max_results: int = 20, num_per_request: int = 10
+    ):
+        """
+        Initialize Google Custom Search API client
+
+        Args:
+            api_key: Google API key
+            search_engine_id: Search engine ID (cx parameter)
+            max_results: Maximum number of results to retrieve
+            num_per_request: Number of results per API request
+        """
+        self.api_key = api_key
+        self.search_engine_id = search_engine_id
+        self.max_results = max_results
+        self.num_per_request = min(num_per_request, 10)  # Google API limits to 10
+        self.base_url = "https://www.googleapis.com/customsearch/v1"
+
+    def search(self, query: str, num_results: int | None = None, start_index: int = 1) -> dict:
+        """
+        Execute search request
+
+        Args:
+            query: Search query
+            num_results: Number of results to return (uses config default if None)
+            start_index: Starting index (default 1)
+
+        Returns:
+            Dictionary containing search results
+        """
+        if num_results is None:
+            num_results = self.num_per_request
+
+        params = {
+            "key": self.api_key,
+            "cx": self.search_engine_id,
+            "q": query,
+            "num": min(num_results, self.num_per_request),
+            "start": start_index,
+        }
+
+        try:
+            response = requests.get(self.base_url, params=params)
+            response.raise_for_status()
+            return response.json()
+        except requests.exceptions.RequestException as e:
+            print(f"Google search request failed: {e}")
+            return {}
+
+    def get_all_results(self, query: str, max_results: int | None = None) -> list[dict]:
+        """
+        Get all search results (with pagination)
+
+        Args:
+            query: Search query
+            max_results: Maximum number of results (uses config default if None)
+
+        Returns:
+            List of all search results
+        """
+        if max_results is None:
+            max_results = self.max_results
+
+        all_results = []
+        start_index = 1
+
+        while len(all_results) < max_results:
+            search_data = self.search(query, start_index=start_index)
+
+            if not search_data or "items" not in search_data:
+                break
+
+            all_results.extend(search_data["items"])
+
+            # Check if there are more results
+            if len(search_data["items"]) < self.num_per_request:
+                break
+
+            start_index += self.num_per_request
+
+            # Avoid infinite loop
+            if start_index > 100:
+                break
+
+        return all_results[:max_results]
+
+
+class InternetGoogleRetriever:
+    """Internet retriever that converts search results to TextualMemoryItem format"""
+
+    def __init__(
+        self,
+        api_key: str,
+        search_engine_id: str,
+        embedder: OllamaEmbedder,
+        max_results: int = 20,
+        num_per_request: int = 10,
+    ):
+        """
+        Initialize internet retriever
+
+        Args:
+            api_key: Google API key
+            search_engine_id: Search engine ID
+            embedder: Embedder instance for generating embeddings
+            max_results: Maximum number of results to retrieve
+            num_per_request: Number of results per API request
+        """
+        self.google_api = GoogleCustomSearchAPI(
+            api_key, search_engine_id, max_results=max_results, num_per_request=num_per_request
+        )
+        self.embedder = embedder
+
+    def retrieve_from_internet(
+        self, query: str, top_k: int = 10, parsed_goal=None
+    ) -> list[TextualMemoryItem]:
+        """
+        Retrieve information from the internet and convert to TextualMemoryItem format
+
+        Args:
+            query: Search query
+            top_k: Number of results to return
+            parsed_goal: Parsed task goal (optional)
+
+        Returns:
+            List of TextualMemoryItem
+        """
+        # Get search results
+        search_results = self.google_api.get_all_results(query, max_results=top_k)
+
+        # Convert to TextualMemoryItem format
+        memory_items = []
+
+        for result in search_results:
+            # Extract basic information
+            title = result.get("title", "")
+            snippet = result.get("snippet", "")
+            link = result.get("link", "")
+            display_link = result.get("displayLink", "")
+
+            # Combine memory content
+            memory_content = f"Title: {title}\nSummary: {snippet}\nSource: {link}"
+            # Create metadata
+            metadata = TreeNodeTextualMemoryMetadata(
+                user_id=None,
+                session_id=None,
+                status="activated",
+                type="fact",  # Internet search results are usually factual information
+                memory_time=datetime.now().strftime("%Y-%m-%d"),
+                source="web",
+                confidence=85.0,  # Fixed confidence level assigned to internet information
+                entities=self._extract_entities(title, snippet),
+                tags=self._extract_tags(title, snippet, parsed_goal),
+                visibility="public",
+                memory_type="LongTermMemory",  # Internet search results are stored as long-term memory
+                key=title,
+                sources=[link] if link else [],
+                embedding=self.embedder.embed([memory_content])[0],  # Embed the combined content up front
+                created_at=datetime.now().isoformat(),
+                usage=[],
+                background=f"Internet search result from {display_link}",
+            )
+
+            # Create TextualMemoryItem
+            memory_item = TextualMemoryItem(
+                id=str(uuid.uuid4()), memory=memory_content, metadata=metadata
+            )
+
+            memory_items.append(memory_item)
+
+        return memory_items
+
+    def _extract_entities(self, title: str, snippet: str) -> list[str]:
+        """
+        Extract entities from title and snippet
+
+        Args:
+            title: Title
+            snippet: Snippet
+
+        Returns:
+            List of entities
+        """
+        # Simple entity extraction logic, can be improved as needed
+        text = f"{title} {snippet}"
+        entities = []
+
+        # Extract possible organization names (with common suffixes)
+        org_suffixes = ["Inc", "Corp", "LLC", "Ltd", "Company", "University", "Institute"]
+        words = text.split()
+        for i, word in enumerate(words):
+            if word in org_suffixes and i > 0:
+                entities.append(f"{words[i - 1]} {word}")
+
+        # Extract possible dates
+        import re
+
+        date_pattern = r"\d{4}-\d{2}-\d{2}|\d{1,2}/\d{1,2}/\d{4}|\w+ \d{1,2}, \d{4}"
+        dates = re.findall(date_pattern, text)
+        entities.extend(dates)
+
+        return entities[:5]  # Limit number of entities
+
+    def _extract_tags(self, title: str, snippet: str, parsed_goal=None) -> list[str]:
+        """
+        Extract tags from title and snippet
+
+        Args:
+            title: Title
+            snippet: Snippet
+            parsed_goal: Parsed task goal
+
+        Returns:
+            List of tags
+        """
+        tags = []
+
+        # Extract tags from parsed goal
+        if parsed_goal:
+            if hasattr(parsed_goal, "topic") and parsed_goal.topic:
+                tags.append(parsed_goal.topic)
+            if hasattr(parsed_goal, "concept") and parsed_goal.concept:
+                tags.append(parsed_goal.concept)
+
+        # Extract keywords from text
+        text = f"{title} {snippet}".lower()
+
+        # Simple keyword extraction
+        keywords = [
+            "news",
+            "report",
+            "article",
+            "study",
+            "research",
+            "analysis",
+            "update",
+            "announcement",
+            "policy",
+            "memo",
+            "document",
+        ]
+
+        for keyword in keywords:
+            if keyword in text:
+                tags.append(keyword)
+
+        # Remove duplicates and limit count
+        return list(set(tags))[:10]
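For orientation: GoogleCustomSearchAPI handles the paginated HTTP calls, and InternetGoogleRetriever wraps each hit as a TextualMemoryItem with source="web" metadata. A minimal usage sketch follows; the stub embedder and the environment-variable names are illustrative, not part of the package (any object with an embed(list[str]) -> list[list[float]] method satisfies the retriever):

import os

from memos.memories.textual.tree_text_memory.retrieve.internet_retriever import (
    InternetGoogleRetriever,
)


class StubEmbedder:
    """Illustrative stand-in for OllamaEmbedder."""

    def embed(self, texts: list[str]) -> list[list[float]]:
        return [[0.0] * 8 for _ in texts]


retriever = InternetGoogleRetriever(
    api_key=os.environ["GOOGLE_API_KEY"],  # illustrative variable name
    search_engine_id=os.environ["GOOGLE_CSE_ID"],  # illustrative variable name
    embedder=StubEmbedder(),
    max_results=20,
    num_per_request=10,  # values above 10 are capped to the Google API limit
)

# Each hit comes back as a TextualMemoryItem ready for reranking.
for item in retriever.retrieve_from_internet("MemoryOS 0.2.0 release", top_k=5):
    print(item.metadata.key, item.metadata.sources)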
memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py
@@ -0,0 +1,89 @@
+"""Factory for creating internet retrievers."""
+
+from typing import Any, ClassVar
+
+from memos.configs.internet_retriever import InternetRetrieverConfigFactory
+from memos.embedders.base import BaseEmbedder
+from memos.memories.textual.tree_text_memory.retrieve.internet_retriever import (
+    InternetGoogleRetriever,
+)
+from memos.memories.textual.tree_text_memory.retrieve.xinyusearch import XinyuSearchRetriever
+
+
+class InternetRetrieverFactory:
+    """Factory class for creating internet retriever instances."""
+
+    backend_to_class: ClassVar[dict[str, Any]] = {
+        "google": InternetGoogleRetriever,
+        "bing": InternetGoogleRetriever,  # TODO: Implement BingRetriever
+        "xinyu": XinyuSearchRetriever,
+    }
+
+    @classmethod
+    def from_config(
+        cls, config_factory: InternetRetrieverConfigFactory, embedder: BaseEmbedder
+    ) -> InternetGoogleRetriever | None:
+        """
+        Create internet retriever from configuration.
+
+        Args:
+            config_factory: Internet retriever configuration
+            embedder: Embedder instance for generating embeddings
+
+        Returns:
+            InternetRetriever instance or None if no configuration provided
+        """
+        if config_factory.backend is None:
+            return None
+
+        backend = config_factory.backend
+        if backend not in cls.backend_to_class:
+            raise ValueError(f"Invalid internet retriever backend: {backend}")
+
+        retriever_class = cls.backend_to_class[backend]
+        config = config_factory.config
+
+        # Create retriever with appropriate parameters
+        if backend == "google":
+            return retriever_class(
+                api_key=config.api_key,
+                search_engine_id=config.search_engine_id,
+                embedder=embedder,
+                max_results=config.max_results,
+                num_per_request=config.num_per_request,
+            )
+        elif backend == "bing":
+            # TODO: Implement Bing retriever
+            return retriever_class(
+                api_key=config.api_key,
+                search_engine_id=None,  # Bing doesn't use search_engine_id
+                embedder=embedder,
+                max_results=config.max_results,
+                num_per_request=config.num_per_request,
+            )
+        elif backend == "xinyu":
+            return retriever_class(
+                access_key=config.api_key,  # Use api_key as access_key for xinyu
+                search_engine_id=config.search_engine_id,
+                embedder=embedder,
+                max_results=config.max_results,
+            )
+        else:
+            raise ValueError(f"Unsupported backend: {backend}")
+
+    @classmethod
+    def create_google_retriever(
+        cls, api_key: str, search_engine_id: str, embedder: BaseEmbedder
+    ) -> InternetGoogleRetriever:
+        """
+        Create Google Custom Search retriever.
+
+        Args:
+            api_key: Google API key
+            search_engine_id: Google Custom Search Engine ID
+            embedder: Embedder instance
+
+        Returns:
+            InternetRetriever instance
+        """
+        return InternetGoogleRetriever(api_key, search_engine_id, embedder)
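The factory keys off config_factory.backend, so construction is config-driven. A sketch of that path, assuming InternetRetrieverConfigFactory is a pydantic v2 model (hence model_validate) whose "google" backend exposes exactly the fields the factory reads above; the stub embedder and field values are illustrative:

from memos.configs.internet_retriever import InternetRetrieverConfigFactory
from memos.memories.textual.tree_text_memory.retrieve.internet_retriever_factory import (
    InternetRetrieverFactory,
)


class StubEmbedder:
    """Illustrative embedder stand-in."""

    def embed(self, texts):
        return [[0.0] * 8 for _ in texts]


# Assumed construction; mirrors the backend/config split the factory expects.
config = InternetRetrieverConfigFactory.model_validate(
    {
        "backend": "google",
        "config": {
            "api_key": "YOUR_GOOGLE_API_KEY",
            "search_engine_id": "YOUR_CSE_ID",
            "max_results": 20,
            "num_per_request": 10,
        },
    }
)

retriever = InternetRetrieverFactory.from_config(config, embedder=StubEmbedder())
if retriever is not None:  # from_config returns None when no backend is configured
    items = retriever.retrieve_from_internet("MemOS", top_k=3)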
memos/memories/textual/tree_text_memory/retrieve/reasoner.py
@@ -34,10 +34,7 @@ class MemoryReasoner:
         """
         prompt_template = Template(REASON_PROMPT)
         memory_detailed_str = "\n".join(
-            [
-                f"[{m.id}] ({m.metadata.hierarchy_level}) {m.metadata.key}: {m.memory}"
-                for m in ranked_memories
-            ]
+            [f"[{m.id}] {m.metadata.key}: {m.memory}" for m in ranked_memories]
         )
         prompt = prompt_template.substitute(task=query, detailed_memory_list=memory_detailed_str)
 
memos/memories/textual/tree_text_memory/retrieve/searcher.py
@@ -8,6 +8,7 @@ from memos.graph_dbs.factory import Neo4jGraphDB
 from memos.llms.factory import OllamaLLM, OpenAILLM
 from memos.memories.textual.item import SearchedTreeNodeTextualMemoryMetadata, TextualMemoryItem
 
+from .internet_retriever_factory import InternetRetrieverFactory
 from .reasoner import MemoryReasoner
 from .recall import GraphMemoryRetriever
 from .reranker import MemoryReranker
@@ -20,6 +21,7 @@ class Searcher:
         dispatcher_llm: OpenAILLM | OllamaLLM,
         graph_store: Neo4jGraphDB,
         embedder: OllamaEmbedder,
+        internet_retriever: InternetRetrieverFactory | None = None,
     ):
         self.graph_store = graph_store
         self.embedder = embedder
@@ -29,6 +31,9 @@ class Searcher:
         self.reranker = MemoryReranker(dispatcher_llm, self.embedder)
         self.reasoner = MemoryReasoner(dispatcher_llm)
 
+        # Store the internet retriever built from config, if one was provided
+        self.internet_retriever = internet_retriever
+
     def search(
         self, query: str, top_k: int, info=None, mode: str = "fast", memory_type: str = "All"
     ) -> list[TextualMemoryItem]:
@@ -50,7 +55,19 @@
         """
 
         # Step 1: Parse task structure into topic, concept, and fact levels
-        parsed_goal = self.task_goal_parser.parse(query)
+        context = []
+        if mode == "fine":
+            query_embedding = self.embedder.embed([query])[0]
+            related_node_ids = self.graph_store.search_by_embedding(query_embedding, top_k=top_k)
+            related_nodes = [
+                self.graph_store.get_node(related_node["id"]) for related_node in related_node_ids
+            ]
+
+            context = [related_node["memory"] for related_node in related_nodes]
+            context = list(set(context))
+
+        # Step 1a: Parse task structure into topic, concept, and fact levels
+        parsed_goal = self.task_goal_parser.parse(query, "\n".join(context))
 
         if parsed_goal.memories:
             query_embedding = self.embedder.embed(list({query, *parsed_goal.memories}))
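The new fine-mode branch is a small recall step that seeds the goal parser with nearby memories before parsing. Restated as a standalone helper (the function wrapper is ours; the names mirror the diff):

def build_parser_context(query, embedder, graph_store, top_k=5):
    """Embed the query, recall similar graph nodes, and return their deduplicated memory texts."""
    query_embedding = embedder.embed([query])[0]
    hits = graph_store.search_by_embedding(query_embedding, top_k=top_k)
    nodes = [graph_store.get_node(hit["id"]) for hit in hits]
    return list({node["memory"] for node in nodes})  # set-based dedup, order not guaranteed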
@@ -114,14 +131,39 @@
             )
             return ranked_memories
 
-        # Step 3: Parallel execution of both paths
-        with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+        # Step 2c: Internet retrieval (Path C)
+        def retrieve_from_internet():
+            """
+            Retrieve information from the internet using Google Custom Search API.
+            """
+            if not self.internet_retriever:
+                return []
+            if memory_type not in ["All"]:
+                return []
+            internet_items = self.internet_retriever.retrieve_from_internet(
+                query=query, top_k=top_k, parsed_goal=parsed_goal
+            )
+
+            # Convert to the format expected by reranker
+            ranked_memories = self.reranker.rerank(
+                query=query,
+                query_embedding=query_embedding[0],
+                graph_results=internet_items,
+                top_k=top_k * 2,
+                parsed_goal=parsed_goal,
+            )
+            return ranked_memories
+
+        # Step 3: Parallel execution of all paths
+        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
             future_working = executor.submit(retrieve_from_working_memory)
             future_hybrid = executor.submit(retrieve_ranked_long_term_and_user)
+            future_internet = executor.submit(retrieve_from_internet)
 
             working_results = future_working.result()
             hybrid_results = future_hybrid.result()
-            searched_res = working_results + hybrid_results
+            internet_results = future_internet.result()
+            searched_res = working_results + hybrid_results + internet_results
 
         # Deduplicate by item.memory, keep higher score
         deduped_result = {}
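The executor change generalizes the existing two-path fan-out to three. The pattern itself is plain concurrent.futures; here is a self-contained restatement with stand-in paths (the real code dedups by item.memory and keeps the higher score, so the order-preserving dedup below is only a stand-in):

import concurrent.futures


def fan_out(paths):
    """Run independent retrieval callables in parallel and concatenate their results."""
    with concurrent.futures.ThreadPoolExecutor(max_workers=len(paths)) as executor:
        futures = [executor.submit(path) for path in paths]
        return [item for future in futures for item in future.result()]


merged = fan_out([lambda: ["a", "b"], lambda: ["b", "c"], lambda: ["d"]])
print(list(dict.fromkeys(merged)))  # ['a', 'b', 'c', 'd']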
memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py
@@ -31,7 +31,7 @@ class TaskGoalParser:
         elif self.mode == "fine":
             if not self.llm:
                 raise ValueError("LLM not provided for slow mode.")
-            return self._parse_fine(task_description)
+            return self._parse_fine(task_description, context)
         else:
             raise ValueError(f"Unknown mode: {self.mode}")
 
@@ -43,11 +43,11 @@
             memories=[task_description], keys=[task_description], tags=[], goal_type="default"
         )
 
-    def _parse_fine(self, query: str) -> ParsedTaskGoal:
+    def _parse_fine(self, query: str, context: str = "") -> ParsedTaskGoal:
         """
         Slow mode: LLM structured parse.
         """
-        prompt = Template(TASK_PARSE_PROMPT).substitute(task=query.strip(), context="")
+        prompt = Template(TASK_PARSE_PROMPT).substitute(task=query.strip(), context=context)
         response = self.llm.generate(messages=[{"role": "user", "content": prompt}])
         return self._parse_response(response)
 
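The parser change simply threads the recalled context into the prompt template instead of an empty string. An illustration of the substitution step with string.Template, which the parser already uses; the prompt text here is a placeholder, not the package's actual TASK_PARSE_PROMPT:

from string import Template

# Placeholder prompt; the real TASK_PARSE_PROMPT lives in the memos templates module.
TASK_PARSE_PROMPT = "Task: $task\nContext:\n$context\nReturn topic, concept, and fact levels."

query = "plan a trip to Kyoto"
context = "User prefers trains.\nUser visited Osaka in 2023."

prompt = Template(TASK_PARSE_PROMPT).substitute(task=query.strip(), context=context)
print(prompt)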