MemoryOS 0.0.1__py3-none-any.whl → 0.1.13__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release: this version of MemoryOS might be problematic.
- memoryos-0.1.13.dist-info/METADATA +288 -0
- memoryos-0.1.13.dist-info/RECORD +122 -0
- memos/__init__.py +20 -1
- memos/api/start_api.py +420 -0
- memos/chunkers/__init__.py +4 -0
- memos/chunkers/base.py +24 -0
- memos/chunkers/factory.py +22 -0
- memos/chunkers/sentence_chunker.py +35 -0
- memos/configs/__init__.py +0 -0
- memos/configs/base.py +82 -0
- memos/configs/chunker.py +45 -0
- memos/configs/embedder.py +53 -0
- memos/configs/graph_db.py +45 -0
- memos/configs/internet_retriever.py +81 -0
- memos/configs/llm.py +71 -0
- memos/configs/mem_chat.py +81 -0
- memos/configs/mem_cube.py +89 -0
- memos/configs/mem_os.py +74 -0
- memos/configs/mem_reader.py +53 -0
- memos/configs/mem_scheduler.py +78 -0
- memos/configs/memory.py +195 -0
- memos/configs/parser.py +38 -0
- memos/configs/utils.py +8 -0
- memos/configs/vec_db.py +64 -0
- memos/deprecation.py +262 -0
- memos/embedders/__init__.py +0 -0
- memos/embedders/base.py +15 -0
- memos/embedders/factory.py +23 -0
- memos/embedders/ollama.py +74 -0
- memos/embedders/sentence_transformer.py +40 -0
- memos/exceptions.py +30 -0
- memos/graph_dbs/__init__.py +0 -0
- memos/graph_dbs/base.py +215 -0
- memos/graph_dbs/factory.py +21 -0
- memos/graph_dbs/neo4j.py +827 -0
- memos/hello_world.py +97 -0
- memos/llms/__init__.py +0 -0
- memos/llms/base.py +16 -0
- memos/llms/factory.py +25 -0
- memos/llms/hf.py +231 -0
- memos/llms/ollama.py +82 -0
- memos/llms/openai.py +34 -0
- memos/llms/utils.py +14 -0
- memos/log.py +78 -0
- memos/mem_chat/__init__.py +0 -0
- memos/mem_chat/base.py +30 -0
- memos/mem_chat/factory.py +21 -0
- memos/mem_chat/simple.py +200 -0
- memos/mem_cube/__init__.py +0 -0
- memos/mem_cube/base.py +29 -0
- memos/mem_cube/general.py +146 -0
- memos/mem_cube/utils.py +24 -0
- memos/mem_os/client.py +5 -0
- memos/mem_os/core.py +819 -0
- memos/mem_os/main.py +503 -0
- memos/mem_os/product.py +89 -0
- memos/mem_reader/__init__.py +0 -0
- memos/mem_reader/base.py +27 -0
- memos/mem_reader/factory.py +21 -0
- memos/mem_reader/memory.py +298 -0
- memos/mem_reader/simple_struct.py +241 -0
- memos/mem_scheduler/__init__.py +0 -0
- memos/mem_scheduler/base_scheduler.py +164 -0
- memos/mem_scheduler/general_scheduler.py +305 -0
- memos/mem_scheduler/modules/__init__.py +0 -0
- memos/mem_scheduler/modules/base.py +74 -0
- memos/mem_scheduler/modules/dispatcher.py +103 -0
- memos/mem_scheduler/modules/monitor.py +82 -0
- memos/mem_scheduler/modules/redis_service.py +146 -0
- memos/mem_scheduler/modules/retriever.py +41 -0
- memos/mem_scheduler/modules/schemas.py +146 -0
- memos/mem_scheduler/scheduler_factory.py +21 -0
- memos/mem_scheduler/utils.py +26 -0
- memos/mem_user/user_manager.py +488 -0
- memos/memories/__init__.py +0 -0
- memos/memories/activation/__init__.py +0 -0
- memos/memories/activation/base.py +42 -0
- memos/memories/activation/item.py +25 -0
- memos/memories/activation/kv.py +232 -0
- memos/memories/base.py +19 -0
- memos/memories/factory.py +34 -0
- memos/memories/parametric/__init__.py +0 -0
- memos/memories/parametric/base.py +19 -0
- memos/memories/parametric/item.py +11 -0
- memos/memories/parametric/lora.py +41 -0
- memos/memories/textual/__init__.py +0 -0
- memos/memories/textual/base.py +89 -0
- memos/memories/textual/general.py +286 -0
- memos/memories/textual/item.py +167 -0
- memos/memories/textual/naive.py +185 -0
- memos/memories/textual/tree.py +321 -0
- memos/memories/textual/tree_text_memory/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/organize/manager.py +305 -0
- memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +263 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +89 -0
- memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +158 -0
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +13 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +208 -0
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +68 -0
- memos/memories/textual/tree_text_memory/retrieve/utils.py +48 -0
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +335 -0
- memos/parsers/__init__.py +0 -0
- memos/parsers/base.py +15 -0
- memos/parsers/factory.py +19 -0
- memos/parsers/markitdown.py +22 -0
- memos/settings.py +8 -0
- memos/templates/__init__.py +0 -0
- memos/templates/mem_reader_prompts.py +98 -0
- memos/templates/mem_scheduler_prompts.py +65 -0
- memos/templates/mos_prompts.py +63 -0
- memos/types.py +55 -0
- memos/vec_dbs/__init__.py +0 -0
- memos/vec_dbs/base.py +105 -0
- memos/vec_dbs/factory.py +21 -0
- memos/vec_dbs/item.py +43 -0
- memos/vec_dbs/qdrant.py +292 -0
- memoryos-0.0.1.dist-info/METADATA +0 -53
- memoryos-0.0.1.dist-info/RECORD +0 -5
- {memoryos-0.0.1.dist-info → memoryos-0.1.13.dist-info}/LICENSE +0 -0
- {memoryos-0.0.1.dist-info → memoryos-0.1.13.dist-info}/WHEEL +0 -0
memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py
@@ -0,0 +1,263 @@
+"""Internet retrieval module for tree text memory."""
+
+import uuid
+
+from datetime import datetime
+
+import requests
+
+from memos.embedders.factory import OllamaEmbedder
+from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
+
+
+class GoogleCustomSearchAPI:
+    """Google Custom Search API client"""
+
+    def __init__(
+        self, api_key: str, search_engine_id: str, max_results: int = 20, num_per_request: int = 10
+    ):
+        """
+        Initialize Google Custom Search API client
+
+        Args:
+            api_key: Google API key
+            search_engine_id: Search engine ID (cx parameter)
+            max_results: Maximum number of results to retrieve
+            num_per_request: Number of results per API request
+        """
+        self.api_key = api_key
+        self.search_engine_id = search_engine_id
+        self.max_results = max_results
+        self.num_per_request = min(num_per_request, 10)  # Google API limits this to 10 per request
+        self.base_url = "https://www.googleapis.com/customsearch/v1"
+
+    def search(self, query: str, num_results: int | None = None, start_index: int = 1) -> dict:
+        """
+        Execute a search request
+
+        Args:
+            query: Search query
+            num_results: Number of results to return (uses config default if None)
+            start_index: Starting index (default 1)
+
+        Returns:
+            Dictionary containing search results
+        """
+        if num_results is None:
+            num_results = self.num_per_request
+
+        params = {
+            "key": self.api_key,
+            "cx": self.search_engine_id,
+            "q": query,
+            "num": min(num_results, self.num_per_request),
+            "start": start_index,
+        }
+
+        try:
+            response = requests.get(self.base_url, params=params)
+            response.raise_for_status()
+            return response.json()
+        except requests.exceptions.RequestException as e:
+            print(f"Google search request failed: {e}")
+            return {}
+
+    def get_all_results(self, query: str, max_results: int | None = None) -> list[dict]:
+        """
+        Get all search results (with pagination)
+
+        Args:
+            query: Search query
+            max_results: Maximum number of results (uses config default if None)
+
+        Returns:
+            List of all search results
+        """
+        if max_results is None:
+            max_results = self.max_results
+
+        all_results = []
+        start_index = 1
+
+        while len(all_results) < max_results:
+            search_data = self.search(query, start_index=start_index)
+
+            if not search_data or "items" not in search_data:
+                break
+
+            all_results.extend(search_data["items"])
+
+            # Check if there are more results
+            if len(search_data["items"]) < self.num_per_request:
+                break
+
+            start_index += self.num_per_request
+
+            # Avoid infinite loop
+            if start_index > 100:
+                break
+
+        return all_results[:max_results]
+
+
+class InternetGoogleRetriever:
+    """Internet retriever that converts search results to TextualMemoryItem format"""
+
+    def __init__(
+        self,
+        api_key: str,
+        search_engine_id: str,
+        embedder: OllamaEmbedder,
+        max_results: int = 20,
+        num_per_request: int = 10,
+    ):
+        """
+        Initialize internet retriever
+
+        Args:
+            api_key: Google API key
+            search_engine_id: Search engine ID
+            embedder: Embedder instance for generating embeddings
+            max_results: Maximum number of results to retrieve
+            num_per_request: Number of results per API request
+        """
+        self.google_api = GoogleCustomSearchAPI(
+            api_key, search_engine_id, max_results=max_results, num_per_request=num_per_request
+        )
+        self.embedder = embedder
+
+    def retrieve_from_internet(
+        self, query: str, top_k: int = 10, parsed_goal=None
+    ) -> list[TextualMemoryItem]:
+        """
+        Retrieve information from the internet and convert it to TextualMemoryItem format
+
+        Args:
+            query: Search query
+            top_k: Number of results to return
+            parsed_goal: Parsed task goal (optional)
+
+        Returns:
+            List of TextualMemoryItem
+        """
+        # Get search results
+        search_results = self.google_api.get_all_results(query, max_results=top_k)
+
+        # Convert to TextualMemoryItem format
+        memory_items = []
+
+        for result in search_results:
+            # Extract basic information
+            title = result.get("title", "")
+            snippet = result.get("snippet", "")
+            link = result.get("link", "")
+            display_link = result.get("displayLink", "")
+
+            # Combine memory content
+            memory_content = f"Title: {title}\nSummary: {snippet}\nSource: {link}"
+            # Create metadata
+            metadata = TreeNodeTextualMemoryMetadata(
+                user_id=None,
+                session_id=None,
+                status="activated",
+                type="fact",  # Internet search results are usually factual information
+                memory_time=datetime.now().strftime("%Y-%m-%d"),
+                source="web",
+                confidence=85.0,  # Confidence level for internet information
+                entities=self._extract_entities(title, snippet),
+                tags=self._extract_tags(title, snippet, parsed_goal),
+                visibility="public",
+                memory_type="LongTermMemory",  # Store internet search results as long-term memory
+                key=title,
+                sources=[link] if link else [],
+                embedding=self.embedder.embed([memory_content])[0],  # Embed the combined content
+                created_at=datetime.now().isoformat(),
+                usage=[],
+                background=f"Internet search result from {display_link}",
+            )
+
+            # Create TextualMemoryItem
+            memory_item = TextualMemoryItem(
+                id=str(uuid.uuid4()), memory=memory_content, metadata=metadata
+            )
+
+            memory_items.append(memory_item)
+
+        return memory_items
+
+    def _extract_entities(self, title: str, snippet: str) -> list[str]:
+        """
+        Extract entities from title and snippet
+
+        Args:
+            title: Title
+            snippet: Snippet
+
+        Returns:
+            List of entities
+        """
+        # Simple entity extraction logic; can be improved as needed
+        text = f"{title} {snippet}"
+        entities = []
+
+        # Extract possible organization names (with common suffixes)
+        org_suffixes = ["Inc", "Corp", "LLC", "Ltd", "Company", "University", "Institute"]
+        words = text.split()
+        for i, word in enumerate(words):
+            if word in org_suffixes and i > 0:
+                entities.append(f"{words[i - 1]} {word}")
+
+        # Extract possible dates
+        import re
+
+        date_pattern = r"\d{4}-\d{2}-\d{2}|\d{1,2}/\d{1,2}/\d{4}|\w+ \d{1,2}, \d{4}"
+        dates = re.findall(date_pattern, text)
+        entities.extend(dates)
+
+        return entities[:5]  # Limit number of entities
+
+    def _extract_tags(self, title: str, snippet: str, parsed_goal=None) -> list[str]:
+        """
+        Extract tags from title and snippet
+
+        Args:
+            title: Title
+            snippet: Snippet
+            parsed_goal: Parsed task goal
+
+        Returns:
+            List of tags
+        """
+        tags = []
+
+        # Extract tags from parsed goal
+        if parsed_goal:
+            if hasattr(parsed_goal, "topic") and parsed_goal.topic:
+                tags.append(parsed_goal.topic)
+            if hasattr(parsed_goal, "concept") and parsed_goal.concept:
+                tags.append(parsed_goal.concept)
+
+        # Extract keywords from text
+        text = f"{title} {snippet}".lower()
+
+        # Simple keyword extraction
+        keywords = [
+            "news",
+            "report",
+            "article",
+            "study",
+            "research",
+            "analysis",
+            "update",
+            "announcement",
+            "policy",
+            "memo",
+            "document",
+        ]
+
+        for keyword in keywords:
+            if keyword in text:
+                tags.append(keyword)
+
+        # Remove duplicates and limit count
+        return list(set(tags))[:10]
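The retriever above pages through Google Custom Search and wraps each hit as a TextualMemoryItem. A minimal usage sketch (the API key, engine ID, and the StubEmbedder below are hypothetical placeholders for illustration, not values or helpers shipped in this release; any object with a compatible .embed() works):

from memos.memories.textual.tree_text_memory.retrieve.internet_retriever import (
    InternetGoogleRetriever,
)


class StubEmbedder:
    """Hypothetical stand-in for OllamaEmbedder: embed(texts) returns one vector per text."""

    def embed(self, texts: list[str]) -> list[list[float]]:
        return [[0.0] * 8 for _ in texts]


retriever = InternetGoogleRetriever(
    api_key="YOUR_GOOGLE_API_KEY",  # placeholder
    search_engine_id="YOUR_CX_ID",  # placeholder
    embedder=StubEmbedder(),
    max_results=20,
)
items = retriever.retrieve_from_internet("agent memory systems", top_k=5)
for item in items:
    print(item.metadata.key, item.metadata.sources)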
memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py
@@ -0,0 +1,89 @@
+"""Factory for creating internet retrievers."""
+
+from typing import Any, ClassVar
+
+from memos.configs.internet_retriever import InternetRetrieverConfigFactory
+from memos.embedders.base import BaseEmbedder
+from memos.memories.textual.tree_text_memory.retrieve.internet_retriever import (
+    InternetGoogleRetriever,
+)
+from memos.memories.textual.tree_text_memory.retrieve.xinyusearch import XinyuSearchRetriever
+
+
+class InternetRetrieverFactory:
+    """Factory class for creating internet retriever instances."""
+
+    backend_to_class: ClassVar[dict[str, Any]] = {
+        "google": InternetGoogleRetriever,
+        "bing": InternetGoogleRetriever,  # TODO: Implement BingRetriever
+        "xinyu": XinyuSearchRetriever,
+    }
+
+    @classmethod
+    def from_config(
+        cls, config_factory: InternetRetrieverConfigFactory, embedder: BaseEmbedder
+    ) -> InternetGoogleRetriever | None:
+        """
+        Create internet retriever from configuration.
+
+        Args:
+            config_factory: Internet retriever configuration
+            embedder: Embedder instance for generating embeddings
+
+        Returns:
+            InternetRetriever instance, or None if no configuration provided
+        """
+        if config_factory.backend is None:
+            return None
+
+        backend = config_factory.backend
+        if backend not in cls.backend_to_class:
+            raise ValueError(f"Invalid internet retriever backend: {backend}")
+
+        retriever_class = cls.backend_to_class[backend]
+        config = config_factory.config
+
+        # Create retriever with appropriate parameters
+        if backend == "google":
+            return retriever_class(
+                api_key=config.api_key,
+                search_engine_id=config.search_engine_id,
+                embedder=embedder,
+                max_results=config.max_results,
+                num_per_request=config.num_per_request,
+            )
+        elif backend == "bing":
+            # TODO: Implement Bing retriever
+            return retriever_class(
+                api_key=config.api_key,
+                search_engine_id=None,  # Bing doesn't use search_engine_id
+                embedder=embedder,
+                max_results=config.max_results,
+                num_per_request=config.num_per_request,
+            )
+        elif backend == "xinyu":
+            return retriever_class(
+                access_key=config.api_key,  # Use api_key as access_key for xinyu
+                search_engine_id=config.search_engine_id,
+                embedder=embedder,
+                max_results=config.max_results,
+            )
+        else:
+            raise ValueError(f"Unsupported backend: {backend}")
+
+    @classmethod
+    def create_google_retriever(
+        cls, api_key: str, search_engine_id: str, embedder: BaseEmbedder
+    ) -> InternetGoogleRetriever:
+        """
+        Create Google Custom Search retriever.
+
+        Args:
+            api_key: Google API key
+            search_engine_id: Google Custom Search Engine ID
+            embedder: Embedder instance
+
+        Returns:
+            InternetRetriever instance
+        """
+        return InternetGoogleRetriever(api_key, search_engine_id, embedder)
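The factory resolves the backend name against backend_to_class and forwards the matching config fields. A sketch of driving it with a stand-in config object (the real InternetRetrieverConfigFactory in memos.configs.internet_retriever is a typed config and may differ in shape; SimpleNamespace and the placeholder values here are purely illustrative):

from types import SimpleNamespace

from memos.memories.textual.tree_text_memory.retrieve.internet_retriever_factory import (
    InternetRetrieverFactory,
)


class StubEmbedder:
    """Hypothetical embedder stand-in; a real deployment would use a configured embedder."""

    def embed(self, texts: list[str]) -> list[list[float]]:
        return [[0.0] * 8 for _ in texts]


config_factory = SimpleNamespace(
    backend="google",
    config=SimpleNamespace(
        api_key="YOUR_GOOGLE_API_KEY",  # placeholder
        search_engine_id="YOUR_CX_ID",  # placeholder
        max_results=20,
        num_per_request=10,
    ),
)

retriever = InternetRetrieverFactory.from_config(config_factory, embedder=StubEmbedder())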
memos/memories/textual/tree_text_memory/retrieve/reasoner.py
@@ -0,0 +1,61 @@
+import json
+import re
+
+from string import Template
+
+from memos.memories.textual.item import TextualMemoryItem
+from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
+from memos.memories.textual.tree_text_memory.retrieve.utils import REASON_PROMPT
+
+
+class MemoryReasoner:
+    """
+    Memory reasoner that performs reasoning and knowledge synthesis
+    over retrieved memory items using a language model.
+    """
+
+    def __init__(self, llm):
+        self.llm = llm
+
+    def reason(
+        self, query: str, ranked_memories: list, parsed_goal: ParsedTaskGoal
+    ) -> list[TextualMemoryItem]:
+        """
+        Reason across multiple retrieved memory items and synthesize
+        a response or knowledge structure based on the query objective.
+
+        Args:
+            query (str): Original user query description.
+            ranked_memories (list): List of relevant memory items.
+            parsed_goal (ParsedTaskGoal): Structured topic/concept/fact goal from TaskGoalParser.
+
+        Returns:
+            List of TextualMemoryItem: Refined memory items.
+        """
+        prompt_template = Template(REASON_PROMPT)
+        memory_detailed_str = "\n".join(
+            [f"[{m.id}] {m.metadata.key}: {m.memory}" for m in ranked_memories]
+        )
+        prompt = prompt_template.substitute(task=query, detailed_memory_list=memory_detailed_str)
+
+        response = self.llm.generate([{"role": "user", "content": prompt}])
+        content = response.content if hasattr(response, "content") else response
+
+        # Step 1: Extract selected IDs
+        selected_ids = self._parse_selected_ids(content)
+        id_set = set(selected_ids)
+
+        return [m for m in ranked_memories if m.id in id_set]
+
+    def _parse_selected_ids(self, response_text: str) -> list[str]:
+        """
+        Extract memory IDs from the model response. Supports both a simple text list and JSON.
+        """
+        try:
+            parsed = json.loads(response_text)
+            if isinstance(parsed, dict) and "selected_ids" in parsed:
+                return parsed["selected_ids"]
+        except json.JSONDecodeError:
+            pass
+
+        return re.findall(r"[a-f0-9\-]{36}", response_text)  # UUID pattern fallback
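The ID-extraction fallback is worth noting: _parse_selected_ids first tries a JSON object with a selected_ids field, then falls back to scanning the raw text for 36-character UUIDs. A small self-contained check of both paths (the llm argument is never touched by this helper, so None suffices here):

from memos.memories.textual.tree_text_memory.retrieve.reasoner import MemoryReasoner

reasoner = MemoryReasoner(llm=None)  # llm is unused by _parse_selected_ids

json_reply = '{"selected_ids": ["1b4e28ba-2fa1-11d2-883f-0016d3cca427"]}'
text_reply = "Keep 1b4e28ba-2fa1-11d2-883f-0016d3cca427, drop everything else."

print(reasoner._parse_selected_ids(json_reply))  # parsed from the JSON object
print(reasoner._parse_selected_ids(text_reply))  # recovered by the UUID regex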
memos/memories/textual/tree_text_memory/retrieve/recall.py
@@ -0,0 +1,158 @@
+import concurrent.futures
+
+from memos.embedders.factory import OllamaEmbedder
+from memos.graph_dbs.neo4j import Neo4jGraphDB
+from memos.memories.textual.item import TextualMemoryItem
+from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
+
+
+class GraphMemoryRetriever:
+    """
+    Unified memory retriever that combines both graph-based and vector-based retrieval logic.
+    """
+
+    def __init__(self, graph_store: Neo4jGraphDB, embedder: OllamaEmbedder):
+        self.graph_store = graph_store
+        self.embedder = embedder
+
+    def retrieve(
+        self,
+        query: str,
+        parsed_goal: ParsedTaskGoal,
+        top_k: int,
+        memory_scope: str,
+        query_embedding: list[list[float]] | None = None,
+    ) -> list[TextualMemoryItem]:
+        """
+        Perform hybrid memory retrieval:
+        - Run graph-based lookup from the dispatch plan.
+        - Run vector similarity search from the embedded query.
+        - Merge and return the combined result set.
+
+        Args:
+            query (str): Original task query.
+            parsed_goal (ParsedTaskGoal): Structured task goal from TaskGoalParser.
+            top_k (int): Number of candidates to return.
+            memory_scope (str): One of ['WorkingMemory', 'LongTermMemory', 'UserMemory'].
+            query_embedding (list[list[float]] | None): One or more query embedding vectors.
+
+        Returns:
+            list: Combined memory items.
+        """
+        if memory_scope not in ["WorkingMemory", "LongTermMemory", "UserMemory"]:
+            raise ValueError(f"Unsupported memory scope: {memory_scope}")
+
+        if memory_scope == "WorkingMemory":
+            # For working memory, retrieve all entries (no filtering)
+            working_memories = self.graph_store.get_all_memory_items(scope="WorkingMemory")
+            return [TextualMemoryItem.from_dict(record) for record in working_memories]
+
+        # Step 1: Structured graph-based retrieval
+        graph_results = self._graph_recall(parsed_goal, memory_scope)
+
+        # Step 2: Vector similarity search
+        vector_results = self._vector_recall(query_embedding, memory_scope, top_k)
+
+        # Step 3: Merge and deduplicate results
+        combined = {item.id: item for item in graph_results + vector_results}
+
+        # Debug: report ids that appear in graph_results but are missing from combined
+        graph_ids = {item.id for item in graph_results}
+        combined_ids = set(combined.keys())
+        lost_ids = graph_ids - combined_ids
+
+        if lost_ids:
+            print(
+                f"[DEBUG] The following nodes were in graph_results but missing in combined: {lost_ids}"
+            )
+
+        return list(combined.values())
+
+    def _graph_recall(
+        self, parsed_goal: ParsedTaskGoal, memory_scope: str
+    ) -> list[TextualMemoryItem]:
+        """
+        Perform structured node-based retrieval from Neo4j.
+        - keys must match exactly (n.key IN keys)
+        - tags must overlap with at least 2 input tags
+        - scope filters by memory_type if provided
+        """
+        candidate_ids = set()
+
+        # 1) key-based OR branch
+        if parsed_goal.keys:
+            key_filters = [
+                {"field": "key", "op": "in", "value": parsed_goal.keys},
+                {"field": "memory_type", "op": "=", "value": memory_scope},
+            ]
+            key_ids = self.graph_store.get_by_metadata(key_filters)
+            candidate_ids.update(key_ids)
+
+        # 2) tag-based OR branch
+        if parsed_goal.tags:
+            tag_filters = [
+                {"field": "tags", "op": "contains", "value": parsed_goal.tags},
+                {"field": "memory_type", "op": "=", "value": memory_scope},
+            ]
+            tag_ids = self.graph_store.get_by_metadata(tag_filters)
+            candidate_ids.update(tag_ids)
+
+        # No matches → return empty
+        if not candidate_ids:
+            return []
+
+        # Load nodes and post-filter
+        node_dicts = self.graph_store.get_nodes(list(candidate_ids))
+
+        final_nodes = []
+        for node in node_dicts:
+            meta = node.get("metadata", {})
+            node_key = meta.get("key")
+            node_tags = meta.get("tags", []) or []
+
+            keep = False
+            # Keep if the node key matches one of the goal keys
+            if parsed_goal.keys and node_key in parsed_goal.keys:
+                keep = True
+            # Keep if the node shares at least 2 tags with the goal
+            elif parsed_goal.tags:
+                overlap = len(set(node_tags) & set(parsed_goal.tags))
+                if overlap >= 2:
+                    keep = True
+            if keep:
+                final_nodes.append(TextualMemoryItem.from_dict(node))
+        return final_nodes
+
+    def _vector_recall(
+        self,
+        query_embedding: list[list[float]],
+        memory_scope: str,
+        top_k: int = 20,
+        max_num: int = 5,
+    ) -> list[TextualMemoryItem]:
+        """
+        Perform vector-based similarity retrieval using the query embeddings.
+        # TODO: handle post-filter and pre-filter (5.18+) better.
+        """
+        all_matches = []
+
+        def search_single(vec):
+            return (
+                self.graph_store.search_by_embedding(vector=vec, top_k=top_k, scope=memory_scope)
+                or []
+            )
+
+        with concurrent.futures.ThreadPoolExecutor() as executor:
+            futures = [executor.submit(search_single, vec) for vec in query_embedding[:max_num]]
+            for future in concurrent.futures.as_completed(futures):
+                result = future.result()
+                all_matches.extend(result)
+
+        if not all_matches:
+            return []
+
+        # Extract matched IDs and retrieve full nodes
+        unique_ids = {r["id"] for r in all_matches}
+        node_dicts = self.graph_store.get_nodes(list(unique_ids))
+
+        return [TextualMemoryItem.from_dict(record) for record in node_dicts]
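The merge step in retrieve() deduplicates by item id with a dict comprehension, so on an id collision the vector-recall copy silently replaces the graph-recall copy. A toy illustration of that behavior (the Item dataclass below is a stand-in for TextualMemoryItem, used only to keep the snippet self-contained):

from dataclasses import dataclass


@dataclass
class Item:
    id: str
    memory: str


graph_results = [Item("a", "graph-a"), Item("b", "graph-b")]
vector_results = [Item("b", "vector-b"), Item("c", "vector-c")]

combined = {item.id: item for item in graph_results + vector_results}
print([i.memory for i in combined.values()])
# ['graph-a', 'vector-b', 'vector-c'] — for id 'b', the vector copy wins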
memos/memories/textual/tree_text_memory/retrieve/reranker.py
@@ -0,0 +1,111 @@
+import numpy as np
+
+from memos.embedders.factory import OllamaEmbedder
+from memos.llms.factory import OllamaLLM, OpenAILLM
+from memos.memories.textual.item import TextualMemoryItem
+from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
+
+
+def batch_cosine_similarity(
+    query_vec: list[float], candidate_vecs: list[list[float]]
+) -> list[float]:
+    """
+    Compute cosine similarity between a single query vector and multiple candidate vectors using NumPy.
+
+    Args:
+        query_vec (list[float]): The query embedding.
+        candidate_vecs (list[list[float]]): A list of memory embeddings.
+
+    Returns:
+        list[float]: Cosine similarity scores for each candidate.
+    """
+    query = np.array(query_vec)
+    candidates = np.array(candidate_vecs)
+
+    # Normalize query and candidates
+    query_norm = np.linalg.norm(query)
+    candidates_norm = np.linalg.norm(candidates, axis=1)
+
+    # Compute dot products
+    dot_products = np.dot(candidates, query)
+
+    # Avoid division by zero
+    eps = 1e-10
+    similarities = dot_products / (candidates_norm * query_norm + eps)
+
+    return similarities.tolist()
+
+
+class MemoryReranker:
+    """
+    Rank retrieved memory cards by structural priority and contextual similarity.
+    """
+
+    def __init__(self, llm: OpenAILLM | OllamaLLM, embedder: OllamaEmbedder):
+        self.llm = llm
+        self.embedder = embedder
+
+        # Structural priority weights
+        self.level_weights = {
+            "topic": 1.0,
+            "concept": 1.0,
+            "fact": 1.0,
+        }
+
+    def rerank(
+        self,
+        query: str,
+        query_embedding: list[float],
+        graph_results: list,
+        top_k: int,
+        parsed_goal: ParsedTaskGoal,
+    ) -> list[tuple[TextualMemoryItem, float]]:
+        """
+        Rerank memory items by relevance to the task.
+
+        Args:
+            query (str): Original task.
+            query_embedding (list[float]): Embedding of the query.
+            graph_results (list): Combined retrieval results.
+            top_k (int): Number of top results to return.
+            parsed_goal (ParsedTaskGoal): Structured task representation.
+
+        Returns:
+            list(tuple): Ranked list of memory items with similarity scores.
+        """
+        # Step 1: Filter out items without embeddings
+        items_with_embeddings = [item for item in graph_results if item.metadata.embedding]
+        embeddings = [item.metadata.embedding for item in items_with_embeddings]
+
+        if not embeddings:
+            return graph_results[:top_k]
+
+        # Step 2: Compute cosine similarities
+        similarity_scores = batch_cosine_similarity(query_embedding, embeddings)
+
+        # Step 3: Apply structural weight boost
+        def get_weight(item: TextualMemoryItem) -> float:
+            level = item.metadata.background
+            return self.level_weights.get(level, 1.0)
+
+        weighted_scores = [
+            sim * get_weight(item)
+            for sim, item in zip(similarity_scores, items_with_embeddings, strict=False)
+        ]
+
+        # Step 4: Sort by weighted score
+        sorted_items = sorted(
+            zip(items_with_embeddings, weighted_scores, strict=False),
+            key=lambda pair: pair[1],
+            reverse=True,
+        )
+
+        # Step 5: Return top-k items with fallback
+        top_items = sorted_items[:top_k]
+
+        if len(top_items) < top_k:
+            selected_items = [item for item, _ in top_items]
+            remaining = [(item, -1.0) for item in graph_results if item not in selected_items]
+            top_items.extend(remaining[: top_k - len(top_items)])
+
+        return top_items  # list of (item, score)
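batch_cosine_similarity is a plain NumPy routine with no package dependencies beyond the import, so it can be sanity-checked in isolation:

from memos.memories.textual.tree_text_memory.retrieve.reranker import batch_cosine_similarity

query = [1.0, 0.0]
candidates = [[1.0, 0.0], [0.0, 1.0], [1.0, 1.0]]

print(batch_cosine_similarity(query, candidates))
# ≈ [1.0, 0.0, 0.7071]: same direction, orthogonal, 45 degrees apart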
memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py
@@ -0,0 +1,13 @@
+from dataclasses import dataclass, field
+
+
+@dataclass
+class ParsedTaskGoal:
+    """
+    Goal structure shared by both the fast and LLM parsing paths.
+    """
+
+    memories: list[str] = field(default_factory=list)
+    keys: list[str] = field(default_factory=list)
+    tags: list[str] = field(default_factory=list)
+    goal_type: str | None = None  # e.g., 'default', 'explanation', etc.
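ParsedTaskGoal is an ordinary dataclass with default_factory list fields, so it can be built empty or field by field (the values below are illustrative only):

from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal

goal = ParsedTaskGoal(
    keys=["agent memory"],
    tags=["research", "memory", "retrieval"],
    goal_type="explanation",
)
print(goal.memories)  # [] — default_factory keeps each instance's lists independent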
|