MemoryOS 0.0.1__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MemoryOS might be problematic. Click here for more details.

Files changed (124) hide show
  1. memoryos-0.1.13.dist-info/METADATA +288 -0
  2. memoryos-0.1.13.dist-info/RECORD +122 -0
  3. memos/__init__.py +20 -1
  4. memos/api/start_api.py +420 -0
  5. memos/chunkers/__init__.py +4 -0
  6. memos/chunkers/base.py +24 -0
  7. memos/chunkers/factory.py +22 -0
  8. memos/chunkers/sentence_chunker.py +35 -0
  9. memos/configs/__init__.py +0 -0
  10. memos/configs/base.py +82 -0
  11. memos/configs/chunker.py +45 -0
  12. memos/configs/embedder.py +53 -0
  13. memos/configs/graph_db.py +45 -0
  14. memos/configs/internet_retriever.py +81 -0
  15. memos/configs/llm.py +71 -0
  16. memos/configs/mem_chat.py +81 -0
  17. memos/configs/mem_cube.py +89 -0
  18. memos/configs/mem_os.py +74 -0
  19. memos/configs/mem_reader.py +53 -0
  20. memos/configs/mem_scheduler.py +78 -0
  21. memos/configs/memory.py +195 -0
  22. memos/configs/parser.py +38 -0
  23. memos/configs/utils.py +8 -0
  24. memos/configs/vec_db.py +64 -0
  25. memos/deprecation.py +262 -0
  26. memos/embedders/__init__.py +0 -0
  27. memos/embedders/base.py +15 -0
  28. memos/embedders/factory.py +23 -0
  29. memos/embedders/ollama.py +74 -0
  30. memos/embedders/sentence_transformer.py +40 -0
  31. memos/exceptions.py +30 -0
  32. memos/graph_dbs/__init__.py +0 -0
  33. memos/graph_dbs/base.py +215 -0
  34. memos/graph_dbs/factory.py +21 -0
  35. memos/graph_dbs/neo4j.py +827 -0
  36. memos/hello_world.py +97 -0
  37. memos/llms/__init__.py +0 -0
  38. memos/llms/base.py +16 -0
  39. memos/llms/factory.py +25 -0
  40. memos/llms/hf.py +231 -0
  41. memos/llms/ollama.py +82 -0
  42. memos/llms/openai.py +34 -0
  43. memos/llms/utils.py +14 -0
  44. memos/log.py +78 -0
  45. memos/mem_chat/__init__.py +0 -0
  46. memos/mem_chat/base.py +30 -0
  47. memos/mem_chat/factory.py +21 -0
  48. memos/mem_chat/simple.py +200 -0
  49. memos/mem_cube/__init__.py +0 -0
  50. memos/mem_cube/base.py +29 -0
  51. memos/mem_cube/general.py +146 -0
  52. memos/mem_cube/utils.py +24 -0
  53. memos/mem_os/client.py +5 -0
  54. memos/mem_os/core.py +819 -0
  55. memos/mem_os/main.py +503 -0
  56. memos/mem_os/product.py +89 -0
  57. memos/mem_reader/__init__.py +0 -0
  58. memos/mem_reader/base.py +27 -0
  59. memos/mem_reader/factory.py +21 -0
  60. memos/mem_reader/memory.py +298 -0
  61. memos/mem_reader/simple_struct.py +241 -0
  62. memos/mem_scheduler/__init__.py +0 -0
  63. memos/mem_scheduler/base_scheduler.py +164 -0
  64. memos/mem_scheduler/general_scheduler.py +305 -0
  65. memos/mem_scheduler/modules/__init__.py +0 -0
  66. memos/mem_scheduler/modules/base.py +74 -0
  67. memos/mem_scheduler/modules/dispatcher.py +103 -0
  68. memos/mem_scheduler/modules/monitor.py +82 -0
  69. memos/mem_scheduler/modules/redis_service.py +146 -0
  70. memos/mem_scheduler/modules/retriever.py +41 -0
  71. memos/mem_scheduler/modules/schemas.py +146 -0
  72. memos/mem_scheduler/scheduler_factory.py +21 -0
  73. memos/mem_scheduler/utils.py +26 -0
  74. memos/mem_user/user_manager.py +488 -0
  75. memos/memories/__init__.py +0 -0
  76. memos/memories/activation/__init__.py +0 -0
  77. memos/memories/activation/base.py +42 -0
  78. memos/memories/activation/item.py +25 -0
  79. memos/memories/activation/kv.py +232 -0
  80. memos/memories/base.py +19 -0
  81. memos/memories/factory.py +34 -0
  82. memos/memories/parametric/__init__.py +0 -0
  83. memos/memories/parametric/base.py +19 -0
  84. memos/memories/parametric/item.py +11 -0
  85. memos/memories/parametric/lora.py +41 -0
  86. memos/memories/textual/__init__.py +0 -0
  87. memos/memories/textual/base.py +89 -0
  88. memos/memories/textual/general.py +286 -0
  89. memos/memories/textual/item.py +167 -0
  90. memos/memories/textual/naive.py +185 -0
  91. memos/memories/textual/tree.py +321 -0
  92. memos/memories/textual/tree_text_memory/__init__.py +0 -0
  93. memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
  94. memos/memories/textual/tree_text_memory/organize/manager.py +305 -0
  95. memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
  96. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +263 -0
  97. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +89 -0
  98. memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
  99. memos/memories/textual/tree_text_memory/retrieve/recall.py +158 -0
  100. memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
  101. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +13 -0
  102. memos/memories/textual/tree_text_memory/retrieve/searcher.py +208 -0
  103. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +68 -0
  104. memos/memories/textual/tree_text_memory/retrieve/utils.py +48 -0
  105. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +335 -0
  106. memos/parsers/__init__.py +0 -0
  107. memos/parsers/base.py +15 -0
  108. memos/parsers/factory.py +19 -0
  109. memos/parsers/markitdown.py +22 -0
  110. memos/settings.py +8 -0
  111. memos/templates/__init__.py +0 -0
  112. memos/templates/mem_reader_prompts.py +98 -0
  113. memos/templates/mem_scheduler_prompts.py +65 -0
  114. memos/templates/mos_prompts.py +63 -0
  115. memos/types.py +55 -0
  116. memos/vec_dbs/__init__.py +0 -0
  117. memos/vec_dbs/base.py +105 -0
  118. memos/vec_dbs/factory.py +21 -0
  119. memos/vec_dbs/item.py +43 -0
  120. memos/vec_dbs/qdrant.py +292 -0
  121. memoryos-0.0.1.dist-info/METADATA +0 -53
  122. memoryos-0.0.1.dist-info/RECORD +0 -5
  123. {memoryos-0.0.1.dist-info → memoryos-0.1.13.dist-info}/LICENSE +0 -0
  124. {memoryos-0.0.1.dist-info → memoryos-0.1.13.dist-info}/WHEEL +0 -0
@@ -0,0 +1,263 @@
1
+ """Internet retrieval module for tree text memory."""
2
+
3
+ import uuid
4
+
5
+ from datetime import datetime
6
+
7
+ import requests
8
+
9
+ from memos.embedders.factory import OllamaEmbedder
10
+ from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
11
+
12
+
13
+ class GoogleCustomSearchAPI:
14
+ """Google Custom Search API Client"""
15
+
16
+ def __init__(
17
+ self, api_key: str, search_engine_id: str, max_results: int = 20, num_per_request: int = 10
18
+ ):
19
+ """
20
+ Initialize Google Custom Search API client
21
+
22
+ Args:
23
+ api_key: Google API key
24
+ search_engine_id: Search engine ID (cx parameter)
25
+ max_results: Maximum number of results to retrieve
26
+ num_per_request: Number of results per API request
27
+ """
28
+ self.api_key = api_key
29
+ self.search_engine_id = search_engine_id
30
+ self.max_results = max_results
31
+ self.num_per_request = min(num_per_request, 10) # Google API limits to 10
32
+ self.base_url = "https://www.googleapis.com/customsearch/v1"
33
+
34
+ def search(self, query: str, num_results: int | None = None, start_index: int = 1) -> dict:
35
+ """
36
+ Execute search request
37
+
38
+ Args:
39
+ query: Search query
40
+ num_results: Number of results to return (uses config default if None)
41
+ start_index: Starting index (default 1)
42
+
43
+ Returns:
44
+ Dictionary containing search results
45
+ """
46
+ if num_results is None:
47
+ num_results = self.num_per_request
48
+
49
+ params = {
50
+ "key": self.api_key,
51
+ "cx": self.search_engine_id,
52
+ "q": query,
53
+ "num": min(num_results, self.num_per_request),
54
+ "start": start_index,
55
+ }
56
+
57
+ try:
58
+ response = requests.get(self.base_url, params=params)
59
+ response.raise_for_status()
60
+ return response.json()
61
+ except requests.exceptions.RequestException as e:
62
+ print(f"Google search request failed: {e}")
63
+ return {}
64
+
65
+ def get_all_results(self, query: str, max_results: int | None = None) -> list[dict]:
66
+ """
67
+ Get all search results (with pagination)
68
+
69
+ Args:
70
+ query: Search query
71
+ max_results: Maximum number of results (uses config default if None)
72
+
73
+ Returns:
74
+ List of all search results
75
+ """
76
+ if max_results is None:
77
+ max_results = self.max_results
78
+
79
+ all_results = []
80
+ start_index = 1
81
+
82
+ while len(all_results) < max_results:
83
+ search_data = self.search(query, start_index=start_index)
84
+
85
+ if not search_data or "items" not in search_data:
86
+ break
87
+
88
+ all_results.extend(search_data["items"])
89
+
90
+ # Check if there are more results
91
+ if len(search_data["items"]) < self.num_per_request:
92
+ break
93
+
94
+ start_index += self.num_per_request
95
+
96
+ # Avoid infinite loop
97
+ if start_index > 100:
98
+ break
99
+
100
+ return all_results[:max_results]
101
+
102
+
103
class InternetGoogleRetriever:
    """Internet retriever that converts Google search results to TextualMemoryItem format."""

    def __init__(
        self,
        api_key: str,
        search_engine_id: str,
        embedder: OllamaEmbedder,
        max_results: int = 20,
        num_per_request: int = 10,
    ):
        """
        Initialize internet retriever.

        Args:
            api_key: Google API key
            search_engine_id: Search engine ID
            embedder: Embedder instance for generating embeddings
            max_results: Maximum number of results to retrieve
            num_per_request: Number of results per API request
        """
        self.google_api = GoogleCustomSearchAPI(
            api_key, search_engine_id, max_results=max_results, num_per_request=num_per_request
        )
        self.embedder = embedder

    def retrieve_from_internet(
        self, query: str, top_k: int = 10, parsed_goal=None
    ) -> list[TextualMemoryItem]:
        """
        Retrieve information from the internet and convert to TextualMemoryItem format.

        Args:
            query: Search query
            top_k: Number of results to return
            parsed_goal: Parsed task goal (optional); its topic/concept feed the tags

        Returns:
            List of TextualMemoryItem
        """
        search_results = self.google_api.get_all_results(query, max_results=top_k)
        if not search_results:
            # Avoid a pointless (and possibly failing) empty embedding call.
            return []

        # Collect per-result fields first so embeddings can be computed in a
        # single batched call instead of one embedder round-trip per result.
        entries = []
        for result in search_results:
            title = result.get("title", "")
            snippet = result.get("snippet", "")
            link = result.get("link", "")
            display_link = result.get("displayLink", "")
            memory_content = f"Title: {title}\nSummary: {snippet}\nSource: {link}"
            entries.append((title, snippet, link, display_link, memory_content))

        embeddings = self.embedder.embed([entry[4] for entry in entries])

        memory_items = []
        for (title, snippet, link, display_link, memory_content), embedding in zip(
            entries, embeddings, strict=False
        ):
            metadata = TreeNodeTextualMemoryMetadata(
                user_id=None,
                session_id=None,
                status="activated",
                type="fact",  # Internet search results are usually factual information
                memory_time=datetime.now().strftime("%Y-%m-%d"),
                source="web",
                confidence=85.0,  # Heuristic confidence level for internet information
                entities=self._extract_entities(title, snippet),
                tags=self._extract_tags(title, snippet, parsed_goal),
                visibility="public",
                memory_type="LongTermMemory",  # Search results are stored as long-term memory
                key=title,
                sources=[link] if link else [],
                embedding=embedding,
                created_at=datetime.now().isoformat(),
                usage=[],
                background=f"Internet search result from {display_link}",
            )
            memory_items.append(
                TextualMemoryItem(id=str(uuid.uuid4()), memory=memory_content, metadata=metadata)
            )

        return memory_items

    def _extract_entities(self, title: str, snippet: str) -> list[str]:
        """
        Extract entities from title and snippet.

        Uses two simple heuristics: organization names ending in a common
        corporate suffix, and date-like substrings.

        Args:
            title: Title
            snippet: Snippet

        Returns:
            List of entities (at most 5)
        """
        import re

        text = f"{title} {snippet}"
        entities = []

        # Possible organization names: the word preceding a common suffix.
        org_suffixes = ["Inc", "Corp", "LLC", "Ltd", "Company", "University", "Institute"]
        words = text.split()
        for i, word in enumerate(words):
            if word in org_suffixes and i > 0:
                entities.append(f"{words[i - 1]} {word}")

        # Possible dates: ISO, US slash format, or "Month D, YYYY".
        date_pattern = r"\d{4}-\d{2}-\d{2}|\d{1,2}/\d{1,2}/\d{4}|\w+ \d{1,2}, \d{4}"
        dates = re.findall(date_pattern, text)
        entities.extend(dates)

        return entities[:5]  # Limit number of entities

    def _extract_tags(self, title: str, snippet: str, parsed_goal=None) -> list[str]:
        """
        Extract tags from title and snippet.

        Args:
            title: Title
            snippet: Snippet
            parsed_goal: Parsed task goal; its topic/concept attributes become tags

        Returns:
            De-duplicated list of tags (at most 10; order is unspecified)
        """
        tags = []

        # Tags from the parsed goal, when available.
        if parsed_goal:
            if hasattr(parsed_goal, "topic") and parsed_goal.topic:
                tags.append(parsed_goal.topic)
            if hasattr(parsed_goal, "concept") and parsed_goal.concept:
                tags.append(parsed_goal.concept)

        text = f"{title} {snippet}".lower()

        # Simple keyword spotting.
        keywords = [
            "news",
            "report",
            "article",
            "study",
            "research",
            "analysis",
            "update",
            "announcement",
            "policy",
            "memo",
            "document",
        ]
        tags.extend(keyword for keyword in keywords if keyword in text)

        # Remove duplicates and limit count (set iteration order is arbitrary).
        return list(set(tags))[:10]
@@ -0,0 +1,89 @@
1
+ """Factory for creating internet retrievers."""
2
+
3
+ from typing import Any, ClassVar
4
+
5
+ from memos.configs.internet_retriever import InternetRetrieverConfigFactory
6
+ from memos.embedders.base import BaseEmbedder
7
+ from memos.memories.textual.tree_text_memory.retrieve.internet_retriever import (
8
+ InternetGoogleRetriever,
9
+ )
10
+ from memos.memories.textual.tree_text_memory.retrieve.xinyusearch import XinyuSearchRetriever
11
+
12
+
13
class InternetRetrieverFactory:
    """Factory class for creating internet retriever instances."""

    backend_to_class: ClassVar[dict[str, Any]] = {
        "google": InternetGoogleRetriever,
        "bing": InternetGoogleRetriever,  # TODO: Implement BingRetriever
        "xinyu": XinyuSearchRetriever,
    }

    @classmethod
    def from_config(
        cls, config_factory: InternetRetrieverConfigFactory, embedder: BaseEmbedder
    ) -> InternetGoogleRetriever | XinyuSearchRetriever | None:
        """
        Create internet retriever from configuration.

        Args:
            config_factory: Internet retriever configuration
            embedder: Embedder instance for generating embeddings

        Returns:
            Retriever instance, or None if no backend is configured

        Raises:
            ValueError: If the configured backend is not supported.
        """
        if config_factory.backend is None:
            return None

        backend = config_factory.backend
        if backend not in cls.backend_to_class:
            raise ValueError(f"Invalid internet retriever backend: {backend}")

        retriever_class = cls.backend_to_class[backend]
        config = config_factory.config

        # Each backend takes slightly different constructor arguments.
        if backend == "google":
            return retriever_class(
                api_key=config.api_key,
                search_engine_id=config.search_engine_id,
                embedder=embedder,
                max_results=config.max_results,
                num_per_request=config.num_per_request,
            )
        elif backend == "bing":
            # TODO: Implement Bing retriever
            return retriever_class(
                api_key=config.api_key,
                search_engine_id=None,  # Bing doesn't use search_engine_id
                embedder=embedder,
                max_results=config.max_results,
                num_per_request=config.num_per_request,
            )
        else:  # backend == "xinyu" — membership was already validated above
            return retriever_class(
                access_key=config.api_key,  # Use api_key as access_key for xinyu
                search_engine_id=config.search_engine_id,
                embedder=embedder,
                max_results=config.max_results,
            )

    @classmethod
    def create_google_retriever(
        cls, api_key: str, search_engine_id: str, embedder: BaseEmbedder
    ) -> InternetGoogleRetriever:
        """
        Create Google Custom Search retriever with default limits.

        Args:
            api_key: Google API key
            search_engine_id: Google Custom Search Engine ID
            embedder: Embedder instance

        Returns:
            InternetGoogleRetriever instance
        """
        return InternetGoogleRetriever(api_key, search_engine_id, embedder)
@@ -0,0 +1,61 @@
1
+ import json
2
+ import re
3
+
4
+ from string import Template
5
+
6
+ from memos.memories.textual.item import TextualMemoryItem
7
+ from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
8
+ from memos.memories.textual.tree_text_memory.retrieve.utils import REASON_PROMPT
9
+
10
+
11
class MemoryReasoner:
    """
    Memory reasoner that performs reasoning and knowledge synthesis
    over retrieved memory items using a language model.
    """

    def __init__(self, llm):
        self.llm = llm

    def reason(
        self, query: str, ranked_memories: list, parsed_goal: ParsedTaskGoal
    ) -> list[TextualMemoryItem]:
        """
        Reason across multiple retrieved memory items and keep only those the
        language model selects as relevant to the query objective.

        Args:
            query (str): Original user query description.
            ranked_memories (list): List of relevant memory items.
            parsed_goal (ParsedTaskGoal): Structured topic/concept/fact from TaskGoalParser.

        Returns:
            List of TextualMemoryItem: Refined memory items.
        """
        # Render each candidate memory as "[id] key: text" for the prompt.
        detail_lines = [
            f"[{item.id}] {item.metadata.key}: {item.memory}" for item in ranked_memories
        ]
        prompt = Template(REASON_PROMPT).substitute(
            task=query, detailed_memory_list="\n".join(detail_lines)
        )

        raw = self.llm.generate([{"role": "user", "content": prompt}])
        text = getattr(raw, "content", raw)

        # Keep only memories whose ids the model selected.
        keep = set(self._parse_selected_ids(text))
        return [item for item in ranked_memories if item.id in keep]

    def _parse_selected_ids(self, response_text: str) -> list[str]:
        """
        Extracts memory IDs from model response. Supports both simple text list and JSON.
        """
        try:
            payload = json.loads(response_text)
        except json.JSONDecodeError:
            payload = None

        if isinstance(payload, dict) and "selected_ids" in payload:
            return payload["selected_ids"]

        return re.findall(r"[a-f0-9\-]{36}", response_text)  # UUID pattern fallback
@@ -0,0 +1,158 @@
1
+ import concurrent.futures
2
+
3
+ from memos.embedders.factory import OllamaEmbedder
4
+ from memos.graph_dbs.neo4j import Neo4jGraphDB
5
+ from memos.memories.textual.item import TextualMemoryItem
6
+ from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
7
+
8
+
9
class GraphMemoryRetriever:
    """
    Unified memory retriever that combines both graph-based and vector-based retrieval logic.
    """

    def __init__(self, graph_store: Neo4jGraphDB, embedder: OllamaEmbedder):
        self.graph_store = graph_store
        self.embedder = embedder

    def retrieve(
        self,
        query: str,
        parsed_goal: ParsedTaskGoal,
        top_k: int,
        memory_scope: str,
        query_embedding: list[list[float]] | None = None,
    ) -> list[TextualMemoryItem]:
        """
        Perform hybrid memory retrieval:
        - Run graph-based lookup from dispatch plan.
        - Run vector similarity search from embedded query.
        - Merge and return combined result set.

        Args:
            query (str): Original task query.
            parsed_goal (ParsedTaskGoal): Structured goal with keys/tags.
            top_k (int): Number of candidates per embedding in vector recall.
            memory_scope (str): One of ['WorkingMemory', 'LongTermMemory', 'UserMemory'].
            query_embedding: List of query embeddings; vector recall is
                skipped when None/empty.

        Returns:
            list: Combined, de-duplicated memory items.

        Raises:
            ValueError: If memory_scope is not a supported scope.
        """
        if memory_scope not in ["WorkingMemory", "LongTermMemory", "UserMemory"]:
            raise ValueError(f"Unsupported memory scope: {memory_scope}")

        if memory_scope == "WorkingMemory":
            # For working memory, retrieve all entries (no filtering)
            working_memories = self.graph_store.get_all_memory_items(scope="WorkingMemory")
            return [TextualMemoryItem.from_dict(record) for record in working_memories]

        # Step 1: Structured graph-based retrieval
        graph_results = self._graph_recall(parsed_goal, memory_scope)

        # Step 2: Vector similarity search
        vector_results = self._vector_recall(query_embedding, memory_scope, top_k)

        # Step 3: Merge and deduplicate by item id. Vector hits with the same
        # id overwrite graph hits; every id from both sources is preserved.
        # (Removed dead debug block: no graph id can ever be missing here.)
        combined = {item.id: item for item in graph_results + vector_results}
        return list(combined.values())

    def _graph_recall(
        self, parsed_goal: ParsedTaskGoal, memory_scope: str
    ) -> list[TextualMemoryItem]:
        """
        Perform structured node-based retrieval from Neo4j.
        - keys must match exactly (n.key IN keys)
        - tags must overlap with at least 2 input tags
        - scope filters by memory_type
        """
        candidate_ids = set()

        # 1) key-based OR branch
        if parsed_goal.keys:
            key_filters = [
                {"field": "key", "op": "in", "value": parsed_goal.keys},
                {"field": "memory_type", "op": "=", "value": memory_scope},
            ]
            candidate_ids.update(self.graph_store.get_by_metadata(key_filters))

        # 2) tag-based OR branch
        if parsed_goal.tags:
            tag_filters = [
                {"field": "tags", "op": "contains", "value": parsed_goal.tags},
                {"field": "memory_type", "op": "=", "value": memory_scope},
            ]
            candidate_ids.update(self.graph_store.get_by_metadata(tag_filters))

        # No matches → return empty
        if not candidate_ids:
            return []

        # Load nodes and post-filter: keep exact key matches, or nodes whose
        # tags overlap the goal tags in at least 2 places.
        node_dicts = self.graph_store.get_nodes(list(candidate_ids))

        final_nodes = []
        for node in node_dicts:
            meta = node.get("metadata", {})
            node_key = meta.get("key")
            node_tags = meta.get("tags", []) or []

            keep = False
            if parsed_goal.keys and node_key in parsed_goal.keys:
                keep = True
            elif parsed_goal.tags:
                overlap = len(set(node_tags) & set(parsed_goal.tags))
                if overlap >= 2:
                    keep = True
            if keep:
                final_nodes.append(TextualMemoryItem.from_dict(node))
        return final_nodes

    def _vector_recall(
        self,
        query_embedding: list[list[float]] | None,
        memory_scope: str,
        top_k: int = 20,
        max_num: int = 5,
    ) -> list[TextualMemoryItem]:
        """
        Perform vector-based similarity retrieval using the query embeddings.
        Only the first ``max_num`` embeddings are searched, in parallel.

        # TODO: tackle with post-filter and pre-filter(5.18+) better.
        """
        # BUGFIX: retrieve() permits query_embedding=None; slicing None below
        # raised TypeError. Treat missing/empty embeddings as "no matches".
        if not query_embedding:
            return []

        all_matches = []

        def search_single(vec):
            return (
                self.graph_store.search_by_embedding(vector=vec, top_k=top_k, scope=memory_scope)
                or []
            )

        with concurrent.futures.ThreadPoolExecutor() as executor:
            futures = [executor.submit(search_single, vec) for vec in query_embedding[:max_num]]
            for future in concurrent.futures.as_completed(futures):
                all_matches.extend(future.result())

        if not all_matches:
            return []

        # Extract matched IDs and retrieve full nodes.
        unique_ids = {match["id"] for match in all_matches}
        node_dicts = self.graph_store.get_nodes(list(unique_ids))

        return [TextualMemoryItem.from_dict(record) for record in node_dicts]
@@ -0,0 +1,111 @@
1
+ import numpy as np
2
+
3
+ from memos.embedders.factory import OllamaEmbedder
4
+ from memos.llms.factory import OllamaLLM, OpenAILLM
5
+ from memos.memories.textual.item import TextualMemoryItem
6
+ from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
7
+
8
+
9
def batch_cosine_similarity(
    query_vec: list[float], candidate_vecs: list[list[float]]
) -> list[float]:
    """
    Compute cosine similarity between one query vector and many candidates.

    Args:
        query_vec (list[float]): The query embedding.
        candidate_vecs (list[list[float]]): A list of memory embeddings.

    Returns:
        list[float]: Cosine similarity scores, one per candidate.
    """
    query = np.asarray(query_vec)
    candidates = np.asarray(candidate_vecs)

    # Small epsilon keeps zero-norm vectors from dividing by zero.
    eps = 1e-10
    denominator = np.linalg.norm(candidates, axis=1) * np.linalg.norm(query) + eps

    return ((candidates @ query) / denominator).tolist()
37
+
38
+
39
class MemoryReranker:
    """
    Rank retrieved memory cards by structural priority and contextual similarity.
    """

    def __init__(self, llm: OpenAILLM | OllamaLLM, embedder: OllamaEmbedder):
        self.llm = llm
        self.embedder = embedder

        # Structural priority weights. All levels currently weigh 1.0, so the
        # boost is a no-op until the weights are tuned.
        self.level_weights = {
            "topic": 1.0,
            "concept": 1.0,
            "fact": 1.0,
        }

    def rerank(
        self,
        query: str,
        query_embedding: list[float],
        graph_results: list,
        top_k: int,
        parsed_goal: ParsedTaskGoal,
    ) -> list[tuple[TextualMemoryItem, float]]:
        """
        Rerank memory items by relevance to task.

        Args:
            query (str): Original task.
            query_embedding (list[float]): Embedding of the query.
            graph_results (list): Combined retrieval results.
            top_k (int): Number of top results to return.
            parsed_goal (ParsedTaskGoal): Structured task representation.

        Returns:
            list(tuple): Ranked (memory item, score) pairs. Items lacking
            embeddings may appear in the padding tail with sentinel scores.
        """
        # Step 1: Filter out items without embeddings
        items_with_embeddings = [item for item in graph_results if item.metadata.embedding]
        embeddings = [item.metadata.embedding for item in items_with_embeddings]

        if not embeddings:
            # BUGFIX: this branch previously returned bare items, breaking the
            # documented (item, score) contract of every other return path.
            # Use a neutral 0.0 score so callers can always unpack pairs.
            return [(item, 0.0) for item in graph_results[:top_k]]

        # Step 2: Compute cosine similarities
        similarity_scores = batch_cosine_similarity(query_embedding, embeddings)

        # Step 3: Apply structural weight boost
        def get_weight(item: TextualMemoryItem) -> float:
            # NOTE(review): the weight keys ("topic"/"concept"/"fact") look like
            # memory levels, but this reads metadata.background, which holds
            # free text elsewhere in the codebase — confirm the intended field.
            # Harmless today because every weight is 1.0.
            level = item.metadata.background
            return self.level_weights.get(level, 1.0)

        weighted_scores = [
            sim * get_weight(item)
            for sim, item in zip(similarity_scores, items_with_embeddings, strict=False)
        ]

        # Step 4: Sort by weighted score
        sorted_items = sorted(
            zip(items_with_embeddings, weighted_scores, strict=False),
            key=lambda pair: pair[1],
            reverse=True,
        )

        # Step 5: Return top-k items, padding with unscored leftovers if needed
        top_items = sorted_items[:top_k]

        if len(top_items) < top_k:
            # Compare by id instead of full-item equality: O(n) set lookups
            # rather than O(n^2) list membership tests.
            selected_ids = {item.id for item, _ in top_items}
            remaining = [(item, -1.0) for item in graph_results if item.id not in selected_ids]
            top_items.extend(remaining[: top_k - len(top_items)])

        return top_items  # list of (item, score)
@@ -0,0 +1,13 @@
1
+ from dataclasses import dataclass, field
2
+
3
+
4
@dataclass
class ParsedTaskGoal:
    """
    Goal structure for both Fast & LLM.
    """

    # Memory strings associated with the parsed goal.
    memories: list[str] = field(default_factory=list)
    # Exact keys to match during graph recall (used as `key IN keys` filters).
    keys: list[str] = field(default_factory=list)
    # Tags used for overlap-based graph recall.
    tags: list[str] = field(default_factory=list)
    goal_type: str | None = None  # e.g., 'default', 'explanation', etc.