MemoryOS 0.2.1__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MemoryOS might be problematic. Click here for more details.

Files changed (92)
  1. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/METADATA +7 -1
  2. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/RECORD +87 -64
  3. memos/__init__.py +1 -1
  4. memos/api/config.py +158 -69
  5. memos/api/context/context.py +147 -0
  6. memos/api/context/dependencies.py +101 -0
  7. memos/api/product_models.py +5 -1
  8. memos/api/routers/product_router.py +54 -26
  9. memos/configs/graph_db.py +49 -1
  10. memos/configs/internet_retriever.py +19 -0
  11. memos/configs/mem_os.py +5 -0
  12. memos/configs/mem_reader.py +9 -0
  13. memos/configs/mem_scheduler.py +54 -18
  14. memos/configs/mem_user.py +58 -0
  15. memos/graph_dbs/base.py +38 -3
  16. memos/graph_dbs/factory.py +2 -0
  17. memos/graph_dbs/nebular.py +1612 -0
  18. memos/graph_dbs/neo4j.py +18 -9
  19. memos/log.py +6 -1
  20. memos/mem_cube/utils.py +13 -6
  21. memos/mem_os/core.py +157 -37
  22. memos/mem_os/main.py +2 -2
  23. memos/mem_os/product.py +252 -201
  24. memos/mem_os/utils/default_config.py +1 -1
  25. memos/mem_os/utils/format_utils.py +281 -70
  26. memos/mem_os/utils/reference_utils.py +133 -0
  27. memos/mem_reader/simple_struct.py +13 -5
  28. memos/mem_scheduler/base_scheduler.py +239 -266
  29. memos/mem_scheduler/{modules → general_modules}/base.py +4 -5
  30. memos/mem_scheduler/{modules → general_modules}/dispatcher.py +57 -21
  31. memos/mem_scheduler/general_modules/misc.py +104 -0
  32. memos/mem_scheduler/{modules → general_modules}/rabbitmq_service.py +12 -10
  33. memos/mem_scheduler/{modules → general_modules}/redis_service.py +1 -1
  34. memos/mem_scheduler/general_modules/retriever.py +199 -0
  35. memos/mem_scheduler/general_modules/scheduler_logger.py +261 -0
  36. memos/mem_scheduler/general_scheduler.py +243 -80
  37. memos/mem_scheduler/monitors/__init__.py +0 -0
  38. memos/mem_scheduler/monitors/dispatcher_monitor.py +305 -0
  39. memos/mem_scheduler/{modules/monitor.py → monitors/general_monitor.py} +106 -57
  40. memos/mem_scheduler/mos_for_test_scheduler.py +23 -20
  41. memos/mem_scheduler/schemas/__init__.py +0 -0
  42. memos/mem_scheduler/schemas/general_schemas.py +44 -0
  43. memos/mem_scheduler/schemas/message_schemas.py +149 -0
  44. memos/mem_scheduler/schemas/monitor_schemas.py +337 -0
  45. memos/mem_scheduler/utils/__init__.py +0 -0
  46. memos/mem_scheduler/utils/filter_utils.py +176 -0
  47. memos/mem_scheduler/utils/misc_utils.py +102 -0
  48. memos/mem_user/factory.py +94 -0
  49. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  50. memos/mem_user/mysql_user_manager.py +500 -0
  51. memos/mem_user/persistent_factory.py +96 -0
  52. memos/mem_user/user_manager.py +4 -4
  53. memos/memories/activation/item.py +5 -1
  54. memos/memories/activation/kv.py +20 -8
  55. memos/memories/textual/base.py +2 -2
  56. memos/memories/textual/general.py +36 -92
  57. memos/memories/textual/item.py +5 -33
  58. memos/memories/textual/tree.py +13 -7
  59. memos/memories/textual/tree_text_memory/organize/{conflict.py → handler.py} +34 -50
  60. memos/memories/textual/tree_text_memory/organize/manager.py +8 -96
  61. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +49 -43
  62. memos/memories/textual/tree_text_memory/organize/reorganizer.py +107 -142
  63. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +229 -0
  64. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -3
  65. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +11 -0
  66. memos/memories/textual/tree_text_memory/retrieve/recall.py +15 -8
  67. memos/memories/textual/tree_text_memory/retrieve/reranker.py +1 -1
  68. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +2 -0
  69. memos/memories/textual/tree_text_memory/retrieve/searcher.py +191 -116
  70. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +47 -15
  71. memos/memories/textual/tree_text_memory/retrieve/utils.py +11 -7
  72. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +62 -58
  73. memos/memos_tools/dinding_report_bot.py +422 -0
  74. memos/memos_tools/lockfree_dict.py +120 -0
  75. memos/memos_tools/notification_service.py +44 -0
  76. memos/memos_tools/notification_utils.py +96 -0
  77. memos/memos_tools/thread_safe_dict.py +288 -0
  78. memos/settings.py +3 -1
  79. memos/templates/mem_reader_prompts.py +4 -1
  80. memos/templates/mem_scheduler_prompts.py +62 -15
  81. memos/templates/mos_prompts.py +116 -0
  82. memos/templates/tree_reorganize_prompts.py +24 -17
  83. memos/utils.py +19 -0
  84. memos/mem_scheduler/modules/misc.py +0 -39
  85. memos/mem_scheduler/modules/retriever.py +0 -268
  86. memos/mem_scheduler/modules/schemas.py +0 -328
  87. memos/mem_scheduler/utils.py +0 -75
  88. memos/memories/textual/tree_text_memory/organize/redundancy.py +0 -193
  89. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/LICENSE +0 -0
  90. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/WHEEL +0 -0
  91. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/entry_points.txt +0 -0
  92. /memos/mem_scheduler/{modules → general_modules}/__init__.py +0 -0
@@ -4,6 +4,8 @@ from typing import Any, ClassVar
4
4
 
5
5
  from memos.configs.internet_retriever import InternetRetrieverConfigFactory
6
6
  from memos.embedders.base import BaseEmbedder
7
+ from memos.mem_reader.factory import MemReaderFactory
8
+ from memos.memories.textual.tree_text_memory.retrieve.bochasearch import BochaAISearchRetriever
7
9
  from memos.memories.textual.tree_text_memory.retrieve.internet_retriever import (
8
10
  InternetGoogleRetriever,
9
11
  )
@@ -17,6 +19,7 @@ class InternetRetrieverFactory:
17
19
  "google": InternetGoogleRetriever,
18
20
  "bing": InternetGoogleRetriever, # TODO: Implement BingRetriever
19
21
  "xinyu": XinyuSearchRetriever,
22
+ "bocha": BochaAISearchRetriever,
20
23
  }
21
24
 
22
25
  @classmethod
@@ -66,6 +69,14 @@ class InternetRetrieverFactory:
66
69
  access_key=config.api_key, # Use api_key as access_key for xinyu
67
70
  search_engine_id=config.search_engine_id,
68
71
  embedder=embedder,
72
+ reader=MemReaderFactory.from_config(config.reader),
73
+ max_results=config.max_results,
74
+ )
75
+ elif backend == "bocha":
76
+ return retriever_class(
77
+ access_key=config.api_key, # Use api_key as access_key for xinyu
78
+ embedder=embedder,
79
+ reader=MemReaderFactory.from_config(config.reader),
69
80
  max_results=config.max_results,
70
81
  )
71
82
  else:
@@ -44,16 +44,23 @@ class GraphMemoryRetriever:
44
44
 
45
45
  if memory_scope == "WorkingMemory":
46
46
  # For working memory, retrieve all entries (no filtering)
47
- working_memories = self.graph_store.get_all_memory_items(scope="WorkingMemory")
47
+ working_memories = self.graph_store.get_all_memory_items(
48
+ scope="WorkingMemory", include_embedding=True
49
+ )
48
50
  return [TextualMemoryItem.from_dict(record) for record in working_memories]
49
51
 
50
- # Step 1: Structured graph-based retrieval
51
- graph_results = self._graph_recall(parsed_goal, memory_scope)
52
+ with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
53
+ # Structured graph-based retrieval
54
+ future_graph = executor.submit(self._graph_recall, parsed_goal, memory_scope)
55
+ # Vector similarity search
56
+ future_vector = executor.submit(
57
+ self._vector_recall, query_embedding, memory_scope, top_k
58
+ )
52
59
 
53
- # Step 2: Vector similarity search
54
- vector_results = self._vector_recall(query_embedding, memory_scope, top_k)
60
+ graph_results = future_graph.result()
61
+ vector_results = future_vector.result()
55
62
 
56
- # Step 3: Merge and deduplicate results
63
+ # Merge and deduplicate by ID
57
64
  combined = {item.id: item for item in graph_results + vector_results}
58
65
 
59
66
  graph_ids = {item.id for item in graph_results}
@@ -101,7 +108,7 @@ class GraphMemoryRetriever:
101
108
  return []
102
109
 
103
110
  # Load nodes and post-filter
104
- node_dicts = self.graph_store.get_nodes(list(candidate_ids))
111
+ node_dicts = self.graph_store.get_nodes(list(candidate_ids), include_embedding=True)
105
112
 
106
113
  final_nodes = []
107
114
  for node in node_dicts:
@@ -152,6 +159,6 @@ class GraphMemoryRetriever:
152
159
 
153
160
  # Step 3: Extract matched IDs and retrieve full nodes
154
161
  unique_ids = set({r["id"] for r in all_matches})
155
- node_dicts = self.graph_store.get_nodes(list(unique_ids))
162
+ node_dicts = self.graph_store.get_nodes(list(unique_ids), include_embedding=True)
156
163
 
157
164
  return [TextualMemoryItem.from_dict(record) for record in node_dicts]
@@ -78,7 +78,7 @@ class MemoryReranker:
78
78
  embeddings = [item.metadata.embedding for item in items_with_embeddings]
79
79
 
80
80
  if not embeddings:
81
- return graph_results[:top_k]
81
+ return [(item, 0.5) for item in graph_results[:top_k]]
82
82
 
83
83
  # Step 2: Compute cosine similarities
84
84
  similarity_scores = batch_cosine_similarity(query_embedding, embeddings)
@@ -10,4 +10,6 @@ class ParsedTaskGoal:
10
10
  memories: list[str] = field(default_factory=list)
11
11
  keys: list[str] = field(default_factory=list)
12
12
  tags: list[str] = field(default_factory=list)
13
+ rephrased_query: str | None = None
14
+ internet_search: bool = False
13
15
  goal_type: str | None = None # e.g., 'default', 'explanation', etc.
@@ -6,7 +6,9 @@ from datetime import datetime
6
6
  from memos.embedders.factory import OllamaEmbedder
7
7
  from memos.graph_dbs.factory import Neo4jGraphDB
8
8
  from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
9
+ from memos.log import get_logger
9
10
  from memos.memories.textual.item import SearchedTreeNodeTextualMemoryMetadata, TextualMemoryItem
11
+ from memos.utils import timed
10
12
 
11
13
  from .internet_retriever_factory import InternetRetrieverFactory
12
14
  from .reasoner import MemoryReasoner
@@ -15,6 +17,9 @@ from .reranker import MemoryReranker
15
17
  from .task_goal_parser import TaskGoalParser
16
18
 
17
19
 
20
+ logger = get_logger(__name__)
21
+
22
+
18
23
  class Searcher:
19
24
  def __init__(
20
25
  self,
@@ -34,8 +39,9 @@ class Searcher:
34
39
  # Create internet retriever from config if provided
35
40
  self.internet_retriever = internet_retriever
36
41
 
42
+ @timed
37
43
  def search(
38
- self, query: str, top_k: int, info=None, mode: str = "fast", memory_type: str = "All"
44
+ self, query: str, top_k: int, info=None, mode="fast", memory_type="All"
39
45
  ) -> list[TextualMemoryItem]:
40
46
  """
41
47
  Search for memories based on a query.
@@ -53,152 +59,222 @@ class Searcher:
53
59
  Returns:
54
60
  list[TextualMemoryItem]: List of matching memories.
55
61
  """
62
+ logger.info(
63
+ f"[SEARCH] Start query='{query}', top_k={top_k}, mode={mode}, memory_type={memory_type}"
64
+ )
65
+ if not info:
66
+ logger.warning(
67
+ "Please input 'info' when use tree.search so that "
68
+ "the database would store the consume history."
69
+ )
70
+ info = {"user_id": "", "session_id": ""}
71
+ else:
72
+ logger.debug(f"[SEARCH] Received info dict: {info}")
73
+
74
+ parsed_goal, query_embedding, context, query = self._parse_task(query, info, mode)
75
+ results = self._retrieve_paths(
76
+ query, parsed_goal, query_embedding, info, top_k, mode, memory_type
77
+ )
78
+ deduped = self._deduplicate_results(results)
79
+ final_results = self._sort_and_trim(deduped, top_k)
80
+ self._update_usage_history(final_results, info)
81
+
82
+ logger.info(f"[SEARCH] Done. Total {len(final_results)} results.")
83
+ return final_results
56
84
 
57
- # Step 1: Parse task structure into topic, concept, and fact levels
85
+ @timed
86
+ def _parse_task(self, query, info, mode, top_k=5):
87
+ """Parse user query, do embedding search and create context"""
58
88
  context = []
89
+ query_embedding = None
90
+
91
+ # fine mode will trigger initial embedding search
59
92
  if mode == "fine":
93
+ logger.info("[SEARCH] Fine mode: embedding search")
60
94
  query_embedding = self.embedder.embed([query])[0]
61
- related_node_ids = self.graph_store.search_by_embedding(query_embedding, top_k=top_k)
95
+
96
+ # retrieve related nodes by embedding
62
97
  related_nodes = [
63
- self.graph_store.get_node(related_node["id"]) for related_node in related_node_ids
98
+ self.graph_store.get_node(n["id"])
99
+ for n in self.graph_store.search_by_embedding(query_embedding, top_k=top_k)
64
100
  ]
101
+ context = list({node["memory"] for node in related_nodes})
65
102
 
66
- context = [related_node["memory"] for related_node in related_nodes]
67
- context = list(set(context))
103
+ # optional: supplement context with internet knowledge
104
+ if self.internet_retriever:
105
+ extra = self.internet_retriever.retrieve_from_internet(query=query, top_k=3)
106
+ context.extend(item.memory.partition("\nContent: ")[-1] for item in extra)
68
107
 
69
- # Step 1a: Parse task structure into topic, concept, and fact levels
70
- parsed_goal = self.task_goal_parser.parse(query, "\n".join(context))
108
+ # parse goal using LLM
109
+ parsed_goal = self.task_goal_parser.parse(
110
+ task_description=query,
111
+ context="\n".join(context),
112
+ conversation=info.get("chat_history", []),
113
+ mode=mode,
114
+ )
71
115
 
116
+ query = parsed_goal.rephrased_query or query
117
+ # if goal has extra memories, embed them too
72
118
  if parsed_goal.memories:
73
119
  query_embedding = self.embedder.embed(list({query, *parsed_goal.memories}))
74
120
 
75
- # Step 2a: Working memory retrieval (Path A)
76
- def retrieve_from_working_memory():
77
- """
78
- Direct structure-based retrieval from working memory.
79
- """
80
- if memory_type not in ["All", "WorkingMemory"]:
81
- return []
121
+ return parsed_goal, query_embedding, context, query
82
122
 
83
- working_memory = self.graph_retriever.retrieve(
84
- query=query, parsed_goal=parsed_goal, top_k=top_k, memory_scope="WorkingMemory"
85
- )
86
- # Rerank working_memory results
87
- ranked_memories = self.reranker.rerank(
88
- query=query,
89
- query_embedding=query_embedding[0],
90
- graph_results=working_memory,
91
- top_k=top_k,
92
- parsed_goal=parsed_goal,
123
+ @timed
124
+ def _retrieve_paths(self, query, parsed_goal, query_embedding, info, top_k, mode, memory_type):
125
+ """Run A/B/C retrieval paths in parallel"""
126
+ tasks = []
127
+ with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
128
+ tasks.append(
129
+ executor.submit(
130
+ self._retrieve_from_working_memory,
131
+ query,
132
+ parsed_goal,
133
+ query_embedding,
134
+ top_k,
135
+ memory_type,
136
+ )
93
137
  )
94
- return ranked_memories
95
-
96
- # Step 2b: Parallel long-term and user memory retrieval (Path B)
97
- def retrieve_ranked_long_term_and_user():
98
- """
99
- Retrieve from both long-term and user memory, then rank and merge results.
100
- """
101
- long_term_items = (
102
- self.graph_retriever.retrieve(
103
- query=query,
104
- query_embedding=query_embedding,
105
- parsed_goal=parsed_goal,
106
- top_k=top_k * 2,
107
- memory_scope="LongTermMemory",
138
+ tasks.append(
139
+ executor.submit(
140
+ self._retrieve_from_long_term_and_user,
141
+ query,
142
+ parsed_goal,
143
+ query_embedding,
144
+ top_k,
145
+ memory_type,
108
146
  )
109
- if memory_type in ["All", "LongTermMemory"]
110
- else []
111
147
  )
112
- user_items = (
113
- self.graph_retriever.retrieve(
114
- query=query,
115
- query_embedding=query_embedding,
116
- parsed_goal=parsed_goal,
117
- top_k=top_k * 2,
118
- memory_scope="UserMemory",
148
+ tasks.append(
149
+ executor.submit(
150
+ self._retrieve_from_internet,
151
+ query,
152
+ parsed_goal,
153
+ query_embedding,
154
+ top_k,
155
+ info,
156
+ mode,
157
+ memory_type,
119
158
  )
120
- if memory_type in ["All", "UserMemory"]
121
- else []
122
159
  )
123
160
 
124
- # Rerank combined results
125
- ranked_memories = self.reranker.rerank(
161
+ results = []
162
+ for t in tasks:
163
+ results.extend(t.result())
164
+
165
+ logger.info(f"[SEARCH] Total raw results: {len(results)}")
166
+ return results
167
+
168
+ # --- Path A
169
+ @timed
170
+ def _retrieve_from_working_memory(
171
+ self, query, parsed_goal, query_embedding, top_k, memory_type
172
+ ):
173
+ """Retrieve and rerank from WorkingMemory"""
174
+ if memory_type not in ["All", "WorkingMemory"]:
175
+ logger.info(f"[PATH-A] '{query}'Skipped (memory_type does not match)")
176
+ return []
177
+ items = self.graph_retriever.retrieve(
178
+ query=query, parsed_goal=parsed_goal, top_k=top_k, memory_scope="WorkingMemory"
179
+ )
180
+ return self.reranker.rerank(
181
+ query=query,
182
+ query_embedding=query_embedding[0],
183
+ graph_results=items,
184
+ top_k=top_k,
185
+ parsed_goal=parsed_goal,
186
+ )
187
+
188
+ # --- Path B
189
+ @timed
190
+ def _retrieve_from_long_term_and_user(
191
+ self, query, parsed_goal, query_embedding, top_k, memory_type
192
+ ):
193
+ """Retrieve and rerank from LongTermMemory and UserMemory"""
194
+ results = []
195
+ if memory_type in ["All", "LongTermMemory"]:
196
+ results += self.graph_retriever.retrieve(
126
197
  query=query,
127
- query_embedding=query_embedding[0],
128
- graph_results=long_term_items + user_items,
129
- top_k=top_k * 2,
130
198
  parsed_goal=parsed_goal,
199
+ query_embedding=query_embedding,
200
+ top_k=top_k * 2,
201
+ memory_scope="LongTermMemory",
131
202
  )
132
- return ranked_memories
133
-
134
- # Step 2c: Internet retrieval (Path C)
135
- def retrieve_from_internet():
136
- """
137
- Retrieve information from the internet using Google Custom Search API.
138
- """
139
- if not self.internet_retriever:
140
- return []
141
- if memory_type not in ["All"]:
142
- return []
143
- internet_items = self.internet_retriever.retrieve_from_internet(
144
- query=query, top_k=top_k, parsed_goal=parsed_goal
145
- )
146
-
147
- # Convert to the format expected by reranker
148
- ranked_memories = self.reranker.rerank(
203
+ if memory_type in ["All", "UserMemory"]:
204
+ results += self.graph_retriever.retrieve(
149
205
  query=query,
150
- query_embedding=query_embedding[0],
151
- graph_results=internet_items,
152
- top_k=top_k * 2,
153
206
  parsed_goal=parsed_goal,
207
+ query_embedding=query_embedding,
208
+ top_k=top_k * 2,
209
+ memory_scope="UserMemory",
154
210
  )
155
- return ranked_memories
211
+ return self.reranker.rerank(
212
+ query=query,
213
+ query_embedding=query_embedding[0],
214
+ graph_results=results,
215
+ top_k=top_k * 2,
216
+ parsed_goal=parsed_goal,
217
+ )
156
218
 
157
- # Step 3: Parallel execution of all paths
158
- with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
159
- future_working = executor.submit(retrieve_from_working_memory)
160
- future_hybrid = executor.submit(retrieve_ranked_long_term_and_user)
161
- future_internet = executor.submit(retrieve_from_internet)
162
-
163
- working_results = future_working.result()
164
- hybrid_results = future_hybrid.result()
165
- internet_results = future_internet.result()
166
- searched_res = working_results + hybrid_results + internet_results
167
-
168
- # Deduplicate by item.memory, keep higher score
169
- deduped_result = {}
170
- for item, score in searched_res:
171
- mem_key = item.memory
172
- if mem_key not in deduped_result or score > deduped_result[mem_key][1]:
173
- deduped_result[mem_key] = (item, score)
174
-
175
- searched_res = []
176
- for item, score in sorted(deduped_result.values(), key=lambda pair: pair[1], reverse=True)[
177
- :top_k
178
- ]:
219
+ # --- Path C
220
+ @timed
221
+ def _retrieve_from_internet(
222
+ self, query, parsed_goal, query_embedding, top_k, info, mode, memory_type
223
+ ):
224
+ """Retrieve and rerank from Internet source"""
225
+ if not self.internet_retriever or mode == "fast":
226
+ logger.info(f"[PATH-C] '{query}' Skipped (no retriever, fast mode)")
227
+ return []
228
+ if memory_type not in ["All"]:
229
+ return []
230
+ logger.info(f"[PATH-C] '{query}' Retrieving from internet...")
231
+ items = self.internet_retriever.retrieve_from_internet(
232
+ query=query, top_k=top_k, parsed_goal=parsed_goal, info=info
233
+ )
234
+ logger.info(f"[PATH-C] '{query}' Retrieved from internet {len(items)} items: {items}")
235
+ return self.reranker.rerank(
236
+ query=query,
237
+ query_embedding=query_embedding[0],
238
+ graph_results=items,
239
+ top_k=min(top_k, 5),
240
+ parsed_goal=parsed_goal,
241
+ )
242
+
243
+ @timed
244
+ def _deduplicate_results(self, results):
245
+ """Deduplicate results by memory text"""
246
+ deduped = {}
247
+ for item, score in results:
248
+ if item.memory not in deduped or score > deduped[item.memory][1]:
249
+ deduped[item.memory] = (item, score)
250
+ return list(deduped.values())
251
+
252
+ @timed
253
+ def _sort_and_trim(self, results, top_k):
254
+ """Sort results by score and trim to top_k"""
255
+ sorted_results = sorted(results, key=lambda pair: pair[1], reverse=True)[:top_k]
256
+ final_items = []
257
+ for item, score in sorted_results:
179
258
  meta_data = item.metadata.model_dump()
180
259
  if "relativity" not in meta_data:
181
260
  meta_data["relativity"] = score
182
- new_meta = SearchedTreeNodeTextualMemoryMetadata(**meta_data)
183
- searched_res.append(
184
- TextualMemoryItem(id=item.id, memory=item.memory, metadata=new_meta)
185
- )
186
-
187
- # Step 4: Reasoning over all retrieved and ranked memory
188
- if mode == "fine":
189
- searched_res = self.reasoner.reason(
190
- query=query,
191
- ranked_memories=searched_res,
192
- parsed_goal=parsed_goal,
261
+ final_items.append(
262
+ TextualMemoryItem(
263
+ id=item.id,
264
+ memory=item.memory,
265
+ metadata=SearchedTreeNodeTextualMemoryMetadata(**meta_data),
266
+ )
193
267
  )
268
+ return final_items
194
269
 
195
- # Step 5: Update usage history with current timestamp
270
+ @timed
271
+ def _update_usage_history(self, items, info):
272
+ """Update usage history in graph DB"""
196
273
  now_time = datetime.now().isoformat()
197
- usage_record = json.dumps(
198
- {"time": now_time, "info": info}
199
- ) # `info` should be a serializable dict or string
200
-
201
- for item in searched_res:
274
+ info.pop("chat_history", None)
275
+ # `info` should be a serializable dict or string
276
+ usage_record = json.dumps({"time": now_time, "info": info})
277
+ for item in items:
202
278
  if (
203
279
  hasattr(item, "id")
204
280
  and hasattr(item, "metadata")
@@ -206,4 +282,3 @@ class Searcher:
206
282
  ):
207
283
  item.metadata.usage.append(usage_record)
208
284
  self.graph_store.update_node(item.id, {"usage": item.metadata.usage})
209
- return searched_res
@@ -1,12 +1,16 @@
1
- import json
1
+ import traceback
2
2
 
3
3
  from string import Template
4
4
 
5
5
  from memos.llms.base import BaseLLM
6
+ from memos.log import get_logger
6
7
  from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
7
8
  from memos.memories.textual.tree_text_memory.retrieve.utils import TASK_PARSE_PROMPT
8
9
 
9
10
 
11
+ logger = get_logger(__name__)
12
+
13
+
10
14
  class TaskGoalParser:
11
15
  """
12
16
  Unified TaskGoalParser:
@@ -14,11 +18,16 @@ class TaskGoalParser:
14
18
  - mode == 'fine': use LLM to parse structured topic/keys/tags
15
19
  """
16
20
 
17
- def __init__(self, llm=BaseLLM, mode: str = "fast"):
21
+ def __init__(self, llm=BaseLLM):
18
22
  self.llm = llm
19
- self.mode = mode
20
23
 
21
- def parse(self, task_description: str, context: str = "") -> ParsedTaskGoal:
24
+ def parse(
25
+ self,
26
+ task_description: str,
27
+ context: str = "",
28
+ conversation: list[dict] | None = None,
29
+ mode: str = "fast",
30
+ ) -> ParsedTaskGoal:
22
31
  """
23
32
  Parse user input into structured semantic layers.
24
33
  Returns:
@@ -26,42 +35,65 @@ class TaskGoalParser:
26
35
  - mode == 'fast': use jieba to split words only
27
36
  - mode == 'fine': use LLM to parse structured topic/keys/tags
28
37
  """
29
- if self.mode == "fast":
38
+ if mode == "fast":
30
39
  return self._parse_fast(task_description)
31
- elif self.mode == "fine":
40
+ elif mode == "fine":
32
41
  if not self.llm:
33
42
  raise ValueError("LLM not provided for slow mode.")
34
- return self._parse_fine(task_description, context)
43
+ return self._parse_fine(task_description, context, conversation)
35
44
  else:
36
- raise ValueError(f"Unknown mode: {self.mode}")
45
+ raise ValueError(f"Unknown mode: {mode}")
37
46
 
38
47
  def _parse_fast(self, task_description: str, limit_num: int = 5) -> ParsedTaskGoal:
39
48
  """
40
49
  Fast mode: simple jieba word split.
41
50
  """
42
51
  return ParsedTaskGoal(
43
- memories=[task_description], keys=[task_description], tags=[], goal_type="default"
52
+ memories=[task_description],
53
+ keys=[task_description],
54
+ tags=[],
55
+ goal_type="default",
56
+ rephrased_query=task_description,
57
+ internet_search=False,
44
58
  )
45
59
 
46
- def _parse_fine(self, query: str, context: str = "") -> ParsedTaskGoal:
60
+ def _parse_fine(
61
+ self, query: str, context: str = "", conversation: list[dict] | None = None
62
+ ) -> ParsedTaskGoal:
47
63
  """
48
64
  Slow mode: LLM structured parse.
49
65
  """
50
- prompt = Template(TASK_PARSE_PROMPT).substitute(task=query.strip(), context=context)
51
- response = self.llm.generate(messages=[{"role": "user", "content": prompt}])
52
- return self._parse_response(response)
66
+ try:
67
+ if conversation:
68
+ conversation_prompt = "\n".join(
69
+ [f"{each['role']}: {each['content']}" for each in conversation]
70
+ )
71
+ else:
72
+ conversation_prompt = ""
73
+ prompt = Template(TASK_PARSE_PROMPT).substitute(
74
+ task=query.strip(), context=context, conversation=conversation_prompt
75
+ )
76
+ logger.info(f"Parsing Goal... LLM input is {prompt}")
77
+ response = self.llm.generate(messages=[{"role": "user", "content": prompt}])
78
+ logger.info(f"Parsing Goal... LLM Response is {response}")
79
+ return self._parse_response(response)
80
+ except Exception:
81
+ logger.warning(f"Fail to fine-parse query {query}: {traceback.format_exc()}")
82
+ return self._parse_fast(query)
53
83
 
54
84
  def _parse_response(self, response: str) -> ParsedTaskGoal:
55
85
  """
56
86
  Parse LLM JSON output safely.
57
87
  """
58
88
  try:
59
- response = response.replace("```", "").replace("json", "")
60
- response_json = json.loads(response.strip())
89
+ response = response.replace("```", "").replace("json", "").strip()
90
+ response_json = eval(response)
61
91
  return ParsedTaskGoal(
62
92
  memories=response_json.get("memories", []),
63
93
  keys=response_json.get("keys", []),
64
94
  tags=response_json.get("tags", []),
95
+ rephrased_query=response_json.get("rephrased_instruction", None),
96
+ internet_search=response_json.get("internet_search", False),
65
97
  goal_type=response_json.get("goal_type", "default"),
66
98
  )
67
99
  except Exception as e:
@@ -1,19 +1,21 @@
1
1
  # Prompt for task parsing
2
2
  TASK_PARSE_PROMPT = """
3
- You are a task parsing expert. Given a user's task instruction, extract the following structured information:
4
-
5
- Given a user task instruction and optional related memory context,
6
- extract the following structured information:
3
+ You are a task parsing expert. Given a user task instruction, optional former conversation and optional related memory context,extract the following structured information:
7
4
  1. Keys: the high-level keywords directly relevant to the user’s task.
8
5
  2. Tags: thematic tags to help categorize and retrieve related memories.
9
6
  3. Goal Type: retrieval | qa | generation
10
- 4. Memories: Provide 2–5 short semantic expansions or rephrasings of the task instruction.
11
- These are used for improved embedding search coverage.
12
- Each should be clear, concise, and meaningful for retrieval.
7
+ 4. Rephrased instruction: Give a rephrased task instruction based on the former conversation to make it less confusing to look alone. If you think the task instruction is easy enough to understand, or there is no former conversation, set "rephrased_instruction" to an empty string.
8
+ 5. Need for internet search: If the user's task instruction only involves objective facts or can be completed without introducing external knowledge, set "internet_search" to False. Otherwise, set it to True.
9
+ 6. Memories: Provide 2–5 short semantic expansions or rephrasings of the rephrased/original user task instruction. These are used for improved embedding search coverage. Each should be clear, concise, and meaningful for retrieval.
13
10
 
14
11
  Task description:
15
12
  \"\"\"$task\"\"\"
16
13
 
14
+ Former conversation (if any):
15
+ \"\"\"
16
+ $conversation
17
+ \"\"\"
18
+
17
19
  Context (if any):
18
20
  \"\"\"$context\"\"\"
19
21
 
@@ -22,6 +24,8 @@ Return strictly in this JSON format:
22
24
  "keys": [...],
23
25
  "tags": [...],
24
26
  "goal_type": "retrieval | qa | generation",
27
+ "rephrased_instruction": "...", # return an empty string if the original instruction is easy enough to understand
28
+ "internet_search": True/False,
25
29
  "memories": ["...", "...", ...]
26
30
  }
27
31
  """