MemoryOS 0.2.1__py3-none-any.whl → 0.2.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MemoryOS might be problematic. Click here for more details.

Files changed (74) hide show
  1. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/METADATA +2 -1
  2. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/RECORD +72 -55
  3. memos/__init__.py +1 -1
  4. memos/api/config.py +156 -65
  5. memos/api/context/context.py +147 -0
  6. memos/api/context/dependencies.py +90 -0
  7. memos/api/product_models.py +5 -1
  8. memos/api/routers/product_router.py +54 -26
  9. memos/configs/graph_db.py +49 -1
  10. memos/configs/internet_retriever.py +6 -0
  11. memos/configs/mem_os.py +5 -0
  12. memos/configs/mem_reader.py +9 -0
  13. memos/configs/mem_scheduler.py +18 -4
  14. memos/configs/mem_user.py +58 -0
  15. memos/graph_dbs/base.py +9 -1
  16. memos/graph_dbs/factory.py +2 -0
  17. memos/graph_dbs/nebular.py +1364 -0
  18. memos/graph_dbs/neo4j.py +4 -4
  19. memos/log.py +1 -1
  20. memos/mem_cube/utils.py +13 -6
  21. memos/mem_os/core.py +140 -30
  22. memos/mem_os/main.py +1 -1
  23. memos/mem_os/product.py +266 -152
  24. memos/mem_os/utils/format_utils.py +314 -67
  25. memos/mem_reader/simple_struct.py +13 -5
  26. memos/mem_scheduler/base_scheduler.py +220 -250
  27. memos/mem_scheduler/general_scheduler.py +193 -73
  28. memos/mem_scheduler/modules/base.py +5 -5
  29. memos/mem_scheduler/modules/dispatcher.py +6 -9
  30. memos/mem_scheduler/modules/misc.py +81 -16
  31. memos/mem_scheduler/modules/monitor.py +52 -41
  32. memos/mem_scheduler/modules/rabbitmq_service.py +9 -7
  33. memos/mem_scheduler/modules/retriever.py +108 -191
  34. memos/mem_scheduler/modules/scheduler_logger.py +255 -0
  35. memos/mem_scheduler/mos_for_test_scheduler.py +16 -19
  36. memos/mem_scheduler/schemas/__init__.py +0 -0
  37. memos/mem_scheduler/schemas/general_schemas.py +43 -0
  38. memos/mem_scheduler/schemas/message_schemas.py +148 -0
  39. memos/mem_scheduler/schemas/monitor_schemas.py +329 -0
  40. memos/mem_scheduler/utils/__init__.py +0 -0
  41. memos/mem_scheduler/utils/filter_utils.py +176 -0
  42. memos/mem_scheduler/utils/misc_utils.py +61 -0
  43. memos/mem_user/factory.py +94 -0
  44. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  45. memos/mem_user/mysql_user_manager.py +500 -0
  46. memos/mem_user/persistent_factory.py +96 -0
  47. memos/mem_user/user_manager.py +4 -4
  48. memos/memories/activation/item.py +4 -0
  49. memos/memories/textual/base.py +1 -1
  50. memos/memories/textual/general.py +35 -91
  51. memos/memories/textual/item.py +5 -33
  52. memos/memories/textual/tree.py +13 -7
  53. memos/memories/textual/tree_text_memory/organize/conflict.py +4 -2
  54. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +47 -43
  55. memos/memories/textual/tree_text_memory/organize/reorganizer.py +8 -5
  56. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -3
  57. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +2 -0
  58. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +2 -0
  59. memos/memories/textual/tree_text_memory/retrieve/searcher.py +46 -23
  60. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +42 -15
  61. memos/memories/textual/tree_text_memory/retrieve/utils.py +11 -7
  62. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +62 -58
  63. memos/memos_tools/dinding_report_bot.py +422 -0
  64. memos/memos_tools/notification_service.py +44 -0
  65. memos/memos_tools/notification_utils.py +96 -0
  66. memos/settings.py +3 -1
  67. memos/templates/mem_reader_prompts.py +2 -1
  68. memos/templates/mem_scheduler_prompts.py +41 -7
  69. memos/templates/mos_prompts.py +87 -0
  70. memos/mem_scheduler/modules/schemas.py +0 -328
  71. memos/mem_scheduler/utils.py +0 -75
  72. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/LICENSE +0 -0
  73. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/WHEEL +0 -0
  74. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/entry_points.txt +0 -0
@@ -12,6 +12,7 @@ from memos.llms.factory import AzureLLM, LLMFactory, OllamaLLM, OpenAILLM
12
12
  from memos.log import get_logger
13
13
  from memos.memories.textual.base import BaseTextMemory
14
14
  from memos.memories.textual.item import TextualMemoryItem
15
+ from memos.templates.mem_reader_prompts import SIMPLE_STRUCT_MEM_READER_PROMPT
15
16
  from memos.types import MessageList
16
17
  from memos.vec_dbs.factory import QdrantVecDB, VecDBFactory
17
18
  from memos.vec_dbs.item import VecDBItem
@@ -36,11 +37,7 @@ class GeneralTextMemory(BaseTextMemory):
36
37
  stop=stop_after_attempt(3),
37
38
  retry=retry_if_exception_type(json.JSONDecodeError),
38
39
  before_sleep=lambda retry_state: logger.warning(
39
- EXTRACTION_RETRY_LOG.format(
40
- error=retry_state.outcome.exception(),
41
- attempt_number=retry_state.attempt_number,
42
- max_attempt_number=3,
43
- )
40
+ f"Extracting memory failed due to JSON decode error: {retry_state.outcome.exception()}, Attempt retry: {retry_state.attempt_number} / {3}"
44
41
  ),
45
42
  )
46
43
  def extract(self, messages: MessageList) -> list[TextualMemoryItem]:
@@ -52,14 +49,27 @@ class GeneralTextMemory(BaseTextMemory):
52
49
  Returns:
53
50
  List of TextualMemoryItem objects representing the extracted memories.
54
51
  """
55
- str_messages = json.dumps(messages)
56
- user_query = EXTRACTION_PROMPT_PART_1 + EXTRACTION_PROMPT_PART_2.format(
57
- messages=str_messages
52
+
53
+ str_messages = "\n".join(
54
+ [message["role"] + ":" + message["content"] for message in messages]
58
55
  )
59
- response = self.extractor_llm.generate([{"role": "user", "content": user_query}])
60
- raw_extracted_memories = json.loads(response)
56
+
57
+ prompt = SIMPLE_STRUCT_MEM_READER_PROMPT.replace("${conversation}", str_messages)
58
+ messages = [{"role": "user", "content": prompt}]
59
+ response_text = self.extractor_llm.generate(messages)
60
+ response_json = self.parse_json_result(response_text)
61
+
61
62
  extracted_memories = [
62
- TextualMemoryItem(**memory_dict) for memory_dict in raw_extracted_memories
63
+ TextualMemoryItem(
64
+ memory=memory_dict["value"],
65
+ metadata={
66
+ "key": memory_dict["key"],
67
+ "source": "conversation",
68
+ "tags": memory_dict["tags"],
69
+ "updated_at": datetime.now().isoformat(),
70
+ },
71
+ )
72
+ for memory_dict in response_json["memory list"]
63
73
  ]
64
74
 
65
75
  return extracted_memories
@@ -206,83 +216,17 @@ class GeneralTextMemory(BaseTextMemory):
206
216
  """Embed a single sentence."""
207
217
  return self.embedder.embed([sentence])[0]
208
218
 
209
-
210
- EXTRACTION_PROMPT_PART_1 = f"""You are a memory extractor. Your task is to extract memories from the given messages.
211
- * You will receive a list of messages, each with a role (user or assistant) and content.
212
- * Your job is to extract memories related to the user's long-term goals, interests, and emotional states.
213
- * Each memory should be a dictionary with the following keys:
214
- - "memory": The content of the memory (string). Rephrase the content if necessary.
215
- - "metadata": A dictionary containing additional information about the memory.
216
- * The metadata dictionary should include:
217
- - "type": The type of memory (string), e.g., "procedure", "fact", "event", "opinion", etc.
218
- - "memory_time": The time the memory occurred or refers to (string). Must be in standard `YYYY-MM-DD` format. Relative expressions such as "yesterday" or "tomorrow" are not allowed.
219
- - "source": The origin of the memory (string), e.g., `"conversation"`, `"retrieved"`, `"web"`, `"file"`.
220
- - "confidence": A numeric score (float between 0 and 100) indicating how certain you are about the accuracy or reliability of the memory.
221
- - "entities": A list of key entities (array of strings) mentioned in the memory, e.g., people, places, organizations, e.g., `["Alice", "Paris", "OpenAI"]`.
222
- - "tags": A list of keywords or thematic labels (array of strings) associated with the memory for categorization or retrieval, e.g., `["travel", "health", "project-x"]`.
223
- - "visibility": The accessibility scope of the memory (string), e.g., `"private"`, `"public"`, `"session"`, determining who or what contexts can access it.
224
- - "updated_at": The timestamp of the last modification to the memory (string). Useful for tracking memory freshness or change history. Format: ISO 8601 or natural language.
225
- * Current date and time is {datetime.now().isoformat()}.
226
- * Only return the list of memories in JSON format.
227
- * Do not include any explanations
228
- * Do not include any extra text
229
- * Do not include code blocks (```json```)
230
-
231
- ## Example
232
-
233
- ### Input
234
-
235
- [
236
- {{"role": "user", "content": "I plan to visit Paris next week."}},
237
- {{"role": "assistant", "content": "Paris is a beautiful city with many attractions."}},
238
- {{"role": "user", "content": "I love the Eiffel Tower."}},
239
- {{"role": "assistant", "content": "The Eiffel Tower is a must-see landmark in Paris."}}
240
- ]
241
-
242
- ### Output
243
-
244
- [
245
- {{
246
- "memory": "The user plans to visit Paris on 05-26-2025.",
247
- "metadata": {{
248
- "type": "event",
249
- "memory_time": "2025-05-26",
250
- "source": "conversation",
251
- "confidence": 90.0,
252
- "entities": ["Paris"],
253
- "tags": ["travel", "plans"],
254
- "visibility": "private",
255
- "updated_at": "2025-05-19T00:00:00"
256
- }}
257
- }},
258
- {{
259
- "memory": "The user loves the Eiffel Tower.",
260
- "metadata": {{
261
- "type": "opinion",
262
- "memory_time": "2025-05-19",
263
- "source": "conversation",
264
- "confidence": 100.0,
265
- "entities": ["Eiffel Tower"],
266
- "tags": ["opinions", "landmarks"],
267
- "visibility": "session",
268
- "updated_at": "2025-05-19T00:00:00"
269
- }}
270
- }}
271
- ]
272
-
273
- """
274
-
275
- EXTRACTION_PROMPT_PART_2 = """
276
- ## Query
277
-
278
- ### Input
279
-
280
- {messages}
281
-
282
- ### Output
283
-
284
- """
285
-
286
- EXTRACTION_RETRY_LOG = """Extracting memory failed due to JSON decode error: {error},
287
- Attempt retry: {attempt_number} / {max_attempt_number}
288
- """
219
+ def parse_json_result(self, response_text):
220
+ try:
221
+ json_start = response_text.find("{")
222
+ response_text = response_text[json_start:]
223
+ response_text = response_text.replace("```", "").strip()
224
+ if response_text[-1] != "}":
225
+ response_text += "}"
226
+ response_json = json.loads(response_text)
227
+ return response_json
228
+ except json.JSONDecodeError as e:
229
+ logger.warning(
230
+ f"Failed to parse LLM response as JSON: {e}\nRaw response:\n{response_text}"
231
+ )
232
+ return {}
@@ -27,23 +27,14 @@ class TextualMemoryMetadata(BaseModel):
27
27
  default="activated",
28
28
  description="The status of the memory, e.g., 'activated', 'archived', 'deleted'.",
29
29
  )
30
- type: Literal["procedure", "fact", "event", "opinion", "topic", "reasoning"] | None = Field(
31
- default=None
32
- )
33
- memory_time: str | None = Field(
34
- default=None,
35
- description='The time the memory occurred or refers to. Must be in standard `YYYY-MM-DD` format. Relative expressions such as "yesterday" or "tomorrow" are not allowed.',
36
- )
37
- source: Literal["conversation", "retrieved", "web", "file"] | None = Field(
38
- default=None, description="The origin of the memory"
39
- )
30
+ type: str | None = Field(default=None)
31
+ key: str | None = Field(default=None, description="Memory key or title.")
40
32
  confidence: float | None = Field(
41
33
  default=None,
42
34
  description="A numeric score (float between 0 and 100) indicating how certain you are about the accuracy or reliability of the memory.",
43
35
  )
44
- entities: list[str] | None = Field(
45
- default=None,
46
- description='A list of key entities mentioned in the memory, e.g., people, places, organizations, e.g., `["Alice", "Paris", "OpenAI"]`.',
36
+ source: Literal["conversation", "retrieved", "web", "file"] | None = Field(
37
+ default=None, description="The origin of the memory"
47
38
  )
48
39
  tags: list[str] | None = Field(
49
40
  default=None,
@@ -59,23 +50,6 @@ class TextualMemoryMetadata(BaseModel):
59
50
 
60
51
  model_config = ConfigDict(extra="allow")
61
52
 
62
- @field_validator("memory_time")
63
- @classmethod
64
- def validate_memory_time(cls, v):
65
- try:
66
- if v:
67
- datetime.strptime(v, "%Y-%m-%d")
68
- except ValueError as e:
69
- raise ValueError("Invalid date format. Use YYYY-MM-DD.") from e
70
- return v
71
-
72
- @field_validator("confidence")
73
- @classmethod
74
- def validate_confidence(cls, v):
75
- if v is not None and (v < 0 or v > 100):
76
- raise ValueError("Confidence must be between 0 and 100.")
77
- return v
78
-
79
53
  def __str__(self) -> str:
80
54
  """Pretty string representation of the metadata."""
81
55
  meta = self.model_dump(exclude_none=True)
@@ -85,10 +59,9 @@ class TextualMemoryMetadata(BaseModel):
85
59
  class TreeNodeTextualMemoryMetadata(TextualMemoryMetadata):
86
60
  """Extended metadata for structured memory, layered retrieval, and lifecycle tracking."""
87
61
 
88
- memory_type: Literal["WorkingMemory", "LongTermMemory", "UserMemory"] = Field(
62
+ memory_type: Literal["WorkingMemory", "LongTermMemory", "UserMemory", "OuterMemory"] = Field(
89
63
  default="WorkingMemory", description="Memory lifecycle type."
90
64
  )
91
- key: str | None = Field(default=None, description="Memory key or title.")
92
65
  sources: list[str] | None = Field(
93
66
  default=None, description="Multiple origins of the memory (e.g., URLs, notes)."
94
67
  )
@@ -148,7 +121,6 @@ class TextualMemoryItem(BaseModel):
148
121
 
149
122
  model_config = ConfigDict(extra="forbid")
150
123
 
151
- @field_validator("id")
152
124
  @classmethod
153
125
  def validate_id(cls, v):
154
126
  try:
@@ -117,13 +117,19 @@ class TreeTextMemory(BaseTextMemory):
117
117
  logger.warning(
118
118
  "Internet retriever is init by config , but this search set manual_close_internet is True and will close it"
119
119
  )
120
- self.internet_retriever = None
121
- searcher = Searcher(
122
- self.dispatcher_llm,
123
- self.graph_store,
124
- self.embedder,
125
- internet_retriever=self.internet_retriever,
126
- )
120
+ searcher = Searcher(
121
+ self.dispatcher_llm,
122
+ self.graph_store,
123
+ self.embedder,
124
+ internet_retriever=None,
125
+ )
126
+ else:
127
+ searcher = Searcher(
128
+ self.dispatcher_llm,
129
+ self.graph_store,
130
+ self.embedder,
131
+ internet_retriever=self.internet_retriever,
132
+ )
127
133
  return searcher.search(query, top_k, info, mode, memory_type)
128
134
 
129
135
  def get_relevant_subgraph(
@@ -3,6 +3,8 @@ import re
3
3
 
4
4
  from datetime import datetime
5
5
 
6
+ from dateutil import parser
7
+
6
8
  from memos.embedders.base import BaseEmbedder
7
9
  from memos.graph_dbs.neo4j import Neo4jGraphDB
8
10
  from memos.llms.base import BaseLLM
@@ -133,8 +135,8 @@ class ConflictHandler:
133
135
  """
134
136
  Hard update: compare updated_at, keep the newer one, overwrite the older one's metadata.
135
137
  """
136
- time_a = datetime.fromisoformat(memory_a.metadata.updated_at)
137
- time_b = datetime.fromisoformat(memory_b.metadata.updated_at)
138
+ time_a = parser.isoparse(memory_a.metadata.updated_at)
139
+ time_b = parser.isoparse(memory_b.metadata.updated_at)
138
140
 
139
141
  newer_mem = memory_a if time_a >= time_b else memory_b
140
142
  older_mem = memory_b if time_a >= time_b else memory_a
@@ -1,4 +1,5 @@
1
1
  import json
2
+ import traceback
2
3
 
3
4
  from memos.embedders.factory import OllamaEmbedder
4
5
  from memos.graph_dbs.item import GraphDBNode
@@ -30,53 +31,57 @@ class RelationAndReasoningDetector:
30
31
  3) Sequence links
31
32
  4) Aggregate concepts
32
33
  """
33
- if node.metadata.type == "reasoning":
34
- logger.info(f"Skip reasoning for inferred node {node.id}")
35
- return {
36
- "relations": [],
37
- "inferred_nodes": [],
38
- "sequence_links": [],
39
- "aggregate_nodes": [],
40
- }
41
-
42
34
  results = {
43
35
  "relations": [],
44
36
  "inferred_nodes": [],
45
37
  "sequence_links": [],
46
38
  "aggregate_nodes": [],
47
39
  }
40
+ try:
41
+ if node.metadata.type == "reasoning":
42
+ logger.info(f"Skip reasoning for inferred node {node.id}")
43
+ return {
44
+ "relations": [],
45
+ "inferred_nodes": [],
46
+ "sequence_links": [],
47
+ "aggregate_nodes": [],
48
+ }
49
+
50
+ nearest = self.graph_store.get_neighbors_by_tag(
51
+ tags=node.metadata.tags,
52
+ exclude_ids=exclude_ids,
53
+ top_k=top_k,
54
+ min_overlap=2,
55
+ )
56
+ nearest = [GraphDBNode(**cand_data) for cand_data in nearest]
57
+
58
+ """
59
+ # 1) Pairwise relations (including CAUSE/CONDITION/CONFLICT)
60
+ pairwise = self._detect_pairwise_causal_condition_relations(node, nearest)
61
+ results["relations"].extend(pairwise["relations"])
62
+ """
63
+
64
+ """
65
+ # 2) Inferred nodes (from causal/condition)
66
+ inferred = self._infer_fact_nodes_from_relations(pairwise)
67
+ results["inferred_nodes"].extend(inferred)
68
+ """
69
+
70
+ """
71
+ 3) Sequence (optional, if you have timestamps)
72
+ seq = self._detect_sequence_links(node, nearest)
73
+ results["sequence_links"].extend(seq)
74
+ """
75
+
76
+ # 4) Aggregate
77
+ agg = self._detect_aggregate_node_for_group(node, nearest, min_group_size=5)
78
+ if agg:
79
+ results["aggregate_nodes"].append(agg)
48
80
 
49
- nearest = self.graph_store.get_neighbors_by_tag(
50
- tags=node.metadata.tags,
51
- exclude_ids=exclude_ids,
52
- top_k=top_k,
53
- min_overlap=2,
54
- )
55
- nearest = [GraphDBNode(**cand_data) for cand_data in nearest]
56
-
57
- """
58
- # 1) Pairwise relations (including CAUSE/CONDITION/CONFLICT)
59
- pairwise = self._detect_pairwise_causal_condition_relations(node, nearest)
60
- results["relations"].extend(pairwise["relations"])
61
- """
62
-
63
- """
64
- # 2) Inferred nodes (from causal/condition)
65
- inferred = self._infer_fact_nodes_from_relations(pairwise)
66
- results["inferred_nodes"].extend(inferred)
67
- """
68
-
69
- """
70
- 3) Sequence (optional, if you have timestamps)
71
- seq = self._detect_sequence_links(node, nearest)
72
- results["sequence_links"].extend(seq)
73
- """
74
-
75
- # 4) Aggregate
76
- agg = self._detect_aggregate_node_for_group(node, nearest, min_group_size=5)
77
- if agg:
78
- results["aggregate_nodes"].append(agg)
79
-
81
+ except Exception as e:
82
+ logger.error(
83
+ f"Error {e} while process struct reorganize: trace: {traceback.format_exc()}"
84
+ )
80
85
  return results
81
86
 
82
87
  def _detect_pairwise_causal_condition_relations(
@@ -176,10 +181,9 @@ class RelationAndReasoningDetector:
176
181
  joined = "\n".join(f"- {n.memory}" for n in combined_nodes)
177
182
  prompt = AGGREGATE_PROMPT.replace("{joined}", joined)
178
183
  response_text = self._call_llm(prompt)
179
- response_json = self._parse_json_result(response_text)
180
- if not response_json:
184
+ summary = self._parse_json_result(response_text)
185
+ if not summary:
181
186
  return None
182
- summary = json.loads(response_text)
183
187
  embedding = self.embedder.embed([summary["value"]])[0]
184
188
 
185
189
  parent_node = GraphDBNode(
@@ -125,8 +125,8 @@ class GraphStructureReorganizer:
125
125
  """
126
126
  import schedule
127
127
 
128
- schedule.every(20).seconds.do(self.optimize_structure, scope="LongTermMemory")
129
- schedule.every(20).seconds.do(self.optimize_structure, scope="UserMemory")
128
+ schedule.every(600).seconds.do(self.optimize_structure, scope="LongTermMemory")
129
+ schedule.every(600).seconds.do(self.optimize_structure, scope="UserMemory")
130
130
 
131
131
  logger.info("Structure optimizer schedule started.")
132
132
  while not getattr(self, "_stop_scheduler", False):
@@ -198,7 +198,7 @@ class GraphStructureReorganizer:
198
198
  logger.info(f"Already optimizing for {scope}. Skipping.")
199
199
  return
200
200
 
201
- if self.graph_store.count_nodes(scope) == 0:
201
+ if self.graph_store.node_not_exist(scope):
202
202
  logger.debug(f"[GraphStructureReorganize] No nodes for scope={scope}. Skip.")
203
203
  return
204
204
 
@@ -251,7 +251,10 @@ class GraphStructureReorganizer:
251
251
  try:
252
252
  f.result()
253
253
  except Exception as e:
254
- logger.warning(f"[Reorganize] Cluster processing failed: {e}")
254
+ logger.warning(
255
+ f"[Reorganize] Cluster processing "
256
+ f"failed: {e}, trace: {traceback.format_exc()}"
257
+ )
255
258
  logger.info("[GraphStructure Reorganize] Structure optimization finished.")
256
259
 
257
260
  finally:
@@ -343,7 +346,7 @@ class GraphStructureReorganizer:
343
346
  agg_node.metadata.model_dump(exclude_none=True),
344
347
  )
345
348
  for child_id in agg_node.metadata.sources:
346
- self.graph_store.add_edge(agg_node.id, child_id, "AGGREGATES")
349
+ self.graph_store.add_edge(agg_node.id, child_id, "AGGREGATE_TO")
347
350
 
348
351
  logger.info("[Reorganizer] Cluster relation/reasoning done.")
349
352
 
@@ -127,7 +127,7 @@ class InternetGoogleRetriever:
127
127
  self.embedder = embedder
128
128
 
129
129
  def retrieve_from_internet(
130
- self, query: str, top_k: int = 10, parsed_goal=None
130
+ self, query: str, top_k: int = 10, parsed_goal=None, info=None
131
131
  ) -> list[TextualMemoryItem]:
132
132
  """
133
133
  Retrieve information from the internet and convert to TextualMemoryItem format
@@ -136,10 +136,13 @@ class InternetGoogleRetriever:
136
136
  query: Search query
137
137
  top_k: Number of results to return
138
138
  parsed_goal: Parsed task goal (optional)
139
+ info (dict): Leave a record of memory consumption.
139
140
 
140
141
  Returns:
141
142
  List of TextualMemoryItem
142
143
  """
144
+ if not info:
145
+ info = {"user_id": "", "session_id": ""}
143
146
  # Get search results
144
147
  search_results = self.google_api.get_all_results(query, max_results=top_k)
145
148
 
@@ -157,8 +160,8 @@ class InternetGoogleRetriever:
157
160
  memory_content = f"Title: {title}\nSummary: {snippet}\nSource: {link}"
158
161
  # Create metadata
159
162
  metadata = TreeNodeTextualMemoryMetadata(
160
- user_id=None,
161
- session_id=None,
163
+ user_id=info.get("user_id", ""),
164
+ session_id=info.get("session_id", ""),
162
165
  status="activated",
163
166
  type="fact", # Internet search results are usually factual information
164
167
  memory_time=datetime.now().strftime("%Y-%m-%d"),
@@ -4,6 +4,7 @@ from typing import Any, ClassVar
4
4
 
5
5
  from memos.configs.internet_retriever import InternetRetrieverConfigFactory
6
6
  from memos.embedders.base import BaseEmbedder
7
+ from memos.mem_reader.factory import MemReaderFactory
7
8
  from memos.memories.textual.tree_text_memory.retrieve.internet_retriever import (
8
9
  InternetGoogleRetriever,
9
10
  )
@@ -66,6 +67,7 @@ class InternetRetrieverFactory:
66
67
  access_key=config.api_key, # Use api_key as access_key for xinyu
67
68
  search_engine_id=config.search_engine_id,
68
69
  embedder=embedder,
70
+ reader=MemReaderFactory.from_config(config.reader),
69
71
  max_results=config.max_results,
70
72
  )
71
73
  else:
@@ -10,4 +10,6 @@ class ParsedTaskGoal:
10
10
  memories: list[str] = field(default_factory=list)
11
11
  keys: list[str] = field(default_factory=list)
12
12
  tags: list[str] = field(default_factory=list)
13
+ rephrased_query: str | None = None
14
+ internet_search: bool = False
13
15
  goal_type: str | None = None # e.g., 'default', 'explanation', etc.
@@ -6,6 +6,7 @@ from datetime import datetime
6
6
  from memos.embedders.factory import OllamaEmbedder
7
7
  from memos.graph_dbs.factory import Neo4jGraphDB
8
8
  from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
9
+ from memos.log import get_logger
9
10
  from memos.memories.textual.item import SearchedTreeNodeTextualMemoryMetadata, TextualMemoryItem
10
11
 
11
12
  from .internet_retriever_factory import InternetRetrieverFactory
@@ -15,6 +16,9 @@ from .reranker import MemoryReranker
15
16
  from .task_goal_parser import TaskGoalParser
16
17
 
17
18
 
19
+ logger = get_logger(__name__)
20
+
21
+
18
22
  class Searcher:
19
23
  def __init__(
20
24
  self,
@@ -53,7 +57,12 @@ class Searcher:
53
57
  Returns:
54
58
  list[TextualMemoryItem]: List of matching memories.
55
59
  """
56
-
60
+ if not info:
61
+ logger.warning(
62
+ "Please input 'info' when use tree.search so that "
63
+ "the database would store the consume history."
64
+ )
65
+ info = {"user_id": "", "session_id": ""}
57
66
  # Step 1: Parse task structure into topic, concept, and fact levels
58
67
  context = []
59
68
  if mode == "fine":
@@ -67,7 +76,18 @@ class Searcher:
67
76
  context = list(set(context))
68
77
 
69
78
  # Step 1a: Parse task structure into topic, concept, and fact levels
70
- parsed_goal = self.task_goal_parser.parse(query, "\n".join(context))
79
+ parsed_goal = self.task_goal_parser.parse(
80
+ task_description=query,
81
+ context="\n".join(context),
82
+ conversation=info.get("chat_history", []),
83
+ mode=mode,
84
+ )
85
+
86
+ query = (
87
+ parsed_goal.rephrased_query
88
+ if parsed_goal.rephrased_query and len(parsed_goal.rephrased_query) > 0
89
+ else query
90
+ )
71
91
 
72
92
  if parsed_goal.memories:
73
93
  query_embedding = self.embedder.embed(list({query, *parsed_goal.memories}))
@@ -136,12 +156,12 @@ class Searcher:
136
156
  """
137
157
  Retrieve information from the internet using Google Custom Search API.
138
158
  """
139
- if not self.internet_retriever:
159
+ if not self.internet_retriever or mode == "fast" or not parsed_goal.internet_search:
140
160
  return []
141
161
  if memory_type not in ["All"]:
142
162
  return []
143
163
  internet_items = self.internet_retriever.retrieve_from_internet(
144
- query=query, top_k=top_k, parsed_goal=parsed_goal
164
+ query=query, top_k=top_k, parsed_goal=parsed_goal, info=info
145
165
  )
146
166
 
147
167
  # Convert to the format expected by reranker
@@ -149,21 +169,30 @@ class Searcher:
149
169
  query=query,
150
170
  query_embedding=query_embedding[0],
151
171
  graph_results=internet_items,
152
- top_k=top_k * 2,
172
+ top_k=min(top_k, 5),
153
173
  parsed_goal=parsed_goal,
154
174
  )
155
175
  return ranked_memories
156
176
 
157
- # Step 3: Parallel execution of all paths
158
- with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
159
- future_working = executor.submit(retrieve_from_working_memory)
160
- future_hybrid = executor.submit(retrieve_ranked_long_term_and_user)
161
- future_internet = executor.submit(retrieve_from_internet)
162
-
163
- working_results = future_working.result()
164
- hybrid_results = future_hybrid.result()
165
- internet_results = future_internet.result()
166
- searched_res = working_results + hybrid_results + internet_results
177
+ # Step 3: Parallel execution of all paths (enable internet search according to parameter in the parsed goal)
178
+ if parsed_goal.internet_search:
179
+ with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
180
+ future_working = executor.submit(retrieve_from_working_memory)
181
+ future_hybrid = executor.submit(retrieve_ranked_long_term_and_user)
182
+ future_internet = executor.submit(retrieve_from_internet)
183
+
184
+ working_results = future_working.result()
185
+ hybrid_results = future_hybrid.result()
186
+ internet_results = future_internet.result()
187
+ searched_res = working_results + hybrid_results + internet_results
188
+ else:
189
+ with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
190
+ future_working = executor.submit(retrieve_from_working_memory)
191
+ future_hybrid = executor.submit(retrieve_ranked_long_term_and_user)
192
+
193
+ working_results = future_working.result()
194
+ hybrid_results = future_hybrid.result()
195
+ searched_res = working_results + hybrid_results
167
196
 
168
197
  # Deduplicate by item.memory, keep higher score
169
198
  deduped_result = {}
@@ -184,16 +213,10 @@ class Searcher:
184
213
  TextualMemoryItem(id=item.id, memory=item.memory, metadata=new_meta)
185
214
  )
186
215
 
187
- # Step 4: Reasoning over all retrieved and ranked memory
188
- if mode == "fine":
189
- searched_res = self.reasoner.reason(
190
- query=query,
191
- ranked_memories=searched_res,
192
- parsed_goal=parsed_goal,
193
- )
194
-
195
216
  # Step 5: Update usage history with current timestamp
196
217
  now_time = datetime.now().isoformat()
218
+ if "chat_history" in info:
219
+ info.pop("chat_history")
197
220
  usage_record = json.dumps(
198
221
  {"time": now_time, "info": info}
199
222
  ) # `info` should be a serializable dict or string