MemoryOS 0.2.0-py3-none-any.whl → 0.2.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MemoryOS might be problematic.

Files changed (114)
  1. {memoryos-0.2.0.dist-info → memoryos-0.2.2.dist-info}/METADATA +67 -26
  2. memoryos-0.2.2.dist-info/RECORD +169 -0
  3. memoryos-0.2.2.dist-info/entry_points.txt +3 -0
  4. memos/__init__.py +1 -1
  5. memos/api/config.py +562 -0
  6. memos/api/context/context.py +147 -0
  7. memos/api/context/dependencies.py +90 -0
  8. memos/api/exceptions.py +28 -0
  9. memos/api/mcp_serve.py +502 -0
  10. memos/api/product_api.py +35 -0
  11. memos/api/product_models.py +163 -0
  12. memos/api/routers/__init__.py +1 -0
  13. memos/api/routers/product_router.py +386 -0
  14. memos/chunkers/sentence_chunker.py +8 -2
  15. memos/cli.py +113 -0
  16. memos/configs/embedder.py +27 -0
  17. memos/configs/graph_db.py +132 -3
  18. memos/configs/internet_retriever.py +6 -0
  19. memos/configs/llm.py +47 -0
  20. memos/configs/mem_cube.py +1 -1
  21. memos/configs/mem_os.py +5 -0
  22. memos/configs/mem_reader.py +9 -0
  23. memos/configs/mem_scheduler.py +107 -7
  24. memos/configs/mem_user.py +58 -0
  25. memos/configs/memory.py +5 -4
  26. memos/dependency.py +52 -0
  27. memos/embedders/ark.py +92 -0
  28. memos/embedders/factory.py +4 -0
  29. memos/embedders/sentence_transformer.py +8 -2
  30. memos/embedders/universal_api.py +32 -0
  31. memos/graph_dbs/base.py +11 -3
  32. memos/graph_dbs/factory.py +4 -0
  33. memos/graph_dbs/nebular.py +1364 -0
  34. memos/graph_dbs/neo4j.py +333 -124
  35. memos/graph_dbs/neo4j_community.py +300 -0
  36. memos/llms/base.py +9 -0
  37. memos/llms/deepseek.py +54 -0
  38. memos/llms/factory.py +10 -1
  39. memos/llms/hf.py +170 -13
  40. memos/llms/hf_singleton.py +114 -0
  41. memos/llms/ollama.py +4 -0
  42. memos/llms/openai.py +67 -1
  43. memos/llms/qwen.py +63 -0
  44. memos/llms/vllm.py +153 -0
  45. memos/log.py +1 -1
  46. memos/mem_cube/general.py +77 -16
  47. memos/mem_cube/utils.py +109 -0
  48. memos/mem_os/core.py +251 -51
  49. memos/mem_os/main.py +94 -12
  50. memos/mem_os/product.py +1220 -43
  51. memos/mem_os/utils/default_config.py +352 -0
  52. memos/mem_os/utils/format_utils.py +1401 -0
  53. memos/mem_reader/simple_struct.py +18 -10
  54. memos/mem_scheduler/base_scheduler.py +441 -40
  55. memos/mem_scheduler/general_scheduler.py +249 -248
  56. memos/mem_scheduler/modules/base.py +14 -5
  57. memos/mem_scheduler/modules/dispatcher.py +67 -4
  58. memos/mem_scheduler/modules/misc.py +104 -0
  59. memos/mem_scheduler/modules/monitor.py +240 -50
  60. memos/mem_scheduler/modules/rabbitmq_service.py +319 -0
  61. memos/mem_scheduler/modules/redis_service.py +32 -22
  62. memos/mem_scheduler/modules/retriever.py +167 -23
  63. memos/mem_scheduler/modules/scheduler_logger.py +255 -0
  64. memos/mem_scheduler/mos_for_test_scheduler.py +140 -0
  65. memos/mem_scheduler/schemas/__init__.py +0 -0
  66. memos/mem_scheduler/schemas/general_schemas.py +43 -0
  67. memos/mem_scheduler/{modules/schemas.py → schemas/message_schemas.py} +63 -61
  68. memos/mem_scheduler/schemas/monitor_schemas.py +329 -0
  69. memos/mem_scheduler/utils/__init__.py +0 -0
  70. memos/mem_scheduler/utils/filter_utils.py +176 -0
  71. memos/mem_scheduler/utils/misc_utils.py +61 -0
  72. memos/mem_user/factory.py +94 -0
  73. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  74. memos/mem_user/mysql_user_manager.py +500 -0
  75. memos/mem_user/persistent_factory.py +96 -0
  76. memos/mem_user/persistent_user_manager.py +260 -0
  77. memos/mem_user/user_manager.py +4 -4
  78. memos/memories/activation/item.py +29 -0
  79. memos/memories/activation/kv.py +10 -3
  80. memos/memories/activation/vllmkv.py +219 -0
  81. memos/memories/factory.py +2 -0
  82. memos/memories/textual/base.py +1 -1
  83. memos/memories/textual/general.py +43 -97
  84. memos/memories/textual/item.py +5 -33
  85. memos/memories/textual/tree.py +22 -12
  86. memos/memories/textual/tree_text_memory/organize/conflict.py +9 -5
  87. memos/memories/textual/tree_text_memory/organize/manager.py +26 -18
  88. memos/memories/textual/tree_text_memory/organize/redundancy.py +25 -44
  89. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +50 -48
  90. memos/memories/textual/tree_text_memory/organize/reorganizer.py +81 -56
  91. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -3
  92. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +2 -0
  93. memos/memories/textual/tree_text_memory/retrieve/recall.py +0 -1
  94. memos/memories/textual/tree_text_memory/retrieve/reranker.py +2 -2
  95. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +2 -0
  96. memos/memories/textual/tree_text_memory/retrieve/searcher.py +52 -28
  97. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +42 -15
  98. memos/memories/textual/tree_text_memory/retrieve/utils.py +11 -7
  99. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +62 -58
  100. memos/memos_tools/dinding_report_bot.py +422 -0
  101. memos/memos_tools/notification_service.py +44 -0
  102. memos/memos_tools/notification_utils.py +96 -0
  103. memos/parsers/markitdown.py +8 -2
  104. memos/settings.py +3 -1
  105. memos/templates/mem_reader_prompts.py +66 -23
  106. memos/templates/mem_scheduler_prompts.py +126 -43
  107. memos/templates/mos_prompts.py +87 -0
  108. memos/templates/tree_reorganize_prompts.py +85 -30
  109. memos/vec_dbs/base.py +12 -0
  110. memos/vec_dbs/qdrant.py +46 -20
  111. memoryos-0.2.0.dist-info/RECORD +0 -128
  112. memos/mem_scheduler/utils.py +0 -26
  113. {memoryos-0.2.0.dist-info → memoryos-0.2.2.dist-info}/LICENSE +0 -0
  114. {memoryos-0.2.0.dist-info → memoryos-0.2.2.dist-info}/WHEEL +0 -0
memos/memories/textual/tree_text_memory/retrieve/searcher.py

@@ -5,7 +5,8 @@ from datetime import datetime
 
 from memos.embedders.factory import OllamaEmbedder
 from memos.graph_dbs.factory import Neo4jGraphDB
-from memos.llms.factory import OllamaLLM, OpenAILLM
+from memos.llms.factory import AzureLLM, OllamaLLM, OpenAILLM
+from memos.log import get_logger
 from memos.memories.textual.item import SearchedTreeNodeTextualMemoryMetadata, TextualMemoryItem
 
 from .internet_retriever_factory import InternetRetrieverFactory
@@ -15,10 +16,13 @@ from .reranker import MemoryReranker
 from .task_goal_parser import TaskGoalParser
 
 
+logger = get_logger(__name__)
+
+
 class Searcher:
     def __init__(
         self,
-        dispatcher_llm: OpenAILLM | OllamaLLM,
+        dispatcher_llm: OpenAILLM | OllamaLLM | AzureLLM,
         graph_store: Neo4jGraphDB,
         embedder: OllamaEmbedder,
         internet_retriever: InternetRetrieverFactory | None = None,
@@ -53,7 +57,12 @@ class Searcher:
         Returns:
             list[TextualMemoryItem]: List of matching memories.
         """
-
+        if not info:
+            logger.warning(
+                "Please input 'info' when use tree.search so that "
+                "the database would store the consume history."
+            )
+            info = {"user_id": "", "session_id": ""}
         # Step 1: Parse task structure into topic, concept, and fact levels
         context = []
         if mode == "fine":
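The new guard falls back to empty identifiers when `info` is omitted. A minimal caller-side sketch of the intended usage, assuming an initialized TreeTextMemory instance named `tree`; keyword names beyond `info` are illustrative:

# Hypothetical caller; `tree` is assumed to be an initialized TreeTextMemory.
# Passing `info` lets the search record usage history against a real
# user/session instead of the empty-string defaults injected above.
results = tree.search(
    "what did we decide about the launch date?",
    top_k=10,
    info={
        "user_id": "u-123",      # recorded in the usage history
        "session_id": "s-456",
        "chat_history": [        # consumed by the task-goal parser in fine mode
            {"role": "user", "content": "let's plan the product launch"},
        ],
    },
)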
@@ -67,7 +76,18 @@ class Searcher:
         context = list(set(context))
 
         # Step 1a: Parse task structure into topic, concept, and fact levels
-        parsed_goal = self.task_goal_parser.parse(query, "\n".join(context))
+        parsed_goal = self.task_goal_parser.parse(
+            task_description=query,
+            context="\n".join(context),
+            conversation=info.get("chat_history", []),
+            mode=mode,
+        )
+
+        query = (
+            parsed_goal.rephrased_query
+            if parsed_goal.rephrased_query and len(parsed_goal.rephrased_query) > 0
+            else query
+        )
 
         if parsed_goal.memories:
            query_embedding = self.embedder.embed(list({query, *parsed_goal.memories}))
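Since `rephrased_query` is either a string or None, the added conditional expression is equivalent to a plain `or`; shown here only as an illustrative simplification, not the shipped code:

# Equivalent sketch: None and "" are both falsy, so the explicit
# length check in the shipped conditional is redundant.
query = parsed_goal.rephrased_query or query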
@@ -136,12 +156,12 @@ class Searcher:
             """
             Retrieve information from the internet using Google Custom Search API.
             """
-            if not self.internet_retriever:
+            if not self.internet_retriever or mode == "fast" or not parsed_goal.internet_search:
                 return []
             if memory_type not in ["All"]:
                 return []
             internet_items = self.internet_retriever.retrieve_from_internet(
-                query=query, top_k=top_k, parsed_goal=parsed_goal
+                query=query, top_k=top_k, parsed_goal=parsed_goal, info=info
             )
 
             # Convert to the format expected by reranker
@@ -149,21 +169,30 @@ class Searcher:
                 query=query,
                 query_embedding=query_embedding[0],
                 graph_results=internet_items,
-                top_k=top_k * 2,
+                top_k=min(top_k, 5),
                 parsed_goal=parsed_goal,
             )
             return ranked_memories
 
-        # Step 3: Parallel execution of all paths
-        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
-            future_working = executor.submit(retrieve_from_working_memory)
-            future_hybrid = executor.submit(retrieve_ranked_long_term_and_user)
-            future_internet = executor.submit(retrieve_from_internet)
-
-            working_results = future_working.result()
-            hybrid_results = future_hybrid.result()
-            internet_results = future_internet.result()
-            searched_res = working_results + hybrid_results + internet_results
+        # Step 3: Parallel execution of all paths (enable internet search accoeding to parameter in the parsed goal)
+        if parsed_goal.internet_search:
+            with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
+                future_working = executor.submit(retrieve_from_working_memory)
+                future_hybrid = executor.submit(retrieve_ranked_long_term_and_user)
+                future_internet = executor.submit(retrieve_from_internet)
+
+                working_results = future_working.result()
+                hybrid_results = future_hybrid.result()
+                internet_results = future_internet.result()
+                searched_res = working_results + hybrid_results + internet_results
+        else:
+            with concurrent.futures.ThreadPoolExecutor(max_workers=2) as executor:
+                future_working = executor.submit(retrieve_from_working_memory)
+                future_hybrid = executor.submit(retrieve_ranked_long_term_and_user)
+
+                working_results = future_working.result()
+                hybrid_results = future_hybrid.result()
+                searched_res = working_results + hybrid_results
 
         # Deduplicate by item.memory, keep higher score
         deduped_result = {}
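The two branches differ only in whether the internet future is submitted, and `retrieve_from_internet` already short-circuits to `[]` when `parsed_goal.internet_search` is false. A minimal equivalent sketch that avoids the duplicated executor blocks (an alternative, not the shipped code):

import concurrent.futures

# Sketch only: build the task list once, then fan out with a single executor.
tasks = [retrieve_from_working_memory, retrieve_ranked_long_term_and_user]
if parsed_goal.internet_search:
    tasks.append(retrieve_from_internet)

with concurrent.futures.ThreadPoolExecutor(max_workers=len(tasks)) as executor:
    futures = [executor.submit(task) for task in tasks]
    searched_res = [item for f in futures for item in f.result()]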
@@ -176,23 +205,18 @@ class Searcher:
         for item, score in sorted(deduped_result.values(), key=lambda pair: pair[1], reverse=True)[
             :top_k
         ]:
-            new_meta = SearchedTreeNodeTextualMemoryMetadata(
-                **item.metadata.model_dump(), relativity=score
-            )
+            meta_data = item.metadata.model_dump()
+            if "relativity" not in meta_data:
+                meta_data["relativity"] = score
+            new_meta = SearchedTreeNodeTextualMemoryMetadata(**meta_data)
             searched_res.append(
                 TextualMemoryItem(id=item.id, memory=item.memory, metadata=new_meta)
             )
 
-        # Step 4: Reasoning over all retrieved and ranked memory
-        if mode == "fine":
-            searched_res = self.reasoner.reason(
-                query=query,
-                ranked_memories=searched_res,
-                parsed_goal=parsed_goal,
-            )
-
         # Step 5: Update usage history with current timestamp
         now_time = datetime.now().isoformat()
+        if "chat_history" in info:
+            info.pop("chat_history")
         usage_record = json.dumps(
             {"time": now_time, "info": info}
         )  # `info` should be a serializable dict or string
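Note that `info.pop("chat_history")` mutates the dictionary the caller passed in, so the caller's `info` loses its chat history after the search returns. A non-mutating sketch (an alternative, not the shipped behavior):

# Build the usage record from a filtered copy instead of mutating `info`.
serializable_info = {k: v for k, v in info.items() if k != "chat_history"}
usage_record = json.dumps({"time": now_time, "info": serializable_info})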
memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py

@@ -1,4 +1,5 @@
-import json
+import logging
+import traceback
 
 from string import Template
 
@@ -14,11 +15,16 @@ class TaskGoalParser:
     - mode == 'fine': use LLM to parse structured topic/keys/tags
     """
 
-    def __init__(self, llm=BaseLLM, mode: str = "fast"):
+    def __init__(self, llm=BaseLLM):
         self.llm = llm
-        self.mode = mode
 
-    def parse(self, task_description: str, context: str = "") -> ParsedTaskGoal:
+    def parse(
+        self,
+        task_description: str,
+        context: str = "",
+        conversation: list[dict] | None = None,
+        mode: str = "fast",
+    ) -> ParsedTaskGoal:
         """
         Parse user input into structured semantic layers.
         Returns:
@@ -26,42 +32,63 @@ class TaskGoalParser:
         - mode == 'fast': use jieba to split words only
         - mode == 'fine': use LLM to parse structured topic/keys/tags
         """
-        if self.mode == "fast":
+        if mode == "fast":
             return self._parse_fast(task_description)
-        elif self.mode == "fine":
+        elif mode == "fine":
             if not self.llm:
                 raise ValueError("LLM not provided for slow mode.")
-            return self._parse_fine(task_description, context)
+            return self._parse_fine(task_description, context, conversation)
         else:
-            raise ValueError(f"Unknown mode: {self.mode}")
+            raise ValueError(f"Unknown mode: {mode}")
 
     def _parse_fast(self, task_description: str, limit_num: int = 5) -> ParsedTaskGoal:
         """
         Fast mode: simple jieba word split.
         """
         return ParsedTaskGoal(
-            memories=[task_description], keys=[task_description], tags=[], goal_type="default"
+            memories=[task_description],
+            keys=[task_description],
+            tags=[],
+            goal_type="default",
+            rephrased_query=task_description,
+            internet_search=False,
         )
 
-    def _parse_fine(self, query: str, context: str = "") -> ParsedTaskGoal:
+    def _parse_fine(
+        self, query: str, context: str = "", conversation: list[dict] | None = None
+    ) -> ParsedTaskGoal:
         """
         Slow mode: LLM structured parse.
         """
-        prompt = Template(TASK_PARSE_PROMPT).substitute(task=query.strip(), context=context)
-        response = self.llm.generate(messages=[{"role": "user", "content": prompt}])
-        return self._parse_response(response)
+        try:
+            if conversation:
+                conversation_prompt = "\n".join(
+                    [f"{each['role']}: {each['content']}" for each in conversation]
+                )
+            else:
+                conversation_prompt = ""
+            prompt = Template(TASK_PARSE_PROMPT).substitute(
+                task=query.strip(), context=context, conversation=conversation_prompt
+            )
+            response = self.llm.generate(messages=[{"role": "user", "content": prompt}])
+            return self._parse_response(response)
+        except Exception:
+            logging.warning(f"Fail to fine-parse query {query}: {traceback.format_exc()}")
+            return self._parse_fast(query)
 
     def _parse_response(self, response: str) -> ParsedTaskGoal:
         """
         Parse LLM JSON output safely.
         """
         try:
-            response = response.replace("```", "").replace("json", "")
-            response_json = json.loads(response.strip())
+            response = response.replace("```", "").replace("json", "").strip()
+            response_json = eval(response)
             return ParsedTaskGoal(
                 memories=response_json.get("memories", []),
                 keys=response_json.get("keys", []),
                 tags=response_json.get("tags", []),
+                rephrased_query=response_json.get("rephrased_instruction", None),
+                internet_search=response_json.get("internet_search", False),
                 goal_type=response_json.get("goal_type", "default"),
             )
         except Exception as e:
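The switch from `json.loads` to a bare `eval` on raw LLM output is the most security-relevant change in this hunk: `eval` executes any Python expression the model (or a prompt-injected document upstream of it) returns. A safer sketch that still tolerates the Python-style `True`/`False` literals the new prompt requests (an alternative, not the shipped code; `parse_goal_payload` is a hypothetical helper name):

import ast
import json

def parse_goal_payload(response: str) -> dict:
    """Parse the model's reply without executing it as code."""
    cleaned = response.replace("```", "").replace("json", "").strip()
    try:
        return json.loads(cleaned)  # strict JSON: true/false, double quotes
    except json.JSONDecodeError:
        # literal_eval accepts only Python literals (dicts, lists, strings,
        # numbers, True/False/None) and never calls functions or dunders.
        return ast.literal_eval(cleaned)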
memos/memories/textual/tree_text_memory/retrieve/utils.py

@@ -1,19 +1,21 @@
 # Prompt for task parsing
 TASK_PARSE_PROMPT = """
-You are a task parsing expert. Given a user's task instruction, extract the following structured information:
-
-Given a user task instruction and optional related memory context,
-extract the following structured information:
+You are a task parsing expert. Given a user task instruction, optional former conversation and optional related memory context,extract the following structured information:
 1. Keys: the high-level keywords directly relevant to the user’s task.
 2. Tags: thematic tags to help categorize and retrieve related memories.
 3. Goal Type: retrieval | qa | generation
-4. Memories: Provide 2–5 short semantic expansions or rephrasings of the task instruction.
-   These are used for improved embedding search coverage.
-   Each should be clear, concise, and meaningful for retrieval.
+4. Rephrased instruction: Give a rephrased task instruction based on the former conversation to make it less confusing to look alone. If you think the task instruction is easy enough to understand, or there is no former conversation, set "rephrased_instruction" to an empty string.
+5. Need for internet search: If you think you need to search the internet to finish the rephrased/original user task instruction, set "internet_search" to True. Otherwise, set it to False.
+6. Memories: Provide 2–5 short semantic expansions or rephrasings of the rephrased/original user task instruction. These are used for improved embedding search coverage. Each should be clear, concise, and meaningful for retrieval.
 
 Task description:
 \"\"\"$task\"\"\"
 
+Former conversation (if any):
+\"\"\"
+$conversation
+\"\"\"
+
 Context (if any):
 \"\"\"$context\"\"\"
 
@@ -22,6 +24,8 @@ Return strictly in this JSON format:
   "keys": [...],
   "tags": [...],
   "goal_type": "retrieval | qa | generation",
+  "rephrased_instruction": "...",  # return an empty string if the original instruction is easy enough to understand
+  "internet_search": True/False,
   "memories": ["...", "...", ...]
 }
 """
memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py

@@ -3,13 +3,15 @@
 import json
 import uuid
 
+from concurrent.futures import ThreadPoolExecutor, as_completed
 from datetime import datetime
 
 import requests
 
 from memos.embedders.factory import OllamaEmbedder
 from memos.log import get_logger
-from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
+from memos.mem_reader.base import BaseMemReader
+from memos.memories.textual.item import TextualMemoryItem
 
 
 logger = get_logger(__name__)
@@ -93,8 +95,8 @@ class XinyuSearchAPI:
             "online_search": {
                 "max_entries": max_results,
                 "cache_switch": False,
-                "baidu_field": {"switch": True, "mode": "relevance", "type": "page"},
-                "bing_field": {"switch": False, "mode": "relevance", "type": "page_web"},
+                "baidu_field": {"switch": False, "mode": "relevance", "type": "page"},
+                "bing_field": {"switch": True, "mode": "relevance", "type": "page"},
                 "sogou_field": {"switch": False, "mode": "relevance", "type": "page"},
             },
             "request_id": "memos" + str(uuid.uuid4()),
@@ -112,6 +114,7 @@ class XinyuSearchRetriever:
         access_key: str,
         search_engine_id: str,
         embedder: OllamaEmbedder,
+        reader: BaseMemReader,
         max_results: int = 20,
     ):
         """
@@ -121,12 +124,14 @@ class XinyuSearchRetriever:
             access_key: Xinyu API access key
             embedder: Embedder instance for generating embeddings
             max_results: Maximum number of results to retrieve
+            reader: MemReader Moduel to deal with internet contents
         """
         self.xinyu_api = XinyuSearchAPI(access_key, search_engine_id, max_results=max_results)
         self.embedder = embedder
+        self.reader = reader
 
     def retrieve_from_internet(
-        self, query: str, top_k: int = 10, parsed_goal=None
+        self, query: str, top_k: int = 10, parsed_goal=None, info=None
     ) -> list[TextualMemoryItem]:
         """
         Retrieve information from Xinyu search and convert to TextualMemoryItem format
@@ -135,7 +140,7 @@ class XinyuSearchRetriever:
             query: Search query
             top_k: Number of results to return
             parsed_goal: Parsed task goal (optional)
-
+            info (dict): Leave a record of memory consumption.
         Returns:
             List of TextualMemoryItem
         """
@@ -143,63 +148,25 @@ class XinyuSearchRetriever:
         search_results = self.xinyu_api.search(query, max_results=top_k)
 
         # Convert to TextualMemoryItem format
-        memory_items = []
-
-        for _, result in enumerate(search_results):
-            # Extract basic information from Xinyu response format
-            title = result.get("title", "")
-            content = result.get("content", "")
-            summary = result.get("summary", "")
-            url = result.get("url", "")
-            publish_time = result.get("publish_time", "")
-            if publish_time:
+        memory_items: list[TextualMemoryItem] = []
+
+        with ThreadPoolExecutor(max_workers=8) as executor:
+            futures = [
+                executor.submit(self._process_result, result, query, parsed_goal, info)
+                for result in search_results
+            ]
+            for future in as_completed(futures):
                 try:
-                    publish_time = datetime.strptime(publish_time, "%Y-%m-%d %H:%M:%S").strftime(
-                        "%Y-%m-%d"
-                    )
+                    memory_items.extend(future.result())
                 except Exception as e:
-                    logger.error(f"xinyu search error: {e}")
-                    publish_time = datetime.now().strftime("%Y-%m-%d")
-            else:
-                publish_time = datetime.now().strftime("%Y-%m-%d")
-            source = result.get("source", "")
-            site = result.get("site", "")
-            if site:
-                site = site.split("|")[0]
-
-            # Combine memory content
-            memory_content = (
-                f"Title: {title}\nSummary: {summary}\nContent: {content[:200]}...\nSource: {url}"
-            )
+                    logger.error(f"Error processing search result: {e}")
 
-            # Create metadata
-            metadata = TreeNodeTextualMemoryMetadata(
-                user_id=None,
-                session_id=None,
-                status="activated",
-                type="fact",  # Search results are usually factual information
-                memory_time=publish_time,
-                source="web",
-                confidence=85.0,  # Confidence level for search information
-                entities=self._extract_entities(title, content, summary),
-                tags=self._extract_tags(title, content, summary, parsed_goal),
-                visibility="public",
-                memory_type="LongTermMemory",  # Search results as working memory
-                key=title,
-                sources=[url] if url else [],
-                embedding=self.embedder.embed([memory_content])[0],
-                created_at=datetime.now().isoformat(),
-                usage=[],
-                background=f"Xinyu search result from {site or source}",
-            )
-            # Create TextualMemoryItem
-            memory_item = TextualMemoryItem(
-                id=str(uuid.uuid4()), memory=memory_content, metadata=metadata
-            )
+        unique_memory_items = {}
+        for item in memory_items:
+            if item.memory not in unique_memory_items:
+                unique_memory_items[item.memory] = item
 
-            memory_items.append(memory_item)
-
-        return memory_items
+        return list(unique_memory_items.values())
 
     def _extract_entities(self, title: str, content: str, summary: str) -> list[str]:
         """
@@ -333,3 +300,40 @@ class XinyuSearchRetriever:
             tags.extend(parsed_goal.tags)
 
         return list(set(tags))[:15]  # Limit to 15 tags
+
+    def _process_result(
+        self, result: dict, query: str, parsed_goal: str, info: None
+    ) -> list[TextualMemoryItem]:
+        if not info:
+            info = {"user_id": "", "session_id": ""}
+        title = result.get("title", "")
+        content = result.get("content", "")
+        summary = result.get("summary", "")
+        url = result.get("url", "")
+        publish_time = result.get("publish_time", "")
+        if publish_time:
+            try:
+                publish_time = datetime.strptime(publish_time, "%Y-%m-%d %H:%M:%S").strftime(
+                    "%Y-%m-%d"
+                )
+            except Exception as e:
+                logger.error(f"xinyu search error: {e}")
+                publish_time = datetime.now().strftime("%Y-%m-%d")
+        else:
+            publish_time = datetime.now().strftime("%Y-%m-%d")
+
+        read_items = self.reader.get_memory([content], type="doc", info=info)
+
+        memory_items = []
+        for read_item_i in read_items[0]:
+            read_item_i.memory = (
+                f"Title: {title}\nNewsTime: {publish_time}\nSummary: {summary}\n"
+                f"Content: {read_item_i.memory}"
+            )
+            read_item_i.metadata.source = "web"
+            read_item_i.metadata.memory_type = "OuterMemory"
+            read_item_i.metadata.sources = [url] if url else []
+            read_item_i.metadata.visibility = "public"
+
+            memory_items.append(read_item_i)
+        return memory_items
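The new helper's annotations are off: `parsed_goal` is annotated `str` although the caller passes the parsed-goal object, and `info: None` names the literal `None` rather than an optional dict. A corrected-signature sketch (annotations only, behavior unchanged; the `ParsedTaskGoal` import is assumed from the retrieve package):

# Sketch of more accurate annotations for the helper above.
def _process_result(
    self,
    result: dict,
    query: str,
    parsed_goal: "ParsedTaskGoal | None" = None,  # object, not str
    info: dict | None = None,                     # optional dict, not None
) -> list[TextualMemoryItem]:
    ...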