MemoryOS 0.0.1__py3-none-any.whl → 0.1.13__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MemoryOS might be problematic. Click here for more details.

Files changed (124) hide show
  1. memoryos-0.1.13.dist-info/METADATA +288 -0
  2. memoryos-0.1.13.dist-info/RECORD +122 -0
  3. memos/__init__.py +20 -1
  4. memos/api/start_api.py +420 -0
  5. memos/chunkers/__init__.py +4 -0
  6. memos/chunkers/base.py +24 -0
  7. memos/chunkers/factory.py +22 -0
  8. memos/chunkers/sentence_chunker.py +35 -0
  9. memos/configs/__init__.py +0 -0
  10. memos/configs/base.py +82 -0
  11. memos/configs/chunker.py +45 -0
  12. memos/configs/embedder.py +53 -0
  13. memos/configs/graph_db.py +45 -0
  14. memos/configs/internet_retriever.py +81 -0
  15. memos/configs/llm.py +71 -0
  16. memos/configs/mem_chat.py +81 -0
  17. memos/configs/mem_cube.py +89 -0
  18. memos/configs/mem_os.py +74 -0
  19. memos/configs/mem_reader.py +53 -0
  20. memos/configs/mem_scheduler.py +78 -0
  21. memos/configs/memory.py +195 -0
  22. memos/configs/parser.py +38 -0
  23. memos/configs/utils.py +8 -0
  24. memos/configs/vec_db.py +64 -0
  25. memos/deprecation.py +262 -0
  26. memos/embedders/__init__.py +0 -0
  27. memos/embedders/base.py +15 -0
  28. memos/embedders/factory.py +23 -0
  29. memos/embedders/ollama.py +74 -0
  30. memos/embedders/sentence_transformer.py +40 -0
  31. memos/exceptions.py +30 -0
  32. memos/graph_dbs/__init__.py +0 -0
  33. memos/graph_dbs/base.py +215 -0
  34. memos/graph_dbs/factory.py +21 -0
  35. memos/graph_dbs/neo4j.py +827 -0
  36. memos/hello_world.py +97 -0
  37. memos/llms/__init__.py +0 -0
  38. memos/llms/base.py +16 -0
  39. memos/llms/factory.py +25 -0
  40. memos/llms/hf.py +231 -0
  41. memos/llms/ollama.py +82 -0
  42. memos/llms/openai.py +34 -0
  43. memos/llms/utils.py +14 -0
  44. memos/log.py +78 -0
  45. memos/mem_chat/__init__.py +0 -0
  46. memos/mem_chat/base.py +30 -0
  47. memos/mem_chat/factory.py +21 -0
  48. memos/mem_chat/simple.py +200 -0
  49. memos/mem_cube/__init__.py +0 -0
  50. memos/mem_cube/base.py +29 -0
  51. memos/mem_cube/general.py +146 -0
  52. memos/mem_cube/utils.py +24 -0
  53. memos/mem_os/client.py +5 -0
  54. memos/mem_os/core.py +819 -0
  55. memos/mem_os/main.py +503 -0
  56. memos/mem_os/product.py +89 -0
  57. memos/mem_reader/__init__.py +0 -0
  58. memos/mem_reader/base.py +27 -0
  59. memos/mem_reader/factory.py +21 -0
  60. memos/mem_reader/memory.py +298 -0
  61. memos/mem_reader/simple_struct.py +241 -0
  62. memos/mem_scheduler/__init__.py +0 -0
  63. memos/mem_scheduler/base_scheduler.py +164 -0
  64. memos/mem_scheduler/general_scheduler.py +305 -0
  65. memos/mem_scheduler/modules/__init__.py +0 -0
  66. memos/mem_scheduler/modules/base.py +74 -0
  67. memos/mem_scheduler/modules/dispatcher.py +103 -0
  68. memos/mem_scheduler/modules/monitor.py +82 -0
  69. memos/mem_scheduler/modules/redis_service.py +146 -0
  70. memos/mem_scheduler/modules/retriever.py +41 -0
  71. memos/mem_scheduler/modules/schemas.py +146 -0
  72. memos/mem_scheduler/scheduler_factory.py +21 -0
  73. memos/mem_scheduler/utils.py +26 -0
  74. memos/mem_user/user_manager.py +488 -0
  75. memos/memories/__init__.py +0 -0
  76. memos/memories/activation/__init__.py +0 -0
  77. memos/memories/activation/base.py +42 -0
  78. memos/memories/activation/item.py +25 -0
  79. memos/memories/activation/kv.py +232 -0
  80. memos/memories/base.py +19 -0
  81. memos/memories/factory.py +34 -0
  82. memos/memories/parametric/__init__.py +0 -0
  83. memos/memories/parametric/base.py +19 -0
  84. memos/memories/parametric/item.py +11 -0
  85. memos/memories/parametric/lora.py +41 -0
  86. memos/memories/textual/__init__.py +0 -0
  87. memos/memories/textual/base.py +89 -0
  88. memos/memories/textual/general.py +286 -0
  89. memos/memories/textual/item.py +167 -0
  90. memos/memories/textual/naive.py +185 -0
  91. memos/memories/textual/tree.py +321 -0
  92. memos/memories/textual/tree_text_memory/__init__.py +0 -0
  93. memos/memories/textual/tree_text_memory/organize/__init__.py +0 -0
  94. memos/memories/textual/tree_text_memory/organize/manager.py +305 -0
  95. memos/memories/textual/tree_text_memory/retrieve/__init__.py +0 -0
  96. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +263 -0
  97. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +89 -0
  98. memos/memories/textual/tree_text_memory/retrieve/reasoner.py +61 -0
  99. memos/memories/textual/tree_text_memory/retrieve/recall.py +158 -0
  100. memos/memories/textual/tree_text_memory/retrieve/reranker.py +111 -0
  101. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +13 -0
  102. memos/memories/textual/tree_text_memory/retrieve/searcher.py +208 -0
  103. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +68 -0
  104. memos/memories/textual/tree_text_memory/retrieve/utils.py +48 -0
  105. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +335 -0
  106. memos/parsers/__init__.py +0 -0
  107. memos/parsers/base.py +15 -0
  108. memos/parsers/factory.py +19 -0
  109. memos/parsers/markitdown.py +22 -0
  110. memos/settings.py +8 -0
  111. memos/templates/__init__.py +0 -0
  112. memos/templates/mem_reader_prompts.py +98 -0
  113. memos/templates/mem_scheduler_prompts.py +65 -0
  114. memos/templates/mos_prompts.py +63 -0
  115. memos/types.py +55 -0
  116. memos/vec_dbs/__init__.py +0 -0
  117. memos/vec_dbs/base.py +105 -0
  118. memos/vec_dbs/factory.py +21 -0
  119. memos/vec_dbs/item.py +43 -0
  120. memos/vec_dbs/qdrant.py +292 -0
  121. memoryos-0.0.1.dist-info/METADATA +0 -53
  122. memoryos-0.0.1.dist-info/RECORD +0 -5
  123. {memoryos-0.0.1.dist-info → memoryos-0.1.13.dist-info}/LICENSE +0 -0
  124. {memoryos-0.0.1.dist-info → memoryos-0.1.13.dist-info}/WHEEL +0 -0
@@ -0,0 +1,208 @@
1
+ import concurrent.futures
2
+ import json
3
+
4
+ from datetime import datetime
5
+
6
+ from memos.embedders.factory import OllamaEmbedder
7
+ from memos.graph_dbs.factory import Neo4jGraphDB
8
+ from memos.llms.factory import OllamaLLM, OpenAILLM
9
+ from memos.memories.textual.item import SearchedTreeNodeTextualMemoryMetadata, TextualMemoryItem
10
+
11
+ from .internet_retriever_factory import InternetRetrieverFactory
12
+ from .reasoner import MemoryReasoner
13
+ from .recall import GraphMemoryRetriever
14
+ from .reranker import MemoryReranker
15
+ from .task_goal_parser import TaskGoalParser
16
+
17
+
18
class Searcher:
    """Pipeline that turns a user query into a ranked list of memories.

    Flow: TaskGoalParser -> GraphMemoryRetriever -> MemoryReranker ->
    (optional) MemoryReasoner, with working-memory, long-term/user-memory and
    internet paths executed in parallel threads.
    """

    def __init__(
        self,
        dispatcher_llm: OpenAILLM | OllamaLLM,
        graph_store: Neo4jGraphDB,
        embedder: OllamaEmbedder,
        internet_retriever: InternetRetrieverFactory | None = None,
    ):
        # NOTE(review): despite the annotation, ``internet_retriever`` is used
        # below as a retriever instance (it must expose
        # ``retrieve_from_internet``), not as a factory — confirm with callers.
        self.graph_store = graph_store
        self.embedder = embedder

        self.task_goal_parser = TaskGoalParser(dispatcher_llm)
        self.graph_retriever = GraphMemoryRetriever(self.graph_store, self.embedder)
        self.reranker = MemoryReranker(dispatcher_llm, self.embedder)
        self.reasoner = MemoryReasoner(dispatcher_llm)

        # Create internet retriever from config if provided
        self.internet_retriever = internet_retriever

    def search(
        self, query: str, top_k: int, info=None, mode: str = "fast", memory_type: str = "All"
    ) -> list[TextualMemoryItem]:
        """
        Search for memories based on a query.
        User query -> TaskGoalParser -> GraphMemoryRetriever ->
        MemoryReranker -> MemoryReasoner -> Final output
        Args:
            query (str): The query to search for.
            top_k (int): The number of top results to return.
            info (dict): Leave a record of memory consumption.
            mode (str, optional): The mode of the search.
            - 'fast': Uses a faster search process, sacrificing some precision for speed.
            - 'fine': Uses a more detailed search process, invoking large models for higher precision, but slower performance.
            memory_type (str): Type restriction for search.
            ['All', 'WorkingMemory', 'LongTermMemory', 'UserMemory']
        Returns:
            list[TextualMemoryItem]: List of matching memories.
        """

        # Step 1: Parse task structure into topic, concept, and fact levels.
        # In 'fine' mode, first gather related memories via embedding search so
        # the goal parser can see some graph context.
        context = []
        if mode == "fine":
            query_embedding = self.embedder.embed([query])[0]
            related_node_ids = self.graph_store.search_by_embedding(query_embedding, top_k=top_k)
            related_nodes = [
                self.graph_store.get_node(related_node["id"]) for related_node in related_node_ids
            ]

            # Deduplicate memory texts; note set() does not preserve order.
            context = [related_node["memory"] for related_node in related_nodes]
            context = list(set(context))

        # Step 1a: Parse task structure into topic, concept, and fact levels
        parsed_goal = self.task_goal_parser.parse(query, "\n".join(context))

        if parsed_goal.memories:
            # Embed the query plus each expanded memory phrase (deduplicated);
            # the result is a list of vectors, indexed as query_embedding[0] below.
            query_embedding = self.embedder.embed(list({query, *parsed_goal.memories}))
        # NOTE(review): if ``parsed_goal.memories`` is empty, ``query_embedding``
        # is unbound in 'fast' mode (NameError in the closures below) or remains
        # the single 'fine'-mode vector, whose ``[0]`` is a float — confirm the
        # parser always returns at least one memory.

        # Step 2a: Working memory retrieval (Path A)
        def retrieve_from_working_memory():
            """
            Direct structure-based retrieval from working memory.
            """
            if memory_type not in ["All", "WorkingMemory"]:
                return []

            working_memory = self.graph_retriever.retrieve(
                query=query, parsed_goal=parsed_goal, top_k=top_k, memory_scope="WorkingMemory"
            )
            # Rerank working_memory results
            ranked_memories = self.reranker.rerank(
                query=query,
                query_embedding=query_embedding[0],
                graph_results=working_memory,
                top_k=top_k,
                parsed_goal=parsed_goal,
            )
            return ranked_memories

        # Step 2b: Parallel long-term and user memory retrieval (Path B)
        def retrieve_ranked_long_term_and_user():
            """
            Retrieve from both long-term and user memory, then rank and merge results.
            """
            # Over-fetch (top_k * 2) from each scope so the combined rerank has
            # enough candidates before the final top_k cut.
            long_term_items = (
                self.graph_retriever.retrieve(
                    query=query,
                    query_embedding=query_embedding,
                    parsed_goal=parsed_goal,
                    top_k=top_k * 2,
                    memory_scope="LongTermMemory",
                )
                if memory_type in ["All", "LongTermMemory"]
                else []
            )
            user_items = (
                self.graph_retriever.retrieve(
                    query=query,
                    query_embedding=query_embedding,
                    parsed_goal=parsed_goal,
                    top_k=top_k * 2,
                    memory_scope="UserMemory",
                )
                if memory_type in ["All", "UserMemory"]
                else []
            )

            # Rerank combined results
            ranked_memories = self.reranker.rerank(
                query=query,
                query_embedding=query_embedding[0],
                graph_results=long_term_items + user_items,
                top_k=top_k * 2,
                parsed_goal=parsed_goal,
            )
            return ranked_memories

        # Step 2c: Internet retrieval (Path C)
        def retrieve_from_internet():
            """
            Retrieve information from the internet using Google Custom Search API.
            """
            # Internet search only participates in unrestricted ("All") queries.
            if not self.internet_retriever:
                return []
            if memory_type not in ["All"]:
                return []
            internet_items = self.internet_retriever.retrieve_from_internet(
                query=query, top_k=top_k, parsed_goal=parsed_goal
            )

            # Convert to the format expected by reranker
            ranked_memories = self.reranker.rerank(
                query=query,
                query_embedding=query_embedding[0],
                graph_results=internet_items,
                top_k=top_k * 2,
                parsed_goal=parsed_goal,
            )
            return ranked_memories

        # Step 3: Parallel execution of all paths
        with concurrent.futures.ThreadPoolExecutor(max_workers=3) as executor:
            future_working = executor.submit(retrieve_from_working_memory)
            future_hybrid = executor.submit(retrieve_ranked_long_term_and_user)
            future_internet = executor.submit(retrieve_from_internet)

            # .result() re-raises any exception from the worker threads.
            working_results = future_working.result()
            hybrid_results = future_hybrid.result()
            internet_results = future_internet.result()
        searched_res = working_results + hybrid_results + internet_results

        # Deduplicate by item.memory, keep higher score
        deduped_result = {}
        for item, score in searched_res:
            mem_key = item.memory
            if mem_key not in deduped_result or score > deduped_result[mem_key][1]:
                deduped_result[mem_key] = (item, score)

        # Keep the global top_k by score, and fold the score into the metadata
        # as ``relativity`` on a fresh SearchedTreeNodeTextualMemoryMetadata.
        searched_res = []
        for item, score in sorted(deduped_result.values(), key=lambda pair: pair[1], reverse=True)[
            :top_k
        ]:
            new_meta = SearchedTreeNodeTextualMemoryMetadata(
                **item.metadata.model_dump(), relativity=score
            )
            searched_res.append(
                TextualMemoryItem(id=item.id, memory=item.memory, metadata=new_meta)
            )

        # Step 4: Reasoning over all retrieved and ranked memory
        if mode == "fine":
            searched_res = self.reasoner.reason(
                query=query,
                ranked_memories=searched_res,
                parsed_goal=parsed_goal,
            )

        # Step 5: Update usage history with current timestamp
        now_time = datetime.now().isoformat()
        usage_record = json.dumps(
            {"time": now_time, "info": info}
        )  # `info` should be a serializable dict or string

        for item in searched_res:
            if (
                hasattr(item, "id")
                and hasattr(item, "metadata")
                and hasattr(item.metadata, "usage")
            ):
                item.metadata.usage.append(usage_record)
                self.graph_store.update_node(item.id, {"usage": item.metadata.usage})
        return searched_res
@@ -0,0 +1,68 @@
1
+ import json
2
+
3
+ from string import Template
4
+
5
+ from memos.llms.base import BaseLLM
6
+ from memos.memories.textual.tree_text_memory.retrieve.retrieval_mid_structs import ParsedTaskGoal
7
+ from memos.memories.textual.tree_text_memory.retrieve.utils import TASK_PARSE_PROMPT
8
+
9
+
10
class TaskGoalParser:
    """
    Unified TaskGoalParser:
    - mode == 'fast': wrap the raw task description without calling an LLM
    - mode == 'fine': use LLM to parse structured topic/keys/tags
    """

    def __init__(self, llm: BaseLLM | None = None, mode: str = "fast"):
        """
        Args:
            llm: LLM used for structured parsing. Required for 'fine' mode,
                unused in 'fast' mode.
            mode: Parsing strategy, 'fast' (default) or 'fine'.
        """
        # NOTE: the previous signature used ``llm=BaseLLM`` — the class object
        # itself as a default value, which could never serve ``generate()``
        # calls. ``None`` makes the "not provided" case explicit.
        self.llm = llm
        self.mode = mode

    def parse(self, task_description: str, context: str = "") -> ParsedTaskGoal:
        """
        Parse user input into structured semantic layers.

        Args:
            task_description: Raw user task/query text.
            context: Optional related-memory context, used only in 'fine' mode.

        Returns:
            ParsedTaskGoal: object containing topic/concept/fact levels and optional metadata

        Raises:
            ValueError: if the mode is unknown, or 'fine' mode has no LLM.
        """
        if self.mode == "fast":
            return self._parse_fast(task_description)
        elif self.mode == "fine":
            if not self.llm:
                raise ValueError("LLM not provided for slow mode.")
            return self._parse_fine(task_description, context)
        else:
            raise ValueError(f"Unknown mode: {self.mode}")

    def _parse_fast(self, task_description: str, limit_num: int = 5) -> ParsedTaskGoal:
        """
        Fast mode: echo the task description as both memory and key.

        ``limit_num`` is currently unused; kept for interface stability.
        """
        return ParsedTaskGoal(
            memories=[task_description], keys=[task_description], tags=[], goal_type="default"
        )

    def _parse_fine(self, query: str, context: str = "") -> ParsedTaskGoal:
        """
        Fine (slow) mode: LLM structured parse driven by TASK_PARSE_PROMPT.
        """
        prompt = Template(TASK_PARSE_PROMPT).substitute(task=query.strip(), context=context)
        response = self.llm.generate(messages=[{"role": "user", "content": prompt}])
        return self._parse_response(response)

    def _parse_response(self, response: str) -> ParsedTaskGoal:
        """
        Parse LLM JSON output safely.

        Only surrounding markdown code fences (```json ... ```) are stripped.
        The previous blanket ``.replace("json", "")`` corrupted any occurrence
        of "json" inside string values (e.g. a key named "json_schema").

        Raises:
            ValueError: when the cleaned response is not valid JSON.
        """
        try:
            cleaned = response.strip()
            # Strip a leading ```json / ``` fence and a trailing ``` fence only.
            cleaned = cleaned.removeprefix("```json").removeprefix("```")
            cleaned = cleaned.removesuffix("```")
            response_json = json.loads(cleaned.strip())
            return ParsedTaskGoal(
                memories=response_json.get("memories", []),
                keys=response_json.get("keys", []),
                tags=response_json.get("tags", []),
                goal_type=response_json.get("goal_type", "default"),
            )
        except Exception as e:
            raise ValueError(f"Failed to parse LLM output: {e}\nRaw response:\n{response}") from e
@@ -0,0 +1,48 @@
1
# Prompt for task parsing.
# Consumed by TaskGoalParser._parse_fine via string.Template.substitute, which
# fills the ``$task`` and ``$context`` placeholders. The literal JSON braces in
# the example output are safe because Template (unlike str.format) ignores {}.
TASK_PARSE_PROMPT = """
You are a task parsing expert. Given a user's task instruction, extract the following structured information:

Given a user task instruction and optional related memory context,
extract the following structured information:
1. Keys: the high-level keywords directly relevant to the user’s task.
2. Tags: thematic tags to help categorize and retrieve related memories.
3. Goal Type: retrieval | qa | generation
4. Memories: Provide 2–5 short semantic expansions or rephrasings of the task instruction.
These are used for improved embedding search coverage.
Each should be clear, concise, and meaningful for retrieval.

Task description:
\"\"\"$task\"\"\"

Context (if any):
\"\"\"$context\"\"\"

Return strictly in this JSON format:
{
"keys": [...],
"tags": [...],
"goal_type": "retrieval | qa | generation",
"memories": ["...", "...", ...]
}
"""


# Prompt for reasoning over reranked memories.
# ``${task}`` and ``${detailed_memory_list}`` are string.Template placeholders.
REASON_PROMPT = """
You are a reasoning agent working with a memory system. You will synthesize knowledge from multiple memory cards to construct a meaningful response to the task below.

Task: ${task}

Memory cards (with metadata):
${detailed_memory_list}

Please perform:
1. Clustering by theme (topic/concept/fact)
2. Identify useful chains or connections
3. Return a curated list of memory card IDs with reasons.

Output in JSON:
{
"selected_ids": [...],
"explanation": "..."
}
"""
@@ -0,0 +1,335 @@
1
+ """Xinyu Search API retriever for tree text memory."""
2
+
3
import json
import re
import uuid

from datetime import datetime

import requests

from memos.embedders.factory import OllamaEmbedder
from memos.log import get_logger
from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemoryMetadata
13
+
14
+
15
+ logger = get_logger(__name__)
16
+
17
+
18
class XinyuSearchAPI:
    """Thin HTTP client for the Xinyu search service."""

    def __init__(
        self,
        access_key: str,
        search_engine_id: str,
        max_results: int = 20,
        timeout: float = 30.0,
    ):
        """
        Initialize Xinyu Search API client

        Args:
            access_key: Xinyu API access key (sent as the ``token`` header)
            search_engine_id: Full URL of the Xinyu search endpoint
            max_results: Maximum number of results to retrieve
            timeout: Per-request timeout in seconds; without one, a stuck
                server would block the caller indefinitely.
        """
        self.access_key = access_key
        self.max_results = max_results
        self.timeout = timeout

        # API configuration — ``search_engine_id`` is actually the endpoint URL.
        self.config = {"url": search_engine_id}

        self.headers = {
            "User-Agent": "PostmanRuntime/7.39.0",
            "Content-Type": "application/json",
            "Accept": "*/*",
            "Accept-Encoding": "gzip, deflate, br",
            "Connection": "keep-alive",
            "token": access_key,
        }

    def query_detail(self, body: dict | None = None, detail: bool = True) -> list[dict]:
        """
        Query Xinyu search API for detailed results

        Args:
            body: Search parameters (JSON-serializable dict)
            detail: Whether to get detailed results; when False, each
                summary is prefixed with a marker string.

        Returns:
            List of search results; empty list on any error (best-effort).
        """
        res = []
        try:
            url = self.config["url"]

            # Bound the request with a timeout so a hung server cannot
            # stall the whole retrieval pipeline.
            resp = requests.post(
                url, headers=self.headers, data=json.dumps(body), timeout=self.timeout
            )
            res = resp.json()["results"]

            # The detail interface nests its results under the "online" key.
            if "search_type" in body:
                res = res["online"]

            if not detail:
                for res_i in res:
                    res_i["summary"] = "「SUMMARY」" + res_i.get("summary", "")

        except Exception:
            # Best-effort: log the full traceback and return what we have.
            logger.exception("xinyu search error")
        return res

    def search(self, query: str, max_results: int | None = None) -> list[dict]:
        """
        Execute search request

        Args:
            query: Search query
            max_results: Maximum number of results to return
                (defaults to the value given at construction time)

        Returns:
            List of search results
        """
        if max_results is None:
            max_results = self.max_results

        body = {
            "search_type": ["online"],
            "online_search": {
                "max_entries": max_results,
                "cache_switch": False,
                "baidu_field": {"switch": True, "mode": "relevance", "type": "page"},
                "bing_field": {"switch": False, "mode": "relevance", "type": "page_web"},
                "sogou_field": {"switch": False, "mode": "relevance", "type": "page"},
            },
            "request_id": "memos" + str(uuid.uuid4()),
            "queries": query,
        }

        return self.query_detail(body)
105
+
106
+
107
class XinyuSearchRetriever:
    """Xinyu Search retriever that converts search results to TextualMemoryItem format"""

    def __init__(
        self,
        access_key: str,
        search_engine_id: str,
        embedder: OllamaEmbedder,
        max_results: int = 20,
    ):
        """
        Initialize Xinyu search retriever

        Args:
            access_key: Xinyu API access key
            search_engine_id: URL of the Xinyu search endpoint
            embedder: Embedder instance for generating embeddings
            max_results: Maximum number of results to retrieve
        """
        self.xinyu_api = XinyuSearchAPI(access_key, search_engine_id, max_results=max_results)
        self.embedder = embedder

    def retrieve_from_internet(
        self, query: str, top_k: int = 10, parsed_goal=None
    ) -> list[TextualMemoryItem]:
        """
        Retrieve information from Xinyu search and convert to TextualMemoryItem format

        Args:
            query: Search query
            top_k: Number of results to return
            parsed_goal: Parsed task goal (optional)

        Returns:
            List of TextualMemoryItem
        """
        search_results = self.xinyu_api.search(query, max_results=top_k)
        return [self._to_memory_item(result, parsed_goal) for result in search_results]

    def _to_memory_item(self, result: dict, parsed_goal=None) -> TextualMemoryItem:
        """Convert one raw Xinyu search result dict into a TextualMemoryItem."""
        title = result.get("title", "")
        content = result.get("content", "")
        summary = result.get("summary", "")
        url = result.get("url", "")
        publish_time = self._normalize_publish_time(result.get("publish_time", ""))
        source = result.get("source", "")
        site = result.get("site", "")
        if site:
            # ``site`` may carry several names separated by '|'; keep the first.
            site = site.split("|")[0]

        # Combine memory content; content is truncated to keep items compact.
        memory_content = (
            f"Title: {title}\nSummary: {summary}\nContent: {content[:200]}...\nSource: {url}"
        )

        metadata = TreeNodeTextualMemoryMetadata(
            user_id=None,
            session_id=None,
            status="activated",
            type="fact",  # Search results are usually factual information
            memory_time=publish_time,
            source="web",
            confidence=85.0,  # Fixed confidence level for web search results
            entities=self._extract_entities(title, content, summary),
            tags=self._extract_tags(title, content, summary, parsed_goal),
            visibility="public",
            memory_type="LongTermMemory",  # Web results are stored as long-term memory
            key=title,
            sources=[url] if url else [],
            embedding=self.embedder.embed([memory_content])[0],
            created_at=datetime.now().isoformat(),
            usage=[],
            background=f"Xinyu search result from {site or source}",
        )
        return TextualMemoryItem(id=str(uuid.uuid4()), memory=memory_content, metadata=metadata)

    @staticmethod
    def _normalize_publish_time(publish_time: str) -> str:
        """Normalize 'YYYY-MM-DD HH:MM:SS' to 'YYYY-MM-DD'; fall back to today."""
        if publish_time:
            try:
                return datetime.strptime(publish_time, "%Y-%m-%d %H:%M:%S").strftime("%Y-%m-%d")
            except Exception as e:
                logger.error(f"xinyu search error: {e}")
        return datetime.now().strftime("%Y-%m-%d")

    def _extract_entities(self, title: str, content: str, summary: str) -> list[str]:
        """
        Extract candidate entities from title, content and summary.

        Heuristic: any capitalized word longer than 2 characters counts as an
        entity. This is a placeholder for a proper NER model.

        Args:
            title: Article title
            content: Article content
            summary: Article summary

        Returns:
            Up to 10 unique candidate entities (order not guaranteed).
        """
        text = f"{title} {content} {summary}"
        entities = [word for word in text.split() if len(word) > 2 and word[0].isupper()]
        return list(set(entities))[:10]  # Limit to 10 entities

    def _extract_tags(self, title: str, content: str, summary: str, parsed_goal=None) -> list[str]:
        """
        Extract tags from title, content and summary

        Args:
            title: Article title
            content: Article content
            summary: Article summary
            parsed_goal: Parsed task goal (optional)

        Returns:
            Up to 15 unique tags (order not guaranteed).
        """
        # Source-based tags.
        tags = ["xinyu_search", "news"]

        text = f"{title} {content} {summary}".lower()

        # Simple keyword-based tagging
        keywords = {
            "economy": [
                "economy",
                "GDP",
                "growth",
                "production",
                "industry",
                "investment",
                "consumption",
                "market",
                "trade",
                "finance",
            ],
            "politics": [
                "politics",
                "government",
                "policy",
                "meeting",
                "leader",
                "election",
                "parliament",
                "ministry",
            ],
            "technology": [
                "technology",
                "tech",
                "innovation",
                "digital",
                "internet",
                "AI",
                "artificial intelligence",
                "software",
                "hardware",
            ],
            "sports": [
                "sports",
                "game",
                "athlete",
                "olympic",
                "championship",
                "tournament",
                "team",
                "player",
            ],
            "culture": [
                "culture",
                "education",
                "art",
                "history",
                "literature",
                "music",
                "film",
                "museum",
            ],
            "health": [
                "health",
                "medical",
                "pandemic",
                "hospital",
                "doctor",
                "medicine",
                "disease",
                "treatment",
            ],
            "environment": [
                "environment",
                "ecology",
                "pollution",
                "green",
                "climate",
                "sustainability",
                "renewable",
            ],
        }

        # Match keywords case-insensitively on word boundaries. The previous
        # code compared raw keywords against the lowercased text, so upper-case
        # keywords ("GDP", "AI") could never match; plain substring matching
        # also falsely fired on fragments (e.g. "art" inside "particular").
        for category, words in keywords.items():
            if any(re.search(rf"\b{re.escape(word.lower())}\b", text) for word in words):
                tags.append(category)

        # Add goal-based tags if available
        if parsed_goal and hasattr(parsed_goal, "tags"):
            tags.extend(parsed_goal.tags)

        return list(set(tags))[:15]  # Limit to 15 tags
File without changes
memos/parsers/base.py ADDED
@@ -0,0 +1,15 @@
1
+ from abc import ABC, abstractmethod
2
+
3
+ from memos.configs.parser import BaseParserConfig
4
+
5
+
6
class BaseParser(ABC):
    """Abstract interface that every document parser must implement."""

    @abstractmethod
    def __init__(self, config: BaseParserConfig):
        """Set up the parser from its configuration object."""

    @abstractmethod
    def parse(self, file_path: str) -> str:
        """Read the file at ``file_path`` and return its textual content."""
@@ -0,0 +1,19 @@
1
+ from typing import Any, ClassVar
2
+
3
+ from memos.configs.parser import ParserConfigFactory
4
+ from memos.parsers.base import BaseParser
5
+ from memos.parsers.markitdown import MarkItDownParser
6
+
7
+
8
class ParserFactory(BaseParser):
    """Creates the parser implementation matching a configuration backend."""

    backend_to_class: ClassVar[dict[str, Any]] = {"markitdown": MarkItDownParser}

    @classmethod
    def from_config(cls, config_factory: ParserConfigFactory) -> BaseParser:
        """Instantiate the parser class registered for the configured backend."""
        backend_name = config_factory.backend
        if backend_name not in cls.backend_to_class:
            raise ValueError(f"Invalid backend: {backend_name}")
        return cls.backend_to_class[backend_name](config_factory.config)