MemoryOS: memoryos-1.0.0-py3-none-any.whl → memoryos-1.1.1-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MemoryOS might be problematic. Click here for more details.

Files changed (94)
  1. {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info}/METADATA +8 -2
  2. {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info}/RECORD +92 -69
  3. {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info}/WHEEL +1 -1
  4. memos/__init__.py +1 -1
  5. memos/api/client.py +109 -0
  6. memos/api/config.py +35 -8
  7. memos/api/context/dependencies.py +15 -66
  8. memos/api/middleware/request_context.py +63 -0
  9. memos/api/product_api.py +5 -2
  10. memos/api/product_models.py +107 -16
  11. memos/api/routers/product_router.py +62 -19
  12. memos/api/start_api.py +13 -0
  13. memos/configs/graph_db.py +4 -0
  14. memos/configs/mem_scheduler.py +38 -3
  15. memos/configs/memory.py +13 -0
  16. memos/configs/reranker.py +18 -0
  17. memos/context/context.py +255 -0
  18. memos/embedders/factory.py +2 -0
  19. memos/graph_dbs/base.py +4 -2
  20. memos/graph_dbs/nebular.py +368 -223
  21. memos/graph_dbs/neo4j.py +49 -13
  22. memos/graph_dbs/neo4j_community.py +13 -3
  23. memos/llms/factory.py +2 -0
  24. memos/llms/openai.py +74 -2
  25. memos/llms/vllm.py +2 -0
  26. memos/log.py +128 -4
  27. memos/mem_cube/general.py +3 -1
  28. memos/mem_os/core.py +89 -23
  29. memos/mem_os/main.py +3 -6
  30. memos/mem_os/product.py +418 -154
  31. memos/mem_os/utils/reference_utils.py +20 -0
  32. memos/mem_reader/factory.py +2 -0
  33. memos/mem_reader/simple_struct.py +204 -82
  34. memos/mem_scheduler/analyzer/__init__.py +0 -0
  35. memos/mem_scheduler/analyzer/mos_for_test_scheduler.py +569 -0
  36. memos/mem_scheduler/analyzer/scheduler_for_eval.py +280 -0
  37. memos/mem_scheduler/base_scheduler.py +126 -56
  38. memos/mem_scheduler/general_modules/dispatcher.py +2 -2
  39. memos/mem_scheduler/general_modules/misc.py +99 -1
  40. memos/mem_scheduler/general_modules/scheduler_logger.py +17 -11
  41. memos/mem_scheduler/general_scheduler.py +40 -88
  42. memos/mem_scheduler/memory_manage_modules/__init__.py +5 -0
  43. memos/mem_scheduler/memory_manage_modules/memory_filter.py +308 -0
  44. memos/mem_scheduler/{general_modules → memory_manage_modules}/retriever.py +34 -7
  45. memos/mem_scheduler/monitors/dispatcher_monitor.py +9 -8
  46. memos/mem_scheduler/monitors/general_monitor.py +119 -39
  47. memos/mem_scheduler/optimized_scheduler.py +124 -0
  48. memos/mem_scheduler/orm_modules/__init__.py +0 -0
  49. memos/mem_scheduler/orm_modules/base_model.py +635 -0
  50. memos/mem_scheduler/orm_modules/monitor_models.py +261 -0
  51. memos/mem_scheduler/scheduler_factory.py +2 -0
  52. memos/mem_scheduler/schemas/monitor_schemas.py +96 -29
  53. memos/mem_scheduler/utils/config_utils.py +100 -0
  54. memos/mem_scheduler/utils/db_utils.py +33 -0
  55. memos/mem_scheduler/utils/filter_utils.py +1 -1
  56. memos/mem_scheduler/webservice_modules/__init__.py +0 -0
  57. memos/mem_user/mysql_user_manager.py +4 -2
  58. memos/memories/activation/kv.py +2 -1
  59. memos/memories/textual/item.py +96 -17
  60. memos/memories/textual/naive.py +1 -1
  61. memos/memories/textual/tree.py +57 -3
  62. memos/memories/textual/tree_text_memory/organize/handler.py +4 -2
  63. memos/memories/textual/tree_text_memory/organize/manager.py +28 -14
  64. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +1 -2
  65. memos/memories/textual/tree_text_memory/organize/reorganizer.py +75 -23
  66. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +10 -6
  67. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -2
  68. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +2 -0
  69. memos/memories/textual/tree_text_memory/retrieve/recall.py +119 -21
  70. memos/memories/textual/tree_text_memory/retrieve/searcher.py +172 -44
  71. memos/memories/textual/tree_text_memory/retrieve/utils.py +6 -4
  72. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +5 -4
  73. memos/memos_tools/notification_utils.py +46 -0
  74. memos/memos_tools/singleton.py +174 -0
  75. memos/memos_tools/thread_safe_dict.py +22 -0
  76. memos/memos_tools/thread_safe_dict_segment.py +382 -0
  77. memos/parsers/factory.py +2 -0
  78. memos/reranker/__init__.py +4 -0
  79. memos/reranker/base.py +24 -0
  80. memos/reranker/concat.py +59 -0
  81. memos/reranker/cosine_local.py +96 -0
  82. memos/reranker/factory.py +48 -0
  83. memos/reranker/http_bge.py +312 -0
  84. memos/reranker/noop.py +16 -0
  85. memos/templates/mem_reader_prompts.py +289 -40
  86. memos/templates/mem_scheduler_prompts.py +242 -0
  87. memos/templates/mos_prompts.py +133 -60
  88. memos/types.py +4 -1
  89. memos/api/context/context.py +0 -147
  90. memos/mem_scheduler/mos_for_test_scheduler.py +0 -146
  91. {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info}/entry_points.txt +0 -0
  92. {memoryos-1.0.0.dist-info → memoryos-1.1.1.dist-info/licenses}/LICENSE +0 -0
  93. /memos/mem_scheduler/{general_modules → webservice_modules}/rabbitmq_service.py +0 -0
  94. /memos/mem_scheduler/{general_modules → webservice_modules}/redis_service.py +0 -0
@@ -1,3 +1,8 @@
1
+ from memos.memories.textual.item import (
2
+ TextualMemoryItem,
3
+ )
4
+
5
+
1
6
  def split_continuous_references(text: str) -> str:
2
7
  """
3
8
  Split continuous reference tags into individual reference tags.
@@ -131,3 +136,18 @@ def process_streaming_references_complete(text_buffer: str) -> tuple[str, str]:
131
136
  # No reference-like patterns found, process all text
132
137
  processed_text = split_continuous_references(text_buffer)
133
138
  return processed_text, ""
139
+
140
+
141
+ def prepare_reference_data(memories_list: list[TextualMemoryItem]) -> list[dict]:
142
+ # Prepare reference data
143
+ reference = []
144
+ for memories in memories_list:
145
+ memories_json = memories.model_dump()
146
+ memories_json["metadata"]["ref_id"] = f"{memories.id.split('-')[0]}"
147
+ memories_json["metadata"]["embedding"] = []
148
+ memories_json["metadata"]["sources"] = []
149
+ memories_json["metadata"]["memory"] = memories.memory
150
+ memories_json["metadata"]["id"] = memories.id
151
+ reference.append({"metadata": memories_json["metadata"]})
152
+
153
+ return reference
@@ -3,6 +3,7 @@ from typing import Any, ClassVar
3
3
  from memos.configs.mem_reader import MemReaderConfigFactory
4
4
  from memos.mem_reader.base import BaseMemReader
5
5
  from memos.mem_reader.simple_struct import SimpleStructMemReader
6
+ from memos.memos_tools.singleton import singleton_factory
6
7
 
7
8
 
8
9
  class MemReaderFactory(BaseMemReader):
@@ -13,6 +14,7 @@ class MemReaderFactory(BaseMemReader):
13
14
  }
14
15
 
15
16
  @classmethod
17
+ @singleton_factory()
16
18
  def from_config(cls, config_factory: MemReaderConfigFactory) -> BaseMemReader:
17
19
  backend = config_factory.backend
18
20
  if backend not in cls.backend_to_class:
@@ -1,14 +1,19 @@
1
1
  import concurrent.futures
2
2
  import copy
3
3
  import json
4
+ import os
5
+ import re
4
6
 
5
7
  from abc import ABC
6
8
  from typing import Any
7
9
 
10
+ from tqdm import tqdm
11
+
8
12
  from memos import log
9
13
  from memos.chunkers import ChunkerFactory
10
14
  from memos.configs.mem_reader import SimpleStructMemReaderConfig
11
15
  from memos.configs.parser import ParserConfigFactory
16
+ from memos.context.context import ContextThreadPoolExecutor
12
17
  from memos.embedders.factory import EmbedderFactory
13
18
  from memos.llms.factory import LLMFactory
14
19
  from memos.mem_reader.base import BaseMemReader
@@ -16,12 +21,95 @@ from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemory
16
21
  from memos.parsers.factory import ParserFactory
17
22
  from memos.templates.mem_reader_prompts import (
18
23
  SIMPLE_STRUCT_DOC_READER_PROMPT,
24
+ SIMPLE_STRUCT_DOC_READER_PROMPT_ZH,
19
25
  SIMPLE_STRUCT_MEM_READER_EXAMPLE,
26
+ SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH,
20
27
  SIMPLE_STRUCT_MEM_READER_PROMPT,
28
+ SIMPLE_STRUCT_MEM_READER_PROMPT_ZH,
21
29
  )
30
+ from memos.utils import timed
22
31
 
23
32
 
24
33
  logger = log.get_logger(__name__)
34
+ PROMPT_DICT = {
35
+ "chat": {
36
+ "en": SIMPLE_STRUCT_MEM_READER_PROMPT,
37
+ "zh": SIMPLE_STRUCT_MEM_READER_PROMPT_ZH,
38
+ "en_example": SIMPLE_STRUCT_MEM_READER_EXAMPLE,
39
+ "zh_example": SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH,
40
+ },
41
+ "doc": {"en": SIMPLE_STRUCT_DOC_READER_PROMPT, "zh": SIMPLE_STRUCT_DOC_READER_PROMPT_ZH},
42
+ }
43
+
44
+
45
+ def detect_lang(text):
46
+ try:
47
+ if not text or not isinstance(text, str):
48
+ return "en"
49
+ chinese_pattern = r"[\u4e00-\u9fff\u3400-\u4dbf\U00020000-\U0002a6df\U0002a700-\U0002b73f\U0002b740-\U0002b81f\U0002b820-\U0002ceaf\uf900-\ufaff]"
50
+ chinese_chars = re.findall(chinese_pattern, text)
51
+ if len(chinese_chars) / len(re.sub(r"[\s\d\W]", "", text)) > 0.3:
52
+ return "zh"
53
+ return "en"
54
+ except Exception:
55
+ return "en"
56
+
57
+
58
+ def _build_node(idx, message, info, scene_file, llm, parse_json_result, embedder):
59
+ # generate
60
+ try:
61
+ raw = llm.generate(message)
62
+ if not raw:
63
+ logger.warning(f"[LLM] Empty generation for input: {message}")
64
+ return None
65
+ except Exception as e:
66
+ logger.error(f"[LLM] Exception during generation: {e}")
67
+ return None
68
+
69
+ # parse_json_result
70
+ try:
71
+ chunk_res = parse_json_result(raw)
72
+ if not chunk_res:
73
+ logger.warning(f"[Parse] Failed to parse result: {raw}")
74
+ return None
75
+ except Exception as e:
76
+ logger.error(f"[Parse] Exception during JSON parsing: {e}")
77
+ return None
78
+
79
+ try:
80
+ value = chunk_res.get("value", "").strip()
81
+ if not value:
82
+ logger.warning("[BuildNode] value is empty")
83
+ return None
84
+
85
+ tags = chunk_res.get("tags", [])
86
+ if not isinstance(tags, list):
87
+ tags = []
88
+
89
+ key = chunk_res.get("key", None)
90
+
91
+ embedding = embedder.embed([value])[0]
92
+
93
+ return TextualMemoryItem(
94
+ memory=value,
95
+ metadata=TreeNodeTextualMemoryMetadata(
96
+ user_id=info.get("user_id", ""),
97
+ session_id=info.get("session_id", ""),
98
+ memory_type="LongTermMemory",
99
+ status="activated",
100
+ tags=tags,
101
+ key=key,
102
+ embedding=embedding,
103
+ usage=[],
104
+ sources=[{"type": "doc", "doc_path": f"{scene_file}_{idx}"}],
105
+ background="",
106
+ confidence=0.99,
107
+ type="fact",
108
+ ),
109
+ )
110
+ except Exception as e:
111
+ logger.error(f"[BuildNode] Error building node: {e}")
112
+ return None
25
113
 
26
114
 
27
115
  class SimpleStructMemReader(BaseMemReader, ABC):
@@ -39,42 +127,77 @@ class SimpleStructMemReader(BaseMemReader, ABC):
39
127
  self.embedder = EmbedderFactory.from_config(config.embedder)
40
128
  self.chunker = ChunkerFactory.from_config(config.chunker)
41
129
 
130
+ @timed
42
131
  def _process_chat_data(self, scene_data_info, info):
43
- prompt = SIMPLE_STRUCT_MEM_READER_PROMPT.replace(
44
- "${conversation}", "\n".join(scene_data_info)
45
- )
132
+ mem_list = []
133
+ for item in scene_data_info:
134
+ if "chat_time" in item:
135
+ mem = item["role"] + ": " + f"[{item['chat_time']}]: " + item["content"]
136
+ mem_list.append(mem)
137
+ else:
138
+ mem = item["role"] + ":" + item["content"]
139
+ mem_list.append(mem)
140
+ lang = detect_lang("\n".join(mem_list))
141
+ template = PROMPT_DICT["chat"][lang]
142
+ examples = PROMPT_DICT["chat"][f"{lang}_example"]
143
+
144
+ prompt = template.replace("${conversation}", "\n".join(mem_list))
46
145
  if self.config.remove_prompt_example:
47
- prompt = prompt.replace(SIMPLE_STRUCT_MEM_READER_EXAMPLE, "")
146
+ prompt = prompt.replace(examples, "")
48
147
 
49
148
  messages = [{"role": "user", "content": prompt}]
50
149
 
51
- response_text = self.llm.generate(messages)
52
- response_json = self.parse_json_result(response_text)
150
+ try:
151
+ response_text = self.llm.generate(messages)
152
+ response_json = self.parse_json_result(response_text)
153
+ except Exception as e:
154
+ logger.error(f"[LLM] Exception during chat generation: {e}")
155
+ response_json = {
156
+ "memory list": [
157
+ {
158
+ "key": "\n".join(mem_list)[:10],
159
+ "memory_type": "UserMemory",
160
+ "value": "\n".join(mem_list),
161
+ "tags": [],
162
+ }
163
+ ],
164
+ "summary": "\n".join(mem_list),
165
+ }
53
166
 
54
167
  chat_read_nodes = []
55
168
  for memory_i_raw in response_json.get("memory list", []):
56
- node_i = TextualMemoryItem(
57
- memory=memory_i_raw.get("value", ""),
58
- metadata=TreeNodeTextualMemoryMetadata(
59
- user_id=info.get("user_id"),
60
- session_id=info.get("session_id"),
61
- memory_type=memory_i_raw.get("memory_type", "")
169
+ try:
170
+ memory_type = (
171
+ memory_i_raw.get("memory_type", "LongTermMemory")
62
172
  .replace("长期记忆", "LongTermMemory")
63
- .replace("用户记忆", "UserMemory"),
64
- status="activated",
65
- tags=memory_i_raw.get("tags", [])
66
- if type(memory_i_raw.get("tags", [])) is list
67
- else [],
68
- key=memory_i_raw.get("key", ""),
69
- embedding=self.embedder.embed([memory_i_raw.get("value", "")])[0],
70
- usage=[],
71
- sources=scene_data_info,
72
- background=response_json.get("summary", ""),
73
- confidence=0.99,
74
- type="fact",
75
- ),
76
- )
77
- chat_read_nodes.append(node_i)
173
+ .replace("用户记忆", "UserMemory")
174
+ )
175
+
176
+ if memory_type not in ["LongTermMemory", "UserMemory"]:
177
+ memory_type = "LongTermMemory"
178
+
179
+ node_i = TextualMemoryItem(
180
+ memory=memory_i_raw.get("value", ""),
181
+ metadata=TreeNodeTextualMemoryMetadata(
182
+ user_id=info.get("user_id"),
183
+ session_id=info.get("session_id"),
184
+ memory_type=memory_type,
185
+ status="activated",
186
+ tags=memory_i_raw.get("tags", [])
187
+ if type(memory_i_raw.get("tags", [])) is list
188
+ else [],
189
+ key=memory_i_raw.get("key", ""),
190
+ embedding=self.embedder.embed([memory_i_raw.get("value", "")])[0],
191
+ usage=[],
192
+ sources=scene_data_info,
193
+ background=response_json.get("summary", ""),
194
+ confidence=0.99,
195
+ type="fact",
196
+ ),
197
+ )
198
+ chat_read_nodes.append(node_i)
199
+ except Exception as e:
200
+ logger.error(f"[ChatReader] Error parsing memory item: {e}")
78
201
 
79
202
  return chat_read_nodes
80
203
 
@@ -127,8 +250,8 @@ class SimpleStructMemReader(BaseMemReader, ABC):
127
250
  else:
128
251
  processing_func = self._process_doc_data
129
252
 
130
- # Process Q&A pairs concurrently
131
- with concurrent.futures.ThreadPoolExecutor() as executor:
253
+ # Process Q&A pairs concurrently with context propagation
254
+ with ContextThreadPoolExecutor() as executor:
132
255
  futures = [
133
256
  executor.submit(processing_func, scene_data_info, info)
134
257
  for scene_data_info in list_scene_data_info
@@ -166,11 +289,9 @@ class SimpleStructMemReader(BaseMemReader, ABC):
166
289
  for item in items:
167
290
  # Convert dictionary to string
168
291
  if "chat_time" in item:
169
- mem = item["role"] + ": " + f"[{item['chat_time']}]: " + item["content"]
170
- result.append(mem)
292
+ result.append(item)
171
293
  else:
172
- mem = item["role"] + ":" + item["content"]
173
- result.append(mem)
294
+ result.append(item)
174
295
  if len(result) >= 10:
175
296
  results.append(result)
176
297
  context = copy.deepcopy(result[-2:])
@@ -180,59 +301,60 @@ class SimpleStructMemReader(BaseMemReader, ABC):
180
301
  elif type == "doc":
181
302
  for item in scene_data:
182
303
  try:
183
- if not isinstance(item, str):
184
- parsed_text = parser.parse(item)
185
- results.append({"file": "pure_text", "text": parsed_text})
304
+ if os.path.exists(item):
305
+ try:
306
+ parsed_text = parser.parse(item)
307
+ results.append({"file": item, "text": parsed_text})
308
+ except Exception as e:
309
+ logger.error(f"[SceneParser] Error parsing {item}: {e}")
310
+ continue
186
311
  else:
187
312
  parsed_text = item
188
- results.append({"file": item, "text": parsed_text})
313
+ results.append({"file": "pure_text", "text": parsed_text})
189
314
  except Exception as e:
190
315
  print(f"Error parsing file {item}: {e!s}")
191
316
 
192
317
  return results
193
318
 
194
- def _process_doc_data(self, scene_data_info, info):
319
+ def _process_doc_data(self, scene_data_info, info, **kwargs):
195
320
  chunks = self.chunker.chunk(scene_data_info["text"])
196
- messages = [
197
- [
198
- {
199
- "role": "user",
200
- "content": SIMPLE_STRUCT_DOC_READER_PROMPT.replace("{chunk_text}", chunk.text),
201
- }
202
- ]
203
- for chunk in chunks
204
- ]
321
+ messages = []
322
+ for chunk in chunks:
323
+ lang = detect_lang(chunk.text)
324
+ template = PROMPT_DICT["doc"][lang]
325
+ prompt = template.replace("{chunk_text}", chunk.text)
326
+ message = [{"role": "user", "content": prompt}]
327
+ messages.append(message)
205
328
 
206
- processed_chunks = []
207
- with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
208
- futures = [executor.submit(self.llm.generate, message) for message in messages]
209
- for future in concurrent.futures.as_completed(futures):
210
- chunk_result = future.result()
211
- if chunk_result:
212
- processed_chunks.append(chunk_result)
213
-
214
- processed_chunks = [self.parse_json_result(r) for r in processed_chunks]
215
329
  doc_nodes = []
216
- for i, chunk_res in enumerate(processed_chunks):
217
- if chunk_res:
218
- node_i = TextualMemoryItem(
219
- memory=chunk_res["value"],
220
- metadata=TreeNodeTextualMemoryMetadata(
221
- user_id=info.get("user_id"),
222
- session_id=info.get("session_id"),
223
- memory_type="LongTermMemory",
224
- status="activated",
225
- tags=chunk_res["tags"] if type(chunk_res["tags"]) is list else [],
226
- key=chunk_res["key"],
227
- embedding=self.embedder.embed([chunk_res["value"]])[0],
228
- usage=[],
229
- sources=[f"{scene_data_info['file']}_{i}"],
230
- background="",
231
- confidence=0.99,
232
- type="fact",
233
- ),
234
- )
235
- doc_nodes.append(node_i)
330
+ scene_file = scene_data_info["file"]
331
+
332
+ with ContextThreadPoolExecutor(max_workers=50) as executor:
333
+ futures = {
334
+ executor.submit(
335
+ _build_node,
336
+ idx,
337
+ msg,
338
+ info,
339
+ scene_file,
340
+ self.llm,
341
+ self.parse_json_result,
342
+ self.embedder,
343
+ ): idx
344
+ for idx, msg in enumerate(messages)
345
+ }
346
+ total = len(futures)
347
+
348
+ for future in tqdm(
349
+ concurrent.futures.as_completed(futures), total=total, desc="Processing"
350
+ ):
351
+ try:
352
+ node = future.result()
353
+ if node:
354
+ doc_nodes.append(node)
355
+ except Exception as e:
356
+ tqdm.write(f"[ERROR] {e}")
357
+ logger.error(f"[DocReader] Future task failed: {e}")
236
358
  return doc_nodes
237
359
 
238
360
  def parse_json_result(self, response_text):
@@ -240,14 +362,14 @@ class SimpleStructMemReader(BaseMemReader, ABC):
240
362
  json_start = response_text.find("{")
241
363
  response_text = response_text[json_start:]
242
364
  response_text = response_text.replace("```", "").strip()
243
- if response_text[-1] != "}":
365
+ if not response_text.endswith("}"):
244
366
  response_text += "}"
245
- response_json = json.loads(response_text)
246
- return response_json
367
+ return json.loads(response_text)
247
368
  except json.JSONDecodeError as e:
248
- logger.warning(
249
- f"Failed to parse LLM response as JSON: {e}\nRaw response:\n{response_text}"
250
- )
369
+ logger.error(f"[JSONParse] Failed to decode JSON: {e}\nRaw:\n{response_text}")
370
+ return {}
371
+ except Exception as e:
372
+ logger.error(f"[JSONParse] Unexpected error: {e}")
251
373
  return {}
252
374
 
253
375
  def transform_memreader(self, data: dict) -> list[TextualMemoryItem]:
File without changes