MemoryOS 0.2.2__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MemoryOS might be problematic. Click here for more details.

Files changed (82)
  1. {memoryos-0.2.2.dist-info → memoryos-1.0.1.dist-info}/METADATA +7 -1
  2. {memoryos-0.2.2.dist-info → memoryos-1.0.1.dist-info}/RECORD +81 -66
  3. memos/__init__.py +1 -1
  4. memos/api/config.py +31 -8
  5. memos/api/context/context.py +1 -1
  6. memos/api/context/context_thread.py +96 -0
  7. memos/api/middleware/request_context.py +94 -0
  8. memos/api/product_api.py +5 -1
  9. memos/api/product_models.py +16 -0
  10. memos/api/routers/product_router.py +39 -3
  11. memos/api/start_api.py +3 -0
  12. memos/configs/internet_retriever.py +13 -0
  13. memos/configs/mem_scheduler.py +38 -16
  14. memos/configs/memory.py +13 -0
  15. memos/configs/reranker.py +18 -0
  16. memos/graph_dbs/base.py +33 -4
  17. memos/graph_dbs/nebular.py +631 -236
  18. memos/graph_dbs/neo4j.py +18 -7
  19. memos/graph_dbs/neo4j_community.py +6 -3
  20. memos/llms/vllm.py +2 -0
  21. memos/log.py +125 -8
  22. memos/mem_os/core.py +49 -11
  23. memos/mem_os/main.py +1 -1
  24. memos/mem_os/product.py +392 -215
  25. memos/mem_os/utils/default_config.py +1 -1
  26. memos/mem_os/utils/format_utils.py +11 -47
  27. memos/mem_os/utils/reference_utils.py +153 -0
  28. memos/mem_reader/simple_struct.py +112 -43
  29. memos/mem_scheduler/base_scheduler.py +58 -55
  30. memos/mem_scheduler/{modules → general_modules}/base.py +1 -2
  31. memos/mem_scheduler/{modules → general_modules}/dispatcher.py +54 -15
  32. memos/mem_scheduler/{modules → general_modules}/rabbitmq_service.py +4 -4
  33. memos/mem_scheduler/{modules → general_modules}/redis_service.py +1 -1
  34. memos/mem_scheduler/{modules → general_modules}/retriever.py +19 -5
  35. memos/mem_scheduler/{modules → general_modules}/scheduler_logger.py +10 -4
  36. memos/mem_scheduler/general_scheduler.py +110 -67
  37. memos/mem_scheduler/monitors/__init__.py +0 -0
  38. memos/mem_scheduler/monitors/dispatcher_monitor.py +305 -0
  39. memos/mem_scheduler/{modules/monitor.py → monitors/general_monitor.py} +57 -19
  40. memos/mem_scheduler/mos_for_test_scheduler.py +7 -1
  41. memos/mem_scheduler/schemas/general_schemas.py +3 -2
  42. memos/mem_scheduler/schemas/message_schemas.py +2 -1
  43. memos/mem_scheduler/schemas/monitor_schemas.py +10 -2
  44. memos/mem_scheduler/utils/misc_utils.py +43 -2
  45. memos/mem_user/mysql_user_manager.py +4 -2
  46. memos/memories/activation/item.py +1 -1
  47. memos/memories/activation/kv.py +20 -8
  48. memos/memories/textual/base.py +1 -1
  49. memos/memories/textual/general.py +1 -1
  50. memos/memories/textual/item.py +1 -1
  51. memos/memories/textual/tree.py +31 -1
  52. memos/memories/textual/tree_text_memory/organize/{conflict.py → handler.py} +30 -48
  53. memos/memories/textual/tree_text_memory/organize/manager.py +8 -96
  54. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +2 -0
  55. memos/memories/textual/tree_text_memory/organize/reorganizer.py +102 -140
  56. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +231 -0
  57. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +9 -0
  58. memos/memories/textual/tree_text_memory/retrieve/recall.py +67 -10
  59. memos/memories/textual/tree_text_memory/retrieve/reranker.py +1 -1
  60. memos/memories/textual/tree_text_memory/retrieve/searcher.py +246 -134
  61. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +7 -2
  62. memos/memories/textual/tree_text_memory/retrieve/utils.py +7 -5
  63. memos/memos_tools/lockfree_dict.py +120 -0
  64. memos/memos_tools/notification_utils.py +46 -0
  65. memos/memos_tools/thread_safe_dict.py +288 -0
  66. memos/reranker/__init__.py +4 -0
  67. memos/reranker/base.py +24 -0
  68. memos/reranker/cosine_local.py +95 -0
  69. memos/reranker/factory.py +43 -0
  70. memos/reranker/http_bge.py +99 -0
  71. memos/reranker/noop.py +16 -0
  72. memos/templates/mem_reader_prompts.py +290 -39
  73. memos/templates/mem_scheduler_prompts.py +23 -10
  74. memos/templates/mos_prompts.py +133 -31
  75. memos/templates/tree_reorganize_prompts.py +24 -17
  76. memos/utils.py +19 -0
  77. memos/memories/textual/tree_text_memory/organize/redundancy.py +0 -193
  78. {memoryos-0.2.2.dist-info → memoryos-1.0.1.dist-info}/LICENSE +0 -0
  79. {memoryos-0.2.2.dist-info → memoryos-1.0.1.dist-info}/WHEEL +0 -0
  80. {memoryos-0.2.2.dist-info → memoryos-1.0.1.dist-info}/entry_points.txt +0 -0
  81. /memos/mem_scheduler/{modules → general_modules}/__init__.py +0 -0
  82. /memos/mem_scheduler/{modules → general_modules}/misc.py +0 -0
@@ -112,7 +112,7 @@ def get_default_config(
112
112
  "thread_pool_max_workers": kwargs.get("scheduler_thread_pool_max_workers", 10),
113
113
  "consume_interval_seconds": kwargs.get("scheduler_consume_interval_seconds", 3),
114
114
  "enable_parallel_dispatch": kwargs.get("scheduler_enable_parallel_dispatch", True),
115
- "enable_act_memory_update": True,
115
+ "enable_activation_memory": True,
116
116
  },
117
117
  }
118
118
 
@@ -570,15 +570,23 @@ def convert_graph_to_tree_forworkmem(
570
570
  else:
571
571
  other_roots.append(root_id)
572
572
 
573
- def build_tree(node_id: str) -> dict[str, Any]:
574
- """Recursively build tree structure"""
573
+ def build_tree(node_id: str, visited=None) -> dict[str, Any] | None:
574
+ """Recursively build tree structure with cycle detection"""
575
+ if visited is None:
576
+ visited = set()
577
+
578
+ if node_id in visited:
579
+ logger.warning(f"[build_tree] Detected cycle at node {node_id}, skipping.")
580
+ return None
581
+ visited.add(node_id)
582
+
575
583
  if node_id not in node_map:
576
584
  return None
577
585
 
578
586
  children_ids = children_map.get(node_id, [])
579
587
  children = []
580
588
  for child_id in children_ids:
581
- child_tree = build_tree(child_id)
589
+ child_tree = build_tree(child_id, visited)
582
590
  if child_tree:
583
591
  children.append(child_tree)
584
592
 
@@ -1355,47 +1363,3 @@ def clean_json_response(response: str) -> str:
1355
1363
  str: Clean JSON string without markdown formatting
1356
1364
  """
1357
1365
  return response.replace("```json", "").replace("```", "").strip()
1358
-
1359
-
1360
- def split_continuous_references(text: str) -> str:
1361
- """
1362
- Split continuous reference tags into individual reference tags.
1363
-
1364
- Converts patterns like [1:92ff35fb, 4:bfe6f044] to [1:92ff35fb] [4:bfe6f044]
1365
-
1366
- Only processes text if:
1367
- 1. '[' appears exactly once
1368
- 2. ']' appears exactly once
1369
- 3. Contains commas between '[' and ']'
1370
-
1371
- Args:
1372
- text (str): Text containing reference tags
1373
-
1374
- Returns:
1375
- str: Text with split reference tags, or original text if conditions not met
1376
- """
1377
- # Early return if text is empty
1378
- if not text:
1379
- return text
1380
- # Check if '[' appears exactly once
1381
- if text.count("[") != 1:
1382
- return text
1383
- # Check if ']' appears exactly once
1384
- if text.count("]") != 1:
1385
- return text
1386
- # Find positions of brackets
1387
- open_bracket_pos = text.find("[")
1388
- close_bracket_pos = text.find("]")
1389
-
1390
- # Check if brackets are in correct order
1391
- if open_bracket_pos >= close_bracket_pos:
1392
- return text
1393
- # Extract content between brackets
1394
- content_between_brackets = text[open_bracket_pos + 1 : close_bracket_pos]
1395
- # Check if there's a comma between brackets
1396
- if "," not in content_between_brackets:
1397
- return text
1398
- text = text.replace(content_between_brackets, content_between_brackets.replace(", ", "]["))
1399
- text = text.replace(content_between_brackets, content_between_brackets.replace(",", "]["))
1400
-
1401
- return text
@@ -0,0 +1,153 @@
1
+ from memos.memories.textual.item import (
2
+ TextualMemoryItem,
3
+ )
4
+
5
+
6
import re

# Compiled once at import time: a complete reference tag looks like "[3:92ff35fb]".
_COMPLETE_REF_RE = re.compile(r"\[\d+:[^\]]+\]")
# A possibly-unfinished tag at the end of a buffer: "[", "[12", "[12:", "[12:abc", ...
_INCOMPLETE_REF_RE = re.compile(r"\[\d*:?[^\]]*$")
# An opening "[<digits>:" anywhere in the buffer.
_OPENING_REF_RE = re.compile(r"\[\d+:")
# A bare potential tag start at the very end of the buffer: "[", "[1", "[12:".
_TRAILING_REF_START_RE = re.compile(r"\[\d*:?$")


def split_continuous_references(text: str) -> str:
    """Split one combined reference tag into individual reference tags.

    Converts patterns like ``[1:92ff35fb, 4:bfe6f044]`` into
    ``[1:92ff35fb][4:bfe6f044]``.

    Only processes the text when:
      1. '[' appears exactly once,
      2. ']' appears exactly once (and after the '['), and
      3. the bracketed content contains at least one comma.

    Args:
        text (str): Text that may contain a single combined reference tag.

    Returns:
        str: Text with the combined tag split apart, or the original text
        when the conditions above are not met.
    """
    # Early return if text is empty.
    if not text:
        return text
    # Exactly one bracket pair is required.
    if text.count("[") != 1 or text.count("]") != 1:
        return text
    open_pos = text.find("[")
    close_pos = text.find("]")
    # Brackets must be correctly ordered.
    if open_pos >= close_pos:
        return text
    inner = text[open_pos + 1 : close_pos]
    # A comma between the brackets marks a combined tag.
    if "," not in inner:
        return text
    # Rebuild the bracketed region by splitting on commas. Unlike the old
    # chained str.replace(", ", "][")/replace(",", "][") approach, this handles
    # mixed separators ("a, b,c") without leaving stray commas or spaces.
    parts = [part.strip() for part in inner.split(",")]
    return text[: open_pos + 1] + "][".join(parts) + text[close_pos:]


def process_streaming_references_complete(text_buffer: str) -> tuple[str, str]:
    """Split a streaming buffer so reference tags are never emitted half-formed.

    Args:
        text_buffer (str): The accumulated streaming text.

    Returns:
        tuple[str, str]: ``(processed_text, remaining_buffer)`` where
        ``processed_text`` is safe to emit now and ``remaining_buffer`` must
        be held back until more text arrives (it may be the start of a
        reference tag).
    """
    complete_matches = list(_COMPLETE_REF_RE.finditer(text_buffer))
    if complete_matches:
        # Everything up to the last complete tag is definitely emittable.
        end_pos = complete_matches[-1].end()
        remaining_text = text_buffer[end_pos:]
        # Single search (the original ran the identical regex twice).
        incomplete_match = _INCOMPLETE_REF_RE.search(remaining_text)
        if incomplete_match:
            incomplete_start = end_pos + incomplete_match.start()
            processed_text = split_continuous_references(text_buffer[:incomplete_start])
            return processed_text, text_buffer[incomplete_start:]
        # Nothing after the last complete tag looks like a tag start.
        return split_continuous_references(text_buffer), ""

    # No complete tag yet; look for an opening "[<digits>:".
    opening_matches = list(_OPENING_REF_RE.finditer(text_buffer))
    if opening_matches:
        last_opening = opening_matches[-1]
        if "]" in text_buffer[last_opening.end() :]:
            # The opening is already closed; treat the whole buffer as complete.
            return split_continuous_references(text_buffer), ""
        # Unfinished tag: emit everything before it, keep the tail buffered.
        opening_start = last_opening.start()
        processed_text = split_continuous_references(text_buffer[:opening_start])
        return processed_text, text_buffer[opening_start:]

    # "[", "[1", "[12:" at the very end could still grow into a tag; hold it
    # back. Restricting this to the end of the buffer avoids cutting off
    # ordinary bracketed text such as "[ \\Delta U = Q - W ]". (This also
    # covers a bare trailing "[", which the original re-checked in a separate,
    # unreachable endswith("[") branch.)
    trailing = _TRAILING_REF_START_RE.search(text_buffer)
    if trailing:
        ref_start = trailing.start()
        processed_text = split_continuous_references(text_buffer[:ref_start])
        return processed_text, text_buffer[ref_start:]

    # No reference-like pattern anywhere: everything is safe to emit.
    return split_continuous_references(text_buffer), ""
139
+
140
+
141
def prepare_reference_data(memories_list: list[TextualMemoryItem]) -> list[dict]:
    """Build lightweight reference payloads from memory items.

    For each item, the dumped metadata is augmented with a short ``ref_id``
    (the first ``-``-separated segment of the item id) plus the full
    ``memory`` text and ``id``, while the bulky ``embedding`` and ``sources``
    fields are blanked out.

    Args:
        memories_list: Memory items to convert.

    Returns:
        A list of ``{"metadata": {...}}`` dicts, one per input item.
    """

    def _as_reference(item: TextualMemoryItem) -> dict:
        # model_dump() returns a fresh dict, so mutating it leaves `item` intact.
        meta = item.model_dump()["metadata"]
        meta["ref_id"] = f"{item.id.split('-')[0]}"
        meta["embedding"] = []
        meta["sources"] = []
        meta["memory"] = item.memory
        meta["id"] = item.id
        return {"metadata": meta}

    return [_as_reference(item) for item in memories_list]
@@ -1,10 +1,14 @@
1
1
  import concurrent.futures
2
2
  import copy
3
3
  import json
4
+ import os
5
+ import re
4
6
 
5
7
  from abc import ABC
6
8
  from typing import Any
7
9
 
10
+ from tqdm import tqdm
11
+
8
12
  from memos import log
9
13
  from memos.chunkers import ChunkerFactory
10
14
  from memos.configs.mem_reader import SimpleStructMemReaderConfig
@@ -16,12 +20,79 @@ from memos.memories.textual.item import TextualMemoryItem, TreeNodeTextualMemory
16
20
  from memos.parsers.factory import ParserFactory
17
21
  from memos.templates.mem_reader_prompts import (
18
22
  SIMPLE_STRUCT_DOC_READER_PROMPT,
23
+ SIMPLE_STRUCT_DOC_READER_PROMPT_ZH,
19
24
  SIMPLE_STRUCT_MEM_READER_EXAMPLE,
25
+ SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH,
20
26
  SIMPLE_STRUCT_MEM_READER_PROMPT,
27
+ SIMPLE_STRUCT_MEM_READER_PROMPT_ZH,
21
28
  )
22
29
 
23
30
 
24
31
  logger = log.get_logger(__name__)
32
+ PROMPT_DICT = {
33
+ "chat": {
34
+ "en": SIMPLE_STRUCT_MEM_READER_PROMPT,
35
+ "zh": SIMPLE_STRUCT_MEM_READER_PROMPT_ZH,
36
+ "en_example": SIMPLE_STRUCT_MEM_READER_EXAMPLE,
37
+ "zh_example": SIMPLE_STRUCT_MEM_READER_EXAMPLE_ZH,
38
+ },
39
+ "doc": {"en": SIMPLE_STRUCT_DOC_READER_PROMPT, "zh": SIMPLE_STRUCT_DOC_READER_PROMPT_ZH},
40
+ }
41
+
42
+
43
def detect_lang(text):
    """Heuristically classify *text* as Chinese ("zh") or English ("en").

    Counts CJK characters (base block, extensions A-E and compatibility
    ideographs) and compares them against the number of non-space, non-digit
    word characters.

    Args:
        text: Arbitrary input; anything falsy or non-string yields "en".

    Returns:
        str: "zh" when more than 30% of the significant characters are
        Chinese, otherwise "en".
    """
    try:
        if not text or not isinstance(text, str):
            return "en"
        chinese_pattern = r"[\u4e00-\u9fff\u3400-\u4dbf\U00020000-\U0002a6df\U0002a700-\U0002b73f\U0002b740-\U0002b81f\U0002b820-\U0002ceaf\uf900-\ufaff]"
        chinese_chars = re.findall(chinese_pattern, text)
        # Strip whitespace, digits and punctuation before computing the ratio,
        # and guard explicitly against an empty result instead of relying on a
        # ZeroDivisionError reaching the blanket handler below.
        significant = re.sub(r"[\s\d\W]", "", text)
        if significant and len(chinese_chars) / len(significant) > 0.3:
            return "zh"
        return "en"
    except Exception:
        # Language detection must never break the reader pipeline; fall back
        # to English on any unexpected failure.
        return "en"
54
+
55
+
56
+ def _build_node(idx, message, info, scene_file, llm, parse_json_result, embedder):
57
+ # generate
58
+ raw = llm.generate(message)
59
+ if not raw:
60
+ return None
61
+
62
+ # parse_json_result
63
+ chunk_res = parse_json_result(raw)
64
+ if not chunk_res:
65
+ return None
66
+
67
+ value = chunk_res.get("value")
68
+ if not value:
69
+ return None
70
+
71
+ # embed
72
+ embedding = embedder.embed([value])[0]
73
+
74
+ # TextualMemoryItem
75
+ tags = chunk_res["tags"] if isinstance(chunk_res.get("tags"), list) else []
76
+ key = chunk_res.get("key", None)
77
+
78
+ node_i = TextualMemoryItem(
79
+ memory=value,
80
+ metadata=TreeNodeTextualMemoryMetadata(
81
+ user_id=info.get("user_id"),
82
+ session_id=info.get("session_id"),
83
+ memory_type="LongTermMemory",
84
+ status="activated",
85
+ tags=tags,
86
+ key=key,
87
+ embedding=embedding,
88
+ usage=[],
89
+ sources=[f"{scene_file}_{idx}"],
90
+ background="",
91
+ confidence=0.99,
92
+ type="fact",
93
+ ),
94
+ )
95
+ return node_i
25
96
 
26
97
 
27
98
  class SimpleStructMemReader(BaseMemReader, ABC):
@@ -40,11 +111,13 @@ class SimpleStructMemReader(BaseMemReader, ABC):
40
111
  self.chunker = ChunkerFactory.from_config(config.chunker)
41
112
 
42
113
  def _process_chat_data(self, scene_data_info, info):
43
- prompt = SIMPLE_STRUCT_MEM_READER_PROMPT.replace(
44
- "${conversation}", "\n".join(scene_data_info)
45
- )
114
+ lang = detect_lang("\n".join(scene_data_info))
115
+ template = PROMPT_DICT["chat"][lang]
116
+ examples = PROMPT_DICT["chat"][f"{lang}_example"]
117
+
118
+ prompt = template.replace("${conversation}", "\n".join(scene_data_info))
46
119
  if self.config.remove_prompt_example:
47
- prompt = prompt.replace(SIMPLE_STRUCT_MEM_READER_EXAMPLE, "")
120
+ prompt = prompt.replace(examples, "")
48
121
 
49
122
  messages = [{"role": "user", "content": prompt}]
50
123
 
@@ -180,7 +253,7 @@ class SimpleStructMemReader(BaseMemReader, ABC):
180
253
  elif type == "doc":
181
254
  for item in scene_data:
182
255
  try:
183
- if not isinstance(item, str):
256
+ if os.path.exists(item):
184
257
  parsed_text = parser.parse(item)
185
258
  results.append({"file": "pure_text", "text": parsed_text})
186
259
  else:
@@ -193,46 +266,42 @@ class SimpleStructMemReader(BaseMemReader, ABC):
193
266
 
194
267
  def _process_doc_data(self, scene_data_info, info):
195
268
  chunks = self.chunker.chunk(scene_data_info["text"])
196
- messages = [
197
- [
198
- {
199
- "role": "user",
200
- "content": SIMPLE_STRUCT_DOC_READER_PROMPT.replace("{chunk_text}", chunk.text),
201
- }
202
- ]
203
- for chunk in chunks
204
- ]
205
-
206
- processed_chunks = []
207
- with concurrent.futures.ThreadPoolExecutor(max_workers=5) as executor:
208
- futures = [executor.submit(self.llm.generate, message) for message in messages]
209
- for future in concurrent.futures.as_completed(futures):
210
- chunk_result = future.result()
211
- if chunk_result:
212
- processed_chunks.append(chunk_result)
269
+ messages = []
270
+ for chunk in chunks:
271
+ lang = detect_lang(chunk.text)
272
+ template = PROMPT_DICT["doc"][lang]
273
+ prompt = template.replace("{chunk_text}", chunk.text)
274
+ message = [{"role": "user", "content": prompt}]
275
+ messages.append(message)
213
276
 
214
- processed_chunks = [self.parse_json_result(r) for r in processed_chunks]
215
277
  doc_nodes = []
216
- for i, chunk_res in enumerate(processed_chunks):
217
- if chunk_res:
218
- node_i = TextualMemoryItem(
219
- memory=chunk_res["value"],
220
- metadata=TreeNodeTextualMemoryMetadata(
221
- user_id=info.get("user_id"),
222
- session_id=info.get("session_id"),
223
- memory_type="LongTermMemory",
224
- status="activated",
225
- tags=chunk_res["tags"] if type(chunk_res["tags"]) is list else [],
226
- key=chunk_res["key"],
227
- embedding=self.embedder.embed([chunk_res["value"]])[0],
228
- usage=[],
229
- sources=[f"{scene_data_info['file']}_{i}"],
230
- background="",
231
- confidence=0.99,
232
- type="fact",
233
- ),
234
- )
235
- doc_nodes.append(node_i)
278
+ scene_file = scene_data_info["file"]
279
+
280
+ with concurrent.futures.ThreadPoolExecutor(max_workers=50) as executor:
281
+ futures = {
282
+ executor.submit(
283
+ _build_node,
284
+ idx,
285
+ msg,
286
+ info,
287
+ scene_file,
288
+ self.llm,
289
+ self.parse_json_result,
290
+ self.embedder,
291
+ ): idx
292
+ for idx, msg in enumerate(messages)
293
+ }
294
+ total = len(futures)
295
+
296
+ for future in tqdm(
297
+ concurrent.futures.as_completed(futures), total=total, desc="Processing"
298
+ ):
299
+ try:
300
+ node = future.result()
301
+ if node:
302
+ doc_nodes.append(node)
303
+ except Exception as e:
304
+ tqdm.write(f"[ERROR] {e}")
236
305
  return doc_nodes
237
306
 
238
307
  def parse_json_result(self, response_text):