MemoryOS 0.2.2__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of MemoryOS might be problematic; see the registry's advisory page for more details.

Files changed (62)
  1. {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/METADATA +6 -1
  2. {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/RECORD +61 -55
  3. memos/__init__.py +1 -1
  4. memos/api/config.py +6 -8
  5. memos/api/context/context.py +1 -1
  6. memos/api/context/dependencies.py +11 -0
  7. memos/configs/internet_retriever.py +13 -0
  8. memos/configs/mem_scheduler.py +38 -16
  9. memos/graph_dbs/base.py +30 -3
  10. memos/graph_dbs/nebular.py +442 -194
  11. memos/graph_dbs/neo4j.py +14 -5
  12. memos/log.py +5 -0
  13. memos/mem_os/core.py +19 -9
  14. memos/mem_os/main.py +1 -1
  15. memos/mem_os/product.py +6 -69
  16. memos/mem_os/utils/default_config.py +1 -1
  17. memos/mem_os/utils/format_utils.py +11 -47
  18. memos/mem_os/utils/reference_utils.py +133 -0
  19. memos/mem_scheduler/base_scheduler.py +58 -55
  20. memos/mem_scheduler/{modules → general_modules}/base.py +1 -2
  21. memos/mem_scheduler/{modules → general_modules}/dispatcher.py +54 -15
  22. memos/mem_scheduler/{modules → general_modules}/rabbitmq_service.py +4 -4
  23. memos/mem_scheduler/{modules → general_modules}/redis_service.py +1 -1
  24. memos/mem_scheduler/{modules → general_modules}/retriever.py +19 -5
  25. memos/mem_scheduler/{modules → general_modules}/scheduler_logger.py +10 -4
  26. memos/mem_scheduler/general_scheduler.py +110 -67
  27. memos/mem_scheduler/monitors/__init__.py +0 -0
  28. memos/mem_scheduler/monitors/dispatcher_monitor.py +305 -0
  29. memos/mem_scheduler/{modules/monitor.py → monitors/general_monitor.py} +57 -19
  30. memos/mem_scheduler/mos_for_test_scheduler.py +7 -1
  31. memos/mem_scheduler/schemas/general_schemas.py +3 -2
  32. memos/mem_scheduler/schemas/message_schemas.py +2 -1
  33. memos/mem_scheduler/schemas/monitor_schemas.py +10 -2
  34. memos/mem_scheduler/utils/misc_utils.py +43 -2
  35. memos/memories/activation/item.py +1 -1
  36. memos/memories/activation/kv.py +20 -8
  37. memos/memories/textual/base.py +1 -1
  38. memos/memories/textual/general.py +1 -1
  39. memos/memories/textual/tree_text_memory/organize/{conflict.py → handler.py} +30 -48
  40. memos/memories/textual/tree_text_memory/organize/manager.py +8 -96
  41. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +2 -0
  42. memos/memories/textual/tree_text_memory/organize/reorganizer.py +102 -140
  43. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +229 -0
  44. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +9 -0
  45. memos/memories/textual/tree_text_memory/retrieve/recall.py +15 -8
  46. memos/memories/textual/tree_text_memory/retrieve/reranker.py +1 -1
  47. memos/memories/textual/tree_text_memory/retrieve/searcher.py +177 -125
  48. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +7 -2
  49. memos/memories/textual/tree_text_memory/retrieve/utils.py +1 -1
  50. memos/memos_tools/lockfree_dict.py +120 -0
  51. memos/memos_tools/thread_safe_dict.py +288 -0
  52. memos/templates/mem_reader_prompts.py +2 -0
  53. memos/templates/mem_scheduler_prompts.py +23 -10
  54. memos/templates/mos_prompts.py +40 -11
  55. memos/templates/tree_reorganize_prompts.py +24 -17
  56. memos/utils.py +19 -0
  57. memos/memories/textual/tree_text_memory/organize/redundancy.py +0 -193
  58. {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/LICENSE +0 -0
  59. {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/WHEEL +0 -0
  60. {memoryos-0.2.2.dist-info → memoryos-1.0.0.dist-info}/entry_points.txt +0 -0
  61. /memos/mem_scheduler/{modules → general_modules}/__init__.py +0 -0
  62. /memos/mem_scheduler/{modules → general_modules}/misc.py +0 -0
memos/graph_dbs/neo4j.py CHANGED
@@ -323,14 +323,16 @@ class Neo4jGraphDB(BaseGraphDB):
323
323
  return result.single() is not None
324
324
 
325
325
  # Graph Query & Reasoning
326
- def get_node(self, id: str) -> dict[str, Any] | None:
326
+ def get_node(self, id: str, include_embedding: bool = True) -> dict[str, Any] | None:
327
327
  """
328
328
  Retrieve the metadata and memory of a node.
329
329
  Args:
330
330
  id: Node identifier.
331
+ include_embedding (bool): Whether to include the large embedding field.
331
332
  Returns:
332
333
  Dictionary of node fields, or None if not found.
333
334
  """
335
+
334
336
  where_user = ""
335
337
  params = {"id": id}
336
338
  if not self.config.use_multi_db and self.config.user_name:
@@ -343,11 +345,12 @@ class Neo4jGraphDB(BaseGraphDB):
343
345
  record = session.run(query, params).single()
344
346
  return self._parse_node(dict(record["n"])) if record else None
345
347
 
346
- def get_nodes(self, ids: list[str]) -> list[dict[str, Any]]:
348
+ def get_nodes(self, ids: list[str], include_embedding: bool = True) -> list[dict[str, Any]]:
347
349
  """
348
350
  Retrieve the metadata and memory of a list of nodes.
349
351
  Args:
350
352
  ids: List of Node identifier.
353
+ include_embedding (bool): Whether to include the large embedding field.
351
354
  Returns:
352
355
  list[dict]: Parsed node records containing 'id', 'memory', and 'metadata'.
353
356
 
@@ -355,6 +358,7 @@ class Neo4jGraphDB(BaseGraphDB):
355
358
  - Assumes all provided IDs are valid and exist.
356
359
  - Returns empty list if input is empty.
357
360
  """
361
+
358
362
  if not ids:
359
363
  return []
360
364
 
@@ -829,7 +833,7 @@ class Neo4jGraphDB(BaseGraphDB):
829
833
  logger.error(f"[ERROR] Failed to clear database '{self.db_name}': {e}")
830
834
  raise
831
835
 
832
- def export_graph(self) -> dict[str, Any]:
836
+ def export_graph(self, include_embedding: bool = True) -> dict[str, Any]:
833
837
  """
834
838
  Export all graph nodes and edges in a structured form.
835
839
 
@@ -910,12 +914,14 @@ class Neo4jGraphDB(BaseGraphDB):
910
914
  target_id=edge["target"],
911
915
  )
912
916
 
913
- def get_all_memory_items(self, scope: str) -> list[dict]:
917
+ def get_all_memory_items(self, scope: str, include_embedding: bool = True) -> list[dict]:
914
918
  """
915
919
  Retrieve all memory items of a specific memory_type.
916
920
 
917
921
  Args:
918
922
  scope (str): Must be one of 'WorkingMemory', 'LongTermMemory', or 'UserMemory'.
923
+ include_embedding (bool): Whether to include the large embedding field.
924
+ Returns:
919
925
 
920
926
  Returns:
921
927
  list[dict]: Full list of memory items under this scope.
@@ -940,12 +946,15 @@ class Neo4jGraphDB(BaseGraphDB):
940
946
  results = session.run(query, params)
941
947
  return [self._parse_node(dict(record["n"])) for record in results]
942
948
 
943
- def get_structure_optimization_candidates(self, scope: str) -> list[dict]:
949
+ def get_structure_optimization_candidates(
950
+ self, scope: str, include_embedding: bool = True
951
+ ) -> list[dict]:
944
952
  """
945
953
  Find nodes that are likely candidates for structure optimization:
946
954
  - Isolated nodes, nodes with empty background, or nodes with exactly one child.
947
955
  - Plus: the child of any parent node that has exactly one child.
948
956
  """
957
+
949
958
  where_clause = """
950
959
  WHERE n.memory_type = $scope
951
960
  AND n.status = 'activated'
memos/log.py CHANGED
@@ -4,9 +4,14 @@ from logging.config import dictConfig
4
4
  from pathlib import Path
5
5
  from sys import stdout
6
6
 
7
+ from dotenv import load_dotenv
8
+
7
9
  from memos import settings
8
10
 
9
11
 
12
+ # Load environment variables
13
+ load_dotenv()
14
+
10
15
  selected_log_level = logging.DEBUG if settings.DEBUG else logging.WARNING
11
16
 
12
17
 
memos/mem_os/core.py CHANGED
@@ -24,6 +24,7 @@ from memos.mem_user.user_manager import UserManager, UserRole
24
24
  from memos.memories.activation.item import ActivationMemoryItem
25
25
  from memos.memories.parametric.item import ParametricMemoryItem
26
26
  from memos.memories.textual.item import TextualMemoryItem, TextualMemoryMetadata
27
+ from memos.memos_tools.thread_safe_dict import ThreadSafeDict
27
28
  from memos.templates.mos_prompts import QUERY_REWRITING_PROMPT
28
29
  from memos.types import ChatHistory, MessageList, MOSSearchResult
29
30
 
@@ -42,10 +43,13 @@ class MOSCore:
42
43
  self.config = config
43
44
  self.user_id = config.user_id
44
45
  self.session_id = config.session_id
45
- self.mem_cubes: dict[str, GeneralMemCube] = {}
46
46
  self.chat_llm = LLMFactory.from_config(config.chat_model)
47
47
  self.mem_reader = MemReaderFactory.from_config(config.mem_reader)
48
48
  self.chat_history_manager: dict[str, ChatHistory] = {}
49
+ # use thread safe dict for multi-user product-server scenario
50
+ self.mem_cubes: ThreadSafeDict[str, GeneralMemCube] = (
51
+ ThreadSafeDict() if user_manager is not None else {}
52
+ )
49
53
  self._register_chat_history()
50
54
 
51
55
  # Use provided user_manager or create a new one
@@ -124,7 +128,7 @@ class MOSCore:
124
128
  chat_llm=self.chat_llm, process_llm=self.chat_llm
125
129
  )
126
130
  else:
127
- # Configure scheduler modules
131
+ # Configure scheduler general_modules
128
132
  self._mem_scheduler.initialize_modules(
129
133
  chat_llm=self.chat_llm, process_llm=self.mem_reader.llm
130
134
  )
@@ -185,7 +189,7 @@ class MOSCore:
185
189
  self.chat_history_manager[user_id] = ChatHistory(
186
190
  user_id=user_id,
187
191
  session_id=self.session_id,
188
- created_at=datetime.now(),
192
+ created_at=datetime.utcnow(),
189
193
  total_messages=0,
190
194
  chat_history=[],
191
195
  )
@@ -279,7 +283,7 @@ class MOSCore:
279
283
  mem_cube=mem_cube,
280
284
  label=QUERY_LABEL,
281
285
  content=query,
282
- timestamp=datetime.now(),
286
+ timestamp=datetime.utcnow(),
283
287
  )
284
288
  self.mem_scheduler.submit_messages(messages=[message_item])
285
289
 
@@ -338,7 +342,7 @@ class MOSCore:
338
342
  mem_cube=mem_cube,
339
343
  label=ANSWER_LABEL,
340
344
  content=response,
341
- timestamp=datetime.now(),
345
+ timestamp=datetime.utcnow(),
342
346
  )
343
347
  self.mem_scheduler.submit_messages(messages=[message_item])
344
348
 
@@ -575,7 +579,13 @@ class MOSCore:
575
579
  }
576
580
  if install_cube_ids is None:
577
581
  install_cube_ids = user_cube_ids
578
- for mem_cube_id, mem_cube in self.mem_cubes.items():
582
+ # create exist dict in mem_cubes and avoid one search slow
583
+ tmp_mem_cubes = {}
584
+ for mem_cube_id in install_cube_ids:
585
+ if mem_cube_id in self.mem_cubes:
586
+ tmp_mem_cubes[mem_cube_id] = self.mem_cubes.get(mem_cube_id)
587
+
588
+ for mem_cube_id, mem_cube in tmp_mem_cubes.items():
579
589
  if (
580
590
  (mem_cube_id in install_cube_ids)
581
591
  and (mem_cube.text_mem is not None)
@@ -681,7 +691,7 @@ class MOSCore:
681
691
  mem_cube=mem_cube,
682
692
  label=ADD_LABEL,
683
693
  content=json.dumps(mem_ids),
684
- timestamp=datetime.now(),
694
+ timestamp=datetime.utcnow(),
685
695
  )
686
696
  self.mem_scheduler.submit_messages(messages=[message_item])
687
697
 
@@ -725,7 +735,7 @@ class MOSCore:
725
735
  mem_cube=mem_cube,
726
736
  label=ADD_LABEL,
727
737
  content=json.dumps(mem_ids),
728
- timestamp=datetime.now(),
738
+ timestamp=datetime.utcnow(),
729
739
  )
730
740
  self.mem_scheduler.submit_messages(messages=[message_item])
731
741
 
@@ -756,7 +766,7 @@ class MOSCore:
756
766
  mem_cube=mem_cube,
757
767
  label=ADD_LABEL,
758
768
  content=json.dumps(mem_ids),
759
- timestamp=datetime.now(),
769
+ timestamp=datetime.utcnow(),
760
770
  )
761
771
  self.mem_scheduler.submit_messages(messages=[message_item])
762
772
 
memos/mem_os/main.py CHANGED
@@ -219,7 +219,7 @@ class MOS(MOSCore):
219
219
  mem_cube=mem_cube,
220
220
  label=ANSWER_LABEL,
221
221
  content=enhanced_response,
222
- timestamp=datetime.now(),
222
+ timestamp=datetime.now().isoformat(),
223
223
  )
224
224
  self.mem_scheduler.submit_messages(messages=[message_item])
225
225
 
memos/mem_os/product.py CHANGED
@@ -22,7 +22,9 @@ from memos.mem_os.utils.format_utils import (
22
22
  filter_nodes_by_tree_ids,
23
23
  remove_embedding_recursive,
24
24
  sort_children_by_memory_type,
25
- split_continuous_references,
25
+ )
26
+ from memos.mem_os.utils.reference_utils import (
27
+ process_streaming_references_complete,
26
28
  )
27
29
  from memos.mem_scheduler.schemas.general_schemas import (
28
30
  ANSWER_LABEL,
@@ -406,71 +408,6 @@ class MOSProduct(MOSCore):
406
408
  return MEMOS_PRODUCT_ENHANCE_PROMPT + personal_memory_context + outer_memory_context
407
409
  return MEMOS_PRODUCT_ENHANCE_PROMPT
408
410
 
409
- def _process_streaming_references_complete(self, text_buffer: str) -> tuple[str, str]:
410
- """
411
- Complete streaming reference processing to ensure reference tags are never split.
412
-
413
- Args:
414
- text_buffer (str): The accumulated text buffer.
415
-
416
- Returns:
417
- tuple[str, str]: (processed_text, remaining_buffer)
418
- """
419
- import re
420
-
421
- # Pattern to match complete reference tags: [refid:memoriesID]
422
- complete_pattern = r"\[\d+:[^\]]+\]"
423
-
424
- # Find all complete reference tags
425
- complete_matches = list(re.finditer(complete_pattern, text_buffer))
426
-
427
- if complete_matches:
428
- # Find the last complete tag
429
- last_match = complete_matches[-1]
430
- end_pos = last_match.end()
431
-
432
- # Get text up to the end of the last complete tag
433
- processed_text = text_buffer[:end_pos]
434
- remaining_buffer = text_buffer[end_pos:]
435
-
436
- # Apply reference splitting to the processed text
437
- processed_text = split_continuous_references(processed_text)
438
-
439
- return processed_text, remaining_buffer
440
-
441
- # Check for incomplete reference tags
442
- # Look for opening bracket with number and colon
443
- opening_pattern = r"\[\d+:"
444
- opening_matches = list(re.finditer(opening_pattern, text_buffer))
445
-
446
- if opening_matches:
447
- # Find the last opening tag
448
- last_opening = opening_matches[-1]
449
- opening_start = last_opening.start()
450
-
451
- # Check if we have a complete opening pattern
452
- if last_opening.end() <= len(text_buffer):
453
- # We have a complete opening pattern, keep everything in buffer
454
- return "", text_buffer
455
- else:
456
- # Incomplete opening pattern, return text before it
457
- processed_text = text_buffer[:opening_start]
458
- # Apply reference splitting to the processed text
459
- processed_text = split_continuous_references(processed_text)
460
- return processed_text, text_buffer[opening_start:]
461
-
462
- # Check for partial opening pattern (starts with [ but not complete)
463
- if "[" in text_buffer:
464
- ref_start = text_buffer.find("[")
465
- processed_text = text_buffer[:ref_start]
466
- # Apply reference splitting to the processed text
467
- processed_text = split_continuous_references(processed_text)
468
- return processed_text, text_buffer[ref_start:]
469
-
470
- # No reference tags found, apply reference splitting and return all text
471
- processed_text = split_continuous_references(text_buffer)
472
- return processed_text, ""
473
-
474
411
  def _extract_references_from_response(self, response: str) -> tuple[str, list[dict]]:
475
412
  """
476
413
  Extract reference information from the response and return clean text.
@@ -554,7 +491,7 @@ class MOSProduct(MOSCore):
554
491
  mem_cube=self.mem_cubes[mem_cube_id],
555
492
  label=label,
556
493
  content=query,
557
- timestamp=datetime.now(),
494
+ timestamp=datetime.utcnow(),
558
495
  )
559
496
  self.mem_scheduler.submit_messages(messages=[message_item])
560
497
 
@@ -868,7 +805,7 @@ class MOSProduct(MOSCore):
868
805
  full_response += chunk
869
806
 
870
807
  # Process buffer to ensure complete reference tags
871
- processed_chunk, remaining_buffer = self._process_streaming_references_complete(buffer)
808
+ processed_chunk, remaining_buffer = process_streaming_references_complete(buffer)
872
809
 
873
810
  if processed_chunk:
874
811
  chunk_data = f"data: {json.dumps({'type': 'text', 'data': processed_chunk}, ensure_ascii=False)}\n\n"
@@ -877,7 +814,7 @@ class MOSProduct(MOSCore):
877
814
 
878
815
  # Process any remaining buffer
879
816
  if buffer:
880
- processed_chunk, remaining_buffer = self._process_streaming_references_complete(buffer)
817
+ processed_chunk, remaining_buffer = process_streaming_references_complete(buffer)
881
818
  if processed_chunk:
882
819
  chunk_data = f"data: {json.dumps({'type': 'text', 'data': processed_chunk}, ensure_ascii=False)}\n\n"
883
820
  yield chunk_data
@@ -112,7 +112,7 @@ def get_default_config(
112
112
  "thread_pool_max_workers": kwargs.get("scheduler_thread_pool_max_workers", 10),
113
113
  "consume_interval_seconds": kwargs.get("scheduler_consume_interval_seconds", 3),
114
114
  "enable_parallel_dispatch": kwargs.get("scheduler_enable_parallel_dispatch", True),
115
- "enable_act_memory_update": True,
115
+ "enable_activation_memory": True,
116
116
  },
117
117
  }
118
118
 
@@ -570,15 +570,23 @@ def convert_graph_to_tree_forworkmem(
570
570
  else:
571
571
  other_roots.append(root_id)
572
572
 
573
- def build_tree(node_id: str) -> dict[str, Any]:
574
- """Recursively build tree structure"""
573
+ def build_tree(node_id: str, visited=None) -> dict[str, Any] | None:
574
+ """Recursively build tree structure with cycle detection"""
575
+ if visited is None:
576
+ visited = set()
577
+
578
+ if node_id in visited:
579
+ logger.warning(f"[build_tree] Detected cycle at node {node_id}, skipping.")
580
+ return None
581
+ visited.add(node_id)
582
+
575
583
  if node_id not in node_map:
576
584
  return None
577
585
 
578
586
  children_ids = children_map.get(node_id, [])
579
587
  children = []
580
588
  for child_id in children_ids:
581
- child_tree = build_tree(child_id)
589
+ child_tree = build_tree(child_id, visited)
582
590
  if child_tree:
583
591
  children.append(child_tree)
584
592
 
@@ -1355,47 +1363,3 @@ def clean_json_response(response: str) -> str:
1355
1363
  str: Clean JSON string without markdown formatting
1356
1364
  """
1357
1365
  return response.replace("```json", "").replace("```", "").strip()
1358
-
1359
-
1360
- def split_continuous_references(text: str) -> str:
1361
- """
1362
- Split continuous reference tags into individual reference tags.
1363
-
1364
- Converts patterns like [1:92ff35fb, 4:bfe6f044] to [1:92ff35fb] [4:bfe6f044]
1365
-
1366
- Only processes text if:
1367
- 1. '[' appears exactly once
1368
- 2. ']' appears exactly once
1369
- 3. Contains commas between '[' and ']'
1370
-
1371
- Args:
1372
- text (str): Text containing reference tags
1373
-
1374
- Returns:
1375
- str: Text with split reference tags, or original text if conditions not met
1376
- """
1377
- # Early return if text is empty
1378
- if not text:
1379
- return text
1380
- # Check if '[' appears exactly once
1381
- if text.count("[") != 1:
1382
- return text
1383
- # Check if ']' appears exactly once
1384
- if text.count("]") != 1:
1385
- return text
1386
- # Find positions of brackets
1387
- open_bracket_pos = text.find("[")
1388
- close_bracket_pos = text.find("]")
1389
-
1390
- # Check if brackets are in correct order
1391
- if open_bracket_pos >= close_bracket_pos:
1392
- return text
1393
- # Extract content between brackets
1394
- content_between_brackets = text[open_bracket_pos + 1 : close_bracket_pos]
1395
- # Check if there's a comma between brackets
1396
- if "," not in content_between_brackets:
1397
- return text
1398
- text = text.replace(content_between_brackets, content_between_brackets.replace(", ", "]["))
1399
- text = text.replace(content_between_brackets, content_between_brackets.replace(",", "]["))
1400
-
1401
- return text
@@ -0,0 +1,133 @@
1
+ def split_continuous_references(text: str) -> str:
2
+ """
3
+ Split continuous reference tags into individual reference tags.
4
+
5
+ Converts patterns like [1:92ff35fb, 4:bfe6f044] to [1:92ff35fb] [4:bfe6f044]
6
+
7
+ Only processes text if:
8
+ 1. '[' appears exactly once
9
+ 2. ']' appears exactly once
10
+ 3. Contains commas between '[' and ']'
11
+
12
+ Args:
13
+ text (str): Text containing reference tags
14
+
15
+ Returns:
16
+ str: Text with split reference tags, or original text if conditions not met
17
+ """
18
+ # Early return if text is empty
19
+ if not text:
20
+ return text
21
+ # Check if '[' appears exactly once
22
+ if text.count("[") != 1:
23
+ return text
24
+ # Check if ']' appears exactly once
25
+ if text.count("]") != 1:
26
+ return text
27
+ # Find positions of brackets
28
+ open_bracket_pos = text.find("[")
29
+ close_bracket_pos = text.find("]")
30
+
31
+ # Check if brackets are in correct order
32
+ if open_bracket_pos >= close_bracket_pos:
33
+ return text
34
+ # Extract content between brackets
35
+ content_between_brackets = text[open_bracket_pos + 1 : close_bracket_pos]
36
+ # Check if there's a comma between brackets
37
+ if "," not in content_between_brackets:
38
+ return text
39
+ text = text.replace(content_between_brackets, content_between_brackets.replace(", ", "]["))
40
+ text = text.replace(content_between_brackets, content_between_brackets.replace(",", "]["))
41
+
42
+ return text
43
+
44
+
45
+ def process_streaming_references_complete(text_buffer: str) -> tuple[str, str]:
46
+ """
47
+ Complete streaming reference processing to ensure reference tags are never split.
48
+
49
+ Args:
50
+ text_buffer (str): The accumulated text buffer.
51
+
52
+ Returns:
53
+ tuple[str, str]: (processed_text, remaining_buffer)
54
+ """
55
+ import re
56
+
57
+ # Pattern to match complete reference tags: [refid:memoriesID]
58
+ complete_pattern = r"\[\d+:[^\]]+\]"
59
+
60
+ # Find all complete reference tags
61
+ complete_matches = list(re.finditer(complete_pattern, text_buffer))
62
+
63
+ if complete_matches:
64
+ # Find the last complete tag
65
+ last_match = complete_matches[-1]
66
+ end_pos = last_match.end()
67
+
68
+ # Check if there's any incomplete reference after the last complete one
69
+ remaining_text = text_buffer[end_pos:]
70
+
71
+ # Look for potential incomplete reference patterns after the last complete tag
72
+ incomplete_pattern = r"\[\d*:?[^\]]*$"
73
+ if re.search(incomplete_pattern, remaining_text):
74
+ # There's a potential incomplete reference, find where it starts
75
+ incomplete_match = re.search(incomplete_pattern, remaining_text)
76
+ if incomplete_match:
77
+ incomplete_start = end_pos + incomplete_match.start()
78
+ processed_text = text_buffer[:incomplete_start]
79
+ remaining_buffer = text_buffer[incomplete_start:]
80
+
81
+ # Apply reference splitting to the processed text
82
+ processed_text = split_continuous_references(processed_text)
83
+ return processed_text, remaining_buffer
84
+
85
+ # No incomplete reference after the last complete tag, process all
86
+ processed_text = split_continuous_references(text_buffer)
87
+ return processed_text, ""
88
+
89
+ # Check for incomplete reference tags - be more specific about what constitutes a potential reference
90
+ # Look for opening bracket with number and colon that could be a reference tag
91
+ opening_pattern = r"\[\d+:"
92
+ opening_matches = list(re.finditer(opening_pattern, text_buffer))
93
+
94
+ if opening_matches:
95
+ # Find the last opening tag
96
+ last_opening = opening_matches[-1]
97
+ opening_start = last_opening.start()
98
+
99
+ # Check if this might be a complete reference tag (has closing bracket after the pattern)
100
+ remaining_text = text_buffer[last_opening.end() :]
101
+ if "]" in remaining_text:
102
+ # This looks like a complete reference tag, process it
103
+ processed_text = split_continuous_references(text_buffer)
104
+ return processed_text, ""
105
+ else:
106
+ # Incomplete reference tag, keep it in buffer
107
+ processed_text = text_buffer[:opening_start]
108
+ processed_text = split_continuous_references(processed_text)
109
+ return processed_text, text_buffer[opening_start:]
110
+
111
+ # More sophisticated check for potential reference patterns
112
+ # Only hold back text if we see a pattern that could be the start of a reference tag
113
+ potential_ref_pattern = r"\[\d*:?$" # Matches [, [1, [12:, etc. at end of buffer
114
+ if re.search(potential_ref_pattern, text_buffer):
115
+ # Find the position of the potential reference start
116
+ match = re.search(potential_ref_pattern, text_buffer)
117
+ if match:
118
+ ref_start = match.start()
119
+ processed_text = text_buffer[:ref_start]
120
+ processed_text = split_continuous_references(processed_text)
121
+ return processed_text, text_buffer[ref_start:]
122
+
123
+ # Check for standalone [ only at the very end of the buffer
124
+ # This prevents cutting off mathematical expressions like [ \Delta U = Q - W ]
125
+ if text_buffer.endswith("["):
126
+ # Only hold back the single [ character
127
+ processed_text = text_buffer[:-1]
128
+ processed_text = split_continuous_references(processed_text)
129
+ return processed_text, "["
130
+
131
+ # No reference-like patterns found, process all text
132
+ processed_text = split_continuous_references(text_buffer)
133
+ return processed_text, ""