MemoryOS 0.2.1-py3-none-any.whl → 0.2.2-py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MemoryOS might be problematic.

Files changed (74):
  1. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/METADATA +2 -1
  2. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/RECORD +72 -55
  3. memos/__init__.py +1 -1
  4. memos/api/config.py +156 -65
  5. memos/api/context/context.py +147 -0
  6. memos/api/context/dependencies.py +90 -0
  7. memos/api/product_models.py +5 -1
  8. memos/api/routers/product_router.py +54 -26
  9. memos/configs/graph_db.py +49 -1
  10. memos/configs/internet_retriever.py +6 -0
  11. memos/configs/mem_os.py +5 -0
  12. memos/configs/mem_reader.py +9 -0
  13. memos/configs/mem_scheduler.py +18 -4
  14. memos/configs/mem_user.py +58 -0
  15. memos/graph_dbs/base.py +9 -1
  16. memos/graph_dbs/factory.py +2 -0
  17. memos/graph_dbs/nebular.py +1364 -0
  18. memos/graph_dbs/neo4j.py +4 -4
  19. memos/log.py +1 -1
  20. memos/mem_cube/utils.py +13 -6
  21. memos/mem_os/core.py +140 -30
  22. memos/mem_os/main.py +1 -1
  23. memos/mem_os/product.py +266 -152
  24. memos/mem_os/utils/format_utils.py +314 -67
  25. memos/mem_reader/simple_struct.py +13 -5
  26. memos/mem_scheduler/base_scheduler.py +220 -250
  27. memos/mem_scheduler/general_scheduler.py +193 -73
  28. memos/mem_scheduler/modules/base.py +5 -5
  29. memos/mem_scheduler/modules/dispatcher.py +6 -9
  30. memos/mem_scheduler/modules/misc.py +81 -16
  31. memos/mem_scheduler/modules/monitor.py +52 -41
  32. memos/mem_scheduler/modules/rabbitmq_service.py +9 -7
  33. memos/mem_scheduler/modules/retriever.py +108 -191
  34. memos/mem_scheduler/modules/scheduler_logger.py +255 -0
  35. memos/mem_scheduler/mos_for_test_scheduler.py +16 -19
  36. memos/mem_scheduler/schemas/__init__.py +0 -0
  37. memos/mem_scheduler/schemas/general_schemas.py +43 -0
  38. memos/mem_scheduler/schemas/message_schemas.py +148 -0
  39. memos/mem_scheduler/schemas/monitor_schemas.py +329 -0
  40. memos/mem_scheduler/utils/__init__.py +0 -0
  41. memos/mem_scheduler/utils/filter_utils.py +176 -0
  42. memos/mem_scheduler/utils/misc_utils.py +61 -0
  43. memos/mem_user/factory.py +94 -0
  44. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  45. memos/mem_user/mysql_user_manager.py +500 -0
  46. memos/mem_user/persistent_factory.py +96 -0
  47. memos/mem_user/user_manager.py +4 -4
  48. memos/memories/activation/item.py +4 -0
  49. memos/memories/textual/base.py +1 -1
  50. memos/memories/textual/general.py +35 -91
  51. memos/memories/textual/item.py +5 -33
  52. memos/memories/textual/tree.py +13 -7
  53. memos/memories/textual/tree_text_memory/organize/conflict.py +4 -2
  54. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +47 -43
  55. memos/memories/textual/tree_text_memory/organize/reorganizer.py +8 -5
  56. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -3
  57. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +2 -0
  58. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +2 -0
  59. memos/memories/textual/tree_text_memory/retrieve/searcher.py +46 -23
  60. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +42 -15
  61. memos/memories/textual/tree_text_memory/retrieve/utils.py +11 -7
  62. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +62 -58
  63. memos/memos_tools/dinding_report_bot.py +422 -0
  64. memos/memos_tools/notification_service.py +44 -0
  65. memos/memos_tools/notification_utils.py +96 -0
  66. memos/settings.py +3 -1
  67. memos/templates/mem_reader_prompts.py +2 -1
  68. memos/templates/mem_scheduler_prompts.py +41 -7
  69. memos/templates/mos_prompts.py +87 -0
  70. memos/mem_scheduler/modules/schemas.py +0 -328
  71. memos/mem_scheduler/utils.py +0 -75
  72. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/LICENSE +0 -0
  73. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/WHEEL +0 -0
  74. {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/entry_points.txt +0 -0
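
The dominant change in memos/mem_os/utils/format_utils.py below is a wholesale switch from print() to logger.info(). The logger's definition is not part of this diff (the project ships its own memos/log.py, also touched in this release); a minimal stdlib stand-in for the module-level logger these calls assume:

    # Hypothetical stand-in for the module logger the diff's logger.info()
    # calls rely on; the real wiring lives in memos/log.py.
    import logging

    logging.basicConfig(level=logging.INFO, format="%(asctime)s %(name)s: %(message)s")
    logger = logging.getLogger("memos.mem_os.utils.format_utils")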
@@ -239,10 +239,10 @@ def sample_nodes_with_type_balance(
         "MetaMemory": 0.05, # 5%
     }

-    print(
+    logger.info(
         f"Starting type-balanced sampling, original nodes: {len(nodes)}, target nodes: {target_count}"
     )
-    print(f"Target type ratios: {type_ratios}")
+    logger.info(f"Target type ratios: {type_ratios}")

     # Analyze current node type distribution
     current_type_counts = {}
@@ -255,7 +255,7 @@ def sample_nodes_with_type_balance(
             nodes_by_type[memory_type] = []
         nodes_by_type[memory_type].append(node)

-    print(f"Current type distribution: {current_type_counts}")
+    logger.info(f"Current type distribution: {current_type_counts}")

     # Calculate target node count for each type
     type_targets = {}
@@ -290,7 +290,7 @@ def sample_nodes_with_type_balance(
         )
         type_targets[memory_type] = type_targets.get(memory_type, 0) + additional

-    print(f"Target node count for each type: {type_targets}")
+    logger.info(f"Target node count for each type: {type_targets}")

     # Perform subtree quality sampling for each type
     selected_nodes = []
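The hunks around this point log per-type targets derived from type_ratios, but the allocation arithmetic itself sits outside the changed lines. A rough sketch of one way to turn ratios plus a node budget into integer targets (floor each share, then hand leftover slots to the largest fractional remainders — an assumed scheme, not necessarily the package's exact one; only the "MetaMemory": 0.05 ratio is visible in this diff, the other names and ratios are illustrative):

    import math

    def allocate_targets(type_ratios: dict[str, float], target_count: int) -> dict[str, int]:
        # Floor each type's share, then give leftover slots to the largest remainders.
        raw = {t: target_count * r for t, r in type_ratios.items()}
        targets = {t: math.floor(v) for t, v in raw.items()}
        leftover = target_count - sum(targets.values())
        for t in sorted(raw, key=lambda k: raw[k] - targets[k], reverse=True)[:leftover]:
            targets[t] += 1
        return targets

    # Yields {"LongTermMemory": 110, "UserMemory": 80, "MetaMemory": 10}
    allocate_targets({"LongTermMemory": 0.55, "UserMemory": 0.40, "MetaMemory": 0.05}, 200)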
@@ -300,16 +300,18 @@ def sample_nodes_with_type_balance(
             continue

         type_nodes = nodes_by_type[memory_type]
-        print(f"\n--- Processing {memory_type} type: {len(type_nodes)} -> {target_for_type} ---")
+        logger.info(
+            f"\n--- Processing {memory_type} type: {len(type_nodes)} -> {target_for_type} ---"
+        )

         if len(type_nodes) <= target_for_type:
             selected_nodes.extend(type_nodes)
-            print(f" Select all: {len(type_nodes)} nodes")
+            logger.info(f" Select all: {len(type_nodes)} nodes")
         else:
             # Use enhanced subtree quality sampling
             type_selected = sample_by_enhanced_subtree_quality(type_nodes, edges, target_for_type)
             selected_nodes.extend(type_selected)
-            print(f" Sampled selection: {len(type_selected)} nodes")
+            logger.info(f" Sampled selection: {len(type_selected)} nodes")

     # Filter edges
     selected_node_ids = {node["id"] for node in selected_nodes}
@@ -319,8 +321,8 @@ def sample_nodes_with_type_balance(
         if edge["source"] in selected_node_ids and edge["target"] in selected_node_ids
     ]

-    print(f"\nFinal selected nodes: {len(selected_nodes)}")
-    print(f"Final edges: {len(filtered_edges)}")
+    logger.info(f"\nFinal selected nodes: {len(selected_nodes)}")
+    logger.info(f"Final edges: {len(filtered_edges)}")

     # Verify final type distribution
     final_type_counts = {}
@@ -328,11 +330,11 @@ def sample_nodes_with_type_balance(
         memory_type = node.get("metadata", {}).get("memory_type", "Unknown")
         final_type_counts[memory_type] = final_type_counts.get(memory_type, 0) + 1

-    print(f"Final type distribution: {final_type_counts}")
+    logger.info(f"Final type distribution: {final_type_counts}")
     for memory_type, count in final_type_counts.items():
         percentage = count / len(selected_nodes) * 100
         target_percentage = type_ratios.get(memory_type, 0) * 100
-        print(
+        logger.info(
             f" {memory_type}: {count} nodes ({percentage:.1f}%, target: {target_percentage:.1f}%)"
         )

@@ -358,9 +360,9 @@ def sample_by_enhanced_subtree_quality(
         subtree_analysis.items(), key=lambda x: x[1]["quality_score"], reverse=True
     )

-    print(" Subtree quality ranking:")
+    logger.info(" Subtree quality ranking:")
     for i, (root_id, analysis) in enumerate(sorted_subtrees[:5]):
-        print(
+        logger.info(
             f" #{i + 1} Root node {root_id}: Quality={analysis['quality_score']:.2f}, "
             f"Depth={analysis['max_depth']}, Branches={analysis['branch_nodes']}, "
             f"Leaves={analysis['leaf_count']}, Max Width={analysis['max_width']}"
@@ -386,7 +388,7 @@ def sample_by_enhanced_subtree_quality(
                 if node:
                     selected_nodes.append(node)
                     selected_node_ids.add(node_id)
-            print(f" Select entire subtree {root_id}: +{len(new_nodes)} nodes")
+            logger.info(f" Select entire subtree {root_id}: +{len(new_nodes)} nodes")
         else:
             # Subtree too large, need partial selection
             if analysis["quality_score"] > 5: # Only partial selection for high-quality subtrees
@@ -398,7 +400,7 @@ def sample_by_enhanced_subtree_quality(
                 selected_nodes.extend(partial_selection)
                 for node in partial_selection:
                     selected_node_ids.add(node["id"])
-                print(
+                logger.info(
                     f" Partial selection of subtree {root_id}: +{len(partial_selection)} nodes"
                 )

@@ -411,7 +413,7 @@ def sample_by_enhanced_subtree_quality(
         remaining_count = target_count - len(selected_nodes)
         additional = sample_nodes_by_importance(remaining_nodes, edges, remaining_count)
         selected_nodes.extend(additional)
-        print(f" Supplementary selection: +{len(additional)} nodes")
+        logger.info(f" Supplementary selection: +{len(additional)} nodes")

     return selected_nodes

@@ -493,7 +495,7 @@ def sample_nodes_by_importance(
 # Modified main function to use new sampling strategy
 def convert_graph_to_tree_forworkmem(
     json_data: dict[str, Any],
-    target_node_count: int = 150,
+    target_node_count: int = 200,
     type_ratios: dict[str, float] | None = None,
 ) -> dict[str, Any]:
     """
@@ -502,8 +504,8 @@ def convert_graph_to_tree_forworkmem(
     original_nodes = json_data.get("nodes", [])
     original_edges = json_data.get("edges", [])

-    print(f"Original node count: {len(original_nodes)}")
-    print(f"Target node count: {target_node_count}")
+    logger.info(f"Original node count: {len(original_nodes)}")
+    logger.info(f"Target node count: {target_node_count}")
     filter_original_edges = []
     for original_edge in original_edges:
         if original_edge["type"] == "PARENT":
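The next hunk caps the usage-derived frequency at 100, presumably so that a handful of heavily accessed nodes cannot dominate downstream weighting. The conditional as written is simply a clamp:

    usage = ["hit"] * 250  # metadata "usage" list; its length drives frequency

    # As written in the diff:
    frequency = len(usage) if len(usage) < 100 else 100
    # Equivalent, slightly more idiomatic form:
    assert frequency == min(len(usage), 100)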
@@ -531,7 +533,7 @@ def convert_graph_to_tree_forworkmem(
         node_name = extract_node_name(memory)
         memory_key = node.get("metadata", {}).get("key", node_name)
         usage = node.get("metadata", {}).get("usage", [])
-        frequency = len(usage)
+        frequency = len(usage) if len(usage) < 100 else 100
         node_map[node["id"]] = {
             "id": node["id"],
             "value": memory,
@@ -633,7 +635,7 @@ def convert_graph_to_tree_forworkmem(


 def print_tree_structure(node: dict[str, Any], level: int = 0, max_level: int = 5):
-    """Print the first few layers of tree structure for easy viewing"""
+    """logger.info the first few layers of tree structure for easy viewing"""
     if level > max_level:
         return

@@ -647,21 +649,21 @@ def print_tree_structure(node: dict[str, Any], level: int = 0, max_level: int =
     children = node.get("children", [])
     if children:
         # Intermediate node, display name, type and child count
-        print(f"{indent}- {node_name} [{memory_type}] ({len(children)} children)")
-        print(f"{indent} ID: {node_id}")
+        logger.info(f"{indent}- {node_name} [{memory_type}] ({len(children)} children)")
+        logger.info(f"{indent} ID: {node_id}")
         display_value = node_value[:80] + "..." if len(node_value) > 80 else node_value
-        print(f"{indent} Value: {display_value}")
+        logger.info(f"{indent} Value: {display_value}")

         if level < max_level:
             for child in children:
                 print_tree_structure(child, level + 1, max_level)
         elif level == max_level:
-            print(f"{indent} ... (expansion limited)")
+            logger.info(f"{indent} ... (expansion limited)")
     else:
         # Leaf node, display name, type and value
         display_value = node_value[:80] + "..." if len(node_value) > 80 else node_value
-        print(f"{indent}- {node_name} [{memory_type}]: {display_value}")
-        print(f"{indent} ID: {node_id}")
+        logger.info(f"{indent}- {node_name} [{memory_type}]: {display_value}")
+        logger.info(f"{indent} ID: {node_id}")


 def analyze_final_tree_quality(tree_data: dict[str, Any]) -> dict:
@@ -856,107 +858,107 @@ def analyze_final_tree_quality(tree_data: dict[str, Any]) -> dict:


 def print_tree_analysis(tree_data: dict[str, Any]):
-    """Print enhanced tree analysis results"""
+    """logger.info enhanced tree analysis results"""
     stats = analyze_final_tree_quality(tree_data)

-    print("\n" + "=" * 60)
-    print("🌳 Enhanced Tree Structure Quality Analysis Report")
-    print("=" * 60)
+    logger.info("\n" + "=" * 60)
+    logger.info("🌳 Enhanced Tree Structure Quality Analysis Report")
+    logger.info("=" * 60)

     # Basic statistics
-    print("\n📊 Basic Statistics:")
-    print(f" Total nodes: {stats['total_nodes']}")
-    print(f" Max depth: {stats['max_depth']}")
-    print(
+    logger.info("\n📊 Basic Statistics:")
+    logger.info(f" Total nodes: {stats['total_nodes']}")
+    logger.info(f" Max depth: {stats['max_depth']}")
+    logger.info(
         f" Leaf nodes: {stats['total_leaves']} ({stats['total_leaves'] / stats['total_nodes'] * 100:.1f}%)"
     )
-    print(
+    logger.info(
         f" Branch nodes: {stats['total_branches']} ({stats['total_branches'] / stats['total_nodes'] * 100:.1f}%)"
     )

     # Structure quality assessment
     structure = stats.get("structure_quality", {})
     if structure:
-        print("\n🏗️ Structure Quality Assessment:")
-        print(
+        logger.info("\n🏗️ Structure Quality Assessment:")
+        logger.info(
             f" Branch density: {structure['branch_density']:.3f} ({'✅ Good' if 0.2 <= structure['branch_density'] <= 0.6 else '⚠️ Needs improvement'})"
         )
-        print(
+        logger.info(
             f" Leaf ratio: {structure['leaf_ratio']:.3f} ({'✅ Good' if 0.3 <= structure['leaf_ratio'] <= 0.7 else '⚠️ Needs improvement'})"
         )
-        print(f" Max width: {structure['max_width']}")
-        print(
+        logger.info(f" Max width: {structure['max_width']}")
+        logger.info(
             f" Depth-width ratio: {structure['depth_width_ratio']:.2f} ({'✅ Good' if structure['depth_width_ratio'] <= 3 else '⚠️ Too thin'})"
         )
-        print(
+        logger.info(
             f" Overall balance: {'✅ Good' if structure['is_well_balanced'] else '⚠️ Needs improvement'}"
         )

     # Single chain analysis
     chain_analysis = stats.get("chain_analysis", {})
     if chain_analysis:
-        print("\n🔗 Single Chain Structure Analysis:")
-        print(f" Longest chain: {chain_analysis.get('max_chain_length', 0)} layers")
-        print(f" Single chain subtrees: {chain_analysis.get('single_chain_subtrees', 0)}")
-        print(
+        logger.info("\n🔗 Single Chain Structure Analysis:")
+        logger.info(f" Longest chain: {chain_analysis.get('max_chain_length', 0)} layers")
+        logger.info(f" Single chain subtrees: {chain_analysis.get('single_chain_subtrees', 0)}")
+        logger.info(
             f" Single chain subtree ratio: {chain_analysis.get('chain_subtree_ratio', 0) * 100:.1f}%"
         )

         if chain_analysis.get("max_chain_length", 0) > 5:
-            print(" ⚠️ Warning: Overly long single chain structure may affect display")
+            logger.info(" ⚠️ Warning: Overly long single chain structure may affect display")
         elif chain_analysis.get("chain_subtree_ratio", 0) > 0.3:
-            print(
+            logger.info(
                 " ⚠️ Warning: Too many single chain subtrees, suggest increasing branch structure"
             )
         else:
-            print(" ✅ Single chain structure well controlled")
+            logger.info(" ✅ Single chain structure well controlled")

     # Type diversity
     type_div = stats.get("type_diversity", {})
     if type_div:
-        print("\n🎨 Type Diversity Analysis:")
-        print(f" Total types: {type_div['total_types']}")
-        print(f" Diversity index: {type_div['shannon_diversity']:.3f}")
-        print(f" Normalized diversity: {type_div['normalized_diversity']:.3f}")
-        print(f" Distribution balance: {type_div['distribution_balance']:.3f}")
+        logger.info("\n🎨 Type Diversity Analysis:")
+        logger.info(f" Total types: {type_div['total_types']}")
+        logger.info(f" Diversity index: {type_div['shannon_diversity']:.3f}")
+        logger.info(f" Normalized diversity: {type_div['normalized_diversity']:.3f}")
+        logger.info(f" Distribution balance: {type_div['distribution_balance']:.3f}")

     # Type distribution
-    print("\n📋 Type Distribution Details:")
+    logger.info("\n📋 Type Distribution Details:")
     for mem_type, count in sorted(stats["by_type"].items(), key=lambda x: x[1], reverse=True):
         percentage = count / stats["total_nodes"] * 100
-        print(f" {mem_type}: {count} nodes ({percentage:.1f}%)")
+        logger.info(f" {mem_type}: {count} nodes ({percentage:.1f}%)")

     # Depth distribution
-    print("\n📏 Depth Distribution:")
+    logger.info("\n📏 Depth Distribution:")
     for depth in sorted(stats["by_depth"].keys()):
         count = stats["by_depth"][depth]
-        print(f" Depth {depth}: {count} nodes")
+        logger.info(f" Depth {depth}: {count} nodes")

     # Major subtree analysis
     if stats["subtrees"]:
-        print("\n🌲 Major Subtree Analysis (sorted by quality):")
+        logger.info("\n🌲 Major Subtree Analysis (sorted by quality):")
         sorted_subtrees = sorted(
             stats["subtrees"], key=lambda x: x.get("quality_score", 0), reverse=True
         )
         for i, subtree in enumerate(sorted_subtrees[:8]): # Show first 8
             quality = subtree.get("quality_score", 0)
-            print(f" #{i + 1} {subtree['root']} [{subtree['type']}]:")
-            print(f" Quality score: {quality:.2f}")
-            print(
+            logger.info(f" #{i + 1} {subtree['root']} [{subtree['type']}]:")
+            logger.info(f" Quality score: {quality:.2f}")
+            logger.info(
                 f" Structure: Depth={subtree['depth']}, Branches={subtree['branches']}, Leaves={subtree['leaves']}"
             )
-            print(
+            logger.info(
                 f" Density: Branch density={subtree.get('branch_density', 0):.3f}, Leaf ratio={subtree.get('leaf_ratio', 0):.3f}"
             )

             if quality > 15:
-                print(" ✅ High quality subtree")
+                logger.info(" ✅ High quality subtree")
             elif quality > 8:
-                print(" 🟡 Medium quality subtree")
+                logger.info(" 🟡 Medium quality subtree")
             else:
-                print(" 🔴 Low quality subtree")
+                logger.info(" 🔴 Low quality subtree")

-    print("\n" + "=" * 60)
+    logger.info("\n" + "=" * 60)


 def remove_embedding_recursive(memory_info: dict) -> Any:
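print_tree_analysis reports a shannon_diversity and a normalized_diversity for the type distribution; the computation itself is outside this diff. Assuming the fields hold the standard Shannon index H = -Σ p_i ln p_i with ln(k) normalization (an assumption, not confirmed by the diff), a reference implementation looks like:

    import math

    def shannon_diversity(type_counts: dict[str, int]) -> tuple[float, float]:
        """Shannon index H = -sum(p * ln p) and its ln(k)-normalized form."""
        total = sum(type_counts.values())
        props = [c / total for c in type_counts.values() if c > 0]
        h = -sum(p * math.log(p) for p in props)
        h_max = math.log(len(props)) if len(props) > 1 else 1.0
        return h, h / h_max

    # A skewed three-type distribution: h ≈ 0.85, normalized ≈ 0.77
    shannon_diversity({"LongTermMemory": 110, "UserMemory": 80, "MetaMemory": 10})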
@@ -1152,3 +1154,248 @@ def convert_activation_memory_summary(act_mem_items: list[KVCacheItem]) -> dict[
         "total_parameters": total_parameters,
         "summary": f"Activation memory contains {total_items} items with {total_layers} layers and approximately {total_parameters:,} parameters",
     }
+
+
+def detect_and_remove_duplicate_ids(tree_node: dict[str, Any]) -> dict[str, Any]:
+    """
+    Detect and remove duplicate IDs in tree structure by skipping duplicate nodes.
+    First occurrence of each ID is kept, subsequent duplicates are removed.
+
+    Args:
+        tree_node: Tree node (dictionary format)
+
+    Returns:
+        dict: Fixed tree node with duplicate nodes removed
+    """
+    used_ids = set()
+    removed_count = 0
+
+    def remove_duplicates_recursive(
+        node: dict[str, Any], parent_path: str = ""
+    ) -> dict[str, Any] | None:
+        """Recursively remove duplicate IDs by skipping duplicate nodes"""
+        nonlocal removed_count
+
+        if not isinstance(node, dict):
+            return node
+
+        # Create node copy
+        fixed_node = node.copy()
+
+        # Handle current node ID
+        current_id = fixed_node.get("id", "")
+        if current_id in used_ids and current_id not in ["root", "WorkingMemory"]:
+            # Skip this duplicate node
+            logger.info(f"Skipping duplicate node: {current_id} (path: {parent_path})")
+            removed_count += 1
+            return None  # Return None to indicate this node should be removed
+        else:
+            used_ids.add(current_id)
+
+        # Recursively process child nodes
+        if "children" in fixed_node and isinstance(fixed_node["children"], list):
+            fixed_children = []
+            for i, child in enumerate(fixed_node["children"]):
+                child_path = f"{parent_path}/{fixed_node.get('node_name', 'unknown')}[{i}]"
+                fixed_child = remove_duplicates_recursive(child, child_path)
+                if fixed_child is not None:  # Only add non-None children
+                    fixed_children.append(fixed_child)
+            fixed_node["children"] = fixed_children
+
+        return fixed_node
+
+    result = remove_duplicates_recursive(tree_node)
+    if result is not None:
+        logger.info(f"Removed {removed_count} duplicate nodes")
+        return result
+    else:
+        # If root node itself was removed (shouldn't happen), return empty root
+        return {
+            "id": "root",
+            "node_name": "root",
+            "value": "root",
+            "memory_type": "Root",
+            "children": [],
+        }
+
+
+def validate_tree_structure(tree_node: dict[str, Any]) -> dict[str, Any]:
+    """
+    Validate tree structure integrity, including ID uniqueness check
+
+    Args:
+        tree_node: Tree node (dictionary format)
+
+    Returns:
+        dict: Validation result containing error messages and fix suggestions
+    """
+    validation_result = {
+        "is_valid": True,
+        "errors": [],
+        "warnings": [],
+        "total_nodes": 0,
+        "unique_ids": set(),
+        "duplicate_ids": set(),
+        "missing_ids": set(),
+        "invalid_structure": [],
+    }
+
+    def validate_recursive(node: dict[str, Any], path: str = "", depth: int = 0):
+        """Recursively validate tree structure"""
+        if not isinstance(node, dict):
+            validation_result["errors"].append(f"Node is not a dictionary: {path}")
+            validation_result["is_valid"] = False
+            return
+
+        validation_result["total_nodes"] += 1
+
+        # Check required fields
+        if "id" not in node:
+            validation_result["errors"].append(f"Node missing ID field: {path}")
+            validation_result["missing_ids"].add(path)
+            validation_result["is_valid"] = False
+        else:
+            node_id = node["id"]
+            if node_id in validation_result["unique_ids"]:
+                validation_result["errors"].append(f"Duplicate node ID: {node_id} (path: {path})")
+                validation_result["duplicate_ids"].add(node_id)
+                validation_result["is_valid"] = False
+            else:
+                validation_result["unique_ids"].add(node_id)
+
+        # Check other required fields
+        required_fields = ["node_name", "value", "memory_type"]
+        for field in required_fields:
+            if field not in node:
+                validation_result["warnings"].append(f"Node missing field '{field}': {path}")
+
+        # Recursively validate child nodes
+        if "children" in node:
+            if not isinstance(node["children"], list):
+                validation_result["errors"].append(f"Children field is not a list: {path}")
+                validation_result["is_valid"] = False
+            else:
+                for i, child in enumerate(node["children"]):
+                    child_path = f"{path}/children[{i}]"
+                    validate_recursive(child, child_path, depth + 1)
+
+        # Check depth limit
+        if depth > 20:
+            validation_result["warnings"].append(f"Tree depth too deep ({depth}): {path}")
+
+    validate_recursive(tree_node)
+
+    # Generate fix suggestions
+    if validation_result["duplicate_ids"]:
+        validation_result["fix_suggestion"] = (
+            "Use detect_and_fix_duplicate_ids() function to fix duplicate IDs"
+        )
+
+    return validation_result
+
+
+def ensure_unique_tree_ids(tree_result: dict[str, Any]) -> dict[str, Any]:
+    """
+    Ensure all node IDs in tree structure are unique by removing duplicate nodes,
+    this is a post-processing function for convert_graph_to_tree_forworkmem
+
+    Args:
+        tree_result: Tree structure returned by convert_graph_to_tree_forworkmem
+
+    Returns:
+        dict: Fixed tree structure with duplicate nodes removed
+    """
+    logger.info("🔍 Starting duplicate ID check in tree structure...")
+
+    # First validate tree structure
+    validation = validate_tree_structure(tree_result)
+
+    if validation["is_valid"]:
+        logger.info("Tree structure validation passed, no duplicate IDs found")
+        return tree_result
+
+    # Report issues
+    logger.info(f"Found {len(validation['errors'])} errors:")
+    for error in validation["errors"][:5]:  # Only show first 5 errors
+        logger.info(f" - {error}")
+
+    if len(validation["errors"]) > 5:
+        logger.info(f" ... and {len(validation['errors']) - 5} more errors")
+
+    logger.info("Statistics:")
+    logger.info(f" - Total nodes: {validation['total_nodes']}")
+    logger.info(f" - Unique IDs: {len(validation['unique_ids'])}")
+    logger.info(f" - Duplicate IDs: {len(validation['duplicate_ids'])}")
+
+    # Remove duplicate nodes
+    logger.info(" Starting duplicate node removal...")
+    fixed_tree = detect_and_remove_duplicate_ids(tree_result)
+
+    # Validate again
+    post_validation = validate_tree_structure(fixed_tree)
+    if post_validation["is_valid"]:
+        logger.info("Removal completed, tree structure is now valid")
+        logger.info(f"Final node count: {post_validation['total_nodes']}")
+    else:
+        logger.info("Issues remain after removal, please check code logic")
+        for error in post_validation["errors"][:3]:
+            logger.info(f" - {error}")
+
+    return fixed_tree
+
+
+def clean_json_response(response: str) -> str:
+    """
+    Remove markdown JSON code block formatting from LLM response.
+
+    Args:
+        response: Raw response string that may contain ```json and ```
+
+    Returns:
+        str: Clean JSON string without markdown formatting
+    """
+    return response.replace("```json", "").replace("```", "").strip()
+
+
+def split_continuous_references(text: str) -> str:
+    """
+    Split continuous reference tags into individual reference tags.
+
+    Converts patterns like [1:92ff35fb, 4:bfe6f044] to [1:92ff35fb] [4:bfe6f044]
+
+    Only processes text if:
+    1. '[' appears exactly once
+    2. ']' appears exactly once
+    3. Contains commas between '[' and ']'
+
+    Args:
+        text (str): Text containing reference tags
+
+    Returns:
+        str: Text with split reference tags, or original text if conditions not met
+    """
+    # Early return if text is empty
+    if not text:
+        return text
+    # Check if '[' appears exactly once
+    if text.count("[") != 1:
+        return text
+    # Check if ']' appears exactly once
+    if text.count("]") != 1:
+        return text
+    # Find positions of brackets
+    open_bracket_pos = text.find("[")
+    close_bracket_pos = text.find("]")
+
+    # Check if brackets are in correct order
+    if open_bracket_pos >= close_bracket_pos:
+        return text
+    # Extract content between brackets
+    content_between_brackets = text[open_bracket_pos + 1 : close_bracket_pos]
+    # Check if there's a comma between brackets
+    if "," not in content_between_brackets:
+        return text
+    text = text.replace(content_between_brackets, content_between_brackets.replace(", ", "]["))
+    text = text.replace(content_between_brackets, content_between_brackets.replace(",", "]["))
+
+    return text
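
The new helpers above are self-contained enough to exercise directly. A quick sanity check of the dedup pipeline and the reference splitter (toy data; assumes the functions are imported from memos.mem_os.utils.format_utils):

    # Toy tree with a duplicated child ID: the second "n1" subtree is dropped,
    # the first occurrence is kept.
    tree = {
        "id": "root", "node_name": "root", "value": "root", "memory_type": "Root",
        "children": [
            {"id": "n1", "node_name": "a", "value": "a", "memory_type": "UserMemory", "children": []},
            {"id": "n1", "node_name": "b", "value": "b", "memory_type": "UserMemory", "children": []},
        ],
    }
    fixed = ensure_unique_tree_ids(tree)
    assert [c["id"] for c in fixed["children"]] == ["n1"]

    # Continuous reference tags are split into individual tags. Note the result
    # has no space between tags, despite the space in the docstring's example.
    assert split_continuous_references("[1:92ff35fb, 4:bfe6f044]") == "[1:92ff35fb][4:bfe6f044]"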
@@ -58,9 +58,13 @@ class SimpleStructMemReader(BaseMemReader, ABC):
                 metadata=TreeNodeTextualMemoryMetadata(
                     user_id=info.get("user_id"),
                     session_id=info.get("session_id"),
-                    memory_type=memory_i_raw.get("memory_type", ""),
+                    memory_type=memory_i_raw.get("memory_type", "")
+                    .replace("长期记忆", "LongTermMemory")
+                    .replace("用户记忆", "UserMemory"),
                     status="activated",
-                    tags=memory_i_raw.get("tags", ""),
+                    tags=memory_i_raw.get("tags", [])
+                    if type(memory_i_raw.get("tags", [])) is list
+                    else [],
                     key=memory_i_raw.get("key", ""),
                     embedding=self.embedder.embed([memory_i_raw.get("value", "")])[0],
                     usage=[],
@@ -176,8 +180,12 @@ class SimpleStructMemReader(BaseMemReader, ABC):
         elif type == "doc":
             for item in scene_data:
                 try:
-                    parsed_text = parser.parse(item)
-                    results.append({"file": item, "text": parsed_text})
+                    if not isinstance(item, str):
+                        parsed_text = parser.parse(item)
+                        results.append({"file": "pure_text", "text": parsed_text})
+                    else:
+                        parsed_text = item
+                        results.append({"file": item, "text": parsed_text})
                 except Exception as e:
                     print(f"Error parsing file {item}: {e!s}")

@@ -214,7 +222,7 @@ class SimpleStructMemReader(BaseMemReader, ABC):
                     session_id=info.get("session_id"),
                     memory_type="LongTermMemory",
                     status="activated",
-                    tags=chunk_res["tags"],
+                    tags=chunk_res["tags"] if type(chunk_res["tags"]) is list else [],
                     key=chunk_res["key"],
                     embedding=self.embedder.embed([chunk_res["value"]])[0],
                     usage=[],
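
Both simple_struct.py hunks guard the tags field the same way: anything that is not already a list (the old default was the empty string "") is replaced with []. The memory_type hunk likewise maps the Chinese labels 长期记忆 (long-term memory) and 用户记忆 (user memory), which an LLM may emit, back to the LongTermMemory and UserMemory enum names. The tags guard in isolation:

    def normalize_tags(raw) -> list:
        # Mirrors the diff's inline guard: keep a real list, discard anything else.
        return raw if type(raw) is list else []

    assert normalize_tags(["a", "b"]) == ["a", "b"]
    assert normalize_tags("a,b") == []   # strings are discarded, not split
    assert normalize_tags(None) == []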