MemoryOS 0.2.1__py3-none-any.whl → 1.0.0__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of MemoryOS might be problematic.

Files changed (92)
  1. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/METADATA +7 -1
  2. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/RECORD +87 -64
  3. memos/__init__.py +1 -1
  4. memos/api/config.py +158 -69
  5. memos/api/context/context.py +147 -0
  6. memos/api/context/dependencies.py +101 -0
  7. memos/api/product_models.py +5 -1
  8. memos/api/routers/product_router.py +54 -26
  9. memos/configs/graph_db.py +49 -1
  10. memos/configs/internet_retriever.py +19 -0
  11. memos/configs/mem_os.py +5 -0
  12. memos/configs/mem_reader.py +9 -0
  13. memos/configs/mem_scheduler.py +54 -18
  14. memos/configs/mem_user.py +58 -0
  15. memos/graph_dbs/base.py +38 -3
  16. memos/graph_dbs/factory.py +2 -0
  17. memos/graph_dbs/nebular.py +1612 -0
  18. memos/graph_dbs/neo4j.py +18 -9
  19. memos/log.py +6 -1
  20. memos/mem_cube/utils.py +13 -6
  21. memos/mem_os/core.py +157 -37
  22. memos/mem_os/main.py +2 -2
  23. memos/mem_os/product.py +252 -201
  24. memos/mem_os/utils/default_config.py +1 -1
  25. memos/mem_os/utils/format_utils.py +281 -70
  26. memos/mem_os/utils/reference_utils.py +133 -0
  27. memos/mem_reader/simple_struct.py +13 -5
  28. memos/mem_scheduler/base_scheduler.py +239 -266
  29. memos/mem_scheduler/{modules → general_modules}/base.py +4 -5
  30. memos/mem_scheduler/{modules → general_modules}/dispatcher.py +57 -21
  31. memos/mem_scheduler/general_modules/misc.py +104 -0
  32. memos/mem_scheduler/{modules → general_modules}/rabbitmq_service.py +12 -10
  33. memos/mem_scheduler/{modules → general_modules}/redis_service.py +1 -1
  34. memos/mem_scheduler/general_modules/retriever.py +199 -0
  35. memos/mem_scheduler/general_modules/scheduler_logger.py +261 -0
  36. memos/mem_scheduler/general_scheduler.py +243 -80
  37. memos/mem_scheduler/monitors/__init__.py +0 -0
  38. memos/mem_scheduler/monitors/dispatcher_monitor.py +305 -0
  39. memos/mem_scheduler/{modules/monitor.py → monitors/general_monitor.py} +106 -57
  40. memos/mem_scheduler/mos_for_test_scheduler.py +23 -20
  41. memos/mem_scheduler/schemas/__init__.py +0 -0
  42. memos/mem_scheduler/schemas/general_schemas.py +44 -0
  43. memos/mem_scheduler/schemas/message_schemas.py +149 -0
  44. memos/mem_scheduler/schemas/monitor_schemas.py +337 -0
  45. memos/mem_scheduler/utils/__init__.py +0 -0
  46. memos/mem_scheduler/utils/filter_utils.py +176 -0
  47. memos/mem_scheduler/utils/misc_utils.py +102 -0
  48. memos/mem_user/factory.py +94 -0
  49. memos/mem_user/mysql_persistent_user_manager.py +271 -0
  50. memos/mem_user/mysql_user_manager.py +500 -0
  51. memos/mem_user/persistent_factory.py +96 -0
  52. memos/mem_user/user_manager.py +4 -4
  53. memos/memories/activation/item.py +5 -1
  54. memos/memories/activation/kv.py +20 -8
  55. memos/memories/textual/base.py +2 -2
  56. memos/memories/textual/general.py +36 -92
  57. memos/memories/textual/item.py +5 -33
  58. memos/memories/textual/tree.py +13 -7
  59. memos/memories/textual/tree_text_memory/organize/{conflict.py → handler.py} +34 -50
  60. memos/memories/textual/tree_text_memory/organize/manager.py +8 -96
  61. memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +49 -43
  62. memos/memories/textual/tree_text_memory/organize/reorganizer.py +107 -142
  63. memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +229 -0
  64. memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -3
  65. memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +11 -0
  66. memos/memories/textual/tree_text_memory/retrieve/recall.py +15 -8
  67. memos/memories/textual/tree_text_memory/retrieve/reranker.py +1 -1
  68. memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +2 -0
  69. memos/memories/textual/tree_text_memory/retrieve/searcher.py +191 -116
  70. memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +47 -15
  71. memos/memories/textual/tree_text_memory/retrieve/utils.py +11 -7
  72. memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +62 -58
  73. memos/memos_tools/dinding_report_bot.py +422 -0
  74. memos/memos_tools/lockfree_dict.py +120 -0
  75. memos/memos_tools/notification_service.py +44 -0
  76. memos/memos_tools/notification_utils.py +96 -0
  77. memos/memos_tools/thread_safe_dict.py +288 -0
  78. memos/settings.py +3 -1
  79. memos/templates/mem_reader_prompts.py +4 -1
  80. memos/templates/mem_scheduler_prompts.py +62 -15
  81. memos/templates/mos_prompts.py +116 -0
  82. memos/templates/tree_reorganize_prompts.py +24 -17
  83. memos/utils.py +19 -0
  84. memos/mem_scheduler/modules/misc.py +0 -39
  85. memos/mem_scheduler/modules/retriever.py +0 -268
  86. memos/mem_scheduler/modules/schemas.py +0 -328
  87. memos/mem_scheduler/utils.py +0 -75
  88. memos/memories/textual/tree_text_memory/organize/redundancy.py +0 -193
  89. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/LICENSE +0 -0
  90. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/WHEEL +0 -0
  91. {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/entry_points.txt +0 -0
  92. /memos/mem_scheduler/{modules → general_modules}/__init__.py +0 -0
@@ -112,7 +112,7 @@ def get_default_config(
  "thread_pool_max_workers": kwargs.get("scheduler_thread_pool_max_workers", 10),
  "consume_interval_seconds": kwargs.get("scheduler_consume_interval_seconds", 3),
  "enable_parallel_dispatch": kwargs.get("scheduler_enable_parallel_dispatch", True),
- "enable_act_memory_update": True,
+ "enable_activation_memory": True,
  },
  }

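Note: in memos/mem_os/utils/default_config.py the scheduler flag enable_act_memory_update is renamed to enable_activation_memory, so callers that assemble this dict by hand must switch to the new key (whether the old key is still accepted is not visible in the diff). A minimal sketch, with a hypothetical helper name:

# Sketch of the renamed flag in the scheduler sub-config built by
# get_default_config (memos/mem_os/utils/default_config.py); keys and defaults
# are the ones visible in the hunk above, all other keys are omitted.
def build_scheduler_config(**kwargs) -> dict:  # hypothetical helper name
    return {
        "thread_pool_max_workers": kwargs.get("scheduler_thread_pool_max_workers", 10),
        "consume_interval_seconds": kwargs.get("scheduler_consume_interval_seconds", 3),
        "enable_parallel_dispatch": kwargs.get("scheduler_enable_parallel_dispatch", True),
        "enable_activation_memory": True,  # 0.2.1 spelled this "enable_act_memory_update"
    }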
@@ -239,10 +239,10 @@ def sample_nodes_with_type_balance(
  "MetaMemory": 0.05, # 5%
  }

- print(
+ logger.info(
  f"Starting type-balanced sampling, original nodes: {len(nodes)}, target nodes: {target_count}"
  )
- print(f"Target type ratios: {type_ratios}")
+ logger.info(f"Target type ratios: {type_ratios}")

  # Analyze current node type distribution
  current_type_counts = {}
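This hunk and the following ones migrate diagnostics in memos/mem_os/utils/format_utils.py from print() to logger.info(), which presumes a module-level logger. A minimal stand-in, assuming plain stdlib logging (the package actually wires logging through memos/log.py, whose changes are not shown here):

# Stand-in for the module-level logger these hunks assume; the exact
# construction inside the package is an assumption, not shown in this diff.
import logging

logger = logging.getLogger(__name__)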
@@ -255,7 +255,7 @@ def sample_nodes_with_type_balance(
  nodes_by_type[memory_type] = []
  nodes_by_type[memory_type].append(node)

- print(f"Current type distribution: {current_type_counts}")
+ logger.info(f"Current type distribution: {current_type_counts}")

  # Calculate target node count for each type
  type_targets = {}
@@ -290,7 +290,7 @@ def sample_nodes_with_type_balance(
  )
  type_targets[memory_type] = type_targets.get(memory_type, 0) + additional

- print(f"Target node count for each type: {type_targets}")
+ logger.info(f"Target node count for each type: {type_targets}")

  # Perform subtree quality sampling for each type
  selected_nodes = []
@@ -300,16 +300,18 @@ def sample_nodes_with_type_balance(
  continue

  type_nodes = nodes_by_type[memory_type]
- print(f"\n--- Processing {memory_type} type: {len(type_nodes)} -> {target_for_type} ---")
+ logger.info(
+ f"\n--- Processing {memory_type} type: {len(type_nodes)} -> {target_for_type} ---"
+ )

  if len(type_nodes) <= target_for_type:
  selected_nodes.extend(type_nodes)
- print(f" Select all: {len(type_nodes)} nodes")
+ logger.info(f" Select all: {len(type_nodes)} nodes")
  else:
  # Use enhanced subtree quality sampling
  type_selected = sample_by_enhanced_subtree_quality(type_nodes, edges, target_for_type)
  selected_nodes.extend(type_selected)
- print(f" Sampled selection: {len(type_selected)} nodes")
+ logger.info(f" Sampled selection: {len(type_selected)} nodes")

  # Filter edges
  selected_node_ids = {node["id"] for node in selected_nodes}
@@ -319,8 +321,8 @@ def sample_nodes_with_type_balance(
  if edge["source"] in selected_node_ids and edge["target"] in selected_node_ids
  ]

- print(f"\nFinal selected nodes: {len(selected_nodes)}")
- print(f"Final edges: {len(filtered_edges)}")
+ logger.info(f"\nFinal selected nodes: {len(selected_nodes)}")
+ logger.info(f"Final edges: {len(filtered_edges)}")

  # Verify final type distribution
  final_type_counts = {}
@@ -328,11 +330,11 @@ def sample_nodes_with_type_balance(
  memory_type = node.get("metadata", {}).get("memory_type", "Unknown")
  final_type_counts[memory_type] = final_type_counts.get(memory_type, 0) + 1

- print(f"Final type distribution: {final_type_counts}")
+ logger.info(f"Final type distribution: {final_type_counts}")
  for memory_type, count in final_type_counts.items():
  percentage = count / len(selected_nodes) * 100
  target_percentage = type_ratios.get(memory_type, 0) * 100
- print(
+ logger.info(
  f" {memory_type}: {count} nodes ({percentage:.1f}%, target: {target_percentage:.1f}%)"
  )

@@ -358,9 +360,9 @@ def sample_by_enhanced_subtree_quality(
  subtree_analysis.items(), key=lambda x: x[1]["quality_score"], reverse=True
  )

- print(" Subtree quality ranking:")
+ logger.info(" Subtree quality ranking:")
  for i, (root_id, analysis) in enumerate(sorted_subtrees[:5]):
- print(
+ logger.info(
  f" #{i + 1} Root node {root_id}: Quality={analysis['quality_score']:.2f}, "
  f"Depth={analysis['max_depth']}, Branches={analysis['branch_nodes']}, "
  f"Leaves={analysis['leaf_count']}, Max Width={analysis['max_width']}"
@@ -386,7 +388,7 @@ def sample_by_enhanced_subtree_quality(
  if node:
  selected_nodes.append(node)
  selected_node_ids.add(node_id)
- print(f" Select entire subtree {root_id}: +{len(new_nodes)} nodes")
+ logger.info(f" Select entire subtree {root_id}: +{len(new_nodes)} nodes")
  else:
  # Subtree too large, need partial selection
  if analysis["quality_score"] > 5: # Only partial selection for high-quality subtrees
@@ -398,7 +400,7 @@ def sample_by_enhanced_subtree_quality(
  selected_nodes.extend(partial_selection)
  for node in partial_selection:
  selected_node_ids.add(node["id"])
- print(
+ logger.info(
  f" Partial selection of subtree {root_id}: +{len(partial_selection)} nodes"
  )

@@ -411,7 +413,7 @@ def sample_by_enhanced_subtree_quality(
  remaining_count = target_count - len(selected_nodes)
  additional = sample_nodes_by_importance(remaining_nodes, edges, remaining_count)
  selected_nodes.extend(additional)
- print(f" Supplementary selection: +{len(additional)} nodes")
+ logger.info(f" Supplementary selection: +{len(additional)} nodes")

  return selected_nodes

@@ -493,7 +495,7 @@ def sample_nodes_by_importance(
  # Modified main function to use new sampling strategy
  def convert_graph_to_tree_forworkmem(
  json_data: dict[str, Any],
- target_node_count: int = 150,
+ target_node_count: int = 200,
  type_ratios: dict[str, float] | None = None,
  ) -> dict[str, Any]:
  """
@@ -502,8 +504,8 @@ def convert_graph_to_tree_forworkmem(
  original_nodes = json_data.get("nodes", [])
  original_edges = json_data.get("edges", [])

- print(f"Original node count: {len(original_nodes)}")
- print(f"Target node count: {target_node_count}")
+ logger.info(f"Original node count: {len(original_nodes)}")
+ logger.info(f"Target node count: {target_node_count}")
  filter_original_edges = []
  for original_edge in original_edges:
  if original_edge["type"] == "PARENT":
@@ -531,7 +533,7 @@ def convert_graph_to_tree_forworkmem(
  node_name = extract_node_name(memory)
  memory_key = node.get("metadata", {}).get("key", node_name)
  usage = node.get("metadata", {}).get("usage", [])
- frequency = len(usage)
+ frequency = len(usage) if len(usage) < 100 else 100
  node_map[node["id"]] = {
  "id": node["id"],
  "value": memory,
@@ -568,15 +570,23 @@ def convert_graph_to_tree_forworkmem(
  else:
  other_roots.append(root_id)

- def build_tree(node_id: str) -> dict[str, Any]:
- """Recursively build tree structure"""
+ def build_tree(node_id: str, visited=None) -> dict[str, Any] | None:
+ """Recursively build tree structure with cycle detection"""
+ if visited is None:
+ visited = set()
+
+ if node_id in visited:
+ logger.warning(f"[build_tree] Detected cycle at node {node_id}, skipping.")
+ return None
+ visited.add(node_id)
+
  if node_id not in node_map:
  return None

  children_ids = children_map.get(node_id, [])
  children = []
  for child_id in children_ids:
- child_tree = build_tree(child_id)
+ child_tree = build_tree(child_id, visited)
  if child_tree:
  children.append(child_tree)

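build_tree now threads a shared visited set through the recursion, so a cycle in the PARENT edges terminates with a warning instead of a RecursionError. A self-contained sketch of the same guard on a toy two-node cycle (the data and logger setup here are illustrative only):

# Toy reproduction of the cycle guard added above; node_map/children_map
# mirror the structures used in format_utils but the data is hypothetical.
import logging

logging.basicConfig(level=logging.WARNING)
logger = logging.getLogger(__name__)

node_map = {"a": {"id": "a"}, "b": {"id": "b"}}
children_map = {"a": ["b"], "b": ["a"]}  # deliberate cycle: a -> b -> a

def build_tree(node_id, visited=None):
    if visited is None:
        visited = set()
    if node_id in visited:
        logger.warning(f"[build_tree] Detected cycle at node {node_id}, skipping.")
        return None
    visited.add(node_id)
    if node_id not in node_map:
        return None
    children = []
    for child_id in children_map.get(node_id, []):
        child_tree = build_tree(child_id, visited)
        if child_tree:
            children.append(child_tree)
    return {**node_map[node_id], "children": children}

tree = build_tree("a")  # terminates; the back-edge b -> a is skipped with a warning

Because the visited set is shared across sibling branches and never cleared, a node reachable along two different paths is also emitted only once; this overlaps with the duplicate-ID cleanup added at the end of this file.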
@@ -633,7 +643,7 @@ def convert_graph_to_tree_forworkmem(


  def print_tree_structure(node: dict[str, Any], level: int = 0, max_level: int = 5):
- """Print the first few layers of tree structure for easy viewing"""
+ """logger.info the first few layers of tree structure for easy viewing"""
  if level > max_level:
  return

@@ -647,21 +657,21 @@ def print_tree_structure(node: dict[str, Any], level: int = 0, max_level: int =
  children = node.get("children", [])
  if children:
  # Intermediate node, display name, type and child count
- print(f"{indent}- {node_name} [{memory_type}] ({len(children)} children)")
- print(f"{indent} ID: {node_id}")
+ logger.info(f"{indent}- {node_name} [{memory_type}] ({len(children)} children)")
+ logger.info(f"{indent} ID: {node_id}")
  display_value = node_value[:80] + "..." if len(node_value) > 80 else node_value
- print(f"{indent} Value: {display_value}")
+ logger.info(f"{indent} Value: {display_value}")

  if level < max_level:
  for child in children:
  print_tree_structure(child, level + 1, max_level)
  elif level == max_level:
- print(f"{indent} ... (expansion limited)")
+ logger.info(f"{indent} ... (expansion limited)")
  else:
  # Leaf node, display name, type and value
  display_value = node_value[:80] + "..." if len(node_value) > 80 else node_value
- print(f"{indent}- {node_name} [{memory_type}]: {display_value}")
- print(f"{indent} ID: {node_id}")
+ logger.info(f"{indent}- {node_name} [{memory_type}]: {display_value}")
+ logger.info(f"{indent} ID: {node_id}")


  def analyze_final_tree_quality(tree_data: dict[str, Any]) -> dict:
@@ -856,107 +866,107 @@ def analyze_final_tree_quality(tree_data: dict[str, Any]) -> dict:


  def print_tree_analysis(tree_data: dict[str, Any]):
- """Print enhanced tree analysis results"""
+ """logger.info enhanced tree analysis results"""
  stats = analyze_final_tree_quality(tree_data)

- print("\n" + "=" * 60)
- print("🌳 Enhanced Tree Structure Quality Analysis Report")
- print("=" * 60)
+ logger.info("\n" + "=" * 60)
+ logger.info("🌳 Enhanced Tree Structure Quality Analysis Report")
+ logger.info("=" * 60)

  # Basic statistics
- print("\n📊 Basic Statistics:")
- print(f" Total nodes: {stats['total_nodes']}")
- print(f" Max depth: {stats['max_depth']}")
- print(
+ logger.info("\n📊 Basic Statistics:")
+ logger.info(f" Total nodes: {stats['total_nodes']}")
+ logger.info(f" Max depth: {stats['max_depth']}")
+ logger.info(
  f" Leaf nodes: {stats['total_leaves']} ({stats['total_leaves'] / stats['total_nodes'] * 100:.1f}%)"
  )
- print(
+ logger.info(
  f" Branch nodes: {stats['total_branches']} ({stats['total_branches'] / stats['total_nodes'] * 100:.1f}%)"
  )

  # Structure quality assessment
  structure = stats.get("structure_quality", {})
  if structure:
- print("\n🏗️ Structure Quality Assessment:")
- print(
+ logger.info("\n🏗️ Structure Quality Assessment:")
+ logger.info(
  f" Branch density: {structure['branch_density']:.3f} ({'✅ Good' if 0.2 <= structure['branch_density'] <= 0.6 else '⚠️ Needs improvement'})"
  )
- print(
+ logger.info(
  f" Leaf ratio: {structure['leaf_ratio']:.3f} ({'✅ Good' if 0.3 <= structure['leaf_ratio'] <= 0.7 else '⚠️ Needs improvement'})"
  )
- print(f" Max width: {structure['max_width']}")
- print(
+ logger.info(f" Max width: {structure['max_width']}")
+ logger.info(
  f" Depth-width ratio: {structure['depth_width_ratio']:.2f} ({'✅ Good' if structure['depth_width_ratio'] <= 3 else '⚠️ Too thin'})"
  )
- print(
+ logger.info(
  f" Overall balance: {'✅ Good' if structure['is_well_balanced'] else '⚠️ Needs improvement'}"
  )

  # Single chain analysis
  chain_analysis = stats.get("chain_analysis", {})
  if chain_analysis:
- print("\n🔗 Single Chain Structure Analysis:")
- print(f" Longest chain: {chain_analysis.get('max_chain_length', 0)} layers")
- print(f" Single chain subtrees: {chain_analysis.get('single_chain_subtrees', 0)}")
- print(
+ logger.info("\n🔗 Single Chain Structure Analysis:")
+ logger.info(f" Longest chain: {chain_analysis.get('max_chain_length', 0)} layers")
+ logger.info(f" Single chain subtrees: {chain_analysis.get('single_chain_subtrees', 0)}")
+ logger.info(
  f" Single chain subtree ratio: {chain_analysis.get('chain_subtree_ratio', 0) * 100:.1f}%"
  )

  if chain_analysis.get("max_chain_length", 0) > 5:
- print(" ⚠️ Warning: Overly long single chain structure may affect display")
+ logger.info(" ⚠️ Warning: Overly long single chain structure may affect display")
  elif chain_analysis.get("chain_subtree_ratio", 0) > 0.3:
- print(
+ logger.info(
  " ⚠️ Warning: Too many single chain subtrees, suggest increasing branch structure"
  )
  else:
- print(" ✅ Single chain structure well controlled")
+ logger.info(" ✅ Single chain structure well controlled")

  # Type diversity
  type_div = stats.get("type_diversity", {})
  if type_div:
- print("\n🎨 Type Diversity Analysis:")
- print(f" Total types: {type_div['total_types']}")
- print(f" Diversity index: {type_div['shannon_diversity']:.3f}")
- print(f" Normalized diversity: {type_div['normalized_diversity']:.3f}")
- print(f" Distribution balance: {type_div['distribution_balance']:.3f}")
+ logger.info("\n🎨 Type Diversity Analysis:")
+ logger.info(f" Total types: {type_div['total_types']}")
+ logger.info(f" Diversity index: {type_div['shannon_diversity']:.3f}")
+ logger.info(f" Normalized diversity: {type_div['normalized_diversity']:.3f}")
+ logger.info(f" Distribution balance: {type_div['distribution_balance']:.3f}")

  # Type distribution
- print("\n📋 Type Distribution Details:")
+ logger.info("\n📋 Type Distribution Details:")
  for mem_type, count in sorted(stats["by_type"].items(), key=lambda x: x[1], reverse=True):
  percentage = count / stats["total_nodes"] * 100
- print(f" {mem_type}: {count} nodes ({percentage:.1f}%)")
+ logger.info(f" {mem_type}: {count} nodes ({percentage:.1f}%)")

  # Depth distribution
- print("\n📏 Depth Distribution:")
+ logger.info("\n📏 Depth Distribution:")
  for depth in sorted(stats["by_depth"].keys()):
  count = stats["by_depth"][depth]
- print(f" Depth {depth}: {count} nodes")
+ logger.info(f" Depth {depth}: {count} nodes")

  # Major subtree analysis
  if stats["subtrees"]:
- print("\n🌲 Major Subtree Analysis (sorted by quality):")
+ logger.info("\n🌲 Major Subtree Analysis (sorted by quality):")
  sorted_subtrees = sorted(
  stats["subtrees"], key=lambda x: x.get("quality_score", 0), reverse=True
  )
  for i, subtree in enumerate(sorted_subtrees[:8]): # Show first 8
  quality = subtree.get("quality_score", 0)
- print(f" #{i + 1} {subtree['root']} [{subtree['type']}]:")
- print(f" Quality score: {quality:.2f}")
- print(
+ logger.info(f" #{i + 1} {subtree['root']} [{subtree['type']}]:")
+ logger.info(f" Quality score: {quality:.2f}")
+ logger.info(
  f" Structure: Depth={subtree['depth']}, Branches={subtree['branches']}, Leaves={subtree['leaves']}"
  )
- print(
+ logger.info(
  f" Density: Branch density={subtree.get('branch_density', 0):.3f}, Leaf ratio={subtree.get('leaf_ratio', 0):.3f}"
  )

  if quality > 15:
- print(" ✅ High quality subtree")
+ logger.info(" ✅ High quality subtree")
  elif quality > 8:
- print(" 🟡 Medium quality subtree")
+ logger.info(" 🟡 Medium quality subtree")
  else:
- print(" 🔴 Low quality subtree")
+ logger.info(" 🔴 Low quality subtree")

- print("\n" + "=" * 60)
+ logger.info("\n" + "=" * 60)


  def remove_embedding_recursive(memory_info: dict) -> Any:
@@ -1152,3 +1162,204 @@ def convert_activation_memory_summary(act_mem_items: list[KVCacheItem]) -> dict[
  "total_parameters": total_parameters,
  "summary": f"Activation memory contains {total_items} items with {total_layers} layers and approximately {total_parameters:,} parameters",
  }
+
+
+ def detect_and_remove_duplicate_ids(tree_node: dict[str, Any]) -> dict[str, Any]:
+ """
+ Detect and remove duplicate IDs in tree structure by skipping duplicate nodes.
+ First occurrence of each ID is kept, subsequent duplicates are removed.
+
+ Args:
+ tree_node: Tree node (dictionary format)
+
+ Returns:
+ dict: Fixed tree node with duplicate nodes removed
+ """
+ used_ids = set()
+ removed_count = 0
+
+ def remove_duplicates_recursive(
+ node: dict[str, Any], parent_path: str = ""
+ ) -> dict[str, Any] | None:
+ """Recursively remove duplicate IDs by skipping duplicate nodes"""
+ nonlocal removed_count
+
+ if not isinstance(node, dict):
+ return node
+
+ # Create node copy
+ fixed_node = node.copy()
+
+ # Handle current node ID
+ current_id = fixed_node.get("id", "")
+ if current_id in used_ids and current_id not in ["root", "WorkingMemory"]:
+ # Skip this duplicate node
+ logger.info(f"Skipping duplicate node: {current_id} (path: {parent_path})")
+ removed_count += 1
+ return None # Return None to indicate this node should be removed
+ else:
+ used_ids.add(current_id)
+
+ # Recursively process child nodes
+ if "children" in fixed_node and isinstance(fixed_node["children"], list):
+ fixed_children = []
+ for i, child in enumerate(fixed_node["children"]):
+ child_path = f"{parent_path}/{fixed_node.get('node_name', 'unknown')}[{i}]"
+ fixed_child = remove_duplicates_recursive(child, child_path)
+ if fixed_child is not None: # Only add non-None children
+ fixed_children.append(fixed_child)
+ fixed_node["children"] = fixed_children
+
+ return fixed_node
+
+ result = remove_duplicates_recursive(tree_node)
+ if result is not None:
+ logger.info(f"Removed {removed_count} duplicate nodes")
+ return result
+ else:
+ # If root node itself was removed (shouldn't happen), return empty root
+ return {
+ "id": "root",
+ "node_name": "root",
+ "value": "root",
+ "memory_type": "Root",
+ "children": [],
+ }
+
+
+ def validate_tree_structure(tree_node: dict[str, Any]) -> dict[str, Any]:
+ """
+ Validate tree structure integrity, including ID uniqueness check
+
+ Args:
+ tree_node: Tree node (dictionary format)
+
+ Returns:
+ dict: Validation result containing error messages and fix suggestions
+ """
+ validation_result = {
+ "is_valid": True,
+ "errors": [],
+ "warnings": [],
+ "total_nodes": 0,
+ "unique_ids": set(),
+ "duplicate_ids": set(),
+ "missing_ids": set(),
+ "invalid_structure": [],
+ }
+
+ def validate_recursive(node: dict[str, Any], path: str = "", depth: int = 0):
+ """Recursively validate tree structure"""
+ if not isinstance(node, dict):
+ validation_result["errors"].append(f"Node is not a dictionary: {path}")
+ validation_result["is_valid"] = False
+ return
+
+ validation_result["total_nodes"] += 1
+
+ # Check required fields
+ if "id" not in node:
+ validation_result["errors"].append(f"Node missing ID field: {path}")
+ validation_result["missing_ids"].add(path)
+ validation_result["is_valid"] = False
+ else:
+ node_id = node["id"]
+ if node_id in validation_result["unique_ids"]:
+ validation_result["errors"].append(f"Duplicate node ID: {node_id} (path: {path})")
+ validation_result["duplicate_ids"].add(node_id)
+ validation_result["is_valid"] = False
+ else:
+ validation_result["unique_ids"].add(node_id)
+
+ # Check other required fields
+ required_fields = ["node_name", "value", "memory_type"]
+ for field in required_fields:
+ if field not in node:
+ validation_result["warnings"].append(f"Node missing field '{field}': {path}")
+
+ # Recursively validate child nodes
+ if "children" in node:
+ if not isinstance(node["children"], list):
+ validation_result["errors"].append(f"Children field is not a list: {path}")
+ validation_result["is_valid"] = False
+ else:
+ for i, child in enumerate(node["children"]):
+ child_path = f"{path}/children[{i}]"
+ validate_recursive(child, child_path, depth + 1)
+
+ # Check depth limit
+ if depth > 20:
+ validation_result["warnings"].append(f"Tree depth too deep ({depth}): {path}")
+
+ validate_recursive(tree_node)
+
+ # Generate fix suggestions
+ if validation_result["duplicate_ids"]:
+ validation_result["fix_suggestion"] = (
+ "Use detect_and_fix_duplicate_ids() function to fix duplicate IDs"
+ )
+
+ return validation_result
+
+
+ def ensure_unique_tree_ids(tree_result: dict[str, Any]) -> dict[str, Any]:
+ """
+ Ensure all node IDs in tree structure are unique by removing duplicate nodes,
+ this is a post-processing function for convert_graph_to_tree_forworkmem
+
+ Args:
+ tree_result: Tree structure returned by convert_graph_to_tree_forworkmem
+
+ Returns:
+ dict: Fixed tree structure with duplicate nodes removed
+ """
+ logger.info("🔍 Starting duplicate ID check in tree structure...")
+
+ # First validate tree structure
+ validation = validate_tree_structure(tree_result)
+
+ if validation["is_valid"]:
+ logger.info("Tree structure validation passed, no duplicate IDs found")
+ return tree_result
+
+ # Report issues
+ logger.info(f"Found {len(validation['errors'])} errors:")
+ for error in validation["errors"][:5]: # Only show first 5 errors
+ logger.info(f" - {error}")
+
+ if len(validation["errors"]) > 5:
+ logger.info(f" ... and {len(validation['errors']) - 5} more errors")
+
+ logger.info("Statistics:")
+ logger.info(f" - Total nodes: {validation['total_nodes']}")
+ logger.info(f" - Unique IDs: {len(validation['unique_ids'])}")
+ logger.info(f" - Duplicate IDs: {len(validation['duplicate_ids'])}")
+
+ # Remove duplicate nodes
+ logger.info(" Starting duplicate node removal...")
+ fixed_tree = detect_and_remove_duplicate_ids(tree_result)
+
+ # Validate again
+ post_validation = validate_tree_structure(fixed_tree)
+ if post_validation["is_valid"]:
+ logger.info("Removal completed, tree structure is now valid")
+ logger.info(f"Final node count: {post_validation['total_nodes']}")
+ else:
+ logger.info("Issues remain after removal, please check code logic")
+ for error in post_validation["errors"][:3]:
+ logger.info(f" - {error}")
+
+ return fixed_tree
+
+
+ def clean_json_response(response: str) -> str:
+ """
+ Remove markdown JSON code block formatting from LLM response.
+
+ Args:
+ response: Raw response string that may contain ```json and ```
+
+ Returns:
+ str: Clean JSON string without markdown formatting
+ """
+ return response.replace("```json", "").replace("```", "").strip()
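The three helpers added here form a validate-then-repair pipeline: validate_tree_structure reports duplicate or missing IDs, detect_and_remove_duplicate_ids keeps the first occurrence of each ID (exempting "root" and "WorkingMemory"), and ensure_unique_tree_ids chains the two as a post-processing step for convert_graph_to_tree_forworkmem. A hypothetical end-to-end usage, assuming the helpers are importable from the file this hunk modifies:

# Hypothetical usage of the helpers added above, imported from
# memos/mem_os/utils/format_utils.py (the file modified by this hunk).
from memos.mem_os.utils.format_utils import (
    clean_json_response,
    ensure_unique_tree_ids,
    validate_tree_structure,
)

tree = {
    "id": "root", "node_name": "root", "value": "root", "memory_type": "Root",
    "children": [
        {"id": "n1", "node_name": "a", "value": "v", "memory_type": "LongTermMemory", "children": []},
        {"id": "n1", "node_name": "a", "value": "v", "memory_type": "LongTermMemory", "children": []},
    ],
}

report = validate_tree_structure(tree)
print(report["is_valid"], report["duplicate_ids"])  # False {'n1'}

fixed = ensure_unique_tree_ids(tree)
print(len(fixed["children"]))  # 1; the second "n1" node was dropped

# clean_json_response strips Markdown fences an LLM may wrap around JSON:
print(clean_json_response('```json\n{"ok": true}\n```'))  # {"ok": true}

One inconsistency worth noting: the fix_suggestion string produced by validate_tree_structure refers to detect_and_fix_duplicate_ids(), but the function added in this hunk is named detect_and_remove_duplicate_ids().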