MemoryOS 0.2.1__py3-none-any.whl → 1.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of MemoryOS might be problematic. Click here for more details.
- {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/METADATA +7 -1
- {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/RECORD +87 -64
- memos/__init__.py +1 -1
- memos/api/config.py +158 -69
- memos/api/context/context.py +147 -0
- memos/api/context/dependencies.py +101 -0
- memos/api/product_models.py +5 -1
- memos/api/routers/product_router.py +54 -26
- memos/configs/graph_db.py +49 -1
- memos/configs/internet_retriever.py +19 -0
- memos/configs/mem_os.py +5 -0
- memos/configs/mem_reader.py +9 -0
- memos/configs/mem_scheduler.py +54 -18
- memos/configs/mem_user.py +58 -0
- memos/graph_dbs/base.py +38 -3
- memos/graph_dbs/factory.py +2 -0
- memos/graph_dbs/nebular.py +1612 -0
- memos/graph_dbs/neo4j.py +18 -9
- memos/log.py +6 -1
- memos/mem_cube/utils.py +13 -6
- memos/mem_os/core.py +157 -37
- memos/mem_os/main.py +2 -2
- memos/mem_os/product.py +252 -201
- memos/mem_os/utils/default_config.py +1 -1
- memos/mem_os/utils/format_utils.py +281 -70
- memos/mem_os/utils/reference_utils.py +133 -0
- memos/mem_reader/simple_struct.py +13 -5
- memos/mem_scheduler/base_scheduler.py +239 -266
- memos/mem_scheduler/{modules → general_modules}/base.py +4 -5
- memos/mem_scheduler/{modules → general_modules}/dispatcher.py +57 -21
- memos/mem_scheduler/general_modules/misc.py +104 -0
- memos/mem_scheduler/{modules → general_modules}/rabbitmq_service.py +12 -10
- memos/mem_scheduler/{modules → general_modules}/redis_service.py +1 -1
- memos/mem_scheduler/general_modules/retriever.py +199 -0
- memos/mem_scheduler/general_modules/scheduler_logger.py +261 -0
- memos/mem_scheduler/general_scheduler.py +243 -80
- memos/mem_scheduler/monitors/__init__.py +0 -0
- memos/mem_scheduler/monitors/dispatcher_monitor.py +305 -0
- memos/mem_scheduler/{modules/monitor.py → monitors/general_monitor.py} +106 -57
- memos/mem_scheduler/mos_for_test_scheduler.py +23 -20
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/general_schemas.py +44 -0
- memos/mem_scheduler/schemas/message_schemas.py +149 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +337 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/misc_utils.py +102 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +500 -0
- memos/mem_user/persistent_factory.py +96 -0
- memos/mem_user/user_manager.py +4 -4
- memos/memories/activation/item.py +5 -1
- memos/memories/activation/kv.py +20 -8
- memos/memories/textual/base.py +2 -2
- memos/memories/textual/general.py +36 -92
- memos/memories/textual/item.py +5 -33
- memos/memories/textual/tree.py +13 -7
- memos/memories/textual/tree_text_memory/organize/{conflict.py → handler.py} +34 -50
- memos/memories/textual/tree_text_memory/organize/manager.py +8 -96
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +49 -43
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +107 -142
- memos/memories/textual/tree_text_memory/retrieve/bochasearch.py +229 -0
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -3
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +11 -0
- memos/memories/textual/tree_text_memory/retrieve/recall.py +15 -8
- memos/memories/textual/tree_text_memory/retrieve/reranker.py +1 -1
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +2 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +191 -116
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +47 -15
- memos/memories/textual/tree_text_memory/retrieve/utils.py +11 -7
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +62 -58
- memos/memos_tools/dinding_report_bot.py +422 -0
- memos/memos_tools/lockfree_dict.py +120 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +96 -0
- memos/memos_tools/thread_safe_dict.py +288 -0
- memos/settings.py +3 -1
- memos/templates/mem_reader_prompts.py +4 -1
- memos/templates/mem_scheduler_prompts.py +62 -15
- memos/templates/mos_prompts.py +116 -0
- memos/templates/tree_reorganize_prompts.py +24 -17
- memos/utils.py +19 -0
- memos/mem_scheduler/modules/misc.py +0 -39
- memos/mem_scheduler/modules/retriever.py +0 -268
- memos/mem_scheduler/modules/schemas.py +0 -328
- memos/mem_scheduler/utils.py +0 -75
- memos/memories/textual/tree_text_memory/organize/redundancy.py +0 -193
- {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/LICENSE +0 -0
- {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/WHEEL +0 -0
- {memoryos-0.2.1.dist-info → memoryos-1.0.0.dist-info}/entry_points.txt +0 -0
- /memos/mem_scheduler/{modules → general_modules}/__init__.py +0 -0
|
@@ -112,7 +112,7 @@ def get_default_config(
|
|
|
112
112
|
"thread_pool_max_workers": kwargs.get("scheduler_thread_pool_max_workers", 10),
|
|
113
113
|
"consume_interval_seconds": kwargs.get("scheduler_consume_interval_seconds", 3),
|
|
114
114
|
"enable_parallel_dispatch": kwargs.get("scheduler_enable_parallel_dispatch", True),
|
|
115
|
-
"
|
|
115
|
+
"enable_activation_memory": True,
|
|
116
116
|
},
|
|
117
117
|
}
|
|
118
118
|
|
|
@@ -239,10 +239,10 @@ def sample_nodes_with_type_balance(
|
|
|
239
239
|
"MetaMemory": 0.05, # 5%
|
|
240
240
|
}
|
|
241
241
|
|
|
242
|
-
|
|
242
|
+
logger.info(
|
|
243
243
|
f"Starting type-balanced sampling, original nodes: {len(nodes)}, target nodes: {target_count}"
|
|
244
244
|
)
|
|
245
|
-
|
|
245
|
+
logger.info(f"Target type ratios: {type_ratios}")
|
|
246
246
|
|
|
247
247
|
# Analyze current node type distribution
|
|
248
248
|
current_type_counts = {}
|
|
@@ -255,7 +255,7 @@ def sample_nodes_with_type_balance(
|
|
|
255
255
|
nodes_by_type[memory_type] = []
|
|
256
256
|
nodes_by_type[memory_type].append(node)
|
|
257
257
|
|
|
258
|
-
|
|
258
|
+
logger.info(f"Current type distribution: {current_type_counts}")
|
|
259
259
|
|
|
260
260
|
# Calculate target node count for each type
|
|
261
261
|
type_targets = {}
|
|
@@ -290,7 +290,7 @@ def sample_nodes_with_type_balance(
|
|
|
290
290
|
)
|
|
291
291
|
type_targets[memory_type] = type_targets.get(memory_type, 0) + additional
|
|
292
292
|
|
|
293
|
-
|
|
293
|
+
logger.info(f"Target node count for each type: {type_targets}")
|
|
294
294
|
|
|
295
295
|
# Perform subtree quality sampling for each type
|
|
296
296
|
selected_nodes = []
|
|
@@ -300,16 +300,18 @@ def sample_nodes_with_type_balance(
|
|
|
300
300
|
continue
|
|
301
301
|
|
|
302
302
|
type_nodes = nodes_by_type[memory_type]
|
|
303
|
-
|
|
303
|
+
logger.info(
|
|
304
|
+
f"\n--- Processing {memory_type} type: {len(type_nodes)} -> {target_for_type} ---"
|
|
305
|
+
)
|
|
304
306
|
|
|
305
307
|
if len(type_nodes) <= target_for_type:
|
|
306
308
|
selected_nodes.extend(type_nodes)
|
|
307
|
-
|
|
309
|
+
logger.info(f" Select all: {len(type_nodes)} nodes")
|
|
308
310
|
else:
|
|
309
311
|
# Use enhanced subtree quality sampling
|
|
310
312
|
type_selected = sample_by_enhanced_subtree_quality(type_nodes, edges, target_for_type)
|
|
311
313
|
selected_nodes.extend(type_selected)
|
|
312
|
-
|
|
314
|
+
logger.info(f" Sampled selection: {len(type_selected)} nodes")
|
|
313
315
|
|
|
314
316
|
# Filter edges
|
|
315
317
|
selected_node_ids = {node["id"] for node in selected_nodes}
|
|
@@ -319,8 +321,8 @@ def sample_nodes_with_type_balance(
|
|
|
319
321
|
if edge["source"] in selected_node_ids and edge["target"] in selected_node_ids
|
|
320
322
|
]
|
|
321
323
|
|
|
322
|
-
|
|
323
|
-
|
|
324
|
+
logger.info(f"\nFinal selected nodes: {len(selected_nodes)}")
|
|
325
|
+
logger.info(f"Final edges: {len(filtered_edges)}")
|
|
324
326
|
|
|
325
327
|
# Verify final type distribution
|
|
326
328
|
final_type_counts = {}
|
|
@@ -328,11 +330,11 @@ def sample_nodes_with_type_balance(
|
|
|
328
330
|
memory_type = node.get("metadata", {}).get("memory_type", "Unknown")
|
|
329
331
|
final_type_counts[memory_type] = final_type_counts.get(memory_type, 0) + 1
|
|
330
332
|
|
|
331
|
-
|
|
333
|
+
logger.info(f"Final type distribution: {final_type_counts}")
|
|
332
334
|
for memory_type, count in final_type_counts.items():
|
|
333
335
|
percentage = count / len(selected_nodes) * 100
|
|
334
336
|
target_percentage = type_ratios.get(memory_type, 0) * 100
|
|
335
|
-
|
|
337
|
+
logger.info(
|
|
336
338
|
f" {memory_type}: {count} nodes ({percentage:.1f}%, target: {target_percentage:.1f}%)"
|
|
337
339
|
)
|
|
338
340
|
|
|
@@ -358,9 +360,9 @@ def sample_by_enhanced_subtree_quality(
|
|
|
358
360
|
subtree_analysis.items(), key=lambda x: x[1]["quality_score"], reverse=True
|
|
359
361
|
)
|
|
360
362
|
|
|
361
|
-
|
|
363
|
+
logger.info(" Subtree quality ranking:")
|
|
362
364
|
for i, (root_id, analysis) in enumerate(sorted_subtrees[:5]):
|
|
363
|
-
|
|
365
|
+
logger.info(
|
|
364
366
|
f" #{i + 1} Root node {root_id}: Quality={analysis['quality_score']:.2f}, "
|
|
365
367
|
f"Depth={analysis['max_depth']}, Branches={analysis['branch_nodes']}, "
|
|
366
368
|
f"Leaves={analysis['leaf_count']}, Max Width={analysis['max_width']}"
|
|
@@ -386,7 +388,7 @@ def sample_by_enhanced_subtree_quality(
|
|
|
386
388
|
if node:
|
|
387
389
|
selected_nodes.append(node)
|
|
388
390
|
selected_node_ids.add(node_id)
|
|
389
|
-
|
|
391
|
+
logger.info(f" Select entire subtree {root_id}: +{len(new_nodes)} nodes")
|
|
390
392
|
else:
|
|
391
393
|
# Subtree too large, need partial selection
|
|
392
394
|
if analysis["quality_score"] > 5: # Only partial selection for high-quality subtrees
|
|
@@ -398,7 +400,7 @@ def sample_by_enhanced_subtree_quality(
|
|
|
398
400
|
selected_nodes.extend(partial_selection)
|
|
399
401
|
for node in partial_selection:
|
|
400
402
|
selected_node_ids.add(node["id"])
|
|
401
|
-
|
|
403
|
+
logger.info(
|
|
402
404
|
f" Partial selection of subtree {root_id}: +{len(partial_selection)} nodes"
|
|
403
405
|
)
|
|
404
406
|
|
|
@@ -411,7 +413,7 @@ def sample_by_enhanced_subtree_quality(
|
|
|
411
413
|
remaining_count = target_count - len(selected_nodes)
|
|
412
414
|
additional = sample_nodes_by_importance(remaining_nodes, edges, remaining_count)
|
|
413
415
|
selected_nodes.extend(additional)
|
|
414
|
-
|
|
416
|
+
logger.info(f" Supplementary selection: +{len(additional)} nodes")
|
|
415
417
|
|
|
416
418
|
return selected_nodes
|
|
417
419
|
|
|
@@ -493,7 +495,7 @@ def sample_nodes_by_importance(
|
|
|
493
495
|
# Modified main function to use new sampling strategy
|
|
494
496
|
def convert_graph_to_tree_forworkmem(
|
|
495
497
|
json_data: dict[str, Any],
|
|
496
|
-
target_node_count: int =
|
|
498
|
+
target_node_count: int = 200,
|
|
497
499
|
type_ratios: dict[str, float] | None = None,
|
|
498
500
|
) -> dict[str, Any]:
|
|
499
501
|
"""
|
|
@@ -502,8 +504,8 @@ def convert_graph_to_tree_forworkmem(
|
|
|
502
504
|
original_nodes = json_data.get("nodes", [])
|
|
503
505
|
original_edges = json_data.get("edges", [])
|
|
504
506
|
|
|
505
|
-
|
|
506
|
-
|
|
507
|
+
logger.info(f"Original node count: {len(original_nodes)}")
|
|
508
|
+
logger.info(f"Target node count: {target_node_count}")
|
|
507
509
|
filter_original_edges = []
|
|
508
510
|
for original_edge in original_edges:
|
|
509
511
|
if original_edge["type"] == "PARENT":
|
|
@@ -531,7 +533,7 @@ def convert_graph_to_tree_forworkmem(
|
|
|
531
533
|
node_name = extract_node_name(memory)
|
|
532
534
|
memory_key = node.get("metadata", {}).get("key", node_name)
|
|
533
535
|
usage = node.get("metadata", {}).get("usage", [])
|
|
534
|
-
frequency = len(usage)
|
|
536
|
+
frequency = len(usage) if len(usage) < 100 else 100
|
|
535
537
|
node_map[node["id"]] = {
|
|
536
538
|
"id": node["id"],
|
|
537
539
|
"value": memory,
|
|
@@ -568,15 +570,23 @@ def convert_graph_to_tree_forworkmem(
|
|
|
568
570
|
else:
|
|
569
571
|
other_roots.append(root_id)
|
|
570
572
|
|
|
571
|
-
def build_tree(node_id: str) -> dict[str, Any]:
|
|
572
|
-
"""Recursively build tree structure"""
|
|
573
|
+
def build_tree(node_id: str, visited=None) -> dict[str, Any] | None:
|
|
574
|
+
"""Recursively build tree structure with cycle detection"""
|
|
575
|
+
if visited is None:
|
|
576
|
+
visited = set()
|
|
577
|
+
|
|
578
|
+
if node_id in visited:
|
|
579
|
+
logger.warning(f"[build_tree] Detected cycle at node {node_id}, skipping.")
|
|
580
|
+
return None
|
|
581
|
+
visited.add(node_id)
|
|
582
|
+
|
|
573
583
|
if node_id not in node_map:
|
|
574
584
|
return None
|
|
575
585
|
|
|
576
586
|
children_ids = children_map.get(node_id, [])
|
|
577
587
|
children = []
|
|
578
588
|
for child_id in children_ids:
|
|
579
|
-
child_tree = build_tree(child_id)
|
|
589
|
+
child_tree = build_tree(child_id, visited)
|
|
580
590
|
if child_tree:
|
|
581
591
|
children.append(child_tree)
|
|
582
592
|
|
|
@@ -633,7 +643,7 @@ def convert_graph_to_tree_forworkmem(
|
|
|
633
643
|
|
|
634
644
|
|
|
635
645
|
def print_tree_structure(node: dict[str, Any], level: int = 0, max_level: int = 5):
|
|
636
|
-
"""
|
|
646
|
+
"""logger.info the first few layers of tree structure for easy viewing"""
|
|
637
647
|
if level > max_level:
|
|
638
648
|
return
|
|
639
649
|
|
|
@@ -647,21 +657,21 @@ def print_tree_structure(node: dict[str, Any], level: int = 0, max_level: int =
|
|
|
647
657
|
children = node.get("children", [])
|
|
648
658
|
if children:
|
|
649
659
|
# Intermediate node, display name, type and child count
|
|
650
|
-
|
|
651
|
-
|
|
660
|
+
logger.info(f"{indent}- {node_name} [{memory_type}] ({len(children)} children)")
|
|
661
|
+
logger.info(f"{indent} ID: {node_id}")
|
|
652
662
|
display_value = node_value[:80] + "..." if len(node_value) > 80 else node_value
|
|
653
|
-
|
|
663
|
+
logger.info(f"{indent} Value: {display_value}")
|
|
654
664
|
|
|
655
665
|
if level < max_level:
|
|
656
666
|
for child in children:
|
|
657
667
|
print_tree_structure(child, level + 1, max_level)
|
|
658
668
|
elif level == max_level:
|
|
659
|
-
|
|
669
|
+
logger.info(f"{indent} ... (expansion limited)")
|
|
660
670
|
else:
|
|
661
671
|
# Leaf node, display name, type and value
|
|
662
672
|
display_value = node_value[:80] + "..." if len(node_value) > 80 else node_value
|
|
663
|
-
|
|
664
|
-
|
|
673
|
+
logger.info(f"{indent}- {node_name} [{memory_type}]: {display_value}")
|
|
674
|
+
logger.info(f"{indent} ID: {node_id}")
|
|
665
675
|
|
|
666
676
|
|
|
667
677
|
def analyze_final_tree_quality(tree_data: dict[str, Any]) -> dict:
|
|
@@ -856,107 +866,107 @@ def analyze_final_tree_quality(tree_data: dict[str, Any]) -> dict:
|
|
|
856
866
|
|
|
857
867
|
|
|
858
868
|
def print_tree_analysis(tree_data: dict[str, Any]):
    """
    Log a human-readable quality report for a converted memory tree.

    The statistics come from analyze_final_tree_quality(tree_data); this
    function only formats them and writes them to the module logger at INFO
    level. Nothing is returned.

    Args:
        tree_data: Tree structure (dictionary format) to analyze
    """
    stats = analyze_final_tree_quality(tree_data)
    total_nodes = stats["total_nodes"]

    def _percent(part) -> float:
        """Share of total_nodes as a percentage; 0.0 when the tree is empty."""
        # Guard against ZeroDivisionError when the analysis reports no nodes.
        return part / total_nodes * 100 if total_nodes else 0.0

    logger.info("\n" + "=" * 60)
    logger.info("🌳 Enhanced Tree Structure Quality Analysis Report")
    logger.info("=" * 60)

    # Basic statistics
    logger.info("\n📊 Basic Statistics:")
    logger.info(f" Total nodes: {total_nodes}")
    logger.info(f" Max depth: {stats['max_depth']}")
    logger.info(
        f" Leaf nodes: {stats['total_leaves']} ({_percent(stats['total_leaves']):.1f}%)"
    )
    logger.info(
        f" Branch nodes: {stats['total_branches']} ({_percent(stats['total_branches']):.1f}%)"
    )

    # Structure quality assessment (section is skipped when the key is absent or empty)
    structure = stats.get("structure_quality", {})
    if structure:
        logger.info("\n🏗️ Structure Quality Assessment:")
        logger.info(
            f" Branch density: {structure['branch_density']:.3f} ({'✅ Good' if 0.2 <= structure['branch_density'] <= 0.6 else '⚠️ Needs improvement'})"
        )
        logger.info(
            f" Leaf ratio: {structure['leaf_ratio']:.3f} ({'✅ Good' if 0.3 <= structure['leaf_ratio'] <= 0.7 else '⚠️ Needs improvement'})"
        )
        logger.info(f" Max width: {structure['max_width']}")
        logger.info(
            f" Depth-width ratio: {structure['depth_width_ratio']:.2f} ({'✅ Good' if structure['depth_width_ratio'] <= 3 else '⚠️ Too thin'})"
        )
        logger.info(
            f" Overall balance: {'✅ Good' if structure['is_well_balanced'] else '⚠️ Needs improvement'}"
        )

    # Single chain analysis
    chain_analysis = stats.get("chain_analysis", {})
    if chain_analysis:
        logger.info("\n🔗 Single Chain Structure Analysis:")
        logger.info(f" Longest chain: {chain_analysis.get('max_chain_length', 0)} layers")
        logger.info(f" Single chain subtrees: {chain_analysis.get('single_chain_subtrees', 0)}")
        logger.info(
            f" Single chain subtree ratio: {chain_analysis.get('chain_subtree_ratio', 0) * 100:.1f}%"
        )

        # The chain-length warning takes precedence over the subtree-ratio warning.
        if chain_analysis.get("max_chain_length", 0) > 5:
            logger.info(" ⚠️ Warning: Overly long single chain structure may affect display")
        elif chain_analysis.get("chain_subtree_ratio", 0) > 0.3:
            logger.info(
                " ⚠️ Warning: Too many single chain subtrees, suggest increasing branch structure"
            )
        else:
            logger.info(" ✅ Single chain structure well controlled")

    # Type diversity
    type_div = stats.get("type_diversity", {})
    if type_div:
        logger.info("\n🎨 Type Diversity Analysis:")
        logger.info(f" Total types: {type_div['total_types']}")
        logger.info(f" Diversity index: {type_div['shannon_diversity']:.3f}")
        logger.info(f" Normalized diversity: {type_div['normalized_diversity']:.3f}")
        logger.info(f" Distribution balance: {type_div['distribution_balance']:.3f}")

    # Type distribution, most frequent type first
    logger.info("\n📋 Type Distribution Details:")
    for mem_type, count in sorted(stats["by_type"].items(), key=lambda x: x[1], reverse=True):
        logger.info(f" {mem_type}: {count} nodes ({_percent(count):.1f}%)")

    # Depth distribution, shallowest level first
    logger.info("\n📏 Depth Distribution:")
    for depth in sorted(stats["by_depth"]):
        logger.info(f" Depth {depth}: {stats['by_depth'][depth]} nodes")

    # Major subtree analysis
    if stats["subtrees"]:
        logger.info("\n🌲 Major Subtree Analysis (sorted by quality):")
        sorted_subtrees = sorted(
            stats["subtrees"], key=lambda x: x.get("quality_score", 0), reverse=True
        )
        for i, subtree in enumerate(sorted_subtrees[:8]):  # Show first 8
            quality = subtree.get("quality_score", 0)
            logger.info(f" #{i + 1} {subtree['root']} [{subtree['type']}]:")
            logger.info(f" Quality score: {quality:.2f}")
            logger.info(
                f" Structure: Depth={subtree['depth']}, Branches={subtree['branches']}, Leaves={subtree['leaves']}"
            )
            logger.info(
                f" Density: Branch density={subtree.get('branch_density', 0):.3f}, Leaf ratio={subtree.get('leaf_ratio', 0):.3f}"
            )

            if quality > 15:
                logger.info(" ✅ High quality subtree")
            elif quality > 8:
                logger.info(" 🟡 Medium quality subtree")
            else:
                logger.info(" 🔴 Low quality subtree")

    logger.info("\n" + "=" * 60)
|
|
960
970
|
|
|
961
971
|
|
|
962
972
|
def remove_embedding_recursive(memory_info: dict) -> Any:
|
|
@@ -1152,3 +1162,204 @@ def convert_activation_memory_summary(act_mem_items: list[KVCacheItem]) -> dict[
|
|
|
1152
1162
|
"total_parameters": total_parameters,
|
|
1153
1163
|
"summary": f"Activation memory contains {total_items} items with {total_layers} layers and approximately {total_parameters:,} parameters",
|
|
1154
1164
|
}
|
|
1165
|
+
|
|
1166
|
+
|
|
1167
|
+
def detect_and_remove_duplicate_ids(tree_node: dict[str, Any]) -> dict[str, Any]:
    """
    Detect and remove duplicate IDs in tree structure by skipping duplicate nodes.
    The tree is walked depth-first (parent before children, children in list
    order); the first occurrence of each ID is kept and every later node with
    the same ID is dropped together with its subtree.

    The IDs "root" and "WorkingMemory" are exempt and may appear repeatedly.
    Nodes that carry no ID at all (key absent or None) are never treated as
    duplicates of one another; they are kept as they are.

    The input is not mutated: each visited dictionary is shallow-copied and
    its "children" list is rebuilt.

    Args:
        tree_node: Tree node (dictionary format)

    Returns:
        dict: Fixed tree node with duplicate nodes removed. When tree_node is
        None, an empty placeholder root node is returned instead.
    """
    exempt_ids = ("root", "WorkingMemory")
    used_ids = set()
    removed_count = 0

    def remove_duplicates_recursive(
        node: dict[str, Any], parent_path: str = ""
    ) -> dict[str, Any] | None:
        """Return a de-duplicated copy of node, or None when node is a duplicate."""
        nonlocal removed_count

        # Non-dictionary entries are passed through untouched.
        if not isinstance(node, dict):
            return node

        # Shallow copy so the caller's tree is left unmodified.
        fixed_node = node.copy()

        # Only nodes that actually have an ID take part in de-duplication;
        # otherwise all ID-less nodes would collide on one shared key and
        # every one after the first would be discarded.
        current_id = fixed_node.get("id")
        if current_id is not None:
            if current_id in used_ids and current_id not in exempt_ids:
                logger.info(f"Skipping duplicate node: {current_id} (path: {parent_path})")
                removed_count += 1
                return None  # Signals the parent to drop this node
            used_ids.add(current_id)

        # Recursively process child nodes
        children = fixed_node.get("children")
        if isinstance(children, list):
            fixed_children = []
            for i, child in enumerate(children):
                child_path = f"{parent_path}/{fixed_node.get('node_name', 'unknown')}[{i}]"
                fixed_child = remove_duplicates_recursive(child, child_path)
                if fixed_child is not None:  # Only keep surviving children
                    fixed_children.append(fixed_child)
            fixed_node["children"] = fixed_children

        return fixed_node

    result = remove_duplicates_recursive(tree_node)
    if result is not None:
        logger.info(f"Removed {removed_count} duplicate nodes")
        return result

    # Reached only when tree_node itself is None: hand back an empty root so
    # callers always receive a dictionary.
    return {
        "id": "root",
        "node_name": "root",
        "value": "root",
        "memory_type": "Root",
        "children": [],
    }
|
|
1228
|
+
|
|
1229
|
+
|
|
1230
|
+
def validate_tree_structure(tree_node: dict[str, Any]) -> dict[str, Any]:
    """
    Validate tree structure integrity, including ID uniqueness check

    Args:
        tree_node: Tree node (dictionary format)

    Returns:
        dict: Validation result with the keys
            - "is_valid": False once any error has been recorded
            - "errors": messages for non-dict nodes, missing or duplicate IDs,
              and non-list "children" fields
            - "warnings": messages for missing "node_name" / "value" /
              "memory_type" fields and for nodes deeper than 20 levels
            - "total_nodes": number of dictionary nodes visited
            - "unique_ids": set of IDs seen at least once
            - "duplicate_ids": set of IDs seen more than once
            - "missing_ids": set of paths of nodes without an "id" key
            - "invalid_structure": always an empty list (never populated here)
            - "fix_suggestion": present only when duplicate IDs were found
    """
    validation_result = {
        "is_valid": True,
        "errors": [],
        "warnings": [],
        "total_nodes": 0,
        "unique_ids": set(),
        "duplicate_ids": set(),
        "missing_ids": set(),
        "invalid_structure": [],
    }
    required_fields = ("node_name", "value", "memory_type")

    def record_error(message: str) -> None:
        """Store an error message and mark the whole tree as invalid."""
        validation_result["errors"].append(message)
        validation_result["is_valid"] = False

    def validate_recursive(node: dict[str, Any], path: str = "", depth: int = 0):
        """Recursively validate tree structure"""
        if not isinstance(node, dict):
            record_error(f"Node is not a dictionary: {path}")
            return

        validation_result["total_nodes"] += 1

        # ID must exist and must not have been seen before
        if "id" not in node:
            record_error(f"Node missing ID field: {path}")
            validation_result["missing_ids"].add(path)
        else:
            node_id = node["id"]
            if node_id in validation_result["unique_ids"]:
                record_error(f"Duplicate node ID: {node_id} (path: {path})")
                validation_result["duplicate_ids"].add(node_id)
            else:
                validation_result["unique_ids"].add(node_id)

        # Other fields are expected but their absence is only a warning
        for field in required_fields:
            if field not in node:
                validation_result["warnings"].append(f"Node missing field '{field}': {path}")

        # Recursively validate child nodes
        if "children" in node:
            if not isinstance(node["children"], list):
                record_error(f"Children field is not a list: {path}")
            else:
                for i, child in enumerate(node["children"]):
                    validate_recursive(child, f"{path}/children[{i}]", depth + 1)

        # Check depth limit (reported after the node's subtree has been visited)
        if depth > 20:
            validation_result["warnings"].append(f"Tree depth too deep ({depth}): {path}")

    validate_recursive(tree_node)

    # Generate fix suggestions; the message names the helper that actually
    # exists in this module for removing duplicated nodes.
    if validation_result["duplicate_ids"]:
        validation_result["fix_suggestion"] = (
            "Use detect_and_remove_duplicate_ids() function to fix duplicate IDs"
        )

    return validation_result
|
|
1303
|
+
|
|
1304
|
+
|
|
1305
|
+
def ensure_unique_tree_ids(tree_result: dict[str, Any]) -> dict[str, Any]:
    """
    Post-process the output of convert_graph_to_tree_forworkmem so that node
    IDs are unique.

    The tree is validated first. A valid tree is returned unchanged (same
    object). Otherwise the validation findings are logged, duplicate nodes
    are removed via detect_and_remove_duplicate_ids, and the cleaned tree is
    validated once more so the outcome can be logged.

    Args:
        tree_result: Tree structure returned by convert_graph_to_tree_forworkmem

    Returns:
        dict: The original tree when it is already valid, otherwise the tree
        produced by duplicate removal
    """
    logger.info("🔍 Starting duplicate ID check in tree structure...")

    # Nothing to repair when the first validation pass is clean.
    report = validate_tree_structure(tree_result)
    if report["is_valid"]:
        logger.info("Tree structure validation passed, no duplicate IDs found")
        return tree_result

    # Summarise the problems, listing at most five messages in full.
    problems = report["errors"]
    logger.info(f"Found {len(problems)} errors:")
    for message in problems[:5]:
        logger.info(f" - {message}")
    hidden = len(problems) - 5
    if hidden > 0:
        logger.info(f" ... and {hidden} more errors")

    logger.info("Statistics:")
    logger.info(f" - Total nodes: {report['total_nodes']}")
    logger.info(f" - Unique IDs: {len(report['unique_ids'])}")
    logger.info(f" - Duplicate IDs: {len(report['duplicate_ids'])}")

    # Drop the duplicated nodes.
    logger.info(" Starting duplicate node removal...")
    fixed_tree = detect_and_remove_duplicate_ids(tree_result)

    # Re-validate purely for reporting; the cleaned tree is returned either way.
    recheck = validate_tree_structure(fixed_tree)
    if not recheck["is_valid"]:
        logger.info("Issues remain after removal, please check code logic")
        for message in recheck["errors"][:3]:
            logger.info(f" - {message}")
    else:
        logger.info("Removal completed, tree structure is now valid")
        logger.info(f"Final node count: {recheck['total_nodes']}")

    return fixed_tree
|
|
1353
|
+
|
|
1354
|
+
|
|
1355
|
+
def clean_json_response(response: str) -> str:
    """
    Strip markdown code-fence markers from an LLM response.

    Every occurrence of ```json and of ``` is deleted, then surrounding
    whitespace is trimmed.

    Args:
        response: Raw response string that may contain ```json and ```

    Returns:
        str: The response text with the fence markers removed
    """
    cleaned = response
    # The longer marker goes first so its "json" tag is removed with the fence.
    for fence in ("```json", "```"):
        cleaned = cleaned.replace(fence, "")
    return cleaned.strip()
|