memoryos-0.2.1-py3-none-any.whl → memoryos-0.2.2-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/METADATA +2 -1
- {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/RECORD +72 -55
- memos/__init__.py +1 -1
- memos/api/config.py +156 -65
- memos/api/context/context.py +147 -0
- memos/api/context/dependencies.py +90 -0
- memos/api/product_models.py +5 -1
- memos/api/routers/product_router.py +54 -26
- memos/configs/graph_db.py +49 -1
- memos/configs/internet_retriever.py +6 -0
- memos/configs/mem_os.py +5 -0
- memos/configs/mem_reader.py +9 -0
- memos/configs/mem_scheduler.py +18 -4
- memos/configs/mem_user.py +58 -0
- memos/graph_dbs/base.py +9 -1
- memos/graph_dbs/factory.py +2 -0
- memos/graph_dbs/nebular.py +1364 -0
- memos/graph_dbs/neo4j.py +4 -4
- memos/log.py +1 -1
- memos/mem_cube/utils.py +13 -6
- memos/mem_os/core.py +140 -30
- memos/mem_os/main.py +1 -1
- memos/mem_os/product.py +266 -152
- memos/mem_os/utils/format_utils.py +314 -67
- memos/mem_reader/simple_struct.py +13 -5
- memos/mem_scheduler/base_scheduler.py +220 -250
- memos/mem_scheduler/general_scheduler.py +193 -73
- memos/mem_scheduler/modules/base.py +5 -5
- memos/mem_scheduler/modules/dispatcher.py +6 -9
- memos/mem_scheduler/modules/misc.py +81 -16
- memos/mem_scheduler/modules/monitor.py +52 -41
- memos/mem_scheduler/modules/rabbitmq_service.py +9 -7
- memos/mem_scheduler/modules/retriever.py +108 -191
- memos/mem_scheduler/modules/scheduler_logger.py +255 -0
- memos/mem_scheduler/mos_for_test_scheduler.py +16 -19
- memos/mem_scheduler/schemas/__init__.py +0 -0
- memos/mem_scheduler/schemas/general_schemas.py +43 -0
- memos/mem_scheduler/schemas/message_schemas.py +148 -0
- memos/mem_scheduler/schemas/monitor_schemas.py +329 -0
- memos/mem_scheduler/utils/__init__.py +0 -0
- memos/mem_scheduler/utils/filter_utils.py +176 -0
- memos/mem_scheduler/utils/misc_utils.py +61 -0
- memos/mem_user/factory.py +94 -0
- memos/mem_user/mysql_persistent_user_manager.py +271 -0
- memos/mem_user/mysql_user_manager.py +500 -0
- memos/mem_user/persistent_factory.py +96 -0
- memos/mem_user/user_manager.py +4 -4
- memos/memories/activation/item.py +4 -0
- memos/memories/textual/base.py +1 -1
- memos/memories/textual/general.py +35 -91
- memos/memories/textual/item.py +5 -33
- memos/memories/textual/tree.py +13 -7
- memos/memories/textual/tree_text_memory/organize/conflict.py +4 -2
- memos/memories/textual/tree_text_memory/organize/relation_reason_detector.py +47 -43
- memos/memories/textual/tree_text_memory/organize/reorganizer.py +8 -5
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever.py +6 -3
- memos/memories/textual/tree_text_memory/retrieve/internet_retriever_factory.py +2 -0
- memos/memories/textual/tree_text_memory/retrieve/retrieval_mid_structs.py +2 -0
- memos/memories/textual/tree_text_memory/retrieve/searcher.py +46 -23
- memos/memories/textual/tree_text_memory/retrieve/task_goal_parser.py +42 -15
- memos/memories/textual/tree_text_memory/retrieve/utils.py +11 -7
- memos/memories/textual/tree_text_memory/retrieve/xinyusearch.py +62 -58
- memos/memos_tools/dinding_report_bot.py +422 -0
- memos/memos_tools/notification_service.py +44 -0
- memos/memos_tools/notification_utils.py +96 -0
- memos/settings.py +3 -1
- memos/templates/mem_reader_prompts.py +2 -1
- memos/templates/mem_scheduler_prompts.py +41 -7
- memos/templates/mos_prompts.py +87 -0
- memos/mem_scheduler/modules/schemas.py +0 -328
- memos/mem_scheduler/utils.py +0 -75
- {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/LICENSE +0 -0
- {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/WHEEL +0 -0
- {memoryos-0.2.1.dist-info → memoryos-0.2.2.dist-info}/entry_points.txt +0 -0
memos/mem_os/utils/format_utils.py
@@ -239,10 +239,10 @@ def sample_nodes_with_type_balance(
         "MetaMemory": 0.05,  # 5%
     }
 
-
+    logger.info(
         f"Starting type-balanced sampling, original nodes: {len(nodes)}, target nodes: {target_count}"
     )
-
+    logger.info(f"Target type ratios: {type_ratios}")
 
     # Analyze current node type distribution
     current_type_counts = {}
@@ -255,7 +255,7 @@ def sample_nodes_with_type_balance(
             nodes_by_type[memory_type] = []
         nodes_by_type[memory_type].append(node)
 
-
+    logger.info(f"Current type distribution: {current_type_counts}")
 
     # Calculate target node count for each type
     type_targets = {}
@@ -290,7 +290,7 @@ def sample_nodes_with_type_balance(
             )
             type_targets[memory_type] = type_targets.get(memory_type, 0) + additional
 
-
+    logger.info(f"Target node count for each type: {type_targets}")
 
     # Perform subtree quality sampling for each type
    selected_nodes = []
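The hunks above instrument `sample_nodes_with_type_balance`, which budgets the target node count across memory types from `type_ratios` and then distributes any leftover slots as `additional`. For orientation, a minimal sketch of that allocation pattern — the helper name, rounding, and remainder handling here are our assumptions for illustration, not the package's exact code:

def allocate_type_targets(
    counts: dict[str, int], ratios: dict[str, float], target_count: int
) -> dict[str, int]:
    # Floor each type's share of the budget, capped by what is available.
    targets = {t: min(counts[t], int(target_count * ratios.get(t, 0))) for t in counts}
    # Hand leftover slots to the types with the most unselected nodes.
    remaining = target_count - sum(targets.values())
    for t in sorted(counts, key=lambda t: counts[t] - targets[t], reverse=True):
        if remaining <= 0:
            break
        additional = min(remaining, counts[t] - targets[t])
        targets[t] += additional
        remaining -= additional
    return targets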
@@ -300,16 +300,18 @@ def sample_nodes_with_type_balance(
             continue
 
         type_nodes = nodes_by_type[memory_type]
-
+        logger.info(
+            f"\n--- Processing {memory_type} type: {len(type_nodes)} -> {target_for_type} ---"
+        )
 
         if len(type_nodes) <= target_for_type:
             selected_nodes.extend(type_nodes)
-
+            logger.info(f" Select all: {len(type_nodes)} nodes")
         else:
             # Use enhanced subtree quality sampling
             type_selected = sample_by_enhanced_subtree_quality(type_nodes, edges, target_for_type)
             selected_nodes.extend(type_selected)
-
+            logger.info(f" Sampled selection: {len(type_selected)} nodes")
 
     # Filter edges
     selected_node_ids = {node["id"] for node in selected_nodes}
@@ -319,8 +321,8 @@ def sample_nodes_with_type_balance(
         if edge["source"] in selected_node_ids and edge["target"] in selected_node_ids
     ]
 
-
-
+    logger.info(f"\nFinal selected nodes: {len(selected_nodes)}")
+    logger.info(f"Final edges: {len(filtered_edges)}")
 
     # Verify final type distribution
     final_type_counts = {}
@@ -328,11 +330,11 @@ def sample_nodes_with_type_balance(
         memory_type = node.get("metadata", {}).get("memory_type", "Unknown")
         final_type_counts[memory_type] = final_type_counts.get(memory_type, 0) + 1
 
-
+    logger.info(f"Final type distribution: {final_type_counts}")
    for memory_type, count in final_type_counts.items():
         percentage = count / len(selected_nodes) * 100
         target_percentage = type_ratios.get(memory_type, 0) * 100
-
+        logger.info(
            f" {memory_type}: {count} nodes ({percentage:.1f}%, target: {target_percentage:.1f}%)"
        )
 
@@ -358,9 +360,9 @@ def sample_by_enhanced_subtree_quality(
         subtree_analysis.items(), key=lambda x: x[1]["quality_score"], reverse=True
     )
 
-
+    logger.info(" Subtree quality ranking:")
     for i, (root_id, analysis) in enumerate(sorted_subtrees[:5]):
-
+        logger.info(
             f"  #{i + 1} Root node {root_id}: Quality={analysis['quality_score']:.2f}, "
             f"Depth={analysis['max_depth']}, Branches={analysis['branch_nodes']}, "
             f"Leaves={analysis['leaf_count']}, Max Width={analysis['max_width']}"
@@ -386,7 +388,7 @@ def sample_by_enhanced_subtree_quality(
                 if node:
                     selected_nodes.append(node)
                     selected_node_ids.add(node_id)
-
+            logger.info(f" Select entire subtree {root_id}: +{len(new_nodes)} nodes")
         else:
             # Subtree too large, need partial selection
             if analysis["quality_score"] > 5:  # Only partial selection for high-quality subtrees
@@ -398,7 +400,7 @@ def sample_by_enhanced_subtree_quality(
                 selected_nodes.extend(partial_selection)
                 for node in partial_selection:
                     selected_node_ids.add(node["id"])
-
+                logger.info(
                     f" Partial selection of subtree {root_id}: +{len(partial_selection)} nodes"
                 )
 
@@ -411,7 +413,7 @@ def sample_by_enhanced_subtree_quality(
         remaining_count = target_count - len(selected_nodes)
         additional = sample_nodes_by_importance(remaining_nodes, edges, remaining_count)
         selected_nodes.extend(additional)
-
+        logger.info(f" Supplementary selection: +{len(additional)} nodes")
 
     return selected_nodes
 
@@ -493,7 +495,7 @@ def sample_nodes_by_importance(
 # Modified main function to use new sampling strategy
 def convert_graph_to_tree_forworkmem(
     json_data: dict[str, Any],
-    target_node_count: int =
+    target_node_count: int = 200,
     type_ratios: dict[str, float] | None = None,
 ) -> dict[str, Any]:
     """
@@ -502,8 +504,8 @@ def convert_graph_to_tree_forworkmem(
     original_nodes = json_data.get("nodes", [])
     original_edges = json_data.get("edges", [])
 
-
-
+    logger.info(f"Original node count: {len(original_nodes)}")
+    logger.info(f"Target node count: {target_node_count}")
     filter_original_edges = []
     for original_edge in original_edges:
         if original_edge["type"] == "PARENT":
@@ -531,7 +533,7 @@ def convert_graph_to_tree_forworkmem(
         node_name = extract_node_name(memory)
         memory_key = node.get("metadata", {}).get("key", node_name)
         usage = node.get("metadata", {}).get("usage", [])
-        frequency = len(usage)
+        frequency = len(usage) if len(usage) < 100 else 100
         node_map[node["id"]] = {
             "id": node["id"],
             "value": memory,
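The one functional change in this hunk caps the usage-derived frequency at 100, which is the clamp idiom below (an equivalent rewrite for clarity, not the code the package ships):

frequency = min(len(usage), 100)  # heavily used nodes no longer dominate the weighting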
@@ -633,7 +635,7 @@ def convert_graph_to_tree_forworkmem(
 
 
 def print_tree_structure(node: dict[str, Any], level: int = 0, max_level: int = 5):
-    """
+    """logger.info the first few layers of tree structure for easy viewing"""
     if level > max_level:
         return
 
@@ -647,21 +649,21 @@ def print_tree_structure(node: dict[str, Any], level: int = 0, max_level: int =
     children = node.get("children", [])
     if children:
         # Intermediate node, display name, type and child count
-
-
+        logger.info(f"{indent}- {node_name} [{memory_type}] ({len(children)} children)")
+        logger.info(f"{indent} ID: {node_id}")
         display_value = node_value[:80] + "..." if len(node_value) > 80 else node_value
-
+        logger.info(f"{indent} Value: {display_value}")
 
         if level < max_level:
             for child in children:
                 print_tree_structure(child, level + 1, max_level)
         elif level == max_level:
-
+            logger.info(f"{indent} ... (expansion limited)")
     else:
         # Leaf node, display name, type and value
         display_value = node_value[:80] + "..." if len(node_value) > 80 else node_value
-
-
+        logger.info(f"{indent}- {node_name} [{memory_type}]: {display_value}")
+        logger.info(f"{indent} ID: {node_id}")
 
 
 def analyze_final_tree_quality(tree_data: dict[str, Any]) -> dict:
@@ -856,107 +858,107 @@ def analyze_final_tree_quality(tree_data: dict[str, Any]) -> dict:
 
 
 def print_tree_analysis(tree_data: dict[str, Any]):
-    """
+    """logger.info enhanced tree analysis results"""
     stats = analyze_final_tree_quality(tree_data)
 
-
-
-
+    logger.info("\n" + "=" * 60)
+    logger.info("🌳 Enhanced Tree Structure Quality Analysis Report")
+    logger.info("=" * 60)
 
     # Basic statistics
-
-
-
-
+    logger.info("\n📊 Basic Statistics:")
+    logger.info(f" Total nodes: {stats['total_nodes']}")
+    logger.info(f" Max depth: {stats['max_depth']}")
+    logger.info(
         f" Leaf nodes: {stats['total_leaves']} ({stats['total_leaves'] / stats['total_nodes'] * 100:.1f}%)"
     )
-
+    logger.info(
         f" Branch nodes: {stats['total_branches']} ({stats['total_branches'] / stats['total_nodes'] * 100:.1f}%)"
     )
 
     # Structure quality assessment
     structure = stats.get("structure_quality", {})
     if structure:
-
-
+        logger.info("\n🏗️ Structure Quality Assessment:")
+        logger.info(
             f" Branch density: {structure['branch_density']:.3f} ({'✅ Good' if 0.2 <= structure['branch_density'] <= 0.6 else '⚠️ Needs improvement'})"
         )
-
+        logger.info(
             f" Leaf ratio: {structure['leaf_ratio']:.3f} ({'✅ Good' if 0.3 <= structure['leaf_ratio'] <= 0.7 else '⚠️ Needs improvement'})"
         )
-
-
+        logger.info(f" Max width: {structure['max_width']}")
+        logger.info(
             f" Depth-width ratio: {structure['depth_width_ratio']:.2f} ({'✅ Good' if structure['depth_width_ratio'] <= 3 else '⚠️ Too thin'})"
         )
-
+        logger.info(
             f" Overall balance: {'✅ Good' if structure['is_well_balanced'] else '⚠️ Needs improvement'}"
         )
 
     # Single chain analysis
     chain_analysis = stats.get("chain_analysis", {})
     if chain_analysis:
-
-
-
-
+        logger.info("\n🔗 Single Chain Structure Analysis:")
+        logger.info(f" Longest chain: {chain_analysis.get('max_chain_length', 0)} layers")
+        logger.info(f" Single chain subtrees: {chain_analysis.get('single_chain_subtrees', 0)}")
+        logger.info(
             f" Single chain subtree ratio: {chain_analysis.get('chain_subtree_ratio', 0) * 100:.1f}%"
         )
 
         if chain_analysis.get("max_chain_length", 0) > 5:
-
+            logger.info(" ⚠️ Warning: Overly long single chain structure may affect display")
        elif chain_analysis.get("chain_subtree_ratio", 0) > 0.3:
-
+            logger.info(
                 " ⚠️ Warning: Too many single chain subtrees, suggest increasing branch structure"
             )
         else:
-
+            logger.info(" ✅ Single chain structure well controlled")
 
     # Type diversity
     type_div = stats.get("type_diversity", {})
     if type_div:
-
-
-
-
-
+        logger.info("\n🎨 Type Diversity Analysis:")
+        logger.info(f" Total types: {type_div['total_types']}")
+        logger.info(f" Diversity index: {type_div['shannon_diversity']:.3f}")
+        logger.info(f" Normalized diversity: {type_div['normalized_diversity']:.3f}")
+        logger.info(f" Distribution balance: {type_div['distribution_balance']:.3f}")
 
     # Type distribution
-
+    logger.info("\n📋 Type Distribution Details:")
     for mem_type, count in sorted(stats["by_type"].items(), key=lambda x: x[1], reverse=True):
         percentage = count / stats["total_nodes"] * 100
-
+        logger.info(f" {mem_type}: {count} nodes ({percentage:.1f}%)")
 
     # Depth distribution
-
+    logger.info("\n📏 Depth Distribution:")
     for depth in sorted(stats["by_depth"].keys()):
         count = stats["by_depth"][depth]
-
+        logger.info(f" Depth {depth}: {count} nodes")
 
     # Major subtree analysis
     if stats["subtrees"]:
-
+        logger.info("\n🌲 Major Subtree Analysis (sorted by quality):")
         sorted_subtrees = sorted(
             stats["subtrees"], key=lambda x: x.get("quality_score", 0), reverse=True
         )
         for i, subtree in enumerate(sorted_subtrees[:8]):  # Show first 8
             quality = subtree.get("quality_score", 0)
-
-
-
+            logger.info(f" #{i + 1} {subtree['root']} [{subtree['type']}]:")
+            logger.info(f" Quality score: {quality:.2f}")
+            logger.info(
                 f" Structure: Depth={subtree['depth']}, Branches={subtree['branches']}, Leaves={subtree['leaves']}"
             )
-
+            logger.info(
                 f" Density: Branch density={subtree.get('branch_density', 0):.3f}, Leaf ratio={subtree.get('leaf_ratio', 0):.3f}"
             )
 
             if quality > 15:
-
+                logger.info(" ✅ High quality subtree")
             elif quality > 8:
-
+                logger.info(" 🟡 Medium quality subtree")
             else:
-
+                logger.info(" 🔴 Low quality subtree")
 
-
+    logger.info("\n" + "=" * 60)
 
 
 def remove_embedding_recursive(memory_info: dict) -> Any:
@@ -1152,3 +1154,248 @@ def convert_activation_memory_summary(act_mem_items: list[KVCacheItem]) -> dict[
         "total_parameters": total_parameters,
         "summary": f"Activation memory contains {total_items} items with {total_layers} layers and approximately {total_parameters:,} parameters",
     }
+
+
+def detect_and_remove_duplicate_ids(tree_node: dict[str, Any]) -> dict[str, Any]:
+    """
+    Detect and remove duplicate IDs in tree structure by skipping duplicate nodes.
+    First occurrence of each ID is kept, subsequent duplicates are removed.
+
+    Args:
+        tree_node: Tree node (dictionary format)
+
+    Returns:
+        dict: Fixed tree node with duplicate nodes removed
+    """
+    used_ids = set()
+    removed_count = 0
+
+    def remove_duplicates_recursive(
+        node: dict[str, Any], parent_path: str = ""
+    ) -> dict[str, Any] | None:
+        """Recursively remove duplicate IDs by skipping duplicate nodes"""
+        nonlocal removed_count
+
+        if not isinstance(node, dict):
+            return node
+
+        # Create node copy
+        fixed_node = node.copy()
+
+        # Handle current node ID
+        current_id = fixed_node.get("id", "")
+        if current_id in used_ids and current_id not in ["root", "WorkingMemory"]:
+            # Skip this duplicate node
+            logger.info(f"Skipping duplicate node: {current_id} (path: {parent_path})")
+            removed_count += 1
+            return None  # Return None to indicate this node should be removed
+        else:
+            used_ids.add(current_id)
+
+        # Recursively process child nodes
+        if "children" in fixed_node and isinstance(fixed_node["children"], list):
+            fixed_children = []
+            for i, child in enumerate(fixed_node["children"]):
+                child_path = f"{parent_path}/{fixed_node.get('node_name', 'unknown')}[{i}]"
+                fixed_child = remove_duplicates_recursive(child, child_path)
+                if fixed_child is not None:  # Only add non-None children
+                    fixed_children.append(fixed_child)
+            fixed_node["children"] = fixed_children
+
+        return fixed_node
+
+    result = remove_duplicates_recursive(tree_node)
+    if result is not None:
+        logger.info(f"Removed {removed_count} duplicate nodes")
+        return result
+    else:
+        # If root node itself was removed (shouldn't happen), return empty root
+        return {
+            "id": "root",
+            "node_name": "root",
+            "value": "root",
+            "memory_type": "Root",
+            "children": [],
+        }
+
+
+def validate_tree_structure(tree_node: dict[str, Any]) -> dict[str, Any]:
+    """
+    Validate tree structure integrity, including ID uniqueness check
+
+    Args:
+        tree_node: Tree node (dictionary format)
+
+    Returns:
+        dict: Validation result containing error messages and fix suggestions
+    """
+    validation_result = {
+        "is_valid": True,
+        "errors": [],
+        "warnings": [],
+        "total_nodes": 0,
+        "unique_ids": set(),
+        "duplicate_ids": set(),
+        "missing_ids": set(),
+        "invalid_structure": [],
+    }
+
+    def validate_recursive(node: dict[str, Any], path: str = "", depth: int = 0):
+        """Recursively validate tree structure"""
+        if not isinstance(node, dict):
+            validation_result["errors"].append(f"Node is not a dictionary: {path}")
+            validation_result["is_valid"] = False
+            return
+
+        validation_result["total_nodes"] += 1
+
+        # Check required fields
+        if "id" not in node:
+            validation_result["errors"].append(f"Node missing ID field: {path}")
+            validation_result["missing_ids"].add(path)
+            validation_result["is_valid"] = False
+        else:
+            node_id = node["id"]
+            if node_id in validation_result["unique_ids"]:
+                validation_result["errors"].append(f"Duplicate node ID: {node_id} (path: {path})")
+                validation_result["duplicate_ids"].add(node_id)
+                validation_result["is_valid"] = False
+            else:
+                validation_result["unique_ids"].add(node_id)
+
+        # Check other required fields
+        required_fields = ["node_name", "value", "memory_type"]
+        for field in required_fields:
+            if field not in node:
+                validation_result["warnings"].append(f"Node missing field '{field}': {path}")
+
+        # Recursively validate child nodes
+        if "children" in node:
+            if not isinstance(node["children"], list):
+                validation_result["errors"].append(f"Children field is not a list: {path}")
+                validation_result["is_valid"] = False
+            else:
+                for i, child in enumerate(node["children"]):
+                    child_path = f"{path}/children[{i}]"
+                    validate_recursive(child, child_path, depth + 1)
+
+        # Check depth limit
+        if depth > 20:
+            validation_result["warnings"].append(f"Tree depth too deep ({depth}): {path}")
+
+    validate_recursive(tree_node)
+
+    # Generate fix suggestions
+    if validation_result["duplicate_ids"]:
+        validation_result["fix_suggestion"] = (
+            "Use detect_and_fix_duplicate_ids() function to fix duplicate IDs"
+        )
+
+    return validation_result
+
+
+def ensure_unique_tree_ids(tree_result: dict[str, Any]) -> dict[str, Any]:
+    """
+    Ensure all node IDs in tree structure are unique by removing duplicate nodes,
+    this is a post-processing function for convert_graph_to_tree_forworkmem
+
+    Args:
+        tree_result: Tree structure returned by convert_graph_to_tree_forworkmem
+
+    Returns:
+        dict: Fixed tree structure with duplicate nodes removed
+    """
+    logger.info("🔍 Starting duplicate ID check in tree structure...")
+
+    # First validate tree structure
+    validation = validate_tree_structure(tree_result)
+
+    if validation["is_valid"]:
+        logger.info("Tree structure validation passed, no duplicate IDs found")
+        return tree_result
+
+    # Report issues
+    logger.info(f"Found {len(validation['errors'])} errors:")
+    for error in validation["errors"][:5]:  # Only show first 5 errors
+        logger.info(f" - {error}")
+
+    if len(validation["errors"]) > 5:
+        logger.info(f" ... and {len(validation['errors']) - 5} more errors")
+
+    logger.info("Statistics:")
+    logger.info(f" - Total nodes: {validation['total_nodes']}")
+    logger.info(f" - Unique IDs: {len(validation['unique_ids'])}")
+    logger.info(f" - Duplicate IDs: {len(validation['duplicate_ids'])}")
+
+    # Remove duplicate nodes
+    logger.info(" Starting duplicate node removal...")
+    fixed_tree = detect_and_remove_duplicate_ids(tree_result)
+
+    # Validate again
+    post_validation = validate_tree_structure(fixed_tree)
+    if post_validation["is_valid"]:
+        logger.info("Removal completed, tree structure is now valid")
+        logger.info(f"Final node count: {post_validation['total_nodes']}")
+    else:
+        logger.info("Issues remain after removal, please check code logic")
+        for error in post_validation["errors"][:3]:
+            logger.info(f" - {error}")
+
+    return fixed_tree
+
+
+def clean_json_response(response: str) -> str:
+    """
+    Remove markdown JSON code block formatting from LLM response.
+
+    Args:
+        response: Raw response string that may contain ```json and ```
+
+    Returns:
+        str: Clean JSON string without markdown formatting
+    """
+    return response.replace("```json", "").replace("```", "").strip()
+
+
+def split_continuous_references(text: str) -> str:
+    """
+    Split continuous reference tags into individual reference tags.
+
+    Converts patterns like [1:92ff35fb, 4:bfe6f044] to [1:92ff35fb] [4:bfe6f044]
+
+    Only processes text if:
+    1. '[' appears exactly once
+    2. ']' appears exactly once
+    3. Contains commas between '[' and ']'
+
+    Args:
+        text (str): Text containing reference tags
+
+    Returns:
+        str: Text with split reference tags, or original text if conditions not met
+    """
+    # Early return if text is empty
+    if not text:
+        return text
+    # Check if '[' appears exactly once
+    if text.count("[") != 1:
+        return text
+    # Check if ']' appears exactly once
+    if text.count("]") != 1:
+        return text
+    # Find positions of brackets
+    open_bracket_pos = text.find("[")
+    close_bracket_pos = text.find("]")
+
+    # Check if brackets are in correct order
+    if open_bracket_pos >= close_bracket_pos:
+        return text
+    # Extract content between brackets
+    content_between_brackets = text[open_bracket_pos + 1 : close_bracket_pos]
+    # Check if there's a comma between brackets
+    if "," not in content_between_brackets:
+        return text
+    text = text.replace(content_between_brackets, content_between_brackets.replace(", ", "]["))
+    text = text.replace(content_between_brackets, content_between_brackets.replace(",", "]["))
+
+    return text
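The functions added in this hunk form a small post-processing toolkit: validate_tree_structure reports duplicate or missing IDs, detect_and_remove_duplicate_ids drops repeated nodes (keeping the first occurrence), ensure_unique_tree_ids chains the two, and clean_json_response / split_continuous_references tidy LLM output. A minimal usage sketch, with invented sample inputs (graph_json stands in for a real graph export):

# Strip markdown fences from an LLM reply before json.loads().
raw = '```json\n{"refs": "[1:92ff35fb, 4:bfe6f044]"}\n```'
payload = clean_json_response(raw)  # '{"refs": "[1:92ff35fb, 4:bfe6f044]"}'

# Split a combined citation into individual tags.
refs = split_continuous_references("[1:92ff35fb, 4:bfe6f044]")  # '[1:92ff35fb][4:bfe6f044]'

# De-duplicate a converted tree before handing it to the UI.
tree = convert_graph_to_tree_forworkmem(graph_json, target_node_count=200)
tree = ensure_unique_tree_ids(tree)  # validates first, removes duplicates only if needed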
memos/mem_reader/simple_struct.py
@@ -58,9 +58,13 @@ class SimpleStructMemReader(BaseMemReader, ABC):
                 metadata=TreeNodeTextualMemoryMetadata(
                     user_id=info.get("user_id"),
                     session_id=info.get("session_id"),
-                    memory_type=memory_i_raw.get("memory_type", "")
+                    memory_type=memory_i_raw.get("memory_type", "")
+                    .replace("长期记忆", "LongTermMemory")
+                    .replace("用户记忆", "UserMemory"),
                     status="activated",
-                    tags=memory_i_raw.get("tags",
+                    tags=memory_i_raw.get("tags", [])
+                    if type(memory_i_raw.get("tags", [])) is list
+                    else [],
                     key=memory_i_raw.get("key", ""),
                     embedding=self.embedder.embed([memory_i_raw.get("value", "")])[0],
                     usage=[],
@@ -176,8 +180,12 @@ class SimpleStructMemReader(BaseMemReader, ABC):
         elif type == "doc":
             for item in scene_data:
                 try:
-
-
+                    if not isinstance(item, str):
+                        parsed_text = parser.parse(item)
+                        results.append({"file": "pure_text", "text": parsed_text})
+                    else:
+                        parsed_text = item
+                        results.append({"file": item, "text": parsed_text})
                 except Exception as e:
                     print(f"Error parsing file {item}: {e!s}")
 
@@ -214,7 +222,7 @@ class SimpleStructMemReader(BaseMemReader, ABC):
                     session_id=info.get("session_id"),
                     memory_type="LongTermMemory",
                     status="activated",
-                    tags=chunk_res["tags"],
+                    tags=chunk_res["tags"] if type(chunk_res["tags"]) is list else [],
                     key=chunk_res["key"],
                     embedding=self.embedder.embed([chunk_res["value"]])[0],
                     usage=[],