claude_swarm 1.0.1 → 1.0.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.claude/commands/release.md +1 -1
- data/.claude/hooks/lint-code-files.rb +65 -0
- data/.rubocop.yml +22 -2
- data/CHANGELOG.md +14 -1
- data/CLAUDE.md +1 -1
- data/CONTRIBUTING.md +69 -0
- data/README.md +27 -2
- data/Rakefile +71 -3
- data/analyze_coverage.rb +94 -0
- data/docs/v2/CHANGELOG.swarm_cli.md +43 -0
- data/docs/v2/CHANGELOG.swarm_memory.md +379 -0
- data/docs/v2/CHANGELOG.swarm_sdk.md +362 -0
- data/docs/v2/README.md +308 -0
- data/docs/v2/guides/claude-code-agents.md +262 -0
- data/docs/v2/guides/complete-tutorial.md +3088 -0
- data/docs/v2/guides/getting-started.md +1456 -0
- data/docs/v2/guides/memory-adapters.md +998 -0
- data/docs/v2/guides/plugins.md +816 -0
- data/docs/v2/guides/quick-start-cli.md +1745 -0
- data/docs/v2/guides/rails-integration.md +1902 -0
- data/docs/v2/guides/swarm-memory.md +599 -0
- data/docs/v2/reference/cli.md +729 -0
- data/docs/v2/reference/ruby-dsl.md +2154 -0
- data/docs/v2/reference/yaml.md +1835 -0
- data/docs-team-swarm.yml +2222 -0
- data/examples/learning-assistant/assistant.md +7 -0
- data/examples/learning-assistant/example-memories/concept-example.md +90 -0
- data/examples/learning-assistant/example-memories/experience-example.md +66 -0
- data/examples/learning-assistant/example-memories/fact-example.md +76 -0
- data/examples/learning-assistant/example-memories/memory-index.md +78 -0
- data/examples/learning-assistant/example-memories/skill-example.md +168 -0
- data/examples/learning-assistant/learning_assistant.rb +34 -0
- data/examples/learning-assistant/learning_assistant.yml +20 -0
- data/examples/v2/dsl/01_basic.rb +44 -0
- data/examples/v2/dsl/02_core_parameters.rb +59 -0
- data/examples/v2/dsl/03_capabilities.rb +71 -0
- data/examples/v2/dsl/04_llm_parameters.rb +56 -0
- data/examples/v2/dsl/05_advanced_flags.rb +73 -0
- data/examples/v2/dsl/06_permissions.rb +80 -0
- data/examples/v2/dsl/07_mcp_server.rb +62 -0
- data/examples/v2/dsl/08_swarm_hooks.rb +53 -0
- data/examples/v2/dsl/09_agent_hooks.rb +67 -0
- data/examples/v2/dsl/10_all_agents_hooks.rb +67 -0
- data/examples/v2/dsl/11_delegation.rb +60 -0
- data/examples/v2/dsl/12_complete_integration.rb +137 -0
- data/examples/v2/file_tools_swarm.yml +102 -0
- data/examples/v2/hooks/01_basic_hooks.rb +133 -0
- data/examples/v2/hooks/02_usage_tracking.rb +201 -0
- data/examples/v2/hooks/03_production_monitoring.rb +429 -0
- data/examples/v2/hooks/agent_stop_exit_0.yml +21 -0
- data/examples/v2/hooks/agent_stop_exit_1.yml +21 -0
- data/examples/v2/hooks/agent_stop_exit_2.yml +26 -0
- data/examples/v2/hooks/multiple_hooks_all_pass.yml +37 -0
- data/examples/v2/hooks/multiple_hooks_first_fails.yml +37 -0
- data/examples/v2/hooks/multiple_hooks_second_fails.yml +37 -0
- data/examples/v2/hooks/multiple_hooks_warnings.yml +37 -0
- data/examples/v2/hooks/post_tool_use_exit_0.yml +24 -0
- data/examples/v2/hooks/post_tool_use_exit_1.yml +24 -0
- data/examples/v2/hooks/post_tool_use_exit_2.yml +24 -0
- data/examples/v2/hooks/post_tool_use_multi_matcher_exit_0.yml +26 -0
- data/examples/v2/hooks/post_tool_use_multi_matcher_exit_1.yml +26 -0
- data/examples/v2/hooks/post_tool_use_multi_matcher_exit_2.yml +26 -0
- data/examples/v2/hooks/pre_tool_use_exit_0.yml +24 -0
- data/examples/v2/hooks/pre_tool_use_exit_1.yml +24 -0
- data/examples/v2/hooks/pre_tool_use_exit_2.yml +24 -0
- data/examples/v2/hooks/pre_tool_use_multi_matcher_exit_0.yml +26 -0
- data/examples/v2/hooks/pre_tool_use_multi_matcher_exit_1.yml +26 -0
- data/examples/v2/hooks/pre_tool_use_multi_matcher_exit_2.yml +27 -0
- data/examples/v2/hooks/swarm_summary.sh +44 -0
- data/examples/v2/hooks/user_prompt_exit_0.yml +21 -0
- data/examples/v2/hooks/user_prompt_exit_1.yml +21 -0
- data/examples/v2/hooks/user_prompt_exit_2.yml +21 -0
- data/examples/v2/hooks/validate_bash.rb +59 -0
- data/examples/v2/multi_directory_permissions.yml +221 -0
- data/examples/v2/node_context_demo.rb +127 -0
- data/examples/v2/node_workflow.rb +173 -0
- data/examples/v2/path_resolution_demo.rb +216 -0
- data/examples/v2/simple-swarm-v2.rb +90 -0
- data/examples/v2/simple-swarm-v2.yml +62 -0
- data/examples/v2/swarm.yml +71 -0
- data/examples/v2/swarm_with_hooks.yml +61 -0
- data/examples/v2/swarm_with_hooks_simple.yml +25 -0
- data/examples/v2/think_tool_demo.rb +62 -0
- data/exe/swarm +6 -0
- data/lib/claude_swarm/claude_mcp_server.rb +0 -6
- data/lib/claude_swarm/cli.rb +10 -3
- data/lib/claude_swarm/commands/ps.rb +19 -20
- data/lib/claude_swarm/commands/show.rb +1 -1
- data/lib/claude_swarm/configuration.rb +10 -12
- data/lib/claude_swarm/mcp_generator.rb +10 -1
- data/lib/claude_swarm/orchestrator.rb +73 -49
- data/lib/claude_swarm/system_utils.rb +37 -11
- data/lib/claude_swarm/version.rb +1 -1
- data/lib/claude_swarm/worktree_manager.rb +1 -0
- data/lib/claude_swarm/yaml_loader.rb +22 -0
- data/lib/claude_swarm.rb +6 -2
- data/lib/swarm_cli/cli.rb +201 -0
- data/lib/swarm_cli/command_registry.rb +61 -0
- data/lib/swarm_cli/commands/mcp_serve.rb +130 -0
- data/lib/swarm_cli/commands/mcp_tools.rb +148 -0
- data/lib/swarm_cli/commands/migrate.rb +55 -0
- data/lib/swarm_cli/commands/run.rb +173 -0
- data/lib/swarm_cli/config_loader.rb +97 -0
- data/lib/swarm_cli/formatters/human_formatter.rb +711 -0
- data/lib/swarm_cli/formatters/json_formatter.rb +51 -0
- data/lib/swarm_cli/interactive_repl.rb +918 -0
- data/lib/swarm_cli/mcp_serve_options.rb +44 -0
- data/lib/swarm_cli/mcp_tools_options.rb +59 -0
- data/lib/swarm_cli/migrate_options.rb +54 -0
- data/lib/swarm_cli/migrator.rb +132 -0
- data/lib/swarm_cli/options.rb +151 -0
- data/lib/swarm_cli/ui/components/agent_badge.rb +33 -0
- data/lib/swarm_cli/ui/components/content_block.rb +120 -0
- data/lib/swarm_cli/ui/components/divider.rb +57 -0
- data/lib/swarm_cli/ui/components/panel.rb +62 -0
- data/lib/swarm_cli/ui/components/usage_stats.rb +70 -0
- data/lib/swarm_cli/ui/formatters/cost.rb +49 -0
- data/lib/swarm_cli/ui/formatters/number.rb +58 -0
- data/lib/swarm_cli/ui/formatters/text.rb +77 -0
- data/lib/swarm_cli/ui/formatters/time.rb +73 -0
- data/lib/swarm_cli/ui/icons.rb +59 -0
- data/lib/swarm_cli/ui/renderers/event_renderer.rb +188 -0
- data/lib/swarm_cli/ui/state/agent_color_cache.rb +45 -0
- data/lib/swarm_cli/ui/state/depth_tracker.rb +40 -0
- data/lib/swarm_cli/ui/state/spinner_manager.rb +170 -0
- data/lib/swarm_cli/ui/state/usage_tracker.rb +62 -0
- data/lib/swarm_cli/version.rb +5 -0
- data/lib/swarm_cli.rb +44 -0
- data/lib/swarm_memory/adapters/base.rb +141 -0
- data/lib/swarm_memory/adapters/filesystem_adapter.rb +845 -0
- data/lib/swarm_memory/chat_extension.rb +34 -0
- data/lib/swarm_memory/cli/commands.rb +306 -0
- data/lib/swarm_memory/core/entry.rb +37 -0
- data/lib/swarm_memory/core/frontmatter_parser.rb +108 -0
- data/lib/swarm_memory/core/metadata_extractor.rb +68 -0
- data/lib/swarm_memory/core/path_normalizer.rb +75 -0
- data/lib/swarm_memory/core/semantic_index.rb +244 -0
- data/lib/swarm_memory/core/storage.rb +288 -0
- data/lib/swarm_memory/core/storage_read_tracker.rb +63 -0
- data/lib/swarm_memory/dsl/builder_extension.rb +40 -0
- data/lib/swarm_memory/dsl/memory_config.rb +113 -0
- data/lib/swarm_memory/embeddings/embedder.rb +36 -0
- data/lib/swarm_memory/embeddings/informers_embedder.rb +152 -0
- data/lib/swarm_memory/errors.rb +21 -0
- data/lib/swarm_memory/integration/cli_registration.rb +30 -0
- data/lib/swarm_memory/integration/configuration.rb +43 -0
- data/lib/swarm_memory/integration/registration.rb +31 -0
- data/lib/swarm_memory/integration/sdk_plugin.rb +531 -0
- data/lib/swarm_memory/optimization/analyzer.rb +244 -0
- data/lib/swarm_memory/optimization/defragmenter.rb +863 -0
- data/lib/swarm_memory/prompts/memory.md.erb +109 -0
- data/lib/swarm_memory/prompts/memory_assistant.md.erb +181 -0
- data/lib/swarm_memory/prompts/memory_researcher.md.erb +281 -0
- data/lib/swarm_memory/prompts/memory_retrieval.md.erb +78 -0
- data/lib/swarm_memory/search/semantic_search.rb +112 -0
- data/lib/swarm_memory/search/text_search.rb +42 -0
- data/lib/swarm_memory/search/text_similarity.rb +80 -0
- data/lib/swarm_memory/skills/meta/deep-learning.md +101 -0
- data/lib/swarm_memory/skills/meta/deep-learning.yml +14 -0
- data/lib/swarm_memory/tools/load_skill.rb +313 -0
- data/lib/swarm_memory/tools/memory_defrag.rb +382 -0
- data/lib/swarm_memory/tools/memory_delete.rb +99 -0
- data/lib/swarm_memory/tools/memory_edit.rb +185 -0
- data/lib/swarm_memory/tools/memory_glob.rb +160 -0
- data/lib/swarm_memory/tools/memory_grep.rb +247 -0
- data/lib/swarm_memory/tools/memory_multi_edit.rb +281 -0
- data/lib/swarm_memory/tools/memory_read.rb +123 -0
- data/lib/swarm_memory/tools/memory_write.rb +231 -0
- data/lib/swarm_memory/utils.rb +50 -0
- data/lib/swarm_memory/version.rb +5 -0
- data/lib/swarm_memory.rb +166 -0
- data/lib/swarm_sdk/agent/RETRY_LOGIC.md +127 -0
- data/lib/swarm_sdk/agent/builder.rb +461 -0
- data/lib/swarm_sdk/agent/chat/context_tracker.rb +314 -0
- data/lib/swarm_sdk/agent/chat/hook_integration.rb +372 -0
- data/lib/swarm_sdk/agent/chat/logging_helpers.rb +116 -0
- data/lib/swarm_sdk/agent/chat/system_reminder_injector.rb +152 -0
- data/lib/swarm_sdk/agent/chat.rb +1159 -0
- data/lib/swarm_sdk/agent/context.rb +112 -0
- data/lib/swarm_sdk/agent/context_manager.rb +309 -0
- data/lib/swarm_sdk/agent/definition.rb +556 -0
- data/lib/swarm_sdk/claude_code_agent_adapter.rb +205 -0
- data/lib/swarm_sdk/configuration.rb +296 -0
- data/lib/swarm_sdk/context_compactor/metrics.rb +147 -0
- data/lib/swarm_sdk/context_compactor/token_counter.rb +106 -0
- data/lib/swarm_sdk/context_compactor.rb +340 -0
- data/lib/swarm_sdk/hooks/adapter.rb +359 -0
- data/lib/swarm_sdk/hooks/context.rb +197 -0
- data/lib/swarm_sdk/hooks/definition.rb +80 -0
- data/lib/swarm_sdk/hooks/error.rb +29 -0
- data/lib/swarm_sdk/hooks/executor.rb +146 -0
- data/lib/swarm_sdk/hooks/registry.rb +147 -0
- data/lib/swarm_sdk/hooks/result.rb +150 -0
- data/lib/swarm_sdk/hooks/shell_executor.rb +254 -0
- data/lib/swarm_sdk/hooks/tool_call.rb +35 -0
- data/lib/swarm_sdk/hooks/tool_result.rb +62 -0
- data/lib/swarm_sdk/log_collector.rb +51 -0
- data/lib/swarm_sdk/log_stream.rb +69 -0
- data/lib/swarm_sdk/markdown_parser.rb +75 -0
- data/lib/swarm_sdk/model_aliases.json +5 -0
- data/lib/swarm_sdk/models.json +1 -0
- data/lib/swarm_sdk/models.rb +120 -0
- data/lib/swarm_sdk/node/agent_config.rb +49 -0
- data/lib/swarm_sdk/node/builder.rb +439 -0
- data/lib/swarm_sdk/node/transformer_executor.rb +248 -0
- data/lib/swarm_sdk/node_context.rb +170 -0
- data/lib/swarm_sdk/node_orchestrator.rb +384 -0
- data/lib/swarm_sdk/permissions/config.rb +239 -0
- data/lib/swarm_sdk/permissions/error_formatter.rb +121 -0
- data/lib/swarm_sdk/permissions/path_matcher.rb +35 -0
- data/lib/swarm_sdk/permissions/validator.rb +173 -0
- data/lib/swarm_sdk/permissions_builder.rb +122 -0
- data/lib/swarm_sdk/plugin.rb +147 -0
- data/lib/swarm_sdk/plugin_registry.rb +101 -0
- data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +243 -0
- data/lib/swarm_sdk/providers/openai_with_responses.rb +582 -0
- data/lib/swarm_sdk/result.rb +97 -0
- data/lib/swarm_sdk/swarm/agent_initializer.rb +334 -0
- data/lib/swarm_sdk/swarm/all_agents_builder.rb +140 -0
- data/lib/swarm_sdk/swarm/builder.rb +586 -0
- data/lib/swarm_sdk/swarm/mcp_configurator.rb +151 -0
- data/lib/swarm_sdk/swarm/tool_configurator.rb +419 -0
- data/lib/swarm_sdk/swarm.rb +982 -0
- data/lib/swarm_sdk/tools/bash.rb +274 -0
- data/lib/swarm_sdk/tools/clock.rb +44 -0
- data/lib/swarm_sdk/tools/delegate.rb +164 -0
- data/lib/swarm_sdk/tools/document_converters/base_converter.rb +83 -0
- data/lib/swarm_sdk/tools/document_converters/docx_converter.rb +99 -0
- data/lib/swarm_sdk/tools/document_converters/html_converter.rb +101 -0
- data/lib/swarm_sdk/tools/document_converters/pdf_converter.rb +78 -0
- data/lib/swarm_sdk/tools/document_converters/xlsx_converter.rb +194 -0
- data/lib/swarm_sdk/tools/edit.rb +150 -0
- data/lib/swarm_sdk/tools/glob.rb +158 -0
- data/lib/swarm_sdk/tools/grep.rb +228 -0
- data/lib/swarm_sdk/tools/image_extractors/docx_image_extractor.rb +43 -0
- data/lib/swarm_sdk/tools/image_extractors/pdf_image_extractor.rb +163 -0
- data/lib/swarm_sdk/tools/image_formats/tiff_builder.rb +65 -0
- data/lib/swarm_sdk/tools/multi_edit.rb +232 -0
- data/lib/swarm_sdk/tools/path_resolver.rb +43 -0
- data/lib/swarm_sdk/tools/read.rb +251 -0
- data/lib/swarm_sdk/tools/registry.rb +93 -0
- data/lib/swarm_sdk/tools/scratchpad/scratchpad_list.rb +96 -0
- data/lib/swarm_sdk/tools/scratchpad/scratchpad_read.rb +76 -0
- data/lib/swarm_sdk/tools/scratchpad/scratchpad_write.rb +91 -0
- data/lib/swarm_sdk/tools/stores/read_tracker.rb +61 -0
- data/lib/swarm_sdk/tools/stores/scratchpad_storage.rb +224 -0
- data/lib/swarm_sdk/tools/stores/storage.rb +148 -0
- data/lib/swarm_sdk/tools/stores/todo_manager.rb +65 -0
- data/lib/swarm_sdk/tools/think.rb +95 -0
- data/lib/swarm_sdk/tools/todo_write.rb +216 -0
- data/lib/swarm_sdk/tools/web_fetch.rb +261 -0
- data/lib/swarm_sdk/tools/write.rb +117 -0
- data/lib/swarm_sdk/utils.rb +50 -0
- data/lib/swarm_sdk/version.rb +5 -0
- data/lib/swarm_sdk.rb +157 -0
- data/llm.v2.txt +13407 -0
- data/rubocop/cop/security/no_reflection_methods.rb +47 -0
- data/rubocop/cop/security/no_ruby_llm_logger.rb +32 -0
- data/swarm_cli.gemspec +57 -0
- data/swarm_memory.gemspec +28 -0
- data/swarm_sdk.gemspec +41 -0
- data/team.yml +1 -1
- data/team_full.yml +1875 -0
- data/{team_v2.yml → team_sdk.yml} +121 -52
- metadata +247 -4
- data/EXAMPLES.md +0 -164
|
@@ -0,0 +1,863 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module SwarmMemory
|
|
4
|
+
module Optimization
|
|
5
|
+
# Defragments memory by finding duplicates, low-quality entries, and archival candidates
|
|
6
|
+
#
|
|
7
|
+
# This class analyzes memory and suggests optimizations without making changes.
|
|
8
|
+
# Agents must manually review and act on suggestions.
|
|
9
|
+
class Defragmenter
|
|
10
|
+
# Initialize defragmenter
#
# @param adapter [Adapters::Base] Storage adapter used for all entry reads/writes
# @param embedder [Embeddings::Embedder, nil] Optional embedder; when present,
#   duplicate detection can additionally use semantic (embedding) similarity
def initialize(adapter:, embedder: nil)
  @adapter = adapter
  @embedder = embedder
  # Analyzer supplies the read-only health reporting used by #health_report
  @analyzer = Analyzer.new(adapter: adapter)
end
|
|
19
|
+
|
|
20
|
+
# Generate health report
#
# Pure delegation to the Analyzer built in #initialize; performs no mutation.
#
# @return [String] Formatted health report
def health_report
  @analyzer.health_report
end
|
|
26
|
+
|
|
27
|
+
# Run full analysis (all operations)
#
# Concatenates the health report plus all three read-only analyses,
# separated by horizontal rules. Makes no changes to memory.
#
# @param similarity_threshold [Float] Threshold for duplicate detection
# @param age_days [Integer] Age threshold for archival
# @param confidence_filter [String] Confidence level filter
# @return [String] Complete analysis report
def full_analysis(similarity_threshold: 0.85, age_days: 90, confidence_filter: "low")
  sections = [
    "# Full Memory Defrag Analysis\n",
    @analyzer.health_report,
    "\n---\n",
    find_duplicates_report(threshold: similarity_threshold),
    "\n---\n",
    find_low_quality_report(confidence_filter: confidence_filter),
    "\n---\n",
    find_archival_candidates_report(age_days: age_days),
  ]

  sections.join("\n")
end
|
|
46
|
+
|
|
47
|
+
# Find potential duplicate entries
#
# Compares every pair of entries using text similarity (Jaccard) and,
# when both entries carry embeddings, semantic similarity (cosine).
# The higher of the two scores decides whether a pair is reported.
#
# @param threshold [Float] Similarity threshold (0.0-1.0)
# @return [Array<Hash>] Duplicate pairs with similarity scores, highest first
def find_duplicates(threshold: 0.85)
  listing = @adapter.list
  return [] if listing.size < 2

  cache = @adapter.all_entries
  paths = listing.map { |info| info[:path] }

  pairs = paths.combination(2).filter_map do |path_a, path_b|
    first = cache[path_a]
    second = cache[path_b]

    # Text similarity is always computable from raw content.
    text_score = Search::TextSimilarity.jaccard(first.content, second.content)

    # Semantic similarity only when both sides are embedded.
    semantic_score = nil
    if first.embedded? && second.embedded?
      semantic_score = Search::TextSimilarity.cosine(first.embedding, second.embedding)
    end

    # The pair is judged by whichever signal is stronger.
    best = [text_score, semantic_score].compact.max
    next if best < threshold

    {
      path1: path_a,
      path2: path_b,
      similarity: (best * 100).round(1),
      text_similarity: (text_score * 100).round(1),
      semantic_similarity: semantic_score ? (semantic_score * 100).round(1) : nil,
      title1: first.title,
      title2: second.title,
      size1: first.size,
      size2: second.size,
    }
  end

  pairs.sort_by { |pair| -pair[:similarity] }
end
|
|
95
|
+
|
|
96
|
+
# Find low-quality entries
#
# Reads every entry and flags those with metadata problems (missing
# metadata, flagged confidence, no type/tags/related links, or not
# embedded while an embedder is configured).
#
# @param confidence_filter [String] Filter level ("low", "medium", "high")
# @return [Array<Hash>] Flagged entries, lowest quality score first
def find_low_quality(confidence_filter: "low")
  flagged = @adapter.list.filter_map do |entry_info|
    entry = @adapter.read_entry(file_path: entry_info[:path])

    # Metadata comes from the .yml sidecar and always uses string keys.
    metadata = entry.metadata || {}

    # Quality score derived purely from metadata completeness.
    score = calculate_quality_from_metadata(metadata)

    problems = []
    problems << "No metadata" if metadata.empty?
    problems << "Confidence: #{metadata["confidence"]}" if should_flag_confidence?(metadata["confidence"], confidence_filter)
    problems << "No type specified" if metadata["type"].nil?
    problems << "No tags" if (metadata["tags"] || []).empty?
    problems << "No related links" if (metadata["related"] || []).empty?
    # Only meaningful to complain about missing embeddings when an embedder exists.
    problems << "Not embedded" if !entry.embedded? && @embedder

    next if problems.empty?

    {
      path: entry_info[:path],
      title: entry.title,
      issues: problems,
      confidence: metadata["confidence"] || "unknown",
      quality_score: score,
    }
  end

  flagged.sort_by { |item| item[:quality_score] }
end
|
|
135
|
+
|
|
136
|
+
# Find entries that could be archived (old and unused)
#
# Selects entries whose +updated_at+ is older than the cutoff and
# describes each with age, last-verified date, confidence, and size.
#
# @param age_days [Integer] Minimum age in days
# @return [Array<Hash>] Archival candidates, oldest first
def find_archival_candidates(age_days: 90)
  cutoff = Time.now - (age_days * 24 * 60 * 60)

  stale = @adapter.list.select { |info| info[:updated_at] < cutoff }

  described = stale.map do |info|
    entry = @adapter.read_entry(file_path: info[:path])
    meta = entry.metadata || {}

    {
      path: info[:path],
      title: entry.title,
      # 86400 seconds per day
      age_days: ((Time.now - info[:updated_at]) / 86400).round,
      last_verified: meta["last_verified"],
      confidence: meta["confidence"] || "unknown",
      size: entry.size,
    }
  end

  described.sort_by { |item| -item[:age_days] }
end
|
|
162
|
+
|
|
163
|
+
# Generate formatted report for duplicates
#
# Renders the result of #find_duplicates as a markdown report with one
# section per pair.
#
# @param threshold [Float] Similarity threshold
# @return [String] Formatted report
def find_duplicates_report(threshold: 0.85)
  pairs = find_duplicates(threshold: threshold)

  return "No duplicate entries found above #{(threshold * 100).round}% similarity." if pairs.empty?

  lines = [
    "# Potential Duplicates (#{pairs.size} pairs)",
    "",
    "Found #{pairs.size} pair(s) of similar entries that could potentially be merged.",
    "",
  ]

  pairs.each_with_index do |dup, index|
    lines << "## Pair #{index + 1}: #{dup[:similarity]}% similar"
    lines << "- memory://#{dup[:path1]}"
    lines << " Title: \"#{dup[:title1]}\""
    lines << " Size: #{format_bytes(dup[:size1])}"
    lines << "- memory://#{dup[:path2]}"
    lines << " Title: \"#{dup[:title2]}\""
    lines << " Size: #{format_bytes(dup[:size2])}"
    lines << ""
    lines << " Text similarity: #{dup[:text_similarity]}%"
    # Semantic similarity is nil when either entry lacks embeddings.
    semantic = dup[:semantic_similarity]
    lines << (semantic ? " Semantic similarity: #{semantic}%" : " Semantic similarity: N/A (no embeddings)")
    lines << ""
    lines << " **Suggestion:** Review both entries and consider merging with MemoryMultiEdit"
    lines << ""
  end

  lines.join("\n")
end
|
|
200
|
+
|
|
201
|
+
# Generate formatted report for low-quality entries
#
# Renders the result of #find_low_quality as a markdown report with one
# section per flagged entry, listing its issues.
#
# @param confidence_filter [String] Confidence level filter
# @return [String] Formatted report
def find_low_quality_report(confidence_filter: "low")
  flagged = find_low_quality(confidence_filter: confidence_filter)

  return "No low-quality entries found." if flagged.empty?

  lines = [
    "# Low-Quality Entries (#{flagged.size} entries)",
    "",
    "Found #{flagged.size} entry/entries with quality issues.",
    "",
  ]

  flagged.each do |item|
    lines << "## memory://#{item[:path]}"
    lines << "- Title: #{item[:title]}"
    lines << "- Quality score: #{item[:quality_score]}/100"
    lines << "- Confidence: #{item[:confidence]}"
    lines << "- Issues:"
    item[:issues].each { |issue| lines << " - #{issue}" }
    lines << ""
    lines << " **Suggestion:** Add proper frontmatter and metadata with MemoryEdit"
    lines << ""
  end

  lines.join("\n")
end
|
|
232
|
+
|
|
233
|
+
# Generate formatted report for archival candidates
#
# Renders the result of #find_archival_candidates as a markdown report
# with one section per stale entry.
#
# @param age_days [Integer] Age threshold
# @return [String] Formatted report
def find_archival_candidates_report(age_days: 90)
  stale = find_archival_candidates(age_days: age_days)

  return "No entries older than #{age_days} days found." if stale.empty?

  lines = [
    "# Archival Candidates (#{stale.size} entries older than #{age_days} days)",
    "",
    "Found #{stale.size} old entry/entries that could be archived.",
    "",
  ]

  stale.each do |item|
    lines << "## memory://#{item[:path]}"
    lines << "- Title: #{item[:title]}"
    lines << "- Age: #{item[:age_days]} days"
    lines << "- Last verified: #{item[:last_verified] || "never"}"
    lines << "- Confidence: #{item[:confidence]}"
    lines << "- Size: #{format_bytes(item[:size])}"
    lines << ""
    lines << " **Suggestion:** Review and delete with MemoryDelete if truly obsolete, or use compact action with appropriate thresholds"
    lines << ""
  end

  lines.join("\n")
end
|
|
262
|
+
|
|
263
|
+
# Find related entries that should be cross-linked
#
# Finds entry pairs whose pure semantic (cosine) similarity falls in the
# "related" band: at least +min_threshold+ but below +max_threshold+
# (above which pairs are treated as duplicates). Pairs without embeddings
# on both sides are skipped. No keyword boosting is applied.
#
# @param min_threshold [Float] Minimum similarity for relationships (default: 0.60)
# @param max_threshold [Float] Maximum similarity (above = duplicates) (default: 0.85)
# @return [Array<Hash>] Related pairs, highest similarity first
def find_related(min_threshold: 0.60, max_threshold: 0.85)
  listing = @adapter.list
  return [] if listing.size < 2

  cache = @adapter.all_entries
  paths = listing.map { |info| info[:path] }

  pairs = paths.combination(2).filter_map do |path_a, path_b|
    first = cache[path_a]
    second = cache[path_b]

    # Semantic similarity requires embeddings on both entries.
    next unless first.embedded? && second.embedded?

    # Pure cosine similarity — deliberately no keyword boost here.
    score = Search::TextSimilarity.cosine(first.embedding, second.embedding)

    # Keep only scores inside the "related" band.
    next if score < min_threshold
    next if score >= max_threshold

    # Normalize related links (strip the memory:// scheme) to compare paths.
    links_a = (first.metadata["related"] || []).map { |r| r.sub(%r{^memory://}, "") }
    links_b = (second.metadata["related"] || []).map { |r| r.sub(%r{^memory://}, "") }

    forward = links_a.include?(path_b)
    backward = links_b.include?(path_a)

    {
      path1: path_a,
      path2: path_b,
      similarity: (score * 100).round(1),
      title1: first.title,
      title2: second.title,
      type1: first.metadata["type"] || "unknown",
      type2: second.metadata["type"] || "unknown",
      already_linked: forward && backward,
      linked_1_to_2: forward,
      linked_2_to_1: backward,
    }
  end

  pairs.sort_by { |pair| -pair[:similarity] }
end
|
|
322
|
+
|
|
323
|
+
# Generate formatted report for related entries
#
# Renders the result of #find_related as a markdown report, noting the
# current linking status of each pair and how to auto-link them.
#
# @param min_threshold [Float] Minimum similarity
# @param max_threshold [Float] Maximum similarity
# @return [String] Formatted report
def find_related_report(min_threshold: 0.60, max_threshold: 0.85)
  related = find_related(min_threshold: min_threshold, max_threshold: max_threshold)

  return "No related entry pairs found in #{(min_threshold * 100).round}-#{(max_threshold * 100).round}% similarity range." if related.empty?

  lines = [
    "# Related Entries (#{related.size} pairs)",
    "",
    "Found #{related.size} pair(s) of semantically related entries.",
    "Similarity range: #{(min_threshold * 100).round}-#{(max_threshold * 100).round}% (pure semantic, no keyword boost)",
    "",
  ]

  related.each_with_index do |pair, index|
    lines << "## Pair #{index + 1}: #{pair[:similarity]}% similar"
    lines << "- memory://#{pair[:path1]} (#{pair[:type1]})"
    lines << " \"#{pair[:title1]}\""
    lines << "- memory://#{pair[:path2]} (#{pair[:type2]})"
    lines << " \"#{pair[:title2]}\""
    lines << ""

    # Describe the current link state and what (if anything) is missing.
    if pair[:already_linked]
      lines << " ✓ Already linked bidirectionally"
    elsif pair[:linked_1_to_2]
      lines << " → Entry 1 links to Entry 2, but not vice versa"
      lines << " **Suggestion:** Add backward link from Entry 2 to Entry 1"
    elsif pair[:linked_2_to_1]
      lines << " → Entry 2 links to Entry 1, but not vice versa"
      lines << " **Suggestion:** Add backward link from Entry 1 to Entry 2"
    else
      lines << " **Suggestion:** Add bidirectional links to cross-reference these related entries"
    end
    lines << ""
  end

  lines << "To automatically create missing links, use:"
  lines << " MemoryDefrag(action: \"link_related\", dry_run: true) # Preview first"
  lines << " MemoryDefrag(action: \"link_related\", dry_run: false) # Execute"
  lines << ""

  lines.join("\n")
end
|
|
369
|
+
|
|
370
|
+
# ============================================================================
|
|
371
|
+
# ACTIVE OPTIMIZATION OPERATIONS (Actually modify memory)
|
|
372
|
+
# ============================================================================
|
|
373
|
+
|
|
374
|
+
# Merge duplicate entries
#
# Walks the duplicate pairs from #find_duplicates (highest similarity
# first) and merges each one via #merge_pair, or previews the merges
# when +dry_run+ is true.
#
# FIX: an entry path can appear in several duplicate pairs. Previously a
# later pair could reference an entry that an earlier merge had already
# consumed (deleted), so merge_pair would operate on a missing entry. We
# now track consumed paths and skip any pair touching one, in both
# preview and execute modes so the dry-run output matches what execution
# would actually do.
#
# @param threshold [Float] Similarity threshold
# @param strategy [Symbol] Merge strategy (:keep_newer, :keep_larger, :combine)
# @param dry_run [Boolean] If true, show what would be done without doing it
# @return [String] Result report
def merge_duplicates_active(threshold: 0.85, strategy: :keep_newer, dry_run: true)
  duplicates = find_duplicates(threshold: threshold)

  return "No duplicates found above #{(threshold * 100).round}% similarity." if duplicates.empty?

  results = []
  freed_bytes = 0
  consumed = {} # paths merged away earlier in this run; no longer valid targets

  duplicates.each do |pair|
    # Skip pairs touching an entry a higher-similarity merge already consumed.
    next if consumed[pair[:path1]] || consumed[pair[:path2]]

    if dry_run
      # Preview mirrors the execute path: path2 is the entry merged away.
      consumed[pair[:path2]] = true
      results << "Would merge: #{pair[:path2]} → #{pair[:path1]} (#{pair[:similarity]}% similar)"
    else
      # Actually merge; merge_pair reports which path was kept vs merged.
      result_info = merge_pair(pair, strategy: strategy)
      consumed[result_info[:merged_path]] = true
      freed_bytes += result_info[:freed_bytes]
      results << "✓ Merged: #{result_info[:merged_path]} → #{result_info[:kept_path]}"
    end
  end

  format_merge_report(results, duplicates.size, freed_bytes, dry_run)
end
|
|
401
|
+
|
|
402
|
+
# Clean up old stub files
#
# Deletes (or previews deleting) stubs selected by #find_stubs_to_cleanup.
#
# @param min_age_days [Integer] Minimum age for cleanup
# @param max_hits [Integer] Maximum hits to consider for cleanup
# @param dry_run [Boolean] Preview mode
# @return [String] Result report
def cleanup_stubs_active(min_age_days: 30, max_hits: 3, dry_run: true)
  stubs = find_stubs_to_cleanup(min_age_days: min_age_days, max_hits: max_hits)

  return "No stubs found for cleanup." if stubs.empty?

  messages = []
  reclaimed = 0

  stubs.each do |stub|
    if dry_run
      messages << "Would delete stub: #{stub[:path]} (age: #{stub[:age_days]}d, hits: #{stub[:hits]})"
    else
      # Count the bytes before deleting so the report reflects space freed.
      reclaimed += stub[:size]
      @adapter.delete(file_path: stub[:path])
      messages << "✓ Deleted stub: #{stub[:path]}"
    end
  end

  format_cleanup_report(messages, stubs.size, reclaimed, dry_run)
end
|
|
428
|
+
|
|
429
|
+
# Permanently delete low-value entries.
#
# An entry is low-value when its metadata-derived quality score is below
# +min_quality_score+, it is at least +min_age_days+ old, and it has at
# most +max_hits+ recorded hits.
#
# @param min_quality_score [Integer] Minimum quality threshold (0-100)
# @param min_age_days [Integer] Minimum age in days
# @param max_hits [Integer] Maximum hit count
# @param dry_run [Boolean] If true, only report what would be deleted
# @return [String] Result report
def compact_active(min_quality_score: 20, min_age_days: 30, max_hits: 0, dry_run: true)
  low_value = @adapter.list.map do |entry_info|
    entry = @adapter.read_entry(file_path: entry_info[:path])

    # Quality is derived from metadata alone; content is never parsed.
    quality = calculate_quality_from_metadata(entry.metadata || {})
    age_days = ((Time.now - entry.updated_at) / 86400).round
    hits = entry.metadata&.dig("hits") || 0

    # Keep anything that is valuable, recent, or still being accessed.
    next if quality >= min_quality_score || age_days < min_age_days || hits > max_hits

    {
      path: entry_info[:path],
      quality: quality,
      age_days: age_days,
      hits: hits,
      size: entry.size,
    }
  end.compact

  return "No low-value entries found for compaction." if low_value.empty?

  freed_bytes = 0
  messages = low_value.map do |candidate|
    if dry_run
      "Would delete: #{candidate[:path]} (quality: #{candidate[:quality]}, age: #{candidate[:age_days]}d, hits: #{candidate[:hits]})"
    else
      freed_bytes += candidate[:size]
      @adapter.delete(file_path: candidate[:path])
      "✓ Deleted: #{candidate[:path]}"
    end
  end

  format_compact_report(messages, low_value.size, freed_bytes, dry_run)
end
|
|
477
|
+
|
|
478
|
+
# Cross-link related entries via their 'related' metadata arrays.
#
# Finds similar-but-not-duplicate pairs and updates each entry's 'related'
# metadata so the pair reference each other.
#
# @param min_threshold [Float] Minimum similarity (default: 0.60)
# @param max_threshold [Float] Maximum similarity (default: 0.85)
# @param dry_run [Boolean] If true, only describe the links that would be made
# @return [String] Result report
def link_related_active(min_threshold: 0.60, max_threshold: 0.85, dry_run: true)
  candidates = find_related(min_threshold: min_threshold, max_threshold: max_threshold)

  # Only pairs that are not yet fully cross-referenced need work.
  unlinked = candidates.reject { |pair| pair[:already_linked] }

  if unlinked.empty?
    return "No related entries found that need linking. All similar entries are already cross-referenced."
  end

  lines = []
  lines << (dry_run ? "# Link Related Entries (DRY RUN)" : "# Link Related Entries")
  lines << ""
  lines << "Found #{unlinked.size} pair(s) that should be cross-linked."
  lines << ""

  links_created = 0

  unlinked.each_with_index do |pair, index|
    lines << "## Pair #{index + 1}: #{pair[:similarity]}% similar"
    lines << "- memory://#{pair[:path1]}"
    lines << "- memory://#{pair[:path2]}"
    lines << ""

    if dry_run
      missing_1_to_2 = !pair[:linked_1_to_2]
      missing_2_to_1 = !pair[:linked_2_to_1]

      # Describe which direction(s) still need a link.
      if missing_1_to_2 && missing_2_to_1
        lines << "  Would add bidirectional links:"
        lines << "  - Add #{pair[:path2]} to #{pair[:path1]}'s related array"
        lines << "  - Add #{pair[:path1]} to #{pair[:path2]}'s related array"
      elsif missing_1_to_2
        lines << "  Would add backward link:"
        lines << "  - Add #{pair[:path2]} to #{pair[:path1]}'s related array"
      elsif missing_2_to_1
        lines << "  Would add backward link:"
        lines << "  - Add #{pair[:path1]} to #{pair[:path2]}'s related array"
      end
    else
      created = create_bidirectional_links(pair[:path1], pair[:path2], pair[:linked_1_to_2], pair[:linked_2_to_1])
      links_created += created

      lines << "  ✓ Created #{created} link(s)"
    end
    lines << ""
  end

  lines << if dry_run
    "**DRY RUN:** No changes made. Set dry_run=false to execute."
  else
    "**COMPLETED:** Created #{links_created} link(s) across #{unlinked.size} pairs."
  end

  lines.join("\n")
end
|
|
541
|
+
|
|
542
|
+
# Run every optimization pass: merge duplicates, clean stubs, compact.
#
# @param dry_run [Boolean] If true, each pass only reports what it would do
# @return [String] Complete optimization report
def full_optimization(dry_run: true)
  lines = []
  lines << "# Full Memory Optimization"
  lines << ""
  lines << (dry_run ? "## DRY RUN MODE - No changes will be made" : "## ACTIVE MODE - Performing optimizations")
  lines << ""

  # Baseline health before any changes are made.
  initial_health = @analyzer.analyze
  lines << "Initial health score: #{initial_health[:health_score]}/100"
  lines << ""

  # Pass 1: merge duplicate entries.
  lines << "## 1. Merging Duplicates"
  lines << merge_duplicates_active(dry_run: dry_run)
  lines << ""

  # Pass 2: remove old stub redirects.
  lines << "## 2. Cleaning Up Stubs"
  lines << cleanup_stubs_active(dry_run: dry_run)
  lines << ""

  # Pass 3: delete low-value entries.
  lines << "## 3. Compacting Low-Value Entries"
  lines << compact_active(dry_run: dry_run)
  lines << ""

  # Re-measure health only when changes were actually applied.
  unless dry_run
    final_health = @analyzer.analyze
    lines << "## Summary"
    lines << "Health score: #{initial_health[:health_score]} → #{final_health[:health_score]} (+#{final_health[:health_score] - initial_health[:health_score]})"
  end

  lines.join("\n")
end
|
|
583
|
+
|
|
584
|
+
private
|
|
585
|
+
|
|
586
|
+
# Whether an entry's confidence falls at or below the filter level.
#
# Unknown level strings rank as 0 ("low").
#
# @param confidence [String, nil] Entry confidence ("low"/"medium"/"high")
# @param filter_level [String] Threshold level
# @return [Boolean] true when the entry should be flagged
def should_flag_confidence?(confidence, filter_level)
  return false if confidence.nil?

  rank = { "low" => 0, "medium" => 1, "high" => 2 }
  rank.fetch(confidence, 0) <= rank.fetch(filter_level, 0)
end
|
|
595
|
+
|
|
596
|
+
# Human-readable byte count using decimal units (B / KB / MB).
#
# @param bytes [Integer] Raw byte count
# @return [String] e.g. "1.5KB"
def format_bytes(bytes)
  case bytes
  when 1_000_000.. then "#{(bytes / 1_000_000.0).round(1)}MB"
  when 1_000...1_000_000 then "#{(bytes / 1_000.0).round(1)}KB"
  else "#{bytes}B"
  end
end
|
|
605
|
+
|
|
606
|
+
# Calculate a quality score from metadata alone (content is never parsed).
#
# Points are awarded per present field; a "high" confidence earns a bonus.
# Maximum possible score is 100.
#
# @param metadata [Hash, nil] Metadata hash (string keys guaranteed)
# @return [Integer] Quality score 0-100
def calculate_quality_from_metadata(metadata)
  return 0 if metadata.nil? || metadata.empty?

  # Keys are always strings, so no symbol/string defensive handling needed.
  criteria = [
    [20, metadata["type"]],
    [20, metadata["confidence"]],
    [15, !(metadata["tags"] || []).empty?],
    [15, !(metadata["related"] || []).empty?],
    [10, metadata["domain"]],
    [10, metadata["last_verified"]],
    [10, metadata["confidence"] == "high"],
  ]

  criteria.sum { |points, satisfied| satisfied ? points : 0 }
end
|
|
626
|
+
|
|
627
|
+
# ============================================================================
|
|
628
|
+
# HELPER METHODS FOR ACTIVE OPERATIONS
|
|
629
|
+
# ============================================================================
|
|
630
|
+
|
|
631
|
+
# Create bidirectional links between two entries.
#
# Rewrites each entry whose 'related' metadata array is missing the
# cross-reference, adding a "memory://" link to its counterpart.
#
# @param path1 [String] First entry path
# @param path2 [String] Second entry path
# @param already_linked_1_to_2 [Boolean] If entry1 already links to entry2
# @param already_linked_2_to_1 [Boolean] If entry2 already links to entry1
# @return [Integer] Number of links created (0-2)
def create_bidirectional_links(path1, path2, already_linked_1_to_2, already_linked_2_to_1)
  entries = @adapter.all_entries
  links_created = 0

  # One tuple per direction: [entry that gets the link, link target, skip?]
  [
    [path1, path2, already_linked_1_to_2],
    [path2, path1, already_linked_2_to_1],
  ].each do |owner_path, target_path, already_linked|
    next if already_linked

    owner = entries[owner_path]
    updated = owner.metadata.dup
    updated["related"] = ((owner.metadata["related"] || []) + ["memory://#{target_path}"]).uniq

    # Persist the owner entry with its augmented 'related' array.
    @adapter.write(
      file_path: owner_path,
      content: owner.content,
      title: owner.title,
      embedding: owner.embedding,
      metadata: updated,
    )

    links_created += 1
  end

  links_created
end
|
|
688
|
+
|
|
689
|
+
# Merge one duplicate pair, leaving a redirect stub at the merged path.
#
# @param pair [Hash] Duplicate pair info (:path1, :path2)
# @param strategy [Symbol] :keep_newer, :keep_larger, or :combine
# @return [Hash] Result info with :kept_path, :merged_path, :freed_bytes
def merge_pair(pair, strategy:)
  entry1 = @adapter.read_entry(file_path: pair[:path1])
  entry2 = @adapter.read_entry(file_path: pair[:path2])

  # Two possible orderings: keep path1 (forward) or keep path2 (backward).
  forward = [pair[:path1], pair[:path2], entry1, entry2]
  backward = [pair[:path2], pair[:path1], entry2, entry1]

  keep_path, merge_path, keep_entry, merge_entry =
    case strategy
    when :keep_newer then entry1.updated_at > entry2.updated_at ? forward : backward
    when :keep_larger then entry1.size > entry2.size ? forward : backward
    else forward # :combine and any unknown strategy keep path1
    end

  # Only :combine rewrites the kept entry with merged content/metadata.
  if strategy == :combine
    @adapter.write(
      file_path: keep_path,
      content: combine_contents(keep_entry.content, merge_entry.content),
      title: keep_entry.title,
      embedding: keep_entry.embedding,
      metadata: combine_metadata(keep_entry.metadata, merge_entry.metadata),
    )
  end

  # Leave a redirect behind so old references still resolve.
  create_stub(from: merge_path, to: keep_path, reason: "merged")

  {
    kept_path: keep_path,
    merged_path: merge_path,
    freed_bytes: merge_entry.size,
  }
end
|
|
743
|
+
|
|
744
|
+
# Write a stub (redirect) entry at a path whose content moved elsewhere.
#
# @param from [String] Original path (where the stub is written)
# @param to [String] Target path the stub points at
# @param reason [String] Why the entry moved (e.g. "merged", "moved")
# @return [void]
def create_stub(from:, to:, reason:)
  # The "# <reason> → <target>" prefix is what stub detection keys on.
  @adapter.write(
    file_path: from,
    content: "# #{reason} → #{to}\n\nThis entry was #{reason} into #{to}.",
    title: "[STUB] → #{to}",
    metadata: { "stub" => true, "redirect_to" => to, "reason" => reason },
  )
end
|
|
760
|
+
|
|
761
|
+
# Find stub (redirect) entries eligible for deletion.
#
# A stub is recognized by its content prefix ("# merged →" / "# moved →",
# as written by #create_stub). A stub qualifies once it is at least
# +min_age_days+ old and has no more than +max_hits+ recorded hits.
#
# @param min_age_days [Integer] Minimum age in days
# @param max_hits [Integer] Maximum hit count
# @return [Array<Hash>] Stub info (:path, :age_days, :hits, :size)
def find_stubs_to_cleanup(min_age_days:, max_hits:)
  stubs = []

  @adapter.list.each do |entry_info|
    entry = @adapter.read_entry(file_path: entry_info[:path])

    # Only stub entries are candidates (see #create_stub content format).
    next unless entry.content.start_with?("# merged →", "# moved →")

    age_days = ((Time.now - entry.updated_at) / 86400).round
    hits = entry.metadata&.dig("hits") || 0

    next if age_days < min_age_days || hits > max_hits

    stubs << {
      path: entry_info[:path],
      age_days: age_days,
      hits: hits,
      size: entry.size,
    }
  end

  stubs
end
|
|
791
|
+
|
|
792
|
+
# Combine the contents of two entries with a horizontal-rule separator.
#
# @param content1 [String] First content
# @param content2 [String] Second content
# @return [String] Combined content
def combine_contents(content1, content2)
  # Plain concatenation for now; an LLM-assisted merge could be smarter.
  [content1, content2].join("\n\n---\n\n")
end
|
|
802
|
+
|
|
803
|
+
# Combine metadata from two entries, unioning tags and related links.
#
# Non-list fields keep the first entry's values.
#
# @param metadata1 [Hash, nil] First metadata (takes precedence)
# @param metadata2 [Hash, nil] Second metadata
# @return [Hash] Combined metadata
def combine_metadata(metadata1, metadata2)
  return metadata2 if metadata1.nil?
  return metadata1 if metadata2.nil?

  merged = metadata1.dup
  ["tags", "related"].each do |key|
    merged[key] = ((metadata1[key] || []) + (metadata2[key] || [])).uniq
  end
  merged
end
|
|
819
|
+
|
|
820
|
+
# Format the merge-operation report.
#
# @param results [Array<String>] Per-pair result messages
# @param count [Integer] Number of merges
# @param freed_bytes [Integer] Bytes freed (shown only in active mode)
# @param dry_run [Boolean] Dry run mode
# @return [String] Formatted report
def format_merge_report(results, count, freed_bytes, dry_run)
  lines = []
  lines << (dry_run ? "Found #{count} duplicate pair(s) to merge:" : "Merged #{count} duplicate pair(s):")
  lines << ""
  lines.concat(results)
  lines << ""
  lines << "Space freed: #{format_bytes(freed_bytes)}" unless dry_run
  lines.join("\n")
end
|
|
837
|
+
|
|
838
|
+
# Format the stub-cleanup report.
#
# @param results [Array<String>] Per-stub result messages
# @param count [Integer] Number of stubs
# @param freed_bytes [Integer] Bytes freed (shown only in active mode)
# @param dry_run [Boolean] Dry run mode
# @return [String] Formatted report
def format_cleanup_report(results, count, freed_bytes, dry_run)
  lines = []
  lines << (dry_run ? "Found #{count} stub(s) to clean up:" : "Cleaned up #{count} stub(s):")
  lines << ""
  lines.concat(results)
  lines << ""
  lines << "Space freed: #{format_bytes(freed_bytes)}" unless dry_run
  lines.join("\n")
end
|
|
849
|
+
|
|
850
|
+
# Format the compaction report.
#
# @param results [Array<String>] Per-entry result messages
# @param count [Integer] Number of entries
# @param freed_bytes [Integer] Bytes freed (shown only in active mode)
# @param dry_run [Boolean] Dry run mode
# @return [String] Formatted report
def format_compact_report(results, count, freed_bytes, dry_run)
  lines = []
  lines << (dry_run ? "Found #{count} low-value entry/entries to delete:" : "Deleted #{count} low-value entry/entries:")
  lines << ""
  lines.concat(results)
  lines << ""
  lines << "Space freed: #{format_bytes(freed_bytes)}" unless dry_run
  lines.join("\n")
end
|
|
861
|
+
end
|
|
862
|
+
end
|
|
863
|
+
end
|