claude_swarm 1.0.0 → 1.0.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (267)
  1. checksums.yaml +4 -4
  2. data/.claude/commands/release.md +1 -1
  3. data/.claude/hooks/lint-code-files.rb +65 -0
  4. data/.rubocop.yml +22 -2
  5. data/CHANGELOG.md +21 -1
  6. data/CLAUDE.md +1 -1
  7. data/CONTRIBUTING.md +69 -0
  8. data/README.md +27 -2
  9. data/Rakefile +71 -3
  10. data/analyze_coverage.rb +94 -0
  11. data/docs/v2/CHANGELOG.swarm_cli.md +43 -0
  12. data/docs/v2/CHANGELOG.swarm_memory.md +379 -0
  13. data/docs/v2/CHANGELOG.swarm_sdk.md +362 -0
  14. data/docs/v2/README.md +308 -0
  15. data/docs/v2/guides/claude-code-agents.md +262 -0
  16. data/docs/v2/guides/complete-tutorial.md +3088 -0
  17. data/docs/v2/guides/getting-started.md +1456 -0
  18. data/docs/v2/guides/memory-adapters.md +998 -0
  19. data/docs/v2/guides/plugins.md +816 -0
  20. data/docs/v2/guides/quick-start-cli.md +1745 -0
  21. data/docs/v2/guides/rails-integration.md +1902 -0
  22. data/docs/v2/guides/swarm-memory.md +599 -0
  23. data/docs/v2/reference/cli.md +729 -0
  24. data/docs/v2/reference/ruby-dsl.md +2154 -0
  25. data/docs/v2/reference/yaml.md +1835 -0
  26. data/docs-team-swarm.yml +2222 -0
  27. data/examples/learning-assistant/assistant.md +7 -0
  28. data/examples/learning-assistant/example-memories/concept-example.md +90 -0
  29. data/examples/learning-assistant/example-memories/experience-example.md +66 -0
  30. data/examples/learning-assistant/example-memories/fact-example.md +76 -0
  31. data/examples/learning-assistant/example-memories/memory-index.md +78 -0
  32. data/examples/learning-assistant/example-memories/skill-example.md +168 -0
  33. data/examples/learning-assistant/learning_assistant.rb +34 -0
  34. data/examples/learning-assistant/learning_assistant.yml +20 -0
  35. data/examples/v2/dsl/01_basic.rb +44 -0
  36. data/examples/v2/dsl/02_core_parameters.rb +59 -0
  37. data/examples/v2/dsl/03_capabilities.rb +71 -0
  38. data/examples/v2/dsl/04_llm_parameters.rb +56 -0
  39. data/examples/v2/dsl/05_advanced_flags.rb +73 -0
  40. data/examples/v2/dsl/06_permissions.rb +80 -0
  41. data/examples/v2/dsl/07_mcp_server.rb +62 -0
  42. data/examples/v2/dsl/08_swarm_hooks.rb +53 -0
  43. data/examples/v2/dsl/09_agent_hooks.rb +67 -0
  44. data/examples/v2/dsl/10_all_agents_hooks.rb +67 -0
  45. data/examples/v2/dsl/11_delegation.rb +60 -0
  46. data/examples/v2/dsl/12_complete_integration.rb +137 -0
  47. data/examples/v2/file_tools_swarm.yml +102 -0
  48. data/examples/v2/hooks/01_basic_hooks.rb +133 -0
  49. data/examples/v2/hooks/02_usage_tracking.rb +201 -0
  50. data/examples/v2/hooks/03_production_monitoring.rb +429 -0
  51. data/examples/v2/hooks/agent_stop_exit_0.yml +21 -0
  52. data/examples/v2/hooks/agent_stop_exit_1.yml +21 -0
  53. data/examples/v2/hooks/agent_stop_exit_2.yml +26 -0
  54. data/examples/v2/hooks/multiple_hooks_all_pass.yml +37 -0
  55. data/examples/v2/hooks/multiple_hooks_first_fails.yml +37 -0
  56. data/examples/v2/hooks/multiple_hooks_second_fails.yml +37 -0
  57. data/examples/v2/hooks/multiple_hooks_warnings.yml +37 -0
  58. data/examples/v2/hooks/post_tool_use_exit_0.yml +24 -0
  59. data/examples/v2/hooks/post_tool_use_exit_1.yml +24 -0
  60. data/examples/v2/hooks/post_tool_use_exit_2.yml +24 -0
  61. data/examples/v2/hooks/post_tool_use_multi_matcher_exit_0.yml +26 -0
  62. data/examples/v2/hooks/post_tool_use_multi_matcher_exit_1.yml +26 -0
  63. data/examples/v2/hooks/post_tool_use_multi_matcher_exit_2.yml +26 -0
  64. data/examples/v2/hooks/pre_tool_use_exit_0.yml +24 -0
  65. data/examples/v2/hooks/pre_tool_use_exit_1.yml +24 -0
  66. data/examples/v2/hooks/pre_tool_use_exit_2.yml +24 -0
  67. data/examples/v2/hooks/pre_tool_use_multi_matcher_exit_0.yml +26 -0
  68. data/examples/v2/hooks/pre_tool_use_multi_matcher_exit_1.yml +26 -0
  69. data/examples/v2/hooks/pre_tool_use_multi_matcher_exit_2.yml +27 -0
  70. data/examples/v2/hooks/swarm_summary.sh +44 -0
  71. data/examples/v2/hooks/user_prompt_exit_0.yml +21 -0
  72. data/examples/v2/hooks/user_prompt_exit_1.yml +21 -0
  73. data/examples/v2/hooks/user_prompt_exit_2.yml +21 -0
  74. data/examples/v2/hooks/validate_bash.rb +59 -0
  75. data/examples/v2/multi_directory_permissions.yml +221 -0
  76. data/examples/v2/node_context_demo.rb +127 -0
  77. data/examples/v2/node_workflow.rb +173 -0
  78. data/examples/v2/path_resolution_demo.rb +216 -0
  79. data/examples/v2/simple-swarm-v2.rb +90 -0
  80. data/examples/v2/simple-swarm-v2.yml +62 -0
  81. data/examples/v2/swarm.yml +71 -0
  82. data/examples/v2/swarm_with_hooks.yml +61 -0
  83. data/examples/v2/swarm_with_hooks_simple.yml +25 -0
  84. data/examples/v2/think_tool_demo.rb +62 -0
  85. data/exe/swarm +6 -0
  86. data/lib/claude_swarm/claude_mcp_server.rb +0 -6
  87. data/lib/claude_swarm/cli.rb +10 -3
  88. data/lib/claude_swarm/commands/ps.rb +19 -20
  89. data/lib/claude_swarm/commands/show.rb +1 -1
  90. data/lib/claude_swarm/configuration.rb +10 -12
  91. data/lib/claude_swarm/mcp_generator.rb +10 -1
  92. data/lib/claude_swarm/orchestrator.rb +73 -49
  93. data/lib/claude_swarm/system_utils.rb +37 -11
  94. data/lib/claude_swarm/version.rb +1 -1
  95. data/lib/claude_swarm/worktree_manager.rb +1 -0
  96. data/lib/claude_swarm/yaml_loader.rb +22 -0
  97. data/lib/claude_swarm.rb +7 -3
  98. data/lib/swarm_cli/cli.rb +201 -0
  99. data/lib/swarm_cli/command_registry.rb +61 -0
  100. data/lib/swarm_cli/commands/mcp_serve.rb +130 -0
  101. data/lib/swarm_cli/commands/mcp_tools.rb +148 -0
  102. data/lib/swarm_cli/commands/migrate.rb +55 -0
  103. data/lib/swarm_cli/commands/run.rb +173 -0
  104. data/lib/swarm_cli/config_loader.rb +97 -0
  105. data/lib/swarm_cli/formatters/human_formatter.rb +711 -0
  106. data/lib/swarm_cli/formatters/json_formatter.rb +51 -0
  107. data/lib/swarm_cli/interactive_repl.rb +918 -0
  108. data/lib/swarm_cli/mcp_serve_options.rb +44 -0
  109. data/lib/swarm_cli/mcp_tools_options.rb +59 -0
  110. data/lib/swarm_cli/migrate_options.rb +54 -0
  111. data/lib/swarm_cli/migrator.rb +132 -0
  112. data/lib/swarm_cli/options.rb +151 -0
  113. data/lib/swarm_cli/ui/components/agent_badge.rb +33 -0
  114. data/lib/swarm_cli/ui/components/content_block.rb +120 -0
  115. data/lib/swarm_cli/ui/components/divider.rb +57 -0
  116. data/lib/swarm_cli/ui/components/panel.rb +62 -0
  117. data/lib/swarm_cli/ui/components/usage_stats.rb +70 -0
  118. data/lib/swarm_cli/ui/formatters/cost.rb +49 -0
  119. data/lib/swarm_cli/ui/formatters/number.rb +58 -0
  120. data/lib/swarm_cli/ui/formatters/text.rb +77 -0
  121. data/lib/swarm_cli/ui/formatters/time.rb +73 -0
  122. data/lib/swarm_cli/ui/icons.rb +59 -0
  123. data/lib/swarm_cli/ui/renderers/event_renderer.rb +188 -0
  124. data/lib/swarm_cli/ui/state/agent_color_cache.rb +45 -0
  125. data/lib/swarm_cli/ui/state/depth_tracker.rb +40 -0
  126. data/lib/swarm_cli/ui/state/spinner_manager.rb +170 -0
  127. data/lib/swarm_cli/ui/state/usage_tracker.rb +62 -0
  128. data/lib/swarm_cli/version.rb +5 -0
  129. data/lib/swarm_cli.rb +44 -0
  130. data/lib/swarm_memory/adapters/base.rb +141 -0
  131. data/lib/swarm_memory/adapters/filesystem_adapter.rb +845 -0
  132. data/lib/swarm_memory/chat_extension.rb +34 -0
  133. data/lib/swarm_memory/cli/commands.rb +306 -0
  134. data/lib/swarm_memory/core/entry.rb +37 -0
  135. data/lib/swarm_memory/core/frontmatter_parser.rb +108 -0
  136. data/lib/swarm_memory/core/metadata_extractor.rb +68 -0
  137. data/lib/swarm_memory/core/path_normalizer.rb +75 -0
  138. data/lib/swarm_memory/core/semantic_index.rb +244 -0
  139. data/lib/swarm_memory/core/storage.rb +288 -0
  140. data/lib/swarm_memory/core/storage_read_tracker.rb +63 -0
  141. data/lib/swarm_memory/dsl/builder_extension.rb +40 -0
  142. data/lib/swarm_memory/dsl/memory_config.rb +113 -0
  143. data/lib/swarm_memory/embeddings/embedder.rb +36 -0
  144. data/lib/swarm_memory/embeddings/informers_embedder.rb +152 -0
  145. data/lib/swarm_memory/errors.rb +21 -0
  146. data/lib/swarm_memory/integration/cli_registration.rb +30 -0
  147. data/lib/swarm_memory/integration/configuration.rb +43 -0
  148. data/lib/swarm_memory/integration/registration.rb +31 -0
  149. data/lib/swarm_memory/integration/sdk_plugin.rb +531 -0
  150. data/lib/swarm_memory/optimization/analyzer.rb +244 -0
  151. data/lib/swarm_memory/optimization/defragmenter.rb +863 -0
  152. data/lib/swarm_memory/prompts/memory.md.erb +109 -0
  153. data/lib/swarm_memory/prompts/memory_assistant.md.erb +181 -0
  154. data/lib/swarm_memory/prompts/memory_researcher.md.erb +281 -0
  155. data/lib/swarm_memory/prompts/memory_retrieval.md.erb +78 -0
  156. data/lib/swarm_memory/search/semantic_search.rb +112 -0
  157. data/lib/swarm_memory/search/text_search.rb +42 -0
  158. data/lib/swarm_memory/search/text_similarity.rb +80 -0
  159. data/lib/swarm_memory/skills/meta/deep-learning.md +101 -0
  160. data/lib/swarm_memory/skills/meta/deep-learning.yml +14 -0
  161. data/lib/swarm_memory/tools/load_skill.rb +313 -0
  162. data/lib/swarm_memory/tools/memory_defrag.rb +382 -0
  163. data/lib/swarm_memory/tools/memory_delete.rb +99 -0
  164. data/lib/swarm_memory/tools/memory_edit.rb +185 -0
  165. data/lib/swarm_memory/tools/memory_glob.rb +160 -0
  166. data/lib/swarm_memory/tools/memory_grep.rb +247 -0
  167. data/lib/swarm_memory/tools/memory_multi_edit.rb +281 -0
  168. data/lib/swarm_memory/tools/memory_read.rb +123 -0
  169. data/lib/swarm_memory/tools/memory_write.rb +231 -0
  170. data/lib/swarm_memory/utils.rb +50 -0
  171. data/lib/swarm_memory/version.rb +5 -0
  172. data/lib/swarm_memory.rb +166 -0
  173. data/lib/swarm_sdk/agent/RETRY_LOGIC.md +127 -0
  174. data/lib/swarm_sdk/agent/builder.rb +461 -0
  175. data/lib/swarm_sdk/agent/chat/context_tracker.rb +314 -0
  176. data/lib/swarm_sdk/agent/chat/hook_integration.rb +372 -0
  177. data/lib/swarm_sdk/agent/chat/logging_helpers.rb +116 -0
  178. data/lib/swarm_sdk/agent/chat/system_reminder_injector.rb +152 -0
  179. data/lib/swarm_sdk/agent/chat.rb +1159 -0
  180. data/lib/swarm_sdk/agent/context.rb +112 -0
  181. data/lib/swarm_sdk/agent/context_manager.rb +309 -0
  182. data/lib/swarm_sdk/agent/definition.rb +556 -0
  183. data/lib/swarm_sdk/claude_code_agent_adapter.rb +205 -0
  184. data/lib/swarm_sdk/configuration.rb +296 -0
  185. data/lib/swarm_sdk/context_compactor/metrics.rb +147 -0
  186. data/lib/swarm_sdk/context_compactor/token_counter.rb +106 -0
  187. data/lib/swarm_sdk/context_compactor.rb +340 -0
  188. data/lib/swarm_sdk/hooks/adapter.rb +359 -0
  189. data/lib/swarm_sdk/hooks/context.rb +197 -0
  190. data/lib/swarm_sdk/hooks/definition.rb +80 -0
  191. data/lib/swarm_sdk/hooks/error.rb +29 -0
  192. data/lib/swarm_sdk/hooks/executor.rb +146 -0
  193. data/lib/swarm_sdk/hooks/registry.rb +147 -0
  194. data/lib/swarm_sdk/hooks/result.rb +150 -0
  195. data/lib/swarm_sdk/hooks/shell_executor.rb +254 -0
  196. data/lib/swarm_sdk/hooks/tool_call.rb +35 -0
  197. data/lib/swarm_sdk/hooks/tool_result.rb +62 -0
  198. data/lib/swarm_sdk/log_collector.rb +51 -0
  199. data/lib/swarm_sdk/log_stream.rb +69 -0
  200. data/lib/swarm_sdk/markdown_parser.rb +75 -0
  201. data/lib/swarm_sdk/model_aliases.json +5 -0
  202. data/lib/swarm_sdk/models.json +1 -0
  203. data/lib/swarm_sdk/models.rb +120 -0
  204. data/lib/swarm_sdk/node/agent_config.rb +49 -0
  205. data/lib/swarm_sdk/node/builder.rb +439 -0
  206. data/lib/swarm_sdk/node/transformer_executor.rb +248 -0
  207. data/lib/swarm_sdk/node_context.rb +170 -0
  208. data/lib/swarm_sdk/node_orchestrator.rb +384 -0
  209. data/lib/swarm_sdk/permissions/config.rb +239 -0
  210. data/lib/swarm_sdk/permissions/error_formatter.rb +121 -0
  211. data/lib/swarm_sdk/permissions/path_matcher.rb +35 -0
  212. data/lib/swarm_sdk/permissions/validator.rb +173 -0
  213. data/lib/swarm_sdk/permissions_builder.rb +122 -0
  214. data/lib/swarm_sdk/plugin.rb +147 -0
  215. data/lib/swarm_sdk/plugin_registry.rb +101 -0
  216. data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +243 -0
  217. data/lib/swarm_sdk/providers/openai_with_responses.rb +582 -0
  218. data/lib/swarm_sdk/result.rb +97 -0
  219. data/lib/swarm_sdk/swarm/agent_initializer.rb +334 -0
  220. data/lib/swarm_sdk/swarm/all_agents_builder.rb +140 -0
  221. data/lib/swarm_sdk/swarm/builder.rb +586 -0
  222. data/lib/swarm_sdk/swarm/mcp_configurator.rb +151 -0
  223. data/lib/swarm_sdk/swarm/tool_configurator.rb +419 -0
  224. data/lib/swarm_sdk/swarm.rb +982 -0
  225. data/lib/swarm_sdk/tools/bash.rb +274 -0
  226. data/lib/swarm_sdk/tools/clock.rb +44 -0
  227. data/lib/swarm_sdk/tools/delegate.rb +164 -0
  228. data/lib/swarm_sdk/tools/document_converters/base_converter.rb +83 -0
  229. data/lib/swarm_sdk/tools/document_converters/docx_converter.rb +99 -0
  230. data/lib/swarm_sdk/tools/document_converters/html_converter.rb +101 -0
  231. data/lib/swarm_sdk/tools/document_converters/pdf_converter.rb +78 -0
  232. data/lib/swarm_sdk/tools/document_converters/xlsx_converter.rb +194 -0
  233. data/lib/swarm_sdk/tools/edit.rb +150 -0
  234. data/lib/swarm_sdk/tools/glob.rb +158 -0
  235. data/lib/swarm_sdk/tools/grep.rb +228 -0
  236. data/lib/swarm_sdk/tools/image_extractors/docx_image_extractor.rb +43 -0
  237. data/lib/swarm_sdk/tools/image_extractors/pdf_image_extractor.rb +163 -0
  238. data/lib/swarm_sdk/tools/image_formats/tiff_builder.rb +65 -0
  239. data/lib/swarm_sdk/tools/multi_edit.rb +232 -0
  240. data/lib/swarm_sdk/tools/path_resolver.rb +43 -0
  241. data/lib/swarm_sdk/tools/read.rb +251 -0
  242. data/lib/swarm_sdk/tools/registry.rb +93 -0
  243. data/lib/swarm_sdk/tools/scratchpad/scratchpad_list.rb +96 -0
  244. data/lib/swarm_sdk/tools/scratchpad/scratchpad_read.rb +76 -0
  245. data/lib/swarm_sdk/tools/scratchpad/scratchpad_write.rb +91 -0
  246. data/lib/swarm_sdk/tools/stores/read_tracker.rb +61 -0
  247. data/lib/swarm_sdk/tools/stores/scratchpad_storage.rb +224 -0
  248. data/lib/swarm_sdk/tools/stores/storage.rb +148 -0
  249. data/lib/swarm_sdk/tools/stores/todo_manager.rb +65 -0
  250. data/lib/swarm_sdk/tools/think.rb +95 -0
  251. data/lib/swarm_sdk/tools/todo_write.rb +216 -0
  252. data/lib/swarm_sdk/tools/web_fetch.rb +261 -0
  253. data/lib/swarm_sdk/tools/write.rb +117 -0
  254. data/lib/swarm_sdk/utils.rb +50 -0
  255. data/lib/swarm_sdk/version.rb +5 -0
  256. data/lib/swarm_sdk.rb +157 -0
  257. data/llm.v2.txt +13407 -0
  258. data/rubocop/cop/security/no_reflection_methods.rb +47 -0
  259. data/rubocop/cop/security/no_ruby_llm_logger.rb +32 -0
  260. data/swarm_cli.gemspec +57 -0
  261. data/swarm_memory.gemspec +28 -0
  262. data/swarm_sdk.gemspec +41 -0
  263. data/team.yml +1 -1
  264. data/team_full.yml +1875 -0
  265. data/{team_v2.yml → team_sdk.yml} +121 -52
  266. metadata +247 -4
  267. data/EXAMPLES.md +0 -164
data/lib/swarm_memory/optimization/defragmenter.rb
@@ -0,0 +1,863 @@
+# frozen_string_literal: true
+
+module SwarmMemory
+  module Optimization
+    # Defragments memory by finding duplicates, low-quality entries, and archival candidates
+    #
+    # This class analyzes memory and suggests optimizations without making changes.
+    # Agents must manually review and act on suggestions.
+    class Defragmenter
+      # Initialize defragmenter
+      #
+      # @param adapter [Adapters::Base] Storage adapter
+      # @param embedder [Embeddings::Embedder, nil] Optional embedder for semantic duplicate detection
+      def initialize(adapter:, embedder: nil)
+        @adapter = adapter
+        @embedder = embedder
+        @analyzer = Analyzer.new(adapter: adapter)
+      end
+
+      # Generate health report
+      #
+      # @return [String] Formatted health report
+      def health_report
+        @analyzer.health_report
+      end
+
+      # Run full analysis (all operations)
+      #
+      # @param similarity_threshold [Float] Threshold for duplicate detection
+      # @param age_days [Integer] Age threshold for archival
+      # @param confidence_filter [String] Confidence level filter
+      # @return [String] Complete analysis report
+      def full_analysis(similarity_threshold: 0.85, age_days: 90, confidence_filter: "low")
+        report = []
+        report << "# Full Memory Defrag Analysis\n"
+        report << @analyzer.health_report
+        report << "\n---\n"
+        report << find_duplicates_report(threshold: similarity_threshold)
+        report << "\n---\n"
+        report << find_low_quality_report(confidence_filter: confidence_filter)
+        report << "\n---\n"
+        report << find_archival_candidates_report(age_days: age_days)
+
+        report.join("\n")
+      end
+
+      # Find potential duplicate entries
+      #
+      # Uses both text similarity (Jaccard) and semantic similarity (embeddings)
+      # to find entries that could be merged.
+      #
+      # @param threshold [Float] Similarity threshold (0.0-1.0)
+      # @return [Array<Hash>] Duplicate pairs with similarity scores
+      def find_duplicates(threshold: 0.85)
+        entries = @adapter.list
+        return [] if entries.size < 2
+
+        duplicates = []
+        all_entries = @adapter.all_entries
+
+        # Compare all pairs
+        entry_paths = entries.map { |e| e[:path] }
+        entry_paths.combination(2).each do |path1, path2|
+          entry1 = all_entries[path1]
+          entry2 = all_entries[path2]
+
+          # Calculate text similarity (always available)
+          text_sim = Search::TextSimilarity.jaccard(entry1.content, entry2.content)
+
+          # Calculate semantic similarity if embeddings available
+          semantic_sim = if entry1.embedded? && entry2.embedded?
+            Search::TextSimilarity.cosine(entry1.embedding, entry2.embedding)
+          end
+
+          # Use highest similarity score
+          similarity = [text_sim, semantic_sim].compact.max
+
+          next if similarity < threshold
+
+          duplicates << {
+            path1: path1,
+            path2: path2,
+            similarity: (similarity * 100).round(1),
+            text_similarity: (text_sim * 100).round(1),
+            semantic_similarity: semantic_sim ? (semantic_sim * 100).round(1) : nil,
+            title1: entry1.title,
+            title2: entry2.title,
+            size1: entry1.size,
+            size2: entry2.size,
+          }
+        end
+
+        duplicates.sort_by { |d| -d[:similarity] }
+      end
+
+      # Find low-quality entries
+      #
+      # @param confidence_filter [String] Filter level ("low", "medium", "high")
+      # @return [Array<Hash>] Low-quality entries with issues
+      def find_low_quality(confidence_filter: "low")
+        entries = @adapter.list
+        low_quality = []
+
+        entries.each do |entry_info|
+          entry = @adapter.read_entry(file_path: entry_info[:path])
+
+          # Get metadata from entry (always has string keys from .yml file)
+          metadata = entry.metadata || {}
+
+          # Calculate quality score from metadata
+          quality = calculate_quality_from_metadata(metadata)
+
+          # Check for issues (all keys are strings)
+          issues = []
+          issues << "No metadata" if metadata.empty?
+          issues << "Confidence: #{metadata["confidence"]}" if should_flag_confidence?(metadata["confidence"], confidence_filter)
+          issues << "No type specified" if metadata["type"].nil?
+          issues << "No tags" if (metadata["tags"] || []).empty?
+          issues << "No related links" if (metadata["related"] || []).empty?
+          issues << "Not embedded" if !entry.embedded? && @embedder
+
+          next if issues.empty?
+
+          low_quality << {
+            path: entry_info[:path],
+            title: entry.title,
+            issues: issues,
+            confidence: metadata["confidence"] || "unknown",
+            quality_score: quality,
+          }
+        end
+
+        low_quality.sort_by { |e| e[:quality_score] }
+      end
+
+      # Find entries that could be archived (old and unused)
+      #
+      # @param age_days [Integer] Minimum age in days
+      # @return [Array<Hash>] Archival candidates
+      def find_archival_candidates(age_days: 90)
+        entries = @adapter.list
+        cutoff_date = Time.now - (age_days * 24 * 60 * 60)
+
+        candidates = entries.select do |entry_info|
+          entry_info[:updated_at] < cutoff_date
+        end
+
+        candidates.map do |entry_info|
+          entry = @adapter.read_entry(file_path: entry_info[:path])
+          metadata = entry.metadata || {}
+
+          {
+            path: entry_info[:path],
+            title: entry.title,
+            age_days: ((Time.now - entry_info[:updated_at]) / 86400).round,
+            last_verified: metadata["last_verified"],
+            confidence: metadata["confidence"] || "unknown",
+            size: entry.size,
+          }
+        end.sort_by { |e| -e[:age_days] }
+      end
+
+      # Generate formatted report for duplicates
+      #
+      # @param threshold [Float] Similarity threshold
+      # @return [String] Formatted report
+      def find_duplicates_report(threshold: 0.85)
+        duplicates = find_duplicates(threshold: threshold)
+
+        return "No duplicate entries found above #{(threshold * 100).round}% similarity." if duplicates.empty?
+
+        report = []
+        report << "# Potential Duplicates (#{duplicates.size} pairs)"
+        report << ""
+        report << "Found #{duplicates.size} pair(s) of similar entries that could potentially be merged."
+        report << ""
+
+        duplicates.each_with_index do |dup, index|
+          report << "## Pair #{index + 1}: #{dup[:similarity]}% similar"
+          report << "- memory://#{dup[:path1]}"
+          report << " Title: \"#{dup[:title1]}\""
+          report << " Size: #{format_bytes(dup[:size1])}"
+          report << "- memory://#{dup[:path2]}"
+          report << " Title: \"#{dup[:title2]}\""
+          report << " Size: #{format_bytes(dup[:size2])}"
+          report << ""
+          report << " Text similarity: #{dup[:text_similarity]}%"
+          report << if dup[:semantic_similarity]
+            " Semantic similarity: #{dup[:semantic_similarity]}%"
+          else
+            " Semantic similarity: N/A (no embeddings)"
+          end
+          report << ""
+          report << " **Suggestion:** Review both entries and consider merging with MemoryMultiEdit"
+          report << ""
+        end
+
+        report.join("\n")
+      end
+
+      # Generate formatted report for low-quality entries
+      #
+      # @param confidence_filter [String] Confidence level filter
+      # @return [String] Formatted report
+      def find_low_quality_report(confidence_filter: "low")
+        entries = find_low_quality(confidence_filter: confidence_filter)
+
+        return "No low-quality entries found." if entries.empty?
+
+        report = []
+        report << "# Low-Quality Entries (#{entries.size} entries)"
+        report << ""
+        report << "Found #{entries.size} entry/entries with quality issues."
+        report << ""
+
+        entries.each do |entry|
+          report << "## memory://#{entry[:path]}"
+          report << "- Title: #{entry[:title]}"
+          report << "- Quality score: #{entry[:quality_score]}/100"
+          report << "- Confidence: #{entry[:confidence]}"
+          report << "- Issues:"
+          entry[:issues].each do |issue|
+            report << " - #{issue}"
+          end
+          report << ""
+          report << " **Suggestion:** Add proper frontmatter and metadata with MemoryEdit"
+          report << ""
+        end
+
+        report.join("\n")
+      end
+
+      # Generate formatted report for archival candidates
+      #
+      # @param age_days [Integer] Age threshold
+      # @return [String] Formatted report
+      def find_archival_candidates_report(age_days: 90)
+        candidates = find_archival_candidates(age_days: age_days)
+
+        return "No entries older than #{age_days} days found." if candidates.empty?
+
+        report = []
+        report << "# Archival Candidates (#{candidates.size} entries older than #{age_days} days)"
+        report << ""
+        report << "Found #{candidates.size} old entry/entries that could be archived."
+        report << ""
+
+        candidates.each do |entry|
+          report << "## memory://#{entry[:path]}"
+          report << "- Title: #{entry[:title]}"
+          report << "- Age: #{entry[:age_days]} days"
+          report << "- Last verified: #{entry[:last_verified] || "never"}"
+          report << "- Confidence: #{entry[:confidence]}"
+          report << "- Size: #{format_bytes(entry[:size])}"
+          report << ""
+          report << " **Suggestion:** Review and delete with MemoryDelete if truly obsolete, or use compact action with appropriate thresholds"
+          report << ""
+        end
+
+        report.join("\n")
+      end
+
+      # Find related entries that should be cross-linked
+      #
+      # Finds entry pairs with semantic similarity in the "related" range
+      # but NOT duplicates. Uses pure semantic similarity (no keyword boost).
+      #
+      # @param min_threshold [Float] Minimum similarity for relationships (default: 0.60)
+      # @param max_threshold [Float] Maximum similarity (above = duplicates) (default: 0.85)
+      # @return [Array<Hash>] Related pairs
+      def find_related(min_threshold: 0.60, max_threshold: 0.85)
+        entries = @adapter.list
+        return [] if entries.size < 2
+
+        related_pairs = []
+        all_entries = @adapter.all_entries
+
+        # Compare all pairs
+        entry_paths = entries.map { |e| e[:path] }
+        entry_paths.combination(2).each do |path1, path2|
+          entry1 = all_entries[path1]
+          entry2 = all_entries[path2]
+
+          # Skip if no embeddings (need semantic similarity)
+          next unless entry1.embedded? && entry2.embedded?
+
+          # Calculate PURE semantic similarity (no keyword boosting for merging)
+          semantic_sim = Search::TextSimilarity.cosine(entry1.embedding, entry2.embedding)
+
+          # Must be in the "related" range
+          next if semantic_sim < min_threshold
+          next if semantic_sim >= max_threshold
+
+          # Check current linking status
+          entry1_related = (entry1.metadata["related"] || []).map { |r| r.sub(%r{^memory://}, "") }
+          entry2_related = (entry2.metadata["related"] || []).map { |r| r.sub(%r{^memory://}, "") }
+
+          linked_1_to_2 = entry1_related.include?(path2)
+          linked_2_to_1 = entry2_related.include?(path1)
+          already_linked = linked_1_to_2 && linked_2_to_1
+
+          # Extract metadata
+          type1 = entry1.metadata["type"] || "unknown"
+          type2 = entry2.metadata["type"] || "unknown"
+
+          related_pairs << {
+            path1: path1,
+            path2: path2,
+            similarity: (semantic_sim * 100).round(1),
+            title1: entry1.title,
+            title2: entry2.title,
+            type1: type1,
+            type2: type2,
+            already_linked: already_linked,
+            linked_1_to_2: linked_1_to_2,
+            linked_2_to_1: linked_2_to_1,
+          }
+        end
+
+        related_pairs.sort_by { |d| -d[:similarity] }
+      end
+
+      # Generate formatted report for related entries
+      #
+      # @param min_threshold [Float] Minimum similarity
+      # @param max_threshold [Float] Maximum similarity
+      # @return [String] Formatted report
+      def find_related_report(min_threshold: 0.60, max_threshold: 0.85)
+        pairs = find_related(min_threshold: min_threshold, max_threshold: max_threshold)
+
+        return "No related entry pairs found in #{(min_threshold * 100).round}-#{(max_threshold * 100).round}% similarity range." if pairs.empty?
+
+        report = []
+        report << "# Related Entries (#{pairs.size} pairs)"
+        report << ""
+        report << "Found #{pairs.size} pair(s) of semantically related entries."
+        report << "Similarity range: #{(min_threshold * 100).round}-#{(max_threshold * 100).round}% (pure semantic, no keyword boost)"
+        report << ""
+
+        pairs.each_with_index do |pair, index|
+          report << "## Pair #{index + 1}: #{pair[:similarity]}% similar"
+          report << "- memory://#{pair[:path1]} (#{pair[:type1]})"
+          report << " \"#{pair[:title1]}\""
+          report << "- memory://#{pair[:path2]} (#{pair[:type2]})"
+          report << " \"#{pair[:title2]}\""
+          report << ""
+
+          if pair[:already_linked]
+            report << " ✓ Already linked bidirectionally"
+          elsif pair[:linked_1_to_2]
+            report << " → Entry 1 links to Entry 2, but not vice versa"
+            report << " **Suggestion:** Add backward link from Entry 2 to Entry 1"
+          elsif pair[:linked_2_to_1]
+            report << " → Entry 2 links to Entry 1, but not vice versa"
+            report << " **Suggestion:** Add backward link from Entry 1 to Entry 2"
+          else
+            report << " **Suggestion:** Add bidirectional links to cross-reference these related entries"
+          end
+          report << ""
+        end
+
+        report << "To automatically create missing links, use:"
+        report << " MemoryDefrag(action: \"link_related\", dry_run: true) # Preview first"
+        report << " MemoryDefrag(action: \"link_related\", dry_run: false) # Execute"
+        report << ""
+
+        report.join("\n")
+      end
+
+      # ============================================================================
+      # ACTIVE OPTIMIZATION OPERATIONS (Actually modify memory)
+      # ============================================================================
+
+      # Merge duplicate entries
+      #
+      # @param threshold [Float] Similarity threshold
+      # @param strategy [Symbol] Merge strategy (:keep_newer, :keep_larger, :combine)
+      # @param dry_run [Boolean] If true, show what would be done without doing it
+      # @return [String] Result report
+      def merge_duplicates_active(threshold: 0.85, strategy: :keep_newer, dry_run: true)
+        duplicates = find_duplicates(threshold: threshold)
+
+        return "No duplicates found above #{(threshold * 100).round}% similarity." if duplicates.empty?
+
+        results = []
+        freed_bytes = 0
+
+        duplicates.each do |pair|
+          if dry_run
+            results << "Would merge: #{pair[:path2]} → #{pair[:path1]} (#{pair[:similarity]}% similar)"
+          else
+            # Actually merge
+            result_info = merge_pair(pair, strategy: strategy)
+            freed_bytes += result_info[:freed_bytes]
+            results << "✓ Merged: #{result_info[:merged_path]} → #{result_info[:kept_path]}"
+          end
+        end
+
+        format_merge_report(results, duplicates.size, freed_bytes, dry_run)
+      end
+
+      # Clean up old stub files
+      #
+      # @param min_age_days [Integer] Minimum age for cleanup
+      # @param max_hits [Integer] Maximum hits to consider for cleanup
+      # @param dry_run [Boolean] Preview mode
+      # @return [String] Result report
+      def cleanup_stubs_active(min_age_days: 30, max_hits: 3, dry_run: true)
+        stubs = find_stubs_to_cleanup(min_age_days: min_age_days, max_hits: max_hits)
+
+        return "No stubs found for cleanup." if stubs.empty?
+
+        results = []
+        freed_bytes = 0
+
+        stubs.each do |stub|
+          if dry_run
+            results << "Would delete stub: #{stub[:path]} (age: #{stub[:age_days]}d, hits: #{stub[:hits]})"
+          else
+            freed_bytes += stub[:size]
+            @adapter.delete(file_path: stub[:path])
+            results << "✓ Deleted stub: #{stub[:path]}"
+          end
+        end
+
+        format_cleanup_report(results, stubs.size, freed_bytes, dry_run)
+      end
+
+      # Compact low-value entries (delete permanently)
+      #
+      # @param min_quality_score [Integer] Minimum quality threshold (0-100)
+      # @param min_age_days [Integer] Minimum age
+      # @param max_hits [Integer] Maximum hits
+      # @param dry_run [Boolean] Preview mode
+      # @return [String] Result report
+      def compact_active(min_quality_score: 20, min_age_days: 30, max_hits: 0, dry_run: true)
+        entries = @adapter.list
+        low_value = []
+
+        entries.each do |entry_info|
+          entry = @adapter.read_entry(file_path: entry_info[:path])
+
+          # Calculate quality from metadata (not content)
+          quality = calculate_quality_from_metadata(entry.metadata || {})
+
+          age_days = ((Time.now - entry.updated_at) / 86400).round
+          hits = entry.metadata&.dig("hits") || 0
+
+          next if quality >= min_quality_score || age_days < min_age_days || hits > max_hits
+
+          low_value << {
+            path: entry_info[:path],
+            quality: quality,
+            age_days: age_days,
+            hits: hits,
+            size: entry.size,
+          }
+        end
+
+        return "No low-value entries found for compaction." if low_value.empty?
+
+        results = []
+        freed_bytes = 0
+
+        low_value.each do |entry|
+          if dry_run
+            results << "Would delete: #{entry[:path]} (quality: #{entry[:quality]}, age: #{entry[:age_days]}d, hits: #{entry[:hits]})"
+          else
+            freed_bytes += entry[:size]
+            @adapter.delete(file_path: entry[:path])
+            results << "✓ Deleted: #{entry[:path]}"
+          end
+        end
+
+        format_compact_report(results, low_value.size, freed_bytes, dry_run)
+      end
+
+      # Create bidirectional links between related entries
+      #
+      # Finds related pairs and updates their 'related' metadata to cross-reference each other.
+      #
+      # @param min_threshold [Float] Minimum similarity (default: 0.60)
+      # @param max_threshold [Float] Maximum similarity (default: 0.85)
+      # @param dry_run [Boolean] Preview mode (default: true)
+      # @return [String] Result report
+      def link_related_active(min_threshold: 0.60, max_threshold: 0.85, dry_run: true)
+        pairs = find_related(min_threshold: min_threshold, max_threshold: max_threshold)
+
+        # Filter to only pairs that need linking
+        needs_linking = pairs.reject { |p| p[:already_linked] }
+
+        if needs_linking.empty?
+          return "No related entries found that need linking. All similar entries are already cross-referenced."
+        end
+
+        report = []
+        report << (dry_run ? "# Link Related Entries (DRY RUN)" : "# Link Related Entries")
+        report << ""
+        report << "Found #{needs_linking.size} pair(s) that should be cross-linked."
+        report << ""
+
+        links_created = 0
+
+        needs_linking.each_with_index do |pair, index|
+          report << "## Pair #{index + 1}: #{pair[:similarity]}% similar"
+          report << "- memory://#{pair[:path1]}"
+          report << "- memory://#{pair[:path2]}"
+          report << ""
+
+          if dry_run
+            # Show what would happen
+            if !pair[:linked_1_to_2] && !pair[:linked_2_to_1]
+              report << " Would add bidirectional links:"
+              report << " - Add #{pair[:path2]} to #{pair[:path1]}'s related array"
+              report << " - Add #{pair[:path1]} to #{pair[:path2]}'s related array"
+            elsif !pair[:linked_1_to_2]
+              report << " Would add backward link:"
+              report << " - Add #{pair[:path2]} to #{pair[:path1]}'s related array"
+            elsif !pair[:linked_2_to_1]
+              report << " Would add backward link:"
+              report << " - Add #{pair[:path1]} to #{pair[:path2]}'s related array"
+            end
+          else
+            # Actually create links
+            created = create_bidirectional_links(pair[:path1], pair[:path2], pair[:linked_1_to_2], pair[:linked_2_to_1])
+            links_created += created
+
+            report << " ✓ Created #{created} link(s)"
+          end
+          report << ""
+        end
+
+        report << if dry_run
+          "**DRY RUN:** No changes made. Set dry_run=false to execute."
+        else
+          "**COMPLETED:** Created #{links_created} link(s) across #{needs_linking.size} pairs."
+        end
+
+        report.join("\n")
+      end
+
+      # Full optimization (all operations)
+      #
+      # @param dry_run [Boolean] Preview mode (default: true)
+      # @return [String] Complete optimization report
+      def full_optimization(dry_run: true)
+        report = []
+        report << "# Full Memory Optimization"
+        report << ""
+        mode_message = dry_run ? "## DRY RUN MODE - No changes will be made" : "## ACTIVE MODE - Performing optimizations"
+        report << mode_message
+        report << ""
+
+        # 1. Health baseline
+        initial_health = @analyzer.analyze
+        report << "Initial health score: #{initial_health[:health_score]}/100"
+        report << ""
+
+        # 2. Merge duplicates
+        report << "## 1. Merging Duplicates"
+        report << merge_duplicates_active(dry_run: dry_run)
+        report << ""
+
+        # 3. Cleanup stubs
+        report << "## 2. Cleaning Up Stubs"
+        report << cleanup_stubs_active(dry_run: dry_run)
+        report << ""
+
+        # 4. Compact low-value
+        report << "## 3. Compacting Low-Value Entries"
+        report << compact_active(dry_run: dry_run)
+        report << ""
+
+        # 6. Final health check
+        unless dry_run
+          final_health = @analyzer.analyze
+          report << "## Summary"
+          report << "Health score: #{initial_health[:health_score]} → #{final_health[:health_score]} (+#{final_health[:health_score] - initial_health[:health_score]})"
+        end
+
+        report.join("\n")
+      end
+
+      private
+
+      def should_flag_confidence?(confidence, filter_level)
+        return false if confidence.nil?
+
+        levels = { "low" => 0, "medium" => 1, "high" => 2 }
+        filter_rank = levels[filter_level] || 0
+        entry_rank = levels[confidence] || 0
+
+        entry_rank <= filter_rank
+      end
+
+      def format_bytes(bytes)
+        if bytes >= 1_000_000
+          "#{(bytes.to_f / 1_000_000).round(1)}MB"
+        elsif bytes >= 1_000
+          "#{(bytes.to_f / 1_000).round(1)}KB"
+        else
+          "#{bytes}B"
+        end
+      end
+
+      # Calculate quality score from metadata (not from content parsing)
+      #
+      # @param metadata [Hash] Metadata hash (string keys guaranteed)
+      # @return [Integer] Quality score 0-100
+      def calculate_quality_from_metadata(metadata)
+        return 0 if metadata.nil? || metadata.empty?
+
+        score = 0
+
+        # All keys are strings (no defensive checks needed)
+        score += 20 if metadata["type"]
+        score += 20 if metadata["confidence"]
+        score += 15 unless (metadata["tags"] || []).empty?
+        score += 15 unless (metadata["related"] || []).empty?
+        score += 10 if metadata["domain"]
+        score += 10 if metadata["last_verified"]
+        score += 10 if metadata["confidence"] == "high"
+
+        score
+      end
+
+      # ============================================================================
+      # HELPER METHODS FOR ACTIVE OPERATIONS
+      # ============================================================================
+
+      # Create bidirectional links between two entries
+      #
+      # Updates the 'related' metadata arrays to cross-reference entries.
+      #
+      # @param path1 [String] First entry path
+      # @param path2 [String] Second entry path
+      # @param already_linked_1_to_2 [Boolean] If entry1 already links to entry2
+      # @param already_linked_2_to_1 [Boolean] If entry2 already links to entry1
+      # @return [Integer] Number of links created (0-2)
+      def create_bidirectional_links(path1, path2, already_linked_1_to_2, already_linked_2_to_1)
+        links_created = 0
+        all_entries = @adapter.all_entries
+
+        # Add path2 to entry1's related array (if not already there)
+        unless already_linked_1_to_2
+          entry1 = all_entries[path1]
+          related_array = entry1.metadata["related"] || []
+          related_array << "memory://#{path2}"
+
+          # Update entry1
+          metadata = entry1.metadata.dup
+          metadata["related"] = related_array.uniq
+
+          @adapter.write(
+            file_path: path1,
+            content: entry1.content,
+            title: entry1.title,
+            embedding: entry1.embedding,
+            metadata: metadata,
+          )
+
+          links_created += 1
+        end
+
+        # Add path1 to entry2's related array (if not already there)
+        unless already_linked_2_to_1
+          entry2 = all_entries[path2]
+          related_array = entry2.metadata["related"] || []
+          related_array << "memory://#{path1}"
+
+          # Update entry2
+          metadata = entry2.metadata.dup
+          metadata["related"] = related_array.uniq
+
+          @adapter.write(
+            file_path: path2,
+            content: entry2.content,
+            title: entry2.title,
+            embedding: entry2.embedding,
+            metadata: metadata,
+          )
+
+          links_created += 1
+        end
+
+        links_created
+      end
+
+      # Merge a pair of duplicate entries
+      #
+      # @param pair [Hash] Duplicate pair info
+      # @param strategy [Symbol] Merge strategy
+      # @return [Hash] Result info with :kept_path, :merged_path, :freed_bytes
+      def merge_pair(pair, strategy:)
+        entry1 = @adapter.read_entry(file_path: pair[:path1])
+        entry2 = @adapter.read_entry(file_path: pair[:path2])
+
+        # Decide which to keep and which to merge
+        keep_path, merge_path, keep_entry, merge_entry = case strategy
+        when :keep_newer
+          if entry1.updated_at > entry2.updated_at
+            [pair[:path1], pair[:path2], entry1, entry2]
+          else
+            [pair[:path2], pair[:path1], entry2, entry1]
+          end
+        when :keep_larger
+          if entry1.size > entry2.size
+            [pair[:path1], pair[:path2], entry1, entry2]
+          else
+            [pair[:path2], pair[:path1], entry2, entry1]
+          end
+        when :combine
+          # Keep path1, merge content from path2
+          [pair[:path1], pair[:path2], entry1, entry2]
+        else
+          [pair[:path1], pair[:path2], entry1, entry2]
+        end
+
+        # Merge content if combining
+        if strategy == :combine
+          merged_content = combine_contents(keep_entry.content, merge_entry.content)
+          merged_metadata = combine_metadata(keep_entry.metadata, merge_entry.metadata)
+
+          @adapter.write(
+            file_path: keep_path,
+            content: merged_content,
+            title: keep_entry.title,
+            embedding: keep_entry.embedding,
+            metadata: merged_metadata,
+          )
+        end
+
+        # Create stub at merged location
+        create_stub(from: merge_path, to: keep_path, reason: "merged")
+
+        # Return result info
+        {
+          kept_path: keep_path,
+          merged_path: merge_path,
+          freed_bytes: merge_entry.size,
+        }
+      end
+
+      # Create a stub (redirect) file
+      #
+      # @param from [String] Original path
+      # @param to [String] Target path
+      # @param reason [String] Reason (merged, moved)
+      # @return [void]
+      def create_stub(from:, to:, reason:)
+        stub_content = "# #{reason} → #{to}\n\nThis entry was #{reason} into #{to}."
+
+        @adapter.write(
+          file_path: from,
+          content: stub_content,
+          title: "[STUB] → #{to}",
+          metadata: { "stub" => true, "redirect_to" => to, "reason" => reason },
+        )
+      end
+
+      # Find stubs that can be cleaned up
+      #
+      # @param min_age_days [Integer] Minimum age
+      # @param max_hits [Integer] Maximum hits
+      # @return [Array<Hash>] Stub info
+      def find_stubs_to_cleanup(min_age_days:, max_hits:)
+        stubs = []
+        Time.now
+
+        @adapter.list.each do |entry_info|
+          entry = @adapter.read_entry(file_path: entry_info[:path])
+
+          # Check if it's a stub
+          next unless entry.content.start_with?("# merged →", "# moved →")
+
+          age_days = ((Time.now - entry.updated_at) / 86400).round
+          hits = entry.metadata&.dig("hits") || 0
+
+          next if age_days < min_age_days || hits > max_hits
+
+          stubs << {
+            path: entry_info[:path],
+            age_days: age_days,
+            hits: hits,
+            size: entry.size,
+          }
+        end
+
+        stubs
+      end
+
+      # Combine contents from two entries
+      #
+      # @param content1 [String] First content
+      # @param content2 [String] Second content
+      # @return [String] Combined content
+      def combine_contents(content1, content2)
+        # Simple concatenation with separator
+        # TODO: Could be smarter (LLM-based merge)
+        "#{content1}\n\n---\n\n#{content2}"
+      end
+
+      # Combine metadata from two entries
+      #
+      # @param metadata1 [Hash] First metadata
+      # @param metadata2 [Hash] Second metadata
+      # @return [Hash] Combined metadata
+      def combine_metadata(metadata1, metadata2)
+        return metadata2 if metadata1.nil?
+        return metadata1 if metadata2.nil?
+
+        # Merge tags and related links
+        combined = metadata1.dup
+        combined["tags"] = ((metadata1["tags"] || []) + (metadata2["tags"] || [])).uniq
+        combined["related"] = ((metadata1["related"] || []) + (metadata2["related"] || [])).uniq
+
+        combined
+      end
+
+      # Format merge operation report
+      #
+      # @param results [Array<String>] Result messages
+      # @param count [Integer] Number of merges
+      # @param freed_bytes [Integer] Bytes freed
+      # @param dry_run [Boolean] Dry run mode
+      # @return [String] Formatted report
+      def format_merge_report(results, count, freed_bytes, dry_run)
+        report = []
+        header = dry_run ? "Found #{count} duplicate pair(s) to merge:" : "Merged #{count} duplicate pair(s):"
+        report << header
+        report << ""
+        results.each { |r| report << r }
+        report << ""
+        report << "Space freed: #{format_bytes(freed_bytes)}" unless dry_run
+        report.join("\n")
+      end
+
+      # Format cleanup report
+      def format_cleanup_report(results, count, freed_bytes, dry_run)
+        report = []
+        header = dry_run ? "Found #{count} stub(s) to clean up:" : "Cleaned up #{count} stub(s):"
+        report << header
+        report << ""
+        results.each { |r| report << r }
+        report << ""
+        report << "Space freed: #{format_bytes(freed_bytes)}" unless dry_run
+        report.join("\n")
+      end
+
+      # Format compact report
+      def format_compact_report(results, count, freed_bytes, dry_run)
+        report = []
+        header = dry_run ? "Found #{count} low-value entry/entries to delete:" : "Deleted #{count} low-value entry/entries:"
+        report << header
+        report << ""
+        results.each { |r| report << r }
+        report << ""
+        report << "Space freed: #{format_bytes(freed_bytes)}" unless dry_run
+        report.join("\n")
+      end
+    end
+  end
+end
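
For reviewers trying out the new class, here is a minimal usage sketch of SwarmMemory::Optimization::Defragmenter covering read-only analysis and a dry-run optimization. Only the Defragmenter calls and their keyword defaults come from the code above; the adapter construction (FilesystemAdapter and its directory: argument) is an assumption for illustration, since that class's initializer is not shown in this diff.

# Minimal sketch — adapter setup is assumed, not taken from this diff.
require "swarm_memory"

# Hypothetical setup: FilesystemAdapter ships in this release, but its
# initializer signature is not part of this file, so `directory:` is a guess.
adapter = SwarmMemory::Adapters::FilesystemAdapter.new(directory: File.expand_path("~/.swarm/memory"))

defrag = SwarmMemory::Optimization::Defragmenter.new(adapter: adapter)

# Read-only analysis: nothing in memory is modified.
puts defrag.health_report
puts defrag.full_analysis(similarity_threshold: 0.85, age_days: 90, confidence_filter: "low")

# Active operations default to dry_run: true; pass dry_run: false to apply changes.
puts defrag.full_optimization(dry_run: true)
puts defrag.link_related_active(min_threshold: 0.60, max_threshold: 0.85, dry_run: true)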