claude_swarm 1.0.1 → 1.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (267) hide show
  1. checksums.yaml +4 -4
  2. data/.claude/commands/release.md +1 -1
  3. data/.claude/hooks/lint-code-files.rb +65 -0
  4. data/.rubocop.yml +22 -2
  5. data/CHANGELOG.md +14 -1
  6. data/CLAUDE.md +1 -1
  7. data/CONTRIBUTING.md +69 -0
  8. data/README.md +27 -2
  9. data/Rakefile +71 -3
  10. data/analyze_coverage.rb +94 -0
  11. data/docs/v2/CHANGELOG.swarm_cli.md +43 -0
  12. data/docs/v2/CHANGELOG.swarm_memory.md +379 -0
  13. data/docs/v2/CHANGELOG.swarm_sdk.md +362 -0
  14. data/docs/v2/README.md +308 -0
  15. data/docs/v2/guides/claude-code-agents.md +262 -0
  16. data/docs/v2/guides/complete-tutorial.md +3088 -0
  17. data/docs/v2/guides/getting-started.md +1456 -0
  18. data/docs/v2/guides/memory-adapters.md +998 -0
  19. data/docs/v2/guides/plugins.md +816 -0
  20. data/docs/v2/guides/quick-start-cli.md +1745 -0
  21. data/docs/v2/guides/rails-integration.md +1902 -0
  22. data/docs/v2/guides/swarm-memory.md +599 -0
  23. data/docs/v2/reference/cli.md +729 -0
  24. data/docs/v2/reference/ruby-dsl.md +2154 -0
  25. data/docs/v2/reference/yaml.md +1835 -0
  26. data/docs-team-swarm.yml +2222 -0
  27. data/examples/learning-assistant/assistant.md +7 -0
  28. data/examples/learning-assistant/example-memories/concept-example.md +90 -0
  29. data/examples/learning-assistant/example-memories/experience-example.md +66 -0
  30. data/examples/learning-assistant/example-memories/fact-example.md +76 -0
  31. data/examples/learning-assistant/example-memories/memory-index.md +78 -0
  32. data/examples/learning-assistant/example-memories/skill-example.md +168 -0
  33. data/examples/learning-assistant/learning_assistant.rb +34 -0
  34. data/examples/learning-assistant/learning_assistant.yml +20 -0
  35. data/examples/v2/dsl/01_basic.rb +44 -0
  36. data/examples/v2/dsl/02_core_parameters.rb +59 -0
  37. data/examples/v2/dsl/03_capabilities.rb +71 -0
  38. data/examples/v2/dsl/04_llm_parameters.rb +56 -0
  39. data/examples/v2/dsl/05_advanced_flags.rb +73 -0
  40. data/examples/v2/dsl/06_permissions.rb +80 -0
  41. data/examples/v2/dsl/07_mcp_server.rb +62 -0
  42. data/examples/v2/dsl/08_swarm_hooks.rb +53 -0
  43. data/examples/v2/dsl/09_agent_hooks.rb +67 -0
  44. data/examples/v2/dsl/10_all_agents_hooks.rb +67 -0
  45. data/examples/v2/dsl/11_delegation.rb +60 -0
  46. data/examples/v2/dsl/12_complete_integration.rb +137 -0
  47. data/examples/v2/file_tools_swarm.yml +102 -0
  48. data/examples/v2/hooks/01_basic_hooks.rb +133 -0
  49. data/examples/v2/hooks/02_usage_tracking.rb +201 -0
  50. data/examples/v2/hooks/03_production_monitoring.rb +429 -0
  51. data/examples/v2/hooks/agent_stop_exit_0.yml +21 -0
  52. data/examples/v2/hooks/agent_stop_exit_1.yml +21 -0
  53. data/examples/v2/hooks/agent_stop_exit_2.yml +26 -0
  54. data/examples/v2/hooks/multiple_hooks_all_pass.yml +37 -0
  55. data/examples/v2/hooks/multiple_hooks_first_fails.yml +37 -0
  56. data/examples/v2/hooks/multiple_hooks_second_fails.yml +37 -0
  57. data/examples/v2/hooks/multiple_hooks_warnings.yml +37 -0
  58. data/examples/v2/hooks/post_tool_use_exit_0.yml +24 -0
  59. data/examples/v2/hooks/post_tool_use_exit_1.yml +24 -0
  60. data/examples/v2/hooks/post_tool_use_exit_2.yml +24 -0
  61. data/examples/v2/hooks/post_tool_use_multi_matcher_exit_0.yml +26 -0
  62. data/examples/v2/hooks/post_tool_use_multi_matcher_exit_1.yml +26 -0
  63. data/examples/v2/hooks/post_tool_use_multi_matcher_exit_2.yml +26 -0
  64. data/examples/v2/hooks/pre_tool_use_exit_0.yml +24 -0
  65. data/examples/v2/hooks/pre_tool_use_exit_1.yml +24 -0
  66. data/examples/v2/hooks/pre_tool_use_exit_2.yml +24 -0
  67. data/examples/v2/hooks/pre_tool_use_multi_matcher_exit_0.yml +26 -0
  68. data/examples/v2/hooks/pre_tool_use_multi_matcher_exit_1.yml +26 -0
  69. data/examples/v2/hooks/pre_tool_use_multi_matcher_exit_2.yml +27 -0
  70. data/examples/v2/hooks/swarm_summary.sh +44 -0
  71. data/examples/v2/hooks/user_prompt_exit_0.yml +21 -0
  72. data/examples/v2/hooks/user_prompt_exit_1.yml +21 -0
  73. data/examples/v2/hooks/user_prompt_exit_2.yml +21 -0
  74. data/examples/v2/hooks/validate_bash.rb +59 -0
  75. data/examples/v2/multi_directory_permissions.yml +221 -0
  76. data/examples/v2/node_context_demo.rb +127 -0
  77. data/examples/v2/node_workflow.rb +173 -0
  78. data/examples/v2/path_resolution_demo.rb +216 -0
  79. data/examples/v2/simple-swarm-v2.rb +90 -0
  80. data/examples/v2/simple-swarm-v2.yml +62 -0
  81. data/examples/v2/swarm.yml +71 -0
  82. data/examples/v2/swarm_with_hooks.yml +61 -0
  83. data/examples/v2/swarm_with_hooks_simple.yml +25 -0
  84. data/examples/v2/think_tool_demo.rb +62 -0
  85. data/exe/swarm +6 -0
  86. data/lib/claude_swarm/claude_mcp_server.rb +0 -6
  87. data/lib/claude_swarm/cli.rb +10 -3
  88. data/lib/claude_swarm/commands/ps.rb +19 -20
  89. data/lib/claude_swarm/commands/show.rb +1 -1
  90. data/lib/claude_swarm/configuration.rb +10 -12
  91. data/lib/claude_swarm/mcp_generator.rb +10 -1
  92. data/lib/claude_swarm/orchestrator.rb +73 -49
  93. data/lib/claude_swarm/system_utils.rb +37 -11
  94. data/lib/claude_swarm/version.rb +1 -1
  95. data/lib/claude_swarm/worktree_manager.rb +1 -0
  96. data/lib/claude_swarm/yaml_loader.rb +22 -0
  97. data/lib/claude_swarm.rb +7 -2
  98. data/lib/swarm_cli/cli.rb +201 -0
  99. data/lib/swarm_cli/command_registry.rb +61 -0
  100. data/lib/swarm_cli/commands/mcp_serve.rb +130 -0
  101. data/lib/swarm_cli/commands/mcp_tools.rb +148 -0
  102. data/lib/swarm_cli/commands/migrate.rb +55 -0
  103. data/lib/swarm_cli/commands/run.rb +173 -0
  104. data/lib/swarm_cli/config_loader.rb +97 -0
  105. data/lib/swarm_cli/formatters/human_formatter.rb +711 -0
  106. data/lib/swarm_cli/formatters/json_formatter.rb +51 -0
  107. data/lib/swarm_cli/interactive_repl.rb +918 -0
  108. data/lib/swarm_cli/mcp_serve_options.rb +44 -0
  109. data/lib/swarm_cli/mcp_tools_options.rb +59 -0
  110. data/lib/swarm_cli/migrate_options.rb +54 -0
  111. data/lib/swarm_cli/migrator.rb +132 -0
  112. data/lib/swarm_cli/options.rb +151 -0
  113. data/lib/swarm_cli/ui/components/agent_badge.rb +33 -0
  114. data/lib/swarm_cli/ui/components/content_block.rb +120 -0
  115. data/lib/swarm_cli/ui/components/divider.rb +57 -0
  116. data/lib/swarm_cli/ui/components/panel.rb +62 -0
  117. data/lib/swarm_cli/ui/components/usage_stats.rb +70 -0
  118. data/lib/swarm_cli/ui/formatters/cost.rb +49 -0
  119. data/lib/swarm_cli/ui/formatters/number.rb +58 -0
  120. data/lib/swarm_cli/ui/formatters/text.rb +77 -0
  121. data/lib/swarm_cli/ui/formatters/time.rb +73 -0
  122. data/lib/swarm_cli/ui/icons.rb +59 -0
  123. data/lib/swarm_cli/ui/renderers/event_renderer.rb +188 -0
  124. data/lib/swarm_cli/ui/state/agent_color_cache.rb +45 -0
  125. data/lib/swarm_cli/ui/state/depth_tracker.rb +40 -0
  126. data/lib/swarm_cli/ui/state/spinner_manager.rb +170 -0
  127. data/lib/swarm_cli/ui/state/usage_tracker.rb +62 -0
  128. data/lib/swarm_cli/version.rb +5 -0
  129. data/lib/swarm_cli.rb +44 -0
  130. data/lib/swarm_memory/adapters/base.rb +141 -0
  131. data/lib/swarm_memory/adapters/filesystem_adapter.rb +845 -0
  132. data/lib/swarm_memory/chat_extension.rb +34 -0
  133. data/lib/swarm_memory/cli/commands.rb +306 -0
  134. data/lib/swarm_memory/core/entry.rb +37 -0
  135. data/lib/swarm_memory/core/frontmatter_parser.rb +108 -0
  136. data/lib/swarm_memory/core/metadata_extractor.rb +68 -0
  137. data/lib/swarm_memory/core/path_normalizer.rb +75 -0
  138. data/lib/swarm_memory/core/semantic_index.rb +244 -0
  139. data/lib/swarm_memory/core/storage.rb +288 -0
  140. data/lib/swarm_memory/core/storage_read_tracker.rb +63 -0
  141. data/lib/swarm_memory/dsl/builder_extension.rb +40 -0
  142. data/lib/swarm_memory/dsl/memory_config.rb +113 -0
  143. data/lib/swarm_memory/embeddings/embedder.rb +36 -0
  144. data/lib/swarm_memory/embeddings/informers_embedder.rb +152 -0
  145. data/lib/swarm_memory/errors.rb +21 -0
  146. data/lib/swarm_memory/integration/cli_registration.rb +30 -0
  147. data/lib/swarm_memory/integration/configuration.rb +43 -0
  148. data/lib/swarm_memory/integration/registration.rb +31 -0
  149. data/lib/swarm_memory/integration/sdk_plugin.rb +531 -0
  150. data/lib/swarm_memory/optimization/analyzer.rb +244 -0
  151. data/lib/swarm_memory/optimization/defragmenter.rb +863 -0
  152. data/lib/swarm_memory/prompts/memory.md.erb +109 -0
  153. data/lib/swarm_memory/prompts/memory_assistant.md.erb +181 -0
  154. data/lib/swarm_memory/prompts/memory_researcher.md.erb +281 -0
  155. data/lib/swarm_memory/prompts/memory_retrieval.md.erb +78 -0
  156. data/lib/swarm_memory/search/semantic_search.rb +112 -0
  157. data/lib/swarm_memory/search/text_search.rb +42 -0
  158. data/lib/swarm_memory/search/text_similarity.rb +80 -0
  159. data/lib/swarm_memory/skills/meta/deep-learning.md +101 -0
  160. data/lib/swarm_memory/skills/meta/deep-learning.yml +14 -0
  161. data/lib/swarm_memory/tools/load_skill.rb +313 -0
  162. data/lib/swarm_memory/tools/memory_defrag.rb +382 -0
  163. data/lib/swarm_memory/tools/memory_delete.rb +99 -0
  164. data/lib/swarm_memory/tools/memory_edit.rb +185 -0
  165. data/lib/swarm_memory/tools/memory_glob.rb +160 -0
  166. data/lib/swarm_memory/tools/memory_grep.rb +247 -0
  167. data/lib/swarm_memory/tools/memory_multi_edit.rb +281 -0
  168. data/lib/swarm_memory/tools/memory_read.rb +123 -0
  169. data/lib/swarm_memory/tools/memory_write.rb +231 -0
  170. data/lib/swarm_memory/utils.rb +50 -0
  171. data/lib/swarm_memory/version.rb +5 -0
  172. data/lib/swarm_memory.rb +166 -0
  173. data/lib/swarm_sdk/agent/RETRY_LOGIC.md +127 -0
  174. data/lib/swarm_sdk/agent/builder.rb +461 -0
  175. data/lib/swarm_sdk/agent/chat/context_tracker.rb +314 -0
  176. data/lib/swarm_sdk/agent/chat/hook_integration.rb +372 -0
  177. data/lib/swarm_sdk/agent/chat/logging_helpers.rb +116 -0
  178. data/lib/swarm_sdk/agent/chat/system_reminder_injector.rb +152 -0
  179. data/lib/swarm_sdk/agent/chat.rb +1159 -0
  180. data/lib/swarm_sdk/agent/context.rb +112 -0
  181. data/lib/swarm_sdk/agent/context_manager.rb +309 -0
  182. data/lib/swarm_sdk/agent/definition.rb +556 -0
  183. data/lib/swarm_sdk/claude_code_agent_adapter.rb +205 -0
  184. data/lib/swarm_sdk/configuration.rb +296 -0
  185. data/lib/swarm_sdk/context_compactor/metrics.rb +147 -0
  186. data/lib/swarm_sdk/context_compactor/token_counter.rb +106 -0
  187. data/lib/swarm_sdk/context_compactor.rb +340 -0
  188. data/lib/swarm_sdk/hooks/adapter.rb +359 -0
  189. data/lib/swarm_sdk/hooks/context.rb +197 -0
  190. data/lib/swarm_sdk/hooks/definition.rb +80 -0
  191. data/lib/swarm_sdk/hooks/error.rb +29 -0
  192. data/lib/swarm_sdk/hooks/executor.rb +146 -0
  193. data/lib/swarm_sdk/hooks/registry.rb +147 -0
  194. data/lib/swarm_sdk/hooks/result.rb +150 -0
  195. data/lib/swarm_sdk/hooks/shell_executor.rb +254 -0
  196. data/lib/swarm_sdk/hooks/tool_call.rb +35 -0
  197. data/lib/swarm_sdk/hooks/tool_result.rb +62 -0
  198. data/lib/swarm_sdk/log_collector.rb +51 -0
  199. data/lib/swarm_sdk/log_stream.rb +69 -0
  200. data/lib/swarm_sdk/markdown_parser.rb +75 -0
  201. data/lib/swarm_sdk/model_aliases.json +5 -0
  202. data/lib/swarm_sdk/models.json +1 -0
  203. data/lib/swarm_sdk/models.rb +120 -0
  204. data/lib/swarm_sdk/node/agent_config.rb +49 -0
  205. data/lib/swarm_sdk/node/builder.rb +439 -0
  206. data/lib/swarm_sdk/node/transformer_executor.rb +248 -0
  207. data/lib/swarm_sdk/node_context.rb +170 -0
  208. data/lib/swarm_sdk/node_orchestrator.rb +384 -0
  209. data/lib/swarm_sdk/permissions/config.rb +239 -0
  210. data/lib/swarm_sdk/permissions/error_formatter.rb +121 -0
  211. data/lib/swarm_sdk/permissions/path_matcher.rb +35 -0
  212. data/lib/swarm_sdk/permissions/validator.rb +173 -0
  213. data/lib/swarm_sdk/permissions_builder.rb +122 -0
  214. data/lib/swarm_sdk/plugin.rb +147 -0
  215. data/lib/swarm_sdk/plugin_registry.rb +101 -0
  216. data/lib/swarm_sdk/prompts/base_system_prompt.md.erb +243 -0
  217. data/lib/swarm_sdk/providers/openai_with_responses.rb +582 -0
  218. data/lib/swarm_sdk/result.rb +97 -0
  219. data/lib/swarm_sdk/swarm/agent_initializer.rb +334 -0
  220. data/lib/swarm_sdk/swarm/all_agents_builder.rb +140 -0
  221. data/lib/swarm_sdk/swarm/builder.rb +586 -0
  222. data/lib/swarm_sdk/swarm/mcp_configurator.rb +151 -0
  223. data/lib/swarm_sdk/swarm/tool_configurator.rb +419 -0
  224. data/lib/swarm_sdk/swarm.rb +982 -0
  225. data/lib/swarm_sdk/tools/bash.rb +274 -0
  226. data/lib/swarm_sdk/tools/clock.rb +44 -0
  227. data/lib/swarm_sdk/tools/delegate.rb +164 -0
  228. data/lib/swarm_sdk/tools/document_converters/base_converter.rb +83 -0
  229. data/lib/swarm_sdk/tools/document_converters/docx_converter.rb +99 -0
  230. data/lib/swarm_sdk/tools/document_converters/html_converter.rb +101 -0
  231. data/lib/swarm_sdk/tools/document_converters/pdf_converter.rb +78 -0
  232. data/lib/swarm_sdk/tools/document_converters/xlsx_converter.rb +194 -0
  233. data/lib/swarm_sdk/tools/edit.rb +150 -0
  234. data/lib/swarm_sdk/tools/glob.rb +158 -0
  235. data/lib/swarm_sdk/tools/grep.rb +228 -0
  236. data/lib/swarm_sdk/tools/image_extractors/docx_image_extractor.rb +43 -0
  237. data/lib/swarm_sdk/tools/image_extractors/pdf_image_extractor.rb +163 -0
  238. data/lib/swarm_sdk/tools/image_formats/tiff_builder.rb +65 -0
  239. data/lib/swarm_sdk/tools/multi_edit.rb +232 -0
  240. data/lib/swarm_sdk/tools/path_resolver.rb +43 -0
  241. data/lib/swarm_sdk/tools/read.rb +251 -0
  242. data/lib/swarm_sdk/tools/registry.rb +93 -0
  243. data/lib/swarm_sdk/tools/scratchpad/scratchpad_list.rb +96 -0
  244. data/lib/swarm_sdk/tools/scratchpad/scratchpad_read.rb +76 -0
  245. data/lib/swarm_sdk/tools/scratchpad/scratchpad_write.rb +91 -0
  246. data/lib/swarm_sdk/tools/stores/read_tracker.rb +61 -0
  247. data/lib/swarm_sdk/tools/stores/scratchpad_storage.rb +224 -0
  248. data/lib/swarm_sdk/tools/stores/storage.rb +148 -0
  249. data/lib/swarm_sdk/tools/stores/todo_manager.rb +65 -0
  250. data/lib/swarm_sdk/tools/think.rb +95 -0
  251. data/lib/swarm_sdk/tools/todo_write.rb +216 -0
  252. data/lib/swarm_sdk/tools/web_fetch.rb +261 -0
  253. data/lib/swarm_sdk/tools/write.rb +117 -0
  254. data/lib/swarm_sdk/utils.rb +50 -0
  255. data/lib/swarm_sdk/version.rb +5 -0
  256. data/lib/swarm_sdk.rb +157 -0
  257. data/llm.v2.txt +13407 -0
  258. data/rubocop/cop/security/no_reflection_methods.rb +47 -0
  259. data/rubocop/cop/security/no_ruby_llm_logger.rb +32 -0
  260. data/swarm_cli.gemspec +57 -0
  261. data/swarm_memory.gemspec +28 -0
  262. data/swarm_sdk.gemspec +41 -0
  263. data/team.yml +1 -1
  264. data/team_full.yml +1875 -0
  265. data/{team_v2.yml → team_sdk.yml} +121 -52
  266. metadata +249 -6
  267. data/EXAMPLES.md +0 -164
@@ -0,0 +1,845 @@
1
+ # frozen_string_literal: true
2
+
3
+ module SwarmMemory
4
+ module Adapters
5
+ # Real filesystem adapter using .md/.yml file pairs
6
+ #
7
+ # Architecture:
8
+ # - Content stored in .md files (markdown)
9
+ # - Metadata stored in .yml files (tags, confidence, hits)
10
+ # - Embeddings stored in .emb files (binary, optional)
11
+ # - Logical paths map directly onto nested directories (no "--" flattening)
12
+ # - Stubs for merged/moved entries with auto-redirect
13
+ # - Hit tracking for access patterns
14
+ #
15
+ # Example on disk:
16
+ # .swarm/memory/
17
+ # ├── concepts/ruby/classes.md (content)
18
+ # ├── concepts/ruby/classes.yml (metadata)
19
+ # ├── concepts/ruby/classes.emb (embedding, optional)
20
+ # └── _stubs/
21
+ # ├── old-ruby-intro.md (stub: "# merged → concepts--ruby--classes")
22
+ # └── old-ruby-intro.yml (metadata with stub: true)
23
+ class FilesystemAdapter < Base
24
+ # Stub markers
25
+ STUB_MARKERS = ["# merged →", "# moved →"].freeze
26
+
27
+ # Virtual built-in entries that always exist without taking storage space
28
+ # These are meta-skills and resources available to all agents
29
+ # Mapped as: memory_path => gem_file_basename
30
+ VIRTUAL_ENTRIES = {
31
+ "skill/meta/deep-learning.md" => "meta/deep-learning",
32
+ }.freeze
33
+
34
+ # Initialize filesystem adapter with directory
35
+ #
36
+ # @param directory [String] Directory path for storage (REQUIRED)
37
+ # @raise [ArgumentError] If directory is not provided
38
def initialize(directory:)
  super()

  # nil, empty and whitespace-only values are all rejected.
  directory_missing = directory.nil? || directory.to_s.strip.empty?
  raise ArgumentError, "directory is required for FilesystemAdapter" if directory_missing

  @directory = File.expand_path(directory)
  # Fiber-aware concurrency control (single permit).
  @semaphore = Async::Semaphore.new(1)
  @total_size = 0

  # Make sure the storage root exists before anything is read from it.
  FileUtils.mkdir_p(@directory)

  # Lock file used for cross-process synchronization.
  @lock_file_path = File.join(@directory, ".lock")

  # In-memory index built once at boot for fast lookups.
  @index = build_index
end
55
+
56
+ # Write content to filesystem
57
+ #
58
+ # @param file_path [String] Logical path (e.g., "concepts/ruby/classes")
59
+ # @param content [String] Content to store
60
+ # @param title [String] Brief title
61
+ # @param embedding [Array<Float>, nil] Optional embedding vector
62
+ # @param metadata [Hash, nil] Optional metadata
63
+ # @return [Core::Entry] The created entry
64
def write(file_path:, content:, title:, embedding: nil, metadata: nil)
  # Stores one entry as up to three sibling files under @directory:
  #   <path>.md  - content, exactly as provided (no frontmatter extraction)
  #   <path>.yml - title, size, hits, metadata, embedding checksum
  #   <path>.emb - packed float embedding (only when one is given)
  # Raises ArgumentError for missing arguments or when a size limit is exceeded.
  with_write_lock do
    @semaphore.acquire do
      raise ArgumentError, "file_path is required" if file_path.nil? || file_path.to_s.strip.empty?
      raise ArgumentError, "content is required" if content.nil?
      raise ArgumentError, "title is required" if title.nil? || title.to_s.strip.empty?

      content_size = content.bytesize

      # Metadata comes from tool parameters; normalise its keys to strings.
      stringified_metadata = metadata ? Utils.stringify_keys(metadata) : {}

      # Per-entry size limit
      if content_size > MAX_ENTRY_SIZE
        raise ArgumentError, "Content exceeds maximum size (#{format_bytes(MAX_ENTRY_SIZE)}). " \
          "Current: #{format_bytes(content_size)}"
      end

      # Total size limit; an overwrite only counts the size difference.
      existing_size = get_entry_size(file_path)
      new_total_size = @total_size - existing_size + content_size

      if new_total_size > MAX_TOTAL_SIZE
        raise ArgumentError, "Memory storage full (#{format_bytes(MAX_TOTAL_SIZE)} limit). " \
          "Current: #{format_bytes(@total_size)}, " \
          "Would be: #{format_bytes(new_total_size)}. " \
          "Clear old entries or use smaller content."
      end

      # Strip the .md extension to get the on-disk base path
      # ("concepts/ruby/classes.md" -> "concepts/ruby/classes").
      base_path = file_path.sub(/\.md\z/, "")
      disk_path = flatten_path(base_path)

      # One timestamp for the YAML file, the index and the returned entry,
      # so all three agree on when this write happened.
      written_at = Time.now

      # 1. Content
      md_file = File.join(@directory, "#{disk_path}.md")
      FileUtils.mkdir_p(File.dirname(md_file))
      File.write(md_file, content)

      # 2. Metadata (the existing hit count is preserved across overwrites)
      yaml_file = File.join(@directory, "#{disk_path}.yml")
      existing_hits = read_yaml_field(yaml_file, :hits) || 0

      yaml_data = {
        title: title,
        file_path: file_path, # Logical path with .md extension
        updated_at: written_at,
        size: content_size,
        hits: existing_hits,
        metadata: stringified_metadata,
        embedding_checksum: embedding ? checksum(embedding) : nil,
      }
      # Convert symbol keys to strings for clean YAML output
      File.write(yaml_file, YAML.dump(Utils.stringify_keys(yaml_data)))

      # 3. Embedding. The packed floats are raw bytes, so write in binary
      # mode to rule out newline/encoding translation on any platform.
      # NOTE(review): when embedding is nil, a .emb file left by an earlier
      # write is not removed - confirm whether it should be.
      if embedding
        emb_file = File.join(@directory, "#{disk_path}.emb")
        File.binwrite(emb_file, embedding.pack("f*"))
      end

      @total_size = new_total_size

      @index[file_path] = {
        disk_path: disk_path,
        title: title,
        size: content_size,
        updated_at: written_at,
      }

      Core::Entry.new(
        content: content,
        title: title,
        updated_at: written_at,
        size: content_size,
        embedding: embedding,
        metadata: stringified_metadata,
      )
    end
  end
end
151
+
152
+ # Read content from filesystem
153
+ #
154
+ # @param file_path [String] Logical path with .md extension
155
+ # @return [String] Content
156
def read(file_path:)
  # Returns the content for file_path, following stub redirects.
  # The redirect chain is walked iteratively and every visited file is
  # remembered, so a circular or self-referencing stub raises ArgumentError
  # instead of recursing until the stack overflows.
  current_path = file_path
  visited_files = []

  loop do
    raise ArgumentError, "file_path is required" if current_path.nil? || current_path.to_s.strip.empty?

    # Virtual built-in entries take precedence over anything on disk
    return load_virtual_entry(current_path).content if VIRTUAL_ENTRIES.key?(current_path)

    # Strip .md extension to find the file on disk
    base_path = current_path.sub(/\.md\z/, "")
    md_file = File.join(@directory, "#{flatten_path(base_path)}.md")

    raise ArgumentError, "memory://#{current_path} not found" unless File.exist?(md_file)
    raise ArgumentError, "memory://#{file_path} has a circular stub redirect" if visited_files.include?(md_file)

    visited_files << md_file
    content = File.read(md_file)

    # A stub with an extractable target redirects; a stub without one is
    # returned as ordinary content.
    redirect_target = stub_content?(content) ? extract_redirect_target(content) : nil

    unless redirect_target
      # Only the entry that is finally served gets a hit.
      increment_hits(current_path)
      return content
    end

    current_path = redirect_target
  end
end
185
+
186
+ # Read full entry with all metadata
187
+ #
188
+ # @param file_path [String] Logical path with .md extension
189
+ # @return [Core::Entry] Full entry object
190
def read_entry(file_path:)
  # Returns a Core::Entry (content, metadata and optional embedding) for
  # file_path, following stub redirects.
  # Redirects are walked iteratively with cycle detection, so a circular
  # stub chain raises ArgumentError instead of overflowing the stack.
  current_path = file_path
  visited_files = []

  loop do
    raise ArgumentError, "file_path is required" if current_path.nil? || current_path.to_s.strip.empty?

    # Virtual built-in entries take precedence over anything on disk
    return load_virtual_entry(current_path) if VIRTUAL_ENTRIES.key?(current_path)

    # Strip .md extension to find the files on disk
    disk_path = flatten_path(current_path.sub(/\.md\z/, ""))
    md_file = File.join(@directory, "#{disk_path}.md")

    raise ArgumentError, "memory://#{current_path} not found" unless File.exist?(md_file)
    raise ArgumentError, "memory://#{file_path} has a circular stub redirect" if visited_files.include?(md_file)

    visited_files << md_file
    content = File.read(md_file)

    # Follow stub redirect if applicable
    redirect_target = stub_content?(content) ? extract_redirect_target(content) : nil
    if redirect_target
      current_path = redirect_target
      next
    end

    # Metadata: an empty or malformed .yml does not load as a Hash;
    # treat it like a missing file so the defaults below apply.
    yaml_file = File.join(@directory, "#{disk_path}.yml")
    loaded = File.exist?(yaml_file) ? YAML.load_file(yaml_file, permitted_classes: [Time, Date, Symbol]) : nil
    yaml_data = loaded.is_a?(Hash) ? loaded : {}

    # Embedding: raw packed floats, so read in binary mode
    emb_file = File.join(@directory, "#{disk_path}.emb")
    embedding = File.binread(emb_file).unpack("f*") if File.exist?(emb_file)

    # Only the entry that is finally served gets a hit.
    increment_hits(current_path)

    return Core::Entry.new(
      content: content,
      title: yaml_data["title"] || "Untitled",
      updated_at: parse_time(yaml_data["updated_at"]) || Time.now,
      size: yaml_data["size"] || content.bytesize,
      embedding: embedding,
      metadata: yaml_data["metadata"],
    )
  end
end
235
+
236
+ # Delete entry from filesystem
237
+ #
238
+ # @param file_path [String] Logical path with .md extension
239
+ # @return [void]
240
def delete(file_path:)
  # Removes an entry's content, metadata and embedding files and updates
  # the size accounting and the in-memory index.
  # Raises ArgumentError when file_path is blank or the entry does not exist.
  with_write_lock do
    @semaphore.acquire do
      raise ArgumentError, "file_path is required" if file_path.nil? || file_path.to_s.strip.empty?

      # Strip .md extension to find the files on disk
      base_path = file_path.sub(/\.md\z/, "")
      disk_path = flatten_path(base_path)
      md_file = File.join(@directory, "#{disk_path}.md")

      raise ArgumentError, "memory://#{file_path} not found" unless File.exist?(md_file)

      # Get size before deletion
      entry_size = get_entry_size(file_path)

      # Metadata is written as ".yml" (not ".yaml"), so that is the
      # extension to remove alongside the content and the embedding.
      ["md", "yml", "emb"].each do |extension|
        related_file = File.join(@directory, "#{disk_path}.#{extension}")
        File.delete(related_file) if File.exist?(related_file)
      end

      @total_size -= entry_size
      @index.delete(file_path)
    end
  end
end
268
+
269
+ # List all entries
270
+ #
271
+ # @param prefix [String, nil] Filter by prefix
272
+ # @return [Array<Hash>] Entry metadata
273
def list(prefix: nil)
  # Returns metadata hashes (:path, :title, :size, :updated_at) for every
  # non-stub .md file under @directory, sorted by path. When prefix is
  # given, only entries whose path (without .md) starts with it are kept.
  prefix_base = prefix&.sub(/\.md\z/, "")
  root = "#{@directory}/"

  entries = Dir.glob(File.join(@directory, "**/*.md")).filter_map do |md_file|
    next if stub_file?(md_file)

    # Logical path relative to @directory
    logical_path = md_file.delete_prefix(root)
    next if prefix_base && !logical_path.sub(/\.md\z/, "").start_with?(prefix_base)

    # Swap only the trailing extension; a plain ".md" substitution would hit
    # the first ".md" anywhere in the path (e.g. inside a directory name).
    yaml_file = md_file.sub(/\.md\z/, ".yml")

    # An empty or malformed .yml does not load as a Hash; fall back to defaults.
    loaded = File.exist?(yaml_file) ? YAML.load_file(yaml_file, permitted_classes: [Time, Date, Symbol]) : nil
    yaml_data = loaded.is_a?(Hash) ? loaded : {}

    {
      path: logical_path,
      title: yaml_data["title"] || "Untitled",
      size: yaml_data["size"] || File.size(md_file),
      updated_at: parse_time(yaml_data["updated_at"]) || File.mtime(md_file),
    }
  end

  entries.sort_by { |entry| entry[:path] }
end
299
+
300
+ # Search by glob pattern
301
+ #
302
+ # @param pattern [String] Glob pattern (e.g., "concepts/**/*.md")
303
+ # @return [Array<Hash>] Matching entries
304
def glob(pattern:)
  # Returns metadata hashes (:path, :title, :size, :updated_at) for non-stub
  # .md files matching pattern, most recently updated first.
  # Raises ArgumentError when pattern is blank.
  raise ArgumentError, "pattern is required" if pattern.nil? || pattern.to_s.strip.empty?

  # Normalize the pattern so that only .md files can match
  normalized_pattern = if pattern.end_with?("**")
    # fact/** → fact/**/*.md (recursive match of all .md files)
    "#{pattern}/*.md"
  elsif pattern.end_with?("*")
    # fact/* → fact/*.md (direct children .md files only)
    "#{pattern}.md"
  elsif pattern.end_with?(".md")
    # Already has .md, use as-is
    pattern
  else
    # No wildcard or extension, add .md
    "#{pattern}.md"
  end

  root = "#{@directory}/"
  md_files = Dir.glob(File.join(@directory, normalized_pattern)).reject { |file| stub_file?(file) }

  results = md_files.map do |md_file|
    # Swap only the trailing extension; a plain ".md" substitution would hit
    # the first ".md" anywhere in the path (e.g. inside a directory name).
    yaml_file = md_file.sub(/\.md\z/, ".yml")

    # An empty or malformed .yml does not load as a Hash; fall back to defaults.
    loaded = File.exist?(yaml_file) ? YAML.load_file(yaml_file, permitted_classes: [Time, Date, Symbol]) : nil
    yaml_data = loaded.is_a?(Hash) ? loaded : {}

    {
      path: md_file.delete_prefix(root), # logical path relative to @directory
      title: yaml_data["title"] || "Untitled",
      size: File.size(md_file),
      updated_at: parse_time(yaml_data["updated_at"]) || File.mtime(md_file),
    }
  end

  results.sort_by { |entry| -entry[:updated_at].to_f }
end
344
+
345
+ # Search by content pattern
346
+ #
347
+ # Fast path: grep .yml files first (metadata)
348
+ # Fallback: grep .md files (content)
349
+ #
350
+ # @param pattern [String] Regex pattern
351
+ # @param case_insensitive [Boolean] Case-insensitive search
352
+ # @param output_mode [String] Output mode
353
+ # @return [Array<Hash>] Results
354
def grep(pattern:, case_insensitive: false, output_mode: "files_with_matches", path: nil)
  # Compiles pattern into a Regexp and hands it, together with the optional
  # path filter, to the helper that matches output_mode.
  pattern_missing = pattern.nil? || pattern.to_s.strip.empty?
  raise ArgumentError, "pattern is required" if pattern_missing

  # The regex is compiled before output_mode is checked.
  regex = Regexp.new(pattern, case_insensitive ? Regexp::IGNORECASE : 0)

  if output_mode == "files_with_matches"
    grep_files_with_matches(regex, path)
  elsif output_mode == "content"
    grep_with_content(regex, path)
  elsif output_mode == "count"
    grep_with_count(regex, path)
  else
    raise ArgumentError, "Invalid output_mode: #{output_mode}"
  end
end
371
+
372
+ # Clear all entries
373
+ #
374
+ # @return [void]
375
def clear
  # Deletes every .md, .yml and .emb file under @directory (recursively),
  # then resets the size counter and the in-memory index.
  with_write_lock do
    @semaphore.acquire do
      stored_files = Dir.glob(File.join(@directory, "**/*.{md,yml,emb}"))
      stored_files.each { |stored_file| File.delete(stored_file) }

      @total_size = 0
      @index = {}
    end
  end
end
388
+
389
+ # Get current total size
390
+ #
391
+ # @return [Integer] Total size in bytes
392
+ attr_reader :total_size
393
+
394
+ # Get number of entries
395
+ #
396
+ # @return [Integer] Number of entries
397
def size
  # Number of entries currently held in the in-memory index.
  @index.length
end
400
+
401
+ # Get all entries (for optimization/analysis)
402
+ #
403
+ # @return [Hash<String, Core::Entry>] All entries
404
def all_entries
  # Builds a Hash of logical path => entry for every indexed path, using
  # read_entry; paths whose read raises ArgumentError are left out.
  @index.each_key.with_object({}) do |logical_path, collected|
    collected[logical_path] = read_entry(file_path: logical_path)
  rescue ArgumentError
    # Skip entries that can't be read
    next
  end
end
416
+
417
+ # Semantic search by embedding vector
418
+ #
419
+ # Searches all entries with embeddings and returns those similar to the query.
420
+ # Results are sorted by cosine similarity in descending order.
421
+ #
422
+ # @param embedding [Array<Float>] Query embedding vector
423
+ # @param top_k [Integer] Number of results to return
424
+ # @param threshold [Float] Minimum similarity score (0.0-1.0)
425
+ # @return [Array<Hash>] Results with similarity scores
426
+ #
427
+ # @example
428
+ # results = adapter.semantic_search(
429
+ # embedding: query_embedding,
430
+ # top_k: 5,
431
+ # threshold: 0.65
432
+ # )
433
def semantic_search(embedding:, top_k: 10, threshold: 0.0)
  # Scores every indexed entry that has a .emb file against the query
  # embedding and returns the top_k result hashes, sorted by similarity
  # in descending order.
  results = @index.filter_map do |logical_path, index_data|
    emb_file = File.join(@directory, "#{index_data[:disk_path]}.emb")
    next unless File.exist?(emb_file)

    # Embeddings are raw packed floats, so read them in binary mode.
    entry_embedding = File.binread(emb_file).unpack("f*")

    # Vectors of a different dimension cannot be compared meaningfully;
    # skip them rather than raise or compute a bogus score.
    next unless entry_embedding.size == embedding.size

    similarity = cosine_similarity(embedding, entry_embedding)

    # A zero-magnitude vector can produce NaN, which passes a plain "<"
    # comparison and then breaks sorting, so it is dropped explicitly.
    next if similarity.to_f.nan? || similarity < threshold

    # Metadata: an empty or malformed .yml does not load as a Hash.
    yaml_file = File.join(@directory, "#{index_data[:disk_path]}.yml")
    loaded = File.exist?(yaml_file) ? YAML.load_file(yaml_file, permitted_classes: [Time, Date, Symbol]) : nil
    yaml_data = loaded.is_a?(Hash) ? loaded : {}

    {
      path: logical_path,
      similarity: similarity,
      title: index_data[:title],
      size: index_data[:size],
      updated_at: index_data[:updated_at],
      metadata: yaml_data["metadata"],
    }
  end

  # Sort by similarity descending, return top K
  results.sort_by { |result| -result[:similarity] }.take(top_k)
end
471
+
472
+ private
473
+
474
+ # Calculate cosine similarity between two vectors
475
+ #
476
+ # @param a [Array<Float>] First vector
477
+ # @param b [Array<Float>] Second vector
478
+ # @return [Float] Cosine similarity (-1.0 to 1.0)
479
def cosine_similarity(a, b)
  # Dot product of a and b divided by the product of their magnitudes.
  dot_product = a.zip(b).sum { |x, y| x * y }
  magnitude_a = Math.sqrt(a.sum { |x| x**2 })
  magnitude_b = Math.sqrt(b.sum { |x| x**2 })
  denominator = magnitude_a * magnitude_b

  # A zero-magnitude vector has no direction; report "no similarity"
  # instead of the NaN that 0.0 / 0.0 would produce.
  return 0.0 if denominator.zero?

  dot_product / denominator
end
485
+
486
# Build a virtual (built-in) entry from files shipped next to the code
#
# The entry's base name is looked up in VIRTUAL_ENTRIES and resolved inside
# the "../skills" directory relative to this file's directory. The content
# comes from the .md file and the metadata from the matching .yml file.
#
# @param file_path [String] Logical path (e.g., "skill/meta/deep-learning-protocol.md")
# @return [Core::Entry] Virtual entry object (no embedding, updated_at set to now)
def load_virtual_entry(file_path)
  stem = VIRTUAL_ENTRIES[file_path]
  skills_root = File.expand_path("../skills", __dir__)

  # Markdown body
  body = File.read(File.join(skills_root, "#{stem}.md"))

  # YAML metadata
  meta = YAML.load_file(
    File.join(skills_root, "#{stem}.yml"),
    permitted_classes: [Time, Date, Symbol],
  )

  Core::Entry.new(
    content: body,
    title: meta["title"],
    updated_at: Time.now,
    size: body.bytesize,
    embedding: nil,
    metadata: meta,
  )
end
514
+
515
# Map a logical path to its on-disk path
#
# This is an identity function: paths are stored hierarchically, so no
# flattening is applied (e.g. "concepts/ruby/classes" stays
# "concepts/ruby/classes"). Kept for backward compatibility during transition.
#
# @param logical_path [String] Logical path
# @return [String] Same path (no flattening)
def flatten_path(logical_path)
  logical_path
end
528
+
529
# Map an on-disk path back to its logical path
#
# Identity function: disk paths already mirror the logical hierarchy, so
# the argument is returned untouched. Kept for backward compatibility
# during transition.
#
# @param disk_path [String] Disk path
# @return [String] Same path (no unflattening)
def unflatten_path(disk_path)
  disk_path
end
537
+
538
# Tell whether content is a stub (redirect)
#
# Content counts as a stub when it begins with any of the STUB_MARKERS.
#
# @param content [String] File content
# @return [Boolean] True if stub
def stub_content?(content)
  content.start_with?(*STUB_MARKERS)
end
545
+
546
# Check if file is a stub
#
# Only the first 100 bytes are inspected. Missing files, empty files and
# any read error are reported as "not a stub".
#
# @param file_path [String] Path to .md file
# @return [Boolean] True if stub
def stub_file?(file_path)
  return false unless File.exist?(file_path)

  # A length-limited read returns nil for an empty file and a binary
  # (ASCII-8BIT) string otherwise.
  head = File.read(file_path, 100)
  return false if head.nil?

  # Re-tag the bytes as UTF-8 so that markers containing non-ASCII
  # characters can be compared; a binary string with high bytes would raise
  # Encoding::CompatibilityError, which the rescue below would silently turn
  # into false.
  stub_content?(head.force_encoding(Encoding::UTF_8))
rescue StandardError
  false
end
559
+
560
# Pull the redirect target out of stub content
#
# For the first STUB_MARKERS entry the content starts with and that is
# followed by whitespace and text, returns that text (up to the end of its
# line) with surrounding whitespace removed.
#
# @param content [String] Stub content
# @return [String, nil] Target path or nil
def extract_redirect_target(content)
  STUB_MARKERS.each do |marker|
    next unless content.start_with?(marker)

    # Capture group 1 is the path that follows the marker
    target = content[/#{Regexp.escape(marker)}\s+(.+?)$/m, 1]
    return target.strip if target
  end

  nil
end
574
+
575
# Bump the hit counter stored in an entry's .yml file
#
# Does nothing when the .yml file is absent. Any error is reported with
# `warn` and swallowed so that hit tracking never breaks a read.
#
# @param file_path [String] Logical path with .md extension
# @return [void]
def increment_hits(file_path)
  stem = flatten_path(file_path.sub(/\.md\z/, ""))
  yaml_file = File.join(@directory, "#{stem}.yml")
  return unless File.exist?(yaml_file)

  @semaphore.acquire do
    data = YAML.load_file(yaml_file, permitted_classes: [Time, Date, Symbol])
    # Accept a symbol or string key when reading, but always write the
    # string key to match the rest of the YAML file
    previous = data[:hits] || data["hits"] || 0
    data["hits"] = previous + 1
    File.write(yaml_file, YAML.dump(data))
  end
rescue StandardError => e
  # Don't fail read if hit tracking fails
  warn("Warning: Failed to increment hits for #{file_path}: #{e.message}")
end
595
+
596
# Look up an entry's size, preferring the .yml metadata over the .md file
#
# Returns the "size" value recorded in the .yml file when that file exists
# (0 if the key is absent); otherwise the byte size of the .md file, or 0
# when neither file exists. Any error also yields 0.
#
# @param file_path [String] Logical path with .md extension
# @return [Integer] Size in bytes
def get_entry_size(file_path)
  stem = flatten_path(file_path.sub(/\.md\z/, ""))
  yaml_file = File.join(@directory, "#{stem}.yml")

  # No metadata: fall back to the markdown file itself.
  # File.size? is nil for a missing or zero-length file, hence the `|| 0`.
  return File.size?(File.join(@directory, "#{stem}.md")) || 0 unless File.exist?(yaml_file)

  recorded = YAML.load_file(yaml_file, permitted_classes: [Time, Date, Symbol])["size"]
  recorded || 0
rescue StandardError
  0
end
615
+
616
# Fetch a single field from a .yml file
#
# The field name is converted to a string before lookup. Returns nil when
# the file does not exist, the key is absent, or any error occurs while
# loading.
#
# @param yaml_file [String] Path to .yml file
# @param field [Symbol, String] Field to read
# @return [Object, nil] Field value or nil
def read_yaml_field(yaml_file, field)
  return unless File.exist?(yaml_file)

  # YAML files always have string keys (we stringify when writing)
  YAML.load_file(yaml_file, permitted_classes: [Time, Date, Symbol])[field.to_s]
rescue StandardError
  nil
end
630
+
631
# Build in-memory index of all entries
#
# Scans every non-stub .md file under @directory and records its metadata,
# taken from the sibling .yml file when present. A missing or empty .yml
# falls back to "Untitled", the .md file size and the .md mtime.
# Also stores the summed entry sizes in @total_size.
#
# @return [Hash] Index mapping logical_path → metadata
#   (:disk_path, :title, :size, :updated_at)
def build_index
  index = {}
  total = 0

  Dir.glob(File.join(@directory, "**/*.md")).each do |md_file|
    next if stub_file?(md_file)

    # Logical path is the path relative to @directory; the disk path is the
    # same path without the extension (no flattening).
    logical_path = md_file.delete_prefix("#{@directory}/")
    disk_path = logical_path.sub(/\.md\z/, "")

    # Swap only the trailing extension: an unanchored ".md" replacement
    # would rewrite the first ".md" found anywhere in the path
    # (e.g. inside a directory name) and miss the real metadata file.
    yaml_file = md_file.sub(/\.md\z/, ".yml")
    # An empty YAML document parses to a falsy value; treat it as no metadata.
    yaml_data = (File.exist?(yaml_file) && YAML.load_file(yaml_file, permitted_classes: [Time, Date, Symbol])) || {}

    size = yaml_data["size"] || File.size(md_file)
    total += size

    index[logical_path] = {
      disk_path: disk_path,
      title: yaml_data["title"] || "Untitled",
      size: size,
      updated_at: parse_time(yaml_data["updated_at"]) || File.mtime(md_file),
    }
  end

  @total_size = total
  index
end
665
+
666
# Grep for files with matches (fast path: .yml first)
#
# Metadata (.yml) files are searched first; if any of them match, only
# those hits are returned. Otherwise the non-stub .md files are searched.
#
# @param regex [Regexp] Pattern to match
# @param path_filter [String, nil] Optional path prefix filter
# @return [Array<String>] Sorted matching logical paths with .md extension
def grep_files_with_matches(regex, path_filter = nil)
  # Fast path: Search .yml files (metadata)
  metadata_hits = Dir.glob(File.join(@directory, "**/*.yml")).filter_map do |yaml_file|
    next if yaml_file.include?("_stubs/")

    # Logical path is relative to @directory. Swap only the trailing
    # extension: an unanchored ".yml" replacement would rewrite the first
    # ".yml" anywhere in the path (e.g. inside a directory name).
    logical_path = yaml_file.delete_prefix("#{@directory}/").sub(/\.yml\z/, ".md")
    next unless matches_path_filter?(logical_path, path_filter)

    logical_path if regex.match?(File.read(yaml_file))
  end

  # If found in metadata, return quickly
  return metadata_hits.sort unless metadata_hits.empty?

  # Fallback: Search .md files (content)
  content_hits = Dir.glob(File.join(@directory, "**/*.md")).filter_map do |md_file|
    next if stub_file?(md_file)

    logical_path = md_file.delete_prefix("#{@directory}/")
    next unless matches_path_filter?(logical_path, path_filter)

    logical_path if regex.match?(File.read(md_file))
  end

  content_hits.uniq.sort
end
706
+
707
# Grep .md files and report each matching line with its line number
#
# Stub files and paths rejected by the filter are skipped; files without
# any matching line are left out of the result.
#
# @param regex [Regexp] Pattern to match
# @param path_filter [String, nil] Optional path prefix filter
# @return [Array<Hash>] One { path:, matches: } hash per file, where matches
#   is an array of { line_number:, content: } (1-based, line ending removed)
def grep_with_content(regex, path_filter = nil)
  Dir.glob(File.join(@directory, "**/*.md")).filter_map do |md_file|
    next if stub_file?(md_file)

    # Logical path is the path relative to @directory
    logical_path = md_file.sub("#{@directory}/", "")
    next unless matches_path_filter?(logical_path, path_filter)

    hits = File.read(md_file).each_line.with_index(1).filter_map do |line, line_num|
      { line_number: line_num, content: line.chomp } if regex.match?(line)
    end

    { path: logical_path, matches: hits } unless hits.empty?
  end
end
739
+
740
# Grep .md files and report how many times the pattern occurs in each
#
# Stub files and paths rejected by the filter are skipped; files with no
# occurrence are left out of the result.
#
# @param regex [Regexp] Pattern to match
# @param path_filter [String, nil] Optional path prefix filter
# @return [Array<Hash>] One { path:, count: } hash per matching file
def grep_with_count(regex, path_filter = nil)
  Dir.glob(File.join(@directory, "**/*.md")).filter_map do |md_file|
    next if stub_file?(md_file)

    # Logical path is the path relative to @directory
    logical_path = md_file.sub("#{@directory}/", "")
    next unless matches_path_filter?(logical_path, path_filter)

    occurrences = File.read(md_file).scan(regex).size
    { path: logical_path, count: occurrences } if occurrences.positive?
  end
end
768
+
769
# Decide whether a logical path passes the optional path filter
#
# A nil or empty filter accepts everything. A filter ending in ".md" must
# equal the path exactly. Any other filter is treated as a directory: it
# matches only paths that start with the filter followed by "/".
#
# @param logical_path [String] The logical path to check (e.g., "concept/ruby/blocks.md")
# @param path_filter [String, nil] Optional path prefix filter (e.g., "concept/", "fact/api-design", "skill/ruby/lambdas.md")
# @return [Boolean] True if path matches or no filter specified
#
# @example Directory-style filtering
#   matches_path_filter?("concept/ruby/blocks.md", "concept/")   #=> true
#   matches_path_filter?("concept/ruby/blocks.md", "concept")    #=> true
#   matches_path_filter?("fact/api-design/rest.md", "fact/api")  #=> false (requires "fact/api/")
#   matches_path_filter?("fact/api/rest-basics.md", "fact/api")  #=> true
#
# @example File-specific filtering
#   matches_path_filter?("concept/ruby/blocks.md", "concept/ruby/blocks.md")   #=> true (exact match)
#   matches_path_filter?("concept/ruby/lambdas.md", "concept/ruby/blocks.md")  #=> false
def matches_path_filter?(logical_path, path_filter)
  if path_filter.nil? || path_filter.empty?
    true
  elsif path_filter.end_with?(".md")
    # File filter: exact match only
    logical_path == path_filter
  else
    # Directory filter: force exactly one trailing "/" so that "fact/api"
    # cannot match "fact/api-design/"
    logical_path.start_with?("#{path_filter.chomp("/")}/")
  end
end
799
+
800
# Compute an MD5 hex digest of an embedding vector
#
# The vector is packed as native single-precision floats ("f*") before
# hashing.
#
# @param embedding [Array<Float>] Embedding vector
# @return [String] Hex checksum
def checksum(embedding)
  packed = embedding.pack("f*")
  Digest::MD5.hexdigest(packed)
end
807
+
808
# Coerce a stored time value into a Time
#
# nil stays nil, a Time is returned as-is, and anything else is converted
# to a string and handed to Time.parse. Values Time.parse rejects with
# ArgumentError yield nil.
#
# @param value [String, Time, nil] Time value
# @return [Time, nil] Parsed time
def parse_time(value)
  case value
  when nil then nil
  when Time then value
  else Time.parse(value.to_s)
  end
rescue ArgumentError
  nil
end
820
+
821
# Run the given block while holding an exclusive lock on the lock file
#
# Opens (creating if needed) the file at @lock_file_path, takes an
# exclusive flock on it, yields, and releases the lock afterwards even if
# the block raises.
#
# @yield Block to execute with lock held
# @return [Object] Result of block
def with_write_lock
  open_flags = File::RDWR | File::CREAT

  File.open(@lock_file_path, open_flags, 0o644) do |handle|
    # Blocks until no other holder has the exclusive lock
    handle.flock(File::LOCK_EX)

    begin
      yield
    ensure
      handle.flock(File::LOCK_UN)
    end
  end
end
843
+ end
844
+ end
845
+ end