woods 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +89 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +406 -0
  7. data/exe/woods-console +59 -0
  8. data/exe/woods-console-mcp +22 -0
  9. data/exe/woods-mcp +34 -0
  10. data/exe/woods-mcp-http +37 -0
  11. data/exe/woods-mcp-start +58 -0
  12. data/lib/generators/woods/install_generator.rb +32 -0
  13. data/lib/generators/woods/pgvector_generator.rb +37 -0
  14. data/lib/generators/woods/templates/add_pgvector_to_woods.rb.erb +15 -0
  15. data/lib/generators/woods/templates/create_woods_tables.rb.erb +43 -0
  16. data/lib/tasks/woods.rake +621 -0
  17. data/lib/tasks/woods_evaluation.rake +115 -0
  18. data/lib/woods/ast/call_site_extractor.rb +106 -0
  19. data/lib/woods/ast/method_extractor.rb +71 -0
  20. data/lib/woods/ast/node.rb +116 -0
  21. data/lib/woods/ast/parser.rb +614 -0
  22. data/lib/woods/ast.rb +6 -0
  23. data/lib/woods/builder.rb +200 -0
  24. data/lib/woods/cache/cache_middleware.rb +199 -0
  25. data/lib/woods/cache/cache_store.rb +264 -0
  26. data/lib/woods/cache/redis_cache_store.rb +116 -0
  27. data/lib/woods/cache/solid_cache_store.rb +111 -0
  28. data/lib/woods/chunking/chunk.rb +84 -0
  29. data/lib/woods/chunking/semantic_chunker.rb +295 -0
  30. data/lib/woods/console/adapters/cache_adapter.rb +58 -0
  31. data/lib/woods/console/adapters/good_job_adapter.rb +33 -0
  32. data/lib/woods/console/adapters/job_adapter.rb +68 -0
  33. data/lib/woods/console/adapters/sidekiq_adapter.rb +33 -0
  34. data/lib/woods/console/adapters/solid_queue_adapter.rb +33 -0
  35. data/lib/woods/console/audit_logger.rb +75 -0
  36. data/lib/woods/console/bridge.rb +177 -0
  37. data/lib/woods/console/confirmation.rb +90 -0
  38. data/lib/woods/console/connection_manager.rb +173 -0
  39. data/lib/woods/console/console_response_renderer.rb +74 -0
  40. data/lib/woods/console/embedded_executor.rb +373 -0
  41. data/lib/woods/console/model_validator.rb +81 -0
  42. data/lib/woods/console/rack_middleware.rb +87 -0
  43. data/lib/woods/console/safe_context.rb +82 -0
  44. data/lib/woods/console/server.rb +612 -0
  45. data/lib/woods/console/sql_validator.rb +172 -0
  46. data/lib/woods/console/tools/tier1.rb +118 -0
  47. data/lib/woods/console/tools/tier2.rb +117 -0
  48. data/lib/woods/console/tools/tier3.rb +110 -0
  49. data/lib/woods/console/tools/tier4.rb +79 -0
  50. data/lib/woods/coordination/pipeline_lock.rb +109 -0
  51. data/lib/woods/cost_model/embedding_cost.rb +88 -0
  52. data/lib/woods/cost_model/estimator.rb +128 -0
  53. data/lib/woods/cost_model/provider_pricing.rb +67 -0
  54. data/lib/woods/cost_model/storage_cost.rb +52 -0
  55. data/lib/woods/cost_model.rb +22 -0
  56. data/lib/woods/db/migrations/001_create_units.rb +38 -0
  57. data/lib/woods/db/migrations/002_create_edges.rb +35 -0
  58. data/lib/woods/db/migrations/003_create_embeddings.rb +37 -0
  59. data/lib/woods/db/migrations/004_create_snapshots.rb +45 -0
  60. data/lib/woods/db/migrations/005_create_snapshot_units.rb +40 -0
  61. data/lib/woods/db/migrations/006_rename_tables.rb +34 -0
  62. data/lib/woods/db/migrator.rb +73 -0
  63. data/lib/woods/db/schema_version.rb +73 -0
  64. data/lib/woods/dependency_graph.rb +236 -0
  65. data/lib/woods/embedding/indexer.rb +140 -0
  66. data/lib/woods/embedding/openai.rb +126 -0
  67. data/lib/woods/embedding/provider.rb +162 -0
  68. data/lib/woods/embedding/text_preparer.rb +112 -0
  69. data/lib/woods/evaluation/baseline_runner.rb +115 -0
  70. data/lib/woods/evaluation/evaluator.rb +139 -0
  71. data/lib/woods/evaluation/metrics.rb +79 -0
  72. data/lib/woods/evaluation/query_set.rb +148 -0
  73. data/lib/woods/evaluation/report_generator.rb +90 -0
  74. data/lib/woods/extracted_unit.rb +145 -0
  75. data/lib/woods/extractor.rb +1028 -0
  76. data/lib/woods/extractors/action_cable_extractor.rb +201 -0
  77. data/lib/woods/extractors/ast_source_extraction.rb +46 -0
  78. data/lib/woods/extractors/behavioral_profile.rb +309 -0
  79. data/lib/woods/extractors/caching_extractor.rb +261 -0
  80. data/lib/woods/extractors/callback_analyzer.rb +246 -0
  81. data/lib/woods/extractors/concern_extractor.rb +292 -0
  82. data/lib/woods/extractors/configuration_extractor.rb +219 -0
  83. data/lib/woods/extractors/controller_extractor.rb +404 -0
  84. data/lib/woods/extractors/database_view_extractor.rb +278 -0
  85. data/lib/woods/extractors/decorator_extractor.rb +253 -0
  86. data/lib/woods/extractors/engine_extractor.rb +223 -0
  87. data/lib/woods/extractors/event_extractor.rb +211 -0
  88. data/lib/woods/extractors/factory_extractor.rb +289 -0
  89. data/lib/woods/extractors/graphql_extractor.rb +892 -0
  90. data/lib/woods/extractors/i18n_extractor.rb +117 -0
  91. data/lib/woods/extractors/job_extractor.rb +374 -0
  92. data/lib/woods/extractors/lib_extractor.rb +218 -0
  93. data/lib/woods/extractors/mailer_extractor.rb +269 -0
  94. data/lib/woods/extractors/manager_extractor.rb +188 -0
  95. data/lib/woods/extractors/middleware_extractor.rb +133 -0
  96. data/lib/woods/extractors/migration_extractor.rb +469 -0
  97. data/lib/woods/extractors/model_extractor.rb +988 -0
  98. data/lib/woods/extractors/phlex_extractor.rb +252 -0
  99. data/lib/woods/extractors/policy_extractor.rb +191 -0
  100. data/lib/woods/extractors/poro_extractor.rb +229 -0
  101. data/lib/woods/extractors/pundit_extractor.rb +223 -0
  102. data/lib/woods/extractors/rails_source_extractor.rb +473 -0
  103. data/lib/woods/extractors/rake_task_extractor.rb +343 -0
  104. data/lib/woods/extractors/route_extractor.rb +181 -0
  105. data/lib/woods/extractors/scheduled_job_extractor.rb +331 -0
  106. data/lib/woods/extractors/serializer_extractor.rb +339 -0
  107. data/lib/woods/extractors/service_extractor.rb +217 -0
  108. data/lib/woods/extractors/shared_dependency_scanner.rb +91 -0
  109. data/lib/woods/extractors/shared_utility_methods.rb +281 -0
  110. data/lib/woods/extractors/state_machine_extractor.rb +398 -0
  111. data/lib/woods/extractors/test_mapping_extractor.rb +225 -0
  112. data/lib/woods/extractors/validator_extractor.rb +211 -0
  113. data/lib/woods/extractors/view_component_extractor.rb +311 -0
  114. data/lib/woods/extractors/view_template_extractor.rb +261 -0
  115. data/lib/woods/feedback/gap_detector.rb +89 -0
  116. data/lib/woods/feedback/store.rb +119 -0
  117. data/lib/woods/filename_utils.rb +32 -0
  118. data/lib/woods/flow_analysis/operation_extractor.rb +206 -0
  119. data/lib/woods/flow_analysis/response_code_mapper.rb +154 -0
  120. data/lib/woods/flow_assembler.rb +290 -0
  121. data/lib/woods/flow_document.rb +191 -0
  122. data/lib/woods/flow_precomputer.rb +102 -0
  123. data/lib/woods/formatting/base.rb +30 -0
  124. data/lib/woods/formatting/claude_adapter.rb +98 -0
  125. data/lib/woods/formatting/generic_adapter.rb +56 -0
  126. data/lib/woods/formatting/gpt_adapter.rb +64 -0
  127. data/lib/woods/formatting/human_adapter.rb +78 -0
  128. data/lib/woods/graph_analyzer.rb +374 -0
  129. data/lib/woods/mcp/bootstrapper.rb +96 -0
  130. data/lib/woods/mcp/index_reader.rb +394 -0
  131. data/lib/woods/mcp/renderers/claude_renderer.rb +81 -0
  132. data/lib/woods/mcp/renderers/json_renderer.rb +17 -0
  133. data/lib/woods/mcp/renderers/markdown_renderer.rb +353 -0
  134. data/lib/woods/mcp/renderers/plain_renderer.rb +240 -0
  135. data/lib/woods/mcp/server.rb +962 -0
  136. data/lib/woods/mcp/tool_response_renderer.rb +85 -0
  137. data/lib/woods/model_name_cache.rb +51 -0
  138. data/lib/woods/notion/client.rb +217 -0
  139. data/lib/woods/notion/exporter.rb +219 -0
  140. data/lib/woods/notion/mapper.rb +40 -0
  141. data/lib/woods/notion/mappers/column_mapper.rb +57 -0
  142. data/lib/woods/notion/mappers/migration_mapper.rb +39 -0
  143. data/lib/woods/notion/mappers/model_mapper.rb +161 -0
  144. data/lib/woods/notion/mappers/shared.rb +22 -0
  145. data/lib/woods/notion/rate_limiter.rb +68 -0
  146. data/lib/woods/observability/health_check.rb +79 -0
  147. data/lib/woods/observability/instrumentation.rb +34 -0
  148. data/lib/woods/observability/structured_logger.rb +57 -0
  149. data/lib/woods/operator/error_escalator.rb +81 -0
  150. data/lib/woods/operator/pipeline_guard.rb +92 -0
  151. data/lib/woods/operator/status_reporter.rb +80 -0
  152. data/lib/woods/railtie.rb +38 -0
  153. data/lib/woods/resilience/circuit_breaker.rb +99 -0
  154. data/lib/woods/resilience/index_validator.rb +167 -0
  155. data/lib/woods/resilience/retryable_provider.rb +108 -0
  156. data/lib/woods/retrieval/context_assembler.rb +261 -0
  157. data/lib/woods/retrieval/query_classifier.rb +133 -0
  158. data/lib/woods/retrieval/ranker.rb +277 -0
  159. data/lib/woods/retrieval/search_executor.rb +316 -0
  160. data/lib/woods/retriever.rb +152 -0
  161. data/lib/woods/ruby_analyzer/class_analyzer.rb +170 -0
  162. data/lib/woods/ruby_analyzer/dataflow_analyzer.rb +77 -0
  163. data/lib/woods/ruby_analyzer/fqn_builder.rb +18 -0
  164. data/lib/woods/ruby_analyzer/mermaid_renderer.rb +280 -0
  165. data/lib/woods/ruby_analyzer/method_analyzer.rb +143 -0
  166. data/lib/woods/ruby_analyzer/trace_enricher.rb +143 -0
  167. data/lib/woods/ruby_analyzer.rb +87 -0
  168. data/lib/woods/session_tracer/file_store.rb +104 -0
  169. data/lib/woods/session_tracer/middleware.rb +143 -0
  170. data/lib/woods/session_tracer/redis_store.rb +106 -0
  171. data/lib/woods/session_tracer/session_flow_assembler.rb +254 -0
  172. data/lib/woods/session_tracer/session_flow_document.rb +223 -0
  173. data/lib/woods/session_tracer/solid_cache_store.rb +139 -0
  174. data/lib/woods/session_tracer/store.rb +81 -0
  175. data/lib/woods/storage/graph_store.rb +120 -0
  176. data/lib/woods/storage/metadata_store.rb +196 -0
  177. data/lib/woods/storage/pgvector.rb +195 -0
  178. data/lib/woods/storage/qdrant.rb +205 -0
  179. data/lib/woods/storage/vector_store.rb +167 -0
  180. data/lib/woods/temporal/json_snapshot_store.rb +245 -0
  181. data/lib/woods/temporal/snapshot_store.rb +345 -0
  182. data/lib/woods/token_utils.rb +19 -0
  183. data/lib/woods/version.rb +5 -0
  184. data/lib/woods.rb +246 -0
  185. metadata +270 -0
@@ -0,0 +1,621 @@
1
+ # frozen_string_literal: true
2
+
3
+ # lib/tasks/woods.rake
4
+ #
5
+ # Rake tasks for codebase indexing.
6
+ # These can be run manually or integrated into CI pipelines.
7
+ #
8
+ # Usage:
9
+ # bundle exec rake woods:extract # Full extraction
10
+ # bundle exec rake woods:incremental # Changed files only
11
+ # bundle exec rake woods:extract_framework # Rails/gem sources only
12
+ # bundle exec rake woods:validate # Validate index integrity
13
+ # bundle exec rake woods:stats # Show index statistics
14
+ # bundle exec rake woods:clean # Remove index
15
+ # bundle exec rake woods:self_analyze # Analyze gem's own source
16
+ # bundle exec rake woods:flow[EntryPoint] # Generate execution flow
17
+
18
+ namespace :woods do
19
desc 'Full extraction of codebase for indexing'
task extract: :environment do
  require 'woods/extractor'

  # WOODS_OUTPUT overrides the default tmp/woods directory under the Rails root.
  target_dir = ENV.fetch('WOODS_OUTPUT', Rails.root.join('tmp/woods'))
  divider = '=' * 50

  puts 'Starting full codebase extraction...'
  puts "Output directory: #{target_dir}"
  puts

  units_by_type = Woods::Extractor.new(output_dir: target_dir).extract_all

  # Summary table: one row per unit type, followed by the grand total.
  puts
  puts 'Extraction complete!'
  puts divider
  units_by_type.each do |type, units|
    puts " #{type.to_s.ljust(15)}: #{units.size} units"
  end
  puts divider
  puts " Total: #{units_by_type.values.map(&:size).sum} units"
  puts
  puts "Output written to: #{target_dir}"
end

desc 'Scan the forest — full extraction (alias for extract)'
task scan: :extract
46
+
47
desc 'Incremental extraction based on git changes'
task incremental: :environment do
  require 'open3'

  # Runs `git diff --name-only <diff_args>` and returns the listed paths.
  # A failing git invocation (unknown ref, shallow clone, not a repository)
  # aborts the task: treating it as "nothing changed" would silently leave
  # the index stale. git's own stderr is not captured, so its message is
  # still visible to the user.
  git_changed_files = lambda do |*diff_args|
    output, status = Open3.capture2('git', 'diff', '--name-only', *diff_args)
    unless status.success?
      puts "ERROR: `git diff --name-only #{diff_args.join(' ')}` failed (exit #{status.exitstatus})."
      puts 'Set CHANGED_FILES explicitly or run woods:extract for a full extraction.'
      exit 1
    end
    output.lines.map(&:strip)
  end

  # Determine changed files from CI environment or git
  changed_files = if ENV['CHANGED_FILES']
                    # Explicit comma-separated list from CI
                    ENV['CHANGED_FILES'].split(',').map(&:strip)
                  elsif ENV['CI_COMMIT_BEFORE_SHA']
                    # GitLab CI
                    git_changed_files.call("#{ENV['CI_COMMIT_BEFORE_SHA']}..#{ENV.fetch('CI_COMMIT_SHA', nil)}")
                  elsif ENV['GITHUB_BASE_REF']
                    # GitHub Actions PR
                    git_changed_files.call("origin/#{ENV['GITHUB_BASE_REF']}...HEAD")
                  else
                    # Default: changes since last commit
                    git_changed_files.call('HEAD~1')
                  end

  # Filter to relevant files
  relevant_patterns = [
    %r{^app/models/},
    %r{^app/controllers/},
    %r{^app/services/},
    %r{^app/components/},
    %r{^app/views/components/},
    %r{^app/views/.*\.rb$}, # Phlex views
    %r{^app/interactors/},
    %r{^app/operations/},
    %r{^app/commands/},
    %r{^app/use_cases/},
    %r{^app/jobs/},
    %r{^app/workers/}, # Sidekiq workers
    %r{^app/mailers/},
    %r{^app/graphql/}, # GraphQL types/mutations/resolvers
    %r{^app/serializers/},
    %r{^app/decorators/},
    %r{^app/blueprinters/},
    %r{^db/migrate/},
    %r{^db/schema\.rb$}, # Schema changes affect model metadata
    %r{^config/routes\.rb$},
    /^Gemfile\.lock$/ # Dependency changes trigger framework re-index
  ]

  changed_files = changed_files.select do |f|
    relevant_patterns.any? { |p| f.match?(p) }
  end

  if changed_files.empty?
    puts 'No relevant files changed. Skipping extraction.'
    # `next` ends only this task; `exit` would terminate the whole rake
    # process and skip any tasks queued after this one.
    next
  end

  # Loaded lazily: the extractor is only needed once there is work to do.
  require 'woods/extractor'

  output_dir = ENV.fetch('WOODS_OUTPUT', Rails.root.join('tmp/woods'))

  puts "Incremental extraction for #{changed_files.size} changed files..."
  changed_files.each { |f| puts " - #{f}" }
  puts

  extractor = Woods::Extractor.new(output_dir: output_dir)
  affected = extractor.extract_changed(changed_files)

  puts
  puts "Re-extracted #{affected.size} affected units."
end

desc 'Tend the garden — incremental extraction (alias for incremental)'
task tend: :incremental
122
+
123
desc 'Extract only Rails/gem framework sources (run when dependencies change)'
task extract_framework: :environment do
  # Required explicitly (as the flow and self_analyze tasks do) instead of
  # relying on the Rails environment having loaded them already.
  require 'json'
  require 'fileutils'
  require 'pathname'
  require 'woods/extractors/rails_source_extractor'

  output_dir = ENV.fetch('WOODS_OUTPUT', Rails.root.join('tmp/woods'))

  puts 'Extracting Rails and gem framework sources...'
  puts "Rails version: #{Rails.version}"
  puts

  extractor = Woods::Extractors::RailsSourceExtractor.new
  units = extractor.extract_all

  # Write one JSON file per unit under <output_dir>/rails_source
  framework_dir = Pathname.new(output_dir).join('rails_source')
  FileUtils.mkdir_p(framework_dir)

  units.each do |unit|
    # Path and namespace separators in the identifier are flattened to "__"
    # so the identifier can be used as a single file name.
    file_name = "#{unit.identifier.gsub('/', '__').gsub('::', '__')}.json"
    File.write(
      framework_dir.join(file_name),
      JSON.pretty_generate(unit.to_h)
    )
  end

  puts "Extracted #{units.size} framework source units."
  puts "Output: #{framework_dir}"
end
151
+
152
desc 'Validate extracted index integrity'
task validate: :environment do
  require 'json'
  require 'pathname'

  output_dir = Pathname.new(ENV.fetch('WOODS_OUTPUT', Rails.root.join('tmp/woods')))

  unless output_dir.exist?
    puts "ERROR: Index directory does not exist: #{output_dir}"
    exit 1
  end

  manifest_path = output_dir.join('manifest.json')
  unless manifest_path.exist?
    puts 'ERROR: Manifest not found. Run extraction first.'
    exit 1
  end

  # A corrupt manifest is reported as a validation failure rather than
  # surfacing as an unhandled parser exception.
  manifest = begin
    JSON.parse(File.read(manifest_path))
  rescue JSON::ParserError => e
    puts "ERROR: manifest.json: invalid JSON - #{e.message}"
    exit 1
  end

  unless manifest.is_a?(Hash)
    puts 'ERROR: manifest.json: expected a JSON object'
    exit 1
  end

  puts 'Validating index...'
  puts " Extracted at: #{manifest['extracted_at']}"
  puts " Git SHA: #{manifest['git_sha']}"
  puts

  errors = []
  warnings = []

  # A manifest without a usable "counts" object is itself an integrity error;
  # the remaining checks still run so every problem is reported at once.
  counts = manifest['counts']
  unless counts.is_a?(Hash)
    errors << "manifest.json: missing or malformed 'counts'"
    counts = {}
  end

  # Check each type directory
  counts.each do |type, expected_count|
    type_dir = output_dir.join(type)
    unless type_dir.exist?
      errors << "Missing directory: #{type}"
      next
    end

    # Unit files are every *.json in the type directory except index files.
    unit_files = Dir[type_dir.join('*.json').to_s].reject { |f| f.end_with?('_index.json') }

    if unit_files.size != expected_count
      warnings << "#{type}: expected #{expected_count}, found #{unit_files.size}"
    end

    # Validate each unit file is valid JSON with the required keys
    unit_files.each do |file|
      begin
        data = JSON.parse(File.read(file))
        unless data.is_a?(Hash)
          errors << "#{file}: expected a JSON object"
          next
        end
        errors << "#{file}: missing identifier" unless data['identifier']
        errors << "#{file}: missing source_code" unless data['source_code']
      rescue JSON::ParserError => e
        errors << "#{file}: invalid JSON - #{e.message}"
      end
    end
  end

  # Check dependency graph
  graph_path = output_dir.join('dependency_graph.json')
  if graph_path.exist?
    begin
      JSON.parse(File.read(graph_path))
    rescue JSON::ParserError
      errors << 'dependency_graph.json: invalid JSON'
    end
  else
    errors << 'Missing dependency_graph.json'
  end

  # Report
  if errors.any?
    puts 'ERRORS:'
    errors.each { |e| puts " ✗ #{e}" }
  end

  if warnings.any?
    puts 'WARNINGS:'
    warnings.each { |w| puts " ⚠ #{w}" }
  end

  if errors.empty? && warnings.empty?
    puts '✓ Index is valid.'
  elsif errors.empty?
    puts "\n✓ Index is valid with #{warnings.size} warning(s)."
  else
    puts "\n✗ Index has #{errors.size} error(s)."
    exit 1
  end
end

desc 'Vet the data — validate index integrity (alias for validate)'
task vet: :validate
238
+
239
desc 'Show index statistics'
task stats: :environment do
  index_root = Pathname.new(ENV.fetch('WOODS_OUTPUT', Rails.root.join('tmp/woods')))

  unless index_root.exist?
    puts 'Index directory does not exist. Run extraction first.'
    exit 1
  end

  # A missing manifest is tolerated; every field then prints as "unknown".
  manifest_file = index_root.join('manifest.json')
  manifest = {}
  manifest = JSON.parse(File.read(manifest_file)) if manifest_file.exist?

  puts 'Woods Index Statistics'
  puts '=' * 50
  [
    ['Extracted at', 'extracted_at'],
    ['Rails version', 'rails_version'],
    ['Ruby version', 'ruby_version'],
    ['Git SHA', 'git_sha'],
    ['Git branch', 'git_branch']
  ].each do |label, key|
    puts " #{label}: #{manifest[key] || 'unknown'}"
  end
  puts

  puts 'Units by Type'
  puts '-' * 50

  # Formats one table row; byte sizes are shown in KB with one decimal.
  row = lambda do |label, units, chunks, bytes|
    " #{label.ljust(15)}: #{units.to_s.rjust(4)} units, #{chunks.to_s.rjust(4)} chunks, #{(bytes / 1024.0).round(1).to_s.rjust(8)} KB"
  end

  bytes_total = 0
  units_total = 0
  chunks_total = 0

  (manifest['counts'] || {}).each do |type, count|
    type_dir = index_root.join(type)
    next unless type_dir.exist?

    type_bytes = Dir[type_dir.join('*.json')].sum { |path| File.size(path) }
    bytes_total += type_bytes
    units_total += count

    # Chunk counts come from the per-type _index.json, when present.
    type_index = type_dir.join('_index.json')
    type_chunks = 0
    if type_index.exist?
      entries = JSON.parse(File.read(type_index))
      type_chunks = entries.sum { |entry| entry['chunk_count'] || 0 }
      chunks_total += type_chunks
    end

    puts row.call(type, count, type_chunks, type_bytes)
  end

  puts '-' * 50
  puts row.call('Total', units_total, chunks_total, bytes_total)
  puts

  # Dependency graph stats
  graph_file = index_root.join('dependency_graph.json')
  next unless graph_file.exist?

  graph_stats = JSON.parse(File.read(graph_file))['stats'] || {}
  puts 'Dependency Graph'
  puts '-' * 50
  puts " Nodes: #{graph_stats['node_count'] || 'unknown'}"
  puts " Edges: #{graph_stats['edge_count'] || 'unknown'}"
end

desc 'Take a look — show index statistics (alias for stats)'
task look: :stats
305
+
306
desc 'Clean extracted index'
task clean: :environment do
  index_root = Pathname.new(ENV.fetch('WOODS_OUTPUT', Rails.root.join('tmp/woods')))

  # Nothing to delete: report and finish this task.
  unless index_root.exist?
    puts 'Index directory does not exist.'
    next
  end

  puts "Removing #{index_root}..."
  FileUtils.rm_rf(index_root)
  puts 'Done.'
end

desc 'Clear the brush — remove index (alias for clean)'
task clear: :clean
321
+
322
# Internal debugging tool — no `desc`, so it is hidden from `rails -T`
task :retrieve, [:query] => :environment do |_t, args|
  query = args[:query]
  raise('Usage: rake woods:retrieve[query]') unless query

  require 'woods'
  require 'woods/retriever'
  require 'woods/embedding/provider'
  require 'woods/storage/vector_store'
  require 'woods/storage/metadata_store'
  require 'woods/storage/graph_store'
  require 'woods/formatting/human_adapter'

  settings = Woods.configuration

  # Each backend is constructed with its defaults.
  embedder = Woods::Embedding::Provider::Ollama.new
  vectors = Woods::Storage::VectorStore::InMemory.new
  metadata = Woods::Storage::MetadataStore::SQLite.new
  graph = Woods::Storage::GraphStore::Memory.new

  retriever = Woods::Retriever.new(
    vector_store: vectors,
    metadata_store: metadata,
    graph_store: graph,
    embedding_provider: embedder
  )

  # The configured max_context_tokens is passed as the retrieval budget.
  result = retriever.retrieve(query, budget: settings.max_context_tokens)

  puts Woods::Formatting::HumanAdapter.new.format(result)
end
353
+
354
# Builds the Embedding::Indexer shared by the embed and embed_incremental
# tasks, so both are guaranteed to use the same provider, text preparer,
# vector store and output directory (WOODS_OUTPUT overrides the configured
# output_dir).
# NOTE(review): the vector store is the InMemory implementation — confirm the
# indexer persists what it needs under output_dir, otherwise embeddings do not
# outlive the rake process.
build_embedding_indexer = lambda do
  require 'woods'
  require 'woods/embedding/indexer'
  require 'woods/embedding/text_preparer'
  require 'woods/embedding/provider'
  require 'woods/storage/vector_store'

  config = Woods.configuration
  output_dir = ENV.fetch('WOODS_OUTPUT', config.output_dir)

  provider = Woods::Embedding::Provider::Ollama.new
  text_preparer = Woods::Embedding::TextPreparer.new
  vector_store = Woods::Storage::VectorStore::InMemory.new

  Woods::Embedding::Indexer.new(
    provider: provider,
    text_preparer: text_preparer,
    vector_store: vector_store,
    output_dir: output_dir
  )
end

# Prints the summary reported by an indexer run. `stats` is read through the
# :processed, :skipped and :errors keys.
report_embedding_stats = lambda do |headline, stats|
  puts
  puts headline
  puts " Processed: #{stats[:processed]}"
  puts " Skipped: #{stats[:skipped]}"
  puts " Errors: #{stats[:errors]}"
end

desc 'Embed all extracted units'
task embed: :environment do
  indexer = build_embedding_indexer.call

  puts 'Embedding all extracted units...'
  report_embedding_stats.call('Embedding complete!', indexer.index_all)
end

desc 'Nest the data — embed all units (alias for embed)'
task nest: :embed

desc 'Embed changed units only (incremental)'
task embed_incremental: :environment do
  indexer = build_embedding_indexer.call

  puts 'Embedding changed units (incremental)...'
  report_embedding_stats.call('Incremental embedding complete!', indexer.index_incremental)
end

desc 'Hone the blade — incremental embedding (alias for embed_incremental)'
task hone: :embed_incremental
423
+
424
# Internal debugging tool — no `desc`, so it is hidden from `rails -T`.
# Analyzes the gem's own lib/ sources, writes JSON results to tmp/woods_self
# and Mermaid documents to docs/self-analysis. Does not load the Rails
# environment, so every library it uses is required here.
task :self_analyze do
  require 'digest'
  require 'json'
  require 'fileutils'
  require 'time' # Time#iso8601 is not in core before Ruby 3.4
  require 'woods/ruby_analyzer'
  require 'woods/dependency_graph'
  require 'woods/graph_analyzer'
  require 'woods/ruby_analyzer/mermaid_renderer'

  gem_root = File.expand_path('../..', __dir__)
  json_dir = File.join(gem_root, 'tmp', 'woods_self')
  docs_dir = File.join(gem_root, 'docs', 'self-analysis')
  manifest_path = File.join(json_dir, 'manifest.json')

  # 1. Check staleness via source_checksum.
  # Files are sorted so the checksum does not depend on directory listing
  # order (Dir.glob only sorts by default from Ruby 3.0), and are fed to the
  # digest one at a time; the result equals hashing their concatenation.
  lib_files = Dir.glob(File.join(gem_root, 'lib', '**', '*.rb')).sort
  digest = Digest::SHA256.new
  lib_files.each { |f| digest << File.read(f) }
  source_checksum = digest.hexdigest

  if File.exist?(manifest_path)
    existing = JSON.parse(File.read(manifest_path))
    if existing['source_checksum'] == source_checksum
      puts 'Source unchanged — skipping self-analysis.'
      next
    end
  end

  puts 'Running self-analysis on gem source...'

  # 2. Run RubyAnalyzer
  units = Woods::RubyAnalyzer.analyze(paths: [File.join(gem_root, 'lib', 'woods')])
  puts " Analyzed #{units.size} units"

  # 3. Build DependencyGraph + GraphAnalyzer
  graph = Woods::DependencyGraph.new
  units.each { |unit| graph.register(unit) }
  analyzer = Woods::GraphAnalyzer.new(graph)
  analysis = analyzer.analyze
  graph_data = graph.to_h

  # 4. Write JSON to tmp/woods_self/
  FileUtils.mkdir_p(json_dir)

  units.each do |unit|
    # Any character outside [a-zA-Z0-9_] in the identifier becomes "_".
    file_name = "#{unit.identifier.gsub(/[^a-zA-Z0-9_]/, '_')}.json"
    File.write(
      File.join(json_dir, file_name),
      JSON.pretty_generate(unit.to_h)
    )
  end

  File.write(
    File.join(json_dir, 'dependency_graph.json'),
    JSON.pretty_generate(graph_data)
  )

  File.write(
    File.join(json_dir, 'analysis.json'),
    JSON.pretty_generate(analysis)
  )

  # 5. Render Mermaid to docs/self-analysis/
  FileUtils.mkdir_p(docs_dir)
  renderer = Woods::RubyAnalyzer::MermaidRenderer.new

  # Wraps a diagram in a titled markdown page with a mermaid code fence.
  mermaid_page = lambda do |title, diagram|
    "# #{title}\n\n```mermaid\n#{diagram}\n```\n"
  end

  File.write(
    File.join(docs_dir, 'architecture.md'),
    renderer.render_architecture(units, graph_data, analysis)
  )

  File.write(
    File.join(docs_dir, 'call-graph.md'),
    mermaid_page.call('Call Graph', renderer.render_call_graph(units))
  )

  File.write(
    File.join(docs_dir, 'dependency-map.md'),
    mermaid_page.call('Dependency Map', renderer.render_dependency_map(graph_data))
  )

  File.write(
    File.join(docs_dir, 'dataflow.md'),
    mermaid_page.call('Data Flow', renderer.render_dataflow(units))
  )

  # 6. Write the manifest last. It carries the checksum used by the staleness
  # check above, so it must only exist once every output has been written;
  # otherwise a failure while rendering would make later runs skip the work.
  manifest = {
    'source_checksum' => source_checksum,
    'generated_at' => Time.now.iso8601,
    'unit_count' => units.size,
    'node_count' => graph_data[:stats][:node_count],
    'edge_count' => graph_data[:stats][:edge_count]
  }
  File.write(manifest_path, JSON.pretty_generate(manifest))

  puts " JSON output: #{json_dir}"
  puts " Mermaid docs: #{docs_dir}"
  puts 'Self-analysis complete.'
end
523
+
524
desc 'Generate execution flow document for a Rails entry point'
task :flow, [:entry_point] => :environment do |_t, args|
  require 'json'
  require 'woods/flow_assembler'
  require 'woods/dependency_graph'

  entry_point = args[:entry_point]
  unless entry_point
    puts 'Usage: rake woods:flow[EntryPoint#method]'
    exit 1
  end

  index_dir = ENV.fetch('WOODS_OUTPUT', Rails.root.join('tmp/woods'))
  graph_file = File.join(index_dir, 'dependency_graph.json')

  # The flow is assembled from a previously extracted dependency graph.
  unless File.exist?(graph_file)
    puts "ERROR: Dependency graph not found at #{graph_file}"
    puts 'Run woods:extract first.'
    exit 1
  end

  graph = Woods::DependencyGraph.from_h(JSON.parse(File.read(graph_file)))

  # MAX_DEPTH (default 5) and FORMAT (default "markdown") come from the environment.
  depth_limit = (ENV['MAX_DEPTH'] || 5).to_i
  flow = Woods::FlowAssembler
         .new(graph: graph, extracted_dir: index_dir)
         .assemble(entry_point, max_depth: depth_limit)

  output_format = ENV.fetch('FORMAT', 'markdown').downcase

  # Anything other than "json" is rendered as markdown.
  if output_format == 'json'
    puts JSON.pretty_generate(flow.to_h)
  else
    puts flow.to_markdown
  end
end
561
+
562
desc 'Start the embedded console MCP server (stdio transport)'
task :console do
  # The MCP client parses stdout as JSON-RPC, and Rails boot prints
  # OpenTelemetry output, gem warnings, etc. there. So, before booting:
  # keep a duplicate of the real stdout for protocol traffic, then point
  # $stdout at stderr. The global is how exe/woods-console (run via `load`)
  # receives the saved stream.
  $woods_protocol_out = $stdout.dup # rubocop:disable Style/GlobalVars
  $stdout.reopen($stderr)

  # Boot Rails only after stdout has been redirected.
  Rake::Task[:environment].invoke

  console_script = File.expand_path('../../exe/woods-console', __dir__)
  load console_script
end
575
+
576
desc 'Sync extraction data to Notion databases (Data Models + Columns)'
task notion_sync: :environment do
  require 'woods/notion/exporter'

  settings = Woods.configuration
  # NOTION_API_TOKEN, when set, wins over the configured token.
  env_token = ENV.fetch('NOTION_API_TOKEN', nil)
  settings.notion_api_token = env_token || settings.notion_api_token

  unless settings.notion_api_token
    puts 'ERROR: Notion API token not configured.'
    puts 'Set NOTION_API_TOKEN env var or configure notion_api_token in Woods.configure.'
    exit 1
  end

  index_dir = ENV.fetch('WOODS_OUTPUT', settings.output_dir)

  database_ids = settings.notion_database_ids || {}
  if database_ids.empty?
    puts 'ERROR: No Notion database IDs configured.'
    puts 'Set notion_database_ids in Woods.configure:'
    puts ' config.notion_database_ids = { data_models: "db-uuid", columns: "db-uuid" }'
    exit 1
  end

  puts 'Syncing extraction data to Notion...'
  puts " Output dir: #{index_dir}"
  puts " Databases: #{database_ids.keys.join(', ')}"
  puts

  stats = Woods::Notion::Exporter.new(index_dir: index_dir).sync_all

  puts 'Sync complete!'
  puts " Data Models: #{stats[:data_models]} synced"
  puts " Columns: #{stats[:columns]} synced"

  # Show at most the first five errors, then a count of the remainder.
  failures = stats[:errors]
  if failures.any?
    puts " Errors: #{failures.size}"
    failures.first(5).each { |failure| puts " - #{failure}" }
    remaining = failures.size - 5
    puts " ... and #{remaining} more" if remaining.positive?
  end
end

desc 'Send findings from the field — sync to Notion (alias for notion_sync)'
task send: :notion_sync
621
+ end