woods 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +89 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +406 -0
- data/exe/woods-console +59 -0
- data/exe/woods-console-mcp +22 -0
- data/exe/woods-mcp +34 -0
- data/exe/woods-mcp-http +37 -0
- data/exe/woods-mcp-start +58 -0
- data/lib/generators/woods/install_generator.rb +32 -0
- data/lib/generators/woods/pgvector_generator.rb +37 -0
- data/lib/generators/woods/templates/add_pgvector_to_woods.rb.erb +15 -0
- data/lib/generators/woods/templates/create_woods_tables.rb.erb +43 -0
- data/lib/tasks/woods.rake +621 -0
- data/lib/tasks/woods_evaluation.rake +115 -0
- data/lib/woods/ast/call_site_extractor.rb +106 -0
- data/lib/woods/ast/method_extractor.rb +71 -0
- data/lib/woods/ast/node.rb +116 -0
- data/lib/woods/ast/parser.rb +614 -0
- data/lib/woods/ast.rb +6 -0
- data/lib/woods/builder.rb +200 -0
- data/lib/woods/cache/cache_middleware.rb +199 -0
- data/lib/woods/cache/cache_store.rb +264 -0
- data/lib/woods/cache/redis_cache_store.rb +116 -0
- data/lib/woods/cache/solid_cache_store.rb +111 -0
- data/lib/woods/chunking/chunk.rb +84 -0
- data/lib/woods/chunking/semantic_chunker.rb +295 -0
- data/lib/woods/console/adapters/cache_adapter.rb +58 -0
- data/lib/woods/console/adapters/good_job_adapter.rb +33 -0
- data/lib/woods/console/adapters/job_adapter.rb +68 -0
- data/lib/woods/console/adapters/sidekiq_adapter.rb +33 -0
- data/lib/woods/console/adapters/solid_queue_adapter.rb +33 -0
- data/lib/woods/console/audit_logger.rb +75 -0
- data/lib/woods/console/bridge.rb +177 -0
- data/lib/woods/console/confirmation.rb +90 -0
- data/lib/woods/console/connection_manager.rb +173 -0
- data/lib/woods/console/console_response_renderer.rb +74 -0
- data/lib/woods/console/embedded_executor.rb +373 -0
- data/lib/woods/console/model_validator.rb +81 -0
- data/lib/woods/console/rack_middleware.rb +87 -0
- data/lib/woods/console/safe_context.rb +82 -0
- data/lib/woods/console/server.rb +612 -0
- data/lib/woods/console/sql_validator.rb +172 -0
- data/lib/woods/console/tools/tier1.rb +118 -0
- data/lib/woods/console/tools/tier2.rb +117 -0
- data/lib/woods/console/tools/tier3.rb +110 -0
- data/lib/woods/console/tools/tier4.rb +79 -0
- data/lib/woods/coordination/pipeline_lock.rb +109 -0
- data/lib/woods/cost_model/embedding_cost.rb +88 -0
- data/lib/woods/cost_model/estimator.rb +128 -0
- data/lib/woods/cost_model/provider_pricing.rb +67 -0
- data/lib/woods/cost_model/storage_cost.rb +52 -0
- data/lib/woods/cost_model.rb +22 -0
- data/lib/woods/db/migrations/001_create_units.rb +38 -0
- data/lib/woods/db/migrations/002_create_edges.rb +35 -0
- data/lib/woods/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/woods/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/woods/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/woods/db/migrations/006_rename_tables.rb +34 -0
- data/lib/woods/db/migrator.rb +73 -0
- data/lib/woods/db/schema_version.rb +73 -0
- data/lib/woods/dependency_graph.rb +236 -0
- data/lib/woods/embedding/indexer.rb +140 -0
- data/lib/woods/embedding/openai.rb +126 -0
- data/lib/woods/embedding/provider.rb +162 -0
- data/lib/woods/embedding/text_preparer.rb +112 -0
- data/lib/woods/evaluation/baseline_runner.rb +115 -0
- data/lib/woods/evaluation/evaluator.rb +139 -0
- data/lib/woods/evaluation/metrics.rb +79 -0
- data/lib/woods/evaluation/query_set.rb +148 -0
- data/lib/woods/evaluation/report_generator.rb +90 -0
- data/lib/woods/extracted_unit.rb +145 -0
- data/lib/woods/extractor.rb +1028 -0
- data/lib/woods/extractors/action_cable_extractor.rb +201 -0
- data/lib/woods/extractors/ast_source_extraction.rb +46 -0
- data/lib/woods/extractors/behavioral_profile.rb +309 -0
- data/lib/woods/extractors/caching_extractor.rb +261 -0
- data/lib/woods/extractors/callback_analyzer.rb +246 -0
- data/lib/woods/extractors/concern_extractor.rb +292 -0
- data/lib/woods/extractors/configuration_extractor.rb +219 -0
- data/lib/woods/extractors/controller_extractor.rb +404 -0
- data/lib/woods/extractors/database_view_extractor.rb +278 -0
- data/lib/woods/extractors/decorator_extractor.rb +253 -0
- data/lib/woods/extractors/engine_extractor.rb +223 -0
- data/lib/woods/extractors/event_extractor.rb +211 -0
- data/lib/woods/extractors/factory_extractor.rb +289 -0
- data/lib/woods/extractors/graphql_extractor.rb +892 -0
- data/lib/woods/extractors/i18n_extractor.rb +117 -0
- data/lib/woods/extractors/job_extractor.rb +374 -0
- data/lib/woods/extractors/lib_extractor.rb +218 -0
- data/lib/woods/extractors/mailer_extractor.rb +269 -0
- data/lib/woods/extractors/manager_extractor.rb +188 -0
- data/lib/woods/extractors/middleware_extractor.rb +133 -0
- data/lib/woods/extractors/migration_extractor.rb +469 -0
- data/lib/woods/extractors/model_extractor.rb +988 -0
- data/lib/woods/extractors/phlex_extractor.rb +252 -0
- data/lib/woods/extractors/policy_extractor.rb +191 -0
- data/lib/woods/extractors/poro_extractor.rb +229 -0
- data/lib/woods/extractors/pundit_extractor.rb +223 -0
- data/lib/woods/extractors/rails_source_extractor.rb +473 -0
- data/lib/woods/extractors/rake_task_extractor.rb +343 -0
- data/lib/woods/extractors/route_extractor.rb +181 -0
- data/lib/woods/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/woods/extractors/serializer_extractor.rb +339 -0
- data/lib/woods/extractors/service_extractor.rb +217 -0
- data/lib/woods/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/woods/extractors/shared_utility_methods.rb +281 -0
- data/lib/woods/extractors/state_machine_extractor.rb +398 -0
- data/lib/woods/extractors/test_mapping_extractor.rb +225 -0
- data/lib/woods/extractors/validator_extractor.rb +211 -0
- data/lib/woods/extractors/view_component_extractor.rb +311 -0
- data/lib/woods/extractors/view_template_extractor.rb +261 -0
- data/lib/woods/feedback/gap_detector.rb +89 -0
- data/lib/woods/feedback/store.rb +119 -0
- data/lib/woods/filename_utils.rb +32 -0
- data/lib/woods/flow_analysis/operation_extractor.rb +206 -0
- data/lib/woods/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/woods/flow_assembler.rb +290 -0
- data/lib/woods/flow_document.rb +191 -0
- data/lib/woods/flow_precomputer.rb +102 -0
- data/lib/woods/formatting/base.rb +30 -0
- data/lib/woods/formatting/claude_adapter.rb +98 -0
- data/lib/woods/formatting/generic_adapter.rb +56 -0
- data/lib/woods/formatting/gpt_adapter.rb +64 -0
- data/lib/woods/formatting/human_adapter.rb +78 -0
- data/lib/woods/graph_analyzer.rb +374 -0
- data/lib/woods/mcp/bootstrapper.rb +96 -0
- data/lib/woods/mcp/index_reader.rb +394 -0
- data/lib/woods/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/woods/mcp/renderers/json_renderer.rb +17 -0
- data/lib/woods/mcp/renderers/markdown_renderer.rb +353 -0
- data/lib/woods/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/woods/mcp/server.rb +962 -0
- data/lib/woods/mcp/tool_response_renderer.rb +85 -0
- data/lib/woods/model_name_cache.rb +51 -0
- data/lib/woods/notion/client.rb +217 -0
- data/lib/woods/notion/exporter.rb +219 -0
- data/lib/woods/notion/mapper.rb +40 -0
- data/lib/woods/notion/mappers/column_mapper.rb +57 -0
- data/lib/woods/notion/mappers/migration_mapper.rb +39 -0
- data/lib/woods/notion/mappers/model_mapper.rb +161 -0
- data/lib/woods/notion/mappers/shared.rb +22 -0
- data/lib/woods/notion/rate_limiter.rb +68 -0
- data/lib/woods/observability/health_check.rb +79 -0
- data/lib/woods/observability/instrumentation.rb +34 -0
- data/lib/woods/observability/structured_logger.rb +57 -0
- data/lib/woods/operator/error_escalator.rb +81 -0
- data/lib/woods/operator/pipeline_guard.rb +92 -0
- data/lib/woods/operator/status_reporter.rb +80 -0
- data/lib/woods/railtie.rb +38 -0
- data/lib/woods/resilience/circuit_breaker.rb +99 -0
- data/lib/woods/resilience/index_validator.rb +167 -0
- data/lib/woods/resilience/retryable_provider.rb +108 -0
- data/lib/woods/retrieval/context_assembler.rb +261 -0
- data/lib/woods/retrieval/query_classifier.rb +133 -0
- data/lib/woods/retrieval/ranker.rb +277 -0
- data/lib/woods/retrieval/search_executor.rb +316 -0
- data/lib/woods/retriever.rb +152 -0
- data/lib/woods/ruby_analyzer/class_analyzer.rb +170 -0
- data/lib/woods/ruby_analyzer/dataflow_analyzer.rb +77 -0
- data/lib/woods/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/woods/ruby_analyzer/mermaid_renderer.rb +280 -0
- data/lib/woods/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/woods/ruby_analyzer/trace_enricher.rb +143 -0
- data/lib/woods/ruby_analyzer.rb +87 -0
- data/lib/woods/session_tracer/file_store.rb +104 -0
- data/lib/woods/session_tracer/middleware.rb +143 -0
- data/lib/woods/session_tracer/redis_store.rb +106 -0
- data/lib/woods/session_tracer/session_flow_assembler.rb +254 -0
- data/lib/woods/session_tracer/session_flow_document.rb +223 -0
- data/lib/woods/session_tracer/solid_cache_store.rb +139 -0
- data/lib/woods/session_tracer/store.rb +81 -0
- data/lib/woods/storage/graph_store.rb +120 -0
- data/lib/woods/storage/metadata_store.rb +196 -0
- data/lib/woods/storage/pgvector.rb +195 -0
- data/lib/woods/storage/qdrant.rb +205 -0
- data/lib/woods/storage/vector_store.rb +167 -0
- data/lib/woods/temporal/json_snapshot_store.rb +245 -0
- data/lib/woods/temporal/snapshot_store.rb +345 -0
- data/lib/woods/token_utils.rb +19 -0
- data/lib/woods/version.rb +5 -0
- data/lib/woods.rb +246 -0
- metadata +270 -0
|
@@ -0,0 +1,621 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# lib/tasks/woods.rake
|
|
4
|
+
#
|
|
5
|
+
# Rake tasks for codebase indexing.
|
|
6
|
+
# These can be run manually or integrated into CI pipelines.
|
|
7
|
+
#
|
|
8
|
+
# Usage:
|
|
9
|
+
# bundle exec rake woods:extract # Full extraction
|
|
10
|
+
# bundle exec rake woods:incremental # Changed files only
|
|
11
|
+
# bundle exec rake woods:extract_framework # Rails/gem sources only
|
|
12
|
+
# bundle exec rake woods:validate # Validate index integrity
|
|
13
|
+
# bundle exec rake woods:stats # Show index statistics
|
|
14
|
+
# bundle exec rake woods:clean # Remove index
|
|
15
|
+
# bundle exec rake woods:self_analyze # Analyze gem's own source
|
|
16
|
+
# bundle exec rake woods:flow[EntryPoint] # Generate execution flow
|
|
17
|
+
|
|
18
|
+
namespace :woods do
|
|
19
|
+
# Runs a complete extraction pass over the host application and writes
# one JSON document per extracted unit under the output directory.
desc 'Full extraction of codebase for indexing'
task extract: :environment do
  require 'woods/extractor'

  # WOODS_OUTPUT overrides the default tmp/woods location.
  out_dir = ENV.fetch('WOODS_OUTPUT', Rails.root.join('tmp/woods'))

  puts 'Starting full codebase extraction...'
  puts "Output directory: #{out_dir}"
  puts

  extracted = Woods::Extractor.new(output_dir: out_dir).extract_all

  # Per-type summary followed by a grand total.
  puts
  puts 'Extraction complete!'
  puts '=' * 50
  extracted.each_pair do |kind, units|
    puts " #{kind.to_s.ljust(15)}: #{units.size} units"
  end
  puts '=' * 50
  puts " Total: #{extracted.values.sum(&:size)} units"
  puts
  puts "Output written to: #{out_dir}"
end

desc 'Scan the forest — full extraction (alias for extract)'
task scan: :extract
|
|
46
|
+
|
|
47
|
+
# Re-extracts only the units affected by recently changed files.
# Changed files come from (in priority order): an explicit CHANGED_FILES
# list, GitLab CI commit range, GitHub Actions PR base ref, or plain
# `git diff HEAD~1` as a local fallback.
desc 'Incremental extraction based on git changes'
task incremental: :environment do
  require 'woods/extractor'

  output_dir = ENV.fetch('WOODS_OUTPUT', Rails.root.join('tmp/woods'))

  # Determine changed files from CI environment or git
  require 'open3'

  changed_files = if ENV['CHANGED_FILES']
                    # Explicit list from CI
                    ENV['CHANGED_FILES'].split(',').map(&:strip)
                  elsif ENV['CI_COMMIT_BEFORE_SHA']
                    # GitLab CI
                    output, = Open3.capture2('git', 'diff', '--name-only',
                                             "#{ENV['CI_COMMIT_BEFORE_SHA']}..#{ENV.fetch('CI_COMMIT_SHA', nil)}")
                    output.lines.map(&:strip)
                  elsif ENV['GITHUB_BASE_REF']
                    # GitHub Actions PR
                    output, = Open3.capture2('git', 'diff', '--name-only',
                                             "origin/#{ENV['GITHUB_BASE_REF']}...HEAD")
                    output.lines.map(&:strip)
                  else
                    # Default: changes since last commit
                    output, = Open3.capture2('git', 'diff', '--name-only', 'HEAD~1')
                    output.lines.map(&:strip)
                  end

  # Filter to files whose extraction output could actually change.
  relevant_patterns = [
    %r{^app/models/},
    %r{^app/controllers/},
    %r{^app/services/},
    %r{^app/components/},
    %r{^app/views/components/},
    %r{^app/views/.*\.rb$}, # Phlex views
    %r{^app/interactors/},
    %r{^app/operations/},
    %r{^app/commands/},
    %r{^app/use_cases/},
    %r{^app/jobs/},
    %r{^app/workers/}, # Sidekiq workers
    %r{^app/mailers/},
    %r{^app/graphql/}, # GraphQL types/mutations/resolvers
    %r{^app/serializers/},
    %r{^app/decorators/},
    %r{^app/blueprinters/},
    %r{^db/migrate/},
    %r{^db/schema\.rb$}, # Schema changes affect model metadata
    %r{^config/routes\.rb$},
    /^Gemfile\.lock$/ # Dependency changes trigger framework re-index
  ]

  changed_files = changed_files.select do |f|
    relevant_patterns.any? { |p| f.match?(p) }
  end

  if changed_files.empty?
    puts 'No relevant files changed. Skipping extraction.'
    # `next` instead of `exit 0`: exiting terminates the whole rake
    # process, which would silently skip any tasks chained after this
    # one. This also matches the skip style used by woods:self_analyze.
    next
  end

  puts "Incremental extraction for #{changed_files.size} changed files..."
  changed_files.each { |f| puts " - #{f}" }
  puts

  extractor = Woods::Extractor.new(output_dir: output_dir)
  affected = extractor.extract_changed(changed_files)

  puts
  puts "Re-extracted #{affected.size} affected units."
end

desc 'Tend the garden — incremental extraction (alias for incremental)'
task tend: :incremental
|
|
122
|
+
|
|
123
|
+
# Extracts Rails/gem framework source units and writes them as JSON
# under <output>/rails_source/. Intended to run when Gemfile.lock changes.
desc 'Extract only Rails/gem framework sources (run when dependencies change)'
task extract_framework: :environment do
  require 'woods/extractors/rails_source_extractor'

  output_dir = ENV.fetch('WOODS_OUTPUT', Rails.root.join('tmp/woods'))

  puts 'Extracting Rails and gem framework sources...'
  puts "Rails version: #{Rails.version}"
  puts

  units = Woods::Extractors::RailsSourceExtractor.new.extract_all

  framework_dir = Pathname.new(output_dir).join('rails_source')
  FileUtils.mkdir_p(framework_dir)

  units.each do |unit|
    # Flatten path separators and namespace markers into a safe file name.
    safe_name = unit.identifier.gsub('/', '__').gsub('::', '__')
    File.write(framework_dir.join("#{safe_name}.json"), JSON.pretty_generate(unit.to_h))
  end

  puts "Extracted #{units.size} framework source units."
  puts "Output: #{framework_dir}"
end
|
|
151
|
+
|
|
152
|
+
# Validates the on-disk index: directory layout matches the manifest's
# counts, every unit file is parseable JSON with the mandatory keys, and
# the dependency graph exists and parses. Exits non-zero on errors;
# count mismatches are reported as warnings only.
desc 'Validate extracted index integrity'
task validate: :environment do
  output_dir = Pathname.new(ENV.fetch('WOODS_OUTPUT', Rails.root.join('tmp/woods')))

  unless output_dir.exist?
    puts "ERROR: Index directory does not exist: #{output_dir}"
    exit 1
  end

  manifest_path = output_dir.join('manifest.json')
  unless manifest_path.exist?
    puts 'ERROR: Manifest not found. Run extraction first.'
    exit 1
  end

  manifest = JSON.parse(File.read(manifest_path))

  puts 'Validating index...'
  puts " Extracted at: #{manifest['extracted_at']}"
  puts " Git SHA: #{manifest['git_sha']}"
  puts

  errors = []
  warnings = []

  # Check each type directory against the manifest's expected counts.
  manifest['counts'].each do |type, expected_count|
    type_dir = output_dir.join(type)
    unless type_dir.exist?
      errors << "Missing directory: #{type}"
      next
    end

    # Glob once and reuse the list (the original code globbed the same
    # directory twice). Unit files exclude the per-type _index.json.
    unit_files = Dir[type_dir.join('*.json')].reject { |f| f.end_with?('_index.json') }

    warnings << "#{type}: expected #{expected_count}, found #{unit_files.size}" if unit_files.size != expected_count

    # Each unit file must parse and carry the two mandatory keys.
    unit_files.each do |file|
      data = JSON.parse(File.read(file))
      errors << "#{file}: missing identifier" unless data['identifier']
      errors << "#{file}: missing source_code" unless data['source_code']
    rescue JSON::ParserError => e
      errors << "#{file}: invalid JSON - #{e.message}"
    end
  end

  # Check dependency graph
  graph_path = output_dir.join('dependency_graph.json')
  if graph_path.exist?
    begin
      JSON.parse(File.read(graph_path))
    rescue JSON::ParserError
      errors << 'dependency_graph.json: invalid JSON'
    end
  else
    errors << 'Missing dependency_graph.json'
  end

  # Report
  if errors.any?
    puts 'ERRORS:'
    errors.each { |e| puts " ✗ #{e}" }
  end

  if warnings.any?
    puts 'WARNINGS:'
    warnings.each { |w| puts " ⚠ #{w}" }
  end

  if errors.empty? && warnings.empty?
    puts '✓ Index is valid.'
  elsif errors.empty?
    puts "\n✓ Index is valid with #{warnings.size} warning(s)."
  else
    puts "\n✗ Index has #{errors.size} error(s)."
    exit 1
  end
end

desc 'Vet the data — validate index integrity (alias for validate)'
task vet: :validate
|
|
238
|
+
|
|
239
|
+
# Prints a human-readable summary of the index: manifest metadata,
# per-type unit/chunk/size figures, and dependency-graph stats.
desc 'Show index statistics'
task stats: :environment do
  output_dir = Pathname.new(ENV.fetch('WOODS_OUTPUT', Rails.root.join('tmp/woods')))

  unless output_dir.exist?
    puts 'Index directory does not exist. Run extraction first.'
    exit 1
  end

  manifest_path = output_dir.join('manifest.json')
  manifest = manifest_path.exist? ? JSON.parse(File.read(manifest_path)) : {}

  puts 'Woods Index Statistics'
  puts '=' * 50
  puts " Extracted at: #{manifest['extracted_at'] || 'unknown'}"
  puts " Rails version: #{manifest['rails_version'] || 'unknown'}"
  puts " Ruby version: #{manifest['ruby_version'] || 'unknown'}"
  puts " Git SHA: #{manifest['git_sha'] || 'unknown'}"
  puts " Git branch: #{manifest['git_branch'] || 'unknown'}"
  puts

  puts 'Units by Type'
  puts '-' * 50

  bytes_total = 0
  units_total = 0
  chunks_total = 0

  (manifest['counts'] || {}).each do |type, count|
    type_dir = output_dir.join(type)
    next unless type_dir.exist?

    type_bytes = Dir[type_dir.join('*.json')].sum { |f| File.size(f) }
    bytes_total += type_bytes
    units_total += count

    # Chunk counts live in the per-type _index.json, when present.
    index_path = type_dir.join('_index.json')
    type_chunks = if index_path.exist?
                    JSON.parse(File.read(index_path)).sum { |u| u['chunk_count'] || 0 }
                  else
                    0
                  end
    chunks_total += type_chunks

    puts " #{type.ljust(15)}: #{count.to_s.rjust(4)} units, #{type_chunks.to_s.rjust(4)} chunks, #{(type_bytes / 1024.0).round(1).to_s.rjust(8)} KB"
  end

  puts '-' * 50
  puts " #{'Total'.ljust(15)}: #{units_total.to_s.rjust(4)} units, #{chunks_total.to_s.rjust(4)} chunks, #{(bytes_total / 1024.0).round(1).to_s.rjust(8)} KB"
  puts

  # Dependency graph stats, when the graph has been generated.
  graph_path = output_dir.join('dependency_graph.json')
  if graph_path.exist?
    graph_stats = JSON.parse(File.read(graph_path))['stats'] || {}
    puts 'Dependency Graph'
    puts '-' * 50
    puts " Nodes: #{graph_stats['node_count'] || 'unknown'}"
    puts " Edges: #{graph_stats['edge_count'] || 'unknown'}"
  end
end

desc 'Take a look — show index statistics (alias for stats)'
task look: :stats
|
|
305
|
+
|
|
306
|
+
# Deletes the entire index directory.
desc 'Clean extracted index'
task clean: :environment do
  output_dir = Pathname.new(ENV.fetch('WOODS_OUTPUT', Rails.root.join('tmp/woods')))

  # Guard clause: nothing to remove.
  unless output_dir.exist?
    puts 'Index directory does not exist.'
    next
  end

  puts "Removing #{output_dir}..."
  FileUtils.rm_rf(output_dir)
  puts 'Done.'
end

desc 'Clear the brush — remove index (alias for clean)'
task clear: :clean
|
|
321
|
+
|
|
322
|
+
# Internal debugging tool — hidden from `rails -T`
#
# Runs a one-off retrieval for the given query against in-memory/SQLite
# stores and prints the human-formatted result.
task :retrieve, [:query] => :environment do |_t, args|
  query = args[:query] || raise('Usage: rake woods:retrieve[query]')

  require 'woods'
  require 'woods/retriever'
  require 'woods/embedding/provider'
  require 'woods/storage/vector_store'
  require 'woods/storage/metadata_store'
  require 'woods/storage/graph_store'
  require 'woods/formatting/human_adapter'

  config = Woods.configuration

  # Throwaway store/provider instances for an ad-hoc query.
  embedder = Woods::Embedding::Provider::Ollama.new
  vectors = Woods::Storage::VectorStore::InMemory.new
  metadata = Woods::Storage::MetadataStore::SQLite.new
  graph_db = Woods::Storage::GraphStore::Memory.new

  retriever = Woods::Retriever.new(
    vector_store: vectors,
    metadata_store: metadata,
    graph_store: graph_db,
    embedding_provider: embedder
  )

  result = retriever.retrieve(query, budget: config.max_context_tokens)

  puts Woods::Formatting::HumanAdapter.new.format(result)
end
|
|
353
|
+
|
|
354
|
+
# Embeds every extracted unit and reports processed/skipped/error counts.
desc 'Embed all extracted units'
task embed: :environment do
  require 'woods'
  require 'woods/embedding/indexer'
  require 'woods/embedding/text_preparer'
  require 'woods/embedding/provider'
  require 'woods/storage/vector_store'

  config = Woods.configuration
  out_dir = ENV.fetch('WOODS_OUTPUT', config.output_dir)

  indexer = Woods::Embedding::Indexer.new(
    provider: Woods::Embedding::Provider::Ollama.new,
    text_preparer: Woods::Embedding::TextPreparer.new,
    vector_store: Woods::Storage::VectorStore::InMemory.new,
    output_dir: out_dir
  )

  puts 'Embedding all extracted units...'
  stats = indexer.index_all

  puts
  puts 'Embedding complete!'
  puts " Processed: #{stats[:processed]}"
  puts " Skipped: #{stats[:skipped]}"
  puts " Errors: #{stats[:errors]}"
end

desc 'Nest the data — embed all units (alias for embed)'
task nest: :embed
|
|
388
|
+
|
|
389
|
+
# Embeds only units whose content changed since the last embedding run.
desc 'Embed changed units only (incremental)'
task embed_incremental: :environment do
  require 'woods'
  require 'woods/embedding/indexer'
  require 'woods/embedding/text_preparer'
  require 'woods/embedding/provider'
  require 'woods/storage/vector_store'

  config = Woods.configuration
  out_dir = ENV.fetch('WOODS_OUTPUT', config.output_dir)

  indexer = Woods::Embedding::Indexer.new(
    provider: Woods::Embedding::Provider::Ollama.new,
    text_preparer: Woods::Embedding::TextPreparer.new,
    vector_store: Woods::Storage::VectorStore::InMemory.new,
    output_dir: out_dir
  )

  puts 'Embedding changed units (incremental)...'
  stats = indexer.index_incremental

  puts
  puts 'Incremental embedding complete!'
  puts " Processed: #{stats[:processed]}"
  puts " Skipped: #{stats[:skipped]}"
  puts " Errors: #{stats[:errors]}"
end

desc 'Hone the blade — incremental embedding (alias for embed_incremental)'
task hone: :embed_incremental
|
|
423
|
+
|
|
424
|
+
# Internal debugging tool — hidden from `rails -T`
#
# Analyzes the gem's own lib/ source with RubyAnalyzer, writes JSON
# artifacts to tmp/woods_self/ and Mermaid docs to docs/self-analysis/.
# A SHA256 checksum of lib/ is stored in the manifest so unchanged
# source skips the whole run.
task :self_analyze do
  require 'digest'
  require 'json'
  # Time#iso8601 is defined by the stdlib 'time' extension. This task
  # deliberately has no :environment dependency, so Rails is not around
  # to load it — without this require the manifest write below raises.
  require 'time'
  require 'fileutils'
  require 'woods/ruby_analyzer'
  require 'woods/dependency_graph'
  require 'woods/graph_analyzer'
  require 'woods/ruby_analyzer/mermaid_renderer'

  gem_root = File.expand_path('../..', __dir__)
  json_dir = File.join(gem_root, 'tmp', 'woods_self')
  docs_dir = File.join(gem_root, 'docs', 'self-analysis')
  manifest_path = File.join(json_dir, 'manifest.json')

  # 1. Check staleness via source_checksum
  lib_files = Dir.glob(File.join(gem_root, 'lib', '**', '*.rb'))
  source_content = lib_files.map { |f| File.read(f) }.join
  source_checksum = Digest::SHA256.hexdigest(source_content)

  if File.exist?(manifest_path)
    existing = JSON.parse(File.read(manifest_path))
    if existing['source_checksum'] == source_checksum
      puts 'Source unchanged — skipping self-analysis.'
      next
    end
  end

  puts 'Running self-analysis on gem source...'

  # 2. Run RubyAnalyzer
  units = Woods::RubyAnalyzer.analyze(paths: [File.join(gem_root, 'lib', 'woods')])
  puts " Analyzed #{units.size} units"

  # 3. Build DependencyGraph + GraphAnalyzer
  graph = Woods::DependencyGraph.new
  units.each { |unit| graph.register(unit) }
  analyzer = Woods::GraphAnalyzer.new(graph)
  analysis = analyzer.analyze
  graph_data = graph.to_h

  # 4. Write JSON to tmp/woods_self/
  FileUtils.mkdir_p(json_dir)

  units.each do |unit|
    # Sanitize the identifier into a filesystem-safe name.
    file_name = "#{unit.identifier.gsub(/[^a-zA-Z0-9_]/, '_')}.json"
    File.write(
      File.join(json_dir, file_name),
      JSON.pretty_generate(unit.to_h)
    )
  end

  File.write(
    File.join(json_dir, 'dependency_graph.json'),
    JSON.pretty_generate(graph_data)
  )

  File.write(
    File.join(json_dir, 'analysis.json'),
    JSON.pretty_generate(analysis)
  )

  manifest = {
    'source_checksum' => source_checksum,
    'generated_at' => Time.now.iso8601,
    'unit_count' => units.size,
    'node_count' => graph_data[:stats][:node_count],
    'edge_count' => graph_data[:stats][:edge_count]
  }
  File.write(manifest_path, JSON.pretty_generate(manifest))

  # 5. Render Mermaid to docs/self-analysis/
  FileUtils.mkdir_p(docs_dir)
  renderer = Woods::RubyAnalyzer::MermaidRenderer.new

  File.write(
    File.join(docs_dir, 'architecture.md'),
    renderer.render_architecture(units, graph_data, analysis)
  )

  File.write(
    File.join(docs_dir, 'call-graph.md'),
    "# Call Graph\n\n```mermaid\n#{renderer.render_call_graph(units)}\n```\n"
  )

  File.write(
    File.join(docs_dir, 'dependency-map.md'),
    "# Dependency Map\n\n```mermaid\n#{renderer.render_dependency_map(graph_data)}\n```\n"
  )

  File.write(
    File.join(docs_dir, 'dataflow.md'),
    "# Data Flow\n\n```mermaid\n#{renderer.render_dataflow(units)}\n```\n"
  )

  puts " JSON output: #{json_dir}"
  puts " Mermaid docs: #{docs_dir}"
  puts 'Self-analysis complete.'
end
|
|
523
|
+
|
|
524
|
+
# Assembles an execution-flow document for a given entry point using the
# previously extracted dependency graph. FORMAT=json|markdown (default
# markdown); MAX_DEPTH bounds traversal depth (default 5).
desc 'Generate execution flow document for a Rails entry point'
task :flow, [:entry_point] => :environment do |_t, args|
  require 'json'
  require 'woods/flow_assembler'
  require 'woods/dependency_graph'

  entry_point = args[:entry_point]
  unless entry_point
    puts 'Usage: rake woods:flow[EntryPoint#method]'
    exit 1
  end

  output_dir = ENV.fetch('WOODS_OUTPUT', Rails.root.join('tmp/woods'))
  graph_path = File.join(output_dir, 'dependency_graph.json')

  unless File.exist?(graph_path)
    puts "ERROR: Dependency graph not found at #{graph_path}"
    puts 'Run woods:extract first.'
    exit 1
  end

  graph = Woods::DependencyGraph.from_h(JSON.parse(File.read(graph_path)))

  assembler = Woods::FlowAssembler.new(graph: graph, extracted_dir: output_dir)
  flow = assembler.assemble(entry_point, max_depth: ENV.fetch('MAX_DEPTH', 5).to_i)

  if ENV.fetch('FORMAT', 'markdown').downcase == 'json'
    puts JSON.pretty_generate(flow.to_h)
  else
    puts flow.to_markdown
  end
end
|
|
561
|
+
|
|
562
|
+
desc 'Start the embedded console MCP server (stdio transport)'
task :console do
  # Duplicate the real stdout BEFORE Rails boots: boot-time output
  # (OpenTelemetry, gem warnings, etc.) would otherwise land in the
  # JSON-RPC stream and break the MCP client's parser. The duplicated
  # fd is handed to exe/woods-console through a global because that
  # script is pulled in via `load` below.
  $woods_protocol_out = $stdout.dup # rubocop:disable Style/GlobalVars
  # From here on, anything written to stdout is diverted to stderr.
  $stdout.reopen($stderr)

  Rake::Task[:environment].invoke

  load File.expand_path('../../exe/woods-console', __dir__)
end
|
|
575
|
+
|
|
576
|
+
# Pushes extraction data into the configured Notion databases. Requires
# a Notion API token (NOTION_API_TOKEN or Woods.configure) and database
# IDs; aborts with a usage hint when either is missing.
desc 'Sync extraction data to Notion databases (Data Models + Columns)'
task notion_sync: :environment do
  require 'woods/notion/exporter'

  config = Woods.configuration
  # Env var takes precedence over configured value
  config.notion_api_token = ENV.fetch('NOTION_API_TOKEN', nil) || config.notion_api_token

  unless config.notion_api_token
    puts 'ERROR: Notion API token not configured.'
    puts 'Set NOTION_API_TOKEN env var or configure notion_api_token in Woods.configure.'
    exit 1
  end

  output_dir = ENV.fetch('WOODS_OUTPUT', config.output_dir)

  db_ids = config.notion_database_ids || {}
  if db_ids.empty?
    puts 'ERROR: No Notion database IDs configured.'
    puts 'Set notion_database_ids in Woods.configure:'
    puts ' config.notion_database_ids = { data_models: "db-uuid", columns: "db-uuid" }'
    exit 1
  end

  puts 'Syncing extraction data to Notion...'
  puts " Output dir: #{output_dir}"
  puts " Databases: #{db_ids.keys.join(', ')}"
  puts

  sync_stats = Woods::Notion::Exporter.new(index_dir: output_dir).sync_all

  puts 'Sync complete!'
  puts " Data Models: #{sync_stats[:data_models]} synced"
  puts " Columns: #{sync_stats[:columns]} synced"

  # Show at most the first five errors, then a count of the remainder.
  if sync_stats[:errors].any?
    puts " Errors: #{sync_stats[:errors].size}"
    sync_stats[:errors].first(5).each { |e| puts " - #{e}" }
    puts " ... and #{sync_stats[:errors].size - 5} more" if sync_stats[:errors].size > 5
  end
end

desc 'Send findings from the field — sync to Notion (alias for notion_sync)'
task send: :notion_sync
|
|
621
|
+
end
|