codebase_index 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/exe/codebase-console-mcp +22 -0
- data/exe/codebase-index-mcp +61 -0
- data/exe/codebase-index-mcp-http +64 -0
- data/exe/codebase-index-mcp-start +58 -0
- data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
- data/lib/codebase_index/ast/method_extractor.rb +76 -0
- data/lib/codebase_index/ast/node.rb +88 -0
- data/lib/codebase_index/ast/parser.rb +653 -0
- data/lib/codebase_index/ast.rb +6 -0
- data/lib/codebase_index/builder.rb +137 -0
- data/lib/codebase_index/chunking/chunk.rb +84 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
- data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
- data/lib/codebase_index/console/audit_logger.rb +75 -0
- data/lib/codebase_index/console/bridge.rb +170 -0
- data/lib/codebase_index/console/confirmation.rb +90 -0
- data/lib/codebase_index/console/connection_manager.rb +173 -0
- data/lib/codebase_index/console/console_response_renderer.rb +78 -0
- data/lib/codebase_index/console/model_validator.rb +81 -0
- data/lib/codebase_index/console/safe_context.rb +82 -0
- data/lib/codebase_index/console/server.rb +557 -0
- data/lib/codebase_index/console/sql_validator.rb +172 -0
- data/lib/codebase_index/console/tools/tier1.rb +118 -0
- data/lib/codebase_index/console/tools/tier2.rb +117 -0
- data/lib/codebase_index/console/tools/tier3.rb +110 -0
- data/lib/codebase_index/console/tools/tier4.rb +79 -0
- data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
- data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
- data/lib/codebase_index/cost_model/estimator.rb +128 -0
- data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
- data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
- data/lib/codebase_index/cost_model.rb +22 -0
- data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
- data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/codebase_index/db/migrator.rb +71 -0
- data/lib/codebase_index/db/schema_version.rb +73 -0
- data/lib/codebase_index/dependency_graph.rb +227 -0
- data/lib/codebase_index/embedding/indexer.rb +130 -0
- data/lib/codebase_index/embedding/openai.rb +105 -0
- data/lib/codebase_index/embedding/provider.rb +135 -0
- data/lib/codebase_index/embedding/text_preparer.rb +112 -0
- data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
- data/lib/codebase_index/evaluation/evaluator.rb +146 -0
- data/lib/codebase_index/evaluation/metrics.rb +79 -0
- data/lib/codebase_index/evaluation/query_set.rb +148 -0
- data/lib/codebase_index/evaluation/report_generator.rb +90 -0
- data/lib/codebase_index/extracted_unit.rb +145 -0
- data/lib/codebase_index/extractor.rb +956 -0
- data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
- data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
- data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
- data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
- data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
- data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
- data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
- data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
- data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
- data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
- data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
- data/lib/codebase_index/extractors/event_extractor.rb +211 -0
- data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
- data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
- data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
- data/lib/codebase_index/extractors/job_extractor.rb +369 -0
- data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
- data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
- data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
- data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
- data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
- data/lib/codebase_index/extractors/model_extractor.rb +960 -0
- data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
- data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
- data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
- data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
- data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
- data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
- data/lib/codebase_index/extractors/route_extractor.rb +181 -0
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
- data/lib/codebase_index/extractors/service_extractor.rb +254 -0
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
- data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
- data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
- data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
- data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
- data/lib/codebase_index/feedback/gap_detector.rb +89 -0
- data/lib/codebase_index/feedback/store.rb +119 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/codebase_index/flow_assembler.rb +290 -0
- data/lib/codebase_index/flow_document.rb +191 -0
- data/lib/codebase_index/flow_precomputer.rb +102 -0
- data/lib/codebase_index/formatting/base.rb +40 -0
- data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
- data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
- data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
- data/lib/codebase_index/formatting/human_adapter.rb +78 -0
- data/lib/codebase_index/graph_analyzer.rb +374 -0
- data/lib/codebase_index/mcp/index_reader.rb +394 -0
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/codebase_index/mcp/server.rb +935 -0
- data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
- data/lib/codebase_index/model_name_cache.rb +51 -0
- data/lib/codebase_index/notion/client.rb +217 -0
- data/lib/codebase_index/notion/exporter.rb +219 -0
- data/lib/codebase_index/notion/mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
- data/lib/codebase_index/notion/rate_limiter.rb +68 -0
- data/lib/codebase_index/observability/health_check.rb +81 -0
- data/lib/codebase_index/observability/instrumentation.rb +34 -0
- data/lib/codebase_index/observability/structured_logger.rb +75 -0
- data/lib/codebase_index/operator/error_escalator.rb +81 -0
- data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
- data/lib/codebase_index/operator/status_reporter.rb +80 -0
- data/lib/codebase_index/railtie.rb +26 -0
- data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
- data/lib/codebase_index/resilience/index_validator.rb +185 -0
- data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
- data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
- data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
- data/lib/codebase_index/retrieval/ranker.rb +273 -0
- data/lib/codebase_index/retrieval/search_executor.rb +327 -0
- data/lib/codebase_index/retriever.rb +160 -0
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
- data/lib/codebase_index/ruby_analyzer.rb +87 -0
- data/lib/codebase_index/session_tracer/file_store.rb +111 -0
- data/lib/codebase_index/session_tracer/middleware.rb +143 -0
- data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
- data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
- data/lib/codebase_index/session_tracer/store.rb +67 -0
- data/lib/codebase_index/storage/graph_store.rb +120 -0
- data/lib/codebase_index/storage/metadata_store.rb +169 -0
- data/lib/codebase_index/storage/pgvector.rb +163 -0
- data/lib/codebase_index/storage/qdrant.rb +172 -0
- data/lib/codebase_index/storage/vector_store.rb +156 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
- data/lib/codebase_index/version.rb +5 -0
- data/lib/codebase_index.rb +223 -0
- data/lib/generators/codebase_index/install_generator.rb +32 -0
- data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
- data/lib/tasks/codebase_index.rake +583 -0
- data/lib/tasks/codebase_index_evaluation.rake +115 -0
- metadata +252 -0
|
@@ -0,0 +1,583 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# lib/tasks/codebase_index.rake
|
|
4
|
+
#
|
|
5
|
+
# Rake tasks for codebase indexing.
|
|
6
|
+
# These can be run manually or integrated into CI pipelines.
|
|
7
|
+
#
|
|
8
|
+
# Usage:
|
|
9
|
+
# bundle exec rake codebase_index:extract # Full extraction
|
|
10
|
+
# bundle exec rake codebase_index:incremental # Changed files only
|
|
11
|
+
# bundle exec rake codebase_index:extract_framework # Rails/gem sources only
|
|
12
|
+
# bundle exec rake codebase_index:validate # Validate index integrity
|
|
13
|
+
# bundle exec rake codebase_index:stats # Show index statistics
|
|
14
|
+
# bundle exec rake codebase_index:clean # Remove index
|
|
15
|
+
# bundle exec rake codebase_index:self_analyze # Analyze gem's own source
|
|
16
|
+
# bundle exec rake codebase_index:flow[EntryPoint] # Generate execution flow
|
|
17
|
+
|
|
18
|
+
namespace :codebase_index do
|
|
19
|
+
desc 'Full extraction of codebase for indexing'
task extract: :environment do
  require 'codebase_index/extractor'

  # Destination can be overridden with CODEBASE_INDEX_OUTPUT; otherwise tmp/codebase_index under Rails.root.
  target_dir = ENV.fetch('CODEBASE_INDEX_OUTPUT', Rails.root.join('tmp/codebase_index'))
  divider = '=' * 50

  puts 'Starting full codebase extraction...'
  puts "Output directory: #{target_dir}"
  puts

  units_by_type = CodebaseIndex::Extractor.new(output_dir: target_dir).extract_all

  # Summary table: one row per unit type, then the grand total.
  puts
  puts 'Extraction complete!'
  puts divider
  units_by_type.each do |kind, extracted|
    puts " #{kind.to_s.ljust(15)}: #{extracted.size} units"
  end
  puts divider
  puts " Total: #{units_by_type.values.sum(&:size)} units"
  puts
  puts "Output written to: #{target_dir}"
end
|
|
43
|
+
|
|
44
|
+
desc 'Incremental extraction based on git changes'
task incremental: :environment do
  require 'open3'

  output_dir = ENV.fetch('CODEBASE_INDEX_OUTPUT', Rails.root.join('tmp/codebase_index'))

  # Runs `git diff --name-only <range>` and returns the reported paths.
  # A failing git command (unknown ref, shallow clone, all-zero "before" SHA, ...)
  # is reported on stderr instead of being silently treated as "nothing changed".
  git_changed = lambda do |*range|
    stdout, status = Open3.capture2('git', 'diff', '--name-only', *range)
    unless status.success?
      warn "WARNING: `git diff --name-only #{range.join(' ')}` failed " \
           "(exit #{status.exitstatus}); the changed-file list may be incomplete."
    end
    stdout.lines.map(&:strip)
  end

  # Determine changed files from CI environment or git
  changed_files = if ENV['CHANGED_FILES']
                    # Explicit comma-separated list from CI
                    ENV['CHANGED_FILES'].split(',').map(&:strip)
                  elsif ENV['CI_COMMIT_BEFORE_SHA']
                    # GitLab CI
                    git_changed.call("#{ENV['CI_COMMIT_BEFORE_SHA']}..#{ENV.fetch('CI_COMMIT_SHA', nil)}")
                  elsif ENV['GITHUB_BASE_REF']
                    # GitHub Actions PR
                    git_changed.call("origin/#{ENV['GITHUB_BASE_REF']}...HEAD")
                  else
                    # Default: changes since last commit
                    git_changed.call('HEAD~1')
                  end

  # Only paths matching one of these patterns trigger re-extraction.
  relevant_patterns = [
    %r{^app/models/},
    %r{^app/controllers/},
    %r{^app/services/},
    %r{^app/components/},
    %r{^app/views/components/},
    %r{^app/views/.*\.rb$}, # Phlex views
    %r{^app/interactors/},
    %r{^app/operations/},
    %r{^app/commands/},
    %r{^app/use_cases/},
    %r{^app/jobs/},
    %r{^app/workers/}, # Sidekiq workers
    %r{^app/mailers/},
    %r{^app/graphql/}, # GraphQL types/mutations/resolvers
    %r{^app/serializers/},
    %r{^app/decorators/},
    %r{^app/blueprinters/},
    %r{^db/migrate/},
    %r{^db/schema\.rb$}, # Schema changes affect model metadata
    %r{^config/routes\.rb$},
    /^Gemfile\.lock$/ # Dependency changes trigger framework re-index
  ]

  changed_files = changed_files.select do |path|
    relevant_patterns.any? { |pattern| path.match?(pattern) }
  end

  if changed_files.empty?
    puts 'No relevant files changed. Skipping extraction.'
    # Leave only this task; `exit` would terminate the whole rake run and
    # skip any tasks queued after this one on the same command line.
    next
  end

  puts "Incremental extraction for #{changed_files.size} changed files..."
  changed_files.each { |path| puts " - #{path}" }
  puts

  # Loaded only once there is work to do.
  require 'codebase_index/extractor'

  extractor = CodebaseIndex::Extractor.new(output_dir: output_dir)
  affected = extractor.extract_changed(changed_files)

  puts
  puts "Re-extracted #{affected.size} affected units."
end
|
|
116
|
+
|
|
117
|
+
desc 'Extract only Rails/gem framework sources (run when dependencies change)'
task extract_framework: :environment do
  require 'codebase_index/extractors/rails_source_extractor'

  base_dir = ENV.fetch('CODEBASE_INDEX_OUTPUT', Rails.root.join('tmp/codebase_index'))

  puts 'Extracting Rails and gem framework sources...'
  puts "Rails version: #{Rails.version}"
  puts

  framework_units = CodebaseIndex::Extractors::RailsSourceExtractor.new.extract_all

  # Each unit is written as its own pretty-printed JSON file under <output>/rails_source.
  framework_dir = Pathname.new(base_dir).join('rails_source')
  FileUtils.mkdir_p(framework_dir)

  framework_units.each do |unit|
    # Path and namespace separators are flattened so the identifier is a single file name.
    flattened = unit.identifier.gsub('/', '__').gsub('::', '__')
    destination = framework_dir.join("#{flattened}.json")
    File.write(destination, JSON.pretty_generate(unit.to_h))
  end

  puts "Extracted #{framework_units.size} framework source units."
  puts "Output: #{framework_dir}"
end
|
|
145
|
+
|
|
146
|
+
desc 'Validate extracted index integrity'
task validate: :environment do
  output_dir = Pathname.new(ENV.fetch('CODEBASE_INDEX_OUTPUT', Rails.root.join('tmp/codebase_index')))

  unless output_dir.exist?
    puts "ERROR: Index directory does not exist: #{output_dir}"
    exit 1
  end

  manifest_path = output_dir.join('manifest.json')
  unless manifest_path.exist?
    puts 'ERROR: Manifest not found. Run extraction first.'
    exit 1
  end

  # A manifest that cannot be parsed (or is not a JSON object) is itself a
  # validation failure, so report it instead of crashing with a backtrace.
  begin
    manifest = JSON.parse(File.read(manifest_path))
  rescue JSON::ParserError => e
    puts "ERROR: manifest.json: invalid JSON - #{e.message}"
    exit 1
  end

  unless manifest.is_a?(Hash)
    puts 'ERROR: manifest.json: expected a JSON object'
    exit 1
  end

  puts 'Validating index...'
  puts " Extracted at: #{manifest['extracted_at']}"
  puts " Git SHA: #{manifest['git_sha']}"
  puts

  errors = []
  warnings = []

  # The per-type counts drive the directory checks; without them nothing can
  # be verified, which is recorded as an error rather than raising on nil.
  counts = manifest['counts']
  unless counts.is_a?(Hash)
    errors << 'manifest.json: missing counts'
    counts = {}
  end

  # Check each type directory
  counts.each do |type, expected_count|
    type_dir = output_dir.join(type)
    unless type_dir.exist?
      errors << "Missing directory: #{type}"
      next
    end

    # Unit files are every *.json except the per-type _index.json; glob once and reuse.
    unit_files = Dir[type_dir.join('*.json')].reject { |f| f.end_with?('_index.json') }

    if unit_files.size != expected_count
      warnings << "#{type}: expected #{expected_count}, found #{unit_files.size}"
    end

    # Validate each unit file is valid JSON with the required keys
    unit_files.each do |file|
      data = JSON.parse(File.read(file))
      unless data.is_a?(Hash)
        errors << "#{file}: expected a JSON object"
        next
      end

      errors << "#{file}: missing identifier" unless data['identifier']
      errors << "#{file}: missing source_code" unless data['source_code']
    rescue JSON::ParserError => e
      errors << "#{file}: invalid JSON - #{e.message}"
    end
  end

  # Check dependency graph
  graph_path = output_dir.join('dependency_graph.json')
  if graph_path.exist?
    begin
      JSON.parse(File.read(graph_path))
    rescue JSON::ParserError
      errors << 'dependency_graph.json: invalid JSON'
    end
  else
    errors << 'Missing dependency_graph.json'
  end

  # Report
  if errors.any?
    puts 'ERRORS:'
    errors.each { |e| puts " ✗ #{e}" }
  end

  if warnings.any?
    puts 'WARNINGS:'
    warnings.each { |w| puts " ⚠ #{w}" }
  end

  if errors.empty? && warnings.empty?
    puts '✓ Index is valid.'
  elsif errors.empty?
    puts "\n✓ Index is valid with #{warnings.size} warning(s)."
  else
    puts "\n✗ Index has #{errors.size} error(s)."
    # Non-zero exit so CI pipelines fail on an invalid index.
    exit 1
  end
end
|
|
229
|
+
|
|
230
|
+
desc 'Show index statistics'
task stats: :environment do
  index_root = Pathname.new(ENV.fetch('CODEBASE_INDEX_OUTPUT', Rails.root.join('tmp/codebase_index')))

  unless index_root.exist?
    puts 'Index directory does not exist. Run extraction first.'
    exit 1
  end

  # A missing manifest is tolerated: every header field then prints as "unknown".
  manifest_file = index_root.join('manifest.json')
  manifest = {}
  manifest = JSON.parse(File.read(manifest_file)) if manifest_file.exist?

  rule = '-' * 50

  # Formats one table row; the size is given in bytes and shown in KB with one decimal.
  row = lambda do |label, units, chunks, bytes|
    " #{label.ljust(15)}: #{units.to_s.rjust(4)} units, #{chunks.to_s.rjust(4)} chunks, " \
      "#{(bytes / 1024.0).round(1).to_s.rjust(8)} KB"
  end

  puts 'Codebase Index Statistics'
  puts '=' * 50
  [
    ['Extracted at', 'extracted_at'],
    ['Rails version', 'rails_version'],
    ['Ruby version', 'ruby_version'],
    ['Git SHA', 'git_sha'],
    ['Git branch', 'git_branch']
  ].each do |label, key|
    puts " #{label}: #{manifest[key] || 'unknown'}"
  end
  puts

  puts 'Units by Type'
  puts rule

  bytes_total = 0
  units_total = 0
  chunks_total = 0

  (manifest['counts'] || {}).each do |type, count|
    dir = index_root.join(type)
    next unless dir.exist?

    bytes = Dir[dir.join('*.json')].sum { |path| File.size(path) }
    bytes_total += bytes
    units_total += count

    # Chunk counts come from the per-type _index.json when it is present.
    chunks = 0
    chunk_index = dir.join('_index.json')
    if chunk_index.exist?
      chunks = JSON.parse(File.read(chunk_index)).sum { |entry| entry['chunk_count'] || 0 }
      chunks_total += chunks
    end

    puts row.call(type, count, chunks, bytes)
  end

  puts rule
  puts row.call('Total', units_total, chunks_total, bytes_total)
  puts

  # Dependency graph stats (printed only when the graph file exists)
  graph_file = index_root.join('dependency_graph.json')
  next unless graph_file.exist?

  graph_stats = JSON.parse(File.read(graph_file))['stats'] || {}
  puts 'Dependency Graph'
  puts rule
  puts " Nodes: #{graph_stats['node_count'] || 'unknown'}"
  puts " Edges: #{graph_stats['edge_count'] || 'unknown'}"
end
|
|
293
|
+
|
|
294
|
+
desc 'Clean extracted index'
task clean: :environment do
  index_dir = Pathname.new(ENV.fetch('CODEBASE_INDEX_OUTPUT', Rails.root.join('tmp/codebase_index')))

  # Nothing to delete: report it and finish the task.
  unless index_dir.exist?
    puts 'Index directory does not exist.'
    next
  end

  puts "Removing #{index_dir}..."
  FileUtils.rm_rf(index_dir)
  puts 'Done.'
end
|
|
306
|
+
|
|
307
|
+
desc 'Retrieve context for a query (for testing)'
task :retrieve, [:query] => :environment do |_task, args|
  query = args[:query]
  raise 'Usage: rake codebase_index:retrieve[query]' unless query

  %w[
    codebase_index
    codebase_index/retriever
    codebase_index/embedding/provider
    codebase_index/storage/vector_store
    codebase_index/storage/metadata_store
    codebase_index/storage/graph_store
    codebase_index/formatting/human_adapter
  ].each { |lib| require lib }

  settings = CodebaseIndex.configuration

  # Collaborators are built in a fixed order before the retriever is wired together.
  embedder = CodebaseIndex::Embedding::Provider::Ollama.new
  vectors = CodebaseIndex::Storage::VectorStore::InMemory.new
  metadata = CodebaseIndex::Storage::MetadataStore::SQLite.new
  graph = CodebaseIndex::Storage::GraphStore::Memory.new

  retriever = CodebaseIndex::Retriever.new(
    vector_store: vectors,
    metadata_store: metadata,
    graph_store: graph,
    embedding_provider: embedder
  )

  # The configured max_context_tokens is passed as the retrieval budget.
  context = retriever.retrieve(query, budget: settings.max_context_tokens)

  puts CodebaseIndex::Formatting::HumanAdapter.new.format(context)
end
|
|
338
|
+
|
|
339
|
+
desc 'Embed all extracted units'
task embed: :environment do
  %w[
    codebase_index
    codebase_index/embedding/indexer
    codebase_index/embedding/text_preparer
    codebase_index/embedding/provider
    codebase_index/storage/vector_store
  ].each { |lib| require lib }

  settings = CodebaseIndex.configuration
  # CODEBASE_INDEX_OUTPUT wins over the configured output directory.
  index_dir = ENV.fetch('CODEBASE_INDEX_OUTPUT', settings.output_dir)

  indexer = CodebaseIndex::Embedding::Indexer.new(
    provider: CodebaseIndex::Embedding::Provider::Ollama.new,
    text_preparer: CodebaseIndex::Embedding::TextPreparer.new,
    vector_store: CodebaseIndex::Storage::VectorStore::InMemory.new,
    output_dir: index_dir
  )

  puts 'Embedding all extracted units...'
  summary = indexer.index_all

  puts
  puts 'Embedding complete!'
  { 'Processed' => :processed, 'Skipped' => :skipped, 'Errors' => :errors }.each do |label, key|
    puts " #{label}: #{summary[key]}"
  end
end
|
|
370
|
+
|
|
371
|
+
desc 'Embed changed units only (incremental)'
task embed_incremental: :environment do
  %w[
    codebase_index
    codebase_index/embedding/indexer
    codebase_index/embedding/text_preparer
    codebase_index/embedding/provider
    codebase_index/storage/vector_store
  ].each { |lib| require lib }

  settings = CodebaseIndex.configuration
  # CODEBASE_INDEX_OUTPUT wins over the configured output directory.
  index_dir = ENV.fetch('CODEBASE_INDEX_OUTPUT', settings.output_dir)

  indexer = CodebaseIndex::Embedding::Indexer.new(
    provider: CodebaseIndex::Embedding::Provider::Ollama.new,
    text_preparer: CodebaseIndex::Embedding::TextPreparer.new,
    vector_store: CodebaseIndex::Storage::VectorStore::InMemory.new,
    output_dir: index_dir
  )

  puts 'Embedding changed units (incremental)...'
  summary = indexer.index_incremental

  puts
  puts 'Incremental embedding complete!'
  { 'Processed' => :processed, 'Skipped' => :skipped, 'Errors' => :errors }.each do |label, key|
    puts " #{label}: #{summary[key]}"
  end
end
|
|
402
|
+
|
|
403
|
+
desc "Analyze the gem's own source code and generate self-analysis output"
task :self_analyze do
  require 'digest'
  require 'json'
  # Time#iso8601 is provided by the `time` library on older Rubies, and this
  # task does not depend on :environment, so nothing else is known to load it.
  require 'time'
  require 'fileutils'
  require 'codebase_index/ruby_analyzer'
  require 'codebase_index/dependency_graph'
  require 'codebase_index/graph_analyzer'
  require 'codebase_index/ruby_analyzer/mermaid_renderer'

  gem_root = File.expand_path('../..', __dir__)
  json_dir = File.join(gem_root, 'tmp', 'codebase_index_self')
  docs_dir = File.join(gem_root, 'docs', 'self-analysis')
  manifest_path = File.join(json_dir, 'manifest.json')

  # 1. Check staleness via source_checksum
  # The file list is sorted explicitly so the checksum does not depend on the
  # order in which Dir.glob happens to return entries.
  lib_files = Dir.glob(File.join(gem_root, 'lib', '**', '*.rb')).sort
  source_content = lib_files.map { |f| File.read(f) }.join
  source_checksum = Digest::SHA256.hexdigest(source_content)

  # An unreadable previous manifest is treated as stale so the analysis is
  # regenerated rather than the task failing.
  previous_checksum =
    begin
      JSON.parse(File.read(manifest_path))['source_checksum'] if File.exist?(manifest_path)
    rescue JSON::ParserError, TypeError
      nil
    end

  if previous_checksum == source_checksum
    puts 'Source unchanged — skipping self-analysis.'
    next
  end

  puts 'Running self-analysis on gem source...'

  # 2. Run RubyAnalyzer
  units = CodebaseIndex::RubyAnalyzer.analyze(paths: [File.join(gem_root, 'lib', 'codebase_index')])
  puts " Analyzed #{units.size} units"

  # 3. Build DependencyGraph + GraphAnalyzer
  graph = CodebaseIndex::DependencyGraph.new
  units.each { |unit| graph.register(unit) }
  analyzer = CodebaseIndex::GraphAnalyzer.new(graph)
  analysis = analyzer.analyze
  graph_data = graph.to_h

  # 4. Write JSON to tmp/codebase_index_self/
  FileUtils.mkdir_p(json_dir)

  units.each do |unit|
    # Anything outside [a-zA-Z0-9_] in the identifier becomes "_" in the file name.
    file_name = "#{unit.identifier.gsub(/[^a-zA-Z0-9_]/, '_')}.json"
    File.write(
      File.join(json_dir, file_name),
      JSON.pretty_generate(unit.to_h)
    )
  end

  File.write(
    File.join(json_dir, 'dependency_graph.json'),
    JSON.pretty_generate(graph_data)
  )

  File.write(
    File.join(json_dir, 'analysis.json'),
    JSON.pretty_generate(analysis)
  )

  # The manifest is written after the other JSON output; its source_checksum
  # is what the staleness check above compares against on the next run.
  manifest = {
    'source_checksum' => source_checksum,
    'generated_at' => Time.now.iso8601,
    'unit_count' => units.size,
    'node_count' => graph_data[:stats][:node_count],
    'edge_count' => graph_data[:stats][:edge_count]
  }
  File.write(manifest_path, JSON.pretty_generate(manifest))

  # 5. Render Mermaid to docs/self-analysis/
  FileUtils.mkdir_p(docs_dir)
  renderer = CodebaseIndex::RubyAnalyzer::MermaidRenderer.new

  File.write(
    File.join(docs_dir, 'architecture.md'),
    renderer.render_architecture(units, graph_data, analysis)
  )

  File.write(
    File.join(docs_dir, 'call-graph.md'),
    "# Call Graph\n\n```mermaid\n#{renderer.render_call_graph(units)}\n```\n"
  )

  File.write(
    File.join(docs_dir, 'dependency-map.md'),
    "# Dependency Map\n\n```mermaid\n#{renderer.render_dependency_map(graph_data)}\n```\n"
  )

  File.write(
    File.join(docs_dir, 'dataflow.md'),
    "# Data Flow\n\n```mermaid\n#{renderer.render_dataflow(units)}\n```\n"
  )

  puts " JSON output: #{json_dir}"
  puts " Mermaid docs: #{docs_dir}"
  puts 'Self-analysis complete.'
end
|
|
502
|
+
|
|
503
|
+
desc 'Generate execution flow document for a Rails entry point'
task :flow, [:entry_point] => :environment do |_task, args|
  require 'json'
  require 'codebase_index/flow_assembler'
  require 'codebase_index/dependency_graph'

  entry_point = args[:entry_point]
  if entry_point.nil? || entry_point == false
    puts 'Usage: rake codebase_index:flow[EntryPoint#method]'
    exit 1
  end

  index_dir = ENV.fetch('CODEBASE_INDEX_OUTPUT', Rails.root.join('tmp/codebase_index'))
  graph_file = File.join(index_dir, 'dependency_graph.json')

  # The flow is assembled from a previously extracted dependency graph.
  unless File.exist?(graph_file)
    puts "ERROR: Dependency graph not found at #{graph_file}"
    puts 'Run codebase_index:extract first.'
    exit 1
  end

  graph = CodebaseIndex::DependencyGraph.from_h(JSON.parse(File.read(graph_file)))

  # MAX_DEPTH (default 5) is forwarded to the assembler as max_depth.
  depth_limit = ENV.fetch('MAX_DEPTH', 5).to_i
  flow = CodebaseIndex::FlowAssembler
         .new(graph: graph, extracted_dir: index_dir)
         .assemble(entry_point, max_depth: depth_limit)

  # FORMAT=json prints the flow as JSON; any other value prints markdown.
  output_format = ENV.fetch('FORMAT', 'markdown').downcase
  if output_format == 'json'
    puts JSON.pretty_generate(flow.to_h)
  else
    puts flow.to_markdown
  end
end
|
|
540
|
+
|
|
541
|
+
desc 'Sync extraction data to Notion databases (Data Models + Columns)'
task notion_sync: :environment do
  require 'codebase_index/notion/exporter'

  settings = CodebaseIndex.configuration
  # Env var takes precedence over configured value
  settings.notion_api_token = ENV['NOTION_API_TOKEN'] || settings.notion_api_token

  unless settings.notion_api_token
    puts 'ERROR: Notion API token not configured.'
    puts 'Set NOTION_API_TOKEN env var or configure notion_api_token in CodebaseIndex.configure.'
    exit 1
  end

  index_dir = ENV.fetch('CODEBASE_INDEX_OUTPUT', settings.output_dir)

  database_ids = settings.notion_database_ids || {}
  if database_ids.empty?
    puts 'ERROR: No Notion database IDs configured.'
    puts 'Set notion_database_ids in CodebaseIndex.configure:'
    puts ' config.notion_database_ids = { data_models: "db-uuid", columns: "db-uuid" }'
    exit 1
  end

  puts 'Syncing extraction data to Notion...'
  puts " Output dir: #{index_dir}"
  puts " Databases: #{database_ids.keys.join(', ')}"
  puts

  summary = CodebaseIndex::Notion::Exporter.new(index_dir: index_dir).sync_all

  puts 'Sync complete!'
  puts " Data Models: #{summary[:data_models]} synced"
  puts " Columns: #{summary[:columns]} synced"

  # Only the first five errors are listed; the remainder is summarised as a count.
  if summary[:errors].any?
    puts " Errors: #{summary[:errors].size}"
    summary[:errors].first(5).each { |failure| puts " - #{failure}" }
    puts " ... and #{summary[:errors].size - 5} more" if summary[:errors].size > 5
  end
end
|
|
583
|
+
end
|
|
@@ -0,0 +1,115 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# lib/tasks/codebase_index_evaluation.rake
|
|
4
|
+
#
|
|
5
|
+
# Rake tasks for evaluating retrieval quality.
|
|
6
|
+
#
|
|
7
|
+
# Usage:
|
|
8
|
+
# bundle exec rake codebase_index:evaluate # Run evaluation
|
|
9
|
+
# bundle exec rake codebase_index:evaluate:baseline[grep] # Run baseline comparison
|
|
10
|
+
|
|
11
|
+
# Evaluation tasks. Each task requires the libraries it needs when it runs and
# then hands off to a top-level helper method (run_evaluation / run_baseline).
namespace :codebase_index do
  desc 'Run evaluation queries against the retrieval pipeline'
  task evaluate: :environment do
    %w[
      codebase_index/retriever
      codebase_index/evaluation/query_set
      codebase_index/evaluation/evaluator
      codebase_index/evaluation/report_generator
    ].each { |feature| require feature }

    run_evaluation
  end

  namespace :evaluate do
    desc 'Run baseline comparison'
    # The optional :strategy task argument is forwarded untouched to run_baseline.
    task :baseline, [:strategy] => :environment do |_task, task_args|
      %w[
        codebase_index/evaluation/query_set
        codebase_index/evaluation/baseline_runner
        codebase_index/evaluation/metrics
      ].each { |feature| require feature }

      run_baseline(task_args)
    end
  end
end
|
|
33
|
+
|
|
34
|
+
# Run the full evaluation: load the query set, evaluate it with a freshly built
# retriever, save the report, and print a summary.
#
# Reads EVAL_QUERY_SET, EVAL_OUTPUT and EVAL_BUDGET from the environment,
# falling back to the defaults below when they are not set.
def run_evaluation
  queries_file = ENV.fetch('EVAL_QUERY_SET', 'config/eval_queries.json')
  report_file = ENV.fetch('EVAL_OUTPUT', 'tmp/eval_report.json')
  token_budget = ENV.fetch('EVAL_BUDGET', '8000').to_i

  puts "Loading query set from: #{queries_file}"
  loaded_set = CodebaseIndex::Evaluation::QuerySet.load(queries_file)
  puts "Loaded #{loaded_set.size} queries — building retriever..."

  report = CodebaseIndex::Evaluation::Evaluator.new(
    retriever: build_eval_retriever,
    query_set: loaded_set,
    budget: token_budget
  ).evaluate

  # The query set path is stored with the report as metadata.
  generator = CodebaseIndex::Evaluation::ReportGenerator.new
  generator.save(report, report_file, metadata: { 'query_set' => queries_file })

  print_eval_report(report, report_file)
end
|
|
53
|
+
|
|
54
|
+
# Run one baseline strategy over the query set and print its mean metrics.
#
# The strategy comes from the :strategy task argument when present, otherwise
# from EVAL_BASELINE_STRATEGY (default 'grep'). EVAL_QUERY_SET and
# EVAL_BASELINE_LIMIT are read from the environment with the defaults below.
#
# @param args [#[]] task arguments; only :strategy is read
def run_baseline(args)
  requested = args[:strategy] || ENV.fetch('EVAL_BASELINE_STRATEGY', 'grep')
  strategy = requested.to_sym
  queries_file = ENV.fetch('EVAL_QUERY_SET', 'config/eval_queries.json')
  max_results = ENV.fetch('EVAL_BASELINE_LIMIT', '10').to_i

  puts "Loading query set from: #{queries_file}"
  loaded_set = CodebaseIndex::Evaluation::QuerySet.load(queries_file)
  puts "Running #{strategy} baseline (limit: #{max_results})..."

  baseline = CodebaseIndex::Evaluation::BaselineRunner.new(metadata_store: CodebaseIndex.metadata_store)

  sums = compute_baseline_totals(loaded_set, baseline, strategy, max_results)
  print_baseline_report(strategy, loaded_set.size, sums)
end
|
|
70
|
+
|
|
71
|
+
# Sum MRR and recall over every query in the set for one baseline strategy.
#
# The values returned are sums, not means; the caller divides by query count.
#
# @param query_set [#queries] each query responds to #query and #expected_units
# @param runner [#run] called as run(text, strategy:, limit:)
# @param strategy [Symbol] baseline strategy passed through to the runner
# @param limit [Integer] result limit passed through to the runner
# @return [Hash] { mrr: Float, recall: Float }
def compute_baseline_totals(query_set, runner, strategy, limit)
  sums = { mrr: 0.0, recall: 0.0 }

  query_set.queries.each do |entry|
    hits = runner.run(entry.query, strategy: strategy, limit: limit)
    sums[:mrr] += CodebaseIndex::Evaluation::Metrics.mrr(hits, entry.expected_units)
    sums[:recall] += CodebaseIndex::Evaluation::Metrics.recall(hits, entry.expected_units)
  end

  sums
end
|
|
83
|
+
|
|
84
|
+
# Print the aggregate metrics of an evaluation report to stdout.
#
# Float aggregates are shown with four decimal places; every other value is
# printed via #to_s. Metric names are left-justified to 25 characters.
#
# @param report [#aggregates] yields (name, value) pairs from #aggregates
# @param output_path [String] path echoed in the final "saved to" line
def print_eval_report(report, output_path)
  divider = '=' * 50

  puts
  puts 'Evaluation complete!'
  puts divider
  report.aggregates.each do |metric, score|
    rendered =
      case score
      when Float then format('%.4f', score)
      else score.to_s
      end
    puts " #{metric.to_s.ljust(25)}: #{rendered}"
  end
  puts divider
  puts "Report saved to: #{output_path}"
end
|
|
95
|
+
|
|
96
|
+
# Print the mean MRR and mean recall of a baseline run to stdout.
#
# Means are the summed metric divided by the query count, shown with four
# decimal places; a non-positive count prints 0.0000 rather than dividing.
#
# @param strategy [#to_s] baseline name shown in the heading
# @param count [Integer] number of queries the totals were summed over
# @param totals [Hash] summed metrics under :mrr and :recall
def print_baseline_report(strategy, count, totals)
  rule = '=' * 50
  # Looks up the total lazily so it is only read when count is positive.
  average = lambda do |metric|
    mean = count.positive? ? totals[metric] / count : 0.0
    format('%.4f', mean)
  end

  puts
  puts "Baseline: #{strategy}"
  puts rule
  puts " Mean MRR: #{average.call(:mrr)}"
  puts " Mean Recall: #{average.call(:recall)}"
  puts rule
end
|
|
104
|
+
|
|
105
|
+
# Assemble the retriever used for evaluation from the vector, metadata and
# graph stores and the embedding provider exposed on CodebaseIndex
# (requires Rails environment with stores configured).
#
# @return [CodebaseIndex::Retriever]
def build_eval_retriever
  dependencies = {
    vector_store: CodebaseIndex.vector_store,
    metadata_store: CodebaseIndex.metadata_store,
    graph_store: CodebaseIndex.graph_store,
    embedding_provider: CodebaseIndex.embedding_provider
  }

  CodebaseIndex::Retriever.new(**dependencies)
end
|