codebase_index 0.3.2 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (184)
  1. checksums.yaml +4 -4
  2. data/lib/codebase_index.rb +3 -243
  3. metadata +28 -223
  4. data/CHANGELOG.md +0 -89
  5. data/CODE_OF_CONDUCT.md +0 -83
  6. data/CONTRIBUTING.md +0 -65
  7. data/LICENSE.txt +0 -21
  8. data/README.md +0 -325
  9. data/exe/codebase-console +0 -59
  10. data/exe/codebase-console-mcp +0 -22
  11. data/exe/codebase-index-mcp +0 -34
  12. data/exe/codebase-index-mcp-http +0 -37
  13. data/exe/codebase-index-mcp-start +0 -58
  14. data/lib/codebase_index/ast/call_site_extractor.rb +0 -106
  15. data/lib/codebase_index/ast/method_extractor.rb +0 -71
  16. data/lib/codebase_index/ast/node.rb +0 -116
  17. data/lib/codebase_index/ast/parser.rb +0 -614
  18. data/lib/codebase_index/ast.rb +0 -6
  19. data/lib/codebase_index/builder.rb +0 -200
  20. data/lib/codebase_index/cache/cache_middleware.rb +0 -199
  21. data/lib/codebase_index/cache/cache_store.rb +0 -264
  22. data/lib/codebase_index/cache/redis_cache_store.rb +0 -116
  23. data/lib/codebase_index/cache/solid_cache_store.rb +0 -111
  24. data/lib/codebase_index/chunking/chunk.rb +0 -84
  25. data/lib/codebase_index/chunking/semantic_chunker.rb +0 -295
  26. data/lib/codebase_index/console/adapters/cache_adapter.rb +0 -58
  27. data/lib/codebase_index/console/adapters/good_job_adapter.rb +0 -33
  28. data/lib/codebase_index/console/adapters/job_adapter.rb +0 -68
  29. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +0 -33
  30. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +0 -33
  31. data/lib/codebase_index/console/audit_logger.rb +0 -75
  32. data/lib/codebase_index/console/bridge.rb +0 -177
  33. data/lib/codebase_index/console/confirmation.rb +0 -90
  34. data/lib/codebase_index/console/connection_manager.rb +0 -173
  35. data/lib/codebase_index/console/console_response_renderer.rb +0 -74
  36. data/lib/codebase_index/console/embedded_executor.rb +0 -373
  37. data/lib/codebase_index/console/model_validator.rb +0 -81
  38. data/lib/codebase_index/console/rack_middleware.rb +0 -87
  39. data/lib/codebase_index/console/safe_context.rb +0 -82
  40. data/lib/codebase_index/console/server.rb +0 -612
  41. data/lib/codebase_index/console/sql_validator.rb +0 -172
  42. data/lib/codebase_index/console/tools/tier1.rb +0 -118
  43. data/lib/codebase_index/console/tools/tier2.rb +0 -117
  44. data/lib/codebase_index/console/tools/tier3.rb +0 -110
  45. data/lib/codebase_index/console/tools/tier4.rb +0 -79
  46. data/lib/codebase_index/coordination/pipeline_lock.rb +0 -109
  47. data/lib/codebase_index/cost_model/embedding_cost.rb +0 -88
  48. data/lib/codebase_index/cost_model/estimator.rb +0 -128
  49. data/lib/codebase_index/cost_model/provider_pricing.rb +0 -67
  50. data/lib/codebase_index/cost_model/storage_cost.rb +0 -52
  51. data/lib/codebase_index/cost_model.rb +0 -22
  52. data/lib/codebase_index/db/migrations/001_create_units.rb +0 -38
  53. data/lib/codebase_index/db/migrations/002_create_edges.rb +0 -35
  54. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +0 -37
  55. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +0 -45
  56. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +0 -40
  57. data/lib/codebase_index/db/migrator.rb +0 -71
  58. data/lib/codebase_index/db/schema_version.rb +0 -73
  59. data/lib/codebase_index/dependency_graph.rb +0 -236
  60. data/lib/codebase_index/embedding/indexer.rb +0 -140
  61. data/lib/codebase_index/embedding/openai.rb +0 -126
  62. data/lib/codebase_index/embedding/provider.rb +0 -162
  63. data/lib/codebase_index/embedding/text_preparer.rb +0 -112
  64. data/lib/codebase_index/evaluation/baseline_runner.rb +0 -115
  65. data/lib/codebase_index/evaluation/evaluator.rb +0 -139
  66. data/lib/codebase_index/evaluation/metrics.rb +0 -79
  67. data/lib/codebase_index/evaluation/query_set.rb +0 -148
  68. data/lib/codebase_index/evaluation/report_generator.rb +0 -90
  69. data/lib/codebase_index/extracted_unit.rb +0 -145
  70. data/lib/codebase_index/extractor.rb +0 -1028
  71. data/lib/codebase_index/extractors/action_cable_extractor.rb +0 -201
  72. data/lib/codebase_index/extractors/ast_source_extraction.rb +0 -46
  73. data/lib/codebase_index/extractors/behavioral_profile.rb +0 -309
  74. data/lib/codebase_index/extractors/caching_extractor.rb +0 -261
  75. data/lib/codebase_index/extractors/callback_analyzer.rb +0 -246
  76. data/lib/codebase_index/extractors/concern_extractor.rb +0 -292
  77. data/lib/codebase_index/extractors/configuration_extractor.rb +0 -219
  78. data/lib/codebase_index/extractors/controller_extractor.rb +0 -404
  79. data/lib/codebase_index/extractors/database_view_extractor.rb +0 -278
  80. data/lib/codebase_index/extractors/decorator_extractor.rb +0 -253
  81. data/lib/codebase_index/extractors/engine_extractor.rb +0 -223
  82. data/lib/codebase_index/extractors/event_extractor.rb +0 -211
  83. data/lib/codebase_index/extractors/factory_extractor.rb +0 -289
  84. data/lib/codebase_index/extractors/graphql_extractor.rb +0 -892
  85. data/lib/codebase_index/extractors/i18n_extractor.rb +0 -117
  86. data/lib/codebase_index/extractors/job_extractor.rb +0 -374
  87. data/lib/codebase_index/extractors/lib_extractor.rb +0 -218
  88. data/lib/codebase_index/extractors/mailer_extractor.rb +0 -269
  89. data/lib/codebase_index/extractors/manager_extractor.rb +0 -188
  90. data/lib/codebase_index/extractors/middleware_extractor.rb +0 -133
  91. data/lib/codebase_index/extractors/migration_extractor.rb +0 -469
  92. data/lib/codebase_index/extractors/model_extractor.rb +0 -988
  93. data/lib/codebase_index/extractors/phlex_extractor.rb +0 -252
  94. data/lib/codebase_index/extractors/policy_extractor.rb +0 -191
  95. data/lib/codebase_index/extractors/poro_extractor.rb +0 -229
  96. data/lib/codebase_index/extractors/pundit_extractor.rb +0 -223
  97. data/lib/codebase_index/extractors/rails_source_extractor.rb +0 -473
  98. data/lib/codebase_index/extractors/rake_task_extractor.rb +0 -343
  99. data/lib/codebase_index/extractors/route_extractor.rb +0 -181
  100. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +0 -331
  101. data/lib/codebase_index/extractors/serializer_extractor.rb +0 -339
  102. data/lib/codebase_index/extractors/service_extractor.rb +0 -217
  103. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +0 -91
  104. data/lib/codebase_index/extractors/shared_utility_methods.rb +0 -281
  105. data/lib/codebase_index/extractors/state_machine_extractor.rb +0 -398
  106. data/lib/codebase_index/extractors/test_mapping_extractor.rb +0 -225
  107. data/lib/codebase_index/extractors/validator_extractor.rb +0 -211
  108. data/lib/codebase_index/extractors/view_component_extractor.rb +0 -311
  109. data/lib/codebase_index/extractors/view_template_extractor.rb +0 -261
  110. data/lib/codebase_index/feedback/gap_detector.rb +0 -89
  111. data/lib/codebase_index/feedback/store.rb +0 -119
  112. data/lib/codebase_index/filename_utils.rb +0 -32
  113. data/lib/codebase_index/flow_analysis/operation_extractor.rb +0 -206
  114. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +0 -154
  115. data/lib/codebase_index/flow_assembler.rb +0 -290
  116. data/lib/codebase_index/flow_document.rb +0 -191
  117. data/lib/codebase_index/flow_precomputer.rb +0 -102
  118. data/lib/codebase_index/formatting/base.rb +0 -30
  119. data/lib/codebase_index/formatting/claude_adapter.rb +0 -98
  120. data/lib/codebase_index/formatting/generic_adapter.rb +0 -56
  121. data/lib/codebase_index/formatting/gpt_adapter.rb +0 -64
  122. data/lib/codebase_index/formatting/human_adapter.rb +0 -78
  123. data/lib/codebase_index/graph_analyzer.rb +0 -374
  124. data/lib/codebase_index/mcp/bootstrapper.rb +0 -96
  125. data/lib/codebase_index/mcp/index_reader.rb +0 -394
  126. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +0 -81
  127. data/lib/codebase_index/mcp/renderers/json_renderer.rb +0 -17
  128. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +0 -353
  129. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +0 -240
  130. data/lib/codebase_index/mcp/server.rb +0 -961
  131. data/lib/codebase_index/mcp/tool_response_renderer.rb +0 -85
  132. data/lib/codebase_index/model_name_cache.rb +0 -51
  133. data/lib/codebase_index/notion/client.rb +0 -217
  134. data/lib/codebase_index/notion/exporter.rb +0 -219
  135. data/lib/codebase_index/notion/mapper.rb +0 -40
  136. data/lib/codebase_index/notion/mappers/column_mapper.rb +0 -57
  137. data/lib/codebase_index/notion/mappers/migration_mapper.rb +0 -39
  138. data/lib/codebase_index/notion/mappers/model_mapper.rb +0 -161
  139. data/lib/codebase_index/notion/mappers/shared.rb +0 -22
  140. data/lib/codebase_index/notion/rate_limiter.rb +0 -68
  141. data/lib/codebase_index/observability/health_check.rb +0 -79
  142. data/lib/codebase_index/observability/instrumentation.rb +0 -34
  143. data/lib/codebase_index/observability/structured_logger.rb +0 -57
  144. data/lib/codebase_index/operator/error_escalator.rb +0 -81
  145. data/lib/codebase_index/operator/pipeline_guard.rb +0 -92
  146. data/lib/codebase_index/operator/status_reporter.rb +0 -80
  147. data/lib/codebase_index/railtie.rb +0 -38
  148. data/lib/codebase_index/resilience/circuit_breaker.rb +0 -99
  149. data/lib/codebase_index/resilience/index_validator.rb +0 -167
  150. data/lib/codebase_index/resilience/retryable_provider.rb +0 -108
  151. data/lib/codebase_index/retrieval/context_assembler.rb +0 -261
  152. data/lib/codebase_index/retrieval/query_classifier.rb +0 -133
  153. data/lib/codebase_index/retrieval/ranker.rb +0 -277
  154. data/lib/codebase_index/retrieval/search_executor.rb +0 -316
  155. data/lib/codebase_index/retriever.rb +0 -152
  156. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +0 -170
  157. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +0 -77
  158. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +0 -18
  159. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +0 -280
  160. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +0 -143
  161. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +0 -143
  162. data/lib/codebase_index/ruby_analyzer.rb +0 -87
  163. data/lib/codebase_index/session_tracer/file_store.rb +0 -104
  164. data/lib/codebase_index/session_tracer/middleware.rb +0 -143
  165. data/lib/codebase_index/session_tracer/redis_store.rb +0 -106
  166. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +0 -254
  167. data/lib/codebase_index/session_tracer/session_flow_document.rb +0 -223
  168. data/lib/codebase_index/session_tracer/solid_cache_store.rb +0 -139
  169. data/lib/codebase_index/session_tracer/store.rb +0 -81
  170. data/lib/codebase_index/storage/graph_store.rb +0 -120
  171. data/lib/codebase_index/storage/metadata_store.rb +0 -196
  172. data/lib/codebase_index/storage/pgvector.rb +0 -195
  173. data/lib/codebase_index/storage/qdrant.rb +0 -205
  174. data/lib/codebase_index/storage/vector_store.rb +0 -167
  175. data/lib/codebase_index/temporal/json_snapshot_store.rb +0 -245
  176. data/lib/codebase_index/temporal/snapshot_store.rb +0 -345
  177. data/lib/codebase_index/token_utils.rb +0 -19
  178. data/lib/codebase_index/version.rb +0 -5
  179. data/lib/generators/codebase_index/install_generator.rb +0 -32
  180. data/lib/generators/codebase_index/pgvector_generator.rb +0 -37
  181. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +0 -15
  182. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +0 -43
  183. data/lib/tasks/codebase_index.rake +0 -597
  184. data/lib/tasks/codebase_index_evaluation.rake +0 -115
@@ -1,597 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- # lib/tasks/codebase_index.rake
4
- #
5
- # Rake tasks for codebase indexing.
6
- # These can be run manually or integrated into CI pipelines.
7
- #
8
- # Usage:
9
- # bundle exec rake codebase_index:extract # Full extraction
10
- # bundle exec rake codebase_index:incremental # Changed files only
11
- # bundle exec rake codebase_index:extract_framework # Rails/gem sources only
12
- # bundle exec rake codebase_index:validate # Validate index integrity
13
- # bundle exec rake codebase_index:stats # Show index statistics
14
- # bundle exec rake codebase_index:clean # Remove index
15
- # bundle exec rake codebase_index:self_analyze # Analyze gem's own source
16
- # bundle exec rake codebase_index:flow[EntryPoint] # Generate execution flow
17
-
18
- namespace :codebase_index do
19
- desc 'Full extraction of codebase for indexing'
20
- task extract: :environment do
21
- require 'codebase_index/extractor'
22
-
23
- output_dir = ENV.fetch('CODEBASE_INDEX_OUTPUT', Rails.root.join('tmp/codebase_index'))
24
-
25
- puts 'Starting full codebase extraction...'
26
- puts "Output directory: #{output_dir}"
27
- puts
28
-
29
- extractor = CodebaseIndex::Extractor.new(output_dir: output_dir)
30
- results = extractor.extract_all
31
-
32
- puts
33
- puts 'Extraction complete!'
34
- puts '=' * 50
35
- results.each do |type, units|
36
- puts " #{type.to_s.ljust(15)}: #{units.size} units"
37
- end
38
- puts '=' * 50
39
- puts " Total: #{results.values.sum(&:size)} units"
40
- puts
41
- puts "Output written to: #{output_dir}"
42
- end
43
-
44
- desc 'Incremental extraction based on git changes'
45
- task incremental: :environment do
46
- require 'codebase_index/extractor'
47
-
48
- output_dir = ENV.fetch('CODEBASE_INDEX_OUTPUT', Rails.root.join('tmp/codebase_index'))
49
-
50
- # Determine changed files from CI environment or git
51
- require 'open3'
52
-
53
- changed_files = if ENV['CHANGED_FILES']
54
- # Explicit list from CI
55
- ENV['CHANGED_FILES'].split(',').map(&:strip)
56
- elsif ENV['CI_COMMIT_BEFORE_SHA']
57
- # GitLab CI
58
- output, = Open3.capture2('git', 'diff', '--name-only',
59
- "#{ENV['CI_COMMIT_BEFORE_SHA']}..#{ENV.fetch('CI_COMMIT_SHA', nil)}")
60
- output.lines.map(&:strip)
61
- elsif ENV['GITHUB_BASE_REF']
62
- # GitHub Actions PR
63
- output, = Open3.capture2('git', 'diff', '--name-only',
64
- "origin/#{ENV['GITHUB_BASE_REF']}...HEAD")
65
- output.lines.map(&:strip)
66
- else
67
- # Default: changes since last commit
68
- output, = Open3.capture2('git', 'diff', '--name-only', 'HEAD~1')
69
- output.lines.map(&:strip)
70
- end
71
-
72
- # Filter to relevant files
73
- relevant_patterns = [
74
- %r{^app/models/},
75
- %r{^app/controllers/},
76
- %r{^app/services/},
77
- %r{^app/components/},
78
- %r{^app/views/components/},
79
- %r{^app/views/.*\.rb$}, # Phlex views
80
- %r{^app/interactors/},
81
- %r{^app/operations/},
82
- %r{^app/commands/},
83
- %r{^app/use_cases/},
84
- %r{^app/jobs/},
85
- %r{^app/workers/}, # Sidekiq workers
86
- %r{^app/mailers/},
87
- %r{^app/graphql/}, # GraphQL types/mutations/resolvers
88
- %r{^app/serializers/},
89
- %r{^app/decorators/},
90
- %r{^app/blueprinters/},
91
- %r{^db/migrate/},
92
- %r{^db/schema\.rb$}, # Schema changes affect model metadata
93
- %r{^config/routes\.rb$},
94
- /^Gemfile\.lock$/ # Dependency changes trigger framework re-index
95
- ]
96
-
97
- changed_files = changed_files.select do |f|
98
- relevant_patterns.any? { |p| f.match?(p) }
99
- end
100
-
101
- if changed_files.empty?
102
- puts 'No relevant files changed. Skipping extraction.'
103
- exit 0
104
- end
105
-
106
- puts "Incremental extraction for #{changed_files.size} changed files..."
107
- changed_files.each { |f| puts " - #{f}" }
108
- puts
109
-
110
- extractor = CodebaseIndex::Extractor.new(output_dir: output_dir)
111
- affected = extractor.extract_changed(changed_files)
112
-
113
- puts
114
- puts "Re-extracted #{affected.size} affected units."
115
- end
116
-
117
- desc 'Extract only Rails/gem framework sources (run when dependencies change)'
118
- task extract_framework: :environment do
119
- require 'codebase_index/extractors/rails_source_extractor'
120
-
121
- output_dir = ENV.fetch('CODEBASE_INDEX_OUTPUT', Rails.root.join('tmp/codebase_index'))
122
-
123
- puts 'Extracting Rails and gem framework sources...'
124
- puts "Rails version: #{Rails.version}"
125
- puts
126
-
127
- extractor = CodebaseIndex::Extractors::RailsSourceExtractor.new
128
- units = extractor.extract_all
129
-
130
- # Write output
131
- framework_dir = Pathname.new(output_dir).join('rails_source')
132
- FileUtils.mkdir_p(framework_dir)
133
-
134
- units.each do |unit|
135
- file_name = "#{unit.identifier.gsub('/', '__').gsub('::', '__')}.json"
136
- File.write(
137
- framework_dir.join(file_name),
138
- JSON.pretty_generate(unit.to_h)
139
- )
140
- end
141
-
142
- puts "Extracted #{units.size} framework source units."
143
- puts "Output: #{framework_dir}"
144
- end
145
-
146
- desc 'Validate extracted index integrity'
147
- task validate: :environment do
148
- output_dir = Pathname.new(ENV.fetch('CODEBASE_INDEX_OUTPUT', Rails.root.join('tmp/codebase_index')))
149
-
150
- unless output_dir.exist?
151
- puts "ERROR: Index directory does not exist: #{output_dir}"
152
- exit 1
153
- end
154
-
155
- manifest_path = output_dir.join('manifest.json')
156
- unless manifest_path.exist?
157
- puts 'ERROR: Manifest not found. Run extraction first.'
158
- exit 1
159
- end
160
-
161
- manifest = JSON.parse(File.read(manifest_path))
162
-
163
- puts 'Validating index...'
164
- puts " Extracted at: #{manifest['extracted_at']}"
165
- puts " Git SHA: #{manifest['git_sha']}"
166
- puts
167
-
168
- errors = []
169
- warnings = []
170
-
171
- # Check each type directory
172
- manifest['counts'].each do |type, expected_count|
173
- type_dir = output_dir.join(type)
174
- unless type_dir.exist?
175
- errors << "Missing directory: #{type}"
176
- next
177
- end
178
-
179
- actual_count = Dir[type_dir.join('*.json')].reject { |f| f.end_with?('_index.json') }.size
180
-
181
- warnings << "#{type}: expected #{expected_count}, found #{actual_count}" if actual_count != expected_count
182
-
183
- # Validate each unit file is valid JSON
184
- Dir[type_dir.join('*.json')].each do |file|
185
- next if file.end_with?('_index.json')
186
-
187
- begin
188
- data = JSON.parse(File.read(file))
189
- errors << "#{file}: missing identifier" unless data['identifier']
190
- errors << "#{file}: missing source_code" unless data['source_code']
191
- rescue JSON::ParserError => e
192
- errors << "#{file}: invalid JSON - #{e.message}"
193
- end
194
- end
195
- end
196
-
197
- # Check dependency graph
198
- graph_path = output_dir.join('dependency_graph.json')
199
- if graph_path.exist?
200
- begin
201
- JSON.parse(File.read(graph_path))
202
- rescue JSON::ParserError
203
- errors << 'dependency_graph.json: invalid JSON'
204
- end
205
- else
206
- errors << 'Missing dependency_graph.json'
207
- end
208
-
209
- # Report
210
- if errors.any?
211
- puts 'ERRORS:'
212
- errors.each { |e| puts " ✗ #{e}" }
213
- end
214
-
215
- if warnings.any?
216
- puts 'WARNINGS:'
217
- warnings.each { |w| puts " ⚠ #{w}" }
218
- end
219
-
220
- if errors.empty? && warnings.empty?
221
- puts '✓ Index is valid.'
222
- elsif errors.empty?
223
- puts "\n✓ Index is valid with #{warnings.size} warning(s)."
224
- else
225
- puts "\n✗ Index has #{errors.size} error(s)."
226
- exit 1
227
- end
228
- end
229
-
230
- desc 'Show index statistics'
231
- task stats: :environment do
232
- output_dir = Pathname.new(ENV.fetch('CODEBASE_INDEX_OUTPUT', Rails.root.join('tmp/codebase_index')))
233
-
234
- unless output_dir.exist?
235
- puts 'Index directory does not exist. Run extraction first.'
236
- exit 1
237
- end
238
-
239
- manifest_path = output_dir.join('manifest.json')
240
- manifest = manifest_path.exist? ? JSON.parse(File.read(manifest_path)) : {}
241
-
242
- puts 'Codebase Index Statistics'
243
- puts '=' * 50
244
- puts " Extracted at: #{manifest['extracted_at'] || 'unknown'}"
245
- puts " Rails version: #{manifest['rails_version'] || 'unknown'}"
246
- puts " Ruby version: #{manifest['ruby_version'] || 'unknown'}"
247
- puts " Git SHA: #{manifest['git_sha'] || 'unknown'}"
248
- puts " Git branch: #{manifest['git_branch'] || 'unknown'}"
249
- puts
250
-
251
- puts 'Units by Type'
252
- puts '-' * 50
253
-
254
- total_size = 0
255
- total_units = 0
256
- total_chunks = 0
257
-
258
- (manifest['counts'] || {}).each do |type, count|
259
- type_dir = output_dir.join(type)
260
- next unless type_dir.exist?
261
-
262
- type_size = Dir[type_dir.join('*.json')].sum { |f| File.size(f) }
263
- total_size += type_size
264
- total_units += count
265
-
266
- # Count chunks from index
267
- index_path = type_dir.join('_index.json')
268
- type_chunks = 0
269
- if index_path.exist?
270
- index = JSON.parse(File.read(index_path))
271
- type_chunks = index.sum { |u| u['chunk_count'] || 0 }
272
- total_chunks += type_chunks
273
- end
274
-
275
- puts " #{type.ljust(15)}: #{count.to_s.rjust(4)} units, #{type_chunks.to_s.rjust(4)} chunks, #{(type_size / 1024.0).round(1).to_s.rjust(8)} KB"
276
- end
277
-
278
- puts '-' * 50
279
- puts " #{'Total'.ljust(15)}: #{total_units.to_s.rjust(4)} units, #{total_chunks.to_s.rjust(4)} chunks, #{(total_size / 1024.0).round(1).to_s.rjust(8)} KB"
280
- puts
281
-
282
- # Dependency graph stats
283
- graph_path = output_dir.join('dependency_graph.json')
284
- if graph_path.exist?
285
- graph = JSON.parse(File.read(graph_path))
286
- stats = graph['stats'] || {}
287
- puts 'Dependency Graph'
288
- puts '-' * 50
289
- puts " Nodes: #{stats['node_count'] || 'unknown'}"
290
- puts " Edges: #{stats['edge_count'] || 'unknown'}"
291
- end
292
- end
293
-
294
- desc 'Clean extracted index'
295
- task clean: :environment do
296
- output_dir = Pathname.new(ENV.fetch('CODEBASE_INDEX_OUTPUT', Rails.root.join('tmp/codebase_index')))
297
-
298
- if output_dir.exist?
299
- puts "Removing #{output_dir}..."
300
- FileUtils.rm_rf(output_dir)
301
- puts 'Done.'
302
- else
303
- puts 'Index directory does not exist.'
304
- end
305
- end
306
-
307
- # Internal debugging tool — hidden from `rails -T`
308
- task :retrieve, [:query] => :environment do |_t, args|
309
- query = args[:query] || raise('Usage: rake codebase_index:retrieve[query]')
310
-
311
- require 'codebase_index'
312
- require 'codebase_index/retriever'
313
- require 'codebase_index/embedding/provider'
314
- require 'codebase_index/storage/vector_store'
315
- require 'codebase_index/storage/metadata_store'
316
- require 'codebase_index/storage/graph_store'
317
- require 'codebase_index/formatting/human_adapter'
318
-
319
- config = CodebaseIndex.configuration
320
-
321
- provider = CodebaseIndex::Embedding::Provider::Ollama.new
322
- vector_store = CodebaseIndex::Storage::VectorStore::InMemory.new
323
- metadata_store = CodebaseIndex::Storage::MetadataStore::SQLite.new
324
- graph_store = CodebaseIndex::Storage::GraphStore::Memory.new
325
-
326
- retriever = CodebaseIndex::Retriever.new(
327
- vector_store: vector_store,
328
- metadata_store: metadata_store,
329
- graph_store: graph_store,
330
- embedding_provider: provider
331
- )
332
-
333
- result = retriever.retrieve(query, budget: config.max_context_tokens)
334
-
335
- formatter = CodebaseIndex::Formatting::HumanAdapter.new
336
- puts formatter.format(result)
337
- end
338
-
339
- desc 'Embed all extracted units'
340
- task embed: :environment do
341
- require 'codebase_index'
342
- require 'codebase_index/embedding/indexer'
343
- require 'codebase_index/embedding/text_preparer'
344
- require 'codebase_index/embedding/provider'
345
- require 'codebase_index/storage/vector_store'
346
-
347
- config = CodebaseIndex.configuration
348
- output_dir = ENV.fetch('CODEBASE_INDEX_OUTPUT', config.output_dir)
349
-
350
- provider = CodebaseIndex::Embedding::Provider::Ollama.new
351
- text_preparer = CodebaseIndex::Embedding::TextPreparer.new
352
- vector_store = CodebaseIndex::Storage::VectorStore::InMemory.new
353
-
354
- indexer = CodebaseIndex::Embedding::Indexer.new(
355
- provider: provider,
356
- text_preparer: text_preparer,
357
- vector_store: vector_store,
358
- output_dir: output_dir
359
- )
360
-
361
- puts 'Embedding all extracted units...'
362
- stats = indexer.index_all
363
-
364
- puts
365
- puts 'Embedding complete!'
366
- puts " Processed: #{stats[:processed]}"
367
- puts " Skipped: #{stats[:skipped]}"
368
- puts " Errors: #{stats[:errors]}"
369
- end
370
-
371
- desc 'Embed changed units only (incremental)'
372
- task embed_incremental: :environment do
373
- require 'codebase_index'
374
- require 'codebase_index/embedding/indexer'
375
- require 'codebase_index/embedding/text_preparer'
376
- require 'codebase_index/embedding/provider'
377
- require 'codebase_index/storage/vector_store'
378
-
379
- config = CodebaseIndex.configuration
380
- output_dir = ENV.fetch('CODEBASE_INDEX_OUTPUT', config.output_dir)
381
-
382
- provider = CodebaseIndex::Embedding::Provider::Ollama.new
383
- text_preparer = CodebaseIndex::Embedding::TextPreparer.new
384
- vector_store = CodebaseIndex::Storage::VectorStore::InMemory.new
385
-
386
- indexer = CodebaseIndex::Embedding::Indexer.new(
387
- provider: provider,
388
- text_preparer: text_preparer,
389
- vector_store: vector_store,
390
- output_dir: output_dir
391
- )
392
-
393
- puts 'Embedding changed units (incremental)...'
394
- stats = indexer.index_incremental
395
-
396
- puts
397
- puts 'Incremental embedding complete!'
398
- puts " Processed: #{stats[:processed]}"
399
- puts " Skipped: #{stats[:skipped]}"
400
- puts " Errors: #{stats[:errors]}"
401
- end
402
-
403
- # Internal debugging tool — hidden from `rails -T`
404
- task :self_analyze do
405
- require 'digest'
406
- require 'json'
407
- require 'fileutils'
408
- require 'codebase_index/ruby_analyzer'
409
- require 'codebase_index/dependency_graph'
410
- require 'codebase_index/graph_analyzer'
411
- require 'codebase_index/ruby_analyzer/mermaid_renderer'
412
-
413
- gem_root = File.expand_path('../..', __dir__)
414
- json_dir = File.join(gem_root, 'tmp', 'codebase_index_self')
415
- docs_dir = File.join(gem_root, 'docs', 'self-analysis')
416
- manifest_path = File.join(json_dir, 'manifest.json')
417
-
418
- # 1. Check staleness via source_checksum
419
- lib_files = Dir.glob(File.join(gem_root, 'lib', '**', '*.rb'))
420
- source_content = lib_files.map { |f| File.read(f) }.join
421
- source_checksum = Digest::SHA256.hexdigest(source_content)
422
-
423
- if File.exist?(manifest_path)
424
- existing = JSON.parse(File.read(manifest_path))
425
- if existing['source_checksum'] == source_checksum
426
- puts 'Source unchanged — skipping self-analysis.'
427
- next
428
- end
429
- end
430
-
431
- puts 'Running self-analysis on gem source...'
432
-
433
- # 2. Run RubyAnalyzer
434
- units = CodebaseIndex::RubyAnalyzer.analyze(paths: [File.join(gem_root, 'lib', 'codebase_index')])
435
- puts " Analyzed #{units.size} units"
436
-
437
- # 3. Build DependencyGraph + GraphAnalyzer
438
- graph = CodebaseIndex::DependencyGraph.new
439
- units.each { |unit| graph.register(unit) }
440
- analyzer = CodebaseIndex::GraphAnalyzer.new(graph)
441
- analysis = analyzer.analyze
442
- graph_data = graph.to_h
443
-
444
- # 4. Write JSON to tmp/codebase_index_self/
445
- FileUtils.mkdir_p(json_dir)
446
-
447
- units.each do |unit|
448
- file_name = "#{unit.identifier.gsub(/[^a-zA-Z0-9_]/, '_')}.json"
449
- File.write(
450
- File.join(json_dir, file_name),
451
- JSON.pretty_generate(unit.to_h)
452
- )
453
- end
454
-
455
- File.write(
456
- File.join(json_dir, 'dependency_graph.json'),
457
- JSON.pretty_generate(graph_data)
458
- )
459
-
460
- File.write(
461
- File.join(json_dir, 'analysis.json'),
462
- JSON.pretty_generate(analysis)
463
- )
464
-
465
- manifest = {
466
- 'source_checksum' => source_checksum,
467
- 'generated_at' => Time.now.iso8601,
468
- 'unit_count' => units.size,
469
- 'node_count' => graph_data[:stats][:node_count],
470
- 'edge_count' => graph_data[:stats][:edge_count]
471
- }
472
- File.write(manifest_path, JSON.pretty_generate(manifest))
473
-
474
- # 5. Render Mermaid to docs/self-analysis/
475
- FileUtils.mkdir_p(docs_dir)
476
- renderer = CodebaseIndex::RubyAnalyzer::MermaidRenderer.new
477
-
478
- File.write(
479
- File.join(docs_dir, 'architecture.md'),
480
- renderer.render_architecture(units, graph_data, analysis)
481
- )
482
-
483
- File.write(
484
- File.join(docs_dir, 'call-graph.md'),
485
- "# Call Graph\n\n```mermaid\n#{renderer.render_call_graph(units)}\n```\n"
486
- )
487
-
488
- File.write(
489
- File.join(docs_dir, 'dependency-map.md'),
490
- "# Dependency Map\n\n```mermaid\n#{renderer.render_dependency_map(graph_data)}\n```\n"
491
- )
492
-
493
- File.write(
494
- File.join(docs_dir, 'dataflow.md'),
495
- "# Data Flow\n\n```mermaid\n#{renderer.render_dataflow(units)}\n```\n"
496
- )
497
-
498
- puts " JSON output: #{json_dir}"
499
- puts " Mermaid docs: #{docs_dir}"
500
- puts 'Self-analysis complete.'
501
- end
502
-
503
- desc 'Generate execution flow document for a Rails entry point'
504
- task :flow, [:entry_point] => :environment do |_t, args|
505
- require 'json'
506
- require 'codebase_index/flow_assembler'
507
- require 'codebase_index/dependency_graph'
508
-
509
- entry_point = args[:entry_point]
510
- unless entry_point
511
- puts 'Usage: rake codebase_index:flow[EntryPoint#method]'
512
- exit 1
513
- end
514
-
515
- output_dir = ENV.fetch('CODEBASE_INDEX_OUTPUT', Rails.root.join('tmp/codebase_index'))
516
- graph_path = File.join(output_dir, 'dependency_graph.json')
517
-
518
- unless File.exist?(graph_path)
519
- puts "ERROR: Dependency graph not found at #{graph_path}"
520
- puts 'Run codebase_index:extract first.'
521
- exit 1
522
- end
523
-
524
- graph_data = JSON.parse(File.read(graph_path))
525
- graph = CodebaseIndex::DependencyGraph.from_h(graph_data)
526
-
527
- max_depth = ENV.fetch('MAX_DEPTH', 5).to_i
528
- assembler = CodebaseIndex::FlowAssembler.new(graph: graph, extracted_dir: output_dir)
529
- flow = assembler.assemble(entry_point, max_depth: max_depth)
530
-
531
- format = ENV.fetch('FORMAT', 'markdown').downcase
532
-
533
- case format
534
- when 'json'
535
- puts JSON.pretty_generate(flow.to_h)
536
- else
537
- puts flow.to_markdown
538
- end
539
- end
540
-
541
- desc 'Start the embedded console MCP server (stdio transport)'
542
- task :console do
543
- # Capture stdout before Rails boot to keep MCP protocol clean.
544
- # Rails boot emits OpenTelemetry, gem warnings, etc. to stdout —
545
- # MCP client cannot parse these as JSON-RPC.
546
- # Global variable passes the fd to exe/codebase-console via load.
547
- $codebase_index_protocol_out = $stdout.dup # rubocop:disable Style/GlobalVars
548
- $stdout.reopen($stderr)
549
-
550
- Rake::Task[:environment].invoke
551
-
552
- load File.expand_path('../../exe/codebase-console', __dir__)
553
- end
554
-
555
- desc 'Sync extraction data to Notion databases (Data Models + Columns)'
556
- task notion_sync: :environment do
557
- require 'codebase_index/notion/exporter'
558
-
559
- config = CodebaseIndex.configuration
560
- # Env var takes precedence over configured value
561
- config.notion_api_token = ENV.fetch('NOTION_API_TOKEN', nil) || config.notion_api_token
562
-
563
- unless config.notion_api_token
564
- puts 'ERROR: Notion API token not configured.'
565
- puts 'Set NOTION_API_TOKEN env var or configure notion_api_token in CodebaseIndex.configure.'
566
- exit 1
567
- end
568
-
569
- output_dir = ENV.fetch('CODEBASE_INDEX_OUTPUT', config.output_dir)
570
-
571
- db_ids = config.notion_database_ids || {}
572
- if db_ids.empty?
573
- puts 'ERROR: No Notion database IDs configured.'
574
- puts 'Set notion_database_ids in CodebaseIndex.configure:'
575
- puts ' config.notion_database_ids = { data_models: "db-uuid", columns: "db-uuid" }'
576
- exit 1
577
- end
578
-
579
- puts 'Syncing extraction data to Notion...'
580
- puts " Output dir: #{output_dir}"
581
- puts " Databases: #{db_ids.keys.join(', ')}"
582
- puts
583
-
584
- exporter = CodebaseIndex::Notion::Exporter.new(index_dir: output_dir)
585
- stats = exporter.sync_all
586
-
587
- puts 'Sync complete!'
588
- puts " Data Models: #{stats[:data_models]} synced"
589
- puts " Columns: #{stats[:columns]} synced"
590
-
591
- if stats[:errors].any?
592
- puts " Errors: #{stats[:errors].size}"
593
- stats[:errors].first(5).each { |e| puts " - #{e}" }
594
- puts " ... and #{stats[:errors].size - 5} more" if stats[:errors].size > 5
595
- end
596
- end
597
- end