codebase_index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/exe/codebase-console-mcp +22 -0
  8. data/exe/codebase-index-mcp +61 -0
  9. data/exe/codebase-index-mcp-http +64 -0
  10. data/exe/codebase-index-mcp-start +58 -0
  11. data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
  12. data/lib/codebase_index/ast/method_extractor.rb +76 -0
  13. data/lib/codebase_index/ast/node.rb +88 -0
  14. data/lib/codebase_index/ast/parser.rb +653 -0
  15. data/lib/codebase_index/ast.rb +6 -0
  16. data/lib/codebase_index/builder.rb +137 -0
  17. data/lib/codebase_index/chunking/chunk.rb +84 -0
  18. data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
  19. data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
  20. data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
  21. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
  22. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
  23. data/lib/codebase_index/console/audit_logger.rb +75 -0
  24. data/lib/codebase_index/console/bridge.rb +170 -0
  25. data/lib/codebase_index/console/confirmation.rb +90 -0
  26. data/lib/codebase_index/console/connection_manager.rb +173 -0
  27. data/lib/codebase_index/console/console_response_renderer.rb +78 -0
  28. data/lib/codebase_index/console/model_validator.rb +81 -0
  29. data/lib/codebase_index/console/safe_context.rb +82 -0
  30. data/lib/codebase_index/console/server.rb +557 -0
  31. data/lib/codebase_index/console/sql_validator.rb +172 -0
  32. data/lib/codebase_index/console/tools/tier1.rb +118 -0
  33. data/lib/codebase_index/console/tools/tier2.rb +117 -0
  34. data/lib/codebase_index/console/tools/tier3.rb +110 -0
  35. data/lib/codebase_index/console/tools/tier4.rb +79 -0
  36. data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
  37. data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
  38. data/lib/codebase_index/cost_model/estimator.rb +128 -0
  39. data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
  40. data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
  41. data/lib/codebase_index/cost_model.rb +22 -0
  42. data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
  43. data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
  44. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
  45. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
  46. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
  47. data/lib/codebase_index/db/migrator.rb +71 -0
  48. data/lib/codebase_index/db/schema_version.rb +73 -0
  49. data/lib/codebase_index/dependency_graph.rb +227 -0
  50. data/lib/codebase_index/embedding/indexer.rb +130 -0
  51. data/lib/codebase_index/embedding/openai.rb +105 -0
  52. data/lib/codebase_index/embedding/provider.rb +135 -0
  53. data/lib/codebase_index/embedding/text_preparer.rb +112 -0
  54. data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
  55. data/lib/codebase_index/evaluation/evaluator.rb +146 -0
  56. data/lib/codebase_index/evaluation/metrics.rb +79 -0
  57. data/lib/codebase_index/evaluation/query_set.rb +148 -0
  58. data/lib/codebase_index/evaluation/report_generator.rb +90 -0
  59. data/lib/codebase_index/extracted_unit.rb +145 -0
  60. data/lib/codebase_index/extractor.rb +956 -0
  61. data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
  62. data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
  63. data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
  64. data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
  65. data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
  66. data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
  67. data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
  68. data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
  69. data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
  70. data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
  71. data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
  72. data/lib/codebase_index/extractors/event_extractor.rb +211 -0
  73. data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
  74. data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
  75. data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
  76. data/lib/codebase_index/extractors/job_extractor.rb +369 -0
  77. data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
  78. data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
  79. data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
  80. data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
  81. data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
  82. data/lib/codebase_index/extractors/model_extractor.rb +960 -0
  83. data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
  84. data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
  85. data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
  86. data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
  87. data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
  88. data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
  89. data/lib/codebase_index/extractors/route_extractor.rb +181 -0
  90. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
  91. data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
  92. data/lib/codebase_index/extractors/service_extractor.rb +254 -0
  93. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
  94. data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
  95. data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
  96. data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
  97. data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
  98. data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
  99. data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
  100. data/lib/codebase_index/feedback/gap_detector.rb +89 -0
  101. data/lib/codebase_index/feedback/store.rb +119 -0
  102. data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
  103. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
  104. data/lib/codebase_index/flow_assembler.rb +290 -0
  105. data/lib/codebase_index/flow_document.rb +191 -0
  106. data/lib/codebase_index/flow_precomputer.rb +102 -0
  107. data/lib/codebase_index/formatting/base.rb +40 -0
  108. data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
  109. data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
  110. data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
  111. data/lib/codebase_index/formatting/human_adapter.rb +78 -0
  112. data/lib/codebase_index/graph_analyzer.rb +374 -0
  113. data/lib/codebase_index/mcp/index_reader.rb +394 -0
  114. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
  115. data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
  116. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
  117. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
  118. data/lib/codebase_index/mcp/server.rb +935 -0
  119. data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
  120. data/lib/codebase_index/model_name_cache.rb +51 -0
  121. data/lib/codebase_index/notion/client.rb +217 -0
  122. data/lib/codebase_index/notion/exporter.rb +219 -0
  123. data/lib/codebase_index/notion/mapper.rb +39 -0
  124. data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
  125. data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
  126. data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
  127. data/lib/codebase_index/notion/rate_limiter.rb +68 -0
  128. data/lib/codebase_index/observability/health_check.rb +81 -0
  129. data/lib/codebase_index/observability/instrumentation.rb +34 -0
  130. data/lib/codebase_index/observability/structured_logger.rb +75 -0
  131. data/lib/codebase_index/operator/error_escalator.rb +81 -0
  132. data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
  133. data/lib/codebase_index/operator/status_reporter.rb +80 -0
  134. data/lib/codebase_index/railtie.rb +26 -0
  135. data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
  136. data/lib/codebase_index/resilience/index_validator.rb +185 -0
  137. data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
  138. data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
  139. data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
  140. data/lib/codebase_index/retrieval/ranker.rb +273 -0
  141. data/lib/codebase_index/retrieval/search_executor.rb +327 -0
  142. data/lib/codebase_index/retriever.rb +160 -0
  143. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
  144. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
  145. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
  146. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
  147. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
  148. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
  149. data/lib/codebase_index/ruby_analyzer.rb +87 -0
  150. data/lib/codebase_index/session_tracer/file_store.rb +111 -0
  151. data/lib/codebase_index/session_tracer/middleware.rb +143 -0
  152. data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
  153. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
  154. data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
  155. data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
  156. data/lib/codebase_index/session_tracer/store.rb +67 -0
  157. data/lib/codebase_index/storage/graph_store.rb +120 -0
  158. data/lib/codebase_index/storage/metadata_store.rb +169 -0
  159. data/lib/codebase_index/storage/pgvector.rb +163 -0
  160. data/lib/codebase_index/storage/qdrant.rb +172 -0
  161. data/lib/codebase_index/storage/vector_store.rb +156 -0
  162. data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
  163. data/lib/codebase_index/version.rb +5 -0
  164. data/lib/codebase_index.rb +223 -0
  165. data/lib/generators/codebase_index/install_generator.rb +32 -0
  166. data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
  167. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
  168. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
  169. data/lib/tasks/codebase_index.rake +583 -0
  170. data/lib/tasks/codebase_index_evaluation.rake +115 -0
  171. metadata +252 -0
@@ -0,0 +1,343 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'shared_utility_methods'
4
+ require_relative 'shared_dependency_scanner'
5
+
6
+ module CodebaseIndex
7
+ module Extractors
8
+ # RakeTaskExtractor handles extraction of custom rake tasks from lib/tasks/.
9
+ #
10
+ # Scans `lib/tasks/**/*.rake` for task definitions and produces one
11
+ # ExtractedUnit per task. Uses static regex parsing (never evals rake files).
12
+ # Supports namespaced tasks, nested namespaces, task dependencies, and arguments.
13
+ #
14
+ # @example
15
+ # extractor = RakeTaskExtractor.new
16
+ # units = extractor.extract_all
17
+ # cleanup = units.find { |u| u.identifier == "cleanup:stale_orders" }
18
+ # cleanup.metadata[:description] # => "Remove orders older than 30 days"
19
+ #
20
+ class RakeTaskExtractor
21
+ include SharedUtilityMethods
22
+ include SharedDependencyScanner
23
+
24
+ RAKE_DIRECTORIES = %w[lib/tasks].freeze
25
+
26
+ # Namespaces to exclude from extraction (this gem's own tasks)
27
+ EXCLUDED_NAMESPACES = %w[codebase_index].freeze
28
+
# Resolve each configured rake directory against the application root and
# keep only the entries that report themselves as directories.
def initialize
  candidates = RAKE_DIRECTORIES.map { |relative| Rails.root.join(relative) }
  @directories = candidates.select { |path| path.directory? }
end
32
+
# Extract every rake task found beneath the discovered directories.
#
# Each directory is globbed recursively for `.rake` files; every file is
# handed to #extract_rake_file and the per-file results are flattened.
#
# @return [Array<ExtractedUnit>] List of rake task units
def extract_all
  rake_files = @directories.flat_map { |dir| Dir[dir.join('**/*.rake')] }
  rake_files.flat_map { |file| extract_rake_file(file) }
end
41
+
# Extract rake tasks from a single .rake file.
#
# A file can define several tasks, so the result is always an Array.
# Tasks living in an excluded namespace are skipped. Any StandardError
# raised while reading or parsing is logged and an empty list is returned.
#
# @param file_path [String] Path to the .rake file
# @return [Array<ExtractedUnit>] List of rake task units
def extract_rake_file(file_path)
  return [] unless file_path.to_s.end_with?('.rake')

  source = File.read(file_path)

  parse_tasks(source)
    .reject { |task_data| excluded_namespace?(task_data[:full_name]) }
    .filter_map { |task_data| build_unit(task_data, file_path, source) }
rescue StandardError => e
  Rails.logger.error("Failed to extract rake tasks from #{file_path}: #{e.message}")
  []
end
63
+
64
+ private
65
+
# Parse task definitions from rake source using a line-by-line state machine.
#
# Tracks namespace nesting, buffered `desc` text, and block depth. A
# namespace is considered closed when an `end` brings the depth back to
# the level at which that namespace was opened.
#
# @param source [String] Rake file source code
# @return [Array<Hash>] Parsed task data, each with a :block_source entry
def parse_tasks(source)
  tasks = []
  namespace_stack = []
  # Block depth at which each entry of namespace_stack was opened.
  namespace_depths = []
  pending_desc = nil
  depth = 0
  lines = source.lines
  # A `do` keyword closing the line, optionally followed by block
  # parameters and/or a comment. A plain substring check would also fire
  # on words that merely start with "do".
  trailing_do = /\sdo\b\s*(?:\|[^|]*\|)?\s*(?:#.*)?\z/

  lines.each_with_index do |line, index|
    stripped = line.strip

    # Track namespace blocks
    if stripped.match?(/\Anamespace\s+/)
      name = extract_namespace_name(stripped)
      if name
        namespace_stack.push(name)
        namespace_depths.push(depth)
        depth += 1
      elsif stripped.match?(trailing_do)
        # The name could not be parsed, but the block still has a matching
        # `end`; count it so the depth bookkeeping stays balanced.
        depth += 1
      end
      next
    end

    # Buffer desc for the next task
    if stripped.match?(/\Adesc\s+/)
      pending_desc = extract_desc(stripped)
      next
    end

    # Detect task definitions
    if stripped.match?(/\Atask\s+/)
      task_data = parse_task_line(stripped, namespace_stack, pending_desc, index + 1)
      if task_data
        task_data[:block_source] = extract_task_block(lines, index)
        tasks << task_data
      end
      pending_desc = nil
      depth += 1 if stripped.match?(trailing_do)
      next
    end

    # Track block openers (non-namespace, non-task)
    depth += 1 if block_opener?(stripped)

    # Track end keywords, including `end # comment` and `end.method`
    next unless stripped.match?(/\Aend\b/)

    depth -= 1
    # Pop namespace if we've returned to the depth where it was opened
    if namespace_depths.any? && depth == namespace_depths.last
      namespace_stack.pop
      namespace_depths.pop
    end
  end

  tasks
end
130
+
# Extract the namespace name from a namespace declaration line.
#
# Accepts symbol names (`namespace :foo do`), quoted string names
# (`namespace 'foo' do`) and quoted symbols (`namespace :"foo" do`).
# Names given as variables or expressions cannot be resolved statically
# and yield nil.
#
# @param line [String] e.g. "namespace :foo do"
# @return [String, nil] The namespace name
def extract_namespace_name(line)
  match = line.match(/\Anamespace\s+(?::(?<symbol>\w+)|:?(?<quote>['"])(?<string>[^'"]+)\k<quote>)/)
  return nil unless match

  match[:symbol] || match[:string]
end
139
+
# Extract the description string from a desc line.
#
# Only a quoted literal directly after `desc` is recognised; heredocs and
# expressions yield nil. Escaped quotes inside the literal do not end the
# description, and escapes of the delimiter and of the backslash are
# resolved in the returned text.
#
# @param line [String] e.g. "desc 'Remove stale orders'"
# @return [String, nil] The description text
def extract_desc(line)
  match = line.match(/\Adesc\s+(?<quote>['"])(?<text>(?:\\.|(?!\k<quote>)[^\\])*)\k<quote>/)
  return nil unless match

  # Turn \' (or \") and \\ back into the characters they stand for.
  escaped = /\\(\\|#{Regexp.escape(match[:quote])})/
  match[:text].gsub(escaped) { ::Regexp.last_match(1) }
end
148
+
# Turn a task definition line into a structured hash.
#
# The qualified name is the namespace stack joined with ":" followed by
# the task name; top-level tasks have no namespace and keep their bare name.
#
# @param line [String] The task line
# @param namespace_stack [Array<String>] Current namespace nesting
# @param description [String, nil] Buffered desc
# @param line_number [Integer] 1-based line number
# @return [Hash, nil] Parsed task data or nil if unparseable
def parse_task_line(line, namespace_stack, description, line_number)
  task_name, prerequisites, arguments = parse_task_signature(line)
  return nil unless task_name

  scope = namespace_stack.join(':') if namespace_stack.any?
  qualified_name = scope ? "#{scope}:#{task_name}" : task_name

  parsed = { task_name: task_name, full_name: qualified_name, task_namespace: scope }
  parsed[:description] = description
  parsed[:task_dependencies] = prerequisites
  parsed[:arguments] = arguments
  parsed[:line_number] = line_number
  parsed
end
173
+
# Parse the task name, dependencies, and arguments from a task signature.
#
# Handles:
#   task :name
#   task 'name'
#   task :name => :dep
#   task :name => [:dep1, :dep2]
#   task :name, [:arg1, :arg2] => :dep
#   task name: :dep
#   task name: [:dep1, :dep2]
#
# A trailing `do ...` is never treated as part of the dependency list.
#
# @param line [String] The task line
# @return [Array(String, Array<String>, Array<String>), nil] [name, deps, args],
#   or nil when no task name can be read from the line
def parse_task_signature(line)
  head = line.match(/\Atask\s+(?::(?<symbol>\w+)|(?<quote>['"])(?<string>[^'"]+)\k<quote>|(?<key>\w+):(?!:))/)
  return nil unless head

  rest = head.post_match

  # Hash-key style (`task name: deps`): whatever follows the key, minus a
  # trailing `do ...`, is the dependency list. This form carries no args.
  return [head[:key], parse_dependency_list(rest.sub(/\s+do\b.*\z/, '')), []] if head[:key]

  name = head[:symbol] || head[:string]
  rocket_rhs = /=>\s*(.+?)(?:\s+do\b|\s*\z)/

  args_match = rest.match(/\A\s*,\s*\[([^\]]*)\]/)
  if args_match
    # Task with args: dependencies, if any, follow the argument list.
    args = args_match[1].scan(/:(\w+)/).flatten
    rocket = args_match.post_match.match(rocket_rhs)
  else
    # Without args the hash rocket must come directly after the name.
    args = []
    rocket = rest.match(/\A\s*#{rocket_rhs.source}/)
  end

  deps = rocket ? parse_dependency_list(rocket[1]) : []
  [name, deps, args]
end
212
+
# Parse a dependency list from a hash-rocket right-hand side.
#
# Recognises symbols (`:dep`), quoted task names (`'db:migrate'`, kept
# whole including their namespace), quoted symbols (`:"db:migrate"`) and
# word/symbol array literals (`%w[a b]`, `%i[a b]`).
#
# @param dep_str [String] e.g. ":environment" or "[:dep1, 'db:migrate']"
# @return [Array<String>] Dependency names in order of appearance
def parse_dependency_list(dep_str)
  pattern = /%[wiWI][\[({<]([^\])}>]*)[\])}>]|(['"])((?:(?!\2).)+)\2|:(\w+)/

  dep_str.scan(pattern).flat_map do |words, _quote, quoted, symbol|
    words ? words.split : [quoted || symbol]
  end
end
220
+
# Extract the task block body (lines between task...do and matching end).
#
# A task line only counts as opening a block when it ends with the `do`
# keyword (optionally followed by block parameters or a comment). Tasks
# without a block, or with a block written on a single line, produce an
# empty body.
#
# @param lines [Array<String>] All source lines
# @param task_line_index [Integer] 0-based index of the task line
# @return [String] The block body source ('' when the task has no block)
def extract_task_block(lines, task_line_index)
  task_line = lines[task_line_index].to_s.rstrip
  # A substring test for "do" would also fire on names such as :download.
  return '' unless task_line.match?(/\sdo\b\s*(?:\|[^|]*\|)?\s*(?:#.*)?\z/)

  depth = 1
  body_lines = []

  lines.drop(task_line_index + 1).each do |line|
    stripped = line.strip

    depth += 1 if block_opener?(stripped)
    # Also counts `end # comment` and `end.method` as closing a block.
    depth -= 1 if stripped.match?(/\Aend\b/)

    break if depth.zero?

    body_lines << line
  end

  body_lines.join
end
247
+
# Check if a line opens a new block (do...end, def...end, etc.).
#
# String literals and trailing comments are ignored, so keywords that
# only appear inside them do not count. Keywords count when they start
# the statement (optionally after an assignment such as `x = case y`),
# not as trailing modifiers (e.g. `return if x`, `i += 1 while busy`).
# `do` counts when it ends the line, optionally followed by block
# parameters. A line that already ends in `end` closes what it opened and
# is not an opener.
#
# @param stripped [String] Stripped line content
# @return [Boolean]
def block_opener?(stripped)
  # Blank out quoted strings first so a `#` inside one is not read as a comment.
  code = stripped.gsub(/(["'])(?:\\.|(?!\1).)*\1/, '""').sub(/\s*#.*\z/, '').strip
  return false if code.empty? || code.match?(/\bend\z/)

  return true if code.match?(/\sdo\b\s*(?:\|[^|]*\|)?\z/)
  # `def`, optionally behind a visibility macro (`private def foo`).
  return true if code.match?(/\A(?:\w+\s+)?def\b/)

  code.match?(/\A(?:[@$\w.\[\]"]+\s*(?:\|\||&&)?=\s*)?(?:case|begin|class|module|while|until|for|if|unless)\b/)
end
259
+
# Tell whether a task's qualified name sits inside an excluded namespace.
#
# Only names of the form "<excluded>:..." match; a bare name equal to an
# excluded namespace does not.
#
# @param full_name [String] e.g. "codebase_index:extract"
# @return [Boolean]
def excluded_namespace?(full_name)
  prefixes = EXCLUDED_NAMESPACES.map { |ns| "#{ns}:" }
  full_name.start_with?(*prefixes)
end
267
+
# Assemble an ExtractedUnit of type :rake_task from parsed task data.
#
# The unit is identified by the task's qualified name and carries the
# annotated source, metadata hash and dependency list built by the
# sibling helpers.
#
# @param task_data [Hash] Parsed task data
# @param file_path [String] Path to the .rake file
# @param file_source [String] Full file source
# @return [ExtractedUnit]
def build_unit(task_data, file_path, file_source)
  ExtractedUnit.new(type: :rake_task, identifier: task_data[:full_name], file_path: file_path).tap do |unit|
    unit.namespace = task_data[:task_namespace]
    unit.source_code = build_source_annotation(task_data, file_source)
    unit.metadata = build_metadata(task_data)
    unit.dependencies = extract_dependencies(task_data, file_source)
  end
end
288
+
# Build annotated source code for the unit.
#
# The result is a comment header naming the task (plus its description,
# when one was captured) followed by the full file source.
#
# @param task_data [Hash] Parsed task data
# @param file_source [String] Full file source
# @return [String]
def build_source_annotation(task_data, file_source)
  header_lines = ["# Rake task: #{task_data[:full_name]}"]
  description = task_data[:description]
  header_lines << "# #{description}" if description

  "#{header_lines.join("\n")}\n#{file_source}"
end
299
+
# Build metadata hash for the unit.
#
# Copies the descriptive fields from the parsed task data and adds two
# derived values: whether 'environment' is among the task dependencies,
# and the number of lines in the task's block source.
#
# @param task_data [Hash] Parsed task data
# @return [Hash]
def build_metadata(task_data)
  copied_keys = %i[task_name full_name description task_namespace task_dependencies arguments]
  metadata = copied_keys.to_h { |key| [key, task_data[key]] }

  block_source = task_data[:block_source] || ''
  metadata.merge(
    has_environment_dependency: task_data[:task_dependencies].include?('environment'),
    source_lines: block_source.lines.size
  )
end
316
+
# Extract dependencies from task source.
#
# Starts from the common dependency scan of the task's block source
# (falling back to the whole file when no block source is recorded), then
# adds `Rake::Task[...].invoke` targets found in the block and the task's
# declared prerequisites other than 'environment'. Entries are
# de-duplicated by type and target.
#
# @param task_data [Hash] Parsed task data
# @param file_source [String] Full file source
# @return [Array<Hash>]
def extract_dependencies(task_data, file_source)
  block_source = task_data[:block_source]
  deps = scan_common_dependencies(block_source || file_source)

  # Detect Rake::Task invocations
  invoked = (block_source || '').scan(/Rake::Task\[['"]([^'"]+)['"]\]\.invoke/).flatten
  invoked.each { |target| deps << { type: :rake_task, target: target, via: :task_invoke } }

  # Add task dependency references
  prerequisites = task_data[:task_dependencies].reject { |dep| dep == 'environment' }
  prerequisites.each { |dep| deps << { type: :rake_task, target: dep, via: :task_dependency } }

  deps.uniq { |dep| [dep[:type], dep[:target]] }
end
341
+ end
342
+ end
343
+ end
@@ -0,0 +1,181 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'shared_utility_methods'
4
+ require_relative 'shared_dependency_scanner'
5
+
6
+ module CodebaseIndex
7
+ module Extractors
8
+ # RouteExtractor handles Rails route extraction via runtime introspection.
9
+ #
10
+ # Unlike file-based extractors, RouteExtractor reads the live routing
11
+ # table from `Rails.application.routes.routes`. Each route becomes an
12
+ # ExtractedUnit with metadata about HTTP method, path, controller, and
13
+ # action.
14
+ #
15
+ # @example
16
+ # extractor = RouteExtractor.new
17
+ # units = extractor.extract_all
18
+ # login = units.find { |u| u.identifier == "POST /login" }
19
+ #
20
+ class RouteExtractor
21
+ include SharedUtilityMethods
22
+
# Nothing to set up: routes come from runtime introspection, so there are
# no directories to scan.
def initialize; end
26
+
# Extract all routes from the Rails routing table.
#
# Returns an empty list when the routing table is not reachable. Routes
# for which #extract_route yields nothing are left out.
#
# @return [Array<ExtractedUnit>] List of route units
def extract_all
  return [] unless rails_routes_available?

  Rails.application.routes.routes.each_with_object([]) do |route, units|
    unit = extract_route(route)
    units << unit if unit
  end
end
36
+
37
+ private
38
+
# Check if the Rails routing table is available.
#
# Walks the chain Rails -> application -> routes -> routes, stopping at
# the first link that is missing.
#
# @return [Boolean, nil] Truthy only when every link responds as expected
def rails_routes_available?
  return unless defined?(Rails)
  return false unless Rails.respond_to?(:application)

  app = Rails.application
  return false unless app.respond_to?(:routes)

  app.routes.respond_to?(:routes)
end
48
+
# Extract a single route into an ExtractedUnit.
#
# Routes whose defaults lack a controller or an action are skipped. The
# unit is identified as "<VERB> <path>" and depends on the controller
# class derived from the route's controller name. Any StandardError is
# logged and results in nil.
#
# @param route [ActionDispatch::Journey::Route] A route object
# @return [ExtractedUnit, nil]
def extract_route(route)
  defaults = route_defaults(route)
  controller = defaults[:controller]
  action = defaults[:action]
  return nil unless controller && action

  verb = route_verb(route)
  path = route_path(route)
  controller_class = "#{controller.camelize}Controller"

  ExtractedUnit.new(type: :route, identifier: "#{verb} #{path}", file_path: nil).tap do |unit|
    unit.namespace = extract_namespace(controller_class)
    unit.source_code = build_route_source(verb, path, controller, action, route)
    unit.metadata = build_route_metadata(verb, path, controller, action, route)
    unit.dependencies = build_route_dependencies(controller_class)
  end
rescue StandardError => e
  Rails.logger.error("Failed to extract route: #{e.message}")
  nil
end
82
+
# Read the defaults hash from a route, tolerating route objects that do
# not expose one.
#
# @param route [ActionDispatch::Journey::Route]
# @return [Hash] The route's defaults, or an empty hash when unavailable
def route_defaults(route)
  route.respond_to?(:defaults) ? route.defaults : {}
end
94
+
# Extract HTTP verb from route.
#
# Falls back to 'GET' when the route exposes no verb or a blank one. A
# String verb is returned as is; any other verb object is stringified and
# its first run of capital letters is used.
#
# @param route [ActionDispatch::Journey::Route]
# @return [String]
def route_verb(route)
  return 'GET' unless route.respond_to?(:verb) && route.verb.present?

  raw = route.verb
  return raw.to_s if raw.is_a?(String)

  raw.to_s.scan(/[A-Z]+/).first.to_s
end
107
+
# Extract path pattern from route.
#
# Uses the path's `spec` when it has one, stringifies the result and
# drops the first "(.:format)" suffix. Routes without a path yield "/".
#
# @param route [ActionDispatch::Journey::Route]
# @return [String]
def route_path(route)
  return '/' unless route.respond_to?(:path)

  pattern = route.path
  pattern = pattern.spec if pattern.respond_to?(:spec)
  pattern.to_s.sub('(.:format)', '')
end
121
+
# Build a human-readable source representation of the route.
#
# The output is a block of comment lines: the verb and path, the route
# name (when present), the controller#action target, any constraints, and
# a routes.rb-style declaration equivalent to the route.
#
# @param verb [String] HTTP method
# @param path [String] URL path pattern
# @param controller [String] Controller name (underscored)
# @param action [String] Action name
# @param route [ActionDispatch::Journey::Route]
# @return [String]
def build_route_source(verb, path, controller, action, route)
  name = route.name if route.respond_to?(:name)
  constraints = route_constraints(route)

  [
    "# Route: #{verb} #{path}",
    (name ? "# Name: #{name}" : nil),
    "# Controller: #{controller}##{action}",
    (constraints.any? ? "# Constraints: #{constraints.inspect}" : nil),
    '#',
    "# #{verb.downcase} '#{path}', to: '#{controller}##{action}'"
  ].compact.join("\n")
end
144
+
# Build metadata hash for a route.
#
# `path_params` lists every named segment in the path pattern: dynamic
# segments (`:id`) as well as glob segments (`*filepath`), in order of
# appearance.
#
# @param verb [String] HTTP method
# @param path [String] URL path pattern
# @param controller [String] Controller name (underscored)
# @param action [String] Action name
# @param route [ActionDispatch::Journey::Route]
# @return [Hash]
def build_route_metadata(verb, path, controller, action, route)
  {
    http_method: verb,
    path: path,
    controller: controller,
    action: action,
    route_name: route.respond_to?(:name) ? route.name : nil,
    constraints: route_constraints(route),
    # Glob segments are path parameters too, so match `*name` next to `:name`.
    path_params: path.scan(/[:*](\w+)/).flatten
  }
end
159
+
# Extract route constraints.
#
# Only Hash constraints are passed through; a route without constraints,
# or with constraints of any other shape, yields an empty hash.
#
# @param route [ActionDispatch::Journey::Route]
# @return [Hash]
def route_constraints(route)
  return {} unless route.respond_to?(:constraints)

  constraints = route.constraints
  constraints.is_a?(Hash) ? constraints : {}
end
171
+
# Build dependencies linking route to its controller.
#
# A route has exactly one dependency: the controller class it dispatches to.
#
# @param controller_class [String] The controller class name
# @return [Array<Hash>]
def build_route_dependencies(controller_class)
  dispatch_edge = { type: :controller, target: controller_class, via: :route_dispatch }
  [dispatch_edge]
end
179
+ end
180
+ end
181
+ end