codebase_index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/exe/codebase-console-mcp +22 -0
  8. data/exe/codebase-index-mcp +61 -0
  9. data/exe/codebase-index-mcp-http +64 -0
  10. data/exe/codebase-index-mcp-start +58 -0
  11. data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
  12. data/lib/codebase_index/ast/method_extractor.rb +76 -0
  13. data/lib/codebase_index/ast/node.rb +88 -0
  14. data/lib/codebase_index/ast/parser.rb +653 -0
  15. data/lib/codebase_index/ast.rb +6 -0
  16. data/lib/codebase_index/builder.rb +137 -0
  17. data/lib/codebase_index/chunking/chunk.rb +84 -0
  18. data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
  19. data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
  20. data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
  21. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
  22. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
  23. data/lib/codebase_index/console/audit_logger.rb +75 -0
  24. data/lib/codebase_index/console/bridge.rb +170 -0
  25. data/lib/codebase_index/console/confirmation.rb +90 -0
  26. data/lib/codebase_index/console/connection_manager.rb +173 -0
  27. data/lib/codebase_index/console/console_response_renderer.rb +78 -0
  28. data/lib/codebase_index/console/model_validator.rb +81 -0
  29. data/lib/codebase_index/console/safe_context.rb +82 -0
  30. data/lib/codebase_index/console/server.rb +557 -0
  31. data/lib/codebase_index/console/sql_validator.rb +172 -0
  32. data/lib/codebase_index/console/tools/tier1.rb +118 -0
  33. data/lib/codebase_index/console/tools/tier2.rb +117 -0
  34. data/lib/codebase_index/console/tools/tier3.rb +110 -0
  35. data/lib/codebase_index/console/tools/tier4.rb +79 -0
  36. data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
  37. data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
  38. data/lib/codebase_index/cost_model/estimator.rb +128 -0
  39. data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
  40. data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
  41. data/lib/codebase_index/cost_model.rb +22 -0
  42. data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
  43. data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
  44. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
  45. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
  46. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
  47. data/lib/codebase_index/db/migrator.rb +71 -0
  48. data/lib/codebase_index/db/schema_version.rb +73 -0
  49. data/lib/codebase_index/dependency_graph.rb +227 -0
  50. data/lib/codebase_index/embedding/indexer.rb +130 -0
  51. data/lib/codebase_index/embedding/openai.rb +105 -0
  52. data/lib/codebase_index/embedding/provider.rb +135 -0
  53. data/lib/codebase_index/embedding/text_preparer.rb +112 -0
  54. data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
  55. data/lib/codebase_index/evaluation/evaluator.rb +146 -0
  56. data/lib/codebase_index/evaluation/metrics.rb +79 -0
  57. data/lib/codebase_index/evaluation/query_set.rb +148 -0
  58. data/lib/codebase_index/evaluation/report_generator.rb +90 -0
  59. data/lib/codebase_index/extracted_unit.rb +145 -0
  60. data/lib/codebase_index/extractor.rb +956 -0
  61. data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
  62. data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
  63. data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
  64. data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
  65. data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
  66. data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
  67. data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
  68. data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
  69. data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
  70. data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
  71. data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
  72. data/lib/codebase_index/extractors/event_extractor.rb +211 -0
  73. data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
  74. data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
  75. data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
  76. data/lib/codebase_index/extractors/job_extractor.rb +369 -0
  77. data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
  78. data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
  79. data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
  80. data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
  81. data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
  82. data/lib/codebase_index/extractors/model_extractor.rb +960 -0
  83. data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
  84. data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
  85. data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
  86. data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
  87. data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
  88. data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
  89. data/lib/codebase_index/extractors/route_extractor.rb +181 -0
  90. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
  91. data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
  92. data/lib/codebase_index/extractors/service_extractor.rb +254 -0
  93. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
  94. data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
  95. data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
  96. data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
  97. data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
  98. data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
  99. data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
  100. data/lib/codebase_index/feedback/gap_detector.rb +89 -0
  101. data/lib/codebase_index/feedback/store.rb +119 -0
  102. data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
  103. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
  104. data/lib/codebase_index/flow_assembler.rb +290 -0
  105. data/lib/codebase_index/flow_document.rb +191 -0
  106. data/lib/codebase_index/flow_precomputer.rb +102 -0
  107. data/lib/codebase_index/formatting/base.rb +40 -0
  108. data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
  109. data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
  110. data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
  111. data/lib/codebase_index/formatting/human_adapter.rb +78 -0
  112. data/lib/codebase_index/graph_analyzer.rb +374 -0
  113. data/lib/codebase_index/mcp/index_reader.rb +394 -0
  114. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
  115. data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
  116. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
  117. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
  118. data/lib/codebase_index/mcp/server.rb +935 -0
  119. data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
  120. data/lib/codebase_index/model_name_cache.rb +51 -0
  121. data/lib/codebase_index/notion/client.rb +217 -0
  122. data/lib/codebase_index/notion/exporter.rb +219 -0
  123. data/lib/codebase_index/notion/mapper.rb +39 -0
  124. data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
  125. data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
  126. data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
  127. data/lib/codebase_index/notion/rate_limiter.rb +68 -0
  128. data/lib/codebase_index/observability/health_check.rb +81 -0
  129. data/lib/codebase_index/observability/instrumentation.rb +34 -0
  130. data/lib/codebase_index/observability/structured_logger.rb +75 -0
  131. data/lib/codebase_index/operator/error_escalator.rb +81 -0
  132. data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
  133. data/lib/codebase_index/operator/status_reporter.rb +80 -0
  134. data/lib/codebase_index/railtie.rb +26 -0
  135. data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
  136. data/lib/codebase_index/resilience/index_validator.rb +185 -0
  137. data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
  138. data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
  139. data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
  140. data/lib/codebase_index/retrieval/ranker.rb +273 -0
  141. data/lib/codebase_index/retrieval/search_executor.rb +327 -0
  142. data/lib/codebase_index/retriever.rb +160 -0
  143. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
  144. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
  145. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
  146. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
  147. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
  148. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
  149. data/lib/codebase_index/ruby_analyzer.rb +87 -0
  150. data/lib/codebase_index/session_tracer/file_store.rb +111 -0
  151. data/lib/codebase_index/session_tracer/middleware.rb +143 -0
  152. data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
  153. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
  154. data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
  155. data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
  156. data/lib/codebase_index/session_tracer/store.rb +67 -0
  157. data/lib/codebase_index/storage/graph_store.rb +120 -0
  158. data/lib/codebase_index/storage/metadata_store.rb +169 -0
  159. data/lib/codebase_index/storage/pgvector.rb +163 -0
  160. data/lib/codebase_index/storage/qdrant.rb +172 -0
  161. data/lib/codebase_index/storage/vector_store.rb +156 -0
  162. data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
  163. data/lib/codebase_index/version.rb +5 -0
  164. data/lib/codebase_index.rb +223 -0
  165. data/lib/generators/codebase_index/install_generator.rb +32 -0
  166. data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
  167. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
  168. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
  169. data/lib/tasks/codebase_index.rake +583 -0
  170. data/lib/tasks/codebase_index_evaluation.rake +115 -0
  171. metadata +252 -0
@@ -0,0 +1,398 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'shared_utility_methods'
4
+ require_relative 'shared_dependency_scanner'
5
+
6
+ module CodebaseIndex
7
+ module Extractors
8
+ # StateMachineExtractor scans app/models for state machine DSL definitions.
9
+ #
10
+ # Supports three state machine gems:
11
+ # - AASM: files that include AASM with +aasm do...end+ blocks
12
+ # - Statesman: files that include Statesman::Machine with state/transition calls
13
+ # - state_machines: files using the +state_machine :attr do...end+ DSL
14
+ #
15
+ # Produces one ExtractedUnit per state machine definition found.
16
+ # A single model file can produce multiple units (e.g., two state_machine blocks).
17
+ #
18
+ # @example
19
+ # extractor = StateMachineExtractor.new
20
+ # units = extractor.extract_all
21
+ # order_sm = units.find { |u| u.identifier == "Order::aasm" }
22
+ # order_sm.metadata[:states] # => ["pending", "processing", "completed"]
23
+ # order_sm.metadata[:gem_detected] # => "aasm"
24
+ #
25
+ class StateMachineExtractor
26
+ include SharedUtilityMethods
27
+ include SharedDependencyScanner
28
+
29
+ MODEL_DIRECTORIES = %w[app/models].freeze
30
+
31
# Resolve each entry of MODEL_DIRECTORIES against Rails.root and keep only
# the paths that exist as directories.
def initialize
  candidates = MODEL_DIRECTORIES.map { |relative| Rails.root.join(relative) }
  @directories = candidates.select { |path| path.directory? }
end
34
+
35
# Walk every configured model directory and collect the state machine
# units found in each Ruby file beneath it.
#
# @return [Array<ExtractedUnit>] All state machine units, in directory/file order
def extract_all
  @directories.each_with_object([]) do |directory, units|
    Dir[directory.join('**/*.rb')].each do |model_file|
      units.concat(extract_model_file(model_file))
    end
  end
end
43
+
44
# Run all three gem-specific extractors (AASM, Statesman, state_machines)
# over one model file and return their combined results.
#
# Any StandardError raised while reading or parsing is logged through
# Rails.logger and an empty Array is returned instead.
#
# @param file_path [String] Path to the model file
# @return [Array<ExtractedUnit>] Units found in the file (empty if none, or on error)
def extract_model_file(file_path)
  source = File.read(file_path)
  class_name = detect_class_name(source, file_path)

  aasm_units = extract_aasm_units(source, class_name, file_path)
  statesman_units = extract_statesman_units(source, class_name, file_path)
  state_machines_units = extract_state_machines_units(source, class_name, file_path)

  aasm_units + statesman_units + state_machines_units
rescue StandardError => e
  Rails.logger.error("Failed to extract state machines from #{file_path}: #{e.message}")
  []
end
64
+
65
+ private
66
+
67
+ # ──────────────────────────────────────────────────────────────────────
68
+ # Class Discovery
69
+ # ──────────────────────────────────────────────────────────────────────
70
+
71
# Determine the class name for a model file.
#
# The first +class Name+ declaration in the source wins. When the source
# declares no class, the name is derived from the path relative to
# app/models by camelizing it.
#
# @param source [String] Ruby source code
# @param file_path [String] File path
# @return [String] Class name
def detect_class_name(source, file_path)
  declared = source[/^\s*class\s+([\w:]+)/, 1]
  return declared if declared

  app_relative = file_path.sub("#{Rails.root}/", '')
  app_relative.sub(%r{^app/models/}, '').sub('.rb', '').camelize
end
82
+
83
+ # ──────────────────────────────────────────────────────────────────────
84
+ # AASM
85
+ # ──────────────────────────────────────────────────────────────────────
86
+
87
# Build the single AASM unit for a model, if the source includes AASM.
#
# States are taken from +state :name+ lines; events and their transitions
# come from parse_events_from_source.
#
# @param source [String] Ruby source code
# @param class_name [String] Model class name
# @param file_path [String] File path
# @return [Array<ExtractedUnit>] One-element Array, or empty when AASM is not included
def extract_aasm_units(source, class_name, file_path)
  return [] unless /include\s+AASM/.match?(source)

  events = parse_events_from_source(source, /\Aevent\s+:(\w+)/)
  unit = build_unit(
    identifier: "#{class_name}::aasm",
    class_name: class_name,
    file_path: file_path,
    source: source,
    gem_detected: 'aasm',
    states: source.scan(/^\s*state\s+:(\w+)/).flatten,
    events: events,
    transitions: events.flat_map { |event| event[:transitions] },
    initial_state: parse_initial_state_aasm(source),
    callbacks: parse_state_machine_callbacks(source)
  )

  [unit]
end
114
+
115
# Find the initial state declared in AASM source.
#
# Two spellings are recognised, checked in this order:
#   state :pending, initial: true
#   aasm initial: :pending do
#
# @param source [String] Ruby source code
# @return [String, nil] Initial state name, or nil when neither form is present
def parse_initial_state_aasm(source)
  flagged_state = source[/state\s+:(\w+)[^#\n]*initial:\s*true/, 1]
  flagged_state || source[/aasm\b[^#\n]*initial:\s*:(\w+)/, 1]
end
130
+
131
+ # ──────────────────────────────────────────────────────────────────────
132
+ # Statesman
133
+ # ──────────────────────────────────────────────────────────────────────
134
+
135
# Build the single Statesman unit for a class that includes Statesman::Machine.
#
# Statesman has no named events, so the unit's events list is always empty
# and transitions come straight from parse_statesman_transitions.
#
# @param source [String] Ruby source code
# @param class_name [String] Model class name
# @param file_path [String] File path
# @return [Array<ExtractedUnit>] One-element Array, or empty when Statesman is not included
def extract_statesman_units(source, class_name, file_path)
  return [] unless /include\s+Statesman::Machine/.match?(source)

  unit = build_unit(
    identifier: "#{class_name}::statesman",
    class_name: class_name,
    file_path: file_path,
    source: source,
    gem_detected: 'statesman',
    states: source.scan(/^\s*state\s+:(\w+)/).flatten,
    events: [],
    transitions: parse_statesman_transitions(source),
    initial_state: source[/state\s+:(\w+)[^#\n]*,\s*initial:\s*true/, 1],
    callbacks: parse_state_machine_callbacks(source)
  )

  [unit]
end
162
+
163
# Parse transitions from Statesman source.
#
# Recognises both target forms:
#   transition from: :pending, to: :paid
#   transition from: :pending, to: [:checking_out, :cancelled]
# An array target yields one transition hash per target state.
#
# The leading word boundary keeps callback declarations such as
# +after_transition from: :a, to: :b+ from being read as transitions.
#
# @param source [String] Ruby source code
# @return [Array<Hash>] Transitions with :from, :to, :guard keys (:guard is always nil)
def parse_statesman_transitions(source)
  pattern = /\btransition\s+from:\s*:(\w+)\s*,\s*to:\s*(\[[^\]]*\]|:\w+)/

  source.scan(pattern).flat_map do |from, targets|
    # +targets+ is either ":state" or a literal "[:a, :b]"; pull out every symbol name.
    targets.scan(/:(\w+)/).flatten.map { |to| { from: from, to: to, guard: nil } }
  end
end
172
+
173
+ # ──────────────────────────────────────────────────────────────────────
174
+ # state_machines gem
175
+ # ──────────────────────────────────────────────────────────────────────
176
+
177
# Extract state_machines gem state machine units from source.
#
# Produces one unit per +state_machine :attr+ declaration, so a model with
# several state machine attributes yields several units.
#
# The initial-state lookup places a word boundary after the attribute name
# so that +:status+ does not pick up the +initial:+ option of a
# +state_machine :status_code+ declaration.
#
# @param source [String] Ruby source code
# @param class_name [String] Model class name
# @param file_path [String] File path
# @return [Array<ExtractedUnit>] One unit per declaration (empty if none)
def extract_state_machines_units(source, class_name, file_path)
  return [] unless source.match?(/\bstate_machine\b/)

  source.scan(/state_machine\s+:(\w+)/).map do |(attr_name)|
    block = extract_block_for_state_machine(source, attr_name)
    events = parse_events_from_source(block, /\Aevent\s+:(\w+)/)
    initial_pattern = /state_machine\s+:#{Regexp.escape(attr_name)}\b[^#\n]*initial:\s*:(\w+)/

    build_unit(
      identifier: "#{class_name}::state_machine_#{attr_name}",
      class_name: class_name,
      file_path: file_path,
      source: source,
      gem_detected: 'state_machines',
      states: block.scan(/^\s*state\s+:(\w+)/).flatten,
      events: events,
      transitions: events.flat_map { |event| event[:transitions] },
      initial_state: source[initial_pattern, 1],
      callbacks: parse_state_machine_callbacks(block)
    )
  end
end
213
+
214
# Extract the block body for a specific state_machine attribute.
#
# Scans line by line for +state_machine :attr ... do+, then tracks nesting
# depth (block openers vs. bare +end+ lines) until the matching +end+.
# The opening and closing lines themselves are not part of the result.
#
# A word boundary follows the attribute name so that asking for +status+
# does not capture the block of +state_machine :status_code+.
#
# @param source [String] Ruby source code
# @param attr_name [String] Attribute name (e.g., "status")
# @return [String] Block body source ("" when no matching declaration is found)
def extract_block_for_state_machine(source, attr_name)
  opener = /\Astate_machine\s+:#{Regexp.escape(attr_name)}\b.*\bdo\b/
  body = []
  depth = 0 # zero means "still looking for the opening line"

  source.each_line do |line|
    stripped = line.strip

    if depth.zero?
      depth = 1 if stripped.match?(opener)
      next
    end

    depth += 1 if block_opener?(stripped)
    depth -= 1 if stripped == 'end'
    break if depth <= 0

    body << line
  end

  body.join
end
247
+
248
+ # ──────────────────────────────────────────────────────────────────────
249
+ # Shared Parsing Helpers
250
+ # ──────────────────────────────────────────────────────────────────────
251
+
252
# Parse state machine callbacks (before_transition, after_transition,
# around_transition, after_failure).
#
# Each callback is described by its name plus the rest of its line.
# Both call styles are recognised:
#   before_transition pending: :active, do: :notify
#   before_transition(from: :a, to: :b) do |order|
# A callback on the final line of +source+ is captured even when the
# source has no trailing newline.
#
# @param source [String] Ruby source code
# @return [Array<String>] Callback descriptions
def parse_state_machine_callbacks(source)
  pattern = /(before_transition|after_transition|around_transition|after_failure)(\s+|\()(.+?)(?=\n|\z)/

  source.scan(pattern).map do |callback, separator, args|
    # Keep the parenthesised form verbatim; normalise whitespace-separated args to one space.
    separator == '(' ? "#{callback}(#{args.strip}" : "#{callback} #{args.strip}"
  end
end
263
+
264
# Parse events from source using a line-by-line depth tracker.
#
# An event opens on a line matching +event_pattern+ that also contains the
# +do+ keyword, and closes at its matching bare +end+. Nested +do+ blocks
# inside the event (e.g. callbacks) raise the depth so their +end+ lines do
# not close the event early. Comment lines are ignored.
#
# An event declared without a +do+ block is recorded immediately with no
# transitions, so a stray +end+ later on cannot push the depth below zero
# and hide every following event.
#
# An event whose closing +end+ is never seen is not returned.
#
# @param source [String] Source code to parse
# @param event_pattern [Regexp] Pattern to match event declaration (must capture event name in group 1)
# @return [Array<Hash>] Events with :name and :transitions keys
def parse_events_from_source(source, event_pattern)
  events = []
  current_event = nil
  depth = 0

  source.each_line do |line|
    stripped = line.strip
    next if stripped.start_with?('#')

    if current_event.nil?
      declaration = stripped.match(event_pattern)
      next unless declaration

      event = { name: declaration[1], transitions: [] }
      if stripped.match?(/\bdo\b/)
        current_event = event
        depth = 1
      else
        # Block-less declaration: nothing to track, record it as-is.
        events << event
      end
      next
    end

    transition = parse_transition_line(stripped)
    current_event[:transitions] << transition if transition

    if stripped.match?(/\bdo\b/)
      depth += 1
    elsif stripped == 'end'
      depth -= 1
      if depth.zero?
        events << current_event
        current_event = nil
      end
    end
  end

  events
end
307
+
308
# Parse a single transition line into a structured hash.
#
# Handles three styles:
# - AASM/Statesman: +transitions from: :a, to: :b, guard: :method+
# - state_machines (keyword hash): +transition pending: :active+
# - state_machines (hash rocket): +transition :pending => :active+
#
# Only single-symbol sources and targets are recognised; array forms such
# as +from: [:a, :b]+ are not parsed and yield nil. A guard is extracted
# only for the +from:/to:+ style.
#
# @param line [String] Stripped source line
# @return [Hash, nil] Transition hash with :from, :to, :guard, or nil if not a transition
def parse_transition_line(line)
  if (m = line.match(/transitions?\s+from:\s*:(\w+)\s*,\s*to:\s*:(\w+)/))
    guard = line[/guard:\s*:?(\w+[?!]?)/, 1]
    return { from: m[1], to: m[2], guard: guard }
  end

  m = line.match(/\Atransition\s+(\w+):\s*:(\w+)/) ||
      line.match(/\Atransition\s+:(\w+)\s*=>\s*:(\w+)/)
  m && { from: m[1], to: m[2], guard: nil }
end
328
+
329
# Check if a line opens a new block.
#
# A line opens a block when it contains one of the block keywords
# (do, def, case, begin, class, module, while, until, for) and does not
# finish with +end+ on the same line, or when it starts with +if+/+unless+
# (a trailing modifier +if+/+unless+ does not count).
#
# Comment lines never open a block: a comment such as
# "# notify for each order" must not change the caller's depth count.
#
# @param stripped [String] Stripped line content
# @return [Boolean]
def block_opener?(stripped)
  return false if stripped.start_with?('#')

  stripped.match?(/\b(do|def|case|begin|class|module|while|until|for)\b.*(?<!\bend)\s*$/) ||
    stripped.match?(/\A(if|unless)\b/)
end
341
+
342
+ # ──────────────────────────────────────────────────────────────────────
343
+ # Unit Construction
344
+ # ──────────────────────────────────────────────────────────────────────
345
+
346
# Assemble the ExtractedUnit describing one state machine.
#
# The unit's source_code is the whole model source prefixed with a one-line
# banner naming the gem and class; all parsed details go into metadata.
#
# @param identifier [String] Unit identifier (e.g., "Order::aasm")
# @param class_name [String] Model class name
# @param file_path [String] File path
# @param source [String] Model source code
# @param gem_detected [String] Which state machine gem was detected
# @param states [Array<String>] Detected state names
# @param events [Array<Hash>] Detected events with transitions
# @param transitions [Array<Hash>] Flat list of all transitions
# @param initial_state [String, nil] Initial state name
# @param callbacks [Array<String>] Detected callbacks
# @return [ExtractedUnit]
def build_unit(identifier:, class_name:, file_path:, source:, gem_detected:,
               states:, events:, transitions:, initial_state:, callbacks:)
  metadata = {
    gem_detected: gem_detected,
    states: states,
    events: events,
    transitions: transitions,
    initial_state: initial_state,
    callbacks: callbacks,
    model_name: class_name
  }

  ExtractedUnit.new(type: :state_machine, identifier: identifier, file_path: file_path).tap do |unit|
    unit.namespace = extract_namespace(class_name)
    unit.source_code = "# State machine (#{gem_detected}) for #{class_name}\n#{source}"
    unit.metadata = metadata
    unit.dependencies = build_dependencies(class_name, source)
  end
end
381
+
382
# List what a state machine unit depends on.
#
# The host model always comes first, followed by whatever the service and
# job scanners find in the source. Entries sharing the same type and target
# are collapsed, keeping the first occurrence.
#
# @param class_name [String] Model class name
# @param source [String] Ruby source code
# @return [Array<Hash>]
def build_dependencies(class_name, source)
  host_model = { type: :model, target: class_name, via: :state_machine }
  services = scan_service_dependencies(source, via: :state_machine_callback)
  jobs = scan_job_dependencies(source, via: :state_machine_callback)

  [host_model, *services, *jobs].uniq { |dep| dep.values_at(:type, :target) }
end
396
+ end
397
+ end
398
+ end
@@ -0,0 +1,225 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'shared_utility_methods'
4
+ require_relative 'shared_dependency_scanner'
5
+
6
+ module CodebaseIndex
7
+ module Extractors
8
+ # TestMappingExtractor maps test files to the units they exercise.
9
+ #
10
+ # Scans spec/**/*_spec.rb (RSpec) and test/**/*_test.rb (Minitest) to
11
+ # produce one ExtractedUnit per test file. Extracts subject class,
12
+ # test count, shared example usage, and test framework type.
13
+ #
14
+ # Units are linked to the code under test via :test_coverage dependencies,
15
+ # inferred from the subject class name and file directory structure.
16
+ #
17
+ # @example
18
+ # extractor = TestMappingExtractor.new
19
+ # units = extractor.extract_all
20
+ # spec = units.find { |u| u.identifier == "spec/models/user_spec.rb" }
21
+ # spec.metadata[:subject_class] # => "User"
22
+ # spec.metadata[:test_count] # => 12
23
+ #
24
+ class TestMappingExtractor
25
+ include SharedUtilityMethods
26
+ include SharedDependencyScanner
27
+
28
+ RSPEC_GLOB = 'spec/**/*_spec.rb'
29
+ MINITEST_GLOB = 'test/**/*_test.rb'
30
+
31
# Capture Rails.root once; globs and relative identifiers are resolved against it.
def initialize
  @rails_root = Rails.root
end
34
+
35
# Collect test mapping units for every RSpec file, followed by every
# Minitest file.
#
# @return [Array<ExtractedUnit>] List of test mapping units
def extract_all
  [*rspec_units, *minitest_units]
end
41
+
42
# Turn one spec/test file into a test mapping unit.
#
# The unit is identified by the file's path with the Rails root prefix
# removed. Any StandardError is logged through Rails.logger and nil is
# returned.
#
# @param file_path [String] Absolute path to the spec or test file
# @return [ExtractedUnit, nil] The extracted unit or nil on error
def extract_test_file(file_path)
  source = File.read(file_path)
  framework = detect_framework(file_path)

  ExtractedUnit.new(
    type: :test_mapping,
    identifier: file_path.sub("#{@rails_root}/", ''),
    file_path: file_path
  ).tap do |unit|
    unit.source_code = source
    unit.metadata = extract_metadata(source, file_path, framework)
    unit.dependencies = extract_dependencies(unit.metadata[:subject_class], unit.metadata[:test_type])
  end
rescue StandardError => e
  Rails.logger.error("Failed to extract test mapping from #{file_path}: #{e.message}")
  nil
end
66
+
67
+ private
68
+
69
# Units for every file matching RSPEC_GLOB under the Rails root.
# Files that fail to extract are left out.
#
# @return [Array<ExtractedUnit>]
def rspec_units
  Dir[@rails_root.join(RSPEC_GLOB)].each_with_object([]) do |spec_file, units|
    unit = extract_test_file(spec_file)
    units << unit if unit
  end
end
72
+
73
# Units for every file matching MINITEST_GLOB under the Rails root.
# Files that fail to extract are left out.
#
# @return [Array<ExtractedUnit>]
def minitest_units
  Dir[@rails_root.join(MINITEST_GLOB)].each_with_object([]) do |test_file, units|
    unit = extract_test_file(test_file)
    units << unit if unit
  end
end
76
+
77
# Classify a test file by its name: files ending in "_spec.rb" are RSpec,
# everything else is treated as Minitest.
#
# @param file_path [String] Path to the test file
# @return [Symbol] :rspec or :minitest
def detect_framework(file_path)
  return :rspec if file_path.end_with?('_spec.rb')

  :minitest
end
84
+
85
# Gather the metadata hash stored on a test mapping unit.
#
# @param source [String] File source code
# @param file_path [String] Absolute path to the file
# @param framework [Symbol] :rspec or :minitest
# @return [Hash] Keys: :subject_class, :test_count, :test_type,
#   :test_framework, :shared_examples, :shared_examples_used
def extract_metadata(source, file_path, framework)
  {
    subject_class: extract_subject_class(source, framework),
    test_count: count_tests(source, framework),
    test_type: infer_test_type(file_path),
    test_framework: framework,
    shared_examples: extract_shared_examples_defined(source),
    shared_examples_used: extract_shared_examples_used(source)
  }
end
104
+
105
# Find the primary class a test file exercises, delegating to the
# framework-specific parser (:rspec uses the RSpec parser; any other
# value uses the Minitest parser).
#
# @param source [String] File source code
# @param framework [Symbol] :rspec or :minitest
# @return [String, nil] Class name or nil if not detected
def extract_subject_class(source, framework)
  return extract_rspec_subject(source) if framework == :rspec

  extract_minitest_subject(source)
end
116
+
117
# Extract the subject from a describe call in an RSpec file.
#
# Tries a constant reference first (describe User do), then the quoted
# string form (describe 'User' do). Handles RSpec.describe and bare
# describe, with or without parentheses, and subjects followed by extra
# arguments (RSpec.describe User, type: :model do).
#
# Each form returns the first matching describe line in the file; the
# constant form is searched across the whole file before the string form.
#
# @param source [String] RSpec file source code
# @return [String, nil]
def extract_rspec_subject(source)
  # The subject may be followed by whitespace, a comma (metadata), or a closing paren.
  source[/^\s*(?:RSpec\.)?describe[\s(]+([\w:]+)(?=[\s,)])/, 1] ||
    source[/^\s*(?:RSpec\.)?describe[\s(]+['"]([^'"]+)['"](?=[\s,)])/, 1]
end
133
+
134
# Extract subject class from the Minitest test class name.
#
# Strips the conventional "Test" suffix: "UserTest" => "User".
# Namespaced test classes keep their namespace:
# "Admin::UserTest" => "Admin::User".
#
# @param source [String] Minitest file source code
# @return [String, nil] nil when no +class ...Test <+ declaration is found
def extract_minitest_subject(source)
  test_class = source[/class\s+([\w:]+Test)\s*</, 1]
  test_class&.sub(/Test\z/, '')
end
146
+
147
# Count test examples in the file.
#
# For RSpec: counts lines starting with it/specify/example in any of the
# forms +it '...'+, +it('...')+, +it { ... }+ and +it do+.
# For Minitest (any framework other than :rspec): counts +test "..."+
# declarations and +def test_+ methods.
#
# @param source [String] File source code
# @param framework [Symbol] :rspec or :minitest
# @return [Integer]
def count_tests(source, framework)
  if framework == :rspec
    # Description string, opening paren/brace, or a bare `do` all start an example.
    source.scan(/^\s*(?:it|specify|example)(?:\s+['"]|\s*[({]|\s+do\b)/).size
  else
    source.scan(/^\s*test\s+['"]/).size +
      source.scan(/^\s*def\s+test_\w/).size
  end
end
163
+
164
# Extract names of shared examples defined in the file.
#
# Matches +shared_examples '...'+ and +shared_examples_for '...'+, with or
# without the +RSpec.+ receiver. Only quoted-string names are recognised.
#
# @param source [String] File source code
# @return [Array<String>]
def extract_shared_examples_defined(source)
  source.scan(/^\s*(?:RSpec\.)?shared_examples(?:_for)?\s+['"]([^'"]+)['"]/).flatten
end
171
+
172
# List the shared example names this file pulls in through
# +include_examples+ or +it_behaves_like+ (quoted-string names only).
#
# @param source [String] File source code
# @return [Array<String>]
def extract_shared_examples_used(source)
  usages = source.scan(/^\s*(?:include_examples|it_behaves_like)\s+['"]([^'"]+)['"]/)
  usages.map(&:first)
end
179
+
180
# Classify a test file by the directory it lives in.
#
# Rules are checked in order (model, controller, request, system); the
# first directory pattern found in the path decides. Paths matching none
# of them are :unit.
#
# @param file_path [String] Absolute path to the test file
# @return [Symbol] One of :model, :controller, :request, :system, :unit
def infer_test_type(file_path)
  directory_rules = {
    model: [%r{/spec/models/}, %r{/test/models/}],
    controller: [%r{/spec/controllers/}, %r{/test/controllers/}],
    request: [%r{/spec/requests/}, %r{/test/integration/}],
    system: [%r{/spec/system/}, %r{/test/system/}]
  }

  matched = directory_rules.find do |_type, patterns|
    patterns.any? { |pattern| pattern === file_path }
  end
  matched ? matched.first : :unit
end
193
+
194
# Link a test file to the unit it covers.
#
# The dependency type comes from the subject class name's suffix
# (Controller, Job, Mailer, Service/Interactor). When no suffix matches,
# the type is derived from the test type instead.
#
# @param subject_class [String, nil] The class under test
# @param test_type [Symbol] The inferred test file category
# @return [Array<Hash>] A single :test_coverage dependency, or empty when there is no subject
def extract_dependencies(subject_class, test_type)
  return [] unless subject_class

  suffix_types = {
    /Controller\z/ => :controller,
    /Job\z/ => :job,
    /Mailer\z/ => :mailer,
    /Service\z/ => :service,
    /Interactor\z/ => :service
  }
  _pattern, target_type = suffix_types.find { |pattern, _type| pattern === subject_class }
  target_type ||= infer_type_from_test_type(test_type)

  [{ type: target_type, target: subject_class, via: :test_coverage }]
end
215
+
216
# Fallback dependency type for subjects whose class name carries no
# recognised suffix: controller tests map to :controller, every other
# test type maps to :model.
#
# @param test_type [Symbol] The test type
# @return [Symbol]
def infer_type_from_test_type(test_type)
  return :controller if test_type == :controller

  :model
end
223
+ end
224
+ end
225
+ end