codebase_index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/exe/codebase-console-mcp +22 -0
  8. data/exe/codebase-index-mcp +61 -0
  9. data/exe/codebase-index-mcp-http +64 -0
  10. data/exe/codebase-index-mcp-start +58 -0
  11. data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
  12. data/lib/codebase_index/ast/method_extractor.rb +76 -0
  13. data/lib/codebase_index/ast/node.rb +88 -0
  14. data/lib/codebase_index/ast/parser.rb +653 -0
  15. data/lib/codebase_index/ast.rb +6 -0
  16. data/lib/codebase_index/builder.rb +137 -0
  17. data/lib/codebase_index/chunking/chunk.rb +84 -0
  18. data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
  19. data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
  20. data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
  21. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
  22. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
  23. data/lib/codebase_index/console/audit_logger.rb +75 -0
  24. data/lib/codebase_index/console/bridge.rb +170 -0
  25. data/lib/codebase_index/console/confirmation.rb +90 -0
  26. data/lib/codebase_index/console/connection_manager.rb +173 -0
  27. data/lib/codebase_index/console/console_response_renderer.rb +78 -0
  28. data/lib/codebase_index/console/model_validator.rb +81 -0
  29. data/lib/codebase_index/console/safe_context.rb +82 -0
  30. data/lib/codebase_index/console/server.rb +557 -0
  31. data/lib/codebase_index/console/sql_validator.rb +172 -0
  32. data/lib/codebase_index/console/tools/tier1.rb +118 -0
  33. data/lib/codebase_index/console/tools/tier2.rb +117 -0
  34. data/lib/codebase_index/console/tools/tier3.rb +110 -0
  35. data/lib/codebase_index/console/tools/tier4.rb +79 -0
  36. data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
  37. data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
  38. data/lib/codebase_index/cost_model/estimator.rb +128 -0
  39. data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
  40. data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
  41. data/lib/codebase_index/cost_model.rb +22 -0
  42. data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
  43. data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
  44. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
  45. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
  46. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
  47. data/lib/codebase_index/db/migrator.rb +71 -0
  48. data/lib/codebase_index/db/schema_version.rb +73 -0
  49. data/lib/codebase_index/dependency_graph.rb +227 -0
  50. data/lib/codebase_index/embedding/indexer.rb +130 -0
  51. data/lib/codebase_index/embedding/openai.rb +105 -0
  52. data/lib/codebase_index/embedding/provider.rb +135 -0
  53. data/lib/codebase_index/embedding/text_preparer.rb +112 -0
  54. data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
  55. data/lib/codebase_index/evaluation/evaluator.rb +146 -0
  56. data/lib/codebase_index/evaluation/metrics.rb +79 -0
  57. data/lib/codebase_index/evaluation/query_set.rb +148 -0
  58. data/lib/codebase_index/evaluation/report_generator.rb +90 -0
  59. data/lib/codebase_index/extracted_unit.rb +145 -0
  60. data/lib/codebase_index/extractor.rb +956 -0
  61. data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
  62. data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
  63. data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
  64. data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
  65. data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
  66. data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
  67. data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
  68. data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
  69. data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
  70. data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
  71. data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
  72. data/lib/codebase_index/extractors/event_extractor.rb +211 -0
  73. data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
  74. data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
  75. data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
  76. data/lib/codebase_index/extractors/job_extractor.rb +369 -0
  77. data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
  78. data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
  79. data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
  80. data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
  81. data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
  82. data/lib/codebase_index/extractors/model_extractor.rb +960 -0
  83. data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
  84. data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
  85. data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
  86. data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
  87. data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
  88. data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
  89. data/lib/codebase_index/extractors/route_extractor.rb +181 -0
  90. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
  91. data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
  92. data/lib/codebase_index/extractors/service_extractor.rb +254 -0
  93. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
  94. data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
  95. data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
  96. data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
  97. data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
  98. data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
  99. data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
  100. data/lib/codebase_index/feedback/gap_detector.rb +89 -0
  101. data/lib/codebase_index/feedback/store.rb +119 -0
  102. data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
  103. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
  104. data/lib/codebase_index/flow_assembler.rb +290 -0
  105. data/lib/codebase_index/flow_document.rb +191 -0
  106. data/lib/codebase_index/flow_precomputer.rb +102 -0
  107. data/lib/codebase_index/formatting/base.rb +40 -0
  108. data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
  109. data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
  110. data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
  111. data/lib/codebase_index/formatting/human_adapter.rb +78 -0
  112. data/lib/codebase_index/graph_analyzer.rb +374 -0
  113. data/lib/codebase_index/mcp/index_reader.rb +394 -0
  114. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
  115. data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
  116. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
  117. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
  118. data/lib/codebase_index/mcp/server.rb +935 -0
  119. data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
  120. data/lib/codebase_index/model_name_cache.rb +51 -0
  121. data/lib/codebase_index/notion/client.rb +217 -0
  122. data/lib/codebase_index/notion/exporter.rb +219 -0
  123. data/lib/codebase_index/notion/mapper.rb +39 -0
  124. data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
  125. data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
  126. data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
  127. data/lib/codebase_index/notion/rate_limiter.rb +68 -0
  128. data/lib/codebase_index/observability/health_check.rb +81 -0
  129. data/lib/codebase_index/observability/instrumentation.rb +34 -0
  130. data/lib/codebase_index/observability/structured_logger.rb +75 -0
  131. data/lib/codebase_index/operator/error_escalator.rb +81 -0
  132. data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
  133. data/lib/codebase_index/operator/status_reporter.rb +80 -0
  134. data/lib/codebase_index/railtie.rb +26 -0
  135. data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
  136. data/lib/codebase_index/resilience/index_validator.rb +185 -0
  137. data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
  138. data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
  139. data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
  140. data/lib/codebase_index/retrieval/ranker.rb +273 -0
  141. data/lib/codebase_index/retrieval/search_executor.rb +327 -0
  142. data/lib/codebase_index/retriever.rb +160 -0
  143. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
  144. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
  145. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
  146. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
  147. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
  148. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
  149. data/lib/codebase_index/ruby_analyzer.rb +87 -0
  150. data/lib/codebase_index/session_tracer/file_store.rb +111 -0
  151. data/lib/codebase_index/session_tracer/middleware.rb +143 -0
  152. data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
  153. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
  154. data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
  155. data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
  156. data/lib/codebase_index/session_tracer/store.rb +67 -0
  157. data/lib/codebase_index/storage/graph_store.rb +120 -0
  158. data/lib/codebase_index/storage/metadata_store.rb +169 -0
  159. data/lib/codebase_index/storage/pgvector.rb +163 -0
  160. data/lib/codebase_index/storage/qdrant.rb +172 -0
  161. data/lib/codebase_index/storage/vector_store.rb +156 -0
  162. data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
  163. data/lib/codebase_index/version.rb +5 -0
  164. data/lib/codebase_index.rb +223 -0
  165. data/lib/generators/codebase_index/install_generator.rb +32 -0
  166. data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
  167. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
  168. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
  169. data/lib/tasks/codebase_index.rake +583 -0
  170. data/lib/tasks/codebase_index_evaluation.rake +115 -0
  171. metadata +252 -0
@@ -0,0 +1,204 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'shared_utility_methods'
4
+
5
+ module CodebaseIndex
6
+ module Extractors
7
+ # EngineExtractor handles Rails engine and mountable gem extraction via runtime introspection.
8
+ #
9
+ # Reads `Rails::Engine.subclasses` to discover engines, then inspects each engine's
10
+ # routes, mount point, and configuration. Each engine becomes one ExtractedUnit with
11
+ # metadata about its name, root path, mount point, route count, and isolation.
12
+ #
13
+ # @example
14
+ # extractor = EngineExtractor.new
15
+ # units = extractor.extract_all
16
+ # devise = units.find { |u| u.identifier == "Devise::Engine" }
17
+ #
18
+ class EngineExtractor
19
+ include SharedUtilityMethods
20
+
21
# Build an extractor.
#
# Nothing is configured here: engines are discovered through runtime
# introspection, so there are no directories to scan.
def initialize; end
24
+
25
# Extract every discoverable Rails engine as an ExtractedUnit.
#
# Returns an empty list when the engine/routing runtime is unavailable or
# when no engine subclasses are found. Engines for which extraction yields
# nil are left out of the result.
#
# @return [Array<ExtractedUnit>] List of engine units
def extract_all
  return [] unless engines_available?

  discovered = engine_subclasses
  return [] if discovered.empty?

  # The mount map is built once and shared by every engine lookup.
  mounts = build_mount_map
  discovered.filter_map { |engine_class| extract_engine(engine_class, mounts) }
end
37
+
38
+ private
39
+
40
# Check if Rails::Engine and the application routing table are available.
#
# Always returns a strict boolean: +false+ when Rails::Engine is not
# defined, when Rails has no +application+ accessor, or when the
# application object (including nil) does not respond to +routes+.
#
# @return [Boolean]
def engines_available?
  return false unless defined?(Rails::Engine)
  return false unless Rails.respond_to?(:application)

  # nil.respond_to?(:routes) is false, so an unset application is handled here.
  Rails.application.respond_to?(:routes)
end
48
+
49
# Retrieve Rails::Engine subclasses.
#
# Prefers +Rails::Engine.subclasses+ when the class responds to it;
# otherwise walks ObjectSpace and keeps every class that inherits from
# Rails::Engine.
#
# NOTE(review): Ruby's Class#subclasses returns direct subclasses only,
# while the ObjectSpace fallback returns all descendants, so the two
# branches can differ for engines that inherit from another engine.
#
# @return [Array<Class>]
def engine_subclasses
  base = Rails::Engine
  return base.subclasses if base.respond_to?(:subclasses)

  ObjectSpace.each_object(Class).select { |candidate| candidate < base }
end
60
+
61
# Build a mapping from engine class to mounted path by scanning app routes.
#
# A route's +app+ is commonly a routing endpoint wrapper rather than the
# mounted engine class itself, so each route app is unwrapped (via
# +rack_app+) before the engine check. Without this, wrapped engines never
# match and show up as "not mounted".
#
# Extraction is best-effort: a route that raises is skipped, and any
# failure reaching the routing table yields an empty map.
#
# @return [Hash{Class => String}] Engine class to mount path
def build_mount_map
  map = {}
  Rails.application.routes.routes.each do |route|
    engine = resolve_mounted_engine(route.app)
    next unless engine

    path = extract_mount_path(route)
    map[engine] = path if path
  rescue StandardError
    next
  end
  map
rescue StandardError
  {}
end

# Resolve the engine behind a route app, unwrapping one endpoint layer.
#
# The engine check runs first so that +rack_app+ is never called on an
# engine itself.
#
# @param app [Object] The route app object (engine or endpoint wrapper)
# @return [Object, nil] The engine, or nil when the route does not mount one
def resolve_mounted_engine(app)
  return app if engine_class?(app)
  return nil unless app.respond_to?(:rack_app)

  inner = app.rack_app
  engine_class?(inner) ? inner : nil
end
79
+
80
# Check whether a route app looks like a Rails engine.
#
# Two checks are made, in this order:
# 1. class hierarchy — +app+ is a Class that inherits from Rails::Engine
#    (Rails::Engine itself does not satisfy this check);
# 2. duck typing — +app+ responds to both +engine_name+ and +routes+.
#
# @param app [Object] The route app object
# @return [Boolean]
def engine_class?(app)
  inherits = app.is_a?(Class) && defined?(Rails::Engine) && app < Rails::Engine
  return true if inherits

  %i[engine_name routes].all? { |message| app.respond_to?(message) }
end
93
+
94
# Extract the mount path string from a route object.
#
# Reads +route.path+, unwraps it through +spec+ when that is available,
# and stringifies the result. Returns nil when the route has no usable
# path or the resulting string is empty.
#
# @param route [ActionDispatch::Journey::Route]
# @return [String, nil]
def extract_mount_path(route)
  return nil unless route.respond_to?(:path) && route.path

  pattern = route.path
  pattern = pattern.spec if pattern.respond_to?(:spec)
  text = pattern.to_s
  text unless text.empty?
end
106
+
107
# Extract a single engine into an ExtractedUnit.
#
# Gathers the engine's class name, engine name, root path, route count,
# mount path, isolation flag and routed controllers, then stores them on
# the unit as source text, metadata and dependencies. Any StandardError is
# logged through Rails.logger and turns the result into nil.
#
# @param engine [Class] A Rails::Engine subclass
# @param mount_map [Hash] Engine-to-path mapping
# @return [ExtractedUnit, nil]
def extract_engine(engine, mount_map)
  class_name = engine.name
  short_name = engine.engine_name
  root = engine.root.to_s
  routes_total = count_engine_routes(engine)
  mount_point = mount_map[engine]
  # Engines that do not expose isolated? are treated as not isolated.
  isolated = engine.respond_to?(:isolated?) ? engine.isolated? : false
  controller_names = extract_engine_controllers(engine)

  ExtractedUnit.new(type: :engine, identifier: class_name, file_path: nil).tap do |unit|
    unit.namespace = extract_namespace(class_name)
    unit.source_code = build_engine_source(class_name, short_name, root, mount_point, routes_total, isolated)
    unit.metadata = {
      engine_name: short_name,
      root_path: root,
      mounted_path: mount_point,
      route_count: routes_total,
      isolate_namespace: isolated,
      controllers: controller_names
    }
    unit.dependencies = build_engine_dependencies(controller_names)
  end
rescue StandardError => e
  Rails.logger.error("Failed to extract engine #{engine.name}: #{e.message}")
  nil
end
144
+
145
# Count the routes defined by an engine.
#
# Falls back to 0 when the engine's route set cannot be read.
#
# @param engine [Class] A Rails::Engine subclass
# @return [Integer]
def count_engine_routes(engine)
  route_list = engine.routes.routes
  route_list.count
rescue StandardError
  0
end
154
+
155
# Extract controller class names from an engine's routes.
#
# Each route's +:controller+ default is camelized and suffixed with
# "Controller". Duplicates are removed, keeping first-seen order. A route
# that raises is skipped; a failure reading the route set yields [].
#
# @param engine [Class] A Rails::Engine subclass
# @return [Array<String>] Controller class names
def extract_engine_controllers(engine)
  names = engine.routes.routes.filter_map do |route|
    defaults = route.respond_to?(:defaults) ? route.defaults : {}
    slug = defaults[:controller]
    "#{slug.camelize}Controller" if slug
  rescue StandardError
    nil
  end
  names.uniq
rescue StandardError
  []
end
172
+
173
# Build a human-readable, comment-style summary of the engine.
#
# @param name [String] Engine class name
# @param engine_name [String] Engine short name
# @param root_path [String] Engine root directory
# @param mounted_path [String, nil] Mount path in host app
# @param route_count [Integer] Number of routes
# @param isolated [Boolean] Whether engine uses isolate_namespace
# @return [String] Six newline-joined "# Label: value" lines
def build_engine_source(name, engine_name, root_path, mounted_path, route_count, isolated)
  mount_label = mounted_path || '(not mounted)'
  [
    "# Engine: #{name}",
    "# Name: #{engine_name}",
    "# Root: #{root_path}",
    "# Mounted at: #{mount_label}",
    "# Routes: #{route_count}",
    "# Isolated namespace: #{isolated}"
  ].join("\n")
end
192
+
193
# Build dependency entries linking an engine to its routed controllers.
#
# @param controllers [Array<String>] Controller class names
# @return [Array<Hash>] One +{type:, target:, via:}+ hash per controller
def build_engine_dependencies(controllers)
  controllers.each_with_object([]) do |controller_name, deps|
    deps << { type: :controller, target: controller_name, via: :engine_route }
  end
end
202
+ end
203
+ end
204
+ end
@@ -0,0 +1,211 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'shared_utility_methods'
4
+ require_relative 'shared_dependency_scanner'
5
+
6
+ module CodebaseIndex
7
+ module Extractors
8
+ # EventExtractor discovers event publishing and subscribing patterns across the app.
9
+ #
10
+ # Scans +app/**/*.rb+ for two event system conventions:
11
+ # - ActiveSupport::Notifications: +instrument+ (publish) and +subscribe+ (consume)
12
+ # - Wisper: +publish+/+broadcast+ (publish) and +on(:event_name)+ (subscribe)
13
+ #
14
+ # Uses a two-pass approach:
15
+ # 1. Scan all files, collecting publishers and subscribers per event name
16
+ # 2. Merge by event name → one ExtractedUnit per unique event
17
+ #
18
+ # @example
19
+ # extractor = EventExtractor.new
20
+ # units = extractor.extract_all
21
+ # event = units.find { |u| u.identifier == "order.completed" }
22
+ # event.metadata[:publishers] # => ["app/services/order_service.rb"]
23
+ # event.metadata[:subscribers] # => ["app/listeners/order_listener.rb"]
24
+ # event.metadata[:pattern] # => :active_support
25
+ #
26
+ class EventExtractor
27
+ include SharedUtilityMethods
28
+ include SharedDependencyScanner
29
+
30
+ APP_DIRECTORIES = %w[app].freeze
31
+
32
# Resolve APP_DIRECTORIES against Rails.root, keeping only the entries
# that exist as directories.
def initialize
  @directories = APP_DIRECTORIES.filter_map do |relative|
    candidate = Rails.root.join(relative)
    candidate if candidate.directory?
  end
end
35
+
36
# Extract all event units using a two-pass approach.
#
# Pass 1 scans every +.rb+ file under the configured directories and
# accumulates publish/subscribe references into a shared map.
# Pass 2 turns each map entry into a unit, dropping entries for which
# +build_unit+ returns nil.
#
# @return [Array<ExtractedUnit>] One unit per unique event name
def extract_all
  events = {}

  ruby_files = @directories.flat_map { |dir| Dir[dir.join('**/*.rb')] }
  ruby_files.each { |path| scan_file(path, events) }

  events.filter_map { |name, details| build_unit(name, details) }
end
51
+
52
# Scan a single file for event publishing and subscribing patterns.
#
# Reads the file once and runs both pattern scanners over it; each scanner
# registers what it finds in +event_map+ (mutated in place). Any
# StandardError — unreadable file or scanner failure — is logged through
# Rails.logger and not re-raised.
#
# @param file_path [String] Path to the Ruby file
# @param event_map [Hash] Mutable map of event_name => {publishers:, subscribers:, pattern:}
# @return [void]
def scan_file(file_path, event_map)
  contents = File.read(file_path)
  scan_active_support_notifications(contents, file_path, event_map)
  scan_wisper_patterns(contents, file_path, event_map)
rescue StandardError => e
  Rails.logger.error("Failed to scan #{file_path} for events: #{e.message}")
end
66
+
67
+ private
68
+
69
+ # ──────────────────────────────────────────────────────────────────────
70
+ # Pattern Detection
71
+ # ──────────────────────────────────────────────────────────────────────
72
+
73
# Scan for ActiveSupport::Notifications instrument and subscribe patterns.
#
# Only calls whose first argument is a quoted string literal are detected.
# Both call styles are recognised:
#   ActiveSupport::Notifications.instrument("order.completed", ...)
#   ActiveSupport::Notifications.instrument "order.completed", ... do
# Event names given as variables, symbols or regexps are not matched.
#
# @param source [String] Ruby source code
# @param file_path [String] File path
# @param event_map [Hash] Mutable event map
# @return [void]
def scan_active_support_notifications(source, file_path, event_map)
  # Either an opening parenthesis (optionally padded) or plain spaces/tabs
  # may separate the method name from the quoted event name.
  first_string_arg = /(?:\s*\(\s*|[ \t]+)["']([^"']+)["']/

  source.scan(/ActiveSupport::Notifications\.instrument#{first_string_arg}/) do |m|
    register_publisher(event_map, m[0], file_path, :active_support)
  end

  source.scan(/ActiveSupport::Notifications\.subscribe#{first_string_arg}/) do |m|
    register_subscriber(event_map, m[0], file_path, :active_support)
  end
end
88
+
89
# Scan for Wisper event patterns.
#
# Publishers are only collected from files that contain an
# +include Wisper...+ line. Both call styles are recognised:
#   broadcast(:order_placed, order)
#   publish :order_paid
# Subscribers are detected in any file via +.on(:event_name)+ chains.
#
# NOTE(review): +.on(:name)+ is a generic method shape, so subscriber
# detection can pick up non-Wisper code.
#
# @param source [String] Ruby source code
# @param file_path [String] File path
# @param event_map [Hash] Mutable event map
# @return [void]
def scan_wisper_patterns(source, file_path, event_map)
  if source.match?(/include\s+Wisper/)
    # Accept "publish(:evt" as well as "publish :evt".
    source.scan(/\b(?:publish|broadcast)(?:\s*\(\s*|\s+):(\w+)/) do |m|
      register_publisher(event_map, m[0], file_path, :wisper)
    end
  end

  source.scan(/\.on\s*\(\s*:(\w+)/) do |m|
    register_subscriber(event_map, m[0], file_path, :wisper)
  end
end
109
+
110
+ # ──────────────────────────────────────────────────────────────────────
111
+ # Event Map Mutation
112
+ # ──────────────────────────────────────────────────────────────────────
113
+
114
# Register a publisher for an event name.
#
# Creates the event entry on first sight; the +pattern+ stored is the one
# supplied by that first registration. A file path is recorded at most once
# per event.
#
# @param event_map [Hash] Mutable event map
# @param event_name [String] Event name
# @param file_path [String] Publisher file path
# @param pattern [Symbol] :active_support or :wisper
# @return [void]
def register_publisher(event_map, event_name, file_path, pattern)
  event_map[event_name] ||= { publishers: [], subscribers: [], pattern: pattern }
  known = event_map[event_name][:publishers]
  known.push(file_path) unless known.include?(file_path)
end
125
+
126
# Register a subscriber for an event name.
#
# Creates the event entry on first sight; the +pattern+ stored is the one
# supplied by that first registration. A file path is recorded at most once
# per event.
#
# @param event_map [Hash] Mutable event map
# @param event_name [String] Event name
# @param file_path [String] Subscriber file path
# @param pattern [Symbol] :active_support or :wisper
# @return [void]
def register_subscriber(event_map, event_name, file_path, pattern)
  event_map[event_name] ||= { publishers: [], subscribers: [], pattern: pattern }
  known = event_map[event_name][:subscribers]
  known.push(file_path) unless known.include?(file_path)
end
137
+
138
+ # ──────────────────────────────────────────────────────────────────────
139
+ # Unit Construction
140
+ # ──────────────────────────────────────────────────────────────────────
141
+
142
# Build an ExtractedUnit from accumulated event data.
#
# Returns nil if the event has neither publishers nor subscribers. The
# unit's file path is the first publisher, falling back to the first
# subscriber. Dependencies are scanned from the combined source of every
# publisher and subscriber file.
#
# @param event_name [String] Event name (used as the unit identifier)
# @param data [Hash] Accumulated publishers/subscribers/pattern
# @return [ExtractedUnit, nil]
def build_unit(event_name, data)
  publishers = data[:publishers]
  subscribers = data[:subscribers]
  return nil if publishers.empty? && subscribers.empty?

  primary_path = publishers.first || subscribers.first
  related_source = load_source_files((publishers + subscribers).uniq)

  ExtractedUnit.new(type: :event, identifier: event_name, file_path: primary_path).tap do |unit|
    unit.source_code = build_source_annotation(event_name, data)
    unit.metadata = {
      event_name: event_name,
      publishers: publishers,
      subscribers: subscribers,
      pattern: data[:pattern],
      publisher_count: publishers.size,
      subscriber_count: subscribers.size
    }
    unit.dependencies = build_dependencies(related_source)
  end
end
174
+
175
# Load source from multiple files for dependency scanning.
#
# Files that cannot be read are silently skipped; the remaining contents
# are joined with newlines in the order given.
#
# @param file_paths [Array<String>] File paths to read
# @return [String] Combined source
def load_source_files(file_paths)
  contents = file_paths.map do |path|
    File.read(path)
  rescue StandardError
    nil
  end
  contents.compact.join("\n")
end
188
+
189
# Build the comment-style source annotation for an event unit.
#
# Always emits the event header line; the publisher and subscriber lines
# are only added when the respective list has entries.
#
# @param event_name [String] Event name
# @param data [Hash] Event data with publishers and subscribers
# @return [String]
def build_source_annotation(event_name, data)
  publishers = data[:publishers]
  subscribers = data[:subscribers]

  annotation = ["# Event: #{event_name} (#{data[:pattern]})"]
  annotation.push("# Publishers: #{publishers.join(', ')}") if publishers.any?
  annotation.push("# Subscribers: #{subscribers.join(', ')}") if subscribers.any?
  annotation.join("\n")
end
200
+
201
# Build dependencies by scanning the combined source of publisher and
# subscriber files.
#
# Entries are de-duplicated on their (type, target) pair, keeping the
# first occurrence.
#
# @param combined_source [String] Combined source from all related files
# @return [Array<Hash>]
def build_dependencies(combined_source)
  scan_common_dependencies(combined_source).uniq { |dep| dep.values_at(:type, :target) }
end
209
+ end
210
+ end
211
+ end
@@ -0,0 +1,289 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'shared_utility_methods'
4
+ require_relative 'shared_dependency_scanner'
5
+
6
+ module CodebaseIndex
7
+ module Extractors
8
+ # FactoryExtractor handles extraction of FactoryBot factory definitions.
9
+ #
10
+ # Scans spec/factories/ and test/factories/ for FactoryBot definitions
11
+ # and produces one ExtractedUnit per factory block. Uses a line-by-line
12
+ # state machine parser (never evals factory files).
13
+ #
14
+ # Supports: basic factories, explicit class override, traits, associations,
15
+ # sequences, callbacks, parent inheritance, transient attributes, and
16
+ # nested factory definitions (each becomes its own unit).
17
+ #
18
+ # @example
19
+ # extractor = FactoryExtractor.new
20
+ # units = extractor.extract_all
21
+ # user = units.find { |u| u.identifier == "user" }
22
+ # user.metadata[:traits] # => ["admin", "with_avatar"]
23
+ #
24
+ class FactoryExtractor
25
+ include SharedUtilityMethods
26
+ include SharedDependencyScanner
27
+
28
+ FACTORY_DIRECTORIES = %w[spec/factories test/factories].freeze
29
+
30
# Resolve FACTORY_DIRECTORIES against Rails.root, keeping only the entries
# that exist as directories.
def initialize
  @directories = FACTORY_DIRECTORIES.filter_map do |relative|
    candidate = Rails.root.join(relative)
    candidate if candidate.directory?
  end
end
33
+
34
# Extract all factory definitions from all discovered directories.
#
# Every +.rb+ file below each directory is handed to
# +extract_factory_file+ and the per-file results are flattened.
#
# @return [Array<ExtractedUnit>] List of factory units
def extract_all
  factory_files = @directories.flat_map { |dir| Dir[dir.join('**/*.rb')] }
  factory_files.flat_map { |file| extract_factory_file(file) }
end
42
+
43
# Extract factory definitions from a single factory file.
#
# Returns an Array because each file may contain multiple factory
# definitions. Paths that do not end in ".rb" are ignored. Any
# StandardError while reading or parsing is logged through Rails.logger
# and results in an empty list.
#
# @param file_path [String] Path to the factory file
# @return [Array<ExtractedUnit>] List of factory units
def extract_factory_file(file_path)
  return [] unless file_path.to_s.end_with?('.rb')

  contents = File.read(file_path)
  parse_factories(contents).map { |parsed| build_unit(parsed, file_path, contents) }
rescue StandardError => e
  Rails.logger.error("Failed to extract factories from #{file_path}: #{e.message}")
  []
end
60
+
61
+ private
62
+
63
# Parse factory definitions from source using a line-by-line state machine.
#
# Tracks factory nesting, traits, associations, sequences, callbacks, and
# transient attributes. Each factory block (including nested factories within
# a parent factory) produces one entry in the returned array.
#
# Block depth is tracked with a single counter: factory, trait and transient
# lines, plus anything +block_opener?+ accepts, increment it; a line that is
# exactly +end+ decrements it. A factory is completed when the depth returns
# to the value recorded as its +:open_depth+, so nested factories are
# appended before their parent. Factories still open when the source ends
# are not returned.
#
# @param source [String] Factory file source code
# @return [Array<Hash>] Parsed factory data hashes
def parse_factories(source)
  completed = []
  factory_stack = []
  depth = 0
  in_transient = false
  transient_depth = nil

  source.lines.each_with_index do |line, index|
    stripped = line.strip

    # Factory definition — push new factory onto stack (line numbers are 1-based)
    if (factory_data = match_factory(stripped, depth, index + 1))
      factory_stack.push(factory_data)
      depth += 1
      next
    end

    # Trait definition — record trait in current factory, open block.
    # Depth is incremented even when no factory is open, to stay balanced
    # with the trait's closing `end`.
    if (trait_match = stripped.match(/\Atrait\s+:(\w+)\s+do/))
      factory_stack.last[:traits] << trait_match[1] if factory_stack.any?
      depth += 1
      next
    end

    # Transient block — start collecting transient attributes and remember
    # the depth to return to when the block closes
    if stripped.match?(/\Atransient\s+do/)
      in_transient = true
      transient_depth = depth
      depth += 1
      next
    end

    # Collect transient attribute names (word { ... } or word do).
    # No `next` here: a `word do` line must still reach the generic
    # block-opener check below so its depth is counted.
    if in_transient && factory_stack.any? && (attr_match = stripped.match(/\A(\w+)\s*(?:\{|do\b)/))
      factory_stack.last[:transient_attributes] << attr_match[1]
    end

    # Association
    if factory_stack.any? && (assoc_match = stripped.match(/\Aassociation\s+:(\w+)/))
      factory_stack.last[:associations] << assoc_match[1]
    end

    # Sequence
    if factory_stack.any? && (seq_match = stripped.match(/\Asequence\s*\(:(\w+)\)/))
      factory_stack.last[:sequences] << seq_match[1]
    end

    # Callbacks: after(:hook), before(:hook), after_stub(:hook) — the hook name is recorded
    if factory_stack.any? && (cb_match = stripped.match(/\A(?:after|before|after_stub)\s*\([:'"](\w+)/))
      factory_stack.last[:callbacks] << cb_match[1]
    end

    # Generic block openers — factory/trait/transient already handled above with next
    if block_opener?(stripped)
      depth += 1
      next
    end

    # Only a bare `end` line closes a block
    next unless stripped == 'end'

    depth -= 1

    # Close transient block if we've returned to the depth where it was opened
    if in_transient && depth == transient_depth
      in_transient = false
      transient_depth = nil
    end

    # Close factory if top factory was opened at this depth
    next unless factory_stack.any? && depth == factory_stack.last[:open_depth]

    completed << factory_stack.pop
  end

  completed
end
147
+
148
# Try to match a factory definition line and return initialized factory data.
#
# Handles:
#   factory :name do
#   factory :name, class: ClassName do
#   factory :name, class: 'ClassName' do
#   factory :name, parent: :other do
#
# The line must start with +factory :+ and contain the +do+ keyword. When
# no +class:+ option is present, the class name is derived from the factory
# name via +classify+.
#
# @param line [String] Stripped source line
# @param depth [Integer] Current block depth when factory would be opened
# @param line_number [Integer] 1-based line number
# @return [Hash, nil] Initialized factory data or nil if not a factory line
def match_factory(line, depth, line_number)
  return nil unless line.match?(/\Afactory\s+:/) && line.match?(/\bdo\b/)

  factory_name = line[/\Afactory\s+:(\w+)/, 1]
  return nil unless factory_name

  explicit_class = line[/\bclass:\s*['"]?([\w:]+)['"]?/, 1]
  parent_name = line[/\bparent:\s*:(\w+)/, 1]

  {
    name: factory_name,
    class_name: explicit_class || classify(factory_name),
    parent_factory: parent_name,
    open_depth: depth,
    line_number: line_number,
    traits: [],
    associations: [],
    sequences: [],
    callbacks: [],
    transient_attributes: []
  }
end
190
+
191
# Turn a snake_case factory name into the CamelCase class name it implies.
#
# Each underscore-separated segment is capitalized (first letter upper-case,
# the rest lower-case) and the segments are concatenated.
#
# @param name [String] Snake_case factory name (e.g., "admin_user")
# @return [String] CamelCase class name (e.g., "AdminUser")
def classify(name)
  segments = name.split('_')
  capitalized = segments.map { |segment| segment.capitalize }
  capitalized.join
end
198
+
199
# Check if a stripped line opens a new block.
#
# Excludes factory, trait, and transient lines — those are handled
# explicitly in the main parser loop with depth tracking of their own.
#
# Full-line comments never open a block. Loop keywords (while/until/for)
# only count when they start the line, mirroring the existing if/unless
# rule: a modifier form such as `retry_it while busy?` has no matching
# `end`, and the bare word may also appear inside a string literal.
# Lines that already end in `end` are treated as self-closing.
#
# @param stripped [String] Stripped line content
# @return [Boolean]
def block_opener?(stripped)
  return false if stripped.start_with?('#')
  return false if stripped.match?(/\Afactory\s+:/)
  return false if stripped.match?(/\Atrait\s+:/)
  return false if stripped.match?(/\Atransient\s+do/)
  # These keywords can legitimately open a block mid-line (`x = case y`, `foo do |a|`).
  return true if stripped.match?(/\b(do|def|case|begin|class|module)\b.*(?<!\bend)\s*$/)
  # Loop keywords open a block only in statement position.
  return true if stripped.match?(/\A(while|until|for)\b.*(?<!\bend)\s*$/)

  stripped.match?(/\A(if|unless)\b/)
end
214
+
215
# Assemble the ExtractedUnit that represents one parsed factory.
#
# The unit is created with type :factory, the factory name as identifier
# and the given file path; its source code, metadata and dependencies are
# then filled in from the parsed data.
#
# @param factory_data [Hash] Parsed factory data
# @param file_path [String] Path to the factory file
# @param file_source [String] Full file source
# @return [ExtractedUnit]
def build_unit(factory_data, file_path, file_source)
  extracted = ExtractedUnit.new(type: :factory, identifier: factory_data[:name], file_path: file_path)

  extracted.tap do |unit|
    unit.source_code = build_source_annotation(factory_data, file_source)
    unit.metadata = build_metadata(factory_data)
    unit.dependencies = extract_dependencies(factory_data)
  end
end
234
+
235
# Prefix the file source with comment lines describing the factory.
#
# The first line names the factory and its model class; a second line
# naming the parent factory is added only when one is present. The full
# file source follows on the next line.
#
# @param factory_data [Hash] Parsed factory data
# @param file_source [String] Full file source
# @return [String]
def build_source_annotation(factory_data, file_source)
  parent = factory_data[:parent_factory]

  annotation = ["# Factory: #{factory_data[:name]} (model: #{factory_data[:class_name]})"]
  annotation << "# Parent: #{parent}" if parent

  "#{annotation.join("\n")}\n#{file_source}"
end
245
+
246
# Project parsed factory data onto the metadata hash stored on the unit.
#
# Values are copied through unchanged, except that repeated callback
# hooks are collapsed so each one is listed once.
#
# @param factory_data [Hash] Parsed factory data
# @return [Hash]
def build_metadata(factory_data)
  metadata = {
    factory_name: factory_data[:name],
    model_class: factory_data[:class_name]
  }
  %i[traits associations sequences parent_factory].each do |key|
    metadata[key] = factory_data[key]
  end
  metadata[:callbacks] = factory_data[:callbacks].uniq
  metadata[:transient_attributes] = factory_data[:transient_attributes]
  metadata
end
262
+
263
# Derive the dependency edges implied by a parsed factory.
#
# Produces, in order:
# - one :model dependency (via :factory_for) on the modeled class
# - one :factory dependency (via :factory_parent) when a parent factory is set
# - one :factory dependency (via :factory_association) per association
#
# Entries sharing the same type and target are collapsed, keeping the
# first occurrence.
#
# @param factory_data [Hash] Parsed factory data
# @return [Array<Hash>]
def extract_dependencies(factory_data)
  model_dep = { type: :model, target: factory_data[:class_name], via: :factory_for }

  parent = factory_data[:parent_factory]
  parent_deps = parent ? [{ type: :factory, target: parent, via: :factory_parent }] : []

  association_deps = factory_data[:associations].map do |association|
    { type: :factory, target: association, via: :factory_association }
  end

  [model_dep, *parent_deps, *association_deps].uniq { |dep| dep.values_at(:type, :target) }
end
287
+ end
288
+ end
289
+ end