codebase_index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/exe/codebase-console-mcp +22 -0
  8. data/exe/codebase-index-mcp +61 -0
  9. data/exe/codebase-index-mcp-http +64 -0
  10. data/exe/codebase-index-mcp-start +58 -0
  11. data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
  12. data/lib/codebase_index/ast/method_extractor.rb +76 -0
  13. data/lib/codebase_index/ast/node.rb +88 -0
  14. data/lib/codebase_index/ast/parser.rb +653 -0
  15. data/lib/codebase_index/ast.rb +6 -0
  16. data/lib/codebase_index/builder.rb +137 -0
  17. data/lib/codebase_index/chunking/chunk.rb +84 -0
  18. data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
  19. data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
  20. data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
  21. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
  22. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
  23. data/lib/codebase_index/console/audit_logger.rb +75 -0
  24. data/lib/codebase_index/console/bridge.rb +170 -0
  25. data/lib/codebase_index/console/confirmation.rb +90 -0
  26. data/lib/codebase_index/console/connection_manager.rb +173 -0
  27. data/lib/codebase_index/console/console_response_renderer.rb +78 -0
  28. data/lib/codebase_index/console/model_validator.rb +81 -0
  29. data/lib/codebase_index/console/safe_context.rb +82 -0
  30. data/lib/codebase_index/console/server.rb +557 -0
  31. data/lib/codebase_index/console/sql_validator.rb +172 -0
  32. data/lib/codebase_index/console/tools/tier1.rb +118 -0
  33. data/lib/codebase_index/console/tools/tier2.rb +117 -0
  34. data/lib/codebase_index/console/tools/tier3.rb +110 -0
  35. data/lib/codebase_index/console/tools/tier4.rb +79 -0
  36. data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
  37. data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
  38. data/lib/codebase_index/cost_model/estimator.rb +128 -0
  39. data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
  40. data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
  41. data/lib/codebase_index/cost_model.rb +22 -0
  42. data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
  43. data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
  44. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
  45. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
  46. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
  47. data/lib/codebase_index/db/migrator.rb +71 -0
  48. data/lib/codebase_index/db/schema_version.rb +73 -0
  49. data/lib/codebase_index/dependency_graph.rb +227 -0
  50. data/lib/codebase_index/embedding/indexer.rb +130 -0
  51. data/lib/codebase_index/embedding/openai.rb +105 -0
  52. data/lib/codebase_index/embedding/provider.rb +135 -0
  53. data/lib/codebase_index/embedding/text_preparer.rb +112 -0
  54. data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
  55. data/lib/codebase_index/evaluation/evaluator.rb +146 -0
  56. data/lib/codebase_index/evaluation/metrics.rb +79 -0
  57. data/lib/codebase_index/evaluation/query_set.rb +148 -0
  58. data/lib/codebase_index/evaluation/report_generator.rb +90 -0
  59. data/lib/codebase_index/extracted_unit.rb +145 -0
  60. data/lib/codebase_index/extractor.rb +956 -0
  61. data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
  62. data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
  63. data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
  64. data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
  65. data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
  66. data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
  67. data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
  68. data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
  69. data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
  70. data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
  71. data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
  72. data/lib/codebase_index/extractors/event_extractor.rb +211 -0
  73. data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
  74. data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
  75. data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
  76. data/lib/codebase_index/extractors/job_extractor.rb +369 -0
  77. data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
  78. data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
  79. data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
  80. data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
  81. data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
  82. data/lib/codebase_index/extractors/model_extractor.rb +960 -0
  83. data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
  84. data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
  85. data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
  86. data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
  87. data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
  88. data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
  89. data/lib/codebase_index/extractors/route_extractor.rb +181 -0
  90. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
  91. data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
  92. data/lib/codebase_index/extractors/service_extractor.rb +254 -0
  93. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
  94. data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
  95. data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
  96. data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
  97. data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
  98. data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
  99. data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
  100. data/lib/codebase_index/feedback/gap_detector.rb +89 -0
  101. data/lib/codebase_index/feedback/store.rb +119 -0
  102. data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
  103. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
  104. data/lib/codebase_index/flow_assembler.rb +290 -0
  105. data/lib/codebase_index/flow_document.rb +191 -0
  106. data/lib/codebase_index/flow_precomputer.rb +102 -0
  107. data/lib/codebase_index/formatting/base.rb +40 -0
  108. data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
  109. data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
  110. data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
  111. data/lib/codebase_index/formatting/human_adapter.rb +78 -0
  112. data/lib/codebase_index/graph_analyzer.rb +374 -0
  113. data/lib/codebase_index/mcp/index_reader.rb +394 -0
  114. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
  115. data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
  116. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
  117. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
  118. data/lib/codebase_index/mcp/server.rb +935 -0
  119. data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
  120. data/lib/codebase_index/model_name_cache.rb +51 -0
  121. data/lib/codebase_index/notion/client.rb +217 -0
  122. data/lib/codebase_index/notion/exporter.rb +219 -0
  123. data/lib/codebase_index/notion/mapper.rb +39 -0
  124. data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
  125. data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
  126. data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
  127. data/lib/codebase_index/notion/rate_limiter.rb +68 -0
  128. data/lib/codebase_index/observability/health_check.rb +81 -0
  129. data/lib/codebase_index/observability/instrumentation.rb +34 -0
  130. data/lib/codebase_index/observability/structured_logger.rb +75 -0
  131. data/lib/codebase_index/operator/error_escalator.rb +81 -0
  132. data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
  133. data/lib/codebase_index/operator/status_reporter.rb +80 -0
  134. data/lib/codebase_index/railtie.rb +26 -0
  135. data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
  136. data/lib/codebase_index/resilience/index_validator.rb +185 -0
  137. data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
  138. data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
  139. data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
  140. data/lib/codebase_index/retrieval/ranker.rb +273 -0
  141. data/lib/codebase_index/retrieval/search_executor.rb +327 -0
  142. data/lib/codebase_index/retriever.rb +160 -0
  143. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
  144. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
  145. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
  146. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
  147. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
  148. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
  149. data/lib/codebase_index/ruby_analyzer.rb +87 -0
  150. data/lib/codebase_index/session_tracer/file_store.rb +111 -0
  151. data/lib/codebase_index/session_tracer/middleware.rb +143 -0
  152. data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
  153. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
  154. data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
  155. data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
  156. data/lib/codebase_index/session_tracer/store.rb +67 -0
  157. data/lib/codebase_index/storage/graph_store.rb +120 -0
  158. data/lib/codebase_index/storage/metadata_store.rb +169 -0
  159. data/lib/codebase_index/storage/pgvector.rb +163 -0
  160. data/lib/codebase_index/storage/qdrant.rb +172 -0
  161. data/lib/codebase_index/storage/vector_store.rb +156 -0
  162. data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
  163. data/lib/codebase_index/version.rb +5 -0
  164. data/lib/codebase_index.rb +223 -0
  165. data/lib/generators/codebase_index/install_generator.rb +32 -0
  166. data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
  167. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
  168. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
  169. data/lib/tasks/codebase_index.rake +583 -0
  170. data/lib/tasks/codebase_index_evaluation.rake +115 -0
  171. metadata +252 -0
@@ -0,0 +1,331 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'yaml'
4
+
5
+ module CodebaseIndex
6
+ module Extractors
7
+ # ScheduledJobExtractor handles scheduled/recurring job configuration extraction.
8
+ #
9
+ # Scans three schedule file formats to extract one unit per scheduled entry:
10
+ # - `config/recurring.yml` — Solid Queue recurring tasks
11
+ # - `config/sidekiq_cron.yml` — Sidekiq-Cron scheduled jobs
12
+ # - `config/schedule.rb` — Whenever DSL
13
+ #
14
+ # Each scheduled entry becomes its own ExtractedUnit with type `:scheduled_job`.
15
+ # Identifiers are prefixed with "scheduled:" to avoid collision with JobExtractor units.
16
+ #
17
+ # @example
18
+ # extractor = ScheduledJobExtractor.new
19
+ # units = extractor.extract_all
20
+ # cleanup = units.find { |u| u.identifier == "scheduled:periodic_cleanup" }
21
+ #
22
+ class ScheduledJobExtractor
23
+ # Schedule files to scan, mapped to their format
24
+ SCHEDULE_FILES = {
25
+ 'config/recurring.yml' => :solid_queue,
26
+ 'config/sidekiq_cron.yml' => :sidekiq_cron,
27
+ 'config/schedule.rb' => :whenever
28
+ }.freeze
29
+
30
+ # Common cron patterns mapped to human-readable descriptions
31
+ CRON_HUMANIZE = {
32
+ '* * * * *' => 'every minute',
33
+ '0 * * * *' => 'every hour',
34
+ '0 0 * * *' => 'daily at midnight',
35
+ '0 0 * * 0' => 'weekly on Sunday',
36
+ '0 0 * * 1' => 'weekly on Monday',
37
+ '0 0 1 * *' => 'monthly on the 1st',
38
+ '0 0 1 1 *' => 'yearly on January 1st'
39
+ }.freeze
40
+
41
+ # Environment keys to unwrap when nested in YAML
42
+ ENVIRONMENT_KEYS = %w[production development test staging].freeze
43
+
44
# Record which of the known schedule files exist under Rails.root.
#
# @schedule_files ends up as { absolute_path_string => format_symbol },
# containing only the entries whose file is present on disk.
def initialize
  @schedule_files = {}
  SCHEDULE_FILES.each do |relative_path, format|
    candidate = Rails.root.join(relative_path)
    next unless File.exist?(candidate)

    @schedule_files[candidate.to_s] = format
  end
end
50
+
51
# Collect the scheduled job units from every schedule file found at construction time.
#
# @return [Array<ExtractedUnit>] Units from all files, in discovery order
def extract_all
  units = []
  @schedule_files.each do |file_path, format|
    units.concat(extract_scheduled_job_file(file_path, format))
  end
  units
end
59
+
60
# Extract every scheduled entry from one schedule file.
#
# Returns an Array (not a single unit) because one schedule file holds many entries.
# Any StandardError raised while reading or parsing is logged and turned into [].
#
# @param file_path [String] Path to the schedule file
# @param format [Symbol, nil] :solid_queue, :sidekiq_cron or :whenever; inferred from the filename when nil
# @return [Array<ExtractedUnit>] List of scheduled job units (empty for unrecognised formats)
def extract_scheduled_job_file(file_path, format = nil)
  resolved_format = format || infer_format(file_path)

  if %i[solid_queue sidekiq_cron].include?(resolved_format)
    extract_yaml_schedule(file_path, resolved_format)
  elsif resolved_format == :whenever
    extract_whenever_schedule(file_path)
  else
    []
  end
rescue StandardError => e
  Rails.logger.error("Failed to extract scheduled jobs from #{file_path}: #{e.message}")
  []
end
82
+
83
+ private
84
+
85
+ # ──────────────────────────────────────────────────────────────────────
86
+ # YAML-based formats (Solid Queue, Sidekiq-Cron)
87
+ # ──────────────────────────────────────────────────────────────────────
88
+
89
# Parse a YAML schedule file and produce one unit per Hash-valued entry.
#
# @param file_path [String] Path to the YAML file
# @param format [Symbol] :solid_queue or :sidekiq_cron
# @return [Array<ExtractedUnit>] Empty when the document is not a non-empty Hash
def extract_yaml_schedule(file_path, format)
  source = File.read(file_path)
  # aliases: true — schedule files commonly share settings through YAML
  # anchors and merge keys; with aliases disabled Psych raises on the first
  # alias and the whole file would yield no units.
  data = YAML.safe_load(source, permitted_classes: [Symbol], aliases: true)
  return [] unless data.is_a?(Hash) && data.any?

  entries = unwrap_environment_nesting(data)
  return [] unless entries.is_a?(Hash)

  # Scalar or list values at the entry level are not schedule definitions; skip them.
  entries.filter_map do |task_name, config|
    build_yaml_unit(task_name, config, file_path, source, format) if config.is_a?(Hash)
  end
end
109
+
110
# Strip one level of environment nesting from parsed schedule YAML.
#
# When every top-level key is an environment name (see ENVIRONMENT_KEYS),
# the entries of the first environment are returned; otherwise the data is
# returned untouched.
#
# @param data [Hash] Parsed YAML data
# @return [Hash] Unwrapped entries ({} when the first environment has no value)
def unwrap_environment_nesting(data)
  environment_wrapped = data.keys.all? { |key| ENVIRONMENT_KEYS.include?(key.to_s) }
  return data unless environment_wrapped

  data.values.first || {}
end
124
+
125
# Turn one YAML schedule entry into an ExtractedUnit.
#
# The unit's source_code is the full file content, and its identifier is the
# task name prefixed with "scheduled:".
#
# @param task_name [String] The task/job name key
# @param config [Hash] The entry configuration
# @param file_path [String] Path to the schedule file
# @param source [String] Raw file content
# @param format [Symbol] :solid_queue or :sidekiq_cron
# @return [ExtractedUnit]
def build_yaml_unit(task_name, config, file_path, source, format)
  job_class = config['class']
  cron = extract_cron(config, format)

  ExtractedUnit.new(type: :scheduled_job, identifier: "scheduled:#{task_name}", file_path: file_path).tap do |unit|
    if job_class
      # Namespace is everything before the final "::" segment, or nil for top-level classes.
      namespaced = job_class.include?('::')
      unit.namespace = namespaced ? job_class.split('::')[0..-2].join('::') : nil
    end
    unit.source_code = source
    unit.metadata = {
      schedule_format: format,
      job_class: job_class,
      cron_expression: cron,
      queue: config['queue'],
      args: config['args'],
      frequency_human_readable: humanize_frequency(cron, format)
    }
    unit.dependencies = build_dependencies(job_class)
  end
end
157
+
158
# Look up the schedule expression under the key used by the given format.
#
# Solid Queue entries keep it under 'schedule', Sidekiq-Cron under 'cron'.
#
# @param config [Hash] Entry configuration
# @param format [Symbol] :solid_queue or :sidekiq_cron
# @return [String, nil] nil for any other format or when the key is absent
def extract_cron(config, format)
  key = { solid_queue: 'schedule', sidekiq_cron: 'cron' }[format]
  key && config[key]
end
171
+
172
+ # ──────────────────────────────────────────────────────────────────────
173
+ # Whenever DSL (config/schedule.rb)
174
+ # ──────────────────────────────────────────────────────────────────────
175
+
176
# Read a Whenever schedule.rb and build one unit per parsed `every` block.
#
# @param file_path [String] Path to the schedule.rb file
# @return [Array<ExtractedUnit>] Units in the order the blocks were parsed
def extract_whenever_schedule(file_path)
  source = File.read(file_path)
  units = []
  parse_whenever_blocks(source).each_with_index do |block, position|
    units << build_whenever_unit(block, position, file_path, source)
  end
  units
end
188
+
189
# Parse `every ... do ... end` blocks from Whenever DSL source using regex.
#
# The header must be a single line that starts with `every` (so commented-out
# blocks are ignored), and the body runs up to the first line that begins
# with the keyword `end`. Anchoring the terminator to a line start keeps
# words that merely contain "end" (e.g. `WeekendJob`, `send`) from cutting
# the body short. A nested construct with its own `end` line still ends the
# body early; this is a heuristic, not a Ruby parser.
#
# @param source [String] Ruby source code
# @return [Array<Hash>] One hash per block with :frequency, :frequency_str,
#   :command_type, :command_body and :job_class
def parse_whenever_blocks(source)
  block_pattern = /^[ \t]*every[ \t]+([^\n]+?)[ \t]+do[ \t]*\r?\n(.*?)^[ \t]*end\b/m

  source.scan(block_pattern).map do |frequency_str, body|
    header = frequency_str.strip
    command_type, command_body = detect_whenever_command(body)
    job_class = command_type == :runner ? extract_job_class_from_runner(command_body) : nil

    {
      # Drop a trailing `at:` option (either hash syntax) to leave only the frequency.
      frequency: header.sub(/,\s*(?:at:|:at\s*=>).*\z/, '').strip,
      frequency_str: header,
      command_type: command_type,
      command_body: command_body,
      job_class: job_class
    }
  end
end
214
+
215
# Detect the command type inside a Whenever block body.
#
# Recognises `runner`, `rake` and `command` (checked in that order) followed
# by a single- or double-quoted string. Anything else is reported as
# :unknown together with the stripped body.
#
# @param body [String] Block body content
# @return [Array<Symbol, String>] Command type and the quoted command text
def detect_whenever_command(body)
  %i[runner rake command].each do |type|
    # \b keeps the keyword from matching as the tail of a longer identifier.
    match = body.match(/\b#{type}\s+(?:"([^"]+)"|'([^']+)')/)
    return [type, match[1] || match[2]] if match
  end

  [:unknown, body.strip]
end
231
+
232
# Pull the receiver class out of a runner string.
#
# Matches a constant (optionally namespaced) immediately followed by
# `.perform_later` or `.perform_now`.
#
# @param runner_str [String, nil] The runner command string
# @return [String, nil] The job class name, or nil when absent or unmatched
def extract_job_class_from_runner(runner_str)
  return nil unless runner_str

  runner_str[/([A-Z]\w*(?:::\w+)*)\.perform_(later|now)/, 1]
end
244
+
245
# Turn one parsed Whenever block into an ExtractedUnit.
#
# The identifier embeds the block index so that several blocks for the same
# job (or several blocks without a job class) stay distinct.
#
# @param block [Hash] Parsed block data
# @param index [Integer] Block index for identifier uniqueness
# @param file_path [String] Path to schedule.rb
# @param source [String] Raw file content
# @return [ExtractedUnit]
def build_whenever_unit(block, index, file_path, source)
  job_class = block[:job_class]
  suffix = job_class ? "#{job_class.underscore}_#{index}" : "task_#{index}"

  ExtractedUnit.new(type: :scheduled_job, identifier: "scheduled:whenever_#{suffix}", file_path: file_path).tap do |unit|
    # Only namespaced job classes get a namespace; it is left unset otherwise.
    unit.namespace = job_class.split('::')[0..-2].join('::') if job_class&.include?('::')
    unit.source_code = source
    unit.metadata = {
      schedule_format: :whenever,
      job_class: job_class,
      cron_expression: block[:frequency],
      command_type: block[:command_type],
      frequency_human_readable: block[:frequency]
    }
    unit.dependencies = build_dependencies(job_class)
  end
end
278
+
279
+ # ──────────────────────────────────────────────────────────────────────
280
+ # Format Detection
281
+ # ──────────────────────────────────────────────────────────────────────
282
+
283
# Infer the schedule format by comparing the file's basename with the
# basenames of the known schedule files.
#
# @param file_path [String] Path to the schedule file
# @return [Symbol] :solid_queue, :sidekiq_cron, :whenever, or :unknown when nothing matches
def infer_format(file_path)
  basename = File.basename(file_path)
  _path, format = SCHEDULE_FILES.find { |relative, _fmt| File.basename(relative) == basename }
  format || :unknown
end
294
+
295
+ # ──────────────────────────────────────────────────────────────────────
296
+ # Shared helpers
297
+ # ──────────────────────────────────────────────────────────────────────
298
+
299
# Build the dependency list that links a schedule entry to its job class.
#
# @param job_class [String, nil] The job class name
# @return [Array<Hash>] A single :job dependency, or [] when no class is known
def build_dependencies(job_class)
  job_class ? [{ type: :job, target: job_class, via: :scheduled }] : []
end
308
+
309
# Produce a human-readable description of a schedule expression.
#
# Solid Queue expressions are returned as-is. For other formats, exact
# matches in CRON_HUMANIZE win, then the `*/N * * * *` minute pattern, and
# finally the raw expression is returned unchanged.
#
# @param expression [String, nil] Cron expression or frequency
# @param format [Symbol] Schedule format
# @return [String, nil] nil only when no expression was given
def humanize_frequency(expression, format)
  return nil unless expression
  return expression if format == :solid_queue

  CRON_HUMANIZE.fetch(expression) do
    every_n_minutes = expression =~ %r{\A\*/(\d+) \* \* \* \*\z}
    every_n_minutes ? "every #{::Regexp.last_match(1)} minutes" : expression
  end
end
329
+ end
330
+ end
331
+ end
@@ -0,0 +1,334 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'shared_utility_methods'
4
+ require_relative 'shared_dependency_scanner'
5
+
6
+ module CodebaseIndex
7
+ module Extractors
8
+ # SerializerExtractor handles extraction of serializers, blueprinters, and decorators.
9
+ #
10
+ # Serializers define the API contract — what data is exposed and how it's shaped.
11
+ # They often wrap models, select attributes, and define associations that map
12
+ # directly to JSON responses. Understanding these is critical for API-aware
13
+ # code analysis.
14
+ #
15
+ # Supports:
16
+ # - ActiveModel::Serializer (AMS)
17
+ # - Blueprinter::Base
18
+ # - Draper::Decorator
19
+ #
20
+ # @example
21
+ # extractor = SerializerExtractor.new
22
+ # units = extractor.extract_all
23
+ # user_serializer = units.find { |u| u.identifier == "UserSerializer" }
24
+ #
25
+ class SerializerExtractor
26
+ include SharedUtilityMethods
27
+ include SharedDependencyScanner
28
+
29
+ # Directories to scan for serializer-like files
30
+ SERIALIZER_DIRECTORIES = %w[
31
+ app/serializers
32
+ app/blueprinters
33
+ app/decorators
34
+ ].freeze
35
+
36
+ # Known base classes for runtime discovery
37
+ BASE_CLASSES = {
38
+ 'ActiveModel::Serializer' => :ams,
39
+ 'Blueprinter::Base' => :blueprinter,
40
+ 'Draper::Decorator' => :draper
41
+ }.freeze
42
+
43
# Resolve the serializer directories under Rails.root, keeping only those that exist.
def initialize
  @directories = SERIALIZER_DIRECTORIES.filter_map do |relative_dir|
    path = Rails.root.join(relative_dir)
    path if path.directory?
  end
end
47
+
48
# Extract all serializers, blueprinters, and decorators in the application.
#
# Runs two passes: every Ruby file under the known directories, then the
# loaded descendants of each known base class that the first pass did not
# already produce (matched by identifier).
#
# @return [Array<ExtractedUnit>] List of serializer units
def extract_all
  # Pass 1: file-based discovery.
  units = @directories.flat_map do |dir|
    Dir[dir.join('**/*.rb')].filter_map { |file| extract_serializer_file(file) }
  end

  # Pass 2: class-based discovery; base classes whose gem is not loaded are skipped.
  seen = units.to_set(&:identifier)
  BASE_CLASSES.each_key do |base_class_name|
    base_class = begin
      base_class_name.constantize
    rescue NameError
      nil
    end
    next unless base_class

    base_class.descendants.each do |klass|
      # Anonymous classes have no usable identifier.
      next if klass.name.nil? || seen.include?(klass.name)

      unit = extract_serializer_class(klass, base_class_name)
      next unless unit

      units << unit
      seen << unit.identifier
    end
  end

  units.compact
end
86
+
87
# Build a serializer unit from a source file.
#
# Any StandardError raised along the way is logged and results in nil.
#
# @param file_path [String] Path to the serializer file
# @return [ExtractedUnit, nil] The extracted unit, or nil if the file has no
#   class name or does not look like a serializer
def extract_serializer_file(file_path)
  source = File.read(file_path)
  class_name = extract_class_name(file_path, source)
  return nil unless class_name && serializer_file?(source)

  ExtractedUnit.new(type: :serializer, identifier: class_name, file_path: file_path).tap do |unit|
    unit.namespace = extract_namespace(class_name)
    unit.source_code = annotate_source(source, class_name)
    unit.metadata = extract_metadata_from_source(source, class_name)
    unit.dependencies = extract_dependencies(source)
  end
rescue StandardError => e
  Rails.logger.error("Failed to extract serializer #{file_path}: #{e.message}")
  nil
end
114
+
115
+ private
116
+
117
# Build a serializer unit from a loaded class (runtime introspection).
#
# When the class's source file cannot be located or does not exist, the unit
# is built from an empty source string. Any StandardError is logged and
# results in nil.
#
# @param klass [Class] The serializer class
# @param base_class_name [String] Name of the detected base class
# @return [ExtractedUnit, nil] The extracted unit, or nil for anonymous classes
def extract_serializer_class(klass, base_class_name)
  class_name = klass.name
  return nil if class_name.nil?

  file_path = source_file_for(klass)
  readable = file_path && File.exist?(file_path)
  source = readable ? File.read(file_path) : ''

  ExtractedUnit.new(type: :serializer, identifier: class_name, file_path: file_path).tap do |unit|
    unit.namespace = extract_namespace(class_name)
    unit.source_code = annotate_source(source, class_name)
    unit.metadata = extract_metadata_from_class(klass, source, base_class_name)
    unit.dependencies = extract_dependencies(source)
  end
rescue StandardError => e
  Rails.logger.error("Failed to extract serializer #{klass.name}: #{e.message}")
  nil
end
144
+
145
+ # ──────────────────────────────────────────────────────────────────────
146
+ # Class Discovery
147
+ # ──────────────────────────────────────────────────────────────────────
148
+
149
# Determine the class name for a serializer file.
#
# Uses the first `class Name` declaration found in the source (as written,
# without any enclosing module names). When no declaration is found, the
# name is derived from the file path relative to the serializer directories.
#
# @param file_path [String] Path to the file
# @param source [String] File content
# @return [String] The class name
def extract_class_name(file_path, source)
  declared = source[/^\s*class\s+([\w:]+)/, 1]
  return declared if declared

  # Fall back to convention
  relative = file_path.sub("#{Rails.root}/", '')
  stem = relative.sub(%r{^app/(serializers|blueprinters|decorators)/}, '').sub('.rb', '')
  stem.camelize
end
159
+
160
# Heuristic check for whether source looks like a serializer, blueprint or decorator.
#
# True when the source inherits from a known base class or uses one of the
# typical DSL calls (attribute lists, serializer-backed associations, views).
#
# @param source [String] File content
# @return [Boolean]
def serializer_file?(source)
  [
    /< ActiveModel::Serializer/,
    /< Blueprinter::Base/,
    /< Draper::Decorator/,
    /< ApplicationSerializer/,
    /< ApplicationDecorator/,
    /< BaseSerializer/,
    /< BaseBlueprinter/,
    /attributes?\s+:/,
    /has_many\s+:.*serializer/,
    /belongs_to\s+:.*serializer/,
    /view\s+:/ # Blueprinter views
  ].any? { |pattern| source.match?(pattern) }
end
173
+
174
# Locate the file that defines a class.
#
# Uses the source location of the class's first own instance method; when
# there is none, falls back to the conventional path under app/serializers.
#
# @param klass [Class] The class to locate
# @return [String, nil] File path, or nil if any StandardError occurs
def source_file_for(klass)
  first_method = klass.instance_methods(false).first
  located = first_method && klass.instance_method(first_method).source_location&.first
  located || Rails.root.join("app/serializers/#{klass.name.underscore}.rb").to_s
rescue StandardError
  nil
end
182
+
183
+ # ──────────────────────────────────────────────────────────────────────
184
+ # Source Annotation
185
+ # ──────────────────────────────────────────────────────────────────────
186
+
187
# Prepend a boxed comment header to the source and return the combined text.
#
# The header shows the class name, the type returned by
# detect_serializer_type, and the model returned by detect_wrapped_model
# ('unknown' when that is nil). Each value is left-justified to a fixed
# width so the right border lines up.
#
# @param source [String] File content (may be empty)
# @param class_name [String] Serializer class name
# @return [String] Header followed by the original source
+ def annotate_source(source, class_name)
188
+ serializer_type = detect_serializer_type(source)
189
+ wrapped_model = detect_wrapped_model(source, class_name)
190
+
191
+ <<~ANNOTATION
192
+ # ╔═══════════════════════════════════════════════════════════════════════╗
193
+ # ║ Serializer: #{class_name.ljust(57)}║
194
+ # ║ Type: #{serializer_type.to_s.ljust(61)}║
195
+ # ║ Wraps: #{(wrapped_model || 'unknown').ljust(60)}║
196
+ # ╚═══════════════════════════════════════════════════════════════════════╝
197
+
198
+ #{source}
199
+ ANNOTATION
200
+ end
201
+
202
# Classify source by the base class it inherits from.
#
# Checks AMS first, then Blueprinter, then Draper; the first family with a
# matching superclass wins.
#
# @param source [String] File content
# @return [Symbol] :ams, :blueprinter, :draper or :unknown
def detect_serializer_type(source)
  families = {
    ams: [/< ActiveModel::Serializer/, /< ApplicationSerializer/],
    blueprinter: [/< Blueprinter::Base/, /< BaseBlueprinter/],
    draper: [/< Draper::Decorator/, /< ApplicationDecorator/]
  }

  families.each do |type, patterns|
    return type if patterns.any? { |pattern| source.match?(pattern) }
  end

  :unknown
end
209
+
210
# Work out which model a serializer/decorator wraps.
#
# Precedence: an AMS `type` declaration, then a Draper `decorates`
# declaration, then the class name with its Serializer/Decorator/
# Blueprinter/Blueprint suffix removed.
#
# Both declarations must start their line. Without that anchor, `type`
# would also match the tail of unrelated identifiers such as
# `content_type 'x'` and report a bogus model.
#
# @param source [String] File content
# @param class_name [String] Serializer class name (may be namespaced)
# @return [String, nil] Model name, or nil when nothing is left after stripping the suffix
def detect_wrapped_model(source, class_name)
  declared = source[/^\s*type\s+[:"'](\w+)/, 1] || source[/^\s*decorates\s+[:"'](\w+)/, 1]
  return declared.classify if declared

  # Convention: strip Serializer/Decorator/Blueprinter suffix
  class_name
    .split('::')
    .last
    .sub(/Serializer$/, '')
    .sub(/Decorator$/, '')
    .sub(/Blueprinter$/, '')
    .sub(/Blueprint$/, '')
    .then { |name| name.empty? ? nil : name }
end
227
+
228
+ # ──────────────────────────────────────────────────────────────────────
229
+ # Metadata Extraction (from source)
230
+ # ──────────────────────────────────────────────────────────────────────
231
+
232
# Assemble the metadata hash for a serializer using source text only.
#
# @param source [String] File content
# @param class_name [String] Serializer class name
# @return [Hash] serializer_type, wrapped_model, attributes, associations,
#   custom_methods, views and loc (lines that are neither blank nor comment-only)
def extract_metadata_from_source(source, class_name)
  code_line_count = source.lines.count do |line|
    stripped = line.strip
    !stripped.empty? && !stripped.start_with?('#')
  end

  {
    serializer_type: detect_serializer_type(source),
    wrapped_model: detect_wrapped_model(source, class_name),
    attributes: extract_attributes(source),
    associations: extract_associations(source),
    custom_methods: extract_custom_methods(source),
    views: extract_views(source),
    loc: code_line_count
  }
end
243
+
244
# Assemble metadata for a loaded class: the source-derived metadata,
# refined with what the class itself reports at runtime.
#
# The serializer type comes from the detected base class when it is known.
# Classes responding to `_attributes_data` have their attributes replaced by
# its keys (when non-empty); otherwise classes responding to `definition`
# have their views replaced by the definition's keys.
#
# @param klass [Class] The serializer class
# @param source [String] File content (may be empty)
# @param base_class_name [String] Name of the detected base class
# @return [Hash] Metadata hash
def extract_metadata_from_class(klass, source, base_class_name)
  metadata = extract_metadata_from_source(source, klass.name)
  metadata[:serializer_type] = BASE_CLASSES[base_class_name] || metadata[:serializer_type]

  if klass.respond_to?(:_attributes_data)
    # AMS runtime attributes
    attribute_names = klass._attributes_data.keys.map(&:to_s)
    metadata[:attributes] = attribute_names unless attribute_names.empty?
  elsif klass.respond_to?(:definition)
    # Blueprinter runtime fields
    definition = klass.definition
    metadata[:views] = definition.keys.map(&:to_s) if definition.respond_to?(:keys)
  end

  metadata
end
261
+
262
# Collect the attribute names a serializer exposes, from source text.
#
# Recognised declarations:
# - AMS / generic: `attribute :x` and `attributes :a, :b`
# - Blueprinter: `field :x`, `identifier :id` and the plural `fields :a, :b`
# - Draper: `delegate :a, :b, to: :object`
#
# @param source [String] File content
# @return [Array<String>] Unique attribute names in order of discovery
def extract_attributes(source)
  symbol_list = /(?::\w+(?:,\s*)?)+/
  names_in = ->(list) { list.scan(/:(\w+)/).flatten }
  attrs = []

  # AMS / generic: `attributes :name, :email, :created_at`
  source.scan(/attributes?\s+(#{symbol_list})/).flatten.each { |list| attrs.concat(names_in.call(list)) }

  # Blueprinter: `field :name` or `identifier :id`
  attrs.concat(source.scan(/(?:field|identifier)\s+:(\w+)/).flatten)

  # Blueprinter: `fields :first_name, :last_name` — the singular pattern
  # above cannot match the plural form, so it needs its own scan.
  source.scan(/\bfields\s+(#{symbol_list})/).flatten.each { |list| attrs.concat(names_in.call(list)) }

  # Draper: `delegate :name, :email, to: :object`
  source.scan(/delegate\s+(#{symbol_list})\s*,\s*to:\s*:object/).flatten.each do |list|
    attrs.concat(names_in.call(list))
  end

  attrs.uniq
end
280
+
281
# Collect association declarations from source text.
#
# AMS `has_many` / `has_one` / `belongs_to` entries come first, followed by
# Blueprinter `association` entries. The :serializer key is present only
# when an explicit serializer/blueprint was given.
#
# @param source [String] File content
# @return [Array<Hash>] Hashes with :type, :name and optionally :serializer
def extract_associations(source)
  # AMS: `has_many :comments`, `belongs_to :author`, `has_one :profile`
  ams_pattern = /(has_many|has_one|belongs_to)\s+:(\w+)(?:,\s*serializer:\s*([\w:]+))?/
  ams = source.scan(ams_pattern).map do |type, name, serializer|
    { type: type, name: name, serializer: serializer }.compact
  end

  # Blueprinter: `association :comments, blueprint: CommentBlueprint`
  blueprinter_pattern = /association\s+:(\w+)(?:,\s*blueprint:\s*([\w:]+))?/
  blueprinter = source.scan(blueprinter_pattern).map do |name, blueprint|
    { type: 'association', name: name, serializer: blueprint }.compact
  end

  ams + blueprinter
end
296
+
297
# List the instance methods defined in the source, excluding `initialize`.
#
# Singleton definitions (`def self.foo`) are skipped rather than being
# reported as a method named "self", and a trailing `?` or `!` is kept so
# predicate and bang methods are reported under their real names. The `\b`
# keeps `undef foo` from being read as a definition.
#
# @param source [String] File content
# @return [Array<String>] Method names in order of appearance
def extract_custom_methods(source)
  source
    .scan(/\bdef\s+(?!self\.)(\w+[?!]?)/)
    .flatten
    .reject { |method_name| method_name == 'initialize' }
end
309
+
310
# Collect the names of Blueprinter views declared as `view :name`.
#
# @param source [String] File content
# @return [Array<String>] View names in order of appearance
def extract_views(source)
  source.scan(/view\s+:(\w+)/).map(&:first)
end
314
+
315
+ # ──────────────────────────────────────────────────────────────────────
316
+ # Dependency Extraction
317
+ # ──────────────────────────────────────────────────────────────────────
318
+
319
# Collect dependencies referenced by serializer source: models, other
# serializers/blueprints named via `serializer:` / `blueprint:` options, and
# services. Duplicates (same type and target) are dropped, keeping the first.
#
# @param source [String] File content
# @return [Array<Hash>] Dependency hashes
def extract_dependencies(source)
  model_deps = scan_model_dependencies(source, via: :serialization)

  # Other serializers referenced (e.g., `serializer: CommentSerializer`)
  referenced = source.scan(/(?:serializer|blueprint):\s*([\w:]+)/).flatten.uniq
  serializer_deps = referenced.map do |serializer|
    { type: :serializer, target: serializer, via: :serialization }
  end

  service_deps = scan_service_dependencies(source)

  [*model_deps, *serializer_deps, *service_deps].uniq { |dep| [dep[:type], dep[:target]] }
end
332
+ end
333
+ end
334
+ end