codebase_index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/exe/codebase-console-mcp +22 -0
  8. data/exe/codebase-index-mcp +61 -0
  9. data/exe/codebase-index-mcp-http +64 -0
  10. data/exe/codebase-index-mcp-start +58 -0
  11. data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
  12. data/lib/codebase_index/ast/method_extractor.rb +76 -0
  13. data/lib/codebase_index/ast/node.rb +88 -0
  14. data/lib/codebase_index/ast/parser.rb +653 -0
  15. data/lib/codebase_index/ast.rb +6 -0
  16. data/lib/codebase_index/builder.rb +137 -0
  17. data/lib/codebase_index/chunking/chunk.rb +84 -0
  18. data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
  19. data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
  20. data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
  21. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
  22. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
  23. data/lib/codebase_index/console/audit_logger.rb +75 -0
  24. data/lib/codebase_index/console/bridge.rb +170 -0
  25. data/lib/codebase_index/console/confirmation.rb +90 -0
  26. data/lib/codebase_index/console/connection_manager.rb +173 -0
  27. data/lib/codebase_index/console/console_response_renderer.rb +78 -0
  28. data/lib/codebase_index/console/model_validator.rb +81 -0
  29. data/lib/codebase_index/console/safe_context.rb +82 -0
  30. data/lib/codebase_index/console/server.rb +557 -0
  31. data/lib/codebase_index/console/sql_validator.rb +172 -0
  32. data/lib/codebase_index/console/tools/tier1.rb +118 -0
  33. data/lib/codebase_index/console/tools/tier2.rb +117 -0
  34. data/lib/codebase_index/console/tools/tier3.rb +110 -0
  35. data/lib/codebase_index/console/tools/tier4.rb +79 -0
  36. data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
  37. data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
  38. data/lib/codebase_index/cost_model/estimator.rb +128 -0
  39. data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
  40. data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
  41. data/lib/codebase_index/cost_model.rb +22 -0
  42. data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
  43. data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
  44. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
  45. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
  46. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
  47. data/lib/codebase_index/db/migrator.rb +71 -0
  48. data/lib/codebase_index/db/schema_version.rb +73 -0
  49. data/lib/codebase_index/dependency_graph.rb +227 -0
  50. data/lib/codebase_index/embedding/indexer.rb +130 -0
  51. data/lib/codebase_index/embedding/openai.rb +105 -0
  52. data/lib/codebase_index/embedding/provider.rb +135 -0
  53. data/lib/codebase_index/embedding/text_preparer.rb +112 -0
  54. data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
  55. data/lib/codebase_index/evaluation/evaluator.rb +146 -0
  56. data/lib/codebase_index/evaluation/metrics.rb +79 -0
  57. data/lib/codebase_index/evaluation/query_set.rb +148 -0
  58. data/lib/codebase_index/evaluation/report_generator.rb +90 -0
  59. data/lib/codebase_index/extracted_unit.rb +145 -0
  60. data/lib/codebase_index/extractor.rb +956 -0
  61. data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
  62. data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
  63. data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
  64. data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
  65. data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
  66. data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
  67. data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
  68. data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
  69. data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
  70. data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
  71. data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
  72. data/lib/codebase_index/extractors/event_extractor.rb +211 -0
  73. data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
  74. data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
  75. data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
  76. data/lib/codebase_index/extractors/job_extractor.rb +369 -0
  77. data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
  78. data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
  79. data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
  80. data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
  81. data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
  82. data/lib/codebase_index/extractors/model_extractor.rb +960 -0
  83. data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
  84. data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
  85. data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
  86. data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
  87. data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
  88. data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
  89. data/lib/codebase_index/extractors/route_extractor.rb +181 -0
  90. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
  91. data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
  92. data/lib/codebase_index/extractors/service_extractor.rb +254 -0
  93. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
  94. data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
  95. data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
  96. data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
  97. data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
  98. data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
  99. data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
  100. data/lib/codebase_index/feedback/gap_detector.rb +89 -0
  101. data/lib/codebase_index/feedback/store.rb +119 -0
  102. data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
  103. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
  104. data/lib/codebase_index/flow_assembler.rb +290 -0
  105. data/lib/codebase_index/flow_document.rb +191 -0
  106. data/lib/codebase_index/flow_precomputer.rb +102 -0
  107. data/lib/codebase_index/formatting/base.rb +40 -0
  108. data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
  109. data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
  110. data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
  111. data/lib/codebase_index/formatting/human_adapter.rb +78 -0
  112. data/lib/codebase_index/graph_analyzer.rb +374 -0
  113. data/lib/codebase_index/mcp/index_reader.rb +394 -0
  114. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
  115. data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
  116. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
  117. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
  118. data/lib/codebase_index/mcp/server.rb +935 -0
  119. data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
  120. data/lib/codebase_index/model_name_cache.rb +51 -0
  121. data/lib/codebase_index/notion/client.rb +217 -0
  122. data/lib/codebase_index/notion/exporter.rb +219 -0
  123. data/lib/codebase_index/notion/mapper.rb +39 -0
  124. data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
  125. data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
  126. data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
  127. data/lib/codebase_index/notion/rate_limiter.rb +68 -0
  128. data/lib/codebase_index/observability/health_check.rb +81 -0
  129. data/lib/codebase_index/observability/instrumentation.rb +34 -0
  130. data/lib/codebase_index/observability/structured_logger.rb +75 -0
  131. data/lib/codebase_index/operator/error_escalator.rb +81 -0
  132. data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
  133. data/lib/codebase_index/operator/status_reporter.rb +80 -0
  134. data/lib/codebase_index/railtie.rb +26 -0
  135. data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
  136. data/lib/codebase_index/resilience/index_validator.rb +185 -0
  137. data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
  138. data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
  139. data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
  140. data/lib/codebase_index/retrieval/ranker.rb +273 -0
  141. data/lib/codebase_index/retrieval/search_executor.rb +327 -0
  142. data/lib/codebase_index/retriever.rb +160 -0
  143. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
  144. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
  145. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
  146. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
  147. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
  148. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
  149. data/lib/codebase_index/ruby_analyzer.rb +87 -0
  150. data/lib/codebase_index/session_tracer/file_store.rb +111 -0
  151. data/lib/codebase_index/session_tracer/middleware.rb +143 -0
  152. data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
  153. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
  154. data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
  155. data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
  156. data/lib/codebase_index/session_tracer/store.rb +67 -0
  157. data/lib/codebase_index/storage/graph_store.rb +120 -0
  158. data/lib/codebase_index/storage/metadata_store.rb +169 -0
  159. data/lib/codebase_index/storage/pgvector.rb +163 -0
  160. data/lib/codebase_index/storage/qdrant.rb +172 -0
  161. data/lib/codebase_index/storage/vector_store.rb +156 -0
  162. data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
  163. data/lib/codebase_index/version.rb +5 -0
  164. data/lib/codebase_index.rb +223 -0
  165. data/lib/generators/codebase_index/install_generator.rb +32 -0
  166. data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
  167. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
  168. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
  169. data/lib/tasks/codebase_index.rake +583 -0
  170. data/lib/tasks/codebase_index_evaluation.rake +115 -0
  171. metadata +252 -0
@@ -0,0 +1,117 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'yaml'
4
+
5
+ module CodebaseIndex
6
+ module Extractors
7
+ # I18nExtractor handles internationalization locale file extraction.
8
+ #
9
+ # Parses YAML files from `config/locales/` to extract translation keys,
10
+ # locale information, and key structure. Each locale file becomes one
11
+ # ExtractedUnit.
12
+ #
13
+ # @example
14
+ # extractor = I18nExtractor.new
15
+ # units = extractor.extract_all
16
+ # en = units.find { |u| u.identifier == "en.yml" }
17
+ #
18
+ class I18nExtractor
19
# Locale directories, relative to the Rails root, that are searched for YAML files.
I18N_DIRECTORIES = ['config/locales'].freeze

# Resolve the configured locale directories against Rails.root and keep
# only the ones that exist as directories.
def initialize
  @directories = I18N_DIRECTORIES.filter_map do |relative_dir|
    absolute_dir = Rails.root.join(relative_dir)
    absolute_dir if absolute_dir.directory?
  end
end
28
+
29
# Extract every `.yml` file found (recursively) under the configured
# locale directories. Files for which extraction yields nil are dropped.
#
# @return [Array<ExtractedUnit>] List of i18n units
def extract_all
  locale_files = @directories.flat_map { |dir| Dir[dir.join('**/*.yml')] }
  locale_files.filter_map { |path| extract_i18n_file(path) }
end
39
+
40
# Extract a single locale file into an ExtractedUnit.
#
# The first top-level key of the parsed document is used as the locale and
# stored as the unit's namespace. Documents that do not parse to a non-empty
# Hash are skipped.
#
# YAML aliases are enabled when parsing: locale files may share subtrees via
# anchors (`&name` / `*name`), and without `aliases: true` such files raise
# inside `YAML.safe_load` and would be dropped from the index.
#
# @param file_path [String] Path to the YAML locale file
# @return [ExtractedUnit, nil] The extracted unit, or nil when the file is
#   empty, not a Hash, or any error occurs (the error is logged)
def extract_i18n_file(file_path)
  source = File.read(file_path)
  data = YAML.safe_load(
    source,
    permitted_classes: [Symbol, Date, Time, Regexp],
    aliases: true
  )

  return nil unless data.is_a?(Hash) && data.any?

  identifier = build_identifier(file_path)
  locale = data.keys.first

  unit = ExtractedUnit.new(
    type: :i18n,
    identifier: identifier,
    file_path: file_path
  )

  unit.namespace = locale
  unit.source_code = source
  unit.metadata = build_metadata(data, locale)
  unit.dependencies = []

  unit
rescue StandardError => e
  Rails.logger.error("Failed to extract i18n #{file_path}: #{e.message}")
  nil
end
69
+
70
+ private
71
+
72
# Derive a short identifier by removing the Rails root prefix and then a
# leading `config/locales/` segment from the path.
#
# @param file_path [String] Absolute path
# @return [String] Relative identifier like "en.yml" or "models/en.yml"
def build_identifier(file_path)
  root_prefix = "#{Rails.root}/"
  path_in_app = file_path.sub(root_prefix, '')
  path_in_app.sub(%r{^config/locales/}, '')
end
80
+
81
# Summarise the translations stored under the locale key.
#
# @param data [Hash] Parsed YAML data
# @param locale [String] The locale key (e.g., "en")
# @return [Hash] :locale, :key_count, :top_level_keys and :key_paths
def build_metadata(data, locale)
  translations = data[locale] || {}
  paths = flatten_keys(translations)
  # A locale key may map to a scalar; only a Hash has top-level keys.
  top_level = translations.is_a?(Hash) ? translations.keys : []

  {
    locale: locale,
    key_count: paths.size,
    top_level_keys: top_level,
    key_paths: paths
  }
end
97
+
98
# Walk a nested hash and collect one dot-separated path per non-Hash value.
# A non-Hash argument yields a single "<prefix>(leaf)" entry; nested hashes
# that are empty contribute no paths.
#
# @param hash [Hash] Nested hash to flatten
# @param prefix [String] Current key prefix
# @return [Array<String>] Flattened key paths
def flatten_keys(hash, prefix = '')
  return ["#{prefix}(leaf)"] unless hash.is_a?(Hash)

  hash.each_with_object([]) do |(name, subtree), paths|
    path = prefix.empty? ? name.to_s : "#{prefix}.#{name}"

    if subtree.is_a?(Hash)
      paths.concat(flatten_keys(subtree, path))
    else
      paths << path
    end
  end
end
115
+ end
116
+ end
117
+ end
@@ -0,0 +1,369 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'shared_utility_methods'
4
+ require_relative 'shared_dependency_scanner'
5
+
6
+ module CodebaseIndex
7
+ module Extractors
8
+ # JobExtractor handles ActiveJob and Sidekiq job extraction.
9
+ #
10
+ # Background jobs are critical for understanding async behavior.
11
+ # They often perform important business logic that would otherwise
12
+ # be unclear from just looking at models and controllers.
13
+ #
14
+ # We extract:
15
+ # - Queue configuration
16
+ # - Retry/error handling configuration
17
+ # - Arguments (the job's interface)
18
+ # - What the job calls (dependencies)
19
+ # - What triggers this job (reverse lookup via dependencies)
20
+ #
21
+ # @example
22
+ # extractor = JobExtractor.new
23
+ # units = extractor.extract_all
24
+ # order_job = units.find { |u| u.identifier == "ProcessOrderJob" }
25
+ #
26
+ class JobExtractor
27
+ include SharedUtilityMethods
28
+ include SharedDependencyScanner
29
+
30
# Job directories, relative to the Rails root, that are searched for Ruby files.
JOB_DIRECTORIES = ['app/jobs', 'app/workers', 'app/sidekiq'].freeze

# Resolve the configured job directories against Rails.root and keep only
# the ones that exist as directories.
def initialize
  @directories = JOB_DIRECTORIES.filter_map do |relative_dir|
    absolute_dir = Rails.root.join(relative_dir)
    absolute_dir if absolute_dir.directory?
  end
end
41
+
42
# Extract all jobs in the application.
#
# Discovery runs in two passes: every `.rb` file under the job directories
# is extracted first; then, when ApplicationJob is defined, each of its
# descendants whose name was not already produced is extracted by class.
#
# @return [Array<ExtractedUnit>] List of job units
def extract_all
  job_files = @directories.flat_map { |dir| Dir[dir.join('**/*.rb')] }
  units = job_files.filter_map { |file| extract_job_file(file) }
  return units unless defined?(ApplicationJob)

  known = units.to_set(&:identifier)
  ApplicationJob.descendants.each do |job_class|
    next if known.include?(job_class.name)

    unit = extract_job_class(job_class)
    next unless unit

    units << unit
    known << unit.identifier
  end

  units
end
72
+
73
# Build a job unit from a source file.
#
# The file is skipped when no class name can be determined or when the
# source does not look like a job (see #job_file?). Any error is logged and
# turned into nil.
#
# @param file_path [String] Path to the job file
# @return [ExtractedUnit, nil] The extracted unit
def extract_job_file(file_path)
  source = File.read(file_path)
  class_name = extract_class_name(file_path, source)
  return nil unless class_name && job_file?(source)

  ExtractedUnit.new(type: :job, identifier: class_name, file_path: file_path).tap do |unit|
    unit.namespace = extract_namespace(class_name)
    unit.source_code = annotate_source(source, class_name)
    unit.metadata = extract_metadata_from_source(source, class_name)
    unit.dependencies = extract_dependencies(source, class_name)
  end
rescue StandardError => e
  Rails.logger.error("Failed to extract job #{file_path}: #{e.message}")
  nil
end
100
+
101
# Build a job unit from a loaded class (runtime introspection).
#
# Anonymous classes are skipped. When the resolved source file is missing,
# the unit is built from an empty source string. Any error is logged and
# turned into nil.
#
# @param job_class [Class] The job class
# @return [ExtractedUnit, nil] The extracted unit
def extract_job_class(job_class)
  job_name = job_class.name
  return nil if job_name.nil?

  file_path = source_file_for(job_class)
  source =
    if file_path && File.exist?(file_path)
      File.read(file_path)
    else
      ''
    end

  ExtractedUnit.new(type: :job, identifier: job_class.name, file_path: file_path).tap do |unit|
    unit.namespace = extract_namespace(job_class.name)
    unit.source_code = annotate_source(source, job_class.name)
    unit.metadata = extract_metadata_from_class(job_class, source)
    unit.dependencies = extract_dependencies(source, job_class.name)
  end
rescue StandardError => e
  Rails.logger.error("Failed to extract job #{job_class.name}: #{e.message}")
  nil
end
127
+
128
+ private
129
+
130
+ # ──────────────────────────────────────────────────────────────────────
131
+ # Class Discovery
132
+ # ──────────────────────────────────────────────────────────────────────
133
+
134
# Determine the job's class name.
#
# Uses the first `class Name` declaration in the source when there is one;
# otherwise derives the name from the file path relative to the job
# directories.
#
# @param file_path [String] Path to the job file
# @param source [String] File contents
# @return [String] The class name
def extract_class_name(file_path, source)
  declared = source[/^\s*class\s+([\w:]+)/, 1]
  return declared if declared

  # No class declaration found: fall back to the path-based convention.
  path_in_app = file_path.sub("#{Rails.root}/", '')
  path_in_jobs = path_in_app.sub(%r{^app/(jobs|workers|sidekiq)/}, '')
  path_in_jobs.sub('.rb', '').camelize
end
145
+
146
# Decide whether a source file looks like a job or worker.
#
# @param source [String] File contents
# @return [Boolean] true when the source matches any of the job markers
def job_file?(source)
  job_markers = [
    /< ApplicationJob/,
    /< ActiveJob::Base/,
    /include Sidekiq::Worker/,
    /include Sidekiq::Job/,
    /def perform/
  ]

  job_markers.any? { |marker| source.match?(marker) }
end
154
+
155
# Locate the file that defines a job class.
#
# Prefers the source location of a `perform` method defined directly on the
# class; otherwise falls back to the conventional `app/jobs/<name>.rb` path
# under the Rails root. Any error yields nil.
#
# @param job_class [Class] The job class
# @return [String, nil] Path to the source file
def source_file_for(job_class)
  defined_at = nil

  if job_class.method_defined?(:perform, false)
    location = job_class.instance_method(:perform).source_location
    defined_at = location.first if location
  end

  defined_at || Rails.root.join("app/jobs/#{job_class.name.underscore}.rb").to_s
rescue StandardError
  nil
end
163
+
164
+ # ──────────────────────────────────────────────────────────────────────
165
+ # Source Annotation
166
+ # ──────────────────────────────────────────────────────────────────────
167
+
168
# Prefix the job source with a boxed comment header showing the class name,
# the detected job type and the queue ("default" when none is found).
#
# @param source [String] Job source code
# @param class_name [String] Job class name
# @return [String] Header, a blank line, the source, and a trailing newline
def annotate_source(source, class_name)
  kind = detect_job_type(source).to_s
  queue_name = extract_queue(source) || 'default'

  header = [
    '# ╔═══════════════════════════════════════════════════════════════════════╗',
    "# ║ Job: #{class_name.ljust(62)}║",
    "# ║ Type: #{kind.ljust(61)}║",
    "# ║ Queue: #{queue_name.ljust(60)}║",
    '# ╚═══════════════════════════════════════════════════════════════════════╝'
  ]

  "#{header.join("\n")}\n\n#{source}\n"
end
182
+
183
# Classify the job backend from markers in the source. Signatures are
# checked in the order listed and the first match wins.
#
# @param source [String] Job source code
# @return [Symbol] :sidekiq, :active_job, :good_job, :delayed_job or :unknown
def detect_job_type(source)
  signatures = {
    sidekiq: /include Sidekiq::(Worker|Job)/,
    active_job: /< (ApplicationJob|ActiveJob::Base)/,
    good_job: /include GoodJob/,
    delayed_job: /delay|handle_asynchronously/
  }

  matched = signatures.find { |_type, pattern| source.match?(pattern) }
  matched ? matched.first : :unknown
end
191
+
192
# Find the queue name declared in the source. The ActiveJob `queue_as` form
# is tried first, then a `queue:` entry on a `sidekiq_options` line.
#
# @param source [String] Job source code
# @return [String, nil] Queue name, or nil when none is declared
def extract_queue(source)
  queue_patterns = [
    /queue_as\s+[:"'](\w+)/,                 # ActiveJob style
    /sidekiq_options.*queue:\s*[:"'](\w+)/   # Sidekiq style
  ]

  queue_patterns.each do |pattern|
    found = pattern.match(source)
    return found[1] if found
  end

  nil
end
201
+
202
+ # ──────────────────────────────────────────────────────────────────────
203
+ # Metadata Extraction (from source)
204
+ # ──────────────────────────────────────────────────────────────────────
205
+
206
# Assemble the metadata hash for a job from its source text alone.
#
# @param source [String] Job source code
# @param class_name [String] Job class name (excluded from :enqueues_jobs)
# @return [Hash] Job type, queue, configuration, interface, error handling,
#   callbacks, enqueued jobs and a line count
def extract_metadata_from_source(source, class_name)
  identity = {
    job_type: detect_job_type(source),
    queue: extract_queue(source)
  }

  configuration = {
    sidekiq_options: extract_sidekiq_options(source),
    retry_config: extract_retry_config(source),
    concurrency_controls: extract_concurrency(source)
  }

  interface = {
    perform_params: extract_perform_params(source),
    scheduled: source.match?(/perform_later|perform_in|perform_at/)
  }

  error_handling = {
    discard_on: extract_discard_on(source),
    retry_on: extract_retry_on(source)
  }

  relations = {
    callbacks: extract_callbacks(source),
    enqueues_jobs: extract_enqueued_jobs(source, class_name)
  }

  # Lines of code: non-blank lines that are not pure comment lines.
  code_lines = source.lines.count do |line|
    text = line.strip
    text.present? && !text.start_with?('#')
  end

  identity.merge(configuration, interface, error_handling, relations, { loc: code_lines })
end
234
+
235
# Build source-based metadata, then enrich it with values read from the
# loaded class where the class exposes them.
#
# @param job_class [Class] The job class
# @param source [String] Job source code (may be empty)
# @return [Hash] Metadata hash
def extract_metadata_from_class(job_class, source)
  metadata = extract_metadata_from_source(source, job_class.name)

  # Only fill in the queue when the source did not declare one.
  if job_class.respond_to?(:queue_name)
    metadata[:queue] ||= job_class.queue_name
  end

  # Runtime Sidekiq options replace whatever was parsed from the source.
  if job_class.respond_to?(:sidekiq_options_hash)
    metadata[:sidekiq_options] = job_class.sidekiq_options_hash
  end

  metadata
end
245
+
246
# Parse the `key: value` pairs on the first `sidekiq_options` line.
# Values are kept as the raw source text, stripped of surrounding whitespace.
#
# @param source [String] Job source code
# @return [Hash{Symbol => String}] Parsed options, empty when none declared
def extract_sidekiq_options(source)
  declaration = source[/sidekiq_options\s+(.+)/, 1]
  return {} unless declaration

  declaration.scan(/(\w+):\s*([^,\n]+)/).to_h do |option, raw_value|
    [option.to_sym, raw_value.strip]
  end
end
258
+
259
# Collect retry configuration declared in the source.
#
# For each ActiveJob `retry_on` declaration, records the error class and,
# when they follow directly in `wait:` then `attempts:` order, those two
# options. The error class may be namespaced (`Net::OpenTimeout`): the whole
# constant path is captured so that the trailing options are still picked up
# and the name agrees with #extract_retry_on.
#
# For Sidekiq, records the raw `retry:` value from a `sidekiq_options` line.
#
# @param source [String] Job source code
# @return [Hash] May contain :retry_on (Array<Hash> with :error, :wait,
#   :attempts) and :sidekiq_retries (String); empty when nothing is declared
def extract_retry_config(source)
  config = {}

  # ActiveJob retry_on
  retry_on_pattern = /retry_on\s+(\w+(?:::\w+)*)(?:,\s*wait:\s*([^,\n]+))?(?:,\s*attempts:\s*(\d+))?/
  source.scan(retry_on_pattern) do |error, wait, attempts|
    config[:retry_on] ||= []
    config[:retry_on] << {
      error: error,
      wait: wait,
      attempts: attempts&.to_i
    }
  end

  # Sidekiq retries (kept as the raw text: a number, "true" or "false")
  sidekiq_retries = source[/sidekiq_options.*retry:\s*(\d+|false|true)/, 1]
  config[:sidekiq_retries] = sidekiq_retries if sidekiq_retries

  config
end
277
+
278
# Pick up concurrency-related settings from the source: a numeric
# `unique_for:` value and the raw contents of a `rate_limit: { ... }` hash.
#
# @param source [String] Job source code
# @return [Hash] May contain :unique_for (Integer) and :rate_limit (String)
def extract_concurrency(source)
  unique_window = source[/unique_for:\s*(\d+)/, 1]
  rate_limit = source[/rate_limit:\s*\{([^}]+)\}/, 1]

  controls = {}
  controls[:unique_for] = unique_window.to_i if unique_window
  controls[:rate_limit] = rate_limit if rate_limit
  controls
end
289
+
290
# Describe the parameters of the first parenthesised `perform` definition.
#
# Handles positional parameters (with or without `= default`), splats
# (`*args`, `**opts`) and keyword parameters (`name:` / `name: default`).
# A keyword parameter's default value is consumed together with its name, so
# it is not reported as a separate parameter.
#
# This is a regex-based approximation: defaults that themselves contain a
# comma or a closing parenthesis are not parsed accurately.
#
# @param source [String] Job source code
# @return [Array<Hash>] One hash per parameter with :name (String),
#   :splat (:single, :double or nil) and :has_default (Boolean); empty when
#   no parenthesised `perform` signature is found
def extract_perform_params(source)
  signature = source[/def\s+perform\s*\(([^)]*)\)/, 1]
  return [] unless signature

  # stars, name, then either "= default" (positional) or ": default" (keyword).
  param_pattern = /(\*{0,2})(\w+)(?:\s*=\s*([^,]+)|:[ \t]*([^,\s][^,]*)?)?/
  splat_kinds = { '**' => :double, '*' => :single }

  signature.scan(param_pattern).map do |stars, name, positional_default, keyword_default|
    {
      name: name,
      splat: splat_kinds[stars],
      has_default: !(positional_default || keyword_default).nil?
    }
  end
end
310
+
311
# List the (possibly namespaced) error classes named right after `discard_on`.
#
# @param source [String] Job source code
# @return [Array<String>] Error class names in source order
def extract_discard_on(source)
  discard_pattern = /discard_on\s+(\w+(?:::\w+)*)/
  source.scan(discard_pattern).map(&:first)
end
314
+
315
# List the (possibly namespaced) error classes named right after `retry_on`.
#
# @param source [String] Job source code
# @return [Array<String>] Error class names in source order
def extract_retry_on(source)
  retry_pattern = /retry_on\s+(\w+(?:::\w+)*)/
  source.scan(retry_pattern).map(&:first)
end
318
+
319
# Find job lifecycle callbacks declared with a symbol (`before_perform :name`)
# or a `do` block. Results are grouped by callback type in the order the
# types are listed here, not in source order.
#
# @param source [String] Job source code
# @return [Array<Hash>] Hashes with :type (String) and :method (String, or
#   nil for block callbacks)
def extract_callbacks(source)
  hooks = %w[before_enqueue after_enqueue before_perform after_perform around_perform]

  hooks.flat_map do |hook|
    source.scan(/#{hook}\s+(?::(\w+)|do)/).map do |captures|
      { type: hook, method: captures.first }
    end
  end
end
330
+
331
+ # ──────────────────────────────────────────────────────────────────────
332
+ # Dependency Extraction
333
+ # ──────────────────────────────────────────────────────────────────────
334
+
335
# Collect the dependencies referenced by a job's source.
#
# Model, service and mailer dependencies come from the individual scan_*
# helpers (rather than a combined scan) so that job-to-job links can be added
# here with the more specific :job_enqueue `via` and with the current class
# excluded. References to HTTP client libraries and Redis are added as
# external/infrastructure dependencies. The result is de-duplicated by
# [type, target], keeping the first occurrence.
#
# @param source [String] Job source code
# @param current_class_name [String, nil] Name of the job being analysed
# @return [Array<Hash>] Dependency hashes with :type, :target and :via
def extract_dependencies(source, current_class_name = nil)
  deps = scan_model_dependencies(source)
  [scan_service_dependencies(source), scan_mailer_dependencies(source)].each do |found|
    deps.concat(found)
  end

  job_links = extract_enqueued_jobs(source, current_class_name).map do |job_name|
    { type: :job, target: job_name, via: :job_enqueue }
  end
  deps.concat(job_links)

  uses_http = source.match?(/HTTParty|Faraday|RestClient|Net::HTTP/)
  deps << { type: :external, target: :http_api, via: :code_reference } if uses_http

  uses_redis = source.match?(/Redis\.current|REDIS/)
  deps << { type: :infrastructure, target: :redis, via: :code_reference } if uses_redis

  deps.uniq { |dep| [dep[:type], dep[:target]] }
end
356
+
357
# Find `SomethingJob` constants that receive an enqueue-style call
# (`perform_later`, `perform_async`, `perform_in`, `perform_at` or `set`).
# Only the final constant segment is captured.
#
# @param source [String] The job source code
# @param current_class_name [String, nil] The current job class name (excluded from results)
# @return [Array<String>] Unique list of enqueued job class names
def extract_enqueued_jobs(source, current_class_name = nil)
  enqueue_call = /(\w+Job)\.(?:perform_later|perform_async|perform_in|perform_at|set\b)/
  enqueued = source.scan(enqueue_call).map(&:first).uniq
  enqueued - [current_class_name]
end
367
+ end
368
+ end
369
+ end