codebase_index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/exe/codebase-console-mcp +22 -0
  8. data/exe/codebase-index-mcp +61 -0
  9. data/exe/codebase-index-mcp-http +64 -0
  10. data/exe/codebase-index-mcp-start +58 -0
  11. data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
  12. data/lib/codebase_index/ast/method_extractor.rb +76 -0
  13. data/lib/codebase_index/ast/node.rb +88 -0
  14. data/lib/codebase_index/ast/parser.rb +653 -0
  15. data/lib/codebase_index/ast.rb +6 -0
  16. data/lib/codebase_index/builder.rb +137 -0
  17. data/lib/codebase_index/chunking/chunk.rb +84 -0
  18. data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
  19. data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
  20. data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
  21. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
  22. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
  23. data/lib/codebase_index/console/audit_logger.rb +75 -0
  24. data/lib/codebase_index/console/bridge.rb +170 -0
  25. data/lib/codebase_index/console/confirmation.rb +90 -0
  26. data/lib/codebase_index/console/connection_manager.rb +173 -0
  27. data/lib/codebase_index/console/console_response_renderer.rb +78 -0
  28. data/lib/codebase_index/console/model_validator.rb +81 -0
  29. data/lib/codebase_index/console/safe_context.rb +82 -0
  30. data/lib/codebase_index/console/server.rb +557 -0
  31. data/lib/codebase_index/console/sql_validator.rb +172 -0
  32. data/lib/codebase_index/console/tools/tier1.rb +118 -0
  33. data/lib/codebase_index/console/tools/tier2.rb +117 -0
  34. data/lib/codebase_index/console/tools/tier3.rb +110 -0
  35. data/lib/codebase_index/console/tools/tier4.rb +79 -0
  36. data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
  37. data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
  38. data/lib/codebase_index/cost_model/estimator.rb +128 -0
  39. data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
  40. data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
  41. data/lib/codebase_index/cost_model.rb +22 -0
  42. data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
  43. data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
  44. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
  45. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
  46. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
  47. data/lib/codebase_index/db/migrator.rb +71 -0
  48. data/lib/codebase_index/db/schema_version.rb +73 -0
  49. data/lib/codebase_index/dependency_graph.rb +227 -0
  50. data/lib/codebase_index/embedding/indexer.rb +130 -0
  51. data/lib/codebase_index/embedding/openai.rb +105 -0
  52. data/lib/codebase_index/embedding/provider.rb +135 -0
  53. data/lib/codebase_index/embedding/text_preparer.rb +112 -0
  54. data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
  55. data/lib/codebase_index/evaluation/evaluator.rb +146 -0
  56. data/lib/codebase_index/evaluation/metrics.rb +79 -0
  57. data/lib/codebase_index/evaluation/query_set.rb +148 -0
  58. data/lib/codebase_index/evaluation/report_generator.rb +90 -0
  59. data/lib/codebase_index/extracted_unit.rb +145 -0
  60. data/lib/codebase_index/extractor.rb +956 -0
  61. data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
  62. data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
  63. data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
  64. data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
  65. data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
  66. data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
  67. data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
  68. data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
  69. data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
  70. data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
  71. data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
  72. data/lib/codebase_index/extractors/event_extractor.rb +211 -0
  73. data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
  74. data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
  75. data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
  76. data/lib/codebase_index/extractors/job_extractor.rb +369 -0
  77. data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
  78. data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
  79. data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
  80. data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
  81. data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
  82. data/lib/codebase_index/extractors/model_extractor.rb +960 -0
  83. data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
  84. data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
  85. data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
  86. data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
  87. data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
  88. data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
  89. data/lib/codebase_index/extractors/route_extractor.rb +181 -0
  90. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
  91. data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
  92. data/lib/codebase_index/extractors/service_extractor.rb +254 -0
  93. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
  94. data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
  95. data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
  96. data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
  97. data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
  98. data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
  99. data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
  100. data/lib/codebase_index/feedback/gap_detector.rb +89 -0
  101. data/lib/codebase_index/feedback/store.rb +119 -0
  102. data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
  103. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
  104. data/lib/codebase_index/flow_assembler.rb +290 -0
  105. data/lib/codebase_index/flow_document.rb +191 -0
  106. data/lib/codebase_index/flow_precomputer.rb +102 -0
  107. data/lib/codebase_index/formatting/base.rb +40 -0
  108. data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
  109. data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
  110. data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
  111. data/lib/codebase_index/formatting/human_adapter.rb +78 -0
  112. data/lib/codebase_index/graph_analyzer.rb +374 -0
  113. data/lib/codebase_index/mcp/index_reader.rb +394 -0
  114. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
  115. data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
  116. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
  117. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
  118. data/lib/codebase_index/mcp/server.rb +935 -0
  119. data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
  120. data/lib/codebase_index/model_name_cache.rb +51 -0
  121. data/lib/codebase_index/notion/client.rb +217 -0
  122. data/lib/codebase_index/notion/exporter.rb +219 -0
  123. data/lib/codebase_index/notion/mapper.rb +39 -0
  124. data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
  125. data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
  126. data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
  127. data/lib/codebase_index/notion/rate_limiter.rb +68 -0
  128. data/lib/codebase_index/observability/health_check.rb +81 -0
  129. data/lib/codebase_index/observability/instrumentation.rb +34 -0
  130. data/lib/codebase_index/observability/structured_logger.rb +75 -0
  131. data/lib/codebase_index/operator/error_escalator.rb +81 -0
  132. data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
  133. data/lib/codebase_index/operator/status_reporter.rb +80 -0
  134. data/lib/codebase_index/railtie.rb +26 -0
  135. data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
  136. data/lib/codebase_index/resilience/index_validator.rb +185 -0
  137. data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
  138. data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
  139. data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
  140. data/lib/codebase_index/retrieval/ranker.rb +273 -0
  141. data/lib/codebase_index/retrieval/search_executor.rb +327 -0
  142. data/lib/codebase_index/retriever.rb +160 -0
  143. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
  144. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
  145. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
  146. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
  147. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
  148. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
  149. data/lib/codebase_index/ruby_analyzer.rb +87 -0
  150. data/lib/codebase_index/session_tracer/file_store.rb +111 -0
  151. data/lib/codebase_index/session_tracer/middleware.rb +143 -0
  152. data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
  153. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
  154. data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
  155. data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
  156. data/lib/codebase_index/session_tracer/store.rb +67 -0
  157. data/lib/codebase_index/storage/graph_store.rb +120 -0
  158. data/lib/codebase_index/storage/metadata_store.rb +169 -0
  159. data/lib/codebase_index/storage/pgvector.rb +163 -0
  160. data/lib/codebase_index/storage/qdrant.rb +172 -0
  161. data/lib/codebase_index/storage/vector_store.rb +156 -0
  162. data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
  163. data/lib/codebase_index/version.rb +5 -0
  164. data/lib/codebase_index.rb +223 -0
  165. data/lib/generators/codebase_index/install_generator.rb +32 -0
  166. data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
  167. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
  168. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
  169. data/lib/tasks/codebase_index.rake +583 -0
  170. data/lib/tasks/codebase_index_evaluation.rake +115 -0
  171. metadata +252 -0
@@ -0,0 +1,261 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'shared_utility_methods'
4
+ require_relative 'shared_dependency_scanner'
5
+
6
+ module CodebaseIndex
7
+ module Extractors
8
+ # CachingExtractor detects caching usage across controllers, models, and views.
9
+ #
10
+ # Scans `app/controllers/**/*.rb`, `app/models/**/*.rb`, and
11
+ # `app/views/**/*.erb` for cache-related patterns: Rails.cache.*,
12
+ # caches_action, fragment cache blocks, cache_key, cache_version,
13
+ # and expires_in. Produces one unit per file that contains any
14
+ # cache calls, identifying the strategy and TTL patterns.
15
+ #
16
+ # @example
17
+ # extractor = CachingExtractor.new
18
+ # units = extractor.extract_all
19
+ # ctrl = units.find { |u| u.identifier == "app/controllers/products_controller.rb" }
20
+ # ctrl.metadata[:cache_strategy] # => :low_level
21
+ # ctrl.metadata[:cache_calls].size # => 3
22
+ #
23
class CachingExtractor
  include SharedUtilityMethods
  include SharedDependencyScanner

  # Glob patterns (relative to the Rails root) to scan, keyed by file type.
  SCAN_PATTERNS = {
    controller: 'app/controllers/**/*.rb',
    model: 'app/models/**/*.rb',
    view: 'app/views/**/*.erb'
  }.freeze

  # Patterns that indicate cache usage, grouped by call type.
  CACHE_PATTERNS = {
    fetch: /Rails\.cache\.fetch\s*[(\[]/,
    read: /Rails\.cache\.read\s*[(\[]/,
    write: /Rails\.cache\.write\s*[(\[]/,
    delete: /Rails\.cache\.delete\s*[(\[]/,
    exist: /Rails\.cache\.exist\?\s*[(\[]/,
    caches_action: /\bcaches_action\b/,
    fragment: /\bcache\s+.*?\bdo\b|\bcache\s+do\b|\bcache\s*\(/,
    cache_key: /\bcache_key\b/,
    cache_version: /\bcache_version\b/
  }.freeze

  # Pattern for extracting the TTL expression of an `expires_in:` option.
  TTL_PATTERN = /expires_in:\s*([^,\n)]+)/

  # Key-pattern regex (first argument to Rails.cache.*)
  KEY_PATTERN = /Rails\.cache\.(?:fetch|read|write|delete|exist\?)\s*[(\[]?\s*([^,\n)\]]+)/

  # Call types whose first argument is a cache key.
  KEYED_CALL_TYPES = %i[fetch read write delete exist].freeze

  # Any direct use of the Rails.cache store API counts as low-level caching,
  # including invalidation-only (delete) and existence checks.
  LOW_LEVEL_PATTERN = /Rails\.cache\.(?:fetch|read|write|delete|exist\?)/

  # Upper bound (in characters) on the text inspected for a single call, so an
  # unbalanced parenthesis cannot drag the rest of the file into one snippet.
  MAX_CALL_SNIPPET_LENGTH = 500

  def initialize
    @rails_root = Rails.root
  end

  # Extract caching units from all scanned files.
  #
  # @return [Array<ExtractedUnit>] One unit per file with cache calls
  def extract_all
    SCAN_PATTERNS.flat_map do |file_type, pattern|
      Dir[@rails_root.join(pattern)].map { |file| extract_caching_file(file, file_type) }
    end.compact
  end

  # Extract a single file for caching patterns.
  #
  # Returns nil if the file contains no cache calls, or if reading or
  # analysing the file raises (the error is logged, not propagated).
  #
  # @param file_path [String] Absolute path to the file
  # @param file_type [Symbol, nil] :controller, :model, or :view; inferred
  #   from the path when omitted
  # @return [ExtractedUnit, nil] The unit or nil if no cache usage
  def extract_caching_file(file_path, file_type = nil)
    source = File.read(file_path)

    return nil unless cache_usage?(source)

    file_type ||= infer_file_type(file_path)
    identifier = relative_path(file_path)

    unit = ExtractedUnit.new(
      type: :caching,
      identifier: identifier,
      file_path: file_path
    )

    unit.namespace = nil
    unit.source_code = annotate_source(source, identifier, file_type)
    unit.metadata = extract_metadata(source, file_type)
    unit.dependencies = extract_dependencies(source)

    unit
  rescue StandardError => e
    Rails.logger.error("Failed to extract caching info from #{file_path}: #{e.message}")
    nil
  end

  private

  # ──────────────────────────────────────────────────────────────────────
  # Detection
  # ──────────────────────────────────────────────────────────────────────

  # Check whether the source matches any of the CACHE_PATTERNS.
  #
  # @param source [String] Ruby or ERB source
  # @return [Boolean]
  def cache_usage?(source)
    CACHE_PATTERNS.each_value.any? { |pattern| source.match?(pattern) }
  end

  # ──────────────────────────────────────────────────────────────────────
  # Source Annotation
  # ──────────────────────────────────────────────────────────────────────

  # Prepend a summary annotation header to the source.
  #
  # @param source [String] Source code
  # @param identifier [String] Relative file path identifier
  # @param file_type [Symbol] :controller, :model, or :view
  # @return [String] Annotated source
  def annotate_source(source, identifier, file_type)
    annotation = <<~ANNOTATION
      # ╔═══════════════════════════════════════════════════════════════════════╗
      # ║ Caching: #{identifier.ljust(59)}║
      # ║ File type: #{file_type.to_s.ljust(57)}║
      # ╚═══════════════════════════════════════════════════════════════════════╝

    ANNOTATION

    annotation + source
  end

  # ──────────────────────────────────────────────────────────────────────
  # Metadata Extraction
  # ──────────────────────────────────────────────────────────────────────

  # Build the metadata hash for a caching unit.
  #
  # :loc counts lines that are neither blank nor start with '#'.
  #
  # @param source [String] Source code
  # @param file_type [Symbol] :controller, :model, or :view
  # @return [Hash] Caching metadata (:cache_calls, :cache_strategy, :file_type, :loc)
  def extract_metadata(source, file_type)
    cache_calls = extract_cache_calls(source)
    {
      cache_calls: cache_calls,
      cache_strategy: infer_cache_strategy(source, cache_calls),
      file_type: file_type,
      loc: source.lines.count { |l| l.strip.length.positive? && !l.strip.start_with?('#') }
    }
  end

  # Extract individual cache call entries from source.
  #
  # Each entry has :type, :key_pattern, and :ttl. The key and TTL are read
  # from the text of that specific call (see #call_snippet), not from the
  # file as a whole, so two calls with different keys/TTLs are reported
  # with their own values.
  #
  # @param source [String] Source code
  # @return [Array<Hash>] Cache call descriptors, grouped in CACHE_PATTERNS order
  def extract_cache_calls(source)
    calls = []

    CACHE_PATTERNS.each do |type, pattern|
      source.scan(pattern) do
        # Capture the offset first: the helper calls below run their own matches.
        snippet = call_snippet(source, Regexp.last_match.begin(0))
        calls << {
          type: type,
          key_pattern: extract_key_pattern(snippet, type),
          ttl: extract_ttl(snippet)
        }
      end
    end

    calls
  end

  # Return the text of the call that starts at +offset+.
  #
  # The snippet runs to the end of the line, or further when a parenthesis
  # or bracket opened by the call is still unclosed at the line break
  # (multi-line argument lists). Capped at MAX_CALL_SNIPPET_LENGTH characters.
  #
  # @param source [String] Source code
  # @param offset [Integer] Character offset where the call starts
  # @return [String] The call text
  def call_snippet(source, offset)
    depth = 0
    length = 0

    source[offset..-1].each_char do |char|
      case char
      when '(', '[' then depth += 1
      when ')', ']' then depth -= 1
      when "\n"
        break unless depth.positive?
      end
      length += 1
      break if length >= MAX_CALL_SNIPPET_LENGTH
    end

    source[offset, length]
  end

  # Extract the key pattern for a Rails.cache call.
  #
  # Returns the first argument of the first Rails.cache.* call found in
  # +source+, stripped and truncated to 60 characters.
  #
  # @param source [String] Source text to search (a whole file or one call)
  # @param type [Symbol] The cache call type
  # @return [String, nil] The key pattern, or nil for non-keyed call types
  #   or when no Rails.cache call is present
  def extract_key_pattern(source, type)
    return nil unless KEYED_CALL_TYPES.include?(type)

    match = source.match(KEY_PATTERN)
    match ? match[1].strip[0, 60] : nil
  end

  # Extract TTL value from the first expires_in option in +source+.
  #
  # @param source [String] Source text to search (a whole file or one call)
  # @return [String, nil] The TTL expression or nil
  def extract_ttl(source)
    match = source.match(TTL_PATTERN)
    match ? match[1].strip : nil
  end

  # Infer the caching strategy from the call types present.
  #
  # Any Rails.cache.fetch/read/write/delete/exist? call counts as low-level
  # caching. Files that only reference cache_key / cache_version match no
  # strategy and yield :unknown.
  #
  # @param source [String] Source code
  # @param _cache_calls [Array<Hash>] Extracted cache calls (currently unused)
  # @return [Symbol] :fragment, :action, :low_level, :mixed, or :unknown
  def infer_cache_strategy(source, _cache_calls)
    strategies = []
    strategies << :action if source.match?(CACHE_PATTERNS[:caches_action])
    strategies << :fragment if source.match?(CACHE_PATTERNS[:fragment])
    strategies << :low_level if source.match?(LOW_LEVEL_PATTERN)

    return :mixed if strategies.size > 1

    strategies.first || :unknown
  end

  # ──────────────────────────────────────────────────────────────────────
  # Helpers
  # ──────────────────────────────────────────────────────────────────────

  # Infer the file type from the file path.
  #
  # @param file_path [String] Absolute path to the file
  # @return [Symbol] :controller, :model, :view, or :unknown
  def infer_file_type(file_path)
    case file_path
    when %r{app/controllers/} then :controller
    when %r{app/models/} then :model
    when %r{app/views/} then :view
    else :unknown
    end
  end

  # Compute the relative path from Rails root.
  #
  # Only a leading "<rails_root>/" is removed; a path outside the root is
  # returned unchanged.
  #
  # @param file_path [String] Absolute path
  # @return [String] Relative path (e.g., "app/controllers/products_controller.rb")
  def relative_path(file_path)
    path = file_path.to_s
    prefix = "#{@rails_root}/"
    path.start_with?(prefix) ? path[prefix.length..-1] : path
  end

  # ──────────────────────────────────────────────────────────────────────
  # Dependency Extraction
  # ──────────────────────────────────────────────────────────────────────

  # Build the dependency array by delegating to scan_common_dependencies
  # (not defined in this class; presumably provided by an included module).
  #
  # @param source [String] Source code
  # @return [Array<Hash>] Dependency hashes with :type, :target, :via
  def extract_dependencies(source)
    scan_common_dependencies(source)
  end
end
260
+ end
261
+ end
@@ -0,0 +1,232 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'set'
4
+ require_relative '../ast/parser'
5
+ require_relative '../flow_analysis/operation_extractor'
6
+
7
+ module CodebaseIndex
8
+ module Extractors
9
+ # Analyzes callback method bodies to detect side effects.
10
+ #
11
+ # Given a model's composite source code (with inlined concerns) and its
12
+ # callback metadata, this analyzer finds each callback method body and
13
+ # classifies its side effects: column writes, job enqueues, service calls,
14
+ # mailer triggers, and database reads.
15
+ #
16
+ # @example
17
+ # analyzer = CallbackAnalyzer.new(
18
+ # source_code: model_source,
19
+ # column_names: %w[email status name]
20
+ # )
21
+ # enriched = analyzer.analyze(callback_hash)
22
+ # enriched[:side_effects][:columns_written] #=> ["email"]
23
+ #
24
class CallbackAnalyzer
  # Database query methods that indicate a read operation.
  DB_READ_METHODS = %w[find where pluck first last].freeze

  # Methods that write a single column, taking column name as first argument.
  SINGLE_COLUMN_WRITERS = %w[update_column write_attribute].freeze

  # Methods that write multiple columns via keyword arguments.
  MULTI_COLUMN_WRITERS = %w[update_columns assign_attributes].freeze

  # Async enqueue methods that indicate a job is being dispatched.
  ASYNC_METHODS = %w[perform_later perform_async perform_in perform_at].freeze

  # Matches `self.col = v` and compound forms (`+=`, `||=`, ...), which also
  # invoke the setter. The lookahead rejects `==`, `=~` and `=>`, which
  # read the attribute rather than assign it.
  SELF_ASSIGNMENT_PATTERN = /self\.(\w+)\s*(?:\|\||&&|[-+*\/%|&^])?=(?![=~>])/

  # Matches `FooJob.perform_later` / `FooWorker.perform_async`, optionally
  # with a `.set(...)` options call between the class and the enqueue method.
  JOB_ENQUEUE_PATTERN = /(\w+(?:Job|Worker))(?:\.set\([^)]*\))?\.#{Regexp.union(ASYNC_METHODS)}/

  # Parses the source once up front; a parse failure leaves the analyzer
  # usable, with every callback reported as having no side effects.
  #
  # @param source_code [String] Composite model source (with inlined concerns)
  # @param column_names [Array<String>] Model's database column names
  def initialize(source_code:, column_names: [])
    @source_code = source_code
    @column_names = column_names.map(&:to_s)
    @parser = Ast::Parser.new
    @operation_extractor = FlowAnalysis::OperationExtractor.new
    @parsed_root = safe_parse
  end

  # Analyze a single callback and enrich it with side-effect data.
  #
  # Finds the callback's method body in the source, scans it for
  # side effects, and returns a copy of the callback hash with an
  # added :side_effects key. When the method cannot be located (for
  # example a proc filter, or unparseable source) the side effects are
  # all empty.
  #
  # @param callback_hash [Hash] Callback metadata from ModelExtractor:
  #   { type:, filter:, kind:, conditions: }
  # @return [Hash] The callback hash with an added :side_effects key
  def analyze(callback_hash)
    method_node = find_method_node(callback_hash[:filter].to_s)
    method_source = method_node && method_source_from_node(method_node)

    return callback_hash.merge(side_effects: empty_side_effects) if method_source.nil?

    callback_hash.merge(
      side_effects: {
        columns_written: detect_columns_written(method_source),
        jobs_enqueued: detect_jobs_enqueued(method_source),
        services_called: detect_services_called(method_source),
        mailers_triggered: detect_mailers_triggered(method_source),
        database_reads: detect_database_reads(method_source),
        operations: extract_operations(method_node)
      }
    )
  end

  private

  # Parse source code safely, returning nil on failure.
  #
  # @return [Ast::Node, nil]
  def safe_parse
    @parser.parse(@source_code)
  rescue StandardError
    nil
  end

  # Find a method definition node by name in the cached AST.
  #
  # @param method_name [String]
  # @return [Ast::Node, nil] nil when the source did not parse, the name is
  #   not a plausible method name, or no matching :def node exists
  def find_method_node(method_name)
    return nil unless @parsed_root
    return nil if method_name.empty? || !valid_method_name?(method_name)

    @parsed_root.find_all(:def).find { |node| node.method_name == method_name }
  end

  # Extract the raw source text of a method from its AST node.
  #
  # Prefers the node's own source; otherwise slices the original source
  # by the node's 1-based line range.
  #
  # @param node [Ast::Node]
  # @return [String, nil] nil when the node carries neither source nor a
  #   usable line range
  def method_source_from_node(node)
    return node.source if node.source
    return nil unless node.line && node.end_line

    lines = @source_code.lines
    start_idx = node.line - 1
    end_idx = node.end_line - 1
    return nil if start_idx.negative? || end_idx >= lines.length

    lines[start_idx..end_idx].join
  end

  # Check if a filter string looks like a valid Ruby method name.
  # Rejects proc/lambda string representations and other non-method filters.
  #
  # @param name [String]
  # @return [Boolean]
  def valid_method_name?(name)
    name.match?(/\A[a-z_]\w*[!?=]?\z/i)
  end

  # Detect columns written by the callback method.
  #
  # Scans for self.col= assignments (including compound assignments such
  # as `+=` and `||=`), update_column, update_columns, write_attribute,
  # and assign_attributes calls, keeping only names present in the
  # model's known column_names.
  #
  # @param method_source [String]
  # @return [Array<String>] Unique column names, sorted
  def detect_columns_written(method_source)
    candidates = method_source.scan(SELF_ASSIGNMENT_PATTERN).flatten

    # Pattern: update_column(:col, ...) / write_attribute(:col, ...)
    SINGLE_COLUMN_WRITERS.each do |writer|
      candidates.concat(
        method_source.scan(/\b#{Regexp.escape(writer)}\s*\(?\s*[:'"](\w+)/).flatten
      )
    end

    # Pattern: update_columns(col: ...) / assign_attributes(col: ...)
    MULTI_COLUMN_WRITERS.each do |writer|
      method_source.scan(/\b#{Regexp.escape(writer)}\s*\(([^)]+)\)/m) do |(arguments)|
        candidates.concat(arguments.scan(/\b(\w+)\s*:(?!:)/).flatten)
      end
    end

    # Intersection drops unknown names and duplicates in one step.
    (candidates & @column_names).sort
  end

  # Detect jobs enqueued by the callback method.
  #
  # Matches classes ending in Job/Worker calling an async dispatch method,
  # either directly or through a `.set(...)` options call.
  #
  # @param method_source [String]
  # @return [Array<String>] Unique class names (without namespace), sorted
  def detect_jobs_enqueued(method_source)
    method_source.scan(JOB_ENQUEUE_PATTERN).flatten.uniq.sort
  end

  # Detect service objects called by the callback method.
  #
  # Matches names ending in Service that are followed by `.` or `::`.
  #
  # @param method_source [String]
  # @return [Array<String>] Unique names, sorted
  def detect_services_called(method_source)
    method_source.scan(/(\w+Service)(?:\.|::)/).flatten.uniq.sort
  end

  # Detect mailers triggered by the callback method.
  #
  # Matches names ending in Mailer that are followed by `.`.
  #
  # @param method_source [String]
  # @return [Array<String>] Unique names, sorted
  def detect_mailers_triggered(method_source)
    method_source.scan(/(\w+Mailer)\./).flatten.uniq.sort
  end

  # Detect database read operations in the callback method.
  #
  # Checks for the DB_READ_METHODS names called via dot notation. This is
  # a textual heuristic: any receiver counts, not only ActiveRecord ones.
  #
  # @param method_source [String]
  # @return [Array<String>] Matching method names, in DB_READ_METHODS order
  def detect_database_reads(method_source)
    DB_READ_METHODS.select do |method|
      method_source.match?(/\.#{Regexp.escape(method)}\b/)
    end
  end

  # Extract operations using OperationExtractor from the method's AST node.
  #
  # Best-effort: any extractor error yields an empty list.
  #
  # @param method_node [Ast::Node, nil]
  # @return [Array<Hash>]
  def extract_operations(method_node)
    return [] unless method_node

    @operation_extractor.extract(method_node)
  rescue StandardError
    []
  end

  # Return an empty side-effects structure.
  #
  # @return [Hash]
  def empty_side_effects
    {
      columns_written: [],
      jobs_enqueued: [],
      services_called: [],
      mailers_triggered: [],
      database_reads: [],
      operations: []
    }
  end
end
231
+ end
232
+ end