codebase_index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/exe/codebase-console-mcp +22 -0
  8. data/exe/codebase-index-mcp +61 -0
  9. data/exe/codebase-index-mcp-http +64 -0
  10. data/exe/codebase-index-mcp-start +58 -0
  11. data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
  12. data/lib/codebase_index/ast/method_extractor.rb +76 -0
  13. data/lib/codebase_index/ast/node.rb +88 -0
  14. data/lib/codebase_index/ast/parser.rb +653 -0
  15. data/lib/codebase_index/ast.rb +6 -0
  16. data/lib/codebase_index/builder.rb +137 -0
  17. data/lib/codebase_index/chunking/chunk.rb +84 -0
  18. data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
  19. data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
  20. data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
  21. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
  22. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
  23. data/lib/codebase_index/console/audit_logger.rb +75 -0
  24. data/lib/codebase_index/console/bridge.rb +170 -0
  25. data/lib/codebase_index/console/confirmation.rb +90 -0
  26. data/lib/codebase_index/console/connection_manager.rb +173 -0
  27. data/lib/codebase_index/console/console_response_renderer.rb +78 -0
  28. data/lib/codebase_index/console/model_validator.rb +81 -0
  29. data/lib/codebase_index/console/safe_context.rb +82 -0
  30. data/lib/codebase_index/console/server.rb +557 -0
  31. data/lib/codebase_index/console/sql_validator.rb +172 -0
  32. data/lib/codebase_index/console/tools/tier1.rb +118 -0
  33. data/lib/codebase_index/console/tools/tier2.rb +117 -0
  34. data/lib/codebase_index/console/tools/tier3.rb +110 -0
  35. data/lib/codebase_index/console/tools/tier4.rb +79 -0
  36. data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
  37. data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
  38. data/lib/codebase_index/cost_model/estimator.rb +128 -0
  39. data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
  40. data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
  41. data/lib/codebase_index/cost_model.rb +22 -0
  42. data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
  43. data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
  44. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
  45. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
  46. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
  47. data/lib/codebase_index/db/migrator.rb +71 -0
  48. data/lib/codebase_index/db/schema_version.rb +73 -0
  49. data/lib/codebase_index/dependency_graph.rb +227 -0
  50. data/lib/codebase_index/embedding/indexer.rb +130 -0
  51. data/lib/codebase_index/embedding/openai.rb +105 -0
  52. data/lib/codebase_index/embedding/provider.rb +135 -0
  53. data/lib/codebase_index/embedding/text_preparer.rb +112 -0
  54. data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
  55. data/lib/codebase_index/evaluation/evaluator.rb +146 -0
  56. data/lib/codebase_index/evaluation/metrics.rb +79 -0
  57. data/lib/codebase_index/evaluation/query_set.rb +148 -0
  58. data/lib/codebase_index/evaluation/report_generator.rb +90 -0
  59. data/lib/codebase_index/extracted_unit.rb +145 -0
  60. data/lib/codebase_index/extractor.rb +956 -0
  61. data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
  62. data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
  63. data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
  64. data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
  65. data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
  66. data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
  67. data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
  68. data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
  69. data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
  70. data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
  71. data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
  72. data/lib/codebase_index/extractors/event_extractor.rb +211 -0
  73. data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
  74. data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
  75. data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
  76. data/lib/codebase_index/extractors/job_extractor.rb +369 -0
  77. data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
  78. data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
  79. data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
  80. data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
  81. data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
  82. data/lib/codebase_index/extractors/model_extractor.rb +960 -0
  83. data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
  84. data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
  85. data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
  86. data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
  87. data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
  88. data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
  89. data/lib/codebase_index/extractors/route_extractor.rb +181 -0
  90. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
  91. data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
  92. data/lib/codebase_index/extractors/service_extractor.rb +254 -0
  93. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
  94. data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
  95. data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
  96. data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
  97. data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
  98. data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
  99. data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
  100. data/lib/codebase_index/feedback/gap_detector.rb +89 -0
  101. data/lib/codebase_index/feedback/store.rb +119 -0
  102. data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
  103. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
  104. data/lib/codebase_index/flow_assembler.rb +290 -0
  105. data/lib/codebase_index/flow_document.rb +191 -0
  106. data/lib/codebase_index/flow_precomputer.rb +102 -0
  107. data/lib/codebase_index/formatting/base.rb +40 -0
  108. data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
  109. data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
  110. data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
  111. data/lib/codebase_index/formatting/human_adapter.rb +78 -0
  112. data/lib/codebase_index/graph_analyzer.rb +374 -0
  113. data/lib/codebase_index/mcp/index_reader.rb +394 -0
  114. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
  115. data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
  116. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
  117. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
  118. data/lib/codebase_index/mcp/server.rb +935 -0
  119. data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
  120. data/lib/codebase_index/model_name_cache.rb +51 -0
  121. data/lib/codebase_index/notion/client.rb +217 -0
  122. data/lib/codebase_index/notion/exporter.rb +219 -0
  123. data/lib/codebase_index/notion/mapper.rb +39 -0
  124. data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
  125. data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
  126. data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
  127. data/lib/codebase_index/notion/rate_limiter.rb +68 -0
  128. data/lib/codebase_index/observability/health_check.rb +81 -0
  129. data/lib/codebase_index/observability/instrumentation.rb +34 -0
  130. data/lib/codebase_index/observability/structured_logger.rb +75 -0
  131. data/lib/codebase_index/operator/error_escalator.rb +81 -0
  132. data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
  133. data/lib/codebase_index/operator/status_reporter.rb +80 -0
  134. data/lib/codebase_index/railtie.rb +26 -0
  135. data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
  136. data/lib/codebase_index/resilience/index_validator.rb +185 -0
  137. data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
  138. data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
  139. data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
  140. data/lib/codebase_index/retrieval/ranker.rb +273 -0
  141. data/lib/codebase_index/retrieval/search_executor.rb +327 -0
  142. data/lib/codebase_index/retriever.rb +160 -0
  143. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
  144. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
  145. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
  146. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
  147. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
  148. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
  149. data/lib/codebase_index/ruby_analyzer.rb +87 -0
  150. data/lib/codebase_index/session_tracer/file_store.rb +111 -0
  151. data/lib/codebase_index/session_tracer/middleware.rb +143 -0
  152. data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
  153. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
  154. data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
  155. data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
  156. data/lib/codebase_index/session_tracer/store.rb +67 -0
  157. data/lib/codebase_index/storage/graph_store.rb +120 -0
  158. data/lib/codebase_index/storage/metadata_store.rb +169 -0
  159. data/lib/codebase_index/storage/pgvector.rb +163 -0
  160. data/lib/codebase_index/storage/qdrant.rb +172 -0
  161. data/lib/codebase_index/storage/vector_store.rb +156 -0
  162. data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
  163. data/lib/codebase_index/version.rb +5 -0
  164. data/lib/codebase_index.rb +223 -0
  165. data/lib/generators/codebase_index/install_generator.rb +32 -0
  166. data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
  167. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
  168. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
  169. data/lib/tasks/codebase_index.rake +583 -0
  170. data/lib/tasks/codebase_index_evaluation.rake +115 -0
  171. metadata +252 -0
@@ -0,0 +1,469 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'shared_utility_methods'
4
+ require_relative 'shared_dependency_scanner'
5
+
6
+ module CodebaseIndex
7
+ module Extractors
8
+ # MigrationExtractor handles ActiveRecord migration file extraction.
9
+ #
10
+ # Scans `db/migrate/*.rb` for migration files and produces one
11
+ # ExtractedUnit per migration. Extracts DDL metadata (tables, columns,
12
+ # indexes, references), reversibility, risk indicators (data migrations,
13
+ # raw SQL), and links to affected models via table name classification.
14
+ #
15
+ # @example
16
+ # extractor = MigrationExtractor.new
17
+ # units = extractor.extract_all
18
+ # create_users = units.find { |u| u.identifier == "CreateUsers" }
19
+ # create_users.metadata[:tables_affected] # => ["users"]
20
+ #
21
+ class MigrationExtractor
22
+ include SharedUtilityMethods
23
+ include SharedDependencyScanner
24
+
25
# Framework-owned tables (schema bookkeeping, Active Storage, Action Text,
# Action Mailbox). Tables listed here are skipped when table names are
# turned into model dependencies.
INTERNAL_TABLES = %w[
  schema_migrations ar_internal_metadata
  active_storage_blobs active_storage_attachments active_storage_variant_records
  action_text_rich_texts action_mailbox_inbound_emails
].freeze
35
+
36
# DDL operations that take a table name as the first symbol argument.
#
# Each entry is interpolated into a regular expression of the form
# `<operation>\s+:<table>`, so only operations whose first positional
# argument is the table belong here. `change_table` is appended last so the
# relative order of the pre-existing entries (and therefore the order of
# anything derived from this list) is unchanged.
TABLE_OPERATIONS = %w[
  create_table
  drop_table
  rename_table
  add_column
  remove_column
  change_column
  rename_column
  add_index
  remove_index
  add_reference
  remove_reference
  add_belongs_to
  remove_belongs_to
  add_foreign_key
  remove_foreign_key
  add_timestamps
  remove_timestamps
  change_column_default
  change_column_null
  change_table
].freeze
58
+
59
# Column-type shorthand methods recognised inside create_table blocks
# (`t.string :name`, `t.jsonb :payload`, ...). The entries are joined into a
# regex alternation by the block-column parser.
COLUMN_TYPE_METHODS = %w[
  string integer float decimal boolean binary text date datetime time timestamp
  bigint numeric json jsonb uuid inet cidr hstore ltree point polygon
].freeze
66
+
67
# Patterns indicating a data migration (record-level reads/writes, not just DDL).
#
# The bang variants deliberately have no trailing `\b`: `!` is a non-word
# character, so `!\b` would only match when a word character follows the
# bang, which never happens in a real call such as `record.save!` or
# `record.update!(attrs)`.
DATA_MIGRATION_PATTERNS = [
  /\.update_all\b/,
  /\.find_each\b/,
  /\.find_in_batches\b/,
  /\.update!/,
  /\.update\b/,
  /\.save!/,
  /\.save\b/,
  /\.delete_all\b/,
  /\.destroy_all\b/
].freeze
79
+
80
# Resolve db/migrate under Rails.root and remember whether that path is a
# directory, so extraction can bail out early when it is not.
def initialize
  migrate_dir = Rails.root.join('db/migrate')

  @migrate_dir = migrate_dir
  @has_directory = migrate_dir.directory?
end
84
+
85
# Extract all migration files from db/migrate/
#
# Paths are sorted explicitly: every file lives in the same directory, so a
# plain string sort orders them by their leading filename timestamp. Without
# it the order depends on how the running Ruby/filesystem returns glob
# results. Files for which extract_migration_file returns nil are dropped.
#
# @return [Array<ExtractedUnit>] List of migration units, sorted by timestamp
def extract_all
  return [] unless @has_directory

  Dir.glob(@migrate_dir.join('*.rb').to_s)
     .sort
     .filter_map { |file| extract_migration_file(file) }
end
94
+
95
# Build an ExtractedUnit for one migration file.
#
# The file is skipped (nil) when no `class X <` declaration is found or when
# the source does not subclass ActiveRecord::Migration. Any StandardError
# raised while reading or analysing the file is logged through Rails.logger
# and also results in nil.
#
# @param file_path [String] Path to the migration file
# @return [ExtractedUnit, nil] The extracted unit or nil if not a migration
def extract_migration_file(file_path)
  source = File.read(file_path)
  class_name = extract_class_name(source)
  return unless class_name && migration_class?(source)

  ExtractedUnit.new(type: :migration, identifier: class_name, file_path: file_path).tap do |unit|
    unit.namespace = extract_namespace(class_name)
    unit.metadata = extract_metadata(source, file_path)
    # Annotation and dependencies are derived from the metadata stored above.
    unit.source_code = annotate_source(source, class_name, unit.metadata)
    unit.dependencies = extract_dependencies(source, unit.metadata)
  end
rescue StandardError => e
  Rails.logger.error("Failed to extract migration #{file_path}: #{e.message}")
  nil
end
122
+
123
+ private
124
+
125
+ # ──────────────────────────────────────────────────────────────────────
126
+ # Class Discovery
127
+ # ──────────────────────────────────────────────────────────────────────
128
+
129
# Determine the (possibly namespaced) class name declared in the source.
#
# The first `class Name <` declaration wins. When the name is not already
# qualified with `::`, every `module` declaration found in the file is
# prepended, in order of appearance.
#
# @param source [String] Ruby source code
# @return [String, nil] The class name or nil
def extract_class_name(source)
  declared = source[/^\s*class\s+([\w:]+)\s*</, 1]
  return nil unless declared

  enclosing = source.scan(/^\s*module\s+([\w:]+)/).flatten
  return declared if enclosing.empty? || declared.include?('::')

  "#{enclosing.join('::')}::#{declared}"
end
146
+
147
# Check whether the source defines an ActiveRecord::Migration subclass.
#
# Accepts namespaced class names (`Admin::CreateUsers`) so that it agrees
# with the names extract_class_name can return, and tolerates a leading `::`
# on the superclass. Both versioned (`Migration[7.0]`) and unversioned
# superclasses match.
#
# @param source [String] Ruby source code
# @return [Boolean]
def migration_class?(source)
  source.match?(/class\s+[\w:]+\s*<\s*(?:::)?ActiveRecord::Migration/)
end
154
+
155
+ # ──────────────────────────────────────────────────────────────────────
156
+ # Metadata Extraction
157
+ # ──────────────────────────────────────────────────────────────────────
158
+
159
# Assemble the metadata hash for one migration.
#
# `reversible` is true for migrations that define `change` or both `up` and
# `down`. `has_execute_sql` detects `execute` followed by whitespace or an
# opening parenthesis, so both `execute "..."`/heredoc and `execute("...")`
# call styles are flagged. `loc` counts lines that are neither blank nor
# comment-only.
#
# @param source [String] Ruby source code
# @param file_path [String] Path to the migration file
# @return [Hash] Migration metadata
def extract_metadata(source, file_path)
  tables = extract_tables_affected(source)
  direction = detect_direction(source)

  {
    migration_version: extract_migration_version(file_path),
    rails_version: extract_rails_version(source),
    reversible: %w[change up_down].include?(direction),
    direction: direction,
    tables_affected: tables,
    columns_added: extract_columns_added(source),
    columns_removed: extract_columns_removed(source),
    indexes_added: extract_indexes_added(source),
    indexes_removed: extract_indexes_removed(source),
    references_added: extract_references_added(source),
    references_removed: extract_references_removed(source),
    operations: extract_operations(source),
    has_data_migration: data_migration?(source),
    has_execute_sql: source.match?(/\bexecute[\s(]/),
    loc: source.lines.map(&:strip).count { |line| !line.empty? && !line.start_with?('#') }
  }
end
184
+
185
# Read the 14-digit timestamp prefix (`YYYYMMDDHHMMSS_`) from the file name.
#
# @param file_path [String] Path to the migration file
# @return [String, nil] The timestamp or nil
def extract_migration_version(file_path)
  File.basename(file_path)[/\A(\d{14})_/, 1]
end
194
+
195
# Read the `major.minor` version from the `ActiveRecord::Migration[x.y]`
# superclass notation, if present.
#
# @param source [String] Ruby source code
# @return [String, nil] The Rails version or nil
def extract_rails_version(source)
  source[/ActiveRecord::Migration\[(\d+\.\d+)\]/, 1]
end
203
+
204
+ # ──────────────────────────────────────────────────────────────────────
205
+ # Direction / Reversibility
206
+ # ──────────────────────────────────────────────────────────────────────
207
+
208
# Detect migration direction from method definitions.
#
# `change` takes precedence over `up`/`down`. The legacy class-method form
# (`def self.up` / `def self.down`) is recognised alongside the instance
# form, so old-style migrations are not reported as "unknown". A migration
# that defines only `down` is still "unknown".
#
# @param source [String] Ruby source code
# @return [String] One of "change", "up_down", "up_only", "unknown"
def detect_direction(source)
  return 'change' if source.match?(/^\s*def\s+change\b/)
  return 'unknown' unless source.match?(/^\s*def\s+(?:self\.)?up\b/)

  source.match?(/^\s*def\s+(?:self\.)?down\b/) ? 'up_down' : 'up_only'
end
227
+
228
+ # ──────────────────────────────────────────────────────────────────────
229
+ # DDL Extraction
230
+ # ──────────────────────────────────────────────────────────────────────
231
+
232
# Collect every table named as the first symbol argument of a known DDL
# operation, plus the destination table of `rename_table`.
#
# Results are grouped by operation (in TABLE_OPERATIONS order), followed by
# rename destinations, with duplicates removed.
#
# @param source [String] Ruby source code
# @return [Array<String>] Deduplicated table names
def extract_tables_affected(source)
  first_arguments = TABLE_OPERATIONS.flat_map { |op| source.scan(/#{op}\s+:(\w+)/).flatten }
  # rename_table has two table arguments; the second is picked up here.
  rename_targets = source.scan(/rename_table\s+:\w+\s*,\s*:(\w+)/).flatten

  (first_arguments + rename_targets).uniq
end
252
+
253
# Collect added columns from three sources, in this order: top-level
# `add_column :table, :column, :type` calls, `t.<type> :column` shorthand
# inside create_table blocks, and `t.column :name, :type` calls.
#
# @param source [String] Ruby source code
# @return [Array<Hash>] Column info hashes with :table, :column, :type
def extract_columns_added(source)
  added = []

  source.scan(/add_column\s+:(\w+)\s*,\s*:(\w+)\s*,\s*:(\w+)/) do |table, column, type|
    added << { table: table, column: column, type: type }
  end

  # Both helpers append to the accumulator in place.
  extract_block_columns(source, added)
  extract_explicit_column_calls(source, added)

  added
end
271
+
272
# Collect `remove_column :table, :column[, :type]` calls. The type argument
# is optional; when it is absent the entry's type is the string 'unknown'.
#
# @param source [String] Ruby source code
# @return [Array<Hash>] Column info hashes
def extract_columns_removed(source)
  matches = source.scan(/remove_column\s+:(\w+)\s*,\s*:(\w+)(?:\s*,\s*:(\w+))?/)

  matches.each_with_object([]) do |(table, column, type), removed|
    removed << { table: table, column: column, type: type || 'unknown' }
  end
end
281
+
282
# Collect `add_index :table, <columns>` calls.
#
# The column expression runs up to the first keyword option (`, name:`) or
# the end of the line. A bracketed list such as `[:user_id, :created_at]`
# is flattened to a comma-separated string ("user_id,created_at"); anything
# else only has its colons removed.
#
# @param source [String] Ruby source code
# @return [Array<Hash>] Index info hashes with :table, :column
def extract_indexes_added(source)
  source.scan(/add_index\s+:(\w+)\s*,\s*(.+?)(?:\s*,\s*\w+:|$)/m).map do |table, column_expr|
    # Drop any trailing keyword options the lazy capture still contains.
    target = column_expr.strip.sub(/\s*,\s*\w+:.*\z/m, '').strip
    composite = target.start_with?('[')

    { table: table, column: composite ? target.gsub(/[\[\]:"\s]/, '') : target.delete(':').strip }
  end
end
298
+
299
# Collect `remove_index :table, :column` calls. Only the positional-symbol
# form is recognised; keyword forms (`column:`, `name:`) produce no entry.
#
# @param source [String] Ruby source code
# @return [Array<Hash>] Index info hashes
def extract_indexes_removed(source)
  source.scan(/remove_index\s+:(\w+)\s*,\s*:(\w+)/).map do |captures|
    %i[table column].zip(captures).to_h
  end
end
308
+
309
# Extract references added via add_reference / add_belongs_to or via
# t.references inside create_table blocks.
#
# `add_belongs_to` is the alias of `add_reference` and is already listed in
# TABLE_OPERATIONS, so both spellings are recognised here.
#
# @param source [String] Ruby source code
# @return [Array<Hash>] Reference info hashes with :table, :reference
def extract_references_added(source)
  # add_reference :table, :reference (or add_belongs_to)
  refs = source.scan(/add_(?:reference|belongs_to)\s+:(\w+)\s*,\s*:(\w+)/).map do |table, reference|
    { table: table, reference: reference }
  end

  # Block-level references are appended to the same accumulator in place.
  extract_block_references(source, refs)

  refs
end
324
+
325
# Extract references removed via remove_reference or its alias
# remove_belongs_to (both are listed in TABLE_OPERATIONS).
#
# @param source [String] Ruby source code
# @return [Array<Hash>] Reference info hashes
def extract_references_removed(source)
  source.scan(/remove_(?:reference|belongs_to)\s+:(\w+)\s*,\s*:(\w+)/).map do |table, reference|
    { table: table, reference: reference }
  end
end
334
+
335
+ # ──────────────────────────────────────────────────────────────────────
336
+ # Block Column / Reference Parsing
337
+ # ──────────────────────────────────────────────────────────────────────
338
+
339
# Extract t.type :column declarations inside create_table blocks.
#
# A type shorthand may declare several columns at once
# (`t.string :first_name, :last_name`); every leading symbol argument is
# recorded. Symbols that are hash-rocket option keys (`:default => 0`) are
# not treated as column names.
#
# @param source [String] Ruby source code
# @param columns [Array<Hash>] Accumulator array
# @return [void]
def extract_block_columns(source, columns)
  # Loop-invariant: build the type alternation once, not once per table.
  type_pattern = COLUMN_TYPE_METHODS.join('|')

  # Find create_table blocks and parse t.type :column patterns
  source.scan(/create_table\s+:(\w+).*?do\s*\|(\w+)\|(.+?)^\s*end/m).each do |table, var, block|
    declaration = /#{var}\.(#{type_pattern})\s+(:\w+(?:\s*,\s*:\w+\b(?!\s*=>))*)/

    block.scan(declaration).each do |type, names|
      names.scan(/:(\w+)/).flatten.each do |column|
        columns << { table: table, column: column, type: type }
      end
    end
  end
end
353
+
354
# Append every `t.column :name, :type` declaration found inside
# create_table blocks to the accumulator. The block variable name is taken
# from the block's own `|var|` parameter.
#
# @param source [String] Ruby source code
# @param columns [Array<Hash>] Accumulator array
# @return [void]
def extract_explicit_column_calls(source, columns)
  source.scan(/create_table\s+:(\w+).*?do\s*\|(\w+)\|(.+?)^\s*end/m).each do |table, var, block|
    declarations = block.scan(/#{var}\.column\s+:(\w+)\s*,\s*:(\w+)/)
    columns.concat(declarations.map { |column, type| { table: table, column: column, type: type } })
  end
end
366
+
367
# Extract t.references declarations inside create_table blocks.
#
# `t.belongs_to` is the alias of `t.references`, so both spellings are
# recorded. Only the first symbol argument of each call is captured.
#
# @param source [String] Ruby source code
# @param refs [Array<Hash>] Accumulator array
# @return [void]
def extract_block_references(source, refs)
  source.scan(/create_table\s+:(\w+).*?do\s*\|(\w+)\|(.+?)^\s*end/m).each do |table, var, block|
    block.scan(/#{var}\.(?:references|belongs_to)\s+:(\w+)/).flatten.each do |reference|
      refs << { table: table, reference: reference }
    end
  end
end
379
+
380
+ # ──────────────────────────────────────────────────────────────────────
381
+ # Operations Tracking
382
+ # ──────────────────────────────────────────────────────────────────────
383
+
384
# Count how often each known DDL operation is invoked with a symbol first
# argument. Operations that never occur are omitted; entries follow
# TABLE_OPERATIONS order.
#
# @param source [String] Ruby source code
# @return [Array<Hash>] Operation hashes with :operation, :count
def extract_operations(source)
  TABLE_OPERATIONS.filter_map do |op|
    occurrences = source.scan(/#{op}\s+:/).size
    { operation: op, count: occurrences } if occurrences.positive?
  end
end
398
+
399
+ # ──────────────────────────────────────────────────────────────────────
400
+ # Risk Indicators
401
+ # ──────────────────────────────────────────────────────────────────────
402
+
403
# Report whether the source contains any record-level read/write call
# listed in DATA_MIGRATION_PATTERNS.
#
# @param source [String] Ruby source code
# @return [Boolean]
def data_migration?(source)
  source.match?(Regexp.union(DATA_MIGRATION_PATTERNS))
end
410
+
411
+ # ──────────────────────────────────────────────────────────────────────
412
+ # Source Annotation
413
+ # ──────────────────────────────────────────────────────────────────────
414
+
415
# Prefix the migration source with a boxed comment header listing the class
# name, version ('none' when no version is recorded), direction and the
# affected tables. A table list longer than 59 characters is cut to its
# first 56 characters followed by "...".
#
# @param source [String] Ruby source code
# @param class_name [String] The migration class name
# @param metadata [Hash] Extracted metadata
# @return [String] Annotated source
def annotate_source(source, class_name, metadata)
  table_list = metadata[:tables_affected].join(', ')
  table_list = "#{table_list[0, 56]}..." if table_list.length > 59
  version_label = metadata[:migration_version] || 'none'
  direction_label = metadata[:direction]

  <<~ANNOTATION
    # ╔═══════════════════════════════════════════════════════════════════════╗
    # ║ Migration: #{class_name.ljust(57)}║
    # ║ Version: #{version_label.ljust(59)}║
    # ║ Direction: #{direction_label.ljust(57)}║
    # ║ Tables: #{table_list.ljust(60)}║
    # ╚═══════════════════════════════════════════════════════════════════════╝

    #{source}
  ANNOTATION
end
436
+
437
+ # ──────────────────────────────────────────────────────────────────────
438
+ # Dependency Extraction
439
+ # ──────────────────────────────────────────────────────────────────────
440
+
441
# Derive the unit's dependencies from its metadata and source.
#
# Three groups are combined, in order: models inferred from affected table
# names (tables in INTERNAL_TABLES are skipped), models inferred from added
# or removed references, and whatever scan_common_dependencies reports for
# the source. Entries sharing the same type and target are collapsed,
# keeping the first occurrence.
#
# @param source [String] Ruby source code
# @param metadata [Hash] Extracted metadata
# @return [Array<Hash>] Dependency hashes
def extract_dependencies(source, metadata)
  table_deps = metadata[:tables_affected]
               .reject { |table| INTERNAL_TABLES.include?(table) }
               .map { |table| { type: :model, target: table.classify, via: :table_name } }

  references = (metadata[:references_added] + metadata[:references_removed]).uniq
  reference_deps = references.map do |ref|
    { type: :model, target: ref[:reference].classify, via: :reference }
  end

  combined = table_deps + reference_deps + scan_common_dependencies(source)
  combined.uniq { |dep| dep.values_at(:type, :target) }
end
467
+ end
468
+ end
469
+ end