woods 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (185)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +89 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +406 -0
  7. data/exe/woods-console +59 -0
  8. data/exe/woods-console-mcp +22 -0
  9. data/exe/woods-mcp +34 -0
  10. data/exe/woods-mcp-http +37 -0
  11. data/exe/woods-mcp-start +58 -0
  12. data/lib/generators/woods/install_generator.rb +32 -0
  13. data/lib/generators/woods/pgvector_generator.rb +37 -0
  14. data/lib/generators/woods/templates/add_pgvector_to_woods.rb.erb +15 -0
  15. data/lib/generators/woods/templates/create_woods_tables.rb.erb +43 -0
  16. data/lib/tasks/woods.rake +621 -0
  17. data/lib/tasks/woods_evaluation.rake +115 -0
  18. data/lib/woods/ast/call_site_extractor.rb +106 -0
  19. data/lib/woods/ast/method_extractor.rb +71 -0
  20. data/lib/woods/ast/node.rb +116 -0
  21. data/lib/woods/ast/parser.rb +614 -0
  22. data/lib/woods/ast.rb +6 -0
  23. data/lib/woods/builder.rb +200 -0
  24. data/lib/woods/cache/cache_middleware.rb +199 -0
  25. data/lib/woods/cache/cache_store.rb +264 -0
  26. data/lib/woods/cache/redis_cache_store.rb +116 -0
  27. data/lib/woods/cache/solid_cache_store.rb +111 -0
  28. data/lib/woods/chunking/chunk.rb +84 -0
  29. data/lib/woods/chunking/semantic_chunker.rb +295 -0
  30. data/lib/woods/console/adapters/cache_adapter.rb +58 -0
  31. data/lib/woods/console/adapters/good_job_adapter.rb +33 -0
  32. data/lib/woods/console/adapters/job_adapter.rb +68 -0
  33. data/lib/woods/console/adapters/sidekiq_adapter.rb +33 -0
  34. data/lib/woods/console/adapters/solid_queue_adapter.rb +33 -0
  35. data/lib/woods/console/audit_logger.rb +75 -0
  36. data/lib/woods/console/bridge.rb +177 -0
  37. data/lib/woods/console/confirmation.rb +90 -0
  38. data/lib/woods/console/connection_manager.rb +173 -0
  39. data/lib/woods/console/console_response_renderer.rb +74 -0
  40. data/lib/woods/console/embedded_executor.rb +373 -0
  41. data/lib/woods/console/model_validator.rb +81 -0
  42. data/lib/woods/console/rack_middleware.rb +87 -0
  43. data/lib/woods/console/safe_context.rb +82 -0
  44. data/lib/woods/console/server.rb +612 -0
  45. data/lib/woods/console/sql_validator.rb +172 -0
  46. data/lib/woods/console/tools/tier1.rb +118 -0
  47. data/lib/woods/console/tools/tier2.rb +117 -0
  48. data/lib/woods/console/tools/tier3.rb +110 -0
  49. data/lib/woods/console/tools/tier4.rb +79 -0
  50. data/lib/woods/coordination/pipeline_lock.rb +109 -0
  51. data/lib/woods/cost_model/embedding_cost.rb +88 -0
  52. data/lib/woods/cost_model/estimator.rb +128 -0
  53. data/lib/woods/cost_model/provider_pricing.rb +67 -0
  54. data/lib/woods/cost_model/storage_cost.rb +52 -0
  55. data/lib/woods/cost_model.rb +22 -0
  56. data/lib/woods/db/migrations/001_create_units.rb +38 -0
  57. data/lib/woods/db/migrations/002_create_edges.rb +35 -0
  58. data/lib/woods/db/migrations/003_create_embeddings.rb +37 -0
  59. data/lib/woods/db/migrations/004_create_snapshots.rb +45 -0
  60. data/lib/woods/db/migrations/005_create_snapshot_units.rb +40 -0
  61. data/lib/woods/db/migrations/006_rename_tables.rb +34 -0
  62. data/lib/woods/db/migrator.rb +73 -0
  63. data/lib/woods/db/schema_version.rb +73 -0
  64. data/lib/woods/dependency_graph.rb +236 -0
  65. data/lib/woods/embedding/indexer.rb +140 -0
  66. data/lib/woods/embedding/openai.rb +126 -0
  67. data/lib/woods/embedding/provider.rb +162 -0
  68. data/lib/woods/embedding/text_preparer.rb +112 -0
  69. data/lib/woods/evaluation/baseline_runner.rb +115 -0
  70. data/lib/woods/evaluation/evaluator.rb +139 -0
  71. data/lib/woods/evaluation/metrics.rb +79 -0
  72. data/lib/woods/evaluation/query_set.rb +148 -0
  73. data/lib/woods/evaluation/report_generator.rb +90 -0
  74. data/lib/woods/extracted_unit.rb +145 -0
  75. data/lib/woods/extractor.rb +1028 -0
  76. data/lib/woods/extractors/action_cable_extractor.rb +201 -0
  77. data/lib/woods/extractors/ast_source_extraction.rb +46 -0
  78. data/lib/woods/extractors/behavioral_profile.rb +309 -0
  79. data/lib/woods/extractors/caching_extractor.rb +261 -0
  80. data/lib/woods/extractors/callback_analyzer.rb +246 -0
  81. data/lib/woods/extractors/concern_extractor.rb +292 -0
  82. data/lib/woods/extractors/configuration_extractor.rb +219 -0
  83. data/lib/woods/extractors/controller_extractor.rb +404 -0
  84. data/lib/woods/extractors/database_view_extractor.rb +278 -0
  85. data/lib/woods/extractors/decorator_extractor.rb +253 -0
  86. data/lib/woods/extractors/engine_extractor.rb +223 -0
  87. data/lib/woods/extractors/event_extractor.rb +211 -0
  88. data/lib/woods/extractors/factory_extractor.rb +289 -0
  89. data/lib/woods/extractors/graphql_extractor.rb +892 -0
  90. data/lib/woods/extractors/i18n_extractor.rb +117 -0
  91. data/lib/woods/extractors/job_extractor.rb +374 -0
  92. data/lib/woods/extractors/lib_extractor.rb +218 -0
  93. data/lib/woods/extractors/mailer_extractor.rb +269 -0
  94. data/lib/woods/extractors/manager_extractor.rb +188 -0
  95. data/lib/woods/extractors/middleware_extractor.rb +133 -0
  96. data/lib/woods/extractors/migration_extractor.rb +469 -0
  97. data/lib/woods/extractors/model_extractor.rb +988 -0
  98. data/lib/woods/extractors/phlex_extractor.rb +252 -0
  99. data/lib/woods/extractors/policy_extractor.rb +191 -0
  100. data/lib/woods/extractors/poro_extractor.rb +229 -0
  101. data/lib/woods/extractors/pundit_extractor.rb +223 -0
  102. data/lib/woods/extractors/rails_source_extractor.rb +473 -0
  103. data/lib/woods/extractors/rake_task_extractor.rb +343 -0
  104. data/lib/woods/extractors/route_extractor.rb +181 -0
  105. data/lib/woods/extractors/scheduled_job_extractor.rb +331 -0
  106. data/lib/woods/extractors/serializer_extractor.rb +339 -0
  107. data/lib/woods/extractors/service_extractor.rb +217 -0
  108. data/lib/woods/extractors/shared_dependency_scanner.rb +91 -0
  109. data/lib/woods/extractors/shared_utility_methods.rb +281 -0
  110. data/lib/woods/extractors/state_machine_extractor.rb +398 -0
  111. data/lib/woods/extractors/test_mapping_extractor.rb +225 -0
  112. data/lib/woods/extractors/validator_extractor.rb +211 -0
  113. data/lib/woods/extractors/view_component_extractor.rb +311 -0
  114. data/lib/woods/extractors/view_template_extractor.rb +261 -0
  115. data/lib/woods/feedback/gap_detector.rb +89 -0
  116. data/lib/woods/feedback/store.rb +119 -0
  117. data/lib/woods/filename_utils.rb +32 -0
  118. data/lib/woods/flow_analysis/operation_extractor.rb +206 -0
  119. data/lib/woods/flow_analysis/response_code_mapper.rb +154 -0
  120. data/lib/woods/flow_assembler.rb +290 -0
  121. data/lib/woods/flow_document.rb +191 -0
  122. data/lib/woods/flow_precomputer.rb +102 -0
  123. data/lib/woods/formatting/base.rb +30 -0
  124. data/lib/woods/formatting/claude_adapter.rb +98 -0
  125. data/lib/woods/formatting/generic_adapter.rb +56 -0
  126. data/lib/woods/formatting/gpt_adapter.rb +64 -0
  127. data/lib/woods/formatting/human_adapter.rb +78 -0
  128. data/lib/woods/graph_analyzer.rb +374 -0
  129. data/lib/woods/mcp/bootstrapper.rb +96 -0
  130. data/lib/woods/mcp/index_reader.rb +394 -0
  131. data/lib/woods/mcp/renderers/claude_renderer.rb +81 -0
  132. data/lib/woods/mcp/renderers/json_renderer.rb +17 -0
  133. data/lib/woods/mcp/renderers/markdown_renderer.rb +353 -0
  134. data/lib/woods/mcp/renderers/plain_renderer.rb +240 -0
  135. data/lib/woods/mcp/server.rb +962 -0
  136. data/lib/woods/mcp/tool_response_renderer.rb +85 -0
  137. data/lib/woods/model_name_cache.rb +51 -0
  138. data/lib/woods/notion/client.rb +217 -0
  139. data/lib/woods/notion/exporter.rb +219 -0
  140. data/lib/woods/notion/mapper.rb +40 -0
  141. data/lib/woods/notion/mappers/column_mapper.rb +57 -0
  142. data/lib/woods/notion/mappers/migration_mapper.rb +39 -0
  143. data/lib/woods/notion/mappers/model_mapper.rb +161 -0
  144. data/lib/woods/notion/mappers/shared.rb +22 -0
  145. data/lib/woods/notion/rate_limiter.rb +68 -0
  146. data/lib/woods/observability/health_check.rb +79 -0
  147. data/lib/woods/observability/instrumentation.rb +34 -0
  148. data/lib/woods/observability/structured_logger.rb +57 -0
  149. data/lib/woods/operator/error_escalator.rb +81 -0
  150. data/lib/woods/operator/pipeline_guard.rb +92 -0
  151. data/lib/woods/operator/status_reporter.rb +80 -0
  152. data/lib/woods/railtie.rb +38 -0
  153. data/lib/woods/resilience/circuit_breaker.rb +99 -0
  154. data/lib/woods/resilience/index_validator.rb +167 -0
  155. data/lib/woods/resilience/retryable_provider.rb +108 -0
  156. data/lib/woods/retrieval/context_assembler.rb +261 -0
  157. data/lib/woods/retrieval/query_classifier.rb +133 -0
  158. data/lib/woods/retrieval/ranker.rb +277 -0
  159. data/lib/woods/retrieval/search_executor.rb +316 -0
  160. data/lib/woods/retriever.rb +152 -0
  161. data/lib/woods/ruby_analyzer/class_analyzer.rb +170 -0
  162. data/lib/woods/ruby_analyzer/dataflow_analyzer.rb +77 -0
  163. data/lib/woods/ruby_analyzer/fqn_builder.rb +18 -0
  164. data/lib/woods/ruby_analyzer/mermaid_renderer.rb +280 -0
  165. data/lib/woods/ruby_analyzer/method_analyzer.rb +143 -0
  166. data/lib/woods/ruby_analyzer/trace_enricher.rb +143 -0
  167. data/lib/woods/ruby_analyzer.rb +87 -0
  168. data/lib/woods/session_tracer/file_store.rb +104 -0
  169. data/lib/woods/session_tracer/middleware.rb +143 -0
  170. data/lib/woods/session_tracer/redis_store.rb +106 -0
  171. data/lib/woods/session_tracer/session_flow_assembler.rb +254 -0
  172. data/lib/woods/session_tracer/session_flow_document.rb +223 -0
  173. data/lib/woods/session_tracer/solid_cache_store.rb +139 -0
  174. data/lib/woods/session_tracer/store.rb +81 -0
  175. data/lib/woods/storage/graph_store.rb +120 -0
  176. data/lib/woods/storage/metadata_store.rb +196 -0
  177. data/lib/woods/storage/pgvector.rb +195 -0
  178. data/lib/woods/storage/qdrant.rb +205 -0
  179. data/lib/woods/storage/vector_store.rb +167 -0
  180. data/lib/woods/temporal/json_snapshot_store.rb +245 -0
  181. data/lib/woods/temporal/snapshot_store.rb +345 -0
  182. data/lib/woods/token_utils.rb +19 -0
  183. data/lib/woods/version.rb +5 -0
  184. data/lib/woods.rb +246 -0
  185. metadata +270 -0
@@ -0,0 +1,404 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'digest'
4
+ require_relative 'ast_source_extraction'
5
+ require_relative 'shared_utility_methods'
6
+ require_relative 'shared_dependency_scanner'
7
+
8
+ module Woods
9
+ module Extractors
10
+ # ControllerExtractor handles ActionController extraction with:
11
+ # - Route mapping (which HTTP endpoints hit which actions)
12
+ # - Before/after action filter chain resolution
13
+ # - Per-action chunking for precise retrieval
14
+ # - Concern inlining
15
+ #
16
+ # Controllers are chunked more aggressively than models because
17
+ # queries are often action-specific ("how does the create action work").
18
+ #
19
+ # @example
20
+ # extractor = ControllerExtractor.new
21
+ # units = extractor.extract_all
22
+ # registrations = units.find { |u| u.identifier == "Users::RegistrationsController" }
23
+ #
24
+ class ControllerExtractor
25
+ include AstSourceExtraction
26
+ include SharedUtilityMethods
27
+ include SharedDependencyScanner
28
+
# Build the controller/action route lookup once, at construction time, and
# keep it in @routes_map for the extraction methods that read it.
def initialize
  @routes_map = build_routes_map
end
32
+
# Extract every controller known to the application.
#
# Starts from the descendants of ApplicationController and, when
# ActionController::API is defined, merges in its descendants without
# duplicates. Controllers whose extraction returns nil are dropped.
#
# @return [Array<ExtractedUnit>] List of controller units
def extract_all
  candidates = ApplicationController.descendants
  candidates |= ActionController::API.descendants if defined?(ActionController::API)

  candidates.filter_map { |klass| extract_controller(klass) }
end
45
+
# Build the extracted unit for a single controller class.
#
# Any StandardError raised along the way is logged through Rails.logger and
# swallowed, so one broken controller yields nil instead of an exception.
#
# @param controller [Class] The controller class
# @return [ExtractedUnit, nil] The populated unit, or nil when extraction failed
def extract_controller(controller)
  unit = ExtractedUnit.new(type: :controller, identifier: controller.name, file_path: source_file_for(controller))

  # Read the file once and hand the text to every helper below.
  path = unit.file_path
  source = ''
  source = File.read(path) if path && File.exist?(path)

  unit.namespace = extract_namespace(controller)
  unit.source_code = build_composite_source(controller, source)
  unit.metadata = extract_metadata(controller, source)
  unit.dependencies = extract_dependencies(controller, source)

  # One chunk per action, so retrieval can target a single action.
  unit.chunks = build_action_chunks(controller, unit)

  unit
rescue StandardError => e
  Rails.logger.error("[Woods] Failed to extract controller #{controller.name}: #{e.class}: #{e.message}")
  Rails.logger.error("[Woods] #{e.backtrace&.first(5)&.join("\n ")}")
  nil
end
74
+
75
+ private
76
+
77
+ # ──────────────────────────────────────────────────────────────────────
78
+ # Route Mapping
79
+ # ──────────────────────────────────────────────────────────────────────
80
+
# Index the application's routes by controller class name and action.
#
# Routes without a :controller default are ignored. The result has the shape
# { "UsersController" => { "index" => [{ verb:, path:, name:, constraints: }] } }.
#
# @return [Hash{String => Hash}] Route info grouped by controller, then action
def build_routes_map
  Rails.application.routes.routes.each_with_object({}) do |route, map|
    controller_path = route.defaults[:controller]
    next unless controller_path

    controller_name = "#{controller_path.camelize}Controller"
    per_action = (map[controller_name] ||= {})

    (per_action[route.defaults[:action]] ||= []) << {
      verb: extract_verb(route),
      path: route.path.spec.to_s.gsub('(.:format)', ''),
      name: route.name,
      constraints: route.constraints.except(:request_method)
    }
  end
end
103
+
# Normalise a route's verb to a plain string.
#
# A String verb is returned as-is; an object exposing #source (such as a
# Regexp) has its ^ and $ anchors stripped; anything else goes through #to_s.
#
# @param route [#verb] A route object
# @return [String] The HTTP verb(s) for the route
def extract_verb(route)
  raw = route.verb

  if raw.is_a?(String)
    raw
  elsif raw.respond_to?(:source)
    raw.source.gsub(/[\^$]/, '')
  else
    raw.to_s
  end
end
111
+
112
+ # ──────────────────────────────────────────────────────────────────────
113
+ # Source Building
114
+ # ──────────────────────────────────────────────────────────────────────
115
+
# Locate the source file for a controller.
#
# The conventional app/controllers/<underscored name>.rb path is used when it
# exists on disk. Otherwise the lookup is delegated to
# {#resolve_source_location}, with the conventional path as its fallback.
# NOTE(review): that helper presumably restricts results to paths inside
# Rails.root and skips vendor/node_modules; confirm in SharedUtilityMethods.
#
# @param controller [Class] The controller class
# @return [String] Path to the controller source file
def source_file_for(controller)
  expected = Rails.root.join("app/controllers/#{controller.name.underscore}.rb").to_s

  if File.exist?(expected)
    expected
  else
    resolve_source_location(controller, app_root: Rails.root.to_s, fallback: expected)
  end
end
129
+
# Assemble the composite source: routes banner, filter-chain banner, then the
# controller source, each separated by a newline.
#
# @param controller [Class] The controller class
# @param source [String, nil] Controller source; read from disk when nil
# @return [String] Composite source, or '' when no source file can be found
def build_composite_source(controller, source = nil)
  if source.nil?
    path = source_file_for(controller)
    return '' if !path || !File.exist?(path)

    source = File.read(path)
  end

  [build_routes_comment(controller), build_filters_comment(controller), source].join("\n")
end
147
+
# Render the controller's routes as a comment banner.
#
# Emits one line per route, giving the verb, the path and the action it
# dispatches to.
#
# @param controller [Class] The controller class
# @return [String] Comment block, or '' when no routes map to the controller
def build_routes_comment(controller)
  by_action = @routes_map[controller.name] || {}
  return '' if by_action.empty?

  rows = []
  by_action.each do |action, entries|
    entries.each do |entry|
      verb = entry[:verb].to_s.ljust(7)
      path = entry[:path].ljust(45)
      rows << " #{verb} #{path} → ##{action}"
    end
  end
  listing = rows.map { |row| "# #{row}" }.join("\n")

  <<~ROUTES
    # ╔═══════════════════════════════════════════════════════════════════════╗
    # ║ Routes ║
    # ╚═══════════════════════════════════════════════════════════════════════╝
    #
    #{listing}
    #
  ROUTES
end
169
+
# Render the controller's filter chain as a comment banner.
#
# Each callback reported by {#extract_filter_chain} becomes one line showing
# its kind, its filter and any only/except/if/unless conditions.
#
# @param controller [Class] The controller class
# @return [String] Comment block, or '' when the controller has no callbacks
def build_filters_comment(controller)
  filters = extract_filter_chain(controller)
  return '' if filters.empty?

  lines = filters.map do |f|
    opts = []
    opts << "only: [#{f[:only].map { |a| ":#{a}" }.join(', ')}]" if f[:only]&.any?
    opts << "except: [#{f[:except].map { |a| ":#{a}" }.join(', ')}]" if f[:except]&.any?
    opts << "if: #{f[:if]}" if f[:if]
    # The chain also carries :unless; leaving it out would make a conditional
    # filter read as if it always ran.
    opts << "unless: #{f[:unless]}" if f[:unless]

    opts_str = opts.any? ? " (#{opts.join('; ')})" : ''
    " #{f[:kind].to_s.ljust(8)} :#{f[:filter]}#{opts_str}"
  end

  <<~FILTERS
    # ╔═══════════════════════════════════════════════════════════════════════╗
    # ║ Filter Chain ║
    # ╚═══════════════════════════════════════════════════════════════════════╝
    #
    #{lines.map { |l| "# #{l}" }.join("\n")}
    #
  FILTERS
end
193
+
# Describe every process_action callback registered on the controller.
#
# Each entry always has :kind and :filter; :only, :except, :if and :unless are
# added only when the corresponding condition list is non-empty. The :if and
# :unless lists are joined with ', '.
#
# @param controller [Class] The controller class
# @return [Array<Hash>] One hash per callback, in callback order
def extract_filter_chain(controller)
  controller._process_action_callbacks.map do |cb|
    only_actions, except_actions, if_list, unless_list = extract_callback_conditions(cb)

    # nil marks "condition absent"; compact then drops those keys.
    conditions = {
      only: only_actions.any? ? only_actions : nil,
      except: except_actions.any? ? except_actions : nil,
      if: if_list.any? ? if_list.join(', ') : nil,
      unless: unless_list.any? ? unless_list.join(', ') : nil
    }.compact

    { kind: cb.kind, filter: cb.filter }.merge(conditions)
  end
end
206
+
207
+ # ──────────────────────────────────────────────────────────────────────
208
+ # Metadata Extraction
209
+ # ──────────────────────────────────────────────────────────────────────
210
+
# Collect the metadata hash for a controller.
#
# Only actions defined directly on the controller class are listed; the
# ancestor chain stops before ActionController::Base / ActionController::API.
#
# @param controller [Class] The controller class
# @param source [String, nil] Controller source, passed through to the source-based helpers
# @return [Hash] Controller metadata
def extract_metadata(controller, source = nil)
  declared_here = controller.instance_methods(false).to_set(&:to_s)
  actions = controller.action_methods.select { |name| declared_here.include?(name) }.to_a

  {
    # Actions and the routes that reach them
    actions: actions,
    routes: @routes_map[controller.name] || {},

    # Callback chain
    filters: extract_filter_chain(controller),

    # Named classes between the controller and the framework base class
    ancestors: controller.ancestors
                         .take_while { |a| !(a == ActionController::Base || a == ActionController::API) }
                         .filter_map { |a| a.name if a.is_a?(Class) },

    # Included concern modules
    included_concerns: extract_included_concerns(controller),

    # Response formats inferred from the source text
    responds_to: extract_respond_formats(controller, source),

    # Size metrics
    action_count: actions.size,
    filter_count: controller._process_action_callbacks.count,

    # Strong-parameter definitions found in the source text
    permitted_params: extract_permitted_params(controller, source)
  }
end
245
+
# List the names of included modules that look like concerns.
#
# A module qualifies when its name contains "Concern" or "Concerns";
# anonymous modules (nil name) are ignored.
#
# @param controller [Class] The controller class
# @return [Array<String>] Names of matching modules
def extract_included_concerns(controller)
  controller.included_modules.filter_map do |mod|
    mod_name = mod.name
    next unless mod_name

    mod_name if %w[Concern Concerns].any? { |marker| mod_name.include?(marker) }
  end
end
251
+
# Guess the response formats a controller serves from its source text.
#
# This is a plain substring heuristic: :html is assumed when the source uses a
# `respond_to do` block or never mentions respond_to at all; :json, :xml and
# :turbo_stream are added when their markers appear anywhere in the source.
#
# @param controller [Class] The controller class (only used when source is nil)
# @param source [String, nil] Controller source; read from disk when nil
# @return [Array<Symbol>] Formats in the order :html, :json, :xml, :turbo_stream
def extract_respond_formats(controller, source = nil)
  if source.nil?
    path = source_file_for(controller)
    return [] if !path || !File.exist?(path)

    source = File.read(path)
  end

  detectors = {
    html: -> { source.include?('respond_to do') || !source.include?('respond_to') },
    json: -> { source.include?(':json') || source.include?('render json:') },
    xml: -> { source.include?(':xml') || source.include?('render xml:') },
    turbo_stream: -> { source.include?('turbo_stream') }
  }

  detectors.select { |_format, detected| detected.call }.keys
end
269
+
# Find strong-parameter definitions in the controller source.
#
# Looks for `def <name>_params` followed by
# `params.require(:model).permit(...)` and records the required model plus
# every `:symbol` that appears in the permit list.
#
# @param controller [Class] The controller class (only used when source is nil)
# @param source [String, nil] Controller source; read from disk when nil
# @return [Hash{String => Hash}] Method name => { model:, permitted: }
def extract_permitted_params(controller, source = nil)
  if source.nil?
    path = source_file_for(controller)
    return {} if !path || !File.exist?(path)

    source = File.read(path)
  end

  definition = /def\s+(\w+_params).*?params\.require\(:(\w+)\)\.permit\((.*?)\)/m

  source.scan(definition).to_h do |method_name, model, permit_list|
    [method_name, { model: model, permitted: permit_list.scan(/:(\w+)/).flatten }]
  end
end
290
+
291
+ # ──────────────────────────────────────────────────────────────────────
292
+ # Dependency Extraction
293
+ # ──────────────────────────────────────────────────────────────────────
294
+
# Collect the controller's dependencies from its source text.
#
# Combines the shared dependency scan with two render heuristics:
# `render SomethingComponent` (component) and `render "dir/template"` (view).
# The result holds at most one entry per (type, target) pair.
#
# @param controller [Class] The controller class (only used when source is nil)
# @param source [String, nil] Controller source; read from disk when nil
# @return [Array<Hash>] Dependency hashes with :type, :target, :via
def extract_dependencies(controller, source = nil)
  if source.nil?
    path = source_file_for(controller)
    source = File.read(path) if path && File.exist?(path)
  end
  return [] unless source

  common = scan_common_dependencies(source)

  # Component classes handed straight to render
  component_names = source.scan(/render\s+(\w+(?:::\w+)*Component)/).flatten.uniq
  components = component_names.map { |name| { type: :component, target: name, via: :render } }

  # Quoted "directory/template" renders
  template_names = source.scan(%r{render\s+["'](\w+/\w+)["']}).flatten.uniq
  views = template_names.map { |name| { type: :view, target: name, via: :render } }

  (common + components + views).uniq { |dep| [dep[:type], dep[:target]] }
end
319
+
320
+ # ──────────────────────────────────────────────────────────────────────
321
+ # Per-Action Chunking
322
+ # ──────────────────────────────────────────────────────────────────────
323
+
# Build one retrieval chunk per controller action.
#
# Actions whose source cannot be extracted, or is blank, are skipped. Each
# chunk carries a short header (controller, action, routes, filters) followed
# by the action's source, plus a SHA-256 hash of that content.
#
# @param controller [Class] The controller class
# @param unit [ExtractedUnit] The parent unit; its identifier is stored on each chunk
# @return [Array<Hash>] Chunk hashes with :chunk_type, :identifier, :content, :content_hash, :metadata
def build_action_chunks(controller, unit)
  controller.action_methods.filter_map do |action|
    routes_for_action = @routes_map.dig(controller.name, action.to_s)
    filters = applicable_filters(controller, action)

    body = extract_action_source(controller, action)
    next if body.nil? || body.strip.empty?

    route_summary = 'No direct route'
    route_summary = routes_for_action.map { |r| "#{r[:verb]} #{r[:path]}" }.join(', ') if routes_for_action&.any?
    filter_summary = filters.map { |f| "#{f[:kind]}(:#{f[:filter]})" }.join(', ').presence || 'none'

    content = <<~ACTION
      # Controller: #{controller.name}
      # Action: #{action}
      # Route: #{route_summary}
      # Filters: #{filter_summary}

      #{body}
    ACTION

    {
      chunk_type: :action,
      identifier: "#{controller.name}##{action}",
      content: content,
      content_hash: Digest::SHA256.hexdigest(content),
      metadata: {
        parent: unit.identifier,
        action: action.to_s,
        route: routes_for_action,
        filters: filters,
        http_methods: (routes_for_action || []).map { |r| r[:verb] }.uniq
      }
    }
  end
end
364
+
# List the callbacks that apply to a given action.
#
# @param controller [Class] The controller class
# @param action [String, Symbol] The action name
# @return [Array<Hash>] { kind:, filter: } for each applicable callback, in callback order
def applicable_filters(controller, action)
  target = action.to_s

  controller._process_action_callbacks.filter_map do |cb|
    { kind: cb.kind, filter: cb.filter } if callback_applies_to_action?(cb, target)
  end
end
373
+
# Decide whether a callback runs for the given action name.
#
# Reads the callback's @if and @unless condition lists. For each condition an
# action list is obtained via #extract_action_filter_actions; a condition with
# no action list (procs, symbols) is treated as satisfied. The callback
# applies when every @if list contains the action and no @unless list does.
#
# @param callback [ActiveSupport::Callbacks::Callback]
# @param action_name [String]
# @return [Boolean]
def callback_applies_to_action?(callback, action_name)
  only_conditions = callback.instance_variable_get(:@if) || []
  except_conditions = callback.instance_variable_get(:@unless) || []

  # Every `only`-style list must contain the action.
  permitted = only_conditions.all? do |cond|
    actions = extract_action_filter_actions(cond)
    !actions || actions.include?(action_name)
  end
  return false unless permitted

  # No `except`-style list may contain it.
  except_conditions.none? do |cond|
    actions = extract_action_filter_actions(cond)
    actions && actions.include?(action_name)
  end
end
402
+ end
403
+ end
404
+ end
@@ -0,0 +1,278 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'shared_utility_methods'
4
+ require_relative 'shared_dependency_scanner'
5
+
6
+ module Woods
7
+ module Extractors
8
+ # DatabaseViewExtractor handles SQL view file extraction.
9
+ #
10
+ # Scans `db/views/` for Scenic gem convention SQL files
11
+ # (e.g., `db/views/active_users_v01.sql`). Extracts one unit per
12
+ # view name using the latest version only, parsing basic SQL metadata
13
+ # (materialized flag, referenced tables, selected columns) via regex.
14
+ #
15
+ # @example
16
+ # extractor = DatabaseViewExtractor.new
17
+ # units = extractor.extract_all
18
+ # view = units.find { |u| u.identifier == "active_users" }
19
+ # view.metadata[:is_materialized] # => false
20
+ # view.metadata[:tables_referenced] # => ["users", "orders"]
21
+ #
22
+ class DatabaseViewExtractor
23
+ include SharedUtilityMethods
24
+ include SharedDependencyScanner
25
+
26
+ # Rails internal tables that should not generate model dependencies
27
+ INTERNAL_TABLES = %w[
28
+ schema_migrations
29
+ ar_internal_metadata
30
+ active_storage_blobs
31
+ active_storage_attachments
32
+ active_storage_variant_records
33
+ action_text_rich_texts
34
+ action_mailbox_inbound_emails
35
+ ].freeze
36
+
37
+ # SQL keywords that are not table names
38
+ SQL_KEYWORDS = %w[
39
+ select from where join inner outer left right full cross
40
+ on and or not in is null true false as with having group by
41
+ order limit offset union intersect except distinct all case when
42
+ then else end between like ilike similar to cast values lateral
43
+ returning exists any some
44
+ ].freeze
45
+
# Resolve the db/views directory under Rails.root and remember whether it
# exists, so #extract_all can return early when there is nothing to scan.
def initialize
  @views_dir = Rails.root.join('db/views')
  @has_directory = @views_dir.directory?
end
50
+
# Extract one unit per database view found in db/views/.
#
# Only the latest version file of each view is considered, and files whose
# extraction returns nil are dropped. Returns an empty list when the
# directory did not exist at construction time.
#
# @return [Array<ExtractedUnit>] List of database view units
def extract_all
  if @has_directory
    latest_view_files.filter_map { |path| extract_view_file(path) }
  else
    []
  end
end
63
+
# Extract a single SQL view file into a unit.
#
# Returns nil when the filename carries no `_v<NN>` version suffix. Any
# StandardError (including a failed read) is logged through Rails.logger and
# also results in nil.
#
# @param file_path [String] Absolute path to the SQL file
# @return [ExtractedUnit, nil] The extracted unit or nil on failure
def extract_view_file(file_path)
  source = File.read(file_path)
  view_name = extract_view_name(file_path)
  version = extract_version(file_path)
  return nil unless view_name

  ExtractedUnit.new(type: :database_view, identifier: view_name, file_path: file_path).tap do |unit|
    unit.namespace = nil
    unit.source_code = annotate_source(source, view_name, version)
    unit.metadata = extract_metadata(source, view_name, version)
    # Dependencies are derived from the metadata just stored on the unit.
    unit.dependencies = extract_dependencies(source, unit.metadata)
  end
rescue StandardError => e
  Rails.logger.error("Failed to extract database view #{file_path}: #{e.message}")
  nil
end
91
+
92
+ private
93
+
94
+ # ──────────────────────────────────────────────────────────────────────
95
+ # File Discovery
96
+ # ──────────────────────────────────────────────────────────────────────
97
+
# Pick the newest SQL file for every view in the views directory.
#
# Only files named <view_name>_v<NN>.sql are considered (for example
# active_users_v02.sql). Files are grouped by view name and the one with the
# highest version number wins.
#
# @return [Array<String>] Paths to latest-version files
def latest_view_files
  versioned_name = /\A\w+_v\d+\.sql\z/

  by_view = Dir[@views_dir.join('*.sql')]
            .select { |path| File.basename(path).match?(versioned_name) }
            .group_by { |path| extract_view_name(path) }

  by_view.map { |_view, paths| paths.max_by { |path| extract_version(path) } }
end
114
+
115
+ # ──────────────────────────────────────────────────────────────────────
116
+ # Name and Version Parsing
117
+ # ──────────────────────────────────────────────────────────────────────
118
+
# Derive the view name from a versioned SQL filename.
#
# Strips the `.sql` extension and the trailing `_v<NN>` suffix.
#
# @param file_path [String] Path to the SQL file
# @return [String, nil] The view name (e.g., "active_users"), or nil when the
#   filename has no version suffix
def extract_view_name(file_path)
  File.basename(file_path, '.sql')[/\A(.+?)_v(\d+)\z/, 1]
end
128
+
# Read the integer version from a versioned SQL filename.
#
# @param file_path [String] Path to the SQL file
# @return [Integer] The version number (e.g., 1 for "_v01"), or 0 when the
#   filename has no version suffix
def extract_version(file_path)
  # A missing suffix yields nil here, and nil.to_i is 0.
  File.basename(file_path, '.sql')[/_v(\d+)\z/, 1].to_i
end
138
+
139
+ # ──────────────────────────────────────────────────────────────────────
140
+ # Source Annotation
141
+ # ──────────────────────────────────────────────────────────────────────
142
+
# Put a summary banner (view name, version, materialized flag) in front of
# the SQL source. The banner consists of SQL line comments followed by one
# blank line.
#
# @param source [String] SQL source
# @param view_name [String] The view name
# @param version [Integer] The version number
# @return [String] Annotated SQL
def annotate_source(source, view_name, version)
  flag = materialized_view?(source) ? 'YES' : 'NO'

  banner = <<~ANNOTATION
    -- ╔═══════════════════════════════════════════════════════════════════════╗
    -- ║ Database View: #{view_name.ljust(52)}║
    -- ║ Version: #{version.to_s.ljust(59)}║
    -- ║ Materialized: #{flag.ljust(54)}║
    -- ╚═══════════════════════════════════════════════════════════════════════╝

  ANNOTATION

  [banner, source].join
end
163
+
164
+ # ──────────────────────────────────────────────────────────────────────
165
+ # Metadata Extraction
166
+ # ──────────────────────────────────────────────────────────────────────
167
+
# Assemble the metadata hash for a database view unit.
#
# :loc counts the lines that are neither blank nor SQL line comments
# (lines starting with `--` once leading whitespace is stripped).
#
# @param source [String] SQL source
# @param view_name [String] The view name
# @param version [Integer] The version number
# @return [Hash] View metadata
def extract_metadata(source, view_name, version)
  metadata = {
    view_name: view_name,
    version: version,
    is_materialized: materialized_view?(source),
    tables_referenced: extract_referenced_tables(source),
    columns_selected: extract_selected_columns(source)
  }

  metadata[:loc] = source.each_line.count do |line|
    text = line.strip
    !text.empty? && !text.start_with?('--')
  end

  metadata
end
184
+
# Report whether the SQL mentions MATERIALIZED as a standalone word
# (case-insensitive, anywhere in the source).
#
# @param source [String] SQL source
# @return [Boolean]
def materialized_view?(source)
  keyword = /\bMATERIALIZED\b/i
  source.match?(keyword)
end
192
+
# Extract table names referenced in FROM and JOIN clauses.
#
# Regex heuristic, not a SQL parser. Before scanning it:
# - removes `/* ... */` and `-- ...` comments, so prose such as
#   "-- rows from legacy" does not produce a phantom table;
# - drops the FROM keyword inside `EXTRACT(... FROM x)`,
#   `SUBSTRING(... FROM n)` and `TRIM(... FROM x)`, which is function syntax
#   rather than a table reference.
# Schema-qualified names (`public.users`) are reduced to the table part.
# Subqueries (`FROM (`) and SQL keywords are ignored. Comment markers inside
# string literals are not recognised and are stripped like real comments.
#
# @param source [String] SQL source
# @return [Array<String>] Deduplicated table names (lowercase), FROM matches
#   first, then JOIN matches
def extract_referenced_tables(source)
  sql = source.gsub(%r{/\*.*?\*/}m, ' ').gsub(/--[^\n]*/, '')
  sql = sql.gsub(/\b(EXTRACT|SUBSTRING|TRIM)\s*\(([^()]*?)\bFROM\b/i, '\1(\2 ')

  qualified_name = /[a-zA-Z_][a-zA-Z0-9_]*(?:\.[a-zA-Z_][a-zA-Z0-9_]*)*/

  # Two passes keep the original ordering: all FROM tables, then all JOIN tables.
  names = %w[FROM JOIN].flat_map do |keyword|
    sql.scan(/\b#{keyword}\s+(#{qualified_name})/i).flatten
  end

  names
    .map { |name| name.split('.').last.downcase } # schema.table -> table
    .reject { |table| sql_keyword?(table) }
    .uniq
end
215
+
# Extract column names from the first SELECT ... FROM clause.
#
# Regex heuristic, not a SQL parser. Handles simple column names,
# `table.column`, explicit aliases (`col AS alias`) and implicit aliases
# (`table.col alias`). A leading DISTINCT/ALL is ignored. Segments containing
# parentheses (function calls, subqueries) and empty segments are skipped.
# Returns ['*'] for SELECT * queries.
#
# @param source [String] SQL source
# @return [Array<String>] Deduplicated column names, or [] when no
#   SELECT ... FROM clause is found
def extract_selected_columns(source)
  select_match = source.match(/\bSELECT\s+(.+?)\s+FROM\b/im)
  return [] unless select_match

  # DISTINCT/ALL qualify the whole select list; they are not part of a column.
  select_clause = select_match[1].strip.sub(/\A(?:DISTINCT|ALL)\s+/i, '')
  return ['*'] if select_clause == '*'

  select_clause.split(',').filter_map do |segment|
    col = segment.strip
    # A stray or doubled comma leaves an empty segment; splitting it below
    # would return nil and raise, so skip it first.
    next if col.empty?

    # Explicit alias: "col AS alias" -> "col"
    col = col.split(/\s+AS\s+/i).first.strip
    # Implicit alias: "table.col alias" -> "table.col" (plain identifiers only)
    col = col[/\A([\w.*"]+)\s+"?[a-zA-Z_]\w*"?\z/, 1] || col
    # For table.column, keep the column part
    col = col.split('.').last.to_s.strip
    next if col.empty? || col.include?('(') || col.include?(')')

    col.delete("\"'")
  end.uniq
end
244
+
# Tell whether a token is one of the SQL_KEYWORDS, ignoring case.
#
# @param token [String] The token to check
# @return [Boolean]
def sql_keyword?(token)
  lowered = token.downcase
  SQL_KEYWORDS.include?(lowered)
end
252
+
253
+ # ──────────────────────────────────────────────────────────────────────
254
+ # Dependency Extraction
255
+ # ──────────────────────────────────────────────────────────────────────
256
+
# Turn the referenced tables into model dependencies.
#
# Every table in metadata[:tables_referenced] that is not listed in
# INTERNAL_TABLES is mapped to a model name with String#classify. The result
# holds at most one entry per (type, target) pair.
#
# @param _source [String] SQL source (unused)
# @param metadata [Hash] Extracted metadata; must contain :tables_referenced
# @return [Array<Hash>] Dependency hashes with :type, :target, :via
def extract_dependencies(_source, metadata)
  metadata[:tables_referenced]
    .reject { |table| INTERNAL_TABLES.include?(table) }
    .map { |table| { type: :model, target: table.classify, via: :table_name } }
    .uniq { |dep| [dep[:type], dep[:target]] }
end
276
+ end
277
+ end
278
+ end