codebase_index 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171)
  1. checksums.yaml +7 -0
  2. data/CHANGELOG.md +29 -0
  3. data/CODE_OF_CONDUCT.md +83 -0
  4. data/CONTRIBUTING.md +65 -0
  5. data/LICENSE.txt +21 -0
  6. data/README.md +481 -0
  7. data/exe/codebase-console-mcp +22 -0
  8. data/exe/codebase-index-mcp +61 -0
  9. data/exe/codebase-index-mcp-http +64 -0
  10. data/exe/codebase-index-mcp-start +58 -0
  11. data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
  12. data/lib/codebase_index/ast/method_extractor.rb +76 -0
  13. data/lib/codebase_index/ast/node.rb +88 -0
  14. data/lib/codebase_index/ast/parser.rb +653 -0
  15. data/lib/codebase_index/ast.rb +6 -0
  16. data/lib/codebase_index/builder.rb +137 -0
  17. data/lib/codebase_index/chunking/chunk.rb +84 -0
  18. data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
  19. data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
  20. data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
  21. data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
  22. data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
  23. data/lib/codebase_index/console/audit_logger.rb +75 -0
  24. data/lib/codebase_index/console/bridge.rb +170 -0
  25. data/lib/codebase_index/console/confirmation.rb +90 -0
  26. data/lib/codebase_index/console/connection_manager.rb +173 -0
  27. data/lib/codebase_index/console/console_response_renderer.rb +78 -0
  28. data/lib/codebase_index/console/model_validator.rb +81 -0
  29. data/lib/codebase_index/console/safe_context.rb +82 -0
  30. data/lib/codebase_index/console/server.rb +557 -0
  31. data/lib/codebase_index/console/sql_validator.rb +172 -0
  32. data/lib/codebase_index/console/tools/tier1.rb +118 -0
  33. data/lib/codebase_index/console/tools/tier2.rb +117 -0
  34. data/lib/codebase_index/console/tools/tier3.rb +110 -0
  35. data/lib/codebase_index/console/tools/tier4.rb +79 -0
  36. data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
  37. data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
  38. data/lib/codebase_index/cost_model/estimator.rb +128 -0
  39. data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
  40. data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
  41. data/lib/codebase_index/cost_model.rb +22 -0
  42. data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
  43. data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
  44. data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
  45. data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
  46. data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
  47. data/lib/codebase_index/db/migrator.rb +71 -0
  48. data/lib/codebase_index/db/schema_version.rb +73 -0
  49. data/lib/codebase_index/dependency_graph.rb +227 -0
  50. data/lib/codebase_index/embedding/indexer.rb +130 -0
  51. data/lib/codebase_index/embedding/openai.rb +105 -0
  52. data/lib/codebase_index/embedding/provider.rb +135 -0
  53. data/lib/codebase_index/embedding/text_preparer.rb +112 -0
  54. data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
  55. data/lib/codebase_index/evaluation/evaluator.rb +146 -0
  56. data/lib/codebase_index/evaluation/metrics.rb +79 -0
  57. data/lib/codebase_index/evaluation/query_set.rb +148 -0
  58. data/lib/codebase_index/evaluation/report_generator.rb +90 -0
  59. data/lib/codebase_index/extracted_unit.rb +145 -0
  60. data/lib/codebase_index/extractor.rb +956 -0
  61. data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
  62. data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
  63. data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
  64. data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
  65. data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
  66. data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
  67. data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
  68. data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
  69. data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
  70. data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
  71. data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
  72. data/lib/codebase_index/extractors/event_extractor.rb +211 -0
  73. data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
  74. data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
  75. data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
  76. data/lib/codebase_index/extractors/job_extractor.rb +369 -0
  77. data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
  78. data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
  79. data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
  80. data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
  81. data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
  82. data/lib/codebase_index/extractors/model_extractor.rb +960 -0
  83. data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
  84. data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
  85. data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
  86. data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
  87. data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
  88. data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
  89. data/lib/codebase_index/extractors/route_extractor.rb +181 -0
  90. data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
  91. data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
  92. data/lib/codebase_index/extractors/service_extractor.rb +254 -0
  93. data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
  94. data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
  95. data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
  96. data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
  97. data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
  98. data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
  99. data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
  100. data/lib/codebase_index/feedback/gap_detector.rb +89 -0
  101. data/lib/codebase_index/feedback/store.rb +119 -0
  102. data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
  103. data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
  104. data/lib/codebase_index/flow_assembler.rb +290 -0
  105. data/lib/codebase_index/flow_document.rb +191 -0
  106. data/lib/codebase_index/flow_precomputer.rb +102 -0
  107. data/lib/codebase_index/formatting/base.rb +40 -0
  108. data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
  109. data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
  110. data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
  111. data/lib/codebase_index/formatting/human_adapter.rb +78 -0
  112. data/lib/codebase_index/graph_analyzer.rb +374 -0
  113. data/lib/codebase_index/mcp/index_reader.rb +394 -0
  114. data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
  115. data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
  116. data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
  117. data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
  118. data/lib/codebase_index/mcp/server.rb +935 -0
  119. data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
  120. data/lib/codebase_index/model_name_cache.rb +51 -0
  121. data/lib/codebase_index/notion/client.rb +217 -0
  122. data/lib/codebase_index/notion/exporter.rb +219 -0
  123. data/lib/codebase_index/notion/mapper.rb +39 -0
  124. data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
  125. data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
  126. data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
  127. data/lib/codebase_index/notion/rate_limiter.rb +68 -0
  128. data/lib/codebase_index/observability/health_check.rb +81 -0
  129. data/lib/codebase_index/observability/instrumentation.rb +34 -0
  130. data/lib/codebase_index/observability/structured_logger.rb +75 -0
  131. data/lib/codebase_index/operator/error_escalator.rb +81 -0
  132. data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
  133. data/lib/codebase_index/operator/status_reporter.rb +80 -0
  134. data/lib/codebase_index/railtie.rb +26 -0
  135. data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
  136. data/lib/codebase_index/resilience/index_validator.rb +185 -0
  137. data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
  138. data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
  139. data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
  140. data/lib/codebase_index/retrieval/ranker.rb +273 -0
  141. data/lib/codebase_index/retrieval/search_executor.rb +327 -0
  142. data/lib/codebase_index/retriever.rb +160 -0
  143. data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
  144. data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
  145. data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
  146. data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
  147. data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
  148. data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
  149. data/lib/codebase_index/ruby_analyzer.rb +87 -0
  150. data/lib/codebase_index/session_tracer/file_store.rb +111 -0
  151. data/lib/codebase_index/session_tracer/middleware.rb +143 -0
  152. data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
  153. data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
  154. data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
  155. data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
  156. data/lib/codebase_index/session_tracer/store.rb +67 -0
  157. data/lib/codebase_index/storage/graph_store.rb +120 -0
  158. data/lib/codebase_index/storage/metadata_store.rb +169 -0
  159. data/lib/codebase_index/storage/pgvector.rb +163 -0
  160. data/lib/codebase_index/storage/qdrant.rb +172 -0
  161. data/lib/codebase_index/storage/vector_store.rb +156 -0
  162. data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
  163. data/lib/codebase_index/version.rb +5 -0
  164. data/lib/codebase_index.rb +223 -0
  165. data/lib/generators/codebase_index/install_generator.rb +32 -0
  166. data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
  167. data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
  168. data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
  169. data/lib/tasks/codebase_index.rake +583 -0
  170. data/lib/tasks/codebase_index_evaluation.rake +115 -0
  171. metadata +252 -0
@@ -0,0 +1,246 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'shared_utility_methods'
4
+ require_relative 'shared_dependency_scanner'
5
+
6
+ module CodebaseIndex
7
+ module Extractors
8
+ # PoroExtractor handles plain Ruby object extraction from app/models/.
9
+ #
10
+ # Scans app/models/ for Ruby files that define classes which are NOT
11
+ # ActiveRecord descendants (those are handled by ModelExtractor). Captures
12
+ # value objects, form objects, CurrentAttributes subclasses, Struct.new
13
+ # wrappers, and any other non-AR class living alongside AR models.
14
+ #
15
+ # Files under app/models/concerns/ are excluded — those are handled by
16
+ # ConcernExtractor. Module-only files are also excluded.
17
+ #
18
+ # @example
19
+ # extractor = PoroExtractor.new
20
+ # units = extractor.extract_all
21
+ # money = units.find { |u| u.identifier == "Money" }
22
+ # money.metadata[:parent_class] # => nil
23
+ # money.metadata[:method_count] # => 3
24
+ #
25
+ class PoroExtractor
26
+ include SharedUtilityMethods
27
+ include SharedDependencyScanner
28
+
29
+ # Glob pattern for all Ruby files in app/models/ (recursive).
30
+ MODELS_GLOB = 'app/models/**/*.rb'
31
+
32
+ # Subdirectory to exclude — handled by ConcernExtractor.
33
+ CONCERNS_SEGMENT = '/concerns/'
34
+
35
# Prepare the extractor by resolving the directory it will scan.
#
# Joins the Rails root with `app/models` and keeps the result in
# @models_dir, which #extract_all checks for existence before globbing.
def initialize
  app_root = Rails.root
  @models_dir = app_root.join('app/models')
end
38
+
39
# Collect a PORO unit for every eligible Ruby file matched by MODELS_GLOB.
#
# Returns an empty list when @models_dir is not a directory. Otherwise the
# names of all current ActiveRecord::Base descendants are gathered once and
# handed to #extract_poro_file so those classes can be skipped. Any path
# containing CONCERNS_SEGMENT is dropped before extraction, and files for
# which #extract_poro_file returns nil are omitted from the result.
#
# @return [Array<ExtractedUnit>] List of PORO units
def extract_all
  return [] unless @models_dir.directory?

  active_record_names = ActiveRecord::Base.descendants.filter_map(&:name).to_set
  candidate_files = Dir[Rails.root.join(MODELS_GLOB)].reject { |path| path.include?(CONCERNS_SEGMENT) }

  candidate_files.filter_map { |path| extract_poro_file(path, ar_names: active_record_names) }
end
56
+
57
# Build an ExtractedUnit for one Ruby file, or nil when it is not a PORO.
#
# The file is rejected (nil) when #poro_file? is false, when #module_only?
# is true, when no class name can be inferred, or when the inferred name is
# in +ar_names+. Any StandardError raised along the way is logged through
# Rails.logger and converted to nil, so one bad file does not abort a scan.
#
# @param file_path [String] Absolute path to the Ruby file
# @param ar_names [Set<String>] Set of AR descendant names to skip
# @return [ExtractedUnit, nil] The extracted unit or nil
def extract_poro_file(file_path, ar_names: Set.new)
  contents = File.read(file_path)
  return nil if !poro_file?(contents) || module_only?(contents)

  name = infer_class_name(file_path, contents)
  return nil if !name || ar_names.include?(name)

  ExtractedUnit.new(type: :poro, identifier: name, file_path: file_path).tap do |poro|
    poro.namespace = extract_namespace(name)
    poro.source_code = annotate_source(contents, name)
    poro.metadata = extract_metadata(contents, name)
    poro.dependencies = extract_dependencies(contents)
  end
rescue StandardError => e
  Rails.logger.error("Failed to extract PORO #{file_path}: #{e.message}")
  nil
end
91
+
92
+ private
93
+
94
+ # ──────────────────────────────────────────────────────────────────────
95
+ # File Classification
96
+ # ──────────────────────────────────────────────────────────────────────
97
+
98
# Decide whether a source string is a candidate for PORO extraction.
#
# The source qualifies when any of these textual patterns is present:
# a line beginning (after optional whitespace) with the `class` keyword,
# a `Struct.new` call, or a `Data.define` call.
#
# @param source [String] Ruby source code
# @return [Boolean]
def poro_file?(source)
  [/^\s*class\s+/, /\bStruct\.new\b/, /\bData\.define\b/].any? do |pattern|
    source.match?(pattern)
  end
end
111
+
112
# Report whether the source declares at least one module but no class.
#
# Both checks are line-oriented regexps: a `class` keyword at the start of
# any line rules the file out; otherwise the answer is whether a
# `module Name` line exists.
#
# @param source [String] Ruby source code
# @return [Boolean]
def module_only?(source)
  return false if source.match?(/^\s*class\s+/)

  source.match?(/^\s*module\s+\w+/)
end
119
+
120
+ # ──────────────────────────────────────────────────────────────────────
121
+ # Class Name Inference
122
+ # ──────────────────────────────────────────────────────────────────────
123
+
124
# Infer the primary class name from source, falling back to the file path.
#
# Resolution order:
# 1. The first `class Foo` line. A name that already contains `::` is
#    returned as written. Otherwise it is prefixed with every `module`
#    declared *before* that class line, joined with `::`.
# 2. A constant assigned from `Struct.new` / `Data.define` at the start of
#    a line (`Point = Struct.new(...)`).
# 3. The name derived from the path by #path_based_class_name.
#
# Only modules that precede the class line are treated as namespaces: a
# module nested inside the class, or declared after it, cannot enclose it.
# NOTE(review): this is still a textual heuristic; a module that is opened
# and closed before the class line is counted as an enclosing namespace.
#
# @param file_path [String] Absolute path to the file
# @param source [String] Ruby source code
# @return [String, nil] The inferred class name
def infer_class_name(file_path, source)
  class_match = source.match(/^\s*class\s+([\w:]+)/)
  if class_match
    base = class_match[1]
    # Already fully qualified (e.g., Order::Update) — use as-is.
    return base if base.include?('::')

    # Restrict the namespace scan to the text preceding the class line.
    namespaces = class_match.pre_match.scan(/^\s*module\s+([\w:]+)/).flatten
    return (namespaces + [base]).join('::')
  end

  # Struct.new / Data.define: ConstantName = Struct.new(...)
  struct_match = source.match(/^(\w[\w:]*)\s*=\s*(?:Struct\.new|Data\.define)/)
  return struct_match[1] if struct_match

  # Fall back: derive from file path using Rails naming convention.
  path_based_class_name(file_path)
end
153
+
154
# Derive a class name from a file path using the Rails camelize convention.
#
#   app/models/order/update.rb => Order::Update
#   app/models/money.rb        => Money
#
# The Rails root prefix and the leading `app/models/` segment are removed,
# the trailing `.rb` extension is stripped, and each remaining path segment
# is camelized and joined with `::`. The extension is matched only at the
# very end of the path so that a `.rb` substring occurring earlier (for
# example in a directory name) is left intact.
#
# @param file_path [String] Absolute path to the file
# @return [String] Camelize-derived class name
def path_based_class_name(file_path)
  file_path
    .sub("#{Rails.root}/", '')
    .sub(%r{^app/models/}, '')
    .sub(/\.rb\z/, '')
    .split('/')
    .map(&:camelize)
    .join('::')
end
170
+
171
+ # ──────────────────────────────────────────────────────────────────────
172
+ # Source Annotation
173
+ # ──────────────────────────────────────────────────────────────────────
174
+
175
# Return the source with a boxed summary banner placed in front of it.
#
# The banner names the class and its parent class (as reported by
# #extract_parent_class, or the literal "none" when there is none) and is
# followed by one blank line before the untouched source text.
#
# @param source [String] Ruby source code
# @param class_name [String] The class name
# @return [String] Annotated source
def annotate_source(source, class_name)
  parent_label = extract_parent_class(source) || 'none'

  banner_rows = [
    '# ╔═══════════════════════════════════════════════════════════════════════╗',
    "# ║ PORO: #{class_name.ljust(63)}║",
    "# ║ Parent: #{parent_label.ljust(61)}║",
    '# ╚═══════════════════════════════════════════════════════════════════════╝',
    ''
  ]
  banner = banner_rows.map { |row| "#{row}\n" }.join

  banner + source
end
194
+
195
+ # ──────────────────────────────────────────────────────────────────────
196
+ # Metadata Extraction
197
+ # ──────────────────────────────────────────────────────────────────────
198
+
199
# Assemble the metadata hash stored on a PORO unit.
#
# Every value is computed from the source text alone; the class name
# argument is accepted for signature compatibility but not used.
# :method_count is the number of `def name` / `def self.name` occurrences
# matched textually anywhere in the source.
#
# @param source [String] Ruby source code
# @param _class_name [String] The class name (unused)
# @return [Hash] PORO metadata
def extract_metadata(source, _class_name)
  metadata = {}
  metadata[:public_methods] = extract_public_methods(source)
  metadata[:class_methods] = extract_class_methods(source)
  metadata[:initialize_params] = extract_initialize_params(source)
  metadata[:parent_class] = extract_parent_class(source)
  metadata[:loc] = count_loc(source)
  metadata[:method_count] = source.scan(/def\s+(?:self\.)?\w+/).size
  metadata
end
214
+
215
# Read the superclass name from the first `class Name < Parent` declaration.
#
# Only the constant path directly after `<` is captured, so
# `class Point < Struct.new(:x)` yields "Struct".
#
# @param source [String] Ruby source code
# @return [String, nil] Parent class name or nil
def extract_parent_class(source)
  source[/^\s*class\s+[\w:]+\s*<\s*([\w:]+)/, 1]
end
223
+
224
# Count the lines that carry code: lines that are neither blank nor, after
# stripping surrounding whitespace, start with `#`.
#
# @param source [String] Ruby source code
# @return [Integer] LOC count
def count_loc(source)
  source
    .each_line
    .map(&:strip)
    .reject { |text| text.empty? || text.start_with?('#') }
    .size
end
231
+
232
+ # ──────────────────────────────────────────────────────────────────────
233
+ # Dependency Extraction
234
+ # ──────────────────────────────────────────────────────────────────────
235
+
236
# Produce the dependency list for a PORO unit.
#
# Delegates discovery to #scan_common_dependencies and then keeps only the
# first entry for each distinct (:type, :target) pair.
#
# @param source [String] Ruby source code
# @return [Array<Hash>] Dependency hashes with :type, :target, :via
def extract_dependencies(source)
  scan_common_dependencies(source).uniq { |dep| dep.values_at(:type, :target) }
end
244
+ end
245
+ end
246
+ end
@@ -0,0 +1,223 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'shared_utility_methods'
4
+ require_relative 'shared_dependency_scanner'
5
+
6
+ module CodebaseIndex
7
+ module Extractors
8
+ # PunditExtractor handles Pundit authorization policy extraction.
9
+ #
10
+ # Specifically targets Pundit convention: classes in `app/policies/`
11
+ # that inherit from ApplicationPolicy or follow Pundit patterns
12
+ # (user/record attrs, action? methods). This is distinct from the
13
+ # generic PolicyExtractor which handles domain eligibility policies.
14
+ #
15
+ # @example
16
+ # extractor = PunditExtractor.new
17
+ # units = extractor.extract_all
18
+ # post_policy = units.find { |u| u.identifier == "PostPolicy" }
19
+ #
20
+ class PunditExtractor
21
+ include SharedUtilityMethods
22
+ include SharedDependencyScanner
23
+
24
+ # Directories to scan for Pundit policies
25
+ PUNDIT_DIRECTORIES = %w[
26
+ app/policies
27
+ ].freeze
28
+
29
+ # Standard Pundit action methods
30
+ PUNDIT_ACTIONS = %w[index? show? create? new? update? edit? destroy?].freeze
31
+
32
# Resolve each entry of PUNDIT_DIRECTORIES against the Rails root and keep
# only those that exist as directories, storing them in @directories.
def initialize
  @directories = PUNDIT_DIRECTORIES.filter_map do |relative|
    candidate = Rails.root.join(relative)
    candidate if candidate.directory?
  end
end
36
+
37
# Extract a unit for every Pundit policy found under @directories.
#
# Each directory is globbed recursively for `*.rb` files and every file is
# passed to #extract_pundit_file; files for which it returns nil are left
# out of the result.
#
# @return [Array<ExtractedUnit>] List of Pundit policy units
def extract_all
  policy_files = @directories.flat_map { |dir| Dir[dir.join('**/*.rb')] }
  policy_files.filter_map { |path| extract_pundit_file(path) }
end
47
+
48
# Build an ExtractedUnit for one policy file, or nil when it does not
# qualify.
#
# The file is skipped (nil) when no class name can be determined or when
# #pundit_policy? rejects the source. Any StandardError raised during
# extraction is logged through Rails.logger and converted to nil.
#
# @param file_path [String] Path to the policy file
# @return [ExtractedUnit, nil] The extracted unit or nil if not a Pundit policy
def extract_pundit_file(file_path)
  contents = File.read(file_path)
  policy_name = extract_class_name(file_path, contents)
  return nil if !policy_name || !pundit_policy?(contents)

  ExtractedUnit.new(type: :pundit_policy, identifier: policy_name, file_path: file_path).tap do |policy|
    policy.namespace = extract_namespace(policy_name)
    policy.source_code = annotate_source(contents, policy_name)
    policy.metadata = extract_metadata(contents, policy_name)
    policy.dependencies = extract_dependencies(contents, policy_name)
  end
rescue StandardError => e
  Rails.logger.error("Failed to extract Pundit policy #{file_path}: #{e.message}")
  nil
end
75
+
76
+ private
77
+
78
+ # ──────────────────────────────────────────────────────────────────────
79
+ # Class Discovery
80
+ # ──────────────────────────────────────────────────────────────────────
81
+
82
# Determine the policy's class name from source, or infer it from the path.
#
# When the source contains a `class Name` line, the first such name is
# used. A name that already contains `::` is returned as written; otherwise
# it is prefixed with every `module` declared before the class line, so a
# policy wrapped in `module Admin` is identified as `Admin::PostPolicy`
# rather than colliding with a top-level `PostPolicy`.
# NOTE(review): textual heuristic — a module opened and closed before the
# class line is still counted as an enclosing namespace.
#
# Without a class line the name is derived from the path: the Rails root
# and `app/policies/` prefixes and the trailing `.rb` extension are removed,
# and each remaining segment is converted from snake_case to CamelCase.
#
# @param file_path [String]
# @param source [String]
# @return [String, nil]
def extract_class_name(file_path, source)
  class_match = source.match(/^\s*class\s+([\w:]+)/)
  if class_match
    declared = class_match[1]
    return declared if declared.include?('::')

    # Only modules preceding the class line can enclose it.
    namespaces = class_match.pre_match.scan(/^\s*module\s+([\w:]+)/).flatten
    return (namespaces + [declared]).join('::')
  end

  file_path
    .sub("#{Rails.root}/", '')
    .sub(%r{^app/policies/}, '')
    .sub(/\.rb\z/, '') # strip the extension only, not the first ".rb" anywhere
    .split('/')
    .map { |segment| segment.split('_').map(&:capitalize).join }
    .join('::')
end
98
+
99
# Decide, from source text alone, whether a file looks like a Pundit policy.
#
# Any one of these signals is sufficient:
# - the text `< ApplicationPolicy` appears;
# - there is an `attr_reader :user` and an `attr_reader` line mentioning
#   `:record`;
# - there is an `initialize(user, ...)` definition together with at least
#   one method whose name ends in `?`.
#
# @param source [String] Ruby source code
# @return [Boolean]
def pundit_policy?(source)
  return true if source.match?(/< ApplicationPolicy/)
  return true if source.match?(/attr_reader\s+:user/) && source.match?(/attr_reader.*:record/)

  source.match?(/def\s+initialize\s*\(\s*user\s*,/) && source.match?(/def\s+\w+\?/)
end
108
+
109
+ # ──────────────────────────────────────────────────────────────────────
110
+ # Source Annotation
111
+ # ──────────────────────────────────────────────────────────────────────
112
+
113
# Return the policy source wrapped with a boxed summary banner.
#
# The banner lists the policy class, the model name from #infer_model and
# the comma-separated actions from #detect_authorization_actions. It is
# followed by a blank line, the untouched source, and one trailing newline.
#
# @param source [String]
# @param class_name [String]
# @return [String]
def annotate_source(source, class_name)
  model_label = infer_model(class_name).to_s
  action_list = detect_authorization_actions(source).join(', ')

  banner = [
    '# ╔═══════════════════════════════════════════════════════════════════════╗',
    "# ║ Pundit Policy: #{class_name.ljust(53)}║",
    "# ║ Model: #{model_label.ljust(61)}║",
    "# ║ Actions: #{action_list.ljust(59)}║",
    '# ╚═══════════════════════════════════════════════════════════════════════╝',
    ''
  ]

  "#{banner.join("\n")}\n#{source}\n"
end
130
+
131
+ # ──────────────────────────────────────────────────────────────────────
132
+ # Metadata Extraction
133
+ # ──────────────────────────────────────────────────────────────────────
134
+
135
# Assemble the metadata hash stored on a Pundit policy unit.
#
# The detected actions are split into those listed in PUNDIT_ACTIONS
# (:standard_actions) and the rest (:custom_actions). :has_scope_class and
# :inherits_application_policy are plain textual checks on the source.
# :loc counts lines that are neither blank nor start with `#`, and
# :method_count counts `def name` / `def self.name` occurrences.
#
# @param source [String]
# @param class_name [String]
# @return [Hash]
def extract_metadata(source, class_name)
  actions = detect_authorization_actions(source)

  metadata = {}
  metadata[:model] = infer_model(class_name)
  metadata[:authorization_actions] = actions
  metadata[:standard_actions] = actions & PUNDIT_ACTIONS
  metadata[:custom_actions] = actions - PUNDIT_ACTIONS
  # match? already yields true/false, so no boolean coercion is needed.
  metadata[:has_scope_class] = source.match?(/class\s+Scope\b/)
  metadata[:inherits_application_policy] = source.match?(/< ApplicationPolicy/)
  metadata[:public_methods] = extract_public_methods(source)
  metadata[:class_methods] = extract_class_methods(source)
  metadata[:loc] = source.lines.map(&:strip).count { |text| !text.empty? && !text.start_with?('#') }
  metadata[:method_count] = source.scan(/def\s+(?:self\.)?\w+/).size
  metadata
end
153
+
154
# Detect authorization action methods: public instance methods whose name
# ends in `?`, excluding everything inside the nested `Scope` class.
#
# The scan is line-based. A line starting with `class Scope` opens the
# Scope region; nesting inside it is tracked with #scope_nesting_delta so
# the region ends at the `end` that closes the class, not at the first
# inner `end`. Lines inside the region never contribute actions.
#
# Outside the Scope region, a bare `private` or `protected` line hides the
# methods that follow until a bare `public` line. Only lines that *start*
# with `def` are considered, so commented-out definitions and inline forms
# such as `private def secret?` are not reported as actions.
#
# @param source [String]
# @return [Array<String>] unique action names in order of appearance
def detect_authorization_actions(source)
  actions = []
  hidden = false      # true while inside a private/protected section
  in_scope = false    # true while inside the nested Scope class
  scope_depth = 0

  source.each_line do |line|
    stripped = line.strip

    if !in_scope && stripped.match?(/\Aclass\s+Scope\b/)
      in_scope = true
      scope_depth = 0
    end

    if in_scope
      scope_depth += scope_nesting_delta(stripped)
      in_scope = false unless scope_depth.positive?
      next
    end

    case stripped
    when 'private', 'protected' then hidden = true
    when 'public' then hidden = false
    end
    next if hidden

    action = stripped[/\Adef\s+(\w+\?)/, 1]
    actions << action if action
  end

  actions.uniq
end

# Net change in block nesting contributed by one stripped source line:
# +1 for a construct that needs a closing `end`, -1 for each `end` keyword.
#
# Heuristic, not a parser. Counted as openers: a line starting with
# class/module/def/if/unless/case/while/until/for/begin (endless
# `def name = expr` excluded), a trailing `do` block, or an
# `= if/unless/case/begin` assignment. Full-line comments count as 0 and a
# trailing ` # comment` is ignored.
#
# @param stripped [String] a source line with surrounding whitespace removed
# @return [Integer]
def scope_nesting_delta(stripped)
  return 0 if stripped.empty? || stripped.start_with?('#')

  code = stripped.sub(/\s#(?!\{).*\z/, '')
  endless_def = code.match?(/\Adef\s+[\w.]+[?!]?(?:\([^)]*\))?\s+=\s/)

  opened =
    if code.match?(/\A(?:class|module|def|if|unless|case|while|until|for|begin)\b/)
      endless_def ? 0 : 1
    elsif code.match?(/\bdo\b(?:\s*\|[^|]*\|)?\z/) || code.match?(/=\s*(?:if|unless|case|begin)\b/)
      1
    else
      0
    end

  # Ignore `.end` method calls, `:end` symbols and `end:` keys.
  closed = code.scan(/(?<![.:@$])\bend\b(?![?!:])/).size

  opened - closed
end
195
+
196
# Derive the model name a policy guards from the policy's class name.
#
# Takes the last `::`-separated segment and removes a trailing "Policy",
# e.g. "Admin::PostPolicy" becomes "Post".
#
# @param class_name [String]
# @return [String]
def infer_model(class_name)
  class_name.split('::').last.delete_suffix('Policy')
end
204
+
205
+ # ──────────────────────────────────────────────────────────────────────
206
+ # Dependency Extraction
207
+ # ──────────────────────────────────────────────────────────────────────
208
+
209
# Produce the dependency list for a Pundit policy unit.
#
# The list always starts with the model inferred from the class name
# (via: :authorization), followed by whatever #scan_model_dependencies and
# #scan_service_dependencies report. Only the first entry for each distinct
# (:type, :target) pair is kept.
#
# @param source [String]
# @param class_name [String]
# @return [Array<Hash>]
def extract_dependencies(source, class_name)
  authorized = { type: :model, target: infer_model(class_name), via: :authorization }

  found = [authorized]
  found.concat(scan_model_dependencies(source))
  found.concat(scan_service_dependencies(source))

  found.uniq { |dep| [dep[:type], dep[:target]] }
end
221
+ end
222
+ end
223
+ end