codebase_index 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/exe/codebase-console-mcp +22 -0
- data/exe/codebase-index-mcp +61 -0
- data/exe/codebase-index-mcp-http +64 -0
- data/exe/codebase-index-mcp-start +58 -0
- data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
- data/lib/codebase_index/ast/method_extractor.rb +76 -0
- data/lib/codebase_index/ast/node.rb +88 -0
- data/lib/codebase_index/ast/parser.rb +653 -0
- data/lib/codebase_index/ast.rb +6 -0
- data/lib/codebase_index/builder.rb +137 -0
- data/lib/codebase_index/chunking/chunk.rb +84 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
- data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
- data/lib/codebase_index/console/audit_logger.rb +75 -0
- data/lib/codebase_index/console/bridge.rb +170 -0
- data/lib/codebase_index/console/confirmation.rb +90 -0
- data/lib/codebase_index/console/connection_manager.rb +173 -0
- data/lib/codebase_index/console/console_response_renderer.rb +78 -0
- data/lib/codebase_index/console/model_validator.rb +81 -0
- data/lib/codebase_index/console/safe_context.rb +82 -0
- data/lib/codebase_index/console/server.rb +557 -0
- data/lib/codebase_index/console/sql_validator.rb +172 -0
- data/lib/codebase_index/console/tools/tier1.rb +118 -0
- data/lib/codebase_index/console/tools/tier2.rb +117 -0
- data/lib/codebase_index/console/tools/tier3.rb +110 -0
- data/lib/codebase_index/console/tools/tier4.rb +79 -0
- data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
- data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
- data/lib/codebase_index/cost_model/estimator.rb +128 -0
- data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
- data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
- data/lib/codebase_index/cost_model.rb +22 -0
- data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
- data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/codebase_index/db/migrator.rb +71 -0
- data/lib/codebase_index/db/schema_version.rb +73 -0
- data/lib/codebase_index/dependency_graph.rb +227 -0
- data/lib/codebase_index/embedding/indexer.rb +130 -0
- data/lib/codebase_index/embedding/openai.rb +105 -0
- data/lib/codebase_index/embedding/provider.rb +135 -0
- data/lib/codebase_index/embedding/text_preparer.rb +112 -0
- data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
- data/lib/codebase_index/evaluation/evaluator.rb +146 -0
- data/lib/codebase_index/evaluation/metrics.rb +79 -0
- data/lib/codebase_index/evaluation/query_set.rb +148 -0
- data/lib/codebase_index/evaluation/report_generator.rb +90 -0
- data/lib/codebase_index/extracted_unit.rb +145 -0
- data/lib/codebase_index/extractor.rb +956 -0
- data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
- data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
- data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
- data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
- data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
- data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
- data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
- data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
- data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
- data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
- data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
- data/lib/codebase_index/extractors/event_extractor.rb +211 -0
- data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
- data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
- data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
- data/lib/codebase_index/extractors/job_extractor.rb +369 -0
- data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
- data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
- data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
- data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
- data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
- data/lib/codebase_index/extractors/model_extractor.rb +960 -0
- data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
- data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
- data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
- data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
- data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
- data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
- data/lib/codebase_index/extractors/route_extractor.rb +181 -0
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
- data/lib/codebase_index/extractors/service_extractor.rb +254 -0
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
- data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
- data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
- data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
- data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
- data/lib/codebase_index/feedback/gap_detector.rb +89 -0
- data/lib/codebase_index/feedback/store.rb +119 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/codebase_index/flow_assembler.rb +290 -0
- data/lib/codebase_index/flow_document.rb +191 -0
- data/lib/codebase_index/flow_precomputer.rb +102 -0
- data/lib/codebase_index/formatting/base.rb +40 -0
- data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
- data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
- data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
- data/lib/codebase_index/formatting/human_adapter.rb +78 -0
- data/lib/codebase_index/graph_analyzer.rb +374 -0
- data/lib/codebase_index/mcp/index_reader.rb +394 -0
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/codebase_index/mcp/server.rb +935 -0
- data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
- data/lib/codebase_index/model_name_cache.rb +51 -0
- data/lib/codebase_index/notion/client.rb +217 -0
- data/lib/codebase_index/notion/exporter.rb +219 -0
- data/lib/codebase_index/notion/mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
- data/lib/codebase_index/notion/rate_limiter.rb +68 -0
- data/lib/codebase_index/observability/health_check.rb +81 -0
- data/lib/codebase_index/observability/instrumentation.rb +34 -0
- data/lib/codebase_index/observability/structured_logger.rb +75 -0
- data/lib/codebase_index/operator/error_escalator.rb +81 -0
- data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
- data/lib/codebase_index/operator/status_reporter.rb +80 -0
- data/lib/codebase_index/railtie.rb +26 -0
- data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
- data/lib/codebase_index/resilience/index_validator.rb +185 -0
- data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
- data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
- data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
- data/lib/codebase_index/retrieval/ranker.rb +273 -0
- data/lib/codebase_index/retrieval/search_executor.rb +327 -0
- data/lib/codebase_index/retriever.rb +160 -0
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
- data/lib/codebase_index/ruby_analyzer.rb +87 -0
- data/lib/codebase_index/session_tracer/file_store.rb +111 -0
- data/lib/codebase_index/session_tracer/middleware.rb +143 -0
- data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
- data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
- data/lib/codebase_index/session_tracer/store.rb +67 -0
- data/lib/codebase_index/storage/graph_store.rb +120 -0
- data/lib/codebase_index/storage/metadata_store.rb +169 -0
- data/lib/codebase_index/storage/pgvector.rb +163 -0
- data/lib/codebase_index/storage/qdrant.rb +172 -0
- data/lib/codebase_index/storage/vector_store.rb +156 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
- data/lib/codebase_index/version.rb +5 -0
- data/lib/codebase_index.rb +223 -0
- data/lib/generators/codebase_index/install_generator.rb +32 -0
- data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
- data/lib/tasks/codebase_index.rake +583 -0
- data/lib/tasks/codebase_index_evaluation.rake +115 -0
- metadata +252 -0
|
@@ -0,0 +1,246 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'shared_utility_methods'
|
|
4
|
+
require_relative 'shared_dependency_scanner'
|
|
5
|
+
|
|
6
|
+
module CodebaseIndex
|
|
7
|
+
module Extractors
|
|
8
|
+
# PoroExtractor handles plain Ruby object extraction from app/models/.
|
|
9
|
+
#
|
|
10
|
+
# Scans app/models/ for Ruby files that define classes which are NOT
|
|
11
|
+
# ActiveRecord descendants (those are handled by ModelExtractor). Captures
|
|
12
|
+
# value objects, form objects, CurrentAttributes subclasses, Struct.new
|
|
13
|
+
# wrappers, and any other non-AR class living alongside AR models.
|
|
14
|
+
#
|
|
15
|
+
# Files under app/models/concerns/ are excluded — those are handled by
|
|
16
|
+
# ConcernExtractor. Module-only files are also excluded.
|
|
17
|
+
#
|
|
18
|
+
# @example
|
|
19
|
+
# extractor = PoroExtractor.new
|
|
20
|
+
# units = extractor.extract_all
|
|
21
|
+
# money = units.find { |u| u.identifier == "Money" }
|
|
22
|
+
# money.metadata[:parent_class] # => nil
|
|
23
|
+
# money.metadata[:method_count] # => 3
|
|
24
|
+
#
|
|
25
|
+
class PoroExtractor
|
|
26
|
+
include SharedUtilityMethods
|
|
27
|
+
include SharedDependencyScanner
|
|
28
|
+
|
|
29
|
+
# Glob pattern for all Ruby files in app/models/ (recursive).
|
|
30
|
+
MODELS_GLOB = 'app/models/**/*.rb'
|
|
31
|
+
|
|
32
|
+
# Subdirectory to exclude — handled by ConcernExtractor.
|
|
33
|
+
CONCERNS_SEGMENT = '/concerns/'
|
|
34
|
+
|
|
35
|
+
def initialize
  # Resolve the models directory once; extract_all checks it for existence
  # before doing any globbing.
  app_root = Rails.root
  @models_dir = app_root.join('app/models')
end
|
|
38
|
+
|
|
39
|
+
# Extract all PORO units from app/models/.
|
|
40
|
+
#
|
|
41
|
+
# Filters out ActiveRecord descendants by name so we don't duplicate
|
|
42
|
+
# what ModelExtractor already produces. Concerns/ subdir is also skipped.
|
|
43
|
+
#
|
|
44
|
+
# @return [Array<ExtractedUnit>] List of PORO units
|
|
45
|
+
def extract_all
  # Nothing to scan when the application has no app/models directory.
  return [] unless @models_dir.directory?

  # Names of ActiveRecord descendants; classes without a name are dropped
  # by filter_map. A Set keeps the per-file membership test cheap.
  active_record_names = Set.new(ActiveRecord::Base.descendants.filter_map(&:name))

  # Concern files are left to ConcernExtractor.
  candidate_files = Dir[Rails.root.join(MODELS_GLOB)].reject { |path| path.include?(CONCERNS_SEGMENT) }

  # extract_poro_file returns nil for non-PORO files; filter_map discards those.
  candidate_files.filter_map { |path| extract_poro_file(path, ar_names: active_record_names) }
end
|
|
56
|
+
|
|
57
|
+
# Extract a single PORO file.
|
|
58
|
+
#
|
|
59
|
+
# Returns nil if the file is not a PORO (e.g., module-only, no class
|
|
60
|
+
# or PORO pattern found, or the inferred class is an AR descendant).
|
|
61
|
+
#
|
|
62
|
+
# @param file_path [String] Absolute path to the Ruby file
|
|
63
|
+
# @param ar_names [Set<String>] Set of AR descendant names to skip
|
|
64
|
+
# @return [ExtractedUnit, nil] The extracted unit or nil
|
|
65
|
+
def extract_poro_file(file_path, ar_names: Set.new)
  source = File.read(file_path)

  # Textual screens first: the file must look like a PORO and must not be
  # a module-only file.
  return nil if !poro_file?(source) || module_only?(source)

  class_name = infer_class_name(file_path, source)
  # Skip files with no resolvable name and classes already known to be
  # ActiveRecord descendants.
  return nil if !class_name || ar_names.include?(class_name)

  ExtractedUnit.new(type: :poro, identifier: class_name, file_path: file_path).tap do |unit|
    unit.namespace = extract_namespace(class_name)
    unit.source_code = annotate_source(source, class_name)
    unit.metadata = extract_metadata(source, class_name)
    unit.dependencies = extract_dependencies(source)
  end
rescue StandardError => e
  # Best effort: a single unreadable or malformed file is logged and
  # reported as nil instead of raising to the caller.
  Rails.logger.error("Failed to extract PORO #{file_path}: #{e.message}")
  nil
end
|
|
91
|
+
|
|
92
|
+
private
|
|
93
|
+
|
|
94
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
95
|
+
# File Classification
|
|
96
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
97
|
+
|
|
98
|
+
# Determine whether a file is worth examining as a PORO.
|
|
99
|
+
#
|
|
100
|
+
# A file qualifies if it contains a class definition OR uses one of the
|
|
101
|
+
# common PORO-without-class patterns (Struct.new, Data.define).
|
|
102
|
+
# Plain constant assignments do not qualify; module-only files are rejected separately by the caller via module_only?.
|
|
103
|
+
#
|
|
104
|
+
# @param source [String] Ruby source code
|
|
105
|
+
# @return [Boolean]
|
|
106
|
+
def poro_file?(source)
  # Returns true when the source contains a named class definition or one
  # of the class-less PORO patterns (Struct.new / Data.define).
  #
  # `[\w:]` after the keyword requires a constant name to follow, so the
  # singleton opener `class << self` (legal inside a plain module) no longer
  # makes a module-only file look like it defines a class.
  return true if source.match?(/^\s*class\s+[\w:]/)

  source.match?(/\b(?:Struct\.new|Data\.define)\b/)
end
|
|
111
|
+
|
|
112
|
+
# Return true when the file defines only modules, no class keyword.
|
|
113
|
+
#
|
|
114
|
+
# @param source [String] Ruby source code
|
|
115
|
+
# @return [Boolean]
|
|
116
|
+
def module_only?(source)
  # True when the file opens at least one module and defines no named class.
  return false unless source.match?(/^\s*module\s+\w+/)

  # Require a constant name after `class`; `class << self` inside a module
  # opens the singleton class and does not define a new class, so it must
  # not stop the file from counting as module-only.
  !source.match?(/^\s*class\s+[\w:]/)
end
|
|
119
|
+
|
|
120
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
121
|
+
# Class Name Inference
|
|
122
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
123
|
+
|
|
124
|
+
# Infer the primary class name from source or fall back to file path.
|
|
125
|
+
#
|
|
126
|
+
# For regular class definitions we parse the first `class Foo` line,
|
|
127
|
+
# joining outer module namespaces when present. For Struct.new / Data.define
|
|
128
|
+
# patterns we read the constant assignment name. Falls back to the
|
|
129
|
+
# Rails camelize convention on the relative path.
|
|
130
|
+
#
|
|
131
|
+
# @param file_path [String] Absolute path to the file
|
|
132
|
+
# @param source [String] Ruby source code
|
|
133
|
+
# @return [String, nil] The inferred class name
|
|
134
|
+
def infer_class_name(file_path, source)
  # Resolution order: first `class Name` line (prefixed with enclosing
  # modules), then a `Const = Struct.new/Data.define` assignment, then the
  # file path.
  class_match = source.match(/^\s*class\s+([\w:]+)/)
  if class_match
    declared = class_match[1]
    # Already qualified in the source (e.g. Order::Update): use as written.
    return declared if declared.include?('::')

    # Only modules opened BEFORE the class line can enclose it. Scanning the
    # whole file would also pick up modules nested inside or declared after
    # the class and yield names such as "Helpers::Foo".
    enclosing = class_match.pre_match.scan(/^\s*module\s+([\w:]+)/).flatten
    return (enclosing + [declared]).join('::')
  end

  # Struct.new / Data.define: ConstantName = Struct.new(...)
  struct_match = source.match(/^(\w[\w:]*)\s*=\s*(?:Struct\.new|Data\.define)/)
  return struct_match[1] if struct_match

  # Last resort: derive the name from the file path.
  path_based_class_name(file_path)
end
|
|
153
|
+
|
|
154
|
+
# Derive a class name from a file path using Rails camelize convention.
|
|
155
|
+
#
|
|
156
|
+
# app/models/order/update.rb => Order::Update
|
|
157
|
+
# app/models/money.rb => Money
|
|
158
|
+
#
|
|
159
|
+
# @param file_path [String] Absolute path to the file
|
|
160
|
+
# @return [String] Camelize-derived class name
|
|
161
|
+
def path_based_class_name(file_path)
  # Turns <Rails.root>/app/models/order/update.rb into "Order::Update".
  file_path
    .sub("#{Rails.root}/", '')
    .sub(%r{\Aapp/models/}, '')
    # Strip only the trailing extension; sub('.rb', '') would remove the
    # first ".rb" found anywhere in the path (e.g. inside a directory name).
    .delete_suffix('.rb')
    .split('/')
    .map(&:camelize)
    .join('::')
end
|
|
170
|
+
|
|
171
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
172
|
+
# Source Annotation
|
|
173
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
174
|
+
|
|
175
|
+
# Prepend a summary annotation header to the source.
|
|
176
|
+
#
|
|
177
|
+
# @param source [String] Ruby source code
|
|
178
|
+
# @param class_name [String] The class name
|
|
179
|
+
# @return [String] Annotated source
|
|
180
|
+
def annotate_source(source, class_name)
  # Builds a boxed comment header naming the class and its parent, followed
  # by a blank line and the untouched source.
  parent_label = extract_parent_class(source) || 'none'

  header = [
    '# ╔═══════════════════════════════════════════════════════════════════════╗',
    "# ║ PORO: #{class_name.ljust(63)}║",
    "# ║ Parent: #{parent_label.ljust(61)}║",
    '# ╚═══════════════════════════════════════════════════════════════════════╝',
    ''
  ].join("\n")

  # The trailing '' above ends the box with a newline; the extra "\n" here
  # produces the blank separator line before the source.
  "#{header}\n#{source}"
end
|
|
194
|
+
|
|
195
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
196
|
+
# Metadata Extraction
|
|
197
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
198
|
+
|
|
199
|
+
# Build the metadata hash for a PORO unit.
|
|
200
|
+
#
|
|
201
|
+
# @param source [String] Ruby source code
|
|
202
|
+
# @param class_name [String] The class name
|
|
203
|
+
# @return [Hash] PORO metadata
|
|
204
|
+
def extract_metadata(source, _class_name)
  # Assembles the metadata hash from the individual source scanners. The
  # class name is accepted for signature parity but not used here.
  method_definitions = source.scan(/def\s+(?:self\.)?\w+/)

  metadata = {}
  metadata[:public_methods] = extract_public_methods(source)
  metadata[:class_methods] = extract_class_methods(source)
  metadata[:initialize_params] = extract_initialize_params(source)
  metadata[:parent_class] = extract_parent_class(source)
  metadata[:loc] = count_loc(source)
  metadata[:method_count] = method_definitions.size
  metadata
end
|
|
214
|
+
|
|
215
|
+
# Extract the parent class name from a class definition.
|
|
216
|
+
#
|
|
217
|
+
# @param source [String] Ruby source code
|
|
218
|
+
# @return [String, nil] Parent class name or nil
|
|
219
|
+
def extract_parent_class(source)
  # String#[] with a capture index returns the captured superclass name of
  # the first `class X < Y` line, or nil when no such line exists.
  source[/^\s*class\s+[\w:]+\s*<\s*([\w:]+)/, 1]
end
|
|
223
|
+
|
|
224
|
+
# Count non-blank, non-comment lines of code.
|
|
225
|
+
#
|
|
226
|
+
# @param source [String] Ruby source code
|
|
227
|
+
# @return [Integer] LOC count
|
|
228
|
+
def count_loc(source)
  # A line counts as code when, after trimming whitespace, it is neither
  # empty nor a `#` comment.
  source.each_line.count do |line|
    code = line.strip
    !(code.empty? || code.start_with?('#'))
  end
end
|
|
231
|
+
|
|
232
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
233
|
+
# Dependency Extraction
|
|
234
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
235
|
+
|
|
236
|
+
# Build the dependency array for a PORO unit using common scanners.
|
|
237
|
+
#
|
|
238
|
+
# @param source [String] Ruby source code
|
|
239
|
+
# @return [Array<Hash>] Dependency hashes with :type, :target, :via
|
|
240
|
+
def extract_dependencies(source)
  # Runs the shared scanners and keeps the first dependency seen for each
  # (type, target) pair.
  scan_common_dependencies(source).uniq { |dep| dep.values_at(:type, :target) }
end
|
|
244
|
+
end
|
|
245
|
+
end
|
|
246
|
+
end
|
|
@@ -0,0 +1,223 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'shared_utility_methods'
|
|
4
|
+
require_relative 'shared_dependency_scanner'
|
|
5
|
+
|
|
6
|
+
module CodebaseIndex
|
|
7
|
+
module Extractors
|
|
8
|
+
# PunditExtractor handles Pundit authorization policy extraction.
|
|
9
|
+
#
|
|
10
|
+
# Specifically targets Pundit convention: classes in `app/policies/`
|
|
11
|
+
# that inherit from ApplicationPolicy or follow Pundit patterns
|
|
12
|
+
# (user/record attrs, action? methods). This is distinct from the
|
|
13
|
+
# generic PolicyExtractor which handles domain eligibility policies.
|
|
14
|
+
#
|
|
15
|
+
# @example
|
|
16
|
+
# extractor = PunditExtractor.new
|
|
17
|
+
# units = extractor.extract_all
|
|
18
|
+
# post_policy = units.find { |u| u.identifier == "PostPolicy" }
|
|
19
|
+
#
|
|
20
|
+
class PunditExtractor
|
|
21
|
+
include SharedUtilityMethods
|
|
22
|
+
include SharedDependencyScanner
|
|
23
|
+
|
|
24
|
+
# Directories to scan for Pundit policies
|
|
25
|
+
PUNDIT_DIRECTORIES = %w[
|
|
26
|
+
app/policies
|
|
27
|
+
].freeze
|
|
28
|
+
|
|
29
|
+
# Standard Pundit action methods
|
|
30
|
+
PUNDIT_ACTIONS = %w[index? show? create? new? update? edit? destroy?].freeze
|
|
31
|
+
|
|
32
|
+
def initialize
  # Keep only the configured policy directories that exist under the
  # application root.
  @directories = PUNDIT_DIRECTORIES.filter_map do |relative_dir|
    absolute_dir = Rails.root.join(relative_dir)
    absolute_dir if absolute_dir.directory?
  end
end
|
|
36
|
+
|
|
37
|
+
# Extract all Pundit policy classes
|
|
38
|
+
#
|
|
39
|
+
# @return [Array<ExtractedUnit>] List of Pundit policy units
|
|
40
|
+
def extract_all
  # Collect every Ruby file below the policy directories, then extract each;
  # files that are not Pundit policies come back as nil and are dropped.
  policy_files = @directories.flat_map { |dir| Dir[dir.join('**/*.rb')] }
  policy_files.filter_map { |path| extract_pundit_file(path) }
end
|
|
47
|
+
|
|
48
|
+
# Extract a single Pundit policy file
|
|
49
|
+
#
|
|
50
|
+
# @param file_path [String] Path to the policy file
|
|
51
|
+
# @return [ExtractedUnit, nil] The extracted unit or nil if not a Pundit policy
|
|
52
|
+
def extract_pundit_file(file_path)
  source = File.read(file_path)
  class_name = extract_class_name(file_path, source)

  # Needs a resolvable class name and at least one Pundit signal.
  return nil unless class_name && pundit_policy?(source)

  ExtractedUnit.new(type: :pundit_policy, identifier: class_name, file_path: file_path).tap do |unit|
    unit.namespace = extract_namespace(class_name)
    unit.source_code = annotate_source(source, class_name)
    unit.metadata = extract_metadata(source, class_name)
    unit.dependencies = extract_dependencies(source, class_name)
  end
rescue StandardError => e
  # Best effort: log the failure and report the file as "not extracted".
  Rails.logger.error("Failed to extract Pundit policy #{file_path}: #{e.message}")
  nil
end
|
|
75
|
+
|
|
76
|
+
private
|
|
77
|
+
|
|
78
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
79
|
+
# Class Discovery
|
|
80
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
81
|
+
|
|
82
|
+
# Extract class name from source or infer from file path.
|
|
83
|
+
#
|
|
84
|
+
# @param file_path [String]
|
|
85
|
+
# @param source [String]
|
|
86
|
+
# @return [String, nil]
|
|
87
|
+
def extract_class_name(file_path, source)
  # Prefer the first `class Name` line in the source; fall back to a name
  # derived from the path below app/policies/.
  class_match = source.match(/^\s*class\s+([\w:]+)/)
  if class_match
    declared = class_match[1]
    # Already qualified in the source (e.g. Admin::PostPolicy): use as written.
    return declared if declared.include?('::')

    # Prefix modules opened before the class line so that a policy written as
    # `module Admin; class PostPolicy` is identified as Admin::PostPolicy and
    # does not collide with a top-level PostPolicy.
    enclosing = class_match.pre_match.scan(/^\s*module\s+([\w:]+)/).flatten
    return (enclosing + [declared]).join('::')
  end

  file_path
    .sub("#{Rails.root}/", '')
    .sub(%r{\Aapp/policies/}, '')
    # Strip only the trailing extension, not the first ".rb" in the path.
    .delete_suffix('.rb')
    .split('/')
    .map { |segment| segment.split('_').map(&:capitalize).join }
    .join('::')
end
|
|
98
|
+
|
|
99
|
+
# Detect whether this is a Pundit policy.
|
|
100
|
+
#
|
|
101
|
+
# @param source [String] Ruby source code
|
|
102
|
+
# @return [Boolean]
|
|
103
|
+
def pundit_policy?(source)
  # Three independent signals, any of which marks the file as a Pundit policy.

  # 1. Inherits from ApplicationPolicy. Whitespace after `<` is optional and a
  #    leading `::` is accepted, so `< ::ApplicationPolicy` (common inside a
  #    namespace module) is recognised as well.
  return true if source.match?(/<\s*(?:::)?ApplicationPolicy/)

  # 2. Exposes both user and record readers.
  return true if source.match?(/attr_reader\s+:user/) && source.match?(/attr_reader.*:record/)

  # 3. Constructor takes `user` first and the class defines a predicate method.
  source.match?(/def\s+initialize\s*\(\s*user\s*,/) && source.match?(/def\s+\w+\?/)
end
|
|
108
|
+
|
|
109
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
110
|
+
# Source Annotation
|
|
111
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
112
|
+
|
|
113
|
+
# @param source [String]
|
|
114
|
+
# @param class_name [String]
|
|
115
|
+
# @return [String]
|
|
116
|
+
def annotate_source(source, class_name)
  # Builds a boxed comment header (policy, inferred model, detected actions),
  # then a blank line, the source, and a final newline.
  model_label = infer_model(class_name).to_s
  action_list = detect_authorization_actions(source).join(', ')

  rows = [
    '# ╔═══════════════════════════════════════════════════════════════════════╗',
    "# ║ Pundit Policy: #{class_name.ljust(53)}║",
    "# ║ Model: #{model_label.ljust(61)}║",
    "# ║ Actions: #{action_list.ljust(59)}║",
    '# ╚═══════════════════════════════════════════════════════════════════════╝',
    '',
    source
  ]

  "#{rows.join("\n")}\n"
end
|
|
130
|
+
|
|
131
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
132
|
+
# Metadata Extraction
|
|
133
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
134
|
+
|
|
135
|
+
# @param source [String]
|
|
136
|
+
# @param class_name [String]
|
|
137
|
+
# @return [Hash]
|
|
138
|
+
def extract_metadata(source, class_name)
  # Builds the metadata hash for a Pundit policy unit: inferred model,
  # detected action predicates (split into standard vs. custom), structural
  # flags, method listings and simple size metrics.
  actions = detect_authorization_actions(source)

  # Non-blank, non-comment lines.
  code_lines = source.each_line.count do |line|
    code = line.strip
    !(code.empty? || code.start_with?('#'))
  end

  {
    model: infer_model(class_name),
    authorization_actions: actions,
    standard_actions: actions & PUNDIT_ACTIONS,
    custom_actions: actions - PUNDIT_ACTIONS,
    # match? already returns a strict boolean, so no `|| false` is needed.
    has_scope_class: source.match?(/class\s+Scope\b/),
    # Accept optional whitespace and a leading `::` so policies declared as
    # `< ::ApplicationPolicy` inside a namespace are flagged too.
    inherits_application_policy: source.match?(/<\s*(?:::)?ApplicationPolicy/),
    public_methods: extract_public_methods(source),
    class_methods: extract_class_methods(source),
    loc: code_lines,
    method_count: source.scan(/def\s+(?:self\.)?\w+/).size
  }
end
|
|
153
|
+
|
|
154
|
+
# Detect authorization action methods (public methods ending in ?).
|
|
155
|
+
#
|
|
156
|
+
# @param source [String]
|
|
157
|
+
# @return [Array<String>]
|
|
158
|
+
def detect_authorization_actions(source)
  # Line-based heuristic: returns the unique names of public predicate
  # methods (`def name?`) defined outside the nested Scope class, in order of
  # first appearance.
  actions = []
  hidden = false   # true while below a bare `private` / `protected` marker
  scope_depth = 0  # > 0 while inside the nested `class Scope` body

  source.each_line do |line|
    code = line.strip
    # Blank lines and comments carry no definitions; skipping them also keeps
    # words like "do"/"end" in comments from disturbing the depth counter and
    # stops commented-out `def foo?` lines from being reported.
    next if code.empty? || code.start_with?('#')

    # Everything from `class Scope` to its matching `end` is ignored. Depth
    # must account for every construct closed by `end` (def, if, case, ...):
    # counting only class/module/do would leave the Scope body at the first
    # method's `end`, and a `private` inside Scope would then hide all policy
    # actions defined after it.
    if scope_depth.positive? || code.match?(/\Aclass\s+Scope\b/)
      scope_depth = [scope_depth + scope_block_depth_delta(code), 0].max
      next
    end

    case code
    when 'private', 'protected' then hidden = true
    when 'public' then hidden = false
    end
    next if hidden

    definition = code.match(/\A(?:(\w+)\s+)?def\s+(\w+\?)/)
    next unless definition
    # Inline visibility (`private def foo?`) also makes the method non-public.
    next if %w[private protected].include?(definition[1])

    actions << definition[2]
  end

  actions.uniq
end

# Net change in `end`-terminated nesting contributed by one stripped source
# line (openers minus `end` keywords). Heuristic only: it does not parse
# strings, heredocs or trailing comments.
def scope_block_depth_delta(code)
  opens = 0
  loop_header = code.match?(/\A(?:while|until|for)\b/)

  if loop_header || code.match?(/\A(?:class|module|if|unless|case|begin)\b/)
    # Statement-leading keyword (modifier forms never start the line).
    opens += 1
  elsif code.match?(/\A(?:\w+\s+)?def\b/)
    # Endless definitions (`def foo = expr`) have no closing `end`.
    endless = code.match?(/\bdef\s+[\w.]+[?!]?\s*(?:\([^)]*\))?\s+=\s/)
    opens += 1 unless endless
  elsif code.match?(/=\s*(?:if|unless|case|begin|while|until)\b/)
    # Expression form on the right-hand side of an assignment.
    opens += 1
  end

  # `do` blocks; the optional `do` of a while/until/for header was already
  # counted through the leading keyword. The lookarounds exclude `.do`,
  # `:do`, `do:` and identifiers that merely contain the letters.
  opens += code.scan(/(?<![\w.:@])do(?![\w?!:])/).size unless loop_header
  closes = code.scan(/(?<![\w.:@])end(?![\w?!:])/).size

  opens - closes
end
|
|
195
|
+
|
|
196
|
+
# Infer the model name from the policy class name.
|
|
197
|
+
#
|
|
198
|
+
# @param class_name [String]
|
|
199
|
+
# @return [String]
|
|
200
|
+
def infer_model(class_name)
  # Take the last constant segment and drop a trailing "Policy":
  # "Admin::PostPolicy" => "Post".
  class_name.split('::').last.delete_suffix('Policy')
end
|
|
204
|
+
|
|
205
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
206
|
+
# Dependency Extraction
|
|
207
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
208
|
+
|
|
209
|
+
# @param source [String]
|
|
210
|
+
# @param class_name [String]
|
|
211
|
+
# @return [Array<Hash>]
|
|
212
|
+
def extract_dependencies(source, class_name)
  # The policy always depends on the model it authorizes; scanner results
  # follow, and the first entry per (type, target) pair wins.
  authorized_model = { type: :model, target: infer_model(class_name), via: :authorization }

  combined = [authorized_model] + scan_model_dependencies(source) + scan_service_dependencies(source)
  combined.uniq { |dep| dep.values_at(:type, :target) }
end
|
|
221
|
+
end
|
|
222
|
+
end
|
|
223
|
+
end
|