codebase_index 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/CHANGELOG.md +29 -0
- data/CODE_OF_CONDUCT.md +83 -0
- data/CONTRIBUTING.md +65 -0
- data/LICENSE.txt +21 -0
- data/README.md +481 -0
- data/exe/codebase-console-mcp +22 -0
- data/exe/codebase-index-mcp +61 -0
- data/exe/codebase-index-mcp-http +64 -0
- data/exe/codebase-index-mcp-start +58 -0
- data/lib/codebase_index/ast/call_site_extractor.rb +106 -0
- data/lib/codebase_index/ast/method_extractor.rb +76 -0
- data/lib/codebase_index/ast/node.rb +88 -0
- data/lib/codebase_index/ast/parser.rb +653 -0
- data/lib/codebase_index/ast.rb +6 -0
- data/lib/codebase_index/builder.rb +137 -0
- data/lib/codebase_index/chunking/chunk.rb +84 -0
- data/lib/codebase_index/chunking/semantic_chunker.rb +290 -0
- data/lib/codebase_index/console/adapters/cache_adapter.rb +58 -0
- data/lib/codebase_index/console/adapters/good_job_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/sidekiq_adapter.rb +66 -0
- data/lib/codebase_index/console/adapters/solid_queue_adapter.rb +66 -0
- data/lib/codebase_index/console/audit_logger.rb +75 -0
- data/lib/codebase_index/console/bridge.rb +170 -0
- data/lib/codebase_index/console/confirmation.rb +90 -0
- data/lib/codebase_index/console/connection_manager.rb +173 -0
- data/lib/codebase_index/console/console_response_renderer.rb +78 -0
- data/lib/codebase_index/console/model_validator.rb +81 -0
- data/lib/codebase_index/console/safe_context.rb +82 -0
- data/lib/codebase_index/console/server.rb +557 -0
- data/lib/codebase_index/console/sql_validator.rb +172 -0
- data/lib/codebase_index/console/tools/tier1.rb +118 -0
- data/lib/codebase_index/console/tools/tier2.rb +117 -0
- data/lib/codebase_index/console/tools/tier3.rb +110 -0
- data/lib/codebase_index/console/tools/tier4.rb +79 -0
- data/lib/codebase_index/coordination/pipeline_lock.rb +109 -0
- data/lib/codebase_index/cost_model/embedding_cost.rb +88 -0
- data/lib/codebase_index/cost_model/estimator.rb +128 -0
- data/lib/codebase_index/cost_model/provider_pricing.rb +67 -0
- data/lib/codebase_index/cost_model/storage_cost.rb +52 -0
- data/lib/codebase_index/cost_model.rb +22 -0
- data/lib/codebase_index/db/migrations/001_create_units.rb +38 -0
- data/lib/codebase_index/db/migrations/002_create_edges.rb +35 -0
- data/lib/codebase_index/db/migrations/003_create_embeddings.rb +37 -0
- data/lib/codebase_index/db/migrations/004_create_snapshots.rb +45 -0
- data/lib/codebase_index/db/migrations/005_create_snapshot_units.rb +40 -0
- data/lib/codebase_index/db/migrator.rb +71 -0
- data/lib/codebase_index/db/schema_version.rb +73 -0
- data/lib/codebase_index/dependency_graph.rb +227 -0
- data/lib/codebase_index/embedding/indexer.rb +130 -0
- data/lib/codebase_index/embedding/openai.rb +105 -0
- data/lib/codebase_index/embedding/provider.rb +135 -0
- data/lib/codebase_index/embedding/text_preparer.rb +112 -0
- data/lib/codebase_index/evaluation/baseline_runner.rb +115 -0
- data/lib/codebase_index/evaluation/evaluator.rb +146 -0
- data/lib/codebase_index/evaluation/metrics.rb +79 -0
- data/lib/codebase_index/evaluation/query_set.rb +148 -0
- data/lib/codebase_index/evaluation/report_generator.rb +90 -0
- data/lib/codebase_index/extracted_unit.rb +145 -0
- data/lib/codebase_index/extractor.rb +956 -0
- data/lib/codebase_index/extractors/action_cable_extractor.rb +228 -0
- data/lib/codebase_index/extractors/ast_source_extraction.rb +46 -0
- data/lib/codebase_index/extractors/behavioral_profile.rb +309 -0
- data/lib/codebase_index/extractors/caching_extractor.rb +261 -0
- data/lib/codebase_index/extractors/callback_analyzer.rb +232 -0
- data/lib/codebase_index/extractors/concern_extractor.rb +253 -0
- data/lib/codebase_index/extractors/configuration_extractor.rb +219 -0
- data/lib/codebase_index/extractors/controller_extractor.rb +494 -0
- data/lib/codebase_index/extractors/database_view_extractor.rb +278 -0
- data/lib/codebase_index/extractors/decorator_extractor.rb +260 -0
- data/lib/codebase_index/extractors/engine_extractor.rb +204 -0
- data/lib/codebase_index/extractors/event_extractor.rb +211 -0
- data/lib/codebase_index/extractors/factory_extractor.rb +289 -0
- data/lib/codebase_index/extractors/graphql_extractor.rb +917 -0
- data/lib/codebase_index/extractors/i18n_extractor.rb +117 -0
- data/lib/codebase_index/extractors/job_extractor.rb +369 -0
- data/lib/codebase_index/extractors/lib_extractor.rb +249 -0
- data/lib/codebase_index/extractors/mailer_extractor.rb +339 -0
- data/lib/codebase_index/extractors/manager_extractor.rb +202 -0
- data/lib/codebase_index/extractors/middleware_extractor.rb +133 -0
- data/lib/codebase_index/extractors/migration_extractor.rb +469 -0
- data/lib/codebase_index/extractors/model_extractor.rb +960 -0
- data/lib/codebase_index/extractors/phlex_extractor.rb +252 -0
- data/lib/codebase_index/extractors/policy_extractor.rb +214 -0
- data/lib/codebase_index/extractors/poro_extractor.rb +246 -0
- data/lib/codebase_index/extractors/pundit_extractor.rb +223 -0
- data/lib/codebase_index/extractors/rails_source_extractor.rb +473 -0
- data/lib/codebase_index/extractors/rake_task_extractor.rb +343 -0
- data/lib/codebase_index/extractors/route_extractor.rb +181 -0
- data/lib/codebase_index/extractors/scheduled_job_extractor.rb +331 -0
- data/lib/codebase_index/extractors/serializer_extractor.rb +334 -0
- data/lib/codebase_index/extractors/service_extractor.rb +254 -0
- data/lib/codebase_index/extractors/shared_dependency_scanner.rb +91 -0
- data/lib/codebase_index/extractors/shared_utility_methods.rb +99 -0
- data/lib/codebase_index/extractors/state_machine_extractor.rb +398 -0
- data/lib/codebase_index/extractors/test_mapping_extractor.rb +225 -0
- data/lib/codebase_index/extractors/validator_extractor.rb +225 -0
- data/lib/codebase_index/extractors/view_component_extractor.rb +310 -0
- data/lib/codebase_index/extractors/view_template_extractor.rb +261 -0
- data/lib/codebase_index/feedback/gap_detector.rb +89 -0
- data/lib/codebase_index/feedback/store.rb +119 -0
- data/lib/codebase_index/flow_analysis/operation_extractor.rb +209 -0
- data/lib/codebase_index/flow_analysis/response_code_mapper.rb +154 -0
- data/lib/codebase_index/flow_assembler.rb +290 -0
- data/lib/codebase_index/flow_document.rb +191 -0
- data/lib/codebase_index/flow_precomputer.rb +102 -0
- data/lib/codebase_index/formatting/base.rb +40 -0
- data/lib/codebase_index/formatting/claude_adapter.rb +98 -0
- data/lib/codebase_index/formatting/generic_adapter.rb +56 -0
- data/lib/codebase_index/formatting/gpt_adapter.rb +64 -0
- data/lib/codebase_index/formatting/human_adapter.rb +78 -0
- data/lib/codebase_index/graph_analyzer.rb +374 -0
- data/lib/codebase_index/mcp/index_reader.rb +394 -0
- data/lib/codebase_index/mcp/renderers/claude_renderer.rb +81 -0
- data/lib/codebase_index/mcp/renderers/json_renderer.rb +17 -0
- data/lib/codebase_index/mcp/renderers/markdown_renderer.rb +352 -0
- data/lib/codebase_index/mcp/renderers/plain_renderer.rb +240 -0
- data/lib/codebase_index/mcp/server.rb +935 -0
- data/lib/codebase_index/mcp/tool_response_renderer.rb +62 -0
- data/lib/codebase_index/model_name_cache.rb +51 -0
- data/lib/codebase_index/notion/client.rb +217 -0
- data/lib/codebase_index/notion/exporter.rb +219 -0
- data/lib/codebase_index/notion/mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/column_mapper.rb +65 -0
- data/lib/codebase_index/notion/mappers/migration_mapper.rb +39 -0
- data/lib/codebase_index/notion/mappers/model_mapper.rb +164 -0
- data/lib/codebase_index/notion/rate_limiter.rb +68 -0
- data/lib/codebase_index/observability/health_check.rb +81 -0
- data/lib/codebase_index/observability/instrumentation.rb +34 -0
- data/lib/codebase_index/observability/structured_logger.rb +75 -0
- data/lib/codebase_index/operator/error_escalator.rb +81 -0
- data/lib/codebase_index/operator/pipeline_guard.rb +99 -0
- data/lib/codebase_index/operator/status_reporter.rb +80 -0
- data/lib/codebase_index/railtie.rb +26 -0
- data/lib/codebase_index/resilience/circuit_breaker.rb +99 -0
- data/lib/codebase_index/resilience/index_validator.rb +185 -0
- data/lib/codebase_index/resilience/retryable_provider.rb +108 -0
- data/lib/codebase_index/retrieval/context_assembler.rb +249 -0
- data/lib/codebase_index/retrieval/query_classifier.rb +131 -0
- data/lib/codebase_index/retrieval/ranker.rb +273 -0
- data/lib/codebase_index/retrieval/search_executor.rb +327 -0
- data/lib/codebase_index/retriever.rb +160 -0
- data/lib/codebase_index/ruby_analyzer/class_analyzer.rb +190 -0
- data/lib/codebase_index/ruby_analyzer/dataflow_analyzer.rb +78 -0
- data/lib/codebase_index/ruby_analyzer/fqn_builder.rb +18 -0
- data/lib/codebase_index/ruby_analyzer/mermaid_renderer.rb +275 -0
- data/lib/codebase_index/ruby_analyzer/method_analyzer.rb +143 -0
- data/lib/codebase_index/ruby_analyzer/trace_enricher.rb +139 -0
- data/lib/codebase_index/ruby_analyzer.rb +87 -0
- data/lib/codebase_index/session_tracer/file_store.rb +111 -0
- data/lib/codebase_index/session_tracer/middleware.rb +143 -0
- data/lib/codebase_index/session_tracer/redis_store.rb +112 -0
- data/lib/codebase_index/session_tracer/session_flow_assembler.rb +263 -0
- data/lib/codebase_index/session_tracer/session_flow_document.rb +223 -0
- data/lib/codebase_index/session_tracer/solid_cache_store.rb +145 -0
- data/lib/codebase_index/session_tracer/store.rb +67 -0
- data/lib/codebase_index/storage/graph_store.rb +120 -0
- data/lib/codebase_index/storage/metadata_store.rb +169 -0
- data/lib/codebase_index/storage/pgvector.rb +163 -0
- data/lib/codebase_index/storage/qdrant.rb +172 -0
- data/lib/codebase_index/storage/vector_store.rb +156 -0
- data/lib/codebase_index/temporal/snapshot_store.rb +341 -0
- data/lib/codebase_index/version.rb +5 -0
- data/lib/codebase_index.rb +223 -0
- data/lib/generators/codebase_index/install_generator.rb +32 -0
- data/lib/generators/codebase_index/pgvector_generator.rb +37 -0
- data/lib/generators/codebase_index/templates/add_pgvector_to_codebase_index.rb.erb +15 -0
- data/lib/generators/codebase_index/templates/create_codebase_index_tables.rb.erb +43 -0
- data/lib/tasks/codebase_index.rake +583 -0
- data/lib/tasks/codebase_index_evaluation.rake +115 -0
- metadata +252 -0
|
@@ -0,0 +1,331 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require 'yaml'
|
|
4
|
+
|
|
5
|
+
module CodebaseIndex
|
|
6
|
+
module Extractors
|
|
7
|
+
# ScheduledJobExtractor handles scheduled/recurring job configuration extraction.
|
|
8
|
+
#
|
|
9
|
+
# Scans three schedule file formats to extract one unit per scheduled entry:
|
|
10
|
+
# - `config/recurring.yml` — Solid Queue recurring tasks
|
|
11
|
+
# - `config/sidekiq_cron.yml` — Sidekiq-Cron scheduled jobs
|
|
12
|
+
# - `config/schedule.rb` — Whenever DSL
|
|
13
|
+
#
|
|
14
|
+
# Each scheduled entry becomes its own ExtractedUnit with type `:scheduled_job`.
|
|
15
|
+
# Identifiers are prefixed with "scheduled:" to avoid collision with JobExtractor units.
|
|
16
|
+
#
|
|
17
|
+
# @example
|
|
18
|
+
# extractor = ScheduledJobExtractor.new
|
|
19
|
+
# units = extractor.extract_all
|
|
20
|
+
# cleanup = units.find { |u| u.identifier == "scheduled:periodic_cleanup" }
|
|
21
|
+
#
|
|
22
|
+
class ScheduledJobExtractor
|
|
23
|
+
# Schedule files to scan, mapped to their format
|
|
24
|
+
SCHEDULE_FILES = {
|
|
25
|
+
'config/recurring.yml' => :solid_queue,
|
|
26
|
+
'config/sidekiq_cron.yml' => :sidekiq_cron,
|
|
27
|
+
'config/schedule.rb' => :whenever
|
|
28
|
+
}.freeze
|
|
29
|
+
|
|
30
|
+
# Common cron patterns mapped to human-readable descriptions
|
|
31
|
+
CRON_HUMANIZE = {
|
|
32
|
+
'* * * * *' => 'every minute',
|
|
33
|
+
'0 * * * *' => 'every hour',
|
|
34
|
+
'0 0 * * *' => 'daily at midnight',
|
|
35
|
+
'0 0 * * 0' => 'weekly on Sunday',
|
|
36
|
+
'0 0 * * 1' => 'weekly on Monday',
|
|
37
|
+
'0 0 1 * *' => 'monthly on the 1st',
|
|
38
|
+
'0 0 1 1 *' => 'yearly on January 1st'
|
|
39
|
+
}.freeze
|
|
40
|
+
|
|
41
|
+
# Environment keys to unwrap when nested in YAML
|
|
42
|
+
ENVIRONMENT_KEYS = %w[production development test staging].freeze
|
|
43
|
+
|
|
44
|
+
def initialize
  # Resolve every known schedule file against the Rails root and remember
  # only the ones present on disk, keyed by absolute path => format.
  present = SCHEDULE_FILES.filter_map do |relative, fmt|
    absolute = Rails.root.join(relative)
    [absolute.to_s, fmt] if File.exist?(absolute)
  end
  @schedule_files = present.to_h
end
|
|
50
|
+
|
|
51
|
+
# Extract all scheduled job entries from all discovered schedule files.
|
|
52
|
+
#
|
|
53
|
+
# @return [Array<ExtractedUnit>] List of scheduled job units
|
|
54
|
+
def extract_all
  # Each file yields an Array of units; accumulate them into one flat list.
  @schedule_files.each_with_object([]) do |(path, fmt), collected|
    collected.concat(extract_scheduled_job_file(path, fmt))
  end
end
|
|
59
|
+
|
|
60
|
+
# Extract scheduled job entries from a single schedule file.
|
|
61
|
+
#
|
|
62
|
+
# Unlike other file-based extractors that return a single ExtractedUnit,
|
|
63
|
+
# this returns an Array because each schedule file contains multiple entries.
|
|
64
|
+
#
|
|
65
|
+
# @param file_path [String] Path to the schedule file
|
|
66
|
+
# @param format [Symbol, nil] One of :solid_queue, :sidekiq_cron, :whenever (inferred from filename if nil)
|
|
67
|
+
# @return [Array<ExtractedUnit>] List of scheduled job units
|
|
68
|
+
def extract_scheduled_job_file(file_path, format = nil)
  # Fall back to filename-based detection when the caller gave no format.
  resolved = format || infer_format(file_path)

  if %i[solid_queue sidekiq_cron].include?(resolved)
    extract_yaml_schedule(file_path, resolved)
  elsif resolved == :whenever
    extract_whenever_schedule(file_path)
  else
    # Unrecognised format: nothing to extract.
    []
  end
rescue StandardError => e
  # Best-effort: a broken schedule file is logged and contributes no units.
  Rails.logger.error("Failed to extract scheduled jobs from #{file_path}: #{e.message}")
  []
end
|
|
82
|
+
|
|
83
|
+
private
|
|
84
|
+
|
|
85
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
86
|
+
# YAML-based formats (Solid Queue, Sidekiq-Cron)
|
|
87
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
88
|
+
|
|
89
|
+
# Parse a YAML schedule file and produce units.
|
|
90
|
+
#
|
|
91
|
+
# @param file_path [String] Path to the YAML file
|
|
92
|
+
# @param format [Symbol] :solid_queue or :sidekiq_cron
|
|
93
|
+
# @return [Array<ExtractedUnit>]
|
|
94
|
+
def extract_yaml_schedule(file_path, format)
  source = File.read(file_path)

  # aliases: true — schedule files frequently share settings through YAML
  # anchors / merge keys (`<<: *defaults`). Without it Psych raises
  # BadAlias and the caller's rescue silently drops the whole file.
  data = YAML.safe_load(source, permitted_classes: [Symbol], aliases: true)

  # Empty files and non-mapping documents carry no entries.
  return [] unless data.is_a?(Hash) && data.any?

  entries = unwrap_environment_nesting(data)
  return [] unless entries.is_a?(Hash)

  # Only mapping-valued entries describe a job; scalars are skipped.
  entries.filter_map do |task_name, config|
    next unless config.is_a?(Hash)

    build_yaml_unit(task_name, config, file_path, source, format)
  end
end
|
|
109
|
+
|
|
110
|
+
# Detect and unwrap environment-nested YAML.
|
|
111
|
+
#
|
|
112
|
+
# If the top-level keys are environment names (production, development, etc.),
|
|
113
|
+
# unwrap to the first environment's entries.
|
|
114
|
+
#
|
|
115
|
+
# @param data [Hash] Parsed YAML data
|
|
116
|
+
# @return [Hash] Unwrapped entries
|
|
117
|
+
def unwrap_environment_nesting(data)
  # Any top-level key that is not an environment name means the file is flat.
  foreign_keys = data.keys.map(&:to_s) - ENVIRONMENT_KEYS
  return data unless foreign_keys.empty?

  # Environment-nested: use the first environment's entries (nil => empty).
  data.values.first || {}
end
|
|
124
|
+
|
|
125
|
+
# Build an ExtractedUnit from a YAML schedule entry.
|
|
126
|
+
#
|
|
127
|
+
# @param task_name [String] The task/job name key
|
|
128
|
+
# @param config [Hash] The entry configuration
|
|
129
|
+
# @param file_path [String] Path to the schedule file
|
|
130
|
+
# @param source [String] Raw file content
|
|
131
|
+
# @param format [Symbol] :solid_queue or :sidekiq_cron
|
|
132
|
+
# @return [ExtractedUnit]
|
|
133
|
+
def build_yaml_unit(task_name, config, file_path, source, format)
  job_class = config['class']
  cron = extract_cron(config, format)

  ExtractedUnit.new(
    type: :scheduled_job,
    identifier: "scheduled:#{task_name}",
    file_path: file_path
  ).tap do |unit|
    if job_class
      # Namespace is everything before the last `::` segment, if any.
      segments = job_class.split('::')
      unit.namespace = job_class.include?('::') ? segments[0..-2].join('::') : nil
    end

    # The whole schedule file is stored as the unit's source.
    unit.source_code = source
    unit.metadata = {
      schedule_format: format,
      job_class: job_class,
      cron_expression: cron,
      queue: config['queue'],
      args: config['args'],
      frequency_human_readable: humanize_frequency(cron, format)
    }
    unit.dependencies = build_dependencies(job_class)
  end
end
|
|
157
|
+
|
|
158
|
+
# Extract the cron/schedule expression from config.
|
|
159
|
+
#
|
|
160
|
+
# @param config [Hash] Entry configuration
|
|
161
|
+
# @param format [Symbol] :solid_queue or :sidekiq_cron
|
|
162
|
+
# @return [String, nil]
|
|
163
|
+
def extract_cron(config, format)
  # Each YAML format stores its schedule expression under a different key.
  schedule_key = { solid_queue: 'schedule', sidekiq_cron: 'cron' }[format]
  config[schedule_key] if schedule_key
end
|
|
171
|
+
|
|
172
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
173
|
+
# Whenever DSL (config/schedule.rb)
|
|
174
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
175
|
+
|
|
176
|
+
# Parse a Whenever schedule.rb file using regex.
|
|
177
|
+
#
|
|
178
|
+
# @param file_path [String] Path to the schedule.rb file
|
|
179
|
+
# @return [Array<ExtractedUnit>]
|
|
180
|
+
def extract_whenever_schedule(file_path)
  source = File.read(file_path)

  # One unit per parsed `every` block; the position keeps identifiers unique.
  parse_whenever_blocks(source).map.with_index do |parsed, position|
    build_whenever_unit(parsed, position, file_path, source)
  end
end
|
|
188
|
+
|
|
189
|
+
# Parse `every ... do ... end` blocks from Whenever DSL.
|
|
190
|
+
#
|
|
191
|
+
# @param source [String] Ruby source code
|
|
192
|
+
# @return [Array<Hash>] Parsed block data
|
|
193
|
+
def parse_whenever_blocks(source)
  # Match: every <frequency>[, options] do ... end
  #
  # * `every` must open its line (after optional indentation), so prose or
  #   commented-out schedules such as "# every hour do" are not picked up.
  # * The block stops at an `end` keyword that opens a line. A bare "end"
  #   substring would cut bodies short at names like `send_report` or
  #   `weekend`.
  block_pattern = /^[ \t]*every\s+(.+?)\s+do\s*\n(.*?)^[ \t]*end\b/m

  source.scan(block_pattern).map do |frequency_str, body|
    # Clean up the frequency — strip trailing options like ", at: '...'"
    frequency = frequency_str.strip.sub(/,\s*at:.*\z/, '').strip

    command_type, command_body = detect_whenever_command(body)
    # Only runner commands can name a job class.
    job_class = command_type == :runner ? extract_job_class_from_runner(command_body) : nil

    {
      frequency: frequency,
      frequency_str: frequency_str.strip,
      command_type: command_type,
      command_body: command_body,
      job_class: job_class
    }
  end
end
|
|
214
|
+
|
|
215
|
+
# Detect the command type inside a Whenever block body.
|
|
216
|
+
#
|
|
217
|
+
# @param body [String] Block body content
|
|
218
|
+
# @return [Array<Symbol, String>] Command type and body
|
|
219
|
+
def detect_whenever_command(body)
  # Checked in priority order: runner, then rake, then command. The argument
  # may be wrapped in either double or single quotes; capture 1 holds a
  # double-quoted argument, capture 2 a single-quoted one.
  %i[runner rake command].each do |type|
    match = body.match(/#{type}\s+(?:"([^"]+)"|'([^']+)')/)
    return [type, match[1] || match[2]] if match
  end

  # Nothing recognised: hand back the trimmed body for inspection.
  [:unknown, body.strip]
end
|
|
231
|
+
|
|
232
|
+
# Extract a job class name from a runner string.
|
|
233
|
+
#
|
|
234
|
+
# Looks for patterns like `MyJob.perform_later` or `MyJob.perform_now`.
|
|
235
|
+
#
|
|
236
|
+
# @param runner_str [String] The runner command string
|
|
237
|
+
# @return [String, nil] The job class name or nil
|
|
238
|
+
def extract_job_class_from_runner(runner_str)
  # Capture the (optionally namespaced) constant that receives
  # perform_later / perform_now; nil when absent or when no string is given.
  runner_str[/([A-Z]\w*(?:::\w+)*)\.perform_(later|now)/, 1] if runner_str
end
|
|
244
|
+
|
|
245
|
+
# Build an ExtractedUnit from a Whenever block.
|
|
246
|
+
#
|
|
247
|
+
# @param block [Hash] Parsed block data
|
|
248
|
+
# @param index [Integer] Block index for identifier uniqueness
|
|
249
|
+
# @param file_path [String] Path to schedule.rb
|
|
250
|
+
# @param source [String] Raw file content
|
|
251
|
+
# @return [ExtractedUnit]
|
|
252
|
+
def build_whenever_unit(block, index, file_path, source)
  job_class = block[:job_class]
  frequency = block[:frequency]

  # Identifier embeds the job class when known; the index keeps it unique.
  identifier = if job_class
                 "scheduled:whenever_#{job_class.underscore}_#{index}"
               else
                 "scheduled:whenever_task_#{index}"
               end

  ExtractedUnit.new(type: :scheduled_job, identifier: identifier, file_path: file_path).tap do |unit|
    # Only namespaced classes get a namespace assigned.
    unit.namespace = job_class.split('::')[0..-2].join('::') if job_class&.include?('::')
    unit.source_code = source
    unit.metadata = {
      schedule_format: :whenever,
      job_class: job_class,
      cron_expression: frequency,
      command_type: block[:command_type],
      # The Whenever frequency is reused as-is for the readable form.
      frequency_human_readable: frequency
    }
    unit.dependencies = build_dependencies(job_class)
  end
end
|
|
278
|
+
|
|
279
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
280
|
+
# Format Detection
|
|
281
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
282
|
+
|
|
283
|
+
# Infer the schedule format from the file path.
|
|
284
|
+
#
|
|
285
|
+
# @param file_path [String] Path to the schedule file
|
|
286
|
+
# @return [Symbol] One of :solid_queue, :sidekiq_cron, :whenever
|
|
287
|
+
def infer_format(file_path)
  target = File.basename(file_path)

  # Compare on basename only, so the file may live under any directory.
  entry = SCHEDULE_FILES.find { |relative, _fmt| File.basename(relative) == target }
  entry ? entry.last : :unknown
end
|
|
294
|
+
|
|
295
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
296
|
+
# Shared helpers
|
|
297
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
298
|
+
|
|
299
|
+
# Build dependency array linking to a job class.
|
|
300
|
+
#
|
|
301
|
+
# @param job_class [String, nil] The job class name
|
|
302
|
+
# @return [Array<Hash>]
|
|
303
|
+
def build_dependencies(job_class)
  # A schedule entry depends on at most one job; none when the class is unknown.
  job_class ? [{ type: :job, target: job_class, via: :scheduled }] : []
end
|
|
308
|
+
|
|
309
|
+
# Humanize a cron expression or Solid Queue frequency string.
|
|
310
|
+
#
|
|
311
|
+
# @param expression [String, nil] Cron expression or frequency
|
|
312
|
+
# @param format [Symbol] Schedule format
|
|
313
|
+
# @return [String, nil]
|
|
314
|
+
def humanize_frequency(expression, format)
  return nil unless expression

  # Solid Queue schedules are already human-readable
  return expression if format == :solid_queue

  # Collapse whitespace runs so that "0  0 * * *" (extra spaces or tabs
  # between fields) still matches the lookup table and the step pattern.
  normalized = expression.to_s.split.join(' ')

  # Check exact matches
  return CRON_HUMANIZE[normalized] if CRON_HUMANIZE.key?(normalized)

  # Check */N minute pattern; a step of 1 reads as "every minute" rather
  # than the ungrammatical "every 1 minutes".
  step = normalized[%r{\A\*/(\d+) \* \* \* \*\z}, 1]
  return(step.to_i == 1 ? 'every minute' : "every #{step} minutes") if step

  # Fallback: return raw expression
  expression
end
|
|
329
|
+
end
|
|
330
|
+
end
|
|
331
|
+
end
|
|
@@ -0,0 +1,334 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative 'shared_utility_methods'
|
|
4
|
+
require_relative 'shared_dependency_scanner'
|
|
5
|
+
|
|
6
|
+
module CodebaseIndex
|
|
7
|
+
module Extractors
|
|
8
|
+
# SerializerExtractor handles extraction of serializers, blueprinters, and decorators.
|
|
9
|
+
#
|
|
10
|
+
# Serializers define the API contract — what data is exposed and how it's shaped.
|
|
11
|
+
# They often wrap models, select attributes, and define associations that map
|
|
12
|
+
# directly to JSON responses. Understanding these is critical for API-aware
|
|
13
|
+
# code analysis.
|
|
14
|
+
#
|
|
15
|
+
# Supports:
|
|
16
|
+
# - ActiveModel::Serializer (AMS)
|
|
17
|
+
# - Blueprinter::Base
|
|
18
|
+
# - Draper::Decorator
|
|
19
|
+
#
|
|
20
|
+
# @example
|
|
21
|
+
# extractor = SerializerExtractor.new
|
|
22
|
+
# units = extractor.extract_all
|
|
23
|
+
# user_serializer = units.find { |u| u.identifier == "UserSerializer" }
|
|
24
|
+
#
|
|
25
|
+
class SerializerExtractor
|
|
26
|
+
include SharedUtilityMethods
|
|
27
|
+
include SharedDependencyScanner
|
|
28
|
+
|
|
29
|
+
# Directories to scan for serializer-like files
|
|
30
|
+
SERIALIZER_DIRECTORIES = %w[
|
|
31
|
+
app/serializers
|
|
32
|
+
app/blueprinters
|
|
33
|
+
app/decorators
|
|
34
|
+
].freeze
|
|
35
|
+
|
|
36
|
+
# Known base classes for runtime discovery
|
|
37
|
+
BASE_CLASSES = {
|
|
38
|
+
'ActiveModel::Serializer' => :ams,
|
|
39
|
+
'Blueprinter::Base' => :blueprinter,
|
|
40
|
+
'Draper::Decorator' => :draper
|
|
41
|
+
}.freeze
|
|
42
|
+
|
|
43
|
+
def initialize
  # Keep only the conventional serializer directories that exist in this app.
  @directories = SERIALIZER_DIRECTORIES.filter_map do |relative|
    candidate = Rails.root.join(relative)
    candidate if candidate.directory?
  end
end
|
|
47
|
+
|
|
48
|
+
# Extract all serializers, blueprinters, and decorators in the application
|
|
49
|
+
#
|
|
50
|
+
# @return [Array<ExtractedUnit>] List of serializer units
|
|
51
|
+
def extract_all
  # File-based discovery (catches everything in known directories)
  units = @directories.flat_map do |dir|
    Dir[dir.join('**/*.rb')].filter_map { |file| extract_serializer_file(file) }
  end

  # Class-based discovery for loaded gems; identifiers already produced by
  # the file pass are skipped so each class is reported once.
  seen = units.to_set(&:identifier)
  BASE_CLASSES.each_key do |base_class_name|
    base_class = begin
      base_class_name.constantize
    rescue NameError
      # The library providing this base class is not loaded.
      nil
    end
    next unless base_class

    base_class.descendants.each do |klass|
      klass_name = klass.name
      # Skip anonymous classes and anything already extracted.
      next if klass_name.nil? || seen.include?(klass_name)

      unit = extract_serializer_class(klass, base_class_name)
      next unless unit

      units << unit
      seen << unit.identifier
    end
  end

  units.compact
end
|
|
86
|
+
|
|
87
|
+
# Extract a serializer from its file
|
|
88
|
+
#
|
|
89
|
+
# @param file_path [String] Path to the serializer file
|
|
90
|
+
# @return [ExtractedUnit, nil] The extracted unit, or nil if not a serializer
|
|
91
|
+
def extract_serializer_file(file_path)
  source = File.read(file_path)
  class_name = extract_class_name(file_path, source)

  # Skip files with no resolvable class name or no serializer markers.
  return nil unless class_name
  return nil unless serializer_file?(source)

  ExtractedUnit.new(type: :serializer, identifier: class_name, file_path: file_path).tap do |unit|
    unit.namespace = extract_namespace(class_name)
    unit.source_code = annotate_source(source, class_name)
    unit.metadata = extract_metadata_from_source(source, class_name)
    unit.dependencies = extract_dependencies(source)
  end
rescue StandardError => e
  # Best-effort: one unreadable file must not abort the whole extraction.
  Rails.logger.error("Failed to extract serializer #{file_path}: #{e.message}")
  nil
end
|
|
114
|
+
|
|
115
|
+
private
|
|
116
|
+
|
|
117
|
+
# Extract a serializer from its class (runtime introspection)
|
|
118
|
+
#
|
|
119
|
+
# @param klass [Class] The serializer class
|
|
120
|
+
# @param base_class_name [String] Name of the detected base class
|
|
121
|
+
# @return [ExtractedUnit, nil] The extracted unit
|
|
122
|
+
def extract_serializer_class(klass, base_class_name)
  class_name = klass.name
  # Anonymous classes cannot be identified.
  return nil if class_name.nil?

  file_path = source_file_for(klass)
  # Fall back to an empty source when the file cannot be located on disk.
  source = if file_path && File.exist?(file_path)
             File.read(file_path)
           else
             ''
           end

  ExtractedUnit.new(type: :serializer, identifier: class_name, file_path: file_path).tap do |unit|
    unit.namespace = extract_namespace(class_name)
    unit.source_code = annotate_source(source, class_name)
    unit.metadata = extract_metadata_from_class(klass, source, base_class_name)
    unit.dependencies = extract_dependencies(source)
  end
rescue StandardError => e
  Rails.logger.error("Failed to extract serializer #{klass.name}: #{e.message}")
  nil
end
|
|
144
|
+
|
|
145
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
146
|
+
# Class Discovery
|
|
147
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
148
|
+
|
|
149
|
+
# Determine the class name for a serializer file.
#
# @param file_path [String] Path to the file
# @param source [String] File contents
# @return [String] Name from the first `class` declaration, otherwise one
#   derived from the path
def extract_class_name(file_path, source)
  declared = source[/^\s*class\s+([\w:]+)/, 1]
  return declared if declared

  # Fall back to convention: path relative to the serializer directory,
  # minus the extension, camelized.
  relative = file_path.sub("#{Rails.root}/", '')
  stem = relative.sub(%r{^app/(serializers|blueprinters|decorators)/}, '').sub('.rb', '')
  stem.camelize
end
|
|
159
|
+
|
|
160
|
+
# Heuristic check for serializer, blueprint or decorator source.
#
# @param source [String] File contents
# @return [Boolean] true when any known marker appears in the source
def serializer_file?(source)
  markers = [
    # Inheritance from a known or conventional base class
    /< ActiveModel::Serializer/,
    /< Blueprinter::Base/,
    /< Draper::Decorator/,
    /< ApplicationSerializer/,
    /< ApplicationDecorator/,
    /< BaseSerializer/,
    /< BaseBlueprinter/,
    # DSL calls
    /attributes?\s+:/,
    /has_many\s+:.*serializer/,
    /belongs_to\s+:.*serializer/,
    /view\s+:/ # Blueprinter views
  ]
  markers.any? { |marker| source.match?(marker) }
end
|
|
173
|
+
|
|
174
|
+
# Locate the file that defines a class.
#
# @param klass [Class] The class to locate
# @return [String, nil] Path of the class's first own instance method,
#   else a conventional app/serializers path; nil if lookup raises
def source_file_for(klass)
  own_methods = klass.instance_methods(false)
  located = own_methods.any? ? klass.instance_method(own_methods.first).source_location&.first : nil

  located || Rails.root.join("app/serializers/#{klass.name.underscore}.rb").to_s
rescue StandardError
  nil
end
|
|
182
|
+
|
|
183
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
184
|
+
# Source Annotation
|
|
185
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
186
|
+
|
|
187
|
+
# Prefix the source with a comment banner summarising the serializer.
#
# @param source [String] Raw file contents
# @param class_name [String] Serializer class name
# @return [String] Banner (name, type, wrapped model) followed by the source
def annotate_source(source, class_name)
  kind = detect_serializer_type(source)
  model = detect_wrapped_model(source, class_name)

  # Pad each field to its fixed width inside the banner.
  name_cell = class_name.ljust(57)
  type_cell = kind.to_s.ljust(61)
  wraps_cell = (model || 'unknown').ljust(60)

  <<~ANNOTATION
    # ╔═══════════════════════════════════════════════════════════════════════╗
    # ║ Serializer: #{name_cell}║
    # ║ Type: #{type_cell}║
    # ║ Wraps: #{wraps_cell}║
    # ╚═══════════════════════════════════════════════════════════════════════╝

    #{source}
  ANNOTATION
end
|
|
201
|
+
|
|
202
|
+
# Classifies source by the superclass it inherits from.
#
# The checks run in a fixed order (AMS, then Blueprinter, then Draper) and
# the first match wins. `BaseSerializer` is treated as an AMS-style base,
# mirroring how `ApplicationSerializer` and `BaseBlueprinter` are handled,
# so classes inheriting from it are no longer reported as :unknown.
#
# @param source [String] Ruby source text
# @return [Symbol] :ams, :blueprinter, :draper, or :unknown
def detect_serializer_type(source)
  return :ams if source.match?(/< (?:ActiveModel::Serializer|ApplicationSerializer|BaseSerializer)/)
  return :blueprinter if source.match?(/< (?:Blueprinter::Base|BaseBlueprinter)/)
  return :draper if source.match?(/< (?:Draper::Decorator|ApplicationDecorator)/)

  :unknown
end
|
|
209
|
+
|
|
210
|
+
# Infers the name of the model that a serializer/decorator/blueprint wraps.
#
# Resolution order:
# 1. an explicit `type :name` / `type "name"` declaration (AMS),
# 2. an explicit `decorates :name` declaration (Draper),
# 3. the last segment of the class name with a trailing Serializer,
#    Decorator, Blueprinter, or Blueprint suffix removed.
#
# Declared names are passed through String#classify, which the surrounding
# code expects to be available (it is not part of core Ruby).
#
# @param source [String] Ruby source text
# @param class_name [String, nil] possibly namespaced class name; nil and
#   empty names are tolerated and fall through to a nil result
# @return [String, nil] model name, or nil when nothing can be inferred
def detect_wrapped_model(source, class_name)
  # AMS: `type` declaration. The word boundary stops identifiers that merely
  # end in "type" (e.g. `content_type "json"`) from being read as one.
  declared = source[/\btype\s+[:"'](\w+)/, 1]
  return declared.classify if declared

  # Draper: `decorates` declaration
  decorated = source[/\bdecorates\s+[:"'](\w+)/, 1]
  return decorated.classify if decorated

  # Convention: strip Serializer/Decorator/Blueprinter suffix
  inferred = class_name.to_s.split('::').last.to_s
                       .sub(/Serializer$/, '')
                       .sub(/Decorator$/, '')
                       .sub(/Blueprinter$/, '')
                       .sub(/Blueprint$/, '')

  inferred.empty? ? nil : inferred
end
|
|
227
|
+
|
|
228
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
229
|
+
# Metadata Extraction (from source)
|
|
230
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
231
|
+
|
|
232
|
+
# Builds the metadata hash for a serializer from its source text alone.
#
# Each entry is produced by the correspondingly named helper; :loc counts
# the lines that are neither blank nor start with `#` once stripped.
#
# @param source [String] Ruby source text
# @param class_name [String] class name handed to detect_wrapped_model
# @return [Hash] keys :serializer_type, :wrapped_model, :attributes,
#   :associations, :custom_methods, :views, :loc
def extract_metadata_from_source(source, class_name)
  metadata = {}
  metadata[:serializer_type] = detect_serializer_type(source)
  metadata[:wrapped_model] = detect_wrapped_model(source, class_name)
  metadata[:attributes] = extract_attributes(source)
  metadata[:associations] = extract_associations(source)
  metadata[:custom_methods] = extract_custom_methods(source)
  metadata[:views] = extract_views(source)
  metadata[:loc] = source.lines.count do |line|
    text = line.strip
    !text.empty? && !text.start_with?('#')
  end
  metadata
end
|
|
243
|
+
|
|
244
|
+
# Builds serializer metadata from source, then refines it with whatever the
# loaded class exposes at runtime.
#
# The :serializer_type from BASE_CLASSES[base_class_name] wins over the
# source-derived one when present. If the class responds to
# `_attributes_data`, its keys replace :attributes (only when non-empty);
# otherwise, if it responds to `definition` and that value responds to
# `keys`, those keys replace :views.
#
# @param klass [Class] loaded serializer class
# @param source [String] Ruby source text of the class
# @param base_class_name [String] key looked up in BASE_CLASSES
# @return [Hash] same shape as extract_metadata_from_source
def extract_metadata_from_class(klass, source, base_class_name)
  extract_metadata_from_source(source, klass.name).tap do |metadata|
    metadata[:serializer_type] = BASE_CLASSES[base_class_name] || metadata[:serializer_type]

    # Enhance with runtime introspection if available
    if klass.respond_to?(:_attributes_data)
      # AMS runtime attributes
      attribute_names = klass._attributes_data.keys.map(&:to_s)
      metadata[:attributes] = attribute_names unless attribute_names.empty?
    elsif klass.respond_to?(:definition)
      # Blueprinter runtime fields
      runtime_definition = klass.definition
      metadata[:views] = runtime_definition.keys.map(&:to_s) if runtime_definition.respond_to?(:keys)
    end
  end
end
|
|
261
|
+
|
|
262
|
+
# Collects the attribute names a serializer exposes, based on DSL calls
# found in the source text.
#
# Recognised forms, in the order their results are appended:
# - `attribute :a` / `attributes :a, :b` (AMS / generic)
# - `field :a` / `identifier :a` (Blueprinter, one name per call)
# - `fields :a, :b` (Blueprinter plural form)
# - `delegate :a, :b, to: :object` (Draper)
#
# @param source [String] Ruby source text
# @return [Array<String>] attribute names without the leading colon,
#   de-duplicated, first occurrence kept
def extract_attributes(source)
  # Turns every captured symbol list (":a, :b") for +pattern+ into bare names.
  names_from_lists = lambda do |pattern|
    source.scan(pattern).flatten.flat_map { |list| list.scan(/:(\w+)/).flatten }
  end

  # AMS / generic: `attributes :name, :email, :created_at`
  attrs = names_from_lists.call(/attributes?\s+((?::\w+(?:,\s*)?)+)/)

  # Blueprinter: `field :name` or `identifier :id`
  attrs.concat(source.scan(/(?:field|identifier)\s+:(\w+)/).flatten)

  # Blueprinter: `fields :first_name, :last_name` (not matched by the
  # singular pattern above because "field" is followed by "s", not a space)
  attrs.concat(names_from_lists.call(/\bfields\s+((?::\w+(?:,\s*)?)+)/))

  # Draper: `delegate :name, :email, to: :object`
  attrs.concat(names_from_lists.call(/delegate\s+((?::\w+(?:,\s*)?)+)\s*,\s*to:\s*:object/))

  attrs.uniq
end
|
|
280
|
+
|
|
281
|
+
# Lists the associations declared in serializer source.
#
# AMS-style `has_many` / `has_one` / `belongs_to` entries come first,
# followed by Blueprinter `association` entries. An entry carries a
# :serializer key only when `serializer:` / `blueprint:` directly follows
# the association name.
#
# @param source [String] Ruby source text
# @return [Array<Hash>] hashes with :type, :name and optionally :serializer
def extract_associations(source)
  # AMS: `has_many :comments`, `belongs_to :author`, `has_one :profile`
  ams_pattern = /(has_many|has_one|belongs_to)\s+:(\w+)(?:,\s*serializer:\s*([\w:]+))?/
  ams_entries = source.scan(ams_pattern).map do |kind, name, serializer|
    { type: kind, name: name, serializer: serializer }.compact
  end

  # Blueprinter: `association :comments, blueprint: CommentBlueprint`
  blueprint_pattern = /association\s+:(\w+)(?:,\s*blueprint:\s*([\w:]+))?/
  blueprint_entries = source.scan(blueprint_pattern).map do |name, blueprint|
    { type: 'association', name: name, serializer: blueprint }.compact
  end

  ams_entries + blueprint_entries
end
|
|
296
|
+
|
|
297
|
+
# Lists the instance methods defined in the source, excluding `initialize`.
#
# Singleton definitions (`def self.foo`) are skipped rather than being
# recorded as a method called "self", and a trailing `?`, `!` or `=` is
# kept so predicate, bang and writer methods are reported under their
# real names.
#
# @param source [String] Ruby source text
# @return [Array<String>] method names in order of appearance
def extract_custom_methods(source)
  # Instance methods defined in the class (excluding standard callbacks)
  source.scan(/\bdef\s+(?!self\.)(\w+[?!=]?)/)
        .flatten
        .reject { |method_name| method_name == 'initialize' }
end
|
|
309
|
+
|
|
310
|
+
# Lists the Blueprinter views declared in the source (`view :extended do`).
#
# The keyword must start at a word boundary, so calls whose name merely
# ends in "view" (for example `include_view :normal`, which references a
# view instead of declaring one) are not reported.
#
# @param source [String] Ruby source text
# @return [Array<String>] view names in order of appearance
def extract_views(source)
  source.scan(/\bview\s+:(\w+)/).flatten
end
|
|
314
|
+
|
|
315
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
316
|
+
# Dependency Extraction
|
|
317
|
+
# ──────────────────────────────────────────────────────────────────────
|
|
318
|
+
|
|
319
|
+
# Gathers the dependencies referenced by serializer source.
#
# Combines, in this order: the result of scan_model_dependencies (called
# with via: :serialization), one :serializer entry per distinct
# `serializer:` / `blueprint:` target found in the source, and the result
# of scan_service_dependencies. Entries sharing the same :type and :target
# are collapsed, keeping the first.
#
# NOTE(review): scan_model_dependencies and scan_service_dependencies are
# defined outside this section; they are assumed to return arrays of
# dependency hashes - confirm against their definitions.
#
# @param source [String] Ruby source text
# @return [Array<Hash>] dependency hashes unique by [:type, :target]
def extract_dependencies(source)
  model_deps = scan_model_dependencies(source, via: :serialization)

  # Other serializers referenced (e.g., `serializer: CommentSerializer`)
  referenced = source.scan(/(?:serializer|blueprint):\s*([\w:]+)/).flatten.uniq
  serializer_deps = referenced.map do |target|
    { type: :serializer, target: target, via: :serialization }
  end

  service_deps = scan_service_dependencies(source)

  [*model_deps, *serializer_deps].concat(service_deps).uniq { |dep| [dep[:type], dep[:target]] }
end
|
|
332
|
+
end
|
|
333
|
+
end
|
|
334
|
+
end
|