rigortype 0.1.17 → 0.1.18

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (70) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +4 -2
  3. data/lib/rigor/analysis/check_rules/always_truthy_condition_collector.rb +18 -1
  4. data/lib/rigor/analysis/check_rules/rule_walk.rb +67 -0
  5. data/lib/rigor/analysis/check_rules/unreachable_clause_collector.rb +18 -1
  6. data/lib/rigor/analysis/check_rules.rb +34 -6
  7. data/lib/rigor/analysis/runner/diagnostic_aggregator.rb +580 -0
  8. data/lib/rigor/analysis/runner/pool_coordinator.rb +569 -0
  9. data/lib/rigor/analysis/runner/project_pre_passes.rb +318 -0
  10. data/lib/rigor/analysis/runner/run_snapshots.rb +46 -0
  11. data/lib/rigor/analysis/runner.rb +160 -1190
  12. data/lib/rigor/analysis/worker_session.rb +47 -8
  13. data/lib/rigor/cache/incremental_snapshot.rb +10 -4
  14. data/lib/rigor/cache/rbs_cache_producer.rb +5 -1
  15. data/lib/rigor/cache/store.rb +46 -13
  16. data/lib/rigor/cli/check_command.rb +705 -0
  17. data/lib/rigor/cli/ci_detector.rb +94 -0
  18. data/lib/rigor/cli/diagnostic_formats.rb +345 -0
  19. data/lib/rigor/cli/prism_colorizer.rb +10 -3
  20. data/lib/rigor/cli/trace_command.rb +143 -0
  21. data/lib/rigor/cli/trace_renderer.rb +310 -0
  22. data/lib/rigor/cli.rb +15 -614
  23. data/lib/rigor/configuration.rb +9 -6
  24. data/lib/rigor/environment/rbs_loader.rb +53 -68
  25. data/lib/rigor/environment.rb +1 -1
  26. data/lib/rigor/inference/acceptance.rb +10 -0
  27. data/lib/rigor/inference/expression_typer.rb +28 -62
  28. data/lib/rigor/inference/flow_tracer.rb +180 -0
  29. data/lib/rigor/inference/macro_block_self_type.rb +10 -11
  30. data/lib/rigor/inference/method_dispatcher/overload_selector.rb +33 -1
  31. data/lib/rigor/inference/method_dispatcher.rb +115 -54
  32. data/lib/rigor/inference/narrowing.rb +60 -0
  33. data/lib/rigor/inference/scope_indexer.rb +75 -15
  34. data/lib/rigor/inference/statement_evaluator.rb +35 -52
  35. data/lib/rigor/plugin/additional_initializer.rb +61 -38
  36. data/lib/rigor/plugin/base.rb +282 -41
  37. data/lib/rigor/plugin/node_rule_walk.rb +147 -0
  38. data/lib/rigor/plugin/registry.rb +263 -35
  39. data/lib/rigor/plugin.rb +1 -0
  40. data/lib/rigor/rbs_extended/conformance_checker.rb +86 -1
  41. data/lib/rigor/scope/discovery_index.rb +58 -0
  42. data/lib/rigor/scope.rb +67 -198
  43. data/lib/rigor/sig_gen/observation_collector.rb +6 -6
  44. data/lib/rigor/source/literals.rb +14 -0
  45. data/lib/rigor/type/combinator.rb +5 -0
  46. data/lib/rigor/version.rb +1 -1
  47. data/lib/rigor.rb +0 -1
  48. data/plugins/rigor-actionpack/lib/rigor/plugin/actionpack/analyzer.rb +1 -2
  49. data/plugins/rigor-activerecord/lib/rigor/plugin/activerecord/model_discoverer.rb +2 -4
  50. data/plugins/rigor-activerecord/lib/rigor/plugin/activerecord.rb +70 -32
  51. data/plugins/rigor-activestorage/lib/rigor/plugin/activestorage/analyzer.rb +3 -3
  52. data/plugins/rigor-activestorage/lib/rigor/plugin/activestorage.rb +15 -21
  53. data/plugins/rigor-activesupport-core-ext/lib/rigor/plugin/activesupport_core_ext.rb +1 -1
  54. data/plugins/rigor-factorybot/lib/rigor/plugin/factorybot/factory_discoverer.rb +1 -2
  55. data/plugins/rigor-graphql/lib/rigor/plugin/graphql/type_scanner.rb +2 -2
  56. data/plugins/rigor-rspec/lib/rigor/plugin/rspec/let_scope_index.rb +12 -2
  57. data/plugins/rigor-rspec/lib/rigor/plugin/rspec/matcher_analyzer.rb +1 -1
  58. data/plugins/rigor-rspec/lib/rigor/plugin/rspec.rb +35 -18
  59. data/plugins/rigor-sorbet/lib/rigor/plugin/sorbet/absurd_recognizer.rb +8 -29
  60. data/plugins/rigor-sorbet/lib/rigor/plugin/sorbet/catalog.rb +17 -1
  61. data/plugins/rigor-sorbet/lib/rigor/plugin/sorbet/sigil_detector.rb +2 -2
  62. data/plugins/rigor-sorbet/lib/rigor/plugin/sorbet.rb +83 -36
  63. data/sig/rigor/environment.rbs +0 -2
  64. data/sig/rigor/inference.rbs +5 -0
  65. data/sig/rigor/plugin/base.rbs +1 -2
  66. data/sig/rigor/scope.rbs +41 -29
  67. data/sig/rigor/source.rbs +1 -0
  68. data/skills/rigor-ci-setup/SKILL.md +319 -0
  69. metadata +15 -2
  70. data/lib/rigor/cache/rbs_instance_definitions.rb +0 -66
@@ -0,0 +1,705 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "fileutils"
4
+ require "json"
5
+ require "optionparser"
6
+
7
+ require_relative "../configuration"
8
+ require_relative "../analysis/result"
9
+ require_relative "command"
10
+ require_relative "options"
11
+ require_relative "diagnostic_formats"
12
+ require_relative "ci_detector"
13
+
14
+ module Rigor
15
+ class CLI
16
+ # Executes `rigor check` — the analyzer's primary command.
17
+ #
18
+ # The other subcommands delegate to a `CLI::Command` subclass once they
19
+ # grow beyond a few lines; `check` is the largest of them, owning option
20
+ # parsing, the baseline filter (ADR-22), the incremental modes (ADR-46),
21
+ # cache-stats reporting, editor mode, the CI-native output formats
22
+ # (ADR-51), and the diagnostic-only / heap / budget appendices. Keeping
23
+ # it in its own class follows the same dispatch-vs-implementation split
24
+ # the rest of the CLI uses and keeps `Rigor::CLI` focused on dispatch.
25
+ #
26
+ # The class-length budget is relaxed (as on `Rigor::CLI` itself) because
27
+ # `check` aggregates several independent concerns that are clearer read
28
+ # together than split across micro-classes.
29
+ class CheckCommand < Command # rubocop:disable Metrics/ClassLength
30
# Entry point for `rigor check`. Loads the analysis dependencies, parses
# the options, resolves the editor-mode buffer binding and the
# configuration, optionally clears the cache, and then either delegates
# to one of the incremental modes or performs an ordinary run: analyze,
# apply the baseline filter, write the result and the appendices.
#
# @return [Integer] CLI exit status.
def run # rubocop:disable Metrics/AbcSize
  load_check_dependencies
  options = parse_check_options
  buffer = Options.resolve_buffer_binding(options, err: @err)
  return CLI::EXIT_USAGE if buffer == :usage_error

  configuration = load_check_configuration(options)
  cache_root = configuration.cache_path
  handle_clear_cache(cache_root) if options.fetch(:clear_cache)

  # The incremental modes handle the whole run and hand back an exit code.
  special_status = dispatch_special_check_mode(configuration, options, cache_root)
  return special_status unless special_status.nil?

  runner = build_check_runner(
    configuration: configuration, options: options,
    buffer: buffer, cache_root: cache_root
  )
  targets = @argv.empty? ? configuration.paths : @argv
  raw_result = runner.run(targets)
  result = apply_baseline_filter(raw_result, configuration, options)

  write_result(result, options.fetch(:format))
  emit_ci_detected_output(result, options)
  write_run_stats(result.stats) if result.stats
  write_trace_appendices
  runner.cache_store&.evict!
  write_cache_stats(cache_root, runner.cache_store) if options.fetch(:cache_stats)

  # The strict gate audits the unfiltered diagnostics and can fail an
  # otherwise successful run.
  passed = result.success?
  strict_violation = baseline_strict_violation?(raw_result.diagnostics, configuration, options)
  passed && !strict_violation ? 0 : 1
end
62
+
63
+ private
64
+
65
# ADR-46 — routes to one of the two incremental-analysis modes. Each of
# them handles the whole run and yields an exit code, which lets `run`
# short-circuit. `--verify-incremental` takes precedence over
# `--incremental`.
#
# @return [Integer, nil] the mode's exit code, or nil for an ordinary check.
def dispatch_special_check_mode(configuration, options, cache_root)
  if options.fetch(:verify_incremental)
    run_verify_incremental(configuration)
  elsif options.fetch(:incremental)
    run_incremental_check(configuration, options, cache_root)
  end
end
74
+
75
# ADR-46 — the acceptance gate for incremental analysis. A baseline
# analysis is run first (recording cross-file dependencies); a
# representative subset of the files is then re-analyzed while the rest
# is served from the per-file cache (the body tier). The merged
# diagnostics must be byte-identical to a full `--no-cache` analysis: a
# mismatch means the incremental machinery would serve a stale —
# manufactured — diagnostic, which is the soundness failure this gate
# exists to catch. Prints a one-line PASS (exit 0) or the differing
# diagnostics (exit 1).
def run_verify_incremental(configuration)
  paths = @argv.empty? ? nil : @argv
  session = Analysis::IncrementalSession.new(configuration: configuration, paths: paths)
  session.baseline
  analyzed = session.analyzed_files

  # Re-analyze the files at even positions (0, 2, 4, …) so that BOTH the
  # subset-analysis path and the cache-serving path are exercised.
  subset = analyzed.each_slice(2).map(&:first)
  incremental = normalize_diagnostics(session.reanalyze_subset(subset))
  full = normalize_diagnostics(verify_full_diagnostics(configuration, paths))

  report_verify_incremental(incremental, full, subset_size: subset.size, total: analyzed.size)
end
97
+
98
# ADR-46 — cross-process incremental analysis (`--incremental`). Derives
# the global fingerprint cheaply (no RBS env build), loads the disk
# snapshot, and on a fingerprint hit re-analyzes only the files changed
# since the last run (plus their dependents), serving the rest from the
# snapshot; on a miss runs a full baseline. Persists the updated
# snapshot for the next invocation. Diagnostics are identical to a full
# run (the `--verify-incremental` gate enforces this); the win is
# skipping per-file inference for unchanged files.
#
# The `--baseline-strict` gate (ADR-22) is honoured here exactly as in an
# ordinary run: it audits the unfiltered diagnostics and forces a failing
# exit status on drift, so the flag is not silently dropped when combined
# with `--incremental`.
#
# @param configuration [Configuration] the loaded configuration.
# @param options [Hash] the parsed check options.
# @param cache_root [String] directory that holds the incremental snapshot.
# @return [Integer] 0 when the filtered result succeeds and the strict
#   gate (if requested) finds no drift, 1 otherwise.
def run_incremental_check(configuration, options, cache_root)
  paths = @argv.empty? ? nil : @argv
  # The probe runner is only used to count the files for the status line.
  probe = Analysis::Runner.new(configuration: configuration, cache_store: nil)
  files = paths ? probe.analysis_file_set(paths) : probe.analysis_file_set
  fingerprint = Cache::IncrementalSnapshot.fingerprint(
    configuration: configuration, roots: paths || configuration.paths
  )
  snapshot = Cache::IncrementalSnapshot.new(root: cache_root)
  session = Analysis::IncrementalSession.new(configuration: configuration, paths: paths)

  diagnostics, warm = session.run_incremental(snapshot: snapshot, fingerprint: fingerprint)
  mode = warm ? "warm — reused cached diagnostics" : "cold — full analysis"
  @err.puts("rigor: --incremental #{mode} (#{files.size} files)")

  raw_result = Analysis::Result.new(diagnostics: diagnostics, stats: nil)
  result = apply_baseline_filter(raw_result, configuration, options)
  write_result(result, options.fetch(:format))

  passed = result.success?
  strict_violation = baseline_strict_violation?(diagnostics, configuration, options)
  passed && !strict_violation ? 0 : 1
end
125
+
126
# Runs a full analysis with no cache store and returns its diagnostics —
# the reference side of the `--verify-incremental` comparison. When
# `paths` is nil the runner is invoked without arguments.
def verify_full_diagnostics(configuration, paths)
  runner = Analysis::Runner.new(configuration: configuration, cache_store: nil)
  result = paths ? runner.run(paths) : runner.run
  result.diagnostics
end
130
+
131
# Converts each diagnostic to its Hash form and orders the rows by
# path, line, column, rule and message so that two runs can be compared
# independently of emission order.
def normalize_diagnostics(diagnostics)
  rows = diagnostics.map(&:to_h)
  rows.sort_by do |row|
    [row["path"].to_s, row["line"].to_i, row["column"].to_i, row["rule"].to_s, row["message"].to_s]
  end
end
136
+
137
# Reports the outcome of the `--verify-incremental` gate.
#
# The two normalized lists are compared as multisets. A plain `Array#-`
# is a set difference: it drops every occurrence of a shared element, so
# a diagnostic that is duplicated on one side only would make the lists
# unequal while both "only" lists stay empty. Counting occurrences keeps
# the verdict and the printed differences consistent with each other.
#
# @param incremental [Array<Hash>] normalized diagnostics of the incremental run.
# @param full [Array<Hash>] normalized diagnostics of the full run.
# @param subset_size [Integer] number of files that were re-analyzed.
# @param total [Integer] number of files analyzed overall.
# @return [Integer] 0 when both sides hold the same diagnostics, 1 otherwise.
def report_verify_incremental(incremental, full, subset_size:, total:)
  only_incremental = diagnostic_multiset_difference(incremental, full)
  only_full = diagnostic_multiset_difference(full, incremental)

  if only_incremental.empty? && only_full.empty?
    @out.puts("rigor: --verify-incremental OK — incremental " \
              "(#{subset_size}/#{total} files re-analyzed, rest from cache) " \
              "matches full (#{full.size} diagnostics)")
    return 0
  end

  @err.puts("rigor: --verify-incremental FAILED — incremental and full diagnostics differ.")
  @err.puts(" incremental-only: #{only_incremental.size}, full-only: #{only_full.size}")
  # Cap the listing so a badly broken run does not flood the terminal.
  (only_incremental + only_full).first(10).each do |hash|
    @err.puts(" #{hash['path']}:#{hash['line']}:#{hash['column']}: [#{hash['rule']}] #{hash['message']}")
  end
  1
end

# Returns the elements of `left` that are not matched one-for-one by an
# equal element of `right`, preserving the order of `left`.
def diagnostic_multiset_difference(left, right)
  unmatched = right.tally
  left.reject do |item|
    matched = unmatched.fetch(item, 0).positive?
    unmatched[item] -= 1 if matched
    matched
  end
end
154
+
155
# ADR-22 slice 5 — the `--baseline-strict` CI gate. With the flag set,
# ANY baseline drift fails the run: excess drift (a bucket over its
# threshold, which already fails through the surfaced diagnostics) as
# well as DEFICIT drift (`actual < count`, i.e. the baseline has grown
# looser than the code and should be regenerated). When no baseline is
# active the gate is a no-op that leaves a note on stderr — the flag
# never implicitly loads a baseline the config did not name (WD2). A
# baseline that fails to load skips the gate with a stderr message.
#
# @return [Boolean] true when drift was found and reported.
def baseline_strict_violation?(raw_diagnostics, configuration, options)
  return false unless options.fetch(:baseline_strict)

  path = resolve_baseline_path(configuration, options)
  if path.nil?
    @err.puts("rigor: --baseline-strict given but no baseline is active; nothing to gate.")
    return false
  end

  baseline = Analysis::Baseline.load(path, project_root: Dir.pwd)
  return false if baseline.nil? || baseline.empty?

  # Every audit row whose status is anything other than :within is drift.
  drifted = baseline.audit(raw_diagnostics).select { |row| row.status != :within }
  violation = !drifted.empty?
  report_strict_drift(drifted, path) if violation
  violation
rescue Analysis::Baseline::LoadError => e
  @err.puts("rigor: baseline load failed: #{e.message} (--baseline-strict gate skipped)")
  false
end
184
+
185
# Writes the `--baseline-strict` drift report to stderr: a header with
# the number of drifted buckets, one line per bucket (ordered by file,
# then rule) showing the recorded count, the actual count, the signed
# delta and the status, and a closing hint on how to refresh.
def report_strict_drift(rows, path)
  @err.puts("rigor: --baseline-strict — #{rows.size} bucket(s) drifted from #{path}:")
  ordered = rows.sort_by { |row| [row.bucket.file, row.bucket.rule] }
  ordered.each do |row|
    # Positive deltas get an explicit plus sign; zero and negatives print as-is.
    signed_delta = row.delta.positive? ? "+#{row.delta}" : row.delta.to_s
    location = " #{row.bucket.file} [#{row.bucket.rule}] "
    change = "#{row.bucket.count} → #{row.actual_count} (Δ#{signed_delta}, #{row.status})"
    @err.puts(location + change)
  end
  @err.puts("rigor: run `rigor baseline regenerate` to refresh the baseline.")
end
194
+
195
# ADR-22 — the baseline filter, applied as the LAST step of the
# diagnostic pipeline (after `# rigor:disable`, `severity_profile`,
# etc. — WD6). The effective path follows WD2 (b):
#
#   1. --no-baseline on the CLI      → no baseline.
#   2. --baseline=PATH on the CLI    → load that path.
#   3. .rigor.yml's `baseline: <path>` → load that path.
#   4. otherwise                     → no baseline.
#
# When a baseline resolves and loads, a new result carrying only the
# surfaced diagnostics (and the original stats) is returned, and a
# one-line summary goes to stderr (WD7) if anything was silenced. A load
# failure warns on stderr and degrades gracefully to "no baseline".
def apply_baseline_filter(result, configuration, options)
  path = resolve_baseline_path(configuration, options)
  baseline = path.nil? ? nil : Analysis::Baseline.load(path, project_root: Dir.pwd)
  return result if baseline.nil?

  surfaced, silenced_count = baseline.filter(result.diagnostics)
  report_baseline_summary(silenced_count, path) if silenced_count.positive?
  Analysis::Result.new(diagnostics: surfaced, stats: result.stats)
rescue Analysis::Baseline::LoadError => e
  @err.puts("rigor: baseline load failed: #{e.message} (continuing without baseline)")
  result
end
225
+
226
# WD2 (b) — works out which baseline path is in effect. `false` (from
# `--no-baseline`) disables the baseline, `:unset` defers to the
# configuration's `baseline_path`, and any other value is the path given
# with `--baseline=PATH`.
def resolve_baseline_path(configuration, options)
  requested = options.fetch(:baseline)
  return nil if requested == false
  return configuration.baseline_path if requested == :unset

  requested
end
235
+
236
# Notes on stderr how many diagnostics the baseline at `baseline_path`
# silenced.
def report_baseline_summary(silenced_count, baseline_path)
  summary = "rigor: #{silenced_count} diagnostic(s) silenced by baseline #{baseline_path}"
  @err.puts(summary)
end
239
+
240
# Builds the analysis runner for an ordinary check. No cache store is
# created when `--no-cache` was given; otherwise one is rooted at
# `cache_root` and bounded by the configured `cache_max_bytes`.
def build_check_runner(configuration:, options:, buffer:, cache_root:)
  cache_store = nil
  unless options.fetch(:no_cache)
    cache_store = Cache::Store.new(root: cache_root, max_bytes: configuration.cache_max_bytes)
  end

  Analysis::Runner.new(
    configuration: configuration,
    explain: options.fetch(:explain),
    cache_store: cache_store,
    collect_stats: options.fetch(:stats),
    workers: resolve_workers(options, configuration),
    buffer: buffer
  )
end
258
+
259
# ADR-15 Phase 4c — picks the worker count by precedence: CLI
# `--workers=N` (most explicit) > env `RIGOR_RACTOR_WORKERS` > the
# configuration's `parallel_workers`. A CLI or env value is converted
# with `Integer()`, so a non-numeric value raises and typos fail
# loudly; a negative CLI / env value is clamped to 0 (sequential) so a
# stray `-1` doesn't crash the pool spawn loop. An empty env value
# counts as unset.
def resolve_workers(options, configuration)
  requested = options[:workers]
  unless requested
    from_env = ENV.fetch("RIGOR_RACTOR_WORKERS", nil)
    requested = from_env unless from_env.nil? || from_env.empty?
  end
  return configuration.parallel_workers unless requested

  [Integer(requested), 0].max
end
276
+
277
# Parses the `rigor check` command line. Recognised options are removed
# from `@argv` in place, so only the positional paths remain afterwards.
#
# The `--format` value is validated right after parsing. Previously an
# unsupported format was only rejected when the result was written, i.e.
# after the whole analysis (and any `--clear-cache`) had already run; the
# same exception class and message are now raised up front.
#
# @return [Hash{Symbol => Object}] the option values; every key of
#   `default_check_options` is present.
# @raise [OptionParser::ParseError] on an unknown option or a bad argument.
# @raise [OptionParser::InvalidArgument] on an unsupported `--format`.
def parse_check_options
  options = default_check_options
  build_check_option_parser(options).parse!(@argv)
  validate_check_format!(options.fetch(:format))
  options
end

# The option values in effect before the command line is applied.
def default_check_options # rubocop:disable Metrics/MethodLength
  {
    # `nil` triggers `Configuration.discover` (`.rigor.yml` then
    # `.rigor.dist.yml`); an explicit `--config=PATH` overrides.
    config: nil,
    format: "text",
    explain: false,
    cache_stats: false,
    clear_cache: false,
    no_cache: false,
    # Run-stats summary (target files, RBS class universe breakdown, wall
    # time, peak RSS) is on by default because collection is ~free
    # (single syscall for RSS, one walk of `class_decl_paths` for the
    # breakdown). `--no-stats` suppresses it for callers that want a
    # diagnostic-only output stream.
    stats: true,
    # ADR-15 Phase 4c — when nil, falls back to `RIGOR_RACTOR_WORKERS`
    # then `.rigor.yml` `parallel.workers:` then 0 (sequential). See
    # `resolve_workers` for the precedence chain.
    workers: nil,
    # Editor mode (`docs/design/20260516-editor-mode.md`). Both must
    # appear together; the runner uses the pair to bind an in-flight
    # buffer file to its logical path.
    tmp_file: nil,
    instead_of: nil,
    # ADR-22 — baseline filter. `:unset` means "fall through to
    # `.rigor.yml`'s `baseline:` key"; a String overrides the config;
    # `false` (from `--no-baseline`) suppresses any baseline that the
    # config might name.
    baseline: :unset,
    # ADR-22 slice 5 — `--baseline-strict` CI gate: fail the run on any
    # baseline drift, in either direction.
    baseline_strict: false,
    # ADR-32 WD10 carry-over — `--treat-all-as-inline-rbs` forces the
    # `rigor-rbs-inline` plugin into the loaded plugin set with
    # `require_magic_comment: false` so a single ad-hoc `rigor check`
    # invocation treats every analysed file as inline-RBS without the
    # user editing `.rigor.yml`. Intended for single-file / ad-hoc CI
    # use; ordinary projects should configure the plugin in `.rigor.yml`.
    treat_all_as_inline_rbs: false,
    # ADR-46 — the incremental-analysis acceptance gate. Runs a baseline
    # analysis, re-analyzes a subset and serves the rest from the
    # per-file cache, and asserts the merged diagnostics are
    # byte-identical to a full `--no-cache` run. Exits non-zero on any
    # mismatch. Off by default.
    verify_incremental: false,
    # ADR-46 — cross-process incremental analysis. With a disk snapshot
    # of the prior run's per-file diagnostics + dependency graph,
    # re-analyzes only the changed closure and serves the rest from the
    # snapshot. Off by default.
    incremental: false,
    # ADR-51 WD7 — CI auto-detection. When the default `text` format is
    # in effect and a first-class CI is detected (GitHub Actions /
    # TeamCity), also emit that platform's native annotations on top of
    # the human output; for GitLab / reviewdog-routed CIs, print a
    # one-line hint. On by default; `--no-ci-detect` (or
    # `RIGOR_CI_DETECT=0`) disables it.
    ci_detect: true
  }
end

# Builds the OptionParser whose handlers write into `options`.
def build_check_option_parser(options) # rubocop:disable Metrics/AbcSize,Metrics/MethodLength
  OptionParser.new do |opts| # rubocop:disable Metrics/BlockLength
    opts.banner = "Usage: rigor check [options] [paths]"
    opts.on("--config=PATH", "Path to the Rigor configuration file") { |value| options[:config] = value }
    opts.on("--format=FORMAT",
            "Output format: text, json, sarif, github, gitlab, checkstyle, junit, teamcity") do |value|
      options[:format] = value
    end
    opts.on("--explain", "Surface fail-soft fallback events as :info diagnostics") { options[:explain] = true }
    opts.on("--cache-stats", "Print on-disk cache inventory at end of run") { options[:cache_stats] = true }
    opts.on("--clear-cache", "Remove the .rigor/cache directory before running") { options[:clear_cache] = true }
    opts.on("--no-cache", "Disable the persistent cache for this run") { options[:no_cache] = true }
    opts.on("--[no-]stats",
            "Print run summary (files, classes, memory, wall time) to stderr (default: on)") do |value|
      options[:stats] = value
    end
    opts.on("--workers=N", Integer,
            "Dispatch per-file analysis across N Ractor workers (default: 0; sequential)") do |value|
      options[:workers] = value
    end
    Options.add_editor_mode(opts, options)
    opts.on("--baseline=PATH",
            "ADR-22: load baseline from PATH (overrides .rigor.yml `baseline:`)") do |value|
      options[:baseline] = value
    end
    opts.on("--no-baseline",
            "ADR-22: ignore any configured baseline for this run") do
      options[:baseline] = false
    end
    opts.on("--baseline-strict",
            "ADR-22: fail the run on any baseline drift (CI gate)") do
      options[:baseline_strict] = true
    end
    opts.on("--treat-all-as-inline-rbs",
            "ADR-32: force-load rigor-rbs-inline with require_magic_comment: false") do
      options[:treat_all_as_inline_rbs] = true
    end
    opts.on("--verify-incremental",
            "ADR-46: assert incremental analysis matches a full run, then exit") do
      options[:verify_incremental] = true
    end
    opts.on("--incremental",
            "ADR-46: re-analyze only files changed since the last run (cross-process cache)") do
      options[:incremental] = true
    end
    opts.on("--no-ci-detect",
            "ADR-51: do not auto-emit CI-native output when a CI environment is detected") do
      options[:ci_detect] = false
    end
  end
end

# Rejects a `--format` value that the result writer would refuse later:
# anything other than `text`, `json`, or a format that
# `CLI::DiagnosticFormats` reports as supported.
def validate_check_format!(format)
  return if %w[text json].include?(format) || CLI::DiagnosticFormats.supports?(format)

  raise OptionParser::InvalidArgument, "unsupported format: #{format}"
end
392
+
393
# ADR-32 WD10 carry-over — loads the configuration for `check`. Without
# `--treat-all-as-inline-rbs` this is a plain `Configuration.load`. With
# the flag, the include-aware YAML load is re-run (an empty Hash stands
# in when no config file is found on disk), a `rigor-rbs-inline` plugin
# entry with `require_magic_comment: false` is injected, and only then is
# `Configuration.new` called, so the synthesised entry goes through the
# normal coercion path. Any pre-existing `rigor-rbs-inline` entry is
# replaced, so the synthesised setting wins unconditionally.
def load_check_configuration(options)
  config_path = options.fetch(:config)
  return Configuration.load(config_path) unless options.fetch(:treat_all_as_inline_rbs)

  path = config_path || Configuration.discover
  loaded = path && File.exist?(path) ? Configuration.load_with_includes(path) : {}
  plugins = inject_treat_all_as_inline_rbs(Array(loaded["plugins"]))
  # `merge` leaves the loaded Hash untouched and overrides only "plugins".
  data = loaded.merge("plugins" => plugins)
  Configuration.new(Configuration::DEFAULTS.merge(data))
end
412
+
413
# Returns a new plugin list in which every existing `rigor-rbs-inline`
# entry has been dropped and a single synthesised entry with
# `require_magic_comment: false` has been appended.
def inject_treat_all_as_inline_rbs(entries)
  forced_entry = {
    "gem" => "rigor-rbs-inline",
    "id" => "rbs-inline",
    "config" => { "require_magic_comment" => false }
  }
  [*entries.reject { |entry| rigor_rbs_inline_entry?(entry) }, forced_entry]
end
421
+
422
# Tells whether a plugin entry names the `rigor-rbs-inline` plugin:
# either the bare gem name as a String, or a Hash (string or symbol
# keys) whose `gem` is `rigor-rbs-inline` or whose `id` is `rbs-inline`.
# Any other kind of entry does not match.
def rigor_rbs_inline_entry?(entry)
  return entry == "rigor-rbs-inline" if entry.is_a?(String)
  return false unless entry.is_a?(Hash)

  normalized = entry.transform_keys(&:to_s)
  normalized["gem"] == "rigor-rbs-inline" || normalized["id"] == "rbs-inline"
end
433
+
434
# Implements `--clear-cache`: removes the cache directory when it exists
# and reports on stdout which of the two cases applied.
def handle_clear_cache(cache_root)
  unless File.directory?(cache_root)
    @out.puts("Cache already empty: #{cache_root}")
    return
  end

  FileUtils.rm_rf(cache_root)
  @out.puts("Cleared cache: #{cache_root}")
end
442
+
443
# Writes the {Analysis::RunStats} summary to STDERR, preceded by a blank
# line, so that it never interleaves with the diagnostic stream (text or
# JSON) on STDOUT. JSON consumers can pipe stdout cleanly; interactive
# users still see the summary on their tty.
def write_run_stats(stats)
  sink = @err
  sink.puts("")
  stats.format(sink)
end
451
+
452
# Prints the opt-in developer appendices once the run is over: first the
# inference-cutoff trace (RIGOR_BUDGET_TRACE), then the heap-attribution
# profile (RIGOR_HEAP_PROFILE). Both writers gate themselves, so an
# ordinary run prints nothing here.
def write_trace_appendices
  write_budget_trace
  write_heap_profile
end
460
+
461
# Prints the opt-in inference-cutoff counters (RIGOR_BUDGET_TRACE) to
# stderr. They count the hard-coded "budget" guards that silently
# degrade to `Dynamic[top]` / a fallback bound, which shows where
# inference actually stopped. The counters are process-global and do
# not cross fork boundaries, so they are meaningful only on a
# single-process run (`--workers 0`).
def write_budget_trace
  trace = Inference::BudgetTrace
  return unless trace.enabled?

  counts = trace.snapshot
  @err.puts("")
  @err.puts("Inference cutoffs (RIGOR_BUDGET_TRACE; --workers 0 for an exact count)")
  @err.puts(" recursion-guard hits: #{counts[trace::RECURSION_GUARD]}")
  @err.puts(" ancestor-walk-limit hits: #{counts[trace::ANCESTOR_WALK_LIMIT]}")
  @err.puts(" hkt-fuel-exhausted hits: #{counts[trace::HKT_FUEL_EXHAUSTED]}")
  write_budget_distributions
end
478
+
479
# Prints the read-only union-size distribution (ADR-41 Slice 2a). It
# records how large unions actually get with no cap enforced — the data
# the `union_size` budget default should be chosen from. The `over`
# thresholds bracket the TypeProf prior (10) and Rigor's spec default
# (24).
def write_budget_distributions
  trace = Inference::BudgetTrace
  summary = trace.summarize(trace::UNION_ARITY, over: [10, 24, 40])
  percentiles = summary[:percentiles]
  exceeding = summary[:over]

  @err.puts(" union arity: n=#{summary[:count]} max=#{summary[:max]} " \
            "p50=#{percentiles[:p50]} p90=#{percentiles[:p90]} p99=#{percentiles[:p99]}")
  @err.puts(" unions ≥10: #{exceeding[10]} ≥24: #{exceeding[24]} ≥40: #{exceeding[40]}")
end
492
+
493
# Prints a live-heap class breakdown (RIGOR_HEAP_PROFILE) to stderr —
# retained objects by class after a forced GC, ranked by total memsize,
# top 30 only. It is the tool for attributing where the analyzer's
# resident memory goes (ADR-41 Slice 2b): type carriers, RBS objects,
# Prism nodes, or fact-store Hashes/Strings. Walking the whole heap is
# slow — a dev probe, not a normal diagnostic. Run single-process
# (`--workers 0`) so the parent heap is the analysis heap. Does nothing
# unless the environment variable is set to a non-empty value.
def write_heap_profile
  return if ENV["RIGOR_HEAP_PROFILE"].to_s.empty?

  by_class, total = tally_live_heap
  @err.puts("")
  @err.puts("Heap profile (RIGOR_HEAP_PROFILE; live objects after GC, by class)")
  @err.puts(" total tracked: #{heap_mb(total)} across #{by_class.size} classes")

  heaviest = by_class.sort_by { |_, (_, bytes)| -bytes }.first(30)
  heaviest.each do |name, (count, bytes)|
    @err.puts(" #{heap_mb(bytes).rjust(10)} #{count.to_s.rjust(9)} obj #{name}")
  end
  write_string_allocation_sites
end
513
+
514
# Requires the analysis-path files on demand (so that commands other
# than `check` stay light) and then starts heap-allocation tracing if it
# was requested, before any analysis object is allocated.
def load_check_dependencies
  %w[../analysis/runner ../analysis/buffer_binding ../analysis/baseline ../cache/store].each do |relative_path|
    require_relative relative_path
  end
  start_heap_trace_if_requested
end
524
+
525
# Turns on allocation tracing (RIGOR_HEAP_TRACE) as early as possible so
# that the heap profile can attribute retained Strings to their
# allocation `file:line`. Very high overhead — run on a small file
# subset only. Does nothing unless the variable is set to a non-empty
# value.
def start_heap_trace_if_requested
  tracing_requested = !ENV["RIGOR_HEAP_TRACE"].to_s.empty?
  return unless tracing_requested

  require "objspace"
  ObjectSpace.trace_object_allocations_start
end
534
+
535
# With RIGOR_HEAP_TRACE on, buckets the live String objects by their
# allocation site (`sourcefile:sourceline`) and prints the 25 sites with
# the most live strings — pinpointing which engine code retains the
# strings that dominate the large-app heap (ADR-41 Slice 2b). Strings
# with no recorded source file are grouped under `(pre-trace)`.
def write_string_allocation_sites
  return if ENV["RIGOR_HEAP_TRACE"].to_s.empty?

  live_counts = Hash.new(0)
  ObjectSpace.each_object(String) do |string|
    source_file = ObjectSpace.allocation_sourcefile(string)
    source_line = ObjectSpace.allocation_sourceline(string)
    site = source_file ? "#{source_file}:#{source_line}" : "(pre-trace)"
    live_counts[site] += 1
  end

  @err.puts("")
  @err.puts(" String allocation sites (top 25 by live count)")
  busiest = live_counts.sort_by { |_, live| -live }.first(25)
  busiest.each do |site, live|
    @err.puts(" #{live.to_s.rjust(9)} #{site}")
  end
end
555
+
556
# Forces a GC, then walks every live object and accumulates
# `{class_name => [count, memsize]}` together with the grand total of
# the memsizes. Slow — a dev probe only.
#
# @return [Array(Hash, Integer)] `[by_class, total]`.
def tally_live_heap
  require "objspace"
  GC.start

  total = 0
  by_class = Hash.new { |tallies, class_name| tallies[class_name] = [0, 0] }
  ObjectSpace.each_object do |object|
    bytes = ObjectSpace.memsize_of(object)
    tally = by_class[heap_class_name(object)]
    tally[0] += 1
    tally[1] += bytes
    total += bytes
  end
  [by_class, total]
end
573
+
574
# Returns a printable class name for any live object. `#class` is bound
# from Object so that objects which do not define it themselves (such as
# BasicObject instances) can still be classified; anonymous classes fall
# back to their `inspect` form, and any StandardError yields "(unknown)".
def heap_class_name(obj)
  klass = Object.instance_method(:class).bind_call(obj)
  name = klass.name
  name.nil? ? klass.inspect : name
rescue StandardError
  "(unknown)"
end
580
+
581
# Renders a byte count as megabytes (1 MB = 1_048_576 bytes) with one
# decimal place.
def heap_mb(bytes)
  megabytes = bytes / 1_048_576.0
  Kernel.format("%.1f MB", megabytes)
end
584
+
585
# Implements `--cache-stats`: prints the on-disk cache inventory for
# `cache_root` to stdout, followed by this run's hit/miss/write counters
# when a runtime store was in use.
def write_cache_stats(cache_root, runtime_store)
  inventory = Cache::Store.disk_inventory(root: cache_root)
  schema = inventory.fetch(:schema_version)
  schema_label = schema.nil? ? "absent" : schema

  @out.puts("")
  @out.puts("Cache (root: #{inventory.fetch(:root)})")
  @out.puts(" schema_version: #{schema_label}")
  write_disk_inventory(inventory)
  write_runtime_stats(runtime_store) if runtime_store
end
595
+
596
# Prints the disk inventory body: `(empty)` when there are no entries,
# otherwise the overall entry count and size followed by one line per
# producer.
def write_disk_inventory(inv)
  entry_count = inv.fetch(:total_entries)
  if entry_count.zero?
    @out.puts(" (empty)")
    return
  end

  @out.puts(" #{entry_count} entries, #{format_bytes(inv.fetch(:total_bytes))}")
  inv.fetch(:producers).each do |producer|
    size = format_bytes(producer.fetch(:bytes))
    @out.puts(" #{producer.fetch(:id)}: #{producer.fetch(:entries)} entries, #{size}")
  end
end
608
+
609
# Prints this run's cache counters — hits, misses and writes — first in
# total and then once per producer.
def write_runtime_stats(store)
  stats = store.stats
  describe = lambda do |counts|
    hits = counts.fetch(:hits)
    misses = counts.fetch(:misses)
    writes = counts.fetch(:writes)
    "#{hits} #{plural(hits, 'hit')}, " \
      "#{misses} #{plural(misses, 'miss', 'misses')}, " \
      "#{writes} #{plural(writes, 'write')}"
  end

  @out.puts(" this run: #{describe.call(stats)}")
  stats.fetch(:by_producer).each do |id, counts|
    @out.puts(" #{id}: #{describe.call(counts)}")
  end
end
623
+
624
# Chooses the singular word for a count of exactly 1 and the plural
# otherwise; the plural defaults to the singular with an "s" appended.
def plural(count, singular, plural = "#{singular}s")
  return singular if count == 1

  plural
end
627
+
628
# Renders a byte count for humans: plain bytes below 1 KiB, KiB with one
# decimal below 1 MiB, and MiB with one decimal from there on.
def format_bytes(bytes)
  if bytes < 1024
    "#{bytes} B"
  elsif bytes < 1024 * 1024
    format("%.1f KiB", bytes / 1024.0)
  else
    format("%.1f MiB", bytes / (1024.0 * 1024.0))
  end
end
634
+
635
# Writes the result to stdout in the requested format: pretty-printed
# JSON, the human text form, or one of the ADR-51 CI-native renderings
# (SARIF / GitHub Actions commands / GitLab Code Quality, …) that
# `CLI::DiagnosticFormats` supports. A rendering that comes back empty
# (the `github` form does when there are no diagnostics) prints nothing.
#
# @raise [OptionParser::InvalidArgument] for any other format.
def write_result(result, format)
  if format == "json"
    @out.puts(JSON.pretty_generate(result.to_h))
  elsif format == "text"
    write_text_result(result)
  elsif CLI::DiagnosticFormats.supports?(format)
    rendered = CLI::DiagnosticFormats.render(result, format)
    @out.puts(rendered) unless rendered.empty?
  else
    raise OptionParser::InvalidArgument, "unsupported format: #{format}"
  end
end
651
+
652
# ADR-51 WD7 — CI auto-detection. It only ever augments the default
# human (`text`) output; an explicit `--format` means the caller is in
# control and nothing is added. On a first-class stdout-native CI
# (GitHub Actions / TeamCity) the platform's annotations are printed on
# top of the text output, so both the human log and the inline surface
# appear (like PHPStan's CI-detecting table formatter). On GitLab
# (native but artifact-based) and the reviewdog-routed CIs a one-line
# hint goes to stderr instead — and only when the run failed or produced
# diagnostics, so a clean run stays quiet.
def emit_ci_detected_output(result, options)
  return unless options.fetch(:ci_detect) && options.fetch(:format) == "text"

  platform = CLI::CiDetector.detect
  return if platform.nil?

  unless platform.native_stdout?
    @err.puts(ci_detected_hint(platform)) if !result.success? || result.diagnostics.any?
    return
  end

  annotations = CLI::DiagnosticFormats.render(result, platform.format)
  @out.puts(annotations) unless annotations.empty?
end
675
+
676
# Builds the one-line stderr hint for a detected CI that cannot take
# inline annotations on stdout: artifact-based platforms are pointed at
# their own `--format`, every other platform at checkstyle-through-
# reviewdog or junit.
def ci_detected_hint(platform)
  tail = "see `rigor skill print rigor-ci-setup`"
  unless platform.native_artifact?
    return "rigor: #{platform.name} detected — Rigor has no native format for it; pipe " \
           "`rigor check --format checkstyle` through reviewdog, or use `--format junit` (#{tail})."
  end

  "rigor: #{platform.name} detected — for the inline report run " \
    "`rigor check --format #{platform.format}` and publish it as the platform's report artifact (#{tail})."
end
686
+
687
# Human-readable output: every diagnostic on its own line, then a short
# summary so that the outcome is visible at a glance. A successful run
# prints "No diagnostics" when there was nothing to show and no summary
# otherwise; a failing run prints the error count and the number of
# distinct files that contain an error.
def write_text_result(result)
  result.diagnostics.each { |diagnostic| @out.puts(diagnostic) }

  unless result.success?
    files_with_errors = result.diagnostics.select(&:error?).uniq(&:path).size
    @out.puts("")
    @out.puts("#{result.error_count} error(s) in #{files_with_errors} file(s)")
    return
  end

  @out.puts("No diagnostics") if result.diagnostics.empty?
  nil
end
703
+ end
704
+ end
705
+ end