rigortype 0.1.5 → 0.1.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (72) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +76 -79
  3. data/lib/rigor/analysis/baseline.rb +347 -0
  4. data/lib/rigor/analysis/buffer_binding.rb +36 -0
  5. data/lib/rigor/analysis/check_rules.rb +68 -3
  6. data/lib/rigor/analysis/dependency_source_inference/index.rb +14 -1
  7. data/lib/rigor/analysis/dependency_source_inference/return_type_heuristic.rb +105 -0
  8. data/lib/rigor/analysis/dependency_source_inference/walker.rb +32 -12
  9. data/lib/rigor/analysis/project_scan.rb +39 -0
  10. data/lib/rigor/analysis/runner.rb +309 -22
  11. data/lib/rigor/analysis/worker_session.rb +14 -2
  12. data/lib/rigor/builtins/hkt_builtins.rb +342 -0
  13. data/lib/rigor/builtins/static_return_refinements.rb +142 -0
  14. data/lib/rigor/cache/store.rb +33 -3
  15. data/lib/rigor/cli/baseline_command.rb +377 -0
  16. data/lib/rigor/cli/lsp_command.rb +129 -0
  17. data/lib/rigor/cli/type_of_command.rb +44 -5
  18. data/lib/rigor/cli.rb +142 -13
  19. data/lib/rigor/configuration.rb +58 -2
  20. data/lib/rigor/environment/hkt_registry_holder.rb +33 -0
  21. data/lib/rigor/environment/rbs_coverage_report.rb +1 -1
  22. data/lib/rigor/environment/rbs_loader.rb +67 -2
  23. data/lib/rigor/environment/reporters.rb +40 -0
  24. data/lib/rigor/environment.rb +119 -9
  25. data/lib/rigor/flow_contribution/fact.rb +20 -10
  26. data/lib/rigor/inference/acceptance.rb +48 -3
  27. data/lib/rigor/inference/expression_typer.rb +64 -2
  28. data/lib/rigor/inference/hkt_body.rb +171 -0
  29. data/lib/rigor/inference/hkt_body_parser.rb +363 -0
  30. data/lib/rigor/inference/hkt_reducer.rb +256 -0
  31. data/lib/rigor/inference/hkt_registry.rb +223 -0
  32. data/lib/rigor/inference/method_dispatcher/overload_selector.rb +125 -30
  33. data/lib/rigor/inference/method_dispatcher/rbs_dispatch.rb +32 -11
  34. data/lib/rigor/inference/method_dispatcher/receiver_affinity.rb +87 -0
  35. data/lib/rigor/inference/method_dispatcher.rb +174 -6
  36. data/lib/rigor/inference/narrowing.rb +103 -1
  37. data/lib/rigor/inference/project_patched_methods.rb +70 -0
  38. data/lib/rigor/inference/project_patched_scanner.rb +210 -0
  39. data/lib/rigor/inference/scope_indexer.rb +209 -19
  40. data/lib/rigor/inference/statement_evaluator.rb +172 -11
  41. data/lib/rigor/inference/synthetic_method_scanner.rb +94 -16
  42. data/lib/rigor/language_server/buffer_table.rb +63 -0
  43. data/lib/rigor/language_server/completion_provider.rb +438 -0
  44. data/lib/rigor/language_server/debouncer.rb +86 -0
  45. data/lib/rigor/language_server/diagnostic_publisher.rb +167 -0
  46. data/lib/rigor/language_server/document_symbol_provider.rb +142 -0
  47. data/lib/rigor/language_server/folding_range_provider.rb +75 -0
  48. data/lib/rigor/language_server/hover_provider.rb +74 -0
  49. data/lib/rigor/language_server/hover_renderer.rb +312 -0
  50. data/lib/rigor/language_server/loop.rb +71 -0
  51. data/lib/rigor/language_server/project_context.rb +145 -0
  52. data/lib/rigor/language_server/selection_range_provider.rb +93 -0
  53. data/lib/rigor/language_server/server.rb +384 -0
  54. data/lib/rigor/language_server/signature_help_provider.rb +249 -0
  55. data/lib/rigor/language_server/synchronized_writer.rb +28 -0
  56. data/lib/rigor/language_server/uri.rb +40 -0
  57. data/lib/rigor/language_server.rb +29 -0
  58. data/lib/rigor/plugin/base.rb +63 -0
  59. data/lib/rigor/plugin/macro/heredoc_template.rb +127 -13
  60. data/lib/rigor/plugin/macro/trait_registry.rb +1 -1
  61. data/lib/rigor/plugin/manifest.rb +54 -7
  62. data/lib/rigor/plugin/registry.rb +19 -0
  63. data/lib/rigor/rbs_extended/hkt_directives.rb +326 -0
  64. data/lib/rigor/rbs_extended.rb +82 -2
  65. data/lib/rigor/sig_gen/generator.rb +12 -3
  66. data/lib/rigor/type/app.rb +107 -0
  67. data/lib/rigor/type.rb +1 -0
  68. data/lib/rigor/version.rb +1 -1
  69. data/sig/rigor/environment.rbs +10 -4
  70. data/sig/rigor/inference.rbs +2 -0
  71. data/sig/rigor.rbs +4 -1
  72. metadata +56 -1
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b5960ec17b35768103e97d752f8cc6fd78fcb3f12e12fc43dfa41be07ec5317b
4
- data.tar.gz: e79c9b25c973c8938e9b2f0a2741cca5195342619827b320ca521ec09e54321e
3
+ metadata.gz: 0eaff9cf0ef65d44ceb3666a23fb77003a3dbb0361d890e1d2991ef6539499de
4
+ data.tar.gz: e7fdc58be21409504965f35479559d26bcf4726ba0feabe3fd5128bcffe8419b
5
5
  SHA512:
6
- metadata.gz: af1e033a25410c0f87943f12d43ab18a3a0d2a79c01307c2117c2fc15be4c9db3cb28e6fec10ce598ef6a5bfa063227f280c023a0f7e9025b06c69946df4654d
7
- data.tar.gz: 351b3275dd35f37a11d30a696627e23a6cdca31bfb94fa3eacae762d2de624e4a914c6f8f4eebdd7df0bd17fd9fac13fe55ac434957509b742771a00d352a981
6
+ metadata.gz: 94aae7605ca3243e7226e6f2e1c844f141d3ef04995751718e08ef5fb9dfa550455c6c87420e731332b765ee262442ed2608b5f0d7b05a25b982615b993114e5
7
+ data.tar.gz: f2dedba8fb33b9f7d98ddaa4debcec042edf56396c22a791ce8897736839c559240cb20158916f7c2bc5f483da06c1bebb7212ec2f24b16c3554164f849da621
data/README.md CHANGED
@@ -15,13 +15,32 @@ for any class it can find, and reports a small but trustworthy
15
15
  catalogue of bugs (undefined methods on typed receivers, wrong
16
16
  positional arity, provable `Integer / 0`, …).
17
17
 
18
- The differentiator is a richer type vocabulary than ordinary
19
- RBS expresses. Rigor reasons about *what values an expression
20
- actually produces* — literal values, integer ranges,
21
- refinement-type carriers, per-position tuple / hash shapes —
22
- not just *which class an object belongs to*. See **[Beyond
23
- `Integer` and `String`](#beyond-integer-and-string-rigors-richer-type-vocabulary)**
24
- for the full type-model story; the short pitch is below.
18
+ **Two design commitments drive Rigor.**
19
+
20
+ 1. **Types are facts, not wishes.** Hand-written type
21
+ annotations drift from the implementation the moment they
22
+ are written. Rigor infers from the code itself — every
23
+ carrier in its type vocabulary is derived from what your
24
+ source actually produces, not from a signature you authored
25
+ and might forget to update. When you do want RBS in
26
+ `sig/`, [`rigor sig-gen`](docs/adr/14-rbs-sig-generation.md)
27
+ emits it from inference results so the written form starts
28
+ in sync with reality, and `tighter-return` candidates flag
29
+ the cases where an existing `.rbs` is already weaker than
30
+ what the implementation provably returns.
31
+ 2. **Programmable inference beyond unions.** A plain union
32
+ (`Integer | nil`) is not the type story Ruby needs. Rigor
33
+ reasons about *what values an expression actually
34
+ produces* — literal values, integer ranges, refinement
35
+ carriers, per-position tuple / hash shapes, bound-method
36
+ bindings — and exposes a plugin extension API plus an
37
+ [ADR-16](docs/adr/16-macro-expansion.md) macro / DSL
38
+ expansion substrate so Rails-shape DSLs are first-class
39
+ type sources rather than analysis blind spots.
40
+
41
+ See **[Beyond `Integer` and `String`](#beyond-integer-and-string-rigors-richer-type-vocabulary)**
42
+ for the full type-model story; the carrier-zoo table is the
43
+ short pitch.
25
44
 
26
45
  When you want tighter types than RBS expresses, refine them
27
46
  through the
@@ -426,19 +445,20 @@ plugin-supplied type-vocabulary resolvers, and
426
445
  [ADR-16](docs/adr/16-macro-expansion.md) macro / DSL expansion
427
446
  substrate (declarative Tier A block-as-method / Tier B
428
447
  trait-inlining-registry / Tier C heredoc-template / Tier D
429
- external-file inclusion). **Twenty-four worked examples** ship
430
- under [`examples/`](examples/) — each is a fully-shaped plugin
431
- gem with a runnable demo and an end-to-end integration spec.
448
+ external-file inclusion). Production plugins ship under
449
+ [`plugins/`](plugins/) — each is a fully-shaped plugin gem
450
+ with a runnable demo and an end-to-end integration spec.
451
+ Plugin-contract walkthroughs (deliberately simplified
452
+ virtual use cases that spotlight one architectural surface
453
+ per example) live under [`examples/`](examples/).
432
454
 
433
- **Plugin-contract teaching examples** (focus on a single
434
- extension-point):
455
+ **Plugin-contract walkthroughs** (`examples/`, focus on a
456
+ single extension-point):
435
457
 
436
458
  - [`rigor-deprecations`](examples/rigor-deprecations/) —
437
459
  smallest possible plugin (~80 lines); config-driven rules.
438
460
  - [`rigor-lisp-eval`](examples/rigor-lisp-eval/) — typing literal
439
461
  AST arguments at a method call.
440
- - [`rigor-statesman`](examples/rigor-statesman/) — two-pass DSL
441
- analysis (collect declarations, then validate references).
442
462
  - [`rigor-pattern`](examples/rigor-pattern/) — plugin →
443
463
  analyzer collaboration via `Scope#type_of` and the
444
464
  literal-string carrier.
@@ -446,7 +466,13 @@ extension-point):
446
466
  tracking through arithmetic.
447
467
  - [`rigor-routes`](examples/rigor-routes/) — `Plugin::IoBoundary`
448
468
  reads under `TrustPolicy` plus cache producers.
449
- - [`rigor-typescript-utility-types`](examples/rigor-typescript-utility-types/)
469
+
470
+ **Other production plugins for type-language extension** (`plugins/`):
471
+
472
+ - [`rigor-statesman`](plugins/rigor-statesman/) — two-pass DSL
473
+ analysis (collect declarations, then validate references)
474
+ for the Statesman state-machine gem.
475
+ - [`rigor-typescript-utility-types`](plugins/rigor-typescript-utility-types/)
450
476
  — `Plugin::TypeNodeResolver` chain wiring TS-canonical names
451
477
  (`Pick` / `Omit` / `Partial` / `Required` / `Readonly`) onto
452
478
  Rigor's shape-projection type functions.
@@ -454,16 +480,16 @@ extension-point):
454
480
  **Macro expansion substrate consumers** (ADR-16 — declarative
455
481
  manifest entries, no walker code):
456
482
 
457
- - [`rigor-sinatra`](examples/rigor-sinatra/) — **Tier A**
483
+ - [`rigor-sinatra`](plugins/rigor-sinatra/) — **Tier A**
458
484
  block-as-method. Recognises Sinatra's nine class-level HTTP
459
485
  verb methods and narrows the route block's `self_type` so
460
486
  bare `params` / `redirect` / `halt` resolve through
461
487
  `Sinatra::Base`'s RBS.
462
- - [`rigor-dry-struct`](examples/rigor-dry-struct/) — **Tier C**
488
+ - [`rigor-dry-struct`](plugins/rigor-dry-struct/) — **Tier C**
463
489
  heredoc-template. Synthesises a reader on every `Dry::Struct`
464
490
  subclass for each `attribute :name, T` / `attribute? :name, T`
465
491
  call.
466
- - [`rigor-devise`](examples/rigor-devise/) — **Tier B**
492
+ - [`rigor-devise`](plugins/rigor-devise/) — **Tier B**
467
493
  trait-inlining registry mirroring `lib/devise/modules.rb`.
468
494
  Each `devise :strategy_a, :strategy_b` call explodes the
469
495
  included module's RBS instance methods onto the calling model
@@ -472,28 +498,30 @@ manifest entries, no walker code):
472
498
 
473
499
  **Rails ecosystem plugins** (Tier 1 + Tier 2 + Tier 3 + Sorbet):
474
500
 
475
- - Tier 1: [`rigor-rails-routes`](examples/rigor-rails-routes/),
476
- [`rigor-rails-i18n`](examples/rigor-rails-i18n/),
477
- [`rigor-actionmailer`](examples/rigor-actionmailer/),
478
- [`rigor-activejob`](examples/rigor-activejob/).
479
- - Tier 2: [`rigor-actionpack`](examples/rigor-actionpack/)
501
+ - Tier 1: [`rigor-rails-routes`](plugins/rigor-rails-routes/),
502
+ [`rigor-rails-i18n`](plugins/rigor-rails-i18n/),
503
+ [`rigor-actionmailer`](plugins/rigor-actionmailer/),
504
+ [`rigor-activejob`](plugins/rigor-activejob/).
505
+ - Tier 2: [`rigor-actionpack`](plugins/rigor-actionpack/)
480
506
  (4 phases — routes / filters / renders / strong-params),
481
- [`rigor-factorybot`](examples/rigor-factorybot/),
482
- [`rigor-activerecord`](examples/rigor-activerecord/) —
507
+ [`rigor-factorybot`](plugins/rigor-factorybot/),
508
+ [`rigor-activerecord`](plugins/rigor-activerecord/) —
483
509
  publishes `:model_index` via ADR-9 for the other two
484
510
  to consume.
485
- - Tier 3: [`rigor-pundit`](examples/rigor-pundit/),
486
- [`rigor-sidekiq`](examples/rigor-sidekiq/),
487
- [`rigor-rspec`](examples/rigor-rspec/),
488
- [`rigor-actioncable`](examples/rigor-actioncable/).
489
- - Parallel: [`rigor-sorbet`](examples/rigor-sorbet/) — ingests
511
+ - Tier 3: [`rigor-pundit`](plugins/rigor-pundit/),
512
+ [`rigor-sidekiq`](plugins/rigor-sidekiq/),
513
+ [`rigor-rspec`](plugins/rigor-rspec/),
514
+ [`rigor-actioncable`](plugins/rigor-actioncable/).
515
+ - Parallel: [`rigor-sorbet`](plugins/rigor-sorbet/) — ingests
490
516
  Sorbet `sig` / `T.let` / `T.cast` / `T.must` / `T.bind` /
491
517
  `T.assert_type!` / `T.reveal_type` / `T.absurd` and RBI
492
518
  files as type sources.
493
519
 
494
- [`examples/README.md`](examples/README.md) is the plugin
495
- authoring landing page — comparison table, recommended reading
496
- order, and the architectural map of which surface each example
520
+ [`plugins/README.md`](plugins/README.md) is the production
521
+ plugin catalogue (Rails / RSpec / dry-rb / Sorbet / etc.) and
522
+ [`examples/README.md`](examples/README.md) is the walkthrough
523
+ catalogue — comparison table, recommended reading order, and
524
+ the architectural map of which surface each walkthrough
497
525
  exercises. The binding contract for the plugin API lives in
498
526
  [`docs/adr/2-extension-api.md`](docs/adr/2-extension-api.md);
499
527
  the slice-by-slice normative specs are under
@@ -532,7 +560,7 @@ Common knobs the file exposes:
532
560
 
533
561
  ## Status
534
562
 
535
- Current released version: **`v0.1.4`**. The analyzer is usable
563
+ Current released version: **`v0.1.5`**. The analyzer is usable
536
564
  on real Ruby code today; the rule catalogue is deliberately
537
565
  narrow — Rigor's stance is to surface zero false positives
538
566
  while the inference surface stabilises. Forward-looking commitments
@@ -540,50 +568,21 @@ while the inference surface stabilises. Forward-looking commitments
540
568
  [`docs/ROADMAP.md`](docs/ROADMAP.md); the release-by-release
541
569
  "what shipped" record is [`CHANGELOG.md`](CHANGELOG.md).
542
570
 
543
- `v0.1.4` (released 2026-05-14) delivered:
544
-
545
- - **[ADR-10](docs/adr/10-dependency-source-inference.md) closed
546
- end-to-end** — opt-in gem-source inference, per-gem budget,
547
- cache slice, and the `dynamic.dependency-source.boundary-cross`
548
- `:info` diagnostic that surfaces RBS / gem-source overlap
549
- under `mode: :full`.
550
- - **[ADR-11](docs/adr/11-sorbet-input-adapter.md) primary surface
551
- + per-call-site assertion gating** — `rigor-sorbet` ingests
552
- Sorbet `sig { ... }` blocks, `T.let` / `T.cast` / `T.must` /
553
- `T.bind` / `T.assert_type!` / `T.reveal_type` / `T.absurd`,
554
- and RBI files. Per-call-site `enforce_sigil` gates assertion
555
- recognisers by the caller file's `# typed:` sigil.
556
- - **[ADR-13](docs/adr/13-typenode-resolver-plugin.md) plugin
557
- TypeNode resolver + TypeScript-utility-type adapter** —
558
- `Plugin::TypeNodeResolver` extension point + five
559
- Rigor-canonical shape-projection type functions
560
- (`pick_of` / `omit_of` / `partial_of` / `required_of` /
561
- `readonly_of`) + the opt-in `rigor-typescript-utility-types`
562
- plugin mapping TS spellings onto the core functions.
563
- `Pick[T, :a | :b]` round-trips through the directive grammar.
564
- - **[ADR-14](docs/adr/14-rbs-sig-generation.md) — `rigor sig-gen`
565
- CLI** — emits RBS from inference results across five
566
- classifications (`new-file` / `new-method` / `tighter-return`
567
- / `equivalent` / `skipped`); `--params=untyped` default,
568
- `--params=observed` opt-in via `--observe=PATH`.
569
- - **`Method` carrier (`Type::BoundMethod`)** —
570
- `Object#method(:sym).call` / `.()` / `[]` round-trip with
571
- full precision instead of collapsing to `untyped`.
572
- - **Rails ecosystem (Tier 1 + Tier 2)** — `rigor-rails-routes`,
573
- `rigor-rails-i18n`, `rigor-actionmailer`, `rigor-activejob`,
574
- `rigor-actionpack` (4 phases), `rigor-factorybot`, and
575
- `rigor-activerecord` publishing `:model_index` via the
576
- ADR-9 cross-plugin fact channel.
577
-
578
- Twenty-four worked plugin examples now ship under
571
+ `v0.1.5` (released 2026-05-16) delivered (full slice list in `CHANGELOG.md` § `[0.1.5]`):
572
+
573
+ - **ADR-15 Ractor migration end-to-end** (Phases 1–4c + 4b.x) — opt-in `rigor check --workers=N` parallelism; pool ≡ sequential proven on 14 real-world projects (31,840 files); spec-suite wall-clock 162s → 27s on 12 cores via `parallel_tests`.
574
+ - **[ADR-16](docs/adr/16-macro-expansion.md) macro / DSL expansion substrate** — four-tier declarative manifest contract (block-as-method, trait-inlining registry, heredoc-template, external-file) with Tier B/C precision promotion and three worked consumer plugins (`rigor-sinatra`, `rigor-devise`, `rigor-dry-struct`). Closes ROADMAP O2 at the WD13 floor.
575
+ - **Real-world Rails / Ruby survey** — fourteen projects swept; opt-in `rigor-activesupport-core-ext` RBS bundle delivers `−75 %` total diagnostics; built-in vendored gem RBS for six native-extension gems (`pg` / `mysql2` / `nokogiri` / `bcrypt` / `redis` / `idn-ruby`); Bundler-aware sig discovery; `RbsLoader#env` failure-memo (~550× speedup on a conflicting sig).
576
+ - **O4 Layer 3 target-project RBS source discovery (slices 1+2+3)** — `Gemfile.lock` parse + bundle-sig filter, `rbs_collection.lock.yaml` awareness, missing-gem `:info` diagnostic.
577
+ - **DEFAULT_LIBRARIES stdlib coverage expansion** — out-of-the-box RBS classes available 1,273 → 1,427 (+154); 31 additional stdlib libraries auto-load.
578
+ - **`is_a?(C)` lexical-nesting constant resolution** — predicate-narrowing now mirrors Ruby's `Module.nesting`-driven lookup.
579
+
580
+ Production plugins ship under [`plugins/`](plugins/) (Rails /
581
+ RSpec / dry-rb / Sorbet / etc.) — see
582
+ [`plugins/README.md`](plugins/README.md) for the catalogue.
583
+ Plugin-contract walkthroughs ship under
579
584
  [`examples/`](examples/) — see
580
- [`examples/README.md`](examples/README.md) for the comparison
581
- table. The current `[Unreleased]` cycle on `master` (release
582
- pending) also delivered the [ADR-16](docs/adr/16-macro-expansion.md)
583
- macro / DSL expansion substrate (four-tier declarative
584
- manifest contract + engine integration + Tier B/C precision
585
- promotion); see `CHANGELOG.md` `[Unreleased]` for the full
586
- landing notes.
585
+ [`examples/README.md`](examples/README.md).
587
586
 
588
587
  ## Contributing
589
588
 
@@ -594,5 +593,3 @@ skill documentation contributors should know about.
594
593
  ## License
595
594
 
596
595
  Mozilla Public License Version 2.0. See [`LICENSE`](LICENSE).
597
- </content>
598
- </invoke>
@@ -0,0 +1,347 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "yaml"
4
+
5
+ module Rigor
6
+ module Analysis
7
+ # ADR-22 Slice 1 — PHPStan-shaped per-project baseline.
8
+ #
9
+ # Loads `.rigor-baseline.yml`, filters a current run's
10
+ # diagnostic stream against the recorded buckets, and emits
11
+ # an `(surfaced, silenced_count)` pair for the CLI to render.
12
+ #
13
+ # Two row shapes are accepted (WD1):
14
+ #
15
+ # # rule-ID row — bucket key (path, qualified_rule)
16
+ # - file: app/models/user.rb
17
+ # rule: call.undefined-method
18
+ # count: 3
19
+ #
20
+ # # message-pattern row — bucket key
21
+ # # (path, qualified_rule, message_regex)
22
+ # - file: app/lib/sig.rb
23
+ # rule: call.undefined-method
24
+ # message: "undefined method `merge' for Array"
25
+ # count: 1
26
+ #
27
+ # ## Semantics per (file, rule [, message]) bucket (WD4)
28
+ #
29
+ # actual <= count → ALL diagnostics in the bucket are silenced.
30
+ # actual > count → ALL diagnostics in the bucket surface
31
+ # (not just the excess delta — the bucket
32
+ # has crossed its threshold; the team's
33
+ # review focus shifts from "which N is new"
34
+ # to "what's going on with this rule in
35
+ # this file as a whole").
36
+ #
37
+ # ## Filter pipeline position (WD6)
38
+ #
39
+ # The baseline filter runs LAST among the diagnostic-suppression
40
+ # layers:
41
+ #
42
+ # emit → `# rigor:disable` (per-line)
43
+ # → `# rigor:disable-file`
44
+ # → severity_profile re-stamp
45
+ # → baseline filter (this class)
46
+ # → output
47
+ #
48
+ # ## Loading (WD2 (b))
49
+ #
50
+ # `Baseline.load` is called by the CLI when it has resolved
51
+ # an explicit baseline path (from `--baseline=PATH` on the
52
+ # CLI or `baseline: <path>` in `.rigor.yml`). The presence
53
+ # of `.rigor-baseline.yml` on disk alone never triggers a
54
+ # load — that's the CLI / Configuration's job to enforce.
55
class Baseline
  # One recorded baseline row. `message_regex` is `nil` for a
  # rule-ID row and a Regexp for a message-pattern row, so both
  # row shapes can live in the same collection.
  #
  # `count` shadows Struct#count on purpose: it is the field name
  # used in the baseline file, and the Enumerable-style
  # `Struct#count` is never called on Bucket instances.
  Bucket = Struct.new(:file, :rule, :message_regex, :count, keyword_init: true) # rubocop:disable Lint/StructNewOverride

  CURRENT_VERSION = 1

  # Raised when a baseline file cannot be parsed or validated.
  # Inside this class the bare name `LoadError` refers to this
  # class, not to Ruby's top-level ::LoadError.
  class LoadError < StandardError; end

  class << self
    # Load a baseline file from disk.
    #
    # @param path [String, nil] location of the baseline file.
    # @return [Baseline, nil] `nil` when `path` is nil (the
    #   caller's "no baseline configured" state); an empty
    #   baseline when `path` does not exist on disk; otherwise
    #   the parsed baseline.
    # @raise [LoadError] on malformed content. This covers both
    #   structurally invalid rows and input the YAML parser
    #   itself rejects (syntax errors, disallowed tags), so the
    #   caller only has one exception class to translate.
    def load(path)
      return nil if path.nil?
      return new([]) unless File.exist?(path)

      raw = begin
        YAML.safe_load_file(path, permitted_classes: [Symbol])
      rescue Psych::Exception => e
        raise LoadError, "#{path}: not a valid YAML document: #{e.message}"
      end
      parse_loaded(raw, path: path)
    end

    # Build a baseline from a current run's diagnostic stream.
    #
    # @param diagnostics [Enumerable] objects responding to
    #   `path`, `qualified_rule` and `message`.
    # @param match_mode [Symbol] `:rule` (default) records one
    #   bucket per (file, rule); `:message` records one bucket
    #   per (file, rule, literal message), with the message
    #   passed through `Regexp.escape`.
    # @return [Baseline]
    # @raise [ArgumentError] for any other `match_mode`.
    def from_diagnostics(diagnostics, match_mode: :rule)
      raise ArgumentError, "match_mode must be :rule or :message" unless %i[rule message].include?(match_mode)

      grouped = group_for_baseline(diagnostics, match_mode)
      buckets = grouped.map do |key, entries|
        Bucket.new(
          file: key[0],
          rule: key[1],
          message_regex: key[2],
          count: entries.size
        )
      end
      new(buckets)
    end

    private

    # Validates the top-level document shape (`version:` plus an
    # optional `ignored:` Array) and parses each row.
    def parse_loaded(raw, path:)
      raise LoadError, "#{path}: expected a Hash at top level, got #{raw.class}" unless raw.is_a?(Hash)

      version = raw["version"]
      unless version == CURRENT_VERSION
        raise LoadError, "#{path}: unsupported `version: #{version.inspect}` (expected #{CURRENT_VERSION})"
      end

      rows = raw["ignored"] || []
      raise LoadError, "#{path}: `ignored:` must be an Array" unless rows.is_a?(Array)

      new(rows.each_with_index.map { |row, idx| parse_row(row, path: path, index: idx) })
    end

    # Turns one `ignored:` entry into a Bucket. `file:`, `rule:`
    # and a positive Integer `count:` are required; `message:`
    # is optional and compiled as a Regexp source.
    def parse_row(row, path:, index:)
      raise LoadError, "#{path}: ignored[#{index}] must be a Hash" unless row.is_a?(Hash)

      file = row["file"] or raise LoadError, "#{path}: ignored[#{index}] missing `file:`"
      rule = row["rule"] or raise LoadError, "#{path}: ignored[#{index}] missing `rule:`"
      count = row["count"]
      unless count.is_a?(Integer) && count.positive?
        raise LoadError, "#{path}: ignored[#{index}] `count:` must be a positive Integer (got #{count.inspect})"
      end

      message_regex = nil
      if (message = row["message"])
        message_regex = compile_message_regex(message, path: path, index: index)
      end

      Bucket.new(file: file, rule: rule, message_regex: message_regex, count: count)
    end

    # Compiles a `message:` value, reporting an invalid pattern
    # as a LoadError that names the offending row.
    def compile_message_regex(source, path:, index:)
      Regexp.new(source.to_s)
    rescue RegexpError => e
      raise LoadError, "#{path}: ignored[#{index}] `message:` is not a valid Regexp: #{e.message}"
    end

    # Returns Hash{[file, rule, regex_or_nil] => Array<Diagnostic>}.
    # In message mode each unique message gets its own bucket;
    # in rule mode every diagnostic for a (file, rule) pair
    # lands in a single bucket regardless of message.
    # Diagnostics without a rule or a path are skipped.
    def group_for_baseline(diagnostics, match_mode)
      diagnostics.each_with_object({}) do |diag, into|
        next if diag.qualified_rule.nil?
        next if diag.path.nil?

        key = case match_mode
              when :rule
                [diag.path, diag.qualified_rule, nil]
              when :message
                [diag.path, diag.qualified_rule, message_pattern_for(diag.message)]
              end
        (into[key] ||= []) << diag
      end
    end

    # Builds the Regexp recorded for a generated message row.
    # The message is `Regexp.escape`d, so the pattern matches
    # the literal text; users hand-editing the row can replace
    # the escaped form with a real pattern.
    def message_pattern_for(message)
      Regexp.new(Regexp.escape(message.to_s))
    end
  end

  attr_reader :buckets

  # @param buckets [Array<Bucket>] rows in baseline-file order.
  #   The array is copied before freezing so the caller's own
  #   array is left untouched.
  def initialize(buckets)
    @buckets = buckets.dup.freeze
    # (file, rule) => [message-pattern rows, rule-ID rows].
    # The split is computed once here because the matcher
    # consults it for every diagnostic.
    @by_pair = @buckets
               .group_by { |b| [b.file, b.rule] }
               .transform_values { |rows| rows.partition(&:message_regex).each(&:freeze).freeze }
               .freeze
    freeze
  end

  # Apply the baseline filter to a diagnostic stream.
  #
  # A bucket whose matching diagnostics number `<= count` is
  # silenced as a whole; a bucket that exceeds its count
  # surfaces as a whole. Diagnostics claimed by no bucket, and
  # diagnostics lacking a rule or a path, always surface.
  #
  # @return [Array(Array, Integer)] `surfaced` (in the same
  #   relative order as the input) and `silenced_count`.
  def filter(diagnostics)
    return [diagnostics, 0] if buckets.empty?

    # Identity-keyed so two diagnostics that compare equal are
    # still judged by the bin each one actually landed in.
    silenced = {}.compare_by_identity
    group_diagnostics_for_filtering(diagnostics).each_value do |bin|
      bucket = bin[:bucket]
      next unless bucket && bin[:diagnostics].size <= bucket.count

      bin[:diagnostics].each { |diag| silenced[diag] = true }
    end

    surfaced = diagnostics.reject { |diag| silenced.key?(diag) }
    [surfaced, diagnostics.size - surfaced.size]
  end

  # A single bucket's drift state. `status` is one of:
  #
  # - `:cleared`   — `actual == 0` (the bucket can be pruned).
  # - `:reducible` — `0 < actual < count` (the bucket's count
  #                  can be tightened).
  # - `:within`    — `actual == count`.
  # - `:over`      — `actual > count` (over threshold).
  DriftRow = Struct.new(:bucket, :actual_count, :status, keyword_init: true) do
    # Signed distance from the recorded count; positive when
    # the bucket is over threshold.
    def delta
      actual_count - bucket.count
    end
  end

  # Report bucket-level drift for the current diagnostic
  # stream. Every baseline bucket yields one DriftRow whether
  # or not the current run still matches it.
  #
  # @param diagnostics [Array<Diagnostic>] the PRE-filter
  #   diagnostic stream.
  # @return [Array<DriftRow>] one entry per baseline bucket,
  #   in baseline-file order.
  def audit(diagnostics)
    counts = Hash.new(0)
    diagnostics.each do |diag|
      next if diag.qualified_rule.nil? || diag.path.nil?

      bucket = claim_bucket_for(diag)
      counts[bucket_key(bucket)] += 1 if bucket
    end

    buckets.map do |bucket|
      actual = counts[bucket_key(bucket)]
      DriftRow.new(bucket: bucket, actual_count: actual, status: status_for(actual, bucket.count))
    end
  end

  # Returns a new Baseline with the given buckets dropped.
  def without(buckets_to_drop)
    dropset = buckets_to_drop.to_set
    self.class.new(buckets.reject { |b| dropset.include?(b) })
  end

  # Serialise to a YAML string. Message-pattern rows are
  # written as the Regexp's source text.
  def to_yaml
    rows = buckets.map do |bucket|
      row = { "file" => bucket.file, "rule" => bucket.rule }
      row["message"] = bucket.message_regex.source if bucket.message_regex
      row["count"] = bucket.count
      row
    end

    document = { "version" => CURRENT_VERSION, "ignored" => rows }
    YAML.dump(document)
  end

  # The number of buckets recorded.
  def size
    buckets.size
  end

  def empty?
    buckets.empty?
  end

  private

  # Classifies an observed count against the recorded count.
  def status_for(actual, count)
    return :cleared if actual.zero?
    return :over if actual > count
    return :within if actual == count

    :reducible
  end

  # Hash key identifying a bucket; the Regexp is reduced to its
  # source text.
  def bucket_key(bucket)
    [bucket.file, bucket.rule, bucket.message_regex&.source]
  end

  # Bins each keyable diagnostic under the bucket that claims
  # it. A diagnostic that matches no row goes into a synthetic
  # "no-bucket" bin keyed by (file, rule). Returns
  # Hash{key => { bucket:, diagnostics: }}.
  def group_diagnostics_for_filtering(diagnostics)
    bins = {}
    diagnostics.each do |diag|
      next if diag.qualified_rule.nil? || diag.path.nil?

      bucket = claim_bucket_for(diag)
      key = bucket ? bucket_key(bucket) : [diag.path, diag.qualified_rule, :__none__]
      bin = (bins[key] ||= { bucket: bucket, diagnostics: [] })
      bin[:diagnostics] << diag
    end
    bins
  end

  # Finds the bucket responsible for a diagnostic, or nil.
  # Message-pattern rows are tried first because they are more
  # specific; the rule-ID row is the fallback.
  def claim_bucket_for(diagnostic)
    message_rows, rule_rows = @by_pair[[diagnostic.path, diagnostic.qualified_rule]]
    return nil if message_rows.nil?

    text = diagnostic.message.to_s
    message_rows.find { |b| b.message_regex.match?(text) } || rule_rows.first
  end
end
346
+ end
347
+ end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rigor
4
+ module Analysis
5
+ # Binds one logical project path (the path the user is editing,
6
+ # e.g. `lib/foo.rb`) to a physical file containing the in-flight
7
+ # buffer bytes (e.g. `/tmp/9539itfeh2.rb`). When the runner /
8
+ # workers / pre-passes need to read source for the logical path,
9
+ # they read from the physical path instead; when they emit a
10
+ # `Diagnostic`, the path is the logical one so editors highlight
11
+ # the buffer the user is actually looking at.
12
+ #
13
+ # See `docs/design/20260516-editor-mode.md` for the design.
14
+ # The CLI surfaces this through paired `--tmp-file` /
15
+ # `--instead-of` flags on `rigor check` and `rigor type-of`;
16
+ # programmatic callers pass a `BufferBinding` to `Runner.new`.
17
BufferBinding = Data.define(:logical_path, :physical_path) do
  # Translates a path that is about to be parsed into the path
  # whose bytes should be read. Only the bound logical path is
  # redirected (to the physical buffer file); any other path
  # comes back unchanged, so this can be called on every path.
  def resolve(path)
    return physical_path if path == logical_path

    path
  end

  # The inverse of `#resolve`: translates a path the caller is
  # holding into the path to show in user-facing output. The
  # physical buffer file is reported as the logical path; any
  # other path comes back unchanged, so every outgoing path can
  # be stamped through this helper.
  def display_path(path)
    return logical_path if path == physical_path

    path
  end
end
35
+ end
36
+ end