RubyGems - rigortype - Versions diffs - 0.0.3 → 0.0.4 - Mend

rigortype 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (29)

checksums.yaml +4 -4
data/README.md +24 -7
data/data/builtins/ruby_core/hash.yml +936 -0
data/data/builtins/ruby_core/range.yml +389 -0
data/data/builtins/ruby_core/set.yml +594 -0
data/data/builtins/ruby_core/time.yml +750 -0
data/lib/rigor/analysis/check_rules.rb +11 -3
data/lib/rigor/builtins/imported_refinements.rb +192 -10
data/lib/rigor/inference/acceptance.rb +181 -12
data/lib/rigor/inference/builtins/hash_catalog.rb +40 -0
data/lib/rigor/inference/builtins/range_catalog.rb +46 -0
data/lib/rigor/inference/builtins/set_catalog.rb +54 -0
data/lib/rigor/inference/builtins/time_catalog.rb +64 -0
data/lib/rigor/inference/method_dispatcher/constant_folding.rb +28 -8
data/lib/rigor/inference/method_dispatcher/iterator_dispatch.rb +103 -1
data/lib/rigor/inference/method_dispatcher/overload_selector.rb +23 -7
data/lib/rigor/inference/method_dispatcher/shape_dispatch.rb +135 -6
data/lib/rigor/inference/method_parameter_binder.rb +29 -4
data/lib/rigor/inference/narrowing.rb +2 -0
data/lib/rigor/inference/statement_evaluator.rb +2 -0
data/lib/rigor/rbs_extended.rb +167 -16
data/lib/rigor/type/combinator.rb +90 -0
data/lib/rigor/type/intersection.rb +135 -0
data/lib/rigor/type/refined.rb +174 -0
data/lib/rigor/type.rb +2 -0
data/lib/rigor/version.rb +1 -1
data/sig/rigor/rbs_extended.rbs +11 -0
data/sig/rigor/type.rbs +40 -0
metadata +11 -1

data/lib/rigor/analysis/check_rules.rb CHANGED Viewed

@@ -693,14 +693,16 @@ module Rigor
           return nil if method_def.nil? || method_def == true
           return nil unless method_def.method_types.size == 1
-          mismatch = first_argument_mismatch(method_def.method_types.first, call_node, scope)
+          param_overrides = Rigor::RbsExtended.param_type_override_map(method_def)
+          mismatch = first_argument_mismatch(method_def.method_types.first, call_node, scope, param_overrides)
           return nil if mismatch.nil?
           build_argument_type_diagnostic(path, call_node, class_name, mismatch)
         end
         # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/AbcSize
-        def first_argument_mismatch(method_type, call_node, scope) # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity
+        # rubocop:disable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/AbcSize
+        def first_argument_mismatch(method_type, call_node, scope, param_overrides)
           function = method_type.type
           return nil unless argument_check_eligible?(function)
@@ -710,7 +712,12 @@ module Rigor
             param = params[index]
             next if param.nil? # arity mismatch is the wrong-arity rule's concern.
-            param_type = translate_param_type(param.type, scope.environment)
+            # `rigor:v1:param: <name> <refinement>` annotations
+            # tighten the RBS-declared parameter type. The
+            # override is the authoritative contract when
+            # present; otherwise we translate the RBS type as
+            # before.
+            param_type = param_overrides[param.name] || translate_param_type(param.type, scope.environment)
             next if param_type.is_a?(Type::Dynamic) || param_type.is_a?(Type::Top)
             arg_type = scope.type_of(arg)
@@ -721,6 +728,7 @@ module Rigor
           end
           nil
         end
+        # rubocop:enable Metrics/CyclomaticComplexity, Metrics/PerceivedComplexity, Metrics/AbcSize
         def argument_check_eligible?(function)
           # See `arity_eligible?`: `UntypedFunction` lacks

data/lib/rigor/builtins/imported_refinements.rb CHANGED Viewed

@@ -1,5 +1,7 @@
 # frozen_string_literal: true
+require "strscan"
 require_relative "../type"
 module Rigor
@@ -13,7 +15,7 @@ module Rigor
     # `non-empty-array`, …) to the Rigor type each name denotes.
     # The registry is the single integration point for:
     #
-    # - The new `rigor:v1:return:` RBS::Extended directive
+    # - The `rigor:v1:return:` RBS::Extended directive
     #   ([`Rigor::RbsExtended.read_return_type_override`](../rbs_extended.rb)),
     #   which overrides a method's RBS-declared return type
     #   with a refinement carrier.
@@ -28,12 +30,19 @@ module Rigor
     # decide whether to fall back to the RBS-declared type or
     # raise a parse error.
     #
-    # The current registry covers no-argument refinement
-    # names. Parameterised refinements like
-    # `non-empty-array[Integer]` will be parsed by a future
-    # tokeniser; today the no-arg form `non-empty-array` lands
-    # at `non_empty_array(top)` and downstream code projects
-    # to the underlying base nominal.
+    # The registry covers two surfaces:
+    #
+    # - **No-argument refinement names** (`non-empty-string`,
+    #   `non-zero-int`, `lowercase-string`, …) live in `REGISTRY`
+    #   and resolve through `lookup(name)`.
+    # - **Parameterised refinement payloads** (`non-empty-array[Integer]`,
+    #   `non-empty-hash[Symbol, Integer]`, `int<5, 10>`) are
+    #   accepted by `parse(payload)`. The full grammar is documented
+    #   on `Parser`. The two surfaces share `REGISTRY` for the
+    #   no-arg head names; the parameterised head names live in
+    #   `PARAMETERISED_TYPE_BUILDERS` (square-bracket form, type
+    #   args) and `PARAMETERISED_INT_BUILDERS` (angle-bracket form,
+    #   integer bounds).
     module ImportedRefinements
       REGISTRY = {
         "non-empty-string" => -> { Type::Combinator.non_empty_string },
@@ -43,10 +52,51 @@ module Rigor
         "positive-int" => -> { Type::Combinator.positive_int },
         "non-negative-int" => -> { Type::Combinator.non_negative_int },
         "negative-int" => -> { Type::Combinator.negative_int },
-        "non-positive-int" => -> { Type::Combinator.non_positive_int }
+        "non-positive-int" => -> { Type::Combinator.non_positive_int },
+        "lowercase-string" => -> { Type::Combinator.lowercase_string },
+        "uppercase-string" => -> { Type::Combinator.uppercase_string },
+        "numeric-string" => -> { Type::Combinator.numeric_string },
+        "decimal-int-string" => -> { Type::Combinator.decimal_int_string },
+        "octal-int-string" => -> { Type::Combinator.octal_int_string },
+        "hex-int-string" => -> { Type::Combinator.hex_int_string },
+        "non-empty-lowercase-string" => -> { Type::Combinator.non_empty_lowercase_string },
+        "non-empty-uppercase-string" => -> { Type::Combinator.non_empty_uppercase_string }
       }.freeze
       private_constant :REGISTRY
+      # `name[T]` / `name[K, V]` — type-arg parameterised
+      # refinements. Each builder takes an `Array<Rigor::Type>`
+      # and returns a `Rigor::Type` (or `nil` on arity / shape
+      # mismatch so the caller surfaces a parse failure).
+      PARAMETERISED_TYPE_BUILDERS = {
+        "non-empty-array" => lambda { |args|
+          return nil unless args.size == 1
+          Type::Combinator.non_empty_array(args.first)
+        },
+        "non-empty-hash" => lambda { |args|
+          return nil unless args.size == 2
+          Type::Combinator.non_empty_hash(args[0], args[1])
+        }
+      }.freeze
+      private_constant :PARAMETERISED_TYPE_BUILDERS
+      # `name<min, max>` — integer-bound parameterised
+      # refinements. Each builder takes an `Array<Integer>` and
+      # returns a `Rigor::Type` (or `nil`). Bounds are signed
+      # integer literals; `min` MUST be ≤ `max` for the carrier
+      # to construct successfully (`Type::IntegerRange` enforces
+      # the invariant).
+      PARAMETERISED_INT_BUILDERS = {
+        "int" => lambda { |bounds|
+          return nil unless bounds.size == 2
+          Type::Combinator.integer_range(bounds[0], bounds[1])
+        }
+      }.freeze
+      private_constant :PARAMETERISED_INT_BUILDERS
       module_function
       # @param name [String] kebab-case refinement name.
@@ -57,13 +107,145 @@ module Rigor
         builder&.call
       end
+      # @param payload [String] the trailing payload of a
+      #   `rigor:v1:return:` (or sibling) directive. Accepts
+      #   the bare-name forms `lookup` already handles plus the
+      #   parameterised forms documented on {Parser}.
+      # @return [Rigor::Type, nil] the resolved refinement
+      #   carrier, or `nil` when the payload is unparseable or
+      #   names a refinement / class not in the registry.
+      def parse(payload)
+        Parser.new(payload.to_s).parse
+      end
       def known?(name)
-        REGISTRY.key?(name.to_s)
+        REGISTRY.key?(name.to_s) ||
+          PARAMETERISED_TYPE_BUILDERS.key?(name.to_s) ||
+          PARAMETERISED_INT_BUILDERS.key?(name.to_s)
       end
       def known_names
-        REGISTRY.keys
+        REGISTRY.keys + PARAMETERISED_TYPE_BUILDERS.keys + PARAMETERISED_INT_BUILDERS.keys
+      end
+      # Recursive-descent parser for the refinement-payload
+      # grammar:
+      #
+      #   type        := simple_name | parametric
+      #   simple_name := /[a-z][a-z0-9-]*/
+      #   parametric  := simple_name '[' type_arg_list ']'
+      #                | simple_name '<' int_bound_list '>'
+      #   type_arg_list := type_arg (',' type_arg)*
+      #   type_arg    := type | class_name
+      #   class_name  := /[A-Z][A-Za-z0-9_]*(?:::[A-Z][A-Za-z0-9_]*)*/
+      #   int_bound_list := signed_int (',' signed_int)*
+      #   signed_int  := /-?\d+/
+      #
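+      # Whitespace around list items and separators is ignored
+      # (leading/trailing whitespace is stripped up front), but
+      # none is accepted between a name and its opening `[` / `<`.
+      # The parser is meant to fail soft (returns `nil` from
+      # `parse`) on any deviation so the `RBS::Extended` directive
+      # site can fall back to the RBS-declared type rather than
+      # crash on a typo.
+      # NOTE(review): `parse_int_bound` calls `Integer(literal)`
+      # with no explicit base, so a bound with a leading zero is
+      # read as octal (`010` → 8) and `08` / `09` raise
+      # `ArgumentError` instead of yielding `nil`.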
+      class Parser
+        def initialize(input)
+          @scanner = StringScanner.new(input.strip)
+        end
+        def parse
+          type = parse_type
+          return nil if type.nil?
+          return nil unless @scanner.eos?
+          type
+        end
+        private
+        SIMPLE_NAME = /[a-z][a-z0-9-]*/
+        CLASS_NAME = /[A-Z][A-Za-z0-9_]*(?:::[A-Z][A-Za-z0-9_]*)*/
+        SIGNED_INT = /-?\d+/
+        private_constant :SIMPLE_NAME, :CLASS_NAME, :SIGNED_INT
+        def parse_type
+          name = @scanner.scan(SIMPLE_NAME)
+          return nil if name.nil?
+          case @scanner.peek(1)
+          when "[" then parse_parametric_type_args(name)
+          when "<" then parse_parametric_int_bounds(name)
+          else          ImportedRefinements.lookup(name)
+          end
+        end
+        def parse_parametric_type_args(name)
+          builder = PARAMETERISED_TYPE_BUILDERS[name]
+          return nil if builder.nil?
+          @scanner.getch # consume '['
+          args = parse_type_arg_list
+          return nil if args.nil?
+          return nil unless @scanner.getch == "]"
+          builder.call(args)
+        end
+        def parse_parametric_int_bounds(name)
+          builder = PARAMETERISED_INT_BUILDERS[name]
+          return nil if builder.nil?
+          @scanner.getch # consume '<'
+          bounds = parse_int_bound_list
+          return nil if bounds.nil?
+          return nil unless @scanner.getch == ">"
+          builder.call(bounds)
+        end
+        def parse_type_arg_list
+          collect_separated_list { parse_type_arg }
+        end
+        def parse_int_bound_list
+          collect_separated_list { parse_int_bound }
+        end
+        def collect_separated_list
+          items = []
+          loop do
+            skip_ws
+            item = yield
+            return nil if item.nil?
+            items << item
+            skip_ws
+            break unless @scanner.peek(1) == ","
+            @scanner.getch # consume ','
+          end
+          items
+        end
+        def parse_type_arg
+          skip_ws
+          if (class_name = @scanner.scan(CLASS_NAME))
+            Type::Combinator.nominal_of(class_name)
+          else
+            parse_type
+          end
+        end
+        def parse_int_bound
+          skip_ws
+          literal = @scanner.scan(SIGNED_INT)
+          return nil if literal.nil?
+          Integer(literal)
+        end
+        def skip_ws
+          @scanner.skip(/\s+/)
+        end
       end
+      private_constant :Parser
     end
   end
 end

data/lib/rigor/inference/acceptance.rb CHANGED Viewed

@@ -47,7 +47,18 @@ module Rigor
         if other_type.is_a?(Type::Dynamic)
           return Type::AcceptsResult.yes(mode: mode, reasons: "gradual: Dynamic[T] passes any boundary")
         end
+        # Structural equality short-circuit. Two identical carriers
+        # describe the same value set, so they always accept each
+        # other. This is sound for any mode and covers cases where
+        # neither side has a per-class rule for the other's exact
+        # carrier kind (the canonical example is
+        # `Intersection.accepts(Intersection)`, where the disjunction
+        # rule below would otherwise reject equal-but-narrow LHSes).
+        return Type::AcceptsResult.yes(mode: mode, reasons: "structural equality") if self_type == other_type
         return accepts_union_other(self_type, other_type, mode) if other_type.is_a?(Type::Union)
+        return accepts_intersection_other(self_type, other_type, mode) if other_type.is_a?(Type::Intersection)
         accepts_one(self_type, other_type, mode)
       end
@@ -66,6 +77,8 @@ module Rigor
         Type::Constant => :accepts_constant,
         Type::IntegerRange => :accepts_integer_range,
         Type::Difference => :accepts_difference,
+        Type::Refined => :accepts_refined,
+        Type::Intersection => :accepts_intersection,
         Type::Tuple => :accepts_tuple,
         Type::HashShape => :accepts_hash_shape
       }.freeze
@@ -128,6 +141,27 @@ module Rigor
           end
         end
+        # self.accepts(Intersection[Y_1, Y_2, …]) iff self accepts at
+        # least one Y_i. Disjunction across members because the
+        # intersection is the meet of its members' value sets and so
+        # is contained in every member: if self contains any one
+        # member, it contains the whole intersection. Symmetric
+        # counterpart to `accepts_union_other`.
+        def accepts_intersection_other(self_type, intersection, mode)
+          results = intersection.members.map { |m| accepts(self_type, m, mode: mode) }
+          if results.any?(&:yes?)
+            Type::AcceptsResult.yes(mode: mode, reasons: "self accepts an intersection member")
+          elsif results.any?(&:maybe?)
+            Type::AcceptsResult.maybe(
+              mode: mode,
+              reasons: "self could not be proven to accept any intersection member"
+            )
+          else
+            Type::AcceptsResult.no(mode: mode, reasons: "self rejects every intersection member")
+          end
+        end
         # self.accepts(Union[Y, Z]) iff self accepts every Y_i. Strict
         # AND across members: any "no" turns the whole result no, any
         # "maybe" without a "no" gives maybe, all "yes" gives yes.
@@ -186,20 +220,40 @@ module Rigor
         # - Singleton: never (wrong value kind).
         def accepts_nominal(self_type, other_type, mode)
           case other_type
-          when Type::Nominal
-            accepts_nominal_from_nominal(self_type, other_type, mode)
-          when Type::Constant
-            accepts_nominal_from_constant(self_type, other_type, mode)
-          when Type::Singleton
-            accepts_nominal_from_singleton(self_type, other_type, mode)
-          when Type::IntegerRange
-            accepts_nominal_from_integer_range(self_type, other_type, mode)
+          when Type::Nominal then accepts_nominal_from_nominal(self_type, other_type, mode)
+          when Type::Constant then accepts_nominal_from_constant(self_type, other_type, mode)
+          when Type::Singleton then accepts_nominal_from_singleton(self_type, other_type, mode)
+          when Type::IntegerRange then accepts_nominal_from_integer_range(self_type, other_type, mode)
+          else accepts_nominal_from_shape(self_type, other_type, mode)
+          end
+        end
+        # Tail of `accepts_nominal` that handles structural shape
+        # carriers (`Tuple` / `HashShape`) and refinement carriers
+        # (`Difference` / `Refined`). Each branch projects the
+        # other-side carrier to the nominal layer it sits above
+        # and re-runs acceptance — soundness follows because the
+        # carrier's value set is contained in the projected
+        # nominal's value set.
+        def accepts_nominal_from_shape(self_type, other_type, mode)
+          case other_type
           when Type::Tuple
             accepts(self_type, project_tuple_to_nominal(other_type), mode: mode)
               .with_reason("projected Tuple to Nominal[Array]")
           when Type::HashShape
             accepts(self_type, project_hash_shape_to_nominal(other_type), mode: mode)
               .with_reason("projected HashShape to Nominal[Hash]")
+          when Type::Difference, Type::Refined
+            # A refinement carrier's value set is a subset of its
+            # base. So if `self` (Nominal) accepts the base, it
+            # also accepts the refinement; if it rejects the
+            # base, it cannot accept any subset of it. Forward
+            # through to the base nominal so the standard subtype
+            # check applies. The recursion is bounded because
+            # every refinement carrier's `base` is closer to the
+            # nominal layer.
+            accepts(self_type, other_type.base, mode: mode)
+              .with_reason("projected #{other_type.class.name.split('::').last} to its base")
           else
             Type::AcceptsResult.no(
               mode: mode,
@@ -486,10 +540,125 @@ module Rigor
           when Type::Constant
             !(removed.is_a?(Type::Constant) && removed.value == other_type.value)
           when Type::Difference
-            # `Difference[A, removed_R].accepts(Difference[B, R])` —
-            # the inner difference exhibits the same disjointness;
-            # forward to the base.
-            other_type.removed == removed && provably_disjoint_from_removed?(other_type.base, removed)
+            # `Difference[A, R].accepts(Difference[B, R])`: the
+            # other carrier already excludes `R` at its difference
+            # layer, so the disjointness is exhibited regardless of
+            # how `B` (its base) relates to `R`. We do NOT recurse
+            # into `other_type.base` because that would always fail
+            # (a Nominal base contains the removed value).
+            other_type.removed == removed
+          when Type::Intersection
+            # Disjointness is monotonic over Intersection: if any
+            # member is provably disjoint from `removed`, the meet
+            # is too.
+            other_type.members.any? { |m| provably_disjoint_from_removed?(m, removed) }
+          end
+        end
+        # `Refined[base, predicate]` accepts another type X when
+        # the base accepts X (or the *base* of X, when X is itself
+        # a `Refined`) *and* X is provably contained in the
+        # predicate's value set. For a `Refined` X the base check
+        # is delegated to `accepts(self.base, X.base)` so handlers
+        # like `accepts_nominal` see Nominal-vs-Nominal and return
+        # their normal answer; for a `Constant` or any other X it
+        # is `accepts(self.base, X)`, and `accepts_nominal` itself
+        # projects a `Refined` / `Difference` X to its base before
+        # comparing.
+        #
+        # Provability rules in gradual mode (the conservative
+        # analogue of `accepts_difference`):
+        #
+        # - X is a `Refined` with the *same* predicate_id —
+        #   exact predicate match, accept.
+        # - X is a `Constant` whose value the predicate's
+        #   recogniser accepts — the value is statically
+        #   contained, accept. A recognised non-match is `:no`.
+        # - Anything else (Nominal, Union, IntegerRange,
+        #   Difference) — predicate-subset cannot be proven
+        #   without a runtime test, so reject under gradual
+        #   mode rather than degrade to `:maybe`. Mirrors the
+        #   `accepts_difference` policy.
+        def accepts_refined(self_type, other_type, mode)
+          case other_type
+          when Type::Refined then accepts_refined_from_refined(self_type, other_type, mode)
+          when Type::Constant then accepts_refined_from_constant(self_type, other_type, mode)
+          else accepts_refined_other_shape(self_type, other_type, mode)
+          end
+        end
+        def accepts_refined_from_refined(self_type, other_type, mode)
+          base_result = accepts(self_type.base, other_type.base, mode: mode)
+          return base_result if base_result.no?
+          if other_type.predicate_id == self_type.predicate_id
+            base_result.with_reason("matching predicate :#{self_type.predicate_id}")
+          else
+            Type::AcceptsResult.no(
+              mode: mode,
+              reasons: "predicate mismatch: :#{self_type.predicate_id} vs :#{other_type.predicate_id}"
+            )
+          end
+        end
+        def accepts_refined_from_constant(self_type, constant, mode)
+          base_result = accepts(self_type.base, constant, mode: mode)
+          return base_result if base_result.no?
+          case self_type.matches?(constant.value)
+          when true
+            base_result.with_reason("Constant value satisfies :#{self_type.predicate_id}")
+          when false
+            Type::AcceptsResult.no(
+              mode: mode,
+              reasons: "Constant value fails :#{self_type.predicate_id}"
+            )
+          else
+            Type::AcceptsResult.maybe(
+              mode: mode,
+              reasons: "predicate :#{self_type.predicate_id} not in registry"
+            )
+          end
+        end
+        def accepts_refined_other_shape(self_type, other_type, mode)
+          base_result = accepts(self_type.base, other_type, mode: mode)
+          return base_result if base_result.no?
+          Type::AcceptsResult.no(
+            mode: mode,
+            reasons: "#{self_type.describe} cannot prove #{other_type.class} satisfies " \
+                     ":#{self_type.predicate_id}"
+          )
+        end
+        # `Intersection[M1, M2, …]` accepts X iff *every* member
+        # accepts X — the meet of value sets is contained iff the
+        # candidate is contained in each. Conjunctive combine: any
+        # `:no` makes the result `:no`, any `:maybe` without a
+        # `:no` makes the result `:maybe`, all `:yes` makes the
+        # result `:yes`. The 0-member case is unreachable because
+        # `Combinator.intersection` collapses empty intersections
+        # to `Top`.
+        def accepts_intersection(self_type, other_type, mode)
+          per_member = self_type.members.map { |m| accepts(m, other_type, mode: mode) }
+          if per_member.any?(&:no?)
+            return Type::AcceptsResult.no(
+              mode: mode,
+              reasons: "an intersection member rejected #{other_type.class}"
+            )
+          end
+          if per_member.any?(&:maybe?)
+            Type::AcceptsResult.maybe(
+              mode: mode,
+              reasons: "an intersection member could not be proven accepted"
+            )
+          else
+            Type::AcceptsResult.yes(
+              mode: mode,
+              reasons: "every intersection member accepted #{other_type.class}"
+            )
           end
         end

data/lib/rigor/inference/builtins/hash_catalog.rb ADDED Viewed

@@ -0,0 +1,40 @@
+# frozen_string_literal: true
+require_relative "method_catalog"
+module Rigor
+  module Inference
+    module Builtins
+      # `Hash` catalog. Singleton — load once, consult during dispatch.
+      #
+      # Hash mirrors Array's mutation pattern: nearly every iteration
+      # method yields through `rb_hash_foreach` plus a per-pair static
+      # callback (`each_value_i`, `keep_if_i`, …), and the C-body
+      # classifier does not follow into the callback so it lands as
+      # `:leaf` despite being block-dependent. The blocklist below
+      # captures every false-positive `:leaf` we have spotted in the
+      # generated YAML — bias toward conservatism so a missed fold is
+      # acceptable but a folded mutator/yielder is not.
+      HASH_CATALOG = MethodCatalog.new(
+        path: File.expand_path(
+          "../../../../data/builtins/ruby_core/hash.yml",
+          __dir__
+        ),
+        mutating_selectors: {
+          "Hash" => Set[
+            # Block-dependent iteration — yields via `rb_hash_foreach`
+            # plus a per-pair callback that the regex classifier does
+            # not follow:
+            :each, :each_pair, :each_key, :each_value,
+            :select, :filter, :reject,
+            :transform_values,
+            # Block-dependent merge — `rb_hash_merge` delegates into
+            # `rb_hash_update`, which yields per conflict when a block
+            # is given:
+            :merge
+          ]
+        }
+      )
+    end
+  end
+end

data/lib/rigor/inference/builtins/range_catalog.rb ADDED Viewed

@@ -0,0 +1,46 @@
+# frozen_string_literal: true
+require_relative "method_catalog"
+module Rigor
+  module Inference
+    module Builtins
+      # `Range` catalog. Singleton — load once, consult during
+      # dispatch.
+      #
+      # Range is largely immutable: `begin`, `end`, and `excl` are
+      # set at construction by `range_initialize` and never mutated
+      # afterwards. The blocklist below therefore stays small. The
+      # entries we DO need are the iteration methods whose C body
+      # routes through a helper the block/yield regex does not
+      # recognise, so the classifier mis-flags them as `:leaf`
+      # despite yielding to a block.
+      RANGE_CATALOG = MethodCatalog.new(
+        path: File.expand_path(
+          "../../../../data/builtins/ruby_core/range.yml",
+          __dir__
+        ),
+        mutating_selectors: {
+          "Range" => Set[
+            # `range_initialize` / `range_initialize_copy` write
+            # `begin`/`end`/`excl` slots on the receiver; classed
+            # `:leaf` because the writes go through the struct
+            # accessor not `rb_check_frozen`. Blocked for symmetry
+            # with String / Array.
+            :initialize, :initialize_copy,
+            # `range_reverse_each` yields to its block via
+            # `range_each_func` -> caller's block; the regex
+            # classifier follows direct `rb_yield*` calls only.
+            :reverse_each,
+            # `range_percent_step` returns an Enumerator unless a
+            # block is supplied, in which case it yields. Treated
+            # as block-dependent so the fold tier never invokes it
+            # against a literal Range and tries to materialise an
+            # Enumerator into a Constant.
+            :%
+          ]
+        }
+      )
+    end
+  end
+end

data/lib/rigor/inference/builtins/set_catalog.rb ADDED Viewed

@@ -0,0 +1,54 @@
+# frozen_string_literal: true
+require_relative "method_catalog"
+module Rigor
+  module Inference
+    module Builtins
+      # `Set` catalog. Singleton — load once, consult during dispatch.
+      #
+      # Set was rewritten in C and folded into CRuby for Ruby 3.2+;
+      # the reference branch (`ruby_4_0`) ships the implementation in
+      # `references/ruby/set.c` with `Init_Set` registering every
+      # method directly. There is no `set.rb` prelude — the trailing
+      # `rb_provide("set.rb")` makes `require "set"` a no-op against
+      # the built-in.
+      #
+      # The blocklist below catches the catalog `:leaf` entries the
+      # C-body classifier mis-attributes. Set's iteration helpers
+      # (`set_iter`, `RETURN_SIZED_ENUMERATOR`) and its identity-
+      # mode and reset paths drive into helpers the regex classifier
+      # does not yet recognise as block-yielding or mutating.
+      SET_CATALOG = MethodCatalog.new(
+        path: File.expand_path(
+          "../../../../data/builtins/ruby_core/set.yml",
+          __dir__
+        ),
+        mutating_selectors: {
+          "Set" => Set[
+            # Indirect mutators classified `:leaf` because the C
+            # classifier did not follow the helper functions:
+            #
+            # - `initialize_copy` calls `set_copy` to overwrite the
+            #   receiver's table.
+            # - `compare_by_identity` swaps the internal hash type
+            #   via `set_reset_table_with_type`.
+            # - `reset` rebuilds the internal table to dedup after
+            #   element mutation.
+            :initialize_copy, :compare_by_identity, :reset,
+            # Block-dependent methods classified `:leaf` because the
+            # C body uses `set_iter` / `RETURN_SIZED_ENUMERATOR`
+            # rather than calling `rb_yield` directly:
+            :each, :classify, :divide,
+            # `disjoint?` delegates into `set_i_intersect`, which
+            # for non-Set enumerables uses `rb_funcall(other,
+            # :any?, ...)` — that is user-redefinable dispatch the
+            # classifier missed because the call site is in a
+            # sibling function.
+            :disjoint?
+          ]
+        }
+      )
+    end
+  end
+end