rigortype 0.1.5 → 0.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (66) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +36 -50
  3. data/lib/rigor/analysis/buffer_binding.rb +36 -0
  4. data/lib/rigor/analysis/check_rules.rb +11 -1
  5. data/lib/rigor/analysis/dependency_source_inference/index.rb +14 -1
  6. data/lib/rigor/analysis/dependency_source_inference/return_type_heuristic.rb +105 -0
  7. data/lib/rigor/analysis/dependency_source_inference/walker.rb +32 -12
  8. data/lib/rigor/analysis/project_scan.rb +39 -0
  9. data/lib/rigor/analysis/runner.rb +309 -22
  10. data/lib/rigor/analysis/worker_session.rb +14 -2
  11. data/lib/rigor/builtins/hkt_builtins.rb +342 -0
  12. data/lib/rigor/builtins/static_return_refinements.rb +120 -0
  13. data/lib/rigor/cache/store.rb +33 -3
  14. data/lib/rigor/cli/lsp_command.rb +129 -0
  15. data/lib/rigor/cli/type_of_command.rb +44 -5
  16. data/lib/rigor/cli.rb +74 -12
  17. data/lib/rigor/configuration.rb +38 -2
  18. data/lib/rigor/environment/hkt_registry_holder.rb +33 -0
  19. data/lib/rigor/environment/rbs_coverage_report.rb +1 -1
  20. data/lib/rigor/environment/rbs_loader.rb +45 -2
  21. data/lib/rigor/environment/reporters.rb +40 -0
  22. data/lib/rigor/environment.rb +106 -9
  23. data/lib/rigor/inference/acceptance.rb +48 -3
  24. data/lib/rigor/inference/expression_typer.rb +47 -0
  25. data/lib/rigor/inference/hkt_body.rb +171 -0
  26. data/lib/rigor/inference/hkt_body_parser.rb +363 -0
  27. data/lib/rigor/inference/hkt_reducer.rb +256 -0
  28. data/lib/rigor/inference/hkt_registry.rb +223 -0
  29. data/lib/rigor/inference/method_dispatcher/overload_selector.rb +125 -30
  30. data/lib/rigor/inference/method_dispatcher/receiver_affinity.rb +87 -0
  31. data/lib/rigor/inference/method_dispatcher.rb +154 -3
  32. data/lib/rigor/inference/project_patched_methods.rb +70 -0
  33. data/lib/rigor/inference/project_patched_scanner.rb +210 -0
  34. data/lib/rigor/inference/scope_indexer.rb +156 -12
  35. data/lib/rigor/inference/statement_evaluator.rb +106 -6
  36. data/lib/rigor/inference/synthetic_method_scanner.rb +94 -16
  37. data/lib/rigor/language_server/buffer_table.rb +63 -0
  38. data/lib/rigor/language_server/completion_provider.rb +438 -0
  39. data/lib/rigor/language_server/debouncer.rb +86 -0
  40. data/lib/rigor/language_server/diagnostic_publisher.rb +167 -0
  41. data/lib/rigor/language_server/document_symbol_provider.rb +142 -0
  42. data/lib/rigor/language_server/folding_range_provider.rb +75 -0
  43. data/lib/rigor/language_server/hover_provider.rb +74 -0
  44. data/lib/rigor/language_server/hover_renderer.rb +312 -0
  45. data/lib/rigor/language_server/loop.rb +71 -0
  46. data/lib/rigor/language_server/project_context.rb +145 -0
  47. data/lib/rigor/language_server/selection_range_provider.rb +93 -0
  48. data/lib/rigor/language_server/server.rb +384 -0
  49. data/lib/rigor/language_server/signature_help_provider.rb +249 -0
  50. data/lib/rigor/language_server/synchronized_writer.rb +28 -0
  51. data/lib/rigor/language_server/uri.rb +40 -0
  52. data/lib/rigor/language_server.rb +29 -0
  53. data/lib/rigor/plugin/base.rb +63 -0
  54. data/lib/rigor/plugin/macro/heredoc_template.rb +125 -11
  55. data/lib/rigor/plugin/manifest.rb +54 -7
  56. data/lib/rigor/plugin/registry.rb +19 -0
  57. data/lib/rigor/rbs_extended/hkt_directives.rb +326 -0
  58. data/lib/rigor/rbs_extended.rb +82 -2
  59. data/lib/rigor/sig_gen/generator.rb +12 -3
  60. data/lib/rigor/type/app.rb +107 -0
  61. data/lib/rigor/type.rb +1 -0
  62. data/lib/rigor/version.rb +1 -1
  63. data/sig/rigor/environment.rbs +8 -4
  64. data/sig/rigor/inference.rbs +2 -0
  65. data/sig/rigor.rbs +3 -1
  66. metadata +54 -1
@@ -446,13 +446,45 @@ module Rigor
446
446
 
447
447
  def accepts_nominal_from_constant(self_type, constant, mode)
448
448
  ruby_class = resolve_class(self_type.class_name)
449
- if ruby_class.nil?
450
- return Type::AcceptsResult.maybe(
449
+ return constant_is_a_result(ruby_class, constant, self_type, mode) if ruby_class
450
+
451
+ # The host process may not have required the constant's
452
+ # declared self_type (e.g. `BigDecimal` since Ruby 3.4
453
+ # is no longer a default gem). Fall back to inspecting
454
+ # the value's own class ancestor chain — always loadable
455
+ # because the value already exists. Required for
456
+ # OverloadSelector to reject `Integer#+(BigDecimal) ->
457
+ # BigDecimal` overloads contributed by `bigdecimal`'s
458
+ # RBS reopening when the actual arg is a Constant<Integer>.
459
+ ancestor_names = constant.value.class.ancestors.map(&:name)
460
+ if ancestor_names.include?(self_type.class_name)
461
+ Type::AcceptsResult.yes(
451
462
  mode: mode,
452
- reasons: "class #{self_type.class_name} not loadable; cannot prove from Constant"
463
+ reasons: "Constant value class ancestors include #{self_type.class_name}"
464
+ )
465
+ else
466
+ Type::AcceptsResult.no(
467
+ mode: mode,
468
+ reasons: "Constant value class ancestors exclude #{self_type.class_name}"
453
469
  )
454
470
  end
471
+ end
472
+
473
+ def subtype_result_via_ancestors(actual_class, target_name, mode)
474
+ if actual_class.ancestors.map(&:name).include?(target_name)
475
+ Type::AcceptsResult.yes(
476
+ mode: mode,
477
+ reasons: "#{actual_class.name} ancestors include #{target_name}"
478
+ )
479
+ else
480
+ Type::AcceptsResult.no(
481
+ mode: mode,
482
+ reasons: "#{actual_class.name} ancestors exclude #{target_name} (target unloadable)"
483
+ )
484
+ end
485
+ end
455
486
 
487
+ def constant_is_a_result(ruby_class, constant, self_type, mode)
456
488
  if constant.value.is_a?(ruby_class)
457
489
  Type::AcceptsResult.yes(mode: mode, reasons: "Constant value is_a?(#{self_type.class_name})")
458
490
  else
@@ -794,6 +826,19 @@ module Rigor
794
826
 
795
827
  target_class = resolve_class(target_name)
796
828
  actual_class = resolve_class(actual_name)
829
+ # When only `actual` resolves, we can still rule out
830
+ # `actual <:= target` by inspecting `actual`'s ancestor
831
+ # chain. The canonical case: `target=BigDecimal` is not
832
+ # loadable in the host process (no `require` in rigor's
833
+ # own runtime), but `actual=Integer` IS, and Integer's
834
+ # ancestors do not include `BigDecimal`, so the subtype
835
+ # relation MUST be `:no` rather than the conservative
836
+ # `:maybe`. The reverse asymmetry (target resolves,
837
+ # actual doesn't) does not let us conclude anything —
838
+ # the unloaded `actual` could be an unrelated class or
839
+ # a subclass of `target` we can't see, so we still
840
+ # answer `:maybe` there.
841
+ return subtype_result_via_ancestors(actual_class, target_name, mode) if target_class.nil? && actual_class
797
842
  if target_class.nil? || actual_class.nil?
798
843
  return Type::AcceptsResult.maybe(
799
844
  mode: mode,
@@ -61,6 +61,10 @@ module Rigor
61
61
  Prism::RationalNode => :type_of_literal_value,
62
62
  Prism::SymbolNode => :symbol_type_for,
63
63
  Prism::StringNode => :string_type_for,
64
+ Prism::XStringNode => :type_of_xstring,
65
+ Prism::InterpolatedXStringNode => :type_of_xstring,
66
+ Prism::SourceFileNode => :type_of_source_file,
67
+ Prism::SourceLineNode => :type_of_source_line,
64
68
  Prism::TrueNode => :type_of_true,
65
69
  Prism::FalseNode => :type_of_false,
66
70
  Prism::NilNode => :type_of_nil,
@@ -144,6 +148,9 @@ module Rigor
144
148
  Prism::AliasMethodNode => :type_of_nil_value,
145
149
  Prism::AliasGlobalVariableNode => :type_of_nil_value,
146
150
  Prism::UndefNode => :type_of_nil_value,
151
+ Prism::PostExecutionNode => :type_of_nil_value,
152
+ Prism::ShareableConstantNode => :type_of_shareable_constant,
153
+ Prism::ImplicitNode => :type_of_implicit,
147
154
  Prism::ForwardingSuperNode => :type_of_dynamic_top,
148
155
  Prism::BlockArgumentNode => :type_of_non_value,
149
156
  # Parameters and blocks (non-value positions)
@@ -159,6 +166,7 @@ module Rigor
159
166
  Prism::ForwardingParameterNode => :type_of_non_value,
160
167
  Prism::NoKeywordsParameterNode => :type_of_non_value,
161
168
  Prism::ImplicitRestNode => :type_of_non_value,
169
+ Prism::ItParametersNode => :type_of_non_value,
162
170
  Prism::BlockNode => :type_of_dynamic_top,
163
171
  Prism::SplatNode => :type_of_non_value,
164
172
  # Control flow (Slice 3 phase 1): branch types are unioned, jumps
@@ -888,6 +896,45 @@ module Rigor
888
896
  Type::Combinator.constant_of(unescaped)
889
897
  end
890
898
 
899
+ # Backtick (`cmd`) and `%x{cmd}` invoke Kernel#` and always return a
900
+ # String. Even when the content is statically known, we widen to
901
+ # Nominal[String] because the runtime value depends on the
902
+ # subprocess output, not the source text.
903
+ def type_of_xstring(_node)
904
+ Type::Combinator.nominal_of(String)
905
+ end
906
+
907
+ # __FILE__ is the source file path. Always non-empty when
908
+ # parsing a real file (the path resolver gives the buffer
909
+ # name, which is at minimum `"(stdin)"` / `"-e"` / a real
910
+ # path — never the empty String). Narrowed to
911
+ # `non-empty-string` instead of `Nominal[String]` so
912
+ # downstream String-emptiness checks know the value cannot
913
+ # be `""`.
914
+ def type_of_source_file(_node)
915
+ Type::Combinator.non_empty_string
916
+ end
917
+
918
+ # __LINE__ is the line of the source literal. Ruby line
919
+ # numbers are 1-indexed, so `__LINE__` is always at least
920
+ # 1 — `positive-int` (Integer in `[1, +Inf)`) is the
921
+ # canonical refinement.
922
+ def type_of_source_line(_node)
923
+ Type::Combinator.positive_int
924
+ end
925
+
926
+ # `# shareable_constant_value:` magic comment wraps the next
927
+ # constant write. Type is the wrapped write's value.
928
+ def type_of_shareable_constant(node)
929
+ type_of(node.write)
930
+ end
931
+
932
+ # `{ x: }` shorthand hash. The implicit value is the call to
933
+ # `x` (or a local read of `x`). Delegate.
934
+ def type_of_implicit(node)
935
+ type_of(node.value)
936
+ end
937
+
891
938
  def local_read(node)
892
939
  scope.local(node.name) || dynamic_top
893
940
  end
@@ -0,0 +1,171 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Rigor
4
+ module Inference
5
+ # ADR-20 Slice 2a — node types for the parsed body of a
6
+ # type-function `Definition`. Each node represents one
7
+ # piece of a Rigor-side type expression that the reducer
8
+ # ({HktReducer}) walks against a concrete argument list.
9
+ #
10
+ # Slice 2a ships a programmatic constructor surface only:
11
+ # plugin and Rigor-bundled overlay authors build a body
12
+ # tree by hand using these node types. The string-grammar
13
+ # parser that reads `Definition#body` (the raw String slot
14
+ # already populated by Slice 1's `HktDirectives.parse_define`)
15
+ # into a tree is Slice 2b's deliverable ({HktBodyParser});
16
+ # the `body` String itself stays opaque and `body_tree` is
17
+ # the evaluable form.
18
+ #
19
+ # The five node types cover the JSON.parse and dry-monads
20
+ # use cases ADR-20 § Implementation slicing names as
21
+ # near-term adopters:
22
+ #
23
+ # - {TypeLeaf} — wraps a fully-built `Rigor::Type`
24
+ # (use for atoms like `nil`, `Constant<true>`,
25
+ # `Nominal[Integer]`).
26
+ # - {Param} — reference to a formal parameter
27
+ # declared in the enclosing `Definition#params` list
28
+ # (e.g. `K` in `json::value[K]`). The reducer
29
+ # substitutes from the application's `args`.
30
+ # - {AppRef} — abstract HKT application; the reducer
31
+ # resolves it via the registry, or returns the `App`
32
+ # carrier as-is when the reference is self-recursive
33
+ # (lazy "tying-the-knot" handling that lets recursive
34
+ # sums like `json::value` reduce without infinite
35
+ # expansion).
36
+ # - {Union} — N-ary union of arms.
37
+ # - {NominalApp} — parameterised nominal class
38
+ # (`Array[X]`, `Hash[K, V]`) whose type args are
39
+ # themselves body nodes.
40
+ #
41
+ # Every node is a frozen `Data.define` value; structural
42
+ # equality is by-field.
43
+ module HktBody
44
+ # Wraps a pre-built `Rigor::Type` value. Use for atoms
45
+ # that need no substitution (e.g. `Nominal[Integer]`,
46
+ # `Constant<nil>`).
47
+ TypeLeaf = Data.define(:type) do
48
+ def initialize(type:)
49
+ raise ArgumentError, "type must not be nil" if type.nil?
50
+
51
+ super
52
+ end
53
+ end
54
+
55
+ # Reference to a formal parameter the enclosing
56
+ # `Definition#params` declared. The reducer substitutes
57
+ # this node with the matching positional arg from the
58
+ # `App` being reduced; an unknown name raises during
59
+ # reduction ({HktBodyParser} only emits `Param` for
60
+ # declared names, so this affects hand-built trees).
61
+ Param = Data.define(:name) do
62
+ def initialize(name:)
63
+ raise ArgumentError, "name must be a Symbol, got #{name.class}" unless name.is_a?(Symbol)
64
+
65
+ super
66
+ end
67
+ end
68
+
69
+ # Abstract HKT application — the reducer's primary
70
+ # recursion point. `uri` is a namespaced Symbol
71
+ # matching some `Registration` in the registry; `args`
72
+ # is an Array of body nodes (each gets substituted /
73
+ # resolved before being used).
74
+ AppRef = Data.define(:uri, :args) do
75
+ def initialize(uri:, args:)
76
+ raise ArgumentError, "uri must be a Symbol, got #{uri.class}" unless uri.is_a?(Symbol)
77
+ raise ArgumentError, "uri must be namespaced as `:a::b`, got #{uri.inspect}" unless uri.to_s.include?("::")
78
+ raise ArgumentError, "args must be an Array, got #{args.class}" unless args.is_a?(Array)
79
+ raise ArgumentError, "args must be non-empty" if args.empty?
80
+
81
+ super(uri: uri, args: args.dup.freeze)
82
+ end
83
+ end
84
+
85
+ # N-ary union. The reducer builds the result through
86
+ # `Type::Combinator.union(*reduced_arms)` so
87
+ # normalization (flattening, dedup, Bot drop) applies.
88
+ Union = Data.define(:arms) do
89
+ def initialize(arms:)
90
+ raise ArgumentError, "arms must be an Array, got #{arms.class}" unless arms.is_a?(Array)
91
+ raise ArgumentError, "arms must be non-empty" if arms.empty?
92
+
93
+ super(arms: arms.dup.freeze)
94
+ end
95
+ end
96
+
97
+ # Parameterised nominal class. `class_name` is the
98
+ # Ruby class name (`"Array"`, `"Hash"`); `args` is an
99
+ # Array of body nodes for the type arguments. The
100
+ # reducer builds the result through
101
+ # `Type::Combinator.nominal_of(class_name, type_args:
102
+ # reduced_args)`.
103
+ NominalApp = Data.define(:class_name, :args) do
104
+ def initialize(class_name:, args:)
105
+ unless class_name.is_a?(String) && !class_name.empty?
106
+ raise ArgumentError, "class_name must be a non-empty String, got #{class_name.inspect}"
107
+ end
108
+ raise ArgumentError, "args must be an Array, got #{args.class}" unless args.is_a?(Array)
109
+ raise ArgumentError, "args must be non-empty (use TypeLeaf with Nominal for raw class refs)" if args.empty?
110
+
111
+ super(class_name: class_name, args: args.dup.freeze)
112
+ end
113
+ end
114
+
115
+ # ADR-20 § D3 — conditional type form. `test` is a
116
+ # {TestSubtype} / {TestEquality} / {TestMembership}
117
+ # value object the reducer evaluates against the
118
+ # current bindings; `then_branch` / `else_branch` are
119
+ # body nodes. The reducer's trinary handling:
120
+ #
121
+ # - test = `yes` → return the reduced `then_branch`.
122
+ # - test = `no` → return the reduced `else_branch`.
123
+ # - test = `maybe` → widen to the union of both
124
+ # reduced branches (per ADR-20 WD7 / robustness
125
+ # principle).
126
+ Conditional = Data.define(:test, :then_branch, :else_branch) do
127
+ def initialize(test:, then_branch:, else_branch:)
128
+ raise ArgumentError, "test must not be nil" if test.nil?
129
+ raise ArgumentError, "then_branch must not be nil" if then_branch.nil?
130
+ raise ArgumentError, "else_branch must not be nil" if else_branch.nil?
131
+
132
+ super
133
+ end
134
+ end
135
+
136
+ # `left <: right` — subtype check. `left` is typically
137
+ # a {Param} reference; `right` is any body expression.
138
+ TestSubtype = Data.define(:left, :right) do
139
+ def initialize(left:, right:)
140
+ raise ArgumentError, "left/right must not be nil" if left.nil? || right.nil?
141
+
142
+ super
143
+ end
144
+ end
145
+
146
+ # `left == right` — structural equality. Useful for
147
+ # discriminating against literal constants
148
+ # (`E == :symbol`).
149
+ TestEquality = Data.define(:left, :right) do
150
+ def initialize(left:, right:)
151
+ raise ArgumentError, "left/right must not be nil" if left.nil? || right.nil?
152
+
153
+ super
154
+ end
155
+ end
156
+
157
+ # `left in [opt1, opt2, ...]` — set membership. Each
158
+ # `option` is a body node; the test passes iff `left`
159
+ # is structurally equal to any of the options.
160
+ TestMembership = Data.define(:left, :options) do
161
+ def initialize(left:, options:)
162
+ raise ArgumentError, "left must not be nil" if left.nil?
163
+ raise ArgumentError, "options must be an Array, got #{options.class}" unless options.is_a?(Array)
164
+ raise ArgumentError, "options must be non-empty" if options.empty?
165
+
166
+ super(left: left, options: options.dup.freeze)
167
+ end
168
+ end
169
+ end
170
+ end
171
+ end
@@ -0,0 +1,363 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "hkt_body"
4
+ require_relative "../type"
5
+
6
+ module Rigor
7
+ module Inference
8
+ # ADR-20 slice 2b — parses the body of an
9
+ # `HktRegistry::Definition` (a `String`, as populated by
10
+ # Slice 1's `HktDirectives.parse_define` from
11
+ # `%a{rigor:v1:hkt_define}` payloads) into the `HktBody`
12
+ # node tree the Slice 2a reducer evaluates against.
13
+ #
14
+ # The minimum-viable grammar covered here is the
15
+ # union-of-atoms-and-parameterised-forms subset of ADR-20
16
+ # § D3 — sufficient for `JSON.parse`'s `json::value`
17
+ # recursive sum and for any other recursive-data-shape
18
+ # signatures (Lisp value trees, dry-types refinements
19
+ # without conditionals). The parenthesised conditional /
20
+ # membership forms drafted in D3 (`(E <: T ? A : B)`,
21
+ # `(E in [k1, k2] ? A : B)`) are parsed as well; a body
22
+ # that does not parse raises `ParseError` and the calling
23
+ # directive parser drops the body_tree (the body String
24
+ # remains stored and the reducer falls back to `app.bound`).
25
+ #
26
+ # ## Grammar (slice 2b)
27
+ #
28
+ # body := union
29
+ # union := type_expr ("|" type_expr)*
30
+ # type_expr := atom | nominal_app | app_ref | param | conditional  (grammar at Parser#parse_conditional)
31
+ # atom := "nil" | "true" | "false" | "bool" | "untyped"
32
+ # param := UCNAME (when UCNAME ∈ params)
33
+ # nominal_app := class_name ("[" union ("," union)* "]")?
34
+ # class_name := "::"? UCNAME ("::" UCNAME)*
35
+ # app_ref := "App" "[" uri "," union ("," union)* "]"
36
+ # uri := IDENT ("::" IDENT)+
37
+ # UCNAME := /[A-Z]\w*/
38
+ # IDENT := /[a-z_]\w*/
39
+ #
40
+ # ## Disambiguation
41
+ #
42
+ # The same syntactic UCNAME spells both a parameter
43
+ # reference (`K` when `params = [:K]`) and a nominal class
44
+ # name (`Integer`). The parser resolves by checking the
45
+ # `params` set passed to {.parse}; an unknown UCNAME is
46
+ # treated as a nominal class name. `App` is reserved at
47
+ # the head position of an `App[...]` form; using `App` as
48
+ # a class name is therefore not supported.
49
+ #
50
+ # Atoms are kept verbatim as `HktBody::TypeLeaf` nodes
51
+ # wrapping the appropriate `Rigor::Type::*` carrier:
52
+ # `nil` / `true` / `false` produce `Constant` carriers;
53
+ # `bool` produces the `Constant<true> | Constant<false>`
54
+ # union; `untyped` produces `Combinator.untyped`
55
+ # (i.e. `Dynamic[Top]`). Nominal class names produce raw
56
+ # `Type::Nominal` carriers (no `name_scope` resolution at
57
+ # this slice — the reducer trusts the name verbatim).
58
+ module HktBodyParser
59
+ class ParseError < StandardError; end
60
+
61
+ module_function
62
+
63
+ def parse(string, params:)
64
+ raise ArgumentError, "string must be a String, got #{string.class}" unless string.is_a?(String)
65
+ raise ArgumentError, "params must be an Array, got #{params.class}" unless params.is_a?(Array)
66
+
67
+ params_set = params.to_h { |p| [p.to_sym, true] }
68
+ tokens = Tokenizer.new(string).tokenize
69
+ parser = Parser.new(tokens, params_set)
70
+ result = parser.parse_union
71
+ parser.expect_eof!
72
+ result
73
+ end
74
+
75
+ Token = Data.define(:kind, :value, :pos)
76
+
77
+ class Tokenizer
78
+ SCANNER_REGEX = /
79
+ \G
80
+ (?:
81
+ (?<ws>\s+)
82
+ | (?<lb>\[)
83
+ | (?<rb>\])
84
+ | (?<lparen>\()
85
+ | (?<rparen>\))
86
+ | (?<comma>,)
87
+ | (?<pipe>\|)
88
+ | (?<sub><:)
89
+ | (?<eq>==)
90
+ | (?<sep>::)
91
+ | (?<colon>:(?!:))
92
+ | (?<question>\?)
93
+ | (?<ident>[a-z_][a-zA-Z0-9_]*)
94
+ | (?<ucname>[A-Z][a-zA-Z0-9_]*)
95
+ )
96
+ /x
97
+
98
+ def initialize(string)
99
+ @string = string
100
+ end
101
+
102
+ TOKEN_KINDS = SCANNER_REGEX.named_captures.keys.freeze
103
+ private_constant :TOKEN_KINDS
104
+
105
+ def tokenize
106
+ tokens = []
107
+ pos = 0
108
+ while pos < @string.size
109
+ match = SCANNER_REGEX.match(@string, pos)
110
+ raise ParseError, "unexpected character at position #{pos}: #{@string[pos].inspect}" if match.nil?
111
+
112
+ kind = TOKEN_KINDS.find { |k| match[k] }
113
+ raise ParseError, "internal tokenizer error at position #{pos}" if kind.nil?
114
+
115
+ value = match[kind.to_sym]
116
+ raise ParseError, "internal tokenizer error: no match for #{kind}" if value.nil?
117
+
118
+ pos += value.size
119
+ next if kind == "ws"
120
+
121
+ tokens << Token.new(kind: kind.to_sym, value: value, pos: pos - value.size)
122
+ end
123
+ tokens
124
+ end
125
+ end
126
+
127
+ class Parser
128
+ def initialize(tokens, params_set)
129
+ @tokens = tokens
130
+ @pos = 0
131
+ @params_set = params_set
132
+ end
133
+
134
+ def parse_union
135
+ arms = [parse_type_expr]
136
+ while peek_kind == :pipe
137
+ consume
138
+ arms << parse_type_expr
139
+ end
140
+ return arms.first if arms.size == 1
141
+
142
+ HktBody::Union.new(arms: arms)
143
+ end
144
+
145
+ def parse_type_expr
146
+ tok = peek
147
+ raise ParseError, "unexpected end of input; expected type expression" if tok.nil?
148
+
149
+ case tok.kind
150
+ when :lparen then parse_conditional
151
+ when :ident then parse_lowercase_atom
152
+ when :ucname then parse_ucname_form
153
+ when :sep then parse_classname_with_leading_sep
154
+ else
155
+ raise ParseError, "unexpected token #{tok.kind} (#{tok.value.inspect}) at position #{tok.pos}"
156
+ end
157
+ end
158
+
159
+ # ADR-20 § D3 conditional parser. Grammar:
160
+ #
161
+ # conditional := "(" test "?" union ":" union ")"
162
+ # test := type_expr (("<:" | "==") type_expr | "in" "[" type_expr ("," type_expr)* "]")
163
+ #
164
+ # Parens delimit a conditional unambiguously — bare
165
+ # `(type_expr)` grouping is not supported at this slice
166
+ # (no use case yet). Branches can be unions; test sides
167
+ # are single arms (wrap in `App[my_union, ...]` if you
168
+ # need a union there). `in [opt1, opt2]` membership
169
+ # tests are parsed by `parse_in_membership` into
170
+ # `HktBody::TestMembership`; each option is a single
171
+ # arm, like the other test operands.
172
+ def parse_conditional
173
+ expect!(:lparen)
174
+ test = parse_test
175
+ expect!(:question)
176
+ then_branch = parse_union
177
+ expect!(:colon)
178
+ else_branch = parse_union
179
+ expect!(:rparen)
180
+ HktBody::Conditional.new(test: test, then_branch: then_branch, else_branch: else_branch)
181
+ end
182
+
183
+ def parse_test
184
+ left = parse_type_expr
185
+ op = peek
186
+ case op&.kind
187
+ when :sub
188
+ consume
189
+ HktBody::TestSubtype.new(left: left, right: parse_type_expr)
190
+ when :eq
191
+ consume
192
+ HktBody::TestEquality.new(left: left, right: parse_type_expr)
193
+ when :ident
194
+ parse_in_membership(left, op_token: op)
195
+ else
196
+ actual = op.nil? ? "end of input" : "#{op.kind} (#{op.value.inspect})"
197
+ raise ParseError, "expected `<:`, `==`, or `in` in conditional test, got #{actual}"
198
+ end
199
+ end
200
+
201
+ # `left in [opt1, opt2, ...]` membership test.
202
+ # Distinguished from a lowercase atom by the
203
+ # subsequent `[` — the only place an identifier
204
+ # `in` is permitted at this position is membership
205
+ # syntax.
206
+ def parse_in_membership(left, op_token:)
207
+ unless op_token.value == "in"
208
+ raise ParseError,
209
+ "expected `<:`, `==`, or `in` in conditional test, got ident (#{op_token.value.inspect})"
210
+ end
211
+
212
+ consume # in
213
+ expect!(:lb)
214
+ options = [parse_type_expr]
215
+ while peek_kind == :comma
216
+ consume
217
+ options << parse_type_expr
218
+ end
219
+ expect!(:rb)
220
+ HktBody::TestMembership.new(left: left, options: options)
221
+ end
222
+
223
+ def parse_lowercase_atom
224
+ tok = consume
225
+ type = case tok.value
226
+ when "nil" then Type::Constant.new(nil)
227
+ when "true" then Type::Constant.new(true)
228
+ when "false" then Type::Constant.new(false)
229
+ when "bool" then Type::Combinator.union(Type::Constant.new(true), Type::Constant.new(false))
230
+ when "untyped" then Type::Combinator.untyped
231
+ else raise ParseError, "unknown atom #{tok.value.inspect} at position #{tok.pos}"
232
+ end
233
+ HktBody::TypeLeaf.new(type: type)
234
+ end
235
+
236
+ def parse_ucname_form
237
+ tok = peek
238
+ return parse_app_ref if tok.value == "App"
239
+
240
+ if @params_set.key?(tok.value.to_sym) && !class_continuation?
241
+ consume
242
+ return HktBody::Param.new(name: tok.value.to_sym)
243
+ end
244
+
245
+ parse_nominal_or_param_with_args
246
+ end
247
+
248
+ # Returns true when the current UCName is followed by
249
+ # `::` (qualified class name continuation) or `[`
250
+ # (parameterised application). In either case the
251
+ # token is a nominal, not a param ref — Slice 2b's
252
+ # `Param` nodes are always single bare identifiers.
253
+ def class_continuation?
254
+ next_tok = @tokens[@pos + 1]
255
+ next_tok && %i[sep lb].include?(next_tok.kind)
256
+ end
257
+
258
+ def parse_nominal_or_param_with_args
259
+ class_name = parse_class_name
260
+ if peek_kind == :lb
261
+ consume
262
+ args = parse_arg_list
263
+ expect!(:rb)
264
+ HktBody::NominalApp.new(class_name: class_name, args: args)
265
+ else
266
+ HktBody::TypeLeaf.new(type: Type::Nominal.new(class_name))
267
+ end
268
+ end
269
+
270
+ def parse_classname_with_leading_sep
271
+ # The leading "::" form (`::Foo::Bar`). Consume the
272
+ # separator so the rest threads through parse_class_name.
273
+ consume
274
+ tok = peek
275
+ raise ParseError, "expected class name after `::`" if tok.nil? || tok.kind != :ucname
276
+
277
+ parse_nominal_or_param_with_args
278
+ end
279
+
280
+ def parse_class_name
281
+ parts = [expect!(:ucname).value]
282
+ while peek_kind == :sep && @tokens[@pos + 1]&.kind == :ucname
283
+ consume # ::
284
+ parts << expect!(:ucname).value
285
+ end
286
+ parts.join("::")
287
+ end
288
+
289
+ def parse_app_ref
290
+ tok = consume
291
+ raise ParseError, "expected `App[...]`, got #{tok.value.inspect}" unless tok.value == "App"
292
+
293
+ expect!(:lb)
294
+ uri = parse_uri
295
+ expect!(:comma)
296
+ args = parse_arg_list
297
+ expect!(:rb)
298
+ HktBody::AppRef.new(uri: uri, args: args)
299
+ end
300
+
301
+ def parse_uri
302
+ parts = [expect!(:ident).value]
303
+ while peek_kind == :sep
304
+ consume
305
+ parts << expect!(:ident).value
306
+ end
307
+ raise ParseError, "uri must be namespaced (`a::b`), got #{parts.first.inspect}" if parts.size < 2
308
+
309
+ parts.join("::").to_sym
310
+ end
311
+
312
+ # Arg list for `Foo[A, B, C]` and `App[uri, A, B]`
313
+ # forms. Each arg is parsed as a union so per-arg
314
+ # `A | B` forms work (`Array[K | nil]`); the COMMA
315
+ # at the top level still separates args, so
316
+ # `Hash[K, V]` reads as two args (each a single-arm
317
+ # union that collapses to the arm) rather than one
318
+ # union of two.
319
+ def parse_arg_list
320
+ args = [parse_union]
321
+ while peek_kind == :comma
322
+ consume
323
+ args << parse_union
324
+ end
325
+ args
326
+ end
327
+
328
+ def expect_eof!
329
+ return if @pos >= @tokens.size
330
+
331
+ tok = @tokens[@pos]
332
+ raise ParseError, "expected end of input, got #{tok.kind} (#{tok.value.inspect}) at position #{tok.pos}"
333
+ end
334
+
335
+ private
336
+
337
+ def peek
338
+ @tokens[@pos]
339
+ end
340
+
341
+ def peek_kind
342
+ @tokens[@pos]&.kind
343
+ end
344
+
345
+ def consume
346
+ tok = @tokens[@pos]
347
+ @pos += 1
348
+ tok
349
+ end
350
+
351
+ def expect!(kind)
352
+ tok = @tokens[@pos]
353
+ if tok.nil? || tok.kind != kind
354
+ actual = tok.nil? ? "end of input" : "#{tok.kind} (#{tok.value.inspect})"
355
+ raise ParseError, "expected #{kind}, got #{actual}"
356
+ end
357
+ @pos += 1
358
+ tok
359
+ end
360
+ end
361
+ end
362
+ end
363
+ end