rigortype 0.1.5 → 0.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +36 -50
- data/lib/rigor/analysis/buffer_binding.rb +36 -0
- data/lib/rigor/analysis/check_rules.rb +11 -1
- data/lib/rigor/analysis/dependency_source_inference/index.rb +14 -1
- data/lib/rigor/analysis/dependency_source_inference/return_type_heuristic.rb +105 -0
- data/lib/rigor/analysis/dependency_source_inference/walker.rb +32 -12
- data/lib/rigor/analysis/project_scan.rb +39 -0
- data/lib/rigor/analysis/runner.rb +309 -22
- data/lib/rigor/analysis/worker_session.rb +14 -2
- data/lib/rigor/builtins/hkt_builtins.rb +342 -0
- data/lib/rigor/builtins/static_return_refinements.rb +120 -0
- data/lib/rigor/cache/store.rb +33 -3
- data/lib/rigor/cli/lsp_command.rb +129 -0
- data/lib/rigor/cli/type_of_command.rb +44 -5
- data/lib/rigor/cli.rb +74 -12
- data/lib/rigor/configuration.rb +38 -2
- data/lib/rigor/environment/hkt_registry_holder.rb +33 -0
- data/lib/rigor/environment/rbs_coverage_report.rb +1 -1
- data/lib/rigor/environment/rbs_loader.rb +45 -2
- data/lib/rigor/environment/reporters.rb +40 -0
- data/lib/rigor/environment.rb +106 -9
- data/lib/rigor/inference/acceptance.rb +48 -3
- data/lib/rigor/inference/expression_typer.rb +47 -0
- data/lib/rigor/inference/hkt_body.rb +171 -0
- data/lib/rigor/inference/hkt_body_parser.rb +363 -0
- data/lib/rigor/inference/hkt_reducer.rb +256 -0
- data/lib/rigor/inference/hkt_registry.rb +223 -0
- data/lib/rigor/inference/method_dispatcher/overload_selector.rb +125 -30
- data/lib/rigor/inference/method_dispatcher/receiver_affinity.rb +87 -0
- data/lib/rigor/inference/method_dispatcher.rb +154 -3
- data/lib/rigor/inference/project_patched_methods.rb +70 -0
- data/lib/rigor/inference/project_patched_scanner.rb +210 -0
- data/lib/rigor/inference/scope_indexer.rb +156 -12
- data/lib/rigor/inference/statement_evaluator.rb +106 -6
- data/lib/rigor/inference/synthetic_method_scanner.rb +94 -16
- data/lib/rigor/language_server/buffer_table.rb +63 -0
- data/lib/rigor/language_server/completion_provider.rb +438 -0
- data/lib/rigor/language_server/debouncer.rb +86 -0
- data/lib/rigor/language_server/diagnostic_publisher.rb +167 -0
- data/lib/rigor/language_server/document_symbol_provider.rb +142 -0
- data/lib/rigor/language_server/folding_range_provider.rb +75 -0
- data/lib/rigor/language_server/hover_provider.rb +74 -0
- data/lib/rigor/language_server/hover_renderer.rb +312 -0
- data/lib/rigor/language_server/loop.rb +71 -0
- data/lib/rigor/language_server/project_context.rb +145 -0
- data/lib/rigor/language_server/selection_range_provider.rb +93 -0
- data/lib/rigor/language_server/server.rb +384 -0
- data/lib/rigor/language_server/signature_help_provider.rb +249 -0
- data/lib/rigor/language_server/synchronized_writer.rb +28 -0
- data/lib/rigor/language_server/uri.rb +40 -0
- data/lib/rigor/language_server.rb +29 -0
- data/lib/rigor/plugin/base.rb +63 -0
- data/lib/rigor/plugin/macro/heredoc_template.rb +125 -11
- data/lib/rigor/plugin/manifest.rb +54 -7
- data/lib/rigor/plugin/registry.rb +19 -0
- data/lib/rigor/rbs_extended/hkt_directives.rb +326 -0
- data/lib/rigor/rbs_extended.rb +82 -2
- data/lib/rigor/sig_gen/generator.rb +12 -3
- data/lib/rigor/type/app.rb +107 -0
- data/lib/rigor/type.rb +1 -0
- data/lib/rigor/version.rb +1 -1
- data/sig/rigor/environment.rbs +8 -4
- data/sig/rigor/inference.rbs +2 -0
- data/sig/rigor.rbs +3 -1
- metadata +54 -1
|
@@ -446,13 +446,45 @@ module Rigor
|
|
|
446
446
|
|
|
447
447
|
def accepts_nominal_from_constant(self_type, constant, mode)
  wanted = self_type.class_name
  loaded = resolve_class(wanted)
  return constant_is_a_result(loaded, constant, self_type, mode) if loaded

  # `wanted` could not be resolved in the host process (for example
  # `BigDecimal`, which stopped being a default gem in Ruby 3.4). The
  # constant's value already exists, so its own class is always
  # inspectable: answer from that class's ancestor names instead.
  # OverloadSelector relies on this to reject the
  # `Integer#+(BigDecimal) -> BigDecimal` overloads contributed by
  # `bigdecimal`'s RBS reopening when the actual arg is a
  # Constant<Integer>.
  matched = constant.value.class.ancestors.any? { |ancestor| ancestor.name == wanted }
  unless matched
    return Type::AcceptsResult.no(
      mode: mode,
      reasons: "Constant value class ancestors exclude #{self_type.class_name}"
    )
  end

  Type::AcceptsResult.yes(
    mode: mode,
    reasons: "Constant value class ancestors include #{self_type.class_name}"
  )
end
|
|
472
|
+
|
|
473
|
+
# Decides `actual_class <: target_name` purely from the ancestor
# names of `actual_class`. Returns a `yes` result when one of the
# ancestors is named `target_name`, otherwise a `no` result.
def subtype_result_via_ancestors(actual_class, target_name, mode)
  related = actual_class.ancestors.any? { |ancestor| ancestor.name == target_name }
  unless related
    return Type::AcceptsResult.no(
      mode: mode,
      reasons: "#{actual_class.name} ancestors exclude #{target_name} (target unloadable)"
    )
  end

  Type::AcceptsResult.yes(
    mode: mode,
    reasons: "#{actual_class.name} ancestors include #{target_name}"
  )
end
|
|
455
486
|
|
|
487
|
+
def constant_is_a_result(ruby_class, constant, self_type, mode)
|
|
456
488
|
if constant.value.is_a?(ruby_class)
|
|
457
489
|
Type::AcceptsResult.yes(mode: mode, reasons: "Constant value is_a?(#{self_type.class_name})")
|
|
458
490
|
else
|
|
@@ -794,6 +826,19 @@ module Rigor
|
|
|
794
826
|
|
|
795
827
|
target_class = resolve_class(target_name)
|
|
796
828
|
actual_class = resolve_class(actual_name)
|
|
829
|
+
# When only `actual` resolves, we can still rule out
|
|
830
|
+
# `actual <:= target` by inspecting `actual`'s ancestor
|
|
831
|
+
# chain. The canonical case: `target=BigDecimal` is not
|
|
832
|
+
# loadable in the host process (no `require` in rigor's
|
|
833
|
+
# own runtime), but `actual=Integer` IS, and Integer's
|
|
834
|
+
# ancestors do not include `BigDecimal`, so the subtype
|
|
835
|
+
# relation MUST be `:no` rather than the conservative
|
|
836
|
+
# `:maybe`. The reverse asymmetry (target resolves,
|
|
837
|
+
# actual doesn't) does not let us conclude anything —
|
|
838
|
+
# the unloaded `actual` could be an unrelated class or
|
|
839
|
+
# a subclass of `target` we can't see, so we still
|
|
840
|
+
# answer `:maybe` there.
|
|
841
|
+
return subtype_result_via_ancestors(actual_class, target_name, mode) if target_class.nil? && actual_class
|
|
797
842
|
if target_class.nil? || actual_class.nil?
|
|
798
843
|
return Type::AcceptsResult.maybe(
|
|
799
844
|
mode: mode,
|
|
@@ -61,6 +61,10 @@ module Rigor
|
|
|
61
61
|
Prism::RationalNode => :type_of_literal_value,
|
|
62
62
|
Prism::SymbolNode => :symbol_type_for,
|
|
63
63
|
Prism::StringNode => :string_type_for,
|
|
64
|
+
Prism::XStringNode => :type_of_xstring,
|
|
65
|
+
Prism::InterpolatedXStringNode => :type_of_xstring,
|
|
66
|
+
Prism::SourceFileNode => :type_of_source_file,
|
|
67
|
+
Prism::SourceLineNode => :type_of_source_line,
|
|
64
68
|
Prism::TrueNode => :type_of_true,
|
|
65
69
|
Prism::FalseNode => :type_of_false,
|
|
66
70
|
Prism::NilNode => :type_of_nil,
|
|
@@ -144,6 +148,9 @@ module Rigor
|
|
|
144
148
|
Prism::AliasMethodNode => :type_of_nil_value,
|
|
145
149
|
Prism::AliasGlobalVariableNode => :type_of_nil_value,
|
|
146
150
|
Prism::UndefNode => :type_of_nil_value,
|
|
151
|
+
Prism::PostExecutionNode => :type_of_nil_value,
|
|
152
|
+
Prism::ShareableConstantNode => :type_of_shareable_constant,
|
|
153
|
+
Prism::ImplicitNode => :type_of_implicit,
|
|
147
154
|
Prism::ForwardingSuperNode => :type_of_dynamic_top,
|
|
148
155
|
Prism::BlockArgumentNode => :type_of_non_value,
|
|
149
156
|
# Parameters and blocks (non-value positions)
|
|
@@ -159,6 +166,7 @@ module Rigor
|
|
|
159
166
|
Prism::ForwardingParameterNode => :type_of_non_value,
|
|
160
167
|
Prism::NoKeywordsParameterNode => :type_of_non_value,
|
|
161
168
|
Prism::ImplicitRestNode => :type_of_non_value,
|
|
169
|
+
Prism::ItParametersNode => :type_of_non_value,
|
|
162
170
|
Prism::BlockNode => :type_of_dynamic_top,
|
|
163
171
|
Prism::SplatNode => :type_of_non_value,
|
|
164
172
|
# Control flow (Slice 3 phase 1): branch types are unioned, jumps
|
|
@@ -888,6 +896,45 @@ module Rigor
|
|
|
888
896
|
Type::Combinator.constant_of(unescaped)
|
|
889
897
|
end
|
|
890
898
|
|
|
899
|
+
# Backtick (`cmd`) and `%x{cmd}` invoke Kernel#` and always return a
|
|
900
|
+
# String. Even when the content is statically known, we widen to
|
|
901
|
+
# Nominal[String] because the runtime value depends on the
|
|
902
|
+
# subprocess output, not the source text.
|
|
903
|
+
def type_of_xstring(_node) = Type::Combinator.nominal_of(String)
|
|
906
|
+
|
|
907
|
+
# __FILE__ is the source file path. Always non-empty when
|
|
908
|
+
# parsing a real file (the path resolver gives the buffer
|
|
909
|
+
# name, which is at minimum `"(stdin)"` / `"-e"` / a real
|
|
910
|
+
# path — never the empty String). Widened to
|
|
911
|
+
# `non-empty-string` instead of `Nominal[String]` so
|
|
912
|
+
# downstream String-emptiness checks know the value cannot
|
|
913
|
+
# be `""`.
|
|
914
|
+
def type_of_source_file(_node) = Type::Combinator.non_empty_string
|
|
917
|
+
|
|
918
|
+
# __LINE__ is the line of the source literal. Ruby line
|
|
919
|
+
# numbers are 1-indexed, so `__LINE__` is always at least
|
|
920
|
+
# 1 — `positive-int` (Integer in `[1, +Inf)`) is the
|
|
921
|
+
# canonical refinement.
|
|
922
|
+
def type_of_source_line(_node) = Type::Combinator.positive_int
|
|
925
|
+
|
|
926
|
+
# `# shareable_constant_value:` magic comment wraps the next
|
|
927
|
+
# constant write. Type is the wrapped write's value.
|
|
928
|
+
def type_of_shareable_constant(node) = type_of(node.write)
|
|
931
|
+
|
|
932
|
+
# `{ x: }` shorthand hash. The implicit value is the call to
|
|
933
|
+
# `x` (or a local read of `x`). Delegate.
|
|
934
|
+
def type_of_implicit(node) = type_of(node.value)
|
|
937
|
+
|
|
891
938
|
# Type of a local-variable read: the scope's binding for the name,
# or `dynamic_top` when the scope has none.
def local_read(node)
  bound = scope.local(node.name)
  bound || dynamic_top
end
|
|
@@ -0,0 +1,171 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Rigor
|
|
4
|
+
module Inference
|
|
5
|
+
# ADR-20 Slice 2a — node types for the parsed body of a
|
|
6
|
+
# type-function `Definition`. Each node represents one
|
|
7
|
+
# piece of a Rigor-side type expression that the reducer
|
|
8
|
+
# ({HktReducer}) walks against a concrete argument list.
|
|
9
|
+
#
|
|
10
|
+
# Slice 2a ships a programmatic constructor surface only:
|
|
11
|
+
# plugin and Rigor-bundled overlay authors build a body
|
|
12
|
+
# tree by hand using these node types. The string-grammar
|
|
13
|
+
# parser that reads `Definition#body` (the raw String slot
|
|
14
|
+
# already populated by Slice 1's `HktDirectives.parse_define`)
|
|
15
|
+
# into a tree is Slice 2b's deliverable; until it ships, the
|
|
16
|
+
# `body` String stays opaque and `body_tree` is the
|
|
17
|
+
# evaluable form.
|
|
18
|
+
#
|
|
19
|
+
# The five node types cover the JSON.parse and dry-monads
|
|
20
|
+
# use cases ADR-20 § Implementation slicing names as
|
|
21
|
+
# near-term adopters:
|
|
22
|
+
#
|
|
23
|
+
# - {TypeLeaf} — wraps a fully-built `Rigor::Type`
|
|
24
|
+
# (use for atoms like `nil`, `Constant<true>`,
|
|
25
|
+
# `Nominal[Integer]`).
|
|
26
|
+
# - {Param} — reference to a formal parameter
|
|
27
|
+
# declared in the enclosing `Definition#params` list
|
|
28
|
+
# (e.g. `K` in `json::value[K]`). The reducer
|
|
29
|
+
# substitutes from the application's `args`.
|
|
30
|
+
# - {AppRef} — abstract HKT application; the reducer
|
|
31
|
+
# resolves it via the registry, or returns the `App`
|
|
32
|
+
# carrier as-is when the reference is self-recursive
|
|
33
|
+
# (lazy "tying-the-knot" handling that lets recursive
|
|
34
|
+
# sums like `json::value` reduce without infinite
|
|
35
|
+
# expansion).
|
|
36
|
+
# - {Union} — N-ary union of arms.
|
|
37
|
+
# - {NominalApp} — parameterised nominal class
|
|
38
|
+
# (`Array[X]`, `Hash[K, V]`) whose type args are
|
|
39
|
+
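# themselves body nodes.
#
# ADR-20 § D3 conditionals are modelled on top of these by
# {Conditional} together with its test value objects
# {TestSubtype}, {TestEquality} and {TestMembership}.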
|
|
40
|
+
#
|
|
41
|
+
# Every node is a frozen `Data.define` value; structural
|
|
42
|
+
# equality is by-field.
|
|
43
|
+
module HktBody
  # Shared guard for fields holding a list of child nodes. Raises
  # ArgumentError unless `children` is a non-empty Array, then
  # returns a frozen shallow copy so the caller's Array is left
  # untouched. `label` is the field name used in the messages.
  CHILD_LIST = lambda do |children, label, empty_message = "#{label} must be non-empty"|
    raise ArgumentError, "#{label} must be an Array, got #{children.class}" unless children.is_a?(Array)
    raise ArgumentError, empty_message if children.empty?

    children.dup.freeze
  end
  private_constant :CHILD_LIST

  # Leaf holding an already-built `Rigor::Type` value; for atoms
  # that need no substitution (e.g. `Nominal[Integer]`,
  # `Constant<nil>`). `type` must not be nil.
  TypeLeaf = Data.define(:type) do
    def initialize(type:)
      raise ArgumentError, "type must not be nil" if type.nil?

      super(type: type)
    end
  end

  # Reference to a formal parameter declared by the enclosing
  # `Definition#params`. The reducer replaces this node with the
  # matching positional arg of the `App` being reduced; an unknown
  # name raises during reduction (the parser MUST reject unknown
  # names earlier). `name` must be a Symbol.
  Param = Data.define(:name) do
    def initialize(name:)
      is_symbol = name.is_a?(Symbol)
      raise ArgumentError, "name must be a Symbol, got #{name.class}" unless is_symbol

      super(name: name)
    end
  end

  # Abstract HKT application, the reducer's primary recursion
  # point. `uri` is a namespaced Symbol matching some
  # `Registration` in the registry; `args` is a non-empty Array of
  # body nodes (each is substituted / resolved before use) and is
  # stored as a frozen copy.
  AppRef = Data.define(:uri, :args) do
    def initialize(uri:, args:)
      raise ArgumentError, "uri must be a Symbol, got #{uri.class}" unless uri.is_a?(Symbol)

      namespaced = uri.to_s.include?("::")
      raise ArgumentError, "uri must be namespaced as `:a::b`, got #{uri.inspect}" unless namespaced

      super(uri: uri, args: CHILD_LIST.call(args, "args"))
    end
  end

  # N-ary union of arms. The reducer builds the result through
  # `Type::Combinator.union(*reduced_arms)` so normalization
  # (flattening, dedup, Bot drop) applies. `arms` is a non-empty
  # Array, stored as a frozen copy.
  Union = Data.define(:arms) do
    def initialize(arms:)
      super(arms: CHILD_LIST.call(arms, "arms"))
    end
  end

  # Parameterised nominal class. `class_name` is the Ruby class
  # name (`"Array"`, `"Hash"`); `args` is a non-empty Array of body
  # nodes for the type arguments, stored as a frozen copy. The
  # reducer builds the result through
  # `Type::Combinator.nominal_of(class_name, type_args:
  # reduced_args)`.
  NominalApp = Data.define(:class_name, :args) do
    def initialize(class_name:, args:)
      if !class_name.is_a?(String) || class_name.empty?
        raise ArgumentError, "class_name must be a non-empty String, got #{class_name.inspect}"
      end

      frozen_args = CHILD_LIST.call(
        args, "args", "args must be non-empty (use TypeLeaf with Nominal for raw class refs)"
      )
      super(class_name: class_name, args: frozen_args)
    end
  end

  # ADR-20 § D3 conditional type form. `test` is a {TestSubtype} /
  # {TestEquality} / {TestMembership} value object the reducer
  # evaluates against the current bindings; `then_branch` /
  # `else_branch` are body nodes. The reducer's trinary handling:
  #
  # - test = `yes`   → return the reduced `then_branch`.
  # - test = `no`    → return the reduced `else_branch`.
  # - test = `maybe` → widen to the union of both reduced branches
  #   (per ADR-20 WD7 / robustness principle).
  #
  # None of the three fields may be nil.
  Conditional = Data.define(:test, :then_branch, :else_branch) do
    def initialize(test:, then_branch:, else_branch:)
      raise ArgumentError, "test must not be nil" if test.nil?
      raise ArgumentError, "then_branch must not be nil" if then_branch.nil?
      raise ArgumentError, "else_branch must not be nil" if else_branch.nil?

      super(test: test, then_branch: then_branch, else_branch: else_branch)
    end
  end

  # `left <: right` subtype check. `left` is typically a {Param}
  # reference; `right` is any body expression. Neither may be nil.
  TestSubtype = Data.define(:left, :right) do
    def initialize(left:, right:)
      raise ArgumentError, "left/right must not be nil" if [left, right].any?(&:nil?)

      super(left: left, right: right)
    end
  end

  # `left == right` structural equality. Useful for discriminating
  # against literal constants (`E == :symbol`). Neither side may be
  # nil.
  TestEquality = Data.define(:left, :right) do
    def initialize(left:, right:)
      raise ArgumentError, "left/right must not be nil" if [left, right].any?(&:nil?)

      super(left: left, right: right)
    end
  end

  # `left in [opt1, opt2, ...]` set membership. Each option is a
  # body node; the test passes iff `left` is structurally equal to
  # any of the options. `options` is a non-empty Array, stored as a
  # frozen copy.
  TestMembership = Data.define(:left, :options) do
    def initialize(left:, options:)
      raise ArgumentError, "left must not be nil" if left.nil?

      super(left: left, options: CHILD_LIST.call(options, "options"))
    end
  end
end
|
|
170
|
+
end
|
|
171
|
+
end
|
|
@@ -0,0 +1,363 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "hkt_body"
|
|
4
|
+
require_relative "../type"
|
|
5
|
+
|
|
6
|
+
module Rigor
|
|
7
|
+
module Inference
|
|
8
|
+
# ADR-20 slice 2b — parses the body of an
|
|
9
|
+
# `HktRegistry::Definition` (a `String`, as populated by
|
|
10
|
+
# Slice 1's `HktDirectives.parse_define` from
|
|
11
|
+
# `%a{rigor:v1:hkt_define}` payloads) into the `HktBody`
|
|
12
|
+
# node tree the Slice 2a reducer evaluates against.
|
|
13
|
+
#
|
|
14
|
+
# The minimum-viable grammar covered here is the
|
|
15
|
+
# union-of-atoms-and-parameterised-forms subset of ADR-20
|
|
16
|
+
# § D3 — sufficient for `JSON.parse`'s `json::value`
|
|
17
|
+
# recursive sum and for any other recursive-data-shape
|
|
18
|
+
# signatures (Lisp value trees, dry-types refinements
|
|
19
|
+
# without conditionals). Parenthesised conditional forms
# (`(E <: T ? A : B)`, `(E == T ? A : B)`,
# `(E in [k1, k2] ? A : B)`) are parsed as well; the
# indexed-access forms drafted in D3 remain a follow-up
# slice. A `?` outside parentheses, like any other
# unsupported syntax, raises `ParseError` and the calling
# directive parser drops the body_tree (the body String
# remains stored and the reducer falls back to `app.bound`).
|
|
25
|
+
#
|
|
26
|
+
# ## Grammar (slice 2b)
|
|
27
|
+
#
|
|
28
|
+
# body := union
|
|
29
|
+
# union := type_expr ("|" type_expr)*
|
|
30
|
+
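#     type_expr   := atom | nominal_app | app_ref | param | conditional
#     conditional := "(" test "?" union ":" union ")"
#     test        := type_expr ("<:" | "==") type_expr
#                  | type_expr "in" "[" type_expr ("," type_expr)* "]"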
|
|
31
|
+
# atom := "nil" | "true" | "false" | "bool" | "untyped"
|
|
32
|
+
# param := UCNAME (when UCNAME ∈ params)
|
|
33
|
+
# nominal_app := class_name ("[" type_expr ("," type_expr)* "]")?
|
|
34
|
+
# class_name := "::"? UCNAME ("::" UCNAME)*
|
|
35
|
+
# app_ref := "App" "[" uri "," type_expr ("," type_expr)* "]"
|
|
36
|
+
# uri := IDENT ("::" IDENT)+
|
|
37
|
+
# UCNAME := /[A-Z]\w*/
|
|
38
|
+
# IDENT := /[a-z_]\w*/
|
|
39
|
+
#
|
|
40
|
+
# ## Disambiguation
|
|
41
|
+
#
|
|
42
|
+
# The same syntactic UCNAME spells both a parameter
|
|
43
|
+
# reference (`K` when `params = [:K]`) and a nominal class
|
|
44
|
+
# name (`Integer`). The parser resolves by checking the
|
|
45
|
+
# `params` set passed to {.parse}; an unknown UCNAME is
|
|
46
|
+
# treated as a nominal class name. `App` is reserved at
|
|
47
|
+
# the head position of an `App[...]` form; using `App` as
|
|
48
|
+
# a class name is therefore not supported.
|
|
49
|
+
#
|
|
50
|
+
# Atoms are kept verbatim as `HktBody::TypeLeaf` nodes
|
|
51
|
+
# wrapping the appropriate `Rigor::Type::*` carrier:
|
|
52
|
+
# `nil` / `true` / `false` produce `Constant` carriers;
|
|
53
|
+
# `bool` produces the `Constant<true> | Constant<false>`
|
|
54
|
+
# union; `untyped` produces `Combinator.untyped`
|
|
55
|
+
# (i.e. `Dynamic[Top]`). Nominal class names produce raw
|
|
56
|
+
# `Type::Nominal` carriers (no `name_scope` resolution at
|
|
57
|
+
# this slice — the reducer trusts the name verbatim).
|
|
58
|
+
module HktBodyParser
  # Raised for every lexical or syntactic problem in a body string.
  class ParseError < StandardError; end

  module_function

  # Parses a type-function body into an `HktBody` node tree.
  #
  # @param string [String] the raw body text.
  # @param params [Array<#to_sym>] formal parameter names; an
  #   upper-case name found in this list parses as a
  #   `HktBody::Param`, any other as a nominal class name.
  # @return the root `HktBody` node (a lone union arm is returned
  #   unwrapped).
  # @raise [ArgumentError] when `string` is not a String or
  #   `params` is not an Array.
  # @raise [ParseError] on an unknown character, an unexpected
  #   token, or trailing input.
  def parse(string, params:)
    raise ArgumentError, "string must be a String, got #{string.class}" unless string.is_a?(String)
    raise ArgumentError, "params must be an Array, got #{params.class}" unless params.is_a?(Array)

    params_set = params.to_h { |p| [p.to_sym, true] }
    parser = Parser.new(Tokenizer.new(string).tokenize, params_set)
    tree = parser.parse_union
    parser.expect_eof!
    tree
  end

  # One lexical token: `kind` is a Symbol named after the scanner
  # group that matched, `value` the matched text, `pos` the
  # character offset of the token in the source string.
  Token = Data.define(:kind, :value, :pos)

  # Splits a body string into {Token}s, dropping whitespace.
  class Tokenizer
    # One named group per token kind. `\G` pins every match to the
    # offset handed to `Regexp#match`, so unknown characters are
    # detected instead of skipped. `::` is listed before the lone
    # `:` so a separator is never read as two colons.
    SCANNER_REGEX = /
      \G
      (?:
        (?<ws>\s+)
      | (?<lb>\[)
      | (?<rb>\])
      | (?<lparen>\()
      | (?<rparen>\))
      | (?<comma>,)
      | (?<pipe>\|)
      | (?<sub><:)
      | (?<eq>==)
      | (?<sep>::)
      | (?<colon>:(?!:))
      | (?<question>\?)
      | (?<ident>[a-z_][a-zA-Z0-9_]*)
      | (?<ucname>[A-Z][a-zA-Z0-9_]*)
      )
    /x

    def initialize(string)
      @string = string
    end

    TOKEN_KINDS = SCANNER_REGEX.named_captures.keys.freeze
    private_constant :TOKEN_KINDS

    # @return [Array<Token>] the tokens in source order, without
    #   whitespace.
    # @raise [ParseError] when no token kind matches at the
    #   current offset.
    def tokenize
      tokens = []
      pos = 0
      while pos < @string.size
        match = SCANNER_REGEX.match(@string, pos)
        raise ParseError, "unexpected character at position #{pos}: #{@string[pos].inspect}" if match.nil?

        # Exactly one named group participates in a successful
        # match; its name is the token kind.
        kind = TOKEN_KINDS.find { |name| match[name] }
        raise ParseError, "internal tokenizer error at position #{pos}" if kind.nil?

        value = match[kind]
        start = pos
        pos += value.size
        next if kind == "ws"

        tokens << Token.new(kind: kind.to_sym, value: value, pos: start)
      end
      tokens
    end
  end

  # Recursive-descent parser over the {Tokenizer} output. Every
  # `parse_*` method consumes tokens from the current cursor and
  # returns the node (or value) it recognised, raising
  # {ParseError} otherwise.
  class Parser
    # @param tokens [Array<Token>] tokenizer output.
    # @param params_set [Hash{Symbol => true}] declared parameter
    #   names, used to tell `Param` references from class names.
    def initialize(tokens, params_set)
      @tokens = tokens
      @pos = 0
      @params_set = params_set
    end

    # union := type_expr ("|" type_expr)*
    #
    # A single arm is returned as-is rather than wrapped in a
    # one-arm `HktBody::Union`.
    def parse_union
      arms = [parse_type_expr]
      while peek_kind == :pipe
        consume
        arms << parse_type_expr
      end
      return arms.first if arms.size == 1

      HktBody::Union.new(arms: arms)
    end

    # Dispatches on the kind of the next token.
    def parse_type_expr
      tok = peek
      raise ParseError, "unexpected end of input; expected type expression" if tok.nil?

      case tok.kind
      when :lparen then parse_conditional
      when :ident then parse_lowercase_atom
      when :ucname then parse_ucname_form
      when :sep then parse_classname_with_leading_sep
      else
        raise ParseError, "unexpected token #{describe(tok)}"
      end
    end

    # ADR-20 § D3 conditional parser. Grammar:
    #
    #     conditional := "(" test "?" union ":" union ")"
    #     test        := type_expr ("<:" | "==") type_expr
    #                  | type_expr "in" "[" type_expr ("," type_expr)* "]"
    #
    # Parens delimit a conditional unambiguously — bare
    # `(type_expr)` grouping is not supported. Branches can be
    # unions; both sides of a test, and each membership option,
    # are single arms.
    def parse_conditional
      expect!(:lparen)
      test = parse_test
      expect!(:question)
      then_branch = parse_union
      expect!(:colon)
      else_branch = parse_union
      expect!(:rparen)
      HktBody::Conditional.new(test: test, then_branch: then_branch, else_branch: else_branch)
    end

    # Parses the test of a conditional: a left-hand arm followed
    # by `<:`, `==`, or an `in [...]` membership list.
    def parse_test
      left = parse_type_expr
      op = peek
      case op&.kind
      when :sub
        consume
        HktBody::TestSubtype.new(left: left, right: parse_type_expr)
      when :eq
        consume
        HktBody::TestEquality.new(left: left, right: parse_type_expr)
      when :ident
        parse_in_membership(left, op_token: op)
      else
        raise ParseError, "expected `<:`, `==`, or `in` in conditional test, got #{describe(op)}"
      end
    end

    # `left in [opt1, opt2, ...]` membership test. `op_token` is
    # the not-yet-consumed identifier following `left`; `in` is
    # the only identifier accepted at that position.
    def parse_in_membership(left, op_token:)
      unless op_token.value == "in"
        raise ParseError,
              "expected `<:`, `==`, or `in` in conditional test, got #{describe(op_token)}"
      end

      consume # in
      expect!(:lb)
      options = [parse_type_expr]
      while peek_kind == :comma
        consume
        options << parse_type_expr
      end
      expect!(:rb)
      HktBody::TestMembership.new(left: left, options: options)
    end

    # atom := "nil" | "true" | "false" | "bool" | "untyped"
    #
    # Each atom becomes a `TypeLeaf` around the matching type
    # carrier; any other lower-case identifier is an error.
    def parse_lowercase_atom
      tok = consume
      type = case tok.value
             when "nil" then Type::Constant.new(nil)
             when "true" then Type::Constant.new(true)
             when "false" then Type::Constant.new(false)
             when "bool" then Type::Combinator.union(Type::Constant.new(true), Type::Constant.new(false))
             when "untyped" then Type::Combinator.untyped
             else raise ParseError, "unknown atom #{tok.value.inspect} at position #{tok.pos}"
             end
      HktBody::TypeLeaf.new(type: type)
    end

    # Resolves an upper-case name: `App` heads an `App[...]`
    # form, a bare declared parameter becomes a `Param`, and
    # everything else is a (possibly parameterised) class name.
    def parse_ucname_form
      tok = peek
      return parse_app_ref if tok.value == "App"

      if @params_set.key?(tok.value.to_sym) && !class_continuation?
        consume
        return HktBody::Param.new(name: tok.value.to_sym)
      end

      parse_nominal_or_param_with_args
    end

    # Truthy when the token after the current one is `::`
    # (qualified class name continuation) or `[` (parameterised
    # application). In either case the current name is a nominal,
    # not a param ref — `Param` nodes are always single bare
    # identifiers.
    def class_continuation?
      next_tok = @tokens[@pos + 1]
      next_tok && %i[sep lb].include?(next_tok.kind)
    end

    # Parses a class name and, when followed by `[`, its type
    # arguments: `NominalApp` with args, `TypeLeaf` around a
    # `Type::Nominal` without.
    def parse_nominal_or_param_with_args
      class_name = parse_class_name
      if peek_kind == :lb
        consume
        args = parse_arg_list
        expect!(:rb)
        HktBody::NominalApp.new(class_name: class_name, args: args)
      else
        HktBody::TypeLeaf.new(type: Type::Nominal.new(class_name))
      end
    end

    # The leading "::" form (`::Foo::Bar`). The separator is
    # consumed and not kept in the resulting class name; the rest
    # threads through parse_class_name.
    def parse_classname_with_leading_sep
      consume
      tok = peek
      if tok.nil? || tok.kind != :ucname
        raise ParseError, "expected class name after `::`, got #{describe(tok)}"
      end

      parse_nominal_or_param_with_args
    end

    # class_name := UCNAME ("::" UCNAME)*
    #
    # A `::` is only consumed when an upper-case name follows it.
    def parse_class_name
      parts = [expect!(:ucname).value]
      while peek_kind == :sep && @tokens[@pos + 1]&.kind == :ucname
        consume # ::
        parts << expect!(:ucname).value
      end
      parts.join("::")
    end

    # app_ref := "App" "[" uri "," arg ("," arg)* "]"
    def parse_app_ref
      tok = consume
      raise ParseError, "expected `App[...]`, got #{tok.value.inspect}" unless tok.value == "App"

      expect!(:lb)
      uri = parse_uri
      expect!(:comma)
      args = parse_arg_list
      expect!(:rb)
      HktBody::AppRef.new(uri: uri, args: args)
    end

    # uri := IDENT ("::" IDENT)+
    #
    # @return [Symbol] the segments joined with `::`.
    def parse_uri
      parts = [expect!(:ident).value]
      while peek_kind == :sep
        consume
        parts << expect!(:ident).value
      end
      raise ParseError, "uri must be namespaced (`a::b`), got #{parts.first.inspect}" if parts.size < 2

      parts.join("::").to_sym
    end

    # Arg list for `Foo[A, B, C]` and `App[uri, A, B]` forms.
    # Each arg is parsed as a union so per-arg `A | B` forms work
    # (`Array[K | nil]`); the comma at the top level still
    # separates args, so `Hash[K, V]` reads as two args rather
    # than one union of two.
    def parse_arg_list
      args = [parse_union]
      while peek_kind == :comma
        consume
        args << parse_union
      end
      args
    end

    # Raises unless every token has been consumed.
    def expect_eof!
      return if @pos >= @tokens.size

      raise ParseError, "expected end of input, got #{describe(@tokens[@pos])}"
    end

    private

    # Next token without consuming it, or nil at end of input.
    def peek
      @tokens[@pos]
    end

    def peek_kind
      @tokens[@pos]&.kind
    end

    # Returns the next token (nil at end of input) and advances.
    def consume
      tok = @tokens[@pos]
      @pos += 1
      tok
    end

    # Consumes and returns the next token, which must be of
    # `kind`; the error names the offending token and where it is.
    def expect!(kind)
      tok = @tokens[@pos]
      raise ParseError, "expected #{kind}, got #{describe(tok)}" if tok.nil? || tok.kind != kind

      @pos += 1
      tok
    end

    # Human-readable token description used by all error messages.
    def describe(tok)
      return "end of input" if tok.nil?

      "#{tok.kind} (#{tok.value.inspect}) at position #{tok.pos}"
    end
  end
end
|
|
362
|
+
end
|
|
363
|
+
end
|