kumi-parser 0.0.33 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +41 -0
- data/CHANGELOG.md +64 -0
- data/CLAUDE.md +59 -120
- data/README.md +28 -6
- data/examples/parse_and_inspect.rb +34 -0
- data/kumi-parser.gemspec +3 -4
- data/lib/kumi/parser/grammar.rb +120 -0
- data/lib/kumi/parser/lexer.rb +232 -0
- data/lib/kumi/parser/parse_error.rb +52 -0
- data/lib/kumi/parser/parser.rb +692 -0
- data/lib/kumi/parser/source.rb +76 -0
- data/lib/kumi/parser/text_parser.rb +37 -27
- data/lib/kumi/parser/token.rb +10 -71
- data/lib/kumi/parser/version.rb +1 -1
- data/lib/kumi-parser.rb +9 -10
- metadata +16 -37
- data/examples/debug_text_parser.rb +0 -41
- data/examples/debug_transform_rule.rb +0 -26
- data/examples/text_parser_comprehensive_test.rb +0 -333
- data/examples/text_parser_test_with_comments.rb +0 -146
- data/lib/kumi/parser/base.rb +0 -51
- data/lib/kumi/parser/direct_parser.rb +0 -698
- data/lib/kumi/parser/error_extractor.rb +0 -89
- data/lib/kumi/parser/errors.rb +0 -40
- data/lib/kumi/parser/helpers.rb +0 -154
- data/lib/kumi/parser/smart_tokenizer.rb +0 -373
- data/lib/kumi/parser/syntax_validator.rb +0 -21
- data/lib/kumi/parser/text_parser/api.rb +0 -60
- data/lib/kumi/parser/token_constants.rb +0 -468
- data/lib/kumi/text_parser.rb +0 -40
- data/lib/kumi/text_schema.rb +0 -31
|
@@ -0,0 +1,692 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module Kumi
|
|
4
|
+
module Parser
|
|
5
|
+
# Recursive-descent parser for declarations and a Pratt parser for
|
|
6
|
+
# expressions, producing kumi-core's Kumi::Syntax::* AST directly.
|
|
7
|
+
#
|
|
8
|
+
# The parser is the boundary of the parse phase: it reports *shape* errors
|
|
9
|
+
# (a missing `end`, an unexpected token, a malformed hash pair) with exact
|
|
10
|
+
# locations, and it does not attempt to resolve names, check types, or know
|
|
11
|
+
# anything about axes — those are semantic concerns owned by the analyzer.
|
|
12
|
+
class Parser
|
|
13
|
+
include Syntax = Kumi::Syntax
|
|
14
|
+
|
|
15
|
+
# Set up a parser over an already-lexed token list.
#
# @param tokens the token sequence; it is indexed by position and the
#   parser relies on it ending with an `:eof` token (see `advance`)
# @param source the source object used to turn token offsets into locations
def initialize(tokens, source)
  @pos = 0
  @source = source
  @tokens = tokens
  # Names introduced by `import` declarations, collected while parsing.
  @imported_names = Set.new
end
|
|
21
|
+
|
|
22
|
+
# Parse the whole token stream: optional leading imports, exactly one
# schema, then end of input.
#
# @return the root node built by `parse_schema`
# @raise [ParseError] when the tokens do not form a schema
def parse
  skip_separators
  leading_imports = parse_imports
  imported = leading_imports.flat_map { |import| import.names }
  @imported_names.merge(imported)

  schema = parse_schema(leading_imports)

  # Nothing but separators may follow the schema.
  skip_separators
  expect(:eof, 'end of input')
  schema
end
|
|
33
|
+
|
|
34
|
+
private
|
|
35
|
+
|
|
36
|
+
# ---- cursor --------------------------------------------------------
|
|
37
|
+
|
|
38
|
+
# The token under the cursor (nil when the cursor is outside the list).
def current
  @tokens.at(@pos)
end
|
|
41
|
+
|
|
42
|
+
# Look ahead without moving the cursor.
#
# @param offset how many tokens past the cursor to look (default 1)
# @return the token at that position, or the last token when the
#   look-ahead runs off the end of the list
def peek(offset = 1)
  ahead = @tokens[@pos + offset]
  return ahead if ahead

  @tokens.last
end
|
|
45
|
+
|
|
46
|
+
# True when the current token's kind is one of +kinds+.
def at?(*kinds)
  here = current.kind
  kinds.any? { |kind| kind == here }
end
|
|
49
|
+
|
|
50
|
+
# Return the current token and step past it. The cursor never moves beyond
# the `:eof` token, so repeated calls at the end keep returning it.
def advance
  current.tap { |token| @pos += 1 unless token.kind == :eof }
end
|
|
55
|
+
|
|
56
|
+
# Consume and return the current token when it has the expected kind;
# otherwise raise a parse error naming what was wanted and what was found.
#
# @param kind [Symbol] the token kind required at the cursor
# @param description [String, nil] wording for the error message; defaults
#   to `describe_kind(kind)`
def expect(kind, description = nil)
  if current.kind == kind
    advance
  else
    wanted = description || describe_kind(kind)
    error("expected #{wanted}, but found #{describe_token(current)}")
  end
end
|
|
64
|
+
|
|
65
|
+
# Move the cursor past any run of `:newline` and `:comment` tokens.
def skip_separators
  @pos += 1 while %i[newline comment].include?(current.kind)
end
|
|
68
|
+
|
|
69
|
+
# Location of +token+, obtained by asking the source object to resolve the
# token's offset.
def loc(token)
  offset = token.offset
  @source.location(offset)
end
|
|
72
|
+
|
|
73
|
+
# ---- schema --------------------------------------------------------
|
|
74
|
+
|
|
75
|
+
# Parse `schema do … end`: imports, codegen directives, the input block and
# the value/let/trait declarations, in that order.
#
# @param root_imports imports that were parsed before the `schema` keyword;
#   they are placed ahead of the imports found inside the schema body
# @return [Syntax::Root]
def parse_schema(root_imports)
  schema_token = expect(:schema, '`schema do`')
  expect(:do, '`do` after `schema`')
  skip_separators

  nested_imports = parse_imports
  @imported_names.merge(nested_imports.flat_map { |import| import.names })

  codegen_hints = parse_codegen_directives
  input_decls = parse_input_block
  value_decls, trait_decls = parse_declarations
  expect(:end, '`end` to close the schema')

  all_imports = root_imports + nested_imports
  Syntax::Root.new(
    input_decls, value_decls, trait_decls, all_imports,
    hints: codegen_hints, loc: loc(schema_token)
  )
end
|
|
98
|
+
|
|
99
|
+
# Collect consecutive `value`, `let` and `trait` declarations.
#
# @return [Array(Array, Array)] `[values, traits]`; `let` declarations are
#   stored with the values (parsed with `inline: true`)
def parse_declarations
  values = []
  traits = []
  skip_separators
  loop do
    case current.kind
    when :value then values << parse_value(inline: false)
    when :let then values << parse_value(inline: true)
    when :trait then traits << parse_trait
    else break
    end
    skip_separators
  end
  [values, traits]
end
|
|
113
|
+
|
|
114
|
+
# ---- imports -------------------------------------------------------
|
|
115
|
+
|
|
116
|
+
# Parse zero or more consecutive `import` declarations.
#
# @return [Array] the import nodes, in source order (empty when none)
def parse_imports
  skip_separators
  found = []
  while current.kind == :import
    found.push(parse_import)
    skip_separators
  end
  found
end
|
|
125
|
+
|
|
126
|
+
# Parse one `import :a, :b, from: Some::Module` declaration. The cursor must
# be on the `import` token.
#
# @return [Syntax::ImportDeclaration]
def parse_import
  import_token = advance
  names = []
  names << expect(:symbol, 'a `:name` to import').value

  # Every comma introduces either another name or the closing `from:` label.
  loop do
    break unless at?(:comma)

    advance
    skip_separators
    break if from_label?

    names << expect(:symbol, 'a `:name` to import').value
  end

  skip_separators
  unless from_label?
    error('expected `from:` to name the module the import comes from')
  end
  advance # step over `from:`
  skip_separators

  module_ref = parse_constant_path
  Syntax::ImportDeclaration.new(names, module_ref, loc: loc(import_token))
end
|
|
146
|
+
|
|
147
|
+
# True when the cursor is on the `from:` label.
def from_label?
  token = current
  token.kind == :label && token.value == 'from'
end
|
|
150
|
+
|
|
151
|
+
# Consume a `:constant` token and return its value (the module reference
# text used by `import … from:`).
def parse_constant_path
  expect(:constant, 'a module name like `Foo::Bar`').value
end
|
|
155
|
+
|
|
156
|
+
# ---- codegen directives -------------------------------------------
|
|
157
|
+
|
|
158
|
+
# Parse zero or more `codegen …` directives, merging their options.
#
# @return [Hash] `{}` when there are no directives, otherwise
#   `{ codegen: { … } }` where later directives override earlier keys
def parse_codegen_directives
  hints = {}
  skip_separators
  while current.kind == :codegen
    advance
    options = parse_codegen_options
    existing = hints[:codegen] || {}
    hints[:codegen] = existing.merge(options)
    skip_separators
  end
  hints
end
|
|
169
|
+
|
|
170
|
+
# Parse the comma-separated options of one `codegen` directive. Only
# `streaming: true|false` is accepted; any other label is a parse error.
#
# @return [Hash] the options, keyed by symbol
def parse_codegen_options
  opts = {}
  more = true
  while more
    key = expect(:label, 'a codegen option like `streaming:`')
    error("Unknown codegen option '#{key.value}'", at: key) if key.value != 'streaming'
    opts[:streaming] = expect(:boolean, '`true` or `false`').value

    # A comma means another option follows, possibly on the next line.
    more = at?(:comma)
    next unless more

    advance
    skip_separators
  end
  opts
end
|
|
184
|
+
|
|
185
|
+
# ---- input block ---------------------------------------------------
|
|
186
|
+
|
|
187
|
+
# Parse the mandatory `input do … end` block.
#
# @return [Array] the top-level input declarations, in source order
def parse_input_block
  expect(:input, 'an `input do` block')
  expect(:do, '`do` after `input`')
  skip_separators

  fields = []
  loop do
    break unless at?(:type_keyword)

    fields.push(parse_input_declaration)
    skip_separators
  end

  expect(:end, '`end` to close the `input` block')
  fields
end
|
|
199
|
+
|
|
200
|
+
# Parse one input declaration: a type keyword, a `:name`, optional
# `domain:` / `index:` options and, for container types, a nested block.
#
# @return [Syntax::InputDeclaration]
def parse_input_declaration
  type_token = expect(:type_keyword)
  type = type_token.value
  name_token = expect(:symbol, "a `:name` for the #{type} input")

  domain, index_name = parse_input_options(type)
  children = parse_input_children(type)

  Syntax::InputDeclaration.new(
    name_token.value, domain, type, children, index_name, loc: loc(type_token)
  )
end
|
|
211
|
+
|
|
212
|
+
# Parse the optional `, domain: …` and `, index: :sym` options of an input
# declaration; they may appear in any order.
#
# @param type the declared input type; `index:` is rejected unless it is
#   `:array`
# @return [Array] `[domain, index_name]`, each nil when not given
def parse_input_options(type)
  domain = nil
  index_name = nil

  # Only a comma that is directly followed by a label starts an option.
  while at?(:comma) && peek.kind == :label
    advance # the comma
    key = advance # the label
    if key.value == 'domain'
      domain = parse_domain
    elsif key.value == 'index'
      error('`index:` is only valid on array declarations', at: key) if type != :array
      index_name = expect(:symbol, 'an index name like `:i`').value
    else
      error("unknown option `#{key.value}:` on an input declaration", at: key)
    end
  end

  [domain, index_name]
end
|
|
233
|
+
|
|
234
|
+
# Parse the nested `do … end` block of a container input, if present.
#
# @param type the declared type of the enclosing input
# @return [Array] child declarations; empty when +type+ is not one of
#   `Grammar::CONTAINER_TYPES` or no `do` follows
def parse_input_children(type)
  opens_block = Grammar::CONTAINER_TYPES.include?(type) && at?(:do)
  return [] unless opens_block

  advance # the `do`
  skip_separators

  children = []
  loop do
    break unless at?(:type_keyword)

    children.push(parse_input_declaration)
    skip_separators
  end

  expect(:end, '`end` to close the nested input block')
  children
end
|
|
247
|
+
|
|
248
|
+
# Parse a domain: a bracketed list `[a, b]` or a numeric range `lo..hi`.
#
# For a list, literal elements are unwrapped to their raw values; any other
# element is kept as the AST node the expression parser produced.
def parse_domain
  kind = current.kind
  if kind == :lbracket
    parse_array_literal.elements.map do |element|
      element.is_a?(Syntax::Literal) ? element.value : element
    end
  elsif %i[integer float].include?(kind)
    parse_range_domain
  else
    error('expected a domain: a list like `[1, 2]` or a range like `0..10`')
  end
end
|
|
260
|
+
|
|
261
|
+
# Parse a numeric domain starting at the current number token.
#
# @return [Range, Array] an inclusive range for `lo..hi`, an exclusive
#   range for `lo...hi`, or `[lo]` when no range operator follows
def parse_range_domain
  lower = advance.value
  if at?(:dot_dot)
    advance
    Range.new(lower, numeric_bound)
  elsif at?(:dot_dot_dot)
    advance
    Range.new(lower, numeric_bound, true)
  else
    [lower]
  end
end
|
|
274
|
+
|
|
275
|
+
# Consume a float or integer token and return its value; anything else is
# reported as "expected a number".
def numeric_bound
  wanted = at?(:float) ? :float : :integer
  expect(wanted, 'a number').value
end
|
|
279
|
+
|
|
280
|
+
# ---- value / let / trait ------------------------------------------
|
|
281
|
+
|
|
282
|
+
# Parse a `value` or `let` declaration. The body is either a cascade
# (`do … end`) or `, <expression>`.
#
# @param inline [Boolean] true for `let`; adds the `inline: true` hint
# @return [Syntax::ValueDeclaration]
def parse_value(inline:)
  keyword = advance
  label = inline ? 'let' : 'value'
  name = expect(:symbol, "a `:name` for the #{label}").value

  cascade = at?(:do)
  expect(:comma, '`,` then an expression') unless cascade
  expression = cascade ? parse_cascade : parse_expression

  hints = {}
  hints[:inline] = true if inline
  Syntax::ValueDeclaration.new(name, expression, hints: hints, loc: loc(keyword))
end
|
|
297
|
+
|
|
298
|
+
# Parse `trait :name, <expression>`. The cursor must be on the `trait`
# token.
#
# @return [Syntax::TraitDeclaration]
def parse_trait
  keyword = advance
  name_token = expect(:symbol, 'a `:name` for the trait')
  expect(:comma, '`,` then a boolean expression')
  body = parse_expression
  Syntax::TraitDeclaration.new(name_token.value, body, loc: loc(keyword))
end
|
|
305
|
+
|
|
306
|
+
# ---- cascade -------------------------------------------------------
|
|
307
|
+
|
|
308
|
+
# Parse a cascade body: `do`, any number of `on` / `base` cases, `end`.
#
# @return [Syntax::CascadeExpression] located at the `do` token
def parse_cascade
  do_token = expect(:do)
  skip_separators

  branches = []
  loop do
    break unless at?(:on, :base)

    branches.push(parse_case)
    skip_separators
  end

  expect(:end, '`end` to close the cascade')
  Syntax::CascadeExpression.new(branches, loc: loc(do_token))
end
|
|
319
|
+
|
|
320
|
+
# Parse one cascade case. The cursor must be on `on` or `base`.
#
# * `on <cond>[, <cond>…], <result>` — the last comma-separated expression
#   is the result, everything before it is a condition.
# * `base <result>` — a case whose condition is the literal `true`.
#
# @return [Syntax::CaseExpression]
# @raise [ParseError] when `on` is followed by a single expression, i.e.
#   there is no condition in front of the result
def parse_case
  if at?(:on)
    on_token = advance
    exprs = [parse_expression]
    while at?(:comma)
      advance
      exprs << parse_expression
    end
    result = exprs.pop
    # A lone expression would leave no condition and silently build an
    # empty `cascade_and([])`; that is a shape error, so report it here.
    if exprs.empty?
      error('`on` needs at least one condition before its result: `on <condition>, <result>`',
            at: on_token)
    end
    condition = build_case_condition(exprs, on_token)
    Syntax::CaseExpression.new(condition, result, loc: loc(on_token))
  else
    base_token = advance
    result = parse_expression
    true_literal = Syntax::Literal.new(true, loc: loc(base_token))
    Syntax::CaseExpression.new(true_literal, result, loc: loc(base_token))
  end
end
|
|
338
|
+
|
|
339
|
+
# Build the condition node of an `on` case.
#
# A single declaration reference is wrapped as `cascade_and([ref])` so that
# trait conditions are uniformly an all-of; a single condition of any other
# node type is returned unchanged. Any other number of conditions becomes
# `cascade_and([...])` over all of them.
#
# @param conditions [Array] the condition expressions of the case
# @param on_token the `on` token, used for the call's location
def build_case_condition(conditions, on_token)
  sole = conditions.first
  single = conditions.length == 1
  return sole if single && !sole.is_a?(Syntax::DeclarationReference)

  members = single ? [sole] : conditions
  Syntax::CallExpression.new(:cascade_and, members, loc: loc(on_token))
end
|
|
354
|
+
|
|
355
|
+
# ---- expressions (Pratt) ------------------------------------------
|
|
356
|
+
|
|
357
|
+
# Precedence-climbing (Pratt) expression parser.
#
# Parses a primary with its trailing index accesses, then folds in binary
# operators whose precedence is at least +min_precedence+. Each operator
# lowers to a call of `Grammar.operator_fn(kind)` located at the operator.
# Separators are skipped around operators, so an expression may continue on
# the next line.
#
# @param min_precedence [Integer] lowest operator precedence to accept
def parse_expression(min_precedence = 0)
  left = parse_postfix(parse_primary)
  skip_separators

  loop do
    kind = current.kind
    break unless Grammar.binary_operator?(kind) && Grammar.precedence(kind) >= min_precedence

    op = advance
    skip_separators

    # Left-associative operators demand a strictly tighter right operand;
    # right-associative ones accept the same precedence again.
    binding_power = Grammar.precedence(op.kind)
    binding_power += 1 unless Grammar.right_associative?(op.kind)

    right = parse_expression(binding_power)
    call = Syntax::CallExpression.new(Grammar.operator_fn(op.kind), [left, right], loc: loc(op))
    left = parse_postfix(call)
    skip_separators
  end

  left
end
|
|
373
|
+
|
|
374
|
+
# Apply trailing `[index]` accesses to +base+. Each access lowers to
# `at(base, index)` and reuses the location of the node being indexed.
#
# @param base the already-parsed expression node
# @return the node wrapped in zero or more `:at` calls
def parse_postfix(base)
  node = base
  skip_separators
  while current.kind == :lbracket
    advance
    subscript = parse_expression
    expect(:rbracket, '`]` to close the index')
    node = Syntax::CallExpression.new(:at, [node, subscript], loc: node.loc)
    skip_separators
  end
  node
end
|
|
386
|
+
|
|
387
|
+
# Parse one primary expression: a literal, a known constant, a call form,
# an input or declaration reference, a parenthesised group, an array or
# hash literal, or a unary minus. Anything else is a parse error.
def parse_primary
  skip_separators
  token = current
  kind = token.kind

  # Scalar tokens map straight onto a literal node.
  if %i[integer float string boolean symbol].include?(kind)
    advance
    return Syntax::Literal.new(token.value, loc: loc(token))
  end

  case kind
  when :constant
    advance
    Syntax::Literal.new(resolve_constant(token), loc: loc(token))
  when :function_sugar then parse_function_sugar
  when :fn then parse_fn_call
  when :input then parse_input_reference
  when :identifier then parse_identifier_expression
  when :lparen
    advance
    parse_expression.tap { expect(:rparen, '`)` to close the group') }
  when :lbracket then parse_array_literal
  when :lbrace then parse_hash_literal
  when :subtract then parse_unary_minus
  else
    error("expected an expression, but found #{describe_token(token)}")
  end
end
|
|
421
|
+
|
|
422
|
+
# Parse `-<operand>` and lower it to `subtract(0, operand)`. The operand is
# a primary with its index accesses, so the minus binds tighter than any
# binary operator.
def parse_unary_minus
  minus = advance
  operand = parse_postfix(parse_primary)
  operands = [Syntax::Literal.new(0, loc: loc(minus)), operand]
  Syntax::CallExpression.new(:subtract, operands, loc: loc(minus))
end
|
|
428
|
+
|
|
429
|
+
# Decide what a bare identifier starts: `input.…` is an input reference,
# `name(` is a named call, anything else is a reference to a declaration.
def parse_identifier_expression
  token = current
  following = peek.kind
  return parse_input_reference if token.value == 'input' && following == :dot
  return parse_named_call if following == :lparen

  advance
  Syntax::DeclarationReference.new(token.value.to_sym, loc: loc(token))
end
|
|
440
|
+
|
|
441
|
+
# Parse `input.field` or `input.a.b…`. The cursor is on the `input` token,
# which is normally a keyword token but may be an identifier spelled
# `input`.
#
# @return [Syntax::InputReference] for a single field, otherwise a
#   [Syntax::InputElementReference] holding the whole path
def parse_input_reference
  input_token = advance
  if input_token.kind == :identifier && input_token.value != 'input'
    error('expected `input`', at: input_token)
  end
  expect(:dot, '`.` after `input`')

  path = [expect_field_name]
  loop do
    break unless at?(:dot)

    advance
    path.push(expect_field_name)
  end

  location = loc(input_token)
  return Syntax::InputReference.new(path.first, loc: location) if path.length == 1

  Syntax::InputElementReference.new(path, loc: location)
end
|
|
457
|
+
|
|
458
|
+
# Consume one segment of an input path and return it as a symbol.
# Identifiers, type keywords and language keywords are all accepted as
# field names.
def expect_field_name
  token = current
  nameable = %i[identifier type_keyword].include?(token.kind) || keyword_token?(token)
  unless nameable
    return error("expected an input field name after `.`, but found #{describe_token(token)}")
  end

  advance
  field_name_value(token).to_sym
end
|
|
467
|
+
|
|
468
|
+
# True when the token's kind is one of the values of `Grammar::KEYWORDS`.
def keyword_token?(token)
  kind = token.kind
  Grammar::KEYWORDS.any? { |_word, keyword_kind| keyword_kind == kind }
end
|
|
471
|
+
|
|
472
|
+
# The source word a field-name token stands for. Type-keyword tokens carry
# the type symbol as their value, so the word is looked up again in
# `Grammar::TYPE_KEYWORDS`; every other token already holds it.
def field_name_value(token)
  if token.kind == :type_keyword
    Grammar::TYPE_KEYWORDS.key(token.value)
  else
    token.value
  end
end
|
|
478
|
+
|
|
479
|
+
# ---- calls ---------------------------------------------------------
|
|
480
|
+
|
|
481
|
+
# Parse the `name(args)` sugar form (`select(...)`, `cross(...)`,
# `index(:i)`, …). Keyword arguments are function options such as
# `policy: :clamp` or `axis_offset: 1`; they are kept as raw scalars, the
# form the IR lowering reads.
#
# @return [Syntax::CallExpression] calling the function named by the token
def parse_function_sugar
  sugar = advance
  word = Grammar::FUNCTION_SUGAR.key(sugar.value)
  expect(:lparen, "`(` after #{word}")
  positional, options = parse_call_arguments(keyword_mode: :literal)
  Syntax::CallExpression.new(sugar.value, positional, options, loc: loc(sugar))
end
|
|
490
|
+
|
|
491
|
+
# Parse the explicit `fn(:name, args…)` form. Keyword arguments are
# function options and are stored raw.
#
# @return the node produced by `build_call`
def parse_fn_call
  fn_token = advance
  expect(:lparen, '`(` after `fn`')
  name = expect(:symbol, 'a `:function_name` inside `fn(...)`').value

  if at?(:comma)
    # Arguments follow the name; the argument parser also consumes the `)`.
    advance
    args, opts = parse_call_arguments(keyword_mode: :literal)
  else
    expect(:rparen, '`)` to close `fn(...)`')
    args = []
    opts = {}
  end

  build_call(name, args, opts, fn_token)
end
|
|
506
|
+
|
|
507
|
+
# Parse `name(...)` where `name` is a bare identifier — an imported schema
# call. Its keyword arguments are the input mapping, so their values are
# full expressions (e.g. `subtotal(items: input.order_items)`).
#
# @return the node produced by `build_call`
def parse_named_call
  name_token = advance
  callee = name_token.value.to_sym
  expect(:lparen, "`(` after `#{callee}`")
  positional, mapping = parse_call_arguments(keyword_mode: :expression)
  build_call(callee, positional, mapping, name_token)
end
|
|
517
|
+
|
|
518
|
+
# Choose the node for a call. A call to an imported name that has keyword
# arguments and no positional ones becomes an `ImportCall`; everything else
# is a plain `CallExpression`.
#
# @param name [Symbol] the callee
# @param args [Array] positional arguments
# @param opts [Hash] keyword arguments
# @param token the token that supplies the node's location
def build_call(name, args, opts, token)
  import_call = @imported_names.include?(name) && args.empty? && !opts.empty?
  return Syntax::ImportCall.new(name, opts, loc: loc(token)) if import_call

  Syntax::CallExpression.new(name, args, opts, loc: loc(token))
end
|
|
525
|
+
|
|
526
|
+
# Parse the contents of a `(...)` argument list up to and including the
# closing paren. Items are comma-separated; a `label:` item is a keyword
# argument, anything else is a positional expression.
#
# @param keyword_mode [Symbol] how keyword values are read: `:literal`
#   (raw scalars, for function options) or anything else, in practice
#   `:expression` (full AST, for import input mappings)
# @return [Array(Array, Hash)] `[positional_args, keyword_args]`
def parse_call_arguments(keyword_mode:)
  positional = []
  keywords = {}
  skip_separators
  return [positional, keywords] if consume_rparen

  more = true
  while more
    if current.kind == :label
      name = advance.value.to_sym
      keywords[name] = keyword_mode == :literal ? parse_keyword_literal : parse_expression
    else
      positional << parse_expression
    end
    skip_separators

    more = at?(:comma)
    next unless more

    advance
    skip_separators
  end

  expect(:rparen, '`)` to close the argument list')
  [positional, keywords]
end
|
|
554
|
+
|
|
555
|
+
# Step past a `)` if the cursor is on one.
#
# @return [Boolean] whether a `)` was consumed
def consume_rparen
  closing = at?(:rparen)
  advance if closing
  closing
end
|
|
561
|
+
|
|
562
|
+
# Parse a function option's value: a raw scalar, not an AST node. `:clamp`
# stays a symbol, `1` stays an Integer, a bare `label:`-style word becomes
# a symbol, and a leading `-` negates the number that follows. The IR
# lowering reads these directly.
#
# @return the raw Ruby value of the option
# @raise [ParseError] when the token is not a literal, or when unary `-` is
#   followed by something that is not a number (reported at that operand)
def parse_keyword_literal
  token = current
  case token.kind
  when :integer, :float, :string, :boolean, :symbol
    advance.value
  when :label
    advance.value.to_sym
  when :subtract
    advance
    # Remember the operand before it is consumed so a failure points at it
    # rather than at whatever token happens to follow.
    operand = current
    value = parse_keyword_literal
    error('expected a number after unary `-`', at: operand) unless value.is_a?(Numeric)
    -value
  else
    error('a function option value must be a literal or symbol, ' \
          "but found #{describe_token(token)}")
  end
end
|
|
584
|
+
|
|
585
|
+
# ---- array / hash literals ----------------------------------------
|
|
586
|
+
|
|
587
|
+
# Parse `[a, b, …]`. Elements are full expressions, separators may appear
# around them, and a trailing comma before `]` is accepted.
#
# @return [Syntax::ArrayExpression] located at the opening bracket
def parse_array_literal
  opening = expect(:lbracket)
  items = []
  skip_separators

  loop do
    break if at?(:rbracket)

    items.push(parse_expression)
    skip_separators
    break unless at?(:comma)

    advance
    skip_separators
  end

  expect(:rbracket, '`]` to close the array')
  Syntax::ArrayExpression.new(items, loc: loc(opening))
end
|
|
602
|
+
|
|
603
|
+
# Parse `{ key: value, … }`. Pairs are comma-separated, separators may
# appear around them, and a trailing comma before `}` is accepted.
#
# @return [Syntax::HashExpression] located at the opening brace
def parse_hash_literal
  opening = expect(:lbrace)
  entries = []
  skip_separators

  loop do
    break if at?(:rbrace)

    entries.push(parse_hash_pair)
    skip_separators
    break unless at?(:comma)

    advance
    skip_separators
  end

  expect(:rbrace, '`}` to close the hash')
  Syntax::HashExpression.new(entries, loc: loc(opening))
end
|
|
618
|
+
|
|
619
|
+
# Parse one hash entry. The key is a `name:` label (stored as a symbol), a
# `:symbol`, or a "string". Symbol and string keys must be followed by
# `=>`; after a label the arrow is optional.
#
# @return [Array] `[key_literal, value_expression]`
def parse_hash_pair
  key_token = current
  key_value =
    case key_token.kind
    when :label
      advance
      key_token.value.to_sym
    when :string, :symbol
      advance
      key_token.value
    else
      error('a hash key must be a `name:` label, a `:symbol`, or a "string", ' \
            "but found #{describe_token(key_token)}")
    end
  key = Syntax::Literal.new(key_value, loc: loc(key_token))

  skip_separators
  arrow_required = key_token.kind != :label
  if at?(:arrow)
    advance
  elsif arrow_required
    error('expected `=>` after the hash key')
  end
  skip_separators

  [key, parse_expression]
end
|
|
642
|
+
|
|
643
|
+
# ---- constants -----------------------------------------------------
|
|
644
|
+
|
|
645
|
+
# The parse phase resolves only the handful of constants whose value is
|
|
646
|
+
# part of the language surface; everything else is left to the analyzer.
|
|
647
|
+
KNOWN_CONSTANTS = { 'Float::INFINITY' => Float::INFINITY }.freeze
|
|
648
|
+
|
|
649
|
+
# Look up a constant token in `KNOWN_CONSTANTS`; an unlisted constant is a
# parse error reported at the token.
def resolve_constant(token)
  name = token.value
  return KNOWN_CONSTANTS[name] if KNOWN_CONSTANTS.key?(name)

  error("unknown constant `#{name}` (use an inline value instead)", at: token)
end
|
|
654
|
+
|
|
655
|
+
# ---- errors --------------------------------------------------------
|
|
656
|
+
|
|
657
|
+
# Raise a ParseError carrying the source and the offset of the offending
# token.
#
# @param message [String] what went wrong
# @param at the token to report the error at (defaults to the current one)
# @raise [ParseError] always
def error(message, at: current)
  failure = ParseError.new(message, source: @source, offset: at.offset)
  raise failure
end
|
|
660
|
+
|
|
661
|
+
# Human-readable rendering of a token for error messages. End of input and
# line breaks are named in words, strings are shown quoted via `inspect`,
# and everything else is wrapped in backticks (with a `:` prefix for
# symbols and a `:` suffix for labels).
def describe_token(token)
  kind = token.kind
  return 'end of input' if kind == :eof
  return 'a line break' if kind == :newline
  return token.value.inspect if kind == :string

  text =
    case kind
    when :type_keyword then Grammar::TYPE_KEYWORDS.key(token.value)
    when :function_sugar then Grammar::FUNCTION_SUGAR.key(token.value)
    when :symbol then ":#{token.value}"
    when :label then "#{token.value}:"
    else token.value
    end
  "`#{text}`"
end
|
|
674
|
+
|
|
675
|
+
# Human-readable rendering of a token kind for "expected …" messages.
# Punctuation and block keywords are shown as their backticked spelling;
# any kind without a dedicated wording falls back to the backticked kind
# name.
def describe_kind(kind)
  case kind
  when :symbol then 'a `:name`'
  when :eof then 'end of input'
  when :comma then '`,`'
  when :do then '`do`'
  when :end then '`end`'
  when :rparen then '`)`'
  when :rbracket then '`]`'
  when :rbrace then '`}`'
  when :lparen then '`(`'
  when :dot then '`.`'
  when :arrow then '`=>`'
  else "`#{kind}`"
  end
end
|
|
690
|
+
end
|
|
691
|
+
end
|
|
692
|
+
end
|