kumi-parser 0.0.33 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,698 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Kumi
4
- module Parser
5
- # Direct AST construction parser using recursive descent with embedded token metadata
6
- class DirectParser
7
- include Kumi::Parser::Helpers
8
-
9
# Sets up a parser positioned at the first token.
#
# @param tokens [Array] the token stream to parse; the final entry is used
#   as the fallback token whenever the parser looks past the end
def initialize(tokens)
  @imported_names = Set.new # names collected from import declarations
  @pos = 0                  # index of the token currently being examined
  @tokens = tokens
end
14
-
15
# Parses the whole token stream and returns the schema root node.
#
# Imports written before the `schema` keyword are recorded as imported
# names and appended to the imports of the parsed schema node.
#
# @return [Object] the node returned by parse_schema
# @raise [Errors::ParseError] when anything other than EOF follows the schema
def parse
  skip_comments_and_newlines

  # Root-level imports come before the schema keyword.
  leading_imports = parse_imports
  @imported_names.merge(leading_imports.flat_map(&:names))

  root = parse_schema

  # Fold the root-level imports into the ones declared inside the schema.
  root.imports.concat(leading_imports) unless leading_imports.empty?

  skip_comments_and_newlines
  expect_token(:eof)
  root
end
34
-
35
- private
36
-
37
# Returns the token at the current position, or the last token of the
# stream (the EOF token) once the position has run past the end.
def current_token
  token = @tokens[@pos]
  token || @tokens.last
end
40
-
41
# Looks ahead without consuming anything.
#
# @param offset [Integer] how many tokens ahead of the current one to look
# @return [Object] the token at that position, or the last token of the
#   stream (the EOF token) when the position is beyond the end
def peek_token(offset = 1)
  target = @pos + offset
  target < @tokens.length ? @tokens[target] : @tokens.last
end
47
-
48
# Moves to the next token. The position never moves past the final token,
# so current_token keeps returning it once the end is reached.
def advance
  return unless @pos < @tokens.length - 1

  @pos += 1
end
51
-
52
# Consumes the current token, insisting that it has the given type.
#
# @param expected_type [Symbol] the token type that must be present
# @return [Object] the token that was consumed
# @raise [Errors::ParseError] when the current token has a different type
def expect_token(expected_type)
  token = current_token
  unless token.type == expected_type
    raise_parse_error("Expected #{expected_type}, got #{token.type}")
  end

  advance
  token
end
58
-
59
# Schema: 'schema' 'do' imports, codegen directives, input block,
# then any mix of value / let / trait declarations, 'end'.
#
# `value` and `let` declarations are collected into the same list; `trait`
# declarations are kept separately.
#
# @return [Kumi::Syntax::Root] root node located at the `schema` keyword
def parse_schema
  schema_token = expect_token(:schema)
  expect_token(:do)

  skip_comments_and_newlines
  imports = parse_imports
  @imported_names.merge(imports.flat_map(&:names))
  hints = parse_codegen_directives

  inputs = parse_input_block

  values = []
  traits = []

  skip_comments_and_newlines
  loop do
    case current_token.type
    when :value then values << parse_value_declaration
    when :let   then values << parse_let_value_declaration
    when :trait then traits << parse_trait_declaration
    else break # anything else ends the declaration list
    end
    skip_comments_and_newlines
  end

  expect_token(:end)

  Kumi::Syntax::Root.new(
    inputs,
    values,
    traits,
    imports,
    hints: hints,
    loc: schema_token.location
  )
end
98
-
99
# Collects consecutive `codegen ...` directives into a hints hash.
#
# Options from repeated directives are merged under the :codegen key, with
# later directives overriding earlier ones.
#
# @return [Hash] empty when no directive is present, otherwise
#   `{ codegen: { ...options... } }`
def parse_codegen_directives
  collected = {}
  skip_comments_and_newlines

  until current_token.type != :codegen
    expect_token(:codegen)
    collected[:codegen] = collected.fetch(:codegen, {}).merge(parse_codegen_options)
    skip_comments_and_newlines
  end

  collected
end
112
-
113
# Parses the comma-separated `label boolean` pairs of a codegen directive.
# `streaming` is the only option accepted.
#
# @return [Hash] `{ streaming: true }` or `{ streaming: false }`
# @raise [Errors::ParseError] on an unknown option name or a non-boolean value
def parse_codegen_options
  options = {}

  loop do
    name_token = expect_token(:label)
    unless name_token.value == 'streaming'
      raise_parse_error("Unknown codegen option '#{name_token.value}'")
    end

    options[:streaming] = expect_token(:boolean).value == 'true'
    break if current_token.type != :comma

    expect_token(:comma)
    skip_comments_and_newlines
  end

  options
end
130
-
131
# Parses zero or more statements of the form
#   import :name [, :other ...], from: Some::Module
#
# @return [Array<Kumi::Syntax::ImportDeclaration>] one declaration per
#   `import` statement, in source order
# @raise [Errors::ParseError] when the `from:` keyword argument is missing
def parse_imports
  declarations = []
  skip_comments_and_newlines

  while current_token.type == :import
    keyword = expect_token(:import)
    imported = [expect_token(:symbol).value.to_sym]

    while current_token.type == :comma
      expect_token(:comma)
      skip_comments_and_newlines

      # A `from:` label after the comma closes the list of imported names.
      break if current_token.type == :label && current_token.value == 'from'

      imported << expect_token(:symbol).value.to_sym
    end

    skip_comments_and_newlines

    unless current_token.type == :label && current_token.value == 'from'
      raise_parse_error("Expected 'from:' keyword argument in import statement")
    end
    expect_token(:label) # consume 'from:'

    skip_comments_and_newlines
    source_module = parse_constant

    declarations << Kumi::Syntax::ImportDeclaration.new(
      imported,
      source_module,
      loc: keyword.location
    )

    skip_comments_and_newlines
  end

  declarations
end
180
-
181
# Parses a possibly namespaced constant such as `Schemas::Tax`.
# The `::` separator is read as two consecutive :colon tokens.
#
# @return [String] the constant path with its segments joined by '::'
def parse_constant
  segments = [expect_token(:constant).value]

  while current_token.type == :colon && peek_token.type == :colon
    2.times { expect_token(:colon) }
    segments << expect_token(:constant).value
  end

  segments.join('::')
end
195
-
196
# Input block: 'input' 'do' declarations 'end'
#
# Declarations are read for as long as the current token is a type keyword;
# any other token stops the loop and must then be the closing `end`.
#
# @return [Array] the parsed input declarations, in source order
def parse_input_block
  expect_token(:input)
  expect_token(:do)
  skip_comments_and_newlines

  fields = []
  while !%i[end eof].include?(current_token.type) &&
        current_token.metadata[:category] == :type_keyword
    fields << parse_input_declaration
    skip_comments_and_newlines
  end

  expect_token(:end)
  fields
end
214
-
215
# Parses one input declaration: a type keyword, a symbol name, optional
# keyword arguments and, for array / hash / element types, an optional
# nested `do ... end` block of child declarations.
#
# @return [Kumi::Syntax::InputDeclaration] located at the type keyword
# @raise [Errors::ParseError] when the current token is not a type keyword,
#   or when `index:` is given on a non-array declaration
def parse_input_declaration
  type_token = current_token
  if type_token.metadata[:category] != :type_keyword
    raise_parse_error("Expected type keyword, got #{type_token.type}")
  end
  advance

  name_token = expect_token(:symbol)
  declared_type = type_token.metadata[:type_name]

  # NOTE(review): parse_optional_decl_kwargs is defined outside this view;
  # it is used here as returning a [domain, index_name] pair.
  domain, index_name = parse_optional_decl_kwargs

  if index_name && declared_type != :array
    raise_parse_error('`index:` only valid on array declarations')
  end

  nested = []
  if %i[array hash element].include?(declared_type) && current_token.type == :do
    advance
    skip_comments_and_newlines
    while !%i[end eof].include?(current_token.type) &&
          current_token.metadata[:category] == :type_keyword
      nested << parse_input_declaration
      skip_comments_and_newlines
    end
    expect_token(:end)
  end

  Kumi::Syntax::InputDeclaration.new(
    name_token.value,
    domain,
    declared_type,
    nested,
    index_name,
    loc: type_token.location
  )
end
251
-
252
# Parses a domain constraint starting at the current token.
#
# * `[`               -> array literal converted to a plain Ruby array
# * integer or float  -> single value or range (see parse_range_domain)
# * anything else     -> tokens are skipped up to the next comma, newline,
#                        `end` or EOF and nil is returned
#
# @return [Array, Range, nil]
def parse_domain_specification
  kind = current_token.type

  if kind == :lbracket
    convert_array_expression_to_ruby_array(parse_array_literal)
  elsif %i[integer float].include?(kind)
    parse_range_domain
  else
    # Unsupported domain syntax: skip it rather than fail.
    advance until %i[comma newline eof end].include?(current_token.type)
    nil
  end
end
264
-
265
# Parses a numeric domain that starts at the current token.
#
# Accepted forms:
#   N       -> [N]       single-value domain
#   N..M    -> (N..M)    inclusive range
#   N...M   -> (N...M)   exclusive range
#
# The caller dispatches here only when the current token is an :integer or
# a :float. The token after a range operator must be numeric as well;
# previously any token was accepted there and silently read as 0.0.
#
# @return [Range, Array] the parsed domain
# @raise [Errors::ParseError] when a range operator is not followed by an
#   integer or float token
def parse_range_domain
  lower = numeric_domain_bound(current_token)
  advance

  operator = current_token.type
  return [lower] unless %i[dot_dot dot_dot_dot].include?(operator)

  advance
  bound_token = current_token
  unless %i[integer float].include?(bound_token.type)
    raise_parse_error("Expected numeric range end, got #{bound_token.type}")
  end
  upper = numeric_domain_bound(bound_token)
  advance

  # The third argument excludes the end value, matching the `...` operator.
  Range.new(lower, upper, operator == :dot_dot_dot)
end

# Converts a numeric token to an Integer (:integer tokens) or a Float.
def numeric_domain_bound(token)
  token.type == :integer ? token.value.to_i : token.value.to_f
end
287
-
288
# Turns an array expression node into a plain Ruby array: literal elements
# are replaced by their value, every other element is kept as a node.
#
# @param array_expr [Object] expected to be a Kumi::Syntax::ArrayExpression
# @return [Array, nil] nil when the argument is not an array expression
def convert_array_expression_to_ruby_array(array_expr)
  return unless array_expr.is_a?(Kumi::Syntax::ArrayExpression)

  array_expr.elements.map do |item|
    item.is_a?(Kumi::Syntax::Literal) ? item.value : item
  end
end
299
-
300
# Value declaration: `value :name, expression` or `value :name do ... end`.
# The `do` form is parsed as a cascade expression.
#
# @return [Kumi::Syntax::ValueDeclaration] located at the `value` keyword
def parse_value_declaration
  keyword = expect_token(:value)
  name = expect_token(:symbol)

  body =
    if current_token.type == :do
      parse_cascade_expression
    else
      expect_token(:comma)
      parse_expression
    end

  Kumi::Syntax::ValueDeclaration.new(name.value, body, loc: keyword.location)
end
318
-
319
# Let declaration: `let :name, expression` or `let :name do ... end`.
# It has the same shape as a value declaration, but the resulting node
# carries the hint `inline: true`.
#
# @return [Kumi::Syntax::ValueDeclaration] located at the `let` keyword
def parse_let_value_declaration
  keyword = expect_token(:let)
  name = expect_token(:symbol)

  body =
    if current_token.type == :do
      parse_cascade_expression
    else
      expect_token(:comma)
      parse_expression
    end

  Kumi::Syntax::ValueDeclaration.new(
    name.value,
    body,
    hints: { inline: true },
    loc: keyword.location
  )
end
337
-
338
# Trait declaration: `trait :name, expression`
#
# @return [Kumi::Syntax::TraitDeclaration] located at the `trait` keyword
def parse_trait_declaration
  keyword = expect_token(:trait)
  name = expect_token(:symbol)
  expect_token(:comma)
  body = parse_expression

  Kumi::Syntax::TraitDeclaration.new(name.value, body, loc: keyword.location)
end
351
-
352
# Cascade expression: 'do' followed by any number of `on` / `base` cases
# and a closing 'end'.
#
# @return [Kumi::Syntax::CascadeExpression] located at the `do` token
def parse_cascade_expression
  opener = expect_token(:do)
  skip_comments_and_newlines

  branches = []
  loop do
    break unless %i[on base].include?(current_token.type)

    branches << parse_case_expression
    skip_comments_and_newlines
  end

  expect_token(:end)
  Kumi::Syntax::CascadeExpression.new(branches, loc: opener.location)
end
364
-
365
# Parses a single cascade case.
#
# * `on cond [, cond ...], result` - the last comma-separated expression is
#   the result and the preceding ones are the conditions. A single condition
#   that is a bare declaration reference is wrapped in :cascade_and; any
#   other single condition is used unchanged; every other count of
#   conditions is combined with :cascade_and.
# * `base result` - the condition is the literal `true`.
#
# @return [Kumi::Syntax::CaseExpression] located at the `on` / `base` keyword
# @raise [Errors::ParseError] when the current token is neither `on` nor `base`
def parse_case_expression
  case current_token.type
  when :base
    keyword = advance_and_return_token
    outcome = parse_expression
    always = Kumi::Syntax::Literal.new(true, loc: keyword.location)
    Kumi::Syntax::CaseExpression.new(always, outcome, loc: keyword.location)

  when :on
    keyword = advance_and_return_token

    parts = [parse_expression]
    while current_token.type == :comma
      advance
      parts << parse_expression
    end

    # Everything before the final expression is a condition.
    *guards, outcome = parts

    guard =
      if guards.length == 1
        only = guards[0]
        simple_trait_reference?(only) ? wrap_condition_in_all(only) : only
      else
        Kumi::Syntax::CallExpression.new(:cascade_and, guards, loc: keyword.location)
      end

    Kumi::Syntax::CaseExpression.new(guard, outcome, loc: keyword.location)

  else
    raise_parse_error("Expected 'on' or 'base' in cascade expression")
  end
end
399
-
400
# Precedence-climbing (Pratt-style) parser for binary expressions.
#
# @param min_precedence [Integer] operators with a lower precedence are
#   left for the caller to consume
# @return [Object] the expression node
def parse_expression(min_precedence = 0)
  left = parse_postfix_chain(parse_primary_expression)
  skip_comments_and_newlines

  loop do
    op = current_token
    break unless op.operator? && op.precedence >= min_precedence

    advance
    skip_comments_and_newlines

    # Left-associative operators require a strictly tighter operator on the
    # right-hand side; other operators accept the same precedence again.
    binding_power = op.left_associative? ? op.precedence + 1 : op.precedence

    right = parse_expression(binding_power)
    node = Kumi::Syntax::CallExpression.new(
      map_operator_token_to_function_name(op.type),
      [left, right],
      loc: op.location
    )
    left = parse_postfix_chain(node)
    skip_comments_and_newlines
  end

  left
end
430
-
431
# Applies any `[index]` suffixes that follow an expression, wrapping it in
# one :at call per bracket pair.
#
# @param base [Object] the expression node the suffixes apply to
# @return [Object] `base` itself when no `[` follows, otherwise the
#   outermost :at call
def parse_postfix_chain(base)
  node = base
  skip_comments_and_newlines

  loop do
    break unless current_token.type == :lbracket

    expect_token(:lbracket)
    index = parse_expression
    expect_token(:rbracket)
    node = Kumi::Syntax::CallExpression.new(:at, [node, index], loc: node.loc)
    skip_comments_and_newlines
  end

  node
end
442
-
443
# Parses an operand: a literal, a reference, a call, a parenthesised
# expression, an array or hash literal, or a unary minus.
#
# @return [Object] the parsed node
# @raise [Errors::ParseError] when the current token cannot start an expression
def parse_primary_expression
  token = current_token

  case token.type
  when :integer, :float, :string, :boolean, :constant, :symbol
    literal_value = convert_literal_value(token)
    advance
    Kumi::Syntax::Literal.new(literal_value, loc: token.location)

  when :function_sugar then parse_function_sugar
  when :fn             then parse_function_call
  when :lbracket       then parse_array_literal
  when :left_brace     then parse_hash_literal

  when :identifier
    upcoming = peek_token.type
    if token.value == 'input' && upcoming == :dot
      parse_input_reference
    elsif token.value == 'index' && upcoming == :lparen
      parse_index_intrinsic
    elsif upcoming == :lparen
      # A call such as tax(amount: input.amount)
      parse_imported_function_call
    else
      advance
      Kumi::Syntax::DeclarationReference.new(token.value.to_sym, loc: token.location)
    end

  when :input
    if peek_token.type == :dot
      parse_input_reference_from_input_token
    else
      raise_parse_error("Unexpected 'input' keyword in expression")
    end

  when :lparen
    advance
    inner = parse_expression
    expect_token(:rparen)
    inner

  when :subtract
    # Unary minus is represented as `0 - operand`.
    advance
    skip_comments_and_newlines
    operand = parse_primary_expression
    zero = Kumi::Syntax::Literal.new(0, loc: token.location)
    Kumi::Syntax::CallExpression.new(:subtract, [zero, operand], loc: token.location)

  when :newline, :comment
    skip_comments_and_newlines
    parse_primary_expression

  else
    raise_parse_error("Unexpected token in expression: #{token.type}")
  end
end
503
-
504
# Parses the intrinsic `index(:name)`.
#
# The leading token may be an :index_type token or an identifier whose
# value is 'index'.
#
# @return [Kumi::Syntax::IndexReference] located at the leading token
# @raise [Errors::ParseError] when the current token does not start `index(...)`
def parse_index_intrinsic
  head = current_token
  index_keyword = head.type == :index_type || (head.type == :identifier && head.value == 'index')
  raise_parse_error('Expected index(...)') unless index_keyword
  advance

  expect_token(:lparen)
  axis = expect_token(:symbol) # :i, :j, ...
  expect_token(:rparen)
  Kumi::Syntax::IndexReference.new(axis.value, loc: head.location)
end
517
-
518
# Parses `input.field` or `input.a.b...` when `input` arrives as an
# :identifier token.
#
# @return [Kumi::Syntax::InputReference] for a single field
# @return [Kumi::Syntax::InputElementReference] for a dotted path
# @raise [Errors::ParseError] when the identifier is not 'input'
def parse_input_reference
  keyword = expect_token(:identifier) # must be 'input'
  raise_parse_error("Expected 'input'") unless keyword.value == 'input'
  expect_token(:dot)

  segments = []
  loop do
    segments << expect_field_name_token.to_sym
    break unless current_token.type == :dot

    advance
  end

  return Kumi::Syntax::InputReference.new(segments.first, loc: keyword.location) if segments.length == 1

  Kumi::Syntax::InputElementReference.new(segments, loc: keyword.location)
end
533
-
534
# Parses `input.field` or `input.a.b...` when `input` arrives as the
# dedicated :input keyword token.
#
# @return [Kumi::Syntax::InputReference] for a single field
# @return [Kumi::Syntax::InputElementReference] for a dotted path
def parse_input_reference_from_input_token
  keyword = expect_token(:input)
  expect_token(:dot)

  segments = [expect_field_name_token.to_sym]
  until current_token.type != :dot
    advance
    segments << expect_field_name_token.to_sym
  end

  node_class, target =
    if segments.length == 1
      [Kumi::Syntax::InputReference, segments.first]
    else
      [Kumi::Syntax::InputElementReference, segments]
    end

  node_class.new(target, loc: keyword.location)
end
550
-
551
# Parses a sugar call such as `shift(...)`; the sugar token's value becomes
# the function name.
#
# @return [Kumi::Syntax::CallExpression] located at the sugar token
def parse_function_sugar
  sugar_token = current_token
  advance # move past the function name, e.g. shift
  expect_token(:lparen)
  positional, keyword = parse_args_and_opts_inside_parens
  Kumi::Syntax::CallExpression.new(
    sugar_token.value.to_sym,
    positional,
    keyword,
    loc: sugar_token.location
  )
end
558
-
559
# Parses the explicit call form `fn(:name, args..., key: value)`.
#
# When the name is a known import and the call carries only keyword
# options, the result is an ImportCall whose options are the input mapping;
# otherwise it is a regular CallExpression.
#
# @return [Kumi::Syntax::ImportCall, Kumi::Syntax::CallExpression]
#   located at the function-name symbol
# @raise [Errors::ParseError] when the call is malformed
def parse_function_call
  advance # consume the :fn keyword
  expect_token(:lparen)
  fn_name_token = expect_token(:symbol) # :shift, :roll, etc.

  if current_token.type == :comma
    advance
    # NOTE(review): parse_args_and_opts_inside_parens is defined outside this
    # view; it is assumed to consume the closing paren, as its use in
    # parse_function_sugar implies.
    args, opts = parse_args_and_opts_inside_parens
  else
    # Zero-argument form `fn(:name)`: nothing else consumes the closing
    # paren, so it must be consumed here or it is left in the stream.
    args = []
    opts = {}
    expect_token(:rparen)
  end

  if @imported_names.include?(fn_name_token.value) && args.empty? && opts.any?
    # Imported call: the keyword options are the input mapping.
    Kumi::Syntax::ImportCall.new(fn_name_token.value, opts, loc: fn_name_token.location)
  else
    Kumi::Syntax::CallExpression.new(fn_name_token.value, args, opts, loc: fn_name_token.location)
  end
end
580
-
581
# Parses `name(key: expr, ...)`, a call written with a bare identifier.
# Only keyword arguments are read; each value is a full expression.
#
# @return [Kumi::Syntax::ImportCall] when the name was imported and at
#   least one keyword argument is present
# @return [Kumi::Syntax::CallExpression] otherwise, with no positional args
def parse_imported_function_call
  name_token = current_token
  callee = name_token.value.to_sym
  advance # consume identifier
  expect_token(:lparen)

  keyword_args = {}
  while current_token.type == :label
    label = current_token.value.to_sym
    advance

    keyword_args[label] = parse_expression

    break if current_token.type != :comma

    advance
    skip_comments_and_newlines
  end

  expect_token(:rparen)

  if @imported_names.include?(callee) && keyword_args.any?
    # The keyword arguments are the input mapping of the imported schema.
    Kumi::Syntax::ImportCall.new(callee, keyword_args, loc: name_token.location)
  else
    Kumi::Syntax::CallExpression.new(callee, [], keyword_args, loc: name_token.location)
  end
end
616
-
617
# Parses an array literal `[a, b, ...]`.
#
# Comments and newlines are skipped after `[`, after every element and
# after every comma, so an empty multi-line array and a trailing comma
# followed by a newline are accepted, as they are for hash literals.
#
# @return [Kumi::Syntax::ArrayExpression] located at the `[` token
# @raise [Errors::ParseError] when the closing `]` is missing
def parse_array_literal
  start_token = expect_token(:lbracket)
  skip_comments_and_newlines
  elements = []

  until current_token.type == :rbracket
    elements << parse_expression
    skip_comments_and_newlines
    break unless current_token.type == :comma

    advance
    # Allow a trailing comma, including one followed by a newline.
    skip_comments_and_newlines
  end

  expect_token(:rbracket)
  Kumi::Syntax::ArrayExpression.new(elements, loc: start_token.location)
end
630
-
631
# Parses a hash literal `{ key: value, "k" => v, ... }`.
# An empty hash and a trailing comma are both accepted.
#
# @return [Kumi::Syntax::HashExpression] located at the `{` token
def parse_hash_literal
  opener = expect_token(:left_brace)
  skip_comments_and_newlines
  entries = []

  until current_token.type == :right_brace
    entries << parse_hash_pair
    skip_comments_and_newlines
    break unless current_token.type == :comma

    advance
    skip_comments_and_newlines
  end

  expect_token(:right_brace)
  Kumi::Syntax::HashExpression.new(entries, loc: opener.location)
end
655
-
656
# Parses one `key => value` or `key: value` entry of a hash literal.
#
# Label and symbol keys become Ruby symbols; string keys stay strings.
# Only label keys may omit the `=>` arrow.
#
# @return [Array] a two-element array: the key as a literal node, then the
#   value expression
# @raise [Errors::ParseError] on an unsupported key type or a missing `=>`
def parse_hash_pair
  key_token = current_token

  raw_key =
    case key_token.type
    when :label, :symbol then key_token.value.to_sym # `render:` or `:foo =>`
    when :string then key_token.value                # `"0" =>`
    else raise_parse_error('Hash keys must be symbols (:key) or strings ("key")')
    end

  advance
  key_node = Kumi::Syntax::Literal.new(raw_key, loc: key_token.location)

  skip_comments_and_newlines
  if current_token.type == :arrow
    advance
  elsif key_token.type != :label
    # Only labels may omit => (Ruby-style `key:`)
    raise_parse_error("Expected '=>' in hash pair")
  end

  skip_comments_and_newlines
  [key_node, parse_expression]
end
683
-
684
# Raises a parse error attached to the current token.
#
# @param message [String] human-readable description of the problem
# @raise [Errors::ParseError] always
def raise_parse_error(message)
  raise Errors::ParseError.new(message, token: current_token)
end
688
-
689
# True when the condition is a bare declaration reference rather than a
# compound expression.
#
# @param condition [Object] the condition node to inspect
# @return [Boolean]
def simple_trait_reference?(condition)
  condition.kind_of?(Kumi::Syntax::DeclarationReference)
end
692
-
693
# Wraps a single condition in a one-argument :cascade_and call that keeps
# the location of the condition.
#
# @param condition [Object] the condition node to wrap
# @return [Kumi::Syntax::CallExpression]
def wrap_condition_in_all(condition)
  location = condition.loc
  Kumi::Syntax::CallExpression.new(:cascade_and, [condition], loc: location)
end
696
- end
697
- end
698
- end