kumi-parser 0.0.24 → 0.0.26

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: cb3b86394edb12c3c7033ca85d91f84e0945289c384f7a9717ca8c1ce3965432
4
- data.tar.gz: b06faae267e651a8440e0218cc587be32c0f9de2a81aef27661198976dfebec1
3
+ metadata.gz: a6ef763c722f0427eebb47ed51d32ee6990bd348eecdafe129378164c372ab82
4
+ data.tar.gz: cc6cd479a53bef22cdcabbfacd76a6d6ee93112902d0aa61eb776d35f02d7a76
5
5
  SHA512:
6
- metadata.gz: d89bc307ef6d89c11adce650bc9500cddea0a0fa3e487212d9d483ca921c23839a6c5f2fcd9cae396d497e1e22119a414802624c35de8898db4b5d3247186dbc
7
- data.tar.gz: d601d7b9aa7622552dd7bea123ded066bbc7ce57b3bb967a5355ce66d55ead08d0e3fe5553151198469fec7d4a7fd11d1fe6389b787df067b06ee559da9f8313
6
+ metadata.gz: afd489b46a4130cdbd93cf48c59ecd7c6e472e2fb6ffbd4a9e4bd4c0d042bf24ca560b2677776e4d0330f996846ddec5e70fc62ba45c06a67f17782bb36fc80e
7
+ data.tar.gz: db804d45b28ac822e89ea48f01a702a0b86525fe120a3c672adf442950a2a5a71003c8e82276fc44d624dce9bcb428c6499b42df59a4455a8b39f6a428b395ba
@@ -4,6 +4,8 @@ module Kumi
4
4
  module Parser
5
5
  # Direct AST construction parser using recursive descent with embedded token metadata
6
6
  class DirectParser
7
+ include Kumi::Parser::Helpers
8
+
7
9
  def initialize(tokens)
8
10
  @tokens = tokens
9
11
  @pos = 0
@@ -41,14 +43,6 @@ module Kumi
41
43
  token
42
44
  end
43
45
 
44
- def skip_newlines
45
- advance while current_token.type == :newline
46
- end
47
-
48
- def skip_comments_and_newlines
49
- advance while %i[newline comment].include?(current_token.type)
50
- end
51
-
52
46
  # Schema: 'schema' 'do' ... 'end'
53
47
  def parse_schema
54
48
  schema_token = expect_token(:schema)
@@ -102,11 +96,6 @@ module Kumi
102
96
  declarations
103
97
  end
104
98
 
105
- # Input declaration: 'integer :name' or 'array :items do ... end' or 'element :type, :name'
106
- #
107
- # IMPORTANT: For array nodes with a block, this sets the node's access_mode:
108
- # - :element if the block contains exactly one child introduced by `element`
109
- # - :field otherwise
110
99
  def parse_input_declaration
111
100
  type_token = current_token
112
101
  unless type_token.metadata[:category] == :type_keyword
@@ -114,94 +103,34 @@ module Kumi
114
103
  end
115
104
  advance
116
105
 
117
- # element :type, :name (syntactic sugar: the child was declared via `element`)
118
- declared_with_element = (type_token.metadata[:type_name] == :element)
119
- declared_with_index = (type_token.metadata[:type_name] == :index)
120
- if declared_with_element
121
- element_type_token = expect_token(:symbol)
122
- expect_token(:comma)
123
- name_token = expect_token(:symbol)
124
- actual_type = element_type_token.value
125
- elsif declared_with_index
126
- name_token = expect_token(:symbol)
127
- actual_type = :index
128
- else
129
- name_token = expect_token(:symbol)
130
- actual_type = type_token.metadata[:type_name]
131
- end
106
+ name_token = expect_token(:symbol)
107
+ actual_type = type_token.metadata[:type_name]
132
108
 
133
- # Optional: ', domain: ...'
134
- domain = nil
135
- if current_token.type == :comma
136
- advance
137
- if current_token.type == :identifier && current_token.value == 'domain'
138
- advance
139
- expect_token(:colon)
140
- domain = parse_domain_specification
141
- else
142
- @pos -= 1
143
- end
144
- end
109
+ domain, index_name = parse_optional_decl_kwargs
145
110
 
146
- # Parse nested declarations for block forms
147
- children = []
148
- any_element_children = false
149
- any_field_children = false
111
+ raise_parse_error('`index:` only valid on array declarations') if index_name && actual_type != :array
150
112
 
113
+ children = []
151
114
  if %i[array hash element].include?(actual_type) && current_token.type == :do
152
- advance # consume 'do'
115
+ advance
153
116
  skip_comments_and_newlines
154
-
155
117
  until %i[end eof].include?(current_token.type)
156
118
  break unless current_token.metadata[:category] == :type_keyword
157
119
 
158
- # Syntactic decision (NO counting): is this child introduced by `element`?
159
- child_is_element_keyword = (current_token.metadata[:type_name] == :element)
160
- child_is_index_keyword = (current_token.metadata[:type_name] == :index)
161
- any_element_children ||= child_is_element_keyword
162
- any_field_children ||= !child_is_element_keyword && !child_is_index_keyword
163
-
164
120
  children << parse_input_declaration
165
121
  skip_comments_and_newlines
166
122
  end
167
-
168
123
  expect_token(:end)
169
-
170
- # For array blocks, access_mode derives strictly from syntax:
171
- # - :element if ANY direct child used `element`
172
- # - :field if NONE used `element`
173
- # Mixing is invalid.
174
- if actual_type == :array
175
- if any_element_children && any_field_children
176
- raise_parse_error("array :#{name_token.value} mixes `element` and field children; choose one style")
177
- end
178
- access_mode = any_element_children ? :element : :field
179
- else
180
- access_mode = :field # objects/hashes with blocks behave like field containers
181
- end
182
- else
183
- access_mode = nil # leaves carry no access_mode
184
124
  end
185
125
 
186
- if children.empty?
187
- Kumi::Syntax::InputDeclaration.new(
188
- name_token.value,
189
- domain,
190
- actual_type,
191
- children,
192
- loc: type_token.location
193
- )
194
- else
195
- # 5th positional arg in your existing ctor is access_mode
196
- Kumi::Syntax::InputDeclaration.new(
197
- name_token.value,
198
- domain,
199
- actual_type,
200
- children,
201
- access_mode || :field,
202
- loc: type_token.location
203
- )
204
- end
126
+ Kumi::Syntax::InputDeclaration.new(
127
+ name_token.value,
128
+ domain,
129
+ actual_type,
130
+ children,
131
+ index_name, # <— NEW
132
+ loc: type_token.location
133
+ )
205
134
  end
206
135
 
207
136
  def parse_domain_specification
@@ -352,12 +281,6 @@ module Kumi
352
281
  end
353
282
  end
354
283
 
355
- def advance_and_return_token
356
- token = current_token
357
- advance
358
- token
359
- end
360
-
361
284
  # Pratt parser for expressions
362
285
  def parse_expression(min_precedence = 0)
363
286
  left = parse_primary_expression
@@ -405,27 +328,28 @@ module Kumi
405
328
  token = current_token
406
329
 
407
330
  case token.type
408
- when :integer, :float, :string, :boolean, :constant
331
+ when :integer, :float, :string, :boolean, :constant, :symbol
409
332
  value = convert_literal_value(token)
410
333
  advance
411
334
  Kumi::Syntax::Literal.new(value, loc: token.location)
335
+
412
336
  when :function_sugar
413
337
  parse_function_sugar
414
338
 
415
339
  when :identifier
416
340
  if token.value == 'input' && peek_token.type == :dot
417
341
  parse_input_reference
342
+ elsif token.value == 'index' && peek_token.type == :lparen
343
+ parse_index_intrinsic
418
344
  else
419
345
  advance
420
346
  Kumi::Syntax::DeclarationReference.new(token.value.to_sym, loc: token.location)
421
347
  end
422
348
 
423
349
  when :input
424
- if peek_token.type == :dot
425
- parse_input_reference_from_input_token
426
- else
427
- raise_parse_error("Unexpected 'input' keyword in expression")
428
- end
350
+ return parse_input_reference_from_input_token if peek_token.type == :dot
351
+
352
+ raise_parse_error("Unexpected 'input' keyword in expression")
429
353
 
430
354
  when :lparen
431
355
  advance
@@ -440,18 +364,14 @@ module Kumi
440
364
  parse_hash_literal
441
365
 
442
366
  when :fn
443
- # expect_token(:fn)
444
367
  parse_function_call
445
368
 
446
369
  when :subtract
447
370
  advance
448
371
  skip_comments_and_newlines
449
372
  operand = parse_primary_expression
450
- Kumi::Syntax::CallExpression.new(
451
- :subtract,
452
- [Kumi::Syntax::Literal.new(0, loc: token.location), operand],
453
- loc: token.location
454
- )
373
+ Kumi::Syntax::CallExpression.new(:subtract, [Kumi::Syntax::Literal.new(0, loc: token.location), operand],
374
+ loc: token.location)
455
375
 
456
376
  when :newline, :comment
457
377
  skip_comments_and_newlines
@@ -462,16 +382,29 @@ module Kumi
462
382
  end
463
383
  end
464
384
 
385
+ def parse_index_intrinsic
386
+ start = current_token
387
+ if start.type == :index_type || (start.type == :identifier && start.value == 'index')
388
+ advance
389
+ else
390
+ raise_parse_error('Expected index(...)')
391
+ end
392
+
393
+ expect_token(:lparen)
394
+ sym = expect_token(:symbol) # :i, :j, ...
395
+ expect_token(:rparen)
396
+ Kumi::Syntax::IndexReference.new(sym.value, loc: start.location)
397
+ end
398
+
465
399
  def parse_input_reference
466
- input_token = expect_token(:identifier) # 'input'
400
+ input_token = expect_token(:identifier) # must be 'input'
401
+ raise_parse_error("Expected 'input'") unless input_token.value == 'input'
467
402
  expect_token(:dot)
468
-
469
403
  path = [expect_field_name_token.to_sym]
470
404
  while current_token.type == :dot
471
405
  advance
472
406
  path << expect_field_name_token.to_sym
473
407
  end
474
-
475
408
  if path.length == 1
476
409
  Kumi::Syntax::InputReference.new(path.first, loc: input_token.location)
477
410
  else
@@ -518,70 +451,6 @@ module Kumi
518
451
  Kumi::Syntax::CallExpression.new(fn_name_token.value, args, opts, loc: fn_name_token.location)
519
452
  end
520
453
 
521
- def parse_kw_literal_value
522
- t = current_token
523
- case t.type
524
- when :integer then advance
525
- t.value.delete('_').to_i
526
- when :float then advance
527
- t.value.delete('_').to_f
528
- when :string, :symbol then advance
529
- t.value
530
- when :boolean then advance
531
- t.value == 'true'
532
- when :label then advance
533
- t.value.to_sym # :wrap, :clamp, etc.
534
- when :subtract # allow negatives like -1
535
- advance
536
- v = parse_kw_literal_value
537
- raise_parse_error("numeric after unary '-'") unless v.is_a?(Numeric)
538
- -v
539
- else
540
- raise_parse_error('keyword value must be literal/label')
541
- end
542
- end
543
-
544
- def parse_args_and_opts_inside_parens
545
- args = []
546
- opts = {}
547
-
548
- # expect_token(:lparen)
549
-
550
- unless current_token.type == :rparen
551
- # --- positional args ---
552
- unless next_is_kwarg_after_comma?
553
- args << parse_expression
554
- while current_token.type == :comma && !next_is_kwarg_after_comma?
555
- advance
556
- args << parse_expression
557
- end
558
- end
559
- # --- kwargs (labels like `policy:`) ---
560
- if next_is_kwarg_after_comma?
561
- # subsequent pairs: `, label value`
562
- while current_token.type == :comma
563
- # stop if next token is not a kw key
564
- advance
565
-
566
- if current_token.type == :label
567
- key = current_token.value.to_sym
568
- advance
569
- end
570
- opts[key] = parse_kw_literal_value
571
-
572
- break unless next_is_kwarg_after_comma?
573
- end
574
- end
575
- end
576
-
577
- expect_token(:rparen)
578
- [args, opts]
579
- end
580
-
581
- def next_is_kwarg_after_comma?
582
- current_token.type == :comma && peek_token.type == :label
583
- end
584
-
585
454
  def parse_array_literal
586
455
  start_token = expect_token(:lbracket)
587
456
  elements = []
@@ -649,32 +518,6 @@ module Kumi
649
518
  [key, value]
650
519
  end
651
520
 
652
- def convert_literal_value(token)
653
- case token.type
654
- when :integer then token.value.gsub('_', '').to_i
655
- when :float then token.value.gsub('_', '').to_f
656
- when :string then token.value
657
- when :boolean then token.value == 'true'
658
- when :symbol then token.value.to_sym
659
- when :constant
660
- case token.value
661
- when 'Float::INFINITY' then Float::INFINITY
662
- else
663
- raise_parse_error("Unknown constant: #{token.value}")
664
- end
665
- end
666
- end
667
-
668
- def expect_field_name_token
669
- token = current_token
670
- if token.identifier? || token.keyword?
671
- advance
672
- token.value
673
- else
674
- raise_parse_error("Expected field name (identifier or keyword), got #{token.type}")
675
- end
676
- end
677
-
678
521
  def raise_parse_error(message)
679
522
  location = current_token.location
680
523
  raise Errors::ParseError.new(message, token: current_token)
@@ -687,21 +530,6 @@ module Kumi
687
530
  def wrap_condition_in_all(condition)
688
531
  Kumi::Syntax::CallExpression.new(:cascade_and, [condition], loc: condition.loc)
689
532
  end
690
-
691
- def map_operator_token_to_function_name(token_type)
692
- case token_type
693
- when :eq then :==
694
- when :ne then :!=
695
- when :gt then :>
696
- when :lt then :<
697
- when :gte then :>=
698
- when :lte then :<=
699
- when :and then :and
700
- when :or then :or
701
- when :exponent then :power
702
- else token_type
703
- end
704
- end
705
533
  end
706
534
  end
707
535
  end
@@ -0,0 +1,154 @@
1
+ module Kumi
2
+ module Parser
3
+ module Helpers
4
+ # Parses optional ", domain: ..., index: :sym" (order-agnostic, both optional)
5
+ # Cursor is right after the array/hash/type name.
6
+ def parse_optional_decl_kwargs
7
+ domain = nil
8
+ index = nil
9
+
10
+ # nothing to do
11
+ return [domain, index] unless current_token.type == :comma
12
+
13
+ # consume one or more ", key: value" pairs
14
+ while current_token.type == :comma
15
+ advance
16
+ key_tok = current_token
17
+
18
+ unless key_tok.type == :label && %w[domain index].include?(key_tok.value)
19
+ # roll back gracefully if it's not a kw pair
20
+ @pos -= 1
21
+ break
22
+ end
23
+
24
+ advance
25
+
26
+ case key_tok.value
27
+ when 'domain'
28
+ domain = parse_domain_specification
29
+ when 'index'
30
+ sym = expect_token(:symbol)
31
+ index = sym.value.to_sym
32
+ end
33
+ end
34
+
35
+ [domain, index]
36
+ end
37
+
38
+ def convert_literal_value(token)
39
+ case token.type
40
+ when :integer then token.value.gsub('_', '').to_i
41
+ when :float then token.value.gsub('_', '').to_f
42
+ when :string then token.value
43
+ when :boolean then token.value == 'true'
44
+ when :symbol then token.value.to_sym
45
+ when :constant
46
+ case token.value
47
+ when 'Float::INFINITY' then Float::INFINITY
48
+ else
49
+ raise_parse_error("Unknown constant: #{token.value}")
50
+ end
51
+ end
52
+ end
53
+
54
+ def parse_kw_literal_value
55
+ t = current_token
56
+ case t.type
57
+ when :integer then advance
58
+ t.value.delete('_').to_i
59
+ when :float then advance
60
+ t.value.delete('_').to_f
61
+ when :string, :symbol then advance
62
+ t.value
63
+ when :boolean then advance
64
+ t.value == 'true'
65
+ when :label then advance
66
+ t.value.to_sym # :wrap, :clamp, etc.
67
+ when :subtract # allow negatives like -1
68
+ advance
69
+ v = parse_kw_literal_value
70
+ raise_parse_error("numeric after unary '-'") unless v.is_a?(Numeric)
71
+ -v
72
+ else
73
+ raise_parse_error('keyword value must be literal/label')
74
+ end
75
+ end
76
+
77
+ def parse_args_and_opts_inside_parens
78
+ args = []
79
+ opts = {}
80
+
81
+ # expect_token(:lparen)
82
+
83
+ unless current_token.type == :rparen
84
+ # --- positional args ---
85
+ unless next_is_kwarg_after_comma?
86
+ args << parse_expression
87
+ while current_token.type == :comma && !next_is_kwarg_after_comma?
88
+ advance
89
+ args << parse_expression
90
+ end
91
+ end
92
+ # --- kwargs (labels like `policy:`) ---
93
+ if next_is_kwarg_after_comma?
94
+ # subsequent pairs: `, label value`
95
+ while current_token.type == :comma
96
+ # stop if next token is not a kw key
97
+ advance
98
+
99
+ if current_token.type == :label
100
+ key = current_token.value.to_sym
101
+ advance
102
+ end
103
+ opts[key] = parse_kw_literal_value
104
+
105
+ break unless next_is_kwarg_after_comma?
106
+ end
107
+ end
108
+ end
109
+
110
+ expect_token(:rparen)
111
+ [args, opts]
112
+ end
113
+
114
+ def expect_field_name_token
115
+ token = current_token
116
+ if token.identifier? || token.keyword?
117
+ advance
118
+ token.value
119
+ else
120
+ raise_parse_error("Expected field name (identifier or keyword), got #{token.type}")
121
+ end
122
+ end
123
+
124
+ def next_is_kwarg_after_comma?
125
+ current_token.type == :comma && peek_token.type == :label
126
+ end
127
+
128
+ def skip_comments_and_newlines
129
+ advance while %i[newline comment].include?(current_token.type)
130
+ end
131
+
132
+ def advance_and_return_token
133
+ token = current_token
134
+ advance
135
+ token
136
+ end
137
+
138
+ def map_operator_token_to_function_name(token_type)
139
+ case token_type
140
+ when :eq then :==
141
+ when :ne then :!=
142
+ when :gt then :>
143
+ when :lt then :<
144
+ when :gte then :>=
145
+ when :lte then :<=
146
+ when :and then :and
147
+ when :or then :or
148
+ when :exponent then :power
149
+ else token_type
150
+ end
151
+ end
152
+ end
153
+ end
154
+ end
@@ -1,6 +1,6 @@
1
1
  # frozen_string_literal: true
2
2
 
3
- require_relative 'token_metadata'
3
+ require_relative 'token_constants'
4
4
  require_relative 'token'
5
5
  require_relative 'errors'
6
6
 
@@ -400,7 +400,8 @@ module Kumi
400
400
  FUNCTION_SUGAR = {
401
401
  'select' => '__select__',
402
402
  'shift' => 'shift',
403
- 'roll' => 'roll'
403
+ 'roll' => 'roll',
404
+ 'index' => 'index'
404
405
  }
405
406
 
406
407
  # Keywords mapping
@@ -424,8 +425,7 @@ module Kumi
424
425
  'any' => :any_type,
425
426
  'array' => :array_type,
426
427
  'hash' => :hash_type,
427
- 'element' => :element_type,
428
- 'index' => :index_type
428
+ 'element' => :element_type
429
429
  }.freeze
430
430
 
431
431
  # Opener to closer mappings for error recovery
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Kumi
4
4
  module Parser
5
- VERSION = '0.0.24'
5
+ VERSION = '0.0.26'
6
6
  end
7
7
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kumi-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.24
4
+ version: 0.0.26
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kumi Team
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-10-04 00:00:00.000000000 Z
11
+ date: 2025-10-13 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: parslet
@@ -132,12 +132,13 @@ files:
132
132
  - lib/kumi/parser/direct_parser.rb
133
133
  - lib/kumi/parser/error_extractor.rb
134
134
  - lib/kumi/parser/errors.rb
135
+ - lib/kumi/parser/helpers.rb
135
136
  - lib/kumi/parser/smart_tokenizer.rb
136
137
  - lib/kumi/parser/syntax_validator.rb
137
138
  - lib/kumi/parser/text_parser.rb
138
139
  - lib/kumi/parser/text_parser/api.rb
139
140
  - lib/kumi/parser/token.rb
140
- - lib/kumi/parser/token_metadata.rb
141
+ - lib/kumi/parser/token_constants.rb
141
142
  - lib/kumi/parser/version.rb
142
143
  - lib/kumi/text_parser.rb
143
144
  - lib/kumi/text_schema.rb