kumi-parser 0.0.2 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,502 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kumi
4
+ module Parser
5
+ # Direct AST construction parser using recursive descent with embedded token metadata
6
+ class DirectParser
7
# Builds a parser over an already-tokenized input.
#
# @param tokens [Array] token sequence to parse. The cursor helpers fall
#   back to the last element when reading past the end, so the sequence is
#   presumably terminated by an EOF token (confirm against the tokenizer).
def initialize(tokens)
  @pos = 0 # index of the token currently under the cursor
  @tokens = tokens
end
11
+
12
# Parses the whole token stream as one schema.
#
# After the schema, trailing comments and newlines are skipped and the next
# token must be :eof.
#
# @return the node produced by parse_schema
# @raise [Errors::ParseError] via expect_token when something other than
#   :eof follows the schema
def parse
  parse_schema.tap do
    skip_comments_and_newlines
    expect_token(:eof)
  end
end
18
+
19
+ private
20
+
21
# Token under the cursor.
#
# @return the token at @pos, or the last token (expected to be EOF) when
#   there is no token at that index
def current_token
  token = @tokens[@pos]
  return token if token

  @tokens.last # past the end: fall back to the final token
end
24
+
25
# Looks ahead without moving the cursor.
#
# @param offset [Integer] how many tokens past the cursor to look (default 1)
# @return the token at that position, or the last token (expected to be
#   EOF) when the position is at or beyond the end of the stream
def peek_token(offset = 1)
  target = @pos + offset
  target < @tokens.length ? @tokens[target] : @tokens.last
end
31
+
32
# Moves the cursor one token forward, but never past the last token.
#
# @return [Integer, nil] the new cursor index, or nil when the cursor was
#   already on the last token and did not move
def advance
  last_index = @tokens.length - 1
  return if @pos >= last_index

  @pos += 1
end
35
+
36
# Consumes the current token, requiring it to be of a specific type.
#
# @param expected_type [Symbol] token type that must be under the cursor
# @return the consumed token
# @raise [Errors::ParseError] via raise_parse_error when the current token
#   has a different type
def expect_token(expected_type)
  found = current_token
  raise_parse_error("Expected #{expected_type}, got #{found.type}") unless found.type == expected_type

  advance
  found
end
42
+
43
# Advances the cursor past any run of :newline tokens.
def skip_newlines
  advance until current_token.type != :newline
end
46
+
47
# Advances the cursor past any run of :newline and :comment tokens.
def skip_comments_and_newlines
  while (kind = current_token.type) == :newline || kind == :comment
    advance
  end
end
50
+
51
# Schema: 'schema' 'do' <input block> (value | trait)* 'end'
#
# The input block is mandatory and comes first; value and trait
# declarations may then appear in any order and are collected into
# separate lists. Comments and newlines between declarations are skipped.
#
# @return [Kumi::Syntax::Root] located at the 'schema' keyword
# @raise [Errors::ParseError] via expect_token on a missing keyword
def parse_schema
  schema_token = expect_token(:schema)
  expect_token(:do)
  skip_comments_and_newlines

  inputs = parse_input_block
  values = []
  traits = []

  skip_comments_and_newlines
  while (declaration_kind = current_token.type) == :value || declaration_kind == :trait
    if declaration_kind == :value
      values << parse_value_declaration
    else
      traits << parse_trait_declaration
    end
    skip_comments_and_newlines
  end

  expect_token(:end)

  # Argument order follows Root's AST layout: inputs, values (the
  # "attributes" slot), then traits.
  Kumi::Syntax::Root.new(inputs, values, traits, loc: schema_token.location)
end
83
+
84
# Input block: 'input' 'do' <input declarations> 'end'
#
# Declarations are read for as long as the current token is a type keyword
# (token metadata category :type_keyword). Anything else ends the list, and
# the closing 'end' is then required.
#
# @return [Array] the parsed input declarations, in source order
# @raise [Errors::ParseError] via expect_token on a missing keyword
def parse_input_block
  expect_token(:input)
  expect_token(:do)
  skip_comments_and_newlines

  fields = []
  until %i[end eof].include?(current_token.type) ||
        current_token.metadata[:category] != :type_keyword
    fields << parse_input_declaration
    skip_comments_and_newlines
  end

  expect_token(:end)
  fields
end
103
+
104
# Input declaration: 'integer :name', 'integer :name, domain: ...' or
# 'array :items do ... end'
#
# The current token must be a type keyword (metadata category
# :type_keyword); its metadata[:type_name] becomes the declared type.
# A nested 'do ... end' body is only parsed for the :array type and is read
# recursively with the same rules.
#
# @return [Kumi::Syntax::InputDeclaration] located at the type keyword
# @raise [Errors::ParseError] when the current token is not a type keyword,
#   or via expect_token when the name symbol / ':' / 'end' is missing
def parse_input_declaration
  type_token = current_token
  unless type_token.metadata[:category] == :type_keyword
    raise_parse_error("Expected type keyword, got #{type_token.type}")
  end
  type_name = type_token.metadata[:type_name]

  advance
  name_token = expect_token(:symbol)

  # Handle domain specification: ', domain: [...]'.
  # Peek before consuming so a comma that does not introduce `domain` is
  # left in place. Rewinding with `@pos -= 1` after the fact is unsafe
  # because `advance` does not move when the cursor is on the last token.
  domain = nil
  if current_token.type == :comma &&
     peek_token.type == :identifier && peek_token.value == 'domain'
    advance # consume ','
    advance # consume 'domain'
    expect_token(:colon)
    domain = parse_domain_specification
  end

  # Handle nested array declarations
  children = []
  if type_name == :array && current_token.type == :do
    advance # consume 'do'
    skip_comments_and_newlines

    until %i[end eof].include?(current_token.type) ||
          current_token.metadata[:category] != :type_keyword
      children << parse_input_declaration
      skip_comments_and_newlines
    end

    expect_token(:end)
  end

  Kumi::Syntax::InputDeclaration.new(
    name_token.value,
    domain,
    type_name,
    children,
    loc: type_token.location
  )
end
154
+
155
# Reads the value that follows 'domain:' in an input declaration.
#
# Only bracketed arrays are actually parsed. Any other form (the original
# notes mention e.g. `1..10` or `%w[a b c]`) is not implemented yet: its
# tokens are skipped up to the next comma, newline, 'end' or EOF.
#
# @return the node from parse_array_literal for a bracketed domain,
#   otherwise nil
def parse_domain_specification
  return parse_array_literal if current_token.type == :lbracket

  stop_types = %i[comma newline eof end]
  advance until stop_types.include?(current_token.type)
  nil
end
166
+
167
# Value declaration: 'value :name, expression' or 'value :name do ... end'
#
# A 'do' right after the name starts a cascade body; otherwise a comma and
# a single expression are required.
#
# @return [Kumi::Syntax::ValueDeclaration] located at the 'value' keyword
# @raise [Errors::ParseError] via expect_token on a missing token
def parse_value_declaration
  keyword = expect_token(:value)
  name = expect_token(:symbol).value

  body =
    if current_token.type == :do
      parse_cascade_expression
    else
      expect_token(:comma)
      parse_expression
    end

  Kumi::Syntax::ValueDeclaration.new(name, body, loc: keyword.location)
end
187
+
188
# Trait declaration: 'trait :name, expression'
#
# @return [Kumi::Syntax::TraitDeclaration] located at the 'trait' keyword
# @raise [Errors::ParseError] via expect_token on a missing token
def parse_trait_declaration
  keyword = expect_token(:trait)
  name = expect_token(:symbol).value
  expect_token(:comma)
  predicate = parse_expression

  Kumi::Syntax::TraitDeclaration.new(name, predicate, loc: keyword.location)
end
201
+
202
# Cascade expression: 'do' (on-case | base-case)* 'end'
#
# Cases are collected for as long as the current token is 'on' or 'base',
# with comments and newlines skipped between them.
#
# @return [Kumi::Syntax::CascadeExpression] located at the opening 'do'
# @raise [Errors::ParseError] via expect_token when 'do' or 'end' is missing
def parse_cascade_expression
  do_token = expect_token(:do)
  skip_comments_and_newlines

  branches = []
  while current_token.type == :on || current_token.type == :base
    branches << parse_case_expression
    skip_comments_and_newlines
  end

  expect_token(:end)
  Kumi::Syntax::CascadeExpression.new(branches, loc: do_token.location)
end
217
+
218
# Case expression: 'on condition, result' or 'base result'
#
# A 'base' case gets a literal `true` as its condition. For 'on', a
# condition that is a bare declaration reference is wrapped in an `all?`
# call to match Ruby DSL behavior.
#
# @return [Kumi::Syntax::CaseExpression] located at the 'on'/'base' keyword
# @raise [Errors::ParseError] when the current token is neither 'on' nor
#   'base', or via expect_token when the comma after the condition is missing
def parse_case_expression
  keyword = current_token
  unless %i[on base].include?(keyword.type)
    raise_parse_error("Expected 'on' or 'base' in cascade expression")
  end
  advance

  if keyword.type == :base
    fallback = parse_expression
    always = Kumi::Syntax::Literal.new(true, loc: keyword.location)
    return Kumi::Syntax::CaseExpression.new(always, fallback, loc: keyword.location)
  end

  condition = parse_expression
  condition = wrap_condition_in_all(condition) if simple_trait_reference?(condition)
  expect_token(:comma)
  outcome = parse_expression

  Kumi::Syntax::CaseExpression.new(condition, outcome, loc: keyword.location)
end
245
+
246
# Consumes the current token without checking its type.
#
# @return the token that was under the cursor before advancing
def advance_and_return_token
  current_token.tap { advance }
end
251
+
252
# Expression parsing with operator precedence.
#
# Parses one primary expression, then keeps folding in binary operators
# whose precedence is at least `min_precedence`. Precedence and
# associativity are read from the operator token itself. Comments and
# newlines around operators are skipped, so an expression may span lines.
#
# @param min_precedence [Integer] lowest operator precedence this call may
#   consume (default 0)
# @return the parsed node; each binary operation becomes a
#   Kumi::Syntax::CallExpression located at its operator token
def parse_expression(min_precedence = 0)
  lhs = parse_primary_expression
  skip_comments_and_newlines

  while current_token.operator? && current_token.precedence >= min_precedence
    op = current_token
    advance
    skip_comments_and_newlines

    # A left-associative operator must not re-consume an operator of equal
    # precedence on its right-hand side, so the bar is raised by one.
    rhs_floor = op.left_associative? ? op.precedence + 1 : op.precedence
    rhs = parse_expression(rhs_floor)

    lhs = Kumi::Syntax::CallExpression.new(
      map_operator_token_to_function_name(op.type),
      [lhs, rhs],
      loc: op.location
    )
    skip_comments_and_newlines
  end

  lhs
end
286
+
287
# Parses one operand: a literal, a reference, a parenthesized expression,
# an array literal or an `fn(...)` call.
#
# Leading newline/comment tokens are skipped before retrying.
#
# @return the parsed syntax node
# @raise [Errors::ParseError] for a bare 'input' keyword not followed by
#   '.', or for any token that cannot start an expression
def parse_primary_expression
  token = current_token
  kind = token.type

  if %i[integer float string boolean].include?(kind)
    literal_value = convert_literal_value(token)
    advance
    return Kumi::Syntax::Literal.new(literal_value, loc: token.location)
  end

  case kind
  when :identifier
    # Order matters: `input.` wins over indexing, indexing wins over `fn`.
    return parse_input_reference if token.value == 'input' && peek_token.type == :dot
    return parse_array_access_reference if peek_token.type == :lbracket
    return parse_function_call if token.value == 'fn'

    advance
    Kumi::Syntax::DeclarationReference.new(token.value.to_sym, loc: token.location)
  when :input
    # 'input' keyword token: only valid here as the head of input.field
    raise_parse_error("Unexpected 'input' keyword in expression") unless peek_token.type == :dot

    parse_input_reference_from_input_token
  when :lparen
    advance # consume '('
    parse_expression.tap { expect_token(:rparen) }
  when :lbracket
    parse_array_literal
  when :fn
    parse_function_call_from_fn_token
  when :newline, :comment
    skip_comments_and_newlines
    parse_primary_expression
  else
    raise_parse_error("Unexpected token in expression: #{kind}")
  end
end
338
+
339
# Input reference whose leading 'input' is an :identifier token:
# input.field or input.field.subfield...
#
# @return [Kumi::Syntax::InputReference, Kumi::Syntax::InputElementReference]
# @raise [Errors::ParseError] via expect_token on a malformed path
def parse_input_reference
  parse_input_path(expect_token(:identifier)) # 'input'
end

# Input reference whose leading 'input' is an :input keyword token.
# Otherwise identical to parse_input_reference.
#
# @return [Kumi::Syntax::InputReference, Kumi::Syntax::InputElementReference]
# @raise [Errors::ParseError] via expect_token on a malformed path
def parse_input_reference_from_input_token
  parse_input_path(expect_token(:input)) # 'input' keyword token
end

# Shared tail of both input-reference forms: parses '.field(.subfield)*'
# after the leading 'input' token has been consumed.
#
# @param input_token the consumed 'input' token; supplies the node location
# @return [Kumi::Syntax::InputReference] for a single-segment path,
#   [Kumi::Syntax::InputElementReference] for a longer one
def parse_input_path(input_token)
  expect_token(:dot)
  path = [expect_token(:identifier).value.to_sym]

  # Handle nested access: input.field.subfield
  while current_token.type == :dot
    advance # consume '.'
    path << expect_token(:identifier).value.to_sym
  end

  if path.length == 1
    Kumi::Syntax::InputReference.new(path.first, loc: input_token.location)
  else
    Kumi::Syntax::InputElementReference.new(path, loc: input_token.location)
  end
end
376
+
377
# Indexed reference: name[index_expression]
#
# Produced as a call to :at with the declaration reference and the index
# expression as arguments; both nodes are located at the name token.
#
# @return [Kumi::Syntax::CallExpression]
# @raise [Errors::ParseError] via expect_token on a missing bracket
def parse_array_access_reference
  name_token = expect_token(:identifier)
  expect_token(:lbracket)
  index = parse_expression
  expect_token(:rbracket)

  loc = name_token.location
  target = Kumi::Syntax::DeclarationReference.new(name_token.value.to_sym, loc: loc)
  Kumi::Syntax::CallExpression.new(:at, [target, index], loc: loc)
end
390
+
391
# Function call whose leading 'fn' is an :identifier token:
# fn(:name, arg, ...)
#
# @return [Kumi::Syntax::CallExpression]
# @raise [Errors::ParseError] when '(' does not follow 'fn', or via
#   expect_token on a missing function-name symbol or ')'
def parse_function_call
  expect_token(:identifier) # 'fn'
  parse_fn_invocation
end

# Function call whose leading 'fn' is an :fn keyword token.
# Otherwise identical to parse_function_call.
#
# @return [Kumi::Syntax::CallExpression]
# @raise [Errors::ParseError] when '(' does not follow 'fn', or via
#   expect_token on a missing function-name symbol or ')'
def parse_function_call_from_fn_token
  expect_token(:fn) # 'fn' keyword token
  parse_fn_invocation
end

# Shared tail of both call forms: parses '(:name, arg, ...)' after the
# leading 'fn' token has been consumed. Each argument must be preceded by a
# comma, including the first one after the function-name symbol.
#
# @return [Kumi::Syntax::CallExpression] named by the symbol's value and
#   located at the symbol token
def parse_fn_invocation
  # Only syntax: fn(:symbol, args...)
  raise_parse_error("Expected '(' after 'fn'") unless current_token.type == :lparen

  advance # consume '('
  fn_name_token = expect_token(:symbol)

  args = []
  while current_token.type == :comma
    advance # consume comma
    args << parse_expression
  end

  expect_token(:rparen)
  Kumi::Syntax::CallExpression.new(fn_name_token.value, args, loc: fn_name_token.location)
end
436
+
437
# Parses a comma-separated list of expressions up to, but not including,
# the closing ')'.
#
# @return [Array] the parsed argument nodes; empty when the current token
#   is already ')'
def parse_argument_list
  return [] if current_token.type == :rparen

  args = [parse_expression]
  while current_token.type == :comma
    advance # consume comma
    args << parse_expression
  end
  args
end
450
+
451
# Array literal: '[' (expression (',' expression)* ','?)? ']'
#
# A trailing comma before the closing bracket is accepted.
#
# @return [Kumi::Syntax::ArrayExpression] located at the opening bracket
# @raise [Errors::ParseError] via expect_token on a missing bracket
def parse_array_literal
  open_bracket = expect_token(:lbracket)
  items = []

  if current_token.type != :rbracket
    items << parse_expression
    while current_token.type == :comma
      advance # consume comma
      break if current_token.type == :rbracket # trailing comma

      items << parse_expression
    end
  end

  expect_token(:rbracket)
  Kumi::Syntax::ArrayExpression.new(items, loc: open_bracket.location)
end
466
+
467
# Converts a literal token's text into the Ruby value it denotes.
#
# Underscore digit separators are removed from numeric text before
# conversion. A boolean is true only when its text is exactly 'true'.
#
# @param token literal token responding to #type and #value
# @return [Integer, Float, String, Boolean, nil] nil for any token type
#   other than :integer, :float, :string or :boolean
def convert_literal_value(token)
  kind = token.type
  if kind == :integer
    token.value.delete('_').to_i
  elsif kind == :float
    token.value.delete('_').to_f
  elsif kind == :string
    token.value
  elsif kind == :boolean
    token.value == 'true'
  end
end
475
+
476
# Raises a parse error anchored at the token under the cursor.
#
# @param message [String] description of what went wrong
# @raise [Errors::ParseError] always; carries the current token
def raise_parse_error(message)
  # ParseError reads the location from the token itself, so no separate
  # location lookup is needed here.
  raise Errors::ParseError.new(message, token: current_token)
end
480
+
481
# Tells whether a cascade condition is a bare declaration reference, as
# opposed to a composite expression.
#
# @param condition parsed condition node
# @return [Boolean]
def simple_trait_reference?(condition)
  condition.kind_of?(Kumi::Syntax::DeclarationReference)
end
485
+
486
# Wraps a condition as `all?([condition])`.
#
# Both the array node and the call node reuse the condition's location.
#
# @param condition node responding to #loc
# @return [Kumi::Syntax::CallExpression] call to :all? whose only argument
#   is a one-element Kumi::Syntax::ArrayExpression
def wrap_condition_in_all(condition)
  loc = condition.loc
  Kumi::Syntax::CallExpression.new(
    :all?,
    [Kumi::Syntax::ArrayExpression.new([condition], loc: loc)],
    loc: loc
  )
end
491
+
492
# Maps an operator token type to the function name used in the AST, for
# Ruby DSL compatibility.
#
# Only :eq and :ne are renamed (to :== and :!=); every other type is
# returned unchanged.
#
# @param token_type [Symbol] operator token type
# @return [Symbol] function name
def map_operator_token_to_function_name(token_type)
  { eq: :==, ne: :!= }.fetch(token_type, token_type)
end
500
+ end
501
+ end
502
+ end
@@ -0,0 +1,89 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kumi
4
+ module Parser
5
# Extracts errors from parslet parse failures: turns an exception into a
# plain hash and rewrites technical "Expected ..." messages into
# friendlier wording.
class ErrorExtractor
  # Ordered [pattern, renderer] pairs; the first pattern that matches the
  # cleaned message wins, so specific patterns must precede general ones.
  FRIENDLY_MESSAGE_RULES = [
    [/Expected ":", but got "(\w+)"/, ->(m) { "Missing ':' before symbol, but got \"#{m[1]}\"" }],
    [/Expected ":"/, ->(_m) { "Missing ':' before symbol" }],
    [/Expected "do", but got "(\w+)"/, ->(m) { "Missing 'do' keyword, but got \"#{m[1]}\"" }],
    [/Expected "do"/, ->(_m) { "Missing 'do' keyword" }],
    [/Expected "end", but got (.+)/, ->(m) { "Missing 'end' keyword, but got #{m[1]}" }],
    [/Expected "end"/, ->(_m) { "Missing 'end' keyword" }],
    [/Expected "(\w+)", but got "(\w+)"/, ->(m) { "Missing '#{m[1]}' keyword, but got \"#{m[2]}\"" }],
    [/Expected '(\w+)'/, ->(m) { "Expected '#{m[1]}'" }],
    [/Expected "([^"]+)", but got "([^"]+)"/, ->(m) { "Expected '#{m[1]}', but got \"#{m[2]}\"" }],
    [/Expected "(\w+)"/, ->(m) { "Missing '#{m[1]}' keyword" }],
    [/Failed to match.*Premature end of input/m, ->(_m) { 'Failed to match - premature end of input' }],
    [/Premature end of input/, ->(_m) { "Unexpected end of file - missing 'end'?" }],
    [/Failed to match/, ->(_m) { 'Failed to match sequence' }]
  ].freeze
  private_constant :FRIENDLY_MESSAGE_RULES

  # Builds a diagnostic hash from an exception-like object.
  #
  # The error counts as a syntax error when its class name contains
  # "Syntax"; only then is the message humanized. Otherwise the message is
  # prefixed with the class name. Line and column are read from an
  # "at line N char M" fragment of the message and default to 1/1.
  #
  # @param error object, normally an exception
  # @return [Hash] with keys :message, :line, :column, :severity, :type;
  #   an empty hash when `error` does not respond to #message
  def self.extract(error)
    return {} unless error.respond_to?(:message)

    message = error.message
    class_name = error.class.name
    syntax_error = /Syntax/.match?(class_name)

    located = message.match(/at line (\d+) char (\d+)/)
    line, column = located ? [located[1].to_i, located[2].to_i] : [1, 1]

    {
      message: syntax_error ? extract_user_friendly_message(message) : "#{class_name}: #{message}",
      line: line,
      column: column,
      severity: :error,
      type: syntax_error ? :syntax : :runtime
    }
  end

  # Alias-style entry point for extract_user_friendly_message.
  #
  # @param raw_message [String]
  # @return [String]
  def self.humanize_error_message(raw_message)
    extract_user_friendly_message(raw_message)
  end

  # Rewrites a raw parser message into user-facing wording.
  #
  # Leading "`- " tree markers and " at line N char M" location fragments
  # are stripped first; the cleaned text is then matched against the rule
  # table in order.
  #
  # @param raw_message [String]
  # @return [String] the friendly message, or 'Parse error' when no rule
  #   matches
  def self.extract_user_friendly_message(raw_message)
    cleaned = raw_message.gsub(/^\s*`-\s*/, '').gsub(/ at line \d+ char \d+\.?/, '').strip

    FRIENDLY_MESSAGE_RULES.each do |pattern, render|
      found = pattern.match(cleaned)
      return render.call(found) if found
    end

    'Parse error'
  end
end
88
+ end
89
+ end
@@ -0,0 +1,40 @@
1
+ module Kumi
2
+ module Parser
3
+ # Namespace for parser-related errors
4
+ module Errors
5
# Custom error for parsing issues. Carries the offending token and optional
# fix suggestions; both are folded into the exception message.
class ParseError < StandardError
  attr_reader :token, :suggestions

  # @param message [String] description of the problem
  # @param token token the error is anchored at; must respond to #location
  # @param suggestions [Array] optional hints, listed one per line in the
  #   message
  def initialize(message, token:, suggestions: [])
    @token = token
    @suggestions = suggestions
    super(build_error_message(message))
  end

  private

  # Assembles the multi-line message: a location header, the description,
  # and a "Suggestions:" section when there are any.
  def build_error_message(message)
    header = ["Parse error at #{@token.location}", " #{message}"]
    return header.join("\n") unless @suggestions.any?

    hints = @suggestions.map { |hint| " - #{hint}" }
    [*header, ' Suggestions:', *hints].join("\n")
  end
end
29
+
30
# Error raised for tokenizing problems; the location is appended to the
# message and also kept available through #location.
class TokenizerError < StandardError
  attr_reader :location

  # @param message [String] description of the problem
  # @param location where the problem occurred; interpolated into the message
  def initialize(message, location:)
    text = "#{message} at #{location}"
    super(text)
    @location = location
  end
end
38
+ end
39
+ end
40
+ end