kumi-parser 0.0.33 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,89 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Kumi
4
- module Parser
5
# Extracts errors from parslet parse failures.
#
# Converts an exception-like object into a diagnostic hash and rewrites the
# technical "Expected ..." / "Failed to match ..." messages into short,
# user-facing text.
class ErrorExtractor
  # Captures the line and column from an "at line N char M" message suffix.
  LOCATION_PATTERN = /at line (\d+) char (\d+)/.freeze

  # Ordered rewrite rules. The first pattern that matches the cleaned message
  # wins, so specific patterns must stay ahead of the generic ones they overlap.
  # Each builder receives the MatchData of its own pattern.
  FRIENDLY_MESSAGE_RULES = [
    [/Expected ":", but got "(\w+)"/, ->(m) { "Missing ':' before symbol, but got \"#{m[1]}\"" }],
    [/Expected ":"/, ->(_m) { "Missing ':' before symbol" }],
    [/Expected "do", but got "(\w+)"/, ->(m) { "Missing 'do' keyword, but got \"#{m[1]}\"" }],
    [/Expected "do"/, ->(_m) { "Missing 'do' keyword" }],
    [/Expected "end", but got (.+)/, ->(m) { "Missing 'end' keyword, but got #{m[1]}" }],
    [/Expected "end"/, ->(_m) { "Missing 'end' keyword" }],
    [/Expected "(\w+)", but got "(\w+)"/, ->(m) { "Missing '#{m[1]}' keyword, but got \"#{m[2]}\"" }],
    [/Expected '(\w+)'/, ->(m) { "Expected '#{m[1]}'" }],
    [/Expected "([^"]+)", but got "([^"]+)"/, ->(m) { "Expected '#{m[1]}', but got \"#{m[2]}\"" }],
    [/Expected "(\w+)"/, ->(m) { "Missing '#{m[1]}' keyword" }],
    [/Failed to match.*Premature end of input/m, ->(_m) { 'Failed to match - premature end of input' }],
    [/Premature end of input/, ->(_m) { "Unexpected end of file - missing 'end'?" }],
    [/Failed to match/, ->(_m) { 'Failed to match sequence' }]
  ].freeze

  # Builds a diagnostic hash from an error object.
  #
  # @param error [#message] any object; objects that do not respond to
  #   +message+ produce an empty hash
  # @return [Hash] +{}+ or a hash with +:message+, +:line+, +:column+,
  #   +:severity+ (always +:error+) and +:type+ (+:syntax+ or +:runtime+)
  def self.extract(error)
    return {} unless error.respond_to?(:message)

    # A nil message is treated as empty text instead of raising NoMethodError.
    message = error.message.to_s

    # The error counts as a syntax error when its class name contains "Syntax";
    # anonymous classes (nil name) fall through to :runtime.
    error_type = error.class.name.to_s.match?(/Syntax/) ? :syntax : :runtime

    # Line and column default to 1 when the message carries no location suffix.
    location = LOCATION_PATTERN.match(message)
    line, column = location ? [location[1].to_i, location[2].to_i] : [1, 1]

    formatted_message =
      if error_type == :syntax
        extract_user_friendly_message(message)
      else
        "#{error.class.name}: #{message}"
      end

    {
      message: formatted_message,
      line: line,
      column: column,
      severity: :error,
      type: error_type
    }
  end

  # Public alias-style entry point for {extract_user_friendly_message}.
  #
  # @param raw_message [String, nil]
  # @return [String]
  def self.humanize_error_message(raw_message)
    extract_user_friendly_message(raw_message)
  end

  # Rewrites a raw parser message into user-facing text.
  #
  # @param raw_message [String, nil] nil is treated as an empty message
  # @return [String] the first matching rewrite, or 'Parse error' when no
  #   rule applies
  def self.extract_user_friendly_message(raw_message)
    # Strip the "`- " tree markers, the location suffix and outer whitespace
    # before matching.
    cleaned_message = raw_message.to_s
                                 .gsub(/^\s*`-\s*/, '')
                                 .gsub(/ at line \d+ char \d+\.?/, '')
                                 .strip

    FRIENDLY_MESSAGE_RULES.each do |pattern, builder|
      match = pattern.match(cleaned_message)
      return builder.call(match) if match
    end

    'Parse error'
  end
end
88
- end
89
- end
@@ -1,40 +0,0 @@
1
- module Kumi
2
- module Parser
3
- # Namespace for parser-related errors
4
- module Errors
5
# Custom error for parsing issues.
#
# The exception message is assembled from the offending token's location, the
# caller's message and an optional list of suggestions.
class ParseError < StandardError
  attr_reader :token, :suggestions

  # @param message [String] description of what went wrong
  # @param token [#location, nil] token the error refers to; a nil token or a
  #   token without a location is reported as "unknown location" instead of
  #   raising NoMethodError while the real error is being built
  # @param suggestions [Array<String>, nil] optional hints; nil is treated as
  #   an empty list
  def initialize(message, token:, suggestions: [])
    @token = token
    @suggestions = Array(suggestions)
    super(build_error_message(message))
  end

  private

  # Joins the header, the message and any suggestions, one per line.
  def build_error_message(message)
    lines = ["Parse error at #{token_location}", " #{message}"]

    unless @suggestions.empty?
      lines << ' Suggestions:'
      lines.concat(@suggestions.map { |suggestion| " - #{suggestion}" })
    end

    lines.join("\n")
  end

  # Location text for the header, with a fallback when none is available.
  def token_location
    location = @token.location if @token.respond_to?(:location)
    location || 'unknown location'
  end
end
29
-
30
# Raised when the tokenizer cannot continue; remembers where it stopped.
class TokenizerError < StandardError
  attr_reader :location

  # @param message [String] description of the problem
  # @param location [Object] where it happened; interpolated into the
  #   exception message and exposed through #location
  def initialize(message, location:)
    description = "#{message} at #{location}"
    super(description)
    @location = location
  end
end
38
- end
39
- end
40
- end
@@ -1,154 +0,0 @@
1
- module Kumi
2
- module Parser
3
# Parsing helpers meant to be mixed into a token-stream parser.
#
# The including object must supply +current_token+, +peek_token+, +advance+,
# +expect_token+, +parse_expression+, +parse_domain_specification+,
# +raise_parse_error+ and a +@pos+ cursor; none of them are defined here.
module Helpers
  # Keyword names accepted after an input declaration's type.
  DECL_KWARG_KEYS = %w[domain index].freeze

  # Token types skipped by #skip_comments_and_newlines.
  SKIPPABLE_TOKEN_TYPES = %i[newline comment].freeze

  # Operator token types whose function name differs from the token type.
  OPERATOR_FUNCTION_NAMES = {
    eq: :==,
    ne: :!=,
    gt: :>,
    lt: :<,
    gte: :>=,
    lte: :<=,
    and: :and,
    or: :or,
    exponent: :power
  }.freeze

  # Parses optional ", domain: ..., index: :sym" (order-agnostic, both optional).
  # Cursor is right after the array/hash/type name.
  #
  # @return [Array] +[domain, index]+; either element is nil when absent
  def parse_optional_decl_kwargs
    domain = nil
    index = nil

    # consume one or more ", key: value" pairs
    while current_token.type == :comma
      advance
      key_tok = current_token

      unless key_tok.type == :label && DECL_KWARG_KEYS.include?(key_tok.value)
        # Not a kw pair: give the comma back and stop.
        # NOTE(review): assumes the host's #advance moved @pos by exactly one.
        @pos -= 1
        break
      end

      advance

      if key_tok.value == 'domain'
        domain = parse_domain_specification
      else
        index = expect_token(:symbol).value.to_sym
      end
    end

    [domain, index]
  end

  # Converts a literal token into its Ruby value.
  #
  # @param token [#type, #value]
  # @return [Integer, Float, String, Boolean, Symbol, nil] nil for token types
  #   that are not listed below
  # @raise whatever +raise_parse_error+ raises, for an unknown constant
  def convert_literal_value(token)
    case token.type
    when :integer then token.value.delete('_').to_i
    when :float then token.value.delete('_').to_f
    when :string then token.value
    when :boolean then token.value == 'true'
    when :symbol then token.value.to_sym
    when :constant
      raise_parse_error("Unknown constant: #{token.value}") unless token.value == 'Float::INFINITY'

      Float::INFINITY
    end
  end

  # Parses the value of a keyword option and consumes its token(s).
  #
  # @return [Integer, Float, String, Symbol, Boolean]
  # @raise whatever +raise_parse_error+ raises, when the value is not a
  #   literal/label or a unary minus is not followed by a number
  def parse_kw_literal_value
    token = current_token

    case token.type
    when :integer
      advance
      token.value.delete('_').to_i
    when :float
      advance
      token.value.delete('_').to_f
    when :string, :symbol
      advance
      token.value
    when :boolean
      advance
      token.value == 'true'
    when :label # :wrap, :clamp, etc.
      advance
      token.value.to_sym
    when :subtract # allow negatives like -1
      advance
      operand = parse_kw_literal_value
      raise_parse_error("numeric after unary '-'") unless operand.is_a?(Numeric)
      -operand
    else
      raise_parse_error('keyword value must be literal/label')
    end
  end

  # Parses "arg, arg, key: value, ...)" and consumes the closing paren.
  # The opening paren is not consumed here.
  #
  # @return [Array] +[args, opts]+: positional expressions, then a hash of
  #   keyword options keyed by symbol
  def parse_args_and_opts_inside_parens
    args = []
    opts = {}

    unless current_token.type == :rparen
      # --- positional args ---
      unless next_is_kwarg_after_comma?
        args << parse_expression
        while current_token.type == :comma && !next_is_kwarg_after_comma?
          advance
          args << parse_expression
        end
      end

      # --- kwargs (labels like `policy:`), each introduced by a comma ---
      # The loop condition guarantees the token after the comma is a label,
      # so a value can never be stored under a missing or stale key.
      while next_is_kwarg_after_comma?
        advance # comma
        key = current_token.value.to_sym
        advance # label
        opts[key] = parse_kw_literal_value
      end
    end

    expect_token(:rparen)
    [args, opts]
  end

  # Consumes an identifier or keyword token used as a field name.
  #
  # @return [Object] the token's value
  def expect_field_name_token
    token = current_token
    unless token.identifier? || token.keyword?
      raise_parse_error("Expected field name (identifier or keyword), got #{token.type}")
    end

    advance
    token.value
  end

  # True when the cursor is on a comma that is directly followed by a label.
  def next_is_kwarg_after_comma?
    current_token.type == :comma && peek_token.type == :label
  end

  # Advances past any run of newline and comment tokens.
  def skip_comments_and_newlines
    advance while SKIPPABLE_TOKEN_TYPES.include?(current_token.type)
  end

  # Returns the current token and moves the cursor past it.
  def advance_and_return_token
    current_token.tap { advance }
  end

  # Maps an operator token type to the function name it stands for; token
  # types without a mapping are returned unchanged.
  def map_operator_token_to_function_name(token_type)
    OPERATOR_FUNCTION_NAMES.fetch(token_type, token_type)
  end
end
153
- end
154
- end
@@ -1,373 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'token_constants'
4
- require_relative 'token'
5
- require_relative 'errors'
6
-
7
- module Kumi
8
- module Parser
9
# Context-aware tokenizer that produces tokens with embedded semantic metadata.
#
# Every token carries the location (file, line, column) of its FIRST character.
# Collaborators resolved at run time and defined outside this class: Token,
# CHAR_TO_TOKEN, Errors::TokenizerError, Kumi::Syntax::Location and the
# Kumi::Parser::TOKEN_METADATA / KEYWORDS / FUNCTION_SUGAR tables.
class SmartTokenizer
  # Operators recognised by longest match, keyed by their first character.
  # Candidates are listed longest first. Tokens built from this table share the
  # metadata object from TOKEN_METADATA (it is not duplicated).
  LONGEST_MATCH_OPERATORS = {
    '=' => [['=>', :arrow], ['==', :eq]],
    '!' => [['!=', :ne]],
    '>' => [['>=', :gte], ['>', :gt]],
    '<' => [['<=', :lte], ['<', :lt]],
    '*' => [['**', :exponent]],
    '.' => [['...', :dot_dot_dot], ['..', :dot_dot]]
  }.freeze

  # Backslash escapes that do not simply stand for the escaped character itself.
  SIMPLE_ESCAPES = { 'n' => "\n", 't' => "\t", 'r' => "\r" }.freeze

  # @param source [String] text to tokenize
  # @param source_file [String] file name recorded in every token location
  def initialize(source, source_file: '<input>')
    @source = source
    @source_file = source_file
    @pos = 0
    @line = 1
    @column = 1
    @context_stack = [:global]
    @tokens = []
  end

  # Scans the rest of the source and returns the accumulated token list,
  # terminated by an :eof token.
  #
  # @return [Array<Token>]
  # @raise [Errors::TokenizerError] on an unterminated string, a lone '=' or
  #   '!', or a character with no CHAR_TO_TOKEN entry
  def tokenize
    while @pos < @source.length
      skip_whitespace_except_newlines

      case current_char
      when nil then break
      when "\n" then handle_newline
      when '#' then consume_comment
      when '"', "'" then consume_string
      when /\d/ then consume_number
      when '-'
        # NOTE(review): a '-' directly followed by a digit is always lexed as
        # part of a negative number, even right after an operand (`a-1`).
        if peek_char&.match?(/\d/)
          consume_number
        else
          consume_operator_or_punctuation
        end
      when /[a-zA-Z_]/ then consume_identifier_or_label_or_keyword
      when ':' then consume_symbol_or_colon
      else
        consume_operator_or_punctuation
      end
    end

    add_token(:eof, nil, {})
    @tokens
  end

  private

  # Character under the cursor, or nil at end of input.
  def current_char
    @source[@pos] if @pos < @source.length
  end

  # Character +offset+ positions ahead of the cursor, or nil past the end.
  def peek_char(offset = 1)
    peek_pos = @pos + offset
    @source[peek_pos] if peek_pos < @source.length
  end

  # Moves the cursor one character forward, keeping line/column in step.
  def advance
    if current_char == "\n"
      @line += 1
      @column = 1
    else
      @column += 1
    end
    @pos += 1
  end

  def skip_whitespace_except_newlines
    advance while current_char&.match?(/[ \t\r]/)
  end

  def handle_newline
    # Record the token before advancing so it is located on the line it ends.
    add_token(:newline, "\n", Kumi::Parser::TOKEN_METADATA[:newline])
    advance
  end

  # Emits a :comment token holding the text after '#', up to the end of line.
  def consume_comment
    start_column = @column
    advance # skip #

    comment_text = consume_while { |c| c != "\n" }
    add_token(:comment, comment_text, Kumi::Parser::TOKEN_METADATA[:comment], column: start_column)
  end

  # Emits a :string token for a single- or double-quoted literal. A backslash
  # makes the next character literal, except \n, \t and \r which are translated.
  def consume_string
    start_line = @line # strings may span lines; report where they begin
    start_column = @column
    quote_char = current_char
    advance # skip opening quote

    string_content = +''
    while current_char && current_char != quote_char
      if current_char == '\\'
        advance
        escaped = current_char
        # escaped is nil when the backslash is the last character of the input
        string_content << SIMPLE_ESCAPES.fetch(escaped, escaped) if escaped
      else
        string_content << current_char
      end
      advance
    end

    raise_tokenizer_error('Unterminated string literal') if current_char != quote_char

    advance # skip closing quote
    add_token(:string, string_content, Kumi::Parser::TOKEN_METADATA[:string],
              line: start_line, column: start_column)
  end

  # Emits an :integer or :float token whose value is the raw text, including
  # any leading '-' and '_' separators.
  def consume_number
    start_column = @column
    number_str = +''
    has_dot = false

    if current_char == '-'
      number_str << '-'
      advance
    end

    while (char = current_char)
      if char == '.'
        # Only the first dot, and only when a digit follows, belongs to the
        # number; otherwise it is member access or a range operator.
        break if has_dot || !peek_char&.match?(/\d/)

        has_dot = true
      elsif !char.match?(/[0-9_]/)
        break
      end
      number_str << char
      advance
    end

    token_type = has_dot ? :float : :integer
    add_token(token_type, number_str, Kumi::Parser::TOKEN_METADATA[token_type], column: start_column)
  end

  # Emits, in priority order: a :constant (Name::Name...), a :label (name:),
  # a keyword, a :function_sugar token, or an :identifier.
  def consume_identifier_or_label_or_keyword
    start_column = @column
    name = consume_while { |c| c.match?(/[a-zA-Z0-9_]/) }

    # Constants are checked before labels because both continue with ':'.
    if current_char == ':' && peek_char == ':'
      return add_token(:constant, consume_constant_path(name),
                       Kumi::Parser::TOKEN_METADATA[:constant], column: start_column)
    end

    if current_char == ':'
      advance # consume the colon
      return add_token(:label, name, Kumi::Parser::TOKEN_METADATA[:label], column: start_column)
    end

    keyword_type = Kumi::Parser::KEYWORDS[name]
    return add_keyword_token(keyword_type, name, start_column) if keyword_type

    if Kumi::Parser::FUNCTION_SUGAR[name]
      return add_token(:function_sugar, name,
                       Kumi::Parser::TOKEN_METADATA[:function_sugar].dup, column: start_column)
    end

    metadata = Kumi::Parser::TOKEN_METADATA[:identifier].dup
    case current_context
    when :input then metadata[:context] = :input_declaration
    when :schema then metadata[:context] = :schema_body
    end
    add_token(:identifier, name, metadata, column: start_column)
  end

  # Consumes every following "::Segment" and returns the full constant path.
  def consume_constant_path(first_segment)
    path = first_segment
    while current_char == ':' && peek_char == ':'
      advance # consume first :
      advance # consume second :
      path = "#{path}::#{consume_while { |c| c.match?(/[a-zA-Z0-9_]/) }}"
    end
    path
  end

  # Emits a keyword token and updates the context stack: `schema` and `input`
  # push a context, `end` pops one (never the bottom :global entry).
  # NOTE(review): every `end` pops, but only `schema`/`input` push here -
  # confirm this stays balanced for other `do ... end` blocks.
  def add_keyword_token(keyword_type, text, column)
    metadata = Kumi::Parser::TOKEN_METADATA[keyword_type].dup

    case keyword_type
    when :schema, :input
      @context_stack.push(keyword_type)
      metadata[:opens_context] = keyword_type
    when :end
      closed_context = @context_stack.pop if @context_stack.length > 1
      metadata[:closes_context] = closed_context
    end

    add_token(keyword_type, text, metadata, column: column)
  end

  # Emits a :symbol token for ":name", otherwise a bare :colon token.
  def consume_symbol_or_colon
    start_column = @column

    if peek_char&.match?(/[a-zA-Z_]/)
      advance # skip :
      symbol_name = consume_while { |c| c.match?(/[a-zA-Z0-9_]/) }
      add_token(:symbol, symbol_name.to_sym, Kumi::Parser::TOKEN_METADATA[:symbol], column: start_column)
    else
      add_token(:colon, ':', Kumi::Parser::TOKEN_METADATA[:colon], column: start_column)
      advance
    end
  end

  # Emits an operator/punctuation token: longest-match operators first, then
  # single characters looked up in CHAR_TO_TOKEN.
  def consume_operator_or_punctuation
    start_column = @column
    char = current_char

    candidates = LONGEST_MATCH_OPERATORS.fetch(char, [])
    lexeme, token_type = candidates.find { |text, _type| @source[@pos, text.length] == text }
    if lexeme
      lexeme.length.times { advance }
      return add_token(token_type, lexeme, Kumi::Parser::TOKEN_METADATA[token_type], column: start_column)
    end

    # '=' and '!' only exist as part of a two-character operator.
    case char
    when '=' then raise_tokenizer_error("Unexpected '=' (did you mean '=='?)")
    when '!' then raise_tokenizer_error("Unexpected '!' (did you mean '!='?)")
    end

    token_type = CHAR_TO_TOKEN[char]
    raise_tokenizer_error("Unexpected character: #{char}") unless token_type

    add_token(token_type, char, Kumi::Parser::TOKEN_METADATA[token_type].dup, column: start_column)
    advance
  end

  # Consumes characters while the block returns truthy; returns them joined.
  def consume_while
    result = +''
    while current_char && yield(current_char)
      result << current_char
      advance
    end
    result
  end

  def current_context
    @context_stack.last
  end

  # Appends a token. +line+/+column+ default to the cursor position; callers
  # that have already consumed the lexeme pass where it started.
  def add_token(type, value, metadata, line: @line, column: @column)
    location = Kumi::Syntax::Location.new(file: @source_file, line: line, column: column)
    @tokens << Token.new(type, value, location, metadata)
  end

  # Raises a tokenizer error located at the current cursor position.
  def raise_tokenizer_error(message)
    location = Kumi::Syntax::Location.new(file: @source_file, line: @line, column: @column)
    raise Errors::TokenizerError.new(message, location: location)
  end
end
370
-
371
- # Custom error for tokenization issues
372
- end
373
- end
@@ -1,21 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Kumi
4
- module Parser
5
# Validates Kumi DSL syntax using new parser.
#
# Thin instance-level facade over Kumi::Parser::Base.validate.
class SyntaxValidator
  # Runs validation and returns whatever Kumi::Parser::Base.validate returns.
  #
  # @param text [String] DSL source to check
  # @param source_file [String] file name passed through to the parser
  def validate(text, source_file: '<input>')
    Kumi::Parser::Base.validate(text, source_file: source_file)
  end

  # @return [Boolean] true when validation reports no diagnostics
  def valid?(text, source_file: '<input>')
    diagnostics = validate(text, source_file: source_file)
    diagnostics.empty?
  end

  # @return [Object, nil] the +:message+ entry of the first diagnostic, or nil
  #   when there are none
  def first_error(text, source_file: '<input>')
    diagnostics = validate(text, source_file: source_file)
    return nil if diagnostics.empty?

    diagnostics.first[:message]
  end
end
20
- end
21
- end