kumi-parser 0.0.3 → 0.0.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,370 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Kumi
4
+ module Parser
5
+ # Token types
6
+ module TokenType
7
+ # Literals
8
+ INTEGER = :integer
9
+ FLOAT = :float
10
+ STRING = :string
11
+ BOOLEAN = :boolean
12
+
13
+ # Identifiers and symbols
14
+ IDENTIFIER = :identifier
15
+ SYMBOL = :symbol # :name
16
+
17
+ # Keywords
18
+ SCHEMA = :schema
19
+ INPUT = :input
20
+ VALUE = :value
21
+ TRAIT = :trait
22
+ DO = :do
23
+ END_KW = :end
24
+ ON = :on
25
+ BASE = :base
26
+
27
+ # Type keywords
28
+ INTEGER_TYPE = :integer_type # integer
29
+ FLOAT_TYPE = :float_type # float
30
+ STRING_TYPE = :string_type # string
31
+ BOOLEAN_TYPE = :boolean_type # boolean
32
+ ANY_TYPE = :any_type # any
33
+ ARRAY_TYPE = :array_type # array
34
+
35
+ # Function keywords
36
+ FN = :fn
37
+
38
+ # Operators (by precedence)
39
+ MULTIPLY = :multiply # *
40
+ DIVIDE = :divide # /
41
+ MODULO = :modulo # %
42
+ ADD = :add # +
43
+ SUBTRACT = :subtract # -
44
+ GTE = :gte # >=
45
+ LTE = :lte # <=
46
+ GT = :gt # >
47
+ LT = :lt # <
48
+ EQ = :eq # ==
49
+ NE = :ne # !=
50
+ AND = :and # &
51
+ OR = :or # |
52
+
53
+ # Punctuation
54
+ DOT = :dot # .
55
+ COMMA = :comma # ,
56
+ COLON = :colon # :
57
+ LPAREN = :lparen # (
58
+ RPAREN = :rparen # )
59
+ LBRACKET = :lbracket # [
60
+ RBRACKET = :rbracket # ]
61
+
62
+ # Special
63
+ NEWLINE = :newline
64
+ EOF = :eof
65
+ COMMENT = :comment # # comment
66
+ end
67
+
68
+ # Rich metadata for each token type
69
+ TOKEN_METADATA = {
70
+ # Keywords with parsing hints
71
+ schema: {
72
+ category: :keyword,
73
+ expects_block: true,
74
+ block_terminator: :end
75
+ },
76
+ input: {
77
+ category: :keyword,
78
+ expects_block: true,
79
+ block_terminator: :end,
80
+ context: :input_declarations
81
+ },
82
+ value: {
83
+ category: :keyword,
84
+ expects_expression: true,
85
+ declaration_type: :value
86
+ },
87
+ trait: {
88
+ category: :keyword,
89
+ expects_expression: true,
90
+ declaration_type: :trait
91
+ },
92
+ do: {
93
+ category: :keyword,
94
+ block_opener: true
95
+ },
96
+ end: {
97
+ category: :keyword,
98
+ block_closer: true,
99
+ terminates_expression: true
100
+ },
101
+ on: {
102
+ category: :keyword,
103
+ cascade_keyword: true,
104
+ expects_condition: true
105
+ },
106
+ base: {
107
+ category: :keyword,
108
+ cascade_keyword: true,
109
+ is_base_case: true
110
+ },
111
+
112
+ # Type keywords
113
+ integer_type: {
114
+ category: :type_keyword,
115
+ starts_declaration: true,
116
+ type_name: :integer
117
+ },
118
+ float_type: {
119
+ category: :type_keyword,
120
+ starts_declaration: true,
121
+ type_name: :float
122
+ },
123
+ string_type: {
124
+ category: :type_keyword,
125
+ starts_declaration: true,
126
+ type_name: :string
127
+ },
128
+ boolean_type: {
129
+ category: :type_keyword,
130
+ starts_declaration: true,
131
+ type_name: :boolean
132
+ },
133
+ any_type: {
134
+ category: :type_keyword,
135
+ starts_declaration: true,
136
+ type_name: :any
137
+ },
138
+ array_type: {
139
+ category: :type_keyword,
140
+ starts_declaration: true,
141
+ type_name: :array
142
+ },
143
+
144
+ # Function keyword
145
+ fn: {
146
+ category: :keyword,
147
+ function_keyword: true,
148
+ starts_expression: true
149
+ },
150
+
151
+ # Operators with precedence and associativity
152
+ multiply: {
153
+ category: :operator,
154
+ precedence: 6,
155
+ associativity: :left,
156
+ arity: :binary
157
+ },
158
+ divide: {
159
+ category: :operator,
160
+ precedence: 6,
161
+ associativity: :left,
162
+ arity: :binary
163
+ },
164
+ modulo: {
165
+ category: :operator,
166
+ precedence: 6,
167
+ associativity: :left,
168
+ arity: :binary
169
+ },
170
+ add: {
171
+ category: :operator,
172
+ precedence: 5,
173
+ associativity: :left,
174
+ arity: :binary
175
+ },
176
+ subtract: {
177
+ category: :operator,
178
+ precedence: 5,
179
+ associativity: :left,
180
+ arity: :binary
181
+ },
182
+ gte: {
183
+ category: :operator,
184
+ precedence: 4,
185
+ associativity: :left,
186
+ arity: :binary,
187
+ returns_boolean: true
188
+ },
189
+ lte: {
190
+ category: :operator,
191
+ precedence: 4,
192
+ associativity: :left,
193
+ arity: :binary,
194
+ returns_boolean: true
195
+ },
196
+ gt: {
197
+ category: :operator,
198
+ precedence: 4,
199
+ associativity: :left,
200
+ arity: :binary,
201
+ returns_boolean: true
202
+ },
203
+ lt: {
204
+ category: :operator,
205
+ precedence: 4,
206
+ associativity: :left,
207
+ arity: :binary,
208
+ returns_boolean: true
209
+ },
210
+ eq: {
211
+ category: :operator,
212
+ precedence: 4,
213
+ associativity: :left,
214
+ arity: :binary,
215
+ returns_boolean: true
216
+ },
217
+ ne: {
218
+ category: :operator,
219
+ precedence: 4,
220
+ associativity: :left,
221
+ arity: :binary,
222
+ returns_boolean: true
223
+ },
224
+ and: {
225
+ category: :operator,
226
+ precedence: 3,
227
+ associativity: :left,
228
+ arity: :binary,
229
+ requires_boolean: true
230
+ },
231
+ or: {
232
+ category: :operator,
233
+ precedence: 2,
234
+ associativity: :left,
235
+ arity: :binary,
236
+ requires_boolean: true
237
+ },
238
+
239
+ # Literals with type information
240
+ integer: {
241
+ category: :literal,
242
+ starts_expression: true,
243
+ ast_class: 'Kumi::Syntax::Literal'
244
+ },
245
+ float: {
246
+ category: :literal,
247
+ starts_expression: true,
248
+ ast_class: 'Kumi::Syntax::Literal'
249
+ },
250
+ string: {
251
+ category: :literal,
252
+ starts_expression: true,
253
+ ast_class: 'Kumi::Syntax::Literal'
254
+ },
255
+ boolean: {
256
+ category: :literal,
257
+ starts_expression: true,
258
+ ast_class: 'Kumi::Syntax::Literal'
259
+ },
260
+
261
+ # Identifiers and references
262
+ identifier: {
263
+ category: :identifier,
264
+ starts_expression: true,
265
+ can_be_reference: true
266
+ },
267
+ symbol: {
268
+ category: :identifier,
269
+ starts_expression: true,
270
+ is_declaration_name: true
271
+ },
272
+
273
+ # Punctuation with parser hints
274
+ dot: {
275
+ category: :punctuation,
276
+ indicates_member_access: true
277
+ },
278
+ comma: {
279
+ category: :punctuation,
280
+ separates_items: true
281
+ },
282
+ colon: {
283
+ category: :punctuation,
284
+ indicates_symbol: true
285
+ },
286
+ lparen: {
287
+ category: :punctuation,
288
+ opens_group: true,
289
+ group_closer: :rparen,
290
+ starts_expression: true
291
+ },
292
+ rparen: {
293
+ category: :punctuation,
294
+ closes_group: true,
295
+ terminates_expression: true
296
+ },
297
+ lbracket: {
298
+ category: :punctuation,
299
+ opens_group: true,
300
+ group_closer: :rbracket,
301
+ starts_expression: true,
302
+ indicates_array: true
303
+ },
304
+ rbracket: {
305
+ category: :punctuation,
306
+ closes_group: true,
307
+ terminates_expression: true
308
+ },
309
+
310
+ # Special tokens
311
+ newline: {
312
+ category: :whitespace,
313
+ separates_statements: true
314
+ },
315
+ eof: {
316
+ category: :special,
317
+ terminates_input: true
318
+ },
319
+ comment: {
320
+ category: :whitespace,
321
+ ignored_by_parser: true
322
+ }
323
+ }.freeze
324
+
325
+ # Character to token mappings
326
+ CHAR_TO_TOKEN = {
327
+ '(' => :lparen,
328
+ ')' => :rparen,
329
+ '[' => :lbracket,
330
+ ']' => :rbracket,
331
+ ',' => :comma,
332
+ '.' => :dot,
333
+ ':' => :colon,
334
+ '+' => :add,
335
+ '-' => :subtract,
336
+ '*' => :multiply,
337
+ '/' => :divide,
338
+ '%' => :modulo,
339
+ '&' => :and,
340
+ '|' => :or
341
+ }.freeze
342
+
343
+ # Keywords mapping
344
+ KEYWORDS = {
345
+ 'schema' => :schema,
346
+ 'input' => :input,
347
+ 'value' => :value,
348
+ 'trait' => :trait,
349
+ 'do' => :do,
350
+ 'end' => :end,
351
+ 'on' => :on,
352
+ 'base' => :base,
353
+ 'fn' => :fn,
354
+ 'true' => :boolean,
355
+ 'false' => :boolean,
356
+ 'integer' => :integer_type,
357
+ 'float' => :float_type,
358
+ 'string' => :string_type,
359
+ 'boolean' => :boolean_type,
360
+ 'any' => :any_type,
361
+ 'array' => :array_type
362
+ }.freeze
363
+
364
+ # Opener to closer mappings for error recovery
365
+ OPENER_FOR_CLOSER = {
366
+ rparen: :lparen,
367
+ rbracket: :lbracket
368
+ }.freeze
369
+ end
370
+ end
@@ -2,6 +2,6 @@
2
2
 
3
3
  module Kumi
4
4
  module Parser
5
- VERSION = '0.0.3'
5
+ VERSION = '0.0.4'
6
6
  end
7
7
  end
@@ -0,0 +1,40 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'parser/text_parser'
4
+
5
+ module Kumi
6
+ # Top-level text parser module with same interface as Ruby DSL
7
+ module TextParser
8
+ extend self
9
+
10
+ # Parse text schema and return AST (same interface as RubyParser::Dsl.build_syntax_tree)
11
+ def parse(text, source_file: '<input>')
12
+ Parser::TextParser.parse(text, source_file: source_file)
13
+ end
14
+
15
+ # Validate text schema
16
+ def valid?(text, source_file: '<input>')
17
+ Parser::TextParser.valid?(text, source_file: source_file)
18
+ end
19
+
20
+ # Get validation diagnostics
21
+ def validate(text, source_file: '<input>')
22
+ Parser::TextParser.validate(text, source_file: source_file)
23
+ end
24
+
25
+ # Get Monaco Editor format diagnostics
26
+ def diagnostics_for_monaco(text, source_file: '<input>')
27
+ Parser::TextParser.diagnostics_for_monaco(text, source_file: source_file)
28
+ end
29
+
30
+ # Get CodeMirror format diagnostics
31
+ def diagnostics_for_codemirror(text, source_file: '<input>')
32
+ Parser::TextParser.diagnostics_for_codemirror(text, source_file: source_file)
33
+ end
34
+
35
+ # Get JSON format diagnostics
36
+ def diagnostics_as_json(text, source_file: '<input>')
37
+ Parser::TextParser.diagnostics_as_json(text, source_file: source_file)
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,31 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'kumi'
4
+ require_relative 'text_parser'
5
+
6
+ module Kumi
7
+ # Text-based schema that extends Kumi::Schema with text parsing capabilities
8
+ class TextSchema
9
+ extend Kumi::Schema
10
+
11
+ # Create a schema from text using the same pipeline as Ruby DSL
12
+ def self.from_text(text, source_file: '<input>')
13
+ # Parse text to AST (same as RubyParser::Dsl.build_syntax_tree)
14
+ @__syntax_tree__ = Kumi::TextParser.parse(text, source_file: source_file).freeze
15
+ @__analyzer_result__ = Analyzer.analyze!(@__syntax_tree__).freeze
16
+ @__compiled_schema__ = Compiler.compile(@__syntax_tree__, analyzer: @__analyzer_result__).freeze
17
+
18
+ Inspector.new(@__syntax_tree__, @__analyzer_result__, @__compiled_schema__)
19
+ end
20
+
21
+ # Validate text schema
22
+ def self.valid?(text, source_file: '<input>')
23
+ Kumi::TextParser.valid?(text, source_file: source_file)
24
+ end
25
+
26
+ # Get validation diagnostics
27
+ def self.validate(text, source_file: '<input>')
28
+ Kumi::TextParser.validate(text, source_file: source_file)
29
+ end
30
+ end
31
+ end
data/lib/kumi-parser.rb CHANGED
@@ -1,6 +1,7 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require 'kumi'
4
+ require 'kumi/syntax/node'
4
5
  require 'zeitwerk'
5
6
  require 'parslet'
6
7
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: kumi-parser
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - Kumi Team
@@ -130,6 +130,7 @@ extensions: []
130
130
  extra_rdoc_files: []
131
131
  files:
132
132
  - ".rspec"
133
+ - CLAUDE.md
133
134
  - LICENSE
134
135
  - README.md
135
136
  - Rakefile
@@ -139,18 +140,19 @@ files:
139
140
  - examples/text_parser_test_with_comments.rb
140
141
  - kumi-parser.gemspec
141
142
  - lib/kumi-parser.rb
142
- - lib/kumi/parser.rb
143
- - lib/kumi/parser/analyzer_diagnostic_converter.rb
143
+ - lib/kumi/parser/base.rb
144
+ - lib/kumi/parser/direct_parser.rb
144
145
  - lib/kumi/parser/error_extractor.rb
146
+ - lib/kumi/parser/errors.rb
147
+ - lib/kumi/parser/smart_tokenizer.rb
145
148
  - lib/kumi/parser/syntax_validator.rb
146
149
  - lib/kumi/parser/text_parser.rb
147
150
  - lib/kumi/parser/text_parser/api.rb
148
- - lib/kumi/parser/text_parser/editor_diagnostic.rb
149
- - lib/kumi/parser/text_parser/grammar.rb
150
- - lib/kumi/parser/text_parser/parser.rb
151
- - lib/kumi/parser/text_parser/transform.rb
151
+ - lib/kumi/parser/token.rb
152
+ - lib/kumi/parser/token_metadata.rb
152
153
  - lib/kumi/parser/version.rb
153
- - test_basic.rb
154
+ - lib/kumi/text_parser.rb
155
+ - lib/kumi/text_schema.rb
154
156
  homepage: https://github.com/amuta/kumi-parser
155
157
  licenses:
156
158
  - MIT
@@ -1,84 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- require_relative 'text_parser/editor_diagnostic'
4
-
5
- module Kumi
6
- module Parser
7
- # Converts analyzer errors to editor diagnostics
8
- class AnalyzerDiagnosticConverter
9
- def self.convert_errors(errors)
10
- diagnostics = TextParser::DiagnosticCollection.new
11
-
12
- errors.each do |error|
13
- diagnostic = convert_single_error(error)
14
- diagnostics << diagnostic if diagnostic
15
- end
16
-
17
- diagnostics
18
- end
19
-
20
- def self.convert_single_error(error)
21
- # Handle legacy array format [location, message]
22
- if error.is_a?(Array) && error.size == 2
23
- location, message = error
24
- line = location&.respond_to?(:line) ? location.line : 1
25
- column = location&.respond_to?(:column) ? location.column : 1
26
-
27
- return TextParser::EditorDiagnostic.new(
28
- line: line,
29
- column: column,
30
- message: message.to_s,
31
- severity: :error,
32
- type: :semantic
33
- )
34
- end
35
-
36
- # Handle regular error objects
37
- if error&.respond_to?(:message)
38
- line = error.respond_to?(:location) && error.location&.respond_to?(:line) ? error.location.line : 1
39
- column = error.respond_to?(:location) && error.location&.respond_to?(:column) ? error.location.column : 1
40
-
41
- # Extract error type and map to severity
42
- error_type = error.respond_to?(:type) ? error.type : :semantic
43
- severity = map_type_to_severity(error_type)
44
-
45
- return TextParser::EditorDiagnostic.new(
46
- line: line,
47
- column: column,
48
- message: error.message,
49
- severity: severity,
50
- type: error_type
51
- )
52
- end
53
-
54
- # Handle unknown formats (strings, etc.)
55
- return unless error
56
-
57
- TextParser::EditorDiagnostic.new(
58
- line: 1,
59
- column: 1,
60
- message: "Unknown analyzer error: #{error}",
61
- severity: :error,
62
- type: :semantic
63
- )
64
- end
65
-
66
- def self.extract_location(location)
67
- if location&.respond_to?(:line) && location.respond_to?(:column)
68
- { line: location.line, column: location.column }
69
- else
70
- { line: 1, column: 1 }
71
- end
72
- end
73
-
74
- def self.map_type_to_severity(type)
75
- case type
76
- when :warning then :warning
77
- when :info then :info
78
- when :hint then :hint
79
- else :error
80
- end
81
- end
82
- end
83
- end
84
- end
@@ -1,102 +0,0 @@
1
- # frozen_string_literal: true
2
-
3
- module Kumi
4
- module Parser
5
- module TextParser
6
- # Simple diagnostic for online editors
7
- class EditorDiagnostic
8
- attr_reader :line, :column, :message, :severity, :type
9
-
10
- def initialize(line:, column:, message:, severity: :error, type: :syntax)
11
- @line = line
12
- @column = column
13
- @message = message
14
- @severity = severity
15
- @type = type
16
- end
17
-
18
- def to_monaco
19
- {
20
- startLineNumber: line,
21
- startColumn: column,
22
- endLineNumber: line,
23
- endColumn: column + 1,
24
- message: message,
25
- severity: monaco_severity
26
- }
27
- end
28
-
29
- def to_codemirror
30
- {
31
- from: (line - 1) * 1000 + (column - 1),
32
- to: (line - 1) * 1000 + column,
33
- message: message,
34
- severity: severity.to_s
35
- }
36
- end
37
-
38
- def to_h
39
- {
40
- line: line,
41
- column: column,
42
- message: message,
43
- severity: severity.to_s,
44
- type: type.to_s
45
- }
46
- end
47
-
48
- def to_json(*args)
49
- require 'json'
50
- to_h.to_json(*args)
51
- end
52
-
53
- private
54
-
55
- def monaco_severity
56
- case severity
57
- when :error then 8 # Monaco.MarkerSeverity.Error
58
- when :warning then 4 # Monaco.MarkerSeverity.Warning
59
- when :info then 2 # Monaco.MarkerSeverity.Info
60
- else 8
61
- end
62
- end
63
- end
64
-
65
- # Collection of diagnostics
66
- class DiagnosticCollection
67
- def initialize(diagnostics = [])
68
- @diagnostics = diagnostics
69
- end
70
-
71
- def <<(diagnostic)
72
- @diagnostics << diagnostic
73
- end
74
-
75
- def empty?
76
- @diagnostics.empty?
77
- end
78
-
79
- def count
80
- @diagnostics.length
81
- end
82
-
83
- def to_monaco
84
- @diagnostics.map(&:to_monaco)
85
- end
86
-
87
- def to_codemirror
88
- @diagnostics.map(&:to_codemirror)
89
- end
90
-
91
- def to_json(*args)
92
- require 'json'
93
- @diagnostics.map(&:to_h).to_json(*args)
94
- end
95
-
96
- def to_a
97
- @diagnostics
98
- end
99
- end
100
- end
101
- end
102
- end