kumi-parser 0.0.2 → 0.0.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rspec +3 -0
- data/CLAUDE.md +120 -0
- data/LICENSE +21 -0
- data/README.md +73 -0
- data/Rakefile +10 -0
- data/examples/debug_text_parser.rb +41 -0
- data/examples/debug_transform_rule.rb +26 -0
- data/examples/text_parser_comprehensive_test.rb +333 -0
- data/examples/text_parser_test_with_comments.rb +146 -0
- data/kumi-parser.gemspec +45 -0
- data/lib/kumi/parser/base.rb +51 -0
- data/lib/kumi/parser/direct_parser.rb +502 -0
- data/lib/kumi/parser/error_extractor.rb +89 -0
- data/lib/kumi/parser/errors.rb +40 -0
- data/lib/kumi/parser/smart_tokenizer.rb +287 -0
- data/lib/kumi/parser/syntax_validator.rb +21 -0
- data/lib/kumi/parser/text_parser/api.rb +60 -0
- data/lib/kumi/parser/text_parser.rb +38 -0
- data/lib/kumi/parser/token.rb +84 -0
- data/lib/kumi/parser/token_metadata.rb +370 -0
- data/lib/kumi/parser/version.rb +7 -0
- data/lib/kumi/text_parser.rb +40 -0
- data/lib/kumi/text_schema.rb +31 -0
- data/lib/kumi-parser.rb +19 -0
- metadata +26 -2
@@ -0,0 +1,370 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Kumi
|
4
|
+
module Parser
|
5
|
+
# Token types
|
6
|
+
module TokenType
|
7
|
+
# Literals
|
8
|
+
INTEGER = :integer
|
9
|
+
FLOAT = :float
|
10
|
+
STRING = :string
|
11
|
+
BOOLEAN = :boolean
|
12
|
+
|
13
|
+
# Identifiers and symbols
|
14
|
+
IDENTIFIER = :identifier
|
15
|
+
SYMBOL = :symbol # :name
|
16
|
+
|
17
|
+
# Keywords
|
18
|
+
SCHEMA = :schema
|
19
|
+
INPUT = :input
|
20
|
+
VALUE = :value
|
21
|
+
TRAIT = :trait
|
22
|
+
DO = :do
|
23
|
+
END_KW = :end
|
24
|
+
ON = :on
|
25
|
+
BASE = :base
|
26
|
+
|
27
|
+
# Type keywords
|
28
|
+
INTEGER_TYPE = :integer_type # integer
|
29
|
+
FLOAT_TYPE = :float_type # float
|
30
|
+
STRING_TYPE = :string_type # string
|
31
|
+
BOOLEAN_TYPE = :boolean_type # boolean
|
32
|
+
ANY_TYPE = :any_type # any
|
33
|
+
ARRAY_TYPE = :array_type # array
|
34
|
+
|
35
|
+
# Function keyword
|
36
|
+
FN = :fn
|
37
|
+
|
38
|
+
# Operators (listed from highest to lowest precedence)
|
39
|
+
MULTIPLY = :multiply # *
|
40
|
+
DIVIDE = :divide # /
|
41
|
+
MODULO = :modulo # %
|
42
|
+
ADD = :add # +
|
43
|
+
SUBTRACT = :subtract # -
|
44
|
+
GTE = :gte # >=
|
45
|
+
LTE = :lte # <=
|
46
|
+
GT = :gt # >
|
47
|
+
LT = :lt # <
|
48
|
+
EQ = :eq # ==
|
49
|
+
NE = :ne # !=
|
50
|
+
AND = :and # &
|
51
|
+
OR = :or # |
|
52
|
+
|
53
|
+
# Punctuation
|
54
|
+
DOT = :dot # .
|
55
|
+
COMMA = :comma # ,
|
56
|
+
COLON = :colon # :
|
57
|
+
LPAREN = :lparen # (
|
58
|
+
RPAREN = :rparen # )
|
59
|
+
LBRACKET = :lbracket # [
|
60
|
+
RBRACKET = :rbracket # ]
|
61
|
+
|
62
|
+
# Special
|
63
|
+
NEWLINE = :newline
|
64
|
+
EOF = :eof
|
65
|
+
COMMENT = :comment # # comment
|
66
|
+
end
|
67
|
+
|
68
|
+
# Rich metadata for each token type
|
69
|
+
TOKEN_METADATA = {
|
70
|
+
# Keywords with parsing hints
|
71
|
+
schema: {
|
72
|
+
category: :keyword,
|
73
|
+
expects_block: true,
|
74
|
+
block_terminator: :end
|
75
|
+
},
|
76
|
+
input: {
|
77
|
+
category: :keyword,
|
78
|
+
expects_block: true,
|
79
|
+
block_terminator: :end,
|
80
|
+
context: :input_declarations
|
81
|
+
},
|
82
|
+
value: {
|
83
|
+
category: :keyword,
|
84
|
+
expects_expression: true,
|
85
|
+
declaration_type: :value
|
86
|
+
},
|
87
|
+
trait: {
|
88
|
+
category: :keyword,
|
89
|
+
expects_expression: true,
|
90
|
+
declaration_type: :trait
|
91
|
+
},
|
92
|
+
do: {
|
93
|
+
category: :keyword,
|
94
|
+
block_opener: true
|
95
|
+
},
|
96
|
+
end: {
|
97
|
+
category: :keyword,
|
98
|
+
block_closer: true,
|
99
|
+
terminates_expression: true
|
100
|
+
},
|
101
|
+
on: {
|
102
|
+
category: :keyword,
|
103
|
+
cascade_keyword: true,
|
104
|
+
expects_condition: true
|
105
|
+
},
|
106
|
+
base: {
|
107
|
+
category: :keyword,
|
108
|
+
cascade_keyword: true,
|
109
|
+
is_base_case: true
|
110
|
+
},
|
111
|
+
|
112
|
+
# Type keywords
|
113
|
+
integer_type: {
|
114
|
+
category: :type_keyword,
|
115
|
+
starts_declaration: true,
|
116
|
+
type_name: :integer
|
117
|
+
},
|
118
|
+
float_type: {
|
119
|
+
category: :type_keyword,
|
120
|
+
starts_declaration: true,
|
121
|
+
type_name: :float
|
122
|
+
},
|
123
|
+
string_type: {
|
124
|
+
category: :type_keyword,
|
125
|
+
starts_declaration: true,
|
126
|
+
type_name: :string
|
127
|
+
},
|
128
|
+
boolean_type: {
|
129
|
+
category: :type_keyword,
|
130
|
+
starts_declaration: true,
|
131
|
+
type_name: :boolean
|
132
|
+
},
|
133
|
+
any_type: {
|
134
|
+
category: :type_keyword,
|
135
|
+
starts_declaration: true,
|
136
|
+
type_name: :any
|
137
|
+
},
|
138
|
+
array_type: {
|
139
|
+
category: :type_keyword,
|
140
|
+
starts_declaration: true,
|
141
|
+
type_name: :array
|
142
|
+
},
|
143
|
+
|
144
|
+
# Function keyword
|
145
|
+
fn: {
|
146
|
+
category: :keyword,
|
147
|
+
function_keyword: true,
|
148
|
+
starts_expression: true
|
149
|
+
},
|
150
|
+
|
151
|
+
# Operators with precedence and associativity
|
152
|
+
multiply: {
|
153
|
+
category: :operator,
|
154
|
+
precedence: 6,
|
155
|
+
associativity: :left,
|
156
|
+
arity: :binary
|
157
|
+
},
|
158
|
+
divide: {
|
159
|
+
category: :operator,
|
160
|
+
precedence: 6,
|
161
|
+
associativity: :left,
|
162
|
+
arity: :binary
|
163
|
+
},
|
164
|
+
modulo: {
|
165
|
+
category: :operator,
|
166
|
+
precedence: 6,
|
167
|
+
associativity: :left,
|
168
|
+
arity: :binary
|
169
|
+
},
|
170
|
+
add: {
|
171
|
+
category: :operator,
|
172
|
+
precedence: 5,
|
173
|
+
associativity: :left,
|
174
|
+
arity: :binary
|
175
|
+
},
|
176
|
+
subtract: {
|
177
|
+
category: :operator,
|
178
|
+
precedence: 5,
|
179
|
+
associativity: :left,
|
180
|
+
arity: :binary
|
181
|
+
},
|
182
|
+
gte: {
|
183
|
+
category: :operator,
|
184
|
+
precedence: 4,
|
185
|
+
associativity: :left,
|
186
|
+
arity: :binary,
|
187
|
+
returns_boolean: true
|
188
|
+
},
|
189
|
+
lte: {
|
190
|
+
category: :operator,
|
191
|
+
precedence: 4,
|
192
|
+
associativity: :left,
|
193
|
+
arity: :binary,
|
194
|
+
returns_boolean: true
|
195
|
+
},
|
196
|
+
gt: {
|
197
|
+
category: :operator,
|
198
|
+
precedence: 4,
|
199
|
+
associativity: :left,
|
200
|
+
arity: :binary,
|
201
|
+
returns_boolean: true
|
202
|
+
},
|
203
|
+
lt: {
|
204
|
+
category: :operator,
|
205
|
+
precedence: 4,
|
206
|
+
associativity: :left,
|
207
|
+
arity: :binary,
|
208
|
+
returns_boolean: true
|
209
|
+
},
|
210
|
+
eq: {
|
211
|
+
category: :operator,
|
212
|
+
precedence: 4,
|
213
|
+
associativity: :left,
|
214
|
+
arity: :binary,
|
215
|
+
returns_boolean: true
|
216
|
+
},
|
217
|
+
ne: {
|
218
|
+
category: :operator,
|
219
|
+
precedence: 4,
|
220
|
+
associativity: :left,
|
221
|
+
arity: :binary,
|
222
|
+
returns_boolean: true
|
223
|
+
},
|
224
|
+
and: {
|
225
|
+
category: :operator,
|
226
|
+
precedence: 3,
|
227
|
+
associativity: :left,
|
228
|
+
arity: :binary,
|
229
|
+
requires_boolean: true
|
230
|
+
},
|
231
|
+
or: {
|
232
|
+
category: :operator,
|
233
|
+
precedence: 2,
|
234
|
+
associativity: :left,
|
235
|
+
arity: :binary,
|
236
|
+
requires_boolean: true
|
237
|
+
},
|
238
|
+
|
239
|
+
# Literals with type information
|
240
|
+
integer: {
|
241
|
+
category: :literal,
|
242
|
+
starts_expression: true,
|
243
|
+
ast_class: 'Kumi::Syntax::Literal'
|
244
|
+
},
|
245
|
+
float: {
|
246
|
+
category: :literal,
|
247
|
+
starts_expression: true,
|
248
|
+
ast_class: 'Kumi::Syntax::Literal'
|
249
|
+
},
|
250
|
+
string: {
|
251
|
+
category: :literal,
|
252
|
+
starts_expression: true,
|
253
|
+
ast_class: 'Kumi::Syntax::Literal'
|
254
|
+
},
|
255
|
+
boolean: {
|
256
|
+
category: :literal,
|
257
|
+
starts_expression: true,
|
258
|
+
ast_class: 'Kumi::Syntax::Literal'
|
259
|
+
},
|
260
|
+
|
261
|
+
# Identifiers and references
|
262
|
+
identifier: {
|
263
|
+
category: :identifier,
|
264
|
+
starts_expression: true,
|
265
|
+
can_be_reference: true
|
266
|
+
},
|
267
|
+
symbol: {
|
268
|
+
category: :identifier,
|
269
|
+
starts_expression: true,
|
270
|
+
is_declaration_name: true
|
271
|
+
},
|
272
|
+
|
273
|
+
# Punctuation with parser hints
|
274
|
+
dot: {
|
275
|
+
category: :punctuation,
|
276
|
+
indicates_member_access: true
|
277
|
+
},
|
278
|
+
comma: {
|
279
|
+
category: :punctuation,
|
280
|
+
separates_items: true
|
281
|
+
},
|
282
|
+
colon: {
|
283
|
+
category: :punctuation,
|
284
|
+
indicates_symbol: true
|
285
|
+
},
|
286
|
+
lparen: {
|
287
|
+
category: :punctuation,
|
288
|
+
opens_group: true,
|
289
|
+
group_closer: :rparen,
|
290
|
+
starts_expression: true
|
291
|
+
},
|
292
|
+
rparen: {
|
293
|
+
category: :punctuation,
|
294
|
+
closes_group: true,
|
295
|
+
terminates_expression: true
|
296
|
+
},
|
297
|
+
lbracket: {
|
298
|
+
category: :punctuation,
|
299
|
+
opens_group: true,
|
300
|
+
group_closer: :rbracket,
|
301
|
+
starts_expression: true,
|
302
|
+
indicates_array: true
|
303
|
+
},
|
304
|
+
rbracket: {
|
305
|
+
category: :punctuation,
|
306
|
+
closes_group: true,
|
307
|
+
terminates_expression: true
|
308
|
+
},
|
309
|
+
|
310
|
+
# Special tokens
|
311
|
+
newline: {
|
312
|
+
category: :whitespace,
|
313
|
+
separates_statements: true
|
314
|
+
},
|
315
|
+
eof: {
|
316
|
+
category: :special,
|
317
|
+
terminates_input: true
|
318
|
+
},
|
319
|
+
comment: {
|
320
|
+
category: :whitespace,
|
321
|
+
ignored_by_parser: true
|
322
|
+
}
|
323
|
+
}.freeze
|
324
|
+
|
325
|
+
# Character to token mappings
|
326
|
+
CHAR_TO_TOKEN = {
|
327
|
+
'(' => :lparen,
|
328
|
+
')' => :rparen,
|
329
|
+
'[' => :lbracket,
|
330
|
+
']' => :rbracket,
|
331
|
+
',' => :comma,
|
332
|
+
'.' => :dot,
|
333
|
+
':' => :colon,
|
334
|
+
'+' => :add,
|
335
|
+
'-' => :subtract,
|
336
|
+
'*' => :multiply,
|
337
|
+
'/' => :divide,
|
338
|
+
'%' => :modulo,
|
339
|
+
'&' => :and,
|
340
|
+
'|' => :or
|
341
|
+
}.freeze
|
342
|
+
|
343
|
+
# Keyword mapping: reserved words, boolean literals, and type names
|
344
|
+
KEYWORDS = {
|
345
|
+
'schema' => :schema,
|
346
|
+
'input' => :input,
|
347
|
+
'value' => :value,
|
348
|
+
'trait' => :trait,
|
349
|
+
'do' => :do,
|
350
|
+
'end' => :end,
|
351
|
+
'on' => :on,
|
352
|
+
'base' => :base,
|
353
|
+
'fn' => :fn,
|
354
|
+
'true' => :boolean,
|
355
|
+
'false' => :boolean,
|
356
|
+
'integer' => :integer_type,
|
357
|
+
'float' => :float_type,
|
358
|
+
'string' => :string_type,
|
359
|
+
'boolean' => :boolean_type,
|
360
|
+
'any' => :any_type,
|
361
|
+
'array' => :array_type
|
362
|
+
}.freeze
|
363
|
+
|
364
|
+
# Closer to opener mappings for error recovery
|
365
|
+
OPENER_FOR_CLOSER = {
|
366
|
+
rparen: :lparen,
|
367
|
+
rbracket: :lbracket
|
368
|
+
}.freeze
|
369
|
+
end
|
370
|
+
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative 'parser/text_parser'
|
4
|
+
|
5
|
+
module Kumi
|
6
|
+
# Top-level text parser module with same interface as Ruby DSL
|
7
|
+
module TextParser
|
8
|
+
extend self
|
9
|
+
|
10
|
+
# Parse text schema and return AST (same interface as RubyParser::Dsl.build_syntax_tree)
|
11
|
+
def parse(text, source_file: '<input>')
|
12
|
+
Parser::TextParser.parse(text, source_file: source_file)
|
13
|
+
end
|
14
|
+
|
15
|
+
# Validate text schema
|
16
|
+
def valid?(text, source_file: '<input>')
|
17
|
+
Parser::TextParser.valid?(text, source_file: source_file)
|
18
|
+
end
|
19
|
+
|
20
|
+
# Get validation diagnostics
|
21
|
+
def validate(text, source_file: '<input>')
|
22
|
+
Parser::TextParser.validate(text, source_file: source_file)
|
23
|
+
end
|
24
|
+
|
25
|
+
# Get Monaco Editor format diagnostics
|
26
|
+
def diagnostics_for_monaco(text, source_file: '<input>')
|
27
|
+
Parser::TextParser.diagnostics_for_monaco(text, source_file: source_file)
|
28
|
+
end
|
29
|
+
|
30
|
+
# Get CodeMirror format diagnostics
|
31
|
+
def diagnostics_for_codemirror(text, source_file: '<input>')
|
32
|
+
Parser::TextParser.diagnostics_for_codemirror(text, source_file: source_file)
|
33
|
+
end
|
34
|
+
|
35
|
+
# Get JSON format diagnostics
|
36
|
+
def diagnostics_as_json(text, source_file: '<input>')
|
37
|
+
Parser::TextParser.diagnostics_as_json(text, source_file: source_file)
|
38
|
+
end
|
39
|
+
end
|
40
|
+
end
|
@@ -0,0 +1,31 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'kumi'
|
4
|
+
require_relative 'text_parser'
|
5
|
+
|
6
|
+
module Kumi
|
7
|
+
# Text-based schema class: extended with Kumi::Schema, plus text parsing entry points
|
8
|
+
class TextSchema
|
9
|
+
extend Kumi::Schema
|
10
|
+
|
11
|
+
# Create a schema from text using the same pipeline as Ruby DSL
|
12
|
+
def self.from_text(text, source_file: '<input>')
|
13
|
+
# Parse text to AST (same as RubyParser::Dsl.build_syntax_tree)
|
14
|
+
@__syntax_tree__ = Kumi::TextParser.parse(text, source_file: source_file).freeze
|
15
|
+
@__analyzer_result__ = Analyzer.analyze!(@__syntax_tree__).freeze
|
16
|
+
@__compiled_schema__ = Compiler.compile(@__syntax_tree__, analyzer: @__analyzer_result__).freeze
|
17
|
+
|
18
|
+
Inspector.new(@__syntax_tree__, @__analyzer_result__, @__compiled_schema__)
|
19
|
+
end
|
20
|
+
|
21
|
+
# Validate text schema
|
22
|
+
def self.valid?(text, source_file: '<input>')
|
23
|
+
Kumi::TextParser.valid?(text, source_file: source_file)
|
24
|
+
end
|
25
|
+
|
26
|
+
# Get validation diagnostics
|
27
|
+
def self.validate(text, source_file: '<input>')
|
28
|
+
Kumi::TextParser.validate(text, source_file: source_file)
|
29
|
+
end
|
30
|
+
end
|
31
|
+
end
|
data/lib/kumi-parser.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require 'kumi'
|
4
|
+
require 'kumi/syntax/node'
|
5
|
+
require 'zeitwerk'
|
6
|
+
require 'parslet'
|
7
|
+
|
8
|
+
loader = Zeitwerk::Loader.for_gem(warn_on_extra_files: false)
|
9
|
+
loader.ignore("#{__dir__}/kumi-parser.rb")
|
10
|
+
loader.ignore("#{__dir__}/kumi/parser/version.rb")
|
11
|
+
loader.setup
|
12
|
+
|
13
|
+
require_relative 'kumi/parser/version'
|
14
|
+
|
15
|
+
module Kumi
|
16
|
+
module Parser
|
17
|
+
# Parser extension for Kumi DSL
|
18
|
+
end
|
19
|
+
end
|
metadata
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: kumi-parser
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Kumi Team
|
@@ -128,7 +128,31 @@ email:
|
|
128
128
|
executables: []
|
129
129
|
extensions: []
|
130
130
|
extra_rdoc_files: []
|
131
|
-
files:
|
131
|
+
files:
|
132
|
+
- ".rspec"
|
133
|
+
- CLAUDE.md
|
134
|
+
- LICENSE
|
135
|
+
- README.md
|
136
|
+
- Rakefile
|
137
|
+
- examples/debug_text_parser.rb
|
138
|
+
- examples/debug_transform_rule.rb
|
139
|
+
- examples/text_parser_comprehensive_test.rb
|
140
|
+
- examples/text_parser_test_with_comments.rb
|
141
|
+
- kumi-parser.gemspec
|
142
|
+
- lib/kumi-parser.rb
|
143
|
+
- lib/kumi/parser/base.rb
|
144
|
+
- lib/kumi/parser/direct_parser.rb
|
145
|
+
- lib/kumi/parser/error_extractor.rb
|
146
|
+
- lib/kumi/parser/errors.rb
|
147
|
+
- lib/kumi/parser/smart_tokenizer.rb
|
148
|
+
- lib/kumi/parser/syntax_validator.rb
|
149
|
+
- lib/kumi/parser/text_parser.rb
|
150
|
+
- lib/kumi/parser/text_parser/api.rb
|
151
|
+
- lib/kumi/parser/token.rb
|
152
|
+
- lib/kumi/parser/token_metadata.rb
|
153
|
+
- lib/kumi/parser/version.rb
|
154
|
+
- lib/kumi/text_parser.rb
|
155
|
+
- lib/kumi/text_schema.rb
|
132
156
|
homepage: https://github.com/amuta/kumi-parser
|
133
157
|
licenses:
|
134
158
|
- MIT
|