rbtoon 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,452 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RbToon
4
+ class Scanner # :nodoc:
5
+ include RaiseParseError
6
+
7
+ NL = / *\n/ # optional trailing spaces followed by a line feed
8
+
9
+ BLANK = /(?:^[ \t\n]*\n)|(?:^[ \t\n]+\z)/ # whitespace-only text ending in a line feed, or whitespace running to the end of input
10
+
11
+ INDENT = /^[ \t]*/ # leading spaces/tabs of a line (may match the empty string)
12
+
13
+ WHITE_SPACES = / +/ # one or more spaces
14
+
15
+ L_BRACKET = /\[/ # literal "["
16
+
17
+ R_BRACKET = /]/ # literal "]"
18
+
19
+ L_BRACE = /{/ # literal "{"
20
+
21
+ R_BRACE = /}/ # literal "}"
22
+
23
+ COLON = /(?:: )|(?::$)/ # ":" followed by a space, or ":" at the end of a line
24
+
25
+ HYPHEN = /(?:- )|(?:-$)/ # "-" followed by a space, or "-" at the end of a line
26
+
27
+ D_QUOTE = /"/ # literal double quote
28
+
29
+ BACK_SLASH = /\\/ # literal backslash
30
+
31
+ DELIMITER = /[,\t|]/ # any one of the delimiter characters: comma, tab or pipe
32
+
33
+ BOOLEAN = /\A(?:true|false)\Z/ # the whole text is exactly "true" or "false"
34
+
35
+ NULL = /\Anull\Z/ # the whole text is exactly "null"
36
+
37
+ NUMBER = /\A-?(?:0|[1-9]\d*)(?:\.\d+)?(?:e[+-]?\d+)?\Z/i # optional "-", integer part without leading zeros, optional fraction, optional exponent (case-insensitive)
38
+
39
# Builds a scanner over +string+.
#
# string::      Toon source text; wrapped in a StringScanner.
# filename::    Name stored in every Position created by this scanner.
# strict::      When truthy, the indentation checks raise parse errors.
# indent_size:: Number of spaces per indentation level (converted with #to_f).
#
# Raises ArgumentError when +indent_size+ does not convert to a positive
# number. The depth calculation divides the indent width by this value, so a
# zero or NaN size would otherwise only fail later with a FloatDomainError
# when the result is floored.
def initialize(string, filename, strict, indent_size)
  unit = indent_size.to_f
  unless unit.positive?
    raise ArgumentError,
          "indent_size must be a positive number, but got #{indent_size.inspect}"
  end

  @ss = StringScanner.new(string)
  @filename = filename
  # Line and column are 1-based.
  @line = 1
  @column = 1
  # Characters currently treated as value delimiters.
  @delimiters = []
  @strict = strict
  @indent_size = unit
  @indent_depth = 0
  # Entries are [indent depth, [layer kinds opened at that depth]].
  @layer_stack = []
  @array_depth = 0
  @list_array_depth = []
  # Queue of NL/BLANK/indent/EOS tokens waiting to be handed out.
  @control_tokens = []
end
53
+
54
# Returns the next token as a [kind, token] pair, or nil when the queued
# entry is nil (scan_eos queues nil after the EOS token).
#
# Queued control tokens are served first; a code token is scanned only when
# the queue is still empty after refilling it.
def next_token
  scan_control_tokens if @control_tokens.empty?

  token = @control_tokens.empty? ? scan_code_token : @control_tokens.shift
  return unless token

  [token.kind, token]
end
65
+
66
# Enters an array: bumps the array depth, registers comma, pipe and tab as
# delimiter characters and records an :array layer.
def push_array
  @array_depth += 1
  @delimiters.push(',', '|', "\t")
  push_layer(:array)
end
73
+
74
# Leaves an array: lowers the array depth, forgets every registered
# delimiter character and pops the top layer-stack entry.
def pop_array
  @array_depth = @array_depth - 1
  @delimiters.clear
  pop_layer
end
79
+
80
# Clears the registered delimiter characters, so no character is treated as
# a delimiter until new ones are registered.
def start_list_array_items = @delimiters.clear
83
+
84
# Records an :object layer at the current indentation depth.
def push_object = push_layer(:object)
87
+
88
# Pops the top entry of the layer stack.
def pop_object = pop_layer
91
+
92
# Returns a Position for the scanner's current line and column.
def current_position = create_position(@line, @column)
95
+
96
# Makes a single character the only active delimiter.
#
# token:: object responding to #text, or nil. The first character of its
#         text is used; a comma is used when the token is nil or its text
#         is empty.
#
# Returns the delimiter list.
def delimiter(token)
  active = (token.text[0] if token) || ','
  @delimiters.replace([active])
end
100
+
101
+ private
102
+
103
# Records +layer+ on the layer stack.
#
# Stack entries have the form [depth, [layers...]]. When the top entry was
# opened at the current indentation depth the layer is appended to it;
# otherwise a new entry for the current depth is pushed.
def push_layer(layer)
  depth, layers = @layer_stack.last
  same_depth = depth.is_a?(Integer) && layers.is_a?(Array) && depth == @indent_depth

  if same_depth
    layers.push(layer)
  else
    @layer_stack.push([@indent_depth, [layer]])
  end
end
111
+
112
# Removes and returns the top entry of the layer stack.
# NOTE(review): the whole [depth, layers] entry is removed, even when several
# layers were recorded at that depth -- confirm this is intended.
def pop_layer = @layer_stack.pop
115
+
116
# Tells whether the layer-stack entry recorded for +depth+ starts with an
# object and the entry directly below it ends with an array.
#
# Returns false when no entry exists for +depth+ or when that entry is the
# bottom of the stack.
def object_as_list_item?(depth)
  position = @layer_stack.find_index { |entry| depth == entry.first }
  return false if position.nil? || !position.positive?

  object_layer?(@layer_stack[position]) && array_layer?(@layer_stack[position - 1])
end
123
+
124
# True when the first layer recorded in the [depth, layers] entry is :object.
def object_layer?(layer) = layer[1].first == :object
128
+
129
# True when the last layer recorded in the [depth, layers] entry is :array.
def array_layer?(layer) = layer[1].last == :array
133
+
134
# True when the underlying StringScanner has reached the end of the input.
def eos? = @ss.eos?
137
+
138
# Consumes +pattern+ at the current scan position.
#
# Returns [matched text, line, column], where line/column describe where the
# match started, or nil when the pattern does not match. The tracked
# position is advanced past the matched text.
def scan(pattern)
  start_line = @line
  start_column = @column

  matched = @ss.scan(pattern)
  return unless matched

  update_state(matched)
  [matched, start_line, start_column]
end
149
+
150
# Consumes +pattern+ and wraps the match in a token of the given +kind+,
# positioned where the match started. Returns nil when nothing matches.
def scan_token(pattern, kind)
  matched, line, column = scan(pattern)
  create_token(kind, matched, line, column) if matched
end
156
+
157
# Consumes and returns the next character, advancing the tracked position.
# Returns nil when StringScanner#getch has nothing left to return.
def scan_char
  @ss.getch&.tap { |consumed| update_state(consumed) }
end
164
+
165
# Returns the text +pattern+ would match at the current scan position
# without consuming it, or nil when it does not match.
def peek(pattern) = @ss.check(pattern)
168
+
169
# Returns the next character without consuming it.
# Because the pattern is /./ (no multiline flag), this yields nil both at
# the end of input and when the next character is a line feed.
def peek_char = peek(/./)
172
+
173
# Consumes +pattern+ and returns the number of characters consumed, or nil
# when the pattern does not match.
def skip(pattern)
  matched, = scan(pattern)
  matched&.length
end
177
+
178
# Moves the scan pointer past +char+ and updates the tracked line/column.
# StringScanner#pos is a byte offset, hence the use of #bytesize.
def advance(char)
  @ss.pos = @ss.pos + char.bytesize
  update_state(char)
end
182
+
183
# Advances the tracked line and column as if +text+ had just been consumed.
def update_state(text)
  next_line, next_column = calc_next_position(text, @line, @column)
  @line = next_line
  @column = next_column
end
186
+
187
# Computes the position that follows +text+ when it starts at the given
# 1-based +line+ and +column+.
#
# text::   consumed text (may be empty)
# line::   line on which +text+ starts
# column:: column at which +text+ starts
#
# Returns [next_line, next_column]: the position of the character that comes
# right after +text+.
def calc_next_position(text, line, column)
  return [line, column] if text.empty?

  last_newline = text.rindex("\n")
  # No line break consumed: stay on the same line and move right.
  return [line, column + text.length] unless last_newline

  # Columns are 1-based, so the next column is the number of characters
  # after the last line feed plus one. That is 1 when the text ends with the
  # line feed. (Previously text such as "a\nbc" produced column 2 instead of
  # 3 because the +1 was missing.)
  [line + text.count("\n"), text.length - last_newline]
end
205
+
206
# Runs the control-token scanners in their fixed order: line end, blank
# lines, indentation, end of input. Each one queues tokens as needed.
def scan_control_tokens
  %i[scan_nl scan_blank scan_indent scan_eos].each { |step| send(step) }
end
212
+
213
# Queues a control token of the given +kind+ at +line+/+column+.
# Nothing is queued when +text+ is nil.
def push_control_token(kind, text, line, column)
  @control_tokens.push(create_token(kind, text, line, column)) if text
end
219
+
220
# Consumes an end of line (optional trailing spaces plus a line feed) and
# queues an :NL token. The token carries only the line feed and is
# positioned at the line feed, i.e. after any trailing spaces.
def scan_nl
  matched, line, column = scan(NL)
  return unless matched

  trailing_spaces = matched.length - 1
  push_control_token(:NL, matched[-1], line, column + trailing_spaces)
end
227
+
228
# Consumes blank lines and queues a :BLANK token for them.
# Only attempted at the first column of a line and before the end of input.
def scan_blank
  return if @column > 1
  return if eos?

  matched, line, column = scan(BLANK)
  push_control_token(:BLANK, matched, line, column) if matched
end
236
+
237
# Consumes the leading whitespace of a line, validates it and updates the
# indentation depth (queuing indent tokens through update_indent_depth).
# Only attempted at the first column of a line and before the end of input.
def scan_indent
  return if @column > 1
  return if eos?

  leading, line, column = scan(INDENT)
  return unless leading

  # Both checks only raise in strict mode.
  check_tabs_in_indent(leading, line, column)
  check_indent_spaces_size(leading, line, column)

  update_indent_depth(calc_next_depth(leading))
end
249
+
250
# In strict mode, raises a parse error when the indentation text contains a
# tab character. Does nothing otherwise.
def check_tabs_in_indent(indent, line, column)
  return unless @strict
  return unless indent.include?("\t")

  raise_parse_error 'tabs are not allowed in indentation', create_position(line, column)
end
256
+
257
# In strict mode, raises a parse error when the indentation width leaves a
# positive remainder after division by the indent size. Does nothing
# otherwise.
def check_indent_spaces_size(indent, line, column)
  return unless @strict

  width = indent.length
  return unless (width % @indent_size).positive?

  message =
    "indentation must be exact multiple of #{@indent_size.to_i}, " \
    "but found #{width} spaces"
  raise_parse_error message, create_position(line, column)
end
266
+
267
# Converts an indentation string into an indentation depth.
#
# The raw depth is the indent width divided by the indent size, rounded
# down. It is reduced by one when the depth just above corresponds to an
# object used as a list item (see object_as_list_item?).
def calc_next_depth(indent)
  raw_depth = (indent.length / @indent_size).floor
  parent_depth = raw_depth - 1
  object_as_list_item?(parent_depth) ? parent_depth : raw_depth
end
275
+
276
# Queues the indent tokens needed to move from the current depth to
# +next_depth+, then stores +next_depth+ as the current depth.
def update_indent_depth(next_depth)
  case next_depth <=> @indent_depth
  when -1 then create_pop_indent_tokens(next_depth)
  when 1 then create_push_indent_tokens(next_depth)
  end

  @indent_depth = next_depth
end
285
+
286
# Queues one :POP_INDENT token per indentation level being closed when
# dedenting to +next_depth+. Queues nothing when the computed count is not
# positive.
def create_pop_indent_tokens(next_depth)
  pops = calc_indent_pop_count(next_depth)
  return unless pops.positive?

  pops.times do |closed|
    # Column derived from the depth being closed, counting down from the
    # current depth.
    push_control_token(:POP_INDENT, '', @line, ((@indent_depth - closed) * @indent_size).to_i)
  end
end
295
+
296
# Number of :POP_INDENT tokens needed to dedent to +next_depth+.
#
# Starts from the plain depth difference and subtracts one for every
# layer-stack entry whose depth is at least +next_depth+, lies between 1 and
# the current depth (exclusive), and is an object used as a list item.
def calc_indent_pop_count(next_depth)
  list_item_levels = @layer_stack.count do |(depth, _layers)|
    depth >= next_depth &&
      depth >= 1 && depth < @indent_depth &&
      object_as_list_item?(depth)
  end

  (@indent_depth - next_depth) - list_item_levels
end
305
+
306
# Queues one :PUSH_INDENT token per indentation level being opened when
# indenting to +next_depth+. Queues nothing when the computed count is not
# positive.
def create_push_indent_tokens(next_depth)
  pushes = calc_indent_push_count(next_depth)
  return unless pushes.positive?

  pushes.times do |opened|
    # Column derived from the depth being opened, counting up from the
    # current depth.
    push_control_token(:PUSH_INDENT, '', @line, ((@indent_depth + opened) * @indent_size).to_i)
  end
end
315
+
316
# Number of :PUSH_INDENT tokens needed to indent to +next_depth+.
# The starting depth is raised by one when the current depth is an object
# used as a list item, so one fewer token is produced in that case.
def calc_indent_push_count(next_depth)
  base = object_as_list_item?(@indent_depth) ? @indent_depth + 1 : @indent_depth
  next_depth - base
end
325
+
326
# At the end of input, queues the closing tokens: a dummy :NL when no :NL is
# already queued, the indent tokens returning to depth 0, :EOS, and finally
# nil. Does nothing before the end of input.
def scan_eos
  return unless eos?

  # Parser requires all lines to be ended with NL.
  # Dummy NL is pushed if no NL exists before EOS.
  newline_queued = @control_tokens.any? { |token| token.kind == :NL }
  push_control_token(:NL, '', @line, @column) unless newline_queued

  update_indent_depth(0)

  push_control_token(:EOS, '', @line, @column)
  @control_tokens.push(nil)
end
340
+
341
# Scans the next non-control token after skipping leading spaces.
# Candidates are tried in order: structural symbol, delimiter, quoted
# string, and finally an unquoted string.
def scan_code_token
  skip(WHITE_SPACES)

  scan_array_symbol ||
    scan_token(DELIMITER, :DELIMITER) ||
    scan_quoted_string ||
    scan_unquoted_string
end
355
+
356
# Tries each structural symbol (brackets, braces, colon, hyphen) in order
# and returns the token for the first one that matches, or nil when none
# does. Scanning stops at the first match.
def scan_array_symbol
  symbols = {
    L_BRACKET: L_BRACKET, R_BRACKET: R_BRACKET,
    L_BRACE: L_BRACE, R_BRACE: R_BRACE, COLON: COLON, HYPHEN: HYPHEN
  }

  symbols.lazy.filter_map { |kind, pattern| scan_token(pattern, kind) }.first
end
367
+
368
# Scans a double-quoted string starting at the current position.
#
# Returns nil when the next character is not a double quote. Otherwise
# returns a :QUOTED_STRING token whose text keeps the surrounding quotes and
# has escape sequences already replaced by the characters they stand for.
#
# Raises a parse error ('missing closing quote') when the line or the input
# ends before an unescaped closing quote is found.
def scan_quoted_string
  return unless peek(/"/)

  start_line = @line
  start_column = @column

  chars = []
  closed = false
  while (char = peek_char)
    break if char == "\n"

    advance(char)
    # A backslash introduces an escape sequence. When nothing follows the
    # backslash, scan_escaped_char returns nil and the backslash itself is
    # kept.
    unescaped = char == '\\' ? scan_escaped_char : nil
    chars << (unescaped || char)
    next if unescaped

    # An unescaped double quote closes the string, unless it is the opening
    # quote itself (the first collected character).
    if chars.size >= 2 && char == '"'
      closed = true
      break
    end
  end

  # The last char must be a non-escaped double quote.
  raise_parse_error 'missing closing quote', create_position(@line, @column) unless closed

  create_token(:QUOTED_STRING, chars.join, start_line, start_column)
end
399
+
400
# Consumes the character following a backslash and returns the character the
# escape sequence stands for.
#
# Supported sequences: \\ \" \n \r \t. Returns nil when no character is
# left; raises a parse error for any other character.
def scan_escaped_char
  char = scan_char
  return unless char

  case char
  when '\\' then '\\'
  when '"' then '"'
  when 'n' then "\n"
  when 'r' then "\r"
  when 't' then "\t"
  else
    # The character has already been consumed, so step back one column.
    position = create_position(@line, @column - 1)
    raise_parse_error "invalid escape sequence: \\#{char}", position
  end
end
411
+
412
# Scans an unquoted value: consumes characters for as long as they are valid
# unquoted characters, then strips surrounding whitespace.
#
# The token kind is :BOOLEAN, :NULL or :NUMBER when the stripped text
# matches the corresponding pattern (checked in that order), and
# :UNQUOTED_STRING otherwise. The token is positioned where scanning began.
def scan_unquoted_string
  start_line = @line
  start_column = @column

  chars = []
  while (char = peek_char) && valid_unquoted_char?(char)
    advance(char)
    chars << char
  end

  text = chars.join.strip
  typed = { BOOLEAN: BOOLEAN, NULL: NULL, NUMBER: NUMBER }
          .find { |_kind, pattern| pattern.match?(text) }
  kind = typed ? typed.first : :UNQUOTED_STRING

  create_token(kind, text, start_line, start_column)
end
431
+
432
# Tells whether +char+ may appear inside an unquoted string.
# Line feeds, active delimiter characters and any character matched by the
# bracket, brace, colon, double-quote or backslash patterns are rejected.
def valid_unquoted_char?(char)
  return false if char == "\n"
  return false if match_delimiter?(char)

  reserved = [L_BRACKET, R_BRACKET, L_BRACE, R_BRACE, COLON, D_QUOTE, BACK_SLASH]
  reserved.none? { |pattern| pattern.match?(char) }
end
438
+
439
# True when +char+ is one of the currently active delimiter characters.
def match_delimiter?(char) = @delimiters.include?(char)
442
+
443
# Builds a Token of the given +kind+ and +text+, tagged with the current
# indentation depth and a Position for +line+/+column+.
def create_token(kind, text, line, column)
  Token.new(text, kind, @indent_depth, create_position(line, column))
end
447
+
448
# Builds a Position for +line+/+column+ in the file being scanned.
def create_position(line, column) = Position.new(@filename, line, column)
451
+ end
452
+ end
@@ -0,0 +1,33 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RbToon
4
# Location in a source: file name, line and column.
class Position # :nodoc:
  attr_reader :filename, :line, :column

  def initialize(filename, line, column)
    @filename = filename
    @line = line
    @column = column
  end

  # Human-readable form, e.g. "filename: a.toon line: 3 column: 7".
  def to_s = "filename: #{filename} line: #{line} column: #{column}"
end
19
+
20
# A lexical token: its text, kind, indentation depth and source position.
class Token # :nodoc:
  attr_reader :text, :kind, :depth, :position

  def initialize(text, kind, depth, position)
    @text = text
    @kind = kind
    @depth = depth
    @position = position
  end
end
33
+ end
@@ -0,0 +1,7 @@
1
+ # frozen_string_literal: true
2
+
3
+ module RbToon
4
+ ##
5
+ # Version string of RbToon.
6
+ VERSION = '0.1.0'
7
+ end
data/lib/rbtoon.rb ADDED
@@ -0,0 +1,126 @@
1
+ # frozen_string_literal: true
2
+
3
+ require 'strscan'
4
+
5
+ require_relative 'rbtoon/version'
6
+ require_relative 'rbtoon/parse_error'
7
+ require_relative 'rbtoon/token'
8
+ require_relative 'rbtoon/nodes/base'
9
+ require_relative 'rbtoon/nodes/blank'
10
+ require_relative 'rbtoon/nodes/scalar'
11
+ require_relative 'rbtoon/nodes/array'
12
+ require_relative 'rbtoon/nodes/object'
13
+ require_relative 'rbtoon/nodes/root'
14
+ require_relative 'rbtoon/scanner'
15
+ require_relative 'rbtoon/handler'
16
+ require_relative 'rbtoon/generated_parser'
17
+ require_relative 'rbtoon/parser'
18
+
19
+ ##
20
+ # RbToon: Toon decoder for Ruby
21
+ #
22
+ # Toon[https://toonformat.dev] is a structural text format optimized for LLM input.
23
+ # RbToon is a Racc-based decoder gem that decodes Toon input into Ruby objects.
24
+ module RbToon
25
+ class << self
26
+ ##
27
+ # Decode the given Toon string into Ruby objects.
28
+ #
29
+ # Example:
30
+ #
31
+ # toon = RbToon.decode(<<~'TOON')
32
+ # context:
33
+ # task: Our favorite hikes together
34
+ # location: Boulder
35
+ # season: spring_2025
36
+ # friends[3]: ana,luis,sam
37
+ # hikes[3]{id,name,distanceKm,elevationGain,companion,wasSunny}:
38
+ # 1,Blue Lake Trail,7.5,320,ana,true
39
+ # 2,Ridge Overlook,9.2,540,luis,false
40
+ # 3,Wildflower Loop,5.1,180,sam,true
41
+ # TOON
42
+ # # =>
43
+ # # {
44
+ # # "context" => {
45
+ # # "task" => "Our favorite hikes together",
46
+ # # "location" => "Boulder", "season" => "spring_2025"
47
+ # # },
48
+ # # ...
49
+ #
50
+ # Error Handling:
51
+ #
52
+ # RbToon::ParseError is raised when the given Toon includes errors listed in
53
+ # the {Toon spec}[https://github.com/toon-format/spec/blob/main/SPEC.md#14-strict-mode-errors-and-diagnostics-authoritative-checklist].
54
+ #
55
+ # begin
56
+ # RbToon.decode('friends[4]: ana,Luis,sam')
57
+ # rescue RbToon::ParseError => e
58
+ # e
59
+ # end
60
+ # # => #<RbToon::ParseError: expected 4 array items, but got 3 -- filename: unknown line: 1 column: 8>
61
+ #
62
+ # Arguments:
63
+ #
64
+ # +string_or_io+::
65
+ # String or IO object containing Toon string to be parsed.
66
+ # +filename+::
67
+ # Filename string which is used for the exception message.
68
+ # (default: 'unknown')
69
+ # +symbolize_names+::
70
+ # All hash keys are symbolized when this option is true.
71
+ # (default: false)
72
+ # +strict+::
73
+ # The +strict+ mode is disabled and some error checks are not performed when this option is false.
74
+ # See the {Toon spec}[https://github.com/toon-format/spec/blob/main/SPEC.md#14-strict-mode-errors-and-diagnostics-authoritative-checklist]
75
+ # for more details.
76
+ # (default: true)
77
+ # +path_expansion+::
78
+ # Dotted keys are split into nested objects when this option is true.
79
+ # See the {Toon spec}[https://github.com/toon-format/spec/blob/main/SPEC.md#decoder-path-expansion]
80
+ # for more details.
81
+ # (default: false)
82
+ # +indent_size+::
83
+ # Indentation unit used to calculate indentation depth.
84
+ # See the {Toon spec}[https://github.com/toon-format/spec/blob/main/SPEC.md#12-indentation-and-whitespace]
85
+ # for more details.
86
+ # (default: 2)
87
+ # +debug+::
88
+ # Debug messages are displayed when this option is set to true.
89
+ # (default: false)
90
def decode(
  string_or_io,
  filename: 'unknown', symbolize_names: false,
  strict: true, path_expansion: false, indent_size: 2, debug: false
)
  # Anything that is not a String is expected to respond to #read.
  toon = string_or_io.is_a?(String) ? string_or_io : string_or_io.read

  # Validate the parsed tree first, then convert it to Ruby objects.
  parse(toon, filename, strict, indent_size, debug)
    .tap { |root| root.validate(strict:) }
    .to_ruby(symbolize_names:, strict:, path_expansion:)
end
106
+
107
+ ##
108
+ # Similar to +RbToon.decode+, but the Toon string is read from the file specified by the +filename+ argument.
109
+ #
110
+ # See also RbToon.decode.
111
def decode_file(filename, **optargs)
  # 'r:bom|utf-8' reads the file as UTF-8 and drops a leading BOM if present.
  File.open(filename, 'r:bom|utf-8') { |io| decode(io, filename:, **optargs) }
end
116
+
117
+ private
118
+
119
# Wires a Scanner and a Handler into a Parser and returns the result of
# Parser#parse.
def parse(toon, filename, strict, indent_size, debug)
  Parser.new(
    Scanner.new(toon, filename, strict, indent_size),
    Handler.new,
    debug:
  ).parse
end
125
+ end
126
+ end
metadata ADDED
@@ -0,0 +1,62 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rbtoon
3
+ version: !ruby/object:Gem::Version
4
+ version: 0.1.0
5
+ platform: ruby
6
+ authors:
7
+ - Taichi Ishitani
8
+ bindir: bin
9
+ cert_chain: []
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
+ dependencies: []
12
+ description: Toon parser for Ruby
13
+ email:
14
+ - taichi730@gmail.com
15
+ executables: []
16
+ extensions: []
17
+ extra_rdoc_files: []
18
+ files:
19
+ - LICENSE.txt
20
+ - README.md
21
+ - lib/rbtoon.rb
22
+ - lib/rbtoon/generated_parser.rb
23
+ - lib/rbtoon/handler.rb
24
+ - lib/rbtoon/nodes/array.rb
25
+ - lib/rbtoon/nodes/base.rb
26
+ - lib/rbtoon/nodes/blank.rb
27
+ - lib/rbtoon/nodes/object.rb
28
+ - lib/rbtoon/nodes/root.rb
29
+ - lib/rbtoon/nodes/scalar.rb
30
+ - lib/rbtoon/parse_error.rb
31
+ - lib/rbtoon/parser.rb
32
+ - lib/rbtoon/scanner.rb
33
+ - lib/rbtoon/token.rb
34
+ - lib/rbtoon/version.rb
35
+ homepage: https://github.com/taichi-ishitani/rbtoon
36
+ licenses:
37
+ - MIT
38
+ metadata:
39
+ bug_tracker_uri: https://github.com/taichi-ishitani/rbtoon/issues
40
+ changelog_uri: https://github.com/taichi-ishitani/rbtoon/releases
41
+ documentation_uri: https://taichi-ishitani.github.io/rbtoon/
42
+ homepage_uri: https://github.com/taichi-ishitani/rbtoon
43
+ rubygems_mfa_required: 'true'
44
+ source_code_uri: https://github.com/taichi-ishitani/rbtoon
45
+ rdoc_options: []
46
+ require_paths:
47
+ - lib
48
+ required_ruby_version: !ruby/object:Gem::Requirement
49
+ requirements:
50
+ - - ">="
51
+ - !ruby/object:Gem::Version
52
+ version: 3.2.0
53
+ required_rubygems_version: !ruby/object:Gem::Requirement
54
+ requirements:
55
+ - - ">="
56
+ - !ruby/object:Gem::Version
57
+ version: '0'
58
+ requirements: []
59
+ rubygems_version: 4.0.3
60
+ specification_version: 4
61
+ summary: Toon parser for Ruby
62
+ test_files: []