sorbet-toon 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,267 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../constants'
4
+ require_relative '../errors'
5
+ require_relative '../shared/string_utils'
6
+ require_relative '../shared/validation'
7
+ require_relative '../shared/literal_utils'
8
+
9
+ module Sorbet
10
+ module Toon
11
+ module Decode
12
+ module Parser
13
+ module_function
14
+
15
# Parses an array header line such as `key[3]{a,b}: inline` into its parts.
#
# @param content [String] the line content to inspect
# @param default_delimiter [String] delimiter reported when the bracket
#   segment does not carry an explicit tab or pipe marker
# @return [Hash, nil] `{ header:, inline_values: }`, or nil when the line has
#   no opening bracket, no closing bracket, no colon after the brackets, or a
#   bracket segment that does not parse. `inline_values` is the stripped text
#   after the colon, or nil when that text is empty.
# @raise [Sorbet::Toon::DecodeError] propagated from the key/field helpers
def parse_array_header_line(content, default_delimiter)
  open_at = find_bracket_start(content.lstrip, content)
  return nil unless open_at

  close_at = content.index(Constants::CLOSE_BRACKET, open_at)
  return nil unless close_at

  colon_at = find_colon_after_brackets(content, close_at)
  return nil unless colon_at

  # The key is extracted before the bracket segment is parsed, so a malformed
  # quoted key raises even when the segment would have been rejected.
  key = extract_key_from_header(content, open_at)

  segment = parse_bracket_segment(content[(open_at + 1)...close_at], default_delimiter)
  return nil unless segment

  delimiter = segment[:delimiter]
  fields = extract_fields(content, close_at, colon_at, delimiter)

  remainder = content[(colon_at + 1)..]&.strip
  remainder = nil if remainder&.empty?

  {
    header: {
      key: key,
      length: segment[:length],
      delimiter: delimiter,
      fields: fields,
      has_length_marker: segment[:has_length_marker]
    },
    inline_values: remainder
  }
end
50
+
51
# Parses the text found between `[` and `]` of an array header.
#
# The segment may start with the Constants::HASH length marker and may end
# with a Constants::TAB or Constants::PIPE delimiter marker; what remains
# must be the declared length.
#
# The length is accepted only as plain decimal digits (surrounding whitespace
# is tolerated). Kernel#Integer is deliberately not used here: it applies
# Ruby literal rules, so it would read "010" as octal 8 and would accept
# "0x10", "1_000", "+3" and "-3" as lengths.
#
# @param segment [String] raw bracket content, without the brackets
# @param default_delimiter [String] delimiter to report when the segment has
#   no explicit delimiter marker
# @return [Hash, nil] `{ length:, delimiter:, has_length_marker: }`, or nil
#   when the length is not a non-negative decimal integer
def parse_bracket_segment(segment, default_delimiter)
  body = segment
  has_length_marker = body.start_with?(Constants::HASH)
  body = body[1..] if has_length_marker

  delimiter = default_delimiter
  # Tab is checked before pipe, matching the original precedence.
  [Constants::TAB, Constants::PIPE].each do |marker|
    next unless body.end_with?(marker)

    delimiter = marker
    body = body[0...-1]
    break
  end

  digits = body.strip
  return nil unless digits.match?(/\A[0-9]+\z/)

  {
    length: digits.to_i,
    delimiter: delimiter,
    has_length_marker: has_length_marker
  }
end
78
+
79
# Splits a row on `delimiter`, leaving delimiters inside double-quoted
# sections alone.
#
# Quote characters and backslash escape pairs are copied through verbatim;
# nothing is unescaped here. Every returned cell is whitespace-stripped.
#
# @param input [String] the row text
# @param delimiter [String] single-character delimiter
# @return [Array<String>] the cells; empty input yields an empty array, while
#   a trailing delimiter yields a trailing empty cell
def parse_delimited_values(input, delimiter)
  cells = []
  buffer = +''
  quoted = false
  pos = 0
  total = input.length

  until pos >= total
    ch = input[pos]
    step = 1

    if quoted && ch == Constants::BACKSLASH && pos + 1 < total
      # Keep the escape pair together so an escaped quote does not toggle state.
      buffer << ch << input[pos + 1]
      step = 2
    elsif ch == Constants::DOUBLE_QUOTE
      quoted = !quoted
      buffer << ch
    elsif !quoted && ch == delimiter
      cells << buffer.strip
      buffer = +''
    else
      buffer << ch
    end

    pos += step
  end

  # Flush the last cell unless the input produced nothing at all.
  cells << buffer.strip unless buffer.empty? && cells.empty?
  cells
end
115
+
116
# Converts every raw cell of a row with parse_primitive_token.
#
# @param values [Array<String>] raw cell tokens
# @return [Array] the decoded values, in the same order
def map_row_values_to_primitives(values)
  values.map do |raw_token|
    parse_primitive_token(raw_token)
  end
end
119
+
120
# Decodes a single scalar token.
#
# - blank token            -> ''
# - double-quoted token    -> unescaped inner text
# - true / false / null literal constants -> true / false / nil
# - token accepted by Shared::LiteralUtils.numeric_literal? -> Float, except
#   that any zero value is returned as Integer 0
# - anything else          -> the stripped token itself
#
# @param token [String] raw token text
# @return [String, Float, Integer, true, false, nil]
# @raise [Sorbet::Toon::DecodeError] when a token opens with a double quote
#   but does not end with one
def parse_primitive_token(token)
  text = token.strip
  return '' if text.empty?

  if text.start_with?(Constants::DOUBLE_QUOTE)
    terminated = text.length >= 2 && text[-1] == Constants::DOUBLE_QUOTE
    raise Sorbet::Toon::DecodeError, "Unterminated string literal: #{text}" unless terminated

    return Shared::StringUtils.unescape_string(text[1...-1])
  end

  case text
  when Constants::TRUE_LITERAL then true
  when Constants::FALSE_LITERAL then false
  when Constants::NULL_LITERAL then nil
  else
    return text unless Shared::LiteralUtils.numeric_literal?(text)

    number = Float(text)
    # Collapses 0.0 and -0.0 into the Integer 0.
    number.zero? ? 0 : number
  end
end
148
+
149
# Reads an object key that starts at `start_index` and is followed by a colon.
#
# @param content [String] the line content
# @param start_index [Integer] offset at which the key begins
# @return [Hash] `{ key:, end: }` where `end` is the offset just past the colon
# @raise [Sorbet::Toon::DecodeError] for an unterminated quoted key or a key
#   with no colon after it
def parse_key_token(content, start_index)
  unless content[start_index] == Constants::DOUBLE_QUOTE
    # Bare key: everything up to the first unquoted colon, stripped.
    colon_at = Shared::StringUtils.find_unquoted_char(content, Constants::COLON, start_index)
    raise Sorbet::Toon::DecodeError, 'Key must be followed by colon' if colon_at == -1

    return { key: content[start_index...colon_at].strip, end: colon_at + 1 }
  end

  closing_at = Shared::StringUtils.find_closing_quote(content, start_index)
  raise Sorbet::Toon::DecodeError, 'Unterminated quoted key' if closing_at == -1

  key = Shared::StringUtils.unescape_string(content[(start_index + 1)...closing_at])
  colon_at = Shared::StringUtils.find_unquoted_char(content, Constants::COLON, closing_at + 1)
  raise Sorbet::Toon::DecodeError, 'Key must be followed by colon' if colon_at == -1

  { key: key, end: colon_at + 1 }
end
167
+
168
# Locates the opening bracket of an array header inside `original`.
#
# When the left-stripped line starts with a quoted key, the search starts
# after that key's closing quote, so a bracket inside the key is ignored.
#
# @param trimmed [String] `original` with leading whitespace removed
# @param original [String] the untouched line content
# @return [Integer, nil] offset into `original`, or nil when no bracket is
#   found or the quoted key is unterminated
def find_bracket_start(trimmed, original)
  return original.index(Constants::OPEN_BRACKET) unless trimmed.start_with?(Constants::DOUBLE_QUOTE)

  closing_at = Shared::StringUtils.find_closing_quote(trimmed, 0)
  return nil if closing_at == -1

  # Translate the offset from `trimmed` back into `original` coordinates.
  leading = original.length - trimmed.length
  original.index(Constants::OPEN_BRACKET, leading + closing_at + 1)
end
178
+ private_class_method :find_bracket_start
179
+
180
# Finds the colon that terminates an array header.
#
# If a brace group opens before the first colon that follows the bracket, the
# search resumes after the matching closing brace, so colons inside the brace
# group are skipped.
#
# @param content [String] the line content
# @param bracket_end [Integer] offset of the closing bracket
# @return [Integer, nil] offset of the terminating colon, or nil if none
def find_colon_after_brackets(content, bracket_end)
  open_brace = content.index(Constants::OPEN_BRACE, bracket_end)
  first_colon = content.index(Constants::COLON, bracket_end)

  resume_at = bracket_end
  if open_brace && first_colon && open_brace < first_colon
    close_brace = content.index(Constants::CLOSE_BRACE, open_brace)
    # An unclosed brace leaves the search start at the bracket.
    resume_at = close_brace + 1 if close_brace
  end

  content.index(Constants::COLON, [bracket_end, resume_at].max)
end
193
+ private_class_method :find_colon_after_brackets
194
+
195
# Extracts the key that precedes the opening bracket of an array header.
#
# @param content [String] the line content
# @param bracket_start [Integer] offset of the opening bracket
# @return [String, nil] the key (unescaped when it was quoted), or nil when
#   nothing but whitespace precedes the bracket
# @raise [Sorbet::Toon::DecodeError] when a quoted key has no closing quote
def extract_key_from_header(content, bracket_start)
  return nil if bracket_start.zero?

  prefix = content[0...bracket_start].strip
  return nil if prefix.empty?
  return prefix unless prefix.start_with?(Constants::DOUBLE_QUOTE)

  closing_at = Shared::StringUtils.find_closing_quote(prefix, 0)
  raise Sorbet::Toon::DecodeError, 'Unterminated quoted key' if closing_at == -1

  Shared::StringUtils.unescape_string(prefix[1...closing_at])
end
210
+ private_class_method :extract_key_from_header
211
+
212
# Extracts the field names listed in `{...}` between the bracket segment and
# the header's terminating colon.
#
# @param content [String] the line content
# @param bracket_end [Integer] offset of the closing bracket
# @param colon_index [Integer] offset of the header's terminating colon
# @param delimiter [String] delimiter the field list must use
# @return [Array<String>, nil] field names (quoted names are unescaped), or
#   nil when there is no complete brace group before the colon
# @raise [Sorbet::Toon::DecodeError] when the list uses a different delimiter
def extract_fields(content, bracket_end, colon_index, delimiter)
  open_brace = content.index(Constants::OPEN_BRACE, bracket_end)
  return nil if open_brace.nil? || open_brace >= colon_index

  close_brace = content.index(Constants::CLOSE_BRACE, open_brace)
  return nil if close_brace.nil? || close_brace >= colon_index

  listing = content[(open_brace + 1)...close_brace]
  ensure_matching_field_delimiter!(listing, delimiter)

  parse_delimited_values(listing, delimiter).map do |name|
    name = name.strip
    if name.start_with?(Constants::DOUBLE_QUOTE)
      # Drops the first and last character, then unescapes the inner text.
      Shared::StringUtils.unescape_string(name[1...-1])
    else
      name
    end
  end
end
230
+ private_class_method :extract_fields
231
+
232
# Tells whether the text after a list hyphen looks like an array header: it
# starts with an opening bracket (ignoring surrounding whitespace) and
# contains an unquoted colon.
#
# @param content [String] text following the hyphen
# @return [Boolean]
def array_header_after_hyphen?(content)
  return false unless content.strip.start_with?(Constants::OPEN_BRACKET)

  Shared::StringUtils.find_unquoted_char(content, Constants::COLON) != -1
end
237
+
238
# Tells whether the text after a list hyphen contains an unquoted colon,
# i.e. whether it can be read as the first `key: value` field of an object.
#
# @param content [String] text following the hyphen
# @return [Boolean]
def object_first_field_after_hyphen?(content)
  colon_at = Shared::StringUtils.find_unquoted_char(content, Constants::COLON)
  colon_at != -1
end
241
+
242
# Verifies that a field list does not contain, outside quotes, any delimiter
# from Constants::DELIMITERS other than the expected one.
#
# @param content [String] the text between the header's braces
# @param delimiter [String] the delimiter declared for this array
# @return [Array<String>] the alternative delimiters that were checked
# @raise [Sorbet::Toon::DecodeError] on the first foreign delimiter found
def ensure_matching_field_delimiter!(content, delimiter)
  foreign = Constants::DELIMITERS.values.reject { |candidate| candidate == delimiter }

  foreign.each do |other|
    position = Shared::StringUtils.find_unquoted_char(content, other)
    unless position.negative?
      raise Sorbet::Toon::DecodeError,
            "Field delimiter mismatch: expected #{describe_delimiter(delimiter)} but found #{describe_delimiter(other)}"
    end
  end
end
251
+ private_class_method :ensure_matching_field_delimiter!
252
+
253
# Returns a human-readable label for a delimiter, used in error messages.
# Anything that is neither the tab nor the pipe constant is labelled a comma.
#
# @param delimiter [String]
# @return [String]
def describe_delimiter(delimiter)
  return 'tab (\\t)' if Constants::TAB == delimiter
  return 'pipe (|)' if Constants::PIPE == delimiter

  'comma (,)'
end
263
+ private_class_method :describe_delimiter
264
+ end
265
+ end
266
+ end
267
+ end
@@ -0,0 +1,118 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../constants'
4
+
5
+ module Sorbet
6
+ module Toon
7
+ module Decode
8
# One non-blank source line: the raw text, the number of indentation spaces,
# the text after the leading whitespace, the nesting depth, and the 1-based
# line number.
ParsedLine = Struct.new(
  :raw,
  :indent,
  :content,
  :depth,
  :line_number,
  keyword_init: true
)

# Position data kept for a blank line: 1-based line number, number of
# indentation spaces, and nesting depth.
BlankLineInfo = Struct.new(
  :line_number,
  :indent,
  :depth,
  keyword_init: true
)
10
+
11
# Forward-only cursor over a list of parsed lines.
#
# It also carries the list of blank-line records so callers can reach them
# through the same object.
class LineCursor
  attr_reader :blank_lines

  # @param lines [Array] parsed lines; each must respond to `depth` for the
  #   depth-aware helpers to work
  # @param blank_lines [Array] blank-line records, exposed unchanged
  def initialize(lines, blank_lines = [])
    @lines = lines
    @blank_lines = blank_lines
    @index = 0
  end

  # @return [Object, nil] the line at the cursor, without consuming it
  def peek
    @lines[@index]
  end

  # Consumes and returns the line at the cursor. At the end it returns nil
  # and leaves the cursor where it is.
  def next
    peek.tap { |line| @index += 1 if line }
  end

  # Moves the cursor forward unconditionally, even past the end.
  #
  # @return [Integer] the new cursor position
  def advance
    @index += 1
  end

  # @return [Object, nil] the line just before the cursor, or nil when
  #   nothing has been consumed yet
  def current
    return nil unless @index.positive?

    @lines[@index - 1]
  end

  # @return [Boolean] true once the cursor is at or past the last line
  def at_end?
    @lines.length <= @index
  end

  # @return [Integer] total number of lines, regardless of cursor position
  def length
    @lines.length
  end

  # @param target_depth [Integer]
  # @return [Object, nil] the upcoming line if its depth equals
  #   `target_depth`, otherwise nil
  def peek_at_depth(target_depth)
    candidate = peek
    return nil if candidate.nil? || candidate.depth < target_depth

    candidate.depth == target_depth ? candidate : nil
  end

  # @param target_depth [Integer]
  # @return [Boolean] whether the upcoming line sits exactly at `target_depth`
  def has_more_at_depth?(target_depth)
    found = peek_at_depth(target_depth)
    !found.nil?
  end
end
59
+
60
# Splits source text into indentation-aware line records.
module Scanner
  module_function

  # Breaks `source` into ParsedLine records (non-blank lines) and
  # BlankLineInfo records (blank lines).
  #
  # `indent` is the number of Constants::SPACE characters found in the leading
  # run of spaces and tabs; `depth` is that count divided by `indent_size`,
  # rounded down.
  #
  # @param source [String, nil] the text to scan
  # @param indent_size [Integer] number of spaces per nesting level
  # @param strict [Boolean] when truthy, non-blank lines get their leading
  #   whitespace validated
  # @return [Hash] `{ lines: [...], blank_lines: [...] }`; both lists are
  #   empty when `source` is nil or only whitespace
  # @raise [RuntimeError] in strict mode, for tabs in the indentation or an
  #   indentation that is not a multiple of `indent_size`
  def to_parsed_lines(source, indent_size, strict)
    return { lines: [], blank_lines: [] } if source.nil? || source.strip.empty?

    parsed = []
    blanks = []

    # The -1 limit keeps trailing empty segments, so line numbers stay exact.
    source.split("\n", -1).each.with_index(1) do |raw, line_number|
      leading = raw[/\A[ \t]*/] || ''
      indent = leading.count(Constants::SPACE)
      content = raw[leading.length..] || ''
      depth = compute_depth_from_indent(indent, indent_size)

      if content.strip.empty?
        blanks << BlankLineInfo.new(line_number: line_number, indent: indent, depth: depth)
        next
      end

      validate_leading_whitespace!(leading, line_number, indent, indent_size) if strict

      parsed << ParsedLine.new(
        raw: raw,
        indent: indent,
        content: content,
        depth: depth,
        line_number: line_number
      )
    end

    { lines: parsed, blank_lines: blanks }
  end

  # Converts a space count into a nesting depth, rounding down.
  def compute_depth_from_indent(indent_spaces, indent_size)
    ratio = indent_spaces / indent_size.to_f
    ratio.floor
  end
  private_class_method :compute_depth_from_indent

  # Strict-mode checks on a line's leading whitespace.
  #
  # @raise [RuntimeError] when the whitespace contains a tab, or when the
  #   space count is positive and not a multiple of `indent_size`
  def validate_leading_whitespace!(leading_whitespace, line_number, indent, indent_size)
    if leading_whitespace.include?(Constants::TAB)
      raise "Line #{line_number}: Tabs are not allowed in indentation in strict mode"
    end

    return unless indent.positive?
    return if (indent % indent_size).zero?

    raise "Line #{line_number}: Indentation must be exact multiple of #{indent_size}, but found #{indent} spaces"
  end
  private_class_method :validate_leading_whitespace!
end
116
+ end
117
+ end
118
+ end
@@ -0,0 +1,66 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative '../constants'
4
+ require_relative '../errors'
5
+
6
+ module Sorbet
7
+ module Toon
8
+ module Decode
9
# Count and layout checks applied while decoding arrays.
module Validation
  module_function

  # In strict mode, checks that the number of decoded items matches the
  # declared length. Does nothing when `strict` is falsy.
  #
  # @raise [RangeError] on a mismatch
  def assert_expected_count(actual, expected, item_type, strict:)
    return if !strict || actual == expected

    raise RangeError, "Expected #{expected} #{item_type}, but got #{actual}"
  end

  # Raises if the upcoming line is another list item at `item_depth`.
  #
  # @param cursor [#at_end?, #peek] line cursor
  # @raise [RangeError] when an extra list item follows
  def validate_no_extra_list_items(cursor, item_depth, expected_count)
    return if cursor.at_end?

    upcoming = cursor.peek
    return unless upcoming
    return unless upcoming.depth == item_depth
    return unless upcoming.content.start_with?(Constants::LIST_ITEM_PREFIX)

    raise RangeError, "Expected #{expected_count} list array items, but found more"
  end

  # Raises if the upcoming line at `row_depth` looks like one more tabular
  # row (not a list item, and shaped like a data row).
  #
  # @param cursor [#at_end?, #peek] line cursor
  # @param header [Hash] parsed header; `:delimiter` and `:length` are read
  # @raise [RangeError] when an extra row follows
  def validate_no_extra_tabular_rows(cursor, row_depth, header)
    return if cursor.at_end?

    upcoming = cursor.peek
    return unless upcoming
    return unless upcoming.depth == row_depth
    return if upcoming.content.start_with?(Constants::LIST_ITEM_PREFIX)
    return unless data_row?(upcoming.content, header[:delimiter])

    raise RangeError, "Expected #{header[:length]} tabular rows, but found more"
  end

  # In strict mode, rejects blank lines strictly between `start_line` and
  # `end_line` (both bounds excluded). Does nothing when `strict` is falsy.
  #
  # @param blank_lines [Array<#line_number>] recorded blank lines
  # @raise [Sorbet::Toon::DecodeError] naming the first offending line
  def validate_no_blank_lines_in_range(start_line, end_line, blank_lines, strict:, context:)
    return unless strict

    offenders = blank_lines.reject do |blank|
      blank.line_number <= start_line || blank.line_number >= end_line
    end
    return if offenders.empty?

    raise Sorbet::Toon::DecodeError,
          "Line #{offenders.first.line_number}: Blank lines inside #{context} are not allowed in strict mode"
  end

  # A line counts as a data row when it has no colon at all, or when the
  # delimiter appears before the first colon.
  def data_row?(content, delimiter)
    colon_pos = content.index(Constants::COLON)
    delimiter_pos = content.index(delimiter)

    colon_pos.nil? || (!delimiter_pos.nil? && delimiter_pos < colon_pos)
  end
  private_class_method :data_row?
end
64
+ end
65
+ end
66
+ end
@@ -0,0 +1,42 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative 'codec'
4
+ require_relative 'reconstructor'
5
+
6
+ module Sorbet
7
+ module Toon
8
# Entry point that decodes a TOON payload and hands the result to the
# Reconstructor.
module Decoder
  # Keyword overrides that are forwarded to the config; any other extra
  # keyword passed to `decode` is ignored.
  CONFIG_KEYS = %i[indent strict].freeze

  class << self
    # Decodes `payload` with the resolved indent/strict settings, then
    # reconstructs the result.
    #
    # @param payload [String] the TOON text
    # @param config [#resolve] object whose `resolve(overrides)` returns
    #   something responding to `indent` and `strict`
    # @param signature [Object, nil] passed through to the Reconstructor
    # @param role [Symbol] passed through to the Reconstructor
    # @param struct_class [Class, nil] passed through to the Reconstructor
    # @param overrides [Hash] per-call settings; only CONFIG_KEYS are used
    # @return [Object] whatever Reconstructor.reconstruct returns
    def decode(payload, config:, signature: nil, role: :output, struct_class: nil, **overrides)
      settings = config.resolve(extract_overrides(overrides, CONFIG_KEYS))

      tree = Sorbet::Toon::Codec.decode(payload, indent: settings.indent, strict: settings.strict)

      Sorbet::Toon::Reconstructor.reconstruct(
        tree,
        signature: signature,
        struct_class: struct_class,
        role: role
      )
    end

    private

    # Moves the entries named in `keys` out of `options` into a new hash,
    # in the order of `keys`. `options` is mutated.
    def extract_overrides(options, keys)
      picked = {}
      keys.each do |key|
        picked[key] = options.delete(key) if options.key?(key)
      end
      picked
    end
  end
end
41
+ end
42
+ end