toon-format 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.ruby-version +1 -0
- data/CHANGELOG.md +71 -0
- data/CODE_OF_CONDUCT.md +132 -0
- data/CONTRIBUTING.md +138 -0
- data/LICENSE.txt +21 -0
- data/README.md +242 -0
- data/Rakefile +12 -0
- data/benchmark/README.md +206 -0
- data/benchmark/csv_vs_toon_benchmark.rb +71 -0
- data/benchmark/decode_benchmark.rb +63 -0
- data/benchmark/encode_benchmark.rb +82 -0
- data/benchmark/format_comparison_benchmark.rb +161 -0
- data/benchmark/memory_benchmark.rb +97 -0
- data/benchmark/nesting_benchmark.rb +220 -0
- data/benchmark/real_world_benchmark.rb +230 -0
- data/benchmark/round_trip_benchmark.rb +201 -0
- data/benchmark/run_all_benchmarks.rb +165 -0
- data/benchmark/scalability_benchmark.rb +124 -0
- data/benchmark/token_reduction_benchmark.rb +104 -0
- data/benchmark/validation_benchmark.rb +124 -0
- data/exe/toon-format +155 -0
- data/lib/toon_format/decoder.rb +36 -0
- data/lib/toon_format/encoder.rb +221 -0
- data/lib/toon_format/errors.rb +36 -0
- data/lib/toon_format/parser.rb +269 -0
- data/lib/toon_format/rails/extensions.rb +16 -0
- data/lib/toon_format/railtie.rb +15 -0
- data/lib/toon_format/validator.rb +68 -0
- data/lib/toon_format/version.rb +5 -0
- data/lib/toon_format.rb +73 -0
- data/sig/toon/format.rbs +6 -0
- metadata +76 -0
|
@@ -0,0 +1,221 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ToonFormat
  # Encodes Ruby objects to TOON format strings.
  #
  # Supported values are nil, true/false, Numeric, String, Hash and Array;
  # any other type raises EncodeError. Hashes are written as `key: value`
  # lines, arrays of uniform primitive-valued hashes as a tabular block
  # (`[N,]{fields}:` followed by one delimited row per element) and all other
  # arrays as a list block (`[N]:` followed by one element per line).
  class Encoder
    DEFAULT_OPTIONS = {
      delimiter: ",",
      indent: 2,
      length_marker: true
    }.freeze

    MAX_DEPTH = 100
    MAX_ARRAY_SIZE = 100_000

    # A string matching this pattern must be wrapped in double quotes.
    QUOTE_PATTERN = /
      \A\s |                    # Leading whitespace
      \s\z |                    # Trailing whitespace
      \A(null|true|false)\z |   # TOON keywords
      \A-?\d+(\.\d+)?\z |       # Numeric pattern
      [:\[\]{},"\n]             # Structural characters, quotes, line breaks
    /x

    # Escape sequences applied inside quoted strings. Escaping the line break
    # keeps every value on a single physical line; escaping the backslash
    # keeps the other sequences unambiguous.
    ESCAPES = {
      "\\" => "\\\\",
      "\"" => "\\\"",
      "\n" => "\\n",
      "\r" => "\\r",
      "\t" => "\\t"
    }.freeze

    # Encode Ruby object to TOON format string
    #
    # @param data [Object] Ruby object to encode
    # @param options [Hash] Encoding options (:delimiter, :indent, :length_marker)
    # @return [String] TOON formatted string
    # @raise [EncodeError] on unsupported types, cycles, or exceeded limits
    def self.encode(data, options = {})
      new(options).encode(data)
    end

    # @param options [Hash] overrides merged over DEFAULT_OPTIONS
    def initialize(options = {})
      @options = DEFAULT_OPTIONS.merge(options)
      @indent_level = 0
      # Containers currently being encoded, keyed by identity (not equality),
      # used to detect circular references.
      @visited = {}.compare_by_identity
    end

    # Main encode method
    #
    # @param data [Object] value to encode
    # @param depth [Integer] current nesting depth (callers normally omit it)
    # @return [String]
    # @raise [EncodeError] if depth exceeds MAX_DEPTH or data is unsupported
    def encode(data, depth = 0)
      check_depth(depth)
      encode_value(data, depth)
    end

    private

    # Dispatches on the value's class.
    def encode_value(data, depth)
      case data
      when NilClass then "null"
      when TrueClass, FalseClass then data.to_s
      when Numeric then encode_number(data)
      when String then encode_string(data)
      when Hash then encode_object(data, depth)
      when Array then encode_array(data, depth)
      else
        raise EncodeError, "Unsupported type: #{data.class}"
      end
    end

    def check_depth(depth)
      raise EncodeError, "Maximum nesting depth #{MAX_DEPTH} exceeded" if depth > MAX_DEPTH
    end

    # Zero (including -0.0) is written as "0"; NaN and infinities as "null".
    # Floats whose #to_s uses exponent notation are expanded to plain decimal
    # digits, because Parser#parse_primitive only recognises `-?\d+` and
    # `-?\d+\.\d+` as numbers.
    def encode_number(num)
      return "0" if num.zero?
      return "null" if num.respond_to?(:nan?) && num.nan?
      return "null" if num.respond_to?(:infinite?) && num.infinite?

      text = num.to_s
      num.is_a?(Float) && text.include?("e") ? expand_exponent(text) : text
    end

    # Rewrites a Float#to_s result such as "1.5e-07" as "0.00000015" by
    # moving the decimal point; no digits are added or dropped.
    def expand_exponent(text)
      mantissa, exponent = text.split("e")
      sign = mantissa.start_with?("-") ? "-" : ""
      int_part, frac_part = mantissa.sub(/\A-/, "").split(".")
      digits = "#{int_part}#{frac_part}"
      point = int_part.length + exponent.to_i # index of the decimal point within digits

      body =
        if point <= 0
          "0.#{"0" * -point}#{digits}"
        elsif point >= digits.length
          "#{digits.ljust(point, "0")}.0"
        else
          "#{digits[0, point]}.#{digits[point..-1]}"
        end
      "#{sign}#{body}"
    end

    def encode_string(str)
      return '""' if str.empty?
      return quote(str) if needs_quoting?(str)

      str
    end

    # Writes one `key: value` line per entry; non-empty containers go on the
    # following lines, indented one level deeper.
    def encode_object(hash, depth)
      return "{}" if hash.empty?

      with_cycle_guard(hash) do
        hash.map { |key, value| encode_pair(key, value, depth) }.join("\n")
      end
    end

    def encode_pair(key, value, depth)
      value_str = with_indent { encode(value, depth + 1) }
      # Decide by type rather than by looking for "\n" in the output: a nested
      # hash with a single primitive entry encodes to one line but must still
      # be placed below its key.
      separator = nested_block?(value) ? "\n" : " "
      "#{current_indent}#{key}:#{separator}#{value_str}"
    end

    # True for values whose encoding already carries its own indentation
    # (non-empty hashes and arrays).
    def nested_block?(value)
      (value.is_a?(Hash) || value.is_a?(Array)) && !value.empty?
    end

    # Runs the block one indentation level deeper, restoring the level even
    # when the block raises so the encoder instance stays reusable.
    def with_indent
      @indent_level += 1
      yield
    ensure
      @indent_level -= 1
    end

    # Marks +container+ as in progress for the duration of the block.
    def with_cycle_guard(container)
      check_circular_reference(container)
      begin
        yield
      ensure
        clear_circular_reference(container)
      end
    end

    def check_circular_reference(obj)
      raise EncodeError, "Circular reference detected" if @visited.key?(obj)

      @visited[obj] = true
    end

    def clear_circular_reference(obj)
      @visited.delete(obj)
    end

    def current_indent
      " " * (@options[:indent] * @indent_level)
    end

    def indent_lines(text, additional_levels = 0)
      indent_str = " " * (@options[:indent] * (@indent_level + additional_levels))
      text.lines.map { |line| "#{indent_str}#{line}" }.join
    end

    def encode_array(array, depth)
      return "[]" if array.empty?

      check_array_size(array)
      with_cycle_guard(array) do
        tabular?(array) ? encode_tabular_array(array, depth) : encode_list_array(array, depth)
      end
    end

    def check_array_size(array)
      return unless array.size > MAX_ARRAY_SIZE

      raise EncodeError, "Array size #{array.size} exceeds maximum #{MAX_ARRAY_SIZE}"
    end

    def primitive?(value)
      value.is_a?(String) ||
        value.is_a?(Numeric) ||
        value.is_a?(TrueClass) ||
        value.is_a?(FalseClass) ||
        value.nil?
    end

    # An array is tabular when every element is a Hash with the same keys in
    # the same order and only primitive values.
    def tabular?(array)
      return false unless array.all? { |el| el.is_a?(Hash) }
      return false if array.empty?

      keys = array.first.keys
      array.all? do |element|
        element.keys == keys &&
          element.values.all? { |v| primitive?(v) }
      end
    end

    # Header `[N,]{f1,f2}:` followed by one delimited row per element, all at
    # the current indentation.
    def encode_tabular_array(array, _depth)
      keys = array.first.keys
      length_marker = @options[:length_marker] ? "[#{array.size},]" : ""
      fields = keys.map(&:to_s).join(@options[:delimiter])
      header = "#{current_indent}#{length_marker}{#{fields}}:"

      rows = array.map do |row|
        values = keys.map { |key| encode_primitive_value(row[key]) }
        "#{current_indent}#{values.join(@options[:delimiter])}"
      end

      ([header] + rows).join("\n")
    end

    def encode_primitive_value(value)
      case value
      when NilClass then "null"
      when TrueClass, FalseClass then value.to_s
      when Numeric then encode_number(value)
      when String then encode_string(value)
      else
        raise EncodeError, "Non-primitive value in tabular array: #{value.class}"
      end
    end

    # Header `[N]:` followed by the elements one level deeper.
    def encode_list_array(array, depth)
      length_marker = @options[:length_marker] ? "[#{array.size}]" : "[]"
      header = "#{current_indent}#{length_marker}:"

      elements = with_indent do
        array.map do |item|
          encoded = encode(item, depth + 1)
          # Non-empty containers are already indented by their own encoder;
          # primitives and empty containers are not.
          nested_block?(item) ? encoded : "#{current_indent}#{encoded}"
        end
      end

      ([header] + elements).join("\n")
    end

    # Quoting is needed for anything QUOTE_PATTERN flags and for strings that
    # contain the configured row delimiter (which may differ from ",").
    def needs_quoting?(str)
      return true if str.match?(QUOTE_PATTERN)

      delimiter = @options[:delimiter].to_s
      !delimiter.empty? && str.include?(delimiter)
    end

    # Wraps the string in double quotes, applying ESCAPES. A Hash replacement
    # is used so the replacement text is taken literally by gsub.
    def quote(str)
      escaped = str.gsub(/[\\"\n\r\t]/, ESCAPES)
      "\"#{escaped}\""
    end
  end
end
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ToonFormat
  # Common ancestor of every exception defined by this library.
  class Error < StandardError; end

  # Signals that an object could not be encoded.
  class EncodeError < Error; end

  # Signals that input could not be decoded.
  class DecodeError < Error; end

  # Signals a strict-mode validation failure.
  class ValidationError < Error; end

  # Signals a syntax problem found while parsing. The optional source
  # location is exposed through #line_number and #column and, when a line is
  # given, appended to the message.
  class ParseError < DecodeError
    attr_reader :line_number, :column

    # @param message [String] description of the problem
    # @param line [Integer, nil] line where the problem was found
    # @param column [Integer, nil] column where the problem was found
    def initialize(message, line: nil, column: nil)
      @line_number = line
      @column = column
      super(format_message(message))
    end

    private

    # Returns +message+ untouched when no line is known; otherwise appends
    # " at line N" and, if a column is known, ", column M".
    def format_message(message)
      return message unless @line_number

      column_part = @column ? ", column #{@column}" : ""
      "#{message} at line #{@line_number}#{column_part}"
    end
  end
end
|
|
@@ -0,0 +1,269 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ToonFormat
  # Parses TOON format strings into Ruby objects.
  #
  # The input is processed line by line. Object keys become Symbols; scalar
  # text is converted by #parse_primitive. Arrays are introduced by a header
  # line (`[N,]{fields}:` for tabular arrays, `[N]:` for list arrays) and
  # exactly N following rows/elements are consumed.
  class Parser
    TABULAR_ARRAY_PATTERN = /\A\s*\[(\d+),\]\{(.+)\}:/
    LIST_ARRAY_PATTERN = /\A\s*\[(\d+)\]:/
    EMPTY_ARRAY_PATTERN = /\A\s*\[\]/
    KEY_VALUE_PATTERN = /\A\s*(\w+):\s*(.+)/
    KEY_ONLY_PATTERN = /\A\s*(\w+):\s*$/

    # Escape sequences recognised inside double-quoted strings.
    UNESCAPES = {
      "\\\\" => "\\",
      "\\\"" => "\"",
      "\\n" => "\n",
      "\\r" => "\r",
      "\\t" => "\t"
    }.freeze

    # @param input [String] TOON text
    # @param strict [Boolean] when true, tabular rows whose field count
    #   differs from the header raise ParseError
    def initialize(input, strict: true)
      @input = input
      @lines = input.split("\n")
      @position = 0
      @strict = strict
    end

    # Parses the whole input.
    #
    # @return [Object, nil] the decoded value; nil when the input has no
    #   non-blank line
    # @raise [ParseError] when an array ends early or (strict mode) a tabular
    #   row has the wrong number of fields
    def parse
      skip_blank_lines
      return nil if current_line.nil?

      looks_like_root_object? ? parse_root_object : parse_value
    end

    # The document is an object when its first non-blank line is a `key:` or
    # `key: value` line. Only the first line is inspected: nested arrays put
    # header and row lines below a key, so requiring every line to look like
    # a key would reject any object that contains an array.
    #
    # @return [Boolean]
    def looks_like_root_object?
      first = @lines.find { |line| !line.strip.empty? }
      !first.nil? && key_line?(first)
    end

    # Parses consecutive key lines starting at the current position into a
    # Hash, descending into nested values.
    #
    # @return [Hash{Symbol => Object}]
    def parse_root_object
      parse_object_lines
    end

    private

    def current_line
      @lines[@position]
    end

    def advance
      @position += 1
    end

    def peek_line(offset = 1)
      @lines[@position + offset]
    end

    def skip_blank_lines
      advance while current_line && current_line.strip.empty?
    end

    def key_line?(line)
      KEY_ONLY_PATTERN.match?(line) || KEY_VALUE_PATTERN.match?(line)
    end

    # Parses the value starting at the current line: an array, a single
    # `key: value` / `key:` entry (returned as a one-entry Hash), or a scalar.
    def parse_value
      line = current_line
      return nil if line.nil?

      # Arrays are always truthy (even []), so nil reliably means "no header".
      parse_array(line) || parse_entry_or_scalar(line)
    end

    def parse_entry_or_scalar(line)
      case line
      when KEY_ONLY_PATTERN
        key = ::Regexp.last_match(1)
        advance
        # Keep the key: returning only the nested value would turn
        # `users:` + array into a bare array.
        { key.to_sym => parse_nested_value }
      when KEY_VALUE_PATTERN
        key = ::Regexp.last_match(1)
        text = ::Regexp.last_match(2)
        advance
        { key.to_sym => parse_inline_value(text) }
      else
        advance
        parse_inline_value(line)
      end
    end

    # Parses the value that follows a `key:` line (array or nested object).
    def parse_nested_value
      line = current_line
      return nil if line.nil?

      parse_array(line) || parse_object_lines
    end

    # Parses an array when +line+ is an array header and consumes its lines.
    #
    # @return [Array, nil] nil (without consuming anything) when +line+ is
    #   not an array header
    def parse_array(line)
      case line
      when TABULAR_ARRAY_PATTERN
        length = ::Regexp.last_match(1).to_i
        fields = ::Regexp.last_match(2).split(",").map(&:strip)
        advance
        parse_tabular_array(length, fields)
      when LIST_ARRAY_PATTERN
        length = ::Regexp.last_match(1).to_i
        advance
        parse_list_array(length)
      when EMPTY_ARRAY_PATTERN
        advance
        []
      end
    end

    # Collects key lines into a Hash until a line is indented less than the
    # first one, is not a key line, or the input ends. Blank lines are skipped.
    def parse_object_lines
      result = {}
      skip_blank_lines
      base_indent = get_indent(current_line)

      loop do
        skip_blank_lines
        line = current_line
        break if line.nil? || get_indent(line) < base_indent

        if (match = KEY_ONLY_PATTERN.match(line))
          advance
          result[match[1].to_sym] = parse_nested_value
        elsif (match = KEY_VALUE_PATTERN.match(line))
          advance
          result[match[1].to_sym] = parse_inline_value(match[2])
        else
          break
        end
      end

      result
    end

    def get_indent(line)
      return 0 if line.nil?

      line[/\A\s*/].length
    end

    # Value text that sits on the same line as its key (or alone on a line):
    # the empty-container literals or a primitive.
    def parse_inline_value(text)
      case text.strip
      when "[]" then []
      when "{}" then {}
      else parse_primitive(text)
      end
    end

    # Converts scalar text: null/true/false, integers, decimals, or a
    # double-quoted string (unescaped); anything else is returned stripped.
    def parse_primitive(value)
      stripped = value.strip

      case stripped
      when "null" then nil
      when "true" then true
      when "false" then false
      when /\A-?\d+\z/ then stripped.to_i
      when /\A-?\d+\.\d+\z/ then stripped.to_f
      when /\A"(.*)"\z/ then unescape_string(::Regexp.last_match(1))
      else stripped
      end
    end

    # Resolves \\ \" \n \r \t in one left-to-right pass, so an escaped
    # backslash followed by "n" stays a backslash and an "n".
    def unescape_string(str)
      str.gsub(/\\[\\"nrt]/, UNESCAPES)
    end

    def parse_tabular_array(length, fields)
      result = []

      length.times do
        line = current_line
        raise ParseError.new("Unexpected end of input in tabular array", line: @position + 1) if line.nil?

        values = split_row(line.strip)

        if @strict && values.size != fields.size
          raise ParseError.new(
            "Field count mismatch: expected #{fields.size}, got #{values.size}",
            line: @position + 1
          )
        end

        row = {}
        fields.each_with_index do |field, index|
          # Outside strict mode, missing trailing cells become "".
          row[field.to_sym] = parse_primitive(values[index] || "")
        end

        result << row
        advance
      end

      result
    end

    # Splits a row on commas that are outside double quotes. Inside quotes a
    # backslash protects the next character, so `\"` does not end the quoted
    # section. Every field is kept, including an empty one after a trailing
    # comma; an empty line yields no fields.
    def split_row(line)
      return [] if line.empty?

      values = []
      current = +""
      in_quotes = false
      escaped = false

      line.each_char do |char|
        if escaped
          current << char
          escaped = false
        elsif in_quotes && char == "\\"
          current << char
          escaped = true
        elsif char == '"'
          in_quotes = !in_quotes
          current << char
        elsif char == "," && !in_quotes
          values << current
          current = +""
        else
          current << char
        end
      end

      values << current
      values.map(&:strip)
    end

    def parse_list_array(length)
      result = []

      length.times do
        raise ParseError.new("Unexpected end of input in list array", line: @position + 1) if current_line.nil?

        result << parse_value
      end

      result
    end
  end
end
|
|
@@ -0,0 +1,16 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ToonFormat
  module Rails
    # Mixin that adds TOON serialization to any object responding to #as_json.
    module Extensions
      # Serializes the receiver's #as_json representation as TOON.
      #
      # @param options [Hash] keyword options passed through to ToonFormat.encode
      # @return [Object] whatever ToonFormat.encode returns for the payload
      def to_toon(**options)
        payload = as_json
        ToonFormat.encode(payload, **options)
      end
    end
  end
end
|
|
@@ -0,0 +1,15 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
# Only define the Railtie when the Rails framework (top-level ::Rails) has
# loaded its Railtie base class.
if defined?(::Rails::Railtie)
  require "toon_format/rails/extensions"

  module ToonFormat
    # Hooks ToonFormat::Rails::Extensions into ActiveRecord once it is loaded.
    #
    # The superclass is written as ::Rails::Railtie on purpose: inside
    # `module ToonFormat` a bare `Rails` resolves to ToonFormat::Rails (the
    # namespace of the extensions required above), which has no Railtie
    # constant and would raise NameError.
    class Railtie < ::Rails::Railtie
      initializer "toon_format.active_record" do
        ::ActiveSupport.on_load(:active_record) do
          include ToonFormat::Rails::Extensions
        end
      end
    end
  end
end
|
|
@@ -0,0 +1,68 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
module ToonFormat
  # Validates decoded data in strict mode by comparing the array lengths
  # declared in the TOON text with the sizes of the decoded arrays.
  class Validator
    # Matches an array header at the start of a line: `[N]:` (list) or
    # `[N,]{fields}:` (tabular). Anchoring to the line start keeps bracketed
    # numbers inside string values (e.g. `note: "see [5]"`) from being
    # mistaken for length markers.
    LENGTH_MARKER_PATTERN = /^[ \t]*\[(\d+)(?:,\]\{.+\}|\]):/

    # Validate decoded data against TOON string
    #
    # @param toon_string [String] Original TOON string
    # @param decoded_data [Object] Decoded Ruby object
    # @raise [ValidationError] If validation fails
    def self.validate!(toon_string, decoded_data)
      validate_array_lengths(toon_string, decoded_data)
    end

    # Pairs the declared lengths (in document order) with the non-empty
    # arrays found by a depth-first walk of +data+ and raises on the first
    # mismatch. Nothing is checked when the text has no length markers or
    # when the data holds more non-empty arrays than there are markers.
    def self.validate_array_lengths(toon_string, data)
      length_markers = toon_string.scan(LENGTH_MARKER_PATTERN).flatten.map(&:to_i)
      arrays = find_non_empty_arrays(data)

      # Only validate if we have length markers
      return if length_markers.empty?

      if length_markers.size != arrays.size
        # More arrays than markers is tolerated; the pairing would be
        # unreliable, so validation is skipped.
        return if arrays.size >= length_markers.size

        raise ValidationError,
              "Length marker count (#{length_markers.size}) does not match array count (#{arrays.size})"
      end

      length_markers.each_with_index do |declared_length, index|
        actual_length = arrays[index]&.size || 0

        next if actual_length == declared_length

        raise ValidationError,
              "Array #{index}: declared length #{declared_length}, actual length #{actual_length}"
      end
    end

    # Depth-first collection of every non-empty Array reachable through
    # Arrays and Hash values (empty arrays are skipped; parents come before
    # their children).
    def self.find_non_empty_arrays(data, result = [])
      case data
      when Array
        result << data unless data.empty?
        data.each { |item| find_non_empty_arrays(item, result) }
      when Hash
        data.each_value { |value| find_non_empty_arrays(value, result) }
      end

      result
    end

    # Same walk as find_non_empty_arrays but also collects empty arrays.
    #
    # @param data [Object] decoded structure
    # @param result [Array<Array>] accumulator (callers normally omit it)
    # @return [Array<Array>]
    def self.find_arrays(data, result = [])
      case data
      when Array
        result << data
        data.each { |item| find_arrays(item, result) }
      when Hash
        data.each_value { |value| find_arrays(value, result) }
      end

      result
    end

    private_class_method :validate_array_lengths, :find_non_empty_arrays
  end
end
|
data/lib/toon_format.rb
ADDED
|
@@ -0,0 +1,73 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require_relative "toon_format/version"
|
|
4
|
+
require_relative "toon_format/errors"
|
|
5
|
+
require_relative "toon_format/encoder"
|
|
6
|
+
require_relative "toon_format/parser"
|
|
7
|
+
require_relative "toon_format/decoder"
|
|
8
|
+
require_relative "toon_format/validator"
|
|
9
|
+
|
|
10
|
+
require_relative "toon_format/rails/extensions" if defined?(ActiveRecord::Base)
|
|
11
|
+
require_relative "toon_format/railtie" if defined?(Rails)
|
|
12
|
+
|
|
13
|
+
require "json"
|
|
14
|
+
|
|
15
|
+
module ToonFormat
  class << self
    # Serializes a Ruby object as a TOON string by delegating to Encoder.
    #
    # @param data [Object] Ruby object to encode
    # @param options [Hash] Encoding options
    # @option options [String] :delimiter (',') Field delimiter
    # @option options [Integer] :indent (2) Indentation spaces
    # @option options [Boolean] :length_marker (true) Include array lengths
    #
    # @return [String] TOON formatted string
    #
    # @raise [ToonFormat::EncodeError] If data cannot be encoded
    def encode(data, **options)
      Encoder.encode(data, options)
    end

    # Turns a TOON string back into a Ruby object by delegating to Decoder.
    #
    # @param toon_string [String] TOON formatted string
    # @param strict [Boolean] Enable strict validation (default: true)
    #
    # @return [Object] Decoded Ruby object
    #
    # @raise [ToonFormat::DecodeError] If decoding fails
    # @raise [ToonFormat::ValidationError] If strict validation fails
    def decode(toon_string, strict: true)
      Decoder.decode(toon_string, strict: strict)
    end

    # Compares the JSON and TOON serializations of the same data.
    #
    # @param data [Object] Ruby object to analyze
    #
    # @return [Hash] :json_tokens and :toon_tokens (estimated), :savings_percent
    #   (byte reduction relative to JSON, one decimal), :json_size and
    #   :toon_size (bytes)
    def estimate_savings(data)
      as_json = JSON.generate(data)
      as_toon = encode(data)
      json_bytes = as_json.bytesize
      toon_bytes = as_toon.bytesize
      bytes_saved = json_bytes - toon_bytes

      {
        json_tokens: estimate_tokens(as_json),
        toon_tokens: estimate_tokens(as_toon),
        savings_percent: (bytes_saved / json_bytes.to_f * 100).round(1),
        json_size: json_bytes,
        toon_size: toon_bytes
      }
    end

    private

    # Rough token estimate: one token per four bytes, rounded up.
    def estimate_tokens(text)
      text.bytesize.fdiv(4).ceil
    end
  end
end
|