toon-format 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,221 @@
1
+ # frozen_string_literal: true
2
+
3
require "set"

module ToonFormat
  # Encodes Ruby objects to TOON format strings.
  #
  # Supported values are nil, true/false, Numeric, String, Hash and Array;
  # any other type raises EncodeError. Arrays whose elements are all hashes
  # with identical keys and primitive values are written as a table
  # (one header line plus one delimited row per element); every other
  # non-empty array is written as an indented list.
  class Encoder
    DEFAULT_OPTIONS = {
      delimiter: ",",
      indent: 2,
      length_marker: true
    }.freeze

    MAX_DEPTH = 100
    MAX_ARRAY_SIZE = 100_000

    # Strings matching this pattern cannot be written bare without changing
    # their meaning (or the document layout) and are therefore quoted.
    QUOTE_PATTERN = /
      \A\s                     | # Leading whitespace
      \s\z                     | # Trailing whitespace
      \A(?:null|true|false)\z  | # TOON keywords
      \A-?\d+(?:\.\d+)?\z      | # Numeric pattern
      ["\\:\[\]{},]            | # Quotes, backslashes and structural characters
      [\n\r\t]                   # Line breaks and tabs would break the line layout
    /x

    # Replacement table used by #quote; the escaped forms are the five
    # escape sequences of the TOON string syntax.
    ESCAPE_SEQUENCES = {
      "\\" => "\\\\",
      "\"" => "\\\"",
      "\n" => "\\n",
      "\r" => "\\r",
      "\t" => "\\t"
    }.freeze

    # Encode Ruby object to TOON format string
    #
    # @param data [Object] Ruby object to encode
    # @param options [Hash] Encoding options (merged over DEFAULT_OPTIONS)
    # @return [String] TOON formatted string
    # @raise [EncodeError] on unsupported types, circular references,
    #   nesting deeper than MAX_DEPTH or arrays larger than MAX_ARRAY_SIZE
    def self.encode(data, options = {})
      new(options).encode(data)
    end

    # @param options [Hash] Encoding options (merged over DEFAULT_OPTIONS)
    def initialize(options = {})
      @options = DEFAULT_OPTIONS.merge(options)
      @indent_level = 0
      @visited = Set.new
    end

    # Encode a value located at the given nesting depth.
    #
    # @param data [Object] value to encode
    # @param depth [Integer] current nesting depth (0 for the root)
    # @return [String] encoded text
    # @raise [EncodeError] see {.encode}
    def encode(data, depth = 0)
      check_depth(depth)
      encode_value(data, depth)
    end

    private

    # Dispatch on the Ruby type of +data+.
    def encode_value(data, depth)
      case data
      when NilClass then "null"
      when TrueClass, FalseClass then data.to_s
      when Numeric then encode_number(data)
      when String then encode_string(data)
      when Hash then encode_object(data, depth)
      when Array then encode_array(data, depth)
      else
        raise EncodeError, "Unsupported type: #{data.class}"
      end
    end

    def check_depth(depth)
      raise EncodeError, "Maximum nesting depth #{MAX_DEPTH} exceeded" if depth > MAX_DEPTH
    end

    # Zero (of any numeric kind) is written as "0"; NaN and infinities have
    # no numeric spelling and are written as "null".
    def encode_number(num)
      return "0" if num.zero?
      return "null" if num.respond_to?(:nan?) && num.nan?
      return "null" if num.respond_to?(:infinite?) && num.infinite?

      num.to_s
    end

    # Strings are written bare unless that would be ambiguous.
    def encode_string(str)
      return '""' if str.empty?
      return quote(str) if needs_quoting?(str)

      str
    end

    # Encode a hash as one "key: value" line per pair; non-empty nested
    # containers go on the following lines instead of after the colon.
    def encode_object(hash, depth)
      return "{}" if hash.empty?

      check_circular_reference(hash)
      begin
        hash.map { |key, value| encode_pair(key, value, depth) }.join("\n")
      ensure
        # Always forget the container so the encoder stays usable after an error.
        clear_circular_reference(hash)
      end
    end

    # Encode a single key/value pair of an object at the current indent.
    def encode_pair(key, value, depth)
      value_str = with_indent { encode(value, depth + 1) }
      # Decide by value type, not by "contains a newline": a nested hash with
      # a single key encodes to one line but still has to start on its own line.
      separator = nested?(value) ? "\n" : " "
      "#{current_indent}#{key}:#{separator}#{value_str}"
    end

    # True for containers that encode to their own, already indented lines.
    def nested?(value)
      (value.is_a?(Hash) || value.is_a?(Array)) && !value.empty?
    end

    # Run the block one indent level deeper, restoring the level afterwards
    # even when the block raises.
    def with_indent
      @indent_level += 1
      yield
    ensure
      @indent_level -= 1
    end

    def check_circular_reference(obj)
      obj_id = obj.object_id
      raise EncodeError, "Circular reference detected" if @visited.include?(obj_id)

      @visited.add(obj_id)
    end

    def clear_circular_reference(obj)
      @visited.delete(obj.object_id)
    end

    def current_indent
      " " * (@options[:indent] * @indent_level)
    end

    def indent_lines(text, additional_levels = 0)
      indent_str = " " * (@options[:indent] * (@indent_level + additional_levels))
      text.lines.map { |line| "#{indent_str}#{line}" }.join
    end

    # Encode an array, choosing the tabular layout when every element allows it.
    def encode_array(array, depth)
      return "[]" if array.empty?

      check_array_size(array)
      check_circular_reference(array)
      begin
        tabular?(array) ? encode_tabular_array(array, depth) : encode_list_array(array, depth)
      ensure
        clear_circular_reference(array)
      end
    end

    def check_array_size(array)
      return unless array.size > MAX_ARRAY_SIZE

      raise EncodeError, "Array size #{array.size} exceeds maximum #{MAX_ARRAY_SIZE}"
    end

    def primitive?(value)
      value.is_a?(String) ||
        value.is_a?(Numeric) ||
        value.is_a?(TrueClass) ||
        value.is_a?(FalseClass) ||
        value.nil?
    end

    # An array is tabular when all elements are hashes sharing the same keys
    # (in the same order) and holding only primitive values.
    def tabular?(array)
      return false unless array.all? { |el| el.is_a?(Hash) }
      return false if array.empty?

      keys = array.first.keys
      array.all? do |element|
        element.keys == keys &&
          element.values.all? { |v| primitive?(v) }
      end
    end

    # Header "[N,]{k1,k2}:" followed by one delimited row per element.
    def encode_tabular_array(array, _depth)
      keys = array.first.keys
      length_marker = @options[:length_marker] ? "[#{array.size},]" : ""
      fields = keys.map(&:to_s).join(@options[:delimiter])
      header = "#{current_indent}#{length_marker}{#{fields}}:"

      rows = array.map do |row|
        values = keys.map { |key| encode_primitive_value(row[key]) }
        "#{current_indent}#{values.join(@options[:delimiter])}"
      end

      ([header] + rows).join("\n")
    end

    def encode_primitive_value(value)
      case value
      when NilClass then "null"
      when TrueClass, FalseClass then value.to_s
      when Numeric then encode_number(value)
      when String then encode_string(value)
      else
        raise EncodeError, "Non-primitive value in tabular array: #{value.class}"
      end
    end

    # Header "[N]:" followed by the elements one indent level deeper.
    def encode_list_array(array, depth)
      length_marker = @options[:length_marker] ? "[#{array.size}]" : "[]"
      header = "#{current_indent}#{length_marker}:"

      elements = with_indent do
        array.map do |item|
          encoded = encode(item, depth + 1)
          # Nested containers indent their own lines; only scalars (and the
          # "[]" / "{}" literals) still need the element indent added here.
          nested?(item) ? encoded : "#{current_indent}#{encoded}"
        end
      end

      ([header] + elements).join("\n")
    end

    # A string must be quoted when it matches QUOTE_PATTERN or contains the
    # configured delimiter (which may differ from the default comma).
    def needs_quoting?(str)
      return true if str.match?(QUOTE_PATTERN)

      delimiter = @options[:delimiter].to_s
      !delimiter.empty? && str.include?(delimiter)
    end

    # Wrap +str+ in double quotes, escaping backslashes, double quotes,
    # newlines, carriage returns and tabs in a single pass.
    def quote(str)
      "\"#{str.gsub(/[\\"\n\r\t]/, ESCAPE_SEQUENCES)}\""
    end
  end
end
@@ -0,0 +1,36 @@
1
+ # frozen_string_literal: true
2
+
3
module ToonFormat
  # Root of the ToonFormat exception hierarchy; rescue this to catch
  # anything raised by the library.
  class Error < StandardError; end

  # Encoding a Ruby object failed.
  class EncodeError < Error; end

  # Decoding a TOON string failed.
  class DecodeError < Error; end

  # Strict-mode validation rejected the decoded data.
  class ValidationError < Error; end

  # A syntax error met while parsing. When a line (and optionally a column)
  # is supplied, it is appended to the message and exposed through readers.
  class ParseError < DecodeError
    attr_reader :line_number, :column

    # @param message [String] description of the problem
    # @param line [Integer, nil] 1-based line number, if known
    # @param column [Integer, nil] column number, if known
    def initialize(message, line: nil, column: nil)
      @line_number = line
      @column = column
      super(format_message(message))
    end

    private

    # Append " at line L[, column C]" to the message; the column is only
    # reported together with a line number.
    def format_message(message)
      return message unless @line_number

      parts = [message, " at line #{@line_number}"]
      parts << ", column #{@column}" if @column
      parts.join
    end
  end
end
@@ -0,0 +1,269 @@
1
+ # frozen_string_literal: true
2
+
3
module ToonFormat
  # Parses TOON format strings into Ruby objects.
  #
  # The input is processed line by line with a cursor (@position). Objects
  # become hashes with symbol keys, tabular and list arrays become arrays,
  # and scalars are converted by #parse_primitive.
  class Parser
    TABULAR_ARRAY_PATTERN = /\A\s*\[(\d+),\]\{(.+)\}:/
    LIST_ARRAY_PATTERN = /\A\s*\[(\d+)\]:/
    EMPTY_ARRAY_PATTERN = /\A\s*\[\]/
    KEY_VALUE_PATTERN = /\A\s*(\w+):\s*(.+)/
    KEY_ONLY_PATTERN = /\A\s*(\w+):\s*$/

    # Escape sequences recognised inside quoted strings.
    UNESCAPES = {
      "\\\\" => "\\",
      "\\\"" => "\"",
      "\\n" => "\n",
      "\\r" => "\r",
      "\\t" => "\t"
    }.freeze

    # @param input [String] TOON text
    # @param strict [Boolean] when true, tabular rows must have exactly as
    #   many values as the header has fields
    def initialize(input, strict: true)
      @input = input
      @lines = input.split("\n")
      @position = 0
      @strict = strict
    end

    # Parse the whole input.
    #
    # @return [Object, nil] decoded value; nil for empty input
    # @raise [ParseError] on truncated arrays or (in strict mode) rows whose
    #   field count does not match the header
    def parse
      return nil if @lines.empty?

      looks_like_root_object? ? parse_root_object : parse_value
    end

    # The document is a root object when its first non-blank line is a
    # "key:" or "key: value" line. Checking only the first line (instead of
    # every line) keeps objects that contain arrays on the object path, so
    # their keys and the pairs following the array are not lost.
    def looks_like_root_object?
      first = @lines.find { |line| !line.strip.empty? }
      return false if first.nil?

      first.match?(KEY_ONLY_PATTERN) || first.match?(KEY_VALUE_PATTERN)
    end

    # Parse consecutive key lines at the root into a hash, skipping blank
    # lines and stopping at the first line that is not a key line.
    def parse_root_object
      result = {}

      while (line = current_line)
        if line.strip.empty?
          advance
          next
        end

        pair = parse_pair(line)
        break unless pair

        result[pair[0]] = pair[1]
      end

      result
    end

    private

    def current_line
      @lines[@position]
    end

    def advance
      @position += 1
    end

    def peek_line(offset = 1)
      @lines[@position + offset]
    end

    # Parse the value starting at the cursor: an array, a single key line
    # (returned as a one-pair hash) or a scalar.
    def parse_value
      line = current_line
      return nil if line.nil?

      array = parse_array(line)
      return array if array

      pair = parse_pair(line)
      return { pair[0] => pair[1] } if pair

      advance
      parse_inline_value(line)
    end

    # Parse the value that follows a bare "key:" line.
    def parse_nested_value
      line = current_line
      return nil if line.nil?

      parse_array(line) || parse_object_lines
    end

    # If +line+ is an array header (or the "[]" literal) consume the array
    # and return it; otherwise return nil without moving the cursor.
    def parse_array(line)
      if (match = TABULAR_ARRAY_PATTERN.match(line))
        advance
        parse_tabular_array(match[1].to_i, match[2].split(",").map(&:strip))
      elsif (match = LIST_ARRAY_PATTERN.match(line))
        advance
        parse_list_array(match[1].to_i)
      elsif EMPTY_ARRAY_PATTERN.match?(line)
        advance
        []
      end
    end

    # If +line+ is a key line consume it (and any nested value) and return
    # [key_symbol, value]; otherwise return nil without moving the cursor.
    # The key-only form is tested first so that a line ending in stray
    # whitespace is not mistaken for an inline value.
    def parse_pair(line)
      if (match = KEY_ONLY_PATTERN.match(line))
        advance
        [match[1].to_sym, parse_nested_value]
      elsif (match = KEY_VALUE_PATTERN.match(line))
        advance
        [match[1].to_sym, parse_inline_value(match[2])]
      end
    end

    # Parse the key lines of a nested object: every following line indented
    # at least as deep as the first one.
    def parse_object_lines
      result = {}
      base_indent = get_indent(current_line)

      while (line = current_line) && get_indent(line) >= base_indent
        pair = parse_pair(line)
        break unless pair

        result[pair[0]] = pair[1]
      end

      result
    end

    def get_indent(line)
      return 0 if line.nil?

      line.match(/\A(\s*)/)[1].length
    end

    # Value written on a single line: the empty-container literals or a scalar.
    def parse_inline_value(text)
      case text.strip
      when "[]" then []
      when "{}" then {}
      else parse_primitive(text)
      end
    end

    # Convert scalar text: null/true/false, integers, decimals, quoted
    # strings (unescaped) and, failing all of those, the bare text itself.
    def parse_primitive(value)
      stripped = value.strip

      case stripped
      when "null" then nil
      when "true" then true
      when "false" then false
      when /\A-?\d+\z/ then stripped.to_i
      when /\A-?\d+\.\d+\z/ then stripped.to_f
      when /\A"(.*)"\z/ then unescape_string(::Regexp.last_match(1))
      else stripped
      end
    end

    # Resolve escape sequences in one pass so that an escaped backslash is
    # never re-read as the start of another escape. Unknown sequences are
    # left untouched.
    def unescape_string(str)
      str.gsub(/\\[\\"nrt]/, UNESCAPES)
    end

    # Read +length+ rows and build one hash per row keyed by +fields+.
    def parse_tabular_array(length, fields)
      result = []

      length.times do
        line = current_line
        raise ParseError.new("Unexpected end of input in tabular array", line: @position + 1) if line.nil?

        values = split_row(line.strip)

        if @strict && values.size != fields.size
          raise ParseError.new(
            "Field count mismatch: expected #{fields.size}, got #{values.size}",
            line: @position + 1
          )
        end

        row = {}
        fields.each_with_index do |field, index|
          row[field.to_sym] = parse_primitive(values[index] || "")
        end

        result << row
        advance
      end

      result
    end

    # Split a row on commas, ignoring commas inside double-quoted values.
    # A backslash inside quotes protects the next character, so an escaped
    # quote does not end the quoted value. A trailing comma yields a final
    # empty field; an entirely empty line yields no fields.
    def split_row(line)
      values = []
      current = +""
      in_quotes = false
      escaped = false

      line.each_char do |char|
        if escaped
          current << char
          escaped = false
        elsif in_quotes && char == "\\"
          current << char
          escaped = true
        elsif char == '"'
          in_quotes = !in_quotes
          current << char
        elsif char == "," && !in_quotes
          values << current
          current = +""
        else
          current << char
        end
      end

      values << current unless line.empty?
      values.map(&:strip)
    end

    # Read +length+ consecutive values as the elements of a list array.
    def parse_list_array(length)
      result = []

      length.times do
        raise ParseError.new("Unexpected end of input in list array", line: @position + 1) if current_line.nil?

        result << parse_value
      end

      result
    end
  end
end
@@ -0,0 +1,16 @@
1
+ # frozen_string_literal: true
2
+
3
module ToonFormat
  module Rails
    # Mixin adding TOON serialization to objects that respond to #as_json
    # (the railtie includes it into ActiveRecord models).
    module Extensions
      # Serialize the receiver's #as_json representation as TOON.
      #
      # @param options [Hash] Encoding options, forwarded to ToonFormat.encode
      # @return [String] TOON formatted string
      def to_toon(**options)
        payload = as_json
        ToonFormat.encode(payload, **options)
      end
    end
  end
end
@@ -0,0 +1,15 @@
1
+ # frozen_string_literal: true
2
+
3
# Only hook into Rails when its Railtie API is actually loaded.
if defined?(::Rails::Railtie)
  require "toon_format/rails/extensions"

  module ToonFormat
    # Includes ToonFormat::Rails::Extensions into ActiveRecord once it loads.
    #
    # The superclass must be written with a leading "::". The extensions file
    # required above defines ToonFormat::Rails, so an unqualified "Rails"
    # inside this module resolves to that namespace and "Rails::Railtie"
    # would raise NameError.
    class Railtie < ::Rails::Railtie
      initializer "toon_format.active_record" do
        ActiveSupport.on_load(:active_record) do
          include ToonFormat::Rails::Extensions
        end
      end
    end
  end
end
@@ -0,0 +1,68 @@
1
+ # frozen_string_literal: true
2
+
3
module ToonFormat
  # Validates decoded data in strict mode by comparing the array lengths
  # declared in the TOON text with the arrays found in the decoded data.
  class Validator
    # Matches an array header at the start of a line: "[N]:" for lists and
    # "[N,]{fields}:" for tables. Anchoring to the line start keeps bracketed
    # numbers inside quoted string values (e.g. "see [3]") from being
    # mistaken for length markers.
    LENGTH_MARKER_PATTERN = /^[ \t]*\[(\d+),?\](?:\{.*\})?:/

    # Validate decoded data against TOON string
    #
    # @param toon_string [String] Original TOON string
    # @param decoded_data [Object] Decoded Ruby object
    # @raise [ValidationError] If validation fails
    def self.validate!(toon_string, decoded_data)
      validate_array_lengths(toon_string, decoded_data)
    end

    # Compare declared lengths, in document order, with the non-empty arrays
    # of +data+ in depth-first order.
    #
    # Nothing is checked when the text has no length markers or when the
    # data holds more arrays than there are markers (the markers cannot be
    # paired up reliably in that case).
    #
    # @raise [ValidationError] when there are fewer arrays than markers or a
    #   declared length differs from the actual one
    def self.validate_array_lengths(toon_string, data)
      declared = toon_string.scan(LENGTH_MARKER_PATTERN).flatten.map(&:to_i)
      return if declared.empty?

      arrays = find_non_empty_arrays(data)
      return if arrays.size > declared.size

      if arrays.size < declared.size
        raise ValidationError,
              "Length marker count (#{declared.size}) does not match array count (#{arrays.size})"
      end

      declared.each_with_index do |declared_length, index|
        actual_length = arrays[index].size
        next if actual_length == declared_length

        raise ValidationError,
              "Array #{index}: declared length #{declared_length}, actual length #{actual_length}"
      end
    end

    # Collect every non-empty array in +data+, parents before children.
    def self.find_non_empty_arrays(data, result = [])
      case data
      when Array
        result << data unless data.empty?
        data.each { |item| find_non_empty_arrays(item, result) }
      when Hash
        data.each_value { |value| find_non_empty_arrays(value, result) }
      end

      result
    end

    # Collect every array in +data+ (empty ones included), parents before
    # children.
    def self.find_arrays(data, result = [])
      case data
      when Array
        result << data
        data.each { |item| find_arrays(item, result) }
      when Hash
        data.each_value { |value| find_arrays(value, result) }
      end

      result
    end

    private_class_method :validate_array_lengths, :find_non_empty_arrays
  end
end
@@ -0,0 +1,5 @@
1
+ # frozen_string_literal: true
2
+
3
module ToonFormat
  # Version string of this release of the gem.
  VERSION = "0.1.0"
end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
+ require_relative "toon_format/version"
4
+ require_relative "toon_format/errors"
5
+ require_relative "toon_format/encoder"
6
+ require_relative "toon_format/parser"
7
+ require_relative "toon_format/decoder"
8
+ require_relative "toon_format/validator"
9
+
10
+ require_relative "toon_format/rails/extensions" if defined?(ActiveRecord::Base)
11
+ require_relative "toon_format/railtie" if defined?(Rails)
12
+
13
+ require "json"
14
+
15
module ToonFormat
  # Encode Ruby object to TOON format string
  #
  # @param data [Object] Ruby object to encode
  # @param options [Hash] Encoding options
  # @option options [String] :delimiter (',') Field delimiter
  # @option options [Integer] :indent (2) Indentation spaces
  # @option options [Boolean] :length_marker (true) Include array lengths
  #
  # @return [String] TOON formatted string
  #
  # @raise [ToonFormat::EncodeError] If data cannot be encoded
  def self.encode(data, **options)
    Encoder.encode(data, options)
  end

  # Decode TOON format string to Ruby object
  #
  # @param toon_string [String] TOON formatted string
  # @param strict [Boolean] Enable strict validation (default: true)
  #
  # @return [Object] Decoded Ruby object
  #
  # @raise [ToonFormat::DecodeError] If decoding fails
  # @raise [ToonFormat::ValidationError] If strict validation fails
  def self.decode(toon_string, strict: true)
    Decoder.decode(toon_string, strict: strict)
  end

  # Compare the JSON and TOON renderings of +data+.
  #
  # Sizes are byte sizes; token counts are estimated from the byte sizes and
  # the savings percentage is the byte reduction relative to JSON, rounded
  # to one decimal place.
  #
  # @param data [Object] Ruby object to analyze
  #
  # @return [Hash] :json_tokens, :toon_tokens, :savings_percent, :json_size
  #   and :toon_size
  def self.estimate_savings(data)
    json_text = JSON.generate(data)
    toon_text = encode(data)
    json_bytes = json_text.bytesize
    toon_bytes = toon_text.bytesize
    reduction = (json_bytes - toon_bytes) / json_bytes.to_f * 100

    {
      json_tokens: estimate_tokens(json_text),
      toon_tokens: estimate_tokens(toon_text),
      savings_percent: reduction.round(1),
      json_size: json_bytes,
      toon_size: toon_bytes
    }
  end

  # Simple approximation: ~4 characters per token, rounded up.
  def self.estimate_tokens(text)
    (text.bytesize / 4.0).ceil
  end
  private_class_method :estimate_tokens
end
@@ -0,0 +1,6 @@
1
# Type signature for the gem's namespace. The Ruby code defines the
# top-level module ToonFormat (not Toon::Format), so the signature has to
# use the same name for type checkers to associate it with the code.
module ToonFormat
  # Matches the ToonFormat::VERSION string constant.
  VERSION: String
  # See the writing guide of rbs: https://github.com/ruby/rbs#guides
end