cton 0.1.1 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/CHANGELOG.md +33 -0
- data/README.md +258 -35
- data/bench/encode_decode_bench.rb +65 -0
- data/lib/cton/decoder.rb +98 -242
- data/lib/cton/encoder.rb +171 -41
- data/lib/cton/version.rb +1 -1
- data/lib/cton.rb +17 -3
- metadata +4 -2
data/lib/cton/encoder.rb
CHANGED
|
@@ -1,27 +1,35 @@
|
|
|
1
1
|
# frozen_string_literal: true
|
|
2
2
|
|
|
3
3
|
require "stringio"
|
|
4
|
+
require "time"
|
|
5
|
+
require "date"
|
|
4
6
|
|
|
5
7
|
module Cton
|
|
6
8
|
class Encoder
|
|
7
|
-
SAFE_TOKEN = /\A[0-9A-Za-z_.:-]+\z
|
|
8
|
-
NUMERIC_TOKEN = /\A-?(?:\d+)(?:\.\d+)?(?:[eE][+-]?\d+)?\z
|
|
9
|
+
SAFE_TOKEN = /\A[0-9A-Za-z_.:-]+\z/
|
|
10
|
+
NUMERIC_TOKEN = /\A-?(?:\d+)(?:\.\d+)?(?:[eE][+-]?\d+)?\z/
|
|
9
11
|
RESERVED_LITERALS = %w[true false null].freeze
|
|
10
12
|
FLOAT_DECIMAL_PRECISION = Float::DIG
|
|
11
13
|
|
|
12
|
-
def initialize(separator: "\n")
|
|
14
|
+
def initialize(separator: "\n", pretty: false, decimal_mode: :fast)
|
|
13
15
|
@separator = separator || ""
|
|
16
|
+
@pretty = pretty
|
|
17
|
+
@decimal_mode = decimal_mode
|
|
18
|
+
raise ArgumentError, "decimal_mode must be :fast or :precise" unless %i[fast precise].include?(@decimal_mode)
|
|
19
|
+
|
|
20
|
+
@indent_level = 0
|
|
21
|
+
@table_schema_cache = {}
|
|
14
22
|
end
|
|
15
23
|
|
|
16
|
-
def encode(payload)
|
|
17
|
-
@io = StringIO.new
|
|
24
|
+
def encode(payload, io: nil)
|
|
25
|
+
@io = io || StringIO.new
|
|
18
26
|
encode_root(payload)
|
|
19
|
-
@io.string
|
|
27
|
+
@io.string if @io.is_a?(StringIO)
|
|
20
28
|
end
|
|
21
29
|
|
|
22
30
|
private
|
|
23
31
|
|
|
24
|
-
attr_reader :separator, :io
|
|
32
|
+
attr_reader :separator, :io, :pretty, :indent_level, :decimal_mode
|
|
25
33
|
|
|
26
34
|
def encode_root(value)
|
|
27
35
|
case value
|
|
@@ -43,6 +51,12 @@ module Cton
|
|
|
43
51
|
end
|
|
44
52
|
|
|
45
53
|
def encode_value(value, context:)
|
|
54
|
+
if defined?(Set) && value.is_a?(Set)
|
|
55
|
+
value = value.to_a
|
|
56
|
+
elsif defined?(OpenStruct) && value.is_a?(OpenStruct)
|
|
57
|
+
value = value.to_h
|
|
58
|
+
end
|
|
59
|
+
|
|
46
60
|
case value
|
|
47
61
|
when Hash
|
|
48
62
|
encode_object(value)
|
|
@@ -61,13 +75,19 @@ module Cton
|
|
|
61
75
|
end
|
|
62
76
|
|
|
63
77
|
io << "("
|
|
78
|
+
indent if pretty
|
|
64
79
|
first = true
|
|
65
80
|
hash.each do |key, value|
|
|
66
|
-
|
|
81
|
+
if first
|
|
82
|
+
first = false
|
|
83
|
+
else
|
|
84
|
+
io << ","
|
|
85
|
+
newline if pretty
|
|
86
|
+
end
|
|
67
87
|
io << format_key(key) << "="
|
|
68
88
|
encode_value(value, context: :object)
|
|
69
|
-
first = false
|
|
70
89
|
end
|
|
90
|
+
outdent if pretty
|
|
71
91
|
io << ")"
|
|
72
92
|
end
|
|
73
93
|
|
|
@@ -80,8 +100,8 @@ module Cton
|
|
|
80
100
|
|
|
81
101
|
io << "[" << length.to_s << "]"
|
|
82
102
|
|
|
83
|
-
if
|
|
84
|
-
encode_table(list)
|
|
103
|
+
if (header = table_schema_for(list))
|
|
104
|
+
encode_table(list, header)
|
|
85
105
|
else
|
|
86
106
|
io << "="
|
|
87
107
|
if list.all? { |value| scalar?(value) }
|
|
@@ -92,65 +112,102 @@ module Cton
|
|
|
92
112
|
end
|
|
93
113
|
end
|
|
94
114
|
|
|
95
|
-
def encode_table(rows)
|
|
96
|
-
header = rows.first.keys
|
|
115
|
+
def encode_table(rows, header)
|
|
97
116
|
io << "{"
|
|
98
117
|
io << header.map { |key| format_key(key) }.join(",")
|
|
99
118
|
io << "}="
|
|
100
119
|
|
|
120
|
+
indent if pretty
|
|
101
121
|
first_row = true
|
|
102
122
|
rows.each do |row|
|
|
103
|
-
|
|
123
|
+
if first_row
|
|
124
|
+
first_row = false
|
|
125
|
+
else
|
|
126
|
+
io << ";"
|
|
127
|
+
newline if pretty
|
|
128
|
+
end
|
|
129
|
+
|
|
104
130
|
first_col = true
|
|
105
131
|
header.each do |field|
|
|
106
132
|
io << "," unless first_col
|
|
107
133
|
encode_scalar(row.fetch(field))
|
|
108
134
|
first_col = false
|
|
109
135
|
end
|
|
110
|
-
first_row = false
|
|
111
136
|
end
|
|
137
|
+
outdent if pretty
|
|
112
138
|
end
|
|
113
139
|
|
|
114
140
|
def encode_scalar_list(list)
|
|
115
|
-
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
141
|
+
if pretty
|
|
142
|
+
indent
|
|
143
|
+
first = true
|
|
144
|
+
list.each do |value|
|
|
145
|
+
if first
|
|
146
|
+
first = false
|
|
147
|
+
else
|
|
148
|
+
io << ","
|
|
149
|
+
newline
|
|
150
|
+
end
|
|
151
|
+
encode_scalar(value)
|
|
152
|
+
end
|
|
153
|
+
outdent
|
|
154
|
+
else
|
|
155
|
+
first = true
|
|
156
|
+
if fast_scalar_stream?(list)
|
|
157
|
+
io << fast_scalar_stream(list)
|
|
158
|
+
else
|
|
159
|
+
list.each do |value|
|
|
160
|
+
io << "," unless first
|
|
161
|
+
encode_scalar(value)
|
|
162
|
+
first = false
|
|
163
|
+
end
|
|
164
|
+
end
|
|
120
165
|
end
|
|
121
166
|
end
|
|
122
167
|
|
|
123
168
|
def encode_mixed_list(list)
|
|
169
|
+
indent if pretty
|
|
124
170
|
first = true
|
|
125
171
|
list.each do |value|
|
|
126
|
-
|
|
172
|
+
if first
|
|
173
|
+
first = false
|
|
174
|
+
else
|
|
175
|
+
io << ","
|
|
176
|
+
newline if pretty
|
|
177
|
+
end
|
|
127
178
|
encode_value(value, context: :array)
|
|
128
|
-
first = false
|
|
129
179
|
end
|
|
180
|
+
outdent if pretty
|
|
130
181
|
end
|
|
131
182
|
|
|
132
183
|
def encode_scalar(value)
|
|
184
|
+
io << scalar_to_string(value)
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
def scalar_to_string(value)
|
|
133
188
|
case value
|
|
134
189
|
when String
|
|
135
|
-
|
|
190
|
+
format_string(value)
|
|
136
191
|
when TrueClass, FalseClass
|
|
137
|
-
|
|
192
|
+
value ? "true" : "false"
|
|
138
193
|
when NilClass
|
|
139
|
-
|
|
194
|
+
"null"
|
|
140
195
|
when Numeric
|
|
141
|
-
|
|
196
|
+
format_number(value)
|
|
197
|
+
when Time, Date
|
|
198
|
+
format_string(value.iso8601)
|
|
142
199
|
else
|
|
143
200
|
raise EncodeError, "Unsupported value: #{value.class}"
|
|
144
201
|
end
|
|
145
202
|
end
|
|
146
203
|
|
|
147
|
-
def
|
|
204
|
+
def format_string(value)
|
|
148
205
|
if value.empty?
|
|
149
|
-
|
|
206
|
+
'""'
|
|
150
207
|
elsif string_needs_quotes?(value)
|
|
151
|
-
|
|
208
|
+
quote_string(value)
|
|
152
209
|
else
|
|
153
|
-
|
|
210
|
+
value
|
|
154
211
|
end
|
|
155
212
|
end
|
|
156
213
|
|
|
@@ -172,7 +229,7 @@ module Cton
|
|
|
172
229
|
end
|
|
173
230
|
|
|
174
231
|
def normalize_decimal_string(string)
|
|
175
|
-
stripped = string.start_with?("+") ? string[1
|
|
232
|
+
stripped = string.start_with?("+") ? string[1..] : string
|
|
176
233
|
return "0" if zero_string?(stripped)
|
|
177
234
|
|
|
178
235
|
if stripped.include?(".")
|
|
@@ -188,6 +245,17 @@ module Cton
|
|
|
188
245
|
end
|
|
189
246
|
|
|
190
247
|
def float_decimal_string(value)
|
|
248
|
+
return precise_float_decimal_string(value) if decimal_mode == :precise
|
|
249
|
+
|
|
250
|
+
decimal = value.to_s
|
|
251
|
+
if decimal.include?("e") || decimal.include?("E")
|
|
252
|
+
precise_float_decimal_string(value)
|
|
253
|
+
else
|
|
254
|
+
decimal
|
|
255
|
+
end
|
|
256
|
+
end
|
|
257
|
+
|
|
258
|
+
def precise_float_decimal_string(value)
|
|
191
259
|
if defined?(BigDecimal)
|
|
192
260
|
BigDecimal(value.to_s).to_s("F")
|
|
193
261
|
else
|
|
@@ -197,14 +265,14 @@ module Cton
|
|
|
197
265
|
|
|
198
266
|
def format_key(key)
|
|
199
267
|
key_string = key.to_s
|
|
200
|
-
unless SAFE_TOKEN.match?(key_string)
|
|
201
|
-
|
|
202
|
-
end
|
|
268
|
+
raise EncodeError, "Invalid key: #{key_string.inspect}" unless SAFE_TOKEN.match?(key_string)
|
|
269
|
+
|
|
203
270
|
key_string
|
|
204
271
|
end
|
|
205
272
|
|
|
206
273
|
def string_needs_quotes?(value)
|
|
207
274
|
return true unless SAFE_TOKEN.match?(value)
|
|
275
|
+
|
|
208
276
|
RESERVED_LITERALS.include?(value) || numeric_like?(value)
|
|
209
277
|
end
|
|
210
278
|
|
|
@@ -229,19 +297,81 @@ module Cton
|
|
|
229
297
|
end
|
|
230
298
|
|
|
231
299
|
def scalar?(value)
|
|
232
|
-
value.is_a?(String) || value.is_a?(Numeric) || value == true || value == false || value.nil?
|
|
300
|
+
value.is_a?(String) || value.is_a?(Numeric) || value == true || value == false || value.nil? || value.is_a?(Time) || value.is_a?(Date)
|
|
301
|
+
end
|
|
302
|
+
|
|
303
|
+
def table_schema_for(rows)
|
|
304
|
+
cache_lookup = @table_schema_cache.fetch(rows.object_id, :__missing__)
|
|
305
|
+
return cache_lookup unless cache_lookup == :__missing__
|
|
306
|
+
|
|
307
|
+
schema = compute_table_schema(rows)
|
|
308
|
+
@table_schema_cache[rows.object_id] = schema
|
|
233
309
|
end
|
|
234
310
|
|
|
235
|
-
def
|
|
236
|
-
return
|
|
311
|
+
def compute_table_schema(rows)
|
|
312
|
+
return nil if rows.empty?
|
|
237
313
|
|
|
238
314
|
first = rows.first
|
|
239
|
-
return
|
|
315
|
+
return nil unless first.is_a?(Hash) && !first.empty?
|
|
316
|
+
|
|
317
|
+
header = first.keys.freeze
|
|
240
318
|
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
319
|
+
rows.each do |row|
|
|
320
|
+
return nil unless row.is_a?(Hash)
|
|
321
|
+
return nil unless row.keys == header
|
|
322
|
+
return nil unless row.values.all? { |val| scalar?(val) }
|
|
244
323
|
end
|
|
324
|
+
|
|
325
|
+
header
|
|
326
|
+
end
|
|
327
|
+
|
|
328
|
+
def fast_scalar_stream?(list)
|
|
329
|
+
!pretty && list.length > 4 && homogeneous_scalar_tokens?(list)
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
def homogeneous_scalar_tokens?(list)
|
|
333
|
+
first_class = nil
|
|
334
|
+
list.all? do |value|
|
|
335
|
+
return false unless scalar?(value)
|
|
336
|
+
|
|
337
|
+
token_class = value.class
|
|
338
|
+
first_class ||= token_class
|
|
339
|
+
token_class == first_class && token_does_not_require_quotes?(value)
|
|
340
|
+
end
|
|
341
|
+
end
|
|
342
|
+
|
|
343
|
+
def token_does_not_require_quotes?(value)
|
|
344
|
+
case value
|
|
345
|
+
when String
|
|
346
|
+
!value.empty? && !string_needs_quotes?(value)
|
|
347
|
+
when Integer, TrueClass, FalseClass, NilClass
|
|
348
|
+
true
|
|
349
|
+
else
|
|
350
|
+
false
|
|
351
|
+
end
|
|
352
|
+
end
|
|
353
|
+
|
|
354
|
+
def fast_scalar_stream(list)
|
|
355
|
+
buffer = String.new
|
|
356
|
+
list.each_with_index do |value, index|
|
|
357
|
+
buffer << "," unless index.zero?
|
|
358
|
+
buffer << scalar_to_string(value)
|
|
359
|
+
end
|
|
360
|
+
buffer
|
|
361
|
+
end
|
|
362
|
+
|
|
363
|
+
def indent
|
|
364
|
+
@indent_level += 1
|
|
365
|
+
newline
|
|
366
|
+
end
|
|
367
|
+
|
|
368
|
+
def outdent
|
|
369
|
+
@indent_level -= 1
|
|
370
|
+
newline
|
|
371
|
+
end
|
|
372
|
+
|
|
373
|
+
def newline
|
|
374
|
+
io << "\n" << (" " * indent_level)
|
|
245
375
|
end
|
|
246
376
|
end
|
|
247
377
|
end
|
data/lib/cton/version.rb
CHANGED
data/lib/cton.rb
CHANGED
|
@@ -12,9 +12,24 @@ module Cton
|
|
|
12
12
|
|
|
13
13
|
module_function
|
|
14
14
|
|
|
15
|
-
def dump(payload,
|
|
15
|
+
def dump(payload, *args)
|
|
16
|
+
io = nil
|
|
17
|
+
options = {}
|
|
18
|
+
|
|
19
|
+
args.each do |arg|
|
|
20
|
+
if arg.is_a?(Hash)
|
|
21
|
+
options.merge!(arg)
|
|
22
|
+
else
|
|
23
|
+
io = arg
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
|
|
27
|
+
io ||= options[:io]
|
|
28
|
+
|
|
16
29
|
separator = options.fetch(:separator, "\n")
|
|
17
|
-
|
|
30
|
+
pretty = options.fetch(:pretty, false)
|
|
31
|
+
decimal_mode = options.fetch(:decimal_mode, :fast)
|
|
32
|
+
Encoder.new(separator: separator, pretty: pretty, decimal_mode: decimal_mode).encode(payload, io: io)
|
|
18
33
|
end
|
|
19
34
|
alias generate dump
|
|
20
35
|
|
|
@@ -23,4 +38,3 @@ module Cton
|
|
|
23
38
|
end
|
|
24
39
|
alias parse load
|
|
25
40
|
end
|
|
26
|
-
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: cton
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.3.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Davide Santangelo
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2025-11-
|
|
11
|
+
date: 2025-11-20 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: CTON provides a JSON-compatible, token-efficient text representation
|
|
14
14
|
optimized for LLM prompts.
|
|
@@ -25,6 +25,7 @@ files:
|
|
|
25
25
|
- LICENSE.txt
|
|
26
26
|
- README.md
|
|
27
27
|
- Rakefile
|
|
28
|
+
- bench/encode_decode_bench.rb
|
|
28
29
|
- lib/cton.rb
|
|
29
30
|
- lib/cton/decoder.rb
|
|
30
31
|
- lib/cton/encoder.rb
|
|
@@ -37,6 +38,7 @@ metadata:
|
|
|
37
38
|
homepage_uri: https://github.com/davidesantangelo/cton
|
|
38
39
|
source_code_uri: https://github.com/davidesantangelo/cton
|
|
39
40
|
changelog_uri: https://github.com/davidesantangelo/cton/blob/master/CHANGELOG.md
|
|
41
|
+
rubygems_mfa_required: 'true'
|
|
40
42
|
post_install_message:
|
|
41
43
|
rdoc_options: []
|
|
42
44
|
require_paths:
|