cton 0.1.1 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/cton/encoder.rb CHANGED
@@ -1,27 +1,35 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  require "stringio"
4
+ require "time"
5
+ require "date"
4
6
 
5
7
  module Cton
6
8
  class Encoder
7
- SAFE_TOKEN = /\A[0-9A-Za-z_.:-]+\z/.freeze
8
- NUMERIC_TOKEN = /\A-?(?:\d+)(?:\.\d+)?(?:[eE][+-]?\d+)?\z/.freeze
9
+ SAFE_TOKEN = /\A[0-9A-Za-z_.:-]+\z/
10
+ NUMERIC_TOKEN = /\A-?(?:\d+)(?:\.\d+)?(?:[eE][+-]?\d+)?\z/
9
11
  RESERVED_LITERALS = %w[true false null].freeze
10
12
  FLOAT_DECIMAL_PRECISION = Float::DIG
11
13
 
12
- def initialize(separator: "\n")
14
+ def initialize(separator: "\n", pretty: false, decimal_mode: :fast)
13
15
  @separator = separator || ""
16
+ @pretty = pretty
17
+ @decimal_mode = decimal_mode
18
+ raise ArgumentError, "decimal_mode must be :fast or :precise" unless %i[fast precise].include?(@decimal_mode)
19
+
20
+ @indent_level = 0
21
+ @table_schema_cache = {}
14
22
  end
15
23
 
16
- def encode(payload)
17
- @io = StringIO.new
24
+ def encode(payload, io: nil)
25
+ @io = io || StringIO.new
18
26
  encode_root(payload)
19
- @io.string
27
+ @io.string if @io.is_a?(StringIO)
20
28
  end
21
29
 
22
30
  private
23
31
 
24
- attr_reader :separator, :io
32
+ attr_reader :separator, :io, :pretty, :indent_level, :decimal_mode
25
33
 
26
34
  def encode_root(value)
27
35
  case value
@@ -43,6 +51,12 @@ module Cton
43
51
  end
44
52
 
45
53
  def encode_value(value, context:)
54
+ if defined?(Set) && value.is_a?(Set)
55
+ value = value.to_a
56
+ elsif defined?(OpenStruct) && value.is_a?(OpenStruct)
57
+ value = value.to_h
58
+ end
59
+
46
60
  case value
47
61
  when Hash
48
62
  encode_object(value)
@@ -61,13 +75,19 @@ module Cton
61
75
  end
62
76
 
63
77
  io << "("
78
+ indent if pretty
64
79
  first = true
65
80
  hash.each do |key, value|
66
- io << "," unless first
81
+ if first
82
+ first = false
83
+ else
84
+ io << ","
85
+ newline if pretty
86
+ end
67
87
  io << format_key(key) << "="
68
88
  encode_value(value, context: :object)
69
- first = false
70
89
  end
90
+ outdent if pretty
71
91
  io << ")"
72
92
  end
73
93
 
@@ -80,8 +100,8 @@ module Cton
80
100
 
81
101
  io << "[" << length.to_s << "]"
82
102
 
83
- if table_candidate?(list)
84
- encode_table(list)
103
+ if (header = table_schema_for(list))
104
+ encode_table(list, header)
85
105
  else
86
106
  io << "="
87
107
  if list.all? { |value| scalar?(value) }
@@ -92,65 +112,102 @@ module Cton
92
112
  end
93
113
  end
94
114
 
95
- def encode_table(rows)
96
- header = rows.first.keys
115
+ def encode_table(rows, header)
97
116
  io << "{"
98
117
  io << header.map { |key| format_key(key) }.join(",")
99
118
  io << "}="
100
119
 
120
+ indent if pretty
101
121
  first_row = true
102
122
  rows.each do |row|
103
- io << ";" unless first_row
123
+ if first_row
124
+ first_row = false
125
+ else
126
+ io << ";"
127
+ newline if pretty
128
+ end
129
+
104
130
  first_col = true
105
131
  header.each do |field|
106
132
  io << "," unless first_col
107
133
  encode_scalar(row.fetch(field))
108
134
  first_col = false
109
135
  end
110
- first_row = false
111
136
  end
137
+ outdent if pretty
112
138
  end
113
139
 
114
140
  def encode_scalar_list(list)
115
- first = true
116
- list.each do |value|
117
- io << "," unless first
118
- encode_scalar(value)
119
- first = false
141
+ if pretty
142
+ indent
143
+ first = true
144
+ list.each do |value|
145
+ if first
146
+ first = false
147
+ else
148
+ io << ","
149
+ newline
150
+ end
151
+ encode_scalar(value)
152
+ end
153
+ outdent
154
+ else
155
+ first = true
156
+ if fast_scalar_stream?(list)
157
+ io << fast_scalar_stream(list)
158
+ else
159
+ list.each do |value|
160
+ io << "," unless first
161
+ encode_scalar(value)
162
+ first = false
163
+ end
164
+ end
120
165
  end
121
166
  end
122
167
 
123
168
  def encode_mixed_list(list)
169
+ indent if pretty
124
170
  first = true
125
171
  list.each do |value|
126
- io << "," unless first
172
+ if first
173
+ first = false
174
+ else
175
+ io << ","
176
+ newline if pretty
177
+ end
127
178
  encode_value(value, context: :array)
128
- first = false
129
179
  end
180
+ outdent if pretty
130
181
  end
131
182
 
132
183
  def encode_scalar(value)
184
+ io << scalar_to_string(value)
185
+ end
186
+
187
+ def scalar_to_string(value)
133
188
  case value
134
189
  when String
135
- encode_string(value)
190
+ format_string(value)
136
191
  when TrueClass, FalseClass
137
- io << (value ? "true" : "false")
192
+ value ? "true" : "false"
138
193
  when NilClass
139
- io << "null"
194
+ "null"
140
195
  when Numeric
141
- io << format_number(value)
196
+ format_number(value)
197
+ when Time, Date
198
+ format_string(value.iso8601)
142
199
  else
143
200
  raise EncodeError, "Unsupported value: #{value.class}"
144
201
  end
145
202
  end
146
203
 
147
- def encode_string(value)
204
+ def format_string(value)
148
205
  if value.empty?
149
- io << '""'
206
+ '""'
150
207
  elsif string_needs_quotes?(value)
151
- io << quote_string(value)
208
+ quote_string(value)
152
209
  else
153
- io << value
210
+ value
154
211
  end
155
212
  end
156
213
 
@@ -172,7 +229,7 @@ module Cton
172
229
  end
173
230
 
174
231
  def normalize_decimal_string(string)
175
- stripped = string.start_with?("+") ? string[1..-1] : string
232
+ stripped = string.start_with?("+") ? string[1..] : string
176
233
  return "0" if zero_string?(stripped)
177
234
 
178
235
  if stripped.include?(".")
@@ -188,6 +245,17 @@ module Cton
188
245
  end
189
246
 
190
247
  def float_decimal_string(value)
248
+ return precise_float_decimal_string(value) if decimal_mode == :precise
249
+
250
+ decimal = value.to_s
251
+ if decimal.include?("e") || decimal.include?("E")
252
+ precise_float_decimal_string(value)
253
+ else
254
+ decimal
255
+ end
256
+ end
257
+
258
+ def precise_float_decimal_string(value)
191
259
  if defined?(BigDecimal)
192
260
  BigDecimal(value.to_s).to_s("F")
193
261
  else
@@ -197,14 +265,14 @@ module Cton
197
265
 
198
266
  def format_key(key)
199
267
  key_string = key.to_s
200
- unless SAFE_TOKEN.match?(key_string)
201
- raise EncodeError, "Invalid key: #{key_string.inspect}"
202
- end
268
+ raise EncodeError, "Invalid key: #{key_string.inspect}" unless SAFE_TOKEN.match?(key_string)
269
+
203
270
  key_string
204
271
  end
205
272
 
206
273
  def string_needs_quotes?(value)
207
274
  return true unless SAFE_TOKEN.match?(value)
275
+
208
276
  RESERVED_LITERALS.include?(value) || numeric_like?(value)
209
277
  end
210
278
 
@@ -229,19 +297,81 @@ module Cton
229
297
  end
230
298
 
231
299
  def scalar?(value)
232
- value.is_a?(String) || value.is_a?(Numeric) || value == true || value == false || value.nil?
300
+ value.is_a?(String) || value.is_a?(Numeric) || value == true || value == false || value.nil? || value.is_a?(Time) || value.is_a?(Date)
301
+ end
302
+
303
+ def table_schema_for(rows)
304
+ cache_lookup = @table_schema_cache.fetch(rows.object_id, :__missing__)
305
+ return cache_lookup unless cache_lookup == :__missing__
306
+
307
+ schema = compute_table_schema(rows)
308
+ @table_schema_cache[rows.object_id] = schema
233
309
  end
234
310
 
235
- def table_candidate?(rows)
236
- return false if rows.empty?
311
+ def compute_table_schema(rows)
312
+ return nil if rows.empty?
237
313
 
238
314
  first = rows.first
239
- return false unless first.is_a?(Hash) && !first.empty?
315
+ return nil unless first.is_a?(Hash) && !first.empty?
316
+
317
+ header = first.keys.freeze
240
318
 
241
- keys = first.keys
242
- rows.all? do |row|
243
- row.is_a?(Hash) && row.keys == keys && row.values.all? { |val| scalar?(val) }
319
+ rows.each do |row|
320
+ return nil unless row.is_a?(Hash)
321
+ return nil unless row.keys == header
322
+ return nil unless row.values.all? { |val| scalar?(val) }
244
323
  end
324
+
325
+ header
326
+ end
327
+
328
+ def fast_scalar_stream?(list)
329
+ !pretty && list.length > 4 && homogeneous_scalar_tokens?(list)
330
+ end
331
+
332
+ def homogeneous_scalar_tokens?(list)
333
+ first_class = nil
334
+ list.all? do |value|
335
+ return false unless scalar?(value)
336
+
337
+ token_class = value.class
338
+ first_class ||= token_class
339
+ token_class == first_class && token_does_not_require_quotes?(value)
340
+ end
341
+ end
342
+
343
+ def token_does_not_require_quotes?(value)
344
+ case value
345
+ when String
346
+ !value.empty? && !string_needs_quotes?(value)
347
+ when Integer, TrueClass, FalseClass, NilClass
348
+ true
349
+ else
350
+ false
351
+ end
352
+ end
353
+
354
+ def fast_scalar_stream(list)
355
+ buffer = String.new
356
+ list.each_with_index do |value, index|
357
+ buffer << "," unless index.zero?
358
+ buffer << scalar_to_string(value)
359
+ end
360
+ buffer
361
+ end
362
+
363
+ def indent
364
+ @indent_level += 1
365
+ newline
366
+ end
367
+
368
+ def outdent
369
+ @indent_level -= 1
370
+ newline
371
+ end
372
+
373
+ def newline
374
+ io << "\n" << (" " * indent_level)
245
375
  end
246
376
  end
247
377
  end
data/lib/cton/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
  # frozen_string_literal: true
2
2
 
3
3
  module Cton
4
- VERSION = "0.1.1"
4
+ VERSION = "0.3.0"
5
5
  end
data/lib/cton.rb CHANGED
@@ -12,9 +12,24 @@ module Cton
12
12
 
13
13
  module_function
14
14
 
15
- def dump(payload, options = {})
15
+ def dump(payload, *args)
16
+ io = nil
17
+ options = {}
18
+
19
+ args.each do |arg|
20
+ if arg.is_a?(Hash)
21
+ options.merge!(arg)
22
+ else
23
+ io = arg
24
+ end
25
+ end
26
+
27
+ io ||= options[:io]
28
+
16
29
  separator = options.fetch(:separator, "\n")
17
- Encoder.new(separator: separator).encode(payload)
30
+ pretty = options.fetch(:pretty, false)
31
+ decimal_mode = options.fetch(:decimal_mode, :fast)
32
+ Encoder.new(separator: separator, pretty: pretty, decimal_mode: decimal_mode).encode(payload, io: io)
18
33
  end
19
34
  alias generate dump
20
35
 
@@ -23,4 +38,3 @@ module Cton
23
38
  end
24
39
  alias parse load
25
40
  end
26
-
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cton
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.1
4
+ version: 0.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Davide Santangelo
8
8
  autorequire:
9
9
  bindir: exe
10
10
  cert_chain: []
11
- date: 2025-11-18 00:00:00.000000000 Z
11
+ date: 2025-11-20 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: CTON provides a JSON-compatible, token-efficient text representation
14
14
  optimized for LLM prompts.
@@ -25,6 +25,7 @@ files:
25
25
  - LICENSE.txt
26
26
  - README.md
27
27
  - Rakefile
28
+ - bench/encode_decode_bench.rb
28
29
  - lib/cton.rb
29
30
  - lib/cton/decoder.rb
30
31
  - lib/cton/encoder.rb
@@ -37,6 +38,7 @@ metadata:
37
38
  homepage_uri: https://github.com/davidesantangelo/cton
38
39
  source_code_uri: https://github.com/davidesantangelo/cton
39
40
  changelog_uri: https://github.com/davidesantangelo/cton/blob/master/CHANGELOG.md
41
+ rubygems_mfa_required: 'true'
40
42
  post_install_message:
41
43
  rdoc_options: []
42
44
  require_paths: