cbor-diag 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,74 @@
1
+ # This should work with the C-ext cbor-ruby as well as with our cbor-pure
2
+ unless defined?(CBOR)
3
+ require_relative 'cbor-pure'
4
+ end
5
+
6
# Fallback rendering for CBOR diagnostic notation: an object without a more
# specific override is shown through its own #inspect.
class Object
  def cbor_diagnostic; inspect; end
end
11
+
12
# nil is written as the literal `null` in diagnostic notation.
class NilClass
  def cbor_diagnostic; 'null'; end
end
17
+
18
class Float
  # Render the float for CBOR diagnostic notation, imitating some of
  # JSON.stringify's number formatting (ECMA-262, 9.8.1):
  #
  # * magnitudes in [1e-6, 1) that #inspect prints in exponent form are
  #   expanded to plain decimals ("6.1e-05" => "0.000061");
  # * everywhere else, leading zeros of the exponent are dropped
  #   ("1.0e-07" => "1.0e-7").
  #
  # Returns a String.
  def cbor_diagnostic
    text = inspect
    magnitude = abs
    if magnitude < 1 && magnitude >= 1e-6
      text.sub(/(\d)[.](\d+)e-(\d+)/) do
        lead, fraction, exponent = $1, $2, $3.to_i
        # #inspect prints a whole mantissa as "1.0"; dropping that padding
        # zero avoids results such as "0.000010".
        fraction = fraction.sub(/0+\z/, "")
        "0.#{"0" * (exponent - 1)}#{lead}#{fraction}"
      end
    else
      text.sub(/(e[+-])0+/) { $1 }
    end
  end
end
28
+
29
# Load-time sanity checks: small floats must come out as plain decimals.
{
  0.00006103515625 => "0.00006103515625",
  0.99 => "0.99",
  0.099 => "0.099",
  0.0000099 => "0.0000099"
}.each do |number, expected|
  raise unless number.cbor_diagnostic == expected
end
33
+
34
class String
  # Supply String#b (a binary-encoded copy) when this Ruby lacks it.
  unless method_defined?(:b)
    def b
      dup.force_encoding(Encoding::BINARY)
    end
  end

  # Two lowercase hex digits per byte, joined with +sep+.
  def hexbytes(sep = '')
    each_byte.map { |byte| format('%02x', byte) }.join(sep)
  end

  # Diagnostic notation for a string:
  # * a streaming string (cbor_stream? yields its chunk lengths) is shown
  #   as "(_ chunk, chunk, ...)";
  # * a binary string is shown as h'..' hex;
  # * any other string is shown as its #inspect form with every
  #   non-ASCII UTF-16 code unit written as a \uXXXX escape.
  def cbor_diagnostic
    if (chunk_lengths = cbor_stream?)
      offset = 0
      pieces = chunk_lengths.map do |len|
        piece = self[offset, len]
        offset += len
        piece.cbor_diagnostic
      end
      return "(_ #{pieces.join(", ")})"
    end
    return "h'#{hexbytes}'" if encoding == Encoding::BINARY

    code_units = inspect.encode(Encoding::UTF_16BE).unpack('n*')
    code_units.map { |unit| unit < 128 ? unit.chr : '\u%04x' % unit }.join
  end
end
57
+
58
class Array
  # "[a, b, ...]", with a leading "_ " when the array is flagged as
  # streaming (cbor_stream? is truthy).
  def cbor_diagnostic
    marker = cbor_stream? ? "_ " : ""
    items = map { |item| item.cbor_diagnostic }
    "[#{marker}#{items.join(", ")}]"
  end
end
63
+
64
class Hash
  # "{k: v, ...}", with a leading "_ " when the map is flagged as
  # streaming (cbor_stream? is truthy).
  def cbor_diagnostic
    marker = cbor_stream? ? "_ " : ""
    entries = map do |key, value|
      "#{key.cbor_diagnostic}: #{value.cbor_diagnostic}"
    end
    "{#{marker}#{entries.join(", ")}}"
  end
end
69
+
70
class CBOR::Tagged
  # Tag number followed by the tagged item's diagnostic form in parentheses.
  def cbor_diagnostic
    format("%s(%s)", tag, data.cbor_diagnostic)
  end
end
@@ -0,0 +1,88 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ # This should work with the C-ext cbor-ruby as well as with our cbor-pure
4
+ unless defined?(CBOR)
5
+ require_relative 'cbor-pure'
6
+ end
7
+
8
class String
  # Two lowercase hex digits per byte, joined with +sep+.
  def hexbytes(sep = '')
    unpack('C*').map { |octet| '%02x' % octet }.join(sep)
  end
end
13
+
14
+
15
+ class CBOR
16
# Build an instance over the encoded string +s+ and return its
# pretty_item_final(indent, max_target) result.
def self.pretty(s, indent = 0, max_target = 40)
  printer = new(s)
  printer.pretty_item_final(indent, max_target)
end
19
+
20
# Consume +n+ bytes via #take, append +prefix+ and the bytes' hex form to
# @out, and return the consumed bytes.
def take_and_print(n, prefix = '')
  taken = take(n)
  @out << prefix << taken.hexbytes
  taken
end
26
+
27
# Print the comment for an initial byte +ib+ whose additional information
# is 31, then handle what follows:
# * major types 2-5: print nested items until one of them returns BREAK;
# * major type 7: nothing follows; BREAK is returned to the caller;
# * anything else raises.
# Returns BREAK for major type 7 and nil otherwise.
def pretty_item_streaming(ib)
  major = ib >> 5
  @out << " # #{MT_NAMES[major]}(*)\n"
  @indent += 1
  outcome = nil
  if major == 7
    outcome = BREAK
  elsif (2..5).cover?(major)
    nil while pretty_item != BREAK
  else
    raise "unknown ib #{major} for additional information 31"
  end
  @indent -= 1
  outcome
end
41
+
42
# Display names looked up with the top three bits (ib >> 5) of an initial byte.
MT_NAMES = %w[unsigned negative bytes text array map tag primitive]
43
+
44
# Print one data item: the hex of its head, a "# type(value)" comment, and
# then, one indentation level deeper, whatever the item contains.
# Items with additional information 31 are handed to
# pretty_item_streaming, whose result is returned; otherwise returns nil.
def pretty_item
  initial = take_and_print(1, ' ' * @indent).ord
  major = initial >> 5
  info = initial & 0x1F
  return pretty_item_streaming(initial) if info == 31

  val =
    if info < 24
      info
    else
      case info
      when 24 then take_and_print(1, ' ').ord
      when 25 then take_and_print(2, ' ').unpack("n").first
      when 26 then take_and_print(4, ' ').unpack("N").first
      when 27 then take_and_print(8, ' ').unpack("Q>").first
      else raise "unknown additional information #{info} in ib #{initial}"
      end
    end

  @out << " # #{MT_NAMES[major]}(#{val})\n"
  @indent += 1
  case major
  when 2, 3
    # string payload: its bytes in hex, commented with their #inspect form
    @out << ' ' * @indent
    payload = take_and_print(val)
    @out << " # #{payload.inspect}" << "\n"
  when 4 then val.times { pretty_item }
  when 5 then (2 * val).times { pretty_item } # key and value per entry
  when 6 then pretty_item
  end
  @indent -= 1
  nil
end
72
+
73
# Pretty-print the single data item held in @buffer and return the text.
#
# indent     - starting indentation level.
# max_target - largest column the "#" comments are aligned to.
#
# Raises RuntimeError when bytes remain after the item. @pos counts bytes,
# so it is compared with the buffer's byte length, not its character count.
def pretty_item_final(indent = 0, max_target = 40)
  @out = ''
  @indent = indent
  pretty_item
  raise "extra bytes follow after a deserialized object" if @pos != @buffer.bytesize
  # Align comments at the widest comment column seen, capped at max_target.
  target = [@out.each_line.map { |ln| ln.index('#') || 0 }.max, max_target].min
  @out.each_line.map { |ln|
    col = ln.index('#')
    ln[col, 0] = ' ' * (target - col) if col && col < target
    ln
  }.join
end
87
+
88
+ end
@@ -0,0 +1,260 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require_relative "half.rb"
4
+
5
+
6
+ class CBOR
7
# Mixin that stores a "streaming" marker in @cbor_streaming on the receiver.
module Streaming
  # The stored marker (nil when never set).
  def cbor_stream?
    @cbor_streaming
  end

  # Store +b+ as the marker and return the receiver so calls can be chained.
  def cbor_stream!(b = true)
    @cbor_streaming = b
    self
  end
end
[Array, Hash, String].each { |klass| klass.send(:include, Streaming) }
19
+
20
# BREAK is the single frozen Break instance that the item readers return
# and compare against for the break stop code.
Break = Class.new
BREAK = Break.new.freeze
23
+
24
# A tag number paired with the data item it applies to.
Tagged = Struct.new(:tag, :data) do
  # "tag(data)" using the data's to_s.
  def to_s
    format("%s(%s)", tag, data)
  end

  # "tag(data)" using the data's inspect.
  def inspect
    format("%s(%s)", tag, data.inspect)
  end
end
32
+
33
+ TAG_BIGNUM_BASE = 2
34
+
35
# A simple value that has no Ruby counterpart.
Simple = Struct.new(:value) do
  # Value 23 prints as "undefined"; every other value as "simple(n)".
  def to_s
    value == 23 ? "undefined" : "simple(#{value})"
  end
  alias_method :inspect, :to_s
end
45
+
46
# Encode +d+ with a fresh instance and return the resulting buffer.
def self.encode(d)
  encoder = new
  encoder.add(d).buffer
end
49
# Decode the single data item contained in the encoded string +s+.
def self.decode(s)
  decoder = new(s)
  decoder.decode_item_final
end
52
+
53
+ attr_reader :buffer
54
# s - the string used as the buffer (a new empty String by default). It is
#     kept as given, not copied; the read position starts at 0.
def initialize(s = String.new)
  @buffer = s; @pos = 0
end
58
+
59
# Append the head for argument +n+ to @buffer: +n+ is added to +ib+ when it
# is below 24, otherwise ib + 24/25/26/27 is followed by +n+ as a 1-, 2-,
# 4- or 8-byte big-endian integer. When +n+ fits none of these ranges the
# caller's block is called and its result is appended instead.
# Returns @buffer.
def head(ib, n)
  fields, directive =
    case n
    when 0...24 then [[ib + n], "C"]
    when 0...256 then [[ib + 24, n], "CC"]
    when 0...65536 then [[ib + 25, n], "Cn"]
    when 0...4294967296 then [[ib + 26, n], "CN"]
    when 0...18446744073709551616 then [[ib + 27, n], "CQ>"]
    end
  @buffer << (fields ? fields.pack(directive) : yield) # yield: throw back to caller
end
76
+
77
+ HALF_NAN_BYTES = ("\xf9".force_encoding(Encoding::BINARY) + Half::NAN_BYTES).freeze
78
+
79
# Append the float +fv+ to @buffer in the shortest form that keeps its
# value: HALF_NAN_BYTES for NaN, otherwise half precision (0xf9) when
# Half.encode_from_single returns bytes, single precision (0xfa) when +fv+
# survives a single-precision round trip, else double precision (0xfb).
def addfloat(fv)
  return @buffer << HALF_NAN_BYTES if fv.nan?

  single = [fv].pack("g") # single-precision
  if single.unpack("g").first != fv
    return @buffer << [0xfb, fv].pack("CG") # double-precision
  end

  half = Half.encode_from_single(fv, single)
  if half
    @buffer << 0xf9 << half
  else
    @buffer << 0xfa << single
  end
end
95
+
96
# Big-endian bytes of the non-negative integer +d+, as a binary string
# without leading zero bytes (0 gives the empty string).
def bignum_to_bytes(d)
  octets = []
  until d == 0
    octets << (d & 0xFF) # collected least significant byte first
    d >>= 8
  end
  octets.reverse.pack("C*")
end
104
+
105
# Append the encoding of +d+ to @buffer and return self.
#
# Handles Integer (falling back to a tagged byte string when the value does
# not fit an 8-byte head), other Numerics (via addfloat), Symbol (encoded
# as its string form), Simple, false/true/nil, Tagged, String, Array and
# Hash. Strings, arrays and hashes flagged by cbor_stream? are written in
# indefinite-length form. Anything else raises RuntimeError.
def add(d)
  case d
  when Integer
    negative = d < 0
    major = negative ? 0x20 : 0x00
    magnitude = negative ? -1 - d : d
    head(major, magnitude) do # block is called if things do not fit
      digits = bignum_to_bytes(magnitude)
      head(0xc0, TAG_BIGNUM_BASE + (major >> 5))
      head(0x40, digits.bytesize)
      digits
    end
  when Numeric then addfloat(d)
  when Symbol then add(d.to_s) # hack: this should really be tagged
  when Simple then head(0xe0, d.value)
  when false then head(0xe0, 20)
  when true then head(0xe0, 21)
  when nil then head(0xe0, 22)
  when Tagged # we don't handle :simple here
    head(0xc0, d.tag)
    add(d.data)
  when String
    chunk_lengths = d.cbor_stream?
    if d.encoding == Encoding::BINARY
      major = 0x40
      payload = d
    else
      major = 0x60
      payload = d.encode(Encoding::UTF_8).force_encoding(Encoding::BINARY)
    end
    if chunk_lengths
      # indefinite length: each chunk is cut from the original string
      @buffer << (major + 31)
      offset = 0
      chunk_lengths.each do |len|
        add(d[offset, len])
        offset += len
      end
      @buffer << 0xff
    else
      head(major, payload.bytesize)
      @buffer << payload
    end
  when Array
    streaming = d.cbor_stream?
    if streaming
      @buffer << 0x9f
    else
      head(0x80, d.size)
    end
    d.each { |item| add(item) }
    @buffer << 0xff if streaming
  when Hash
    streaming = d.cbor_stream?
    if streaming
      @buffer << 0xbf
    else
      head(0xa0, d.size)
    end
    d.each do |key, value|
      add(key)
      add(value)
    end
    @buffer << 0xff if streaming
  else
    raise("Don't know how to encode #{d.inspect}")
  end
  self
end
173
+
174
# Consume the next +n+ bytes of @buffer and return them as a binary string.
#
# @pos and the bounds check both count bytes, so the slice is taken by byte
# offset as well; slicing by character would return the wrong data for a
# buffer that is not binary-encoded.
#
# Raises RuntimeError when fewer than +n+ bytes remain.
def take(n)
  opos = @pos
  @pos += n
  raise "Out of bytes to decode: #{opos} + #{n} > #{@buffer.bytesize}" if @pos > @buffer.bytesize
  @buffer.byteslice(opos, n).force_encoding(Encoding::BINARY)
end
180
+
181
+ MT_TO_ENCODING = {2 => Encoding::BINARY, 3 => Encoding::UTF_8}
182
+
183
# Decode what follows an initial byte +ib+ whose additional information is
# 31, reading nested items until the break stop code.
#
# ib        - the initial byte; only its major type (top three bits) is used.
# breakable - whether a break stop code is acceptable at this position.
#
# Returns the decoded String, Array or Hash, flagged through cbor_stream!
# so that re-encoding and diagnostic output keep the indefinite-length
# form (strings record their chunk lengths), or BREAK for the stop code.
# Raises RuntimeError for a misplaced break, a chunk that is not a string
# of the matching kind, or any other major type.
def decode_item_streaming(ib, breakable)
  case ib >>= 5
  when 2, 3
    want_encoding = MT_TO_ENCODING[ib]
    subs = []
    while (element = decode_item(true)) != BREAK
      raise "non-string (#{element.inspect}) in streaming string" unless String === element
      raise "bytes/text mismatch (#{element.encoding} != #{want_encoding}) in streaming string" unless element.encoding == want_encoding
      subs << element
    end
    subs.join.cbor_stream!(subs.map(&:length)).force_encoding(want_encoding)
  when 4
    result = []
    while (element = decode_item(true)) != BREAK
      result << element
    end
    result.cbor_stream!
  when 5
    result = {}
    while (key = decode_item(true)) != BREAK
      result[key] = decode_item # a break in value position is an error
    end
    result.cbor_stream!
  when 7
    raise "break stop code outside indefinite length item" unless breakable
    BREAK
  else raise "unknown ib #{ib} for additional information 31"
  end
end
212
+
213
# Decode the next data item from the buffer and return its Ruby value.
#
# breakable - passed on to decode_item_streaming, which handles every
#             initial byte whose additional information is 31.
#
# Raises RuntimeError for additional information 28..30.
def decode_item(breakable = false)
  initial = take(1).ord
  info = initial & 0x1F
  return decode_item_streaming(initial, breakable) if info == 31

  raw = nil # argument bytes, kept for the float cases below
  arg =
    if info < 24 then info
    elsif info == 24 then take(1).ord
    elsif info == 25 then take(2).unpack("n").first
    elsif info == 26 then (raw = take(4)).unpack("N").first
    elsif info == 27 then (raw = take(8)).unpack("Q>").first
    else raise "unknown additional information #{info} in ib #{initial}"
    end

  case initial >> 5
  when 0 then arg
  when 1 then -1 - arg
  when 2 then take(arg).force_encoding(Encoding::BINARY)
  when 3 then take(arg).force_encoding(Encoding::UTF_8)
  when 4 then Array.new(arg) { decode_item }
  when 5 then Hash[Array.new(arg) { [decode_item, decode_item] }]
  when 6
    inner = decode_item
    if String === inner && (arg & ~1) == TAG_BIGNUM_BASE
      # bignum: tag BASE gives n, tag BASE + 1 gives -1 - n (via ^ -1)
      magnitude = inner.bytes.inject(0) { |acc, byte| (acc << 8) + byte }
      (TAG_BIGNUM_BASE - arg) ^ magnitude
    else
      Tagged.new(arg, inner)
    end
  when 7
    case info
    when 20 then false
    when 21 then true
    when 22 then nil
    when 25 then Half.decode(arg)
    when 26 then raw.unpack("g").first # cannot go directly from arg
    when 27 then raw.unpack("G").first # in Ruby
    else Simple.new(arg)
    end
  end
end
253
+
254
# Decode one data item and insist that it used up the whole buffer.
#
# @pos counts bytes, so it is compared with the buffer's byte length; the
# character count differs for a buffer that is not binary-encoded.
#
# Returns the decoded value. Raises RuntimeError when bytes are left over.
def decode_item_final
  val = decode_item
  raise "extra bytes follow after a deserialized object" if @pos != @buffer.bytesize
  val
end
259
+
260
+ end
@@ -0,0 +1,111 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # 16-bit floating point values (IEEE 754 Half Precision) are not
4
+ # supported by #pack/#unpack in Ruby yet.
5
+ # This is a quick hack implementing en- and decoding them.
6
+ # (Since this is just a hack, the brief tests are in this file.)
7
+ #
8
+ # The encoder assumes that we already have a Single-Precision byte
9
+ # string (e.g., from pack("g")), and this is taken apart and
10
+ # reassembled.
11
+ # The decoder is free-standing (trivial).
12
+ #
13
+ # IEEE 754 can be found at:
14
+ # http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=4610935
15
+
16
module Half
  # Two-byte half-precision pattern that decodes to NaN.
  NAN_BYTES = "\x7e\x00"

  # Decode a 2-byte big-endian string into a Float.
  def self.decode_from_bytes(hs)
    decode(hs.unpack("n").first)
  end

  # Decode a 16-bit integer pattern (sign bit, 5 exponent bits, 10 mantissa
  # bits) into a Float.
  def self.decode(b16)
    exponent = (b16 >> 10) & 0x1f
    fraction = b16 & 0x3ff
    magnitude =
      case exponent
      when 0 then Math.ldexp(fraction, -24)
      when 31 then fraction == 0 ? Float::INFINITY : Float::NAN
      else Math.ldexp(0x400 + fraction, exponent - 25)
      end
    b16[15] == 0 ? magnitude : -magnitude
  end

  # Rebuild a 16-bit pattern from the fields of a 4-byte single-precision
  # string. Mantissa bits that do not fit are cut off. Returns nil for
  # exponents outside the handled ranges, for single-precision subnormals,
  # and for NaN (handled elsewhere).
  def self.encode_from_single_bytes(ss) # single-precision string
    b32 = ss.unpack("N").first
    sign = (b32 >> 16) & 0x8000
    fraction = b32 & 0x7fffff
    exponent = (b32 >> 23) & 0xff
    case exponent
    when 0 # 0.0, -0.0
      fraction == 0 ? sign : nil
    when 103...113 # denorm, exp16 = 0
      sign + ((fraction + 0x800000) >> (126 - exponent))
    when 113..142 # normalized
      sign + ((exponent - 112) << 10) + (fraction >> 13)
    when 255 # +Inf/-Inf
      fraction == 0 ? sign + 0x7c00 : nil
    end
  end

  # Half-precision bytes for +fv+ given its single-precision bytes +ss+, or
  # nil unless decoding the candidate bytes gives back exactly +fv+.
  def self.encode_from_single(fv, ss)
    bits = encode_from_single_bytes(ss)
    return nil unless bits

    packed = [bits].pack("n")
    decode_from_bytes(packed) == fv ? packed : nil
  end

  # Half-precision bytes for the Float +fv+, or nil when it does not fit.
  def self.encode(fv)
    encode_from_single(fv, [fv].pack("g"))
  end
end
71
+
72
# Load-time self-tests for Half.
#
# Each failure raises a RuntimeError with a message. (`fail i` with an
# Integer argument would raise a TypeError instead, because an Integer is
# neither an exception nor a message string.)

# 1 * 2**i must be encodable for every exponent in -24..15 ...
(-24..15).each do |i|
  raise "Half.encode failed for 1 * 2**#{i}" unless Half.encode(Math.ldexp(1, i))
end
# ... and 1023 * 2**i for every exponent in -24..6.
(-24..6).each do |i|
  raise "Half.encode failed for 1023 * 2**#{i}" unless Half.encode(Math.ldexp(1023, i))
end

# p Half.decode_from_bytes("\x7b\xff") # => 65504.0

[ # go through Wikipedia samples
  0b0_01111_0000000000, 1.0,
  0b0_01111_0000000001, 1.0 + Math.ldexp(1, -10), # = 1 + 2**-10 = 1.0009765625 (next biggest float after 1)
  0b1_10000_0000000000, -2.0,

  0b0_11110_1111111111, 65504.0, # (max half precision)

  0b0_00001_0000000000, Math.ldexp(1, -14), # ~ 6.10352e-5 (minimum positive normal)
  0b0_00000_1111111111, Math.ldexp(1, -14) - Math.ldexp(1, -24), # ~ 6.09756e-5 (maximum subnormal)
  0b0_00000_0000000001, Math.ldexp(1, -24), # ~ 5.96046e-8 (minimum positive subnormal)

  0b0_00000_0000000000, 0.0,
  0b1_00000_0000000000, -0.0,

  0b0_11111_0000000000, 1.0/0.0,
  0b1_11111_0000000000, -1.0/0.0,

  0b0_01101_0101010101, 0.333251953125 # ... ~ 1/3
].each_slice(2) do |hv, expected|
  fv = Half.decode(hv)
  raise [hv, fv, expected].inspect unless fv == expected
end

# NaN cannot be compared, so this one needs to be special-cased:
raise "NaN not detected" unless Half.decode(0b0_11111_1000000000).nan?
raise "-NaN not detected" unless Half.decode(0b1_11111_1000000000).nan?
raise "NaN not detected" unless Half.decode_from_bytes(Half::NAN_BYTES).nan?