cbor-diag 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,74 @@
1
+ # This should work with the C-ext cbor-ruby as well as with our cbor-pure
2
+ unless defined?(CBOR)
3
+ require_relative 'cbor-pure'
4
+ end
5
+
6
# Default CBOR diagnostic rendering, used by any object whose class does not
# define a more specific one.
class Object
  # @return [String] the receiver's own +inspect+ text
  def cbor_diagnostic
    inspect
  end
end
11
+
12
# nil is rendered with the literal word used by the diagnostic output.
class NilClass
  # @return [String] always "null"
  def cbor_diagnostic
    'null'
  end
end
17
+
18
class Float
  # Renders the float in a JSON.stringify-like way (ECMA-262, 9.8.1).
  #
  # Magnitudes in [1e-6, 1) are written as plain decimals even when Ruby's
  # +inspect+ uses exponent form; everywhere else only the leading zeros of
  # the exponent are dropped ("1.0e-07" becomes "1.0e-7").
  #
  # @return [String]
  def cbor_diagnostic
    text = inspect
    magnitude = abs
    if magnitude < 1 && magnitude >= 1e-6
      # Only exponent-form text matches; "0.99" passes through untouched.
      text.sub(/(\d)[.](\d+)e-(\d+)/) do
        m = Regexp.last_match
        fraction = m[2]
        # "1.0e-05" carries a placeholder ".0"; copying it would yield
        # "0.000010" instead of "0.00001".
        fraction = "" if fraction == "0"
        "0.#{"0" * (m[3].to_i - 1)}#{m[1]}#{fraction}"
      end
    else
      text.sub(/(e[+-])0+/) { Regexp.last_match(1) }
    end
  end
end
28
+
29
# Load-time sanity checks for Float#cbor_diagnostic. Each failure names the
# offending value instead of raising a bare "unhandled exception".
[
  [0.00006103515625, "0.00006103515625"],
  [0.99, "0.99"],
  [0.099, "0.099"],
  [0.0000099, "0.0000099"]
].each do |value, expected|
  actual = value.cbor_diagnostic
  unless actual == expected
    raise "Float#cbor_diagnostic self-test failed: expected #{expected}, got #{actual}"
  end
end
33
+
34
class String
  # Backfill String#b (binary-encoded copy) where the interpreter lacks it.
  unless method_defined?(:b)
    def b
      dup.force_encoding(Encoding::BINARY)
    end
  end

  # Two lowercase hex digits per byte, joined with +sep+.
  def hexbytes(sep = '')
    each_byte.map { |byte| format("%02x", byte) }.join(sep)
  end

  # Diagnostic rendering of a string.
  #
  # * When +cbor_stream?+ returns a list of chunk lengths, each chunk is
  #   rendered on its own and the results are wrapped in "(_ ...)".
  # * Binary-encoded strings become h'..' hex literals.
  # * Any other string uses its +inspect+ text, with every UTF-16 code unit
  #   of 128 or above written as a \uXXXX escape.
  def cbor_diagnostic
    chunk_lengths = cbor_stream?
    if chunk_lengths
      offset = 0
      rendered = chunk_lengths.map do |len|
        piece = self[offset, len].cbor_diagnostic
        offset += len
        piece
      end
      return "(_ #{rendered.join(", ")})"
    end
    return "h'#{hexbytes}'" if encoding == Encoding::BINARY

    # "n*" reads the UTF-16BE bytes as big-endian 16-bit code units.
    units = inspect.encode(Encoding::UTF_16BE).unpack("n*")
    units.map { |unit| unit < 128 ? unit.chr : '\u%04x' % unit }.join
  end
end
57
+
58
class Array
  # Renders the elements comma-separated inside brackets; a leading "_ "
  # marks an array flagged via +cbor_stream?+.
  def cbor_diagnostic
    marker = cbor_stream? ? "_ " : ""
    rendered = map { |item| item.cbor_diagnostic }
    "[#{marker}#{rendered.join(", ")}]"
  end
end
63
+
64
class Hash
  # Renders "key: value" pairs comma-separated inside braces; a leading "_ "
  # marks a hash flagged via +cbor_stream?+.
  def cbor_diagnostic
    marker = cbor_stream? ? "_ " : ""
    entries = map do |key, value|
      "#{key.cbor_diagnostic}: #{value.cbor_diagnostic}"
    end
    "{#{marker}#{entries.join(", ")}}"
  end
end
69
+
70
class CBOR::Tagged
  # Renders as tag(content), with the content in its own diagnostic form.
  def cbor_diagnostic
    inner = data.cbor_diagnostic
    "#{tag}(#{inner})"
  end
end
@@ -0,0 +1,88 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ # This should work with the C-ext cbor-ruby as well as with our cbor-pure
4
+ unless defined?(CBOR)
5
+ require_relative 'cbor-pure'
6
+ end
7
+
8
class String
  # Two lowercase hex digits per byte, joined with +sep+.
  #
  # @param sep [String] separator placed between bytes (none by default)
  # @return [String]
  def hexbytes(sep = '')
    each_byte.map { |byte| format("%02x", byte) }.join(sep)
  end
end
13
+
14
+
15
+ class CBOR
16
# Builds an instance over +s+ and returns its pretty_item_final output.
#
# @param s          the encoded input handed to +new+
# @param indent     starting indentation level
# @param max_target upper bound for the column at which comments are aligned
def self.pretty(s, indent = 0, max_target = 40)
  printer = new(s)
  printer.pretty_item_final(indent, max_target)
end
19
+
20
# Consumes +n+ bytes via +take+, appends +prefix+ and the bytes' hex form to
# the output, and returns the consumed bytes.
def take_and_print(n, prefix = '')
  chunk = take(n)
  @out << prefix << chunk.hexbytes
  chunk
end
26
+
27
# Prints an item whose additional information is 31.
#
# For major types 2..5 the contained items are printed one level deeper
# until one of them comes back as BREAK. Major type 7 is the break itself.
#
# @param ib [Integer] the initial byte
# @return BREAK for major type 7, nil for major types 2..5
def pretty_item_streaming(ib)
  major = ib >> 5
  @out << " # #{MT_NAMES[major]}(*)\n"
  @indent += 1
  outcome =
    case major
    when 7 then BREAK
    when 2..5
      # Keep printing nested items until the terminating break shows up.
      item = pretty_item until item == BREAK
      nil
    else
      raise "unknown ib #{major} for additional information 31"
    end
  @indent -= 1
  outcome
end
41
+
42
# Display names of the eight major types, indexed by major type number.
MT_NAMES = %w[unsigned negative bytes text array map tag primitive]

# Prints one data item: its head bytes in hex, a "# type(value)" comment,
# then whatever the item contains, one indentation level deeper.
#
# @return nil, or the result of pretty_item_streaming when the additional
#   information is 31
def pretty_item
  ib = take_and_print(1, ' ' * @indent).ord
  ai = ib & 0x1F
  major = ib >> 5
  # Additional information 24..27 means the argument follows in 1/2/4/8 bytes.
  val =
    case ai
    when 0...24 then ai
    when 24 then take_and_print(1, ' ').ord
    when 25 then take_and_print(2, ' ').unpack("n").first
    when 26 then take_and_print(4, ' ').unpack("N").first
    when 27 then take_and_print(8, ' ').unpack("Q>").first
    when 31 then return pretty_item_streaming(ib)
    else raise "unknown additional information #{ai} in ib #{ib}"
    end
  @out << " # #{MT_NAMES[major]}(#{val})\n"
  @indent += 1
  case major
  when 2, 3
    # String payload: raw bytes in hex plus their inspect form as a comment.
    @out << ' ' * @indent
    payload = take_and_print(val)
    @out << " # #{payload.inspect}" << "\n"
  when 4 then val.times { pretty_item }
  when 5 then (2 * val).times { pretty_item } # one key and one value per entry
  when 6 then pretty_item
  end
  @indent -= 1
  nil
end
72
+
73
# Pretty-prints the single item held in the buffer and aligns the "#"
# comments of all output lines.
#
# Lines whose comment starts left of the alignment column are padded out to
# it. The column is the right-most comment start, capped at +max_target+.
#
# @param indent     starting indentation level
# @param max_target upper bound for the alignment column
# @return [String] the formatted dump
# @raise [RuntimeError] if bytes remain after the item
def pretty_item_final(indent = 0, max_target = 40)
  @out = ''.dup
  @indent = indent
  pretty_item
  # @pos counts bytes, so compare against the byte length of the buffer.
  unless @pos == @buffer.bytesize
    raise "extra bytes follow after a deserialized object"
  end
  lines = @out.each_line.to_a
  widest = lines.map { |ln| ln.index('#') || 0 }.max || 0
  target = [widest, max_target].min
  lines.each do |ln|
    col = ln.index('#')
    ln.insert(col, ' ' * (target - col)) if col && col < target
  end
  lines.join
end
87
+
88
+ end
@@ -0,0 +1,260 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require_relative "half.rb"
4
+
5
+
6
+ class CBOR
7
# Mixin that lets a value carry a streaming marker in an instance variable.
module Streaming
  # @return the marker stored by cbor_stream!, or nil when none was set
  def cbor_stream?
    @cbor_streaming
  end

  # Stores +b+ as the marker and returns the receiver so calls can be chained.
  def cbor_stream!(b = true)
    tap { @cbor_streaming = b }
  end
end
[Array, Hash, String].each { |klass| klass.send(:include, Streaming) }
19
+
20
# Marker class whose single frozen instance, BREAK, is the value the decoder
# returns for a break stop code.
class Break; end

BREAK = Break.new.freeze
23
+
24
# A tagged item: the tag number together with the enclosed data.
Tagged = Struct.new(:tag, :data) do
  # @return [String] "tag(data)" using the data's to_s
  def to_s
    format("%s(%s)", tag, data)
  end

  # @return [String] "tag(data)" using the data's inspect
  def inspect
    format("%s(%s)", tag, data.inspect)
  end
end

# First of the two adjacent bignum tags: this one and the one after it.
TAG_BIGNUM_BASE = 2
34
+
35
# A simple value identified only by its number.
Simple = Struct.new(:value) do
  # @return [String] "undefined" for value 23, otherwise "simple(N)"
  def to_s
    value == 23 ? "undefined" : "simple(#{value})"
  end
  alias_method :inspect, :to_s
end
45
+
46
# Encodes +d+ with a fresh instance and returns that instance's buffer.
def self.encode(d)
  encoder = new
  encoder.add(d)
  encoder.buffer
end
49
# Decodes the single item contained in +s+ via decode_item_final.
def self.decode(s)
  decoder = new(s)
  decoder.decode_item_final
end
52
+
53
# The string being written to (encoding) or read from (decoding).
attr_reader :buffer

# @param s [String] buffer to work on; a new empty string by default.
#   The string is used as given, not copied.
def initialize(s = String.new)
  @pos = 0 # read position, starts at the beginning of the buffer
  @buffer = s
end
58
+
59
# Appends the head of an item: the initial byte built from +ib+, plus the
# argument +n+ in the shortest of the five available widths.
#
# When +n+ fits none of the widths, the caller's block is invoked and its
# result is appended instead.
#
# @param ib [Integer] initial byte with the low five bits clear
# @param n  [Integer] the argument to encode
# @return the buffer
def head(ib, n)
  packed =
    case n
    when 0...24 then [ib + n].pack("C")
    when 24...0x100 then [ib + 24, n].pack("CC")
    when 0x100...0x1_0000 then [ib + 25, n].pack("Cn")
    when 0x1_0000...0x1_0000_0000 then [ib + 26, n].pack("CN")
    when 0x1_0000_0000...0x1_0000_0000_0000_0000 then [ib + 27, n].pack("CQ>")
    else yield # does not fit: hand back to the caller
    end
  @buffer << packed
end
76
+
77
# Complete encoding of NaN: initial byte 0xf9 followed by Half::NAN_BYTES.
HALF_NAN_BYTES = ("\xf9".force_encoding(Encoding::BINARY) + Half::NAN_BYTES).freeze

# Appends +fv+ in the narrowest float width that reproduces it exactly:
# half (0xf9), then single (0xfa), then double (0xfb). NaN is always
# written as HALF_NAN_BYTES.
#
# @return the buffer
def addfloat(fv)
  return @buffer << HALF_NAN_BYTES if fv.nan?

  single = [fv].pack("g")
  # A value that does not survive a single-precision round trip needs a double.
  return @buffer << [0xfb, fv].pack("CG") unless single.unpack("g").first == fv

  half = Half.encode_from_single(fv, single)
  if half
    @buffer << 0xf9 << half
  else
    @buffer << 0xfa << single
  end
end
95
+
96
# Converts a non-negative integer into its big-endian byte string.
# Zero yields the empty string.
#
# @param d [Integer]
# @return [String]
def bignum_to_bytes(d)
  little_endian = String.new
  until d == 0
    little_endian << (d & 0xFF) # collect the least significant byte first
    d >>= 8
  end
  little_endian.reverse!
end
104
+
105
# Appends the encoding of +d+ to the buffer.
#
# Handles integers (as a tagged bignum when too large for a plain head),
# other numerics (as floats), symbols (encoded as their string form),
# Simple values, false/true/nil, Tagged items, strings, arrays and hashes.
# Strings, arrays and hashes flagged via cbor_stream? are written in
# indefinite-length form, terminated by 0xff.
#
# @return [self]
# @raise [RuntimeError] for any other kind of object
def add(d)
  case d
  when Integer
    negative = d < 0
    magnitude = negative ? -1 - d : d
    ib = negative ? 0x20 : 0x00
    head(ib, magnitude) do # block is called if things do not fit
      digits = bignum_to_bytes(magnitude)
      head(0xc0, TAG_BIGNUM_BASE + (ib >> 5))
      head(0x40, digits.bytesize)
      digits
    end
  when Numeric then addfloat(d)
  when Symbol then add(d.to_s) # hack: this should really be tagged
  when Simple then head(0xe0, d.value)
  when false then head(0xe0, 20)
  when true then head(0xe0, 21)
  when nil then head(0xe0, 22)
  when Tagged # we don't handle :simple here
    head(0xc0, d.tag)
    add(d.data)
  when String
    chunk_lengths = d.cbor_stream?
    if d.encoding == Encoding::BINARY
      ib = 0x40
      payload = d
    else
      ib = 0x60
      payload = d.encode(Encoding::UTF_8).force_encoding(Encoding::BINARY)
    end
    if chunk_lengths
      # Indefinite length: each chunk is cut from the original string and
      # encoded as a string of its own.
      @buffer << (ib + 31)
      offset = 0
      chunk_lengths.each do |len|
        add(d[offset, len])
        offset += len
      end
      @buffer << 0xff
    else
      head(ib, payload.bytesize)
      @buffer << payload
    end
  when Array
    streaming = d.cbor_stream?
    if streaming
      @buffer << 0x9f
    else
      head(0x80, d.size)
    end
    d.each { |item| add(item) }
    @buffer << 0xff if streaming
  when Hash
    streaming = d.cbor_stream?
    if streaming
      @buffer << 0xbf
    else
      head(0xa0, d.size)
    end
    d.each do |key, value|
      add(key)
      add(value)
    end
    @buffer << 0xff if streaming
  else
    raise("Don't know how to encode #{d.inspect}")
  end
  self
end
173
+
174
# Consumes the next +n+ bytes of the buffer.
#
# The slice is taken by byte offset and returned binary-encoded, so it
# agrees with the byte-based bounds check whatever the buffer's encoding.
#
# @param n [Integer] number of bytes to consume
# @return [String] the consumed bytes
# @raise [RuntimeError] if fewer than +n+ bytes remain
def take(n)
  opos = @pos
  @pos += n
  raise "Out of bytes to decode: #{opos} + #{n} > #{@buffer.bytesize}" if @pos > @buffer.bytesize
  # byteslice returns a new string, so re-tagging it leaves the buffer alone.
  @buffer.byteslice(opos, n).force_encoding(Encoding::BINARY)
end
180
+
181
# Encoding expected of every chunk of a streamed string, by major type.
MT_TO_ENCODING = {2 => Encoding::BINARY, 3 => Encoding::UTF_8}

# Decodes an item whose additional information is 31.
#
# * Major types 2/3: joins the chunks into one string and records the
#   chunk lengths on it via cbor_stream!.
# * Major type 4: collects items into an array until BREAK.
# * Major type 5: collects key/value pairs into a hash until a key is BREAK.
# * Major type 7: the break itself, only legal when +breakable+ is true.
#
# @param ib [Integer] the initial byte
# @param breakable [Boolean] whether a break is acceptable at this point
# @raise [RuntimeError] on a misplaced break, a non-string or wrongly
#   encoded chunk, or any other major type
def decode_item_streaming(ib, breakable)
  major = ib >> 5
  case major
  when 2, 3
    want_encoding = MT_TO_ENCODING[major]
    chunks = []
    until (chunk = decode_item(true)) == BREAK
      raise "non-string (#{chunk.inspect}) in streaming string" unless chunk.is_a?(String)
      unless chunk.encoding == want_encoding
        raise "bytes/text mismatch (#{chunk.encoding} != #{want_encoding}) in streaming string"
      end
      chunks << chunk
    end
    chunks.join.cbor_stream!(chunks.map(&:length)).force_encoding(want_encoding)
  when 4
    items = []
    until (item = decode_item(true)) == BREAK
      items << item
    end
    items
  when 5
    pairs = {}
    until (key = decode_item(true)) == BREAK
      pairs[key] = decode_item
    end
    pairs
  when 7
    raise "break stop code outside indefinite length item" unless breakable
    BREAK
  else
    raise "unknown ib #{major} for additional information 31"
  end
end
212
+
213
# Decodes the next data item from the buffer.
#
# @param breakable [Boolean] passed on to decode_item_streaming, which
#   uses it to decide whether a break is acceptable here
# @return the decoded Ruby value
# @raise [RuntimeError] for additional information 28..30
def decode_item(breakable = false)
  ib = take(1).ord
  ai = ib & 0x1F
  raw = nil # argument bytes, kept for the 4- and 8-byte float cases below
  val =
    case ai
    when 0...24 then ai
    when 24 then take(1).ord
    when 25 then take(2).unpack("n").first
    when 26 then (raw = take(4)).unpack("N").first
    when 27 then (raw = take(8)).unpack("Q>").first
    when 31 then return decode_item_streaming(ib, breakable)
    else raise "unknown additional information #{ai} in ib #{ib}"
    end
  case ib >> 5
  when 0 then val
  when 1 then -1 - val
  when 2 then take(val).force_encoding(Encoding::BINARY)
  when 3 then take(val).force_encoding(Encoding::UTF_8)
  when 4 then Array.new(val) { decode_item }
  when 5 then Hash[Array.new(val) { [decode_item, decode_item] }]
  when 6
    inner = decode_item
    if inner.is_a?(String) && (val & ~1) == TAG_BIGNUM_BASE
      # Bignum: the string holds the big-endian magnitude. XOR with 0 keeps
      # it (first bignum tag); XOR with -1 gives -1 - n (second bignum tag).
      magnitude = inner.bytes.inject(0) { |acc, byte| (acc << 8) + byte }
      (TAG_BIGNUM_BASE - val) ^ magnitude
    else
      Tagged.new(val, inner)
    end
  when 7
    case ai
    when 20 then false
    when 21 then true
    when 22 then nil
    when 25 then Half.decode(val)
    when 26 then raw.unpack("g").first # cannot go directly from val
    when 27 then raw.unpack("G").first # in Ruby
    else Simple.new(val)
    end
  end
end
253
+
254
# Decodes one item and insists that it uses up the whole buffer.
#
# @return the decoded value
# @raise [RuntimeError] if bytes remain after the item
def decode_item_final
  item = decode_item
  # @pos counts bytes, so compare against the byte length; the character
  # count differs for a non-binary buffer holding multibyte characters.
  unless @pos == @buffer.bytesize
    raise "extra bytes follow after a deserialized object"
  end
  item
end
259
+
260
+ end
@@ -0,0 +1,111 @@
1
+ # -*- coding: utf-8 -*-
2
+ #
3
+ # 16-bit floating point values (IEEE 754 Half Precision) are not
4
+ # supported by #pack/#unpack in Ruby yet.
5
+ # This is a quick hack implementing en- and decoding them.
6
+ # (Since this is just a hack, the brief tests are in this file.)
7
+ #
8
+ # The encoder assumes that we already have a Single-Precision byte
9
+ # string (e.g., from pack("g")), and this is taken apart and
10
+ # reassembled.
11
+ # The decoder is free-standing (trivial).
12
+ #
13
+ # IEEE 754 can be found at:
14
+ # http://ieeexplore.ieee.org/stamp/stamp.jsp?tp=&arnumber=4610935
15
+
16
# Encoding and decoding of 16-bit (IEEE 754 half precision) floats.
module Half
  # Two-byte half-precision bit pattern for NaN.
  NAN_BYTES = "\x7e\x00"

  # Decodes a two-byte big-endian string.
  def self.decode_from_bytes(hs)
    decode(hs.unpack("n").first)
  end

  # Decodes a 16-bit integer bit pattern into a Float.
  def self.decode(b16)
    exp = (b16 >> 10) & 0x1f
    mant = b16 & 0x3ff
    magnitude =
      case exp
      when 0 then Math.ldexp(mant, -24) # zero and subnormals
      when 31 then mant == 0 ? Float::INFINITY : Float::NAN
      else Math.ldexp(0x400 + mant, exp - 25) # restore the implicit leading 1
      end
    b16[15] == 0 ? magnitude : -magnitude
  end

  # Rebuilds a half-precision bit pattern from a single-precision byte
  # string (e.g. from pack("g")).
  #
  # @return [Integer, nil] the 16-bit pattern, or nil when the exponent is
  #   outside the ranges handled here (NaN also yields nil; it is handled
  #   elsewhere). The result may have dropped mantissa bits, so callers
  #   verify it by decoding.
  def self.encode_from_single_bytes(ss) # single-precision string
    b32 = ss.unpack("N").first
    sign = (b32 >> 16) & 0x8000
    mant = b32 & 0x7fffff
    exp = (b32 >> 23) & 0xff
    case exp
    when 0 then mant == 0 ? sign : nil # 0.0, -0.0
    when 103...113 then sign + ((mant + 0x800000) >> (126 - exp)) # denorm, exp16 = 0
    when 113..142 then sign + ((exp - 112) << 10) + (mant >> 13) # normalized
    when 255 then mant == 0 ? sign + 0x7c00 : nil # +Inf/-Inf
    end
  end

  # Encodes +fv+ given its single-precision bytes +ss+.
  #
  # @return [String, nil] two bytes, or nil unless they decode back to
  #   exactly +fv+
  def self.encode_from_single(fv, ss)
    bits = encode_from_single_bytes(ss)
    return unless bits

    packed = [bits].pack("n")
    packed if decode_from_bytes(packed) == fv
  end

  # Encodes +fv+ as half precision, or returns nil if that is not exact.
  def self.encode(fv)
    encode_from_single(fv, [fv].pack("g"))
  end
end
71
+
72
# Load-time self-tests for Half.

# Every power of two from 2**-24 to 2**15 must be encodable.
(-24..15).each do |i|
  # A message string is required here: raising a bare Integer is a TypeError.
  raise "Half.encode failed for 2**#{i}" unless Half.encode(Math.ldexp(1, i))
end
# So must 1023 * 2**i for i from -24 to 6.
(-24..6).each do |i|
  raise "Half.encode failed for 1023 * 2**#{i}" unless Half.encode(Math.ldexp(1023, i))
end

# p Half.decode(0x7bff)   # => 65504.0

[ # go through Wikipedia samples
  0b0_01111_0000000000, 1.0,
  0b0_01111_0000000001, 1.0 + Math.ldexp(1, -10), # = 1 + 2**-10 = 1.0009765625 (next float after 1)
  0b1_10000_0000000000, -2.0,

  0b0_11110_1111111111, 65504.0, # (max half precision)

  0b0_00001_0000000000, Math.ldexp(1, -14), # ~ 6.10352e-5 (minimum positive normal)
  0b0_00000_1111111111, Math.ldexp(1, -14) - Math.ldexp(1, -24), # ~ 6.09756e-5 (maximum subnormal)
  0b0_00000_0000000001, Math.ldexp(1, -24), # ~ 5.96046e-8 (minimum positive subnormal)

  0b0_00000_0000000000, 0.0,
  0b1_00000_0000000000, -0.0,

  0b0_11111_0000000000, 1.0/0.0,
  0b1_11111_0000000000, -1.0/0.0,

  0b0_01101_0101010101, 0.333251953125 # ... ~ 1/3
].each_slice(2) do |hv, expected|
  fv = Half.decode(hv)
  raise [hv, fv, expected].inspect unless fv == expected
end

# NaN cannot be compared, so this one needs to be special-cased:
raise "NaN not detected" unless Half.decode(0b0_11111_1000000000).nan?
raise "-NaN not detected" unless Half.decode(0b1_11111_1000000000).nan?
raise "NaN not detected" unless Half.decode_from_bytes(Half::NAN_BYTES).nan?