bert 1.0.0 → 1.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/.gitignore +3 -0
- data/History.txt +9 -0
- data/README.md +8 -5
- data/Rakefile +34 -4
- data/VERSION +1 -1
- data/bench/bench.rb +36 -0
- data/bench/decode_bench.rb +87 -0
- data/bench/encode_bench.rb +36 -0
- data/bench/results.txt +55 -0
- data/bert.gemspec +14 -6
- data/ext/bert/c/decode.c +438 -0
- data/ext/bert/c/extconf.rb +11 -0
- data/lib/bert.rb +14 -23
- data/lib/bert/bert.rb +21 -0
- data/lib/bert/decode.rb +248 -0
- data/lib/bert/decoder.rb +1 -53
- data/lib/bert/encode.rb +142 -0
- data/lib/bert/encoder.rb +13 -14
- data/lib/bert/types.rb +21 -0
- data/test/decoder_test.rb +51 -24
- data/test/encoder_test.rb +10 -5
- data/test/test_helper.rb +1 -12
- metadata +15 -14
data/lib/bert.rb
CHANGED
@@ -1,33 +1,24 @@
|
|
1
1
|
require 'rubygems'
|
2
|
-
require 'erlectricity'
|
3
2
|
|
4
|
-
|
5
|
-
require 'bert/decoder'
|
3
|
+
$:.unshift File.join(File.dirname(__FILE__), *%w[.. ext])
|
6
4
|
|
7
|
-
|
8
|
-
|
9
|
-
Encoder.encode(ruby)
|
10
|
-
end
|
5
|
+
require 'bert/bert'
|
6
|
+
require 'bert/types'
|
11
7
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
str.each_byte { |b| bytes << b.to_s }
|
19
|
-
"<<" + bytes.join(',') + ">>"
|
20
|
-
end
|
8
|
+
begin
|
9
|
+
# try to load the C extension
|
10
|
+
require 'bert/c/decode'
|
11
|
+
rescue LoadError
|
12
|
+
# fall back on the pure ruby version
|
13
|
+
require 'bert/decode'
|
21
14
|
end
|
22
15
|
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
end
|
28
|
-
end
|
29
|
-
end
|
16
|
+
require 'bert/encode'
|
17
|
+
|
18
|
+
require 'bert/encoder'
|
19
|
+
require 'bert/decoder'
|
30
20
|
|
21
|
+
# Global method for specifying that an array should be encoded as a tuple.
|
31
22
|
def t
  # Returns the BERT::Tuple class itself (not an instance), so the Array-style
  # constructor can be chained onto the call, e.g. t[:ok, 1].
  BERT::Tuple
end
|
data/lib/bert/bert.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
# Top-level namespace and convenience entry points for BERT serialization.
module BERT
  # Serialize a Ruby object by delegating to Encoder.encode.
  #
  # ruby - the object to serialize
  #
  # Returns whatever Encoder.encode returns for +ruby+.
  def self.encode(ruby)
    Encoder.encode(ruby)
  end

  # Deserialize BERT data by delegating to Decoder.decode.
  #
  # bert - the serialized data
  #
  # Returns whatever Decoder.decode returns for +bert+.
  def self.decode(bert)
    Decoder.decode(bert)
  end

  # Render the bytes of a string in Erlang binary-literal notation.
  #
  # str - the String whose bytes are listed
  #
  # Returns a String such as "<<97,98,99>>" ("<<>>" for an empty string).
  def self.ebin(str)
    "<<#{str.unpack('C*').join(',')}>>"
  end

  # Array subclass used to mark a sequence as a tuple rather than a list.
  class Tuple < Array
    # Same as Array#inspect, prefixed with "t" (e.g. "t[1, 2]").
    def inspect
      "t#{super}"
    end
  end
end
|
data/lib/bert/decode.rb
ADDED
@@ -0,0 +1,248 @@
|
|
1
|
+
require 'stringio' # Decode.decode wraps its input in a StringIO

module BERT
  # Pure-Ruby decoder for BERT-encoded data.
  #
  # Terms are pulled from an IO-like object one tag at a time. The tag and
  # marker constants used below (MAGIC, ATOM, SMALL_INT, ...) are not defined
  # in this class; they are expected to come from the included Types module.
  class Decode
    # The IO-like source being decoded; it only needs to respond to read(n).
    attr_accessor :in
    include Types

    # Name of this decoder implementation.
    #
    # Returns the String 'Ruby'.
    def self.impl
      'Ruby'
    end

    # Decode one complete term from a String.
    #
    # string - the serialized data, starting with the MAGIC byte
    #
    # Returns the decoded Ruby object.
    # Raises RuntimeError on a bad magic byte, unknown tag or truncated input.
    def self.decode(string)
      new(StringIO.new(string)).read_any
    end

    # ins - IO-like object to read from
    def initialize(ins)
      @in = ins
      @peeked = '' # bytes already pulled from @in by peek but not yet consumed
    end

    # Read a full term: the MAGIC byte followed by one tagged value.
    def read_any
      fail("Bad Magic") unless read_1 == MAGIC
      read_any_raw
    end

    # Read one tagged value (no MAGIC byte), dispatching on the next tag byte.
    # The tag is only peeked here; each reader consumes and re-checks it.
    def read_any_raw
      case peek_1
      when ATOM then read_atom
      when SMALL_INT then read_small_int
      when INT then read_int
      when SMALL_BIGNUM then read_small_bignum
      when LARGE_BIGNUM then read_large_bignum
      when FLOAT then read_float
      when SMALL_TUPLE then read_small_tuple
      when LARGE_TUPLE then read_large_tuple
      when NIL then read_nil
      when STRING then read_erl_string
      when LIST then read_list
      when BIN then read_bin
      else
        fail("Unknown term tag: #{peek_1}")
      end
    end

    # Consume +length+ bytes, serving previously peeked bytes first.
    #
    # Returns a String of exactly +length+ bytes.
    # Raises RuntimeError if the source ends before +length+ bytes are read
    # (the previous behaviour was a TypeError or a silently short result).
    def read(length)
      if length < @peeked.length
        result = @peeked[0...length]
        @peeked = @peeked[length..-1]
        return result
      end

      result = @peeked
      @peeked = ''
      missing = length - result.length
      return result unless missing > 0

      chunk = @in.read(missing)
      fail("Unexpected end of input") if chunk.nil? || chunk.length < missing
      # Build a new string instead of appending in place so that the string
      # literal assigned to @peeked is never mutated.
      result + chunk
    end

    # Look at the next +length+ bytes without consuming them.
    #
    # Returns a String; it may be shorter than +length+ when the source is
    # exhausted.
    def peek(length)
      return @peeked[0...length] if length <= @peeked.length

      read_bytes = @in.read(length - @peeked.length)
      @peeked += read_bytes if read_bytes
      @peeked
    end

    # Peek the next byte as an unsigned integer (nil when nothing is left).
    def peek_1
      peek(1).unpack("C").first
    end

    # Peek the next two bytes as a big-endian unsigned 16-bit integer.
    def peek_2
      peek(2).unpack("n").first
    end

    # Consume one byte as an unsigned integer.
    def read_1
      read(1).unpack("C").first
    end

    # Consume two bytes as a big-endian unsigned 16-bit integer.
    def read_2
      read(2).unpack("n").first
    end

    # Consume four bytes as a big-endian unsigned 32-bit integer.
    def read_4
      read(4).unpack("N").first
    end

    # Consume +length+ raw bytes.
    def read_string(length)
      read(length)
    end

    # Read an atom (tag, 2-byte length, name bytes).
    #
    # Returns a Symbol.
    def read_atom
      fail("Invalid Type, not an atom") unless read_1 == ATOM
      name = read_string(read_2)
      return name.to_sym unless name == ""

      Marshal.load("\004\b:\005") # Workaround for inability to do ''.to_sym
    end

    # Read an integer stored in a single unsigned byte.
    def read_small_int
      fail("Invalid Type, not a small int") unless read_1 == SMALL_INT
      read_1
    end

    # Read a signed 32-bit integer.
    def read_int
      fail("Invalid Type, not an int") unless read_1 == INT
      value = read_4
      # read_4 is unsigned; values with the top bit set are two's-complement
      # negatives.
      value -= (1 << 32) if value >= (1 << 31)
      value
    end

    # Read a big integer whose byte count is stored in one byte.
    def read_small_bignum
      fail("Invalid Type, not a small bignum") unless read_1 == SMALL_BIGNUM
      read_bignum_body(read_1)
    end

    # Read a big integer whose byte count is stored in four bytes.
    def read_large_bignum
      fail("Invalid Type, not a large bignum") unless read_1 == LARGE_BIGNUM
      read_bignum_body(read_4)
    end

    # Read a float stored as a fixed 31-byte textual field.
    def read_float
      fail("Invalid Type, not a float") unless read_1 == FLOAT
      read_string(31).to_f
    end

    # Read a tuple whose arity is stored in one byte.
    def read_small_tuple
      fail("Invalid Type, not a small tuple") unless read_1 == SMALL_TUPLE
      read_tuple(read_1)
    end

    # Read a tuple whose arity is stored in four bytes.
    def read_large_tuple
      fail("Invalid Type, not a large tuple") unless read_1 == LARGE_TUPLE
      read_tuple(read_4)
    end

    # Read the +arity+ elements of a tuple.
    #
    # A tuple whose first element is the atom :bert is treated as a complex
    # type and converted by read_complex_type instead of being returned as-is.
    #
    # Returns a Tuple, or the converted complex value.
    def read_tuple(arity)
      return Tuple.new unless arity > 0

      tag = read_any_raw
      return read_complex_type(arity) if tag == :bert

      tuple = Tuple.new(arity)
      tuple[0] = tag
      (1...arity).each { |i| tuple[i] = read_any_raw }
      tuple
    end

    # Convert the remainder of a {bert, Type, ...} tuple to a Ruby value.
    #
    # arity - total element count of the tuple, including :bert and the type
    #
    # Returns nil, true, false, a Time, a Regexp or a Hash; nil for an
    # unrecognised type.
    def read_complex_type(arity)
      case read_any_raw
      when :nil
        nil
      when :true
        true
      when :false
        false
      when :time
        # Three integers follow; the first is scaled by 1_000_000 and added
        # to the second to form the seconds, the third is passed to Time.at
        # as the sub-second part.
        high = read_any_raw
        low = read_any_raw
        fraction = read_any_raw
        Time.at(high * 1_000_000 + low, fraction)
      when :regex
        source = read_any_raw
        opts = read_any_raw
        options = 0
        options |= Regexp::EXTENDED if opts.include?(:extended)
        options |= Regexp::IGNORECASE if opts.include?(:caseless)
        options |= Regexp::MULTILINE if opts.include?(:multiline)
        Regexp.new(source, options)
      when :dict
        read_dict
      else
        # Consume the elements that follow :bert and the type atom so that an
        # unrecognised complex type nested in a list or tuple does not leave
        # the stream misaligned for the terms after it.
        (arity - 2).times { read_any_raw }
        nil
      end
    end

    # Read a dict payload: a list (possibly empty) of two-element pairs.
    #
    # Returns a Hash mapping each pair's first element to its second.
    def read_dict
      type = read_1
      fail("Invalid dict spec, not an erlang list") unless [LIST, NIL].include?(type)
      return {} if type == NIL

      hash = {}
      read_4.times do
        pair = read_any_raw
        hash[pair[0]] = pair[1]
      end
      read_1 # consume the list's trailing byte
      hash
    end

    # Read the empty-list marker.
    #
    # Returns an empty Array.
    def read_nil
      fail("Invalid Type, not a nil list") unless read_1 == NIL
      []
    end

    # Read an Erlang string (tag, 2-byte length, bytes).
    #
    # Returns an Array of byte values.
    def read_erl_string
      fail("Invalid Type, not an erlang string") unless read_1 == STRING
      read_string(read_2).unpack('C*')
    end

    # Read a list (tag, 4-byte length, elements, one trailing byte).
    #
    # Returns an Array of decoded elements.
    def read_list
      fail("Invalid Type, not an erlang list") unless read_1 == LIST
      length = read_4
      list = Array.new(length) { read_any_raw }
      read_1 # consume the trailing byte that closes the list
      list
    end

    # Read a binary (tag, 4-byte length, bytes).
    #
    # Returns a String.
    def read_bin
      fail("Invalid Type, not an erlang binary") unless read_1 == BIN
      read_string(read_4)
    end

    # Abort decoding with a RuntimeError carrying +str+.
    def fail(str)
      raise str
    end

    private

    # Shared tail of the bignum readers: a sign byte followed by +size+
    # magnitude bytes, least significant first.
    #
    # Returns an Integer (negative when the sign byte is non-zero).
    def read_bignum_body(size)
      sign = read_1
      magnitude = 0
      read_string(size).unpack("C*").each_with_index do |byte, index|
        magnitude += byte << (8 * index)
      end
      sign != 0 ? -magnitude : magnitude
    end
  end
end
|
data/lib/bert/decoder.rb
CHANGED
@@ -5,59 +5,7 @@ module BERT
|
|
5
5
|
#
|
6
6
|
# Returns a Ruby object
|
7
7
|
def self.decode(bert)
|
8
|
-
|
9
|
-
convert(simple_ruby)
|
10
|
-
end
|
11
|
-
|
12
|
-
# Convert Erlectricity representation of BERT complex types into
|
13
|
-
# corresponding Ruby types.
|
14
|
-
# +item+ is the Ruby object to convert
|
15
|
-
#
|
16
|
-
# Returns the converted Ruby object
|
17
|
-
def self.convert(item)
|
18
|
-
case item
|
19
|
-
when TrueClass, FalseClass
|
20
|
-
item.to_s.to_sym
|
21
|
-
when Erl::List
|
22
|
-
item.map { |x| convert(x) }
|
23
|
-
when Array
|
24
|
-
if item[0] == :bert
|
25
|
-
convert_bert(item)
|
26
|
-
else
|
27
|
-
Tuple.new(item.map { |x| convert(x) })
|
28
|
-
end
|
29
|
-
else
|
30
|
-
item
|
31
|
-
end
|
32
|
-
end
|
33
|
-
|
34
|
-
# Convert complex types.
|
35
|
-
# +item+ is the complex type array
|
36
|
-
#
|
37
|
-
# Returns the converted Ruby object
|
38
|
-
def self.convert_bert(item)
|
39
|
-
case item[1]
|
40
|
-
when :nil
|
41
|
-
nil
|
42
|
-
when :dict
|
43
|
-
item[2].inject({}) do |acc, x|
|
44
|
-
acc[convert(x[0])] = convert(x[1]); acc
|
45
|
-
end
|
46
|
-
when TrueClass
|
47
|
-
true
|
48
|
-
when FalseClass
|
49
|
-
false
|
50
|
-
when :time
|
51
|
-
Time.at(item[2] * 1_000_000 + item[3], item[4])
|
52
|
-
when :regex
|
53
|
-
options = 0
|
54
|
-
options |= Regexp::EXTENDED if item[3].include?(:extended)
|
55
|
-
options |= Regexp::IGNORECASE if item[3].include?(:caseless)
|
56
|
-
options |= Regexp::MULTILINE if item[3].include?(:multiline)
|
57
|
-
Regexp.new(item[2], options)
|
58
|
-
else
|
59
|
-
nil
|
60
|
-
end
|
8
|
+
Decode.decode(bert)
|
61
9
|
end
|
62
10
|
end
|
63
11
|
end
|
data/lib/bert/encode.rb
ADDED
@@ -0,0 +1,142 @@
|
|
1
|
+
require 'stringio' # Encode.encode collects its output in a StringIO

module BERT
  # Pure-Ruby encoder that writes Ruby values in BERT's binary format.
  #
  # The tag and limit constants used below (MAGIC, ATOM, MAX_INT, ...) are not
  # defined in this class; they are expected to come from the included Types
  # module.
  class Encode
    include Types

    # The IO-like sink being written to; it only needs to respond to write.
    attr_accessor :out

    # out - IO-like object that receives the encoded bytes
    def initialize(out)
      self.out = out
    end

    # Encode +data+ as one complete term.
    #
    # Returns the encoded bytes as a String.
    # Raises RuntimeError if +data+ (or a nested element) cannot be encoded.
    def self.encode(data)
      io = StringIO.new
      new(io).write_any(data)
      io.string
    end

    # Write the MAGIC byte followed by the encoding of +obj+.
    def write_any(obj)
      write_1 MAGIC
      write_any_raw obj
    end

    # Write +obj+ without the MAGIC byte, dispatching on its Ruby class.
    # Tuple is tested before Array because Tuple is an Array subclass.
    def write_any_raw(obj)
      case obj
      when Symbol then write_symbol(obj)
      # Integer covers every whole number; the Fixnum and Bignum constants
      # are no longer defined on current Ruby versions.
      when Integer then write_fixnum(obj)
      when Float then write_float(obj)
      when Tuple then write_tuple(obj)
      when Array then write_list(obj)
      when String then write_binary(obj)
      else
        fail(obj)
      end
    end

    # Write one unsigned byte.
    def write_1(byte)
      out.write([byte].pack("C"))
    end

    # Write a big-endian unsigned 16-bit integer.
    def write_2(short)
      out.write([short].pack("n"))
    end

    # Write a big-endian 32-bit integer.
    def write_4(long)
      out.write([long].pack("N"))
    end

    # Write raw bytes.
    def write_string(string)
      out.write(string)
    end

    # Write true/false as the atom :true / :false.
    def write_boolean(bool)
      write_symbol(bool.to_s.to_sym)
    end

    # Write a Symbol as an atom (tag, 2-byte byte count, name bytes).
    def write_symbol(sym)
      fail(sym) unless sym.is_a?(Symbol)
      data = sym.to_s
      write_1 ATOM
      # Byte count, not character count: the two differ for multibyte names.
      write_2 data.bytesize
      write_string data
    end

    # Write an integer using the smallest representation that fits:
    # one byte, 32 bits, or a bignum.
    def write_fixnum(num)
      if num >= 0 && num < 256
        write_1 SMALL_INT
        write_1 num
      elsif num <= MAX_INT && num >= MIN_INT
        write_1 INT
        write_4 num
      else
        write_bignum num
      end
    end

    # Write a float as a 31-byte, NUL-padded "%15.15e" textual field.
    def write_float(float)
      write_1 FLOAT
      write_string format("%15.15e", float).ljust(31, "\000")
    end

    # Write an arbitrary-size integer as a small or large bignum.
    def write_bignum(num)
      # The count must equal the number of magnitude bytes that
      # write_bignum_guts emits, so it is derived from the absolute value
      # (no "-" sign, no machine-word padding).
      n = num.zero? ? 0 : (num.abs.to_s(2).size + 7) / 8
      if n < 256 # the small form stores the count in a single byte
        write_1 SMALL_BIGNUM
        write_1 n
      else
        write_1 LARGE_BIGNUM
        write_4 n
      end
      write_bignum_guts(num)
    end

    # Write the sign byte (0 for non-negative, 1 for negative) followed by
    # the magnitude bytes, least significant first.
    def write_bignum_guts(num)
      write_1(num >= 0 ? 0 : 1)
      num = num.abs
      while num != 0
        write_1(num % 256)
        num >>= 8
      end
    end

    # Write an Array as a tuple: small form (1-byte arity) below 256
    # elements, large form (4-byte arity) otherwise.
    def write_tuple(data)
      fail(data) unless data.is_a? Array

      if data.length < 256
        write_1 SMALL_TUPLE
        write_1 data.length
      else
        write_1 LARGE_TUPLE
        write_4 data.length
      end

      data.each { |e| write_any_raw e }
    end

    # Write an Array as a list. An empty list is just the NIL marker;
    # otherwise: tag, 4-byte length, elements, closing NIL marker.
    def write_list(data)
      fail(data) unless data.is_a? Array
      if data.empty?
        # Explicit branch: the early return must not depend on what
        # out.write happens to return.
        write_1 NIL
        return
      end
      write_1 LIST
      write_4 data.length
      data.each { |e| write_any_raw e }
      write_1 NIL
    end

    # Write a String as a binary (tag, 4-byte byte count, bytes).
    def write_binary(data)
      write_1 BIN
      # Byte count, not character count: the two differ for multibyte text.
      write_4 data.bytesize
      write_string data
    end

    private

    # Abort encoding with a RuntimeError naming the offending object.
    def fail(obj)
      raise "Cannot encode to erlang external format: #{obj.inspect}"
    end
  end
end
|