bert 1.1.2-java
Sign up to get free protection for your applications and to get access to all the features.
- data/.document +5 -0
- data/.gitignore +8 -0
- data/History.txt +31 -0
- data/LICENSE +20 -0
- data/README.md +77 -0
- data/Rakefile +96 -0
- data/VERSION +1 -0
- data/bench/bench.rb +36 -0
- data/bench/decode_bench.rb +87 -0
- data/bench/encode_bench.rb +36 -0
- data/bench/results.txt +55 -0
- data/bert.gemspec +72 -0
- data/ext/bert/c/decode.c +438 -0
- data/ext/bert/c/extconf.rb +11 -0
- data/lib/bert.rb +25 -0
- data/lib/bert/bert.rb +21 -0
- data/lib/bert/decode.rb +248 -0
- data/lib/bert/decoder.rb +11 -0
- data/lib/bert/encode.rb +138 -0
- data/lib/bert/encoder.rb +45 -0
- data/lib/bert/types.rb +21 -0
- data/test/bert_test.rb +69 -0
- data/test/decoder_test.rb +91 -0
- data/test/encoder_test.rb +92 -0
- data/test/test_helper.rb +10 -0
- metadata +92 -0
data/lib/bert.rb
ADDED
@@ -0,0 +1,25 @@
|
|
1
|
+
|
2
|
+
require 'stringio'
|
3
|
+
|
4
|
+
$:.unshift File.join(File.dirname(__FILE__), *%w[.. ext])
|
5
|
+
|
6
|
+
require 'bert/bert'
|
7
|
+
require 'bert/types'
|
8
|
+
|
9
|
+
begin
|
10
|
+
# try to load the C extension
|
11
|
+
require 'bert/c/decode'
|
12
|
+
rescue LoadError
|
13
|
+
# fall back on the pure ruby version
|
14
|
+
require 'bert/decode'
|
15
|
+
end
|
16
|
+
|
17
|
+
require 'bert/encode'
|
18
|
+
|
19
|
+
require 'bert/encoder'
|
20
|
+
require 'bert/decoder'
|
21
|
+
|
22
|
+
# Global method for specifying that an array should be encoded as a tuple.
|
23
|
+
# Global shorthand for the tuple class, so that `t[1, 2]` builds a
# BERT::Tuple instead of a plain Array.
def t
  ::BERT::Tuple
end
|
data/lib/bert/bert.rb
ADDED
@@ -0,0 +1,21 @@
|
|
1
|
+
module BERT
  # Encodes a Ruby object by delegating to Encoder.encode.
  def self.encode(ruby)
    Encoder.encode(ruby)
  end

  # Decodes a BERT string by delegating to Decoder.decode.
  def self.decode(bert)
    Decoder.decode(bert)
  end

  # Renders the bytes of +str+ in Erlang binary-literal notation,
  # e.g. "ab" => "<<97,98>>".
  def self.ebin(str)
    "<<" + str.unpack("C*").join(',') + ">>"
  end

  # Array subclass used to mark values that are tuples rather than lists.
  class Tuple < Array
    # Same as Array#inspect but prefixed with "t", e.g. "t[1, 2]".
    def inspect
      "t#{super}"
    end
  end
end
|
data/lib/bert/decode.rb
ADDED
@@ -0,0 +1,248 @@
|
|
1
|
+
module BERT
|
2
|
+
class Decode
|
3
|
+
attr_accessor :in
|
4
|
+
include Types
|
5
|
+
|
6
|
+
# Name of this decoder implementation (this file is the pure-Ruby one).
def self.impl
  'Ruby'
end
|
9
|
+
|
10
|
+
# Decodes a complete BERT-encoded string (leading magic byte included).
def self.decode(string)
  stream = StringIO.new(string)
  decoder = new(stream)
  decoder.read_any
end
|
13
|
+
|
14
|
+
# ins - the IO-like object terms are read from.
#
# @peeked holds bytes that #peek has already pulled from the stream but
# that #read has not yet handed out.
def initialize(ins)
  @peeked = ""
  @in = ins
end
|
18
|
+
|
19
|
+
# Reads one term preceded by the MAGIC byte; fails with "Bad Magic" when
# the first byte is anything else.
def read_any
  magic = read_1
  fail("Bad Magic") if magic != MAGIC
  read_any_raw
end
|
23
|
+
|
24
|
+
# Reads one term (without the magic byte). The tag byte is only peeked at
# here; each read_* method consumes and re-validates it.
def read_any_raw
  case peek_1
  when ATOM
    read_atom
  when SMALL_INT
    read_small_int
  when INT
    read_int
  when SMALL_BIGNUM
    read_small_bignum
  when LARGE_BIGNUM
    read_large_bignum
  when FLOAT
    read_float
  when SMALL_TUPLE
    read_small_tuple
  when LARGE_TUPLE
    read_large_tuple
  when NIL
    read_nil
  when STRING
    read_erl_string
  when LIST
    read_list
  when BIN
    read_bin
  else
    fail("Unknown term tag: #{peek_1}")
  end
end
|
42
|
+
|
43
|
+
# Consumes exactly +length+ bytes, taking them from the look-ahead buffer
# filled by #peek first and from the underlying stream afterwards.
#
# Returns the bytes as a String.
# Raises RuntimeError when the stream ends before +length+ bytes are
# available. Previously EOF surfaced as a TypeError from `<< nil`, and a
# short read silently returned truncated data.
def read(length)
  # The buffer alone satisfies the request: split it and keep the rest.
  if length < @peeked.length
    result = @peeked[0...length]
    @peeked = @peeked[length..-1]
    return result
  end

  # Otherwise the whole buffer is consumed and the remainder comes from @in.
  result = @peeked
  @peeked = ''
  missing = length - result.length
  return result unless missing > 0

  chunk = @in.read(missing)
  if chunk.nil? || chunk.bytesize < missing
    got = result.length + (chunk ? chunk.bytesize : 0)
    raise "Unexpected end of input: wanted #{length} byte(s), got #{got}"
  end
  result << chunk
end
|
59
|
+
|
60
|
+
# Returns up to +length+ bytes without consuming them. Bytes fetched from
# the stream are kept in @peeked so a later #read sees them again. The
# result is shorter than +length+ when the stream runs out.
def peek(length)
  return @peeked[0...length] if length <= @peeked.length

  extra = @in.read(length - @peeked.length)
  @peeked << extra if extra
  @peeked
end
|
69
|
+
|
70
|
+
# Peeks at the next byte as an unsigned integer (nil when nothing is left).
def peek_1
  peek(1).unpack("C")[0]
end
|
73
|
+
|
74
|
+
# Peeks at the next two bytes as a big-endian unsigned 16-bit integer.
def peek_2
  peek(2).unpack("n")[0]
end
|
77
|
+
|
78
|
+
# Consumes one byte and returns it as an unsigned integer.
def read_1
  read(1).unpack("C")[0]
end
|
81
|
+
|
82
|
+
# Consumes two bytes and returns them as a big-endian unsigned 16-bit integer.
def read_2
  read(2).unpack("n")[0]
end
|
85
|
+
|
86
|
+
# Consumes four bytes and returns them as a big-endian unsigned 32-bit integer.
def read_4
  read(4).unpack("N")[0]
end
|
89
|
+
|
90
|
+
# Consumes +length+ raw bytes and returns them as a String.
def read_string(length)
  read(length)
end
|
93
|
+
|
94
|
+
# Reads an atom (tag, 2-byte length, name bytes) and returns it as a Symbol.
def read_atom
  fail("Invalid Type, not an atom") unless read_1 == ATOM
  name = read_string(read_2)
  # Workaround for inability to do ''.to_sym: the empty symbol is obtained
  # by unmarshalling its serialized form.
  return Marshal.load("\004\b:\005") if name == ""
  name.to_sym
end
|
105
|
+
|
106
|
+
# Reads a small integer: the tag byte followed by a single unsigned byte.
def read_small_int
  tag = read_1
  fail("Invalid Type, not a small int") if tag != SMALL_INT
  read_1
end
|
110
|
+
|
111
|
+
# Reads an integer term: the tag byte followed by a 4-byte big-endian
# signed (two's-complement) value.
#
# Returns an Integer in the range -2**31 .. 2**31 - 1.
# Raises RuntimeError if the tag byte is not INT.
def read_int
  fail("Invalid Type, not an int") unless read_1 == INT
  value = read_4
  # read_4 yields the unsigned interpretation; values with the top bit set
  # are negative, so shift them down by 2**32.
  value -= (1 << 32) if value >= (1 << 31)
  # The value is already an Integer; the former Fixnum.induced_from call
  # was an identity conversion and that method no longer exists.
  value
end
|
118
|
+
|
119
|
+
# Reads a small bignum: tag byte, 1-byte magnitude size, 1-byte sign
# (non-zero means negative), then the magnitude bytes in little-endian order.
#
# Returns an Integer.
# Raises RuntimeError if the tag byte is not SMALL_BIGNUM.
def read_small_bignum
  fail("Invalid Type, not a small bignum") unless read_1 == SMALL_BIGNUM
  size = read_1
  sign = read_1
  # Least significant byte comes first, so fold from the far end down.
  magnitude = read_string(size).unpack("C*").reverse.inject(0) do |acc, byte|
    (acc << 8) | byte
  end
  # The result is already an Integer; Bignum.induced_from no longer exists.
  sign != 0 ? -magnitude : magnitude
end
|
131
|
+
|
132
|
+
# Reads a large bignum: tag byte, 4-byte magnitude size, 1-byte sign
# (non-zero means negative), then the magnitude bytes in little-endian order.
#
# Returns an Integer.
# Raises RuntimeError if the tag byte is not LARGE_BIGNUM.
def read_large_bignum
  fail("Invalid Type, not a large bignum") unless read_1 == LARGE_BIGNUM
  size = read_4
  sign = read_1
  # Least significant byte comes first, so fold from the far end down.
  magnitude = read_string(size).unpack("C*").reverse.inject(0) do |acc, byte|
    (acc << 8) | byte
  end
  # The result is already an Integer; Bignum.induced_from no longer exists.
  sign != 0 ? -magnitude : magnitude
end
|
144
|
+
|
145
|
+
# Reads a float: the tag byte followed by a 31-byte textual representation
# that is converted with String#to_f.
def read_float
  fail("Invalid Type, not a float") unless read_1 == FLOAT
  read_string(31).to_f
end
|
150
|
+
|
151
|
+
# Reads a small tuple: tag byte, 1-byte arity, then the elements.
def read_small_tuple
  tag = read_1
  fail("Invalid Type, not a small tuple") if tag != SMALL_TUPLE
  arity = read_1
  read_tuple(arity)
end
|
155
|
+
|
156
|
+
# Reads a large tuple: tag byte, 4-byte arity, then the elements.
#
# Raises RuntimeError if the tag byte is not LARGE_TUPLE. The message used
# to say "small tuple", copied from read_small_tuple.
def read_large_tuple
  fail("Invalid Type, not a large tuple") unless read_1 == LARGE_TUPLE
  read_tuple(read_4)
end
|
160
|
+
|
161
|
+
# Reads the +arity+ elements of a tuple whose header is already consumed.
# A tuple whose first element is the atom :bert is handed to
# read_complex_type; anything else becomes a Tuple.
def read_tuple(arity)
  return Tuple.new unless arity > 0

  tag = read_any_raw
  return read_complex_type(arity) if tag == :bert

  tuple = Tuple.new(arity)
  tuple[0] = tag
  1.upto(arity - 1) { |index| tuple[index] = read_any_raw }
  tuple
end
|
176
|
+
|
177
|
+
# Decodes the rest of a tuple that started with the atom :bert. The next
# term names the type; unknown names yield nil. +arity+ is accepted but
# not used.
def read_complex_type(arity)
  kind = read_any_raw
  case kind
  when :nil
    nil
  when :true
    true
  when :false
    false
  when :time
    # Three terms, read in order: megaseconds, seconds, microseconds.
    megaseconds = read_any_raw
    seconds = read_any_raw
    microseconds = read_any_raw
    Time.at(megaseconds * 1_000_000 + seconds, microseconds)
  when :regex
    source = read_any_raw
    opts = read_any_raw
    flag_table = [
      [:extended, Regexp::EXTENDED],
      [:caseless, Regexp::IGNORECASE],
      [:multiline, Regexp::MULTILINE]
    ]
    options = flag_table.inject(0) do |bits, (name, flag)|
      opts.include?(name) ? (bits | flag) : bits
    end
    Regexp.new(source, options)
  when :dict
    read_dict
  else
    nil
  end
end
|
201
|
+
|
202
|
+
# Reads a dict payload: a list (or the empty-list marker) of pairs,
# returned as a Hash keyed by each pair's first element.
def read_dict
  type = read_1
  fail("Invalid dict spec, not an erlang list") unless type == LIST || type == NIL

  length = (type == LIST) ? read_4 : 0
  hash = {}
  length.times do
    pair = read_any_raw
    hash[pair[0]] = pair[1]
  end
  # A LIST carries one trailing byte after its elements; consume it.
  read_1 if type == LIST
  hash
end
|
218
|
+
|
219
|
+
# Reads the empty-list marker and returns an empty Array.
def read_nil
  tag = read_1
  fail("Invalid Type, not a nil list") if tag != NIL
  []
end
|
223
|
+
|
224
|
+
# Reads an Erlang string (tag, 2-byte length, bytes) and returns it as an
# Array of byte values.
def read_erl_string
  fail("Invalid Type, not an erlang string") unless read_1 == STRING
  length = read_2
  raw = read_string(length)
  raw.unpack('C' * length)
end
|
229
|
+
|
230
|
+
# Reads a list: tag, 4-byte element count, the elements, then one trailing
# byte that is consumed and discarded.
def read_list
  fail("Invalid Type, not an erlang list") unless read_1 == LIST
  length = read_4
  list = Array.new(length) { read_any_raw }
  read_1
  list
end
|
237
|
+
|
238
|
+
# Reads a binary: tag, 4-byte length, then that many raw bytes as a String.
def read_bin
  tag = read_1
  fail("Invalid Type, not an erlang binary") if tag != BIN
  read_string(read_4)
end
|
243
|
+
|
244
|
+
# Aborts decoding by raising +str+ (a RuntimeError when given a message String).
def fail(str)
  raise(str)
end
|
247
|
+
end
|
248
|
+
end
|
data/lib/bert/decoder.rb
ADDED
data/lib/bert/encode.rb
ADDED
@@ -0,0 +1,138 @@
|
|
1
|
+
module BERT
|
2
|
+
class Encode
|
3
|
+
include Types
|
4
|
+
|
5
|
+
attr_accessor :out
|
6
|
+
|
7
|
+
# out - the IO-like object the encoded bytes are written to.
def initialize(out)
  @out = out
end
|
10
|
+
|
11
|
+
# Encodes +data+ (magic byte included) and returns the bytes as a String.
#
# The buffer is created from String.new so the result is tagged as binary
# on Rubies with string encodings. A bare StringIO.new labels it with the
# default external encoding, which yields an invalid UTF-8 string that does
# not compare equal to the same bytes held in a binary string.
def self.encode(data)
  io = StringIO.new(String.new)
  new(io).write_any(data)
  io.string
end
|
16
|
+
|
17
|
+
# Writes the MAGIC byte followed by the encoding of +obj+.
def write_any(obj)
  write_1(MAGIC)
  write_any_raw(obj)
end
|
21
|
+
|
22
|
+
# Writes the encoding of +obj+ without the magic byte, dispatching on its
# Ruby class. Tuple is tested before Array because Tuple is an Array
# subclass.
#
# Raises (via #fail) for objects of any other class.
def write_any_raw(obj)
  case obj
  when Symbol  then write_symbol(obj)
  # Integer covers what used to be Fixnum and Bignum; those constants no
  # longer exist on current Rubies.
  when Integer then write_fixnum(obj)
  when Float   then write_float(obj)
  when Tuple   then write_tuple(obj)
  when Array   then write_list(obj)
  when String  then write_binary(obj)
  else
    fail(obj)
  end
end
|
34
|
+
|
35
|
+
# Writes +byte+ as a single unsigned byte.
def write_1(byte)
  packed = [byte].pack("C")
  out.write(packed)
end
|
38
|
+
|
39
|
+
# Writes +short+ as a big-endian 16-bit integer.
def write_2(short)
  packed = [short].pack("n")
  out.write(packed)
end
|
42
|
+
|
43
|
+
# Writes +long+ as a big-endian 32-bit integer.
def write_4(long)
  packed = [long].pack("N")
  out.write(packed)
end
|
46
|
+
|
47
|
+
# Writes the bytes of +string+ as they are, with no tag or length prefix.
def write_string(string)
  out.write(string)
end
|
50
|
+
|
51
|
+
# Writes +bool+ as the atom named after its string form.
def write_boolean(bool)
  atom = bool.to_s.to_sym
  write_symbol(atom)
end
|
54
|
+
|
55
|
+
# Writes +sym+ as an atom: tag byte, 2-byte length, then the name bytes.
#
# The length is the byte count of the name, not its character count, so
# that multibyte names are framed correctly.
# Raises (via #fail) unless +sym+ is a Symbol.
def write_symbol(sym)
  fail(sym) unless sym.is_a?(Symbol)
  data = sym.to_s
  write_1 ATOM
  write_2 data.bytesize
  write_string data
end
|
62
|
+
|
63
|
+
# Writes an integer in the smallest fitting form: one unsigned byte for
# 0..255, four bytes inside MIN_INT..MAX_INT, a bignum otherwise.
def write_fixnum(num)
  if (0...256).include?(num)
    write_1(SMALL_INT)
    write_1(num)
  elsif num.between?(MIN_INT, MAX_INT)
    write_1(INT)
    write_4(num)
  else
    write_bignum(num)
  end
end
|
74
|
+
|
75
|
+
# Writes a float: tag byte, then its "%15.15e" text padded with NUL bytes
# to 31 bytes.
def write_float(float)
  write_1(FLOAT)
  text = format("%15.15e", float)
  write_string(text.ljust(31, "\000"))
end
|
79
|
+
|
80
|
+
# Writes +num+ as a bignum: the small form (1-byte size) when the magnitude
# fits in fewer than 256 bytes, the large form (4-byte size) otherwise.
# The sign and magnitude bytes are written by write_bignum_guts.
def write_bignum(num)
  # Size counts magnitude bytes only. It must be computed from the absolute
  # value: to_s(2) of a negative number includes the "-" sign, which
  # declared one byte too many whenever the magnitude filled whole bytes.
  n = (num.abs.to_s(2).size + 7) / 8
  if n < 256
    write_1 SMALL_BIGNUM
    write_1 n
  else
    write_1 LARGE_BIGNUM
    write_4 n
  end
  write_bignum_guts(num)
end
|
92
|
+
|
93
|
+
# Writes the sign byte (1 for negative, 0 otherwise) followed by the
# magnitude of +num+ one byte at a time, least significant byte first.
def write_bignum_guts(num)
  write_1(num < 0 ? 1 : 0)
  remaining = num.abs
  until remaining == 0
    write_1(remaining % 256)
    remaining >>= 8
  end
end
|
102
|
+
|
103
|
+
# Writes a tuple: small form (1-byte arity) below 256 elements, large form
# (4-byte arity) otherwise, followed by each element.
def write_tuple(data)
  fail(data) unless data.is_a?(Array)

  arity = data.length
  if arity >= 256
    write_1(LARGE_TUPLE)
    write_4(arity)
  else
    write_1(SMALL_TUPLE)
    write_1(arity)
  end

  data.each { |element| write_any_raw(element) }
end
|
116
|
+
|
117
|
+
# Writes a list: the NIL marker alone for an empty Array, otherwise the
# LIST tag, a 4-byte element count, each element, and a closing NIL.
#
# Raises (via #fail) unless +data+ is an Array.
def write_list(data)
  fail(data) unless data.is_a? Array

  # Explicit early return: the former `write_1 NIL and return` only
  # returned when the underlying write call yielded a truthy value.
  if data.empty?
    write_1 NIL
    return
  end

  write_1 LIST
  write_4 data.length
  data.each { |e| write_any_raw e }
  write_1 NIL
end
|
125
|
+
|
126
|
+
# Writes +data+ as a binary: tag byte, 4-byte length, then the raw bytes.
#
# The length is the byte count, not the character count, so that strings
# holding multibyte characters are framed correctly.
def write_binary(data)
  write_1 BIN
  write_4 data.bytesize
  write_string data
end
|
131
|
+
|
132
|
+
private
|
133
|
+
|
134
|
+
# Raises a RuntimeError naming the object that could not be encoded.
def fail(obj)
  raise("Cannot encode to erlang external format: #{obj.inspect}")
end
|
137
|
+
end
|
138
|
+
end
|