bert 1.1.2 → 1.1.6
Sign up to get free protection for your applications and to get access to all the features.
- data/History.txt +11 -1
- data/README.md +3 -3
- data/Rakefile +8 -3
- data/VERSION +1 -1
- data/bert.gemspec +1 -1
- data/ext/bert/c/decode.c +387 -329
- data/ext/bert/c/extconf.rb +3 -0
- data/lib/bert/decode.rb +6 -4
- data/lib/bert/encode.rb +4 -3
- data/test/bert_test.rb +2 -1
- data/test/encoder_test.rb +15 -0
- metadata +24 -8
data/History.txt
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
= 1.1.6 / 2012-05-25
|
2
|
+
* Bug fixes
|
3
|
+
* Better handling of utf-8 characters
|
4
|
+
|
5
|
+
= 1.1.5 / 2011-12-09
|
6
|
+
* Bug fixes
|
7
|
+
* Faster and more secure C BERT decoder
|
8
|
+
* Fix for encoding of negative bignums
|
9
|
+
* Ruby 1.9 compatibility
|
10
|
+
|
1
11
|
= 1.1.2 / 2010-02-08
|
2
12
|
* Bug fixes
|
3
13
|
* Fix bignum handling on 256 byte boundary
|
@@ -28,4 +38,4 @@
|
|
28
38
|
* Add roundtrip tests
|
29
39
|
|
30
40
|
= 0.1.0 / 2009-10-08
|
31
|
-
* Birthday!
|
41
|
+
* Birthday!
|
data/README.md
CHANGED
@@ -49,9 +49,9 @@ Usage
|
|
49
49
|
require 'bert'
|
50
50
|
|
51
51
|
bert = BERT.encode(t[:user, {:name => 'TPW', :nick => 'mojombo'}])
|
52
|
-
# => "\203h\002d\000\004userh\
|
53
|
-
\000\004namem\000\000\000\003TPWh\002d\000\004nickm\
|
54
|
-
\amojomboj"
|
52
|
+
# => "\203h\002d\000\004userh\003d\000\004bertd\000\004dictl\000\000\
|
53
|
+
000\002h\002d\000\004namem\000\000\000\003TPWh\002d\000\004nickm\
|
54
|
+
000\000\000\amojomboj"
|
55
55
|
|
56
56
|
BERT.decode(bert)
|
57
57
|
# => t[:user, {:name=>"TPW", :nick=>"mojombo"}]
|
data/Rakefile
CHANGED
@@ -11,9 +11,14 @@ begin
|
|
11
11
|
gem.homepage = "http://github.com/mojombo/bert"
|
12
12
|
gem.authors = ["Tom Preston-Werner"]
|
13
13
|
gem.add_development_dependency("thoughtbot-shoulda")
|
14
|
-
|
15
|
-
|
16
|
-
|
14
|
+
if ENV["JAVA"]
|
15
|
+
gem.extensions = nil
|
16
|
+
gem.platform = 'java'
|
17
|
+
else
|
18
|
+
gem.require_paths = ["lib", "ext"]
|
19
|
+
gem.files.include("ext")
|
20
|
+
gem.extensions << 'ext/bert/c/extconf.rb'
|
21
|
+
end
|
17
22
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
18
23
|
end
|
19
24
|
rescue LoadError
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.1.2
|
1
|
+
1.1.6
|
data/bert.gemspec
CHANGED
data/ext/bert/c/decode.c
CHANGED
@@ -1,11 +1,9 @@
|
|
1
1
|
#include "ruby.h"
|
2
|
-
#include <
|
2
|
+
#include <stdint.h>
|
3
|
+
#include <netinet/in.h>
|
3
4
|
|
4
|
-
#define ERL_VERSION 131
|
5
5
|
#define ERL_SMALL_INT 97
|
6
6
|
#define ERL_INT 98
|
7
|
-
#define ERL_SMALL_BIGNUM 110
|
8
|
-
#define ERL_LARGE_BIGNUM 111
|
9
7
|
#define ERL_FLOAT 99
|
10
8
|
#define ERL_ATOM 100
|
11
9
|
#define ERL_SMALL_TUPLE 104
|
@@ -14,425 +12,485 @@
|
|
14
12
|
#define ERL_STRING 107
|
15
13
|
#define ERL_LIST 108
|
16
14
|
#define ERL_BIN 109
|
15
|
+
#define ERL_SMALL_BIGNUM 110
|
16
|
+
#define ERL_LARGE_BIGNUM 111
|
17
|
+
#define ERL_VERSION 131
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
19
|
+
#define BERT_VALID_TYPE(t) ((t) >= ERL_SMALL_INT && (t) <= ERL_LARGE_BIGNUM)
|
20
|
+
#define BERT_TYPE_OFFSET (ERL_SMALL_INT)
|
21
|
+
|
22
|
+
static VALUE rb_mBERT;
|
23
|
+
static VALUE rb_cDecode;
|
24
|
+
static VALUE rb_cTuple;
|
25
|
+
|
26
|
+
struct bert_buf {
|
27
|
+
const uint8_t *data;
|
28
|
+
const uint8_t *end;
|
29
|
+
};
|
30
|
+
|
31
|
+
static VALUE bert_read_invalid(struct bert_buf *buf);
|
32
|
+
|
33
|
+
static VALUE bert_read_sint(struct bert_buf *buf);
|
34
|
+
static VALUE bert_read_int(struct bert_buf *buf);
|
35
|
+
static VALUE bert_read_float(struct bert_buf *buf);
|
36
|
+
static VALUE bert_read_atom(struct bert_buf *buf);
|
37
|
+
static VALUE bert_read_stuple(struct bert_buf *buf);
|
38
|
+
static VALUE bert_read_ltuple(struct bert_buf *buf);
|
39
|
+
static VALUE bert_read_nil(struct bert_buf *buf);
|
40
|
+
static VALUE bert_read_string(struct bert_buf *buf);
|
41
|
+
static VALUE bert_read_list(struct bert_buf *buf);
|
42
|
+
static VALUE bert_read_bin(struct bert_buf *buf);
|
43
|
+
static VALUE bert_read_sbignum(struct bert_buf *buf);
|
44
|
+
static VALUE bert_read_lbignum(struct bert_buf *buf);
|
45
|
+
|
46
|
+
typedef VALUE (*bert_ptr)(struct bert_buf *buf);
|
47
|
+
static bert_ptr bert_callbacks[] = {
|
48
|
+
&bert_read_sint,
|
49
|
+
&bert_read_int,
|
50
|
+
&bert_read_float,
|
51
|
+
&bert_read_atom,
|
52
|
+
&bert_read_invalid,
|
53
|
+
&bert_read_invalid,
|
54
|
+
&bert_read_invalid,
|
55
|
+
&bert_read_stuple,
|
56
|
+
&bert_read_ltuple,
|
57
|
+
&bert_read_nil,
|
58
|
+
&bert_read_string,
|
59
|
+
&bert_read_list,
|
60
|
+
&bert_read_bin,
|
61
|
+
&bert_read_sbignum,
|
62
|
+
&bert_read_lbignum
|
63
|
+
};
|
64
|
+
|
65
|
+
static inline uint8_t bert_buf_read8(struct bert_buf *buf)
|
66
|
+
{
|
67
|
+
return *buf->data++;
|
31
68
|
}
|
32
69
|
|
33
|
-
|
70
|
+
static inline uint16_t bert_buf_read16(struct bert_buf *buf)
|
71
|
+
{
|
72
|
+
/* Note that this will trigger -Wcast-align and throw a
|
73
|
+
* bus error on platforms where unaligned reads are not
|
74
|
+
* allowed. Also note that this is not breaking any
|
75
|
+
* strict aliasing rules. */
|
76
|
+
uint16_t short_val = *(uint16_t *)buf->data;
|
77
|
+
buf->data += sizeof(uint16_t);
|
78
|
+
return ntohs(short_val);
|
79
|
+
}
|
34
80
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
81
|
+
static inline uint32_t bert_buf_read32(struct bert_buf *buf)
|
82
|
+
{
|
83
|
+
/* Note that this will trigger -Wcast-align and throw a
|
84
|
+
* bus error on platforms where unaligned reads are not
|
85
|
+
* allowed. Also note that this is not breaking any
|
86
|
+
* strict aliasing rules. */
|
87
|
+
uint32_t long_val = *(uint32_t *)buf->data;
|
88
|
+
buf->data += sizeof(uint32_t);
|
89
|
+
return ntohl(long_val);
|
39
90
|
}
|
40
91
|
|
41
|
-
void
|
42
|
-
|
92
|
+
static inline void bert_buf_ensure(struct bert_buf *buf, size_t size)
|
93
|
+
{
|
94
|
+
if (buf->data + size > buf->end)
|
95
|
+
rb_raise(rb_eEOFError, "Unexpected end of BERT stream");
|
43
96
|
}
|
44
97
|
|
45
|
-
|
98
|
+
static VALUE bert_read(struct bert_buf *buf)
|
99
|
+
{
|
100
|
+
uint8_t type;
|
46
101
|
|
47
|
-
|
48
|
-
|
49
|
-
}
|
102
|
+
bert_buf_ensure(buf, 1);
|
103
|
+
type = bert_buf_read8(buf);
|
50
104
|
|
51
|
-
|
52
|
-
|
53
|
-
}
|
105
|
+
if (!BERT_VALID_TYPE(type))
|
106
|
+
rb_raise(rb_eRuntimeError, "Invalid tag '%d' for term", type);
|
54
107
|
|
55
|
-
|
56
|
-
return (unsigned int) ((**pData << 24) + (*(*pData + 1) << 16) + (*(*pData + 2) << 8) + *(*pData + 3));
|
108
|
+
return bert_callbacks[type - BERT_TYPE_OFFSET](buf);
|
57
109
|
}
|
58
110
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
111
|
+
static VALUE bert_read_dict(struct bert_buf *buf)
|
112
|
+
{
|
113
|
+
uint8_t type;
|
114
|
+
uint32_t length = 0, i;
|
115
|
+
VALUE rb_dict;
|
64
116
|
|
65
|
-
|
66
|
-
|
67
|
-
*pData += 2;
|
68
|
-
return val;
|
69
|
-
}
|
117
|
+
bert_buf_ensure(buf, 1);
|
118
|
+
type = bert_buf_read8(buf);
|
70
119
|
|
71
|
-
|
72
|
-
|
73
|
-
*pData += 4;
|
74
|
-
return val;
|
75
|
-
}
|
120
|
+
if (type != ERL_LIST && type != ERL_NIL)
|
121
|
+
rb_raise(rb_eTypeError, "Invalid dict spec, not an erlang list");
|
76
122
|
|
77
|
-
|
123
|
+
if (type == ERL_LIST) {
|
124
|
+
bert_buf_ensure(buf, 4);
|
125
|
+
length = bert_buf_read32(buf);
|
126
|
+
}
|
78
127
|
|
79
|
-
|
128
|
+
rb_dict = rb_hash_new();
|
80
129
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
}
|
130
|
+
for (i = 0; i < length; ++i) {
|
131
|
+
VALUE key, val;
|
132
|
+
bert_buf_ensure(buf, 2);
|
85
133
|
|
86
|
-
|
134
|
+
if (bert_buf_read8(buf) != ERL_SMALL_TUPLE || bert_buf_read8(buf) != 2)
|
135
|
+
rb_raise(rb_eTypeError, "Invalid dict tuple");
|
87
136
|
|
88
|
-
|
89
|
-
|
90
|
-
}
|
137
|
+
key = bert_read(buf);
|
138
|
+
val = bert_read(buf);
|
91
139
|
|
92
|
-
|
93
|
-
}
|
140
|
+
rb_hash_aset(rb_dict, key, val);
|
141
|
+
}
|
94
142
|
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
unsigned int length = 0;
|
102
|
-
if(type == ERL_LIST) {
|
103
|
-
length = read_4(pData);
|
104
|
-
}
|
105
|
-
|
106
|
-
VALUE cHash = rb_const_get(rb_cObject, rb_intern("Hash"));
|
107
|
-
VALUE hash = rb_funcall(cHash, rb_intern("new"), 0);
|
108
|
-
|
109
|
-
int i;
|
110
|
-
for(i = 0; i < length; ++i) {
|
111
|
-
VALUE pair = read_dict_pair(pData);
|
112
|
-
VALUE first = rb_ary_entry(pair, 0);
|
113
|
-
VALUE last = rb_ary_entry(pair, 1);
|
114
|
-
rb_funcall(hash, rb_intern("store"), 2, first, last);
|
115
|
-
}
|
116
|
-
|
117
|
-
if(type == ERL_LIST) {
|
118
|
-
read_1(pData);
|
119
|
-
}
|
120
|
-
|
121
|
-
return hash;
|
122
|
-
}
|
143
|
+
if (type == ERL_LIST) {
|
144
|
+
/* disregard the list tail: skip the single byte that follows the last pair */
|
145
|
+
bert_buf_ensure(buf, 1);
|
146
|
+
(void)bert_buf_read8(buf);
|
147
|
+
}
|
123
148
|
|
124
|
-
|
125
|
-
VALUE type = read_any_raw(pData);
|
126
|
-
ID id = SYM2ID(type);
|
127
|
-
if(id == rb_intern("nil")) {
|
128
|
-
return Qnil;
|
129
|
-
} else if(id == rb_intern("true")) {
|
130
|
-
return Qtrue;
|
131
|
-
} else if(id == rb_intern("false")) {
|
132
|
-
return Qfalse;
|
133
|
-
} else if(id == rb_intern("time")) {
|
134
|
-
VALUE megasecs = read_any_raw(pData);
|
135
|
-
VALUE msecs = rb_funcall(megasecs, rb_intern("*"), 1, INT2NUM(1000000));
|
136
|
-
VALUE secs = read_any_raw(pData);
|
137
|
-
VALUE microsecs = read_any_raw(pData);
|
138
|
-
VALUE stamp = rb_funcall(msecs, rb_intern("+"), 1, secs);
|
139
|
-
return rb_funcall(rb_cTime, rb_intern("at"), 2, stamp, microsecs);
|
140
|
-
} else if(id == rb_intern("regex")) {
|
141
|
-
VALUE source = read_any_raw(pData);
|
142
|
-
VALUE opts = read_any_raw(pData);
|
143
|
-
int flags = 0;
|
144
|
-
if(rb_ary_includes(opts, ID2SYM(rb_intern("caseless"))))
|
145
|
-
flags = flags | 1;
|
146
|
-
if(rb_ary_includes(opts, ID2SYM(rb_intern("extended"))))
|
147
|
-
flags = flags | 2;
|
148
|
-
if(rb_ary_includes(opts, ID2SYM(rb_intern("multiline"))))
|
149
|
-
flags = flags | 4;
|
150
|
-
return rb_funcall(rb_cRegexp, rb_intern("new"), 2, source, INT2NUM(flags));
|
151
|
-
} else if(id == rb_intern("dict")) {
|
152
|
-
return read_dict(pData);
|
153
|
-
} else {
|
154
|
-
return Qnil;
|
155
|
-
}
|
149
|
+
return rb_dict;
|
156
150
|
}
|
157
151
|
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
return read_complex_type(pData, arity);
|
163
|
-
} else {
|
164
|
-
VALUE tuple = rb_funcall(cTuple, rb_intern("new"), 1, INT2NUM(arity));
|
165
|
-
rb_ary_store(tuple, 0, tag);
|
166
|
-
int i;
|
167
|
-
for(i = 1; i < arity; ++i) {
|
168
|
-
rb_ary_store(tuple, i, read_any_raw(pData));
|
169
|
-
}
|
170
|
-
return tuple;
|
171
|
-
}
|
172
|
-
} else {
|
173
|
-
return rb_funcall(cTuple, rb_intern("new"), 0);
|
174
|
-
}
|
152
|
+
static inline void bert_ensure_arity(uint32_t arity, uint32_t expected)
|
153
|
+
{
|
154
|
+
if (arity != expected)
|
155
|
+
rb_raise(rb_eTypeError, "Invalid tuple arity for complex type");
|
175
156
|
}
|
176
157
|
|
177
|
-
VALUE
|
178
|
-
|
179
|
-
|
180
|
-
|
158
|
+
static VALUE bert_read_complex(struct bert_buf *buf, uint32_t arity)
|
159
|
+
{
|
160
|
+
VALUE rb_type;
|
161
|
+
ID id_type;
|
181
162
|
|
182
|
-
|
183
|
-
|
184
|
-
}
|
163
|
+
rb_type = bert_read(buf);
|
164
|
+
Check_Type(rb_type, T_SYMBOL);
|
185
165
|
|
186
|
-
|
187
|
-
if(read_1(pData) != ERL_LARGE_TUPLE) {
|
188
|
-
rb_raise(rb_eStandardError, "Invalid Type, not a large tuple");
|
189
|
-
}
|
166
|
+
id_type = SYM2ID(rb_type);
|
190
167
|
|
191
|
-
|
192
|
-
|
193
|
-
|
168
|
+
if (id_type == rb_intern("nil")) {
|
169
|
+
bert_ensure_arity(arity, 2);
|
170
|
+
return Qnil;
|
194
171
|
|
195
|
-
|
172
|
+
} else if (id_type == rb_intern("true")) {
|
173
|
+
bert_ensure_arity(arity, 2);
|
174
|
+
return Qtrue;
|
196
175
|
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
}
|
176
|
+
} else if (id_type == rb_intern("false")) {
|
177
|
+
bert_ensure_arity(arity, 2);
|
178
|
+
return Qfalse;
|
201
179
|
|
202
|
-
|
180
|
+
} else if (id_type == rb_intern("time")) {
|
181
|
+
VALUE rb_megasecs, rb_secs, rb_microsecs, rb_stamp, rb_msecs;
|
203
182
|
|
204
|
-
|
183
|
+
bert_ensure_arity(arity, 5);
|
205
184
|
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
}
|
185
|
+
rb_megasecs = bert_read(buf);
|
186
|
+
rb_secs = bert_read(buf);
|
187
|
+
rb_microsecs = bert_read(buf);
|
210
188
|
|
211
|
-
|
189
|
+
rb_msecs = rb_funcall(rb_megasecs, rb_intern("*"), 1, INT2NUM(1000000));
|
190
|
+
rb_stamp = rb_funcall(rb_msecs, rb_intern("+"), 1, rb_secs);
|
212
191
|
|
213
|
-
|
214
|
-
}
|
192
|
+
return rb_funcall(rb_cTime, rb_intern("at"), 2, rb_stamp, rb_microsecs);
|
215
193
|
|
216
|
-
|
194
|
+
} else if (id_type == rb_intern("regex")) {
|
195
|
+
VALUE rb_source, rb_opts;
|
196
|
+
int flags = 0;
|
197
|
+
|
198
|
+
bert_ensure_arity(arity, 4);
|
217
199
|
|
218
|
-
|
219
|
-
|
220
|
-
*(dest + length) = (unsigned char) 0;
|
221
|
-
*pData += length;
|
222
|
-
}
|
223
|
-
|
224
|
-
VALUE read_bin(unsigned char **pData) {
|
225
|
-
if(read_1(pData) != ERL_BIN) {
|
226
|
-
rb_raise(rb_eStandardError, "Invalid Type, not an erlang binary");
|
227
|
-
}
|
200
|
+
rb_source = bert_read(buf);
|
201
|
+
rb_opts = bert_read(buf);
|
228
202
|
|
229
|
-
|
203
|
+
Check_Type(rb_source, T_STRING);
|
204
|
+
Check_Type(rb_opts, T_ARRAY);
|
230
205
|
|
231
|
-
|
232
|
-
|
206
|
+
if (rb_ary_includes(rb_opts, ID2SYM(rb_intern("caseless"))))
|
207
|
+
flags = flags | 1;
|
233
208
|
|
234
|
-
|
235
|
-
|
209
|
+
if (rb_ary_includes(rb_opts, ID2SYM(rb_intern("extended"))))
|
210
|
+
flags = flags | 2;
|
236
211
|
|
237
|
-
|
238
|
-
|
239
|
-
rb_raise(rb_eStandardError, "Invalid Type, not an erlang string");
|
240
|
-
}
|
212
|
+
if (rb_ary_includes(rb_opts, ID2SYM(rb_intern("multiline"))))
|
213
|
+
flags = flags | 4;
|
241
214
|
|
242
|
-
|
243
|
-
VALUE array = rb_ary_new2(length);
|
215
|
+
return rb_funcall(rb_cRegexp, rb_intern("new"), 2, rb_source, INT2NUM(flags));
|
244
216
|
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
}
|
217
|
+
} else if (id_type == rb_intern("dict")) {
|
218
|
+
bert_ensure_arity(arity, 3);
|
219
|
+
return bert_read_dict(buf);
|
220
|
+
}
|
250
221
|
|
251
|
-
|
222
|
+
rb_raise(rb_eTypeError, "Invalid tag for complex value");
|
223
|
+
return Qnil;
|
252
224
|
}
|
253
225
|
|
254
|
-
VALUE
|
255
|
-
|
256
|
-
|
257
|
-
|
226
|
+
static VALUE bert_read_tuple(struct bert_buf *buf, uint32_t arity)
|
227
|
+
{
|
228
|
+
if (arity > 0) {
|
229
|
+
VALUE rb_tag = bert_read(buf);
|
258
230
|
|
259
|
-
|
231
|
+
if (TYPE(rb_tag) == T_SYMBOL && SYM2ID(rb_tag) == rb_intern("bert")) {
|
232
|
+
return bert_read_complex(buf, arity);
|
233
|
+
} else {
|
234
|
+
uint32_t i;
|
235
|
+
VALUE rb_tuple;
|
260
236
|
|
261
|
-
|
262
|
-
|
237
|
+
rb_tuple = rb_funcall(rb_cTuple, rb_intern("new"), 1, INT2NUM(arity));
|
238
|
+
rb_ary_store(rb_tuple, 0, rb_tag);
|
263
239
|
|
264
|
-
|
265
|
-
|
240
|
+
for(i = 1; i < arity; ++i)
|
241
|
+
rb_ary_store(rb_tuple, i, bert_read(buf));
|
242
|
+
|
243
|
+
return rb_tuple;
|
244
|
+
}
|
245
|
+
}
|
266
246
|
|
267
|
-
|
268
|
-
|
269
|
-
rb_raise(rb_eStandardError, "Invalid Type, not a small int");
|
270
|
-
}
|
247
|
+
return rb_funcall(rb_cTuple, rb_intern("new"), 0);
|
248
|
+
}
|
271
249
|
|
272
|
-
|
250
|
+
static VALUE bert_read_stuple(struct bert_buf *buf)
|
251
|
+
{
|
252
|
+
bert_buf_ensure(buf, 1);
|
253
|
+
return bert_read_tuple(buf, bert_buf_read8(buf));
|
254
|
+
}
|
273
255
|
|
274
|
-
|
256
|
+
static VALUE bert_read_ltuple(struct bert_buf *buf)
|
257
|
+
{
|
258
|
+
bert_buf_ensure(buf, 4);
|
259
|
+
return bert_read_tuple(buf, bert_buf_read32(buf));
|
275
260
|
}
|
276
261
|
|
277
|
-
VALUE
|
278
|
-
|
279
|
-
|
280
|
-
|
262
|
+
static VALUE bert_read_list(struct bert_buf *buf)
|
263
|
+
{
|
264
|
+
uint32_t i, length;
|
265
|
+
VALUE rb_list;
|
281
266
|
|
282
|
-
|
267
|
+
bert_buf_ensure(buf, 4);
|
268
|
+
length = bert_buf_read32(buf);
|
269
|
+
rb_list = rb_ary_new2(length);
|
283
270
|
|
284
|
-
|
271
|
+
for(i = 0; i < length; ++i)
|
272
|
+
rb_ary_store(rb_list, i, bert_read(buf));
|
285
273
|
|
286
|
-
|
287
|
-
|
288
|
-
|
274
|
+
/* disregard the list tail: skip the single byte that follows the last element */
|
275
|
+
bert_buf_ensure(buf, 1);
|
276
|
+
(void)bert_buf_read8(buf);
|
289
277
|
|
290
|
-
|
278
|
+
return rb_list;
|
291
279
|
}
|
292
280
|
|
293
|
-
VALUE
|
294
|
-
|
295
|
-
|
296
|
-
|
281
|
+
static VALUE bert_read_bin(struct bert_buf *buf)
|
282
|
+
{
|
283
|
+
uint32_t length;
|
284
|
+
VALUE rb_bin;
|
285
|
+
|
286
|
+
bert_buf_ensure(buf, 4);
|
287
|
+
length = bert_buf_read32(buf);
|
297
288
|
|
298
|
-
|
299
|
-
|
289
|
+
bert_buf_ensure(buf, length);
|
290
|
+
rb_bin = rb_str_new((char *)buf->data, length);
|
291
|
+
buf->data += length;
|
292
|
+
|
293
|
+
return rb_bin;
|
294
|
+
}
|
300
295
|
|
301
|
-
|
302
|
-
|
296
|
+
static VALUE bert_read_string(struct bert_buf *buf)
|
297
|
+
{
|
298
|
+
uint16_t i, length;
|
299
|
+
VALUE rb_string;
|
303
300
|
|
304
|
-
|
305
|
-
|
301
|
+
bert_buf_ensure(buf, 2);
|
302
|
+
length = bert_buf_read16(buf);
|
306
303
|
|
307
|
-
|
308
|
-
|
309
|
-
tmp = INT2FIX(*(buf + i));
|
310
|
-
tmp = rb_funcall(tmp, rb_intern("<<"), 1, INT2NUM(i * 8));
|
311
|
-
num = rb_funcall(num, rb_intern("+"), 1, tmp);
|
312
|
-
}
|
304
|
+
bert_buf_ensure(buf, length);
|
305
|
+
rb_string = rb_ary_new2(length);
|
313
306
|
|
314
|
-
|
315
|
-
|
316
|
-
}
|
307
|
+
for (i = 0; i < length; ++i)
|
308
|
+
rb_ary_store(rb_string, i, INT2FIX(buf->data[i]));
|
317
309
|
|
318
|
-
|
310
|
+
buf->data += length;
|
311
|
+
return rb_string;
|
319
312
|
}
|
320
313
|
|
321
|
-
VALUE
|
322
|
-
|
323
|
-
|
324
|
-
|
314
|
+
static VALUE bert_read_atom(struct bert_buf *buf)
|
315
|
+
{
|
316
|
+
VALUE rb_atom;
|
317
|
+
uint32_t atom_len;
|
325
318
|
|
326
|
-
|
327
|
-
|
319
|
+
bert_buf_ensure(buf, 2);
|
320
|
+
atom_len = bert_buf_read16(buf);
|
328
321
|
|
329
|
-
|
330
|
-
|
322
|
+
/* Instead of trying to build the symbol
|
323
|
+
* from here, just create a Ruby string
|
324
|
+
* and internalize it. this will be faster for
|
325
|
+
* unique symbols */
|
326
|
+
bert_buf_ensure(buf, atom_len);
|
327
|
+
rb_atom = rb_str_new((char *)buf->data, atom_len);
|
328
|
+
buf->data += atom_len;
|
331
329
|
|
332
|
-
|
333
|
-
|
330
|
+
return rb_str_intern(rb_atom);
|
331
|
+
}
|
332
|
+
|
333
|
+
static VALUE bert_read_sint(struct bert_buf *buf)
|
334
|
+
{
|
335
|
+
bert_buf_ensure(buf, 1);
|
336
|
+
return INT2FIX((uint8_t)bert_buf_read8(buf));
|
337
|
+
}
|
338
|
+
|
339
|
+
static VALUE bert_read_int(struct bert_buf *buf)
|
340
|
+
{
|
341
|
+
bert_buf_ensure(buf, 4);
|
342
|
+
return LONG2NUM((int32_t)bert_buf_read32(buf));
|
343
|
+
}
|
334
344
|
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
345
|
+
static VALUE bert_buf_tobignum(struct bert_buf *buf, uint8_t sign, uint32_t bin_digits)
|
346
|
+
{
|
347
|
+
#ifdef BERT_FAST_BIGNUM
|
348
|
+
uint32_t *bin_buf = NULL;
|
349
|
+
VALUE rb_num;
|
350
|
+
uint32_t round_size;
|
351
|
+
|
352
|
+
bert_buf_ensure(buf, bin_digits);
|
353
|
+
|
354
|
+
/* Hack: ensure that we have at least a full word
|
355
|
+
* of extra padding for the actual string, so Ruby
|
356
|
+
* cannot guess the sign of the bigint from the MSB */
|
357
|
+
round_size = 4 + ((bin_digits + 3) & ~3);
|
358
|
+
bin_buf = xmalloc(round_size);
|
359
|
+
|
360
|
+
memcpy(bin_buf, buf->data, bin_digits);
|
361
|
+
memset((char *)bin_buf + bin_digits, 0x0, round_size - bin_digits);
|
362
|
+
|
363
|
+
/* Make Ruby unpack the string internally.
|
364
|
+
* this is significantly faster than adding
|
365
|
+
* the bytes one by one */
|
366
|
+
rb_num = rb_big_unpack(bin_buf, round_size / 4);
|
367
|
+
|
368
|
+
/* Enforce sign. So fast! */
|
369
|
+
RBIGNUM_SET_SIGN(rb_num, !sign);
|
370
|
+
|
371
|
+
free(bin_buf);
|
372
|
+
return rb_num;
|
373
|
+
#else
|
374
|
+
/**
|
375
|
+
* Slower bignum serialization; convert to a base16
|
376
|
+
* string and then let ruby parse it internally.
|
377
|
+
*
|
378
|
+
* We're shipping with this by default because
|
379
|
+
* `rb_big_unpack` is not trustworthy
|
380
|
+
*/
|
381
|
+
static const char to_hex[] = "0123456789abcdef";
|
382
|
+
char *num_str = NULL, *ptr;
|
383
|
+
VALUE rb_num;
|
384
|
+
int32_t i;
|
385
|
+
|
386
|
+
bert_buf_ensure(buf, bin_digits);
|
387
|
+
|
388
|
+
/* 2 digits per byte + sign + trailing null */
|
389
|
+
num_str = ptr = xmalloc((bin_digits * 2) + 2);
|
390
|
+
|
391
|
+
*ptr++ = sign ? '-' : '+';
|
392
|
+
|
393
|
+
for (i = (int32_t)bin_digits - 1; i >= 0; --i) {
|
394
|
+
uint8_t val = buf->data[i];
|
395
|
+
*ptr++ = to_hex[val >> 4];
|
396
|
+
*ptr++ = to_hex[val & 0xf];
|
397
|
+
}
|
398
|
+
|
399
|
+
*ptr = 0;
|
400
|
+
buf->data += bin_digits;
|
401
|
+
|
402
|
+
rb_num = rb_cstr_to_inum(num_str, 16, 1);
|
403
|
+
free(num_str);
|
404
|
+
|
405
|
+
return rb_num;
|
406
|
+
#endif
|
407
|
+
}
|
408
|
+
|
409
|
+
VALUE bert_read_sbignum(struct bert_buf *buf)
|
410
|
+
{
|
411
|
+
uint8_t sign, bin_digits;
|
339
412
|
|
340
|
-
|
341
|
-
}
|
413
|
+
bert_buf_ensure(buf, 2);
|
342
414
|
|
343
|
-
|
344
|
-
|
345
|
-
}
|
415
|
+
bin_digits = bert_buf_read8(buf);
|
416
|
+
sign = bert_buf_read8(buf);
|
346
417
|
|
347
|
-
|
418
|
+
return bert_buf_tobignum(buf, sign, (uint32_t)bin_digits);
|
348
419
|
}
|
349
420
|
|
350
|
-
VALUE
|
351
|
-
|
352
|
-
|
353
|
-
|
421
|
+
VALUE bert_read_lbignum(struct bert_buf *buf)
|
422
|
+
{
|
423
|
+
uint32_t bin_digits;
|
424
|
+
uint8_t sign;
|
354
425
|
|
355
|
-
|
356
|
-
read_string_raw(buf, pData, 31);
|
426
|
+
bert_buf_ensure(buf, 5);
|
357
427
|
|
358
|
-
|
428
|
+
bin_digits = bert_buf_read32(buf);
|
429
|
+
sign = bert_buf_read8(buf);
|
359
430
|
|
360
|
-
|
431
|
+
return bert_buf_tobignum(buf, sign, bin_digits);
|
361
432
|
}
|
362
433
|
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
434
|
+
/*
|
435
|
+
* -------------------
|
436
|
+
* |1 | 31 |
|
437
|
+
* |99 | Float String|
|
438
|
+
* -------------------
|
439
|
+
*
|
440
|
+
* A float is stored in string format. the format used in sprintf
|
441
|
+
* to format the float is "%.20e" (there are more bytes allocated
|
442
|
+
* than necessary). To unpack the float use sscanf with format "%lf".
|
443
|
+
*/
|
444
|
+
static VALUE bert_read_float(struct bert_buf *buf)
|
445
|
+
{
|
446
|
+
VALUE rb_float;
|
447
|
+
|
448
|
+
bert_buf_ensure(buf, 31);
|
449
|
+
|
450
|
+
rb_float = rb_str_new((char *)buf->data, 31);
|
451
|
+
buf->data += 31;
|
452
|
+
|
453
|
+
return rb_funcall(rb_float, rb_intern("to_f"), 0);
|
454
|
+
}
|
367
455
|
|
368
|
-
|
456
|
+
static VALUE bert_read_nil(struct bert_buf *buf)
|
457
|
+
{
|
458
|
+
return rb_ary_new2(0);
|
369
459
|
}
|
370
460
|
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
case ERL_SMALL_INT:
|
376
|
-
return read_small_int(pData);
|
377
|
-
break;
|
378
|
-
case ERL_INT:
|
379
|
-
return read_int(pData);
|
380
|
-
break;
|
381
|
-
case ERL_FLOAT:
|
382
|
-
return read_float(pData);
|
383
|
-
break;
|
384
|
-
case ERL_ATOM:
|
385
|
-
return read_atom(pData);
|
386
|
-
break;
|
387
|
-
case ERL_SMALL_TUPLE:
|
388
|
-
return read_small_tuple(pData);
|
389
|
-
break;
|
390
|
-
case ERL_LARGE_TUPLE:
|
391
|
-
return read_large_tuple(pData);
|
392
|
-
break;
|
393
|
-
case ERL_NIL:
|
394
|
-
return read_nil(pData);
|
395
|
-
break;
|
396
|
-
case ERL_STRING:
|
397
|
-
return read_string(pData);
|
398
|
-
break;
|
399
|
-
case ERL_LIST:
|
400
|
-
return read_list(pData);
|
401
|
-
break;
|
402
|
-
case ERL_BIN:
|
403
|
-
return read_bin(pData);
|
404
|
-
break;
|
405
|
-
case ERL_SMALL_BIGNUM:
|
406
|
-
return read_small_bignum(pData);
|
407
|
-
break;
|
408
|
-
case ERL_LARGE_BIGNUM:
|
409
|
-
return read_large_bignum(pData);
|
410
|
-
break;
|
411
|
-
}
|
412
|
-
return Qnil;
|
461
|
+
static VALUE bert_read_invalid(struct bert_buf *buf)
|
462
|
+
{
|
463
|
+
rb_raise(rb_eTypeError, "Invalid object tag in BERT stream");
|
464
|
+
return Qnil;
|
413
465
|
}
|
414
466
|
|
415
|
-
VALUE
|
416
|
-
|
467
|
+
static VALUE rb_bert_decode(VALUE klass, VALUE rb_string)
|
468
|
+
{
|
469
|
+
struct bert_buf buf;
|
417
470
|
|
418
|
-
|
471
|
+
Check_Type(rb_string, T_STRING);
|
472
|
+
buf.data = (uint8_t *)RSTRING_PTR(rb_string);
|
473
|
+
buf.end = buf.data + RSTRING_LEN(rb_string);
|
419
474
|
|
420
|
-
|
421
|
-
if(read_1(pData) != ERL_VERSION) {
|
422
|
-
rb_raise(rb_eStandardError, "Bad Magic");
|
423
|
-
}
|
475
|
+
bert_buf_ensure(&buf, 1);
|
424
476
|
|
425
|
-
|
477
|
+
if (bert_buf_read8(&buf) != ERL_VERSION)
|
478
|
+
rb_raise(rb_eTypeError, "Invalid magic value for BERT string");
|
479
|
+
|
480
|
+
return bert_read(&buf);
|
426
481
|
}
|
427
482
|
|
428
|
-
VALUE
|
429
|
-
|
483
|
+
static VALUE rb_bert_impl(VALUE klass)
|
484
|
+
{
|
485
|
+
return rb_str_new("C", 1);
|
430
486
|
}
|
431
487
|
|
432
|
-
void Init_decode()
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
488
|
+
void Init_decode()
|
489
|
+
{
|
490
|
+
rb_mBERT = rb_const_get(rb_cObject, rb_intern("BERT"));
|
491
|
+
rb_cTuple = rb_const_get(rb_mBERT, rb_intern("Tuple"));
|
492
|
+
|
493
|
+
rb_cDecode = rb_define_class_under(rb_mBERT, "Decode", rb_cObject);
|
494
|
+
rb_define_singleton_method(rb_cDecode, "decode", rb_bert_decode, 1);
|
495
|
+
rb_define_singleton_method(rb_cDecode, "impl", rb_bert_impl, 0);
|
438
496
|
}
|
data/ext/bert/c/extconf.rb
CHANGED
data/lib/bert/decode.rb
CHANGED
@@ -8,7 +8,9 @@ module BERT
|
|
8
8
|
end
|
9
9
|
|
10
10
|
def self.decode(string)
|
11
|
-
|
11
|
+
io = StringIO.new(string)
|
12
|
+
io.set_encoding('binary') if io.respond_to?(:set_encoding)
|
13
|
+
new(io).read_any
|
12
14
|
end
|
13
15
|
|
14
16
|
def initialize(ins)
|
@@ -113,7 +115,7 @@ module BERT
|
|
113
115
|
value = read_4
|
114
116
|
negative = (value >> 31)[0] == 1
|
115
117
|
value = (value - (1 << 32)) if negative
|
116
|
-
value
|
118
|
+
value
|
117
119
|
end
|
118
120
|
|
119
121
|
def read_small_bignum
|
@@ -126,7 +128,7 @@ module BERT
|
|
126
128
|
value = (byte * (256 ** index))
|
127
129
|
sign != 0 ? (result - value) : (result + value)
|
128
130
|
end
|
129
|
-
|
131
|
+
added
|
130
132
|
end
|
131
133
|
|
132
134
|
def read_large_bignum
|
@@ -139,7 +141,7 @@ module BERT
|
|
139
141
|
value = (byte * (256 ** index))
|
140
142
|
sign != 0 ? (result - value) : (result + value)
|
141
143
|
end
|
142
|
-
|
144
|
+
added
|
143
145
|
end
|
144
146
|
|
145
147
|
def read_float
|
data/lib/bert/encode.rb
CHANGED
@@ -10,6 +10,7 @@ module BERT
|
|
10
10
|
|
11
11
|
def self.encode(data)
|
12
12
|
io = StringIO.new
|
13
|
+
io.set_encoding('binary') if io.respond_to?(:set_encoding)
|
13
14
|
self.new(io).write_any(data)
|
14
15
|
io.string
|
15
16
|
end
|
@@ -56,7 +57,7 @@ module BERT
|
|
56
57
|
fail(sym) unless sym.is_a?(Symbol)
|
57
58
|
data = sym.to_s
|
58
59
|
write_1 ATOM
|
59
|
-
write_2 data.
|
60
|
+
write_2 data.bytesize
|
60
61
|
write_string data
|
61
62
|
end
|
62
63
|
|
@@ -78,7 +79,7 @@ module BERT
|
|
78
79
|
end
|
79
80
|
|
80
81
|
def write_bignum(num)
|
81
|
-
n = (num.to_s(2).size / 8.0).ceil
|
82
|
+
n = (num.abs.to_s(2).size / 8.0).ceil
|
82
83
|
if n < 256
|
83
84
|
write_1 SMALL_BIGNUM
|
84
85
|
write_1 n
|
@@ -125,7 +126,7 @@ module BERT
|
|
125
126
|
|
126
127
|
def write_binary(data)
|
127
128
|
write_1 BIN
|
128
|
-
write_4 data.
|
129
|
+
write_4 data.bytesize
|
129
130
|
write_string data
|
130
131
|
end
|
131
132
|
|
data/test/bert_test.rb
CHANGED
data/test/encoder_test.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
1
3
|
require 'test_helper'
|
2
4
|
|
3
5
|
class EncoderTest < Test::Unit::TestCase
|
@@ -79,9 +81,22 @@ class EncoderTest < Test::Unit::TestCase
|
|
79
81
|
assert cruby[1].instance_of?(BERT::Tuple)
|
80
82
|
end
|
81
83
|
|
84
|
+
should 'handle utf8 strings' do
|
85
|
+
bert = [131, 109, 0, 0, 0, 5, 195, 169, 116, 195, 169].pack('C*')
|
86
|
+
assert_equal bert, BERT::Encoder.encode("été")
|
87
|
+
end
|
88
|
+
|
89
|
+
should 'handle utf8 symbols' do
|
90
|
+
bert = [131, 100, 0, 5, 195, 169, 116, 195, 169].pack('C*')
|
91
|
+
assert_equal bert, BERT::Encoder.encode(:'été')
|
92
|
+
end
|
93
|
+
|
82
94
|
should "handle bignums" do
|
83
95
|
bert = [131,110,8,0,0,0,232,137,4,35,199,138].pack('c*')
|
84
96
|
assert_equal bert, BERT::Encoder.encode(10_000_000_000_000_000_000)
|
97
|
+
|
98
|
+
bert = [131,110,8,1,0,0,232,137,4,35,199,138].pack('c*')
|
99
|
+
assert_equal bert, BERT::Encoder.encode(-10_000_000_000_000_000_000)
|
85
100
|
end
|
86
101
|
|
87
102
|
should "leave other stuff alone" do
|
metadata
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bert
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 31
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 1
|
9
|
+
- 6
|
10
|
+
version: 1.1.6
|
5
11
|
platform: ruby
|
6
12
|
authors:
|
7
13
|
- Tom Preston-Werner
|
@@ -14,14 +20,18 @@ default_executable:
|
|
14
20
|
dependencies:
|
15
21
|
- !ruby/object:Gem::Dependency
|
16
22
|
name: thoughtbot-shoulda
|
17
|
-
|
18
|
-
|
19
|
-
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
20
26
|
requirements:
|
21
27
|
- - ">="
|
22
28
|
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
23
32
|
version: "0"
|
24
|
-
|
33
|
+
type: :development
|
34
|
+
version_requirements: *id001
|
25
35
|
description: BERT Serializiation for Ruby
|
26
36
|
email: tom@mojombo.com
|
27
37
|
executables: []
|
@@ -69,21 +79,27 @@ require_paths:
|
|
69
79
|
- lib
|
70
80
|
- ext
|
71
81
|
required_ruby_version: !ruby/object:Gem::Requirement
|
82
|
+
none: false
|
72
83
|
requirements:
|
73
84
|
- - ">="
|
74
85
|
- !ruby/object:Gem::Version
|
86
|
+
hash: 3
|
87
|
+
segments:
|
88
|
+
- 0
|
75
89
|
version: "0"
|
76
|
-
version:
|
77
90
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
91
|
+
none: false
|
78
92
|
requirements:
|
79
93
|
- - ">="
|
80
94
|
- !ruby/object:Gem::Version
|
95
|
+
hash: 3
|
96
|
+
segments:
|
97
|
+
- 0
|
81
98
|
version: "0"
|
82
|
-
version:
|
83
99
|
requirements: []
|
84
100
|
|
85
101
|
rubyforge_project:
|
86
|
-
rubygems_version: 1.
|
102
|
+
rubygems_version: 1.6.2
|
87
103
|
signing_key:
|
88
104
|
specification_version: 3
|
89
105
|
summary: BERT Serializiation for Ruby
|