bert 1.1.2 → 1.1.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/History.txt +11 -1
- data/README.md +3 -3
- data/Rakefile +8 -3
- data/VERSION +1 -1
- data/bert.gemspec +1 -1
- data/ext/bert/c/decode.c +387 -329
- data/ext/bert/c/extconf.rb +3 -0
- data/lib/bert/decode.rb +6 -4
- data/lib/bert/encode.rb +4 -3
- data/test/bert_test.rb +2 -1
- data/test/encoder_test.rb +15 -0
- metadata +24 -8
data/History.txt
CHANGED
@@ -1,3 +1,13 @@
|
|
1
|
+
= 1.1.6 / 2012-05-25
|
2
|
+
* Bug fixes
|
3
|
+
* Better handling of utf-8 characters
|
4
|
+
|
5
|
+
= 1.1.5 / 2011-12-09
|
6
|
+
* Bug fixes
|
7
|
+
* Faster and more secure C BERT decoder
|
8
|
+
* Fix for encoding of negative bignums
|
9
|
+
* Ruby 1.9 compatibility
|
10
|
+
|
1
11
|
= 1.1.2 / 2010-02-08
|
2
12
|
* Bug fixes
|
3
13
|
* Fix bignum handling on 256 byte boundary
|
@@ -28,4 +38,4 @@
|
|
28
38
|
* Add roundtrip tests
|
29
39
|
|
30
40
|
= 0.1.0 / 2009-10-08
|
31
|
-
* Birthday!
|
41
|
+
* Birthday!
|
data/README.md
CHANGED
@@ -49,9 +49,9 @@ Usage
|
|
49
49
|
require 'bert'
|
50
50
|
|
51
51
|
bert = BERT.encode(t[:user, {:name => 'TPW', :nick => 'mojombo'}])
|
52
|
-
# => "\203h\002d\000\004userh\
|
53
|
-
\000\004namem\000\000\000\003TPWh\002d\000\004nickm\
|
54
|
-
\amojomboj"
|
52
|
+
# => "\203h\002d\000\004userh\003d\000\004bertd\000\004dictl\000\000\
|
53
|
+
000\002h\002d\000\004namem\000\000\000\003TPWh\002d\000\004nickm\
|
54
|
+
000\000\000\amojomboj"
|
55
55
|
|
56
56
|
BERT.decode(bert)
|
57
57
|
# => t[:user, {:name=>"TPW", :nick=>"mojombo"}]
|
data/Rakefile
CHANGED
@@ -11,9 +11,14 @@ begin
|
|
11
11
|
gem.homepage = "http://github.com/mojombo/bert"
|
12
12
|
gem.authors = ["Tom Preston-Werner"]
|
13
13
|
gem.add_development_dependency("thoughtbot-shoulda")
|
14
|
-
|
15
|
-
|
16
|
-
|
14
|
+
if ENV["JAVA"]
|
15
|
+
gem.extensions = nil
|
16
|
+
gem.platform = 'java'
|
17
|
+
else
|
18
|
+
gem.require_paths = ["lib", "ext"]
|
19
|
+
gem.files.include("ext")
|
20
|
+
gem.extensions << 'ext/bert/c/extconf.rb'
|
21
|
+
end
|
17
22
|
# gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
|
18
23
|
end
|
19
24
|
rescue LoadError
|
data/VERSION
CHANGED
@@ -1 +1 @@
|
|
1
|
-
1.1.2
|
1
|
+
1.1.6
|
data/bert.gemspec
CHANGED
data/ext/bert/c/decode.c
CHANGED
@@ -1,11 +1,9 @@
|
|
1
1
|
#include "ruby.h"
|
2
|
-
#include <
|
2
|
+
#include <stdint.h>
|
3
|
+
#include <netinet/in.h>
|
3
4
|
|
4
|
-
#define ERL_VERSION 131
|
5
5
|
#define ERL_SMALL_INT 97
|
6
6
|
#define ERL_INT 98
|
7
|
-
#define ERL_SMALL_BIGNUM 110
|
8
|
-
#define ERL_LARGE_BIGNUM 111
|
9
7
|
#define ERL_FLOAT 99
|
10
8
|
#define ERL_ATOM 100
|
11
9
|
#define ERL_SMALL_TUPLE 104
|
@@ -14,425 +12,485 @@
|
|
14
12
|
#define ERL_STRING 107
|
15
13
|
#define ERL_LIST 108
|
16
14
|
#define ERL_BIN 109
|
15
|
+
#define ERL_SMALL_BIGNUM 110
|
16
|
+
#define ERL_LARGE_BIGNUM 111
|
17
|
+
#define ERL_VERSION 131
|
17
18
|
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
19
|
+
#define BERT_VALID_TYPE(t) ((t) >= ERL_SMALL_INT && (t) <= ERL_LARGE_BIGNUM)
|
20
|
+
#define BERT_TYPE_OFFSET (ERL_SMALL_INT)
|
21
|
+
|
22
|
+
static VALUE rb_mBERT;
|
23
|
+
static VALUE rb_cDecode;
|
24
|
+
static VALUE rb_cTuple;
|
25
|
+
|
26
|
+
struct bert_buf {
|
27
|
+
const uint8_t *data;
|
28
|
+
const uint8_t *end;
|
29
|
+
};
|
30
|
+
|
31
|
+
static VALUE bert_read_invalid(struct bert_buf *buf);
|
32
|
+
|
33
|
+
static VALUE bert_read_sint(struct bert_buf *buf);
|
34
|
+
static VALUE bert_read_int(struct bert_buf *buf);
|
35
|
+
static VALUE bert_read_float(struct bert_buf *buf);
|
36
|
+
static VALUE bert_read_atom(struct bert_buf *buf);
|
37
|
+
static VALUE bert_read_stuple(struct bert_buf *buf);
|
38
|
+
static VALUE bert_read_ltuple(struct bert_buf *buf);
|
39
|
+
static VALUE bert_read_nil(struct bert_buf *buf);
|
40
|
+
static VALUE bert_read_string(struct bert_buf *buf);
|
41
|
+
static VALUE bert_read_list(struct bert_buf *buf);
|
42
|
+
static VALUE bert_read_bin(struct bert_buf *buf);
|
43
|
+
static VALUE bert_read_sbignum(struct bert_buf *buf);
|
44
|
+
static VALUE bert_read_lbignum(struct bert_buf *buf);
|
45
|
+
|
46
|
+
typedef VALUE (*bert_ptr)(struct bert_buf *buf);
|
47
|
+
static bert_ptr bert_callbacks[] = {
|
48
|
+
&bert_read_sint,
|
49
|
+
&bert_read_int,
|
50
|
+
&bert_read_float,
|
51
|
+
&bert_read_atom,
|
52
|
+
&bert_read_invalid,
|
53
|
+
&bert_read_invalid,
|
54
|
+
&bert_read_invalid,
|
55
|
+
&bert_read_stuple,
|
56
|
+
&bert_read_ltuple,
|
57
|
+
&bert_read_nil,
|
58
|
+
&bert_read_string,
|
59
|
+
&bert_read_list,
|
60
|
+
&bert_read_bin,
|
61
|
+
&bert_read_sbignum,
|
62
|
+
&bert_read_lbignum
|
63
|
+
};
|
64
|
+
|
65
|
+
static inline uint8_t bert_buf_read8(struct bert_buf *buf)
|
66
|
+
{
|
67
|
+
return *buf->data++;
|
31
68
|
}
|
32
69
|
|
33
|
-
|
70
|
+
static inline uint16_t bert_buf_read16(struct bert_buf *buf)
|
71
|
+
{
|
72
|
+
/* Note that this will trigger -Wcast-align and throw a
|
73
|
+
* bus error on platforms where unaligned reads are not
|
74
|
+
* allowed. Also note that this is not breaking any
|
75
|
+
* strict aliasing rules. */
|
76
|
+
uint16_t short_val = *(uint16_t *)buf->data;
|
77
|
+
buf->data += sizeof(uint16_t);
|
78
|
+
return ntohs(short_val);
|
79
|
+
}
|
34
80
|
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
81
|
+
static inline uint32_t bert_buf_read32(struct bert_buf *buf)
|
82
|
+
{
|
83
|
+
/* Note that this will trigger -Wcast-align and throw a
|
84
|
+
* bus error on platforms where unaligned reads are not
|
85
|
+
* allowed. Also note that this is not breaking any
|
86
|
+
* strict aliasing rules. */
|
87
|
+
uint32_t long_val = *(uint32_t *)buf->data;
|
88
|
+
buf->data += sizeof(uint32_t);
|
89
|
+
return ntohl(long_val);
|
39
90
|
}
|
40
91
|
|
41
|
-
void
|
42
|
-
|
92
|
+
static inline void bert_buf_ensure(struct bert_buf *buf, size_t size)
|
93
|
+
{
|
94
|
+
if (buf->data + size > buf->end)
|
95
|
+
rb_raise(rb_eEOFError, "Unexpected end of BERT stream");
|
43
96
|
}
|
44
97
|
|
45
|
-
|
98
|
+
static VALUE bert_read(struct bert_buf *buf)
|
99
|
+
{
|
100
|
+
uint8_t type;
|
46
101
|
|
47
|
-
|
48
|
-
|
49
|
-
}
|
102
|
+
bert_buf_ensure(buf, 1);
|
103
|
+
type = bert_buf_read8(buf);
|
50
104
|
|
51
|
-
|
52
|
-
|
53
|
-
}
|
105
|
+
if (!BERT_VALID_TYPE(type))
|
106
|
+
rb_raise(rb_eRuntimeError, "Invalid tag '%d' for term", type);
|
54
107
|
|
55
|
-
|
56
|
-
return (unsigned int) ((**pData << 24) + (*(*pData + 1) << 16) + (*(*pData + 2) << 8) + *(*pData + 3));
|
108
|
+
return bert_callbacks[type - BERT_TYPE_OFFSET](buf);
|
57
109
|
}
|
58
110
|
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
111
|
+
static VALUE bert_read_dict(struct bert_buf *buf)
|
112
|
+
{
|
113
|
+
uint8_t type;
|
114
|
+
uint32_t length = 0, i;
|
115
|
+
VALUE rb_dict;
|
64
116
|
|
65
|
-
|
66
|
-
|
67
|
-
*pData += 2;
|
68
|
-
return val;
|
69
|
-
}
|
117
|
+
bert_buf_ensure(buf, 1);
|
118
|
+
type = bert_buf_read8(buf);
|
70
119
|
|
71
|
-
|
72
|
-
|
73
|
-
*pData += 4;
|
74
|
-
return val;
|
75
|
-
}
|
120
|
+
if (type != ERL_LIST && type != ERL_NIL)
|
121
|
+
rb_raise(rb_eTypeError, "Invalid dict spec, not an erlang list");
|
76
122
|
|
77
|
-
|
123
|
+
if (type == ERL_LIST) {
|
124
|
+
bert_buf_ensure(buf, 4);
|
125
|
+
length = bert_buf_read32(buf);
|
126
|
+
}
|
78
127
|
|
79
|
-
|
128
|
+
rb_dict = rb_hash_new();
|
80
129
|
|
81
|
-
|
82
|
-
|
83
|
-
|
84
|
-
}
|
130
|
+
for (i = 0; i < length; ++i) {
|
131
|
+
VALUE key, val;
|
132
|
+
bert_buf_ensure(buf, 2);
|
85
133
|
|
86
|
-
|
134
|
+
if (bert_buf_read8(buf) != ERL_SMALL_TUPLE || bert_buf_read8(buf) != 2)
|
135
|
+
rb_raise(rb_eTypeError, "Invalid dict tuple");
|
87
136
|
|
88
|
-
|
89
|
-
|
90
|
-
}
|
137
|
+
key = bert_read(buf);
|
138
|
+
val = bert_read(buf);
|
91
139
|
|
92
|
-
|
93
|
-
}
|
140
|
+
rb_hash_aset(rb_dict, key, val);
|
141
|
+
}
|
94
142
|
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
unsigned int length = 0;
|
102
|
-
if(type == ERL_LIST) {
|
103
|
-
length = read_4(pData);
|
104
|
-
}
|
105
|
-
|
106
|
-
VALUE cHash = rb_const_get(rb_cObject, rb_intern("Hash"));
|
107
|
-
VALUE hash = rb_funcall(cHash, rb_intern("new"), 0);
|
108
|
-
|
109
|
-
int i;
|
110
|
-
for(i = 0; i < length; ++i) {
|
111
|
-
VALUE pair = read_dict_pair(pData);
|
112
|
-
VALUE first = rb_ary_entry(pair, 0);
|
113
|
-
VALUE last = rb_ary_entry(pair, 1);
|
114
|
-
rb_funcall(hash, rb_intern("store"), 2, first, last);
|
115
|
-
}
|
116
|
-
|
117
|
-
if(type == ERL_LIST) {
|
118
|
-
read_1(pData);
|
119
|
-
}
|
120
|
-
|
121
|
-
return hash;
|
122
|
-
}
|
143
|
+
if (type == ERL_LIST) {
|
144
|
+
/* disregard tail; acquire women */
|
145
|
+
bert_buf_ensure(buf, 1);
|
146
|
+
(void)bert_buf_read8(buf);
|
147
|
+
}
|
123
148
|
|
124
|
-
|
125
|
-
VALUE type = read_any_raw(pData);
|
126
|
-
ID id = SYM2ID(type);
|
127
|
-
if(id == rb_intern("nil")) {
|
128
|
-
return Qnil;
|
129
|
-
} else if(id == rb_intern("true")) {
|
130
|
-
return Qtrue;
|
131
|
-
} else if(id == rb_intern("false")) {
|
132
|
-
return Qfalse;
|
133
|
-
} else if(id == rb_intern("time")) {
|
134
|
-
VALUE megasecs = read_any_raw(pData);
|
135
|
-
VALUE msecs = rb_funcall(megasecs, rb_intern("*"), 1, INT2NUM(1000000));
|
136
|
-
VALUE secs = read_any_raw(pData);
|
137
|
-
VALUE microsecs = read_any_raw(pData);
|
138
|
-
VALUE stamp = rb_funcall(msecs, rb_intern("+"), 1, secs);
|
139
|
-
return rb_funcall(rb_cTime, rb_intern("at"), 2, stamp, microsecs);
|
140
|
-
} else if(id == rb_intern("regex")) {
|
141
|
-
VALUE source = read_any_raw(pData);
|
142
|
-
VALUE opts = read_any_raw(pData);
|
143
|
-
int flags = 0;
|
144
|
-
if(rb_ary_includes(opts, ID2SYM(rb_intern("caseless"))))
|
145
|
-
flags = flags | 1;
|
146
|
-
if(rb_ary_includes(opts, ID2SYM(rb_intern("extended"))))
|
147
|
-
flags = flags | 2;
|
148
|
-
if(rb_ary_includes(opts, ID2SYM(rb_intern("multiline"))))
|
149
|
-
flags = flags | 4;
|
150
|
-
return rb_funcall(rb_cRegexp, rb_intern("new"), 2, source, INT2NUM(flags));
|
151
|
-
} else if(id == rb_intern("dict")) {
|
152
|
-
return read_dict(pData);
|
153
|
-
} else {
|
154
|
-
return Qnil;
|
155
|
-
}
|
149
|
+
return rb_dict;
|
156
150
|
}
|
157
151
|
|
158
|
-
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
return read_complex_type(pData, arity);
|
163
|
-
} else {
|
164
|
-
VALUE tuple = rb_funcall(cTuple, rb_intern("new"), 1, INT2NUM(arity));
|
165
|
-
rb_ary_store(tuple, 0, tag);
|
166
|
-
int i;
|
167
|
-
for(i = 1; i < arity; ++i) {
|
168
|
-
rb_ary_store(tuple, i, read_any_raw(pData));
|
169
|
-
}
|
170
|
-
return tuple;
|
171
|
-
}
|
172
|
-
} else {
|
173
|
-
return rb_funcall(cTuple, rb_intern("new"), 0);
|
174
|
-
}
|
152
|
+
static inline void bert_ensure_arity(uint32_t arity, uint32_t expected)
|
153
|
+
{
|
154
|
+
if (arity != expected)
|
155
|
+
rb_raise(rb_eTypeError, "Invalid tuple arity for complex type");
|
175
156
|
}
|
176
157
|
|
177
|
-
VALUE
|
178
|
-
|
179
|
-
|
180
|
-
|
158
|
+
static VALUE bert_read_complex(struct bert_buf *buf, uint32_t arity)
|
159
|
+
{
|
160
|
+
VALUE rb_type;
|
161
|
+
ID id_type;
|
181
162
|
|
182
|
-
|
183
|
-
|
184
|
-
}
|
163
|
+
rb_type = bert_read(buf);
|
164
|
+
Check_Type(rb_type, T_SYMBOL);
|
185
165
|
|
186
|
-
|
187
|
-
if(read_1(pData) != ERL_LARGE_TUPLE) {
|
188
|
-
rb_raise(rb_eStandardError, "Invalid Type, not a large tuple");
|
189
|
-
}
|
166
|
+
id_type = SYM2ID(rb_type);
|
190
167
|
|
191
|
-
|
192
|
-
|
193
|
-
|
168
|
+
if (id_type == rb_intern("nil")) {
|
169
|
+
bert_ensure_arity(arity, 2);
|
170
|
+
return Qnil;
|
194
171
|
|
195
|
-
|
172
|
+
} else if (id_type == rb_intern("true")) {
|
173
|
+
bert_ensure_arity(arity, 2);
|
174
|
+
return Qtrue;
|
196
175
|
|
197
|
-
|
198
|
-
|
199
|
-
|
200
|
-
}
|
176
|
+
} else if (id_type == rb_intern("false")) {
|
177
|
+
bert_ensure_arity(arity, 2);
|
178
|
+
return Qfalse;
|
201
179
|
|
202
|
-
|
180
|
+
} else if (id_type == rb_intern("time")) {
|
181
|
+
VALUE rb_megasecs, rb_secs, rb_microsecs, rb_stamp, rb_msecs;
|
203
182
|
|
204
|
-
|
183
|
+
bert_ensure_arity(arity, 5);
|
205
184
|
|
206
|
-
|
207
|
-
|
208
|
-
|
209
|
-
}
|
185
|
+
rb_megasecs = bert_read(buf);
|
186
|
+
rb_secs = bert_read(buf);
|
187
|
+
rb_microsecs = bert_read(buf);
|
210
188
|
|
211
|
-
|
189
|
+
rb_msecs = rb_funcall(rb_megasecs, rb_intern("*"), 1, INT2NUM(1000000));
|
190
|
+
rb_stamp = rb_funcall(rb_msecs, rb_intern("+"), 1, rb_secs);
|
212
191
|
|
213
|
-
|
214
|
-
}
|
192
|
+
return rb_funcall(rb_cTime, rb_intern("at"), 2, rb_stamp, rb_microsecs);
|
215
193
|
|
216
|
-
|
194
|
+
} else if (id_type == rb_intern("regex")) {
|
195
|
+
VALUE rb_source, rb_opts;
|
196
|
+
int flags = 0;
|
197
|
+
|
198
|
+
bert_ensure_arity(arity, 4);
|
217
199
|
|
218
|
-
|
219
|
-
|
220
|
-
*(dest + length) = (unsigned char) 0;
|
221
|
-
*pData += length;
|
222
|
-
}
|
223
|
-
|
224
|
-
VALUE read_bin(unsigned char **pData) {
|
225
|
-
if(read_1(pData) != ERL_BIN) {
|
226
|
-
rb_raise(rb_eStandardError, "Invalid Type, not an erlang binary");
|
227
|
-
}
|
200
|
+
rb_source = bert_read(buf);
|
201
|
+
rb_opts = bert_read(buf);
|
228
202
|
|
229
|
-
|
203
|
+
Check_Type(rb_source, T_STRING);
|
204
|
+
Check_Type(rb_opts, T_ARRAY);
|
230
205
|
|
231
|
-
|
232
|
-
|
206
|
+
if (rb_ary_includes(rb_opts, ID2SYM(rb_intern("caseless"))))
|
207
|
+
flags = flags | 1;
|
233
208
|
|
234
|
-
|
235
|
-
|
209
|
+
if (rb_ary_includes(rb_opts, ID2SYM(rb_intern("extended"))))
|
210
|
+
flags = flags | 2;
|
236
211
|
|
237
|
-
|
238
|
-
|
239
|
-
rb_raise(rb_eStandardError, "Invalid Type, not an erlang string");
|
240
|
-
}
|
212
|
+
if (rb_ary_includes(rb_opts, ID2SYM(rb_intern("multiline"))))
|
213
|
+
flags = flags | 4;
|
241
214
|
|
242
|
-
|
243
|
-
VALUE array = rb_ary_new2(length);
|
215
|
+
return rb_funcall(rb_cRegexp, rb_intern("new"), 2, rb_source, INT2NUM(flags));
|
244
216
|
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
|
249
|
-
}
|
217
|
+
} else if (id_type == rb_intern("dict")) {
|
218
|
+
bert_ensure_arity(arity, 3);
|
219
|
+
return bert_read_dict(buf);
|
220
|
+
}
|
250
221
|
|
251
|
-
|
222
|
+
rb_raise(rb_eTypeError, "Invalid tag for complex value");
|
223
|
+
return Qnil;
|
252
224
|
}
|
253
225
|
|
254
|
-
VALUE
|
255
|
-
|
256
|
-
|
257
|
-
|
226
|
+
static VALUE bert_read_tuple(struct bert_buf *buf, uint32_t arity)
|
227
|
+
{
|
228
|
+
if (arity > 0) {
|
229
|
+
VALUE rb_tag = bert_read(buf);
|
258
230
|
|
259
|
-
|
231
|
+
if (TYPE(rb_tag) == T_SYMBOL && SYM2ID(rb_tag) == rb_intern("bert")) {
|
232
|
+
return bert_read_complex(buf, arity);
|
233
|
+
} else {
|
234
|
+
uint32_t i;
|
235
|
+
VALUE rb_tuple;
|
260
236
|
|
261
|
-
|
262
|
-
|
237
|
+
rb_tuple = rb_funcall(rb_cTuple, rb_intern("new"), 1, INT2NUM(arity));
|
238
|
+
rb_ary_store(rb_tuple, 0, rb_tag);
|
263
239
|
|
264
|
-
|
265
|
-
|
240
|
+
for(i = 1; i < arity; ++i)
|
241
|
+
rb_ary_store(rb_tuple, i, bert_read(buf));
|
242
|
+
|
243
|
+
return rb_tuple;
|
244
|
+
}
|
245
|
+
}
|
266
246
|
|
267
|
-
|
268
|
-
|
269
|
-
rb_raise(rb_eStandardError, "Invalid Type, not a small int");
|
270
|
-
}
|
247
|
+
return rb_funcall(rb_cTuple, rb_intern("new"), 0);
|
248
|
+
}
|
271
249
|
|
272
|
-
|
250
|
+
static VALUE bert_read_stuple(struct bert_buf *buf)
|
251
|
+
{
|
252
|
+
bert_buf_ensure(buf, 1);
|
253
|
+
return bert_read_tuple(buf, bert_buf_read8(buf));
|
254
|
+
}
|
273
255
|
|
274
|
-
|
256
|
+
static VALUE bert_read_ltuple(struct bert_buf *buf)
|
257
|
+
{
|
258
|
+
bert_buf_ensure(buf, 4);
|
259
|
+
return bert_read_tuple(buf, bert_buf_read32(buf));
|
275
260
|
}
|
276
261
|
|
277
|
-
VALUE
|
278
|
-
|
279
|
-
|
280
|
-
|
262
|
+
static VALUE bert_read_list(struct bert_buf *buf)
|
263
|
+
{
|
264
|
+
uint32_t i, length;
|
265
|
+
VALUE rb_list;
|
281
266
|
|
282
|
-
|
267
|
+
bert_buf_ensure(buf, 4);
|
268
|
+
length = bert_buf_read32(buf);
|
269
|
+
rb_list = rb_ary_new2(length);
|
283
270
|
|
284
|
-
|
271
|
+
for(i = 0; i < length; ++i)
|
272
|
+
rb_ary_store(rb_list, i, bert_read(buf));
|
285
273
|
|
286
|
-
|
287
|
-
|
288
|
-
|
274
|
+
/* disregard tail; acquire currency */
|
275
|
+
bert_buf_ensure(buf, 1);
|
276
|
+
(void)bert_buf_read8(buf);
|
289
277
|
|
290
|
-
|
278
|
+
return rb_list;
|
291
279
|
}
|
292
280
|
|
293
|
-
VALUE
|
294
|
-
|
295
|
-
|
296
|
-
|
281
|
+
static VALUE bert_read_bin(struct bert_buf *buf)
|
282
|
+
{
|
283
|
+
uint32_t length;
|
284
|
+
VALUE rb_bin;
|
285
|
+
|
286
|
+
bert_buf_ensure(buf, 4);
|
287
|
+
length = bert_buf_read32(buf);
|
297
288
|
|
298
|
-
|
299
|
-
|
289
|
+
bert_buf_ensure(buf, length);
|
290
|
+
rb_bin = rb_str_new((char *)buf->data, length);
|
291
|
+
buf->data += length;
|
292
|
+
|
293
|
+
return rb_bin;
|
294
|
+
}
|
300
295
|
|
301
|
-
|
302
|
-
|
296
|
+
static VALUE bert_read_string(struct bert_buf *buf)
|
297
|
+
{
|
298
|
+
uint16_t i, length;
|
299
|
+
VALUE rb_string;
|
303
300
|
|
304
|
-
|
305
|
-
|
301
|
+
bert_buf_ensure(buf, 2);
|
302
|
+
length = bert_buf_read16(buf);
|
306
303
|
|
307
|
-
|
308
|
-
|
309
|
-
tmp = INT2FIX(*(buf + i));
|
310
|
-
tmp = rb_funcall(tmp, rb_intern("<<"), 1, INT2NUM(i * 8));
|
311
|
-
num = rb_funcall(num, rb_intern("+"), 1, tmp);
|
312
|
-
}
|
304
|
+
bert_buf_ensure(buf, length);
|
305
|
+
rb_string = rb_ary_new2(length);
|
313
306
|
|
314
|
-
|
315
|
-
|
316
|
-
}
|
307
|
+
for (i = 0; i < length; ++i)
|
308
|
+
rb_ary_store(rb_string, i, INT2FIX(buf->data[i]));
|
317
309
|
|
318
|
-
|
310
|
+
buf->data += length;
|
311
|
+
return rb_string;
|
319
312
|
}
|
320
313
|
|
321
|
-
VALUE
|
322
|
-
|
323
|
-
|
324
|
-
|
314
|
+
static VALUE bert_read_atom(struct bert_buf *buf)
|
315
|
+
{
|
316
|
+
VALUE rb_atom;
|
317
|
+
uint32_t atom_len;
|
325
318
|
|
326
|
-
|
327
|
-
|
319
|
+
bert_buf_ensure(buf, 2);
|
320
|
+
atom_len = bert_buf_read16(buf);
|
328
321
|
|
329
|
-
|
330
|
-
|
322
|
+
/* Instead of trying to build the symbol
|
323
|
+
* from here, just create a Ruby string
|
324
|
+
* and internalize it. this will be faster for
|
325
|
+
* unique symbols */
|
326
|
+
bert_buf_ensure(buf, atom_len);
|
327
|
+
rb_atom = rb_str_new((char *)buf->data, atom_len);
|
328
|
+
buf->data += atom_len;
|
331
329
|
|
332
|
-
|
333
|
-
|
330
|
+
return rb_str_intern(rb_atom);
|
331
|
+
}
|
332
|
+
|
333
|
+
static VALUE bert_read_sint(struct bert_buf *buf)
|
334
|
+
{
|
335
|
+
bert_buf_ensure(buf, 1);
|
336
|
+
return INT2FIX((uint8_t)bert_buf_read8(buf));
|
337
|
+
}
|
338
|
+
|
339
|
+
static VALUE bert_read_int(struct bert_buf *buf)
|
340
|
+
{
|
341
|
+
bert_buf_ensure(buf, 4);
|
342
|
+
return LONG2NUM((int32_t)bert_buf_read32(buf));
|
343
|
+
}
|
334
344
|
|
335
|
-
|
336
|
-
|
337
|
-
|
338
|
-
|
345
|
+
static VALUE bert_buf_tobignum(struct bert_buf *buf, uint8_t sign, uint32_t bin_digits)
|
346
|
+
{
|
347
|
+
#ifdef BERT_FAST_BIGNUM
|
348
|
+
uint32_t *bin_buf = NULL;
|
349
|
+
VALUE rb_num;
|
350
|
+
uint32_t round_size;
|
351
|
+
|
352
|
+
bert_buf_ensure(buf, bin_digits);
|
353
|
+
|
354
|
+
/* Hack: ensure that we have at least a full word
|
355
|
+
* of extra padding for the actual string, so Ruby
|
356
|
+
* cannot guess the sign of the bigint from the MSB */
|
357
|
+
round_size = 4 + ((bin_digits + 3) & ~3);
|
358
|
+
bin_buf = xmalloc(round_size);
|
359
|
+
|
360
|
+
memcpy(bin_buf, buf->data, bin_digits);
|
361
|
+
memset((char *)bin_buf + bin_digits, 0x0, round_size - bin_digits);
|
362
|
+
|
363
|
+
/* Make Ruby unpack the string internally.
|
364
|
+
* this is significantly faster than adding
|
365
|
+
* the bytes one by one */
|
366
|
+
rb_num = rb_big_unpack(bin_buf, round_size / 4);
|
367
|
+
|
368
|
+
/* Enforce sign. So fast! */
|
369
|
+
RBIGNUM_SET_SIGN(rb_num, !sign);
|
370
|
+
|
371
|
+
free(bin_buf);
|
372
|
+
return rb_num;
|
373
|
+
#else
|
374
|
+
/**
|
375
|
+
* Slower bignum serialization; convert to a base16
|
376
|
+
* string and then let ruby parse it internally.
|
377
|
+
*
|
378
|
+
* We're shipping with this by default because
|
379
|
+
* `rb_big_unpack` is not trustworthy
|
380
|
+
*/
|
381
|
+
static const char to_hex[] = "0123456789abcdef";
|
382
|
+
char *num_str = NULL, *ptr;
|
383
|
+
VALUE rb_num;
|
384
|
+
int32_t i;
|
385
|
+
|
386
|
+
bert_buf_ensure(buf, bin_digits);
|
387
|
+
|
388
|
+
/* 2 digits per byte + sign + trailing null */
|
389
|
+
num_str = ptr = xmalloc((bin_digits * 2) + 2);
|
390
|
+
|
391
|
+
*ptr++ = sign ? '-' : '+';
|
392
|
+
|
393
|
+
for (i = (int32_t)bin_digits - 1; i >= 0; --i) {
|
394
|
+
uint8_t val = buf->data[i];
|
395
|
+
*ptr++ = to_hex[val >> 4];
|
396
|
+
*ptr++ = to_hex[val & 0xf];
|
397
|
+
}
|
398
|
+
|
399
|
+
*ptr = 0;
|
400
|
+
buf->data += bin_digits;
|
401
|
+
|
402
|
+
rb_num = rb_cstr_to_inum(num_str, 16, 1);
|
403
|
+
free(num_str);
|
404
|
+
|
405
|
+
return rb_num;
|
406
|
+
#endif
|
407
|
+
}
|
408
|
+
|
409
|
+
VALUE bert_read_sbignum(struct bert_buf *buf)
|
410
|
+
{
|
411
|
+
uint8_t sign, bin_digits;
|
339
412
|
|
340
|
-
|
341
|
-
}
|
413
|
+
bert_buf_ensure(buf, 2);
|
342
414
|
|
343
|
-
|
344
|
-
|
345
|
-
}
|
415
|
+
bin_digits = bert_buf_read8(buf);
|
416
|
+
sign = bert_buf_read8(buf);
|
346
417
|
|
347
|
-
|
418
|
+
return bert_buf_tobignum(buf, sign, (uint32_t)bin_digits);
|
348
419
|
}
|
349
420
|
|
350
|
-
VALUE
|
351
|
-
|
352
|
-
|
353
|
-
|
421
|
+
VALUE bert_read_lbignum(struct bert_buf *buf)
|
422
|
+
{
|
423
|
+
uint32_t bin_digits;
|
424
|
+
uint8_t sign;
|
354
425
|
|
355
|
-
|
356
|
-
read_string_raw(buf, pData, 31);
|
426
|
+
bert_buf_ensure(buf, 5);
|
357
427
|
|
358
|
-
|
428
|
+
bin_digits = bert_buf_read32(buf);
|
429
|
+
sign = bert_buf_read8(buf);
|
359
430
|
|
360
|
-
|
431
|
+
return bert_buf_tobignum(buf, sign, bin_digits);
|
361
432
|
}
|
362
433
|
|
363
|
-
|
364
|
-
|
365
|
-
|
366
|
-
|
434
|
+
/*
|
435
|
+
* -------------------
|
436
|
+
* |1 | 31 |
|
437
|
+
* |99 | Float String|
|
438
|
+
* -------------------
|
439
|
+
*
|
440
|
+
* A float is stored in string format. The format used in sprintf
|
441
|
+
* to format the float is "%.20e" (there are more bytes allocated
|
442
|
+
* than necessary). To unpack the float use sscanf with format "%lf".
|
443
|
+
*/
|
444
|
+
static VALUE bert_read_float(struct bert_buf *buf)
|
445
|
+
{
|
446
|
+
VALUE rb_float;
|
447
|
+
|
448
|
+
bert_buf_ensure(buf, 31);
|
449
|
+
|
450
|
+
rb_float = rb_str_new((char *)buf->data, 31);
|
451
|
+
buf->data += 31;
|
452
|
+
|
453
|
+
return rb_funcall(rb_float, rb_intern("to_f"), 0);
|
454
|
+
}
|
367
455
|
|
368
|
-
|
456
|
+
static VALUE bert_read_nil(struct bert_buf *buf)
|
457
|
+
{
|
458
|
+
return rb_ary_new2(0);
|
369
459
|
}
|
370
460
|
|
371
|
-
|
372
|
-
|
373
|
-
|
374
|
-
|
375
|
-
case ERL_SMALL_INT:
|
376
|
-
return read_small_int(pData);
|
377
|
-
break;
|
378
|
-
case ERL_INT:
|
379
|
-
return read_int(pData);
|
380
|
-
break;
|
381
|
-
case ERL_FLOAT:
|
382
|
-
return read_float(pData);
|
383
|
-
break;
|
384
|
-
case ERL_ATOM:
|
385
|
-
return read_atom(pData);
|
386
|
-
break;
|
387
|
-
case ERL_SMALL_TUPLE:
|
388
|
-
return read_small_tuple(pData);
|
389
|
-
break;
|
390
|
-
case ERL_LARGE_TUPLE:
|
391
|
-
return read_large_tuple(pData);
|
392
|
-
break;
|
393
|
-
case ERL_NIL:
|
394
|
-
return read_nil(pData);
|
395
|
-
break;
|
396
|
-
case ERL_STRING:
|
397
|
-
return read_string(pData);
|
398
|
-
break;
|
399
|
-
case ERL_LIST:
|
400
|
-
return read_list(pData);
|
401
|
-
break;
|
402
|
-
case ERL_BIN:
|
403
|
-
return read_bin(pData);
|
404
|
-
break;
|
405
|
-
case ERL_SMALL_BIGNUM:
|
406
|
-
return read_small_bignum(pData);
|
407
|
-
break;
|
408
|
-
case ERL_LARGE_BIGNUM:
|
409
|
-
return read_large_bignum(pData);
|
410
|
-
break;
|
411
|
-
}
|
412
|
-
return Qnil;
|
461
|
+
static VALUE bert_read_invalid(struct bert_buf *buf)
|
462
|
+
{
|
463
|
+
rb_raise(rb_eTypeError, "Invalid object tag in BERT stream");
|
464
|
+
return Qnil;
|
413
465
|
}
|
414
466
|
|
415
|
-
VALUE
|
416
|
-
|
467
|
+
static VALUE rb_bert_decode(VALUE klass, VALUE rb_string)
|
468
|
+
{
|
469
|
+
struct bert_buf buf;
|
417
470
|
|
418
|
-
|
471
|
+
Check_Type(rb_string, T_STRING);
|
472
|
+
buf.data = (uint8_t *)RSTRING_PTR(rb_string);
|
473
|
+
buf.end = buf.data + RSTRING_LEN(rb_string);
|
419
474
|
|
420
|
-
|
421
|
-
if(read_1(pData) != ERL_VERSION) {
|
422
|
-
rb_raise(rb_eStandardError, "Bad Magic");
|
423
|
-
}
|
475
|
+
bert_buf_ensure(&buf, 1);
|
424
476
|
|
425
|
-
|
477
|
+
if (bert_buf_read8(&buf) != ERL_VERSION)
|
478
|
+
rb_raise(rb_eTypeError, "Invalid magic value for BERT string");
|
479
|
+
|
480
|
+
return bert_read(&buf);
|
426
481
|
}
|
427
482
|
|
428
|
-
VALUE
|
429
|
-
|
483
|
+
static VALUE rb_bert_impl(VALUE klass)
|
484
|
+
{
|
485
|
+
return rb_str_new("C", 1);
|
430
486
|
}
|
431
487
|
|
432
|
-
void Init_decode()
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
488
|
+
void Init_decode()
|
489
|
+
{
|
490
|
+
rb_mBERT = rb_const_get(rb_cObject, rb_intern("BERT"));
|
491
|
+
rb_cTuple = rb_const_get(rb_mBERT, rb_intern("Tuple"));
|
492
|
+
|
493
|
+
rb_cDecode = rb_define_class_under(rb_mBERT, "Decode", rb_cObject);
|
494
|
+
rb_define_singleton_method(rb_cDecode, "decode", rb_bert_decode, 1);
|
495
|
+
rb_define_singleton_method(rb_cDecode, "impl", rb_bert_impl, 0);
|
438
496
|
}
|
data/ext/bert/c/extconf.rb
CHANGED
data/lib/bert/decode.rb
CHANGED
@@ -8,7 +8,9 @@ module BERT
|
|
8
8
|
end
|
9
9
|
|
10
10
|
def self.decode(string)
|
11
|
-
|
11
|
+
io = StringIO.new(string)
|
12
|
+
io.set_encoding('binary') if io.respond_to?(:set_encoding)
|
13
|
+
new(io).read_any
|
12
14
|
end
|
13
15
|
|
14
16
|
def initialize(ins)
|
@@ -113,7 +115,7 @@ module BERT
|
|
113
115
|
value = read_4
|
114
116
|
negative = (value >> 31)[0] == 1
|
115
117
|
value = (value - (1 << 32)) if negative
|
116
|
-
value
|
118
|
+
value
|
117
119
|
end
|
118
120
|
|
119
121
|
def read_small_bignum
|
@@ -126,7 +128,7 @@ module BERT
|
|
126
128
|
value = (byte * (256 ** index))
|
127
129
|
sign != 0 ? (result - value) : (result + value)
|
128
130
|
end
|
129
|
-
|
131
|
+
added
|
130
132
|
end
|
131
133
|
|
132
134
|
def read_large_bignum
|
@@ -139,7 +141,7 @@ module BERT
|
|
139
141
|
value = (byte * (256 ** index))
|
140
142
|
sign != 0 ? (result - value) : (result + value)
|
141
143
|
end
|
142
|
-
|
144
|
+
added
|
143
145
|
end
|
144
146
|
|
145
147
|
def read_float
|
data/lib/bert/encode.rb
CHANGED
@@ -10,6 +10,7 @@ module BERT
|
|
10
10
|
|
11
11
|
def self.encode(data)
|
12
12
|
io = StringIO.new
|
13
|
+
io.set_encoding('binary') if io.respond_to?(:set_encoding)
|
13
14
|
self.new(io).write_any(data)
|
14
15
|
io.string
|
15
16
|
end
|
@@ -56,7 +57,7 @@ module BERT
|
|
56
57
|
fail(sym) unless sym.is_a?(Symbol)
|
57
58
|
data = sym.to_s
|
58
59
|
write_1 ATOM
|
59
|
-
write_2 data.
|
60
|
+
write_2 data.bytesize
|
60
61
|
write_string data
|
61
62
|
end
|
62
63
|
|
@@ -78,7 +79,7 @@ module BERT
|
|
78
79
|
end
|
79
80
|
|
80
81
|
def write_bignum(num)
|
81
|
-
n = (num.to_s(2).size / 8.0).ceil
|
82
|
+
n = (num.abs.to_s(2).size / 8.0).ceil
|
82
83
|
if n < 256
|
83
84
|
write_1 SMALL_BIGNUM
|
84
85
|
write_1 n
|
@@ -125,7 +126,7 @@ module BERT
|
|
125
126
|
|
126
127
|
def write_binary(data)
|
127
128
|
write_1 BIN
|
128
|
-
write_4 data.
|
129
|
+
write_4 data.bytesize
|
129
130
|
write_string data
|
130
131
|
end
|
131
132
|
|
data/test/bert_test.rb
CHANGED
data/test/encoder_test.rb
CHANGED
@@ -1,3 +1,5 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
|
1
3
|
require 'test_helper'
|
2
4
|
|
3
5
|
class EncoderTest < Test::Unit::TestCase
|
@@ -79,9 +81,22 @@ class EncoderTest < Test::Unit::TestCase
|
|
79
81
|
assert cruby[1].instance_of?(BERT::Tuple)
|
80
82
|
end
|
81
83
|
|
84
|
+
should 'handle utf8 strings' do
|
85
|
+
bert = [131, 109, 0, 0, 0, 5, 195, 169, 116, 195, 169].pack('C*')
|
86
|
+
assert_equal bert, BERT::Encoder.encode("été")
|
87
|
+
end
|
88
|
+
|
89
|
+
should 'handle utf8 symbols' do
|
90
|
+
bert = [131, 100, 0, 5, 195, 169, 116, 195, 169].pack('C*')
|
91
|
+
assert_equal bert, BERT::Encoder.encode(:'été')
|
92
|
+
end
|
93
|
+
|
82
94
|
should "handle bignums" do
|
83
95
|
bert = [131,110,8,0,0,0,232,137,4,35,199,138].pack('c*')
|
84
96
|
assert_equal bert, BERT::Encoder.encode(10_000_000_000_000_000_000)
|
97
|
+
|
98
|
+
bert = [131,110,8,1,0,0,232,137,4,35,199,138].pack('c*')
|
99
|
+
assert_equal bert, BERT::Encoder.encode(-10_000_000_000_000_000_000)
|
85
100
|
end
|
86
101
|
|
87
102
|
should "leave other stuff alone" do
|
metadata
CHANGED
@@ -1,7 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: bert
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
|
4
|
+
hash: 31
|
5
|
+
prerelease:
|
6
|
+
segments:
|
7
|
+
- 1
|
8
|
+
- 1
|
9
|
+
- 6
|
10
|
+
version: 1.1.6
|
5
11
|
platform: ruby
|
6
12
|
authors:
|
7
13
|
- Tom Preston-Werner
|
@@ -14,14 +20,18 @@ default_executable:
|
|
14
20
|
dependencies:
|
15
21
|
- !ruby/object:Gem::Dependency
|
16
22
|
name: thoughtbot-shoulda
|
17
|
-
|
18
|
-
|
19
|
-
|
23
|
+
prerelease: false
|
24
|
+
requirement: &id001 !ruby/object:Gem::Requirement
|
25
|
+
none: false
|
20
26
|
requirements:
|
21
27
|
- - ">="
|
22
28
|
- !ruby/object:Gem::Version
|
29
|
+
hash: 3
|
30
|
+
segments:
|
31
|
+
- 0
|
23
32
|
version: "0"
|
24
|
-
|
33
|
+
type: :development
|
34
|
+
version_requirements: *id001
|
25
35
|
description: BERT Serializiation for Ruby
|
26
36
|
email: tom@mojombo.com
|
27
37
|
executables: []
|
@@ -69,21 +79,27 @@ require_paths:
|
|
69
79
|
- lib
|
70
80
|
- ext
|
71
81
|
required_ruby_version: !ruby/object:Gem::Requirement
|
82
|
+
none: false
|
72
83
|
requirements:
|
73
84
|
- - ">="
|
74
85
|
- !ruby/object:Gem::Version
|
86
|
+
hash: 3
|
87
|
+
segments:
|
88
|
+
- 0
|
75
89
|
version: "0"
|
76
|
-
version:
|
77
90
|
required_rubygems_version: !ruby/object:Gem::Requirement
|
91
|
+
none: false
|
78
92
|
requirements:
|
79
93
|
- - ">="
|
80
94
|
- !ruby/object:Gem::Version
|
95
|
+
hash: 3
|
96
|
+
segments:
|
97
|
+
- 0
|
81
98
|
version: "0"
|
82
|
-
version:
|
83
99
|
requirements: []
|
84
100
|
|
85
101
|
rubyforge_project:
|
86
|
-
rubygems_version: 1.
|
102
|
+
rubygems_version: 1.6.2
|
87
103
|
signing_key:
|
88
104
|
specification_version: 3
|
89
105
|
summary: BERT Serializiation for Ruby
|