bert 1.1.2 → 1.1.6

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,3 +1,13 @@
1
+ = 1.1.6 / 2012-05-25
2
+ * Bug fixes
3
+ * Better handling of utf-8 characters
4
+
5
+ = 1.1.5 / 2011-12-09
6
+ * Bug fixes
7
+ * Faster and more secure C BERT decoder
8
+ * Fix for encoding of negative bignums
9
+ * Ruby 1.9 compatibility
10
+
1
11
  = 1.1.2 / 2010-02-08
2
12
  * Bug fixes
3
13
  * Fix bignum handling on 256 byte boundary
@@ -28,4 +38,4 @@
28
38
  * Add roundtrip tests
29
39
 
30
40
  = 0.1.0 / 2009-10-08
31
- * Birthday!
41
+ * Birthday!
data/README.md CHANGED
@@ -49,9 +49,9 @@ Usage
49
49
  require 'bert'
50
50
 
51
51
  bert = BERT.encode(t[:user, {:name => 'TPW', :nick => 'mojombo'}])
52
- # => "\203h\002d\000\004userh\002d\000\004dictl\000\000\000\002h\002d
53
- \000\004namem\000\000\000\003TPWh\002d\000\004nickm\000\000\000
54
- \amojomboj"
52
+ # => "\203h\002d\000\004userh\003d\000\004bertd\000\004dictl\000\000\
53
+ 000\002h\002d\000\004namem\000\000\000\003TPWh\002d\000\004nickm\
54
+ 000\000\000\amojomboj"
55
55
 
56
56
  BERT.decode(bert)
57
57
  # => t[:user, {:name=>"TPW", :nick=>"mojombo"}]
data/Rakefile CHANGED
@@ -11,9 +11,14 @@ begin
11
11
  gem.homepage = "http://github.com/mojombo/bert"
12
12
  gem.authors = ["Tom Preston-Werner"]
13
13
  gem.add_development_dependency("thoughtbot-shoulda")
14
- gem.require_paths = ["lib", "ext"]
15
- gem.files.include("ext")
16
- gem.extensions << 'ext/bert/c/extconf.rb'
14
+ if ENV["JAVA"]
15
+ gem.extensions = nil
16
+ gem.platform = 'java'
17
+ else
18
+ gem.require_paths = ["lib", "ext"]
19
+ gem.files.include("ext")
20
+ gem.extensions << 'ext/bert/c/extconf.rb'
21
+ end
17
22
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
18
23
  end
19
24
  rescue LoadError
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.1.2
1
+ 1.1.6
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{bert}
8
- s.version = "1.1.2"
8
+ s.version = "1.1.6"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Tom Preston-Werner"]
@@ -1,11 +1,9 @@
1
1
  #include "ruby.h"
2
- #include <string.h>
2
+ #include <stdint.h>
3
+ #include <netinet/in.h>
3
4
 
4
- #define ERL_VERSION 131
5
5
  #define ERL_SMALL_INT 97
6
6
  #define ERL_INT 98
7
- #define ERL_SMALL_BIGNUM 110
8
- #define ERL_LARGE_BIGNUM 111
9
7
  #define ERL_FLOAT 99
10
8
  #define ERL_ATOM 100
11
9
  #define ERL_SMALL_TUPLE 104
@@ -14,425 +12,485 @@
14
12
  #define ERL_STRING 107
15
13
  #define ERL_LIST 108
16
14
  #define ERL_BIN 109
15
+ #define ERL_SMALL_BIGNUM 110
16
+ #define ERL_LARGE_BIGNUM 111
17
+ #define ERL_VERSION 131
17
18
 
18
- static VALUE mBERT;
19
- static VALUE cDecode;
20
- static VALUE cTuple;
21
- void Init_decode();
22
-
23
- VALUE method_decode(VALUE klass, VALUE rString);
24
-
25
- VALUE read_any_raw(unsigned char **pData);
26
-
27
- // printers
28
-
29
- void p(VALUE val) {
30
- rb_funcall(rb_mKernel, rb_intern("p"), 1, val);
19
+ #define BERT_VALID_TYPE(t) ((t) >= ERL_SMALL_INT && (t) <= ERL_LARGE_BIGNUM)
20
+ #define BERT_TYPE_OFFSET (ERL_SMALL_INT)
21
+
22
+ static VALUE rb_mBERT;
23
+ static VALUE rb_cDecode;
24
+ static VALUE rb_cTuple;
25
+
26
+ struct bert_buf {
27
+ const uint8_t *data;
28
+ const uint8_t *end;
29
+ };
30
+
31
+ static VALUE bert_read_invalid(struct bert_buf *buf);
32
+
33
+ static VALUE bert_read_sint(struct bert_buf *buf);
34
+ static VALUE bert_read_int(struct bert_buf *buf);
35
+ static VALUE bert_read_float(struct bert_buf *buf);
36
+ static VALUE bert_read_atom(struct bert_buf *buf);
37
+ static VALUE bert_read_stuple(struct bert_buf *buf);
38
+ static VALUE bert_read_ltuple(struct bert_buf *buf);
39
+ static VALUE bert_read_nil(struct bert_buf *buf);
40
+ static VALUE bert_read_string(struct bert_buf *buf);
41
+ static VALUE bert_read_list(struct bert_buf *buf);
42
+ static VALUE bert_read_bin(struct bert_buf *buf);
43
+ static VALUE bert_read_sbignum(struct bert_buf *buf);
44
+ static VALUE bert_read_lbignum(struct bert_buf *buf);
45
+
46
+ typedef VALUE (*bert_ptr)(struct bert_buf *buf);
47
+ static bert_ptr bert_callbacks[] = {
48
+ &bert_read_sint,
49
+ &bert_read_int,
50
+ &bert_read_float,
51
+ &bert_read_atom,
52
+ &bert_read_invalid,
53
+ &bert_read_invalid,
54
+ &bert_read_invalid,
55
+ &bert_read_stuple,
56
+ &bert_read_ltuple,
57
+ &bert_read_nil,
58
+ &bert_read_string,
59
+ &bert_read_list,
60
+ &bert_read_bin,
61
+ &bert_read_sbignum,
62
+ &bert_read_lbignum
63
+ };
64
+
65
+ static inline uint8_t bert_buf_read8(struct bert_buf *buf)
66
+ {
67
+ return *buf->data++;
31
68
  }
32
69
 
33
- // checkers
70
+ static inline uint16_t bert_buf_read16(struct bert_buf *buf)
71
+ {
72
+ /* Note that this will trigger -Wcast-align and throw a
73
+ * bus error on platforms where unaligned reads are not
74
+ * allowed. Also note that this is not breaking any
75
+ * strict aliasing rules. */
76
+ uint16_t short_val = *(uint16_t *)buf->data;
77
+ buf->data += sizeof(uint16_t);
78
+ return ntohs(short_val);
79
+ }
34
80
 
35
- void check_int(int num) {
36
- char buf[17];
37
- sprintf(buf, "%u", num);
38
- rb_raise(rb_eStandardError, buf);
81
+ static inline uint32_t bert_buf_read32(struct bert_buf *buf)
82
+ {
83
+ /* Note that this will trigger -Wcast-align and throw a
84
+ * bus error on platforms where unaligned reads are not
85
+ * allowed. Also note that this is not breaking any
86
+ * strict aliasing rules. */
87
+ uint32_t long_val = *(uint32_t *)buf->data;
88
+ buf->data += sizeof(uint32_t);
89
+ return ntohl(long_val);
39
90
  }
40
91
 
41
- void check_str(char *str) {
42
- rb_raise(rb_eStandardError, str);
92
+ static inline void bert_buf_ensure(struct bert_buf *buf, size_t size)
93
+ {
94
+ if (buf->data + size > buf->end)
95
+ rb_raise(rb_eEOFError, "Unexpected end of BERT stream");
43
96
  }
44
97
 
45
- // string peekers/readers
98
+ static VALUE bert_read(struct bert_buf *buf)
99
+ {
100
+ uint8_t type;
46
101
 
47
- unsigned int peek_1(unsigned char **pData) {
48
- return (unsigned int) **pData;
49
- }
102
+ bert_buf_ensure(buf, 1);
103
+ type = bert_buf_read8(buf);
50
104
 
51
- unsigned int peek_2(unsigned char **pData) {
52
- return (unsigned int) ((**pData << 8) + *(*pData + 1));
53
- }
105
+ if (!BERT_VALID_TYPE(type))
106
+ rb_raise(rb_eRuntimeError, "Invalid tag '%d' for term", type);
54
107
 
55
- unsigned int peek_4(unsigned char **pData) {
56
- return (unsigned int) ((**pData << 24) + (*(*pData + 1) << 16) + (*(*pData + 2) << 8) + *(*pData + 3));
108
+ return bert_callbacks[type - BERT_TYPE_OFFSET](buf);
57
109
  }
58
110
 
59
- unsigned int read_1(unsigned char **pData) {
60
- unsigned int val = peek_1(pData);
61
- *pData += 1;
62
- return val;
63
- }
111
+ static VALUE bert_read_dict(struct bert_buf *buf)
112
+ {
113
+ uint8_t type;
114
+ uint32_t length = 0, i;
115
+ VALUE rb_dict;
64
116
 
65
- unsigned int read_2(unsigned char **pData) {
66
- unsigned int val = peek_2(pData);
67
- *pData += 2;
68
- return val;
69
- }
117
+ bert_buf_ensure(buf, 1);
118
+ type = bert_buf_read8(buf);
70
119
 
71
- unsigned int read_4(unsigned char **pData) {
72
- unsigned int val = peek_4(pData);
73
- *pData += 4;
74
- return val;
75
- }
120
+ if (type != ERL_LIST && type != ERL_NIL)
121
+ rb_raise(rb_eTypeError, "Invalid dict spec, not an erlang list");
76
122
 
77
- // tuples
123
+ if (type == ERL_LIST) {
124
+ bert_buf_ensure(buf, 4);
125
+ length = bert_buf_read32(buf);
126
+ }
78
127
 
79
- VALUE read_tuple(unsigned char **pData, unsigned int arity);
128
+ rb_dict = rb_hash_new();
80
129
 
81
- VALUE read_dict_pair(unsigned char **pData) {
82
- if(read_1(pData) != ERL_SMALL_TUPLE) {
83
- rb_raise(rb_eStandardError, "Invalid dict pair, not a small tuple");
84
- }
130
+ for (i = 0; i < length; ++i) {
131
+ VALUE key, val;
132
+ bert_buf_ensure(buf, 2);
85
133
 
86
- int arity = read_1(pData);
134
+ if (bert_buf_read8(buf) != ERL_SMALL_TUPLE || bert_buf_read8(buf) != 2)
135
+ rb_raise(rb_eTypeError, "Invalid dict tuple");
87
136
 
88
- if(arity != 2) {
89
- rb_raise(rb_eStandardError, "Invalid dict pair, not a 2-tuple");
90
- }
137
+ key = bert_read(buf);
138
+ val = bert_read(buf);
91
139
 
92
- return read_tuple(pData, arity);
93
- }
140
+ rb_hash_aset(rb_dict, key, val);
141
+ }
94
142
 
95
- VALUE read_dict(unsigned char **pData) {
96
- int type = read_1(pData);
97
- if(!(type == ERL_LIST || type == ERL_NIL)) {
98
- rb_raise(rb_eStandardError, "Invalid dict spec, not an erlang list");
99
- }
100
-
101
- unsigned int length = 0;
102
- if(type == ERL_LIST) {
103
- length = read_4(pData);
104
- }
105
-
106
- VALUE cHash = rb_const_get(rb_cObject, rb_intern("Hash"));
107
- VALUE hash = rb_funcall(cHash, rb_intern("new"), 0);
108
-
109
- int i;
110
- for(i = 0; i < length; ++i) {
111
- VALUE pair = read_dict_pair(pData);
112
- VALUE first = rb_ary_entry(pair, 0);
113
- VALUE last = rb_ary_entry(pair, 1);
114
- rb_funcall(hash, rb_intern("store"), 2, first, last);
115
- }
116
-
117
- if(type == ERL_LIST) {
118
- read_1(pData);
119
- }
120
-
121
- return hash;
122
- }
143
+ if (type == ERL_LIST) {
144
+ /* disregard tail; adquire women */
145
+ bert_buf_ensure(buf, 1);
146
+ (void)bert_buf_read8(buf);
147
+ }
123
148
 
124
- VALUE read_complex_type(unsigned char **pData, int arity) {
125
- VALUE type = read_any_raw(pData);
126
- ID id = SYM2ID(type);
127
- if(id == rb_intern("nil")) {
128
- return Qnil;
129
- } else if(id == rb_intern("true")) {
130
- return Qtrue;
131
- } else if(id == rb_intern("false")) {
132
- return Qfalse;
133
- } else if(id == rb_intern("time")) {
134
- VALUE megasecs = read_any_raw(pData);
135
- VALUE msecs = rb_funcall(megasecs, rb_intern("*"), 1, INT2NUM(1000000));
136
- VALUE secs = read_any_raw(pData);
137
- VALUE microsecs = read_any_raw(pData);
138
- VALUE stamp = rb_funcall(msecs, rb_intern("+"), 1, secs);
139
- return rb_funcall(rb_cTime, rb_intern("at"), 2, stamp, microsecs);
140
- } else if(id == rb_intern("regex")) {
141
- VALUE source = read_any_raw(pData);
142
- VALUE opts = read_any_raw(pData);
143
- int flags = 0;
144
- if(rb_ary_includes(opts, ID2SYM(rb_intern("caseless"))))
145
- flags = flags | 1;
146
- if(rb_ary_includes(opts, ID2SYM(rb_intern("extended"))))
147
- flags = flags | 2;
148
- if(rb_ary_includes(opts, ID2SYM(rb_intern("multiline"))))
149
- flags = flags | 4;
150
- return rb_funcall(rb_cRegexp, rb_intern("new"), 2, source, INT2NUM(flags));
151
- } else if(id == rb_intern("dict")) {
152
- return read_dict(pData);
153
- } else {
154
- return Qnil;
155
- }
149
+ return rb_dict;
156
150
  }
157
151
 
158
- VALUE read_tuple(unsigned char **pData, unsigned int arity) {
159
- if(arity > 0) {
160
- VALUE tag = read_any_raw(pData);
161
- if(SYM2ID(tag) == rb_intern("bert")) {
162
- return read_complex_type(pData, arity);
163
- } else {
164
- VALUE tuple = rb_funcall(cTuple, rb_intern("new"), 1, INT2NUM(arity));
165
- rb_ary_store(tuple, 0, tag);
166
- int i;
167
- for(i = 1; i < arity; ++i) {
168
- rb_ary_store(tuple, i, read_any_raw(pData));
169
- }
170
- return tuple;
171
- }
172
- } else {
173
- return rb_funcall(cTuple, rb_intern("new"), 0);
174
- }
152
+ static inline void bert_ensure_arity(uint32_t arity, uint32_t expected)
153
+ {
154
+ if (arity != expected)
155
+ rb_raise(rb_eTypeError, "Invalid tuple arity for complex type");
175
156
  }
176
157
 
177
- VALUE read_small_tuple(unsigned char **pData) {
178
- if(read_1(pData) != ERL_SMALL_TUPLE) {
179
- rb_raise(rb_eStandardError, "Invalid Type, not a small tuple");
180
- }
158
+ static VALUE bert_read_complex(struct bert_buf *buf, uint32_t arity)
159
+ {
160
+ VALUE rb_type;
161
+ ID id_type;
181
162
 
182
- int arity = read_1(pData);
183
- return read_tuple(pData, arity);
184
- }
163
+ rb_type = bert_read(buf);
164
+ Check_Type(rb_type, T_SYMBOL);
185
165
 
186
- VALUE read_large_tuple(unsigned char **pData) {
187
- if(read_1(pData) != ERL_LARGE_TUPLE) {
188
- rb_raise(rb_eStandardError, "Invalid Type, not a large tuple");
189
- }
166
+ id_type = SYM2ID(rb_type);
190
167
 
191
- unsigned int arity = read_4(pData);
192
- return read_tuple(pData, arity);
193
- }
168
+ if (id_type == rb_intern("nil")) {
169
+ bert_ensure_arity(arity, 2);
170
+ return Qnil;
194
171
 
195
- // lists
172
+ } else if (id_type == rb_intern("true")) {
173
+ bert_ensure_arity(arity, 2);
174
+ return Qtrue;
196
175
 
197
- VALUE read_list(unsigned char **pData) {
198
- if(read_1(pData) != ERL_LIST) {
199
- rb_raise(rb_eStandardError, "Invalid Type, not an erlang list");
200
- }
176
+ } else if (id_type == rb_intern("false")) {
177
+ bert_ensure_arity(arity, 2);
178
+ return Qfalse;
201
179
 
202
- unsigned int size = read_4(pData);
180
+ } else if (id_type == rb_intern("time")) {
181
+ VALUE rb_megasecs, rb_secs, rb_microsecs, rb_stamp, rb_msecs;
203
182
 
204
- VALUE array = rb_ary_new2(size);
183
+ bert_ensure_arity(arity, 5);
205
184
 
206
- int i;
207
- for(i = 0; i < size; ++i) {
208
- rb_ary_store(array, i, read_any_raw(pData));
209
- }
185
+ rb_megasecs = bert_read(buf);
186
+ rb_secs = bert_read(buf);
187
+ rb_microsecs = bert_read(buf);
210
188
 
211
- read_1(pData);
189
+ rb_msecs = rb_funcall(rb_megasecs, rb_intern("*"), 1, INT2NUM(1000000));
190
+ rb_stamp = rb_funcall(rb_msecs, rb_intern("+"), 1, rb_secs);
212
191
 
213
- return array;
214
- }
192
+ return rb_funcall(rb_cTime, rb_intern("at"), 2, rb_stamp, rb_microsecs);
215
193
 
216
- // primitives
194
+ } else if (id_type == rb_intern("regex")) {
195
+ VALUE rb_source, rb_opts;
196
+ int flags = 0;
197
+
198
+ bert_ensure_arity(arity, 4);
217
199
 
218
- void read_string_raw(unsigned char *dest, unsigned char **pData, unsigned int length) {
219
- memcpy((char *) dest, (char *) *pData, length);
220
- *(dest + length) = (unsigned char) 0;
221
- *pData += length;
222
- }
223
-
224
- VALUE read_bin(unsigned char **pData) {
225
- if(read_1(pData) != ERL_BIN) {
226
- rb_raise(rb_eStandardError, "Invalid Type, not an erlang binary");
227
- }
200
+ rb_source = bert_read(buf);
201
+ rb_opts = bert_read(buf);
228
202
 
229
- unsigned int length = read_4(pData);
203
+ Check_Type(rb_source, T_STRING);
204
+ Check_Type(rb_opts, T_ARRAY);
230
205
 
231
- VALUE rStr = rb_str_new((char *) *pData, length);
232
- *pData += length;
206
+ if (rb_ary_includes(rb_opts, ID2SYM(rb_intern("caseless"))))
207
+ flags = flags | 1;
233
208
 
234
- return rStr;
235
- }
209
+ if (rb_ary_includes(rb_opts, ID2SYM(rb_intern("extended"))))
210
+ flags = flags | 2;
236
211
 
237
- VALUE read_string(unsigned char **pData) {
238
- if(read_1(pData) != ERL_STRING) {
239
- rb_raise(rb_eStandardError, "Invalid Type, not an erlang string");
240
- }
212
+ if (rb_ary_includes(rb_opts, ID2SYM(rb_intern("multiline"))))
213
+ flags = flags | 4;
241
214
 
242
- int length = read_2(pData);
243
- VALUE array = rb_ary_new2(length);
215
+ return rb_funcall(rb_cRegexp, rb_intern("new"), 2, rb_source, INT2NUM(flags));
244
216
 
245
- int i = 0;
246
- for(i; i < length; ++i) {
247
- rb_ary_store(array, i, INT2NUM(**pData));
248
- *pData += 1;
249
- }
217
+ } else if (id_type == rb_intern("dict")) {
218
+ bert_ensure_arity(arity, 3);
219
+ return bert_read_dict(buf);
220
+ }
250
221
 
251
- return array;
222
+ rb_raise(rb_eTypeError, "Invalid tag for complex value");
223
+ return Qnil;
252
224
  }
253
225
 
254
- VALUE read_atom(unsigned char **pData) {
255
- if(read_1(pData) != ERL_ATOM) {
256
- rb_raise(rb_eStandardError, "Invalid Type, not an atom");
257
- }
226
+ static VALUE bert_read_tuple(struct bert_buf *buf, uint32_t arity)
227
+ {
228
+ if (arity > 0) {
229
+ VALUE rb_tag = bert_read(buf);
258
230
 
259
- int length = read_2(pData);
231
+ if (TYPE(rb_tag) == T_SYMBOL && SYM2ID(rb_tag) == rb_intern("bert")) {
232
+ return bert_read_complex(buf, arity);
233
+ } else {
234
+ uint32_t i;
235
+ VALUE rb_tuple;
260
236
 
261
- unsigned char buf[length + 1];
262
- read_string_raw(buf, pData, length);
237
+ rb_tuple = rb_funcall(rb_cTuple, rb_intern("new"), 1, INT2NUM(arity));
238
+ rb_ary_store(rb_tuple, 0, rb_tag);
263
239
 
264
- return ID2SYM(rb_intern((char *) buf));
265
- }
240
+ for(i = 1; i < arity; ++i)
241
+ rb_ary_store(rb_tuple, i, bert_read(buf));
242
+
243
+ return rb_tuple;
244
+ }
245
+ }
266
246
 
267
- VALUE read_small_int(unsigned char **pData) {
268
- if(read_1(pData) != ERL_SMALL_INT) {
269
- rb_raise(rb_eStandardError, "Invalid Type, not a small int");
270
- }
247
+ return rb_funcall(rb_cTuple, rb_intern("new"), 0);
248
+ }
271
249
 
272
- int value = read_1(pData);
250
+ static VALUE bert_read_stuple(struct bert_buf *buf)
251
+ {
252
+ bert_buf_ensure(buf, 1);
253
+ return bert_read_tuple(buf, bert_buf_read8(buf));
254
+ }
273
255
 
274
- return INT2FIX(value);
256
+ static VALUE bert_read_ltuple(struct bert_buf *buf)
257
+ {
258
+ bert_buf_ensure(buf, 4);
259
+ return bert_read_tuple(buf, bert_buf_read32(buf));
275
260
  }
276
261
 
277
- VALUE read_int(unsigned char **pData) {
278
- if(read_1(pData) != ERL_INT) {
279
- rb_raise(rb_eStandardError, "Invalid Type, not an int");
280
- }
262
+ static VALUE bert_read_list(struct bert_buf *buf)
263
+ {
264
+ uint32_t i, length;
265
+ VALUE rb_list;
281
266
 
282
- long long value = read_4(pData);
267
+ bert_buf_ensure(buf, 4);
268
+ length = bert_buf_read32(buf);
269
+ rb_list = rb_ary_new2(length);
283
270
 
284
- long long negative = ((value >> 31) & 0x1 == 1);
271
+ for(i = 0; i < length; ++i)
272
+ rb_ary_store(rb_list, i, bert_read(buf));
285
273
 
286
- if(negative) {
287
- value = (value - ((long long) 1 << 32));
288
- }
274
+ /* disregard tail; adquire currency */
275
+ bert_buf_ensure(buf, 1);
276
+ (void)bert_buf_read8(buf);
289
277
 
290
- return INT2FIX(value);
278
+ return rb_list;
291
279
  }
292
280
 
293
- VALUE read_small_bignum(unsigned char **pData) {
294
- if(read_1(pData) != ERL_SMALL_BIGNUM) {
295
- rb_raise(rb_eStandardError, "Invalid Type, not a small bignum");
296
- }
281
+ static VALUE bert_read_bin(struct bert_buf *buf)
282
+ {
283
+ uint32_t length;
284
+ VALUE rb_bin;
285
+
286
+ bert_buf_ensure(buf, 4);
287
+ length = bert_buf_read32(buf);
297
288
 
298
- unsigned int size = read_1(pData);
299
- unsigned int sign = read_1(pData);
289
+ bert_buf_ensure(buf, length);
290
+ rb_bin = rb_str_new((char *)buf->data, length);
291
+ buf->data += length;
292
+
293
+ return rb_bin;
294
+ }
300
295
 
301
- VALUE num = INT2NUM(0);
302
- VALUE tmp;
296
+ static VALUE bert_read_string(struct bert_buf *buf)
297
+ {
298
+ uint16_t i, length;
299
+ VALUE rb_string;
303
300
 
304
- unsigned char buf[size + 1];
305
- read_string_raw(buf, pData, size);
301
+ bert_buf_ensure(buf, 2);
302
+ length = bert_buf_read16(buf);
306
303
 
307
- int i;
308
- for(i = 0; i < size; ++i) {
309
- tmp = INT2FIX(*(buf + i));
310
- tmp = rb_funcall(tmp, rb_intern("<<"), 1, INT2NUM(i * 8));
311
- num = rb_funcall(num, rb_intern("+"), 1, tmp);
312
- }
304
+ bert_buf_ensure(buf, length);
305
+ rb_string = rb_ary_new2(length);
313
306
 
314
- if(sign) {
315
- num = rb_funcall(num, rb_intern("*"), 1, INT2NUM(-1));
316
- }
307
+ for (i = 0; i < length; ++i)
308
+ rb_ary_store(rb_string, i, INT2FIX(buf->data[i]));
317
309
 
318
- return num;
310
+ buf->data += length;
311
+ return rb_string;
319
312
  }
320
313
 
321
- VALUE read_large_bignum(unsigned char **pData) {
322
- if(read_1(pData) != ERL_LARGE_BIGNUM) {
323
- rb_raise(rb_eStandardError, "Invalid Type, not a small bignum");
324
- }
314
+ static VALUE bert_read_atom(struct bert_buf *buf)
315
+ {
316
+ VALUE rb_atom;
317
+ uint32_t atom_len;
325
318
 
326
- unsigned int size = read_4(pData);
327
- unsigned int sign = read_1(pData);
319
+ bert_buf_ensure(buf, 2);
320
+ atom_len = bert_buf_read16(buf);
328
321
 
329
- VALUE num = INT2NUM(0);
330
- VALUE tmp;
322
+ /* Instead of trying to build the symbol
323
+ * from here, just create a Ruby string
324
+ * and internalize it. this will be faster for
325
+ * unique symbols */
326
+ bert_buf_ensure(buf, atom_len);
327
+ rb_atom = rb_str_new((char *)buf->data, atom_len);
328
+ buf->data += atom_len;
331
329
 
332
- unsigned char buf[size + 1];
333
- read_string_raw(buf, pData, size);
330
+ return rb_str_intern(rb_atom);
331
+ }
332
+
333
+ static VALUE bert_read_sint(struct bert_buf *buf)
334
+ {
335
+ bert_buf_ensure(buf, 1);
336
+ return INT2FIX((uint8_t)bert_buf_read8(buf));
337
+ }
338
+
339
+ static VALUE bert_read_int(struct bert_buf *buf)
340
+ {
341
+ bert_buf_ensure(buf, 4);
342
+ return LONG2NUM((int32_t)bert_buf_read32(buf));
343
+ }
334
344
 
335
- int i;
336
- for(i = 0; i < size; ++i) {
337
- tmp = INT2FIX(*(buf + i));
338
- tmp = rb_funcall(tmp, rb_intern("<<"), 1, INT2NUM(i * 8));
345
+ static VALUE bert_buf_tobignum(struct bert_buf *buf, uint8_t sign, uint32_t bin_digits)
346
+ {
347
+ #ifdef BERT_FAST_BIGNUM
348
+ uint32_t *bin_buf = NULL;
349
+ VALUE rb_num;
350
+ uint32_t round_size;
351
+
352
+ bert_buf_ensure(buf, bin_digits);
353
+
354
+ /* Hack: ensure that we have at least a full word
355
+ * of extra padding for the actual string, so Ruby
356
+ * cannot guess the sign of the bigint from the MSB */
357
+ round_size = 4 + ((bin_digits + 3) & ~3);
358
+ bin_buf = xmalloc(round_size);
359
+
360
+ memcpy(bin_buf, buf->data, bin_digits);
361
+ memset((char *)bin_buf + bin_digits, 0x0, round_size - bin_digits);
362
+
363
+ /* Make Ruby unpack the string internally.
364
+ * this is significantly faster than adding
365
+ * the bytes one by one */
366
+ rb_num = rb_big_unpack(bin_buf, round_size / 4);
367
+
368
+ /* Enforce sign. So fast! */
369
+ RBIGNUM_SET_SIGN(rb_num, !sign);
370
+
371
+ free(bin_buf);
372
+ return rb_num;
373
+ #else
374
+ /**
375
+ * Slower bignum serialization; convert to a base16
376
+ * string and then let ruby parse it internally.
377
+ *
378
+ * We're shipping with this by default because
379
+ * `rb_big_unpack` is not trustworthy
380
+ */
381
+ static const char to_hex[] = "0123456789abcdef";
382
+ char *num_str = NULL, *ptr;
383
+ VALUE rb_num;
384
+ int32_t i;
385
+
386
+ bert_buf_ensure(buf, bin_digits);
387
+
388
+ /* 2 digits per byte + sign + trailing null */
389
+ num_str = ptr = xmalloc((bin_digits * 2) + 2);
390
+
391
+ *ptr++ = sign ? '-' : '+';
392
+
393
+ for (i = (int32_t)bin_digits - 1; i >= 0; --i) {
394
+ uint8_t val = buf->data[i];
395
+ *ptr++ = to_hex[val >> 4];
396
+ *ptr++ = to_hex[val & 0xf];
397
+ }
398
+
399
+ *ptr = 0;
400
+ buf->data += bin_digits;
401
+
402
+ rb_num = rb_cstr_to_inum(num_str, 16, 1);
403
+ free(num_str);
404
+
405
+ return rb_num;
406
+ #endif
407
+ }
408
+
409
+ VALUE bert_read_sbignum(struct bert_buf *buf)
410
+ {
411
+ uint8_t sign, bin_digits;
339
412
 
340
- num = rb_funcall(num, rb_intern("+"), 1, tmp);
341
- }
413
+ bert_buf_ensure(buf, 2);
342
414
 
343
- if(sign) {
344
- num = rb_funcall(num, rb_intern("*"), 1, INT2NUM(-1));
345
- }
415
+ bin_digits = bert_buf_read8(buf);
416
+ sign = bert_buf_read8(buf);
346
417
 
347
- return num;
418
+ return bert_buf_tobignum(buf, sign, (uint32_t)bin_digits);
348
419
  }
349
420
 
350
- VALUE read_float(unsigned char **pData) {
351
- if(read_1(pData) != ERL_FLOAT) {
352
- rb_raise(rb_eStandardError, "Invalid Type, not a float");
353
- }
421
+ VALUE bert_read_lbignum(struct bert_buf *buf)
422
+ {
423
+ uint32_t bin_digits;
424
+ uint8_t sign;
354
425
 
355
- unsigned char buf[32];
356
- read_string_raw(buf, pData, 31);
426
+ bert_buf_ensure(buf, 5);
357
427
 
358
- VALUE rString = rb_str_new2((char *) buf);
428
+ bin_digits = bert_buf_read32(buf);
429
+ sign = bert_buf_read8(buf);
359
430
 
360
- return rb_funcall(rString, rb_intern("to_f"), 0);
431
+ return bert_buf_tobignum(buf, sign, bin_digits);
361
432
  }
362
433
 
363
- VALUE read_nil(unsigned char **pData) {
364
- if(read_1(pData) != ERL_NIL) {
365
- rb_raise(rb_eStandardError, "Invalid Type, not a nil list");
366
- }
434
+ /*
435
+ * -------------------
436
+ * |1 | 31 |
437
+ * |99 | Float String|
438
+ * -------------------
439
+ *
440
+ * A float is stored in string format. The format used in sprintf
441
+ * to format the float is "%.20e" (there are more bytes allocated
442
+ * than necessary). To unpack the float use sscanf with format "%lf".
443
+ */
444
+ static VALUE bert_read_float(struct bert_buf *buf)
445
+ {
446
+ VALUE rb_float;
447
+
448
+ bert_buf_ensure(buf, 31);
449
+
450
+ rb_float = rb_str_new((char *)buf->data, 31);
451
+ buf->data += 31;
452
+
453
+ return rb_funcall(rb_float, rb_intern("to_f"), 0);
454
+ }
367
455
 
368
- return rb_ary_new2(0);
456
+ static VALUE bert_read_nil(struct bert_buf *buf)
457
+ {
458
+ return rb_ary_new2(0);
369
459
  }
370
460
 
371
- // read_any_raw
372
-
373
- VALUE read_any_raw(unsigned char **pData) {
374
- switch(peek_1(pData)) {
375
- case ERL_SMALL_INT:
376
- return read_small_int(pData);
377
- break;
378
- case ERL_INT:
379
- return read_int(pData);
380
- break;
381
- case ERL_FLOAT:
382
- return read_float(pData);
383
- break;
384
- case ERL_ATOM:
385
- return read_atom(pData);
386
- break;
387
- case ERL_SMALL_TUPLE:
388
- return read_small_tuple(pData);
389
- break;
390
- case ERL_LARGE_TUPLE:
391
- return read_large_tuple(pData);
392
- break;
393
- case ERL_NIL:
394
- return read_nil(pData);
395
- break;
396
- case ERL_STRING:
397
- return read_string(pData);
398
- break;
399
- case ERL_LIST:
400
- return read_list(pData);
401
- break;
402
- case ERL_BIN:
403
- return read_bin(pData);
404
- break;
405
- case ERL_SMALL_BIGNUM:
406
- return read_small_bignum(pData);
407
- break;
408
- case ERL_LARGE_BIGNUM:
409
- return read_large_bignum(pData);
410
- break;
411
- }
412
- return Qnil;
461
+ static VALUE bert_read_invalid(struct bert_buf *buf)
462
+ {
463
+ rb_raise(rb_eTypeError, "Invalid object tag in BERT stream");
464
+ return Qnil;
413
465
  }
414
466
 
415
- VALUE method_decode(VALUE klass, VALUE rString) {
416
- unsigned char *data = (unsigned char *) StringValuePtr(rString);
467
+ static VALUE rb_bert_decode(VALUE klass, VALUE rb_string)
468
+ {
469
+ struct bert_buf buf;
417
470
 
418
- unsigned char **pData = &data;
471
+ Check_Type(rb_string, T_STRING);
472
+ buf.data = (uint8_t *)RSTRING_PTR(rb_string);
473
+ buf.end = buf.data + RSTRING_LEN(rb_string);
419
474
 
420
- // check protocol version
421
- if(read_1(pData) != ERL_VERSION) {
422
- rb_raise(rb_eStandardError, "Bad Magic");
423
- }
475
+ bert_buf_ensure(&buf, 1);
424
476
 
425
- return read_any_raw(pData);
477
+ if (bert_buf_read8(&buf) != ERL_VERSION)
478
+ rb_raise(rb_eTypeError, "Invalid magic value for BERT string");
479
+
480
+ return bert_read(&buf);
426
481
  }
427
482
 
428
- VALUE method_impl(VALUE klass) {
429
- return rb_str_new("C", 1);
483
+ static VALUE rb_bert_impl(VALUE klass)
484
+ {
485
+ return rb_str_new("C", 1);
430
486
  }
431
487
 
432
- void Init_decode() {
433
- mBERT = rb_const_get(rb_cObject, rb_intern("BERT"));
434
- cDecode = rb_define_class_under(mBERT, "Decode", rb_cObject);
435
- cTuple = rb_const_get(mBERT, rb_intern("Tuple"));
436
- rb_define_singleton_method(cDecode, "decode", method_decode, 1);
437
- rb_define_singleton_method(cDecode, "impl", method_impl, 0);
488
+ void Init_decode()
489
+ {
490
+ rb_mBERT = rb_const_get(rb_cObject, rb_intern("BERT"));
491
+ rb_cTuple = rb_const_get(rb_mBERT, rb_intern("Tuple"));
492
+
493
+ rb_cDecode = rb_define_class_under(rb_mBERT, "Decode", rb_cObject);
494
+ rb_define_singleton_method(rb_cDecode, "decode", rb_bert_decode, 1);
495
+ rb_define_singleton_method(rb_cDecode, "impl", rb_bert_impl, 0);
438
496
  }
@@ -1,6 +1,9 @@
1
1
  # Loads mkmf which is used to make makefiles for Ruby extensions
2
2
  require 'mkmf'
3
3
 
4
+ # warnings save lives
5
+ $CFLAGS << " -Wall "
6
+
4
7
  # Give it a name
5
8
  extension_name = 'decode'
6
9
 
@@ -8,7 +8,9 @@ module BERT
8
8
  end
9
9
 
10
10
  def self.decode(string)
11
- new(StringIO.new(string)).read_any
11
+ io = StringIO.new(string)
12
+ io.set_encoding('binary') if io.respond_to?(:set_encoding)
13
+ new(io).read_any
12
14
  end
13
15
 
14
16
  def initialize(ins)
@@ -113,7 +115,7 @@ module BERT
113
115
  value = read_4
114
116
  negative = (value >> 31)[0] == 1
115
117
  value = (value - (1 << 32)) if negative
116
- value = Fixnum.induced_from(value)
118
+ value
117
119
  end
118
120
 
119
121
  def read_small_bignum
@@ -126,7 +128,7 @@ module BERT
126
128
  value = (byte * (256 ** index))
127
129
  sign != 0 ? (result - value) : (result + value)
128
130
  end
129
- Bignum.induced_from(added)
131
+ added
130
132
  end
131
133
 
132
134
  def read_large_bignum
@@ -139,7 +141,7 @@ module BERT
139
141
  value = (byte * (256 ** index))
140
142
  sign != 0 ? (result - value) : (result + value)
141
143
  end
142
- Bignum.induced_from(added)
144
+ added
143
145
  end
144
146
 
145
147
  def read_float
@@ -10,6 +10,7 @@ module BERT
10
10
 
11
11
  def self.encode(data)
12
12
  io = StringIO.new
13
+ io.set_encoding('binary') if io.respond_to?(:set_encoding)
13
14
  self.new(io).write_any(data)
14
15
  io.string
15
16
  end
@@ -56,7 +57,7 @@ module BERT
56
57
  fail(sym) unless sym.is_a?(Symbol)
57
58
  data = sym.to_s
58
59
  write_1 ATOM
59
- write_2 data.length
60
+ write_2 data.bytesize
60
61
  write_string data
61
62
  end
62
63
 
@@ -78,7 +79,7 @@ module BERT
78
79
  end
79
80
 
80
81
  def write_bignum(num)
81
- n = (num.to_s(2).size / 8.0).ceil
82
+ n = (num.abs.to_s(2).size / 8.0).ceil
82
83
  if n < 256
83
84
  write_1 SMALL_BIGNUM
84
85
  write_1 n
@@ -125,7 +126,7 @@ module BERT
125
126
 
126
127
  def write_binary(data)
127
128
  write_1 BIN
128
- write_4 data.length
129
+ write_4 data.bytesize
129
130
  write_string data
130
131
  end
131
132
 
@@ -44,7 +44,8 @@ class BertTest < Test::Unit::TestCase
44
44
  dd << {:a => 1, :b => 2}
45
45
  dd << Time.now
46
46
  dd << /^c(a)t$/i
47
-
47
+
48
+ dd << 178
48
49
  dd << 256**256 - 1
49
50
 
50
51
  dd << :true
@@ -1,3 +1,5 @@
1
+ # encoding: utf-8
2
+
1
3
  require 'test_helper'
2
4
 
3
5
  class EncoderTest < Test::Unit::TestCase
@@ -79,9 +81,22 @@ class EncoderTest < Test::Unit::TestCase
79
81
  assert cruby[1].instance_of?(BERT::Tuple)
80
82
  end
81
83
 
84
+ should 'handle utf8 strings' do
85
+ bert = [131, 109, 0, 0, 0, 5, 195, 169, 116, 195, 169].pack('C*')
86
+ assert_equal bert, BERT::Encoder.encode("été")
87
+ end
88
+
89
+ should 'handle utf8 symbols' do
90
+ bert = [131, 100, 0, 5, 195, 169, 116, 195, 169].pack('C*')
91
+ assert_equal bert, BERT::Encoder.encode(:'été')
92
+ end
93
+
82
94
  should "handle bignums" do
83
95
  bert = [131,110,8,0,0,0,232,137,4,35,199,138].pack('c*')
84
96
  assert_equal bert, BERT::Encoder.encode(10_000_000_000_000_000_000)
97
+
98
+ bert = [131,110,8,1,0,0,232,137,4,35,199,138].pack('c*')
99
+ assert_equal bert, BERT::Encoder.encode(-10_000_000_000_000_000_000)
85
100
  end
86
101
 
87
102
  should "leave other stuff alone" do
metadata CHANGED
@@ -1,7 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bert
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.2
4
+ hash: 31
5
+ prerelease:
6
+ segments:
7
+ - 1
8
+ - 1
9
+ - 6
10
+ version: 1.1.6
5
11
  platform: ruby
6
12
  authors:
7
13
  - Tom Preston-Werner
@@ -14,14 +20,18 @@ default_executable:
14
20
  dependencies:
15
21
  - !ruby/object:Gem::Dependency
16
22
  name: thoughtbot-shoulda
17
- type: :development
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
20
26
  requirements:
21
27
  - - ">="
22
28
  - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
23
32
  version: "0"
24
- version:
33
+ type: :development
34
+ version_requirements: *id001
25
35
  description: BERT Serializiation for Ruby
26
36
  email: tom@mojombo.com
27
37
  executables: []
@@ -69,21 +79,27 @@ require_paths:
69
79
  - lib
70
80
  - ext
71
81
  required_ruby_version: !ruby/object:Gem::Requirement
82
+ none: false
72
83
  requirements:
73
84
  - - ">="
74
85
  - !ruby/object:Gem::Version
86
+ hash: 3
87
+ segments:
88
+ - 0
75
89
  version: "0"
76
- version:
77
90
  required_rubygems_version: !ruby/object:Gem::Requirement
91
+ none: false
78
92
  requirements:
79
93
  - - ">="
80
94
  - !ruby/object:Gem::Version
95
+ hash: 3
96
+ segments:
97
+ - 0
81
98
  version: "0"
82
- version:
83
99
  requirements: []
84
100
 
85
101
  rubyforge_project:
86
- rubygems_version: 1.3.5
102
+ rubygems_version: 1.6.2
87
103
  signing_key:
88
104
  specification_version: 3
89
105
  summary: BERT Serializiation for Ruby