bert 1.1.2 → 1.1.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,3 +1,13 @@
1
+ = 1.1.6 / 2012-05-25
2
+ * Bug fixes
3
+ * Better handling of utf-8 characters
4
+
5
+ = 1.1.5 / 2011-12-09
6
+ * Bug fixes
7
+ * Faster and more secure C BERT decoder
8
+ * Fix for encoding of negative bignums
9
+ * Ruby 1.9 compatibility
10
+
1
11
  = 1.1.2 / 2010-02-08
2
12
  * Bug fixes
3
13
  * Fix bignum handling on 256 byte boundary
@@ -28,4 +38,4 @@
28
38
  * Add roundtrip tests
29
39
 
30
40
  = 0.1.0 / 2009-10-08
31
- * Birthday!
41
+ * Birthday!
data/README.md CHANGED
@@ -49,9 +49,9 @@ Usage
49
49
  require 'bert'
50
50
 
51
51
  bert = BERT.encode(t[:user, {:name => 'TPW', :nick => 'mojombo'}])
52
- # => "\203h\002d\000\004userh\002d\000\004dictl\000\000\000\002h\002d
53
- \000\004namem\000\000\000\003TPWh\002d\000\004nickm\000\000\000
54
- \amojomboj"
52
+ # => "\203h\002d\000\004userh\003d\000\004bertd\000\004dictl\000\000\
53
+ 000\002h\002d\000\004namem\000\000\000\003TPWh\002d\000\004nickm\
54
+ 000\000\000\amojomboj"
55
55
 
56
56
  BERT.decode(bert)
57
57
  # => t[:user, {:name=>"TPW", :nick=>"mojombo"}]
data/Rakefile CHANGED
@@ -11,9 +11,14 @@ begin
11
11
  gem.homepage = "http://github.com/mojombo/bert"
12
12
  gem.authors = ["Tom Preston-Werner"]
13
13
  gem.add_development_dependency("thoughtbot-shoulda")
14
- gem.require_paths = ["lib", "ext"]
15
- gem.files.include("ext")
16
- gem.extensions << 'ext/bert/c/extconf.rb'
14
+ if ENV["JAVA"]
15
+ gem.extensions = nil
16
+ gem.platform = 'java'
17
+ else
18
+ gem.require_paths = ["lib", "ext"]
19
+ gem.files.include("ext")
20
+ gem.extensions << 'ext/bert/c/extconf.rb'
21
+ end
17
22
  # gem is a Gem::Specification... see http://www.rubygems.org/read/chapter/20 for additional settings
18
23
  end
19
24
  rescue LoadError
data/VERSION CHANGED
@@ -1 +1 @@
1
- 1.1.2
1
+ 1.1.6
@@ -5,7 +5,7 @@
5
5
 
6
6
  Gem::Specification.new do |s|
7
7
  s.name = %q{bert}
8
- s.version = "1.1.2"
8
+ s.version = "1.1.6"
9
9
 
10
10
  s.required_rubygems_version = Gem::Requirement.new(">= 0") if s.respond_to? :required_rubygems_version=
11
11
  s.authors = ["Tom Preston-Werner"]
@@ -1,11 +1,9 @@
1
1
  #include "ruby.h"
2
- #include <string.h>
2
+ #include <stdint.h>
3
+ #include <netinet/in.h>
3
4
 
4
- #define ERL_VERSION 131
5
5
  #define ERL_SMALL_INT 97
6
6
  #define ERL_INT 98
7
- #define ERL_SMALL_BIGNUM 110
8
- #define ERL_LARGE_BIGNUM 111
9
7
  #define ERL_FLOAT 99
10
8
  #define ERL_ATOM 100
11
9
  #define ERL_SMALL_TUPLE 104
@@ -14,425 +12,485 @@
14
12
  #define ERL_STRING 107
15
13
  #define ERL_LIST 108
16
14
  #define ERL_BIN 109
15
+ #define ERL_SMALL_BIGNUM 110
16
+ #define ERL_LARGE_BIGNUM 111
17
+ #define ERL_VERSION 131
17
18
 
18
- static VALUE mBERT;
19
- static VALUE cDecode;
20
- static VALUE cTuple;
21
- void Init_decode();
22
-
23
- VALUE method_decode(VALUE klass, VALUE rString);
24
-
25
- VALUE read_any_raw(unsigned char **pData);
26
-
27
- // printers
28
-
29
- void p(VALUE val) {
30
- rb_funcall(rb_mKernel, rb_intern("p"), 1, val);
19
+ #define BERT_VALID_TYPE(t) ((t) >= ERL_SMALL_INT && (t) <= ERL_LARGE_BIGNUM)
20
+ #define BERT_TYPE_OFFSET (ERL_SMALL_INT)
21
+
22
+ static VALUE rb_mBERT;
23
+ static VALUE rb_cDecode;
24
+ static VALUE rb_cTuple;
25
+
26
+ struct bert_buf {
27
+ const uint8_t *data;
28
+ const uint8_t *end;
29
+ };
30
+
31
+ static VALUE bert_read_invalid(struct bert_buf *buf);
32
+
33
+ static VALUE bert_read_sint(struct bert_buf *buf);
34
+ static VALUE bert_read_int(struct bert_buf *buf);
35
+ static VALUE bert_read_float(struct bert_buf *buf);
36
+ static VALUE bert_read_atom(struct bert_buf *buf);
37
+ static VALUE bert_read_stuple(struct bert_buf *buf);
38
+ static VALUE bert_read_ltuple(struct bert_buf *buf);
39
+ static VALUE bert_read_nil(struct bert_buf *buf);
40
+ static VALUE bert_read_string(struct bert_buf *buf);
41
+ static VALUE bert_read_list(struct bert_buf *buf);
42
+ static VALUE bert_read_bin(struct bert_buf *buf);
43
+ static VALUE bert_read_sbignum(struct bert_buf *buf);
44
+ static VALUE bert_read_lbignum(struct bert_buf *buf);
45
+
46
+ typedef VALUE (*bert_ptr)(struct bert_buf *buf);
47
+ static bert_ptr bert_callbacks[] = {
48
+ &bert_read_sint,
49
+ &bert_read_int,
50
+ &bert_read_float,
51
+ &bert_read_atom,
52
+ &bert_read_invalid,
53
+ &bert_read_invalid,
54
+ &bert_read_invalid,
55
+ &bert_read_stuple,
56
+ &bert_read_ltuple,
57
+ &bert_read_nil,
58
+ &bert_read_string,
59
+ &bert_read_list,
60
+ &bert_read_bin,
61
+ &bert_read_sbignum,
62
+ &bert_read_lbignum
63
+ };
64
+
65
+ static inline uint8_t bert_buf_read8(struct bert_buf *buf)
66
+ {
67
+ return *buf->data++;
31
68
  }
32
69
 
33
- // checkers
70
+ static inline uint16_t bert_buf_read16(struct bert_buf *buf)
71
+ {
72
+ /* Note that this will trigger -Wcast-align and throw a
73
+ * bus error on platforms where unaligned reads are not
74
+ * allowed. Also note that this is not breaking any
75
+ * strict aliasing rules. */
76
+ uint16_t short_val = *(uint16_t *)buf->data;
77
+ buf->data += sizeof(uint16_t);
78
+ return ntohs(short_val);
79
+ }
34
80
 
35
- void check_int(int num) {
36
- char buf[17];
37
- sprintf(buf, "%u", num);
38
- rb_raise(rb_eStandardError, buf);
81
+ static inline uint32_t bert_buf_read32(struct bert_buf *buf)
82
+ {
83
+ /* Note that this will trigger -Wcast-align and throw a
84
+ * bus error on platforms where unaligned reads are not
85
+ * allowed. Also note that this is not breaking any
86
+ * strict aliasing rules. */
87
+ uint32_t long_val = *(uint32_t *)buf->data;
88
+ buf->data += sizeof(uint32_t);
89
+ return ntohl(long_val);
39
90
  }
40
91
 
41
- void check_str(char *str) {
42
- rb_raise(rb_eStandardError, str);
92
+ static inline void bert_buf_ensure(struct bert_buf *buf, size_t size)
93
+ {
94
+ if (buf->data + size > buf->end)
95
+ rb_raise(rb_eEOFError, "Unexpected end of BERT stream");
43
96
  }
44
97
 
45
- // string peekers/readers
98
+ static VALUE bert_read(struct bert_buf *buf)
99
+ {
100
+ uint8_t type;
46
101
 
47
- unsigned int peek_1(unsigned char **pData) {
48
- return (unsigned int) **pData;
49
- }
102
+ bert_buf_ensure(buf, 1);
103
+ type = bert_buf_read8(buf);
50
104
 
51
- unsigned int peek_2(unsigned char **pData) {
52
- return (unsigned int) ((**pData << 8) + *(*pData + 1));
53
- }
105
+ if (!BERT_VALID_TYPE(type))
106
+ rb_raise(rb_eRuntimeError, "Invalid tag '%d' for term", type);
54
107
 
55
- unsigned int peek_4(unsigned char **pData) {
56
- return (unsigned int) ((**pData << 24) + (*(*pData + 1) << 16) + (*(*pData + 2) << 8) + *(*pData + 3));
108
+ return bert_callbacks[type - BERT_TYPE_OFFSET](buf);
57
109
  }
58
110
 
59
- unsigned int read_1(unsigned char **pData) {
60
- unsigned int val = peek_1(pData);
61
- *pData += 1;
62
- return val;
63
- }
111
+ static VALUE bert_read_dict(struct bert_buf *buf)
112
+ {
113
+ uint8_t type;
114
+ uint32_t length = 0, i;
115
+ VALUE rb_dict;
64
116
 
65
- unsigned int read_2(unsigned char **pData) {
66
- unsigned int val = peek_2(pData);
67
- *pData += 2;
68
- return val;
69
- }
117
+ bert_buf_ensure(buf, 1);
118
+ type = bert_buf_read8(buf);
70
119
 
71
- unsigned int read_4(unsigned char **pData) {
72
- unsigned int val = peek_4(pData);
73
- *pData += 4;
74
- return val;
75
- }
120
+ if (type != ERL_LIST && type != ERL_NIL)
121
+ rb_raise(rb_eTypeError, "Invalid dict spec, not an erlang list");
76
122
 
77
- // tuples
123
+ if (type == ERL_LIST) {
124
+ bert_buf_ensure(buf, 4);
125
+ length = bert_buf_read32(buf);
126
+ }
78
127
 
79
- VALUE read_tuple(unsigned char **pData, unsigned int arity);
128
+ rb_dict = rb_hash_new();
80
129
 
81
- VALUE read_dict_pair(unsigned char **pData) {
82
- if(read_1(pData) != ERL_SMALL_TUPLE) {
83
- rb_raise(rb_eStandardError, "Invalid dict pair, not a small tuple");
84
- }
130
+ for (i = 0; i < length; ++i) {
131
+ VALUE key, val;
132
+ bert_buf_ensure(buf, 2);
85
133
 
86
- int arity = read_1(pData);
134
+ if (bert_buf_read8(buf) != ERL_SMALL_TUPLE || bert_buf_read8(buf) != 2)
135
+ rb_raise(rb_eTypeError, "Invalid dict tuple");
87
136
 
88
- if(arity != 2) {
89
- rb_raise(rb_eStandardError, "Invalid dict pair, not a 2-tuple");
90
- }
137
+ key = bert_read(buf);
138
+ val = bert_read(buf);
91
139
 
92
- return read_tuple(pData, arity);
93
- }
140
+ rb_hash_aset(rb_dict, key, val);
141
+ }
94
142
 
95
- VALUE read_dict(unsigned char **pData) {
96
- int type = read_1(pData);
97
- if(!(type == ERL_LIST || type == ERL_NIL)) {
98
- rb_raise(rb_eStandardError, "Invalid dict spec, not an erlang list");
99
- }
100
-
101
- unsigned int length = 0;
102
- if(type == ERL_LIST) {
103
- length = read_4(pData);
104
- }
105
-
106
- VALUE cHash = rb_const_get(rb_cObject, rb_intern("Hash"));
107
- VALUE hash = rb_funcall(cHash, rb_intern("new"), 0);
108
-
109
- int i;
110
- for(i = 0; i < length; ++i) {
111
- VALUE pair = read_dict_pair(pData);
112
- VALUE first = rb_ary_entry(pair, 0);
113
- VALUE last = rb_ary_entry(pair, 1);
114
- rb_funcall(hash, rb_intern("store"), 2, first, last);
115
- }
116
-
117
- if(type == ERL_LIST) {
118
- read_1(pData);
119
- }
120
-
121
- return hash;
122
- }
143
+ if (type == ERL_LIST) {
144
+ /* disregard tail; adquire women */
145
+ bert_buf_ensure(buf, 1);
146
+ (void)bert_buf_read8(buf);
147
+ }
123
148
 
124
- VALUE read_complex_type(unsigned char **pData, int arity) {
125
- VALUE type = read_any_raw(pData);
126
- ID id = SYM2ID(type);
127
- if(id == rb_intern("nil")) {
128
- return Qnil;
129
- } else if(id == rb_intern("true")) {
130
- return Qtrue;
131
- } else if(id == rb_intern("false")) {
132
- return Qfalse;
133
- } else if(id == rb_intern("time")) {
134
- VALUE megasecs = read_any_raw(pData);
135
- VALUE msecs = rb_funcall(megasecs, rb_intern("*"), 1, INT2NUM(1000000));
136
- VALUE secs = read_any_raw(pData);
137
- VALUE microsecs = read_any_raw(pData);
138
- VALUE stamp = rb_funcall(msecs, rb_intern("+"), 1, secs);
139
- return rb_funcall(rb_cTime, rb_intern("at"), 2, stamp, microsecs);
140
- } else if(id == rb_intern("regex")) {
141
- VALUE source = read_any_raw(pData);
142
- VALUE opts = read_any_raw(pData);
143
- int flags = 0;
144
- if(rb_ary_includes(opts, ID2SYM(rb_intern("caseless"))))
145
- flags = flags | 1;
146
- if(rb_ary_includes(opts, ID2SYM(rb_intern("extended"))))
147
- flags = flags | 2;
148
- if(rb_ary_includes(opts, ID2SYM(rb_intern("multiline"))))
149
- flags = flags | 4;
150
- return rb_funcall(rb_cRegexp, rb_intern("new"), 2, source, INT2NUM(flags));
151
- } else if(id == rb_intern("dict")) {
152
- return read_dict(pData);
153
- } else {
154
- return Qnil;
155
- }
149
+ return rb_dict;
156
150
  }
157
151
 
158
- VALUE read_tuple(unsigned char **pData, unsigned int arity) {
159
- if(arity > 0) {
160
- VALUE tag = read_any_raw(pData);
161
- if(SYM2ID(tag) == rb_intern("bert")) {
162
- return read_complex_type(pData, arity);
163
- } else {
164
- VALUE tuple = rb_funcall(cTuple, rb_intern("new"), 1, INT2NUM(arity));
165
- rb_ary_store(tuple, 0, tag);
166
- int i;
167
- for(i = 1; i < arity; ++i) {
168
- rb_ary_store(tuple, i, read_any_raw(pData));
169
- }
170
- return tuple;
171
- }
172
- } else {
173
- return rb_funcall(cTuple, rb_intern("new"), 0);
174
- }
152
+ static inline void bert_ensure_arity(uint32_t arity, uint32_t expected)
153
+ {
154
+ if (arity != expected)
155
+ rb_raise(rb_eTypeError, "Invalid tuple arity for complex type");
175
156
  }
176
157
 
177
- VALUE read_small_tuple(unsigned char **pData) {
178
- if(read_1(pData) != ERL_SMALL_TUPLE) {
179
- rb_raise(rb_eStandardError, "Invalid Type, not a small tuple");
180
- }
158
+ static VALUE bert_read_complex(struct bert_buf *buf, uint32_t arity)
159
+ {
160
+ VALUE rb_type;
161
+ ID id_type;
181
162
 
182
- int arity = read_1(pData);
183
- return read_tuple(pData, arity);
184
- }
163
+ rb_type = bert_read(buf);
164
+ Check_Type(rb_type, T_SYMBOL);
185
165
 
186
- VALUE read_large_tuple(unsigned char **pData) {
187
- if(read_1(pData) != ERL_LARGE_TUPLE) {
188
- rb_raise(rb_eStandardError, "Invalid Type, not a large tuple");
189
- }
166
+ id_type = SYM2ID(rb_type);
190
167
 
191
- unsigned int arity = read_4(pData);
192
- return read_tuple(pData, arity);
193
- }
168
+ if (id_type == rb_intern("nil")) {
169
+ bert_ensure_arity(arity, 2);
170
+ return Qnil;
194
171
 
195
- // lists
172
+ } else if (id_type == rb_intern("true")) {
173
+ bert_ensure_arity(arity, 2);
174
+ return Qtrue;
196
175
 
197
- VALUE read_list(unsigned char **pData) {
198
- if(read_1(pData) != ERL_LIST) {
199
- rb_raise(rb_eStandardError, "Invalid Type, not an erlang list");
200
- }
176
+ } else if (id_type == rb_intern("false")) {
177
+ bert_ensure_arity(arity, 2);
178
+ return Qfalse;
201
179
 
202
- unsigned int size = read_4(pData);
180
+ } else if (id_type == rb_intern("time")) {
181
+ VALUE rb_megasecs, rb_secs, rb_microsecs, rb_stamp, rb_msecs;
203
182
 
204
- VALUE array = rb_ary_new2(size);
183
+ bert_ensure_arity(arity, 5);
205
184
 
206
- int i;
207
- for(i = 0; i < size; ++i) {
208
- rb_ary_store(array, i, read_any_raw(pData));
209
- }
185
+ rb_megasecs = bert_read(buf);
186
+ rb_secs = bert_read(buf);
187
+ rb_microsecs = bert_read(buf);
210
188
 
211
- read_1(pData);
189
+ rb_msecs = rb_funcall(rb_megasecs, rb_intern("*"), 1, INT2NUM(1000000));
190
+ rb_stamp = rb_funcall(rb_msecs, rb_intern("+"), 1, rb_secs);
212
191
 
213
- return array;
214
- }
192
+ return rb_funcall(rb_cTime, rb_intern("at"), 2, rb_stamp, rb_microsecs);
215
193
 
216
- // primitives
194
+ } else if (id_type == rb_intern("regex")) {
195
+ VALUE rb_source, rb_opts;
196
+ int flags = 0;
197
+
198
+ bert_ensure_arity(arity, 4);
217
199
 
218
- void read_string_raw(unsigned char *dest, unsigned char **pData, unsigned int length) {
219
- memcpy((char *) dest, (char *) *pData, length);
220
- *(dest + length) = (unsigned char) 0;
221
- *pData += length;
222
- }
223
-
224
- VALUE read_bin(unsigned char **pData) {
225
- if(read_1(pData) != ERL_BIN) {
226
- rb_raise(rb_eStandardError, "Invalid Type, not an erlang binary");
227
- }
200
+ rb_source = bert_read(buf);
201
+ rb_opts = bert_read(buf);
228
202
 
229
- unsigned int length = read_4(pData);
203
+ Check_Type(rb_source, T_STRING);
204
+ Check_Type(rb_opts, T_ARRAY);
230
205
 
231
- VALUE rStr = rb_str_new((char *) *pData, length);
232
- *pData += length;
206
+ if (rb_ary_includes(rb_opts, ID2SYM(rb_intern("caseless"))))
207
+ flags = flags | 1;
233
208
 
234
- return rStr;
235
- }
209
+ if (rb_ary_includes(rb_opts, ID2SYM(rb_intern("extended"))))
210
+ flags = flags | 2;
236
211
 
237
- VALUE read_string(unsigned char **pData) {
238
- if(read_1(pData) != ERL_STRING) {
239
- rb_raise(rb_eStandardError, "Invalid Type, not an erlang string");
240
- }
212
+ if (rb_ary_includes(rb_opts, ID2SYM(rb_intern("multiline"))))
213
+ flags = flags | 4;
241
214
 
242
- int length = read_2(pData);
243
- VALUE array = rb_ary_new2(length);
215
+ return rb_funcall(rb_cRegexp, rb_intern("new"), 2, rb_source, INT2NUM(flags));
244
216
 
245
- int i = 0;
246
- for(i; i < length; ++i) {
247
- rb_ary_store(array, i, INT2NUM(**pData));
248
- *pData += 1;
249
- }
217
+ } else if (id_type == rb_intern("dict")) {
218
+ bert_ensure_arity(arity, 3);
219
+ return bert_read_dict(buf);
220
+ }
250
221
 
251
- return array;
222
+ rb_raise(rb_eTypeError, "Invalid tag for complex value");
223
+ return Qnil;
252
224
  }
253
225
 
254
- VALUE read_atom(unsigned char **pData) {
255
- if(read_1(pData) != ERL_ATOM) {
256
- rb_raise(rb_eStandardError, "Invalid Type, not an atom");
257
- }
226
+ static VALUE bert_read_tuple(struct bert_buf *buf, uint32_t arity)
227
+ {
228
+ if (arity > 0) {
229
+ VALUE rb_tag = bert_read(buf);
258
230
 
259
- int length = read_2(pData);
231
+ if (TYPE(rb_tag) == T_SYMBOL && SYM2ID(rb_tag) == rb_intern("bert")) {
232
+ return bert_read_complex(buf, arity);
233
+ } else {
234
+ uint32_t i;
235
+ VALUE rb_tuple;
260
236
 
261
- unsigned char buf[length + 1];
262
- read_string_raw(buf, pData, length);
237
+ rb_tuple = rb_funcall(rb_cTuple, rb_intern("new"), 1, INT2NUM(arity));
238
+ rb_ary_store(rb_tuple, 0, rb_tag);
263
239
 
264
- return ID2SYM(rb_intern((char *) buf));
265
- }
240
+ for(i = 1; i < arity; ++i)
241
+ rb_ary_store(rb_tuple, i, bert_read(buf));
242
+
243
+ return rb_tuple;
244
+ }
245
+ }
266
246
 
267
- VALUE read_small_int(unsigned char **pData) {
268
- if(read_1(pData) != ERL_SMALL_INT) {
269
- rb_raise(rb_eStandardError, "Invalid Type, not a small int");
270
- }
247
+ return rb_funcall(rb_cTuple, rb_intern("new"), 0);
248
+ }
271
249
 
272
- int value = read_1(pData);
250
+ static VALUE bert_read_stuple(struct bert_buf *buf)
251
+ {
252
+ bert_buf_ensure(buf, 1);
253
+ return bert_read_tuple(buf, bert_buf_read8(buf));
254
+ }
273
255
 
274
- return INT2FIX(value);
256
+ static VALUE bert_read_ltuple(struct bert_buf *buf)
257
+ {
258
+ bert_buf_ensure(buf, 4);
259
+ return bert_read_tuple(buf, bert_buf_read32(buf));
275
260
  }
276
261
 
277
- VALUE read_int(unsigned char **pData) {
278
- if(read_1(pData) != ERL_INT) {
279
- rb_raise(rb_eStandardError, "Invalid Type, not an int");
280
- }
262
+ static VALUE bert_read_list(struct bert_buf *buf)
263
+ {
264
+ uint32_t i, length;
265
+ VALUE rb_list;
281
266
 
282
- long long value = read_4(pData);
267
+ bert_buf_ensure(buf, 4);
268
+ length = bert_buf_read32(buf);
269
+ rb_list = rb_ary_new2(length);
283
270
 
284
- long long negative = ((value >> 31) & 0x1 == 1);
271
+ for(i = 0; i < length; ++i)
272
+ rb_ary_store(rb_list, i, bert_read(buf));
285
273
 
286
- if(negative) {
287
- value = (value - ((long long) 1 << 32));
288
- }
274
+ /* disregard tail; adquire currency */
275
+ bert_buf_ensure(buf, 1);
276
+ (void)bert_buf_read8(buf);
289
277
 
290
- return INT2FIX(value);
278
+ return rb_list;
291
279
  }
292
280
 
293
- VALUE read_small_bignum(unsigned char **pData) {
294
- if(read_1(pData) != ERL_SMALL_BIGNUM) {
295
- rb_raise(rb_eStandardError, "Invalid Type, not a small bignum");
296
- }
281
+ static VALUE bert_read_bin(struct bert_buf *buf)
282
+ {
283
+ uint32_t length;
284
+ VALUE rb_bin;
285
+
286
+ bert_buf_ensure(buf, 4);
287
+ length = bert_buf_read32(buf);
297
288
 
298
- unsigned int size = read_1(pData);
299
- unsigned int sign = read_1(pData);
289
+ bert_buf_ensure(buf, length);
290
+ rb_bin = rb_str_new((char *)buf->data, length);
291
+ buf->data += length;
292
+
293
+ return rb_bin;
294
+ }
300
295
 
301
- VALUE num = INT2NUM(0);
302
- VALUE tmp;
296
+ static VALUE bert_read_string(struct bert_buf *buf)
297
+ {
298
+ uint16_t i, length;
299
+ VALUE rb_string;
303
300
 
304
- unsigned char buf[size + 1];
305
- read_string_raw(buf, pData, size);
301
+ bert_buf_ensure(buf, 2);
302
+ length = bert_buf_read16(buf);
306
303
 
307
- int i;
308
- for(i = 0; i < size; ++i) {
309
- tmp = INT2FIX(*(buf + i));
310
- tmp = rb_funcall(tmp, rb_intern("<<"), 1, INT2NUM(i * 8));
311
- num = rb_funcall(num, rb_intern("+"), 1, tmp);
312
- }
304
+ bert_buf_ensure(buf, length);
305
+ rb_string = rb_ary_new2(length);
313
306
 
314
- if(sign) {
315
- num = rb_funcall(num, rb_intern("*"), 1, INT2NUM(-1));
316
- }
307
+ for (i = 0; i < length; ++i)
308
+ rb_ary_store(rb_string, i, INT2FIX(buf->data[i]));
317
309
 
318
- return num;
310
+ buf->data += length;
311
+ return rb_string;
319
312
  }
320
313
 
321
- VALUE read_large_bignum(unsigned char **pData) {
322
- if(read_1(pData) != ERL_LARGE_BIGNUM) {
323
- rb_raise(rb_eStandardError, "Invalid Type, not a small bignum");
324
- }
314
+ static VALUE bert_read_atom(struct bert_buf *buf)
315
+ {
316
+ VALUE rb_atom;
317
+ uint32_t atom_len;
325
318
 
326
- unsigned int size = read_4(pData);
327
- unsigned int sign = read_1(pData);
319
+ bert_buf_ensure(buf, 2);
320
+ atom_len = bert_buf_read16(buf);
328
321
 
329
- VALUE num = INT2NUM(0);
330
- VALUE tmp;
322
+ /* Instead of trying to build the symbol
323
+ * from here, just create a Ruby string
324
+ * and internalize it. this will be faster for
325
+ * unique symbols */
326
+ bert_buf_ensure(buf, atom_len);
327
+ rb_atom = rb_str_new((char *)buf->data, atom_len);
328
+ buf->data += atom_len;
331
329
 
332
- unsigned char buf[size + 1];
333
- read_string_raw(buf, pData, size);
330
+ return rb_str_intern(rb_atom);
331
+ }
332
+
333
+ static VALUE bert_read_sint(struct bert_buf *buf)
334
+ {
335
+ bert_buf_ensure(buf, 1);
336
+ return INT2FIX((uint8_t)bert_buf_read8(buf));
337
+ }
338
+
339
+ static VALUE bert_read_int(struct bert_buf *buf)
340
+ {
341
+ bert_buf_ensure(buf, 4);
342
+ return LONG2NUM((int32_t)bert_buf_read32(buf));
343
+ }
334
344
 
335
- int i;
336
- for(i = 0; i < size; ++i) {
337
- tmp = INT2FIX(*(buf + i));
338
- tmp = rb_funcall(tmp, rb_intern("<<"), 1, INT2NUM(i * 8));
345
+ static VALUE bert_buf_tobignum(struct bert_buf *buf, uint8_t sign, uint32_t bin_digits)
346
+ {
347
+ #ifdef BERT_FAST_BIGNUM
348
+ uint32_t *bin_buf = NULL;
349
+ VALUE rb_num;
350
+ uint32_t round_size;
351
+
352
+ bert_buf_ensure(buf, bin_digits);
353
+
354
+ /* Hack: ensure that we have at least a full word
355
+ * of extra padding for the actual string, so Ruby
356
+ * cannot guess the sign of the bigint from the MSB */
357
+ round_size = 4 + ((bin_digits + 3) & ~3);
358
+ bin_buf = xmalloc(round_size);
359
+
360
+ memcpy(bin_buf, buf->data, bin_digits);
361
+ memset((char *)bin_buf + bin_digits, 0x0, round_size - bin_digits);
362
+
363
+ /* Make Ruby unpack the string internally.
364
+ * this is significantly faster than adding
365
+ * the bytes one by one */
366
+ rb_num = rb_big_unpack(bin_buf, round_size / 4);
367
+
368
+ /* Enfore sign. So fast! */
369
+ RBIGNUM_SET_SIGN(rb_num, !sign);
370
+
371
+ free(bin_buf);
372
+ return rb_num;
373
+ #else
374
+ /**
375
+ * Slower bignum serialization; convert to a base16
376
+ * string and then let ruby parse it internally.
377
+ *
378
+ * We're shipping with this by default because
379
+ * `rb_big_unpack` is not trustworthy
380
+ */
381
+ static const char to_hex[] = "0123456789abcdef";
382
+ char *num_str = NULL, *ptr;
383
+ VALUE rb_num;
384
+ int32_t i;
385
+
386
+ bert_buf_ensure(buf, bin_digits);
387
+
388
+ /* 2 digits per byte + sign + trailing null */
389
+ num_str = ptr = xmalloc((bin_digits * 2) + 2);
390
+
391
+ *ptr++ = sign ? '-' : '+';
392
+
393
+ for (i = (int32_t)bin_digits - 1; i >= 0; --i) {
394
+ uint8_t val = buf->data[i];
395
+ *ptr++ = to_hex[val >> 4];
396
+ *ptr++ = to_hex[val & 0xf];
397
+ }
398
+
399
+ *ptr = 0;
400
+ buf->data += bin_digits;
401
+
402
+ rb_num = rb_cstr_to_inum(num_str, 16, 1);
403
+ free(num_str);
404
+
405
+ return rb_num;
406
+ #endif
407
+ }
408
+
409
+ VALUE bert_read_sbignum(struct bert_buf *buf)
410
+ {
411
+ uint8_t sign, bin_digits;
339
412
 
340
- num = rb_funcall(num, rb_intern("+"), 1, tmp);
341
- }
413
+ bert_buf_ensure(buf, 2);
342
414
 
343
- if(sign) {
344
- num = rb_funcall(num, rb_intern("*"), 1, INT2NUM(-1));
345
- }
415
+ bin_digits = bert_buf_read8(buf);
416
+ sign = bert_buf_read8(buf);
346
417
 
347
- return num;
418
+ return bert_buf_tobignum(buf, sign, (uint32_t)bin_digits);
348
419
  }
349
420
 
350
- VALUE read_float(unsigned char **pData) {
351
- if(read_1(pData) != ERL_FLOAT) {
352
- rb_raise(rb_eStandardError, "Invalid Type, not a float");
353
- }
421
+ VALUE bert_read_lbignum(struct bert_buf *buf)
422
+ {
423
+ uint32_t bin_digits;
424
+ uint8_t sign;
354
425
 
355
- unsigned char buf[32];
356
- read_string_raw(buf, pData, 31);
426
+ bert_buf_ensure(buf, 5);
357
427
 
358
- VALUE rString = rb_str_new2((char *) buf);
428
+ bin_digits = bert_buf_read32(buf);
429
+ sign = bert_buf_read8(buf);
359
430
 
360
- return rb_funcall(rString, rb_intern("to_f"), 0);
431
+ return bert_buf_tobignum(buf, sign, bin_digits);
361
432
  }
362
433
 
363
- VALUE read_nil(unsigned char **pData) {
364
- if(read_1(pData) != ERL_NIL) {
365
- rb_raise(rb_eStandardError, "Invalid Type, not a nil list");
366
- }
434
+ /*
435
+ * -------------------
436
+ * |1 | 31 |
437
+ * |99 | Float String|
438
+ * -------------------
439
+ *
440
+ * A float is stored in string format. the format used in sprintf
441
+ * to format the float is "%.20e" (there are more bytes allocated
442
+ * than necessary). To unpack the float use sscanf with format "%lf".
443
+ */
444
+ static VALUE bert_read_float(struct bert_buf *buf)
445
+ {
446
+ VALUE rb_float;
447
+
448
+ bert_buf_ensure(buf, 31);
449
+
450
+ rb_float = rb_str_new((char *)buf->data, 31);
451
+ buf->data += 31;
452
+
453
+ return rb_funcall(rb_float, rb_intern("to_f"), 0);
454
+ }
367
455
 
368
- return rb_ary_new2(0);
456
+ static VALUE bert_read_nil(struct bert_buf *buf)
457
+ {
458
+ return rb_ary_new2(0);
369
459
  }
370
460
 
371
- // read_any_raw
372
-
373
- VALUE read_any_raw(unsigned char **pData) {
374
- switch(peek_1(pData)) {
375
- case ERL_SMALL_INT:
376
- return read_small_int(pData);
377
- break;
378
- case ERL_INT:
379
- return read_int(pData);
380
- break;
381
- case ERL_FLOAT:
382
- return read_float(pData);
383
- break;
384
- case ERL_ATOM:
385
- return read_atom(pData);
386
- break;
387
- case ERL_SMALL_TUPLE:
388
- return read_small_tuple(pData);
389
- break;
390
- case ERL_LARGE_TUPLE:
391
- return read_large_tuple(pData);
392
- break;
393
- case ERL_NIL:
394
- return read_nil(pData);
395
- break;
396
- case ERL_STRING:
397
- return read_string(pData);
398
- break;
399
- case ERL_LIST:
400
- return read_list(pData);
401
- break;
402
- case ERL_BIN:
403
- return read_bin(pData);
404
- break;
405
- case ERL_SMALL_BIGNUM:
406
- return read_small_bignum(pData);
407
- break;
408
- case ERL_LARGE_BIGNUM:
409
- return read_large_bignum(pData);
410
- break;
411
- }
412
- return Qnil;
461
+ static VALUE bert_read_invalid(struct bert_buf *buf)
462
+ {
463
+ rb_raise(rb_eTypeError, "Invalid object tag in BERT stream");
464
+ return Qnil;
413
465
  }
414
466
 
415
- VALUE method_decode(VALUE klass, VALUE rString) {
416
- unsigned char *data = (unsigned char *) StringValuePtr(rString);
467
+ static VALUE rb_bert_decode(VALUE klass, VALUE rb_string)
468
+ {
469
+ struct bert_buf buf;
417
470
 
418
- unsigned char **pData = &data;
471
+ Check_Type(rb_string, T_STRING);
472
+ buf.data = (uint8_t *)RSTRING_PTR(rb_string);
473
+ buf.end = buf.data + RSTRING_LEN(rb_string);
419
474
 
420
- // check protocol version
421
- if(read_1(pData) != ERL_VERSION) {
422
- rb_raise(rb_eStandardError, "Bad Magic");
423
- }
475
+ bert_buf_ensure(&buf, 1);
424
476
 
425
- return read_any_raw(pData);
477
+ if (bert_buf_read8(&buf) != ERL_VERSION)
478
+ rb_raise(rb_eTypeError, "Invalid magic value for BERT string");
479
+
480
+ return bert_read(&buf);
426
481
  }
427
482
 
428
- VALUE method_impl(VALUE klass) {
429
- return rb_str_new("C", 1);
483
+ static VALUE rb_bert_impl(VALUE klass)
484
+ {
485
+ return rb_str_new("C", 1);
430
486
  }
431
487
 
432
- void Init_decode() {
433
- mBERT = rb_const_get(rb_cObject, rb_intern("BERT"));
434
- cDecode = rb_define_class_under(mBERT, "Decode", rb_cObject);
435
- cTuple = rb_const_get(mBERT, rb_intern("Tuple"));
436
- rb_define_singleton_method(cDecode, "decode", method_decode, 1);
437
- rb_define_singleton_method(cDecode, "impl", method_impl, 0);
488
+ void Init_decode()
489
+ {
490
+ rb_mBERT = rb_const_get(rb_cObject, rb_intern("BERT"));
491
+ rb_cTuple = rb_const_get(rb_mBERT, rb_intern("Tuple"));
492
+
493
+ rb_cDecode = rb_define_class_under(rb_mBERT, "Decode", rb_cObject);
494
+ rb_define_singleton_method(rb_cDecode, "decode", rb_bert_decode, 1);
495
+ rb_define_singleton_method(rb_cDecode, "impl", rb_bert_impl, 0);
438
496
  }
@@ -1,6 +1,9 @@
1
1
  # Loads mkmf which is used to make makefiles for Ruby extensions
2
2
  require 'mkmf'
3
3
 
4
+ # warnings save lives
5
+ $CFLAGS << " -Wall "
6
+
4
7
  # Give it a name
5
8
  extension_name = 'decode'
6
9
 
@@ -8,7 +8,9 @@ module BERT
8
8
  end
9
9
 
10
10
  def self.decode(string)
11
- new(StringIO.new(string)).read_any
11
+ io = StringIO.new(string)
12
+ io.set_encoding('binary') if io.respond_to?(:set_encoding)
13
+ new(io).read_any
12
14
  end
13
15
 
14
16
  def initialize(ins)
@@ -113,7 +115,7 @@ module BERT
113
115
  value = read_4
114
116
  negative = (value >> 31)[0] == 1
115
117
  value = (value - (1 << 32)) if negative
116
- value = Fixnum.induced_from(value)
118
+ value
117
119
  end
118
120
 
119
121
  def read_small_bignum
@@ -126,7 +128,7 @@ module BERT
126
128
  value = (byte * (256 ** index))
127
129
  sign != 0 ? (result - value) : (result + value)
128
130
  end
129
- Bignum.induced_from(added)
131
+ added
130
132
  end
131
133
 
132
134
  def read_large_bignum
@@ -139,7 +141,7 @@ module BERT
139
141
  value = (byte * (256 ** index))
140
142
  sign != 0 ? (result - value) : (result + value)
141
143
  end
142
- Bignum.induced_from(added)
144
+ added
143
145
  end
144
146
 
145
147
  def read_float
@@ -10,6 +10,7 @@ module BERT
10
10
 
11
11
  def self.encode(data)
12
12
  io = StringIO.new
13
+ io.set_encoding('binary') if io.respond_to?(:set_encoding)
13
14
  self.new(io).write_any(data)
14
15
  io.string
15
16
  end
@@ -56,7 +57,7 @@ module BERT
56
57
  fail(sym) unless sym.is_a?(Symbol)
57
58
  data = sym.to_s
58
59
  write_1 ATOM
59
- write_2 data.length
60
+ write_2 data.bytesize
60
61
  write_string data
61
62
  end
62
63
 
@@ -78,7 +79,7 @@ module BERT
78
79
  end
79
80
 
80
81
  def write_bignum(num)
81
- n = (num.to_s(2).size / 8.0).ceil
82
+ n = (num.abs.to_s(2).size / 8.0).ceil
82
83
  if n < 256
83
84
  write_1 SMALL_BIGNUM
84
85
  write_1 n
@@ -125,7 +126,7 @@ module BERT
125
126
 
126
127
  def write_binary(data)
127
128
  write_1 BIN
128
- write_4 data.length
129
+ write_4 data.bytesize
129
130
  write_string data
130
131
  end
131
132
 
@@ -44,7 +44,8 @@ class BertTest < Test::Unit::TestCase
44
44
  dd << {:a => 1, :b => 2}
45
45
  dd << Time.now
46
46
  dd << /^c(a)t$/i
47
-
47
+
48
+ dd << 178
48
49
  dd << 256**256 - 1
49
50
 
50
51
  dd << :true
@@ -1,3 +1,5 @@
1
+ # encoding: utf-8
2
+
1
3
  require 'test_helper'
2
4
 
3
5
  class EncoderTest < Test::Unit::TestCase
@@ -79,9 +81,22 @@ class EncoderTest < Test::Unit::TestCase
79
81
  assert cruby[1].instance_of?(BERT::Tuple)
80
82
  end
81
83
 
84
+ should 'handle utf8 strings' do
85
+ bert = [131, 109, 0, 0, 0, 5, 195, 169, 116, 195, 169].pack('C*')
86
+ assert_equal bert, BERT::Encoder.encode("été")
87
+ end
88
+
89
+ should 'handle utf8 symbols' do
90
+ bert = [131, 100, 0, 5, 195, 169, 116, 195, 169].pack('C*')
91
+ assert_equal bert, BERT::Encoder.encode(:'été')
92
+ end
93
+
82
94
  should "handle bignums" do
83
95
  bert = [131,110,8,0,0,0,232,137,4,35,199,138].pack('c*')
84
96
  assert_equal bert, BERT::Encoder.encode(10_000_000_000_000_000_000)
97
+
98
+ bert = [131,110,8,1,0,0,232,137,4,35,199,138].pack('c*')
99
+ assert_equal bert, BERT::Encoder.encode(-10_000_000_000_000_000_000)
85
100
  end
86
101
 
87
102
  should "leave other stuff alone" do
metadata CHANGED
@@ -1,7 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: bert
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.2
4
+ hash: 31
5
+ prerelease:
6
+ segments:
7
+ - 1
8
+ - 1
9
+ - 6
10
+ version: 1.1.6
5
11
  platform: ruby
6
12
  authors:
7
13
  - Tom Preston-Werner
@@ -14,14 +20,18 @@ default_executable:
14
20
  dependencies:
15
21
  - !ruby/object:Gem::Dependency
16
22
  name: thoughtbot-shoulda
17
- type: :development
18
- version_requirement:
19
- version_requirements: !ruby/object:Gem::Requirement
23
+ prerelease: false
24
+ requirement: &id001 !ruby/object:Gem::Requirement
25
+ none: false
20
26
  requirements:
21
27
  - - ">="
22
28
  - !ruby/object:Gem::Version
29
+ hash: 3
30
+ segments:
31
+ - 0
23
32
  version: "0"
24
- version:
33
+ type: :development
34
+ version_requirements: *id001
25
35
  description: BERT Serializiation for Ruby
26
36
  email: tom@mojombo.com
27
37
  executables: []
@@ -69,21 +79,27 @@ require_paths:
69
79
  - lib
70
80
  - ext
71
81
  required_ruby_version: !ruby/object:Gem::Requirement
82
+ none: false
72
83
  requirements:
73
84
  - - ">="
74
85
  - !ruby/object:Gem::Version
86
+ hash: 3
87
+ segments:
88
+ - 0
75
89
  version: "0"
76
- version:
77
90
  required_rubygems_version: !ruby/object:Gem::Requirement
91
+ none: false
78
92
  requirements:
79
93
  - - ">="
80
94
  - !ruby/object:Gem::Version
95
+ hash: 3
96
+ segments:
97
+ - 0
81
98
  version: "0"
82
- version:
83
99
  requirements: []
84
100
 
85
101
  rubyforge_project:
86
- rubygems_version: 1.3.5
102
+ rubygems_version: 1.6.2
87
103
  signing_key:
88
104
  specification_version: 3
89
105
  summary: BERT Serializiation for Ruby