sereal 0.0.9 → 0.0.10

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8247824a3c96a8a8a0ef7a190fed6d8517ba85dc
4
- data.tar.gz: 6e8edec6c3143ecd94fb22d6c96bb33664fd706b
3
+ metadata.gz: 0e7b9572c05e8fd8dbb6d80be1a9cd5d15ecabec
4
+ data.tar.gz: a7dc03f83063a2f39378ed99dedf96345440aaa7
5
5
  SHA512:
6
- metadata.gz: a09768f1a182fa25eaae541c8af2e8396910eaccd1e9e08d313d7b5b98fe6aa098cd95de0c4dd88061683135b91a9c273f32489f8ab941cac2b74f4e0e1763cf
7
- data.tar.gz: 611c0cb5a659adf051a0478bd9c621d2cc325d404db686e989217f6a9a755e2495e3697ff6b4f0999e6b38f9b8a5642f83842b6d456609647422ef48a53b15bc
6
+ metadata.gz: ac8b523cf687622f153b5778cb107dae447d5a68f9fb702cc71abb2240e4c62dc173665ac0a6cbd50a4955f03c2658ead475d1e7dd611d208d42c630c8ab4411
7
+ data.tar.gz: 560fea321249afbce734d49e633886fd33fa371b3e11cb4da9d9ae653f56262acc40c560d4044fb59bb14bc776a40ebcfa2db0b65cca0bd37342f80e67f4da0f
data/bin/rsrl CHANGED
@@ -1,12 +1,23 @@
1
1
  #!/usr/bin/env ruby
2
+ require 'pp'
3
+
2
4
  begin
3
5
  require File.join(".",File.dirname(__FILE__),'..','lib','sereal')
4
6
  rescue LoadError
5
7
  require 'sereal'
6
8
  end
7
9
 
10
+ class SerealPerlObject
11
+ def inspect
12
+ "#{@class} - #{@value.inspect}"
13
+ end
14
+ end
15
+
8
16
  content = ENV['Sereal_STREAM'] ? STDIN : ARGF.read
9
17
  compress = Sereal::RAW
18
+ debug = ENV['Sereal_DEBUG'] ? Sereal::DEBUG : 0
19
+ debug |= ENV['Sereal_REF'] ? Sereal::REF : 0
20
+ debug |= ENV['Sereal_COPY'] ? Sereal::COPY : 0
10
21
 
11
22
  {'Sereal_SNAPPY' => Sereal::SNAPPY,'Sereal_SNAPPY_INCR' => Sereal::SNAPPY_INCR }.each do |k,v|
12
23
  if ENV[k]
@@ -15,10 +26,9 @@ compress = Sereal::RAW
15
26
  end
16
27
 
17
28
  if ENV['Sereal_STREAM'] || content[0..3] == '=srl'
18
- Sereal.decode(content) do |x|
19
- STDOUT.write(x)
20
- puts
29
+ Sereal.decode(content,debug | Sereal::THAW) do |x|
30
+ PP.pp(x)
21
31
  end
22
32
  else
23
- STDOUT.write(Sereal.encode(eval(content), compress))
33
+ STDOUT.write(Sereal.encode(eval(content), compress | debug))
24
34
  end
data/ext/sereal/buffer.h CHANGED
@@ -3,7 +3,7 @@
3
3
  static void s_dump(sereal_t *s);
4
4
 
5
5
  static inline void s_free_data_if_not_mine(sereal_t *s) {
6
- if (!(s->flags & FLAG_NOT_MINE)) {
6
+ if (!(s->flags & __NOT_MINE)) {
7
7
  if (s->data)
8
8
  free(s->data);
9
9
  s->data = NULL;
@@ -12,6 +12,7 @@ static inline void s_free_data_if_not_mine(sereal_t *s) {
12
12
 
13
13
  static inline void s_init_tracker(sereal_t *s) {
14
14
  if (s->tracked == Qnil) {
15
+ SD(s,"initializing tracker");
15
16
  s->tracked = rb_hash_new();
16
17
  rb_gc_mark(s->tracked);
17
18
  }
@@ -23,6 +24,13 @@ static inline void s_reset_tracker(sereal_t *s) {
23
24
  }
24
25
  }
25
26
 
27
+ static inline void s_init_copy(sereal_t *s) {
28
+ if (s->copy == Qnil) {
29
+ s->copy = rb_hash_new();
30
+ rb_gc_mark(s->copy);
31
+ }
32
+ }
33
+
26
34
  static inline void s_destroy(sereal_t *s) {
27
35
  if (!s)
28
36
  return;
@@ -46,6 +54,8 @@ static inline sereal_t * s_create(void) {
46
54
  sereal_t *s = s_alloc_or_raise(NULL,sizeof(*s));
47
55
  ZERO(s,sizeof(*s));
48
56
  s->tracked = Qnil;
57
+ s->copy = Qnil;
58
+
49
59
  return s;
50
60
  }
51
61
 
@@ -98,7 +108,7 @@ static inline int s_read_stream(sereal_t *s, u32 end) {
98
108
  static inline void *s_get_p_at_pos(sereal_t *s, u32 pos,u32 req) {
99
109
  // returning s->data[pos], so we just make size count from 0
100
110
  if (pos + req >= s->size) {
101
- if (s->flags & FLAG_STREAM) {
111
+ if (s->flags & __STREAM) {
102
112
  if (s_read_stream(s,pos + req + 1) < 0) {
103
113
  s_raise(s,rb_eRangeError,"stream request for %d bytes failed (err: %s)",
104
114
  req,strerror(errno));
@@ -169,6 +179,11 @@ static inline u32 s_shift_position_bang(sereal_t *s, u32 len) {
169
179
  return len;
170
180
  }
171
181
 
182
+ static inline void s_set_flag_at_pos(sereal_t *s, u32 pos, u8 flag) {
183
+ u8 *p = s_get_p_at_pos(s,pos,0);
184
+ *p |= flag;
185
+ }
186
+
172
187
  static void b_dump(u8 *p, u32 len, u32 pos) {
173
188
  int i;
174
189
 
data/ext/sereal/decode.c CHANGED
@@ -180,34 +180,112 @@ static VALUE s_read_extend(sereal_t *s, u8 tag) {
180
180
  }
181
181
 
182
182
  static VALUE s_read_ref(sereal_t *s, u8 tag) {
183
- u64 off = s_get_varint_bang(s);
184
183
  if (s->tracked == Qnil)
185
184
  s_raise(s,rb_eArgError,"there are no references stored");
186
- return rb_hash_aref(s->tracked,INT2FIX(off + s->hdr_end));
185
+ u64 off = s_get_varint_bang(s);
186
+ VALUE object = rb_hash_lookup(s->tracked,INT2FIX(off + s->hdr_end));
187
+ SD(s,"reading reference from offset: %d, id: %d",off + s->hdr_end,FIX2INT(rb_obj_id(object)));
188
+ return object;
187
189
  }
188
190
 
191
+ #define TRAVEL(s,__stored) \
192
+ do { \
193
+ u32 offset = s_get_varint_bang(s) - 1; \
194
+ __stored = s->pos; \
195
+ s->pos = offset + s->hdr_end; \
196
+ SD(s,"going back offset: %d, stored position: %d (tag: %d)",offset,stored_pos,tag); \
197
+ } while(0)
198
+ #define BACK(s,__stored) \
199
+ do { \
200
+ if (__stored > 0) { \
201
+ SD(s,"going forward to pos: %d",__stored); \
202
+ s->pos = __stored; \
203
+ } \
204
+ } while(0);
205
+
206
+
189
207
  static VALUE s_read_copy(sereal_t *s, u8 tag) {
190
- VALUE ref = s_red_ref(s,tag);
191
- return rb_obj_dup(ref);
208
+ u32 stored_pos = 0;
209
+ TRAVEL(s,stored_pos);
210
+ VALUE object = sereal_to_rb_object(s);
211
+ BACK(s,stored_pos);
212
+ return object;
213
+ }
214
+
215
+
216
+ #define MUST_BE_SOMETHING(__klass,__type) \
217
+ if (TYPE(__klass) != __type) \
218
+ s_raise(s,rb_eTypeError,"unexpected object type: %s (expecting: %s(%d) got: %s(%d))",rb_obj_classname(__klass),(__type == T_STRING ? "String" : (__type == T_ARRAY ? "Array" : "_unknown_")),__type, rb_obj_classname(__klass),TYPE(__klass));
219
+
220
+ static VALUE s_read_perl_object(sereal_t *s, u8 tag) {
221
+ u32 stored_pos = 0;
222
+ if (tag == SRL_HDR_OBJECTV)
223
+ TRAVEL(s,stored_pos);
224
+
225
+ VALUE s_klass = sereal_to_rb_object(s);
226
+ BACK(s,stored_pos);
227
+ MUST_BE_SOMETHING(s_klass,T_STRING);
228
+
229
+ SD(s,"fetched perl class named: %s",RSTRING_PTR(s_klass));
230
+
231
+ VALUE object = sereal_to_rb_object(s);
232
+
233
+ VALUE pobj = rb_class_new_instance(0,NULL,SerealPerlObject);
234
+ rb_ivar_set(pobj,ID_CLASS,s_klass);
235
+ rb_ivar_set(pobj,ID_VALUE,object);
236
+ return pobj;
192
237
  }
193
238
 
239
+ static VALUE s_read_object_freeze(sereal_t *s, u8 tag) {
240
+ if (!(s->flags & __THAW))
241
+ s_raise(s,rb_eTypeError,"object_freeze received, but decoder is initialized without Sereal::THAW option");
242
+
243
+ u32 stored_pos = 0;
244
+ if (tag == SRL_HDR_OBJECTV_FREEZE)
245
+ TRAVEL(s,stored_pos);
246
+
247
+ VALUE s_klass = sereal_to_rb_object(s);
248
+ BACK(s,stored_pos);
249
+ MUST_BE_SOMETHING(s_klass,T_STRING);
250
+
251
+ // hash it?
252
+ VALUE klass = rb_const_get(rb_cObject, rb_intern(RSTRING_PTR(s_klass)));
253
+ if (!rb_obj_respond_to(klass,THAW,0))
254
+ s_raise(s,rb_eTypeError,"class: %s does not respond to THAW",
255
+ rb_obj_classname(s_klass));
256
+
257
+ VALUE object = sereal_to_rb_object(s);
258
+ MUST_BE_SOMETHING(object,T_ARRAY);
259
+ rb_ary_unshift(object,ID2SYM(SEREAL));
260
+
261
+ return rb_funcall2(klass,THAW,RARRAY_LEN(object),RARRAY_PTR(object));
262
+ }
263
+ #undef TRAVEL
264
+ #undef BACK
265
+
194
266
  VALUE sereal_to_rb_object(sereal_t *s) {
195
267
  u8 t, tracked;
196
268
  S_RECURSE_INC(s);
197
269
  u32 pos;
198
- while (s->pos < s->size || (s->flags & FLAG_STREAM)) {
270
+ while (s->pos < s->size || (s->flags & __STREAM)) {
199
271
  t = s_get_u8_bang(s);
200
272
  tracked = (t & SRL_HDR_TRACK_FLAG ? 1 : 0);
201
273
  t &= ~SRL_HDR_TRACK_FLAG;
202
- pos = s->pos;
203
274
 
204
- S_RECURSE_DEC(s);
275
+ pos = s->pos;
205
276
 
206
277
  VALUE decoded = (*READERS[t])(s,t);
278
+
207
279
  if (tracked) {
208
280
  s_init_tracker(s);
209
- rb_hash_aset(s->tracked,INT2FIX(pos),decoded);
281
+ SD(s,"tracking object of class: %s(id: %d) at position: %d",rb_obj_classname(decoded),FIX2INT(rb_obj_id(decoded)),pos);
282
+ VALUE v_pos = INT2FIX(pos);
283
+ if (rb_hash_lookup(s->tracked,v_pos) == Qnil)
284
+ rb_hash_aset(s->tracked,INT2FIX(pos),decoded);
210
285
  }
286
+
287
+ SD(s,"object: %s: %s",rb_obj_classname(decoded),RSTRING_PTR(rb_funcall(decoded,rb_intern("to_s"),0)));
288
+ S_RECURSE_DEC(s);
211
289
  return decoded;
212
290
  }
213
291
  s_raise(s,rb_eArgError,"bad packet, or broken decoder");
@@ -218,9 +296,19 @@ VALUE method_sereal_decode(VALUE self, VALUE args) {
218
296
  u32 argc = RARRAY_LEN(args);
219
297
  if (argc < 1)
220
298
  rb_raise(rb_eArgError,"need at least 1 argument (object)");
221
- VALUE payload = rb_ary_shift(args);
299
+ VALUE payload = rb_ary_entry(args,0);
300
+
222
301
  u8 have_block = rb_block_given_p();
223
302
  sereal_t *s = s_create();
303
+ if (argc == 2) {
304
+ VALUE flags = rb_ary_entry(args,1);
305
+ if (flags != Qnil && flags != Qfalse) {
306
+ if (TYPE(flags) == T_FIXNUM)
307
+ s->flags = FIX2LONG(flags) & __ARGUMENT_FLAGS;
308
+ else
309
+ s_raise(s,rb_eArgError,"second argument must be an integer (used only for flags) %s given",rb_obj_classname(flags));
310
+ }
311
+ }
224
312
  u64 offset = 0;
225
313
 
226
314
  if (TYPE(payload) == T_FILE) {
@@ -229,8 +317,9 @@ VALUE method_sereal_decode(VALUE self, VALUE args) {
229
317
 
230
318
  rb_io_t *fptr;
231
319
  GetOpenFile(payload, fptr);
232
- s->flags |= FLAG_STREAM;
320
+ s->flags |= __STREAM;
233
321
  s->fd = fptr->fd;
322
+ SD(s,"reading strea with fd: %d",s->fd);
234
323
  } else if (TYPE(payload) != T_STRING) {
235
324
  rb_raise(rb_eTypeError,"can not decode objects of type %s",rb_obj_classname(payload));
236
325
  }
@@ -239,14 +328,13 @@ again:
239
328
  s->pos = 0;
240
329
  s_reset_tracker(s);
241
330
 
242
- if (s->flags & FLAG_STREAM) {
331
+ if (s->flags & __STREAM) {
243
332
  s->size = 0;
244
333
  s->rsize = 0;
245
334
  if (s_read_stream(s,__MIN_SIZE) < 0) {
246
335
  s_destroy(s);
247
336
  return Qnil;
248
337
  }
249
-
250
338
  } else {
251
339
  u32 size = RSTRING_LEN(payload) - offset;
252
340
  if (offset > RSTRING_LEN(payload) || (offset > 0 && size < __MIN_SIZE)) {
@@ -256,7 +344,7 @@ again:
256
344
  if (size < __MIN_SIZE)
257
345
  s_raise(s,rb_eTypeError,"size(%d) is less then min packet size %d, offset: %d",size,__MIN_SIZE,offset);
258
346
 
259
- s->flags |= FLAG_NOT_MINE;
347
+ s->flags |= __NOT_MINE;
260
348
  s->data = RSTRING_PTR(payload) + offset;
261
349
  s->size = size;
262
350
  }
@@ -269,12 +357,15 @@ again:
269
357
  u8 suffix = s_get_varint_bang(s);
270
358
  u8 is_compressed;
271
359
 
272
- if ((version & SRL_PROTOCOL_ENCODING_MASK) == SRL_PROTOCOL_ENCODING_SNAPPY)
360
+ if ((version & SRL_PROTOCOL_ENCODING_MASK) == SRL_PROTOCOL_ENCODING_SNAPPY) {
273
361
  is_compressed = __SNAPPY;
274
- else if ((version & SRL_PROTOCOL_ENCODING_MASK) == SRL_PROTOCOL_ENCODING_SNAPPY_INCR)
362
+ } else if ((version & SRL_PROTOCOL_ENCODING_MASK) == SRL_PROTOCOL_ENCODING_SNAPPY_INCR) {
275
363
  is_compressed = __SNAPPY_INCR;
276
- else
364
+ } else {
277
365
  is_compressed = __RAW;
366
+ }
367
+
368
+ SD(s,"initialized (s) with compression type: %d",is_compressed);
278
369
 
279
370
  if (is_compressed) {
280
371
  u32 uncompressed_len;
@@ -283,13 +374,13 @@ again:
283
374
  if (is_compressed == __SNAPPY_INCR) {
284
375
  compressed_len = s_get_varint_bang(s);
285
376
  } else {
286
- if (s->flags & FLAG_STREAM)
377
+ if (s->flags & __STREAM)
287
378
  s_raise(s,rb_eTypeError,"parsing non incremental compressed objects, from stream of data, is not supported");
288
379
 
289
380
  compressed_len = s->size - s->pos;
290
381
  }
291
-
292
- if (s->flags & FLAG_STREAM)
382
+ SD(s,"compressed len: %d",compressed_len);
383
+ if (s->flags & __STREAM)
293
384
  s_get_p_req_inclusive(s,compressed_len);
294
385
 
295
386
  int snappy_header_len = csnappy_get_uncompressed_length(s_get_p_req_inclusive(s,compressed_len),
@@ -312,9 +403,11 @@ again:
312
403
  s->size = uncompressed_len;
313
404
  offset += s->pos + compressed_len;
314
405
  s->pos = 0;
315
- s->flags &= ~FLAG_NOT_MINE;
406
+ s->flags &= ~__NOT_MINE;
316
407
  }
408
+
317
409
  s->hdr_end = s->pos;
410
+ SD(s,"header end at %d",s->hdr_end);
318
411
  VALUE result = sereal_to_rb_object(s);
319
412
  if (!is_compressed)
320
413
  offset += s->pos;
data/ext/sereal/decode.h CHANGED
@@ -23,6 +23,8 @@ static VALUE s_read_pad(sereal_t *s, u8 tag);
23
23
  static VALUE s_read_extend(sereal_t *s, u8 tag);
24
24
  static VALUE s_read_ref(sereal_t *s, u8 tag);
25
25
  static VALUE s_read_copy(sereal_t *s, u8 tag);
26
+ static VALUE s_read_object_freeze(sereal_t *s, u8 tag);
27
+ static VALUE s_read_perl_object(sereal_t *s, u8 tag);
26
28
 
27
29
  static VALUE (*READERS[256])(sereal_t *, u8) = {
28
30
  s_read_small_positive_int, // 0 SRL_HDR_POS_LOW
@@ -69,14 +71,14 @@ static VALUE (*READERS[256])(sereal_t *, u8) = {
69
71
  s_read_ref, // 41 SRL_HDR_REFP
70
72
  s_read_hash, // 42 SRL_HDR_HASH
71
73
  s_read_array, // 43 SRL_HDR_ARRAY
72
- s_default_reader, /* XXX */ // 44 SRL_HDR_OBJECT
73
- s_default_reader, /* XXX */ // 45 SRL_HDR_OBJECTV
74
+ s_read_perl_object, // 44 SRL_HDR_OBJECT
75
+ s_read_perl_object, // 45 SRL_HDR_OBJECTV
74
76
  s_read_ref, // 46 SRL_HDR_ALIAS
75
77
  s_read_copy, // 47 SRL_HDR_COPY
76
78
  s_default_reader, /* XXX */ // 48 SRL_HDR_WEAKEN
77
79
  s_read_regexp, // 49 SRL_HDR_REGEXP
78
- s_default_reader, /* XXX */ // 50 SRL_HDR_RESERVED_LOW
79
- s_default_reader, /* XXX */ // 51
80
+ s_read_object_freeze, // 50 OBJECT_FREEZE
81
+ s_read_object_freeze, // 51 OBJECTV_FREEZE
80
82
  s_default_reader, /* XXX */ // 52
81
83
  s_default_reader, /* XXX */ // 53
82
84
  s_default_reader, /* XXX */ // 54
data/ext/sereal/encode.c CHANGED
@@ -28,6 +28,12 @@
28
28
  #elif T_NIL > W_SIZE
29
29
  #define W_SIZE T_NIL
30
30
  #endif
31
+ #define COMPLEX(object) \
32
+ (TYPE(object) == T_ARRAY || \
33
+ TYPE(object) == T_HASH || \
34
+ TYPE(object) == T_SYMBOL || \
35
+ TYPE(object) == T_OBJECT || \
36
+ TYPE(object) == T_STRING)
31
37
 
32
38
  /* function pointer array */
33
39
  void (*WRITER[W_SIZE])(sereal_t *,VALUE);
@@ -146,15 +152,57 @@ static void s_append_hash(sereal_t *s, VALUE object) {
146
152
  convert symbols to strings
147
153
  */
148
154
  static void s_append_symbol(sereal_t *s, VALUE object) {
149
- VALUE string = rb_sym_to_s(object);
150
- s_append_rb_string(s,string);
155
+ VALUE string = rb_sym_to_s(object);
156
+ s_append_rb_string(s,string);
151
157
  }
152
158
 
159
+ static void s_append_copy(sereal_t *s, VALUE object) {
160
+ u32 pos = FIX2LONG(object);
161
+ s_append_hdr_with_varint(s,SRL_HDR_COPY,pos - s->hdr_end + 1);
162
+ }
163
+
164
+ static VALUE s_copy_or_keep_in_mind(sereal_t *s, VALUE object) {
165
+ if (s->copy == Qnil)
166
+ return Qnil;
167
+
168
+ VALUE stored_position = rb_hash_lookup(s->copy,object);
169
+ if (stored_position == Qnil)
170
+ rb_hash_aset(s->copy,object,INT2FIX(s->pos));
171
+ return stored_position;
172
+ }
173
+
174
+
153
175
  /*
154
- call object.to_srl and serialize the result
176
+ try to FREEZE the object so it can be THAW-ed at decode time
177
+ if not possible (no Sereal::THAW argument or object does not
178
+ respond to FREEZE), just call object.to_srl and serialize the
179
+ result
155
180
  */
156
181
  static void s_append_object(sereal_t *s, VALUE object) {
157
- rb_object_to_sereal(s,rb_funcall(object,rb_intern("to_srl"),0));
182
+ if (s->flags & __THAW && rb_obj_respond_to(object,FREEZE,0)) {
183
+ VALUE klass = rb_class_name(CLASS_OF(object));
184
+ VALUE copy = s_copy_or_keep_in_mind(s,klass);
185
+ if (copy != Qnil) {
186
+ s_append_u8(s,SRL_HDR_OBJECTV_FREEZE);
187
+ s_append_copy(s,copy);
188
+ } else {
189
+ s_append_u8(s,SRL_HDR_OBJECT_FREEZE);
190
+ s_append_rb_string(s,rb_class_name(CLASS_OF(object)));
191
+ }
192
+ VALUE frozen = rb_funcall(object,FREEZE,1,ID2SYM(SEREAL));
193
+ if (TYPE(frozen) != T_ARRAY)
194
+ s_raise(s,rb_eTypeError,"Sereal spec requires FREEZE to return array instead %s",rb_obj_classname(frozen));
195
+
196
+ // REFN + ARRAY
197
+ s_append_u8(s,SRL_HDR_REFN);
198
+ s_append_hdr_with_varint(s,SRL_HDR_ARRAY,RARRAY_LEN(frozen));
199
+ int i;
200
+ for (i = 0; i < RARRAY_LEN(frozen); i++)
201
+ rb_object_to_sereal(s,rb_ary_entry(frozen,i));
202
+
203
+ } else {
204
+ rb_object_to_sereal(s,rb_funcall(object,TO_SRL,0));
205
+ }
158
206
  }
159
207
 
160
208
 
@@ -219,31 +267,31 @@ static void s_append_nil(sereal_t *s, VALUE object) {
219
267
  static void s_append_refp(sereal_t *s, VALUE object) {
220
268
  u32 pos = FIX2LONG(object);
221
269
  s_append_hdr_with_varint(s,SRL_HDR_REFP,pos - s->hdr_end + 1);
222
- u8 *reference = s_get_p_at_pos(s,pos,0);
223
- *reference |= SRL_HDR_TRACK_FLAG;
270
+ s_set_flag_at_pos(s,pos,SRL_HDR_TRACK_FLAG);
224
271
  }
225
272
 
226
273
  /* writer function pointers */
227
274
  static void rb_object_to_sereal(sereal_t *s, VALUE object) {
228
275
  S_RECURSE_INC(s);
229
276
  u32 pos = s->pos;
230
-
231
- if (s->tracked != Qnil &&
232
- TYPE(object) == T_ARRAY ||
233
- TYPE(object) == T_HASH ||
234
- TYPE(object) == T_SYMBOL ||
235
- TYPE(object) == T_STRING) {
236
-
277
+ if (COMPLEX(object)) {
278
+ VALUE stored;
237
279
  if (s->tracked != Qnil) {
238
- VALUE id = rb_obj_id(object);
239
- VALUE stored_position = rb_hash_aref(s->tracked,id);
240
- if (stored_position != Qnil) {
241
- s_append_refp(s,stored_position);
242
- goto out;
243
- } else {
280
+ if (s->tracked != Qnil) {
281
+ VALUE id = rb_obj_id(object);
282
+ stored = rb_hash_lookup(s->tracked,id);
283
+ if (stored != Qnil) {
284
+ s_append_refp(s,stored);
285
+ goto out;
286
+ }
244
287
  rb_hash_aset(s->tracked,id,INT2FIX(pos));
245
288
  }
246
289
  }
290
+ stored = s_copy_or_keep_in_mind(s,object);
291
+ if (stored != Qnil) {
292
+ s_append_copy(s,stored);
293
+ goto out;
294
+ }
247
295
  }
248
296
 
249
297
  (*WRITER[TYPE(object)])(s,object);
@@ -267,16 +315,21 @@ void fixup_varint_from_to(u8 *varint_start, u8 *varint_end, u32 number) {
267
315
  }
268
316
  }
269
317
 
270
- VALUE method_sereal_encode(VALUE self, VALUE args) {
318
+
319
+ /*
320
+ * Encodes object into Sereal
321
+ */
322
+ VALUE
323
+ method_sereal_encode(VALUE self, VALUE args) {
271
324
  u32 argc = RARRAY_LEN(args);
272
325
  if (argc < 1)
273
326
  rb_raise(rb_eArgError,"need at least 1 argument (object)");
274
327
 
275
328
  sereal_t *s = s_create();
276
- VALUE payload = rb_ary_shift(args);
329
+ VALUE payload = rb_ary_entry(args,0);
277
330
  VALUE compress = Qfalse;
278
331
  if (argc == 2)
279
- compress = rb_ary_shift(args);
332
+ compress = rb_ary_entry(args,1);
280
333
 
281
334
  u8 do_compress;
282
335
  u8 version = SRL_PROTOCOL_VERSION;
@@ -286,10 +339,15 @@ VALUE method_sereal_encode(VALUE self, VALUE args) {
286
339
  } else {
287
340
  do_compress = (compress == Qtrue ? 1 : 0);
288
341
  }
289
- if (do_compress & __REF) {
290
- do_compress &= ~__REF;
342
+
343
+ s->flags = do_compress & __ARGUMENT_FLAGS;
344
+ do_compress &=~ __ARGUMENT_FLAGS;
345
+ if (s->flags & __REF)
291
346
  s_init_tracker(s);
292
- }
347
+
348
+ if (s->flags & __COPY)
349
+ s_init_copy(s);
350
+
293
351
  switch(do_compress) {
294
352
  case __SNAPPY:
295
353
  version |= SRL_PROTOCOL_ENCODING_SNAPPY;
data/ext/sereal/proto.h CHANGED
@@ -33,16 +33,19 @@
33
33
  #define SRL_HDR_ARRAY ((char)43) /* <COUNT-VARINT> [<ITEM-TAG> ...] - count followed by items */
34
34
  #define SRL_HDR_OBJECT ((char)44) /* <STR-TAG> <ITEM-TAG> - class, object-item */
35
35
  #define SRL_HDR_OBJECTV ((char)45) /* <OFFSET-VARINT> <ITEM-TAG> - offset of previously used classname tag - object-item */
36
+
36
37
  #define SRL_HDR_ALIAS ((char)46) /* <OFFSET-VARINT> - alias to item defined at offset */
37
38
  #define SRL_HDR_COPY ((char)47) /* <OFFSET-VARINT> - copy of item defined at offset */
38
39
 
39
40
  #define SRL_HDR_WEAKEN ((char)48) /* <REF-TAG> - Weaken the following reference */
40
41
  #define SRL_HDR_REGEXP ((char)49) /* <PATTERN-STR-TAG> <MODIFIERS-STR-TAG>*/
42
+ #define SRL_HDR_OBJECT_FREEZE ((char)50) /* <STR-TAG> <ITEM-TAG> - class, object-item. Need to call "THAW" method on class after decoding */
43
+ #define SRL_HDR_OBJECTV_FREEZE ((char)51) /* <OFFSET-VARINT> <ITEM-TAG> - (OBJECTV_FREEZE is to OBJECT_FREEZE as OBJECTV is to OBJECT) */
41
44
 
42
45
  /* Note: Can do reserved check with a range now, but as we start using
43
46
  * them, might have to explicit == check later. */
44
- #define SRL_HDR_RESERVED ((char)50) /* reserved */
45
- #define SRL_HDR_RESERVED_LOW ((char)50)
47
+ #define SRL_HDR_RESERVED ((char)52) /* reserved */
48
+ #define SRL_HDR_RESERVED_LOW ((char)52)
46
49
  #define SRL_HDR_RESERVED_HIGH ((char)57)
47
50
 
48
51
  #define SRL_HDR_FALSE ((char)58) /* false (PL_sv_no) */
data/ext/sereal/sereal.c CHANGED
@@ -2,36 +2,44 @@
2
2
  #include "encode.h"
3
3
 
4
4
  VALUE Sereal = Qnil;
5
+ VALUE SerealPerlObject = Qnil;
6
+ ID FREEZE;
7
+ ID THAW;
8
+ ID TO_SRL;
9
+ ID SEREAL;
10
+ ID ID_CLASS;
11
+ ID ID_VALUE;
5
12
  void Init_sereal();
6
13
 
7
14
  /*
8
15
  * Encode/Decode object using Sereal binary protocol:
9
16
  * https://github.com/Sereal/Sereal/blob/master/sereal_spec.pod
10
17
  *
11
- * Sereal.encode(object) -> serialized blob
12
- * Sereal.encode(object,Sereal::SNAPPY_INCR) -> snappy compressed blob
13
- * Sereal.encode(object,Sereal::SNAPPY) -> snappy compressed blob
18
+ * ==install:
14
19
  *
15
- * SNAPPY_INCR encoded objects can be appended into one output and then the
16
- * decoder will know what to do.
20
+ * $ gem install sereal
17
21
  *
18
- * Sereal.encode(object,Sereal::REF)
19
- * or Sereal::REF|Sereal::SNAPPY_INC, or Sereal::REF|Sereal::SNAPPY
22
+ * or you can build it from github which requires:
23
+ * 1. rake compiler - <code>gem install rake-compiler</code> (https://github.com/luislavena/rake-compiler)
24
+ * 2. ruby 1.9+ or rubinius supporting 1.9+
20
25
  *
21
- * when encoding will try to use Sereal's REFP tag to transmit only the
22
- * the original object's offset in the packet.
23
- * So:
24
- * one = [ 1,2,3,4,5 ]
25
- * two = [ one, one ]
26
- * Sereal.encode(two,Sereal::REF)
27
- * will send 'one' only once, and one REFP that points to the first one
28
- * it uses one.object_id as a hash key in a local tracker hash
29
- * and if it sees this object_id again it just sends the offset.
26
+ * $ git clone https://github.com/Sereal/Sereal
27
+ * $ cd Sereal/ruby
28
+ * $ gem build sereal.gemspec
29
+ * $ gem install sereal-0.0.?.gem
30
30
  *
31
+ * ==serialize:
32
+ * require 'sereal'
33
+ * Sereal.encode(object)
31
34
  *
32
- * Sereal.decode(blob) - returns the decoded object
33
- *
34
- * If the blob contains multiple compressed
35
+ * ===serializing objects
36
+ * if Sereal::THAW option is given the encoder will try to call FREEZE() instance method on
37
+ * the object being serialized, and it will serialize the class name + the output of FREEZE (see the THAW constant for more information)
38
+ * if the object does not respond to FREEZE it will call <code>to_srl</code> and serialize the result of that
39
+ * ==deserialize:
40
+ * require 'sereal'
41
+ * Sereal.decode(blob)
42
+ * If the blob contains multiple compressed objects
35
43
  * sub-blobs you should call it with:
36
44
  *
37
45
  * Sereal.decode(blob) do |decoded|
@@ -39,6 +47,7 @@ void Init_sereal();
39
47
  * end
40
48
  *
41
49
  * otherwise only the first decoded object will be returned
50
+ * ===stream decoding
42
51
  * there is also streaming support which takes any kind of IO object
43
52
  * like socket, or just regular File, and it is really easy to use:
44
53
  *
@@ -46,7 +55,7 @@ void Init_sereal();
46
55
  * # do something with the decoded object
47
56
  * end
48
57
  *
49
- * it works both with `incremental snappy` and with just combined sereal packets.
58
+ * it works both with SNAPPY_INCR and with just combined sereal packets.
50
59
  * another example but with TCPSocket:
51
60
  *
52
61
  * s = TCPSocket.new 'localhost', 2000
@@ -54,15 +63,216 @@ void Init_sereal();
54
63
  * # do something with the decoded object
55
64
  * end
56
65
  *
66
+ * ===multiple packets in one buffer
67
+ * it also supports decoding of multiple packets in one buffer:
68
+ *
69
+ * buf = ""
70
+ * buf << Sereal.encode([1,2,3],Sereal::SNAPPY_INCR)
71
+ * buf << Sereal.encode([3,4,5])
72
+ * buf << Sereal.encode([7,8,9],Sereal::SNAPPY_INCR)
73
+ * Sereal.decode(buf) do |decoded|
74
+ * p decoded
75
+ * end
76
+ * ==Sereal.encode() and Sereal.decode() accept:
77
+ * 1. compression types: RAW, SNAPPY_INCR, and SNAPPY
78
+ * 2. flags: REF, COPY, THAW and DEBUG
79
+ *
80
+ * flags and compression types can be used in combinations like:
81
+ *
82
+ * Sereal.encode([1,2,3],Sereal::REF|Sereal::COPY|Sereal::THAW|Sereal::SNAPPY_INCR)
83
+ *
84
+ * but you can not use 2 types of compression in the same time
85
+ *
86
+ * ==LZ4
87
+ * For a brief period (version 0.0.5 to 0.0.6) there was support for LZ4 and LZ4HC, which was pushed to the master branch by mistake. If you are depending on it please convert your data using <code>bin/rsrl</code> or just use the <code>0.0.5</code> version of the sereal gem.
88
+ *
89
+ * gem 'sereal', '= 0.0.5'
90
+ * #or
91
+ * $ gem install sereal -v 0.0.5
92
+ *
57
93
  */
58
94
  void Init_sereal() {
59
- Sereal = rb_define_class("Sereal", rb_cObject);
60
- rb_define_singleton_method(Sereal, "encode", method_sereal_encode, -2);
61
- rb_define_singleton_method(Sereal, "decode", method_sereal_decode, -2);
62
- rb_define_const(Sereal, "SNAPPY",INT2NUM(__SNAPPY));
63
- rb_define_const(Sereal, "SNAPPY_INCR",INT2NUM(__SNAPPY_INCR));
64
- rb_define_const(Sereal, "RAW",INT2NUM(__RAW));
65
- rb_define_const(Sereal, "REF",INT2NUM(__REF));
66
- s_init_writers();
95
+ TO_SRL = rb_intern("to_srl");
96
+ THAW = rb_intern("THAW");
97
+ FREEZE = rb_intern("FREEZE");
98
+ SEREAL = rb_intern("Sereal");
99
+ ID_CLASS = rb_intern("@class");
100
+ ID_VALUE = rb_intern("@value");
101
+
102
+ SerealPerlObject = rb_define_class("SerealPerlObject", rb_cObject);
103
+
104
+ Sereal = rb_define_class("Sereal", rb_cObject);
105
+ rb_define_singleton_method(Sereal, "encode", method_sereal_encode, -2);
106
+
107
+ rb_define_singleton_method(Sereal, "decode", method_sereal_decode, -2);
108
+
109
+ /*
110
+ * instructs the encoder to use Snappy compression
111
+ * nb: this is Sereal protocol version 1 only
112
+ * do not use it if possible.
113
+ *
114
+ * Sereal.encode(object,Sereal::SNAPPY)
115
+ */
116
+ rb_define_const(Sereal, "SNAPPY",INT2NUM(__SNAPPY));
117
+
118
+
119
+ /*
120
+ * instructs the encoder to use Snappy compression
121
+ * but with support for incremental packet (meaning
122
+ * you can combine packets into one big blob of data
123
+ * and the encoder will be confident that there is no
124
+ * corruption, because the SNAPPY_INCR packet contains the
125
+ * uncompressed length)
126
+ *
127
+ * Sereal.encode(object,Sereal::SNAPPY_INCR)
128
+ */
129
+ rb_define_const(Sereal, "SNAPPY_INCR",INT2NUM(__SNAPPY_INCR));
130
+
131
+ /*
132
+ * instructs the encoder to use no compression (default)
133
+ *
134
+ * Sereal.encode(object,Sereal::RAW)
135
+ */
136
+ rb_define_const(Sereal, "RAW",INT2NUM(__RAW));
137
+
138
+ /*
139
+ * (can also be used with any compression type/RAW)
140
+ * instructs the encoder to keep track of object_ids
141
+ * and when it sees that object with the same id has
142
+ * already been encoded, it just creates REFP reference
143
+ * with offset to the first item
144
+ * so:
145
+ * name = "john doe"
146
+ * object = [ name, name ]
147
+ * Sereal.encode(object,Sereal::REF|Sereal::SNAPPY_INCR)
148
+ *
149
+ * will actually create something that looks like:
150
+ *
151
+ * 000006/000001: 42 066 ARRAYREF(2)
152
+ * 000007/000002: 68* 232 SHORT_BINARY(8): 'john doe'
153
+ * 000016/000011: 29 041 REFP(2)
154
+ *
155
+ * instead of:
156
+ *
157
+ * 000006/000001: 42 066 ARRAYREF(2)
158
+ * 000007/000002: 68 104 SHORT_BINARY(8): 'john doe'
159
+ * 000016/000011: 68 104 SHORT_BINARY(8): 'john doe'
160
+ *
161
+ * as you can see Sereal saved us 7 bytes
162
+ *
163
+ * this can hurt performance because the encoder must get the
164
+ * object_id of every encoded object, and put it in a hash
165
+ * with the current position, so it can be looked up later
166
+ */
167
+ rb_define_const(Sereal, "REF",INT2NUM(__REF));
168
+
169
+
170
+ /*
171
+ * very similar to Sereal::REF, but it instructs the decoder
172
+ * to create new item, by going back in time as if the OFFSET
173
+ * of the COPY tag was being read right now
174
+ * it puts every object as a key in a hash (instead of its
175
+ * object_id)
176
+ *
177
+ * object = [ { name => "john"} , { name => "john"} ]
178
+ * Sereal.encode(object,Sereal::COPY|Sereal::SNAPPY_INCR)
179
+ *
180
+ * will produce:
181
+ *
182
+ * 000006/000001: 42 066 ARRAYREF(2)
183
+ * 000007/000002: 51 081 HASHREF(2)
184
+ * KEY:
185
+ * 000008/000003: 64 100 SHORT_BINARY(4): 'name'
186
+ * VALUE:
187
+ * 000013/000008: 64 100 SHORT_BINARY(4): 'john'
188
+ * 000018/000013: 2f 047 COPY(2)
189
+ *
190
+ * as you can see the hash is sent only once
191
+ *
192
+ * COPY can be used with REF like:
193
+ *
194
+ * object = "bazinga"
195
+ *
196
+ * Sereal.encode([object,"bazinga",object],Sereal::REF|Sereal::COPY)
197
+ *
198
+ * will produce:
199
+ * 000006/000001: 43 067 ARRAYREF(3)
200
+ * 000007/000002: 67* 231 SHORT_BINARY(7): 'bazinga'
201
+ * 000015/000010: 2f 047 COPY(2)
202
+ * 000017/000012: 29 041 REFP(2)
203
+ *
204
+ * using Sereal::COPY also hurts performance because every encoding step
205
+ * the encoder must look into a hash if the object exists in it
206
+ * so it can create a COPY tag instead of encoding the object again
207
+ */
208
+ rb_define_const(Sereal, "COPY",INT2NUM(__COPY));
209
+
210
+ /*
211
+ * add support for FREEZE/THAW
212
+ * it calls <code>object.FREEZE(:Sereal)</code> and it serializes the result of that
213
+ * with the object's class name, when deserializing it calls
214
+ * <code>class.THAW(:Sereal,the output of FREEZE)</code>
215
+ *
216
+ * class StorableFile
217
+ * attr_accessor :path
218
+ * def initialize(path,pos)
219
+ * @path = path
220
+ * @pos = pos
221
+ * end
222
+ * def read
223
+ * @pos += 1
224
+ * end
225
+ * def FREEZE(serializer)
226
+ * [@path,@pos]
227
+ * end
228
+ * def self.THAW(serializer,path,pos)
229
+ * self.new(path,pos)
230
+ * end
231
+ * end
232
+ *
233
+ * obj = StorableFile.new("/tmp/sereal.txt",0)
234
+ * obj.read # read some data
235
+ * encoded = Sereal.encode(obj,Sereal::THAW)
236
+ * # this actually encodes something like:
237
+ * # 'StorableFile' -> [ "/tmp/sereal.txt", 1 ]
238
+ * restored = Sereal.decode(encoded,Sereal::THAW)
239
+ * # this will call StorableFile.THAW(:Sereal,"/tmp/sereal.txt",1)
240
+ *
241
+ * as you can see the array returned from FREEZE is exploded into
242
+ * arguments for THAW, FREEZE *MUST* return array, otherwise the
243
+ * encoder throws TypeException.
244
+ * *BOTH* encoder *AND* decoder must be started with Sereal::THAW option
245
+ * in order for it to work properly
246
+ */
247
+ rb_define_const(Sereal, "THAW",INT2NUM(__THAW));
248
+
249
+ /*
250
+ * the argument given to FREEZE() and THAW() as serializer
251
+ * which at the moment is:
252
+ *
253
+ * :Sereal
254
+ */
255
+ rb_define_const(Sereal, "FREEZER",ID2SYM(SEREAL));
256
+
257
+ /*
258
+ * enable debug output
259
+ *
260
+ * name = "john"
261
+ * Sereal.decode([name,name],Sereal::REF|Sereal::DEBUG)
262
+ *
263
+ * produces:
264
+ *
265
+ * initialized (s) with compression type: 0 { p: 6, s: 14, l: 0, h: 0 } method_sereal_decode()
266
+ * header end at 6 { p: 6, s: 14, l: 0, h: 6 } method_sereal_decode()
267
+ * tracking object of class: String(id: 7678920) at position: 8 { p: 12, s: 14, l: 2, h: 6 } sereal_to_rb_object()
268
+ * object: String: john { p: 12, s: 14, l: 2, h: 6 } sereal_to_rb_object()
269
+ * reading reference from offset: 8, id: 7678920 { p: 14, s: 14, l: 2, h: 6 } s_read_ref()
270
+ * object: String: john { p: 14, s: 14, l: 2, h: 6 } sereal_to_rb_object()
271
+ * object: Array: ["john", "john"] { p: 14, s: 14, l: 1, h: 6 } sereal_to_rb_object()
272
+ *
273
+ */
274
+ rb_define_const(Sereal, "DEBUG",INT2NUM(__DEBUG));
275
+
276
+ s_init_writers();
67
277
  }
68
278
 
data/ext/sereal/sereal.h CHANGED
@@ -12,7 +12,6 @@ typedef unsigned int u32;
12
12
  typedef unsigned short u16;
13
13
  typedef unsigned char u8;
14
14
  typedef struct _sereal sereal_t;
15
- typedef struct _track_entry track_t;
16
15
 
17
16
  #define TRUE 1
18
17
  #define FALSE 0
@@ -41,19 +40,27 @@ typedef struct _track_entry track_t;
41
40
  #define EXTENDED RE_OPTION_EXTENDED
42
41
  #endif
43
42
 
44
- #define FORMAT(fmt,arg...) fmt " [%s():%s:%d @ %u]\n",##arg,__func__,__FILE__,__LINE__,(unsigned int) time(NULL)
43
+ #define FORMAT(fmt,arg...) fmt " %s()\n",##arg,__func__
45
44
  #define E(fmt,arg...) fprintf(stderr,FORMAT(fmt,##arg))
46
45
  #define D(fmt,arg...) printf(FORMAT(fmt,##arg))
46
+ #define SD(s,fmt,arg...) \
47
+ do { \
48
+ if (s->flags & __DEBUG) { \
49
+ int i; \
50
+ for (i = 0; i < s->level; i++) { \
51
+ printf(" "); \
52
+ } \
53
+ D(fmt " { p: %d, s: %d, l: %u, h: %u } ",##arg,s->pos,s->size,s->level,s->hdr_end); \
54
+ } \
55
+ } while(0);
47
56
 
48
57
  #define s_raise(what,ex,arg...) \
49
58
  do { \
59
+ SD(s,"s_raise"); \
50
60
  s_destroy(what); \
51
61
  rb_raise(ex,##arg); \
52
62
  } while(0);
53
63
 
54
- #define FLAG_NOT_MINE 1
55
- #define FLAG_STREAM 2
56
- #define FLAG_REF 4
57
64
  struct _sereal {
58
65
  u8 *data;
59
66
  u32 size;
@@ -61,7 +68,9 @@ struct _sereal {
61
68
  u32 rsize;
62
69
  u32 level;
63
70
  u8 flags;
71
+ u8 expect;
64
72
  VALUE tracked;
73
+ VALUE copy;
65
74
  u32 hdr_end;
66
75
  int fd;
67
76
  struct buffer {
@@ -73,13 +82,20 @@ struct _sereal {
73
82
 
74
83
  VALUE method_sereal_encode(VALUE self, VALUE args);
75
84
  VALUE method_sereal_decode(VALUE self, VALUE payload);
85
+ extern ID FREEZE;
86
+ extern ID THAW;
87
+ extern ID TO_SRL;
88
+ extern ID SEREAL;
89
+ extern ID ID_CLASS;
90
+ extern ID ID_VALUE;
91
+ extern VALUE SerealPerlObject;
76
92
 
77
- #define S_RECURSE_INC(s) \
78
- do { \
79
- if((s)->level++ > MAX_RECURSION_DEPTH) \
80
- s_raise((s),rb_eArgError, \
81
- "max recursion depth reached: %d (level: %d)", \
82
- MAX_RECURSION_DEPTH, s->level); \
93
+ #define S_RECURSE_INC(s) \
94
+ do { \
95
+ if((s)->level++ > MAX_RECURSION_DEPTH) \
96
+ s_raise((s),rb_eArgError, \
97
+ "max recursion depth reached: %d (level: %d)", \
98
+ MAX_RECURSION_DEPTH, s->level); \
83
99
  } while(0);
84
100
 
85
101
  #define S_RECURSE_DEC(s) ((s)->level--)
@@ -94,5 +110,12 @@ VALUE method_sereal_decode(VALUE self, VALUE payload);
94
110
  #define __SNAPPY 1
95
111
  #define __SNAPPY_INCR 2
96
112
  #define __REF 4
113
+ #define __DEBUG 8
114
+ #define __NOT_MINE 16
115
+ #define __STREAM 32
116
+ #define __THAW 64
117
+ #define __COPY 64
118
+ #define __ARGUMENT_FLAGS (__DEBUG|__THAW|__REF|__COPY)
119
+
97
120
  #define __MIN_SIZE 6
98
121
  #endif
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sereal
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Borislav Nikolov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-01-03 00:00:00.000000000 Z
11
+ date: 2014-01-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
@@ -30,7 +30,8 @@ executables:
30
30
  - rsrl
31
31
  extensions:
32
32
  - ext/sereal/extconf.rb
33
- extra_rdoc_files: []
33
+ extra_rdoc_files:
34
+ - ext/sereal/sereal.c
34
35
  files:
35
36
  - ext/sereal/decode.c
36
37
  - ext/sereal/encode.c
@@ -52,7 +53,9 @@ homepage: https://github.com/Sereal/Sereal
52
53
  licenses: []
53
54
  metadata: {}
54
55
  post_install_message:
55
- rdoc_options: []
56
+ rdoc_options:
57
+ - --exclude
58
+ - .*\.so
56
59
  require_paths:
57
60
  - lib
58
61
  required_ruby_version: !ruby/object:Gem::Requirement