sereal 0.0.9 → 0.0.10

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 8247824a3c96a8a8a0ef7a190fed6d8517ba85dc
4
- data.tar.gz: 6e8edec6c3143ecd94fb22d6c96bb33664fd706b
3
+ metadata.gz: 0e7b9572c05e8fd8dbb6d80be1a9cd5d15ecabec
4
+ data.tar.gz: a7dc03f83063a2f39378ed99dedf96345440aaa7
5
5
  SHA512:
6
- metadata.gz: a09768f1a182fa25eaae541c8af2e8396910eaccd1e9e08d313d7b5b98fe6aa098cd95de0c4dd88061683135b91a9c273f32489f8ab941cac2b74f4e0e1763cf
7
- data.tar.gz: 611c0cb5a659adf051a0478bd9c621d2cc325d404db686e989217f6a9a755e2495e3697ff6b4f0999e6b38f9b8a5642f83842b6d456609647422ef48a53b15bc
6
+ metadata.gz: ac8b523cf687622f153b5778cb107dae447d5a68f9fb702cc71abb2240e4c62dc173665ac0a6cbd50a4955f03c2658ead475d1e7dd611d208d42c630c8ab4411
7
+ data.tar.gz: 560fea321249afbce734d49e633886fd33fa371b3e11cb4da9d9ae653f56262acc40c560d4044fb59bb14bc776a40ebcfa2db0b65cca0bd37342f80e67f4da0f
data/bin/rsrl CHANGED
@@ -1,12 +1,23 @@
1
1
  #!/usr/bin/env ruby
2
+ require 'pp'
3
+
2
4
  begin
3
5
  require File.join(".",File.dirname(__FILE__),'..','lib','sereal')
4
6
  rescue LoadError
5
7
  require 'sereal'
6
8
  end
7
9
 
10
+ class SerealPerlObject
11
+ def inspect
12
+ "#{@class} - #{@value.inspect}"
13
+ end
14
+ end
15
+
8
16
  content = ENV['Sereal_STREAM'] ? STDIN : ARGF.read
9
17
  compress = Sereal::RAW
18
+ debug = ENV['Sereal_DEBUG'] ? Sereal::DEBUG : 0
19
+ debug |= ENV['Sereal_REF'] ? Sereal::REF : 0
20
+ debug |= ENV['Sereal_COPY'] ? Sereal::COPY : 0
10
21
 
11
22
  {'Sereal_SNAPPY' => Sereal::SNAPPY,'Sereal_SNAPPY_INCR' => Sereal::SNAPPY_INCR }.each do |k,v|
12
23
  if ENV[k]
@@ -15,10 +26,9 @@ compress = Sereal::RAW
15
26
  end
16
27
 
17
28
  if ENV['Sereal_STREAM'] || content[0..3] == '=srl'
18
- Sereal.decode(content) do |x|
19
- STDOUT.write(x)
20
- puts
29
+ Sereal.decode(content,debug | Sereal::THAW) do |x|
30
+ PP.pp(x)
21
31
  end
22
32
  else
23
- STDOUT.write(Sereal.encode(eval(content), compress))
33
+ STDOUT.write(Sereal.encode(eval(content), compress | debug))
24
34
  end
data/ext/sereal/buffer.h CHANGED
@@ -3,7 +3,7 @@
3
3
  static void s_dump(sereal_t *s);
4
4
 
5
5
  static inline void s_free_data_if_not_mine(sereal_t *s) {
6
- if (!(s->flags & FLAG_NOT_MINE)) {
6
+ if (!(s->flags & __NOT_MINE)) {
7
7
  if (s->data)
8
8
  free(s->data);
9
9
  s->data = NULL;
@@ -12,6 +12,7 @@ static inline void s_free_data_if_not_mine(sereal_t *s) {
12
12
 
13
13
  static inline void s_init_tracker(sereal_t *s) {
14
14
  if (s->tracked == Qnil) {
15
+ SD(s,"initializing tracker");
15
16
  s->tracked = rb_hash_new();
16
17
  rb_gc_mark(s->tracked);
17
18
  }
@@ -23,6 +24,13 @@ static inline void s_reset_tracker(sereal_t *s) {
23
24
  }
24
25
  }
25
26
 
27
+ static inline void s_init_copy(sereal_t *s) {
28
+ if (s->copy == Qnil) {
29
+ s->copy = rb_hash_new();
30
+ rb_gc_mark(s->copy);
31
+ }
32
+ }
33
+
26
34
  static inline void s_destroy(sereal_t *s) {
27
35
  if (!s)
28
36
  return;
@@ -46,6 +54,8 @@ static inline sereal_t * s_create(void) {
46
54
  sereal_t *s = s_alloc_or_raise(NULL,sizeof(*s));
47
55
  ZERO(s,sizeof(*s));
48
56
  s->tracked = Qnil;
57
+ s->copy = Qnil;
58
+
49
59
  return s;
50
60
  }
51
61
 
@@ -98,7 +108,7 @@ static inline int s_read_stream(sereal_t *s, u32 end) {
98
108
  static inline void *s_get_p_at_pos(sereal_t *s, u32 pos,u32 req) {
99
109
  // returning s->data[pos], so we just make size count from 0
100
110
  if (pos + req >= s->size) {
101
- if (s->flags & FLAG_STREAM) {
111
+ if (s->flags & __STREAM) {
102
112
  if (s_read_stream(s,pos + req + 1) < 0) {
103
113
  s_raise(s,rb_eRangeError,"stream request for %d bytes failed (err: %s)",
104
114
  req,strerror(errno));
@@ -169,6 +179,11 @@ static inline u32 s_shift_position_bang(sereal_t *s, u32 len) {
169
179
  return len;
170
180
  }
171
181
 
182
+ static inline void s_set_flag_at_pos(sereal_t *s, u32 pos, u8 flag) {
183
+ u8 *p = s_get_p_at_pos(s,pos,0);
184
+ *p |= flag;
185
+ }
186
+
172
187
  static void b_dump(u8 *p, u32 len, u32 pos) {
173
188
  int i;
174
189
 
data/ext/sereal/decode.c CHANGED
@@ -180,34 +180,112 @@ static VALUE s_read_extend(sereal_t *s, u8 tag) {
180
180
  }
181
181
 
182
182
  static VALUE s_read_ref(sereal_t *s, u8 tag) {
183
- u64 off = s_get_varint_bang(s);
184
183
  if (s->tracked == Qnil)
185
184
  s_raise(s,rb_eArgError,"there are no references stored");
186
- return rb_hash_aref(s->tracked,INT2FIX(off + s->hdr_end));
185
+ u64 off = s_get_varint_bang(s);
186
+ VALUE object = rb_hash_lookup(s->tracked,INT2FIX(off + s->hdr_end));
187
+ SD(s,"reading reference from offset: %d, id: %d",off + s->hdr_end,FIX2INT(rb_obj_id(object)));
188
+ return object;
187
189
  }
188
190
 
191
+ #define TRAVEL(s,__stored) \
192
+ do { \
193
+ u32 offset = s_get_varint_bang(s) - 1; \
194
+ __stored = s->pos; \
195
+ s->pos = offset + s->hdr_end; \
196
+ SD(s,"going back offset: %d, stored position: %d (tag: %d)",offset,stored_pos,tag); \
197
+ } while(0)
198
+ #define BACK(s,__stored) \
199
+ do { \
200
+ if (__stored > 0) { \
201
+ SD(s,"going forward to pos: %d",__stored); \
202
+ s->pos = __stored; \
203
+ } \
204
+ } while(0);
205
+
206
+
189
207
  static VALUE s_read_copy(sereal_t *s, u8 tag) {
190
- VALUE ref = s_red_ref(s,tag);
191
- return rb_obj_dup(ref);
208
+ u32 stored_pos = 0;
209
+ TRAVEL(s,stored_pos);
210
+ VALUE object = sereal_to_rb_object(s);
211
+ BACK(s,stored_pos);
212
+ return object;
213
+ }
214
+
215
+
216
+ #define MUST_BE_SOMETHING(__klass,__type) \
217
+ if (TYPE(__klass) != __type) \
218
+ s_raise(s,rb_eTypeError,"unexpected object type: %s (expecting: %s(%d) got: %s(%d))",rb_obj_classname(__klass),(__type == T_STRING ? "String" : (__type == T_ARRAY ? "Array" : "_unknown_")),__type, rb_obj_classname(__klass),TYPE(__klass));
219
+
220
+ static VALUE s_read_perl_object(sereal_t *s, u8 tag) {
221
+ u32 stored_pos = 0;
222
+ if (tag == SRL_HDR_OBJECTV)
223
+ TRAVEL(s,stored_pos);
224
+
225
+ VALUE s_klass = sereal_to_rb_object(s);
226
+ BACK(s,stored_pos);
227
+ MUST_BE_SOMETHING(s_klass,T_STRING);
228
+
229
+ SD(s,"fetched perl class named: %s",RSTRING_PTR(s_klass));
230
+
231
+ VALUE object = sereal_to_rb_object(s);
232
+
233
+ VALUE pobj = rb_class_new_instance(0,NULL,SerealPerlObject);
234
+ rb_ivar_set(pobj,ID_CLASS,s_klass);
235
+ rb_ivar_set(pobj,ID_VALUE,object);
236
+ return pobj;
192
237
  }
193
238
 
239
+ static VALUE s_read_object_freeze(sereal_t *s, u8 tag) {
240
+ if (!(s->flags & __THAW))
241
+ s_raise(s,rb_eTypeError,"object_freeze received, but decoder is initialized without Sereal::THAW option");
242
+
243
+ u32 stored_pos = 0;
244
+ if (tag == SRL_HDR_OBJECTV_FREEZE)
245
+ TRAVEL(s,stored_pos);
246
+
247
+ VALUE s_klass = sereal_to_rb_object(s);
248
+ BACK(s,stored_pos);
249
+ MUST_BE_SOMETHING(s_klass,T_STRING);
250
+
251
+ // hash it?
252
+ VALUE klass = rb_const_get(rb_cObject, rb_intern(RSTRING_PTR(s_klass)));
253
+ if (!rb_obj_respond_to(klass,THAW,0))
254
+ s_raise(s,rb_eTypeError,"class: %s does not respond to THAW",
255
+ rb_obj_classname(s_klass));
256
+
257
+ VALUE object = sereal_to_rb_object(s);
258
+ MUST_BE_SOMETHING(object,T_ARRAY);
259
+ rb_ary_unshift(object,ID2SYM(SEREAL));
260
+
261
+ return rb_funcall2(klass,THAW,RARRAY_LEN(object),RARRAY_PTR(object));
262
+ }
263
+ #undef TRAVEL
264
+ #undef BACK
265
+
194
266
  VALUE sereal_to_rb_object(sereal_t *s) {
195
267
  u8 t, tracked;
196
268
  S_RECURSE_INC(s);
197
269
  u32 pos;
198
- while (s->pos < s->size || (s->flags & FLAG_STREAM)) {
270
+ while (s->pos < s->size || (s->flags & __STREAM)) {
199
271
  t = s_get_u8_bang(s);
200
272
  tracked = (t & SRL_HDR_TRACK_FLAG ? 1 : 0);
201
273
  t &= ~SRL_HDR_TRACK_FLAG;
202
- pos = s->pos;
203
274
 
204
- S_RECURSE_DEC(s);
275
+ pos = s->pos;
205
276
 
206
277
  VALUE decoded = (*READERS[t])(s,t);
278
+
207
279
  if (tracked) {
208
280
  s_init_tracker(s);
209
- rb_hash_aset(s->tracked,INT2FIX(pos),decoded);
281
+ SD(s,"tracking object of class: %s(id: %d) at position: %d",rb_obj_classname(decoded),FIX2INT(rb_obj_id(decoded)),pos);
282
+ VALUE v_pos = INT2FIX(pos);
283
+ if (rb_hash_lookup(s->tracked,v_pos) == Qnil)
284
+ rb_hash_aset(s->tracked,INT2FIX(pos),decoded);
210
285
  }
286
+
287
+ SD(s,"object: %s: %s",rb_obj_classname(decoded),RSTRING_PTR(rb_funcall(decoded,rb_intern("to_s"),0)));
288
+ S_RECURSE_DEC(s);
211
289
  return decoded;
212
290
  }
213
291
  s_raise(s,rb_eArgError,"bad packet, or broken decoder");
@@ -218,9 +296,19 @@ VALUE method_sereal_decode(VALUE self, VALUE args) {
218
296
  u32 argc = RARRAY_LEN(args);
219
297
  if (argc < 1)
220
298
  rb_raise(rb_eArgError,"need at least 1 argument (object)");
221
- VALUE payload = rb_ary_shift(args);
299
+ VALUE payload = rb_ary_entry(args,0);
300
+
222
301
  u8 have_block = rb_block_given_p();
223
302
  sereal_t *s = s_create();
303
+ if (argc == 2) {
304
+ VALUE flags = rb_ary_entry(args,1);
305
+ if (flags != Qnil && flags != Qfalse) {
306
+ if (TYPE(flags) == T_FIXNUM)
307
+ s->flags = FIX2LONG(flags) & __ARGUMENT_FLAGS;
308
+ else
309
+ s_raise(s,rb_eArgError,"second argument must be an integer (used only for flags) %s given",rb_obj_classname(flags));
310
+ }
311
+ }
224
312
  u64 offset = 0;
225
313
 
226
314
  if (TYPE(payload) == T_FILE) {
@@ -229,8 +317,9 @@ VALUE method_sereal_decode(VALUE self, VALUE args) {
229
317
 
230
318
  rb_io_t *fptr;
231
319
  GetOpenFile(payload, fptr);
232
- s->flags |= FLAG_STREAM;
320
+ s->flags |= __STREAM;
233
321
  s->fd = fptr->fd;
322
+ SD(s,"reading strea with fd: %d",s->fd);
234
323
  } else if (TYPE(payload) != T_STRING) {
235
324
  rb_raise(rb_eTypeError,"can not decode objects of type %s",rb_obj_classname(payload));
236
325
  }
@@ -239,14 +328,13 @@ again:
239
328
  s->pos = 0;
240
329
  s_reset_tracker(s);
241
330
 
242
- if (s->flags & FLAG_STREAM) {
331
+ if (s->flags & __STREAM) {
243
332
  s->size = 0;
244
333
  s->rsize = 0;
245
334
  if (s_read_stream(s,__MIN_SIZE) < 0) {
246
335
  s_destroy(s);
247
336
  return Qnil;
248
337
  }
249
-
250
338
  } else {
251
339
  u32 size = RSTRING_LEN(payload) - offset;
252
340
  if (offset > RSTRING_LEN(payload) || (offset > 0 && size < __MIN_SIZE)) {
@@ -256,7 +344,7 @@ again:
256
344
  if (size < __MIN_SIZE)
257
345
  s_raise(s,rb_eTypeError,"size(%d) is less then min packet size %d, offset: %d",size,__MIN_SIZE,offset);
258
346
 
259
- s->flags |= FLAG_NOT_MINE;
347
+ s->flags |= __NOT_MINE;
260
348
  s->data = RSTRING_PTR(payload) + offset;
261
349
  s->size = size;
262
350
  }
@@ -269,12 +357,15 @@ again:
269
357
  u8 suffix = s_get_varint_bang(s);
270
358
  u8 is_compressed;
271
359
 
272
- if ((version & SRL_PROTOCOL_ENCODING_MASK) == SRL_PROTOCOL_ENCODING_SNAPPY)
360
+ if ((version & SRL_PROTOCOL_ENCODING_MASK) == SRL_PROTOCOL_ENCODING_SNAPPY) {
273
361
  is_compressed = __SNAPPY;
274
- else if ((version & SRL_PROTOCOL_ENCODING_MASK) == SRL_PROTOCOL_ENCODING_SNAPPY_INCR)
362
+ } else if ((version & SRL_PROTOCOL_ENCODING_MASK) == SRL_PROTOCOL_ENCODING_SNAPPY_INCR) {
275
363
  is_compressed = __SNAPPY_INCR;
276
- else
364
+ } else {
277
365
  is_compressed = __RAW;
366
+ }
367
+
368
+ SD(s,"initialized (s) with compression type: %d",is_compressed);
278
369
 
279
370
  if (is_compressed) {
280
371
  u32 uncompressed_len;
@@ -283,13 +374,13 @@ again:
283
374
  if (is_compressed == __SNAPPY_INCR) {
284
375
  compressed_len = s_get_varint_bang(s);
285
376
  } else {
286
- if (s->flags & FLAG_STREAM)
377
+ if (s->flags & __STREAM)
287
378
  s_raise(s,rb_eTypeError,"parsing non incremental compressed objects, from stream of data, is not supported");
288
379
 
289
380
  compressed_len = s->size - s->pos;
290
381
  }
291
-
292
- if (s->flags & FLAG_STREAM)
382
+ SD(s,"compressed len: %d",compressed_len);
383
+ if (s->flags & __STREAM)
293
384
  s_get_p_req_inclusive(s,compressed_len);
294
385
 
295
386
  int snappy_header_len = csnappy_get_uncompressed_length(s_get_p_req_inclusive(s,compressed_len),
@@ -312,9 +403,11 @@ again:
312
403
  s->size = uncompressed_len;
313
404
  offset += s->pos + compressed_len;
314
405
  s->pos = 0;
315
- s->flags &= ~FLAG_NOT_MINE;
406
+ s->flags &= ~__NOT_MINE;
316
407
  }
408
+
317
409
  s->hdr_end = s->pos;
410
+ SD(s,"header end at %d",s->hdr_end);
318
411
  VALUE result = sereal_to_rb_object(s);
319
412
  if (!is_compressed)
320
413
  offset += s->pos;
data/ext/sereal/decode.h CHANGED
@@ -23,6 +23,8 @@ static VALUE s_read_pad(sereal_t *s, u8 tag);
23
23
  static VALUE s_read_extend(sereal_t *s, u8 tag);
24
24
  static VALUE s_read_ref(sereal_t *s, u8 tag);
25
25
  static VALUE s_read_copy(sereal_t *s, u8 tag);
26
+ static VALUE s_read_object_freeze(sereal_t *s, u8 tag);
27
+ static VALUE s_read_perl_object(sereal_t *s, u8 tag);
26
28
 
27
29
  static VALUE (*READERS[256])(sereal_t *, u8) = {
28
30
  s_read_small_positive_int, // 0 SRL_HDR_POS_LOW
@@ -69,14 +71,14 @@ static VALUE (*READERS[256])(sereal_t *, u8) = {
69
71
  s_read_ref, // 41 SRL_HDR_REFP
70
72
  s_read_hash, // 42 SRL_HDR_HASH
71
73
  s_read_array, // 43 SRL_HDR_ARRAY
72
- s_default_reader, /* XXX */ // 44 SRL_HDR_OBJECT
73
- s_default_reader, /* XXX */ // 45 SRL_HDR_OBJECTV
74
+ s_read_perl_object, // 44 SRL_HDR_OBJECT
75
+ s_read_perl_object, // 45 SRL_HDR_OBJECTV
74
76
  s_read_ref, // 46 SRL_HDR_ALIAS
75
77
  s_read_copy, // 47 SRL_HDR_COPY
76
78
  s_default_reader, /* XXX */ // 48 SRL_HDR_WEAKEN
77
79
  s_read_regexp, // 49 SRL_HDR_REGEXP
78
- s_default_reader, /* XXX */ // 50 SRL_HDR_RESERVED_LOW
79
- s_default_reader, /* XXX */ // 51
80
+ s_read_object_freeze, // 50 OBJECT_FREEZE
81
+ s_read_object_freeze, // 51 OBJECTV_FREEZE
80
82
  s_default_reader, /* XXX */ // 52
81
83
  s_default_reader, /* XXX */ // 53
82
84
  s_default_reader, /* XXX */ // 54
data/ext/sereal/encode.c CHANGED
@@ -28,6 +28,12 @@
28
28
  #elif T_NIL > W_SIZE
29
29
  #define W_SIZE T_NIL
30
30
  #endif
31
+ #define COMPLEX(object) \
32
+ (TYPE(object) == T_ARRAY || \
33
+ TYPE(object) == T_HASH || \
34
+ TYPE(object) == T_SYMBOL || \
35
+ TYPE(object) == T_OBJECT || \
36
+ TYPE(object) == T_STRING)
31
37
 
32
38
  /* function pointer array */
33
39
  void (*WRITER[W_SIZE])(sereal_t *,VALUE);
@@ -146,15 +152,57 @@ static void s_append_hash(sereal_t *s, VALUE object) {
146
152
  convert symbols to strings
147
153
  */
148
154
  static void s_append_symbol(sereal_t *s, VALUE object) {
149
- VALUE string = rb_sym_to_s(object);
150
- s_append_rb_string(s,string);
155
+ VALUE string = rb_sym_to_s(object);
156
+ s_append_rb_string(s,string);
151
157
  }
152
158
 
159
+ static void s_append_copy(sereal_t *s, VALUE object) {
160
+ u32 pos = FIX2LONG(object);
161
+ s_append_hdr_with_varint(s,SRL_HDR_COPY,pos - s->hdr_end + 1);
162
+ }
163
+
164
+ static VALUE s_copy_or_keep_in_mind(sereal_t *s, VALUE object) {
165
+ if (s->copy == Qnil)
166
+ return Qnil;
167
+
168
+ VALUE stored_position = rb_hash_lookup(s->copy,object);
169
+ if (stored_position == Qnil)
170
+ rb_hash_aset(s->copy,object,INT2FIX(s->pos));
171
+ return stored_position;
172
+ }
173
+
174
+
153
175
  /*
154
- call object.to_srl and serialize the result
176
+ try to FREEZE the object so it can be THAW-ed at decode time
177
+ if not possible (no Sereal::THAW argument or object does not
178
+ respond to FREEZE), just call object.to_srl and serialize the
179
+ result
155
180
  */
156
181
  static void s_append_object(sereal_t *s, VALUE object) {
157
- rb_object_to_sereal(s,rb_funcall(object,rb_intern("to_srl"),0));
182
+ if (s->flags & __THAW && rb_obj_respond_to(object,FREEZE,0)) {
183
+ VALUE klass = rb_class_name(CLASS_OF(object));
184
+ VALUE copy = s_copy_or_keep_in_mind(s,klass);
185
+ if (copy != Qnil) {
186
+ s_append_u8(s,SRL_HDR_OBJECTV_FREEZE);
187
+ s_append_copy(s,copy);
188
+ } else {
189
+ s_append_u8(s,SRL_HDR_OBJECT_FREEZE);
190
+ s_append_rb_string(s,rb_class_name(CLASS_OF(object)));
191
+ }
192
+ VALUE frozen = rb_funcall(object,FREEZE,1,ID2SYM(SEREAL));
193
+ if (TYPE(frozen) != T_ARRAY)
194
+ s_raise(s,rb_eTypeError,"Sereal spec requires FREEZE to return array instead %s",rb_obj_classname(frozen));
195
+
196
+ // REFN + ARRAY
197
+ s_append_u8(s,SRL_HDR_REFN);
198
+ s_append_hdr_with_varint(s,SRL_HDR_ARRAY,RARRAY_LEN(frozen));
199
+ int i;
200
+ for (i = 0; i < RARRAY_LEN(frozen); i++)
201
+ rb_object_to_sereal(s,rb_ary_entry(frozen,i));
202
+
203
+ } else {
204
+ rb_object_to_sereal(s,rb_funcall(object,TO_SRL,0));
205
+ }
158
206
  }
159
207
 
160
208
 
@@ -219,31 +267,31 @@ static void s_append_nil(sereal_t *s, VALUE object) {
219
267
  static void s_append_refp(sereal_t *s, VALUE object) {
220
268
  u32 pos = FIX2LONG(object);
221
269
  s_append_hdr_with_varint(s,SRL_HDR_REFP,pos - s->hdr_end + 1);
222
- u8 *reference = s_get_p_at_pos(s,pos,0);
223
- *reference |= SRL_HDR_TRACK_FLAG;
270
+ s_set_flag_at_pos(s,pos,SRL_HDR_TRACK_FLAG);
224
271
  }
225
272
 
226
273
  /* writer function pointers */
227
274
  static void rb_object_to_sereal(sereal_t *s, VALUE object) {
228
275
  S_RECURSE_INC(s);
229
276
  u32 pos = s->pos;
230
-
231
- if (s->tracked != Qnil &&
232
- TYPE(object) == T_ARRAY ||
233
- TYPE(object) == T_HASH ||
234
- TYPE(object) == T_SYMBOL ||
235
- TYPE(object) == T_STRING) {
236
-
277
+ if (COMPLEX(object)) {
278
+ VALUE stored;
237
279
  if (s->tracked != Qnil) {
238
- VALUE id = rb_obj_id(object);
239
- VALUE stored_position = rb_hash_aref(s->tracked,id);
240
- if (stored_position != Qnil) {
241
- s_append_refp(s,stored_position);
242
- goto out;
243
- } else {
280
+ if (s->tracked != Qnil) {
281
+ VALUE id = rb_obj_id(object);
282
+ stored = rb_hash_lookup(s->tracked,id);
283
+ if (stored != Qnil) {
284
+ s_append_refp(s,stored);
285
+ goto out;
286
+ }
244
287
  rb_hash_aset(s->tracked,id,INT2FIX(pos));
245
288
  }
246
289
  }
290
+ stored = s_copy_or_keep_in_mind(s,object);
291
+ if (stored != Qnil) {
292
+ s_append_copy(s,stored);
293
+ goto out;
294
+ }
247
295
  }
248
296
 
249
297
  (*WRITER[TYPE(object)])(s,object);
@@ -267,16 +315,21 @@ void fixup_varint_from_to(u8 *varint_start, u8 *varint_end, u32 number) {
267
315
  }
268
316
  }
269
317
 
270
- VALUE method_sereal_encode(VALUE self, VALUE args) {
318
+
319
+ /*
320
+ * Encodes object into Sereal
321
+ */
322
+ VALUE
323
+ method_sereal_encode(VALUE self, VALUE args) {
271
324
  u32 argc = RARRAY_LEN(args);
272
325
  if (argc < 1)
273
326
  rb_raise(rb_eArgError,"need at least 1 argument (object)");
274
327
 
275
328
  sereal_t *s = s_create();
276
- VALUE payload = rb_ary_shift(args);
329
+ VALUE payload = rb_ary_entry(args,0);
277
330
  VALUE compress = Qfalse;
278
331
  if (argc == 2)
279
- compress = rb_ary_shift(args);
332
+ compress = rb_ary_entry(args,1);
280
333
 
281
334
  u8 do_compress;
282
335
  u8 version = SRL_PROTOCOL_VERSION;
@@ -286,10 +339,15 @@ VALUE method_sereal_encode(VALUE self, VALUE args) {
286
339
  } else {
287
340
  do_compress = (compress == Qtrue ? 1 : 0);
288
341
  }
289
- if (do_compress & __REF) {
290
- do_compress &= ~__REF;
342
+
343
+ s->flags = do_compress & __ARGUMENT_FLAGS;
344
+ do_compress &=~ __ARGUMENT_FLAGS;
345
+ if (s->flags & __REF)
291
346
  s_init_tracker(s);
292
- }
347
+
348
+ if (s->flags & __COPY)
349
+ s_init_copy(s);
350
+
293
351
  switch(do_compress) {
294
352
  case __SNAPPY:
295
353
  version |= SRL_PROTOCOL_ENCODING_SNAPPY;
data/ext/sereal/proto.h CHANGED
@@ -33,16 +33,19 @@
33
33
  #define SRL_HDR_ARRAY ((char)43) /* <COUNT-VARINT> [<ITEM-TAG> ...] - count followed by items */
34
34
  #define SRL_HDR_OBJECT ((char)44) /* <STR-TAG> <ITEM-TAG> - class, object-item */
35
35
  #define SRL_HDR_OBJECTV ((char)45) /* <OFFSET-VARINT> <ITEM-TAG> - offset of previously used classname tag - object-item */
36
+
36
37
  #define SRL_HDR_ALIAS ((char)46) /* <OFFSET-VARINT> - alias to item defined at offset */
37
38
  #define SRL_HDR_COPY ((char)47) /* <OFFSET-VARINT> - copy of item defined at offset */
38
39
 
39
40
  #define SRL_HDR_WEAKEN ((char)48) /* <REF-TAG> - Weaken the following reference */
40
41
  #define SRL_HDR_REGEXP ((char)49) /* <PATTERN-STR-TAG> <MODIFIERS-STR-TAG>*/
42
+ #define SRL_HDR_OBJECT_FREEZE ((char)50) /* <STR-TAG> <ITEM-TAG> - class, object-item. Need to call "THAW" method on class after decoding */
43
+ #define SRL_HDR_OBJECTV_FREEZE ((char)51) /* <OFFSET-VARINT> <ITEM-TAG> - (OBJECTV_FREEZE is to OBJECT_FREEZE as OBJECTV is to OBJECT) */
41
44
 
42
45
  /* Note: Can do reserved check with a range now, but as we start using
43
46
  * them, might have to explicit == check later. */
44
- #define SRL_HDR_RESERVED ((char)50) /* reserved */
45
- #define SRL_HDR_RESERVED_LOW ((char)50)
47
+ #define SRL_HDR_RESERVED ((char)52) /* reserved */
48
+ #define SRL_HDR_RESERVED_LOW ((char)52)
46
49
  #define SRL_HDR_RESERVED_HIGH ((char)57)
47
50
 
48
51
  #define SRL_HDR_FALSE ((char)58) /* false (PL_sv_no) */
data/ext/sereal/sereal.c CHANGED
@@ -2,36 +2,44 @@
2
2
  #include "encode.h"
3
3
 
4
4
  VALUE Sereal = Qnil;
5
+ VALUE SerealPerlObject = Qnil;
6
+ ID FREEZE;
7
+ ID THAW;
8
+ ID TO_SRL;
9
+ ID SEREAL;
10
+ ID ID_CLASS;
11
+ ID ID_VALUE;
5
12
  void Init_sereal();
6
13
 
7
14
  /*
8
15
  * Encode/Decode object using Sereal binary protocol:
9
16
  * https://github.com/Sereal/Sereal/blob/master/sereal_spec.pod
10
17
  *
11
- * Sereal.encode(object) -> serialized blob
12
- * Sereal.encode(object,Sereal::SNAPPY_INCR) -> snappy compressed blob
13
- * Sereal.encode(object,Sereal::SNAPPY) -> snappy compressed blob
18
+ * ==install:
14
19
  *
15
- * SNAPPY_INCR encoded objects can be appended into one output and then the
16
- * decoder will know what to do.
20
+ * $ gem install sereal
17
21
  *
18
- * Sereal.encode(object,Sereal::REF)
19
- * or Sereal::REF|Sereal::SNAPPY_INC, or Sereal::REF|Sereal::SNAPPY
22
+ * or you can build it from github which requires:
23
+ * 1. rake compiler - <code>gem install rake-compiler</code> (https://github.com/luislavena/rake-compiler)
24
+ * 2. ruby 1.9+ or rubinius supporting 1.9+
20
25
  *
21
- * when encoding will try to use Sereal's REFP tag to transmit only the
22
- * the original object's offset in the packet.
23
- * So:
24
- * one = [ 1,2,3,4,5 ]
25
- * two = [ one, one ]
26
- * Sereal.encode(two,Sereal::REF)
27
- * will send 'one' only once, and one REFP that points to the first one
28
- * it uses one.object_id as a hash key in a local tracker hash
29
- * and if it sees this object_id again it just sends the offset.
26
+ * $ git clone https://github.com/Sereal/Sereal
27
+ * $ cd Sereal/ruby
28
+ * $ gem build sereal.gemspec
29
+ * $ gem install sereal-0.0.?.gem
30
30
  *
31
+ * ==serialize:
32
+ * require 'sereal'
33
+ * Sereal.encode(object)
31
34
  *
32
- * Sereal.decode(blob) - returns the decoded object
33
- *
34
- * If the blob contains multiple compressed
35
+ * ===serializing objects
36
+ * if Sereal::THAW option is given the encoder will try to call FREEZE() instance method on
37
+ * the object being serialized, and it will serialize the class name + the output of FREEZE (see the THAW constant for more information)
38
+ * if the object does not respond to FREEZE it will call <code>to_srl</code> and serialize the result of that
39
+ * ==deserialize:
40
+ * require 'sereal'
41
+ * Sereal.decode(blob)
42
+ * If the blob contains multiple compressed objects
35
43
  * sub-blobs you should call it with:
36
44
  *
37
45
  * Sereal.decode(blob) do |decoded|
@@ -39,6 +47,7 @@ void Init_sereal();
39
47
  * end
40
48
  *
41
49
  * otherwise only the first decoded object will be returned
50
+ * ===stream decoding
42
51
  * there is also streaming support which takes any kind of IO object
43
52
  * like socket, or just regular File, and it is really easy to use:
44
53
  *
@@ -46,7 +55,7 @@ void Init_sereal();
46
55
  * # do something with the decoded object
47
56
  * end
48
57
  *
49
- * it works both with `incremental snappy` and with just combined sereal packets.
58
+ * it works both with SNAPPY_INCR and with just combined sereal packets.
50
59
  * another example but with TCPSocket:
51
60
  *
52
61
  * s = TCPSocket.new 'localhost', 2000
@@ -54,15 +63,216 @@ void Init_sereal();
54
63
  * # do something with the decoded object
55
64
  * end
56
65
  *
66
+ * ===multiple packets in one buffer
67
+ * it also supports decoding of multiple packets in one buffer:
68
+ *
69
+ * buf = ""
70
+ * buf << Sereal.encode([1,2,3],Sereal::SNAPPY_INCR)
71
+ * buf << Sereal.encode([3,4,5])
72
+ * buf << Sereal.encode([7,8,9],Sereal::SNAPPY_INCR)
73
+ * Sereal.decode(buf) do |decoded|
74
+ * p decoded
75
+ * end
76
+ * ==Sereal.encode() and Sereal.decode() accept:
77
+ * 1. compression types: RAW, SNAPPY_INCR, and SNAPPY
78
+ * 2. flags: REF, COPY, THAW and DEBUG
79
+ *
80
+ * flags and compression types can be used in combinations like:
81
+ *
82
+ * Sereal.encode([1,2,3],Sereal::REF|Sereal::COPY|Sereal::THAW|Sereal::SNAPPY_INCR)
83
+ *
84
+ * but you cannot use 2 types of compression at the same time
85
+ *
86
+ * ==LZ4
87
+ * For a brief period (version 0.0.5 to 0.0.6) there was support for LZ4 and LZ4HC, which was pushed to the master branch by mistake. If you are depending on it please convert your data using <code>bin/rsrl</code> or just use the <code>0.0.5</code> version of the sereal gem.
88
+ *
89
+ * gem 'sereal', '= 0.0.5'
90
+ * #or
91
+ * $ gem install sereal -v 0.0.5
92
+ *
57
93
  */
58
94
  void Init_sereal() {
59
- Sereal = rb_define_class("Sereal", rb_cObject);
60
- rb_define_singleton_method(Sereal, "encode", method_sereal_encode, -2);
61
- rb_define_singleton_method(Sereal, "decode", method_sereal_decode, -2);
62
- rb_define_const(Sereal, "SNAPPY",INT2NUM(__SNAPPY));
63
- rb_define_const(Sereal, "SNAPPY_INCR",INT2NUM(__SNAPPY_INCR));
64
- rb_define_const(Sereal, "RAW",INT2NUM(__RAW));
65
- rb_define_const(Sereal, "REF",INT2NUM(__REF));
66
- s_init_writers();
95
+ TO_SRL = rb_intern("to_srl");
96
+ THAW = rb_intern("THAW");
97
+ FREEZE = rb_intern("FREEZE");
98
+ SEREAL = rb_intern("Sereal");
99
+ ID_CLASS = rb_intern("@class");
100
+ ID_VALUE = rb_intern("@value");
101
+
102
+ SerealPerlObject = rb_define_class("SerealPerlObject", rb_cObject);
103
+
104
+ Sereal = rb_define_class("Sereal", rb_cObject);
105
+ rb_define_singleton_method(Sereal, "encode", method_sereal_encode, -2);
106
+
107
+ rb_define_singleton_method(Sereal, "decode", method_sereal_decode, -2);
108
+
109
+ /*
110
+ * instructs the encoder to use Snappy compression
111
+ * nb: this is Sereal protocol version 1 only
112
+ * do not use it if possible.
113
+ *
114
+ * Sereal.encode(object,Sereal::SNAPPY)
115
+ */
116
+ rb_define_const(Sereal, "SNAPPY",INT2NUM(__SNAPPY));
117
+
118
+
119
+ /*
120
+ * instructs the encoder to use Snappy compression
121
+ * but with support for incremental packet (meaning
122
+ * you can combine packets into one big blob of data
123
+ * and the encoder will be confident that there is no
124
+ * corruption, because the SNAPPY_INCR packet contains the
125
+ * uncompressed length)
126
+ *
127
+ * Sereal.encode(object,Sereal::SNAPPY_INCR)
128
+ */
129
+ rb_define_const(Sereal, "SNAPPY_INCR",INT2NUM(__SNAPPY_INCR));
130
+
131
+ /*
132
+ * instructs the encoder to use no compression (default)
133
+ *
134
+ * Sereal.encode(object,Sereal::RAW)
135
+ */
136
+ rb_define_const(Sereal, "RAW",INT2NUM(__RAW));
137
+
138
+ /*
139
+ * (can also be used with any compression type/RAW)
140
+ * instructs the encoder to keep track of object_ids
141
+ * and when it sees that object with the same id has
142
+ * already been encoded, it just creates REFP reference
143
+ * with offset to the first item
144
+ * so:
145
+ * name = "john doe"
146
+ * object = [ name, name ]
147
+ * Sereal.encode(object,Sereal::REF|Sereal::SNAPPY_INCR)
148
+ *
149
+ * will actually create something that looks like:
150
+ *
151
+ * 000006/000001: 42 066 ARRAYREF(2)
152
+ * 000007/000002: 68* 232 SHORT_BINARY(8): 'john doe'
153
+ * 000016/000011: 29 041 REFP(2)
154
+ *
155
+ * instead of:
156
+ *
157
+ * 000006/000001: 42 066 ARRAYREF(2)
158
+ * 000007/000002: 68 104 SHORT_BINARY(8): 'john doe'
159
+ * 000016/000011: 68 104 SHORT_BINARY(8): 'john doe'
160
+ *
161
+ * as you can see Sereal saved us 7 bytes
162
+ *
163
+ * this can hurt performance because the encoder must get the
164
+ * object_id of every encoded object, and put it in a hash
165
+ * with the current position, so it can be looked up later
166
+ */
167
+ rb_define_const(Sereal, "REF",INT2NUM(__REF));
168
+
169
+
170
+ /*
171
+ * very similar to Sereal::REF, but it instructs the decoder
172
+ * to create new item, by going back in time as if the OFFSET
173
+ * of the COPY tag was being read right now
174
+ * it puts every object as a key in a hash (instead of its
175
+ * object_id)
176
+ *
177
+ * object = [ { name => "john"} , { name => "john"} ]
178
+ * Sereal.encode(object,Sereal::COPY|Sereal::SNAPPY_INCR)
179
+ *
180
+ * will produce:
181
+ *
182
+ * 000006/000001: 42 066 ARRAYREF(2)
183
+ * 000007/000002: 51 081 HASHREF(2)
184
+ * KEY:
185
+ * 000008/000003: 64 100 SHORT_BINARY(4): 'name'
186
+ * VALUE:
187
+ * 000013/000008: 64 100 SHORT_BINARY(4): 'john'
188
+ * 000018/000013: 2f 047 COPY(2)
189
+ *
190
+ * as you can see the hash is sent only once
191
+ *
192
+ * COPY can be used with REF like:
193
+ *
194
+ * object = "bazinga"
195
+ *
196
+ * Sereal.encode([object,"bazinga",object],Sereal::REF|Sereal::COPY)
197
+ *
198
+ * will produce:
199
+ * 000006/000001: 43 067 ARRAYREF(3)
200
+ * 000007/000002: 67* 231 SHORT_BINARY(7): 'bazinga'
201
+ * 000015/000010: 2f 047 COPY(2)
202
+ * 000017/000012: 29 041 REFP(2)
203
+ *
204
+ * using Sereal::COPY also hurts performance because every encoding step
205
+ * the encoder must look into a hash if the object exists in it
206
+ * so it can create a COPY tag instead of encoding the object again
207
+ */
208
+ rb_define_const(Sereal, "COPY",INT2NUM(__COPY));
209
+
210
+ /*
211
+ * add support for FREEZE/THAW
212
+ * it calls <code>object.FREEZE(:Sereal)</code> and it serializes the result of that
213
+ * with the object's class name, when deserializing it calls
214
+ * <code>class.THAW(:Sereal,the output of FREEZE)</code>
215
+ *
216
+ * class StorableFile
217
+ * attr_accessor :path
218
+ * def initialize(path,pos)
219
+ * @path = path
220
+ * @pos = pos
221
+ * end
222
+ * def read
223
+ * @pos += 1
224
+ * end
225
+ * def FREEZE(serializer)
226
+ * [@path,@pos]
227
+ * end
228
+ * def self.THAW(serializer,path,pos)
229
+ * self.new(path,pos)
230
+ * end
231
+ * end
232
+ *
233
+ * obj = StorableFile.new("/tmp/sereal.txt",0)
234
+ * obj.read # read some data
235
+ * encoded = Sereal.encode(obj,Sereal::THAW)
236
+ * # this actually encodes something like:
237
+ * # 'StorableFile' -> [ "/tmp/sereal.txt", 1 ]
238
+ * restored = Sereal.decode(encoded,Sereal::THAW)
239
+ * # this will call StorableFile.THAW(:Sereal,"/tmp/sereal.txt",1)
240
+ *
241
+ * as you can see the array returned from FREEZE is exploded into
242
+ * arguments for THAW, FREEZE *MUST* return array, otherwise the
243
+ * encoder raises TypeError.
244
+ * *BOTH* encoder *AND* decoder must be started with Sereal::THAW option
245
+ * in order for it to work properly
246
+ */
247
+ rb_define_const(Sereal, "THAW",INT2NUM(__THAW));
248
+
249
+ /*
250
+ * the argument given to FREEZE() and THAW() as serializer
251
+ * which at the moment is:
252
+ *
253
+ * :Sereal
254
+ */
255
+ rb_define_const(Sereal, "FREEZER",ID2SYM(SEREAL));
256
+
257
+ /*
258
+ * enable debug output
259
+ *
260
+ * name = "john"
261
+ * Sereal.decode([name,name],Sereal::REF|Sereal::DEBUG)
262
+ *
263
+ * produces:
264
+ *
265
+ * initialized (s) with compression type: 0 { p: 6, s: 14, l: 0, h: 0 } method_sereal_decode()
266
+ * header end at 6 { p: 6, s: 14, l: 0, h: 6 } method_sereal_decode()
267
+ * tracking object of class: String(id: 7678920) at position: 8 { p: 12, s: 14, l: 2, h: 6 } sereal_to_rb_object()
268
+ * object: String: john { p: 12, s: 14, l: 2, h: 6 } sereal_to_rb_object()
269
+ * reading reference from offset: 8, id: 7678920 { p: 14, s: 14, l: 2, h: 6 } s_read_ref()
270
+ * object: String: john { p: 14, s: 14, l: 2, h: 6 } sereal_to_rb_object()
271
+ * object: Array: ["john", "john"] { p: 14, s: 14, l: 1, h: 6 } sereal_to_rb_object()
272
+ *
273
+ */
274
+ rb_define_const(Sereal, "DEBUG",INT2NUM(__DEBUG));
275
+
276
+ s_init_writers();
67
277
  }
68
278
 
data/ext/sereal/sereal.h CHANGED
@@ -12,7 +12,6 @@ typedef unsigned int u32;
12
12
  typedef unsigned short u16;
13
13
  typedef unsigned char u8;
14
14
  typedef struct _sereal sereal_t;
15
- typedef struct _track_entry track_t;
16
15
 
17
16
  #define TRUE 1
18
17
  #define FALSE 0
@@ -41,19 +40,27 @@ typedef struct _track_entry track_t;
41
40
  #define EXTENDED RE_OPTION_EXTENDED
42
41
  #endif
43
42
 
44
- #define FORMAT(fmt,arg...) fmt " [%s():%s:%d @ %u]\n",##arg,__func__,__FILE__,__LINE__,(unsigned int) time(NULL)
43
+ #define FORMAT(fmt,arg...) fmt " %s()\n",##arg,__func__
45
44
  #define E(fmt,arg...) fprintf(stderr,FORMAT(fmt,##arg))
46
45
  #define D(fmt,arg...) printf(FORMAT(fmt,##arg))
46
+ #define SD(s,fmt,arg...) \
47
+ do { \
48
+ if (s->flags & __DEBUG) { \
49
+ int i; \
50
+ for (i = 0; i < s->level; i++) { \
51
+ printf(" "); \
52
+ } \
53
+ D(fmt " { p: %d, s: %d, l: %u, h: %u } ",##arg,s->pos,s->size,s->level,s->hdr_end); \
54
+ } \
55
+ } while(0);
47
56
 
48
57
  #define s_raise(what,ex,arg...) \
49
58
  do { \
59
+ SD(s,"s_raise"); \
50
60
  s_destroy(what); \
51
61
  rb_raise(ex,##arg); \
52
62
  } while(0);
53
63
 
54
- #define FLAG_NOT_MINE 1
55
- #define FLAG_STREAM 2
56
- #define FLAG_REF 4
57
64
  struct _sereal {
58
65
  u8 *data;
59
66
  u32 size;
@@ -61,7 +68,9 @@ struct _sereal {
61
68
  u32 rsize;
62
69
  u32 level;
63
70
  u8 flags;
71
+ u8 expect;
64
72
  VALUE tracked;
73
+ VALUE copy;
65
74
  u32 hdr_end;
66
75
  int fd;
67
76
  struct buffer {
@@ -73,13 +82,20 @@ struct _sereal {
73
82
 
74
83
  VALUE method_sereal_encode(VALUE self, VALUE args);
75
84
  VALUE method_sereal_decode(VALUE self, VALUE payload);
85
+ extern ID FREEZE;
86
+ extern ID THAW;
87
+ extern ID TO_SRL;
88
+ extern ID SEREAL;
89
+ extern ID ID_CLASS;
90
+ extern ID ID_VALUE;
91
+ extern VALUE SerealPerlObject;
76
92
 
77
- #define S_RECURSE_INC(s) \
78
- do { \
79
- if((s)->level++ > MAX_RECURSION_DEPTH) \
80
- s_raise((s),rb_eArgError, \
81
- "max recursion depth reached: %d (level: %d)", \
82
- MAX_RECURSION_DEPTH, s->level); \
93
+ #define S_RECURSE_INC(s) \
94
+ do { \
95
+ if((s)->level++ > MAX_RECURSION_DEPTH) \
96
+ s_raise((s),rb_eArgError, \
97
+ "max recursion depth reached: %d (level: %d)", \
98
+ MAX_RECURSION_DEPTH, s->level); \
83
99
  } while(0);
84
100
 
85
101
  #define S_RECURSE_DEC(s) ((s)->level--)
@@ -94,5 +110,12 @@ VALUE method_sereal_decode(VALUE self, VALUE payload);
94
110
  #define __SNAPPY 1
95
111
  #define __SNAPPY_INCR 2
96
112
  #define __REF 4
113
+ #define __DEBUG 8
114
+ #define __NOT_MINE 16
115
+ #define __STREAM 32
116
+ #define __THAW 64
117
+ #define __COPY 128 /* needs its own bit: 64 is already __THAW; 128 still fits the u8 flags field */
118
+ #define __ARGUMENT_FLAGS (__DEBUG|__THAW|__REF|__COPY)
119
+
97
120
  #define __MIN_SIZE 6
98
121
  #endif
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sereal
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.9
4
+ version: 0.0.10
5
5
  platform: ruby
6
6
  authors:
7
7
  - Borislav Nikolov
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2014-01-03 00:00:00.000000000 Z
11
+ date: 2014-01-07 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: rake-compiler
@@ -30,7 +30,8 @@ executables:
30
30
  - rsrl
31
31
  extensions:
32
32
  - ext/sereal/extconf.rb
33
- extra_rdoc_files: []
33
+ extra_rdoc_files:
34
+ - ext/sereal/sereal.c
34
35
  files:
35
36
  - ext/sereal/decode.c
36
37
  - ext/sereal/encode.c
@@ -52,7 +53,9 @@ homepage: https://github.com/Sereal/Sereal
52
53
  licenses: []
53
54
  metadata: {}
54
55
  post_install_message:
55
- rdoc_options: []
56
+ rdoc_options:
57
+ - --exclude
58
+ - .*\.so
56
59
  require_paths:
57
60
  - lib
58
61
  required_ruby_version: !ruby/object:Gem::Requirement