msgpack 1.3.2 → 1.4.2

Sign up to get free protection for your applications and to get access to all the features.
@@ -23,11 +23,11 @@
23
23
  static ID s_replace;
24
24
  #endif
25
25
 
26
- #ifdef COMPAT_HAVE_ENCODING /* see compat.h*/
27
26
  int msgpack_rb_encindex_utf8;
28
27
  int msgpack_rb_encindex_usascii;
29
28
  int msgpack_rb_encindex_ascii8bit;
30
- #endif
29
+
30
+ ID s_uminus;
31
31
 
32
32
  #ifndef DISABLE_RMEM
33
33
  static msgpack_rmem_t s_rmem;
@@ -35,11 +35,11 @@ static msgpack_rmem_t s_rmem;
35
35
 
36
36
  void msgpack_buffer_static_init()
37
37
  {
38
- #ifdef COMPAT_HAVE_ENCODING
38
+ s_uminus = rb_intern("-@");
39
+
39
40
  msgpack_rb_encindex_utf8 = rb_utf8_encindex();
40
41
  msgpack_rb_encindex_usascii = rb_usascii_encindex();
41
42
  msgpack_rb_encindex_ascii8bit = rb_ascii8bit_encindex();
42
- #endif
43
43
 
44
44
  #ifndef DISABLE_RMEM
45
45
  msgpack_rmem_init(&s_rmem);
@@ -308,9 +308,7 @@ static inline void _msgpack_buffer_add_new_chunk(msgpack_buffer_t* b)
308
308
  static inline void _msgpack_buffer_append_reference(msgpack_buffer_t* b, VALUE string)
309
309
  {
310
310
  VALUE mapped_string = rb_str_dup(string);
311
- #ifdef COMPAT_HAVE_ENCODING
312
311
  ENCODING_SET(mapped_string, msgpack_rb_encindex_ascii8bit);
313
- #endif
314
312
 
315
313
  _msgpack_buffer_add_new_chunk(b);
316
314
 
@@ -337,7 +335,6 @@ void _msgpack_buffer_append_long_string(msgpack_buffer_t* b, VALUE string)
337
335
 
338
336
  if(b->io != Qnil) {
339
337
  msgpack_buffer_flush(b);
340
- #ifdef COMPAT_HAVE_ENCODING
341
338
  if (ENCODING_GET(string) == msgpack_rb_encindex_ascii8bit) {
342
339
  rb_funcall(b->io, b->io_write_all_method, 1, string);
343
340
  } else if(!STR_DUP_LIKELY_DOES_COPY(string)) {
@@ -347,10 +344,6 @@ void _msgpack_buffer_append_long_string(msgpack_buffer_t* b, VALUE string)
347
344
  } else {
348
345
  msgpack_buffer_append(b, RSTRING_PTR(string), length);
349
346
  }
350
- #else
351
- rb_funcall(b->io, b->io_write_all_method, 1, string);
352
- #endif
353
-
354
347
  } else if(!STR_DUP_LIKELY_DOES_COPY(string)) {
355
348
  _msgpack_buffer_append_reference(b, string);
356
349
 
@@ -49,11 +49,11 @@
49
49
 
50
50
  #define NO_MAPPED_STRING ((VALUE)0)
51
51
 
52
- #ifdef COMPAT_HAVE_ENCODING /* see compat.h*/
53
52
  extern int msgpack_rb_encindex_utf8;
54
53
  extern int msgpack_rb_encindex_usascii;
55
54
  extern int msgpack_rb_encindex_ascii8bit;
56
- #endif
55
+
56
+ extern ID s_uminus;
57
57
 
58
58
  struct msgpack_buffer_chunk_t;
59
59
  typedef struct msgpack_buffer_chunk_t msgpack_buffer_chunk_t;
@@ -438,7 +438,7 @@ static inline VALUE _msgpack_buffer_refer_head_mapped_string(msgpack_buffer_t* b
438
438
  return rb_str_substr(b->head->mapped_string, offset, length);
439
439
  }
440
440
 
441
- static inline VALUE msgpack_buffer_read_top_as_string(msgpack_buffer_t* b, size_t length, bool will_be_frozen)
441
+ static inline VALUE msgpack_buffer_read_top_as_string(msgpack_buffer_t* b, size_t length, bool will_be_frozen, bool utf8)
442
442
  {
443
443
  #ifndef DISABLE_BUFFER_READ_REFERENCE_OPTIMIZE
444
444
  /* optimize */
@@ -446,16 +446,52 @@ static inline VALUE msgpack_buffer_read_top_as_string(msgpack_buffer_t* b, size_
446
446
  b->head->mapped_string != NO_MAPPED_STRING &&
447
447
  length >= b->read_reference_threshold) {
448
448
  VALUE result = _msgpack_buffer_refer_head_mapped_string(b, length);
449
+ if (utf8) ENCODING_SET(result, msgpack_rb_encindex_utf8);
449
450
  _msgpack_buffer_consumed(b, length);
450
451
  return result;
451
452
  }
452
453
  #endif
453
454
 
454
- VALUE result = rb_str_new(b->read_buffer, length);
455
+ VALUE result;
456
+
457
+ #ifdef HAVE_RB_ENC_INTERNED_STR
458
+ if (will_be_frozen) {
459
+ result = rb_enc_interned_str(b->read_buffer, length, utf8 ? rb_utf8_encoding() : rb_ascii8bit_encoding());
460
+ } else {
461
+ if (utf8) {
462
+ result = rb_utf8_str_new(b->read_buffer, length);
463
+ } else {
464
+ result = rb_str_new(b->read_buffer, length);
465
+ }
466
+ }
455
467
  _msgpack_buffer_consumed(b, length);
456
468
  return result;
457
- }
458
469
 
470
+ #else
459
471
 
460
- #endif
472
+ if (utf8) {
473
+ result = rb_utf8_str_new(b->read_buffer, length);
474
+ } else {
475
+ result = rb_str_new(b->read_buffer, length);
476
+ }
477
+
478
+ #if STR_UMINUS_DEDUPE
479
+ if (will_be_frozen) {
480
+ #if STR_UMINUS_DEDUPE_FROZEN
481
+ // Starting from MRI 2.8 (released as 3.0), it is preferable to freeze the
482
+ // string before deduplication so that it can be interned directly;
483
+ // otherwise it would be duplicated first, which is wasteful.
484
+ rb_str_freeze(result);
485
+ #endif //STR_UMINUS_DEDUPE_FROZEN
486
+ // MRI 2.5 and older do not deduplicate strings that are already
487
+ // frozen.
488
+ result = rb_funcall(result, s_uminus, 0);
489
+ }
490
+ #endif // STR_UMINUS_DEDUPE
491
+ _msgpack_buffer_consumed(b, length);
492
+ return result;
493
+
494
+ #endif // HAVE_RB_ENC_INTERNED_STR
495
+ }
461
496
 
497
+ #endif
@@ -20,6 +20,7 @@
20
20
 
21
21
  #include <stdbool.h>
22
22
  #include "ruby.h"
23
+ #include "ruby/encoding.h"
23
24
 
24
25
  #if defined(HAVE_RUBY_ST_H)
25
26
  # include "ruby/st.h" /* ruby hash on Ruby 1.9 */
@@ -38,18 +39,6 @@
38
39
  # define ZALLOC_N(type,n) RB_ZALLOC_N(type,n)
39
40
  #endif
40
41
 
41
- /*
42
- * COMPAT_HAVE_ENCODING
43
- */
44
- #ifdef HAVE_RUBY_ENCODING_H
45
- # include "ruby/encoding.h"
46
- # define COMPAT_HAVE_ENCODING
47
- #endif
48
-
49
- #if defined(__MACRUBY__) /* MacRuby */
50
- # undef COMPAT_HAVE_ENCODING
51
- #endif
52
-
53
42
 
54
43
  /*
55
44
  * define STR_DUP_LIKELY_DOES_COPY
@@ -4,6 +4,7 @@ have_header("ruby/st.h")
4
4
  have_header("st.h")
5
5
  have_func("rb_str_replace", ["ruby.h"])
6
6
  have_func("rb_intern_str", ["ruby.h"])
7
+ have_func("rb_enc_interned_str", "ruby.h")
7
8
  have_func("rb_sym2str", ["ruby.h"])
8
9
  have_func("rb_str_intern", ["ruby.h"])
9
10
  have_func("rb_block_lambda", ["ruby.h"])
@@ -25,6 +26,44 @@ if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'rbx'
25
26
  $CFLAGS << %[ -DDISABLE_RMEM]
26
27
  end
27
28
 
29
+ # checking if Hash#[]= (rb_hash_aset) dedupes string keys
30
+ h = {}
31
+ x = {}
32
+ r = rand.to_s
33
+ h[%W(#{r}).join('')] = :foo
34
+ x[%W(#{r}).join('')] = :foo
35
+ if x.keys[0].equal?(h.keys[0])
36
+ $CFLAGS << ' -DHASH_ASET_DEDUPE=1 '
37
+ else
38
+ $CFLAGS << ' -DHASH_ASET_DEDUPE=0 '
39
+ end
40
+
41
+
42
+ # checking if String#-@ (str_uminus) dedupes
43
+ begin
44
+ a = -(%w(t e s t).join)
45
+ b = -(%w(t e s t).join)
46
+ if a.equal?(b)
47
+ $CFLAGS << ' -DSTR_UMINUS_DEDUPE=1 '
48
+ else
49
+ $CFLAGS += ' -DSTR_UMINUS_DEDUPE=0 '
50
+ end
51
+ rescue NoMethodError
52
+ $CFLAGS << ' -DSTR_UMINUS_DEDUPE=0 '
53
+ end
54
+
55
+ # checking if String#-@ (str_uminus) directly interns frozen strings
56
+ begin
57
+ s = rand.to_s.freeze
58
+ if (-s).equal?(s) && (-s.dup).equal?(s)
59
+ $CFLAGS << ' -DSTR_UMINUS_DEDUPE_FROZEN=1 '
60
+ else
61
+ $CFLAGS << ' -DSTR_UMINUS_DEDUPE_FROZEN=0 '
62
+ end
63
+ rescue NoMethodError
64
+ $CFLAGS << ' -DSTR_UMINUS_DEDUPE_FROZEN=0 '
65
+ end
66
+
28
67
  if warnflags = CONFIG['warnflags']
29
68
  warnflags.slice!(/ -Wdeclaration-after-statement/)
30
69
  end
@@ -396,7 +396,6 @@ static inline void msgpack_packer_write_ext(msgpack_packer_t* pk, int ext_type,
396
396
  msgpack_buffer_append_string(PACKER_BUFFER_(pk), payload);
397
397
  }
398
398
 
399
- #ifdef COMPAT_HAVE_ENCODING
400
399
  static inline bool msgpack_packer_is_binary(VALUE v, int encindex)
401
400
  {
402
401
  return encindex == msgpack_rb_encindex_ascii8bit;
@@ -414,7 +413,6 @@ static inline bool msgpack_packer_is_utf8_compat_string(VALUE v, int encindex)
414
413
  #endif
415
414
  ;
416
415
  }
417
- #endif
418
416
 
419
417
  static inline void msgpack_packer_write_string_value(msgpack_packer_t* pk, VALUE v)
420
418
  {
@@ -425,7 +423,6 @@ static inline void msgpack_packer_write_string_value(msgpack_packer_t* pk, VALUE
425
423
  rb_raise(rb_eArgError, "size of string is too long to pack: %lu bytes should be <= %lu", len, 0xffffffffUL);
426
424
  }
427
425
 
428
- #ifdef COMPAT_HAVE_ENCODING
429
426
  int encindex = ENCODING_GET(v);
430
427
  if(msgpack_packer_is_binary(v, encindex) && !pk->compatibility_mode) {
431
428
  /* write ASCII-8BIT string using Binary type */
@@ -443,10 +440,6 @@ static inline void msgpack_packer_write_string_value(msgpack_packer_t* pk, VALUE
443
440
  msgpack_packer_write_raw_header(pk, (unsigned int)len);
444
441
  msgpack_buffer_append_string(PACKER_BUFFER_(pk), v);
445
442
  }
446
- #else
447
- msgpack_packer_write_raw_header(pk, (unsigned int)len);
448
- msgpack_buffer_append_string(PACKER_BUFFER_(pk), v);
449
- #endif
450
443
  }
451
444
 
452
445
  static inline void msgpack_packer_write_symbol_string_value(msgpack_packer_t* pk, VALUE v)
@@ -142,33 +142,17 @@ static inline void reset_head_byte(msgpack_unpacker_t* uk)
142
142
 
143
143
  static inline int object_complete(msgpack_unpacker_t* uk, VALUE object)
144
144
  {
145
+ if(uk->freeze) {
146
+ rb_obj_freeze(object);
147
+ }
148
+
145
149
  uk->last_object = object;
146
150
  reset_head_byte(uk);
147
151
  return PRIMITIVE_OBJECT_COMPLETE;
148
152
  }
149
153
 
150
- static inline int object_complete_string(msgpack_unpacker_t* uk, VALUE str)
151
- {
152
- #ifdef COMPAT_HAVE_ENCODING
153
- ENCODING_SET(str, msgpack_rb_encindex_utf8);
154
- #endif
155
- return object_complete(uk, str);
156
- }
157
-
158
- static inline int object_complete_binary(msgpack_unpacker_t* uk, VALUE str)
159
- {
160
- #ifdef COMPAT_HAVE_ENCODING
161
- ENCODING_SET(str, msgpack_rb_encindex_ascii8bit);
162
- #endif
163
- return object_complete(uk, str);
164
- }
165
-
166
154
  static inline int object_complete_ext(msgpack_unpacker_t* uk, int ext_type, VALUE str)
167
155
  {
168
- #ifdef COMPAT_HAVE_ENCODING
169
- ENCODING_SET(str, msgpack_rb_encindex_ascii8bit);
170
- #endif
171
-
172
156
  VALUE proc = msgpack_unpacker_ext_registry_lookup(&uk->ext_registry, ext_type);
173
157
  if(proc != Qnil) {
174
158
  VALUE obj = rb_funcall(proc, s_call, 1, str);
@@ -271,9 +255,10 @@ static int read_raw_body_cont(msgpack_unpacker_t* uk)
271
255
 
272
256
  int ret;
273
257
  if(uk->reading_raw_type == RAW_TYPE_STRING) {
274
- ret = object_complete_string(uk, uk->reading_raw);
275
- } else if(uk->reading_raw_type == RAW_TYPE_BINARY) {
276
- ret = object_complete_binary(uk, uk->reading_raw);
258
+ ENCODING_SET(uk->reading_raw, msgpack_rb_encindex_utf8);
259
+ ret = object_complete(uk, uk->reading_raw);
260
+ } else if (uk->reading_raw_type == RAW_TYPE_BINARY) {
261
+ ret = object_complete(uk, uk->reading_raw);
277
262
  } else {
278
263
  ret = object_complete_ext(uk, uk->reading_raw_type, uk->reading_raw);
279
264
  }
@@ -290,19 +275,20 @@ static inline int read_raw_body_begin(msgpack_unpacker_t* uk, int raw_type)
290
275
  if(length <= msgpack_buffer_top_readable_size(UNPACKER_BUFFER_(uk))) {
291
276
  /* don't use zerocopy for hash keys but get a frozen string directly
292
277
  * because rb_hash_aset freezes keys and it causes copying */
293
- bool will_freeze = is_reading_map_key(uk);
294
- VALUE string = msgpack_buffer_read_top_as_string(UNPACKER_BUFFER_(uk), length, will_freeze);
278
+ bool will_freeze = uk->freeze || is_reading_map_key(uk);
279
+ VALUE string = msgpack_buffer_read_top_as_string(UNPACKER_BUFFER_(uk), length, will_freeze, raw_type == RAW_TYPE_STRING);
295
280
  int ret;
296
- if(raw_type == RAW_TYPE_STRING) {
297
- ret = object_complete_string(uk, string);
298
- } else if(raw_type == RAW_TYPE_BINARY) {
299
- ret = object_complete_binary(uk, string);
281
+ if(raw_type == RAW_TYPE_STRING || raw_type == RAW_TYPE_BINARY) {
282
+ ret = object_complete(uk, string);
300
283
  } else {
301
284
  ret = object_complete_ext(uk, raw_type, string);
302
285
  }
286
+
287
+ # if !HASH_ASET_DEDUPE
303
288
  if(will_freeze) {
304
289
  rb_obj_freeze(string);
305
290
  }
291
+ # endif
306
292
  uk->reading_raw_remaining = 0;
307
293
  return ret;
308
294
  }
@@ -332,7 +318,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
332
318
  SWITCH_RANGE(b, 0xa0, 0xbf) // FixRaw / fixstr
333
319
  int count = b & 0x1f;
334
320
  if(count == 0) {
335
- return object_complete_string(uk, rb_str_buf_new(0));
321
+ return object_complete(uk, rb_utf8_str_new_static("", 0));
336
322
  }
337
323
  /* read_raw_body_begin sets uk->reading_raw */
338
324
  uk->reading_raw_remaining = count;
@@ -517,7 +503,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
517
503
  READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 1);
518
504
  uint8_t count = cb->u8;
519
505
  if(count == 0) {
520
- return object_complete_string(uk, rb_str_buf_new(0));
506
+ return object_complete(uk, rb_utf8_str_new_static("", 0));
521
507
  }
522
508
  /* read_raw_body_begin sets uk->reading_raw */
523
509
  uk->reading_raw_remaining = count;
@@ -529,7 +515,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
529
515
  READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 2);
530
516
  uint16_t count = _msgpack_be16(cb->u16);
531
517
  if(count == 0) {
532
- return object_complete_string(uk, rb_str_buf_new(0));
518
+ return object_complete(uk, rb_utf8_str_new_static("", 0));
533
519
  }
534
520
  /* read_raw_body_begin sets uk->reading_raw */
535
521
  uk->reading_raw_remaining = count;
@@ -541,7 +527,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
541
527
  READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 4);
542
528
  uint32_t count = _msgpack_be32(cb->u32);
543
529
  if(count == 0) {
544
- return object_complete_string(uk, rb_str_buf_new(0));
530
+ return object_complete(uk, rb_utf8_str_new_static("", 0));
545
531
  }
546
532
  /* read_raw_body_begin sets uk->reading_raw */
547
533
  uk->reading_raw_remaining = count;
@@ -553,7 +539,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
553
539
  READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 1);
554
540
  uint8_t count = cb->u8;
555
541
  if(count == 0) {
556
- return object_complete_binary(uk, rb_str_buf_new(0));
542
+ return object_complete(uk, rb_str_new_static("", 0));
557
543
  }
558
544
  /* read_raw_body_begin sets uk->reading_raw */
559
545
  uk->reading_raw_remaining = count;
@@ -565,7 +551,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
565
551
  READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 2);
566
552
  uint16_t count = _msgpack_be16(cb->u16);
567
553
  if(count == 0) {
568
- return object_complete_binary(uk, rb_str_buf_new(0));
554
+ return object_complete(uk, rb_str_new_static("", 0));
569
555
  }
570
556
  /* read_raw_body_begin sets uk->reading_raw */
571
557
  uk->reading_raw_remaining = count;
@@ -577,7 +563,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
577
563
  READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 4);
578
564
  uint32_t count = _msgpack_be32(cb->u32);
579
565
  if(count == 0) {
580
- return object_complete_binary(uk, rb_str_buf_new(0));
566
+ return object_complete(uk, rb_str_new_static("", 0));
581
567
  }
582
568
  /* read_raw_body_begin sets uk->reading_raw */
583
569
  uk->reading_raw_remaining = count;
@@ -64,6 +64,7 @@ struct msgpack_unpacker_t {
64
64
 
65
65
  /* options */
66
66
  bool symbolize_keys;
67
+ bool freeze;
67
68
  bool allow_unknown_ext;
68
69
  };
69
70
 
@@ -96,6 +97,11 @@ static inline void msgpack_unpacker_set_symbolized_keys(msgpack_unpacker_t* uk,
96
97
  uk->symbolize_keys = enable;
97
98
  }
98
99
 
100
+ static inline void msgpack_unpacker_set_freeze(msgpack_unpacker_t* uk, bool enable)
101
+ {
102
+ uk->freeze = enable;
103
+ }
104
+
99
105
  static inline void msgpack_unpacker_set_allow_unknown_ext(msgpack_unpacker_t* uk, bool enable)
100
106
  {
101
107
  uk->allow_unknown_ext = enable;
@@ -105,6 +105,9 @@ VALUE MessagePack_Unpacker_initialize(int argc, VALUE* argv, VALUE self)
105
105
  v = rb_hash_aref(options, ID2SYM(rb_intern("symbolize_keys")));
106
106
  msgpack_unpacker_set_symbolized_keys(uk, RTEST(v));
107
107
 
108
+ v = rb_hash_aref(options, ID2SYM(rb_intern("freeze")));
109
+ msgpack_unpacker_set_freeze(uk, RTEST(v));
110
+
108
111
  v = rb_hash_aref(options, ID2SYM(rb_intern("allow_unknown_ext")));
109
112
  msgpack_unpacker_set_allow_unknown_ext(uk, RTEST(v));
110
113
  }
@@ -118,6 +121,12 @@ static VALUE Unpacker_symbolized_keys_p(VALUE self)
118
121
  return uk->symbolize_keys ? Qtrue : Qfalse;
119
122
  }
120
123
 
124
+ static VALUE Unpacker_freeze_p(VALUE self)
125
+ {
126
+ UNPACKER(self, uk);
127
+ return uk->freeze ? Qtrue : Qfalse;
128
+ }
129
+
121
130
  static VALUE Unpacker_allow_unknown_ext_p(VALUE self)
122
131
  {
123
132
  UNPACKER(self, uk);
@@ -438,6 +447,7 @@ void MessagePack_Unpacker_module_init(VALUE mMessagePack)
438
447
 
439
448
  rb_define_method(cMessagePack_Unpacker, "initialize", MessagePack_Unpacker_initialize, -1);
440
449
  rb_define_method(cMessagePack_Unpacker, "symbolize_keys?", Unpacker_symbolized_keys_p, 0);
450
+ rb_define_method(cMessagePack_Unpacker, "freeze?", Unpacker_freeze_p, 0);
441
451
  rb_define_method(cMessagePack_Unpacker, "allow_unknown_ext?", Unpacker_allow_unknown_ext_p, 0);
442
452
  rb_define_method(cMessagePack_Unpacker, "buffer", Unpacker_buffer, 0);
443
453
  rb_define_method(cMessagePack_Unpacker, "read", Unpacker_read, 0);
@@ -5,11 +5,7 @@ if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby" # This is same with `/java/ =
5
5
  require "msgpack/msgpack.jar"
6
6
  org.msgpack.jruby.MessagePackLibrary.new.load(JRuby.runtime, false)
7
7
  else
8
- begin
9
- require "msgpack/#{RUBY_VERSION[/\d+.\d+/]}/msgpack"
10
- rescue LoadError
11
- require "msgpack/msgpack"
12
- end
8
+ require "msgpack/msgpack"
13
9
  end
14
10
 
15
11
  require "msgpack/packer"