msgpack 1.3.2 → 1.4.2

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -23,11 +23,11 @@
23
23
  static ID s_replace;
24
24
  #endif
25
25
 
26
- #ifdef COMPAT_HAVE_ENCODING /* see compat.h*/
27
26
  int msgpack_rb_encindex_utf8;
28
27
  int msgpack_rb_encindex_usascii;
29
28
  int msgpack_rb_encindex_ascii8bit;
30
- #endif
29
+
30
+ ID s_uminus;
31
31
 
32
32
  #ifndef DISABLE_RMEM
33
33
  static msgpack_rmem_t s_rmem;
@@ -35,11 +35,11 @@ static msgpack_rmem_t s_rmem;
35
35
 
36
36
  void msgpack_buffer_static_init()
37
37
  {
38
- #ifdef COMPAT_HAVE_ENCODING
38
+ s_uminus = rb_intern("-@");
39
+
39
40
  msgpack_rb_encindex_utf8 = rb_utf8_encindex();
40
41
  msgpack_rb_encindex_usascii = rb_usascii_encindex();
41
42
  msgpack_rb_encindex_ascii8bit = rb_ascii8bit_encindex();
42
- #endif
43
43
 
44
44
  #ifndef DISABLE_RMEM
45
45
  msgpack_rmem_init(&s_rmem);
@@ -308,9 +308,7 @@ static inline void _msgpack_buffer_add_new_chunk(msgpack_buffer_t* b)
308
308
  static inline void _msgpack_buffer_append_reference(msgpack_buffer_t* b, VALUE string)
309
309
  {
310
310
  VALUE mapped_string = rb_str_dup(string);
311
- #ifdef COMPAT_HAVE_ENCODING
312
311
  ENCODING_SET(mapped_string, msgpack_rb_encindex_ascii8bit);
313
- #endif
314
312
 
315
313
  _msgpack_buffer_add_new_chunk(b);
316
314
 
@@ -337,7 +335,6 @@ void _msgpack_buffer_append_long_string(msgpack_buffer_t* b, VALUE string)
337
335
 
338
336
  if(b->io != Qnil) {
339
337
  msgpack_buffer_flush(b);
340
- #ifdef COMPAT_HAVE_ENCODING
341
338
  if (ENCODING_GET(string) == msgpack_rb_encindex_ascii8bit) {
342
339
  rb_funcall(b->io, b->io_write_all_method, 1, string);
343
340
  } else if(!STR_DUP_LIKELY_DOES_COPY(string)) {
@@ -347,10 +344,6 @@ void _msgpack_buffer_append_long_string(msgpack_buffer_t* b, VALUE string)
347
344
  } else {
348
345
  msgpack_buffer_append(b, RSTRING_PTR(string), length);
349
346
  }
350
- #else
351
- rb_funcall(b->io, b->io_write_all_method, 1, string);
352
- #endif
353
-
354
347
  } else if(!STR_DUP_LIKELY_DOES_COPY(string)) {
355
348
  _msgpack_buffer_append_reference(b, string);
356
349
 
@@ -49,11 +49,11 @@
49
49
 
50
50
  #define NO_MAPPED_STRING ((VALUE)0)
51
51
 
52
- #ifdef COMPAT_HAVE_ENCODING /* see compat.h*/
53
52
  extern int msgpack_rb_encindex_utf8;
54
53
  extern int msgpack_rb_encindex_usascii;
55
54
  extern int msgpack_rb_encindex_ascii8bit;
56
- #endif
55
+
56
+ extern ID s_uminus;
57
57
 
58
58
  struct msgpack_buffer_chunk_t;
59
59
  typedef struct msgpack_buffer_chunk_t msgpack_buffer_chunk_t;
@@ -438,7 +438,7 @@ static inline VALUE _msgpack_buffer_refer_head_mapped_string(msgpack_buffer_t* b
438
438
  return rb_str_substr(b->head->mapped_string, offset, length);
439
439
  }
440
440
 
441
- static inline VALUE msgpack_buffer_read_top_as_string(msgpack_buffer_t* b, size_t length, bool will_be_frozen)
441
+ static inline VALUE msgpack_buffer_read_top_as_string(msgpack_buffer_t* b, size_t length, bool will_be_frozen, bool utf8)
442
442
  {
443
443
  #ifndef DISABLE_BUFFER_READ_REFERENCE_OPTIMIZE
444
444
  /* optimize */
@@ -446,16 +446,52 @@ static inline VALUE msgpack_buffer_read_top_as_string(msgpack_buffer_t* b, size_
446
446
  b->head->mapped_string != NO_MAPPED_STRING &&
447
447
  length >= b->read_reference_threshold) {
448
448
  VALUE result = _msgpack_buffer_refer_head_mapped_string(b, length);
449
+ if (utf8) ENCODING_SET(result, msgpack_rb_encindex_utf8);
449
450
  _msgpack_buffer_consumed(b, length);
450
451
  return result;
451
452
  }
452
453
  #endif
453
454
 
454
- VALUE result = rb_str_new(b->read_buffer, length);
455
+ VALUE result;
456
+
457
+ #ifdef HAVE_RB_ENC_INTERNED_STR
458
+ if (will_be_frozen) {
459
+ result = rb_enc_interned_str(b->read_buffer, length, utf8 ? rb_utf8_encoding() : rb_ascii8bit_encoding());
460
+ } else {
461
+ if (utf8) {
462
+ result = rb_utf8_str_new(b->read_buffer, length);
463
+ } else {
464
+ result = rb_str_new(b->read_buffer, length);
465
+ }
466
+ }
455
467
  _msgpack_buffer_consumed(b, length);
456
468
  return result;
457
- }
458
469
 
470
+ #else
459
471
 
460
- #endif
472
+ if (utf8) {
473
+ result = rb_utf8_str_new(b->read_buffer, length);
474
+ } else {
475
+ result = rb_str_new(b->read_buffer, length);
476
+ }
477
+
478
+ #if STR_UMINUS_DEDUPE
479
+ if (will_be_frozen) {
480
+ #if STR_UMINUS_DEDUPE_FROZEN
481
+ // Starting from MRI 2.8 it is preferable to freeze the string
482
+ // before deduplication so that it can be interned directly
483
+ // otherwise it would be duplicated first which is wasteful.
484
+ rb_str_freeze(result);
485
+ #endif //STR_UMINUS_DEDUPE_FROZEN
486
+ // MRI 2.5 and older do not deduplicate strings that are already
487
+ // frozen.
488
+ result = rb_funcall(result, s_uminus, 0);
489
+ }
490
+ #endif // STR_UMINUS_DEDUPE
491
+ _msgpack_buffer_consumed(b, length);
492
+ return result;
493
+
494
+ #endif // HAVE_RB_ENC_INTERNED_STR
495
+ }
461
496
 
497
+ #endif
@@ -20,6 +20,7 @@
20
20
 
21
21
  #include <stdbool.h>
22
22
  #include "ruby.h"
23
+ #include "ruby/encoding.h"
23
24
 
24
25
  #if defined(HAVE_RUBY_ST_H)
25
26
  # include "ruby/st.h" /* ruby hash on Ruby 1.9 */
@@ -38,18 +39,6 @@
38
39
  # define ZALLOC_N(type,n) RB_ZALLOC_N(type,n)
39
40
  #endif
40
41
 
41
- /*
42
- * COMPAT_HAVE_ENCODING
43
- */
44
- #ifdef HAVE_RUBY_ENCODING_H
45
- # include "ruby/encoding.h"
46
- # define COMPAT_HAVE_ENCODING
47
- #endif
48
-
49
- #if defined(__MACRUBY__) /* MacRuby */
50
- # undef COMPAT_HAVE_ENCODING
51
- #endif
52
-
53
42
 
54
43
  /*
55
44
  * define STR_DUP_LIKELY_DOES_COPY
@@ -4,6 +4,7 @@ have_header("ruby/st.h")
4
4
  have_header("st.h")
5
5
  have_func("rb_str_replace", ["ruby.h"])
6
6
  have_func("rb_intern_str", ["ruby.h"])
7
+ have_func("rb_enc_interned_str", "ruby.h")
7
8
  have_func("rb_sym2str", ["ruby.h"])
8
9
  have_func("rb_str_intern", ["ruby.h"])
9
10
  have_func("rb_block_lambda", ["ruby.h"])
@@ -25,6 +26,44 @@ if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'rbx'
25
26
  $CFLAGS << %[ -DDISABLE_RMEM]
26
27
  end
27
28
 
29
+ # checking if Hash#[]= (rb_hash_aset) dedupes string keys
30
+ h = {}
31
+ x = {}
32
+ r = rand.to_s
33
+ h[%W(#{r}).join('')] = :foo
34
+ x[%W(#{r}).join('')] = :foo
35
+ if x.keys[0].equal?(h.keys[0])
36
+ $CFLAGS << ' -DHASH_ASET_DEDUPE=1 '
37
+ else
38
+ $CFLAGS << ' -DHASH_ASET_DEDUPE=0 '
39
+ end
40
+
41
+
42
+ # checking if String#-@ (str_uminus) dedupes... '
43
+ begin
44
+ a = -(%w(t e s t).join)
45
+ b = -(%w(t e s t).join)
46
+ if a.equal?(b)
47
+ $CFLAGS << ' -DSTR_UMINUS_DEDUPE=1 '
48
+ else
49
+ $CFLAGS += ' -DSTR_UMINUS_DEDUPE=0 '
50
+ end
51
+ rescue NoMethodError
52
+ $CFLAGS << ' -DSTR_UMINUS_DEDUPE=0 '
53
+ end
54
+
55
+ # checking if String#-@ (str_uminus) directly interns frozen strings... '
56
+ begin
57
+ s = rand.to_s.freeze
58
+ if (-s).equal?(s) && (-s.dup).equal?(s)
59
+ $CFLAGS << ' -DSTR_UMINUS_DEDUPE_FROZEN=1 '
60
+ else
61
+ $CFLAGS << ' -DSTR_UMINUS_DEDUPE_FROZEN=0 '
62
+ end
63
+ rescue NoMethodError
64
+ $CFLAGS << ' -DSTR_UMINUS_DEDUPE_FROZEN=0 '
65
+ end
66
+
28
67
  if warnflags = CONFIG['warnflags']
29
68
  warnflags.slice!(/ -Wdeclaration-after-statement/)
30
69
  end
@@ -396,7 +396,6 @@ static inline void msgpack_packer_write_ext(msgpack_packer_t* pk, int ext_type,
396
396
  msgpack_buffer_append_string(PACKER_BUFFER_(pk), payload);
397
397
  }
398
398
 
399
- #ifdef COMPAT_HAVE_ENCODING
400
399
  static inline bool msgpack_packer_is_binary(VALUE v, int encindex)
401
400
  {
402
401
  return encindex == msgpack_rb_encindex_ascii8bit;
@@ -414,7 +413,6 @@ static inline bool msgpack_packer_is_utf8_compat_string(VALUE v, int encindex)
414
413
  #endif
415
414
  ;
416
415
  }
417
- #endif
418
416
 
419
417
  static inline void msgpack_packer_write_string_value(msgpack_packer_t* pk, VALUE v)
420
418
  {
@@ -425,7 +423,6 @@ static inline void msgpack_packer_write_string_value(msgpack_packer_t* pk, VALUE
425
423
  rb_raise(rb_eArgError, "size of string is too long to pack: %lu bytes should be <= %lu", len, 0xffffffffUL);
426
424
  }
427
425
 
428
- #ifdef COMPAT_HAVE_ENCODING
429
426
  int encindex = ENCODING_GET(v);
430
427
  if(msgpack_packer_is_binary(v, encindex) && !pk->compatibility_mode) {
431
428
  /* write ASCII-8BIT string using Binary type */
@@ -443,10 +440,6 @@ static inline void msgpack_packer_write_string_value(msgpack_packer_t* pk, VALUE
443
440
  msgpack_packer_write_raw_header(pk, (unsigned int)len);
444
441
  msgpack_buffer_append_string(PACKER_BUFFER_(pk), v);
445
442
  }
446
- #else
447
- msgpack_packer_write_raw_header(pk, (unsigned int)len);
448
- msgpack_buffer_append_string(PACKER_BUFFER_(pk), v);
449
- #endif
450
443
  }
451
444
 
452
445
  static inline void msgpack_packer_write_symbol_string_value(msgpack_packer_t* pk, VALUE v)
@@ -142,33 +142,17 @@ static inline void reset_head_byte(msgpack_unpacker_t* uk)
142
142
 
143
143
  static inline int object_complete(msgpack_unpacker_t* uk, VALUE object)
144
144
  {
145
+ if(uk->freeze) {
146
+ rb_obj_freeze(object);
147
+ }
148
+
145
149
  uk->last_object = object;
146
150
  reset_head_byte(uk);
147
151
  return PRIMITIVE_OBJECT_COMPLETE;
148
152
  }
149
153
 
150
- static inline int object_complete_string(msgpack_unpacker_t* uk, VALUE str)
151
- {
152
- #ifdef COMPAT_HAVE_ENCODING
153
- ENCODING_SET(str, msgpack_rb_encindex_utf8);
154
- #endif
155
- return object_complete(uk, str);
156
- }
157
-
158
- static inline int object_complete_binary(msgpack_unpacker_t* uk, VALUE str)
159
- {
160
- #ifdef COMPAT_HAVE_ENCODING
161
- ENCODING_SET(str, msgpack_rb_encindex_ascii8bit);
162
- #endif
163
- return object_complete(uk, str);
164
- }
165
-
166
154
  static inline int object_complete_ext(msgpack_unpacker_t* uk, int ext_type, VALUE str)
167
155
  {
168
- #ifdef COMPAT_HAVE_ENCODING
169
- ENCODING_SET(str, msgpack_rb_encindex_ascii8bit);
170
- #endif
171
-
172
156
  VALUE proc = msgpack_unpacker_ext_registry_lookup(&uk->ext_registry, ext_type);
173
157
  if(proc != Qnil) {
174
158
  VALUE obj = rb_funcall(proc, s_call, 1, str);
@@ -271,9 +255,10 @@ static int read_raw_body_cont(msgpack_unpacker_t* uk)
271
255
 
272
256
  int ret;
273
257
  if(uk->reading_raw_type == RAW_TYPE_STRING) {
274
- ret = object_complete_string(uk, uk->reading_raw);
275
- } else if(uk->reading_raw_type == RAW_TYPE_BINARY) {
276
- ret = object_complete_binary(uk, uk->reading_raw);
258
+ ENCODING_SET(uk->reading_raw, msgpack_rb_encindex_utf8);
259
+ ret = object_complete(uk, uk->reading_raw);
260
+ } else if (uk->reading_raw_type == RAW_TYPE_BINARY) {
261
+ ret = object_complete(uk, uk->reading_raw);
277
262
  } else {
278
263
  ret = object_complete_ext(uk, uk->reading_raw_type, uk->reading_raw);
279
264
  }
@@ -290,19 +275,20 @@ static inline int read_raw_body_begin(msgpack_unpacker_t* uk, int raw_type)
290
275
  if(length <= msgpack_buffer_top_readable_size(UNPACKER_BUFFER_(uk))) {
291
276
  /* don't use zerocopy for hash keys but get a frozen string directly
292
277
  * because rb_hash_aset freezes keys and it causes copying */
293
- bool will_freeze = is_reading_map_key(uk);
294
- VALUE string = msgpack_buffer_read_top_as_string(UNPACKER_BUFFER_(uk), length, will_freeze);
278
+ bool will_freeze = uk->freeze || is_reading_map_key(uk);
279
+ VALUE string = msgpack_buffer_read_top_as_string(UNPACKER_BUFFER_(uk), length, will_freeze, raw_type == RAW_TYPE_STRING);
295
280
  int ret;
296
- if(raw_type == RAW_TYPE_STRING) {
297
- ret = object_complete_string(uk, string);
298
- } else if(raw_type == RAW_TYPE_BINARY) {
299
- ret = object_complete_binary(uk, string);
281
+ if(raw_type == RAW_TYPE_STRING || raw_type == RAW_TYPE_BINARY) {
282
+ ret = object_complete(uk, string);
300
283
  } else {
301
284
  ret = object_complete_ext(uk, raw_type, string);
302
285
  }
286
+
287
+ # if !HASH_ASET_DEDUPE
303
288
  if(will_freeze) {
304
289
  rb_obj_freeze(string);
305
290
  }
291
+ # endif
306
292
  uk->reading_raw_remaining = 0;
307
293
  return ret;
308
294
  }
@@ -332,7 +318,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
332
318
  SWITCH_RANGE(b, 0xa0, 0xbf) // FixRaw / fixstr
333
319
  int count = b & 0x1f;
334
320
  if(count == 0) {
335
- return object_complete_string(uk, rb_str_buf_new(0));
321
+ return object_complete(uk, rb_utf8_str_new_static("", 0));
336
322
  }
337
323
  /* read_raw_body_begin sets uk->reading_raw */
338
324
  uk->reading_raw_remaining = count;
@@ -517,7 +503,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
517
503
  READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 1);
518
504
  uint8_t count = cb->u8;
519
505
  if(count == 0) {
520
- return object_complete_string(uk, rb_str_buf_new(0));
506
+ return object_complete(uk, rb_utf8_str_new_static("", 0));
521
507
  }
522
508
  /* read_raw_body_begin sets uk->reading_raw */
523
509
  uk->reading_raw_remaining = count;
@@ -529,7 +515,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
529
515
  READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 2);
530
516
  uint16_t count = _msgpack_be16(cb->u16);
531
517
  if(count == 0) {
532
- return object_complete_string(uk, rb_str_buf_new(0));
518
+ return object_complete(uk, rb_utf8_str_new_static("", 0));
533
519
  }
534
520
  /* read_raw_body_begin sets uk->reading_raw */
535
521
  uk->reading_raw_remaining = count;
@@ -541,7 +527,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
541
527
  READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 4);
542
528
  uint32_t count = _msgpack_be32(cb->u32);
543
529
  if(count == 0) {
544
- return object_complete_string(uk, rb_str_buf_new(0));
530
+ return object_complete(uk, rb_utf8_str_new_static("", 0));
545
531
  }
546
532
  /* read_raw_body_begin sets uk->reading_raw */
547
533
  uk->reading_raw_remaining = count;
@@ -553,7 +539,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
553
539
  READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 1);
554
540
  uint8_t count = cb->u8;
555
541
  if(count == 0) {
556
- return object_complete_binary(uk, rb_str_buf_new(0));
542
+ return object_complete(uk, rb_str_new_static("", 0));
557
543
  }
558
544
  /* read_raw_body_begin sets uk->reading_raw */
559
545
  uk->reading_raw_remaining = count;
@@ -565,7 +551,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
565
551
  READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 2);
566
552
  uint16_t count = _msgpack_be16(cb->u16);
567
553
  if(count == 0) {
568
- return object_complete_binary(uk, rb_str_buf_new(0));
554
+ return object_complete(uk, rb_str_new_static("", 0));
569
555
  }
570
556
  /* read_raw_body_begin sets uk->reading_raw */
571
557
  uk->reading_raw_remaining = count;
@@ -577,7 +563,7 @@ static int read_primitive(msgpack_unpacker_t* uk)
577
563
  READ_CAST_BLOCK_OR_RETURN_EOF(cb, uk, 4);
578
564
  uint32_t count = _msgpack_be32(cb->u32);
579
565
  if(count == 0) {
580
- return object_complete_binary(uk, rb_str_buf_new(0));
566
+ return object_complete(uk, rb_str_new_static("", 0));
581
567
  }
582
568
  /* read_raw_body_begin sets uk->reading_raw */
583
569
  uk->reading_raw_remaining = count;
@@ -64,6 +64,7 @@ struct msgpack_unpacker_t {
64
64
 
65
65
  /* options */
66
66
  bool symbolize_keys;
67
+ bool freeze;
67
68
  bool allow_unknown_ext;
68
69
  };
69
70
 
@@ -96,6 +97,11 @@ static inline void msgpack_unpacker_set_symbolized_keys(msgpack_unpacker_t* uk,
96
97
  uk->symbolize_keys = enable;
97
98
  }
98
99
 
100
+ static inline void msgpack_unpacker_set_freeze(msgpack_unpacker_t* uk, bool enable)
101
+ {
102
+ uk->freeze = enable;
103
+ }
104
+
99
105
  static inline void msgpack_unpacker_set_allow_unknown_ext(msgpack_unpacker_t* uk, bool enable)
100
106
  {
101
107
  uk->allow_unknown_ext = enable;
@@ -105,6 +105,9 @@ VALUE MessagePack_Unpacker_initialize(int argc, VALUE* argv, VALUE self)
105
105
  v = rb_hash_aref(options, ID2SYM(rb_intern("symbolize_keys")));
106
106
  msgpack_unpacker_set_symbolized_keys(uk, RTEST(v));
107
107
 
108
+ v = rb_hash_aref(options, ID2SYM(rb_intern("freeze")));
109
+ msgpack_unpacker_set_freeze(uk, RTEST(v));
110
+
108
111
  v = rb_hash_aref(options, ID2SYM(rb_intern("allow_unknown_ext")));
109
112
  msgpack_unpacker_set_allow_unknown_ext(uk, RTEST(v));
110
113
  }
@@ -118,6 +121,12 @@ static VALUE Unpacker_symbolized_keys_p(VALUE self)
118
121
  return uk->symbolize_keys ? Qtrue : Qfalse;
119
122
  }
120
123
 
124
+ static VALUE Unpacker_freeze_p(VALUE self)
125
+ {
126
+ UNPACKER(self, uk);
127
+ return uk->freeze ? Qtrue : Qfalse;
128
+ }
129
+
121
130
  static VALUE Unpacker_allow_unknown_ext_p(VALUE self)
122
131
  {
123
132
  UNPACKER(self, uk);
@@ -438,6 +447,7 @@ void MessagePack_Unpacker_module_init(VALUE mMessagePack)
438
447
 
439
448
  rb_define_method(cMessagePack_Unpacker, "initialize", MessagePack_Unpacker_initialize, -1);
440
449
  rb_define_method(cMessagePack_Unpacker, "symbolize_keys?", Unpacker_symbolized_keys_p, 0);
450
+ rb_define_method(cMessagePack_Unpacker, "freeze?", Unpacker_freeze_p, 0);
441
451
  rb_define_method(cMessagePack_Unpacker, "allow_unknown_ext?", Unpacker_allow_unknown_ext_p, 0);
442
452
  rb_define_method(cMessagePack_Unpacker, "buffer", Unpacker_buffer, 0);
443
453
  rb_define_method(cMessagePack_Unpacker, "read", Unpacker_read, 0);
@@ -5,11 +5,7 @@ if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby" # This is same with `/java/ =
5
5
  require "msgpack/msgpack.jar"
6
6
  org.msgpack.jruby.MessagePackLibrary.new.load(JRuby.runtime, false)
7
7
  else
8
- begin
9
- require "msgpack/#{RUBY_VERSION[/\d+.\d+/]}/msgpack"
10
- rescue LoadError
11
- require "msgpack/msgpack"
12
- end
8
+ require "msgpack/msgpack"
13
9
  end
14
10
 
15
11
  require "msgpack/packer"