msgpack 1.3.3 → 1.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (67)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yaml +57 -0
  3. data/.rubocop.yml +2 -2
  4. data/ChangeLog +74 -0
  5. data/Gemfile +1 -1
  6. data/README.md +266 -0
  7. data/Rakefile +1 -9
  8. data/bench/bench.rb +78 -0
  9. data/bin/console +8 -0
  10. data/doclib/msgpack/factory.rb +47 -3
  11. data/doclib/msgpack/packer.rb +5 -4
  12. data/doclib/msgpack/unpacker.rb +2 -2
  13. data/ext/java/org/msgpack/jruby/Buffer.java +23 -16
  14. data/ext/java/org/msgpack/jruby/Decoder.java +46 -23
  15. data/ext/java/org/msgpack/jruby/Encoder.java +68 -30
  16. data/ext/java/org/msgpack/jruby/ExtensionRegistry.java +37 -49
  17. data/ext/java/org/msgpack/jruby/ExtensionValue.java +5 -8
  18. data/ext/java/org/msgpack/jruby/Factory.java +47 -7
  19. data/ext/java/org/msgpack/jruby/Packer.java +29 -17
  20. data/ext/java/org/msgpack/jruby/Unpacker.java +72 -37
  21. data/ext/msgpack/buffer.c +42 -68
  22. data/ext/msgpack/buffer.h +59 -14
  23. data/ext/msgpack/buffer_class.c +90 -52
  24. data/ext/msgpack/compat.h +1 -111
  25. data/ext/msgpack/extconf.rb +45 -19
  26. data/ext/msgpack/factory_class.c +133 -43
  27. data/ext/msgpack/packer.c +60 -36
  28. data/ext/msgpack/packer.h +27 -25
  29. data/ext/msgpack/packer_class.c +84 -77
  30. data/ext/msgpack/packer_class.h +11 -0
  31. data/ext/msgpack/packer_ext_registry.c +24 -32
  32. data/ext/msgpack/packer_ext_registry.h +40 -33
  33. data/ext/msgpack/sysdep.h +5 -2
  34. data/ext/msgpack/unpacker.c +132 -115
  35. data/ext/msgpack/unpacker.h +23 -10
  36. data/ext/msgpack/unpacker_class.c +83 -78
  37. data/ext/msgpack/unpacker_class.h +11 -0
  38. data/ext/msgpack/unpacker_ext_registry.c +42 -18
  39. data/ext/msgpack/unpacker_ext_registry.h +23 -16
  40. data/lib/msgpack/bigint.rb +69 -0
  41. data/lib/msgpack/factory.rb +103 -0
  42. data/lib/msgpack/symbol.rb +21 -4
  43. data/lib/msgpack/time.rb +1 -1
  44. data/lib/msgpack/version.rb +4 -8
  45. data/lib/msgpack.rb +6 -12
  46. data/msgpack.gemspec +4 -6
  47. data/spec/bigint_spec.rb +26 -0
  48. data/spec/cruby/buffer_spec.rb +17 -0
  49. data/spec/factory_spec.rb +351 -12
  50. data/spec/msgpack_spec.rb +1 -1
  51. data/spec/packer_spec.rb +18 -0
  52. data/spec/spec_helper.rb +37 -3
  53. data/spec/timestamp_spec.rb +38 -0
  54. data/spec/unpacker_spec.rb +157 -4
  55. metadata +31 -61
  56. data/.travis.yml +0 -43
  57. data/README.rdoc +0 -225
  58. data/bench/pack.rb +0 -23
  59. data/bench/pack_log.rb +0 -33
  60. data/bench/pack_log_long.rb +0 -65
  61. data/bench/pack_symbols.rb +0 -28
  62. data/bench/run.sh +0 -14
  63. data/bench/run_long.sh +0 -35
  64. data/bench/run_symbols.sh +0 -26
  65. data/bench/unpack.rb +0 -21
  66. data/bench/unpack_log.rb +0 -34
  67. data/bench/unpack_log_long.rb +0 -67
data/ext/msgpack/buffer.c CHANGED
@@ -23,38 +23,32 @@
23
23
  static ID s_replace;
24
24
  #endif
25
25
 
26
- #ifdef COMPAT_HAVE_ENCODING /* see compat.h*/
27
26
  int msgpack_rb_encindex_utf8;
28
27
  int msgpack_rb_encindex_usascii;
29
28
  int msgpack_rb_encindex_ascii8bit;
30
- #endif
31
29
 
32
- #ifndef DISABLE_RMEM
30
+ ID s_uminus;
31
+
33
32
  static msgpack_rmem_t s_rmem;
34
- #endif
35
33
 
36
- void msgpack_buffer_static_init()
34
+ void msgpack_buffer_static_init(void)
37
35
  {
38
- #ifdef COMPAT_HAVE_ENCODING
36
+ s_uminus = rb_intern("-@");
37
+
39
38
  msgpack_rb_encindex_utf8 = rb_utf8_encindex();
40
39
  msgpack_rb_encindex_usascii = rb_usascii_encindex();
41
40
  msgpack_rb_encindex_ascii8bit = rb_ascii8bit_encindex();
42
- #endif
43
41
 
44
- #ifndef DISABLE_RMEM
45
42
  msgpack_rmem_init(&s_rmem);
46
- #endif
47
43
 
48
44
  #ifndef HAVE_RB_STR_REPLACE
49
45
  s_replace = rb_intern("replace");
50
46
  #endif
51
47
  }
52
48
 
53
- void msgpack_buffer_static_destroy()
49
+ void msgpack_buffer_static_destroy(void)
54
50
  {
55
- #ifndef DISABLE_RMEM
56
51
  msgpack_rmem_destroy(&s_rmem);
57
- #endif
58
52
  }
59
53
 
60
54
  void msgpack_buffer_init(msgpack_buffer_t* b)
@@ -72,16 +66,12 @@ void msgpack_buffer_init(msgpack_buffer_t* b)
72
66
  static void _msgpack_buffer_chunk_destroy(msgpack_buffer_chunk_t* c)
73
67
  {
74
68
  if(c->mem != NULL) {
75
- #ifndef DISABLE_RMEM
76
69
  if(!msgpack_rmem_free(&s_rmem, c->mem)) {
77
70
  xfree(c->mem);
78
71
  }
79
72
  /* no needs to update rmem_owner because chunks will not be
80
73
  * free()ed (left in free_list) and thus *rmem_owner is
81
74
  * always valid. */
82
- #else
83
- xfree(c->mem);
84
- #endif
85
75
  }
86
76
  c->first = NULL;
87
77
  c->last = NULL;
@@ -108,8 +98,25 @@ void msgpack_buffer_destroy(msgpack_buffer_t* b)
108
98
  }
109
99
  }
110
100
 
111
- void msgpack_buffer_mark(msgpack_buffer_t* b)
101
+ size_t msgpack_buffer_memsize(const msgpack_buffer_t* b)
112
102
  {
103
+ size_t memsize = 0;
104
+ msgpack_buffer_chunk_t* c = b->head;
105
+
106
+ while(c) {
107
+ memsize += sizeof(msgpack_buffer_chunk_t);
108
+ if(c->mapped_string != NO_MAPPED_STRING) {
109
+ memsize += (c->last - c->first);
110
+ }
111
+ c = c->next;
112
+ }
113
+
114
+ return memsize;
115
+ }
116
+
117
+ void msgpack_buffer_mark(void *ptr)
118
+ {
119
+ msgpack_buffer_t* b = ptr;
113
120
  /* head is always available */
114
121
  msgpack_buffer_chunk_t* c = b->head;
115
122
  while(c != &b->tail) {
@@ -120,8 +127,6 @@ void msgpack_buffer_mark(msgpack_buffer_t* b)
120
127
 
121
128
  rb_gc_mark(b->io);
122
129
  rb_gc_mark(b->io_buffer);
123
-
124
- rb_gc_mark(b->owner);
125
130
  }
126
131
 
127
132
  bool _msgpack_buffer_shift_chunk(msgpack_buffer_t* b)
@@ -158,24 +163,17 @@ size_t msgpack_buffer_read_to_string_nonblock(msgpack_buffer_t* b, VALUE string,
158
163
  {
159
164
  size_t avail = msgpack_buffer_top_readable_size(b);
160
165
 
161
- #ifndef DISABLE_BUFFER_READ_REFERENCE_OPTIMIZE
162
166
  /* optimize */
163
167
  if(length <= avail && RSTRING_LEN(string) == 0 &&
164
168
  b->head->mapped_string != NO_MAPPED_STRING &&
165
169
  length >= b->read_reference_threshold) {
166
170
  VALUE s = _msgpack_buffer_refer_head_mapped_string(b, length);
167
- #ifndef HAVE_RB_STR_REPLACE
168
- /* TODO MRI 1.8 */
169
- rb_funcall(string, s_replace, 1, s);
170
- #else
171
171
  rb_str_replace(string, s);
172
- #endif
173
172
  /* here doesn't have to call ENCODING_SET because
174
173
  * encoding of s is always ASCII-8BIT */
175
174
  _msgpack_buffer_consumed(b, length);
176
175
  return length;
177
176
  }
178
- #endif
179
177
 
180
178
  size_t const length_orig = length;
181
179
 
@@ -288,15 +286,11 @@ static inline void _msgpack_buffer_add_new_chunk(msgpack_buffer_t* b)
288
286
 
289
287
  msgpack_buffer_chunk_t* nc = _msgpack_buffer_alloc_new_chunk(b);
290
288
 
291
- #ifndef DISABLE_RMEM
292
- #ifndef DISABLE_RMEM_REUSE_INTERNAL_FRAGMENT
293
289
  if(b->rmem_last == b->tail_buffer_end) {
294
290
  /* reuse unused rmem space */
295
291
  size_t unused = b->tail_buffer_end - b->tail.last;
296
292
  b->rmem_last -= unused;
297
293
  }
298
- #endif
299
- #endif
300
294
 
301
295
  /* rebuild tail */
302
296
  *nc = b->tail;
@@ -307,10 +301,13 @@ static inline void _msgpack_buffer_add_new_chunk(msgpack_buffer_t* b)
307
301
 
308
302
  static inline void _msgpack_buffer_append_reference(msgpack_buffer_t* b, VALUE string)
309
303
  {
310
- VALUE mapped_string = rb_str_dup(string);
311
- #ifdef COMPAT_HAVE_ENCODING
312
- ENCODING_SET(mapped_string, msgpack_rb_encindex_ascii8bit);
313
- #endif
304
+ VALUE mapped_string;
305
+ if(ENCODING_GET(string) == msgpack_rb_encindex_ascii8bit && RTEST(rb_obj_frozen_p(string))) {
306
+ mapped_string = string;
307
+ } else {
308
+ mapped_string = rb_str_dup(string);
309
+ ENCODING_SET(mapped_string, msgpack_rb_encindex_ascii8bit);
310
+ }
314
311
 
315
312
  _msgpack_buffer_add_new_chunk(b);
316
313
 
@@ -337,25 +334,13 @@ void _msgpack_buffer_append_long_string(msgpack_buffer_t* b, VALUE string)
337
334
 
338
335
  if(b->io != Qnil) {
339
336
  msgpack_buffer_flush(b);
340
- #ifdef COMPAT_HAVE_ENCODING
341
337
  if (ENCODING_GET(string) == msgpack_rb_encindex_ascii8bit) {
342
338
  rb_funcall(b->io, b->io_write_all_method, 1, string);
343
- } else if(!STR_DUP_LIKELY_DOES_COPY(string)) {
344
- VALUE s = rb_str_dup(string);
345
- ENCODING_SET(s, msgpack_rb_encindex_ascii8bit);
346
- rb_funcall(b->io, b->io_write_all_method, 1, s);
347
339
  } else {
348
340
  msgpack_buffer_append(b, RSTRING_PTR(string), length);
349
341
  }
350
- #else
351
- rb_funcall(b->io, b->io_write_all_method, 1, string);
352
- #endif
353
-
354
- } else if(!STR_DUP_LIKELY_DOES_COPY(string)) {
355
- _msgpack_buffer_append_reference(b, string);
356
-
357
342
  } else {
358
- msgpack_buffer_append(b, RSTRING_PTR(string), length);
343
+ _msgpack_buffer_append_reference(b, string);
359
344
  }
360
345
  }
361
346
 
@@ -363,11 +348,8 @@ static inline void* _msgpack_buffer_chunk_malloc(
363
348
  msgpack_buffer_t* b, msgpack_buffer_chunk_t* c,
364
349
  size_t required_size, size_t* allocated_size)
365
350
  {
366
- #ifndef DISABLE_RMEM
367
351
  if(required_size <= MSGPACK_RMEM_PAGE_SIZE) {
368
- #ifndef DISABLE_RMEM_REUSE_INTERNAL_FRAGMENT
369
352
  if((size_t)(b->rmem_end - b->rmem_last) < required_size) {
370
- #endif
371
353
  /* alloc new rmem page */
372
354
  *allocated_size = MSGPACK_RMEM_PAGE_SIZE;
373
355
  char* buffer = msgpack_rmem_alloc(&s_rmem);
@@ -378,8 +360,6 @@ static inline void* _msgpack_buffer_chunk_malloc(
378
360
  b->rmem_last = b->rmem_end = buffer + MSGPACK_RMEM_PAGE_SIZE;
379
361
 
380
362
  return buffer;
381
-
382
- #ifndef DISABLE_RMEM_REUSE_INTERNAL_FRAGMENT
383
363
  } else {
384
364
  /* reuse unused rmem */
385
365
  *allocated_size = (size_t)(b->rmem_end - b->rmem_last);
@@ -393,13 +373,7 @@ static inline void* _msgpack_buffer_chunk_malloc(
393
373
 
394
374
  return buffer;
395
375
  }
396
- #endif
397
- }
398
- #else
399
- if(required_size < 72) {
400
- required_size = 72;
401
376
  }
402
- #endif
403
377
 
404
378
  // TODO alignment?
405
379
  *allocated_size = required_size;
@@ -454,11 +428,7 @@ void _msgpack_buffer_expand(msgpack_buffer_t* b, const char* data, size_t length
454
428
  size_t capacity = b->tail.last - b->tail.first;
455
429
 
456
430
  /* can't realloc mapped chunk or rmem page */
457
- if(b->tail.mapped_string != NO_MAPPED_STRING
458
- #ifndef DISABLE_RMEM
459
- || capacity <= MSGPACK_RMEM_PAGE_SIZE
460
- #endif
461
- ) {
431
+ if(b->tail.mapped_string != NO_MAPPED_STRING || capacity <= MSGPACK_RMEM_PAGE_SIZE) {
462
432
  /* allocate new chunk */
463
433
  _msgpack_buffer_add_new_chunk(b);
464
434
 
@@ -631,13 +601,13 @@ size_t msgpack_buffer_flush_to_io(msgpack_buffer_t* b, VALUE io, ID write_method
631
601
  size_t _msgpack_buffer_feed_from_io(msgpack_buffer_t* b)
632
602
  {
633
603
  if(b->io_buffer == Qnil) {
634
- b->io_buffer = rb_funcall(b->io, b->io_partial_read_method, 1, LONG2NUM(b->io_buffer_size));
604
+ b->io_buffer = rb_funcall(b->io, b->io_partial_read_method, 1, SIZET2NUM(b->io_buffer_size));
635
605
  if(b->io_buffer == Qnil) {
636
606
  rb_raise(rb_eEOFError, "IO reached end of file");
637
607
  }
638
608
  StringValue(b->io_buffer);
639
609
  } else {
640
- VALUE ret = rb_funcall(b->io, b->io_partial_read_method, 2, LONG2NUM(b->io_buffer_size), b->io_buffer);
610
+ VALUE ret = rb_funcall(b->io, b->io_partial_read_method, 2, SIZET2NUM(b->io_buffer_size), b->io_buffer);
641
611
  if(ret == Qnil) {
642
612
  rb_raise(rb_eEOFError, "IO reached end of file");
643
613
  }
@@ -656,9 +626,11 @@ size_t _msgpack_buffer_feed_from_io(msgpack_buffer_t* b)
656
626
 
657
627
  size_t _msgpack_buffer_read_from_io_to_string(msgpack_buffer_t* b, VALUE string, size_t length)
658
628
  {
629
+ #define MIN(x, y) (((x) < (y)) ? (x) : (y))
630
+
659
631
  if(RSTRING_LEN(string) == 0) {
660
632
  /* direct read */
661
- VALUE ret = rb_funcall(b->io, b->io_partial_read_method, 2, LONG2NUM(length), string);
633
+ VALUE ret = rb_funcall(b->io, b->io_partial_read_method, 2, SIZET2NUM(MIN(b->io_buffer_size, length)), string);
662
634
  if(ret == Qnil) {
663
635
  return 0;
664
636
  }
@@ -670,7 +642,7 @@ size_t _msgpack_buffer_read_from_io_to_string(msgpack_buffer_t* b, VALUE string,
670
642
  b->io_buffer = rb_str_buf_new(0);
671
643
  }
672
644
 
673
- VALUE ret = rb_funcall(b->io, b->io_partial_read_method, 2, LONG2NUM(length), b->io_buffer);
645
+ VALUE ret = rb_funcall(b->io, b->io_partial_read_method, 2, SIZET2NUM(MIN(b->io_buffer_size, length)), b->io_buffer);
674
646
  if(ret == Qnil) {
675
647
  return 0;
676
648
  }
@@ -678,6 +650,8 @@ size_t _msgpack_buffer_read_from_io_to_string(msgpack_buffer_t* b, VALUE string,
678
650
 
679
651
  rb_str_buf_cat(string, (const void*)RSTRING_PTR(b->io_buffer), rl);
680
652
  return rl;
653
+
654
+ #undef MIN
681
655
  }
682
656
 
683
657
  size_t _msgpack_buffer_skip_from_io(msgpack_buffer_t* b, size_t length)
@@ -686,7 +660,7 @@ size_t _msgpack_buffer_skip_from_io(msgpack_buffer_t* b, size_t length)
686
660
  b->io_buffer = rb_str_buf_new(0);
687
661
  }
688
662
 
689
- VALUE ret = rb_funcall(b->io, b->io_partial_read_method, 2, LONG2NUM(length), b->io_buffer);
663
+ VALUE ret = rb_funcall(b->io, b->io_partial_read_method, 2, SIZET2NUM(length), b->io_buffer);
690
664
  if(ret == Qnil) {
691
665
  return 0;
692
666
  }
data/ext/msgpack/buffer.h CHANGED
@@ -49,11 +49,15 @@
49
49
 
50
50
  #define NO_MAPPED_STRING ((VALUE)0)
51
51
 
52
- #ifdef COMPAT_HAVE_ENCODING /* see compat.h*/
52
+ #ifndef RB_ENC_INTERNED_STR_NULL_CHECK
53
+ #define RB_ENC_INTERNED_STR_NULL_CHECK 0
54
+ #endif
55
+
53
56
  extern int msgpack_rb_encindex_utf8;
54
57
  extern int msgpack_rb_encindex_usascii;
55
58
  extern int msgpack_rb_encindex_ascii8bit;
56
- #endif
59
+
60
+ extern ID s_uminus;
57
61
 
58
62
  struct msgpack_buffer_chunk_t;
59
63
  typedef struct msgpack_buffer_chunk_t msgpack_buffer_chunk_t;
@@ -98,11 +102,9 @@ struct msgpack_buffer_t {
98
102
  msgpack_buffer_chunk_t* head;
99
103
  msgpack_buffer_chunk_t* free_list;
100
104
 
101
- #ifndef DISABLE_RMEM
102
105
  char* rmem_last;
103
106
  char* rmem_end;
104
107
  void** rmem_owner;
105
- #endif
106
108
 
107
109
  union msgpack_buffer_cast_block_t cast_block;
108
110
 
@@ -114,25 +116,25 @@ struct msgpack_buffer_t {
114
116
  size_t write_reference_threshold;
115
117
  size_t read_reference_threshold;
116
118
  size_t io_buffer_size;
117
-
118
- VALUE owner;
119
119
  };
120
120
 
121
121
  /*
122
122
  * initialization functions
123
123
  */
124
- void msgpack_buffer_static_init();
124
+ void msgpack_buffer_static_init(void);
125
125
 
126
- void msgpack_buffer_static_destroy();
126
+ void msgpack_buffer_static_destroy(void);
127
127
 
128
128
  void msgpack_buffer_init(msgpack_buffer_t* b);
129
129
 
130
130
  void msgpack_buffer_destroy(msgpack_buffer_t* b);
131
131
 
132
- void msgpack_buffer_mark(msgpack_buffer_t* b);
132
+ void msgpack_buffer_mark(void* b);
133
133
 
134
134
  void msgpack_buffer_clear(msgpack_buffer_t* b);
135
135
 
136
+ size_t msgpack_buffer_memsize(const msgpack_buffer_t* b);
137
+
136
138
  static inline void msgpack_buffer_set_write_reference_threshold(msgpack_buffer_t* b, size_t length)
137
139
  {
138
140
  if(length < MSGPACK_BUFFER_STRING_WRITE_REFERENCE_MINIMUM) {
@@ -438,24 +440,67 @@ static inline VALUE _msgpack_buffer_refer_head_mapped_string(msgpack_buffer_t* b
438
440
  return rb_str_substr(b->head->mapped_string, offset, length);
439
441
  }
440
442
 
441
- static inline VALUE msgpack_buffer_read_top_as_string(msgpack_buffer_t* b, size_t length, bool will_be_frozen)
443
+ static inline VALUE msgpack_buffer_read_top_as_string(msgpack_buffer_t* b, size_t length, bool will_be_frozen, bool utf8)
442
444
  {
443
- #ifndef DISABLE_BUFFER_READ_REFERENCE_OPTIMIZE
444
445
  /* optimize */
445
446
  if(!will_be_frozen &&
446
447
  b->head->mapped_string != NO_MAPPED_STRING &&
447
448
  length >= b->read_reference_threshold) {
448
449
  VALUE result = _msgpack_buffer_refer_head_mapped_string(b, length);
450
+ if (utf8) ENCODING_SET(result, msgpack_rb_encindex_utf8);
449
451
  _msgpack_buffer_consumed(b, length);
450
452
  return result;
451
453
  }
452
- #endif
453
454
 
454
- VALUE result = rb_str_new(b->read_buffer, length);
455
+ VALUE result;
456
+
457
+ #ifdef HAVE_RB_ENC_INTERNED_STR
458
+ if (will_be_frozen) {
459
+ if (RB_ENC_INTERNED_STR_NULL_CHECK && length == 0) {
460
+ result = rb_enc_interned_str("", length, utf8 ? rb_utf8_encoding() : rb_ascii8bit_encoding());
461
+ } else {
462
+ result = rb_enc_interned_str(b->read_buffer, length, utf8 ? rb_utf8_encoding() : rb_ascii8bit_encoding());
463
+ }
464
+ } else {
465
+ if (utf8) {
466
+ result = rb_utf8_str_new(b->read_buffer, length);
467
+ } else {
468
+ result = rb_str_new(b->read_buffer, length);
469
+ }
470
+ }
455
471
  _msgpack_buffer_consumed(b, length);
456
472
  return result;
473
+
474
+ #else
475
+
476
+ if (utf8) {
477
+ result = rb_utf8_str_new(b->read_buffer, length);
478
+ } else {
479
+ result = rb_str_new(b->read_buffer, length);
480
+ }
481
+
482
+ #if STR_UMINUS_DEDUPE
483
+ if (will_be_frozen) {
484
+ #if STR_UMINUS_DEDUPE_FROZEN
485
+ // Starting from MRI 2.8 it is preferable to freeze the string
486
+ // before deduplication so that it can be interned directly
487
+ // otherwise it would be duplicated first which is wasteful.
488
+ rb_str_freeze(result);
489
+ #endif //STR_UMINUS_DEDUPE_FROZEN
490
+ // MRI 2.5 and older do not deduplicate strings that are already
491
+ // frozen.
492
+ result = rb_funcall(result, s_uminus, 0);
493
+ }
494
+ #endif // STR_UMINUS_DEDUPE
495
+ _msgpack_buffer_consumed(b, length);
496
+ return result;
497
+
498
+ #endif // HAVE_RB_ENC_INTERNED_STR
457
499
  }
458
500
 
501
+ static inline VALUE msgpack_buffer_read_top_as_symbol(msgpack_buffer_t* b, size_t length, bool utf8)
502
+ {
503
+ return rb_str_intern(msgpack_buffer_read_top_as_string(b, length, true, utf8));
504
+ }
459
505
 
460
506
  #endif
461
-