msgpack 1.3.3 → 1.6.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (67) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/ci.yaml +57 -0
  3. data/.rubocop.yml +2 -2
  4. data/ChangeLog +74 -0
  5. data/Gemfile +1 -1
  6. data/README.md +266 -0
  7. data/Rakefile +1 -9
  8. data/bench/bench.rb +78 -0
  9. data/bin/console +8 -0
  10. data/doclib/msgpack/factory.rb +47 -3
  11. data/doclib/msgpack/packer.rb +5 -4
  12. data/doclib/msgpack/unpacker.rb +2 -2
  13. data/ext/java/org/msgpack/jruby/Buffer.java +23 -16
  14. data/ext/java/org/msgpack/jruby/Decoder.java +46 -23
  15. data/ext/java/org/msgpack/jruby/Encoder.java +68 -30
  16. data/ext/java/org/msgpack/jruby/ExtensionRegistry.java +37 -49
  17. data/ext/java/org/msgpack/jruby/ExtensionValue.java +5 -8
  18. data/ext/java/org/msgpack/jruby/Factory.java +47 -7
  19. data/ext/java/org/msgpack/jruby/Packer.java +29 -17
  20. data/ext/java/org/msgpack/jruby/Unpacker.java +72 -37
  21. data/ext/msgpack/buffer.c +42 -68
  22. data/ext/msgpack/buffer.h +59 -14
  23. data/ext/msgpack/buffer_class.c +90 -52
  24. data/ext/msgpack/compat.h +1 -111
  25. data/ext/msgpack/extconf.rb +45 -19
  26. data/ext/msgpack/factory_class.c +133 -43
  27. data/ext/msgpack/packer.c +60 -36
  28. data/ext/msgpack/packer.h +27 -25
  29. data/ext/msgpack/packer_class.c +84 -77
  30. data/ext/msgpack/packer_class.h +11 -0
  31. data/ext/msgpack/packer_ext_registry.c +24 -32
  32. data/ext/msgpack/packer_ext_registry.h +40 -33
  33. data/ext/msgpack/sysdep.h +5 -2
  34. data/ext/msgpack/unpacker.c +132 -115
  35. data/ext/msgpack/unpacker.h +23 -10
  36. data/ext/msgpack/unpacker_class.c +83 -78
  37. data/ext/msgpack/unpacker_class.h +11 -0
  38. data/ext/msgpack/unpacker_ext_registry.c +42 -18
  39. data/ext/msgpack/unpacker_ext_registry.h +23 -16
  40. data/lib/msgpack/bigint.rb +69 -0
  41. data/lib/msgpack/factory.rb +103 -0
  42. data/lib/msgpack/symbol.rb +21 -4
  43. data/lib/msgpack/time.rb +1 -1
  44. data/lib/msgpack/version.rb +4 -8
  45. data/lib/msgpack.rb +6 -12
  46. data/msgpack.gemspec +4 -6
  47. data/spec/bigint_spec.rb +26 -0
  48. data/spec/cruby/buffer_spec.rb +17 -0
  49. data/spec/factory_spec.rb +351 -12
  50. data/spec/msgpack_spec.rb +1 -1
  51. data/spec/packer_spec.rb +18 -0
  52. data/spec/spec_helper.rb +37 -3
  53. data/spec/timestamp_spec.rb +38 -0
  54. data/spec/unpacker_spec.rb +157 -4
  55. metadata +31 -61
  56. data/.travis.yml +0 -43
  57. data/README.rdoc +0 -225
  58. data/bench/pack.rb +0 -23
  59. data/bench/pack_log.rb +0 -33
  60. data/bench/pack_log_long.rb +0 -65
  61. data/bench/pack_symbols.rb +0 -28
  62. data/bench/run.sh +0 -14
  63. data/bench/run_long.sh +0 -35
  64. data/bench/run_symbols.sh +0 -26
  65. data/bench/unpack.rb +0 -21
  66. data/bench/unpack_log.rb +0 -34
  67. data/bench/unpack_log_long.rb +0 -67
data/ext/msgpack/buffer.c CHANGED
@@ -23,38 +23,32 @@
23
23
  static ID s_replace;
24
24
  #endif
25
25
 
26
- #ifdef COMPAT_HAVE_ENCODING /* see compat.h*/
27
26
  int msgpack_rb_encindex_utf8;
28
27
  int msgpack_rb_encindex_usascii;
29
28
  int msgpack_rb_encindex_ascii8bit;
30
- #endif
31
29
 
32
- #ifndef DISABLE_RMEM
30
+ ID s_uminus;
31
+
33
32
  static msgpack_rmem_t s_rmem;
34
- #endif
35
33
 
36
- void msgpack_buffer_static_init()
34
+ void msgpack_buffer_static_init(void)
37
35
  {
38
- #ifdef COMPAT_HAVE_ENCODING
36
+ s_uminus = rb_intern("-@");
37
+
39
38
  msgpack_rb_encindex_utf8 = rb_utf8_encindex();
40
39
  msgpack_rb_encindex_usascii = rb_usascii_encindex();
41
40
  msgpack_rb_encindex_ascii8bit = rb_ascii8bit_encindex();
42
- #endif
43
41
 
44
- #ifndef DISABLE_RMEM
45
42
  msgpack_rmem_init(&s_rmem);
46
- #endif
47
43
 
48
44
  #ifndef HAVE_RB_STR_REPLACE
49
45
  s_replace = rb_intern("replace");
50
46
  #endif
51
47
  }
52
48
 
53
- void msgpack_buffer_static_destroy()
49
+ void msgpack_buffer_static_destroy(void)
54
50
  {
55
- #ifndef DISABLE_RMEM
56
51
  msgpack_rmem_destroy(&s_rmem);
57
- #endif
58
52
  }
59
53
 
60
54
  void msgpack_buffer_init(msgpack_buffer_t* b)
@@ -72,16 +66,12 @@ void msgpack_buffer_init(msgpack_buffer_t* b)
72
66
  static void _msgpack_buffer_chunk_destroy(msgpack_buffer_chunk_t* c)
73
67
  {
74
68
  if(c->mem != NULL) {
75
- #ifndef DISABLE_RMEM
76
69
  if(!msgpack_rmem_free(&s_rmem, c->mem)) {
77
70
  xfree(c->mem);
78
71
  }
79
72
  /* no needs to update rmem_owner because chunks will not be
80
73
  * free()ed (left in free_list) and thus *rmem_owner is
81
74
  * always valid. */
82
- #else
83
- xfree(c->mem);
84
- #endif
85
75
  }
86
76
  c->first = NULL;
87
77
  c->last = NULL;
@@ -108,8 +98,25 @@ void msgpack_buffer_destroy(msgpack_buffer_t* b)
108
98
  }
109
99
  }
110
100
 
111
- void msgpack_buffer_mark(msgpack_buffer_t* b)
101
+ size_t msgpack_buffer_memsize(const msgpack_buffer_t* b)
112
102
  {
103
+ size_t memsize = 0;
104
+ msgpack_buffer_chunk_t* c = b->head;
105
+
106
+ while(c) {
107
+ memsize += sizeof(msgpack_buffer_chunk_t);
108
+ if(c->mapped_string != NO_MAPPED_STRING) {
109
+ memsize += (c->last - c->first);
110
+ }
111
+ c = c->next;
112
+ }
113
+
114
+ return memsize;
115
+ }
116
+
117
+ void msgpack_buffer_mark(void *ptr)
118
+ {
119
+ msgpack_buffer_t* b = ptr;
113
120
  /* head is always available */
114
121
  msgpack_buffer_chunk_t* c = b->head;
115
122
  while(c != &b->tail) {
@@ -120,8 +127,6 @@ void msgpack_buffer_mark(msgpack_buffer_t* b)
120
127
 
121
128
  rb_gc_mark(b->io);
122
129
  rb_gc_mark(b->io_buffer);
123
-
124
- rb_gc_mark(b->owner);
125
130
  }
126
131
 
127
132
  bool _msgpack_buffer_shift_chunk(msgpack_buffer_t* b)
@@ -158,24 +163,17 @@ size_t msgpack_buffer_read_to_string_nonblock(msgpack_buffer_t* b, VALUE string,
158
163
  {
159
164
  size_t avail = msgpack_buffer_top_readable_size(b);
160
165
 
161
- #ifndef DISABLE_BUFFER_READ_REFERENCE_OPTIMIZE
162
166
  /* optimize */
163
167
  if(length <= avail && RSTRING_LEN(string) == 0 &&
164
168
  b->head->mapped_string != NO_MAPPED_STRING &&
165
169
  length >= b->read_reference_threshold) {
166
170
  VALUE s = _msgpack_buffer_refer_head_mapped_string(b, length);
167
- #ifndef HAVE_RB_STR_REPLACE
168
- /* TODO MRI 1.8 */
169
- rb_funcall(string, s_replace, 1, s);
170
- #else
171
171
  rb_str_replace(string, s);
172
- #endif
173
172
  /* here doesn't have to call ENCODING_SET because
174
173
  * encoding of s is always ASCII-8BIT */
175
174
  _msgpack_buffer_consumed(b, length);
176
175
  return length;
177
176
  }
178
- #endif
179
177
 
180
178
  size_t const length_orig = length;
181
179
 
@@ -288,15 +286,11 @@ static inline void _msgpack_buffer_add_new_chunk(msgpack_buffer_t* b)
288
286
 
289
287
  msgpack_buffer_chunk_t* nc = _msgpack_buffer_alloc_new_chunk(b);
290
288
 
291
- #ifndef DISABLE_RMEM
292
- #ifndef DISABLE_RMEM_REUSE_INTERNAL_FRAGMENT
293
289
  if(b->rmem_last == b->tail_buffer_end) {
294
290
  /* reuse unused rmem space */
295
291
  size_t unused = b->tail_buffer_end - b->tail.last;
296
292
  b->rmem_last -= unused;
297
293
  }
298
- #endif
299
- #endif
300
294
 
301
295
  /* rebuild tail */
302
296
  *nc = b->tail;
@@ -307,10 +301,13 @@ static inline void _msgpack_buffer_add_new_chunk(msgpack_buffer_t* b)
307
301
 
308
302
  static inline void _msgpack_buffer_append_reference(msgpack_buffer_t* b, VALUE string)
309
303
  {
310
- VALUE mapped_string = rb_str_dup(string);
311
- #ifdef COMPAT_HAVE_ENCODING
312
- ENCODING_SET(mapped_string, msgpack_rb_encindex_ascii8bit);
313
- #endif
304
+ VALUE mapped_string;
305
+ if(ENCODING_GET(string) == msgpack_rb_encindex_ascii8bit && RTEST(rb_obj_frozen_p(string))) {
306
+ mapped_string = string;
307
+ } else {
308
+ mapped_string = rb_str_dup(string);
309
+ ENCODING_SET(mapped_string, msgpack_rb_encindex_ascii8bit);
310
+ }
314
311
 
315
312
  _msgpack_buffer_add_new_chunk(b);
316
313
 
@@ -337,25 +334,13 @@ void _msgpack_buffer_append_long_string(msgpack_buffer_t* b, VALUE string)
337
334
 
338
335
  if(b->io != Qnil) {
339
336
  msgpack_buffer_flush(b);
340
- #ifdef COMPAT_HAVE_ENCODING
341
337
  if (ENCODING_GET(string) == msgpack_rb_encindex_ascii8bit) {
342
338
  rb_funcall(b->io, b->io_write_all_method, 1, string);
343
- } else if(!STR_DUP_LIKELY_DOES_COPY(string)) {
344
- VALUE s = rb_str_dup(string);
345
- ENCODING_SET(s, msgpack_rb_encindex_ascii8bit);
346
- rb_funcall(b->io, b->io_write_all_method, 1, s);
347
339
  } else {
348
340
  msgpack_buffer_append(b, RSTRING_PTR(string), length);
349
341
  }
350
- #else
351
- rb_funcall(b->io, b->io_write_all_method, 1, string);
352
- #endif
353
-
354
- } else if(!STR_DUP_LIKELY_DOES_COPY(string)) {
355
- _msgpack_buffer_append_reference(b, string);
356
-
357
342
  } else {
358
- msgpack_buffer_append(b, RSTRING_PTR(string), length);
343
+ _msgpack_buffer_append_reference(b, string);
359
344
  }
360
345
  }
361
346
 
@@ -363,11 +348,8 @@ static inline void* _msgpack_buffer_chunk_malloc(
363
348
  msgpack_buffer_t* b, msgpack_buffer_chunk_t* c,
364
349
  size_t required_size, size_t* allocated_size)
365
350
  {
366
- #ifndef DISABLE_RMEM
367
351
  if(required_size <= MSGPACK_RMEM_PAGE_SIZE) {
368
- #ifndef DISABLE_RMEM_REUSE_INTERNAL_FRAGMENT
369
352
  if((size_t)(b->rmem_end - b->rmem_last) < required_size) {
370
- #endif
371
353
  /* alloc new rmem page */
372
354
  *allocated_size = MSGPACK_RMEM_PAGE_SIZE;
373
355
  char* buffer = msgpack_rmem_alloc(&s_rmem);
@@ -378,8 +360,6 @@ static inline void* _msgpack_buffer_chunk_malloc(
378
360
  b->rmem_last = b->rmem_end = buffer + MSGPACK_RMEM_PAGE_SIZE;
379
361
 
380
362
  return buffer;
381
-
382
- #ifndef DISABLE_RMEM_REUSE_INTERNAL_FRAGMENT
383
363
  } else {
384
364
  /* reuse unused rmem */
385
365
  *allocated_size = (size_t)(b->rmem_end - b->rmem_last);
@@ -393,13 +373,7 @@ static inline void* _msgpack_buffer_chunk_malloc(
393
373
 
394
374
  return buffer;
395
375
  }
396
- #endif
397
- }
398
- #else
399
- if(required_size < 72) {
400
- required_size = 72;
401
376
  }
402
- #endif
403
377
 
404
378
  // TODO alignment?
405
379
  *allocated_size = required_size;
@@ -454,11 +428,7 @@ void _msgpack_buffer_expand(msgpack_buffer_t* b, const char* data, size_t length
454
428
  size_t capacity = b->tail.last - b->tail.first;
455
429
 
456
430
  /* can't realloc mapped chunk or rmem page */
457
- if(b->tail.mapped_string != NO_MAPPED_STRING
458
- #ifndef DISABLE_RMEM
459
- || capacity <= MSGPACK_RMEM_PAGE_SIZE
460
- #endif
461
- ) {
431
+ if(b->tail.mapped_string != NO_MAPPED_STRING || capacity <= MSGPACK_RMEM_PAGE_SIZE) {
462
432
  /* allocate new chunk */
463
433
  _msgpack_buffer_add_new_chunk(b);
464
434
 
@@ -631,13 +601,13 @@ size_t msgpack_buffer_flush_to_io(msgpack_buffer_t* b, VALUE io, ID write_method
631
601
  size_t _msgpack_buffer_feed_from_io(msgpack_buffer_t* b)
632
602
  {
633
603
  if(b->io_buffer == Qnil) {
634
- b->io_buffer = rb_funcall(b->io, b->io_partial_read_method, 1, LONG2NUM(b->io_buffer_size));
604
+ b->io_buffer = rb_funcall(b->io, b->io_partial_read_method, 1, SIZET2NUM(b->io_buffer_size));
635
605
  if(b->io_buffer == Qnil) {
636
606
  rb_raise(rb_eEOFError, "IO reached end of file");
637
607
  }
638
608
  StringValue(b->io_buffer);
639
609
  } else {
640
- VALUE ret = rb_funcall(b->io, b->io_partial_read_method, 2, LONG2NUM(b->io_buffer_size), b->io_buffer);
610
+ VALUE ret = rb_funcall(b->io, b->io_partial_read_method, 2, SIZET2NUM(b->io_buffer_size), b->io_buffer);
641
611
  if(ret == Qnil) {
642
612
  rb_raise(rb_eEOFError, "IO reached end of file");
643
613
  }
@@ -656,9 +626,11 @@ size_t _msgpack_buffer_feed_from_io(msgpack_buffer_t* b)
656
626
 
657
627
  size_t _msgpack_buffer_read_from_io_to_string(msgpack_buffer_t* b, VALUE string, size_t length)
658
628
  {
629
+ #define MIN(x, y) (((x) < (y)) ? (x) : (y))
630
+
659
631
  if(RSTRING_LEN(string) == 0) {
660
632
  /* direct read */
661
- VALUE ret = rb_funcall(b->io, b->io_partial_read_method, 2, LONG2NUM(length), string);
633
+ VALUE ret = rb_funcall(b->io, b->io_partial_read_method, 2, SIZET2NUM(MIN(b->io_buffer_size, length)), string);
662
634
  if(ret == Qnil) {
663
635
  return 0;
664
636
  }
@@ -670,7 +642,7 @@ size_t _msgpack_buffer_read_from_io_to_string(msgpack_buffer_t* b, VALUE string,
670
642
  b->io_buffer = rb_str_buf_new(0);
671
643
  }
672
644
 
673
- VALUE ret = rb_funcall(b->io, b->io_partial_read_method, 2, LONG2NUM(length), b->io_buffer);
645
+ VALUE ret = rb_funcall(b->io, b->io_partial_read_method, 2, SIZET2NUM(MIN(b->io_buffer_size, length)), b->io_buffer);
674
646
  if(ret == Qnil) {
675
647
  return 0;
676
648
  }
@@ -678,6 +650,8 @@ size_t _msgpack_buffer_read_from_io_to_string(msgpack_buffer_t* b, VALUE string,
678
650
 
679
651
  rb_str_buf_cat(string, (const void*)RSTRING_PTR(b->io_buffer), rl);
680
652
  return rl;
653
+
654
+ #undef MIN
681
655
  }
682
656
 
683
657
  size_t _msgpack_buffer_skip_from_io(msgpack_buffer_t* b, size_t length)
@@ -686,7 +660,7 @@ size_t _msgpack_buffer_skip_from_io(msgpack_buffer_t* b, size_t length)
686
660
  b->io_buffer = rb_str_buf_new(0);
687
661
  }
688
662
 
689
- VALUE ret = rb_funcall(b->io, b->io_partial_read_method, 2, LONG2NUM(length), b->io_buffer);
663
+ VALUE ret = rb_funcall(b->io, b->io_partial_read_method, 2, SIZET2NUM(length), b->io_buffer);
690
664
  if(ret == Qnil) {
691
665
  return 0;
692
666
  }
data/ext/msgpack/buffer.h CHANGED
@@ -49,11 +49,15 @@
49
49
 
50
50
  #define NO_MAPPED_STRING ((VALUE)0)
51
51
 
52
- #ifdef COMPAT_HAVE_ENCODING /* see compat.h*/
52
+ #ifndef RB_ENC_INTERNED_STR_NULL_CHECK
53
+ #define RB_ENC_INTERNED_STR_NULL_CHECK 0
54
+ #endif
55
+
53
56
  extern int msgpack_rb_encindex_utf8;
54
57
  extern int msgpack_rb_encindex_usascii;
55
58
  extern int msgpack_rb_encindex_ascii8bit;
56
- #endif
59
+
60
+ extern ID s_uminus;
57
61
 
58
62
  struct msgpack_buffer_chunk_t;
59
63
  typedef struct msgpack_buffer_chunk_t msgpack_buffer_chunk_t;
@@ -98,11 +102,9 @@ struct msgpack_buffer_t {
98
102
  msgpack_buffer_chunk_t* head;
99
103
  msgpack_buffer_chunk_t* free_list;
100
104
 
101
- #ifndef DISABLE_RMEM
102
105
  char* rmem_last;
103
106
  char* rmem_end;
104
107
  void** rmem_owner;
105
- #endif
106
108
 
107
109
  union msgpack_buffer_cast_block_t cast_block;
108
110
 
@@ -114,25 +116,25 @@ struct msgpack_buffer_t {
114
116
  size_t write_reference_threshold;
115
117
  size_t read_reference_threshold;
116
118
  size_t io_buffer_size;
117
-
118
- VALUE owner;
119
119
  };
120
120
 
121
121
  /*
122
122
  * initialization functions
123
123
  */
124
- void msgpack_buffer_static_init();
124
+ void msgpack_buffer_static_init(void);
125
125
 
126
- void msgpack_buffer_static_destroy();
126
+ void msgpack_buffer_static_destroy(void);
127
127
 
128
128
  void msgpack_buffer_init(msgpack_buffer_t* b);
129
129
 
130
130
  void msgpack_buffer_destroy(msgpack_buffer_t* b);
131
131
 
132
- void msgpack_buffer_mark(msgpack_buffer_t* b);
132
+ void msgpack_buffer_mark(void* b);
133
133
 
134
134
  void msgpack_buffer_clear(msgpack_buffer_t* b);
135
135
 
136
+ size_t msgpack_buffer_memsize(const msgpack_buffer_t* b);
137
+
136
138
  static inline void msgpack_buffer_set_write_reference_threshold(msgpack_buffer_t* b, size_t length)
137
139
  {
138
140
  if(length < MSGPACK_BUFFER_STRING_WRITE_REFERENCE_MINIMUM) {
@@ -438,24 +440,67 @@ static inline VALUE _msgpack_buffer_refer_head_mapped_string(msgpack_buffer_t* b
438
440
  return rb_str_substr(b->head->mapped_string, offset, length);
439
441
  }
440
442
 
441
- static inline VALUE msgpack_buffer_read_top_as_string(msgpack_buffer_t* b, size_t length, bool will_be_frozen)
443
+ static inline VALUE msgpack_buffer_read_top_as_string(msgpack_buffer_t* b, size_t length, bool will_be_frozen, bool utf8)
442
444
  {
443
- #ifndef DISABLE_BUFFER_READ_REFERENCE_OPTIMIZE
444
445
  /* optimize */
445
446
  if(!will_be_frozen &&
446
447
  b->head->mapped_string != NO_MAPPED_STRING &&
447
448
  length >= b->read_reference_threshold) {
448
449
  VALUE result = _msgpack_buffer_refer_head_mapped_string(b, length);
450
+ if (utf8) ENCODING_SET(result, msgpack_rb_encindex_utf8);
449
451
  _msgpack_buffer_consumed(b, length);
450
452
  return result;
451
453
  }
452
- #endif
453
454
 
454
- VALUE result = rb_str_new(b->read_buffer, length);
455
+ VALUE result;
456
+
457
+ #ifdef HAVE_RB_ENC_INTERNED_STR
458
+ if (will_be_frozen) {
459
+ if (RB_ENC_INTERNED_STR_NULL_CHECK && length == 0) {
460
+ result = rb_enc_interned_str("", length, utf8 ? rb_utf8_encoding() : rb_ascii8bit_encoding());
461
+ } else {
462
+ result = rb_enc_interned_str(b->read_buffer, length, utf8 ? rb_utf8_encoding() : rb_ascii8bit_encoding());
463
+ }
464
+ } else {
465
+ if (utf8) {
466
+ result = rb_utf8_str_new(b->read_buffer, length);
467
+ } else {
468
+ result = rb_str_new(b->read_buffer, length);
469
+ }
470
+ }
455
471
  _msgpack_buffer_consumed(b, length);
456
472
  return result;
473
+
474
+ #else
475
+
476
+ if (utf8) {
477
+ result = rb_utf8_str_new(b->read_buffer, length);
478
+ } else {
479
+ result = rb_str_new(b->read_buffer, length);
480
+ }
481
+
482
+ #if STR_UMINUS_DEDUPE
483
+ if (will_be_frozen) {
484
+ #if STR_UMINUS_DEDUPE_FROZEN
485
+ // Starting from MRI 2.8 it is preferable to freeze the string
486
+ // before deduplication so that it can be interned directly
487
+ // otherwise it would be duplicated first which is wasteful.
488
+ rb_str_freeze(result);
489
+ #endif //STR_UMINUS_DEDUPE_FROZEN
490
+ // MRI 2.5 and older do not deduplicate strings that are already
491
+ // frozen.
492
+ result = rb_funcall(result, s_uminus, 0);
493
+ }
494
+ #endif // STR_UMINUS_DEDUPE
495
+ _msgpack_buffer_consumed(b, length);
496
+ return result;
497
+
498
+ #endif // HAVE_RB_ENC_INTERNED_STR
457
499
  }
458
500
 
501
+ static inline VALUE msgpack_buffer_read_top_as_symbol(msgpack_buffer_t* b, size_t length, bool utf8)
502
+ {
503
+ return rb_str_intern(msgpack_buffer_read_top_as_string(b, length, true, utf8));
504
+ }
459
505
 
460
506
  #endif
461
-