msgpack 1.7.2 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 2f5b1af6b3a51f5ccc6bcf67c94c1fc6193b02fe01b123e2cfb06a6df9607116
4
- data.tar.gz: cc057f24e1ffa4cdc3e331499eb04de4c2383b0657dcf0baeba08300fd20862e
3
+ metadata.gz: efdb772bf54b74587a6c99e9513f9c16c78bbef3e5e3c17064a4be79fd5adb7a
4
+ data.tar.gz: d2f74cb1115947f5337cd730b6283fac61ceeed3a9457597cc90155b3dc93d7e
5
5
  SHA512:
6
- metadata.gz: 3eb06321a534ca9b16e321cc4a71458532578dafe7967314a662223b1fbf4aa93449c98177fa982aa532ce3732ddda4a6d497704df0e9c874da07f378c73595c
7
- data.tar.gz: 8e540755e3db9e21d7dfa4354854e8b0486f5a1bbf82c3994c6095022205f7873153d364df9310d8072c481de38ca2b4c3e088e4221c3451ceb9438312489419
6
+ metadata.gz: 92da2466eac162f0d6d319496d7af3dbbef0d76b77c3aefb4787213e3b957464603859101ae28ac9620be82feb29278b2ed601adf2fd9d9811b364c258061d4b
7
+ data.tar.gz: a469586178eb44bbd50abf4cb34684f6a3fd4038063a80d9b3bf7f5cb9e6a2b34c1fed853be669f0bfb356d618efff3a2eaa51b86dd34d5427787a05453b8e11
data/ChangeLog CHANGED
@@ -1,3 +1,20 @@
1
+ 2025-02-06 1.8.0
2
+
3
+ * Numerous small optimizations.
4
+ * Added `key_cache` option to `Unpacker`.
5
+
6
+ 2024-11-11 1.7.5
7
+
8
+ * Rerelease 1.7.4 with fixed java package.
9
+
10
+ 2024-11-11 1.7.4
11
+
12
+ * Fixed a potential memory leak when a recursive unpacker raises.
13
+
14
+ 2024-10-03 1.7.3
15
+
16
+ * Limit initial containers pre-allocation to `SHRT_MAX` (32k) entries.
17
+
1
18
  2023-07-18 1.7.2:
2
19
 
3
20
  * Fix a potential GC bug when packing data using recursive extensions and buffers containing over 512KiB of data (See #341).
data/README.md CHANGED
@@ -8,15 +8,24 @@ and typical short strings only require an extra byte in addition to the strings
8
8
  If you ever wished to use JSON for convenience (storing an image with metadata) but could
9
9
  not for technical reasons (binary data, size, speed...), MessagePack is a perfect replacement.
10
10
 
11
- require 'msgpack'
12
- msg = [1,2,3].to_msgpack #=> "\x93\x01\x02\x03"
13
- MessagePack.unpack(msg) #=> [1,2,3]
11
+ ```ruby
12
+ require 'msgpack'
13
+ msg = [1,2,3].to_msgpack #=> "\x93\x01\x02\x03"
14
+ MessagePack.unpack(msg) #=> [1,2,3]
15
+ ```
16
+
17
+ Add msgpack to your Gemfile to install with Bundler:
18
+
19
+ ```ruby
20
+ # Gemfile
21
+ gem 'msgpack'
22
+ ```
14
23
 
15
- Use RubyGems to install:
24
+ Or, use RubyGems to install:
16
25
 
17
26
  gem install msgpack
18
27
 
19
- or build msgpack-ruby and install:
28
+ Or, build msgpack-ruby and install from a checked-out msgpack-ruby repository:
20
29
 
21
30
  bundle
22
31
  rake
@@ -27,11 +36,11 @@ or build msgpack-ruby and install:
27
36
 
28
37
  * Create REST API returning MessagePack using Rails + [RABL](https://github.com/nesquena/rabl)
29
38
  * Store objects efficiently serialized by msgpack on memcached or Redis
30
- * In fact Redis supports msgpack in [EVAL-scripts](http://redis.io/commands/eval)
39
+ * In fact Redis supports msgpack in [EVAL-scripts](https://redis.io/docs/latest/commands/eval/)
31
40
  * Upload data in efficient format from mobile devices such as smartphones
32
41
  * MessagePack works on iPhone/iPad and Android. See also [Objective-C](https://github.com/msgpack/msgpack-objectivec) and [Java](https://github.com/msgpack/msgpack-java) implementations
33
42
  * Design a portable protocol to communicate with embedded devices
34
- * Check also [Fluentd](http://fluentd.org/) which is a log collector which uses msgpack for the log format (they say it uses JSON but actually it's msgpack, which is compatible with JSON)
43
+ * Check also [Fluentd](https://www.fluentd.org) which is a log collector which uses msgpack for the log format (they say it uses JSON but actually it's msgpack, which is compatible with JSON)
35
44
  * Exchange objects between software components written in different languages
36
45
  * You'll need a flexible but efficient format so that components exchange objects while keeping compatibility
37
46
 
@@ -128,9 +137,9 @@ being serialized altogether by throwing an exception:
128
137
 
129
138
  ```ruby
130
139
  class Symbol
131
- def to_msgpack_ext
132
- raise "Serialization of symbols prohibited"
133
- end
140
+ def to_msgpack_ext
141
+ raise "Serialization of symbols prohibited"
142
+ end
134
143
  end
135
144
 
136
145
  MessagePack::DefaultFactory.register_type(0x00, Symbol)
@@ -276,8 +285,8 @@ If this directory has Gemfile.lock (generated with MRI), remove it beforehand.
276
285
 
277
286
  ## Updating documents
278
287
 
279
- Online documents (http://ruby.msgpack.org) is generated from gh-pages branch.
280
- Following commands update documents in gh-pages branch:
288
+ Online documentation (https://ruby.msgpack.org) is generated from the gh-pages branch.
289
+ To update documents in gh-pages branch:
281
290
 
282
291
  bundle exec rake doc
283
292
  git checkout gh-pages
data/ext/msgpack/buffer.c CHANGED
@@ -300,7 +300,7 @@ static inline void _msgpack_buffer_add_new_chunk(msgpack_buffer_t* b)
300
300
  static inline void _msgpack_buffer_append_reference(msgpack_buffer_t* b, VALUE string)
301
301
  {
302
302
  VALUE mapped_string;
303
- if(ENCODING_GET(string) == msgpack_rb_encindex_ascii8bit && RTEST(rb_obj_frozen_p(string))) {
303
+ if(ENCODING_GET_INLINED(string) == msgpack_rb_encindex_ascii8bit && RB_OBJ_FROZEN_RAW(string)) {
304
304
  mapped_string = string;
305
305
  } else {
306
306
  mapped_string = rb_str_dup(string);
@@ -309,8 +309,9 @@ static inline void _msgpack_buffer_append_reference(msgpack_buffer_t* b, VALUE s
309
309
 
310
310
  _msgpack_buffer_add_new_chunk(b);
311
311
 
312
- char* data = RSTRING_PTR(mapped_string);
313
- size_t length = RSTRING_LEN(mapped_string);
312
+ char* data;
313
+ size_t length;
314
+ RSTRING_GETMEM(mapped_string, data, length);
314
315
 
315
316
  b->tail.first = (char*) data;
316
317
  b->tail.last = (char*) data + length;
@@ -330,7 +331,7 @@ void _msgpack_buffer_append_long_string(msgpack_buffer_t* b, VALUE string)
330
331
  {
331
332
  if(b->io != Qnil) {
332
333
  msgpack_buffer_flush(b);
333
- if (ENCODING_GET(string) == msgpack_rb_encindex_ascii8bit) {
334
+ if (ENCODING_GET_INLINED(string) == msgpack_rb_encindex_ascii8bit) {
334
335
  rb_funcall(b->io, b->io_write_all_method, 1, string);
335
336
  } else {
336
337
  msgpack_buffer_append(b, RSTRING_PTR(string), RSTRING_LEN(string));
data/ext/msgpack/buffer.h CHANGED
@@ -81,20 +81,6 @@ struct msgpack_buffer_chunk_t {
81
81
  bool rmem;
82
82
  };
83
83
 
84
- union msgpack_buffer_cast_block_t {
85
- char buffer[8];
86
- uint8_t u8;
87
- uint16_t u16;
88
- uint32_t u32;
89
- uint64_t u64;
90
- int8_t i8;
91
- int16_t i16;
92
- int32_t i32;
93
- int64_t i64;
94
- float f;
95
- double d;
96
- };
97
-
98
84
  struct msgpack_buffer_t {
99
85
  char* read_buffer;
100
86
  char* tail_buffer_end;
@@ -107,8 +93,6 @@ struct msgpack_buffer_t {
107
93
  char* rmem_end;
108
94
  void** rmem_owner;
109
95
 
110
- union msgpack_buffer_cast_block_t cast_block;
111
-
112
96
  VALUE io;
113
97
  VALUE io_buffer;
114
98
  ID io_write_all_method;
@@ -253,13 +237,14 @@ void _msgpack_buffer_append_long_string(msgpack_buffer_t* b, VALUE string);
253
237
 
254
238
  static inline size_t msgpack_buffer_append_string(msgpack_buffer_t* b, VALUE string)
255
239
  {
256
- size_t length = RSTRING_LEN(string);
240
+ size_t length;
241
+ char *ptr;
242
+ RSTRING_GETMEM(string, ptr, length);
257
243
 
258
244
  if(length > b->write_reference_threshold) {
259
245
  _msgpack_buffer_append_long_string(b, string);
260
-
261
246
  } else {
262
- msgpack_buffer_append(b, RSTRING_PTR(string), length);
247
+ msgpack_buffer_append(b, ptr, length);
263
248
  }
264
249
 
265
250
  return length;
@@ -383,14 +368,6 @@ static inline size_t msgpack_buffer_skip_nonblock(msgpack_buffer_t* b, size_t le
383
368
  return length;
384
369
  }
385
370
 
386
- static inline union msgpack_buffer_cast_block_t* msgpack_buffer_read_cast_block(msgpack_buffer_t* b, size_t n)
387
- {
388
- if(!msgpack_buffer_read_all(b, b->cast_block.buffer, n)) {
389
- return NULL;
390
- }
391
- return &b->cast_block;
392
- }
393
-
394
371
  size_t msgpack_buffer_read_to_string_nonblock(msgpack_buffer_t* b, VALUE string, size_t length);
395
372
 
396
373
  static inline size_t msgpack_buffer_read_to_string(msgpack_buffer_t* b, VALUE string, size_t length)
@@ -497,4 +474,131 @@ static inline VALUE msgpack_buffer_read_top_as_symbol(msgpack_buffer_t* b, size_
497
474
  return rb_str_intern(msgpack_buffer_read_top_as_string(b, length, true, utf8));
498
475
  }
499
476
 
477
+ // Hash keys are likely to be repeated, and are frozen.
478
+ // As such we can re-use them if we keep a cache of the ones we've seen so far,
479
+ // and save much more expensive lookups into the global fstring table.
480
+ // This cache implementation is deliberately simple, as we're optimizing for compactness,
481
+ // to be able to fit easily embedded inside msgpack_unpacker_t.
482
+ // As such, binary search into a sorted array gives a good tradeoff between compactness and
483
+ // performance.
484
+ #define MSGPACK_KEY_CACHE_CAPACITY 63
485
+
486
+ typedef struct msgpack_key_cache_t msgpack_key_cache_t;
487
+ struct msgpack_key_cache_t {
488
+ int length;
489
+ VALUE entries[MSGPACK_KEY_CACHE_CAPACITY];
490
+ };
491
+
492
+ static inline VALUE build_interned_string(const char *str, const long length)
493
+ {
494
+ # ifdef HAVE_RB_ENC_INTERNED_STR
495
+ return rb_enc_interned_str(str, length, rb_utf8_encoding());
496
+ # else
497
+ VALUE rstring = rb_utf8_str_new(str, length);
498
+ return rb_funcall(rb_str_freeze(rstring), s_uminus, 0);
499
+ # endif
500
+ }
501
+
502
+ static inline VALUE build_symbol(const char *str, const long length)
503
+ {
504
+ return rb_str_intern(build_interned_string(str, length));
505
+ }
506
+
507
+ static void rvalue_cache_insert_at(msgpack_key_cache_t *cache, int index, VALUE rstring)
508
+ {
509
+ MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index);
510
+ cache->length++;
511
+ cache->entries[index] = rstring;
512
+ }
513
+
514
+ static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
515
+ {
516
+ long rstring_length = RSTRING_LEN(rstring);
517
+ if (length == rstring_length) {
518
+ return memcmp(str, RSTRING_PTR(rstring), length);
519
+ } else {
520
+ return (int)(length - rstring_length);
521
+ }
522
+ }
523
+
524
+ static VALUE rstring_cache_fetch(msgpack_key_cache_t *cache, const char *str, const long length)
525
+ {
526
+ int low = 0;
527
+ int high = cache->length - 1;
528
+ int mid = 0;
529
+ int last_cmp = 0;
530
+
531
+ while (low <= high) {
532
+ mid = (high + low) >> 1;
533
+ VALUE entry = cache->entries[mid];
534
+ last_cmp = rstring_cache_cmp(str, length, entry);
535
+
536
+ if (last_cmp == 0) {
537
+ return entry;
538
+ } else if (last_cmp > 0) {
539
+ low = mid + 1;
540
+ } else {
541
+ high = mid - 1;
542
+ }
543
+ }
544
+
545
+ VALUE rstring = build_interned_string(str, length);
546
+
547
+ if (cache->length < MSGPACK_KEY_CACHE_CAPACITY) {
548
+ if (last_cmp > 0) {
549
+ mid += 1;
550
+ }
551
+
552
+ rvalue_cache_insert_at(cache, mid, rstring);
553
+ }
554
+ return rstring;
555
+ }
556
+
557
+ static VALUE rsymbol_cache_fetch(msgpack_key_cache_t *cache, const char *str, const long length)
558
+ {
559
+ int low = 0;
560
+ int high = cache->length - 1;
561
+ int mid = 0;
562
+ int last_cmp = 0;
563
+
564
+ while (low <= high) {
565
+ mid = (high + low) >> 1;
566
+ VALUE entry = cache->entries[mid];
567
+ last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
568
+
569
+ if (last_cmp == 0) {
570
+ return entry;
571
+ } else if (last_cmp > 0) {
572
+ low = mid + 1;
573
+ } else {
574
+ high = mid - 1;
575
+ }
576
+ }
577
+
578
+ VALUE rsymbol = build_symbol(str, length);
579
+
580
+ if (cache->length < MSGPACK_KEY_CACHE_CAPACITY) {
581
+ if (last_cmp > 0) {
582
+ mid += 1;
583
+ }
584
+
585
+ rvalue_cache_insert_at(cache, mid, rsymbol);
586
+ }
587
+ return rsymbol;
588
+ }
589
+
590
+ static inline VALUE msgpack_buffer_read_top_as_interned_symbol(msgpack_buffer_t* b, msgpack_key_cache_t *cache, size_t length)
591
+ {
592
+ VALUE result = rsymbol_cache_fetch(cache, b->read_buffer, length);
593
+ _msgpack_buffer_consumed(b, length);
594
+ return result;
595
+ }
596
+
597
+ static inline VALUE msgpack_buffer_read_top_as_interned_string(msgpack_buffer_t* b, msgpack_key_cache_t *cache, size_t length)
598
+ {
599
+ VALUE result = rstring_cache_fetch(cache, b->read_buffer, length);
600
+ _msgpack_buffer_consumed(b, length);
601
+ return result;
602
+ }
603
+
500
604
  #endif
@@ -3,17 +3,19 @@ require 'mkmf'
3
3
  have_func("rb_enc_interned_str", "ruby.h") # Ruby 3.0+
4
4
  have_func("rb_hash_new_capa", "ruby.h") # Ruby 3.2+
5
5
  have_func("rb_proc_call_with_block", "ruby.h") # CRuby (TruffleRuby doesn't have it)
6
+ have_func("rb_gc_mark_locations", "ruby.h") # Missing on TruffleRuby
6
7
 
7
8
  append_cflags([
8
9
  "-fvisibility=hidden",
9
10
  "-I..",
10
11
  "-Wall",
11
- "-O3",
12
12
  "-std=gnu99"
13
13
  ])
14
- append_cflags(RbConfig::CONFIG["debugflags"]) if RbConfig::CONFIG["debugflags"]
15
14
 
16
- append_cflags("-DRUBY_DEBUG=1") if ENV["MSGPACK_DEBUG"]
15
+ if ENV["MSGPACK_DEBUG"]
16
+ append_cflags(RbConfig::CONFIG["debugflags"]) if RbConfig::CONFIG["debugflags"]
17
+ append_cflags("-DRUBY_DEBUG=1")
18
+ end
17
19
 
18
20
  if RUBY_VERSION.start_with?('3.0.') && RUBY_VERSION <= '3.0.5'
19
21
  # https://bugs.ruby-lang.org/issues/18772
data/ext/msgpack/packer.h CHANGED
@@ -25,21 +25,26 @@
25
25
  #define MSGPACK_PACKER_IO_FLUSH_THRESHOLD_TO_WRITE_STRING_BODY (1024)
26
26
  #endif
27
27
 
28
+ #ifndef UNREACHABLE_RETURN
29
+ // Ruby 2.5
30
+ #define UNREACHABLE_RETURN() return
31
+ #endif
32
+
28
33
  struct msgpack_packer_t;
29
34
  typedef struct msgpack_packer_t msgpack_packer_t;
30
35
 
31
36
  struct msgpack_packer_t {
32
37
  msgpack_buffer_t buffer;
33
38
 
34
- bool compatibility_mode;
35
- bool has_bigint_ext_type;
36
- bool has_symbol_ext_type;
37
-
38
39
  ID to_msgpack_method;
39
40
  VALUE to_msgpack_arg;
40
41
 
41
42
  VALUE buffer_ref;
42
43
 
44
+ bool compatibility_mode;
45
+ bool has_bigint_ext_type;
46
+ bool has_symbol_ext_type;
47
+
43
48
  /* options */
44
49
  bool comaptibility_mode;
45
50
  msgpack_packer_ext_registry_t ext_registry;
@@ -404,27 +409,33 @@ static inline bool msgpack_packer_is_utf8_compat_string(VALUE v, int encindex)
404
409
  {
405
410
  return encindex == msgpack_rb_encindex_utf8
406
411
  || encindex == msgpack_rb_encindex_usascii
407
- || (rb_enc_asciicompat(rb_enc_from_index(encindex)) && ENC_CODERANGE_ASCIIONLY(v));
412
+ || ENC_CODERANGE_ASCIIONLY(v);
408
413
  }
409
414
 
410
415
  static inline void msgpack_packer_write_string_value(msgpack_packer_t* pk, VALUE v)
411
416
  {
412
- /* actual return type of RSTRING_LEN is long */
413
- unsigned long len = RSTRING_LEN(v);
414
- if(len > 0xffffffffUL) {
415
- // TODO rb_eArgError?
416
- rb_raise(rb_eArgError, "size of string is too long to pack: %lu bytes should be <= %lu", len, 0xffffffffUL);
417
+ long len = RSTRING_LEN(v);
418
+
419
+ if(RB_UNLIKELY(len > 0xffffffffL)) {
420
+ rb_raise(rb_eArgError, "size of string is too long to pack: %lu bytes should be <= %ld", len, 0xffffffffL);
421
+ UNREACHABLE_RETURN();
422
+ }
423
+
424
+ if (RB_UNLIKELY(pk->compatibility_mode)) {
425
+ msgpack_packer_write_raw_header(pk, (unsigned int)len);
426
+ msgpack_buffer_append_string(PACKER_BUFFER_(pk), v);
427
+ return;
417
428
  }
418
429
 
419
- int encindex = ENCODING_GET(v);
420
- if(msgpack_packer_is_binary(v, encindex) && !pk->compatibility_mode) {
430
+ int encindex = ENCODING_GET_INLINED(v);
431
+ if(msgpack_packer_is_binary(v, encindex)) {
421
432
  /* write ASCII-8BIT string using Binary type */
422
433
  msgpack_packer_write_bin_header(pk, (unsigned int)len);
423
434
  msgpack_buffer_append_string(PACKER_BUFFER_(pk), v);
424
435
  } else {
425
436
  /* write UTF-8, US-ASCII, or 7bit-safe ascii-compatible string using String type directly */
426
437
  /* in compatibility mode, packer packs String values as is */
427
- if(!pk->compatibility_mode && !msgpack_packer_is_utf8_compat_string(v, encindex)) {
438
+ if(RB_UNLIKELY(!msgpack_packer_is_utf8_compat_string(v, encindex))) {
428
439
  /* transcode other strings to UTF-8 and write using String type */
429
440
  VALUE enc = rb_enc_from_encoding(rb_utf8_encoding()); /* rb_enc_from_encoding_index is not extern */
430
441
  v = rb_str_encode(v, enc, 0, Qnil);
@@ -453,11 +464,7 @@ static inline void msgpack_packer_write_symbol_value(msgpack_packer_t* pk, VALUE
453
464
 
454
465
  static inline void msgpack_packer_write_fixnum_value(msgpack_packer_t* pk, VALUE v)
455
466
  {
456
- #ifdef JRUBY
457
- msgpack_packer_write_long(pk, FIXNUM_P(v) ? FIX2LONG(v) : rb_num2ll(v));
458
- #else
459
467
  msgpack_packer_write_long(pk, FIX2LONG(v));
460
- #endif
461
468
  }
462
469
 
463
470
  static inline void msgpack_packer_write_bignum_value(msgpack_packer_t* pk, VALUE v)