msgpack 1.7.2 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ChangeLog +17 -0
- data/README.md +21 -12
- data/ext/msgpack/buffer.c +5 -4
- data/ext/msgpack/buffer.h +131 -27
- data/ext/msgpack/extconf.rb +5 -3
- data/ext/msgpack/packer.h +24 -17
- data/ext/msgpack/unpacker.c +191 -86
- data/ext/msgpack/unpacker.h +18 -8
- data/ext/msgpack/unpacker_class.c +27 -17
- data/lib/msgpack/version.rb +1 -1
- data/msgpack.gemspec +2 -0
- metadata +5 -7
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: efdb772bf54b74587a6c99e9513f9c16c78bbef3e5e3c17064a4be79fd5adb7a
|
4
|
+
data.tar.gz: d2f74cb1115947f5337cd730b6283fac61ceeed3a9457597cc90155b3dc93d7e
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 92da2466eac162f0d6d319496d7af3dbbef0d76b77c3aefb4787213e3b957464603859101ae28ac9620be82feb29278b2ed601adf2fd9d9811b364c258061d4b
|
7
|
+
data.tar.gz: a469586178eb44bbd50abf4cb34684f6a3fd4038063a80d9b3bf7f5cb9e6a2b34c1fed853be669f0bfb356d618efff3a2eaa51b86dd34d5427787a05453b8e11
|
data/ChangeLog
CHANGED
@@ -1,3 +1,20 @@
|
|
1
|
+
2025-02-06 1.8.0
|
2
|
+
|
3
|
+
* Numerous small optimizations.
|
4
|
+
* Added `key_cache` option to `Unpacker`.
|
5
|
+
|
6
|
+
2024-11-11 1.7.5
|
7
|
+
|
8
|
+
* Rerelease 1.7.4 with fixed java package.
|
9
|
+
|
10
|
+
2024-11-11 1.7.4
|
11
|
+
|
12
|
+
* Fixed a potential memory leak when a recursive unpacker raises.
|
13
|
+
|
14
|
+
2024-10-03 1.7.3
|
15
|
+
|
16
|
+
* Limit initial containers pre-allocation to `SHRT_MAX` (32k) entries.
|
17
|
+
|
1
18
|
2023-07-18 1.7.2:
|
2
19
|
|
3
20
|
* Fix a potential GC bug when packing data using recursive extensions and buffers containing over 512KiB of data (See #341).
|
data/README.md
CHANGED
@@ -8,15 +8,24 @@ and typical short strings only require an extra byte in addition to the strings
|
|
8
8
|
If you ever wished to use JSON for convenience (storing an image with metadata) but could
|
9
9
|
not for technical reasons (binary data, size, speed...), MessagePack is a perfect replacement.
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
|
11
|
+
```ruby
|
12
|
+
require 'msgpack'
|
13
|
+
msg = [1,2,3].to_msgpack #=> "\x93\x01\x02\x03"
|
14
|
+
MessagePack.unpack(msg) #=> [1,2,3]
|
15
|
+
```
|
16
|
+
|
17
|
+
Add msgpack to your Gemfile to install with Bundler:
|
18
|
+
|
19
|
+
```ruby
|
20
|
+
# Gemfile
|
21
|
+
gem 'msgpack'
|
22
|
+
```
|
14
23
|
|
15
|
-
|
24
|
+
Or, use RubyGems to install:
|
16
25
|
|
17
26
|
gem install msgpack
|
18
27
|
|
19
|
-
|
28
|
+
Or, build msgpack-ruby and install from a checked-out msgpack-ruby repository:
|
20
29
|
|
21
30
|
bundle
|
22
31
|
rake
|
@@ -27,11 +36,11 @@ or build msgpack-ruby and install:
|
|
27
36
|
|
28
37
|
* Create REST API returning MessagePack using Rails + [RABL](https://github.com/nesquena/rabl)
|
29
38
|
* Store objects efficiently serialized by msgpack on memcached or Redis
|
30
|
-
* In fact Redis supports msgpack in [EVAL-scripts](
|
39
|
+
* In fact Redis supports msgpack in [EVAL-scripts](https://redis.io/docs/latest/commands/eval/)
|
31
40
|
* Upload data in efficient format from mobile devices such as smartphones
|
32
41
|
* MessagePack works on iPhone/iPad and Android. See also [Objective-C](https://github.com/msgpack/msgpack-objectivec) and [Java](https://github.com/msgpack/msgpack-java) implementations
|
33
42
|
* Design a portable protocol to communicate with embedded devices
|
34
|
-
* Check also [Fluentd](
|
43
|
+
* Check also [Fluentd](https://www.fluentd.org) which is a log collector which uses msgpack for the log format (they say it uses JSON but actually it's msgpack, which is compatible with JSON)
|
35
44
|
* Exchange objects between software components written in different languages
|
36
45
|
* You'll need a flexible but efficient format so that components exchange objects while keeping compatibility
|
37
46
|
|
@@ -128,9 +137,9 @@ being serialized altogether by throwing an exception:
|
|
128
137
|
|
129
138
|
```ruby
|
130
139
|
class Symbol
|
131
|
-
|
132
|
-
|
133
|
-
|
140
|
+
def to_msgpack_ext
|
141
|
+
raise "Serialization of symbols prohibited"
|
142
|
+
end
|
134
143
|
end
|
135
144
|
|
136
145
|
MessagePack::DefaultFactory.register_type(0x00, Symbol)
|
@@ -276,8 +285,8 @@ If this directory has Gemfile.lock (generated with MRI), remove it beforehand.
|
|
276
285
|
|
277
286
|
## Updating documents
|
278
287
|
|
279
|
-
Online
|
280
|
-
|
288
|
+
Online documentation (https://ruby.msgpack.org) is generated from the gh-pages branch.
|
289
|
+
To update documents in gh-pages branch:
|
281
290
|
|
282
291
|
bundle exec rake doc
|
283
292
|
git checkout gh-pages
|
data/ext/msgpack/buffer.c
CHANGED
@@ -300,7 +300,7 @@ static inline void _msgpack_buffer_add_new_chunk(msgpack_buffer_t* b)
|
|
300
300
|
static inline void _msgpack_buffer_append_reference(msgpack_buffer_t* b, VALUE string)
|
301
301
|
{
|
302
302
|
VALUE mapped_string;
|
303
|
-
if(
|
303
|
+
if(ENCODING_GET_INLINED(string) == msgpack_rb_encindex_ascii8bit && RB_OBJ_FROZEN_RAW(string)) {
|
304
304
|
mapped_string = string;
|
305
305
|
} else {
|
306
306
|
mapped_string = rb_str_dup(string);
|
@@ -309,8 +309,9 @@ static inline void _msgpack_buffer_append_reference(msgpack_buffer_t* b, VALUE s
|
|
309
309
|
|
310
310
|
_msgpack_buffer_add_new_chunk(b);
|
311
311
|
|
312
|
-
char* data
|
313
|
-
size_t length
|
312
|
+
char* data;
|
313
|
+
size_t length;
|
314
|
+
RSTRING_GETMEM(mapped_string, data, length);
|
314
315
|
|
315
316
|
b->tail.first = (char*) data;
|
316
317
|
b->tail.last = (char*) data + length;
|
@@ -330,7 +331,7 @@ void _msgpack_buffer_append_long_string(msgpack_buffer_t* b, VALUE string)
|
|
330
331
|
{
|
331
332
|
if(b->io != Qnil) {
|
332
333
|
msgpack_buffer_flush(b);
|
333
|
-
if (
|
334
|
+
if (ENCODING_GET_INLINED(string) == msgpack_rb_encindex_ascii8bit) {
|
334
335
|
rb_funcall(b->io, b->io_write_all_method, 1, string);
|
335
336
|
} else {
|
336
337
|
msgpack_buffer_append(b, RSTRING_PTR(string), RSTRING_LEN(string));
|
data/ext/msgpack/buffer.h
CHANGED
@@ -81,20 +81,6 @@ struct msgpack_buffer_chunk_t {
|
|
81
81
|
bool rmem;
|
82
82
|
};
|
83
83
|
|
84
|
-
union msgpack_buffer_cast_block_t {
|
85
|
-
char buffer[8];
|
86
|
-
uint8_t u8;
|
87
|
-
uint16_t u16;
|
88
|
-
uint32_t u32;
|
89
|
-
uint64_t u64;
|
90
|
-
int8_t i8;
|
91
|
-
int16_t i16;
|
92
|
-
int32_t i32;
|
93
|
-
int64_t i64;
|
94
|
-
float f;
|
95
|
-
double d;
|
96
|
-
};
|
97
|
-
|
98
84
|
struct msgpack_buffer_t {
|
99
85
|
char* read_buffer;
|
100
86
|
char* tail_buffer_end;
|
@@ -107,8 +93,6 @@ struct msgpack_buffer_t {
|
|
107
93
|
char* rmem_end;
|
108
94
|
void** rmem_owner;
|
109
95
|
|
110
|
-
union msgpack_buffer_cast_block_t cast_block;
|
111
|
-
|
112
96
|
VALUE io;
|
113
97
|
VALUE io_buffer;
|
114
98
|
ID io_write_all_method;
|
@@ -253,13 +237,14 @@ void _msgpack_buffer_append_long_string(msgpack_buffer_t* b, VALUE string);
|
|
253
237
|
|
254
238
|
static inline size_t msgpack_buffer_append_string(msgpack_buffer_t* b, VALUE string)
|
255
239
|
{
|
256
|
-
size_t length
|
240
|
+
size_t length;
|
241
|
+
char *ptr;
|
242
|
+
RSTRING_GETMEM(string, ptr, length);
|
257
243
|
|
258
244
|
if(length > b->write_reference_threshold) {
|
259
245
|
_msgpack_buffer_append_long_string(b, string);
|
260
|
-
|
261
246
|
} else {
|
262
|
-
msgpack_buffer_append(b,
|
247
|
+
msgpack_buffer_append(b, ptr, length);
|
263
248
|
}
|
264
249
|
|
265
250
|
return length;
|
@@ -383,14 +368,6 @@ static inline size_t msgpack_buffer_skip_nonblock(msgpack_buffer_t* b, size_t le
|
|
383
368
|
return length;
|
384
369
|
}
|
385
370
|
|
386
|
-
static inline union msgpack_buffer_cast_block_t* msgpack_buffer_read_cast_block(msgpack_buffer_t* b, size_t n)
|
387
|
-
{
|
388
|
-
if(!msgpack_buffer_read_all(b, b->cast_block.buffer, n)) {
|
389
|
-
return NULL;
|
390
|
-
}
|
391
|
-
return &b->cast_block;
|
392
|
-
}
|
393
|
-
|
394
371
|
size_t msgpack_buffer_read_to_string_nonblock(msgpack_buffer_t* b, VALUE string, size_t length);
|
395
372
|
|
396
373
|
static inline size_t msgpack_buffer_read_to_string(msgpack_buffer_t* b, VALUE string, size_t length)
|
@@ -497,4 +474,131 @@ static inline VALUE msgpack_buffer_read_top_as_symbol(msgpack_buffer_t* b, size_
|
|
497
474
|
return rb_str_intern(msgpack_buffer_read_top_as_string(b, length, true, utf8));
|
498
475
|
}
|
499
476
|
|
477
|
+
// Hash keys are likely to be repeated, and are frozen.
|
478
|
+
// As such we can re-use them if we keep a cache of the ones we've seen so far,
|
479
|
+
// and save much more expensive lookups into the global fstring table.
|
480
|
+
// This cache implementation is deliberately simple, as we're optimizing for compactness,
|
481
|
+
// so that it fits easily when embedded inside msgpack_unpacker_t.
|
482
|
+
// As such, binary search into a sorted array gives a good tradeoff between compactness and
|
483
|
+
// performance.
|
484
|
+
#define MSGPACK_KEY_CACHE_CAPACITY 63
|
485
|
+
|
486
|
+
typedef struct msgpack_key_cache_t msgpack_key_cache_t;
|
487
|
+
struct msgpack_key_cache_t {
|
488
|
+
int length;
|
489
|
+
VALUE entries[MSGPACK_KEY_CACHE_CAPACITY];
|
490
|
+
};
|
491
|
+
|
492
|
+
static inline VALUE build_interned_string(const char *str, const long length)
|
493
|
+
{
|
494
|
+
# ifdef HAVE_RB_ENC_INTERNED_STR
|
495
|
+
return rb_enc_interned_str(str, length, rb_utf8_encoding());
|
496
|
+
# else
|
497
|
+
VALUE rstring = rb_utf8_str_new(str, length);
|
498
|
+
return rb_funcall(rb_str_freeze(rstring), s_uminus, 0);
|
499
|
+
# endif
|
500
|
+
}
|
501
|
+
|
502
|
+
static inline VALUE build_symbol(const char *str, const long length)
|
503
|
+
{
|
504
|
+
return rb_str_intern(build_interned_string(str, length));
|
505
|
+
}
|
506
|
+
|
507
|
+
static void rvalue_cache_insert_at(msgpack_key_cache_t *cache, int index, VALUE rstring)
|
508
|
+
{
|
509
|
+
MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index);
|
510
|
+
cache->length++;
|
511
|
+
cache->entries[index] = rstring;
|
512
|
+
}
|
513
|
+
|
514
|
+
static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
|
515
|
+
{
|
516
|
+
long rstring_length = RSTRING_LEN(rstring);
|
517
|
+
if (length == rstring_length) {
|
518
|
+
return memcmp(str, RSTRING_PTR(rstring), length);
|
519
|
+
} else {
|
520
|
+
return (int)(length - rstring_length);
|
521
|
+
}
|
522
|
+
}
|
523
|
+
|
524
|
+
static VALUE rstring_cache_fetch(msgpack_key_cache_t *cache, const char *str, const long length)
|
525
|
+
{
|
526
|
+
int low = 0;
|
527
|
+
int high = cache->length - 1;
|
528
|
+
int mid = 0;
|
529
|
+
int last_cmp = 0;
|
530
|
+
|
531
|
+
while (low <= high) {
|
532
|
+
mid = (high + low) >> 1;
|
533
|
+
VALUE entry = cache->entries[mid];
|
534
|
+
last_cmp = rstring_cache_cmp(str, length, entry);
|
535
|
+
|
536
|
+
if (last_cmp == 0) {
|
537
|
+
return entry;
|
538
|
+
} else if (last_cmp > 0) {
|
539
|
+
low = mid + 1;
|
540
|
+
} else {
|
541
|
+
high = mid - 1;
|
542
|
+
}
|
543
|
+
}
|
544
|
+
|
545
|
+
VALUE rstring = build_interned_string(str, length);
|
546
|
+
|
547
|
+
if (cache->length < MSGPACK_KEY_CACHE_CAPACITY) {
|
548
|
+
if (last_cmp > 0) {
|
549
|
+
mid += 1;
|
550
|
+
}
|
551
|
+
|
552
|
+
rvalue_cache_insert_at(cache, mid, rstring);
|
553
|
+
}
|
554
|
+
return rstring;
|
555
|
+
}
|
556
|
+
|
557
|
+
static VALUE rsymbol_cache_fetch(msgpack_key_cache_t *cache, const char *str, const long length)
|
558
|
+
{
|
559
|
+
int low = 0;
|
560
|
+
int high = cache->length - 1;
|
561
|
+
int mid = 0;
|
562
|
+
int last_cmp = 0;
|
563
|
+
|
564
|
+
while (low <= high) {
|
565
|
+
mid = (high + low) >> 1;
|
566
|
+
VALUE entry = cache->entries[mid];
|
567
|
+
last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
|
568
|
+
|
569
|
+
if (last_cmp == 0) {
|
570
|
+
return entry;
|
571
|
+
} else if (last_cmp > 0) {
|
572
|
+
low = mid + 1;
|
573
|
+
} else {
|
574
|
+
high = mid - 1;
|
575
|
+
}
|
576
|
+
}
|
577
|
+
|
578
|
+
VALUE rsymbol = build_symbol(str, length);
|
579
|
+
|
580
|
+
if (cache->length < MSGPACK_KEY_CACHE_CAPACITY) {
|
581
|
+
if (last_cmp > 0) {
|
582
|
+
mid += 1;
|
583
|
+
}
|
584
|
+
|
585
|
+
rvalue_cache_insert_at(cache, mid, rsymbol);
|
586
|
+
}
|
587
|
+
return rsymbol;
|
588
|
+
}
|
589
|
+
|
590
|
+
static inline VALUE msgpack_buffer_read_top_as_interned_symbol(msgpack_buffer_t* b, msgpack_key_cache_t *cache, size_t length)
|
591
|
+
{
|
592
|
+
VALUE result = rsymbol_cache_fetch(cache, b->read_buffer, length);
|
593
|
+
_msgpack_buffer_consumed(b, length);
|
594
|
+
return result;
|
595
|
+
}
|
596
|
+
|
597
|
+
static inline VALUE msgpack_buffer_read_top_as_interned_string(msgpack_buffer_t* b, msgpack_key_cache_t *cache, size_t length)
|
598
|
+
{
|
599
|
+
VALUE result = rstring_cache_fetch(cache, b->read_buffer, length);
|
600
|
+
_msgpack_buffer_consumed(b, length);
|
601
|
+
return result;
|
602
|
+
}
|
603
|
+
|
500
604
|
#endif
|
data/ext/msgpack/extconf.rb
CHANGED
@@ -3,17 +3,19 @@ require 'mkmf'
|
|
3
3
|
have_func("rb_enc_interned_str", "ruby.h") # Ruby 3.0+
|
4
4
|
have_func("rb_hash_new_capa", "ruby.h") # Ruby 3.2+
|
5
5
|
have_func("rb_proc_call_with_block", "ruby.h") # CRuby (TruffleRuby doesn't have it)
|
6
|
+
have_func("rb_gc_mark_locations", "ruby.h") # Missing on TruffleRuby
|
6
7
|
|
7
8
|
append_cflags([
|
8
9
|
"-fvisibility=hidden",
|
9
10
|
"-I..",
|
10
11
|
"-Wall",
|
11
|
-
"-O3",
|
12
12
|
"-std=gnu99"
|
13
13
|
])
|
14
|
-
append_cflags(RbConfig::CONFIG["debugflags"]) if RbConfig::CONFIG["debugflags"]
|
15
14
|
|
16
|
-
|
15
|
+
if ENV["MSGPACK_DEBUG"]
|
16
|
+
append_cflags(RbConfig::CONFIG["debugflags"]) if RbConfig::CONFIG["debugflags"]
|
17
|
+
append_cflags("-DRUBY_DEBUG=1")
|
18
|
+
end
|
17
19
|
|
18
20
|
if RUBY_VERSION.start_with?('3.0.') && RUBY_VERSION <= '3.0.5'
|
19
21
|
# https://bugs.ruby-lang.org/issues/18772
|
data/ext/msgpack/packer.h
CHANGED
@@ -25,21 +25,26 @@
|
|
25
25
|
#define MSGPACK_PACKER_IO_FLUSH_THRESHOLD_TO_WRITE_STRING_BODY (1024)
|
26
26
|
#endif
|
27
27
|
|
28
|
+
#ifndef UNREACHABLE_RETURN
|
29
|
+
// Ruby 2.5
|
30
|
+
#define UNREACHABLE_RETURN() return
|
31
|
+
#endif
|
32
|
+
|
28
33
|
struct msgpack_packer_t;
|
29
34
|
typedef struct msgpack_packer_t msgpack_packer_t;
|
30
35
|
|
31
36
|
struct msgpack_packer_t {
|
32
37
|
msgpack_buffer_t buffer;
|
33
38
|
|
34
|
-
bool compatibility_mode;
|
35
|
-
bool has_bigint_ext_type;
|
36
|
-
bool has_symbol_ext_type;
|
37
|
-
|
38
39
|
ID to_msgpack_method;
|
39
40
|
VALUE to_msgpack_arg;
|
40
41
|
|
41
42
|
VALUE buffer_ref;
|
42
43
|
|
44
|
+
bool compatibility_mode;
|
45
|
+
bool has_bigint_ext_type;
|
46
|
+
bool has_symbol_ext_type;
|
47
|
+
|
43
48
|
/* options */
|
44
49
|
bool comaptibility_mode;
|
45
50
|
msgpack_packer_ext_registry_t ext_registry;
|
@@ -404,27 +409,33 @@ static inline bool msgpack_packer_is_utf8_compat_string(VALUE v, int encindex)
|
|
404
409
|
{
|
405
410
|
return encindex == msgpack_rb_encindex_utf8
|
406
411
|
|| encindex == msgpack_rb_encindex_usascii
|
407
|
-
||
|
412
|
+
|| ENC_CODERANGE_ASCIIONLY(v);
|
408
413
|
}
|
409
414
|
|
410
415
|
static inline void msgpack_packer_write_string_value(msgpack_packer_t* pk, VALUE v)
|
411
416
|
{
|
412
|
-
|
413
|
-
|
414
|
-
if(len >
|
415
|
-
|
416
|
-
|
417
|
+
long len = RSTRING_LEN(v);
|
418
|
+
|
419
|
+
if(RB_UNLIKELY(len > 0xffffffffL)) {
|
420
|
+
rb_raise(rb_eArgError, "size of string is too long to pack: %lu bytes should be <= %ld", len, 0xffffffffL);
|
421
|
+
UNREACHABLE_RETURN();
|
422
|
+
}
|
423
|
+
|
424
|
+
if (RB_UNLIKELY(pk->compatibility_mode)) {
|
425
|
+
msgpack_packer_write_raw_header(pk, (unsigned int)len);
|
426
|
+
msgpack_buffer_append_string(PACKER_BUFFER_(pk), v);
|
427
|
+
return;
|
417
428
|
}
|
418
429
|
|
419
|
-
int encindex =
|
420
|
-
if(msgpack_packer_is_binary(v, encindex)
|
430
|
+
int encindex = ENCODING_GET_INLINED(v);
|
431
|
+
if(msgpack_packer_is_binary(v, encindex)) {
|
421
432
|
/* write ASCII-8BIT string using Binary type */
|
422
433
|
msgpack_packer_write_bin_header(pk, (unsigned int)len);
|
423
434
|
msgpack_buffer_append_string(PACKER_BUFFER_(pk), v);
|
424
435
|
} else {
|
425
436
|
/* write UTF-8, US-ASCII, or 7bit-safe ascii-compatible string using String type directly */
|
426
437
|
/* in compatibility mode, packer packs String values as is */
|
427
|
-
if(!
|
438
|
+
if(RB_UNLIKELY(!msgpack_packer_is_utf8_compat_string(v, encindex))) {
|
428
439
|
/* transcode other strings to UTF-8 and write using String type */
|
429
440
|
VALUE enc = rb_enc_from_encoding(rb_utf8_encoding()); /* rb_enc_from_encoding_index is not extern */
|
430
441
|
v = rb_str_encode(v, enc, 0, Qnil);
|
@@ -453,11 +464,7 @@ static inline void msgpack_packer_write_symbol_value(msgpack_packer_t* pk, VALUE
|
|
453
464
|
|
454
465
|
static inline void msgpack_packer_write_fixnum_value(msgpack_packer_t* pk, VALUE v)
|
455
466
|
{
|
456
|
-
#ifdef JRUBY
|
457
|
-
msgpack_packer_write_long(pk, FIXNUM_P(v) ? FIX2LONG(v) : rb_num2ll(v));
|
458
|
-
#else
|
459
467
|
msgpack_packer_write_long(pk, FIX2LONG(v));
|
460
|
-
#endif
|
461
468
|
}
|
462
469
|
|
463
470
|
static inline void msgpack_packer_write_bignum_value(msgpack_packer_t* pk, VALUE v)
|