msgpack 1.7.1 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c2caf680ff4cafade89a14da270f52528ba2a862cea237ad028ba27fa6a93e9b
4
- data.tar.gz: 01d8cc906d925acbd32a88c22f6ace7f5534a0b447be1736ea58cfdf9abb85b6
3
+ metadata.gz: efdb772bf54b74587a6c99e9513f9c16c78bbef3e5e3c17064a4be79fd5adb7a
4
+ data.tar.gz: d2f74cb1115947f5337cd730b6283fac61ceeed3a9457597cc90155b3dc93d7e
5
5
  SHA512:
6
- metadata.gz: 48fcd12bfb13741d88a17e1a261cd742c903ba53cb1959a88b7083c2344817d90cfa898e775c136d785fbfcdd76be14b6dc5fc8ba3638a13ecabbb2d1387dbee
7
- data.tar.gz: 310f3ea573cf41b15c47bc0033121496bd8ab7a723ef3667a96a38bc286feb79536ac5aeb8d27c3c71568149f93c8f36cc9ca8e050595e27ff87762cbccfe2dd
6
+ metadata.gz: 92da2466eac162f0d6d319496d7af3dbbef0d76b77c3aefb4787213e3b957464603859101ae28ac9620be82feb29278b2ed601adf2fd9d9811b364c258061d4b
7
+ data.tar.gz: a469586178eb44bbd50abf4cb34684f6a3fd4038063a80d9b3bf7f5cb9e6a2b34c1fed853be669f0bfb356d618efff3a2eaa51b86dd34d5427787a05453b8e11
data/ChangeLog CHANGED
@@ -1,3 +1,25 @@
1
+ 2025-02-06 1.8.0
2
+
3
+ * Numerous small optimizations.
4
+ * Added `key_cache` option to `Unpacker`.
5
+
6
+ 2024-11-11 1.7.5
7
+
8
+ * Rerelease 1.7.4 with fixed java package.
9
+
10
+ 2024-11-11 1.7.4
11
+
12
+ * Fixed a potential memory leak when a recursive unpacker raises.
13
+
14
+ 2024-10-03 1.7.3
15
+
16
+ * Limit initial containers pre-allocation to `SHRT_MAX` (32k) entries.
17
+
18
+ 2023-07-18 1.7.2:
19
+
20
+ * Fix a potential GC bug when packing data using recursive extensions and buffers containing over 512KiB of data (See #341).
21
+ * Fix a regression where feeding an empty string to an Unpacker would be considered like the end of the buffer.
22
+
1
23
  2023-05-19 1.7.1:
2
24
 
3
25
  * Fix JRuby 9.4 compatibility.
data/README.md CHANGED
@@ -8,15 +8,24 @@ and typical short strings only require an extra byte in addition to the strings
8
8
  If you ever wished to use JSON for convenience (storing an image with metadata) but could
9
9
  not for technical reasons (binary data, size, speed...), MessagePack is a perfect replacement.
10
10
 
11
- require 'msgpack'
12
- msg = [1,2,3].to_msgpack #=> "\x93\x01\x02\x03"
13
- MessagePack.unpack(msg) #=> [1,2,3]
11
+ ```ruby
12
+ require 'msgpack'
13
+ msg = [1,2,3].to_msgpack #=> "\x93\x01\x02\x03"
14
+ MessagePack.unpack(msg) #=> [1,2,3]
15
+ ```
16
+
17
+ Add msgpack to your Gemfile to install with Bundler:
18
+
19
+ ```ruby
20
+ # Gemfile
21
+ gem 'msgpack'
22
+ ```
14
23
 
15
- Use RubyGems to install:
24
+ Or, use RubyGems to install:
16
25
 
17
26
  gem install msgpack
18
27
 
19
- or build msgpack-ruby and install:
28
+ Or, build msgpack-ruby and install from a checked-out msgpack-ruby repository:
20
29
 
21
30
  bundle
22
31
  rake
@@ -27,11 +36,11 @@ or build msgpack-ruby and install:
27
36
 
28
37
  * Create REST API returning MessagePack using Rails + [RABL](https://github.com/nesquena/rabl)
29
38
  * Store objects efficiently serialized by msgpack on memcached or Redis
30
- * In fact Redis supports msgpack in [EVAL-scripts](http://redis.io/commands/eval)
39
+ * In fact Redis supports msgpack in [EVAL-scripts](https://redis.io/docs/latest/commands/eval/)
31
40
  * Upload data in efficient format from mobile devices such as smartphones
32
41
  * MessagePack works on iPhone/iPad and Android. See also [Objective-C](https://github.com/msgpack/msgpack-objectivec) and [Java](https://github.com/msgpack/msgpack-java) implementations
33
42
  * Design a portable protocol to communicate with embedded devices
34
- * Check also [Fluentd](http://fluentd.org/) which is a log collector which uses msgpack for the log format (they say it uses JSON but actually it's msgpack, which is compatible with JSON)
43
+ * Check also [Fluentd](https://www.fluentd.org) which is a log collector which uses msgpack for the log format (they say it uses JSON but actually it's msgpack, which is compatible with JSON)
35
44
  * Exchange objects between software components written in different languages
36
45
  * You'll need a flexible but efficient format so that components exchange objects while keeping compatibility
37
46
 
@@ -128,9 +137,9 @@ being serialized altogether by throwing an exception:
128
137
 
129
138
  ```ruby
130
139
  class Symbol
131
- def to_msgpack_ext
132
- raise "Serialization of symbols prohibited"
133
- end
140
+ def to_msgpack_ext
141
+ raise "Serialization of symbols prohibited"
142
+ end
134
143
  end
135
144
 
136
145
  MessagePack::DefaultFactory.register_type(0x00, Symbol)
@@ -276,8 +285,8 @@ If this directory has Gemfile.lock (generated with MRI), remove it beforehand.
276
285
 
277
286
  ## Updating documents
278
287
 
279
- Online documents (http://ruby.msgpack.org) is generated from gh-pages branch.
280
- Following commands update documents in gh-pages branch:
288
+ Online documentation (https://ruby.msgpack.org) is generated from the gh-pages branch.
289
+ To update documents in gh-pages branch:
281
290
 
282
291
  bundle exec rake doc
283
292
  git checkout gh-pages
@@ -54,8 +54,8 @@ public class ExtensionRegistry {
54
54
  return hash;
55
55
  }
56
56
 
57
- public void put(RubyModule mod, int typeId, boolean recursive, IRubyObject packerProc, IRubyObject packerArg, IRubyObject unpackerProc, IRubyObject unpackerArg) {
58
- ExtensionEntry entry = new ExtensionEntry(mod, typeId, recursive, packerProc, packerArg, unpackerProc, unpackerArg);
57
+ public void put(RubyModule mod, int typeId, boolean recursive, IRubyObject packerProc, IRubyObject unpackerProc) {
58
+ ExtensionEntry entry = new ExtensionEntry(mod, typeId, recursive, packerProc, unpackerProc);
59
59
  extensionsByModule.put(mod, entry);
60
60
  extensionsByTypeId[typeId + 128] = entry;
61
61
  extensionsByAncestor.clear();
@@ -114,18 +114,14 @@ public class ExtensionRegistry {
114
114
  private final int typeId;
115
115
  private final boolean recursive;
116
116
  private final IRubyObject packerProc;
117
- private final IRubyObject packerArg;
118
117
  private final IRubyObject unpackerProc;
119
- private final IRubyObject unpackerArg;
120
118
 
121
- public ExtensionEntry(RubyModule mod, int typeId, boolean recursive, IRubyObject packerProc, IRubyObject packerArg, IRubyObject unpackerProc, IRubyObject unpackerArg) {
119
+ public ExtensionEntry(RubyModule mod, int typeId, boolean recursive, IRubyObject packerProc, IRubyObject unpackerProc) {
122
120
  this.mod = mod;
123
121
  this.typeId = typeId;
124
122
  this.recursive = recursive;
125
123
  this.packerProc = packerProc;
126
- this.packerArg = packerArg;
127
124
  this.unpackerProc = unpackerProc;
128
- this.unpackerArg = unpackerArg;
129
125
  }
130
126
 
131
127
  public RubyModule getExtensionModule() {
@@ -157,11 +153,11 @@ public class ExtensionRegistry {
157
153
  }
158
154
 
159
155
  public RubyArray<?> toPackerTuple(ThreadContext ctx) {
160
- return ctx.runtime.newArray(new IRubyObject[] {ctx.runtime.newFixnum(typeId), packerProc, packerArg});
156
+ return ctx.runtime.newArray(new IRubyObject[] {ctx.runtime.newFixnum(typeId), packerProc});
161
157
  }
162
158
 
163
159
  public RubyArray<?> toUnpackerTuple(ThreadContext ctx) {
164
- return ctx.runtime.newArray(new IRubyObject[] {mod, unpackerProc, unpackerArg});
160
+ return ctx.runtime.newArray(new IRubyObject[] {mod, unpackerProc});
165
161
  }
166
162
 
167
163
  public IRubyObject[] toPackerProcTypeIdPair(ThreadContext ctx) {
@@ -80,43 +80,15 @@ public class Factory extends RubyObject {
80
80
  });
81
81
  }
82
82
 
83
- @JRubyMethod(name = "register_type", required = 2, optional = 1)
84
- public IRubyObject registerType(ThreadContext ctx, IRubyObject[] args) {
83
+ @JRubyMethod(name = "register_type_internal", required = 3, visibility = PRIVATE)
84
+ public IRubyObject registerTypeInternal(ThreadContext ctx, IRubyObject type, IRubyObject mod, IRubyObject opts) {
85
85
  testFrozen("MessagePack::Factory");
86
86
 
87
87
  Ruby runtime = ctx.runtime;
88
- IRubyObject type = args[0];
89
- IRubyObject mod = args[1];
90
-
91
- IRubyObject packerArg;
92
- IRubyObject unpackerArg;
93
-
94
- RubyHash options = null;
95
-
96
- if (args.length == 2) {
97
- packerArg = runtime.newSymbol("to_msgpack_ext");
98
- unpackerArg = runtime.newSymbol("from_msgpack_ext");
99
- } else if (args.length == 3) {
100
- if (args[args.length - 1] instanceof RubyHash) {
101
- options = (RubyHash) args[args.length - 1];
102
- packerArg = options.fastARef(runtime.newSymbol("packer"));
103
- if (packerArg != null && packerArg.isNil()) {
104
- packerArg = null;
105
- }
106
- unpackerArg = options.fastARef(runtime.newSymbol("unpacker"));
107
- if (unpackerArg != null && unpackerArg.isNil()) {
108
- unpackerArg = null;
109
- }
110
- IRubyObject optimizedSymbolsParsingArg = options.fastARef(runtime.newSymbol("optimized_symbols_parsing"));
111
- if (optimizedSymbolsParsingArg != null && optimizedSymbolsParsingArg.isTrue()) {
112
- throw runtime.newArgumentError("JRuby implementation does not support the optimized_symbols_parsing option");
113
- }
114
- } else {
115
- throw runtime.newArgumentError(String.format("expected Hash but found %s.", args[args.length - 1].getType().getName()));
116
- }
117
- } else {
118
- throw runtime.newArgumentError(String.format("wrong number of arguments (%d for 2..3)", 2 + args.length));
119
- }
88
+ RubyHash options = (RubyHash) opts;
89
+
90
+ IRubyObject packerProc = options.fastARef(runtime.newSymbol("packer"));
91
+ IRubyObject unpackerProc = options.fastARef(runtime.newSymbol("unpacker"));
120
92
 
121
93
  long typeId = ((RubyFixnum) type).getLongValue();
122
94
  if (typeId < -128 || typeId > 127) {
@@ -128,21 +100,6 @@ public class Factory extends RubyObject {
128
100
  }
129
101
  RubyModule extModule = (RubyModule) mod;
130
102
 
131
- IRubyObject packerProc = runtime.getNil();
132
- IRubyObject unpackerProc = runtime.getNil();
133
- if (packerArg != null) {
134
- packerProc = packerArg.callMethod(ctx, "to_proc");
135
- }
136
- if (unpackerArg != null) {
137
- if (unpackerArg instanceof RubyString || unpackerArg instanceof RubySymbol) {
138
- unpackerProc = extModule.method(unpackerArg.callMethod(ctx, "to_sym"));
139
- } else if (unpackerArg instanceof RubyProc || unpackerArg instanceof RubyMethod) {
140
- unpackerProc = unpackerArg;
141
- } else {
142
- unpackerProc = unpackerArg.callMethod(ctx, "method", runtime.newSymbol("call"));
143
- }
144
- }
145
-
146
103
  boolean recursive = false;
147
104
  if (options != null) {
148
105
  IRubyObject recursiveExtensionArg = options.fastARef(runtime.newSymbol("recursive"));
@@ -151,7 +108,7 @@ public class Factory extends RubyObject {
151
108
  }
152
109
  }
153
110
 
154
- extensionRegistry.put(extModule, (int) typeId, recursive, packerProc, packerArg, unpackerProc, unpackerArg);
111
+ extensionRegistry.put(extModule, (int) typeId, recursive, packerProc, unpackerProc);
155
112
 
156
113
  if (extModule == runtime.getSymbol() && !packerProc.isNil()) {
157
114
  hasSymbolExtType = true;
@@ -93,28 +93,11 @@ public class Packer extends RubyObject {
93
93
  return registry.toInternalPackerRegistry(ctx);
94
94
  }
95
95
 
96
- @JRubyMethod(name = "register_type", required = 2, optional = 1)
97
- public IRubyObject registerType(ThreadContext ctx, IRubyObject[] args, final Block block) {
96
+ @JRubyMethod(name = "register_type_internal", required = 3, visibility = PRIVATE)
97
+ public IRubyObject registerType(ThreadContext ctx, IRubyObject type, IRubyObject mod, IRubyObject proc) {
98
98
  testFrozen("MessagePack::Packer");
99
99
 
100
100
  Ruby runtime = ctx.runtime;
101
- IRubyObject type = args[0];
102
- IRubyObject mod = args[1];
103
-
104
- IRubyObject arg;
105
- IRubyObject proc;
106
- if (args.length == 2) {
107
- if (! block.isGiven()) {
108
- throw runtime.newLocalJumpErrorNoBlock();
109
- }
110
- proc = block.getProcObject();
111
- arg = proc;
112
- } else if (args.length == 3) {
113
- arg = args[2];
114
- proc = arg.callMethod(ctx, "to_proc");
115
- } else {
116
- throw runtime.newArgumentError(String.format("wrong number of arguments (%d for 2..3)", 2 + args.length));
117
- }
118
101
 
119
102
  long typeId = ((RubyFixnum) type).getLongValue();
120
103
  if (typeId < -128 || typeId > 127) {
@@ -126,7 +109,7 @@ public class Packer extends RubyObject {
126
109
  }
127
110
  RubyModule extModule = (RubyModule) mod;
128
111
 
129
- registry.put(extModule, (int) typeId, false, proc, arg, null, null);
112
+ registry.put(extModule, (int) typeId, false, proc, null);
130
113
 
131
114
  if (extModule == runtime.getSymbol() && !proc.isNil()) {
132
115
  encoder.hasSymbolExtType = true;
@@ -126,39 +126,23 @@ public class Unpacker extends RubyObject {
126
126
  return registry.toInternalUnpackerRegistry(ctx);
127
127
  }
128
128
 
129
- @JRubyMethod(name = "register_type", required = 1, optional = 2)
130
- public IRubyObject registerType(ThreadContext ctx, IRubyObject[] args, final Block block) {
129
+ @JRubyMethod(name = "register_type_internal", required = 3, visibility = PRIVATE)
130
+ public IRubyObject registerTypeInternal(ThreadContext ctx, IRubyObject type, IRubyObject mod, IRubyObject proc) {
131
131
  testFrozen("MessagePack::Unpacker");
132
132
 
133
133
  Ruby runtime = ctx.runtime;
134
- IRubyObject type = args[0];
135
-
136
- RubyModule extModule;
137
- IRubyObject arg;
138
- IRubyObject proc;
139
- if (args.length == 1) {
140
- if (! block.isGiven()) {
141
- throw runtime.newLocalJumpErrorNoBlock();
142
- }
143
- proc = RubyProc.newProc(runtime, block, block.type);
144
- if (proc == null)
145
- System.err.println("proc from Block is null");
146
- arg = proc;
147
- extModule = null;
148
- } else if (args.length == 3) {
149
- extModule = (RubyModule) args[1];
150
- arg = args[2];
151
- proc = extModule.method(arg);
152
- } else {
153
- throw runtime.newArgumentError(String.format("wrong number of arguments (%d for 1 or 3)", 2 + args.length));
154
- }
155
134
 
156
135
  long typeId = ((RubyFixnum) type).getLongValue();
157
136
  if (typeId < -128 || typeId > 127) {
158
137
  throw runtime.newRangeError(String.format("integer %d too big to convert to `signed char'", typeId));
159
138
  }
160
139
 
161
- registry.put(extModule, (int) typeId, false, null, null, proc, arg);
140
+ RubyModule extModule = null;
141
+ if (mod != runtime.getNil()) {
142
+ extModule = (RubyModule)mod;
143
+ }
144
+
145
+ registry.put(extModule, (int) typeId, false, null, proc);
162
146
  return runtime.getNil();
163
147
  }
164
148
 
data/ext/msgpack/buffer.c CHANGED
@@ -251,12 +251,14 @@ bool _msgpack_buffer_read_all2(msgpack_buffer_t* b, char* buffer, size_t length)
251
251
 
252
252
  static inline msgpack_buffer_chunk_t* _msgpack_buffer_alloc_new_chunk(msgpack_buffer_t* b)
253
253
  {
254
- msgpack_buffer_chunk_t* reuse = b->free_list;
255
- if(reuse == NULL) {
256
- return xmalloc(sizeof(msgpack_buffer_chunk_t));
254
+ msgpack_buffer_chunk_t* chunk = b->free_list;
255
+ if (chunk) {
256
+ b->free_list = b->free_list->next;
257
+ } else {
258
+ chunk = xmalloc(sizeof(msgpack_buffer_chunk_t));
257
259
  }
258
- b->free_list = b->free_list->next;
259
- return reuse;
260
+ memset(chunk, 0, sizeof(msgpack_buffer_chunk_t));
261
+ return chunk;
260
262
  }
261
263
 
262
264
  static inline void _msgpack_buffer_add_new_chunk(msgpack_buffer_t* b)
@@ -298,7 +300,7 @@ static inline void _msgpack_buffer_add_new_chunk(msgpack_buffer_t* b)
298
300
  static inline void _msgpack_buffer_append_reference(msgpack_buffer_t* b, VALUE string)
299
301
  {
300
302
  VALUE mapped_string;
301
- if(ENCODING_GET(string) == msgpack_rb_encindex_ascii8bit && RTEST(rb_obj_frozen_p(string))) {
303
+ if(ENCODING_GET_INLINED(string) == msgpack_rb_encindex_ascii8bit && RB_OBJ_FROZEN_RAW(string)) {
302
304
  mapped_string = string;
303
305
  } else {
304
306
  mapped_string = rb_str_dup(string);
@@ -307,8 +309,9 @@ static inline void _msgpack_buffer_append_reference(msgpack_buffer_t* b, VALUE s
307
309
 
308
310
  _msgpack_buffer_add_new_chunk(b);
309
311
 
310
- char* data = RSTRING_PTR(mapped_string);
311
- size_t length = RSTRING_LEN(mapped_string);
312
+ char* data;
313
+ size_t length;
314
+ RSTRING_GETMEM(mapped_string, data, length);
312
315
 
313
316
  b->tail.first = (char*) data;
314
317
  b->tail.last = (char*) data + length;
@@ -328,7 +331,7 @@ void _msgpack_buffer_append_long_string(msgpack_buffer_t* b, VALUE string)
328
331
  {
329
332
  if(b->io != Qnil) {
330
333
  msgpack_buffer_flush(b);
331
- if (ENCODING_GET(string) == msgpack_rb_encindex_ascii8bit) {
334
+ if (ENCODING_GET_INLINED(string) == msgpack_rb_encindex_ascii8bit) {
332
335
  rb_funcall(b->io, b->io_write_all_method, 1, string);
333
336
  } else {
334
337
  msgpack_buffer_append(b, RSTRING_PTR(string), RSTRING_LEN(string));
data/ext/msgpack/buffer.h CHANGED
@@ -81,20 +81,6 @@ struct msgpack_buffer_chunk_t {
81
81
  bool rmem;
82
82
  };
83
83
 
84
- union msgpack_buffer_cast_block_t {
85
- char buffer[8];
86
- uint8_t u8;
87
- uint16_t u16;
88
- uint32_t u32;
89
- uint64_t u64;
90
- int8_t i8;
91
- int16_t i16;
92
- int32_t i32;
93
- int64_t i64;
94
- float f;
95
- double d;
96
- };
97
-
98
84
  struct msgpack_buffer_t {
99
85
  char* read_buffer;
100
86
  char* tail_buffer_end;
@@ -107,8 +93,6 @@ struct msgpack_buffer_t {
107
93
  char* rmem_end;
108
94
  void** rmem_owner;
109
95
 
110
- union msgpack_buffer_cast_block_t cast_block;
111
-
112
96
  VALUE io;
113
97
  VALUE io_buffer;
114
98
  ID io_write_all_method;
@@ -253,13 +237,14 @@ void _msgpack_buffer_append_long_string(msgpack_buffer_t* b, VALUE string);
253
237
 
254
238
  static inline size_t msgpack_buffer_append_string(msgpack_buffer_t* b, VALUE string)
255
239
  {
256
- size_t length = RSTRING_LEN(string);
240
+ size_t length;
241
+ char *ptr;
242
+ RSTRING_GETMEM(string, ptr, length);
257
243
 
258
244
  if(length > b->write_reference_threshold) {
259
245
  _msgpack_buffer_append_long_string(b, string);
260
-
261
246
  } else {
262
- msgpack_buffer_append(b, RSTRING_PTR(string), length);
247
+ msgpack_buffer_append(b, ptr, length);
263
248
  }
264
249
 
265
250
  return length;
@@ -268,7 +253,9 @@ static inline size_t msgpack_buffer_append_string(msgpack_buffer_t* b, VALUE str
268
253
  static inline size_t msgpack_buffer_append_string_reference(msgpack_buffer_t* b, VALUE string)
269
254
  {
270
255
  size_t length = RSTRING_LEN(string);
271
- _msgpack_buffer_append_long_string(b, string);
256
+ if (length > 0) {
257
+ _msgpack_buffer_append_long_string(b, string);
258
+ }
272
259
  return length;
273
260
  }
274
261
 
@@ -381,14 +368,6 @@ static inline size_t msgpack_buffer_skip_nonblock(msgpack_buffer_t* b, size_t le
381
368
  return length;
382
369
  }
383
370
 
384
- static inline union msgpack_buffer_cast_block_t* msgpack_buffer_read_cast_block(msgpack_buffer_t* b, size_t n)
385
- {
386
- if(!msgpack_buffer_read_all(b, b->cast_block.buffer, n)) {
387
- return NULL;
388
- }
389
- return &b->cast_block;
390
- }
391
-
392
371
  size_t msgpack_buffer_read_to_string_nonblock(msgpack_buffer_t* b, VALUE string, size_t length);
393
372
 
394
373
  static inline size_t msgpack_buffer_read_to_string(msgpack_buffer_t* b, VALUE string, size_t length)
@@ -495,4 +474,131 @@ static inline VALUE msgpack_buffer_read_top_as_symbol(msgpack_buffer_t* b, size_
495
474
  return rb_str_intern(msgpack_buffer_read_top_as_string(b, length, true, utf8));
496
475
  }
497
476
 
477
+ // Hash keys are likely to be repeated, and are frozen.
478
+ // As such we can re-use them if we keep a cache of the ones we've seen so far,
479
+ // and save much more expensive lookups into the global fstring table.
480
+ // This cache implementation is deliberately simple, as we're optimizing for compactness,
481
+ // to be able to fit easily embeded inside msgpack_unpacker_t.
482
+ // As such, binary search into a sorted array gives a good tradeoff between compactness and
483
+ // performance.
484
+ #define MSGPACK_KEY_CACHE_CAPACITY 63
485
+
486
+ typedef struct msgpack_key_cache_t msgpack_key_cache_t;
487
+ struct msgpack_key_cache_t {
488
+ int length;
489
+ VALUE entries[MSGPACK_KEY_CACHE_CAPACITY];
490
+ };
491
+
492
+ static inline VALUE build_interned_string(const char *str, const long length)
493
+ {
494
+ # ifdef HAVE_RB_ENC_INTERNED_STR
495
+ return rb_enc_interned_str(str, length, rb_utf8_encoding());
496
+ # else
497
+ VALUE rstring = rb_utf8_str_new(str, length);
498
+ return rb_funcall(rb_str_freeze(rstring), s_uminus, 0);
499
+ # endif
500
+ }
501
+
502
+ static inline VALUE build_symbol(const char *str, const long length)
503
+ {
504
+ return rb_str_intern(build_interned_string(str, length));
505
+ }
506
+
507
+ static void rvalue_cache_insert_at(msgpack_key_cache_t *cache, int index, VALUE rstring)
508
+ {
509
+ MEMMOVE(&cache->entries[index + 1], &cache->entries[index], VALUE, cache->length - index);
510
+ cache->length++;
511
+ cache->entries[index] = rstring;
512
+ }
513
+
514
+ static inline int rstring_cache_cmp(const char *str, const long length, VALUE rstring)
515
+ {
516
+ long rstring_length = RSTRING_LEN(rstring);
517
+ if (length == rstring_length) {
518
+ return memcmp(str, RSTRING_PTR(rstring), length);
519
+ } else {
520
+ return (int)(length - rstring_length);
521
+ }
522
+ }
523
+
524
+ static VALUE rstring_cache_fetch(msgpack_key_cache_t *cache, const char *str, const long length)
525
+ {
526
+ int low = 0;
527
+ int high = cache->length - 1;
528
+ int mid = 0;
529
+ int last_cmp = 0;
530
+
531
+ while (low <= high) {
532
+ mid = (high + low) >> 1;
533
+ VALUE entry = cache->entries[mid];
534
+ last_cmp = rstring_cache_cmp(str, length, entry);
535
+
536
+ if (last_cmp == 0) {
537
+ return entry;
538
+ } else if (last_cmp > 0) {
539
+ low = mid + 1;
540
+ } else {
541
+ high = mid - 1;
542
+ }
543
+ }
544
+
545
+ VALUE rstring = build_interned_string(str, length);
546
+
547
+ if (cache->length < MSGPACK_KEY_CACHE_CAPACITY) {
548
+ if (last_cmp > 0) {
549
+ mid += 1;
550
+ }
551
+
552
+ rvalue_cache_insert_at(cache, mid, rstring);
553
+ }
554
+ return rstring;
555
+ }
556
+
557
+ static VALUE rsymbol_cache_fetch(msgpack_key_cache_t *cache, const char *str, const long length)
558
+ {
559
+ int low = 0;
560
+ int high = cache->length - 1;
561
+ int mid = 0;
562
+ int last_cmp = 0;
563
+
564
+ while (low <= high) {
565
+ mid = (high + low) >> 1;
566
+ VALUE entry = cache->entries[mid];
567
+ last_cmp = rstring_cache_cmp(str, length, rb_sym2str(entry));
568
+
569
+ if (last_cmp == 0) {
570
+ return entry;
571
+ } else if (last_cmp > 0) {
572
+ low = mid + 1;
573
+ } else {
574
+ high = mid - 1;
575
+ }
576
+ }
577
+
578
+ VALUE rsymbol = build_symbol(str, length);
579
+
580
+ if (cache->length < MSGPACK_KEY_CACHE_CAPACITY) {
581
+ if (last_cmp > 0) {
582
+ mid += 1;
583
+ }
584
+
585
+ rvalue_cache_insert_at(cache, mid, rsymbol);
586
+ }
587
+ return rsymbol;
588
+ }
589
+
590
+ static inline VALUE msgpack_buffer_read_top_as_interned_symbol(msgpack_buffer_t* b, msgpack_key_cache_t *cache, size_t length)
591
+ {
592
+ VALUE result = rsymbol_cache_fetch(cache, b->read_buffer, length);
593
+ _msgpack_buffer_consumed(b, length);
594
+ return result;
595
+ }
596
+
597
+ static inline VALUE msgpack_buffer_read_top_as_interned_string(msgpack_buffer_t* b, msgpack_key_cache_t *cache, size_t length)
598
+ {
599
+ VALUE result = rstring_cache_fetch(cache, b->read_buffer, length);
600
+ _msgpack_buffer_consumed(b, length);
601
+ return result;
602
+ }
603
+
498
604
  #endif
@@ -21,7 +21,8 @@
21
21
  #include "buffer.h"
22
22
  #include "buffer_class.h"
23
23
 
24
- VALUE cMessagePack_Buffer;
24
+ VALUE cMessagePack_Buffer = Qnil;
25
+ VALUE cMessagePack_HeldBuffer = Qnil;
25
26
 
26
27
  static ID s_read;
27
28
  static ID s_readpartial;
@@ -34,6 +35,73 @@ static VALUE sym_read_reference_threshold;
34
35
  static VALUE sym_write_reference_threshold;
35
36
  static VALUE sym_io_buffer_size;
36
37
 
38
+ typedef struct msgpack_held_buffer_t msgpack_held_buffer_t;
39
+ struct msgpack_held_buffer_t {
40
+ size_t size;
41
+ VALUE mapped_strings[];
42
+ };
43
+
44
+ static void HeldBuffer_mark(void *data)
45
+ {
46
+ msgpack_held_buffer_t* held_buffer = (msgpack_held_buffer_t*)data;
47
+ for (size_t index = 0; index < held_buffer->size; index++) {
48
+ rb_gc_mark(held_buffer->mapped_strings[index]);
49
+ }
50
+ }
51
+
52
+ static size_t HeldBuffer_memsize(const void *data)
53
+ {
54
+ const msgpack_held_buffer_t* held_buffer = (msgpack_held_buffer_t*)data;
55
+ return sizeof(size_t) + sizeof(VALUE) * held_buffer->size;
56
+ }
57
+
58
+ static const rb_data_type_t held_buffer_data_type = {
59
+ .wrap_struct_name = "msgpack:held_buffer",
60
+ .function = {
61
+ .dmark = HeldBuffer_mark,
62
+ .dfree = RUBY_TYPED_DEFAULT_FREE,
63
+ .dsize = HeldBuffer_memsize,
64
+ },
65
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY
66
+ };
67
+
68
+ VALUE MessagePack_Buffer_hold(msgpack_buffer_t* buffer)
69
+ {
70
+ size_t mapped_strings_count = 0;
71
+ msgpack_buffer_chunk_t* c = buffer->head;
72
+ while (c != &buffer->tail) {
73
+ if (c->mapped_string != NO_MAPPED_STRING) {
74
+ mapped_strings_count++;
75
+ }
76
+ c = c->next;
77
+ }
78
+ if (c->mapped_string != NO_MAPPED_STRING) {
79
+ mapped_strings_count++;
80
+ }
81
+
82
+ if (mapped_strings_count == 0) {
83
+ return Qnil;
84
+ }
85
+
86
+ msgpack_held_buffer_t* held_buffer = xmalloc(sizeof(msgpack_held_buffer_t) + mapped_strings_count * sizeof(VALUE));
87
+
88
+ c = buffer->head;
89
+ mapped_strings_count = 0;
90
+ while (c != &buffer->tail) {
91
+ if (c->mapped_string != NO_MAPPED_STRING) {
92
+ held_buffer->mapped_strings[mapped_strings_count] = c->mapped_string;
93
+ mapped_strings_count++;
94
+ }
95
+ c = c->next;
96
+ }
97
+ if (c->mapped_string != NO_MAPPED_STRING) {
98
+ held_buffer->mapped_strings[mapped_strings_count] = c->mapped_string;
99
+ mapped_strings_count++;
100
+ }
101
+ held_buffer->size = mapped_strings_count;
102
+ return TypedData_Wrap_Struct(cMessagePack_HeldBuffer, &held_buffer_data_type, held_buffer);
103
+ }
104
+
37
105
 
38
106
  #define CHECK_STRING_TYPE(value) \
39
107
  value = rb_check_string_type(value); \
@@ -520,6 +588,9 @@ void MessagePack_Buffer_module_init(VALUE mMessagePack)
520
588
 
521
589
  msgpack_buffer_static_init();
522
590
 
591
+ cMessagePack_HeldBuffer = rb_define_class_under(mMessagePack, "HeldBuffer", rb_cBasicObject);
592
+ rb_undef_alloc_func(cMessagePack_HeldBuffer);
593
+
523
594
  cMessagePack_Buffer = rb_define_class_under(mMessagePack, "Buffer", rb_cObject);
524
595
 
525
596
  rb_define_alloc_func(cMessagePack_Buffer, Buffer_alloc);
@@ -25,6 +25,7 @@ extern VALUE cMessagePack_Buffer;
25
25
  void MessagePack_Buffer_module_init(VALUE mMessagePack);
26
26
 
27
27
  VALUE MessagePack_Buffer_wrap(msgpack_buffer_t* b, VALUE owner);
28
+ VALUE MessagePack_Buffer_hold(msgpack_buffer_t* b);
28
29
 
29
30
  void MessagePack_Buffer_set_options(msgpack_buffer_t* b, VALUE io, VALUE options);
30
31