oj 3.12.3 → 3.13.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (56) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -3
  3. data/ext/oj/buf.h +9 -0
  4. data/ext/oj/cache.c +341 -0
  5. data/ext/oj/cache.h +21 -0
  6. data/ext/oj/compat.c +7 -22
  7. data/ext/oj/custom.c +15 -17
  8. data/ext/oj/debug.c +132 -0
  9. data/ext/oj/dump.c +12 -15
  10. data/ext/oj/dump_compat.c +3 -3
  11. data/ext/oj/dump_object.c +9 -9
  12. data/ext/oj/dump_strict.c +3 -3
  13. data/ext/oj/err.h +19 -0
  14. data/ext/oj/extconf.rb +5 -0
  15. data/ext/oj/fast.c +7 -18
  16. data/ext/oj/intern.c +281 -0
  17. data/ext/oj/intern.h +26 -0
  18. data/ext/oj/mimic_json.c +2 -2
  19. data/ext/oj/object.c +15 -92
  20. data/ext/oj/odd.c +1 -1
  21. data/ext/oj/oj.c +117 -94
  22. data/ext/oj/oj.h +1 -1
  23. data/ext/oj/parse.c +5 -5
  24. data/ext/oj/parser.c +1483 -0
  25. data/ext/oj/parser.h +90 -0
  26. data/ext/oj/rails.c +5 -5
  27. data/ext/oj/resolve.c +2 -20
  28. data/ext/oj/rxclass.c +1 -1
  29. data/ext/oj/saj.c +1 -1
  30. data/ext/oj/saj2.c +348 -0
  31. data/ext/oj/scp.c +1 -1
  32. data/ext/oj/sparse.c +2 -2
  33. data/ext/oj/stream_writer.c +4 -4
  34. data/ext/oj/strict.c +9 -27
  35. data/ext/oj/string_writer.c +2 -2
  36. data/ext/oj/usual.c +1252 -0
  37. data/ext/oj/validate.c +51 -0
  38. data/ext/oj/wab.c +14 -19
  39. data/lib/oj/error.rb +1 -1
  40. data/lib/oj/state.rb +8 -7
  41. data/lib/oj/version.rb +1 -1
  42. data/pages/Options.md +1 -1
  43. data/pages/Parser.md +309 -0
  44. data/pages/Rails.md +2 -2
  45. data/test/json_gem/json_generator_test.rb +1 -1
  46. data/test/mem.rb +33 -0
  47. data/test/perf_once.rb +58 -0
  48. data/test/perf_parser.rb +189 -0
  49. data/test/test_hash.rb +1 -1
  50. data/test/test_parser.rb +27 -0
  51. data/test/test_parser_saj.rb +245 -0
  52. data/test/test_parser_usual.rb +213 -0
  53. metadata +26 -5
  54. data/ext/oj/hash.c +0 -168
  55. data/ext/oj/hash.h +0 -21
  56. data/ext/oj/hash_test.c +0 -491
data/ext/oj/validate.c ADDED
@@ -0,0 +1,51 @@
1
+ // Copyright (c) 2021, Peter Ohler, All rights reserved.
2
+
3
+ #include "parser.h"
4
+
5
+ static void
6
+ noop(ojParser p) {
7
+ }
8
+
9
+ static VALUE
10
+ option(ojParser p, const char *key, VALUE value) {
11
+ rb_raise(rb_eArgError, "%s is not an option for the validate delegate", key);
12
+ return Qnil;
13
+ }
14
+
15
+ static VALUE
16
+ result(ojParser p) {
17
+ return Qnil;
18
+ }
19
+
20
+ static void
21
+ dfree(ojParser p) {
22
+ }
23
+
24
+ static void
25
+ mark(ojParser p) {
26
+ }
27
+
28
+ void oj_set_parser_validator(ojParser p) {
29
+ p->ctx = NULL;
30
+ Funcs end = p->funcs + 3;
31
+ Funcs f;
32
+
33
+ for (f = p->funcs; f < end; f++) {
34
+ f->add_null = noop;
35
+ f->add_true = noop;
36
+ f->add_false = noop;
37
+ f->add_int = noop;
38
+ f->add_float = noop;
39
+ f->add_big = noop;
40
+ f->add_str = noop;
41
+ f->open_array = noop;
42
+ f->close_array = noop;
43
+ f->open_object = noop;
44
+ f->close_object = noop;
45
+ }
46
+ p->option = option;
47
+ p->result = result;
48
+ p->free = dfree;
49
+ p->mark = mark;
50
+ p->start = noop;
51
+ }
data/ext/oj/wab.c CHANGED
@@ -10,7 +10,7 @@
10
10
  #include "dump.h"
11
11
  #include "encode.h"
12
12
  #include "err.h"
13
- #include "hash.h"
13
+ #include "intern.h"
14
14
  #include "oj.h"
15
15
  #include "parse.h"
16
16
  #include "trace.h"
@@ -233,7 +233,7 @@ static void dump_obj(VALUE obj, int depth, Out out, bool as_ok) {
233
233
  } else if (oj_bigdecimal_class == clas) {
234
234
  volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0);
235
235
 
236
- oj_dump_raw(rb_string_value_ptr((VALUE *)&rstr), (int)RSTRING_LEN(rstr), out);
236
+ oj_dump_raw(RSTRING_PTR(rstr), (int)RSTRING_LEN(rstr), out);
237
237
  } else if (resolve_wab_uuid_class() == clas) {
238
238
  oj_dump_str(rb_funcall(obj, oj_to_s_id, 0), depth, out, false);
239
239
  } else if (resolve_uri_http_class() == clas) {
@@ -302,21 +302,16 @@ static VALUE calc_hash_key(ParseInfo pi, Val parent) {
302
302
 
303
303
  return rkey;
304
304
  }
305
- if (Yes != pi->options.cache_keys) {
306
- rkey = rb_str_new(parent->key, parent->klen);
307
- rkey = oj_encode(rkey);
308
- rkey = rb_str_intern(rkey);
309
-
310
- return rkey;
311
- }
312
- VALUE *slot;
313
-
314
- if (Qnil == (rkey = oj_sym_hash_get(parent->key, parent->klen, &slot))) {
315
- rkey = rb_str_new(parent->key, parent->klen);
316
- rkey = oj_encode(rkey);
317
- rkey = rb_str_intern(rkey);
318
- *slot = rkey;
319
- rb_gc_register_address(slot);
305
+ if (Yes == pi->options.cache_keys) {
306
+ rkey = oj_sym_intern(parent->key, parent->klen);
307
+ } else {
308
+ #if HAVE_RB_ENC_INTERNED_STR
309
+ rkey = rb_enc_interned_str(parent->key, parent->klen, oj_utf8_encoding);
310
+ #else
311
+ rkey = rb_utf8_str_new(parent->key, parent->klen);
312
+ rkey = rb_str_intern(rkey);
313
+ OBJ_FREEZE(rkey);
314
+ #endif
320
315
  }
321
316
  return rkey;
322
317
  }
@@ -475,8 +470,8 @@ static VALUE cstr_to_rstr(ParseInfo pi, const char *str, size_t len) {
475
470
  return rb_funcall(wab_uuid_clas, oj_new_id, 1, rb_str_new(str, len));
476
471
  }
477
472
  if (7 < len && 0 == strncasecmp("http://", str, 7)) {
478
- int err = 0;
479
- v = rb_str_new(str, len);
473
+ int err = 0;
474
+ v = rb_str_new(str, len);
480
475
  volatile VALUE uri = rb_protect(protect_uri, v, &err);
481
476
 
482
477
  if (0 == err) {
data/lib/oj/error.rb CHANGED
@@ -16,7 +16,7 @@ module Oj
16
16
  # An Exception that is raised if a file fails to load.
17
17
  LoadError = Class.new(Error)
18
18
 
19
- # An Exception that is raised if there is a conflict with mimicing JSON
19
+ # An Exception that is raised if there is a conflict with mimicking JSON
20
20
  MimicError = Class.new(Error)
21
21
 
22
22
  end # Oj
data/lib/oj/state.rb CHANGED
@@ -1,7 +1,7 @@
1
1
 
2
2
  module JSON
3
3
  module Ext
4
- module Generator
4
+ module Generator
5
5
  unless defined?(::JSON::Ext::Generator::State)
6
6
  # This class exists for json gem compatibility only. While it can be
7
7
  # used as the options for other than compatibility a simple Hash is
@@ -44,11 +44,11 @@ module JSON
44
44
  def to_h()
45
45
  return @attrs.dup
46
46
  end
47
-
47
+
48
48
  def to_hash()
49
49
  return @attrs.dup
50
50
  end
51
-
51
+
52
52
  def allow_nan?()
53
53
  @attrs[:allow_nan]
54
54
  end
@@ -104,7 +104,7 @@ module JSON
104
104
  def has_key?(k)
105
105
  @attrs.has_key?(key.to_sym)
106
106
  end
107
-
107
+
108
108
  # Handles requests for Hash values. Others cause an Exception to be raised.
109
109
  # @param [Symbol|String] m method symbol
110
110
  # @return [Boolean] the value of the specified instance variable.
@@ -116,11 +116,12 @@ module JSON
116
116
  m = m.to_s[0..-2]
117
117
  m = m.to_sym
118
118
  return @attrs.store(m, args[0])
119
- else
119
+ end
120
+ if @attrs.has_key?(m.to_sym)
120
121
  raise ArgumentError.new("wrong number of arguments (#{args.size} for 0 with #{m}) to method #{m}") unless args.nil? or args.empty?
121
122
  return @attrs[m.to_sym]
122
- end
123
- raise NoMethodError.new("undefined method #{m}", m)
123
+ end
124
+ return @attrs.send(m, *args, &block)
124
125
  end
125
126
 
126
127
  end # State
data/lib/oj/version.rb CHANGED
@@ -1,5 +1,5 @@
1
1
 
2
2
  module Oj
3
3
  # Current version of the module.
4
- VERSION = '3.12.3'
4
+ VERSION = '3.13.3'
5
5
  end
data/pages/Options.md CHANGED
@@ -268,7 +268,7 @@ Use symbols instead of strings for hash keys.
268
268
  ### :symbolize_names [Boolean]
269
269
 
270
270
  Like :symbol_keys has keys are made into symbols but only when
271
- mimicing the JSON gem and then only as the JSON gem honors it so
271
+ mimicking the JSON gem and then only as the JSON gem honors it so
272
272
  JSON.parse honors the option but JSON.load does not.
273
273
 
274
274
  ### :trace
data/pages/Parser.md ADDED
@@ -0,0 +1,309 @@
1
+ # How Oj Just Got Faster
2
+
3
+ The original Oj parser is a performant parser that supports several
4
+ modes. As of this writing Oj is almost 10 years old. A dinosaur by
5
+ coding standards. It was time for an upgrade. Dealing with issues over
6
+ the years it became clear that a few things could have been done
7
+ better. The new `Oj::Parser` is a response that not only attempts to
8
+ address some of the issues but also give the Oj parser a significant
9
+ boost in performance. `Oj::Parser` takes a different approach to JSON
10
+ parsing than the now legacy Oj parser. Not really a legacy parser yet
11
+ since the `Oj::Parser` is not a drop-in replacement for the JSON gem
12
+ but it is as much as 3 times or more faster than the previous parser in
13
+ some modes.
14
+
15
+ ## Address Issues
16
+
17
+ There are a few features of the `Oj.load` parser that continue to be
18
+ the reason for many of the issues on the project. The most significant
19
+ area is compatibility with both Rails and the JSON gem as they battle
20
+ it out for which behavior will win out in any particular
21
+ situation. Most of the issues are on the writing or dumping side of
22
+ the JSON packages but some are present on the parsing as
23
+ well. Conversion of decimals is one area where the Rails and the JSON
24
+ gem vary. The `Oj::Parser` addresses this by allowing for completely
25
+ separate parser instances. Create a parser and configure it for the
26
+ situation and leave the other parsers on their own.
27
+
28
+ The `Oj::Parser` is mostly compatible with the JSON gem and Rails but
29
+ no claims are made that the behavior will be the same as either.
30
+
31
+ The most frequent issues that can be addressed with the new parser are
32
+ around the handling of options. For `Oj.load` there is a set of
33
+ default options that can be set and the same options can be specified
34
+ for each call to parse or load. This approach has a couple of
35
+ downsides. One is that the defaults are shared across all calls to parse no
36
+ matter what the desired mode is. The second is that having to provide
37
+ all the options on each parse call incurs a performance penalty and is
38
+ just annoying to repeat the same set of options over many calls.
39
+
40
+ By localizing options to a specific parser instance there is never any
41
+ bleed over to other instances.
42
+
43
+ ## How
44
+
45
+ It's wonderful to wish for a faster parser that solves all the
46
+ annoyances of the previous parser but how it was done is a much more
47
+ interesting question to answer.
48
+
49
+ At the core, the API for parsing was changed. Instead of a single
50
+ global parser any number of parsers can be created and each is separate
51
+ from the others. The parser itself is able to rip through a JSON
52
+ string, stream, or file and then make calls to a delegate to process
53
+ the JSON elements according to the delegate behavior. This is similar
54
+ to the `Oj.load` parser but the new parser takes advantage of
55
+ character maps, reduced conditional branching, and calling function
56
+ pointers.
57
+
58
+ ### Options
59
+
60
+ As mentioned, one way to change the options issues was to change the
61
+ API. Instead of having a shared set of default options a separate
62
+ parser is created and configured for each use case. Options are set
63
+ with methods on the parser so no more guessing what options are
64
+ available. With options isolated to individual parsers there is no
65
+ unintended leakage to other parse use cases.
66
+
67
+ ### Structure
68
+
69
+ A relatively small amount of time is spent in the actual parsing of JSON
70
+ in `Oj.load`. Most of the time is spent building the Ruby
71
+ Objects. Even cutting the parsing time in half only gives a 10%
72
+ improvement in performance but 10% is still an improvement.
73
+
74
+ The `Oj::Parser` is designed to reduce conditional branching. To do
75
+ that it uses character maps for the various states that the parser
76
+ goes through when parsing. There is no recursion as the JSON elements
77
+ are parsed. The use of character maps for each parser state means
78
+ the parser function can be and is re-entrant so partial blocks of JSON
79
+ can be parsed and the results combined.
80
+
81
+ There are no Ruby calls in the parser itself. Instead delegates are
82
+ used to implement the various behaviors of the parser which are
83
+ currently validation (validate), callbacks (SAJ), or building Ruby
84
+ objects (usual). The delegates are where all the Ruby calls and
85
+ related optimizations take place.
86
+
87
+ Considering JSON file parsing, `Oj.load_file` is able to read a file a
88
+ block at a time and the new `Oj::Parser` does the same. There was a
89
+ change in how that is done though. `Oj.load_file` sets up a reader
90
+ that must be called for each character. Basically a buffered
91
+ reader. `Oj::Parser` drops down a level and uses a re-entrant parser
92
+ that takes a block of bytes at a time so there is no call needed for
93
+ each character but rather just iterating over the block read from the
94
+ file.
95
+
96
+ Reading a block at a time also allows for an efficient second thread
97
+ to be used for reading blocks. That feature is not in the first
98
+ iteration of the `Oj::Parser` but the stage is set for it in the
99
+ future. The same approach was used successfully in
100
+ [OjC](https://github.com/ohler55/ojc) which is where the code for the
101
+ parser was taken from.
102
+
103
+ ### Delegates
104
+
105
+ There are three delegates; validate, SAJ, and usual.
106
+
107
+ #### Validate
108
+
109
+ The validate delegate is trivial in that it does nothing other than let
110
+ the parser complete. There are no options for the validate
111
+ delegate. Since it makes no Ruby calls other than to start the parsing,
112
+ it is no surprise that the validate delegate is the
113
+ best performer.
114
+
115
+ #### SAJ (Simple API for JSON)
116
+
117
+ The SAJ delegate is compatible with the SAJ handlers used with
118
+ `Oj.saj_parse` so it needs to keep track of keys for the
119
+ callbacks. Two optimizations are used. The first is a reusable key
120
+ stack while the second is a string cache similar to the Ruby intern
121
+ function.
122
+
123
+ When parsing a Hash (JSON object) element the key is passed to the
124
+ callback function if the SAJ handler responds to the method. The key
125
+ is also provided when closing an Array or Hash that is part of a
126
+ parent Hash. A key stack supports this.
127
+
128
+ If the option is turned on a lookup is made and previously cached key
129
+ VALUEs are used. This avoids creating the string for the key and
130
+ setting the encoding on it. The cache used is an auto expanding hash
131
+ implementation that is limited to strings less than 35 characters
132
+ which covers most keys. Larger strings use the slower string creation
133
+ approach. The use of the cache reduces object creation which saves on
134
+ both memory allocation and time. It is not appropriate for one time
135
+ parsing of say all the keys in a dictionary but is ideally suited for
136
+ loading similar JSON multiple times.
137
+
138
+ #### Usual
139
+
140
+ By far the most complex of the delegates is the 'usual' delegate. The
141
+ usual delegate builds Ruby Objects when parsing JSON. It incorporates
142
+ many options for configuration and makes use of a number of
143
+ optimizations.
144
+
145
+ ##### Reduce Branching
146
+
147
+ In keeping with the goal of reducing conditional branching most of the
148
+ delegate options are implemented by changing a function pointer
149
+ according to the option selected. For example when turning on or off
150
+ `:symbol_keys` the function to calculate the key is changed so no
151
+ decision needs to be made during parsing. Using this approach option
152
+ branching happens when the option is set and not each time when
153
+ parsing.
154
+
155
+ ##### Cache
156
+
157
+ Creating Ruby Objects whether Strings, Array, or some other class is
158
+ expensive. Well expensive when running at the speeds Oj runs at. One
159
+ way to reduce Object creation is to cache those objects on the
160
+ assumption that they will most likely be used again. This is
161
+ especially true of Hash keys and Object attribute IDs. When creating
162
+ Objects from a class name in the JSON a class cache saves resolving
163
+ the string to a class each time. Of course there are times when
164
+ caching is not preferred so caching can be turned on or off with
165
+ option methods on the parser which are passed down to the delegate.
166
+
167
+ The Oj cache implementation is an auto expanding hash. When certain
168
+ limits are reached the hash is expanded and rehashed. Rehashing can
169
+ take some time as the number of items cached increases so there is
170
+ also an option to start with a larger cache size to avoid or reduce
171
+ the likelihood of a rehash.
172
+
173
+ The Oj cache has an advantage over the Ruby intern function
174
+ (`rb_intern()`) in that several steps are needed for some cached
175
+ items. As an example Object attribute IDs are created by adding an `@`
176
+ character prefix to a string and then converting to an ID. This is done
177
+ once when inserting into the cache and after that only a lookup is
178
+ needed.
179
+
180
+ ##### Bulk Insert
181
+
182
+ The Ruby functions available for C extension functions are extensive
183
+ and offer many options across the board. The bulk insert functions for
184
+ both Arrays and Hashes are much faster than appending or setting
185
+ functions that set one value at a time. The Array bulk insert is
186
+ around 15 times faster and for Hash it is about 3 times faster.
187
+
188
+ To take advantage of the bulk inserts arrays of VALUEs are
189
+ needed. With a little planning these VALUE arrays can be reused which
190
+ leads into another optimization, the use of stacks.
191
+
192
+ ##### Stacks
193
+
194
+ Parsing requires memory to keep track of values when parsing nested
195
+ JSON elements. That can be done on the call stack making use of
196
+ recursive calls or it can be done with a stack managed by the
197
+ parser. The `Oj.load` method maintains a stack for Ruby objects and
198
+ builds the output as the parsing progresses.
199
+
200
+ `Oj::Parser` uses three different stacks. One stack for values, one
201
+ for keys, and one for collections (Array and Hash). By postponing the
202
+ creation of the collection elements the bulk insertions for Array and
203
+ Hash can be used. For arrays the use of a value stack and creating the
204
+ array after all elements have been identified gives a 15x improvement
205
+ in array creation.
206
+
207
+ For Hash the story is a little different. The bulk insert for Hash
208
+ alternates keys and values but there is a wrinkle to consider. Since
209
+ Ruby Object creation is triggered by the occurrence of an element that
210
+ matches a creation identifier the creation of a collection is not just
211
+ for Array and Hash but also Object. Setting Object attributes uses an
212
+ ID and not a VALUE. For that reason the keys should not be created as
213
+ String or Symbol types as they would be ignored and the VALUE creation
214
+ wasted when setting Object attributes. Using the bulk insert for Hash
215
+ gives a 3x improvement for that part of the object building.
216
+
217
+ Looking at the Object creation the JSON gem expects a class method of
218
+ `#json_create(arg)`. The single argument is the Hash resulting from
219
+ the parsing assuming that the parser parsed to a Hash first. This is
220
+ less than ideal from a performance perspective so `Oj::Parser`
221
+ provides an option to take that approach or to use the much more
222
+ efficient approach of never creating the Hash but instead creating the
223
+ Object and then setting the attributes directly.
224
+
225
+ To further improve performance and reduce the amount of memory
226
+ allocations and frees the stacks are reused from one call to `#parse`
227
+ to another.
228
+
229
+ ## Results
230
+
231
+ The results are even better than expected. Running the
232
+ [perf_parser.rb](https://github.com/ohler55/oj/blob/develop/test/perf_parser.rb)
233
+ file shows the improvements. There are four comparisons all run on a
234
+ MacBook Pro with Intel processor.
235
+
236
+ ### Validation
237
+
238
+ Without a comparable parser that just validates a JSON document the
239
+ `Oj.saj_parse` callback parser with a nil handler is used for
240
+ comparison to the new `Oj::Parser.new(:validate)`. In that case the
241
+ comparison is:
242
+
243
+ ```
244
+ System time (secs) rate (ops/sec)
245
+ ------------------- ----------- --------------
246
+ Oj::Parser.validate 0.101 494369.136
247
+ Oj::Saj.none 0.205 244122.745
248
+ ```
249
+
250
+ The `Oj::Parser.new(:validate)` is **2.03** times faster!
251
+
252
+ ### Callback
253
+
254
+ Oj has two callback parsers. One is SCP and the other SAJ. Both are
255
+ similar in that a handler is provided that implements methods for
256
+ processing the various element types in a JSON document. Comparing
257
+ `Oj.saj_parse` to `Oj::Parser.new(:saj)` with a handler that implements
258
+ all callback methods gives the following raw results:
259
+
260
+ ```
261
+ System time (secs) rate (ops/sec)
262
+ -------------- ----------- --------------
263
+ Oj::Parser.saj 0.783 63836.986
264
+ Oj::Saj.all 1.182 42315.397
265
+ ```
266
+
267
+ The `Oj::Parser.new(:saj)` is **1.51** times faster.
268
+
269
+ ### Parse to Ruby primitives
270
+
271
+ Parsing to Ruby primitives and Array and Hash is possible with most
272
+ parsers including the JSON gem parser. The raw results comparing
273
+ `Oj.strict_load`, `Oj::Parser.new(:usual)`, and the JSON gem are:
274
+
275
+ ```
276
+ System time (secs) rate (ops/sec)
277
+ ---------------- ----------- --------------
278
+ Oj::Parser.usual 0.452 110544.876
279
+ Oj::strict_load 0.699 71490.257
280
+ JSON::Ext 1.009 49555.094
281
+ ```
282
+
283
+ The `Oj::Parser.new(:usual)` is **1.55** times faster than `Oj.strict_load` and
284
+ **2.23** times faster than the JSON gem.
285
+
286
+ ### Object
287
+
288
+ Oj supports two modes for Object serialization and
289
+ deserialization. Comparing to the JSON gem compatible mode
290
+ `Oj.compat_load`, `Oj::Parser.new(:usual)`, and the JSON gem yields
291
+ the following raw results:
292
+
293
+ ```
294
+ System time (secs) rate (ops/sec)
295
+ ---------------- ----------- --------------
296
+ Oj::Parser.usual 0.071 703502.033
297
+ Oj::compat_load 0.225 221762.927
298
+ JSON::Ext 0.401 124638.859
299
+ ```
300
+
301
+ The `Oj::Parser.new(:usual)` is **3.17** times faster than
302
+ `Oj.compat_load` and **5.64** times faster than the JSON gem.
303
+
304
+ ## Summary
305
+
306
+ With a performance boost of 1.5x to over 3x over the `Oj.load`
307
+ parser the new `Oj::Parser` is a big win in the performance arena. The
308
+ isolation of options is another feature that should make life easier
309
+ for developers.