oj 3.12.3 → 3.13.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/README.md +1 -2
- data/ext/oj/buf.h +9 -0
- data/ext/oj/cache.c +187 -0
- data/ext/oj/cache.h +20 -0
- data/ext/oj/compat.c +8 -22
- data/ext/oj/custom.c +13 -12
- data/ext/oj/debug.c +131 -0
- data/ext/oj/dump.c +11 -11
- data/ext/oj/dump_compat.c +3 -3
- data/ext/oj/dump_object.c +7 -7
- data/ext/oj/dump_strict.c +3 -3
- data/ext/oj/err.h +19 -0
- data/ext/oj/extconf.rb +4 -0
- data/ext/oj/hash_test.c +3 -30
- data/ext/oj/intern.c +398 -0
- data/ext/oj/intern.h +27 -0
- data/ext/oj/object.c +10 -58
- data/ext/oj/odd.c +1 -1
- data/ext/oj/oj.c +111 -88
- data/ext/oj/oj.h +1 -1
- data/ext/oj/parse.c +4 -4
- data/ext/oj/parser.c +1527 -0
- data/ext/oj/parser.h +90 -0
- data/ext/oj/rails.c +4 -4
- data/ext/oj/resolve.c +2 -20
- data/ext/oj/saj2.c +346 -0
- data/ext/oj/scp.c +1 -1
- data/ext/oj/sparse.c +1 -1
- data/ext/oj/stream_writer.c +3 -3
- data/ext/oj/strict.c +10 -27
- data/ext/oj/usual.c +1222 -0
- data/ext/oj/validate.c +50 -0
- data/ext/oj/wab.c +9 -17
- data/lib/oj/version.rb +1 -1
- data/pages/Parser.md +309 -0
- data/test/json_gem/json_common_interface_test.rb +1 -1
- data/test/perf_parser.rb +184 -0
- data/test/test_parser.rb +27 -0
- data/test/test_parser_saj.rb +245 -0
- data/test/test_parser_usual.rb +213 -0
- metadata +22 -4
- data/ext/oj/hash.c +0 -168
- data/ext/oj/hash.h +0 -21
data/ext/oj/validate.c
ADDED
@@ -0,0 +1,50 @@
|
|
1
|
+
// Copyright (c) 2021, Peter Ohler, All rights reserved.
|
2
|
+
|
3
|
+
#include "parser.h"
|
4
|
+
|
5
|
+
static void
|
6
|
+
noop(ojParser p) {
|
7
|
+
}
|
8
|
+
|
9
|
+
static VALUE
|
10
|
+
option(ojParser p, const char *key, VALUE value) {
|
11
|
+
rb_raise(rb_eArgError, "%s is not an option for the validate delegate", key);
|
12
|
+
return Qnil;
|
13
|
+
}
|
14
|
+
|
15
|
+
static VALUE
|
16
|
+
result(ojParser p) {
|
17
|
+
return Qnil;
|
18
|
+
}
|
19
|
+
|
20
|
+
static void
|
21
|
+
dfree(ojParser p) {
|
22
|
+
}
|
23
|
+
|
24
|
+
static void
|
25
|
+
mark(ojParser p) {
|
26
|
+
}
|
27
|
+
|
28
|
+
void oj_set_parser_validator(ojParser p) {
|
29
|
+
p->ctx = NULL;
|
30
|
+
Funcs end = p->funcs + 3;
|
31
|
+
|
32
|
+
for (Funcs f = p->funcs; f < end; f++) {
|
33
|
+
f->add_null = noop;
|
34
|
+
f->add_true = noop;
|
35
|
+
f->add_false = noop;
|
36
|
+
f->add_int = noop;
|
37
|
+
f->add_float = noop;
|
38
|
+
f->add_big = noop;
|
39
|
+
f->add_str = noop;
|
40
|
+
f->open_array = noop;
|
41
|
+
f->close_array = noop;
|
42
|
+
f->open_object = noop;
|
43
|
+
f->close_object = noop;
|
44
|
+
}
|
45
|
+
p->option = option;
|
46
|
+
p->result = result;
|
47
|
+
p->free = dfree;
|
48
|
+
p->mark = mark;
|
49
|
+
p->start = noop;
|
50
|
+
}
|
data/ext/oj/wab.c
CHANGED
@@ -10,7 +10,7 @@
|
|
10
10
|
#include "dump.h"
|
11
11
|
#include "encode.h"
|
12
12
|
#include "err.h"
|
13
|
-
#include "
|
13
|
+
#include "intern.h"
|
14
14
|
#include "oj.h"
|
15
15
|
#include "parse.h"
|
16
16
|
#include "trace.h"
|
@@ -233,7 +233,7 @@ static void dump_obj(VALUE obj, int depth, Out out, bool as_ok) {
|
|
233
233
|
} else if (oj_bigdecimal_class == clas) {
|
234
234
|
volatile VALUE rstr = rb_funcall(obj, oj_to_s_id, 0);
|
235
235
|
|
236
|
-
oj_dump_raw(
|
236
|
+
oj_dump_raw(RSTRING_PTR(rstr), (int)RSTRING_LEN(rstr), out);
|
237
237
|
} else if (resolve_wab_uuid_class() == clas) {
|
238
238
|
oj_dump_str(rb_funcall(obj, oj_to_s_id, 0), depth, out, false);
|
239
239
|
} else if (resolve_uri_http_class() == clas) {
|
@@ -302,22 +302,14 @@ static VALUE calc_hash_key(ParseInfo pi, Val parent) {
|
|
302
302
|
|
303
303
|
return rkey;
|
304
304
|
}
|
305
|
-
if (Yes
|
305
|
+
if (Yes == pi->options.cache_keys) {
|
306
|
+
rkey = oj_sym_intern(parent->key, parent->klen);
|
307
|
+
} else {
|
306
308
|
rkey = rb_str_new(parent->key, parent->klen);
|
307
309
|
rkey = oj_encode(rkey);
|
308
|
-
|
309
|
-
|
310
|
-
return rkey;
|
311
|
-
}
|
312
|
-
VALUE *slot;
|
313
|
-
|
314
|
-
if (Qnil == (rkey = oj_sym_hash_get(parent->key, parent->klen, &slot))) {
|
315
|
-
rkey = rb_str_new(parent->key, parent->klen);
|
316
|
-
rkey = oj_encode(rkey);
|
317
|
-
rkey = rb_str_intern(rkey);
|
318
|
-
*slot = rkey;
|
319
|
-
rb_gc_register_address(slot);
|
310
|
+
rkey = rb_str_intern(rkey);
|
320
311
|
}
|
312
|
+
OBJ_FREEZE(rkey);
|
321
313
|
return rkey;
|
322
314
|
}
|
323
315
|
|
@@ -475,8 +467,8 @@ static VALUE cstr_to_rstr(ParseInfo pi, const char *str, size_t len) {
|
|
475
467
|
return rb_funcall(wab_uuid_clas, oj_new_id, 1, rb_str_new(str, len));
|
476
468
|
}
|
477
469
|
if (7 < len && 0 == strncasecmp("http://", str, 7)) {
|
478
|
-
int
|
479
|
-
|
470
|
+
int err = 0;
|
471
|
+
v = rb_str_new(str, len);
|
480
472
|
volatile VALUE uri = rb_protect(protect_uri, v, &err);
|
481
473
|
|
482
474
|
if (0 == err) {
|
data/lib/oj/version.rb
CHANGED
data/pages/Parser.md
ADDED
@@ -0,0 +1,309 @@
|
|
1
|
+
# How Oj Just Got Faster
|
2
|
+
|
3
|
+
The original Oj parser is a performant parser that supports several
|
4
|
+
modes. As of this writing Oj is almost 10 years old. A dinosaur by
|
5
|
+
coding standards. It was time for an upgrade. Dealing with issues over
|
6
|
+
the years it became clear that a few things could have been done
|
7
|
+
better. The new `Oj::Parser` is a response that not only attempts to
|
8
|
+
address some of the issues but also give the Oj parser a significant
|
9
|
+
boost in performance. `Oj::Parser` takes a different approach to JSON
|
10
|
+
parsing than the now legacy Oj parser. Not really a legacy parser yet
|
11
|
+
since the `Oj::Parser` is not a drop-in replacement for the JSON gem
|
12
|
+
but it is as much as 3 times or more faster than the previous parser in
|
13
|
+
some modes.
|
14
|
+
|
15
|
+
## Address Issues
|
16
|
+
|
17
|
+
There are a few features of the `Oj.load` parser that continue to be
|
18
|
+
the reason for many of the issues on the project. The most significant
|
19
|
+
area is compatibility with both Rails and the JSON gem as they battle
|
20
|
+
it out for which behavior will win out in any particular
|
21
|
+
situation. Most of the issues are on the writing or dumping side of
|
22
|
+
the JSON packages but some are present on the parsing side as
|
23
|
+
well. Conversion of decimals is one area where Rails and the JSON
|
24
|
+
gem vary. The `Oj::Parser` addresses this by allowing for completely
|
25
|
+
separate parser instances. Create a parser and configure it for the
|
26
|
+
situation and leave the other parsers on their own.
|
27
|
+
|
28
|
+
The `Oj::Parser` is mostly compatible with the JSON gem and Rails but
|
29
|
+
no claims are made that the behavior will be the same as either.
|
30
|
+
|
31
|
+
The most frequent issues that can be addressed with the new parser are
|
32
|
+
around the handling of options. For `Oj.load` there is a set of
|
33
|
+
default options that can be set and the same options can be specified
|
34
|
+
for each call to parse or load. This approach has a couple of
|
35
|
+
downsides. One is that the defaults are shared across all calls to parse no
|
36
|
+
matter what the desired mode is. The second is that having to provide
|
37
|
+
all the options on each parse call incurs a performance penalty and is
|
38
|
+
just annoying to repeat the same set of options over many calls.
|
39
|
+
|
40
|
+
By localizing options to a specific parser instance there is never any
|
41
|
+
bleed over to other instances.
|
42
|
+
|
43
|
+
## How
|
44
|
+
|
45
|
+
It's wonderful to wish for a faster parser that solves all the
|
46
|
+
annoyances of the previous parser but how it was done is a much more
|
47
|
+
interesting question to answer.
|
48
|
+
|
49
|
+
At the core, the API for parsing was changed. Instead of a single
|
50
|
+
global parser any number of parsers can be created and each is separate
|
51
|
+
from the others. The parser itself is able to rip through a JSON
|
52
|
+
string, stream, or file and then make calls to a delegate to process
|
53
|
+
the JSON elements according to the delegate behavior. This is similar
|
54
|
+
to the `Oj.load` parser but the new parser takes advantage of
|
55
|
+
character maps, reduced conditional branching, and calling function
|
56
|
+
pointers.
|
57
|
+
|
58
|
+
### Options
|
59
|
+
|
60
|
+
As mentioned, one way to change the options issues was to change the
|
61
|
+
API. Instead of having a shared set of default options a separate
|
62
|
+
parser is created and configured for each use case. Options are set
|
63
|
+
with methods on the parser so no more guessing what options are
|
64
|
+
available. With options isolated to individual parsers there is no
|
65
|
+
unintended leakage to other parse use cases.
|
66
|
+
|
67
|
+
### Structure
|
68
|
+
|
69
|
+
A relatively small amount of time is spent in the actual parsing of JSON
|
70
|
+
in `Oj.load`. Most of the time is spent building the Ruby
|
71
|
+
Objects. Even cutting the parsing time in half only gives a 10%
|
72
|
+
improvement in performance but 10% is still an improvement.
|
73
|
+
|
74
|
+
The `Oj::Parser` is designed to reduce conditional branching. To do
|
75
|
+
that it uses character maps for the various states that the parser
|
76
|
+
goes through when parsing. There is no recursion as the JSON elements
|
77
|
+
are parsed. The use of character maps for each parser state means
|
78
|
+
the parser function can and is re-entrant so partial blocks of JSON
|
79
|
+
can be parsed and the results combined.
|
80
|
+
|
81
|
+
There are no Ruby calls in the parser itself. Instead delegates are
|
82
|
+
used to implement the various behaviors of the parser which are
|
83
|
+
currently validation (validate), callbacks (SAJ), or building Ruby
|
84
|
+
objects (usual). The delegates are where all the Ruby calls and
|
85
|
+
related optimizations take place.
|
86
|
+
|
87
|
+
Considering JSON file parsing, `Oj.load_file` is able to read a file a
|
88
|
+
block at a time and the new `Oj::Parser` does the same. There was a
|
89
|
+
change in how that is done though. `Oj.load_file` sets up a reader
|
90
|
+
that must be called for each character. Basically a buffered
|
91
|
+
reader. `Oj::Parser` drops down a level and uses a re-entrant parser
|
92
|
+
that takes a block of bytes at a time so there is no call needed for
|
93
|
+
each character but rather just iterating over the block read from the
|
94
|
+
file.
|
95
|
+
|
96
|
+
Reading a block at a time also allows for an efficient second thread
|
97
|
+
to be used for reading blocks. That feature is not in the first
|
98
|
+
iteration of the `Oj::Parser` but the stage is set for it in the
|
99
|
+
future. The same approach was used successfully in
|
100
|
+
[OjC](https://github.com/ohler55/ojc) which is where the code for the
|
101
|
+
parser was taken from.
|
102
|
+
|
103
|
+
### Delegates
|
104
|
+
|
105
|
+
There are three delegates; validate, SAJ, and usual.
|
106
|
+
|
107
|
+
#### Validate
|
108
|
+
|
109
|
+
The validate delegate is trivial in that it does nothing other than let
|
110
|
+
the parser complete. There are no options for the validate
|
111
|
+
delegate. By not making any Ruby calls other than to start the parsing
|
112
|
+
it is no surprise that the validate delegate is the
|
113
|
+
best performer.
|
114
|
+
|
115
|
+
#### SAJ (Simple API for JSON)
|
116
|
+
|
117
|
+
The SAJ delegate is compatible with the SAJ handlers used with
|
118
|
+
`Oj.saj_parse` so it needs to keep track of keys for the
|
119
|
+
callbacks. Two optimizations are used. The first is a reusable key
|
120
|
+
stack while the second is a string cache similar to the Ruby intern
|
121
|
+
function.
|
122
|
+
|
123
|
+
When parsing a Hash (JSON object) element the key is passed to the
|
124
|
+
callback function if the SAJ handler responds to the method. The key
|
125
|
+
is also provided when closing an Array or Hash that is part of a
|
126
|
+
parent Hash. A key stack supports this.
|
127
|
+
|
128
|
+
If the option is turned on a lookup is made and previously cached key
|
129
|
+
VALUEs are used. This avoids creating the string for the key and
|
130
|
+
setting the encoding on it. The cache used is an auto expanding hash
|
131
|
+
implementation that is limited to strings less than 35 characters
|
132
|
+
which covers most keys. Larger strings use the slower string creation
|
133
|
+
approach. The use of the cache reduces object creation which saves on
|
134
|
+
both memory allocation and time. It is not appropriate for one time
|
135
|
+
parsing of say all the keys in a dictionary but is ideally suited for
|
136
|
+
loading similar JSON multiple times.
|
137
|
+
|
138
|
+
#### Usual
|
139
|
+
|
140
|
+
By far the most complex of the delegates is the 'usual' delegate. The
|
141
|
+
usual delegate builds Ruby Objects when parsing JSON. It incorporates
|
142
|
+
many options for configuration and makes use of a number of
|
143
|
+
optimizations.
|
144
|
+
|
145
|
+
##### Reduce Branching
|
146
|
+
|
147
|
+
In keeping with the goal of reducing conditional branching most of the
|
148
|
+
delegate options are implemented by changing a function pointer
|
149
|
+
according to the option selected. For example when turning on or off
|
150
|
+
`:symbol_keys` the function to calculate the key is changed so no
|
151
|
+
decision needs to be made during parsing. Using this approach option
|
152
|
+
branching happens when the option is set and not each time when
|
153
|
+
parsing.
|
154
|
+
|
155
|
+
##### Cache
|
156
|
+
|
157
|
+
Creating Ruby Objects whether Strings, Array, or some other class is
|
158
|
+
expensive. Well expensive when running at the speeds Oj runs at. One
|
159
|
+
way to reduce Object creation is to cache those objects on the
|
160
|
+
assumption that they will most likely be used again. This is
|
161
|
+
especially true of Hash keys and Object attribute IDs. When creating
|
162
|
+
Objects from a class name in the JSON a class cache saves resolving
|
163
|
+
the string to a class each time. Of course there are times when
|
164
|
+
caching is not preferred so caching can be turned on or off with
|
165
|
+
option methods on the parser which are passed down to the delegate.
|
166
|
+
|
167
|
+
The Oj cache implementation is an auto expanding hash. When certain
|
168
|
+
limits are reached the hash is expanded and rehashed. Rehashing can
|
169
|
+
take some time as the number of items cached increases so there is
|
170
|
+
also an option to start with a larger cache size to avoid or reduce
|
171
|
+
the likelihood of a rehash.
|
172
|
+
|
173
|
+
The Oj cache has an advantage over the Ruby intern function
|
174
|
+
(`rb_intern()`) in that several steps are needed for some cached
|
175
|
+
items. As an example Object attribute IDs are created by adding an `@`
|
176
|
+
character prefix to a string and then converting to an ID. This is done
|
177
|
+
once when inserting into the cache and after that only a lookup is
|
178
|
+
needed.
|
179
|
+
|
180
|
+
##### Bulk Insert
|
181
|
+
|
182
|
+
The Ruby functions available for C extension functions are extensive
|
183
|
+
and offer many options across the board. The bulk insert functions for
|
184
|
+
both Arrays and Hashes are much faster than appending or setting
|
185
|
+
functions that set one value at a time. The Array bulk insert is
|
186
|
+
around 15 times faster and for Hash it is about 3 times faster.
|
187
|
+
|
188
|
+
To take advantage of the bulk inserts arrays of VALUEs are
|
189
|
+
needed. With a little planning these VALUE arrays can be reused which
|
190
|
+
leads into another optimization, the use of stacks.
|
191
|
+
|
192
|
+
##### Stacks
|
193
|
+
|
194
|
+
Parsing requires memory to keep track of values when parsing nested
|
195
|
+
JSON elements. That can be done on the call stack making use of
|
196
|
+
recursive calls or it can be done with a stack managed by the
|
197
|
+
parser. The `Oj.load` method maintains a stack for Ruby objects and
|
198
|
+
builds the output as the parsing progresses.
|
199
|
+
|
200
|
+
`Oj::Parser` uses three different stacks. One stack for values, one
|
201
|
+
for keys, and one for collections (Array and Hash). By postponing the
|
202
|
+
creation of the collection elements the bulk insertions for Array and
|
203
|
+
Hash can be used. For arrays the use of a value stack and creating the
|
204
|
+
array after all elements have been identified gives a 15x improvement
|
205
|
+
in array creation.
|
206
|
+
|
207
|
+
For Hash the story is a little different. The bulk insert for Hash
|
208
|
+
alternates keys and values but there is a wrinkle to consider. Since
|
209
|
+
Ruby Object creation is triggered by the occurrence of an element that
|
210
|
+
matches a creation identifier the creation of a collection is not just
|
211
|
+
for Array and Hash but also Object. Setting Object attributes uses an
|
212
|
+
ID and not a VALUE. For that reason the keys should not be created as
|
213
|
+
String or Symbol types as they would be ignored and the VALUE creation
|
214
|
+
wasted when setting Object attributes. Using the bulk insert for Hash
|
215
|
+
gives a 3x improvement for that part of the object building.
|
216
|
+
|
217
|
+
Looking at the Object creation the JSON gem expects a class method of
|
218
|
+
`#json_create(arg)`. The single argument is the Hash resulting from
|
219
|
+
the parsing assuming that the parser parsed to a Hash first. This is
|
220
|
+
less than ideal from a performance perspective so `Oj::Parser`
|
221
|
+
provides an option to take that approach or to use the much more
|
222
|
+
efficient approach of never creating the Hash but instead creating the
|
223
|
+
Object and then setting the attributes directly.
|
224
|
+
|
225
|
+
To further improve performance and reduce the amount of memory
|
226
|
+
allocations and frees the stacks are reused from one call to `#parse`
|
227
|
+
to another.
|
228
|
+
|
229
|
+
## Results
|
230
|
+
|
231
|
+
The results are even better than expected. Running the
|
232
|
+
[perf_parser.rb](https://github.com/ohler55/oj/blob/develop/test/perf_parser.rb)
|
233
|
+
file shows the improvements. There are four comparisons all run on a
|
234
|
+
MacBook Pro with Intel processor.
|
235
|
+
|
236
|
+
### Validation
|
237
|
+
|
238
|
+
Without a comparable parser that just validates a JSON document the
|
239
|
+
`Oj.saj_parse` callback parser with a nil handler is used for
|
240
|
+
comparison to the new `Oj::Parser.new(:validate)`. In that case the
|
241
|
+
comparison is:
|
242
|
+
|
243
|
+
```
|
244
|
+
System time (secs) rate (ops/sec)
|
245
|
+
------------------- ----------- --------------
|
246
|
+
Oj::Parser.validate 0.101 494369.136
|
247
|
+
Oj::Saj.none 0.205 244122.745
|
248
|
+
```
|
249
|
+
|
250
|
+
The `Oj::Parser.new(:validate)` is **2.03** times faster!
|
251
|
+
|
252
|
+
### Callback
|
253
|
+
|
254
|
+
Oj has two callback parsers. One is SCP and the other SAJ. Both are
|
255
|
+
similar in that a handler is provided that implements methods for
|
256
|
+
processing the various element types in a JSON document. Comparing
|
257
|
+
`Oj.saj_parse` to `Oj::Parser.new(:saj)` with an all callback methods
|
258
|
+
implemented handler gives the following raw results:
|
259
|
+
|
260
|
+
```
|
261
|
+
System time (secs) rate (ops/sec)
|
262
|
+
-------------- ----------- --------------
|
263
|
+
Oj::Parser.saj 0.783 63836.986
|
264
|
+
Oj::Saj.all 1.182 42315.397
|
265
|
+
```
|
266
|
+
|
267
|
+
The `Oj::Parser.new(:saj)` is **1.51** times faster.
|
268
|
+
|
269
|
+
### Parse to Ruby primitives
|
270
|
+
|
271
|
+
Parsing to Ruby primitives and Array and Hash is possible with most
|
272
|
+
parsers including the JSON gem parser. The raw results comparing
|
273
|
+
`Oj.strict_load`, `Oj::Parser.new(:usual)`, and the JSON gem are:
|
274
|
+
|
275
|
+
```
|
276
|
+
System time (secs) rate (ops/sec)
|
277
|
+
---------------- ----------- --------------
|
278
|
+
Oj::Parser.usual 0.452 110544.876
|
279
|
+
Oj::strict_load 0.699 71490.257
|
280
|
+
JSON::Ext 1.009 49555.094
|
281
|
+
```
|
282
|
+
|
283
|
+
The `Oj::Parser.new(:usual)` is **1.55** times faster than `Oj.load` and
|
284
|
+
**2.23** times faster than the JSON gem.
|
285
|
+
|
286
|
+
### Object
|
287
|
+
|
288
|
+
Oj supports two modes for Object serialization and
|
289
|
+
deserialization. Comparing to the JSON gem compatible mode
|
290
|
+
`Oj.compat_load`, `Oj::Parser.new(:usual)`, and the JSON gem yields
|
291
|
+
the following raw results:
|
292
|
+
|
293
|
+
```
|
294
|
+
System time (secs) rate (ops/sec)
|
295
|
+
---------------- ----------- --------------
|
296
|
+
Oj::Parser.usual 0.071 703502.033
|
297
|
+
Oj::compat_load 0.225 221762.927
|
298
|
+
JSON::Ext 0.401 124638.859
|
299
|
+
```
|
300
|
+
|
301
|
+
The `Oj::Parser.new(:usual)` is **3.17** times faster than
|
302
|
+
`Oj.compat_load` and **5.64** times faster than the JSON gem.
|
303
|
+
|
304
|
+
## Summary
|
305
|
+
|
306
|
+
With a performance boost of 1.5x to over 3x over the `Oj.load`
|
307
|
+
parser the new `Oj::Parser` is a big win in the performance arena. The
|
308
|
+
isolation of options is another feature that should make life easier
|
309
|
+
for developers.
|
@@ -23,7 +23,7 @@ class JSONCommonInterfaceTest < Test::Unit::TestCase
|
|
23
23
|
'h' => 1000.0,
|
24
24
|
'i' => 0.001
|
25
25
|
}
|
26
|
-
# Tired of chasing floating point rounding and precision. Oj
|
26
|
+
# Tired of chasing floating point rounding and precision. Oj now uses the
|
27
27
|
# Ruby float parser in compat mode yet on i386 machines there are issues
|
28
28
|
# with this test when the float is included.
|
29
29
|
#@json = '{"a":2,"b":5.23683071,"c":"c","d":[1,"b",3.14],"e":{"foo":"bar"},'\
|
data/test/perf_parser.rb
ADDED
@@ -0,0 +1,184 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
# encoding: UTF-8
|
3
|
+
|
4
|
+
$: << '.'
|
5
|
+
$: << File.join(File.dirname(__FILE__), "../lib")
|
6
|
+
$: << File.join(File.dirname(__FILE__), "../ext")
|
7
|
+
|
8
|
+
require 'optparse'
|
9
|
+
require 'perf'
|
10
|
+
require 'oj'
|
11
|
+
require 'json'
|
12
|
+
|
13
|
+
$verbose = false
|
14
|
+
$iter = 50_000
|
15
|
+
$with_bignum = false
|
16
|
+
$size = 1
|
17
|
+
$cache_keys = true
|
18
|
+
$symbol_keys = false
|
19
|
+
|
20
|
+
opts = OptionParser.new
|
21
|
+
opts.on("-v", "verbose") { $verbose = true }
|
22
|
+
opts.on("-c", "--count [Int]", Integer, "iterations") { |i| $iter = i }
|
23
|
+
opts.on("-s", "--size [Int]", Integer, "size (~Kbytes)") { |i| $size = i }
|
24
|
+
opts.on("-b", "with bignum") { $with_bignum = true }
|
25
|
+
opts.on("-k", "no cache") { $cache_keys = false }
|
26
|
+
opts.on("-sym", "symbol keys") { $symbol_keys = true }
|
27
|
+
opts.on("-h", "--help", "Show this display") { puts opts; Process.exit!(0) }
|
28
|
+
files = opts.parse(ARGV)
|
29
|
+
|
30
|
+
$obj = {
|
31
|
+
'a' => 'Alpha', # string
|
32
|
+
'b' => true, # boolean
|
33
|
+
'c' => 12345, # number
|
34
|
+
'd' => [ true, [false, [-123456789, nil], 3.9676, ['Something else.', false, 1, nil], nil]], # mix it up array
|
35
|
+
'e' => { 'zero' => nil, 'one' => 1, 'two' => 2, 'three' => [3], 'four' => [0, 1, 2, 3, 4] }, # hash
|
36
|
+
'f' => nil, # nil
|
37
|
+
'h' => { 'a' => { 'b' => { 'c' => { 'd' => {'e' => { 'f' => { 'g' => nil }}}}}}}, # deep hash, not that deep
|
38
|
+
'i' => [[[[[[[nil]]]]]]] # deep array, again, not that deep
|
39
|
+
}
|
40
|
+
$obj['g'] = 12345678901234567890123456789 if $with_bignum
|
41
|
+
|
42
|
+
if 0 < $size
|
43
|
+
o = $obj
|
44
|
+
$obj = []
|
45
|
+
(4 * $size).times do
|
46
|
+
$obj << o
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
$json = Oj.dump($obj)
|
51
|
+
$failed = {} # key is same as String used in tests later
|
52
|
+
Oj.default_options = {create_id: '^', create_additions: true, class_cache: true}
|
53
|
+
if $cache_keys
|
54
|
+
Oj.default_options = {cache_keys: true, cache_str: 6, symbol_keys: $symbol_keys}
|
55
|
+
else
|
56
|
+
Oj.default_options = {cache_keys: false, cache_str: 0, symbol_keys: $symbol_keys}
|
57
|
+
end
|
58
|
+
JSON.parser = JSON::Ext::Parser
|
59
|
+
|
60
|
+
class AllSaj
|
61
|
+
def initialize()
|
62
|
+
end
|
63
|
+
|
64
|
+
def hash_start(key)
|
65
|
+
end
|
66
|
+
|
67
|
+
def hash_end(key)
|
68
|
+
end
|
69
|
+
|
70
|
+
def array_start(key)
|
71
|
+
end
|
72
|
+
|
73
|
+
def array_end(key)
|
74
|
+
end
|
75
|
+
|
76
|
+
def add_value(value, key)
|
77
|
+
end
|
78
|
+
end # AllSaj
|
79
|
+
|
80
|
+
class NoSaj
|
81
|
+
def initialize()
|
82
|
+
end
|
83
|
+
end # NoSaj
|
84
|
+
|
85
|
+
no_handler = NoSaj.new()
|
86
|
+
all_handler = AllSaj.new()
|
87
|
+
|
88
|
+
if $verbose
|
89
|
+
puts "json:\n#{$json}\n"
|
90
|
+
end
|
91
|
+
|
92
|
+
### Validate ######################
|
93
|
+
p_val = Oj::Parser.new(:validate)
|
94
|
+
|
95
|
+
puts '-' * 80
|
96
|
+
puts "Validate Performance"
|
97
|
+
perf = Perf.new()
|
98
|
+
perf.add('Oj::Parser.validate', 'none') { p_val.parse($json) }
|
99
|
+
perf.add('Oj::Saj.none', 'none') { Oj.saj_parse(no_handler, $json) }
|
100
|
+
perf.run($iter)
|
101
|
+
|
102
|
+
### SAJ ######################
|
103
|
+
p_all = Oj::Parser.new(:saj)
|
104
|
+
p_all.handler = all_handler
|
105
|
+
p_all.cache_keys = $cache_keys
|
106
|
+
p_all.cache_strings = 6
|
107
|
+
|
108
|
+
puts '-' * 80
|
109
|
+
puts "Parse Callback Performance"
|
110
|
+
perf = Perf.new()
|
111
|
+
perf.add('Oj::Parser.saj', 'all') { p_all.parse($json) }
|
112
|
+
perf.add('Oj::Saj.all', 'all') { Oj.saj_parse(all_handler, $json) }
|
113
|
+
perf.run($iter)
|
114
|
+
|
115
|
+
### Usual ######################
|
116
|
+
p_usual = Oj::Parser.new(:usual)
|
117
|
+
p_usual.cache_keys = $cache_keys
|
118
|
+
p_usual.cache_strings = ($cache_keys ? 6 : 0)
|
119
|
+
p_usual.symbol_keys = $symbol_keys
|
120
|
+
|
121
|
+
puts '-' * 80
|
122
|
+
puts "Parse Usual Performance"
|
123
|
+
perf = Perf.new()
|
124
|
+
perf.add('Oj::Parser.usual', '') { p_usual.parse($json) }
|
125
|
+
perf.add('Oj::strict_load', '') { Oj.strict_load($json) }
|
126
|
+
perf.add('JSON::Ext', 'parse') { JSON.load($json) }
|
127
|
+
perf.run($iter)
|
128
|
+
|
129
|
+
### Usual Objects ######################
|
130
|
+
|
131
|
+
# Original Oj follows the JSON gem for creating objects which uses the class
|
132
|
+
# json_create(arg) method. Oj::Parser in usual mode supports the same but also
|
133
|
+
# handles populating the object variables directly which is faster.
|
134
|
+
|
135
|
+
class Stuff
|
136
|
+
attr_accessor :alpha, :bravo, :charlie, :delta, :echo, :foxtrot, :golf, :hotel, :india, :juliet
|
137
|
+
def self.json_create(arg)
|
138
|
+
obj = self.new
|
139
|
+
obj.alpha = arg["alpha"]
|
140
|
+
obj.bravo = arg["bravo"]
|
141
|
+
obj.charlie = arg["charlie"]
|
142
|
+
obj.delta = arg["delta"]
|
143
|
+
obj.echo = arg["echo"]
|
144
|
+
obj.foxtrot = arg["foxtrot"]
|
145
|
+
obj.golf = arg["golf"]
|
146
|
+
obj.hotel = arg["hotel"]
|
147
|
+
obj.india = arg["india"]
|
148
|
+
obj.juliet = arg["juliet"]
|
149
|
+
obj
|
150
|
+
end
|
151
|
+
end
|
152
|
+
|
153
|
+
$obj_json = %|{
|
154
|
+
"alpha": [0, 1,2,3,4,5,6,7,8,9],
|
155
|
+
"bravo": true,
|
156
|
+
"charlie": 123,
|
157
|
+
"delta": "some string",
|
158
|
+
"echo": null,
|
159
|
+
"^": "Stuff",
|
160
|
+
"foxtrot": false,
|
161
|
+
"golf": "gulp",
|
162
|
+
"hotel": {"x": true, "y": false},
|
163
|
+
"india": [null, true, 123],
|
164
|
+
"juliet": "junk"
|
165
|
+
}|
|
166
|
+
|
167
|
+
p_usual.create_id = '^'
|
168
|
+
p_usual.class_cache = true
|
169
|
+
p_usual.ignore_json_create = true
|
170
|
+
|
171
|
+
JSON.create_id = '^'
|
172
|
+
|
173
|
+
puts '-' * 80
|
174
|
+
puts "Parse Usual Object Performance"
|
175
|
+
perf = Perf.new()
|
176
|
+
perf.add('Oj::Parser.usual', '') { p_usual.parse($obj_json) }
|
177
|
+
perf.add('Oj::compat_load', '') { Oj.compat_load($obj_json) }
|
178
|
+
perf.add('JSON::Ext', 'parse') { JSON.load($obj_json) }
|
179
|
+
perf.run($iter)
|
180
|
+
|
181
|
+
unless $failed.empty?
|
182
|
+
puts "The following packages were not included for the reason listed"
|
183
|
+
$failed.each { |tag,msg| puts "***** #{tag}: #{msg}" }
|
184
|
+
end
|