oj 2.18.3 → 3.13.14
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +5 -5
- data/CHANGELOG.md +1324 -0
- data/README.md +51 -204
- data/RELEASE_NOTES.md +61 -0
- data/ext/oj/buf.h +49 -72
- data/ext/oj/cache.c +326 -0
- data/ext/oj/cache.h +21 -0
- data/ext/oj/cache8.c +61 -64
- data/ext/oj/cache8.h +12 -39
- data/ext/oj/circarray.c +37 -68
- data/ext/oj/circarray.h +16 -42
- data/ext/oj/code.c +221 -0
- data/ext/oj/code.h +40 -0
- data/ext/oj/compat.c +231 -107
- data/ext/oj/custom.c +1125 -0
- data/ext/oj/debug.c +132 -0
- data/ext/oj/dump.c +935 -2513
- data/ext/oj/dump.h +108 -0
- data/ext/oj/dump_compat.c +936 -0
- data/ext/oj/dump_leaf.c +164 -0
- data/ext/oj/dump_object.c +761 -0
- data/ext/oj/dump_strict.c +410 -0
- data/ext/oj/encode.h +7 -42
- data/ext/oj/encoder.c +43 -0
- data/ext/oj/err.c +40 -54
- data/ext/oj/err.h +52 -46
- data/ext/oj/extconf.rb +21 -30
- data/ext/oj/fast.c +1097 -1080
- data/ext/oj/intern.c +301 -0
- data/ext/oj/intern.h +26 -0
- data/ext/oj/mimic_json.c +893 -0
- data/ext/oj/object.c +549 -620
- data/ext/oj/odd.c +155 -167
- data/ext/oj/odd.h +37 -63
- data/ext/oj/oj.c +1661 -2063
- data/ext/oj/oj.h +341 -270
- data/ext/oj/parse.c +974 -737
- data/ext/oj/parse.h +105 -97
- data/ext/oj/parser.c +1526 -0
- data/ext/oj/parser.h +90 -0
- data/ext/oj/rails.c +1504 -0
- data/ext/oj/rails.h +18 -0
- data/ext/oj/reader.c +141 -163
- data/ext/oj/reader.h +75 -113
- data/ext/oj/resolve.c +45 -93
- data/ext/oj/resolve.h +7 -34
- data/ext/oj/rxclass.c +143 -0
- data/ext/oj/rxclass.h +26 -0
- data/ext/oj/saj.c +447 -511
- data/ext/oj/saj2.c +348 -0
- data/ext/oj/scp.c +91 -138
- data/ext/oj/sparse.c +793 -644
- data/ext/oj/stream_writer.c +331 -0
- data/ext/oj/strict.c +145 -109
- data/ext/oj/string_writer.c +493 -0
- data/ext/oj/trace.c +72 -0
- data/ext/oj/trace.h +28 -0
- data/ext/oj/usual.c +1254 -0
- data/ext/oj/util.c +136 -0
- data/ext/oj/util.h +20 -0
- data/ext/oj/val_stack.c +62 -70
- data/ext/oj/val_stack.h +95 -129
- data/ext/oj/validate.c +51 -0
- data/ext/oj/wab.c +622 -0
- data/lib/oj/bag.rb +1 -0
- data/lib/oj/easy_hash.rb +17 -8
- data/lib/oj/error.rb +10 -11
- data/lib/oj/json.rb +176 -0
- data/lib/oj/mimic.rb +158 -19
- data/lib/oj/state.rb +132 -0
- data/lib/oj/version.rb +2 -2
- data/lib/oj.rb +1 -31
- data/pages/Advanced.md +22 -0
- data/pages/Compatibility.md +25 -0
- data/pages/Custom.md +23 -0
- data/pages/Encoding.md +65 -0
- data/pages/JsonGem.md +94 -0
- data/pages/Modes.md +161 -0
- data/pages/Options.md +327 -0
- data/pages/Parser.md +309 -0
- data/pages/Rails.md +167 -0
- data/pages/Security.md +20 -0
- data/pages/WAB.md +13 -0
- data/test/activerecord/result_test.rb +32 -0
- data/test/activesupport4/decoding_test.rb +108 -0
- data/test/activesupport4/encoding_test.rb +531 -0
- data/test/activesupport4/test_helper.rb +41 -0
- data/test/activesupport5/abstract_unit.rb +45 -0
- data/test/activesupport5/decoding_test.rb +133 -0
- data/test/activesupport5/encoding_test.rb +500 -0
- data/test/activesupport5/encoding_test_cases.rb +98 -0
- data/test/activesupport5/test_helper.rb +72 -0
- data/test/activesupport5/time_zone_test_helpers.rb +39 -0
- data/test/activesupport6/abstract_unit.rb +44 -0
- data/test/activesupport6/decoding_test.rb +133 -0
- data/test/activesupport6/encoding_test.rb +507 -0
- data/test/activesupport6/encoding_test_cases.rb +98 -0
- data/test/activesupport6/test_common.rb +17 -0
- data/test/activesupport6/test_helper.rb +163 -0
- data/test/activesupport6/time_zone_test_helpers.rb +39 -0
- data/test/activesupport7/abstract_unit.rb +49 -0
- data/test/activesupport7/decoding_test.rb +125 -0
- data/test/activesupport7/encoding_test.rb +486 -0
- data/test/activesupport7/encoding_test_cases.rb +104 -0
- data/test/activesupport7/time_zone_test_helpers.rb +47 -0
- data/test/bar.rb +9 -0
- data/test/baz.rb +16 -0
- data/test/bug.rb +11 -46
- data/test/foo.rb +69 -16
- data/test/helper.rb +10 -1
- data/test/isolated/shared.rb +12 -8
- data/test/isolated/test_mimic_rails_after.rb +3 -3
- data/test/isolated/test_mimic_rails_before.rb +3 -3
- data/test/json_gem/json_addition_test.rb +216 -0
- data/test/json_gem/json_common_interface_test.rb +153 -0
- data/test/json_gem/json_encoding_test.rb +107 -0
- data/test/json_gem/json_ext_parser_test.rb +20 -0
- data/test/json_gem/json_fixtures_test.rb +35 -0
- data/test/json_gem/json_generator_test.rb +397 -0
- data/test/json_gem/json_generic_object_test.rb +90 -0
- data/test/json_gem/json_parser_test.rb +470 -0
- data/test/json_gem/json_string_matching_test.rb +42 -0
- data/test/json_gem/test_helper.rb +26 -0
- data/test/mem.rb +33 -0
- data/test/perf.rb +1 -1
- data/test/perf_compat.rb +30 -28
- data/test/perf_dump.rb +50 -0
- data/test/perf_object.rb +1 -1
- data/test/perf_once.rb +58 -0
- data/test/perf_parser.rb +189 -0
- data/test/perf_scp.rb +11 -10
- data/test/perf_strict.rb +30 -19
- data/test/perf_wab.rb +131 -0
- data/test/prec.rb +23 -0
- data/test/sample.rb +0 -1
- data/test/sample_json.rb +1 -1
- data/test/test_compat.rb +219 -102
- data/test/test_custom.rb +533 -0
- data/test/test_fast.rb +107 -35
- data/test/test_file.rb +19 -25
- data/test/test_generate.rb +21 -0
- data/test/test_hash.rb +11 -1
- data/test/test_integer_range.rb +72 -0
- data/test/test_null.rb +376 -0
- data/test/test_object.rb +357 -70
- data/test/test_parser.rb +27 -0
- data/test/test_parser_saj.rb +245 -0
- data/test/test_parser_usual.rb +217 -0
- data/test/test_rails.rb +35 -0
- data/test/test_saj.rb +1 -1
- data/test/test_scp.rb +39 -2
- data/test/test_strict.rb +186 -7
- data/test/test_various.rb +160 -774
- data/test/test_wab.rb +307 -0
- data/test/test_writer.rb +90 -2
- data/test/tests.rb +24 -0
- data/test/tests_mimic.rb +14 -0
- data/test/tests_mimic_addition.rb +7 -0
- data/test/zoo.rb +13 -0
- metadata +194 -56
- data/ext/oj/hash.c +0 -163
- data/ext/oj/hash.h +0 -46
- data/ext/oj/hash_test.c +0 -512
- data/test/activesupport_datetime_test.rb +0 -23
- data/test/bug2.rb +0 -10
- data/test/bug3.rb +0 -46
- data/test/bug_fast.rb +0 -32
- data/test/bug_load.rb +0 -24
- data/test/crash.rb +0 -111
- data/test/curl/curl_oj.rb +0 -46
- data/test/curl/get_oj.rb +0 -24
- data/test/curl/just_curl.rb +0 -31
- data/test/curl/just_oj.rb +0 -51
- data/test/example.rb +0 -11
- data/test/io.rb +0 -48
- data/test/isolated/test_mimic_rails_datetime.rb +0 -27
- data/test/mod.rb +0 -16
- data/test/rails.rb +0 -50
- data/test/russian.rb +0 -18
- data/test/struct.rb +0 -29
- data/test/test_serializer.rb +0 -59
- data/test/write_timebars.rb +0 -31
data/pages/Parser.md
ADDED
@@ -0,0 +1,309 @@
|
|
1
|
+
# How Oj Just Got Faster
|
2
|
+
|
3
|
+
The original Oj parser is a performant parser that supports several
|
4
|
+
modes. As of this writing Oj is almost 10 years old. A dinosaur by
|
5
|
+
coding standards. It was time for an upgrade. Dealing with issues over
|
6
|
+
the years it became clear that a few things could have been done
|
7
|
+
better. The new `Oj::Parser` is a response that not only attempts to
|
8
|
+
address some of the issues but also give the Oj parser a significant
|
9
|
+
boost in performance. `Oj::Parser` takes a different approach to JSON
|
10
|
+
parsing than the now legacy Oj parser. Not really a legacy parser yet
|
11
|
+
since the `Oj::Parser` is not a drop-in replacement for the JSON gem
|
12
|
+
but it is as much as 3 times faster than the previous parser in
|
13
|
+
some modes.
|
14
|
+
|
15
|
+
## Address Issues
|
16
|
+
|
17
|
+
There are a few features of the `Oj.load` parser that continue to be
|
18
|
+
the reason for many of the issues on the project. The most significant
|
19
|
+
area is compatibility with both Rails and the JSON gem as they battle
|
20
|
+
it out for which behavior will win out in any particular
|
21
|
+
situation. Most of the issues are on the writing or dumping side of
|
22
|
+
the JSON packages but some are present on the parsing side as
|
23
|
+
well. Conversion of decimals is one area where Rails and the JSON
|
24
|
+
gem vary. The `Oj::Parser` addresses this by allowing for completely
|
25
|
+
separate parser instances. Create a parser and configure it for the
|
26
|
+
situation and leave the other parsers on their own.
|
27
|
+
|
28
|
+
The `Oj::Parser` is mostly compatible with the JSON gem and Rails but
|
29
|
+
no claims are made that the behavior will be the same as either.
|
30
|
+
|
31
|
+
The most frequent issues that can be addressed with the new parser are
|
32
|
+
around the handling of options. For `Oj.load` there is a set of
|
33
|
+
default options that can be set and the same options can be specified
|
34
|
+
for each call to parse or load. This approach has a couple of
|
35
|
+
downsides. One is that the defaults are shared across all calls to parse no
|
36
|
+
matter what the desired mode is. The second is that having to provide
|
37
|
+
all the options on each parse call incurs a performance penalty and is
|
38
|
+
just annoying to repeat the same set of options over many calls.
|
39
|
+
|
40
|
+
By localizing options to a specific parser instance there is never any
|
41
|
+
bleed over to other instances.
|
42
|
+
|
43
|
+
## How
|
44
|
+
|
45
|
+
It's wonderful to wish for a faster parser that solves all the
|
46
|
+
annoyances of the previous parser but how it was done is a much more
|
47
|
+
interesting question to answer.
|
48
|
+
|
49
|
+
At the core, the API for parsing was changed. Instead of a single
|
50
|
+
global parser any number of parsers can be created and each is separate
|
51
|
+
from the others. The parser itself is able to rip through a JSON
|
52
|
+
string, stream, or file and then make calls to a delegate to process
|
53
|
+
the JSON elements according to the delegate behavior. This is similar
|
54
|
+
to the `Oj.load` parser but the new parser takes advantage of
|
55
|
+
character maps, reduced conditional branching, and calling function
|
56
|
+
pointers.
|
57
|
+
|
58
|
+
### Options
|
59
|
+
|
60
|
+
As mentioned, one way to change the options issues was to change the
|
61
|
+
API. Instead of having a shared set of default options a separate
|
62
|
+
parser is created and configured for each use case. Options are set
|
63
|
+
with methods on the parser so no more guessing what options are
|
64
|
+
available. With options isolated to individual parsers there is no
|
65
|
+
unintended leakage to other parse use cases.
|
66
|
+
|
67
|
+
### Structure
|
68
|
+
|
69
|
+
A relatively small amount of time is spent in the actual parsing of JSON
|
70
|
+
in `Oj.load`. Most of the time is spent building the Ruby
|
71
|
+
Objects. Even cutting the parsing time in half only gives a 10%
|
72
|
+
improvement in performance but 10% is still an improvement.
|
73
|
+
|
74
|
+
The `Oj::Parser` is designed to reduce conditional branching. To do
|
75
|
+
that it uses character maps for the various states that the parser
|
76
|
+
goes through when parsing. There is no recursion as the JSON elements
|
77
|
+
are parsed. The use of character maps for each parser state means
|
78
|
+
the parser function can be and is re-entrant so partial blocks of JSON
|
79
|
+
can be parsed and the results combined.
|
80
|
+
|
81
|
+
There are no Ruby calls in the parser itself. Instead delegates are
|
82
|
+
used to implement the various behaviors of the parser which are
|
83
|
+
currently validation (validate), callbacks (SAJ), or building Ruby
|
84
|
+
objects (usual). The delegates are where all the Ruby calls and
|
85
|
+
related optimizations take place.
|
86
|
+
|
87
|
+
Considering JSON file parsing, `Oj.load_file` is able to read a file a
|
88
|
+
block at a time and the new `Oj::Parser` does the same. There was a
|
89
|
+
change in how that is done though. `Oj.load_file` sets up a reader
|
90
|
+
that must be called for each character. Basically a buffered
|
91
|
+
reader. `Oj::Parser` drops down a level and uses a re-entrant parser
|
92
|
+
that takes a block of bytes at a time so there is no call needed for
|
93
|
+
each character but rather just iterating over the block read from the
|
94
|
+
file.
|
95
|
+
|
96
|
+
Reading a block at a time also allows for an efficient second thread
|
97
|
+
to be used for reading blocks. That feature is not in the first
|
98
|
+
iteration of the `Oj::Parser` but the stage is set for it in the
|
99
|
+
future. The same approach was used successfully in
|
100
|
+
[OjC](https://github.com/ohler55/ojc) which is where the code for the
|
101
|
+
parser was taken from.
|
102
|
+
|
103
|
+
### Delegates
|
104
|
+
|
105
|
+
There are three delegates; validate, SAJ, and usual.
|
106
|
+
|
107
|
+
#### Validate
|
108
|
+
|
109
|
+
The validate delegate is trivial in that it does nothing other than let
|
110
|
+
the parser complete. There are no options for the validate
|
111
|
+
delegate. By not making any Ruby calls other than to start the parsing
|
112
|
+
it is no surprise that the validate delegate is the
|
113
|
+
best performer.
|
114
|
+
|
115
|
+
#### SAJ (Simple API for JSON)
|
116
|
+
|
117
|
+
The SAJ delegate is compatible with the SAJ handlers used with
|
118
|
+
`Oj.saj_parse` so it needs to keep track of keys for the
|
119
|
+
callbacks. Two optimizations are used. The first is a reusable key
|
120
|
+
stack while the second is a string cache similar to the Ruby intern
|
121
|
+
function.
|
122
|
+
|
123
|
+
When parsing a Hash (JSON object) element the key is passed to the
|
124
|
+
callback function if the SAJ handler responds to the method. The key
|
125
|
+
is also provided when closing an Array or Hash that is part of a
|
126
|
+
parent Hash. A key stack supports this.
|
127
|
+
|
128
|
+
If the option is turned on a lookup is made and previously cached key
|
129
|
+
VALUEs are used. This avoids creating the string for the key and
|
130
|
+
setting the encoding on it. The cache used is an auto expanding hash
|
131
|
+
implementation that is limited to strings less than 35 characters
|
132
|
+
which covers most keys. Larger strings use the slower string creation
|
133
|
+
approach. The use of the cache reduces object creation which saves on
|
134
|
+
both memory allocation and time. It is not appropriate for one time
|
135
|
+
parsing of say all the keys in a dictionary but is ideally suited for
|
136
|
+
loading similar JSON multiple times.
|
137
|
+
|
138
|
+
#### Usual
|
139
|
+
|
140
|
+
By far the most complex of the delegates is the 'usual' delegate. The
|
141
|
+
usual delegate builds Ruby Objects when parsing JSON. It incorporates
|
142
|
+
many options for configuration and makes use of a number of
|
143
|
+
optimizations.
|
144
|
+
|
145
|
+
##### Reduce Branching
|
146
|
+
|
147
|
+
In keeping with the goal of reducing conditional branching most of the
|
148
|
+
delegate options are implemented by changing a function pointer
|
149
|
+
according to the option selected. For example when turning on or off
|
150
|
+
`:symbol_keys` the function to calculate the key is changed so no
|
151
|
+
decision needs to be made during parsing. Using this approach option
|
152
|
+
branching happens when the option is set and not each time when
|
153
|
+
parsing.
|
154
|
+
|
155
|
+
##### Cache
|
156
|
+
|
157
|
+
Creating Ruby Objects whether Strings, Array, or some other class is
|
158
|
+
expensive. Well expensive when running at the speeds Oj runs at. One
|
159
|
+
way to reduce Object creation is to cache those objects on the
|
160
|
+
assumption that they will most likely be used again. This is
|
161
|
+
especially true of Hash keys and Object attribute IDs. When creating
|
162
|
+
Objects from a class name in the JSON a class cache saves resolving
|
163
|
+
the string to a class each time. Of course there are times when
|
164
|
+
caching is not preferred so caching can be turned on or off with
|
165
|
+
option methods on the parser which are passed down to the delegate.
|
166
|
+
|
167
|
+
The Oj cache implementation is an auto expanding hash. When certain
|
168
|
+
limits are reached the hash is expanded and rehashed. Rehashing can
|
169
|
+
take some time as the number of items cached increases so there is
|
170
|
+
also an option to start with a larger cache size to avoid or reduce
|
171
|
+
the likelihood of a rehash.
|
172
|
+
|
173
|
+
The Oj cache has an advantage over the Ruby intern function
|
174
|
+
(`rb_intern()`) in that several steps are needed for some cached
|
175
|
+
items. As an example Object attribute IDs are created by adding an `@`
|
176
|
+
character prefix to a string and then converting to an ID. This is done
|
177
|
+
once when inserting into the cache and after that only a lookup is
|
178
|
+
needed.
|
179
|
+
|
180
|
+
##### Bulk Insert
|
181
|
+
|
182
|
+
The Ruby functions available for C extension functions are extensive
|
183
|
+
and offer many options across the board. The bulk insert functions for
|
184
|
+
both Arrays and Hashes are much faster than appending or setting
|
185
|
+
functions that set one value at a time. The Array bulk insert is
|
186
|
+
around 15 times faster and for Hash it is about 3 times faster.
|
187
|
+
|
188
|
+
To take advantage of the bulk inserts arrays of VALUEs are
|
189
|
+
needed. With a little planning these VALUE arrays can be reused which
|
190
|
+
leads into another optimization, the use of stacks.
|
191
|
+
|
192
|
+
##### Stacks
|
193
|
+
|
194
|
+
Parsing requires memory to keep track of values when parsing nested
|
195
|
+
JSON elements. That can be done on the call stack making use of
|
196
|
+
recursive calls or it can be done with a stack managed by the
|
197
|
+
parser. The `Oj.load` method maintains a stack for Ruby objects and
|
198
|
+
builds the output as the parsing progresses.
|
199
|
+
|
200
|
+
`Oj::Parser` uses three different stacks. One stack for values, one
|
201
|
+
for keys, and one for collections (Array and Hash). By postponing the
|
202
|
+
creation of the collection elements the bulk insertions for Array and
|
203
|
+
Hash can be used. For arrays the use of a value stack and creating the
|
204
|
+
array after all elements have been identified gives a 15x improvement
|
205
|
+
in array creation.
|
206
|
+
|
207
|
+
For Hash the story is a little different. The bulk insert for Hash
|
208
|
+
alternates keys and values but there is a wrinkle to consider. Since
|
209
|
+
Ruby Object creation is triggered by the occurrence of an element that
|
210
|
+
matches a creation identifier the creation of a collection is not just
|
211
|
+
for Array and Hash but also Object. Setting Object attributes uses an
|
212
|
+
ID and not a VALUE. For that reason the keys should not be created as
|
213
|
+
String or Symbol types as they would be ignored and the VALUE creation
|
214
|
+
wasted when setting Object attributes. Using the bulk insert for Hash
|
215
|
+
gives a 3x improvement for that part of the object building.
|
216
|
+
|
217
|
+
Looking at the Object creation the JSON gem expects a class method of
|
218
|
+
`#json_create(arg)`. The single argument is the Hash resulting from
|
219
|
+
the parsing assuming that the parser parsed to a Hash first. This is
|
220
|
+
less than ideal from a performance perspective so `Oj::Parser`
|
221
|
+
provides an option to take that approach or to use the much more
|
222
|
+
efficient approach of never creating the Hash but instead creating the
|
223
|
+
Object and then setting the attributes directly.
|
224
|
+
|
225
|
+
To further improve performance and reduce the amount of memory
|
226
|
+
allocations and frees the stacks are reused from one call to `#parse`
|
227
|
+
to another.
|
228
|
+
|
229
|
+
## Results
|
230
|
+
|
231
|
+
The results are even better than expected. Running the
|
232
|
+
[perf_parser.rb](https://github.com/ohler55/oj/blob/develop/test/perf_parser.rb)
|
233
|
+
file shows the improvements. There are four comparisons all run on a
|
234
|
+
MacBook Pro with Intel processor.
|
235
|
+
|
236
|
+
### Validation
|
237
|
+
|
238
|
+
Without a comparable parser that just validates a JSON document the
|
239
|
+
`Oj.saj_parse` callback parser with a nil handler is used for
|
240
|
+
comparison to the new `Oj::Parser.new(:validate)`. In that case the
|
241
|
+
comparison is:
|
242
|
+
|
243
|
+
```
|
244
|
+
System time (secs) rate (ops/sec)
|
245
|
+
------------------- ----------- --------------
|
246
|
+
Oj::Parser.validate 0.101 494369.136
|
247
|
+
Oj::Saj.none 0.205 244122.745
|
248
|
+
```
|
249
|
+
|
250
|
+
The `Oj::Parser.new(:validate)` is **2.03** times faster!
|
251
|
+
|
252
|
+
### Callback
|
253
|
+
|
254
|
+
Oj has two callback parsers. One is SCP and the other SAJ. Both are
|
255
|
+
similar in that a handler is provided that implements methods for
|
256
|
+
processing the various element types in a JSON document. Comparing
|
257
|
+
`Oj.saj_parse` to `Oj::Parser.new(:saj)` with an all callback methods
|
258
|
+
implemented handler gives the following raw results:
|
259
|
+
|
260
|
+
```
|
261
|
+
System time (secs) rate (ops/sec)
|
262
|
+
-------------- ----------- --------------
|
263
|
+
Oj::Parser.saj 0.783 63836.986
|
264
|
+
Oj::Saj.all 1.182 42315.397
|
265
|
+
```
|
266
|
+
|
267
|
+
The `Oj::Parser.new(:saj)` is **1.51** times faster.
|
268
|
+
|
269
|
+
### Parse to Ruby primitives
|
270
|
+
|
271
|
+
Parsing to Ruby primitives and Array and Hash is possible with most
|
272
|
+
parsers including the JSON gem parser. The raw results comparing
|
273
|
+
`Oj.strict_load`, `Oj::Parser.new(:usual)`, and the JSON gem are:
|
274
|
+
|
275
|
+
```
|
276
|
+
System time (secs) rate (ops/sec)
|
277
|
+
---------------- ----------- --------------
|
278
|
+
Oj::Parser.usual 0.452 110544.876
|
279
|
+
Oj::strict_load 0.699 71490.257
|
280
|
+
JSON::Ext 1.009 49555.094
|
281
|
+
```
|
282
|
+
|
283
|
+
The `Oj::Parser.new(:usual)` is **1.55** times faster than `Oj.strict_load` and
|
284
|
+
**2.23** times faster than the JSON gem.
|
285
|
+
|
286
|
+
### Object
|
287
|
+
|
288
|
+
Oj supports two modes for Object serialization and
|
289
|
+
deserialization. Comparing to the JSON gem compatible mode
|
290
|
+
`Oj.compat_load`, `Oj::Parser.new(:usual)`, and the JSON gem yields
|
291
|
+
the following raw results:
|
292
|
+
|
293
|
+
```
|
294
|
+
System time (secs) rate (ops/sec)
|
295
|
+
---------------- ----------- --------------
|
296
|
+
Oj::Parser.usual 0.071 703502.033
|
297
|
+
Oj::compat_load 0.225 221762.927
|
298
|
+
JSON::Ext 0.401 124638.859
|
299
|
+
```
|
300
|
+
|
301
|
+
The `Oj::Parser.new(:usual)` is **3.17** times faster than
|
302
|
+
`Oj.compat_load` and **5.64** times faster than the JSON gem.
|
303
|
+
|
304
|
+
## Summary
|
305
|
+
|
306
|
+
With a performance boost of 1.5x to over 3x over the `Oj.load`
|
307
|
+
parser the new `Oj::Parser` is a big win in the performance arena. The
|
308
|
+
isolation of options is another feature that should make life easier
|
309
|
+
for developers.
|
data/pages/Rails.md
ADDED
@@ -0,0 +1,167 @@
|
|
1
|
+
# Rails Quickstart
|
2
|
+
|
3
|
+
To universally replace Rails' use of the json gem with Oj, and also
|
4
|
+
have Oj "take over" many methods on the JSON constant (`load`, `parse`, etc.) with
|
5
|
+
their faster Oj counterparts, add this to an initializer:
|
6
|
+
|
7
|
+
```ruby
|
8
|
+
Oj.optimize_rails()
|
9
|
+
```
|
10
|
+
|
11
|
+
For more details and options, read on...
|
12
|
+
|
13
|
+
# Oj Rails Compatibility
|
14
|
+
|
15
|
+
The `:rails` mode mimics the ActiveSupport version 5 encoder. Rails and
|
16
|
+
ActiveSupport are built around the use of the `as_json(*)` method defined for
|
17
|
+
a class. Oj attempts to provide the same functionality by being a drop in
|
18
|
+
replacement with a few exceptions.
|
19
|
+
|
20
|
+
```ruby
|
21
|
+
require 'oj'
|
22
|
+
|
23
|
+
Oj::Rails.set_encoder()
|
24
|
+
Oj::Rails.set_decoder()
|
25
|
+
Oj::Rails.optimize()
|
26
|
+
Oj::Rails.mimic_JSON()
|
27
|
+
```
|
28
|
+
|
29
|
+
or simply call
|
30
|
+
|
31
|
+
```ruby
|
32
|
+
Oj.optimize_rails()
|
33
|
+
```
|
34
|
+
|
35
|
+
Either of those steps will set up Oj to mimic Rails but it will not change the
|
36
|
+
default mode type as the mode type is only used when calling the Oj encoding
|
37
|
+
directly. If Rails mode is also desired then use the `Oj.default_options` to
|
38
|
+
change the default mode.
|
39
|
+
|
40
|
+
Some of the Oj options are supported as arguments to the encoder if called
|
41
|
+
from `Oj::Rails.encode()` but when using the `Oj::Rails::Encoder` class the
|
42
|
+
`encode()` method does not support optional arguments as required by the
|
43
|
+
ActiveSupport compliance guidelines. The general approach Rails takes for
|
44
|
+
configuring encoding options is to either set global values or to create a new
|
45
|
+
instance of the Encoder class and provide options in the initializer.
|
46
|
+
|
47
|
+
The globals that ActiveSupport uses for encoding are:
|
48
|
+
|
49
|
+
* `ActiveSupport::JSON::Encoding.use_standard_json_time_format`
|
50
|
+
* `ActiveSupport::JSON::Encoding.escape_html_entities_in_json`
|
51
|
+
* `ActiveSupport::JSON::Encoding.time_precision`
|
52
|
+
* `ActiveSupport::JSON::Encoding.json_encoder`
|
53
|
+
|
54
|
+
Those globals are aliased to also be accessed from the ActiveSupport module
|
55
|
+
directly so `ActiveSupport::JSON::Encoding.time_precision` can also be accessed
|
56
|
+
from `ActiveSupport.time_precision`. Oj makes use of these globals in mimicking
|
57
|
+
Rails after the `Oj::Rails.set_encoder()` method is called. That also sets the
|
58
|
+
`ActiveSupport.json_encoder` to the `Oj::Rails::Encoder` class.
|
59
|
+
|
60
|
+
Options passed into a call to `to_json()` are passed to the `as_json()`
|
61
|
+
methods. These are mostly ignored by Oj and simply passed on without
|
62
|
+
modifications as per the guidelines. The exception to this are the options
|
63
|
+
specific to Oj such as the `:circular` option which is used to detect circular
|
64
|
+
references while encoding.
|
65
|
+
|
66
|
+
By default Oj acts like the ActiveSupport encoder and honors any changes in
|
67
|
+
the `as_json()` methods. There are some optimized Oj encoders for some
|
68
|
+
classes. When the optimized encoder is toggled the `as_json()` methods will not
|
69
|
+
be called for that class but instead the optimized version will be called. The
|
70
|
+
optimized version is the same as the ActiveSupport default encoding for a
|
71
|
+
given class. The optimized versions are toggled with the `optimize()` and
|
72
|
+
`deoptimize()` methods. There is a default optimized version for every class
|
73
|
+
that takes the visible attributes and encodes them but that may not be the
|
74
|
+
same as what Rails uses. Trial and error is the best approach for classes not
|
75
|
+
listed here.
|
76
|
+
|
77
|
+
The classes that can be put in optimized mode and are optimized when
|
78
|
+
`Oj::Rails.optimize` is called with no arguments are:
|
79
|
+
|
80
|
+
* Array
|
81
|
+
* BigDecimal
|
82
|
+
* Float
|
83
|
+
* Hash
|
84
|
+
* Range
|
85
|
+
* Regexp
|
86
|
+
* Time
|
87
|
+
* ActiveSupport::TimeWithZone
|
88
|
+
* ActionController::Parameters
|
89
|
+
* any class inheriting from ActiveRecord::Base
|
90
|
+
* any other class where all attributes should be dumped
|
91
|
+
|
92
|
+
The ActiveSupport decoder is the `JSON.parse()` method. Calling the
|
93
|
+
`Oj::Rails.set_decoder()` method replaces that method with the Oj equivalent.
|
94
|
+
|
95
|
+
### Usage in Rails 3
|
96
|
+
|
97
|
+
To support Rails 3 you can create a new module mixin to prepend to controllers:
|
98
|
+
|
99
|
+
```ruby
|
100
|
+
require 'oj'
|
101
|
+
|
102
|
+
module OjJsonEncoder
|
103
|
+
def render(options = nil, extra_options = {}, &block)
|
104
|
+
if options && options.is_a?(Hash) && options[:json]
|
105
|
+
obj = options.delete(:json)
|
106
|
+
obj = Oj.dump(obj, :mode => :rails) unless obj.is_a?(String)
|
107
|
+
options[:text] = obj
|
108
|
+
response.content_type ||= Mime::JSON
|
109
|
+
end
|
110
|
+
super
|
111
|
+
end
|
112
|
+
end
|
113
|
+
```
|
114
|
+
|
115
|
+
Usage:
|
116
|
+
|
117
|
+
```ruby
|
118
|
+
class MyController < ApplicationController
|
119
|
+
prepend OjJsonEncoder
|
120
|
+
def index
|
121
|
+
render :json => { :hello => 'world' }
|
122
|
+
end
|
123
|
+
end
|
124
|
+
```
|
125
|
+
|
126
|
+
### Older Ruby Version Support (Pre 2.3.0)
|
127
|
+
|
128
|
+
If you are using an older version of Ruby, you can pin `oj` to an earlier version in your Gemfile:
|
129
|
+
|
130
|
+
```ruby
|
131
|
+
gem 'oj', '3.7.12'
|
132
|
+
```
|
133
|
+
|
134
|
+
### Notes:
|
135
|
+
|
136
|
+
1. Optimized Floats set the significant digits to 16. This is different than
|
137
|
+
Ruby which is used by the json gem and by Rails. Ruby varies the
|
138
|
+
significant digits which can be either 16 or 17 depending on the value.
|
139
|
+
|
140
|
+
2. Optimized Hashes do not collapse keys that become the same in the output. As
|
141
|
+
an example, a non-String object that has a `to_s()` method will become the
|
142
|
+
return value of the `to_s()` method in the output without checking to see if
|
143
|
+
that has already been used. This could occur if a mix of Strings and Symbols
|
144
|
+
are used as keys or if other non-String objects such as Numerics are mixed
|
145
|
+
with numbers as Strings.
|
146
|
+
|
147
|
+
3. To verify Oj is being used turn on the Oj `:trace` option. Similar to the
|
148
|
+
Ruby Tracer Oj will then print out trace information. Another approach is
|
149
|
+
to turn on C extension tracing. Set `tracer = TracePoint.new(:c_call) do
|
150
|
+
|tp| p [tp.lineno, tp.event, tp.defined_class, tp.method_id] end` or, in
|
151
|
+
older Rubies, set `Tracer.display_c_call = true`.
|
152
|
+
|
153
|
+
For example:
|
154
|
+
|
155
|
+
```
|
156
|
+
require 'active_support/core_ext'
|
157
|
+
require 'active_support/json'
|
158
|
+
require 'oj'
|
159
|
+
Oj.optimize_rails
|
160
|
+
tracer.enable { Time.now.to_json }
|
161
|
+
# prints output including
|
162
|
+
....
|
163
|
+
[20, :c_call, #<Class:Oj::Rails::Encoder>, :new]
|
164
|
+
[20, :c_call, Oj::Rails::Encoder, :encode]
|
165
|
+
....
|
166
|
+
=> "\"2018-02-23T12:13:42.493-06:00\""
|
167
|
+
```
|
data/pages/Security.md
ADDED
@@ -0,0 +1,20 @@
|
|
1
|
+
# Security and Optimization
|
2
|
+
|
3
|
+
Two settings in Oj are useful for parsing but do expose a vulnerability if used
|
4
|
+
from an untrusted source. Symbolized keys can cause memory to be filled with
|
5
|
+
Symbols in previous versions of Ruby. Ruby 2.1 and below does not garbage collect
|
6
|
+
Symbols. The same is true for auto defining classes in all versions of ruby;
|
7
|
+
memory will also be exhausted if too many classes are automatically
|
8
|
+
defined. Auto defining is a useful feature during development and from trusted
|
9
|
+
sources but it allows too many classes to be created in the object load mode when
|
10
|
+
auto defined is used with an untrusted source. The `Oj.safe_load()` method
|
11
|
+
sets and uses the most strict and safest options. It should be used by
|
12
|
+
developers who find it difficult to understand the options available in Oj.
|
13
|
+
|
14
|
+
The options in Oj are designed to provide flexibility to the developer. This
|
15
|
+
flexibility allows Objects to be serialized and deserialized. No methods are
|
16
|
+
ever called on these created Objects but that does not stop the developer from
|
17
|
+
calling methods on them. As in any system, check your inputs before working with
|
18
|
+
them. Taking an arbitrary `String` from a user and evaluating it is never a good
|
19
|
+
idea from an unsecure source. The same is true for `Object` attributes as they
|
20
|
+
are no more than `String`s. Always check inputs from untrusted sources.
|
data/pages/WAB.md
ADDED
@@ -0,0 +1,13 @@
|
|
1
|
+
# WAB mode
|
2
|
+
|
3
|
+
The `:wab` mode ignores all options except the indent option. Performance of
|
4
|
+
this mode is slightly faster than the :strict and :null modes. It is included
|
5
|
+
to support the [WABuR](https://github.com/ohler55/wabur) project.
|
6
|
+
|
7
|
+
Options other than the indentation are not supported since the encoding and
|
8
|
+
formats are defined by the API that is used to encode data being passed from
|
9
|
+
one component to another in a WAB system, and allowing an option that would break the
|
10
|
+
data exchange is best avoided.
|
11
|
+
|
12
|
+
The mode encodes like the strict mode except that URI, Time, WAB::UUID, and
|
13
|
+
BigDecimal are supported.
|
@@ -0,0 +1,32 @@
|
|
1
|
+
#!/usr/bin/env ruby
|
2
|
+
|
3
|
+
$: << File.dirname(__FILE__)
|
4
|
+
$: << File.dirname(File.dirname(__FILE__))
|
5
|
+
|
6
|
+
require 'helper'
|
7
|
+
require "rails/all"
|
8
|
+
|
9
|
+
Oj::Rails.set_encoder()
|
10
|
+
Oj::Rails.optimize()
|
11
|
+
|
12
|
+
Oj.default_options = { mode: :rails }
|
13
|
+
|
14
|
+
class ActiveRecordResultTest < Minitest::Test
|
15
|
+
def test_hash_rows
|
16
|
+
|
17
|
+
result = ActiveRecord::Result.new(["one", "two"],
|
18
|
+
[
|
19
|
+
["row 1 col 1", "row 1 col 2"],
|
20
|
+
["row 2 col 1", "row 2 col 2"],
|
21
|
+
["row 3 col 1", "row 3 col 2"],
|
22
|
+
])
|
23
|
+
#puts "*** result: #{Oj.dump(result, indent: 2)}"
|
24
|
+
json_result = if ActiveRecord.version >= Gem::Version.new("6")
|
25
|
+
result.to_a
|
26
|
+
else
|
27
|
+
result.to_hash
|
28
|
+
end
|
29
|
+
|
30
|
+
assert_equal Oj.dump(result, mode: :rails), Oj.dump(json_result)
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,108 @@
|
|
1
|
+
# encoding: utf-8
|
2
|
+
require 'activesupport4/test_helper'
|
3
|
+
require 'active_support/json'
|
4
|
+
require 'active_support/time'
|
5
|
+
|
6
|
+
class TestJSONDecoding < ActiveSupport::TestCase
|
7
|
+
class Foo
|
8
|
+
def self.json_create(object)
|
9
|
+
"Foo"
|
10
|
+
end
|
11
|
+
end
|
12
|
+
|
13
|
+
TESTS = {
|
14
|
+
%q({"returnTo":{"\/categories":"\/"}}) => {"returnTo" => {"/categories" => "/"}},
|
15
|
+
%q({"return\\"To\\":":{"\/categories":"\/"}}) => {"return\"To\":" => {"/categories" => "/"}},
|
16
|
+
%q({"returnTo":{"\/categories":1}}) => {"returnTo" => {"/categories" => 1}},
|
17
|
+
%({"returnTo":[1,"a"]}) => {"returnTo" => [1, "a"]},
|
18
|
+
%({"returnTo":[1,"\\"a\\",", "b"]}) => {"returnTo" => [1, "\"a\",", "b"]},
|
19
|
+
%({"a": "'", "b": "5,000"}) => {"a" => "'", "b" => "5,000"},
|
20
|
+
%({"a": "a's, b's and c's", "b": "5,000"}) => {"a" => "a's, b's and c's", "b" => "5,000"},
|
21
|
+
# multibyte
|
22
|
+
%({"matzue": "松江", "asakusa": "浅草"}) => {"matzue" => "松江", "asakusa" => "浅草"},
|
23
|
+
%({"a": "2007-01-01"}) => {'a' => Date.new(2007, 1, 1)},
|
24
|
+
%({"a": "2007-01-01 01:12:34 Z"}) => {'a' => Time.utc(2007, 1, 1, 1, 12, 34)},
|
25
|
+
%(["2007-01-01 01:12:34 Z"]) => [Time.utc(2007, 1, 1, 1, 12, 34)],
|
26
|
+
%(["2007-01-01 01:12:34 Z", "2007-01-01 01:12:35 Z"]) => [Time.utc(2007, 1, 1, 1, 12, 34), Time.utc(2007, 1, 1, 1, 12, 35)],
|
27
|
+
# no time zone
|
28
|
+
%({"a": "2007-01-01 01:12:34"}) => {'a' => "2007-01-01 01:12:34"},
|
29
|
+
# invalid date
|
30
|
+
%({"a": "1089-10-40"}) => {'a' => "1089-10-40"},
|
31
|
+
# xmlschema date notation
|
32
|
+
%({"a": "2009-08-10T19:01:02Z"}) => {'a' => Time.utc(2009, 8, 10, 19, 1, 2)},
|
33
|
+
%({"a": "2009-08-10T19:01:02+02:00"}) => {'a' => Time.utc(2009, 8, 10, 17, 1, 2)},
|
34
|
+
%({"a": "2009-08-10T19:01:02-05:00"}) => {'a' => Time.utc(2009, 8, 11, 00, 1, 2)},
|
35
|
+
# needs to be *exact*
|
36
|
+
%({"a": " 2007-01-01 01:12:34 Z "}) => {'a' => " 2007-01-01 01:12:34 Z "},
|
37
|
+
%({"a": "2007-01-01 : it's your birthday"}) => {'a' => "2007-01-01 : it's your birthday"},
|
38
|
+
%([]) => [],
|
39
|
+
%({}) => {},
|
40
|
+
%({"a":1}) => {"a" => 1},
|
41
|
+
%({"a": ""}) => {"a" => ""},
|
42
|
+
%({"a":"\\""}) => {"a" => "\""},
|
43
|
+
%({"a": null}) => {"a" => nil},
|
44
|
+
%({"a": true}) => {"a" => true},
|
45
|
+
%({"a": false}) => {"a" => false},
|
46
|
+
%q({"bad":"\\\\","trailing":""}) => {"bad" => "\\", "trailing" => ""},
|
47
|
+
%q({"a": "http:\/\/test.host\/posts\/1"}) => {"a" => "http://test.host/posts/1"},
|
48
|
+
%q({"a": "\u003cunicode\u0020escape\u003e"}) => {"a" => "<unicode escape>"},
|
49
|
+
%q({"a": "\\\\u0020skip double backslashes"}) => {"a" => "\\u0020skip double backslashes"},
|
50
|
+
%q({"a": "\u003cbr /\u003e"}) => {'a' => "<br />"},
|
51
|
+
%q({"b":["\u003ci\u003e","\u003cb\u003e","\u003cu\u003e"]}) => {'b' => ["<i>","<b>","<u>"]},
|
52
|
+
# test combination of dates and escaped or unicode encoded data in arrays
|
53
|
+
%q([{"d":"1970-01-01", "s":"\u0020escape"},{"d":"1970-01-01", "s":"\u0020escape"}]) =>
|
54
|
+
[{'d' => Date.new(1970, 1, 1), 's' => ' escape'},{'d' => Date.new(1970, 1, 1), 's' => ' escape'}],
|
55
|
+
%q([{"d":"1970-01-01","s":"http:\/\/example.com"},{"d":"1970-01-01","s":"http:\/\/example.com"}]) =>
|
56
|
+
[{'d' => Date.new(1970, 1, 1), 's' => 'http://example.com'},
|
57
|
+
{'d' => Date.new(1970, 1, 1), 's' => 'http://example.com'}],
|
58
|
+
# tests escaping of "\n" char with Yaml backend
|
59
|
+
%q({"a":"\n"}) => {"a"=>"\n"},
|
60
|
+
%q({"a":"\u000a"}) => {"a"=>"\n"},
|
61
|
+
%q({"a":"Line1\u000aLine2"}) => {"a"=>"Line1\nLine2"},
|
62
|
+
# prevent json unmarshalling
|
63
|
+
%q({"json_class":"TestJSONDecoding::Foo"}) => {"json_class"=>"TestJSONDecoding::Foo"},
|
64
|
+
# json "fragments" - these are invalid JSON, but ActionPack relies on this
|
65
|
+
%q("a string") => "a string",
|
66
|
+
%q(1.1) => 1.1,
|
67
|
+
%q(1) => 1,
|
68
|
+
%q(-1) => -1,
|
69
|
+
%q(true) => true,
|
70
|
+
%q(false) => false,
|
71
|
+
%q(null) => nil
|
72
|
+
}
|
73
|
+
|
74
|
+
TESTS.each_with_index do |(json, expected), index|
|
75
|
+
test "json decodes #{index}" do
|
76
|
+
prev = ActiveSupport.parse_json_times
|
77
|
+
ActiveSupport.parse_json_times = true
|
78
|
+
silence_warnings do
|
79
|
+
if expected.nil?
|
80
|
+
assert_nil ActiveSupport::JSON.decode(json), "JSON decoding failed for #{json}"
|
81
|
+
else
|
82
|
+
assert_equal expected, ActiveSupport::JSON.decode(json), "JSON decoding failed for #{json}"
|
83
|
+
end
|
84
|
+
end
|
85
|
+
ActiveSupport.parse_json_times = prev
|
86
|
+
end
|
87
|
+
end
|
88
|
+
|
89
|
+
test "json decodes time json with time parsing disabled" do
|
90
|
+
prev = ActiveSupport.parse_json_times
|
91
|
+
ActiveSupport.parse_json_times = false
|
92
|
+
expected = {"a" => "2007-01-01 01:12:34 Z"}
|
93
|
+
assert_equal expected, ActiveSupport::JSON.decode(%({"a": "2007-01-01 01:12:34 Z"}))
|
94
|
+
ActiveSupport.parse_json_times = prev
|
95
|
+
end
|
96
|
+
|
97
|
+
def test_failed_json_decoding
|
98
|
+
assert_raise(ActiveSupport::JSON.parse_error) { ActiveSupport::JSON.decode(%(undefined)) }
|
99
|
+
assert_raise(ActiveSupport::JSON.parse_error) { ActiveSupport::JSON.decode(%({a: 1})) }
|
100
|
+
assert_raise(ActiveSupport::JSON.parse_error) { ActiveSupport::JSON.decode(%({: 1})) }
|
101
|
+
assert_raise(ActiveSupport::JSON.parse_error) { ActiveSupport::JSON.decode(%()) }
|
102
|
+
end
|
103
|
+
|
104
|
+
def test_cannot_pass_unsupported_options
|
105
|
+
assert_raise(ArgumentError) { ActiveSupport::JSON.decode("", create_additions: true) }
|
106
|
+
end
|
107
|
+
end
|
108
|
+
|