oj 3.9.1 → 3.16.11

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (171) hide show
  1. checksums.yaml +4 -4
  2. data/CHANGELOG.md +1452 -0
  3. data/README.md +21 -6
  4. data/RELEASE_NOTES.md +61 -0
  5. data/ext/oj/buf.h +50 -68
  6. data/ext/oj/cache.c +329 -0
  7. data/ext/oj/cache.h +22 -0
  8. data/ext/oj/cache8.c +60 -62
  9. data/ext/oj/cache8.h +9 -36
  10. data/ext/oj/circarray.c +38 -42
  11. data/ext/oj/circarray.h +12 -13
  12. data/ext/oj/code.c +158 -179
  13. data/ext/oj/code.h +20 -22
  14. data/ext/oj/compat.c +145 -205
  15. data/ext/oj/custom.c +740 -880
  16. data/ext/oj/debug.c +126 -0
  17. data/ext/oj/dump.c +1145 -844
  18. data/ext/oj/dump.h +71 -57
  19. data/ext/oj/dump_compat.c +575 -655
  20. data/ext/oj/dump_leaf.c +96 -186
  21. data/ext/oj/dump_object.c +533 -660
  22. data/ext/oj/dump_strict.c +306 -340
  23. data/ext/oj/encode.h +4 -33
  24. data/ext/oj/encoder.c +43 -0
  25. data/ext/oj/err.c +28 -28
  26. data/ext/oj/err.h +39 -42
  27. data/ext/oj/extconf.rb +28 -7
  28. data/ext/oj/fast.c +1052 -1113
  29. data/ext/oj/intern.c +313 -0
  30. data/ext/oj/intern.h +22 -0
  31. data/ext/oj/mem.c +318 -0
  32. data/ext/oj/mem.h +53 -0
  33. data/ext/oj/mimic_json.c +471 -430
  34. data/ext/oj/object.c +532 -580
  35. data/ext/oj/odd.c +156 -142
  36. data/ext/oj/odd.h +25 -26
  37. data/ext/oj/oj.c +1346 -961
  38. data/ext/oj/oj.h +307 -290
  39. data/ext/oj/parse.c +954 -858
  40. data/ext/oj/parse.h +74 -72
  41. data/ext/oj/parser.c +1600 -0
  42. data/ext/oj/parser.h +103 -0
  43. data/ext/oj/rails.c +819 -836
  44. data/ext/oj/rails.h +8 -11
  45. data/ext/oj/reader.c +136 -147
  46. data/ext/oj/reader.h +69 -83
  47. data/ext/oj/resolve.c +41 -63
  48. data/ext/oj/resolve.h +4 -6
  49. data/ext/oj/rxclass.c +69 -72
  50. data/ext/oj/rxclass.h +12 -13
  51. data/ext/oj/saj.c +440 -485
  52. data/ext/oj/saj2.c +584 -0
  53. data/ext/oj/saj2.h +23 -0
  54. data/ext/oj/scp.c +79 -118
  55. data/ext/oj/simd.h +10 -0
  56. data/ext/oj/sparse.c +739 -709
  57. data/ext/oj/stream_writer.c +141 -175
  58. data/ext/oj/strict.c +103 -128
  59. data/ext/oj/string_writer.c +244 -261
  60. data/ext/oj/trace.c +34 -41
  61. data/ext/oj/trace.h +42 -15
  62. data/ext/oj/usual.c +1218 -0
  63. data/ext/oj/usual.h +69 -0
  64. data/ext/oj/util.c +107 -107
  65. data/ext/oj/util.h +4 -3
  66. data/ext/oj/val_stack.c +61 -78
  67. data/ext/oj/val_stack.h +80 -114
  68. data/ext/oj/validate.c +46 -0
  69. data/ext/oj/wab.c +316 -361
  70. data/lib/oj/active_support_helper.rb +1 -3
  71. data/lib/oj/bag.rb +8 -1
  72. data/lib/oj/easy_hash.rb +9 -9
  73. data/lib/oj/error.rb +1 -2
  74. data/lib/oj/json.rb +162 -150
  75. data/lib/oj/mimic.rb +54 -20
  76. data/lib/oj/saj.rb +20 -6
  77. data/lib/oj/schandler.rb +5 -4
  78. data/lib/oj/state.rb +12 -8
  79. data/lib/oj/version.rb +1 -2
  80. data/lib/oj.rb +2 -8
  81. data/pages/Compatibility.md +1 -1
  82. data/pages/Encoding.md +1 -1
  83. data/pages/InstallOptions.md +20 -0
  84. data/pages/JsonGem.md +15 -0
  85. data/pages/Modes.md +9 -3
  86. data/pages/Options.md +62 -12
  87. data/pages/Parser.md +309 -0
  88. data/pages/Rails.md +73 -22
  89. metadata +68 -192
  90. data/ext/oj/hash.c +0 -163
  91. data/ext/oj/hash.h +0 -46
  92. data/ext/oj/hash_test.c +0 -512
  93. data/test/_test_active.rb +0 -76
  94. data/test/_test_active_mimic.rb +0 -96
  95. data/test/_test_mimic_rails.rb +0 -126
  96. data/test/activerecord/result_test.rb +0 -27
  97. data/test/activesupport4/decoding_test.rb +0 -108
  98. data/test/activesupport4/encoding_test.rb +0 -531
  99. data/test/activesupport4/test_helper.rb +0 -41
  100. data/test/activesupport5/decoding_test.rb +0 -125
  101. data/test/activesupport5/encoding_test.rb +0 -485
  102. data/test/activesupport5/encoding_test_cases.rb +0 -90
  103. data/test/activesupport5/test_helper.rb +0 -50
  104. data/test/activesupport5/time_zone_test_helpers.rb +0 -24
  105. data/test/bar.rb +0 -25
  106. data/test/files.rb +0 -29
  107. data/test/foo.rb +0 -21
  108. data/test/helper.rb +0 -26
  109. data/test/isolated/shared.rb +0 -308
  110. data/test/isolated/test_mimic_after.rb +0 -13
  111. data/test/isolated/test_mimic_alone.rb +0 -12
  112. data/test/isolated/test_mimic_as_json.rb +0 -45
  113. data/test/isolated/test_mimic_before.rb +0 -13
  114. data/test/isolated/test_mimic_define.rb +0 -28
  115. data/test/isolated/test_mimic_rails_after.rb +0 -22
  116. data/test/isolated/test_mimic_rails_before.rb +0 -21
  117. data/test/isolated/test_mimic_redefine.rb +0 -15
  118. data/test/json_gem/json_addition_test.rb +0 -216
  119. data/test/json_gem/json_common_interface_test.rb +0 -148
  120. data/test/json_gem/json_encoding_test.rb +0 -107
  121. data/test/json_gem/json_ext_parser_test.rb +0 -20
  122. data/test/json_gem/json_fixtures_test.rb +0 -35
  123. data/test/json_gem/json_generator_test.rb +0 -383
  124. data/test/json_gem/json_generic_object_test.rb +0 -90
  125. data/test/json_gem/json_parser_test.rb +0 -470
  126. data/test/json_gem/json_string_matching_test.rb +0 -42
  127. data/test/json_gem/test_helper.rb +0 -18
  128. data/test/perf.rb +0 -107
  129. data/test/perf_compat.rb +0 -130
  130. data/test/perf_fast.rb +0 -164
  131. data/test/perf_file.rb +0 -64
  132. data/test/perf_object.rb +0 -138
  133. data/test/perf_saj.rb +0 -109
  134. data/test/perf_scp.rb +0 -151
  135. data/test/perf_simple.rb +0 -287
  136. data/test/perf_strict.rb +0 -145
  137. data/test/perf_wab.rb +0 -131
  138. data/test/sample/change.rb +0 -14
  139. data/test/sample/dir.rb +0 -19
  140. data/test/sample/doc.rb +0 -36
  141. data/test/sample/file.rb +0 -48
  142. data/test/sample/group.rb +0 -16
  143. data/test/sample/hasprops.rb +0 -16
  144. data/test/sample/layer.rb +0 -12
  145. data/test/sample/line.rb +0 -20
  146. data/test/sample/oval.rb +0 -10
  147. data/test/sample/rect.rb +0 -10
  148. data/test/sample/shape.rb +0 -35
  149. data/test/sample/text.rb +0 -20
  150. data/test/sample.rb +0 -54
  151. data/test/sample_json.rb +0 -37
  152. data/test/test_compat.rb +0 -509
  153. data/test/test_custom.rb +0 -503
  154. data/test/test_debian.rb +0 -53
  155. data/test/test_fast.rb +0 -470
  156. data/test/test_file.rb +0 -239
  157. data/test/test_gc.rb +0 -49
  158. data/test/test_hash.rb +0 -29
  159. data/test/test_integer_range.rb +0 -73
  160. data/test/test_null.rb +0 -376
  161. data/test/test_object.rb +0 -1018
  162. data/test/test_saj.rb +0 -186
  163. data/test/test_scp.rb +0 -433
  164. data/test/test_strict.rb +0 -410
  165. data/test/test_various.rb +0 -741
  166. data/test/test_wab.rb +0 -307
  167. data/test/test_writer.rb +0 -380
  168. data/test/tests.rb +0 -24
  169. data/test/tests_mimic.rb +0 -14
  170. data/test/tests_mimic_addition.rb +0 -7
  171. data/test/zoo.rb +0 -13
data/lib/oj/state.rb CHANGED
@@ -1,7 +1,6 @@
1
-
2
1
  module JSON
3
2
  module Ext
4
- module Generator
3
+ module Generator
5
4
  unless defined?(::JSON::Ext::Generator::State)
6
5
  # This class exists for json gem compatibility only. While it can be
7
6
  # used as the options for other than compatibility a simple Hash is
@@ -44,11 +43,11 @@ module JSON
44
43
  def to_h()
45
44
  return @attrs.dup
46
45
  end
47
-
46
+
48
47
  def to_hash()
49
48
  return @attrs.dup
50
49
  end
51
-
50
+
52
51
  def allow_nan?()
53
52
  @attrs[:allow_nan]
54
53
  end
@@ -59,6 +58,7 @@ module JSON
59
58
 
60
59
  def configure(opts)
61
60
  raise TypeError.new('expected a Hash') unless opts.respond_to?(:to_h)
61
+
62
62
  @attrs.merge!(opts.to_h)
63
63
  end
64
64
 
@@ -80,10 +80,11 @@ module JSON
80
80
  # @param [Symbol] m method symbol
81
81
  # @return [Boolean] true for any method that matches an instance
82
82
  # variable reader, otherwise false.
83
- def respond_to?(m)
83
+ def respond_to?(m, include_all = false)
84
84
  return true if super
85
85
  return true if has_key?(key)
86
86
  return true if has_key?(key.to_s)
87
+
87
88
  has_key?(key.to_sym)
88
89
  end
89
90
 
@@ -104,7 +105,7 @@ module JSON
104
105
  def has_key?(k)
105
106
  @attrs.has_key?(key.to_sym)
106
107
  end
107
-
108
+
108
109
  # Handles requests for Hash values. Others cause an Exception to be raised.
109
110
  # @param [Symbol|String] m method symbol
110
111
  # @return [Boolean] the value of the specified instance variable.
@@ -113,14 +114,17 @@ module JSON
113
114
  def method_missing(m, *args, &block)
114
115
  if m.to_s.end_with?('=')
115
116
  raise ArgumentError.new("wrong number of arguments (#{args.size} for 1 with #{m}) to method #{m}") if args.nil? or 1 != args.length
117
+
116
118
  m = m.to_s[0..-2]
117
119
  m = m.to_sym
118
120
  return @attrs.store(m, args[0])
119
- else
121
+ end
122
+ if @attrs.has_key?(m.to_sym)
120
123
  raise ArgumentError.new("wrong number of arguments (#{args.size} for 0 with #{m}) to method #{m}") unless args.nil? or args.empty?
124
+
121
125
  return @attrs[m.to_sym]
122
126
  end
123
- raise NoMethodError.new("undefined method #{m}", m)
127
+ return @attrs.send(m, *args, &block)
124
128
  end
125
129
 
126
130
  end # State
data/lib/oj/version.rb CHANGED
@@ -1,5 +1,4 @@
1
-
2
1
  module Oj
3
2
  # Current version of the module.
4
- VERSION = '3.9.1'
3
+ VERSION = '3.16.11'
5
4
  end
data/lib/oj.rb CHANGED
@@ -1,15 +1,9 @@
1
+ # frozen_string_literal: true
1
2
 
3
+ # Oj module is defined in oj.c.
2
4
  module Oj
3
5
  end
4
6
 
5
- begin
6
- # This require exists to get around Rubinius failing to load bigdecimal from
7
- # the C extension.
8
- require 'bigdecimal'
9
- rescue Exception
10
- # ignore
11
- end
12
-
13
7
  require 'oj/version'
14
8
  require 'oj/bag'
15
9
  require 'oj/easy_hash'
@@ -2,7 +2,7 @@
2
2
 
3
3
  **Ruby**
4
4
 
5
- Oj is compatible with Ruby 2.0.0, 2.1, 2.2, 2.3, 2.4 and RBX.
5
+ Oj is compatible with Ruby 2.4+ and RBX.
6
6
  Support for JRuby has been removed as JRuby no longer supports C extensions and
7
7
  there are bugs in the older versions that are not being fixed.
8
8
 
data/pages/Encoding.md CHANGED
@@ -15,7 +15,7 @@ in a JSON document. The formatting follows these rules.
15
15
  * The `'^'` character denotes a special key value when in a JSON Object sequence.
16
16
 
17
17
  * A Ruby String that starts with `':'`or the sequence `'^i'` or `'^r'` are
18
- encoded by excaping the first character so that it appears as `'\u005e'` or
18
+ encoded by escaping the first character so that it appears as `'\u005e'` or
19
19
  `'\u003a'` instead of `':'` or `'^'`.
20
20
 
21
21
  * A `"^c"` JSON Object key indicates the value should be converted to a Ruby
@@ -0,0 +1,20 @@
1
+ # Oj Install Options
2
+
3
+ ### Enable trace log
4
+
5
+ ```
6
+ $ gem install oj -- --enable-trace-log
7
+ ```
8
+
9
+ To enable the Oj trace feature, use the `--enable-trace-log` option when installing the gem.
10
+ Then, the trace logs will be displayed when `:trace` option is set to `true`.
11
+
12
+
13
+ ### Enable SIMD instructions
14
+
15
+ ```
16
+ $ gem install oj -- --with-sse42
17
+ ```
18
+
19
+ To enable the use of SIMD instructions in Oj, use the `--with-sse42` option when installing the gem.
20
+ This will enable the use of the SSE4.2 instructions in Oj's internals.
data/pages/JsonGem.md CHANGED
@@ -1,3 +1,18 @@
1
+ # JSON Quickstart
2
+
3
+ To have Oj universally "take over" many methods on the JSON constant (`load`, `parse`, etc.) with
4
+ their faster Oj counterparts, in a mode that is compatible with the json gem:
5
+
6
+ ```ruby
7
+ Oj.mimic_JSON()
8
+ ```
9
+
10
+ If the project does not already use the json gem, `JSON` will become available.
11
+ If the project does require the json gem, `Oj.mimic_JSON()` should be invoked after the
12
+ json gem has been required.
13
+
14
+ For more details and options, read on...
15
+
1
16
  # Oj JSON Gem Compatibility
2
17
 
3
18
  The `:compat` mode mimics the json gem. The json gem is built around the use
data/pages/Modes.md CHANGED
@@ -39,7 +39,8 @@ if a non-native type is encountered instead of raising an Exception.
39
39
  The `:compat` mode mimics the json gem. The json gem is built around the use
40
40
  of the `to_json(*)` method defined for a class. Oj attempts to provide the
41
41
  same functionality by being a drop in replacement with a few
42
- exceptions. [{file:JsonGem.md}](JsonGem.md) includes more details on
42
+ exceptions. To universally replace many `JSON` methods with their faster Oj counterparts,
43
+ simply run `Oj.mimic_JSON`. [{file:JsonGem.md}](JsonGem.md) includes more details on
43
44
  compatibility and use.
44
45
 
45
46
  ## :rails Mode
@@ -96,6 +97,9 @@ information.
96
97
  | :auto_define | Boolean | | | | | x | x | |
97
98
  | :bigdecimal_as_decimal | Boolean | | | | 3 | x | x | |
98
99
  | :bigdecimal_load | Boolean | | | | | | x | |
100
+ | :compat_bigdecimal | Boolean | | | x | | | x | |
101
+ | :cache_keys | Boolean | x | x | x | x | | x | |
102
+ | :cache_strings | Fixnum | x | x | x | x | | x | |
99
103
  | :circular | Boolean | x | x | x | x | x | x | |
100
104
  | :class_cache | Boolean | | | | | x | x | |
101
105
  | :create_additions | Boolean | | | x | x | | x | |
@@ -105,11 +109,11 @@ information.
105
109
  | :float_precision | Fixnum | x | x | | | | x | |
106
110
  | :hash_class | Class | | | x | x | | x | |
107
111
  | :ignore | Array | | | | | x | x | |
108
- | :indent | Integer | x | x | 3 | 4 | x | x | x |
112
+ | :indent | Integer | x | x | 4 | 4 | x | x | x |
109
113
  | :indent_str | String | | | x | x | | x | |
110
114
  | :integer_range | Range | x | x | x | x | x | x | x |
111
115
  | :match_string | Hash | | | x | x | | x | |
112
- | :max_nesting | Fixnum | 4 | 4 | x | | 5 | 4 | |
116
+ | :max_nesting | Fixnum | 5 | 5 | x | | 5 | 5 | |
113
117
  | :mode | Symbol | - | - | - | - | - | - | |
114
118
  | :nan | Symbol | | | | | | x | |
115
119
  | :nilnil | Boolean | | | | | | x | |
@@ -137,6 +141,8 @@ information.
137
141
  3. By default the bigdecimal_as decimal is not set and the default encoding
138
142
  for Rails is as a string. Setting the value to true will encode a
139
143
  BigDecimal as a number which breaks compatibility.
144
+ Note: after version 3.11.3 both `Oj.generate` and `JSON.generate`
145
+ will not honour this option in Rails Mode, details at https://github.com/ohler55/oj/pull/716.
140
146
 
141
147
  4. The integer indent value in the default options will be honored by since
142
148
  the json gem expects a String type the indent in calls to 'to_json()',
data/pages/Options.md CHANGED
@@ -66,6 +66,36 @@ Determines how to load decimals.
66
66
 
67
67
  - `:auto` the most precise for the number of digits is used.
68
68
 
69
+ - `:fast` faster conversion to Float.
70
+
71
+ - `:ruby` convert to Float using the Ruby `to_f` conversion.
72
+
73
+ This can also be set with `:decimal_class` when used as a load or
74
+ parse option to match the JSON gem. In that case either `Float`,
75
+ `BigDecimal`, or `nil` can be provided.
76
+
77
+ ### :cache_keys [Boolean]
78
+
79
+ If true Hash keys are cached or interned. There are trade-offs with
80
+ caching keys. Large caches will use more memory and in extreme cases
81
+ (like over a million) the cache may be slower than not using
82
+ it. Repeated parsing of similar JSON docs is where cache_keys shines
83
+ especially with symbol keys.
84
+
85
+ There is a maximum length for cached keys. Any key longer than 34
86
+ bytes is not cached. Everything still works but the key is not cached.
87
+
88
+ ### :cache_strings [Int]
89
+
90
+ Shorter strings can be cached for better performance. A limit,
91
+ cache_strings, defines the upper limit on what strings are cached. As
92
+ with cached keys only strings less than 35 bytes are cached even if
93
+ the limit is set higher. Setting the limit to zero effectively
94
+ disables the caching of string values.
95
+
96
+ Note that caching for strings is for string values and not Hash keys
97
+ or Object attributes.
98
+
69
99
  ### :circular [Boolean]
70
100
 
71
101
  Detect circular references while dumping. In :compat mode raise a
@@ -78,18 +108,26 @@ recreate the looped references on load.
78
108
  Cache classes for faster parsing. This option should not be used if
79
109
  dynamically modifying classes or reloading classes then don't use this.
80
110
 
111
+ ### :compat_bigdecimal [Boolean]
112
+
113
+ Determines how to load decimals when in `:compat` mode.
114
+
115
+ - `true` convert all decimal numbers to BigDecimal.
116
+
117
+ - `false` convert all decimal numbers to Float.
118
+
81
119
  ### :create_additions
82
120
 
83
- A flag indicating the :create_id key when encountered during parsing should
84
- creating an Object mactching the class name specified in the value associated
85
- with the key.
121
+ A flag indicating that the :create_id key, when encountered during parsing,
122
+ should create an Object matching the class name specified in the value
123
+ associated with the key.
86
124
 
87
125
  ### :create_id [String]
88
126
 
89
127
  The :create_id option specifies that key is used for dumping and loading when
90
128
  specifying the class for an encoded object. The default is `json_create`.
91
129
 
92
- In the `:custom` mode setting the `:create_id` to nil will cause Complex,
130
+ In the `:custom` mode, setting the `:create_id` to nil will cause Complex,
93
131
  Rational, Range, and Regexp to be output as strings instead of as JSON
94
132
  objects.
95
133
 
@@ -120,6 +158,8 @@ Determines the characters to escape when dumping. Only the :ascii and
120
158
 
121
159
  - `:json` follows the JSON specification. This is the default mode.
122
160
 
161
+ - `:slash` escapes `/` characters.
162
+
123
163
  - `:xss_safe` escapes HTML and XML characters such as `&` and `<`.
124
164
 
125
165
  - `:ascii` escapes all non-ascii or characters with the hi-bit set.
@@ -179,7 +219,7 @@ customization.
179
219
  ### :nan [Symbol]
180
220
 
181
221
  How to dump Infinity, -Infinity, and NaN in :null, :strict, and :compat
182
- mode. Default is :auto but is ignored in the :compat and :rails mode.
222
+ mode. Default is :auto but is ignored in the :compat and :rails modes.
183
223
 
184
224
  - `:null` places a null
185
225
 
@@ -225,6 +265,10 @@ to true.
225
265
 
226
266
  The number of digits after the decimal when dumping the seconds of time.
227
267
 
268
+ ### :skip_null_byte [Boolean]
269
+
270
+ If true, null bytes in strings will be omitted when dumping.
271
+
228
272
  ### :space
229
273
 
230
274
  String inserted after the ':' character when dumping a JSON object. The
@@ -239,7 +283,13 @@ compatibility. Using just indent as an integer gives better performance.
239
283
 
240
284
  ### :symbol_keys [Boolean]
241
285
 
242
- Use symbols instead of strings for hash keys. :symbolize_names is an alias.
286
+ Use symbols instead of strings for hash keys.
287
+
288
+ ### :symbolize_names [Boolean]
289
+
290
+ Like :symbol_keys, hash keys are made into symbols but only when
291
+ mimicking the JSON gem and then only as the JSON gem honors it so
292
+ JSON.parse honors the option but JSON.load does not.
243
293
 
244
294
  ### :trace
245
295
 
@@ -252,7 +302,7 @@ The :time_format when dumping.
252
302
 
253
303
  - `:unix` time is output as a decimal number in seconds since epoch including fractions of a second.
254
304
 
255
- - `:unix_zone` similar to the `:unix` format but with the timezone encoded in
305
+ - `:unix_zone` is similar to the `:unix` format but with the timezone encoded in
256
306
  the exponent of the decimal number of seconds since epoch.
257
307
 
258
308
  - `:xmlschema` time is output as a string that follows the XML schema definition.
@@ -262,16 +312,16 @@ The :time_format when dumping.
262
312
  ### :use_as_json [Boolean]
263
313
 
264
314
  Call `as_json()` methods on dump, default is false. The option is ignored in
265
- the :compat and :rails mode.
315
+ the :compat and :rails modes.
266
316
 
267
317
 
268
318
  ### :use_raw_json [Boolean]
269
319
 
270
320
  Call `raw_json()` methods on dump, default is false. The option is
271
- accepted in the :compat and :rails mode even though it is not
321
+ accepted in the :compat and :rails modes even though it is not
272
322
  supported by other JSON gems. It provides a means to optimize dump or
273
323
  generate performance. The `raw_json(depth, indent)` method should be
274
- called only by Oj. It is not intended for any other use. This is mean
324
+ called only by Oj. It is not intended for any other use. This is meant
275
325
  to replace the abused `to_json` methods. Calling `Oj.dump` inside the
276
326
  `raw_json` with the object itself when `:use_raw_json` is true will
277
327
  result in an infinite loop.
@@ -279,9 +329,9 @@ result in an infinite loop.
279
329
  ### :use_to_hash [Boolean]
280
330
 
281
331
  Call `to_hash()` methods on dump, default is false. The option is ignored in
282
- the :compat and :rails mode.
332
+ the :compat and :rails modes.
283
333
 
284
334
  ### :use_to_json [Boolean]
285
335
 
286
336
  Call `to_json()` methods on dump, default is false. The option is ignored in
287
- the :compat and :rails mode.
337
+ the :compat and :rails modes.
data/pages/Parser.md ADDED
@@ -0,0 +1,309 @@
1
+ # How Oj Just Got Faster
2
+
3
+ The original Oj parser is a performant parser that supports several
4
+ modes. As of this writing Oj is almost 10 years old. A dinosaur by
5
+ coding standards. It was time for an upgrade. Dealing with issues over
6
+ the years it became clear that a few things could have been done
7
+ better. The new `Oj::Parser` is a response that not only attempts to
8
+ address some of the issues but also give the Oj parser a significant
9
+ boost in performance. `Oj::Parser` takes a different approach to JSON
10
+ parsing than the now legacy Oj parser. Not really a legacy parser yet
11
+ since the `Oj::Parser` is not a drop-in replacement for the JSON gem
12
+ but it is as much as 3 times or more faster than the previous parser in
13
+ some modes.
14
+
15
+ ## Address Issues
16
+
17
+ There are a few features of the `Oj.load` parser that continue to be
18
+ the reason for many of the issues on the project. The most significant
19
+ area is compatibility with both Rails and the JSON gem as they battle
20
+ it out for which behavior will win out in any particular
21
+ situation. Most of the issues are on the writing or dumping side of
22
+ the JSON packages but some are present on the parsing as
23
+ well. Conversion of decimals is one area where the Rails and the JSON
24
+ gem vary. The `Oj::Parser` addresses this by allowing for completely
25
+ separate parser instances. Create a parser and configure it for the
26
+ situation and leave the other parsers on their own.
27
+
28
+ The `Oj::Parser` is mostly compatible with the JSON gem and Rails but
29
+ no claims are made that the behavior will be the same as either.
30
+
31
+ The most frequent issues that can be addressed with the new parser are
32
+ around the handling of options. For `Oj.load` there is a set of
33
+ default options that can be set and the same options can be specified
34
+ for each call to parse or load. This approach has a couple of
35
+ downsides. One is that the defaults are shared across all calls to parse no
36
+ matter what the desired mode is. The second is that having to provide
37
+ all the options on each parse call incurs a performance penalty and is
38
+ just annoying to repeat the same set of options over many calls.
39
+
40
+ By localizing options to a specific parser instance there is never any
41
+ bleed over to other instances.
42
+
43
+ ## How
44
+
45
+ It's wonderful to wish for a faster parser that solves all the
46
+ annoyances of the previous parser but how it was done is a much more
47
+ interesting question to answer.
48
+
49
+ At the core, the API for parsing was changed. Instead of a single
50
+ global parser any number of parsers can be created and each is separate
51
+ from the others. The parser itself is able to rip through a JSON
52
+ string, stream, or file and then make calls to a delegate to process
53
+ the JSON elements according to the delegate behavior. This is similar
54
+ to the `Oj.load` parser but the new parser takes advantage of
55
+ character maps, reduced conditional branching, and calling function
56
+ pointers.
57
+
58
+ ### Options
59
+
60
+ As mentioned, one way to change the options issues was to change the
61
+ API. Instead of having a shared set of default options a separate
62
+ parser is created and configured for each use case. Options are set
63
+ with methods on the parser so no more guessing what options are
64
+ available. With options isolated to individual parsers there is no
65
+ unintended leakage to other parse use cases.
66
+
67
+ ### Structure
68
+
69
+ A relatively small amount of time is spent in the actual parsing of JSON
70
+ in `Oj.load`. Most of the time is spent building the Ruby
71
+ Objects. Even cutting the parsing time in half only gives a 10%
72
+ improvement in performance but 10% is still an improvement.
73
+
74
+ The `Oj::Parser` is designed to reduce conditional branching. To do
75
+ that it uses character maps for the various states that the parser
76
+ goes through when parsing. There is no recursion as the JSON elements
77
+ are parsed. The use of character maps for each parser state means
78
+ the parser function can and is re-entrant so partial blocks of JSON
79
+ can be parsed and the results combined.
80
+
81
+ There are no Ruby calls in the parser itself. Instead delegates are
82
+ used to implement the various behaviors of the parser which are
83
+ currently validation (validate), callbacks (SAJ), or building Ruby
84
+ objects (usual). The delegates are where all the Ruby calls and
85
+ related optimizations take place.
86
+
87
+ Considering JSON file parsing, `Oj.load_file` is able to read a file a
88
+ block at a time and the new `Oj::Parser` does the same. There was a
89
+ change in how that is done though. `Oj.load_file` sets up a reader
90
+ that must be called for each character. Basically a buffered
91
+ reader. `Oj::Parser` drops down a level and uses a re-entrant parser
92
+ that takes a block of bytes at a time so there is no call needed for
93
+ each character but rather just iterating over the block read from the
94
+ file.
95
+
96
+ Reading a block at a time also allows for an efficient second thread
97
+ to be used for reading blocks. That feature is not in the first
98
+ iteration of the `Oj::Parser` but the stage is set for it in the
99
+ future. The same approach was used successfully in
100
+ [OjC](https://github.com/ohler55/ojc) which is where the code for the
101
+ parser was taken from.
102
+
103
+ ### Delegates
104
+
105
+ There are three delegates; validate, SAJ, and usual.
106
+
107
+ #### Validate
108
+
109
+ The validate delegate is trivial in that it does nothing other than let
110
+ the parser complete. There are no options for the validate
111
+ delegate. By not making any Ruby calls other than to start the parsing
112
+ it is no surprise that the validate delegate is the
113
+ best performer.
114
+
115
+ #### SAJ (Simple API for JSON)
116
+
117
+ The SAJ delegate is compatible with the SAJ handlers used with
118
+ `Oj.saj_parse` so it needs to keep track of keys for the
119
+ callbacks. Two optimizations are used. The first is a reusable key
120
+ stack while the second is a string cache similar to the Ruby intern
121
+ function.
122
+
123
+ When parsing a Hash (JSON object) element the key is passed to the
124
+ callback function if the SAJ handler responds to the method. The key
125
+ is also provided when closing an Array or Hash that is part of a
126
+ parent Hash. A key stack supports this.
127
+
128
+ If the option is turned on a lookup is made and previously cached key
129
+ VALUEs are used. This avoids creating the string for the key and
130
+ setting the encoding on it. The cache used is an auto expanding hash
131
+ implementation that is limited to strings less than 35 characters
132
+ which covers most keys. Larger strings use the slower string creation
133
+ approach. The use of the cache reduces object creation which saves on
134
+ both memory allocation and time. It is not appropriate for one time
135
+ parsing of say all the keys in a dictionary but is ideally suited for
136
+ loading similar JSON multiple times.
137
+
138
+ #### Usual
139
+
140
+ By far the most complex of the delegates is the 'usual' delegate. The
141
+ usual delegate builds Ruby Objects when parsing JSON. It incorporates
142
+ many options for configuration and makes use of a number of
143
+ optimizations.
144
+
145
+ ##### Reduce Branching
146
+
147
+ In keeping with the goal of reducing conditional branching most of the
148
+ delegate options are implemented by changing a function pointer
149
+ according to the option selected. For example when turning on or off
150
+ `:symbol_keys` the function to calculate the key is changed so no
151
+ decision needs to be made during parsing. Using this approach option
152
+ branching happens when the option is set and not each time when
153
+ parsing.
154
+
155
+ ##### Cache
156
+
157
+ Creating Ruby Objects whether Strings, Array, or some other class is
158
+ expensive. Well expensive when running at the speeds Oj runs at. One
159
+ way to reduce Object creation is to cache those objects on the
160
+ assumption that they will most likely be used again. This is
161
+ especially true of Hash keys and Object attribute IDs. When creating
162
+ Objects from a class name in the JSON a class cache saves resolving
163
+ the string to a class each time. Of course there are times when
164
+ caching is not preferred so caching can be turned on or off with
165
+ option methods on the parser which are passed down to the delegate.
166
+
167
+ The Oj cache implementation is an auto expanding hash. When certain
168
+ limits are reached the hash is expanded and rehashed. Rehashing can
169
+ take some time as the number of items cached increases so there is
170
+ also an option to start with a larger cache size to avoid or reduce
171
+ the likelihood of a rehash.
172
+
173
+ The Oj cache has an advantage over the Ruby intern function
174
+ (`rb_intern()`) in that several steps are needed for some cached
175
+ items. As an example Object attribute IDs are created by adding an `@`
176
+ character prefix to a string and then converting to an ID. This is done
177
+ once when inserting into the cache and after that only a lookup is
178
+ needed.
179
+
180
+ ##### Bulk Insert
181
+
182
+ The Ruby functions available for C extension functions are extensive
183
+ and offer many options across the board. The bulk insert functions for
184
+ both Arrays and Hashes are much faster than appending or setting
185
+ functions that set one value at a time. The Array bulk insert is
186
+ around 15 times faster and for Hash it is about 3 times faster.
187
+
188
+ To take advantage of the bulk inserts arrays of VALUEs are
189
+ needed. With a little planning these VALUE arrays can be reused which
190
+ leads into another optimization, the use of stacks.
191
+
192
+ ##### Stacks
193
+
194
+ Parsing requires memory to keep track of values when parsing nested
195
+ JSON elements. That can be done on the call stack making use of
196
+ recursive calls or it can be done with a stack managed by the
197
+ parser. The `Oj.load` method maintains a stack for Ruby objects and
198
+ builds the output as the parsing progresses.
199
+
200
+ `Oj::Parser` uses three different stacks. One stack for values, one
201
+ for keys, and one for collections (Array and Hash). By postponing the
202
+ creation of the collection elements the bulk insertions for Array and
203
+ Hash can be used. For arrays the use of a value stack and creating the
204
+ array after all elements have been identified gives a 15x improvement
205
+ in array creation.
206
+
207
+ For Hash the story is a little different. The bulk insert for Hash
208
+ alternates keys and values but there is a wrinkle to consider. Since
209
+ Ruby Object creation is triggered by the occurrence of an element that
210
+ matches a creation identifier the creation of a collection is not just
211
+ for Array and Hash but also Object. Setting Object attributes uses an
212
+ ID and not a VALUE. For that reason the keys should not be created as
213
+ String or Symbol types as they would be ignored and the VALUE creation
214
+ wasted when setting Object attributes. Using the bulk insert for Hash
215
+ gives a 3x improvement for that part of the object building.
216
+
217
+ Looking at the Object creation the JSON gem expects a class method of
218
+ `json_create(arg)`. The single argument is the Hash resulting from
219
+ the parsing assuming that the parser parsed to a Hash first. This is
220
+ less than ideal from a performance perspective so `Oj::Parser`
221
+ provides an option to take that approach or to use the much more
222
+ efficient approach of never creating the Hash but instead creating the
223
+ Object and then setting the attributes directly.
224
+
225
+ To further improve performance and reduce the amount of memory
226
+ allocations and frees the stacks are reused from one call to `#parse`
227
+ to another.
228
+
229
+ ## Results
230
+
231
+ The results are even better than expected. Running the
232
+ [perf_parser.rb](https://github.com/ohler55/oj/blob/develop/test/perf_parser.rb)
233
+ file shows the improvements. There are four comparisons all run on a
234
+ MacBook Pro with Intel processor.
235
+
236
+ ### Validation
237
+
238
+ Without a comparable parser that just validates a JSON document the
239
+ `Oj.saj_parse` callback parser with a nil handler is used for
240
+ comparison to the new `Oj::Parser.new(:validate)`. In that case the
241
+ comparison is:
242
+
243
+ ```
244
+ System time (secs) rate (ops/sec)
245
+ ------------------- ----------- --------------
246
+ Oj::Parser.validate 0.101 494369.136
247
+ Oj::Saj.none 0.205 244122.745
248
+ ```
249
+
250
+ The `Oj::Parser.new(:validate)` is **2.03** times faster!
251
+
252
+ ### Callback
253
+
254
+ Oj has two callback parsers. One is SCP and the other SAJ. Both are
255
+ similar in that a handler is provided that implements methods for
256
+ processing the various element types in a JSON document. Comparing
257
+ `Oj.saj_parse` to `Oj::Parser.new(:saj)` with a handler that implements
258
+ all of the callback methods gives the following raw results:
259
+
260
+ ```
261
+ System time (secs) rate (ops/sec)
262
+ -------------- ----------- --------------
263
+ Oj::Parser.saj 0.783 63836.986
264
+ Oj::Saj.all 1.182 42315.397
265
+ ```
266
+
267
+ The `Oj::Parser.new(:saj)` is **1.51** times faster.
268
+
269
+ ### Parse to Ruby primitives
270
+
271
+ Parsing to Ruby primitives and Array and Hash is possible with most
272
+ parsers including the JSON gem parser. The raw results comparing
273
+ `Oj.strict_load`, `Oj::Parser.new(:usual)`, and the JSON gem are:
274
+
275
+ ```
276
+ System time (secs) rate (ops/sec)
277
+ ---------------- ----------- --------------
278
+ Oj::Parser.usual 0.452 110544.876
279
+ Oj::strict_load 0.699 71490.257
280
+ JSON::Ext 1.009 49555.094
281
+ ```
282
+
283
+ The `Oj::Parser.new(:usual)` is **1.55** times faster than `Oj.strict_load` and
284
+ **2.23** times faster than the JSON gem.
285
+
286
+ ### Object
287
+
288
+ Oj supports two modes for Object serialization and
289
+ deserialization. Comparing the JSON gem compatible mode
290
+ `Oj.compat_load`, `Oj::Parser.new(:usual)`, and the JSON gem yields
291
+ the following raw results:
292
+
293
+ ```
294
+ System time (secs) rate (ops/sec)
295
+ ---------------- ----------- --------------
296
+ Oj::Parser.usual 0.071 703502.033
297
+ Oj::compat_load 0.225 221762.927
298
+ JSON::Ext 0.401 124638.859
299
+ ```
300
+
301
+ The `Oj::Parser.new(:usual)` is **3.17** times faster than
302
+ `Oj.compat_load` and **5.64** times faster than the JSON gem.
303
+
304
+ ## Summary
305
+
306
+ With a performance boost of 1.5x to over 3x over the `Oj.load`
307
+ parser the new `Oj::Parser` is a big win in the performance arena. The
308
+ isolation of options is another feature that should make life easier
309
+ for developers.