simdjson 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. checksums.yaml +7 -0
  2. data/.clang-format +5 -0
  3. data/.gitignore +14 -0
  4. data/.gitmodules +3 -0
  5. data/.rubocop.yml +9 -0
  6. data/.travis.yml +7 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +39 -0
  10. data/Rakefile +32 -0
  11. data/benchmark/apache_builds.json +4421 -0
  12. data/benchmark/demo.json +15 -0
  13. data/benchmark/github_events.json +1390 -0
  14. data/benchmark/run_benchmark.rb +30 -0
  15. data/ext/simdjson/extconf.rb +22 -0
  16. data/ext/simdjson/simdjson.cpp +76 -0
  17. data/ext/simdjson/simdjson.hpp +6 -0
  18. data/lib/simdjson/version.rb +3 -0
  19. data/lib/simdjson.rb +2 -0
  20. data/simdjson.gemspec +35 -0
  21. data/vendor/.gitkeep +0 -0
  22. data/vendor/simdjson/AUTHORS +3 -0
  23. data/vendor/simdjson/CMakeLists.txt +63 -0
  24. data/vendor/simdjson/CONTRIBUTORS +27 -0
  25. data/vendor/simdjson/Dockerfile +10 -0
  26. data/vendor/simdjson/LICENSE +201 -0
  27. data/vendor/simdjson/Makefile +203 -0
  28. data/vendor/simdjson/Notes.md +85 -0
  29. data/vendor/simdjson/README.md +581 -0
  30. data/vendor/simdjson/amalgamation.sh +158 -0
  31. data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
  32. data/vendor/simdjson/benchmark/benchmark.h +223 -0
  33. data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
  34. data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
  35. data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
  36. data/vendor/simdjson/benchmark/parse.cpp +393 -0
  37. data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
  38. data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
  39. data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
  40. data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
  41. data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
  42. data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
  43. data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
  44. data/vendor/simdjson/doc/gbps.png +0 -0
  45. data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
  46. data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
  47. data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
  48. data/vendor/simdjson/images/halvarflake.png +0 -0
  49. data/vendor/simdjson/images/logo.png +0 -0
  50. data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
  51. data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
  52. data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
  53. data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
  54. data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
  55. data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
  56. data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
  57. data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
  58. data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
  59. data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
  60. data/vendor/simdjson/include/simdjson/portability.h +172 -0
  61. data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
  62. data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
  63. data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
  64. data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
  65. data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
  66. data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
  67. data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
  68. data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
  69. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
  70. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
  71. data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
  72. data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
  73. data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
  74. data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
  75. data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
  76. data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
  77. data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
  78. data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
  79. data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
  80. data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
  81. data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
  82. data/vendor/simdjson/scripts/issue150.sh +14 -0
  83. data/vendor/simdjson/scripts/javascript/README.md +3 -0
  84. data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
  85. data/vendor/simdjson/scripts/minifier.sh +11 -0
  86. data/vendor/simdjson/scripts/parseandstat.sh +24 -0
  87. data/vendor/simdjson/scripts/parser.sh +11 -0
  88. data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
  89. data/vendor/simdjson/scripts/plotparse.sh +98 -0
  90. data/vendor/simdjson/scripts/selectparser.sh +11 -0
  91. data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
  92. data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
  93. data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
  94. data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
  95. data/vendor/simdjson/scripts/testjson2json.sh +99 -0
  96. data/vendor/simdjson/scripts/transitions/Makefile +10 -0
  97. data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
  98. data/vendor/simdjson/singleheader/README.md +1 -0
  99. data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
  100. data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
  101. data/vendor/simdjson/singleheader/simdjson.h +39692 -0
  102. data/vendor/simdjson/src/CMakeLists.txt +67 -0
  103. data/vendor/simdjson/src/jsonioutil.cpp +35 -0
  104. data/vendor/simdjson/src/jsonminifier.cpp +285 -0
  105. data/vendor/simdjson/src/jsonparser.cpp +91 -0
  106. data/vendor/simdjson/src/parsedjson.cpp +323 -0
  107. data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
  108. data/vendor/simdjson/src/simdjson.cpp +30 -0
  109. data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
  110. data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
  111. data/vendor/simdjson/style/clang-format-check.sh +25 -0
  112. data/vendor/simdjson/style/clang-format.sh +25 -0
  113. data/vendor/simdjson/style/run-clang-format.py +326 -0
  114. data/vendor/simdjson/tape.md +134 -0
  115. data/vendor/simdjson/tests/CMakeLists.txt +25 -0
  116. data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
  117. data/vendor/simdjson/tests/basictests.cpp +75 -0
  118. data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
  119. data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
  120. data/vendor/simdjson/tests/pointercheck.cpp +38 -0
  121. data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
  122. data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
  123. data/vendor/simdjson/tools/CMakeLists.txt +3 -0
  124. data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
  125. data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
  126. data/vendor/simdjson/tools/json2json.cpp +112 -0
  127. data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
  128. data/vendor/simdjson/tools/jsonstats.cpp +143 -0
  129. data/vendor/simdjson/tools/minify.cpp +21 -0
  130. data/vendor/simdjson/tools/release.py +125 -0
  131. data/vendor/simdjson/windows/dirent_portable.h +1043 -0
  132. metadata +273 -0
@@ -0,0 +1,544 @@
1
+ #ifndef SIMDJSON_PARSEDJSON_H
2
+ #define SIMDJSON_PARSEDJSON_H
3
+
4
+ #include "simdjson/common_defs.h"
5
+ #include "simdjson/jsonformatutils.h"
6
+ #include "simdjson/portability.h"
7
+ #include "simdjson/simdjson.h"
8
+ #include <cinttypes>
9
+ #include <cmath>
10
+ #include <cstring>
11
+ #include <iomanip>
12
+ #include <iostream>
13
+ #include <limits>
14
+
15
+ #define JSON_VALUE_MASK 0xFFFFFFFFFFFFFF
16
+
17
+ #define DEFAULT_MAX_DEPTH \
18
+ 1024 // a JSON document with a depth exceeding 1024 is probably de facto
19
+ // invalid
20
+
21
+ namespace simdjson {
22
+ /************
23
+ * The JSON is parsed to a tape, see the accompanying tape.md file
24
+ * for documentation.
25
+ ***********/
26
+ class ParsedJson {
27
+ public:
28
+ // create a ParsedJson container with zero capacity, call allocate_capacity to
29
+ // allocate memory
30
+ ParsedJson();
31
+ ~ParsedJson();
32
+ ParsedJson(ParsedJson &&p);
33
+
34
+ // if needed, allocate memory so that the object is able to process JSON
35
+ // documents having up to len bytes and max_depth "depth"
36
+ WARN_UNUSED
37
+ bool allocate_capacity(size_t len, size_t max_depth = DEFAULT_MAX_DEPTH);
38
+
39
+ // returns true if the document parsed was valid
40
+ bool is_valid() const;
41
+
42
+ // return an error code corresponding to the last parsing attempt, see
43
+ // simdjson.h will return simdjson::UNITIALIZED if no parsing was attempted
44
+ int get_error_code() const;
45
+
46
+ // return the string equivalent of "get_error_code"
47
+ std::string get_error_message() const;
48
+
49
+ // deallocate memory and set capacity to zero, called automatically by the
50
+ // destructor
51
+ void deallocate();
52
+
53
+ // this should be called when parsing (right before writing the tapes)
54
+ void init();
55
+
56
+ // print the json to stdout (should be valid)
57
+ // return false if the tape is likely wrong (e.g., you did not parse a valid
58
+ // JSON).
59
+ WARN_UNUSED
60
+ bool print_json(std::ostream &os);
61
+ WARN_UNUSED
62
+ bool dump_raw_tape(std::ostream &os);
63
+
64
+ // all nodes are stored on the tape using a 64-bit word.
65
+ //
66
+ // strings, double and ints are stored as
67
+ // a 64-bit word with a pointer to the actual value
68
+ //
69
+ //
70
+ //
71
+ // for objects or arrays, store [ or { at the beginning and } and ] at the
72
+ // end. For the openings ([ or {), we annotate them with a reference to the
73
+ // location on the tape of the end, and for then closings (} and ]), we
74
+ // annotate them with a reference to the location of the opening
75
+ //
76
+ //
77
+
78
+ // this should be considered a private function
79
+ really_inline void write_tape(uint64_t val, uint8_t c) {
80
+ tape[current_loc++] = val | ((static_cast<uint64_t>(c)) << 56);
81
+ }
82
+
83
+ really_inline void write_tape_s64(int64_t i) {
84
+ write_tape(0, 'l');
85
+ tape[current_loc++] = *(reinterpret_cast<uint64_t *>(&i));
86
+ }
87
+
88
+ really_inline void write_tape_double(double d) {
89
+ write_tape(0, 'd');
90
+ static_assert(sizeof(d) == sizeof(tape[current_loc]), "mismatch size");
91
+ memcpy(&tape[current_loc++], &d, sizeof(double));
92
+ // tape[current_loc++] = *((uint64_t *)&d);
93
+ }
94
+
95
+ really_inline uint32_t get_current_loc() { return current_loc; }
96
+
97
+ really_inline void annotate_previous_loc(uint32_t saved_loc, uint64_t val) {
98
+ tape[saved_loc] |= val;
99
+ }
100
+
101
+ class InvalidJSON : public std::exception {
102
+ const char *what() const throw() { return "JSON document is invalid"; }
103
+ };
104
+
105
+ class Iterator {
106
+ // might throw InvalidJSON if ParsedJson is invalid
107
+ public:
108
+ explicit Iterator(ParsedJson &pj_);
109
+ ~Iterator();
110
+
111
+ Iterator(const Iterator &o) noexcept;
112
+
113
+ Iterator(Iterator &&o) noexcept;
114
+
115
+ inline bool is_ok() const;
116
+
117
+ // useful for debuging purposes
118
+ inline size_t get_tape_location() const;
119
+
120
+ // useful for debuging purposes
121
+ inline size_t get_tape_length() const;
122
+
123
+ // returns the current depth (start at 1 with 0 reserved for the fictitious
124
+ // root node)
125
+ inline size_t get_depth() const;
126
+
127
+ // A scope is a series of nodes at the same depth, typically it is either an
128
+ // object ({) or an array ([). The root node has type 'r'.
129
+ inline uint8_t get_scope_type() const;
130
+
131
+ // move forward in document order
132
+ inline bool move_forward();
133
+
134
+ // retrieve the character code of what we're looking at:
135
+ // [{"sltfn are the possibilities
136
+ inline uint8_t get_type() const {
137
+ return current_type; // short functions should be inlined!
138
+ }
139
+
140
+ // get the int64_t value at this node; valid only if we're at "l"
141
+ inline int64_t get_integer() const {
142
+ if (location + 1 >= tape_length) {
143
+ return 0; // default value in case of error
144
+ }
145
+ return static_cast<int64_t>(pj.tape[location + 1]);
146
+ }
147
+
148
+ // get the string value at this node (NULL ended); valid only if we're at "
149
+ // note that tabs, and line endings are escaped in the returned value (see
150
+ // print_with_escapes) return value is valid UTF-8 It may contain NULL chars
151
+ // within the string: get_string_length determines the true string length.
152
+ inline const char *get_string() const {
153
+ return reinterpret_cast<const char *>(
154
+ pj.string_buf + (current_val & JSON_VALUE_MASK) + sizeof(uint32_t));
155
+ }
156
+
157
+ // return the length of the string in bytes
158
+ inline uint32_t get_string_length() const {
159
+ uint32_t answer;
160
+ memcpy(&answer,
161
+ reinterpret_cast<const char *>(pj.string_buf +
162
+ (current_val & JSON_VALUE_MASK)),
163
+ sizeof(uint32_t));
164
+ return answer;
165
+ }
166
+
167
+ // get the double value at this node; valid only if
168
+ // we're at "d"
169
+ inline double get_double() const {
170
+ if (location + 1 >= tape_length) {
171
+ return std::numeric_limits<double>::quiet_NaN(); // default value in
172
+ // case of error
173
+ }
174
+ double answer;
175
+ memcpy(&answer, &pj.tape[location + 1], sizeof(answer));
176
+ return answer;
177
+ }
178
+
179
+ inline bool is_object_or_array() const { return is_object() || is_array(); }
180
+
181
+ inline bool is_object() const { return get_type() == '{'; }
182
+
183
+ inline bool is_array() const { return get_type() == '['; }
184
+
185
+ inline bool is_string() const { return get_type() == '"'; }
186
+
187
+ inline bool is_integer() const { return get_type() == 'l'; }
188
+
189
+ inline bool is_double() const { return get_type() == 'd'; }
190
+
191
+ inline bool is_true() const { return get_type() == 't'; }
192
+
193
+ inline bool is_false() const { return get_type() == 'f'; }
194
+
195
+ inline bool is_null() const { return get_type() == 'n'; }
196
+
197
+ static bool is_object_or_array(uint8_t type) {
198
+ return ((type == '[') || (type == '{'));
199
+ }
200
+
201
+ // when at {, go one level deep, looking for a given key
202
+ // if successful, we are left pointing at the value,
203
+ // if not, we are still pointing at the object ({)
204
+ // (in case of repeated keys, this only finds the first one).
205
+ // We seek the key using C's strcmp so if your JSON strings contain
206
+ // NULL chars, this would trigger a false positive: if you expect that
207
+ // to be the case, take extra precautions.
208
+ inline bool move_to_key(const char *key);
209
+ // when at {, go one level deep, looking for a given key
210
+ // if successful, we are left pointing at the value,
211
+ // if not, we are still pointing at the object ({)
212
+ // (in case of repeated keys, this only finds the first one).
213
+ // The string we search for can contain NULL values.
214
+ inline bool move_to_key(const char *key, uint32_t length);
215
+
216
+ // when at a key location within an object, this moves to the accompanying
217
+ // value (located next to it). this is equivalent but much faster than
218
+ // calling "next()".
219
+ inline void move_to_value();
220
+
221
+ // when at [, go one level deep, and advance to the given index.
222
+ // if successful, we are left pointing at the value,
223
+ // if not, we are still pointing at the array ([)
224
+ inline bool move_to_index(uint32_t index);
225
+
226
+ // Moves the iterator to the value correspoding to the json pointer.
227
+ // Always search from the root of the document.
228
+ // if successful, we are left pointing at the value,
229
+ // if not, we are still pointing the same value we were pointing before the
230
+ // call. The json pointer follows the rfc6901 standard's syntax:
231
+ // https://tools.ietf.org/html/rfc6901 However, the standard says "If a
232
+ // referenced member name is not unique in an object, the member that is
233
+ // referenced is undefined, and evaluation fails". Here we just return the
234
+ // first corresponding value. The length parameter is the length of the
235
+ // jsonpointer string ('pointer').
236
+ bool move_to(const char *pointer, uint32_t length);
237
+
238
+ // Moves the iterator to the value correspoding to the json pointer.
239
+ // Always search from the root of the document.
240
+ // if successful, we are left pointing at the value,
241
+ // if not, we are still pointing the same value we were pointing before the
242
+ // call. The json pointer implementation follows the rfc6901 standard's
243
+ // syntax: https://tools.ietf.org/html/rfc6901 However, the standard says
244
+ // "If a referenced member name is not unique in an object, the member that
245
+ // is referenced is undefined, and evaluation fails". Here we just return
246
+ // the first corresponding value.
247
+ inline bool move_to(const std::string &pointer) {
248
+ return move_to(pointer.c_str(), pointer.length());
249
+ }
250
+
251
+ private:
252
+ // Almost the same as move_to(), except it searchs from the current
253
+ // position. The pointer's syntax is identical, though that case is not
254
+ // handled by the rfc6901 standard. The '/' is still required at the
255
+ // beginning. However, contrary to move_to(), the URI Fragment Identifier
256
+ // Representation is not supported here. Also, in case of failure, we are
257
+ // left pointing at the closest value it could reach. For these reasons it
258
+ // is private. It exists because it is used by move_to().
259
+ bool relative_move_to(const char *pointer, uint32_t length);
260
+
261
+ public:
262
+ // throughout return true if we can do the navigation, false
263
+ // otherwise
264
+
265
+ // Withing a given scope (series of nodes at the same depth within either an
266
+ // array or an object), we move forward.
267
+ // Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, {
268
+ // and [. At the object ({) or at the array ([), you can issue a "down" to
269
+ // visit their content. valid if we're not at the end of a scope (returns
270
+ // true).
271
+ inline bool next();
272
+
273
+ // Withing a given scope (series of nodes at the same depth within either an
274
+ // array or an object), we move backward.
275
+ // Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true
276
+ // when starting at the end of the scope. At the object ({) or at the array
277
+ // ([), you can issue a "down" to visit their content.
278
+ inline bool prev();
279
+
280
+ // Moves back to either the containing array or object (type { or [) from
281
+ // within a contained scope.
282
+ // Valid unless we are at the first level of the document
283
+ inline bool up();
284
+
285
+ // Valid if we're at a [ or { and it starts a non-empty scope; moves us to
286
+ // start of that deeper scope if it not empty. Thus, given [true, null,
287
+ // {"a":1}, [1,2]], if we are at the { node, we would move to the "a" node.
288
+ inline bool down();
289
+
290
+ // move us to the start of our current scope,
291
+ // a scope is a series of nodes at the same level
292
+ inline void to_start_scope();
293
+
294
+ inline void rewind() {
295
+ while (up())
296
+ ;
297
+ }
298
+
299
+ // void to_end_scope(); // move us to
300
+ // the start of our current scope; always succeeds
301
+
302
+ // print the thing we're currently pointing at
303
+ bool print(std::ostream &os, bool escape_strings = true) const;
304
+ typedef struct {
305
+ size_t start_of_scope;
306
+ uint8_t scope_type;
307
+ } scopeindex_t;
308
+
309
+ private:
310
+ Iterator &operator=(const Iterator &other) = delete;
311
+
312
+ ParsedJson &pj;
313
+ size_t depth;
314
+ size_t location; // our current location on a tape
315
+ size_t tape_length;
316
+ uint8_t current_type;
317
+ uint64_t current_val;
318
+ scopeindex_t *depth_index;
319
+ };
320
+
321
+ size_t byte_capacity{0}; // indicates how many bits are meant to be supported
322
+
323
+ size_t depth_capacity{0}; // how deep we can go
324
+ size_t tape_capacity{0};
325
+ size_t string_capacity{0};
326
+ uint32_t current_loc{0};
327
+ uint32_t n_structural_indexes{0};
328
+
329
+ uint32_t *structural_indexes;
330
+
331
+ uint64_t *tape;
332
+ uint32_t *containing_scope_offset;
333
+ #ifdef SIMDJSON_USE_COMPUTED_GOTO
334
+ void **ret_address;
335
+ #else
336
+ char *ret_address;
337
+ #endif
338
+
339
+ uint8_t *string_buf; // should be at least byte_capacity
340
+ uint8_t *current_string_buf_loc;
341
+ bool valid{false};
342
+ int error_code{simdjson::UNITIALIZED};
343
+
344
+ private:
345
+ // we don't want the default constructor to be called
346
+ ParsedJson(const ParsedJson &p) =
347
+ delete; // we don't want the default constructor to be called
348
+ // we don't want the assignment to be called
349
+ ParsedJson &operator=(const ParsedJson &o) = delete;
350
+ };
351
+
352
// Writes the 64 bits of v to std::cout, least significant bit first, using
// '1' for a set bit and '_' for a clear bit, followed by a space, msg and a
// newline.
inline void dumpbits_always(uint64_t v, const std::string &msg) {
  uint64_t remaining = v;
  for (int bit = 0; bit != 64; ++bit) {
    const bool is_set = (remaining & 0x1ULL) != 0;
    std::cout << (is_set ? "1" : "_");
    remaining >>= 1;
  }
  std::cout << " " << msg.c_str() << "\n";
}
359
+
360
// 32-bit counterpart of dumpbits_always: writes the bits of v to std::cout,
// least significant bit first ('1' = set, '_' = clear), then a space, msg and
// a newline.
inline void dumpbits32_always(uint32_t v, const std::string &msg) {
  uint32_t remaining = v;
  for (int bit = 0; bit != 32; ++bit) {
    const bool is_set = (remaining & 0x1U) != 0;
    std::cout << (is_set ? "1" : "_");
    remaining >>= 1;
  }
  std::cout << " " << msg.c_str() << "\n";
}
366
+
367
+ WARN_UNUSED
368
+ bool ParsedJson::Iterator::is_ok() const { return location < tape_length; }
369
+
370
+ // useful for debuging purposes
371
+ size_t ParsedJson::Iterator::get_tape_location() const { return location; }
372
+
373
+ // useful for debuging purposes
374
+ size_t ParsedJson::Iterator::get_tape_length() const { return tape_length; }
375
+
376
+ // returns the current depth (start at 1 with 0 reserved for the fictitious root
377
+ // node)
378
+ size_t ParsedJson::Iterator::get_depth() const { return depth; }
379
+
380
+ // A scope is a series of nodes at the same depth, typically it is either an
381
+ // object ({) or an array ([). The root node has type 'r'.
382
+ uint8_t ParsedJson::Iterator::get_scope_type() const {
383
+ return depth_index[depth].scope_type;
384
+ }
385
+
386
+ bool ParsedJson::Iterator::move_forward() {
387
+ if (location + 1 >= tape_length) {
388
+ return false; // we are at the end!
389
+ }
390
+
391
+ if ((current_type == '[') || (current_type == '{')) {
392
+ // We are entering a new scope
393
+ depth++;
394
+ depth_index[depth].start_of_scope = location;
395
+ depth_index[depth].scope_type = current_type;
396
+ } else if ((current_type == ']') || (current_type == '}')) {
397
+ // Leaving a scope.
398
+ depth--;
399
+ } else if ((current_type == 'd') || (current_type == 'l')) {
400
+ // d and l types use 2 locations on the tape, not just one.
401
+ location += 1;
402
+ }
403
+
404
+ location += 1;
405
+ current_val = pj.tape[location];
406
+ current_type = (current_val >> 56);
407
+ return true;
408
+ }
409
+
410
+ void ParsedJson::Iterator::move_to_value() {
411
+ // assume that we are on a key, so move by 1.
412
+ location += 1;
413
+ current_val = pj.tape[location];
414
+ current_type = (current_val >> 56);
415
+ }
416
+
417
+ bool ParsedJson::Iterator::move_to_key(const char *key) {
418
+ if (down()) {
419
+ do {
420
+ assert(is_string());
421
+ bool right_key =
422
+ (strcmp(get_string(), key) == 0); // null chars would fool this
423
+ move_to_value();
424
+ if (right_key) {
425
+ return true;
426
+ }
427
+ } while (next());
428
+ assert(up()); // not found
429
+ }
430
+ return false;
431
+ }
432
+
433
+ bool ParsedJson::Iterator::move_to_key(const char *key, uint32_t length) {
434
+ if (down()) {
435
+ do {
436
+ assert(is_string());
437
+ bool right_key = ((get_string_length() == length) &&
438
+ (memcmp(get_string(), key, length) == 0));
439
+ move_to_value();
440
+ if (right_key) {
441
+ return true;
442
+ }
443
+ } while (next());
444
+ assert(up()); // not found
445
+ }
446
+ return false;
447
+ }
448
+
449
+ bool ParsedJson::Iterator::move_to_index(uint32_t index) {
450
+ assert(is_array());
451
+ if (down()) {
452
+ uint32_t i = 0;
453
+ for (; i < index; i++) {
454
+ if (!next()) {
455
+ break;
456
+ }
457
+ }
458
+ if (i == index) {
459
+ return true;
460
+ }
461
+ assert(up());
462
+ }
463
+ return false;
464
+ }
465
+
466
+ bool ParsedJson::Iterator::prev() {
467
+ if (location - 1 < depth_index[depth].start_of_scope) {
468
+ return false;
469
+ }
470
+ location -= 1;
471
+ current_val = pj.tape[location];
472
+ current_type = (current_val >> 56);
473
+ if ((current_type == ']') || (current_type == '}')) {
474
+ // we need to jump
475
+ size_t new_location = (current_val & JSON_VALUE_MASK);
476
+ if (new_location < depth_index[depth].start_of_scope) {
477
+ return false; // shoud never happen
478
+ }
479
+ location = new_location;
480
+ current_val = pj.tape[location];
481
+ current_type = (current_val >> 56);
482
+ }
483
+ return true;
484
+ }
485
+
486
+ bool ParsedJson::Iterator::up() {
487
+ if (depth == 1) {
488
+ return false; // don't allow moving back to root
489
+ }
490
+ to_start_scope();
491
+ // next we just move to the previous value
492
+ depth--;
493
+ location -= 1;
494
+ current_val = pj.tape[location];
495
+ current_type = (current_val >> 56);
496
+ return true;
497
+ }
498
+
499
+ bool ParsedJson::Iterator::down() {
500
+ if (location + 1 >= tape_length) {
501
+ return false;
502
+ }
503
+ if ((current_type == '[') || (current_type == '{')) {
504
+ size_t npos = (current_val & JSON_VALUE_MASK);
505
+ if (npos == location + 2) {
506
+ return false; // we have an empty scope
507
+ }
508
+ depth++;
509
+ location = location + 1;
510
+ depth_index[depth].start_of_scope = location;
511
+ depth_index[depth].scope_type = current_type;
512
+ current_val = pj.tape[location];
513
+ current_type = (current_val >> 56);
514
+ return true;
515
+ }
516
+ return false;
517
+ }
518
+
519
+ void ParsedJson::Iterator::to_start_scope() {
520
+ location = depth_index[depth].start_of_scope;
521
+ current_val = pj.tape[location];
522
+ current_type = (current_val >> 56);
523
+ }
524
+
525
+ bool ParsedJson::Iterator::next() {
526
+ size_t npos;
527
+ if ((current_type == '[') || (current_type == '{')) {
528
+ // we need to jump
529
+ npos = (current_val & JSON_VALUE_MASK);
530
+ } else {
531
+ npos = location + ((current_type == 'd' || current_type == 'l') ? 2 : 1);
532
+ }
533
+ uint64_t next_val = pj.tape[npos];
534
+ uint8_t next_type = (next_val >> 56);
535
+ if ((next_type == ']') || (next_type == '}')) {
536
+ return false; // we reached the end of the scope
537
+ }
538
+ location = npos;
539
+ current_val = next_val;
540
+ current_type = next_type;
541
+ return true;
542
+ }
543
+ } // namespace simdjson
544
+ #endif