simdjson 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clang-format +5 -0
- data/.gitignore +14 -0
- data/.gitmodules +3 -0
- data/.rubocop.yml +9 -0
- data/.travis.yml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +32 -0
- data/benchmark/apache_builds.json +4421 -0
- data/benchmark/demo.json +15 -0
- data/benchmark/github_events.json +1390 -0
- data/benchmark/run_benchmark.rb +30 -0
- data/ext/simdjson/extconf.rb +22 -0
- data/ext/simdjson/simdjson.cpp +76 -0
- data/ext/simdjson/simdjson.hpp +6 -0
- data/lib/simdjson/version.rb +3 -0
- data/lib/simdjson.rb +2 -0
- data/simdjson.gemspec +35 -0
- data/vendor/.gitkeep +0 -0
- data/vendor/simdjson/AUTHORS +3 -0
- data/vendor/simdjson/CMakeLists.txt +63 -0
- data/vendor/simdjson/CONTRIBUTORS +27 -0
- data/vendor/simdjson/Dockerfile +10 -0
- data/vendor/simdjson/LICENSE +201 -0
- data/vendor/simdjson/Makefile +203 -0
- data/vendor/simdjson/Notes.md +85 -0
- data/vendor/simdjson/README.md +581 -0
- data/vendor/simdjson/amalgamation.sh +158 -0
- data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
- data/vendor/simdjson/benchmark/benchmark.h +223 -0
- data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
- data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
- data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
- data/vendor/simdjson/benchmark/parse.cpp +393 -0
- data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
- data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
- data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
- data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
- data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/gbps.png +0 -0
- data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
- data/vendor/simdjson/images/halvarflake.png +0 -0
- data/vendor/simdjson/images/logo.png +0 -0
- data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
- data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
- data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
- data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
- data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
- data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
- data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
- data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
- data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
- data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
- data/vendor/simdjson/include/simdjson/portability.h +172 -0
- data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
- data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
- data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
- data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
- data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
- data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
- data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
- data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
- data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
- data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
- data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
- data/vendor/simdjson/scripts/issue150.sh +14 -0
- data/vendor/simdjson/scripts/javascript/README.md +3 -0
- data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
- data/vendor/simdjson/scripts/minifier.sh +11 -0
- data/vendor/simdjson/scripts/parseandstat.sh +24 -0
- data/vendor/simdjson/scripts/parser.sh +11 -0
- data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
- data/vendor/simdjson/scripts/plotparse.sh +98 -0
- data/vendor/simdjson/scripts/selectparser.sh +11 -0
- data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
- data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
- data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
- data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
- data/vendor/simdjson/scripts/testjson2json.sh +99 -0
- data/vendor/simdjson/scripts/transitions/Makefile +10 -0
- data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
- data/vendor/simdjson/singleheader/README.md +1 -0
- data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
- data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
- data/vendor/simdjson/singleheader/simdjson.h +39692 -0
- data/vendor/simdjson/src/CMakeLists.txt +67 -0
- data/vendor/simdjson/src/jsonioutil.cpp +35 -0
- data/vendor/simdjson/src/jsonminifier.cpp +285 -0
- data/vendor/simdjson/src/jsonparser.cpp +91 -0
- data/vendor/simdjson/src/parsedjson.cpp +323 -0
- data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
- data/vendor/simdjson/src/simdjson.cpp +30 -0
- data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
- data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
- data/vendor/simdjson/style/clang-format-check.sh +25 -0
- data/vendor/simdjson/style/clang-format.sh +25 -0
- data/vendor/simdjson/style/run-clang-format.py +326 -0
- data/vendor/simdjson/tape.md +134 -0
- data/vendor/simdjson/tests/CMakeLists.txt +25 -0
- data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
- data/vendor/simdjson/tests/basictests.cpp +75 -0
- data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
- data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
- data/vendor/simdjson/tests/pointercheck.cpp +38 -0
- data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
- data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
- data/vendor/simdjson/tools/CMakeLists.txt +3 -0
- data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
- data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
- data/vendor/simdjson/tools/json2json.cpp +112 -0
- data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
- data/vendor/simdjson/tools/jsonstats.cpp +143 -0
- data/vendor/simdjson/tools/minify.cpp +21 -0
- data/vendor/simdjson/tools/release.py +125 -0
- data/vendor/simdjson/windows/dirent_portable.h +1043 -0
- metadata +273 -0
@@ -0,0 +1,544 @@
|
|
1
|
+
#ifndef SIMDJSON_PARSEDJSON_H
|
2
|
+
#define SIMDJSON_PARSEDJSON_H
|
3
|
+
|
4
|
+
#include "simdjson/common_defs.h"
|
5
|
+
#include "simdjson/jsonformatutils.h"
|
6
|
+
#include "simdjson/portability.h"
|
7
|
+
#include "simdjson/simdjson.h"
|
8
|
+
#include <cinttypes>
|
9
|
+
#include <cmath>
|
10
|
+
#include <cstring>
|
11
|
+
#include <iomanip>
|
12
|
+
#include <iostream>
|
13
|
+
#include <limits>
|
14
|
+
|
15
|
+
// Mask selecting the low 56 bits of a tape word, i.e. everything except the
// top byte, which is where write_tape() stores the node's type character.
#define JSON_VALUE_MASK 0xFFFFFFFFFFFFFF

// Default nesting limit used by allocate_capacity(): a JSON document with a
// depth exceeding 1024 is probably de facto invalid.
#define DEFAULT_MAX_DEPTH 1024
|
20
|
+
|
21
|
+
namespace simdjson {
|
22
|
+
/************
|
23
|
+
* The JSON is parsed to a tape, see the accompanying tape.md file
|
24
|
+
* for documentation.
|
25
|
+
***********/
|
26
|
+
class ParsedJson {
|
27
|
+
public:
|
28
|
+
// create a ParsedJson container with zero capacity, call allocate_capacity to
|
29
|
+
// allocate memory
|
30
|
+
ParsedJson();
|
31
|
+
~ParsedJson();
|
32
|
+
ParsedJson(ParsedJson &&p);
|
33
|
+
|
34
|
+
// if needed, allocate memory so that the object is able to process JSON
|
35
|
+
// documents having up to len bytes and max_depth "depth"
|
36
|
+
WARN_UNUSED
|
37
|
+
bool allocate_capacity(size_t len, size_t max_depth = DEFAULT_MAX_DEPTH);
|
38
|
+
|
39
|
+
// returns true if the document parsed was valid
|
40
|
+
bool is_valid() const;
|
41
|
+
|
42
|
+
// return an error code corresponding to the last parsing attempt, see
|
43
|
+
// simdjson.h will return simdjson::UNITIALIZED if no parsing was attempted
|
44
|
+
int get_error_code() const;
|
45
|
+
|
46
|
+
// return the string equivalent of "get_error_code"
|
47
|
+
std::string get_error_message() const;
|
48
|
+
|
49
|
+
// deallocate memory and set capacity to zero, called automatically by the
|
50
|
+
// destructor
|
51
|
+
void deallocate();
|
52
|
+
|
53
|
+
// this should be called when parsing (right before writing the tapes)
|
54
|
+
void init();
|
55
|
+
|
56
|
+
// print the json to stdout (should be valid)
|
57
|
+
// return false if the tape is likely wrong (e.g., you did not parse a valid
|
58
|
+
// JSON).
|
59
|
+
WARN_UNUSED
|
60
|
+
bool print_json(std::ostream &os);
|
61
|
+
WARN_UNUSED
|
62
|
+
bool dump_raw_tape(std::ostream &os);
|
63
|
+
|
64
|
+
// all nodes are stored on the tape using a 64-bit word.
|
65
|
+
//
|
66
|
+
// strings, double and ints are stored as
|
67
|
+
// a 64-bit word with a pointer to the actual value
|
68
|
+
//
|
69
|
+
//
|
70
|
+
//
|
71
|
+
// for objects or arrays, store [ or { at the beginning and } and ] at the
|
72
|
+
// end. For the openings ([ or {), we annotate them with a reference to the
|
73
|
+
// location on the tape of the end, and for then closings (} and ]), we
|
74
|
+
// annotate them with a reference to the location of the opening
|
75
|
+
//
|
76
|
+
//
|
77
|
+
|
78
|
+
// this should be considered a private function
|
79
|
+
really_inline void write_tape(uint64_t val, uint8_t c) {
|
80
|
+
tape[current_loc++] = val | ((static_cast<uint64_t>(c)) << 56);
|
81
|
+
}
|
82
|
+
|
83
|
+
really_inline void write_tape_s64(int64_t i) {
|
84
|
+
write_tape(0, 'l');
|
85
|
+
tape[current_loc++] = *(reinterpret_cast<uint64_t *>(&i));
|
86
|
+
}
|
87
|
+
|
88
|
+
really_inline void write_tape_double(double d) {
|
89
|
+
write_tape(0, 'd');
|
90
|
+
static_assert(sizeof(d) == sizeof(tape[current_loc]), "mismatch size");
|
91
|
+
memcpy(&tape[current_loc++], &d, sizeof(double));
|
92
|
+
// tape[current_loc++] = *((uint64_t *)&d);
|
93
|
+
}
|
94
|
+
|
95
|
+
really_inline uint32_t get_current_loc() { return current_loc; }
|
96
|
+
|
97
|
+
really_inline void annotate_previous_loc(uint32_t saved_loc, uint64_t val) {
|
98
|
+
tape[saved_loc] |= val;
|
99
|
+
}
|
100
|
+
|
101
|
+
class InvalidJSON : public std::exception {
|
102
|
+
const char *what() const throw() { return "JSON document is invalid"; }
|
103
|
+
};
|
104
|
+
|
105
|
+
class Iterator {
|
106
|
+
// might throw InvalidJSON if ParsedJson is invalid
|
107
|
+
public:
|
108
|
+
explicit Iterator(ParsedJson &pj_);
|
109
|
+
~Iterator();
|
110
|
+
|
111
|
+
Iterator(const Iterator &o) noexcept;
|
112
|
+
|
113
|
+
Iterator(Iterator &&o) noexcept;
|
114
|
+
|
115
|
+
inline bool is_ok() const;
|
116
|
+
|
117
|
+
// useful for debuging purposes
|
118
|
+
inline size_t get_tape_location() const;
|
119
|
+
|
120
|
+
// useful for debuging purposes
|
121
|
+
inline size_t get_tape_length() const;
|
122
|
+
|
123
|
+
// returns the current depth (start at 1 with 0 reserved for the fictitious
|
124
|
+
// root node)
|
125
|
+
inline size_t get_depth() const;
|
126
|
+
|
127
|
+
// A scope is a series of nodes at the same depth, typically it is either an
|
128
|
+
// object ({) or an array ([). The root node has type 'r'.
|
129
|
+
inline uint8_t get_scope_type() const;
|
130
|
+
|
131
|
+
// move forward in document order
|
132
|
+
inline bool move_forward();
|
133
|
+
|
134
|
+
// retrieve the character code of what we're looking at:
|
135
|
+
// [{"sltfn are the possibilities
|
136
|
+
inline uint8_t get_type() const {
|
137
|
+
return current_type; // short functions should be inlined!
|
138
|
+
}
|
139
|
+
|
140
|
+
// get the int64_t value at this node; valid only if we're at "l"
|
141
|
+
inline int64_t get_integer() const {
|
142
|
+
if (location + 1 >= tape_length) {
|
143
|
+
return 0; // default value in case of error
|
144
|
+
}
|
145
|
+
return static_cast<int64_t>(pj.tape[location + 1]);
|
146
|
+
}
|
147
|
+
|
148
|
+
// get the string value at this node (NULL ended); valid only if we're at "
|
149
|
+
// note that tabs, and line endings are escaped in the returned value (see
|
150
|
+
// print_with_escapes) return value is valid UTF-8 It may contain NULL chars
|
151
|
+
// within the string: get_string_length determines the true string length.
|
152
|
+
inline const char *get_string() const {
|
153
|
+
return reinterpret_cast<const char *>(
|
154
|
+
pj.string_buf + (current_val & JSON_VALUE_MASK) + sizeof(uint32_t));
|
155
|
+
}
|
156
|
+
|
157
|
+
// return the length of the string in bytes
|
158
|
+
inline uint32_t get_string_length() const {
|
159
|
+
uint32_t answer;
|
160
|
+
memcpy(&answer,
|
161
|
+
reinterpret_cast<const char *>(pj.string_buf +
|
162
|
+
(current_val & JSON_VALUE_MASK)),
|
163
|
+
sizeof(uint32_t));
|
164
|
+
return answer;
|
165
|
+
}
|
166
|
+
|
167
|
+
// get the double value at this node; valid only if
|
168
|
+
// we're at "d"
|
169
|
+
inline double get_double() const {
|
170
|
+
if (location + 1 >= tape_length) {
|
171
|
+
return std::numeric_limits<double>::quiet_NaN(); // default value in
|
172
|
+
// case of error
|
173
|
+
}
|
174
|
+
double answer;
|
175
|
+
memcpy(&answer, &pj.tape[location + 1], sizeof(answer));
|
176
|
+
return answer;
|
177
|
+
}
|
178
|
+
|
179
|
+
inline bool is_object_or_array() const { return is_object() || is_array(); }
|
180
|
+
|
181
|
+
inline bool is_object() const { return get_type() == '{'; }
|
182
|
+
|
183
|
+
inline bool is_array() const { return get_type() == '['; }
|
184
|
+
|
185
|
+
inline bool is_string() const { return get_type() == '"'; }
|
186
|
+
|
187
|
+
inline bool is_integer() const { return get_type() == 'l'; }
|
188
|
+
|
189
|
+
inline bool is_double() const { return get_type() == 'd'; }
|
190
|
+
|
191
|
+
inline bool is_true() const { return get_type() == 't'; }
|
192
|
+
|
193
|
+
inline bool is_false() const { return get_type() == 'f'; }
|
194
|
+
|
195
|
+
inline bool is_null() const { return get_type() == 'n'; }
|
196
|
+
|
197
|
+
static bool is_object_or_array(uint8_t type) {
|
198
|
+
return ((type == '[') || (type == '{'));
|
199
|
+
}
|
200
|
+
|
201
|
+
// when at {, go one level deep, looking for a given key
|
202
|
+
// if successful, we are left pointing at the value,
|
203
|
+
// if not, we are still pointing at the object ({)
|
204
|
+
// (in case of repeated keys, this only finds the first one).
|
205
|
+
// We seek the key using C's strcmp so if your JSON strings contain
|
206
|
+
// NULL chars, this would trigger a false positive: if you expect that
|
207
|
+
// to be the case, take extra precautions.
|
208
|
+
inline bool move_to_key(const char *key);
|
209
|
+
// when at {, go one level deep, looking for a given key
|
210
|
+
// if successful, we are left pointing at the value,
|
211
|
+
// if not, we are still pointing at the object ({)
|
212
|
+
// (in case of repeated keys, this only finds the first one).
|
213
|
+
// The string we search for can contain NULL values.
|
214
|
+
inline bool move_to_key(const char *key, uint32_t length);
|
215
|
+
|
216
|
+
// when at a key location within an object, this moves to the accompanying
|
217
|
+
// value (located next to it). this is equivalent but much faster than
|
218
|
+
// calling "next()".
|
219
|
+
inline void move_to_value();
|
220
|
+
|
221
|
+
// when at [, go one level deep, and advance to the given index.
|
222
|
+
// if successful, we are left pointing at the value,
|
223
|
+
// if not, we are still pointing at the array ([)
|
224
|
+
inline bool move_to_index(uint32_t index);
|
225
|
+
|
226
|
+
// Moves the iterator to the value correspoding to the json pointer.
|
227
|
+
// Always search from the root of the document.
|
228
|
+
// if successful, we are left pointing at the value,
|
229
|
+
// if not, we are still pointing the same value we were pointing before the
|
230
|
+
// call. The json pointer follows the rfc6901 standard's syntax:
|
231
|
+
// https://tools.ietf.org/html/rfc6901 However, the standard says "If a
|
232
|
+
// referenced member name is not unique in an object, the member that is
|
233
|
+
// referenced is undefined, and evaluation fails". Here we just return the
|
234
|
+
// first corresponding value. The length parameter is the length of the
|
235
|
+
// jsonpointer string ('pointer').
|
236
|
+
bool move_to(const char *pointer, uint32_t length);
|
237
|
+
|
238
|
+
// Moves the iterator to the value correspoding to the json pointer.
|
239
|
+
// Always search from the root of the document.
|
240
|
+
// if successful, we are left pointing at the value,
|
241
|
+
// if not, we are still pointing the same value we were pointing before the
|
242
|
+
// call. The json pointer implementation follows the rfc6901 standard's
|
243
|
+
// syntax: https://tools.ietf.org/html/rfc6901 However, the standard says
|
244
|
+
// "If a referenced member name is not unique in an object, the member that
|
245
|
+
// is referenced is undefined, and evaluation fails". Here we just return
|
246
|
+
// the first corresponding value.
|
247
|
+
inline bool move_to(const std::string &pointer) {
|
248
|
+
return move_to(pointer.c_str(), pointer.length());
|
249
|
+
}
|
250
|
+
|
251
|
+
private:
|
252
|
+
// Almost the same as move_to(), except it searchs from the current
|
253
|
+
// position. The pointer's syntax is identical, though that case is not
|
254
|
+
// handled by the rfc6901 standard. The '/' is still required at the
|
255
|
+
// beginning. However, contrary to move_to(), the URI Fragment Identifier
|
256
|
+
// Representation is not supported here. Also, in case of failure, we are
|
257
|
+
// left pointing at the closest value it could reach. For these reasons it
|
258
|
+
// is private. It exists because it is used by move_to().
|
259
|
+
bool relative_move_to(const char *pointer, uint32_t length);
|
260
|
+
|
261
|
+
public:
|
262
|
+
// throughout return true if we can do the navigation, false
|
263
|
+
// otherwise
|
264
|
+
|
265
|
+
// Withing a given scope (series of nodes at the same depth within either an
|
266
|
+
// array or an object), we move forward.
|
267
|
+
// Thus, given [true, null, {"a":1}, [1,2]], we would visit true, null, {
|
268
|
+
// and [. At the object ({) or at the array ([), you can issue a "down" to
|
269
|
+
// visit their content. valid if we're not at the end of a scope (returns
|
270
|
+
// true).
|
271
|
+
inline bool next();
|
272
|
+
|
273
|
+
// Withing a given scope (series of nodes at the same depth within either an
|
274
|
+
// array or an object), we move backward.
|
275
|
+
// Thus, given [true, null, {"a":1}, [1,2]], we would visit ], }, null, true
|
276
|
+
// when starting at the end of the scope. At the object ({) or at the array
|
277
|
+
// ([), you can issue a "down" to visit their content.
|
278
|
+
inline bool prev();
|
279
|
+
|
280
|
+
// Moves back to either the containing array or object (type { or [) from
|
281
|
+
// within a contained scope.
|
282
|
+
// Valid unless we are at the first level of the document
|
283
|
+
inline bool up();
|
284
|
+
|
285
|
+
// Valid if we're at a [ or { and it starts a non-empty scope; moves us to
|
286
|
+
// start of that deeper scope if it not empty. Thus, given [true, null,
|
287
|
+
// {"a":1}, [1,2]], if we are at the { node, we would move to the "a" node.
|
288
|
+
inline bool down();
|
289
|
+
|
290
|
+
// move us to the start of our current scope,
|
291
|
+
// a scope is a series of nodes at the same level
|
292
|
+
inline void to_start_scope();
|
293
|
+
|
294
|
+
inline void rewind() {
|
295
|
+
while (up())
|
296
|
+
;
|
297
|
+
}
|
298
|
+
|
299
|
+
// void to_end_scope(); // move us to
|
300
|
+
// the start of our current scope; always succeeds
|
301
|
+
|
302
|
+
// print the thing we're currently pointing at
|
303
|
+
bool print(std::ostream &os, bool escape_strings = true) const;
|
304
|
+
typedef struct {
|
305
|
+
size_t start_of_scope;
|
306
|
+
uint8_t scope_type;
|
307
|
+
} scopeindex_t;
|
308
|
+
|
309
|
+
private:
|
310
|
+
Iterator &operator=(const Iterator &other) = delete;
|
311
|
+
|
312
|
+
ParsedJson &pj;
|
313
|
+
size_t depth;
|
314
|
+
size_t location; // our current location on a tape
|
315
|
+
size_t tape_length;
|
316
|
+
uint8_t current_type;
|
317
|
+
uint64_t current_val;
|
318
|
+
scopeindex_t *depth_index;
|
319
|
+
};
|
320
|
+
|
321
|
+
size_t byte_capacity{0}; // indicates how many bits are meant to be supported
|
322
|
+
|
323
|
+
size_t depth_capacity{0}; // how deep we can go
|
324
|
+
size_t tape_capacity{0};
|
325
|
+
size_t string_capacity{0};
|
326
|
+
uint32_t current_loc{0};
|
327
|
+
uint32_t n_structural_indexes{0};
|
328
|
+
|
329
|
+
uint32_t *structural_indexes;
|
330
|
+
|
331
|
+
uint64_t *tape;
|
332
|
+
uint32_t *containing_scope_offset;
|
333
|
+
#ifdef SIMDJSON_USE_COMPUTED_GOTO
|
334
|
+
void **ret_address;
|
335
|
+
#else
|
336
|
+
char *ret_address;
|
337
|
+
#endif
|
338
|
+
|
339
|
+
uint8_t *string_buf; // should be at least byte_capacity
|
340
|
+
uint8_t *current_string_buf_loc;
|
341
|
+
bool valid{false};
|
342
|
+
int error_code{simdjson::UNITIALIZED};
|
343
|
+
|
344
|
+
private:
|
345
|
+
// we don't want the default constructor to be called
|
346
|
+
ParsedJson(const ParsedJson &p) =
|
347
|
+
delete; // we don't want the default constructor to be called
|
348
|
+
// we don't want the assignment to be called
|
349
|
+
ParsedJson &operator=(const ParsedJson &o) = delete;
|
350
|
+
};
|
351
|
+
|
352
|
+
// Writes the 64 bits of v to std::cout, least significant bit first, using
// "1" for a set bit and "_" for a clear one, followed by a space, the message
// (as a C string) and a newline.
inline void dumpbits_always(uint64_t v, const std::string &msg) {
  uint64_t remaining = v;
  for (int emitted = 0; emitted != 64; ++emitted) {
    const char *glyph = ((remaining & 0x1ULL) != 0) ? "1" : "_";
    std::cout << glyph;
    remaining >>= 1;
  }
  std::cout << " " << msg.c_str() << "\n";
}
|
359
|
+
|
360
|
+
// 32-bit variant of the bit dump: writes the bits of v to std::cout, least
// significant bit first ("1" set, "_" clear), then a space, the message (as a
// C string) and a newline.
inline void dumpbits32_always(uint32_t v, const std::string &msg) {
  uint32_t remaining = v;
  for (int emitted = 0; emitted != 32; ++emitted) {
    const char *glyph = ((remaining & 0x1U) != 0) ? "1" : "_";
    std::cout << glyph;
    remaining >>= 1;
  }
  std::cout << " " << msg.c_str() << "\n";
}
|
366
|
+
|
367
|
+
// True while the iterator's location is still inside the tape.
WARN_UNUSED
bool ParsedJson::Iterator::is_ok() const {
  const bool inside_tape = (location < tape_length);
  return inside_tape;
}
|
369
|
+
|
370
|
+
// useful for debuging purposes
|
371
|
+
size_t ParsedJson::Iterator::get_tape_location() const { return location; }
|
372
|
+
|
373
|
+
// useful for debuging purposes
|
374
|
+
size_t ParsedJson::Iterator::get_tape_length() const { return tape_length; }
|
375
|
+
|
376
|
+
// returns the current depth (start at 1 with 0 reserved for the fictitious root
|
377
|
+
// node)
|
378
|
+
size_t ParsedJson::Iterator::get_depth() const { return depth; }
|
379
|
+
|
380
|
+
// A scope is a series of nodes at the same depth, typically it is either an
|
381
|
+
// object ({) or an array ([). The root node has type 'r'.
|
382
|
+
uint8_t ParsedJson::Iterator::get_scope_type() const {
|
383
|
+
return depth_index[depth].scope_type;
|
384
|
+
}
|
385
|
+
|
386
|
+
bool ParsedJson::Iterator::move_forward() {
|
387
|
+
if (location + 1 >= tape_length) {
|
388
|
+
return false; // we are at the end!
|
389
|
+
}
|
390
|
+
|
391
|
+
if ((current_type == '[') || (current_type == '{')) {
|
392
|
+
// We are entering a new scope
|
393
|
+
depth++;
|
394
|
+
depth_index[depth].start_of_scope = location;
|
395
|
+
depth_index[depth].scope_type = current_type;
|
396
|
+
} else if ((current_type == ']') || (current_type == '}')) {
|
397
|
+
// Leaving a scope.
|
398
|
+
depth--;
|
399
|
+
} else if ((current_type == 'd') || (current_type == 'l')) {
|
400
|
+
// d and l types use 2 locations on the tape, not just one.
|
401
|
+
location += 1;
|
402
|
+
}
|
403
|
+
|
404
|
+
location += 1;
|
405
|
+
current_val = pj.tape[location];
|
406
|
+
current_type = (current_val >> 56);
|
407
|
+
return true;
|
408
|
+
}
|
409
|
+
|
410
|
+
// Steps from a key to the value stored in the next tape word. The caller is
// assumed to be positioned on a key; nothing is checked here.
void ParsedJson::Iterator::move_to_value() {
  ++location;
  const uint64_t word = pj.tape[location];
  current_val = word;
  current_type = static_cast<uint8_t>(word >> 56);
}
|
416
|
+
|
417
|
+
bool ParsedJson::Iterator::move_to_key(const char *key) {
|
418
|
+
if (down()) {
|
419
|
+
do {
|
420
|
+
assert(is_string());
|
421
|
+
bool right_key =
|
422
|
+
(strcmp(get_string(), key) == 0); // null chars would fool this
|
423
|
+
move_to_value();
|
424
|
+
if (right_key) {
|
425
|
+
return true;
|
426
|
+
}
|
427
|
+
} while (next());
|
428
|
+
assert(up()); // not found
|
429
|
+
}
|
430
|
+
return false;
|
431
|
+
}
|
432
|
+
|
433
|
+
bool ParsedJson::Iterator::move_to_key(const char *key, uint32_t length) {
|
434
|
+
if (down()) {
|
435
|
+
do {
|
436
|
+
assert(is_string());
|
437
|
+
bool right_key = ((get_string_length() == length) &&
|
438
|
+
(memcmp(get_string(), key, length) == 0));
|
439
|
+
move_to_value();
|
440
|
+
if (right_key) {
|
441
|
+
return true;
|
442
|
+
}
|
443
|
+
} while (next());
|
444
|
+
assert(up()); // not found
|
445
|
+
}
|
446
|
+
return false;
|
447
|
+
}
|
448
|
+
|
449
|
+
bool ParsedJson::Iterator::move_to_index(uint32_t index) {
|
450
|
+
assert(is_array());
|
451
|
+
if (down()) {
|
452
|
+
uint32_t i = 0;
|
453
|
+
for (; i < index; i++) {
|
454
|
+
if (!next()) {
|
455
|
+
break;
|
456
|
+
}
|
457
|
+
}
|
458
|
+
if (i == index) {
|
459
|
+
return true;
|
460
|
+
}
|
461
|
+
assert(up());
|
462
|
+
}
|
463
|
+
return false;
|
464
|
+
}
|
465
|
+
|
466
|
+
bool ParsedJson::Iterator::prev() {
|
467
|
+
if (location - 1 < depth_index[depth].start_of_scope) {
|
468
|
+
return false;
|
469
|
+
}
|
470
|
+
location -= 1;
|
471
|
+
current_val = pj.tape[location];
|
472
|
+
current_type = (current_val >> 56);
|
473
|
+
if ((current_type == ']') || (current_type == '}')) {
|
474
|
+
// we need to jump
|
475
|
+
size_t new_location = (current_val & JSON_VALUE_MASK);
|
476
|
+
if (new_location < depth_index[depth].start_of_scope) {
|
477
|
+
return false; // shoud never happen
|
478
|
+
}
|
479
|
+
location = new_location;
|
480
|
+
current_val = pj.tape[location];
|
481
|
+
current_type = (current_val >> 56);
|
482
|
+
}
|
483
|
+
return true;
|
484
|
+
}
|
485
|
+
|
486
|
+
bool ParsedJson::Iterator::up() {
|
487
|
+
if (depth == 1) {
|
488
|
+
return false; // don't allow moving back to root
|
489
|
+
}
|
490
|
+
to_start_scope();
|
491
|
+
// next we just move to the previous value
|
492
|
+
depth--;
|
493
|
+
location -= 1;
|
494
|
+
current_val = pj.tape[location];
|
495
|
+
current_type = (current_val >> 56);
|
496
|
+
return true;
|
497
|
+
}
|
498
|
+
|
499
|
+
bool ParsedJson::Iterator::down() {
|
500
|
+
if (location + 1 >= tape_length) {
|
501
|
+
return false;
|
502
|
+
}
|
503
|
+
if ((current_type == '[') || (current_type == '{')) {
|
504
|
+
size_t npos = (current_val & JSON_VALUE_MASK);
|
505
|
+
if (npos == location + 2) {
|
506
|
+
return false; // we have an empty scope
|
507
|
+
}
|
508
|
+
depth++;
|
509
|
+
location = location + 1;
|
510
|
+
depth_index[depth].start_of_scope = location;
|
511
|
+
depth_index[depth].scope_type = current_type;
|
512
|
+
current_val = pj.tape[location];
|
513
|
+
current_type = (current_val >> 56);
|
514
|
+
return true;
|
515
|
+
}
|
516
|
+
return false;
|
517
|
+
}
|
518
|
+
|
519
|
+
void ParsedJson::Iterator::to_start_scope() {
|
520
|
+
location = depth_index[depth].start_of_scope;
|
521
|
+
current_val = pj.tape[location];
|
522
|
+
current_type = (current_val >> 56);
|
523
|
+
}
|
524
|
+
|
525
|
+
bool ParsedJson::Iterator::next() {
|
526
|
+
size_t npos;
|
527
|
+
if ((current_type == '[') || (current_type == '{')) {
|
528
|
+
// we need to jump
|
529
|
+
npos = (current_val & JSON_VALUE_MASK);
|
530
|
+
} else {
|
531
|
+
npos = location + ((current_type == 'd' || current_type == 'l') ? 2 : 1);
|
532
|
+
}
|
533
|
+
uint64_t next_val = pj.tape[npos];
|
534
|
+
uint8_t next_type = (next_val >> 56);
|
535
|
+
if ((next_type == ']') || (next_type == '}')) {
|
536
|
+
return false; // we reached the end of the scope
|
537
|
+
}
|
538
|
+
location = npos;
|
539
|
+
current_val = next_val;
|
540
|
+
current_type = next_type;
|
541
|
+
return true;
|
542
|
+
}
|
543
|
+
} // namespace simdjson
|
544
|
+
#endif
|