simdjson 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clang-format +5 -0
- data/.gitignore +14 -0
- data/.gitmodules +3 -0
- data/.rubocop.yml +9 -0
- data/.travis.yml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +32 -0
- data/benchmark/apache_builds.json +4421 -0
- data/benchmark/demo.json +15 -0
- data/benchmark/github_events.json +1390 -0
- data/benchmark/run_benchmark.rb +30 -0
- data/ext/simdjson/extconf.rb +22 -0
- data/ext/simdjson/simdjson.cpp +76 -0
- data/ext/simdjson/simdjson.hpp +6 -0
- data/lib/simdjson/version.rb +3 -0
- data/lib/simdjson.rb +2 -0
- data/simdjson.gemspec +35 -0
- data/vendor/.gitkeep +0 -0
- data/vendor/simdjson/AUTHORS +3 -0
- data/vendor/simdjson/CMakeLists.txt +63 -0
- data/vendor/simdjson/CONTRIBUTORS +27 -0
- data/vendor/simdjson/Dockerfile +10 -0
- data/vendor/simdjson/LICENSE +201 -0
- data/vendor/simdjson/Makefile +203 -0
- data/vendor/simdjson/Notes.md +85 -0
- data/vendor/simdjson/README.md +581 -0
- data/vendor/simdjson/amalgamation.sh +158 -0
- data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
- data/vendor/simdjson/benchmark/benchmark.h +223 -0
- data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
- data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
- data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
- data/vendor/simdjson/benchmark/parse.cpp +393 -0
- data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
- data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
- data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
- data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
- data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/gbps.png +0 -0
- data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
- data/vendor/simdjson/images/halvarflake.png +0 -0
- data/vendor/simdjson/images/logo.png +0 -0
- data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
- data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
- data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
- data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
- data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
- data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
- data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
- data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
- data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
- data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
- data/vendor/simdjson/include/simdjson/portability.h +172 -0
- data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
- data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
- data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
- data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
- data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
- data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
- data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
- data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
- data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
- data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
- data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
- data/vendor/simdjson/scripts/issue150.sh +14 -0
- data/vendor/simdjson/scripts/javascript/README.md +3 -0
- data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
- data/vendor/simdjson/scripts/minifier.sh +11 -0
- data/vendor/simdjson/scripts/parseandstat.sh +24 -0
- data/vendor/simdjson/scripts/parser.sh +11 -0
- data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
- data/vendor/simdjson/scripts/plotparse.sh +98 -0
- data/vendor/simdjson/scripts/selectparser.sh +11 -0
- data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
- data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
- data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
- data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
- data/vendor/simdjson/scripts/testjson2json.sh +99 -0
- data/vendor/simdjson/scripts/transitions/Makefile +10 -0
- data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
- data/vendor/simdjson/singleheader/README.md +1 -0
- data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
- data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
- data/vendor/simdjson/singleheader/simdjson.h +39692 -0
- data/vendor/simdjson/src/CMakeLists.txt +67 -0
- data/vendor/simdjson/src/jsonioutil.cpp +35 -0
- data/vendor/simdjson/src/jsonminifier.cpp +285 -0
- data/vendor/simdjson/src/jsonparser.cpp +91 -0
- data/vendor/simdjson/src/parsedjson.cpp +323 -0
- data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
- data/vendor/simdjson/src/simdjson.cpp +30 -0
- data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
- data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
- data/vendor/simdjson/style/clang-format-check.sh +25 -0
- data/vendor/simdjson/style/clang-format.sh +25 -0
- data/vendor/simdjson/style/run-clang-format.py +326 -0
- data/vendor/simdjson/tape.md +134 -0
- data/vendor/simdjson/tests/CMakeLists.txt +25 -0
- data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
- data/vendor/simdjson/tests/basictests.cpp +75 -0
- data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
- data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
- data/vendor/simdjson/tests/pointercheck.cpp +38 -0
- data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
- data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
- data/vendor/simdjson/tools/CMakeLists.txt +3 -0
- data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
- data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
- data/vendor/simdjson/tools/json2json.cpp +112 -0
- data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
- data/vendor/simdjson/tools/jsonstats.cpp +143 -0
- data/vendor/simdjson/tools/minify.cpp +21 -0
- data/vendor/simdjson/tools/release.py +125 -0
- data/vendor/simdjson/windows/dirent_portable.h +1043 -0
- metadata +273 -0
@@ -0,0 +1,323 @@
|
|
1
|
+
#include "simdjson/parsedjson.h"
|
2
|
+
|
3
|
+
namespace simdjson {
|
4
|
+
// Default constructor: starts with every buffer pointer null, so the object
// holds no allocations until buffers are assigned.
ParsedJson::ParsedJson()
    : structural_indexes(nullptr),
      tape(nullptr),
      containing_scope_offset(nullptr),
      ret_address(nullptr),
      string_buf(nullptr),
      current_string_buf_loc(nullptr) {
}
|
8
|
+
|
9
|
+
// Destructor: hands all cleanup to deallocate().
ParsedJson::~ParsedJson() {
  deallocate();
}
|
10
|
+
|
11
|
+
// Move constructor: takes over every buffer owned by `p`.
//
// Besides stealing the pointers, this also
//  - carries over `error_code`, so get_error_code()/get_error_message() on the
//    new object report the state of the document that was moved in, and
//  - resets the bookkeeping of `p` (capacities, counters, `valid`), so the
//    moved-from object does not claim to hold a valid document while its
//    buffers are null.
ParsedJson::ParsedJson(ParsedJson &&p)
    : byte_capacity(p.byte_capacity), depth_capacity(p.depth_capacity),
      tape_capacity(p.tape_capacity), string_capacity(p.string_capacity),
      current_loc(p.current_loc), n_structural_indexes(p.n_structural_indexes),
      structural_indexes(p.structural_indexes), tape(p.tape),
      containing_scope_offset(p.containing_scope_offset),
      ret_address(p.ret_address), string_buf(p.string_buf),
      current_string_buf_loc(p.current_string_buf_loc), valid(p.valid) {
  // Assigned in the body (not the initializer list) so the result does not
  // depend on where error_code sits in the member declaration order.
  error_code = p.error_code;

  // `p` no longer owns anything: null pointers make its destructor a no-op.
  p.structural_indexes = nullptr;
  p.tape = nullptr;
  p.containing_scope_offset = nullptr;
  p.ret_address = nullptr;
  p.string_buf = nullptr;
  p.current_string_buf_loc = nullptr;

  // Keep the moved-from object self-consistent.
  p.byte_capacity = 0;
  p.depth_capacity = 0;
  p.tape_capacity = 0;
  p.string_capacity = 0;
  p.current_loc = 0;
  p.n_structural_indexes = 0;
  p.valid = false;
}
|
26
|
+
|
27
|
+
// Ensures this ParsedJson can hold a document of `len` bytes nested up to
// `max_depth` levels.
//
// A zero `len` is bumped to 64 and a zero `max_depth` to 1. Returns false when
// `len` exceeds SIMDJSON_MAXSIZE_BYTES or when any allocation fails; returns
// true when the existing buffers already suffice or new ones were allocated.
// On a fresh allocation, previous content is discarded and `valid` is false.
WARN_UNUSED
bool ParsedJson::allocate_capacity(size_t len, size_t max_depth) {
  if (max_depth == 0) {
    max_depth = 1; // don't let the user allocate nothing
  }
  if (len == 0) {
    len = 64; // allocating 0 bytes is wasteful.
  }
  if (len > SIMDJSON_MAXSIZE_BYTES) {
    return false;
  }
  // Nothing to do only if BOTH dimensions already fit. (The depth comparison
  // used to be inverted, which kept buffers that were too shallow.)
  if ((len <= byte_capacity) && (max_depth <= depth_capacity)) {
    return true;
  }
  deallocate();
  valid = false;
  byte_capacity = 0; // will only set it to len after allocations are a success
  n_structural_indexes = 0;
  uint32_t max_structures = ROUNDUP_N(len, 64) + 2 + 7;
  structural_indexes = new (std::nothrow) uint32_t[max_structures];
  // a pathological input like "[[[[..." would generate len tape elements, so
  // need a capacity of len + 1
  size_t local_tape_capacity = ROUNDUP_N(len + 1, 64);
  // a document with only zero-length strings... could have len/3 string
  // and we would need len/3 * 5 bytes on the string buffer
  size_t local_string_capacity = ROUNDUP_N(5 * len / 3 + 32, 64);
  string_buf = new (std::nothrow) uint8_t[local_string_capacity];
  tape = new (std::nothrow) uint64_t[local_tape_capacity];
  containing_scope_offset = new (std::nothrow) uint32_t[max_depth];
#ifdef SIMDJSON_USE_COMPUTED_GOTO
  ret_address = new (std::nothrow) void *[max_depth];
#else
  ret_address = new (std::nothrow) char[max_depth];
#endif
  if ((string_buf == nullptr) || (tape == nullptr) ||
      (containing_scope_offset == nullptr) || (ret_address == nullptr) ||
      (structural_indexes == nullptr)) {
    std::cerr << "Could not allocate memory" << std::endl;
    // Free whatever did get allocated and null every pointer: the destructor
    // calls deallocate(), which would otherwise delete[] these a second time.
    delete[] ret_address;
    ret_address = nullptr;
    delete[] containing_scope_offset;
    containing_scope_offset = nullptr;
    delete[] tape;
    tape = nullptr;
    delete[] string_buf;
    string_buf = nullptr;
    delete[] structural_indexes;
    structural_indexes = nullptr;
    current_string_buf_loc = nullptr;
    return false;
  }
  /*
  // We do not need to initialize this content for parsing, though we could
  // need to initialize it for safety.
  memset(string_buf, 0 , local_string_capacity);
  memset(structural_indexes, 0, max_structures * sizeof(uint32_t));
  memset(tape, 0, local_tape_capacity * sizeof(uint64_t));
  */
  byte_capacity = len;
  depth_capacity = max_depth;
  tape_capacity = local_tape_capacity;
  string_capacity = local_string_capacity;
  return true;
}
|
86
|
+
|
87
|
+
// Reports the current value of the `valid` flag.
bool ParsedJson::is_valid() const {
  return valid;
}
|
88
|
+
|
89
|
+
// Returns the stored numeric error code.
int ParsedJson::get_error_code() const {
  return error_code;
}
|
90
|
+
|
91
|
+
std::string ParsedJson::get_error_message() const {
|
92
|
+
return error_message(error_code);
|
93
|
+
}
|
94
|
+
|
95
|
+
void ParsedJson::deallocate() {
|
96
|
+
byte_capacity = 0;
|
97
|
+
depth_capacity = 0;
|
98
|
+
tape_capacity = 0;
|
99
|
+
string_capacity = 0;
|
100
|
+
delete[] ret_address;
|
101
|
+
delete[] containing_scope_offset;
|
102
|
+
delete[] tape;
|
103
|
+
delete[] string_buf;
|
104
|
+
delete[] structural_indexes;
|
105
|
+
valid = false;
|
106
|
+
}
|
107
|
+
|
108
|
+
// Resets the write cursors: marks the document invalid, rewinds current_loc
// to 0 and points the string cursor at the start of string_buf.
void ParsedJson::init() {
  valid = false;
  current_loc = 0;
  current_string_buf_loc = string_buf;
}
|
113
|
+
|
114
|
+
// Writes the parsed document to `os` as compact JSON by walking the tape.
//
// Returns false, possibly after emitting partial output, when the document is
// not valid, when the tape does not start with a root ('r') word, when the
// root length exceeds tape_capacity, or when an unexpected or truncated tape
// entry is met. Diagnostics go to stderr.
WARN_UNUSED
bool ParsedJson::print_json(std::ostream &os) {
  if (!valid) {
    return false;
  }
  uint32_t string_length;
  size_t tape_idx = 0;
  uint64_t tape_val = tape[tape_idx];
  uint8_t type = (tape_val >> 56);
  size_t how_many = 0;
  if (type == 'r') {
    // payload of the root word: index one past the last tape entry
    how_many = tape_val & JSON_VALUE_MASK;
  } else {
    fprintf(stderr, "Error: no starting root node?");
    return false;
  }
  if (how_many > tape_capacity) {
    fprintf(
        stderr,
        "We may be exceeding the tape capacity. Is this a valid document?\n");
    return false;
  }
  tape_idx++;
  // Per-depth state: whether the scope is an object, and how many entries
  // have been seen in it (used to place ',' and ':').
  bool *in_object = new bool[depth_capacity];
  auto *in_object_idx = new size_t[depth_capacity];
  int depth = 1; // only root at level 0
  in_object_idx[depth] = 0;
  in_object[depth] = false;
  for (; tape_idx < how_many; tape_idx++) {
    tape_val = tape[tape_idx];
    uint64_t payload = tape_val & JSON_VALUE_MASK;
    type = (tape_val >> 56);
    if (!in_object[depth]) {
      if ((in_object_idx[depth] > 0) && (type != ']')) {
        os << ",";
      }
      in_object_idx[depth]++;
    } else { // if (in_object) {
      // even positions are keys, odd positions are values
      if ((in_object_idx[depth] > 0) && ((in_object_idx[depth] & 1) == 0) &&
          (type != '}')) {
        os << ",";
      }
      if (((in_object_idx[depth] & 1) == 1)) {
        os << ":";
      }
      in_object_idx[depth]++;
    }
    switch (type) {
    case '"': // we have a string
      os << '"';
      // strings are stored as a 32-bit length followed by the bytes
      memcpy(&string_length, string_buf + payload, sizeof(uint32_t));
      // Pass `os` explicitly: the overload without a stream argument cannot
      // write to the caller's stream, so string contents would not end up
      // between the quotes written to `os`.
      print_with_escapes(
          reinterpret_cast<const char *>(string_buf + payload +
                                         sizeof(uint32_t)),
          os, string_length);
      os << '"';
      break;
    case 'l': // we have a long int
      // the value lives in the next tape word
      if (tape_idx + 1 >= how_many) {
        delete[] in_object;
        delete[] in_object_idx;
        return false;
      }
      os << static_cast<int64_t>(tape[++tape_idx]);
      break;
    case 'd': // we have a double
      if (tape_idx + 1 >= how_many) {
        delete[] in_object;
        delete[] in_object_idx;
        return false;
      }
      double answer;
      memcpy(&answer, &tape[++tape_idx], sizeof(answer));
      os << answer;
      break;
    case 'n': // we have a null
      os << "null";
      break;
    case 't': // we have a true
      os << "true";
      break;
    case 'f': // we have a false
      os << "false";
      break;
    case '{': // we have an object
      os << '{';
      depth++;
      in_object[depth] = true;
      in_object_idx[depth] = 0;
      break;
    case '}': // we end an object
      depth--;
      os << '}';
      break;
    case '[': // we start an array
      os << '[';
      depth++;
      in_object[depth] = false;
      in_object_idx[depth] = 0;
      break;
    case ']': // we end an array
      depth--;
      os << ']';
      break;
    case 'r': // we start and end with the root node
      fprintf(stderr, "should we be hitting the root node?\n");
      delete[] in_object;
      delete[] in_object_idx;
      return false;
    default:
      fprintf(stderr, "bug %c\n", type);
      delete[] in_object;
      delete[] in_object_idx;
      return false;
    }
  }
  delete[] in_object;
  delete[] in_object_idx;
  return true;
}
|
233
|
+
|
234
|
+
// Writes a human-readable listing of the tape to `os`, one entry per line as
// "<index> : <description>".
//
// Returns false, possibly after partial output, when the document is not
// valid, when the tape does not begin with a root ('r') word, when a number
// entry is truncated, or when an unexpected entry type is met.
WARN_UNUSED
bool ParsedJson::dump_raw_tape(std::ostream &os) {
  if (!valid) {
    return false;
  }
  uint32_t string_length;
  size_t tape_idx = 0;
  uint64_t tape_val = tape[tape_idx];
  uint8_t type = (tape_val >> 56);
  os << tape_idx << " : " << type;
  tape_idx++;
  size_t how_many = 0;
  if (type == 'r') {
    // payload of the root word: index one past the last tape entry
    how_many = tape_val & JSON_VALUE_MASK;
  } else {
    fprintf(stderr, "Error: no starting root node?");
    return false;
  }
  os << "\t// pointing to " << how_many << " (right after last node)\n";
  uint64_t payload;
  for (; tape_idx < how_many; tape_idx++) {
    os << tape_idx << " : ";
    tape_val = tape[tape_idx];
    payload = tape_val & JSON_VALUE_MASK;
    type = (tape_val >> 56);
    switch (type) {
    case '"': // we have a string
      os << "string \"";
      // strings are stored as a 32-bit length followed by the bytes
      memcpy(&string_length, string_buf + payload, sizeof(uint32_t));
      // Pass `os` explicitly: the overload without a stream argument cannot
      // write to the caller's stream, so the string body would be missing
      // from the dump written to `os`.
      print_with_escapes(
          reinterpret_cast<const char *>(string_buf + payload +
                                         sizeof(uint32_t)),
          os, string_length);
      os << '"';
      os << '\n';
      break;
    case 'l': // we have a long int
      // the value lives in the next tape word
      if (tape_idx + 1 >= how_many) {
        return false;
      }
      os << "integer " << static_cast<int64_t>(tape[++tape_idx]) << "\n";
      break;
    case 'd': // we have a double
      os << "float ";
      if (tape_idx + 1 >= how_many) {
        return false;
      }
      double answer;
      memcpy(&answer, &tape[++tape_idx], sizeof(answer));
      os << answer << '\n';
      break;
    case 'n': // we have a null
      os << "null\n";
      break;
    case 't': // we have a true
      os << "true\n";
      break;
    case 'f': // we have a false
      os << "false\n";
      break;
    case '{': // we have an object
      os << "{\t// pointing to next tape location " << payload
         << " (first node after the scope) \n";
      break;
    case '}': // we end an object
      os << "}\t// pointing to previous tape location " << payload
         << " (start of the scope) \n";
      break;
    case '[': // we start an array
      os << "[\t// pointing to next tape location " << payload
         << " (first node after the scope) \n";
      break;
    case ']': // we end an array
      os << "]\t// pointing to previous tape location " << payload
         << " (start of the scope) \n";
      break;
    case 'r': // we start and end with the root node
      printf("end of root\n");
      return false;
    default:
      return false;
    }
  }
  // closing root word, stored at index how_many
  tape_val = tape[tape_idx];
  payload = tape_val & JSON_VALUE_MASK;
  type = (tape_val >> 56);
  os << tape_idx << " : " << type << "\t// pointing to " << payload
     << " (start root)\n";
  return true;
}
|
323
|
+
} // namespace simdjson
|
@@ -0,0 +1,272 @@
|
|
1
|
+
#include "simdjson/common_defs.h"
|
2
|
+
#include "simdjson/parsedjson.h"
|
3
|
+
#include <iterator>
|
4
|
+
|
5
|
+
namespace simdjson {
|
6
|
+
// Builds an iterator over `pj_`, positioned on the first entry after the root
// word when the tape has one.
//
// Throws InvalidJSON when `pj_` is not valid or when the tape does not begin
// with a root ('r') word. Allocation failure propagates from `new`.
ParsedJson::Iterator::Iterator(ParsedJson &pj_)
    : pj(pj_), depth(0), location(0), tape_length(0), depth_index(nullptr) {
  if (!pj.is_valid()) {
    throw InvalidJSON();
  }
  // we overallocate by "1" to silence a warning in Visual Studio
  depth_index = new scopeindex_t[pj.depth_capacity + 1];
  depth_index[0].start_of_scope = location;
  current_val = pj.tape[location++];
  current_type = (current_val >> 56);
  depth_index[0].scope_type = current_type;
  if (current_type != 'r') {
    // should never happen
    // The destructor does not run when a constructor throws, so release the
    // scope stack here instead of leaking it.
    delete[] depth_index;
    depth_index = nullptr;
    throw InvalidJSON();
  }
  tape_length = current_val & JSON_VALUE_MASK;
  if (location < tape_length) {
    // If we make it here, then depth_capacity must >=2, but the compiler
    // may not know this.
    current_val = pj.tape[location];
    current_type = (current_val >> 56);
    depth++;
    depth_index[depth].start_of_scope = location;
    depth_index[depth].scope_type = current_type;
  }
}
|
37
|
+
|
38
|
+
// Releases the scope stack owned by this iterator.
ParsedJson::Iterator::~Iterator() {
  delete[] depth_index;
}
|
39
|
+
|
40
|
+
// Copy constructor: refers to the same ParsedJson and position as `o`, but
// with its own copy of the scope stack.
ParsedJson::Iterator::Iterator(const Iterator &o) noexcept
    : pj(o.pj),
      depth(o.depth),
      location(o.location),
      tape_length(o.tape_length),
      current_type(o.current_type),
      current_val(o.current_val),
      depth_index(nullptr) {
  // allocation might throw
  depth_index = new scopeindex_t[pj.depth_capacity];
  // duplicate depth_capacity scope entries from the source iterator
  memcpy(depth_index, o.depth_index,
         pj.depth_capacity * sizeof(scopeindex_t));
}
|
50
|
+
|
51
|
+
// Move constructor: copies the position fields and steals the scope stack of
// `o`, leaving `o` with a null depth_index so its destructor frees nothing.
ParsedJson::Iterator::Iterator(Iterator &&o) noexcept
    : pj(o.pj),
      depth(o.depth),
      location(o.location),
      tape_length(o.tape_length),
      current_type(o.current_type),
      current_val(o.current_val),
      depth_index(o.depth_index) {
  // ownership of the array now belongs to *this
  o.depth_index = nullptr;
}
|
57
|
+
|
58
|
+
bool ParsedJson::Iterator::print(std::ostream &os, bool escape_strings) const {
|
59
|
+
if (!is_ok()) {
|
60
|
+
return false;
|
61
|
+
}
|
62
|
+
switch (current_type) {
|
63
|
+
case '"': // we have a string
|
64
|
+
os << '"';
|
65
|
+
if (escape_strings) {
|
66
|
+
print_with_escapes(get_string(), os, get_string_length());
|
67
|
+
} else {
|
68
|
+
// was: os << get_string();, but given that we can include null chars, we
|
69
|
+
// have to do something crazier:
|
70
|
+
std::copy(get_string(), get_string() + get_string_length(),
|
71
|
+
std::ostream_iterator<char>(os));
|
72
|
+
}
|
73
|
+
os << '"';
|
74
|
+
break;
|
75
|
+
case 'l': // we have a long int
|
76
|
+
os << get_integer();
|
77
|
+
break;
|
78
|
+
case 'd':
|
79
|
+
os << get_double();
|
80
|
+
break;
|
81
|
+
case 'n': // we have a null
|
82
|
+
os << "null";
|
83
|
+
break;
|
84
|
+
case 't': // we have a true
|
85
|
+
os << "true";
|
86
|
+
break;
|
87
|
+
case 'f': // we have a false
|
88
|
+
os << "false";
|
89
|
+
break;
|
90
|
+
case '{': // we have an object
|
91
|
+
case '}': // we end an object
|
92
|
+
case '[': // we start an array
|
93
|
+
case ']': // we end an array
|
94
|
+
os << static_cast<char>(current_type);
|
95
|
+
break;
|
96
|
+
default:
|
97
|
+
return false;
|
98
|
+
}
|
99
|
+
return true;
|
100
|
+
}
|
101
|
+
|
102
|
+
bool ParsedJson::Iterator::move_to(const char *pointer, uint32_t length) {
|
103
|
+
char *new_pointer = nullptr;
|
104
|
+
if (pointer[0] == '#') {
|
105
|
+
// Converting fragment representation to string representation
|
106
|
+
new_pointer = new char[length];
|
107
|
+
uint32_t new_length = 0;
|
108
|
+
for (uint32_t i = 1; i < length; i++) {
|
109
|
+
if (pointer[i] == '%' && pointer[i + 1] == 'x') {
|
110
|
+
try {
|
111
|
+
int fragment =
|
112
|
+
std::stoi(std::string(&pointer[i + 2], 2), nullptr, 16);
|
113
|
+
if (fragment == '\\' || fragment == '"' || (fragment <= 0x1F)) {
|
114
|
+
// escaping the character
|
115
|
+
new_pointer[new_length] = '\\';
|
116
|
+
new_length++;
|
117
|
+
}
|
118
|
+
new_pointer[new_length] = fragment;
|
119
|
+
i += 3;
|
120
|
+
} catch (std::invalid_argument &) {
|
121
|
+
delete[] new_pointer;
|
122
|
+
return false; // the fragment is invalid
|
123
|
+
}
|
124
|
+
} else {
|
125
|
+
new_pointer[new_length] = pointer[i];
|
126
|
+
}
|
127
|
+
new_length++;
|
128
|
+
}
|
129
|
+
length = new_length;
|
130
|
+
pointer = new_pointer;
|
131
|
+
}
|
132
|
+
|
133
|
+
// saving the current state
|
134
|
+
size_t depth_s = depth;
|
135
|
+
size_t location_s = location;
|
136
|
+
uint8_t current_type_s = current_type;
|
137
|
+
uint64_t current_val_s = current_val;
|
138
|
+
scopeindex_t *depth_index_s = depth_index;
|
139
|
+
|
140
|
+
rewind(); // The json pointer is used from the root of the document.
|
141
|
+
|
142
|
+
bool found = relative_move_to(pointer, length);
|
143
|
+
delete[] new_pointer;
|
144
|
+
|
145
|
+
if (!found) {
|
146
|
+
// since the pointer has found nothing, we get back to the original
|
147
|
+
// position.
|
148
|
+
depth = depth_s;
|
149
|
+
location = location_s;
|
150
|
+
current_type = current_type_s;
|
151
|
+
current_val = current_val_s;
|
152
|
+
depth_index = depth_index_s;
|
153
|
+
}
|
154
|
+
|
155
|
+
return found;
|
156
|
+
}
|
157
|
+
|
158
|
+
bool ParsedJson::Iterator::relative_move_to(const char *pointer,
|
159
|
+
uint32_t length) {
|
160
|
+
if (length == 0) {
|
161
|
+
// returns the whole document
|
162
|
+
return true;
|
163
|
+
}
|
164
|
+
|
165
|
+
if (pointer[0] != '/') {
|
166
|
+
// '/' must be the first character
|
167
|
+
return false;
|
168
|
+
}
|
169
|
+
|
170
|
+
// finding the key in an object or the index in an array
|
171
|
+
std::string key_or_index;
|
172
|
+
uint32_t offset = 1;
|
173
|
+
|
174
|
+
// checking for the "-" case
|
175
|
+
if (is_array() && pointer[1] == '-') {
|
176
|
+
if (length != 2) {
|
177
|
+
// the pointer must be exactly "/-"
|
178
|
+
// there can't be anything more after '-' as an index
|
179
|
+
return false;
|
180
|
+
}
|
181
|
+
key_or_index = '-';
|
182
|
+
offset = length; // will skip the loop coming right after
|
183
|
+
}
|
184
|
+
|
185
|
+
// We either transform the first reference token to a valid json key
|
186
|
+
// or we make sure it is a valid index in an array.
|
187
|
+
for (; offset < length; offset++) {
|
188
|
+
if (pointer[offset] == '/') {
|
189
|
+
// beginning of the next key or index
|
190
|
+
break;
|
191
|
+
}
|
192
|
+
if (is_array() && (pointer[offset] < '0' || pointer[offset] > '9')) {
|
193
|
+
// the index of an array must be an integer
|
194
|
+
// we also make sure std::stoi won't discard whitespaces later
|
195
|
+
return false;
|
196
|
+
}
|
197
|
+
if (pointer[offset] == '~') {
|
198
|
+
// "~1" represents "/"
|
199
|
+
if (pointer[offset + 1] == '1') {
|
200
|
+
key_or_index += '/';
|
201
|
+
offset++;
|
202
|
+
continue;
|
203
|
+
}
|
204
|
+
// "~0" represents "~"
|
205
|
+
if (pointer[offset + 1] == '0') {
|
206
|
+
key_or_index += '~';
|
207
|
+
offset++;
|
208
|
+
continue;
|
209
|
+
}
|
210
|
+
}
|
211
|
+
if (pointer[offset] == '\\') {
|
212
|
+
if (pointer[offset + 1] == '\\' || pointer[offset + 1] == '"' ||
|
213
|
+
(pointer[offset + 1] <= 0x1F)) {
|
214
|
+
key_or_index += pointer[offset + 1];
|
215
|
+
offset++;
|
216
|
+
continue;
|
217
|
+
}
|
218
|
+
return false; // invalid escaped character
|
219
|
+
}
|
220
|
+
if (pointer[offset] == '\"') {
|
221
|
+
// unescaped quote character. this is an invalid case.
|
222
|
+
// lets do nothing and assume most pointers will be valid.
|
223
|
+
// it won't find any corresponding json key anyway.
|
224
|
+
// return false;
|
225
|
+
}
|
226
|
+
key_or_index += pointer[offset];
|
227
|
+
}
|
228
|
+
|
229
|
+
bool found = false;
|
230
|
+
if (is_object()) {
|
231
|
+
if (move_to_key(key_or_index.c_str(), key_or_index.length())) {
|
232
|
+
found = relative_move_to(pointer + offset, length - offset);
|
233
|
+
}
|
234
|
+
} else if (is_array()) {
|
235
|
+
if (key_or_index == "-") { // handling "-" case first
|
236
|
+
if (down()) {
|
237
|
+
while (next())
|
238
|
+
; // moving to the end of the array
|
239
|
+
// moving to the nonexistent value right after...
|
240
|
+
size_t npos;
|
241
|
+
if ((current_type == '[') || (current_type == '{')) {
|
242
|
+
// we need to jump
|
243
|
+
npos = (current_val & JSON_VALUE_MASK);
|
244
|
+
} else {
|
245
|
+
npos =
|
246
|
+
location + ((current_type == 'd' || current_type == 'l') ? 2 : 1);
|
247
|
+
}
|
248
|
+
location = npos;
|
249
|
+
current_val = pj.tape[npos];
|
250
|
+
current_type = (current_val >> 56);
|
251
|
+
return true; // how could it fail ?
|
252
|
+
}
|
253
|
+
} else { // regular numeric index
|
254
|
+
// The index can't have a leading '0'
|
255
|
+
if (key_or_index[0] == '0' && key_or_index.length() > 1) {
|
256
|
+
return false;
|
257
|
+
}
|
258
|
+
// it cannot be empty
|
259
|
+
if (key_or_index.length() == 0) {
|
260
|
+
return false;
|
261
|
+
}
|
262
|
+
// we already checked the index contains only valid digits
|
263
|
+
uint32_t index = std::stoi(key_or_index);
|
264
|
+
if (move_to_index(index)) {
|
265
|
+
found = relative_move_to(pointer + offset, length - offset);
|
266
|
+
}
|
267
|
+
}
|
268
|
+
}
|
269
|
+
|
270
|
+
return found;
|
271
|
+
}
|
272
|
+
} // namespace simdjson
|
@@ -0,0 +1,30 @@
|
|
1
|
+
#include "simdjson/simdjson.h"
|
2
|
+
#include <map>
|
3
|
+
|
4
|
+
namespace simdjson {
|
5
|
+
const std::map<int, const std::string> error_strings = {
|
6
|
+
{SUCCESS, "No errors"},
|
7
|
+
{CAPACITY, "This ParsedJson can't support a document that big"},
|
8
|
+
{MEMALLOC, "Error allocating memory, we're most likely out of memory"},
|
9
|
+
{TAPE_ERROR, "Something went wrong while writing to the tape"},
|
10
|
+
{STRING_ERROR, "Problem while parsing a string"},
|
11
|
+
{T_ATOM_ERROR,
|
12
|
+
"Problem while parsing an atom starting with the letter 't'"},
|
13
|
+
{F_ATOM_ERROR,
|
14
|
+
"Problem while parsing an atom starting with the letter 'f'"},
|
15
|
+
{N_ATOM_ERROR,
|
16
|
+
"Problem while parsing an atom starting with the letter 'n'"},
|
17
|
+
{NUMBER_ERROR, "Problem while parsing a number"},
|
18
|
+
{UTF8_ERROR, "The input is not valid UTF-8"},
|
19
|
+
{UNITIALIZED, "Unitialized"},
|
20
|
+
{EMPTY, "Empty"},
|
21
|
+
{UNESCAPED_CHARS, "Within strings, some characters must be escapted, we "
|
22
|
+
"found unescapted characters"},
|
23
|
+
{UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as "
|
24
|
+
"you may have found a bug in simdjson"},
|
25
|
+
};
|
26
|
+
|
27
|
+
const std::string &error_message(const int error_code) {
|
28
|
+
return error_strings.at(error_code);
|
29
|
+
}
|
30
|
+
} // namespace simdjson
|
@@ -0,0 +1,41 @@
|
|
1
|
+
#include "simdjson/portability.h"
|
2
|
+
|
3
|
+
#ifdef IS_X86_64
|
4
|
+
|
5
|
+
#include "simdjson/stage1_find_marks_haswell.h"
|
6
|
+
#include "simdjson/stage1_find_marks_westmere.h"
|
7
|
+
TARGET_HASWELL
|
8
|
+
namespace simdjson {
|
9
|
+
template <>
|
10
|
+
int find_structural_bits<Architecture::HASWELL>(const uint8_t *buf, size_t len,
|
11
|
+
ParsedJson &pj) {
|
12
|
+
FIND_STRUCTURAL_BITS(Architecture::HASWELL, buf, len, pj,
|
13
|
+
simdjson::haswell::flatten_bits);
|
14
|
+
}
|
15
|
+
} // namespace simdjson
|
16
|
+
UNTARGET_REGION
|
17
|
+
|
18
|
+
TARGET_WESTMERE
|
19
|
+
namespace simdjson {
|
20
|
+
template <>
|
21
|
+
int find_structural_bits<Architecture::WESTMERE>(const uint8_t *buf, size_t len,
|
22
|
+
ParsedJson &pj) {
|
23
|
+
FIND_STRUCTURAL_BITS(Architecture::WESTMERE, buf, len, pj,
|
24
|
+
simdjson::flatten_bits);
|
25
|
+
}
|
26
|
+
} // namespace simdjson
|
27
|
+
UNTARGET_REGION
|
28
|
+
|
29
|
+
#endif
|
30
|
+
|
31
|
+
#ifdef IS_ARM64
|
32
|
+
#include "simdjson/stage1_find_marks_arm64.h"
|
33
|
+
namespace simdjson {
|
34
|
+
template <>
|
35
|
+
int find_structural_bits<Architecture::ARM64>(const uint8_t *buf, size_t len,
|
36
|
+
ParsedJson &pj) {
|
37
|
+
FIND_STRUCTURAL_BITS(Architecture::ARM64, buf, len, pj,
|
38
|
+
simdjson::flatten_bits);
|
39
|
+
}
|
40
|
+
} // namespace simdjson
|
41
|
+
#endif
|