simdjson 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.clang-format +5 -0
- data/.gitignore +14 -0
- data/.gitmodules +3 -0
- data/.rubocop.yml +9 -0
- data/.travis.yml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +32 -0
- data/benchmark/apache_builds.json +4421 -0
- data/benchmark/demo.json +15 -0
- data/benchmark/github_events.json +1390 -0
- data/benchmark/run_benchmark.rb +30 -0
- data/ext/simdjson/extconf.rb +22 -0
- data/ext/simdjson/simdjson.cpp +76 -0
- data/ext/simdjson/simdjson.hpp +6 -0
- data/lib/simdjson/version.rb +3 -0
- data/lib/simdjson.rb +2 -0
- data/simdjson.gemspec +35 -0
- data/vendor/.gitkeep +0 -0
- data/vendor/simdjson/AUTHORS +3 -0
- data/vendor/simdjson/CMakeLists.txt +63 -0
- data/vendor/simdjson/CONTRIBUTORS +27 -0
- data/vendor/simdjson/Dockerfile +10 -0
- data/vendor/simdjson/LICENSE +201 -0
- data/vendor/simdjson/Makefile +203 -0
- data/vendor/simdjson/Notes.md +85 -0
- data/vendor/simdjson/README.md +581 -0
- data/vendor/simdjson/amalgamation.sh +158 -0
- data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
- data/vendor/simdjson/benchmark/benchmark.h +223 -0
- data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
- data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
- data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
- data/vendor/simdjson/benchmark/parse.cpp +393 -0
- data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
- data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
- data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
- data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
- data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/gbps.png +0 -0
- data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
- data/vendor/simdjson/images/halvarflake.png +0 -0
- data/vendor/simdjson/images/logo.png +0 -0
- data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
- data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
- data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
- data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
- data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
- data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
- data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
- data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
- data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
- data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
- data/vendor/simdjson/include/simdjson/portability.h +172 -0
- data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
- data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
- data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
- data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
- data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
- data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
- data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
- data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
- data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
- data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
- data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
- data/vendor/simdjson/scripts/issue150.sh +14 -0
- data/vendor/simdjson/scripts/javascript/README.md +3 -0
- data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
- data/vendor/simdjson/scripts/minifier.sh +11 -0
- data/vendor/simdjson/scripts/parseandstat.sh +24 -0
- data/vendor/simdjson/scripts/parser.sh +11 -0
- data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
- data/vendor/simdjson/scripts/plotparse.sh +98 -0
- data/vendor/simdjson/scripts/selectparser.sh +11 -0
- data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
- data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
- data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
- data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
- data/vendor/simdjson/scripts/testjson2json.sh +99 -0
- data/vendor/simdjson/scripts/transitions/Makefile +10 -0
- data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
- data/vendor/simdjson/singleheader/README.md +1 -0
- data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
- data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
- data/vendor/simdjson/singleheader/simdjson.h +39692 -0
- data/vendor/simdjson/src/CMakeLists.txt +67 -0
- data/vendor/simdjson/src/jsonioutil.cpp +35 -0
- data/vendor/simdjson/src/jsonminifier.cpp +285 -0
- data/vendor/simdjson/src/jsonparser.cpp +91 -0
- data/vendor/simdjson/src/parsedjson.cpp +323 -0
- data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
- data/vendor/simdjson/src/simdjson.cpp +30 -0
- data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
- data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
- data/vendor/simdjson/style/clang-format-check.sh +25 -0
- data/vendor/simdjson/style/clang-format.sh +25 -0
- data/vendor/simdjson/style/run-clang-format.py +326 -0
- data/vendor/simdjson/tape.md +134 -0
- data/vendor/simdjson/tests/CMakeLists.txt +25 -0
- data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
- data/vendor/simdjson/tests/basictests.cpp +75 -0
- data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
- data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
- data/vendor/simdjson/tests/pointercheck.cpp +38 -0
- data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
- data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
- data/vendor/simdjson/tools/CMakeLists.txt +3 -0
- data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
- data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
- data/vendor/simdjson/tools/json2json.cpp +112 -0
- data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
- data/vendor/simdjson/tools/jsonstats.cpp +143 -0
- data/vendor/simdjson/tools/minify.cpp +21 -0
- data/vendor/simdjson/tools/release.py +125 -0
- data/vendor/simdjson/windows/dirent_portable.h +1043 -0
- metadata +273 -0
@@ -0,0 +1,567 @@
|
|
1
|
+
#include "simdjson/stage2_build_tape.h"
|
2
|
+
|
3
|
+
namespace simdjson {
|
4
|
+
|
5
|
+
// Reads the next structural character: loads the input position stored at
// pj.structural_indexes[i] into idx, advances i by one, and sets c to the
// byte found at buf[idx].
// The names i, idx, c, buf and pj are not parameters; they must be in scope
// wherever the macro is expanded.
// The body is wrapped in do { ... } while (0) so that `UPDATE_CHAR();` forms
// exactly one statement and remains correct inside an unbraced if/else.
#define UPDATE_CHAR()                                                          \
  do {                                                                         \
    idx = pj.structural_indexes[i++];                                          \
    c = buf[idx];                                                              \
  } while (0)
|
11
|
+
|
12
|
+
// Bookkeeping for "where do we resume once the current scope closes".
// SET_GOTO_*_CONTINUE() records the resume point for the current depth in
// pj.ret_address[depth]; GOTO_CONTINUE() jumps to whatever was recorded.
// The labels array_continue, object_continue and start_continue must exist
// in the function where these macros are expanded.
#ifdef SIMDJSON_USE_COMPUTED_GOTO
// Computed-goto flavour: the label address itself is stored (&&label) and
// jumped to directly.
#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = &&array_continue;
#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = &&object_continue;
#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = &&start_continue;
#define GOTO_CONTINUE() goto *pj.ret_address[depth];
#else
// Portable flavour: a one-character tag is stored ('a' array, 'o' object,
// 's' start) and decoded again when resuming. Any tag other than 'a' or 'o'
// resumes at start_continue.
#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = 'a';
#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = 'o';
#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = 's';
#define GOTO_CONTINUE()                                                        \
  {                                                                            \
    switch (pj.ret_address[depth]) {                                           \
    case 'a':                                                                  \
      goto array_continue;                                                     \
    case 'o':                                                                  \
      goto object_continue;                                                    \
    default:                                                                   \
      goto start_continue;                                                     \
    }                                                                          \
  }
#endif
|
32
|
+
|
33
|
+
/************
|
34
|
+
* The JSON is parsed to a tape, see the accompanying tape.md file
|
35
|
+
* for documentation.
|
36
|
+
***********/
|
37
|
+
// We need to compile that code for multiple architectures. However, target
|
38
|
+
// attributes can be used only once by function definition. Huge macro seemed
|
39
|
+
// better than huge code duplication.
|
40
|
+
// Conceptually the macro is the body of:
//   int UNIFIED_MACHINE(const uint8_t *buf, size_t len, ParsedJson &pj)
// Parameters:
//   T   - architecture tag, forwarded to parse_string<T>.
//   buf - input bytes; positions read from pj.structural_indexes index into
//         it. When ALLOW_SAME_PAGE_BUFFER_OVERRUN is true, SIMDJSON_PADDING
//         bytes starting at buf + len are zeroed, so they must be writable.
//   len - input length; if it exceeds pj.byte_capacity the macro returns
//         simdjson::CAPACITY.
//   pj  - receives the tape. pj.error_code is set and returned on every path:
//         SUCCESS (pj.valid is set to true), CAPACITY, DEPTH_ERROR,
//         STRING_ERROR, NUMBER_ERROR, T_ATOM_ERROR, N_ATOM_ERROR,
//         F_ATOM_ERROR or TAPE_ERROR.
|
41
|
+
#define UNIFIED_MACHINE(T, buf, len, pj) \
|
42
|
+
{ \
|
43
|
+
if (ALLOW_SAME_PAGE_BUFFER_OVERRUN) { \
|
44
|
+
memset((uint8_t *)buf + len, 0, \
|
45
|
+
SIMDJSON_PADDING); /* to please valgrind */ \
|
46
|
+
} \
|
47
|
+
uint32_t i = 0; /* index of the structural character (0,1,2,3...) */ \
|
48
|
+
uint32_t \
|
49
|
+
idx; /* location of the structural character in the input (buf) */ \
|
50
|
+
uint8_t c; /* used to track the (structural) character we are looking at, \
|
51
|
+
updated */ \
|
52
|
+
/* by UPDATE_CHAR macro */ \
|
53
|
+
uint32_t depth = 0; /* could have an arbitrary starting depth */ \
|
54
|
+
pj.init(); /* sets is_valid to false */ \
|
55
|
+
if (pj.byte_capacity < len) { \
|
56
|
+
pj.error_code = simdjson::CAPACITY; \
|
57
|
+
return pj.error_code; \
|
58
|
+
} \
|
59
|
+
\
|
60
|
+
/*//////////////////////////// START STATE ///////////////////////////// \
|
61
|
+
*/ \
|
62
|
+
SET_GOTO_START_CONTINUE() \
|
63
|
+
pj.containing_scope_offset[depth] = pj.get_current_loc(); \
|
64
|
+
pj.write_tape(0, 'r'); /* r for root, 0 is going to get overwritten */ \
|
65
|
+
/* the root is used, if nothing else, to capture the size of the tape */ \
|
66
|
+
depth++; /* everything starts at depth = 1, depth = 0 is just for the \
|
67
|
+
root, the root may contain an object, an array or something \
|
68
|
+
else. */ \
|
69
|
+
if (depth >= pj.depth_capacity) { \
|
70
|
+
goto fail; \
|
71
|
+
} \
|
72
|
+
\
|
73
|
+
UPDATE_CHAR(); \
|
74
|
+
switch (c) { \
|
75
|
+
case '{': \
|
76
|
+
pj.containing_scope_offset[depth] = pj.get_current_loc(); \
|
77
|
+
SET_GOTO_START_CONTINUE(); \
|
78
|
+
depth++; \
|
79
|
+
if (depth >= pj.depth_capacity) { \
|
80
|
+
goto fail; \
|
81
|
+
} \
|
82
|
+
pj.write_tape( \
|
83
|
+
0, \
|
84
|
+
c); /* strangely, moving this to object_begin slows things down */ \
|
85
|
+
goto object_begin; \
|
86
|
+
case '[': \
|
87
|
+
pj.containing_scope_offset[depth] = pj.get_current_loc(); \
|
88
|
+
SET_GOTO_START_CONTINUE(); \
|
89
|
+
depth++; \
|
90
|
+
if (depth >= pj.depth_capacity) { \
|
91
|
+
goto fail; \
|
92
|
+
} \
|
93
|
+
pj.write_tape(0, c); \
|
94
|
+
goto array_begin; \
|
95
|
+
/* #define SIMDJSON_ALLOWANYTHINGINROOT \
|
96
|
+
* A JSON text is a serialized value. Note that certain previous \
|
97
|
+
* specifications of JSON constrained a JSON text to be an object or an \
|
98
|
+
* array. Implementations that generate only objects or arrays where a \
|
99
|
+
* JSON text is called for will be interoperable in the sense that all \
|
100
|
+
* implementations will accept these as conforming JSON texts. \
|
101
|
+
* https://tools.ietf.org/html/rfc8259 \
|
102
|
+
* #ifdef SIMDJSON_ALLOWANYTHINGINROOT */ \
|
103
|
+
case '"': { \
|
104
|
+
if (!parse_string<T>(buf, len, pj, depth, idx)) { \
|
105
|
+
goto fail; \
|
106
|
+
} \
|
107
|
+
break; \
|
108
|
+
} \
|
109
|
+
case 't': { \
|
110
|
+
/* we need to make a copy to make sure that the string is space \
|
111
|
+
* terminated. \
|
112
|
+
* this only applies to the JSON document made solely of the true value. \
|
113
|
+
* this will almost never be called in practice */ \
|
114
|
+
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
|
115
|
+
if (copy == nullptr) { \
|
116
|
+
goto fail; \
|
117
|
+
} \
|
118
|
+
memcpy(copy, buf, len); \
|
119
|
+
copy[len] = ' '; \
|
120
|
+
if (!is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + \
|
121
|
+
idx)) { \
|
122
|
+
free(copy); \
|
123
|
+
goto fail; \
|
124
|
+
} \
|
125
|
+
free(copy); \
|
126
|
+
pj.write_tape(0, c); \
|
127
|
+
break; \
|
128
|
+
} \
|
129
|
+
case 'f': { \
|
130
|
+
/* we need to make a copy to make sure that the string is space \
|
131
|
+
* terminated. \
|
132
|
+
* this only applies to the JSON document made solely of the false \
|
133
|
+
* value. \
|
134
|
+
* this will almost never be called in practice */ \
|
135
|
+
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
|
136
|
+
if (copy == nullptr) { \
|
137
|
+
goto fail; \
|
138
|
+
} \
|
139
|
+
memcpy(copy, buf, len); \
|
140
|
+
copy[len] = ' '; \
|
141
|
+
if (!is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + \
|
142
|
+
idx)) { \
|
143
|
+
free(copy); \
|
144
|
+
goto fail; \
|
145
|
+
} \
|
146
|
+
free(copy); \
|
147
|
+
pj.write_tape(0, c); \
|
148
|
+
break; \
|
149
|
+
} \
|
150
|
+
case 'n': { \
|
151
|
+
/* we need to make a copy to make sure that the string is space \
|
152
|
+
* terminated. \
|
153
|
+
* this only applies to the JSON document made solely of the null value. \
|
154
|
+
* this will almost never be called in practice */ \
|
155
|
+
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
|
156
|
+
if (copy == nullptr) { \
|
157
|
+
goto fail; \
|
158
|
+
} \
|
159
|
+
memcpy(copy, buf, len); \
|
160
|
+
copy[len] = ' '; \
|
161
|
+
if (!is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + \
|
162
|
+
idx)) { \
|
163
|
+
free(copy); \
|
164
|
+
goto fail; \
|
165
|
+
} \
|
166
|
+
free(copy); \
|
167
|
+
pj.write_tape(0, c); \
|
168
|
+
break; \
|
169
|
+
} \
|
170
|
+
case '0': \
|
171
|
+
case '1': \
|
172
|
+
case '2': \
|
173
|
+
case '3': \
|
174
|
+
case '4': \
|
175
|
+
case '5': \
|
176
|
+
case '6': \
|
177
|
+
case '7': \
|
178
|
+
case '8': \
|
179
|
+
case '9': { \
|
180
|
+
/* we need to make a copy to make sure that the string is space \
|
181
|
+
* terminated. \
|
182
|
+
* this is done only for JSON documents made of a sole number \
|
183
|
+
* this will almost never be called in practice. We terminate with a \
|
184
|
+
* space \
|
185
|
+
* because we do not want to allow NULLs in the middle of a number \
|
186
|
+
* (whereas a \
|
187
|
+
* space in the middle of a number would be identified in stage 1). */ \
|
188
|
+
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
|
189
|
+
if (copy == nullptr) { \
|
190
|
+
goto fail; \
|
191
|
+
} \
|
192
|
+
memcpy(copy, buf, len); \
|
193
|
+
copy[len] = ' '; \
|
194
|
+
if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, \
|
195
|
+
false)) { \
|
196
|
+
free(copy); \
|
197
|
+
goto fail; \
|
198
|
+
} \
|
199
|
+
free(copy); \
|
200
|
+
break; \
|
201
|
+
} \
|
202
|
+
case '-': { \
|
203
|
+
/* we need to make a copy to make sure that the string is space \
|
204
|
+
* terminated. \
|
205
|
+
* this is done only for JSON documents made of a sole number \
|
206
|
+
* this will almost never be called in practice */ \
|
207
|
+
char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
|
208
|
+
if (copy == nullptr) { \
|
209
|
+
goto fail; \
|
210
|
+
} \
|
211
|
+
memcpy(copy, buf, len); \
|
212
|
+
copy[len] = ' '; \
|
213
|
+
if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, \
|
214
|
+
true)) { \
|
215
|
+
free(copy); \
|
216
|
+
goto fail; \
|
217
|
+
} \
|
218
|
+
free(copy); \
|
219
|
+
break; \
|
220
|
+
} \
|
221
|
+
default: \
|
222
|
+
goto fail; \
|
223
|
+
} \
|
224
|
+
start_continue: \
|
225
|
+
/* the string might not be NULL terminated. */ \
|
226
|
+
if (i + 1 == pj.n_structural_indexes) { \
|
227
|
+
goto succeed; \
|
228
|
+
} else { \
|
229
|
+
goto fail; \
|
230
|
+
} \
|
231
|
+
/*//////////////////////////// OBJECT STATES ///////////////////////////*/ \
|
232
|
+
\
|
233
|
+
object_begin: \
|
234
|
+
UPDATE_CHAR(); \
|
235
|
+
switch (c) { \
|
236
|
+
case '"': { \
|
237
|
+
if (!parse_string<T>(buf, len, pj, depth, idx)) { \
|
238
|
+
goto fail; \
|
239
|
+
} \
|
240
|
+
goto object_key_state; \
|
241
|
+
} \
|
242
|
+
case '}': \
|
243
|
+
goto scope_end; /* could also go to object_continue */ \
|
244
|
+
default: \
|
245
|
+
goto fail; \
|
246
|
+
} \
|
247
|
+
\
|
248
|
+
object_key_state: \
|
249
|
+
UPDATE_CHAR(); \
|
250
|
+
if (c != ':') { \
|
251
|
+
goto fail; \
|
252
|
+
} \
|
253
|
+
UPDATE_CHAR(); \
|
254
|
+
switch (c) { \
|
255
|
+
case '"': { \
|
256
|
+
if (!parse_string<T>(buf, len, pj, depth, idx)) { \
|
257
|
+
goto fail; \
|
258
|
+
} \
|
259
|
+
break; \
|
260
|
+
} \
|
261
|
+
case 't': \
|
262
|
+
if (!is_valid_true_atom(buf + idx)) { \
|
263
|
+
goto fail; \
|
264
|
+
} \
|
265
|
+
pj.write_tape(0, c); \
|
266
|
+
break; \
|
267
|
+
case 'f': \
|
268
|
+
if (!is_valid_false_atom(buf + idx)) { \
|
269
|
+
goto fail; \
|
270
|
+
} \
|
271
|
+
pj.write_tape(0, c); \
|
272
|
+
break; \
|
273
|
+
case 'n': \
|
274
|
+
if (!is_valid_null_atom(buf + idx)) { \
|
275
|
+
goto fail; \
|
276
|
+
} \
|
277
|
+
pj.write_tape(0, c); \
|
278
|
+
break; \
|
279
|
+
case '0': \
|
280
|
+
case '1': \
|
281
|
+
case '2': \
|
282
|
+
case '3': \
|
283
|
+
case '4': \
|
284
|
+
case '5': \
|
285
|
+
case '6': \
|
286
|
+
case '7': \
|
287
|
+
case '8': \
|
288
|
+
case '9': { \
|
289
|
+
if (!parse_number(buf, pj, idx, false)) { \
|
290
|
+
goto fail; \
|
291
|
+
} \
|
292
|
+
break; \
|
293
|
+
} \
|
294
|
+
case '-': { \
|
295
|
+
if (!parse_number(buf, pj, idx, true)) { \
|
296
|
+
goto fail; \
|
297
|
+
} \
|
298
|
+
break; \
|
299
|
+
} \
|
300
|
+
case '{': { \
|
301
|
+
pj.containing_scope_offset[depth] = pj.get_current_loc(); \
|
302
|
+
pj.write_tape(0, c); /* here the compilers knows what c is so this gets \
|
303
|
+
optimized */ \
|
304
|
+
/* we have not yet encountered } so we need to come back for it */ \
|
305
|
+
SET_GOTO_OBJECT_CONTINUE() \
|
306
|
+
/* we found an object inside an object, so we need to increment the \
|
307
|
+
* depth */ \
|
308
|
+
depth++; \
|
309
|
+
if (depth >= pj.depth_capacity) { \
|
310
|
+
goto fail; \
|
311
|
+
} \
|
312
|
+
\
|
313
|
+
goto object_begin; \
|
314
|
+
} \
|
315
|
+
case '[': { \
|
316
|
+
pj.containing_scope_offset[depth] = pj.get_current_loc(); \
|
317
|
+
pj.write_tape(0, c); /* here the compilers knows what c is so this gets \
|
318
|
+
optimized */ \
|
319
|
+
/* we have not yet encountered } so we need to come back for it */ \
|
320
|
+
SET_GOTO_OBJECT_CONTINUE() \
|
321
|
+
/* we found an array inside an object, so we need to increment the depth \
|
322
|
+
*/ \
|
323
|
+
depth++; \
|
324
|
+
if (depth >= pj.depth_capacity) { \
|
325
|
+
goto fail; \
|
326
|
+
} \
|
327
|
+
goto array_begin; \
|
328
|
+
} \
|
329
|
+
default: \
|
330
|
+
goto fail; \
|
331
|
+
} \
|
332
|
+
\
|
333
|
+
object_continue: \
|
334
|
+
UPDATE_CHAR(); \
|
335
|
+
switch (c) { \
|
336
|
+
case ',': \
|
337
|
+
UPDATE_CHAR(); \
|
338
|
+
if (c != '"') { \
|
339
|
+
goto fail; \
|
340
|
+
} else { \
|
341
|
+
if (!parse_string<T>(buf, len, pj, depth, idx)) { \
|
342
|
+
goto fail; \
|
343
|
+
} \
|
344
|
+
goto object_key_state; \
|
345
|
+
} \
|
346
|
+
case '}': \
|
347
|
+
goto scope_end; \
|
348
|
+
default: \
|
349
|
+
goto fail; \
|
350
|
+
} \
|
351
|
+
\
|
352
|
+
/*//////////////////////////// COMMON STATE ///////////////////////////*/ \
|
353
|
+
\
|
354
|
+
scope_end: \
|
355
|
+
/* write our tape location to the header scope */ \
|
356
|
+
depth--; \
|
357
|
+
pj.write_tape(pj.containing_scope_offset[depth], c); \
|
358
|
+
pj.annotate_previous_loc(pj.containing_scope_offset[depth], \
|
359
|
+
pj.get_current_loc()); \
|
360
|
+
/* goto saved_state */ \
|
361
|
+
GOTO_CONTINUE() \
|
362
|
+
\
|
363
|
+
/*//////////////////////////// ARRAY STATES ///////////////////////////*/ \
|
364
|
+
array_begin: \
|
365
|
+
UPDATE_CHAR(); \
|
366
|
+
if (c == ']') { \
|
367
|
+
goto scope_end; /* could also go to array_continue */ \
|
368
|
+
} \
|
369
|
+
\
|
370
|
+
main_array_switch: \
|
371
|
+
/* we call update char on all paths in, so we can peek at c on the \
|
372
|
+
* paths that can accept a close square brace (post-, and at start) */ \
|
373
|
+
switch (c) { \
|
374
|
+
case '"': { \
|
375
|
+
if (!parse_string<T>(buf, len, pj, depth, idx)) { \
|
376
|
+
goto fail; \
|
377
|
+
} \
|
378
|
+
break; \
|
379
|
+
} \
|
380
|
+
case 't': \
|
381
|
+
if (!is_valid_true_atom(buf + idx)) { \
|
382
|
+
goto fail; \
|
383
|
+
} \
|
384
|
+
pj.write_tape(0, c); \
|
385
|
+
break; \
|
386
|
+
case 'f': \
|
387
|
+
if (!is_valid_false_atom(buf + idx)) { \
|
388
|
+
goto fail; \
|
389
|
+
} \
|
390
|
+
pj.write_tape(0, c); \
|
391
|
+
break; \
|
392
|
+
case 'n': \
|
393
|
+
if (!is_valid_null_atom(buf + idx)) { \
|
394
|
+
goto fail; \
|
395
|
+
} \
|
396
|
+
pj.write_tape(0, c); \
|
397
|
+
break; /* goto array_continue; */ \
|
398
|
+
\
|
399
|
+
case '0': \
|
400
|
+
case '1': \
|
401
|
+
case '2': \
|
402
|
+
case '3': \
|
403
|
+
case '4': \
|
404
|
+
case '5': \
|
405
|
+
case '6': \
|
406
|
+
case '7': \
|
407
|
+
case '8': \
|
408
|
+
case '9': { \
|
409
|
+
if (!parse_number(buf, pj, idx, false)) { \
|
410
|
+
goto fail; \
|
411
|
+
} \
|
412
|
+
break; /* goto array_continue; */ \
|
413
|
+
} \
|
414
|
+
case '-': { \
|
415
|
+
if (!parse_number(buf, pj, idx, true)) { \
|
416
|
+
goto fail; \
|
417
|
+
} \
|
418
|
+
break; /* goto array_continue; */ \
|
419
|
+
} \
|
420
|
+
case '{': { \
|
421
|
+
/* we have not yet encountered ] so we need to come back for it */ \
|
422
|
+
pj.containing_scope_offset[depth] = pj.get_current_loc(); \
|
423
|
+
pj.write_tape(0, c); /* here the compilers knows what c is so this gets \
|
424
|
+
optimized */ \
|
425
|
+
SET_GOTO_ARRAY_CONTINUE() \
|
426
|
+
/* we found an object inside an array, so we need to increment the depth \
|
427
|
+
*/ \
|
428
|
+
depth++; \
|
429
|
+
if (depth >= pj.depth_capacity) { \
|
430
|
+
goto fail; \
|
431
|
+
} \
|
432
|
+
\
|
433
|
+
goto object_begin; \
|
434
|
+
} \
|
435
|
+
case '[': { \
|
436
|
+
/* we have not yet encountered ] so we need to come back for it */ \
|
437
|
+
pj.containing_scope_offset[depth] = pj.get_current_loc(); \
|
438
|
+
pj.write_tape(0, c); /* here the compilers knows what c is so this gets \
|
439
|
+
optimized */ \
|
440
|
+
SET_GOTO_ARRAY_CONTINUE() \
|
441
|
+
/* we found an array inside an array, so we need to increment the depth \
|
442
|
+
*/ \
|
443
|
+
depth++; \
|
444
|
+
if (depth >= pj.depth_capacity) { \
|
445
|
+
goto fail; \
|
446
|
+
} \
|
447
|
+
goto array_begin; \
|
448
|
+
} \
|
449
|
+
default: \
|
450
|
+
goto fail; \
|
451
|
+
} \
|
452
|
+
\
|
453
|
+
array_continue: \
|
454
|
+
UPDATE_CHAR(); \
|
455
|
+
switch (c) { \
|
456
|
+
case ',': \
|
457
|
+
UPDATE_CHAR(); \
|
458
|
+
goto main_array_switch; \
|
459
|
+
case ']': \
|
460
|
+
goto scope_end; \
|
461
|
+
default: \
|
462
|
+
goto fail; \
|
463
|
+
} \
|
464
|
+
\
|
465
|
+
/*//////////////////////////// FINAL STATES ///////////////////////////*/ \
|
466
|
+
\
|
467
|
+
succeed: \
|
468
|
+
depth--; \
|
469
|
+
if (depth != 0) { \
|
470
|
+
fprintf(stderr, "internal bug\n"); \
|
471
|
+
abort(); \
|
472
|
+
} \
|
473
|
+
if (pj.containing_scope_offset[depth] != 0) { \
|
474
|
+
fprintf(stderr, "internal bug\n"); \
|
475
|
+
abort(); \
|
476
|
+
} \
|
477
|
+
pj.annotate_previous_loc(pj.containing_scope_offset[depth], \
|
478
|
+
pj.get_current_loc()); \
|
479
|
+
pj.write_tape(pj.containing_scope_offset[depth], 'r'); /* r is root */ \
|
480
|
+
\
|
481
|
+
pj.valid = true; \
|
482
|
+
pj.error_code = simdjson::SUCCESS; \
|
483
|
+
return pj.error_code; \
|
484
|
+
fail: \
|
485
|
+
/* we do not need the next line because this is done by pj.init(), \
|
486
|
+
* pessimistically. \
|
487
|
+
* pj.is_valid = false; \
|
488
|
+
* At this point in the code, we have all the time in the world. \
|
489
|
+
* Note that we know exactly where we are in the document so we could, \
|
490
|
+
* without any overhead on the processing code, report a specific \
|
491
|
+
* location. \
|
492
|
+
* We could even trigger special code paths to assess what happened \
|
493
|
+
* carefully, \
|
494
|
+
* all without any added cost. */ \
|
495
|
+
if (depth >= pj.depth_capacity) { \
|
496
|
+
pj.error_code = simdjson::DEPTH_ERROR; \
|
497
|
+
return pj.error_code; \
|
498
|
+
} \
|
499
|
+
switch (c) { \
|
500
|
+
case '"': \
|
501
|
+
pj.error_code = simdjson::STRING_ERROR; \
|
502
|
+
return pj.error_code; \
|
503
|
+
case '0': \
|
504
|
+
case '1': \
|
505
|
+
case '2': \
|
506
|
+
case '3': \
|
507
|
+
case '4': \
|
508
|
+
case '5': \
|
509
|
+
case '6': \
|
510
|
+
case '7': \
|
511
|
+
case '8': \
|
512
|
+
case '9': \
|
513
|
+
case '-': \
|
514
|
+
pj.error_code = simdjson::NUMBER_ERROR; \
|
515
|
+
return pj.error_code; \
|
516
|
+
case 't': \
|
517
|
+
pj.error_code = simdjson::T_ATOM_ERROR; \
|
518
|
+
return pj.error_code; \
|
519
|
+
case 'n': \
|
520
|
+
pj.error_code = simdjson::N_ATOM_ERROR; \
|
521
|
+
return pj.error_code; \
|
522
|
+
case 'f': \
|
523
|
+
pj.error_code = simdjson::F_ATOM_ERROR; \
|
524
|
+
return pj.error_code; \
|
525
|
+
default: \
|
526
|
+
break; \
|
527
|
+
} \
|
528
|
+
pj.error_code = simdjson::TAPE_ERROR; \
|
529
|
+
return pj.error_code; \
|
530
|
+
}
|
531
|
+
|
532
|
+
} // namespace simdjson
|
533
|
+
|
534
|
+
// x86-64 only: explicit specializations of unified_machine for the HASWELL
// and WESTMERE architecture tags. Each body is nothing but the
// UNIFIED_MACHINE macro expanded with the matching tag, so both share the
// same state machine and return the pj.error_code that the macro sets.
// NOTE(review): TARGET_HASWELL / TARGET_WESTMERE / UNTARGET_REGION are
// defined outside this file; presumably they open and close a compiler
// target-attribute region around each specialization - confirm.
#ifdef IS_X86_64
|
535
|
+
TARGET_HASWELL
|
536
|
+
namespace simdjson {
|
537
|
+
// Stage 2 (tape building) for the HASWELL tag.
template <>
|
538
|
+
WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER int
|
539
|
+
unified_machine<Architecture::HASWELL>(const uint8_t *buf, size_t len,
|
540
|
+
ParsedJson &pj) {
|
541
|
+
UNIFIED_MACHINE(Architecture::HASWELL, buf, len, pj);
|
542
|
+
}
|
543
|
+
} // namespace simdjson
|
544
|
+
UNTARGET_REGION
|
545
|
+
|
546
|
+
TARGET_WESTMERE
|
547
|
+
namespace simdjson {
|
548
|
+
// Stage 2 (tape building) for the WESTMERE tag.
template <>
|
549
|
+
WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER int
|
550
|
+
unified_machine<Architecture::WESTMERE>(const uint8_t *buf, size_t len,
|
551
|
+
ParsedJson &pj) {
|
552
|
+
UNIFIED_MACHINE(Architecture::WESTMERE, buf, len, pj);
|
553
|
+
}
|
554
|
+
} // namespace simdjson
|
555
|
+
UNTARGET_REGION
|
556
|
+
#endif // IS_X86_64
|
557
|
+
|
558
|
+
// ARM64 only: explicit specialization of unified_machine for the ARM64
// architecture tag. The body is the UNIFIED_MACHINE macro expanded with that
// tag; it returns the pj.error_code that the macro sets.
#ifdef IS_ARM64
|
559
|
+
namespace simdjson {
|
560
|
+
// Stage 2 (tape building) for the ARM64 tag.
template <>
|
561
|
+
WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER int
|
562
|
+
unified_machine<Architecture::ARM64>(const uint8_t *buf, size_t len,
|
563
|
+
ParsedJson &pj) {
|
564
|
+
UNIFIED_MACHINE(Architecture::ARM64, buf, len, pj);
|
565
|
+
}
|
566
|
+
} // namespace simdjson
|
567
|
+
#endif // IS_ARM64
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#!/bin/bash
# Checks that every C/C++ source file under the project's source directories
# is already formatted the way clang-format would format it. No file is
# modified.
#
# Exit status: 0 when every file conforms; 1 when clang-format is missing,
# the project root cannot be entered, or at least one file deviates.
SCRIPTPATH="$(cd "$(dirname "$0")" && pwd -P)" || {
  echo "cannot determine the directory of $0" >&2
  exit 1
}
BASE="$SCRIPTPATH/.."
# find below yields paths relative to the project root, so stop right here
# if we cannot get there instead of scanning some unrelated directory.
cd "$BASE" || {
  echo "cannot enter $BASE" >&2
  exit 1
}

# command -v is the portable way to locate an executable (instead of which).
if ! STYLE=$(command -v clang-format); then
  echo "clang-format not installed. Unable to check source file format policy." >&2
  exit 1
fi
OURSTYLE=() # extra clang-format options; empty means: defer to .clang-format
OURCONTENT=(include benchmark tools tests src)
RE=0
# NUL-delimited traversal keeps names containing spaces or glob characters
# intact, and the command is run directly (no eval) so such names can never
# be re-interpreted by the shell.
while IFS= read -r -d '' FILE; do
  echo "checking $FILE"
  # A file conforms when the formatter's output is identical to the file.
  if ! "$STYLE" "${OURSTYLE[@]}" "$BASE/$FILE" | cmp -s "$BASE/$FILE" -; then
    echo "$BASE/$FILE does not respect the coding style." >&2
    echo "consider typing $STYLE -i $BASE/$FILE ${OURSTYLE[*]} to fix the problem." >&2
    RE=1
  fi
done < <(find "${OURCONTENT[@]}" \
  \( -name '*.c' -o -name '*.cpp' -o -name '*.h' -o -name '*.cc' -o -name '*.hh' \) \
  -print0)

exit "$RE"
|
@@ -0,0 +1,25 @@
|
|
1
|
+
#!/bin/bash
# Reformats, in place, every C/C++ source file under the project's source
# directories that clang-format would change.
#
# Exit status: 0 when nothing needed reformatting; 1 when clang-format is
# missing, the project root cannot be entered, or at least one file was
# reformatted.
SCRIPTPATH="$(cd "$(dirname "$0")" && pwd -P)" || {
  echo "cannot determine the directory of $0" >&2
  exit 1
}
# The project root is the parent of this script's directory. It is resolved
# to a clean absolute path and used both as the working directory for find
# and as the prefix of every file path. Deriving it from the script location
# (rather than from `git rev-parse --show-toplevel`) keeps the two in
# agreement when the tree is vendored inside another repository or is not a
# git checkout at all.
BASE="$(cd "$SCRIPTPATH/.." && pwd -P)" || {
  echo "cannot resolve the project root from $SCRIPTPATH" >&2
  exit 1
}
cd "$BASE" || {
  echo "cannot enter $BASE" >&2
  exit 1
}

# command -v is the portable way to locate an executable (instead of which).
if ! STYLE=$(command -v clang-format); then
  echo "clang-format not installed. Unable to check source file format policy." >&2
  exit 1
fi
OURSTYLE=() # extra clang-format options; empty means: defer to .clang-format
OURCONTENT=(include benchmark tools tests src)
RE=0
# NUL-delimited traversal keeps names containing spaces or glob characters
# intact, and the commands are run directly (no eval) so such names can never
# be re-interpreted by the shell.
while IFS= read -r -d '' FILE; do
  # A file conforms when the formatter's output is identical to the file.
  if ! "$STYLE" "${OURSTYLE[@]}" "$BASE/$FILE" | cmp -s "$BASE/$FILE" -; then
    echo "$BASE/$FILE does not respect the coding style. Formatting. " >&2
    "$STYLE" "${OURSTYLE[@]}" -i "$BASE/$FILE"
    RE=1
  fi
done < <(find "${OURCONTENT[@]}" \
  \( -name '*.c' -o -name '*.cpp' -o -name '*.h' -o -name '*.cc' -o -name '*.hh' \) \
  -print0)

exit "$RE"
|