simdjson 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (132) hide show
  1. checksums.yaml +7 -0
  2. data/.clang-format +5 -0
  3. data/.gitignore +14 -0
  4. data/.gitmodules +3 -0
  5. data/.rubocop.yml +9 -0
  6. data/.travis.yml +7 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +39 -0
  10. data/Rakefile +32 -0
  11. data/benchmark/apache_builds.json +4421 -0
  12. data/benchmark/demo.json +15 -0
  13. data/benchmark/github_events.json +1390 -0
  14. data/benchmark/run_benchmark.rb +30 -0
  15. data/ext/simdjson/extconf.rb +22 -0
  16. data/ext/simdjson/simdjson.cpp +76 -0
  17. data/ext/simdjson/simdjson.hpp +6 -0
  18. data/lib/simdjson/version.rb +3 -0
  19. data/lib/simdjson.rb +2 -0
  20. data/simdjson.gemspec +35 -0
  21. data/vendor/.gitkeep +0 -0
  22. data/vendor/simdjson/AUTHORS +3 -0
  23. data/vendor/simdjson/CMakeLists.txt +63 -0
  24. data/vendor/simdjson/CONTRIBUTORS +27 -0
  25. data/vendor/simdjson/Dockerfile +10 -0
  26. data/vendor/simdjson/LICENSE +201 -0
  27. data/vendor/simdjson/Makefile +203 -0
  28. data/vendor/simdjson/Notes.md +85 -0
  29. data/vendor/simdjson/README.md +581 -0
  30. data/vendor/simdjson/amalgamation.sh +158 -0
  31. data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
  32. data/vendor/simdjson/benchmark/benchmark.h +223 -0
  33. data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
  34. data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
  35. data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
  36. data/vendor/simdjson/benchmark/parse.cpp +393 -0
  37. data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
  38. data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
  39. data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
  40. data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
  41. data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
  42. data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
  43. data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
  44. data/vendor/simdjson/doc/gbps.png +0 -0
  45. data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
  46. data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
  47. data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
  48. data/vendor/simdjson/images/halvarflake.png +0 -0
  49. data/vendor/simdjson/images/logo.png +0 -0
  50. data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
  51. data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
  52. data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
  53. data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
  54. data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
  55. data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
  56. data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
  57. data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
  58. data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
  59. data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
  60. data/vendor/simdjson/include/simdjson/portability.h +172 -0
  61. data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
  62. data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
  63. data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
  64. data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
  65. data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
  66. data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
  67. data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
  68. data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
  69. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
  70. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
  71. data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
  72. data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
  73. data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
  74. data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
  75. data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
  76. data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
  77. data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
  78. data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
  79. data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
  80. data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
  81. data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
  82. data/vendor/simdjson/scripts/issue150.sh +14 -0
  83. data/vendor/simdjson/scripts/javascript/README.md +3 -0
  84. data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
  85. data/vendor/simdjson/scripts/minifier.sh +11 -0
  86. data/vendor/simdjson/scripts/parseandstat.sh +24 -0
  87. data/vendor/simdjson/scripts/parser.sh +11 -0
  88. data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
  89. data/vendor/simdjson/scripts/plotparse.sh +98 -0
  90. data/vendor/simdjson/scripts/selectparser.sh +11 -0
  91. data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
  92. data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
  93. data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
  94. data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
  95. data/vendor/simdjson/scripts/testjson2json.sh +99 -0
  96. data/vendor/simdjson/scripts/transitions/Makefile +10 -0
  97. data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
  98. data/vendor/simdjson/singleheader/README.md +1 -0
  99. data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
  100. data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
  101. data/vendor/simdjson/singleheader/simdjson.h +39692 -0
  102. data/vendor/simdjson/src/CMakeLists.txt +67 -0
  103. data/vendor/simdjson/src/jsonioutil.cpp +35 -0
  104. data/vendor/simdjson/src/jsonminifier.cpp +285 -0
  105. data/vendor/simdjson/src/jsonparser.cpp +91 -0
  106. data/vendor/simdjson/src/parsedjson.cpp +323 -0
  107. data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
  108. data/vendor/simdjson/src/simdjson.cpp +30 -0
  109. data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
  110. data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
  111. data/vendor/simdjson/style/clang-format-check.sh +25 -0
  112. data/vendor/simdjson/style/clang-format.sh +25 -0
  113. data/vendor/simdjson/style/run-clang-format.py +326 -0
  114. data/vendor/simdjson/tape.md +134 -0
  115. data/vendor/simdjson/tests/CMakeLists.txt +25 -0
  116. data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
  117. data/vendor/simdjson/tests/basictests.cpp +75 -0
  118. data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
  119. data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
  120. data/vendor/simdjson/tests/pointercheck.cpp +38 -0
  121. data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
  122. data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
  123. data/vendor/simdjson/tools/CMakeLists.txt +3 -0
  124. data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
  125. data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
  126. data/vendor/simdjson/tools/json2json.cpp +112 -0
  127. data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
  128. data/vendor/simdjson/tools/jsonstats.cpp +143 -0
  129. data/vendor/simdjson/tools/minify.cpp +21 -0
  130. data/vendor/simdjson/tools/release.py +125 -0
  131. data/vendor/simdjson/windows/dirent_portable.h +1043 -0
  132. metadata +273 -0
@@ -0,0 +1,567 @@
1
+ #include "simdjson/stage2_build_tape.h"
2
+
3
+ namespace simdjson {
4
+
// Reads the next structural character.
//
// Expects the names i, idx, c, buf and pj to be in scope where it is used:
//   - idx receives pj.structural_indexes[i], the position of the character
//     in buf,
//   - i is then advanced to the following structural index,
//   - c receives the byte buf[idx].
//
// The body is wrapped in do { ... } while (0) so that `UPDATE_CHAR();` is
// exactly one statement and therefore safe inside unbraced if/else bodies.
#define UPDATE_CHAR()                                                          \
  do {                                                                         \
    idx = pj.structural_indexes[i++];                                          \
    c = buf[idx];                                                              \
  } while (0)
11
+
// "Return address" bookkeeping for the state machine.
//
// Each SET_GOTO_*_CONTINUE() records, in pj.ret_address[depth], which
// *_continue label must be resumed once the scope opened at that depth is
// closed; GOTO_CONTINUE() jumps to the label recorded for the current depth.
// All of them expect pj and depth to be in scope where they are expanded.
#ifdef SIMDJSON_USE_COMPUTED_GOTO
// Computed-goto flavour: the label address itself is stored and jumped to.
#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = &&array_continue;
#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = &&object_continue;
#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = &&start_continue;
#define GOTO_CONTINUE() goto *pj.ret_address[depth];
#else
// Fallback flavour: a one-character tag is stored ('a' for array, 'o' for
// object, 's' for start) and decoded again when jumping.
#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = 'a';
#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = 'o';
#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = 's';
#define GOTO_CONTINUE()                                                        \
  {                                                                            \
    switch (pj.ret_address[depth]) {                                           \
    case 'a':                                                                  \
      goto array_continue;                                                     \
    case 'o':                                                                  \
      goto object_continue;                                                    \
    default: /* any other tag resumes at the start state */                    \
      goto start_continue;                                                     \
    }                                                                          \
  }
#endif
32
+
/************
 * The JSON is parsed to a tape, see the accompanying tape.md file
 * for documentation.
 ***********/
// We need to compile that code for multiple architectures. However, target
// attributes can be used only once per function definition. A huge macro
// seemed better than huge code duplication.
//
// UNIFIED_MACHINE(T, buf, len, pj) expands to the complete body of
//   int unified_machine<T>(const uint8_t *buf, size_t len, ParsedJson &pj)
// where
//   T   is the architecture tag forwarded to parse_string<T>,
//   buf is the input document,
//   len is the length of the document in bytes,
//   pj  is the ParsedJson that receives the tape; its structural_indexes are
//       walked by UPDATE_CHAR.
// Every path through the expansion stores a status in pj.error_code and
// returns it: simdjson::SUCCESS when the document was accepted, otherwise
// CAPACITY, DEPTH_ERROR, STRING_ERROR, NUMBER_ERROR, T_ATOM_ERROR,
// N_ATOM_ERROR, F_ATOM_ERROR or TAPE_ERROR.
//
// Documents whose root is a bare scalar (true, false, null or a number) are
// validated against a heap copy of the input that is followed by
// SIMDJSON_PADDING space characters. The whole padding is filled, not only
// its first byte, so that a validator reading a few bytes beyond the value
// never touches uninitialized memory.
#define UNIFIED_MACHINE(T, buf, len, pj)                                       \
  {                                                                            \
    if (ALLOW_SAME_PAGE_BUFFER_OVERRUN) {                                      \
      memset((uint8_t *)buf + len, 0,                                          \
             SIMDJSON_PADDING); /* to please valgrind */                       \
    }                                                                          \
    uint32_t i = 0; /* index of the structural character (0,1,2,3...) */       \
    uint32_t idx;   /* location of the structural character in the input */    \
    uint8_t c;      /* the (structural) character we are looking at, */        \
                    /* updated by the UPDATE_CHAR macro */                     \
    uint32_t depth = 0; /* could have an arbitrary starting depth */           \
    pj.init();          /* sets is_valid to false */                           \
    if (pj.byte_capacity < len) {                                              \
      pj.error_code = simdjson::CAPACITY;                                      \
      return pj.error_code;                                                    \
    }                                                                          \
                                                                               \
    /*//////////////////////////// START STATE ////////////////////////////*/  \
    SET_GOTO_START_CONTINUE()                                                  \
    pj.containing_scope_offset[depth] = pj.get_current_loc();                  \
    pj.write_tape(0, 'r'); /* r for root, 0 is going to get overwritten */     \
    /* the root is used, if nothing else, to capture the size of the tape */   \
    depth++; /* everything starts at depth = 1, depth = 0 is just for the */   \
             /* root, the root may contain an object, an array or */           \
             /* something else. */                                             \
    if (depth >= pj.depth_capacity) {                                          \
      goto fail;                                                               \
    }                                                                          \
                                                                               \
    UPDATE_CHAR();                                                             \
    switch (c) {                                                               \
    case '{':                                                                  \
      pj.containing_scope_offset[depth] = pj.get_current_loc();                \
      SET_GOTO_START_CONTINUE()                                                \
      depth++;                                                                 \
      if (depth >= pj.depth_capacity) {                                        \
        goto fail;                                                             \
      }                                                                        \
      /* strangely, moving this to object_begin slows things down */           \
      pj.write_tape(0, c);                                                     \
      goto object_begin;                                                       \
    case '[':                                                                  \
      pj.containing_scope_offset[depth] = pj.get_current_loc();                \
      SET_GOTO_START_CONTINUE()                                                \
      depth++;                                                                 \
      if (depth >= pj.depth_capacity) {                                        \
        goto fail;                                                             \
      }                                                                        \
      pj.write_tape(0, c);                                                     \
      goto array_begin;                                                        \
    /* A JSON text is a serialized value. Note that certain previous */        \
    /* specifications of JSON constrained a JSON text to be an object or */    \
    /* an array. Implementations that generate only objects or arrays */       \
    /* where a JSON text is called for will be interoperable in the sense */   \
    /* that all implementations will accept these as conforming JSON */        \
    /* texts. https://tools.ietf.org/html/rfc8259 */                           \
    /* The scalar root cases below are therefore always compiled in. */        \
    case '"': {                                                                \
      if (!parse_string<T>(buf, len, pj, depth, idx)) {                        \
        goto fail;                                                             \
      }                                                                        \
      break;                                                                   \
    }                                                                          \
    case 't': {                                                                \
      /* we need to make a copy to make sure that the string is space */       \
      /* terminated. This only applies to the JSON document made solely */     \
      /* of the true value; it will almost never be called in practice. */     \
      char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));        \
      if (copy == nullptr) {                                                   \
        goto fail;                                                             \
      }                                                                        \
      memcpy(copy, buf, len);                                                  \
      memset(copy + len, ' ', SIMDJSON_PADDING);                               \
      const bool valid_atom =                                                  \
          is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + idx);   \
      free(copy);                                                              \
      if (!valid_atom) {                                                       \
        goto fail;                                                             \
      }                                                                        \
      pj.write_tape(0, c);                                                     \
      break;                                                                   \
    }                                                                          \
    case 'f': {                                                                \
      /* same as above, for the JSON document made solely of the false */      \
      /* value. */                                                             \
      char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));        \
      if (copy == nullptr) {                                                   \
        goto fail;                                                             \
      }                                                                        \
      memcpy(copy, buf, len);                                                  \
      memset(copy + len, ' ', SIMDJSON_PADDING);                               \
      const bool valid_atom =                                                  \
          is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + idx);  \
      free(copy);                                                              \
      if (!valid_atom) {                                                       \
        goto fail;                                                             \
      }                                                                        \
      pj.write_tape(0, c);                                                     \
      break;                                                                   \
    }                                                                          \
    case 'n': {                                                                \
      /* same as above, for the JSON document made solely of the null */       \
      /* value. */                                                             \
      char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));        \
      if (copy == nullptr) {                                                   \
        goto fail;                                                             \
      }                                                                        \
      memcpy(copy, buf, len);                                                  \
      memset(copy + len, ' ', SIMDJSON_PADDING);                               \
      const bool valid_atom =                                                  \
          is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + idx);   \
      free(copy);                                                              \
      if (!valid_atom) {                                                       \
        goto fail;                                                             \
      }                                                                        \
      pj.write_tape(0, c);                                                     \
      break;                                                                   \
    }                                                                          \
    case '0':                                                                  \
    case '1':                                                                  \
    case '2':                                                                  \
    case '3':                                                                  \
    case '4':                                                                  \
    case '5':                                                                  \
    case '6':                                                                  \
    case '7':                                                                  \
    case '8':                                                                  \
    case '9':                                                                  \
    case '-': {                                                                \
      /* we need to make a copy to make sure that the string is space */       \
      /* terminated. This is done only for JSON documents made of a sole */    \
      /* number; it will almost never be called in practice. We terminate */   \
      /* with spaces because we do not want to allow NULLs in the middle */    \
      /* of a number (whereas a space in the middle of a number would be */    \
      /* identified in stage 1). The last argument tells parse_number */       \
      /* whether the number starts with a minus sign. */                       \
      char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING));        \
      if (copy == nullptr) {                                                   \
        goto fail;                                                             \
      }                                                                        \
      memcpy(copy, buf, len);                                                  \
      memset(copy + len, ' ', SIMDJSON_PADDING);                               \
      const bool valid_number = parse_number(                                  \
          reinterpret_cast<const uint8_t *>(copy), pj, idx, c == '-');         \
      free(copy);                                                              \
      if (!valid_number) {                                                     \
        goto fail;                                                             \
      }                                                                        \
      break;                                                                   \
    }                                                                          \
    default:                                                                   \
      goto fail;                                                               \
    }                                                                          \
  start_continue:                                                              \
    /* the string might not be NULL terminated. */                             \
    if (i + 1 == pj.n_structural_indexes) {                                    \
      goto succeed;                                                            \
    } else {                                                                   \
      goto fail;                                                               \
    }                                                                          \
    /*//////////////////////////// OBJECT STATES //////////////////////////*/  \
                                                                               \
  object_begin:                                                                \
    UPDATE_CHAR();                                                             \
    switch (c) {                                                               \
    case '"': {                                                                \
      if (!parse_string<T>(buf, len, pj, depth, idx)) {                        \
        goto fail;                                                             \
      }                                                                        \
      goto object_key_state;                                                   \
    }                                                                          \
    case '}':                                                                  \
      goto scope_end; /* could also go to object_continue */                   \
    default:                                                                   \
      goto fail;                                                               \
    }                                                                          \
                                                                               \
  object_key_state:                                                            \
    /* a key was just parsed: expect ':' followed by a value */                \
    UPDATE_CHAR();                                                             \
    if (c != ':') {                                                            \
      goto fail;                                                               \
    }                                                                          \
    UPDATE_CHAR();                                                             \
    switch (c) {                                                               \
    case '"': {                                                                \
      if (!parse_string<T>(buf, len, pj, depth, idx)) {                        \
        goto fail;                                                             \
      }                                                                        \
      break;                                                                   \
    }                                                                          \
    case 't':                                                                  \
      if (!is_valid_true_atom(buf + idx)) {                                    \
        goto fail;                                                             \
      }                                                                        \
      pj.write_tape(0, c);                                                     \
      break;                                                                   \
    case 'f':                                                                  \
      if (!is_valid_false_atom(buf + idx)) {                                   \
        goto fail;                                                             \
      }                                                                        \
      pj.write_tape(0, c);                                                     \
      break;                                                                   \
    case 'n':                                                                  \
      if (!is_valid_null_atom(buf + idx)) {                                    \
        goto fail;                                                             \
      }                                                                        \
      pj.write_tape(0, c);                                                     \
      break;                                                                   \
    case '0':                                                                  \
    case '1':                                                                  \
    case '2':                                                                  \
    case '3':                                                                  \
    case '4':                                                                  \
    case '5':                                                                  \
    case '6':                                                                  \
    case '7':                                                                  \
    case '8':                                                                  \
    case '9': {                                                                \
      if (!parse_number(buf, pj, idx, false)) {                                \
        goto fail;                                                             \
      }                                                                        \
      break;                                                                   \
    }                                                                          \
    case '-': {                                                                \
      if (!parse_number(buf, pj, idx, true)) {                                 \
        goto fail;                                                             \
      }                                                                        \
      break;                                                                   \
    }                                                                          \
    case '{': {                                                                \
      pj.containing_scope_offset[depth] = pj.get_current_loc();                \
      /* here the compiler knows what c is so this gets optimized */           \
      pj.write_tape(0, c);                                                     \
      /* we have not yet encountered } so we need to come back for it */       \
      SET_GOTO_OBJECT_CONTINUE()                                               \
      /* we found an object inside an object, so we need to increment */       \
      /* the depth */                                                          \
      depth++;                                                                 \
      if (depth >= pj.depth_capacity) {                                        \
        goto fail;                                                             \
      }                                                                        \
                                                                               \
      goto object_begin;                                                       \
    }                                                                          \
    case '[': {                                                                \
      pj.containing_scope_offset[depth] = pj.get_current_loc();                \
      /* here the compiler knows what c is so this gets optimized */           \
      pj.write_tape(0, c);                                                     \
      /* we have not yet encountered } so we need to come back for it */       \
      SET_GOTO_OBJECT_CONTINUE()                                               \
      /* we found an array inside an object, so we need to increment */        \
      /* the depth */                                                          \
      depth++;                                                                 \
      if (depth >= pj.depth_capacity) {                                        \
        goto fail;                                                             \
      }                                                                        \
      goto array_begin;                                                        \
    }                                                                          \
    default:                                                                   \
      goto fail;                                                               \
    }                                                                          \
                                                                               \
  object_continue:                                                             \
    UPDATE_CHAR();                                                             \
    switch (c) {                                                               \
    case ',':                                                                  \
      /* a comma must be followed by the next key */                           \
      UPDATE_CHAR();                                                           \
      if (c != '"') {                                                          \
        goto fail;                                                             \
      }                                                                        \
      if (!parse_string<T>(buf, len, pj, depth, idx)) {                        \
        goto fail;                                                             \
      }                                                                        \
      goto object_key_state;                                                   \
    case '}':                                                                  \
      goto scope_end;                                                          \
    default:                                                                   \
      goto fail;                                                               \
    }                                                                          \
                                                                               \
    /*//////////////////////////// COMMON STATE ///////////////////////////*/  \
                                                                               \
  scope_end:                                                                   \
    /* write our tape location to the header scope */                          \
    depth--;                                                                   \
    pj.write_tape(pj.containing_scope_offset[depth], c);                       \
    pj.annotate_previous_loc(pj.containing_scope_offset[depth],                \
                             pj.get_current_loc());                            \
    /* goto saved_state */                                                     \
    GOTO_CONTINUE()                                                            \
                                                                               \
    /*//////////////////////////// ARRAY STATES ///////////////////////////*/  \
  array_begin:                                                                 \
    UPDATE_CHAR();                                                             \
    if (c == ']') {                                                            \
      goto scope_end; /* could also go to array_continue */                    \
    }                                                                          \
                                                                               \
  main_array_switch:                                                           \
    /* we call update char on all paths in, so we can peek at c on the */      \
    /* paths that can accept a close square brace (post-, and at start) */     \
    switch (c) {                                                               \
    case '"': {                                                                \
      if (!parse_string<T>(buf, len, pj, depth, idx)) {                        \
        goto fail;                                                             \
      }                                                                        \
      break;                                                                   \
    }                                                                          \
    case 't':                                                                  \
      if (!is_valid_true_atom(buf + idx)) {                                    \
        goto fail;                                                             \
      }                                                                        \
      pj.write_tape(0, c);                                                     \
      break;                                                                   \
    case 'f':                                                                  \
      if (!is_valid_false_atom(buf + idx)) {                                   \
        goto fail;                                                             \
      }                                                                        \
      pj.write_tape(0, c);                                                     \
      break;                                                                   \
    case 'n':                                                                  \
      if (!is_valid_null_atom(buf + idx)) {                                    \
        goto fail;                                                             \
      }                                                                        \
      pj.write_tape(0, c);                                                     \
      break; /* goto array_continue; */                                        \
                                                                               \
    case '0':                                                                  \
    case '1':                                                                  \
    case '2':                                                                  \
    case '3':                                                                  \
    case '4':                                                                  \
    case '5':                                                                  \
    case '6':                                                                  \
    case '7':                                                                  \
    case '8':                                                                  \
    case '9': {                                                                \
      if (!parse_number(buf, pj, idx, false)) {                                \
        goto fail;                                                             \
      }                                                                        \
      break; /* goto array_continue; */                                        \
    }                                                                          \
    case '-': {                                                                \
      if (!parse_number(buf, pj, idx, true)) {                                 \
        goto fail;                                                             \
      }                                                                        \
      break; /* goto array_continue; */                                        \
    }                                                                          \
    case '{': {                                                                \
      /* we have not yet encountered ] so we need to come back for it */       \
      pj.containing_scope_offset[depth] = pj.get_current_loc();                \
      /* here the compiler knows what c is so this gets optimized */           \
      pj.write_tape(0, c);                                                     \
      SET_GOTO_ARRAY_CONTINUE()                                                \
      /* we found an object inside an array, so we need to increment */        \
      /* the depth */                                                          \
      depth++;                                                                 \
      if (depth >= pj.depth_capacity) {                                        \
        goto fail;                                                             \
      }                                                                        \
                                                                               \
      goto object_begin;                                                       \
    }                                                                          \
    case '[': {                                                                \
      /* we have not yet encountered ] so we need to come back for it */       \
      pj.containing_scope_offset[depth] = pj.get_current_loc();                \
      /* here the compiler knows what c is so this gets optimized */           \
      pj.write_tape(0, c);                                                     \
      SET_GOTO_ARRAY_CONTINUE()                                                \
      /* we found an array inside an array, so we need to increment */         \
      /* the depth */                                                          \
      depth++;                                                                 \
      if (depth >= pj.depth_capacity) {                                        \
        goto fail;                                                             \
      }                                                                        \
      goto array_begin;                                                        \
    }                                                                          \
    default:                                                                   \
      goto fail;                                                               \
    }                                                                          \
                                                                               \
  array_continue:                                                              \
    UPDATE_CHAR();                                                             \
    switch (c) {                                                               \
    case ',':                                                                  \
      UPDATE_CHAR();                                                           \
      goto main_array_switch;                                                  \
    case ']':                                                                  \
      goto scope_end;                                                          \
    default:                                                                   \
      goto fail;                                                               \
    }                                                                          \
                                                                               \
    /*//////////////////////////// FINAL STATES ///////////////////////////*/  \
                                                                               \
  succeed:                                                                     \
    depth--;                                                                   \
    if (depth != 0) {                                                          \
      fprintf(stderr, "internal bug\n");                                       \
      abort();                                                                 \
    }                                                                          \
    if (pj.containing_scope_offset[depth] != 0) {                              \
      fprintf(stderr, "internal bug\n");                                       \
      abort();                                                                 \
    }                                                                          \
    pj.annotate_previous_loc(pj.containing_scope_offset[depth],                \
                             pj.get_current_loc());                            \
    pj.write_tape(pj.containing_scope_offset[depth], 'r'); /* r is root */     \
                                                                               \
    pj.valid = true;                                                           \
    pj.error_code = simdjson::SUCCESS;                                         \
    return pj.error_code;                                                      \
  fail:                                                                        \
    /* we do not need the next line because this is done by pj.init(), */      \
    /* pessimistically. */                                                     \
    /* pj.is_valid = false; */                                                 \
    /* At this point in the code, we have all the time in the world. */        \
    /* Note that we know exactly where we are in the document so we */         \
    /* could, without any overhead on the processing code, report a */         \
    /* specific location. */                                                   \
    /* We could even trigger special code paths to assess what happened */     \
    /* carefully, all without any added cost. */                               \
    if (depth >= pj.depth_capacity) {                                          \
      pj.error_code = simdjson::DEPTH_ERROR;                                   \
      return pj.error_code;                                                    \
    }                                                                          \
    /* otherwise classify the error by the character being processed */        \
    switch (c) {                                                               \
    case '"':                                                                  \
      pj.error_code = simdjson::STRING_ERROR;                                  \
      return pj.error_code;                                                    \
    case '0':                                                                  \
    case '1':                                                                  \
    case '2':                                                                  \
    case '3':                                                                  \
    case '4':                                                                  \
    case '5':                                                                  \
    case '6':                                                                  \
    case '7':                                                                  \
    case '8':                                                                  \
    case '9':                                                                  \
    case '-':                                                                  \
      pj.error_code = simdjson::NUMBER_ERROR;                                  \
      return pj.error_code;                                                    \
    case 't':                                                                  \
      pj.error_code = simdjson::T_ATOM_ERROR;                                  \
      return pj.error_code;                                                    \
    case 'n':                                                                  \
      pj.error_code = simdjson::N_ATOM_ERROR;                                  \
      return pj.error_code;                                                    \
    case 'f':                                                                  \
      pj.error_code = simdjson::F_ATOM_ERROR;                                  \
      return pj.error_code;                                                    \
    default:                                                                   \
      break;                                                                   \
    }                                                                          \
    pj.error_code = simdjson::TAPE_ERROR;                                      \
    return pj.error_code;                                                      \
  }
531
+
532
+ } // namespace simdjson
533
+
#ifdef IS_X86_64

// x86-64 builds get two explicit specializations of unified_machine, one per
// supported instruction-set level. Each body is nothing but the
// UNIFIED_MACHINE expansion, which returns pj.error_code on every path.
// NOTE(review): TARGET_HASWELL / TARGET_WESTMERE / UNTARGET_REGION are
// defined elsewhere; presumably they open and close a compiler target
// region for the enclosed definitions -- confirm against their definition.

TARGET_HASWELL
namespace simdjson {

// Stage 2 for Architecture::HASWELL.
template <>
WARN_UNUSED
ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER
LENIENT_MEM_SANITIZER
int unified_machine<Architecture::HASWELL>(const uint8_t *buf, size_t len,
                                           ParsedJson &pj) {
  UNIFIED_MACHINE(Architecture::HASWELL, buf, len, pj);
}

} // namespace simdjson
UNTARGET_REGION

TARGET_WESTMERE
namespace simdjson {

// Stage 2 for Architecture::WESTMERE.
template <>
WARN_UNUSED
ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER
LENIENT_MEM_SANITIZER
int unified_machine<Architecture::WESTMERE>(const uint8_t *buf, size_t len,
                                            ParsedJson &pj) {
  UNIFIED_MACHINE(Architecture::WESTMERE, buf, len, pj);
}

} // namespace simdjson
UNTARGET_REGION

#endif // IS_X86_64
557
+
#ifdef IS_ARM64

namespace simdjson {

// Stage 2 for Architecture::ARM64: an explicit specialization of
// unified_machine whose body is the UNIFIED_MACHINE expansion, which returns
// pj.error_code on every path.
template <>
WARN_UNUSED
ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER
LENIENT_MEM_SANITIZER
int unified_machine<Architecture::ARM64>(const uint8_t *buf, size_t len,
                                         ParsedJson &pj) {
  UNIFIED_MACHINE(Architecture::ARM64, buf, len, pj);
}

} // namespace simdjson

#endif // IS_ARM64
@@ -0,0 +1,25 @@
#!/bin/bash
# Checks that the C/C++ sources of the project follow the clang-format style.
#
# The script changes to the parent directory of the directory it lives in,
# runs clang-format on every *.c, *.cpp, *.h, *.cc and *.hh file found under
# the directories listed in OURCONTENT, and compares the formatter output with
# the file on disk. Nothing is modified.
#
# Exit status: 0 when every file is already formatted; 1 when at least one
# file deviates, clang-format is not installed, or the base directory cannot
# be entered.

# Resolve the directory containing this script (symlinks resolved by pwd -P).
SCRIPTPATH="$(cd "$(dirname -- "$0")" && pwd -P)" || {
  echo "Unable to determine the script directory." >&2
  exit 1
}
BASE="$SCRIPTPATH/.."
cd "$BASE" || {
  echo "Unable to change directory to $BASE." >&2
  exit 1
}

# command -v is the portable way to locate an executable.
if ! STYLE=$(command -v clang-format); then
  echo "clang-format not installed. Unable to check source file format policy." >&2
  exit 1
fi

OURSTYLE=() # extra clang-format arguments; empty defers to .clang-format
OURCONTENT=(include benchmark tools tests src)
RE=0

# Read NUL-delimited paths so that names containing whitespace or glob
# characters are handled correctly. The -name tests are grouped explicitly so
# that -type f and -print0 apply to all of them.
while IFS= read -r -d '' FILE; do
  echo "checking $FILE"
  if ! "$STYLE" "${OURSTYLE[@]}" "$BASE/$FILE" | cmp -s "$BASE/$FILE" -; then
    echo "$BASE/$FILE does not respect the coding style." >&2
    echo "consider typing $STYLE -i $BASE/$FILE ${OURSTYLE[*]} to fix the problem." >&2
    RE=1
  fi
done < <(find "${OURCONTENT[@]}" -type f \
  \( -name '*.c' -o -name '*.cpp' -o -name '*.h' -o -name '*.cc' -o -name '*.hh' \) \
  -print0)

exit "$RE"
@@ -0,0 +1,25 @@
#!/bin/bash
# Reformats, in place, the C/C++ sources of the project that do not follow
# the clang-format style.
#
# The script changes to the parent directory of the directory it lives in,
# runs clang-format on every *.c, *.cpp, *.h, *.cc and *.hh file found under
# the directories listed in OURCONTENT, and rewrites each file whose content
# differs from the formatter output.
#
# Exit status: 0 when every file was already formatted; 1 when at least one
# file had to be reformatted, clang-format is not installed, or the base
# directory cannot be entered.

# Resolve the directory containing this script (symlinks resolved by pwd -P).
SCRIPTPATH="$(cd "$(dirname -- "$0")" && pwd -P)" || {
  echo "Unable to determine the script directory." >&2
  exit 1
}
# Files are located relative to the script, which is also the directory find
# runs in. Deriving BASE from `git rev-parse --show-toplevel` instead makes
# the paths wrong whenever this tree is vendored inside another repository or
# is not a git checkout at all.
BASE="$SCRIPTPATH/.."
cd "$BASE" || {
  echo "Unable to change directory to $BASE." >&2
  exit 1
}

# command -v is the portable way to locate an executable.
if ! STYLE=$(command -v clang-format); then
  echo "clang-format not installed. Unable to check source file format policy." >&2
  exit 1
fi

OURSTYLE=() # extra clang-format arguments; empty defers to .clang-format
OURCONTENT=(include benchmark tools tests src)
RE=0

# Read NUL-delimited paths so that names containing whitespace or glob
# characters are handled correctly. The -name tests are grouped explicitly so
# that -type f and -print0 apply to all of them.
while IFS= read -r -d '' FILE; do
  if ! "$STYLE" "${OURSTYLE[@]}" "$BASE/$FILE" | cmp -s "$BASE/$FILE" -; then
    echo "$BASE/$FILE does not respect the coding style. Formatting. " >&2
    "$STYLE" "${OURSTYLE[@]}" -i "$BASE/$FILE"
    RE=1
  fi
done < <(find "${OURCONTENT[@]}" -type f \
  \( -name '*.c' -o -name '*.cpp' -o -name '*.h' -o -name '*.cc' -o -name '*.hh' \) \
  -print0)

exit "$RE"