simdjson 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. checksums.yaml +7 -0
  2. data/.clang-format +5 -0
  3. data/.gitignore +14 -0
  4. data/.gitmodules +3 -0
  5. data/.rubocop.yml +9 -0
  6. data/.travis.yml +7 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +39 -0
  10. data/Rakefile +32 -0
  11. data/benchmark/apache_builds.json +4421 -0
  12. data/benchmark/demo.json +15 -0
  13. data/benchmark/github_events.json +1390 -0
  14. data/benchmark/run_benchmark.rb +30 -0
  15. data/ext/simdjson/extconf.rb +22 -0
  16. data/ext/simdjson/simdjson.cpp +76 -0
  17. data/ext/simdjson/simdjson.hpp +6 -0
  18. data/lib/simdjson/version.rb +3 -0
  19. data/lib/simdjson.rb +2 -0
  20. data/simdjson.gemspec +35 -0
  21. data/vendor/.gitkeep +0 -0
  22. data/vendor/simdjson/AUTHORS +3 -0
  23. data/vendor/simdjson/CMakeLists.txt +63 -0
  24. data/vendor/simdjson/CONTRIBUTORS +27 -0
  25. data/vendor/simdjson/Dockerfile +10 -0
  26. data/vendor/simdjson/LICENSE +201 -0
  27. data/vendor/simdjson/Makefile +203 -0
  28. data/vendor/simdjson/Notes.md +85 -0
  29. data/vendor/simdjson/README.md +581 -0
  30. data/vendor/simdjson/amalgamation.sh +158 -0
  31. data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
  32. data/vendor/simdjson/benchmark/benchmark.h +223 -0
  33. data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
  34. data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
  35. data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
  36. data/vendor/simdjson/benchmark/parse.cpp +393 -0
  37. data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
  38. data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
  39. data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
  40. data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
  41. data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
  42. data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
  43. data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
  44. data/vendor/simdjson/doc/gbps.png +0 -0
  45. data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
  46. data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
  47. data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
  48. data/vendor/simdjson/images/halvarflake.png +0 -0
  49. data/vendor/simdjson/images/logo.png +0 -0
  50. data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
  51. data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
  52. data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
  53. data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
  54. data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
  55. data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
  56. data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
  57. data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
  58. data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
  59. data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
  60. data/vendor/simdjson/include/simdjson/portability.h +172 -0
  61. data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
  62. data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
  63. data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
  64. data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
  65. data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
  66. data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
  67. data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
  68. data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
  69. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
  70. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
  71. data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
  72. data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
  73. data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
  74. data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
  75. data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
  76. data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
  77. data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
  78. data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
  79. data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
  80. data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
  81. data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
  82. data/vendor/simdjson/scripts/issue150.sh +14 -0
  83. data/vendor/simdjson/scripts/javascript/README.md +3 -0
  84. data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
  85. data/vendor/simdjson/scripts/minifier.sh +11 -0
  86. data/vendor/simdjson/scripts/parseandstat.sh +24 -0
  87. data/vendor/simdjson/scripts/parser.sh +11 -0
  88. data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
  89. data/vendor/simdjson/scripts/plotparse.sh +98 -0
  90. data/vendor/simdjson/scripts/selectparser.sh +11 -0
  91. data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
  92. data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
  93. data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
  94. data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
  95. data/vendor/simdjson/scripts/testjson2json.sh +99 -0
  96. data/vendor/simdjson/scripts/transitions/Makefile +10 -0
  97. data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
  98. data/vendor/simdjson/singleheader/README.md +1 -0
  99. data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
  100. data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
  101. data/vendor/simdjson/singleheader/simdjson.h +39692 -0
  102. data/vendor/simdjson/src/CMakeLists.txt +67 -0
  103. data/vendor/simdjson/src/jsonioutil.cpp +35 -0
  104. data/vendor/simdjson/src/jsonminifier.cpp +285 -0
  105. data/vendor/simdjson/src/jsonparser.cpp +91 -0
  106. data/vendor/simdjson/src/parsedjson.cpp +323 -0
  107. data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
  108. data/vendor/simdjson/src/simdjson.cpp +30 -0
  109. data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
  110. data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
  111. data/vendor/simdjson/style/clang-format-check.sh +25 -0
  112. data/vendor/simdjson/style/clang-format.sh +25 -0
  113. data/vendor/simdjson/style/run-clang-format.py +326 -0
  114. data/vendor/simdjson/tape.md +134 -0
  115. data/vendor/simdjson/tests/CMakeLists.txt +25 -0
  116. data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
  117. data/vendor/simdjson/tests/basictests.cpp +75 -0
  118. data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
  119. data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
  120. data/vendor/simdjson/tests/pointercheck.cpp +38 -0
  121. data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
  122. data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
  123. data/vendor/simdjson/tools/CMakeLists.txt +3 -0
  124. data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
  125. data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
  126. data/vendor/simdjson/tools/json2json.cpp +112 -0
  127. data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
  128. data/vendor/simdjson/tools/jsonstats.cpp +143 -0
  129. data/vendor/simdjson/tools/minify.cpp +21 -0
  130. data/vendor/simdjson/tools/release.py +125 -0
  131. data/vendor/simdjson/windows/dirent_portable.h +1043 -0
  132. metadata +273 -0
@@ -0,0 +1,567 @@
1
+ #include "simdjson/stage2_build_tape.h"
2
+
3
+ namespace simdjson {
4
+
5
+ // UPDATE_CHAR reads the next structural character: idx is loaded from pj.structural_indexes[i], i is post-incremented, and c becomes buf[idx]. It expects idx, i, c, buf and pj to be in scope where it is expanded.
6
+ #define UPDATE_CHAR() \
7
+ { \
8
+ idx = pj.structural_indexes[i++]; \
9
+ c = buf[idx]; \
10
+ }
11
+
12
+ #ifdef SIMDJSON_USE_COMPUTED_GOTO // computed-goto variant: pj.ret_address[depth] stores a label address (&&label)
13
+ #define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = &&array_continue; // resume at array_continue once the scope opened at this depth closes
14
+ #define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = &&object_continue; // resume at object_continue once the scope opened at this depth closes
15
+ #define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = &&start_continue; // resume at start_continue once the scope opened at this depth closes
16
+ #define GOTO_CONTINUE() goto *pj.ret_address[depth]; // jump to the label saved for the current depth
17
+ #else // portable variant: pj.ret_address[depth] stores a tag character ('a', 'o' or 's')
18
+ #define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = 'a'; // 'a' selects array_continue
19
+ #define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = 'o'; // 'o' selects object_continue
20
+ #define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = 's'; // 's' selects start_continue
21
+ #define GOTO_CONTINUE() /* dispatch on the saved tag; any tag other than 'a' or 'o' resumes at start_continue */ \
22
+ { \
23
+ if (pj.ret_address[depth] == 'a') { \
24
+ goto array_continue; \
25
+ } else if (pj.ret_address[depth] == 'o') { \
26
+ goto object_continue; \
27
+ } else { \
28
+ goto start_continue; \
29
+ } \
30
+ }
31
+ #endif // SIMDJSON_USE_COMPUTED_GOTO
32
+
33
+ /************
34
+ * The JSON is parsed to a tape, see the accompanying tape.md file
35
+ * for documentation.
36
+ ***********/
37
+ // This code must be compiled once per target architecture, but target
38
+ // attributes can be applied only once per function definition, so a single
39
+ // large macro was preferred over duplicating the code. The expansion acts as
40
+ // the body of: int unified_machine<T>(const uint8_t *buf, size_t len, ParsedJson &pj); every return statement yields pj.error_code.
41
+ #define UNIFIED_MACHINE(T, buf, len, pj) \
42
+ { \
43
+ if (ALLOW_SAME_PAGE_BUFFER_OVERRUN) { \
44
+ memset((uint8_t *)buf + len, 0, \
45
+ SIMDJSON_PADDING); /* to please valgrind */ \
46
+ } \
47
+ uint32_t i = 0; /* index of the structural character (0,1,2,3...) */ \
48
+ uint32_t \
49
+ idx; /* location of the structural character in the input (buf) */ \
50
+ uint8_t c; /* used to track the (structural) character we are looking at, \
51
+ updated */ \
52
+ /* by UPDATE_CHAR macro */ \
53
+ uint32_t depth = 0; /* could have an arbitrary starting depth */ \
54
+ pj.init(); /* sets is_valid to false */ \
55
+ if (pj.byte_capacity < len) { \
56
+ pj.error_code = simdjson::CAPACITY; \
57
+ return pj.error_code; \
58
+ } \
59
+ \
60
+ /*//////////////////////////// START STATE ///////////////////////////// \
61
+ */ \
62
+ SET_GOTO_START_CONTINUE() \
63
+ pj.containing_scope_offset[depth] = pj.get_current_loc(); \
64
+ pj.write_tape(0, 'r'); /* r for root, 0 is going to get overwritten */ \
65
+ /* the root is used, if nothing else, to capture the size of the tape */ \
66
+ depth++; /* everything starts at depth = 1, depth = 0 is just for the \
67
+ root, the root may contain an object, an array or something \
68
+ else. */ \
69
+ if (depth >= pj.depth_capacity) { \
70
+ goto fail; \
71
+ } \
72
+ \
73
+ UPDATE_CHAR(); \
74
+ switch (c) { \
75
+ case '{': \
76
+ pj.containing_scope_offset[depth] = pj.get_current_loc(); \
77
+ SET_GOTO_START_CONTINUE(); \
78
+ depth++; \
79
+ if (depth >= pj.depth_capacity) { \
80
+ goto fail; \
81
+ } \
82
+ pj.write_tape( \
83
+ 0, \
84
+ c); /* strangely, moving this to object_begin slows things down */ \
85
+ goto object_begin; \
86
+ case '[': \
87
+ pj.containing_scope_offset[depth] = pj.get_current_loc(); \
88
+ SET_GOTO_START_CONTINUE(); \
89
+ depth++; \
90
+ if (depth >= pj.depth_capacity) { \
91
+ goto fail; \
92
+ } \
93
+ pj.write_tape(0, c); \
94
+ goto array_begin; \
95
+ /* #define SIMDJSON_ALLOWANYTHINGINROOT \
96
+ * A JSON text is a serialized value. Note that certain previous \
97
+ * specifications of JSON constrained a JSON text to be an object or an \
98
+ * array. Implementations that generate only objects or arrays where a \
99
+ * JSON text is called for will be interoperable in the sense that all \
100
+ * implementations will accept these as conforming JSON texts. \
101
+ * https://tools.ietf.org/html/rfc8259 \
102
+ * #ifdef SIMDJSON_ALLOWANYTHINGINROOT */ \
103
+ case '"': { \
104
+ if (!parse_string<T>(buf, len, pj, depth, idx)) { \
105
+ goto fail; \
106
+ } \
107
+ break; \
108
+ } \
109
+ case 't': { \
110
+ /* we need to make a copy to make sure that the string is space \
111
+ * terminated. \
112
+ * this only applies to the JSON document made solely of the true value. \
113
+ * this will almost never be called in practice */ \
114
+ char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
115
+ if (copy == nullptr) { \
116
+ goto fail; \
117
+ } \
118
+ memcpy(copy, buf, len); \
119
+ copy[len] = ' '; \
120
+ if (!is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + \
121
+ idx)) { \
122
+ free(copy); \
123
+ goto fail; \
124
+ } \
125
+ free(copy); \
126
+ pj.write_tape(0, c); \
127
+ break; \
128
+ } \
129
+ case 'f': { \
130
+ /* we need to make a copy to make sure that the string is space \
131
+ * terminated. \
132
+ * this only applies to the JSON document made solely of the false \
133
+ * value. \
134
+ * this will almost never be called in practice */ \
135
+ char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
136
+ if (copy == nullptr) { \
137
+ goto fail; \
138
+ } \
139
+ memcpy(copy, buf, len); \
140
+ copy[len] = ' '; \
141
+ if (!is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + \
142
+ idx)) { \
143
+ free(copy); \
144
+ goto fail; \
145
+ } \
146
+ free(copy); \
147
+ pj.write_tape(0, c); \
148
+ break; \
149
+ } \
150
+ case 'n': { \
151
+ /* we need to make a copy to make sure that the string is space \
152
+ * terminated. \
153
+ * this only applies to the JSON document made solely of the null value. \
154
+ * this will almost never be called in practice */ \
155
+ char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
156
+ if (copy == nullptr) { \
157
+ goto fail; \
158
+ } \
159
+ memcpy(copy, buf, len); \
160
+ copy[len] = ' '; \
161
+ if (!is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + \
162
+ idx)) { \
163
+ free(copy); \
164
+ goto fail; \
165
+ } \
166
+ free(copy); \
167
+ pj.write_tape(0, c); \
168
+ break; \
169
+ } \
170
+ case '0': \
171
+ case '1': \
172
+ case '2': \
173
+ case '3': \
174
+ case '4': \
175
+ case '5': \
176
+ case '6': \
177
+ case '7': \
178
+ case '8': \
179
+ case '9': { \
180
+ /* we need to make a copy to make sure that the string is space \
181
+ * terminated. \
182
+ * this is done only for JSON documents made of a sole number \
183
+ * this will almost never be called in practice. We terminate with a \
184
+ * space \
185
+ * because we do not want to allow NULLs in the middle of a number \
186
+ * (whereas a \
187
+ * space in the middle of a number would be identified in stage 1). */ \
188
+ char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
189
+ if (copy == nullptr) { \
190
+ goto fail; \
191
+ } \
192
+ memcpy(copy, buf, len); \
193
+ copy[len] = ' '; \
194
+ if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, \
195
+ false)) { \
196
+ free(copy); \
197
+ goto fail; \
198
+ } \
199
+ free(copy); \
200
+ break; \
201
+ } \
202
+ case '-': { \
203
+ /* we need to make a copy to make sure that the string is space \
204
+ * terminated (copy[len] is set to ' ' below). \
205
+ * this is done only for JSON documents made of a sole number \
206
+ * this will almost never be called in practice */ \
207
+ char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
208
+ if (copy == nullptr) { \
209
+ goto fail; \
210
+ } \
211
+ memcpy(copy, buf, len); \
212
+ copy[len] = ' '; \
213
+ if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, \
214
+ true)) { \
215
+ free(copy); \
216
+ goto fail; \
217
+ } \
218
+ free(copy); \
219
+ break; \
220
+ } \
221
+ default: \
222
+ goto fail; \
223
+ } \
224
+ start_continue: \
225
+ /* the string might not be NULL terminated. */ \
226
+ if (i + 1 == pj.n_structural_indexes) { \
227
+ goto succeed; \
228
+ } else { \
229
+ goto fail; \
230
+ } \
231
+ /*//////////////////////////// OBJECT STATES ///////////////////////////*/ \
232
+ \
233
+ object_begin: \
234
+ UPDATE_CHAR(); \
235
+ switch (c) { \
236
+ case '"': { \
237
+ if (!parse_string<T>(buf, len, pj, depth, idx)) { \
238
+ goto fail; \
239
+ } \
240
+ goto object_key_state; \
241
+ } \
242
+ case '}': \
243
+ goto scope_end; /* could also go to object_continue */ \
244
+ default: \
245
+ goto fail; \
246
+ } \
247
+ \
248
+ object_key_state: \
249
+ UPDATE_CHAR(); \
250
+ if (c != ':') { \
251
+ goto fail; \
252
+ } \
253
+ UPDATE_CHAR(); \
254
+ switch (c) { \
255
+ case '"': { \
256
+ if (!parse_string<T>(buf, len, pj, depth, idx)) { \
257
+ goto fail; \
258
+ } \
259
+ break; \
260
+ } \
261
+ case 't': \
262
+ if (!is_valid_true_atom(buf + idx)) { \
263
+ goto fail; \
264
+ } \
265
+ pj.write_tape(0, c); \
266
+ break; \
267
+ case 'f': \
268
+ if (!is_valid_false_atom(buf + idx)) { \
269
+ goto fail; \
270
+ } \
271
+ pj.write_tape(0, c); \
272
+ break; \
273
+ case 'n': \
274
+ if (!is_valid_null_atom(buf + idx)) { \
275
+ goto fail; \
276
+ } \
277
+ pj.write_tape(0, c); \
278
+ break; \
279
+ case '0': \
280
+ case '1': \
281
+ case '2': \
282
+ case '3': \
283
+ case '4': \
284
+ case '5': \
285
+ case '6': \
286
+ case '7': \
287
+ case '8': \
288
+ case '9': { \
289
+ if (!parse_number(buf, pj, idx, false)) { \
290
+ goto fail; \
291
+ } \
292
+ break; \
293
+ } \
294
+ case '-': { \
295
+ if (!parse_number(buf, pj, idx, true)) { \
296
+ goto fail; \
297
+ } \
298
+ break; \
299
+ } \
300
+ case '{': { \
301
+ pj.containing_scope_offset[depth] = pj.get_current_loc(); \
302
+ pj.write_tape(0, c); /* here the compiler knows what c is so this gets \
303
+ optimized */ \
304
+ /* we have not yet encountered } so we need to come back for it */ \
305
+ SET_GOTO_OBJECT_CONTINUE() \
306
+ /* we found an object inside an object, so we need to increment the \
307
+ * depth */ \
308
+ depth++; \
309
+ if (depth >= pj.depth_capacity) { \
310
+ goto fail; \
311
+ } \
312
+ \
313
+ goto object_begin; \
314
+ } \
315
+ case '[': { \
316
+ pj.containing_scope_offset[depth] = pj.get_current_loc(); \
317
+ pj.write_tape(0, c); /* here the compiler knows what c is so this gets \
318
+ optimized */ \
319
+ /* we have not yet encountered } so we need to come back for it */ \
320
+ SET_GOTO_OBJECT_CONTINUE() \
321
+ /* we found an array inside an object, so we need to increment the depth \
322
+ */ \
323
+ depth++; \
324
+ if (depth >= pj.depth_capacity) { \
325
+ goto fail; \
326
+ } \
327
+ goto array_begin; \
328
+ } \
329
+ default: \
330
+ goto fail; \
331
+ } \
332
+ \
333
+ object_continue: \
334
+ UPDATE_CHAR(); \
335
+ switch (c) { \
336
+ case ',': \
337
+ UPDATE_CHAR(); \
338
+ if (c != '"') { \
339
+ goto fail; \
340
+ } else { \
341
+ if (!parse_string<T>(buf, len, pj, depth, idx)) { \
342
+ goto fail; \
343
+ } \
344
+ goto object_key_state; \
345
+ } \
346
+ case '}': \
347
+ goto scope_end; \
348
+ default: \
349
+ goto fail; \
350
+ } \
351
+ \
352
+ /*//////////////////////////// COMMON STATE ///////////////////////////*/ \
353
+ \
354
+ scope_end: \
355
+ /* write our tape location to the header scope */ \
356
+ depth--; \
357
+ pj.write_tape(pj.containing_scope_offset[depth], c); \
358
+ pj.annotate_previous_loc(pj.containing_scope_offset[depth], \
359
+ pj.get_current_loc()); \
360
+ /* goto saved_state */ \
361
+ GOTO_CONTINUE() \
362
+ \
363
+ /*//////////////////////////// ARRAY STATES ///////////////////////////*/ \
364
+ array_begin: \
365
+ UPDATE_CHAR(); \
366
+ if (c == ']') { \
367
+ goto scope_end; /* could also go to array_continue */ \
368
+ } \
369
+ \
370
+ main_array_switch: \
371
+ /* we call update char on all paths in, so we can peek at c on the \
372
+ * paths that can accept a close square brace (post-, and at start) */ \
373
+ switch (c) { \
374
+ case '"': { \
375
+ if (!parse_string<T>(buf, len, pj, depth, idx)) { \
376
+ goto fail; \
377
+ } \
378
+ break; \
379
+ } \
380
+ case 't': \
381
+ if (!is_valid_true_atom(buf + idx)) { \
382
+ goto fail; \
383
+ } \
384
+ pj.write_tape(0, c); \
385
+ break; \
386
+ case 'f': \
387
+ if (!is_valid_false_atom(buf + idx)) { \
388
+ goto fail; \
389
+ } \
390
+ pj.write_tape(0, c); \
391
+ break; \
392
+ case 'n': \
393
+ if (!is_valid_null_atom(buf + idx)) { \
394
+ goto fail; \
395
+ } \
396
+ pj.write_tape(0, c); \
397
+ break; /* goto array_continue; */ \
398
+ \
399
+ case '0': \
400
+ case '1': \
401
+ case '2': \
402
+ case '3': \
403
+ case '4': \
404
+ case '5': \
405
+ case '6': \
406
+ case '7': \
407
+ case '8': \
408
+ case '9': { \
409
+ if (!parse_number(buf, pj, idx, false)) { \
410
+ goto fail; \
411
+ } \
412
+ break; /* goto array_continue; */ \
413
+ } \
414
+ case '-': { \
415
+ if (!parse_number(buf, pj, idx, true)) { \
416
+ goto fail; \
417
+ } \
418
+ break; /* goto array_continue; */ \
419
+ } \
420
+ case '{': { \
421
+ /* we have not yet encountered ] so we need to come back for it */ \
422
+ pj.containing_scope_offset[depth] = pj.get_current_loc(); \
423
+ pj.write_tape(0, c); /* here the compiler knows what c is so this gets \
424
+ optimized */ \
425
+ SET_GOTO_ARRAY_CONTINUE() \
426
+ /* we found an object inside an array, so we need to increment the depth \
427
+ */ \
428
+ depth++; \
429
+ if (depth >= pj.depth_capacity) { \
430
+ goto fail; \
431
+ } \
432
+ \
433
+ goto object_begin; \
434
+ } \
435
+ case '[': { \
436
+ /* we have not yet encountered ] so we need to come back for it */ \
437
+ pj.containing_scope_offset[depth] = pj.get_current_loc(); \
438
+ pj.write_tape(0, c); /* here the compiler knows what c is so this gets \
439
+ optimized */ \
440
+ SET_GOTO_ARRAY_CONTINUE() \
441
+ /* we found an array inside an array, so we need to increment the depth \
442
+ */ \
443
+ depth++; \
444
+ if (depth >= pj.depth_capacity) { \
445
+ goto fail; \
446
+ } \
447
+ goto array_begin; \
448
+ } \
449
+ default: \
450
+ goto fail; \
451
+ } \
452
+ \
453
+ array_continue: \
454
+ UPDATE_CHAR(); \
455
+ switch (c) { \
456
+ case ',': \
457
+ UPDATE_CHAR(); \
458
+ goto main_array_switch; \
459
+ case ']': \
460
+ goto scope_end; \
461
+ default: \
462
+ goto fail; \
463
+ } \
464
+ \
465
+ /*//////////////////////////// FINAL STATES ///////////////////////////*/ \
466
+ \
467
+ succeed: \
468
+ depth--; \
469
+ if (depth != 0) { \
470
+ fprintf(stderr, "internal bug\n"); \
471
+ abort(); \
472
+ } \
473
+ if (pj.containing_scope_offset[depth] != 0) { \
474
+ fprintf(stderr, "internal bug\n"); \
475
+ abort(); \
476
+ } \
477
+ pj.annotate_previous_loc(pj.containing_scope_offset[depth], \
478
+ pj.get_current_loc()); \
479
+ pj.write_tape(pj.containing_scope_offset[depth], 'r'); /* r is root */ \
480
+ \
481
+ pj.valid = true; \
482
+ pj.error_code = simdjson::SUCCESS; \
483
+ return pj.error_code; \
484
+ fail: \
485
+ /* we do not need the next line because this is done by pj.init(), \
486
+ * pessimistically. \
487
+ * pj.is_valid = false; \
488
+ * At this point in the code, we have all the time in the world. \
489
+ * Note that we know exactly where we are in the document so we could, \
490
+ * without any overhead on the processing code, report a specific \
491
+ * location. \
492
+ * We could even trigger special code paths to assess what happened \
493
+ * carefully, \
494
+ * all without any added cost. */ \
495
+ if (depth >= pj.depth_capacity) { \
496
+ pj.error_code = simdjson::DEPTH_ERROR; \
497
+ return pj.error_code; \
498
+ } \
499
+ switch (c) { \
500
+ case '"': \
501
+ pj.error_code = simdjson::STRING_ERROR; \
502
+ return pj.error_code; \
503
+ case '0': \
504
+ case '1': \
505
+ case '2': \
506
+ case '3': \
507
+ case '4': \
508
+ case '5': \
509
+ case '6': \
510
+ case '7': \
511
+ case '8': \
512
+ case '9': \
513
+ case '-': \
514
+ pj.error_code = simdjson::NUMBER_ERROR; \
515
+ return pj.error_code; \
516
+ case 't': \
517
+ pj.error_code = simdjson::T_ATOM_ERROR; \
518
+ return pj.error_code; \
519
+ case 'n': \
520
+ pj.error_code = simdjson::N_ATOM_ERROR; \
521
+ return pj.error_code; \
522
+ case 'f': \
523
+ pj.error_code = simdjson::F_ATOM_ERROR; \
524
+ return pj.error_code; \
525
+ default: \
526
+ break; \
527
+ } \
528
+ pj.error_code = simdjson::TAPE_ERROR; \
529
+ return pj.error_code; \
530
+ }
531
+
532
+ } // namespace simdjson
533
+
534
+ #ifdef IS_X86_64 // x86-64 builds define both the HASWELL and the WESTMERE specializations
535
+ TARGET_HASWELL // NOTE(review): presumably opens a Haswell target region closed by UNTARGET_REGION below; confirm where the macro is defined
536
+ namespace simdjson {
537
+ template <> // explicit specialization of unified_machine for Architecture::HASWELL
538
+ WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER int
539
+ unified_machine<Architecture::HASWELL>(const uint8_t *buf, size_t len,
540
+ ParsedJson &pj) {
541
+ UNIFIED_MACHINE(Architecture::HASWELL, buf, len, pj); // expands to the whole state machine; every return statement yields pj.error_code
542
+ }
543
+ } // namespace simdjson
544
+ UNTARGET_REGION
545
+
546
+ TARGET_WESTMERE // NOTE(review): presumably opens a Westmere target region closed by UNTARGET_REGION below; confirm where the macro is defined
547
+ namespace simdjson {
548
+ template <> // explicit specialization of unified_machine for Architecture::WESTMERE
549
+ WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER int
550
+ unified_machine<Architecture::WESTMERE>(const uint8_t *buf, size_t len,
551
+ ParsedJson &pj) {
552
+ UNIFIED_MACHINE(Architecture::WESTMERE, buf, len, pj); // expands to the whole state machine; every return statement yields pj.error_code
553
+ }
554
+ } // namespace simdjson
555
+ UNTARGET_REGION
556
+ #endif // IS_X86_64
557
+
558
+ #ifdef IS_ARM64 // ARM64 builds define a single specialization, with no TARGET_* region around it
559
+ namespace simdjson {
560
+ template <> // explicit specialization of unified_machine for Architecture::ARM64
561
+ WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER int
562
+ unified_machine<Architecture::ARM64>(const uint8_t *buf, size_t len,
563
+ ParsedJson &pj) {
564
+ UNIFIED_MACHINE(Architecture::ARM64, buf, len, pj); // expands to the whole state machine; every return statement yields pj.error_code
565
+ }
566
+ } // namespace simdjson
567
+ #endif // IS_ARM64
@@ -0,0 +1,25 @@
#!/bin/bash
# Checks that every C/C++ source file under the project's content directories
# already matches the clang-format style. No file is modified.
#
# Exit status:
#   0  all files conform
#   1  at least one file differs, clang-format is not installed, or the
#      project root could not be entered

# Resolve the directory holding this script; the project root is its parent.
SCRIPTPATH="$(cd "$(dirname "$0")" && pwd -P)" || {
  echo "Unable to resolve the directory containing $0." >&2
  exit 1
}
BASE="$SCRIPTPATH/.."
cd "$BASE" || {
  echo "Unable to enter $BASE." >&2
  exit 1
}

# command -v is the portable way to locate an executable (which is not).
if ! STYLE=$(command -v clang-format); then
  echo "clang-format not installed. Unable to check source file format policy." >&2
  exit 1
fi
OURSTYLE=() # extra clang-format options; empty means defer to .clang-format
OURCONTENT=(include benchmark tools tests src)
RE=0

# NUL-delimited find output keeps paths containing spaces or glob characters
# intact. The parentheses make -print0 apply to every -name alternative.
while IFS= read -r -d '' FILE; do
  echo "checking $FILE"
  # The pipeline status is cmp's: non-zero when the formatted output differs.
  if ! "$STYLE" "${OURSTYLE[@]}" "$BASE/$FILE" | cmp -s "$BASE/$FILE" -; then
    echo "$BASE/$FILE does not respect the coding style." >&2
    echo "consider typing $STYLE -i $BASE/$FILE ${OURSTYLE[*]} to fix the problem." >&2
    RE=1
  fi
done < <(find "${OURCONTENT[@]}" \
  \( -name '*.c' -o -name '*.cpp' -o -name '*.h' -o -name '*.cc' -o -name '*.hh' \) \
  -print0)

exit $RE
@@ -0,0 +1,25 @@
#!/bin/bash
# Reformats, in place, every C/C++ source file under the project's content
# directories that does not already match the clang-format style.
#
# Exit status:
#   0  every file already conformed
#   1  at least one file was reformatted, clang-format is not installed, or
#      the project root could not be entered

# Resolve the directory holding this script; the project root is its parent.
SCRIPTPATH="$(cd "$(dirname "$0")" && pwd -P)" || {
  echo "Unable to resolve the directory containing $0." >&2
  exit 1
}
cd "$SCRIPTPATH/.." || {
  echo "Unable to enter $SCRIPTPATH/..." >&2
  exit 1
}
# find below yields paths relative to the current directory, so BASE must be
# this directory. Asking git for the top level breaks when the tree is
# vendored inside another repository or is not a git checkout at all.
BASE=$(pwd -P)

# command -v is the portable way to locate an executable (which is not).
if ! STYLE=$(command -v clang-format); then
  echo "clang-format not installed. Unable to check source file format policy." >&2
  exit 1
fi
OURSTYLE=() # extra clang-format options; empty means defer to .clang-format
OURCONTENT=(include benchmark tools tests src)
RE=0

# NUL-delimited find output keeps paths containing spaces or glob characters
# intact. The parentheses make -print0 apply to every -name alternative.
while IFS= read -r -d '' FILE; do
  # The pipeline status is cmp's: non-zero when the formatted output differs.
  if ! "$STYLE" "${OURSTYLE[@]}" "$BASE/$FILE" | cmp -s "$BASE/$FILE" -; then
    echo "$BASE/$FILE does not respect the coding style. Formatting. " >&2
    "$STYLE" "${OURSTYLE[@]}" -i "$BASE/$FILE"
    RE=1
  fi
done < <(find "${OURCONTENT[@]}" \
  \( -name '*.c' -o -name '*.cpp' -o -name '*.h' -o -name '*.cc' -o -name '*.hh' \) \
  -print0)

exit $RE