simdjson 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (132) hide show
  1. checksums.yaml +7 -0
  2. data/.clang-format +5 -0
  3. data/.gitignore +14 -0
  4. data/.gitmodules +3 -0
  5. data/.rubocop.yml +9 -0
  6. data/.travis.yml +7 -0
  7. data/Gemfile +4 -0
  8. data/LICENSE.txt +21 -0
  9. data/README.md +39 -0
  10. data/Rakefile +32 -0
  11. data/benchmark/apache_builds.json +4421 -0
  12. data/benchmark/demo.json +15 -0
  13. data/benchmark/github_events.json +1390 -0
  14. data/benchmark/run_benchmark.rb +30 -0
  15. data/ext/simdjson/extconf.rb +22 -0
  16. data/ext/simdjson/simdjson.cpp +76 -0
  17. data/ext/simdjson/simdjson.hpp +6 -0
  18. data/lib/simdjson/version.rb +3 -0
  19. data/lib/simdjson.rb +2 -0
  20. data/simdjson.gemspec +35 -0
  21. data/vendor/.gitkeep +0 -0
  22. data/vendor/simdjson/AUTHORS +3 -0
  23. data/vendor/simdjson/CMakeLists.txt +63 -0
  24. data/vendor/simdjson/CONTRIBUTORS +27 -0
  25. data/vendor/simdjson/Dockerfile +10 -0
  26. data/vendor/simdjson/LICENSE +201 -0
  27. data/vendor/simdjson/Makefile +203 -0
  28. data/vendor/simdjson/Notes.md +85 -0
  29. data/vendor/simdjson/README.md +581 -0
  30. data/vendor/simdjson/amalgamation.sh +158 -0
  31. data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
  32. data/vendor/simdjson/benchmark/benchmark.h +223 -0
  33. data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
  34. data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
  35. data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
  36. data/vendor/simdjson/benchmark/parse.cpp +393 -0
  37. data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
  38. data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
  39. data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
  40. data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
  41. data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
  42. data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
  43. data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
  44. data/vendor/simdjson/doc/gbps.png +0 -0
  45. data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
  46. data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
  47. data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
  48. data/vendor/simdjson/images/halvarflake.png +0 -0
  49. data/vendor/simdjson/images/logo.png +0 -0
  50. data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
  51. data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
  52. data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
  53. data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
  54. data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
  55. data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
  56. data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
  57. data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
  58. data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
  59. data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
  60. data/vendor/simdjson/include/simdjson/portability.h +172 -0
  61. data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
  62. data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
  63. data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
  64. data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
  65. data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
  66. data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
  67. data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
  68. data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
  69. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
  70. data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
  71. data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
  72. data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
  73. data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
  74. data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
  75. data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
  76. data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
  77. data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
  78. data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
  79. data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
  80. data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
  81. data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
  82. data/vendor/simdjson/scripts/issue150.sh +14 -0
  83. data/vendor/simdjson/scripts/javascript/README.md +3 -0
  84. data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
  85. data/vendor/simdjson/scripts/minifier.sh +11 -0
  86. data/vendor/simdjson/scripts/parseandstat.sh +24 -0
  87. data/vendor/simdjson/scripts/parser.sh +11 -0
  88. data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
  89. data/vendor/simdjson/scripts/plotparse.sh +98 -0
  90. data/vendor/simdjson/scripts/selectparser.sh +11 -0
  91. data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
  92. data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
  93. data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
  94. data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
  95. data/vendor/simdjson/scripts/testjson2json.sh +99 -0
  96. data/vendor/simdjson/scripts/transitions/Makefile +10 -0
  97. data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
  98. data/vendor/simdjson/singleheader/README.md +1 -0
  99. data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
  100. data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
  101. data/vendor/simdjson/singleheader/simdjson.h +39692 -0
  102. data/vendor/simdjson/src/CMakeLists.txt +67 -0
  103. data/vendor/simdjson/src/jsonioutil.cpp +35 -0
  104. data/vendor/simdjson/src/jsonminifier.cpp +285 -0
  105. data/vendor/simdjson/src/jsonparser.cpp +91 -0
  106. data/vendor/simdjson/src/parsedjson.cpp +323 -0
  107. data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
  108. data/vendor/simdjson/src/simdjson.cpp +30 -0
  109. data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
  110. data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
  111. data/vendor/simdjson/style/clang-format-check.sh +25 -0
  112. data/vendor/simdjson/style/clang-format.sh +25 -0
  113. data/vendor/simdjson/style/run-clang-format.py +326 -0
  114. data/vendor/simdjson/tape.md +134 -0
  115. data/vendor/simdjson/tests/CMakeLists.txt +25 -0
  116. data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
  117. data/vendor/simdjson/tests/basictests.cpp +75 -0
  118. data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
  119. data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
  120. data/vendor/simdjson/tests/pointercheck.cpp +38 -0
  121. data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
  122. data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
  123. data/vendor/simdjson/tools/CMakeLists.txt +3 -0
  124. data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
  125. data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
  126. data/vendor/simdjson/tools/json2json.cpp +112 -0
  127. data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
  128. data/vendor/simdjson/tools/jsonstats.cpp +143 -0
  129. data/vendor/simdjson/tools/minify.cpp +21 -0
  130. data/vendor/simdjson/tools/release.py +125 -0
  131. data/vendor/simdjson/windows/dirent_portable.h +1043 -0
  132. metadata +273 -0
@@ -0,0 +1,323 @@
1
+ #include "simdjson/parsedjson.h"
2
+
3
+ namespace simdjson {
4
+ ParsedJson::ParsedJson()
5
+ : structural_indexes(nullptr), tape(nullptr),
6
+ containing_scope_offset(nullptr), ret_address(nullptr),
7
+ string_buf(nullptr), current_string_buf_loc(nullptr) {}
8
+
9
+ ParsedJson::~ParsedJson() { deallocate(); }
10
+
11
+ ParsedJson::ParsedJson(ParsedJson &&p)
12
+ : byte_capacity(p.byte_capacity), depth_capacity(p.depth_capacity),
13
+ tape_capacity(p.tape_capacity), string_capacity(p.string_capacity),
14
+ current_loc(p.current_loc), n_structural_indexes(p.n_structural_indexes),
15
+ structural_indexes(p.structural_indexes), tape(p.tape),
16
+ containing_scope_offset(p.containing_scope_offset),
17
+ ret_address(p.ret_address), string_buf(p.string_buf),
18
+ current_string_buf_loc(p.current_string_buf_loc), valid(p.valid) {
19
+ p.structural_indexes = nullptr;
20
+ p.tape = nullptr;
21
+ p.containing_scope_offset = nullptr;
22
+ p.ret_address = nullptr;
23
+ p.string_buf = nullptr;
24
+ p.current_string_buf_loc = nullptr;
25
+ }
26
+
27
+ WARN_UNUSED
28
+ bool ParsedJson::allocate_capacity(size_t len, size_t max_depth) {
29
+ if (max_depth <= 0) {
30
+ max_depth = 1; // don't let the user allocate nothing
31
+ }
32
+ if (len <= 0) {
33
+ len = 64; // allocating 0 bytes is wasteful.
34
+ }
35
+ if (len > SIMDJSON_MAXSIZE_BYTES) {
36
+ return false;
37
+ }
38
+ if ((len <= byte_capacity) && (depth_capacity < max_depth)) {
39
+ return true;
40
+ }
41
+ deallocate();
42
+ valid = false;
43
+ byte_capacity = 0; // will only set it to len after allocations are a success
44
+ n_structural_indexes = 0;
45
+ uint32_t max_structures = ROUNDUP_N(len, 64) + 2 + 7;
46
+ structural_indexes = new (std::nothrow) uint32_t[max_structures];
47
+ // a pathological input like "[[[[..." would generate len tape elements, so
48
+ // need a capacity of len + 1
49
+ size_t local_tape_capacity = ROUNDUP_N(len + 1, 64);
50
+ // a document with only zero-length strings... could have len/3 string
51
+ // and we would need len/3 * 5 bytes on the string buffer
52
+ size_t local_string_capacity = ROUNDUP_N(5 * len / 3 + 32, 64);
53
+ string_buf = new (std::nothrow) uint8_t[local_string_capacity];
54
+ tape = new (std::nothrow) uint64_t[local_tape_capacity];
55
+ containing_scope_offset = new (std::nothrow) uint32_t[max_depth];
56
+ #ifdef SIMDJSON_USE_COMPUTED_GOTO
57
+ ret_address = new (std::nothrow) void *[max_depth];
58
+ #else
59
+ ret_address = new (std::nothrow) char[max_depth];
60
+ #endif
61
+ if ((string_buf == nullptr) || (tape == nullptr) ||
62
+ (containing_scope_offset == nullptr) || (ret_address == nullptr) ||
63
+ (structural_indexes == nullptr)) {
64
+ std::cerr << "Could not allocate memory" << std::endl;
65
+ delete[] ret_address;
66
+ delete[] containing_scope_offset;
67
+ delete[] tape;
68
+ delete[] string_buf;
69
+ delete[] structural_indexes;
70
+
71
+ return false;
72
+ }
73
+ /*
74
+ // We do not need to initialize this content for parsing, though we could
75
+ // need to initialize it for safety.
76
+ memset(string_buf, 0 , local_string_capacity);
77
+ memset(structural_indexes, 0, max_structures * sizeof(uint32_t));
78
+ memset(tape, 0, local_tape_capacity * sizeof(uint64_t));
79
+ */
80
+ byte_capacity = len;
81
+ depth_capacity = max_depth;
82
+ tape_capacity = local_tape_capacity;
83
+ string_capacity = local_string_capacity;
84
+ return true;
85
+ }
86
+
87
+ bool ParsedJson::is_valid() const { return valid; }
88
+
89
+ int ParsedJson::get_error_code() const { return error_code; }
90
+
91
+ std::string ParsedJson::get_error_message() const {
92
+ return error_message(error_code);
93
+ }
94
+
95
+ void ParsedJson::deallocate() {
96
+ byte_capacity = 0;
97
+ depth_capacity = 0;
98
+ tape_capacity = 0;
99
+ string_capacity = 0;
100
+ delete[] ret_address;
101
+ delete[] containing_scope_offset;
102
+ delete[] tape;
103
+ delete[] string_buf;
104
+ delete[] structural_indexes;
105
+ valid = false;
106
+ }
107
+
108
+ void ParsedJson::init() {
109
+ current_string_buf_loc = string_buf;
110
+ current_loc = 0;
111
+ valid = false;
112
+ }
113
+
114
+ WARN_UNUSED
115
+ bool ParsedJson::print_json(std::ostream &os) {
116
+ if (!valid) {
117
+ return false;
118
+ }
119
+ uint32_t string_length;
120
+ size_t tape_idx = 0;
121
+ uint64_t tape_val = tape[tape_idx];
122
+ uint8_t type = (tape_val >> 56);
123
+ size_t how_many = 0;
124
+ if (type == 'r') {
125
+ how_many = tape_val & JSON_VALUE_MASK;
126
+ } else {
127
+ fprintf(stderr, "Error: no starting root node?");
128
+ return false;
129
+ }
130
+ if (how_many > tape_capacity) {
131
+ fprintf(
132
+ stderr,
133
+ "We may be exceeding the tape capacity. Is this a valid document?\n");
134
+ return false;
135
+ }
136
+ tape_idx++;
137
+ bool *in_object = new bool[depth_capacity];
138
+ auto *in_object_idx = new size_t[depth_capacity];
139
+ int depth = 1; // only root at level 0
140
+ in_object_idx[depth] = 0;
141
+ in_object[depth] = false;
142
+ for (; tape_idx < how_many; tape_idx++) {
143
+ tape_val = tape[tape_idx];
144
+ uint64_t payload = tape_val & JSON_VALUE_MASK;
145
+ type = (tape_val >> 56);
146
+ if (!in_object[depth]) {
147
+ if ((in_object_idx[depth] > 0) && (type != ']')) {
148
+ os << ",";
149
+ }
150
+ in_object_idx[depth]++;
151
+ } else { // if (in_object) {
152
+ if ((in_object_idx[depth] > 0) && ((in_object_idx[depth] & 1) == 0) &&
153
+ (type != '}')) {
154
+ os << ",";
155
+ }
156
+ if (((in_object_idx[depth] & 1) == 1)) {
157
+ os << ":";
158
+ }
159
+ in_object_idx[depth]++;
160
+ }
161
+ switch (type) {
162
+ case '"': // we have a string
163
+ os << '"';
164
+ memcpy(&string_length, string_buf + payload, sizeof(uint32_t));
165
+ print_with_escapes(
166
+ (const unsigned char *)(string_buf + payload + sizeof(uint32_t)),
167
+ string_length);
168
+ os << '"';
169
+ break;
170
+ case 'l': // we have a long int
171
+ if (tape_idx + 1 >= how_many) {
172
+ delete[] in_object;
173
+ delete[] in_object_idx;
174
+ return false;
175
+ }
176
+ os << static_cast<int64_t>(tape[++tape_idx]);
177
+ break;
178
+ case 'd': // we have a double
179
+ if (tape_idx + 1 >= how_many) {
180
+ delete[] in_object;
181
+ delete[] in_object_idx;
182
+ return false;
183
+ }
184
+ double answer;
185
+ memcpy(&answer, &tape[++tape_idx], sizeof(answer));
186
+ os << answer;
187
+ break;
188
+ case 'n': // we have a null
189
+ os << "null";
190
+ break;
191
+ case 't': // we have a true
192
+ os << "true";
193
+ break;
194
+ case 'f': // we have a false
195
+ os << "false";
196
+ break;
197
+ case '{': // we have an object
198
+ os << '{';
199
+ depth++;
200
+ in_object[depth] = true;
201
+ in_object_idx[depth] = 0;
202
+ break;
203
+ case '}': // we end an object
204
+ depth--;
205
+ os << '}';
206
+ break;
207
+ case '[': // we start an array
208
+ os << '[';
209
+ depth++;
210
+ in_object[depth] = false;
211
+ in_object_idx[depth] = 0;
212
+ break;
213
+ case ']': // we end an array
214
+ depth--;
215
+ os << ']';
216
+ break;
217
+ case 'r': // we start and end with the root node
218
+ fprintf(stderr, "should we be hitting the root node?\n");
219
+ delete[] in_object;
220
+ delete[] in_object_idx;
221
+ return false;
222
+ default:
223
+ fprintf(stderr, "bug %c\n", type);
224
+ delete[] in_object;
225
+ delete[] in_object_idx;
226
+ return false;
227
+ }
228
+ }
229
+ delete[] in_object;
230
+ delete[] in_object_idx;
231
+ return true;
232
+ }
233
+
234
+ WARN_UNUSED
235
+ bool ParsedJson::dump_raw_tape(std::ostream &os) {
236
+ if (!valid) {
237
+ return false;
238
+ }
239
+ uint32_t string_length;
240
+ size_t tape_idx = 0;
241
+ uint64_t tape_val = tape[tape_idx];
242
+ uint8_t type = (tape_val >> 56);
243
+ os << tape_idx << " : " << type;
244
+ tape_idx++;
245
+ size_t how_many = 0;
246
+ if (type == 'r') {
247
+ how_many = tape_val & JSON_VALUE_MASK;
248
+ } else {
249
+ fprintf(stderr, "Error: no starting root node?");
250
+ return false;
251
+ }
252
+ os << "\t// pointing to " << how_many << " (right after last node)\n";
253
+ uint64_t payload;
254
+ for (; tape_idx < how_many; tape_idx++) {
255
+ os << tape_idx << " : ";
256
+ tape_val = tape[tape_idx];
257
+ payload = tape_val & JSON_VALUE_MASK;
258
+ type = (tape_val >> 56);
259
+ switch (type) {
260
+ case '"': // we have a string
261
+ os << "string \"";
262
+ memcpy(&string_length, string_buf + payload, sizeof(uint32_t));
263
+ print_with_escapes(
264
+ (const unsigned char *)(string_buf + payload + sizeof(uint32_t)),
265
+ string_length);
266
+ os << '"';
267
+ os << '\n';
268
+ break;
269
+ case 'l': // we have a long int
270
+ if (tape_idx + 1 >= how_many) {
271
+ return false;
272
+ }
273
+ os << "integer " << static_cast<int64_t>(tape[++tape_idx]) << "\n";
274
+ break;
275
+ case 'd': // we have a double
276
+ os << "float ";
277
+ if (tape_idx + 1 >= how_many) {
278
+ return false;
279
+ }
280
+ double answer;
281
+ memcpy(&answer, &tape[++tape_idx], sizeof(answer));
282
+ os << answer << '\n';
283
+ break;
284
+ case 'n': // we have a null
285
+ os << "null\n";
286
+ break;
287
+ case 't': // we have a true
288
+ os << "true\n";
289
+ break;
290
+ case 'f': // we have a false
291
+ os << "false\n";
292
+ break;
293
+ case '{': // we have an object
294
+ os << "{\t// pointing to next tape location " << payload
295
+ << " (first node after the scope) \n";
296
+ break;
297
+ case '}': // we end an object
298
+ os << "}\t// pointing to previous tape location " << payload
299
+ << " (start of the scope) \n";
300
+ break;
301
+ case '[': // we start an array
302
+ os << "[\t// pointing to next tape location " << payload
303
+ << " (first node after the scope) \n";
304
+ break;
305
+ case ']': // we end an array
306
+ os << "]\t// pointing to previous tape location " << payload
307
+ << " (start of the scope) \n";
308
+ break;
309
+ case 'r': // we start and end with the root node
310
+ printf("end of root\n");
311
+ return false;
312
+ default:
313
+ return false;
314
+ }
315
+ }
316
+ tape_val = tape[tape_idx];
317
+ payload = tape_val & JSON_VALUE_MASK;
318
+ type = (tape_val >> 56);
319
+ os << tape_idx << " : " << type << "\t// pointing to " << payload
320
+ << " (start root)\n";
321
+ return true;
322
+ }
323
+ } // namespace simdjson
@@ -0,0 +1,272 @@
1
+ #include "simdjson/common_defs.h"
2
+ #include "simdjson/parsedjson.h"
3
+ #include <iterator>
4
+
5
+ namespace simdjson {
6
+ ParsedJson::Iterator::Iterator(ParsedJson &pj_)
7
+ : pj(pj_), depth(0), location(0), tape_length(0), depth_index(nullptr) {
8
+ if (!pj.is_valid()) {
9
+ throw InvalidJSON();
10
+ }
11
+ // we overallocate by "1" to silence a warning in Visual Studio
12
+ depth_index = new scopeindex_t[pj.depth_capacity + 1];
13
+ // memory allocation would throw
14
+ // if(depth_index == nullptr) {
15
+ // return;
16
+ //}
17
+ depth_index[0].start_of_scope = location;
18
+ current_val = pj.tape[location++];
19
+ current_type = (current_val >> 56);
20
+ depth_index[0].scope_type = current_type;
21
+ if (current_type == 'r') {
22
+ tape_length = current_val & JSON_VALUE_MASK;
23
+ if (location < tape_length) {
24
+ // If we make it here, then depth_capacity must >=2, but the compiler
25
+ // may not know this.
26
+ current_val = pj.tape[location];
27
+ current_type = (current_val >> 56);
28
+ depth++;
29
+ depth_index[depth].start_of_scope = location;
30
+ depth_index[depth].scope_type = current_type;
31
+ }
32
+ } else {
33
+ // should never happen
34
+ throw InvalidJSON();
35
+ }
36
+ }
37
+
38
+ ParsedJson::Iterator::~Iterator() { delete[] depth_index; }
39
+
40
+ ParsedJson::Iterator::Iterator(const Iterator &o) noexcept
41
+ : pj(o.pj), depth(o.depth), location(o.location), tape_length(0),
42
+ current_type(o.current_type), current_val(o.current_val),
43
+ depth_index(nullptr) {
44
+ depth_index = new scopeindex_t[pj.depth_capacity];
45
+ // allocation might throw
46
+ memcpy(depth_index, o.depth_index,
47
+ pj.depth_capacity * sizeof(depth_index[0]));
48
+ tape_length = o.tape_length;
49
+ }
50
+
51
+ ParsedJson::Iterator::Iterator(Iterator &&o) noexcept
52
+ : pj(o.pj), depth(o.depth), location(o.location),
53
+ tape_length(o.tape_length), current_type(o.current_type),
54
+ current_val(o.current_val), depth_index(o.depth_index) {
55
+ o.depth_index = nullptr; // we take ownership
56
+ }
57
+
58
+ bool ParsedJson::Iterator::print(std::ostream &os, bool escape_strings) const {
59
+ if (!is_ok()) {
60
+ return false;
61
+ }
62
+ switch (current_type) {
63
+ case '"': // we have a string
64
+ os << '"';
65
+ if (escape_strings) {
66
+ print_with_escapes(get_string(), os, get_string_length());
67
+ } else {
68
+ // was: os << get_string();, but given that we can include null chars, we
69
+ // have to do something crazier:
70
+ std::copy(get_string(), get_string() + get_string_length(),
71
+ std::ostream_iterator<char>(os));
72
+ }
73
+ os << '"';
74
+ break;
75
+ case 'l': // we have a long int
76
+ os << get_integer();
77
+ break;
78
+ case 'd':
79
+ os << get_double();
80
+ break;
81
+ case 'n': // we have a null
82
+ os << "null";
83
+ break;
84
+ case 't': // we have a true
85
+ os << "true";
86
+ break;
87
+ case 'f': // we have a false
88
+ os << "false";
89
+ break;
90
+ case '{': // we have an object
91
+ case '}': // we end an object
92
+ case '[': // we start an array
93
+ case ']': // we end an array
94
+ os << static_cast<char>(current_type);
95
+ break;
96
+ default:
97
+ return false;
98
+ }
99
+ return true;
100
+ }
101
+
102
+ bool ParsedJson::Iterator::move_to(const char *pointer, uint32_t length) {
103
+ char *new_pointer = nullptr;
104
+ if (pointer[0] == '#') {
105
+ // Converting fragment representation to string representation
106
+ new_pointer = new char[length];
107
+ uint32_t new_length = 0;
108
+ for (uint32_t i = 1; i < length; i++) {
109
+ if (pointer[i] == '%' && pointer[i + 1] == 'x') {
110
+ try {
111
+ int fragment =
112
+ std::stoi(std::string(&pointer[i + 2], 2), nullptr, 16);
113
+ if (fragment == '\\' || fragment == '"' || (fragment <= 0x1F)) {
114
+ // escaping the character
115
+ new_pointer[new_length] = '\\';
116
+ new_length++;
117
+ }
118
+ new_pointer[new_length] = fragment;
119
+ i += 3;
120
+ } catch (std::invalid_argument &) {
121
+ delete[] new_pointer;
122
+ return false; // the fragment is invalid
123
+ }
124
+ } else {
125
+ new_pointer[new_length] = pointer[i];
126
+ }
127
+ new_length++;
128
+ }
129
+ length = new_length;
130
+ pointer = new_pointer;
131
+ }
132
+
133
+ // saving the current state
134
+ size_t depth_s = depth;
135
+ size_t location_s = location;
136
+ uint8_t current_type_s = current_type;
137
+ uint64_t current_val_s = current_val;
138
+ scopeindex_t *depth_index_s = depth_index;
139
+
140
+ rewind(); // The json pointer is used from the root of the document.
141
+
142
+ bool found = relative_move_to(pointer, length);
143
+ delete[] new_pointer;
144
+
145
+ if (!found) {
146
+ // since the pointer has found nothing, we get back to the original
147
+ // position.
148
+ depth = depth_s;
149
+ location = location_s;
150
+ current_type = current_type_s;
151
+ current_val = current_val_s;
152
+ depth_index = depth_index_s;
153
+ }
154
+
155
+ return found;
156
+ }
157
+
158
+ bool ParsedJson::Iterator::relative_move_to(const char *pointer,
159
+ uint32_t length) {
160
+ if (length == 0) {
161
+ // returns the whole document
162
+ return true;
163
+ }
164
+
165
+ if (pointer[0] != '/') {
166
+ // '/' must be the first character
167
+ return false;
168
+ }
169
+
170
+ // finding the key in an object or the index in an array
171
+ std::string key_or_index;
172
+ uint32_t offset = 1;
173
+
174
+ // checking for the "-" case
175
+ if (is_array() && pointer[1] == '-') {
176
+ if (length != 2) {
177
+ // the pointer must be exactly "/-"
178
+ // there can't be anything more after '-' as an index
179
+ return false;
180
+ }
181
+ key_or_index = '-';
182
+ offset = length; // will skip the loop coming right after
183
+ }
184
+
185
+ // We either transform the first reference token to a valid json key
186
+ // or we make sure it is a valid index in an array.
187
+ for (; offset < length; offset++) {
188
+ if (pointer[offset] == '/') {
189
+ // beginning of the next key or index
190
+ break;
191
+ }
192
+ if (is_array() && (pointer[offset] < '0' || pointer[offset] > '9')) {
193
+ // the index of an array must be an integer
194
+ // we also make sure std::stoi won't discard whitespaces later
195
+ return false;
196
+ }
197
+ if (pointer[offset] == '~') {
198
+ // "~1" represents "/"
199
+ if (pointer[offset + 1] == '1') {
200
+ key_or_index += '/';
201
+ offset++;
202
+ continue;
203
+ }
204
+ // "~0" represents "~"
205
+ if (pointer[offset + 1] == '0') {
206
+ key_or_index += '~';
207
+ offset++;
208
+ continue;
209
+ }
210
+ }
211
+ if (pointer[offset] == '\\') {
212
+ if (pointer[offset + 1] == '\\' || pointer[offset + 1] == '"' ||
213
+ (pointer[offset + 1] <= 0x1F)) {
214
+ key_or_index += pointer[offset + 1];
215
+ offset++;
216
+ continue;
217
+ }
218
+ return false; // invalid escaped character
219
+ }
220
+ if (pointer[offset] == '\"') {
221
+ // unescaped quote character. this is an invalid case.
222
+ // lets do nothing and assume most pointers will be valid.
223
+ // it won't find any corresponding json key anyway.
224
+ // return false;
225
+ }
226
+ key_or_index += pointer[offset];
227
+ }
228
+
229
+ bool found = false;
230
+ if (is_object()) {
231
+ if (move_to_key(key_or_index.c_str(), key_or_index.length())) {
232
+ found = relative_move_to(pointer + offset, length - offset);
233
+ }
234
+ } else if (is_array()) {
235
+ if (key_or_index == "-") { // handling "-" case first
236
+ if (down()) {
237
+ while (next())
238
+ ; // moving to the end of the array
239
+ // moving to the nonexistent value right after...
240
+ size_t npos;
241
+ if ((current_type == '[') || (current_type == '{')) {
242
+ // we need to jump
243
+ npos = (current_val & JSON_VALUE_MASK);
244
+ } else {
245
+ npos =
246
+ location + ((current_type == 'd' || current_type == 'l') ? 2 : 1);
247
+ }
248
+ location = npos;
249
+ current_val = pj.tape[npos];
250
+ current_type = (current_val >> 56);
251
+ return true; // how could it fail ?
252
+ }
253
+ } else { // regular numeric index
254
+ // The index can't have a leading '0'
255
+ if (key_or_index[0] == '0' && key_or_index.length() > 1) {
256
+ return false;
257
+ }
258
+ // it cannot be empty
259
+ if (key_or_index.length() == 0) {
260
+ return false;
261
+ }
262
+ // we already checked the index contains only valid digits
263
+ uint32_t index = std::stoi(key_or_index);
264
+ if (move_to_index(index)) {
265
+ found = relative_move_to(pointer + offset, length - offset);
266
+ }
267
+ }
268
+ }
269
+
270
+ return found;
271
+ }
272
+ } // namespace simdjson
@@ -0,0 +1,30 @@
1
+ #include "simdjson/simdjson.h"
2
+ #include <map>
3
+
4
+ namespace simdjson {
5
+ const std::map<int, const std::string> error_strings = {
6
+ {SUCCESS, "No errors"},
7
+ {CAPACITY, "This ParsedJson can't support a document that big"},
8
+ {MEMALLOC, "Error allocating memory, we're most likely out of memory"},
9
+ {TAPE_ERROR, "Something went wrong while writing to the tape"},
10
+ {STRING_ERROR, "Problem while parsing a string"},
11
+ {T_ATOM_ERROR,
12
+ "Problem while parsing an atom starting with the letter 't'"},
13
+ {F_ATOM_ERROR,
14
+ "Problem while parsing an atom starting with the letter 'f'"},
15
+ {N_ATOM_ERROR,
16
+ "Problem while parsing an atom starting with the letter 'n'"},
17
+ {NUMBER_ERROR, "Problem while parsing a number"},
18
+ {UTF8_ERROR, "The input is not valid UTF-8"},
19
+ {UNITIALIZED, "Unitialized"},
20
+ {EMPTY, "Empty"},
21
+ {UNESCAPED_CHARS, "Within strings, some characters must be escapted, we "
22
+ "found unescapted characters"},
23
+ {UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as "
24
+ "you may have found a bug in simdjson"},
25
+ };
26
+
27
+ const std::string &error_message(const int error_code) {
28
+ return error_strings.at(error_code);
29
+ }
30
+ } // namespace simdjson
@@ -0,0 +1,41 @@
1
+ #include "simdjson/portability.h"
2
+
3
+ #ifdef IS_X86_64
4
+
5
+ #include "simdjson/stage1_find_marks_haswell.h"
6
+ #include "simdjson/stage1_find_marks_westmere.h"
7
+ TARGET_HASWELL
8
+ namespace simdjson {
9
+ template <>
10
+ int find_structural_bits<Architecture::HASWELL>(const uint8_t *buf, size_t len,
11
+ ParsedJson &pj) {
12
+ FIND_STRUCTURAL_BITS(Architecture::HASWELL, buf, len, pj,
13
+ simdjson::haswell::flatten_bits);
14
+ }
15
+ } // namespace simdjson
16
+ UNTARGET_REGION
17
+
18
+ TARGET_WESTMERE
19
+ namespace simdjson {
20
+ template <>
21
+ int find_structural_bits<Architecture::WESTMERE>(const uint8_t *buf, size_t len,
22
+ ParsedJson &pj) {
23
+ FIND_STRUCTURAL_BITS(Architecture::WESTMERE, buf, len, pj,
24
+ simdjson::flatten_bits);
25
+ }
26
+ } // namespace simdjson
27
+ UNTARGET_REGION
28
+
29
+ #endif
30
+
31
+ #ifdef IS_ARM64
32
+ #include "simdjson/stage1_find_marks_arm64.h"
33
+ namespace simdjson {
34
+ template <>
35
+ int find_structural_bits<Architecture::ARM64>(const uint8_t *buf, size_t len,
36
+ ParsedJson &pj) {
37
+ FIND_STRUCTURAL_BITS(Architecture::ARM64, buf, len, pj,
38
+ simdjson::flatten_bits);
39
+ }
40
+ } // namespace simdjson
41
+ #endif