simdjson 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.clang-format +5 -0
- data/.gitignore +14 -0
- data/.gitmodules +3 -0
- data/.rubocop.yml +9 -0
- data/.travis.yml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +32 -0
- data/benchmark/apache_builds.json +4421 -0
- data/benchmark/demo.json +15 -0
- data/benchmark/github_events.json +1390 -0
- data/benchmark/run_benchmark.rb +30 -0
- data/ext/simdjson/extconf.rb +22 -0
- data/ext/simdjson/simdjson.cpp +76 -0
- data/ext/simdjson/simdjson.hpp +6 -0
- data/lib/simdjson/version.rb +3 -0
- data/lib/simdjson.rb +2 -0
- data/simdjson.gemspec +35 -0
- data/vendor/.gitkeep +0 -0
- data/vendor/simdjson/AUTHORS +3 -0
- data/vendor/simdjson/CMakeLists.txt +63 -0
- data/vendor/simdjson/CONTRIBUTORS +27 -0
- data/vendor/simdjson/Dockerfile +10 -0
- data/vendor/simdjson/LICENSE +201 -0
- data/vendor/simdjson/Makefile +203 -0
- data/vendor/simdjson/Notes.md +85 -0
- data/vendor/simdjson/README.md +581 -0
- data/vendor/simdjson/amalgamation.sh +158 -0
- data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
- data/vendor/simdjson/benchmark/benchmark.h +223 -0
- data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
- data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
- data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
- data/vendor/simdjson/benchmark/parse.cpp +393 -0
- data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
- data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
- data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
- data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
- data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/gbps.png +0 -0
- data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
- data/vendor/simdjson/images/halvarflake.png +0 -0
- data/vendor/simdjson/images/logo.png +0 -0
- data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
- data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
- data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
- data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
- data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
- data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
- data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
- data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
- data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
- data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
- data/vendor/simdjson/include/simdjson/portability.h +172 -0
- data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
- data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
- data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
- data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
- data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
- data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
- data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
- data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
- data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
- data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
- data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
- data/vendor/simdjson/scripts/issue150.sh +14 -0
- data/vendor/simdjson/scripts/javascript/README.md +3 -0
- data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
- data/vendor/simdjson/scripts/minifier.sh +11 -0
- data/vendor/simdjson/scripts/parseandstat.sh +24 -0
- data/vendor/simdjson/scripts/parser.sh +11 -0
- data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
- data/vendor/simdjson/scripts/plotparse.sh +98 -0
- data/vendor/simdjson/scripts/selectparser.sh +11 -0
- data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
- data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
- data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
- data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
- data/vendor/simdjson/scripts/testjson2json.sh +99 -0
- data/vendor/simdjson/scripts/transitions/Makefile +10 -0
- data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
- data/vendor/simdjson/singleheader/README.md +1 -0
- data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
- data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
- data/vendor/simdjson/singleheader/simdjson.h +39692 -0
- data/vendor/simdjson/src/CMakeLists.txt +67 -0
- data/vendor/simdjson/src/jsonioutil.cpp +35 -0
- data/vendor/simdjson/src/jsonminifier.cpp +285 -0
- data/vendor/simdjson/src/jsonparser.cpp +91 -0
- data/vendor/simdjson/src/parsedjson.cpp +323 -0
- data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
- data/vendor/simdjson/src/simdjson.cpp +30 -0
- data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
- data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
- data/vendor/simdjson/style/clang-format-check.sh +25 -0
- data/vendor/simdjson/style/clang-format.sh +25 -0
- data/vendor/simdjson/style/run-clang-format.py +326 -0
- data/vendor/simdjson/tape.md +134 -0
- data/vendor/simdjson/tests/CMakeLists.txt +25 -0
- data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
- data/vendor/simdjson/tests/basictests.cpp +75 -0
- data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
- data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
- data/vendor/simdjson/tests/pointercheck.cpp +38 -0
- data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
- data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
- data/vendor/simdjson/tools/CMakeLists.txt +3 -0
- data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
- data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
- data/vendor/simdjson/tools/json2json.cpp +112 -0
- data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
- data/vendor/simdjson/tools/jsonstats.cpp +143 -0
- data/vendor/simdjson/tools/minify.cpp +21 -0
- data/vendor/simdjson/tools/release.py +125 -0
- data/vendor/simdjson/windows/dirent_portable.h +1043 -0
- metadata +273 -0
@@ -0,0 +1,1652 @@
|
|
1
|
+
/* auto-generated on Sun Aug 4 15:43:41 EDT 2019. Do not edit! */
|
2
|
+
#include "simdjson.h"
|
3
|
+
|
4
|
+
/* used for http://dmalloc.com/ Dmalloc - Debug Malloc Library */
|
5
|
+
#ifdef DMALLOC
|
6
|
+
#include "dmalloc.h"
|
7
|
+
#endif
|
8
|
+
|
9
|
+
/* begin file src/simdjson.cpp */
|
10
|
+
#include <map>
|
11
|
+
|
12
|
+
namespace simdjson {
|
13
|
+
const std::map<int, const std::string> error_strings = {
|
14
|
+
{SUCCESS, "No errors"},
|
15
|
+
{CAPACITY, "This ParsedJson can't support a document that big"},
|
16
|
+
{MEMALLOC, "Error allocating memory, we're most likely out of memory"},
|
17
|
+
{TAPE_ERROR, "Something went wrong while writing to the tape"},
|
18
|
+
{STRING_ERROR, "Problem while parsing a string"},
|
19
|
+
{T_ATOM_ERROR,
|
20
|
+
"Problem while parsing an atom starting with the letter 't'"},
|
21
|
+
{F_ATOM_ERROR,
|
22
|
+
"Problem while parsing an atom starting with the letter 'f'"},
|
23
|
+
{N_ATOM_ERROR,
|
24
|
+
"Problem while parsing an atom starting with the letter 'n'"},
|
25
|
+
{NUMBER_ERROR, "Problem while parsing a number"},
|
26
|
+
{UTF8_ERROR, "The input is not valid UTF-8"},
|
27
|
+
{UNITIALIZED, "Unitialized"},
|
28
|
+
{EMPTY, "Empty"},
|
29
|
+
{UNESCAPED_CHARS, "Within strings, some characters must be escapted, we "
|
30
|
+
"found unescapted characters"},
|
31
|
+
{UNEXPECTED_ERROR, "Unexpected error, consider reporting this problem as "
|
32
|
+
"you may have found a bug in simdjson"},
|
33
|
+
};
|
34
|
+
|
35
|
+
const std::string &error_message(const int error_code) {
|
36
|
+
return error_strings.at(error_code);
|
37
|
+
}
|
38
|
+
} // namespace simdjson
|
39
|
+
/* end file src/simdjson.cpp */
|
40
|
+
/* begin file src/jsonioutil.cpp */
|
41
|
+
#include <cstdlib>
|
42
|
+
#include <cstring>
|
43
|
+
|
44
|
+
namespace simdjson {
|
45
|
+
char *allocate_padded_buffer(size_t length) {
|
46
|
+
// we could do a simple malloc
|
47
|
+
// return (char *) malloc(length + SIMDJSON_PADDING);
|
48
|
+
// However, we might as well align to cache lines...
|
49
|
+
size_t totalpaddedlength = length + SIMDJSON_PADDING;
|
50
|
+
char *padded_buffer = aligned_malloc_char(64, totalpaddedlength);
|
51
|
+
return padded_buffer;
|
52
|
+
}
|
53
|
+
|
54
|
+
// Reads the entire file `filename` into a padded_string and returns it.
//
// Throws std::runtime_error when the file cannot be opened, when its size
// cannot be determined, when the buffer cannot be allocated, or when fewer
// bytes than expected are read. The file handle is closed on every one of
// these paths.
padded_string get_corpus(const std::string &filename) {
  std::FILE *fp = std::fopen(filename.c_str(), "rb");
  if (fp == nullptr) {
    throw std::runtime_error("could not load corpus");
  }
  // The size is obtained by seeking to the end and asking for the position.
  // Both calls can fail (e.g. on a non-seekable stream) and must be checked.
  if (std::fseek(fp, 0, SEEK_END) != 0) {
    std::fclose(fp);
    throw std::runtime_error("cannot seek in the file");
  }
  const long end_position = std::ftell(fp);
  if (end_position < 0) {
    // ftell signals failure with -1L; converting that to size_t would ask
    // for an enormous buffer.
    std::fclose(fp);
    throw std::runtime_error("cannot tell where we are in the file");
  }
  const size_t len = static_cast<size_t>(end_position);
  padded_string s(len);
  if (s.data() == nullptr) {
    std::fclose(fp);
    throw std::runtime_error("could not allocate memory");
  }
  std::rewind(fp);
  const size_t readb = std::fread(s.data(), 1, len, fp);
  std::fclose(fp);
  if (readb != len) {
    throw std::runtime_error("could not read the data");
  }
  return s;
}
|
74
|
+
} // namespace simdjson
|
75
|
+
/* end file src/jsonioutil.cpp */
|
76
|
+
/* begin file src/jsonminifier.cpp */
|
77
|
+
#include <cstdint>
|
78
|
+
|
79
|
+
#ifndef __AVX2__
|
80
|
+
|
81
|
+
namespace simdjson {
|
82
|
+
// Per-byte classification table for the scalar json_minify below. Each of the
// 256 byte values c owns three consecutive flags:
//   [3 * c + 0]  1 if c is the double quote character, otherwise 0
//   [3 * c + 1]  0 if c is the backslash character, otherwise 1
//   [3 * c + 2]  0 if c is space, tab, line feed or carriage return,
//                otherwise 1
// The table is only ever read, so it is declared const.
static const uint8_t jump_table[256 * 3] = {
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 0, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 0, 0,
    1, 1, 1, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 0, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0,
    1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1,
    1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
    0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1, 0, 1, 1,
};

// Scalar JSON minifier: copies `bytes` (how_many bytes) to `out`, dropping
// space, tab, line feed and carriage return characters that are not inside a
// string literal. Returns the number of bytes kept.
//
// `out` must have room for how_many bytes. This variant does not append a
// terminating zero byte.
size_t json_minify(const unsigned char *bytes, size_t how_many,
                   unsigned char *out) {
  size_t pos = 0;
  uint8_t quote = 0;     // 1 while inside a string literal, 0 otherwise
  uint8_t nonescape = 1; // low bit is 0 right after an unescaped backslash

  for (size_t i = 0; i < how_many; i++) {
    const unsigned char c = bytes[i];
    const uint8_t *meta = jump_table + 3 * c;

    // A quote toggles the in-string state unless a backslash escaped it.
    quote = quote ^ (meta[0] & nonescape);
    // Always store the byte; it only survives if pos moves past it.
    out[pos] = c;
    pos += meta[2] | quote;

    // A backslash (meta[1] == 0) flips the escape state; any other byte
    // resets it to "not escaping".
    nonescape = (~nonescape) | (meta[1]);
  }
  return pos;
}
|
135
|
+
} // namespace simdjson
|
136
|
+
#else
|
137
|
+
#include <cstring>
|
138
|
+
|
139
|
+
namespace simdjson {
|
140
|
+
|
141
|
+
// some intrinsics are missing under GCC?
|
142
|
+
#ifndef __clang__
|
143
|
+
#ifndef _MSC_VER
|
144
|
+
// Stand-in for the _mm256_loadu2_m128i intrinsic on compilers that lack it:
// builds a 256-bit value whose low half is the 128 bits at __addr_lo and
// whose high half is the 128 bits at __addr_hi (both unaligned loads).
static __m256i inline _mm256_loadu2_m128i(__m128i const *__addr_hi,
                                          __m128i const *__addr_lo) {
  const __m128i low_half = _mm_loadu_si128(__addr_lo);
  const __m256i widened = _mm256_castsi128_si256(low_half);
  const __m128i high_half = _mm_loadu_si128(__addr_hi);
  return _mm256_insertf128_si256(widened, high_half, 1);
}
|
149
|
+
|
150
|
+
// Stand-in for the _mm256_storeu2_m128i intrinsic on compilers that lack it:
// writes the low 128 bits of __a to __addr_lo and then the high 128 bits to
// __addr_hi (both unaligned stores). The low half is stored first, so when
// the two destinations overlap the high half wins.
static inline void _mm256_storeu2_m128i(__m128i *__addr_hi, __m128i *__addr_lo,
                                        __m256i __a) {
  _mm_storeu_si128(__addr_lo, _mm256_castsi256_si128(__a));
  _mm_storeu_si128(__addr_hi, _mm256_extractf128_si256(__a, 1));
}
|
158
|
+
#endif
|
159
|
+
#endif
|
160
|
+
|
161
|
+
// a straightforward comparison of a mask against input.
|
162
|
+
static uint64_t cmp_mask_against_input_mini(__m256i input_lo, __m256i input_hi,
|
163
|
+
__m256i mask) {
|
164
|
+
__m256i cmp_res_0 = _mm256_cmpeq_epi8(input_lo, mask);
|
165
|
+
uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
|
166
|
+
__m256i cmp_res_1 = _mm256_cmpeq_epi8(input_hi, mask);
|
167
|
+
uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
|
168
|
+
return res_0 | (res_1 << 32);
|
169
|
+
}
|
170
|
+
|
171
|
+
// AVX2 JSON minifier: takes the input from buf (len bytes) and removes the
// whitespace that lies outside of string literals, writing the result to out.
// Input and output can be the same buffer. The result is null terminated and
// the return value is the length of the minified text (not counting the null
// terminator).
//
// The input is consumed in 64-byte blocks; a final partial block is copied
// into a zero-filled 64-byte scratch buffer and processed the same way.
// NOTE(review): add_overflow, hamming and mask128_epi8 are defined elsewhere;
// presumably an overflow-reporting add, a population count and a table of
// 128-bit shuffle masks indexed by a whitespace bitmap -- confirm.
size_t json_minify(const uint8_t *buf, size_t len, uint8_t *out) {
  // Useful constant masks
  const uint64_t even_bits = 0x5555555555555555ULL;
  const uint64_t odd_bits = ~even_bits;
  uint8_t *initout(out); // remembered so the output length can be computed
  uint64_t prev_iter_ends_odd_backslash =
      0ULL;                               // either 0 or 1, but a 64-bit value
  uint64_t prev_iter_inside_quote = 0ULL; // either all zeros or all ones
  size_t idx = 0;
  if (len >= 64) {
    size_t avx_len = len - 63;

    for (; idx < avx_len; idx += 64) {
      __m256i input_lo =
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 0));
      __m256i input_hi =
          _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buf + idx + 32));
      // bit i of bs_bits is set when byte i of this block is a backslash
      uint64_t bs_bits = cmp_mask_against_input_mini(input_lo, input_hi,
                                                     _mm256_set1_epi8('\\'));
      // first backslash of every run of consecutive backslashes
      uint64_t start_edges = bs_bits & ~(bs_bits << 1);
      uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
      uint64_t even_starts = start_edges & even_start_mask;
      uint64_t odd_starts = start_edges & ~even_start_mask;
      uint64_t even_carries = bs_bits + even_starts;
      uint64_t odd_carries;
      bool iter_ends_odd_backslash =
          add_overflow(bs_bits, odd_starts, &odd_carries);
      odd_carries |= prev_iter_ends_odd_backslash;
      // carried over to the next 64-byte block
      prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
      uint64_t even_carry_ends = even_carries & ~bs_bits;
      uint64_t odd_carry_ends = odd_carries & ~bs_bits;
      uint64_t even_start_odd_end = even_carry_ends & odd_bits;
      uint64_t odd_start_even_end = odd_carry_ends & even_bits;
      uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
      uint64_t quote_bits = cmp_mask_against_input_mini(input_lo, input_hi,
                                                        _mm256_set1_epi8('"'));
      // discard the quote characters flagged by odd_ends
      quote_bits = quote_bits & ~odd_ends;
      // carry-less multiplication of the quote bitmap by an all-ones value
      uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
          _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
      quote_mask ^= prev_iter_inside_quote;
      // replicate the top bit of quote_mask across all 64 bits
      prev_iter_inside_quote = static_cast<uint64_t>(
          static_cast<int64_t>(quote_mask) >>
          63); // might be undefined behavior, should be fully defined in C++20,
               // ok according to John Regehr from the University of Utah
      const __m256i low_nibble_mask = _mm256_setr_epi8(
          // 0 9 a b c d
          16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0, 16, 0, 0, 0, 0, 0,
          0, 0, 0, 8, 12, 1, 2, 9, 0, 0);
      const __m256i high_nibble_mask = _mm256_setr_epi8(
          // 0 2 3 5 7
          8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0, 8, 0, 18, 4, 0, 1, 0,
          1, 0, 0, 0, 3, 2, 1, 0, 0);
      __m256i whitespace_shufti_mask = _mm256_set1_epi8(0x18);
      // per byte: AND of the two table entries selected by its low and high
      // nibbles
      __m256i v_lo = _mm256_and_si256(
          _mm256_shuffle_epi8(low_nibble_mask, input_lo),
          _mm256_shuffle_epi8(high_nibble_mask,
                              _mm256_and_si256(_mm256_srli_epi32(input_lo, 4),
                                               _mm256_set1_epi8(0x7f))));

      __m256i v_hi = _mm256_and_si256(
          _mm256_shuffle_epi8(low_nibble_mask, input_hi),
          _mm256_shuffle_epi8(high_nibble_mask,
                              _mm256_and_si256(_mm256_srli_epi32(input_hi, 4),
                                               _mm256_set1_epi8(0x7f))));
      // these compare-to-zero results flag the bytes that are NOT whitespace,
      // hence the bitwise negation when `whitespace` is formed below
      __m256i tmp_ws_lo = _mm256_cmpeq_epi8(
          _mm256_and_si256(v_lo, whitespace_shufti_mask), _mm256_set1_epi8(0));
      __m256i tmp_ws_hi = _mm256_cmpeq_epi8(
          _mm256_and_si256(v_hi, whitespace_shufti_mask), _mm256_set1_epi8(0));

      uint64_t ws_res_0 =
          static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
      uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
      uint64_t whitespace = ~(ws_res_0 | (ws_res_1 << 32));
      // whitespace inside a string literal must be kept
      whitespace &= ~quote_mask;
      // one 16-bit whitespace bitmap per 16-byte quarter of the block
      int mask1 = whitespace & 0xFFFF;
      int mask2 = (whitespace >> 16) & 0xFFFF;
      int mask3 = (whitespace >> 32) & 0xFFFF;
      int mask4 = (whitespace >> 48) & 0xFFFF;
      // counts over the non-whitespace bits of the first 16, 32, 48 and 64
      // bytes, used below as output offsets
      int pop1 = hamming((~whitespace) & 0xFFFF);
      int pop2 = hamming((~whitespace) & UINT64_C(0xFFFFFFFF));
      int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
      int pop4 = hamming((~whitespace));
      __m256i vmask1 = _mm256_loadu2_m128i(
          reinterpret_cast<const __m128i *>(mask128_epi8) + (mask2 & 0x7FFF),
          reinterpret_cast<const __m128i *>(mask128_epi8) + (mask1 & 0x7FFF));
      __m256i vmask2 = _mm256_loadu2_m128i(
          reinterpret_cast<const __m128i *>(mask128_epi8) + (mask4 & 0x7FFF),
          reinterpret_cast<const __m128i *>(mask128_epi8) + (mask3 & 0x7FFF));
      __m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
      __m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
      // each quarter is written at the offset given by the count of the
      // quarters before it
      _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop1),
                           reinterpret_cast<__m128i *>(out), result1);
      _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(out + pop3),
                           reinterpret_cast<__m128i *>(out + pop2), result2);
      out += pop4;
    }
  }
  // we finish off the job... copying and pasting the code is not ideal here,
  // but it gets the job done.
  if (idx < len) {
    // the remaining len - idx bytes are processed from a zero-filled scratch
    // block
    uint8_t buffer[64];
    memset(buffer, 0, 64);
    memcpy(buffer, buf + idx, len - idx);
    __m256i input_lo =
        _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buffer));
    __m256i input_hi =
        _mm256_loadu_si256(reinterpret_cast<const __m256i *>(buffer + 32));
    uint64_t bs_bits =
        cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('\\'));
    uint64_t start_edges = bs_bits & ~(bs_bits << 1);
    uint64_t even_start_mask = even_bits ^ prev_iter_ends_odd_backslash;
    uint64_t even_starts = start_edges & even_start_mask;
    uint64_t odd_starts = start_edges & ~even_start_mask;
    uint64_t even_carries = bs_bits + even_starts;
    uint64_t odd_carries;
    // bool iter_ends_odd_backslash =
    add_overflow(bs_bits, odd_starts, &odd_carries);
    odd_carries |= prev_iter_ends_odd_backslash;
    // prev_iter_ends_odd_backslash = iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;
    // // we never use it
    uint64_t even_carry_ends = even_carries & ~bs_bits;
    uint64_t odd_carry_ends = odd_carries & ~bs_bits;
    uint64_t even_start_odd_end = even_carry_ends & odd_bits;
    uint64_t odd_start_even_end = odd_carry_ends & even_bits;
    uint64_t odd_ends = even_start_odd_end | odd_start_even_end;
    uint64_t quote_bits =
        cmp_mask_against_input_mini(input_lo, input_hi, _mm256_set1_epi8('"'));
    quote_bits = quote_bits & ~odd_ends;
    uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
        _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFF), 0));
    quote_mask ^= prev_iter_inside_quote;
    // prev_iter_inside_quote = (uint64_t)((int64_t)quote_mask >> 63);// we
    // don't need this anymore

    // this block classifies whitespace differently from the main loop: an
    // equality test against 0x20 combined with a table lookup
    __m256i mask_20 = _mm256_set1_epi8(0x20); // c==32
    __m256i mask_70 =
        _mm256_set1_epi8(0x70); // adding 0x70 does not check low 4-bits
    // but moves any value >= 16 above 128

    // entries 9, 10 and 13 of each 16-byte half are 0xFF
    __m256i lut_cntrl = _mm256_setr_epi8(
        0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0xFF, 0x00,
        0x00, 0xFF, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
        0x00, 0xFF, 0xFF, 0x00, 0x00, 0xFF, 0x00, 0x00);

    __m256i tmp_ws_lo = _mm256_or_si256(
        _mm256_cmpeq_epi8(mask_20, input_lo),
        _mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_lo)));
    __m256i tmp_ws_hi = _mm256_or_si256(
        _mm256_cmpeq_epi8(mask_20, input_hi),
        _mm256_shuffle_epi8(lut_cntrl, _mm256_adds_epu8(mask_70, input_hi)));
    uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(tmp_ws_lo));
    uint64_t ws_res_1 = _mm256_movemask_epi8(tmp_ws_hi);
    uint64_t whitespace = (ws_res_0 | (ws_res_1 << 32));
    whitespace &= ~quote_mask;

    if (len - idx < 64) {
      // treat the scratch bytes past the real input as whitespace so that
      // they are dropped
      whitespace |= UINT64_C(0xFFFFFFFFFFFFFFFF) << (len - idx);
    }
    int mask1 = whitespace & 0xFFFF;
    int mask2 = (whitespace >> 16) & 0xFFFF;
    int mask3 = (whitespace >> 32) & 0xFFFF;
    int mask4 = (whitespace >> 48) & 0xFFFF;
    int pop1 = hamming((~whitespace) & 0xFFFF);
    int pop2 = hamming((~whitespace) & UINT64_C(0xFFFFFFFF));
    int pop3 = hamming((~whitespace) & UINT64_C(0xFFFFFFFFFFFF));
    int pop4 = hamming((~whitespace));
    __m256i vmask1 = _mm256_loadu2_m128i(
        reinterpret_cast<const __m128i *>(mask128_epi8) + (mask2 & 0x7FFF),
        reinterpret_cast<const __m128i *>(mask128_epi8) + (mask1 & 0x7FFF));
    __m256i vmask2 = _mm256_loadu2_m128i(
        reinterpret_cast<const __m128i *>(mask128_epi8) + (mask4 & 0x7FFF),
        reinterpret_cast<const __m128i *>(mask128_epi8) + (mask3 & 0x7FFF));
    __m256i result1 = _mm256_shuffle_epi8(input_lo, vmask1);
    __m256i result2 = _mm256_shuffle_epi8(input_hi, vmask2);
    // compact inside the scratch buffer, then copy only the kept bytes out
    _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop1),
                         reinterpret_cast<__m128i *>(buffer), result1);
    _mm256_storeu2_m128i(reinterpret_cast<__m128i *>(buffer + pop3),
                         reinterpret_cast<__m128i *>(buffer + pop2), result2);
    memcpy(out, buffer, pop4);
    out += pop4;
  }
  *out = '\0'; // NULL termination
  return out - initout;
}
|
358
|
+
} // namespace simdjson
|
359
|
+
#endif
|
360
|
+
/* end file src/jsonminifier.cpp */
|
361
|
+
/* begin file src/jsonparser.cpp */
|
362
|
+
|
363
|
+
namespace simdjson {
|
364
|
+
|
365
|
+
// The function that users are expected to call is json_parse.
|
366
|
+
// We have more than one such function because we want to support several
|
367
|
+
// instruction sets.
|
368
|
+
|
369
|
+
// function pointer type for json_parse
|
370
|
+
using json_parse_functype = int(const uint8_t *buf, size_t len, ParsedJson &pj,
|
371
|
+
bool realloc_if_needed);
|
372
|
+
|
373
|
+
// Pointer that holds the json_parse implementation corresponding to the
|
374
|
+
// available SIMD instruction set
|
375
|
+
extern json_parse_functype *json_parse_ptr;
|
376
|
+
|
377
|
+
int json_parse(const uint8_t *buf, size_t len, ParsedJson &pj,
|
378
|
+
bool realloc_if_needed) {
|
379
|
+
return json_parse_ptr(buf, len, pj, realloc_if_needed);
|
380
|
+
}
|
381
|
+
|
382
|
+
int json_parse(const char *buf, size_t len, ParsedJson &pj,
|
383
|
+
bool realloc_if_needed) {
|
384
|
+
return json_parse_ptr(reinterpret_cast<const uint8_t *>(buf), len, pj,
|
385
|
+
realloc_if_needed);
|
386
|
+
}
|
387
|
+
|
388
|
+
// Picks the most capable implementation whose required instruction-set flags
// are all reported by detect_supported_architectures().
//
// Returns Architecture::HASWELL, Architecture::WESTMERE or
// Architecture::ARM64, tested in that order, or Architecture::NONE when no
// implementation's requirements are met.
Architecture find_best_supported_implementation() {
  constexpr uint32_t haswell_flags =
      instruction_set::AVX2 | instruction_set::PCLMULQDQ |
      instruction_set::BMI1 | instruction_set::BMI2;
  constexpr uint32_t westmere_flags =
      instruction_set::SSE42 | instruction_set::PCLMULQDQ;
  constexpr uint32_t neon_flags = instruction_set::NEON;

  const uint32_t supports = detect_supported_architectures();
  // Order from best to worst (within architecture)
  if ((haswell_flags & supports) == haswell_flags)
    return Architecture::HASWELL;
  if ((westmere_flags & supports) == westmere_flags)
    return Architecture::WESTMERE;
  // NEON must be looked up in the detected flags like the others. Testing the
  // enumerator itself is a compile-time constant and says nothing about the
  // CPU this code is running on.
  if ((neon_flags & supports) != 0)
    return Architecture::ARM64;

  return Architecture::NONE;
}
|
406
|
+
|
407
|
+
// Responsible to select the best json_parse implementation
|
408
|
+
int json_parse_dispatch(const uint8_t *buf, size_t len, ParsedJson &pj,
|
409
|
+
bool realloc_if_needed) {
|
410
|
+
Architecture best_implementation = find_best_supported_implementation();
|
411
|
+
// Selecting the best implementation
|
412
|
+
switch (best_implementation) {
|
413
|
+
#ifdef IS_X86_64
|
414
|
+
case Architecture::HASWELL:
|
415
|
+
json_parse_ptr = &json_parse_implementation<Architecture::HASWELL>;
|
416
|
+
break;
|
417
|
+
case Architecture::WESTMERE:
|
418
|
+
json_parse_ptr = &json_parse_implementation<Architecture::WESTMERE>;
|
419
|
+
break;
|
420
|
+
#endif
|
421
|
+
#ifdef IS_ARM64
|
422
|
+
case Architecture::ARM64:
|
423
|
+
json_parse_ptr = &json_parse_implementation<Architecture::ARM64>;
|
424
|
+
break;
|
425
|
+
#endif
|
426
|
+
default:
|
427
|
+
std::cerr << "The processor is not supported by simdjson." << std::endl;
|
428
|
+
return simdjson::UNEXPECTED_ERROR;
|
429
|
+
}
|
430
|
+
|
431
|
+
return json_parse_ptr(buf, len, pj, realloc_if_needed);
|
432
|
+
}
|
433
|
+
|
434
|
+
json_parse_functype *json_parse_ptr = &json_parse_dispatch;
|
435
|
+
|
436
|
+
// Creates a ParsedJson, reserves capacity for a document of len bytes and
// parses buf into it. If the capacity cannot be reserved, a message is
// written to std::cerr and the ParsedJson is returned without parsing.
// The ParsedJson is returned in both cases; the value returned by json_parse
// is not inspected here.
WARN_UNUSED
ParsedJson build_parsed_json(const uint8_t *buf, size_t len,
                             bool realloc_if_needed) {
  ParsedJson pj;
  if (!pj.allocate_capacity(len)) {
    std::cerr << "failure during memory allocation " << std::endl;
    return pj;
  }
  json_parse(buf, len, pj, realloc_if_needed);
  return pj;
}
|
448
|
+
} // namespace simdjson
|
449
|
+
/* end file src/jsonparser.cpp */
|
450
|
+
/* begin file src/stage1_find_marks.cpp */
|
451
|
+
|
452
|
+
#ifdef IS_X86_64
|
453
|
+
|
454
|
+
TARGET_HASWELL
|
455
|
+
namespace simdjson {
|
456
|
+
// Explicit specialization of find_structural_bits for Architecture::HASWELL.
// The whole body is produced by the FIND_STRUCTURAL_BITS macro (defined
// elsewhere), expanded here with simdjson::haswell::flatten_bits.
// NOTE(review): no return statement appears here, so the macro presumably
// supplies it -- confirm against the macro definition.
template <>
int find_structural_bits<Architecture::HASWELL>(const uint8_t *buf, size_t len,
                                                ParsedJson &pj) {
  FIND_STRUCTURAL_BITS(Architecture::HASWELL, buf, len, pj,
                       simdjson::haswell::flatten_bits);
}
|
462
|
+
} // namespace simdjson
|
463
|
+
UNTARGET_REGION
|
464
|
+
|
465
|
+
TARGET_WESTMERE
|
466
|
+
namespace simdjson {
|
467
|
+
// Explicit specialization of find_structural_bits for Architecture::WESTMERE.
// The whole body is produced by the FIND_STRUCTURAL_BITS macro (defined
// elsewhere), expanded here with simdjson::flatten_bits.
// NOTE(review): no return statement appears here, so the macro presumably
// supplies it -- confirm against the macro definition.
template <>
int find_structural_bits<Architecture::WESTMERE>(const uint8_t *buf, size_t len,
                                                 ParsedJson &pj) {
  FIND_STRUCTURAL_BITS(Architecture::WESTMERE, buf, len, pj,
                       simdjson::flatten_bits);
}
|
473
|
+
} // namespace simdjson
|
474
|
+
UNTARGET_REGION
|
475
|
+
|
476
|
+
#endif
|
477
|
+
|
478
|
+
#ifdef IS_ARM64
|
479
|
+
namespace simdjson {
|
480
|
+
// Explicit specialization of find_structural_bits for Architecture::ARM64.
// The whole body is produced by the FIND_STRUCTURAL_BITS macro (defined
// elsewhere), expanded here with simdjson::flatten_bits.
// NOTE(review): no return statement appears here, so the macro presumably
// supplies it -- confirm against the macro definition.
template <>
int find_structural_bits<Architecture::ARM64>(const uint8_t *buf, size_t len,
                                              ParsedJson &pj) {
  FIND_STRUCTURAL_BITS(Architecture::ARM64, buf, len, pj,
                       simdjson::flatten_bits);
}
|
486
|
+
} // namespace simdjson
|
487
|
+
#endif
|
488
|
+
/* end file src/stage1_find_marks.cpp */
|
489
|
+
/* begin file src/stage2_build_tape.cpp */
|
490
|
+
|
491
|
+
namespace simdjson {
|
492
|
+
|
493
|
+
// This macro reads the next structural character: it loads the input position
// stored at pj.structural_indexes[i] into idx, advances i, and stores the
// byte found at that position of buf in c. It relies on idx, i, c, buf and pj
// being in scope wherever it is expanded.
#define UPDATE_CHAR()                                                          \
  {                                                                            \
    idx = pj.structural_indexes[i++];                                          \
    c = buf[idx];                                                              \
  }
|
499
|
+
|
500
|
+
// Helpers that record, per nesting depth, which "continue" label the parser
// should resume at, and that jump back to it.
// With SIMDJSON_USE_COMPUTED_GOTO the label address itself is stored in
// pj.ret_address[depth]; otherwise a one-character tag is stored ('a' for
// array, 'o' for object, 's' for start) and GOTO_CONTINUE selects the label
// from that tag, treating anything other than 'a' or 'o' as start.
#ifdef SIMDJSON_USE_COMPUTED_GOTO
#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = &&array_continue;
#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = &&object_continue;
#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = &&start_continue;
#define GOTO_CONTINUE() goto *pj.ret_address[depth];
#else
#define SET_GOTO_ARRAY_CONTINUE() pj.ret_address[depth] = 'a';
#define SET_GOTO_OBJECT_CONTINUE() pj.ret_address[depth] = 'o';
#define SET_GOTO_START_CONTINUE() pj.ret_address[depth] = 's';
#define GOTO_CONTINUE()                                                        \
  {                                                                            \
    switch (pj.ret_address[depth]) {                                           \
    case 'a':                                                                  \
      goto array_continue;                                                     \
    case 'o':                                                                  \
      goto object_continue;                                                    \
    default:                                                                   \
      goto start_continue;                                                     \
    }                                                                          \
  }
#endif
|
520
|
+
|
521
|
+
/************
 * The JSON is parsed to a tape, see the accompanying tape.md file
 * for documentation.
 ***********/
// UNIFIED_MACHINE(T, buf, len, pj) expands to the complete body of
//
//   int unified_machine<T>(const uint8_t *buf, size_t len, ParsedJson &pj)
//
// We need to compile that code for multiple architectures. However, target
// attributes can be used only once per function definition. A huge macro
// seemed better than huge code duplication.
//
// What the expansion does:
//  - calls pj.init() and returns simdjson::CAPACITY if pj.byte_capacity < len;
//  - writes a root ('r') tape entry, then walks pj.structural_indexes with
//    UPDATE_CHAR(), dispatching on each structural character and writing tape
//    entries through pj.write_tape / parse_string<T> / parse_number;
//  - on success sets pj.valid = true and returns simdjson::SUCCESS;
//  - on failure returns (and stores in pj.error_code) DEPTH_ERROR,
//    STRING_ERROR, NUMBER_ERROR, T_ATOM_ERROR, N_ATOM_ERROR, F_ATOM_ERROR or
//    TAPE_ERROR, chosen from the depth and the last character read.
// T is only used as the template argument of parse_string<T>.
// Note that when ALLOW_SAME_PAGE_BUFFER_OVERRUN is non-zero the expansion
// zeroes SIMDJSON_PADDING bytes starting at buf + len, i.e. it writes past
// the logical end of the (const) input buffer.
#define UNIFIED_MACHINE(T, buf, len, pj) \
  { \
    if (ALLOW_SAME_PAGE_BUFFER_OVERRUN) { \
      memset((uint8_t *)buf + len, 0, \
             SIMDJSON_PADDING); /* to please valgrind */ \
    } \
    uint32_t i = 0; /* index of the structural character (0,1,2,3...) */ \
    uint32_t \
        idx; /* location of the structural character in the input (buf) */ \
    uint8_t c; /* used to track the (structural) character we are looking at, \
                  updated */ \
    /* by UPDATE_CHAR macro */ \
    uint32_t depth = 0; /* could have an arbitrary starting depth */ \
    pj.init(); /* sets is_valid to false */ \
    if (pj.byte_capacity < len) { \
      pj.error_code = simdjson::CAPACITY; \
      return pj.error_code; \
    } \
    \
    /*//////////////////////////// START STATE ///////////////////////////// \
     */ \
    SET_GOTO_START_CONTINUE() \
    pj.containing_scope_offset[depth] = pj.get_current_loc(); \
    pj.write_tape(0, 'r'); /* r for root, 0 is going to get overwritten */ \
    /* the root is used, if nothing else, to capture the size of the tape */ \
    depth++; /* everything starts at depth = 1, depth = 0 is just for the \
                root, the root may contain an object, an array or something \
                else. */ \
    if (depth >= pj.depth_capacity) { \
      goto fail; \
    } \
    \
    UPDATE_CHAR(); \
    switch (c) { \
    case '{': \
      pj.containing_scope_offset[depth] = pj.get_current_loc(); \
      SET_GOTO_START_CONTINUE(); \
      depth++; \
      if (depth >= pj.depth_capacity) { \
        goto fail; \
      } \
      pj.write_tape( \
          0, \
          c); /* strangely, moving this to object_begin slows things down */ \
      goto object_begin; \
    case '[': \
      pj.containing_scope_offset[depth] = pj.get_current_loc(); \
      SET_GOTO_START_CONTINUE(); \
      depth++; \
      if (depth >= pj.depth_capacity) { \
        goto fail; \
      } \
      pj.write_tape(0, c); \
      goto array_begin; \
      /* #define SIMDJSON_ALLOWANYTHINGINROOT \
       * A JSON text is a serialized value.  Note that certain previous \
       * specifications of JSON constrained a JSON text to be an object or an \
       * array.  Implementations that generate only objects or arrays where a \
       * JSON text is called for will be interoperable in the sense that all \
       * implementations will accept these as conforming JSON texts. \
       * https://tools.ietf.org/html/rfc8259 \
       * #ifdef SIMDJSON_ALLOWANYTHINGINROOT */ \
    case '"': { \
      if (!parse_string<T>(buf, len, pj, depth, idx)) { \
        goto fail; \
      } \
      break; \
    } \
    case 't': { \
      /* we need to make a copy to make sure that the string is space \
       * terminated. \
       * this only applies to the JSON document made solely of the true value. \
       * this will almost never be called in practice */ \
      char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
      if (copy == nullptr) { \
        goto fail; \
      } \
      memcpy(copy, buf, len); \
      copy[len] = ' '; \
      if (!is_valid_true_atom(reinterpret_cast<const uint8_t *>(copy) + \
                              idx)) { \
        free(copy); \
        goto fail; \
      } \
      free(copy); \
      pj.write_tape(0, c); \
      break; \
    } \
    case 'f': { \
      /* we need to make a copy to make sure that the string is space \
       * terminated. \
       * this only applies to the JSON document made solely of the false \
       * value. \
       * this will almost never be called in practice */ \
      char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
      if (copy == nullptr) { \
        goto fail; \
      } \
      memcpy(copy, buf, len); \
      copy[len] = ' '; \
      if (!is_valid_false_atom(reinterpret_cast<const uint8_t *>(copy) + \
                               idx)) { \
        free(copy); \
        goto fail; \
      } \
      free(copy); \
      pj.write_tape(0, c); \
      break; \
    } \
    case 'n': { \
      /* we need to make a copy to make sure that the string is space \
       * terminated. \
       * this only applies to the JSON document made solely of the null value. \
       * this will almost never be called in practice */ \
      char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
      if (copy == nullptr) { \
        goto fail; \
      } \
      memcpy(copy, buf, len); \
      copy[len] = ' '; \
      if (!is_valid_null_atom(reinterpret_cast<const uint8_t *>(copy) + \
                              idx)) { \
        free(copy); \
        goto fail; \
      } \
      free(copy); \
      pj.write_tape(0, c); \
      break; \
    } \
    case '0': \
    case '1': \
    case '2': \
    case '3': \
    case '4': \
    case '5': \
    case '6': \
    case '7': \
    case '8': \
    case '9': { \
      /* we need to make a copy to make sure that the string is space \
       * terminated. \
       * this is done only for JSON documents made of a sole number \
       * this will almost never be called in practice. We terminate with a \
       * space \
       * because we do not want to allow NULLs in the middle of a number \
       * (whereas a \
       * space in the middle of a number would be identified in stage 1). */ \
      char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
      if (copy == nullptr) { \
        goto fail; \
      } \
      memcpy(copy, buf, len); \
      copy[len] = ' '; \
      if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, \
                        false)) { \
        free(copy); \
        goto fail; \
      } \
      free(copy); \
      break; \
    } \
    case '-': { \
      /* we need to make a copy to make sure that the string is NULL \
       * terminated. \
       * this is done only for JSON documents made of a sole number \
       * this will almost never be called in practice */ \
      char *copy = static_cast<char *>(malloc(len + SIMDJSON_PADDING)); \
      if (copy == nullptr) { \
        goto fail; \
      } \
      memcpy(copy, buf, len); \
      copy[len] = ' '; \
      if (!parse_number(reinterpret_cast<const uint8_t *>(copy), pj, idx, \
                        true)) { \
        free(copy); \
        goto fail; \
      } \
      free(copy); \
      break; \
    } \
    default: \
      goto fail; \
    } \
  start_continue: \
    /* the string might not be NULL terminated. */ \
    if (i + 1 == pj.n_structural_indexes) { \
      goto succeed; \
    } else { \
      goto fail; \
    } \
    /*//////////////////////////// OBJECT STATES ///////////////////////////*/ \
    \
  object_begin: \
    UPDATE_CHAR(); \
    switch (c) { \
    case '"': { \
      if (!parse_string<T>(buf, len, pj, depth, idx)) { \
        goto fail; \
      } \
      goto object_key_state; \
    } \
    case '}': \
      goto scope_end; /* could also go to object_continue */ \
    default: \
      goto fail; \
    } \
    \
  object_key_state: \
    UPDATE_CHAR(); \
    if (c != ':') { \
      goto fail; \
    } \
    UPDATE_CHAR(); \
    switch (c) { \
    case '"': { \
      if (!parse_string<T>(buf, len, pj, depth, idx)) { \
        goto fail; \
      } \
      break; \
    } \
    case 't': \
      if (!is_valid_true_atom(buf + idx)) { \
        goto fail; \
      } \
      pj.write_tape(0, c); \
      break; \
    case 'f': \
      if (!is_valid_false_atom(buf + idx)) { \
        goto fail; \
      } \
      pj.write_tape(0, c); \
      break; \
    case 'n': \
      if (!is_valid_null_atom(buf + idx)) { \
        goto fail; \
      } \
      pj.write_tape(0, c); \
      break; \
    case '0': \
    case '1': \
    case '2': \
    case '3': \
    case '4': \
    case '5': \
    case '6': \
    case '7': \
    case '8': \
    case '9': { \
      if (!parse_number(buf, pj, idx, false)) { \
        goto fail; \
      } \
      break; \
    } \
    case '-': { \
      if (!parse_number(buf, pj, idx, true)) { \
        goto fail; \
      } \
      break; \
    } \
    case '{': { \
      pj.containing_scope_offset[depth] = pj.get_current_loc(); \
      pj.write_tape(0, c); /* here the compilers knows what c is so this gets \
                              optimized */ \
      /* we have not yet encountered } so we need to come back for it */ \
      SET_GOTO_OBJECT_CONTINUE() \
      /* we found an object inside an object, so we need to increment the \
       * depth */ \
      depth++; \
      if (depth >= pj.depth_capacity) { \
        goto fail; \
      } \
      \
      goto object_begin; \
    } \
    case '[': { \
      pj.containing_scope_offset[depth] = pj.get_current_loc(); \
      pj.write_tape(0, c); /* here the compilers knows what c is so this gets \
                              optimized */ \
      /* we have not yet encountered } so we need to come back for it */ \
      SET_GOTO_OBJECT_CONTINUE() \
      /* we found an array inside an object, so we need to increment the depth \
       */ \
      depth++; \
      if (depth >= pj.depth_capacity) { \
        goto fail; \
      } \
      goto array_begin; \
    } \
    default: \
      goto fail; \
    } \
    \
  object_continue: \
    UPDATE_CHAR(); \
    switch (c) { \
    case ',': \
      UPDATE_CHAR(); \
      if (c != '"') { \
        goto fail; \
      } else { \
        if (!parse_string<T>(buf, len, pj, depth, idx)) { \
          goto fail; \
        } \
        goto object_key_state; \
      } \
    case '}': \
      goto scope_end; \
    default: \
      goto fail; \
    } \
    \
    /*//////////////////////////// COMMON STATE ///////////////////////////*/ \
    \
  scope_end: \
    /* write our tape location to the header scope */ \
    depth--; \
    pj.write_tape(pj.containing_scope_offset[depth], c); \
    pj.annotate_previous_loc(pj.containing_scope_offset[depth], \
                             pj.get_current_loc()); \
    /* goto saved_state */ \
    GOTO_CONTINUE() \
    \
    /*//////////////////////////// ARRAY STATES ///////////////////////////*/ \
  array_begin: \
    UPDATE_CHAR(); \
    if (c == ']') { \
      goto scope_end; /* could also go to array_continue */ \
    } \
    \
  main_array_switch: \
    /* we call update char on all paths in, so we can peek at c on the \
     * on paths that can accept a close square brace (post-, and at start) */ \
    switch (c) { \
    case '"': { \
      if (!parse_string<T>(buf, len, pj, depth, idx)) { \
        goto fail; \
      } \
      break; \
    } \
    case 't': \
      if (!is_valid_true_atom(buf + idx)) { \
        goto fail; \
      } \
      pj.write_tape(0, c); \
      break; \
    case 'f': \
      if (!is_valid_false_atom(buf + idx)) { \
        goto fail; \
      } \
      pj.write_tape(0, c); \
      break; \
    case 'n': \
      if (!is_valid_null_atom(buf + idx)) { \
        goto fail; \
      } \
      pj.write_tape(0, c); \
      break; /* goto array_continue; */ \
      \
    case '0': \
    case '1': \
    case '2': \
    case '3': \
    case '4': \
    case '5': \
    case '6': \
    case '7': \
    case '8': \
    case '9': { \
      if (!parse_number(buf, pj, idx, false)) { \
        goto fail; \
      } \
      break; /* goto array_continue; */ \
    } \
    case '-': { \
      if (!parse_number(buf, pj, idx, true)) { \
        goto fail; \
      } \
      break; /* goto array_continue; */ \
    } \
    case '{': { \
      /* we have not yet encountered ] so we need to come back for it */ \
      pj.containing_scope_offset[depth] = pj.get_current_loc(); \
      pj.write_tape(0, c); /* here the compilers knows what c is so this gets \
                              optimized */ \
      SET_GOTO_ARRAY_CONTINUE() \
      /* we found an object inside an array, so we need to increment the depth \
       */ \
      depth++; \
      if (depth >= pj.depth_capacity) { \
        goto fail; \
      } \
      \
      goto object_begin; \
    } \
    case '[': { \
      /* we have not yet encountered ] so we need to come back for it */ \
      pj.containing_scope_offset[depth] = pj.get_current_loc(); \
      pj.write_tape(0, c); /* here the compilers knows what c is so this gets \
                              optimized */ \
      SET_GOTO_ARRAY_CONTINUE() \
      /* we found an array inside an array, so we need to increment the depth \
       */ \
      depth++; \
      if (depth >= pj.depth_capacity) { \
        goto fail; \
      } \
      goto array_begin; \
    } \
    default: \
      goto fail; \
    } \
    \
  array_continue: \
    UPDATE_CHAR(); \
    switch (c) { \
    case ',': \
      UPDATE_CHAR(); \
      goto main_array_switch; \
    case ']': \
      goto scope_end; \
    default: \
      goto fail; \
    } \
    \
    /*//////////////////////////// FINAL STATES ///////////////////////////*/ \
    \
  succeed: \
    depth--; \
    if (depth != 0) { \
      fprintf(stderr, "internal bug\n"); \
      abort(); \
    } \
    if (pj.containing_scope_offset[depth] != 0) { \
      fprintf(stderr, "internal bug\n"); \
      abort(); \
    } \
    pj.annotate_previous_loc(pj.containing_scope_offset[depth], \
                             pj.get_current_loc()); \
    pj.write_tape(pj.containing_scope_offset[depth], 'r'); /* r is root */ \
    \
    pj.valid = true; \
    pj.error_code = simdjson::SUCCESS; \
    return pj.error_code; \
  fail: \
    /* we do not need the next line because this is done by pj.init(), \
     * pessimistically. \
     * pj.is_valid  = false; \
     * At this point in the code, we have all the time in the world. \
     * Note that we know exactly where we are in the document so we could, \
     * without any overhead on the processing code, report a specific \
     * location. \
     * We could even trigger special code paths to assess what happened \
     * carefully, \
     * all without any added cost. */ \
    if (depth >= pj.depth_capacity) { \
      pj.error_code = simdjson::DEPTH_ERROR; \
      return pj.error_code; \
    } \
    switch (c) { \
    case '"': \
      pj.error_code = simdjson::STRING_ERROR; \
      return pj.error_code; \
    case '0': \
    case '1': \
    case '2': \
    case '3': \
    case '4': \
    case '5': \
    case '6': \
    case '7': \
    case '8': \
    case '9': \
    case '-': \
      pj.error_code = simdjson::NUMBER_ERROR; \
      return pj.error_code; \
    case 't': \
      pj.error_code = simdjson::T_ATOM_ERROR; \
      return pj.error_code; \
    case 'n': \
      pj.error_code = simdjson::N_ATOM_ERROR; \
      return pj.error_code; \
    case 'f': \
      pj.error_code = simdjson::F_ATOM_ERROR; \
      return pj.error_code; \
    default: \
      break; \
    } \
    pj.error_code = simdjson::TAPE_ERROR; \
    return pj.error_code; \
  }
|
1019
|
+
|
1020
|
+
} // namespace simdjson
|
1021
|
+
|
1022
|
+
#ifdef IS_X86_64
|
1023
|
+
TARGET_HASWELL
|
1024
|
+
namespace simdjson {
|
1025
|
+
// Stage 2 (tape construction) specialized for Architecture::HASWELL.
// The body is the shared UNIFIED_MACHINE state machine; the macro itself
// contains the return statements, so nothing follows its expansion.
template <>
WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER int
unified_machine<Architecture::HASWELL>(const uint8_t *buf, size_t len,
                                       ParsedJson &pj) {
  UNIFIED_MACHINE(Architecture::HASWELL, buf, len, pj);
}
|
1031
|
+
} // namespace simdjson
|
1032
|
+
UNTARGET_REGION
|
1033
|
+
|
1034
|
+
TARGET_WESTMERE
|
1035
|
+
namespace simdjson {
|
1036
|
+
// Stage 2 (tape construction) specialized for Architecture::WESTMERE.
// The body is the shared UNIFIED_MACHINE state machine; the macro itself
// contains the return statements, so nothing follows its expansion.
template <>
WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER int
unified_machine<Architecture::WESTMERE>(const uint8_t *buf, size_t len,
                                        ParsedJson &pj) {
  UNIFIED_MACHINE(Architecture::WESTMERE, buf, len, pj);
}
|
1042
|
+
} // namespace simdjson
|
1043
|
+
UNTARGET_REGION
|
1044
|
+
#endif // IS_X86_64
|
1045
|
+
|
1046
|
+
#ifdef IS_ARM64
|
1047
|
+
namespace simdjson {
|
1048
|
+
// Stage 2 (tape construction) specialized for Architecture::ARM64.
// The body is the shared UNIFIED_MACHINE state machine; the macro itself
// contains the return statements, so nothing follows its expansion.
template <>
WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER int
unified_machine<Architecture::ARM64>(const uint8_t *buf, size_t len,
                                     ParsedJson &pj) {
  UNIFIED_MACHINE(Architecture::ARM64, buf, len, pj);
}
|
1054
|
+
} // namespace simdjson
|
1055
|
+
#endif
|
1056
|
+
/* end file src/stage2_build_tape.cpp */
|
1057
|
+
/* begin file src/parsedjson.cpp */
|
1058
|
+
|
1059
|
+
namespace simdjson {
|
1060
|
+
// Default constructor: creates a ParsedJson that owns no buffers yet.
// Every buffer pointer starts out null; allocate_capacity() is what assigns
// them.
ParsedJson::ParsedJson()
    : structural_indexes(nullptr),
      tape(nullptr),
      containing_scope_offset(nullptr),
      ret_address(nullptr),
      string_buf(nullptr),
      current_string_buf_loc(nullptr) {
}
|
1064
|
+
|
1065
|
+
// Destructor: hands every owned buffer back through deallocate().
ParsedJson::~ParsedJson() {
  deallocate();
}
|
1066
|
+
|
1067
|
+
// Move constructor: takes over the buffers, capacities and parse state of p.
//
// After the move p owns nothing: its pointers are null, its capacities are
// zero and it is marked invalid. Zeroing the capacities matters because
// allocate_capacity() skips allocation when the recorded capacities look
// sufficient; a moved-from object that still advertised its old capacities
// would be "reused" with null buffers. Clearing p.valid keeps print_json()
// and dump_raw_tape() from dereferencing the null tape.
ParsedJson::ParsedJson(ParsedJson &&p)
    : byte_capacity(p.byte_capacity), depth_capacity(p.depth_capacity),
      tape_capacity(p.tape_capacity), string_capacity(p.string_capacity),
      current_loc(p.current_loc), n_structural_indexes(p.n_structural_indexes),
      structural_indexes(p.structural_indexes), tape(p.tape),
      containing_scope_offset(p.containing_scope_offset),
      ret_address(p.ret_address), string_buf(p.string_buf),
      current_string_buf_loc(p.current_string_buf_loc), valid(p.valid) {
  // The outcome of the last parse travels with the tape it describes.
  // (Assigned in the body so the statement is independent of the member
  // declaration order.)
  error_code = p.error_code;

  // Release ownership in the source so its destructor frees nothing.
  p.structural_indexes = nullptr;
  p.tape = nullptr;
  p.containing_scope_offset = nullptr;
  p.ret_address = nullptr;
  p.string_buf = nullptr;
  p.current_string_buf_loc = nullptr;

  // Leave the source in a consistent "nothing allocated" state.
  p.byte_capacity = 0;
  p.depth_capacity = 0;
  p.tape_capacity = 0;
  p.string_capacity = 0;
  p.current_loc = 0;
  p.n_structural_indexes = 0;
  p.valid = false;
}
|
1082
|
+
|
1083
|
+
// Makes sure this ParsedJson can hold a document of up to `len` bytes nested
// up to `max_depth` levels.
//
// A zero max_depth is bumped to 1 and a zero len to 64. Returns false when
// len exceeds SIMDJSON_MAXSIZE_BYTES or when any buffer cannot be allocated;
// in the latter case the object is left with no buffers and zero capacities.
// When the current buffers are already large enough for BOTH len and
// max_depth they are kept as they are and true is returned. Otherwise all
// buffers are released and reallocated, which also invalidates any
// previously parsed document.
WARN_UNUSED
bool ParsedJson::allocate_capacity(size_t len, size_t max_depth) {
  if (max_depth == 0) {
    max_depth = 1; // don't let the user allocate nothing
  }
  if (len == 0) {
    len = 64; // allocating 0 bytes is wasteful.
  }
  if (len > SIMDJSON_MAXSIZE_BYTES) {
    return false;
  }
  // Reuse is only valid when the existing buffers cover both dimensions. The
  // depth comparison used to be inverted (depth_capacity < max_depth), which
  // returned early exactly when the depth buffers were too small and forced a
  // needless reallocation when they were large enough.
  if ((len <= byte_capacity) && (max_depth <= depth_capacity)) {
    return true;
  }
  deallocate();
  valid = false;
  byte_capacity = 0; // will only set it to len after allocations are a success
  n_structural_indexes = 0;
  // Computed in size_t so that rounding len up cannot wrap a 32-bit counter.
  size_t max_structures = ROUNDUP_N(len, 64) + 2 + 7;
  structural_indexes = new (std::nothrow) uint32_t[max_structures];
  // a pathological input like "[[[[..." would generate len tape elements, so
  // need a capacity of len + 1
  size_t local_tape_capacity = ROUNDUP_N(len + 1, 64);
  // a document with only zero-length strings... could have len/3 string
  // and we would need len/3 * 5 bytes on the string buffer
  size_t local_string_capacity = ROUNDUP_N(5 * len / 3 + 32, 64);
  string_buf = new (std::nothrow) uint8_t[local_string_capacity];
  tape = new (std::nothrow) uint64_t[local_tape_capacity];
  containing_scope_offset = new (std::nothrow) uint32_t[max_depth];
#ifdef SIMDJSON_USE_COMPUTED_GOTO
  ret_address = new (std::nothrow) void *[max_depth];
#else
  ret_address = new (std::nothrow) char[max_depth];
#endif
  if ((string_buf == nullptr) || (tape == nullptr) ||
      (containing_scope_offset == nullptr) || (ret_address == nullptr) ||
      (structural_indexes == nullptr)) {
    std::cerr << "Could not allocate memory" << std::endl;
    // Free whatever was obtained and null every pointer: the destructor runs
    // deallocate() later and must not delete these buffers a second time.
    delete[] ret_address;
    ret_address = nullptr;
    delete[] containing_scope_offset;
    containing_scope_offset = nullptr;
    delete[] tape;
    tape = nullptr;
    delete[] string_buf;
    string_buf = nullptr;
    current_string_buf_loc = nullptr;
    delete[] structural_indexes;
    structural_indexes = nullptr;

    return false;
  }
  /*
  // We do not need to initialize this content for parsing, though we could
  // need to initialize it for safety.
  memset(string_buf, 0 , local_string_capacity);
  memset(structural_indexes, 0, max_structures * sizeof(uint32_t));
  memset(tape, 0, local_tape_capacity * sizeof(uint64_t));
  */
  byte_capacity = len;
  depth_capacity = max_depth;
  tape_capacity = local_tape_capacity;
  string_capacity = local_string_capacity;
  return true;
}
|
1142
|
+
|
1143
|
+
bool ParsedJson::is_valid() const { return valid; }
|
1144
|
+
|
1145
|
+
int ParsedJson::get_error_code() const { return error_code; }
|
1146
|
+
|
1147
|
+
std::string ParsedJson::get_error_message() const {
|
1148
|
+
return error_message(error_code);
|
1149
|
+
}
|
1150
|
+
|
1151
|
+
void ParsedJson::deallocate() {
|
1152
|
+
byte_capacity = 0;
|
1153
|
+
depth_capacity = 0;
|
1154
|
+
tape_capacity = 0;
|
1155
|
+
string_capacity = 0;
|
1156
|
+
delete[] ret_address;
|
1157
|
+
delete[] containing_scope_offset;
|
1158
|
+
delete[] tape;
|
1159
|
+
delete[] string_buf;
|
1160
|
+
delete[] structural_indexes;
|
1161
|
+
valid = false;
|
1162
|
+
}
|
1163
|
+
|
1164
|
+
// Resets the write cursors before a new parse and pessimistically marks the
// document invalid. No memory is allocated or released here.
void ParsedJson::init() {
  valid = false;
  current_loc = 0;                     // tape cursor back to the first slot
  current_string_buf_loc = string_buf; // string cursor back to buffer start
}
|
1169
|
+
|
1170
|
+
// Writes the parsed document to `os` as compact JSON by walking the tape.
//
// Returns false without touching the tape when the document is not valid,
// and false when the tape does not start with a root ('r') entry, when the
// root's payload exceeds tape_capacity, when a number entry lacks its value
// word, or when an unexpected entry type is met. Output may already have
// been written to `os` when false is returned from inside the loop.
WARN_UNUSED
bool ParsedJson::print_json(std::ostream &os) {
  if (!valid) {
    return false;
  }
  // Owns the two per-depth scratch arrays so that every exit path -- early
  // returns as well as exceptions from the stream or from the second
  // allocation -- releases them.
  struct DepthScratch {
    bool *flags = nullptr;
    size_t *counts = nullptr;
    ~DepthScratch() {
      delete[] flags;
      delete[] counts;
    }
  };

  uint32_t string_length;
  size_t tape_idx = 0;
  uint64_t tape_val = tape[tape_idx];
  uint8_t type = (tape_val >> 56);
  size_t how_many = 0;
  if (type == 'r') {
    how_many = tape_val & JSON_VALUE_MASK;
  } else {
    fprintf(stderr, "Error: no starting root node?");
    return false;
  }
  if (how_many > tape_capacity) {
    fprintf(
        stderr,
        "We may be exceeding the tape capacity. Is this a valid document?\n");
    return false;
  }
  tape_idx++;

  DepthScratch scratch;
  scratch.flags = new bool[depth_capacity];
  scratch.counts = new size_t[depth_capacity];
  bool *in_object = scratch.flags;       // per depth: inside an object?
  size_t *in_object_idx = scratch.counts; // per depth: entries seen so far
  int depth = 1; // only root at level 0
  in_object_idx[depth] = 0;
  in_object[depth] = false;
  for (; tape_idx < how_many; tape_idx++) {
    tape_val = tape[tape_idx];
    uint64_t payload = tape_val & JSON_VALUE_MASK;
    type = (tape_val >> 56);
    if (!in_object[depth]) {
      // array (or root): a comma before every element but the first
      if ((in_object_idx[depth] > 0) && (type != ']')) {
        os << ",";
      }
      in_object_idx[depth]++;
    } else { // if (in_object) {
      // object: entries alternate key, value, key, value, ...
      if ((in_object_idx[depth] > 0) && ((in_object_idx[depth] & 1) == 0) &&
          (type != '}')) {
        os << ",";
      }
      if (((in_object_idx[depth] & 1) == 1)) {
        os << ":";
      }
      in_object_idx[depth]++;
    }
    switch (type) {
    case '"': // we have a string
      os << '"';
      memcpy(&string_length, string_buf + payload, sizeof(uint32_t));
      // Use the overload that takes the stream: the two-argument form does
      // not receive `os`, so the string body would not end up in the caller's
      // stream together with the surrounding quotes.
      print_with_escapes(
          reinterpret_cast<const char *>(string_buf + payload +
                                         sizeof(uint32_t)),
          os, string_length);
      os << '"';
      break;
    case 'l': // we have a long int
      if (tape_idx + 1 >= how_many) {
        return false;
      }
      os << static_cast<int64_t>(tape[++tape_idx]);
      break;
    case 'd': { // we have a double
      if (tape_idx + 1 >= how_many) {
        return false;
      }
      double answer;
      memcpy(&answer, &tape[++tape_idx], sizeof(answer));
      os << answer;
      break;
    }
    case 'n': // we have a null
      os << "null";
      break;
    case 't': // we have a true
      os << "true";
      break;
    case 'f': // we have a false
      os << "false";
      break;
    case '{': // we have an object
      os << '{';
      depth++;
      in_object[depth] = true;
      in_object_idx[depth] = 0;
      break;
    case '}': // we end an object
      depth--;
      os << '}';
      break;
    case '[': // we start an array
      os << '[';
      depth++;
      in_object[depth] = false;
      in_object_idx[depth] = 0;
      break;
    case ']': // we end an array
      depth--;
      os << ']';
      break;
    case 'r': // we start and end with the root node
      fprintf(stderr, "should we be hitting the root node?\n");
      return false;
    default:
      fprintf(stderr, "bug %c\n", type);
      return false;
    }
  }
  return true;
}
|
1289
|
+
|
1290
|
+
// Writes a human-readable listing of the tape to `os`, one entry per line,
// each prefixed with its tape index.
//
// Returns false when the document is not valid, when the tape does not start
// with a root ('r') entry, when a number entry lacks its value word, or when
// an unexpected entry type is met inside the document. Part of the listing
// may already have been written when false is returned.
WARN_UNUSED
bool ParsedJson::dump_raw_tape(std::ostream &os) {
  if (!valid) {
    return false;
  }
  uint32_t string_length;
  size_t tape_idx = 0;
  uint64_t tape_val = tape[tape_idx];
  uint8_t type = (tape_val >> 56);
  os << tape_idx << " : " << type;
  tape_idx++;
  size_t how_many = 0;
  if (type == 'r') {
    how_many = tape_val & JSON_VALUE_MASK;
  } else {
    fprintf(stderr, "Error: no starting root node?");
    return false;
  }
  os << "\t// pointing to " << how_many << " (right after last node)\n";
  uint64_t payload;
  for (; tape_idx < how_many; tape_idx++) {
    os << tape_idx << " : ";
    tape_val = tape[tape_idx];
    payload = tape_val & JSON_VALUE_MASK;
    type = (tape_val >> 56);
    switch (type) {
    case '"': // we have a string
      os << "string \"";
      memcpy(&string_length, string_buf + payload, sizeof(uint32_t));
      // Use the overload that takes the stream: the two-argument form does
      // not receive `os`, so the string body would be missing from the
      // listing.
      print_with_escapes(
          reinterpret_cast<const char *>(string_buf + payload +
                                         sizeof(uint32_t)),
          os, string_length);
      os << '"';
      os << '\n';
      break;
    case 'l': // we have a long int
      if (tape_idx + 1 >= how_many) {
        return false;
      }
      os << "integer " << static_cast<int64_t>(tape[++tape_idx]) << "\n";
      break;
    case 'd': { // we have a double
      os << "float ";
      if (tape_idx + 1 >= how_many) {
        return false;
      }
      double answer;
      memcpy(&answer, &tape[++tape_idx], sizeof(answer));
      os << answer << '\n';
      break;
    }
    case 'n': // we have a null
      os << "null\n";
      break;
    case 't': // we have a true
      os << "true\n";
      break;
    case 'f': // we have a false
      os << "false\n";
      break;
    case '{': // we have an object
      os << "{\t// pointing to next tape location " << payload
         << " (first node after the scope) \n";
      break;
    case '}': // we end an object
      os << "}\t// pointing to previous tape location " << payload
         << " (start of the scope) \n";
      break;
    case '[': // we start an array
      os << "[\t// pointing to next tape location " << payload
         << " (first node after the scope) \n";
      break;
    case ']': // we end an array
      os << "]\t// pointing to previous tape location " << payload
         << " (start of the scope) \n";
      break;
    case 'r': // we start and end with the root node
      // A root entry inside the document is unexpected; report it on stderr
      // like the other diagnostics instead of mixing it into stdout.
      fprintf(stderr, "end of root\n");
      return false;
    default:
      return false;
    }
  }
  // The entry at index how_many is the closing root node.
  tape_val = tape[tape_idx];
  payload = tape_val & JSON_VALUE_MASK;
  type = (tape_val >> 56);
  os << tape_idx << " : " << type << "\t// pointing to " << payload
     << " (start root)\n";
  return true;
}
|
1379
|
+
} // namespace simdjson
|
1380
|
+
/* end file src/parsedjson.cpp */
|
1381
|
+
/* begin file src/parsedjsoniterator.cpp */
|
1382
|
+
#include <iterator>
|
1383
|
+
|
1384
|
+
namespace simdjson {
|
1385
|
+
// Builds an iterator positioned on the first value of a parsed document.
//
// Throws InvalidJSON when pj is not valid or when its tape does not begin
// with a root ('r') entry. The scope stack (depth_index) is allocated here;
// entry 0 describes the root and, when the document is non-empty, entry 1
// describes the first value.
ParsedJson::Iterator::Iterator(ParsedJson &pj_)
    : pj(pj_), depth(0), location(0), tape_length(0), depth_index(nullptr) {
  if (!pj.is_valid()) {
    throw InvalidJSON();
  }
  // we overallocate by "1" to silence a warning in Visual Studio
  depth_index = new scopeindex_t[pj.depth_capacity + 1];
  // memory allocation would throw
  depth_index[0].start_of_scope = location;
  current_val = pj.tape[location++];
  current_type = (current_val >> 56);
  depth_index[0].scope_type = current_type;
  if (current_type != 'r') {
    // should never happen
    // The destructor does not run for an object whose constructor throws, so
    // the scope stack has to be released here or it would leak.
    delete[] depth_index;
    depth_index = nullptr;
    throw InvalidJSON();
  }
  tape_length = current_val & JSON_VALUE_MASK;
  if (location < tape_length) {
    // If we make it here, then depth_capacity must >=2, but the compiler
    // may not know this.
    current_val = pj.tape[location];
    current_type = (current_val >> 56);
    depth++;
    depth_index[depth].start_of_scope = location;
    depth_index[depth].scope_type = current_type;
  }
}
|
1416
|
+
|
1417
|
+
// Destructor: frees the scope stack owned by this iterator.
ParsedJson::Iterator::~Iterator() {
  delete[] depth_index;
}
|
1418
|
+
|
1419
|
+
// Copy constructor: the new iterator refers to the same ParsedJson and the
// same position as `o`, but gets its own copy of the scope stack.
// Note: the function is noexcept while the allocation below can throw.
ParsedJson::Iterator::Iterator(const Iterator &o) noexcept
    : pj(o.pj),
      depth(o.depth),
      location(o.location),
      tape_length(o.tape_length),
      current_type(o.current_type),
      current_val(o.current_val),
      depth_index(nullptr) {
  const auto scope_slots = pj.depth_capacity;
  // allocation might throw
  depth_index = new scopeindex_t[scope_slots];
  memcpy(depth_index, o.depth_index, scope_slots * sizeof(depth_index[0]));
}
|
1429
|
+
|
1430
|
+
// Move constructor: copies the position fields of `o` and steals its scope
// stack instead of allocating a new one.
ParsedJson::Iterator::Iterator(Iterator &&o) noexcept
    : pj(o.pj),
      depth(o.depth),
      location(o.location),
      tape_length(o.tape_length),
      current_type(o.current_type),
      current_val(o.current_val),
      depth_index(o.depth_index) {
  // This iterator owns the array now; null the source's pointer so that its
  // destructor does not free the array a second time.
  o.depth_index = nullptr;
}
|
1436
|
+
|
1437
|
+
bool ParsedJson::Iterator::print(std::ostream &os, bool escape_strings) const {
|
1438
|
+
if (!is_ok()) {
|
1439
|
+
return false;
|
1440
|
+
}
|
1441
|
+
switch (current_type) {
|
1442
|
+
case '"': // we have a string
|
1443
|
+
os << '"';
|
1444
|
+
if (escape_strings) {
|
1445
|
+
print_with_escapes(get_string(), os, get_string_length());
|
1446
|
+
} else {
|
1447
|
+
// was: os << get_string();, but given that we can include null chars, we
|
1448
|
+
// have to do something crazier:
|
1449
|
+
std::copy(get_string(), get_string() + get_string_length(),
|
1450
|
+
std::ostream_iterator<char>(os));
|
1451
|
+
}
|
1452
|
+
os << '"';
|
1453
|
+
break;
|
1454
|
+
case 'l': // we have a long int
|
1455
|
+
os << get_integer();
|
1456
|
+
break;
|
1457
|
+
case 'd':
|
1458
|
+
os << get_double();
|
1459
|
+
break;
|
1460
|
+
case 'n': // we have a null
|
1461
|
+
os << "null";
|
1462
|
+
break;
|
1463
|
+
case 't': // we have a true
|
1464
|
+
os << "true";
|
1465
|
+
break;
|
1466
|
+
case 'f': // we have a false
|
1467
|
+
os << "false";
|
1468
|
+
break;
|
1469
|
+
case '{': // we have an object
|
1470
|
+
case '}': // we end an object
|
1471
|
+
case '[': // we start an array
|
1472
|
+
case ']': // we end an array
|
1473
|
+
os << static_cast<char>(current_type);
|
1474
|
+
break;
|
1475
|
+
default:
|
1476
|
+
return false;
|
1477
|
+
}
|
1478
|
+
return true;
|
1479
|
+
}
|
1480
|
+
|
1481
|
+
bool ParsedJson::Iterator::move_to(const char *pointer, uint32_t length) {
|
1482
|
+
char *new_pointer = nullptr;
|
1483
|
+
if (pointer[0] == '#') {
|
1484
|
+
// Converting fragment representation to string representation
|
1485
|
+
new_pointer = new char[length];
|
1486
|
+
uint32_t new_length = 0;
|
1487
|
+
for (uint32_t i = 1; i < length; i++) {
|
1488
|
+
if (pointer[i] == '%' && pointer[i + 1] == 'x') {
|
1489
|
+
try {
|
1490
|
+
int fragment =
|
1491
|
+
std::stoi(std::string(&pointer[i + 2], 2), nullptr, 16);
|
1492
|
+
if (fragment == '\\' || fragment == '"' || (fragment <= 0x1F)) {
|
1493
|
+
// escaping the character
|
1494
|
+
new_pointer[new_length] = '\\';
|
1495
|
+
new_length++;
|
1496
|
+
}
|
1497
|
+
new_pointer[new_length] = fragment;
|
1498
|
+
i += 3;
|
1499
|
+
} catch (std::invalid_argument &) {
|
1500
|
+
delete[] new_pointer;
|
1501
|
+
return false; // the fragment is invalid
|
1502
|
+
}
|
1503
|
+
} else {
|
1504
|
+
new_pointer[new_length] = pointer[i];
|
1505
|
+
}
|
1506
|
+
new_length++;
|
1507
|
+
}
|
1508
|
+
length = new_length;
|
1509
|
+
pointer = new_pointer;
|
1510
|
+
}
|
1511
|
+
|
1512
|
+
// saving the current state
|
1513
|
+
size_t depth_s = depth;
|
1514
|
+
size_t location_s = location;
|
1515
|
+
uint8_t current_type_s = current_type;
|
1516
|
+
uint64_t current_val_s = current_val;
|
1517
|
+
scopeindex_t *depth_index_s = depth_index;
|
1518
|
+
|
1519
|
+
rewind(); // The json pointer is used from the root of the document.
|
1520
|
+
|
1521
|
+
bool found = relative_move_to(pointer, length);
|
1522
|
+
delete[] new_pointer;
|
1523
|
+
|
1524
|
+
if (!found) {
|
1525
|
+
// since the pointer has found nothing, we get back to the original
|
1526
|
+
// position.
|
1527
|
+
depth = depth_s;
|
1528
|
+
location = location_s;
|
1529
|
+
current_type = current_type_s;
|
1530
|
+
current_val = current_val_s;
|
1531
|
+
depth_index = depth_index_s;
|
1532
|
+
}
|
1533
|
+
|
1534
|
+
return found;
|
1535
|
+
}
|
1536
|
+
|
1537
|
+
bool ParsedJson::Iterator::relative_move_to(const char *pointer,
|
1538
|
+
uint32_t length) {
|
1539
|
+
if (length == 0) {
|
1540
|
+
// returns the whole document
|
1541
|
+
return true;
|
1542
|
+
}
|
1543
|
+
|
1544
|
+
if (pointer[0] != '/') {
|
1545
|
+
// '/' must be the first character
|
1546
|
+
return false;
|
1547
|
+
}
|
1548
|
+
|
1549
|
+
// finding the key in an object or the index in an array
|
1550
|
+
std::string key_or_index;
|
1551
|
+
uint32_t offset = 1;
|
1552
|
+
|
1553
|
+
// checking for the "-" case
|
1554
|
+
if (is_array() && pointer[1] == '-') {
|
1555
|
+
if (length != 2) {
|
1556
|
+
// the pointer must be exactly "/-"
|
1557
|
+
// there can't be anything more after '-' as an index
|
1558
|
+
return false;
|
1559
|
+
}
|
1560
|
+
key_or_index = '-';
|
1561
|
+
offset = length; // will skip the loop coming right after
|
1562
|
+
}
|
1563
|
+
|
1564
|
+
// We either transform the first reference token to a valid json key
|
1565
|
+
// or we make sure it is a valid index in an array.
|
1566
|
+
for (; offset < length; offset++) {
|
1567
|
+
if (pointer[offset] == '/') {
|
1568
|
+
// beginning of the next key or index
|
1569
|
+
break;
|
1570
|
+
}
|
1571
|
+
if (is_array() && (pointer[offset] < '0' || pointer[offset] > '9')) {
|
1572
|
+
// the index of an array must be an integer
|
1573
|
+
// we also make sure std::stoi won't discard whitespaces later
|
1574
|
+
return false;
|
1575
|
+
}
|
1576
|
+
if (pointer[offset] == '~') {
|
1577
|
+
// "~1" represents "/"
|
1578
|
+
if (pointer[offset + 1] == '1') {
|
1579
|
+
key_or_index += '/';
|
1580
|
+
offset++;
|
1581
|
+
continue;
|
1582
|
+
}
|
1583
|
+
// "~0" represents "~"
|
1584
|
+
if (pointer[offset + 1] == '0') {
|
1585
|
+
key_or_index += '~';
|
1586
|
+
offset++;
|
1587
|
+
continue;
|
1588
|
+
}
|
1589
|
+
}
|
1590
|
+
if (pointer[offset] == '\\') {
|
1591
|
+
if (pointer[offset + 1] == '\\' || pointer[offset + 1] == '"' ||
|
1592
|
+
(pointer[offset + 1] <= 0x1F)) {
|
1593
|
+
key_or_index += pointer[offset + 1];
|
1594
|
+
offset++;
|
1595
|
+
continue;
|
1596
|
+
}
|
1597
|
+
return false; // invalid escaped character
|
1598
|
+
}
|
1599
|
+
if (pointer[offset] == '\"') {
|
1600
|
+
// unescaped quote character. this is an invalid case.
|
1601
|
+
// lets do nothing and assume most pointers will be valid.
|
1602
|
+
// it won't find any corresponding json key anyway.
|
1603
|
+
// return false;
|
1604
|
+
}
|
1605
|
+
key_or_index += pointer[offset];
|
1606
|
+
}
|
1607
|
+
|
1608
|
+
bool found = false;
|
1609
|
+
if (is_object()) {
|
1610
|
+
if (move_to_key(key_or_index.c_str(), key_or_index.length())) {
|
1611
|
+
found = relative_move_to(pointer + offset, length - offset);
|
1612
|
+
}
|
1613
|
+
} else if (is_array()) {
|
1614
|
+
if (key_or_index == "-") { // handling "-" case first
|
1615
|
+
if (down()) {
|
1616
|
+
while (next())
|
1617
|
+
; // moving to the end of the array
|
1618
|
+
// moving to the nonexistent value right after...
|
1619
|
+
size_t npos;
|
1620
|
+
if ((current_type == '[') || (current_type == '{')) {
|
1621
|
+
// we need to jump
|
1622
|
+
npos = (current_val & JSON_VALUE_MASK);
|
1623
|
+
} else {
|
1624
|
+
npos =
|
1625
|
+
location + ((current_type == 'd' || current_type == 'l') ? 2 : 1);
|
1626
|
+
}
|
1627
|
+
location = npos;
|
1628
|
+
current_val = pj.tape[npos];
|
1629
|
+
current_type = (current_val >> 56);
|
1630
|
+
return true; // how could it fail ?
|
1631
|
+
}
|
1632
|
+
} else { // regular numeric index
|
1633
|
+
// The index can't have a leading '0'
|
1634
|
+
if (key_or_index[0] == '0' && key_or_index.length() > 1) {
|
1635
|
+
return false;
|
1636
|
+
}
|
1637
|
+
// it cannot be empty
|
1638
|
+
if (key_or_index.length() == 0) {
|
1639
|
+
return false;
|
1640
|
+
}
|
1641
|
+
// we already checked the index contains only valid digits
|
1642
|
+
uint32_t index = std::stoi(key_or_index);
|
1643
|
+
if (move_to_index(index)) {
|
1644
|
+
found = relative_move_to(pointer + offset, length - offset);
|
1645
|
+
}
|
1646
|
+
}
|
1647
|
+
}
|
1648
|
+
|
1649
|
+
return found;
|
1650
|
+
}
|
1651
|
+
} // namespace simdjson
|
1652
|
+
/* end file src/parsedjsoniterator.cpp */
|