simdjson 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.clang-format +5 -0
- data/.gitignore +14 -0
- data/.gitmodules +3 -0
- data/.rubocop.yml +9 -0
- data/.travis.yml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +32 -0
- data/benchmark/apache_builds.json +4421 -0
- data/benchmark/demo.json +15 -0
- data/benchmark/github_events.json +1390 -0
- data/benchmark/run_benchmark.rb +30 -0
- data/ext/simdjson/extconf.rb +22 -0
- data/ext/simdjson/simdjson.cpp +76 -0
- data/ext/simdjson/simdjson.hpp +6 -0
- data/lib/simdjson/version.rb +3 -0
- data/lib/simdjson.rb +2 -0
- data/simdjson.gemspec +35 -0
- data/vendor/.gitkeep +0 -0
- data/vendor/simdjson/AUTHORS +3 -0
- data/vendor/simdjson/CMakeLists.txt +63 -0
- data/vendor/simdjson/CONTRIBUTORS +27 -0
- data/vendor/simdjson/Dockerfile +10 -0
- data/vendor/simdjson/LICENSE +201 -0
- data/vendor/simdjson/Makefile +203 -0
- data/vendor/simdjson/Notes.md +85 -0
- data/vendor/simdjson/README.md +581 -0
- data/vendor/simdjson/amalgamation.sh +158 -0
- data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
- data/vendor/simdjson/benchmark/benchmark.h +223 -0
- data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
- data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
- data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
- data/vendor/simdjson/benchmark/parse.cpp +393 -0
- data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
- data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
- data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
- data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
- data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/gbps.png +0 -0
- data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
- data/vendor/simdjson/images/halvarflake.png +0 -0
- data/vendor/simdjson/images/logo.png +0 -0
- data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
- data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
- data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
- data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
- data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
- data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
- data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
- data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
- data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
- data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
- data/vendor/simdjson/include/simdjson/portability.h +172 -0
- data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
- data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
- data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
- data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
- data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
- data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
- data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
- data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
- data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
- data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
- data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
- data/vendor/simdjson/scripts/issue150.sh +14 -0
- data/vendor/simdjson/scripts/javascript/README.md +3 -0
- data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
- data/vendor/simdjson/scripts/minifier.sh +11 -0
- data/vendor/simdjson/scripts/parseandstat.sh +24 -0
- data/vendor/simdjson/scripts/parser.sh +11 -0
- data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
- data/vendor/simdjson/scripts/plotparse.sh +98 -0
- data/vendor/simdjson/scripts/selectparser.sh +11 -0
- data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
- data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
- data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
- data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
- data/vendor/simdjson/scripts/testjson2json.sh +99 -0
- data/vendor/simdjson/scripts/transitions/Makefile +10 -0
- data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
- data/vendor/simdjson/singleheader/README.md +1 -0
- data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
- data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
- data/vendor/simdjson/singleheader/simdjson.h +39692 -0
- data/vendor/simdjson/src/CMakeLists.txt +67 -0
- data/vendor/simdjson/src/jsonioutil.cpp +35 -0
- data/vendor/simdjson/src/jsonminifier.cpp +285 -0
- data/vendor/simdjson/src/jsonparser.cpp +91 -0
- data/vendor/simdjson/src/parsedjson.cpp +323 -0
- data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
- data/vendor/simdjson/src/simdjson.cpp +30 -0
- data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
- data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
- data/vendor/simdjson/style/clang-format-check.sh +25 -0
- data/vendor/simdjson/style/clang-format.sh +25 -0
- data/vendor/simdjson/style/run-clang-format.py +326 -0
- data/vendor/simdjson/tape.md +134 -0
- data/vendor/simdjson/tests/CMakeLists.txt +25 -0
- data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
- data/vendor/simdjson/tests/basictests.cpp +75 -0
- data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
- data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
- data/vendor/simdjson/tests/pointercheck.cpp +38 -0
- data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
- data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
- data/vendor/simdjson/tools/CMakeLists.txt +3 -0
- data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
- data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
- data/vendor/simdjson/tools/json2json.cpp +112 -0
- data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
- data/vendor/simdjson/tools/jsonstats.cpp +143 -0
- data/vendor/simdjson/tools/minify.cpp +21 -0
- data/vendor/simdjson/tools/release.py +125 -0
- data/vendor/simdjson/windows/dirent_portable.h +1043 -0
- metadata +273 -0
@@ -0,0 +1,239 @@
|
|
1
|
+
#ifndef SIMDJSON_STAGE1_FIND_MARKS_MACROS_H
|
2
|
+
#define SIMDJSON_STAGE1_FIND_MARKS_MACROS_H
|
3
|
+
|
4
|
+
// return a bitvector indicating where we have characters that end an odd-length
|
5
|
+
// sequence of backslashes (and thus change the behavior of the next character
|
6
|
+
// to follow). An even-length sequence of backslashes, and, for that matter, the
|
7
|
+
// largest even-length prefix of our odd-length sequence of backslashes, simply
|
8
|
+
// modify the behavior of the backslashes themselves.
|
9
|
+
// We also update the prev_iter_ends_odd_backslash reference parameter to
|
10
|
+
// indicate whether we end an iteration on an odd-length sequence of
|
11
|
+
// backslashes, which modifies our subsequent search for odd-length
|
12
|
+
// sequences of backslashes in an obvious way.
|
13
|
+
// We need to compile that code for multiple architectures. However, target
|
14
|
+
// attributes can be used only once by function definition. Huge macro seemed
|
15
|
+
// better than huge code duplication. uint64_t
|
16
|
+
// FIND_ODD_BACKSLASH_SEQUENCES(Architecture T, simd_input<T> in, uint64_t
|
17
|
+
// &prev_iter_ends_odd_backslash)
|
18
|
+
// Function-body macro: expands to a braced block ending in `return odd_ends;`,
// so it must form the entire body of a function returning uint64_t.
//   T                            - architecture tag forwarded to
//                                  cmp_mask_against_input<T>
//   in                           - the input block to scan
//   prev_iter_ends_odd_backslash - in/out lvalue holding 0 or 1: read as the
//                                  carry from the previous block, then
//                                  overwritten with the carry for the next one
// Expression arguments are parenthesized at every use so that a caller passing
// a compound expression cannot change operator grouping.
#define FIND_ODD_BACKSLASH_SEQUENCES(T, in, prev_iter_ends_odd_backslash)      \
  {                                                                            \
    const uint64_t even_bits = 0x5555555555555555ULL;                          \
    const uint64_t odd_bits = ~even_bits;                                      \
    /* mask of the positions that compare equal to a backslash */              \
    uint64_t bs_bits = cmp_mask_against_input<T>((in), '\\');                  \
    /* a backslash whose predecessor is not a backslash starts a run */        \
    uint64_t start_edges = bs_bits & ~(bs_bits << 1);                          \
    /* flip lowest if we have an odd-length run at the end of the prior        \
     * iteration */                                                            \
    uint64_t even_start_mask = even_bits ^ (prev_iter_ends_odd_backslash);     \
    uint64_t even_starts = start_edges & even_start_mask;                      \
    uint64_t odd_starts = start_edges & ~even_start_mask;                      \
    uint64_t even_carries = bs_bits + even_starts;                             \
                                                                               \
    uint64_t odd_carries;                                                      \
    /* must record the carry-out of our odd-carries out of bit 63; this        \
     * indicates whether the sense of any edge going to the next iteration     \
     * should be flipped */                                                    \
    bool iter_ends_odd_backslash =                                             \
        add_overflow(bs_bits, odd_starts, &odd_carries);                       \
                                                                               \
    /* push in bit zero as a potential end if we had an odd-numbered run at    \
     * the end of the previous iteration */                                    \
    odd_carries |= (prev_iter_ends_odd_backslash);                             \
    (prev_iter_ends_odd_backslash) =                                           \
        iter_ends_odd_backslash ? 0x1ULL : 0x0ULL;                             \
    uint64_t even_carry_ends = even_carries & ~bs_bits;                        \
    uint64_t odd_carry_ends = odd_carries & ~bs_bits;                          \
    uint64_t even_start_odd_end = even_carry_ends & odd_bits;                  \
    uint64_t odd_start_even_end = odd_carry_ends & even_bits;                  \
    uint64_t odd_ends = even_start_odd_end | odd_start_even_end;               \
    return odd_ends;                                                           \
  }
|
51
|
+
|
52
|
+
// return both the quote mask (which is a half-open mask that covers the first
|
53
|
+
// quote
|
54
|
+
// in an unescaped quote pair and everything in the quote pair) and the quote
|
55
|
+
// bits, which are the simple
|
56
|
+
// unescaped quoted bits. We also update the prev_iter_inside_quote value to
|
57
|
+
// tell the next iteration
|
58
|
+
// whether we finished the final iteration inside a quote pair; if so, this
|
59
|
+
// inverts our behavior of
|
60
|
+
// whether we're inside quotes for the next iteration.
|
61
|
+
// Note that we don't do any error checking to see if we have backslash
|
62
|
+
// sequences outside quotes; these
|
63
|
+
// backslash sequences (of any length) will be detected elsewhere.
|
64
|
+
// We need to compile that code for multiple architectures. However, target
|
65
|
+
// attributes can be used only once by function definition. Huge macro seemed
|
66
|
+
// better than huge code duplication. uint64_t
|
67
|
+
// FIND_QUOTE_MASK_AND_BITS(Architecture T, simd_input<T> in, uint64_t odd_ends,
|
68
|
+
// uint64_t &prev_iter_inside_quote, uint64_t &quote_bits, uint64_t
|
69
|
+
// &error_mask)
|
70
|
+
// Function-body macro: expands to a braced block ending in
// `return quote_mask;`, so it must form the entire body of a function
// returning uint64_t.
//   T                      - architecture tag for the helper templates
//   in                     - the input block to scan
//   odd_ends               - mask of positions that are escaped by a backslash
//                            run; quotes at these positions are ignored
//   prev_iter_inside_quote - in/out lvalue: XORed into this block's mask, then
//                            overwritten with the sign-extension of bit 63
//                            (all zeros or all ones)
//   quote_bits             - out lvalue: the unescaped quote positions
//   error_mask             - in/out lvalue: ORed with positions inside quotes
//                            whose byte value is <= 0x1F
// Expression arguments are parenthesized where operator precedence could
// otherwise regroup a compound argument.
#define FIND_QUOTE_MASK_AND_BITS(T, in, odd_ends, prev_iter_inside_quote,      \
                                 quote_bits, error_mask)                       \
  {                                                                            \
    (quote_bits) = cmp_mask_against_input<T>((in), '"');                       \
    (quote_bits) = (quote_bits) & ~(odd_ends);                                 \
    uint64_t quote_mask = compute_quote_mask<T>((quote_bits));                 \
    quote_mask ^= (prev_iter_inside_quote);                                    \
    /* All Unicode characters may be placed within the                         \
     * quotation marks, except for the characters that MUST be escaped:        \
     * quotation mark, reverse solidus, and the control characters (U+0000     \
     * through U+001F).                                                        \
     * https://tools.ietf.org/html/rfc8259 */                                  \
    uint64_t unescaped = unsigned_lteq_against_input<T>((in), 0x1F);           \
    (error_mask) |= quote_mask & unescaped;                                    \
    /* right shift of a signed value expected to be well-defined and standard  \
     * compliant as of C++20,                                                  \
     * John Regehr from Utah U. says this is fine code */                      \
    (prev_iter_inside_quote) =                                                 \
        static_cast<uint64_t>(static_cast<int64_t>(quote_mask) >> 63);         \
    return quote_mask;                                                         \
  }
|
91
|
+
|
92
|
+
// Find structural bits in a 64-byte chunk.
|
93
|
+
// We need to compile that code for multiple architectures. However, target
|
94
|
+
// attributes can be used only once by function definition. Huge macro seemed
|
95
|
+
// better than huge code duplication. void FIND_STRUCTURAL_BITS_64(
|
96
|
+
// Architecture T,
|
97
|
+
// const uint8_t *buf,
|
98
|
+
// size_t idx,
|
99
|
+
// uint32_t *base_ptr,
|
100
|
+
// uint32_t &base,
|
101
|
+
// uint64_t &prev_iter_ends_odd_backslash,
|
102
|
+
// uint64_t &prev_iter_inside_quote,
|
103
|
+
// uint64_t &prev_iter_ends_pseudo_pred,
|
104
|
+
// uint64_t &structurals,
|
105
|
+
// uint64_t &error_mask,
|
106
|
+
// utf8_checking_state<T> &utf8_state, flatten
|
107
|
+
// function)
|
108
|
+
// Statement macro that processes one 64-byte block starting at `buf`.
// In order, it: runs the UTF-8 checker on the block, computes the escaped
// positions and the quote mask, flattens the structurals left over from the
// PREVIOUS block via `flat`, and only then overwrites `structurals` with the
// finalized mask for this block. All state arguments must be lvalues; they are
// handed to helpers that take them by reference or are assigned directly.
#define FIND_STRUCTURAL_BITS_64(                                               \
    T, buf, idx, base_ptr, base, prev_iter_ends_odd_backslash,                 \
    prev_iter_inside_quote, prev_iter_ends_pseudo_pred, structurals,           \
    error_mask, utf8_state, flat)                                              \
  {                                                                            \
    uint64_t quote_bits;                                                       \
    uint64_t whitespace;                                                       \
    simd_input<T> in = fill_input<T>(buf);                                     \
    check_utf8<T>(in, utf8_state);                                             \
                                                                               \
    /* positions escaped by an odd-length run of backslashes */                \
    uint64_t odd_ends =                                                        \
        find_odd_backslash_sequences<T>(in, prev_iter_ends_odd_backslash);     \
                                                                               \
    /* the insides of quote pairs ("quote_mask"), plus the quote positions     \
     * themselves in quote_bits */                                             \
    uint64_t quote_mask = find_quote_mask_and_bits<T>(                         \
        in, odd_ends, prev_iter_inside_quote, quote_bits, error_mask);         \
                                                                               \
    /* flatten the structural bits of the previous iteration, not the ones     \
     * of the current iteration; this must happen before `structurals` is      \
     * overwritten below */                                                    \
    flat(base_ptr, base, idx, structurals);                                    \
                                                                               \
    find_whitespace_and_structurals<T>(in, whitespace, structurals);           \
                                                                               \
    /* fix up structurals to reflect quotes and add the pseudo-structural      \
     * characters */                                                           \
    structurals =                                                              \
        finalize_structurals(structurals, whitespace, quote_mask, quote_bits,  \
                             prev_iter_ends_pseudo_pred);                      \
  }
|
139
|
+
|
140
|
+
// We need to compile that code for multiple architectures. However, target
|
141
|
+
// attributes can be used only once by function definition. Huge macro seemed
|
142
|
+
// better than huge code duplication. ErrorValues
|
143
|
+
// FIND_STRUCTURAL_BITS(Architecture T, const uint8_t *buf, size_t len,
|
144
|
+
// ParsedJson &pj, flatten function)
|
145
|
+
// Function-body macro: expands to a braced block in which every path ends in a
// `return` of a simdjson error code, so it must form the entire body of the
// stage-1 function.
//   T    - architecture tag for the helper templates
//   buf  - pointer to the input bytes
//   len  - number of input bytes
//   pj   - ParsedJson receiving structural_indexes / n_structural_indexes
//   flat - callable used to flatten a 64-bit structural mask into base_ptr
// `buf`, `len` and `pj` are parenthesized at every use so that compound
// argument expressions keep their meaning.
#define FIND_STRUCTURAL_BITS(T, buf, len, pj, flat)                            \
  {                                                                            \
    if ((len) > (pj).byte_capacity) {                                          \
      std::cerr << "Your ParsedJson object only supports documents up to "     \
                << (pj).byte_capacity                                          \
                << " bytes but you are trying to process " << (len)            \
                << " bytes" << std::endl;                                      \
      return simdjson::CAPACITY;                                               \
    }                                                                          \
    uint32_t *base_ptr = (pj).structural_indexes;                              \
    uint32_t base = 0;                                                         \
    utf8_checking_state<T> utf8_state;                                         \
                                                                               \
    /* Persistent state across loop iterations. */                             \
                                                                               \
    /* Does the previous iteration end with an odd-length sequence of          \
     * backslashes? Either 0 or 1, but a 64-bit value. */                      \
    uint64_t prev_iter_ends_odd_backslash = 0ULL;                              \
    /* Does the previous iteration end inside a double-quote pair? Either      \
     * all zeros or all ones. */                                               \
    uint64_t prev_iter_inside_quote = 0ULL;                                    \
    /* Does the previous iteration end on something that is a predecessor of   \
     * a pseudo-structural character, i.e. whitespace or a structural          \
     * character? The very first char is considered to follow "whitespace"     \
     * for the purposes of pseudo-structural character detection, so we        \
     * initialize to 1. */                                                     \
    uint64_t prev_iter_ends_pseudo_pred = 1ULL;                                \
                                                                               \
    /* structurals are persistent state across the loop as we flatten them on  \
     * the subsequent iteration into our array pointed to by base_ptr.         \
     * This is harmless on the first iteration as structurals==0               \
     * and is done for performance reasons; we can hide some of the latency    \
     * of the expensive carryless multiply in the previous step with this      \
     * work */                                                                 \
    uint64_t structurals = 0;                                                  \
                                                                               \
    size_t lenminus64 = (len) < 64 ? 0 : (len) - 64;                           \
    size_t idx = 0;                                                            \
    /* for unescaped characters within strings (ASCII code points < 0x20) */   \
    uint64_t error_mask = 0;                                                   \
                                                                               \
    for (; idx < lenminus64; idx += 64) {                                      \
      FIND_STRUCTURAL_BITS_64(                                                 \
          T, &(buf)[idx], idx, base_ptr, base, prev_iter_ends_odd_backslash,   \
          prev_iter_inside_quote, prev_iter_ends_pseudo_pred, structurals,     \
          error_mask, utf8_state, flat);                                       \
    }                                                                          \
    /* If we have a final chunk of less than 64 bytes, pad it to 64 with       \
     * spaces before processing it (otherwise, we risk invalidating the UTF-8  \
     * checks). */                                                             \
    if (idx < (len)) {                                                         \
      uint8_t tmp_buf[64];                                                     \
      memset(tmp_buf, 0x20, 64);                                               \
      memcpy(tmp_buf, (buf) + idx, (len) - idx);                               \
      FIND_STRUCTURAL_BITS_64(                                                 \
          T, &tmp_buf[0], idx, base_ptr, base, prev_iter_ends_odd_backslash,   \
          prev_iter_inside_quote, prev_iter_ends_pseudo_pred, structurals,     \
          error_mask, utf8_state, flat);                                       \
      idx += 64;                                                               \
    }                                                                          \
                                                                               \
    /* is last string quote closed? */                                         \
    if (prev_iter_inside_quote) {                                              \
      return simdjson::UNCLOSED_STRING;                                        \
    }                                                                          \
                                                                               \
    /* finally, flatten out the remaining structurals from the last            \
     * iteration */                                                            \
    flat(base_ptr, base, idx, structurals);                                    \
                                                                               \
    (pj).n_structural_indexes = base;                                          \
    /* a valid JSON file cannot have zero structural indexes - we should have  \
     * found something */                                                      \
    if ((pj).n_structural_indexes == 0u) {                                     \
      return simdjson::EMPTY;                                                  \
    }                                                                          \
    if (base_ptr[(pj).n_structural_indexes - 1] > (len)) {                     \
      return simdjson::UNEXPECTED_ERROR;                                       \
    }                                                                          \
    if ((len) != base_ptr[(pj).n_structural_indexes - 1]) {                    \
      /* the string might not be NULL terminated, but we add a virtual NULL    \
       * ending character. The index array holds 32-bit offsets, so the        \
       * narrowing is spelled out. */                                          \
      base_ptr[(pj).n_structural_indexes++] = static_cast<uint32_t>(len);      \
    }                                                                          \
    /* make it safe to dereference one beyond this array */                    \
    base_ptr[(pj).n_structural_indexes] = 0;                                   \
    if (error_mask) {                                                          \
      return simdjson::UNESCAPED_CHARS;                                        \
    }                                                                          \
    return check_utf8_errors<T>(utf8_state);                                   \
  }
|
238
|
+
|
239
|
+
#endif // SIMDJSON_STAGE1_FIND_MARKS_MACROS_H
|
@@ -0,0 +1,194 @@
|
|
1
|
+
#ifndef SIMDJSON_STAGE1_FIND_MARKS_WESTMERE_H
|
2
|
+
#define SIMDJSON_STAGE1_FIND_MARKS_WESTMERE_H
|
3
|
+
|
4
|
+
#include "simdjson/simdutf8check_westmere.h"
|
5
|
+
#include "simdjson/stage1_find_marks.h"
|
6
|
+
#include "simdjson/stage1_find_marks_flatten.h"
|
7
|
+
#include "simdjson/stage1_find_marks_macros.h"
|
8
|
+
|
9
|
+
#ifdef IS_X86_64
|
10
|
+
|
11
|
+
TARGET_WESTMERE
|
12
|
+
namespace simdjson {
|
13
|
+
template <> struct simd_input<Architecture::WESTMERE> {
|
14
|
+
__m128i v0;
|
15
|
+
__m128i v1;
|
16
|
+
__m128i v2;
|
17
|
+
__m128i v3;
|
18
|
+
};
|
19
|
+
|
20
|
+
template <>
|
21
|
+
really_inline simd_input<Architecture::WESTMERE>
|
22
|
+
fill_input<Architecture::WESTMERE>(const uint8_t *ptr) {
|
23
|
+
struct simd_input<Architecture::WESTMERE> in;
|
24
|
+
in.v0 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 0));
|
25
|
+
in.v1 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 16));
|
26
|
+
in.v2 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 32));
|
27
|
+
in.v3 = _mm_loadu_si128(reinterpret_cast<const __m128i *>(ptr + 48));
|
28
|
+
return in;
|
29
|
+
}
|
30
|
+
|
31
|
+
template <>
|
32
|
+
really_inline uint64_t
|
33
|
+
compute_quote_mask<Architecture::WESTMERE>(uint64_t quote_bits) {
|
34
|
+
return _mm_cvtsi128_si64(_mm_clmulepi64_si128(
|
35
|
+
_mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFFu), 0));
|
36
|
+
}
|
37
|
+
|
38
|
+
template <> struct utf8_checking_state<Architecture::WESTMERE> {
|
39
|
+
__m128i has_error = _mm_setzero_si128();
|
40
|
+
processed_utf_bytes previous{
|
41
|
+
_mm_setzero_si128(), // raw_bytes
|
42
|
+
_mm_setzero_si128(), // high_nibbles
|
43
|
+
_mm_setzero_si128() // carried_continuations
|
44
|
+
};
|
45
|
+
};
|
46
|
+
|
47
|
+
template <>
|
48
|
+
really_inline void check_utf8<Architecture::WESTMERE>(
|
49
|
+
simd_input<Architecture::WESTMERE> in,
|
50
|
+
utf8_checking_state<Architecture::WESTMERE> &state) {
|
51
|
+
__m128i high_bit = _mm_set1_epi8(0x80u);
|
52
|
+
if ((_mm_testz_si128(_mm_or_si128(in.v0, in.v1), high_bit)) == 1) {
|
53
|
+
// it is ascii, we just check continuation
|
54
|
+
state.has_error =
|
55
|
+
_mm_or_si128(_mm_cmpgt_epi8(state.previous.carried_continuations,
|
56
|
+
_mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
57
|
+
9, 9, 9, 9, 9, 1)),
|
58
|
+
state.has_error);
|
59
|
+
} else {
|
60
|
+
// it is not ascii so we have to do heavy work
|
61
|
+
state.previous =
|
62
|
+
check_utf8_bytes(in.v0, &(state.previous), &(state.has_error));
|
63
|
+
state.previous =
|
64
|
+
check_utf8_bytes(in.v1, &(state.previous), &(state.has_error));
|
65
|
+
}
|
66
|
+
|
67
|
+
if ((_mm_testz_si128(_mm_or_si128(in.v2, in.v3), high_bit)) == 1) {
|
68
|
+
// it is ascii, we just check continuation
|
69
|
+
state.has_error =
|
70
|
+
_mm_or_si128(_mm_cmpgt_epi8(state.previous.carried_continuations,
|
71
|
+
_mm_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
|
72
|
+
9, 9, 9, 9, 9, 1)),
|
73
|
+
state.has_error);
|
74
|
+
} else {
|
75
|
+
// it is not ascii so we have to do heavy work
|
76
|
+
state.previous =
|
77
|
+
check_utf8_bytes(in.v2, &(state.previous), &(state.has_error));
|
78
|
+
state.previous =
|
79
|
+
check_utf8_bytes(in.v3, &(state.previous), &(state.has_error));
|
80
|
+
}
|
81
|
+
}
|
82
|
+
|
83
|
+
template <>
|
84
|
+
really_inline ErrorValues check_utf8_errors<Architecture::WESTMERE>(
|
85
|
+
utf8_checking_state<Architecture::WESTMERE> &state) {
|
86
|
+
return _mm_testz_si128(state.has_error, state.has_error) == 0
|
87
|
+
? simdjson::UTF8_ERROR
|
88
|
+
: simdjson::SUCCESS;
|
89
|
+
}
|
90
|
+
|
91
|
+
template <>
|
92
|
+
really_inline uint64_t cmp_mask_against_input<Architecture::WESTMERE>(
|
93
|
+
simd_input<Architecture::WESTMERE> in, uint8_t m) {
|
94
|
+
const __m128i mask = _mm_set1_epi8(m);
|
95
|
+
__m128i cmp_res_0 = _mm_cmpeq_epi8(in.v0, mask);
|
96
|
+
uint64_t res_0 = _mm_movemask_epi8(cmp_res_0);
|
97
|
+
__m128i cmp_res_1 = _mm_cmpeq_epi8(in.v1, mask);
|
98
|
+
uint64_t res_1 = _mm_movemask_epi8(cmp_res_1);
|
99
|
+
__m128i cmp_res_2 = _mm_cmpeq_epi8(in.v2, mask);
|
100
|
+
uint64_t res_2 = _mm_movemask_epi8(cmp_res_2);
|
101
|
+
__m128i cmp_res_3 = _mm_cmpeq_epi8(in.v3, mask);
|
102
|
+
uint64_t res_3 = _mm_movemask_epi8(cmp_res_3);
|
103
|
+
return res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48);
|
104
|
+
}
|
105
|
+
|
106
|
+
template <>
|
107
|
+
really_inline uint64_t unsigned_lteq_against_input<Architecture::WESTMERE>(
|
108
|
+
simd_input<Architecture::WESTMERE> in, uint8_t m) {
|
109
|
+
const __m128i maxval = _mm_set1_epi8(m);
|
110
|
+
__m128i cmp_res_0 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v0), maxval);
|
111
|
+
uint64_t res_0 = _mm_movemask_epi8(cmp_res_0);
|
112
|
+
__m128i cmp_res_1 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v1), maxval);
|
113
|
+
uint64_t res_1 = _mm_movemask_epi8(cmp_res_1);
|
114
|
+
__m128i cmp_res_2 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v2), maxval);
|
115
|
+
uint64_t res_2 = _mm_movemask_epi8(cmp_res_2);
|
116
|
+
__m128i cmp_res_3 = _mm_cmpeq_epi8(_mm_max_epu8(maxval, in.v3), maxval);
|
117
|
+
uint64_t res_3 = _mm_movemask_epi8(cmp_res_3);
|
118
|
+
return res_0 | (res_1 << 16) | (res_2 << 32) | (res_3 << 48);
|
119
|
+
}
|
120
|
+
|
121
|
+
template <>
|
122
|
+
really_inline uint64_t find_odd_backslash_sequences<Architecture::WESTMERE>(
|
123
|
+
simd_input<Architecture::WESTMERE> in,
|
124
|
+
uint64_t &prev_iter_ends_odd_backslash) {
|
125
|
+
FIND_ODD_BACKSLASH_SEQUENCES(Architecture::WESTMERE, in,
|
126
|
+
prev_iter_ends_odd_backslash);
|
127
|
+
}
|
128
|
+
|
129
|
+
template <>
|
130
|
+
really_inline uint64_t find_quote_mask_and_bits<Architecture::WESTMERE>(
|
131
|
+
simd_input<Architecture::WESTMERE> in, uint64_t odd_ends,
|
132
|
+
uint64_t &prev_iter_inside_quote, uint64_t "e_bits,
|
133
|
+
uint64_t &error_mask) {
|
134
|
+
FIND_QUOTE_MASK_AND_BITS(Architecture::WESTMERE, in, odd_ends,
|
135
|
+
prev_iter_inside_quote, quote_bits, error_mask)
|
136
|
+
}
|
137
|
+
|
138
|
+
template <>
|
139
|
+
really_inline void find_whitespace_and_structurals<Architecture::WESTMERE>(
|
140
|
+
simd_input<Architecture::WESTMERE> in, uint64_t &whitespace,
|
141
|
+
uint64_t &structurals) {
|
142
|
+
const __m128i structural_table =
|
143
|
+
_mm_setr_epi8(44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123);
|
144
|
+
const __m128i white_table = _mm_setr_epi8(32, 100, 100, 100, 17, 100, 113, 2,
|
145
|
+
100, 9, 10, 112, 100, 13, 100, 100);
|
146
|
+
const __m128i struct_offset = _mm_set1_epi8(0xd4u);
|
147
|
+
const __m128i struct_mask = _mm_set1_epi8(32);
|
148
|
+
|
149
|
+
__m128i white0 = _mm_cmpeq_epi8(in.v0, _mm_shuffle_epi8(white_table, in.v0));
|
150
|
+
__m128i white1 = _mm_cmpeq_epi8(in.v1, _mm_shuffle_epi8(white_table, in.v1));
|
151
|
+
__m128i white2 = _mm_cmpeq_epi8(in.v2, _mm_shuffle_epi8(white_table, in.v2));
|
152
|
+
__m128i white3 = _mm_cmpeq_epi8(in.v3, _mm_shuffle_epi8(white_table, in.v3));
|
153
|
+
uint64_t ws_res_0 = _mm_movemask_epi8(white0);
|
154
|
+
uint64_t ws_res_1 = _mm_movemask_epi8(white1);
|
155
|
+
uint64_t ws_res_2 = _mm_movemask_epi8(white2);
|
156
|
+
uint64_t ws_res_3 = _mm_movemask_epi8(white3);
|
157
|
+
|
158
|
+
whitespace =
|
159
|
+
(ws_res_0 | (ws_res_1 << 16) | (ws_res_2 << 32) | (ws_res_3 << 48));
|
160
|
+
|
161
|
+
__m128i struct1_r1 = _mm_add_epi8(struct_offset, in.v0);
|
162
|
+
__m128i struct2_r1 = _mm_add_epi8(struct_offset, in.v1);
|
163
|
+
__m128i struct3_r1 = _mm_add_epi8(struct_offset, in.v2);
|
164
|
+
__m128i struct4_r1 = _mm_add_epi8(struct_offset, in.v3);
|
165
|
+
|
166
|
+
__m128i struct1_r2 = _mm_or_si128(in.v0, struct_mask);
|
167
|
+
__m128i struct2_r2 = _mm_or_si128(in.v1, struct_mask);
|
168
|
+
__m128i struct3_r2 = _mm_or_si128(in.v2, struct_mask);
|
169
|
+
__m128i struct4_r2 = _mm_or_si128(in.v3, struct_mask);
|
170
|
+
|
171
|
+
__m128i struct1_r3 = _mm_shuffle_epi8(structural_table, struct1_r1);
|
172
|
+
__m128i struct2_r3 = _mm_shuffle_epi8(structural_table, struct2_r1);
|
173
|
+
__m128i struct3_r3 = _mm_shuffle_epi8(structural_table, struct3_r1);
|
174
|
+
__m128i struct4_r3 = _mm_shuffle_epi8(structural_table, struct4_r1);
|
175
|
+
|
176
|
+
__m128i struct1 = _mm_cmpeq_epi8(struct1_r2, struct1_r3);
|
177
|
+
__m128i struct2 = _mm_cmpeq_epi8(struct2_r2, struct2_r3);
|
178
|
+
__m128i struct3 = _mm_cmpeq_epi8(struct3_r2, struct3_r3);
|
179
|
+
__m128i struct4 = _mm_cmpeq_epi8(struct4_r2, struct4_r3);
|
180
|
+
|
181
|
+
uint64_t structural_res_0 = _mm_movemask_epi8(struct1);
|
182
|
+
uint64_t structural_res_1 = _mm_movemask_epi8(struct2);
|
183
|
+
uint64_t structural_res_2 = _mm_movemask_epi8(struct3);
|
184
|
+
uint64_t structural_res_3 = _mm_movemask_epi8(struct4);
|
185
|
+
|
186
|
+
structurals = (structural_res_0 | (structural_res_1 << 16) |
|
187
|
+
(structural_res_2 << 32) | (structural_res_3 << 48));
|
188
|
+
}
|
189
|
+
|
190
|
+
} // namespace simdjson
|
191
|
+
UNTARGET_REGION
|
192
|
+
|
193
|
+
#endif // IS_X86_64
|
194
|
+
#endif // SIMDJSON_STAGE1_FIND_MARKS_WESTMERE_H
|
@@ -0,0 +1,85 @@
|
|
1
|
+
#ifndef SIMDJSON_STAGE2_BUILD_TAPE_H
|
2
|
+
#define SIMDJSON_STAGE2_BUILD_TAPE_H
|
3
|
+
|
4
|
+
#include <cassert>
|
5
|
+
#include <cstring>
|
6
|
+
#include <iostream>
|
7
|
+
|
8
|
+
#include "simdjson/common_defs.h"
|
9
|
+
#include "simdjson/jsoncharutils.h"
|
10
|
+
#include "simdjson/numberparsing.h"
|
11
|
+
#include "simdjson/parsedjson.h"
|
12
|
+
#include "simdjson/simdjson.h"
|
13
|
+
#include "simdjson/stringparsing.h"
|
14
|
+
|
15
|
+
namespace simdjson {
|
16
|
+
void init_state_machine();
|
17
|
+
|
18
|
+
// Returns true when the four bytes at loc spell "true" and
// is_not_structural_or_whitespace(loc[4]) is 0, i.e. the byte after the atom
// is accepted by that helper as a terminator.
//
// Both the expected pattern and the input are copied with memcpy into 32-bit
// integers. This avoids dereferencing a string literal through a
// `const uint64_t *` (an aliasing and alignment violation that also reads past
// the end of the literal) and compares the bytes in memory order regardless of
// endianness. loc must have at least 5 readable bytes.
WARN_UNUSED
really_inline bool is_valid_true_atom(const uint8_t *loc) {
  uint32_t expected;
  std::memcpy(&expected, "true", sizeof(expected));
  uint32_t found;
  std::memcpy(&found, loc, sizeof(found));
  // any differing bit, or a non-terminating follower byte, makes error nonzero
  uint32_t error = found ^ expected;
  error |= is_not_structural_or_whitespace(loc[4]);
  return error == 0;
}
|
33
|
+
|
34
|
+
// Returns true when the five bytes at loc spell "false" and
// is_not_structural_or_whitespace(loc[5]) is 0.
// Reads 8 bytes starting at loc, which may run up to 7 bytes past the end of
// the document; the static_assert ties that over-read to SIMDJSON_PADDING.
WARN_UNUSED
really_inline bool is_valid_false_atom(const uint8_t *loc) {
  // "false" encoded as an integer constant rather than loaded from a string
  // literal, so the three unused high bytes are guaranteed to be zero. The
  // byte order of the constant assumes a little-endian load.
  const uint64_t false_bytes = 0x00000065736c6166;
  // Keep the low five bytes. The comparison must stay 64 bits wide: a 32-bit
  // check would ignore the fifth character of "false".
  const uint64_t low5_mask = 0x000000ffffffffff;

  static_assert(sizeof(uint64_t) - 1 <= SIMDJSON_PADDING);
  // memcpy instead of a pointer cast: unaligned 64-bit loads are undefined
  uint64_t word;
  std::memcpy(&word, loc, sizeof(uint64_t));

  const uint64_t mismatch = (word & low5_mask) ^ false_bytes;
  return (mismatch | is_not_structural_or_whitespace(loc[5])) == 0;
}
|
57
|
+
|
58
|
+
// Returns true when the four bytes at loc spell "null" and
// is_not_structural_or_whitespace(loc[4]) is 0, i.e. the byte after the atom
// is accepted by that helper as a terminator.
//
// Both the expected pattern and the input are copied with memcpy into 32-bit
// integers. This avoids dereferencing a string literal through a
// `const uint64_t *` (an aliasing and alignment violation that also reads past
// the end of the literal) and compares the bytes in memory order regardless of
// endianness. loc must have at least 5 readable bytes.
WARN_UNUSED
really_inline bool is_valid_null_atom(const uint8_t *loc) {
  uint32_t expected;
  std::memcpy(&expected, "null", sizeof(expected));
  uint32_t found;
  std::memcpy(&found, loc, sizeof(found));
  // any differing bit, or a non-terminating follower byte, makes error nonzero
  uint32_t error = found ^ expected;
  error |= is_not_structural_or_whitespace(loc[4]);
  return error == 0;
}
|
73
|
+
|
74
|
+
template <Architecture T = Architecture::NATIVE>
|
75
|
+
WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER int
|
76
|
+
unified_machine(const uint8_t *buf, size_t len, ParsedJson &pj);
|
77
|
+
|
78
|
+
template <Architecture T = Architecture::NATIVE>
|
79
|
+
int unified_machine(const char *buf, size_t len, ParsedJson &pj) {
|
80
|
+
return unified_machine<T>(reinterpret_cast<const uint8_t *>(buf), len, pj);
|
81
|
+
}
|
82
|
+
|
83
|
+
} // namespace simdjson
|
84
|
+
|
85
|
+
#endif
|
@@ -0,0 +1,105 @@
|
|
1
|
+
#ifndef SIMDJSON_STRINGPARSING_H
|
2
|
+
#define SIMDJSON_STRINGPARSING_H
|
3
|
+
|
4
|
+
#include "simdjson/common_defs.h"
|
5
|
+
#include "simdjson/jsoncharutils.h"
|
6
|
+
#include "simdjson/parsedjson.h"
|
7
|
+
|
8
|
+
#ifdef JSON_TEST_STRINGS
// Hooks that are only declared when JSON_TEST_STRINGS is defined; their
// definitions must come from whoever sets that macro (presumably a
// string-parsing test harness - confirm against the test sources).
void found_string(const uint8_t *buf,
                  const uint8_t *parsed_begin,
                  const uint8_t *parsed_end);
void found_bad_string(const uint8_t *buf);
#endif // JSON_TEST_STRINGS
|
13
|
+
|
14
|
+
namespace simdjson {
|
15
|
+
// begin copypasta
|
16
|
+
// Lookup table for single-character escapes, indexed by the byte that follows
// the backslash:
//   '"', '/' and '\' map to themselves;
//   b -> backspace (0x08), f -> form feed (0x0c), n -> newline (0x0a),
//   r -> carriage return (0x0d), t -> horizontal tab (0x09).
// Every other entry is 0. 'u' is intentionally left at 0 because \uXXXX
// escapes are too complex for a table lookup.
static const uint8_t escape_map[256] = {
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,          // 0x0.
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,          // 0x1.
    0, 0, 0x22, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x2f,    // 0x2.  " and /
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,          // 0x3.

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,          // 0x4.
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0x5c, 0, 0, 0,       // 0x5.  backslash
    0, 0, 0x08, 0, 0, 0, 0x0c, 0, 0, 0, 0, 0, 0, 0, 0x0a, 0, // 0x6.  b, f, n
    0, 0, 0x0d, 0, 0x09, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,    // 0x7.  r, t

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,          // 0x8.
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,          // 0x9.
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,          // 0xa.
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,          // 0xb.

    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,          // 0xc.
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,          // 0xd.
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,          // 0xe.
    0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,          // 0xf.
};
|
40
|
+
|
41
|
+
// handle a unicode codepoint
|
42
|
+
// write appropriate values into dest
|
43
|
+
// src will advance 6 bytes or 12 bytes
|
44
|
+
// dest will advance a variable amount (return via pointer)
|
45
|
+
// return true if the unicode codepoint was valid
|
46
|
+
// We work in little-endian then swap at write time
|
47
|
+
WARN_UNUSED
|
48
|
+
really_inline bool handle_unicode_codepoint(const uint8_t **src_ptr,
|
49
|
+
uint8_t **dst_ptr) {
|
50
|
+
// hex_to_u32_nocheck fills high 16 bits of the return value with 1s if the
|
51
|
+
// conversion isn't valid; we defer the check for this to inside the
|
52
|
+
// multilingual plane check
|
53
|
+
uint32_t code_point = hex_to_u32_nocheck(*src_ptr + 2);
|
54
|
+
*src_ptr += 6;
|
55
|
+
// check for low surrogate for characters outside the Basic
|
56
|
+
// Multilingual Plane.
|
57
|
+
if (code_point >= 0xd800 && code_point < 0xdc00) {
|
58
|
+
if (((*src_ptr)[0] != '\\') || (*src_ptr)[1] != 'u') {
|
59
|
+
return false;
|
60
|
+
}
|
61
|
+
uint32_t code_point_2 = hex_to_u32_nocheck(*src_ptr + 2);
|
62
|
+
|
63
|
+
// if the first code point is invalid we will get here, as we will go past
|
64
|
+
// the check for being outside the Basic Multilingual plane. If we don't
|
65
|
+
// find a \u immediately afterwards we fail out anyhow, but if we do,
|
66
|
+
// this check catches both the case of the first code point being invalid
|
67
|
+
// or the second code point being invalid.
|
68
|
+
if ((code_point | code_point_2) >> 16) {
|
69
|
+
return false;
|
70
|
+
}
|
71
|
+
|
72
|
+
code_point =
|
73
|
+
(((code_point - 0xd800) << 10) | (code_point_2 - 0xdc00)) + 0x10000;
|
74
|
+
*src_ptr += 6;
|
75
|
+
}
|
76
|
+
size_t offset = codepoint_to_utf8(code_point, *dst_ptr);
|
77
|
+
*dst_ptr += offset;
|
78
|
+
return offset > 0;
|
79
|
+
}
|
80
|
+
|
81
|
+
// Pair of 32-bit values recording where backslashes and quotes were found.
// NOTE(review): presumably one bit per scanned input byte - confirm against
// the architecture-specific specializations.
struct parse_string_helper {
  uint32_t bs_bits;    // backslash locations
  uint32_t quote_bits; // quote locations
};
|
86
|
+
|
87
|
+
// Finds where the backslashes and quotes are located.
|
88
|
+
template <Architecture>
|
89
|
+
parse_string_helper find_bs_bits_and_quote_bits(const uint8_t *src,
|
90
|
+
uint8_t *dst);
|
91
|
+
|
92
|
+
template <Architecture T>
|
93
|
+
WARN_UNUSED ALLOW_SAME_PAGE_BUFFER_OVERRUN_QUALIFIER LENIENT_MEM_SANITIZER
|
94
|
+
really_inline bool
|
95
|
+
parse_string(UNUSED const uint8_t *buf, UNUSED size_t len, ParsedJson &pj,
|
96
|
+
UNUSED const uint32_t depth, UNUSED uint32_t offset);
|
97
|
+
|
98
|
+
} // namespace simdjson
|
99
|
+
|
100
|
+
/// Now include the specializations:
|
101
|
+
#include "simdjson/stringparsing_arm64.h"
|
102
|
+
#include "simdjson/stringparsing_haswell.h"
|
103
|
+
#include "simdjson/stringparsing_westmere.h"
|
104
|
+
|
105
|
+
#endif
|