simdjson 0.1.0
- checksums.yaml +7 -0
- data/.clang-format +5 -0
- data/.gitignore +14 -0
- data/.gitmodules +3 -0
- data/.rubocop.yml +9 -0
- data/.travis.yml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +32 -0
- data/benchmark/apache_builds.json +4421 -0
- data/benchmark/demo.json +15 -0
- data/benchmark/github_events.json +1390 -0
- data/benchmark/run_benchmark.rb +30 -0
- data/ext/simdjson/extconf.rb +22 -0
- data/ext/simdjson/simdjson.cpp +76 -0
- data/ext/simdjson/simdjson.hpp +6 -0
- data/lib/simdjson/version.rb +3 -0
- data/lib/simdjson.rb +2 -0
- data/simdjson.gemspec +35 -0
- data/vendor/.gitkeep +0 -0
- data/vendor/simdjson/AUTHORS +3 -0
- data/vendor/simdjson/CMakeLists.txt +63 -0
- data/vendor/simdjson/CONTRIBUTORS +27 -0
- data/vendor/simdjson/Dockerfile +10 -0
- data/vendor/simdjson/LICENSE +201 -0
- data/vendor/simdjson/Makefile +203 -0
- data/vendor/simdjson/Notes.md +85 -0
- data/vendor/simdjson/README.md +581 -0
- data/vendor/simdjson/amalgamation.sh +158 -0
- data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
- data/vendor/simdjson/benchmark/benchmark.h +223 -0
- data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
- data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
- data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
- data/vendor/simdjson/benchmark/parse.cpp +393 -0
- data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
- data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
- data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
- data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
- data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/gbps.png +0 -0
- data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
- data/vendor/simdjson/images/halvarflake.png +0 -0
- data/vendor/simdjson/images/logo.png +0 -0
- data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
- data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
- data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
- data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
- data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
- data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
- data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
- data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
- data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
- data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
- data/vendor/simdjson/include/simdjson/portability.h +172 -0
- data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
- data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
- data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
- data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
- data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
- data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
- data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
- data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
- data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
- data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
- data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
- data/vendor/simdjson/scripts/issue150.sh +14 -0
- data/vendor/simdjson/scripts/javascript/README.md +3 -0
- data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
- data/vendor/simdjson/scripts/minifier.sh +11 -0
- data/vendor/simdjson/scripts/parseandstat.sh +24 -0
- data/vendor/simdjson/scripts/parser.sh +11 -0
- data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
- data/vendor/simdjson/scripts/plotparse.sh +98 -0
- data/vendor/simdjson/scripts/selectparser.sh +11 -0
- data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
- data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
- data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
- data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
- data/vendor/simdjson/scripts/testjson2json.sh +99 -0
- data/vendor/simdjson/scripts/transitions/Makefile +10 -0
- data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
- data/vendor/simdjson/singleheader/README.md +1 -0
- data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
- data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
- data/vendor/simdjson/singleheader/simdjson.h +39692 -0
- data/vendor/simdjson/src/CMakeLists.txt +67 -0
- data/vendor/simdjson/src/jsonioutil.cpp +35 -0
- data/vendor/simdjson/src/jsonminifier.cpp +285 -0
- data/vendor/simdjson/src/jsonparser.cpp +91 -0
- data/vendor/simdjson/src/parsedjson.cpp +323 -0
- data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
- data/vendor/simdjson/src/simdjson.cpp +30 -0
- data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
- data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
- data/vendor/simdjson/style/clang-format-check.sh +25 -0
- data/vendor/simdjson/style/clang-format.sh +25 -0
- data/vendor/simdjson/style/run-clang-format.py +326 -0
- data/vendor/simdjson/tape.md +134 -0
- data/vendor/simdjson/tests/CMakeLists.txt +25 -0
- data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
- data/vendor/simdjson/tests/basictests.cpp +75 -0
- data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
- data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
- data/vendor/simdjson/tests/pointercheck.cpp +38 -0
- data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
- data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
- data/vendor/simdjson/tools/CMakeLists.txt +3 -0
- data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
- data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
- data/vendor/simdjson/tools/json2json.cpp +112 -0
- data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
- data/vendor/simdjson/tools/jsonstats.cpp +143 -0
- data/vendor/simdjson/tools/minify.cpp +21 -0
- data/vendor/simdjson/tools/release.py +125 -0
- data/vendor/simdjson/windows/dirent_portable.h +1043 -0
- metadata +273 -0
@@ -0,0 +1,210 @@ data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h

```cpp
#ifndef SIMDJSON_STAGE1_FIND_MARKS_ARM64_H
#define SIMDJSON_STAGE1_FIND_MARKS_ARM64_H

#include "simdjson/simdutf8check_arm64.h"
#include "simdjson/stage1_find_marks.h"
#include "simdjson/stage1_find_marks_flatten.h"
#include "simdjson/stage1_find_marks_macros.h"

#ifdef IS_ARM64
namespace simdjson {
template <> struct simd_input<Architecture::ARM64> {
  uint8x16_t i0;
  uint8x16_t i1;
  uint8x16_t i2;
  uint8x16_t i3;
};

template <>
really_inline simd_input<Architecture::ARM64>
fill_input<Architecture::ARM64>(const uint8_t *ptr) {
  struct simd_input<Architecture::ARM64> in;
  in.i0 = vld1q_u8(ptr + 0);
  in.i1 = vld1q_u8(ptr + 16);
  in.i2 = vld1q_u8(ptr + 32);
  in.i3 = vld1q_u8(ptr + 48);
  return in;
}

really_inline uint16_t neon_movemask(uint8x16_t input) {
  const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
                               0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
  uint8x16_t minput = vandq_u8(input, bit_mask);
  uint8x16_t tmp = vpaddq_u8(minput, minput);
  tmp = vpaddq_u8(tmp, tmp);
  tmp = vpaddq_u8(tmp, tmp);
  return vgetq_lane_u16(vreinterpretq_u16_u8(tmp), 0);
}

really_inline uint64_t neon_movemask_bulk(uint8x16_t p0, uint8x16_t p1,
                                          uint8x16_t p2, uint8x16_t p3) {
  const uint8x16_t bit_mask = {0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80,
                               0x01, 0x02, 0x4, 0x8, 0x10, 0x20, 0x40, 0x80};
  uint8x16_t t0 = vandq_u8(p0, bit_mask);
  uint8x16_t t1 = vandq_u8(p1, bit_mask);
  uint8x16_t t2 = vandq_u8(p2, bit_mask);
  uint8x16_t t3 = vandq_u8(p3, bit_mask);
  uint8x16_t sum0 = vpaddq_u8(t0, t1);
  uint8x16_t sum1 = vpaddq_u8(t2, t3);
  sum0 = vpaddq_u8(sum0, sum1);
  sum0 = vpaddq_u8(sum0, sum0);
  return vgetq_lane_u64(vreinterpretq_u64_u8(sum0), 0);
}

template <>
really_inline uint64_t
compute_quote_mask<Architecture::ARM64>(uint64_t quote_bits) {
#ifdef __ARM_FEATURE_CRYPTO // some ARM processors lack this extension
  return vmull_p64(-1ULL, quote_bits);
#else
  return portable_compute_quote_mask(quote_bits);
#endif
}

template <> struct utf8_checking_state<Architecture::ARM64> {
  int8x16_t has_error{};
  processed_utf_bytes previous{};
};

// Checks that all bytes are ASCII
really_inline bool check_ascii_neon(simd_input<Architecture::ARM64> in) {
  // checking if the most significant bit is always equal to 0.
  uint8x16_t high_bit = vdupq_n_u8(0x80);
  uint8x16_t t0 = vorrq_u8(in.i0, in.i1);
  uint8x16_t t1 = vorrq_u8(in.i2, in.i3);
  uint8x16_t t3 = vorrq_u8(t0, t1);
  uint8x16_t t4 = vandq_u8(t3, high_bit);
  uint64x2_t v64 = vreinterpretq_u64_u8(t4);
  uint32x2_t v32 = vqmovn_u64(v64);
  uint64x1_t result = vreinterpret_u64_u32(v32);
  return vget_lane_u64(result, 0) == 0;
}

template <>
really_inline void check_utf8<Architecture::ARM64>(
    simd_input<Architecture::ARM64> in,
    utf8_checking_state<Architecture::ARM64> &state) {
  if (check_ascii_neon(in)) {
    // All bytes are ASCII. Therefore the byte just before must be
    // ASCII too. We only check the byte that was just before simd_input.
    // Nines are arbitrary values.
    const int8x16_t verror =
        (int8x16_t){9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 1};
    state.has_error =
        vorrq_s8(vreinterpretq_s8_u8(
                     vcgtq_s8(state.previous.carried_continuations, verror)),
                 state.has_error);
  } else {
    // it is not ASCII so we have to do heavy work
    state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i0),
                                      &(state.previous), &(state.has_error));
    state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i1),
                                      &(state.previous), &(state.has_error));
    state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i2),
                                      &(state.previous), &(state.has_error));
    state.previous = check_utf8_bytes(vreinterpretq_s8_u8(in.i3),
                                      &(state.previous), &(state.has_error));
  }
}

template <>
really_inline ErrorValues check_utf8_errors<Architecture::ARM64>(
    utf8_checking_state<Architecture::ARM64> &state) {
  uint64x2_t v64 = vreinterpretq_u64_s8(state.has_error);
  uint32x2_t v32 = vqmovn_u64(v64);
  uint64x1_t result = vreinterpret_u64_u32(v32);
  return vget_lane_u64(result, 0) != 0 ? simdjson::UTF8_ERROR
                                       : simdjson::SUCCESS;
}

template <>
really_inline uint64_t cmp_mask_against_input<Architecture::ARM64>(
    simd_input<Architecture::ARM64> in, uint8_t m) {
  const uint8x16_t mask = vmovq_n_u8(m);
  uint8x16_t cmp_res_0 = vceqq_u8(in.i0, mask);
  uint8x16_t cmp_res_1 = vceqq_u8(in.i1, mask);
  uint8x16_t cmp_res_2 = vceqq_u8(in.i2, mask);
  uint8x16_t cmp_res_3 = vceqq_u8(in.i3, mask);
  return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3);
}

template <>
really_inline uint64_t unsigned_lteq_against_input<Architecture::ARM64>(
    simd_input<Architecture::ARM64> in, uint8_t m) {
  const uint8x16_t mask = vmovq_n_u8(m);
  uint8x16_t cmp_res_0 = vcleq_u8(in.i0, mask);
  uint8x16_t cmp_res_1 = vcleq_u8(in.i1, mask);
  uint8x16_t cmp_res_2 = vcleq_u8(in.i2, mask);
  uint8x16_t cmp_res_3 = vcleq_u8(in.i3, mask);
  return neon_movemask_bulk(cmp_res_0, cmp_res_1, cmp_res_2, cmp_res_3);
}

template <>
really_inline uint64_t find_odd_backslash_sequences<Architecture::ARM64>(
    simd_input<Architecture::ARM64> in,
    uint64_t &prev_iter_ends_odd_backslash) {
  FIND_ODD_BACKSLASH_SEQUENCES(Architecture::ARM64, in,
                               prev_iter_ends_odd_backslash);
}

template <>
really_inline uint64_t find_quote_mask_and_bits<Architecture::ARM64>(
    simd_input<Architecture::ARM64> in, uint64_t odd_ends,
    uint64_t &prev_iter_inside_quote, uint64_t &quote_bits,
    uint64_t &error_mask) {
  FIND_QUOTE_MASK_AND_BITS(Architecture::ARM64, in, odd_ends,
                           prev_iter_inside_quote, quote_bits, error_mask)
}

template <>
really_inline void find_whitespace_and_structurals<Architecture::ARM64>(
    simd_input<Architecture::ARM64> in, uint64_t &whitespace,
    uint64_t &structurals) {
  const uint8x16_t low_nibble_mask =
      (uint8x16_t){16, 0, 0, 0, 0, 0, 0, 0, 0, 8, 12, 1, 2, 9, 0, 0};
  const uint8x16_t high_nibble_mask =
      (uint8x16_t){8, 0, 18, 4, 0, 1, 0, 1, 0, 0, 0, 3, 2, 1, 0, 0};
  const uint8x16_t structural_shufti_mask = vmovq_n_u8(0x7);
  const uint8x16_t whitespace_shufti_mask = vmovq_n_u8(0x18);
  const uint8x16_t low_nib_and_mask = vmovq_n_u8(0xf);

  uint8x16_t nib_0_lo = vandq_u8(in.i0, low_nib_and_mask);
  uint8x16_t nib_0_hi = vshrq_n_u8(in.i0, 4);
  uint8x16_t shuf_0_lo = vqtbl1q_u8(low_nibble_mask, nib_0_lo);
  uint8x16_t shuf_0_hi = vqtbl1q_u8(high_nibble_mask, nib_0_hi);
  uint8x16_t v_0 = vandq_u8(shuf_0_lo, shuf_0_hi);

  uint8x16_t nib_1_lo = vandq_u8(in.i1, low_nib_and_mask);
  uint8x16_t nib_1_hi = vshrq_n_u8(in.i1, 4);
  uint8x16_t shuf_1_lo = vqtbl1q_u8(low_nibble_mask, nib_1_lo);
  uint8x16_t shuf_1_hi = vqtbl1q_u8(high_nibble_mask, nib_1_hi);
  uint8x16_t v_1 = vandq_u8(shuf_1_lo, shuf_1_hi);

  uint8x16_t nib_2_lo = vandq_u8(in.i2, low_nib_and_mask);
  uint8x16_t nib_2_hi = vshrq_n_u8(in.i2, 4);
  uint8x16_t shuf_2_lo = vqtbl1q_u8(low_nibble_mask, nib_2_lo);
  uint8x16_t shuf_2_hi = vqtbl1q_u8(high_nibble_mask, nib_2_hi);
  uint8x16_t v_2 = vandq_u8(shuf_2_lo, shuf_2_hi);

  uint8x16_t nib_3_lo = vandq_u8(in.i3, low_nib_and_mask);
  uint8x16_t nib_3_hi = vshrq_n_u8(in.i3, 4);
  uint8x16_t shuf_3_lo = vqtbl1q_u8(low_nibble_mask, nib_3_lo);
  uint8x16_t shuf_3_hi = vqtbl1q_u8(high_nibble_mask, nib_3_hi);
  uint8x16_t v_3 = vandq_u8(shuf_3_lo, shuf_3_hi);

  uint8x16_t tmp_0 = vtstq_u8(v_0, structural_shufti_mask);
  uint8x16_t tmp_1 = vtstq_u8(v_1, structural_shufti_mask);
  uint8x16_t tmp_2 = vtstq_u8(v_2, structural_shufti_mask);
  uint8x16_t tmp_3 = vtstq_u8(v_3, structural_shufti_mask);
  structurals = neon_movemask_bulk(tmp_0, tmp_1, tmp_2, tmp_3);

  uint8x16_t tmp_ws_0 = vtstq_u8(v_0, whitespace_shufti_mask);
  uint8x16_t tmp_ws_1 = vtstq_u8(v_1, whitespace_shufti_mask);
  uint8x16_t tmp_ws_2 = vtstq_u8(v_2, whitespace_shufti_mask);
  uint8x16_t tmp_ws_3 = vtstq_u8(v_3, whitespace_shufti_mask);
  whitespace = neon_movemask_bulk(tmp_ws_0, tmp_ws_1, tmp_ws_2, tmp_ws_3);
}
} // namespace simdjson

#endif // IS_ARM64
#endif // SIMDJSON_STAGE1_FIND_MARKS_ARM64_H
```
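NEON has no direct counterpart to x86's `_mm256_movemask_epi8`, which is why the header above synthesizes one: it ANDs each byte with a per-lane power of two (0x01..0x80, repeated) and folds the lanes together with three rounds of pairwise addition. As a reading aid, here is a minimal scalar sketch (illustrative only, not code from the gem) of the semantics `neon_movemask` reproduces:

```cpp
#include <cstdint>
#include <cstdio>

// Scalar reference: bit i of the result is the most significant bit of
// byte i. This is exactly what neon_movemask computes with vectors.
uint16_t scalar_movemask(const uint8_t bytes[16]) {
  uint16_t result = 0;
  for (int i = 0; i < 16; i++) {
    result |= static_cast<uint16_t>((bytes[i] >> 7) & 1) << i;
  }
  return result;
}

int main() {
  uint8_t bytes[16] = {0x80, 0, 0x80, 0, 0, 0, 0, 0,
                       0,    0, 0,    0, 0, 0, 0, 0x80};
  printf("%04x\n", scalar_movemask(bytes)); // prints 8005 (bits 0, 2, 15)
  return 0;
}
```

`neon_movemask_bulk` applies the same idea to four registers at once, packing 64 comparison results into a single `uint64_t` bitmask that the rest of stage 1 operates on.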
@@ -0,0 +1,93 @@ data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h

```cpp
#ifndef SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H
#define SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H

namespace simdjson {

#ifdef SIMDJSON_NAIVE_FLATTEN // useful for benchmarking
//
// This is just a naive implementation. It should normally be
// disabled, but it can be used for research purposes to compare
// against our optimized version.
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
                                uint32_t idx, uint64_t bits) {
  uint32_t *out_ptr = base_ptr + base;
  idx -= 64;
  while (bits != 0) {
    out_ptr[0] = idx + trailing_zeroes(bits);
    bits = bits & (bits - 1);
    out_ptr++;
  }
  base = (out_ptr - base_ptr);
}

#else
// flatten out values in 'bits' assuming that they are to have values of idx
// plus their position in the bitvector, and store these indexes at
// base_ptr[base] incrementing base as we go
// will potentially store extra values beyond end of valid bits, so base_ptr
// needs to be large enough to handle this
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
                                uint32_t idx, uint64_t bits) {
  // In some instances, the next branch is expensive because it is
  // mispredicted. Unfortunately, in other cases, it helps tremendously.
  if (bits == 0)
    return;
  uint32_t cnt = hamming(bits);
  uint32_t next_base = base + cnt;
  idx -= 64;
  base_ptr += base;
  {
    base_ptr[0] = idx + trailing_zeroes(bits);
    bits = bits & (bits - 1);
    base_ptr[1] = idx + trailing_zeroes(bits);
    bits = bits & (bits - 1);
    base_ptr[2] = idx + trailing_zeroes(bits);
    bits = bits & (bits - 1);
    base_ptr[3] = idx + trailing_zeroes(bits);
    bits = bits & (bits - 1);
    base_ptr[4] = idx + trailing_zeroes(bits);
    bits = bits & (bits - 1);
    base_ptr[5] = idx + trailing_zeroes(bits);
    bits = bits & (bits - 1);
    base_ptr[6] = idx + trailing_zeroes(bits);
    bits = bits & (bits - 1);
    base_ptr[7] = idx + trailing_zeroes(bits);
    bits = bits & (bits - 1);
    base_ptr += 8;
  }
  // We hope that the next branch is easily predicted.
  if (cnt > 8) {
    base_ptr[0] = idx + trailing_zeroes(bits);
    bits = bits & (bits - 1);
    base_ptr[1] = idx + trailing_zeroes(bits);
    bits = bits & (bits - 1);
    base_ptr[2] = idx + trailing_zeroes(bits);
    bits = bits & (bits - 1);
    base_ptr[3] = idx + trailing_zeroes(bits);
    bits = bits & (bits - 1);
    base_ptr[4] = idx + trailing_zeroes(bits);
    bits = bits & (bits - 1);
    base_ptr[5] = idx + trailing_zeroes(bits);
    bits = bits & (bits - 1);
    base_ptr[6] = idx + trailing_zeroes(bits);
    bits = bits & (bits - 1);
    base_ptr[7] = idx + trailing_zeroes(bits);
    bits = bits & (bits - 1);
    base_ptr += 8;
  }
  if (cnt > 16) { // unlucky: we rarely get here
    // since it means having one structural or pseudo-structural element
    // every 4 characters (possible with inputs like "","","",...).
    do {
      base_ptr[0] = idx + trailing_zeroes(bits);
      bits = bits & (bits - 1);
      base_ptr++;
    } while (bits != 0);
  }
  base = next_base;
}
#endif // SIMDJSON_NAIVE_FLATTEN
} // namespace simdjson

#endif // SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_H
```
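To make the `flatten_bits` contract concrete: each set bit at position p in `bits` becomes the absolute index `idx - 64 + p`, appended at `base_ptr[base]` (the `idx -= 64` reflects the calling convention above, where `idx` points 64 bytes past the block whose bits are being flattened). A small standalone driver around a naive scalar restatement of the routine (hypothetical, for illustration only) shows the expected behavior:

```cpp
#include <cstdint>
#include <cstdio>

// Stand-in for simdjson's trailing_zeroes() helper (GCC/Clang builtin).
static int trailing_zeroes(uint64_t x) { return __builtin_ctzll(x); }

// Naive restatement of flatten_bits: each set bit at position p in 'bits'
// becomes the index (idx - 64) + p, appended at base_ptr[base].
void naive_flatten_bits(uint32_t *base_ptr, uint32_t &base, uint32_t idx,
                        uint64_t bits) {
  uint32_t *out_ptr = base_ptr + base;
  idx -= 64;
  while (bits != 0) {
    *out_ptr++ = idx + trailing_zeroes(bits);
    bits &= bits - 1; // clear the lowest set bit
  }
  base = out_ptr - base_ptr;
}

int main() {
  // Structurals at offsets 0, 5, and 63 of a 64-byte block starting at 0,
  // so idx (one block past the start) is 64.
  uint32_t indexes[8];
  uint32_t base = 0;
  naive_flatten_bits(indexes, base, 64,
                     (1ULL << 0) | (1ULL << 5) | (1ULL << 63));
  for (uint32_t i = 0; i < base; i++)
    printf("%u\n", indexes[i]); // prints 0, 5, 63
  return 0;
}
```

The optimized variant avoids the data-dependent loop by unconditionally writing eight indexes (extra slots are overwritten later), branching only when the popcount exceeds 8 or 16.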
@@ -0,0 +1,95 @@ data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h

```cpp
#ifndef SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_HASWELL_H
#define SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_HASWELL_H

// This file provides the same function as stage1_find_marks_flatten.h,
// but uses Intel intrinsics. This should provide better performance on
// Visual Studio and other compilers that optimize conservatively.

// Specifically, on x64 processors with BMI, x & (x - 1) should be mapped
// to the blsr instruction. By using the _blsr_u64 intrinsic, we ensure
// that this will happen.
/////////

#include "simdjson/common_defs.h"
#include "simdjson/portability.h"

#ifdef IS_X86_64

TARGET_HASWELL
namespace simdjson {
namespace haswell {

// flatten out values in 'bits' assuming that they are to have values of idx
// plus their position in the bitvector, and store these indexes at
// base_ptr[base] incrementing base as we go
// will potentially store extra values beyond end of valid bits, so base_ptr
// needs to be large enough to handle this
really_inline void flatten_bits(uint32_t *base_ptr, uint32_t &base,
                                uint32_t idx, uint64_t bits) {
  // In some instances, the next branch is expensive because it is
  // mispredicted. Unfortunately, in other cases, it helps tremendously.
  if (bits == 0)
    return;
  uint32_t cnt = _mm_popcnt_u64(bits);
  uint32_t next_base = base + cnt;
  idx -= 64;
  base_ptr += base;
  {
    base_ptr[0] = idx + trailing_zeroes(bits);
    bits = _blsr_u64(bits);
    base_ptr[1] = idx + trailing_zeroes(bits);
    bits = _blsr_u64(bits);
    base_ptr[2] = idx + trailing_zeroes(bits);
    bits = _blsr_u64(bits);
    base_ptr[3] = idx + trailing_zeroes(bits);
    bits = _blsr_u64(bits);
    base_ptr[4] = idx + trailing_zeroes(bits);
    bits = _blsr_u64(bits);
    base_ptr[5] = idx + trailing_zeroes(bits);
    bits = _blsr_u64(bits);
    base_ptr[6] = idx + trailing_zeroes(bits);
    bits = _blsr_u64(bits);
    base_ptr[7] = idx + trailing_zeroes(bits);
    bits = _blsr_u64(bits);
    base_ptr += 8;
  }
  // We hope that the next branch is easily predicted.
  if (cnt > 8) {
    base_ptr[0] = idx + trailing_zeroes(bits);
    bits = _blsr_u64(bits);
    base_ptr[1] = idx + trailing_zeroes(bits);
    bits = _blsr_u64(bits);
    base_ptr[2] = idx + trailing_zeroes(bits);
    bits = _blsr_u64(bits);
    base_ptr[3] = idx + trailing_zeroes(bits);
    bits = _blsr_u64(bits);
    base_ptr[4] = idx + trailing_zeroes(bits);
    bits = _blsr_u64(bits);
    base_ptr[5] = idx + trailing_zeroes(bits);
    bits = _blsr_u64(bits);
    base_ptr[6] = idx + trailing_zeroes(bits);
    bits = _blsr_u64(bits);
    base_ptr[7] = idx + trailing_zeroes(bits);
    bits = _blsr_u64(bits);
    base_ptr += 8;
  }
  if (cnt > 16) { // unlucky: we rarely get here
    // since it means having one structural or pseudo-structural element
    // every 4 characters (possible with inputs like "","","",...).
    do {
      base_ptr[0] = idx + trailing_zeroes(bits);
      bits = _blsr_u64(bits);
      base_ptr++;
    } while (bits != 0);
  }
  base = next_base;
}
} // namespace haswell
} // namespace simdjson
UNTARGET_REGION
#endif // IS_X86_64
#endif // SIMDJSON_STAGE1_FIND_MARKS_FLATTEN_HASWELL_H
```
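A quick sanity check of the `x & (x - 1)` / `_blsr_u64` equivalence that the header comment describes (plain C++, no BMI hardware required to run it):

```cpp
#include <cstdint>
#include <cstdio>

int main() {
  // x & (x - 1) clears the lowest set bit; on x64 with BMI1 this is the
  // blsr instruction, which the header above requests explicitly via
  // _blsr_u64 rather than trusting the compiler to pattern-match it.
  uint64_t bits = 0b101100;
  while (bits != 0) {
    printf("lowest set bit at %d\n", __builtin_ctzll(bits));
    bits &= bits - 1; // equivalent to _blsr_u64(bits) on BMI1 hardware
  }
  return 0; // prints positions 2, 3, 5
}
```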
@@ -0,0 +1,210 @@ data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h

```cpp
#ifndef SIMDJSON_STAGE1_FIND_MARKS_HASWELL_H
#define SIMDJSON_STAGE1_FIND_MARKS_HASWELL_H

#include "simdjson/simdutf8check_haswell.h"
#include "simdjson/stage1_find_marks.h"
#include "simdjson/stage1_find_marks_flatten_haswell.h"
#include "simdjson/stage1_find_marks_macros.h"

#ifdef IS_X86_64

TARGET_HASWELL
namespace simdjson {
template <> struct simd_input<Architecture::HASWELL> {
  __m256i lo;
  __m256i hi;
};

template <>
really_inline simd_input<Architecture::HASWELL>
fill_input<Architecture::HASWELL>(const uint8_t *ptr) {
  struct simd_input<Architecture::HASWELL> in;
  in.lo = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 0));
  in.hi = _mm256_loadu_si256(reinterpret_cast<const __m256i *>(ptr + 32));
  return in;
}

template <>
really_inline uint64_t
compute_quote_mask<Architecture::HASWELL>(uint64_t quote_bits) {
  // There should be no such thing as a processor supporting AVX2
  // but not CLMUL.
  uint64_t quote_mask = _mm_cvtsi128_si64(_mm_clmulepi64_si128(
      _mm_set_epi64x(0ULL, quote_bits), _mm_set1_epi8(0xFFu), 0));
  return quote_mask;
}

template <> struct utf8_checking_state<Architecture::HASWELL> {
  __m256i has_error;
  avx_processed_utf_bytes previous;
  utf8_checking_state() {
    has_error = _mm256_setzero_si256();
    previous.raw_bytes = _mm256_setzero_si256();
    previous.high_nibbles = _mm256_setzero_si256();
    previous.carried_continuations = _mm256_setzero_si256();
  }
};

template <>
really_inline void check_utf8<Architecture::HASWELL>(
    simd_input<Architecture::HASWELL> in,
    utf8_checking_state<Architecture::HASWELL> &state) {
  __m256i high_bit = _mm256_set1_epi8(0x80u);
  if ((_mm256_testz_si256(_mm256_or_si256(in.lo, in.hi), high_bit)) == 1) {
    // it is ASCII, we just check continuations
    state.has_error = _mm256_or_si256(
        _mm256_cmpgt_epi8(state.previous.carried_continuations,
                          _mm256_setr_epi8(9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
                                           9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9, 9,
                                           9, 9, 9, 9, 9, 9, 9, 1)),
        state.has_error);
  } else {
    // it is not ASCII so we have to do heavy work
    state.previous =
        avx_check_utf8_bytes(in.lo, &(state.previous), &(state.has_error));
    state.previous =
        avx_check_utf8_bytes(in.hi, &(state.previous), &(state.has_error));
  }
}

template <>
really_inline ErrorValues check_utf8_errors<Architecture::HASWELL>(
    utf8_checking_state<Architecture::HASWELL> &state) {
  return _mm256_testz_si256(state.has_error, state.has_error) == 0
             ? simdjson::UTF8_ERROR
             : simdjson::SUCCESS;
}

template <>
really_inline uint64_t cmp_mask_against_input<Architecture::HASWELL>(
    simd_input<Architecture::HASWELL> in, uint8_t m) {
  const __m256i mask = _mm256_set1_epi8(m);
  __m256i cmp_res_0 = _mm256_cmpeq_epi8(in.lo, mask);
  uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
  __m256i cmp_res_1 = _mm256_cmpeq_epi8(in.hi, mask);
  uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
  return res_0 | (res_1 << 32);
}

template <>
really_inline uint64_t unsigned_lteq_against_input<Architecture::HASWELL>(
    simd_input<Architecture::HASWELL> in, uint8_t m) {
  const __m256i maxval = _mm256_set1_epi8(m);
  __m256i cmp_res_0 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, in.lo), maxval);
  uint64_t res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(cmp_res_0));
  __m256i cmp_res_1 = _mm256_cmpeq_epi8(_mm256_max_epu8(maxval, in.hi), maxval);
  uint64_t res_1 = _mm256_movemask_epi8(cmp_res_1);
  return res_0 | (res_1 << 32);
}

template <>
really_inline uint64_t find_odd_backslash_sequences<Architecture::HASWELL>(
    simd_input<Architecture::HASWELL> in,
    uint64_t &prev_iter_ends_odd_backslash) {
  FIND_ODD_BACKSLASH_SEQUENCES(Architecture::HASWELL, in,
                               prev_iter_ends_odd_backslash);
}

template <>
really_inline uint64_t find_quote_mask_and_bits<Architecture::HASWELL>(
    simd_input<Architecture::HASWELL> in, uint64_t odd_ends,
    uint64_t &prev_iter_inside_quote, uint64_t &quote_bits,
    uint64_t &error_mask) {
  FIND_QUOTE_MASK_AND_BITS(Architecture::HASWELL, in, odd_ends,
                           prev_iter_inside_quote, quote_bits, error_mask)
}

template <>
really_inline void find_whitespace_and_structurals<Architecture::HASWELL>(
    simd_input<Architecture::HASWELL> in, uint64_t &whitespace,
    uint64_t &structurals) {
#ifdef SIMDJSON_NAIVE_STRUCTURAL
  // You should never need this naive approach, but it can be useful
  // for research purposes
  const __m256i mask_open_brace = _mm256_set1_epi8(0x7b);
  __m256i struct_lo = _mm256_cmpeq_epi8(in.lo, mask_open_brace);
  __m256i struct_hi = _mm256_cmpeq_epi8(in.hi, mask_open_brace);
  const __m256i mask_close_brace = _mm256_set1_epi8(0x7d);
  struct_lo =
      _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_close_brace));
  struct_hi =
      _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_close_brace));
  const __m256i mask_open_bracket = _mm256_set1_epi8(0x5b);
  struct_lo =
      _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_open_bracket));
  struct_hi =
      _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_open_bracket));
  const __m256i mask_close_bracket = _mm256_set1_epi8(0x5d);
  struct_lo =
      _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_close_bracket));
  struct_hi =
      _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_close_bracket));
  const __m256i mask_column = _mm256_set1_epi8(0x3a);
  struct_lo = _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_column));
  struct_hi = _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_column));
  const __m256i mask_comma = _mm256_set1_epi8(0x2c);
  struct_lo = _mm256_or_si256(struct_lo, _mm256_cmpeq_epi8(in.lo, mask_comma));
  struct_hi = _mm256_or_si256(struct_hi, _mm256_cmpeq_epi8(in.hi, mask_comma));
  uint64_t structural_res_0 =
      static_cast<uint32_t>(_mm256_movemask_epi8(struct_lo));
  uint64_t structural_res_1 = _mm256_movemask_epi8(struct_hi);
  structurals = (structural_res_0 | (structural_res_1 << 32));

  const __m256i mask_space = _mm256_set1_epi8(0x20);
  __m256i space_lo = _mm256_cmpeq_epi8(in.lo, mask_space);
  __m256i space_hi = _mm256_cmpeq_epi8(in.hi, mask_space);
  const __m256i mask_linefeed = _mm256_set1_epi8(0x0a);
  space_lo = _mm256_or_si256(space_lo, _mm256_cmpeq_epi8(in.lo, mask_linefeed));
  space_hi = _mm256_or_si256(space_hi, _mm256_cmpeq_epi8(in.hi, mask_linefeed));
  const __m256i mask_tab = _mm256_set1_epi8(0x09);
  space_lo = _mm256_or_si256(space_lo, _mm256_cmpeq_epi8(in.lo, mask_tab));
  space_hi = _mm256_or_si256(space_hi, _mm256_cmpeq_epi8(in.hi, mask_tab));
  const __m256i mask_carriage = _mm256_set1_epi8(0x0d);
  space_lo = _mm256_or_si256(space_lo, _mm256_cmpeq_epi8(in.lo, mask_carriage));
  space_hi = _mm256_or_si256(space_hi, _mm256_cmpeq_epi8(in.hi, mask_carriage));

  uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(space_lo));
  uint64_t ws_res_1 = _mm256_movemask_epi8(space_hi);
  whitespace = (ws_res_0 | (ws_res_1 << 32));
  // end of naive approach

#else // SIMDJSON_NAIVE_STRUCTURAL
  // clang-format off
  const __m256i structural_table =
      _mm256_setr_epi8(44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123,
                       44, 125, 0, 0, 0xc0u, 0, 0, 0, 0, 0, 0, 0, 0, 0, 58, 123);
  const __m256i white_table = _mm256_setr_epi8(
      32, 100, 100, 100, 17, 100, 113, 2, 100, 9, 10, 112, 100, 13, 100, 100,
      32, 100, 100, 100, 17, 100, 113, 2, 100, 9, 10, 112, 100, 13, 100, 100);
  // clang-format on
  const __m256i struct_offset = _mm256_set1_epi8(0xd4u);
  const __m256i struct_mask = _mm256_set1_epi8(32);

  __m256i lo_white =
      _mm256_cmpeq_epi8(in.lo, _mm256_shuffle_epi8(white_table, in.lo));
  __m256i hi_white =
      _mm256_cmpeq_epi8(in.hi, _mm256_shuffle_epi8(white_table, in.hi));
  uint64_t ws_res_0 = static_cast<uint32_t>(_mm256_movemask_epi8(lo_white));
  uint64_t ws_res_1 = _mm256_movemask_epi8(hi_white);
  whitespace = (ws_res_0 | (ws_res_1 << 32));
  __m256i lo_struct_r1 = _mm256_add_epi8(struct_offset, in.lo);
  __m256i hi_struct_r1 = _mm256_add_epi8(struct_offset, in.hi);
  __m256i lo_struct_r2 = _mm256_or_si256(in.lo, struct_mask);
  __m256i hi_struct_r2 = _mm256_or_si256(in.hi, struct_mask);
  __m256i lo_struct_r3 = _mm256_shuffle_epi8(structural_table, lo_struct_r1);
  __m256i hi_struct_r3 = _mm256_shuffle_epi8(structural_table, hi_struct_r1);
  __m256i lo_struct = _mm256_cmpeq_epi8(lo_struct_r2, lo_struct_r3);
  __m256i hi_struct = _mm256_cmpeq_epi8(hi_struct_r2, hi_struct_r3);

  uint64_t structural_res_0 =
      static_cast<uint32_t>(_mm256_movemask_epi8(lo_struct));
  uint64_t structural_res_1 = _mm256_movemask_epi8(hi_struct);
  structurals = (structural_res_0 | (structural_res_1 << 32));
#endif // SIMDJSON_NAIVE_STRUCTURAL
}

} // namespace simdjson
UNTARGET_REGION

#endif // IS_X86_64
#endif // SIMDJSON_STAGE1_FIND_MARKS_HASWELL_H
```
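For readers without an intrinsics reference handy: the carry-less multiply in `compute_quote_mask` (PCLMULQDQ here, `vmull_p64` in the ARM64 header) multiplies `quote_bits` by an all-ones word, which amounts to a running XOR (parity) of all lower bits, turning quote *positions* into "inside a string" *regions*. A portable scalar sketch, assumed to be in the spirit of the `portable_compute_quote_mask` fallback referenced in the ARM64 header (exact form not shown in this diff):

```cpp
#include <cstdint>
#include <cstdio>

// Prefix XOR: bit i of the result is the XOR (parity) of quote_bits[0..i].
// Computed by log2(64) = 6 shift-and-XOR doubling steps.
uint64_t portable_quote_mask(uint64_t quote_bits) {
  uint64_t mask = quote_bits ^ (quote_bits << 1);
  mask ^= mask << 2;
  mask ^= mask << 4;
  mask ^= mask << 8;
  mask ^= mask << 16;
  mask ^= mask << 32;
  return mask;
}

int main() {
  // Quotes at positions 2 and 6: bytes 2..5 land inside the string region.
  uint64_t quote_bits = (1ULL << 2) | (1ULL << 6);
  printf("%llx\n", (unsigned long long)portable_quote_mask(quote_bits));
  // prints 3c (bits 2, 3, 4, 5 set)
  return 0;
}
```

Note that the mask includes each opening quote but not the matching closing quote, which is exactly what `find_quote_mask_and_bits` needs to suppress structural characters found inside strings.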