simdjson 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/.clang-format +5 -0
- data/.gitignore +14 -0
- data/.gitmodules +3 -0
- data/.rubocop.yml +9 -0
- data/.travis.yml +7 -0
- data/Gemfile +4 -0
- data/LICENSE.txt +21 -0
- data/README.md +39 -0
- data/Rakefile +32 -0
- data/benchmark/apache_builds.json +4421 -0
- data/benchmark/demo.json +15 -0
- data/benchmark/github_events.json +1390 -0
- data/benchmark/run_benchmark.rb +30 -0
- data/ext/simdjson/extconf.rb +22 -0
- data/ext/simdjson/simdjson.cpp +76 -0
- data/ext/simdjson/simdjson.hpp +6 -0
- data/lib/simdjson/version.rb +3 -0
- data/lib/simdjson.rb +2 -0
- data/simdjson.gemspec +35 -0
- data/vendor/.gitkeep +0 -0
- data/vendor/simdjson/AUTHORS +3 -0
- data/vendor/simdjson/CMakeLists.txt +63 -0
- data/vendor/simdjson/CONTRIBUTORS +27 -0
- data/vendor/simdjson/Dockerfile +10 -0
- data/vendor/simdjson/LICENSE +201 -0
- data/vendor/simdjson/Makefile +203 -0
- data/vendor/simdjson/Notes.md +85 -0
- data/vendor/simdjson/README.md +581 -0
- data/vendor/simdjson/amalgamation.sh +158 -0
- data/vendor/simdjson/benchmark/CMakeLists.txt +8 -0
- data/vendor/simdjson/benchmark/benchmark.h +223 -0
- data/vendor/simdjson/benchmark/distinctuseridcompetition.cpp +347 -0
- data/vendor/simdjson/benchmark/linux/linux-perf-events.h +93 -0
- data/vendor/simdjson/benchmark/minifiercompetition.cpp +181 -0
- data/vendor/simdjson/benchmark/parse.cpp +393 -0
- data/vendor/simdjson/benchmark/parseandstatcompetition.cpp +305 -0
- data/vendor/simdjson/benchmark/parsingcompetition.cpp +298 -0
- data/vendor/simdjson/benchmark/statisticalmodel.cpp +208 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json-forwards.h +344 -0
- data/vendor/simdjson/dependencies/jsoncppdist/json/json.h +2366 -0
- data/vendor/simdjson/dependencies/jsoncppdist/jsoncpp.cpp +5418 -0
- data/vendor/simdjson/doc/apache_builds.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/gbps.png +0 -0
- data/vendor/simdjson/doc/github_events.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/twitter.jsonparseandstat.png +0 -0
- data/vendor/simdjson/doc/update-center.jsonparseandstat.png +0 -0
- data/vendor/simdjson/images/halvarflake.png +0 -0
- data/vendor/simdjson/images/logo.png +0 -0
- data/vendor/simdjson/include/simdjson/common_defs.h +102 -0
- data/vendor/simdjson/include/simdjson/isadetection.h +152 -0
- data/vendor/simdjson/include/simdjson/jsoncharutils.h +301 -0
- data/vendor/simdjson/include/simdjson/jsonformatutils.h +202 -0
- data/vendor/simdjson/include/simdjson/jsonioutil.h +32 -0
- data/vendor/simdjson/include/simdjson/jsonminifier.h +30 -0
- data/vendor/simdjson/include/simdjson/jsonparser.h +250 -0
- data/vendor/simdjson/include/simdjson/numberparsing.h +587 -0
- data/vendor/simdjson/include/simdjson/padded_string.h +70 -0
- data/vendor/simdjson/include/simdjson/parsedjson.h +544 -0
- data/vendor/simdjson/include/simdjson/portability.h +172 -0
- data/vendor/simdjson/include/simdjson/simdjson.h +44 -0
- data/vendor/simdjson/include/simdjson/simdjson_version.h +13 -0
- data/vendor/simdjson/include/simdjson/simdprune_tables.h +35074 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_arm64.h +180 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_haswell.h +198 -0
- data/vendor/simdjson/include/simdjson/simdutf8check_westmere.h +169 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks.h +121 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_arm64.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten.h +93 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_flatten_haswell.h +95 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_haswell.h +210 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_macros.h +239 -0
- data/vendor/simdjson/include/simdjson/stage1_find_marks_westmere.h +194 -0
- data/vendor/simdjson/include/simdjson/stage2_build_tape.h +85 -0
- data/vendor/simdjson/include/simdjson/stringparsing.h +105 -0
- data/vendor/simdjson/include/simdjson/stringparsing_arm64.h +56 -0
- data/vendor/simdjson/include/simdjson/stringparsing_haswell.h +43 -0
- data/vendor/simdjson/include/simdjson/stringparsing_macros.h +88 -0
- data/vendor/simdjson/include/simdjson/stringparsing_westmere.h +41 -0
- data/vendor/simdjson/jsonexamples/small/jsoniter_scala/README.md +4 -0
- data/vendor/simdjson/scripts/dumpsimplestats.sh +11 -0
- data/vendor/simdjson/scripts/issue150.sh +14 -0
- data/vendor/simdjson/scripts/javascript/README.md +3 -0
- data/vendor/simdjson/scripts/javascript/generatelargejson.js +19 -0
- data/vendor/simdjson/scripts/minifier.sh +11 -0
- data/vendor/simdjson/scripts/parseandstat.sh +24 -0
- data/vendor/simdjson/scripts/parser.sh +11 -0
- data/vendor/simdjson/scripts/parsingcompdata.sh +26 -0
- data/vendor/simdjson/scripts/plotparse.sh +98 -0
- data/vendor/simdjson/scripts/selectparser.sh +11 -0
- data/vendor/simdjson/scripts/setupfortesting/disablehyperthreading.sh +15 -0
- data/vendor/simdjson/scripts/setupfortesting/powerpolicy.sh +32 -0
- data/vendor/simdjson/scripts/setupfortesting/setupfortesting.sh +6 -0
- data/vendor/simdjson/scripts/setupfortesting/turboboost.sh +51 -0
- data/vendor/simdjson/scripts/testjson2json.sh +99 -0
- data/vendor/simdjson/scripts/transitions/Makefile +10 -0
- data/vendor/simdjson/scripts/transitions/generatetransitions.cpp +20 -0
- data/vendor/simdjson/singleheader/README.md +1 -0
- data/vendor/simdjson/singleheader/amalgamation_demo.cpp +20 -0
- data/vendor/simdjson/singleheader/simdjson.cpp +1652 -0
- data/vendor/simdjson/singleheader/simdjson.h +39692 -0
- data/vendor/simdjson/src/CMakeLists.txt +67 -0
- data/vendor/simdjson/src/jsonioutil.cpp +35 -0
- data/vendor/simdjson/src/jsonminifier.cpp +285 -0
- data/vendor/simdjson/src/jsonparser.cpp +91 -0
- data/vendor/simdjson/src/parsedjson.cpp +323 -0
- data/vendor/simdjson/src/parsedjsoniterator.cpp +272 -0
- data/vendor/simdjson/src/simdjson.cpp +30 -0
- data/vendor/simdjson/src/stage1_find_marks.cpp +41 -0
- data/vendor/simdjson/src/stage2_build_tape.cpp +567 -0
- data/vendor/simdjson/style/clang-format-check.sh +25 -0
- data/vendor/simdjson/style/clang-format.sh +25 -0
- data/vendor/simdjson/style/run-clang-format.py +326 -0
- data/vendor/simdjson/tape.md +134 -0
- data/vendor/simdjson/tests/CMakeLists.txt +25 -0
- data/vendor/simdjson/tests/allparserscheckfile.cpp +192 -0
- data/vendor/simdjson/tests/basictests.cpp +75 -0
- data/vendor/simdjson/tests/jsoncheck.cpp +136 -0
- data/vendor/simdjson/tests/numberparsingcheck.cpp +224 -0
- data/vendor/simdjson/tests/pointercheck.cpp +38 -0
- data/vendor/simdjson/tests/singleheadertest.cpp +22 -0
- data/vendor/simdjson/tests/stringparsingcheck.cpp +408 -0
- data/vendor/simdjson/tools/CMakeLists.txt +3 -0
- data/vendor/simdjson/tools/cmake/FindCTargets.cmake +15 -0
- data/vendor/simdjson/tools/cmake/FindOptions.cmake +52 -0
- data/vendor/simdjson/tools/json2json.cpp +112 -0
- data/vendor/simdjson/tools/jsonpointer.cpp +93 -0
- data/vendor/simdjson/tools/jsonstats.cpp +143 -0
- data/vendor/simdjson/tools/minify.cpp +21 -0
- data/vendor/simdjson/tools/release.py +125 -0
- data/vendor/simdjson/windows/dirent_portable.h +1043 -0
- metadata +273 -0
@@ -0,0 +1,180 @@
|
|
1
|
+
// From https://github.com/cyb70289/utf8/blob/master/lemire-neon.c
|
2
|
+
// Adapted from https://github.com/lemire/fastvalidate-utf-8
|
3
|
+
|
4
|
+
#ifndef SIMDJSON_SIMDUTF8CHECK_ARM64_H
|
5
|
+
#define SIMDJSON_SIMDUTF8CHECK_ARM64_H
|
6
|
+
|
7
|
+
#if defined(_ARM_NEON) || defined(__aarch64__) || \
|
8
|
+
(defined(_MSC_VER) && defined(_M_ARM64))
|
9
|
+
|
10
|
+
#include <arm_neon.h>
|
11
|
+
#include <cinttypes>
|
12
|
+
#include <cstddef>
|
13
|
+
#include <cstdint>
|
14
|
+
#include <cstdio>
|
15
|
+
#include <cstring>
|
16
|
+
|
17
|
+
/*
|
18
|
+
* legal utf-8 byte sequence
|
19
|
+
* http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
|
20
|
+
*
|
21
|
+
* Code Points 1st 2s 3s 4s
|
22
|
+
* U+0000..U+007F 00..7F
|
23
|
+
* U+0080..U+07FF C2..DF 80..BF
|
24
|
+
* U+0800..U+0FFF E0 A0..BF 80..BF
|
25
|
+
* U+1000..U+CFFF E1..EC 80..BF 80..BF
|
26
|
+
* U+D000..U+D7FF ED 80..9F 80..BF
|
27
|
+
* U+E000..U+FFFF EE..EF 80..BF 80..BF
|
28
|
+
* U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
|
29
|
+
* U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
|
30
|
+
* U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
|
31
|
+
*
|
32
|
+
*/
|
33
|
+
namespace simdjson {
|
34
|
+
|
35
|
+
// all byte values must be no larger than 0xF4
|
36
|
+
static inline void check_smaller_than_0xF4(int8x16_t current_bytes,
|
37
|
+
int8x16_t *has_error) {
|
38
|
+
// unsigned, saturates to 0 below max
|
39
|
+
*has_error = vorrq_s8(
|
40
|
+
*has_error, vreinterpretq_s8_u8(vqsubq_u8(
|
41
|
+
vreinterpretq_u8_s8(current_bytes), vdupq_n_u8(0xF4))));
|
42
|
+
}
|
43
|
+
|
44
|
+
// Lookup table indexed by the high nibble of a byte. Each entry is the
// sequence length announced by a byte with that high nibble (0 for a
// continuation byte).
static const int8_t _nibbles[] = {
    1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII)
    0, 0, 0, 0,             // 10xx (continuation)
    2, 2,                   // 110x
    3,                      // 1110
    4,                      // 1111, next should be 0 (not checked here)
};
|
51
|
+
|
52
|
+
// Maps each high nibble (one per lane) to the corresponding entry of the
// _nibbles table.
static inline int8x16_t continuation_lengths(int8x16_t high_nibbles) {
  const int8x16_t length_table = vld1q_s8(_nibbles);
  const uint8x16_t table_indices = vreinterpretq_u8_s8(high_nibbles);
  return vqtbl1q_s8(length_table, table_indices);
}
|
55
|
+
|
56
|
+
// Propagates the initial sequence lengths onto the bytes that follow them.
// Each lane receives its own initial length plus the (saturating) decremented
// values carried over from the one and two preceding positions; the lanes
// preceding the current block are taken from the tail of previous_carries.
static inline int8x16_t carry_continuations(int8x16_t initial_lengths,
                                            int8x16_t previous_carries) {
  // Shift by one position (pulling in the last byte of previous_carries),
  // then subtract 1 with unsigned saturation.
  const int8x16_t shifted_by_one =
      vextq_s8(previous_carries, initial_lengths, 16 - 1);
  const int8x16_t right1 = vreinterpretq_s8_u8(
      vqsubq_u8(vreinterpretq_u8_s8(shifted_by_one), vdupq_n_u8(1)));
  const int8x16_t sum = vaddq_s8(initial_lengths, right1);

  // Shift the partial sums by two positions (pulling in the last two bytes of
  // previous_carries), then subtract 2 with unsigned saturation.
  const int8x16_t shifted_by_two = vextq_s8(previous_carries, sum, 16 - 2);
  const int8x16_t right2 = vreinterpretq_s8_u8(
      vqsubq_u8(vreinterpretq_u8_s8(shifted_by_two), vdupq_n_u8(2)));
  return vaddq_s8(sum, right2);
}
|
69
|
+
|
70
|
+
// Detects overlapping or missing continuation bytes.
// A lane is in error when
//   (carry > length && length > 0) || (!(carry > length) && !(length > 0)),
// i.e. when (carries > length) == (length > 0). Such lanes are OR-ed into
// *has_error.
static inline void check_continuations(int8x16_t initial_lengths,
                                       int8x16_t carries,
                                       int8x16_t *has_error) {
  const uint8x16_t carry_exceeds_length = vcgtq_s8(carries, initial_lengths);
  const uint8x16_t length_is_positive =
      vcgtq_s8(initial_lengths, vdupq_n_s8(0));
  const uint8x16_t overunder =
      vceqq_u8(carry_exceeds_length, length_is_positive);

  *has_error = vorrq_s8(*has_error, vreinterpretq_s8_u8(overunder));
}
|
82
|
+
|
83
|
+
// when 0xED is found, next byte must be no larger than 0x9F
|
84
|
+
// when 0xF4 is found, next byte must be no larger than 0x8F
|
85
|
+
// next byte must be continuation, ie sign bit is set, so signed < is ok
|
86
|
+
static inline void check_first_continuation_max(int8x16_t current_bytes,
|
87
|
+
int8x16_t off1_current_bytes,
|
88
|
+
int8x16_t *has_error) {
|
89
|
+
uint8x16_t maskED = vceqq_s8(off1_current_bytes, vdupq_n_s8(0xED));
|
90
|
+
uint8x16_t maskF4 = vceqq_s8(off1_current_bytes, vdupq_n_s8(0xF4));
|
91
|
+
|
92
|
+
uint8x16_t badfollowED =
|
93
|
+
vandq_u8(vcgtq_s8(current_bytes, vdupq_n_s8(0x9F)), maskED);
|
94
|
+
uint8x16_t badfollowF4 =
|
95
|
+
vandq_u8(vcgtq_s8(current_bytes, vdupq_n_s8(0x8F)), maskF4);
|
96
|
+
|
97
|
+
*has_error = vorrq_s8(
|
98
|
+
*has_error, vreinterpretq_s8_u8(vorrq_u8(badfollowED, badfollowF4)));
|
99
|
+
}
|
100
|
+
|
101
|
+
// Lookup table indexed by the high nibble of the previous byte: the value
// the previous byte is compared against (signed) in check_overlong. -128 can
// never be greater than a signed byte, so those entries never flag an error.
static const int8_t _initial_mins[] = {
    -128, -128, -128, -128, -128, -128, -128, -128, // 0xxx => false
    -128, -128, -128, -128,                         // 10xx => false
    static_cast<int8_t>(0xC2), -128,                // 110x
    static_cast<int8_t>(0xE1),                      // 1110
    static_cast<int8_t>(0xF1),                      // 1111
};
|
108
|
+
|
109
|
+
// Lookup table indexed by the high nibble of the previous byte: the value
// the current byte is compared against (signed) in check_overlong.
static const int8_t _second_mins[] = {
    -128, -128, -128, -128, -128, -128, -128, -128, // 0xxx => false
    -128, -128, -128, -128,                         // 10xx => false
    127, 127,                                       // 110x => true
    static_cast<int8_t>(0xA0),                      // 1110
    static_cast<int8_t>(0x90),                      // 1111
};
|
116
|
+
|
117
|
+
// map off1_hibits => error condition
|
118
|
+
// hibits off1 cur
|
119
|
+
// C => < C2 && true
|
120
|
+
// E => < E1 && < A0
|
121
|
+
// F => < F1 && < 90
|
122
|
+
// else false && false
|
123
|
+
static inline void check_overlong(int8x16_t current_bytes,
|
124
|
+
int8x16_t off1_current_bytes,
|
125
|
+
int8x16_t hibits, int8x16_t previous_hibits,
|
126
|
+
int8x16_t *has_error) {
|
127
|
+
int8x16_t off1_hibits = vextq_s8(previous_hibits, hibits, 16 - 1);
|
128
|
+
int8x16_t initial_mins =
|
129
|
+
vqtbl1q_s8(vld1q_s8(_initial_mins), vreinterpretq_u8_s8(off1_hibits));
|
130
|
+
|
131
|
+
uint8x16_t initial_under = vcgtq_s8(initial_mins, off1_current_bytes);
|
132
|
+
|
133
|
+
int8x16_t second_mins =
|
134
|
+
vqtbl1q_s8(vld1q_s8(_second_mins), vreinterpretq_u8_s8(off1_hibits));
|
135
|
+
uint8x16_t second_under = vcgtq_s8(second_mins, current_bytes);
|
136
|
+
*has_error = vorrq_s8(
|
137
|
+
*has_error, vreinterpretq_s8_u8(vandq_u8(initial_under, second_under)));
|
138
|
+
}
|
139
|
+
|
140
|
+
struct processed_utf_bytes {
|
141
|
+
int8x16_t raw_bytes;
|
142
|
+
int8x16_t high_nibbles;
|
143
|
+
int8x16_t carried_continuations;
|
144
|
+
};
|
145
|
+
|
146
|
+
// Stores the input bytes and their high nibbles (byte >> 4, unsigned) into
// *answer. carried_continuations is left untouched.
static inline void count_nibbles(int8x16_t bytes,
                                 struct processed_utf_bytes *answer) {
  const uint8x16_t shifted = vshrq_n_u8(vreinterpretq_u8_s8(bytes), 4);
  answer->raw_bytes = bytes;
  answer->high_nibbles = vreinterpretq_s8_u8(shifted);
}
|
152
|
+
|
153
|
+
// check whether the current bytes are valid UTF-8
|
154
|
+
// at the end of the function, previous gets updated
|
155
|
+
static inline struct processed_utf_bytes
|
156
|
+
check_utf8_bytes(int8x16_t current_bytes, struct processed_utf_bytes *previous,
|
157
|
+
int8x16_t *has_error) {
|
158
|
+
struct processed_utf_bytes pb;
|
159
|
+
count_nibbles(current_bytes, &pb);
|
160
|
+
|
161
|
+
check_smaller_than_0xF4(current_bytes, has_error);
|
162
|
+
|
163
|
+
int8x16_t initial_lengths = continuation_lengths(pb.high_nibbles);
|
164
|
+
|
165
|
+
pb.carried_continuations =
|
166
|
+
carry_continuations(initial_lengths, previous->carried_continuations);
|
167
|
+
|
168
|
+
check_continuations(initial_lengths, pb.carried_continuations, has_error);
|
169
|
+
|
170
|
+
int8x16_t off1_current_bytes =
|
171
|
+
vextq_s8(previous->raw_bytes, pb.raw_bytes, 16 - 1);
|
172
|
+
check_first_continuation_max(current_bytes, off1_current_bytes, has_error);
|
173
|
+
|
174
|
+
check_overlong(current_bytes, off1_current_bytes, pb.high_nibbles,
|
175
|
+
previous->high_nibbles, has_error);
|
176
|
+
return pb;
|
177
|
+
}
|
178
|
+
} // namespace simdjson
|
179
|
+
#endif
|
180
|
+
#endif
|
@@ -0,0 +1,198 @@
|
|
1
|
+
#ifndef SIMDJSON_SIMDUTF8CHECK_HASWELL_H
|
2
|
+
#define SIMDJSON_SIMDUTF8CHECK_HASWELL_H
|
3
|
+
|
4
|
+
#include "simdjson/portability.h"
|
5
|
+
#include <stddef.h>
|
6
|
+
#include <stdint.h>
|
7
|
+
#include <string.h>
|
8
|
+
|
9
|
+
#ifdef IS_X86_64
|
10
|
+
/*
|
11
|
+
* legal utf-8 byte sequence
|
12
|
+
* http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
|
13
|
+
*
|
14
|
+
* Code Points 1st 2s 3s 4s
|
15
|
+
* U+0000..U+007F 00..7F
|
16
|
+
* U+0080..U+07FF C2..DF 80..BF
|
17
|
+
* U+0800..U+0FFF E0 A0..BF 80..BF
|
18
|
+
* U+1000..U+CFFF E1..EC 80..BF 80..BF
|
19
|
+
* U+D000..U+D7FF ED 80..9F 80..BF
|
20
|
+
* U+E000..U+FFFF EE..EF 80..BF 80..BF
|
21
|
+
* U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
|
22
|
+
* U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
|
23
|
+
* U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
|
24
|
+
*
|
25
|
+
*/
|
26
|
+
|
27
|
+
// all byte values must be no larger than 0xF4
|
28
|
+
|
29
|
+
TARGET_HASWELL
|
30
|
+
namespace simdjson {
|
31
|
+
// Returns b moved up by one byte position, with the last byte of a inserted
// in front.
static inline __m256i push_last_byte_of_a_to_b(__m256i a, __m256i b) {
  // [high lane of a, low lane of b]: the 128-bit lane preceding each lane of b
  const __m256i preceding_lanes = _mm256_permute2x128_si256(a, b, 0x21);
  return _mm256_alignr_epi8(b, preceding_lanes, 15);
}
|
34
|
+
|
35
|
+
// Returns b moved up by two byte positions, with the last two bytes of a
// inserted in front.
static inline __m256i push_last_2bytes_of_a_to_b(__m256i a, __m256i b) {
  // [high lane of a, low lane of b]: the 128-bit lane preceding each lane of b
  const __m256i preceding_lanes = _mm256_permute2x128_si256(a, b, 0x21);
  return _mm256_alignr_epi8(b, preceding_lanes, 14);
}
|
38
|
+
|
39
|
+
// all byte values must be no larger than 0xF4
|
40
|
+
static inline void avx_check_smaller_than_0xF4(__m256i current_bytes,
|
41
|
+
__m256i *has_error) {
|
42
|
+
// unsigned, saturates to 0 below max
|
43
|
+
*has_error = _mm256_or_si256(
|
44
|
+
*has_error, _mm256_subs_epu8(current_bytes, _mm256_set1_epi8(0xF4u)));
|
45
|
+
}
|
46
|
+
|
47
|
+
// Maps each high nibble (one per lane) to the sequence length announced by a
// byte with that high nibble (0 for a continuation byte). The 16-entry table
// is repeated for both 128-bit halves of the register.
static inline __m256i avx_continuation_lengths(__m256i high_nibbles) {
  const __m256i length_table =
      _mm256_setr_epi8(1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII)
                       0, 0, 0, 0,             // 10xx (continuation)
                       2, 2,                   // 110x
                       3,                      // 1110
                       4, // 1111, next should be 0 (not checked here)
                       1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII)
                       0, 0, 0, 0,             // 10xx (continuation)
                       2, 2,                   // 110x
                       3,                      // 1110
                       4 // 1111, next should be 0 (not checked here)
      );
  return _mm256_shuffle_epi8(length_table, high_nibbles);
}
|
62
|
+
|
63
|
+
// Propagates the initial sequence lengths onto the bytes that follow them.
// Each lane receives its own initial length plus the (saturating) decremented
// values carried over from the one and two preceding positions; the lanes
// preceding the current block are taken from the tail of previous_carries.
static inline __m256i avx_carry_continuations(__m256i initial_lengths,
                                              __m256i previous_carries) {
  const __m256i shifted_by_one =
      push_last_byte_of_a_to_b(previous_carries, initial_lengths);
  const __m256i right1 = _mm256_subs_epu8(shifted_by_one, _mm256_set1_epi8(1));
  const __m256i sum = _mm256_add_epi8(initial_lengths, right1);

  const __m256i shifted_by_two =
      push_last_2bytes_of_a_to_b(previous_carries, sum);
  const __m256i right2 = _mm256_subs_epu8(shifted_by_two, _mm256_set1_epi8(2));
  return _mm256_add_epi8(sum, right2);
}
|
75
|
+
|
76
|
+
// Detects overlapping or missing continuation bytes.
// A lane is in error when
//   (carry > length && length > 0) || (!(carry > length) && !(length > 0)),
// i.e. when (carries > length) == (length > 0). Such lanes are OR-ed into
// *has_error.
static inline void avx_check_continuations(__m256i initial_lengths,
                                           __m256i carries,
                                           __m256i *has_error) {
  const __m256i carry_exceeds_length =
      _mm256_cmpgt_epi8(carries, initial_lengths);
  const __m256i length_is_positive =
      _mm256_cmpgt_epi8(initial_lengths, _mm256_setzero_si256());
  const __m256i overunder =
      _mm256_cmpeq_epi8(carry_exceeds_length, length_is_positive);

  *has_error = _mm256_or_si256(*has_error, overunder);
}
|
89
|
+
|
90
|
+
// when 0xED is found, next byte must be no larger than 0x9F
|
91
|
+
// when 0xF4 is found, next byte must be no larger than 0x8F
|
92
|
+
// next byte must be continuation, ie sign bit is set, so signed < is ok
|
93
|
+
static inline void avx_check_first_continuation_max(__m256i current_bytes,
|
94
|
+
__m256i off1_current_bytes,
|
95
|
+
__m256i *has_error) {
|
96
|
+
__m256i maskED =
|
97
|
+
_mm256_cmpeq_epi8(off1_current_bytes, _mm256_set1_epi8(0xEDu));
|
98
|
+
__m256i maskF4 =
|
99
|
+
_mm256_cmpeq_epi8(off1_current_bytes, _mm256_set1_epi8(0xF4u));
|
100
|
+
|
101
|
+
__m256i badfollowED = _mm256_and_si256(
|
102
|
+
_mm256_cmpgt_epi8(current_bytes, _mm256_set1_epi8(0x9Fu)), maskED);
|
103
|
+
__m256i badfollowF4 = _mm256_and_si256(
|
104
|
+
_mm256_cmpgt_epi8(current_bytes, _mm256_set1_epi8(0x8Fu)), maskF4);
|
105
|
+
|
106
|
+
*has_error =
|
107
|
+
_mm256_or_si256(*has_error, _mm256_or_si256(badfollowED, badfollowF4));
|
108
|
+
}
|
109
|
+
|
110
|
+
// map off1_hibits => error condition
|
111
|
+
// hibits off1 cur
|
112
|
+
// C => < C2 && true
|
113
|
+
// E => < E1 && < A0
|
114
|
+
// F => < F1 && < 90
|
115
|
+
// else false && false
|
116
|
+
static inline void avx_check_overlong(__m256i current_bytes,
|
117
|
+
__m256i off1_current_bytes,
|
118
|
+
__m256i hibits, __m256i previous_hibits,
|
119
|
+
__m256i *has_error) {
|
120
|
+
__m256i off1_hibits = push_last_byte_of_a_to_b(previous_hibits, hibits);
|
121
|
+
__m256i initial_mins = _mm256_shuffle_epi8(
|
122
|
+
_mm256_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128,
|
123
|
+
-128, -128, -128, // 10xx => false
|
124
|
+
0xC2u, -128, // 110x
|
125
|
+
0xE1u, // 1110
|
126
|
+
0xF1u, // 1111
|
127
|
+
-128, -128, -128, -128, -128, -128, -128, -128, -128,
|
128
|
+
-128, -128, -128, // 10xx => false
|
129
|
+
0xC2u, -128, // 110x
|
130
|
+
0xE1u, // 1110
|
131
|
+
0xF1u), // 1111
|
132
|
+
off1_hibits);
|
133
|
+
|
134
|
+
__m256i initial_under = _mm256_cmpgt_epi8(initial_mins, off1_current_bytes);
|
135
|
+
|
136
|
+
__m256i second_mins = _mm256_shuffle_epi8(
|
137
|
+
_mm256_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128,
|
138
|
+
-128, -128, -128, // 10xx => false
|
139
|
+
127, 127, // 110x => true
|
140
|
+
0xA0u, // 1110
|
141
|
+
0x90u, // 1111
|
142
|
+
-128, -128, -128, -128, -128, -128, -128, -128, -128,
|
143
|
+
-128, -128, -128, // 10xx => false
|
144
|
+
127, 127, // 110x => true
|
145
|
+
0xA0u, // 1110
|
146
|
+
0x90u), // 1111
|
147
|
+
off1_hibits);
|
148
|
+
__m256i second_under = _mm256_cmpgt_epi8(second_mins, current_bytes);
|
149
|
+
*has_error = _mm256_or_si256(*has_error,
|
150
|
+
_mm256_and_si256(initial_under, second_under));
|
151
|
+
}
|
152
|
+
|
153
|
+
struct avx_processed_utf_bytes {
|
154
|
+
__m256i raw_bytes;
|
155
|
+
__m256i high_nibbles;
|
156
|
+
__m256i carried_continuations;
|
157
|
+
};
|
158
|
+
|
159
|
+
// Stores the input bytes and their high nibbles into *answer.
// carried_continuations is left untouched.
static inline void avx_count_nibbles(__m256i bytes,
                                     struct avx_processed_utf_bytes *answer) {
  // The shift works on 16-bit lanes, so the bits that cross over from the
  // neighbouring byte are masked away.
  const __m256i shifted = _mm256_srli_epi16(bytes, 4);
  answer->raw_bytes = bytes;
  answer->high_nibbles = _mm256_and_si256(shifted, _mm256_set1_epi8(0x0F));
}
|
165
|
+
|
166
|
+
// check whether the current bytes are valid UTF-8
|
167
|
+
// at the end of the function, previous gets updated
|
168
|
+
static inline struct avx_processed_utf_bytes
|
169
|
+
avx_check_utf8_bytes(__m256i current_bytes,
|
170
|
+
struct avx_processed_utf_bytes *previous,
|
171
|
+
__m256i *has_error) {
|
172
|
+
struct avx_processed_utf_bytes pb {};
|
173
|
+
avx_count_nibbles(current_bytes, &pb);
|
174
|
+
|
175
|
+
avx_check_smaller_than_0xF4(current_bytes, has_error);
|
176
|
+
|
177
|
+
__m256i initial_lengths = avx_continuation_lengths(pb.high_nibbles);
|
178
|
+
|
179
|
+
pb.carried_continuations =
|
180
|
+
avx_carry_continuations(initial_lengths, previous->carried_continuations);
|
181
|
+
|
182
|
+
avx_check_continuations(initial_lengths, pb.carried_continuations, has_error);
|
183
|
+
|
184
|
+
__m256i off1_current_bytes =
|
185
|
+
push_last_byte_of_a_to_b(previous->raw_bytes, pb.raw_bytes);
|
186
|
+
avx_check_first_continuation_max(current_bytes, off1_current_bytes,
|
187
|
+
has_error);
|
188
|
+
|
189
|
+
avx_check_overlong(current_bytes, off1_current_bytes, pb.high_nibbles,
|
190
|
+
previous->high_nibbles, has_error);
|
191
|
+
return pb;
|
192
|
+
}
|
193
|
+
} // namespace simdjson
|
194
|
+
UNTARGET_REGION // haswell
|
195
|
+
|
196
|
+
#endif // IS_X86_64
|
197
|
+
|
198
|
+
#endif
|
@@ -0,0 +1,169 @@
|
|
1
|
+
#ifndef SIMDJSON_SIMDUTF8CHECK_WESTMERE_H
|
2
|
+
#define SIMDJSON_SIMDUTF8CHECK_WESTMERE_H
|
3
|
+
|
4
|
+
#include "simdjson/portability.h"
|
5
|
+
#include <stddef.h>
|
6
|
+
#include <stdint.h>
|
7
|
+
#include <string.h>
|
8
|
+
#ifdef IS_X86_64
|
9
|
+
|
10
|
+
/*
|
11
|
+
* legal utf-8 byte sequence
|
12
|
+
* http://www.unicode.org/versions/Unicode6.0.0/ch03.pdf - page 94
|
13
|
+
*
|
14
|
+
* Code Points 1st 2s 3s 4s
|
15
|
+
* U+0000..U+007F 00..7F
|
16
|
+
* U+0080..U+07FF C2..DF 80..BF
|
17
|
+
* U+0800..U+0FFF E0 A0..BF 80..BF
|
18
|
+
* U+1000..U+CFFF E1..EC 80..BF 80..BF
|
19
|
+
* U+D000..U+D7FF ED 80..9F 80..BF
|
20
|
+
* U+E000..U+FFFF EE..EF 80..BF 80..BF
|
21
|
+
* U+10000..U+3FFFF F0 90..BF 80..BF 80..BF
|
22
|
+
* U+40000..U+FFFFF F1..F3 80..BF 80..BF 80..BF
|
23
|
+
* U+100000..U+10FFFF F4 80..8F 80..BF 80..BF
|
24
|
+
*
|
25
|
+
*/
|
26
|
+
|
27
|
+
// all byte values must be no larger than 0xF4
|
28
|
+
|
29
|
+
/********** sse code **********/
|
30
|
+
TARGET_WESTMERE
|
31
|
+
|
32
|
+
namespace simdjson {
|
33
|
+
// all byte values must be no larger than 0xF4
|
34
|
+
static inline void check_smaller_than_0xF4(__m128i current_bytes,
|
35
|
+
__m128i *has_error) {
|
36
|
+
// unsigned, saturates to 0 below max
|
37
|
+
*has_error = _mm_or_si128(*has_error,
|
38
|
+
_mm_subs_epu8(current_bytes, _mm_set1_epi8(0xF4u)));
|
39
|
+
}
|
40
|
+
|
41
|
+
// Maps each high nibble (one per lane) to the sequence length announced by a
// byte with that high nibble (0 for a continuation byte).
static inline __m128i continuation_lengths(__m128i high_nibbles) {
  const __m128i length_table =
      _mm_setr_epi8(1, 1, 1, 1, 1, 1, 1, 1, // 0xxx (ASCII)
                    0, 0, 0, 0,             // 10xx (continuation)
                    2, 2,                   // 110x
                    3,                      // 1110
                    4); // 1111, next should be 0 (not checked here)
  return _mm_shuffle_epi8(length_table, high_nibbles);
}
|
50
|
+
|
51
|
+
// Propagates the initial sequence lengths onto the bytes that follow them.
// Each lane receives its own initial length plus the (saturating) decremented
// values carried over from the one and two preceding positions; the lanes
// preceding the current block are taken from the tail of previous_carries.
static inline __m128i carry_continuations(__m128i initial_lengths,
                                          __m128i previous_carries) {
  const __m128i shifted_by_one =
      _mm_alignr_epi8(initial_lengths, previous_carries, 16 - 1);
  const __m128i right1 = _mm_subs_epu8(shifted_by_one, _mm_set1_epi8(1));
  const __m128i sum = _mm_add_epi8(initial_lengths, right1);

  const __m128i shifted_by_two = _mm_alignr_epi8(sum, previous_carries, 16 - 2);
  const __m128i right2 = _mm_subs_epu8(shifted_by_two, _mm_set1_epi8(2));
  return _mm_add_epi8(sum, right2);
}
|
63
|
+
|
64
|
+
// Detects overlapping or missing continuation bytes.
// A lane is in error when
//   (carry > length && length > 0) || (!(carry > length) && !(length > 0)),
// i.e. when (carries > length) == (length > 0). Such lanes are OR-ed into
// *has_error.
static inline void check_continuations(__m128i initial_lengths, __m128i carries,
                                       __m128i *has_error) {
  const __m128i carry_exceeds_length = _mm_cmpgt_epi8(carries, initial_lengths);
  const __m128i length_is_positive =
      _mm_cmpgt_epi8(initial_lengths, _mm_setzero_si128());
  const __m128i overunder =
      _mm_cmpeq_epi8(carry_exceeds_length, length_is_positive);

  *has_error = _mm_or_si128(*has_error, overunder);
}
|
76
|
+
|
77
|
+
// when 0xED is found, next byte must be no larger than 0x9F
|
78
|
+
// when 0xF4 is found, next byte must be no larger than 0x8F
|
79
|
+
// next byte must be continuation, ie sign bit is set, so signed < is ok
|
80
|
+
static inline void check_first_continuation_max(__m128i current_bytes,
|
81
|
+
__m128i off1_current_bytes,
|
82
|
+
__m128i *has_error) {
|
83
|
+
__m128i maskED = _mm_cmpeq_epi8(off1_current_bytes, _mm_set1_epi8(0xEDu));
|
84
|
+
__m128i maskF4 = _mm_cmpeq_epi8(off1_current_bytes, _mm_set1_epi8(0xF4u));
|
85
|
+
|
86
|
+
__m128i badfollowED = _mm_and_si128(
|
87
|
+
_mm_cmpgt_epi8(current_bytes, _mm_set1_epi8(0x9Fu)), maskED);
|
88
|
+
__m128i badfollowF4 = _mm_and_si128(
|
89
|
+
_mm_cmpgt_epi8(current_bytes, _mm_set1_epi8(0x8Fu)), maskF4);
|
90
|
+
|
91
|
+
*has_error = _mm_or_si128(*has_error, _mm_or_si128(badfollowED, badfollowF4));
|
92
|
+
}
|
93
|
+
|
94
|
+
// map off1_hibits => error condition
|
95
|
+
// hibits off1 cur
|
96
|
+
// C => < C2 && true
|
97
|
+
// E => < E1 && < A0
|
98
|
+
// F => < F1 && < 90
|
99
|
+
// else false && false
|
100
|
+
static inline void check_overlong(__m128i current_bytes,
|
101
|
+
__m128i off1_current_bytes, __m128i hibits,
|
102
|
+
__m128i previous_hibits, __m128i *has_error) {
|
103
|
+
__m128i off1_hibits = _mm_alignr_epi8(hibits, previous_hibits, 16 - 1);
|
104
|
+
__m128i initial_mins = _mm_shuffle_epi8(
|
105
|
+
_mm_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
|
106
|
+
-128, -128, // 10xx => false
|
107
|
+
0xC2u, -128, // 110x
|
108
|
+
0xE1u, // 1110
|
109
|
+
0xF1u),
|
110
|
+
off1_hibits);
|
111
|
+
|
112
|
+
__m128i initial_under = _mm_cmpgt_epi8(initial_mins, off1_current_bytes);
|
113
|
+
|
114
|
+
__m128i second_mins = _mm_shuffle_epi8(
|
115
|
+
_mm_setr_epi8(-128, -128, -128, -128, -128, -128, -128, -128, -128, -128,
|
116
|
+
-128, -128, // 10xx => false
|
117
|
+
127, 127, // 110x => true
|
118
|
+
0xA0u, // 1110
|
119
|
+
0x90u),
|
120
|
+
off1_hibits);
|
121
|
+
__m128i second_under = _mm_cmpgt_epi8(second_mins, current_bytes);
|
122
|
+
*has_error =
|
123
|
+
_mm_or_si128(*has_error, _mm_and_si128(initial_under, second_under));
|
124
|
+
}
|
125
|
+
|
126
|
+
struct processed_utf_bytes {
|
127
|
+
__m128i raw_bytes;
|
128
|
+
__m128i high_nibbles;
|
129
|
+
__m128i carried_continuations;
|
130
|
+
};
|
131
|
+
|
132
|
+
// Stores the input bytes and their high nibbles into *answer.
// carried_continuations is left untouched.
static inline void count_nibbles(__m128i bytes,
                                 struct processed_utf_bytes *answer) {
  // The shift works on 16-bit lanes, so the bits that cross over from the
  // neighbouring byte are masked away.
  const __m128i shifted = _mm_srli_epi16(bytes, 4);
  answer->raw_bytes = bytes;
  answer->high_nibbles = _mm_and_si128(shifted, _mm_set1_epi8(0x0F));
}
|
138
|
+
|
139
|
+
// check whether the current bytes are valid UTF-8
|
140
|
+
// at the end of the function, previous gets updated
|
141
|
+
static struct processed_utf_bytes
|
142
|
+
check_utf8_bytes(__m128i current_bytes, struct processed_utf_bytes *previous,
|
143
|
+
__m128i *has_error) {
|
144
|
+
struct processed_utf_bytes pb;
|
145
|
+
count_nibbles(current_bytes, &pb);
|
146
|
+
|
147
|
+
check_smaller_than_0xF4(current_bytes, has_error);
|
148
|
+
|
149
|
+
__m128i initial_lengths = continuation_lengths(pb.high_nibbles);
|
150
|
+
|
151
|
+
pb.carried_continuations =
|
152
|
+
carry_continuations(initial_lengths, previous->carried_continuations);
|
153
|
+
|
154
|
+
check_continuations(initial_lengths, pb.carried_continuations, has_error);
|
155
|
+
|
156
|
+
__m128i off1_current_bytes =
|
157
|
+
_mm_alignr_epi8(pb.raw_bytes, previous->raw_bytes, 16 - 1);
|
158
|
+
check_first_continuation_max(current_bytes, off1_current_bytes, has_error);
|
159
|
+
|
160
|
+
check_overlong(current_bytes, off1_current_bytes, pb.high_nibbles,
|
161
|
+
previous->high_nibbles, has_error);
|
162
|
+
return pb;
|
163
|
+
}
|
164
|
+
} // namespace simdjson
|
165
|
+
UNTARGET_REGION // westmere
|
166
|
+
|
167
|
+
#endif // IS_X86_64
|
168
|
+
|
169
|
+
#endif
|
@@ -0,0 +1,121 @@
|
|
1
|
+
#ifndef SIMDJSON_STAGE1_FIND_MARKS_H
|
2
|
+
#define SIMDJSON_STAGE1_FIND_MARKS_H
|
3
|
+
|
4
|
+
#include "simdjson/common_defs.h"
|
5
|
+
#include "simdjson/parsedjson.h"
|
6
|
+
#include "simdjson/portability.h"
|
7
|
+
#include "simdjson/simdjson.h"
|
8
|
+
#include <cassert>
|
9
|
+
|
10
|
+
namespace simdjson {
|
11
|
+
|
12
|
+
template <Architecture> struct simd_input;
|
13
|
+
|
14
|
+
template <Architecture> uint64_t compute_quote_mask(uint64_t quote_bits);
|
15
|
+
|
16
|
+
namespace {
// for when clmul is unavailable
// Computes the prefix XOR of quote_bits: bit i of the result is the XOR of
// bits 0..i of the input.
[[maybe_unused]] uint64_t portable_compute_quote_mask(uint64_t quote_bits) {
  uint64_t prefix_xor = quote_bits;
  // Doubling the shift each round (1, 2, 4, 8, 16, 32) folds in twice as many
  // lower bits as the round before.
  for (unsigned shift = 1; shift < 64; shift <<= 1) {
    prefix_xor ^= prefix_xor << shift;
  }
  return prefix_xor;
}
} // namespace
|
28
|
+
|
29
|
+
// Holds the state required to perform check_utf8().
|
30
|
+
template <Architecture> struct utf8_checking_state;
|
31
|
+
|
32
|
+
template <Architecture T>
|
33
|
+
void check_utf8(simd_input<T> in, utf8_checking_state<T> &state);
|
34
|
+
|
35
|
+
// Checks if the utf8 validation has found any error.
|
36
|
+
template <Architecture T>
|
37
|
+
ErrorValues check_utf8_errors(utf8_checking_state<T> &state);
|
38
|
+
|
39
|
+
// a straightforward comparison of a mask against input.
|
40
|
+
template <Architecture T>
|
41
|
+
uint64_t cmp_mask_against_input(simd_input<T> in, uint8_t m);
|
42
|
+
|
43
|
+
template <Architecture T> simd_input<T> fill_input(const uint8_t *ptr);
|
44
|
+
|
45
|
+
// find all values less than or equal to maxval, passed as m (using unsigned
|
46
|
+
// arithmetic)
|
47
|
+
template <Architecture T>
|
48
|
+
uint64_t unsigned_lteq_against_input(simd_input<T> in, uint8_t m);
|
49
|
+
|
50
|
+
template <Architecture T>
|
51
|
+
really_inline uint64_t find_odd_backslash_sequences(
|
52
|
+
simd_input<T> in, uint64_t &prev_iter_ends_odd_backslash);
|
53
|
+
|
54
|
+
template <Architecture T>
|
55
|
+
really_inline uint64_t find_quote_mask_and_bits(
|
56
|
+
simd_input<T> in, uint64_t odd_ends, uint64_t &prev_iter_inside_quote,
|
57
|
+
uint64_t &quote_bits, uint64_t &error_mask);
|
58
|
+
|
59
|
+
// do a 'shufti' to detect structural JSON characters
|
60
|
+
// they are { 0x7b } 0x7d : 0x3a [ 0x5b ] 0x5d , 0x2c
|
61
|
+
// these go into the first 3 buckets of the comparison (1/2/4)
|
62
|
+
|
63
|
+
// we are also interested in the four whitespace characters
|
64
|
+
// space 0x20, linefeed 0x0a, horizontal tab 0x09 and carriage return 0x0d
|
65
|
+
// these go into the next 2 buckets of the comparison (8/16)
|
66
|
+
template <Architecture T>
|
67
|
+
void find_whitespace_and_structurals(simd_input<T> in, uint64_t &whitespace,
|
68
|
+
uint64_t &structurals);
|
69
|
+
|
70
|
+
// return a updated structural bit vector with quoted contents cleared out and
|
71
|
+
// pseudo-structural characters added to the mask
|
72
|
+
// updates prev_iter_ends_pseudo_pred which tells us whether the previous
|
73
|
+
// iteration ended on a whitespace or a structural character (which means that
|
74
|
+
// the next iteration
|
75
|
+
// will have a pseudo-structural character at its start)
|
76
|
+
really_inline uint64_t finalize_structurals(
|
77
|
+
uint64_t structurals, uint64_t whitespace, uint64_t quote_mask,
|
78
|
+
uint64_t quote_bits, uint64_t &prev_iter_ends_pseudo_pred) {
|
79
|
+
// mask off anything inside quotes
|
80
|
+
structurals &= ~quote_mask;
|
81
|
+
// add the real quote bits back into our bit_mask as well, so we can
|
82
|
+
// quickly traverse the strings we've spent all this trouble gathering
|
83
|
+
structurals |= quote_bits;
|
84
|
+
// Now, establish "pseudo-structural characters". These are non-whitespace
|
85
|
+
// characters that are (a) outside quotes and (b) have a predecessor that's
|
86
|
+
// either whitespace or a structural character. This means that subsequent
|
87
|
+
// passes will get a chance to encounter the first character of every string
|
88
|
+
// of non-whitespace and, if we're parsing an atom like true/false/null or a
|
89
|
+
// number we can stop at the first whitespace or structural character
|
90
|
+
// following it.
|
91
|
+
|
92
|
+
// a qualified predecessor is something that can happen 1 position before an
|
93
|
+
// pseudo-structural character
|
94
|
+
uint64_t pseudo_pred = structurals | whitespace;
|
95
|
+
|
96
|
+
uint64_t shifted_pseudo_pred =
|
97
|
+
(pseudo_pred << 1) | prev_iter_ends_pseudo_pred;
|
98
|
+
prev_iter_ends_pseudo_pred = pseudo_pred >> 63;
|
99
|
+
uint64_t pseudo_structurals =
|
100
|
+
shifted_pseudo_pred & (~whitespace) & (~quote_mask);
|
101
|
+
structurals |= pseudo_structurals;
|
102
|
+
|
103
|
+
// now, we've used our close quotes all we need to. So let's switch them off
|
104
|
+
// they will be off in the quote mask and on in quote bits.
|
105
|
+
structurals &= ~(quote_bits & ~quote_mask);
|
106
|
+
return structurals;
|
107
|
+
}
|
108
|
+
|
109
|
+
template <Architecture T = Architecture::NATIVE>
|
110
|
+
int find_structural_bits(const uint8_t *buf, size_t len,
|
111
|
+
simdjson::ParsedJson &pj);
|
112
|
+
|
113
|
+
template <Architecture T = Architecture::NATIVE>
|
114
|
+
int find_structural_bits(const char *buf, size_t len,
|
115
|
+
simdjson::ParsedJson &pj) {
|
116
|
+
return find_structural_bits((const uint8_t *)buf, len, pj);
|
117
|
+
}
|
118
|
+
|
119
|
+
} // namespace simdjson
|
120
|
+
|
121
|
+
#endif
|