sin_fast_blank 4.0.0 → 4.0.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/sin_fast_blank/sin_fast_blank.c +200 -239
- metadata +6 -6
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 715b972c44a78f3a18dcee12c9133c5b38acce03a76fee4031df21d3c5ec8b2f
|
|
4
|
+
data.tar.gz: bce4b9d3bd8058ab567c620f0cd684f817f4193b20271e481c25a0560954631b
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c8643cd88797cb3d5824536ddfc7f60b4f7bf5718a20a3464b9231720c50e0558fa5234c1f23f0adcb1a8b31664d3a4bb25c3209545619ea0a00e1f5e89408bf
|
|
7
|
+
data.tar.gz: 14d825f306f16680badaab6c8a1153ef817f889f73ad45169ec6069abf67f25358ed4324c4184c742760cb198cd2066ad32f249fd71e67ddbeafbd755ee2c88f
|
|
@@ -2,34 +2,28 @@
|
|
|
2
2
|
#include <ruby/encoding.h>
|
|
3
3
|
#include <stdbool.h>
|
|
4
4
|
#ifdef __SSE2__
|
|
5
|
-
|
|
5
|
+
#include <emmintrin.h>
|
|
6
6
|
#endif
|
|
7
7
|
#ifdef __AVX2__
|
|
8
|
-
|
|
8
|
+
#include <immintrin.h>
|
|
9
9
|
#endif
|
|
10
|
-
#
|
|
11
|
-
|
|
10
|
+
#if defined(__ARM_NEON) && defined(__aarch64__)
|
|
11
|
+
#include <arm_neon.h>
|
|
12
12
|
#endif
|
|
13
13
|
|
|
14
14
|
#define STR_ENC_GET(str) rb_enc_from_index(ENCODING_GET(str))
|
|
15
15
|
|
|
16
|
-
#define
|
|
17
|
-
#define
|
|
18
|
-
#define
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
return c == ASCII_BLANK_SPACE ||
|
|
25
|
-
c == ASCII_BLANK_TAB ||
|
|
26
|
-
c == ASCII_BLANK_LF ||
|
|
27
|
-
c == ASCII_BLANK_VT ||
|
|
28
|
-
c == ASCII_BLANK_FF ||
|
|
29
|
-
c == ASCII_BLANK_CR;
|
|
16
|
+
#define ASCII_WS_RANGE_MIN 0x09
|
|
17
|
+
#define ASCII_WS_RANGE_MAX 0x0d
|
|
18
|
+
#define ASCII_WS_SPACE 0x20
|
|
19
|
+
|
|
20
|
+
static inline bool is_ascii_blank_char(unsigned char c) { return (c >= ASCII_WS_RANGE_MIN && c <= ASCII_WS_RANGE_MAX) || c == ASCII_WS_SPACE; }
|
|
21
|
+
|
|
22
|
+
static inline bool is_ascii_blank_or_null_char(unsigned char c) {
|
|
23
|
+
return c == 0x00 || (c >= ASCII_WS_RANGE_MIN && c <= ASCII_WS_RANGE_MAX) || c == ASCII_WS_SPACE;
|
|
30
24
|
}
|
|
31
25
|
|
|
32
|
-
static inline
|
|
26
|
+
static inline bool is_unicode_blank(unsigned int codepoint) {
|
|
33
27
|
switch (codepoint) {
|
|
34
28
|
case 0x9:
|
|
35
29
|
case 0xa:
|
|
@@ -56,65 +50,15 @@ static inline int is_unicode_blank(unsigned int codepoint) {
|
|
|
56
50
|
case 0x202f:
|
|
57
51
|
case 0x205f:
|
|
58
52
|
case 0x3000:
|
|
59
|
-
return
|
|
53
|
+
return true;
|
|
60
54
|
default:
|
|
61
|
-
return
|
|
55
|
+
return false;
|
|
62
56
|
}
|
|
63
57
|
}
|
|
64
58
|
|
|
65
|
-
|
|
66
|
-
static bool
|
|
67
|
-
|
|
68
|
-
const __m256i space = _mm256_set1_epi8(ASCII_BLANK_SPACE);
|
|
69
|
-
const __m256i tab = _mm256_set1_epi8(ASCII_BLANK_TAB);
|
|
70
|
-
const __m256i lf = _mm256_set1_epi8(ASCII_BLANK_LF);
|
|
71
|
-
const __m256i vt = _mm256_set1_epi8(ASCII_BLANK_VT);
|
|
72
|
-
const __m256i ff = _mm256_set1_epi8(ASCII_BLANK_FF);
|
|
73
|
-
const __m256i cr = _mm256_set1_epi8(ASCII_BLANK_CR);
|
|
74
|
-
|
|
75
|
-
size_t i = 0;
|
|
76
|
-
|
|
77
|
-
for (; i + 31 < len; i += 32) {
|
|
78
|
-
__m256i chunk = _mm256_loadu_si256((const __m256i *)(ptr + i));
|
|
79
|
-
|
|
80
|
-
__m256i non_ascii = _mm256_and_si256(chunk, ascii_mask);
|
|
81
|
-
if (!_mm256_testz_si256(non_ascii, non_ascii)) {
|
|
82
|
-
for (size_t j = 0; j < 32; j++) {
|
|
83
|
-
if (ptr[i + j] >= 0x80) {
|
|
84
|
-
*non_ascii_pos = ptr + i + j;
|
|
85
|
-
return false;
|
|
86
|
-
}
|
|
87
|
-
}
|
|
88
|
-
}
|
|
89
|
-
|
|
90
|
-
__m256i is_space = _mm256_cmpeq_epi8(chunk, space);
|
|
91
|
-
__m256i is_tab = _mm256_cmpeq_epi8(chunk, tab);
|
|
92
|
-
__m256i is_lf = _mm256_cmpeq_epi8(chunk, lf);
|
|
93
|
-
__m256i is_vt = _mm256_cmpeq_epi8(chunk, vt);
|
|
94
|
-
__m256i is_ff = _mm256_cmpeq_epi8(chunk, ff);
|
|
95
|
-
__m256i is_cr = _mm256_cmpeq_epi8(chunk, cr);
|
|
96
|
-
|
|
97
|
-
__m256i is_blank = _mm256_or_si256(is_space, is_tab);
|
|
98
|
-
is_blank = _mm256_or_si256(is_blank, is_lf);
|
|
99
|
-
is_blank = _mm256_or_si256(is_blank, is_vt);
|
|
100
|
-
is_blank = _mm256_or_si256(is_blank, is_ff);
|
|
101
|
-
is_blank = _mm256_or_si256(is_blank, is_cr);
|
|
102
|
-
|
|
103
|
-
if (_mm256_movemask_epi8(is_blank) != -1) {
|
|
104
|
-
for (size_t j = 0; j < 32; j++) {
|
|
105
|
-
unsigned char c = ptr[i + j];
|
|
106
|
-
if (c >= 0x80) {
|
|
107
|
-
*non_ascii_pos = ptr + i + j;
|
|
108
|
-
return false;
|
|
109
|
-
}
|
|
110
|
-
if (!is_ascii_blank_char(c)) {
|
|
111
|
-
return false;
|
|
112
|
-
}
|
|
113
|
-
}
|
|
114
|
-
}
|
|
115
|
-
}
|
|
116
|
-
|
|
117
|
-
for (; i < len; i++) {
|
|
59
|
+
/* Returns true if all blank. On false, sets *non_ascii_pos if non-ASCII found. NULL if non-blank ASCII found. */
|
|
60
|
+
static inline bool scan_ascii_blank(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
|
|
61
|
+
for (size_t i = 0; i < len; i++) {
|
|
118
62
|
unsigned char c = ptr[i];
|
|
119
63
|
if (c >= 0x80) {
|
|
120
64
|
*non_ascii_pos = ptr + i;
|
|
@@ -124,207 +68,227 @@ static bool check_blank_avx2(const unsigned char *ptr, size_t len, const unsigne
|
|
|
124
68
|
return false;
|
|
125
69
|
}
|
|
126
70
|
}
|
|
127
|
-
|
|
128
71
|
return true;
|
|
129
72
|
}
|
|
130
|
-
#endif
|
|
131
|
-
|
|
132
|
-
#ifdef __SSE2__
|
|
133
|
-
static bool check_blank_sse2(const unsigned char *ptr, size_t len, const unsigned char **non_ascii_pos) {
|
|
134
|
-
const __m128i ascii_mask = _mm_set1_epi8(0x80);
|
|
135
|
-
const __m128i space = _mm_set1_epi8(ASCII_BLANK_SPACE);
|
|
136
|
-
const __m128i tab = _mm_set1_epi8(ASCII_BLANK_TAB);
|
|
137
|
-
const __m128i lf = _mm_set1_epi8(ASCII_BLANK_LF);
|
|
138
|
-
const __m128i vt = _mm_set1_epi8(ASCII_BLANK_VT);
|
|
139
|
-
const __m128i ff = _mm_set1_epi8(ASCII_BLANK_FF);
|
|
140
|
-
const __m128i cr = _mm_set1_epi8(ASCII_BLANK_CR);
|
|
141
|
-
|
|
142
|
-
size_t i = 0;
|
|
143
|
-
|
|
144
|
-
for (; i + 15 < len; i += 16) {
|
|
145
|
-
__m128i chunk = _mm_loadu_si128((const __m128i *)(ptr + i));
|
|
146
|
-
|
|
147
|
-
__m128i non_ascii = _mm_and_si128(chunk, ascii_mask);
|
|
148
|
-
if (_mm_movemask_epi8(non_ascii) != 0) {
|
|
149
|
-
for (size_t j = 0; j < 16; j++) {
|
|
150
|
-
if (ptr[i + j] >= 0x80) {
|
|
151
|
-
*non_ascii_pos = ptr + i + j;
|
|
152
|
-
return false;
|
|
153
|
-
}
|
|
154
|
-
}
|
|
155
|
-
}
|
|
156
|
-
|
|
157
|
-
__m128i is_space = _mm_cmpeq_epi8(chunk, space);
|
|
158
|
-
__m128i is_tab = _mm_cmpeq_epi8(chunk, tab);
|
|
159
|
-
__m128i is_lf = _mm_cmpeq_epi8(chunk, lf);
|
|
160
|
-
__m128i is_vt = _mm_cmpeq_epi8(chunk, vt);
|
|
161
|
-
__m128i is_ff = _mm_cmpeq_epi8(chunk, ff);
|
|
162
|
-
__m128i is_cr = _mm_cmpeq_epi8(chunk, cr);
|
|
163
|
-
|
|
164
|
-
__m128i is_blank = _mm_or_si128(is_space, is_tab);
|
|
165
|
-
is_blank = _mm_or_si128(is_blank, is_lf);
|
|
166
|
-
is_blank = _mm_or_si128(is_blank, is_vt);
|
|
167
|
-
is_blank = _mm_or_si128(is_blank, is_ff);
|
|
168
|
-
is_blank = _mm_or_si128(is_blank, is_cr);
|
|
169
|
-
|
|
170
|
-
if (_mm_movemask_epi8(is_blank) != 0xFFFF) {
|
|
171
|
-
for (size_t j = 0; j < 16; j++) {
|
|
172
|
-
unsigned char c = ptr[i + j];
|
|
173
|
-
if (c >= 0x80) {
|
|
174
|
-
*non_ascii_pos = ptr + i + j;
|
|
175
|
-
return false;
|
|
176
|
-
}
|
|
177
|
-
if (!is_ascii_blank_char(c)) {
|
|
178
|
-
return false;
|
|
179
|
-
}
|
|
180
|
-
}
|
|
181
|
-
}
|
|
182
|
-
}
|
|
183
73
|
|
|
184
|
-
|
|
74
|
+
static inline bool scan_ascii_blank_or_null(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
|
|
75
|
+
for (size_t i = 0; i < len; i++) {
|
|
185
76
|
unsigned char c = ptr[i];
|
|
186
77
|
if (c >= 0x80) {
|
|
187
78
|
*non_ascii_pos = ptr + i;
|
|
188
79
|
return false;
|
|
189
80
|
}
|
|
190
|
-
if (!
|
|
81
|
+
if (!is_ascii_blank_or_null_char(c)) {
|
|
191
82
|
return false;
|
|
192
83
|
}
|
|
193
84
|
}
|
|
194
|
-
|
|
195
85
|
return true;
|
|
196
86
|
}
|
|
197
|
-
#endif
|
|
198
87
|
|
|
199
|
-
#ifdef
|
|
200
|
-
static bool
|
|
201
|
-
const
|
|
202
|
-
const
|
|
203
|
-
const
|
|
204
|
-
const uint8x16_t lf = vdupq_n_u8(ASCII_BLANK_LF);
|
|
205
|
-
const uint8x16_t vt = vdupq_n_u8(ASCII_BLANK_VT);
|
|
206
|
-
const uint8x16_t ff = vdupq_n_u8(ASCII_BLANK_FF);
|
|
207
|
-
const uint8x16_t cr = vdupq_n_u8(ASCII_BLANK_CR);
|
|
88
|
+
#ifdef __AVX2__
|
|
89
|
+
static bool check_blank_avx2(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
|
|
90
|
+
const __m256i ws_base = _mm256_set1_epi8(ASCII_WS_RANGE_MIN);
|
|
91
|
+
const __m256i four = _mm256_set1_epi8(ASCII_WS_RANGE_MAX - ASCII_WS_RANGE_MIN);
|
|
92
|
+
const __m256i space = _mm256_set1_epi8(ASCII_WS_SPACE);
|
|
208
93
|
|
|
209
94
|
size_t i = 0;
|
|
95
|
+
for (; i + 31 < len; i += 32) {
|
|
96
|
+
__m256i chunk = _mm256_loadu_si256((const __m256i*)(ptr + i));
|
|
97
|
+
__m256i adjusted = _mm256_sub_epi8(chunk, ws_base);
|
|
98
|
+
__m256i in_range = _mm256_cmpeq_epi8(_mm256_min_epu8(adjusted, four), adjusted);
|
|
99
|
+
__m256i is_sp = _mm256_cmpeq_epi8(chunk, space);
|
|
100
|
+
__m256i is_blank = _mm256_or_si256(in_range, is_sp);
|
|
101
|
+
|
|
102
|
+
int mask = _mm256_movemask_epi8(is_blank);
|
|
103
|
+
if (mask != -1) {
|
|
104
|
+
int first = __builtin_ctz(~mask);
|
|
105
|
+
unsigned char c = ptr[i + first];
|
|
106
|
+
if (c >= 0x80) {
|
|
107
|
+
*non_ascii_pos = ptr + i + first;
|
|
108
|
+
}
|
|
109
|
+
return false;
|
|
110
|
+
}
|
|
111
|
+
}
|
|
210
112
|
|
|
211
|
-
|
|
212
|
-
|
|
113
|
+
return scan_ascii_blank(ptr + i, len - i, non_ascii_pos);
|
|
114
|
+
}
|
|
213
115
|
|
|
214
|
-
|
|
215
|
-
|
|
116
|
+
static bool check_ascii_blank_avx2(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
|
|
117
|
+
const __m256i ws_base = _mm256_set1_epi8(ASCII_WS_RANGE_MIN);
|
|
118
|
+
const __m256i four = _mm256_set1_epi8(ASCII_WS_RANGE_MAX - ASCII_WS_RANGE_MIN);
|
|
119
|
+
const __m256i space = _mm256_set1_epi8(ASCII_WS_SPACE);
|
|
120
|
+
const __m256i zero = _mm256_setzero_si256();
|
|
216
121
|
|
|
217
|
-
|
|
218
|
-
|
|
219
|
-
|
|
220
|
-
|
|
221
|
-
|
|
222
|
-
|
|
122
|
+
size_t i = 0;
|
|
123
|
+
for (; i + 31 < len; i += 32) {
|
|
124
|
+
__m256i chunk = _mm256_loadu_si256((const __m256i*)(ptr + i));
|
|
125
|
+
__m256i adjusted = _mm256_sub_epi8(chunk, ws_base);
|
|
126
|
+
__m256i in_range = _mm256_cmpeq_epi8(_mm256_min_epu8(adjusted, four), adjusted);
|
|
127
|
+
__m256i is_sp = _mm256_cmpeq_epi8(chunk, space);
|
|
128
|
+
__m256i is_null = _mm256_cmpeq_epi8(chunk, zero);
|
|
129
|
+
__m256i is_blank = _mm256_or_si256(_mm256_or_si256(in_range, is_sp), is_null);
|
|
130
|
+
|
|
131
|
+
int mask = _mm256_movemask_epi8(is_blank);
|
|
132
|
+
if (mask != -1) {
|
|
133
|
+
int first = __builtin_ctz(~mask);
|
|
134
|
+
unsigned char c = ptr[i + first];
|
|
135
|
+
if (c >= 0x80) {
|
|
136
|
+
*non_ascii_pos = ptr + i + first;
|
|
223
137
|
}
|
|
138
|
+
return false;
|
|
224
139
|
}
|
|
140
|
+
}
|
|
225
141
|
|
|
226
|
-
|
|
227
|
-
|
|
228
|
-
|
|
229
|
-
uint8x16_t is_vt = vceqq_u8(chunk, vt);
|
|
230
|
-
uint8x16_t is_ff = vceqq_u8(chunk, ff);
|
|
231
|
-
uint8x16_t is_cr = vceqq_u8(chunk, cr);
|
|
142
|
+
return scan_ascii_blank_or_null(ptr + i, len - i, non_ascii_pos);
|
|
143
|
+
}
|
|
144
|
+
#endif
|
|
232
145
|
|
|
233
|
-
|
|
234
|
-
|
|
235
|
-
|
|
236
|
-
|
|
237
|
-
|
|
146
|
+
#ifdef __SSE2__
|
|
147
|
+
static bool check_blank_sse2(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
|
|
148
|
+
const __m128i ws_base = _mm_set1_epi8(ASCII_WS_RANGE_MIN);
|
|
149
|
+
const __m128i four = _mm_set1_epi8(ASCII_WS_RANGE_MAX - ASCII_WS_RANGE_MIN);
|
|
150
|
+
const __m128i space = _mm_set1_epi8(ASCII_WS_SPACE);
|
|
238
151
|
|
|
239
|
-
|
|
240
|
-
|
|
241
|
-
|
|
242
|
-
|
|
243
|
-
|
|
244
|
-
|
|
245
|
-
|
|
246
|
-
|
|
247
|
-
|
|
248
|
-
|
|
152
|
+
size_t i = 0;
|
|
153
|
+
for (; i + 15 < len; i += 16) {
|
|
154
|
+
__m128i chunk = _mm_loadu_si128((const __m128i*)(ptr + i));
|
|
155
|
+
__m128i adjusted = _mm_sub_epi8(chunk, ws_base);
|
|
156
|
+
__m128i in_range = _mm_cmpeq_epi8(_mm_min_epu8(adjusted, four), adjusted);
|
|
157
|
+
__m128i is_sp = _mm_cmpeq_epi8(chunk, space);
|
|
158
|
+
__m128i is_blank = _mm_or_si128(in_range, is_sp);
|
|
159
|
+
|
|
160
|
+
int mask = _mm_movemask_epi8(is_blank);
|
|
161
|
+
if (mask != 0xFFFF) {
|
|
162
|
+
int first = __builtin_ctz(~mask & 0xFFFF);
|
|
163
|
+
unsigned char c = ptr[i + first];
|
|
164
|
+
if (c >= 0x80) {
|
|
165
|
+
*non_ascii_pos = ptr + i + first;
|
|
249
166
|
}
|
|
167
|
+
return false;
|
|
250
168
|
}
|
|
251
169
|
}
|
|
252
170
|
|
|
253
|
-
|
|
254
|
-
|
|
255
|
-
|
|
256
|
-
|
|
257
|
-
|
|
258
|
-
|
|
259
|
-
|
|
171
|
+
return scan_ascii_blank(ptr + i, len - i, non_ascii_pos);
|
|
172
|
+
}
|
|
173
|
+
|
|
174
|
+
static bool check_ascii_blank_sse2(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
|
|
175
|
+
const __m128i ws_base = _mm_set1_epi8(ASCII_WS_RANGE_MIN);
|
|
176
|
+
const __m128i four = _mm_set1_epi8(ASCII_WS_RANGE_MAX - ASCII_WS_RANGE_MIN);
|
|
177
|
+
const __m128i space = _mm_set1_epi8(ASCII_WS_SPACE);
|
|
178
|
+
const __m128i zero = _mm_setzero_si128();
|
|
179
|
+
|
|
180
|
+
size_t i = 0;
|
|
181
|
+
for (; i + 15 < len; i += 16) {
|
|
182
|
+
__m128i chunk = _mm_loadu_si128((const __m128i*)(ptr + i));
|
|
183
|
+
__m128i adjusted = _mm_sub_epi8(chunk, ws_base);
|
|
184
|
+
__m128i in_range = _mm_cmpeq_epi8(_mm_min_epu8(adjusted, four), adjusted);
|
|
185
|
+
__m128i is_sp = _mm_cmpeq_epi8(chunk, space);
|
|
186
|
+
__m128i is_null = _mm_cmpeq_epi8(chunk, zero);
|
|
187
|
+
__m128i is_blank = _mm_or_si128(_mm_or_si128(in_range, is_sp), is_null);
|
|
188
|
+
|
|
189
|
+
int mask = _mm_movemask_epi8(is_blank);
|
|
190
|
+
if (mask != 0xFFFF) {
|
|
191
|
+
int first = __builtin_ctz(~mask & 0xFFFF);
|
|
192
|
+
unsigned char c = ptr[i + first];
|
|
193
|
+
if (c >= 0x80) {
|
|
194
|
+
*non_ascii_pos = ptr + i + first;
|
|
195
|
+
}
|
|
260
196
|
return false;
|
|
261
197
|
}
|
|
262
198
|
}
|
|
263
199
|
|
|
264
|
-
return
|
|
200
|
+
return scan_ascii_blank_or_null(ptr + i, len - i, non_ascii_pos);
|
|
265
201
|
}
|
|
266
202
|
#endif
|
|
267
203
|
|
|
268
|
-
|
|
269
|
-
|
|
270
|
-
|
|
204
|
+
#if defined(__ARM_NEON) && defined(__aarch64__)
|
|
205
|
+
static bool check_blank_neon(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
|
|
206
|
+
const uint8x16_t ws_base = vdupq_n_u8(ASCII_WS_RANGE_MIN);
|
|
207
|
+
const uint8x16_t four = vdupq_n_u8(ASCII_WS_RANGE_MAX - ASCII_WS_RANGE_MIN);
|
|
208
|
+
const uint8x16_t space = vdupq_n_u8(ASCII_WS_SPACE);
|
|
271
209
|
|
|
272
|
-
|
|
273
|
-
|
|
274
|
-
|
|
210
|
+
size_t i = 0;
|
|
211
|
+
for (; i + 15 < len; i += 16) {
|
|
212
|
+
uint8x16_t chunk = vld1q_u8(ptr + i);
|
|
213
|
+
uint8x16_t adjusted = vsubq_u8(chunk, ws_base);
|
|
214
|
+
uint8x16_t in_range = vceqq_u8(vminq_u8(adjusted, four), adjusted);
|
|
215
|
+
uint8x16_t is_sp = vceqq_u8(chunk, space);
|
|
216
|
+
uint8x16_t is_blank = vorrq_u8(in_range, is_sp);
|
|
217
|
+
|
|
218
|
+
if (vminvq_u8(is_blank) == 0) {
|
|
219
|
+
if (!scan_ascii_blank(ptr + i, 16, non_ascii_pos)) return false;
|
|
275
220
|
}
|
|
221
|
+
}
|
|
276
222
|
|
|
277
|
-
|
|
278
|
-
|
|
223
|
+
return scan_ascii_blank(ptr + i, len - i, non_ascii_pos);
|
|
224
|
+
}
|
|
225
|
+
|
|
226
|
+
static bool check_ascii_blank_neon(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
|
|
227
|
+
const uint8x16_t ws_base = vdupq_n_u8(ASCII_WS_RANGE_MIN);
|
|
228
|
+
const uint8x16_t four = vdupq_n_u8(ASCII_WS_RANGE_MAX - ASCII_WS_RANGE_MIN);
|
|
229
|
+
const uint8x16_t space = vdupq_n_u8(ASCII_WS_SPACE);
|
|
230
|
+
const uint8x16_t zero = vdupq_n_u8(0);
|
|
231
|
+
|
|
232
|
+
size_t i = 0;
|
|
233
|
+
for (; i + 15 < len; i += 16) {
|
|
234
|
+
uint8x16_t chunk = vld1q_u8(ptr + i);
|
|
235
|
+
uint8x16_t adjusted = vsubq_u8(chunk, ws_base);
|
|
236
|
+
uint8x16_t in_range = vceqq_u8(vminq_u8(adjusted, four), adjusted);
|
|
237
|
+
uint8x16_t is_sp = vceqq_u8(chunk, space);
|
|
238
|
+
uint8x16_t is_null = vceqq_u8(chunk, zero);
|
|
239
|
+
uint8x16_t is_blank = vorrq_u8(vorrq_u8(in_range, is_sp), is_null);
|
|
240
|
+
|
|
241
|
+
if (vminvq_u8(is_blank) == 0) {
|
|
242
|
+
if (!scan_ascii_blank_or_null(ptr + i, 16, non_ascii_pos)) return false;
|
|
279
243
|
}
|
|
280
244
|
}
|
|
281
245
|
|
|
282
|
-
return
|
|
246
|
+
return scan_ascii_blank_or_null(ptr + i, len - i, non_ascii_pos);
|
|
247
|
+
}
|
|
248
|
+
#endif
|
|
249
|
+
|
|
250
|
+
#if !defined(__AVX2__) && !defined(__SSE2__) && !(defined(__ARM_NEON) && defined(__aarch64__))
|
|
251
|
+
static bool check_blank_scalar(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
|
|
252
|
+
return scan_ascii_blank(ptr, len, non_ascii_pos);
|
|
283
253
|
}
|
|
284
254
|
|
|
255
|
+
static bool check_ascii_blank_scalar(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
|
|
256
|
+
return scan_ascii_blank_or_null(ptr, len, non_ascii_pos);
|
|
257
|
+
}
|
|
258
|
+
#endif
|
|
259
|
+
|
|
285
260
|
static VALUE rb_str_blank(VALUE str) {
|
|
286
261
|
long len = RSTRING_LEN(str);
|
|
287
|
-
if (len == 0)
|
|
288
|
-
return Qtrue;
|
|
289
|
-
}
|
|
262
|
+
if (len == 0) return Qtrue;
|
|
290
263
|
|
|
291
|
-
const unsigned char
|
|
292
|
-
const unsigned char
|
|
293
|
-
rb_encoding
|
|
264
|
+
const unsigned char* ptr = (const unsigned char*)RSTRING_PTR(str);
|
|
265
|
+
const unsigned char* end = ptr + len;
|
|
266
|
+
rb_encoding* enc = STR_ENC_GET(str);
|
|
294
267
|
|
|
295
268
|
if (rb_enc_asciicompat(enc)) {
|
|
296
|
-
const unsigned char
|
|
269
|
+
const unsigned char* non_ascii_pos = NULL;
|
|
297
270
|
bool is_blank = false;
|
|
298
271
|
|
|
299
272
|
#ifdef __AVX2__
|
|
300
|
-
is_blank = check_blank_avx2(ptr, len, &non_ascii_pos);
|
|
273
|
+
is_blank = check_blank_avx2(ptr, (size_t)len, &non_ascii_pos);
|
|
301
274
|
#elif defined(__SSE2__)
|
|
302
|
-
is_blank = check_blank_sse2(ptr, len, &non_ascii_pos);
|
|
303
|
-
#elif defined(__ARM_NEON)
|
|
304
|
-
is_blank = check_blank_neon(ptr, len, &non_ascii_pos);
|
|
275
|
+
is_blank = check_blank_sse2(ptr, (size_t)len, &non_ascii_pos);
|
|
276
|
+
#elif defined(__ARM_NEON) && defined(__aarch64__)
|
|
277
|
+
is_blank = check_blank_neon(ptr, (size_t)len, &non_ascii_pos);
|
|
305
278
|
#else
|
|
306
|
-
is_blank = check_blank_scalar(ptr, len, &non_ascii_pos);
|
|
279
|
+
is_blank = check_blank_scalar(ptr, (size_t)len, &non_ascii_pos);
|
|
307
280
|
#endif
|
|
308
281
|
|
|
309
|
-
if (is_blank)
|
|
310
|
-
|
|
311
|
-
}
|
|
282
|
+
if (is_blank) return Qtrue;
|
|
283
|
+
if (non_ascii_pos == NULL) return Qfalse;
|
|
312
284
|
|
|
313
|
-
|
|
314
|
-
return Qfalse;
|
|
315
|
-
}
|
|
316
|
-
|
|
317
|
-
ptr = (const unsigned char *)non_ascii_pos;
|
|
285
|
+
ptr = non_ascii_pos;
|
|
318
286
|
}
|
|
319
287
|
|
|
320
|
-
while (
|
|
288
|
+
while (ptr < end) {
|
|
321
289
|
int clen;
|
|
322
|
-
unsigned int codepoint = rb_enc_codepoint_len((const char
|
|
323
|
-
|
|
324
|
-
if (!is_unicode_blank(codepoint)) {
|
|
325
|
-
return Qfalse;
|
|
326
|
-
}
|
|
327
|
-
|
|
290
|
+
unsigned int codepoint = rb_enc_codepoint_len((const char*)ptr, (const char*)end, &clen, enc);
|
|
291
|
+
if (!is_unicode_blank(codepoint)) return Qfalse;
|
|
328
292
|
ptr += clen;
|
|
329
293
|
}
|
|
330
294
|
|
|
@@ -333,39 +297,36 @@ static VALUE rb_str_blank(VALUE str) {
|
|
|
333
297
|
|
|
334
298
|
static VALUE rb_str_ascii_blank(VALUE str) {
|
|
335
299
|
long len = RSTRING_LEN(str);
|
|
336
|
-
if (len == 0)
|
|
337
|
-
return Qtrue;
|
|
338
|
-
}
|
|
300
|
+
if (len == 0) return Qtrue;
|
|
339
301
|
|
|
340
|
-
const char
|
|
341
|
-
const char
|
|
342
|
-
rb_encoding
|
|
302
|
+
const unsigned char* ptr = (const unsigned char*)RSTRING_PTR(str);
|
|
303
|
+
const unsigned char* end = ptr + len;
|
|
304
|
+
rb_encoding* enc = STR_ENC_GET(str);
|
|
343
305
|
|
|
344
306
|
if (rb_enc_asciicompat(enc)) {
|
|
345
|
-
|
|
346
|
-
|
|
307
|
+
const unsigned char* non_ascii_pos = NULL;
|
|
308
|
+
bool is_blank = false;
|
|
347
309
|
|
|
348
|
-
|
|
349
|
-
|
|
350
|
-
|
|
310
|
+
#ifdef __AVX2__
|
|
311
|
+
is_blank = check_ascii_blank_avx2(ptr, (size_t)len, &non_ascii_pos);
|
|
312
|
+
#elif defined(__SSE2__)
|
|
313
|
+
is_blank = check_ascii_blank_sse2(ptr, (size_t)len, &non_ascii_pos);
|
|
314
|
+
#elif defined(__ARM_NEON) && defined(__aarch64__)
|
|
315
|
+
is_blank = check_ascii_blank_neon(ptr, (size_t)len, &non_ascii_pos);
|
|
316
|
+
#else
|
|
317
|
+
is_blank = check_ascii_blank_scalar(ptr, (size_t)len, &non_ascii_pos);
|
|
318
|
+
#endif
|
|
351
319
|
|
|
352
|
-
|
|
353
|
-
|
|
354
|
-
}
|
|
355
|
-
}
|
|
320
|
+
if (is_blank) return Qtrue;
|
|
321
|
+
if (non_ascii_pos == NULL) return Qfalse;
|
|
356
322
|
|
|
357
|
-
|
|
323
|
+
ptr = non_ascii_pos;
|
|
358
324
|
}
|
|
359
325
|
|
|
360
|
-
FULL_CHECK:;
|
|
361
326
|
while (ptr < end) {
|
|
362
327
|
int clen;
|
|
363
|
-
unsigned int codepoint = rb_enc_codepoint_len(ptr, end, &clen, enc);
|
|
364
|
-
|
|
365
|
-
if (codepoint != 0 && !rb_isspace(codepoint)) {
|
|
366
|
-
return Qfalse;
|
|
367
|
-
}
|
|
368
|
-
|
|
328
|
+
unsigned int codepoint = rb_enc_codepoint_len((const char*)ptr, (const char*)end, &clen, enc);
|
|
329
|
+
if (codepoint != 0 && !rb_isspace(codepoint)) return Qfalse;
|
|
369
330
|
ptr += clen;
|
|
370
331
|
}
|
|
371
332
|
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: sin_fast_blank
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 4.0.0
|
|
4
|
+
version: 4.0.1
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Masahiro
|
|
@@ -19,15 +19,15 @@ extra_rdoc_files: []
|
|
|
19
19
|
files:
|
|
20
20
|
- ext/sin_fast_blank/extconf.rb
|
|
21
21
|
- ext/sin_fast_blank/sin_fast_blank.c
|
|
22
|
-
homepage: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.0
|
|
22
|
+
homepage: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.1
|
|
23
23
|
licenses:
|
|
24
24
|
- MIT
|
|
25
25
|
metadata:
|
|
26
|
-
homepage_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.0
|
|
27
|
-
source_code_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.0
|
|
28
|
-
changelog_uri: https://github.com/cadenza-tech/sin_fast_blank/blob/v4.0.0/CHANGELOG.md
|
|
26
|
+
homepage_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.1
|
|
27
|
+
source_code_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.1
|
|
28
|
+
changelog_uri: https://github.com/cadenza-tech/sin_fast_blank/blob/v4.0.1/CHANGELOG.md
|
|
29
29
|
bug_tracker_uri: https://github.com/cadenza-tech/sin_fast_blank/issues
|
|
30
|
-
documentation_uri: https://rubydoc.info/gems/sin_fast_blank/4.0.0
|
|
30
|
+
documentation_uri: https://rubydoc.info/gems/sin_fast_blank/4.0.1
|
|
31
31
|
funding_uri: https://patreon.com/CadenzaTech
|
|
32
32
|
rubygems_mfa_required: 'true'
|
|
33
33
|
required_jruby_version: ">= 9.3.0.0"
|