sin_fast_blank 3.1.1 → 4.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/ext/sin_fast_blank/extconf.rb +20 -0
- data/ext/sin_fast_blank/sin_fast_blank.c +269 -21
- metadata +8 -8
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: bcfe1a1adde100fe8c917a5ed228c8d768db39889057af0bf416e00da3d1ad79
|
4
|
+
data.tar.gz: 96d8c90536ebf9af9ecd318a7f55bffcf25891e97be4f0f729479171cd2da1a3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c9f184a85aeea502010499f881f16b2aa21c199f2f84dbe57b8e93641a31603a5a1a27afadcbbd805dc49e25a1361bfa91af1ef71e25ae30e0caf2a73d06bd67
|
7
|
+
data.tar.gz: 9fb4fe47265d8c23b5f093dc9d0ce05f5b5d81b501a7e70466cbaec7389719f18999a46981ed96c027580051ae06fd151f734bdf3f5d6ed0a9ee43bf8f010b8d
|
@@ -2,4 +2,24 @@
|
|
2
2
|
|
3
3
|
require 'mkmf'
|
4
4
|
|
5
|
+
# Selects SIMD-related compiler flags for the extension.
#
# TruffleRuby releases with a major version below 24 get no SIMD flags at all;
# every other engine gets flags chosen from the host CPU family.
old_truffleruby = false
if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'truffleruby' && defined?(RUBY_ENGINE_VERSION)
  major_version = RUBY_ENGINE_VERSION.split('.').first.to_i
  old_truffleruby = major_version < 24
end

unless old_truffleruby
  case RbConfig::CONFIG['host_cpu']
  when /x86_64|i[3-6]86/
    $CFLAGS << ' -msse2'

    # -mavx2 allows the compiler to emit AVX2 instructions anywhere in the
    # extension, so a successful compile of <immintrin.h> is not enough: the
    # header is present on every modern toolchain, including on CPUs without
    # AVX2, where the built extension would die with an illegal instruction.
    # Only enable the flag when the CPU running the build reports AVX2.
    # try_run yields nil/false when the probe cannot be built or executed
    # (e.g. cross compilation, compiler without __builtin_cpu_supports), which
    # safely falls back to the SSE2 path.
    # NOTE(review): a binary built here and copied to an older CPU is still
    # not protected; that would need runtime dispatch in the C code.
    avx2_probe = 'int main(void) { return __builtin_cpu_supports("avx2") ? 0 : 1; }'
    if have_header('immintrin.h') && try_compile('#include <immintrin.h>') && try_run(avx2_probe)
      $CFLAGS << ' -mavx2'
    end
  when /aarch64|arm64/
    # No special flags needed as NEON is enabled by default on ARM64
  when /arm/
    $CFLAGS << ' -mfpu=neon' if have_header('arm_neon.h') && try_compile('#include <arm_neon.h>')
  end
end
|
22
|
+
|
23
|
+
$CFLAGS << ' -O3 -funroll-loops'
|
24
|
+
|
5
25
|
create_makefile 'sin_fast_blank'
|
@@ -1,8 +1,34 @@
|
|
1
1
|
#include <ruby.h>
|
2
2
|
#include <ruby/encoding.h>
|
3
|
+
#include <stdbool.h>
|
4
|
+
#ifdef __SSE2__
|
5
|
+
#include <emmintrin.h>
|
6
|
+
#endif
|
7
|
+
#ifdef __AVX2__
|
8
|
+
#include <immintrin.h>
|
9
|
+
#endif
|
10
|
+
#ifdef __ARM_NEON
|
11
|
+
#include <arm_neon.h>
|
12
|
+
#endif
|
3
13
|
|
4
14
|
#define STR_ENC_GET(str) rb_enc_from_index(ENCODING_GET(str))
|
5
15
|
|
16
|
+
#define ASCII_BLANK_TAB 0x09
|
17
|
+
#define ASCII_BLANK_LF 0x0a
|
18
|
+
#define ASCII_BLANK_VT 0x0b
|
19
|
+
#define ASCII_BLANK_FF 0x0c
|
20
|
+
#define ASCII_BLANK_CR 0x0d
|
21
|
+
#define ASCII_BLANK_SPACE 0x20
|
22
|
+
|
23
|
+
static inline bool is_ascii_blank_char(unsigned char c) {
|
24
|
+
return c == ASCII_BLANK_SPACE ||
|
25
|
+
c == ASCII_BLANK_TAB ||
|
26
|
+
c == ASCII_BLANK_LF ||
|
27
|
+
c == ASCII_BLANK_VT ||
|
28
|
+
c == ASCII_BLANK_FF ||
|
29
|
+
c == ASCII_BLANK_CR;
|
30
|
+
}
|
31
|
+
|
6
32
|
static inline int is_unicode_blank(unsigned int codepoint) {
|
7
33
|
switch (codepoint) {
|
8
34
|
case 0x9:
|
@@ -36,42 +62,264 @@ static inline int is_unicode_blank(unsigned int codepoint) {
|
|
36
62
|
}
|
37
63
|
}
|
38
64
|
|
65
|
+
#ifdef __AVX2__
|
66
|
+
static bool check_blank_avx2(const unsigned char *ptr, size_t len, const unsigned char **non_ascii_pos) {
|
67
|
+
const __m256i ascii_mask = _mm256_set1_epi8(0x80);
|
68
|
+
const __m256i space = _mm256_set1_epi8(ASCII_BLANK_SPACE);
|
69
|
+
const __m256i tab = _mm256_set1_epi8(ASCII_BLANK_TAB);
|
70
|
+
const __m256i lf = _mm256_set1_epi8(ASCII_BLANK_LF);
|
71
|
+
const __m256i vt = _mm256_set1_epi8(ASCII_BLANK_VT);
|
72
|
+
const __m256i ff = _mm256_set1_epi8(ASCII_BLANK_FF);
|
73
|
+
const __m256i cr = _mm256_set1_epi8(ASCII_BLANK_CR);
|
74
|
+
|
75
|
+
size_t i = 0;
|
76
|
+
|
77
|
+
for (; i + 31 < len; i += 32) {
|
78
|
+
__m256i chunk = _mm256_loadu_si256((const __m256i *)(ptr + i));
|
79
|
+
|
80
|
+
__m256i non_ascii = _mm256_and_si256(chunk, ascii_mask);
|
81
|
+
if (!_mm256_testz_si256(non_ascii, non_ascii)) {
|
82
|
+
for (size_t j = 0; j < 32; j++) {
|
83
|
+
if (ptr[i + j] >= 0x80) {
|
84
|
+
*non_ascii_pos = ptr + i + j;
|
85
|
+
return false;
|
86
|
+
}
|
87
|
+
}
|
88
|
+
}
|
89
|
+
|
90
|
+
__m256i is_space = _mm256_cmpeq_epi8(chunk, space);
|
91
|
+
__m256i is_tab = _mm256_cmpeq_epi8(chunk, tab);
|
92
|
+
__m256i is_lf = _mm256_cmpeq_epi8(chunk, lf);
|
93
|
+
__m256i is_vt = _mm256_cmpeq_epi8(chunk, vt);
|
94
|
+
__m256i is_ff = _mm256_cmpeq_epi8(chunk, ff);
|
95
|
+
__m256i is_cr = _mm256_cmpeq_epi8(chunk, cr);
|
96
|
+
|
97
|
+
__m256i is_blank = _mm256_or_si256(is_space, is_tab);
|
98
|
+
is_blank = _mm256_or_si256(is_blank, is_lf);
|
99
|
+
is_blank = _mm256_or_si256(is_blank, is_vt);
|
100
|
+
is_blank = _mm256_or_si256(is_blank, is_ff);
|
101
|
+
is_blank = _mm256_or_si256(is_blank, is_cr);
|
102
|
+
|
103
|
+
if (_mm256_movemask_epi8(is_blank) != -1) {
|
104
|
+
for (size_t j = 0; j < 32; j++) {
|
105
|
+
unsigned char c = ptr[i + j];
|
106
|
+
if (c >= 0x80) {
|
107
|
+
*non_ascii_pos = ptr + i + j;
|
108
|
+
return false;
|
109
|
+
}
|
110
|
+
if (!is_ascii_blank_char(c)) {
|
111
|
+
return false;
|
112
|
+
}
|
113
|
+
}
|
114
|
+
}
|
115
|
+
}
|
116
|
+
|
117
|
+
for (; i < len; i++) {
|
118
|
+
unsigned char c = ptr[i];
|
119
|
+
if (c >= 0x80) {
|
120
|
+
*non_ascii_pos = ptr + i;
|
121
|
+
return false;
|
122
|
+
}
|
123
|
+
if (!is_ascii_blank_char(c)) {
|
124
|
+
return false;
|
125
|
+
}
|
126
|
+
}
|
127
|
+
|
128
|
+
return true;
|
129
|
+
}
|
130
|
+
#endif
|
131
|
+
|
132
|
+
#ifdef __SSE2__
|
133
|
+
static bool check_blank_sse2(const unsigned char *ptr, size_t len, const unsigned char **non_ascii_pos) {
|
134
|
+
const __m128i ascii_mask = _mm_set1_epi8(0x80);
|
135
|
+
const __m128i space = _mm_set1_epi8(ASCII_BLANK_SPACE);
|
136
|
+
const __m128i tab = _mm_set1_epi8(ASCII_BLANK_TAB);
|
137
|
+
const __m128i lf = _mm_set1_epi8(ASCII_BLANK_LF);
|
138
|
+
const __m128i vt = _mm_set1_epi8(ASCII_BLANK_VT);
|
139
|
+
const __m128i ff = _mm_set1_epi8(ASCII_BLANK_FF);
|
140
|
+
const __m128i cr = _mm_set1_epi8(ASCII_BLANK_CR);
|
141
|
+
|
142
|
+
size_t i = 0;
|
143
|
+
|
144
|
+
for (; i + 15 < len; i += 16) {
|
145
|
+
__m128i chunk = _mm_loadu_si128((const __m128i *)(ptr + i));
|
146
|
+
|
147
|
+
__m128i non_ascii = _mm_and_si128(chunk, ascii_mask);
|
148
|
+
if (_mm_movemask_epi8(non_ascii) != 0) {
|
149
|
+
for (size_t j = 0; j < 16; j++) {
|
150
|
+
if (ptr[i + j] >= 0x80) {
|
151
|
+
*non_ascii_pos = ptr + i + j;
|
152
|
+
return false;
|
153
|
+
}
|
154
|
+
}
|
155
|
+
}
|
156
|
+
|
157
|
+
__m128i is_space = _mm_cmpeq_epi8(chunk, space);
|
158
|
+
__m128i is_tab = _mm_cmpeq_epi8(chunk, tab);
|
159
|
+
__m128i is_lf = _mm_cmpeq_epi8(chunk, lf);
|
160
|
+
__m128i is_vt = _mm_cmpeq_epi8(chunk, vt);
|
161
|
+
__m128i is_ff = _mm_cmpeq_epi8(chunk, ff);
|
162
|
+
__m128i is_cr = _mm_cmpeq_epi8(chunk, cr);
|
163
|
+
|
164
|
+
__m128i is_blank = _mm_or_si128(is_space, is_tab);
|
165
|
+
is_blank = _mm_or_si128(is_blank, is_lf);
|
166
|
+
is_blank = _mm_or_si128(is_blank, is_vt);
|
167
|
+
is_blank = _mm_or_si128(is_blank, is_ff);
|
168
|
+
is_blank = _mm_or_si128(is_blank, is_cr);
|
169
|
+
|
170
|
+
if (_mm_movemask_epi8(is_blank) != 0xFFFF) {
|
171
|
+
for (size_t j = 0; j < 16; j++) {
|
172
|
+
unsigned char c = ptr[i + j];
|
173
|
+
if (c >= 0x80) {
|
174
|
+
*non_ascii_pos = ptr + i + j;
|
175
|
+
return false;
|
176
|
+
}
|
177
|
+
if (!is_ascii_blank_char(c)) {
|
178
|
+
return false;
|
179
|
+
}
|
180
|
+
}
|
181
|
+
}
|
182
|
+
}
|
183
|
+
|
184
|
+
for (; i < len; i++) {
|
185
|
+
unsigned char c = ptr[i];
|
186
|
+
if (c >= 0x80) {
|
187
|
+
*non_ascii_pos = ptr + i;
|
188
|
+
return false;
|
189
|
+
}
|
190
|
+
if (!is_ascii_blank_char(c)) {
|
191
|
+
return false;
|
192
|
+
}
|
193
|
+
}
|
194
|
+
|
195
|
+
return true;
|
196
|
+
}
|
197
|
+
#endif
|
198
|
+
|
199
|
+
#ifdef __ARM_NEON
|
200
|
+
/*
 * Checks whether the `len` bytes at `ptr` are all ASCII blanks, testing
 * 16 bytes per step with NEON.
 *
 * Returns true when every byte is an ASCII blank.
 * Returns false otherwise. *non_ascii_pos is written only when the first
 * offending byte is >= 0x80, in which case it points at that byte and every
 * byte before it has been verified to be an ASCII blank. When the first
 * offending byte is a non-blank ASCII byte, *non_ascii_pos is left untouched.
 */
static bool check_blank_neon(const unsigned char *ptr, size_t len, const unsigned char **non_ascii_pos) {
    const uint8x16_t space = vdupq_n_u8(ASCII_BLANK_SPACE);
    const uint8x16_t tab = vdupq_n_u8(ASCII_BLANK_TAB);
    const uint8x16_t lf = vdupq_n_u8(ASCII_BLANK_LF);
    const uint8x16_t vt = vdupq_n_u8(ASCII_BLANK_VT);
    const uint8x16_t ff = vdupq_n_u8(ASCII_BLANK_FF);
    const uint8x16_t cr = vdupq_n_u8(ASCII_BLANK_CR);

    size_t i = 0;

    for (; i + 15 < len; i += 16) {
        uint8x16_t chunk = vld1q_u8(ptr + i);

        uint8x16_t is_blank = vorrq_u8(vceqq_u8(chunk, space), vceqq_u8(chunk, tab));
        is_blank = vorrq_u8(is_blank, vceqq_u8(chunk, lf));
        is_blank = vorrq_u8(is_blank, vceqq_u8(chunk, vt));
        is_blank = vorrq_u8(is_blank, vceqq_u8(chunk, ff));
        is_blank = vorrq_u8(is_blank, vceqq_u8(chunk, cr));

        /*
         * Each lane of is_blank is 0xFF (blank) or 0x00. AND the two halves
         * and read the result as one 64-bit value: it is all ones only when
         * all 16 lanes matched. This replaces the across-vector reductions
         * (vminvq_u8 / vmaxvq_u8), which are AArch64-only and do not compile
         * on 32-bit ARM targets that define __ARM_NEON.
         */
        uint8x8_t folded = vand_u8(vget_low_u8(is_blank), vget_high_u8(is_blank));

        /*
         * A chunk that is not entirely ASCII blanks (this includes any chunk
         * holding a non-ASCII byte, since such a byte matches none of the
         * comparisons) is handed to the byte-wise loop below. That loop walks
         * the bytes in order, so a non-blank ASCII byte sitting in front of
         * the first non-ASCII byte is reported as non-blank instead of being
         * skipped over.
         */
        if (vget_lane_u64(vreinterpret_u64_u8(folded), 0) != ~(uint64_t)0) {
            break;
        }
    }

    /* Byte-wise pass: the offending chunk (if any) and the unaligned tail. */
    for (; i < len; i++) {
        unsigned char c = ptr[i];
        if (c >= 0x80) {
            *non_ascii_pos = ptr + i;
            return false;
        }
        if (!is_ascii_blank_char(c)) {
            return false;
        }
    }

    return true;
}
|
266
|
+
#endif
|
267
|
+
|
268
|
+
/*
 * Portable byte-at-a-time blank check over the `len` bytes at `ptr`.
 *
 * Returns true when every byte is an ASCII blank. Returns false at the first
 * byte that is not; if that byte is >= 0x80 its address is stored in
 * *non_ascii_pos first, otherwise *non_ascii_pos is left untouched.
 */
static bool check_blank_scalar(const unsigned char *ptr, size_t len, const unsigned char **non_ascii_pos) {
    const unsigned char *cursor = ptr;

    for (size_t remaining = len; remaining > 0; remaining--, cursor++) {
        const unsigned char byte = *cursor;

        /* High bit set: not ASCII, report where it was found. */
        if (byte & 0x80) {
            *non_ascii_pos = cursor;
            return false;
        }

        if (is_ascii_blank_char(byte)) {
            continue;
        }
        return false;
    }

    return true;
}
|
284
|
+
|
39
285
|
static VALUE rb_str_blank(VALUE str) {
|
40
286
|
long len = RSTRING_LEN(str);
|
41
287
|
if (len == 0) {
|
42
288
|
return Qtrue;
|
43
289
|
}
|
44
290
|
|
45
|
-
const char *ptr = RSTRING_PTR(str);
|
46
|
-
const char *end = ptr + len;
|
291
|
+
const unsigned char *ptr = (const unsigned char *)RSTRING_PTR(str);
|
292
|
+
const unsigned char *end = ptr + len;
|
47
293
|
rb_encoding *enc = STR_ENC_GET(str);
|
48
294
|
|
49
295
|
if (rb_enc_asciicompat(enc)) {
|
50
|
-
|
51
|
-
|
52
|
-
goto FULL_CHECK;
|
53
|
-
}
|
296
|
+
const unsigned char *non_ascii_pos = NULL;
|
297
|
+
bool is_blank = false;
|
54
298
|
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
62
|
-
|
63
|
-
|
64
|
-
|
65
|
-
|
299
|
+
#ifdef __AVX2__
|
300
|
+
is_blank = check_blank_avx2(ptr, len, &non_ascii_pos);
|
301
|
+
#elif defined(__SSE2__)
|
302
|
+
is_blank = check_blank_sse2(ptr, len, &non_ascii_pos);
|
303
|
+
#elif defined(__ARM_NEON)
|
304
|
+
is_blank = check_blank_neon(ptr, len, &non_ascii_pos);
|
305
|
+
#else
|
306
|
+
is_blank = check_blank_scalar(ptr, len, &non_ascii_pos);
|
307
|
+
#endif
|
308
|
+
|
309
|
+
if (is_blank) {
|
310
|
+
return Qtrue;
|
66
311
|
}
|
67
312
|
|
68
|
-
|
313
|
+
if (non_ascii_pos == NULL) {
|
314
|
+
return Qfalse;
|
315
|
+
}
|
316
|
+
|
317
|
+
ptr = (const unsigned char *)non_ascii_pos;
|
69
318
|
}
|
70
319
|
|
71
|
-
|
72
|
-
while (ptr < end) {
|
320
|
+
while ((const char *)ptr < (const char *)end) {
|
73
321
|
int clen;
|
74
|
-
unsigned int codepoint = rb_enc_codepoint_len(ptr, end, &clen, enc);
|
322
|
+
unsigned int codepoint = rb_enc_codepoint_len((const char *)ptr, (const char *)end, &clen, enc);
|
75
323
|
|
76
324
|
if (!is_unicode_blank(codepoint)) {
|
77
325
|
return Qfalse;
|
metadata
CHANGED
@@ -1,13 +1,13 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: sin_fast_blank
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version:
|
4
|
+
version: 4.0.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Masahiro
|
8
8
|
bindir: exe
|
9
9
|
cert_chain: []
|
10
|
-
date:
|
10
|
+
date: 1980-01-02 00:00:00.000000000 Z
|
11
11
|
dependencies: []
|
12
12
|
description: Check for blank string faster than FastBlank or ActiveSupport
|
13
13
|
email:
|
@@ -19,15 +19,15 @@ extra_rdoc_files: []
|
|
19
19
|
files:
|
20
20
|
- ext/sin_fast_blank/extconf.rb
|
21
21
|
- ext/sin_fast_blank/sin_fast_blank.c
|
22
|
-
homepage: https://github.com/cadenza-tech/sin_fast_blank/tree/
|
22
|
+
homepage: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.0
|
23
23
|
licenses:
|
24
24
|
- MIT
|
25
25
|
metadata:
|
26
|
-
homepage_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/
|
27
|
-
source_code_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/
|
28
|
-
changelog_uri: https://github.com/cadenza-tech/sin_fast_blank/blob/
|
26
|
+
homepage_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.0
|
27
|
+
source_code_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.0
|
28
|
+
changelog_uri: https://github.com/cadenza-tech/sin_fast_blank/blob/v4.0.0/CHANGELOG.md
|
29
29
|
bug_tracker_uri: https://github.com/cadenza-tech/sin_fast_blank/issues
|
30
|
-
documentation_uri: https://rubydoc.info/gems/sin_fast_blank/
|
30
|
+
documentation_uri: https://rubydoc.info/gems/sin_fast_blank/4.0.0
|
31
31
|
funding_uri: https://patreon.com/CadenzaTech
|
32
32
|
rubygems_mfa_required: 'true'
|
33
33
|
required_jruby_version: ">= 9.3.0.0"
|
@@ -47,7 +47,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
47
47
|
- !ruby/object:Gem::Version
|
48
48
|
version: '0'
|
49
49
|
requirements: []
|
50
|
-
rubygems_version: 3.6.
|
50
|
+
rubygems_version: 3.6.9
|
51
51
|
specification_version: 4
|
52
52
|
summary: Check for blank string faster than FastBlank or ActiveSupport
|
53
53
|
test_files: []
|