sin_fast_blank 3.1.1 → 4.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5f869e6841cba98f0703192b7a3828655a825f886a202954cf0f9a5ca4cdf3d0
4
- data.tar.gz: 78b82b2944c4fa1cf20c9ef4b344268545a6c5655a7dbc7b1f207f15d9f89e4d
3
+ metadata.gz: bcfe1a1adde100fe8c917a5ed228c8d768db39889057af0bf416e00da3d1ad79
4
+ data.tar.gz: 96d8c90536ebf9af9ecd318a7f55bffcf25891e97be4f0f729479171cd2da1a3
5
5
  SHA512:
6
- metadata.gz: f3598a5dc110f741cdca4aea40f9627442e91dc32401fb8456b8e731dd20a98e8b2e0d6dfc70275644523cab20e0fee287730571c7b9081977f08dd406dae939
7
- data.tar.gz: 26f0d5727b6d5f82ca033e360f745b857dad99654ba111a75267e2054ce600e5584c96c2781e6a82ba62e09650d1652d9673aeec92e1f6a319f2e1a41494bcc1
6
+ metadata.gz: c9f184a85aeea502010499f881f16b2aa21c199f2f84dbe57b8e93641a31603a5a1a27afadcbbd805dc49e25a1361bfa91af1ef71e25ae30e0caf2a73d06bd67
7
+ data.tar.gz: 9fb4fe47265d8c23b5f093dc9d0ce05f5b5d81b501a7e70466cbaec7389719f18999a46981ed96c027580051ae06fd151f734bdf3f5d6ed0a9ee43bf8f010b8d
@@ -2,4 +2,24 @@
2
2
 
3
3
  require 'mkmf'
4
4
 
5
# Select SIMD-related compiler flags for the C extension.
#
# TruffleRuby releases with a major version below 24 get no SIMD flags at all.
old_truffleruby = false
if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'truffleruby' && defined?(RUBY_ENGINE_VERSION)
  major_version = RUBY_ENGINE_VERSION.split('.').first.to_i
  old_truffleruby = major_version < 24
end

# Probe program that exits with status 0 only when the CPU running the build
# reports AVX2 support. try_run returns nil when the probe cannot be built
# (e.g. the compiler lacks __builtin_cpu_supports), which also disables AVX2.
avx2_probe = 'int main(void) { return __builtin_cpu_supports("avx2") ? 0 : 1; }'

unless old_truffleruby
  case RbConfig::CONFIG['host_cpu']
  when /x86_64|i[3-6]86/
    $CFLAGS << ' -msse2'
    # -mavx2 allows the compiler to emit AVX2 instructions, so the header being
    # present is not enough: the CPU itself must support AVX2, otherwise the
    # compiled extension can fail with an illegal-instruction fault.
    if have_header('immintrin.h') && try_compile('#include <immintrin.h>') && try_run(avx2_probe)
      $CFLAGS << ' -mavx2'
    end
  when /aarch64|arm64/
    # No special flags needed as NEON is enabled by default on ARM64
  when /arm/
    $CFLAGS << ' -mfpu=neon' if have_header('arm_neon.h') && try_compile('#include <arm_neon.h>')
  end
end

$CFLAGS << ' -O3 -funroll-loops'
24
+
5
25
  create_makefile 'sin_fast_blank'
@@ -1,8 +1,34 @@
1
1
  #include <ruby.h>
2
2
  #include <ruby/encoding.h>
3
+ #include <stdbool.h>
4
+ #ifdef __SSE2__
5
+ #include <emmintrin.h>
6
+ #endif
7
+ #ifdef __AVX2__
8
+ #include <immintrin.h>
9
+ #endif
10
+ #ifdef __ARM_NEON
11
+ #include <arm_neon.h>
12
+ #endif
3
13
 
4
14
  #define STR_ENC_GET(str) rb_enc_from_index(ENCODING_GET(str))
5
15
 
16
/* Byte values of the six ASCII characters that count as blank. */
#define ASCII_BLANK_TAB 0x09
#define ASCII_BLANK_LF 0x0a
#define ASCII_BLANK_VT 0x0b
#define ASCII_BLANK_FF 0x0c
#define ASCII_BLANK_CR 0x0d
#define ASCII_BLANK_SPACE 0x20

/*
 * Tells whether a single byte is an ASCII blank.
 *
 * Returns true for tab, line feed, vertical tab, form feed, carriage return
 * and space; false for every other byte value.
 */
static inline bool is_ascii_blank_char(unsigned char c) {
  switch (c) {
    case ASCII_BLANK_TAB:
    case ASCII_BLANK_LF:
    case ASCII_BLANK_VT:
    case ASCII_BLANK_FF:
    case ASCII_BLANK_CR:
    case ASCII_BLANK_SPACE:
      return true;
    default:
      return false;
  }
}
31
+
6
32
  static inline int is_unicode_blank(unsigned int codepoint) {
7
33
  switch (codepoint) {
8
34
  case 0x9:
@@ -36,42 +62,264 @@ static inline int is_unicode_blank(unsigned int codepoint) {
36
62
  }
37
63
  }
38
64
 
65
#ifdef __AVX2__
/*
 * Scans `len` bytes at `ptr` for ASCII blanks, 32 bytes at a time.
 *
 * Returns true when every byte is an ASCII blank. Otherwise returns false and
 * reports the first offending byte:
 *   - if it is a non-ASCII byte (>= 0x80), *non_ascii_pos is set to its
 *     address, and every byte before it has been verified to be blank;
 *   - if it is a non-blank ASCII byte, *non_ascii_pos is left untouched.
 *
 * Bytes are always validated in order. A chunk containing a non-ASCII byte
 * must not skip validation of the ASCII bytes in front of it, because the
 * caller resumes its check at *non_ascii_pos and never looks at them again.
 */
static bool check_blank_avx2(const unsigned char *ptr, size_t len, const unsigned char **non_ascii_pos) {
  const __m256i space = _mm256_set1_epi8(ASCII_BLANK_SPACE);
  const __m256i tab = _mm256_set1_epi8(ASCII_BLANK_TAB);
  const __m256i lf = _mm256_set1_epi8(ASCII_BLANK_LF);
  const __m256i vt = _mm256_set1_epi8(ASCII_BLANK_VT);
  const __m256i ff = _mm256_set1_epi8(ASCII_BLANK_FF);
  const __m256i cr = _mm256_set1_epi8(ASCII_BLANK_CR);

  size_t i = 0;

  /* Fast path: skip whole chunks that consist solely of blank bytes. */
  for (; i + 31 < len; i += 32) {
    const __m256i chunk = _mm256_loadu_si256((const __m256i *)(ptr + i));

    __m256i is_blank = _mm256_or_si256(_mm256_cmpeq_epi8(chunk, space), _mm256_cmpeq_epi8(chunk, tab));
    is_blank = _mm256_or_si256(is_blank, _mm256_cmpeq_epi8(chunk, lf));
    is_blank = _mm256_or_si256(is_blank, _mm256_cmpeq_epi8(chunk, vt));
    is_blank = _mm256_or_si256(is_blank, _mm256_cmpeq_epi8(chunk, ff));
    is_blank = _mm256_or_si256(is_blank, _mm256_cmpeq_epi8(chunk, cr));

    /* The mask has one bit per lane; -1 means all 32 lanes matched a blank. */
    if (_mm256_movemask_epi8(is_blank) != -1) {
      /* Some byte in this chunk is not blank (non-ASCII bytes never match);
       * let the in-order loop below pinpoint the first one. */
      break;
    }
  }

  /* In-order scan: finishes the offending chunk, or the tail shorter than 32. */
  for (; i < len; i++) {
    const unsigned char c = ptr[i];
    if (c >= 0x80) {
      *non_ascii_pos = ptr + i;
      return false;
    }
    if (!is_ascii_blank_char(c)) {
      return false;
    }
  }

  return true;
}
#endif
131
+
132
+ #ifdef __SSE2__
133
+ static bool check_blank_sse2(const unsigned char *ptr, size_t len, const unsigned char **non_ascii_pos) {
134
+ const __m128i ascii_mask = _mm_set1_epi8(0x80);
135
+ const __m128i space = _mm_set1_epi8(ASCII_BLANK_SPACE);
136
+ const __m128i tab = _mm_set1_epi8(ASCII_BLANK_TAB);
137
+ const __m128i lf = _mm_set1_epi8(ASCII_BLANK_LF);
138
+ const __m128i vt = _mm_set1_epi8(ASCII_BLANK_VT);
139
+ const __m128i ff = _mm_set1_epi8(ASCII_BLANK_FF);
140
+ const __m128i cr = _mm_set1_epi8(ASCII_BLANK_CR);
141
+
142
+ size_t i = 0;
143
+
144
+ for (; i + 15 < len; i += 16) {
145
+ __m128i chunk = _mm_loadu_si128((const __m128i *)(ptr + i));
146
+
147
+ __m128i non_ascii = _mm_and_si128(chunk, ascii_mask);
148
+ if (_mm_movemask_epi8(non_ascii) != 0) {
149
+ for (size_t j = 0; j < 16; j++) {
150
+ if (ptr[i + j] >= 0x80) {
151
+ *non_ascii_pos = ptr + i + j;
152
+ return false;
153
+ }
154
+ }
155
+ }
156
+
157
+ __m128i is_space = _mm_cmpeq_epi8(chunk, space);
158
+ __m128i is_tab = _mm_cmpeq_epi8(chunk, tab);
159
+ __m128i is_lf = _mm_cmpeq_epi8(chunk, lf);
160
+ __m128i is_vt = _mm_cmpeq_epi8(chunk, vt);
161
+ __m128i is_ff = _mm_cmpeq_epi8(chunk, ff);
162
+ __m128i is_cr = _mm_cmpeq_epi8(chunk, cr);
163
+
164
+ __m128i is_blank = _mm_or_si128(is_space, is_tab);
165
+ is_blank = _mm_or_si128(is_blank, is_lf);
166
+ is_blank = _mm_or_si128(is_blank, is_vt);
167
+ is_blank = _mm_or_si128(is_blank, is_ff);
168
+ is_blank = _mm_or_si128(is_blank, is_cr);
169
+
170
+ if (_mm_movemask_epi8(is_blank) != 0xFFFF) {
171
+ for (size_t j = 0; j < 16; j++) {
172
+ unsigned char c = ptr[i + j];
173
+ if (c >= 0x80) {
174
+ *non_ascii_pos = ptr + i + j;
175
+ return false;
176
+ }
177
+ if (!is_ascii_blank_char(c)) {
178
+ return false;
179
+ }
180
+ }
181
+ }
182
+ }
183
+
184
+ for (; i < len; i++) {
185
+ unsigned char c = ptr[i];
186
+ if (c >= 0x80) {
187
+ *non_ascii_pos = ptr + i;
188
+ return false;
189
+ }
190
+ if (!is_ascii_blank_char(c)) {
191
+ return false;
192
+ }
193
+ }
194
+
195
+ return true;
196
+ }
197
+ #endif
198
+
199
#ifdef __ARM_NEON
/*
 * Scans `len` bytes at `ptr` for ASCII blanks, 16 bytes at a time.
 *
 * Returns true when every byte is an ASCII blank. Otherwise returns false and
 * reports the first offending byte:
 *   - if it is a non-ASCII byte (>= 0x80), *non_ascii_pos is set to its
 *     address, and every byte before it has been verified to be blank;
 *   - if it is a non-blank ASCII byte, *non_ascii_pos is left untouched.
 *
 * Bytes are always validated in order. A chunk containing a non-ASCII byte
 * must not skip validation of the ASCII bytes in front of it, because the
 * caller resumes its check at *non_ascii_pos and never looks at them again.
 *
 * Only intrinsics available on both 32-bit ARM NEON and AArch64 are used, so
 * the function also builds when NEON is enabled through -mfpu=neon.
 */
static bool check_blank_neon(const unsigned char *ptr, size_t len, const unsigned char **non_ascii_pos) {
  const uint8x16_t space = vdupq_n_u8(ASCII_BLANK_SPACE);
  const uint8x16_t tab = vdupq_n_u8(ASCII_BLANK_TAB);
  const uint8x16_t lf = vdupq_n_u8(ASCII_BLANK_LF);
  const uint8x16_t vt = vdupq_n_u8(ASCII_BLANK_VT);
  const uint8x16_t ff = vdupq_n_u8(ASCII_BLANK_FF);
  const uint8x16_t cr = vdupq_n_u8(ASCII_BLANK_CR);

  size_t i = 0;

  /* Fast path: skip whole chunks that consist solely of blank bytes. */
  for (; i + 15 < len; i += 16) {
    const uint8x16_t chunk = vld1q_u8(ptr + i);

    uint8x16_t is_blank = vorrq_u8(vceqq_u8(chunk, space), vceqq_u8(chunk, tab));
    is_blank = vorrq_u8(is_blank, vceqq_u8(chunk, lf));
    is_blank = vorrq_u8(is_blank, vceqq_u8(chunk, vt));
    is_blank = vorrq_u8(is_blank, vceqq_u8(chunk, ff));
    is_blank = vorrq_u8(is_blank, vceqq_u8(chunk, cr));

    /* Every lane is 0xFF (blank) or 0x00 (not blank). AND the two 8-byte
     * halves and read the result as one 64-bit value: it is all ones exactly
     * when all 16 lanes are blank. */
    const uint8x8_t folded = vand_u8(vget_low_u8(is_blank), vget_high_u8(is_blank));
    if (vget_lane_u64(vreinterpret_u64_u8(folded), 0) != ~(uint64_t)0) {
      /* Some byte in this chunk is not blank (non-ASCII bytes never match);
       * let the in-order loop below pinpoint the first one. */
      break;
    }
  }

  /* In-order scan: finishes the offending chunk, or the tail shorter than 16. */
  for (; i < len; i++) {
    const unsigned char c = ptr[i];
    if (c >= 0x80) {
      *non_ascii_pos = ptr + i;
      return false;
    }
    if (!is_ascii_blank_char(c)) {
      return false;
    }
  }

  return true;
}
#endif
267
+
268
/*
 * Byte-at-a-time scan of `len` bytes at `ptr` for ASCII blanks.
 *
 * Returns true when every byte is an ASCII blank. Stops at the first byte
 * that is not: a non-ASCII byte (>= 0x80) is reported through *non_ascii_pos
 * before returning false; a non-blank ASCII byte returns false and leaves
 * *non_ascii_pos untouched.
 */
static bool check_blank_scalar(const unsigned char *ptr, size_t len, const unsigned char **non_ascii_pos) {
  const unsigned char *cursor = ptr;
  size_t remaining = len;

  while (remaining > 0) {
    const unsigned char byte = *cursor;

    /* For an unsigned char, the high bit being set is the same as >= 0x80. */
    if ((byte & 0x80) != 0) {
      *non_ascii_pos = cursor;
      return false;
    }

    if (!is_ascii_blank_char(byte)) {
      return false;
    }

    cursor++;
    remaining--;
  }

  return true;
}
284
+
39
285
  static VALUE rb_str_blank(VALUE str) {
40
286
  long len = RSTRING_LEN(str);
41
287
  if (len == 0) {
42
288
  return Qtrue;
43
289
  }
44
290
 
45
- const char *ptr = RSTRING_PTR(str);
46
- const char *end = ptr + len;
291
+ const unsigned char *ptr = (const unsigned char *)RSTRING_PTR(str);
292
+ const unsigned char *end = ptr + len;
47
293
  rb_encoding *enc = STR_ENC_GET(str);
48
294
 
49
295
  if (rb_enc_asciicompat(enc)) {
50
- for (const unsigned char *p = (const unsigned char *)ptr; p < (const unsigned char *)end; p++) {
51
- if (*p >= 0x80) {
52
- goto FULL_CHECK;
53
- }
296
+ const unsigned char *non_ascii_pos = NULL;
297
+ bool is_blank = false;
54
298
 
55
- switch (*p) {
56
- case 0x9:
57
- case 0xa:
58
- case 0xb:
59
- case 0xc:
60
- case 0xd:
61
- case 0x20:
62
- break;
63
- default:
64
- return Qfalse;
65
- }
299
+ #ifdef __AVX2__
300
+ is_blank = check_blank_avx2(ptr, len, &non_ascii_pos);
301
+ #elif defined(__SSE2__)
302
+ is_blank = check_blank_sse2(ptr, len, &non_ascii_pos);
303
+ #elif defined(__ARM_NEON)
304
+ is_blank = check_blank_neon(ptr, len, &non_ascii_pos);
305
+ #else
306
+ is_blank = check_blank_scalar(ptr, len, &non_ascii_pos);
307
+ #endif
308
+
309
+ if (is_blank) {
310
+ return Qtrue;
66
311
  }
67
312
 
68
- return Qtrue;
313
+ if (non_ascii_pos == NULL) {
314
+ return Qfalse;
315
+ }
316
+
317
+ ptr = (const unsigned char *)non_ascii_pos;
69
318
  }
70
319
 
71
- FULL_CHECK:;
72
- while (ptr < end) {
320
+ while ((const char *)ptr < (const char *)end) {
73
321
  int clen;
74
- unsigned int codepoint = rb_enc_codepoint_len(ptr, end, &clen, enc);
322
+ unsigned int codepoint = rb_enc_codepoint_len((const char *)ptr, (const char *)end, &clen, enc);
75
323
 
76
324
  if (!is_unicode_blank(codepoint)) {
77
325
  return Qfalse;
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sin_fast_blank
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.1
4
+ version: 4.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Masahiro
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-03-18 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  description: Check for blank string faster than FastBlank or ActiveSupport
13
13
  email:
@@ -19,15 +19,15 @@ extra_rdoc_files: []
19
19
  files:
20
20
  - ext/sin_fast_blank/extconf.rb
21
21
  - ext/sin_fast_blank/sin_fast_blank.c
22
- homepage: https://github.com/cadenza-tech/sin_fast_blank/tree/v3.1.1
22
+ homepage: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.0
23
23
  licenses:
24
24
  - MIT
25
25
  metadata:
26
- homepage_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/v3.1.1
27
- source_code_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/v3.1.1
28
- changelog_uri: https://github.com/cadenza-tech/sin_fast_blank/blob/v3.1.1/CHANGELOG.md
26
+ homepage_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.0
27
+ source_code_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.0
28
+ changelog_uri: https://github.com/cadenza-tech/sin_fast_blank/blob/v4.0.0/CHANGELOG.md
29
29
  bug_tracker_uri: https://github.com/cadenza-tech/sin_fast_blank/issues
30
- documentation_uri: https://rubydoc.info/gems/sin_fast_blank/3.1.1
30
+ documentation_uri: https://rubydoc.info/gems/sin_fast_blank/4.0.0
31
31
  funding_uri: https://patreon.com/CadenzaTech
32
32
  rubygems_mfa_required: 'true'
33
33
  required_jruby_version: ">= 9.3.0.0"
@@ -47,7 +47,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
47
47
  - !ruby/object:Gem::Version
48
48
  version: '0'
49
49
  requirements: []
50
- rubygems_version: 3.6.2
50
+ rubygems_version: 3.6.9
51
51
  specification_version: 4
52
52
  summary: Check for blank string faster than FastBlank or ActiveSupport
53
53
  test_files: []