sin_fast_blank 3.1.1 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 5f869e6841cba98f0703192b7a3828655a825f886a202954cf0f9a5ca4cdf3d0
4
- data.tar.gz: 78b82b2944c4fa1cf20c9ef4b344268545a6c5655a7dbc7b1f207f15d9f89e4d
3
+ metadata.gz: 715b972c44a78f3a18dcee12c9133c5b38acce03a76fee4031df21d3c5ec8b2f
4
+ data.tar.gz: bce4b9d3bd8058ab567c620f0cd684f817f4193b20271e481c25a0560954631b
5
5
  SHA512:
6
- metadata.gz: f3598a5dc110f741cdca4aea40f9627442e91dc32401fb8456b8e731dd20a98e8b2e0d6dfc70275644523cab20e0fee287730571c7b9081977f08dd406dae939
7
- data.tar.gz: 26f0d5727b6d5f82ca033e360f745b857dad99654ba111a75267e2054ce600e5584c96c2781e6a82ba62e09650d1652d9673aeec92e1f6a319f2e1a41494bcc1
6
+ metadata.gz: c8643cd88797cb3d5824536ddfc7f60b4f7bf5718a20a3464b9231720c50e0558fa5234c1f23f0adcb1a8b31664d3a4bb25c3209545619ea0a00e1f5e89408bf
7
+ data.tar.gz: 14d825f306f16680badaab6c8a1153ef817f889f73ad45169ec6069abf67f25358ed4324c4184c742760cb198cd2066ad32f249fd71e67ddbeafbd755ee2c88f
@@ -2,4 +2,24 @@
2
2
 
3
3
  require 'mkmf'
4
4
 
5
+ old_truffleruby = false
6
+ if defined?(RUBY_ENGINE) && RUBY_ENGINE == 'truffleruby' && defined?(RUBY_ENGINE_VERSION)
7
+ major_version = RUBY_ENGINE_VERSION.split('.').first.to_i
8
+ old_truffleruby = major_version < 24
9
+ end
10
+
11
+ unless old_truffleruby
12
+ case RbConfig::CONFIG['host_cpu']
13
+ when /x86_64|i[3-6]86/
14
+ $CFLAGS << ' -msse2'
15
+ $CFLAGS << ' -mavx2' if have_header('immintrin.h') && try_compile('#include <immintrin.h>')
16
+ when /aarch64|arm64/
17
+ # No special flags needed as NEON is enabled by default on ARM64
18
+ when /arm/
19
+ $CFLAGS << ' -mfpu=neon' if have_header('arm_neon.h') && try_compile('#include <arm_neon.h>')
20
+ end
21
+ end
22
+
23
+ $CFLAGS << ' -O3 -funroll-loops'
24
+
5
25
  create_makefile 'sin_fast_blank'
@@ -1,9 +1,29 @@
1
1
  #include <ruby.h>
2
2
  #include <ruby/encoding.h>
3
+ #include <stdbool.h>
4
+ #ifdef __SSE2__
5
+ #include <emmintrin.h>
6
+ #endif
7
+ #ifdef __AVX2__
8
+ #include <immintrin.h>
9
+ #endif
10
+ #if defined(__ARM_NEON) && defined(__aarch64__)
11
+ #include <arm_neon.h>
12
+ #endif
3
13
 
4
14
  #define STR_ENC_GET(str) rb_enc_from_index(ENCODING_GET(str))
5
15
 
6
- static inline int is_unicode_blank(unsigned int codepoint) {
16
+ #define ASCII_WS_RANGE_MIN 0x09
17
+ #define ASCII_WS_RANGE_MAX 0x0d
18
+ #define ASCII_WS_SPACE 0x20
19
+
20
+ static inline bool is_ascii_blank_char(unsigned char c) { return (c >= ASCII_WS_RANGE_MIN && c <= ASCII_WS_RANGE_MAX) || c == ASCII_WS_SPACE; }
21
+
22
+ static inline bool is_ascii_blank_or_null_char(unsigned char c) {
23
+ return c == 0x00 || (c >= ASCII_WS_RANGE_MIN && c <= ASCII_WS_RANGE_MAX) || c == ASCII_WS_SPACE;
24
+ }
25
+
26
+ static inline bool is_unicode_blank(unsigned int codepoint) {
7
27
  switch (codepoint) {
8
28
  case 0x9:
9
29
  case 0xa:
@@ -30,53 +50,245 @@ static inline int is_unicode_blank(unsigned int codepoint) {
30
50
  case 0x202f:
31
51
  case 0x205f:
32
52
  case 0x3000:
33
- return 1;
53
+ return true;
34
54
  default:
35
- return 0;
55
+ return false;
36
56
  }
37
57
  }
38
58
 
39
- static VALUE rb_str_blank(VALUE str) {
40
- long len = RSTRING_LEN(str);
41
- if (len == 0) {
42
- return Qtrue;
59
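+ /* Returns true if every byte is an ASCII blank. On false, *non_ascii_pos is set to the first non-ASCII byte (>= 0x80) if that is what stopped the scan; it is left untouched if a non-blank ASCII byte stopped it, so callers must initialize it to NULL to tell the two cases apart. */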
60
+ static inline bool scan_ascii_blank(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
61
+ for (size_t i = 0; i < len; i++) {
62
+ unsigned char c = ptr[i];
63
+ if (c >= 0x80) {
64
+ *non_ascii_pos = ptr + i;
65
+ return false;
66
+ }
67
+ if (!is_ascii_blank_char(c)) {
68
+ return false;
69
+ }
70
+ }
71
+ return true;
72
+ }
73
+
74
+ static inline bool scan_ascii_blank_or_null(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
75
+ for (size_t i = 0; i < len; i++) {
76
+ unsigned char c = ptr[i];
77
+ if (c >= 0x80) {
78
+ *non_ascii_pos = ptr + i;
79
+ return false;
80
+ }
81
+ if (!is_ascii_blank_or_null_char(c)) {
82
+ return false;
83
+ }
43
84
  }
85
+ return true;
86
+ }
44
87
 
45
- const char *ptr = RSTRING_PTR(str);
46
- const char *end = ptr + len;
47
- rb_encoding *enc = STR_ENC_GET(str);
88
+ #ifdef __AVX2__
89
+ static bool check_blank_avx2(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
90
+ const __m256i ws_base = _mm256_set1_epi8(ASCII_WS_RANGE_MIN);
91
+ const __m256i four = _mm256_set1_epi8(ASCII_WS_RANGE_MAX - ASCII_WS_RANGE_MIN);
92
+ const __m256i space = _mm256_set1_epi8(ASCII_WS_SPACE);
48
93
 
49
- if (rb_enc_asciicompat(enc)) {
50
- for (const unsigned char *p = (const unsigned char *)ptr; p < (const unsigned char *)end; p++) {
51
- if (*p >= 0x80) {
52
- goto FULL_CHECK;
94
+ size_t i = 0;
95
+ for (; i + 31 < len; i += 32) {
96
+ __m256i chunk = _mm256_loadu_si256((const __m256i*)(ptr + i));
97
+ __m256i adjusted = _mm256_sub_epi8(chunk, ws_base);
98
+ __m256i in_range = _mm256_cmpeq_epi8(_mm256_min_epu8(adjusted, four), adjusted);
99
+ __m256i is_sp = _mm256_cmpeq_epi8(chunk, space);
100
+ __m256i is_blank = _mm256_or_si256(in_range, is_sp);
101
+
102
+ int mask = _mm256_movemask_epi8(is_blank);
103
+ if (mask != -1) {
104
+ int first = __builtin_ctz(~mask);
105
+ unsigned char c = ptr[i + first];
106
+ if (c >= 0x80) {
107
+ *non_ascii_pos = ptr + i + first;
53
108
  }
109
+ return false;
110
+ }
111
+ }
112
+
113
+ return scan_ascii_blank(ptr + i, len - i, non_ascii_pos);
114
+ }
115
+
116
+ static bool check_ascii_blank_avx2(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
117
+ const __m256i ws_base = _mm256_set1_epi8(ASCII_WS_RANGE_MIN);
118
+ const __m256i four = _mm256_set1_epi8(ASCII_WS_RANGE_MAX - ASCII_WS_RANGE_MIN);
119
+ const __m256i space = _mm256_set1_epi8(ASCII_WS_SPACE);
120
+ const __m256i zero = _mm256_setzero_si256();
121
+
122
+ size_t i = 0;
123
+ for (; i + 31 < len; i += 32) {
124
+ __m256i chunk = _mm256_loadu_si256((const __m256i*)(ptr + i));
125
+ __m256i adjusted = _mm256_sub_epi8(chunk, ws_base);
126
+ __m256i in_range = _mm256_cmpeq_epi8(_mm256_min_epu8(adjusted, four), adjusted);
127
+ __m256i is_sp = _mm256_cmpeq_epi8(chunk, space);
128
+ __m256i is_null = _mm256_cmpeq_epi8(chunk, zero);
129
+ __m256i is_blank = _mm256_or_si256(_mm256_or_si256(in_range, is_sp), is_null);
54
130
 
55
- switch (*p) {
56
- case 0x9:
57
- case 0xa:
58
- case 0xb:
59
- case 0xc:
60
- case 0xd:
61
- case 0x20:
62
- break;
63
- default:
64
- return Qfalse;
131
+ int mask = _mm256_movemask_epi8(is_blank);
132
+ if (mask != -1) {
133
+ int first = __builtin_ctz(~mask);
134
+ unsigned char c = ptr[i + first];
135
+ if (c >= 0x80) {
136
+ *non_ascii_pos = ptr + i + first;
65
137
  }
138
+ return false;
66
139
  }
140
+ }
67
141
 
68
- return Qtrue;
142
+ return scan_ascii_blank_or_null(ptr + i, len - i, non_ascii_pos);
143
+ }
144
+ #endif
145
+
146
+ #ifdef __SSE2__
147
+ static bool check_blank_sse2(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
148
+ const __m128i ws_base = _mm_set1_epi8(ASCII_WS_RANGE_MIN);
149
+ const __m128i four = _mm_set1_epi8(ASCII_WS_RANGE_MAX - ASCII_WS_RANGE_MIN);
150
+ const __m128i space = _mm_set1_epi8(ASCII_WS_SPACE);
151
+
152
+ size_t i = 0;
153
+ for (; i + 15 < len; i += 16) {
154
+ __m128i chunk = _mm_loadu_si128((const __m128i*)(ptr + i));
155
+ __m128i adjusted = _mm_sub_epi8(chunk, ws_base);
156
+ __m128i in_range = _mm_cmpeq_epi8(_mm_min_epu8(adjusted, four), adjusted);
157
+ __m128i is_sp = _mm_cmpeq_epi8(chunk, space);
158
+ __m128i is_blank = _mm_or_si128(in_range, is_sp);
159
+
160
+ int mask = _mm_movemask_epi8(is_blank);
161
+ if (mask != 0xFFFF) {
162
+ int first = __builtin_ctz(~mask & 0xFFFF);
163
+ unsigned char c = ptr[i + first];
164
+ if (c >= 0x80) {
165
+ *non_ascii_pos = ptr + i + first;
166
+ }
167
+ return false;
168
+ }
69
169
  }
70
170
 
71
- FULL_CHECK:;
72
- while (ptr < end) {
73
- int clen;
74
- unsigned int codepoint = rb_enc_codepoint_len(ptr, end, &clen, enc);
171
+ return scan_ascii_blank(ptr + i, len - i, non_ascii_pos);
172
+ }
173
+
174
+ static bool check_ascii_blank_sse2(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
175
+ const __m128i ws_base = _mm_set1_epi8(ASCII_WS_RANGE_MIN);
176
+ const __m128i four = _mm_set1_epi8(ASCII_WS_RANGE_MAX - ASCII_WS_RANGE_MIN);
177
+ const __m128i space = _mm_set1_epi8(ASCII_WS_SPACE);
178
+ const __m128i zero = _mm_setzero_si128();
179
+
180
+ size_t i = 0;
181
+ for (; i + 15 < len; i += 16) {
182
+ __m128i chunk = _mm_loadu_si128((const __m128i*)(ptr + i));
183
+ __m128i adjusted = _mm_sub_epi8(chunk, ws_base);
184
+ __m128i in_range = _mm_cmpeq_epi8(_mm_min_epu8(adjusted, four), adjusted);
185
+ __m128i is_sp = _mm_cmpeq_epi8(chunk, space);
186
+ __m128i is_null = _mm_cmpeq_epi8(chunk, zero);
187
+ __m128i is_blank = _mm_or_si128(_mm_or_si128(in_range, is_sp), is_null);
188
+
189
+ int mask = _mm_movemask_epi8(is_blank);
190
+ if (mask != 0xFFFF) {
191
+ int first = __builtin_ctz(~mask & 0xFFFF);
192
+ unsigned char c = ptr[i + first];
193
+ if (c >= 0x80) {
194
+ *non_ascii_pos = ptr + i + first;
195
+ }
196
+ return false;
197
+ }
198
+ }
199
+
200
+ return scan_ascii_blank_or_null(ptr + i, len - i, non_ascii_pos);
201
+ }
202
+ #endif
203
+
204
+ #if defined(__ARM_NEON) && defined(__aarch64__)
205
+ static bool check_blank_neon(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
206
+ const uint8x16_t ws_base = vdupq_n_u8(ASCII_WS_RANGE_MIN);
207
+ const uint8x16_t four = vdupq_n_u8(ASCII_WS_RANGE_MAX - ASCII_WS_RANGE_MIN);
208
+ const uint8x16_t space = vdupq_n_u8(ASCII_WS_SPACE);
209
+
210
+ size_t i = 0;
211
+ for (; i + 15 < len; i += 16) {
212
+ uint8x16_t chunk = vld1q_u8(ptr + i);
213
+ uint8x16_t adjusted = vsubq_u8(chunk, ws_base);
214
+ uint8x16_t in_range = vceqq_u8(vminq_u8(adjusted, four), adjusted);
215
+ uint8x16_t is_sp = vceqq_u8(chunk, space);
216
+ uint8x16_t is_blank = vorrq_u8(in_range, is_sp);
217
+
218
+ if (vminvq_u8(is_blank) == 0) {
219
+ if (!scan_ascii_blank(ptr + i, 16, non_ascii_pos)) return false;
220
+ }
221
+ }
222
+
223
+ return scan_ascii_blank(ptr + i, len - i, non_ascii_pos);
224
+ }
225
+
226
+ static bool check_ascii_blank_neon(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
227
+ const uint8x16_t ws_base = vdupq_n_u8(ASCII_WS_RANGE_MIN);
228
+ const uint8x16_t four = vdupq_n_u8(ASCII_WS_RANGE_MAX - ASCII_WS_RANGE_MIN);
229
+ const uint8x16_t space = vdupq_n_u8(ASCII_WS_SPACE);
230
+ const uint8x16_t zero = vdupq_n_u8(0);
231
+
232
+ size_t i = 0;
233
+ for (; i + 15 < len; i += 16) {
234
+ uint8x16_t chunk = vld1q_u8(ptr + i);
235
+ uint8x16_t adjusted = vsubq_u8(chunk, ws_base);
236
+ uint8x16_t in_range = vceqq_u8(vminq_u8(adjusted, four), adjusted);
237
+ uint8x16_t is_sp = vceqq_u8(chunk, space);
238
+ uint8x16_t is_null = vceqq_u8(chunk, zero);
239
+ uint8x16_t is_blank = vorrq_u8(vorrq_u8(in_range, is_sp), is_null);
75
240
 
76
- if (!is_unicode_blank(codepoint)) {
77
- return Qfalse;
241
+ if (vminvq_u8(is_blank) == 0) {
242
+ if (!scan_ascii_blank_or_null(ptr + i, 16, non_ascii_pos)) return false;
78
243
  }
244
+ }
245
+
246
+ return scan_ascii_blank_or_null(ptr + i, len - i, non_ascii_pos);
247
+ }
248
+ #endif
249
+
250
+ #if !defined(__AVX2__) && !defined(__SSE2__) && !(defined(__ARM_NEON) && defined(__aarch64__))
251
+ static bool check_blank_scalar(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
252
+ return scan_ascii_blank(ptr, len, non_ascii_pos);
253
+ }
254
+
255
+ static bool check_ascii_blank_scalar(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
256
+ return scan_ascii_blank_or_null(ptr, len, non_ascii_pos);
257
+ }
258
+ #endif
79
259
 
260
+ static VALUE rb_str_blank(VALUE str) {
261
+ long len = RSTRING_LEN(str);
262
+ if (len == 0) return Qtrue;
263
+
264
+ const unsigned char* ptr = (const unsigned char*)RSTRING_PTR(str);
265
+ const unsigned char* end = ptr + len;
266
+ rb_encoding* enc = STR_ENC_GET(str);
267
+
268
+ if (rb_enc_asciicompat(enc)) {
269
+ const unsigned char* non_ascii_pos = NULL;
270
+ bool is_blank = false;
271
+
272
+ #ifdef __AVX2__
273
+ is_blank = check_blank_avx2(ptr, (size_t)len, &non_ascii_pos);
274
+ #elif defined(__SSE2__)
275
+ is_blank = check_blank_sse2(ptr, (size_t)len, &non_ascii_pos);
276
+ #elif defined(__ARM_NEON) && defined(__aarch64__)
277
+ is_blank = check_blank_neon(ptr, (size_t)len, &non_ascii_pos);
278
+ #else
279
+ is_blank = check_blank_scalar(ptr, (size_t)len, &non_ascii_pos);
280
+ #endif
281
+
282
+ if (is_blank) return Qtrue;
283
+ if (non_ascii_pos == NULL) return Qfalse;
284
+
285
+ ptr = non_ascii_pos;
286
+ }
287
+
288
+ while (ptr < end) {
289
+ int clen;
290
+ unsigned int codepoint = rb_enc_codepoint_len((const char*)ptr, (const char*)end, &clen, enc);
291
+ if (!is_unicode_blank(codepoint)) return Qfalse;
80
292
  ptr += clen;
81
293
  }
82
294
 
@@ -85,39 +297,36 @@ FULL_CHECK:;
85
297
 
86
298
  static VALUE rb_str_ascii_blank(VALUE str) {
87
299
  long len = RSTRING_LEN(str);
88
- if (len == 0) {
89
- return Qtrue;
90
- }
300
+ if (len == 0) return Qtrue;
91
301
 
92
- const char *ptr = RSTRING_PTR(str);
93
- const char *end = ptr + len;
94
- rb_encoding *enc = STR_ENC_GET(str);
302
+ const unsigned char* ptr = (const unsigned char*)RSTRING_PTR(str);
303
+ const unsigned char* end = ptr + len;
304
+ rb_encoding* enc = STR_ENC_GET(str);
95
305
 
96
306
  if (rb_enc_asciicompat(enc)) {
97
- for (; ptr < end; ptr++) {
98
- unsigned char c = (unsigned char)*ptr;
307
+ const unsigned char* non_ascii_pos = NULL;
308
+ bool is_blank = false;
99
309
 
100
- if (c >= 0x80) {
101
- goto FULL_CHECK;
102
- }
310
+ #ifdef __AVX2__
311
+ is_blank = check_ascii_blank_avx2(ptr, (size_t)len, &non_ascii_pos);
312
+ #elif defined(__SSE2__)
313
+ is_blank = check_ascii_blank_sse2(ptr, (size_t)len, &non_ascii_pos);
314
+ #elif defined(__ARM_NEON) && defined(__aarch64__)
315
+ is_blank = check_ascii_blank_neon(ptr, (size_t)len, &non_ascii_pos);
316
+ #else
317
+ is_blank = check_ascii_blank_scalar(ptr, (size_t)len, &non_ascii_pos);
318
+ #endif
103
319
 
104
- if (!rb_isspace(c) && c != 0) {
105
- return Qfalse;
106
- }
107
- }
320
+ if (is_blank) return Qtrue;
321
+ if (non_ascii_pos == NULL) return Qfalse;
108
322
 
109
- return Qtrue;
323
+ ptr = non_ascii_pos;
110
324
  }
111
325
 
112
- FULL_CHECK:;
113
326
  while (ptr < end) {
114
327
  int clen;
115
- unsigned int codepoint = rb_enc_codepoint_len(ptr, end, &clen, enc);
116
-
117
- if (codepoint != 0 && !rb_isspace(codepoint)) {
118
- return Qfalse;
119
- }
120
-
328
+ unsigned int codepoint = rb_enc_codepoint_len((const char*)ptr, (const char*)end, &clen, enc);
329
+ if (codepoint != 0 && !rb_isspace(codepoint)) return Qfalse;
121
330
  ptr += clen;
122
331
  }
123
332
 
metadata CHANGED
@@ -1,13 +1,13 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sin_fast_blank
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.1
4
+ version: 4.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Masahiro
8
8
  bindir: exe
9
9
  cert_chain: []
10
- date: 2025-03-18 00:00:00.000000000 Z
10
+ date: 1980-01-02 00:00:00.000000000 Z
11
11
  dependencies: []
12
12
  description: Check for blank string faster than FastBlank or ActiveSupport
13
13
  email:
@@ -19,15 +19,15 @@ extra_rdoc_files: []
19
19
  files:
20
20
  - ext/sin_fast_blank/extconf.rb
21
21
  - ext/sin_fast_blank/sin_fast_blank.c
22
- homepage: https://github.com/cadenza-tech/sin_fast_blank/tree/v3.1.1
22
+ homepage: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.1
23
23
  licenses:
24
24
  - MIT
25
25
  metadata:
26
- homepage_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/v3.1.1
27
- source_code_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/v3.1.1
28
- changelog_uri: https://github.com/cadenza-tech/sin_fast_blank/blob/v3.1.1/CHANGELOG.md
26
+ homepage_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.1
27
+ source_code_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.1
28
+ changelog_uri: https://github.com/cadenza-tech/sin_fast_blank/blob/v4.0.1/CHANGELOG.md
29
29
  bug_tracker_uri: https://github.com/cadenza-tech/sin_fast_blank/issues
30
- documentation_uri: https://rubydoc.info/gems/sin_fast_blank/3.1.1
30
+ documentation_uri: https://rubydoc.info/gems/sin_fast_blank/4.0.1
31
31
  funding_uri: https://patreon.com/CadenzaTech
32
32
  rubygems_mfa_required: 'true'
33
33
  required_jruby_version: ">= 9.3.0.0"
@@ -47,7 +47,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
47
47
  - !ruby/object:Gem::Version
48
48
  version: '0'
49
49
  requirements: []
50
- rubygems_version: 3.6.2
50
+ rubygems_version: 3.6.9
51
51
  specification_version: 4
52
52
  summary: Check for blank string faster than FastBlank or ActiveSupport
53
53
  test_files: []