sin_fast_blank 4.0.0 → 4.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bcfe1a1adde100fe8c917a5ed228c8d768db39889057af0bf416e00da3d1ad79
4
- data.tar.gz: 96d8c90536ebf9af9ecd318a7f55bffcf25891e97be4f0f729479171cd2da1a3
3
+ metadata.gz: 715b972c44a78f3a18dcee12c9133c5b38acce03a76fee4031df21d3c5ec8b2f
4
+ data.tar.gz: bce4b9d3bd8058ab567c620f0cd684f817f4193b20271e481c25a0560954631b
5
5
  SHA512:
6
- metadata.gz: c9f184a85aeea502010499f881f16b2aa21c199f2f84dbe57b8e93641a31603a5a1a27afadcbbd805dc49e25a1361bfa91af1ef71e25ae30e0caf2a73d06bd67
7
- data.tar.gz: 9fb4fe47265d8c23b5f093dc9d0ce05f5b5d81b501a7e70466cbaec7389719f18999a46981ed96c027580051ae06fd151f734bdf3f5d6ed0a9ee43bf8f010b8d
6
+ metadata.gz: c8643cd88797cb3d5824536ddfc7f60b4f7bf5718a20a3464b9231720c50e0558fa5234c1f23f0adcb1a8b31664d3a4bb25c3209545619ea0a00e1f5e89408bf
7
+ data.tar.gz: 14d825f306f16680badaab6c8a1153ef817f889f73ad45169ec6069abf67f25358ed4324c4184c742760cb198cd2066ad32f249fd71e67ddbeafbd755ee2c88f
@@ -2,34 +2,28 @@
2
2
  #include <ruby/encoding.h>
3
3
  #include <stdbool.h>
4
4
  #ifdef __SSE2__
5
- #include <emmintrin.h>
5
+ #include <emmintrin.h>
6
6
  #endif
7
7
  #ifdef __AVX2__
8
- #include <immintrin.h>
8
+ #include <immintrin.h>
9
9
  #endif
10
- #ifdef __ARM_NEON
11
- #include <arm_neon.h>
10
+ #if defined(__ARM_NEON) && defined(__aarch64__)
11
+ #include <arm_neon.h>
12
12
  #endif
13
13
 
14
14
  #define STR_ENC_GET(str) rb_enc_from_index(ENCODING_GET(str))
15
15
 
16
- #define ASCII_BLANK_TAB 0x09
17
- #define ASCII_BLANK_LF 0x0a
18
- #define ASCII_BLANK_VT 0x0b
19
- #define ASCII_BLANK_FF 0x0c
20
- #define ASCII_BLANK_CR 0x0d
21
- #define ASCII_BLANK_SPACE 0x20
22
-
23
- static inline bool is_ascii_blank_char(unsigned char c) {
24
- return c == ASCII_BLANK_SPACE ||
25
- c == ASCII_BLANK_TAB ||
26
- c == ASCII_BLANK_LF ||
27
- c == ASCII_BLANK_VT ||
28
- c == ASCII_BLANK_FF ||
29
- c == ASCII_BLANK_CR;
16
+ #define ASCII_WS_RANGE_MIN 0x09
17
+ #define ASCII_WS_RANGE_MAX 0x0d
18
+ #define ASCII_WS_SPACE 0x20
19
+
20
+ static inline bool is_ascii_blank_char(unsigned char c) { return (c >= ASCII_WS_RANGE_MIN && c <= ASCII_WS_RANGE_MAX) || c == ASCII_WS_SPACE; }
21
+
22
+ static inline bool is_ascii_blank_or_null_char(unsigned char c) {
23
+ return c == 0x00 || (c >= ASCII_WS_RANGE_MIN && c <= ASCII_WS_RANGE_MAX) || c == ASCII_WS_SPACE;
30
24
  }
31
25
 
32
- static inline int is_unicode_blank(unsigned int codepoint) {
26
+ static inline bool is_unicode_blank(unsigned int codepoint) {
33
27
  switch (codepoint) {
34
28
  case 0x9:
35
29
  case 0xa:
@@ -56,65 +50,15 @@ static inline int is_unicode_blank(unsigned int codepoint) {
56
50
  case 0x202f:
57
51
  case 0x205f:
58
52
  case 0x3000:
59
- return 1;
53
+ return true;
60
54
  default:
61
- return 0;
55
+ return false;
62
56
  }
63
57
  }
64
58
 
65
- #ifdef __AVX2__
66
- static bool check_blank_avx2(const unsigned char *ptr, size_t len, const unsigned char **non_ascii_pos) {
67
- const __m256i ascii_mask = _mm256_set1_epi8(0x80);
68
- const __m256i space = _mm256_set1_epi8(ASCII_BLANK_SPACE);
69
- const __m256i tab = _mm256_set1_epi8(ASCII_BLANK_TAB);
70
- const __m256i lf = _mm256_set1_epi8(ASCII_BLANK_LF);
71
- const __m256i vt = _mm256_set1_epi8(ASCII_BLANK_VT);
72
- const __m256i ff = _mm256_set1_epi8(ASCII_BLANK_FF);
73
- const __m256i cr = _mm256_set1_epi8(ASCII_BLANK_CR);
74
-
75
- size_t i = 0;
76
-
77
- for (; i + 31 < len; i += 32) {
78
- __m256i chunk = _mm256_loadu_si256((const __m256i *)(ptr + i));
79
-
80
- __m256i non_ascii = _mm256_and_si256(chunk, ascii_mask);
81
- if (!_mm256_testz_si256(non_ascii, non_ascii)) {
82
- for (size_t j = 0; j < 32; j++) {
83
- if (ptr[i + j] >= 0x80) {
84
- *non_ascii_pos = ptr + i + j;
85
- return false;
86
- }
87
- }
88
- }
89
-
90
- __m256i is_space = _mm256_cmpeq_epi8(chunk, space);
91
- __m256i is_tab = _mm256_cmpeq_epi8(chunk, tab);
92
- __m256i is_lf = _mm256_cmpeq_epi8(chunk, lf);
93
- __m256i is_vt = _mm256_cmpeq_epi8(chunk, vt);
94
- __m256i is_ff = _mm256_cmpeq_epi8(chunk, ff);
95
- __m256i is_cr = _mm256_cmpeq_epi8(chunk, cr);
96
-
97
- __m256i is_blank = _mm256_or_si256(is_space, is_tab);
98
- is_blank = _mm256_or_si256(is_blank, is_lf);
99
- is_blank = _mm256_or_si256(is_blank, is_vt);
100
- is_blank = _mm256_or_si256(is_blank, is_ff);
101
- is_blank = _mm256_or_si256(is_blank, is_cr);
102
-
103
- if (_mm256_movemask_epi8(is_blank) != -1) {
104
- for (size_t j = 0; j < 32; j++) {
105
- unsigned char c = ptr[i + j];
106
- if (c >= 0x80) {
107
- *non_ascii_pos = ptr + i + j;
108
- return false;
109
- }
110
- if (!is_ascii_blank_char(c)) {
111
- return false;
112
- }
113
- }
114
- }
115
- }
116
-
117
- for (; i < len; i++) {
59
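+ /* Returns true if every byte in [ptr, ptr+len) is an ASCII blank. On false, *non_ascii_pos is set to the first non-ASCII byte when that is what stopped the scan; it is left unwritten when a non-blank ASCII byte stopped it (callers initialise it to NULL beforehand). */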
60
+ static inline bool scan_ascii_blank(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
61
+ for (size_t i = 0; i < len; i++) {
118
62
  unsigned char c = ptr[i];
119
63
  if (c >= 0x80) {
120
64
  *non_ascii_pos = ptr + i;
@@ -124,207 +68,227 @@ static bool check_blank_avx2(const unsigned char *ptr, size_t len, const unsigne
124
68
  return false;
125
69
  }
126
70
  }
127
-
128
71
  return true;
129
72
  }
130
- #endif
131
-
132
- #ifdef __SSE2__
133
- static bool check_blank_sse2(const unsigned char *ptr, size_t len, const unsigned char **non_ascii_pos) {
134
- const __m128i ascii_mask = _mm_set1_epi8(0x80);
135
- const __m128i space = _mm_set1_epi8(ASCII_BLANK_SPACE);
136
- const __m128i tab = _mm_set1_epi8(ASCII_BLANK_TAB);
137
- const __m128i lf = _mm_set1_epi8(ASCII_BLANK_LF);
138
- const __m128i vt = _mm_set1_epi8(ASCII_BLANK_VT);
139
- const __m128i ff = _mm_set1_epi8(ASCII_BLANK_FF);
140
- const __m128i cr = _mm_set1_epi8(ASCII_BLANK_CR);
141
-
142
- size_t i = 0;
143
-
144
- for (; i + 15 < len; i += 16) {
145
- __m128i chunk = _mm_loadu_si128((const __m128i *)(ptr + i));
146
-
147
- __m128i non_ascii = _mm_and_si128(chunk, ascii_mask);
148
- if (_mm_movemask_epi8(non_ascii) != 0) {
149
- for (size_t j = 0; j < 16; j++) {
150
- if (ptr[i + j] >= 0x80) {
151
- *non_ascii_pos = ptr + i + j;
152
- return false;
153
- }
154
- }
155
- }
156
-
157
- __m128i is_space = _mm_cmpeq_epi8(chunk, space);
158
- __m128i is_tab = _mm_cmpeq_epi8(chunk, tab);
159
- __m128i is_lf = _mm_cmpeq_epi8(chunk, lf);
160
- __m128i is_vt = _mm_cmpeq_epi8(chunk, vt);
161
- __m128i is_ff = _mm_cmpeq_epi8(chunk, ff);
162
- __m128i is_cr = _mm_cmpeq_epi8(chunk, cr);
163
-
164
- __m128i is_blank = _mm_or_si128(is_space, is_tab);
165
- is_blank = _mm_or_si128(is_blank, is_lf);
166
- is_blank = _mm_or_si128(is_blank, is_vt);
167
- is_blank = _mm_or_si128(is_blank, is_ff);
168
- is_blank = _mm_or_si128(is_blank, is_cr);
169
-
170
- if (_mm_movemask_epi8(is_blank) != 0xFFFF) {
171
- for (size_t j = 0; j < 16; j++) {
172
- unsigned char c = ptr[i + j];
173
- if (c >= 0x80) {
174
- *non_ascii_pos = ptr + i + j;
175
- return false;
176
- }
177
- if (!is_ascii_blank_char(c)) {
178
- return false;
179
- }
180
- }
181
- }
182
- }
183
73
 
184
- for (; i < len; i++) {
74
+ static inline bool scan_ascii_blank_or_null(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
75
+ for (size_t i = 0; i < len; i++) {
185
76
  unsigned char c = ptr[i];
186
77
  if (c >= 0x80) {
187
78
  *non_ascii_pos = ptr + i;
188
79
  return false;
189
80
  }
190
- if (!is_ascii_blank_char(c)) {
81
+ if (!is_ascii_blank_or_null_char(c)) {
191
82
  return false;
192
83
  }
193
84
  }
194
-
195
85
  return true;
196
86
  }
197
- #endif
198
87
 
199
- #ifdef __ARM_NEON
200
- static bool check_blank_neon(const unsigned char *ptr, size_t len, const unsigned char **non_ascii_pos) {
201
- const uint8x16_t ascii_mask = vdupq_n_u8(0x80);
202
- const uint8x16_t space = vdupq_n_u8(ASCII_BLANK_SPACE);
203
- const uint8x16_t tab = vdupq_n_u8(ASCII_BLANK_TAB);
204
- const uint8x16_t lf = vdupq_n_u8(ASCII_BLANK_LF);
205
- const uint8x16_t vt = vdupq_n_u8(ASCII_BLANK_VT);
206
- const uint8x16_t ff = vdupq_n_u8(ASCII_BLANK_FF);
207
- const uint8x16_t cr = vdupq_n_u8(ASCII_BLANK_CR);
88
+ #ifdef __AVX2__
89
+ static bool check_blank_avx2(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
90
+ const __m256i ws_base = _mm256_set1_epi8(ASCII_WS_RANGE_MIN);
91
+ const __m256i four = _mm256_set1_epi8(ASCII_WS_RANGE_MAX - ASCII_WS_RANGE_MIN);
92
+ const __m256i space = _mm256_set1_epi8(ASCII_WS_SPACE);
208
93
 
209
94
  size_t i = 0;
95
+ for (; i + 31 < len; i += 32) {
96
+ __m256i chunk = _mm256_loadu_si256((const __m256i*)(ptr + i));
97
+ __m256i adjusted = _mm256_sub_epi8(chunk, ws_base);
98
+ __m256i in_range = _mm256_cmpeq_epi8(_mm256_min_epu8(adjusted, four), adjusted);
99
+ __m256i is_sp = _mm256_cmpeq_epi8(chunk, space);
100
+ __m256i is_blank = _mm256_or_si256(in_range, is_sp);
101
+
102
+ int mask = _mm256_movemask_epi8(is_blank);
103
+ if (mask != -1) {
104
+ int first = __builtin_ctz(~mask);
105
+ unsigned char c = ptr[i + first];
106
+ if (c >= 0x80) {
107
+ *non_ascii_pos = ptr + i + first;
108
+ }
109
+ return false;
110
+ }
111
+ }
210
112
 
211
- for (; i + 15 < len; i += 16) {
212
- uint8x16_t chunk = vld1q_u8(ptr + i);
113
+ return scan_ascii_blank(ptr + i, len - i, non_ascii_pos);
114
+ }
213
115
 
214
- uint8x16_t non_ascii = vandq_u8(chunk, ascii_mask);
215
- uint8x16_t has_non_ascii = vceqq_u8(non_ascii, ascii_mask);
116
+ static bool check_ascii_blank_avx2(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
117
+ const __m256i ws_base = _mm256_set1_epi8(ASCII_WS_RANGE_MIN);
118
+ const __m256i four = _mm256_set1_epi8(ASCII_WS_RANGE_MAX - ASCII_WS_RANGE_MIN);
119
+ const __m256i space = _mm256_set1_epi8(ASCII_WS_SPACE);
120
+ const __m256i zero = _mm256_setzero_si256();
216
121
 
217
- if (vmaxvq_u8(has_non_ascii) != 0) {
218
- for (size_t j = 0; j < 16; j++) {
219
- if (ptr[i + j] >= 0x80) {
220
- *non_ascii_pos = ptr + i + j;
221
- return false;
222
- }
122
+ size_t i = 0;
123
+ for (; i + 31 < len; i += 32) {
124
+ __m256i chunk = _mm256_loadu_si256((const __m256i*)(ptr + i));
125
+ __m256i adjusted = _mm256_sub_epi8(chunk, ws_base);
126
+ __m256i in_range = _mm256_cmpeq_epi8(_mm256_min_epu8(adjusted, four), adjusted);
127
+ __m256i is_sp = _mm256_cmpeq_epi8(chunk, space);
128
+ __m256i is_null = _mm256_cmpeq_epi8(chunk, zero);
129
+ __m256i is_blank = _mm256_or_si256(_mm256_or_si256(in_range, is_sp), is_null);
130
+
131
+ int mask = _mm256_movemask_epi8(is_blank);
132
+ if (mask != -1) {
133
+ int first = __builtin_ctz(~mask);
134
+ unsigned char c = ptr[i + first];
135
+ if (c >= 0x80) {
136
+ *non_ascii_pos = ptr + i + first;
223
137
  }
138
+ return false;
224
139
  }
140
+ }
225
141
 
226
- uint8x16_t is_space = vceqq_u8(chunk, space);
227
- uint8x16_t is_tab = vceqq_u8(chunk, tab);
228
- uint8x16_t is_lf = vceqq_u8(chunk, lf);
229
- uint8x16_t is_vt = vceqq_u8(chunk, vt);
230
- uint8x16_t is_ff = vceqq_u8(chunk, ff);
231
- uint8x16_t is_cr = vceqq_u8(chunk, cr);
142
+ return scan_ascii_blank_or_null(ptr + i, len - i, non_ascii_pos);
143
+ }
144
+ #endif
232
145
 
233
- uint8x16_t is_blank = vorrq_u8(is_space, is_tab);
234
- is_blank = vorrq_u8(is_blank, is_lf);
235
- is_blank = vorrq_u8(is_blank, is_vt);
236
- is_blank = vorrq_u8(is_blank, is_ff);
237
- is_blank = vorrq_u8(is_blank, is_cr);
146
+ #ifdef __SSE2__
147
+ static bool check_blank_sse2(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
148
+ const __m128i ws_base = _mm_set1_epi8(ASCII_WS_RANGE_MIN);
149
+ const __m128i four = _mm_set1_epi8(ASCII_WS_RANGE_MAX - ASCII_WS_RANGE_MIN);
150
+ const __m128i space = _mm_set1_epi8(ASCII_WS_SPACE);
238
151
 
239
- if (vminvq_u8(is_blank) == 0) {
240
- for (size_t j = 0; j < 16; j++) {
241
- unsigned char c = ptr[i + j];
242
- if (c >= 0x80) {
243
- *non_ascii_pos = ptr + i + j;
244
- return false;
245
- }
246
- if (!is_ascii_blank_char(c)) {
247
- return false;
248
- }
152
+ size_t i = 0;
153
+ for (; i + 15 < len; i += 16) {
154
+ __m128i chunk = _mm_loadu_si128((const __m128i*)(ptr + i));
155
+ __m128i adjusted = _mm_sub_epi8(chunk, ws_base);
156
+ __m128i in_range = _mm_cmpeq_epi8(_mm_min_epu8(adjusted, four), adjusted);
157
+ __m128i is_sp = _mm_cmpeq_epi8(chunk, space);
158
+ __m128i is_blank = _mm_or_si128(in_range, is_sp);
159
+
160
+ int mask = _mm_movemask_epi8(is_blank);
161
+ if (mask != 0xFFFF) {
162
+ int first = __builtin_ctz(~mask & 0xFFFF);
163
+ unsigned char c = ptr[i + first];
164
+ if (c >= 0x80) {
165
+ *non_ascii_pos = ptr + i + first;
249
166
  }
167
+ return false;
250
168
  }
251
169
  }
252
170
 
253
- for (; i < len; i++) {
254
- unsigned char c = ptr[i];
255
- if (c >= 0x80) {
256
- *non_ascii_pos = ptr + i;
257
- return false;
258
- }
259
- if (!is_ascii_blank_char(c)) {
171
+ return scan_ascii_blank(ptr + i, len - i, non_ascii_pos);
172
+ }
173
+
174
+ static bool check_ascii_blank_sse2(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
175
+ const __m128i ws_base = _mm_set1_epi8(ASCII_WS_RANGE_MIN);
176
+ const __m128i four = _mm_set1_epi8(ASCII_WS_RANGE_MAX - ASCII_WS_RANGE_MIN);
177
+ const __m128i space = _mm_set1_epi8(ASCII_WS_SPACE);
178
+ const __m128i zero = _mm_setzero_si128();
179
+
180
+ size_t i = 0;
181
+ for (; i + 15 < len; i += 16) {
182
+ __m128i chunk = _mm_loadu_si128((const __m128i*)(ptr + i));
183
+ __m128i adjusted = _mm_sub_epi8(chunk, ws_base);
184
+ __m128i in_range = _mm_cmpeq_epi8(_mm_min_epu8(adjusted, four), adjusted);
185
+ __m128i is_sp = _mm_cmpeq_epi8(chunk, space);
186
+ __m128i is_null = _mm_cmpeq_epi8(chunk, zero);
187
+ __m128i is_blank = _mm_or_si128(_mm_or_si128(in_range, is_sp), is_null);
188
+
189
+ int mask = _mm_movemask_epi8(is_blank);
190
+ if (mask != 0xFFFF) {
191
+ int first = __builtin_ctz(~mask & 0xFFFF);
192
+ unsigned char c = ptr[i + first];
193
+ if (c >= 0x80) {
194
+ *non_ascii_pos = ptr + i + first;
195
+ }
260
196
  return false;
261
197
  }
262
198
  }
263
199
 
264
- return true;
200
+ return scan_ascii_blank_or_null(ptr + i, len - i, non_ascii_pos);
265
201
  }
266
202
  #endif
267
203
 
268
- static bool check_blank_scalar(const unsigned char *ptr, size_t len, const unsigned char **non_ascii_pos) {
269
- for (size_t i = 0; i < len; i++) {
270
- unsigned char c = ptr[i];
204
+ #if defined(__ARM_NEON) && defined(__aarch64__)
205
+ static bool check_blank_neon(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
206
+ const uint8x16_t ws_base = vdupq_n_u8(ASCII_WS_RANGE_MIN);
207
+ const uint8x16_t four = vdupq_n_u8(ASCII_WS_RANGE_MAX - ASCII_WS_RANGE_MIN);
208
+ const uint8x16_t space = vdupq_n_u8(ASCII_WS_SPACE);
271
209
 
272
- if (c >= 0x80) {
273
- *non_ascii_pos = ptr + i;
274
- return false;
210
+ size_t i = 0;
211
+ for (; i + 15 < len; i += 16) {
212
+ uint8x16_t chunk = vld1q_u8(ptr + i);
213
+ uint8x16_t adjusted = vsubq_u8(chunk, ws_base);
214
+ uint8x16_t in_range = vceqq_u8(vminq_u8(adjusted, four), adjusted);
215
+ uint8x16_t is_sp = vceqq_u8(chunk, space);
216
+ uint8x16_t is_blank = vorrq_u8(in_range, is_sp);
217
+
218
+ if (vminvq_u8(is_blank) == 0) {
219
+ if (!scan_ascii_blank(ptr + i, 16, non_ascii_pos)) return false;
275
220
  }
221
+ }
276
222
 
277
- if (!is_ascii_blank_char(c)) {
278
- return false;
223
+ return scan_ascii_blank(ptr + i, len - i, non_ascii_pos);
224
+ }
225
+
226
+ static bool check_ascii_blank_neon(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
227
+ const uint8x16_t ws_base = vdupq_n_u8(ASCII_WS_RANGE_MIN);
228
+ const uint8x16_t four = vdupq_n_u8(ASCII_WS_RANGE_MAX - ASCII_WS_RANGE_MIN);
229
+ const uint8x16_t space = vdupq_n_u8(ASCII_WS_SPACE);
230
+ const uint8x16_t zero = vdupq_n_u8(0);
231
+
232
+ size_t i = 0;
233
+ for (; i + 15 < len; i += 16) {
234
+ uint8x16_t chunk = vld1q_u8(ptr + i);
235
+ uint8x16_t adjusted = vsubq_u8(chunk, ws_base);
236
+ uint8x16_t in_range = vceqq_u8(vminq_u8(adjusted, four), adjusted);
237
+ uint8x16_t is_sp = vceqq_u8(chunk, space);
238
+ uint8x16_t is_null = vceqq_u8(chunk, zero);
239
+ uint8x16_t is_blank = vorrq_u8(vorrq_u8(in_range, is_sp), is_null);
240
+
241
+ if (vminvq_u8(is_blank) == 0) {
242
+ if (!scan_ascii_blank_or_null(ptr + i, 16, non_ascii_pos)) return false;
279
243
  }
280
244
  }
281
245
 
282
- return true;
246
+ return scan_ascii_blank_or_null(ptr + i, len - i, non_ascii_pos);
247
+ }
248
+ #endif
249
+
250
+ #if !defined(__AVX2__) && !defined(__SSE2__) && !(defined(__ARM_NEON) && defined(__aarch64__))
251
+ static bool check_blank_scalar(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
252
+ return scan_ascii_blank(ptr, len, non_ascii_pos);
283
253
  }
284
254
 
255
+ static bool check_ascii_blank_scalar(const unsigned char* ptr, size_t len, const unsigned char** non_ascii_pos) {
256
+ return scan_ascii_blank_or_null(ptr, len, non_ascii_pos);
257
+ }
258
+ #endif
259
+
285
260
  static VALUE rb_str_blank(VALUE str) {
286
261
  long len = RSTRING_LEN(str);
287
- if (len == 0) {
288
- return Qtrue;
289
- }
262
+ if (len == 0) return Qtrue;
290
263
 
291
- const unsigned char *ptr = (const unsigned char *)RSTRING_PTR(str);
292
- const unsigned char *end = ptr + len;
293
- rb_encoding *enc = STR_ENC_GET(str);
264
+ const unsigned char* ptr = (const unsigned char*)RSTRING_PTR(str);
265
+ const unsigned char* end = ptr + len;
266
+ rb_encoding* enc = STR_ENC_GET(str);
294
267
 
295
268
  if (rb_enc_asciicompat(enc)) {
296
- const unsigned char *non_ascii_pos = NULL;
269
+ const unsigned char* non_ascii_pos = NULL;
297
270
  bool is_blank = false;
298
271
 
299
272
  #ifdef __AVX2__
300
- is_blank = check_blank_avx2(ptr, len, &non_ascii_pos);
273
+ is_blank = check_blank_avx2(ptr, (size_t)len, &non_ascii_pos);
301
274
  #elif defined(__SSE2__)
302
- is_blank = check_blank_sse2(ptr, len, &non_ascii_pos);
303
- #elif defined(__ARM_NEON)
304
- is_blank = check_blank_neon(ptr, len, &non_ascii_pos);
275
+ is_blank = check_blank_sse2(ptr, (size_t)len, &non_ascii_pos);
276
+ #elif defined(__ARM_NEON) && defined(__aarch64__)
277
+ is_blank = check_blank_neon(ptr, (size_t)len, &non_ascii_pos);
305
278
  #else
306
- is_blank = check_blank_scalar(ptr, len, &non_ascii_pos);
279
+ is_blank = check_blank_scalar(ptr, (size_t)len, &non_ascii_pos);
307
280
  #endif
308
281
 
309
- if (is_blank) {
310
- return Qtrue;
311
- }
282
+ if (is_blank) return Qtrue;
283
+ if (non_ascii_pos == NULL) return Qfalse;
312
284
 
313
- if (non_ascii_pos == NULL) {
314
- return Qfalse;
315
- }
316
-
317
- ptr = (const unsigned char *)non_ascii_pos;
285
+ ptr = non_ascii_pos;
318
286
  }
319
287
 
320
- while ((const char *)ptr < (const char *)end) {
288
+ while (ptr < end) {
321
289
  int clen;
322
- unsigned int codepoint = rb_enc_codepoint_len((const char *)ptr, (const char *)end, &clen, enc);
323
-
324
- if (!is_unicode_blank(codepoint)) {
325
- return Qfalse;
326
- }
327
-
290
+ unsigned int codepoint = rb_enc_codepoint_len((const char*)ptr, (const char*)end, &clen, enc);
291
+ if (!is_unicode_blank(codepoint)) return Qfalse;
328
292
  ptr += clen;
329
293
  }
330
294
 
@@ -333,39 +297,36 @@ static VALUE rb_str_blank(VALUE str) {
333
297
 
334
298
  static VALUE rb_str_ascii_blank(VALUE str) {
335
299
  long len = RSTRING_LEN(str);
336
- if (len == 0) {
337
- return Qtrue;
338
- }
300
+ if (len == 0) return Qtrue;
339
301
 
340
- const char *ptr = RSTRING_PTR(str);
341
- const char *end = ptr + len;
342
- rb_encoding *enc = STR_ENC_GET(str);
302
+ const unsigned char* ptr = (const unsigned char*)RSTRING_PTR(str);
303
+ const unsigned char* end = ptr + len;
304
+ rb_encoding* enc = STR_ENC_GET(str);
343
305
 
344
306
  if (rb_enc_asciicompat(enc)) {
345
- for (; ptr < end; ptr++) {
346
- unsigned char c = (unsigned char)*ptr;
307
+ const unsigned char* non_ascii_pos = NULL;
308
+ bool is_blank = false;
347
309
 
348
- if (c >= 0x80) {
349
- goto FULL_CHECK;
350
- }
310
+ #ifdef __AVX2__
311
+ is_blank = check_ascii_blank_avx2(ptr, (size_t)len, &non_ascii_pos);
312
+ #elif defined(__SSE2__)
313
+ is_blank = check_ascii_blank_sse2(ptr, (size_t)len, &non_ascii_pos);
314
+ #elif defined(__ARM_NEON) && defined(__aarch64__)
315
+ is_blank = check_ascii_blank_neon(ptr, (size_t)len, &non_ascii_pos);
316
+ #else
317
+ is_blank = check_ascii_blank_scalar(ptr, (size_t)len, &non_ascii_pos);
318
+ #endif
351
319
 
352
- if (!rb_isspace(c) && c != 0) {
353
- return Qfalse;
354
- }
355
- }
320
+ if (is_blank) return Qtrue;
321
+ if (non_ascii_pos == NULL) return Qfalse;
356
322
 
357
- return Qtrue;
323
+ ptr = non_ascii_pos;
358
324
  }
359
325
 
360
- FULL_CHECK:;
361
326
  while (ptr < end) {
362
327
  int clen;
363
- unsigned int codepoint = rb_enc_codepoint_len(ptr, end, &clen, enc);
364
-
365
- if (codepoint != 0 && !rb_isspace(codepoint)) {
366
- return Qfalse;
367
- }
368
-
328
+ unsigned int codepoint = rb_enc_codepoint_len((const char*)ptr, (const char*)end, &clen, enc);
329
+ if (codepoint != 0 && !rb_isspace(codepoint)) return Qfalse;
369
330
  ptr += clen;
370
331
  }
371
332
 
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: sin_fast_blank
3
3
  version: !ruby/object:Gem::Version
4
- version: 4.0.0
4
+ version: 4.0.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Masahiro
@@ -19,15 +19,15 @@ extra_rdoc_files: []
19
19
  files:
20
20
  - ext/sin_fast_blank/extconf.rb
21
21
  - ext/sin_fast_blank/sin_fast_blank.c
22
- homepage: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.0
22
+ homepage: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.1
23
23
  licenses:
24
24
  - MIT
25
25
  metadata:
26
- homepage_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.0
27
- source_code_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.0
28
- changelog_uri: https://github.com/cadenza-tech/sin_fast_blank/blob/v4.0.0/CHANGELOG.md
26
+ homepage_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.1
27
+ source_code_uri: https://github.com/cadenza-tech/sin_fast_blank/tree/v4.0.1
28
+ changelog_uri: https://github.com/cadenza-tech/sin_fast_blank/blob/v4.0.1/CHANGELOG.md
29
29
  bug_tracker_uri: https://github.com/cadenza-tech/sin_fast_blank/issues
30
- documentation_uri: https://rubydoc.info/gems/sin_fast_blank/4.0.0
30
+ documentation_uri: https://rubydoc.info/gems/sin_fast_blank/4.0.1
31
31
  funding_uri: https://patreon.com/CadenzaTech
32
32
  rubygems_mfa_required: 'true'
33
33
  required_jruby_version: ">= 9.3.0.0"