string_bits 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1802 @@
1
+ #include "ruby.h"
2
+ #include "ruby/encoding.h"
3
+
4
+ #include <limits.h> /* CHAR_BIT */
5
+ #include <stdint.h> /* uint64_t, UINT64_MAX */
6
+ #include <string.h> /* memcpy */
7
+ #include <sys/types.h> /* ssize_t (Ruby typedefs it on Windows) */
8
+
9
+ /* popcount ----------------------------------------------------------------- */
10
+ /*
11
+ * Porting to Ruby Core:
12
+ * 1. Remove sb_popcount64 and the #include block below.
13
+ * Use rb_popcount64 from internal/bits.h instead.
14
+ * 2. Add #include "internal/bits.h" at the top of string.c (or wherever
15
+ * String#popcount is implemented).
16
+ * 3. Replace all sb_popcount64 calls with rb_popcount64.
17
+ * 4. Move rb_str_popcount into string.c and register it in Init_String().
18
+ */
19
+
20
+ #if defined(HAVE_X86INTRIN_H)
21
+ # include <x86intrin.h>
22
+ #elif defined(_MSC_VER)
23
+ # include <intrin.h>
24
+ #endif
25
+
26
+ /* __has_builtin polyfill: GCC < 10 and other compilers do not define it.
27
+ * Treating it as always-false there causes the fallback paths to be used. */
28
+ #ifndef __has_builtin
29
+ # define __has_builtin(x) 0
30
+ #endif
31
+
32
+ /* Endianness detection.
33
+ * Prefer Ruby's autoconf-derived WORDS_BIGENDIAN (always available in Ruby
34
+ * extension builds). Falls back to __BYTE_ORDER__ (GCC/Clang) and known LE
35
+ * targets for MSVC. SB_LITTLE_ENDIAN evaluates to 1 only when we can prove
36
+ * the platform is LE; otherwise the portable byte-by-byte path is used. */
37
+ #if defined(WORDS_BIGENDIAN)
38
+ # define SB_LITTLE_ENDIAN 0
39
+ #elif defined(__BYTE_ORDER__) && __BYTE_ORDER__ == __ORDER_LITTLE_ENDIAN__
40
+ # define SB_LITTLE_ENDIAN 1
41
+ #elif defined(_MSC_VER)
42
+ # define SB_LITTLE_ENDIAN 1 /* MSVC targets (x86/x64/ARM64) are all LE */
43
+ #else
44
+ # define SB_LITTLE_ENDIAN 0
45
+ #endif
46
+
47
/* Count the bits set in a 64-bit word.
 *
 * Uses the MSVC intrinsic or the GCC/Clang builtin when one is available and
 * otherwise falls back to a portable pairwise-sum reduction. */
static inline unsigned int
sb_popcount64(uint64_t x)
{
#if defined(_MSC_VER) && defined(__AVX__)
    return (unsigned int)__popcnt64(x);
#elif __has_builtin(__builtin_popcount)
    /* Select the builtin whose operand is exactly 64 bits wide here. */
    return (sizeof(long) * CHAR_BIT == 64)
        ? (unsigned int)__builtin_popcountl((unsigned long)x)
        : (unsigned int)__builtin_popcountll((unsigned long long)x);
#else
    /* Round k adds neighbouring fields of width 2^k; each mask keeps just
     * enough bits per field to hold the largest possible partial sum. */
    static const uint64_t field_mask[6] = {
        0x5555555555555555,
        0x3333333333333333,
        0x0707070707070707,
        0x000f000f000f000f,
        0x0000001f0000001f,
        0x000000000000003f
    };
    for (int round = 0; round < 6; round++) {
        const int width = 1 << round;
        x = (x & field_mask[round]) + ((x >> width) & field_mask[round]);
    }
    return (unsigned int)x;
#endif
}
69
+
70
+ /* ctz / clz helpers for set-bit iteration ---------------------------------- */
71
+
72
+ static ID id_bracket;
73
+ static VALUE sym_lsb_first, sym_lsb, sym_msb, sym_invert;
74
+
75
/* Bit flags naming the keyword options a method accepts; OR them together
 * and pass the result as `allowed` to validate_option_hash(). */
enum sb_kw_flag {
    SB_KW_INVERT    = 0x01, /* the invert: keyword */
    SB_KW_LSB_FIRST = 0x02  /* the lsb_first: keyword */
};
79
+
80
/* Index (0 = least significant) of the lowest set bit of x.
 * x must be non-zero; callers pass a single byte value. */
static inline int
sb_ctz8(unsigned int x)
{
#if __has_builtin(__builtin_ctz)
    return __builtin_ctz(x);
#elif defined(_MSC_VER)
    unsigned long index;
    _BitScanForward(&index, x);
    return (int)index;
#else
    /* Binary search over the low 8 bits: drop empty low halves. */
    int pos = 0;
    if ((x & 0x0F) == 0) {
        x >>= 4;
        pos = 4;
    }
    if ((x & 0x03) == 0) {
        x >>= 2;
        pos += 2;
    }
    return pos + ((x & 0x01) == 0 ? 1 : 0);
#endif
}
98
+
99
/* Index (0 = least significant) of the highest set bit of x.
 * x must be non-zero; callers pass a single byte value. */
static inline int
sb_highest_bit8(unsigned int x)
{
#if __has_builtin(__builtin_clz)
    return 31 - __builtin_clz(x);
#elif defined(_MSC_VER)
    unsigned long index;
    _BitScanReverse(&index, x);
    return (int)index;
#else
    /* Binary search: whenever the upper half is occupied, move into it. */
    int pos = 0;
    if ((x >> 4) != 0) {
        x >>= 4;
        pos = 4;
    }
    if ((x >> 2) != 0) {
        x >>= 2;
        pos += 2;
    }
    if ((x >> 1) != 0) {
        pos += 1;
    }
    return pos;
#endif
}
117
+
118
/* Index (0 = least significant) of the lowest set bit of a 64-bit word.
 * x must be non-zero. */
static inline int
sb_ctzll(uint64_t x)
{
#if __has_builtin(__builtin_ctzll)
    return __builtin_ctzll(x);
#elif defined(_MSC_VER)
    unsigned long index;
    _BitScanForward64(&index, x);
    return (int)index;
#else
    /* Binary search: for widths 32, 16, 8, 4, 2, 1, skip the low `width`
     * bits whenever they are all zero. */
    int pos = 0;
    for (int width = 32; width > 0; width >>= 1) {
        const uint64_t low_bits = ((uint64_t)1 << width) - 1;
        if ((x & low_bits) == 0) {
            pos += width;
            x >>= width;
        }
    }
    return pos;
#endif
}
139
+
140
+ /* common functions --------------------------------------------------------- */
141
+
142
+ /*
143
+ * rb_str_get_bit: read one bit from a raw byte sequence.
144
+ *
145
+ * Porting to Ruby Core:
146
+ * 1. Move this declaration to include/ruby/internal/string.h (or
147
+ * internal/string.h for a non-public internal API).
148
+ * 2. Remove the `static inline` storage class; declare as:
149
+ * static inline int rb_str_get_bit(const char *ptr, ssize_t bit_index, int lsb_first);
150
+ * in the header so both string.c and array.c can include it.
151
+ * 3. Replace all call sites in string.c and array.c accordingly.
152
+ *
153
+ * Parameters:
154
+ * ptr - pointer to the first byte of the bitmap
155
+ * bit_index - flat zero-based position; byte = bit_index/8 from ptr
156
+ * lsb_first - non-zero: bit 0 of each byte is the LSB (Arrow/hardware convention)
157
+ * zero: bit 0 of each byte is the MSB (byte order preserved)
158
+ *
159
+ * Returns 0 or 1.
160
+ */
161
static inline int
rb_str_get_bit(const char *ptr, ssize_t bit_index, int lsb_first)
{
    /* Read the byte as unsigned: plain `char` may be signed, and
     * right-shifting a negative value is implementation-defined in C. */
    const unsigned char byte = (unsigned char)ptr[bit_index / 8];
    const int within_byte = (int)(bit_index % 8);
    /* LSB-first counts up from bit 0; otherwise index 0 is the byte's MSB. */
    const int shift = lsb_first ? within_byte : 7 - within_byte;
    return (byte >> shift) & 1;
}
168
+
169
/* Read the bit at flat LSB-first position bit_index; returns 0 or 1. */
static inline int
test_bit(const char *ptr, ssize_t bit_index)
{
    const int lsb_first = 1;
    return rb_str_get_bit(ptr, bit_index, lsb_first);
}
174
+
175
+ /* Convert a Ruby Integer to a ssize_t bit index.
176
+ *
177
+ * Raises ArgumentError for Bignums on all platforms: a Bignum cannot be a
178
+ * valid bit index for any real string, and raising explicitly is clearer than
179
+ * silently mapping to a sentinel value that later triggers a different error.
180
+ * NUM2SSIZET is width-aware (uses FIX2LL on LLP64, FIX2LONG on LP64) so the
181
+ * FIXNUM extraction does not truncate large FIXNUMs on Windows.
182
+ *
183
+ * RBIGNUM_NEGATIVE_P is available via ruby.h -> ruby/internal/core/rbignum.h. */
184
+ static ssize_t
185
+ integer_to_bit_idx(VALUE n)
186
+ {
187
+ if (FIXNUM_P(n)) return NUM2SSIZET(n);
188
+ RUBY_ASSERT(RB_TYPE_P(n, T_BIGNUM));
189
+ rb_raise(rb_eArgError, "bit index out of representable range");
190
+ UNREACHABLE_RETURN(0);
191
+ }
192
+
193
+ static ssize_t
194
+ check_bit_index(VALUE self, VALUE n, int lsb_first)
195
+ {
196
+ if (!rb_integer_type_p(n)) {
197
+ rb_raise(rb_eTypeError, "bit index must be an integer");
198
+ }
199
+ ssize_t idx = integer_to_bit_idx(n);
200
+ ssize_t size = RSTRING_LEN(self) * 8;
201
+ if (idx < 0 || idx >= size) {
202
+ rb_raise(rb_eIndexError, "bit index out of range");
203
+ }
204
+ if (!lsb_first) idx = (idx & ~7L) | (7 - (idx & 7L));
205
+ return idx;
206
+ }
207
+
208
/* Map a physical (LSB-first) bit position to the position reported under
 * the requested numbering. With lsb_first == 0 the position within the byte
 * is mirrored (p becomes 7 - p); the byte index is unchanged. */
static inline ssize_t
physical_to_count_from(ssize_t physical, int lsb_first)
{
    if (lsb_first) {
        return physical;
    }
    /* Flipping the low three bits is exactly p -> 7 - p within the byte. */
    return physical ^ 7;
}
213
+
214
/* Map a logical bit position to its physical (LSB-first) position.
 * With lsb_first == 0, logical bit 0 of each byte is that byte's MSB. */
static inline ssize_t
logical_to_physical(ssize_t logical, int lsb_first)
{
    if (lsb_first) {
        return logical;
    }
    const ssize_t in_byte = logical & 7L;
    const ssize_t byte_base = logical - in_byte;
    return byte_base + (7 - in_byte);
}
219
+
220
/* Read the bit at a logical position under the given numbering; 0 or 1. */
static inline int
logical_get_bit(const unsigned char *ptr, ssize_t logical_index, int lsb_first)
{
    const ssize_t physical = logical_to_physical(logical_index, lsb_first);
    return test_bit((const char *)ptr, physical);
}
225
+
226
/* Set (bit != 0) or clear (bit == 0) the bit at physical LSB-first position
 * bit_index in the buffer at ptr. */
static inline void
physical_write_bit(unsigned char *ptr, ssize_t bit_index, int bit)
{
    unsigned char *cell = &ptr[bit_index >> 3];
    const unsigned char bit_mask = (unsigned char)(1u << (bit_index & 7));

    if (bit) {
        *cell = (unsigned char)(*cell | bit_mask);
    }
    else {
        *cell = (unsigned char)(*cell & (unsigned char)~bit_mask);
    }
}
233
+
234
/* Write one bit at a logical position under the given numbering. */
static inline void
logical_write_bit(unsigned char *ptr, ssize_t logical_index, int lsb_first, int bit)
{
    const ssize_t physical = logical_to_physical(logical_index, lsb_first);
    physical_write_bit(ptr, physical, bit);
}
239
+
240
+ /* ssize_t-interface wrapper around rb_range_beg_len.
241
+ *
242
+ * rb_range_beg_len() takes (long *begp, long *lenp, long len), but this
243
+ * extension uses ssize_t throughout for LP64/LLP64 uniformity. On Windows
244
+ * (LLP64) long is 32-bit while ssize_t is 64-bit, so passing &ssize_t to the
245
+ * stock API is a type error and the build fails. This wrapper bridges the
246
+ * two and clamps the input length to LONG_MAX on platforms where ssize_t
247
+ * is wider than long, which has no practical effect: a 2 GiB string is
248
+ * already past what any realistic caller will hand us.
249
+ *
250
+ * It also catches the RangeError that rb_range_beg_len raises when a Range
251
+ * endpoint is a Bignum too large for `long`, and re-raises it as IndexError
252
+ * so that out-of-range bit positions report uniformly across LP64 and LLP64.
253
+ */
254
+ struct sb_range_args {
255
+ VALUE range;
256
+ long *lbegp;
257
+ long *llenp;
258
+ long len;
259
+ int err;
260
+ };
261
+
262
+ static VALUE
263
+ sb_range_beg_len_call(VALUE arg)
264
+ {
265
+ struct sb_range_args *a = (struct sb_range_args *)arg;
266
+ return rb_range_beg_len(a->range, a->lbegp, a->llenp, a->len, a->err);
267
+ }
268
+
269
+ static inline VALUE
270
+ sb_range_beg_len(VALUE range, ssize_t *begp, ssize_t *lenp, ssize_t len, int err)
271
+ {
272
+ long lbeg = 0, llen = 0;
273
+ long clipped = (len > (ssize_t)LONG_MAX) ? LONG_MAX : (long)len;
274
+ struct sb_range_args args = { range, &lbeg, &llen, clipped, err };
275
+ int state = 0;
276
+ VALUE result = rb_protect(sb_range_beg_len_call, (VALUE)&args, &state);
277
+ if (state) {
278
+ VALUE exc = rb_errinfo();
279
+ rb_set_errinfo(Qnil);
280
+ if (rb_obj_is_kind_of(exc, rb_eRangeError)) {
281
+ rb_raise(rb_eIndexError, "bit range out of range");
282
+ }
283
+ rb_exc_raise(exc);
284
+ }
285
+ if (begp) *begp = (ssize_t)lbeg;
286
+ if (lenp) *lenp = (ssize_t)llen;
287
+ return result;
288
+ }
289
+
290
+ static void
291
+ validate_option_hash(VALUE opts, unsigned allowed)
292
+ {
293
+ if (NIL_P(opts)) return;
294
+ Check_Type(opts, T_HASH);
295
+
296
+ VALUE keys = rb_funcall(opts, rb_intern("keys"), 0);
297
+ ssize_t len = RARRAY_LEN(keys);
298
+
299
+ for (ssize_t i = 0; i < len; i++) {
300
+ VALUE key = RARRAY_AREF(keys, i);
301
+ if (((allowed & SB_KW_LSB_FIRST) && key == sym_lsb_first) ||
302
+ ((allowed & SB_KW_INVERT) && key == sym_invert)) {
303
+ continue;
304
+ }
305
+
306
+ rb_raise(rb_eArgError, "unknown keyword: %"PRIsVALUE, rb_inspect(key));
307
+ }
308
+ }
309
+
310
+ static int
311
+ parse_bool_opt(VALUE opts, VALUE key, const char *name, int default_value)
312
+ {
313
+ if (NIL_P(opts)) return default_value;
314
+ VALUE value = rb_hash_aref(opts, key);
315
+ if (NIL_P(value)) return default_value;
316
+ if (value == Qtrue) return 1;
317
+ if (value == Qfalse) return 0;
318
+ rb_raise(rb_eArgError, "%s must be true or false", name);
319
+ return default_value;
320
+ }
321
+
322
+ static int
323
+ parse_lsb_first_opt(VALUE opts)
324
+ {
325
+ return parse_bool_opt(opts, sym_lsb_first, "lsb_first", 1);
326
+ }
327
+
328
+ static int
329
+ parse_lsb_first(int argc, VALUE *argv)
330
+ {
331
+ VALUE opts = Qnil;
332
+ rb_scan_args(argc, argv, "0:", &opts);
333
+ validate_option_hash(opts, SB_KW_LSB_FIRST);
334
+ return parse_lsb_first_opt(opts);
335
+ }
336
+
337
+ /* read -------------------------------------------------------------------- */
338
+
339
+ /* String#bit_at(n, lsb_first: true) -> true, false, or nil
340
+ *
341
+ * bit_at uses flat/Arrow convention: byte_index = n/8 from start, bit = n%8 from LSB
342
+ * e.g. "\xAA\xCC": bit 0..7 live in byte[0]=0xAA, bit 8..15 live in byte[1]=0xCC
343
+ *
344
+ * str = "\xFF\xAA" # 11111111 10101010
345
+ * str.bit_at(0) # => true (1st bit is set)
346
+ * str.bit_at(7) # => true (8th bit is set)
347
+ * str.bit_at(8) # => false (9th bit is clear)
348
+ * str.bit_at(9) # => true (10th bit is set)
349
+ * str.bit_at(16) # => nil
350
+ */
351
+ static VALUE
352
+ rb_str_bit_at(int argc, VALUE *argv, VALUE self)
353
+ {
354
+ VALUE n, opts;
355
+ rb_scan_args(argc, argv, "1:", &n, &opts);
356
+ validate_option_hash(opts, SB_KW_LSB_FIRST);
357
+
358
+ if (!rb_integer_type_p(n)) {
359
+ rb_raise(rb_eTypeError, "bit index must be an integer");
360
+ }
361
+ ssize_t idx = integer_to_bit_idx(n);
362
+ if (idx < 0) {
363
+ rb_raise(rb_eArgError, "bit index must be non-negative");
364
+ }
365
+ ssize_t size = RSTRING_LEN(self) * 8;
366
+ if (size <= idx) {
367
+ return Qnil;
368
+ }
369
+
370
+ int lsb_first = parse_lsb_first_opt(opts);
371
+
372
+ if (!lsb_first) {
373
+ idx = (idx & ~7L) | (7 - (idx & 7L));
374
+ }
375
+
376
+ if (test_bit(RSTRING_PTR(self), idx)) {
377
+ return Qtrue;
378
+ } else {
379
+ return Qfalse;
380
+ }
381
+ }
382
+
383
+ static VALUE
384
+ rb_str_bit_count(VALUE self)
385
+ {
386
+ ssize_t count = 0;
387
+ ssize_t len = RSTRING_LEN(self);
388
+ const char *str = RSTRING_PTR(self);
389
+ ssize_t off = 0;
390
+ ssize_t unrolled_end = len & ~31L;
391
+ ssize_t aligned_end = len & ~7L;
392
+
393
+ /* Use memcpy to avoid unaligned loads (SIGBUS on SPARC, MIPS, etc.)
394
+ * and strict-aliasing violations. Modern compilers fold 8-byte memcpy
395
+ * into a single load on platforms that allow unaligned access. */
396
+ for (; off < unrolled_end; off += 32) {
397
+ uint64_t w0, w1, w2, w3;
398
+ memcpy(&w0, str + off, 8);
399
+ memcpy(&w1, str + off + 8, 8);
400
+ memcpy(&w2, str + off + 16, 8);
401
+ memcpy(&w3, str + off + 24, 8);
402
+ count += sb_popcount64(w0);
403
+ count += sb_popcount64(w1);
404
+ count += sb_popcount64(w2);
405
+ count += sb_popcount64(w3);
406
+ }
407
+
408
+ for (; off < aligned_end; off += 8) {
409
+ uint64_t w;
410
+ memcpy(&w, str + off, 8);
411
+ count += sb_popcount64(w);
412
+ }
413
+
414
+ ssize_t remainder = len - aligned_end;
415
+ if (remainder > 0) {
416
+ uint64_t last = 0;
417
+ const unsigned char *tail = (const unsigned char *)(str + aligned_end);
418
+ for (ssize_t i = 0; i < remainder; i++) {
419
+ last |= (uint64_t)tail[i] << (i * 8);
420
+ }
421
+ count += sb_popcount64(last);
422
+ }
423
+
424
+ return SSIZET2NUM(count);
425
+ }
426
+
427
+ /* iterate bits ------------------------------------------------------------ */
428
+
429
+ static VALUE
430
+ rb_str_each_bit(int argc, VALUE *argv, VALUE self)
431
+ {
432
+ RETURN_ENUMERATOR(self, argc, argv);
433
+
434
+ int lsb_first = parse_lsb_first(argc, argv);
435
+ ssize_t len = RSTRING_LEN(self);
436
+ const unsigned char *str = (const unsigned char *)RSTRING_PTR(self);
437
+
438
+ for (ssize_t i = 0; i < len; i++) {
439
+ unsigned char b = str[i];
440
+ if (lsb_first) {
441
+ for (int j = 0; j < 8; j++) {
442
+ rb_yield((b >> j) & 1 ? Qtrue : Qfalse);
443
+ }
444
+ } else {
445
+ for (int j = 7; j >= 0; j--) {
446
+ rb_yield((b >> j) & 1 ? Qtrue : Qfalse);
447
+ }
448
+ }
449
+ }
450
+
451
+ return self;
452
+ }
453
+
454
+ static VALUE
455
+ rb_str_bits(int argc, VALUE *argv, VALUE self)
456
+ {
457
+ int lsb_first = parse_lsb_first(argc, argv);
458
+ ssize_t len = RSTRING_LEN(self);
459
+ const unsigned char *str = (const unsigned char *)RSTRING_PTR(self);
460
+ ssize_t total_bits = len * 8;
461
+ int have_block = rb_block_given_p();
462
+
463
+ VALUE ary = have_block ? Qnil : rb_ary_new_capa(total_bits);
464
+
465
+ for (ssize_t i = 0; i < len; i++) {
466
+ unsigned char b = str[i];
467
+ if (lsb_first) {
468
+ for (int j = 0; j < 8; j++) {
469
+ VALUE bit = (b >> j) & 1 ? Qtrue : Qfalse;
470
+ have_block ? rb_yield(bit) : rb_ary_push(ary, bit);
471
+ }
472
+ } else {
473
+ for (int j = 7; j >= 0; j--) {
474
+ VALUE bit = (b >> j) & 1 ? Qtrue : Qfalse;
475
+ have_block ? rb_yield(bit) : rb_ary_push(ary, bit);
476
+ }
477
+ }
478
+ }
479
+
480
+ return have_block ? self : ary;
481
+ }
482
+
483
+ /* iterate set-bit positions ----------------------------------------------- */
484
+
485
+ static VALUE
486
+ rb_str_each_set_bit_offset(int argc, VALUE *argv, VALUE self)
487
+ {
488
+ RETURN_ENUMERATOR(self, argc, argv);
489
+
490
+ int lsb_first = parse_lsb_first(argc, argv);
491
+ ssize_t len = RSTRING_LEN(self);
492
+ const unsigned char *str = (const unsigned char *)RSTRING_PTR(self);
493
+ if (lsb_first) {
494
+ /* LSB-first: ascending positions 0, 1, 2, ...
495
+ * On little-endian, loading 8 bytes as a uint64_t preserves the flat
496
+ * LSB-first bit numbering: word bit 0 == position 0, bit 63 == 63.
497
+ * memcpy avoids unaligned-load SIGBUS on strict-alignment platforms. */
498
+ #if SB_LITTLE_ENDIAN
499
+ ssize_t n_words = len >> 3;
500
+ for (ssize_t wi = 0; wi < n_words; wi++) {
501
+ uint64_t w;
502
+ memcpy(&w, str + wi * 8, 8);
503
+ while (w != 0) {
504
+ int bit = sb_ctzll(w);
505
+ rb_yield(SSIZET2NUM(wi * 64 + bit));
506
+ w &= w - 1;
507
+ }
508
+ }
509
+ for (ssize_t bi = n_words << 3; bi < len; bi++) {
510
+ unsigned int b = str[bi];
511
+ while (b != 0) {
512
+ int bit = sb_ctz8(b);
513
+ rb_yield(SSIZET2NUM(bi * 8 + bit));
514
+ b &= b - 1;
515
+ }
516
+ }
517
+ #else
518
+ for (ssize_t bi = 0; bi < len; bi++) {
519
+ unsigned int b = str[bi];
520
+ while (b != 0) {
521
+ int bit = sb_ctz8(b);
522
+ rb_yield(SSIZET2NUM(bi * 8 + bit));
523
+ b &= b - 1;
524
+ }
525
+ }
526
+ #endif
527
+ }
528
+ else {
529
+ /* lsb_first: false => byte order preserved, bits 7..0 map to logical 0..7 */
530
+ for (ssize_t bi = 0; bi < len; bi++) {
531
+ unsigned int b = str[bi];
532
+ while (b != 0) {
533
+ int bit = sb_highest_bit8(b);
534
+ ssize_t physical = bi * 8 + bit;
535
+ rb_yield(SSIZET2NUM(physical_to_count_from(physical, 0)));
536
+ b ^= (1u << bit); /* clear highest set bit */
537
+ }
538
+ }
539
+ }
540
+
541
+ return self;
542
+ }
543
+
544
+ static VALUE
545
+ rb_str_set_bit_offsets(int argc, VALUE *argv, VALUE self)
546
+ {
547
+ int lsb_first = parse_lsb_first(argc, argv);
548
+ ssize_t len = RSTRING_LEN(self);
549
+ const unsigned char *str = (const unsigned char *)RSTRING_PTR(self);
550
+ int have_block = rb_block_given_p();
551
+
552
+ VALUE ary;
553
+ if (have_block) {
554
+ ary = Qnil;
555
+ }
556
+ else {
557
+ /* Pre-size the Array with popcount to avoid repeated reallocation.
558
+ * memcpy avoids unaligned-load issues on strict-alignment platforms. */
559
+ ssize_t count = 0;
560
+ ssize_t nw = len >> 3;
561
+ for (ssize_t wi = 0; wi < nw; wi++) {
562
+ uint64_t w;
563
+ memcpy(&w, str + wi * 8, 8);
564
+ count += sb_popcount64(w);
565
+ }
566
+ for (ssize_t bi = nw << 3; bi < len; bi++)
567
+ count += sb_popcount64((uint64_t)(unsigned char)str[bi]);
568
+ ary = rb_ary_new_capa(count);
569
+ }
570
+
571
+ if (lsb_first) {
572
+ #if SB_LITTLE_ENDIAN
573
+ ssize_t n_words = len >> 3;
574
+ for (ssize_t wi = 0; wi < n_words; wi++) {
575
+ uint64_t w;
576
+ memcpy(&w, str + wi * 8, 8);
577
+ while (w != 0) {
578
+ int bit = sb_ctzll(w);
579
+ VALUE pos = SSIZET2NUM(wi * 64 + bit);
580
+ have_block ? rb_yield(pos) : rb_ary_push(ary, pos);
581
+ w &= w - 1;
582
+ }
583
+ }
584
+ for (ssize_t bi = n_words << 3; bi < len; bi++) {
585
+ unsigned int b = str[bi];
586
+ while (b != 0) {
587
+ int bit = sb_ctz8(b);
588
+ VALUE pos = SSIZET2NUM(bi * 8 + bit);
589
+ have_block ? rb_yield(pos) : rb_ary_push(ary, pos);
590
+ b &= b - 1;
591
+ }
592
+ }
593
+ #else
594
+ for (ssize_t bi = 0; bi < len; bi++) {
595
+ unsigned int b = str[bi];
596
+ while (b != 0) {
597
+ int bit = sb_ctz8(b);
598
+ VALUE pos = SSIZET2NUM(bi * 8 + bit);
599
+ have_block ? rb_yield(pos) : rb_ary_push(ary, pos);
600
+ b &= b - 1;
601
+ }
602
+ }
603
+ #endif
604
+ }
605
+ else {
606
+ for (ssize_t bi = 0; bi < len; bi++) {
607
+ unsigned int b = str[bi];
608
+ while (b != 0) {
609
+ int bit = sb_highest_bit8(b);
610
+ ssize_t physical = bi * 8 + bit;
611
+ VALUE pos = SSIZET2NUM(physical_to_count_from(physical, 0));
612
+ have_block ? rb_yield(pos) : rb_ary_push(ary, pos);
613
+ b ^= (1u << bit);
614
+ }
615
+ }
616
+ }
617
+
618
+ return have_block ? self : ary;
619
+ }
620
+
621
+ /* multi-bit mutation ------------------------------------------------------ */
622
+
623
+ /*
624
+ * bit_copy_core: copy `length` bits from src[src_bit_off] to dst[dst_bit_off].
625
+ *
626
+ * Both offsets are in the LSB-first flat bit numbering used throughout
627
+ * string_bits. The routine does not resize dst; the caller must ensure
628
+ * dst has enough bytes.
629
+ *
630
+ * Algorithm:
631
+ * 1. Extract the src bits into a small aligned tmp buffer (identical to the
632
+ * bit_slice read path).
633
+ * 2. Write tmp into dst with shift/mask merge (handles the unaligned case).
634
+ *
635
+ * Porting to Ruby Core:
636
+ * 1. Move alongside bit_slice in string.c.
637
+ * 2. Share the extract loop with rb_str_bit_slice via ebs_extract.
638
+ * 3. Remove `static`.
639
+ */
640
static void
bit_copy_core(unsigned char *dst, ssize_t dst_bit_off,
              const unsigned char *src, ssize_t src_len_bytes,
              ssize_t src_bit_off, ssize_t length)
{
    if (length == 0) return;

    const ssize_t n_tmp = (length + 7) >> 3;   /* bytes needed for `length` bits */
    const int partial = (int)(length & 7);     /* bits used in the last tmp byte */

    /* Small copies use the stack; larger ones borrow from the Ruby heap. */
    unsigned char local_buf[256];
    unsigned char *tmp = local_buf;
    if (n_tmp > (ssize_t)sizeof(local_buf)) {
        tmp = (unsigned char *)ruby_xmalloc(n_tmp);
    }

    /* Phase 1: gather the source bits into tmp, starting at tmp bit 0. */
    const ssize_t src_first = src_bit_off >> 3;
    const int src_down = (int)(src_bit_off & 7);
    if (src_down == 0) {
        memcpy(tmp, src + src_first, n_tmp);
    }
    else {
        const int src_up = 8 - src_down;
        for (ssize_t k = 0; k < n_tmp; k++) {
            const unsigned char here = src[src_first + k];
            unsigned char next = 0;
            /* The following byte may lie past the end of src; treat as 0. */
            if (src_first + k + 1 < src_len_bytes) {
                next = src[src_first + k + 1];
            }
            tmp[k] = (unsigned char)((here >> src_down) | (next << src_up));
        }
    }
    /* Zero the unused high bits of the final tmp byte. */
    if (partial) {
        tmp[n_tmp - 1] &= (unsigned char)((1u << partial) - 1);
    }

    /* Phase 2: merge tmp into dst at dst_bit_off, preserving dst bits
     * outside the written window. */
    const ssize_t dst_first = dst_bit_off >> 3;
    const int dst_up = (int)(dst_bit_off & 7);

    if (dst_up == 0) {
        const ssize_t whole = length >> 3;
        memcpy(dst + dst_first, tmp, whole);
        if (partial) {
            const unsigned char keep = (unsigned char)((1u << partial) - 1);
            unsigned char *last = dst + dst_first + whole;
            *last = (unsigned char)((*last & (unsigned char)~keep) | (tmp[whole] & keep));
        }
    }
    else {
        const int dst_down = 8 - dst_up;
        const ssize_t last_bit = dst_bit_off + length - 1;
        const ssize_t touched = (last_bit >> 3) - dst_first + 1;

        for (ssize_t k = 0; k < touched; k++) {
            const ssize_t base = (dst_first + k) * 8;

            /* Inclusive range of bit positions written in this dst byte. */
            ssize_t lo_bit = 0;
            if (dst_bit_off > base) lo_bit = dst_bit_off - base;
            ssize_t hi_bit = 7;
            if (last_bit < base + 7) hi_bit = last_bit - base;
            const unsigned char window =
                (unsigned char)(((1u << (hi_bit + 1)) - 1) ^ ((1u << lo_bit) - 1));

            /* High bits of the previous tmp byte spill into the low part
             * of this dst byte; low bits of the current tmp byte fill the
             * upper part. */
            const unsigned char prev = (k > 0 && k - 1 < n_tmp) ? tmp[k - 1] : 0;
            const unsigned char curr = (k < n_tmp) ? tmp[k] : 0;
            const unsigned char merged =
                (unsigned char)((prev >> dst_down) | (curr << dst_up));

            unsigned char *cell = dst + dst_first + k;
            *cell = (unsigned char)((*cell & (unsigned char)~window) | (merged & window));
        }
    }

    if (tmp != local_buf) ruby_xfree(tmp);
}
714
+
715
+ /* String#bit_slice(bit_offset, bit_length) -> String
716
+ * String#bit_slice(range) -> String
717
+ *
718
+ * str = "\xFF\x00" # 11111111 00000000
719
+ * str.bit_slice(4, 8) # => "\x0F" (00001111)
720
+ */
721
+ static VALUE
722
+ rb_str_bit_slice(int argc, VALUE *argv, VALUE self)
723
+ {
724
+ ssize_t src_len = RSTRING_LEN(self);
725
+ ssize_t total_bits = src_len * 8;
726
+ ssize_t offset, length;
727
+ VALUE v0, v1, opts;
728
+ int n_pos = rb_scan_args(argc, argv, "11:", &v0, &v1, &opts);
729
+ validate_option_hash(opts, SB_KW_LSB_FIRST);
730
+ int lsb_first = parse_lsb_first_opt(opts);
731
+
732
+ if (n_pos == 1 && rb_obj_is_kind_of(v0, rb_cRange)) {
733
+ ssize_t beg, len;
734
+ if (!RTEST(sb_range_beg_len(v0, &beg, &len, total_bits, 0))) {
735
+ return Qnil;
736
+ }
737
+ offset = beg;
738
+ length = len;
739
+ }
740
+ else if (n_pos == 2) {
741
+ if (!rb_integer_type_p(v0) || !rb_integer_type_p(v1)) {
742
+ return Qnil;
743
+ }
744
+
745
+ offset = integer_to_bit_idx(v0);
746
+ length = integer_to_bit_idx(v1);
747
+
748
+ if (offset < 0 || length < 0) return Qnil;
749
+ }
750
+ else if (n_pos == 1) {
751
+ return Qnil;
752
+ }
753
+ else {
754
+ rb_raise(rb_eArgError,
755
+ "wrong number of arguments (given %d, expected 1 or 2)", n_pos);
756
+ }
757
+
758
+ if (offset > total_bits) return Qnil;
759
+ ssize_t available = total_bits - offset;
760
+ if (length > available) length = available;
761
+
762
+ if (length == 0) return rb_str_new("", 0);
763
+
764
+ ssize_t out_bytes = (length + 7) / 8;
765
+ VALUE result = rb_str_buf_new(out_bytes);
766
+ rb_str_resize(result, out_bytes);
767
+ rb_enc_associate(result, rb_enc_get(self));
768
+ unsigned char *dst = (unsigned char *)RSTRING_PTR(result);
769
+ const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
770
+
771
+ memset(dst, 0, out_bytes);
772
+
773
+ if (lsb_first) {
774
+ bit_copy_core(dst, 0, src, src_len, offset, length);
775
+ } else {
776
+ ssize_t dst_bit = 0;
777
+ ssize_t start_byte = offset >> 3;
778
+ ssize_t end_byte = (offset + length - 1) >> 3;
779
+
780
+ for (ssize_t b = start_byte; b <= end_byte; b++) {
781
+ ssize_t b_start_l = b << 3;
782
+ ssize_t b_end_l = b_start_l + 7;
783
+ ssize_t l_min = (offset > b_start_l) ? offset : b_start_l;
784
+ ssize_t l_max = ((offset + length - 1) < b_end_l) ? (offset + length - 1) : b_end_l;
785
+
786
+ ssize_t p_min = b_start_l + (7 - (l_max & 7L));
787
+ ssize_t p_max = b_start_l + (7 - (l_min & 7L));
788
+ ssize_t chunk_len = p_max - p_min + 1;
789
+
790
+ bit_copy_core(dst, dst_bit, src, src_len, p_min, chunk_len);
791
+ dst_bit += chunk_len;
792
+ }
793
+ }
794
+ return result;
795
+ }
796
+
797
+ /* single-bit mutation ----------------------------------------------------- */
798
+
799
/* Operation selector passed to rb_str_mutate_bits. */
enum sb_mutation_op {
    SB_MUT_SET   = 0x1, /* force the addressed bit(s) to 1 */
    SB_MUT_CLEAR = 0x2, /* force the addressed bit(s) to 0 */
    SB_MUT_FLIP  = 0x3  /* invert the addressed bit(s) */
};
804
+
805
+ static VALUE
806
+ rb_str_mutate_bits(int argc, VALUE *argv, VALUE self, enum sb_mutation_op op)
807
+ {
808
+ VALUE target, opts;
809
+ rb_scan_args(argc, argv, "1:", &target, &opts);
810
+ validate_option_hash(opts, SB_KW_LSB_FIRST);
811
+ int lsb_first = parse_lsb_first_opt(opts);
812
+
813
+ rb_str_modify(self);
814
+ unsigned char *ptr = (unsigned char *)RSTRING_PTR(self);
815
+
816
+ if (rb_integer_type_p(target)) {
817
+ ssize_t idx = check_bit_index(self, target, lsb_first);
818
+ unsigned char mask = (unsigned char)(1u << (idx % 8));
819
+ switch (op) {
820
+ case SB_MUT_SET: ptr[idx / 8] |= mask; break;
821
+ case SB_MUT_CLEAR: ptr[idx / 8] &= (unsigned char)~mask; break;
822
+ case SB_MUT_FLIP: ptr[idx / 8] ^= mask; break;
823
+ }
824
+ return self;
825
+ }
826
+
827
+ if (rb_obj_is_kind_of(target, rb_cRange)) {
828
+ ssize_t total_bits = RSTRING_LEN(self) * 8;
829
+ ssize_t beg, len;
830
+
831
+ /* err=0 returns Qnil for out-of-range begin (after negative normalization);
832
+ * convert that to IndexError to stay consistent with single-bit access. */
833
+ if (!RTEST(sb_range_beg_len(target, &beg, &len, total_bits, 0))) {
834
+ rb_raise(rb_eIndexError, "bit range out of range");
835
+ }
836
+
837
+ /* err=0 silently clamps end > total. Detect that and raise instead,
838
+ * to stay consistent with bit_splice and single-bit mutation. */
839
+ VALUE rng_end = rb_funcall(target, rb_intern("end"), 0);
840
+ if (!NIL_P(rng_end)) {
841
+ ssize_t end_val = integer_to_bit_idx(rng_end);
842
+ if (end_val < 0) end_val += total_bits;
843
+ int exclusive = RTEST(rb_funcall(target, rb_intern("exclude_end?"), 0));
844
+ ssize_t end_excl = exclusive ? end_val : end_val + 1;
845
+ if (end_excl > total_bits) {
846
+ rb_raise(rb_eIndexError, "bit range out of range");
847
+ }
848
+ }
849
+
850
+ for (ssize_t logical = beg; logical < beg + len; logical++) {
851
+ ssize_t idx = lsb_first ? logical : ((logical & ~7L) | (7 - (logical & 7L)));
852
+ unsigned char mask = (unsigned char)(1u << (idx % 8));
853
+ switch (op) {
854
+ case SB_MUT_SET: ptr[idx / 8] |= mask; break;
855
+ case SB_MUT_CLEAR: ptr[idx / 8] &= (unsigned char)~mask; break;
856
+ case SB_MUT_FLIP: ptr[idx / 8] ^= mask; break;
857
+ }
858
+ }
859
+ return self;
860
+ }
861
+
862
+ rb_raise(rb_eTypeError, "bit index must be an integer or Range");
863
+ UNREACHABLE_RETURN(Qnil);
864
+ }
865
+
866
+ static VALUE
867
+ rb_str_set_bit(int argc, VALUE *argv, VALUE self)
868
+ {
869
+ return rb_str_mutate_bits(argc, argv, self, SB_MUT_SET);
870
+ }
871
+
872
+ static VALUE
873
+ rb_str_clear_bit(int argc, VALUE *argv, VALUE self)
874
+ {
875
+ return rb_str_mutate_bits(argc, argv, self, SB_MUT_CLEAR);
876
+ }
877
+
878
+ static VALUE
879
+ rb_str_flip_bit(int argc, VALUE *argv, VALUE self)
880
+ {
881
+ return rb_str_mutate_bits(argc, argv, self, SB_MUT_FLIP);
882
+ }
883
+
884
+ /* bulk bitwise ------------------------------------------------------------ */
885
+
886
+ static void
887
+ check_binary_op_lengths(VALUE self, VALUE other)
888
+ {
889
+ if (RSTRING_LEN(self) != RSTRING_LEN(other)) {
890
+ rb_raise(rb_eArgError, "operands must have the same length (%ld vs %ld)",
891
+ RSTRING_LEN(self), RSTRING_LEN(other));
892
+ }
893
+ }
894
+
895
+ static VALUE
896
+ alloc_result(VALUE self)
897
+ {
898
+ ssize_t len = RSTRING_LEN(self);
899
+ VALUE result = rb_str_buf_new(len);
900
+ rb_str_resize(result, len);
901
+ rb_enc_associate(result, rb_enc_get(self));
902
+ return result;
903
+ }
904
+
905
+ static VALUE
906
+ rb_str_bit_not(VALUE self)
907
+ {
908
+ ssize_t len = RSTRING_LEN(self);
909
+ VALUE result = alloc_result(self);
910
+ const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
911
+ unsigned char *dst = (unsigned char *)RSTRING_PTR(result);
912
+ for (ssize_t i = 0; i < len; i++) dst[i] = ~src[i];
913
+ return result;
914
+ }
915
+
916
+ static VALUE
917
+ rb_str_bit_not_bang(VALUE self)
918
+ {
919
+ rb_str_modify(self);
920
+ ssize_t len = RSTRING_LEN(self);
921
+ unsigned char *ptr = (unsigned char *)RSTRING_PTR(self);
922
+ for (ssize_t i = 0; i < len; i++) ptr[i] = ~ptr[i];
923
+ return self;
924
+ }
925
+
926
+ static VALUE
927
+ rb_str_bit_and(VALUE self, VALUE other)
928
+ {
929
+ check_binary_op_lengths(self, other);
930
+ ssize_t len = RSTRING_LEN(self);
931
+ VALUE result = alloc_result(self);
932
+ const unsigned char *a = (const unsigned char *)RSTRING_PTR(self);
933
+ const unsigned char *b = (const unsigned char *)RSTRING_PTR(other);
934
+ unsigned char *dst = (unsigned char *)RSTRING_PTR(result);
935
+ for (ssize_t i = 0; i < len; i++) dst[i] = a[i] & b[i];
936
+ return result;
937
+ }
938
+
939
+ static VALUE
940
+ rb_str_bit_and_bang(VALUE self, VALUE other)
941
+ {
942
+ check_binary_op_lengths(self, other);
943
+ rb_str_modify(self);
944
+ ssize_t len = RSTRING_LEN(self);
945
+ unsigned char *a = (unsigned char *)RSTRING_PTR(self);
946
+ const unsigned char *b = (const unsigned char *)RSTRING_PTR(other);
947
+ for (ssize_t i = 0; i < len; i++) a[i] &= b[i];
948
+ return self;
949
+ }
950
+
951
+ static VALUE
952
+ rb_str_bit_or(VALUE self, VALUE other)
953
+ {
954
+ check_binary_op_lengths(self, other);
955
+ ssize_t len = RSTRING_LEN(self);
956
+ VALUE result = alloc_result(self);
957
+ const unsigned char *a = (const unsigned char *)RSTRING_PTR(self);
958
+ const unsigned char *b = (const unsigned char *)RSTRING_PTR(other);
959
+ unsigned char *dst = (unsigned char *)RSTRING_PTR(result);
960
+ for (ssize_t i = 0; i < len; i++) dst[i] = a[i] | b[i];
961
+ return result;
962
+ }
963
+
964
+ static VALUE
965
+ rb_str_bit_or_bang(VALUE self, VALUE other)
966
+ {
967
+ check_binary_op_lengths(self, other);
968
+ rb_str_modify(self);
969
+ ssize_t len = RSTRING_LEN(self);
970
+ unsigned char *a = (unsigned char *)RSTRING_PTR(self);
971
+ const unsigned char *b = (const unsigned char *)RSTRING_PTR(other);
972
+ for (ssize_t i = 0; i < len; i++) a[i] |= b[i];
973
+ return self;
974
+ }
975
+
976
+ static VALUE
977
+ rb_str_bit_xor(VALUE self, VALUE other)
978
+ {
979
+ check_binary_op_lengths(self, other);
980
+ ssize_t len = RSTRING_LEN(self);
981
+ VALUE result = alloc_result(self);
982
+ const unsigned char *a = (const unsigned char *)RSTRING_PTR(self);
983
+ const unsigned char *b = (const unsigned char *)RSTRING_PTR(other);
984
+ unsigned char *dst = (unsigned char *)RSTRING_PTR(result);
985
+ for (ssize_t i = 0; i < len; i++) dst[i] = a[i] ^ b[i];
986
+ return result;
987
+ }
988
+
989
+ static VALUE
990
+ rb_str_bit_xor_bang(VALUE self, VALUE other)
991
+ {
992
+ check_binary_op_lengths(self, other);
993
+ rb_str_modify(self);
994
+ ssize_t len = RSTRING_LEN(self);
995
+ unsigned char *a = (unsigned char *)RSTRING_PTR(self);
996
+ const unsigned char *b = (const unsigned char *)RSTRING_PTR(other);
997
+ for (ssize_t i = 0; i < len; i++) a[i] ^= b[i];
998
+ return self;
999
+ }
1000
+
1001
+ /* packed bit-field iteration ---------------------------------------------- */
1002
+ /*
1003
+ * NOTE: each_bit_field and bit_fields are implemented here and fully tested,
1004
+ * but are NOT part of the current core proposal (see FUTURE_PROPOSAL_PLAN.md).
1005
+ * They are deferred because yielding Integer field values is a qualitatively
1006
+ * different contract from the rest of the API, and that difference is expected
1007
+ * to extend core-ruby-dev discussion. The code is kept so the proposal can be
1008
+ * extended later without re-implementation.
1009
+ */
1010
+
1011
+ /*
1012
+ * extract_uint64: extract up to 64 bits starting at bit_offset from src as an
1013
+ * unsigned 64-bit integer (LSB-first bit layout, matching bit_slice).
1014
+ *
1015
+ * Porting to Ruby Core:
1016
+ * 1. Move into string.c alongside the bit_slice implementation.
1017
+ * 2. Share with rb_str_bit_slice to avoid duplication.
1018
+ */
1019
/*
 * extract_uint64_lsb: read `bitlen` bits (0..64) starting at flat bit
 * position `bit_offset`, LSB-first, and return them as an integer whose
 * bit 0 is the bit at `bit_offset`.
 *
 *   src        - byte buffer
 *   src_len    - number of readable bytes in src; bytes at or beyond this
 *                index are treated as zero rather than read
 *   bit_offset - non-negative flat bit index (byte = offset / 8,
 *                bit within byte = offset % 8)
 *   bitlen     - field width in bits; callers pass 1..64
 *
 * A field that does not start on a byte boundary can touch nine bytes
 * (e.g. offset 1, width 64).  The ninth byte cannot be folded into the
 * 64-bit accumulator before the down-shift -- that would be a shift by 64,
 * which is undefined -- so it is merged in afterwards.
 */
static uint64_t
extract_uint64_lsb(const unsigned char *src, ssize_t src_len, ssize_t bit_offset, ssize_t bitlen)
{
    const ssize_t first_byte = bit_offset >> 3;
    const int shift = (int)(bit_offset & 7);
    const int nbytes = (shift + (int)bitlen + 7) / 8;  /* bytes touched: 0..9 */
    const int low_bytes = nbytes < 8 ? nbytes : 8;     /* what fits in 64 bits */
    uint64_t val = 0;

    for (int i = 0; i < low_bytes; i++) {
        if (first_byte + i < src_len) {
            val |= (uint64_t)src[first_byte + i] << (i * 8);
        }
    }
    val >>= shift;

    /* nbytes == 9 implies shift >= 1, so (64 - shift) is in 57..63. */
    if (nbytes > 8 && first_byte + 8 < src_len) {
        val |= (uint64_t)src[first_byte + 8] << (64 - shift);
    }

    if (bitlen < 64) {
        val &= (UINT64_C(1) << bitlen) - 1;
    }
    return val;
}
1034
+
1035
/*
 * extract_uint64: read a `bitlen`-bit field starting at `bit_offset`.
 *
 * With lsb_first set, the work is delegated to extract_uint64_lsb.
 * Otherwise bits are fetched one at a time through logical_get_bit and
 * packed so that the first bit fetched ends up as the most significant bit
 * of the result (the natural integer reading of MSB-first packed records).
 * src_len is only consulted on the LSB-first path.
 */
static uint64_t
extract_uint64(const unsigned char *src, ssize_t src_len,
               ssize_t bit_offset, ssize_t bitlen, int lsb_first)
{
    if (lsb_first) {
        return extract_uint64_lsb(src, src_len, bit_offset, bitlen);
    }

    /* Shift-and-append: after `bitlen` steps, the bit fetched at step k
     * sits at position (bitlen - 1 - k). */
    uint64_t acc = 0;
    ssize_t cursor = bit_offset;
    const ssize_t stop = bit_offset + bitlen;
    while (cursor < stop) {
        acc <<= 1;
        if (logical_get_bit(src, cursor, 0)) {
            acc |= UINT64_C(1);
        }
        cursor++;
    }
    return acc;
}
1053
+
1054
+ /* String#each_bit_field(*bitlens, lsb_first: true) -> self
1055
+ * String#each_bit_field(*bitlens, lsb_first: true) -> Enumerator
1056
+ *
1057
+ * Iterates over the string as a sequence of packed bit-field records. Each
1058
+ * positional argument specifies the width (in bits) of one field in the record.
1059
+ * On each iteration, one Integer per field is yielded (LSB-first bit layout).
1060
+ * Each bitlen must be in the range 1..64.
1061
+ *
1062
+ * lsb_first: true (default) -- intra-byte field extraction uses bit 0..7.
1063
+ * lsb_first: false -- intra-byte field extraction uses bit 7..0.
1064
+ *
1065
+ * Incomplete trailing bits (when bytesize*8 is not a multiple of sum(bitlens))
1066
+ * are silently dropped, matching the behavior of Enumerable#each_slice.
1067
+ *
1068
+ * Porting to Ruby Core:
1069
+ * 1. Move extract_uint64 and this function into string.c.
1070
+ * 2. Register with rb_define_method in Init_String().
1071
+ * 3. Replace ALLOCA_N with stack arrays for small field counts and heap otherwise.
1072
+ */
1073
+ static VALUE
1074
+ rb_str_each_bit_field(int argc, VALUE *argv, VALUE self)
1075
+ {
1076
+ RETURN_ENUMERATOR(self, argc, argv);
1077
+
1078
+ VALUE rest, opts;
1079
+ rb_scan_args(argc, argv, "*:", &rest, &opts);
1080
+ validate_option_hash(opts, SB_KW_LSB_FIRST);
1081
+
1082
+ ssize_t num_fields = RARRAY_LEN(rest);
1083
+ if (num_fields == 0) {
1084
+ rb_raise(rb_eArgError, "wrong number of arguments (given 0, expected 1+)");
1085
+ }
1086
+
1087
+ ssize_t *bitlens = ALLOCA_N(ssize_t, num_fields);
1088
+ ssize_t step = 0;
1089
+ for (ssize_t f = 0; f < num_fields; f++) {
1090
+ VALUE v = RARRAY_AREF(rest, f);
1091
+ if (!rb_integer_type_p(v)) {
1092
+ rb_raise(rb_eTypeError, "bitlen must be an integer");
1093
+ }
1094
+ ssize_t bl = NUM2SSIZET(v);
1095
+ if (bl <= 0) {
1096
+ rb_raise(rb_eArgError, "bitlen must be positive");
1097
+ }
1098
+ if (bl > 64) {
1099
+ rb_raise(rb_eArgError, "bitlen must be <= 64 (got %ld)", bl);
1100
+ }
1101
+ bitlens[f] = bl;
1102
+ step += bl;
1103
+ }
1104
+
1105
+ int lsb_first = parse_lsb_first_opt(opts);
1106
+
1107
+ ssize_t src_len = RSTRING_LEN(self);
1108
+ ssize_t total_bits = src_len * 8;
1109
+ ssize_t iterations = total_bits / step;
1110
+
1111
+ VALUE *field_vals = ALLOCA_N(VALUE, num_fields);
1112
+
1113
+ for (ssize_t iter = 0; iter < iterations; iter++) {
1114
+ ssize_t base_bit = iter * step;
1115
+ const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
1116
+ ssize_t field_bit = base_bit;
1117
+ for (ssize_t f = 0; f < num_fields; f++) {
1118
+ uint64_t val = extract_uint64(src, src_len, field_bit, bitlens[f], lsb_first);
1119
+ field_vals[f] = ULL2NUM(val);
1120
+ field_bit += bitlens[f];
1121
+ }
1122
+ rb_yield_values2((int)num_fields, field_vals);
1123
+ }
1124
+
1125
+ return self;
1126
+ }
1127
+
1128
+ /* String#bit_fields(*bitlens, lsb_first: true) -> Array
1129
+ * String#bit_fields(*bitlens, lsb_first: true) { |*fields| } -> self
1130
+ *
1131
+ * Non-iterator complement of each_bit_field. Without a block, returns an
1132
+ * Array of all extracted records. With a single bitlen the array is flat
1133
+ * (matching each_bit_field(n).to_a); with multiple bitlens each record is
1134
+ * itself an Array (matching each_bit_field(a, b, ...).to_a).
1135
+ *
1136
+ * With a block, behaves identically to each_bit_field:
1137
+ * yielding one Integer per field and returning self.
1138
+ *
1139
+ * Porting to Ruby Core:
1140
+ * 1. Move alongside each_bit_field in string.c.
1141
+ * 2. Share extract_uint64 and the bitlen validation logic.
1142
+ * 3. Register with rb_define_method in Init_String().
1143
+ */
1144
+ static VALUE
1145
+ rb_str_bit_fields(int argc, VALUE *argv, VALUE self)
1146
+ {
1147
+ VALUE rest, opts;
1148
+ rb_scan_args(argc, argv, "*:", &rest, &opts);
1149
+ validate_option_hash(opts, SB_KW_LSB_FIRST);
1150
+
1151
+ ssize_t num_fields = RARRAY_LEN(rest);
1152
+ if (num_fields == 0) {
1153
+ rb_raise(rb_eArgError, "wrong number of arguments (given 0, expected 1+)");
1154
+ }
1155
+
1156
+ ssize_t *bitlens = ALLOCA_N(ssize_t, num_fields);
1157
+ ssize_t step = 0;
1158
+ for (ssize_t f = 0; f < num_fields; f++) {
1159
+ VALUE v = RARRAY_AREF(rest, f);
1160
+ if (!rb_integer_type_p(v)) {
1161
+ rb_raise(rb_eTypeError, "bitlen must be an integer");
1162
+ }
1163
+ ssize_t bl = NUM2SSIZET(v);
1164
+ if (bl <= 0) {
1165
+ rb_raise(rb_eArgError, "bitlen must be positive");
1166
+ }
1167
+ if (bl > 64) {
1168
+ rb_raise(rb_eArgError, "bitlen must be <= 64 (got %ld)", bl);
1169
+ }
1170
+ bitlens[f] = bl;
1171
+ step += bl;
1172
+ }
1173
+
1174
+ int lsb_first = parse_lsb_first_opt(opts);
1175
+
1176
+ ssize_t src_len = RSTRING_LEN(self);
1177
+ ssize_t total_bits = src_len * 8;
1178
+ ssize_t iterations = total_bits / step;
1179
+
1180
+ int have_block = rb_block_given_p();
1181
+ VALUE result = have_block ? Qnil : rb_ary_new_capa(iterations);
1182
+
1183
+ VALUE *field_vals = ALLOCA_N(VALUE, num_fields);
1184
+
1185
+ for (ssize_t iter = 0; iter < iterations; iter++) {
1186
+ ssize_t base_bit = iter * step;
1187
+ const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
1188
+ ssize_t field_bit = base_bit;
1189
+ for (ssize_t f = 0; f < num_fields; f++) {
1190
+ uint64_t val = extract_uint64(src, src_len, field_bit, bitlens[f], lsb_first);
1191
+ field_vals[f] = ULL2NUM(val);
1192
+ field_bit += bitlens[f];
1193
+ }
1194
+ if (have_block) {
1195
+ rb_yield_values2((int)num_fields, field_vals);
1196
+ } else if (num_fields == 1) {
1197
+ rb_ary_push(result, field_vals[0]);
1198
+ } else {
1199
+ rb_ary_push(result, rb_ary_new_from_values(num_fields, field_vals));
1200
+ }
1201
+ }
1202
+
1203
+ return have_block ? self : result;
1204
+ }
1205
+
1206
+ /* run-length iteration ---------------------------------------------------- */
1207
+
1208
+ /*
1209
+ * count_run_lsb: count consecutive bits equal to `target` starting at flat
1210
+ * position `pos` (LSB-first). Uses ctz / ctzll to skip bits in bulk:
1211
+ * - partial first byte: ctz on the inverted, masked remainder of that byte
1212
+ * - full 64-bit words (LE): ctzll on the inverted word (64 bits per step)
1213
+ * - remaining bytes: ctz on the inverted byte
1214
+ *
1215
+ * Porting to Ruby Core:
1216
+ * 1. Move to string.c alongside bit_at and each_bit.
1217
+ * 2. Share sb_ctz8 / sb_ctzll with the existing set-bit helpers.
1218
+ */
1219
/*
 * Arguments:
 *   src, src_len - byte buffer and its length in bytes
 *   pos          - flat LSB-first bit index where the run starts; the caller
 *                  has already checked that it lies inside the buffer
 *   target       - bit value being counted (zero / non-zero)
 *
 * Returns the number of consecutive bits equal to `target` from `pos`
 * onward, never more than the number of bits between `pos` and the end of
 * the buffer.
 */
static ssize_t
count_run_lsb(const unsigned char *src, ssize_t src_len, ssize_t pos, int target)
{
    const ssize_t limit = src_len * 8 - pos;
    const int first_shift = pos & 7;
    ssize_t at = pos >> 3;
    ssize_t total = 0;

    /* Leading byte: only the bits from `pos` up to bit 7 take part.
     * `differs` has a 1 wherever such a bit is not equal to target. */
    {
        const int avail = 8 - first_shift;
        const unsigned int window = (1u << avail) - 1;
        const unsigned int flip = target ? 0xFFu : 0u;
        const unsigned int differs = (((unsigned int)src[at] ^ flip) >> first_shift) & window;
        const int head = differs ? sb_ctz8(differs) : avail;

        total += head;
        at++;
        if (head < avail) goto done;
    }

#if SB_LITTLE_ENDIAN
    /* Whole 8-byte words: 64 bits are examined per step. */
    while (at + 8 <= src_len) {
        uint64_t chunk;
        memcpy(&chunk, src + at, 8);

        const uint64_t differs = target ? ~chunk : chunk;
        if (differs != 0) {
            total += sb_ctzll(differs);
            goto done;
        }
        total += 64;
        at += 8;
    }
#endif

    /* Whatever bytes are left, one at a time. */
    for (; at < src_len; at++) {
        unsigned int same = (unsigned int)src[at];
        if (!target) same = ~same;
        same &= 0xFF;  /* 1 = bit equals target */

        if (same != 0xFF) {
            total += sb_ctz8(~same);
            break;
        }
        total += 8;
    }

done:
    return total < limit ? total : limit;
}
1274
+
1275
+ /* String#bit_run_count(pos, bit) -> Integer | nil
1276
+ *
1277
+ * Returns the length of the consecutive run of `bit` starting at flat
1278
+ * position `pos`. Returns nil when `pos` is out of range or the bit at `pos`
1279
+ * does not equal `bit`.
1280
+ *
1281
+ * `bit` accepts 0, 1, false, or true (false/true are aliases for 0/1,
1282
+ * matching the values yielded by each_bit_run).
1283
+ *
1284
+ * Counts forward from `pos` toward higher bit indices.
1285
+ *
1286
+ * Inspired by Gauche Scheme's (bitvector-count-run bit bvec i).
1287
+ *
1288
+ * Uses the same flat LSB-first addressing as bit_at: byte[pos/8] bit pos%8.
1289
+ *
1290
+ * Porting to Ruby Core:
1291
+ * 1. Move to string.c; register in Init_String().
1292
+ * 2. Reuse integer_to_bit_idx for consistent Bignum handling.
1293
+ */
1294
+ static VALUE
1295
+ rb_str_bit_run_count(int argc, VALUE *argv, VALUE self)
1296
+ {
1297
+ VALUE pos_val, bit_val, opts;
1298
+ rb_scan_args(argc, argv, "20:", &pos_val, &bit_val, &opts);
1299
+ validate_option_hash(opts, SB_KW_LSB_FIRST);
1300
+ int lsb_first = parse_lsb_first_opt(opts);
1301
+
1302
+ if (!rb_integer_type_p(pos_val)) {
1303
+ rb_raise(rb_eTypeError, "position must be an integer");
1304
+ }
1305
+ int target;
1306
+ if (bit_val == Qtrue || bit_val == INT2FIX(1)) {
1307
+ target = 1;
1308
+ } else if (bit_val == Qfalse || bit_val == INT2FIX(0)) {
1309
+ target = 0;
1310
+ } else {
1311
+ rb_raise(rb_eArgError, "bit must be 0, 1, false, or true");
1312
+ }
1313
+ ssize_t pos = integer_to_bit_idx(pos_val);
1314
+ ssize_t src_len = RSTRING_LEN(self);
1315
+ if (pos < 0 || pos >= src_len * 8) return Qnil;
1316
+
1317
+ const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
1318
+ if (lsb_first) {
1319
+ if (((src[pos >> 3] >> (pos & 7)) & 1) != target) return Qnil;
1320
+ return SSIZET2NUM(count_run_lsb(src, src_len, pos, target));
1321
+ }
1322
+
1323
+ if (logical_get_bit(src, pos, 0) != target) return Qnil;
1324
+
1325
+ ssize_t run = 1;
1326
+ ssize_t total_bits = src_len * 8;
1327
+ while (pos + run < total_bits && logical_get_bit(src, pos + run, 0) == target) {
1328
+ run++;
1329
+ }
1330
+ return SSIZET2NUM(run);
1331
+ }
1332
+
1333
+ /* String#each_bit_run(lsb_first: true) { |bit, len| } -> self
1334
+ * String#each_bit_run(lsb_first: true) -> Enumerator
1335
+ *
1336
+ * Yields (bit, run_length) pairs for each consecutive run of identical bits.
1337
+ * Run-length boundary detection and counting happen entirely in C, replacing
1338
+ * the Ruby-level current/count state machine required when using each_bit.
1339
+ *
1340
+ * For random data (~50% density) each_bit_run yields ~half as many times as
1341
+ * each_bit. For structured data (sparse validity bitmaps, sensor bursts) the
1342
+ * ratio is proportional to the average run length.
1343
+ *
1344
+ * lsb_first: true (default) iterates bit 0..7 within each byte.
1345
+ * lsb_first: false iterates bit 7..0 within each byte.
1346
+ *
1347
+ * Porting to Ruby Core:
1348
+ * 1. Move to string.c; register in Init_String().
1349
+ * 2. count_run_lsb / count_run_msb move with it.
1350
+ */
1351
+ static VALUE
1352
+ rb_str_each_bit_run(int argc, VALUE *argv, VALUE self)
1353
+ {
1354
+ RETURN_ENUMERATOR(self, argc, argv);
1355
+
1356
+ int lsb_first = parse_lsb_first(argc, argv);
1357
+ ssize_t src_len = RSTRING_LEN(self);
1358
+ if (src_len == 0) return self;
1359
+
1360
+ ssize_t total_bits = src_len * 8;
1361
+
1362
+ if (lsb_first) {
1363
+ ssize_t pos = 0;
1364
+ while (pos < total_bits) {
1365
+ const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
1366
+ int bit = (src[pos >> 3] >> (pos & 7)) & 1;
1367
+ ssize_t run = count_run_lsb(src, src_len, pos, bit);
1368
+ rb_yield_values(2, bit ? Qtrue : Qfalse, SSIZET2NUM(run));
1369
+ pos += run;
1370
+ }
1371
+ }
1372
+ else {
1373
+ ssize_t pos = 0;
1374
+ while (pos < total_bits) {
1375
+ const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
1376
+ int bit = logical_get_bit(src, pos, 0);
1377
+ ssize_t run = 1;
1378
+ while (pos + run < total_bits && logical_get_bit(src, pos + run, 0) == bit) {
1379
+ run++;
1380
+ }
1381
+ rb_yield_values(2, bit ? Qtrue : Qfalse, SSIZET2NUM(run));
1382
+ pos += run;
1383
+ }
1384
+ }
1385
+
1386
+ return self;
1387
+ }
1388
+
1389
+ /* String#bit_runs(lsb_first: true) -> Array
1390
+ * String#bit_runs(lsb_first: true) { |bit, len| } -> self
1391
+ *
1392
+ * Non-iterator complement of each_bit_run. Without a block, collects all
1393
+ * (bit, run_length) pairs into an Array and returns it. With a block,
1394
+ * yields each pair and returns self.
1395
+ *
1396
+ * Follows the same pattern as String#bytes vs String#each_byte.
1397
+ *
1398
+ * Porting to Ruby Core:
1399
+ * 1. Move to string.c alongside each_bit_run; register in Init_String().
1400
+ */
1401
+ static VALUE
1402
+ rb_str_bit_runs(int argc, VALUE *argv, VALUE self)
1403
+ {
1404
+ int lsb_first = parse_lsb_first(argc, argv);
1405
+ ssize_t src_len = RSTRING_LEN(self);
1406
+ int have_block = rb_block_given_p();
1407
+
1408
+ if (src_len == 0) return have_block ? self : rb_ary_new();
1409
+
1410
+ ssize_t total_bits = src_len * 8;
1411
+ VALUE result = have_block ? Qnil : rb_ary_new();
1412
+
1413
+ if (lsb_first) {
1414
+ ssize_t pos = 0;
1415
+ while (pos < total_bits) {
1416
+ const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
1417
+ int bit = (src[pos >> 3] >> (pos & 7)) & 1;
1418
+ ssize_t run = count_run_lsb(src, src_len, pos, bit);
1419
+ VALUE bval = bit ? Qtrue : Qfalse;
1420
+ VALUE lval = SSIZET2NUM(run);
1421
+ have_block ? rb_yield_values(2, bval, lval)
1422
+ : rb_ary_push(result, rb_assoc_new(bval, lval));
1423
+ pos += run;
1424
+ }
1425
+ } else {
1426
+ ssize_t pos = 0;
1427
+ while (pos < total_bits) {
1428
+ const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
1429
+ int bit = logical_get_bit(src, pos, 0);
1430
+ ssize_t run = 1;
1431
+ while (pos + run < total_bits && logical_get_bit(src, pos + run, 0) == bit) {
1432
+ run++;
1433
+ }
1434
+ VALUE bval = bit ? Qtrue : Qfalse;
1435
+ VALUE lval = SSIZET2NUM(run);
1436
+ have_block ? rb_yield_values(2, bval, lval)
1437
+ : rb_ary_push(result, rb_assoc_new(bval, lval));
1438
+ pos += run;
1439
+ }
1440
+ }
1441
+
1442
+ return have_block ? self : result;
1443
+ }
1444
+
1445
+ /* String#bit_splice(bit_index, bit_length, str) -> self
1446
+ * String#bit_splice(bit_index, bit_length, str, str_bit_index, str_bit_length) -> self
1447
+ * String#bit_splice(range, str) -> self
1448
+ * String#bit_splice(range, str, str_range) -> self
1449
+ *
1450
+ * Writes bits from str into self at bit-level granularity. The inverse of
1451
+ * bit_slice: where bit_slice reads a sub-sequence of bits, bit_splice writes one.
1452
+ *
1453
+ * The destination and source bit lengths must be equal; bit_splice does not
1454
+ * resize self (sub-byte resize is undefined). This mirrors the constraint that
1455
+ * bytesplice imposes when the replacement has the same byte length.
1456
+ *
1457
+ * Negative indices count backward from the end, exactly as in bytesplice.
1458
+ * Returns self.
1459
+ *
1460
+ * Porting to Ruby Core:
1461
+ * 1. Move to string.c; register in Init_String().
1462
+ * 2. Use rb_str_modify_expand if resize support is ever added.
1463
+ * 3. bit_copy_core moves with it; share ebs_extract with bit_slice.
1464
+ */
1465
+ static VALUE
1466
+ rb_str_bit_splice(int argc, VALUE *argv, VALUE self)
1467
+ {
1468
+ ssize_t dst_bit_off, dst_bit_len;
1469
+ ssize_t src_bit_off, src_bit_len;
1470
+ VALUE str;
1471
+ ssize_t dst_total = RSTRING_LEN(self) * 8;
1472
+ VALUE v0, v1, v2, v3, v4, opts;
1473
+
1474
+ int n_pos = rb_scan_args(argc, argv, "23:", &v0, &v1, &v2, &v3, &v4, &opts);
1475
+ validate_option_hash(opts, SB_KW_LSB_FIRST);
1476
+ int lsb_first = parse_lsb_first_opt(opts);
1477
+
1478
+ if (n_pos == 2 && rb_obj_is_kind_of(v0, rb_cRange)) {
1479
+ /* bit_splice(range, str) */
1480
+ ssize_t beg, len;
1481
+ sb_range_beg_len(v0, &beg, &len, dst_total, 1);
1482
+ dst_bit_off = beg;
1483
+ dst_bit_len = len;
1484
+ str = v1;
1485
+ Check_Type(str, T_STRING);
1486
+ src_bit_off = 0;
1487
+ src_bit_len = dst_bit_len;
1488
+ }
1489
+ else if (n_pos == 3 && rb_obj_is_kind_of(v0, rb_cRange)) {
1490
+ /* bit_splice(range, str, str_range) */
1491
+ ssize_t beg, len;
1492
+ sb_range_beg_len(v0, &beg, &len, dst_total, 1);
1493
+ dst_bit_off = beg;
1494
+ dst_bit_len = len;
1495
+ str = v1;
1496
+ Check_Type(str, T_STRING);
1497
+ if (!rb_obj_is_kind_of(v2, rb_cRange)) {
1498
+ rb_raise(rb_eTypeError, "third argument must be a Range");
1499
+ }
1500
+ ssize_t src_total = RSTRING_LEN(str) * 8;
1501
+ sb_range_beg_len(v2, &beg, &len, src_total, 1);
1502
+ src_bit_off = beg;
1503
+ src_bit_len = len;
1504
+ }
1505
+ else if (n_pos == 3) {
1506
+ /* bit_splice(bit_index, bit_length, str) */
1507
+ if (!rb_integer_type_p(v0) || !rb_integer_type_p(v1)) {
1508
+ rb_raise(rb_eTypeError, "bit index and length must be integers");
1509
+ }
1510
+ dst_bit_off = integer_to_bit_idx(v0);
1511
+ dst_bit_len = integer_to_bit_idx(v1);
1512
+ if (dst_bit_off < 0) dst_bit_off += dst_total;
1513
+
1514
+ /*
1515
+ * Integer source support was prototyped here, but it is intentionally
1516
+ * disabled in the current proposal to keep the public API limited to
1517
+ * String-to-String splicing.
1518
+ */
1519
+ if (rb_integer_type_p(v2)) {
1520
+ rb_raise(rb_eArgError,
1521
+ "bit_splice source must be a String in the current proposal");
1522
+ }
1523
+
1524
+ str = v2;
1525
+ Check_Type(str, T_STRING);
1526
+ src_bit_off = 0;
1527
+ src_bit_len = dst_bit_len;
1528
+ }
1529
+ else if (n_pos == 5) {
1530
+ /* bit_splice(bit_index, bit_length, str, str_bit_index, str_bit_length) */
1531
+ if (!rb_integer_type_p(v0) || !rb_integer_type_p(v1) ||
1532
+ !rb_integer_type_p(v3) || !rb_integer_type_p(v4)) {
1533
+ rb_raise(rb_eTypeError, "bit indices and lengths must be integers");
1534
+ }
1535
+ dst_bit_off = integer_to_bit_idx(v0);
1536
+ dst_bit_len = integer_to_bit_idx(v1);
1537
+ if (dst_bit_off < 0) dst_bit_off += dst_total;
1538
+ str = v2;
1539
+ Check_Type(str, T_STRING);
1540
+ ssize_t src_total = RSTRING_LEN(str) * 8;
1541
+ src_bit_off = integer_to_bit_idx(v3);
1542
+ src_bit_len = integer_to_bit_idx(v4);
1543
+ if (src_bit_off < 0) src_bit_off += src_total;
1544
+ }
1545
+ else {
1546
+ rb_raise(rb_eArgError,
1547
+ "wrong number of arguments (given %d, expected 2, 3, or 5)", n_pos);
1548
+ }
1549
+
1550
+ if (dst_bit_off < 0 || dst_bit_len < 0 || dst_bit_off + dst_bit_len > dst_total) {
1551
+ rb_raise(rb_eIndexError,
1552
+ "bit_splice: destination range [%ld, %ld] out of bounds (total %ld bits)",
1553
+ dst_bit_off, dst_bit_len, dst_total);
1554
+ }
1555
+
1556
+ ssize_t src_total_bits = RSTRING_LEN(str) * 8;
1557
+ if (src_bit_off < 0 || src_bit_len < 0 || src_bit_off + src_bit_len > src_total_bits) {
1558
+ rb_raise(rb_eIndexError,
1559
+ "bit_splice: source range [%ld, %ld] out of bounds (total %ld bits)",
1560
+ src_bit_off, src_bit_len, src_total_bits);
1561
+ }
1562
+
1563
+ if (dst_bit_len != src_bit_len) {
1564
+ rb_raise(rb_eArgError,
1565
+ "bit_splice: destination length (%ld) must equal source length (%ld)",
1566
+ dst_bit_len, src_bit_len);
1567
+ }
1568
+
1569
+ if (dst_bit_len == 0) return self;
1570
+
1571
+ /* Guard against self-aliasing: duplicate src before modifying self */
1572
+ VALUE src_str = (str == self) ? rb_str_dup(str) : str;
1573
+
1574
+ rb_str_modify(self);
1575
+
1576
+ unsigned char *dst = (unsigned char *)RSTRING_PTR(self);
1577
+ const unsigned char *src = (const unsigned char *)RSTRING_PTR(src_str);
1578
+ ssize_t src_len_bytes = RSTRING_LEN(src_str);
1579
+
1580
+ if (lsb_first) {
1581
+ bit_copy_core(dst, dst_bit_off, src, src_len_bytes, src_bit_off, dst_bit_len);
1582
+ } else {
1583
+ ssize_t current_src_bit = src_bit_off;
1584
+ ssize_t start_byte = dst_bit_off >> 3;
1585
+ ssize_t end_byte = (dst_bit_off + dst_bit_len - 1) >> 3;
1586
+
1587
+ for (ssize_t b = start_byte; b <= end_byte; b++) {
1588
+ ssize_t b_start_l = b << 3;
1589
+ ssize_t b_end_l = b_start_l + 7;
1590
+ ssize_t l_min = (dst_bit_off > b_start_l) ? dst_bit_off : b_start_l;
1591
+ ssize_t l_max = ((dst_bit_off + dst_bit_len - 1) < b_end_l) ? (dst_bit_off + dst_bit_len - 1) : b_end_l;
1592
+
1593
+ ssize_t p_min = b_start_l + (7 - (l_max & 7L));
1594
+ ssize_t p_max = b_start_l + (7 - (l_min & 7L));
1595
+ ssize_t chunk_len = p_max - p_min + 1;
1596
+
1597
+ bit_copy_core(dst, p_min, src, src_len_bytes, current_src_bit, chunk_len);
1598
+ current_src_bit += chunk_len;
1599
+ }
1600
+ }
1601
+
1602
+ RB_GC_GUARD(src_str);
1603
+ return self;
1604
+ }
1605
+
1606
+ /* Array#mask and Array#mask! --------------------------------------- */
1607
+ /*
1608
+ * NOTE: Array#mask and Array#mask! are implemented here and fully tested,
1609
+ * but are NOT part of the current core proposal (see FUTURE_PROPOSAL_PLAN.md).
1610
+ */
1611
+ /*
1612
+ * parse_mask_kwargs: extract bitmap, lsb_first:, and invert: from method arguments.
1613
+ *
1614
+ * Porting to Ruby Core:
1615
+ * 1. Keep this as a `static` helper in array.c — it is only called by
1616
+ * ary_mask and ary_mask_bang, so no header declaration is needed.
1617
+ * 2. Rename to ary_mask_kwargs or similar to follow array.c conventions
1618
+ * (static helpers in array.c use the `ary_` prefix, not `rb_ary_`).
1619
+ */
1620
+ static void
1621
+ parse_mask_kwargs(int argc, VALUE *argv, VALUE *bitmap_out,
1622
+ int *lsb_first_out, int *invert_out, int *is_integer_out)
1623
+ {
1624
+ VALUE bitmap, opts;
1625
+ rb_scan_args(argc, argv, "1:", &bitmap, &opts);
1626
+ validate_option_hash(opts, SB_KW_LSB_FIRST | SB_KW_INVERT);
1627
+
1628
+ int is_integer = rb_integer_type_p(bitmap);
1629
+
1630
+ if (!is_integer) Check_Type(bitmap, T_STRING);
1631
+
1632
+ int lsb_first = parse_lsb_first_opt(opts);
1633
+ int invert = 0; /* default false */
1634
+
1635
+ if (!lsb_first && is_integer) {
1636
+ rb_raise(rb_eArgError,
1637
+ "lsb_first: false is not supported for Integer bitmap; "
1638
+ "Integer bits are always LSB-first");
1639
+ }
1640
+
1641
+ if (!NIL_P(opts)) {
1642
+ VALUE inv = rb_hash_aref(opts, sym_invert);
1643
+ if (!NIL_P(inv)) {
1644
+ invert = RTEST(inv) ? 1 : 0;
1645
+ }
1646
+ }
1647
+
1648
+ *bitmap_out = bitmap;
1649
+ *lsb_first_out = lsb_first;
1650
+ *invert_out = invert;
1651
+ *is_integer_out = is_integer;
1652
+ }
1653
+
1654
+ /* Read bit i from an Integer bitmap (always LSB-first).
1655
+ * Bits beyond the integer's width are 0 (valid for non-negative integers). */
1656
+ static inline int
1657
+ integer_get_bit(VALUE n, ssize_t i)
1658
+ {
1659
+ if (FIXNUM_P(n)) {
1660
+ ssize_t v = NUM2SSIZET(n);
1661
+ if (v < 0)
1662
+ rb_raise(rb_eArgError, "Integer bitmap must be non-negative");
1663
+ if (i >= (ssize_t)(sizeof(ssize_t) * CHAR_BIT) - 1) return 0;
1664
+ return (int)((v >> i) & 1);
1665
+ }
1666
+ if (RBIGNUM_NEGATIVE_P(n))
1667
+ rb_raise(rb_eArgError, "Integer bitmap must be non-negative");
1668
+ VALUE bit = rb_funcall(n, id_bracket, 1, SSIZET2NUM(i));
1669
+ return RB_TEST(bit) ? 1 : 0;
1670
+ }
1671
+
1672
+ static VALUE
1673
+ rb_ary_mask(int argc, VALUE *argv, VALUE self)
1674
+ {
1675
+ VALUE bitmap;
1676
+ int lsb_first, invert, is_integer;
1677
+ parse_mask_kwargs(argc, argv, &bitmap, &lsb_first, &invert, &is_integer);
1678
+
1679
+ ssize_t ary_len = RARRAY_LEN(self);
1680
+ const VALUE *src = RARRAY_CONST_PTR(self);
1681
+ VALUE result = rb_ary_new_capa(ary_len);
1682
+
1683
+ if (is_integer) {
1684
+ for (ssize_t i = 0; i < ary_len; i++) {
1685
+ int bit = integer_get_bit(bitmap, i);
1686
+ if (invert) bit = !bit;
1687
+ rb_ary_push(result, bit ? src[i] : Qnil);
1688
+ }
1689
+ }
1690
+ else {
1691
+ const unsigned char *bmp = (const unsigned char *)RSTRING_PTR(bitmap);
1692
+ ssize_t bmp_len = RSTRING_LEN(bitmap);
1693
+ ssize_t needed = (ary_len + 7) >> 3;
1694
+ if (needed > bmp_len)
1695
+ rb_raise(rb_eArgError,
1696
+ "bitmap too short: need %ld bytes for %ld elements, got %ld",
1697
+ needed, ary_len, bmp_len);
1698
+
1699
+ if (!lsb_first) {
1700
+ for (ssize_t i = 0; i < ary_len; i++) {
1701
+ int bit = (bmp[i >> 3] >> (7 - (i & 7))) & 1;
1702
+ if (invert) bit = !bit;
1703
+ rb_ary_push(result, bit ? src[i] : Qnil);
1704
+ }
1705
+ }
1706
+ else {
1707
+ for (ssize_t i = 0; i < ary_len; i++) {
1708
+ int bit = (bmp[i >> 3] >> (i & 7)) & 1;
1709
+ if (invert) bit = !bit;
1710
+ rb_ary_push(result, bit ? src[i] : Qnil);
1711
+ }
1712
+ }
1713
+ }
1714
+
1715
+ return result;
1716
+ }
1717
+
1718
+ static VALUE
1719
+ rb_ary_mask_bang(int argc, VALUE *argv, VALUE self)
1720
+ {
1721
+ VALUE bitmap;
1722
+ int lsb_first, invert, is_integer;
1723
+ parse_mask_kwargs(argc, argv, &bitmap, &lsb_first, &invert, &is_integer);
1724
+
1725
+ ssize_t ary_len = RARRAY_LEN(self);
1726
+ rb_ary_modify(self);
1727
+
1728
+ if (is_integer) {
1729
+ for (ssize_t i = 0; i < ary_len; i++) {
1730
+ int bit = integer_get_bit(bitmap, i);
1731
+ if (invert) bit = !bit;
1732
+ if (!bit) rb_ary_store(self, i, Qnil);
1733
+ }
1734
+ }
1735
+ else {
1736
+ const unsigned char *bmp = (const unsigned char *)RSTRING_PTR(bitmap);
1737
+ ssize_t bmp_len = RSTRING_LEN(bitmap);
1738
+ ssize_t needed = (ary_len + 7) >> 3;
1739
+ if (needed > bmp_len)
1740
+ rb_raise(rb_eArgError,
1741
+ "bitmap too short: need %ld bytes for %ld elements, got %ld",
1742
+ needed, ary_len, bmp_len);
1743
+
1744
+ if (!lsb_first) {
1745
+ for (ssize_t i = 0; i < ary_len; i++) {
1746
+ int bit = (bmp[i >> 3] >> (7 - (i & 7))) & 1;
1747
+ if (invert) bit = !bit;
1748
+ if (!bit) rb_ary_store(self, i, Qnil);
1749
+ }
1750
+ }
1751
+ else {
1752
+ for (ssize_t i = 0; i < ary_len; i++) {
1753
+ int bit = (bmp[i >> 3] >> (i & 7)) & 1;
1754
+ if (invert) bit = !bit;
1755
+ if (!bit) rb_ary_store(self, i, Qnil);
1756
+ }
1757
+ }
1758
+ }
1759
+
1760
+ return self;
1761
+ }
1762
+
1763
+ /* Init -------------------------------------------------------------------- */
1764
+
1765
+ void
1766
+ Init_string_bits(void)
1767
+ {
1768
+ id_bracket = rb_intern("[]");
1769
+ sym_lsb_first = ID2SYM(rb_intern("lsb_first"));
1770
+ sym_lsb = ID2SYM(rb_intern("lsb"));
1771
+ sym_msb = ID2SYM(rb_intern("msb"));
1772
+ sym_invert = ID2SYM(rb_intern("invert"));
1773
+
1774
+ rb_define_method(rb_cString, "bit_at", rb_str_bit_at, -1);
1775
+ rb_define_method(rb_cString, "bit_count", rb_str_bit_count, 0);
1776
+ rb_define_method(rb_cString, "each_bit", rb_str_each_bit, -1);
1777
+ rb_define_method(rb_cString, "bits", rb_str_bits, -1);
1778
+ rb_define_method(rb_cString, "each_set_bit_offset", rb_str_each_set_bit_offset, -1);
1779
+ rb_define_method(rb_cString, "set_bit_offsets", rb_str_set_bit_offsets, -1);
1780
+ rb_define_method(rb_cString, "bit_slice", rb_str_bit_slice, -1);
1781
+ rb_define_method(rb_cString, "bit_splice", rb_str_bit_splice, -1);
1782
+ rb_define_method(rb_cString, "bit_run_count", rb_str_bit_run_count, -1);
1783
+ rb_define_method(rb_cString, "each_bit_run", rb_str_each_bit_run, -1);
1784
+ rb_define_method(rb_cString, "bit_runs", rb_str_bit_runs, -1);
1785
+ rb_define_method(rb_cString, "set_bit", rb_str_set_bit, -1);
1786
+ rb_define_method(rb_cString, "clear_bit", rb_str_clear_bit, -1);
1787
+ rb_define_method(rb_cString, "flip_bit", rb_str_flip_bit, -1);
1788
+ rb_define_method(rb_cString, "bit_not", rb_str_bit_not, 0);
1789
+ rb_define_method(rb_cString, "bit_not!", rb_str_bit_not_bang, 0);
1790
+ rb_define_method(rb_cString, "bit_and", rb_str_bit_and, 1);
1791
+ rb_define_method(rb_cString, "bit_and!", rb_str_bit_and_bang, 1);
1792
+ rb_define_method(rb_cString, "bit_or", rb_str_bit_or, 1);
1793
+ rb_define_method(rb_cString, "bit_or!", rb_str_bit_or_bang, 1);
1794
+ rb_define_method(rb_cString, "bit_xor", rb_str_bit_xor, 1);
1795
+ rb_define_method(rb_cString, "bit_xor!", rb_str_bit_xor_bang, 1);
1796
+
1797
+ // These methods are defined here to avoid cluttering this file, but they are not part of the current core proposal (see FUTURE_PROPOSAL_PLAN.md).
1798
+ rb_define_method(rb_cString, "each_bit_field", rb_str_each_bit_field, -1);
1799
+ rb_define_method(rb_cString, "bit_fields", rb_str_bit_fields, -1);
1800
+ rb_define_method(rb_cArray, "mask", rb_ary_mask, -1);
1801
+ rb_define_method(rb_cArray, "mask!", rb_ary_mask_bang, -1);
1802
+ }