string_bits 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (4) hide show
  1. checksums.yaml +4 -4
  2. data/README.md +2 -2
  3. data/ext/string_bits/string_bits.c +756 -576
  4. metadata +1 -1
@@ -2,10 +2,40 @@
2
2
  #include "ruby/encoding.h"
3
3
 
4
4
  #include <limits.h> /* CHAR_BIT */
5
- #include <stdint.h> /* uint64_t, UINT64_MAX */
5
+ #include <stdint.h> /* uint64_t, UINT64_MAX, int64_t, intptr_t */
6
+ #include <inttypes.h> /* PRIdPTR (ssize_t via intptr_t), PRId64 */
6
7
  #include <string.h> /* memcpy */
7
8
  #include <sys/types.h> /* ssize_t (Ruby typedefs it on Windows) */
8
9
 
10
+ /* Whole-string bit length, computed in 64 bits.
11
+ *
12
+ * RSTRING_LEN returns a pointer-width signed length, so `RSTRING_LEN(s) * 8`
13
+ * overflows a signed 32-bit ssize_t once a string reaches 2**28 bytes (256 MiB)
14
+ * on an ILP32 build, corrupting every bounds check that compares a bit offset
15
+ * against it. Valid bit indices are confined to the Fixnum range and always fit
16
+ * ssize_t, so only this whole-string bit length needs the wider type: computing
17
+ * it in int64_t keeps the bounds checks correct on 32-bit without changing the
18
+ * public pointer-width bit-index contract (see Discussion.md, "Error behavior
19
+ * for out-of-range bit indices").
20
+ *
21
+ * Porting to Ruby Core:
22
+ * 1. Core String lengths are `long` (RSTRING_LEN), which is only 32 bits wide on ILP32 (and on LLP64, i.e. 64-bit Windows),
23
+ * so `RSTRING_LEN(str) * 8` overflows on ILP32 for strings >= 256 MiB
24
+ * exactly as it does for ssize_t here. Keep the whole-string bit length
25
+ * in a 64-bit intermediate at every bounds check; do not hold it in a
26
+ * `long`. Reuse this macro (or an equivalent inline) rather than open-
27
+ * coding `len * 8`.
28
+ * 2. Keep the public bit-index type pointer-width and keep rejecting
29
+ * out-of-range positions with ArgumentError (see the cross-reference
30
+ * above). Only this internal length is widened, so the contract that
31
+ * core inherits is unchanged.
32
+ * 3. The error-message format specifiers below (<inttypes.h>: (intptr_t)
33
+ * with PRIdPTR for bit offsets, PRId64 for this widened length) exist
34
+ * only because this length is wider than the offsets. In core, follow
35
+ * the local convention for formatting `long` offsets and pick a 64-bit
36
+ * specifier for the widened length accordingly. */
37
+ #define SB_BIT_LEN(byte_len) ((int64_t)(byte_len) * 8)
38
+
9
39
  /* popcount ----------------------------------------------------------------- */
10
40
  /*
11
41
  * Porting to Ruby Core:
@@ -70,7 +100,7 @@ sb_popcount64(uint64_t x)
70
100
  /* ctz / clz helpers for set-bit iteration ---------------------------------- */
71
101
 
72
102
  static ID id_bracket;
73
- static VALUE sym_lsb_first, sym_lsb, sym_msb, sym_invert;
103
+ static VALUE sym_lsb_first, sym_invert;
74
104
 
75
105
  enum sb_kw_flag {
76
106
  SB_KW_INVERT = 1 << 0,
@@ -190,27 +220,10 @@ integer_to_bit_idx(VALUE n)
190
220
  UNREACHABLE_RETURN(0);
191
221
  }
192
222
 
193
- static ssize_t
194
- check_bit_index(VALUE self, VALUE n, int lsb_first)
195
- {
196
- if (!rb_integer_type_p(n)) {
197
- rb_raise(rb_eTypeError, "bit index must be an integer");
198
- }
199
- ssize_t idx = integer_to_bit_idx(n);
200
- ssize_t size = RSTRING_LEN(self) * 8;
201
- if (idx < 0 || idx >= size) {
202
- rb_raise(rb_eIndexError, "bit index out of range");
203
- }
204
- if (!lsb_first) idx = (idx & ~7L) | (7 - (idx & 7L));
205
- return idx;
206
- }
207
-
208
- static inline ssize_t
209
- physical_to_count_from(ssize_t physical, int lsb_first)
210
- {
211
- return lsb_first ? physical : ((physical & ~7L) | (7 - (physical & 7L)));
212
- }
213
-
223
+ /* Bit numbering between byte-with-LSB-as-bit-0 and byte-with-MSB-as-bit-0
224
+ * is an involution: swapping in either direction uses the same formula
225
+ * `(x & ~7) | (7 - (x & 7))`. logical_to_physical is therefore symmetric and
226
+ * is reused on the return path (physical -> logical) as well. */
214
227
  static inline ssize_t
215
228
  logical_to_physical(ssize_t logical, int lsb_first)
216
229
  {
@@ -237,6 +250,20 @@ logical_write_bit(unsigned char *ptr, ssize_t logical_index, int lsb_first, int
237
250
  physical_write_bit(ptr, logical_to_physical(logical_index, lsb_first), bit);
238
251
  }
239
252
 
253
+ static ssize_t
254
+ check_bit_index(VALUE self, VALUE n, int lsb_first)
255
+ {
256
+ if (!rb_integer_type_p(n)) {
257
+ rb_raise(rb_eTypeError, "bit index must be an integer");
258
+ }
259
+ ssize_t idx = integer_to_bit_idx(n);
260
+ int64_t size = SB_BIT_LEN(RSTRING_LEN(self));
261
+ if (idx < 0 || idx >= size) {
262
+ rb_raise(rb_eIndexError, "bit index out of range");
263
+ }
264
+ return logical_to_physical(idx, lsb_first);
265
+ }
266
+
240
267
  /* ssize_t-interface wrapper around rb_range_beg_len.
241
268
  *
242
269
  * rb_range_beg_len() takes (long *begp, long *lenp, long len), but this
@@ -266,11 +293,49 @@ sb_range_beg_len_call(VALUE arg)
266
293
  return rb_range_beg_len(a->range, a->lbegp, a->llenp, a->len, a->err);
267
294
  }
268
295
 
296
+ /* Validate Range endpoints for bit position arguments.
297
+ * Raises ArgumentError for:
298
+ * - any explicit (non-nil) Bignum endpoint: cannot address any real string,
299
+ * consistent with integer_to_bit_idx behavior for scalar indices.
300
+ * - any explicit (non-nil) negative endpoint: count-from-end semantics
301
+ * interact confusingly with lsb_first: true/false.
302
+ * RBIGNUM_NEGATIVE_P is used for the negativity check on Bignums to avoid
303
+ * calling NUM2LL on values that do not fit in long long.
304
+ *
305
+ * Porting to Ruby Core:
306
+ * Replace rb_range_values() with direct struct access:
307
+ * #include "internal/range.h"
308
+ * beg = RANGE_BEG(range);
309
+ * end = RANGE_END(range);
310
+ * excl = RANGE_EXCL(range);
311
+ */
312
+ static void
313
+ sb_range_validate_endpoints(VALUE range)
314
+ {
315
+ VALUE beg, end;
316
+ int excl;
317
+ rb_range_values(range, &beg, &end, &excl);
318
+ if (!NIL_P(beg) && rb_integer_type_p(beg)) {
319
+ if (!FIXNUM_P(beg))
320
+ rb_raise(rb_eArgError, "bit index out of representable range");
321
+ if (FIX2LONG(beg) < 0)
322
+ rb_raise(rb_eIndexError,
323
+ "negative Range endpoint is not allowed for bit positions");
324
+ }
325
+ if (!NIL_P(end) && rb_integer_type_p(end)) {
326
+ if (!FIXNUM_P(end))
327
+ rb_raise(rb_eArgError, "bit index out of representable range");
328
+ if (FIX2LONG(end) < 0)
329
+ rb_raise(rb_eIndexError,
330
+ "negative Range endpoint is not allowed for bit positions");
331
+ }
332
+ }
333
+
269
334
  static inline VALUE
270
- sb_range_beg_len(VALUE range, ssize_t *begp, ssize_t *lenp, ssize_t len, int err)
335
+ sb_range_beg_len(VALUE range, ssize_t *begp, ssize_t *lenp, int64_t len, int err)
271
336
  {
272
337
  long lbeg = 0, llen = 0;
273
- long clipped = (len > (ssize_t)LONG_MAX) ? LONG_MAX : (long)len;
338
+ long clipped = (len > (int64_t)LONG_MAX) ? LONG_MAX : (long)len;
274
339
  struct sb_range_args args = { range, &lbeg, &llen, clipped, err };
275
340
  int state = 0;
276
341
  VALUE result = rb_protect(sb_range_beg_len_call, (VALUE)&args, &state);
@@ -325,53 +390,42 @@ parse_lsb_first_opt(VALUE opts)
325
390
  return parse_bool_opt(opts, sym_lsb_first, "lsb_first", 1);
326
391
  }
327
392
 
328
- static int
329
- parse_lsb_first(int argc, VALUE *argv)
393
+ /* Parse an optional start_offset positional argument (Qnil => 0).
394
+ * Raises ArgumentError for Bignum, IndexError for negative Fixnum. */
395
+ static ssize_t
396
+ parse_start_offset(VALUE v)
330
397
  {
331
- VALUE opts = Qnil;
332
- rb_scan_args(argc, argv, "0:", &opts);
333
- validate_option_hash(opts, SB_KW_LSB_FIRST);
334
- return parse_lsb_first_opt(opts);
398
+ if (NIL_P(v)) return 0;
399
+ ssize_t start_offset = integer_to_bit_idx(v); /* raises ArgumentError for Bignum */
400
+ if (start_offset < 0)
401
+ rb_raise(rb_eIndexError, "bit_offset must be non-negative");
402
+ return start_offset;
335
403
  }
336
404
 
337
405
  /* read -------------------------------------------------------------------- */
338
406
 
339
- /* String#bit_at(n, lsb_first: true) -> true or false
340
- *
341
- * bit_at uses flat/Arrow convention: byte_index = n/8 from start, bit = n%8 from LSB
342
- * e.g. "\xAA\xCC": bit 0..7 live in byte[0]=0xAA, bit 8..15 live in byte[1]=0xCC
343
- *
344
- * str = "\xFF\xAA" # 11111111 10101010
345
- * str.bit_at(0) # => true (1st bit is set)
346
- * str.bit_at(7) # => true (8th bit is set)
347
- * str.bit_at(8) # => false (9th bit is clear)
348
- * str.bit_at(9) # => true (10th bit is set)
349
- * str.bit_at(16) # => nil
350
- */
407
+ /* Return true/false/nil for the bit at flat position n. */
351
408
  static VALUE
352
409
  rb_str_bit_at(int argc, VALUE *argv, VALUE self)
353
410
  {
354
- VALUE n, opts;
355
- rb_scan_args(argc, argv, "1:", &n, &opts);
411
+ VALUE bit_offset_v, opts;
412
+ rb_scan_args(argc, argv, "1:", &bit_offset_v, &opts);
356
413
  validate_option_hash(opts, SB_KW_LSB_FIRST);
357
414
 
358
- if (!rb_integer_type_p(n)) {
415
+ if (!rb_integer_type_p(bit_offset_v)) {
359
416
  rb_raise(rb_eTypeError, "bit index must be an integer");
360
417
  }
361
- ssize_t idx = integer_to_bit_idx(n);
362
- if (idx < 0) {
363
- rb_raise(rb_eArgError, "bit index must be non-negative");
418
+ ssize_t bit_offset = integer_to_bit_idx(bit_offset_v);
419
+ if (bit_offset < 0) {
420
+ rb_raise(rb_eIndexError, "bit index out of range");
364
421
  }
365
- ssize_t size = RSTRING_LEN(self) * 8;
366
- if (size <= idx) {
422
+ int64_t size = SB_BIT_LEN(RSTRING_LEN(self));
423
+ if (size <= bit_offset) {
367
424
  return Qnil;
368
425
  }
369
426
 
370
427
  int lsb_first = parse_lsb_first_opt(opts);
371
-
372
- if (!lsb_first) {
373
- idx = (idx & ~7L) | (7 - (idx & 7L));
374
- }
428
+ ssize_t idx = logical_to_physical(bit_offset, lsb_first);
375
429
 
376
430
  if (test_bit(RSTRING_PTR(self), idx)) {
377
431
  return Qtrue;
@@ -380,19 +434,21 @@ rb_str_bit_at(int argc, VALUE *argv, VALUE self)
380
434
  }
381
435
  }
382
436
 
383
- static VALUE
384
- rb_str_bit_count(VALUE self)
437
+ /* count_set_bits: popcount over a raw byte buffer.
438
+ *
439
+ * Uses a 32-byte (4 x uint64_t) unrolled inner loop, falls back to 8-byte
440
+ * steps, and finally collects the partial trailing bytes into a single
441
+ * uint64_t for one more popcount. memcpy avoids unaligned-load issues on
442
+ * strict-alignment platforms (SPARC, MIPS); modern compilers fold the 8-byte
443
+ * memcpy into a single load on platforms that allow unaligned access. */
444
+ static ssize_t
445
+ count_set_bits(const unsigned char *str, ssize_t len)
385
446
  {
386
447
  ssize_t count = 0;
387
- ssize_t len = RSTRING_LEN(self);
388
- const char *str = RSTRING_PTR(self);
389
448
  ssize_t off = 0;
390
449
  ssize_t unrolled_end = len & ~31L;
391
450
  ssize_t aligned_end = len & ~7L;
392
451
 
393
- /* Use memcpy to avoid unaligned loads (SIGBUS on SPARC, MIPS, etc.)
394
- * and strict-aliasing violations. Modern compilers fold 8-byte memcpy
395
- * into a single load on platforms that allow unaligned access. */
396
452
  for (; off < unrolled_end; off += 32) {
397
453
  uint64_t w0, w1, w2, w3;
398
454
  memcpy(&w0, str + off, 8);
@@ -414,208 +470,374 @@ rb_str_bit_count(VALUE self)
414
470
  ssize_t remainder = len - aligned_end;
415
471
  if (remainder > 0) {
416
472
  uint64_t last = 0;
417
- const unsigned char *tail = (const unsigned char *)(str + aligned_end);
473
+ const unsigned char *tail = str + aligned_end;
418
474
  for (ssize_t i = 0; i < remainder; i++) {
419
475
  last |= (uint64_t)tail[i] << (i * 8);
420
476
  }
421
477
  count += sb_popcount64(last);
422
478
  }
423
479
 
424
- return SSIZET2NUM(count);
480
+ return count;
425
481
  }
426
482
 
427
- /* iterate bits ------------------------------------------------------------ */
483
/* count_set_bits_range: popcount over the bit range [start, start+length)
 * in LSB-first numbering (range bit k lives in byte k >> 3, bit k & 7).
 *
 * str         - byte buffer to scan.
 * total_bytes - size of `str` in bytes.
 * start       - first bit position of the range.
 * length      - number of bits in the range.
 *
 * The range is clipped to the end of the buffer. Returns 0 when the range is
 * empty, starts at or past the end of the buffer, or `start` is negative.
 * Non-byte-aligned start and length are handled by masking the partial
 * first/last bytes; whole bytes in between go through count_set_bits(). */
static ssize_t
count_set_bits_range(const unsigned char *str, ssize_t total_bytes,
                     ssize_t start, ssize_t length)
{
    if (start < 0 || length <= 0) {
        return 0;
    }
    const int64_t total_bits = SB_BIT_LEN(total_bytes);
    if (start >= total_bits) {
        return 0;
    }
    /* Clamp by subtraction rather than by comparing `start + length`: the
     * sum is only safe from signed overflow when both values are known to
     * be in the Fixnum range, which this helper should not have to assume. */
    if (length > total_bits - start) {
        length = (ssize_t)(total_bits - start);
    }

    ssize_t byte_start = start >> 3;
    const int bit_lo = (int)(start & 7);
    const ssize_t end_bit = start + length;  /* exclusive; <= total_bits after the clamp */
    const ssize_t last_byte = (end_bit - 1) >> 3;
    const int e_bit = (int)(end_bit & 7);    /* bits to use in last byte; 0 means full byte */

    /* Whole range inside one byte: shift the range down to bit 0 and keep
     * `length` (<= 8) bits. */
    if (byte_start == last_byte) {
        unsigned int b = (unsigned int)str[byte_start] >> bit_lo;
        b &= (1u << (unsigned)length) - 1u;
        return (ssize_t)sb_popcount64(b);
    }

    ssize_t count = 0;

    /* Partial first byte: physical bits bit_lo..7. */
    if (bit_lo != 0) {
        count += sb_popcount64((unsigned int)str[byte_start] >> bit_lo);
        byte_start++;
    }

    /* Whole bytes; includes the last byte when the range ends on a boundary. */
    const ssize_t full_last = (e_bit == 0) ? last_byte + 1 : last_byte;
    count += count_set_bits(str + byte_start, full_last - byte_start);

    /* Partial last byte: physical bits 0..(e_bit-1). */
    if (e_bit != 0) {
        unsigned int b = (unsigned int)str[last_byte] & ((1u << (unsigned)e_bit) - 1u);
        count += sb_popcount64(b);
    }
    return count;
}
519
+
520
/* count_set_bits_range_msb: popcount over the bit range [start, start+length)
 * in MSB-first numbering, where position 0 within a byte is physical bit 7
 * (the MSB).
 *
 * str         - byte buffer to scan.
 * total_bytes - size of `str` in bytes.
 * start       - first bit position of the range.
 * length      - number of bits in the range.
 *
 * The range is clipped to the end of the buffer. Returns 0 when the range is
 * empty, starts at or past the end of the buffer, or `start` is negative. */
static ssize_t
count_set_bits_range_msb(const unsigned char *str, ssize_t total_bytes,
                         ssize_t start, ssize_t length)
{
    if (start < 0 || length <= 0) {
        return 0;
    }
    const int64_t total_bits = SB_BIT_LEN(total_bytes);
    if (start >= total_bits) {
        return 0;
    }
    /* Clamp by subtraction rather than by comparing `start + length`: the
     * sum is only safe from signed overflow when both values are known to
     * be in the Fixnum range, which this helper should not have to assume. */
    if (length > total_bits - start) {
        length = (ssize_t)(total_bits - start);
    }

    ssize_t byte_start = start >> 3;
    const int s_bit = (int)(start & 7);      /* MSB-first within-byte start index */
    const ssize_t end_bit = start + length;  /* exclusive; <= total_bits after the clamp */
    const ssize_t last_byte = (end_bit - 1) >> 3;
    const int e_bit = (int)(end_bit & 7);    /* bits to use in last byte; 0 means full byte */

    if (byte_start == last_byte) {
        /* physical bits (7-s_bit) down to (7-s_bit-length+1) */
        unsigned int b = (unsigned int)str[byte_start] >> (unsigned)(8 - s_bit - (int)length);
        b &= (1u << (unsigned)length) - 1u;
        return (ssize_t)sb_popcount64(b);
    }

    ssize_t count = 0;

    /* partial first byte: MSB-first positions s_bit..7 = physical bits 0..(7-s_bit) */
    if (s_bit != 0) {
        unsigned int b = (unsigned int)str[byte_start] & ((1u << (unsigned)(8 - s_bit)) - 1u);
        count += sb_popcount64(b);
        byte_start++;
    }

    /* Whole bytes; includes the last byte when the range ends on a boundary. */
    const ssize_t full_last = (e_bit == 0) ? last_byte + 1 : last_byte;
    count += count_set_bits(str + byte_start, full_last - byte_start);

    /* partial last byte: MSB-first positions 0..(e_bit-1) = physical bits (8-e_bit)..7 */
    if (e_bit != 0) {
        unsigned int b = (unsigned int)str[last_byte] >> (unsigned)(8 - e_bit);
        count += sb_popcount64(b);
    }
    return count;
}
453
560
 
454
561
  static VALUE
455
- rb_str_bits(int argc, VALUE *argv, VALUE self)
562
+ rb_str_bit_count(int argc, VALUE *argv, VALUE self)
456
563
  {
457
- int lsb_first = parse_lsb_first(argc, argv);
458
- ssize_t len = RSTRING_LEN(self);
459
564
  const unsigned char *str = (const unsigned char *)RSTRING_PTR(self);
460
- ssize_t total_bits = len * 8;
461
- int have_block = rb_block_given_p();
565
+ ssize_t src_len = RSTRING_LEN(self);
462
566
 
463
- VALUE ary = have_block ? Qnil : rb_ary_new_capa(total_bits);
567
+ VALUE v0 = Qnil, v1 = Qnil, opts = Qnil;
568
+ rb_scan_args(argc, argv, "02:", &v0, &v1, &opts);
569
+ validate_option_hash(opts, SB_KW_LSB_FIRST);
464
570
 
465
- for (ssize_t i = 0; i < len; i++) {
466
- unsigned char b = str[i];
467
- if (lsb_first) {
468
- for (int j = 0; j < 8; j++) {
469
- VALUE bit = (b >> j) & 1 ? Qtrue : Qfalse;
470
- have_block ? rb_yield(bit) : rb_ary_push(ary, bit);
471
- }
472
- } else {
473
- for (int j = 7; j >= 0; j--) {
474
- VALUE bit = (b >> j) & 1 ? Qtrue : Qfalse;
475
- have_block ? rb_yield(bit) : rb_ary_push(ary, bit);
476
- }
477
- }
571
+ /* No positional args: count the whole string; lsb_first: is ignored (order-independent) */
572
+ if (NIL_P(v0))
573
+ return SSIZET2NUM(count_set_bits(str, src_len));
574
+
575
+ int lsb_first = parse_lsb_first_opt(opts);
576
+ int64_t total_bits = SB_BIT_LEN(src_len);
577
+ ssize_t bit_offset, bit_length;
578
+
579
+ if (rb_obj_is_kind_of(v0, rb_cRange)) {
580
+ if (!NIL_P(v1))
581
+ rb_raise(rb_eArgError, "wrong number of arguments");
582
+ sb_range_validate_endpoints(v0);
583
+ ssize_t beg, len;
584
+ if (!RTEST(sb_range_beg_len(v0, &beg, &len, total_bits, 0)))
585
+ return INT2FIX(0);
586
+ bit_offset = beg;
587
+ bit_length = len;
588
+ }
589
+ else if (!NIL_P(v1)) {
590
+ if (!rb_integer_type_p(v0))
591
+ rb_raise(rb_eTypeError, "bit_offset must be an integer");
592
+ if (!rb_integer_type_p(v1))
593
+ rb_raise(rb_eTypeError, "bit_length must be an integer");
594
+ bit_offset = integer_to_bit_idx(v0);
595
+ if (bit_offset < 0)
596
+ rb_raise(rb_eIndexError, "bit_offset must be non-negative");
597
+ bit_length = integer_to_bit_idx(v1);
598
+ if (bit_length < 0)
599
+ rb_raise(rb_eArgError, "bit_length must be non-negative");
600
+ }
601
+ else {
602
+ rb_raise(rb_eArgError,
603
+ "wrong number of arguments (given 1, expected 0, 1 Range, or 2)");
478
604
  }
479
605
 
480
- return have_block ? self : ary;
606
+ if (lsb_first)
607
+ return SSIZET2NUM(count_set_bits_range(str, src_len, bit_offset, bit_length));
608
+ else
609
+ return SSIZET2NUM(count_set_bits_range_msb(str, src_len, bit_offset, bit_length));
481
610
  }
482
611
 
483
- /* iterate set-bit positions ----------------------------------------------- */
612
+ /* iterate bits ------------------------------------------------------------ */
484
613
 
485
- static VALUE
486
- rb_str_each_set_bit_offset(int argc, VALUE *argv, VALUE self)
614
+ /* Unified emitter for each_bit / bits.
615
+ *
616
+ * Yields (when ary == Qnil) or pushes to a pre-allocated Array. lsb_first is
617
+ * hoisted outside the byte loop so the inner walk direction is straight-line
618
+ * code, removing a per-byte branch.
619
+ */
620
+ static void
621
+ emit_bits(const unsigned char *str, ssize_t len, int lsb_first, ssize_t start_offset, VALUE ary)
487
622
  {
488
- RETURN_ENUMERATOR(self, argc, argv);
623
+ if (start_offset >= SB_BIT_LEN(len)) return;
624
+
625
+ #define SB_EMIT(v) \
626
+ do { VALUE _b = (v); \
627
+ if (ary == Qnil) rb_yield(_b); else rb_ary_push(ary, _b); } while (0)
628
+
629
+ ssize_t byte_start = start_offset >> 3;
630
+ int bit_start = (int)(start_offset & 7);
489
631
 
490
- int lsb_first = parse_lsb_first(argc, argv);
491
- ssize_t len = RSTRING_LEN(self);
492
- const unsigned char *str = (const unsigned char *)RSTRING_PTR(self);
493
632
  if (lsb_first) {
494
- /* LSB-first: ascending positions 0, 1, 2, ...
495
- * On little-endian, loading 8 bytes as a uint64_t preserves the flat
496
- * LSB-first bit numbering: word bit 0 == position 0, bit 63 == 63.
497
- * memcpy avoids unaligned-load SIGBUS on strict-alignment platforms. */
498
- #if SB_LITTLE_ENDIAN
499
- ssize_t n_words = len >> 3;
500
- for (ssize_t wi = 0; wi < n_words; wi++) {
501
- uint64_t w;
502
- memcpy(&w, str + wi * 8, 8);
503
- while (w != 0) {
504
- int bit = sb_ctzll(w);
505
- rb_yield(SSIZET2NUM(wi * 64 + bit));
506
- w &= w - 1;
633
+ for (ssize_t i = byte_start; i < len; i++) {
634
+ unsigned char b = str[i];
635
+ int j_start = (i == byte_start) ? bit_start : 0;
636
+ for (int j = j_start; j < 8; j++) {
637
+ SB_EMIT((b >> j) & 1 ? Qtrue : Qfalse);
507
638
  }
508
639
  }
509
- for (ssize_t bi = n_words << 3; bi < len; bi++) {
510
- unsigned int b = str[bi];
511
- while (b != 0) {
512
- int bit = sb_ctz8(b);
513
- rb_yield(SSIZET2NUM(bi * 8 + bit));
514
- b &= b - 1;
515
- }
516
- }
517
- #else
518
- for (ssize_t bi = 0; bi < len; bi++) {
519
- unsigned int b = str[bi];
520
- while (b != 0) {
521
- int bit = sb_ctz8(b);
522
- rb_yield(SSIZET2NUM(bi * 8 + bit));
523
- b &= b - 1;
524
- }
525
- }
526
- #endif
527
- }
528
- else {
529
- /* lsb_first: false => byte order preserved, bits 7..0 map to logical 0..7 */
530
- for (ssize_t bi = 0; bi < len; bi++) {
531
- unsigned int b = str[bi];
532
- while (b != 0) {
533
- int bit = sb_highest_bit8(b);
534
- ssize_t physical = bi * 8 + bit;
535
- rb_yield(SSIZET2NUM(physical_to_count_from(physical, 0)));
536
- b ^= (1u << bit); /* clear highest set bit */
640
+ } else {
641
+ for (ssize_t i = byte_start; i < len; i++) {
642
+ unsigned char b = str[i];
643
+ int j_end = (i == byte_start) ? (7 - bit_start) : 7;
644
+ for (int j = j_end; j >= 0; j--) {
645
+ SB_EMIT((b >> j) & 1 ? Qtrue : Qfalse);
537
646
  }
538
647
  }
539
648
  }
540
649
 
650
+ #undef SB_EMIT
651
+ }
652
+
653
+ static VALUE
654
+ rb_str_each_bit(int argc, VALUE *argv, VALUE self)
655
+ {
656
+ RETURN_ENUMERATOR(self, argc, argv);
657
+
658
+ VALUE start_offset_v = Qnil, opts = Qnil;
659
+ rb_scan_args(argc, argv, "01:", &start_offset_v, &opts);
660
+ validate_option_hash(opts, SB_KW_LSB_FIRST);
661
+ int lsb_first = parse_lsb_first_opt(opts);
662
+ ssize_t start_offset = parse_start_offset(start_offset_v);
663
+
664
+ emit_bits((const unsigned char *)RSTRING_PTR(self), RSTRING_LEN(self),
665
+ lsb_first, start_offset, Qnil);
541
666
  return self;
542
667
  }
543
668
 
544
669
  static VALUE
545
- rb_str_set_bit_offsets(int argc, VALUE *argv, VALUE self)
670
+ rb_str_bits(int argc, VALUE *argv, VALUE self)
546
671
  {
547
- int lsb_first = parse_lsb_first(argc, argv);
672
+ VALUE start_offset_v = Qnil, opts = Qnil;
673
+ rb_scan_args(argc, argv, "01:", &start_offset_v, &opts);
674
+ validate_option_hash(opts, SB_KW_LSB_FIRST);
675
+ int lsb_first = parse_lsb_first_opt(opts);
676
+ ssize_t start_offset = parse_start_offset(start_offset_v);
548
677
  ssize_t len = RSTRING_LEN(self);
549
678
  const unsigned char *str = (const unsigned char *)RSTRING_PTR(self);
550
- int have_block = rb_block_given_p();
551
679
 
552
- VALUE ary;
553
- if (have_block) {
554
- ary = Qnil;
555
- }
556
- else {
557
- /* Pre-size the Array with popcount to avoid repeated reallocation.
558
- * memcpy avoids unaligned-load issues on strict-alignment platforms. */
559
- ssize_t count = 0;
560
- ssize_t nw = len >> 3;
561
- for (ssize_t wi = 0; wi < nw; wi++) {
562
- uint64_t w;
563
- memcpy(&w, str + wi * 8, 8);
564
- count += sb_popcount64(w);
565
- }
566
- for (ssize_t bi = nw << 3; bi < len; bi++)
567
- count += sb_popcount64((uint64_t)(unsigned char)str[bi]);
568
- ary = rb_ary_new_capa(count);
680
+ if (rb_block_given_p()) {
681
+ emit_bits(str, len, lsb_first, start_offset, Qnil);
682
+ return self;
569
683
  }
570
684
 
685
+ int64_t total_bits = SB_BIT_LEN(len);
686
+ ssize_t nbits = (start_offset >= total_bits) ? 0 : (ssize_t)(total_bits - start_offset);
687
+ VALUE ary = rb_ary_new_capa(nbits);
688
+ emit_bits(str, len, lsb_first, start_offset, ary);
689
+ return ary;
690
+ }
691
+
692
+ /* iterate bit positions matching `bit` ------------------------------------ */
693
+
694
+ /* parse the required `bit` argument (true/false/1/0) to 0 or 1 */
695
+ static int
696
+ parse_bit_target(VALUE bit_val)
697
+ {
698
+ if (bit_val == Qtrue || bit_val == INT2FIX(1)) return 1;
699
+ if (bit_val == Qfalse || bit_val == INT2FIX(0)) return 0;
700
+ rb_raise(rb_eArgError, "bit must be 0, 1, false, or true");
701
+ UNREACHABLE_RETURN(0);
702
+ }
703
+
704
+ /* Unified scanner for each_bit_offset / bit_offsets.
705
+ *
706
+ * Emit each bit position equal to `target` either by yielding to the block
707
+ * (when ary == Qnil) or by pushing to the pre-allocated Array. Both call
708
+ * paths share the same hot loops; the only per-emit cost is one branch on
709
+ * (ary == Qnil), which the compiler can lift out of the inner while loop.
710
+ *
711
+ * LSB-first path: on little-endian, an 8-byte memcpy preserves the flat
712
+ * LSB-first bit numbering (word bit 0 == position 0), so we can scan 64 bits
713
+ * per ctzll. For target=0, invert the loaded word/byte; all 8/64 bits of the
714
+ * inverted unit are valid positions since each byte contributes exactly 8.
715
+ *
716
+ * MSB-first path: walk byte-by-byte with sb_highest_bit8, mapping each
717
+ * physical (LSB-first) bit position into the MSB-first count via
718
+ * logical_to_physical (the operation is its own inverse).
719
+ */
720
+ static void
721
+ emit_bit_offsets(const unsigned char *str, ssize_t len, int target, int lsb_first,
722
+ ssize_t start_offset, VALUE ary)
723
+ {
724
+ if (start_offset >= SB_BIT_LEN(len)) return;
725
+
726
+ #define SB_EMIT(pos_val) \
727
+ do { VALUE _p = (pos_val); \
728
+ if (ary == Qnil) rb_yield(_p); else rb_ary_push(ary, _p); } while (0)
729
+
730
+ ssize_t byte_start = start_offset >> 3;
731
+ int bit_lo = (int)(start_offset & 7);
732
+
571
733
  if (lsb_first) {
734
+ /* Handle the partial first byte before aligning to byte boundary */
735
+ if (bit_lo != 0) {
736
+ unsigned int b = str[byte_start];
737
+ if (target == 0) b = (~b) & 0xFF;
738
+ b >>= bit_lo;
739
+ while (b != 0) {
740
+ int bit = sb_ctz8(b);
741
+ SB_EMIT(SSIZET2NUM(byte_start * 8 + bit_lo + bit));
742
+ b &= b - 1;
743
+ }
744
+ byte_start++;
745
+ }
572
746
  #if SB_LITTLE_ENDIAN
573
- ssize_t n_words = len >> 3;
747
+ ssize_t n_words = (len - byte_start) >> 3;
574
748
  for (ssize_t wi = 0; wi < n_words; wi++) {
575
749
  uint64_t w;
576
- memcpy(&w, str + wi * 8, 8);
750
+ memcpy(&w, str + byte_start + wi * 8, 8);
751
+ if (target == 0) w = ~w;
577
752
  while (w != 0) {
578
753
  int bit = sb_ctzll(w);
579
- VALUE pos = SSIZET2NUM(wi * 64 + bit);
580
- have_block ? rb_yield(pos) : rb_ary_push(ary, pos);
754
+ SB_EMIT(SSIZET2NUM((byte_start + wi * 8) * 8 + bit));
581
755
  w &= w - 1;
582
756
  }
583
757
  }
584
- for (ssize_t bi = n_words << 3; bi < len; bi++) {
758
+ for (ssize_t bi = byte_start + (n_words << 3); bi < len; bi++) {
585
759
  unsigned int b = str[bi];
760
+ if (target == 0) b = (~b) & 0xFF;
586
761
  while (b != 0) {
587
762
  int bit = sb_ctz8(b);
588
- VALUE pos = SSIZET2NUM(bi * 8 + bit);
589
- have_block ? rb_yield(pos) : rb_ary_push(ary, pos);
763
+ SB_EMIT(SSIZET2NUM(bi * 8 + bit));
590
764
  b &= b - 1;
591
765
  }
592
766
  }
593
767
  #else
594
- for (ssize_t bi = 0; bi < len; bi++) {
768
+ for (ssize_t bi = byte_start; bi < len; bi++) {
595
769
  unsigned int b = str[bi];
770
+ if (target == 0) b = (~b) & 0xFF;
596
771
  while (b != 0) {
597
772
  int bit = sb_ctz8(b);
598
- VALUE pos = SSIZET2NUM(bi * 8 + bit);
599
- have_block ? rb_yield(pos) : rb_ary_push(ary, pos);
773
+ SB_EMIT(SSIZET2NUM(bi * 8 + bit));
600
774
  b &= b - 1;
601
775
  }
602
776
  }
603
777
  #endif
604
778
  }
605
779
  else {
606
- for (ssize_t bi = 0; bi < len; bi++) {
780
+ /* lsb_first: false => byte order preserved, bits 7..0 map to logical 0..7.
781
+ * In the first (possibly partial) byte, skip the top bit_lo bits. */
782
+ for (ssize_t bi = byte_start; bi < len; bi++) {
607
783
  unsigned int b = str[bi];
784
+ if (target == 0) b = (~b) & 0xFF;
785
+ if (bi == byte_start && bit_lo != 0)
786
+ b &= (1u << (8 - bit_lo)) - 1; /* clear top bit_lo bits */
608
787
  while (b != 0) {
609
788
  int bit = sb_highest_bit8(b);
610
789
  ssize_t physical = bi * 8 + bit;
611
- VALUE pos = SSIZET2NUM(physical_to_count_from(physical, 0));
612
- have_block ? rb_yield(pos) : rb_ary_push(ary, pos);
790
+ SB_EMIT(SSIZET2NUM(logical_to_physical(physical, 0)));
613
791
  b ^= (1u << bit);
614
792
  }
615
793
  }
616
794
  }
617
795
 
618
- return have_block ? self : ary;
796
+ #undef SB_EMIT
797
+ }
798
+
799
+ static VALUE
800
+ rb_str_each_bit_offset(int argc, VALUE *argv, VALUE self)
801
+ {
802
+ RETURN_ENUMERATOR(self, argc, argv);
803
+
804
+ VALUE bit_val, start_offset_v = Qnil, opts = Qnil;
805
+ rb_scan_args(argc, argv, "11:", &bit_val, &start_offset_v, &opts);
806
+ validate_option_hash(opts, SB_KW_LSB_FIRST);
807
+ int lsb_first = parse_lsb_first_opt(opts);
808
+ int target = parse_bit_target(bit_val);
809
+ ssize_t start_offset = parse_start_offset(start_offset_v);
810
+
811
+ emit_bit_offsets((const unsigned char *)RSTRING_PTR(self), RSTRING_LEN(self),
812
+ target, lsb_first, start_offset, Qnil);
813
+ return self;
814
+ }
815
+
816
+ static VALUE
817
+ rb_str_bit_offsets(int argc, VALUE *argv, VALUE self)
818
+ {
819
+ VALUE bit_val, start_offset_v = Qnil, opts = Qnil;
820
+ rb_scan_args(argc, argv, "11:", &bit_val, &start_offset_v, &opts);
821
+ validate_option_hash(opts, SB_KW_LSB_FIRST);
822
+ int lsb_first = parse_lsb_first_opt(opts);
823
+ int target = parse_bit_target(bit_val);
824
+ ssize_t start_offset = parse_start_offset(start_offset_v);
825
+
826
+ ssize_t len = RSTRING_LEN(self);
827
+ const unsigned char *str = (const unsigned char *)RSTRING_PTR(self);
828
+
829
+ if (rb_block_given_p()) {
830
+ emit_bit_offsets(str, len, target, lsb_first, start_offset, Qnil);
831
+ return self;
832
+ }
833
+
834
+ /* Pre-size the Array using popcount to avoid repeated reallocation.
835
+ * For target=0 the expected count is (len * 8 - popcount). */
836
+ ssize_t set_count = count_set_bits(str, len);
837
+ ssize_t count = (target == 1) ? set_count : (ssize_t)(SB_BIT_LEN(len) - set_count);
838
+ VALUE ary = rb_ary_new_capa(count);
839
+ emit_bit_offsets(str, len, target, lsb_first, start_offset, ary);
840
+ return ary;
619
841
  }
620
842
 
621
843
  /* multi-bit mutation ------------------------------------------------------ */
@@ -712,40 +934,36 @@ bit_copy_core(unsigned char *dst, ssize_t dst_bit_off,
712
934
  if (tmp != stack_tmp) ruby_xfree(tmp);
713
935
  }
714
936
 
715
- /* String#bit_slice(bit_offset, bit_length) -> String
716
- * String#bit_slice(range) -> String
717
- *
718
- * str = "\xFF\x00" # 11111111 00000000
719
- * str.bit_slice(4, 8) # => "\xF0" (11110000)
720
- */
937
+ /* Extract a sub-sequence of bits into a new String. */
721
938
  static VALUE
722
939
  rb_str_bit_slice(int argc, VALUE *argv, VALUE self)
723
940
  {
724
941
  ssize_t src_len = RSTRING_LEN(self);
725
- ssize_t total_bits = src_len * 8;
726
- ssize_t offset, length;
942
+ int64_t total_bits = SB_BIT_LEN(src_len);
943
+ ssize_t bit_offset, bit_length;
727
944
  VALUE v0, v1, opts;
728
945
  int n_pos = rb_scan_args(argc, argv, "11:", &v0, &v1, &opts);
729
946
  validate_option_hash(opts, SB_KW_LSB_FIRST);
730
947
  int lsb_first = parse_lsb_first_opt(opts);
731
948
 
732
949
  if (n_pos == 1 && rb_obj_is_kind_of(v0, rb_cRange)) {
950
+ sb_range_validate_endpoints(v0);
733
951
  ssize_t beg, len;
734
952
  if (!RTEST(sb_range_beg_len(v0, &beg, &len, total_bits, 0))) {
735
953
  return Qnil;
736
954
  }
737
- offset = beg;
738
- length = len;
955
+ bit_offset = beg;
956
+ bit_length = len;
739
957
  }
740
958
  else if (n_pos == 2) {
741
959
  if (!rb_integer_type_p(v0) || !rb_integer_type_p(v1)) {
742
960
  return Qnil;
743
961
  }
744
962
 
745
- offset = integer_to_bit_idx(v0);
746
- length = integer_to_bit_idx(v1);
963
+ bit_offset = integer_to_bit_idx(v0);
964
+ bit_length = integer_to_bit_idx(v1);
747
965
 
748
- if (offset < 0 || length < 0) return Qnil;
966
+ if (bit_offset < 0 || bit_length < 0) return Qnil;
749
967
  }
750
968
  else if (n_pos == 1) {
751
969
  return Qnil;
@@ -755,13 +973,13 @@ rb_str_bit_slice(int argc, VALUE *argv, VALUE self)
755
973
  "wrong number of arguments (given %d, expected 1 or 2)", n_pos);
756
974
  }
757
975
 
758
- if (offset > total_bits) return Qnil;
759
- ssize_t available = total_bits - offset;
760
- if (length > available) length = available;
976
+ if (bit_offset > total_bits) return Qnil;
977
+ int64_t available = total_bits - bit_offset;
978
+ if (bit_length > available) bit_length = (ssize_t)available;
761
979
 
762
- if (length == 0) return rb_str_new("", 0);
980
+ if (bit_length == 0) return rb_str_new("", 0);
763
981
 
764
- ssize_t out_bytes = (length + 7) / 8;
982
+ ssize_t out_bytes = (bit_length + 7) / 8;
765
983
  VALUE result = rb_str_buf_new(out_bytes);
766
984
  rb_str_resize(result, out_bytes);
767
985
  rb_enc_associate(result, rb_enc_get(self));
@@ -771,17 +989,17 @@ rb_str_bit_slice(int argc, VALUE *argv, VALUE self)
771
989
  memset(dst, 0, out_bytes);
772
990
 
773
991
  if (lsb_first) {
774
- bit_copy_core(dst, 0, src, src_len, offset, length);
992
+ bit_copy_core(dst, 0, src, src_len, bit_offset, bit_length);
775
993
  } else {
776
994
  ssize_t dst_bit = 0;
777
- ssize_t start_byte = offset >> 3;
778
- ssize_t end_byte = (offset + length - 1) >> 3;
995
+ ssize_t start_byte = bit_offset >> 3;
996
+ ssize_t end_byte = (bit_offset + bit_length - 1) >> 3;
779
997
 
780
998
  for (ssize_t b = start_byte; b <= end_byte; b++) {
781
999
  ssize_t b_start_l = b << 3;
782
1000
  ssize_t b_end_l = b_start_l + 7;
783
- ssize_t l_min = (offset > b_start_l) ? offset : b_start_l;
784
- ssize_t l_max = ((offset + length - 1) < b_end_l) ? (offset + length - 1) : b_end_l;
1001
+ ssize_t l_min = (bit_offset > b_start_l) ? bit_offset : b_start_l;
1002
+ ssize_t l_max = ((bit_offset + bit_length - 1) < b_end_l) ? (bit_offset + bit_length - 1) : b_end_l;
785
1003
 
786
1004
  ssize_t p_min = b_start_l + (7 - (l_max & 7L));
787
1005
  ssize_t p_max = b_start_l + (7 - (l_min & 7L));
@@ -805,8 +1023,8 @@ enum sb_mutation_op {
805
1023
  static VALUE
806
1024
  rb_str_mutate_bits(int argc, VALUE *argv, VALUE self, enum sb_mutation_op op)
807
1025
  {
808
- VALUE target, opts;
809
- rb_scan_args(argc, argv, "1:", &target, &opts);
1026
+ VALUE target, bit_length_v = Qnil, opts = Qnil;
1027
+ rb_scan_args(argc, argv, "11:", &target, &bit_length_v, &opts);
810
1028
  validate_option_hash(opts, SB_KW_LSB_FIRST);
811
1029
  int lsb_first = parse_lsb_first_opt(opts);
812
1030
 
@@ -814,18 +1032,48 @@ rb_str_mutate_bits(int argc, VALUE *argv, VALUE self, enum sb_mutation_op op)
814
1032
  unsigned char *ptr = (unsigned char *)RSTRING_PTR(self);
815
1033
 
816
1034
  if (rb_integer_type_p(target)) {
817
- ssize_t idx = check_bit_index(self, target, lsb_first);
818
- unsigned char mask = (unsigned char)(1u << (idx % 8));
819
- switch (op) {
820
- case SB_MUT_SET: ptr[idx / 8] |= mask; break;
821
- case SB_MUT_CLEAR: ptr[idx / 8] &= (unsigned char)~mask; break;
822
- case SB_MUT_FLIP: ptr[idx / 8] ^= mask; break;
1035
+ if (NIL_P(bit_length_v)) {
1036
+ /* Single-bit form: bit_set(n) */
1037
+ ssize_t idx = check_bit_index(self, target, lsb_first);
1038
+ unsigned char mask = (unsigned char)(1u << (idx % 8));
1039
+ switch (op) {
1040
+ case SB_MUT_SET: ptr[idx / 8] |= mask; break;
1041
+ case SB_MUT_CLEAR: ptr[idx / 8] &= (unsigned char)~mask; break;
1042
+ case SB_MUT_FLIP: ptr[idx / 8] ^= mask; break;
1043
+ }
1044
+ return self;
1045
+ }
1046
+ /* 2-arg form: bit_set(bit_offset, bit_length) */
1047
+ if (!rb_integer_type_p(bit_length_v))
1048
+ rb_raise(rb_eTypeError, "bit_length must be an integer");
1049
+ ssize_t bit_offset = integer_to_bit_idx(target);
1050
+ if (bit_offset < 0)
1051
+ rb_raise(rb_eIndexError, "bit_offset must be non-negative");
1052
+ ssize_t bit_length = integer_to_bit_idx(bit_length_v);
1053
+ if (bit_length < 0)
1054
+ rb_raise(rb_eArgError, "bit_length must be non-negative");
1055
+ if (bit_length == 0) return self;
1056
+ int64_t total_bits = SB_BIT_LEN(RSTRING_LEN(self));
1057
+ if (bit_offset >= total_bits || bit_offset + bit_length > total_bits)
1058
+ rb_raise(rb_eIndexError, "bit range out of range");
1059
+ for (ssize_t logical = bit_offset; logical < bit_offset + bit_length; logical++) {
1060
+ ssize_t idx = logical_to_physical(logical, lsb_first);
1061
+ unsigned char mask = (unsigned char)(1u << (idx % 8));
1062
+ switch (op) {
1063
+ case SB_MUT_SET: ptr[idx / 8] |= mask; break;
1064
+ case SB_MUT_CLEAR: ptr[idx / 8] &= (unsigned char)~mask; break;
1065
+ case SB_MUT_FLIP: ptr[idx / 8] ^= mask; break;
1066
+ }
823
1067
  }
824
1068
  return self;
825
1069
  }
826
1070
 
1071
+ if (!NIL_P(bit_length_v))
1072
+ rb_raise(rb_eArgError, "wrong number of arguments");
1073
+
827
1074
  if (rb_obj_is_kind_of(target, rb_cRange)) {
828
- ssize_t total_bits = RSTRING_LEN(self) * 8;
1075
+ sb_range_validate_endpoints(target);
1076
+ int64_t total_bits = SB_BIT_LEN(RSTRING_LEN(self));
829
1077
  ssize_t beg, len;
830
1078
 
831
1079
  /* err=0 returns Qnil for out-of-range begin (after negative normalization);
@@ -836,19 +1084,20 @@ rb_str_mutate_bits(int argc, VALUE *argv, VALUE self, enum sb_mutation_op op)
836
1084
 
837
1085
  /* err=0 silently clamps end > total. Detect that and raise instead,
838
1086
  * to stay consistent with bit_splice and single-bit mutation. */
839
- VALUE rng_end = rb_funcall(target, rb_intern("end"), 0);
840
- if (!NIL_P(rng_end)) {
841
- ssize_t end_val = integer_to_bit_idx(rng_end);
842
- if (end_val < 0) end_val += total_bits;
843
- int exclusive = RTEST(rb_funcall(target, rb_intern("exclude_end?"), 0));
844
- ssize_t end_excl = exclusive ? end_val : end_val + 1;
1087
+ VALUE rng_beg_unused, rng_end_v;
1088
+ int excl;
1089
+ rb_range_values(target, &rng_beg_unused, &rng_end_v, &excl);
1090
+ (void)rng_beg_unused;
1091
+ if (!NIL_P(rng_end_v)) {
1092
+ ssize_t end_val = integer_to_bit_idx(rng_end_v);
1093
+ ssize_t end_excl = excl ? end_val : end_val + 1;
845
1094
  if (end_excl > total_bits) {
846
1095
  rb_raise(rb_eIndexError, "bit range out of range");
847
1096
  }
848
1097
  }
849
1098
 
850
1099
  for (ssize_t logical = beg; logical < beg + len; logical++) {
851
- ssize_t idx = lsb_first ? logical : ((logical & ~7L) | (7 - (logical & 7L)));
1100
+ ssize_t idx = logical_to_physical(logical, lsb_first);
852
1101
  unsigned char mask = (unsigned char)(1u << (idx % 8));
853
1102
  switch (op) {
854
1103
  case SB_MUT_SET: ptr[idx / 8] |= mask; break;
@@ -864,19 +1113,19 @@ rb_str_mutate_bits(int argc, VALUE *argv, VALUE self, enum sb_mutation_op op)
864
1113
  }
865
1114
 
866
1115
  static VALUE
867
- rb_str_set_bit(int argc, VALUE *argv, VALUE self)
1116
+ rb_str_bit_set(int argc, VALUE *argv, VALUE self)
868
1117
  {
869
1118
  return rb_str_mutate_bits(argc, argv, self, SB_MUT_SET);
870
1119
  }
871
1120
 
872
1121
  static VALUE
873
- rb_str_clear_bit(int argc, VALUE *argv, VALUE self)
1122
+ rb_str_bit_clear(int argc, VALUE *argv, VALUE self)
874
1123
  {
875
1124
  return rb_str_mutate_bits(argc, argv, self, SB_MUT_CLEAR);
876
1125
  }
877
1126
 
878
1127
  static VALUE
879
- rb_str_flip_bit(int argc, VALUE *argv, VALUE self)
1128
+ rb_str_bit_flip(int argc, VALUE *argv, VALUE self)
880
1129
  {
881
1130
  return rb_str_mutate_bits(argc, argv, self, SB_MUT_FLIP);
882
1131
  }
@@ -902,101 +1151,141 @@ alloc_result(VALUE self)
902
1151
  return result;
903
1152
  }
904
1153
 
905
- static VALUE
906
- rb_str_bit_not(VALUE self)
907
- {
908
- ssize_t len = RSTRING_LEN(self);
909
- VALUE result = alloc_result(self);
910
- const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
911
- unsigned char *dst = (unsigned char *)RSTRING_PTR(result);
912
- for (ssize_t i = 0; i < len; i++) dst[i] = ~src[i];
913
- return result;
914
- }
915
-
916
- static VALUE
917
- rb_str_bit_not_bang(VALUE self)
918
- {
919
- rb_str_modify(self);
920
- ssize_t len = RSTRING_LEN(self);
921
- unsigned char *ptr = (unsigned char *)RSTRING_PTR(self);
922
- for (ssize_t i = 0; i < len; i++) ptr[i] = ~ptr[i];
923
- return self;
924
- }
925
-
926
- static VALUE
927
- rb_str_bit_and(VALUE self, VALUE other)
928
- {
929
- check_binary_op_lengths(self, other);
930
- ssize_t len = RSTRING_LEN(self);
931
- VALUE result = alloc_result(self);
932
- const unsigned char *a = (const unsigned char *)RSTRING_PTR(self);
933
- const unsigned char *b = (const unsigned char *)RSTRING_PTR(other);
934
- unsigned char *dst = (unsigned char *)RSTRING_PTR(result);
935
- for (ssize_t i = 0; i < len; i++) dst[i] = a[i] & b[i];
936
- return result;
937
- }
938
-
939
- static VALUE
940
- rb_str_bit_and_bang(VALUE self, VALUE other)
941
- {
942
- check_binary_op_lengths(self, other);
943
- rb_str_modify(self);
944
- ssize_t len = RSTRING_LEN(self);
945
- unsigned char *a = (unsigned char *)RSTRING_PTR(self);
946
- const unsigned char *b = (const unsigned char *)RSTRING_PTR(other);
947
- for (ssize_t i = 0; i < len; i++) a[i] &= b[i];
948
- return self;
949
- }
950
-
951
- static VALUE
952
- rb_str_bit_or(VALUE self, VALUE other)
953
- {
954
- check_binary_op_lengths(self, other);
955
- ssize_t len = RSTRING_LEN(self);
956
- VALUE result = alloc_result(self);
957
- const unsigned char *a = (const unsigned char *)RSTRING_PTR(self);
958
- const unsigned char *b = (const unsigned char *)RSTRING_PTR(other);
959
- unsigned char *dst = (unsigned char *)RSTRING_PTR(result);
960
- for (ssize_t i = 0; i < len; i++) dst[i] = a[i] | b[i];
961
- return result;
962
- }
963
-
964
- static VALUE
965
- rb_str_bit_or_bang(VALUE self, VALUE other)
966
- {
967
- check_binary_op_lengths(self, other);
968
- rb_str_modify(self);
969
- ssize_t len = RSTRING_LEN(self);
970
- unsigned char *a = (unsigned char *)RSTRING_PTR(self);
971
- const unsigned char *b = (const unsigned char *)RSTRING_PTR(other);
972
- for (ssize_t i = 0; i < len; i++) a[i] |= b[i];
973
- return self;
974
- }
975
-
976
- static VALUE
977
- rb_str_bit_xor(VALUE self, VALUE other)
978
- {
979
- check_binary_op_lengths(self, other);
980
- ssize_t len = RSTRING_LEN(self);
981
- VALUE result = alloc_result(self);
982
- const unsigned char *a = (const unsigned char *)RSTRING_PTR(self);
983
- const unsigned char *b = (const unsigned char *)RSTRING_PTR(other);
984
- unsigned char *dst = (unsigned char *)RSTRING_PTR(result);
985
- for (ssize_t i = 0; i < len; i++) dst[i] = a[i] ^ b[i];
986
- return result;
987
- }
988
-
989
- static VALUE
990
- rb_str_bit_xor_bang(VALUE self, VALUE other)
991
- {
992
- check_binary_op_lengths(self, other);
993
- rb_str_modify(self);
994
- ssize_t len = RSTRING_LEN(self);
995
- unsigned char *a = (unsigned char *)RSTRING_PTR(self);
996
- const unsigned char *b = (const unsigned char *)RSTRING_PTR(other);
997
- for (ssize_t i = 0; i < len; i++) a[i] ^= b[i];
998
- return self;
999
- }
1154
+ /*
1155
+ * Bitwise op kernels: process 32 bytes (4 x uint64_t) per loop iteration via
1156
+ * memcpy + word-wise op + memcpy, then any 8-byte tail, then byte-by-byte for
1157
+ * the final < 8 bytes. memcpy avoids unaligned-load/store issues on strict-
1158
+ * alignment platforms; modern compilers fold each 8-byte memcpy into a single
1159
+ * load/store. Macro-generated to avoid 8 near-identical functions.
1160
+ *
1161
+ * The unary NOT kernel takes only `src`; the binary AND/OR/XOR kernels take `a` and `b`.
1162
+ */
1163
+ #define SB_DEFINE_UNARY_KERNEL(name, expr_word, expr_byte) \
1164
+ static void \
1165
+ name(unsigned char *dst, const unsigned char *src, ssize_t len) \
1166
+ { \
1167
+ ssize_t off = 0; \
1168
+ ssize_t unrolled_end = len & ~31L; \
1169
+ ssize_t aligned_end = len & ~7L; \
1170
+ for (; off < unrolled_end; off += 32) { \
1171
+ uint64_t s0, s1, s2, s3; \
1172
+ memcpy(&s0, src + off, 8); \
1173
+ memcpy(&s1, src + off + 8, 8); \
1174
+ memcpy(&s2, src + off + 16, 8); \
1175
+ memcpy(&s3, src + off + 24, 8); \
1176
+ uint64_t d0 = (expr_word(s0)); \
1177
+ uint64_t d1 = (expr_word(s1)); \
1178
+ uint64_t d2 = (expr_word(s2)); \
1179
+ uint64_t d3 = (expr_word(s3)); \
1180
+ memcpy(dst + off, &d0, 8); \
1181
+ memcpy(dst + off + 8, &d1, 8); \
1182
+ memcpy(dst + off + 16, &d2, 8); \
1183
+ memcpy(dst + off + 24, &d3, 8); \
1184
+ } \
1185
+ for (; off < aligned_end; off += 8) { \
1186
+ uint64_t s; \
1187
+ memcpy(&s, src + off, 8); \
1188
+ uint64_t d = (expr_word(s)); \
1189
+ memcpy(dst + off, &d, 8); \
1190
+ } \
1191
+ for (; off < len; off++) dst[off] = (expr_byte(src[off])); \
1192
+ }
1193
+
1194
+ #define SB_DEFINE_BINARY_KERNEL(name, expr_word, expr_byte) \
1195
+ static void \
1196
+ name(unsigned char *dst, const unsigned char *a, const unsigned char *b, \
1197
+ ssize_t len) \
1198
+ { \
1199
+ ssize_t off = 0; \
1200
+ ssize_t unrolled_end = len & ~31L; \
1201
+ ssize_t aligned_end = len & ~7L; \
1202
+ for (; off < unrolled_end; off += 32) { \
1203
+ uint64_t a0, a1, a2, a3, b0, b1, b2, b3; \
1204
+ memcpy(&a0, a + off, 8); memcpy(&b0, b + off, 8); \
1205
+ memcpy(&a1, a + off + 8, 8); memcpy(&b1, b + off + 8, 8); \
1206
+ memcpy(&a2, a + off + 16, 8); memcpy(&b2, b + off + 16, 8); \
1207
+ memcpy(&a3, a + off + 24, 8); memcpy(&b3, b + off + 24, 8); \
1208
+ uint64_t d0 = expr_word(a0, b0); \
1209
+ uint64_t d1 = expr_word(a1, b1); \
1210
+ uint64_t d2 = expr_word(a2, b2); \
1211
+ uint64_t d3 = expr_word(a3, b3); \
1212
+ memcpy(dst + off, &d0, 8); \
1213
+ memcpy(dst + off + 8, &d1, 8); \
1214
+ memcpy(dst + off + 16, &d2, 8); \
1215
+ memcpy(dst + off + 24, &d3, 8); \
1216
+ } \
1217
+ for (; off < aligned_end; off += 8) { \
1218
+ uint64_t av, bv; \
1219
+ memcpy(&av, a + off, 8); memcpy(&bv, b + off, 8); \
1220
+ uint64_t d = expr_word(av, bv); \
1221
+ memcpy(dst + off, &d, 8); \
1222
+ } \
1223
+ for (; off < len; off++) dst[off] = expr_byte(a[off], b[off]); \
1224
+ }
1225
+
1226
+ #define SB_NOT_WORD(x) (~(x))
1227
+ #define SB_NOT_BYTE(x) ((unsigned char)~(x))
1228
+ #define SB_AND_WORD(x, y) ((x) & (y))
1229
+ #define SB_AND_BYTE(x, y) ((unsigned char)((x) & (y)))
1230
+ #define SB_OR_WORD(x, y) ((x) | (y))
1231
+ #define SB_OR_BYTE(x, y) ((unsigned char)((x) | (y)))
1232
+ #define SB_XOR_WORD(x, y) ((x) ^ (y))
1233
+ #define SB_XOR_BYTE(x, y) ((unsigned char)((x) ^ (y)))
1234
+
1235
+ SB_DEFINE_UNARY_KERNEL (kern_not, SB_NOT_WORD, SB_NOT_BYTE)
1236
+ SB_DEFINE_BINARY_KERNEL(kern_and, SB_AND_WORD, SB_AND_BYTE)
1237
+ SB_DEFINE_BINARY_KERNEL(kern_or, SB_OR_WORD, SB_OR_BYTE)
1238
+ SB_DEFINE_BINARY_KERNEL(kern_xor, SB_XOR_WORD, SB_XOR_BYTE)
1239
+
1240
+ /* Method wrappers: allocate-and-return form, and the in-place (!) form. */
1241
+ #define SB_DEFINE_UNARY_METHODS(op_name, kernel) \
1242
+ static VALUE \
1243
+ rb_str_bitwise_##op_name(VALUE self) \
1244
+ { \
1245
+ ssize_t len = RSTRING_LEN(self); \
1246
+ VALUE result = alloc_result(self); \
1247
+ kernel((unsigned char *)RSTRING_PTR(result), \
1248
+ (const unsigned char *)RSTRING_PTR(self), len); \
1249
+ return result; \
1250
+ } \
1251
+ static VALUE \
1252
+ rb_str_bitwise_##op_name##_bang(VALUE self) \
1253
+ { \
1254
+ rb_str_modify(self); \
1255
+ ssize_t len = RSTRING_LEN(self); \
1256
+ unsigned char *ptr = (unsigned char *)RSTRING_PTR(self); \
1257
+ kernel(ptr, ptr, len); \
1258
+ return self; \
1259
+ }
1260
+
1261
+ #define SB_DEFINE_BINARY_METHODS(op_name, kernel) \
1262
+ static VALUE \
1263
+ rb_str_bitwise_##op_name(VALUE self, VALUE other) \
1264
+ { \
1265
+ check_binary_op_lengths(self, other); \
1266
+ ssize_t len = RSTRING_LEN(self); \
1267
+ VALUE result = alloc_result(self); \
1268
+ kernel((unsigned char *)RSTRING_PTR(result), \
1269
+ (const unsigned char *)RSTRING_PTR(self), \
1270
+ (const unsigned char *)RSTRING_PTR(other), len); \
1271
+ return result; \
1272
+ } \
1273
+ static VALUE \
1274
+ rb_str_bitwise_##op_name##_bang(VALUE self, VALUE other) \
1275
+ { \
1276
+ check_binary_op_lengths(self, other); \
1277
+ rb_str_modify(self); \
1278
+ ssize_t len = RSTRING_LEN(self); \
1279
+ unsigned char *a = (unsigned char *)RSTRING_PTR(self); \
1280
+ const unsigned char *b = (const unsigned char *)RSTRING_PTR(other); \
1281
+ kernel(a, a, b, len); \
1282
+ return self; \
1283
+ }
1284
+
1285
+ SB_DEFINE_UNARY_METHODS (not, kern_not)
1286
+ SB_DEFINE_BINARY_METHODS(and, kern_and)
1287
+ SB_DEFINE_BINARY_METHODS(or, kern_or)
1288
+ SB_DEFINE_BINARY_METHODS(xor, kern_xor)
1000
1289
 
1001
1290
  /* packed bit-field iteration ---------------------------------------------- */
1002
1291
  /*
@@ -1051,25 +1340,7 @@ extract_uint64(const unsigned char *src, ssize_t src_len,
1051
1340
  return val;
1052
1341
  }
1053
1342
 
1054
- /* String#each_bit_field(*bitlens, lsb_first: true) -> self
1055
- * String#each_bit_field(*bitlens, lsb_first: true) -> Enumerator
1056
- *
1057
- * Iterates over the string as a sequence of packed bit-field records. Each
1058
- * positional argument specifies the width (in bits) of one field in the record.
1059
- * On each iteration, one Integer per field is yielded (LSB-first bit layout).
1060
- * Each bitlen must be in the range 1..64.
1061
- *
1062
- * lsb_first: true (default) -- intra-byte field extraction uses bit 0..7.
1063
- * lsb_first: false -- intra-byte field extraction uses bit 7..0.
1064
- *
1065
- * Incomplete trailing bits (when bytesize*8 is not a multiple of sum(bitlens))
1066
- * are silently dropped, matching the behavior of Enumerable#each_slice.
1067
- *
1068
- * Porting to Ruby Core:
1069
- * 1. Move extract_uint64 and this function into string.c.
1070
- * 2. Register with rb_define_method in Init_String().
1071
- * 3. Replace ALLOCA_N with stack arrays for small field counts and heap otherwise.
1072
- */
1343
+ /* Yield each packed bit-field record as one Integer per field. */
1073
1344
  static VALUE
1074
1345
  rb_str_each_bit_field(int argc, VALUE *argv, VALUE self)
1075
1346
  {
@@ -1096,7 +1367,7 @@ rb_str_each_bit_field(int argc, VALUE *argv, VALUE self)
1096
1367
  rb_raise(rb_eArgError, "bitlen must be positive");
1097
1368
  }
1098
1369
  if (bl > 64) {
1099
- rb_raise(rb_eArgError, "bitlen must be <= 64 (got %ld)", bl);
1370
+ rb_raise(rb_eArgError, "bitlen must be <= 64 (got %" PRIdPTR ")", (intptr_t)bl);
1100
1371
  }
1101
1372
  bitlens[f] = bl;
1102
1373
  step += bl;
@@ -1105,8 +1376,8 @@ rb_str_each_bit_field(int argc, VALUE *argv, VALUE self)
1105
1376
  int lsb_first = parse_lsb_first_opt(opts);
1106
1377
 
1107
1378
  ssize_t src_len = RSTRING_LEN(self);
1108
- ssize_t total_bits = src_len * 8;
1109
- ssize_t iterations = total_bits / step;
1379
+ int64_t total_bits = SB_BIT_LEN(src_len);
1380
+ ssize_t iterations = (ssize_t)(total_bits / step);
1110
1381
 
1111
1382
  VALUE *field_vals = ALLOCA_N(VALUE, num_fields);
1112
1383
 
@@ -1125,22 +1396,7 @@ rb_str_each_bit_field(int argc, VALUE *argv, VALUE self)
1125
1396
  return self;
1126
1397
  }
1127
1398
 
1128
- /* String#bit_fields(*bitlens, lsb_first: true) -> Array
1129
- * String#bit_fields(*bitlens, lsb_first: true) { |*fields| } -> self
1130
- *
1131
- * Non-iterator complement of each_bit_field. Without a block, returns an
1132
- * Array of all extracted records. With a single bitlen the array is flat
1133
- * (matching each_bit_field(n).to_a); with multiple bitlens each record is
1134
- * itself an Array (matching each_bit_field(a, b, ...).to_a).
1135
- *
1136
- * With a block, behaves identically to each_bit_field without with: ---
1137
- * yielding one Integer per field and returning self.
1138
- *
1139
- * Porting to Ruby Core:
1140
- * 1. Move alongside each_bit_field in string.c.
1141
- * 2. Share extract_uint64 and the bitlen validation logic.
1142
- * 3. Register with rb_define_method in Init_String().
1143
- */
1399
+ /* Non-iterator form of each_bit_field; collect bit-field records into an Array. */
1144
1400
  static VALUE
1145
1401
  rb_str_bit_fields(int argc, VALUE *argv, VALUE self)
1146
1402
  {
@@ -1165,7 +1421,7 @@ rb_str_bit_fields(int argc, VALUE *argv, VALUE self)
1165
1421
  rb_raise(rb_eArgError, "bitlen must be positive");
1166
1422
  }
1167
1423
  if (bl > 64) {
1168
- rb_raise(rb_eArgError, "bitlen must be <= 64 (got %ld)", bl);
1424
+ rb_raise(rb_eArgError, "bitlen must be <= 64 (got %" PRIdPTR ")", (intptr_t)bl);
1169
1425
  }
1170
1426
  bitlens[f] = bl;
1171
1427
  step += bl;
@@ -1174,8 +1430,8 @@ rb_str_bit_fields(int argc, VALUE *argv, VALUE self)
1174
1430
  int lsb_first = parse_lsb_first_opt(opts);
1175
1431
 
1176
1432
  ssize_t src_len = RSTRING_LEN(self);
1177
- ssize_t total_bits = src_len * 8;
1178
- ssize_t iterations = total_bits / step;
1433
+ int64_t total_bits = SB_BIT_LEN(src_len);
1434
+ ssize_t iterations = (ssize_t)(total_bits / step);
1179
1435
 
1180
1436
  int have_block = rb_block_given_p();
1181
1437
  VALUE result = have_block ? Qnil : rb_ary_new_capa(iterations);
@@ -1207,7 +1463,7 @@ rb_str_bit_fields(int argc, VALUE *argv, VALUE self)
1207
1463
 
1208
1464
  /*
1209
1465
  * count_run_lsb: count consecutive bits equal to `target` starting at flat
1210
- * position `pos` (LSB-first). Uses ctz / ctzll to skip bits in bulk:
1466
+ * position `bit_offset` (LSB-first). Uses ctz / ctzll to skip bits in bulk:
1211
1467
  * - partial first byte: ctz on the inverted masked nibble
1212
1468
  * - full 64-bit words (LE): ctzll on the inverted word (64 bits per step)
1213
1469
  * - remaining bytes: ctz on the inverted byte
@@ -1217,11 +1473,11 @@ rb_str_bit_fields(int argc, VALUE *argv, VALUE self)
1217
1473
  * 2. Share sb_ctz8 / sb_ctzll with the existing set-bit helpers.
1218
1474
  */
1219
1475
  static ssize_t
1220
- count_run_lsb(const unsigned char *src, ssize_t src_len, ssize_t pos, int target)
1476
+ count_run_lsb(const unsigned char *src, ssize_t src_len, ssize_t bit_offset, int target)
1221
1477
  {
1222
- ssize_t max_run = src_len * 8 - pos;
1223
- ssize_t byte_idx = pos >> 3;
1224
- int bit_off = pos & 7;
1478
+ int64_t max_run = SB_BIT_LEN(src_len) - bit_offset;
1479
+ ssize_t byte_idx = bit_offset >> 3;
1480
+ int bit_off = bit_offset & 7;
1225
1481
  ssize_t count = 0;
1226
1482
 
1227
1483
  /* partial first byte: shift bit_offset to bit 0, mask remaining bits */
@@ -1236,7 +1492,7 @@ count_run_lsb(const unsigned char *src, ssize_t src_len, ssize_t pos, int target
1236
1492
  count += run;
1237
1493
  byte_idx++;
1238
1494
  if (run < remaining)
1239
- return count < max_run ? count : max_run;
1495
+ return (ssize_t)(count < max_run ? count : max_run);
1240
1496
  }
1241
1497
 
1242
1498
  #if SB_LITTLE_ENDIAN
@@ -1250,7 +1506,7 @@ count_run_lsb(const unsigned char *src, ssize_t src_len, ssize_t pos, int target
1250
1506
  byte_idx += 8;
1251
1507
  } else {
1252
1508
  count += sb_ctzll(~word);
1253
- return count < max_run ? count : max_run;
1509
+ return (ssize_t)(count < max_run ? count : max_run);
1254
1510
  }
1255
1511
  }
1256
1512
  #endif
@@ -1265,218 +1521,146 @@ count_run_lsb(const unsigned char *src, ssize_t src_len, ssize_t pos, int target
1265
1521
  byte_idx++;
1266
1522
  } else {
1267
1523
  count += sb_ctz8(~b);
1268
- return count < max_run ? count : max_run;
1524
+ return (ssize_t)(count < max_run ? count : max_run);
1269
1525
  }
1270
1526
  }
1271
1527
 
1272
- return count < max_run ? count : max_run;
1528
+ return (ssize_t)(count < max_run ? count : max_run);
1273
1529
  }
1274
1530
 
1275
- /* String#bit_run_count(pos, bit) -> Integer | nil
1276
- *
1277
- * Returns the length of the consecutive run of `bit` starting at flat
1278
- * position `pos`. Returns nil when `pos` is out of range or the bit at `pos`
1279
- * does not equal `bit`.
1280
- *
1281
- * `bit` accepts 0, 1, false, or true (false/true are aliases for 0/1,
1282
- * matching the values yielded by each_bit_run).
1283
- *
1284
- * Counts forward from `pos` toward higher bit indices.
1285
- *
1286
- * Inspired by Gauche Scheme's (bitvector-count-run bit bvec i).
1287
- *
1288
- * Uses the same flat LSB-first addressing as bit_at: byte[pos/8] bit pos%8.
1289
- *
1290
- * Porting to Ruby Core:
1291
- * 1. Move to string.c; register in Init_String().
1292
- * 2. Reuse integer_to_bit_idx for consistent Bignum handling.
1293
- */
1531
+ /* Return the length of the run of `bit` starting at bit_offset; nil if bit_offset is out of range or the bit there differs. */
1294
1532
  static VALUE
1295
1533
  rb_str_bit_run_count(int argc, VALUE *argv, VALUE self)
1296
1534
  {
1297
- VALUE pos_val, bit_val, opts;
1298
- rb_scan_args(argc, argv, "20:", &pos_val, &bit_val, &opts);
1535
+ VALUE bit_offset_v, bit_val, opts;
1536
+ rb_scan_args(argc, argv, "20:", &bit_val, &bit_offset_v, &opts);
1299
1537
  validate_option_hash(opts, SB_KW_LSB_FIRST);
1300
1538
  int lsb_first = parse_lsb_first_opt(opts);
1301
1539
 
1302
- if (!rb_integer_type_p(pos_val)) {
1540
+ if (!rb_integer_type_p(bit_offset_v)) {
1303
1541
  rb_raise(rb_eTypeError, "position must be an integer");
1304
1542
  }
1305
- int target;
1306
- if (bit_val == Qtrue || bit_val == INT2FIX(1)) {
1307
- target = 1;
1308
- } else if (bit_val == Qfalse || bit_val == INT2FIX(0)) {
1309
- target = 0;
1310
- } else {
1311
- rb_raise(rb_eArgError, "bit must be 0, 1, false, or true");
1312
- }
1313
- ssize_t pos = integer_to_bit_idx(pos_val);
1543
+ int target = parse_bit_target(bit_val);
1544
+ ssize_t bit_offset = integer_to_bit_idx(bit_offset_v);
1314
1545
  ssize_t src_len = RSTRING_LEN(self);
1315
- if (pos < 0 || pos >= src_len * 8) return Qnil;
1546
+ if (bit_offset < 0 || bit_offset >= SB_BIT_LEN(src_len)) return Qnil;
1316
1547
 
1317
1548
  const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
1318
1549
  if (lsb_first) {
1319
- if (((src[pos >> 3] >> (pos & 7)) & 1) != target) return Qnil;
1320
- return SSIZET2NUM(count_run_lsb(src, src_len, pos, target));
1550
+ if (((src[bit_offset >> 3] >> (bit_offset & 7)) & 1) != target) return Qnil;
1551
+ return SSIZET2NUM(count_run_lsb(src, src_len, bit_offset, target));
1321
1552
  }
1322
1553
 
1323
- if (logical_get_bit(src, pos, 0) != target) return Qnil;
1554
+ if (logical_get_bit(src, bit_offset, 0) != target) return Qnil;
1324
1555
 
1325
1556
  ssize_t run = 1;
1326
- ssize_t total_bits = src_len * 8;
1327
- while (pos + run < total_bits && logical_get_bit(src, pos + run, 0) == target) {
1557
+ int64_t total_bits = SB_BIT_LEN(src_len);
1558
+ while (bit_offset + run < total_bits && logical_get_bit(src, bit_offset + run, 0) == target) {
1328
1559
  run++;
1329
1560
  }
1330
1561
  return SSIZET2NUM(run);
1331
1562
  }
1332
1563
 
1333
- /* String#each_bit_run(lsb_first: true) { |bit, len| } -> self
1334
- * String#each_bit_run(lsb_first: true) -> Enumerator
1335
- *
1336
- * Yields (bit, run_length) pairs for each consecutive run of identical bits.
1337
- * Run-length boundary detection and counting happen entirely in C, replacing
1338
- * the Ruby-level current/count state machine required when using each_bit.
1564
+ /* Yield (bit, offset, run_length) triples for each consecutive run of identical bits. */
1565
+ /* Unified emitter for each_bit_run / bit_runs.
1339
1566
  *
1340
- * For random data (~50% density) each_bit_run yields ~half as many times as
1341
- * each_bit. For structured data (sparse validity bitmaps, sensor bursts) the
1342
- * ratio is proportional to the average run length.
1567
+ * Walks the bitmap run by run. Yields each (bit, offset, run_length) triple
1568
+ * (when ary == Qnil) or pushes it as a 3-element Array onto the caller's
1569
+ * result. The LSB-first path uses the fast count_run_lsb (word-at-a-time
1570
+ * via ctzll); the MSB-first path scans bit by bit through logical_get_bit.
1343
1571
  *
1344
- * lsb_first: true (default) iterates bit 0..7 within each byte.
1345
- * lsb_first: false iterates bit 7..0 within each byte.
1346
- *
1347
- * Porting to Ruby Core:
1348
- * 1. Move to string.c; register in Init_String().
1349
- * 2. count_run_lsb / count_run_msb move with it.
1572
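+ * RSTRING_PTR(self) is re-read on every iteration because rb_yield can invoke Ruby code
1573
+ * that mutates the receiver, potentially invalidating the pointer. NOTE(review): src_len and total_bits are read once before the loop and are not refreshed -- confirm a block that shrinks the receiver cannot cause an out-of-bounds read.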
1350
1574
  */
1351
- static VALUE
1352
- rb_str_each_bit_run(int argc, VALUE *argv, VALUE self)
1575
+ static void
1576
+ emit_bit_runs(VALUE self, int lsb_first, ssize_t start_offset, VALUE ary)
1353
1577
  {
1354
- RETURN_ENUMERATOR(self, argc, argv);
1355
-
1356
- int lsb_first = parse_lsb_first(argc, argv);
1357
- ssize_t src_len = RSTRING_LEN(self);
1358
- if (src_len == 0) return self;
1578
+ ssize_t src_len = RSTRING_LEN(self);
1579
+ int64_t total_bits = SB_BIT_LEN(src_len);
1580
+ if (src_len == 0 || start_offset >= total_bits) return;
1581
+ ssize_t offset = start_offset;
1359
1582
 
1360
- ssize_t total_bits = src_len * 8;
1583
+ #define SB_EMIT_TRIPLE(bval, oval, lval) \
1584
+ do { if (ary == Qnil) rb_yield_values(3, (bval), (oval), (lval)); \
1585
+ else rb_ary_push(ary, rb_ary_new3(3, (bval), (oval), (lval))); } while (0)
1361
1586
 
1362
1587
  if (lsb_first) {
1363
- ssize_t pos = 0;
1364
- while (pos < total_bits) {
1588
+ while (offset < total_bits) {
1365
1589
  const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
1366
- int bit = (src[pos >> 3] >> (pos & 7)) & 1;
1367
- ssize_t run = count_run_lsb(src, src_len, pos, bit);
1368
- rb_yield_values(2, bit ? Qtrue : Qfalse, SSIZET2NUM(run));
1369
- pos += run;
1590
+ int bit = (src[offset >> 3] >> (offset & 7)) & 1;
1591
+ ssize_t run = count_run_lsb(src, src_len, offset, bit);
1592
+ SB_EMIT_TRIPLE(bit ? Qtrue : Qfalse, SSIZET2NUM(offset), SSIZET2NUM(run));
1593
+ offset += run;
1370
1594
  }
1371
1595
  }
1372
1596
  else {
1373
- ssize_t pos = 0;
1374
- while (pos < total_bits) {
1597
+ while (offset < total_bits) {
1375
1598
  const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
1376
- int bit = logical_get_bit(src, pos, 0);
1599
+ int bit = logical_get_bit(src, offset, 0);
1377
1600
  ssize_t run = 1;
1378
- while (pos + run < total_bits && logical_get_bit(src, pos + run, 0) == bit) {
1601
+ while (offset + run < total_bits && logical_get_bit(src, offset + run, 0) == bit) {
1379
1602
  run++;
1380
1603
  }
1381
- rb_yield_values(2, bit ? Qtrue : Qfalse, SSIZET2NUM(run));
1382
- pos += run;
1604
+ SB_EMIT_TRIPLE(bit ? Qtrue : Qfalse, SSIZET2NUM(offset), SSIZET2NUM(run));
1605
+ offset += run;
1383
1606
  }
1384
1607
  }
1385
1608
 
1386
- return self;
1609
+ #undef SB_EMIT_TRIPLE
1387
1610
  }
1388
1611
 
1389
- /* String#bit_runs(lsb_first: true) -> Array
1390
- * String#bit_runs(lsb_first: true) { |bit, len| } -> self
1391
- *
1392
- * Non-iterator complement of each_bit_run. Without a block, collects all
1393
- * (bit, run_length) pairs into an Array and returns it. With a block,
1394
- * yields each pair and returns self.
1395
- *
1396
- * Follows the same pattern as String#bytes vs String#each_byte.
1397
- *
1398
- * Porting to Ruby Core:
1399
- * 1. Move to string.c alongside each_bit_run; register in Init_String().
1400
- */
1401
1612
  static VALUE
1402
- rb_str_bit_runs(int argc, VALUE *argv, VALUE self)
1613
+ rb_str_each_bit_run(int argc, VALUE *argv, VALUE self)
1403
1614
  {
1404
- int lsb_first = parse_lsb_first(argc, argv);
1405
- ssize_t src_len = RSTRING_LEN(self);
1406
- int have_block = rb_block_given_p();
1615
+ RETURN_ENUMERATOR(self, argc, argv);
1407
1616
 
1408
- if (src_len == 0) return have_block ? self : rb_ary_new();
1617
+ VALUE start_offset_v = Qnil, opts = Qnil;
1618
+ rb_scan_args(argc, argv, "01:", &start_offset_v, &opts);
1619
+ validate_option_hash(opts, SB_KW_LSB_FIRST);
1620
+ int lsb_first = parse_lsb_first_opt(opts);
1621
+ ssize_t start_offset = parse_start_offset(start_offset_v);
1409
1622
 
1410
- ssize_t total_bits = src_len * 8;
1411
- VALUE result = have_block ? Qnil : rb_ary_new();
1623
+ emit_bit_runs(self, lsb_first, start_offset, Qnil);
1624
+ return self;
1625
+ }
1412
1626
 
1413
- if (lsb_first) {
1414
- ssize_t pos = 0;
1415
- while (pos < total_bits) {
1416
- const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
1417
- int bit = (src[pos >> 3] >> (pos & 7)) & 1;
1418
- ssize_t run = count_run_lsb(src, src_len, pos, bit);
1419
- VALUE bval = bit ? Qtrue : Qfalse;
1420
- VALUE lval = SSIZET2NUM(run);
1421
- have_block ? rb_yield_values(2, bval, lval)
1422
- : rb_ary_push(result, rb_assoc_new(bval, lval));
1423
- pos += run;
1424
- }
1425
- } else {
1426
- ssize_t pos = 0;
1427
- while (pos < total_bits) {
1428
- const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
1429
- int bit = logical_get_bit(src, pos, 0);
1430
- ssize_t run = 1;
1431
- while (pos + run < total_bits && logical_get_bit(src, pos + run, 0) == bit) {
1432
- run++;
1433
- }
1434
- VALUE bval = bit ? Qtrue : Qfalse;
1435
- VALUE lval = SSIZET2NUM(run);
1436
- have_block ? rb_yield_values(2, bval, lval)
1437
- : rb_ary_push(result, rb_assoc_new(bval, lval));
1438
- pos += run;
1439
- }
1627
+ /* Non-iterator form of each_bit_run; collect run triples into an Array. */
1628
+ static VALUE
1629
+ rb_str_bit_runs(int argc, VALUE *argv, VALUE self)
1630
+ {
1631
+ VALUE start_offset_v = Qnil, opts = Qnil;
1632
+ rb_scan_args(argc, argv, "01:", &start_offset_v, &opts);
1633
+ validate_option_hash(opts, SB_KW_LSB_FIRST);
1634
+ int lsb_first = parse_lsb_first_opt(opts);
1635
+ ssize_t start_offset = parse_start_offset(start_offset_v);
1636
+
1637
+ if (rb_block_given_p()) {
1638
+ emit_bit_runs(self, lsb_first, start_offset, Qnil);
1639
+ return self;
1440
1640
  }
1441
1641
 
1442
- return have_block ? self : result;
1642
+ VALUE ary = rb_ary_new();
1643
+ emit_bit_runs(self, lsb_first, start_offset, ary);
1644
+ return ary;
1443
1645
  }
1444
1646
 
1445
- /* String#bit_splice(bit_index, bit_length, str) -> self
1446
- * String#bit_splice(bit_index, bit_length, str, str_bit_index, str_bit_length) -> self
1447
- * String#bit_splice(range, str) -> self
1448
- * String#bit_splice(range, str, str_range) -> self
1449
- *
1450
- * Writes bits from str into self at bit-level granularity. The inverse of
1451
- * bit_slice: where bit_slice reads a sub-sequence of bits, bit_splice writes one.
1452
- *
1453
- * The destination and source bit lengths must be equal; bit_splice does not
1454
- * resize self (sub-byte resize is undefined). This mirrors the constraint that
1455
- * bytesplice imposes when the replacement has the same byte length.
1456
- *
1457
- * Negative indices count backward from the end, exactly as in bytesplice.
1458
- * Returns self.
1459
- *
1460
- * Porting to Ruby Core:
1461
- * 1. Move to string.c; register in Init_String().
1462
- * 2. Use rb_str_modify_expand if resize support is ever added.
1463
- * 3. bit_copy_core moves with it; share ebs_extract with bit_slice.
1464
- */
1647
+ /* Write bits from str into self at bit-level granularity (inverse of bit_slice). */
1465
1648
  static VALUE
1466
1649
  rb_str_bit_splice(int argc, VALUE *argv, VALUE self)
1467
1650
  {
1468
1651
  ssize_t dst_bit_off, dst_bit_len;
1469
1652
  ssize_t src_bit_off, src_bit_len;
1470
1653
  VALUE str;
1471
- ssize_t dst_total = RSTRING_LEN(self) * 8;
1472
- VALUE v0, v1, v2, v3, v4, opts;
1654
+ int64_t dst_total = SB_BIT_LEN(RSTRING_LEN(self));
1655
+ VALUE v0, v1, v2, v3, opts;
1473
1656
 
1474
- int n_pos = rb_scan_args(argc, argv, "23:", &v0, &v1, &v2, &v3, &v4, &opts);
1657
+ int n_pos = rb_scan_args(argc, argv, "22:", &v0, &v1, &v2, &v3, &opts);
1475
1658
  validate_option_hash(opts, SB_KW_LSB_FIRST);
1476
1659
  int lsb_first = parse_lsb_first_opt(opts);
1477
1660
 
1478
1661
  if (n_pos == 2 && rb_obj_is_kind_of(v0, rb_cRange)) {
1479
1662
  /* bit_splice(range, str) */
1663
+ sb_range_validate_endpoints(v0);
1480
1664
  ssize_t beg, len;
1481
1665
  sb_range_beg_len(v0, &beg, &len, dst_total, 1);
1482
1666
  dst_bit_off = beg;
@@ -1487,20 +1671,21 @@ rb_str_bit_splice(int argc, VALUE *argv, VALUE self)
1487
1671
  src_bit_len = dst_bit_len;
1488
1672
  }
1489
1673
  else if (n_pos == 3 && rb_obj_is_kind_of(v0, rb_cRange)) {
1490
- /* bit_splice(range, str, str_range) */
1674
+ /* bit_splice(range, str, str_bit_index) */
1675
+ sb_range_validate_endpoints(v0);
1491
1676
  ssize_t beg, len;
1492
1677
  sb_range_beg_len(v0, &beg, &len, dst_total, 1);
1493
1678
  dst_bit_off = beg;
1494
1679
  dst_bit_len = len;
1495
1680
  str = v1;
1496
1681
  Check_Type(str, T_STRING);
1497
- if (!rb_obj_is_kind_of(v2, rb_cRange)) {
1498
- rb_raise(rb_eTypeError, "third argument must be a Range");
1682
+ if (!rb_integer_type_p(v2)) {
1683
+ rb_raise(rb_eTypeError, "third argument must be an Integer");
1499
1684
  }
1500
- ssize_t src_total = RSTRING_LEN(str) * 8;
1501
- sb_range_beg_len(v2, &beg, &len, src_total, 1);
1502
- src_bit_off = beg;
1503
- src_bit_len = len;
1685
+ int64_t src_total = SB_BIT_LEN(RSTRING_LEN(str));
1686
+ src_bit_off = integer_to_bit_idx(v2);
1687
+ if (src_bit_off < 0) src_bit_off += src_total;
1688
+ src_bit_len = dst_bit_len;
1504
1689
  }
1505
1690
  else if (n_pos == 3) {
1506
1691
  /* bit_splice(bit_index, bit_length, str) */
@@ -1526,10 +1711,9 @@ rb_str_bit_splice(int argc, VALUE *argv, VALUE self)
1526
1711
  src_bit_off = 0;
1527
1712
  src_bit_len = dst_bit_len;
1528
1713
  }
1529
- else if (n_pos == 5) {
1530
- /* bit_splice(bit_index, bit_length, str, str_bit_index, str_bit_length) */
1531
- if (!rb_integer_type_p(v0) || !rb_integer_type_p(v1) ||
1532
- !rb_integer_type_p(v3) || !rb_integer_type_p(v4)) {
1714
+ else if (n_pos == 4) {
1715
+ /* bit_splice(bit_index, bit_length, str, str_bit_index) */
1716
+ if (!rb_integer_type_p(v0) || !rb_integer_type_p(v1) || !rb_integer_type_p(v3)) {
1533
1717
  rb_raise(rb_eTypeError, "bit indices and lengths must be integers");
1534
1718
  }
1535
1719
  dst_bit_off = integer_to_bit_idx(v0);
@@ -1537,33 +1721,29 @@ rb_str_bit_splice(int argc, VALUE *argv, VALUE self)
1537
1721
  if (dst_bit_off < 0) dst_bit_off += dst_total;
1538
1722
  str = v2;
1539
1723
  Check_Type(str, T_STRING);
1540
- ssize_t src_total = RSTRING_LEN(str) * 8;
1724
+ int64_t src_total = SB_BIT_LEN(RSTRING_LEN(str));
1541
1725
  src_bit_off = integer_to_bit_idx(v3);
1542
- src_bit_len = integer_to_bit_idx(v4);
1543
1726
  if (src_bit_off < 0) src_bit_off += src_total;
1727
+ src_bit_len = dst_bit_len;
1544
1728
  }
1545
1729
  else {
1546
1730
  rb_raise(rb_eArgError,
1547
- "wrong number of arguments (given %d, expected 2, 3, or 5)", n_pos);
1731
+ "wrong number of arguments (given %d, expected 2, 3, or 4)", n_pos);
1548
1732
  }
1549
1733
 
1550
1734
  if (dst_bit_off < 0 || dst_bit_len < 0 || dst_bit_off + dst_bit_len > dst_total) {
1551
1735
  rb_raise(rb_eIndexError,
1552
- "bit_splice: destination range [%ld, %ld] out of bounds (total %ld bits)",
1553
- dst_bit_off, dst_bit_len, dst_total);
1736
+ "bit_splice: destination range [%" PRIdPTR ", %" PRIdPTR
1737
+ "] out of bounds (total %" PRId64 " bits)",
1738
+ (intptr_t)dst_bit_off, (intptr_t)dst_bit_len, (int64_t)dst_total);
1554
1739
  }
1555
1740
 
1556
- ssize_t src_total_bits = RSTRING_LEN(str) * 8;
1741
+ int64_t src_total_bits = SB_BIT_LEN(RSTRING_LEN(str));
1557
1742
  if (src_bit_off < 0 || src_bit_len < 0 || src_bit_off + src_bit_len > src_total_bits) {
1558
1743
  rb_raise(rb_eIndexError,
1559
- "bit_splice: source range [%ld, %ld] out of bounds (total %ld bits)",
1560
- src_bit_off, src_bit_len, src_total_bits);
1561
- }
1562
-
1563
- if (dst_bit_len != src_bit_len) {
1564
- rb_raise(rb_eArgError,
1565
- "bit_splice: destination length (%ld) must equal source length (%ld)",
1566
- dst_bit_len, src_bit_len);
1744
+ "bit_splice: source range [%" PRIdPTR ", %" PRIdPTR
1745
+ "] out of bounds (total %" PRId64 " bits)",
1746
+ (intptr_t)src_bit_off, (intptr_t)src_bit_len, (int64_t)src_total_bits);
1567
1747
  }
1568
1748
 
1569
1749
  if (dst_bit_len == 0) return self;
@@ -1693,8 +1873,9 @@ rb_ary_mask(int argc, VALUE *argv, VALUE self)
1693
1873
  ssize_t needed = (ary_len + 7) >> 3;
1694
1874
  if (needed > bmp_len)
1695
1875
  rb_raise(rb_eArgError,
1696
- "bitmap too short: need %ld bytes for %ld elements, got %ld",
1697
- needed, ary_len, bmp_len);
1876
+ "bitmap too short: need %" PRIdPTR " bytes for %" PRIdPTR
1877
+ " elements, got %" PRIdPTR,
1878
+ (intptr_t)needed, (intptr_t)ary_len, (intptr_t)bmp_len);
1698
1879
 
1699
1880
  if (!lsb_first) {
1700
1881
  for (ssize_t i = 0; i < ary_len; i++) {
@@ -1738,8 +1919,9 @@ rb_ary_mask_bang(int argc, VALUE *argv, VALUE self)
1738
1919
  ssize_t needed = (ary_len + 7) >> 3;
1739
1920
  if (needed > bmp_len)
1740
1921
  rb_raise(rb_eArgError,
1741
- "bitmap too short: need %ld bytes for %ld elements, got %ld",
1742
- needed, ary_len, bmp_len);
1922
+ "bitmap too short: need %" PRIdPTR " bytes for %" PRIdPTR
1923
+ " elements, got %" PRIdPTR,
1924
+ (intptr_t)needed, (intptr_t)ary_len, (intptr_t)bmp_len);
1743
1925
 
1744
1926
  if (!lsb_first) {
1745
1927
  for (ssize_t i = 0; i < ary_len; i++) {
@@ -1765,38 +1947,36 @@ rb_ary_mask_bang(int argc, VALUE *argv, VALUE self)
1765
1947
  void
1766
1948
  Init_string_bits(void)
1767
1949
  {
1768
- id_bracket = rb_intern("[]");
1769
- sym_lsb_first = ID2SYM(rb_intern("lsb_first"));
1770
- sym_lsb = ID2SYM(rb_intern("lsb"));
1771
- sym_msb = ID2SYM(rb_intern("msb"));
1772
- sym_invert = ID2SYM(rb_intern("invert"));
1773
-
1774
- rb_define_method(rb_cString, "bit_at", rb_str_bit_at, -1);
1775
- rb_define_method(rb_cString, "bit_count", rb_str_bit_count, 0);
1776
- rb_define_method(rb_cString, "each_bit", rb_str_each_bit, -1);
1777
- rb_define_method(rb_cString, "bits", rb_str_bits, -1);
1778
- rb_define_method(rb_cString, "each_set_bit_offset", rb_str_each_set_bit_offset, -1);
1779
- rb_define_method(rb_cString, "set_bit_offsets", rb_str_set_bit_offsets, -1);
1780
- rb_define_method(rb_cString, "bit_slice", rb_str_bit_slice, -1);
1781
- rb_define_method(rb_cString, "bit_splice", rb_str_bit_splice, -1);
1782
- rb_define_method(rb_cString, "bit_run_count", rb_str_bit_run_count, -1);
1783
- rb_define_method(rb_cString, "each_bit_run", rb_str_each_bit_run, -1);
1784
- rb_define_method(rb_cString, "bit_runs", rb_str_bit_runs, -1);
1785
- rb_define_method(rb_cString, "set_bit", rb_str_set_bit, -1);
1786
- rb_define_method(rb_cString, "clear_bit", rb_str_clear_bit, -1);
1787
- rb_define_method(rb_cString, "flip_bit", rb_str_flip_bit, -1);
1788
- rb_define_method(rb_cString, "bit_not", rb_str_bit_not, 0);
1789
- rb_define_method(rb_cString, "bit_not!", rb_str_bit_not_bang, 0);
1790
- rb_define_method(rb_cString, "bit_and", rb_str_bit_and, 1);
1791
- rb_define_method(rb_cString, "bit_and!", rb_str_bit_and_bang, 1);
1792
- rb_define_method(rb_cString, "bit_or", rb_str_bit_or, 1);
1793
- rb_define_method(rb_cString, "bit_or!", rb_str_bit_or_bang, 1);
1794
- rb_define_method(rb_cString, "bit_xor", rb_str_bit_xor, 1);
1795
- rb_define_method(rb_cString, "bit_xor!", rb_str_bit_xor_bang, 1);
1950
+ id_bracket = rb_intern("[]");
1951
+ sym_lsb_first = ID2SYM(rb_intern("lsb_first"));
1952
+ sym_invert = ID2SYM(rb_intern("invert"));
1953
+
1954
+ rb_define_method(rb_cString, "bit_at", rb_str_bit_at, -1);
1955
+ rb_define_method(rb_cString, "bit_count", rb_str_bit_count, -1);
1956
+ rb_define_method(rb_cString, "each_bit", rb_str_each_bit, -1);
1957
+ rb_define_method(rb_cString, "bits", rb_str_bits, -1);
1958
+ rb_define_method(rb_cString, "each_bit_offset", rb_str_each_bit_offset, -1);
1959
+ rb_define_method(rb_cString, "bit_offsets", rb_str_bit_offsets, -1);
1960
+ rb_define_method(rb_cString, "bit_slice", rb_str_bit_slice, -1);
1961
+ rb_define_method(rb_cString, "bit_splice", rb_str_bit_splice, -1);
1962
+ rb_define_method(rb_cString, "bit_run_count", rb_str_bit_run_count, -1);
1963
+ rb_define_method(rb_cString, "each_bit_run", rb_str_each_bit_run, -1);
1964
+ rb_define_method(rb_cString, "bit_runs", rb_str_bit_runs, -1);
1965
+ rb_define_method(rb_cString, "bit_set", rb_str_bit_set, -1);
1966
+ rb_define_method(rb_cString, "bit_clear", rb_str_bit_clear, -1);
1967
+ rb_define_method(rb_cString, "bit_flip", rb_str_bit_flip, -1);
1968
+ rb_define_method(rb_cString, "bitwise_not", rb_str_bitwise_not, 0);
1969
+ rb_define_method(rb_cString, "bitwise_not!", rb_str_bitwise_not_bang, 0);
1970
+ rb_define_method(rb_cString, "bitwise_and", rb_str_bitwise_and, 1);
1971
+ rb_define_method(rb_cString, "bitwise_and!", rb_str_bitwise_and_bang, 1);
1972
+ rb_define_method(rb_cString, "bitwise_or", rb_str_bitwise_or, 1);
1973
+ rb_define_method(rb_cString, "bitwise_or!", rb_str_bitwise_or_bang, 1);
1974
+ rb_define_method(rb_cString, "bitwise_xor", rb_str_bitwise_xor, 1);
1975
+ rb_define_method(rb_cString, "bitwise_xor!", rb_str_bitwise_xor_bang, 1);
1796
1976
 
1797
1977
  // These methods are defined here to avoid cluttering this file, but they are not part of the current core proposal (see FUTURE_PROPOSAL_PLAN.md).
1798
- rb_define_method(rb_cString, "each_bit_field", rb_str_each_bit_field, -1);
1799
- rb_define_method(rb_cString, "bit_fields", rb_str_bit_fields, -1);
1800
- rb_define_method(rb_cArray, "mask", rb_ary_mask, -1);
1801
- rb_define_method(rb_cArray, "mask!", rb_ary_mask_bang, -1);
1978
+ rb_define_method(rb_cString, "each_bit_field", rb_str_each_bit_field, -1);
1979
+ rb_define_method(rb_cString, "bit_fields", rb_str_bit_fields, -1);
1980
+ rb_define_method(rb_cArray, "mask", rb_ary_mask, -1);
1981
+ rb_define_method(rb_cArray, "mask!", rb_ary_mask_bang, -1);
1802
1982
  }