string_bits 0.1.0 → 0.2.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +2 -2
- data/ext/string_bits/string_bits.c +756 -576
- metadata +1 -1
|
@@ -2,10 +2,40 @@
|
|
|
2
2
|
#include "ruby/encoding.h"
|
|
3
3
|
|
|
4
4
|
#include <limits.h> /* CHAR_BIT */
|
|
5
|
-
#include <stdint.h> /* uint64_t, UINT64_MAX */
|
|
5
|
+
#include <stdint.h> /* uint64_t, UINT64_MAX, int64_t, intptr_t */
|
|
6
|
+
#include <inttypes.h> /* PRIdPTR (ssize_t via intptr_t), PRId64 */
|
|
6
7
|
#include <string.h> /* memcpy */
|
|
7
8
|
#include <sys/types.h> /* ssize_t (Ruby typedefs it on Windows) */
|
|
8
9
|
|
|
10
|
+
/* Whole-string bit length, computed in 64 bits.
|
|
11
|
+
*
|
|
12
|
+
* RSTRING_LEN returns a pointer-width signed length, so `RSTRING_LEN(s) * 8`
|
|
13
|
+
* overflows a signed 32-bit ssize_t once a string reaches 2**28 bytes (256 MiB)
|
|
14
|
+
* on an ILP32 build, corrupting every bounds check that compares a bit offset
|
|
15
|
+
* against it. Valid bit indices are confined to the Fixnum range and always fit
|
|
16
|
+
* ssize_t, so only this whole-string bit length needs the wider type: computing
|
|
17
|
+
* it in int64_t keeps the bounds checks correct on 32-bit without changing the
|
|
18
|
+
* public pointer-width bit-index contract (see Discussion.md, "Error behavior
|
|
19
|
+
* for out-of-range bit indices").
|
|
20
|
+
*
|
|
21
|
+
* Porting to Ruby Core:
|
|
22
|
+
* 1. Core String lengths are `long` (RSTRING_LEN), which is pointer-width,
|
|
23
|
+
* so `RSTRING_LEN(str) * 8` overflows on ILP32 for strings >= 256 MiB
|
|
24
|
+
* exactly as it does for ssize_t here. Keep the whole-string bit length
|
|
25
|
+
* in a 64-bit intermediate at every bounds check; do not hold it in a
|
|
26
|
+
* `long`. Reuse this macro (or an equivalent inline) rather than open-
|
|
27
|
+
* coding `len * 8`.
|
|
28
|
+
* 2. Keep the public bit-index type pointer-width and keep rejecting
|
|
29
|
+
* out-of-range positions with ArgumentError (see the cross-reference
|
|
30
|
+
* above). Only this internal length is widened, so the contract that
|
|
31
|
+
* core inherits is unchanged.
|
|
32
|
+
* 3. The error-message format specifiers below (<inttypes.h>: (intptr_t)
|
|
33
|
+
* with PRIdPTR for bit offsets, PRId64 for this widened length) exist
|
|
34
|
+
* only because this length is wider than the offsets. In core, follow
|
|
35
|
+
* the local convention for formatting `long` offsets and pick a 64-bit
|
|
36
|
+
* specifier for the widened length accordingly. */
|
|
37
|
+
#define SB_BIT_LEN(byte_len) ((int64_t)(byte_len) * 8)
|
|
38
|
+
|
|
9
39
|
/* popcount ----------------------------------------------------------------- */
|
|
10
40
|
/*
|
|
11
41
|
* Porting to Ruby Core:
|
|
@@ -70,7 +100,7 @@ sb_popcount64(uint64_t x)
|
|
|
70
100
|
/* ctz / clz helpers for set-bit iteration ---------------------------------- */
|
|
71
101
|
|
|
72
102
|
static ID id_bracket;
|
|
73
|
-
static VALUE sym_lsb_first,
|
|
103
|
+
static VALUE sym_lsb_first, sym_invert;
|
|
74
104
|
|
|
75
105
|
enum sb_kw_flag {
|
|
76
106
|
SB_KW_INVERT = 1 << 0,
|
|
@@ -190,27 +220,10 @@ integer_to_bit_idx(VALUE n)
|
|
|
190
220
|
UNREACHABLE_RETURN(0);
|
|
191
221
|
}
|
|
192
222
|
|
|
193
|
-
|
|
194
|
-
|
|
195
|
-
|
|
196
|
-
|
|
197
|
-
rb_raise(rb_eTypeError, "bit index must be an integer");
|
|
198
|
-
}
|
|
199
|
-
ssize_t idx = integer_to_bit_idx(n);
|
|
200
|
-
ssize_t size = RSTRING_LEN(self) * 8;
|
|
201
|
-
if (idx < 0 || idx >= size) {
|
|
202
|
-
rb_raise(rb_eIndexError, "bit index out of range");
|
|
203
|
-
}
|
|
204
|
-
if (!lsb_first) idx = (idx & ~7L) | (7 - (idx & 7L));
|
|
205
|
-
return idx;
|
|
206
|
-
}
|
|
207
|
-
|
|
208
|
-
static inline ssize_t
|
|
209
|
-
physical_to_count_from(ssize_t physical, int lsb_first)
|
|
210
|
-
{
|
|
211
|
-
return lsb_first ? physical : ((physical & ~7L) | (7 - (physical & 7L)));
|
|
212
|
-
}
|
|
213
|
-
|
|
223
|
+
/* Bit numbering between byte-with-LSB-as-bit-0 and byte-with-MSB-as-bit-0
|
|
224
|
+
* is an involution: swapping in either direction uses the same formula
|
|
225
|
+
* `(x & ~7) | (7 - (x & 7))`. logical_to_physical is therefore symmetric and
|
|
226
|
+
* is reused on the return path (physical -> logical) as well. */
|
|
214
227
|
static inline ssize_t
|
|
215
228
|
logical_to_physical(ssize_t logical, int lsb_first)
|
|
216
229
|
{
|
|
@@ -237,6 +250,20 @@ logical_write_bit(unsigned char *ptr, ssize_t logical_index, int lsb_first, int
|
|
|
237
250
|
physical_write_bit(ptr, logical_to_physical(logical_index, lsb_first), bit);
|
|
238
251
|
}
|
|
239
252
|
|
|
253
|
+
static ssize_t
|
|
254
|
+
check_bit_index(VALUE self, VALUE n, int lsb_first)
|
|
255
|
+
{
|
|
256
|
+
if (!rb_integer_type_p(n)) {
|
|
257
|
+
rb_raise(rb_eTypeError, "bit index must be an integer");
|
|
258
|
+
}
|
|
259
|
+
ssize_t idx = integer_to_bit_idx(n);
|
|
260
|
+
int64_t size = SB_BIT_LEN(RSTRING_LEN(self));
|
|
261
|
+
if (idx < 0 || idx >= size) {
|
|
262
|
+
rb_raise(rb_eIndexError, "bit index out of range");
|
|
263
|
+
}
|
|
264
|
+
return logical_to_physical(idx, lsb_first);
|
|
265
|
+
}
|
|
266
|
+
|
|
240
267
|
/* ssize_t-interface wrapper around rb_range_beg_len.
|
|
241
268
|
*
|
|
242
269
|
* rb_range_beg_len() takes (long *begp, long *lenp, long len), but this
|
|
@@ -266,11 +293,49 @@ sb_range_beg_len_call(VALUE arg)
|
|
|
266
293
|
return rb_range_beg_len(a->range, a->lbegp, a->llenp, a->len, a->err);
|
|
267
294
|
}
|
|
268
295
|
|
|
296
|
+
/* Validate Range endpoints for bit position arguments.
|
|
297
|
+
* Raises ArgumentError for a Bignum endpoint and IndexError for a negative one:
|
|
298
|
+
* - any explicit (non-nil) Bignum endpoint: cannot address any real string,
|
|
299
|
+
* consistent with integer_to_bit_idx behavior for scalar indices.
|
|
300
|
+
* - any explicit (non-nil) negative endpoint: count-from-end semantics
|
|
301
|
+
* interact confusingly with lsb_first: true/false.
|
|
302
|
+
* A Bignum endpoint is rejected before its sign is examined, so the
|
|
303
|
+
* negativity check only ever applies FIX2LONG to a Fixnum.
|
|
304
|
+
*
|
|
305
|
+
* Porting to Ruby Core:
|
|
306
|
+
* Replace rb_range_values() with direct struct access:
|
|
307
|
+
* #include "internal/range.h"
|
|
308
|
+
* beg = RANGE_BEG(range);
|
|
309
|
+
* end = RANGE_END(range);
|
|
310
|
+
* excl = RANGE_EXCL(range);
|
|
311
|
+
*/
|
|
312
|
+
static void
|
|
313
|
+
sb_range_validate_endpoints(VALUE range)
|
|
314
|
+
{
|
|
315
|
+
VALUE beg, end;
|
|
316
|
+
int excl;
|
|
317
|
+
rb_range_values(range, &beg, &end, &excl);
|
|
318
|
+
if (!NIL_P(beg) && rb_integer_type_p(beg)) {
|
|
319
|
+
if (!FIXNUM_P(beg))
|
|
320
|
+
rb_raise(rb_eArgError, "bit index out of representable range");
|
|
321
|
+
if (FIX2LONG(beg) < 0)
|
|
322
|
+
rb_raise(rb_eIndexError,
|
|
323
|
+
"negative Range endpoint is not allowed for bit positions");
|
|
324
|
+
}
|
|
325
|
+
if (!NIL_P(end) && rb_integer_type_p(end)) {
|
|
326
|
+
if (!FIXNUM_P(end))
|
|
327
|
+
rb_raise(rb_eArgError, "bit index out of representable range");
|
|
328
|
+
if (FIX2LONG(end) < 0)
|
|
329
|
+
rb_raise(rb_eIndexError,
|
|
330
|
+
"negative Range endpoint is not allowed for bit positions");
|
|
331
|
+
}
|
|
332
|
+
}
|
|
333
|
+
|
|
269
334
|
static inline VALUE
|
|
270
|
-
sb_range_beg_len(VALUE range, ssize_t *begp, ssize_t *lenp,
|
|
335
|
+
sb_range_beg_len(VALUE range, ssize_t *begp, ssize_t *lenp, int64_t len, int err)
|
|
271
336
|
{
|
|
272
337
|
long lbeg = 0, llen = 0;
|
|
273
|
-
long clipped = (len > (
|
|
338
|
+
long clipped = (len > (int64_t)LONG_MAX) ? LONG_MAX : (long)len;
|
|
274
339
|
struct sb_range_args args = { range, &lbeg, &llen, clipped, err };
|
|
275
340
|
int state = 0;
|
|
276
341
|
VALUE result = rb_protect(sb_range_beg_len_call, (VALUE)&args, &state);
|
|
@@ -325,53 +390,42 @@ parse_lsb_first_opt(VALUE opts)
|
|
|
325
390
|
return parse_bool_opt(opts, sym_lsb_first, "lsb_first", 1);
|
|
326
391
|
}
|
|
327
392
|
|
|
328
|
-
|
|
329
|
-
|
|
393
|
+
/* Parse an optional start_offset positional argument (Qnil => 0).
|
|
394
|
+
* Raises ArgumentError for Bignum, IndexError for negative Fixnum. */
|
|
395
|
+
static ssize_t
|
|
396
|
+
parse_start_offset(VALUE v)
|
|
330
397
|
{
|
|
331
|
-
|
|
332
|
-
|
|
333
|
-
|
|
334
|
-
|
|
398
|
+
if (NIL_P(v)) return 0;
|
|
399
|
+
ssize_t start_offset = integer_to_bit_idx(v); /* raises ArgumentError for Bignum */
|
|
400
|
+
if (start_offset < 0)
|
|
401
|
+
rb_raise(rb_eIndexError, "bit_offset must be non-negative");
|
|
402
|
+
return start_offset;
|
|
335
403
|
}
|
|
336
404
|
|
|
337
405
|
/* read -------------------------------------------------------------------- */
|
|
338
406
|
|
|
339
|
-
/*
|
|
340
|
-
*
|
|
341
|
-
* bit_at uses flat/Arrow convention: byte_index = n/8 from start, bit = n%8 from LSB
|
|
342
|
-
* e.g. "\xAA\xCC": bit 0..7 live in byte[0]=0xAA, bit 8..15 live in byte[1]=0xCC
|
|
343
|
-
*
|
|
344
|
-
* str = "\xFF\xAA" # 11111111 10101010
|
|
345
|
-
* str.bit_at(0) # => true (1st bit is set)
|
|
346
|
-
* str.bit_at(7) # => true (8th bit is set)
|
|
347
|
-
* str.bit_at(8) # => false (9th bit is clear)
|
|
348
|
-
* str.bit_at(9) # => true (10th bit is set)
|
|
349
|
-
* str.bit_at(16) # => nil
|
|
350
|
-
*/
|
|
407
|
+
/* Return true/false for the bit at flat position bit_offset, or nil when bit_offset is at or past the end of the string. */
|
|
351
408
|
static VALUE
|
|
352
409
|
rb_str_bit_at(int argc, VALUE *argv, VALUE self)
|
|
353
410
|
{
|
|
354
|
-
VALUE
|
|
355
|
-
rb_scan_args(argc, argv, "1:", &
|
|
411
|
+
VALUE bit_offset_v, opts;
|
|
412
|
+
rb_scan_args(argc, argv, "1:", &bit_offset_v, &opts);
|
|
356
413
|
validate_option_hash(opts, SB_KW_LSB_FIRST);
|
|
357
414
|
|
|
358
|
-
if (!rb_integer_type_p(
|
|
415
|
+
if (!rb_integer_type_p(bit_offset_v)) {
|
|
359
416
|
rb_raise(rb_eTypeError, "bit index must be an integer");
|
|
360
417
|
}
|
|
361
|
-
ssize_t
|
|
362
|
-
if (
|
|
363
|
-
rb_raise(
|
|
418
|
+
ssize_t bit_offset = integer_to_bit_idx(bit_offset_v);
|
|
419
|
+
if (bit_offset < 0) {
|
|
420
|
+
rb_raise(rb_eIndexError, "bit index out of range");
|
|
364
421
|
}
|
|
365
|
-
|
|
366
|
-
if (size <=
|
|
422
|
+
int64_t size = SB_BIT_LEN(RSTRING_LEN(self));
|
|
423
|
+
if (size <= bit_offset) {
|
|
367
424
|
return Qnil;
|
|
368
425
|
}
|
|
369
426
|
|
|
370
427
|
int lsb_first = parse_lsb_first_opt(opts);
|
|
371
|
-
|
|
372
|
-
if (!lsb_first) {
|
|
373
|
-
idx = (idx & ~7L) | (7 - (idx & 7L));
|
|
374
|
-
}
|
|
428
|
+
ssize_t idx = logical_to_physical(bit_offset, lsb_first);
|
|
375
429
|
|
|
376
430
|
if (test_bit(RSTRING_PTR(self), idx)) {
|
|
377
431
|
return Qtrue;
|
|
@@ -380,19 +434,21 @@ rb_str_bit_at(int argc, VALUE *argv, VALUE self)
|
|
|
380
434
|
}
|
|
381
435
|
}
|
|
382
436
|
|
|
383
|
-
|
|
384
|
-
|
|
437
|
+
/* count_set_bits: popcount over a raw byte buffer.
|
|
438
|
+
*
|
|
439
|
+
* Uses a 32-byte (4 x uint64_t) unrolled inner loop, falls back to 8-byte
|
|
440
|
+
* steps, and finally collects the partial trailing bytes into a single
|
|
441
|
+
* uint64_t for one more popcount. memcpy avoids unaligned-load issues on
|
|
442
|
+
* strict-alignment platforms (SPARC, MIPS); modern compilers fold the 8-byte
|
|
443
|
+
* memcpy into a single load on platforms that allow unaligned access. */
|
|
444
|
+
static ssize_t
|
|
445
|
+
count_set_bits(const unsigned char *str, ssize_t len)
|
|
385
446
|
{
|
|
386
447
|
ssize_t count = 0;
|
|
387
|
-
ssize_t len = RSTRING_LEN(self);
|
|
388
|
-
const char *str = RSTRING_PTR(self);
|
|
389
448
|
ssize_t off = 0;
|
|
390
449
|
ssize_t unrolled_end = len & ~31L;
|
|
391
450
|
ssize_t aligned_end = len & ~7L;
|
|
392
451
|
|
|
393
|
-
/* Use memcpy to avoid unaligned loads (SIGBUS on SPARC, MIPS, etc.)
|
|
394
|
-
* and strict-aliasing violations. Modern compilers fold 8-byte memcpy
|
|
395
|
-
* into a single load on platforms that allow unaligned access. */
|
|
396
452
|
for (; off < unrolled_end; off += 32) {
|
|
397
453
|
uint64_t w0, w1, w2, w3;
|
|
398
454
|
memcpy(&w0, str + off, 8);
|
|
@@ -414,208 +470,374 @@ rb_str_bit_count(VALUE self)
|
|
|
414
470
|
ssize_t remainder = len - aligned_end;
|
|
415
471
|
if (remainder > 0) {
|
|
416
472
|
uint64_t last = 0;
|
|
417
|
-
const unsigned char *tail =
|
|
473
|
+
const unsigned char *tail = str + aligned_end;
|
|
418
474
|
for (ssize_t i = 0; i < remainder; i++) {
|
|
419
475
|
last |= (uint64_t)tail[i] << (i * 8);
|
|
420
476
|
}
|
|
421
477
|
count += sb_popcount64(last);
|
|
422
478
|
}
|
|
423
479
|
|
|
424
|
-
return
|
|
480
|
+
return count;
|
|
425
481
|
}
|
|
426
482
|
|
|
427
|
-
/*
|
|
483
|
+
/* count_set_bits_range: popcount over [start, start+length) in LSB-first numbering.
|
|
484
|
+
* Handles non-byte-aligned start and length by masking partial first/last bytes. */
|
|
485
|
+
static ssize_t
|
|
486
|
+
count_set_bits_range(const unsigned char *str, ssize_t total_bytes,
|
|
487
|
+
ssize_t start, ssize_t length)
|
|
488
|
+
{
|
|
489
|
+
if (length <= 0) return 0;
|
|
490
|
+
int64_t total_bits = SB_BIT_LEN(total_bytes);
|
|
491
|
+
if (start >= total_bits) return 0;
|
|
492
|
+
if (start + length > total_bits) length = (ssize_t)(total_bits - start);
|
|
428
493
|
|
|
429
|
-
|
|
430
|
-
|
|
494
|
+
ssize_t byte_start = start >> 3;
|
|
495
|
+
int bit_lo = (int)(start & 7);
|
|
496
|
+
ssize_t end_bit = start + length;
|
|
497
|
+
ssize_t last_byte = (end_bit - 1) >> 3;
|
|
498
|
+
int e_bit = (int)(end_bit & 7); /* bits to use in last byte; 0 means full byte */
|
|
499
|
+
|
|
500
|
+
if (byte_start == last_byte) {
|
|
501
|
+
unsigned int b = (unsigned int)str[byte_start] >> bit_lo;
|
|
502
|
+
b &= (1u << (unsigned)length) - 1u;
|
|
503
|
+
return (ssize_t)sb_popcount64(b);
|
|
504
|
+
}
|
|
505
|
+
|
|
506
|
+
ssize_t count = 0;
|
|
507
|
+
if (bit_lo != 0) {
|
|
508
|
+
count += sb_popcount64((unsigned int)str[byte_start] >> bit_lo);
|
|
509
|
+
byte_start++;
|
|
510
|
+
}
|
|
511
|
+
ssize_t full_last = (e_bit == 0) ? last_byte + 1 : last_byte;
|
|
512
|
+
count += count_set_bits(str + byte_start, full_last - byte_start);
|
|
513
|
+
if (e_bit != 0) {
|
|
514
|
+
unsigned int b = (unsigned int)str[last_byte] & ((1u << (unsigned)e_bit) - 1u);
|
|
515
|
+
count += sb_popcount64(b);
|
|
516
|
+
}
|
|
517
|
+
return count;
|
|
518
|
+
}
|
|
519
|
+
|
|
520
|
+
/* count_set_bits_range_msb: same as count_set_bits_range but for MSB-first numbering.
|
|
521
|
+
* In MSB-first, position 0 within a byte is physical bit 7 (the MSB). */
|
|
522
|
+
static ssize_t
|
|
523
|
+
count_set_bits_range_msb(const unsigned char *str, ssize_t total_bytes,
|
|
524
|
+
ssize_t start, ssize_t length)
|
|
431
525
|
{
|
|
432
|
-
|
|
526
|
+
if (length <= 0) return 0;
|
|
527
|
+
int64_t total_bits = SB_BIT_LEN(total_bytes);
|
|
528
|
+
if (start >= total_bits) return 0;
|
|
529
|
+
if (start + length > total_bits) length = (ssize_t)(total_bits - start);
|
|
433
530
|
|
|
434
|
-
|
|
435
|
-
|
|
436
|
-
|
|
531
|
+
ssize_t byte_start = start >> 3;
|
|
532
|
+
int s_bit = (int)(start & 7); /* MSB-first within-byte start index */
|
|
533
|
+
ssize_t end_bit = start + length;
|
|
534
|
+
ssize_t last_byte = (end_bit - 1) >> 3;
|
|
535
|
+
int e_bit = (int)(end_bit & 7); /* bits to use in last byte; 0 means full byte */
|
|
437
536
|
|
|
438
|
-
|
|
439
|
-
|
|
440
|
-
|
|
441
|
-
|
|
442
|
-
|
|
443
|
-
}
|
|
444
|
-
} else {
|
|
445
|
-
for (int j = 7; j >= 0; j--) {
|
|
446
|
-
rb_yield((b >> j) & 1 ? Qtrue : Qfalse);
|
|
447
|
-
}
|
|
448
|
-
}
|
|
537
|
+
if (byte_start == last_byte) {
|
|
538
|
+
/* physical bits (7-s_bit) down to (7-s_bit-length+1) */
|
|
539
|
+
unsigned int b = (unsigned int)str[byte_start] >> (unsigned)(8 - s_bit - (int)length);
|
|
540
|
+
b &= (1u << (unsigned)length) - 1u;
|
|
541
|
+
return (ssize_t)sb_popcount64(b);
|
|
449
542
|
}
|
|
450
543
|
|
|
451
|
-
|
|
544
|
+
ssize_t count = 0;
|
|
545
|
+
/* partial first byte: MSB-first positions s_bit..7 = physical bits 0..(7-s_bit) */
|
|
546
|
+
if (s_bit != 0) {
|
|
547
|
+
unsigned int b = (unsigned int)str[byte_start] & ((1u << (unsigned)(8 - s_bit)) - 1u);
|
|
548
|
+
count += sb_popcount64(b);
|
|
549
|
+
byte_start++;
|
|
550
|
+
}
|
|
551
|
+
ssize_t full_last = (e_bit == 0) ? last_byte + 1 : last_byte;
|
|
552
|
+
count += count_set_bits(str + byte_start, full_last - byte_start);
|
|
553
|
+
/* partial last byte: MSB-first positions 0..(e_bit-1) = physical bits (8-e_bit)..7 */
|
|
554
|
+
if (e_bit != 0) {
|
|
555
|
+
unsigned int b = (unsigned int)str[last_byte] >> (unsigned)(8 - e_bit);
|
|
556
|
+
count += sb_popcount64(b);
|
|
557
|
+
}
|
|
558
|
+
return count;
|
|
452
559
|
}
|
|
453
560
|
|
|
454
561
|
static VALUE
|
|
455
|
-
|
|
562
|
+
rb_str_bit_count(int argc, VALUE *argv, VALUE self)
|
|
456
563
|
{
|
|
457
|
-
int lsb_first = parse_lsb_first(argc, argv);
|
|
458
|
-
ssize_t len = RSTRING_LEN(self);
|
|
459
564
|
const unsigned char *str = (const unsigned char *)RSTRING_PTR(self);
|
|
460
|
-
ssize_t
|
|
461
|
-
int have_block = rb_block_given_p();
|
|
565
|
+
ssize_t src_len = RSTRING_LEN(self);
|
|
462
566
|
|
|
463
|
-
VALUE
|
|
567
|
+
VALUE v0 = Qnil, v1 = Qnil, opts = Qnil;
|
|
568
|
+
rb_scan_args(argc, argv, "02:", &v0, &v1, &opts);
|
|
569
|
+
validate_option_hash(opts, SB_KW_LSB_FIRST);
|
|
464
570
|
|
|
465
|
-
|
|
466
|
-
|
|
467
|
-
|
|
468
|
-
|
|
469
|
-
|
|
470
|
-
|
|
471
|
-
|
|
472
|
-
|
|
473
|
-
|
|
474
|
-
|
|
475
|
-
|
|
476
|
-
|
|
477
|
-
|
|
571
|
+
/* No positional args: count the whole string; lsb_first: is ignored (order-independent) */
|
|
572
|
+
if (NIL_P(v0))
|
|
573
|
+
return SSIZET2NUM(count_set_bits(str, src_len));
|
|
574
|
+
|
|
575
|
+
int lsb_first = parse_lsb_first_opt(opts);
|
|
576
|
+
int64_t total_bits = SB_BIT_LEN(src_len);
|
|
577
|
+
ssize_t bit_offset, bit_length;
|
|
578
|
+
|
|
579
|
+
if (rb_obj_is_kind_of(v0, rb_cRange)) {
|
|
580
|
+
if (!NIL_P(v1))
|
|
581
|
+
rb_raise(rb_eArgError, "wrong number of arguments");
|
|
582
|
+
sb_range_validate_endpoints(v0);
|
|
583
|
+
ssize_t beg, len;
|
|
584
|
+
if (!RTEST(sb_range_beg_len(v0, &beg, &len, total_bits, 0)))
|
|
585
|
+
return INT2FIX(0);
|
|
586
|
+
bit_offset = beg;
|
|
587
|
+
bit_length = len;
|
|
588
|
+
}
|
|
589
|
+
else if (!NIL_P(v1)) {
|
|
590
|
+
if (!rb_integer_type_p(v0))
|
|
591
|
+
rb_raise(rb_eTypeError, "bit_offset must be an integer");
|
|
592
|
+
if (!rb_integer_type_p(v1))
|
|
593
|
+
rb_raise(rb_eTypeError, "bit_length must be an integer");
|
|
594
|
+
bit_offset = integer_to_bit_idx(v0);
|
|
595
|
+
if (bit_offset < 0)
|
|
596
|
+
rb_raise(rb_eIndexError, "bit_offset must be non-negative");
|
|
597
|
+
bit_length = integer_to_bit_idx(v1);
|
|
598
|
+
if (bit_length < 0)
|
|
599
|
+
rb_raise(rb_eArgError, "bit_length must be non-negative");
|
|
600
|
+
}
|
|
601
|
+
else {
|
|
602
|
+
rb_raise(rb_eArgError,
|
|
603
|
+
"wrong number of arguments (given 1, expected 0, 1 Range, or 2)");
|
|
478
604
|
}
|
|
479
605
|
|
|
480
|
-
|
|
606
|
+
if (lsb_first)
|
|
607
|
+
return SSIZET2NUM(count_set_bits_range(str, src_len, bit_offset, bit_length));
|
|
608
|
+
else
|
|
609
|
+
return SSIZET2NUM(count_set_bits_range_msb(str, src_len, bit_offset, bit_length));
|
|
481
610
|
}
|
|
482
611
|
|
|
483
|
-
/* iterate
|
|
612
|
+
/* iterate bits ------------------------------------------------------------ */
|
|
484
613
|
|
|
485
|
-
|
|
486
|
-
|
|
614
|
+
/* Unified emitter for each_bit / bits.
|
|
615
|
+
*
|
|
616
|
+
* Yields (when ary == Qnil) or pushes to a pre-allocated Array. lsb_first is
|
|
617
|
+
* hoisted outside the byte loop so the inner walk direction is straight-line
|
|
618
|
+
* code, removing a per-byte branch.
|
|
619
|
+
*/
|
|
620
|
+
static void
|
|
621
|
+
emit_bits(const unsigned char *str, ssize_t len, int lsb_first, ssize_t start_offset, VALUE ary)
|
|
487
622
|
{
|
|
488
|
-
|
|
623
|
+
if (start_offset >= SB_BIT_LEN(len)) return;
|
|
624
|
+
|
|
625
|
+
#define SB_EMIT(v) \
|
|
626
|
+
do { VALUE _b = (v); \
|
|
627
|
+
if (ary == Qnil) rb_yield(_b); else rb_ary_push(ary, _b); } while (0)
|
|
628
|
+
|
|
629
|
+
ssize_t byte_start = start_offset >> 3;
|
|
630
|
+
int bit_start = (int)(start_offset & 7);
|
|
489
631
|
|
|
490
|
-
int lsb_first = parse_lsb_first(argc, argv);
|
|
491
|
-
ssize_t len = RSTRING_LEN(self);
|
|
492
|
-
const unsigned char *str = (const unsigned char *)RSTRING_PTR(self);
|
|
493
632
|
if (lsb_first) {
|
|
494
|
-
|
|
495
|
-
|
|
496
|
-
|
|
497
|
-
|
|
498
|
-
|
|
499
|
-
ssize_t n_words = len >> 3;
|
|
500
|
-
for (ssize_t wi = 0; wi < n_words; wi++) {
|
|
501
|
-
uint64_t w;
|
|
502
|
-
memcpy(&w, str + wi * 8, 8);
|
|
503
|
-
while (w != 0) {
|
|
504
|
-
int bit = sb_ctzll(w);
|
|
505
|
-
rb_yield(SSIZET2NUM(wi * 64 + bit));
|
|
506
|
-
w &= w - 1;
|
|
633
|
+
for (ssize_t i = byte_start; i < len; i++) {
|
|
634
|
+
unsigned char b = str[i];
|
|
635
|
+
int j_start = (i == byte_start) ? bit_start : 0;
|
|
636
|
+
for (int j = j_start; j < 8; j++) {
|
|
637
|
+
SB_EMIT((b >> j) & 1 ? Qtrue : Qfalse);
|
|
507
638
|
}
|
|
508
639
|
}
|
|
509
|
-
|
|
510
|
-
|
|
511
|
-
|
|
512
|
-
|
|
513
|
-
|
|
514
|
-
b
|
|
515
|
-
}
|
|
516
|
-
}
|
|
517
|
-
#else
|
|
518
|
-
for (ssize_t bi = 0; bi < len; bi++) {
|
|
519
|
-
unsigned int b = str[bi];
|
|
520
|
-
while (b != 0) {
|
|
521
|
-
int bit = sb_ctz8(b);
|
|
522
|
-
rb_yield(SSIZET2NUM(bi * 8 + bit));
|
|
523
|
-
b &= b - 1;
|
|
524
|
-
}
|
|
525
|
-
}
|
|
526
|
-
#endif
|
|
527
|
-
}
|
|
528
|
-
else {
|
|
529
|
-
/* lsb_first: false => byte order preserved, bits 7..0 map to logical 0..7 */
|
|
530
|
-
for (ssize_t bi = 0; bi < len; bi++) {
|
|
531
|
-
unsigned int b = str[bi];
|
|
532
|
-
while (b != 0) {
|
|
533
|
-
int bit = sb_highest_bit8(b);
|
|
534
|
-
ssize_t physical = bi * 8 + bit;
|
|
535
|
-
rb_yield(SSIZET2NUM(physical_to_count_from(physical, 0)));
|
|
536
|
-
b ^= (1u << bit); /* clear highest set bit */
|
|
640
|
+
} else {
|
|
641
|
+
for (ssize_t i = byte_start; i < len; i++) {
|
|
642
|
+
unsigned char b = str[i];
|
|
643
|
+
int j_end = (i == byte_start) ? (7 - bit_start) : 7;
|
|
644
|
+
for (int j = j_end; j >= 0; j--) {
|
|
645
|
+
SB_EMIT((b >> j) & 1 ? Qtrue : Qfalse);
|
|
537
646
|
}
|
|
538
647
|
}
|
|
539
648
|
}
|
|
540
649
|
|
|
650
|
+
#undef SB_EMIT
|
|
651
|
+
}
|
|
652
|
+
|
|
653
|
+
static VALUE
|
|
654
|
+
rb_str_each_bit(int argc, VALUE *argv, VALUE self)
|
|
655
|
+
{
|
|
656
|
+
RETURN_ENUMERATOR(self, argc, argv);
|
|
657
|
+
|
|
658
|
+
VALUE start_offset_v = Qnil, opts = Qnil;
|
|
659
|
+
rb_scan_args(argc, argv, "01:", &start_offset_v, &opts);
|
|
660
|
+
validate_option_hash(opts, SB_KW_LSB_FIRST);
|
|
661
|
+
int lsb_first = parse_lsb_first_opt(opts);
|
|
662
|
+
ssize_t start_offset = parse_start_offset(start_offset_v);
|
|
663
|
+
|
|
664
|
+
emit_bits((const unsigned char *)RSTRING_PTR(self), RSTRING_LEN(self),
|
|
665
|
+
lsb_first, start_offset, Qnil);
|
|
541
666
|
return self;
|
|
542
667
|
}
|
|
543
668
|
|
|
544
669
|
static VALUE
|
|
545
|
-
|
|
670
|
+
rb_str_bits(int argc, VALUE *argv, VALUE self)
|
|
546
671
|
{
|
|
547
|
-
|
|
672
|
+
VALUE start_offset_v = Qnil, opts = Qnil;
|
|
673
|
+
rb_scan_args(argc, argv, "01:", &start_offset_v, &opts);
|
|
674
|
+
validate_option_hash(opts, SB_KW_LSB_FIRST);
|
|
675
|
+
int lsb_first = parse_lsb_first_opt(opts);
|
|
676
|
+
ssize_t start_offset = parse_start_offset(start_offset_v);
|
|
548
677
|
ssize_t len = RSTRING_LEN(self);
|
|
549
678
|
const unsigned char *str = (const unsigned char *)RSTRING_PTR(self);
|
|
550
|
-
int have_block = rb_block_given_p();
|
|
551
679
|
|
|
552
|
-
|
|
553
|
-
|
|
554
|
-
|
|
555
|
-
}
|
|
556
|
-
else {
|
|
557
|
-
/* Pre-size the Array with popcount to avoid repeated reallocation.
|
|
558
|
-
* memcpy avoids unaligned-load issues on strict-alignment platforms. */
|
|
559
|
-
ssize_t count = 0;
|
|
560
|
-
ssize_t nw = len >> 3;
|
|
561
|
-
for (ssize_t wi = 0; wi < nw; wi++) {
|
|
562
|
-
uint64_t w;
|
|
563
|
-
memcpy(&w, str + wi * 8, 8);
|
|
564
|
-
count += sb_popcount64(w);
|
|
565
|
-
}
|
|
566
|
-
for (ssize_t bi = nw << 3; bi < len; bi++)
|
|
567
|
-
count += sb_popcount64((uint64_t)(unsigned char)str[bi]);
|
|
568
|
-
ary = rb_ary_new_capa(count);
|
|
680
|
+
if (rb_block_given_p()) {
|
|
681
|
+
emit_bits(str, len, lsb_first, start_offset, Qnil);
|
|
682
|
+
return self;
|
|
569
683
|
}
|
|
570
684
|
|
|
685
|
+
int64_t total_bits = SB_BIT_LEN(len);
|
|
686
|
+
ssize_t nbits = (start_offset >= total_bits) ? 0 : (ssize_t)(total_bits - start_offset);
|
|
687
|
+
VALUE ary = rb_ary_new_capa(nbits);
|
|
688
|
+
emit_bits(str, len, lsb_first, start_offset, ary);
|
|
689
|
+
return ary;
|
|
690
|
+
}
|
|
691
|
+
|
|
692
|
+
/* iterate bit positions matching `bit` ------------------------------------ */
|
|
693
|
+
|
|
694
|
+
/* parse the required `bit` argument (true/false/1/0) to 0 or 1 */
|
|
695
|
+
static int
|
|
696
|
+
parse_bit_target(VALUE bit_val)
|
|
697
|
+
{
|
|
698
|
+
if (bit_val == Qtrue || bit_val == INT2FIX(1)) return 1;
|
|
699
|
+
if (bit_val == Qfalse || bit_val == INT2FIX(0)) return 0;
|
|
700
|
+
rb_raise(rb_eArgError, "bit must be 0, 1, false, or true");
|
|
701
|
+
UNREACHABLE_RETURN(0);
|
|
702
|
+
}
|
|
703
|
+
|
|
704
|
+
/* Unified scanner for each_bit_offset / bit_offsets.
|
|
705
|
+
*
|
|
706
|
+
* Emit each bit position equal to `target` either by yielding to the block
|
|
707
|
+
* (when ary == Qnil) or by pushing to the pre-allocated Array. Both call
|
|
708
|
+
* paths share the same hot loops; the only per-emit cost is one branch on
|
|
709
|
+
* (ary == Qnil), which the compiler can lift out of the inner while loop.
|
|
710
|
+
*
|
|
711
|
+
* LSB-first path: on little-endian, an 8-byte memcpy preserves the flat
|
|
712
|
+
* LSB-first bit numbering (word bit 0 == position 0), so we can scan 64 bits
|
|
713
|
+
* per ctzll. For target=0, invert the loaded word/byte; all 8/64 bits of the
|
|
714
|
+
* inverted unit are valid positions since each byte contributes exactly 8.
|
|
715
|
+
*
|
|
716
|
+
* MSB-first path: walk byte-by-byte with sb_highest_bit8, mapping each
|
|
717
|
+
* physical (LSB-first) bit position into the MSB-first count via
|
|
718
|
+
* logical_to_physical (the operation is its own inverse).
|
|
719
|
+
*/
|
|
720
|
+
static void
|
|
721
|
+
emit_bit_offsets(const unsigned char *str, ssize_t len, int target, int lsb_first,
|
|
722
|
+
ssize_t start_offset, VALUE ary)
|
|
723
|
+
{
|
|
724
|
+
if (start_offset >= SB_BIT_LEN(len)) return;
|
|
725
|
+
|
|
726
|
+
#define SB_EMIT(pos_val) \
|
|
727
|
+
do { VALUE _p = (pos_val); \
|
|
728
|
+
if (ary == Qnil) rb_yield(_p); else rb_ary_push(ary, _p); } while (0)
|
|
729
|
+
|
|
730
|
+
ssize_t byte_start = start_offset >> 3;
|
|
731
|
+
int bit_lo = (int)(start_offset & 7);
|
|
732
|
+
|
|
571
733
|
if (lsb_first) {
|
|
734
|
+
/* Handle the partial first byte before aligning to byte boundary */
|
|
735
|
+
if (bit_lo != 0) {
|
|
736
|
+
unsigned int b = str[byte_start];
|
|
737
|
+
if (target == 0) b = (~b) & 0xFF;
|
|
738
|
+
b >>= bit_lo;
|
|
739
|
+
while (b != 0) {
|
|
740
|
+
int bit = sb_ctz8(b);
|
|
741
|
+
SB_EMIT(SSIZET2NUM(byte_start * 8 + bit_lo + bit));
|
|
742
|
+
b &= b - 1;
|
|
743
|
+
}
|
|
744
|
+
byte_start++;
|
|
745
|
+
}
|
|
572
746
|
#if SB_LITTLE_ENDIAN
|
|
573
|
-
ssize_t n_words = len >> 3;
|
|
747
|
+
ssize_t n_words = (len - byte_start) >> 3;
|
|
574
748
|
for (ssize_t wi = 0; wi < n_words; wi++) {
|
|
575
749
|
uint64_t w;
|
|
576
|
-
memcpy(&w, str + wi * 8, 8);
|
|
750
|
+
memcpy(&w, str + byte_start + wi * 8, 8);
|
|
751
|
+
if (target == 0) w = ~w;
|
|
577
752
|
while (w != 0) {
|
|
578
753
|
int bit = sb_ctzll(w);
|
|
579
|
-
|
|
580
|
-
have_block ? rb_yield(pos) : rb_ary_push(ary, pos);
|
|
754
|
+
SB_EMIT(SSIZET2NUM((byte_start + wi * 8) * 8 + bit));
|
|
581
755
|
w &= w - 1;
|
|
582
756
|
}
|
|
583
757
|
}
|
|
584
|
-
for (ssize_t bi = n_words << 3; bi < len; bi++) {
|
|
758
|
+
for (ssize_t bi = byte_start + (n_words << 3); bi < len; bi++) {
|
|
585
759
|
unsigned int b = str[bi];
|
|
760
|
+
if (target == 0) b = (~b) & 0xFF;
|
|
586
761
|
while (b != 0) {
|
|
587
762
|
int bit = sb_ctz8(b);
|
|
588
|
-
|
|
589
|
-
have_block ? rb_yield(pos) : rb_ary_push(ary, pos);
|
|
763
|
+
SB_EMIT(SSIZET2NUM(bi * 8 + bit));
|
|
590
764
|
b &= b - 1;
|
|
591
765
|
}
|
|
592
766
|
}
|
|
593
767
|
#else
|
|
594
|
-
for (ssize_t bi =
|
|
768
|
+
for (ssize_t bi = byte_start; bi < len; bi++) {
|
|
595
769
|
unsigned int b = str[bi];
|
|
770
|
+
if (target == 0) b = (~b) & 0xFF;
|
|
596
771
|
while (b != 0) {
|
|
597
772
|
int bit = sb_ctz8(b);
|
|
598
|
-
|
|
599
|
-
have_block ? rb_yield(pos) : rb_ary_push(ary, pos);
|
|
773
|
+
SB_EMIT(SSIZET2NUM(bi * 8 + bit));
|
|
600
774
|
b &= b - 1;
|
|
601
775
|
}
|
|
602
776
|
}
|
|
603
777
|
#endif
|
|
604
778
|
}
|
|
605
779
|
else {
|
|
606
|
-
|
|
780
|
+
/* lsb_first: false => byte order preserved, bits 7..0 map to logical 0..7.
|
|
781
|
+
* In the first (possibly partial) byte, skip the top bit_lo bits. */
|
|
782
|
+
for (ssize_t bi = byte_start; bi < len; bi++) {
|
|
607
783
|
unsigned int b = str[bi];
|
|
784
|
+
if (target == 0) b = (~b) & 0xFF;
|
|
785
|
+
if (bi == byte_start && bit_lo != 0)
|
|
786
|
+
b &= (1u << (8 - bit_lo)) - 1; /* clear top bit_lo bits */
|
|
608
787
|
while (b != 0) {
|
|
609
788
|
int bit = sb_highest_bit8(b);
|
|
610
789
|
ssize_t physical = bi * 8 + bit;
|
|
611
|
-
|
|
612
|
-
have_block ? rb_yield(pos) : rb_ary_push(ary, pos);
|
|
790
|
+
SB_EMIT(SSIZET2NUM(logical_to_physical(physical, 0)));
|
|
613
791
|
b ^= (1u << bit);
|
|
614
792
|
}
|
|
615
793
|
}
|
|
616
794
|
}
|
|
617
795
|
|
|
618
|
-
|
|
796
|
+
#undef SB_EMIT
|
|
797
|
+
}
|
|
798
|
+
|
|
799
|
+
static VALUE
|
|
800
|
+
rb_str_each_bit_offset(int argc, VALUE *argv, VALUE self)
|
|
801
|
+
{
|
|
802
|
+
RETURN_ENUMERATOR(self, argc, argv);
|
|
803
|
+
|
|
804
|
+
VALUE bit_val, start_offset_v = Qnil, opts = Qnil;
|
|
805
|
+
rb_scan_args(argc, argv, "11:", &bit_val, &start_offset_v, &opts);
|
|
806
|
+
validate_option_hash(opts, SB_KW_LSB_FIRST);
|
|
807
|
+
int lsb_first = parse_lsb_first_opt(opts);
|
|
808
|
+
int target = parse_bit_target(bit_val);
|
|
809
|
+
ssize_t start_offset = parse_start_offset(start_offset_v);
|
|
810
|
+
|
|
811
|
+
emit_bit_offsets((const unsigned char *)RSTRING_PTR(self), RSTRING_LEN(self),
|
|
812
|
+
target, lsb_first, start_offset, Qnil);
|
|
813
|
+
return self;
|
|
814
|
+
}
|
|
815
|
+
|
|
816
|
+
static VALUE
|
|
817
|
+
rb_str_bit_offsets(int argc, VALUE *argv, VALUE self)
|
|
818
|
+
{
|
|
819
|
+
VALUE bit_val, start_offset_v = Qnil, opts = Qnil;
|
|
820
|
+
rb_scan_args(argc, argv, "11:", &bit_val, &start_offset_v, &opts);
|
|
821
|
+
validate_option_hash(opts, SB_KW_LSB_FIRST);
|
|
822
|
+
int lsb_first = parse_lsb_first_opt(opts);
|
|
823
|
+
int target = parse_bit_target(bit_val);
|
|
824
|
+
ssize_t start_offset = parse_start_offset(start_offset_v);
|
|
825
|
+
|
|
826
|
+
ssize_t len = RSTRING_LEN(self);
|
|
827
|
+
const unsigned char *str = (const unsigned char *)RSTRING_PTR(self);
|
|
828
|
+
|
|
829
|
+
if (rb_block_given_p()) {
|
|
830
|
+
emit_bit_offsets(str, len, target, lsb_first, start_offset, Qnil);
|
|
831
|
+
return self;
|
|
832
|
+
}
|
|
833
|
+
|
|
834
|
+
/* Pre-size the Array using popcount to avoid repeated reallocation.
|
|
835
|
+
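* For target=0 the count is (len * 8 - popcount). Both counts cover the whole string, so with a nonzero start_offset they are only an upper bound on the capacity needed. */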
|
|
836
|
+
ssize_t set_count = count_set_bits(str, len);
|
|
837
|
+
ssize_t count = (target == 1) ? set_count : (ssize_t)(SB_BIT_LEN(len) - set_count);
|
|
838
|
+
VALUE ary = rb_ary_new_capa(count);
|
|
839
|
+
emit_bit_offsets(str, len, target, lsb_first, start_offset, ary);
|
|
840
|
+
return ary;
|
|
619
841
|
}
|
|
620
842
|
|
|
621
843
|
/* multi-bit mutation ------------------------------------------------------ */
|
|
@@ -712,40 +934,36 @@ bit_copy_core(unsigned char *dst, ssize_t dst_bit_off,
|
|
|
712
934
|
if (tmp != stack_tmp) ruby_xfree(tmp);
|
|
713
935
|
}
|
|
714
936
|
|
|
715
|
-
/*
|
|
716
|
-
* String#bit_slice(range) -> String
|
|
717
|
-
*
|
|
718
|
-
* str = "\xFF\x00" # 11111111 00000000
|
|
719
|
-
* str.bit_slice(4, 8) # => "\xF0" (11110000)
|
|
720
|
-
*/
|
|
937
|
+
/* Extract a sub-sequence of bits into a new String. */
|
|
721
938
|
static VALUE
|
|
722
939
|
rb_str_bit_slice(int argc, VALUE *argv, VALUE self)
|
|
723
940
|
{
|
|
724
941
|
ssize_t src_len = RSTRING_LEN(self);
|
|
725
|
-
|
|
726
|
-
ssize_t
|
|
942
|
+
int64_t total_bits = SB_BIT_LEN(src_len);
|
|
943
|
+
ssize_t bit_offset, bit_length;
|
|
727
944
|
VALUE v0, v1, opts;
|
|
728
945
|
int n_pos = rb_scan_args(argc, argv, "11:", &v0, &v1, &opts);
|
|
729
946
|
validate_option_hash(opts, SB_KW_LSB_FIRST);
|
|
730
947
|
int lsb_first = parse_lsb_first_opt(opts);
|
|
731
948
|
|
|
732
949
|
if (n_pos == 1 && rb_obj_is_kind_of(v0, rb_cRange)) {
|
|
950
|
+
sb_range_validate_endpoints(v0);
|
|
733
951
|
ssize_t beg, len;
|
|
734
952
|
if (!RTEST(sb_range_beg_len(v0, &beg, &len, total_bits, 0))) {
|
|
735
953
|
return Qnil;
|
|
736
954
|
}
|
|
737
|
-
|
|
738
|
-
|
|
955
|
+
bit_offset = beg;
|
|
956
|
+
bit_length = len;
|
|
739
957
|
}
|
|
740
958
|
else if (n_pos == 2) {
|
|
741
959
|
if (!rb_integer_type_p(v0) || !rb_integer_type_p(v1)) {
|
|
742
960
|
return Qnil;
|
|
743
961
|
}
|
|
744
962
|
|
|
745
|
-
|
|
746
|
-
|
|
963
|
+
bit_offset = integer_to_bit_idx(v0);
|
|
964
|
+
bit_length = integer_to_bit_idx(v1);
|
|
747
965
|
|
|
748
|
-
if (
|
|
966
|
+
if (bit_offset < 0 || bit_length < 0) return Qnil;
|
|
749
967
|
}
|
|
750
968
|
else if (n_pos == 1) {
|
|
751
969
|
return Qnil;
|
|
@@ -755,13 +973,13 @@ rb_str_bit_slice(int argc, VALUE *argv, VALUE self)
|
|
|
755
973
|
"wrong number of arguments (given %d, expected 1 or 2)", n_pos);
|
|
756
974
|
}
|
|
757
975
|
|
|
758
|
-
if (
|
|
759
|
-
|
|
760
|
-
if (
|
|
976
|
+
if (bit_offset > total_bits) return Qnil;
|
|
977
|
+
int64_t available = total_bits - bit_offset;
|
|
978
|
+
if (bit_length > available) bit_length = (ssize_t)available;
|
|
761
979
|
|
|
762
|
-
if (
|
|
980
|
+
if (bit_length == 0) return rb_str_new("", 0);
|
|
763
981
|
|
|
764
|
-
ssize_t out_bytes = (
|
|
982
|
+
ssize_t out_bytes = (bit_length + 7) / 8;
|
|
765
983
|
VALUE result = rb_str_buf_new(out_bytes);
|
|
766
984
|
rb_str_resize(result, out_bytes);
|
|
767
985
|
rb_enc_associate(result, rb_enc_get(self));
|
|
@@ -771,17 +989,17 @@ rb_str_bit_slice(int argc, VALUE *argv, VALUE self)
|
|
|
771
989
|
memset(dst, 0, out_bytes);
|
|
772
990
|
|
|
773
991
|
if (lsb_first) {
|
|
774
|
-
bit_copy_core(dst, 0, src, src_len,
|
|
992
|
+
bit_copy_core(dst, 0, src, src_len, bit_offset, bit_length);
|
|
775
993
|
} else {
|
|
776
994
|
ssize_t dst_bit = 0;
|
|
777
|
-
ssize_t start_byte =
|
|
778
|
-
ssize_t end_byte = (
|
|
995
|
+
ssize_t start_byte = bit_offset >> 3;
|
|
996
|
+
ssize_t end_byte = (bit_offset + bit_length - 1) >> 3;
|
|
779
997
|
|
|
780
998
|
for (ssize_t b = start_byte; b <= end_byte; b++) {
|
|
781
999
|
ssize_t b_start_l = b << 3;
|
|
782
1000
|
ssize_t b_end_l = b_start_l + 7;
|
|
783
|
-
ssize_t l_min = (
|
|
784
|
-
ssize_t l_max = ((
|
|
1001
|
+
ssize_t l_min = (bit_offset > b_start_l) ? bit_offset : b_start_l;
|
|
1002
|
+
ssize_t l_max = ((bit_offset + bit_length - 1) < b_end_l) ? (bit_offset + bit_length - 1) : b_end_l;
|
|
785
1003
|
|
|
786
1004
|
ssize_t p_min = b_start_l + (7 - (l_max & 7L));
|
|
787
1005
|
ssize_t p_max = b_start_l + (7 - (l_min & 7L));
|
|
@@ -805,8 +1023,8 @@ enum sb_mutation_op {
|
|
|
805
1023
|
static VALUE
|
|
806
1024
|
rb_str_mutate_bits(int argc, VALUE *argv, VALUE self, enum sb_mutation_op op)
|
|
807
1025
|
{
|
|
808
|
-
VALUE target, opts;
|
|
809
|
-
rb_scan_args(argc, argv, "
|
|
1026
|
+
VALUE target, bit_length_v = Qnil, opts = Qnil;
|
|
1027
|
+
rb_scan_args(argc, argv, "11:", &target, &bit_length_v, &opts);
|
|
810
1028
|
validate_option_hash(opts, SB_KW_LSB_FIRST);
|
|
811
1029
|
int lsb_first = parse_lsb_first_opt(opts);
|
|
812
1030
|
|
|
@@ -814,18 +1032,48 @@ rb_str_mutate_bits(int argc, VALUE *argv, VALUE self, enum sb_mutation_op op)
|
|
|
814
1032
|
unsigned char *ptr = (unsigned char *)RSTRING_PTR(self);
|
|
815
1033
|
|
|
816
1034
|
if (rb_integer_type_p(target)) {
|
|
817
|
-
|
|
818
|
-
|
|
819
|
-
|
|
820
|
-
|
|
821
|
-
|
|
822
|
-
|
|
1035
|
+
if (NIL_P(bit_length_v)) {
|
|
1036
|
+
/* Single-bit form: bit_set(n) */
|
|
1037
|
+
ssize_t idx = check_bit_index(self, target, lsb_first);
|
|
1038
|
+
unsigned char mask = (unsigned char)(1u << (idx % 8));
|
|
1039
|
+
switch (op) {
|
|
1040
|
+
case SB_MUT_SET: ptr[idx / 8] |= mask; break;
|
|
1041
|
+
case SB_MUT_CLEAR: ptr[idx / 8] &= (unsigned char)~mask; break;
|
|
1042
|
+
case SB_MUT_FLIP: ptr[idx / 8] ^= mask; break;
|
|
1043
|
+
}
|
|
1044
|
+
return self;
|
|
1045
|
+
}
|
|
1046
|
+
/* 2-arg form: bit_set(bit_offset, bit_length) */
|
|
1047
|
+
if (!rb_integer_type_p(bit_length_v))
|
|
1048
|
+
rb_raise(rb_eTypeError, "bit_length must be an integer");
|
|
1049
|
+
ssize_t bit_offset = integer_to_bit_idx(target);
|
|
1050
|
+
if (bit_offset < 0)
|
|
1051
|
+
rb_raise(rb_eIndexError, "bit_offset must be non-negative");
|
|
1052
|
+
ssize_t bit_length = integer_to_bit_idx(bit_length_v);
|
|
1053
|
+
if (bit_length < 0)
|
|
1054
|
+
rb_raise(rb_eArgError, "bit_length must be non-negative");
|
|
1055
|
+
if (bit_length == 0) return self;
|
|
1056
|
+
int64_t total_bits = SB_BIT_LEN(RSTRING_LEN(self));
|
|
1057
|
+
if (bit_offset >= total_bits || bit_offset + bit_length > total_bits)
|
|
1058
|
+
rb_raise(rb_eIndexError, "bit range out of range");
|
|
1059
|
+
for (ssize_t logical = bit_offset; logical < bit_offset + bit_length; logical++) {
|
|
1060
|
+
ssize_t idx = logical_to_physical(logical, lsb_first);
|
|
1061
|
+
unsigned char mask = (unsigned char)(1u << (idx % 8));
|
|
1062
|
+
switch (op) {
|
|
1063
|
+
case SB_MUT_SET: ptr[idx / 8] |= mask; break;
|
|
1064
|
+
case SB_MUT_CLEAR: ptr[idx / 8] &= (unsigned char)~mask; break;
|
|
1065
|
+
case SB_MUT_FLIP: ptr[idx / 8] ^= mask; break;
|
|
1066
|
+
}
|
|
823
1067
|
}
|
|
824
1068
|
return self;
|
|
825
1069
|
}
|
|
826
1070
|
|
|
1071
|
+
if (!NIL_P(bit_length_v))
|
|
1072
|
+
rb_raise(rb_eArgError, "wrong number of arguments");
|
|
1073
|
+
|
|
827
1074
|
if (rb_obj_is_kind_of(target, rb_cRange)) {
|
|
828
|
-
|
|
1075
|
+
sb_range_validate_endpoints(target);
|
|
1076
|
+
int64_t total_bits = SB_BIT_LEN(RSTRING_LEN(self));
|
|
829
1077
|
ssize_t beg, len;
|
|
830
1078
|
|
|
831
1079
|
/* err=0 returns Qnil for out-of-range begin (after negative normalization);
|
|
@@ -836,19 +1084,20 @@ rb_str_mutate_bits(int argc, VALUE *argv, VALUE self, enum sb_mutation_op op)
|
|
|
836
1084
|
|
|
837
1085
|
/* err=0 silently clamps end > total. Detect that and raise instead,
|
|
838
1086
|
* to stay consistent with bit_splice and single-bit mutation. */
|
|
839
|
-
VALUE
|
|
840
|
-
|
|
841
|
-
|
|
842
|
-
|
|
843
|
-
|
|
844
|
-
ssize_t
|
|
1087
|
+
VALUE rng_beg_unused, rng_end_v;
|
|
1088
|
+
int excl;
|
|
1089
|
+
rb_range_values(target, &rng_beg_unused, &rng_end_v, &excl);
|
|
1090
|
+
(void)rng_beg_unused;
|
|
1091
|
+
if (!NIL_P(rng_end_v)) {
|
|
1092
|
+
ssize_t end_val = integer_to_bit_idx(rng_end_v);
|
|
1093
|
+
ssize_t end_excl = excl ? end_val : end_val + 1;
|
|
845
1094
|
if (end_excl > total_bits) {
|
|
846
1095
|
rb_raise(rb_eIndexError, "bit range out of range");
|
|
847
1096
|
}
|
|
848
1097
|
}
|
|
849
1098
|
|
|
850
1099
|
for (ssize_t logical = beg; logical < beg + len; logical++) {
|
|
851
|
-
ssize_t idx =
|
|
1100
|
+
ssize_t idx = logical_to_physical(logical, lsb_first);
|
|
852
1101
|
unsigned char mask = (unsigned char)(1u << (idx % 8));
|
|
853
1102
|
switch (op) {
|
|
854
1103
|
case SB_MUT_SET: ptr[idx / 8] |= mask; break;
|
|
@@ -864,19 +1113,19 @@ rb_str_mutate_bits(int argc, VALUE *argv, VALUE self, enum sb_mutation_op op)
|
|
|
864
1113
|
}
|
|
865
1114
|
|
|
866
1115
|
/* Method entry point that forwards to rb_str_mutate_bits with SB_MUT_SET (ORs the selected bits in). */
static VALUE
|
|
867
|
-
|
|
1116
|
+
rb_str_bit_set(int argc, VALUE *argv, VALUE self)
|
|
868
1117
|
{
|
|
869
1118
|
return rb_str_mutate_bits(argc, argv, self, SB_MUT_SET);
|
|
870
1119
|
}
|
|
871
1120
|
|
|
872
1121
|
/* Method entry point that forwards to rb_str_mutate_bits with SB_MUT_CLEAR (masks the selected bits out). */
static VALUE
|
|
873
|
-
|
|
1122
|
+
rb_str_bit_clear(int argc, VALUE *argv, VALUE self)
|
|
874
1123
|
{
|
|
875
1124
|
return rb_str_mutate_bits(argc, argv, self, SB_MUT_CLEAR);
|
|
876
1125
|
}
|
|
877
1126
|
|
|
878
1127
|
/* Method entry point that forwards to rb_str_mutate_bits with SB_MUT_FLIP (XORs the selected bits). */
static VALUE
|
|
879
|
-
|
|
1128
|
+
rb_str_bit_flip(int argc, VALUE *argv, VALUE self)
|
|
880
1129
|
{
|
|
881
1130
|
return rb_str_mutate_bits(argc, argv, self, SB_MUT_FLIP);
|
|
882
1131
|
}
|
|
@@ -902,101 +1151,141 @@ alloc_result(VALUE self)
|
|
|
902
1151
|
return result;
|
|
903
1152
|
}
|
|
904
1153
|
|
|
905
|
-
|
|
906
|
-
|
|
907
|
-
|
|
908
|
-
|
|
909
|
-
|
|
910
|
-
|
|
911
|
-
|
|
912
|
-
|
|
913
|
-
|
|
914
|
-
|
|
915
|
-
|
|
916
|
-
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
|
|
924
|
-
|
|
925
|
-
|
|
926
|
-
|
|
927
|
-
|
|
928
|
-
|
|
929
|
-
|
|
930
|
-
|
|
931
|
-
|
|
932
|
-
|
|
933
|
-
|
|
934
|
-
|
|
935
|
-
|
|
936
|
-
|
|
937
|
-
|
|
938
|
-
|
|
939
|
-
|
|
940
|
-
|
|
941
|
-
|
|
942
|
-
|
|
943
|
-
|
|
944
|
-
|
|
945
|
-
|
|
946
|
-
|
|
947
|
-
|
|
948
|
-
|
|
949
|
-
|
|
950
|
-
|
|
951
|
-
|
|
952
|
-
|
|
953
|
-
{
|
|
954
|
-
|
|
955
|
-
|
|
956
|
-
|
|
957
|
-
|
|
958
|
-
|
|
959
|
-
|
|
960
|
-
|
|
961
|
-
|
|
962
|
-
|
|
963
|
-
|
|
964
|
-
|
|
965
|
-
|
|
966
|
-
|
|
967
|
-
|
|
968
|
-
|
|
969
|
-
|
|
970
|
-
|
|
971
|
-
|
|
972
|
-
|
|
973
|
-
|
|
974
|
-
|
|
975
|
-
|
|
976
|
-
|
|
977
|
-
|
|
978
|
-
|
|
979
|
-
|
|
980
|
-
|
|
981
|
-
|
|
982
|
-
|
|
983
|
-
|
|
984
|
-
|
|
985
|
-
|
|
986
|
-
|
|
987
|
-
|
|
988
|
-
|
|
989
|
-
|
|
990
|
-
|
|
991
|
-
|
|
992
|
-
|
|
993
|
-
|
|
994
|
-
|
|
995
|
-
|
|
996
|
-
|
|
997
|
-
|
|
998
|
-
|
|
999
|
-
|
|
1154
|
+
/*
 * Bitwise op kernels over raw byte buffers.
 *
 * Each generated function works in three phases:
 *   1. 32-byte chunks, handled as four uint64_t words;
 *   2. any remaining whole 8-byte words;
 *   3. the final < 8 bytes, one byte at a time.
 *
 * Words are moved in and out with memcpy so no unaligned uint64_t access is
 * ever performed. Within a chunk every input word is loaded before any output
 * word is stored, so `dst` may be the same buffer as an input (the in-place
 * `!` methods rely on this).
 *
 * The unary form (NOT) takes a single `src`; the binary forms (AND/OR/XOR)
 * take `a` and `b`. `expr_word` is applied to uint64_t values, `expr_byte`
 * to single bytes.
 */
#define SB_DEFINE_UNARY_KERNEL(name, expr_word, expr_byte)                    \
    static void                                                               \
    name(unsigned char *dst, const unsigned char *src, ssize_t len)           \
    {                                                                         \
        const ssize_t end32 = len & ~31L;                                     \
        const ssize_t end8 = len & ~7L;                                       \
        ssize_t pos = 0;                                                      \
        while (pos < end32) {                                                 \
            uint64_t w[4];                                                    \
            memcpy(w, src + pos, sizeof w);                                   \
            w[0] = (expr_word(w[0]));                                         \
            w[1] = (expr_word(w[1]));                                         \
            w[2] = (expr_word(w[2]));                                         \
            w[3] = (expr_word(w[3]));                                         \
            memcpy(dst + pos, w, sizeof w);                                   \
            pos += 32;                                                        \
        }                                                                     \
        while (pos < end8) {                                                  \
            uint64_t w;                                                       \
            memcpy(&w, src + pos, sizeof w);                                  \
            w = (expr_word(w));                                               \
            memcpy(dst + pos, &w, sizeof w);                                  \
            pos += 8;                                                         \
        }                                                                     \
        while (pos < len) {                                                   \
            dst[pos] = (expr_byte(src[pos]));                                 \
            pos++;                                                            \
        }                                                                     \
    }

#define SB_DEFINE_BINARY_KERNEL(name, expr_word, expr_byte)                   \
    static void                                                               \
    name(unsigned char *dst, const unsigned char *a, const unsigned char *b,  \
         ssize_t len)                                                         \
    {                                                                         \
        const ssize_t end32 = len & ~31L;                                     \
        const ssize_t end8 = len & ~7L;                                       \
        ssize_t pos = 0;                                                      \
        while (pos < end32) {                                                 \
            uint64_t x[4], y[4];                                              \
            memcpy(x, a + pos, sizeof x);                                     \
            memcpy(y, b + pos, sizeof y);                                     \
            x[0] = expr_word(x[0], y[0]);                                     \
            x[1] = expr_word(x[1], y[1]);                                     \
            x[2] = expr_word(x[2], y[2]);                                     \
            x[3] = expr_word(x[3], y[3]);                                     \
            memcpy(dst + pos, x, sizeof x);                                   \
            pos += 32;                                                        \
        }                                                                     \
        while (pos < end8) {                                                  \
            uint64_t x, y;                                                    \
            memcpy(&x, a + pos, sizeof x);                                    \
            memcpy(&y, b + pos, sizeof y);                                    \
            x = expr_word(x, y);                                              \
            memcpy(dst + pos, &x, sizeof x);                                  \
            pos += 8;                                                         \
        }                                                                     \
        while (pos < len) {                                                   \
            dst[pos] = expr_byte(a[pos], b[pos]);                             \
            pos++;                                                            \
        }                                                                     \
    }

/* Per-operation expressions: *_WORD for uint64_t operands, *_BYTE for bytes. */
#define SB_NOT_WORD(x) (~(x))
#define SB_NOT_BYTE(x) ((unsigned char)~(x))
#define SB_AND_WORD(x, y) ((x) & (y))
#define SB_AND_BYTE(x, y) ((unsigned char)((x) & (y)))
#define SB_OR_WORD(x, y) ((x) | (y))
#define SB_OR_BYTE(x, y) ((unsigned char)((x) | (y)))
#define SB_XOR_WORD(x, y) ((x) ^ (y))
#define SB_XOR_BYTE(x, y) ((unsigned char)((x) ^ (y)))

SB_DEFINE_UNARY_KERNEL (kern_not, SB_NOT_WORD, SB_NOT_BYTE)
SB_DEFINE_BINARY_KERNEL(kern_and, SB_AND_WORD, SB_AND_BYTE)
SB_DEFINE_BINARY_KERNEL(kern_or,  SB_OR_WORD,  SB_OR_BYTE)
SB_DEFINE_BINARY_KERNEL(kern_xor, SB_XOR_WORD, SB_XOR_BYTE)
|
|
1239
|
+
|
|
1240
|
+
/* Method wrappers: allocate-and-return form, and the in-place (!) form. */
|
|
1241
|
+
#define SB_DEFINE_UNARY_METHODS(op_name, kernel) \
|
|
1242
|
+
static VALUE \
|
|
1243
|
+
rb_str_bitwise_##op_name(VALUE self) \
|
|
1244
|
+
{ \
|
|
1245
|
+
ssize_t len = RSTRING_LEN(self); \
|
|
1246
|
+
VALUE result = alloc_result(self); \
|
|
1247
|
+
kernel((unsigned char *)RSTRING_PTR(result), \
|
|
1248
|
+
(const unsigned char *)RSTRING_PTR(self), len); \
|
|
1249
|
+
return result; \
|
|
1250
|
+
} \
|
|
1251
|
+
static VALUE \
|
|
1252
|
+
rb_str_bitwise_##op_name##_bang(VALUE self) \
|
|
1253
|
+
{ \
|
|
1254
|
+
rb_str_modify(self); \
|
|
1255
|
+
ssize_t len = RSTRING_LEN(self); \
|
|
1256
|
+
unsigned char *ptr = (unsigned char *)RSTRING_PTR(self); \
|
|
1257
|
+
kernel(ptr, ptr, len); \
|
|
1258
|
+
return self; \
|
|
1259
|
+
}
|
|
1260
|
+
|
|
1261
|
+
#define SB_DEFINE_BINARY_METHODS(op_name, kernel) \
|
|
1262
|
+
static VALUE \
|
|
1263
|
+
rb_str_bitwise_##op_name(VALUE self, VALUE other) \
|
|
1264
|
+
{ \
|
|
1265
|
+
check_binary_op_lengths(self, other); \
|
|
1266
|
+
ssize_t len = RSTRING_LEN(self); \
|
|
1267
|
+
VALUE result = alloc_result(self); \
|
|
1268
|
+
kernel((unsigned char *)RSTRING_PTR(result), \
|
|
1269
|
+
(const unsigned char *)RSTRING_PTR(self), \
|
|
1270
|
+
(const unsigned char *)RSTRING_PTR(other), len); \
|
|
1271
|
+
return result; \
|
|
1272
|
+
} \
|
|
1273
|
+
static VALUE \
|
|
1274
|
+
rb_str_bitwise_##op_name##_bang(VALUE self, VALUE other) \
|
|
1275
|
+
{ \
|
|
1276
|
+
check_binary_op_lengths(self, other); \
|
|
1277
|
+
rb_str_modify(self); \
|
|
1278
|
+
ssize_t len = RSTRING_LEN(self); \
|
|
1279
|
+
unsigned char *a = (unsigned char *)RSTRING_PTR(self); \
|
|
1280
|
+
const unsigned char *b = (const unsigned char *)RSTRING_PTR(other); \
|
|
1281
|
+
kernel(a, a, b, len); \
|
|
1282
|
+
return self; \
|
|
1283
|
+
}
|
|
1284
|
+
|
|
1285
|
+
SB_DEFINE_UNARY_METHODS (not, kern_not)
|
|
1286
|
+
SB_DEFINE_BINARY_METHODS(and, kern_and)
|
|
1287
|
+
SB_DEFINE_BINARY_METHODS(or, kern_or)
|
|
1288
|
+
SB_DEFINE_BINARY_METHODS(xor, kern_xor)
|
|
1000
1289
|
|
|
1001
1290
|
/* packed bit-field iteration ---------------------------------------------- */
|
|
1002
1291
|
/*
|
|
@@ -1051,25 +1340,7 @@ extract_uint64(const unsigned char *src, ssize_t src_len,
|
|
|
1051
1340
|
return val;
|
|
1052
1341
|
}
|
|
1053
1342
|
|
|
1054
|
-
/*
|
|
1055
|
-
* String#each_bit_field(*bitlens, lsb_first: true) -> Enumerator
|
|
1056
|
-
*
|
|
1057
|
-
* Iterates over the string as a sequence of packed bit-field records. Each
|
|
1058
|
-
* positional argument specifies the width (in bits) of one field in the record.
|
|
1059
|
-
* On each iteration, one Integer per field is yielded (LSB-first bit layout).
|
|
1060
|
-
* Each bitlen must be in the range 1..64.
|
|
1061
|
-
*
|
|
1062
|
-
* lsb_first: true (default) -- intra-byte field extraction uses bit 0..7.
|
|
1063
|
-
* lsb_first: false -- intra-byte field extraction uses bit 7..0.
|
|
1064
|
-
*
|
|
1065
|
-
* Incomplete trailing bits (when bytesize*8 is not a multiple of sum(bitlens))
|
|
1066
|
-
* are silently dropped, matching the behavior of Enumerable#each_slice.
|
|
1067
|
-
*
|
|
1068
|
-
* Porting to Ruby Core:
|
|
1069
|
-
* 1. Move extract_uint64 and this function into string.c.
|
|
1070
|
-
* 2. Register with rb_define_method in Init_String().
|
|
1071
|
-
* 3. Replace ALLOCA_N with stack arrays for small field counts and heap otherwise.
|
|
1072
|
-
*/
|
|
1343
|
+
/* Yield each packed bit-field record as one Integer per field. */
|
|
1073
1344
|
static VALUE
|
|
1074
1345
|
rb_str_each_bit_field(int argc, VALUE *argv, VALUE self)
|
|
1075
1346
|
{
|
|
@@ -1096,7 +1367,7 @@ rb_str_each_bit_field(int argc, VALUE *argv, VALUE self)
|
|
|
1096
1367
|
rb_raise(rb_eArgError, "bitlen must be positive");
|
|
1097
1368
|
}
|
|
1098
1369
|
if (bl > 64) {
|
|
1099
|
-
rb_raise(rb_eArgError, "bitlen must be <= 64 (got %
|
|
1370
|
+
rb_raise(rb_eArgError, "bitlen must be <= 64 (got %" PRIdPTR ")", (intptr_t)bl);
|
|
1100
1371
|
}
|
|
1101
1372
|
bitlens[f] = bl;
|
|
1102
1373
|
step += bl;
|
|
@@ -1105,8 +1376,8 @@ rb_str_each_bit_field(int argc, VALUE *argv, VALUE self)
|
|
|
1105
1376
|
int lsb_first = parse_lsb_first_opt(opts);
|
|
1106
1377
|
|
|
1107
1378
|
ssize_t src_len = RSTRING_LEN(self);
|
|
1108
|
-
|
|
1109
|
-
ssize_t iterations = total_bits / step;
|
|
1379
|
+
int64_t total_bits = SB_BIT_LEN(src_len);
|
|
1380
|
+
ssize_t iterations = (ssize_t)(total_bits / step);
|
|
1110
1381
|
|
|
1111
1382
|
VALUE *field_vals = ALLOCA_N(VALUE, num_fields);
|
|
1112
1383
|
|
|
@@ -1125,22 +1396,7 @@ rb_str_each_bit_field(int argc, VALUE *argv, VALUE self)
|
|
|
1125
1396
|
return self;
|
|
1126
1397
|
}
|
|
1127
1398
|
|
|
1128
|
-
/*
|
|
1129
|
-
* String#bit_fields(*bitlens, lsb_first: true) { |*fields| } -> self
|
|
1130
|
-
*
|
|
1131
|
-
* Non-iterator complement of each_bit_field. Without a block, returns an
|
|
1132
|
-
* Array of all extracted records. With a single bitlen the array is flat
|
|
1133
|
-
* (matching each_bit_field(n).to_a); with multiple bitlens each record is
|
|
1134
|
-
* itself an Array (matching each_bit_field(a, b, ...).to_a).
|
|
1135
|
-
*
|
|
1136
|
-
* With a block, behaves identically to each_bit_field without with: ---
|
|
1137
|
-
* yielding one Integer per field and returning self.
|
|
1138
|
-
*
|
|
1139
|
-
* Porting to Ruby Core:
|
|
1140
|
-
* 1. Move alongside each_bit_field in string.c.
|
|
1141
|
-
* 2. Share extract_uint64 and the bitlen validation logic.
|
|
1142
|
-
* 3. Register with rb_define_method in Init_String().
|
|
1143
|
-
*/
|
|
1399
|
+
/* Non-iterator form of each_bit_field; collect bit-field records into an Array. */
|
|
1144
1400
|
static VALUE
|
|
1145
1401
|
rb_str_bit_fields(int argc, VALUE *argv, VALUE self)
|
|
1146
1402
|
{
|
|
@@ -1165,7 +1421,7 @@ rb_str_bit_fields(int argc, VALUE *argv, VALUE self)
|
|
|
1165
1421
|
rb_raise(rb_eArgError, "bitlen must be positive");
|
|
1166
1422
|
}
|
|
1167
1423
|
if (bl > 64) {
|
|
1168
|
-
rb_raise(rb_eArgError, "bitlen must be <= 64 (got %
|
|
1424
|
+
rb_raise(rb_eArgError, "bitlen must be <= 64 (got %" PRIdPTR ")", (intptr_t)bl);
|
|
1169
1425
|
}
|
|
1170
1426
|
bitlens[f] = bl;
|
|
1171
1427
|
step += bl;
|
|
@@ -1174,8 +1430,8 @@ rb_str_bit_fields(int argc, VALUE *argv, VALUE self)
|
|
|
1174
1430
|
int lsb_first = parse_lsb_first_opt(opts);
|
|
1175
1431
|
|
|
1176
1432
|
ssize_t src_len = RSTRING_LEN(self);
|
|
1177
|
-
|
|
1178
|
-
ssize_t iterations = total_bits / step;
|
|
1433
|
+
int64_t total_bits = SB_BIT_LEN(src_len);
|
|
1434
|
+
ssize_t iterations = (ssize_t)(total_bits / step);
|
|
1179
1435
|
|
|
1180
1436
|
int have_block = rb_block_given_p();
|
|
1181
1437
|
VALUE result = have_block ? Qnil : rb_ary_new_capa(iterations);
|
|
@@ -1207,7 +1463,7 @@ rb_str_bit_fields(int argc, VALUE *argv, VALUE self)
|
|
|
1207
1463
|
|
|
1208
1464
|
/*
|
|
1209
1465
|
* count_run_lsb: count consecutive bits equal to `target` starting at flat
|
|
1210
|
-
* position `
|
|
1466
|
+
* position `bit_offset` (LSB-first). Uses ctz / ctzll to skip bits in bulk:
|
|
1211
1467
|
* - partial first byte: ctz on the inverted masked nibble
|
|
1212
1468
|
* - full 64-bit words (LE): ctzll on the inverted word (64 bits per step)
|
|
1213
1469
|
* - remaining bytes: ctz on the inverted byte
|
|
@@ -1217,11 +1473,11 @@ rb_str_bit_fields(int argc, VALUE *argv, VALUE self)
|
|
|
1217
1473
|
* 2. Share sb_ctz8 / sb_ctzll with the existing set-bit helpers.
|
|
1218
1474
|
*/
|
|
1219
1475
|
static ssize_t
|
|
1220
|
-
count_run_lsb(const unsigned char *src, ssize_t src_len, ssize_t
|
|
1476
|
+
count_run_lsb(const unsigned char *src, ssize_t src_len, ssize_t bit_offset, int target)
|
|
1221
1477
|
{
|
|
1222
|
-
|
|
1223
|
-
ssize_t byte_idx =
|
|
1224
|
-
int bit_off =
|
|
1478
|
+
int64_t max_run = SB_BIT_LEN(src_len) - bit_offset;
|
|
1479
|
+
ssize_t byte_idx = bit_offset >> 3;
|
|
1480
|
+
int bit_off = bit_offset & 7;
|
|
1225
1481
|
ssize_t count = 0;
|
|
1226
1482
|
|
|
1227
1483
|
/* partial first byte: shift pos to bit 0, mask remaining bits */
|
|
@@ -1236,7 +1492,7 @@ count_run_lsb(const unsigned char *src, ssize_t src_len, ssize_t pos, int target
|
|
|
1236
1492
|
count += run;
|
|
1237
1493
|
byte_idx++;
|
|
1238
1494
|
if (run < remaining)
|
|
1239
|
-
return count < max_run ? count : max_run;
|
|
1495
|
+
return (ssize_t)(count < max_run ? count : max_run);
|
|
1240
1496
|
}
|
|
1241
1497
|
|
|
1242
1498
|
#if SB_LITTLE_ENDIAN
|
|
@@ -1250,7 +1506,7 @@ count_run_lsb(const unsigned char *src, ssize_t src_len, ssize_t pos, int target
|
|
|
1250
1506
|
byte_idx += 8;
|
|
1251
1507
|
} else {
|
|
1252
1508
|
count += sb_ctzll(~word);
|
|
1253
|
-
return count < max_run ? count : max_run;
|
|
1509
|
+
return (ssize_t)(count < max_run ? count : max_run);
|
|
1254
1510
|
}
|
|
1255
1511
|
}
|
|
1256
1512
|
#endif
|
|
@@ -1265,218 +1521,146 @@ count_run_lsb(const unsigned char *src, ssize_t src_len, ssize_t pos, int target
|
|
|
1265
1521
|
byte_idx++;
|
|
1266
1522
|
} else {
|
|
1267
1523
|
count += sb_ctz8(~b);
|
|
1268
|
-
return count < max_run ? count : max_run;
|
|
1524
|
+
return (ssize_t)(count < max_run ? count : max_run);
|
|
1269
1525
|
}
|
|
1270
1526
|
}
|
|
1271
1527
|
|
|
1272
|
-
return count < max_run ? count : max_run;
|
|
1528
|
+
return (ssize_t)(count < max_run ? count : max_run);
|
|
1273
1529
|
}
|
|
1274
1530
|
|
|
1275
|
-
/*
|
|
1276
|
-
*
|
|
1277
|
-
* Returns the length of the consecutive run of `bit` starting at flat
|
|
1278
|
-
* position `pos`. Returns nil when `pos` is out of range or the bit at `pos`
|
|
1279
|
-
* does not equal `bit`.
|
|
1280
|
-
*
|
|
1281
|
-
* `bit` accepts 0, 1, false, or true (false/true are aliases for 0/1,
|
|
1282
|
-
* matching the values yielded by each_bit_run).
|
|
1283
|
-
*
|
|
1284
|
-
* Counts forward from `pos` toward higher bit indices.
|
|
1285
|
-
*
|
|
1286
|
-
* Inspired by Gauche Scheme's (bitvector-count-run bit bvec i).
|
|
1287
|
-
*
|
|
1288
|
-
* Uses the same flat LSB-first addressing as bit_at: byte[pos/8] bit pos%8.
|
|
1289
|
-
*
|
|
1290
|
-
* Porting to Ruby Core:
|
|
1291
|
-
* 1. Move to string.c; register in Init_String().
|
|
1292
|
-
* 2. Reuse integer_to_bit_idx for consistent Bignum handling.
|
|
1293
|
-
*/
|
|
1531
|
+
/* Return the length of the consecutive run of `bit` starting at pos, or nil. */
|
|
1294
1532
|
static VALUE
|
|
1295
1533
|
rb_str_bit_run_count(int argc, VALUE *argv, VALUE self)
|
|
1296
1534
|
{
|
|
1297
|
-
VALUE
|
|
1298
|
-
rb_scan_args(argc, argv, "20:", &
|
|
1535
|
+
VALUE bit_offset_v, bit_val, opts;
|
|
1536
|
+
rb_scan_args(argc, argv, "20:", &bit_val, &bit_offset_v, &opts);
|
|
1299
1537
|
validate_option_hash(opts, SB_KW_LSB_FIRST);
|
|
1300
1538
|
int lsb_first = parse_lsb_first_opt(opts);
|
|
1301
1539
|
|
|
1302
|
-
if (!rb_integer_type_p(
|
|
1540
|
+
if (!rb_integer_type_p(bit_offset_v)) {
|
|
1303
1541
|
rb_raise(rb_eTypeError, "position must be an integer");
|
|
1304
1542
|
}
|
|
1305
|
-
int target;
|
|
1306
|
-
|
|
1307
|
-
target = 1;
|
|
1308
|
-
} else if (bit_val == Qfalse || bit_val == INT2FIX(0)) {
|
|
1309
|
-
target = 0;
|
|
1310
|
-
} else {
|
|
1311
|
-
rb_raise(rb_eArgError, "bit must be 0, 1, false, or true");
|
|
1312
|
-
}
|
|
1313
|
-
ssize_t pos = integer_to_bit_idx(pos_val);
|
|
1543
|
+
int target = parse_bit_target(bit_val);
|
|
1544
|
+
ssize_t bit_offset = integer_to_bit_idx(bit_offset_v);
|
|
1314
1545
|
ssize_t src_len = RSTRING_LEN(self);
|
|
1315
|
-
if (
|
|
1546
|
+
if (bit_offset < 0 || bit_offset >= SB_BIT_LEN(src_len)) return Qnil;
|
|
1316
1547
|
|
|
1317
1548
|
const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
|
|
1318
1549
|
if (lsb_first) {
|
|
1319
|
-
if (((src[
|
|
1320
|
-
return SSIZET2NUM(count_run_lsb(src, src_len,
|
|
1550
|
+
if (((src[bit_offset >> 3] >> (bit_offset & 7)) & 1) != target) return Qnil;
|
|
1551
|
+
return SSIZET2NUM(count_run_lsb(src, src_len, bit_offset, target));
|
|
1321
1552
|
}
|
|
1322
1553
|
|
|
1323
|
-
if (logical_get_bit(src,
|
|
1554
|
+
if (logical_get_bit(src, bit_offset, 0) != target) return Qnil;
|
|
1324
1555
|
|
|
1325
1556
|
ssize_t run = 1;
|
|
1326
|
-
|
|
1327
|
-
while (
|
|
1557
|
+
int64_t total_bits = SB_BIT_LEN(src_len);
|
|
1558
|
+
while (bit_offset + run < total_bits && logical_get_bit(src, bit_offset + run, 0) == target) {
|
|
1328
1559
|
run++;
|
|
1329
1560
|
}
|
|
1330
1561
|
return SSIZET2NUM(run);
|
|
1331
1562
|
}
|
|
1332
1563
|
|
|
1333
|
-
/*
|
|
1334
|
-
|
|
1335
|
-
*
|
|
1336
|
-
* Yields (bit, run_length) pairs for each consecutive run of identical bits.
|
|
1337
|
-
* Run-length boundary detection and counting happen entirely in C, replacing
|
|
1338
|
-
* the Ruby-level current/count state machine required when using each_bit.
|
|
1564
|
+
/* Yield (bit, offset, run_length) triples for each consecutive run of identical bits. */
|
|
1565
|
+
/* Unified emitter for each_bit_run / bit_runs.
|
|
1339
1566
|
*
|
|
1340
|
-
*
|
|
1341
|
-
*
|
|
1342
|
-
*
|
|
1567
|
+
* Walks the bitmap in (bit, run_length) chunks. Yields each pair (when
|
|
1568
|
+
* ary == Qnil) or pushes (bit, run_length) Arrays to the pre-allocated
|
|
1569
|
+
* result. The LSB-first path uses the fast count_run_lsb (word-at-a-time
|
|
1570
|
+
* via ctzll); the MSB-first path scans bit by bit through logical_get_bit.
|
|
1343
1571
|
*
|
|
1344
|
-
*
|
|
1345
|
-
*
|
|
1346
|
-
*
|
|
1347
|
-
* Porting to Ruby Core:
|
|
1348
|
-
* 1. Move to string.c; register in Init_String().
|
|
1349
|
-
* 2. count_run_lsb / count_run_msb move with it.
|
|
1572
|
+
* self is re-read inside the loop because rb_yield can invoke Ruby code
|
|
1573
|
+
* that mutates the receiver, potentially invalidating RSTRING_PTR.
|
|
1350
1574
|
*/
|
|
1351
|
-
static
|
|
1352
|
-
|
|
1575
|
+
static void
|
|
1576
|
+
emit_bit_runs(VALUE self, int lsb_first, ssize_t start_offset, VALUE ary)
|
|
1353
1577
|
{
|
|
1354
|
-
|
|
1355
|
-
|
|
1356
|
-
|
|
1357
|
-
ssize_t
|
|
1358
|
-
if (src_len == 0) return self;
|
|
1578
|
+
ssize_t src_len = RSTRING_LEN(self);
|
|
1579
|
+
int64_t total_bits = SB_BIT_LEN(src_len);
|
|
1580
|
+
if (src_len == 0 || start_offset >= total_bits) return;
|
|
1581
|
+
ssize_t offset = start_offset;
|
|
1359
1582
|
|
|
1360
|
-
|
|
1583
|
+
#define SB_EMIT_TRIPLE(bval, oval, lval) \
|
|
1584
|
+
do { if (ary == Qnil) rb_yield_values(3, (bval), (oval), (lval)); \
|
|
1585
|
+
else rb_ary_push(ary, rb_ary_new3(3, (bval), (oval), (lval))); } while (0)
|
|
1361
1586
|
|
|
1362
1587
|
if (lsb_first) {
|
|
1363
|
-
|
|
1364
|
-
while (pos < total_bits) {
|
|
1588
|
+
while (offset < total_bits) {
|
|
1365
1589
|
const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
|
|
1366
|
-
int bit
|
|
1367
|
-
ssize_t run = count_run_lsb(src, src_len,
|
|
1368
|
-
|
|
1369
|
-
|
|
1590
|
+
int bit = (src[offset >> 3] >> (offset & 7)) & 1;
|
|
1591
|
+
ssize_t run = count_run_lsb(src, src_len, offset, bit);
|
|
1592
|
+
SB_EMIT_TRIPLE(bit ? Qtrue : Qfalse, SSIZET2NUM(offset), SSIZET2NUM(run));
|
|
1593
|
+
offset += run;
|
|
1370
1594
|
}
|
|
1371
1595
|
}
|
|
1372
1596
|
else {
|
|
1373
|
-
|
|
1374
|
-
while (pos < total_bits) {
|
|
1597
|
+
while (offset < total_bits) {
|
|
1375
1598
|
const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
|
|
1376
|
-
int bit = logical_get_bit(src,
|
|
1599
|
+
int bit = logical_get_bit(src, offset, 0);
|
|
1377
1600
|
ssize_t run = 1;
|
|
1378
|
-
while (
|
|
1601
|
+
while (offset + run < total_bits && logical_get_bit(src, offset + run, 0) == bit) {
|
|
1379
1602
|
run++;
|
|
1380
1603
|
}
|
|
1381
|
-
|
|
1382
|
-
|
|
1604
|
+
SB_EMIT_TRIPLE(bit ? Qtrue : Qfalse, SSIZET2NUM(offset), SSIZET2NUM(run));
|
|
1605
|
+
offset += run;
|
|
1383
1606
|
}
|
|
1384
1607
|
}
|
|
1385
1608
|
|
|
1386
|
-
|
|
1609
|
+
#undef SB_EMIT_TRIPLE
|
|
1387
1610
|
}
|
|
1388
1611
|
|
|
1389
|
-
/* String#bit_runs(lsb_first: true) -> Array
|
|
1390
|
-
* String#bit_runs(lsb_first: true) { |bit, len| } -> self
|
|
1391
|
-
*
|
|
1392
|
-
* Non-iterator complement of each_bit_run. Without a block, collects all
|
|
1393
|
-
* (bit, run_length) pairs into an Array and returns it. With a block,
|
|
1394
|
-
* yields each pair and returns self.
|
|
1395
|
-
*
|
|
1396
|
-
* Follows the same pattern as String#bytes vs String#each_byte.
|
|
1397
|
-
*
|
|
1398
|
-
* Porting to Ruby Core:
|
|
1399
|
-
* 1. Move to string.c alongside each_bit_run; register in Init_String().
|
|
1400
|
-
*/
|
|
1401
1612
|
static VALUE
|
|
1402
|
-
|
|
1613
|
+
rb_str_each_bit_run(int argc, VALUE *argv, VALUE self)
|
|
1403
1614
|
{
|
|
1404
|
-
|
|
1405
|
-
ssize_t src_len = RSTRING_LEN(self);
|
|
1406
|
-
int have_block = rb_block_given_p();
|
|
1615
|
+
RETURN_ENUMERATOR(self, argc, argv);
|
|
1407
1616
|
|
|
1408
|
-
|
|
1617
|
+
VALUE start_offset_v = Qnil, opts = Qnil;
|
|
1618
|
+
rb_scan_args(argc, argv, "01:", &start_offset_v, &opts);
|
|
1619
|
+
validate_option_hash(opts, SB_KW_LSB_FIRST);
|
|
1620
|
+
int lsb_first = parse_lsb_first_opt(opts);
|
|
1621
|
+
ssize_t start_offset = parse_start_offset(start_offset_v);
|
|
1409
1622
|
|
|
1410
|
-
|
|
1411
|
-
|
|
1623
|
+
emit_bit_runs(self, lsb_first, start_offset, Qnil);
|
|
1624
|
+
return self;
|
|
1625
|
+
}
|
|
1412
1626
|
|
|
1413
|
-
|
|
1414
|
-
|
|
1415
|
-
|
|
1416
|
-
|
|
1417
|
-
|
|
1418
|
-
|
|
1419
|
-
|
|
1420
|
-
|
|
1421
|
-
|
|
1422
|
-
|
|
1423
|
-
|
|
1424
|
-
|
|
1425
|
-
|
|
1426
|
-
ssize_t pos = 0;
|
|
1427
|
-
while (pos < total_bits) {
|
|
1428
|
-
const unsigned char *src = (const unsigned char *)RSTRING_PTR(self);
|
|
1429
|
-
int bit = logical_get_bit(src, pos, 0);
|
|
1430
|
-
ssize_t run = 1;
|
|
1431
|
-
while (pos + run < total_bits && logical_get_bit(src, pos + run, 0) == bit) {
|
|
1432
|
-
run++;
|
|
1433
|
-
}
|
|
1434
|
-
VALUE bval = bit ? Qtrue : Qfalse;
|
|
1435
|
-
VALUE lval = SSIZET2NUM(run);
|
|
1436
|
-
have_block ? rb_yield_values(2, bval, lval)
|
|
1437
|
-
: rb_ary_push(result, rb_assoc_new(bval, lval));
|
|
1438
|
-
pos += run;
|
|
1439
|
-
}
|
|
1627
|
+
/* Non-iterator form of each_bit_run; collect run triples into an Array. */
|
|
1628
|
+
static VALUE
|
|
1629
|
+
rb_str_bit_runs(int argc, VALUE *argv, VALUE self)
|
|
1630
|
+
{
|
|
1631
|
+
VALUE start_offset_v = Qnil, opts = Qnil;
|
|
1632
|
+
rb_scan_args(argc, argv, "01:", &start_offset_v, &opts);
|
|
1633
|
+
validate_option_hash(opts, SB_KW_LSB_FIRST);
|
|
1634
|
+
int lsb_first = parse_lsb_first_opt(opts);
|
|
1635
|
+
ssize_t start_offset = parse_start_offset(start_offset_v);
|
|
1636
|
+
|
|
1637
|
+
if (rb_block_given_p()) {
|
|
1638
|
+
emit_bit_runs(self, lsb_first, start_offset, Qnil);
|
|
1639
|
+
return self;
|
|
1440
1640
|
}
|
|
1441
1641
|
|
|
1442
|
-
|
|
1642
|
+
VALUE ary = rb_ary_new();
|
|
1643
|
+
emit_bit_runs(self, lsb_first, start_offset, ary);
|
|
1644
|
+
return ary;
|
|
1443
1645
|
}
|
|
1444
1646
|
|
|
1445
|
-
/*
|
|
1446
|
-
* String#bit_splice(bit_index, bit_length, str, str_bit_index, str_bit_length) -> self
|
|
1447
|
-
* String#bit_splice(range, str) -> self
|
|
1448
|
-
* String#bit_splice(range, str, str_range) -> self
|
|
1449
|
-
*
|
|
1450
|
-
* Writes bits from str into self at bit-level granularity. The inverse of
|
|
1451
|
-
* bit_slice: where bit_slice reads a sub-sequence of bits, bit_splice writes one.
|
|
1452
|
-
*
|
|
1453
|
-
* The destination and source bit lengths must be equal; bit_splice does not
|
|
1454
|
-
* resize self (sub-byte resize is undefined). This mirrors the constraint that
|
|
1455
|
-
* bytesplice imposes when the replacement has the same byte length.
|
|
1456
|
-
*
|
|
1457
|
-
* Negative indices count backward from the end, exactly as in bytesplice.
|
|
1458
|
-
* Returns self.
|
|
1459
|
-
*
|
|
1460
|
-
* Porting to Ruby Core:
|
|
1461
|
-
* 1. Move to string.c; register in Init_String().
|
|
1462
|
-
* 2. Use rb_str_modify_expand if resize support is ever added.
|
|
1463
|
-
* 3. bit_copy_core moves with it; share ebs_extract with bit_slice.
|
|
1464
|
-
*/
|
|
1647
|
+
/* Write bits from str into self at bit-level granularity (inverse of bit_slice). */
|
|
1465
1648
|
static VALUE
|
|
1466
1649
|
rb_str_bit_splice(int argc, VALUE *argv, VALUE self)
|
|
1467
1650
|
{
|
|
1468
1651
|
ssize_t dst_bit_off, dst_bit_len;
|
|
1469
1652
|
ssize_t src_bit_off, src_bit_len;
|
|
1470
1653
|
VALUE str;
|
|
1471
|
-
|
|
1472
|
-
VALUE v0, v1, v2, v3,
|
|
1654
|
+
int64_t dst_total = SB_BIT_LEN(RSTRING_LEN(self));
|
|
1655
|
+
VALUE v0, v1, v2, v3, opts;
|
|
1473
1656
|
|
|
1474
|
-
int n_pos = rb_scan_args(argc, argv, "
|
|
1657
|
+
int n_pos = rb_scan_args(argc, argv, "22:", &v0, &v1, &v2, &v3, &opts);
|
|
1475
1658
|
validate_option_hash(opts, SB_KW_LSB_FIRST);
|
|
1476
1659
|
int lsb_first = parse_lsb_first_opt(opts);
|
|
1477
1660
|
|
|
1478
1661
|
if (n_pos == 2 && rb_obj_is_kind_of(v0, rb_cRange)) {
|
|
1479
1662
|
/* bit_splice(range, str) */
|
|
1663
|
+
sb_range_validate_endpoints(v0);
|
|
1480
1664
|
ssize_t beg, len;
|
|
1481
1665
|
sb_range_beg_len(v0, &beg, &len, dst_total, 1);
|
|
1482
1666
|
dst_bit_off = beg;
|
|
@@ -1487,20 +1671,21 @@ rb_str_bit_splice(int argc, VALUE *argv, VALUE self)
|
|
|
1487
1671
|
src_bit_len = dst_bit_len;
|
|
1488
1672
|
}
|
|
1489
1673
|
else if (n_pos == 3 && rb_obj_is_kind_of(v0, rb_cRange)) {
|
|
1490
|
-
/* bit_splice(range, str,
|
|
1674
|
+
/* bit_splice(range, str, str_bit_index) */
|
|
1675
|
+
sb_range_validate_endpoints(v0);
|
|
1491
1676
|
ssize_t beg, len;
|
|
1492
1677
|
sb_range_beg_len(v0, &beg, &len, dst_total, 1);
|
|
1493
1678
|
dst_bit_off = beg;
|
|
1494
1679
|
dst_bit_len = len;
|
|
1495
1680
|
str = v1;
|
|
1496
1681
|
Check_Type(str, T_STRING);
|
|
1497
|
-
if (!
|
|
1498
|
-
rb_raise(rb_eTypeError, "third argument must be
|
|
1682
|
+
if (!rb_integer_type_p(v2)) {
|
|
1683
|
+
rb_raise(rb_eTypeError, "third argument must be an Integer");
|
|
1499
1684
|
}
|
|
1500
|
-
|
|
1501
|
-
|
|
1502
|
-
src_bit_off
|
|
1503
|
-
src_bit_len =
|
|
1685
|
+
int64_t src_total = SB_BIT_LEN(RSTRING_LEN(str));
|
|
1686
|
+
src_bit_off = integer_to_bit_idx(v2);
|
|
1687
|
+
if (src_bit_off < 0) src_bit_off += src_total;
|
|
1688
|
+
src_bit_len = dst_bit_len;
|
|
1504
1689
|
}
|
|
1505
1690
|
else if (n_pos == 3) {
|
|
1506
1691
|
/* bit_splice(bit_index, bit_length, str) */
|
|
@@ -1526,10 +1711,9 @@ rb_str_bit_splice(int argc, VALUE *argv, VALUE self)
|
|
|
1526
1711
|
src_bit_off = 0;
|
|
1527
1712
|
src_bit_len = dst_bit_len;
|
|
1528
1713
|
}
|
|
1529
|
-
else if (n_pos ==
|
|
1530
|
-
/* bit_splice(bit_index, bit_length, str, str_bit_index
|
|
1531
|
-
if (!rb_integer_type_p(v0) || !rb_integer_type_p(v1) ||
|
|
1532
|
-
!rb_integer_type_p(v3) || !rb_integer_type_p(v4)) {
|
|
1714
|
+
else if (n_pos == 4) {
|
|
1715
|
+
/* bit_splice(bit_index, bit_length, str, str_bit_index) */
|
|
1716
|
+
if (!rb_integer_type_p(v0) || !rb_integer_type_p(v1) || !rb_integer_type_p(v3)) {
|
|
1533
1717
|
rb_raise(rb_eTypeError, "bit indices and lengths must be integers");
|
|
1534
1718
|
}
|
|
1535
1719
|
dst_bit_off = integer_to_bit_idx(v0);
|
|
@@ -1537,33 +1721,29 @@ rb_str_bit_splice(int argc, VALUE *argv, VALUE self)
|
|
|
1537
1721
|
if (dst_bit_off < 0) dst_bit_off += dst_total;
|
|
1538
1722
|
str = v2;
|
|
1539
1723
|
Check_Type(str, T_STRING);
|
|
1540
|
-
|
|
1724
|
+
int64_t src_total = SB_BIT_LEN(RSTRING_LEN(str));
|
|
1541
1725
|
src_bit_off = integer_to_bit_idx(v3);
|
|
1542
|
-
src_bit_len = integer_to_bit_idx(v4);
|
|
1543
1726
|
if (src_bit_off < 0) src_bit_off += src_total;
|
|
1727
|
+
src_bit_len = dst_bit_len;
|
|
1544
1728
|
}
|
|
1545
1729
|
else {
|
|
1546
1730
|
rb_raise(rb_eArgError,
|
|
1547
|
-
"wrong number of arguments (given %d, expected 2, 3, or
|
|
1731
|
+
"wrong number of arguments (given %d, expected 2, 3, or 4)", n_pos);
|
|
1548
1732
|
}
|
|
1549
1733
|
|
|
1550
1734
|
if (dst_bit_off < 0 || dst_bit_len < 0 || dst_bit_off + dst_bit_len > dst_total) {
|
|
1551
1735
|
rb_raise(rb_eIndexError,
|
|
1552
|
-
"bit_splice: destination range [%
|
|
1553
|
-
|
|
1736
|
+
"bit_splice: destination range [%" PRIdPTR ", %" PRIdPTR
|
|
1737
|
+
"] out of bounds (total %" PRId64 " bits)",
|
|
1738
|
+
(intptr_t)dst_bit_off, (intptr_t)dst_bit_len, (int64_t)dst_total);
|
|
1554
1739
|
}
|
|
1555
1740
|
|
|
1556
|
-
|
|
1741
|
+
int64_t src_total_bits = SB_BIT_LEN(RSTRING_LEN(str));
|
|
1557
1742
|
if (src_bit_off < 0 || src_bit_len < 0 || src_bit_off + src_bit_len > src_total_bits) {
|
|
1558
1743
|
rb_raise(rb_eIndexError,
|
|
1559
|
-
"bit_splice: source range [%
|
|
1560
|
-
|
|
1561
|
-
|
|
1562
|
-
|
|
1563
|
-
if (dst_bit_len != src_bit_len) {
|
|
1564
|
-
rb_raise(rb_eArgError,
|
|
1565
|
-
"bit_splice: destination length (%ld) must equal source length (%ld)",
|
|
1566
|
-
dst_bit_len, src_bit_len);
|
|
1744
|
+
"bit_splice: source range [%" PRIdPTR ", %" PRIdPTR
|
|
1745
|
+
"] out of bounds (total %" PRId64 " bits)",
|
|
1746
|
+
(intptr_t)src_bit_off, (intptr_t)src_bit_len, (int64_t)src_total_bits);
|
|
1567
1747
|
}
|
|
1568
1748
|
|
|
1569
1749
|
if (dst_bit_len == 0) return self;
|
|
@@ -1693,8 +1873,9 @@ rb_ary_mask(int argc, VALUE *argv, VALUE self)
|
|
|
1693
1873
|
ssize_t needed = (ary_len + 7) >> 3;
|
|
1694
1874
|
if (needed > bmp_len)
|
|
1695
1875
|
rb_raise(rb_eArgError,
|
|
1696
|
-
"bitmap too short: need %
|
|
1697
|
-
|
|
1876
|
+
"bitmap too short: need %" PRIdPTR " bytes for %" PRIdPTR
|
|
1877
|
+
" elements, got %" PRIdPTR,
|
|
1878
|
+
(intptr_t)needed, (intptr_t)ary_len, (intptr_t)bmp_len);
|
|
1698
1879
|
|
|
1699
1880
|
if (!lsb_first) {
|
|
1700
1881
|
for (ssize_t i = 0; i < ary_len; i++) {
|
|
@@ -1738,8 +1919,9 @@ rb_ary_mask_bang(int argc, VALUE *argv, VALUE self)
|
|
|
1738
1919
|
ssize_t needed = (ary_len + 7) >> 3;
|
|
1739
1920
|
if (needed > bmp_len)
|
|
1740
1921
|
rb_raise(rb_eArgError,
|
|
1741
|
-
"bitmap too short: need %
|
|
1742
|
-
|
|
1922
|
+
"bitmap too short: need %" PRIdPTR " bytes for %" PRIdPTR
|
|
1923
|
+
" elements, got %" PRIdPTR,
|
|
1924
|
+
(intptr_t)needed, (intptr_t)ary_len, (intptr_t)bmp_len);
|
|
1743
1925
|
|
|
1744
1926
|
if (!lsb_first) {
|
|
1745
1927
|
for (ssize_t i = 0; i < ary_len; i++) {
|
|
@@ -1765,38 +1947,36 @@ rb_ary_mask_bang(int argc, VALUE *argv, VALUE self)
|
|
|
1765
1947
|
void
|
|
1766
1948
|
Init_string_bits(void)
|
|
1767
1949
|
{
|
|
1768
|
-
id_bracket
|
|
1769
|
-
sym_lsb_first
|
|
1770
|
-
|
|
1771
|
-
|
|
1772
|
-
|
|
1773
|
-
|
|
1774
|
-
rb_define_method(rb_cString, "
|
|
1775
|
-
rb_define_method(rb_cString, "
|
|
1776
|
-
rb_define_method(rb_cString, "
|
|
1777
|
-
rb_define_method(rb_cString, "
|
|
1778
|
-
rb_define_method(rb_cString, "
|
|
1779
|
-
rb_define_method(rb_cString, "
|
|
1780
|
-
rb_define_method(rb_cString, "
|
|
1781
|
-
rb_define_method(rb_cString, "
|
|
1782
|
-
rb_define_method(rb_cString, "
|
|
1783
|
-
rb_define_method(rb_cString, "
|
|
1784
|
-
rb_define_method(rb_cString, "
|
|
1785
|
-
rb_define_method(rb_cString, "
|
|
1786
|
-
rb_define_method(rb_cString, "
|
|
1787
|
-
rb_define_method(rb_cString, "
|
|
1788
|
-
rb_define_method(rb_cString, "
|
|
1789
|
-
rb_define_method(rb_cString, "
|
|
1790
|
-
rb_define_method(rb_cString, "
|
|
1791
|
-
rb_define_method(rb_cString, "
|
|
1792
|
-
rb_define_method(rb_cString, "
|
|
1793
|
-
rb_define_method(rb_cString, "
|
|
1794
|
-
rb_define_method(rb_cString, "bit_xor", rb_str_bit_xor, 1);
|
|
1795
|
-
rb_define_method(rb_cString, "bit_xor!", rb_str_bit_xor_bang, 1);
|
|
1950
|
+
id_bracket = rb_intern("[]");
|
|
1951
|
+
sym_lsb_first = ID2SYM(rb_intern("lsb_first"));
|
|
1952
|
+
sym_invert = ID2SYM(rb_intern("invert"));
|
|
1953
|
+
|
|
1954
|
+
rb_define_method(rb_cString, "bit_at", rb_str_bit_at, -1);
|
|
1955
|
+
rb_define_method(rb_cString, "bit_count", rb_str_bit_count, -1);
|
|
1956
|
+
rb_define_method(rb_cString, "each_bit", rb_str_each_bit, -1);
|
|
1957
|
+
rb_define_method(rb_cString, "bits", rb_str_bits, -1);
|
|
1958
|
+
rb_define_method(rb_cString, "each_bit_offset", rb_str_each_bit_offset, -1);
|
|
1959
|
+
rb_define_method(rb_cString, "bit_offsets", rb_str_bit_offsets, -1);
|
|
1960
|
+
rb_define_method(rb_cString, "bit_slice", rb_str_bit_slice, -1);
|
|
1961
|
+
rb_define_method(rb_cString, "bit_splice", rb_str_bit_splice, -1);
|
|
1962
|
+
rb_define_method(rb_cString, "bit_run_count", rb_str_bit_run_count, -1);
|
|
1963
|
+
rb_define_method(rb_cString, "each_bit_run", rb_str_each_bit_run, -1);
|
|
1964
|
+
rb_define_method(rb_cString, "bit_runs", rb_str_bit_runs, -1);
|
|
1965
|
+
rb_define_method(rb_cString, "bit_set", rb_str_bit_set, -1);
|
|
1966
|
+
rb_define_method(rb_cString, "bit_clear", rb_str_bit_clear, -1);
|
|
1967
|
+
rb_define_method(rb_cString, "bit_flip", rb_str_bit_flip, -1);
|
|
1968
|
+
rb_define_method(rb_cString, "bitwise_not", rb_str_bitwise_not, 0);
|
|
1969
|
+
rb_define_method(rb_cString, "bitwise_not!", rb_str_bitwise_not_bang, 0);
|
|
1970
|
+
rb_define_method(rb_cString, "bitwise_and", rb_str_bitwise_and, 1);
|
|
1971
|
+
rb_define_method(rb_cString, "bitwise_and!", rb_str_bitwise_and_bang, 1);
|
|
1972
|
+
rb_define_method(rb_cString, "bitwise_or", rb_str_bitwise_or, 1);
|
|
1973
|
+
rb_define_method(rb_cString, "bitwise_or!", rb_str_bitwise_or_bang, 1);
|
|
1974
|
+
rb_define_method(rb_cString, "bitwise_xor", rb_str_bitwise_xor, 1);
|
|
1975
|
+
rb_define_method(rb_cString, "bitwise_xor!", rb_str_bitwise_xor_bang, 1);
|
|
1796
1976
|
|
|
1797
1977
|
// These methods are defined here to avoid cluttering this file, but they are not part of the current core proposal (see FUTURE_PROPOSAL_PLAN.md).
|
|
1798
|
-
rb_define_method(rb_cString, "each_bit_field",
|
|
1799
|
-
rb_define_method(rb_cString, "bit_fields",
|
|
1800
|
-
rb_define_method(rb_cArray, "mask",
|
|
1801
|
-
rb_define_method(rb_cArray, "mask!",
|
|
1978
|
+
rb_define_method(rb_cString, "each_bit_field", rb_str_each_bit_field, -1);
|
|
1979
|
+
rb_define_method(rb_cString, "bit_fields", rb_str_bit_fields, -1);
|
|
1980
|
+
rb_define_method(rb_cArray, "mask", rb_ary_mask, -1);
|
|
1981
|
+
rb_define_method(rb_cArray, "mask!", rb_ary_mask_bang, -1);
|
|
1802
1982
|
}
|