string_view 0.1.0 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,45 +1,25 @@
1
- #include "ruby.h"
2
- #include "ruby/encoding.h"
3
- #include "ruby/re.h"
4
- #include "simdutf_c.h"
5
-
6
- #define SV_LIKELY(x) __builtin_expect(!!(x), 1)
7
- #define SV_UNLIKELY(x) __builtin_expect(!!(x), 0)
8
-
9
- #ifdef __GNUC__
10
- #define SV_INLINE static inline __attribute__((always_inline))
11
- #else
12
- #define SV_INLINE static inline
13
- #endif
1
+ #include "string_view.h"
14
2
 
15
3
  /* ========================================================================= */
16
- /* Struct & TypedData */
4
+ /* Globals */
17
5
  /* ========================================================================= */
18
6
 
19
- /*
20
- * Stride index: maps every STRIDE_CHARS-th character to its byte offset.
21
- * Built lazily on first char-indexed access. Enables O(1) char→byte
22
- * lookup for any offset (small scalar scan within one stride).
23
- */
24
- #define STRIDE_CHARS 128
25
-
26
- typedef struct {
27
- long *offsets; /* offsets[i] = byte offset of character i*STRIDE_CHARS */
28
- long count; /* number of entries = ceil(charlen / STRIDE_CHARS) + 1 */
29
- } stride_index_t;
30
-
31
- typedef struct {
32
- VALUE backing; /* frozen String that owns the bytes */
33
- const char *base; /* cached RSTRING_PTR(backing) — avoids indirection */
34
- rb_encoding *enc; /* cached encoding — avoids rb_enc_get per call */
35
- long offset; /* byte offset into backing */
36
- long length; /* byte length of this view */
37
- long charlen; /* cached character count; -1 = not yet computed */
38
- int single_byte; /* cached: 1 if char==byte (ASCII/single-byte enc), 0 if multibyte, -1 unknown */
39
- stride_index_t *stride_idx; /* lazily built stride index for multibyte, NULL if not built */
40
- } string_view_t;
41
-
42
- static VALUE cStringView;
7
+ VALUE cStringView;
8
+ VALUE cStringViewStrict;
9
+ VALUE eWouldAllocate;
10
+
11
+ /* Cached method IDs — initialized once in Init_string_view */
12
+ static ID id_index, id_rindex, id_byteindex, id_byterindex;
13
+ static ID id_match, id_match_p, id_match_op;
14
+ static ID id_begin, id_aref;
15
+ static ID id_upcase, id_downcase, id_capitalize, id_swapcase;
16
+ static ID id_strip, id_lstrip, id_rstrip;
17
+ static ID id_chomp, id_chop, id_reverse, id_squeeze;
18
+ static ID id_encode, id_gsub, id_sub, id_tr, id_tr_s;
19
+ static ID id_delete, id_count, id_scan, id_split;
20
+ static ID id_center, id_ljust, id_rjust;
21
+ static ID id_format_op, id_plus, id_multiply;
22
+ static ID id_unpack1, id_scrub, id_unicode_normalize;
43
23
 
44
24
  /*
45
25
  * GC callbacks.
@@ -85,14 +65,18 @@ static size_t sv_memsize(const void *ptr) {
85
65
  return size;
86
66
  }
87
67
 
88
- static const rb_data_type_t string_view_type = {
68
+ const rb_data_type_t string_view_type = {
89
69
  .wrap_struct_name = "StringView",
90
70
  .function = { .dmark = sv_mark, .dfree = sv_free, .dsize = sv_memsize, .dcompact = sv_compact },
91
- .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE,
71
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
92
72
  };
93
73
 
94
- /* Forward declarations */
95
- static int sv_compute_single_byte(VALUE backing, rb_encoding *enc);
74
+ /* Forward declarations for functions defined later in this file */
75
+ static long sv_char_count(string_view_t *sv);
76
+ static long sv_char_to_byte_offset(string_view_t *sv, long char_idx);
77
+ SV_INLINE int sv_single_byte_optimizable(string_view_t *sv);
78
+ SV_INLINE int sv_is_utf8(string_view_t *sv);
79
+ static long sv_utf8_char_count(const char *p, long len);
96
80
 
97
81
  /* ========================================================================= */
98
82
  /* Internal helpers */
@@ -122,10 +106,12 @@ static VALUE sv_as_shared_str(string_view_t *sv) {
122
106
  return shared;
123
107
  }
124
108
 
125
- /* Allocate a new StringView from a parent that already has cached base/enc */
126
- SV_INLINE VALUE sv_new_from_parent(string_view_t *parent, long offset, long length) {
109
+ /* Allocate a new StringView from a parent that already has cached base/enc.
110
+ * Preserves the class of parent_obj (StringView or StringView::Strict). */
111
+ SV_INLINE VALUE sv_new_from_parent_obj(VALUE parent_obj, string_view_t *parent, long offset, long length) {
127
112
  string_view_t *sv;
128
- VALUE obj = TypedData_Make_Struct(cStringView, string_view_t,
113
+ VALUE klass = rb_obj_class(parent_obj);
114
+ VALUE obj = TypedData_Make_Struct(klass, string_view_t,
129
115
  &string_view_type, sv);
130
116
  RB_OBJ_WRITE(obj, &sv->backing, parent->backing);
131
117
  sv->base = parent->base;
@@ -135,7 +121,7 @@ SV_INLINE VALUE sv_new_from_parent(string_view_t *parent, long offset, long leng
135
121
  sv->single_byte = parent->single_byte;
136
122
  sv->charlen = -1;
137
123
  sv->stride_idx = NULL;
138
- FL_SET_RAW(obj, FL_FREEZE);
124
+ /* Not frozen — see sv_initialize comment for rationale */
139
125
  return obj;
140
126
  }
141
127
 
@@ -169,13 +155,7 @@ static VALUE sv_initialize(int argc, VALUE *argv, VALUE self) {
169
155
 
170
156
  rb_scan_args(argc, argv, "12", &str, &voffset, &vlength);
171
157
 
172
- if (!RB_TYPE_P(str, T_STRING)) {
173
- rb_raise(rb_eTypeError,
174
- "no implicit conversion of %s into String",
175
- rb_obj_classname(str));
176
- }
177
-
178
- rb_str_freeze(str);
158
+ sv_check_frozen_string(str);
179
159
 
180
160
  long backing_len = RSTRING_LEN(str);
181
161
 
@@ -185,26 +165,20 @@ static VALUE sv_initialize(int argc, VALUE *argv, VALUE self) {
185
165
  } else {
186
166
  offset = NUM2LONG(voffset);
187
167
  length = NUM2LONG(vlength);
188
-
189
- if (offset < 0 || length < 0 || offset + length > backing_len) {
190
- rb_raise(rb_eArgError,
191
- "offset %ld, length %ld out of range for string of bytesize %ld",
192
- offset, length, backing_len);
193
- }
168
+ sv_check_bounds(offset, length, backing_len);
194
169
  }
195
170
 
196
171
  string_view_t *sv = sv_get_struct(self);
197
- rb_encoding *enc = rb_enc_get(str);
198
- RB_OBJ_WRITE(self, &sv->backing, str);
199
- sv->base = RSTRING_PTR(str);
200
- sv->enc = enc;
201
- sv->offset = offset;
202
- sv->length = length;
203
- sv->single_byte = sv_compute_single_byte(str, enc);
204
- sv->charlen = -1;
205
- sv->stride_idx = NULL;
172
+ sv_init_fields(self, sv, str, RSTRING_PTR(str), rb_enc_get(str),
173
+ offset, length);
206
174
 
207
- rb_obj_freeze(self);
175
+ /*
176
+ * We intentionally do NOT freeze self. StringView blocks content
177
+ * mutation via the immutable frozen backing and explicit FrozenError
178
+ * on bang methods. Not freezing allows reset! to work without
179
+ * violating Ruby's frozen? contract — libraries and Ruby itself
180
+ * use frozen? to assume immutability for hash keys and Ractor sharing.
181
+ */
208
182
 
209
183
  return self;
210
184
  }
@@ -236,43 +210,27 @@ static VALUE sv_inspect(VALUE self) {
236
210
  (void *)self, content, sv->offset, sv->length);
237
211
  }
238
212
 
239
- static VALUE sv_frozen_p(VALUE self) {
240
- return Qtrue;
241
- }
242
-
243
213
  /*
244
214
  * reset!(new_backing, byte_offset, byte_length) -> self
245
215
  */
246
216
  static VALUE sv_reset(VALUE self, VALUE new_backing, VALUE voffset, VALUE vlength) {
217
+ rb_check_frozen(self);
247
218
  string_view_t *sv = sv_get_struct(self);
248
219
 
249
- if (!RB_TYPE_P(new_backing, T_STRING)) {
250
- rb_raise(rb_eTypeError,
251
- "no implicit conversion of %s into String",
252
- rb_obj_classname(new_backing));
253
- }
254
-
255
- rb_str_freeze(new_backing);
220
+ sv_check_frozen_string(new_backing);
256
221
 
257
222
  long off = NUM2LONG(voffset);
258
223
  long len = NUM2LONG(vlength);
259
- long backing_len = RSTRING_LEN(new_backing);
224
+ sv_check_bounds(off, len, RSTRING_LEN(new_backing));
260
225
 
261
- if (off < 0 || len < 0 || off + len > backing_len) {
262
- rb_raise(rb_eArgError,
263
- "offset %ld, length %ld out of range for string of bytesize %ld",
264
- off, len, backing_len);
226
+ /* Free old stride index before reinitializing */
227
+ if (sv->stride_idx) {
228
+ xfree(sv->stride_idx->offsets);
229
+ xfree(sv->stride_idx);
265
230
  }
266
231
 
267
- rb_encoding *enc = rb_enc_get(new_backing);
268
- RB_OBJ_WRITE(self, &sv->backing, new_backing);
269
- sv->base = RSTRING_PTR(new_backing);
270
- sv->enc = enc;
271
- sv->offset = off;
272
- sv->length = len;
273
- sv->single_byte = sv_compute_single_byte(new_backing, enc);
274
- sv->charlen = -1;
275
- sv->stride_idx = NULL;
232
+ sv_init_fields(self, sv, new_backing, RSTRING_PTR(new_backing),
233
+ rb_enc_get(new_backing), off, len);
276
234
 
277
235
  return self;
278
236
  }
@@ -306,6 +264,8 @@ static VALUE sv_encoding(VALUE self) {
306
264
 
307
265
  static VALUE sv_ascii_only_p(VALUE self) {
308
266
  string_view_t *sv = sv_get_struct(self);
267
+ if (sv_single_byte_optimizable(sv)) return Qtrue;
268
+ /* Not single-byte optimizable (single_byte resolved to 0): scan the view for any non-ASCII byte */
309
269
  const char *p = sv_ptr(sv);
310
270
  long i;
311
271
  for (i = 0; i < sv->length; i++) {
@@ -361,16 +321,137 @@ static VALUE sv_end_with_p(int argc, VALUE *argv, VALUE self) {
361
321
  return Qfalse;
362
322
  }
363
323
 
364
- static VALUE sv_index(int argc, VALUE *argv, VALUE self) {
324
+ /*
325
+ * index(substring[, offset]) → Integer or nil
326
+ *
327
+ * For String arguments: native zero-alloc implementation using rb_memsearch.
328
+ * For Regexp arguments: delegates to String#index via shared string.
329
+ */
330
+ VALUE sv_index(int argc, VALUE *argv, VALUE self) {
365
331
  string_view_t *sv = sv_get_struct(self);
366
- VALUE shared = sv_as_shared_str(sv);
367
- return rb_funcallv(shared, rb_intern("index"), argc, argv);
332
+ VALUE pattern, voffset;
333
+ rb_scan_args(argc, argv, "11", &pattern, &voffset);
334
+
335
+ /* Regexp path: delegate via shared string */
336
+ if (rb_obj_is_kind_of(pattern, rb_cRegexp)) {
337
+ VALUE shared = sv_as_shared_str(sv);
338
+ return rb_funcallv(shared, id_index, argc, argv);
339
+ }
340
+
341
+ StringValue(pattern);
342
+ const char *p = sv_ptr(sv);
343
+ long plen = RSTRING_LEN(pattern);
344
+
345
+ /* Determine starting char offset */
346
+ long char_off = NIL_P(voffset) ? 0 : NUM2LONG(voffset);
347
+ long total_chars = sv_char_count(sv);
348
+
349
+ if (char_off < 0) char_off += total_chars;
350
+ if (char_off < 0 || char_off > total_chars) return Qnil;
351
+
352
+ /* Convert char offset to byte offset */
353
+ long byte_off = sv_char_to_byte_offset(sv, char_off);
354
+ if (byte_off < 0) return Qnil;
355
+
356
+ if (plen == 0) return LONG2NUM(char_off);
357
+ if (plen > sv->length - byte_off) return Qnil;
358
+
359
+ long pos = rb_memsearch(RSTRING_PTR(pattern), plen,
360
+ p + byte_off, sv->length - byte_off,
361
+ sv_enc(sv));
362
+ if (pos < 0 || pos > sv->length - byte_off - plen) return Qnil;
363
+
364
+ /* Convert byte position back to character position */
365
+ if (sv_single_byte_optimizable(sv)) {
366
+ return LONG2NUM(char_off + pos);
367
+ }
368
+ /* Count chars from byte_off to byte_off+pos */
369
+ if (sv_is_utf8(sv)) {
370
+ long chars = sv_utf8_char_count(p + byte_off, pos);
371
+ return LONG2NUM(char_off + chars);
372
+ }
373
+ rb_encoding *enc = sv_enc(sv);
374
+ const char *s = p + byte_off;
375
+ const char *e = s + pos;
376
+ long chars = rb_enc_strlen(s, e, enc);
377
+ return LONG2NUM(char_off + chars);
368
378
  }
369
379
 
370
- static VALUE sv_rindex(int argc, VALUE *argv, VALUE self) {
380
+ /*
381
+ * rindex(substring[, offset]) → Integer or nil
382
+ *
383
+ * For String arguments: native zero-alloc reverse search.
384
+ * For Regexp arguments: delegates to String#rindex via shared string.
385
+ */
386
+ VALUE sv_rindex(int argc, VALUE *argv, VALUE self) {
371
387
  string_view_t *sv = sv_get_struct(self);
372
- VALUE shared = sv_as_shared_str(sv);
373
- return rb_funcallv(shared, rb_intern("rindex"), argc, argv);
388
+ VALUE pattern, voffset;
389
+ rb_scan_args(argc, argv, "11", &pattern, &voffset);
390
+
391
+ /* Regexp path: delegate */
392
+ if (rb_obj_is_kind_of(pattern, rb_cRegexp)) {
393
+ VALUE shared = sv_as_shared_str(sv);
394
+ return rb_funcallv(shared, id_rindex, argc, argv);
395
+ }
396
+
397
+ StringValue(pattern);
398
+ const char *p = sv_ptr(sv);
399
+ long plen = RSTRING_LEN(pattern);
400
+ long total_chars = sv_char_count(sv);
401
+
402
+ /* Determine the maximum char position to search from */
403
+ long max_char;
404
+ if (NIL_P(voffset)) {
405
+ max_char = total_chars;
406
+ } else {
407
+ max_char = NUM2LONG(voffset);
408
+ if (max_char < 0) max_char += total_chars;
409
+ if (max_char < 0) return Qnil;
410
+ if (max_char > total_chars) max_char = total_chars;
411
+ }
412
+
413
+ if (plen == 0) {
414
+ return LONG2NUM(max_char > total_chars ? total_chars : max_char);
415
+ }
416
+ if (plen > sv->length) return Qnil;
417
+
418
+ /* Convert max_char to a byte limit */
419
+ long max_byte = sv_char_to_byte_offset(sv, max_char);
420
+ if (max_byte < 0) max_byte = sv->length;
421
+
422
+ /* Clamp the starting position so the whole pattern still fits inside the view */
423
+ long search_end = max_byte;
424
+ if (search_end + plen > sv->length) {
425
+ search_end = sv->length - plen;
426
+ }
427
+
428
+ /* Reverse byte search */
429
+ const char *needle = RSTRING_PTR(pattern);
430
+ const char *s;
431
+ for (s = p + search_end; s >= p; ) {
432
+ if (memcmp(s, needle, plen) == 0) {
433
+ long byte_pos = s - p;
434
+ /* Convert byte position to char position */
435
+ if (sv_single_byte_optimizable(sv)) {
436
+ return LONG2NUM(byte_pos);
437
+ }
438
+ if (sv_is_utf8(sv)) {
439
+ return LONG2NUM(sv_utf8_char_count(p, byte_pos));
440
+ }
441
+ rb_encoding *enc = sv_enc(sv);
442
+ return LONG2NUM(rb_enc_strlen(p, s, enc));
443
+ }
444
+ /* Move back one character */
445
+ if (s == p) break;
446
+ if (sv_single_byte_optimizable(sv)) {
447
+ s--;
448
+ } else {
449
+ rb_encoding *enc = sv_enc(sv);
450
+ s = rb_enc_prev_char(p, s, p + sv->length, enc);
451
+ if (s == NULL) break;
452
+ }
453
+ }
454
+ return Qnil;
374
455
  }
375
456
 
376
457
  static VALUE sv_getbyte(VALUE self, VALUE vidx) {
@@ -381,16 +462,83 @@ static VALUE sv_getbyte(VALUE self, VALUE vidx) {
381
462
  return INT2FIX((unsigned char)sv_ptr(sv)[idx]);
382
463
  }
383
464
 
384
- static VALUE sv_byteindex(int argc, VALUE *argv, VALUE self) {
465
+ /*
466
+ * byteindex(substring[, offset]) → Integer or nil
467
+ *
468
+ * For String arguments: native zero-alloc byte-level search.
469
+ * For Regexp arguments: delegates to String#byteindex via shared string.
470
+ */
471
+ VALUE sv_byteindex(int argc, VALUE *argv, VALUE self) {
385
472
  string_view_t *sv = sv_get_struct(self);
386
- VALUE shared = sv_as_shared_str(sv);
387
- return rb_funcallv(shared, rb_intern("byteindex"), argc, argv);
473
+ VALUE pattern, voffset;
474
+ rb_scan_args(argc, argv, "11", &pattern, &voffset);
475
+
476
+ if (rb_obj_is_kind_of(pattern, rb_cRegexp)) {
477
+ VALUE shared = sv_as_shared_str(sv);
478
+ return rb_funcallv(shared, id_byteindex, argc, argv);
479
+ }
480
+
481
+ StringValue(pattern);
482
+ const char *p = sv_ptr(sv);
483
+ long plen = RSTRING_LEN(pattern);
484
+ long byte_off = NIL_P(voffset) ? 0 : NUM2LONG(voffset);
485
+
486
+ if (byte_off < 0) byte_off += sv->length;
487
+ if (byte_off < 0 || byte_off > sv->length) return Qnil;
488
+ if (plen == 0) return LONG2NUM(byte_off);
489
+ if (plen > sv->length - byte_off) return Qnil;
490
+
491
+ long pos = rb_memsearch(RSTRING_PTR(pattern), plen,
492
+ p + byte_off, sv->length - byte_off,
493
+ sv_enc(sv));
494
+ if (pos < 0 || pos > sv->length - byte_off - plen) return Qnil;
495
+ return LONG2NUM(byte_off + pos);
388
496
  }
389
497
 
390
- static VALUE sv_byterindex(int argc, VALUE *argv, VALUE self) {
498
+ /*
499
+ * byterindex(substring[, offset]) → Integer or nil
500
+ *
501
+ * For String arguments: native zero-alloc reverse byte-level search.
502
+ * For Regexp arguments: delegates to String#byterindex via shared string.
503
+ */
504
+ VALUE sv_byterindex(int argc, VALUE *argv, VALUE self) {
391
505
  string_view_t *sv = sv_get_struct(self);
392
- VALUE shared = sv_as_shared_str(sv);
393
- return rb_funcallv(shared, rb_intern("byterindex"), argc, argv);
506
+ VALUE pattern, voffset;
507
+ rb_scan_args(argc, argv, "11", &pattern, &voffset);
508
+
509
+ if (rb_obj_is_kind_of(pattern, rb_cRegexp)) {
510
+ VALUE shared = sv_as_shared_str(sv);
511
+ return rb_funcallv(shared, id_byterindex, argc, argv);
512
+ }
513
+
514
+ StringValue(pattern);
515
+ const char *p = sv_ptr(sv);
516
+ long plen = RSTRING_LEN(pattern);
517
+ long max_byte;
518
+
519
+ if (NIL_P(voffset)) {
520
+ max_byte = sv->length;
521
+ } else {
522
+ max_byte = NUM2LONG(voffset);
523
+ if (max_byte < 0) max_byte += sv->length;
524
+ if (max_byte < 0) return Qnil;
525
+ if (max_byte > sv->length) max_byte = sv->length;
526
+ }
527
+
528
+ if (plen == 0) return LONG2NUM(max_byte > sv->length ? sv->length : max_byte);
529
+ if (plen > sv->length) return Qnil;
530
+
531
+ long search_end = max_byte;
532
+ if (search_end + plen > sv->length) search_end = sv->length - plen;
533
+
534
+ const char *needle = RSTRING_PTR(pattern);
535
+ long i;
536
+ for (i = search_end; i >= 0; i--) {
537
+ if (memcmp(p + i, needle, plen) == 0) {
538
+ return LONG2NUM(i);
539
+ }
540
+ }
541
+ return Qnil;
394
542
  }
395
543
 
396
544
  /* ========================================================================= */
@@ -454,19 +602,19 @@ static VALUE sv_chars(VALUE self) {
454
602
  static VALUE sv_match(int argc, VALUE *argv, VALUE self) {
455
603
  string_view_t *sv = sv_get_struct(self);
456
604
  VALUE shared = sv_as_shared_str(sv);
457
- return rb_funcallv(shared, rb_intern("match"), argc, argv);
605
+ return rb_funcallv(shared, id_match, argc, argv);
458
606
  }
459
607
 
460
608
  static VALUE sv_match_p(int argc, VALUE *argv, VALUE self) {
461
609
  string_view_t *sv = sv_get_struct(self);
462
610
  VALUE shared = sv_as_shared_str(sv);
463
- return rb_funcallv(shared, rb_intern("match?"), argc, argv);
611
+ return rb_funcallv(shared, id_match_p, argc, argv);
464
612
  }
465
613
 
466
614
  static VALUE sv_match_operator(VALUE self, VALUE pattern) {
467
615
  string_view_t *sv = sv_get_struct(self);
468
616
  VALUE shared = sv_as_shared_str(sv);
469
- return rb_funcall(shared, rb_intern("=~"), 1, pattern);
617
+ return rb_funcall(shared, id_match_op, 1, pattern);
470
618
  }
471
619
 
472
620
  /* ========================================================================= */
@@ -561,6 +709,41 @@ static VALUE sv_oct(VALUE self) {
561
709
  /* Tier 1: Comparison */
562
710
  /* ========================================================================= */
563
711
 
712
+ /*
713
+ * Returns 1 if all bytes in the view are < 128 (7-bit ASCII).
714
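+ * Fast path via sv_single_byte_optimizable(); NOTE(review): sv_compute_single_byte returns 1 for any mbmaxlen == 1 encoding, so confirm this cannot report 1 for single-byte data containing bytes >= 128.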
715
+ */
716
+ SV_INLINE int sv_is_7bit(string_view_t *sv) {
717
+ if (sv_single_byte_optimizable(sv)) return 1;
718
+ const char *p = sv_ptr(sv);
719
+ long i;
720
+ for (i = 0; i < sv->length; i++) {
721
+ if ((unsigned char)p[i] > 127) return 0;
722
+ }
723
+ return 1;
724
+ }
725
+
726
+ /*
727
+ * Check encoding compatibility for equality, mirroring Ruby's String#==.
728
+ * Two encodings are compatible for comparison if:
729
+ * - They are the same encoding, OR
730
+ * - Both are ASCII-compatible and at least one side is 7-bit
731
+ * (e.g. UTF-8 "hello" == US-ASCII "hello")
732
+ */
733
+ SV_INLINE int sv_enc_compatible_for_eq(
734
+ rb_encoding *enc1, int is_7bit_1,
735
+ rb_encoding *enc2, int is_7bit_2)
736
+ {
737
+ if (enc1 == enc2) return 1;
738
+ if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) return 0;
739
+ return is_7bit_1 || is_7bit_2;
740
+ }
741
+
742
+ SV_INLINE int sv_is_string_view(VALUE obj) {
743
+ VALUE klass = rb_obj_class(obj);
744
+ return klass == cStringView || klass == cStringViewStrict;
745
+ }
746
+
564
747
  static VALUE sv_eq(VALUE self, VALUE other) {
565
748
  string_view_t *sv = sv_get_struct(self);
566
749
  const char *p = sv_ptr(sv);
@@ -568,13 +751,26 @@ static VALUE sv_eq(VALUE self, VALUE other) {
568
751
  /* Fast path: String is the most common comparison target */
569
752
  if (SV_LIKELY(RB_TYPE_P(other, T_STRING))) {
570
753
  if (sv->length != RSTRING_LEN(other)) return Qfalse;
754
+ rb_encoding *oenc = rb_enc_get(other);
755
+ if (sv->enc != oenc) {
756
+ int sv_7bit = sv_is_7bit(sv);
757
+ int o_7bit = rb_enc_str_asciionly_p(other);
758
+ if (!sv_enc_compatible_for_eq(sv->enc, sv_7bit, oenc, o_7bit))
759
+ return Qfalse;
760
+ }
571
761
  return memcmp(p, RSTRING_PTR(other), sv->length) == 0 ? Qtrue : Qfalse;
572
762
  }
573
763
 
574
- /* Check for StringView via class pointer (faster than rb_obj_is_kind_of) */
575
- if (rb_obj_class(other) == cStringView) {
764
+ /* Check for StringView or StringView::Strict */
765
+ if (sv_is_string_view(other)) {
576
766
  string_view_t *o = sv_get_struct(other);
577
767
  if (sv->length != o->length) return Qfalse;
768
+ if (sv->enc != o->enc) {
769
+ int sv_7bit = sv_is_7bit(sv);
770
+ int o_7bit = sv_is_7bit(o);
771
+ if (!sv_enc_compatible_for_eq(sv->enc, sv_7bit, o->enc, o_7bit))
772
+ return Qfalse;
773
+ }
578
774
  return memcmp(p, sv_ptr(o), sv->length) == 0 ? Qtrue : Qfalse;
579
775
  }
580
776
 
@@ -590,7 +786,7 @@ static VALUE sv_cmp(VALUE self, VALUE other) {
590
786
  if (SV_LIKELY(RB_TYPE_P(other, T_STRING))) {
591
787
  op = RSTRING_PTR(other);
592
788
  olen = RSTRING_LEN(other);
593
- } else if (rb_obj_class(other) == cStringView) {
789
+ } else if (sv_is_string_view(other)) {
594
790
  string_view_t *o = sv_get_struct(other);
595
791
  op = sv_ptr(o);
596
792
  olen = o->length;
@@ -610,15 +806,22 @@ static VALUE sv_cmp(VALUE self, VALUE other) {
610
806
  }
611
807
 
612
808
  static VALUE sv_eql_p(VALUE self, VALUE other) {
613
- if (rb_obj_class(other) != cStringView) return Qfalse;
809
+ if (!sv_is_string_view(other)) return Qfalse;
614
810
  return sv_eq(self, other);
615
811
  }
616
812
 
617
813
  static VALUE sv_hash(VALUE self) {
618
814
  string_view_t *sv = sv_get_struct(self);
619
815
  const char *p = sv_ptr(sv);
816
+ /*
817
+ * Mirror CRuby's rb_str_hash: normalize encoding index to 0 for
818
+ * 7-bit content so that e.g. UTF-8 "hello" and US-ASCII "hello"
819
+ * produce the same hash (they compare equal via sv_eq).
820
+ */
821
+ int e = rb_enc_to_index(sv->enc);
822
+ if (e && sv_is_7bit(sv)) e = 0;
620
823
  st_index_t h = rb_memhash(p, sv->length);
621
- h ^= (st_index_t)rb_enc_get_index(sv->backing);
824
+ h ^= (st_index_t)e;
622
825
  return ST2FIX(h);
623
826
  }
624
827
 
@@ -636,13 +839,17 @@ static VALUE sv_hash(VALUE self) {
636
839
  * Compute single-byte flag from encoding + coderange.
637
840
  * Called once at construction time and cached in sv->single_byte.
638
841
  */
639
- static int sv_compute_single_byte(VALUE backing, rb_encoding *enc) {
842
+ int sv_compute_single_byte(VALUE backing, rb_encoding *enc) {
640
843
  if (rb_enc_mbmaxlen(enc) == 1) return 1;
641
844
  int cr = ENC_CODERANGE(backing);
642
845
  if (cr == ENC_CODERANGE_7BIT) return 1;
643
- /* For VALID (known multibyte) we know it's not single-byte */
644
- if (cr == ENC_CODERANGE_VALID) return 0;
645
- /* UNKNOWN: we don't know yet return -1 (will be resolved lazily) */
846
+ /*
847
+ * For VALID and UNKNOWN: the coderange reflects the entire backing
848
+ * string, not this slice. A view over an ASCII-only prefix of a
849
+ * multibyte string would incorrectly get single_byte=0 here.
850
+ * Return -1 (unknown) and let sv_single_byte_optimizable resolve
851
+ * it lazily by scanning the actual slice bytes.
852
+ */
646
853
  return -1;
647
854
  }
648
855
 
@@ -880,7 +1087,7 @@ static VALUE sv_aref(int argc, VALUE *argv, VALUE self) {
880
1087
  if (idx < 0) idx += total;
881
1088
  if (SV_UNLIKELY(idx < 0 || idx > total || len < 0)) return Qnil;
882
1089
  if (idx + len > total) len = total - idx;
883
- return sv_new_from_parent(sv,
1090
+ return sv_new_from_parent_obj(self, sv,
884
1091
  sv->offset + idx,
885
1092
  len);
886
1093
  }
@@ -906,7 +1113,7 @@ static VALUE sv_aref(int argc, VALUE *argv, VALUE self) {
906
1113
  long byte_end = sv_char_to_byte_offset(sv, idx + len);
907
1114
  long byte_len = byte_end - byte_off;
908
1115
 
909
- return sv_new_from_parent(sv,
1116
+ return sv_new_from_parent_obj(self, sv,
910
1117
  sv->offset + byte_off,
911
1118
  byte_len);
912
1119
  }
@@ -914,48 +1121,34 @@ static VALUE sv_aref(int argc, VALUE *argv, VALUE self) {
914
1121
  if (rb_obj_is_kind_of(arg1, rb_cRange)) {
915
1122
  long total_chars = sv_char_count(sv);
916
1123
  long beg, len;
917
- int excl;
918
- VALUE rb_beg = rb_funcall(arg1, rb_intern("begin"), 0);
919
- VALUE rb_end = rb_funcall(arg1, rb_intern("end"), 0);
920
- excl = RTEST(rb_funcall(arg1, rb_intern("exclude_end?"), 0));
921
-
922
- beg = NIL_P(rb_beg) ? 0 : NUM2LONG(rb_beg);
923
- if (beg < 0) beg += total_chars;
924
- if (beg < 0) return Qnil;
925
-
926
- long e;
927
- if (NIL_P(rb_end)) {
928
- e = total_chars;
929
- } else {
930
- e = NUM2LONG(rb_end);
931
- if (e < 0) e += total_chars;
932
- if (!excl) e += 1;
1124
+
1125
+ /* rb_range_beg_len resolves negative indices and clamps to total,
1126
+ * replacing 3 Ruby method dispatches with a single C call. */
1127
+ switch (rb_range_beg_len(arg1, &beg, &len, total_chars, 1)) {
1128
+ case Qfalse: return Qnil;
1129
+ case Qnil: return Qnil;
933
1130
  }
934
- if (e < beg) e = beg;
935
- len = e - beg;
936
- if (beg > total_chars) return Qnil;
937
- if (beg + len > total_chars) len = total_chars - beg;
938
1131
 
939
1132
  long byte_off = sv_char_to_byte_offset(sv, beg);
940
1133
  long byte_len = sv_chars_to_bytes(sv, byte_off, len);
941
1134
 
942
- return sv_new_from_parent(sv,
1135
+ return sv_new_from_parent_obj(self, sv,
943
1136
  sv->offset + byte_off,
944
1137
  byte_len);
945
1138
  }
946
1139
 
947
1140
  if (rb_obj_is_kind_of(arg1, rb_cRegexp)) {
948
1141
  VALUE shared = sv_as_shared_str(sv);
949
- VALUE m = rb_funcall(arg1, rb_intern("match"), 1, shared);
1142
+ VALUE m = rb_funcall(arg1, id_match, 1, shared);
950
1143
  if (NIL_P(m)) return Qnil;
951
1144
 
952
- VALUE matched = rb_funcall(m, rb_intern("[]"), 1, INT2FIX(0));
953
- long match_beg = NUM2LONG(rb_funcall(m, rb_intern("begin"), 1, INT2FIX(0)));
1145
+ VALUE matched = rb_funcall(m, id_aref, 1, INT2FIX(0));
1146
+ long match_beg = NUM2LONG(rb_funcall(m, id_begin, 1, INT2FIX(0)));
954
1147
 
955
1148
  long byte_off = sv_char_to_byte_offset(sv, match_beg);
956
1149
  long byte_len = RSTRING_LEN(matched);
957
1150
 
958
- return sv_new_from_parent(sv,
1151
+ return sv_new_from_parent_obj(self, sv,
959
1152
  sv->offset + byte_off,
960
1153
  byte_len);
961
1154
  }
@@ -964,14 +1157,14 @@ static VALUE sv_aref(int argc, VALUE *argv, VALUE self) {
964
1157
  const char *p = sv_ptr(sv);
965
1158
  long slen = RSTRING_LEN(arg1);
966
1159
  if (slen == 0) {
967
- return sv_new_from_parent(sv, sv->offset, 0);
1160
+ return sv_new_from_parent_obj(self, sv, sv->offset, 0);
968
1161
  }
969
1162
  if (slen > sv->length) return Qnil;
970
1163
 
971
1164
  long pos = rb_memsearch(RSTRING_PTR(arg1), slen, p, sv->length, sv_enc(sv));
972
1165
  if (pos < 0 || pos > sv->length - slen) return Qnil;
973
1166
 
974
- return sv_new_from_parent(sv, sv->offset + pos, slen);
1167
+ return sv_new_from_parent_obj(self, sv, sv->offset + pos, slen);
975
1168
  }
976
1169
 
977
1170
  if (RB_INTEGER_TYPE_P(arg1)) {
@@ -986,7 +1179,7 @@ static VALUE sv_aref(int argc, VALUE *argv, VALUE self) {
986
1179
 
987
1180
  long byte_len = sv_chars_to_bytes(sv, byte_off, 1);
988
1181
 
989
- return sv_new_from_parent(sv,
1182
+ return sv_new_from_parent_obj(self, sv,
990
1183
  sv->offset + byte_off,
991
1184
  byte_len);
992
1185
  }
@@ -1015,96 +1208,381 @@ static VALUE sv_byteslice(int argc, VALUE *argv, VALUE self) {
1015
1208
  if (len < 0) return Qnil;
1016
1209
  if (off + len > sv->length) len = sv->length - off;
1017
1210
 
1018
- return sv_new_from_parent(sv, sv->offset + off, len);
1211
+ return sv_new_from_parent_obj(self, sv, sv->offset + off, len);
1019
1212
  }
1020
1213
 
1021
1214
  if (rb_obj_is_kind_of(arg1, rb_cRange)) {
1022
1215
  long beg, len;
1023
- VALUE rb_beg = rb_funcall(arg1, rb_intern("begin"), 0);
1024
- VALUE rb_end = rb_funcall(arg1, rb_intern("end"), 0);
1025
- int excl = RTEST(rb_funcall(arg1, rb_intern("exclude_end?"), 0));
1026
-
1027
- beg = NIL_P(rb_beg) ? 0 : NUM2LONG(rb_beg);
1028
- if (beg < 0) beg += sv->length;
1029
- if (beg < 0) return Qnil;
1030
1216
 
1031
- long e;
1032
- if (NIL_P(rb_end)) {
1033
- e = sv->length;
1034
- } else {
1035
- e = NUM2LONG(rb_end);
1036
- if (e < 0) e += sv->length;
1037
- if (!excl) e += 1;
1217
+ switch (rb_range_beg_len(arg1, &beg, &len, sv->length, 1)) {
1218
+ case Qfalse: return Qnil;
1219
+ case Qnil: return Qnil;
1038
1220
  }
1039
- if (e < beg) e = beg;
1040
- len = e - beg;
1041
- if (beg > sv->length) return Qnil;
1042
- if (beg + len > sv->length) len = sv->length - beg;
1043
1221
 
1044
- return sv_new_from_parent(sv, sv->offset + beg, len);
1222
+ return sv_new_from_parent_obj(self, sv, sv->offset + beg, len);
1045
1223
  }
1046
1224
 
1047
1225
  {
1048
1226
  long idx = NUM2LONG(arg1);
1049
1227
  if (idx < 0) idx += sv->length;
1050
1228
  if (idx < 0 || idx >= sv->length) return Qnil;
1051
- return sv_new_from_parent(sv, sv->offset + idx, 1);
1229
+ return sv_new_from_parent_obj(self, sv, sv->offset + idx, 1);
1052
1230
  }
1053
1231
  }
1054
1232
 
1233
+ /* ========================================================================= */
1234
+ /* Tier 1.5: Zero-copy transforms — returns StringView via offset adjustment */
1235
+ /* ========================================================================= */
1236
+
1237
+ /*
1238
+ * Helper: check if a byte is ASCII whitespace.
1239
+ * Matches Ruby's strip behavior for ASCII-compatible encodings:
1240
+ * space, tab, newline, vertical tab, form feed, carriage return, NUL.
1241
+ */
1242
+ SV_INLINE int sv_is_ascii_whitespace(unsigned char c) {
1243
+ return c == ' ' || (c >= '\t' && c <= '\r') || c == '\0';
1244
+ }
1245
+
1246
+ /*
1247
+ * strip → StringView
1248
+ * Returns a new StringView with leading and trailing ASCII whitespace removed.
1249
+ * Zero allocations for the byte content — only a new StringView struct.
1250
+ */
1251
+ static VALUE sv_strip(int argc, VALUE *argv, VALUE self) {
1252
+ rb_check_arity(argc, 0, 0);
1253
+ string_view_t *sv = sv_get_struct(self);
1254
+ const unsigned char *p = (const unsigned char *)sv_ptr(sv);
1255
+ long len = sv->length;
1256
+
1257
+ /* Skip leading whitespace */
1258
+ long left = 0;
1259
+ while (left < len && sv_is_ascii_whitespace(p[left])) left++;
1260
+
1261
+ /* Skip trailing whitespace */
1262
+ long right = len;
1263
+ while (right > left && sv_is_ascii_whitespace(p[right - 1])) right--;
1264
+
1265
+ if (left == 0 && right == len) return self;
1266
+ return sv_new_from_parent_obj(self, sv, sv->offset + left, right - left);
1267
+ }
1268
+
1269
+ /*
1270
+ * lstrip → StringView
1271
+ * Returns a new StringView with leading ASCII whitespace removed.
1272
+ */
1273
+ static VALUE sv_lstrip(int argc, VALUE *argv, VALUE self) {
1274
+ rb_check_arity(argc, 0, 0);
1275
+ string_view_t *sv = sv_get_struct(self);
1276
+ const unsigned char *p = (const unsigned char *)sv_ptr(sv);
1277
+ long len = sv->length;
1278
+
1279
+ long left = 0;
1280
+ while (left < len && sv_is_ascii_whitespace(p[left])) left++;
1281
+
1282
+ if (left == 0) return self;
1283
+ return sv_new_from_parent_obj(self, sv, sv->offset + left, len - left);
1284
+ }
1285
+
1286
+ /*
1287
+ * rstrip → StringView
1288
+ * Returns a new StringView with trailing ASCII whitespace removed.
1289
+ */
1290
+ static VALUE sv_rstrip(int argc, VALUE *argv, VALUE self) {
1291
+ rb_check_arity(argc, 0, 0);
1292
+ string_view_t *sv = sv_get_struct(self);
1293
+ const unsigned char *p = (const unsigned char *)sv_ptr(sv);
1294
+ long len = sv->length;
1295
+
1296
+ long right = len;
1297
+ while (right > 0 && sv_is_ascii_whitespace(p[right - 1])) right--;
1298
+
1299
+ if (right == len) return self;
1300
+ return sv_new_from_parent_obj(self, sv, sv->offset, right);
1301
+ }
1302
+
1303
+ /*
1304
+ * chomp([separator]) → StringView
1305
+ * Returns a new StringView with the trailing record separator removed.
1306
+ * Default separator is $/ (typically "\n").
1307
+ * Handles "\n", "\r\n", and "\r" when separator is "\n".
1308
+ */
1309
+ static VALUE sv_chomp(int argc, VALUE *argv, VALUE self) {
1310
+ rb_check_arity(argc, 0, 1);
1311
+ string_view_t *sv = sv_get_struct(self);
1312
+ const unsigned char *p = (const unsigned char *)sv_ptr(sv);
1313
+ long len = sv->length;
1314
+
1315
+ if (len == 0) return self;
1316
+
1317
+ if (argc == 0 || NIL_P(argv[0])) {
1318
+ /* Default: remove trailing \n, \r\n, or \r */
1319
+ /* Use $/ (input record separator) when no arg given */
1320
+ VALUE rs;
1321
+ if (argc == 0) {
1322
+ rs = rb_rs; /* global $/ */
1323
+ if (NIL_P(rs)) return self; /* $/ is nil, no chomp */
1324
+ } else {
1325
+ return self; /* chomp(nil) returns self */
1326
+ }
1327
+
1328
+ /* Fast path for default $/ which is "\n" */
1329
+ if (RB_TYPE_P(rs, T_STRING) && RSTRING_LEN(rs) == 1 && RSTRING_PTR(rs)[0] == '\n') {
1330
+ if (p[len - 1] == '\n') {
1331
+ long newlen = len - 1;
1332
+ if (newlen > 0 && p[newlen - 1] == '\r') newlen--;
1333
+ return sv_new_from_parent_obj(self, sv, sv->offset, newlen);
1334
+ } else if (p[len - 1] == '\r') {
1335
+ return sv_new_from_parent_obj(self, sv, sv->offset, len - 1);
1336
+ }
1337
+ return self;
1338
+ }
1339
+
1340
+ /* Non-default $/ — use the separator */
1341
+ if (!RB_TYPE_P(rs, T_STRING)) return self;
1342
+ const char *sep = RSTRING_PTR(rs);
1343
+ long seplen = RSTRING_LEN(rs);
1344
+ if (seplen == 0) {
1345
+ /* Paragraph mode: remove trailing \n+ */
1346
+ long right = len;
1347
+ while (right > 0 && p[right - 1] == '\n') right--;
1348
+ if (right == len) return self;
1349
+ return sv_new_from_parent_obj(self, sv, sv->offset, right);
1350
+ }
1351
+ if (seplen > len) return self;
1352
+ if (memcmp(p + len - seplen, sep, seplen) == 0) {
1353
+ return sv_new_from_parent_obj(self, sv, sv->offset, len - seplen);
1354
+ }
1355
+ return self;
1356
+ }
1357
+
1358
+ /* Explicit separator argument */
1359
+ VALUE sep_val = argv[0];
1360
+ if (NIL_P(sep_val)) return self;
1361
+ StringValue(sep_val);
1362
+ const char *sep = RSTRING_PTR(sep_val);
1363
+ long seplen = RSTRING_LEN(sep_val);
1364
+
1365
+ if (seplen == 0) {
1366
+ /* Paragraph mode: remove all trailing newlines */
1367
+ long right = len;
1368
+ while (right > 0 && p[right - 1] == '\n') right--;
1369
+ if (right == len) return self;
1370
+ return sv_new_from_parent_obj(self, sv, sv->offset, right);
1371
+ }
1372
+
1373
+ /* Special handling for "\n": also removes \r\n and \r */
1374
+ if (seplen == 1 && sep[0] == '\n') {
1375
+ if (p[len - 1] == '\n') {
1376
+ long newlen = len - 1;
1377
+ if (newlen > 0 && p[newlen - 1] == '\r') newlen--;
1378
+ return sv_new_from_parent_obj(self, sv, sv->offset, newlen);
1379
+ } else if (p[len - 1] == '\r') {
1380
+ return sv_new_from_parent_obj(self, sv, sv->offset, len - 1);
1381
+ }
1382
+ return self;
1383
+ }
1384
+
1385
+ if (seplen > len) return self;
1386
+ if (memcmp(p + len - seplen, sep, seplen) == 0) {
1387
+ return sv_new_from_parent_obj(self, sv, sv->offset, len - seplen);
1388
+ }
1389
+ return self;
1390
+ }
1391
+
1392
+ /*
1393
+ * chop → StringView
1394
+ * Returns a new StringView with the last character removed.
1395
+ * If the string ends with \r\n, both characters are removed.
1396
+ */
1397
+ static VALUE sv_chop(int argc, VALUE *argv, VALUE self) {
1398
+ rb_check_arity(argc, 0, 0);
1399
+ string_view_t *sv = sv_get_struct(self);
1400
+ long len = sv->length;
1401
+
1402
+ if (len == 0) return self;
1403
+
1404
+ const unsigned char *p = (const unsigned char *)sv_ptr(sv);
1405
+
1406
+ /* Check for \r\n at the end */
1407
+ if (len >= 2 && p[len - 1] == '\n' && p[len - 2] == '\r') {
1408
+ return sv_new_from_parent_obj(self, sv, sv->offset, len - 2);
1409
+ }
1410
+
1411
+ /* Remove last character (respecting encoding) */
1412
+ if (sv_single_byte_optimizable(sv)) {
1413
+ return sv_new_from_parent_obj(self, sv, sv->offset, len - 1);
1414
+ }
1415
+
1416
+ /* Multibyte: find start of last character */
1417
+ rb_encoding *enc = sv_enc(sv);
1418
+ const char *start = sv_ptr(sv);
1419
+ const char *end = start + len;
1420
+ const char *prev = rb_enc_prev_char(start, end, end, enc);
1421
+ if (prev == NULL) prev = start;
1422
+ long newlen = (long)(prev - start);
1423
+
1424
+ return sv_new_from_parent_obj(self, sv, sv->offset, newlen);
1425
+ }
1426
+
1427
+ /*
1428
+ * delete_prefix(prefix) → StringView
1429
+ * Returns a new StringView with the given prefix removed, or self if
1430
+ * the string doesn't start with the prefix.
1431
+ */
1432
+ static VALUE sv_delete_prefix(VALUE self, VALUE prefix) {
1433
+ string_view_t *sv = sv_get_struct(self);
1434
+ StringValue(prefix);
1435
+ const char *p = sv_ptr(sv);
1436
+ long plen = RSTRING_LEN(prefix);
1437
+
1438
+ if (plen > sv->length) return self;
1439
+ if (plen == 0) return self;
1440
+ if (memcmp(p, RSTRING_PTR(prefix), plen) != 0) return self;
1441
+
1442
+ return sv_new_from_parent_obj(self, sv, sv->offset + plen, sv->length - plen);
1443
+ }
1444
+
1445
+ /*
1446
+ * delete_suffix(suffix) → StringView
1447
+ * Returns a new StringView with the given suffix removed, or self if
1448
+ * the string doesn't end with the suffix.
1449
+ */
1450
+ static VALUE sv_delete_suffix(VALUE self, VALUE suffix) {
1451
+ string_view_t *sv = sv_get_struct(self);
1452
+ StringValue(suffix);
1453
+ const char *p = sv_ptr(sv);
1454
+ long slen = RSTRING_LEN(suffix);
1455
+
1456
+ if (slen > sv->length) return self;
1457
+ if (slen == 0) return self;
1458
+ if (memcmp(p + sv->length - slen, RSTRING_PTR(suffix), slen) != 0) return self;
1459
+
1460
+ return sv_new_from_parent_obj(self, sv, sv->offset, sv->length - slen);
1461
+ }
1462
+
1463
+ /*
1464
+ * chr → StringView
1465
+ * Returns the first character as a StringView.
1466
+ */
1467
+ static VALUE sv_chr(VALUE self) {
1468
+ string_view_t *sv = sv_get_struct(self);
1469
+
1470
+ if (sv->length == 0) return self;
1471
+
1472
+ if (sv_single_byte_optimizable(sv)) {
1473
+ return sv_new_from_parent_obj(self, sv, sv->offset, 1);
1474
+ }
1475
+
1476
+ rb_encoding *enc = sv_enc(sv);
1477
+ const char *p = sv_ptr(sv);
1478
+ const char *e = p + sv->length;
1479
+ int clen = rb_enc_fast_mbclen(p, e, enc);
1480
+
1481
+ return sv_new_from_parent_obj(self, sv, sv->offset, clen);
1482
+ }
1483
+
1484
+ /*
1485
+ * ord → Integer
1486
+ * Returns the codepoint of the first character.
1487
+ */
1488
+ static VALUE sv_ord(VALUE self) {
1489
+ string_view_t *sv = sv_get_struct(self);
1490
+
1491
+ if (sv->length == 0) {
1492
+ rb_raise(rb_eArgError, "empty string");
1493
+ }
1494
+
1495
+ rb_encoding *enc = sv_enc(sv);
1496
+ const char *p = sv_ptr(sv);
1497
+ const char *e = p + sv->length;
1498
+ unsigned int c = rb_enc_codepoint_len(p, e, NULL, enc);
1499
+ return UINT2NUM(c);
1500
+ }
1501
+
1502
+ /*
1503
+ * valid_encoding? → true/false
1504
+ * Returns whether the view's bytes are valid in its encoding.
1505
+ */
1506
+ static VALUE sv_valid_encoding_p(VALUE self) {
1507
+ string_view_t *sv = sv_get_struct(self);
1508
+ rb_encoding *enc = sv_enc(sv);
1509
+ const char *p = sv_ptr(sv);
1510
+ const char *e = p + sv->length;
1511
+
1512
+ while (p < e) {
1513
+ int len = rb_enc_precise_mbclen(p, e, enc);
1514
+ if (!MBCLEN_CHARFOUND_P(len)) return Qfalse;
1515
+ p += MBCLEN_CHARFOUND_LEN(len);
1516
+ }
1517
+ return Qtrue;
1518
+ }
1519
+
/*
 * b → StringView (design note; no implementation follows in this section)
 * Would return a new StringView that references the same bytes but with
 * ASCII-8BIT encoding. Reinterpreting the bytes as binary is always valid,
 * so the backing bytes can be shared as they are.
 *
 * The backing String has its own encoding, which at first suggests that a
 * new backing with binary encoding would have to be created. However, the
 * view's encoding is cached separately in sv->enc, so a lightweight view
 * with a different encoding can be created over the same backing, which is
 * what makes a true zero-allocation `b` possible.
 */
1533
+
1055
1534
  /* ========================================================================= */
1056
1535
  /* Tier 3: Transform delegation */
1057
1536
  /* ========================================================================= */
1058
1537
 
1059
- #define SV_DELEGATE_FUNCALL(cname, rbname) \
1538
+ #define SV_DELEGATE_FUNCALL(cname, cached_id) \
1060
1539
  static VALUE sv_##cname(int argc, VALUE *argv, VALUE self) { \
1061
1540
  string_view_t *sv = sv_get_struct(self); \
1062
1541
  VALUE shared = sv_as_shared_str(sv); \
1063
1542
  if (rb_block_given_p()) { \
1064
- return rb_funcall_with_block(shared, rb_intern(rbname), \
1543
+ return rb_funcall_with_block(shared, cached_id, \
1065
1544
  argc, argv, rb_block_proc()); \
1066
1545
  } \
1067
- return rb_funcallv(shared, rb_intern(rbname), argc, argv); \
1546
+ return rb_funcallv(shared, cached_id, argc, argv); \
1068
1547
  }
1069
1548
 
1070
- SV_DELEGATE_FUNCALL(upcase, "upcase")
1071
- SV_DELEGATE_FUNCALL(downcase, "downcase")
1072
- SV_DELEGATE_FUNCALL(capitalize,"capitalize")
1073
- SV_DELEGATE_FUNCALL(swapcase, "swapcase")
1074
- SV_DELEGATE_FUNCALL(strip, "strip")
1075
- SV_DELEGATE_FUNCALL(lstrip, "lstrip")
1076
- SV_DELEGATE_FUNCALL(rstrip, "rstrip")
1077
- SV_DELEGATE_FUNCALL(chomp, "chomp")
1078
- SV_DELEGATE_FUNCALL(chop, "chop")
1079
- SV_DELEGATE_FUNCALL(reverse, "reverse")
1080
- SV_DELEGATE_FUNCALL(squeeze, "squeeze")
1081
- SV_DELEGATE_FUNCALL(encode, "encode")
1082
- SV_DELEGATE_FUNCALL(gsub, "gsub")
1083
- SV_DELEGATE_FUNCALL(sub, "sub")
1084
- SV_DELEGATE_FUNCALL(tr, "tr")
1085
- SV_DELEGATE_FUNCALL(tr_s, "tr_s")
1086
- SV_DELEGATE_FUNCALL(sv_delete, "delete")
1087
- SV_DELEGATE_FUNCALL(count, "count")
1088
- SV_DELEGATE_FUNCALL(scan, "scan")
1089
- SV_DELEGATE_FUNCALL(split, "split")
1090
- SV_DELEGATE_FUNCALL(center, "center")
1091
- SV_DELEGATE_FUNCALL(ljust, "ljust")
1092
- SV_DELEGATE_FUNCALL(rjust, "rjust")
1093
- SV_DELEGATE_FUNCALL(format_op, "%")
1094
- SV_DELEGATE_FUNCALL(plus, "+")
1095
- SV_DELEGATE_FUNCALL(multiply, "*")
1096
- SV_DELEGATE_FUNCALL(unpack1, "unpack1")
1097
- SV_DELEGATE_FUNCALL(scrub, "scrub")
1098
- SV_DELEGATE_FUNCALL(unicode_normalize, "unicode_normalize")
1549
+ SV_DELEGATE_FUNCALL(upcase, id_upcase)
1550
+ SV_DELEGATE_FUNCALL(downcase, id_downcase)
1551
+ SV_DELEGATE_FUNCALL(capitalize,id_capitalize)
1552
+ SV_DELEGATE_FUNCALL(swapcase, id_swapcase)
1553
+ SV_DELEGATE_FUNCALL(reverse, id_reverse)
1554
+ SV_DELEGATE_FUNCALL(squeeze, id_squeeze)
1555
+ SV_DELEGATE_FUNCALL(encode, id_encode)
1556
+ SV_DELEGATE_FUNCALL(gsub, id_gsub)
1557
+ SV_DELEGATE_FUNCALL(sub, id_sub)
1558
+ SV_DELEGATE_FUNCALL(tr, id_tr)
1559
+ SV_DELEGATE_FUNCALL(tr_s, id_tr_s)
1560
+ SV_DELEGATE_FUNCALL(delete_str,id_delete)
1561
+ /*
1562
+ * count(set, ...) → Integer
1563
+ * Delegates to String#count via shared string.
1564
+ * (Character set parsing is complex — reuse Ruby's implementation.)
1565
+ */
1566
+ SV_DELEGATE_FUNCALL(count, id_count)
1567
+ SV_DELEGATE_FUNCALL(scan, id_scan)
1568
+ SV_DELEGATE_FUNCALL(split, id_split)
1569
+ SV_DELEGATE_FUNCALL(center, id_center)
1570
+ SV_DELEGATE_FUNCALL(ljust, id_ljust)
1571
+ SV_DELEGATE_FUNCALL(rjust, id_rjust)
1572
+ SV_DELEGATE_FUNCALL(format_op, id_format_op)
1573
+ SV_DELEGATE_FUNCALL(plus, id_plus)
1574
+ SV_DELEGATE_FUNCALL(multiply, id_multiply)
1575
+ SV_DELEGATE_FUNCALL(unpack1, id_unpack1)
1576
+ SV_DELEGATE_FUNCALL(scrub, id_scrub)
1577
+ SV_DELEGATE_FUNCALL(unicode_normalize, id_unicode_normalize)
1099
1578
 
1100
1579
  /* ========================================================================= */
1101
1580
  /* Bang methods — always raise FrozenError */
1102
1581
  /* ========================================================================= */
1103
1582
 
1104
1583
  static VALUE sv_frozen_error(int argc, VALUE *argv, VALUE self) {
1105
- VALUE str = sv_to_s(self);
1106
- rb_raise(rb_eFrozenError, "can't modify frozen StringView: \"%s\"",
1107
- StringValueCStr(str));
1584
+ (void)argc; (void)argv;
1585
+ rb_raise(rb_eFrozenError, "can't modify frozen StringView");
1108
1586
  return Qnil;
1109
1587
  }
1110
1588
 
@@ -1115,6 +1593,46 @@ static VALUE sv_frozen_error(int argc, VALUE *argv, VALUE self) {
1115
1593
  void Init_string_view(void) {
1116
1594
  enc_utf8 = rb_utf8_encoding();
1117
1595
 
1596
+ /* Cache method IDs — avoids rb_intern hash lookup on every call */
1597
+ id_index = rb_intern("index");
1598
+ id_rindex = rb_intern("rindex");
1599
+ id_byteindex = rb_intern("byteindex");
1600
+ id_byterindex = rb_intern("byterindex");
1601
+ id_match = rb_intern("match");
1602
+ id_match_p = rb_intern("match?");
1603
+ id_match_op = rb_intern("=~");
1604
+ id_begin = rb_intern("begin");
1605
+ id_aref = rb_intern("[]");
1606
+ id_upcase = rb_intern("upcase");
1607
+ id_downcase = rb_intern("downcase");
1608
+ id_capitalize = rb_intern("capitalize");
1609
+ id_swapcase = rb_intern("swapcase");
1610
+ id_strip = rb_intern("strip");
1611
+ id_lstrip = rb_intern("lstrip");
1612
+ id_rstrip = rb_intern("rstrip");
1613
+ id_chomp = rb_intern("chomp");
1614
+ id_chop = rb_intern("chop");
1615
+ id_reverse = rb_intern("reverse");
1616
+ id_squeeze = rb_intern("squeeze");
1617
+ id_encode = rb_intern("encode");
1618
+ id_gsub = rb_intern("gsub");
1619
+ id_sub = rb_intern("sub");
1620
+ id_tr = rb_intern("tr");
1621
+ id_tr_s = rb_intern("tr_s");
1622
+ id_delete = rb_intern("delete");
1623
+ id_count = rb_intern("count");
1624
+ id_scan = rb_intern("scan");
1625
+ id_split = rb_intern("split");
1626
+ id_center = rb_intern("center");
1627
+ id_ljust = rb_intern("ljust");
1628
+ id_rjust = rb_intern("rjust");
1629
+ id_format_op = rb_intern("%");
1630
+ id_plus = rb_intern("+");
1631
+ id_multiply = rb_intern("*");
1632
+ id_unpack1 = rb_intern("unpack1");
1633
+ id_scrub = rb_intern("scrub");
1634
+ id_unicode_normalize = rb_intern("unicode_normalize");
1635
+
1118
1636
  cStringView = rb_define_class("StringView", rb_cObject);
1119
1637
  rb_include_module(cStringView, rb_mComparable);
1120
1638
 
@@ -1122,11 +1640,10 @@ void Init_string_view(void) {
1122
1640
  rb_define_method(cStringView, "initialize", sv_initialize, -1);
1123
1641
 
1124
1642
  rb_define_method(cStringView, "to_s", sv_to_s, 0);
1643
+ rb_define_method(cStringView, "materialize", sv_to_s, 0);
1125
1644
  rb_define_private_method(cStringView, "to_str", sv_to_str, 0);
1126
1645
  rb_define_method(cStringView, "inspect", sv_inspect, 0);
1127
- rb_define_method(cStringView, "frozen?", sv_frozen_p, 0);
1128
1646
  rb_define_method(cStringView, "reset!", sv_reset, 3);
1129
- rb_define_alias(cStringView, "materialize", "to_s");
1130
1647
 
1131
1648
  rb_define_method(cStringView, "bytesize", sv_bytesize, 0);
1132
1649
  rb_define_method(cStringView, "length", sv_length, 0);
@@ -1176,6 +1693,11 @@ void Init_string_view(void) {
1176
1693
  rb_define_method(cStringView, "rstrip", sv_rstrip, -1);
1177
1694
  rb_define_method(cStringView, "chomp", sv_chomp, -1);
1178
1695
  rb_define_method(cStringView, "chop", sv_chop, -1);
1696
+ rb_define_method(cStringView, "delete_prefix", sv_delete_prefix, 1);
1697
+ rb_define_method(cStringView, "delete_suffix", sv_delete_suffix, 1);
1698
+ rb_define_method(cStringView, "chr", sv_chr, 0);
1699
+ rb_define_method(cStringView, "ord", sv_ord, 0);
1700
+ rb_define_method(cStringView, "valid_encoding?", sv_valid_encoding_p, 0);
1179
1701
  rb_define_method(cStringView, "reverse", sv_reverse, -1);
1180
1702
  rb_define_method(cStringView, "squeeze", sv_squeeze, -1);
1181
1703
  rb_define_method(cStringView, "encode", sv_encode, -1);
@@ -1183,7 +1705,7 @@ void Init_string_view(void) {
1183
1705
  rb_define_method(cStringView, "sub", sv_sub, -1);
1184
1706
  rb_define_method(cStringView, "tr", sv_tr, -1);
1185
1707
  rb_define_method(cStringView, "tr_s", sv_tr_s, -1);
1186
- rb_define_method(cStringView, "delete", sv_sv_delete, -1);
1708
+ rb_define_method(cStringView, "delete", sv_delete_str, -1);
1187
1709
  rb_define_method(cStringView, "count", sv_count, -1);
1188
1710
  rb_define_method(cStringView, "scan", sv_scan, -1);
1189
1711
  rb_define_method(cStringView, "split", sv_split, -1);
@@ -1214,4 +1736,10 @@ void Init_string_view(void) {
1214
1736
  rb_define_method(cStringView, "gsub!", sv_frozen_error, -1);
1215
1737
  rb_define_method(cStringView, "sub!", sv_frozen_error, -1);
1216
1738
  rb_define_method(cStringView, "slice!", sv_frozen_error, -1);
1739
+ rb_define_method(cStringView, "delete_prefix!", sv_frozen_error, -1);
1740
+ rb_define_method(cStringView, "delete_suffix!", sv_frozen_error, -1);
1741
+
1742
+ Init_string_view_strict();
1743
+ Init_string_view_pool();
1744
+ Init_string_view_core_ext();
1217
1745
  }