string_view 0.1.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,45 +1,25 @@
1
- #include "ruby.h"
2
- #include "ruby/encoding.h"
3
- #include "ruby/re.h"
4
- #include "simdutf_c.h"
5
-
6
- #define SV_LIKELY(x) __builtin_expect(!!(x), 1)
7
- #define SV_UNLIKELY(x) __builtin_expect(!!(x), 0)
8
-
9
- #ifdef __GNUC__
10
- #define SV_INLINE static inline __attribute__((always_inline))
11
- #else
12
- #define SV_INLINE static inline
13
- #endif
1
+ #include "string_view.h"
14
2
 
15
3
  /* ========================================================================= */
16
- /* Struct & TypedData */
4
+ /* Globals */
17
5
  /* ========================================================================= */
18
6
 
19
- /*
20
- * Stride index: maps every STRIDE_CHARS-th character to its byte offset.
21
- * Built lazily on first char-indexed access. Enables O(1) char→byte
22
- * lookup for any offset (small scalar scan within one stride).
23
- */
24
- #define STRIDE_CHARS 128
25
-
26
- typedef struct {
27
- long *offsets; /* offsets[i] = byte offset of character i*STRIDE_CHARS */
28
- long count; /* number of entries = ceil(charlen / STRIDE_CHARS) + 1 */
29
- } stride_index_t;
30
-
31
- typedef struct {
32
- VALUE backing; /* frozen String that owns the bytes */
33
- const char *base; /* cached RSTRING_PTR(backing) — avoids indirection */
34
- rb_encoding *enc; /* cached encoding — avoids rb_enc_get per call */
35
- long offset; /* byte offset into backing */
36
- long length; /* byte length of this view */
37
- long charlen; /* cached character count; -1 = not yet computed */
38
- int single_byte; /* cached: 1 if char==byte (ASCII/single-byte enc), 0 if multibyte, -1 unknown */
39
- stride_index_t *stride_idx; /* lazily built stride index for multibyte, NULL if not built */
40
- } string_view_t;
41
-
42
- static VALUE cStringView;
7
+ VALUE cStringView;
8
+ VALUE cStringViewStrict;
9
+ VALUE eWouldAllocate;
10
+
11
+ /* Cached method IDs — initialized once in Init_string_view */
12
+ static ID id_index, id_rindex, id_byteindex, id_byterindex;
13
+ static ID id_match, id_match_p, id_match_op;
14
+ static ID id_begin, id_aref;
15
+ static ID id_upcase, id_downcase, id_capitalize, id_swapcase;
16
+ static ID id_strip, id_lstrip, id_rstrip;
17
+ static ID id_chomp, id_chop, id_reverse, id_squeeze;
18
+ static ID id_encode, id_gsub, id_sub, id_tr, id_tr_s;
19
+ static ID id_delete, id_count, id_scan, id_split;
20
+ static ID id_center, id_ljust, id_rjust;
21
+ static ID id_format_op, id_plus, id_multiply;
22
+ static ID id_unpack1, id_scrub, id_unicode_normalize;
43
23
 
44
24
  /*
45
25
  * GC callbacks.
@@ -70,10 +50,7 @@ static void sv_compact(void *ptr) {
70
50
 
71
51
  static void sv_free(void *ptr) {
72
52
  string_view_t *sv = (string_view_t *)ptr;
73
- if (sv->stride_idx) {
74
- xfree(sv->stride_idx->offsets);
75
- xfree(sv->stride_idx);
76
- }
53
+ sv_clear_stride_index(sv);
77
54
  }
78
55
 
79
56
  static size_t sv_memsize(const void *ptr) {
@@ -85,14 +62,20 @@ static size_t sv_memsize(const void *ptr) {
85
62
  return size;
86
63
  }
87
64
 
88
- static const rb_data_type_t string_view_type = {
65
+ const rb_data_type_t string_view_type = {
89
66
  .wrap_struct_name = "StringView",
90
67
  .function = { .dmark = sv_mark, .dfree = sv_free, .dsize = sv_memsize, .dcompact = sv_compact },
91
- .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE,
68
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
92
69
  };
93
70
 
94
- /* Forward declarations */
95
- static int sv_compute_single_byte(VALUE backing, rb_encoding *enc);
71
+ /* Forward declarations for functions defined later in this file */
72
+ static long sv_char_count(string_view_t *sv);
73
+ static long sv_char_to_byte_offset(string_view_t *sv, long char_idx);
74
+ static long sv_char_count_partial(string_view_t *sv, const char *p, long len);
75
+ SV_INLINE int sv_single_byte_optimizable(string_view_t *sv);
76
+ SV_INLINE int sv_is_7bit(string_view_t *sv);
77
+ SV_INLINE int sv_is_utf8(string_view_t *sv);
78
+ static long sv_utf8_char_count(const char *p, long len);
96
79
 
97
80
  /* ========================================================================= */
98
81
  /* Internal helpers */
@@ -122,10 +105,12 @@ static VALUE sv_as_shared_str(string_view_t *sv) {
122
105
  return shared;
123
106
  }
124
107
 
125
- /* Allocate a new StringView from a parent that already has cached base/enc */
126
- SV_INLINE VALUE sv_new_from_parent(string_view_t *parent, long offset, long length) {
108
+ /* Allocate a new StringView from a parent that already has cached base/enc.
109
+ * Preserves the class of parent_obj (StringView or StringView::Strict). */
110
+ SV_INLINE VALUE sv_new_from_parent_obj(VALUE parent_obj, string_view_t *parent, long offset, long length) {
127
111
  string_view_t *sv;
128
- VALUE obj = TypedData_Make_Struct(cStringView, string_view_t,
112
+ VALUE klass = rb_obj_class(parent_obj);
113
+ VALUE obj = TypedData_Make_Struct(klass, string_view_t,
129
114
  &string_view_type, sv);
130
115
  RB_OBJ_WRITE(obj, &sv->backing, parent->backing);
131
116
  sv->base = parent->base;
@@ -133,9 +118,11 @@ SV_INLINE VALUE sv_new_from_parent(string_view_t *parent, long offset, long leng
133
118
  sv->offset = offset;
134
119
  sv->length = length;
135
120
  sv->single_byte = parent->single_byte;
121
+ sv->valid_encoding = sv->single_byte == 1 ? 1 : -1;
122
+ sv->pooled = 0;
136
123
  sv->charlen = -1;
137
124
  sv->stride_idx = NULL;
138
- FL_SET_RAW(obj, FL_FREEZE);
125
+ /* Not frozen — see sv_initialize comment for rationale */
139
126
  return obj;
140
127
  }
141
128
 
@@ -154,6 +141,8 @@ static VALUE sv_alloc(VALUE klass) {
154
141
  sv->offset = 0;
155
142
  sv->length = 0;
156
143
  sv->single_byte = -1;
144
+ sv->valid_encoding = -1;
145
+ sv->pooled = 0;
157
146
  sv->charlen = -1;
158
147
  sv->stride_idx = NULL;
159
148
  return obj;
@@ -169,13 +158,7 @@ static VALUE sv_initialize(int argc, VALUE *argv, VALUE self) {
169
158
 
170
159
  rb_scan_args(argc, argv, "12", &str, &voffset, &vlength);
171
160
 
172
- if (!RB_TYPE_P(str, T_STRING)) {
173
- rb_raise(rb_eTypeError,
174
- "no implicit conversion of %s into String",
175
- rb_obj_classname(str));
176
- }
177
-
178
- rb_str_freeze(str);
161
+ sv_check_frozen_string(str);
179
162
 
180
163
  long backing_len = RSTRING_LEN(str);
181
164
 
@@ -185,26 +168,20 @@ static VALUE sv_initialize(int argc, VALUE *argv, VALUE self) {
185
168
  } else {
186
169
  offset = NUM2LONG(voffset);
187
170
  length = NUM2LONG(vlength);
188
-
189
- if (offset < 0 || length < 0 || offset + length > backing_len) {
190
- rb_raise(rb_eArgError,
191
- "offset %ld, length %ld out of range for string of bytesize %ld",
192
- offset, length, backing_len);
193
- }
171
+ sv_check_bounds(offset, length, backing_len);
194
172
  }
195
173
 
196
174
  string_view_t *sv = sv_get_struct(self);
197
- rb_encoding *enc = rb_enc_get(str);
198
- RB_OBJ_WRITE(self, &sv->backing, str);
199
- sv->base = RSTRING_PTR(str);
200
- sv->enc = enc;
201
- sv->offset = offset;
202
- sv->length = length;
203
- sv->single_byte = sv_compute_single_byte(str, enc);
204
- sv->charlen = -1;
205
- sv->stride_idx = NULL;
175
+ sv_init_fields(self, sv, str, RSTRING_PTR(str), rb_enc_get(str),
176
+ offset, length);
206
177
 
207
- rb_obj_freeze(self);
178
+ /*
179
+ * We intentionally do NOT freeze self. StringView blocks content
180
+ * mutation via the immutable frozen backing and explicit FrozenError
181
+ * on bang methods. Not freezing allows reset! to work without
182
+ * violating Ruby's frozen? contract — libraries and Ruby itself
183
+ * use frozen? to assume immutability for hash keys and Ractor sharing.
184
+ */
208
185
 
209
186
  return self;
210
187
  }
@@ -236,43 +213,29 @@ static VALUE sv_inspect(VALUE self) {
236
213
  (void *)self, content, sv->offset, sv->length);
237
214
  }
238
215
 
239
- static VALUE sv_frozen_p(VALUE self) {
240
- return Qtrue;
241
- }
242
-
243
216
  /*
244
217
  * reset!(new_backing, byte_offset, byte_length) -> self
245
218
  */
246
219
  static VALUE sv_reset(VALUE self, VALUE new_backing, VALUE voffset, VALUE vlength) {
220
+ rb_check_frozen(self);
247
221
  string_view_t *sv = sv_get_struct(self);
248
222
 
249
- if (!RB_TYPE_P(new_backing, T_STRING)) {
250
- rb_raise(rb_eTypeError,
251
- "no implicit conversion of %s into String",
252
- rb_obj_classname(new_backing));
223
+ if (SV_UNLIKELY(sv->pooled)) {
224
+ rb_raise(rb_eRuntimeError,
225
+ "can't reset a pooled StringView directly; call StringView::Pool#reset! instead");
253
226
  }
254
227
 
255
- rb_str_freeze(new_backing);
228
+ sv_check_frozen_string(new_backing);
256
229
 
257
230
  long off = NUM2LONG(voffset);
258
231
  long len = NUM2LONG(vlength);
259
- long backing_len = RSTRING_LEN(new_backing);
232
+ sv_check_bounds(off, len, RSTRING_LEN(new_backing));
260
233
 
261
- if (off < 0 || len < 0 || off + len > backing_len) {
262
- rb_raise(rb_eArgError,
263
- "offset %ld, length %ld out of range for string of bytesize %ld",
264
- off, len, backing_len);
265
- }
234
+ /* Free old stride index before reinitializing */
235
+ sv_clear_stride_index(sv);
266
236
 
267
- rb_encoding *enc = rb_enc_get(new_backing);
268
- RB_OBJ_WRITE(self, &sv->backing, new_backing);
269
- sv->base = RSTRING_PTR(new_backing);
270
- sv->enc = enc;
271
- sv->offset = off;
272
- sv->length = len;
273
- sv->single_byte = sv_compute_single_byte(new_backing, enc);
274
- sv->charlen = -1;
275
- sv->stride_idx = NULL;
237
+ sv_init_fields(self, sv, new_backing, RSTRING_PTR(new_backing),
238
+ rb_enc_get(new_backing), off, len);
276
239
 
277
240
  return self;
278
241
  }
@@ -306,6 +269,8 @@ static VALUE sv_encoding(VALUE self) {
306
269
 
307
270
  static VALUE sv_ascii_only_p(VALUE self) {
308
271
  string_view_t *sv = sv_get_struct(self);
272
+ if (sv_single_byte_optimizable(sv)) return Qtrue;
273
+ /* single_byte resolved to 0 (multibyte) — scan to confirm non-ASCII bytes */
309
274
  const char *p = sv_ptr(sv);
310
275
  long i;
311
276
  for (i = 0; i < sv->length; i++) {
@@ -314,6 +279,94 @@ static VALUE sv_ascii_only_p(VALUE self) {
314
279
  return Qtrue;
315
280
  }
316
281
 
282
+ SV_INLINE long sv_precise_char_len(const char *p, const char *e, rb_encoding *enc) {
283
+ int len = rb_enc_precise_mbclen(p, e, enc);
284
+ if (MBCLEN_CHARFOUND_P(len)) return MBCLEN_CHARFOUND_LEN(len);
285
+ return 1;
286
+ }
287
+
288
+ static int sv_compute_valid_encoding_slice(string_view_t *sv) {
289
+ if (sv_single_byte_optimizable(sv)) return 1;
290
+
291
+ if (SV_LIKELY(sv_is_utf8(sv))) {
292
+ return simdutf_validate_utf8(sv_ptr(sv), (size_t)sv->length) ? 1 : 0;
293
+ }
294
+
295
+ rb_encoding *enc = sv_enc(sv);
296
+ const char *p = sv_ptr(sv);
297
+ const char *e = p + sv->length;
298
+
299
+ while (p < e) {
300
+ int len = rb_enc_precise_mbclen(p, e, enc);
301
+ if (!MBCLEN_CHARFOUND_P(len)) return 0;
302
+ p += MBCLEN_CHARFOUND_LEN(len);
303
+ }
304
+
305
+ return 1;
306
+ }
307
+
308
+ SV_INLINE int sv_valid_encoding_cached(string_view_t *sv) {
309
+ if (SV_LIKELY(sv->valid_encoding >= 0)) return sv->valid_encoding;
310
+ sv->valid_encoding = sv_compute_valid_encoding_slice(sv);
311
+ return sv->valid_encoding;
312
+ }
313
+
314
+ static long sv_tolerant_char_count(const char *p, const char *e, rb_encoding *enc) {
315
+ long count = 0;
316
+
317
+ while (p < e) {
318
+ p += sv_precise_char_len(p, e, enc);
319
+ count++;
320
+ }
321
+
322
+ return count;
323
+ }
324
+
325
+ static long sv_tolerant_char_to_byte_offset(string_view_t *sv, long char_idx) {
326
+ rb_encoding *enc = sv_enc(sv);
327
+ const char *p = sv_ptr(sv);
328
+ const char *e = p + sv->length;
329
+ const char *start = p;
330
+ long i = 0;
331
+
332
+ while (i < char_idx && p < e) {
333
+ p += sv_precise_char_len(p, e, enc);
334
+ i++;
335
+ }
336
+
337
+ if (i < char_idx) return -1;
338
+ return p - start;
339
+ }
340
+
341
+ static long sv_tolerant_chars_to_bytes(string_view_t *sv, long byte_off, long n) {
342
+ rb_encoding *enc = sv_enc(sv);
343
+ const char *start = sv_ptr(sv) + byte_off;
344
+ const char *p = start;
345
+ const char *e = sv_ptr(sv) + sv->length;
346
+ long i = 0;
347
+
348
+ while (i < n && p < e) {
349
+ p += sv_precise_char_len(p, e, enc);
350
+ i++;
351
+ }
352
+
353
+ return p - start;
354
+ }
355
+
356
+ SV_INLINE void sv_check_compatible_string(string_view_t *sv, VALUE other) {
357
+ rb_encoding *oenc = rb_enc_get(other);
358
+
359
+ if (sv->enc == oenc) return;
360
+ if (rb_enc_asciicompat(sv->enc) && rb_enc_asciicompat(oenc) &&
361
+ (sv_is_7bit(sv) || rb_enc_str_asciionly_p(other))) {
362
+ return;
363
+ }
364
+
365
+ rb_raise(rb_eEncCompatError,
366
+ "incompatible character encodings: %s and %s",
367
+ rb_enc_name(sv->enc), rb_enc_name(oenc));
368
+ }
369
+
317
370
  /* ========================================================================= */
318
371
  /* Tier 1: Searching */
319
372
  /* ========================================================================= */
@@ -324,6 +377,7 @@ static VALUE sv_include_p(VALUE self, VALUE substr) {
324
377
  const char *p = sv_ptr(sv);
325
378
  long slen = RSTRING_LEN(substr);
326
379
  if (slen == 0) return Qtrue;
380
+ sv_check_compatible_string(sv, substr);
327
381
  if (slen > sv->length) return Qfalse;
328
382
 
329
383
  long pos = rb_memsearch(RSTRING_PTR(substr), slen, p, sv->length, sv_enc(sv));
@@ -339,6 +393,8 @@ static VALUE sv_start_with_p(int argc, VALUE *argv, VALUE self) {
339
393
  VALUE prefix = argv[i];
340
394
  StringValue(prefix);
341
395
  long plen = RSTRING_LEN(prefix);
396
+ if (plen == 0) return Qtrue;
397
+ sv_check_compatible_string(sv, prefix);
342
398
  if (plen > sv->length) continue;
343
399
  if (memcmp(p, RSTRING_PTR(prefix), plen) == 0) return Qtrue;
344
400
  }
@@ -354,6 +410,8 @@ static VALUE sv_end_with_p(int argc, VALUE *argv, VALUE self) {
354
410
  VALUE suffix = argv[i];
355
411
  StringValue(suffix);
356
412
  long slen = RSTRING_LEN(suffix);
413
+ if (slen == 0) return Qtrue;
414
+ sv_check_compatible_string(sv, suffix);
357
415
  if (slen > sv->length) continue;
358
416
  if (memcmp(p + sv->length - slen, RSTRING_PTR(suffix), slen) == 0)
359
417
  return Qtrue;
@@ -361,16 +419,127 @@ static VALUE sv_end_with_p(int argc, VALUE *argv, VALUE self) {
361
419
  return Qfalse;
362
420
  }
363
421
 
364
- static VALUE sv_index(int argc, VALUE *argv, VALUE self) {
422
+ /*
423
+ * index(substring[, offset]) → Integer or nil
424
+ *
425
+ * For String arguments: native zero-alloc implementation using rb_memsearch.
426
+ * For Regexp arguments: delegates to String#index via shared string.
427
+ */
428
+ VALUE sv_index(int argc, VALUE *argv, VALUE self) {
365
429
  string_view_t *sv = sv_get_struct(self);
366
- VALUE shared = sv_as_shared_str(sv);
367
- return rb_funcallv(shared, rb_intern("index"), argc, argv);
430
+ VALUE pattern, voffset;
431
+ rb_scan_args(argc, argv, "11", &pattern, &voffset);
432
+
433
+ /* Regexp path: delegate via shared string */
434
+ if (rb_obj_is_kind_of(pattern, rb_cRegexp)) {
435
+ VALUE shared = sv_as_shared_str(sv);
436
+ return rb_funcallv(shared, id_index, argc, argv);
437
+ }
438
+
439
+ StringValue(pattern);
440
+ const char *p = sv_ptr(sv);
441
+ long plen = RSTRING_LEN(pattern);
442
+
443
+ /* Determine starting char offset */
444
+ long char_off = NIL_P(voffset) ? 0 : NUM2LONG(voffset);
445
+ long total_chars = sv_char_count(sv);
446
+
447
+ if (char_off < 0) char_off += total_chars;
448
+ if (char_off < 0 || char_off > total_chars) return Qnil;
449
+ if (plen == 0) return LONG2NUM(char_off);
450
+ sv_check_compatible_string(sv, pattern);
451
+
452
+ /* Convert char offset to byte offset */
453
+ long byte_off = sv_char_to_byte_offset(sv, char_off);
454
+ if (byte_off < 0) return Qnil;
455
+
456
+ if (plen > sv->length - byte_off) return Qnil;
457
+
458
+ long pos = rb_memsearch(RSTRING_PTR(pattern), plen,
459
+ p + byte_off, sv->length - byte_off,
460
+ sv_enc(sv));
461
+ if (pos < 0 || pos > sv->length - byte_off - plen) return Qnil;
462
+
463
+ /* Convert byte position back to character position */
464
+ if (sv_single_byte_optimizable(sv)) {
465
+ return LONG2NUM(char_off + pos);
466
+ }
467
+
468
+ return LONG2NUM(char_off + sv_char_count_partial(sv, p + byte_off, pos));
368
469
  }
369
470
 
370
- static VALUE sv_rindex(int argc, VALUE *argv, VALUE self) {
471
+ /*
472
+ * rindex(substring[, offset]) → Integer or nil
473
+ *
474
+ * For String arguments: native zero-alloc reverse search.
475
+ * For Regexp arguments: delegates to String#rindex via shared string.
476
+ */
477
+ VALUE sv_rindex(int argc, VALUE *argv, VALUE self) {
371
478
  string_view_t *sv = sv_get_struct(self);
372
- VALUE shared = sv_as_shared_str(sv);
373
- return rb_funcallv(shared, rb_intern("rindex"), argc, argv);
479
+ VALUE pattern, voffset;
480
+ rb_scan_args(argc, argv, "11", &pattern, &voffset);
481
+
482
+ /* Regexp path: delegate */
483
+ if (rb_obj_is_kind_of(pattern, rb_cRegexp)) {
484
+ VALUE shared = sv_as_shared_str(sv);
485
+ return rb_funcallv(shared, id_rindex, argc, argv);
486
+ }
487
+
488
+ StringValue(pattern);
489
+ const char *p = sv_ptr(sv);
490
+ long plen = RSTRING_LEN(pattern);
491
+ long total_chars = sv_char_count(sv);
492
+
493
+ /* Determine the maximum char position to search from */
494
+ long max_char;
495
+ if (NIL_P(voffset)) {
496
+ max_char = total_chars;
497
+ } else {
498
+ max_char = NUM2LONG(voffset);
499
+ if (max_char < 0) max_char += total_chars;
500
+ if (max_char < 0) return Qnil;
501
+ if (max_char > total_chars) max_char = total_chars;
502
+ }
503
+
504
+ if (plen == 0) {
505
+ return LONG2NUM(max_char > total_chars ? total_chars : max_char);
506
+ }
507
+ sv_check_compatible_string(sv, pattern);
508
+ if (plen > sv->length) return Qnil;
509
+
510
+ /* Convert max_char to a byte limit */
511
+ long max_byte = sv_char_to_byte_offset(sv, max_char);
512
+ if (max_byte < 0) max_byte = sv->length;
513
+
514
+ /* Ensure we don't search past the point where the pattern can't fit */
515
+ long search_end = max_byte;
516
+ if (search_end + plen > sv->length) {
517
+ search_end = sv->length - plen;
518
+ }
519
+
520
+ /* Reverse byte search */
521
+ const char *needle = RSTRING_PTR(pattern);
522
+ const char *s;
523
+ for (s = p + search_end; s >= p; ) {
524
+ if (memcmp(s, needle, plen) == 0) {
525
+ long byte_pos = s - p;
526
+ /* Convert byte position to char position */
527
+ if (sv_single_byte_optimizable(sv)) {
528
+ return LONG2NUM(byte_pos);
529
+ }
530
+ return LONG2NUM(sv_char_count_partial(sv, p, byte_pos));
531
+ }
532
+ /* Move back one character */
533
+ if (s == p) break;
534
+ if (sv_single_byte_optimizable(sv)) {
535
+ s--;
536
+ } else {
537
+ rb_encoding *enc = sv_enc(sv);
538
+ s = rb_enc_prev_char(p, s, p + sv->length, enc);
539
+ if (s == NULL) break;
540
+ }
541
+ }
542
+ return Qnil;
374
543
  }
375
544
 
376
545
  static VALUE sv_getbyte(VALUE self, VALUE vidx) {
@@ -381,16 +550,85 @@ static VALUE sv_getbyte(VALUE self, VALUE vidx) {
381
550
  return INT2FIX((unsigned char)sv_ptr(sv)[idx]);
382
551
  }
383
552
 
384
- static VALUE sv_byteindex(int argc, VALUE *argv, VALUE self) {
553
+ /*
554
+ * byteindex(substring[, offset]) → Integer or nil
555
+ *
556
+ * For String arguments: native zero-alloc byte-level search.
557
+ * For Regexp arguments: delegates to String#byteindex via shared string.
558
+ */
559
+ VALUE sv_byteindex(int argc, VALUE *argv, VALUE self) {
385
560
  string_view_t *sv = sv_get_struct(self);
386
- VALUE shared = sv_as_shared_str(sv);
387
- return rb_funcallv(shared, rb_intern("byteindex"), argc, argv);
561
+ VALUE pattern, voffset;
562
+ rb_scan_args(argc, argv, "11", &pattern, &voffset);
563
+
564
+ if (rb_obj_is_kind_of(pattern, rb_cRegexp)) {
565
+ VALUE shared = sv_as_shared_str(sv);
566
+ return rb_funcallv(shared, id_byteindex, argc, argv);
567
+ }
568
+
569
+ StringValue(pattern);
570
+ const char *p = sv_ptr(sv);
571
+ long plen = RSTRING_LEN(pattern);
572
+ long byte_off = NIL_P(voffset) ? 0 : NUM2LONG(voffset);
573
+
574
+ if (byte_off < 0) byte_off += sv->length;
575
+ if (byte_off < 0 || byte_off > sv->length) return Qnil;
576
+ if (plen == 0) return LONG2NUM(byte_off);
577
+ sv_check_compatible_string(sv, pattern);
578
+ if (plen > sv->length - byte_off) return Qnil;
579
+
580
+ long pos = rb_memsearch(RSTRING_PTR(pattern), plen,
581
+ p + byte_off, sv->length - byte_off,
582
+ sv_enc(sv));
583
+ if (pos < 0 || pos > sv->length - byte_off - plen) return Qnil;
584
+ return LONG2NUM(byte_off + pos);
388
585
  }
389
586
 
390
- static VALUE sv_byterindex(int argc, VALUE *argv, VALUE self) {
587
+ /*
588
+ * byterindex(substring[, offset]) → Integer or nil
589
+ *
590
+ * For String arguments: native zero-alloc reverse byte-level search.
591
+ * For Regexp arguments: delegates to String#byterindex via shared string.
592
+ */
593
+ VALUE sv_byterindex(int argc, VALUE *argv, VALUE self) {
391
594
  string_view_t *sv = sv_get_struct(self);
392
- VALUE shared = sv_as_shared_str(sv);
393
- return rb_funcallv(shared, rb_intern("byterindex"), argc, argv);
595
+ VALUE pattern, voffset;
596
+ rb_scan_args(argc, argv, "11", &pattern, &voffset);
597
+
598
+ if (rb_obj_is_kind_of(pattern, rb_cRegexp)) {
599
+ VALUE shared = sv_as_shared_str(sv);
600
+ return rb_funcallv(shared, id_byterindex, argc, argv);
601
+ }
602
+
603
+ StringValue(pattern);
604
+ const char *p = sv_ptr(sv);
605
+ long plen = RSTRING_LEN(pattern);
606
+ long max_byte;
607
+
608
+ if (NIL_P(voffset)) {
609
+ max_byte = sv->length;
610
+ } else {
611
+ max_byte = NUM2LONG(voffset);
612
+ if (max_byte < 0) max_byte += sv->length;
613
+ if (max_byte < 0) return Qnil;
614
+ if (max_byte > sv->length) max_byte = sv->length;
615
+ }
616
+
617
+ if (plen == 0) return LONG2NUM(max_byte > sv->length ? sv->length : max_byte);
618
+ sv_check_compatible_string(sv, pattern);
619
+ if (plen > sv->length) return Qnil;
620
+
621
+ long search_end = max_byte;
622
+ if (search_end + plen > sv->length) search_end = sv->length - plen;
623
+
624
+ const char *needle = RSTRING_PTR(pattern);
625
+ long i;
626
+ for (i = search_end; i >= 0; i--) {
627
+ if (memcmp(p + i, needle, plen) == 0) {
628
+ return LONG2NUM(i);
629
+ }
630
+ }
631
+ return Qnil;
394
632
  }
395
633
 
396
634
  /* ========================================================================= */
@@ -454,19 +692,19 @@ static VALUE sv_chars(VALUE self) {
454
692
  static VALUE sv_match(int argc, VALUE *argv, VALUE self) {
455
693
  string_view_t *sv = sv_get_struct(self);
456
694
  VALUE shared = sv_as_shared_str(sv);
457
- return rb_funcallv(shared, rb_intern("match"), argc, argv);
695
+ return rb_funcallv(shared, id_match, argc, argv);
458
696
  }
459
697
 
460
698
  static VALUE sv_match_p(int argc, VALUE *argv, VALUE self) {
461
699
  string_view_t *sv = sv_get_struct(self);
462
700
  VALUE shared = sv_as_shared_str(sv);
463
- return rb_funcallv(shared, rb_intern("match?"), argc, argv);
701
+ return rb_funcallv(shared, id_match_p, argc, argv);
464
702
  }
465
703
 
466
704
  static VALUE sv_match_operator(VALUE self, VALUE pattern) {
467
705
  string_view_t *sv = sv_get_struct(self);
468
706
  VALUE shared = sv_as_shared_str(sv);
469
- return rb_funcall(shared, rb_intern("=~"), 1, pattern);
707
+ return rb_funcall(shared, id_match_op, 1, pattern);
470
708
  }
471
709
 
472
710
  /* ========================================================================= */
@@ -485,6 +723,11 @@ typedef struct {
485
723
  char *ptr;
486
724
  } sv_cstr_t;
487
725
 
726
+ typedef struct {
727
+ sv_cstr_t *cs;
728
+ int base;
729
+ } sv_inum_args_t;
730
+
488
731
  SV_INLINE void sv_cstr_init(sv_cstr_t *cs, string_view_t *sv) {
489
732
  const char *p = sv_ptr(sv);
490
733
  long len = sv->length;
@@ -505,6 +748,22 @@ SV_INLINE void sv_cstr_free(sv_cstr_t *cs) {
505
748
  }
506
749
  }
507
750
 
751
+ static VALUE sv_cstr_free_ensure(VALUE arg) {
752
+ sv_cstr_free((sv_cstr_t *)arg);
753
+ return Qnil;
754
+ }
755
+
756
+ static VALUE sv_to_i_body(VALUE arg) {
757
+ sv_inum_args_t *args = (sv_inum_args_t *)arg;
758
+ return rb_cstr_to_inum(args->cs->ptr, args->base, 0);
759
+ }
760
+
761
+ static VALUE sv_to_f_body(VALUE arg) {
762
+ sv_cstr_t *cs = (sv_cstr_t *)arg;
763
+ double d = rb_cstr_to_dbl(cs->ptr, 0);
764
+ return DBL2NUM(d);
765
+ }
766
+
508
767
  /*
509
768
  * to_i([base]) — parse integer directly from byte pointer, zero allocations.
510
769
  * Uses rb_cstr_to_inum which parses from a NUL-terminated C string.
@@ -515,10 +774,12 @@ static VALUE sv_to_i(int argc, VALUE *argv, VALUE self) {
515
774
  if (argc > 0) base = NUM2INT(argv[0]);
516
775
 
517
776
  sv_cstr_t cs;
777
+ sv_inum_args_t args;
518
778
  sv_cstr_init(&cs, sv);
519
- VALUE result = rb_cstr_to_inum(cs.ptr, base, 0);
520
- sv_cstr_free(&cs);
521
- return result;
779
+ args.cs = &cs;
780
+ args.base = base;
781
+ return rb_ensure(sv_to_i_body, (VALUE)&args,
782
+ sv_cstr_free_ensure, (VALUE)&cs);
522
783
  }
523
784
 
524
785
  /*
@@ -528,9 +789,8 @@ static VALUE sv_to_f(VALUE self) {
528
789
  string_view_t *sv = sv_get_struct(self);
529
790
  sv_cstr_t cs;
530
791
  sv_cstr_init(&cs, sv);
531
- double d = rb_cstr_to_dbl(cs.ptr, 0);
532
- sv_cstr_free(&cs);
533
- return DBL2NUM(d);
792
+ return rb_ensure(sv_to_f_body, (VALUE)&cs,
793
+ sv_cstr_free_ensure, (VALUE)&cs);
534
794
  }
535
795
 
536
796
  /*
@@ -539,10 +799,12 @@ static VALUE sv_to_f(VALUE self) {
539
799
  static VALUE sv_hex(VALUE self) {
540
800
  string_view_t *sv = sv_get_struct(self);
541
801
  sv_cstr_t cs;
802
+ sv_inum_args_t args;
542
803
  sv_cstr_init(&cs, sv);
543
- VALUE result = rb_cstr_to_inum(cs.ptr, 16, 0);
544
- sv_cstr_free(&cs);
545
- return result;
804
+ args.cs = &cs;
805
+ args.base = 16;
806
+ return rb_ensure(sv_to_i_body, (VALUE)&args,
807
+ sv_cstr_free_ensure, (VALUE)&cs);
546
808
  }
547
809
 
548
810
  /*
@@ -551,16 +813,52 @@ static VALUE sv_hex(VALUE self) {
551
813
  static VALUE sv_oct(VALUE self) {
552
814
  string_view_t *sv = sv_get_struct(self);
553
815
  sv_cstr_t cs;
816
+ sv_inum_args_t args;
554
817
  sv_cstr_init(&cs, sv);
555
- VALUE result = rb_cstr_to_inum(cs.ptr, 8, 0);
556
- sv_cstr_free(&cs);
557
- return result;
818
+ args.cs = &cs;
819
+ args.base = 8;
820
+ return rb_ensure(sv_to_i_body, (VALUE)&args,
821
+ sv_cstr_free_ensure, (VALUE)&cs);
558
822
  }
559
823
 
560
824
  /* ========================================================================= */
561
825
  /* Tier 1: Comparison */
562
826
  /* ========================================================================= */
563
827
 
828
+ /*
829
+ * Returns 1 if all bytes in the view are < 128 (7-bit ASCII).
830
+ * Scans every byte of the slice on each call; it does not consult the single_byte cache.
831
+ */
832
+ SV_INLINE int sv_is_7bit(string_view_t *sv) {
833
+ const unsigned char *p = (const unsigned char *)sv_ptr(sv);
834
+ long i;
835
+ for (i = 0; i < sv->length; i++) {
836
+ if (p[i] > 127) return 0;
837
+ }
838
+ return 1;
839
+ }
840
+
841
+ /*
842
+ * Check encoding compatibility for equality, mirroring Ruby's String#==.
843
+ * Two encodings are compatible for comparison if:
844
+ * - They are the same encoding, OR
845
+ * - Both are ASCII-compatible and at least one side is 7-bit
846
+ * (e.g. UTF-8 "hello" == US-ASCII "hello")
847
+ */
848
+ SV_INLINE int sv_enc_compatible_for_eq(
849
+ rb_encoding *enc1, int is_7bit_1,
850
+ rb_encoding *enc2, int is_7bit_2)
851
+ {
852
+ if (enc1 == enc2) return 1;
853
+ if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) return 0;
854
+ return is_7bit_1 || is_7bit_2;
855
+ }
856
+
857
+ SV_INLINE int sv_is_string_view(VALUE obj) {
858
+ VALUE klass = rb_obj_class(obj);
859
+ return klass == cStringView || klass == cStringViewStrict;
860
+ }
861
+
564
862
  static VALUE sv_eq(VALUE self, VALUE other) {
565
863
  string_view_t *sv = sv_get_struct(self);
566
864
  const char *p = sv_ptr(sv);
@@ -568,13 +866,26 @@ static VALUE sv_eq(VALUE self, VALUE other) {
568
866
  /* Fast path: String is the most common comparison target */
569
867
  if (SV_LIKELY(RB_TYPE_P(other, T_STRING))) {
570
868
  if (sv->length != RSTRING_LEN(other)) return Qfalse;
869
+ rb_encoding *oenc = rb_enc_get(other);
870
+ if (sv->enc != oenc) {
871
+ int sv_7bit = sv_is_7bit(sv);
872
+ int o_7bit = rb_enc_str_asciionly_p(other);
873
+ if (!sv_enc_compatible_for_eq(sv->enc, sv_7bit, oenc, o_7bit))
874
+ return Qfalse;
875
+ }
571
876
  return memcmp(p, RSTRING_PTR(other), sv->length) == 0 ? Qtrue : Qfalse;
572
877
  }
573
878
 
574
- /* Check for StringView via class pointer (faster than rb_obj_is_kind_of) */
575
- if (rb_obj_class(other) == cStringView) {
879
+ /* Check for StringView or StringView::Strict */
880
+ if (sv_is_string_view(other)) {
576
881
  string_view_t *o = sv_get_struct(other);
577
882
  if (sv->length != o->length) return Qfalse;
883
+ if (sv->enc != o->enc) {
884
+ int sv_7bit = sv_is_7bit(sv);
885
+ int o_7bit = sv_is_7bit(o);
886
+ if (!sv_enc_compatible_for_eq(sv->enc, sv_7bit, o->enc, o_7bit))
887
+ return Qfalse;
888
+ }
578
889
  return memcmp(p, sv_ptr(o), sv->length) == 0 ? Qtrue : Qfalse;
579
890
  }
580
891
 
@@ -590,7 +901,7 @@ static VALUE sv_cmp(VALUE self, VALUE other) {
590
901
  if (SV_LIKELY(RB_TYPE_P(other, T_STRING))) {
591
902
  op = RSTRING_PTR(other);
592
903
  olen = RSTRING_LEN(other);
593
- } else if (rb_obj_class(other) == cStringView) {
904
+ } else if (sv_is_string_view(other)) {
594
905
  string_view_t *o = sv_get_struct(other);
595
906
  op = sv_ptr(o);
596
907
  olen = o->length;
@@ -610,15 +921,22 @@ static VALUE sv_cmp(VALUE self, VALUE other) {
610
921
  }
611
922
 
612
923
  static VALUE sv_eql_p(VALUE self, VALUE other) {
613
- if (rb_obj_class(other) != cStringView) return Qfalse;
924
+ if (!sv_is_string_view(other)) return Qfalse;
614
925
  return sv_eq(self, other);
615
926
  }
616
927
 
617
928
  static VALUE sv_hash(VALUE self) {
618
929
  string_view_t *sv = sv_get_struct(self);
619
930
  const char *p = sv_ptr(sv);
931
+ /*
932
+ * Mirror CRuby's rb_str_hash: normalize encoding index to 0 for
933
+ * 7-bit content so that e.g. UTF-8 "hello" and US-ASCII "hello"
934
+ * produce the same hash (they compare equal via sv_eq).
935
+ */
936
+ int e = rb_enc_to_index(sv->enc);
937
+ if (e && sv_is_7bit(sv)) e = 0;
620
938
  st_index_t h = rb_memhash(p, sv->length);
621
- h ^= (st_index_t)rb_enc_get_index(sv->backing);
939
+ h ^= (st_index_t)e;
622
940
  return ST2FIX(h);
623
941
  }
624
942
 
@@ -636,13 +954,17 @@ static VALUE sv_hash(VALUE self) {
636
954
  * Compute single-byte flag from encoding + coderange.
637
955
  * Called once at construction time and cached in sv->single_byte.
638
956
  */
639
- static int sv_compute_single_byte(VALUE backing, rb_encoding *enc) {
957
+ int sv_compute_single_byte(VALUE backing, rb_encoding *enc) {
640
958
  if (rb_enc_mbmaxlen(enc) == 1) return 1;
641
959
  int cr = ENC_CODERANGE(backing);
642
960
  if (cr == ENC_CODERANGE_7BIT) return 1;
643
- /* For VALID (known multibyte) we know it's not single-byte */
644
- if (cr == ENC_CODERANGE_VALID) return 0;
645
- /* UNKNOWN: we don't know yet return -1 (will be resolved lazily) */
961
+ /*
962
+ * For VALID and UNKNOWN: the coderange reflects the entire backing
963
+ * string, not this slice. A view over an ASCII-only prefix of a
964
+ * multibyte string would incorrectly get single_byte=0 if VALID returned 0.
965
+ * Return -1 (unknown) and let sv_single_byte_optimizable resolve
966
+ * it lazily by scanning the actual slice bytes.
967
+ */
646
968
  return -1;
647
969
  }
648
970
 
@@ -801,22 +1123,11 @@ static long sv_char_to_byte_offset(string_view_t *sv, long char_idx) {
801
1123
  return char_idx;
802
1124
  }
803
1125
 
804
- if (SV_LIKELY(sv_is_utf8(sv))) {
1126
+ if (SV_LIKELY(sv_is_utf8(sv)) && sv_valid_encoding_cached(sv)) {
805
1127
  return sv_utf8_char_to_byte_offset_indexed(sv, char_idx);
806
1128
  }
807
1129
 
808
- rb_encoding *enc = sv_enc(sv);
809
- const char *p = sv_ptr(sv);
810
- const char *e = p + sv->length;
811
- const char *start = p;
812
- long i;
813
-
814
- for (i = 0; i < char_idx && p < e; i++) {
815
- p += rb_enc_fast_mbclen(p, e, enc);
816
- }
817
-
818
- if (i < char_idx) return -1;
819
- return p - start;
1130
+ return sv_tolerant_char_to_byte_offset(sv, char_idx);
820
1131
  }
821
1132
 
822
1133
  static long sv_char_count(string_view_t *sv) {
@@ -826,12 +1137,11 @@ static long sv_char_count(string_view_t *sv) {
826
1137
  long count;
827
1138
  if (sv_single_byte_optimizable(sv)) {
828
1139
  count = sv->length;
829
- } else if (SV_LIKELY(sv_is_utf8(sv))) {
1140
+ } else if (SV_LIKELY(sv_is_utf8(sv)) && sv_valid_encoding_cached(sv)) {
830
1141
  count = sv_utf8_char_count(sv_ptr(sv), sv->length);
831
1142
  } else {
832
- rb_encoding *enc = sv_enc(sv);
833
- const char *p = sv_ptr(sv);
834
- count = rb_enc_strlen(p, p + sv->length, enc);
1143
+ count = sv_tolerant_char_count(sv_ptr(sv), sv_ptr(sv) + sv->length,
1144
+ sv_enc(sv));
835
1145
  }
836
1146
 
837
1147
  sv->charlen = count;
@@ -844,20 +1154,20 @@ static long sv_chars_to_bytes(string_view_t *sv, long byte_off, long n) {
844
1154
  return n < remaining ? n : remaining;
845
1155
  }
846
1156
 
847
- if (SV_LIKELY(sv_is_utf8(sv))) {
1157
+ if (SV_LIKELY(sv_is_utf8(sv)) && sv_valid_encoding_cached(sv)) {
848
1158
  return sv_utf8_chars_to_bytes(sv_ptr(sv), sv->length, byte_off, n);
849
1159
  }
850
1160
 
851
- rb_encoding *enc = sv_enc(sv);
852
- const char *p = sv_ptr(sv) + byte_off;
853
- const char *e = sv_ptr(sv) + sv->length;
854
- long i;
855
- const char *start = p;
1161
+ return sv_tolerant_chars_to_bytes(sv, byte_off, n);
1162
+ }
856
1163
 
857
- for (i = 0; i < n && p < e; i++) {
858
- p += rb_enc_fast_mbclen(p, e, enc);
1164
+ static long sv_char_count_partial(string_view_t *sv, const char *p, long len) {
1165
+ if (len <= 0) return 0;
1166
+ if (sv_single_byte_optimizable(sv)) return len;
1167
+ if (SV_LIKELY(sv_is_utf8(sv)) && sv_valid_encoding_cached(sv)) {
1168
+ return sv_utf8_char_count(p, len);
859
1169
  }
860
- return p - start;
1170
+ return sv_tolerant_char_count(p, p + len, sv_enc(sv));
861
1171
  }
862
1172
 
863
1173
  static VALUE sv_aref(int argc, VALUE *argv, VALUE self) {
@@ -879,8 +1189,8 @@ static VALUE sv_aref(int argc, VALUE *argv, VALUE self) {
879
1189
  long total = sv->length;
880
1190
  if (idx < 0) idx += total;
881
1191
  if (SV_UNLIKELY(idx < 0 || idx > total || len < 0)) return Qnil;
882
- if (idx + len > total) len = total - idx;
883
- return sv_new_from_parent(sv,
1192
+ if (len > total - idx) len = total - idx;
1193
+ return sv_new_from_parent_obj(self, sv,
884
1194
  sv->offset + idx,
885
1195
  len);
886
1196
  }
@@ -901,12 +1211,12 @@ static VALUE sv_aref(int argc, VALUE *argv, VALUE self) {
901
1211
 
902
1212
  /* Clamp len to remaining characters */
903
1213
  long total_chars = sv_char_count(sv);
904
- if (idx + len > total_chars) len = total_chars - idx;
1214
+ if (len > total_chars - idx) len = total_chars - idx;
905
1215
 
906
1216
  long byte_end = sv_char_to_byte_offset(sv, idx + len);
907
1217
  long byte_len = byte_end - byte_off;
908
1218
 
909
- return sv_new_from_parent(sv,
1219
+ return sv_new_from_parent_obj(self, sv,
910
1220
  sv->offset + byte_off,
911
1221
  byte_len);
912
1222
  }
@@ -914,48 +1224,34 @@ static VALUE sv_aref(int argc, VALUE *argv, VALUE self) {
914
1224
  if (rb_obj_is_kind_of(arg1, rb_cRange)) {
915
1225
  long total_chars = sv_char_count(sv);
916
1226
  long beg, len;
917
- int excl;
918
- VALUE rb_beg = rb_funcall(arg1, rb_intern("begin"), 0);
919
- VALUE rb_end = rb_funcall(arg1, rb_intern("end"), 0);
920
- excl = RTEST(rb_funcall(arg1, rb_intern("exclude_end?"), 0));
921
-
922
- beg = NIL_P(rb_beg) ? 0 : NUM2LONG(rb_beg);
923
- if (beg < 0) beg += total_chars;
924
- if (beg < 0) return Qnil;
925
-
926
- long e;
927
- if (NIL_P(rb_end)) {
928
- e = total_chars;
929
- } else {
930
- e = NUM2LONG(rb_end);
931
- if (e < 0) e += total_chars;
932
- if (!excl) e += 1;
1227
+
1228
+ /* rb_range_beg_len resolves negative indices and clamps to total,
1229
+ * replacing 3 Ruby method dispatches with a single C call. */
1230
+ switch (rb_range_beg_len(arg1, &beg, &len, total_chars, 1)) {
1231
+ case Qfalse: return Qnil;
1232
+ case Qnil: return Qnil;
933
1233
  }
934
- if (e < beg) e = beg;
935
- len = e - beg;
936
- if (beg > total_chars) return Qnil;
937
- if (beg + len > total_chars) len = total_chars - beg;
938
1234
 
939
1235
  long byte_off = sv_char_to_byte_offset(sv, beg);
940
1236
  long byte_len = sv_chars_to_bytes(sv, byte_off, len);
941
1237
 
942
- return sv_new_from_parent(sv,
1238
+ return sv_new_from_parent_obj(self, sv,
943
1239
  sv->offset + byte_off,
944
1240
  byte_len);
945
1241
  }
946
1242
 
947
1243
  if (rb_obj_is_kind_of(arg1, rb_cRegexp)) {
948
1244
  VALUE shared = sv_as_shared_str(sv);
949
- VALUE m = rb_funcall(arg1, rb_intern("match"), 1, shared);
1245
+ VALUE m = rb_funcall(arg1, id_match, 1, shared);
950
1246
  if (NIL_P(m)) return Qnil;
951
1247
 
952
- VALUE matched = rb_funcall(m, rb_intern("[]"), 1, INT2FIX(0));
953
- long match_beg = NUM2LONG(rb_funcall(m, rb_intern("begin"), 1, INT2FIX(0)));
1248
+ VALUE matched = rb_funcall(m, id_aref, 1, INT2FIX(0));
1249
+ long match_beg = NUM2LONG(rb_funcall(m, id_begin, 1, INT2FIX(0)));
954
1250
 
955
1251
  long byte_off = sv_char_to_byte_offset(sv, match_beg);
956
1252
  long byte_len = RSTRING_LEN(matched);
957
1253
 
958
- return sv_new_from_parent(sv,
1254
+ return sv_new_from_parent_obj(self, sv,
959
1255
  sv->offset + byte_off,
960
1256
  byte_len);
961
1257
  }
@@ -964,14 +1260,15 @@ static VALUE sv_aref(int argc, VALUE *argv, VALUE self) {
964
1260
  const char *p = sv_ptr(sv);
965
1261
  long slen = RSTRING_LEN(arg1);
966
1262
  if (slen == 0) {
967
- return sv_new_from_parent(sv, sv->offset, 0);
1263
+ return sv_new_from_parent_obj(self, sv, sv->offset, 0);
968
1264
  }
1265
+ sv_check_compatible_string(sv, arg1);
969
1266
  if (slen > sv->length) return Qnil;
970
1267
 
971
1268
  long pos = rb_memsearch(RSTRING_PTR(arg1), slen, p, sv->length, sv_enc(sv));
972
1269
  if (pos < 0 || pos > sv->length - slen) return Qnil;
973
1270
 
974
- return sv_new_from_parent(sv, sv->offset + pos, slen);
1271
+ return sv_new_from_parent_obj(self, sv, sv->offset + pos, slen);
975
1272
  }
976
1273
 
977
1274
  if (RB_INTEGER_TYPE_P(arg1)) {
@@ -986,7 +1283,7 @@ static VALUE sv_aref(int argc, VALUE *argv, VALUE self) {
986
1283
 
987
1284
  long byte_len = sv_chars_to_bytes(sv, byte_off, 1);
988
1285
 
989
- return sv_new_from_parent(sv,
1286
+ return sv_new_from_parent_obj(self, sv,
990
1287
  sv->offset + byte_off,
991
1288
  byte_len);
992
1289
  }
@@ -1013,98 +1310,376 @@ static VALUE sv_byteslice(int argc, VALUE *argv, VALUE self) {
1013
1310
  if (off < 0) off += sv->length;
1014
1311
  if (off < 0 || off > sv->length) return Qnil;
1015
1312
  if (len < 0) return Qnil;
1016
- if (off + len > sv->length) len = sv->length - off;
1313
+ if (len > sv->length - off) len = sv->length - off;
1017
1314
 
1018
- return sv_new_from_parent(sv, sv->offset + off, len);
1315
+ return sv_new_from_parent_obj(self, sv, sv->offset + off, len);
1019
1316
  }
1020
1317
 
1021
1318
  if (rb_obj_is_kind_of(arg1, rb_cRange)) {
1022
1319
  long beg, len;
1023
- VALUE rb_beg = rb_funcall(arg1, rb_intern("begin"), 0);
1024
- VALUE rb_end = rb_funcall(arg1, rb_intern("end"), 0);
1025
- int excl = RTEST(rb_funcall(arg1, rb_intern("exclude_end?"), 0));
1026
1320
 
1027
- beg = NIL_P(rb_beg) ? 0 : NUM2LONG(rb_beg);
1028
- if (beg < 0) beg += sv->length;
1029
- if (beg < 0) return Qnil;
1030
-
1031
- long e;
1032
- if (NIL_P(rb_end)) {
1033
- e = sv->length;
1034
- } else {
1035
- e = NUM2LONG(rb_end);
1036
- if (e < 0) e += sv->length;
1037
- if (!excl) e += 1;
1321
+ switch (rb_range_beg_len(arg1, &beg, &len, sv->length, 1)) {
1322
+ case Qfalse: return Qnil;
1323
+ case Qnil: return Qnil;
1038
1324
  }
1039
- if (e < beg) e = beg;
1040
- len = e - beg;
1041
- if (beg > sv->length) return Qnil;
1042
- if (beg + len > sv->length) len = sv->length - beg;
1043
1325
 
1044
- return sv_new_from_parent(sv, sv->offset + beg, len);
1326
+ return sv_new_from_parent_obj(self, sv, sv->offset + beg, len);
1045
1327
  }
1046
1328
 
1047
1329
  {
1048
1330
  long idx = NUM2LONG(arg1);
1049
1331
  if (idx < 0) idx += sv->length;
1050
1332
  if (idx < 0 || idx >= sv->length) return Qnil;
1051
- return sv_new_from_parent(sv, sv->offset + idx, 1);
1333
+ return sv_new_from_parent_obj(self, sv, sv->offset + idx, 1);
1052
1334
  }
1053
1335
  }
1054
1336
 
1337
+ /* ========================================================================= */
1338
+ /* Tier 1.5: Zero-copy transforms — returns StringView via offset adjustment */
1339
+ /* ========================================================================= */
1340
+
1341
+ /*
1342
+ * Helper: check if a byte is ASCII whitespace.
1343
+ * Matches Ruby's strip behavior for ASCII-compatible encodings:
1344
+ * space, tab, newline, vertical tab, form feed, carriage return, NUL.
1345
+ */
1346
+ SV_INLINE int sv_is_ascii_whitespace(unsigned char c) {
1347
+ return c == ' ' || (c >= '\t' && c <= '\r') || c == '\0';
1348
+ }
1349
+
1350
+ /*
1351
+ * strip → StringView
1352
+ * Returns a new StringView with leading and trailing ASCII whitespace removed.
1353
+ * Zero allocations for the byte content — only a new StringView struct.
1354
+ */
1355
+ static VALUE sv_strip(int argc, VALUE *argv, VALUE self) {
1356
+ rb_check_arity(argc, 0, 0);
1357
+ string_view_t *sv = sv_get_struct(self);
1358
+ const unsigned char *p = (const unsigned char *)sv_ptr(sv);
1359
+ long len = sv->length;
1360
+
1361
+ /* Skip leading whitespace */
1362
+ long left = 0;
1363
+ while (left < len && sv_is_ascii_whitespace(p[left])) left++;
1364
+
1365
+ /* Skip trailing whitespace */
1366
+ long right = len;
1367
+ while (right > left && sv_is_ascii_whitespace(p[right - 1])) right--;
1368
+
1369
+ if (left == 0 && right == len) return self;
1370
+ return sv_new_from_parent_obj(self, sv, sv->offset + left, right - left);
1371
+ }
1372
+
1373
+ /*
1374
+ * lstrip → StringView
1375
+ * Returns a new StringView with leading ASCII whitespace removed.
1376
+ */
1377
+ static VALUE sv_lstrip(int argc, VALUE *argv, VALUE self) {
1378
+ rb_check_arity(argc, 0, 0);
1379
+ string_view_t *sv = sv_get_struct(self);
1380
+ const unsigned char *p = (const unsigned char *)sv_ptr(sv);
1381
+ long len = sv->length;
1382
+
1383
+ long left = 0;
1384
+ while (left < len && sv_is_ascii_whitespace(p[left])) left++;
1385
+
1386
+ if (left == 0) return self;
1387
+ return sv_new_from_parent_obj(self, sv, sv->offset + left, len - left);
1388
+ }
1389
+
1390
+ /*
1391
+ * rstrip → StringView
1392
+ * Returns a new StringView with trailing ASCII whitespace removed.
1393
+ */
1394
+ static VALUE sv_rstrip(int argc, VALUE *argv, VALUE self) {
1395
+ rb_check_arity(argc, 0, 0);
1396
+ string_view_t *sv = sv_get_struct(self);
1397
+ const unsigned char *p = (const unsigned char *)sv_ptr(sv);
1398
+ long len = sv->length;
1399
+
1400
+ long right = len;
1401
+ while (right > 0 && sv_is_ascii_whitespace(p[right - 1])) right--;
1402
+
1403
+ if (right == len) return self;
1404
+ return sv_new_from_parent_obj(self, sv, sv->offset, right);
1405
+ }
1406
+
1407
+ /*
1408
+ * chomp([separator]) → StringView
1409
+ * Returns a new StringView with the trailing record separator removed.
1410
+ * Default separator is $/ (typically "\n").
1411
+ * Handles "\n", "\r\n", and "\r" when separator is "\n".
1412
+ */
1413
+ static VALUE sv_chomp(int argc, VALUE *argv, VALUE self) {
1414
+ rb_check_arity(argc, 0, 1);
1415
+ string_view_t *sv = sv_get_struct(self);
1416
+ const unsigned char *p = (const unsigned char *)sv_ptr(sv);
1417
+ long len = sv->length;
1418
+
1419
+ if (len == 0) return self;
1420
+
1421
+ if (argc == 0 || NIL_P(argv[0])) {
1422
+ /* Default: remove trailing \n, \r\n, or \r */
1423
+ /* Use $/ (input record separator) when no arg given */
1424
+ VALUE rs;
1425
+ if (argc == 0) {
1426
+ rs = rb_rs; /* global $/ */
1427
+ if (NIL_P(rs)) return self; /* $/ is nil, no chomp */
1428
+ } else {
1429
+ return self; /* chomp(nil) returns self */
1430
+ }
1431
+
1432
+ /* Fast path for default $/ which is "\n" */
1433
+ if (RB_TYPE_P(rs, T_STRING) && RSTRING_LEN(rs) == 1 && RSTRING_PTR(rs)[0] == '\n') {
1434
+ if (p[len - 1] == '\n') {
1435
+ long newlen = len - 1;
1436
+ if (newlen > 0 && p[newlen - 1] == '\r') newlen--;
1437
+ return sv_new_from_parent_obj(self, sv, sv->offset, newlen);
1438
+ } else if (p[len - 1] == '\r') {
1439
+ return sv_new_from_parent_obj(self, sv, sv->offset, len - 1);
1440
+ }
1441
+ return self;
1442
+ }
1443
+
1444
+ /* Non-default $/ — use the separator */
1445
+ if (!RB_TYPE_P(rs, T_STRING)) return self;
1446
+ const char *sep = RSTRING_PTR(rs);
1447
+ long seplen = RSTRING_LEN(rs);
1448
+ if (seplen == 0) {
1449
+ /* Paragraph mode: remove trailing \n+ */
1450
+ long right = len;
1451
+ while (right > 0 && p[right - 1] == '\n') right--;
1452
+ if (right == len) return self;
1453
+ return sv_new_from_parent_obj(self, sv, sv->offset, right);
1454
+ }
1455
+ if (seplen > len) return self;
1456
+ if (memcmp(p + len - seplen, sep, seplen) == 0) {
1457
+ return sv_new_from_parent_obj(self, sv, sv->offset, len - seplen);
1458
+ }
1459
+ return self;
1460
+ }
1461
+
1462
+ /* Explicit separator argument */
1463
+ VALUE sep_val = argv[0];
1464
+ if (NIL_P(sep_val)) return self;
1465
+ StringValue(sep_val);
1466
+ const char *sep = RSTRING_PTR(sep_val);
1467
+ long seplen = RSTRING_LEN(sep_val);
1468
+
1469
+ if (seplen == 0) {
1470
+ /* Paragraph mode: remove all trailing newlines */
1471
+ long right = len;
1472
+ while (right > 0 && p[right - 1] == '\n') right--;
1473
+ if (right == len) return self;
1474
+ return sv_new_from_parent_obj(self, sv, sv->offset, right);
1475
+ }
1476
+
1477
+ /* Special handling for "\n": also removes \r\n and \r */
1478
+ if (seplen == 1 && sep[0] == '\n') {
1479
+ if (p[len - 1] == '\n') {
1480
+ long newlen = len - 1;
1481
+ if (newlen > 0 && p[newlen - 1] == '\r') newlen--;
1482
+ return sv_new_from_parent_obj(self, sv, sv->offset, newlen);
1483
+ } else if (p[len - 1] == '\r') {
1484
+ return sv_new_from_parent_obj(self, sv, sv->offset, len - 1);
1485
+ }
1486
+ return self;
1487
+ }
1488
+
1489
+ if (seplen > len) return self;
1490
+ if (memcmp(p + len - seplen, sep, seplen) == 0) {
1491
+ return sv_new_from_parent_obj(self, sv, sv->offset, len - seplen);
1492
+ }
1493
+ return self;
1494
+ }
1495
+
1496
+ /*
1497
+ * chop → StringView
1498
+ * Returns a new StringView with the last character removed.
1499
+ * If the string ends with \r\n, both characters are removed.
1500
+ */
1501
+ static VALUE sv_chop(int argc, VALUE *argv, VALUE self) {
1502
+ rb_check_arity(argc, 0, 0);
1503
+ string_view_t *sv = sv_get_struct(self);
1504
+ long len = sv->length;
1505
+
1506
+ if (len == 0) return self;
1507
+
1508
+ const unsigned char *p = (const unsigned char *)sv_ptr(sv);
1509
+
1510
+ /* Check for \r\n at the end */
1511
+ if (len >= 2 && p[len - 1] == '\n' && p[len - 2] == '\r') {
1512
+ return sv_new_from_parent_obj(self, sv, sv->offset, len - 2);
1513
+ }
1514
+
1515
+ /* Remove last character (respecting encoding) */
1516
+ if (sv_single_byte_optimizable(sv)) {
1517
+ return sv_new_from_parent_obj(self, sv, sv->offset, len - 1);
1518
+ }
1519
+
1520
+ /* Multibyte: find start of last character */
1521
+ rb_encoding *enc = sv_enc(sv);
1522
+ const char *start = sv_ptr(sv);
1523
+ const char *end = start + len;
1524
+ const char *prev = rb_enc_prev_char(start, end, end, enc);
1525
+ if (prev == NULL) prev = start;
1526
+ long newlen = (long)(prev - start);
1527
+
1528
+ return sv_new_from_parent_obj(self, sv, sv->offset, newlen);
1529
+ }
1530
+
1531
+ /*
1532
+ * delete_prefix(prefix) → StringView
1533
+ * Returns a new StringView with the given prefix removed, or self if
1534
+ * the string doesn't start with the prefix.
1535
+ */
1536
+ static VALUE sv_delete_prefix(VALUE self, VALUE prefix) {
1537
+ string_view_t *sv = sv_get_struct(self);
1538
+ StringValue(prefix);
1539
+ const char *p = sv_ptr(sv);
1540
+ long plen = RSTRING_LEN(prefix);
1541
+
1542
+ if (plen == 0) return self;
1543
+ sv_check_compatible_string(sv, prefix);
1544
+ if (plen > sv->length) return self;
1545
+ if (memcmp(p, RSTRING_PTR(prefix), plen) != 0) return self;
1546
+
1547
+ return sv_new_from_parent_obj(self, sv, sv->offset + plen, sv->length - plen);
1548
+ }
1549
+
1550
+ /*
1551
+ * delete_suffix(suffix) → StringView
1552
+ * Returns a new StringView with the given suffix removed, or self if
1553
+ * the string doesn't end with the suffix.
1554
+ */
1555
+ static VALUE sv_delete_suffix(VALUE self, VALUE suffix) {
1556
+ string_view_t *sv = sv_get_struct(self);
1557
+ StringValue(suffix);
1558
+ const char *p = sv_ptr(sv);
1559
+ long slen = RSTRING_LEN(suffix);
1560
+
1561
+ if (slen == 0) return self;
1562
+ sv_check_compatible_string(sv, suffix);
1563
+ if (slen > sv->length) return self;
1564
+ if (memcmp(p + sv->length - slen, RSTRING_PTR(suffix), slen) != 0) return self;
1565
+
1566
+ return sv_new_from_parent_obj(self, sv, sv->offset, sv->length - slen);
1567
+ }
1568
+
1569
+ /*
1570
+ * chr → StringView
1571
+ * Returns the first character as a StringView.
1572
+ */
1573
+ static VALUE sv_chr(VALUE self) {
1574
+ string_view_t *sv = sv_get_struct(self);
1575
+
1576
+ if (sv->length == 0) return self;
1577
+
1578
+ if (sv_single_byte_optimizable(sv)) {
1579
+ return sv_new_from_parent_obj(self, sv, sv->offset, 1);
1580
+ }
1581
+
1582
+ rb_encoding *enc = sv_enc(sv);
1583
+ const char *p = sv_ptr(sv);
1584
+ const char *e = p + sv->length;
1585
+ int clen = rb_enc_fast_mbclen(p, e, enc);
1586
+
1587
+ return sv_new_from_parent_obj(self, sv, sv->offset, clen);
1588
+ }
1589
+
1590
+ /*
1591
+ * ord → Integer
1592
+ * Returns the codepoint of the first character.
1593
+ */
1594
+ static VALUE sv_ord(VALUE self) {
1595
+ string_view_t *sv = sv_get_struct(self);
1596
+
1597
+ if (sv->length == 0) {
1598
+ rb_raise(rb_eArgError, "empty string");
1599
+ }
1600
+
1601
+ rb_encoding *enc = sv_enc(sv);
1602
+ const char *p = sv_ptr(sv);
1603
+ const char *e = p + sv->length;
1604
+ unsigned int c = rb_enc_codepoint_len(p, e, NULL, enc);
1605
+ return UINT2NUM(c);
1606
+ }
1607
+
1608
+ /*
1609
+ * valid_encoding? → true/false
1610
+ * Returns whether the view's bytes are valid in its encoding.
1611
+ */
1612
+ static VALUE sv_valid_encoding_p(VALUE self) {
1613
+ string_view_t *sv = sv_get_struct(self);
1614
+ return sv_valid_encoding_cached(sv) ? Qtrue : Qfalse;
1615
+ }
1616
+
1617
+ /*
1618
+ * b → StringView
1619
+ * Returns a new StringView that references the same bytes but with
1620
+ * ASCII-8BIT encoding. Since we share the same backing bytes, this is
1621
+ * only valid when the backing is also binary-compatible, which it always
1622
+ * is — we just reinterpret the bytes.
1623
+ *
1624
+ * Note: the backing string carries its own encoding, which at first
1625
+ * suggests creating a new backing string with binary encoding.
1626
+ * However, the view's encoding is cached in sv->enc, so a lightweight
1627
+ * view with a different encoding can be created over the same backing.
1628
+ * For true zero-alloc we store enc separately from the backing string.
1629
+ */
1630
+
1055
1631
  /* ========================================================================= */
1056
1632
  /* Tier 3: Transform delegation */
1057
1633
  /* ========================================================================= */
1058
1634
 
1059
- #define SV_DELEGATE_FUNCALL(cname, rbname) \
1635
+ #define SV_DELEGATE_FUNCALL(cname, cached_id) \
1060
1636
  static VALUE sv_##cname(int argc, VALUE *argv, VALUE self) { \
1061
1637
  string_view_t *sv = sv_get_struct(self); \
1062
1638
  VALUE shared = sv_as_shared_str(sv); \
1063
1639
  if (rb_block_given_p()) { \
1064
- return rb_funcall_with_block(shared, rb_intern(rbname), \
1640
+ return rb_funcall_with_block(shared, cached_id, \
1065
1641
  argc, argv, rb_block_proc()); \
1066
1642
  } \
1067
- return rb_funcallv(shared, rb_intern(rbname), argc, argv); \
1068
- }
1069
-
1070
- SV_DELEGATE_FUNCALL(upcase, "upcase")
1071
- SV_DELEGATE_FUNCALL(downcase, "downcase")
1072
- SV_DELEGATE_FUNCALL(capitalize,"capitalize")
1073
- SV_DELEGATE_FUNCALL(swapcase, "swapcase")
1074
- SV_DELEGATE_FUNCALL(strip, "strip")
1075
- SV_DELEGATE_FUNCALL(lstrip, "lstrip")
1076
- SV_DELEGATE_FUNCALL(rstrip, "rstrip")
1077
- SV_DELEGATE_FUNCALL(chomp, "chomp")
1078
- SV_DELEGATE_FUNCALL(chop, "chop")
1079
- SV_DELEGATE_FUNCALL(reverse, "reverse")
1080
- SV_DELEGATE_FUNCALL(squeeze, "squeeze")
1081
- SV_DELEGATE_FUNCALL(encode, "encode")
1082
- SV_DELEGATE_FUNCALL(gsub, "gsub")
1083
- SV_DELEGATE_FUNCALL(sub, "sub")
1084
- SV_DELEGATE_FUNCALL(tr, "tr")
1085
- SV_DELEGATE_FUNCALL(tr_s, "tr_s")
1086
- SV_DELEGATE_FUNCALL(sv_delete, "delete")
1087
- SV_DELEGATE_FUNCALL(count, "count")
1088
- SV_DELEGATE_FUNCALL(scan, "scan")
1089
- SV_DELEGATE_FUNCALL(split, "split")
1090
- SV_DELEGATE_FUNCALL(center, "center")
1091
- SV_DELEGATE_FUNCALL(ljust, "ljust")
1092
- SV_DELEGATE_FUNCALL(rjust, "rjust")
1093
- SV_DELEGATE_FUNCALL(format_op, "%")
1094
- SV_DELEGATE_FUNCALL(plus, "+")
1095
- SV_DELEGATE_FUNCALL(multiply, "*")
1096
- SV_DELEGATE_FUNCALL(unpack1, "unpack1")
1097
- SV_DELEGATE_FUNCALL(scrub, "scrub")
1098
- SV_DELEGATE_FUNCALL(unicode_normalize, "unicode_normalize")
1643
+ return rb_funcallv(shared, cached_id, argc, argv); \
1644
+ }
1645
+
1646
+ SV_DELEGATE_FUNCALL(upcase, id_upcase)
1647
+ SV_DELEGATE_FUNCALL(downcase, id_downcase)
1648
+ SV_DELEGATE_FUNCALL(capitalize,id_capitalize)
1649
+ SV_DELEGATE_FUNCALL(swapcase, id_swapcase)
1650
+ SV_DELEGATE_FUNCALL(reverse, id_reverse)
1651
+ SV_DELEGATE_FUNCALL(squeeze, id_squeeze)
1652
+ SV_DELEGATE_FUNCALL(encode, id_encode)
1653
+ SV_DELEGATE_FUNCALL(gsub, id_gsub)
1654
+ SV_DELEGATE_FUNCALL(sub, id_sub)
1655
+ SV_DELEGATE_FUNCALL(tr, id_tr)
1656
+ SV_DELEGATE_FUNCALL(tr_s, id_tr_s)
1657
+ SV_DELEGATE_FUNCALL(delete_str,id_delete)
1658
+ /*
1659
+ * count(set, ...) → Integer
1660
+ * Delegates to String#count via shared string.
1661
+ * (Character set parsing is complex — reuse Ruby's implementation.)
1662
+ */
1663
+ SV_DELEGATE_FUNCALL(count, id_count)
1664
+ SV_DELEGATE_FUNCALL(scan, id_scan)
1665
+ SV_DELEGATE_FUNCALL(split, id_split)
1666
+ SV_DELEGATE_FUNCALL(center, id_center)
1667
+ SV_DELEGATE_FUNCALL(ljust, id_ljust)
1668
+ SV_DELEGATE_FUNCALL(rjust, id_rjust)
1669
+ SV_DELEGATE_FUNCALL(format_op, id_format_op)
1670
+ SV_DELEGATE_FUNCALL(plus, id_plus)
1671
+ SV_DELEGATE_FUNCALL(multiply, id_multiply)
1672
+ SV_DELEGATE_FUNCALL(unpack1, id_unpack1)
1673
+ SV_DELEGATE_FUNCALL(scrub, id_scrub)
1674
+ SV_DELEGATE_FUNCALL(unicode_normalize, id_unicode_normalize)
1099
1675
 
1100
1676
  /* ========================================================================= */
1101
1677
  /* Bang methods — always raise FrozenError */
1102
1678
  /* ========================================================================= */
1103
1679
 
1104
1680
  static VALUE sv_frozen_error(int argc, VALUE *argv, VALUE self) {
1105
- VALUE str = sv_to_s(self);
1106
- rb_raise(rb_eFrozenError, "can't modify frozen StringView: \"%s\"",
1107
- StringValueCStr(str));
1681
+ (void)argc; (void)argv;
1682
+ rb_raise(rb_eFrozenError, "can't modify frozen StringView");
1108
1683
  return Qnil;
1109
1684
  }
1110
1685
 
@@ -1115,6 +1690,46 @@ static VALUE sv_frozen_error(int argc, VALUE *argv, VALUE self) {
1115
1690
  void Init_string_view(void) {
1116
1691
  enc_utf8 = rb_utf8_encoding();
1117
1692
 
1693
+ /* Cache method IDs — avoids rb_intern hash lookup on every call */
1694
+ id_index = rb_intern("index");
1695
+ id_rindex = rb_intern("rindex");
1696
+ id_byteindex = rb_intern("byteindex");
1697
+ id_byterindex = rb_intern("byterindex");
1698
+ id_match = rb_intern("match");
1699
+ id_match_p = rb_intern("match?");
1700
+ id_match_op = rb_intern("=~");
1701
+ id_begin = rb_intern("begin");
1702
+ id_aref = rb_intern("[]");
1703
+ id_upcase = rb_intern("upcase");
1704
+ id_downcase = rb_intern("downcase");
1705
+ id_capitalize = rb_intern("capitalize");
1706
+ id_swapcase = rb_intern("swapcase");
1707
+ id_strip = rb_intern("strip");
1708
+ id_lstrip = rb_intern("lstrip");
1709
+ id_rstrip = rb_intern("rstrip");
1710
+ id_chomp = rb_intern("chomp");
1711
+ id_chop = rb_intern("chop");
1712
+ id_reverse = rb_intern("reverse");
1713
+ id_squeeze = rb_intern("squeeze");
1714
+ id_encode = rb_intern("encode");
1715
+ id_gsub = rb_intern("gsub");
1716
+ id_sub = rb_intern("sub");
1717
+ id_tr = rb_intern("tr");
1718
+ id_tr_s = rb_intern("tr_s");
1719
+ id_delete = rb_intern("delete");
1720
+ id_count = rb_intern("count");
1721
+ id_scan = rb_intern("scan");
1722
+ id_split = rb_intern("split");
1723
+ id_center = rb_intern("center");
1724
+ id_ljust = rb_intern("ljust");
1725
+ id_rjust = rb_intern("rjust");
1726
+ id_format_op = rb_intern("%");
1727
+ id_plus = rb_intern("+");
1728
+ id_multiply = rb_intern("*");
1729
+ id_unpack1 = rb_intern("unpack1");
1730
+ id_scrub = rb_intern("scrub");
1731
+ id_unicode_normalize = rb_intern("unicode_normalize");
1732
+
1118
1733
  cStringView = rb_define_class("StringView", rb_cObject);
1119
1734
  rb_include_module(cStringView, rb_mComparable);
1120
1735
 
@@ -1122,11 +1737,10 @@ void Init_string_view(void) {
1122
1737
  rb_define_method(cStringView, "initialize", sv_initialize, -1);
1123
1738
 
1124
1739
  rb_define_method(cStringView, "to_s", sv_to_s, 0);
1740
+ rb_define_method(cStringView, "materialize", sv_to_s, 0);
1125
1741
  rb_define_private_method(cStringView, "to_str", sv_to_str, 0);
1126
1742
  rb_define_method(cStringView, "inspect", sv_inspect, 0);
1127
- rb_define_method(cStringView, "frozen?", sv_frozen_p, 0);
1128
1743
  rb_define_method(cStringView, "reset!", sv_reset, 3);
1129
- rb_define_alias(cStringView, "materialize", "to_s");
1130
1744
 
1131
1745
  rb_define_method(cStringView, "bytesize", sv_bytesize, 0);
1132
1746
  rb_define_method(cStringView, "length", sv_length, 0);
@@ -1176,6 +1790,11 @@ void Init_string_view(void) {
1176
1790
  rb_define_method(cStringView, "rstrip", sv_rstrip, -1);
1177
1791
  rb_define_method(cStringView, "chomp", sv_chomp, -1);
1178
1792
  rb_define_method(cStringView, "chop", sv_chop, -1);
1793
+ rb_define_method(cStringView, "delete_prefix", sv_delete_prefix, 1);
1794
+ rb_define_method(cStringView, "delete_suffix", sv_delete_suffix, 1);
1795
+ rb_define_method(cStringView, "chr", sv_chr, 0);
1796
+ rb_define_method(cStringView, "ord", sv_ord, 0);
1797
+ rb_define_method(cStringView, "valid_encoding?", sv_valid_encoding_p, 0);
1179
1798
  rb_define_method(cStringView, "reverse", sv_reverse, -1);
1180
1799
  rb_define_method(cStringView, "squeeze", sv_squeeze, -1);
1181
1800
  rb_define_method(cStringView, "encode", sv_encode, -1);
@@ -1183,7 +1802,7 @@ void Init_string_view(void) {
1183
1802
  rb_define_method(cStringView, "sub", sv_sub, -1);
1184
1803
  rb_define_method(cStringView, "tr", sv_tr, -1);
1185
1804
  rb_define_method(cStringView, "tr_s", sv_tr_s, -1);
1186
- rb_define_method(cStringView, "delete", sv_sv_delete, -1);
1805
+ rb_define_method(cStringView, "delete", sv_delete_str, -1);
1187
1806
  rb_define_method(cStringView, "count", sv_count, -1);
1188
1807
  rb_define_method(cStringView, "scan", sv_scan, -1);
1189
1808
  rb_define_method(cStringView, "split", sv_split, -1);
@@ -1214,4 +1833,10 @@ void Init_string_view(void) {
1214
1833
  rb_define_method(cStringView, "gsub!", sv_frozen_error, -1);
1215
1834
  rb_define_method(cStringView, "sub!", sv_frozen_error, -1);
1216
1835
  rb_define_method(cStringView, "slice!", sv_frozen_error, -1);
1836
+ rb_define_method(cStringView, "delete_prefix!", sv_frozen_error, -1);
1837
+ rb_define_method(cStringView, "delete_suffix!", sv_frozen_error, -1);
1838
+
1839
+ Init_string_view_strict();
1840
+ Init_string_view_pool();
1841
+ Init_string_view_core_ext();
1217
1842
  }