string_view 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,1217 @@
1
+ #include "ruby.h"
2
+ #include "ruby/encoding.h"
3
+ #include "ruby/re.h"
4
+ #include "simdutf_c.h"
5
+
6
/*
 * Compiler portability shims.
 *
 * SV_LIKELY / SV_UNLIKELY are branch-prediction hints, SV_INLINE requests
 * forced inlining.  All three rely on GNU extensions, so they live behind
 * the same guard; other compilers get plain, semantically equivalent
 * fallbacks (the hints collapse to the normalised truth value).
 */
#if defined(__GNUC__) || defined(__clang__)
#define SV_LIKELY(x) __builtin_expect(!!(x), 1)
#define SV_UNLIKELY(x) __builtin_expect(!!(x), 0)
#define SV_INLINE static inline __attribute__((always_inline))
#else
#define SV_LIKELY(x) (!!(x))
#define SV_UNLIKELY(x) (!!(x))
#define SV_INLINE static inline
#endif
14
+
15
+ /* ========================================================================= */
16
+ /* Struct & TypedData */
17
+ /* ========================================================================= */
18
+
19
+ /*
20
+ * Stride index: maps every STRIDE_CHARS-th character to its byte offset.
21
+ * Built lazily on first char-indexed access. Enables O(1) char→byte
22
+ * lookup for any offset (small scalar scan within one stride).
23
+ */
24
+ #define STRIDE_CHARS 128
25
+
26
/* Lazily built table of byte offsets sampled every STRIDE_CHARS characters. */
typedef struct {
    /* offsets[i] holds the byte offset of character number i * STRIDE_CHARS */
    long *offsets;
    /* number of entries stored in offsets */
    long count;
} stride_index_t;
30
+
31
+ typedef struct {
32
+ VALUE backing; /* frozen String that owns the bytes */
33
+ const char *base; /* cached RSTRING_PTR(backing) — avoids indirection */
34
+ rb_encoding *enc; /* cached encoding — avoids rb_enc_get per call */
35
+ long offset; /* byte offset into backing */
36
+ long length; /* byte length of this view */
37
+ long charlen; /* cached character count; -1 = not yet computed */
38
+ int single_byte; /* cached: 1 if char==byte (ASCII/single-byte enc), 0 if multibyte, -1 unknown */
39
+ stride_index_t *stride_idx; /* lazily built stride index for multibyte, NULL if not built */
40
+ } string_view_t;
41
+
42
+ static VALUE cStringView;
43
+
44
+ /*
45
+ * GC callbacks.
46
+ *
47
+ * We use rb_gc_mark_movable (strong mark) so the view keeps the backing
48
+ * alive. This is the fast path — no WeakMap, no rb_funcall overhead.
49
+ *
50
+ * The intended ownership model is still that the *caller* keeps the
51
+ * backing alive (like std::string_view), but the GC enforces safety:
52
+ * if the caller drops their reference, the view's strong mark prevents
53
+ * a dangling pointer. When rb_gc_mark_weak becomes a public C API,
54
+ * we can switch to true non-owning semantics with zero API changes.
55
+ */
56
+ static void sv_mark(void *ptr) {
57
+ string_view_t *sv = (string_view_t *)ptr;
58
+ if (sv->backing != Qnil) {
59
+ rb_gc_mark_movable(sv->backing);
60
+ }
61
+ }
62
+
63
+ static void sv_compact(void *ptr) {
64
+ string_view_t *sv = (string_view_t *)ptr;
65
+ if (sv->backing != Qnil) {
66
+ sv->backing = rb_gc_location(sv->backing);
67
+ sv->base = RSTRING_PTR(sv->backing);
68
+ }
69
+ }
70
+
71
+ static void sv_free(void *ptr) {
72
+ string_view_t *sv = (string_view_t *)ptr;
73
+ if (sv->stride_idx) {
74
+ xfree(sv->stride_idx->offsets);
75
+ xfree(sv->stride_idx);
76
+ }
77
+ }
78
+
79
+ static size_t sv_memsize(const void *ptr) {
80
+ const string_view_t *sv = (const string_view_t *)ptr;
81
+ size_t size = sizeof(string_view_t);
82
+ if (sv->stride_idx) {
83
+ size += sizeof(stride_index_t) + sv->stride_idx->count * sizeof(long);
84
+ }
85
+ return size;
86
+ }
87
+
88
+ static const rb_data_type_t string_view_type = {
89
+ .wrap_struct_name = "StringView",
90
+ .function = { .dmark = sv_mark, .dfree = sv_free, .dsize = sv_memsize, .dcompact = sv_compact },
91
+ .flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE | RUBY_TYPED_EMBEDDABLE,
92
+ };
93
+
94
+ /* Forward declarations */
95
+ static int sv_compute_single_byte(VALUE backing, rb_encoding *enc);
96
+
97
+ /* ========================================================================= */
98
+ /* Internal helpers */
99
+ /* ========================================================================= */
100
+
101
+ SV_INLINE string_view_t *sv_get_struct(VALUE self) {
102
+ return (string_view_t *)RTYPEDDATA_GET_DATA(self);
103
+ }
104
+
105
+ /* Pointer to the start of this view's bytes */
106
+ SV_INLINE const char *sv_ptr(string_view_t *sv) {
107
+ return sv->base + sv->offset;
108
+ }
109
+
110
+ /* encoding of the backing string */
111
+ SV_INLINE rb_encoding *sv_enc(string_view_t *sv) {
112
+ return sv->enc;
113
+ }
114
+
115
+ /*
116
+ * Create a shared String that aliases the backing's heap buffer.
117
+ * The result is frozen to prevent mutation through the alias.
118
+ */
119
+ static VALUE sv_as_shared_str(string_view_t *sv) {
120
+ VALUE shared = rb_str_subseq(sv->backing, sv->offset, sv->length);
121
+ rb_obj_freeze(shared);
122
+ return shared;
123
+ }
124
+
125
+ /* Allocate a new StringView from a parent that already has cached base/enc */
126
+ SV_INLINE VALUE sv_new_from_parent(string_view_t *parent, long offset, long length) {
127
+ string_view_t *sv;
128
+ VALUE obj = TypedData_Make_Struct(cStringView, string_view_t,
129
+ &string_view_type, sv);
130
+ RB_OBJ_WRITE(obj, &sv->backing, parent->backing);
131
+ sv->base = parent->base;
132
+ sv->enc = parent->enc;
133
+ sv->offset = offset;
134
+ sv->length = length;
135
+ sv->single_byte = parent->single_byte;
136
+ sv->charlen = -1;
137
+ sv->stride_idx = NULL;
138
+ FL_SET_RAW(obj, FL_FREEZE);
139
+ return obj;
140
+ }
141
+
142
+
143
+ /* ========================================================================= */
144
+ /* Construction */
145
+ /* ========================================================================= */
146
+
147
+ static VALUE sv_alloc(VALUE klass) {
148
+ string_view_t *sv;
149
+ VALUE obj = TypedData_Make_Struct(klass, string_view_t,
150
+ &string_view_type, sv);
151
+ sv->backing = Qnil;
152
+ sv->base = NULL;
153
+ sv->enc = NULL;
154
+ sv->offset = 0;
155
+ sv->length = 0;
156
+ sv->single_byte = -1;
157
+ sv->charlen = -1;
158
+ sv->stride_idx = NULL;
159
+ return obj;
160
+ }
161
+
162
+ /*
163
+ * StringView.new(string)
164
+ * StringView.new(string, byte_offset, byte_length)
165
+ */
166
+ static VALUE sv_initialize(int argc, VALUE *argv, VALUE self) {
167
+ VALUE str, voffset, vlength;
168
+ long offset, length;
169
+
170
+ rb_scan_args(argc, argv, "12", &str, &voffset, &vlength);
171
+
172
+ if (!RB_TYPE_P(str, T_STRING)) {
173
+ rb_raise(rb_eTypeError,
174
+ "no implicit conversion of %s into String",
175
+ rb_obj_classname(str));
176
+ }
177
+
178
+ rb_str_freeze(str);
179
+
180
+ long backing_len = RSTRING_LEN(str);
181
+
182
+ if (NIL_P(voffset)) {
183
+ offset = 0;
184
+ length = backing_len;
185
+ } else {
186
+ offset = NUM2LONG(voffset);
187
+ length = NUM2LONG(vlength);
188
+
189
+ if (offset < 0 || length < 0 || offset + length > backing_len) {
190
+ rb_raise(rb_eArgError,
191
+ "offset %ld, length %ld out of range for string of bytesize %ld",
192
+ offset, length, backing_len);
193
+ }
194
+ }
195
+
196
+ string_view_t *sv = sv_get_struct(self);
197
+ rb_encoding *enc = rb_enc_get(str);
198
+ RB_OBJ_WRITE(self, &sv->backing, str);
199
+ sv->base = RSTRING_PTR(str);
200
+ sv->enc = enc;
201
+ sv->offset = offset;
202
+ sv->length = length;
203
+ sv->single_byte = sv_compute_single_byte(str, enc);
204
+ sv->charlen = -1;
205
+ sv->stride_idx = NULL;
206
+
207
+ rb_obj_freeze(self);
208
+
209
+ return self;
210
+ }
211
+
212
+ /* ========================================================================= */
213
+ /* to_s / materialize / inspect / reset! */
214
+ /* ========================================================================= */
215
+
216
+ static VALUE sv_to_s(VALUE self) {
217
+ string_view_t *sv = sv_get_struct(self);
218
+ return rb_enc_str_new(sv_ptr(sv), sv->length, sv_enc(sv));
219
+ }
220
+
221
+ /*
222
+ * to_str: implicit String coercion.
223
+ * Returns a frozen shared string (zero-copy for heap-allocated backings).
224
+ * This enables StringView to work with Regexp#=~, IO#write, and other
225
+ * Ruby methods that call to_str for implicit coercion.
226
+ */
227
+ static VALUE sv_to_str(VALUE self) {
228
+ string_view_t *sv = sv_get_struct(self);
229
+ return sv_as_shared_str(sv);
230
+ }
231
+
232
+ static VALUE sv_inspect(VALUE self) {
233
+ string_view_t *sv = sv_get_struct(self);
234
+ VALUE content = rb_enc_str_new(sv_ptr(sv), sv->length, sv_enc(sv));
235
+ return rb_sprintf("#<StringView:%p \"%"PRIsVALUE"\" offset=%ld length=%ld>",
236
+ (void *)self, content, sv->offset, sv->length);
237
+ }
238
+
239
+ static VALUE sv_frozen_p(VALUE self) {
240
+ return Qtrue;
241
+ }
242
+
243
+ /*
244
+ * reset!(new_backing, byte_offset, byte_length) -> self
245
+ */
246
+ static VALUE sv_reset(VALUE self, VALUE new_backing, VALUE voffset, VALUE vlength) {
247
+ string_view_t *sv = sv_get_struct(self);
248
+
249
+ if (!RB_TYPE_P(new_backing, T_STRING)) {
250
+ rb_raise(rb_eTypeError,
251
+ "no implicit conversion of %s into String",
252
+ rb_obj_classname(new_backing));
253
+ }
254
+
255
+ rb_str_freeze(new_backing);
256
+
257
+ long off = NUM2LONG(voffset);
258
+ long len = NUM2LONG(vlength);
259
+ long backing_len = RSTRING_LEN(new_backing);
260
+
261
+ if (off < 0 || len < 0 || off + len > backing_len) {
262
+ rb_raise(rb_eArgError,
263
+ "offset %ld, length %ld out of range for string of bytesize %ld",
264
+ off, len, backing_len);
265
+ }
266
+
267
+ rb_encoding *enc = rb_enc_get(new_backing);
268
+ RB_OBJ_WRITE(self, &sv->backing, new_backing);
269
+ sv->base = RSTRING_PTR(new_backing);
270
+ sv->enc = enc;
271
+ sv->offset = off;
272
+ sv->length = len;
273
+ sv->single_byte = sv_compute_single_byte(new_backing, enc);
274
+ sv->charlen = -1;
275
+ sv->stride_idx = NULL;
276
+
277
+ return self;
278
+ }
279
+
280
+ /* ========================================================================= */
281
+ /* Tier 1: Structural */
282
+ /* ========================================================================= */
283
+
284
+ static VALUE sv_bytesize(VALUE self) {
285
+ string_view_t *sv = sv_get_struct(self);
286
+ return LONG2NUM(sv->length);
287
+ }
288
+
289
+ /* Forward: sv_char_count is defined in Tier 2 but needed here */
290
+ static long sv_char_count(string_view_t *sv);
291
+
292
+ static VALUE sv_length(VALUE self) {
293
+ string_view_t *sv = sv_get_struct(self);
294
+ return LONG2NUM(sv_char_count(sv));
295
+ }
296
+
297
+ static VALUE sv_empty_p(VALUE self) {
298
+ string_view_t *sv = sv_get_struct(self);
299
+ return sv->length == 0 ? Qtrue : Qfalse;
300
+ }
301
+
302
+ static VALUE sv_encoding(VALUE self) {
303
+ string_view_t *sv = sv_get_struct(self);
304
+ return rb_enc_from_encoding(sv_enc(sv));
305
+ }
306
+
307
+ static VALUE sv_ascii_only_p(VALUE self) {
308
+ string_view_t *sv = sv_get_struct(self);
309
+ const char *p = sv_ptr(sv);
310
+ long i;
311
+ for (i = 0; i < sv->length; i++) {
312
+ if ((unsigned char)p[i] > 127) return Qfalse;
313
+ }
314
+ return Qtrue;
315
+ }
316
+
317
+ /* ========================================================================= */
318
+ /* Tier 1: Searching */
319
+ /* ========================================================================= */
320
+
321
+ static VALUE sv_include_p(VALUE self, VALUE substr) {
322
+ string_view_t *sv = sv_get_struct(self);
323
+ StringValue(substr);
324
+ const char *p = sv_ptr(sv);
325
+ long slen = RSTRING_LEN(substr);
326
+ if (slen == 0) return Qtrue;
327
+ if (slen > sv->length) return Qfalse;
328
+
329
+ long pos = rb_memsearch(RSTRING_PTR(substr), slen, p, sv->length, sv_enc(sv));
330
+ return pos >= 0 && pos <= sv->length - slen ? Qtrue : Qfalse;
331
+ }
332
+
333
+ static VALUE sv_start_with_p(int argc, VALUE *argv, VALUE self) {
334
+ string_view_t *sv = sv_get_struct(self);
335
+ const char *p = sv_ptr(sv);
336
+ int i;
337
+
338
+ for (i = 0; i < argc; i++) {
339
+ VALUE prefix = argv[i];
340
+ StringValue(prefix);
341
+ long plen = RSTRING_LEN(prefix);
342
+ if (plen > sv->length) continue;
343
+ if (memcmp(p, RSTRING_PTR(prefix), plen) == 0) return Qtrue;
344
+ }
345
+ return Qfalse;
346
+ }
347
+
348
+ static VALUE sv_end_with_p(int argc, VALUE *argv, VALUE self) {
349
+ string_view_t *sv = sv_get_struct(self);
350
+ const char *p = sv_ptr(sv);
351
+ int i;
352
+
353
+ for (i = 0; i < argc; i++) {
354
+ VALUE suffix = argv[i];
355
+ StringValue(suffix);
356
+ long slen = RSTRING_LEN(suffix);
357
+ if (slen > sv->length) continue;
358
+ if (memcmp(p + sv->length - slen, RSTRING_PTR(suffix), slen) == 0)
359
+ return Qtrue;
360
+ }
361
+ return Qfalse;
362
+ }
363
+
364
+ static VALUE sv_index(int argc, VALUE *argv, VALUE self) {
365
+ string_view_t *sv = sv_get_struct(self);
366
+ VALUE shared = sv_as_shared_str(sv);
367
+ return rb_funcallv(shared, rb_intern("index"), argc, argv);
368
+ }
369
+
370
+ static VALUE sv_rindex(int argc, VALUE *argv, VALUE self) {
371
+ string_view_t *sv = sv_get_struct(self);
372
+ VALUE shared = sv_as_shared_str(sv);
373
+ return rb_funcallv(shared, rb_intern("rindex"), argc, argv);
374
+ }
375
+
376
+ static VALUE sv_getbyte(VALUE self, VALUE vidx) {
377
+ string_view_t *sv = sv_get_struct(self);
378
+ long idx = NUM2LONG(vidx);
379
+ if (idx < 0) idx += sv->length;
380
+ if (idx < 0 || idx >= sv->length) return Qnil;
381
+ return INT2FIX((unsigned char)sv_ptr(sv)[idx]);
382
+ }
383
+
384
+ static VALUE sv_byteindex(int argc, VALUE *argv, VALUE self) {
385
+ string_view_t *sv = sv_get_struct(self);
386
+ VALUE shared = sv_as_shared_str(sv);
387
+ return rb_funcallv(shared, rb_intern("byteindex"), argc, argv);
388
+ }
389
+
390
+ static VALUE sv_byterindex(int argc, VALUE *argv, VALUE self) {
391
+ string_view_t *sv = sv_get_struct(self);
392
+ VALUE shared = sv_as_shared_str(sv);
393
+ return rb_funcallv(shared, rb_intern("byterindex"), argc, argv);
394
+ }
395
+
396
+ /* ========================================================================= */
397
+ /* Tier 1: Iteration */
398
+ /* ========================================================================= */
399
+
400
+ static VALUE sv_each_byte(VALUE self) {
401
+ string_view_t *sv = sv_get_struct(self);
402
+ RETURN_ENUMERATOR(self, 0, 0);
403
+ const char *p = sv_ptr(sv);
404
+ long i;
405
+ for (i = 0; i < sv->length; i++) {
406
+ rb_yield(INT2FIX((unsigned char)p[i]));
407
+ }
408
+ return self;
409
+ }
410
+
411
+ static VALUE sv_each_char(VALUE self) {
412
+ string_view_t *sv = sv_get_struct(self);
413
+ RETURN_ENUMERATOR(self, 0, 0);
414
+ rb_encoding *enc = sv_enc(sv);
415
+ const char *p = sv_ptr(sv);
416
+ const char *e = p + sv->length;
417
+ while (p < e) {
418
+ int clen = rb_enc_fast_mbclen(p, e, enc);
419
+ rb_yield(rb_enc_str_new(p, clen, enc));
420
+ p += clen;
421
+ }
422
+ return self;
423
+ }
424
+
425
+ static VALUE sv_bytes(VALUE self) {
426
+ string_view_t *sv = sv_get_struct(self);
427
+ const char *p = sv_ptr(sv);
428
+ VALUE ary = rb_ary_new_capa(sv->length);
429
+ long i;
430
+ for (i = 0; i < sv->length; i++) {
431
+ rb_ary_push(ary, INT2FIX((unsigned char)p[i]));
432
+ }
433
+ return ary;
434
+ }
435
+
436
+ static VALUE sv_chars(VALUE self) {
437
+ string_view_t *sv = sv_get_struct(self);
438
+ rb_encoding *enc = sv_enc(sv);
439
+ const char *p = sv_ptr(sv);
440
+ const char *e = p + sv->length;
441
+ VALUE ary = rb_ary_new();
442
+ while (p < e) {
443
+ int clen = rb_enc_fast_mbclen(p, e, enc);
444
+ rb_ary_push(ary, rb_enc_str_new(p, clen, enc));
445
+ p += clen;
446
+ }
447
+ return ary;
448
+ }
449
+
450
+ /* ========================================================================= */
451
+ /* Tier 1: Pattern matching */
452
+ /* ========================================================================= */
453
+
454
+ static VALUE sv_match(int argc, VALUE *argv, VALUE self) {
455
+ string_view_t *sv = sv_get_struct(self);
456
+ VALUE shared = sv_as_shared_str(sv);
457
+ return rb_funcallv(shared, rb_intern("match"), argc, argv);
458
+ }
459
+
460
+ static VALUE sv_match_p(int argc, VALUE *argv, VALUE self) {
461
+ string_view_t *sv = sv_get_struct(self);
462
+ VALUE shared = sv_as_shared_str(sv);
463
+ return rb_funcallv(shared, rb_intern("match?"), argc, argv);
464
+ }
465
+
466
+ static VALUE sv_match_operator(VALUE self, VALUE pattern) {
467
+ string_view_t *sv = sv_get_struct(self);
468
+ VALUE shared = sv_as_shared_str(sv);
469
+ return rb_funcall(shared, rb_intern("=~"), 1, pattern);
470
+ }
471
+
472
+ /* ========================================================================= */
473
+ /* Tier 1: Numeric conversions */
474
+ /* ========================================================================= */
475
+
476
+ /*
477
+ * Get a NUL-terminated C string from the view for numeric parsing.
478
+ * Uses a stack buffer for short strings (common case), heap for long ones.
479
+ * The caller must call sv_cstr_free() after use if heap was allocated.
480
+ */
481
/* Views shorter than this many bytes are copied into the inline buffer. */
#define SV_CSTR_STACK_SIZE 128

/* Scratch holder for a NUL-terminated copy of a view's bytes. */
typedef struct {
    /* inline storage used for short views */
    char stack_buf[SV_CSTR_STACK_SIZE];
    /* the C string to use: either stack_buf or a heap allocation */
    char *ptr;
} sv_cstr_t;
487
+
488
+ SV_INLINE void sv_cstr_init(sv_cstr_t *cs, string_view_t *sv) {
489
+ const char *p = sv_ptr(sv);
490
+ long len = sv->length;
491
+ if (SV_LIKELY(len < SV_CSTR_STACK_SIZE)) {
492
+ memcpy(cs->stack_buf, p, len);
493
+ cs->stack_buf[len] = '\0';
494
+ cs->ptr = cs->stack_buf;
495
+ } else {
496
+ cs->ptr = (char *)xmalloc(len + 1);
497
+ memcpy(cs->ptr, p, len);
498
+ cs->ptr[len] = '\0';
499
+ }
500
+ }
501
+
502
+ SV_INLINE void sv_cstr_free(sv_cstr_t *cs) {
503
+ if (cs->ptr != cs->stack_buf) {
504
+ xfree(cs->ptr);
505
+ }
506
+ }
507
+
508
+ /*
509
+ * to_i([base]) — parse integer directly from byte pointer, zero allocations.
510
+ * Uses rb_cstr_to_inum which parses from a NUL-terminated C string.
511
+ */
512
+ static VALUE sv_to_i(int argc, VALUE *argv, VALUE self) {
513
+ string_view_t *sv = sv_get_struct(self);
514
+ int base = 10;
515
+ if (argc > 0) base = NUM2INT(argv[0]);
516
+
517
+ sv_cstr_t cs;
518
+ sv_cstr_init(&cs, sv);
519
+ VALUE result = rb_cstr_to_inum(cs.ptr, base, 0);
520
+ sv_cstr_free(&cs);
521
+ return result;
522
+ }
523
+
524
+ /*
525
+ * to_f — parse float directly from byte pointer, zero allocations.
526
+ */
527
+ static VALUE sv_to_f(VALUE self) {
528
+ string_view_t *sv = sv_get_struct(self);
529
+ sv_cstr_t cs;
530
+ sv_cstr_init(&cs, sv);
531
+ double d = rb_cstr_to_dbl(cs.ptr, 0);
532
+ sv_cstr_free(&cs);
533
+ return DBL2NUM(d);
534
+ }
535
+
536
+ /*
537
+ * hex — parse hexadecimal integer directly.
538
+ */
539
+ static VALUE sv_hex(VALUE self) {
540
+ string_view_t *sv = sv_get_struct(self);
541
+ sv_cstr_t cs;
542
+ sv_cstr_init(&cs, sv);
543
+ VALUE result = rb_cstr_to_inum(cs.ptr, 16, 0);
544
+ sv_cstr_free(&cs);
545
+ return result;
546
+ }
547
+
548
+ /*
549
+ * oct — parse octal integer directly.
550
+ */
551
+ static VALUE sv_oct(VALUE self) {
552
+ string_view_t *sv = sv_get_struct(self);
553
+ sv_cstr_t cs;
554
+ sv_cstr_init(&cs, sv);
555
+ VALUE result = rb_cstr_to_inum(cs.ptr, 8, 0);
556
+ sv_cstr_free(&cs);
557
+ return result;
558
+ }
559
+
560
+ /* ========================================================================= */
561
+ /* Tier 1: Comparison */
562
+ /* ========================================================================= */
563
+
564
+ static VALUE sv_eq(VALUE self, VALUE other) {
565
+ string_view_t *sv = sv_get_struct(self);
566
+ const char *p = sv_ptr(sv);
567
+
568
+ /* Fast path: String is the most common comparison target */
569
+ if (SV_LIKELY(RB_TYPE_P(other, T_STRING))) {
570
+ if (sv->length != RSTRING_LEN(other)) return Qfalse;
571
+ return memcmp(p, RSTRING_PTR(other), sv->length) == 0 ? Qtrue : Qfalse;
572
+ }
573
+
574
+ /* Check for StringView via class pointer (faster than rb_obj_is_kind_of) */
575
+ if (rb_obj_class(other) == cStringView) {
576
+ string_view_t *o = sv_get_struct(other);
577
+ if (sv->length != o->length) return Qfalse;
578
+ return memcmp(p, sv_ptr(o), sv->length) == 0 ? Qtrue : Qfalse;
579
+ }
580
+
581
+ return Qfalse;
582
+ }
583
+
584
+ static VALUE sv_cmp(VALUE self, VALUE other) {
585
+ string_view_t *sv = sv_get_struct(self);
586
+ const char *p = sv_ptr(sv);
587
+ const char *op;
588
+ long olen;
589
+
590
+ if (SV_LIKELY(RB_TYPE_P(other, T_STRING))) {
591
+ op = RSTRING_PTR(other);
592
+ olen = RSTRING_LEN(other);
593
+ } else if (rb_obj_class(other) == cStringView) {
594
+ string_view_t *o = sv_get_struct(other);
595
+ op = sv_ptr(o);
596
+ olen = o->length;
597
+ } else {
598
+ return Qnil;
599
+ }
600
+
601
+ long min = sv->length < olen ? sv->length : olen;
602
+ int cmp = memcmp(p, op, min);
603
+ if (cmp == 0) {
604
+ if (sv->length < olen) cmp = -1;
605
+ else if (sv->length > olen) cmp = 1;
606
+ } else {
607
+ cmp = cmp > 0 ? 1 : -1;
608
+ }
609
+ return INT2FIX(cmp);
610
+ }
611
+
612
+ static VALUE sv_eql_p(VALUE self, VALUE other) {
613
+ if (rb_obj_class(other) != cStringView) return Qfalse;
614
+ return sv_eq(self, other);
615
+ }
616
+
617
+ static VALUE sv_hash(VALUE self) {
618
+ string_view_t *sv = sv_get_struct(self);
619
+ const char *p = sv_ptr(sv);
620
+ st_index_t h = rb_memhash(p, sv->length);
621
+ h ^= (st_index_t)rb_enc_get_index(sv->backing);
622
+ return ST2FIX(h);
623
+ }
624
+
625
+ /* ========================================================================= */
626
+ /* Tier 2: Slicing — returns StringView */
627
+ /* ========================================================================= */
628
+
629
+ /*
630
+ * Returns true if this view's content is single-byte: either the encoding
631
+ * has mbmaxlen==1 (e.g. ASCII, ISO-8859-*) or we can quickly determine
632
+ * all bytes are ASCII (< 128) in a UTF-8 string via the backing string's
633
+ * coderange.
634
+ */
635
+ /*
636
+ * Compute single-byte flag from encoding + coderange.
637
+ * Called once at construction time and cached in sv->single_byte.
638
+ */
639
+ static int sv_compute_single_byte(VALUE backing, rb_encoding *enc) {
640
+ if (rb_enc_mbmaxlen(enc) == 1) return 1;
641
+ int cr = ENC_CODERANGE(backing);
642
+ if (cr == ENC_CODERANGE_7BIT) return 1;
643
+ /* For VALID (known multibyte) we know it's not single-byte */
644
+ if (cr == ENC_CODERANGE_VALID) return 0;
645
+ /* UNKNOWN: we don't know yet — return -1 (will be resolved lazily) */
646
+ return -1;
647
+ }
648
+
649
+ SV_INLINE int sv_single_byte_optimizable(string_view_t *sv) {
650
+ int sb = sv->single_byte;
651
+ if (SV_LIKELY(sb >= 0)) return sb;
652
+ /* Resolve unknown coderange by scanning our slice */
653
+ const char *p = sv_ptr(sv);
654
+ long i;
655
+ for (i = 0; i < sv->length; i++) {
656
+ if (SV_UNLIKELY((unsigned char)p[i] > 127)) {
657
+ sv->single_byte = 0;
658
+ return 0;
659
+ }
660
+ }
661
+ sv->single_byte = 1;
662
+ return 1;
663
+ }
664
+
665
+ /* ---- UTF-8 optimized helpers ------------------------------------------- */
666
+
667
+ static rb_encoding *enc_utf8 = NULL;
668
+
669
+ SV_INLINE int sv_is_utf8(string_view_t *sv) {
670
+ return sv->enc == enc_utf8;
671
+ }
672
+
673
+ /*
674
+ * UTF-8 character count using simdutf — SIMD-accelerated (NEON/SSE/AVX).
675
+ * Processes billions of characters per second on modern hardware.
676
+ */
677
/* Count the characters in `len` bytes at `p` by delegating to simdutf. */
static long sv_utf8_char_count(const char *p, long len) {
    size_t nbytes = (size_t)len;
    size_t nchars = simdutf_count_utf8(p, nbytes);
    return (long)nchars;
}
680
+
681
/*
 * Byte length of a UTF-8 sequence, looked up by its first byte.
 *
 * Bytes that cannot start a sequence (continuation bytes 0x80-0xBF and
 * 0xF8-0xFF) map to 1 so that a scan always makes progress.  The table does
 * no validation of its own; users that walk raw bytes with it are relying
 * on the data being well-formed.
 */
#define SV_U8_ROW16(n) n,n,n,n,n,n,n,n,n,n,n,n,n,n,n,n
#define SV_U8_ROW64(n) SV_U8_ROW16(n), SV_U8_ROW16(n), SV_U8_ROW16(n), SV_U8_ROW16(n)
static const unsigned char utf8_char_len[256] = {
    /* 0x00-0x7F: ASCII, one byte each */
    SV_U8_ROW64(1), SV_U8_ROW64(1),
    /* 0x80-0xBF: continuation bytes, not valid lead bytes; counted as 1 */
    SV_U8_ROW64(1),
    /* 0xC0-0xDF: lead byte of a 2-byte sequence */
    SV_U8_ROW16(2), SV_U8_ROW16(2),
    /* 0xE0-0xEF: lead byte of a 3-byte sequence */
    SV_U8_ROW16(3),
    /* 0xF0-0xF7: lead byte of a 4-byte sequence */
    4,4,4,4,4,4,4,4,
    /* 0xF8-0xFF: invalid; counted as 1 */
    1,1,1,1,1,1,1,1
};
#undef SV_U8_ROW64
#undef SV_U8_ROW16
703
+
704
+ /*
705
+ * Build the stride index for a UTF-8 view. Maps every STRIDE_CHARS-th
706
+ * character to its byte offset using simdutf SIMD counting for bulk
707
+ * char→byte conversion. Built lazily on first char-indexed access.
708
+ *
709
+ * After building: offsets[i] = byte offset of character (i * STRIDE_CHARS).
710
+ * To find char N: look up offsets[N / STRIDE_CHARS], then scalar-scan
711
+ * at most STRIDE_CHARS characters. This is O(1) for any offset.
712
+ */
713
+ static void sv_build_stride_index(string_view_t *sv) {
714
+ if (sv->stride_idx) return; /* already built */
715
+
716
+ long total_chars = sv_char_count(sv); /* ensures charlen is cached */
717
+ long n_entries = total_chars / STRIDE_CHARS + 1;
718
+
719
+ stride_index_t *idx = (stride_index_t *)xmalloc(sizeof(stride_index_t));
720
+ idx->offsets = (long *)xmalloc(n_entries * sizeof(long));
721
+ idx->count = n_entries;
722
+
723
+ const unsigned char *p = (const unsigned char *)sv_ptr(sv);
724
+ const unsigned char *e = p + sv->length;
725
+ long entry = 0;
726
+
727
+ idx->offsets[entry++] = 0; /* char 0 is at byte 0 */
728
+
729
+ /* Walk the string, recording byte offset every STRIDE_CHARS characters */
730
+ const unsigned char *s = p;
731
+
732
+ while (s < e && entry < n_entries) {
733
+ /* Advance STRIDE_CHARS characters */
734
+ long remaining = STRIDE_CHARS;
735
+ while (s < e && remaining > 0) {
736
+ s += utf8_char_len[*s];
737
+ remaining--;
738
+ }
739
+ idx->offsets[entry++] = (long)(s - p);
740
+ }
741
+
742
+ sv->stride_idx = idx;
743
+ }
744
+
745
+ /*
746
+ * Find the byte offset of the char_idx-th character in a UTF-8 string.
747
+ *
748
+ * Uses the stride index for O(1) lookup: jump to the nearest stride
749
+ * boundary, then scalar-scan at most STRIDE_CHARS characters.
750
+ */
751
+ static long sv_utf8_char_to_byte_offset_indexed(string_view_t *sv, long char_idx) {
752
+ if (char_idx == 0) return 0;
753
+
754
+ sv_build_stride_index(sv);
755
+
756
+ stride_index_t *idx = sv->stride_idx;
757
+ long slot = char_idx / STRIDE_CHARS;
758
+ long remainder = char_idx % STRIDE_CHARS;
759
+
760
+ if (slot >= idx->count) return -1;
761
+
762
+ long byte_off = idx->offsets[slot];
763
+
764
+ if (remainder == 0) return byte_off;
765
+
766
+ /* Scalar scan for the remaining characters within one stride */
767
+ const unsigned char *s = (const unsigned char *)sv_ptr(sv) + byte_off;
768
+ const unsigned char *e = (const unsigned char *)sv_ptr(sv) + sv->length;
769
+
770
+ while (s < e && remainder > 0) {
771
+ s += utf8_char_len[*s];
772
+ remainder--;
773
+ }
774
+
775
+ if (remainder > 0) return -1;
776
+ return (long)(s - (const unsigned char *)sv_ptr(sv));
777
+ }
778
+
779
+ /*
780
+ * Count byte length for n characters starting at byte offset byte_off
781
+ * in a UTF-8 string.
782
+ */
783
+ static long sv_utf8_chars_to_bytes(const char *p, long len, long byte_off, long n) {
784
+ const unsigned char *s = (const unsigned char *)p + byte_off;
785
+ const unsigned char *e = (const unsigned char *)p + len;
786
+ const unsigned char *start = s;
787
+ long chars = 0;
788
+
789
+ while (s < e && chars < n) {
790
+ s += utf8_char_len[*s];
791
+ chars++;
792
+ }
793
+
794
+ return (long)(s - start);
795
+ }
796
+
797
+ /* ---- Generic encoding helpers with UTF-8 fast paths -------------------- */
798
+
799
+ static long sv_char_to_byte_offset(string_view_t *sv, long char_idx) {
800
+ if (sv_single_byte_optimizable(sv)) {
801
+ return char_idx;
802
+ }
803
+
804
+ if (SV_LIKELY(sv_is_utf8(sv))) {
805
+ return sv_utf8_char_to_byte_offset_indexed(sv, char_idx);
806
+ }
807
+
808
+ rb_encoding *enc = sv_enc(sv);
809
+ const char *p = sv_ptr(sv);
810
+ const char *e = p + sv->length;
811
+ const char *start = p;
812
+ long i;
813
+
814
+ for (i = 0; i < char_idx && p < e; i++) {
815
+ p += rb_enc_fast_mbclen(p, e, enc);
816
+ }
817
+
818
+ if (i < char_idx) return -1;
819
+ return p - start;
820
+ }
821
+
822
+ static long sv_char_count(string_view_t *sv) {
823
+ /* Return cached value if available */
824
+ if (SV_LIKELY(sv->charlen >= 0)) return sv->charlen;
825
+
826
+ long count;
827
+ if (sv_single_byte_optimizable(sv)) {
828
+ count = sv->length;
829
+ } else if (SV_LIKELY(sv_is_utf8(sv))) {
830
+ count = sv_utf8_char_count(sv_ptr(sv), sv->length);
831
+ } else {
832
+ rb_encoding *enc = sv_enc(sv);
833
+ const char *p = sv_ptr(sv);
834
+ count = rb_enc_strlen(p, p + sv->length, enc);
835
+ }
836
+
837
+ sv->charlen = count;
838
+ return count;
839
+ }
840
+
841
+ static long sv_chars_to_bytes(string_view_t *sv, long byte_off, long n) {
842
+ if (sv_single_byte_optimizable(sv)) {
843
+ long remaining = sv->length - byte_off;
844
+ return n < remaining ? n : remaining;
845
+ }
846
+
847
+ if (SV_LIKELY(sv_is_utf8(sv))) {
848
+ return sv_utf8_chars_to_bytes(sv_ptr(sv), sv->length, byte_off, n);
849
+ }
850
+
851
+ rb_encoding *enc = sv_enc(sv);
852
+ const char *p = sv_ptr(sv) + byte_off;
853
+ const char *e = sv_ptr(sv) + sv->length;
854
+ long i;
855
+ const char *start = p;
856
+
857
+ for (i = 0; i < n && p < e; i++) {
858
+ p += rb_enc_fast_mbclen(p, e, enc);
859
+ }
860
+ return p - start;
861
+ }
862
+
863
+ static VALUE sv_aref(int argc, VALUE *argv, VALUE self) {
864
+ string_view_t *sv = sv_get_struct(self);
865
+ VALUE arg1, arg2;
866
+
867
+ if (SV_UNLIKELY(argc < 1 || argc > 2)) {
868
+ rb_error_arity(argc, 1, 2);
869
+ }
870
+ arg1 = argv[0];
871
+ arg2 = (argc == 2) ? argv[1] : Qnil;
872
+
873
+ if (argc == 2) {
874
+ long idx = NUM2LONG(arg1);
875
+ long len = NUM2LONG(arg2);
876
+
877
+ if (SV_LIKELY(sv_single_byte_optimizable(sv))) {
878
+ /* Fast path: char == byte for ASCII content */
879
+ long total = sv->length;
880
+ if (idx < 0) idx += total;
881
+ if (SV_UNLIKELY(idx < 0 || idx > total || len < 0)) return Qnil;
882
+ if (idx + len > total) len = total - idx;
883
+ return sv_new_from_parent(sv,
884
+ sv->offset + idx,
885
+ len);
886
+ }
887
+
888
+ /* Multibyte path */
889
+ if (len < 0) return Qnil;
890
+
891
+ if (idx < 0) {
892
+ /* Negative index: need total char count */
893
+ long total_chars = sv_char_count(sv);
894
+ idx += total_chars;
895
+ if (idx < 0) return Qnil;
896
+ }
897
+
898
+ /* Two O(1) stride lookups for start and end byte offsets */
899
+ long byte_off = sv_char_to_byte_offset(sv, idx);
900
+ if (byte_off < 0) return Qnil;
901
+
902
+ /* Clamp len to remaining characters */
903
+ long total_chars = sv_char_count(sv);
904
+ if (idx + len > total_chars) len = total_chars - idx;
905
+
906
+ long byte_end = sv_char_to_byte_offset(sv, idx + len);
907
+ long byte_len = byte_end - byte_off;
908
+
909
+ return sv_new_from_parent(sv,
910
+ sv->offset + byte_off,
911
+ byte_len);
912
+ }
913
+
914
+ if (rb_obj_is_kind_of(arg1, rb_cRange)) {
915
+ long total_chars = sv_char_count(sv);
916
+ long beg, len;
917
+ int excl;
918
+ VALUE rb_beg = rb_funcall(arg1, rb_intern("begin"), 0);
919
+ VALUE rb_end = rb_funcall(arg1, rb_intern("end"), 0);
920
+ excl = RTEST(rb_funcall(arg1, rb_intern("exclude_end?"), 0));
921
+
922
+ beg = NIL_P(rb_beg) ? 0 : NUM2LONG(rb_beg);
923
+ if (beg < 0) beg += total_chars;
924
+ if (beg < 0) return Qnil;
925
+
926
+ long e;
927
+ if (NIL_P(rb_end)) {
928
+ e = total_chars;
929
+ } else {
930
+ e = NUM2LONG(rb_end);
931
+ if (e < 0) e += total_chars;
932
+ if (!excl) e += 1;
933
+ }
934
+ if (e < beg) e = beg;
935
+ len = e - beg;
936
+ if (beg > total_chars) return Qnil;
937
+ if (beg + len > total_chars) len = total_chars - beg;
938
+
939
+ long byte_off = sv_char_to_byte_offset(sv, beg);
940
+ long byte_len = sv_chars_to_bytes(sv, byte_off, len);
941
+
942
+ return sv_new_from_parent(sv,
943
+ sv->offset + byte_off,
944
+ byte_len);
945
+ }
946
+
947
+ if (rb_obj_is_kind_of(arg1, rb_cRegexp)) {
948
+ VALUE shared = sv_as_shared_str(sv);
949
+ VALUE m = rb_funcall(arg1, rb_intern("match"), 1, shared);
950
+ if (NIL_P(m)) return Qnil;
951
+
952
+ VALUE matched = rb_funcall(m, rb_intern("[]"), 1, INT2FIX(0));
953
+ long match_beg = NUM2LONG(rb_funcall(m, rb_intern("begin"), 1, INT2FIX(0)));
954
+
955
+ long byte_off = sv_char_to_byte_offset(sv, match_beg);
956
+ long byte_len = RSTRING_LEN(matched);
957
+
958
+ return sv_new_from_parent(sv,
959
+ sv->offset + byte_off,
960
+ byte_len);
961
+ }
962
+
963
+ if (RB_TYPE_P(arg1, T_STRING)) {
964
+ const char *p = sv_ptr(sv);
965
+ long slen = RSTRING_LEN(arg1);
966
+ if (slen == 0) {
967
+ return sv_new_from_parent(sv, sv->offset, 0);
968
+ }
969
+ if (slen > sv->length) return Qnil;
970
+
971
+ long pos = rb_memsearch(RSTRING_PTR(arg1), slen, p, sv->length, sv_enc(sv));
972
+ if (pos < 0 || pos > sv->length - slen) return Qnil;
973
+
974
+ return sv_new_from_parent(sv, sv->offset + pos, slen);
975
+ }
976
+
977
+ if (RB_INTEGER_TYPE_P(arg1)) {
978
+ long char_idx = NUM2LONG(arg1);
979
+ long total_chars = sv_char_count(sv);
980
+
981
+ if (char_idx < 0) char_idx += total_chars;
982
+ if (char_idx < 0 || char_idx >= total_chars) return Qnil;
983
+
984
+ long byte_off = sv_char_to_byte_offset(sv, char_idx);
985
+ if (byte_off < 0) return Qnil;
986
+
987
+ long byte_len = sv_chars_to_bytes(sv, byte_off, 1);
988
+
989
+ return sv_new_from_parent(sv,
990
+ sv->offset + byte_off,
991
+ byte_len);
992
+ }
993
+
994
+ rb_raise(rb_eTypeError, "no implicit conversion of %s into Integer",
995
+ rb_obj_classname(arg1));
996
+ return Qnil;
997
+ }
998
+
999
+ static VALUE sv_byteslice(int argc, VALUE *argv, VALUE self) {
1000
+ string_view_t *sv = sv_get_struct(self);
1001
+ VALUE arg1, arg2;
1002
+
1003
+ if (SV_UNLIKELY(argc < 1 || argc > 2)) {
1004
+ rb_error_arity(argc, 1, 2);
1005
+ }
1006
+ arg1 = argv[0];
1007
+ arg2 = (argc == 2) ? argv[1] : Qnil;
1008
+
1009
+ if (argc == 2) {
1010
+ long off = NUM2LONG(arg1);
1011
+ long len = NUM2LONG(arg2);
1012
+
1013
+ if (off < 0) off += sv->length;
1014
+ if (off < 0 || off > sv->length) return Qnil;
1015
+ if (len < 0) return Qnil;
1016
+ if (off + len > sv->length) len = sv->length - off;
1017
+
1018
+ return sv_new_from_parent(sv, sv->offset + off, len);
1019
+ }
1020
+
1021
+ if (rb_obj_is_kind_of(arg1, rb_cRange)) {
1022
+ long beg, len;
1023
+ VALUE rb_beg = rb_funcall(arg1, rb_intern("begin"), 0);
1024
+ VALUE rb_end = rb_funcall(arg1, rb_intern("end"), 0);
1025
+ int excl = RTEST(rb_funcall(arg1, rb_intern("exclude_end?"), 0));
1026
+
1027
+ beg = NIL_P(rb_beg) ? 0 : NUM2LONG(rb_beg);
1028
+ if (beg < 0) beg += sv->length;
1029
+ if (beg < 0) return Qnil;
1030
+
1031
+ long e;
1032
+ if (NIL_P(rb_end)) {
1033
+ e = sv->length;
1034
+ } else {
1035
+ e = NUM2LONG(rb_end);
1036
+ if (e < 0) e += sv->length;
1037
+ if (!excl) e += 1;
1038
+ }
1039
+ if (e < beg) e = beg;
1040
+ len = e - beg;
1041
+ if (beg > sv->length) return Qnil;
1042
+ if (beg + len > sv->length) len = sv->length - beg;
1043
+
1044
+ return sv_new_from_parent(sv, sv->offset + beg, len);
1045
+ }
1046
+
1047
+ {
1048
+ long idx = NUM2LONG(arg1);
1049
+ if (idx < 0) idx += sv->length;
1050
+ if (idx < 0 || idx >= sv->length) return Qnil;
1051
+ return sv_new_from_parent(sv, sv->offset + idx, 1);
1052
+ }
1053
+ }
1054
+
1055
+ /* ========================================================================= */
1056
+ /* Tier 3: Transform delegation */
1057
+ /* ========================================================================= */
1058
+
1059
+ #define SV_DELEGATE_FUNCALL(cname, rbname) \
1060
+ static VALUE sv_##cname(int argc, VALUE *argv, VALUE self) { \
1061
+ string_view_t *sv = sv_get_struct(self); \
1062
+ VALUE shared = sv_as_shared_str(sv); \
1063
+ if (rb_block_given_p()) { \
1064
+ return rb_funcall_with_block(shared, rb_intern(rbname), \
1065
+ argc, argv, rb_block_proc()); \
1066
+ } \
1067
+ return rb_funcallv(shared, rb_intern(rbname), argc, argv); \
1068
+ }
1069
+
1070
+ SV_DELEGATE_FUNCALL(upcase, "upcase")
1071
+ SV_DELEGATE_FUNCALL(downcase, "downcase")
1072
+ SV_DELEGATE_FUNCALL(capitalize,"capitalize")
1073
+ SV_DELEGATE_FUNCALL(swapcase, "swapcase")
1074
+ SV_DELEGATE_FUNCALL(strip, "strip")
1075
+ SV_DELEGATE_FUNCALL(lstrip, "lstrip")
1076
+ SV_DELEGATE_FUNCALL(rstrip, "rstrip")
1077
+ SV_DELEGATE_FUNCALL(chomp, "chomp")
1078
+ SV_DELEGATE_FUNCALL(chop, "chop")
1079
+ SV_DELEGATE_FUNCALL(reverse, "reverse")
1080
+ SV_DELEGATE_FUNCALL(squeeze, "squeeze")
1081
+ SV_DELEGATE_FUNCALL(encode, "encode")
1082
+ SV_DELEGATE_FUNCALL(gsub, "gsub")
1083
+ SV_DELEGATE_FUNCALL(sub, "sub")
1084
+ SV_DELEGATE_FUNCALL(tr, "tr")
1085
+ SV_DELEGATE_FUNCALL(tr_s, "tr_s")
1086
+ SV_DELEGATE_FUNCALL(sv_delete, "delete")
1087
+ SV_DELEGATE_FUNCALL(count, "count")
1088
+ SV_DELEGATE_FUNCALL(scan, "scan")
1089
+ SV_DELEGATE_FUNCALL(split, "split")
1090
+ SV_DELEGATE_FUNCALL(center, "center")
1091
+ SV_DELEGATE_FUNCALL(ljust, "ljust")
1092
+ SV_DELEGATE_FUNCALL(rjust, "rjust")
1093
+ SV_DELEGATE_FUNCALL(format_op, "%")
1094
+ SV_DELEGATE_FUNCALL(plus, "+")
1095
+ SV_DELEGATE_FUNCALL(multiply, "*")
1096
+ SV_DELEGATE_FUNCALL(unpack1, "unpack1")
1097
+ SV_DELEGATE_FUNCALL(scrub, "scrub")
1098
+ SV_DELEGATE_FUNCALL(unicode_normalize, "unicode_normalize")
1099
+
1100
+ /* ========================================================================= */
1101
+ /* Bang methods — always raise FrozenError */
1102
+ /* ========================================================================= */
1103
+
1104
+ static VALUE sv_frozen_error(int argc, VALUE *argv, VALUE self) {
1105
+ VALUE str = sv_to_s(self);
1106
+ rb_raise(rb_eFrozenError, "can't modify frozen StringView: \"%s\"",
1107
+ StringValueCStr(str));
1108
+ return Qnil;
1109
+ }
1110
+
1111
+ /* ========================================================================= */
1112
+ /* Init */
1113
+ /* ========================================================================= */
1114
+
1115
+ void Init_string_view(void) {
1116
+ enc_utf8 = rb_utf8_encoding();
1117
+
1118
+ cStringView = rb_define_class("StringView", rb_cObject);
1119
+ rb_include_module(cStringView, rb_mComparable);
1120
+
1121
+ rb_define_alloc_func(cStringView, sv_alloc);
1122
+ rb_define_method(cStringView, "initialize", sv_initialize, -1);
1123
+
1124
+ rb_define_method(cStringView, "to_s", sv_to_s, 0);
1125
+ rb_define_private_method(cStringView, "to_str", sv_to_str, 0);
1126
+ rb_define_method(cStringView, "inspect", sv_inspect, 0);
1127
+ rb_define_method(cStringView, "frozen?", sv_frozen_p, 0);
1128
+ rb_define_method(cStringView, "reset!", sv_reset, 3);
1129
+ rb_define_alias(cStringView, "materialize", "to_s");
1130
+
1131
+ rb_define_method(cStringView, "bytesize", sv_bytesize, 0);
1132
+ rb_define_method(cStringView, "length", sv_length, 0);
1133
+ rb_define_alias(cStringView, "size", "length");
1134
+ rb_define_method(cStringView, "empty?", sv_empty_p, 0);
1135
+ rb_define_method(cStringView, "encoding", sv_encoding, 0);
1136
+ rb_define_method(cStringView, "ascii_only?", sv_ascii_only_p,0);
1137
+
1138
+ rb_define_method(cStringView, "include?", sv_include_p, 1);
1139
+ rb_define_method(cStringView, "start_with?", sv_start_with_p,-1);
1140
+ rb_define_method(cStringView, "end_with?", sv_end_with_p, -1);
1141
+ rb_define_method(cStringView, "index", sv_index, -1);
1142
+ rb_define_method(cStringView, "rindex", sv_rindex, -1);
1143
+ rb_define_method(cStringView, "getbyte", sv_getbyte, 1);
1144
+ rb_define_method(cStringView, "byteindex", sv_byteindex, -1);
1145
+ rb_define_method(cStringView, "byterindex", sv_byterindex, -1);
1146
+
1147
+ rb_define_method(cStringView, "each_byte", sv_each_byte, 0);
1148
+ rb_define_method(cStringView, "each_char", sv_each_char, 0);
1149
+ rb_define_method(cStringView, "bytes", sv_bytes, 0);
1150
+ rb_define_method(cStringView, "chars", sv_chars, 0);
1151
+
1152
+ rb_define_method(cStringView, "match", sv_match, -1);
1153
+ rb_define_method(cStringView, "match?", sv_match_p, -1);
1154
+ rb_define_method(cStringView, "=~", sv_match_operator, 1);
1155
+
1156
+ rb_define_method(cStringView, "to_i", sv_to_i, -1);
1157
+ rb_define_method(cStringView, "to_f", sv_to_f, 0);
1158
+ rb_define_method(cStringView, "hex", sv_hex, 0);
1159
+ rb_define_method(cStringView, "oct", sv_oct, 0);
1160
+
1161
+ rb_define_method(cStringView, "==", sv_eq, 1);
1162
+ rb_define_method(cStringView, "<=>", sv_cmp, 1);
1163
+ rb_define_method(cStringView, "eql?", sv_eql_p, 1);
1164
+ rb_define_method(cStringView, "hash", sv_hash, 0);
1165
+
1166
+ rb_define_method(cStringView, "[]", sv_aref, -1);
1167
+ rb_define_alias(cStringView, "slice", "[]");
1168
+ rb_define_method(cStringView, "byteslice", sv_byteslice, -1);
1169
+
1170
+ rb_define_method(cStringView, "upcase", sv_upcase, -1);
1171
+ rb_define_method(cStringView, "downcase", sv_downcase, -1);
1172
+ rb_define_method(cStringView, "capitalize", sv_capitalize, -1);
1173
+ rb_define_method(cStringView, "swapcase", sv_swapcase, -1);
1174
+ rb_define_method(cStringView, "strip", sv_strip, -1);
1175
+ rb_define_method(cStringView, "lstrip", sv_lstrip, -1);
1176
+ rb_define_method(cStringView, "rstrip", sv_rstrip, -1);
1177
+ rb_define_method(cStringView, "chomp", sv_chomp, -1);
1178
+ rb_define_method(cStringView, "chop", sv_chop, -1);
1179
+ rb_define_method(cStringView, "reverse", sv_reverse, -1);
1180
+ rb_define_method(cStringView, "squeeze", sv_squeeze, -1);
1181
+ rb_define_method(cStringView, "encode", sv_encode, -1);
1182
+ rb_define_method(cStringView, "gsub", sv_gsub, -1);
1183
+ rb_define_method(cStringView, "sub", sv_sub, -1);
1184
+ rb_define_method(cStringView, "tr", sv_tr, -1);
1185
+ rb_define_method(cStringView, "tr_s", sv_tr_s, -1);
1186
+ rb_define_method(cStringView, "delete", sv_sv_delete, -1);
1187
+ rb_define_method(cStringView, "count", sv_count, -1);
1188
+ rb_define_method(cStringView, "scan", sv_scan, -1);
1189
+ rb_define_method(cStringView, "split", sv_split, -1);
1190
+ rb_define_method(cStringView, "center", sv_center, -1);
1191
+ rb_define_method(cStringView, "ljust", sv_ljust, -1);
1192
+ rb_define_method(cStringView, "rjust", sv_rjust, -1);
1193
+ rb_define_method(cStringView, "%", sv_format_op, -1);
1194
+ rb_define_method(cStringView, "+", sv_plus, -1);
1195
+ rb_define_method(cStringView, "*", sv_multiply, -1);
1196
+ rb_define_method(cStringView, "unpack1", sv_unpack1, -1);
1197
+ rb_define_method(cStringView, "scrub", sv_scrub, -1);
1198
+ rb_define_method(cStringView, "unicode_normalize", sv_unicode_normalize, -1);
1199
+
1200
+ rb_define_method(cStringView, "upcase!", sv_frozen_error, -1);
1201
+ rb_define_method(cStringView, "downcase!", sv_frozen_error, -1);
1202
+ rb_define_method(cStringView, "capitalize!", sv_frozen_error, -1);
1203
+ rb_define_method(cStringView, "swapcase!", sv_frozen_error, -1);
1204
+ rb_define_method(cStringView, "strip!", sv_frozen_error, -1);
1205
+ rb_define_method(cStringView, "lstrip!", sv_frozen_error, -1);
1206
+ rb_define_method(cStringView, "rstrip!", sv_frozen_error, -1);
1207
+ rb_define_method(cStringView, "chomp!", sv_frozen_error, -1);
1208
+ rb_define_method(cStringView, "chop!", sv_frozen_error, -1);
1209
+ rb_define_method(cStringView, "squeeze!", sv_frozen_error, -1);
1210
+ rb_define_method(cStringView, "tr!", sv_frozen_error, -1);
1211
+ rb_define_method(cStringView, "delete!", sv_frozen_error, -1);
1212
+ rb_define_method(cStringView, "replace", sv_frozen_error, -1);
1213
+ rb_define_method(cStringView, "reverse!", sv_frozen_error, -1);
1214
+ rb_define_method(cStringView, "gsub!", sv_frozen_error, -1);
1215
+ rb_define_method(cStringView, "sub!", sv_frozen_error, -1);
1216
+ rb_define_method(cStringView, "slice!", sv_frozen_error, -1);
1217
+ }