string_view 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +25 -4
- data/ext/string_view/extconf.rb +1 -1
- data/ext/string_view/string_view.c +735 -207
- data/ext/string_view/string_view.h +121 -0
- data/ext/string_view/string_view_core_ext.c +44 -0
- data/ext/string_view/string_view_pool.c +204 -0
- data/ext/string_view/string_view_strict.c +102 -0
- data/lib/string_view/core_ext.rb +5 -0
- data/lib/string_view/version.rb +1 -1
- metadata +7 -2
|
@@ -1,45 +1,25 @@
|
|
|
1
|
-
#include "
|
|
2
|
-
#include "ruby/encoding.h"
|
|
3
|
-
#include "ruby/re.h"
|
|
4
|
-
#include "simdutf_c.h"
|
|
5
|
-
|
|
6
|
-
#define SV_LIKELY(x) __builtin_expect(!!(x), 1)
|
|
7
|
-
#define SV_UNLIKELY(x) __builtin_expect(!!(x), 0)
|
|
8
|
-
|
|
9
|
-
#ifdef __GNUC__
|
|
10
|
-
#define SV_INLINE static inline __attribute__((always_inline))
|
|
11
|
-
#else
|
|
12
|
-
#define SV_INLINE static inline
|
|
13
|
-
#endif
|
|
1
|
+
#include "string_view.h"
|
|
14
2
|
|
|
15
3
|
/* ========================================================================= */
|
|
16
|
-
/*
|
|
4
|
+
/* Globals */
|
|
17
5
|
/* ========================================================================= */
|
|
18
6
|
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
*/
|
|
24
|
-
|
|
25
|
-
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
30
|
-
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
34
|
-
|
|
35
|
-
long offset; /* byte offset into backing */
|
|
36
|
-
long length; /* byte length of this view */
|
|
37
|
-
long charlen; /* cached character count; -1 = not yet computed */
|
|
38
|
-
int single_byte; /* cached: 1 if char==byte (ASCII/single-byte enc), 0 if multibyte, -1 unknown */
|
|
39
|
-
stride_index_t *stride_idx; /* lazily built stride index for multibyte, NULL if not built */
|
|
40
|
-
} string_view_t;
|
|
41
|
-
|
|
42
|
-
static VALUE cStringView;
|
|
7
|
+
VALUE cStringView;
|
|
8
|
+
VALUE cStringViewStrict;
|
|
9
|
+
VALUE eWouldAllocate;
|
|
10
|
+
|
|
11
|
+
/* Cached method IDs — initialized once in Init_string_view */
|
|
12
|
+
static ID id_index, id_rindex, id_byteindex, id_byterindex;
|
|
13
|
+
static ID id_match, id_match_p, id_match_op;
|
|
14
|
+
static ID id_begin, id_aref;
|
|
15
|
+
static ID id_upcase, id_downcase, id_capitalize, id_swapcase;
|
|
16
|
+
static ID id_strip, id_lstrip, id_rstrip;
|
|
17
|
+
static ID id_chomp, id_chop, id_reverse, id_squeeze;
|
|
18
|
+
static ID id_encode, id_gsub, id_sub, id_tr, id_tr_s;
|
|
19
|
+
static ID id_delete, id_count, id_scan, id_split;
|
|
20
|
+
static ID id_center, id_ljust, id_rjust;
|
|
21
|
+
static ID id_format_op, id_plus, id_multiply;
|
|
22
|
+
static ID id_unpack1, id_scrub, id_unicode_normalize;
|
|
43
23
|
|
|
44
24
|
/*
|
|
45
25
|
* GC callbacks.
|
|
@@ -85,14 +65,18 @@ static size_t sv_memsize(const void *ptr) {
|
|
|
85
65
|
return size;
|
|
86
66
|
}
|
|
87
67
|
|
|
88
|
-
|
|
68
|
+
const rb_data_type_t string_view_type = {
|
|
89
69
|
.wrap_struct_name = "StringView",
|
|
90
70
|
.function = { .dmark = sv_mark, .dfree = sv_free, .dsize = sv_memsize, .dcompact = sv_compact },
|
|
91
|
-
.flags = RUBY_TYPED_FREE_IMMEDIATELY |
|
|
71
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
|
|
92
72
|
};
|
|
93
73
|
|
|
94
|
-
/* Forward declarations */
|
|
95
|
-
static
|
|
74
|
+
/* Forward declarations for functions defined later in this file */
|
|
75
|
+
static long sv_char_count(string_view_t *sv);
|
|
76
|
+
static long sv_char_to_byte_offset(string_view_t *sv, long char_idx);
|
|
77
|
+
SV_INLINE int sv_single_byte_optimizable(string_view_t *sv);
|
|
78
|
+
SV_INLINE int sv_is_utf8(string_view_t *sv);
|
|
79
|
+
static long sv_utf8_char_count(const char *p, long len);
|
|
96
80
|
|
|
97
81
|
/* ========================================================================= */
|
|
98
82
|
/* Internal helpers */
|
|
@@ -122,10 +106,12 @@ static VALUE sv_as_shared_str(string_view_t *sv) {
|
|
|
122
106
|
return shared;
|
|
123
107
|
}
|
|
124
108
|
|
|
125
|
-
/* Allocate a new StringView from a parent that already has cached base/enc
|
|
126
|
-
|
|
109
|
+
/* Allocate a new StringView from a parent that already has cached base/enc.
|
|
110
|
+
* Preserves the class of parent_obj (StringView or StringView::Strict). */
|
|
111
|
+
SV_INLINE VALUE sv_new_from_parent_obj(VALUE parent_obj, string_view_t *parent, long offset, long length) {
|
|
127
112
|
string_view_t *sv;
|
|
128
|
-
VALUE
|
|
113
|
+
VALUE klass = rb_obj_class(parent_obj);
|
|
114
|
+
VALUE obj = TypedData_Make_Struct(klass, string_view_t,
|
|
129
115
|
&string_view_type, sv);
|
|
130
116
|
RB_OBJ_WRITE(obj, &sv->backing, parent->backing);
|
|
131
117
|
sv->base = parent->base;
|
|
@@ -135,7 +121,7 @@ SV_INLINE VALUE sv_new_from_parent(string_view_t *parent, long offset, long leng
|
|
|
135
121
|
sv->single_byte = parent->single_byte;
|
|
136
122
|
sv->charlen = -1;
|
|
137
123
|
sv->stride_idx = NULL;
|
|
138
|
-
|
|
124
|
+
/* Not frozen — see sv_initialize comment for rationale */
|
|
139
125
|
return obj;
|
|
140
126
|
}
|
|
141
127
|
|
|
@@ -169,13 +155,7 @@ static VALUE sv_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
169
155
|
|
|
170
156
|
rb_scan_args(argc, argv, "12", &str, &voffset, &vlength);
|
|
171
157
|
|
|
172
|
-
|
|
173
|
-
rb_raise(rb_eTypeError,
|
|
174
|
-
"no implicit conversion of %s into String",
|
|
175
|
-
rb_obj_classname(str));
|
|
176
|
-
}
|
|
177
|
-
|
|
178
|
-
rb_str_freeze(str);
|
|
158
|
+
sv_check_frozen_string(str);
|
|
179
159
|
|
|
180
160
|
long backing_len = RSTRING_LEN(str);
|
|
181
161
|
|
|
@@ -185,26 +165,20 @@ static VALUE sv_initialize(int argc, VALUE *argv, VALUE self) {
|
|
|
185
165
|
} else {
|
|
186
166
|
offset = NUM2LONG(voffset);
|
|
187
167
|
length = NUM2LONG(vlength);
|
|
188
|
-
|
|
189
|
-
if (offset < 0 || length < 0 || offset + length > backing_len) {
|
|
190
|
-
rb_raise(rb_eArgError,
|
|
191
|
-
"offset %ld, length %ld out of range for string of bytesize %ld",
|
|
192
|
-
offset, length, backing_len);
|
|
193
|
-
}
|
|
168
|
+
sv_check_bounds(offset, length, backing_len);
|
|
194
169
|
}
|
|
195
170
|
|
|
196
171
|
string_view_t *sv = sv_get_struct(self);
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
sv->base = RSTRING_PTR(str);
|
|
200
|
-
sv->enc = enc;
|
|
201
|
-
sv->offset = offset;
|
|
202
|
-
sv->length = length;
|
|
203
|
-
sv->single_byte = sv_compute_single_byte(str, enc);
|
|
204
|
-
sv->charlen = -1;
|
|
205
|
-
sv->stride_idx = NULL;
|
|
172
|
+
sv_init_fields(self, sv, str, RSTRING_PTR(str), rb_enc_get(str),
|
|
173
|
+
offset, length);
|
|
206
174
|
|
|
207
|
-
|
|
175
|
+
/*
|
|
176
|
+
* We intentionally do NOT freeze self. StringView blocks content
|
|
177
|
+
* mutation via the immutable frozen backing and explicit FrozenError
|
|
178
|
+
* on bang methods. Not freezing allows reset! to work without
|
|
179
|
+
* violating Ruby's frozen? contract — libraries and Ruby itself
|
|
180
|
+
* use frozen? to assume immutability for hash keys and Ractor sharing.
|
|
181
|
+
*/
|
|
208
182
|
|
|
209
183
|
return self;
|
|
210
184
|
}
|
|
@@ -236,43 +210,27 @@ static VALUE sv_inspect(VALUE self) {
|
|
|
236
210
|
(void *)self, content, sv->offset, sv->length);
|
|
237
211
|
}
|
|
238
212
|
|
|
239
|
-
static VALUE sv_frozen_p(VALUE self) {
|
|
240
|
-
return Qtrue;
|
|
241
|
-
}
|
|
242
|
-
|
|
243
213
|
/*
|
|
244
214
|
* reset!(new_backing, byte_offset, byte_length) -> self
|
|
245
215
|
*/
|
|
246
216
|
static VALUE sv_reset(VALUE self, VALUE new_backing, VALUE voffset, VALUE vlength) {
|
|
217
|
+
rb_check_frozen(self);
|
|
247
218
|
string_view_t *sv = sv_get_struct(self);
|
|
248
219
|
|
|
249
|
-
|
|
250
|
-
rb_raise(rb_eTypeError,
|
|
251
|
-
"no implicit conversion of %s into String",
|
|
252
|
-
rb_obj_classname(new_backing));
|
|
253
|
-
}
|
|
254
|
-
|
|
255
|
-
rb_str_freeze(new_backing);
|
|
220
|
+
sv_check_frozen_string(new_backing);
|
|
256
221
|
|
|
257
222
|
long off = NUM2LONG(voffset);
|
|
258
223
|
long len = NUM2LONG(vlength);
|
|
259
|
-
|
|
224
|
+
sv_check_bounds(off, len, RSTRING_LEN(new_backing));
|
|
260
225
|
|
|
261
|
-
|
|
262
|
-
|
|
263
|
-
|
|
264
|
-
|
|
226
|
+
/* Free old stride index before reinitializing */
|
|
227
|
+
if (sv->stride_idx) {
|
|
228
|
+
xfree(sv->stride_idx->offsets);
|
|
229
|
+
xfree(sv->stride_idx);
|
|
265
230
|
}
|
|
266
231
|
|
|
267
|
-
|
|
268
|
-
|
|
269
|
-
sv->base = RSTRING_PTR(new_backing);
|
|
270
|
-
sv->enc = enc;
|
|
271
|
-
sv->offset = off;
|
|
272
|
-
sv->length = len;
|
|
273
|
-
sv->single_byte = sv_compute_single_byte(new_backing, enc);
|
|
274
|
-
sv->charlen = -1;
|
|
275
|
-
sv->stride_idx = NULL;
|
|
232
|
+
sv_init_fields(self, sv, new_backing, RSTRING_PTR(new_backing),
|
|
233
|
+
rb_enc_get(new_backing), off, len);
|
|
276
234
|
|
|
277
235
|
return self;
|
|
278
236
|
}
|
|
@@ -306,6 +264,8 @@ static VALUE sv_encoding(VALUE self) {
|
|
|
306
264
|
|
|
307
265
|
static VALUE sv_ascii_only_p(VALUE self) {
|
|
308
266
|
string_view_t *sv = sv_get_struct(self);
|
|
267
|
+
if (sv_single_byte_optimizable(sv)) return Qtrue;
|
|
268
|
+
/* single_byte resolved to 0 (multibyte) — scan to confirm non-ASCII bytes */
|
|
309
269
|
const char *p = sv_ptr(sv);
|
|
310
270
|
long i;
|
|
311
271
|
for (i = 0; i < sv->length; i++) {
|
|
@@ -361,16 +321,137 @@ static VALUE sv_end_with_p(int argc, VALUE *argv, VALUE self) {
|
|
|
361
321
|
return Qfalse;
|
|
362
322
|
}
|
|
363
323
|
|
|
364
|
-
|
|
324
|
+
/*
|
|
325
|
+
* index(substring[, offset]) → Integer or nil
|
|
326
|
+
*
|
|
327
|
+
* For String arguments: native zero-alloc implementation using rb_memsearch.
|
|
328
|
+
* For Regexp arguments: delegates to String#index via shared string.
|
|
329
|
+
*/
|
|
330
|
+
VALUE sv_index(int argc, VALUE *argv, VALUE self) {
|
|
365
331
|
string_view_t *sv = sv_get_struct(self);
|
|
366
|
-
VALUE
|
|
367
|
-
|
|
332
|
+
VALUE pattern, voffset;
|
|
333
|
+
rb_scan_args(argc, argv, "11", &pattern, &voffset);
|
|
334
|
+
|
|
335
|
+
/* Regexp path: delegate via shared string */
|
|
336
|
+
if (rb_obj_is_kind_of(pattern, rb_cRegexp)) {
|
|
337
|
+
VALUE shared = sv_as_shared_str(sv);
|
|
338
|
+
return rb_funcallv(shared, id_index, argc, argv);
|
|
339
|
+
}
|
|
340
|
+
|
|
341
|
+
StringValue(pattern);
|
|
342
|
+
const char *p = sv_ptr(sv);
|
|
343
|
+
long plen = RSTRING_LEN(pattern);
|
|
344
|
+
|
|
345
|
+
/* Determine starting char offset */
|
|
346
|
+
long char_off = NIL_P(voffset) ? 0 : NUM2LONG(voffset);
|
|
347
|
+
long total_chars = sv_char_count(sv);
|
|
348
|
+
|
|
349
|
+
if (char_off < 0) char_off += total_chars;
|
|
350
|
+
if (char_off < 0 || char_off > total_chars) return Qnil;
|
|
351
|
+
|
|
352
|
+
/* Convert char offset to byte offset */
|
|
353
|
+
long byte_off = sv_char_to_byte_offset(sv, char_off);
|
|
354
|
+
if (byte_off < 0) return Qnil;
|
|
355
|
+
|
|
356
|
+
if (plen == 0) return LONG2NUM(char_off);
|
|
357
|
+
if (plen > sv->length - byte_off) return Qnil;
|
|
358
|
+
|
|
359
|
+
long pos = rb_memsearch(RSTRING_PTR(pattern), plen,
|
|
360
|
+
p + byte_off, sv->length - byte_off,
|
|
361
|
+
sv_enc(sv));
|
|
362
|
+
if (pos < 0 || pos > sv->length - byte_off - plen) return Qnil;
|
|
363
|
+
|
|
364
|
+
/* Convert byte position back to character position */
|
|
365
|
+
if (sv_single_byte_optimizable(sv)) {
|
|
366
|
+
return LONG2NUM(char_off + pos);
|
|
367
|
+
}
|
|
368
|
+
/* Count chars from byte_off to byte_off+pos */
|
|
369
|
+
if (sv_is_utf8(sv)) {
|
|
370
|
+
long chars = sv_utf8_char_count(p + byte_off, pos);
|
|
371
|
+
return LONG2NUM(char_off + chars);
|
|
372
|
+
}
|
|
373
|
+
rb_encoding *enc = sv_enc(sv);
|
|
374
|
+
const char *s = p + byte_off;
|
|
375
|
+
const char *e = s + pos;
|
|
376
|
+
long chars = rb_enc_strlen(s, e, enc);
|
|
377
|
+
return LONG2NUM(char_off + chars);
|
|
368
378
|
}
|
|
369
379
|
|
|
370
|
-
|
|
380
|
+
/*
|
|
381
|
+
* rindex(substring[, offset]) → Integer or nil
|
|
382
|
+
*
|
|
383
|
+
* For String arguments: native zero-alloc reverse search.
|
|
384
|
+
* For Regexp arguments: delegates to String#rindex via shared string.
|
|
385
|
+
*/
|
|
386
|
+
VALUE sv_rindex(int argc, VALUE *argv, VALUE self) {
|
|
371
387
|
string_view_t *sv = sv_get_struct(self);
|
|
372
|
-
VALUE
|
|
373
|
-
|
|
388
|
+
VALUE pattern, voffset;
|
|
389
|
+
rb_scan_args(argc, argv, "11", &pattern, &voffset);
|
|
390
|
+
|
|
391
|
+
/* Regexp path: delegate */
|
|
392
|
+
if (rb_obj_is_kind_of(pattern, rb_cRegexp)) {
|
|
393
|
+
VALUE shared = sv_as_shared_str(sv);
|
|
394
|
+
return rb_funcallv(shared, id_rindex, argc, argv);
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
StringValue(pattern);
|
|
398
|
+
const char *p = sv_ptr(sv);
|
|
399
|
+
long plen = RSTRING_LEN(pattern);
|
|
400
|
+
long total_chars = sv_char_count(sv);
|
|
401
|
+
|
|
402
|
+
/* Determine the maximum char position to search from */
|
|
403
|
+
long max_char;
|
|
404
|
+
if (NIL_P(voffset)) {
|
|
405
|
+
max_char = total_chars;
|
|
406
|
+
} else {
|
|
407
|
+
max_char = NUM2LONG(voffset);
|
|
408
|
+
if (max_char < 0) max_char += total_chars;
|
|
409
|
+
if (max_char < 0) return Qnil;
|
|
410
|
+
if (max_char > total_chars) max_char = total_chars;
|
|
411
|
+
}
|
|
412
|
+
|
|
413
|
+
if (plen == 0) {
|
|
414
|
+
return LONG2NUM(max_char > total_chars ? total_chars : max_char);
|
|
415
|
+
}
|
|
416
|
+
if (plen > sv->length) return Qnil;
|
|
417
|
+
|
|
418
|
+
/* Convert max_char to a byte limit */
|
|
419
|
+
long max_byte = sv_char_to_byte_offset(sv, max_char);
|
|
420
|
+
if (max_byte < 0) max_byte = sv->length;
|
|
421
|
+
|
|
422
|
+
/* Ensure we don't search past the point where the pattern can't fit */
|
|
423
|
+
long search_end = max_byte;
|
|
424
|
+
if (search_end + plen > sv->length) {
|
|
425
|
+
search_end = sv->length - plen;
|
|
426
|
+
}
|
|
427
|
+
|
|
428
|
+
/* Reverse byte search */
|
|
429
|
+
const char *needle = RSTRING_PTR(pattern);
|
|
430
|
+
const char *s;
|
|
431
|
+
for (s = p + search_end; s >= p; ) {
|
|
432
|
+
if (memcmp(s, needle, plen) == 0) {
|
|
433
|
+
long byte_pos = s - p;
|
|
434
|
+
/* Convert byte position to char position */
|
|
435
|
+
if (sv_single_byte_optimizable(sv)) {
|
|
436
|
+
return LONG2NUM(byte_pos);
|
|
437
|
+
}
|
|
438
|
+
if (sv_is_utf8(sv)) {
|
|
439
|
+
return LONG2NUM(sv_utf8_char_count(p, byte_pos));
|
|
440
|
+
}
|
|
441
|
+
rb_encoding *enc = sv_enc(sv);
|
|
442
|
+
return LONG2NUM(rb_enc_strlen(p, s, enc));
|
|
443
|
+
}
|
|
444
|
+
/* Move back one character */
|
|
445
|
+
if (s == p) break;
|
|
446
|
+
if (sv_single_byte_optimizable(sv)) {
|
|
447
|
+
s--;
|
|
448
|
+
} else {
|
|
449
|
+
rb_encoding *enc = sv_enc(sv);
|
|
450
|
+
s = rb_enc_prev_char(p, s, p + sv->length, enc);
|
|
451
|
+
if (s == NULL) break;
|
|
452
|
+
}
|
|
453
|
+
}
|
|
454
|
+
return Qnil;
|
|
374
455
|
}
|
|
375
456
|
|
|
376
457
|
static VALUE sv_getbyte(VALUE self, VALUE vidx) {
|
|
@@ -381,16 +462,83 @@ static VALUE sv_getbyte(VALUE self, VALUE vidx) {
|
|
|
381
462
|
return INT2FIX((unsigned char)sv_ptr(sv)[idx]);
|
|
382
463
|
}
|
|
383
464
|
|
|
384
|
-
|
|
465
|
+
/*
|
|
466
|
+
* byteindex(substring[, offset]) → Integer or nil
|
|
467
|
+
*
|
|
468
|
+
* For String arguments: native zero-alloc byte-level search.
|
|
469
|
+
* For Regexp arguments: delegates to String#byteindex via shared string.
|
|
470
|
+
*/
|
|
471
|
+
VALUE sv_byteindex(int argc, VALUE *argv, VALUE self) {
|
|
385
472
|
string_view_t *sv = sv_get_struct(self);
|
|
386
|
-
VALUE
|
|
387
|
-
|
|
473
|
+
VALUE pattern, voffset;
|
|
474
|
+
rb_scan_args(argc, argv, "11", &pattern, &voffset);
|
|
475
|
+
|
|
476
|
+
if (rb_obj_is_kind_of(pattern, rb_cRegexp)) {
|
|
477
|
+
VALUE shared = sv_as_shared_str(sv);
|
|
478
|
+
return rb_funcallv(shared, id_byteindex, argc, argv);
|
|
479
|
+
}
|
|
480
|
+
|
|
481
|
+
StringValue(pattern);
|
|
482
|
+
const char *p = sv_ptr(sv);
|
|
483
|
+
long plen = RSTRING_LEN(pattern);
|
|
484
|
+
long byte_off = NIL_P(voffset) ? 0 : NUM2LONG(voffset);
|
|
485
|
+
|
|
486
|
+
if (byte_off < 0) byte_off += sv->length;
|
|
487
|
+
if (byte_off < 0 || byte_off > sv->length) return Qnil;
|
|
488
|
+
if (plen == 0) return LONG2NUM(byte_off);
|
|
489
|
+
if (plen > sv->length - byte_off) return Qnil;
|
|
490
|
+
|
|
491
|
+
long pos = rb_memsearch(RSTRING_PTR(pattern), plen,
|
|
492
|
+
p + byte_off, sv->length - byte_off,
|
|
493
|
+
sv_enc(sv));
|
|
494
|
+
if (pos < 0 || pos > sv->length - byte_off - plen) return Qnil;
|
|
495
|
+
return LONG2NUM(byte_off + pos);
|
|
388
496
|
}
|
|
389
497
|
|
|
390
|
-
|
|
498
|
+
/*
|
|
499
|
+
* byterindex(substring[, offset]) → Integer or nil
|
|
500
|
+
*
|
|
501
|
+
* For String arguments: native zero-alloc reverse byte-level search.
|
|
502
|
+
* For Regexp arguments: delegates to String#byterindex via shared string.
|
|
503
|
+
*/
|
|
504
|
+
VALUE sv_byterindex(int argc, VALUE *argv, VALUE self) {
|
|
391
505
|
string_view_t *sv = sv_get_struct(self);
|
|
392
|
-
VALUE
|
|
393
|
-
|
|
506
|
+
VALUE pattern, voffset;
|
|
507
|
+
rb_scan_args(argc, argv, "11", &pattern, &voffset);
|
|
508
|
+
|
|
509
|
+
if (rb_obj_is_kind_of(pattern, rb_cRegexp)) {
|
|
510
|
+
VALUE shared = sv_as_shared_str(sv);
|
|
511
|
+
return rb_funcallv(shared, id_byterindex, argc, argv);
|
|
512
|
+
}
|
|
513
|
+
|
|
514
|
+
StringValue(pattern);
|
|
515
|
+
const char *p = sv_ptr(sv);
|
|
516
|
+
long plen = RSTRING_LEN(pattern);
|
|
517
|
+
long max_byte;
|
|
518
|
+
|
|
519
|
+
if (NIL_P(voffset)) {
|
|
520
|
+
max_byte = sv->length;
|
|
521
|
+
} else {
|
|
522
|
+
max_byte = NUM2LONG(voffset);
|
|
523
|
+
if (max_byte < 0) max_byte += sv->length;
|
|
524
|
+
if (max_byte < 0) return Qnil;
|
|
525
|
+
if (max_byte > sv->length) max_byte = sv->length;
|
|
526
|
+
}
|
|
527
|
+
|
|
528
|
+
if (plen == 0) return LONG2NUM(max_byte > sv->length ? sv->length : max_byte);
|
|
529
|
+
if (plen > sv->length) return Qnil;
|
|
530
|
+
|
|
531
|
+
long search_end = max_byte;
|
|
532
|
+
if (search_end + plen > sv->length) search_end = sv->length - plen;
|
|
533
|
+
|
|
534
|
+
const char *needle = RSTRING_PTR(pattern);
|
|
535
|
+
long i;
|
|
536
|
+
for (i = search_end; i >= 0; i--) {
|
|
537
|
+
if (memcmp(p + i, needle, plen) == 0) {
|
|
538
|
+
return LONG2NUM(i);
|
|
539
|
+
}
|
|
540
|
+
}
|
|
541
|
+
return Qnil;
|
|
394
542
|
}
|
|
395
543
|
|
|
396
544
|
/* ========================================================================= */
|
|
@@ -454,19 +602,19 @@ static VALUE sv_chars(VALUE self) {
|
|
|
454
602
|
/* match(...) — forwards all arguments to String#match on a shared string. */
static VALUE sv_match(int argc, VALUE *argv, VALUE self) {
    VALUE shared = sv_as_shared_str(sv_get_struct(self));
    return rb_funcallv(shared, id_match, argc, argv);
}
|
|
459
607
|
|
|
460
608
|
/* match?(...) — forwards all arguments to String#match? on a shared string. */
static VALUE sv_match_p(int argc, VALUE *argv, VALUE self) {
    VALUE shared = sv_as_shared_str(sv_get_struct(self));
    return rb_funcallv(shared, id_match_p, argc, argv);
}
|
|
465
613
|
|
|
466
614
|
/* Calls the method cached in id_match_op on a shared string, passing pattern. */
static VALUE sv_match_operator(VALUE self, VALUE pattern) {
    VALUE shared = sv_as_shared_str(sv_get_struct(self));
    return rb_funcall(shared, id_match_op, 1, pattern);
}
|
|
471
619
|
|
|
472
620
|
/* ========================================================================= */
|
|
@@ -561,6 +709,41 @@ static VALUE sv_oct(VALUE self) {
|
|
|
561
709
|
/* Tier 1: Comparison */
|
|
562
710
|
/* ========================================================================= */
|
|
563
711
|
|
|
712
|
+
/*
|
|
713
|
+
* Returns 1 if all bytes in the view are < 128 (7-bit ASCII).
|
|
714
|
+
* Uses the single_byte cache when available.
|
|
715
|
+
*/
|
|
716
|
+
SV_INLINE int sv_is_7bit(string_view_t *sv) {
|
|
717
|
+
if (sv_single_byte_optimizable(sv)) return 1;
|
|
718
|
+
const char *p = sv_ptr(sv);
|
|
719
|
+
long i;
|
|
720
|
+
for (i = 0; i < sv->length; i++) {
|
|
721
|
+
if ((unsigned char)p[i] > 127) return 0;
|
|
722
|
+
}
|
|
723
|
+
return 1;
|
|
724
|
+
}
|
|
725
|
+
|
|
726
|
+
/*
|
|
727
|
+
* Check encoding compatibility for equality, mirroring Ruby's String#==.
|
|
728
|
+
* Two encodings are compatible for comparison if:
|
|
729
|
+
* - They are the same encoding, OR
|
|
730
|
+
* - Both are ASCII-compatible and at least one side is 7-bit
|
|
731
|
+
* (e.g. UTF-8 "hello" == US-ASCII "hello")
|
|
732
|
+
*/
|
|
733
|
+
SV_INLINE int sv_enc_compatible_for_eq(
|
|
734
|
+
rb_encoding *enc1, int is_7bit_1,
|
|
735
|
+
rb_encoding *enc2, int is_7bit_2)
|
|
736
|
+
{
|
|
737
|
+
if (enc1 == enc2) return 1;
|
|
738
|
+
if (!rb_enc_asciicompat(enc1) || !rb_enc_asciicompat(enc2)) return 0;
|
|
739
|
+
return is_7bit_1 || is_7bit_2;
|
|
740
|
+
}
|
|
741
|
+
|
|
742
|
+
/*
 * Returns non-zero when obj wraps a string_view_t, i.e. it is a StringView,
 * a StringView::Strict, or an instance of a subclass of either.
 *
 * The check goes through the typed-data descriptor rather than comparing
 * exact classes, so subclass instances are recognised and any object that
 * passes is safe to hand to sv_get_struct. Non-data objects and immediates
 * (nil, Integers, ...) yield 0.
 */
SV_INLINE int sv_is_string_view(VALUE obj) {
    return rb_typeddata_is_kind_of(obj, &string_view_type);
}
|
|
746
|
+
|
|
564
747
|
static VALUE sv_eq(VALUE self, VALUE other) {
|
|
565
748
|
string_view_t *sv = sv_get_struct(self);
|
|
566
749
|
const char *p = sv_ptr(sv);
|
|
@@ -568,13 +751,26 @@ static VALUE sv_eq(VALUE self, VALUE other) {
|
|
|
568
751
|
/* Fast path: String is the most common comparison target */
|
|
569
752
|
if (SV_LIKELY(RB_TYPE_P(other, T_STRING))) {
|
|
570
753
|
if (sv->length != RSTRING_LEN(other)) return Qfalse;
|
|
754
|
+
rb_encoding *oenc = rb_enc_get(other);
|
|
755
|
+
if (sv->enc != oenc) {
|
|
756
|
+
int sv_7bit = sv_is_7bit(sv);
|
|
757
|
+
int o_7bit = rb_enc_str_asciionly_p(other);
|
|
758
|
+
if (!sv_enc_compatible_for_eq(sv->enc, sv_7bit, oenc, o_7bit))
|
|
759
|
+
return Qfalse;
|
|
760
|
+
}
|
|
571
761
|
return memcmp(p, RSTRING_PTR(other), sv->length) == 0 ? Qtrue : Qfalse;
|
|
572
762
|
}
|
|
573
763
|
|
|
574
|
-
/* Check for StringView
|
|
575
|
-
if (
|
|
764
|
+
/* Check for StringView or StringView::Strict */
|
|
765
|
+
if (sv_is_string_view(other)) {
|
|
576
766
|
string_view_t *o = sv_get_struct(other);
|
|
577
767
|
if (sv->length != o->length) return Qfalse;
|
|
768
|
+
if (sv->enc != o->enc) {
|
|
769
|
+
int sv_7bit = sv_is_7bit(sv);
|
|
770
|
+
int o_7bit = sv_is_7bit(o);
|
|
771
|
+
if (!sv_enc_compatible_for_eq(sv->enc, sv_7bit, o->enc, o_7bit))
|
|
772
|
+
return Qfalse;
|
|
773
|
+
}
|
|
578
774
|
return memcmp(p, sv_ptr(o), sv->length) == 0 ? Qtrue : Qfalse;
|
|
579
775
|
}
|
|
580
776
|
|
|
@@ -590,7 +786,7 @@ static VALUE sv_cmp(VALUE self, VALUE other) {
|
|
|
590
786
|
if (SV_LIKELY(RB_TYPE_P(other, T_STRING))) {
|
|
591
787
|
op = RSTRING_PTR(other);
|
|
592
788
|
olen = RSTRING_LEN(other);
|
|
593
|
-
} else if (
|
|
789
|
+
} else if (sv_is_string_view(other)) {
|
|
594
790
|
string_view_t *o = sv_get_struct(other);
|
|
595
791
|
op = sv_ptr(o);
|
|
596
792
|
olen = o->length;
|
|
@@ -610,15 +806,22 @@ static VALUE sv_cmp(VALUE self, VALUE other) {
|
|
|
610
806
|
}
|
|
611
807
|
|
|
612
808
|
/*
 * eql?(other) — stricter than ==: only another string view can be eql?,
 * in which case the result is that of sv_eq.
 */
static VALUE sv_eql_p(VALUE self, VALUE other) {
    return sv_is_string_view(other) ? sv_eq(self, other) : Qfalse;
}
|
|
616
812
|
|
|
617
813
|
/*
 * hash — rb_memhash of the view's bytes XORed with the encoding index.
 *
 * As in CRuby's rb_str_hash, the encoding index is normalised to 0 when the
 * content is 7-bit, so that e.g. UTF-8 "hello" and US-ASCII "hello", which
 * compare equal via sv_eq, also hash identically.
 */
static VALUE sv_hash(VALUE self) {
    string_view_t *sv = sv_get_struct(self);

    int enc_idx = rb_enc_to_index(sv->enc);
    if (enc_idx != 0 && sv_is_7bit(sv)) enc_idx = 0;

    st_index_t digest = rb_memhash(sv_ptr(sv), sv->length);
    digest ^= (st_index_t)enc_idx;
    return ST2FIX(digest);
}
|
|
624
827
|
|
|
@@ -636,13 +839,17 @@ static VALUE sv_hash(VALUE self) {
|
|
|
636
839
|
* Compute single-byte flag from encoding + coderange.
|
|
637
840
|
* Called once at construction time and cached in sv->single_byte.
|
|
638
841
|
*/
|
|
639
|
-
|
|
842
|
+
int sv_compute_single_byte(VALUE backing, rb_encoding *enc) {
    /* Definitely one byte per character: single-byte encoding, or a backing
     * string already known to be 7-bit. */
    if (rb_enc_mbmaxlen(enc) == 1 ||
        ENC_CODERANGE(backing) == ENC_CODERANGE_7BIT) {
        return 1;
    }
    /*
     * Any other coderange (VALID, UNKNOWN, ...) describes the whole backing
     * string rather than this slice: a view over an ASCII-only prefix of a
     * multibyte string would wrongly be flagged multibyte if 0 were returned
     * here. Report -1 (unknown) so sv_single_byte_optimizable can settle it
     * lazily by scanning the slice's own bytes.
     */
    return -1;
}
|
|
648
855
|
|
|
@@ -880,7 +1087,7 @@ static VALUE sv_aref(int argc, VALUE *argv, VALUE self) {
|
|
|
880
1087
|
if (idx < 0) idx += total;
|
|
881
1088
|
if (SV_UNLIKELY(idx < 0 || idx > total || len < 0)) return Qnil;
|
|
882
1089
|
if (idx + len > total) len = total - idx;
|
|
883
|
-
return
|
|
1090
|
+
return sv_new_from_parent_obj(self, sv,
|
|
884
1091
|
sv->offset + idx,
|
|
885
1092
|
len);
|
|
886
1093
|
}
|
|
@@ -906,7 +1113,7 @@ static VALUE sv_aref(int argc, VALUE *argv, VALUE self) {
|
|
|
906
1113
|
long byte_end = sv_char_to_byte_offset(sv, idx + len);
|
|
907
1114
|
long byte_len = byte_end - byte_off;
|
|
908
1115
|
|
|
909
|
-
return
|
|
1116
|
+
return sv_new_from_parent_obj(self, sv,
|
|
910
1117
|
sv->offset + byte_off,
|
|
911
1118
|
byte_len);
|
|
912
1119
|
}
|
|
@@ -914,48 +1121,34 @@ static VALUE sv_aref(int argc, VALUE *argv, VALUE self) {
|
|
|
914
1121
|
if (rb_obj_is_kind_of(arg1, rb_cRange)) {
|
|
915
1122
|
long total_chars = sv_char_count(sv);
|
|
916
1123
|
long beg, len;
|
|
917
|
-
|
|
918
|
-
|
|
919
|
-
|
|
920
|
-
|
|
921
|
-
|
|
922
|
-
|
|
923
|
-
if (beg < 0) beg += total_chars;
|
|
924
|
-
if (beg < 0) return Qnil;
|
|
925
|
-
|
|
926
|
-
long e;
|
|
927
|
-
if (NIL_P(rb_end)) {
|
|
928
|
-
e = total_chars;
|
|
929
|
-
} else {
|
|
930
|
-
e = NUM2LONG(rb_end);
|
|
931
|
-
if (e < 0) e += total_chars;
|
|
932
|
-
if (!excl) e += 1;
|
|
1124
|
+
|
|
1125
|
+
/* rb_range_beg_len resolves negative indices and clamps to total,
|
|
1126
|
+
* replacing 3 Ruby method dispatches with a single C call. */
|
|
1127
|
+
switch (rb_range_beg_len(arg1, &beg, &len, total_chars, 1)) {
|
|
1128
|
+
case Qfalse: return Qnil;
|
|
1129
|
+
case Qnil: return Qnil;
|
|
933
1130
|
}
|
|
934
|
-
if (e < beg) e = beg;
|
|
935
|
-
len = e - beg;
|
|
936
|
-
if (beg > total_chars) return Qnil;
|
|
937
|
-
if (beg + len > total_chars) len = total_chars - beg;
|
|
938
1131
|
|
|
939
1132
|
long byte_off = sv_char_to_byte_offset(sv, beg);
|
|
940
1133
|
long byte_len = sv_chars_to_bytes(sv, byte_off, len);
|
|
941
1134
|
|
|
942
|
-
return
|
|
1135
|
+
return sv_new_from_parent_obj(self, sv,
|
|
943
1136
|
sv->offset + byte_off,
|
|
944
1137
|
byte_len);
|
|
945
1138
|
}
|
|
946
1139
|
|
|
947
1140
|
if (rb_obj_is_kind_of(arg1, rb_cRegexp)) {
|
|
948
1141
|
VALUE shared = sv_as_shared_str(sv);
|
|
949
|
-
VALUE m = rb_funcall(arg1,
|
|
1142
|
+
VALUE m = rb_funcall(arg1, id_match, 1, shared);
|
|
950
1143
|
if (NIL_P(m)) return Qnil;
|
|
951
1144
|
|
|
952
|
-
VALUE matched = rb_funcall(m,
|
|
953
|
-
long match_beg = NUM2LONG(rb_funcall(m,
|
|
1145
|
+
VALUE matched = rb_funcall(m, id_aref, 1, INT2FIX(0));
|
|
1146
|
+
long match_beg = NUM2LONG(rb_funcall(m, id_begin, 1, INT2FIX(0)));
|
|
954
1147
|
|
|
955
1148
|
long byte_off = sv_char_to_byte_offset(sv, match_beg);
|
|
956
1149
|
long byte_len = RSTRING_LEN(matched);
|
|
957
1150
|
|
|
958
|
-
return
|
|
1151
|
+
return sv_new_from_parent_obj(self, sv,
|
|
959
1152
|
sv->offset + byte_off,
|
|
960
1153
|
byte_len);
|
|
961
1154
|
}
|
|
@@ -964,14 +1157,14 @@ static VALUE sv_aref(int argc, VALUE *argv, VALUE self) {
|
|
|
964
1157
|
const char *p = sv_ptr(sv);
|
|
965
1158
|
long slen = RSTRING_LEN(arg1);
|
|
966
1159
|
if (slen == 0) {
|
|
967
|
-
return
|
|
1160
|
+
return sv_new_from_parent_obj(self, sv, sv->offset, 0);
|
|
968
1161
|
}
|
|
969
1162
|
if (slen > sv->length) return Qnil;
|
|
970
1163
|
|
|
971
1164
|
long pos = rb_memsearch(RSTRING_PTR(arg1), slen, p, sv->length, sv_enc(sv));
|
|
972
1165
|
if (pos < 0 || pos > sv->length - slen) return Qnil;
|
|
973
1166
|
|
|
974
|
-
return
|
|
1167
|
+
return sv_new_from_parent_obj(self, sv, sv->offset + pos, slen);
|
|
975
1168
|
}
|
|
976
1169
|
|
|
977
1170
|
if (RB_INTEGER_TYPE_P(arg1)) {
|
|
@@ -986,7 +1179,7 @@ static VALUE sv_aref(int argc, VALUE *argv, VALUE self) {
|
|
|
986
1179
|
|
|
987
1180
|
long byte_len = sv_chars_to_bytes(sv, byte_off, 1);
|
|
988
1181
|
|
|
989
|
-
return
|
|
1182
|
+
return sv_new_from_parent_obj(self, sv,
|
|
990
1183
|
sv->offset + byte_off,
|
|
991
1184
|
byte_len);
|
|
992
1185
|
}
|
|
@@ -1015,96 +1208,381 @@ static VALUE sv_byteslice(int argc, VALUE *argv, VALUE self) {
|
|
|
1015
1208
|
if (len < 0) return Qnil;
|
|
1016
1209
|
if (off + len > sv->length) len = sv->length - off;
|
|
1017
1210
|
|
|
1018
|
-
return
|
|
1211
|
+
return sv_new_from_parent_obj(self, sv, sv->offset + off, len);
|
|
1019
1212
|
}
|
|
1020
1213
|
|
|
1021
1214
|
if (rb_obj_is_kind_of(arg1, rb_cRange)) {
|
|
1022
1215
|
long beg, len;
|
|
1023
|
-
VALUE rb_beg = rb_funcall(arg1, rb_intern("begin"), 0);
|
|
1024
|
-
VALUE rb_end = rb_funcall(arg1, rb_intern("end"), 0);
|
|
1025
|
-
int excl = RTEST(rb_funcall(arg1, rb_intern("exclude_end?"), 0));
|
|
1026
|
-
|
|
1027
|
-
beg = NIL_P(rb_beg) ? 0 : NUM2LONG(rb_beg);
|
|
1028
|
-
if (beg < 0) beg += sv->length;
|
|
1029
|
-
if (beg < 0) return Qnil;
|
|
1030
1216
|
|
|
1031
|
-
|
|
1032
|
-
|
|
1033
|
-
|
|
1034
|
-
} else {
|
|
1035
|
-
e = NUM2LONG(rb_end);
|
|
1036
|
-
if (e < 0) e += sv->length;
|
|
1037
|
-
if (!excl) e += 1;
|
|
1217
|
+
switch (rb_range_beg_len(arg1, &beg, &len, sv->length, 1)) {
|
|
1218
|
+
case Qfalse: return Qnil;
|
|
1219
|
+
case Qnil: return Qnil;
|
|
1038
1220
|
}
|
|
1039
|
-
if (e < beg) e = beg;
|
|
1040
|
-
len = e - beg;
|
|
1041
|
-
if (beg > sv->length) return Qnil;
|
|
1042
|
-
if (beg + len > sv->length) len = sv->length - beg;
|
|
1043
1221
|
|
|
1044
|
-
return
|
|
1222
|
+
return sv_new_from_parent_obj(self, sv, sv->offset + beg, len);
|
|
1045
1223
|
}
|
|
1046
1224
|
|
|
1047
1225
|
{
|
|
1048
1226
|
long idx = NUM2LONG(arg1);
|
|
1049
1227
|
if (idx < 0) idx += sv->length;
|
|
1050
1228
|
if (idx < 0 || idx >= sv->length) return Qnil;
|
|
1051
|
-
return
|
|
1229
|
+
return sv_new_from_parent_obj(self, sv, sv->offset + idx, 1);
|
|
1052
1230
|
}
|
|
1053
1231
|
}
|
|
1054
1232
|
|
|
1233
|
+
/* ========================================================================= */
|
|
1234
|
+
/* Tier 1.5: Zero-copy transforms — returns StringView via offset adjustment */
|
|
1235
|
+
/* ========================================================================= */
|
|
1236
|
+
|
|
1237
|
+
/*
|
|
1238
|
+
* Helper: check if a byte is ASCII whitespace.
|
|
1239
|
+
* Matches Ruby's strip behavior for ASCII-compatible encodings:
|
|
1240
|
+
* space, tab, newline, vertical tab, form feed, carriage return, NUL.
|
|
1241
|
+
*/
|
|
1242
|
+
SV_INLINE int sv_is_ascii_whitespace(unsigned char c) {
|
|
1243
|
+
return c == ' ' || (c >= '\t' && c <= '\r') || c == '\0';
|
|
1244
|
+
}
|
|
1245
|
+
|
|
1246
|
+
/*
|
|
1247
|
+
* strip → StringView
|
|
1248
|
+
* Returns a new StringView with leading and trailing ASCII whitespace removed.
|
|
1249
|
+
* Zero allocations for the byte content — only a new StringView struct.
|
|
1250
|
+
*/
|
|
1251
|
+
static VALUE sv_strip(int argc, VALUE *argv, VALUE self) {
|
|
1252
|
+
rb_check_arity(argc, 0, 0);
|
|
1253
|
+
string_view_t *sv = sv_get_struct(self);
|
|
1254
|
+
const unsigned char *p = (const unsigned char *)sv_ptr(sv);
|
|
1255
|
+
long len = sv->length;
|
|
1256
|
+
|
|
1257
|
+
/* Skip leading whitespace */
|
|
1258
|
+
long left = 0;
|
|
1259
|
+
while (left < len && sv_is_ascii_whitespace(p[left])) left++;
|
|
1260
|
+
|
|
1261
|
+
/* Skip trailing whitespace */
|
|
1262
|
+
long right = len;
|
|
1263
|
+
while (right > left && sv_is_ascii_whitespace(p[right - 1])) right--;
|
|
1264
|
+
|
|
1265
|
+
if (left == 0 && right == len) return self;
|
|
1266
|
+
return sv_new_from_parent_obj(self, sv, sv->offset + left, right - left);
|
|
1267
|
+
}
|
|
1268
|
+
|
|
1269
|
+
/*
|
|
1270
|
+
* lstrip → StringView
|
|
1271
|
+
* Returns a new StringView with leading ASCII whitespace removed.
|
|
1272
|
+
*/
|
|
1273
|
+
static VALUE sv_lstrip(int argc, VALUE *argv, VALUE self) {
|
|
1274
|
+
rb_check_arity(argc, 0, 0);
|
|
1275
|
+
string_view_t *sv = sv_get_struct(self);
|
|
1276
|
+
const unsigned char *p = (const unsigned char *)sv_ptr(sv);
|
|
1277
|
+
long len = sv->length;
|
|
1278
|
+
|
|
1279
|
+
long left = 0;
|
|
1280
|
+
while (left < len && sv_is_ascii_whitespace(p[left])) left++;
|
|
1281
|
+
|
|
1282
|
+
if (left == 0) return self;
|
|
1283
|
+
return sv_new_from_parent_obj(self, sv, sv->offset + left, len - left);
|
|
1284
|
+
}
|
|
1285
|
+
|
|
1286
|
+
/*
|
|
1287
|
+
* rstrip → StringView
|
|
1288
|
+
* Returns a new StringView with trailing ASCII whitespace removed.
|
|
1289
|
+
*/
|
|
1290
|
+
static VALUE sv_rstrip(int argc, VALUE *argv, VALUE self) {
|
|
1291
|
+
rb_check_arity(argc, 0, 0);
|
|
1292
|
+
string_view_t *sv = sv_get_struct(self);
|
|
1293
|
+
const unsigned char *p = (const unsigned char *)sv_ptr(sv);
|
|
1294
|
+
long len = sv->length;
|
|
1295
|
+
|
|
1296
|
+
long right = len;
|
|
1297
|
+
while (right > 0 && sv_is_ascii_whitespace(p[right - 1])) right--;
|
|
1298
|
+
|
|
1299
|
+
if (right == len) return self;
|
|
1300
|
+
return sv_new_from_parent_obj(self, sv, sv->offset, right);
|
|
1301
|
+
}
|
|
1302
|
+
|
|
1303
|
+
/*
|
|
1304
|
+
* chomp([separator]) → StringView
|
|
1305
|
+
* Returns a new StringView with the trailing record separator removed.
|
|
1306
|
+
* Default separator is $/ (typically "\n").
|
|
1307
|
+
* Handles "\n", "\r\n", and "\r" when separator is "\n".
|
|
1308
|
+
*/
|
|
1309
|
+
static VALUE sv_chomp(int argc, VALUE *argv, VALUE self) {
|
|
1310
|
+
rb_check_arity(argc, 0, 1);
|
|
1311
|
+
string_view_t *sv = sv_get_struct(self);
|
|
1312
|
+
const unsigned char *p = (const unsigned char *)sv_ptr(sv);
|
|
1313
|
+
long len = sv->length;
|
|
1314
|
+
|
|
1315
|
+
if (len == 0) return self;
|
|
1316
|
+
|
|
1317
|
+
if (argc == 0 || NIL_P(argv[0])) {
|
|
1318
|
+
/* Default: remove trailing \n, \r\n, or \r */
|
|
1319
|
+
/* Use $/ (input record separator) when no arg given */
|
|
1320
|
+
VALUE rs;
|
|
1321
|
+
if (argc == 0) {
|
|
1322
|
+
rs = rb_rs; /* global $/ */
|
|
1323
|
+
if (NIL_P(rs)) return self; /* $/ is nil, no chomp */
|
|
1324
|
+
} else {
|
|
1325
|
+
return self; /* chomp(nil) returns self */
|
|
1326
|
+
}
|
|
1327
|
+
|
|
1328
|
+
/* Fast path for default $/ which is "\n" */
|
|
1329
|
+
if (RB_TYPE_P(rs, T_STRING) && RSTRING_LEN(rs) == 1 && RSTRING_PTR(rs)[0] == '\n') {
|
|
1330
|
+
if (p[len - 1] == '\n') {
|
|
1331
|
+
long newlen = len - 1;
|
|
1332
|
+
if (newlen > 0 && p[newlen - 1] == '\r') newlen--;
|
|
1333
|
+
return sv_new_from_parent_obj(self, sv, sv->offset, newlen);
|
|
1334
|
+
} else if (p[len - 1] == '\r') {
|
|
1335
|
+
return sv_new_from_parent_obj(self, sv, sv->offset, len - 1);
|
|
1336
|
+
}
|
|
1337
|
+
return self;
|
|
1338
|
+
}
|
|
1339
|
+
|
|
1340
|
+
/* Non-default $/ — use the separator */
|
|
1341
|
+
if (!RB_TYPE_P(rs, T_STRING)) return self;
|
|
1342
|
+
const char *sep = RSTRING_PTR(rs);
|
|
1343
|
+
long seplen = RSTRING_LEN(rs);
|
|
1344
|
+
if (seplen == 0) {
|
|
1345
|
+
/* Paragraph mode: remove trailing \n+ */
|
|
1346
|
+
long right = len;
|
|
1347
|
+
while (right > 0 && p[right - 1] == '\n') right--;
|
|
1348
|
+
if (right == len) return self;
|
|
1349
|
+
return sv_new_from_parent_obj(self, sv, sv->offset, right);
|
|
1350
|
+
}
|
|
1351
|
+
if (seplen > len) return self;
|
|
1352
|
+
if (memcmp(p + len - seplen, sep, seplen) == 0) {
|
|
1353
|
+
return sv_new_from_parent_obj(self, sv, sv->offset, len - seplen);
|
|
1354
|
+
}
|
|
1355
|
+
return self;
|
|
1356
|
+
}
|
|
1357
|
+
|
|
1358
|
+
/* Explicit separator argument */
|
|
1359
|
+
VALUE sep_val = argv[0];
|
|
1360
|
+
if (NIL_P(sep_val)) return self;
|
|
1361
|
+
StringValue(sep_val);
|
|
1362
|
+
const char *sep = RSTRING_PTR(sep_val);
|
|
1363
|
+
long seplen = RSTRING_LEN(sep_val);
|
|
1364
|
+
|
|
1365
|
+
if (seplen == 0) {
|
|
1366
|
+
/* Paragraph mode: remove all trailing newlines */
|
|
1367
|
+
long right = len;
|
|
1368
|
+
while (right > 0 && p[right - 1] == '\n') right--;
|
|
1369
|
+
if (right == len) return self;
|
|
1370
|
+
return sv_new_from_parent_obj(self, sv, sv->offset, right);
|
|
1371
|
+
}
|
|
1372
|
+
|
|
1373
|
+
/* Special handling for "\n": also removes \r\n and \r */
|
|
1374
|
+
if (seplen == 1 && sep[0] == '\n') {
|
|
1375
|
+
if (p[len - 1] == '\n') {
|
|
1376
|
+
long newlen = len - 1;
|
|
1377
|
+
if (newlen > 0 && p[newlen - 1] == '\r') newlen--;
|
|
1378
|
+
return sv_new_from_parent_obj(self, sv, sv->offset, newlen);
|
|
1379
|
+
} else if (p[len - 1] == '\r') {
|
|
1380
|
+
return sv_new_from_parent_obj(self, sv, sv->offset, len - 1);
|
|
1381
|
+
}
|
|
1382
|
+
return self;
|
|
1383
|
+
}
|
|
1384
|
+
|
|
1385
|
+
if (seplen > len) return self;
|
|
1386
|
+
if (memcmp(p + len - seplen, sep, seplen) == 0) {
|
|
1387
|
+
return sv_new_from_parent_obj(self, sv, sv->offset, len - seplen);
|
|
1388
|
+
}
|
|
1389
|
+
return self;
|
|
1390
|
+
}
|
|
1391
|
+
|
|
1392
|
+
/*
|
|
1393
|
+
* chop → StringView
|
|
1394
|
+
* Returns a new StringView with the last character removed.
|
|
1395
|
+
* If the string ends with \r\n, both characters are removed.
|
|
1396
|
+
*/
|
|
1397
|
+
static VALUE sv_chop(int argc, VALUE *argv, VALUE self) {
|
|
1398
|
+
rb_check_arity(argc, 0, 0);
|
|
1399
|
+
string_view_t *sv = sv_get_struct(self);
|
|
1400
|
+
long len = sv->length;
|
|
1401
|
+
|
|
1402
|
+
if (len == 0) return self;
|
|
1403
|
+
|
|
1404
|
+
const unsigned char *p = (const unsigned char *)sv_ptr(sv);
|
|
1405
|
+
|
|
1406
|
+
/* Check for \r\n at the end */
|
|
1407
|
+
if (len >= 2 && p[len - 1] == '\n' && p[len - 2] == '\r') {
|
|
1408
|
+
return sv_new_from_parent_obj(self, sv, sv->offset, len - 2);
|
|
1409
|
+
}
|
|
1410
|
+
|
|
1411
|
+
/* Remove last character (respecting encoding) */
|
|
1412
|
+
if (sv_single_byte_optimizable(sv)) {
|
|
1413
|
+
return sv_new_from_parent_obj(self, sv, sv->offset, len - 1);
|
|
1414
|
+
}
|
|
1415
|
+
|
|
1416
|
+
/* Multibyte: find start of last character */
|
|
1417
|
+
rb_encoding *enc = sv_enc(sv);
|
|
1418
|
+
const char *start = sv_ptr(sv);
|
|
1419
|
+
const char *end = start + len;
|
|
1420
|
+
const char *prev = rb_enc_prev_char(start, end, end, enc);
|
|
1421
|
+
if (prev == NULL) prev = start;
|
|
1422
|
+
long newlen = (long)(prev - start);
|
|
1423
|
+
|
|
1424
|
+
return sv_new_from_parent_obj(self, sv, sv->offset, newlen);
|
|
1425
|
+
}
|
|
1426
|
+
|
|
1427
|
+
/*
|
|
1428
|
+
* delete_prefix(prefix) → StringView
|
|
1429
|
+
* Returns a new StringView with the given prefix removed, or self if
|
|
1430
|
+
* the string doesn't start with the prefix.
|
|
1431
|
+
*/
|
|
1432
|
+
static VALUE sv_delete_prefix(VALUE self, VALUE prefix) {
|
|
1433
|
+
string_view_t *sv = sv_get_struct(self);
|
|
1434
|
+
StringValue(prefix);
|
|
1435
|
+
const char *p = sv_ptr(sv);
|
|
1436
|
+
long plen = RSTRING_LEN(prefix);
|
|
1437
|
+
|
|
1438
|
+
if (plen > sv->length) return self;
|
|
1439
|
+
if (plen == 0) return self;
|
|
1440
|
+
if (memcmp(p, RSTRING_PTR(prefix), plen) != 0) return self;
|
|
1441
|
+
|
|
1442
|
+
return sv_new_from_parent_obj(self, sv, sv->offset + plen, sv->length - plen);
|
|
1443
|
+
}
|
|
1444
|
+
|
|
1445
|
+
/*
|
|
1446
|
+
* delete_suffix(suffix) → StringView
|
|
1447
|
+
* Returns a new StringView with the given suffix removed, or self if
|
|
1448
|
+
* the string doesn't end with the suffix.
|
|
1449
|
+
*/
|
|
1450
|
+
static VALUE sv_delete_suffix(VALUE self, VALUE suffix) {
|
|
1451
|
+
string_view_t *sv = sv_get_struct(self);
|
|
1452
|
+
StringValue(suffix);
|
|
1453
|
+
const char *p = sv_ptr(sv);
|
|
1454
|
+
long slen = RSTRING_LEN(suffix);
|
|
1455
|
+
|
|
1456
|
+
if (slen > sv->length) return self;
|
|
1457
|
+
if (slen == 0) return self;
|
|
1458
|
+
if (memcmp(p + sv->length - slen, RSTRING_PTR(suffix), slen) != 0) return self;
|
|
1459
|
+
|
|
1460
|
+
return sv_new_from_parent_obj(self, sv, sv->offset, sv->length - slen);
|
|
1461
|
+
}
|
|
1462
|
+
|
|
1463
|
+
/*
|
|
1464
|
+
* chr → StringView
|
|
1465
|
+
* Returns the first character as a StringView.
|
|
1466
|
+
*/
|
|
1467
|
+
static VALUE sv_chr(VALUE self) {
|
|
1468
|
+
string_view_t *sv = sv_get_struct(self);
|
|
1469
|
+
|
|
1470
|
+
if (sv->length == 0) return self;
|
|
1471
|
+
|
|
1472
|
+
if (sv_single_byte_optimizable(sv)) {
|
|
1473
|
+
return sv_new_from_parent_obj(self, sv, sv->offset, 1);
|
|
1474
|
+
}
|
|
1475
|
+
|
|
1476
|
+
rb_encoding *enc = sv_enc(sv);
|
|
1477
|
+
const char *p = sv_ptr(sv);
|
|
1478
|
+
const char *e = p + sv->length;
|
|
1479
|
+
int clen = rb_enc_fast_mbclen(p, e, enc);
|
|
1480
|
+
|
|
1481
|
+
return sv_new_from_parent_obj(self, sv, sv->offset, clen);
|
|
1482
|
+
}
|
|
1483
|
+
|
|
1484
|
+
/*
|
|
1485
|
+
* ord → Integer
|
|
1486
|
+
* Returns the codepoint of the first character.
|
|
1487
|
+
*/
|
|
1488
|
+
static VALUE sv_ord(VALUE self) {
|
|
1489
|
+
string_view_t *sv = sv_get_struct(self);
|
|
1490
|
+
|
|
1491
|
+
if (sv->length == 0) {
|
|
1492
|
+
rb_raise(rb_eArgError, "empty string");
|
|
1493
|
+
}
|
|
1494
|
+
|
|
1495
|
+
rb_encoding *enc = sv_enc(sv);
|
|
1496
|
+
const char *p = sv_ptr(sv);
|
|
1497
|
+
const char *e = p + sv->length;
|
|
1498
|
+
unsigned int c = rb_enc_codepoint_len(p, e, NULL, enc);
|
|
1499
|
+
return UINT2NUM(c);
|
|
1500
|
+
}
|
|
1501
|
+
|
|
1502
|
+
/*
|
|
1503
|
+
* valid_encoding? → true/false
|
|
1504
|
+
* Returns whether the view's bytes are valid in its encoding.
|
|
1505
|
+
*/
|
|
1506
|
+
static VALUE sv_valid_encoding_p(VALUE self) {
|
|
1507
|
+
string_view_t *sv = sv_get_struct(self);
|
|
1508
|
+
rb_encoding *enc = sv_enc(sv);
|
|
1509
|
+
const char *p = sv_ptr(sv);
|
|
1510
|
+
const char *e = p + sv->length;
|
|
1511
|
+
|
|
1512
|
+
while (p < e) {
|
|
1513
|
+
int len = rb_enc_precise_mbclen(p, e, enc);
|
|
1514
|
+
if (!MBCLEN_CHARFOUND_P(len)) return Qfalse;
|
|
1515
|
+
p += MBCLEN_CHARFOUND_LEN(len);
|
|
1516
|
+
}
|
|
1517
|
+
return Qtrue;
|
|
1518
|
+
}
|
|
1519
|
+
|
|
1520
|
+
/*
 * b → StringView (design note; no implementation accompanies this comment)
 *
 * The intent is a new StringView that references the same bytes but
 * reports ASCII-8BIT encoding. Reinterpreting the bytes as binary is always
 * valid, so the shared backing bytes can be reused as-is.
 *
 * The open question is where the encoding lives: the backing string has its
 * own encoding, which would require a new binary-encoded backing. However,
 * the view caches its encoding in sv->enc, so a lightweight view with a
 * different encoding is possible. For a truly zero-allocation `b`, the
 * encoding must be stored on the view separately from the backing string.
 */
|
|
1533
|
+
|
|
1055
1534
|
/* ========================================================================= */
|
|
1056
1535
|
/* Tier 3: Transform delegation */
|
|
1057
1536
|
/* ========================================================================= */
|
|
1058
1537
|
|
|
1059
|
-
/*
 * SV_DELEGATE_FUNCALL(cname, cached_id)
 *
 * Defines `static VALUE sv_<cname>(int argc, VALUE *argv, VALUE self)`.
 * The generated function obtains a String for the view through
 * sv_as_shared_str() and calls the method identified by `cached_id` on it
 * with the caller's arguments. A block given by the caller is passed along.
 */
#define SV_DELEGATE_FUNCALL(cname, cached_id)                                  \
    static VALUE sv_##cname(int argc, VALUE *argv, VALUE self) {               \
        string_view_t *view = sv_get_struct(self);                             \
        VALUE target = sv_as_shared_str(view);                                 \
        if (!rb_block_given_p()) {                                             \
            return rb_funcallv(target, cached_id, argc, argv);                 \
        }                                                                      \
        return rb_funcall_with_block(target, cached_id,                        \
                                     argc, argv, rb_block_proc());             \
    }
|
|
1069
1548
|
|
|
1070
|
-
SV_DELEGATE_FUNCALL(upcase,
|
|
1071
|
-
SV_DELEGATE_FUNCALL(downcase,
|
|
1072
|
-
SV_DELEGATE_FUNCALL(capitalize,
|
|
1073
|
-
SV_DELEGATE_FUNCALL(swapcase,
|
|
1074
|
-
SV_DELEGATE_FUNCALL(
|
|
1075
|
-
SV_DELEGATE_FUNCALL(
|
|
1076
|
-
SV_DELEGATE_FUNCALL(
|
|
1077
|
-
SV_DELEGATE_FUNCALL(
|
|
1078
|
-
SV_DELEGATE_FUNCALL(
|
|
1079
|
-
SV_DELEGATE_FUNCALL(
|
|
1080
|
-
SV_DELEGATE_FUNCALL(
|
|
1081
|
-
SV_DELEGATE_FUNCALL(
|
|
1082
|
-
|
|
1083
|
-
|
|
1084
|
-
|
|
1085
|
-
|
|
1086
|
-
|
|
1087
|
-
SV_DELEGATE_FUNCALL(count,
|
|
1088
|
-
SV_DELEGATE_FUNCALL(scan,
|
|
1089
|
-
SV_DELEGATE_FUNCALL(split,
|
|
1090
|
-
SV_DELEGATE_FUNCALL(center,
|
|
1091
|
-
SV_DELEGATE_FUNCALL(ljust,
|
|
1092
|
-
SV_DELEGATE_FUNCALL(rjust,
|
|
1093
|
-
SV_DELEGATE_FUNCALL(format_op,
|
|
1094
|
-
SV_DELEGATE_FUNCALL(plus,
|
|
1095
|
-
SV_DELEGATE_FUNCALL(multiply,
|
|
1096
|
-
SV_DELEGATE_FUNCALL(unpack1,
|
|
1097
|
-
SV_DELEGATE_FUNCALL(scrub,
|
|
1098
|
-
SV_DELEGATE_FUNCALL(unicode_normalize,
|
|
1549
|
+
/*
 * Delegating methods generated by SV_DELEGATE_FUNCALL. Each line defines
 * sv_<first argument>, which forwards to the String method whose cached ID
 * is the second argument.
 */
SV_DELEGATE_FUNCALL(upcase, id_upcase)
SV_DELEGATE_FUNCALL(downcase, id_downcase)
SV_DELEGATE_FUNCALL(capitalize,id_capitalize)
SV_DELEGATE_FUNCALL(swapcase, id_swapcase)
SV_DELEGATE_FUNCALL(reverse, id_reverse)
SV_DELEGATE_FUNCALL(squeeze, id_squeeze)
SV_DELEGATE_FUNCALL(encode, id_encode)
SV_DELEGATE_FUNCALL(gsub, id_gsub)
SV_DELEGATE_FUNCALL(sub, id_sub)
SV_DELEGATE_FUNCALL(tr, id_tr)
SV_DELEGATE_FUNCALL(tr_s, id_tr_s)
/* Generates sv_delete_str; it forwards to the method cached in id_delete. */
SV_DELEGATE_FUNCALL(delete_str,id_delete)
/*
 * count(set, ...) → Integer
 * Delegates to String#count via shared string.
 * (Character set parsing is complex — reuse Ruby's implementation.)
 */
SV_DELEGATE_FUNCALL(count, id_count)
SV_DELEGATE_FUNCALL(scan, id_scan)
SV_DELEGATE_FUNCALL(split, id_split)
SV_DELEGATE_FUNCALL(center, id_center)
SV_DELEGATE_FUNCALL(ljust, id_ljust)
SV_DELEGATE_FUNCALL(rjust, id_rjust)
/* Operator forms: the C names format_op / plus / multiply stand in for operator method IDs. */
SV_DELEGATE_FUNCALL(format_op, id_format_op)
SV_DELEGATE_FUNCALL(plus, id_plus)
SV_DELEGATE_FUNCALL(multiply, id_multiply)
SV_DELEGATE_FUNCALL(unpack1, id_unpack1)
SV_DELEGATE_FUNCALL(scrub, id_scrub)
SV_DELEGATE_FUNCALL(unicode_normalize, id_unicode_normalize)
|
|
1099
1578
|
|
|
1100
1579
|
/* ========================================================================= */
|
|
1101
1580
|
/* Bang methods — always raise FrozenError */
|
|
1102
1581
|
/* ========================================================================= */
|
|
1103
1582
|
|
|
1104
1583
|
/*
 * Shared body for every mutating ("bang") method registered on StringView.
 * It ignores its arguments and always raises FrozenError with a fixed
 * message.
 */
static VALUE sv_frozen_error(int argc, VALUE *argv, VALUE self) {
    /* Arguments are accepted only so any arity can be registered. */
    (void)argc;
    (void)argv;
    (void)self;

    rb_raise(rb_eFrozenError, "can't modify frozen StringView");

    /* Present only to satisfy the VALUE return type. */
    return Qnil;
}
|
|
1110
1588
|
|
|
@@ -1115,6 +1593,46 @@ static VALUE sv_frozen_error(int argc, VALUE *argv, VALUE self) {
|
|
|
1115
1593
|
void Init_string_view(void) {
|
|
1116
1594
|
enc_utf8 = rb_utf8_encoding();
|
|
1117
1595
|
|
|
1596
|
+
/* Cache method IDs — avoids rb_intern hash lookup on every call */
|
|
1597
|
+
id_index = rb_intern("index");
|
|
1598
|
+
id_rindex = rb_intern("rindex");
|
|
1599
|
+
id_byteindex = rb_intern("byteindex");
|
|
1600
|
+
id_byterindex = rb_intern("byterindex");
|
|
1601
|
+
id_match = rb_intern("match");
|
|
1602
|
+
id_match_p = rb_intern("match?");
|
|
1603
|
+
id_match_op = rb_intern("=~");
|
|
1604
|
+
id_begin = rb_intern("begin");
|
|
1605
|
+
id_aref = rb_intern("[]");
|
|
1606
|
+
id_upcase = rb_intern("upcase");
|
|
1607
|
+
id_downcase = rb_intern("downcase");
|
|
1608
|
+
id_capitalize = rb_intern("capitalize");
|
|
1609
|
+
id_swapcase = rb_intern("swapcase");
|
|
1610
|
+
id_strip = rb_intern("strip");
|
|
1611
|
+
id_lstrip = rb_intern("lstrip");
|
|
1612
|
+
id_rstrip = rb_intern("rstrip");
|
|
1613
|
+
id_chomp = rb_intern("chomp");
|
|
1614
|
+
id_chop = rb_intern("chop");
|
|
1615
|
+
id_reverse = rb_intern("reverse");
|
|
1616
|
+
id_squeeze = rb_intern("squeeze");
|
|
1617
|
+
id_encode = rb_intern("encode");
|
|
1618
|
+
id_gsub = rb_intern("gsub");
|
|
1619
|
+
id_sub = rb_intern("sub");
|
|
1620
|
+
id_tr = rb_intern("tr");
|
|
1621
|
+
id_tr_s = rb_intern("tr_s");
|
|
1622
|
+
id_delete = rb_intern("delete");
|
|
1623
|
+
id_count = rb_intern("count");
|
|
1624
|
+
id_scan = rb_intern("scan");
|
|
1625
|
+
id_split = rb_intern("split");
|
|
1626
|
+
id_center = rb_intern("center");
|
|
1627
|
+
id_ljust = rb_intern("ljust");
|
|
1628
|
+
id_rjust = rb_intern("rjust");
|
|
1629
|
+
id_format_op = rb_intern("%");
|
|
1630
|
+
id_plus = rb_intern("+");
|
|
1631
|
+
id_multiply = rb_intern("*");
|
|
1632
|
+
id_unpack1 = rb_intern("unpack1");
|
|
1633
|
+
id_scrub = rb_intern("scrub");
|
|
1634
|
+
id_unicode_normalize = rb_intern("unicode_normalize");
|
|
1635
|
+
|
|
1118
1636
|
cStringView = rb_define_class("StringView", rb_cObject);
|
|
1119
1637
|
rb_include_module(cStringView, rb_mComparable);
|
|
1120
1638
|
|
|
@@ -1122,11 +1640,10 @@ void Init_string_view(void) {
|
|
|
1122
1640
|
rb_define_method(cStringView, "initialize", sv_initialize, -1);
|
|
1123
1641
|
|
|
1124
1642
|
rb_define_method(cStringView, "to_s", sv_to_s, 0);
|
|
1643
|
+
rb_define_method(cStringView, "materialize", sv_to_s, 0);
|
|
1125
1644
|
rb_define_private_method(cStringView, "to_str", sv_to_str, 0);
|
|
1126
1645
|
rb_define_method(cStringView, "inspect", sv_inspect, 0);
|
|
1127
|
-
rb_define_method(cStringView, "frozen?", sv_frozen_p, 0);
|
|
1128
1646
|
rb_define_method(cStringView, "reset!", sv_reset, 3);
|
|
1129
|
-
rb_define_alias(cStringView, "materialize", "to_s");
|
|
1130
1647
|
|
|
1131
1648
|
rb_define_method(cStringView, "bytesize", sv_bytesize, 0);
|
|
1132
1649
|
rb_define_method(cStringView, "length", sv_length, 0);
|
|
@@ -1176,6 +1693,11 @@ void Init_string_view(void) {
|
|
|
1176
1693
|
rb_define_method(cStringView, "rstrip", sv_rstrip, -1);
|
|
1177
1694
|
rb_define_method(cStringView, "chomp", sv_chomp, -1);
|
|
1178
1695
|
rb_define_method(cStringView, "chop", sv_chop, -1);
|
|
1696
|
+
rb_define_method(cStringView, "delete_prefix", sv_delete_prefix, 1);
|
|
1697
|
+
rb_define_method(cStringView, "delete_suffix", sv_delete_suffix, 1);
|
|
1698
|
+
rb_define_method(cStringView, "chr", sv_chr, 0);
|
|
1699
|
+
rb_define_method(cStringView, "ord", sv_ord, 0);
|
|
1700
|
+
rb_define_method(cStringView, "valid_encoding?", sv_valid_encoding_p, 0);
|
|
1179
1701
|
rb_define_method(cStringView, "reverse", sv_reverse, -1);
|
|
1180
1702
|
rb_define_method(cStringView, "squeeze", sv_squeeze, -1);
|
|
1181
1703
|
rb_define_method(cStringView, "encode", sv_encode, -1);
|
|
@@ -1183,7 +1705,7 @@ void Init_string_view(void) {
|
|
|
1183
1705
|
rb_define_method(cStringView, "sub", sv_sub, -1);
|
|
1184
1706
|
rb_define_method(cStringView, "tr", sv_tr, -1);
|
|
1185
1707
|
rb_define_method(cStringView, "tr_s", sv_tr_s, -1);
|
|
1186
|
-
rb_define_method(cStringView, "delete",
|
|
1708
|
+
rb_define_method(cStringView, "delete", sv_delete_str, -1);
|
|
1187
1709
|
rb_define_method(cStringView, "count", sv_count, -1);
|
|
1188
1710
|
rb_define_method(cStringView, "scan", sv_scan, -1);
|
|
1189
1711
|
rb_define_method(cStringView, "split", sv_split, -1);
|
|
@@ -1214,4 +1736,10 @@ void Init_string_view(void) {
|
|
|
1214
1736
|
rb_define_method(cStringView, "gsub!", sv_frozen_error, -1);
|
|
1215
1737
|
rb_define_method(cStringView, "sub!", sv_frozen_error, -1);
|
|
1216
1738
|
rb_define_method(cStringView, "slice!", sv_frozen_error, -1);
|
|
1739
|
+
rb_define_method(cStringView, "delete_prefix!", sv_frozen_error, -1);
|
|
1740
|
+
rb_define_method(cStringView, "delete_suffix!", sv_frozen_error, -1);
|
|
1741
|
+
|
|
1742
|
+
Init_string_view_strict();
|
|
1743
|
+
Init_string_view_pool();
|
|
1744
|
+
Init_string_view_core_ext();
|
|
1217
1745
|
}
|