string_view 0.1.0 → 0.2.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.md +25 -4
- data/ext/string_view/extconf.rb +1 -1
- data/ext/string_view/string_view.c +735 -207
- data/ext/string_view/string_view.h +121 -0
- data/ext/string_view/string_view_core_ext.c +44 -0
- data/ext/string_view/string_view_pool.c +204 -0
- data/ext/string_view/string_view_strict.c +102 -0
- data/lib/string_view/core_ext.rb +5 -0
- data/lib/string_view/version.rb +1 -1
- metadata +7 -2
|
@@ -0,0 +1,121 @@
|
|
|
1
|
+
#ifndef STRING_VIEW_H
|
|
2
|
+
#define STRING_VIEW_H
|
|
3
|
+
|
|
4
|
+
#include "ruby.h"
|
|
5
|
+
#include "ruby/encoding.h"
|
|
6
|
+
#include "ruby/re.h"
|
|
7
|
+
#include "simdutf_c.h"
|
|
8
|
+
|
|
9
|
+
/*
 * Compiler portability shims.
 *
 * SV_LIKELY / SV_UNLIKELY are branch-prediction hints and SV_INLINE forces
 * inlining. All three rely on GNU extensions (__builtin_expect and
 * __attribute__), so they are enabled together only when __GNUC__ is
 * defined. Other compilers get plain fallbacks that keep the truth value of
 * the condition and an ordinary `static inline`.
 */
#ifdef __GNUC__
#define SV_LIKELY(x) __builtin_expect(!!(x), 1)
#define SV_UNLIKELY(x) __builtin_expect(!!(x), 0)
#define SV_INLINE static inline __attribute__((always_inline))
#else
#define SV_LIKELY(x) (!!(x))
#define SV_UNLIKELY(x) (!!(x))
#define SV_INLINE static inline
#endif
|
|
17
|
+
|
|
18
|
+
/* ========================================================================= */
|
|
19
|
+
/* Struct & TypedData */
|
|
20
|
+
/* ========================================================================= */
|
|
21
|
+
|
|
22
|
+
/*
 * Stride index.
 *
 * Records the byte offset of every STRIDE_CHARS-th character so that a
 * character index can be turned into a byte offset in O(1): jump to the
 * nearest recorded stride, then do a small scalar scan within that stride.
 * The index is built lazily, on the first char-indexed access.
 */
#define STRIDE_CHARS 128

typedef struct {
    /* offsets[i] is the byte offset of character number i * STRIDE_CHARS */
    long *offsets;
    /* number of entries in offsets: ceil(charlen / STRIDE_CHARS) + 1 */
    long count;
} stride_index_t;
|
|
33
|
+
|
|
34
|
+
typedef struct {
|
|
35
|
+
VALUE backing; /* frozen String that owns the bytes */
|
|
36
|
+
const char *base; /* cached RSTRING_PTR(backing) — avoids indirection */
|
|
37
|
+
rb_encoding *enc; /* cached encoding — avoids rb_enc_get per call */
|
|
38
|
+
long offset; /* byte offset into backing */
|
|
39
|
+
long length; /* byte length of this view */
|
|
40
|
+
long charlen; /* cached character count; -1 = not yet computed */
|
|
41
|
+
int single_byte; /* cached: 1 if char==byte (ASCII/single-byte enc), 0 if multibyte, -1 unknown */
|
|
42
|
+
stride_index_t *stride_idx; /* lazily built stride index for multibyte, NULL if not built */
|
|
43
|
+
} string_view_t;
|
|
44
|
+
|
|
45
|
+
/* Global class/error VALUE variables */
|
|
46
|
+
extern VALUE cStringView;
|
|
47
|
+
extern VALUE cStringViewStrict;
|
|
48
|
+
extern VALUE cStringViewPool;
|
|
49
|
+
extern VALUE eWouldAllocate;
|
|
50
|
+
|
|
51
|
+
/* TypedData descriptor */
|
|
52
|
+
extern const rb_data_type_t string_view_type;
|
|
53
|
+
|
|
54
|
+
/* ========================================================================= */
|
|
55
|
+
/* Shared helpers */
|
|
56
|
+
/* ========================================================================= */
|
|
57
|
+
|
|
58
|
+
/* Forward-declared helpers (defined in string_view.c) */
|
|
59
|
+
int sv_compute_single_byte(VALUE backing, rb_encoding *enc);
|
|
60
|
+
|
|
61
|
+
/* Validate that str is a frozen T_STRING. Raises TypeError if not a
|
|
62
|
+
* String, FrozenError if not frozen. */
|
|
63
|
+
SV_INLINE void sv_check_frozen_string(VALUE str) {
|
|
64
|
+
if (SV_UNLIKELY(!RB_TYPE_P(str, T_STRING))) {
|
|
65
|
+
rb_raise(rb_eTypeError,
|
|
66
|
+
"no implicit conversion of %s into String",
|
|
67
|
+
rb_obj_classname(str));
|
|
68
|
+
}
|
|
69
|
+
if (SV_UNLIKELY(!OBJ_FROZEN(str))) {
|
|
70
|
+
rb_raise(rb_eFrozenError,
|
|
71
|
+
"string must be frozen; call .freeze before creating a view");
|
|
72
|
+
}
|
|
73
|
+
}
|
|
74
|
+
|
|
75
|
+
/* Validate byte offset + length against a backing string's bytesize.
|
|
76
|
+
* Uses overflow-safe comparison (checks off > max before subtracting). */
|
|
77
|
+
SV_INLINE void sv_check_bounds(long off, long len, long backing_len) {
|
|
78
|
+
if (SV_UNLIKELY(off < 0 || len < 0 || off > backing_len ||
|
|
79
|
+
len > backing_len - off)) {
|
|
80
|
+
rb_raise(rb_eArgError,
|
|
81
|
+
"offset %ld, length %ld out of range for string of bytesize %ld",
|
|
82
|
+
off, len, backing_len);
|
|
83
|
+
}
|
|
84
|
+
}
|
|
85
|
+
|
|
86
|
+
/*
|
|
87
|
+
* Initialize (or reinitialize) a string_view_t's fields from a frozen backing
|
|
88
|
+
* string. Caller is responsible for freeing any prior stride_idx.
|
|
89
|
+
*/
|
|
90
|
+
SV_INLINE void sv_init_fields(VALUE obj, string_view_t *sv, VALUE backing,
|
|
91
|
+
const char *base, rb_encoding *enc,
|
|
92
|
+
long offset, long length) {
|
|
93
|
+
RB_OBJ_WRITE(obj, &sv->backing, backing);
|
|
94
|
+
sv->base = base;
|
|
95
|
+
sv->enc = enc;
|
|
96
|
+
sv->offset = offset;
|
|
97
|
+
sv->length = length;
|
|
98
|
+
sv->single_byte = sv_compute_single_byte(backing, enc);
|
|
99
|
+
sv->charlen = -1;
|
|
100
|
+
sv->stride_idx = NULL;
|
|
101
|
+
}
|
|
102
|
+
|
|
103
|
+
/* ========================================================================= */
|
|
104
|
+
/* Functions shared across compilation units */
|
|
105
|
+
/* ========================================================================= */
|
|
106
|
+
|
|
107
|
+
/* Search functions (defined in string_view.c, used by Strict) */
|
|
108
|
+
VALUE sv_index(int argc, VALUE *argv, VALUE self);
|
|
109
|
+
VALUE sv_rindex(int argc, VALUE *argv, VALUE self);
|
|
110
|
+
VALUE sv_byteindex(int argc, VALUE *argv, VALUE self);
|
|
111
|
+
VALUE sv_byterindex(int argc, VALUE *argv, VALUE self);
|
|
112
|
+
|
|
113
|
+
/* Pool view (defined in string_view_pool.c, used by core_ext) */
|
|
114
|
+
VALUE pool_view(VALUE self, VALUE voffset, VALUE vlength);
|
|
115
|
+
|
|
116
|
+
/* Init functions for submodules */
|
|
117
|
+
void Init_string_view_strict(void);
|
|
118
|
+
void Init_string_view_pool(void);
|
|
119
|
+
void Init_string_view_core_ext(void);
|
|
120
|
+
|
|
121
|
+
#endif /* STRING_VIEW_H */
|
|
@@ -0,0 +1,44 @@
|
|
|
1
|
+
#include "string_view.h"
|
|
2
|
+
|
|
3
|
+
/* ========================================================================= */
|
|
4
|
+
/* StringView::CoreExt — module with String#view, included on demand */
|
|
5
|
+
/* ========================================================================= */
|
|
6
|
+
|
|
7
|
+
/* ObjectSpace::WeakKeyMap caching String → Pool.
|
|
8
|
+
* Keys (strings) are held weakly — when a string is GC'd, its entry
|
|
9
|
+
* is automatically removed. Values (pools) are held strongly. */
|
|
10
|
+
static VALUE pool_cache;
|
|
11
|
+
static ID id_aref;
|
|
12
|
+
static ID id_aset;
|
|
13
|
+
|
|
14
|
+
/*
|
|
15
|
+
* view(byte_offset, byte_length) → StringView
|
|
16
|
+
*
|
|
17
|
+
* Returns a StringView into this string at the given byte range.
|
|
18
|
+
* Lazily creates a StringView::Pool and caches it in a global
|
|
19
|
+
* WeakKeyMap for automatic cleanup when the string is GC'd.
|
|
20
|
+
*/
|
|
21
|
+
static VALUE string_view_method(VALUE self, VALUE voffset, VALUE vlength) {
|
|
22
|
+
rb_str_freeze(self);
|
|
23
|
+
|
|
24
|
+
VALUE pool = rb_funcall(pool_cache, id_aref, 1, self);
|
|
25
|
+
if (NIL_P(pool)) {
|
|
26
|
+
pool = rb_class_new_instance(1, &self, cStringViewPool);
|
|
27
|
+
rb_funcall(pool_cache, id_aset, 2, self, pool);
|
|
28
|
+
}
|
|
29
|
+
return pool_view(pool, voffset, vlength);
|
|
30
|
+
}
|
|
31
|
+
|
|
32
|
+
void Init_string_view_core_ext(void) {
|
|
33
|
+
id_aref = rb_intern("[]");
|
|
34
|
+
id_aset = rb_intern("[]=");
|
|
35
|
+
|
|
36
|
+
VALUE cWeakKeyMap = rb_const_get(
|
|
37
|
+
rb_const_get(rb_cObject, rb_intern("ObjectSpace")),
|
|
38
|
+
rb_intern("WeakKeyMap"));
|
|
39
|
+
pool_cache = rb_class_new_instance(0, NULL, cWeakKeyMap);
|
|
40
|
+
rb_gc_register_mark_object(pool_cache);
|
|
41
|
+
|
|
42
|
+
VALUE mCoreExt = rb_define_module_under(cStringView, "CoreExt");
|
|
43
|
+
rb_define_method(mCoreExt, "view", string_view_method, 2);
|
|
44
|
+
}
|
|
@@ -0,0 +1,204 @@
|
|
|
1
|
+
#include "string_view.h"
|
|
2
|
+
|
|
3
|
+
/* ========================================================================= */
|
|
4
|
+
/* StringView::Pool */
|
|
5
|
+
/* ========================================================================= */
|
|
6
|
+
|
|
7
|
+
VALUE cStringViewPool;
|
|
8
|
+
|
|
9
|
+
#define POOL_INITIAL_CAP 32
|
|
10
|
+
#define POOL_MAX_GROW 4096
|
|
11
|
+
|
|
12
|
+
typedef struct {
|
|
13
|
+
VALUE backing; /* frozen String that owns the bytes */
|
|
14
|
+
const char *base; /* cached RSTRING_PTR(backing) */
|
|
15
|
+
rb_encoding *enc; /* cached encoding */
|
|
16
|
+
int single_byte; /* cached single-byte flag */
|
|
17
|
+
long backing_len; /* cached RSTRING_LEN(backing) */
|
|
18
|
+
VALUE views; /* Ruby Array of pre-allocated StringView objects */
|
|
19
|
+
long next_idx; /* index of next available view in the array */
|
|
20
|
+
long capacity; /* current size of the views array */
|
|
21
|
+
} sv_pool_t;
|
|
22
|
+
|
|
23
|
+
static void pool_mark(void *ptr) {
|
|
24
|
+
sv_pool_t *pool = (sv_pool_t *)ptr;
|
|
25
|
+
if (pool->backing != Qnil)
|
|
26
|
+
rb_gc_mark_movable(pool->backing);
|
|
27
|
+
if (pool->views != Qnil)
|
|
28
|
+
rb_gc_mark_movable(pool->views);
|
|
29
|
+
}
|
|
30
|
+
|
|
31
|
+
static void pool_compact(void *ptr) {
|
|
32
|
+
sv_pool_t *pool = (sv_pool_t *)ptr;
|
|
33
|
+
if (pool->backing != Qnil) {
|
|
34
|
+
pool->backing = rb_gc_location(pool->backing);
|
|
35
|
+
pool->base = RSTRING_PTR(pool->backing);
|
|
36
|
+
}
|
|
37
|
+
if (pool->views != Qnil) {
|
|
38
|
+
pool->views = rb_gc_location(pool->views);
|
|
39
|
+
}
|
|
40
|
+
}
|
|
41
|
+
|
|
42
|
+
static size_t pool_memsize(const void *ptr) {
|
|
43
|
+
const sv_pool_t *pool = (const sv_pool_t *)ptr;
|
|
44
|
+
size_t size = sizeof(sv_pool_t);
|
|
45
|
+
/* Each pre-allocated view is a separate GC object with a string_view_t
|
|
46
|
+
* struct. Report their cost here so ObjectSpace.memsize_of gives a
|
|
47
|
+
* realistic picture of the pool's total footprint. */
|
|
48
|
+
size += (size_t)pool->capacity * sizeof(string_view_t);
|
|
49
|
+
return size;
|
|
50
|
+
}
|
|
51
|
+
|
|
52
|
+
static const rb_data_type_t pool_type = {
|
|
53
|
+
.wrap_struct_name = "StringView::Pool",
|
|
54
|
+
.function = { .dmark = pool_mark, .dfree = RUBY_DEFAULT_FREE,
|
|
55
|
+
.dsize = pool_memsize, .dcompact = pool_compact },
|
|
56
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_EMBEDDABLE,
|
|
57
|
+
};
|
|
58
|
+
|
|
59
|
+
/*
|
|
60
|
+
* Allocate a batch of StringView objects pre-initialized with the pool's
|
|
61
|
+
* backing string. They start with offset=0, length=0 (empty views).
|
|
62
|
+
* The `.view()` method sets the real offset+length before returning.
|
|
63
|
+
*/
|
|
64
|
+
static void pool_grow(sv_pool_t *pool, VALUE pool_obj) {
|
|
65
|
+
long grow = pool->capacity == 0 ? POOL_INITIAL_CAP : pool->capacity;
|
|
66
|
+
if (grow > POOL_MAX_GROW) grow = POOL_MAX_GROW;
|
|
67
|
+
long new_cap = pool->capacity + grow;
|
|
68
|
+
long old_cap = pool->capacity;
|
|
69
|
+
|
|
70
|
+
/* Grow the Ruby Array to hold the new views */
|
|
71
|
+
for (long i = old_cap; i < new_cap; i++) {
|
|
72
|
+
string_view_t *sv;
|
|
73
|
+
VALUE obj = TypedData_Make_Struct(cStringView, string_view_t,
|
|
74
|
+
&string_view_type, sv);
|
|
75
|
+
sv_init_fields(obj, sv, pool->backing, pool->base, pool->enc, 0, 0);
|
|
76
|
+
rb_ary_push(pool->views, obj);
|
|
77
|
+
}
|
|
78
|
+
|
|
79
|
+
pool->capacity = new_cap;
|
|
80
|
+
}
|
|
81
|
+
|
|
82
|
+
/*
|
|
83
|
+
* Pool.new(string) → Pool
|
|
84
|
+
*/
|
|
85
|
+
static VALUE pool_initialize(VALUE self, VALUE str) {
|
|
86
|
+
sv_pool_t *pool = (sv_pool_t *)RTYPEDDATA_GET_DATA(self);
|
|
87
|
+
sv_check_frozen_string(str);
|
|
88
|
+
|
|
89
|
+
RB_OBJ_WRITE(self, &pool->backing, str);
|
|
90
|
+
pool->base = RSTRING_PTR(str);
|
|
91
|
+
pool->enc = rb_enc_get(str);
|
|
92
|
+
pool->single_byte = sv_compute_single_byte(str, pool->enc);
|
|
93
|
+
pool->backing_len = RSTRING_LEN(str);
|
|
94
|
+
|
|
95
|
+
/* Create the views array and pre-allocate the initial batch */
|
|
96
|
+
VALUE ary = rb_ary_new_capa(POOL_INITIAL_CAP);
|
|
97
|
+
RB_OBJ_WRITE(self, &pool->views, ary);
|
|
98
|
+
pool->next_idx = 0;
|
|
99
|
+
pool->capacity = 0;
|
|
100
|
+
|
|
101
|
+
pool_grow(pool, self);
|
|
102
|
+
|
|
103
|
+
return self;
|
|
104
|
+
}
|
|
105
|
+
|
|
106
|
+
/* Allocator for StringView::Pool: creates the wrapped struct and puts it in
 * the "not yet initialized" state (nil references, no cached data, empty
 * cursor). pool_initialize fills in the real values. */
static VALUE pool_alloc(VALUE klass) {
    sv_pool_t *p;
    VALUE self = TypedData_Make_Struct(klass, sv_pool_t, &pool_type, p);

    /* GC-visible references */
    p->backing = Qnil;
    p->views = Qnil;

    /* cached properties of the backing string */
    p->base = NULL;
    p->enc = NULL;
    p->backing_len = 0;
    p->single_byte = -1;

    /* cursor / capacity bookkeeping */
    p->capacity = 0;
    p->next_idx = 0;

    return self;
}
|
|
119
|
+
|
|
120
|
+
/*
|
|
121
|
+
* pool.view(byte_offset, byte_length) → StringView
|
|
122
|
+
*
|
|
123
|
+
* Returns a pre-allocated StringView pointed at the given byte range.
|
|
124
|
+
* If the pool is exhausted, grows exponentially before returning.
|
|
125
|
+
*/
|
|
126
|
+
VALUE pool_view(VALUE self, VALUE voffset, VALUE vlength) {
|
|
127
|
+
sv_pool_t *pool = (sv_pool_t *)RTYPEDDATA_GET_DATA(self);
|
|
128
|
+
|
|
129
|
+
/* Refresh cached base/len from the live backing string so that views
|
|
130
|
+
* created after a mutation always see the current buffer pointer. */
|
|
131
|
+
pool->base = RSTRING_PTR(pool->backing);
|
|
132
|
+
pool->backing_len = RSTRING_LEN(pool->backing);
|
|
133
|
+
|
|
134
|
+
long off = NUM2LONG(voffset);
|
|
135
|
+
long len = NUM2LONG(vlength);
|
|
136
|
+
|
|
137
|
+
sv_check_bounds(off, len, pool->backing_len);
|
|
138
|
+
|
|
139
|
+
/* Grow if exhausted */
|
|
140
|
+
if (SV_UNLIKELY(pool->next_idx >= pool->capacity)) {
|
|
141
|
+
pool_grow(pool, self);
|
|
142
|
+
}
|
|
143
|
+
|
|
144
|
+
/* Grab the next pre-allocated view and set its range */
|
|
145
|
+
VALUE view = RARRAY_AREF(pool->views, pool->next_idx);
|
|
146
|
+
pool->next_idx++;
|
|
147
|
+
|
|
148
|
+
string_view_t *sv = (string_view_t *)RTYPEDDATA_GET_DATA(view);
|
|
149
|
+
sv->base = pool->base; /* refresh in case backing was mutated */
|
|
150
|
+
sv->offset = off;
|
|
151
|
+
sv->length = len;
|
|
152
|
+
sv->charlen = -1; /* invalidate cached char count */
|
|
153
|
+
sv->stride_idx = NULL; /* invalidate stride index */
|
|
154
|
+
|
|
155
|
+
return view;
|
|
156
|
+
}
|
|
157
|
+
|
|
158
|
+
/*
|
|
159
|
+
* pool.size → Integer
|
|
160
|
+
* Number of views handed out so far.
|
|
161
|
+
*/
|
|
162
|
+
static VALUE pool_size(VALUE self) {
|
|
163
|
+
sv_pool_t *pool = (sv_pool_t *)RTYPEDDATA_GET_DATA(self);
|
|
164
|
+
return LONG2NUM(pool->next_idx);
|
|
165
|
+
}
|
|
166
|
+
|
|
167
|
+
/*
|
|
168
|
+
* pool.capacity → Integer
|
|
169
|
+
* Current number of pre-allocated view slots.
|
|
170
|
+
*/
|
|
171
|
+
static VALUE pool_capacity(VALUE self) {
|
|
172
|
+
sv_pool_t *pool = (sv_pool_t *)RTYPEDDATA_GET_DATA(self);
|
|
173
|
+
return LONG2NUM(pool->capacity);
|
|
174
|
+
}
|
|
175
|
+
|
|
176
|
+
/*
|
|
177
|
+
* pool.reset! → self
|
|
178
|
+
* Reset the cursor to 0, allowing all pre-allocated views to be reused.
|
|
179
|
+
* Previously returned views become invalid (their offsets may be overwritten).
|
|
180
|
+
*/
|
|
181
|
+
static VALUE pool_reset(VALUE self) {
|
|
182
|
+
sv_pool_t *pool = (sv_pool_t *)RTYPEDDATA_GET_DATA(self);
|
|
183
|
+
pool->next_idx = 0;
|
|
184
|
+
return self;
|
|
185
|
+
}
|
|
186
|
+
|
|
187
|
+
/*
|
|
188
|
+
* pool.backing → String (frozen)
|
|
189
|
+
*/
|
|
190
|
+
static VALUE pool_backing(VALUE self) {
|
|
191
|
+
sv_pool_t *pool = (sv_pool_t *)RTYPEDDATA_GET_DATA(self);
|
|
192
|
+
return pool->backing;
|
|
193
|
+
}
|
|
194
|
+
|
|
195
|
+
void Init_string_view_pool(void) {
|
|
196
|
+
cStringViewPool = rb_define_class_under(cStringView, "Pool", rb_cObject);
|
|
197
|
+
rb_define_alloc_func(cStringViewPool, pool_alloc);
|
|
198
|
+
rb_define_method(cStringViewPool, "initialize", pool_initialize, 1);
|
|
199
|
+
rb_define_method(cStringViewPool, "view", pool_view, 2);
|
|
200
|
+
rb_define_method(cStringViewPool, "size", pool_size, 0);
|
|
201
|
+
rb_define_method(cStringViewPool, "capacity", pool_capacity, 0);
|
|
202
|
+
rb_define_method(cStringViewPool, "reset!", pool_reset, 0);
|
|
203
|
+
rb_define_method(cStringViewPool, "backing", pool_backing, 0);
|
|
204
|
+
}
|
|
@@ -0,0 +1,102 @@
|
|
|
1
|
+
#include "string_view.h"
|
|
2
|
+
|
|
3
|
+
/* ========================================================================= */
|
|
4
|
+
/* StringView::Strict — raises WouldAllocate for any allocating method */
|
|
5
|
+
/* ========================================================================= */
|
|
6
|
+
|
|
7
|
+
/*
|
|
8
|
+
* Raise StringView::WouldAllocate with the method name.
|
|
9
|
+
* Used for all methods on Strict that would create a String.
|
|
10
|
+
*/
|
|
11
|
+
static VALUE sv_would_allocate(int argc, VALUE *argv, VALUE self) {
|
|
12
|
+
const char *method_name = rb_id2name(rb_frame_this_func());
|
|
13
|
+
rb_raise(eWouldAllocate,
|
|
14
|
+
"StringView::Strict#%s would allocate a String — "
|
|
15
|
+
"call .materialize to get a String, or .reset! to repoint the view",
|
|
16
|
+
method_name);
|
|
17
|
+
return Qnil;
|
|
18
|
+
}
|
|
19
|
+
|
|
20
|
+
/*
|
|
21
|
+
* Strict versions of index/rindex/byteindex/byterindex:
|
|
22
|
+
* String args work zero-alloc. Regexp args raise WouldAllocate.
|
|
23
|
+
*/
|
|
24
|
+
#define SV_STRICT_SEARCH(cname, method_str, base_fn) \
|
|
25
|
+
static VALUE sv_strict_##cname(int argc, VALUE *argv, VALUE self) { \
|
|
26
|
+
if (argc >= 1 && rb_obj_is_kind_of(argv[0], rb_cRegexp)) { \
|
|
27
|
+
rb_raise(eWouldAllocate, \
|
|
28
|
+
"StringView::Strict#" method_str " with Regexp would " \
|
|
29
|
+
"allocate a String — call .materialize to get a " \
|
|
30
|
+
"String, or .reset! to repoint the view"); \
|
|
31
|
+
} \
|
|
32
|
+
return base_fn(argc, argv, self); \
|
|
33
|
+
}
|
|
34
|
+
|
|
35
|
+
SV_STRICT_SEARCH(index, "index", sv_index)
|
|
36
|
+
SV_STRICT_SEARCH(rindex, "rindex", sv_rindex)
|
|
37
|
+
SV_STRICT_SEARCH(byteindex, "byteindex", sv_byteindex)
|
|
38
|
+
SV_STRICT_SEARCH(byterindex, "byterindex", sv_byterindex)
|
|
39
|
+
|
|
40
|
+
void Init_string_view_strict(void) {
|
|
41
|
+
eWouldAllocate = rb_define_class_under(cStringView, "WouldAllocate", rb_eRuntimeError);
|
|
42
|
+
|
|
43
|
+
cStringViewStrict = rb_define_class_under(cStringView, "Strict", cStringView);
|
|
44
|
+
|
|
45
|
+
/* Strict inherits everything from StringView (including alloc, initialize,
|
|
46
|
+
* all zero-copy methods, slicing, comparisons, etc.).
|
|
47
|
+
*
|
|
48
|
+
* Override only methods that would allocate a String object.
|
|
49
|
+
* to_s / materialize is the explicit escape hatch. */
|
|
50
|
+
|
|
51
|
+
/* Tier 3 transforms — all allocate a result String */
|
|
52
|
+
rb_define_method(cStringViewStrict, "upcase", sv_would_allocate, -1);
|
|
53
|
+
rb_define_method(cStringViewStrict, "downcase", sv_would_allocate, -1);
|
|
54
|
+
rb_define_method(cStringViewStrict, "capitalize", sv_would_allocate, -1);
|
|
55
|
+
rb_define_method(cStringViewStrict, "swapcase", sv_would_allocate, -1);
|
|
56
|
+
rb_define_method(cStringViewStrict, "reverse", sv_would_allocate, -1);
|
|
57
|
+
rb_define_method(cStringViewStrict, "squeeze", sv_would_allocate, -1);
|
|
58
|
+
rb_define_method(cStringViewStrict, "encode", sv_would_allocate, -1);
|
|
59
|
+
rb_define_method(cStringViewStrict, "gsub", sv_would_allocate, -1);
|
|
60
|
+
rb_define_method(cStringViewStrict, "sub", sv_would_allocate, -1);
|
|
61
|
+
rb_define_method(cStringViewStrict, "tr", sv_would_allocate, -1);
|
|
62
|
+
rb_define_method(cStringViewStrict, "tr_s", sv_would_allocate, -1);
|
|
63
|
+
rb_define_method(cStringViewStrict, "delete", sv_would_allocate, -1);
|
|
64
|
+
rb_define_method(cStringViewStrict, "scan", sv_would_allocate, -1);
|
|
65
|
+
rb_define_method(cStringViewStrict, "split", sv_would_allocate, -1);
|
|
66
|
+
rb_define_method(cStringViewStrict, "center", sv_would_allocate, -1);
|
|
67
|
+
rb_define_method(cStringViewStrict, "ljust", sv_would_allocate, -1);
|
|
68
|
+
rb_define_method(cStringViewStrict, "rjust", sv_would_allocate, -1);
|
|
69
|
+
rb_define_method(cStringViewStrict, "%", sv_would_allocate, -1);
|
|
70
|
+
rb_define_method(cStringViewStrict, "+", sv_would_allocate, -1);
|
|
71
|
+
rb_define_method(cStringViewStrict, "*", sv_would_allocate, -1);
|
|
72
|
+
rb_define_method(cStringViewStrict, "unpack1", sv_would_allocate, -1);
|
|
73
|
+
rb_define_method(cStringViewStrict, "scrub", sv_would_allocate, -1);
|
|
74
|
+
rb_define_method(cStringViewStrict, "unicode_normalize", sv_would_allocate, -1);
|
|
75
|
+
rb_define_method(cStringViewStrict, "count", sv_would_allocate, -1);
|
|
76
|
+
|
|
77
|
+
/* index/rindex/byteindex/byterindex: String args are zero-alloc,
|
|
78
|
+
* Regexp args raise WouldAllocate. */
|
|
79
|
+
rb_define_method(cStringViewStrict, "index", sv_strict_index, -1);
|
|
80
|
+
rb_define_method(cStringViewStrict, "rindex", sv_strict_rindex, -1);
|
|
81
|
+
rb_define_method(cStringViewStrict, "byteindex", sv_strict_byteindex, -1);
|
|
82
|
+
rb_define_method(cStringViewStrict, "byterindex", sv_strict_byterindex, -1);
|
|
83
|
+
|
|
84
|
+
/* Regex-based methods — always allocate */
|
|
85
|
+
rb_define_method(cStringViewStrict, "match", sv_would_allocate, -1);
|
|
86
|
+
rb_define_method(cStringViewStrict, "match?", sv_would_allocate, -1);
|
|
87
|
+
rb_define_method(cStringViewStrict, "=~", sv_would_allocate, -1);
|
|
88
|
+
|
|
89
|
+
/* Iteration methods that yield/return Strings */
|
|
90
|
+
rb_define_method(cStringViewStrict, "each_char", sv_would_allocate, -1);
|
|
91
|
+
rb_define_method(cStringViewStrict, "chars", sv_would_allocate, -1);
|
|
92
|
+
|
|
93
|
+
/* Implicit coercion — would create a shared String */
|
|
94
|
+
rb_define_private_method(cStringViewStrict, "to_str", sv_would_allocate, -1);
|
|
95
|
+
|
|
96
|
+
/* inspect allocates a String (but we keep it — debugging is essential) */
|
|
97
|
+
|
|
98
|
+
/* to_s raises — Strict views act like frozen strings, not string sources.
|
|
99
|
+
* materialize is the EXPLICIT escape hatch (inherited from StringView,
|
|
100
|
+
* defined as a separate method pointing at sv_to_s). */
|
|
101
|
+
rb_define_method(cStringViewStrict, "to_s", sv_would_allocate, -1);
|
|
102
|
+
}
|
data/lib/string_view/version.rb
CHANGED
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: string_view
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.0
|
|
4
|
+
version: 0.2.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Shopify
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: bin
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-03-
|
|
11
|
+
date: 2026-03-19 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: StringView provides a read-only, zero-copy view into a frozen Ruby String,
|
|
14
14
|
avoiding intermediate allocations for slicing, searching, and delegation of transform
|
|
@@ -29,7 +29,12 @@ files:
|
|
|
29
29
|
- ext/string_view/simdutf.h
|
|
30
30
|
- ext/string_view/simdutf_c.h
|
|
31
31
|
- ext/string_view/string_view.c
|
|
32
|
+
- ext/string_view/string_view.h
|
|
33
|
+
- ext/string_view/string_view_core_ext.c
|
|
34
|
+
- ext/string_view/string_view_pool.c
|
|
35
|
+
- ext/string_view/string_view_strict.c
|
|
32
36
|
- lib/string_view.rb
|
|
37
|
+
- lib/string_view/core_ext.rb
|
|
33
38
|
- lib/string_view/version.rb
|
|
34
39
|
homepage: https://github.com/Shopify/string_view
|
|
35
40
|
licenses:
|