character_set 1.2.0-java → 1.5.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitattributes +3 -0
- data/.github/workflows/gouteur.yml +20 -0
- data/.github/workflows/lint.yml +29 -0
- data/.github/workflows/tests.yml +22 -0
- data/.gitignore +1 -0
- data/.gouteur.yml +2 -0
- data/.rubocop.yml +17 -0
- data/BENCHMARK.md +53 -17
- data/CHANGELOG.md +54 -0
- data/README.md +51 -12
- data/Rakefile +20 -18
- data/benchmarks/count_in.rb +13 -0
- data/benchmarks/delete_in.rb +1 -1
- data/benchmarks/scan.rb +13 -0
- data/benchmarks/shared.rb +5 -0
- data/benchmarks/z_add.rb +12 -0
- data/benchmarks/z_delete.rb +12 -0
- data/benchmarks/z_merge.rb +15 -0
- data/benchmarks/z_minmax.rb +12 -0
- data/bin/console +2 -0
- data/character_set.gemspec +17 -4
- data/ext/character_set/character_set.c +969 -415
- data/ext/character_set/unicode_casefold_table.h +44 -1
- data/ext/character_set/unicode_casefold_table.h.tmpl +11 -0
- data/lib/character_set/character.rb +1 -1
- data/lib/character_set/core_ext/regexp_ext.rb +1 -1
- data/lib/character_set/core_ext/string_ext.rb +3 -1
- data/lib/character_set/expression_converter.rb +41 -43
- data/lib/character_set/parser.rb +1 -1
- data/lib/character_set/predefined_sets/any.cps +1 -0
- data/lib/character_set/predefined_sets/ascii.cps +1 -0
- data/lib/character_set/predefined_sets/ascii_alnum.cps +3 -0
- data/lib/character_set/predefined_sets/ascii_letter.cps +2 -0
- data/lib/character_set/predefined_sets/assigned.cps +677 -0
- data/lib/character_set/predefined_sets/bmp.cps +2 -0
- data/lib/character_set/predefined_sets/crypt.cps +2 -0
- data/lib/character_set/predefined_sets/emoji.cps +152 -0
- data/lib/character_set/predefined_sets/newline.cps +3 -0
- data/lib/character_set/predefined_sets/surrogate.cps +1 -0
- data/lib/character_set/predefined_sets/unicode.cps +2 -0
- data/lib/character_set/predefined_sets/url_fragment.cps +8 -0
- data/lib/character_set/predefined_sets/url_host.cps +10 -0
- data/lib/character_set/predefined_sets/url_path.cps +7 -0
- data/lib/character_set/predefined_sets/url_query.cps +8 -0
- data/lib/character_set/predefined_sets/whitespace.cps +10 -0
- data/lib/character_set/predefined_sets.rb +25 -260
- data/lib/character_set/ruby_fallback/character_set_methods.rb +60 -9
- data/lib/character_set/ruby_fallback/set_methods.rb +25 -17
- data/lib/character_set/ruby_fallback.rb +5 -3
- data/lib/character_set/set_method_adapters.rb +4 -3
- data/lib/character_set/shared_methods.rb +69 -50
- data/lib/character_set/version.rb +1 -1
- data/lib/character_set/writer.rb +98 -27
- metadata +114 -17
- data/.travis.yml +0 -8
- data/lib/character_set/ruby_fallback/plane_methods.rb +0 -27
@@ -2,81 +2,180 @@
|
|
2
2
|
#include "ruby/encoding.h"
|
3
3
|
#include "unicode_casefold_table.h"
|
4
4
|
|
5
|
-
#define
|
6
|
-
#define
|
7
|
-
#define
|
5
|
+
#define UNICODE_PLANE_SIZE 0x10000
|
6
|
+
#define UNICODE_PLANE_COUNT 17
|
7
|
+
#define UNICODE_CP_COUNT (UNICODE_PLANE_SIZE * UNICODE_PLANE_COUNT)
|
8
8
|
|
9
|
-
|
10
|
-
|
9
|
+
// start at ascii size
|
10
|
+
#define CS_DEFAULT_INITIAL_LEN 128
|
11
11
|
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
12
|
+
typedef char cs_ar;
|
13
|
+
typedef unsigned long cs_cp;
|
14
|
+
|
15
|
+
struct cs_data
|
16
|
+
{
|
17
|
+
cs_ar *cps;
|
18
|
+
cs_cp len;
|
19
|
+
};
|
20
|
+
|
21
|
+
#define CS_MSIZE(len) (sizeof(cs_ar) * (len / 8))
|
22
|
+
|
23
|
+
static inline void
|
24
|
+
add_memspace_for_another_plane(struct cs_data *data)
|
25
|
+
{
|
26
|
+
data->cps = ruby_xrealloc(data->cps, CS_MSIZE(data->len + UNICODE_PLANE_SIZE));
|
27
|
+
memset(data->cps + CS_MSIZE(data->len), 0, CS_MSIZE(UNICODE_PLANE_SIZE));
|
28
|
+
data->len += UNICODE_PLANE_SIZE;
|
29
|
+
}
|
30
|
+
|
31
|
+
static inline void
|
32
|
+
ensure_memsize_fits(struct cs_data *data, cs_cp target_cp)
|
33
|
+
{
|
34
|
+
while (target_cp >= data->len)
|
35
|
+
{
|
36
|
+
add_memspace_for_another_plane(data);
|
37
|
+
}
|
38
|
+
}
|
39
|
+
|
40
|
+
static inline void
|
41
|
+
set_cp(struct cs_data *data, cs_cp cp)
|
42
|
+
{
|
43
|
+
ensure_memsize_fits(data, cp);
|
44
|
+
data->cps[cp >> 3] |= (1 << (cp & 0x07));
|
45
|
+
}
|
46
|
+
|
47
|
+
static inline int
|
48
|
+
tst_cp(cs_ar *cps, cs_cp len, cs_cp cp)
|
49
|
+
{
|
50
|
+
return ((cp < len) && cps[cp >> 3] & (1 << (cp & 0x07)));
|
51
|
+
}
|
52
|
+
|
53
|
+
static inline void
|
54
|
+
clr_cp(cs_ar *cps, cs_cp len, cs_cp cp)
|
55
|
+
{
|
56
|
+
if (cp < len)
|
57
|
+
{
|
58
|
+
cps[cp >> 3] &= ~(1 << (cp & 0x07));
|
59
|
+
}
|
60
|
+
}
|
16
61
|
|
17
62
|
static void
|
18
|
-
|
19
|
-
|
63
|
+
cs_free(void *ptr)
|
64
|
+
{
|
65
|
+
struct cs_data *data = ptr;
|
66
|
+
ruby_xfree(data->cps);
|
67
|
+
ruby_xfree(data);
|
20
68
|
}
|
21
69
|
|
22
70
|
static size_t
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
.
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
71
|
+
cs_memsize(const void *ptr)
|
72
|
+
{
|
73
|
+
const struct cs_data *data = ptr;
|
74
|
+
return sizeof(*data) + CS_MSIZE(data->len);
|
75
|
+
}
|
76
|
+
|
77
|
+
static const rb_data_type_t cs_type = {
|
78
|
+
.wrap_struct_name = "character_set",
|
79
|
+
.function = {
|
80
|
+
.dmark = NULL,
|
81
|
+
.dfree = cs_free,
|
82
|
+
.dsize = cs_memsize,
|
83
|
+
},
|
84
|
+
.data = NULL,
|
85
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
37
86
|
};
|
38
87
|
|
39
|
-
|
40
|
-
|
88
|
+
static inline VALUE
|
89
|
+
cs_alloc_len(VALUE klass, struct cs_data **data_ptr, cs_cp len)
|
90
|
+
{
|
91
|
+
VALUE cs;
|
92
|
+
struct cs_data *data;
|
93
|
+
cs = TypedData_Make_Struct(klass, struct cs_data, &cs_type, data);
|
94
|
+
data->cps = ruby_xmalloc(CS_MSIZE(len));
|
95
|
+
memset(data->cps, 0, CS_MSIZE(len));
|
96
|
+
data->len = len;
|
97
|
+
|
98
|
+
if (data_ptr)
|
99
|
+
{
|
100
|
+
*data_ptr = data;
|
101
|
+
}
|
41
102
|
|
42
|
-
|
43
|
-
|
103
|
+
return cs;
|
104
|
+
}
|
44
105
|
|
45
|
-
static VALUE
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
return NEW_CHARACTER_SET(self, cp_arr);
|
106
|
+
static inline VALUE
|
107
|
+
cs_alloc(VALUE klass, struct cs_data **data_ptr)
|
108
|
+
{
|
109
|
+
return cs_alloc_len(klass, data_ptr, CS_DEFAULT_INITIAL_LEN);
|
50
110
|
}
|
51
111
|
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
112
|
+
static inline struct cs_data *
|
113
|
+
cs_fetch_data(VALUE cs)
|
114
|
+
{
|
115
|
+
struct cs_data *data;
|
116
|
+
TypedData_Get_Struct(cs, struct cs_data, &cs_type, data);
|
117
|
+
return data;
|
118
|
+
}
|
119
|
+
|
120
|
+
static inline cs_ar *
|
121
|
+
cs_fetch_cps(VALUE cs, cs_cp *len_ptr)
|
122
|
+
{
|
123
|
+
struct cs_data *data;
|
124
|
+
data = cs_fetch_data(cs);
|
125
|
+
*len_ptr = data->len;
|
126
|
+
return data->cps;
|
127
|
+
}
|
128
|
+
|
129
|
+
static VALUE
|
130
|
+
cs_method_allocate(VALUE self)
|
131
|
+
{
|
132
|
+
return cs_alloc(self, 0);
|
133
|
+
}
|
134
|
+
|
135
|
+
#define FOR_EACH_ACTIVE_CODEPOINT(action) \
|
136
|
+
do \
|
137
|
+
{ \
|
138
|
+
cs_cp cp, len; \
|
139
|
+
cs_ar *cps; \
|
140
|
+
cps = cs_fetch_cps(self, &len); \
|
141
|
+
for (cp = 0; cp < len; cp++) \
|
142
|
+
{ \
|
143
|
+
if (tst_cp(cps, len, cp)) \
|
144
|
+
{ \
|
145
|
+
action; \
|
146
|
+
} \
|
147
|
+
} \
|
148
|
+
} while (0)
|
59
149
|
|
60
150
|
// ***************************
|
61
151
|
// `Set` compatibility methods
|
62
152
|
// ***************************
|
63
153
|
|
64
|
-
static inline
|
65
|
-
|
66
|
-
|
154
|
+
static inline cs_cp
|
155
|
+
cs_active_cp_count(VALUE self)
|
156
|
+
{
|
157
|
+
cs_cp count;
|
67
158
|
count = 0;
|
68
159
|
FOR_EACH_ACTIVE_CODEPOINT(count++);
|
69
|
-
return
|
160
|
+
return count;
|
70
161
|
}
|
71
162
|
|
72
163
|
static VALUE
|
73
|
-
|
74
|
-
|
164
|
+
cs_method_length(VALUE self)
|
165
|
+
{
|
166
|
+
return LONG2FIX(cs_active_cp_count(self));
|
167
|
+
}
|
168
|
+
|
169
|
+
static inline VALUE
|
170
|
+
cs_enumerator_length(VALUE self, VALUE args, VALUE eobj)
|
171
|
+
{
|
172
|
+
return LONG2FIX(cs_active_cp_count(self));
|
75
173
|
}
|
76
174
|
|
77
175
|
static VALUE
|
78
|
-
|
79
|
-
|
176
|
+
cs_method_each(VALUE self)
|
177
|
+
{
|
178
|
+
RETURN_SIZED_ENUMERATOR(self, 0, 0, cs_enumerator_length);
|
80
179
|
FOR_EACH_ACTIVE_CODEPOINT(rb_yield(LONG2FIX(cp)));
|
81
180
|
return self;
|
82
181
|
}
|
@@ -84,16 +183,19 @@ method_each(VALUE self) {
|
|
84
183
|
// returns an Array of codepoint Integers by default.
|
85
184
|
// returns an Array of Strings of length 1 if passed `true`.
|
86
185
|
static VALUE
|
87
|
-
|
186
|
+
cs_method_to_a(int argc, VALUE *argv, VALUE self)
|
187
|
+
{
|
88
188
|
VALUE arr;
|
89
189
|
rb_encoding *enc;
|
90
190
|
rb_check_arity(argc, 0, 1);
|
91
191
|
|
92
192
|
arr = rb_ary_new();
|
93
|
-
if (!argc || NIL_P(argv[0]) || argv[0] == Qfalse)
|
193
|
+
if (!argc || NIL_P(argv[0]) || argv[0] == Qfalse)
|
194
|
+
{
|
94
195
|
FOR_EACH_ACTIVE_CODEPOINT(rb_ary_push(arr, LONG2FIX(cp)));
|
95
196
|
}
|
96
|
-
else
|
197
|
+
else
|
198
|
+
{
|
97
199
|
enc = rb_utf8_encoding();
|
98
200
|
FOR_EACH_ACTIVE_CODEPOINT(rb_ary_push(arr, rb_enc_uint_chr((int)cp, enc)));
|
99
201
|
}
|
@@ -102,302 +204,473 @@ method_to_a(int argc, VALUE *argv, VALUE self) {
|
|
102
204
|
}
|
103
205
|
|
104
206
|
static VALUE
|
105
|
-
|
207
|
+
cs_method_empty_p(VALUE self)
|
208
|
+
{
|
106
209
|
FOR_EACH_ACTIVE_CODEPOINT(return Qfalse);
|
107
210
|
return Qtrue;
|
108
211
|
}
|
109
212
|
|
110
213
|
static VALUE
|
111
|
-
|
112
|
-
|
113
|
-
|
114
|
-
|
214
|
+
cs_method_hash(VALUE self)
|
215
|
+
{
|
216
|
+
cs_cp cp, len, hash, four_byte_value;
|
217
|
+
cs_ar *cps;
|
218
|
+
cps = cs_fetch_cps(self, &len);
|
219
|
+
four_byte_value = 0;
|
115
220
|
|
116
221
|
hash = 17;
|
117
|
-
for (cp = 0; cp <
|
118
|
-
|
119
|
-
|
222
|
+
for (cp = 0; cp < len; cp++)
|
223
|
+
{
|
224
|
+
if (cp % 32 == 0)
|
225
|
+
{
|
226
|
+
if (cp != 0)
|
227
|
+
{
|
228
|
+
hash = hash * 23 + four_byte_value;
|
229
|
+
}
|
120
230
|
four_byte_value = 0;
|
121
231
|
}
|
122
|
-
if (
|
232
|
+
if (tst_cp(cps, len, cp))
|
233
|
+
{
|
234
|
+
four_byte_value++;
|
235
|
+
}
|
123
236
|
}
|
124
237
|
|
125
238
|
return LONG2FIX(hash);
|
126
239
|
}
|
127
240
|
|
128
241
|
static inline VALUE
|
129
|
-
|
242
|
+
cs_delete_if_block_result(VALUE self, int truthy)
|
243
|
+
{
|
130
244
|
VALUE result;
|
131
245
|
rb_need_block();
|
132
246
|
rb_check_frozen(self);
|
133
247
|
FOR_EACH_ACTIVE_CODEPOINT(
|
134
|
-
|
135
|
-
|
136
|
-
);
|
248
|
+
result = rb_yield(LONG2FIX(cp));
|
249
|
+
if ((NIL_P(result) || result == Qfalse) != truthy) clr_cp(cps, len, cp););
|
137
250
|
return self;
|
138
251
|
}
|
139
252
|
|
140
253
|
static VALUE
|
141
|
-
|
142
|
-
|
143
|
-
|
254
|
+
cs_method_delete_if(VALUE self)
|
255
|
+
{
|
256
|
+
RETURN_SIZED_ENUMERATOR(self, 0, 0, cs_enumerator_length);
|
257
|
+
return cs_delete_if_block_result(self, 1);
|
144
258
|
}
|
145
259
|
|
146
260
|
static VALUE
|
147
|
-
|
148
|
-
|
149
|
-
|
261
|
+
cs_method_keep_if(VALUE self)
|
262
|
+
{
|
263
|
+
RETURN_SIZED_ENUMERATOR(self, 0, 0, cs_enumerator_length);
|
264
|
+
return cs_delete_if_block_result(self, 0);
|
150
265
|
}
|
151
266
|
|
152
267
|
static VALUE
|
153
|
-
|
154
|
-
|
155
|
-
|
268
|
+
cs_method_clear(VALUE self)
|
269
|
+
{
|
270
|
+
struct cs_data *data;
|
156
271
|
rb_check_frozen(self);
|
157
|
-
|
158
|
-
|
159
|
-
CLRBIT(cps, cp);
|
160
|
-
}
|
272
|
+
data = cs_fetch_data(self);
|
273
|
+
memset(data->cps, 0, CS_MSIZE(data->len));
|
161
274
|
return self;
|
162
275
|
}
|
163
276
|
|
164
|
-
|
165
|
-
|
166
|
-
|
167
|
-
|
168
|
-
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
277
|
+
static VALUE
|
278
|
+
cs_method_min(VALUE self)
|
279
|
+
{
|
280
|
+
FOR_EACH_ACTIVE_CODEPOINT(return LONG2FIX(cp));
|
281
|
+
return Qnil;
|
282
|
+
}
|
283
|
+
|
284
|
+
static VALUE
|
285
|
+
cs_method_max(VALUE self)
|
286
|
+
{
|
287
|
+
cs_cp len;
|
288
|
+
long reverse_idx;
|
289
|
+
cs_ar *cps;
|
290
|
+
cps = cs_fetch_cps(self, &len);
|
291
|
+
for (reverse_idx = len; reverse_idx >= 0; reverse_idx--)
|
292
|
+
{
|
293
|
+
if (tst_cp(cps, len, reverse_idx))
|
294
|
+
{
|
295
|
+
return LONG2FIX(reverse_idx);
|
296
|
+
}
|
297
|
+
}
|
298
|
+
return Qnil;
|
299
|
+
}
|
300
|
+
|
301
|
+
static VALUE
|
302
|
+
cs_method_minmax(VALUE self)
|
303
|
+
{
|
304
|
+
VALUE arr;
|
305
|
+
arr = rb_ary_new2(2);
|
306
|
+
rb_ary_push(arr, cs_method_min(self));
|
307
|
+
rb_ary_push(arr, cs_method_max(self));
|
308
|
+
return arr;
|
309
|
+
}
|
310
|
+
|
311
|
+
#define RETURN_COMBINED_CS(cs_a, cs_b, comp_op) \
|
312
|
+
do \
|
313
|
+
{ \
|
314
|
+
VALUE new_cs; \
|
315
|
+
cs_cp cp, alen, blen; \
|
316
|
+
cs_ar *acps, *bcps; \
|
317
|
+
struct cs_data *new_data; \
|
318
|
+
new_cs = cs_alloc(RBASIC(self)->klass, &new_data); \
|
319
|
+
acps = cs_fetch_cps(cs_a, &alen); \
|
320
|
+
bcps = cs_fetch_cps(cs_b, &blen); \
|
321
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++) \
|
322
|
+
{ \
|
323
|
+
if (tst_cp(acps, alen, cp) comp_op tst_cp(bcps, blen, cp)) \
|
324
|
+
{ \
|
325
|
+
set_cp(new_data, cp); \
|
326
|
+
} \
|
327
|
+
} \
|
328
|
+
return new_cs; \
|
329
|
+
} while (0)
|
174
330
|
|
175
331
|
static VALUE
|
176
|
-
|
177
|
-
|
332
|
+
cs_method_intersection(VALUE self, VALUE other)
|
333
|
+
{
|
334
|
+
RETURN_COMBINED_CS(self, other, &&);
|
178
335
|
}
|
179
336
|
|
180
337
|
static VALUE
|
181
|
-
|
182
|
-
|
338
|
+
cs_method_exclusion(VALUE self, VALUE other)
|
339
|
+
{
|
340
|
+
RETURN_COMBINED_CS(self, other, ^);
|
183
341
|
}
|
184
342
|
|
185
343
|
static VALUE
|
186
|
-
|
187
|
-
|
344
|
+
cs_method_union(VALUE self, VALUE other)
|
345
|
+
{
|
346
|
+
RETURN_COMBINED_CS(self, other, ||);
|
188
347
|
}
|
189
348
|
|
190
349
|
static VALUE
|
191
|
-
|
192
|
-
|
350
|
+
cs_method_difference(VALUE self, VALUE other)
|
351
|
+
{
|
352
|
+
RETURN_COMBINED_CS(self, other, >);
|
193
353
|
}
|
194
354
|
|
195
355
|
static VALUE
|
196
|
-
|
197
|
-
|
198
|
-
|
199
|
-
|
356
|
+
cs_method_include_p(VALUE self, VALUE num)
|
357
|
+
{
|
358
|
+
cs_ar *cps;
|
359
|
+
cs_cp len;
|
360
|
+
cps = cs_fetch_cps(self, &len);
|
361
|
+
return (tst_cp(cps, len, FIX2ULONG(num)) ? Qtrue : Qfalse);
|
200
362
|
}
|
201
363
|
|
202
|
-
static inline
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
364
|
+
static inline VALUE
|
365
|
+
cs_toggle_codepoint(VALUE cs, VALUE cp_num, int on, int return_nil_if_noop)
|
366
|
+
{
|
367
|
+
cs_cp cp, len;
|
368
|
+
cs_ar *cps;
|
369
|
+
struct cs_data *data;
|
370
|
+
rb_check_frozen(cs);
|
371
|
+
data = cs_fetch_data(cs);
|
372
|
+
cps = data->cps;
|
373
|
+
len = data->len;
|
208
374
|
cp = FIX2ULONG(cp_num);
|
209
|
-
if (
|
210
|
-
|
375
|
+
if (return_nil_if_noop && (!tst_cp(cps, len, cp) == !on))
|
376
|
+
{
|
377
|
+
return Qnil;
|
211
378
|
}
|
212
|
-
else
|
213
|
-
|
214
|
-
|
215
|
-
|
379
|
+
else
|
380
|
+
{
|
381
|
+
if (on)
|
382
|
+
{
|
383
|
+
set_cp(data, cp);
|
384
|
+
}
|
385
|
+
else
|
386
|
+
{
|
387
|
+
clr_cp(cps, len, cp);
|
388
|
+
}
|
389
|
+
return cs;
|
216
390
|
}
|
217
391
|
}
|
218
392
|
|
219
393
|
static VALUE
|
220
|
-
|
221
|
-
|
394
|
+
cs_method_add(VALUE self, VALUE cp_num)
|
395
|
+
{
|
396
|
+
return cs_toggle_codepoint(self, cp_num, 1, 0);
|
222
397
|
}
|
223
398
|
|
224
399
|
static VALUE
|
225
|
-
|
226
|
-
|
400
|
+
cs_method_add_p(VALUE self, VALUE cp_num)
|
401
|
+
{
|
402
|
+
return cs_toggle_codepoint(self, cp_num, 1, 1);
|
227
403
|
}
|
228
404
|
|
229
405
|
static VALUE
|
230
|
-
|
231
|
-
|
406
|
+
cs_method_delete(VALUE self, VALUE cp_num)
|
407
|
+
{
|
408
|
+
return cs_toggle_codepoint(self, cp_num, 0, 0);
|
232
409
|
}
|
233
410
|
|
234
411
|
static VALUE
|
235
|
-
|
236
|
-
|
412
|
+
cs_method_delete_p(VALUE self, VALUE cp_num)
|
413
|
+
{
|
414
|
+
return cs_toggle_codepoint(self, cp_num, 0, 1);
|
237
415
|
}
|
238
416
|
|
239
|
-
#define COMPARE_SETS(action)\
|
240
|
-
cp_index cp;\
|
241
|
-
cp_byte *cps, *other_cps;\
|
242
|
-
FETCH_CODEPOINTS(self, cps);\
|
243
|
-
FETCH_CODEPOINTS(other, other_cps);\
|
244
|
-
for (cp = 0; cp < UNICODE_CP_COUNT; cp++) { action; }\
|
245
|
-
|
246
417
|
static VALUE
|
247
|
-
|
248
|
-
|
418
|
+
cs_method_intersect_p(VALUE self, VALUE other)
|
419
|
+
{
|
420
|
+
cs_cp cp, alen, blen;
|
421
|
+
cs_ar *acps, *bcps;
|
422
|
+
acps = cs_fetch_cps(self, &alen);
|
423
|
+
bcps = cs_fetch_cps(other, &blen);
|
424
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
|
425
|
+
{
|
426
|
+
if (tst_cp(acps, alen, cp) && tst_cp(bcps, blen, cp))
|
427
|
+
{
|
428
|
+
return Qtrue;
|
429
|
+
}
|
430
|
+
}
|
249
431
|
return Qfalse;
|
250
432
|
}
|
251
433
|
|
252
434
|
static VALUE
|
253
|
-
|
254
|
-
|
435
|
+
cs_method_disjoint_p(VALUE self, VALUE other)
|
436
|
+
{
|
437
|
+
return cs_method_intersect_p(self, other) ? Qfalse : Qtrue;
|
255
438
|
}
|
256
439
|
|
257
440
|
static inline int
|
258
|
-
|
259
|
-
|
441
|
+
cs_check_type(VALUE obj)
|
442
|
+
{
|
443
|
+
return rb_typeddata_is_kind_of(obj, &cs_type);
|
260
444
|
}
|
261
445
|
|
262
446
|
static VALUE
|
263
|
-
|
264
|
-
|
265
|
-
|
266
|
-
|
267
|
-
|
268
|
-
|
447
|
+
cs_cps_eql(VALUE cs_a, VALUE cs_b)
|
448
|
+
{
|
449
|
+
cs_cp cp, alen, blen;
|
450
|
+
cs_ar *acps, *bcps;
|
451
|
+
acps = cs_fetch_cps(cs_a, &alen);
|
452
|
+
bcps = cs_fetch_cps(cs_b, &blen);
|
453
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
|
454
|
+
{
|
455
|
+
if (tst_cp(acps, alen, cp) != tst_cp(bcps, blen, cp))
|
456
|
+
{
|
457
|
+
return Qfalse;
|
458
|
+
}
|
459
|
+
}
|
269
460
|
return Qtrue;
|
270
461
|
}
|
271
462
|
|
463
|
+
static VALUE
|
464
|
+
cs_method_eql_p(VALUE self, VALUE other)
|
465
|
+
{
|
466
|
+
if (!cs_check_type(other))
|
467
|
+
{
|
468
|
+
return Qfalse;
|
469
|
+
}
|
470
|
+
if (self == other) // same object_id
|
471
|
+
{
|
472
|
+
return Qtrue;
|
473
|
+
}
|
474
|
+
return cs_cps_eql(self, other);
|
475
|
+
}
|
476
|
+
|
272
477
|
static inline VALUE
|
273
|
-
|
274
|
-
|
275
|
-
|
478
|
+
cs_merge_cs(VALUE recipient, VALUE source)
|
479
|
+
{
|
480
|
+
cs_cp cp, source_len;
|
481
|
+
struct cs_data *data;
|
482
|
+
cs_ar *source_cps;
|
483
|
+
data = cs_fetch_data(recipient);
|
484
|
+
source_cps = cs_fetch_cps(source, &source_len);
|
485
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
|
486
|
+
{
|
487
|
+
if (tst_cp(source_cps, source_len, cp))
|
488
|
+
{
|
489
|
+
set_cp(data, cp);
|
490
|
+
}
|
491
|
+
}
|
492
|
+
return recipient;
|
276
493
|
}
|
277
494
|
|
278
|
-
static inline
|
279
|
-
|
280
|
-
|
495
|
+
static inline cs_cp
|
496
|
+
cs_checked_cp(VALUE object_id)
|
497
|
+
{
|
498
|
+
if (FIXNUM_P(object_id) && object_id > 0 && object_id < 0x220001)
|
499
|
+
{
|
500
|
+
return FIX2ULONG(object_id);
|
501
|
+
}
|
281
502
|
rb_raise(rb_eArgError, "CharacterSet members must be between 0 and 0x10FFFF");
|
282
503
|
}
|
283
504
|
|
284
505
|
static inline VALUE
|
285
|
-
|
506
|
+
cs_merge_rb_range(VALUE self, VALUE rb_range)
|
507
|
+
{
|
286
508
|
VALUE from_id, upto_id;
|
509
|
+
cs_cp from_cp, upto_cp, cont_len, rem;
|
287
510
|
int excl;
|
288
|
-
|
289
|
-
|
290
|
-
FETCH_CODEPOINTS(self, cps);
|
511
|
+
struct cs_data *data;
|
512
|
+
data = cs_fetch_data(self);
|
291
513
|
|
292
|
-
if (!RTEST(rb_range_values(rb_range, &from_id, &upto_id, &excl)))
|
514
|
+
if (!RTEST(rb_range_values(rb_range, &from_id, &upto_id, &excl)))
|
515
|
+
{
|
293
516
|
rb_raise(rb_eArgError, "pass a Range");
|
294
517
|
}
|
295
|
-
if (excl)
|
518
|
+
if (excl)
|
519
|
+
{
|
520
|
+
upto_id -= 2;
|
521
|
+
}
|
522
|
+
|
523
|
+
from_cp = cs_checked_cp(from_id);
|
524
|
+
upto_cp = cs_checked_cp(upto_id);
|
296
525
|
|
297
|
-
|
298
|
-
|
526
|
+
if (upto_cp > from_cp && (upto_cp - from_cp > 6))
|
527
|
+
{
|
528
|
+
// set bits in preceding partially toggled bytes individually
|
529
|
+
for (/* */; (from_cp <= upto_cp) && (from_cp % 8); from_cp++)
|
530
|
+
{
|
531
|
+
set_cp(data, from_cp);
|
532
|
+
}
|
533
|
+
// memset contiguous bits directly
|
534
|
+
cont_len = upto_cp - from_cp + 1;
|
535
|
+
rem = cont_len % 8;
|
536
|
+
ensure_memsize_fits(data, upto_cp);
|
537
|
+
memset(data->cps + CS_MSIZE(from_cp), 0xFF, CS_MSIZE(cont_len - rem) / 8);
|
538
|
+
from_cp = upto_cp - rem + 1;
|
539
|
+
}
|
299
540
|
|
300
|
-
|
301
|
-
|
302
|
-
|
541
|
+
// set bits in partially toggled bytes individually
|
542
|
+
for (/* */; from_cp <= upto_cp; from_cp++)
|
543
|
+
{
|
544
|
+
set_cp(data, from_cp);
|
303
545
|
}
|
546
|
+
|
304
547
|
return self;
|
305
548
|
}
|
306
549
|
|
307
550
|
static inline VALUE
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
312
|
-
FETCH_CODEPOINTS(self, cps);
|
551
|
+
cs_merge_rb_array(VALUE self, VALUE rb_array)
|
552
|
+
{
|
553
|
+
VALUE el, array_length, i;
|
554
|
+
struct cs_data *data;
|
313
555
|
Check_Type(rb_array, T_ARRAY);
|
556
|
+
data = cs_fetch_data(self);
|
314
557
|
array_length = RARRAY_LEN(rb_array);
|
315
|
-
for (i = 0; i < array_length; i++)
|
558
|
+
for (i = 0; i < array_length; i++)
|
559
|
+
{
|
316
560
|
el = RARRAY_AREF(rb_array, i);
|
317
|
-
|
318
|
-
SETBIT(cps, FIX2ULONG(el));
|
561
|
+
set_cp(data, cs_checked_cp(el));
|
319
562
|
}
|
320
563
|
return self;
|
321
564
|
}
|
322
565
|
|
323
566
|
static VALUE
|
324
|
-
|
567
|
+
cs_method_merge(VALUE self, VALUE other)
|
568
|
+
{
|
325
569
|
rb_check_frozen(self);
|
326
|
-
if (
|
327
|
-
|
570
|
+
if (cs_check_type(other))
|
571
|
+
{
|
572
|
+
return cs_merge_cs(self, other);
|
328
573
|
}
|
329
|
-
else if (TYPE(other) == T_ARRAY)
|
330
|
-
|
574
|
+
else if (TYPE(other) == T_ARRAY)
|
575
|
+
{
|
576
|
+
return cs_merge_rb_array(self, other);
|
331
577
|
}
|
332
|
-
return
|
578
|
+
return cs_merge_rb_range(self, other);
|
333
579
|
}
|
334
580
|
|
335
581
|
static VALUE
|
336
|
-
|
337
|
-
|
338
|
-
|
582
|
+
cs_method_initialize_copy(VALUE self, VALUE orig)
|
583
|
+
{
|
584
|
+
cs_merge_cs(self, orig);
|
585
|
+
return self;
|
339
586
|
}
|
340
587
|
|
341
588
|
static VALUE
|
342
|
-
|
589
|
+
cs_method_subtract(VALUE self, VALUE other)
|
590
|
+
{
|
591
|
+
cs_cp cp, len, other_len;
|
592
|
+
cs_ar *cps, *other_cps;
|
343
593
|
rb_check_frozen(self);
|
344
|
-
|
594
|
+
cps = cs_fetch_cps(self, &len);
|
595
|
+
other_cps = cs_fetch_cps(other, &other_len);
|
596
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
|
597
|
+
{
|
598
|
+
if (tst_cp(other_cps, other_len, cp))
|
599
|
+
{
|
600
|
+
clr_cp(cps, len, cp);
|
601
|
+
}
|
602
|
+
}
|
345
603
|
return self;
|
346
604
|
}
|
347
605
|
|
348
606
|
static inline int
|
349
|
-
|
350
|
-
|
351
|
-
|
607
|
+
cs_a_subset_of_b(VALUE cs_a, VALUE cs_b, int *is_proper_ptr)
|
608
|
+
{
|
609
|
+
cs_ar *a, *b;
|
610
|
+
cs_cp cp, alen, blen, count_a, count_b;
|
352
611
|
|
353
|
-
if (!
|
612
|
+
if (!cs_check_type(cs_a) || !cs_check_type(cs_b))
|
613
|
+
{
|
354
614
|
rb_raise(rb_eArgError, "pass a CharacterSet");
|
355
615
|
}
|
356
616
|
|
357
|
-
|
358
|
-
|
359
|
-
|
360
|
-
|
361
|
-
|
362
|
-
|
363
|
-
|
364
|
-
|
365
|
-
if (
|
366
|
-
|
367
|
-
|
368
|
-
|
617
|
+
a = cs_fetch_cps(cs_a, &alen);
|
618
|
+
b = cs_fetch_cps(cs_b, &blen);
|
619
|
+
|
620
|
+
count_a = 0;
|
621
|
+
count_b = 0;
|
622
|
+
|
623
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
|
624
|
+
{
|
625
|
+
if (tst_cp(a, alen, cp))
|
626
|
+
{
|
627
|
+
if (!tst_cp(b, blen, cp))
|
628
|
+
{
|
629
|
+
return 0;
|
630
|
+
}
|
631
|
+
count_a++;
|
632
|
+
count_b++;
|
633
|
+
}
|
634
|
+
else if (tst_cp(b, blen, cp))
|
635
|
+
{
|
636
|
+
count_b++;
|
369
637
|
}
|
370
|
-
else if (TSTBIT(cps_b, cp)) size_b++;
|
371
638
|
}
|
372
639
|
|
373
|
-
if (
|
640
|
+
if (is_proper_ptr)
|
641
|
+
{
|
642
|
+
*is_proper_ptr = count_b > count_a;
|
643
|
+
}
|
644
|
+
|
374
645
|
return 1;
|
375
646
|
}
|
376
647
|
|
377
648
|
static VALUE
|
378
|
-
|
379
|
-
|
380
|
-
return
|
649
|
+
cs_method_subset_p(VALUE self, VALUE other)
|
650
|
+
{
|
651
|
+
return cs_a_subset_of_b(self, other, NULL) ? Qtrue : Qfalse;
|
381
652
|
}
|
382
653
|
|
383
654
|
static VALUE
|
384
|
-
|
385
|
-
|
386
|
-
|
387
|
-
|
655
|
+
cs_method_proper_subset_p(VALUE self, VALUE other)
|
656
|
+
{
|
657
|
+
int is_subset, is_proper;
|
658
|
+
is_subset = cs_a_subset_of_b(self, other, &is_proper);
|
659
|
+
return (is_subset && is_proper) ? Qtrue : Qfalse;
|
388
660
|
}
|
389
661
|
|
390
662
|
static VALUE
|
391
|
-
|
392
|
-
|
393
|
-
return
|
663
|
+
cs_method_superset_p(VALUE self, VALUE other)
|
664
|
+
{
|
665
|
+
return cs_a_subset_of_b(other, self, NULL) ? Qtrue : Qfalse;
|
394
666
|
}
|
395
667
|
|
396
668
|
static VALUE
|
397
|
-
|
398
|
-
|
399
|
-
|
400
|
-
|
669
|
+
cs_method_proper_superset_p(VALUE self, VALUE other)
|
670
|
+
{
|
671
|
+
int is_superset, is_proper;
|
672
|
+
is_superset = cs_a_subset_of_b(other, self, &is_proper);
|
673
|
+
return (is_superset && is_proper) ? Qtrue : Qfalse;
|
401
674
|
}
|
402
675
|
|
403
676
|
// *******************************
|
@@ -405,42 +678,44 @@ method_proper_superset_p(VALUE self, VALUE other) {
|
|
405
678
|
// *******************************
|
406
679
|
|
407
680
|
static VALUE
|
408
|
-
|
409
|
-
|
410
|
-
|
681
|
+
cs_class_method_from_ranges(VALUE self, VALUE ranges)
|
682
|
+
{
|
683
|
+
VALUE new_cs, range_count, i;
|
684
|
+
new_cs = rb_class_new_instance(0, 0, self);
|
411
685
|
range_count = RARRAY_LEN(ranges);
|
412
|
-
for (i = 0; i < range_count; i++)
|
413
|
-
|
686
|
+
for (i = 0; i < range_count; i++)
|
687
|
+
{
|
688
|
+
cs_merge_rb_range(new_cs, RARRAY_AREF(ranges, i));
|
414
689
|
}
|
415
|
-
return
|
690
|
+
return new_cs;
|
416
691
|
}
|
417
692
|
|
418
693
|
static VALUE
|
419
|
-
|
420
|
-
|
694
|
+
cs_method_ranges(VALUE self)
|
695
|
+
{
|
696
|
+
VALUE ranges, cp_num, previous_cp_num, current_start, current_end;
|
421
697
|
|
422
698
|
ranges = rb_ary_new();
|
423
|
-
|
699
|
+
previous_cp_num = 0;
|
424
700
|
current_start = 0;
|
425
701
|
current_end = 0;
|
426
702
|
|
427
703
|
FOR_EACH_ACTIVE_CODEPOINT(
|
428
|
-
|
429
|
-
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
434
|
-
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
previous_codepoint = codepoint;
|
440
|
-
);
|
704
|
+
cp_num = LONG2FIX(cp);
|
705
|
+
|
706
|
+
if (!previous_cp_num) {
|
707
|
+
current_start = cp_num;
|
708
|
+
} else if (previous_cp_num + 2 != cp_num)
|
709
|
+
{
|
710
|
+
// gap found, finalize previous range
|
711
|
+
rb_ary_push(ranges, rb_range_new(current_start, current_end, 0));
|
712
|
+
current_start = cp_num;
|
713
|
+
} current_end = cp_num;
|
714
|
+
previous_cp_num = cp_num;);
|
441
715
|
|
442
716
|
// add final range
|
443
|
-
if (current_start)
|
717
|
+
if (current_start)
|
718
|
+
{
|
444
719
|
rb_ary_push(ranges, rb_range_new(current_start, current_end, 0));
|
445
720
|
}
|
446
721
|
|
@@ -448,117 +723,233 @@ method_ranges(VALUE self) {
|
|
448
723
|
}
|
449
724
|
|
450
725
|
static VALUE
|
451
|
-
|
452
|
-
|
726
|
+
cs_method_sample(int argc, VALUE *argv, VALUE self)
|
727
|
+
{
|
728
|
+
VALUE array, to_a_args[1] = {Qtrue};
|
453
729
|
rb_check_arity(argc, 0, 1);
|
454
|
-
|
455
|
-
array = method_to_a(1, to_a_args, self);
|
730
|
+
array = cs_method_to_a(1, to_a_args, self);
|
456
731
|
return rb_funcall(array, rb_intern("sample"), argc, argc ? argv[0] : 0);
|
457
732
|
}
|
458
733
|
|
459
734
|
static inline VALUE
|
460
|
-
|
461
|
-
|
462
|
-
|
463
|
-
|
464
|
-
|
465
|
-
|
466
|
-
|
735
|
+
cs_from_section(VALUE set, cs_cp from, cs_cp upto)
|
736
|
+
{
|
737
|
+
VALUE new_cs;
|
738
|
+
cs_ar *cps;
|
739
|
+
cs_cp cp, len;
|
740
|
+
struct cs_data *new_data;
|
741
|
+
new_cs = cs_alloc(RBASIC(set)->klass, &new_data);
|
742
|
+
cps = cs_fetch_cps(set, &len);
|
743
|
+
for (cp = from; cp <= upto; cp++)
|
744
|
+
{
|
745
|
+
if (tst_cp(cps, len, cp))
|
746
|
+
{
|
747
|
+
set_cp(new_data, cp);
|
748
|
+
}
|
467
749
|
}
|
468
|
-
return
|
750
|
+
return new_cs;
|
469
751
|
}
|
470
752
|
|
471
753
|
static VALUE
|
472
|
-
|
473
|
-
|
754
|
+
cs_method_ext_section(VALUE self, VALUE from, VALUE upto)
|
755
|
+
{
|
756
|
+
return cs_from_section(self, FIX2ULONG(from), FIX2ULONG(upto));
|
757
|
+
}
|
758
|
+
|
759
|
+
static inline cs_cp
|
760
|
+
cs_active_cp_count_in_section(VALUE set, cs_cp from, cs_cp upto)
|
761
|
+
{
|
762
|
+
cs_ar *cps;
|
763
|
+
cs_cp cp, count, len;
|
764
|
+
cps = cs_fetch_cps(set, &len);
|
765
|
+
for (count = 0, cp = from; cp <= upto; cp++)
|
766
|
+
{
|
767
|
+
if (tst_cp(cps, len, cp))
|
768
|
+
{
|
769
|
+
count++;
|
770
|
+
}
|
771
|
+
}
|
772
|
+
return count;
|
474
773
|
}
|
475
774
|
|
476
775
|
static VALUE
|
477
|
-
|
478
|
-
|
776
|
+
cs_method_ext_count_in_section(VALUE self, VALUE from, VALUE upto)
|
777
|
+
{
|
778
|
+
cs_cp count;
|
779
|
+
count = cs_active_cp_count_in_section(self, FIX2ULONG(from), FIX2ULONG(upto));
|
780
|
+
return LONG2FIX(count);
|
479
781
|
}
|
480
782
|
|
481
783
|
static inline VALUE
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
784
|
+
cs_has_cp_in_section(cs_ar *cps, cs_cp len, cs_cp from, cs_cp upto)
|
785
|
+
{
|
786
|
+
cs_cp cp;
|
787
|
+
for (cp = from; cp <= upto; cp++)
|
788
|
+
{
|
789
|
+
if (tst_cp(cps, len, cp))
|
790
|
+
{
|
791
|
+
return Qtrue;
|
792
|
+
}
|
490
793
|
}
|
491
794
|
return Qfalse;
|
492
795
|
}
|
493
796
|
|
494
797
|
static VALUE
|
495
|
-
|
798
|
+
cs_method_ext_section_p(VALUE self, VALUE from, VALUE upto)
|
799
|
+
{
|
800
|
+
cs_ar *cps;
|
801
|
+
cs_cp len;
|
802
|
+
cps = cs_fetch_cps(self, &len);
|
803
|
+
return cs_has_cp_in_section(cps, len, FIX2ULONG(from), FIX2ULONG(upto));
|
804
|
+
}
|
805
|
+
|
806
|
+
static inline VALUE
|
807
|
+
cs_ratio_of_section(VALUE set, cs_cp from, cs_cp upto)
|
808
|
+
{
|
809
|
+
double section_count, total_count;
|
810
|
+
section_count = (double)cs_active_cp_count_in_section(set, from, upto);
|
811
|
+
total_count = (double)cs_active_cp_count(set);
|
812
|
+
return DBL2NUM(section_count / total_count);
|
813
|
+
}
|
814
|
+
|
815
|
+
static VALUE
|
816
|
+
cs_method_ext_section_ratio(VALUE self, VALUE from, VALUE upto)
|
817
|
+
{
|
818
|
+
return cs_ratio_of_section(self, FIX2ULONG(from), FIX2ULONG(upto));
|
819
|
+
}
|
820
|
+
|
821
|
+
#define MAX_CP 0x10FFFF
|
822
|
+
#define MAX_ASCII_CP 0x7F
|
823
|
+
#define MAX_BMP_CP 0xFFFF
|
824
|
+
#define MIN_ASTRAL_CP 0x10000
|
825
|
+
|
826
|
+
static inline VALUE
|
827
|
+
cs_has_cp_in_plane(cs_ar *cps, cs_cp len, unsigned int plane)
|
828
|
+
{
|
829
|
+
cs_cp plane_beg, plane_end;
|
830
|
+
plane_beg = plane * UNICODE_PLANE_SIZE;
|
831
|
+
plane_end = (plane + 1) * MAX_BMP_CP;
|
832
|
+
return cs_has_cp_in_section(cps, len, plane_beg, plane_end);
|
833
|
+
}
|
834
|
+
|
835
|
+
static VALUE
|
836
|
+
cs_method_planes(VALUE self)
|
837
|
+
{
|
838
|
+
cs_ar *cps;
|
839
|
+
cs_cp len;
|
496
840
|
unsigned int i;
|
497
841
|
VALUE planes;
|
842
|
+
cps = cs_fetch_cps(self, &len);
|
498
843
|
planes = rb_ary_new();
|
499
|
-
for (i = 0; i < UNICODE_PLANE_COUNT; i++)
|
500
|
-
|
844
|
+
for (i = 0; i < UNICODE_PLANE_COUNT; i++)
|
845
|
+
{
|
846
|
+
if (cs_has_cp_in_plane(cps, len, i))
|
847
|
+
{
|
848
|
+
rb_ary_push(planes, INT2FIX(i));
|
849
|
+
}
|
501
850
|
}
|
502
851
|
return planes;
|
503
852
|
}
|
504
853
|
|
505
|
-
static
|
506
|
-
|
854
|
+
static inline int
|
855
|
+
cs_valid_plane_num(VALUE num)
|
856
|
+
{
|
507
857
|
int plane;
|
508
|
-
Check_Type(
|
509
|
-
plane = FIX2INT(
|
510
|
-
if (plane < 0 || plane >= UNICODE_PLANE_COUNT)
|
511
|
-
|
858
|
+
Check_Type(num, T_FIXNUM);
|
859
|
+
plane = FIX2INT(num);
|
860
|
+
if (plane < 0 || plane >= UNICODE_PLANE_COUNT)
|
861
|
+
{
|
862
|
+
rb_raise(rb_eArgError, "plane must be between 0 and %d", UNICODE_PLANE_COUNT - 1);
|
512
863
|
}
|
513
|
-
return
|
864
|
+
return plane;
|
865
|
+
}
|
866
|
+
|
867
|
+
static VALUE
|
868
|
+
cs_method_plane(VALUE self, VALUE plane_num)
|
869
|
+
{
|
870
|
+
cs_cp plane, plane_beg, plane_end;
|
871
|
+
plane = cs_valid_plane_num(plane_num);
|
872
|
+
plane_beg = plane * UNICODE_PLANE_SIZE;
|
873
|
+
plane_end = (plane + 1) * MAX_BMP_CP;
|
874
|
+
return cs_from_section(self, plane_beg, plane_end);
|
875
|
+
}
|
876
|
+
|
877
|
+
static VALUE
|
878
|
+
cs_method_member_in_plane_p(VALUE self, VALUE plane_num)
|
879
|
+
{
|
880
|
+
cs_ar *cps;
|
881
|
+
cs_cp len;
|
882
|
+
unsigned int plane;
|
883
|
+
plane = cs_valid_plane_num(plane_num);
|
884
|
+
cps = cs_fetch_cps(self, &len);
|
885
|
+
return cs_has_cp_in_plane(cps, len, plane);
|
514
886
|
}
|
515
887
|
|
516
888
|
// True unless `cp` lies in the UTF-16 surrogate range U+D800..U+DFFF.
// The argument is parenthesized so that compound expressions (e.g. a
// conditional or an addition) expand correctly; note it is evaluated twice.
#define NON_SURROGATE(cp) ((cp) > 0xDFFF || (cp) < 0xD800)
|
517
889
|
|
518
890
|
static VALUE
|
519
|
-
|
520
|
-
|
521
|
-
|
522
|
-
|
523
|
-
|
891
|
+
cs_method_ext_inversion(int argc, VALUE *argv, VALUE self)
|
892
|
+
{
|
893
|
+
int inc_surr;
|
894
|
+
cs_cp upto, cp, len;
|
895
|
+
cs_ar *cps;
|
896
|
+
VALUE new_cs;
|
897
|
+
struct cs_data *new_data;
|
898
|
+
|
524
899
|
rb_check_arity(argc, 0, 2);
|
525
|
-
|
526
|
-
|
527
|
-
|
528
|
-
|
529
|
-
|
530
|
-
|
900
|
+
|
901
|
+
cps = cs_fetch_cps(self, &len);
|
902
|
+
inc_surr = argc && argv[0] == Qtrue;
|
903
|
+
new_cs = cs_alloc(RBASIC(self)->klass, &new_data);
|
904
|
+
upto = argc > 1 && FIXNUM_P(argv[1]) ? FIX2ULONG(argv[1]) : UNICODE_CP_COUNT;
|
905
|
+
|
906
|
+
for (cp = 0; cp < UNICODE_CP_COUNT; cp++)
|
907
|
+
{
|
908
|
+
if (cp <= upto && !tst_cp(cps, len, cp) && (inc_surr || NON_SURROGATE(cp)))
|
909
|
+
{
|
910
|
+
set_cp(new_data, cp);
|
911
|
+
}
|
531
912
|
}
|
532
|
-
|
533
|
-
|
534
|
-
);
|
913
|
+
|
914
|
+
return new_cs;
|
535
915
|
}
|
536
916
|
|
537
|
-
typedef int(*str_cp_handler)(unsigned int,
|
917
|
+
// Callback invoked once per code point of a String by the each_*_cp helpers.
// It receives the code point, the code point array and its length, a
// cs_data pointer and a caller-defined memo pointer. Returning 0 makes the
// iteration stop early.
typedef int (*str_cp_handler)(unsigned int str_cp,
                              cs_ar *cp_arr,
                              cs_cp len,
                              struct cs_data *data,
                              VALUE *memo);
|
538
918
|
|
539
919
|
static inline int
|
540
|
-
add_str_cp_to_arr(unsigned int str_cp,
|
541
|
-
|
920
|
+
add_str_cp_to_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
921
|
+
{
|
922
|
+
set_cp(data, str_cp);
|
542
923
|
return 1;
|
543
924
|
}
|
544
925
|
|
545
926
|
static VALUE
|
546
|
-
|
547
|
-
|
548
|
-
|
549
|
-
|
550
|
-
|
927
|
+
cs_method_case_insensitive(VALUE self)
|
928
|
+
{
|
929
|
+
cs_cp i, len;
|
930
|
+
cs_ar *cps;
|
931
|
+
VALUE new_cs;
|
932
|
+
struct cs_data *new_data;
|
551
933
|
|
552
|
-
|
934
|
+
cps = cs_fetch_cps(self, &len);
|
935
|
+
new_cs = cs_alloc(RBASIC(self)->klass, &new_data);
|
936
|
+
cs_merge_cs(new_cs, self);
|
553
937
|
|
554
|
-
for (i = 0; i < CASEFOLD_COUNT; i++)
|
938
|
+
for (i = 0; i < CASEFOLD_COUNT; i++)
|
939
|
+
{
|
555
940
|
casefold_mapping m = unicode_casefold_table[i];
|
556
941
|
|
557
|
-
if
|
558
|
-
|
942
|
+
if (tst_cp(cps, len, m.from))
|
943
|
+
{
|
944
|
+
set_cp(new_data, m.to);
|
945
|
+
}
|
946
|
+
else if (tst_cp(cps, len, m.to))
|
947
|
+
{
|
948
|
+
set_cp(new_data, m.from);
|
949
|
+
}
|
559
950
|
}
|
560
951
|
|
561
|
-
return
|
952
|
+
return new_cs;
|
562
953
|
|
563
954
|
// OnigCaseFoldType flags;
|
564
955
|
// rb_encoding *enc;
|
@@ -573,20 +964,27 @@ method_case_insensitive(VALUE self) {
|
|
573
964
|
}
|
574
965
|
|
575
966
|
static inline VALUE
|
576
|
-
each_sb_cp(VALUE str, str_cp_handler func,
|
577
|
-
|
967
|
+
each_sb_cp(VALUE str, str_cp_handler func, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
968
|
+
{
|
969
|
+
long i, str_len;
|
578
970
|
unsigned int str_cp;
|
971
|
+
str_len = RSTRING_LEN(str);
|
579
972
|
|
580
|
-
for (i = 0; i <
|
973
|
+
for (i = 0; i < str_len; i++)
|
974
|
+
{
|
581
975
|
str_cp = (RSTRING_PTR(str)[i] & 0xff);
|
582
|
-
if (!(*func)(str_cp, cp_arr))
|
976
|
+
if (!(*func)(str_cp, cp_arr, len, data, memo))
|
977
|
+
{
|
978
|
+
return Qfalse;
|
979
|
+
}
|
583
980
|
}
|
584
981
|
|
585
982
|
return Qtrue;
|
586
983
|
}
|
587
984
|
|
588
985
|
static inline VALUE
|
589
|
-
each_mb_cp(VALUE str, str_cp_handler func,
|
986
|
+
each_mb_cp(VALUE str, str_cp_handler func, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
987
|
+
{
|
590
988
|
int n;
|
591
989
|
unsigned int str_cp;
|
592
990
|
const char *ptr, *end;
|
@@ -597,9 +995,13 @@ each_mb_cp(VALUE str, str_cp_handler func, cp_byte *cp_arr) {
|
|
597
995
|
end = RSTRING_END(str);
|
598
996
|
enc = rb_enc_get(str);
|
599
997
|
|
600
|
-
while (ptr < end)
|
998
|
+
while (ptr < end)
|
999
|
+
{
|
601
1000
|
str_cp = rb_enc_codepoint_len(ptr, end, &n, enc);
|
602
|
-
if (!(*func)(str_cp, cp_arr))
|
1001
|
+
if (!(*func)(str_cp, cp_arr, len, data, memo))
|
1002
|
+
{
|
1003
|
+
return Qfalse;
|
1004
|
+
}
|
603
1005
|
ptr += n;
|
604
1006
|
}
|
605
1007
|
|
@@ -611,105 +1013,240 @@ static inline int
|
|
611
1013
|
single_byte_optimizable(VALUE str)
|
612
1014
|
{
|
613
1015
|
rb_encoding *enc;
|
614
|
-
if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
|
1016
|
+
if (ENC_CODERANGE(str) == ENC_CODERANGE_7BIT)
|
1017
|
+
{
|
1018
|
+
return 1;
|
1019
|
+
}
|
615
1020
|
|
616
1021
|
enc = rb_enc_get(str);
|
617
|
-
if (rb_enc_mbmaxlen(enc) == 1)
|
1022
|
+
if (rb_enc_mbmaxlen(enc) == 1)
|
1023
|
+
{
|
1024
|
+
return 1;
|
1025
|
+
}
|
618
1026
|
|
619
1027
|
return 0;
|
620
1028
|
}
|
621
1029
|
|
622
1030
|
static inline VALUE
|
623
|
-
each_cp(VALUE str, str_cp_handler func,
|
624
|
-
|
625
|
-
|
1031
|
+
each_cp(VALUE str, str_cp_handler func, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
1032
|
+
{
|
1033
|
+
if (single_byte_optimizable(str))
|
1034
|
+
{
|
1035
|
+
return each_sb_cp(str, func, cp_arr, len, data, memo);
|
626
1036
|
}
|
627
|
-
return each_mb_cp(str, func, cp_arr);
|
1037
|
+
return each_mb_cp(str, func, cp_arr, len, data, memo);
|
628
1038
|
}
|
629
1039
|
|
630
1040
|
static inline void
|
631
|
-
raise_arg_err_unless_string(VALUE val)
|
632
|
-
|
1041
|
+
raise_arg_err_unless_string(VALUE val)
|
1042
|
+
{
|
1043
|
+
if (!RB_TYPE_P(val, T_STRING))
|
1044
|
+
{
|
1045
|
+
rb_raise(rb_eArgError, "pass a String");
|
1046
|
+
}
|
1047
|
+
}
|
1048
|
+
|
1049
|
+
static VALUE
|
1050
|
+
cs_class_method_of(int argc, VALUE *argv, VALUE self)
|
1051
|
+
{
|
1052
|
+
VALUE new_cs;
|
1053
|
+
struct cs_data *new_data;
|
1054
|
+
int i;
|
1055
|
+
new_cs = cs_alloc(self, &new_data);
|
1056
|
+
for (i = 0; i < argc; i++)
|
1057
|
+
{
|
1058
|
+
raise_arg_err_unless_string(argv[i]);
|
1059
|
+
each_cp(argv[i], add_str_cp_to_arr, 0, 0, new_data, 0);
|
1060
|
+
}
|
1061
|
+
return new_cs;
|
1062
|
+
}
|
1063
|
+
|
1064
|
+
static inline int
|
1065
|
+
count_str_cp(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
1066
|
+
{
|
1067
|
+
if (tst_cp(cp_arr, len, str_cp))
|
1068
|
+
{
|
1069
|
+
*memo += 1;
|
1070
|
+
}
|
1071
|
+
return 1;
|
633
1072
|
}
|
634
1073
|
|
635
1074
|
static VALUE
|
636
|
-
|
637
|
-
|
1075
|
+
cs_method_count_in(VALUE self, VALUE str)
|
1076
|
+
{
|
1077
|
+
VALUE count;
|
1078
|
+
struct cs_data *data;
|
638
1079
|
raise_arg_err_unless_string(str);
|
639
|
-
|
640
|
-
|
641
|
-
|
1080
|
+
data = cs_fetch_data(self);
|
1081
|
+
count = 0;
|
1082
|
+
each_cp(str, count_str_cp, data->cps, data->len, data, &count);
|
1083
|
+
return INT2NUM((int)count);
|
642
1084
|
}
|
643
1085
|
|
644
1086
|
static inline int
|
645
|
-
|
646
|
-
|
1087
|
+
str_cp_in_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
1088
|
+
{
|
1089
|
+
return tst_cp(cp_arr, len, str_cp);
|
647
1090
|
}
|
648
1091
|
|
649
1092
|
static VALUE
|
650
|
-
|
651
|
-
|
652
|
-
|
1093
|
+
cs_method_cover_p(VALUE self, VALUE str)
|
1094
|
+
{
|
1095
|
+
struct cs_data *data;
|
653
1096
|
raise_arg_err_unless_string(str);
|
654
|
-
|
655
|
-
|
656
|
-
return only_uses_other_cps == Qfalse ? Qtrue : Qfalse;
|
1097
|
+
data = cs_fetch_data(self);
|
1098
|
+
return each_cp(str, str_cp_in_arr, data->cps, data->len, data, 0);
|
657
1099
|
}
|
658
1100
|
|
659
1101
|
static inline int
|
660
|
-
|
661
|
-
|
1102
|
+
add_str_cp_to_str_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
1103
|
+
{
|
1104
|
+
if (tst_cp(cp_arr, len, str_cp))
|
1105
|
+
{
|
1106
|
+
rb_ary_push(memo[0], rb_enc_uint_chr((int)str_cp, (rb_encoding *)memo[1]));
|
1107
|
+
}
|
1108
|
+
return 1;
|
662
1109
|
}
|
663
1110
|
|
664
1111
|
static VALUE
|
665
|
-
|
666
|
-
|
1112
|
+
cs_method_scan(VALUE self, VALUE str)
|
1113
|
+
{
|
1114
|
+
VALUE memo[2];
|
1115
|
+
struct cs_data *data;
|
667
1116
|
raise_arg_err_unless_string(str);
|
668
|
-
|
669
|
-
|
1117
|
+
data = cs_fetch_data(self);
|
1118
|
+
memo[0] = rb_ary_new();
|
1119
|
+
memo[1] = (VALUE)rb_enc_get(str);
|
1120
|
+
each_cp(str, add_str_cp_to_str_arr, data->cps, data->len, data, memo);
|
1121
|
+
return memo[0];
|
1122
|
+
}
|
1123
|
+
|
1124
|
+
static inline int
|
1125
|
+
str_cp_not_in_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE *memo)
|
1126
|
+
{
|
1127
|
+
return !tst_cp(cp_arr, len, str_cp);
|
1128
|
+
}
|
1129
|
+
|
1130
|
+
static VALUE
|
1131
|
+
cs_method_used_by_p(VALUE self, VALUE str)
|
1132
|
+
{
|
1133
|
+
VALUE only_uses_other_cps;
|
1134
|
+
struct cs_data *data;
|
1135
|
+
raise_arg_err_unless_string(str);
|
1136
|
+
data = cs_fetch_data(self);
|
1137
|
+
only_uses_other_cps = each_cp(str, str_cp_not_in_arr, data->cps, data->len, data, 0);
|
1138
|
+
return only_uses_other_cps == Qfalse ? Qtrue : Qfalse;
|
1139
|
+
}
|
1140
|
+
|
1141
|
+
static void
|
1142
|
+
cs_str_buf_cat(VALUE str, const char *ptr, long len)
|
1143
|
+
{
|
1144
|
+
long total, olen;
|
1145
|
+
char *sptr;
|
1146
|
+
|
1147
|
+
RSTRING_GETMEM(str, sptr, olen);
|
1148
|
+
sptr = RSTRING(str)->as.heap.ptr;
|
1149
|
+
olen = RSTRING(str)->as.heap.len;
|
1150
|
+
total = olen + len;
|
1151
|
+
memcpy(sptr + olen, ptr, len);
|
1152
|
+
RSTRING(str)->as.heap.len = total;
|
1153
|
+
}
|
1154
|
+
|
1155
|
+
// Writes a string terminator of `termlen` bytes at `ptr`: the first byte is
// always zeroed, and for terminators wider than one byte (the unlikely
// case) all `termlen` bytes are zeroed. Only defined if not available yet.
#ifndef TERM_FILL
#define TERM_FILL(ptr, termlen)                     \
  do                                                \
  {                                                 \
    char *const term_fill_ptr = (ptr);              \
    const int term_fill_len = (termlen);            \
    term_fill_ptr[0] = '\0';                        \
    if (__builtin_expect(!!(term_fill_len > 1), 0)) \
    {                                               \
      memset(term_fill_ptr, 0, term_fill_len);      \
    }                                               \
  } while (0)
#endif
|
1166
|
+
|
1167
|
+
static void
|
1168
|
+
cs_str_buf_terminate(VALUE str, rb_encoding *enc)
|
1169
|
+
{
|
1170
|
+
char *ptr;
|
1171
|
+
long len;
|
1172
|
+
|
1173
|
+
ptr = RSTRING(str)->as.heap.ptr;
|
1174
|
+
len = RSTRING(str)->as.heap.len;
|
1175
|
+
TERM_FILL(ptr + len, rb_enc_mbminlen(enc));
|
670
1176
|
}
|
671
1177
|
|
672
1178
|
static inline VALUE
|
673
|
-
|
674
|
-
|
1179
|
+
cs_apply_to_str(VALUE set, VALUE str, int delete, int bang)
|
1180
|
+
{
|
1181
|
+
cs_ar *cps;
|
1182
|
+
cs_cp len;
|
675
1183
|
rb_encoding *str_enc;
|
676
|
-
VALUE orig_len,
|
677
|
-
int
|
1184
|
+
VALUE orig_len, new_str_buf;
|
1185
|
+
int cp_len;
|
678
1186
|
unsigned int str_cp;
|
679
1187
|
const char *ptr, *end;
|
680
1188
|
|
681
1189
|
raise_arg_err_unless_string(str);
|
682
1190
|
|
683
|
-
|
1191
|
+
cps = cs_fetch_cps(set, &len);
|
684
1192
|
|
685
1193
|
orig_len = RSTRING_LEN(str);
|
686
|
-
|
687
|
-
|
1194
|
+
if (orig_len < 1) // empty string, will never change
|
1195
|
+
{
|
1196
|
+
if (bang)
|
1197
|
+
{
|
1198
|
+
return Qnil;
|
1199
|
+
}
|
1200
|
+
return rb_str_dup(str);
|
1201
|
+
}
|
1202
|
+
|
1203
|
+
new_str_buf = rb_str_buf_new(orig_len + 30); // len + margin
|
688
1204
|
str_enc = rb_enc_get(str);
|
689
1205
|
rb_enc_associate(new_str_buf, str_enc);
|
690
|
-
|
691
|
-
|
1206
|
+
rb_str_modify(new_str_buf);
|
1207
|
+
ENC_CODERANGE_SET(new_str_buf, rb_enc_asciicompat(str_enc) ? ENC_CODERANGE_7BIT : ENC_CODERANGE_VALID);
|
692
1208
|
|
693
1209
|
ptr = RSTRING_PTR(str);
|
694
1210
|
end = RSTRING_END(str);
|
695
1211
|
|
696
|
-
|
697
|
-
|
698
|
-
|
699
|
-
|
700
|
-
|
1212
|
+
if (single_byte_optimizable(str))
|
1213
|
+
{
|
1214
|
+
while (ptr < end)
|
1215
|
+
{
|
1216
|
+
str_cp = *ptr & 0xff;
|
1217
|
+
if ((!tst_cp(cps, len, str_cp)) == delete)
|
1218
|
+
{
|
1219
|
+
cs_str_buf_cat(new_str_buf, ptr, 1);
|
1220
|
+
}
|
1221
|
+
ptr++;
|
1222
|
+
}
|
1223
|
+
}
|
1224
|
+
else // likely to be multibyte string
|
1225
|
+
{
|
1226
|
+
while (ptr < end)
|
1227
|
+
{
|
1228
|
+
str_cp = rb_enc_codepoint_len(ptr, end, &cp_len, str_enc);
|
1229
|
+
if ((!tst_cp(cps, len, str_cp)) == delete)
|
1230
|
+
{
|
1231
|
+
cs_str_buf_cat(new_str_buf, ptr, cp_len);
|
1232
|
+
}
|
1233
|
+
ptr += cp_len;
|
701
1234
|
}
|
702
|
-
ptr += n;
|
703
1235
|
}
|
704
1236
|
|
705
|
-
|
706
|
-
|
1237
|
+
cs_str_buf_terminate(new_str_buf, str_enc);
|
1238
|
+
|
1239
|
+
if (bang)
|
1240
|
+
{
|
1241
|
+
if (RSTRING_LEN(new_str_buf) == (long)orig_len) // string unchanged
|
1242
|
+
{
|
1243
|
+
return Qnil;
|
1244
|
+
}
|
707
1245
|
rb_str_shared_replace(str, new_str_buf);
|
708
1246
|
}
|
709
|
-
else
|
1247
|
+
else
|
1248
|
+
{
|
710
1249
|
RB_OBJ_WRITE(new_str_buf, &(RBASIC(new_str_buf))->klass, rb_obj_class(str));
|
711
|
-
// slightly cumbersome approach needed for compatibility with Ruby < 2.3:
|
712
|
-
RBASIC(new_str_buf)->flags |= (RBASIC(str)->flags&(FL_TAINT));
|
713
1250
|
str = new_str_buf;
|
714
1251
|
}
|
715
1252
|
|
@@ -717,98 +1254,115 @@ apply_to_str(VALUE set, VALUE str, int delete, int bang) {
|
|
717
1254
|
}
|
718
1255
|
|
719
1256
|
static VALUE
|
720
|
-
|
721
|
-
|
1257
|
+
cs_method_delete_in(VALUE self, VALUE str)
|
1258
|
+
{
|
1259
|
+
return cs_apply_to_str(self, str, 1, 0);
|
1260
|
+
}
|
1261
|
+
|
1262
|
+
static VALUE
|
1263
|
+
cs_method_delete_in_bang(VALUE self, VALUE str)
|
1264
|
+
{
|
1265
|
+
return cs_apply_to_str(self, str, 1, 1);
|
722
1266
|
}
|
723
1267
|
|
724
1268
|
static VALUE
|
725
|
-
|
726
|
-
|
1269
|
+
cs_method_keep_in(VALUE self, VALUE str)
|
1270
|
+
{
|
1271
|
+
return cs_apply_to_str(self, str, 0, 0);
|
727
1272
|
}
|
728
1273
|
|
729
1274
|
static VALUE
|
730
|
-
|
731
|
-
|
1275
|
+
cs_method_keep_in_bang(VALUE self, VALUE str)
|
1276
|
+
{
|
1277
|
+
return cs_apply_to_str(self, str, 0, 1);
|
732
1278
|
}
|
733
1279
|
|
734
1280
|
static VALUE
|
735
|
-
|
736
|
-
|
1281
|
+
cs_method_allocated_length(VALUE self)
|
1282
|
+
{
|
1283
|
+
return LONG2FIX(cs_fetch_data(self)->len);
|
737
1284
|
}
|
738
1285
|
|
739
1286
|
// ****
|
740
1287
|
// init
|
741
1288
|
// ****
|
742
1289
|
|
743
|
-
void
|
744
|
-
Init_character_set()
|
1290
|
+
void Init_character_set()
|
745
1291
|
{
|
746
1292
|
VALUE cs = rb_define_class("CharacterSet", rb_cObject);
|
747
1293
|
|
748
|
-
rb_define_alloc_func(cs,
|
1294
|
+
rb_define_alloc_func(cs, cs_method_allocate);
|
749
1295
|
|
750
1296
|
// `Set` compatibility methods
|
751
1297
|
|
752
|
-
rb_define_method(cs, "each",
|
753
|
-
rb_define_method(cs, "to_a",
|
754
|
-
rb_define_method(cs, "length",
|
755
|
-
rb_define_method(cs, "size",
|
756
|
-
rb_define_method(cs, "
|
757
|
-
rb_define_method(cs, "
|
758
|
-
rb_define_method(cs, "
|
759
|
-
rb_define_method(cs, "
|
760
|
-
rb_define_method(cs, "
|
761
|
-
rb_define_method(cs, "
|
762
|
-
rb_define_method(cs, "
|
763
|
-
rb_define_method(cs, "
|
764
|
-
rb_define_method(cs, "
|
765
|
-
rb_define_method(cs, "
|
766
|
-
rb_define_method(cs, "
|
767
|
-
rb_define_method(cs, "
|
768
|
-
rb_define_method(cs, "
|
769
|
-
rb_define_method(cs, "
|
770
|
-
rb_define_method(cs, "
|
771
|
-
rb_define_method(cs, "
|
772
|
-
rb_define_method(cs, "
|
773
|
-
rb_define_method(cs, "
|
774
|
-
rb_define_method(cs, "
|
775
|
-
rb_define_method(cs, "add
|
776
|
-
rb_define_method(cs, "
|
777
|
-
rb_define_method(cs, "
|
778
|
-
rb_define_method(cs, "
|
779
|
-
rb_define_method(cs, "
|
780
|
-
rb_define_method(cs, "
|
781
|
-
rb_define_method(cs, "
|
782
|
-
rb_define_method(cs, "
|
783
|
-
rb_define_method(cs, "
|
784
|
-
rb_define_method(cs, "
|
785
|
-
rb_define_method(cs, "
|
786
|
-
rb_define_method(cs, "
|
787
|
-
rb_define_method(cs, "
|
788
|
-
rb_define_method(cs, "
|
789
|
-
rb_define_method(cs, "
|
790
|
-
rb_define_method(cs, "
|
791
|
-
rb_define_method(cs, "
|
792
|
-
rb_define_method(cs, "
|
793
|
-
rb_define_method(cs, "
|
1298
|
+
rb_define_method(cs, "each", cs_method_each, 0);
|
1299
|
+
rb_define_method(cs, "to_a", cs_method_to_a, -1);
|
1300
|
+
rb_define_method(cs, "length", cs_method_length, 0);
|
1301
|
+
rb_define_method(cs, "size", cs_method_length, 0);
|
1302
|
+
rb_define_method(cs, "empty?", cs_method_empty_p, 0);
|
1303
|
+
rb_define_method(cs, "hash", cs_method_hash, 0);
|
1304
|
+
rb_define_method(cs, "keep_if", cs_method_keep_if, 0);
|
1305
|
+
rb_define_method(cs, "delete_if", cs_method_delete_if, 0);
|
1306
|
+
rb_define_method(cs, "clear", cs_method_clear, 0);
|
1307
|
+
rb_define_method(cs, "min", cs_method_min, 0);
|
1308
|
+
rb_define_method(cs, "max", cs_method_max, 0);
|
1309
|
+
rb_define_method(cs, "minmax", cs_method_minmax, 0);
|
1310
|
+
rb_define_method(cs, "intersection", cs_method_intersection, 1);
|
1311
|
+
rb_define_method(cs, "&", cs_method_intersection, 1);
|
1312
|
+
rb_define_method(cs, "union", cs_method_union, 1);
|
1313
|
+
rb_define_method(cs, "+", cs_method_union, 1);
|
1314
|
+
rb_define_method(cs, "|", cs_method_union, 1);
|
1315
|
+
rb_define_method(cs, "difference", cs_method_difference, 1);
|
1316
|
+
rb_define_method(cs, "-", cs_method_difference, 1);
|
1317
|
+
rb_define_method(cs, "^", cs_method_exclusion, 1);
|
1318
|
+
rb_define_method(cs, "include?", cs_method_include_p, 1);
|
1319
|
+
rb_define_method(cs, "member?", cs_method_include_p, 1);
|
1320
|
+
rb_define_method(cs, "===", cs_method_include_p, 1);
|
1321
|
+
rb_define_method(cs, "add", cs_method_add, 1);
|
1322
|
+
rb_define_method(cs, "<<", cs_method_add, 1);
|
1323
|
+
rb_define_method(cs, "add?", cs_method_add_p, 1);
|
1324
|
+
rb_define_method(cs, "delete", cs_method_delete, 1);
|
1325
|
+
rb_define_method(cs, "delete?", cs_method_delete_p, 1);
|
1326
|
+
rb_define_method(cs, "intersect?", cs_method_intersect_p, 1);
|
1327
|
+
rb_define_method(cs, "disjoint?", cs_method_disjoint_p, 1);
|
1328
|
+
rb_define_method(cs, "eql?", cs_method_eql_p, 1);
|
1329
|
+
rb_define_method(cs, "==", cs_method_eql_p, 1);
|
1330
|
+
rb_define_method(cs, "merge", cs_method_merge, 1);
|
1331
|
+
rb_define_method(cs, "initialize_clone", cs_method_initialize_copy, 1);
|
1332
|
+
rb_define_method(cs, "initialize_dup", cs_method_initialize_copy, 1);
|
1333
|
+
rb_define_method(cs, "subtract", cs_method_subtract, 1);
|
1334
|
+
rb_define_method(cs, "subset?", cs_method_subset_p, 1);
|
1335
|
+
rb_define_method(cs, "<=", cs_method_subset_p, 1);
|
1336
|
+
rb_define_method(cs, "proper_subset?", cs_method_proper_subset_p, 1);
|
1337
|
+
rb_define_method(cs, "<", cs_method_proper_subset_p, 1);
|
1338
|
+
rb_define_method(cs, "superset?", cs_method_superset_p, 1);
|
1339
|
+
rb_define_method(cs, ">=", cs_method_superset_p, 1);
|
1340
|
+
rb_define_method(cs, "proper_superset?", cs_method_proper_superset_p, 1);
|
1341
|
+
rb_define_method(cs, ">", cs_method_proper_superset_p, 1);
|
794
1342
|
|
795
1343
|
// `CharacterSet`-specific methods
|
796
1344
|
|
797
|
-
rb_define_singleton_method(cs, "from_ranges",
|
798
|
-
rb_define_singleton_method(cs, "of",
|
799
|
-
|
800
|
-
rb_define_method(cs, "ranges",
|
801
|
-
rb_define_method(cs, "sample",
|
802
|
-
rb_define_method(cs, "
|
803
|
-
rb_define_method(cs, "
|
804
|
-
rb_define_method(cs, "
|
805
|
-
rb_define_method(cs, "
|
806
|
-
rb_define_method(cs, "
|
807
|
-
rb_define_method(cs, "
|
808
|
-
rb_define_method(cs, "
|
809
|
-
rb_define_method(cs, "
|
810
|
-
rb_define_method(cs, "
|
811
|
-
rb_define_method(cs, "
|
812
|
-
rb_define_method(cs, "
|
813
|
-
rb_define_method(cs, "
|
1345
|
+
rb_define_singleton_method(cs, "from_ranges", cs_class_method_from_ranges, -2);
|
1346
|
+
rb_define_singleton_method(cs, "of", cs_class_method_of, -1);
|
1347
|
+
|
1348
|
+
rb_define_method(cs, "ranges", cs_method_ranges, 0);
|
1349
|
+
rb_define_method(cs, "sample", cs_method_sample, -1);
|
1350
|
+
rb_define_method(cs, "ext_section", cs_method_ext_section, 2);
|
1351
|
+
rb_define_method(cs, "ext_count_in_section", cs_method_ext_count_in_section, 2);
|
1352
|
+
rb_define_method(cs, "ext_section?", cs_method_ext_section_p, 2);
|
1353
|
+
rb_define_method(cs, "ext_section_ratio", cs_method_ext_section_ratio, 2);
|
1354
|
+
rb_define_method(cs, "planes", cs_method_planes, 0);
|
1355
|
+
rb_define_method(cs, "plane", cs_method_plane, 1);
|
1356
|
+
rb_define_method(cs, "member_in_plane?", cs_method_member_in_plane_p, 1);
|
1357
|
+
rb_define_method(cs, "ext_inversion", cs_method_ext_inversion, -1);
|
1358
|
+
rb_define_method(cs, "case_insensitive", cs_method_case_insensitive, 0);
|
1359
|
+
rb_define_method(cs, "count_in", cs_method_count_in, 1);
|
1360
|
+
rb_define_method(cs, "cover?", cs_method_cover_p, 1);
|
1361
|
+
rb_define_method(cs, "delete_in", cs_method_delete_in, 1);
|
1362
|
+
rb_define_method(cs, "delete_in!", cs_method_delete_in_bang, 1);
|
1363
|
+
rb_define_method(cs, "keep_in", cs_method_keep_in, 1);
|
1364
|
+
rb_define_method(cs, "keep_in!", cs_method_keep_in_bang, 1);
|
1365
|
+
rb_define_method(cs, "scan", cs_method_scan, 1);
|
1366
|
+
rb_define_method(cs, "used_by?", cs_method_used_by_p, 1);
|
1367
|
+
rb_define_method(cs, "allocated_length", cs_method_allocated_length, 0);
|
814
1368
|
}
|