character_set 1.5.0-java → 1.7.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitattributes +1 -1
- data/.github/workflows/tests.yml +6 -2
- data/BENCHMARK.md +35 -31
- data/CHANGELOG.md +30 -1
- data/Gemfile +14 -0
- data/README.md +9 -6
- data/Rakefile +2 -120
- data/character_set.gemspec +0 -21
- data/ext/character_set/character_set.c +110 -125
- data/lib/character_set/core_ext/string_ext.rb +1 -1
- data/lib/character_set/parser.rb +8 -4
- data/lib/character_set/predefined_sets/assigned.cps +73 -52
- data/lib/character_set/predefined_sets/emoji.cps +10 -9
- data/lib/character_set/predefined_sets.rb +11 -0
- data/lib/character_set/ruby_fallback/character_set_methods.rb +17 -20
- data/lib/character_set/ruby_fallback/set_methods.rb +4 -18
- data/lib/character_set/ruby_fallback/vendored_set_classes.rb +492 -0
- data/lib/character_set/ruby_fallback.rb +2 -6
- data/lib/character_set/shared_methods.rb +8 -2
- data/lib/character_set/version.rb +1 -1
- data/tasks/benchmark.rake +20 -0
- data/{benchmarks → tasks/benchmarks}/delete_in.rb +5 -1
- data/{benchmarks → tasks/benchmarks}/keep_in.rb +5 -1
- data/tasks/benchmarks/shared.rb +28 -0
- data/tasks/sync_casefold_data.rake +20 -0
- data/tasks/sync_predefined_sets.rake +9 -0
- data/tasks/sync_ruby_spec.rake +65 -0
- metadata +19 -182
- data/benchmarks/shared.rb +0 -30
- /data/{benchmarks → tasks/benchmarks}/count_in.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/cover.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/scan.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/used_by.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_add.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_delete.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_merge.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_minmax.rb +0 -0
@@ -82,7 +82,11 @@ static const rb_data_type_t cs_type = {
|
|
82
82
|
.dsize = cs_memsize,
|
83
83
|
},
|
84
84
|
.data = NULL,
|
85
|
+
#ifdef RUBY_TYPED_FROZEN_SHAREABLE
|
86
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY | RUBY_TYPED_FROZEN_SHAREABLE,
|
87
|
+
#else
|
85
88
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
89
|
+
#endif
|
86
90
|
};
|
87
91
|
|
88
92
|
static inline VALUE
|
@@ -315,9 +319,9 @@ cs_method_minmax(VALUE self)
|
|
315
319
|
cs_cp cp, alen, blen; \
|
316
320
|
cs_ar *acps, *bcps; \
|
317
321
|
struct cs_data *new_data; \
|
318
|
-
new_cs = cs_alloc(RBASIC(self)->klass, &new_data); \
|
319
322
|
acps = cs_fetch_cps(cs_a, &alen); \
|
320
323
|
bcps = cs_fetch_cps(cs_b, &blen); \
|
324
|
+
new_cs = cs_alloc(RBASIC(self)->klass, &new_data); \
|
321
325
|
for (cp = 0; cp < UNICODE_CP_COUNT; cp++) \
|
322
326
|
{ \
|
323
327
|
if (tst_cp(acps, alen, cp) comp_op tst_cp(bcps, blen, cp)) \
|
@@ -372,22 +376,20 @@ cs_toggle_codepoint(VALUE cs, VALUE cp_num, int on, int return_nil_if_noop)
|
|
372
376
|
cps = data->cps;
|
373
377
|
len = data->len;
|
374
378
|
cp = FIX2ULONG(cp_num);
|
375
|
-
if (return_nil_if_noop &&
|
379
|
+
if (return_nil_if_noop && tst_cp(cps, len, cp) == on)
|
376
380
|
{
|
377
381
|
return Qnil;
|
378
382
|
}
|
383
|
+
|
384
|
+
if (on)
|
385
|
+
{
|
386
|
+
set_cp(data, cp);
|
387
|
+
}
|
379
388
|
else
|
380
389
|
{
|
381
|
-
|
382
|
-
{
|
383
|
-
set_cp(data, cp);
|
384
|
-
}
|
385
|
-
else
|
386
|
-
{
|
387
|
-
clr_cp(cps, len, cp);
|
388
|
-
}
|
389
|
-
return cs;
|
390
|
+
clr_cp(cps, len, cp);
|
390
391
|
}
|
392
|
+
return cs;
|
391
393
|
}
|
392
394
|
|
393
395
|
static VALUE
|
@@ -571,7 +573,7 @@ cs_method_merge(VALUE self, VALUE other)
|
|
571
573
|
{
|
572
574
|
return cs_merge_cs(self, other);
|
573
575
|
}
|
574
|
-
|
576
|
+
if (TYPE(other) == T_ARRAY)
|
575
577
|
{
|
576
578
|
return cs_merge_rb_array(self, other);
|
577
579
|
}
|
@@ -705,8 +707,7 @@ cs_method_ranges(VALUE self)
|
|
705
707
|
|
706
708
|
if (!previous_cp_num) {
|
707
709
|
current_start = cp_num;
|
708
|
-
} else if (previous_cp_num + 2 != cp_num)
|
709
|
-
{
|
710
|
+
} else if (previous_cp_num + 2 != cp_num) {
|
710
711
|
// gap found, finalize previous range
|
711
712
|
rb_ary_push(ranges, rb_range_new(current_start, current_end, 0));
|
712
713
|
current_start = cp_num;
|
@@ -914,10 +915,10 @@ cs_method_ext_inversion(int argc, VALUE *argv, VALUE self)
|
|
914
915
|
return new_cs;
|
915
916
|
}
|
916
917
|
|
917
|
-
typedef int (*str_cp_handler)(unsigned int, cs_ar *, cs_cp len, struct cs_data *data, VALUE
|
918
|
+
typedef int (*str_cp_handler)(unsigned int, cs_ar *, cs_cp len, struct cs_data *data, VALUE memo);
|
918
919
|
|
919
920
|
static inline int
|
920
|
-
add_str_cp_to_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE
|
921
|
+
add_str_cp_to_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE memo)
|
921
922
|
{
|
922
923
|
set_cp(data, str_cp);
|
923
924
|
return 1;
|
@@ -964,7 +965,7 @@ cs_method_case_insensitive(VALUE self)
|
|
964
965
|
}
|
965
966
|
|
966
967
|
static inline VALUE
|
967
|
-
each_sb_cp(VALUE str, str_cp_handler func, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE
|
968
|
+
each_sb_cp(VALUE str, str_cp_handler func, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE memo)
|
968
969
|
{
|
969
970
|
long i, str_len;
|
970
971
|
unsigned int str_cp;
|
@@ -983,21 +984,29 @@ each_sb_cp(VALUE str, str_cp_handler func, cs_ar *cp_arr, cs_cp len, struct cs_d
|
|
983
984
|
}
|
984
985
|
|
985
986
|
static inline VALUE
|
986
|
-
each_mb_cp(VALUE str, str_cp_handler func, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE
|
987
|
+
each_mb_cp(VALUE str, str_cp_handler func, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE memo)
|
987
988
|
{
|
988
989
|
int n;
|
989
990
|
unsigned int str_cp;
|
990
991
|
const char *ptr, *end;
|
991
|
-
rb_encoding *
|
992
|
+
rb_encoding *utf8;
|
993
|
+
|
994
|
+
utf8 = rb_utf8_encoding();
|
995
|
+
if (rb_enc_get(str) == utf8)
|
996
|
+
{
|
997
|
+
str = rb_str_new_frozen(str);
|
998
|
+
}
|
999
|
+
else
|
1000
|
+
{
|
1001
|
+
str = rb_str_encode(str, rb_enc_from_encoding(utf8), 0, Qnil);
|
1002
|
+
}
|
992
1003
|
|
993
|
-
str = rb_str_new_frozen(str);
|
994
1004
|
ptr = RSTRING_PTR(str);
|
995
1005
|
end = RSTRING_END(str);
|
996
|
-
enc = rb_enc_get(str);
|
997
1006
|
|
998
1007
|
while (ptr < end)
|
999
1008
|
{
|
1000
|
-
str_cp = rb_enc_codepoint_len(ptr, end, &n,
|
1009
|
+
str_cp = rb_enc_codepoint_len(ptr, end, &n, utf8);
|
1001
1010
|
if (!(*func)(str_cp, cp_arr, len, data, memo))
|
1002
1011
|
{
|
1003
1012
|
return Qfalse;
|
@@ -1028,12 +1037,13 @@ single_byte_optimizable(VALUE str)
|
|
1028
1037
|
}
|
1029
1038
|
|
1030
1039
|
static inline VALUE
|
1031
|
-
each_cp(VALUE str, str_cp_handler func, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE
|
1040
|
+
each_cp(VALUE str, str_cp_handler func, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE memo)
|
1032
1041
|
{
|
1033
1042
|
if (single_byte_optimizable(str))
|
1034
1043
|
{
|
1035
1044
|
return each_sb_cp(str, func, cp_arr, len, data, memo);
|
1036
1045
|
}
|
1046
|
+
|
1037
1047
|
return each_mb_cp(str, func, cp_arr, len, data, memo);
|
1038
1048
|
}
|
1039
1049
|
|
@@ -1047,26 +1057,23 @@ raise_arg_err_unless_string(VALUE val)
|
|
1047
1057
|
}
|
1048
1058
|
|
1049
1059
|
static VALUE
|
1050
|
-
|
1060
|
+
cs_class_method_of_string(VALUE self, VALUE string)
|
1051
1061
|
{
|
1052
1062
|
VALUE new_cs;
|
1053
1063
|
struct cs_data *new_data;
|
1054
|
-
|
1064
|
+
|
1065
|
+
raise_arg_err_unless_string(string);
|
1055
1066
|
new_cs = cs_alloc(self, &new_data);
|
1056
|
-
|
1057
|
-
{
|
1058
|
-
raise_arg_err_unless_string(argv[i]);
|
1059
|
-
each_cp(argv[i], add_str_cp_to_arr, 0, 0, new_data, 0);
|
1060
|
-
}
|
1067
|
+
each_cp(string, add_str_cp_to_arr, 0, 0, new_data, 0);
|
1061
1068
|
return new_cs;
|
1062
1069
|
}
|
1063
1070
|
|
1064
1071
|
static inline int
|
1065
|
-
count_str_cp(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE
|
1072
|
+
count_str_cp(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE memo)
|
1066
1073
|
{
|
1067
1074
|
if (tst_cp(cp_arr, len, str_cp))
|
1068
1075
|
{
|
1069
|
-
*memo += 1;
|
1076
|
+
*((VALUE *)memo) += 1;
|
1070
1077
|
}
|
1071
1078
|
return 1;
|
1072
1079
|
}
|
@@ -1074,17 +1081,17 @@ count_str_cp(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data
|
|
1074
1081
|
static VALUE
|
1075
1082
|
cs_method_count_in(VALUE self, VALUE str)
|
1076
1083
|
{
|
1077
|
-
|
1084
|
+
long count;
|
1078
1085
|
struct cs_data *data;
|
1079
1086
|
raise_arg_err_unless_string(str);
|
1080
1087
|
data = cs_fetch_data(self);
|
1081
1088
|
count = 0;
|
1082
|
-
each_cp(str, count_str_cp, data->cps, data->len, data, &count);
|
1083
|
-
return
|
1089
|
+
each_cp(str, count_str_cp, data->cps, data->len, data, (VALUE)&count);
|
1090
|
+
return LONG2FIX(count);
|
1084
1091
|
}
|
1085
1092
|
|
1086
1093
|
static inline int
|
1087
|
-
str_cp_in_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE
|
1094
|
+
str_cp_in_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE memo)
|
1088
1095
|
{
|
1089
1096
|
return tst_cp(cp_arr, len, str_cp);
|
1090
1097
|
}
|
@@ -1099,11 +1106,11 @@ cs_method_cover_p(VALUE self, VALUE str)
|
|
1099
1106
|
}
|
1100
1107
|
|
1101
1108
|
static inline int
|
1102
|
-
add_str_cp_to_str_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE
|
1109
|
+
add_str_cp_to_str_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE memo)
|
1103
1110
|
{
|
1104
1111
|
if (tst_cp(cp_arr, len, str_cp))
|
1105
1112
|
{
|
1106
|
-
rb_ary_push(memo
|
1113
|
+
rb_ary_push(memo, rb_enc_uint_chr((int)str_cp, rb_utf8_encoding()));
|
1107
1114
|
}
|
1108
1115
|
return 1;
|
1109
1116
|
}
|
@@ -1111,18 +1118,17 @@ add_str_cp_to_str_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_d
|
|
1111
1118
|
static VALUE
|
1112
1119
|
cs_method_scan(VALUE self, VALUE str)
|
1113
1120
|
{
|
1114
|
-
VALUE memo
|
1121
|
+
VALUE memo;
|
1115
1122
|
struct cs_data *data;
|
1116
1123
|
raise_arg_err_unless_string(str);
|
1117
1124
|
data = cs_fetch_data(self);
|
1118
|
-
memo
|
1119
|
-
memo[1] = (VALUE)rb_enc_get(str);
|
1125
|
+
memo = rb_ary_new();
|
1120
1126
|
each_cp(str, add_str_cp_to_str_arr, data->cps, data->len, data, memo);
|
1121
|
-
return memo
|
1127
|
+
return memo;
|
1122
1128
|
}
|
1123
1129
|
|
1124
1130
|
static inline int
|
1125
|
-
str_cp_not_in_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE
|
1131
|
+
str_cp_not_in_arr(unsigned int str_cp, cs_ar *cp_arr, cs_cp len, struct cs_data *data, VALUE memo)
|
1126
1132
|
{
|
1127
1133
|
return !tst_cp(cp_arr, len, str_cp);
|
1128
1134
|
}
|
@@ -1138,116 +1144,91 @@ cs_method_used_by_p(VALUE self, VALUE str)
|
|
1138
1144
|
return only_uses_other_cps == Qfalse ? Qtrue : Qfalse;
|
1139
1145
|
}
|
1140
1146
|
|
1141
|
-
|
1142
|
-
cs_str_buf_cat(VALUE str, const char *ptr, long len)
|
1143
|
-
{
|
1144
|
-
long total, olen;
|
1145
|
-
char *sptr;
|
1146
|
-
|
1147
|
-
RSTRING_GETMEM(str, sptr, olen);
|
1148
|
-
sptr = RSTRING(str)->as.heap.ptr;
|
1149
|
-
olen = RSTRING(str)->as.heap.len;
|
1150
|
-
total = olen + len;
|
1151
|
-
memcpy(sptr + olen, ptr, len);
|
1152
|
-
RSTRING(str)->as.heap.len = total;
|
1153
|
-
}
|
1154
|
-
|
1155
|
-
#ifndef TERM_FILL
|
1156
|
-
#define TERM_FILL(ptr, termlen) \
|
1157
|
-
do \
|
1158
|
-
{ \
|
1159
|
-
char *const term_fill_ptr = (ptr); \
|
1160
|
-
const int term_fill_len = (termlen); \
|
1161
|
-
*term_fill_ptr = '\0'; \
|
1162
|
-
if (__builtin_expect(!!(term_fill_len > 1), 0)) \
|
1163
|
-
memset(term_fill_ptr, 0, term_fill_len); \
|
1164
|
-
} while (0)
|
1165
|
-
#endif
|
1166
|
-
|
1167
|
-
static void
|
1168
|
-
cs_str_buf_terminate(VALUE str, rb_encoding *enc)
|
1169
|
-
{
|
1170
|
-
char *ptr;
|
1171
|
-
long len;
|
1172
|
-
|
1173
|
-
ptr = RSTRING(str)->as.heap.ptr;
|
1174
|
-
len = RSTRING(str)->as.heap.len;
|
1175
|
-
TERM_FILL(ptr + len, rb_enc_mbminlen(enc));
|
1176
|
-
}
|
1177
|
-
|
1147
|
+
// partially based on rb_str_delete_bang
|
1178
1148
|
static inline VALUE
|
1179
1149
|
cs_apply_to_str(VALUE set, VALUE str, int delete, int bang)
|
1180
1150
|
{
|
1181
1151
|
cs_ar *cps;
|
1182
|
-
cs_cp
|
1183
|
-
|
1184
|
-
|
1185
|
-
|
1186
|
-
|
1187
|
-
|
1152
|
+
cs_cp cs_len;
|
1153
|
+
VALUE orig_str_len;
|
1154
|
+
|
1155
|
+
rb_encoding *orig_enc, *utf8;
|
1156
|
+
char *s, *send, *t;
|
1157
|
+
int orig_was_utf8, cr;
|
1188
1158
|
|
1189
1159
|
raise_arg_err_unless_string(str);
|
1190
1160
|
|
1191
|
-
|
1161
|
+
orig_str_len = RSTRING_LEN(str);
|
1162
|
+
|
1163
|
+
if (orig_str_len == 0)
|
1164
|
+
{
|
1165
|
+
return bang ? Qnil : str;
|
1166
|
+
}
|
1192
1167
|
|
1193
|
-
|
1194
|
-
|
1168
|
+
orig_enc = rb_enc_get(str);
|
1169
|
+
utf8 = rb_utf8_encoding();
|
1170
|
+
orig_was_utf8 = orig_enc == utf8;
|
1171
|
+
|
1172
|
+
if (!orig_was_utf8 && orig_enc != rb_usascii_encoding())
|
1173
|
+
{
|
1174
|
+
str = rb_str_encode(str, rb_enc_from_encoding(utf8), 0, Qnil);
|
1175
|
+
}
|
1176
|
+
else
|
1195
1177
|
{
|
1196
|
-
if (bang)
|
1178
|
+
if (!bang)
|
1197
1179
|
{
|
1198
|
-
|
1180
|
+
str = rb_str_dup(str);
|
1199
1181
|
}
|
1200
|
-
return rb_str_dup(str);
|
1201
1182
|
}
|
1202
1183
|
|
1203
|
-
|
1204
|
-
|
1205
|
-
|
1206
|
-
|
1207
|
-
|
1184
|
+
cps = cs_fetch_cps(set, &cs_len);
|
1185
|
+
rb_str_modify(str);
|
1186
|
+
s = t = RSTRING_PTR(str);
|
1187
|
+
send = RSTRING_END(str);
|
1188
|
+
cr = ENC_CODERANGE_7BIT;
|
1208
1189
|
|
1209
|
-
|
1210
|
-
end = RSTRING_END(str);
|
1211
|
-
|
1212
|
-
if (single_byte_optimizable(str))
|
1190
|
+
while (s < send)
|
1213
1191
|
{
|
1214
|
-
|
1192
|
+
unsigned int c;
|
1193
|
+
int clen;
|
1194
|
+
|
1195
|
+
if ((c = *(unsigned char *)s) < 0x80)
|
1215
1196
|
{
|
1216
|
-
|
1217
|
-
if ((!tst_cp(cps, len, str_cp)) == delete)
|
1197
|
+
if (tst_cp(cps, cs_len, c) != delete)
|
1218
1198
|
{
|
1219
|
-
|
1199
|
+
if (t != s)
|
1200
|
+
*t = c;
|
1201
|
+
t++;
|
1220
1202
|
}
|
1221
|
-
|
1203
|
+
s++;
|
1222
1204
|
}
|
1223
|
-
|
1224
|
-
else // likely to be multibyte string
|
1225
|
-
{
|
1226
|
-
while (ptr < end)
|
1205
|
+
else
|
1227
1206
|
{
|
1228
|
-
|
1229
|
-
|
1207
|
+
c = rb_enc_codepoint_len(s, send, &clen, utf8);
|
1208
|
+
|
1209
|
+
if (tst_cp(cps, cs_len, c) != delete)
|
1230
1210
|
{
|
1231
|
-
|
1211
|
+
if (t != s)
|
1212
|
+
rb_enc_mbcput(c, t, utf8);
|
1213
|
+
t += clen;
|
1214
|
+
if (cr == ENC_CODERANGE_7BIT)
|
1215
|
+
cr = ENC_CODERANGE_VALID;
|
1232
1216
|
}
|
1233
|
-
|
1217
|
+
s += clen;
|
1234
1218
|
}
|
1235
1219
|
}
|
1236
1220
|
|
1237
|
-
|
1221
|
+
rb_str_set_len(str, t - RSTRING_PTR(str));
|
1222
|
+
ENC_CODERANGE_SET(str, cr);
|
1238
1223
|
|
1239
|
-
if (bang)
|
1224
|
+
if (bang && (RSTRING_LEN(str) == (long)orig_str_len)) // string unchanged
|
1240
1225
|
{
|
1241
|
-
|
1242
|
-
{
|
1243
|
-
return Qnil;
|
1244
|
-
}
|
1245
|
-
rb_str_shared_replace(str, new_str_buf);
|
1226
|
+
return Qnil;
|
1246
1227
|
}
|
1247
|
-
|
1228
|
+
|
1229
|
+
if (!orig_was_utf8)
|
1248
1230
|
{
|
1249
|
-
|
1250
|
-
str = new_str_buf;
|
1231
|
+
return rb_str_encode(str, rb_enc_from_encoding(orig_enc), 0, Qnil);
|
1251
1232
|
}
|
1252
1233
|
|
1253
1234
|
return str;
|
@@ -1289,6 +1270,10 @@ cs_method_allocated_length(VALUE self)
|
|
1289
1270
|
|
1290
1271
|
void Init_character_set()
|
1291
1272
|
{
|
1273
|
+
#ifdef HAVE_RB_EXT_RACTOR_SAFE
|
1274
|
+
rb_ext_ractor_safe(true);
|
1275
|
+
#endif
|
1276
|
+
|
1292
1277
|
VALUE cs = rb_define_class("CharacterSet", rb_cObject);
|
1293
1278
|
|
1294
1279
|
rb_define_alloc_func(cs, cs_method_allocate);
|
@@ -1343,7 +1328,7 @@ void Init_character_set()
|
|
1343
1328
|
// `CharacterSet`-specific methods
|
1344
1329
|
|
1345
1330
|
rb_define_singleton_method(cs, "from_ranges", cs_class_method_from_ranges, -2);
|
1346
|
-
rb_define_singleton_method(cs, "
|
1331
|
+
rb_define_singleton_method(cs, "of_string", cs_class_method_of_string, 1);
|
1347
1332
|
|
1348
1333
|
rb_define_method(cs, "ranges", cs_method_ranges, 0);
|
1349
1334
|
rb_define_method(cs, "sample", cs_method_sample, -1);
|
data/lib/character_set/parser.rb
CHANGED
@@ -4,11 +4,15 @@ class CharacterSet
|
|
4
4
|
|
5
5
|
def codepoints_from_enumerable(object)
|
6
6
|
raise ArgumentError, 'pass an Enumerable' unless object.respond_to?(:each)
|
7
|
+
|
7
8
|
# Use #each to check first element (only this works for all Enumerables)
|
8
|
-
object.each do |
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
object.each do |el| # rubocop:disable Lint/UnreachableLoop
|
10
|
+
if el.is_a?(Integer) && el >= 0 && el < 0x110000
|
11
|
+
return object
|
12
|
+
elsif el.is_a?(String) && el.length == 1
|
13
|
+
return object.to_a.join.encode('utf-8').codepoints
|
14
|
+
end
|
15
|
+
raise ArgumentError, "#{el.inspect} is not valid as a codepoint"
|
12
16
|
end
|
13
17
|
end
|
14
18
|
|