character_set 1.4.1 → 1.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitattributes +1 -1
- data/.github/workflows/gouteur.yml +20 -0
- data/.github/workflows/tests.yml +6 -2
- data/.gitignore +1 -0
- data/.gouteur.yml +2 -0
- data/.rubocop.yml +7 -1
- data/BENCHMARK.md +35 -31
- data/CHANGELOG.md +32 -0
- data/Gemfile +14 -0
- data/README.md +22 -6
- data/Rakefile +5 -2
- data/benchmarks/delete_in.rb +5 -1
- data/benchmarks/keep_in.rb +5 -1
- data/character_set.gemspec +0 -13
- data/ext/character_set/character_set.c +59 -90
- data/ext/character_set/unicode_casefold_table.h +44 -1
- data/lib/character_set/core_ext/string_ext.rb +1 -1
- data/lib/character_set/expression_converter.rb +23 -23
- data/lib/character_set/predefined_sets/assigned.cps +51 -40
- data/lib/character_set/predefined_sets/emoji.cps +12 -11
- data/lib/character_set/predefined_sets.rb +11 -0
- data/lib/character_set/ruby_fallback/character_set_methods.rb +3 -3
- data/lib/character_set/set_method_adapters.rb +4 -3
- data/lib/character_set/shared_methods.rb +15 -1
- data/lib/character_set/version.rb +1 -1
- metadata +5 -143
@@ -6,7 +6,7 @@ typedef struct casefold_mapping {
|
|
6
6
|
unsigned long to;
|
7
7
|
} casefold_mapping;
|
8
8
|
|
9
|
-
#define CASEFOLD_COUNT
|
9
|
+
#define CASEFOLD_COUNT 1426
|
10
10
|
|
11
11
|
static const casefold_mapping unicode_casefold_table[CASEFOLD_COUNT] = {
|
12
12
|
{0x0041,0x0061},
|
@@ -564,6 +564,41 @@ static const casefold_mapping unicode_casefold_table[CASEFOLD_COUNT] = {
|
|
564
564
|
{0x104D1,0x104F9},
|
565
565
|
{0x104D2,0x104FA},
|
566
566
|
{0x104D3,0x104FB},
|
567
|
+
{0x10570,0x10597},
|
568
|
+
{0x10571,0x10598},
|
569
|
+
{0x10572,0x10599},
|
570
|
+
{0x10573,0x1059A},
|
571
|
+
{0x10574,0x1059B},
|
572
|
+
{0x10575,0x1059C},
|
573
|
+
{0x10576,0x1059D},
|
574
|
+
{0x10577,0x1059E},
|
575
|
+
{0x10578,0x1059F},
|
576
|
+
{0x10579,0x105A0},
|
577
|
+
{0x1057A,0x105A1},
|
578
|
+
{0x1057C,0x105A3},
|
579
|
+
{0x1057D,0x105A4},
|
580
|
+
{0x1057E,0x105A5},
|
581
|
+
{0x1057F,0x105A6},
|
582
|
+
{0x10580,0x105A7},
|
583
|
+
{0x10581,0x105A8},
|
584
|
+
{0x10582,0x105A9},
|
585
|
+
{0x10583,0x105AA},
|
586
|
+
{0x10584,0x105AB},
|
587
|
+
{0x10585,0x105AC},
|
588
|
+
{0x10586,0x105AD},
|
589
|
+
{0x10587,0x105AE},
|
590
|
+
{0x10588,0x105AF},
|
591
|
+
{0x10589,0x105B0},
|
592
|
+
{0x1058A,0x105B1},
|
593
|
+
{0x1058C,0x105B3},
|
594
|
+
{0x1058D,0x105B4},
|
595
|
+
{0x1058E,0x105B5},
|
596
|
+
{0x1058F,0x105B6},
|
597
|
+
{0x10590,0x105B7},
|
598
|
+
{0x10591,0x105B8},
|
599
|
+
{0x10592,0x105B9},
|
600
|
+
{0x10594,0x105BB},
|
601
|
+
{0x10595,0x105BC},
|
567
602
|
{0x10A0,0x2D00},
|
568
603
|
{0x10A1,0x2D01},
|
569
604
|
{0x10A2,0x2D02},
|
@@ -1102,6 +1137,7 @@ static const casefold_mapping unicode_casefold_table[CASEFOLD_COUNT] = {
|
|
1102
1137
|
{0x2C2C,0x2C5C},
|
1103
1138
|
{0x2C2D,0x2C5D},
|
1104
1139
|
{0x2C2E,0x2C5E},
|
1140
|
+
{0x2C2F,0x2C5F},
|
1105
1141
|
{0x2C60,0x2C61},
|
1106
1142
|
{0x2C62,0x026B},
|
1107
1143
|
{0x2C63,0x1D7D},
|
@@ -1282,10 +1318,17 @@ static const casefold_mapping unicode_casefold_table[CASEFOLD_COUNT] = {
|
|
1282
1318
|
{0xA7BA,0xA7BB},
|
1283
1319
|
{0xA7BC,0xA7BD},
|
1284
1320
|
{0xA7BE,0xA7BF},
|
1321
|
+
{0xA7C0,0xA7C1},
|
1285
1322
|
{0xA7C2,0xA7C3},
|
1286
1323
|
{0xA7C4,0xA794},
|
1287
1324
|
{0xA7C5,0x0282},
|
1288
1325
|
{0xA7C6,0x1D8E},
|
1326
|
+
{0xA7C7,0xA7C8},
|
1327
|
+
{0xA7C9,0xA7CA},
|
1328
|
+
{0xA7D0,0xA7D1},
|
1329
|
+
{0xA7D6,0xA7D7},
|
1330
|
+
{0xA7D8,0xA7D9},
|
1331
|
+
{0xA7F5,0xA7F6},
|
1289
1332
|
{0xAB70,0x13A0},
|
1290
1333
|
{0xAB71,0x13A1},
|
1291
1334
|
{0xAB72,0x13A2},
|
@@ -4,7 +4,7 @@ class CharacterSet
|
|
4
4
|
|
5
5
|
Error = Class.new(ArgumentError)
|
6
6
|
|
7
|
-
def convert(expression)
|
7
|
+
def convert(expression, to = CharacterSet)
|
8
8
|
CharacterSet.require_optional_dependency('regexp_parser', __method__)
|
9
9
|
|
10
10
|
case expression
|
@@ -12,49 +12,49 @@ class CharacterSet
|
|
12
12
|
if expression.count != 1
|
13
13
|
raise Error, 'Pass a Regexp with exactly one expression, e.g. /[a-z]/'
|
14
14
|
end
|
15
|
-
convert(expression[0])
|
15
|
+
convert(expression[0], to)
|
16
16
|
|
17
17
|
when Regexp::Expression::CharacterSet
|
18
|
-
content = expression.map { |subexp| convert(subexp) }.reduce(:+)
|
19
|
-
content ||=
|
18
|
+
content = expression.map { |subexp| convert(subexp, to) }.reduce(:+)
|
19
|
+
content ||= to[]
|
20
20
|
expression.negative? ? content.inversion : content
|
21
21
|
|
22
22
|
when Regexp::Expression::CharacterSet::Intersection
|
23
|
-
expression.map { |subexp| convert(subexp) }.reduce(:&)
|
23
|
+
expression.map { |subexp| convert(subexp, to) }.reduce(:&)
|
24
24
|
|
25
25
|
when Regexp::Expression::CharacterSet::IntersectedSequence
|
26
|
-
expression.map { |subexp| convert(subexp) }.reduce(:+) ||
|
26
|
+
expression.map { |subexp| convert(subexp, to) }.reduce(:+) || to[]
|
27
27
|
|
28
28
|
when Regexp::Expression::CharacterSet::Range
|
29
|
-
start, finish = expression.map { |subexp| convert(subexp) }
|
30
|
-
|
29
|
+
start, finish = expression.map { |subexp| convert(subexp, to) }
|
30
|
+
to.new((start.min)..(finish.max))
|
31
31
|
|
32
32
|
when Regexp::Expression::CharacterType::Any
|
33
|
-
|
33
|
+
to.unicode
|
34
34
|
|
35
35
|
when Regexp::Expression::CharacterType::Base
|
36
36
|
/(?<negative>non)?(?<base_name>.+)/ =~ expression.token
|
37
37
|
content =
|
38
38
|
if expression.unicode_classes?
|
39
39
|
# in u-mode, type shortcuts match the same as \p{<long type name>}
|
40
|
-
|
40
|
+
to.of_property(base_name)
|
41
41
|
else
|
42
42
|
# in normal mode, types match only ascii chars
|
43
43
|
case base_name.to_sym
|
44
|
-
when :digit then
|
45
|
-
when :hex then
|
46
|
-
when :space then
|
47
|
-
when :word then
|
44
|
+
when :digit then to.from_ranges(48..57)
|
45
|
+
when :hex then to.from_ranges(48..57, 65..70, 97..102)
|
46
|
+
when :space then to.from_ranges(9..13, 32..32)
|
47
|
+
when :word then to.from_ranges(48..57, 65..90, 95..95, 97..122)
|
48
48
|
else raise Error, "Unsupported CharacterType #{base_name}"
|
49
49
|
end
|
50
50
|
end
|
51
51
|
negative ? content.inversion : content
|
52
52
|
|
53
53
|
when Regexp::Expression::EscapeSequence::CodepointList
|
54
|
-
|
54
|
+
to.new(expression.codepoints)
|
55
55
|
|
56
56
|
when Regexp::Expression::EscapeSequence::Base
|
57
|
-
|
57
|
+
to[expression.codepoint]
|
58
58
|
|
59
59
|
when Regexp::Expression::Group::Capture,
|
60
60
|
Regexp::Expression::Group::Passive,
|
@@ -62,19 +62,19 @@ class CharacterSet
|
|
62
62
|
Regexp::Expression::Group::Atomic,
|
63
63
|
Regexp::Expression::Group::Options
|
64
64
|
case expression.count
|
65
|
-
when 0 then
|
66
|
-
when 1 then convert(expression.first)
|
65
|
+
when 0 then to[]
|
66
|
+
when 1 then convert(expression.first, to)
|
67
67
|
else
|
68
68
|
raise Error, 'Groups must contain exactly one expression, e.g. ([a-z])'
|
69
69
|
end
|
70
70
|
|
71
71
|
when Regexp::Expression::Alternation # rubocop:disable Lint/DuplicateBranch
|
72
|
-
expression.map { |subexp| convert(subexp) }.reduce(:+)
|
72
|
+
expression.map { |subexp| convert(subexp, to) }.reduce(:+)
|
73
73
|
|
74
74
|
when Regexp::Expression::Alternative
|
75
75
|
case expression.count
|
76
|
-
when 0 then
|
77
|
-
when 1 then convert(expression.first)
|
76
|
+
when 0 then to[]
|
77
|
+
when 1 then convert(expression.first, to)
|
78
78
|
else
|
79
79
|
raise Error, 'Alternatives must contain exactly one expression'
|
80
80
|
end
|
@@ -83,11 +83,11 @@ class CharacterSet
|
|
83
83
|
if expression.set_level == 0 && expression.text.size != 1
|
84
84
|
raise Error, 'Literal runs outside of sets are codepoint *sequences*'
|
85
85
|
end
|
86
|
-
|
86
|
+
to[expression.text.ord]
|
87
87
|
|
88
88
|
when Regexp::Expression::UnicodeProperty::Base,
|
89
89
|
Regexp::Expression::PosixClass
|
90
|
-
content =
|
90
|
+
content = to.of_property(expression.token)
|
91
91
|
if expression.type == :posixclass && expression.ascii_classes?
|
92
92
|
content = content.ascii_part
|
93
93
|
end
|
@@ -21,7 +21,7 @@
|
|
21
21
|
85E,85E
|
22
22
|
860,86A
|
23
23
|
8A0,8B4
|
24
|
-
8B6,
|
24
|
+
8B6,8C7
|
25
25
|
8D3,983
|
26
26
|
985,98C
|
27
27
|
98F,990
|
@@ -76,7 +76,7 @@ B35,B39
|
|
76
76
|
B3C,B44
|
77
77
|
B47,B48
|
78
78
|
B4B,B4D
|
79
|
-
|
79
|
+
B55,B57
|
80
80
|
B5C,B5D
|
81
81
|
B5F,B63
|
82
82
|
B66,B77
|
@@ -120,15 +120,14 @@ CDE,CDE
|
|
120
120
|
CE0,CE3
|
121
121
|
CE6,CEF
|
122
122
|
CF1,CF2
|
123
|
-
D00,
|
124
|
-
D05,D0C
|
123
|
+
D00,D0C
|
125
124
|
D0E,D10
|
126
125
|
D12,D44
|
127
126
|
D46,D48
|
128
127
|
D4A,D4F
|
129
128
|
D54,D63
|
130
129
|
D66,D7F
|
131
|
-
|
130
|
+
D81,D83
|
132
131
|
D85,D96
|
133
132
|
D9A,DB1
|
134
133
|
DB3,DBB
|
@@ -214,7 +213,7 @@ FCE,FDA
|
|
214
213
|
1A7F,1A89
|
215
214
|
1A90,1A99
|
216
215
|
1AA0,1AAD
|
217
|
-
1AB0,
|
216
|
+
1AB0,1AC0
|
218
217
|
1B00,1B4B
|
219
218
|
1B50,1B7C
|
220
219
|
1B80,1BF3
|
@@ -252,7 +251,7 @@ FCE,FDA
|
|
252
251
|
2440,244A
|
253
252
|
2460,2B73
|
254
253
|
2B76,2B95
|
255
|
-
|
254
|
+
2B97,2C2E
|
256
255
|
2C30,2C5E
|
257
256
|
2C60,2CF3
|
258
257
|
2CF9,2D25
|
@@ -269,7 +268,7 @@ FCE,FDA
|
|
269
268
|
2DC8,2DCE
|
270
269
|
2DD0,2DD6
|
271
270
|
2DD8,2DDE
|
272
|
-
2DE0,
|
271
|
+
2DE0,2E52
|
273
272
|
2E80,2E99
|
274
273
|
2E9B,2EF3
|
275
274
|
2F00,2FD5
|
@@ -279,18 +278,16 @@ FCE,FDA
|
|
279
278
|
3099,30FF
|
280
279
|
3105,312F
|
281
280
|
3131,318E
|
282
|
-
3190,
|
283
|
-
31C0,31E3
|
281
|
+
3190,31E3
|
284
282
|
31F0,321E
|
285
|
-
3220,
|
286
|
-
4DC0,9FEF
|
283
|
+
3220,9FFC
|
287
284
|
A000,A48C
|
288
285
|
A490,A4C6
|
289
286
|
A4D0,A62B
|
290
287
|
A640,A6F7
|
291
288
|
A700,A7BF
|
292
|
-
A7C2,
|
293
|
-
|
289
|
+
A7C2,A7CA
|
290
|
+
A7F5,A82C
|
294
291
|
A830,A839
|
295
292
|
A840,A877
|
296
293
|
A880,A8C5
|
@@ -310,7 +307,7 @@ AB09,AB0E
|
|
310
307
|
AB11,AB16
|
311
308
|
AB20,AB26
|
312
309
|
AB28,AB2E
|
313
|
-
AB30,
|
310
|
+
AB30,AB6B
|
314
311
|
AB70,ABED
|
315
312
|
ABF0,ABF9
|
316
313
|
AC00,D7A3
|
@@ -355,7 +352,7 @@ FFF9,FFFD
|
|
355
352
|
10100,10102
|
356
353
|
10107,10133
|
357
354
|
10137,1018E
|
358
|
-
10190,
|
355
|
+
10190,1019C
|
359
356
|
101A0,101A0
|
360
357
|
101D0,101FD
|
361
358
|
10280,1029C
|
@@ -415,8 +412,12 @@ FFF9,FFFD
|
|
415
412
|
10CFA,10D27
|
416
413
|
10D30,10D39
|
417
414
|
10E60,10E7E
|
415
|
+
10E80,10EA9
|
416
|
+
10EAB,10EAD
|
417
|
+
10EB0,10EB1
|
418
418
|
10F00,10F27
|
419
419
|
10F30,10F59
|
420
|
+
10FB0,10FCB
|
420
421
|
10FE0,10FF6
|
421
422
|
11000,1104D
|
422
423
|
11052,1106F
|
@@ -425,10 +426,9 @@ FFF9,FFFD
|
|
425
426
|
110D0,110E8
|
426
427
|
110F0,110F9
|
427
428
|
11100,11134
|
428
|
-
11136,
|
429
|
+
11136,11147
|
429
430
|
11150,11176
|
430
|
-
11180,
|
431
|
-
111D0,111DF
|
431
|
+
11180,111DF
|
432
432
|
111E1,111F4
|
433
433
|
11200,11211
|
434
434
|
11213,1123E
|
@@ -454,9 +454,8 @@ FFF9,FFFD
|
|
454
454
|
1135D,11363
|
455
455
|
11366,1136C
|
456
456
|
11370,11374
|
457
|
-
11400,
|
458
|
-
|
459
|
-
1145D,1145F
|
457
|
+
11400,1145B
|
458
|
+
1145D,11461
|
460
459
|
11480,114C7
|
461
460
|
114D0,114D9
|
462
461
|
11580,115B5
|
@@ -471,7 +470,14 @@ FFF9,FFFD
|
|
471
470
|
11730,1173F
|
472
471
|
11800,1183B
|
473
472
|
118A0,118F2
|
474
|
-
118FF,
|
473
|
+
118FF,11906
|
474
|
+
11909,11909
|
475
|
+
1190C,11913
|
476
|
+
11915,11916
|
477
|
+
11918,11935
|
478
|
+
11937,11938
|
479
|
+
1193B,11946
|
480
|
+
11950,11959
|
475
481
|
119A0,119A7
|
476
482
|
119AA,119D7
|
477
483
|
119DA,119E4
|
@@ -499,6 +505,7 @@ FFF9,FFFD
|
|
499
505
|
11D93,11D98
|
500
506
|
11DA0,11DA9
|
501
507
|
11EE0,11EF8
|
508
|
+
11FB0,11FB0
|
502
509
|
11FC0,11FF1
|
503
510
|
11FFF,12399
|
504
511
|
12400,1246E
|
@@ -522,9 +529,11 @@ FFF9,FFFD
|
|
522
529
|
16F00,16F4A
|
523
530
|
16F4F,16F87
|
524
531
|
16F8F,16F9F
|
525
|
-
16FE0,
|
532
|
+
16FE0,16FE4
|
533
|
+
16FF0,16FF1
|
526
534
|
17000,187F7
|
527
|
-
18800,
|
535
|
+
18800,18CD5
|
536
|
+
18D00,18D08
|
528
537
|
1B000,1B11E
|
529
538
|
1B150,1B152
|
530
539
|
1B164,1B167
|
@@ -622,17 +631,15 @@ FFF9,FFFD
|
|
622
631
|
1F0B1,1F0BF
|
623
632
|
1F0C1,1F0CF
|
624
633
|
1F0D1,1F0F5
|
625
|
-
1F100,
|
626
|
-
1F110,1F16C
|
627
|
-
1F170,1F1AC
|
634
|
+
1F100,1F1AD
|
628
635
|
1F1E6,1F202
|
629
636
|
1F210,1F23B
|
630
637
|
1F240,1F248
|
631
638
|
1F250,1F251
|
632
639
|
1F260,1F265
|
633
|
-
1F300,
|
640
|
+
1F300,1F6D7
|
634
641
|
1F6E0,1F6EC
|
635
|
-
1F6F0,
|
642
|
+
1F6F0,1F6FC
|
636
643
|
1F700,1F773
|
637
644
|
1F780,1F7D8
|
638
645
|
1F7E0,1F7EB
|
@@ -641,24 +648,28 @@ FFF9,FFFD
|
|
641
648
|
1F850,1F859
|
642
649
|
1F860,1F887
|
643
650
|
1F890,1F8AD
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
1F97A,1F9A2
|
648
|
-
1F9A5,1F9AA
|
649
|
-
1F9AE,1F9CA
|
651
|
+
1F8B0,1F8B1
|
652
|
+
1F900,1F978
|
653
|
+
1F97A,1F9CB
|
650
654
|
1F9CD,1FA53
|
651
655
|
1FA60,1FA6D
|
652
|
-
1FA70,
|
656
|
+
1FA70,1FA74
|
653
657
|
1FA78,1FA7A
|
654
|
-
1FA80,
|
655
|
-
1FA90,
|
656
|
-
|
658
|
+
1FA80,1FA86
|
659
|
+
1FA90,1FAA8
|
660
|
+
1FAB0,1FAB6
|
661
|
+
1FAC0,1FAC2
|
662
|
+
1FAD0,1FAD6
|
663
|
+
1FB00,1FB92
|
664
|
+
1FB94,1FBCA
|
665
|
+
1FBF0,1FBF9
|
666
|
+
20000,2A6DD
|
657
667
|
2A700,2B734
|
658
668
|
2B740,2B81D
|
659
669
|
2B820,2CEA1
|
660
670
|
2CEB0,2EBE0
|
661
671
|
2F800,2FA1D
|
672
|
+
30000,3134A
|
662
673
|
E0001,E0001
|
663
674
|
E0020,E007F
|
664
675
|
E0100,E01EF
|
@@ -44,6 +44,7 @@ AE,AE
|
|
44
44
|
2699,2699
|
45
45
|
269B,269C
|
46
46
|
26A0,26A1
|
47
|
+
26A7,26A7
|
47
48
|
26AA,26AB
|
48
49
|
26B0,26B1
|
49
50
|
26BD,26BE
|
@@ -130,22 +131,22 @@ AE,AE
|
|
130
131
|
1F5FA,1F64F
|
131
132
|
1F680,1F6C5
|
132
133
|
1F6CB,1F6D2
|
133
|
-
1F6D5,
|
134
|
+
1F6D5,1F6D7
|
134
135
|
1F6E0,1F6E5
|
135
136
|
1F6E9,1F6E9
|
136
137
|
1F6EB,1F6EC
|
137
138
|
1F6F0,1F6F0
|
138
|
-
1F6F3,
|
139
|
+
1F6F3,1F6FC
|
139
140
|
1F7E0,1F7EB
|
140
|
-
|
141
|
+
1F90C,1F93A
|
141
142
|
1F93C,1F945
|
142
|
-
1F947,
|
143
|
-
|
144
|
-
1F97A,1F9A2
|
145
|
-
1F9A5,1F9AA
|
146
|
-
1F9AE,1F9CA
|
143
|
+
1F947,1F978
|
144
|
+
1F97A,1F9CB
|
147
145
|
1F9CD,1F9FF
|
148
|
-
1FA70,
|
146
|
+
1FA70,1FA74
|
149
147
|
1FA78,1FA7A
|
150
|
-
1FA80,
|
151
|
-
1FA90,
|
148
|
+
1FA80,1FA86
|
149
|
+
1FA90,1FAA8
|
150
|
+
1FAB0,1FAB6
|
151
|
+
1FAC0,1FAC2
|
152
|
+
1FAD0,1FAD6
|
@@ -22,6 +22,17 @@ class CharacterSet
|
|
22
22
|
alias valid unicode
|
23
23
|
|
24
24
|
def build_from_cps_file(path)
|
25
|
+
if defined?(Ractor) && Ractor.current != Ractor.main
|
26
|
+
raise <<-EOS.gsub(/^ */, '')
|
27
|
+
CharacterSet's predefined sets are lazy-loaded.
|
28
|
+
Pre-load them to use them in Ractors. E.g.:
|
29
|
+
|
30
|
+
CharacterSet.ascii # pre-load
|
31
|
+
Ractor.new { CharacterSet.ascii.size }.take # => 128
|
32
|
+
Ractor.new { 'abc'.keep_character_set(:ascii) }.take # => 'abc'
|
33
|
+
EOS
|
34
|
+
end
|
35
|
+
|
25
36
|
File.readlines(path).inject(new) do |set, line|
|
26
37
|
range_start, range_end = line.split(',')
|
27
38
|
set.merge((range_start.to_i(16))..(range_end.to_i(16)))
|
@@ -6,9 +6,9 @@ class CharacterSet
|
|
6
6
|
new(Array(ranges).flat_map(&:to_a))
|
7
7
|
end
|
8
8
|
|
9
|
-
def
|
10
|
-
raise ArgumentError, 'pass a String' unless
|
11
|
-
|
9
|
+
def of_string(str)
|
10
|
+
raise ArgumentError, 'pass a String' unless str.respond_to?(:codepoints)
|
11
|
+
str.codepoints.each_with_object(new) { |cp, set| set << cp }
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
@@ -22,13 +22,14 @@ class CharacterSet
|
|
22
22
|
|
23
23
|
# Allow some methods to take an Enum just as well as another CharacterSet.
|
24
24
|
# Tested by ruby-spec.
|
25
|
-
%w[& + - ^ | difference
|
25
|
+
%w[& + - ^ | difference disjoint? intersect? intersection
|
26
|
+
subtract union].each do |method|
|
26
27
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
27
28
|
def #{method}(arg)
|
28
29
|
if arg.is_a?(CharacterSet)
|
29
|
-
super
|
30
|
+
super(arg)
|
30
31
|
elsif arg.respond_to?(:each)
|
31
|
-
super(
|
32
|
+
super(self.class.new(arg.to_a))
|
32
33
|
else
|
33
34
|
raise ArgumentError, 'pass an enumerable'
|
34
35
|
end
|
@@ -15,6 +15,12 @@ class CharacterSet
|
|
15
15
|
new(Array(args))
|
16
16
|
end
|
17
17
|
|
18
|
+
def of(*args)
|
19
|
+
args.map do |arg|
|
20
|
+
arg.is_a?(Regexp) ? of_regexp(arg) : of_string(arg)
|
21
|
+
end.reduce(:merge) || new
|
22
|
+
end
|
23
|
+
|
18
24
|
def parse(string)
|
19
25
|
codepoints = Parser.codepoints_from_bracket_expression(string)
|
20
26
|
result = new(codepoints)
|
@@ -36,7 +42,7 @@ class CharacterSet
|
|
36
42
|
end
|
37
43
|
|
38
44
|
def of_expression(expression)
|
39
|
-
ExpressionConverter.convert(expression)
|
45
|
+
ExpressionConverter.convert(expression, self)
|
40
46
|
end
|
41
47
|
|
42
48
|
def require_optional_dependency(name, method)
|
@@ -90,6 +96,14 @@ class CharacterSet
|
|
90
96
|
Writer.write_surrogate_alternation(bmp_part.ranges, astral_part.ranges)
|
91
97
|
end
|
92
98
|
|
99
|
+
def secure_token(length = 32)
|
100
|
+
CharacterSet.require_optional_dependency('securerandom', __method__)
|
101
|
+
cps = to_a
|
102
|
+
len = cps.count
|
103
|
+
1.upto(length).map { cps[SecureRandom.random_number(len)] }.pack('U*')
|
104
|
+
end
|
105
|
+
alias random_token secure_token
|
106
|
+
|
93
107
|
def inspect
|
94
108
|
len = length
|
95
109
|
"#<#{klass.name}: {\#{first(5) * ', '}\#{'...' if len > 5}} (size: \#{len})>"
|