character_set 1.4.1-java → 1.5.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/gouteur.yml +20 -0
- data/.gitignore +1 -0
- data/.gouteur.yml +2 -0
- data/.rubocop.yml +7 -1
- data/CHANGELOG.md +17 -0
- data/README.md +14 -1
- data/character_set.gemspec +2 -1
- data/ext/character_set/character_set.c +10 -5
- data/ext/character_set/unicode_casefold_table.h +44 -1
- data/lib/character_set/expression_converter.rb +23 -23
- data/lib/character_set/predefined_sets/assigned.cps +51 -40
- data/lib/character_set/predefined_sets/emoji.cps +12 -11
- data/lib/character_set/ruby_fallback/character_set_methods.rb +7 -3
- data/lib/character_set/set_method_adapters.rb +4 -3
- data/lib/character_set/shared_methods.rb +9 -1
- data/lib/character_set/version.rb +1 -1
- metadata +21 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 52823f4f35cdec44378c3828b4b38eba1f9f5bce402a70962eae1fb786132d8c
|
4
|
+
data.tar.gz: b1d6419575a3614675c194cbfde8530be02195cc73365a8c6ca446dd6ea909e6
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: c021975f912100174a5274454cfc6099a0955262e7e5fac619989a0a2aa5d624e048fe8b5f68b167157aca425c771df1bf137be12924b609b8d30dff1608142e
|
7
|
+
data.tar.gz: 1b702ea538bc5a5209c3544c88c9b38d328080db52640bb4a5780454d296970d8c2557ebe9c3cdd014e3a20af254c77fe694a9f56f09f3f29c039aef81dc381f
|
@@ -0,0 +1,20 @@
|
|
1
|
+
name: gouteur
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
build:
|
7
|
+
runs-on: ubuntu-latest
|
8
|
+
|
9
|
+
steps:
|
10
|
+
- uses: actions/checkout@v2
|
11
|
+
- name: Set up Ruby
|
12
|
+
uses: ruby/setup-ruby@v1
|
13
|
+
with:
|
14
|
+
ruby-version: 2.7
|
15
|
+
- name: Prepare
|
16
|
+
run: |
|
17
|
+
bundle install --jobs 4
|
18
|
+
bundle exec rake compile
|
19
|
+
- name: Test
|
20
|
+
run: bundle exec gouteur
|
data/.gitignore
CHANGED
data/.gouteur.yml
ADDED
data/.rubocop.yml
CHANGED
@@ -8,4 +8,10 @@ AllCops:
|
|
8
8
|
RubyInterpreters:
|
9
9
|
- ruby
|
10
10
|
- rake
|
11
|
-
TargetRubyVersion: 2.
|
11
|
+
TargetRubyVersion: 2.5 # really 2.1, but 2.5 is lowest supported by rubocop
|
12
|
+
|
13
|
+
Lint/AmbiguousOperatorPrecedence:
|
14
|
+
Enabled: false
|
15
|
+
|
16
|
+
Lint/AmbiguousRegexpLiteral:
|
17
|
+
Enabled: false
|
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,23 @@ All notable changes to this project will be documented in this file.
|
|
4
4
|
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
5
5
|
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
6
6
|
|
7
|
+
## [1.5.0] - 2021-12-05
|
8
|
+
|
9
|
+
### Added
|
10
|
+
|
11
|
+
- new codepoints for `::assigned` and `::emoji` predefined sets, as in Ruby 3.1.0
|
12
|
+
- latest unicode case-folding data (for `#case_insensitive`)
|
13
|
+
- support for passing any Enumerable to `#disjoint?`, `#intersect?`
|
14
|
+
- this matches recent broadening of these methods in `ruby/set`
|
15
|
+
- new instance method `#secure_token` (see README)
|
16
|
+
- class method `::of` now accepts more than one `String`
|
17
|
+
- `CharacterSet::ExpressionConverter` can now build output of any Set-like class
|
18
|
+
|
19
|
+
### Fixed
|
20
|
+
|
21
|
+
- `CharacterSet::Pure::of_expression` now returns a `CharacterSet::Pure`
|
22
|
+
- it used to return a regular `CharacterSet`
|
23
|
+
|
7
24
|
## [1.4.1] - 2020-01-10
|
8
25
|
|
9
26
|
### Fixed
|
data/README.md
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
[](http://badge.fury.io/rb/character_set)
|
4
4
|
[](https://github.com/jaynetics/character_set/actions)
|
5
|
+
[](https://github.com/jaynetics/character_set/actions)
|
5
6
|
[](https://codecov.io/gh/jaynetics/character_set)
|
6
7
|
|
7
8
|
This is a C-extended Ruby gem to work with sets of Unicode codepoints. It can read and write these sets in various formats and implements the stdlib `Set` interface for them.
|
@@ -10,6 +11,7 @@ It also offers an alternate paradigm of `String` processing which grants much be
|
|
10
11
|
|
11
12
|
Many parts can be used independently, e.g.:
|
12
13
|
- `CharacterSet::Character`
|
14
|
+
- `CharacterSet::ExpressionConverter`
|
13
15
|
- `CharacterSet::Parser`
|
14
16
|
- `CharacterSet::Writer`
|
15
17
|
- [`RangeCompressor`](https://github.com/jaynetics/range_compressor)
|
@@ -181,7 +183,18 @@ set.to_s_with_surrogate_alternation
|
|
181
183
|
# => '(?:[ab]|\uD83E\uDD29|\uD83E\uDD2A|\uD83E\uDD2B)'
|
182
184
|
```
|
183
185
|
|
184
|
-
###
|
186
|
+
### Other features
|
187
|
+
|
188
|
+
#### Secure tokens
|
189
|
+
|
190
|
+
Generate secure random strings of characters from a set:
|
191
|
+
|
192
|
+
```ruby
|
193
|
+
CharacterSet.new('a'..'z').secure_token(8) # => "ugwpujmt"
|
194
|
+
CharacterSet.crypt.secure_token # => "8.1w7aBT737/pMfcMoO4y2y8/=0xtmo:"
|
195
|
+
```
|
196
|
+
|
197
|
+
#### Unicode planes
|
185
198
|
|
186
199
|
There are some methods to check for planes and to handle ASCII, [BMP](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane) and astral parts:
|
187
200
|
```Ruby
|
data/character_set.gemspec
CHANGED
@@ -34,11 +34,12 @@ Gem::Specification.new do |s|
|
|
34
34
|
s.add_development_dependency 'rake', '~> 13.0'
|
35
35
|
s.add_development_dependency 'rake-compiler', '~> 1.1'
|
36
36
|
s.add_development_dependency 'range_compressor', '~> 1.0'
|
37
|
-
s.add_development_dependency 'regexp_parser', '~> 1
|
37
|
+
s.add_development_dependency 'regexp_parser', '~> 2.1'
|
38
38
|
s.add_development_dependency 'regexp_property_values', '~> 1.0'
|
39
39
|
s.add_development_dependency 'rspec', '~> 3.8'
|
40
40
|
if RUBY_VERSION.to_f >= 2.7
|
41
41
|
s.add_development_dependency 'codecov', '~> 0.2.12'
|
42
|
+
s.add_development_dependency 'gouteur', '~> 1.0.0'
|
42
43
|
s.add_development_dependency 'rubocop', '~> 1.8'
|
43
44
|
end
|
44
45
|
end
|
@@ -705,7 +705,8 @@ cs_method_ranges(VALUE self)
|
|
705
705
|
|
706
706
|
if (!previous_cp_num) {
|
707
707
|
current_start = cp_num;
|
708
|
-
} else if (previous_cp_num + 2 != cp_num)
|
708
|
+
} else if (previous_cp_num + 2 != cp_num)
|
709
|
+
{
|
709
710
|
// gap found, finalize previous range
|
710
711
|
rb_ary_push(ranges, rb_range_new(current_start, current_end, 0));
|
711
712
|
current_start = cp_num;
|
@@ -1046,13 +1047,17 @@ raise_arg_err_unless_string(VALUE val)
|
|
1046
1047
|
}
|
1047
1048
|
|
1048
1049
|
static VALUE
|
1049
|
-
cs_class_method_of(VALUE
|
1050
|
+
cs_class_method_of(int argc, VALUE *argv, VALUE self)
|
1050
1051
|
{
|
1051
1052
|
VALUE new_cs;
|
1052
1053
|
struct cs_data *new_data;
|
1054
|
+
int i;
|
1053
1055
|
new_cs = cs_alloc(self, &new_data);
|
1054
|
-
|
1055
|
-
|
1056
|
+
for (i = 0; i < argc; i++)
|
1057
|
+
{
|
1058
|
+
raise_arg_err_unless_string(argv[i]);
|
1059
|
+
each_cp(argv[i], add_str_cp_to_arr, 0, 0, new_data, 0);
|
1060
|
+
}
|
1056
1061
|
return new_cs;
|
1057
1062
|
}
|
1058
1063
|
|
@@ -1338,7 +1343,7 @@ void Init_character_set()
|
|
1338
1343
|
// `CharacterSet`-specific methods
|
1339
1344
|
|
1340
1345
|
rb_define_singleton_method(cs, "from_ranges", cs_class_method_from_ranges, -2);
|
1341
|
-
rb_define_singleton_method(cs, "of", cs_class_method_of, 1);
|
1346
|
+
rb_define_singleton_method(cs, "of", cs_class_method_of, -1);
|
1342
1347
|
|
1343
1348
|
rb_define_method(cs, "ranges", cs_method_ranges, 0);
|
1344
1349
|
rb_define_method(cs, "sample", cs_method_sample, -1);
|
@@ -6,7 +6,7 @@ typedef struct casefold_mapping {
|
|
6
6
|
unsigned long to;
|
7
7
|
} casefold_mapping;
|
8
8
|
|
9
|
-
#define CASEFOLD_COUNT
|
9
|
+
#define CASEFOLD_COUNT 1426
|
10
10
|
|
11
11
|
static const casefold_mapping unicode_casefold_table[CASEFOLD_COUNT] = {
|
12
12
|
{0x0041,0x0061},
|
@@ -564,6 +564,41 @@ static const casefold_mapping unicode_casefold_table[CASEFOLD_COUNT] = {
|
|
564
564
|
{0x104D1,0x104F9},
|
565
565
|
{0x104D2,0x104FA},
|
566
566
|
{0x104D3,0x104FB},
|
567
|
+
{0x10570,0x10597},
|
568
|
+
{0x10571,0x10598},
|
569
|
+
{0x10572,0x10599},
|
570
|
+
{0x10573,0x1059A},
|
571
|
+
{0x10574,0x1059B},
|
572
|
+
{0x10575,0x1059C},
|
573
|
+
{0x10576,0x1059D},
|
574
|
+
{0x10577,0x1059E},
|
575
|
+
{0x10578,0x1059F},
|
576
|
+
{0x10579,0x105A0},
|
577
|
+
{0x1057A,0x105A1},
|
578
|
+
{0x1057C,0x105A3},
|
579
|
+
{0x1057D,0x105A4},
|
580
|
+
{0x1057E,0x105A5},
|
581
|
+
{0x1057F,0x105A6},
|
582
|
+
{0x10580,0x105A7},
|
583
|
+
{0x10581,0x105A8},
|
584
|
+
{0x10582,0x105A9},
|
585
|
+
{0x10583,0x105AA},
|
586
|
+
{0x10584,0x105AB},
|
587
|
+
{0x10585,0x105AC},
|
588
|
+
{0x10586,0x105AD},
|
589
|
+
{0x10587,0x105AE},
|
590
|
+
{0x10588,0x105AF},
|
591
|
+
{0x10589,0x105B0},
|
592
|
+
{0x1058A,0x105B1},
|
593
|
+
{0x1058C,0x105B3},
|
594
|
+
{0x1058D,0x105B4},
|
595
|
+
{0x1058E,0x105B5},
|
596
|
+
{0x1058F,0x105B6},
|
597
|
+
{0x10590,0x105B7},
|
598
|
+
{0x10591,0x105B8},
|
599
|
+
{0x10592,0x105B9},
|
600
|
+
{0x10594,0x105BB},
|
601
|
+
{0x10595,0x105BC},
|
567
602
|
{0x10A0,0x2D00},
|
568
603
|
{0x10A1,0x2D01},
|
569
604
|
{0x10A2,0x2D02},
|
@@ -1102,6 +1137,7 @@ static const casefold_mapping unicode_casefold_table[CASEFOLD_COUNT] = {
|
|
1102
1137
|
{0x2C2C,0x2C5C},
|
1103
1138
|
{0x2C2D,0x2C5D},
|
1104
1139
|
{0x2C2E,0x2C5E},
|
1140
|
+
{0x2C2F,0x2C5F},
|
1105
1141
|
{0x2C60,0x2C61},
|
1106
1142
|
{0x2C62,0x026B},
|
1107
1143
|
{0x2C63,0x1D7D},
|
@@ -1282,10 +1318,17 @@ static const casefold_mapping unicode_casefold_table[CASEFOLD_COUNT] = {
|
|
1282
1318
|
{0xA7BA,0xA7BB},
|
1283
1319
|
{0xA7BC,0xA7BD},
|
1284
1320
|
{0xA7BE,0xA7BF},
|
1321
|
+
{0xA7C0,0xA7C1},
|
1285
1322
|
{0xA7C2,0xA7C3},
|
1286
1323
|
{0xA7C4,0xA794},
|
1287
1324
|
{0xA7C5,0x0282},
|
1288
1325
|
{0xA7C6,0x1D8E},
|
1326
|
+
{0xA7C7,0xA7C8},
|
1327
|
+
{0xA7C9,0xA7CA},
|
1328
|
+
{0xA7D0,0xA7D1},
|
1329
|
+
{0xA7D6,0xA7D7},
|
1330
|
+
{0xA7D8,0xA7D9},
|
1331
|
+
{0xA7F5,0xA7F6},
|
1289
1332
|
{0xAB70,0x13A0},
|
1290
1333
|
{0xAB71,0x13A1},
|
1291
1334
|
{0xAB72,0x13A2},
|
@@ -4,7 +4,7 @@ class CharacterSet
|
|
4
4
|
|
5
5
|
Error = Class.new(ArgumentError)
|
6
6
|
|
7
|
-
def convert(expression)
|
7
|
+
def convert(expression, to = CharacterSet)
|
8
8
|
CharacterSet.require_optional_dependency('regexp_parser', __method__)
|
9
9
|
|
10
10
|
case expression
|
@@ -12,49 +12,49 @@ class CharacterSet
|
|
12
12
|
if expression.count != 1
|
13
13
|
raise Error, 'Pass a Regexp with exactly one expression, e.g. /[a-z]/'
|
14
14
|
end
|
15
|
-
convert(expression[0])
|
15
|
+
convert(expression[0], to)
|
16
16
|
|
17
17
|
when Regexp::Expression::CharacterSet
|
18
|
-
content = expression.map { |subexp| convert(subexp) }.reduce(:+)
|
19
|
-
content ||=
|
18
|
+
content = expression.map { |subexp| convert(subexp, to) }.reduce(:+)
|
19
|
+
content ||= to[]
|
20
20
|
expression.negative? ? content.inversion : content
|
21
21
|
|
22
22
|
when Regexp::Expression::CharacterSet::Intersection
|
23
|
-
expression.map { |subexp| convert(subexp) }.reduce(:&)
|
23
|
+
expression.map { |subexp| convert(subexp, to) }.reduce(:&)
|
24
24
|
|
25
25
|
when Regexp::Expression::CharacterSet::IntersectedSequence
|
26
|
-
expression.map { |subexp| convert(subexp) }.reduce(:+) ||
|
26
|
+
expression.map { |subexp| convert(subexp, to) }.reduce(:+) || to[]
|
27
27
|
|
28
28
|
when Regexp::Expression::CharacterSet::Range
|
29
|
-
start, finish = expression.map { |subexp| convert(subexp) }
|
30
|
-
|
29
|
+
start, finish = expression.map { |subexp| convert(subexp, to) }
|
30
|
+
to.new((start.min)..(finish.max))
|
31
31
|
|
32
32
|
when Regexp::Expression::CharacterType::Any
|
33
|
-
|
33
|
+
to.unicode
|
34
34
|
|
35
35
|
when Regexp::Expression::CharacterType::Base
|
36
36
|
/(?<negative>non)?(?<base_name>.+)/ =~ expression.token
|
37
37
|
content =
|
38
38
|
if expression.unicode_classes?
|
39
39
|
# in u-mode, type shortcuts match the same as \p{<long type name>}
|
40
|
-
|
40
|
+
to.of_property(base_name)
|
41
41
|
else
|
42
42
|
# in normal mode, types match only ascii chars
|
43
43
|
case base_name.to_sym
|
44
|
-
when :digit then
|
45
|
-
when :hex then
|
46
|
-
when :space then
|
47
|
-
when :word then
|
44
|
+
when :digit then to.from_ranges(48..57)
|
45
|
+
when :hex then to.from_ranges(48..57, 65..70, 97..102)
|
46
|
+
when :space then to.from_ranges(9..13, 32..32)
|
47
|
+
when :word then to.from_ranges(48..57, 65..90, 95..95, 97..122)
|
48
48
|
else raise Error, "Unsupported CharacterType #{base_name}"
|
49
49
|
end
|
50
50
|
end
|
51
51
|
negative ? content.inversion : content
|
52
52
|
|
53
53
|
when Regexp::Expression::EscapeSequence::CodepointList
|
54
|
-
|
54
|
+
to.new(expression.codepoints)
|
55
55
|
|
56
56
|
when Regexp::Expression::EscapeSequence::Base
|
57
|
-
|
57
|
+
to[expression.codepoint]
|
58
58
|
|
59
59
|
when Regexp::Expression::Group::Capture,
|
60
60
|
Regexp::Expression::Group::Passive,
|
@@ -62,19 +62,19 @@ class CharacterSet
|
|
62
62
|
Regexp::Expression::Group::Atomic,
|
63
63
|
Regexp::Expression::Group::Options
|
64
64
|
case expression.count
|
65
|
-
when 0 then
|
66
|
-
when 1 then convert(expression.first)
|
65
|
+
when 0 then to[]
|
66
|
+
when 1 then convert(expression.first, to)
|
67
67
|
else
|
68
68
|
raise Error, 'Groups must contain exactly one expression, e.g. ([a-z])'
|
69
69
|
end
|
70
70
|
|
71
71
|
when Regexp::Expression::Alternation # rubocop:disable Lint/DuplicateBranch
|
72
|
-
expression.map { |subexp| convert(subexp) }.reduce(:+)
|
72
|
+
expression.map { |subexp| convert(subexp, to) }.reduce(:+)
|
73
73
|
|
74
74
|
when Regexp::Expression::Alternative
|
75
75
|
case expression.count
|
76
|
-
when 0 then
|
77
|
-
when 1 then convert(expression.first)
|
76
|
+
when 0 then to[]
|
77
|
+
when 1 then convert(expression.first, to)
|
78
78
|
else
|
79
79
|
raise Error, 'Alternatives must contain exactly one expression'
|
80
80
|
end
|
@@ -83,11 +83,11 @@ class CharacterSet
|
|
83
83
|
if expression.set_level == 0 && expression.text.size != 1
|
84
84
|
raise Error, 'Literal runs outside of sets are codepoint *sequences*'
|
85
85
|
end
|
86
|
-
|
86
|
+
to[expression.text.ord]
|
87
87
|
|
88
88
|
when Regexp::Expression::UnicodeProperty::Base,
|
89
89
|
Regexp::Expression::PosixClass
|
90
|
-
content =
|
90
|
+
content = to.of_property(expression.token)
|
91
91
|
if expression.type == :posixclass && expression.ascii_classes?
|
92
92
|
content = content.ascii_part
|
93
93
|
end
|
@@ -21,7 +21,7 @@
|
|
21
21
|
85E,85E
|
22
22
|
860,86A
|
23
23
|
8A0,8B4
|
24
|
-
8B6,
|
24
|
+
8B6,8C7
|
25
25
|
8D3,983
|
26
26
|
985,98C
|
27
27
|
98F,990
|
@@ -76,7 +76,7 @@ B35,B39
|
|
76
76
|
B3C,B44
|
77
77
|
B47,B48
|
78
78
|
B4B,B4D
|
79
|
-
|
79
|
+
B55,B57
|
80
80
|
B5C,B5D
|
81
81
|
B5F,B63
|
82
82
|
B66,B77
|
@@ -120,15 +120,14 @@ CDE,CDE
|
|
120
120
|
CE0,CE3
|
121
121
|
CE6,CEF
|
122
122
|
CF1,CF2
|
123
|
-
D00,
|
124
|
-
D05,D0C
|
123
|
+
D00,D0C
|
125
124
|
D0E,D10
|
126
125
|
D12,D44
|
127
126
|
D46,D48
|
128
127
|
D4A,D4F
|
129
128
|
D54,D63
|
130
129
|
D66,D7F
|
131
|
-
|
130
|
+
D81,D83
|
132
131
|
D85,D96
|
133
132
|
D9A,DB1
|
134
133
|
DB3,DBB
|
@@ -214,7 +213,7 @@ FCE,FDA
|
|
214
213
|
1A7F,1A89
|
215
214
|
1A90,1A99
|
216
215
|
1AA0,1AAD
|
217
|
-
1AB0,
|
216
|
+
1AB0,1AC0
|
218
217
|
1B00,1B4B
|
219
218
|
1B50,1B7C
|
220
219
|
1B80,1BF3
|
@@ -252,7 +251,7 @@ FCE,FDA
|
|
252
251
|
2440,244A
|
253
252
|
2460,2B73
|
254
253
|
2B76,2B95
|
255
|
-
|
254
|
+
2B97,2C2E
|
256
255
|
2C30,2C5E
|
257
256
|
2C60,2CF3
|
258
257
|
2CF9,2D25
|
@@ -269,7 +268,7 @@ FCE,FDA
|
|
269
268
|
2DC8,2DCE
|
270
269
|
2DD0,2DD6
|
271
270
|
2DD8,2DDE
|
272
|
-
2DE0,
|
271
|
+
2DE0,2E52
|
273
272
|
2E80,2E99
|
274
273
|
2E9B,2EF3
|
275
274
|
2F00,2FD5
|
@@ -279,18 +278,16 @@ FCE,FDA
|
|
279
278
|
3099,30FF
|
280
279
|
3105,312F
|
281
280
|
3131,318E
|
282
|
-
3190,
|
283
|
-
31C0,31E3
|
281
|
+
3190,31E3
|
284
282
|
31F0,321E
|
285
|
-
3220,
|
286
|
-
4DC0,9FEF
|
283
|
+
3220,9FFC
|
287
284
|
A000,A48C
|
288
285
|
A490,A4C6
|
289
286
|
A4D0,A62B
|
290
287
|
A640,A6F7
|
291
288
|
A700,A7BF
|
292
|
-
A7C2,
|
293
|
-
|
289
|
+
A7C2,A7CA
|
290
|
+
A7F5,A82C
|
294
291
|
A830,A839
|
295
292
|
A840,A877
|
296
293
|
A880,A8C5
|
@@ -310,7 +307,7 @@ AB09,AB0E
|
|
310
307
|
AB11,AB16
|
311
308
|
AB20,AB26
|
312
309
|
AB28,AB2E
|
313
|
-
AB30,
|
310
|
+
AB30,AB6B
|
314
311
|
AB70,ABED
|
315
312
|
ABF0,ABF9
|
316
313
|
AC00,D7A3
|
@@ -355,7 +352,7 @@ FFF9,FFFD
|
|
355
352
|
10100,10102
|
356
353
|
10107,10133
|
357
354
|
10137,1018E
|
358
|
-
10190,
|
355
|
+
10190,1019C
|
359
356
|
101A0,101A0
|
360
357
|
101D0,101FD
|
361
358
|
10280,1029C
|
@@ -415,8 +412,12 @@ FFF9,FFFD
|
|
415
412
|
10CFA,10D27
|
416
413
|
10D30,10D39
|
417
414
|
10E60,10E7E
|
415
|
+
10E80,10EA9
|
416
|
+
10EAB,10EAD
|
417
|
+
10EB0,10EB1
|
418
418
|
10F00,10F27
|
419
419
|
10F30,10F59
|
420
|
+
10FB0,10FCB
|
420
421
|
10FE0,10FF6
|
421
422
|
11000,1104D
|
422
423
|
11052,1106F
|
@@ -425,10 +426,9 @@ FFF9,FFFD
|
|
425
426
|
110D0,110E8
|
426
427
|
110F0,110F9
|
427
428
|
11100,11134
|
428
|
-
11136,
|
429
|
+
11136,11147
|
429
430
|
11150,11176
|
430
|
-
11180,
|
431
|
-
111D0,111DF
|
431
|
+
11180,111DF
|
432
432
|
111E1,111F4
|
433
433
|
11200,11211
|
434
434
|
11213,1123E
|
@@ -454,9 +454,8 @@ FFF9,FFFD
|
|
454
454
|
1135D,11363
|
455
455
|
11366,1136C
|
456
456
|
11370,11374
|
457
|
-
11400,
|
458
|
-
|
459
|
-
1145D,1145F
|
457
|
+
11400,1145B
|
458
|
+
1145D,11461
|
460
459
|
11480,114C7
|
461
460
|
114D0,114D9
|
462
461
|
11580,115B5
|
@@ -471,7 +470,14 @@ FFF9,FFFD
|
|
471
470
|
11730,1173F
|
472
471
|
11800,1183B
|
473
472
|
118A0,118F2
|
474
|
-
118FF,
|
473
|
+
118FF,11906
|
474
|
+
11909,11909
|
475
|
+
1190C,11913
|
476
|
+
11915,11916
|
477
|
+
11918,11935
|
478
|
+
11937,11938
|
479
|
+
1193B,11946
|
480
|
+
11950,11959
|
475
481
|
119A0,119A7
|
476
482
|
119AA,119D7
|
477
483
|
119DA,119E4
|
@@ -499,6 +505,7 @@ FFF9,FFFD
|
|
499
505
|
11D93,11D98
|
500
506
|
11DA0,11DA9
|
501
507
|
11EE0,11EF8
|
508
|
+
11FB0,11FB0
|
502
509
|
11FC0,11FF1
|
503
510
|
11FFF,12399
|
504
511
|
12400,1246E
|
@@ -522,9 +529,11 @@ FFF9,FFFD
|
|
522
529
|
16F00,16F4A
|
523
530
|
16F4F,16F87
|
524
531
|
16F8F,16F9F
|
525
|
-
16FE0,
|
532
|
+
16FE0,16FE4
|
533
|
+
16FF0,16FF1
|
526
534
|
17000,187F7
|
527
|
-
18800,
|
535
|
+
18800,18CD5
|
536
|
+
18D00,18D08
|
528
537
|
1B000,1B11E
|
529
538
|
1B150,1B152
|
530
539
|
1B164,1B167
|
@@ -622,17 +631,15 @@ FFF9,FFFD
|
|
622
631
|
1F0B1,1F0BF
|
623
632
|
1F0C1,1F0CF
|
624
633
|
1F0D1,1F0F5
|
625
|
-
1F100,
|
626
|
-
1F110,1F16C
|
627
|
-
1F170,1F1AC
|
634
|
+
1F100,1F1AD
|
628
635
|
1F1E6,1F202
|
629
636
|
1F210,1F23B
|
630
637
|
1F240,1F248
|
631
638
|
1F250,1F251
|
632
639
|
1F260,1F265
|
633
|
-
1F300,
|
640
|
+
1F300,1F6D7
|
634
641
|
1F6E0,1F6EC
|
635
|
-
1F6F0,
|
642
|
+
1F6F0,1F6FC
|
636
643
|
1F700,1F773
|
637
644
|
1F780,1F7D8
|
638
645
|
1F7E0,1F7EB
|
@@ -641,24 +648,28 @@ FFF9,FFFD
|
|
641
648
|
1F850,1F859
|
642
649
|
1F860,1F887
|
643
650
|
1F890,1F8AD
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
1F97A,1F9A2
|
648
|
-
1F9A5,1F9AA
|
649
|
-
1F9AE,1F9CA
|
651
|
+
1F8B0,1F8B1
|
652
|
+
1F900,1F978
|
653
|
+
1F97A,1F9CB
|
650
654
|
1F9CD,1FA53
|
651
655
|
1FA60,1FA6D
|
652
|
-
1FA70,
|
656
|
+
1FA70,1FA74
|
653
657
|
1FA78,1FA7A
|
654
|
-
1FA80,
|
655
|
-
1FA90,
|
656
|
-
|
658
|
+
1FA80,1FA86
|
659
|
+
1FA90,1FAA8
|
660
|
+
1FAB0,1FAB6
|
661
|
+
1FAC0,1FAC2
|
662
|
+
1FAD0,1FAD6
|
663
|
+
1FB00,1FB92
|
664
|
+
1FB94,1FBCA
|
665
|
+
1FBF0,1FBF9
|
666
|
+
20000,2A6DD
|
657
667
|
2A700,2B734
|
658
668
|
2B740,2B81D
|
659
669
|
2B820,2CEA1
|
660
670
|
2CEB0,2EBE0
|
661
671
|
2F800,2FA1D
|
672
|
+
30000,3134A
|
662
673
|
E0001,E0001
|
663
674
|
E0020,E007F
|
664
675
|
E0100,E01EF
|
@@ -44,6 +44,7 @@ AE,AE
|
|
44
44
|
2699,2699
|
45
45
|
269B,269C
|
46
46
|
26A0,26A1
|
47
|
+
26A7,26A7
|
47
48
|
26AA,26AB
|
48
49
|
26B0,26B1
|
49
50
|
26BD,26BE
|
@@ -130,22 +131,22 @@ AE,AE
|
|
130
131
|
1F5FA,1F64F
|
131
132
|
1F680,1F6C5
|
132
133
|
1F6CB,1F6D2
|
133
|
-
1F6D5,
|
134
|
+
1F6D5,1F6D7
|
134
135
|
1F6E0,1F6E5
|
135
136
|
1F6E9,1F6E9
|
136
137
|
1F6EB,1F6EC
|
137
138
|
1F6F0,1F6F0
|
138
|
-
1F6F3,
|
139
|
+
1F6F3,1F6FC
|
139
140
|
1F7E0,1F7EB
|
140
|
-
|
141
|
+
1F90C,1F93A
|
141
142
|
1F93C,1F945
|
142
|
-
1F947,
|
143
|
-
|
144
|
-
1F97A,1F9A2
|
145
|
-
1F9A5,1F9AA
|
146
|
-
1F9AE,1F9CA
|
143
|
+
1F947,1F978
|
144
|
+
1F97A,1F9CB
|
147
145
|
1F9CD,1F9FF
|
148
|
-
1FA70,
|
146
|
+
1FA70,1FA74
|
149
147
|
1FA78,1FA7A
|
150
|
-
1FA80,
|
151
|
-
1FA90,
|
148
|
+
1FA80,1FA86
|
149
|
+
1FA90,1FAA8
|
150
|
+
1FAB0,1FAB6
|
151
|
+
1FAC0,1FAC2
|
152
|
+
1FAD0,1FAD6
|
@@ -6,9 +6,13 @@ class CharacterSet
|
|
6
6
|
new(Array(ranges).flat_map(&:to_a))
|
7
7
|
end
|
8
8
|
|
9
|
-
def of(
|
10
|
-
|
11
|
-
|
9
|
+
def of(*strings)
|
10
|
+
new_set = new
|
11
|
+
strings.each do |str|
|
12
|
+
raise ArgumentError, 'pass a String' unless str.respond_to?(:codepoints)
|
13
|
+
str.codepoints.each { |cp| new_set << cp }
|
14
|
+
end
|
15
|
+
new_set
|
12
16
|
end
|
13
17
|
end
|
14
18
|
|
@@ -22,13 +22,14 @@ class CharacterSet
|
|
22
22
|
|
23
23
|
# Allow some methods to take an Enum just as well as another CharacterSet.
|
24
24
|
# Tested by ruby-spec.
|
25
|
-
%w[& + - ^ | difference
|
25
|
+
%w[& + - ^ | difference disjoint? intersect? intersection
|
26
|
+
subtract union].each do |method|
|
26
27
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
27
28
|
def #{method}(arg)
|
28
29
|
if arg.is_a?(CharacterSet)
|
29
|
-
super
|
30
|
+
super(arg)
|
30
31
|
elsif arg.respond_to?(:each)
|
31
|
-
super(
|
32
|
+
super(self.class.new(arg.to_a))
|
32
33
|
else
|
33
34
|
raise ArgumentError, 'pass an enumerable'
|
34
35
|
end
|
@@ -36,7 +36,7 @@ class CharacterSet
|
|
36
36
|
end
|
37
37
|
|
38
38
|
def of_expression(expression)
|
39
|
-
ExpressionConverter.convert(expression)
|
39
|
+
ExpressionConverter.convert(expression, self)
|
40
40
|
end
|
41
41
|
|
42
42
|
def require_optional_dependency(name, method)
|
@@ -90,6 +90,14 @@ class CharacterSet
|
|
90
90
|
Writer.write_surrogate_alternation(bmp_part.ranges, astral_part.ranges)
|
91
91
|
end
|
92
92
|
|
93
|
+
def secure_token(length = 32)
|
94
|
+
CharacterSet.require_optional_dependency('securerandom', __method__)
|
95
|
+
cps = to_a
|
96
|
+
len = cps.count
|
97
|
+
1.upto(length).map { cps[SecureRandom.random_number(len)] }.pack('U*')
|
98
|
+
end
|
99
|
+
alias random_token secure_token
|
100
|
+
|
93
101
|
def inspect
|
94
102
|
len = length
|
95
103
|
"#<#{klass.name}: {\#{first(5) * ', '}\#{'...' if len > 5}} (size: \#{len})>"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: character_set
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.5.0
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-12-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sorted_set
|
@@ -100,14 +100,14 @@ dependencies:
|
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: '1
|
103
|
+
version: '2.1'
|
104
104
|
type: :development
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: '1
|
110
|
+
version: '2.1'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: regexp_property_values
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -150,6 +150,20 @@ dependencies:
|
|
150
150
|
- - "~>"
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: 0.2.12
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: gouteur
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: 1.0.0
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: 1.0.0
|
153
167
|
- !ruby/object:Gem::Dependency
|
154
168
|
name: rubocop
|
155
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -186,9 +200,11 @@ extensions: []
|
|
186
200
|
extra_rdoc_files: []
|
187
201
|
files:
|
188
202
|
- ".gitattributes"
|
203
|
+
- ".github/workflows/gouteur.yml"
|
189
204
|
- ".github/workflows/lint.yml"
|
190
205
|
- ".github/workflows/tests.yml"
|
191
206
|
- ".gitignore"
|
207
|
+
- ".gouteur.yml"
|
192
208
|
- ".rspec"
|
193
209
|
- ".rubocop.yml"
|
194
210
|
- BENCHMARK.md
|
@@ -266,7 +282,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
266
282
|
- !ruby/object:Gem::Version
|
267
283
|
version: '0'
|
268
284
|
requirements: []
|
269
|
-
rubygems_version: 3.
|
285
|
+
rubygems_version: 3.3.0.dev
|
270
286
|
signing_key:
|
271
287
|
specification_version: 4
|
272
288
|
summary: Build, read, write and compare sets of Unicode codepoints.
|