character_set 1.4.1 → 1.5.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/gouteur.yml +20 -0
- data/.gitignore +1 -0
- data/.gouteur.yml +2 -0
- data/.rubocop.yml +7 -1
- data/CHANGELOG.md +17 -0
- data/README.md +14 -1
- data/character_set.gemspec +2 -1
- data/ext/character_set/character_set.c +10 -5
- data/ext/character_set/unicode_casefold_table.h +44 -1
- data/lib/character_set/expression_converter.rb +23 -23
- data/lib/character_set/predefined_sets/assigned.cps +51 -40
- data/lib/character_set/predefined_sets/emoji.cps +12 -11
- data/lib/character_set/ruby_fallback/character_set_methods.rb +7 -3
- data/lib/character_set/set_method_adapters.rb +4 -3
- data/lib/character_set/shared_methods.rb +9 -1
- data/lib/character_set/version.rb +1 -1
- metadata +21 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 9622bc20bbdb48f8deff84dbed9e800e6bc500a6a08a27e7b3aea2ea651cd278
|
4
|
+
data.tar.gz: 5853e8d5be7e9a1963419aa4f9fbc631148fe5bef45aa185b9117d32b44aa959
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2cc2a60b9388a2e3beef66da20aa8205cc501980a7dc66f2716c66f7e999a083927b27a761e6b932b6d5c16b8e5968f8e04370ecf3c999326f378f60bfa3cedc
|
7
|
+
data.tar.gz: a2a8d1f9ac6cdf6302af98662fc3efda4b8c6fe003c7cdc853a61a64f9c7a596b1bbd7a79dca19081b8ce2576f9c3d848869141b164c145e22befaaffec8b265
|
@@ -0,0 +1,20 @@
|
|
1
|
+
name: gouteur
|
2
|
+
|
3
|
+
on: [push, pull_request]
|
4
|
+
|
5
|
+
jobs:
|
6
|
+
build:
|
7
|
+
runs-on: ubuntu-latest
|
8
|
+
|
9
|
+
steps:
|
10
|
+
- uses: actions/checkout@v2
|
11
|
+
- name: Set up Ruby
|
12
|
+
uses: ruby/setup-ruby@v1
|
13
|
+
with:
|
14
|
+
ruby-version: 2.7
|
15
|
+
- name: Prepare
|
16
|
+
run: |
|
17
|
+
bundle install --jobs 4
|
18
|
+
bundle exec rake compile
|
19
|
+
- name: Test
|
20
|
+
run: bundle exec gouteur
|
data/.gitignore
CHANGED
data/.gouteur.yml
ADDED
data/.rubocop.yml
CHANGED
@@ -8,4 +8,10 @@ AllCops:
|
|
8
8
|
RubyInterpreters:
|
9
9
|
- ruby
|
10
10
|
- rake
|
11
|
-
TargetRubyVersion: 2.
|
11
|
+
TargetRubyVersion: 2.5 # really 2.1, but 2.5 is lowest supported by rubocop
|
12
|
+
|
13
|
+
Lint/AmbiguousOperatorPrecedence:
|
14
|
+
Enabled: false
|
15
|
+
|
16
|
+
Lint/AmbiguousRegexpLiteral:
|
17
|
+
Enabled: false
|
data/CHANGELOG.md
CHANGED
@@ -4,6 +4,23 @@ All notable changes to this project will be documented in this file.
|
|
4
4
|
The format is based on [Keep a Changelog](http://keepachangelog.com/en/1.0.0/)
|
5
5
|
and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.html).
|
6
6
|
|
7
|
+
## [1.5.0] - 2021-12-05
|
8
|
+
|
9
|
+
### Added
|
10
|
+
|
11
|
+
- new codepoints for `::assigned` and `::emoji` predefined sets, as in Ruby 3.1.0
|
12
|
+
- latest unicode case-folding data (for `#case_insensitive`)
|
13
|
+
- support for passing any Enumerable to `#disjoint?`, `#intersect?`
|
14
|
+
- this matches recent broadening of these methods in `ruby/set`
|
15
|
+
- new instance method `#secure_token` (see README)
|
16
|
+
- class method `::of` now accepts more than one `String`
|
17
|
+
- `CharacterSet::ExpressionConverter` can now build output of any Set-like class
|
18
|
+
|
19
|
+
### Fixed
|
20
|
+
|
21
|
+
- `CharacterSet::Pure::of_expression` now returns a `CharacterSet::Pure`
|
22
|
+
- it used to return a regular `CharacterSet`
|
23
|
+
|
7
24
|
## [1.4.1] - 2020-01-10
|
8
25
|
|
9
26
|
### Fixed
|
data/README.md
CHANGED
@@ -2,6 +2,7 @@
|
|
2
2
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/character_set.svg)](http://badge.fury.io/rb/character_set)
|
4
4
|
[![Build Status](https://github.com/jaynetics/character_set/workflows/tests/badge.svg)](https://github.com/jaynetics/character_set/actions)
|
5
|
+
[![Build Status](https://github.com/jaynetics/character_set/workflows/gouteur/badge.svg)](https://github.com/jaynetics/character_set/actions)
|
5
6
|
[![codecov](https://codecov.io/gh/jaynetics/character_set/branch/master/graph/badge.svg)](https://codecov.io/gh/jaynetics/character_set)
|
6
7
|
|
7
8
|
This is a C-extended Ruby gem to work with sets of Unicode codepoints. It can read and write these sets in various formats and implements the stdlib `Set` interface for them.
|
@@ -10,6 +11,7 @@ It also offers an alternate paradigm of `String` processing which grants much be
|
|
10
11
|
|
11
12
|
Many parts can be used independently, e.g.:
|
12
13
|
- `CharacterSet::Character`
|
14
|
+
- `CharacterSet::ExpressionConverter`
|
13
15
|
- `CharacterSet::Parser`
|
14
16
|
- `CharacterSet::Writer`
|
15
17
|
- [`RangeCompressor`](https://github.com/jaynetics/range_compressor)
|
@@ -181,7 +183,18 @@ set.to_s_with_surrogate_alternation
|
|
181
183
|
# => '(?:[ab]|\uD83E\uDD29|\uD83E\uDD2A|\uD83E\uDD2B)'
|
182
184
|
```
|
183
185
|
|
184
|
-
###
|
186
|
+
### Other features
|
187
|
+
|
188
|
+
#### Secure tokens
|
189
|
+
|
190
|
+
Generate secure random strings of characters from a set:
|
191
|
+
|
192
|
+
```ruby
|
193
|
+
CharacterSet.new('a'..'z').secure_token(8) # => "ugwpujmt"
|
194
|
+
CharacterSet.crypt.secure_token # => "8.1w7aBT737/pMfcMoO4y2y8/=0xtmo:"
|
195
|
+
```
|
196
|
+
|
197
|
+
#### Unicode planes
|
185
198
|
|
186
199
|
There are some methods to check for planes and to handle ASCII, [BMP](https://en.wikipedia.org/wiki/Plane_%28Unicode%29#Basic_Multilingual_Plane) and astral parts:
|
187
200
|
```Ruby
|
data/character_set.gemspec
CHANGED
@@ -34,11 +34,12 @@ Gem::Specification.new do |s|
|
|
34
34
|
s.add_development_dependency 'rake', '~> 13.0'
|
35
35
|
s.add_development_dependency 'rake-compiler', '~> 1.1'
|
36
36
|
s.add_development_dependency 'range_compressor', '~> 1.0'
|
37
|
-
s.add_development_dependency 'regexp_parser', '~> 1
|
37
|
+
s.add_development_dependency 'regexp_parser', '~> 2.1'
|
38
38
|
s.add_development_dependency 'regexp_property_values', '~> 1.0'
|
39
39
|
s.add_development_dependency 'rspec', '~> 3.8'
|
40
40
|
if RUBY_VERSION.to_f >= 2.7
|
41
41
|
s.add_development_dependency 'codecov', '~> 0.2.12'
|
42
|
+
s.add_development_dependency 'gouteur', '~> 1.0.0'
|
42
43
|
s.add_development_dependency 'rubocop', '~> 1.8'
|
43
44
|
end
|
44
45
|
end
|
@@ -705,7 +705,8 @@ cs_method_ranges(VALUE self)
|
|
705
705
|
|
706
706
|
if (!previous_cp_num) {
|
707
707
|
current_start = cp_num;
|
708
|
-
} else if (previous_cp_num + 2 != cp_num)
|
708
|
+
} else if (previous_cp_num + 2 != cp_num)
|
709
|
+
{
|
709
710
|
// gap found, finalize previous range
|
710
711
|
rb_ary_push(ranges, rb_range_new(current_start, current_end, 0));
|
711
712
|
current_start = cp_num;
|
@@ -1046,13 +1047,17 @@ raise_arg_err_unless_string(VALUE val)
|
|
1046
1047
|
}
|
1047
1048
|
|
1048
1049
|
static VALUE
|
1049
|
-
cs_class_method_of(VALUE
|
1050
|
+
cs_class_method_of(int argc, VALUE *argv, VALUE self)
|
1050
1051
|
{
|
1051
1052
|
VALUE new_cs;
|
1052
1053
|
struct cs_data *new_data;
|
1054
|
+
int i;
|
1053
1055
|
new_cs = cs_alloc(self, &new_data);
|
1054
|
-
|
1055
|
-
|
1056
|
+
for (i = 0; i < argc; i++)
|
1057
|
+
{
|
1058
|
+
raise_arg_err_unless_string(argv[i]);
|
1059
|
+
each_cp(argv[i], add_str_cp_to_arr, 0, 0, new_data, 0);
|
1060
|
+
}
|
1056
1061
|
return new_cs;
|
1057
1062
|
}
|
1058
1063
|
|
@@ -1338,7 +1343,7 @@ void Init_character_set()
|
|
1338
1343
|
// `CharacterSet`-specific methods
|
1339
1344
|
|
1340
1345
|
rb_define_singleton_method(cs, "from_ranges", cs_class_method_from_ranges, -2);
|
1341
|
-
rb_define_singleton_method(cs, "of", cs_class_method_of, 1);
|
1346
|
+
rb_define_singleton_method(cs, "of", cs_class_method_of, -1);
|
1342
1347
|
|
1343
1348
|
rb_define_method(cs, "ranges", cs_method_ranges, 0);
|
1344
1349
|
rb_define_method(cs, "sample", cs_method_sample, -1);
|
@@ -6,7 +6,7 @@ typedef struct casefold_mapping {
|
|
6
6
|
unsigned long to;
|
7
7
|
} casefold_mapping;
|
8
8
|
|
9
|
-
#define CASEFOLD_COUNT
|
9
|
+
#define CASEFOLD_COUNT 1426
|
10
10
|
|
11
11
|
static const casefold_mapping unicode_casefold_table[CASEFOLD_COUNT] = {
|
12
12
|
{0x0041,0x0061},
|
@@ -564,6 +564,41 @@ static const casefold_mapping unicode_casefold_table[CASEFOLD_COUNT] = {
|
|
564
564
|
{0x104D1,0x104F9},
|
565
565
|
{0x104D2,0x104FA},
|
566
566
|
{0x104D3,0x104FB},
|
567
|
+
{0x10570,0x10597},
|
568
|
+
{0x10571,0x10598},
|
569
|
+
{0x10572,0x10599},
|
570
|
+
{0x10573,0x1059A},
|
571
|
+
{0x10574,0x1059B},
|
572
|
+
{0x10575,0x1059C},
|
573
|
+
{0x10576,0x1059D},
|
574
|
+
{0x10577,0x1059E},
|
575
|
+
{0x10578,0x1059F},
|
576
|
+
{0x10579,0x105A0},
|
577
|
+
{0x1057A,0x105A1},
|
578
|
+
{0x1057C,0x105A3},
|
579
|
+
{0x1057D,0x105A4},
|
580
|
+
{0x1057E,0x105A5},
|
581
|
+
{0x1057F,0x105A6},
|
582
|
+
{0x10580,0x105A7},
|
583
|
+
{0x10581,0x105A8},
|
584
|
+
{0x10582,0x105A9},
|
585
|
+
{0x10583,0x105AA},
|
586
|
+
{0x10584,0x105AB},
|
587
|
+
{0x10585,0x105AC},
|
588
|
+
{0x10586,0x105AD},
|
589
|
+
{0x10587,0x105AE},
|
590
|
+
{0x10588,0x105AF},
|
591
|
+
{0x10589,0x105B0},
|
592
|
+
{0x1058A,0x105B1},
|
593
|
+
{0x1058C,0x105B3},
|
594
|
+
{0x1058D,0x105B4},
|
595
|
+
{0x1058E,0x105B5},
|
596
|
+
{0x1058F,0x105B6},
|
597
|
+
{0x10590,0x105B7},
|
598
|
+
{0x10591,0x105B8},
|
599
|
+
{0x10592,0x105B9},
|
600
|
+
{0x10594,0x105BB},
|
601
|
+
{0x10595,0x105BC},
|
567
602
|
{0x10A0,0x2D00},
|
568
603
|
{0x10A1,0x2D01},
|
569
604
|
{0x10A2,0x2D02},
|
@@ -1102,6 +1137,7 @@ static const casefold_mapping unicode_casefold_table[CASEFOLD_COUNT] = {
|
|
1102
1137
|
{0x2C2C,0x2C5C},
|
1103
1138
|
{0x2C2D,0x2C5D},
|
1104
1139
|
{0x2C2E,0x2C5E},
|
1140
|
+
{0x2C2F,0x2C5F},
|
1105
1141
|
{0x2C60,0x2C61},
|
1106
1142
|
{0x2C62,0x026B},
|
1107
1143
|
{0x2C63,0x1D7D},
|
@@ -1282,10 +1318,17 @@ static const casefold_mapping unicode_casefold_table[CASEFOLD_COUNT] = {
|
|
1282
1318
|
{0xA7BA,0xA7BB},
|
1283
1319
|
{0xA7BC,0xA7BD},
|
1284
1320
|
{0xA7BE,0xA7BF},
|
1321
|
+
{0xA7C0,0xA7C1},
|
1285
1322
|
{0xA7C2,0xA7C3},
|
1286
1323
|
{0xA7C4,0xA794},
|
1287
1324
|
{0xA7C5,0x0282},
|
1288
1325
|
{0xA7C6,0x1D8E},
|
1326
|
+
{0xA7C7,0xA7C8},
|
1327
|
+
{0xA7C9,0xA7CA},
|
1328
|
+
{0xA7D0,0xA7D1},
|
1329
|
+
{0xA7D6,0xA7D7},
|
1330
|
+
{0xA7D8,0xA7D9},
|
1331
|
+
{0xA7F5,0xA7F6},
|
1289
1332
|
{0xAB70,0x13A0},
|
1290
1333
|
{0xAB71,0x13A1},
|
1291
1334
|
{0xAB72,0x13A2},
|
@@ -4,7 +4,7 @@ class CharacterSet
|
|
4
4
|
|
5
5
|
Error = Class.new(ArgumentError)
|
6
6
|
|
7
|
-
def convert(expression)
|
7
|
+
def convert(expression, to = CharacterSet)
|
8
8
|
CharacterSet.require_optional_dependency('regexp_parser', __method__)
|
9
9
|
|
10
10
|
case expression
|
@@ -12,49 +12,49 @@ class CharacterSet
|
|
12
12
|
if expression.count != 1
|
13
13
|
raise Error, 'Pass a Regexp with exactly one expression, e.g. /[a-z]/'
|
14
14
|
end
|
15
|
-
convert(expression[0])
|
15
|
+
convert(expression[0], to)
|
16
16
|
|
17
17
|
when Regexp::Expression::CharacterSet
|
18
|
-
content = expression.map { |subexp| convert(subexp) }.reduce(:+)
|
19
|
-
content ||=
|
18
|
+
content = expression.map { |subexp| convert(subexp, to) }.reduce(:+)
|
19
|
+
content ||= to[]
|
20
20
|
expression.negative? ? content.inversion : content
|
21
21
|
|
22
22
|
when Regexp::Expression::CharacterSet::Intersection
|
23
|
-
expression.map { |subexp| convert(subexp) }.reduce(:&)
|
23
|
+
expression.map { |subexp| convert(subexp, to) }.reduce(:&)
|
24
24
|
|
25
25
|
when Regexp::Expression::CharacterSet::IntersectedSequence
|
26
|
-
expression.map { |subexp| convert(subexp) }.reduce(:+) ||
|
26
|
+
expression.map { |subexp| convert(subexp, to) }.reduce(:+) || to[]
|
27
27
|
|
28
28
|
when Regexp::Expression::CharacterSet::Range
|
29
|
-
start, finish = expression.map { |subexp| convert(subexp) }
|
30
|
-
|
29
|
+
start, finish = expression.map { |subexp| convert(subexp, to) }
|
30
|
+
to.new((start.min)..(finish.max))
|
31
31
|
|
32
32
|
when Regexp::Expression::CharacterType::Any
|
33
|
-
|
33
|
+
to.unicode
|
34
34
|
|
35
35
|
when Regexp::Expression::CharacterType::Base
|
36
36
|
/(?<negative>non)?(?<base_name>.+)/ =~ expression.token
|
37
37
|
content =
|
38
38
|
if expression.unicode_classes?
|
39
39
|
# in u-mode, type shortcuts match the same as \p{<long type name>}
|
40
|
-
|
40
|
+
to.of_property(base_name)
|
41
41
|
else
|
42
42
|
# in normal mode, types match only ascii chars
|
43
43
|
case base_name.to_sym
|
44
|
-
when :digit then
|
45
|
-
when :hex then
|
46
|
-
when :space then
|
47
|
-
when :word then
|
44
|
+
when :digit then to.from_ranges(48..57)
|
45
|
+
when :hex then to.from_ranges(48..57, 65..70, 97..102)
|
46
|
+
when :space then to.from_ranges(9..13, 32..32)
|
47
|
+
when :word then to.from_ranges(48..57, 65..90, 95..95, 97..122)
|
48
48
|
else raise Error, "Unsupported CharacterType #{base_name}"
|
49
49
|
end
|
50
50
|
end
|
51
51
|
negative ? content.inversion : content
|
52
52
|
|
53
53
|
when Regexp::Expression::EscapeSequence::CodepointList
|
54
|
-
|
54
|
+
to.new(expression.codepoints)
|
55
55
|
|
56
56
|
when Regexp::Expression::EscapeSequence::Base
|
57
|
-
|
57
|
+
to[expression.codepoint]
|
58
58
|
|
59
59
|
when Regexp::Expression::Group::Capture,
|
60
60
|
Regexp::Expression::Group::Passive,
|
@@ -62,19 +62,19 @@ class CharacterSet
|
|
62
62
|
Regexp::Expression::Group::Atomic,
|
63
63
|
Regexp::Expression::Group::Options
|
64
64
|
case expression.count
|
65
|
-
when 0 then
|
66
|
-
when 1 then convert(expression.first)
|
65
|
+
when 0 then to[]
|
66
|
+
when 1 then convert(expression.first, to)
|
67
67
|
else
|
68
68
|
raise Error, 'Groups must contain exactly one expression, e.g. ([a-z])'
|
69
69
|
end
|
70
70
|
|
71
71
|
when Regexp::Expression::Alternation # rubocop:disable Lint/DuplicateBranch
|
72
|
-
expression.map { |subexp| convert(subexp) }.reduce(:+)
|
72
|
+
expression.map { |subexp| convert(subexp, to) }.reduce(:+)
|
73
73
|
|
74
74
|
when Regexp::Expression::Alternative
|
75
75
|
case expression.count
|
76
|
-
when 0 then
|
77
|
-
when 1 then convert(expression.first)
|
76
|
+
when 0 then to[]
|
77
|
+
when 1 then convert(expression.first, to)
|
78
78
|
else
|
79
79
|
raise Error, 'Alternatives must contain exactly one expression'
|
80
80
|
end
|
@@ -83,11 +83,11 @@ class CharacterSet
|
|
83
83
|
if expression.set_level == 0 && expression.text.size != 1
|
84
84
|
raise Error, 'Literal runs outside of sets are codepoint *sequences*'
|
85
85
|
end
|
86
|
-
|
86
|
+
to[expression.text.ord]
|
87
87
|
|
88
88
|
when Regexp::Expression::UnicodeProperty::Base,
|
89
89
|
Regexp::Expression::PosixClass
|
90
|
-
content =
|
90
|
+
content = to.of_property(expression.token)
|
91
91
|
if expression.type == :posixclass && expression.ascii_classes?
|
92
92
|
content = content.ascii_part
|
93
93
|
end
|
@@ -21,7 +21,7 @@
|
|
21
21
|
85E,85E
|
22
22
|
860,86A
|
23
23
|
8A0,8B4
|
24
|
-
8B6,
|
24
|
+
8B6,8C7
|
25
25
|
8D3,983
|
26
26
|
985,98C
|
27
27
|
98F,990
|
@@ -76,7 +76,7 @@ B35,B39
|
|
76
76
|
B3C,B44
|
77
77
|
B47,B48
|
78
78
|
B4B,B4D
|
79
|
-
|
79
|
+
B55,B57
|
80
80
|
B5C,B5D
|
81
81
|
B5F,B63
|
82
82
|
B66,B77
|
@@ -120,15 +120,14 @@ CDE,CDE
|
|
120
120
|
CE0,CE3
|
121
121
|
CE6,CEF
|
122
122
|
CF1,CF2
|
123
|
-
D00,
|
124
|
-
D05,D0C
|
123
|
+
D00,D0C
|
125
124
|
D0E,D10
|
126
125
|
D12,D44
|
127
126
|
D46,D48
|
128
127
|
D4A,D4F
|
129
128
|
D54,D63
|
130
129
|
D66,D7F
|
131
|
-
|
130
|
+
D81,D83
|
132
131
|
D85,D96
|
133
132
|
D9A,DB1
|
134
133
|
DB3,DBB
|
@@ -214,7 +213,7 @@ FCE,FDA
|
|
214
213
|
1A7F,1A89
|
215
214
|
1A90,1A99
|
216
215
|
1AA0,1AAD
|
217
|
-
1AB0,
|
216
|
+
1AB0,1AC0
|
218
217
|
1B00,1B4B
|
219
218
|
1B50,1B7C
|
220
219
|
1B80,1BF3
|
@@ -252,7 +251,7 @@ FCE,FDA
|
|
252
251
|
2440,244A
|
253
252
|
2460,2B73
|
254
253
|
2B76,2B95
|
255
|
-
|
254
|
+
2B97,2C2E
|
256
255
|
2C30,2C5E
|
257
256
|
2C60,2CF3
|
258
257
|
2CF9,2D25
|
@@ -269,7 +268,7 @@ FCE,FDA
|
|
269
268
|
2DC8,2DCE
|
270
269
|
2DD0,2DD6
|
271
270
|
2DD8,2DDE
|
272
|
-
2DE0,
|
271
|
+
2DE0,2E52
|
273
272
|
2E80,2E99
|
274
273
|
2E9B,2EF3
|
275
274
|
2F00,2FD5
|
@@ -279,18 +278,16 @@ FCE,FDA
|
|
279
278
|
3099,30FF
|
280
279
|
3105,312F
|
281
280
|
3131,318E
|
282
|
-
3190,
|
283
|
-
31C0,31E3
|
281
|
+
3190,31E3
|
284
282
|
31F0,321E
|
285
|
-
3220,
|
286
|
-
4DC0,9FEF
|
283
|
+
3220,9FFC
|
287
284
|
A000,A48C
|
288
285
|
A490,A4C6
|
289
286
|
A4D0,A62B
|
290
287
|
A640,A6F7
|
291
288
|
A700,A7BF
|
292
|
-
A7C2,
|
293
|
-
|
289
|
+
A7C2,A7CA
|
290
|
+
A7F5,A82C
|
294
291
|
A830,A839
|
295
292
|
A840,A877
|
296
293
|
A880,A8C5
|
@@ -310,7 +307,7 @@ AB09,AB0E
|
|
310
307
|
AB11,AB16
|
311
308
|
AB20,AB26
|
312
309
|
AB28,AB2E
|
313
|
-
AB30,
|
310
|
+
AB30,AB6B
|
314
311
|
AB70,ABED
|
315
312
|
ABF0,ABF9
|
316
313
|
AC00,D7A3
|
@@ -355,7 +352,7 @@ FFF9,FFFD
|
|
355
352
|
10100,10102
|
356
353
|
10107,10133
|
357
354
|
10137,1018E
|
358
|
-
10190,
|
355
|
+
10190,1019C
|
359
356
|
101A0,101A0
|
360
357
|
101D0,101FD
|
361
358
|
10280,1029C
|
@@ -415,8 +412,12 @@ FFF9,FFFD
|
|
415
412
|
10CFA,10D27
|
416
413
|
10D30,10D39
|
417
414
|
10E60,10E7E
|
415
|
+
10E80,10EA9
|
416
|
+
10EAB,10EAD
|
417
|
+
10EB0,10EB1
|
418
418
|
10F00,10F27
|
419
419
|
10F30,10F59
|
420
|
+
10FB0,10FCB
|
420
421
|
10FE0,10FF6
|
421
422
|
11000,1104D
|
422
423
|
11052,1106F
|
@@ -425,10 +426,9 @@ FFF9,FFFD
|
|
425
426
|
110D0,110E8
|
426
427
|
110F0,110F9
|
427
428
|
11100,11134
|
428
|
-
11136,
|
429
|
+
11136,11147
|
429
430
|
11150,11176
|
430
|
-
11180,
|
431
|
-
111D0,111DF
|
431
|
+
11180,111DF
|
432
432
|
111E1,111F4
|
433
433
|
11200,11211
|
434
434
|
11213,1123E
|
@@ -454,9 +454,8 @@ FFF9,FFFD
|
|
454
454
|
1135D,11363
|
455
455
|
11366,1136C
|
456
456
|
11370,11374
|
457
|
-
11400,
|
458
|
-
|
459
|
-
1145D,1145F
|
457
|
+
11400,1145B
|
458
|
+
1145D,11461
|
460
459
|
11480,114C7
|
461
460
|
114D0,114D9
|
462
461
|
11580,115B5
|
@@ -471,7 +470,14 @@ FFF9,FFFD
|
|
471
470
|
11730,1173F
|
472
471
|
11800,1183B
|
473
472
|
118A0,118F2
|
474
|
-
118FF,
|
473
|
+
118FF,11906
|
474
|
+
11909,11909
|
475
|
+
1190C,11913
|
476
|
+
11915,11916
|
477
|
+
11918,11935
|
478
|
+
11937,11938
|
479
|
+
1193B,11946
|
480
|
+
11950,11959
|
475
481
|
119A0,119A7
|
476
482
|
119AA,119D7
|
477
483
|
119DA,119E4
|
@@ -499,6 +505,7 @@ FFF9,FFFD
|
|
499
505
|
11D93,11D98
|
500
506
|
11DA0,11DA9
|
501
507
|
11EE0,11EF8
|
508
|
+
11FB0,11FB0
|
502
509
|
11FC0,11FF1
|
503
510
|
11FFF,12399
|
504
511
|
12400,1246E
|
@@ -522,9 +529,11 @@ FFF9,FFFD
|
|
522
529
|
16F00,16F4A
|
523
530
|
16F4F,16F87
|
524
531
|
16F8F,16F9F
|
525
|
-
16FE0,
|
532
|
+
16FE0,16FE4
|
533
|
+
16FF0,16FF1
|
526
534
|
17000,187F7
|
527
|
-
18800,
|
535
|
+
18800,18CD5
|
536
|
+
18D00,18D08
|
528
537
|
1B000,1B11E
|
529
538
|
1B150,1B152
|
530
539
|
1B164,1B167
|
@@ -622,17 +631,15 @@ FFF9,FFFD
|
|
622
631
|
1F0B1,1F0BF
|
623
632
|
1F0C1,1F0CF
|
624
633
|
1F0D1,1F0F5
|
625
|
-
1F100,
|
626
|
-
1F110,1F16C
|
627
|
-
1F170,1F1AC
|
634
|
+
1F100,1F1AD
|
628
635
|
1F1E6,1F202
|
629
636
|
1F210,1F23B
|
630
637
|
1F240,1F248
|
631
638
|
1F250,1F251
|
632
639
|
1F260,1F265
|
633
|
-
1F300,
|
640
|
+
1F300,1F6D7
|
634
641
|
1F6E0,1F6EC
|
635
|
-
1F6F0,
|
642
|
+
1F6F0,1F6FC
|
636
643
|
1F700,1F773
|
637
644
|
1F780,1F7D8
|
638
645
|
1F7E0,1F7EB
|
@@ -641,24 +648,28 @@ FFF9,FFFD
|
|
641
648
|
1F850,1F859
|
642
649
|
1F860,1F887
|
643
650
|
1F890,1F8AD
|
644
|
-
|
645
|
-
|
646
|
-
|
647
|
-
1F97A,1F9A2
|
648
|
-
1F9A5,1F9AA
|
649
|
-
1F9AE,1F9CA
|
651
|
+
1F8B0,1F8B1
|
652
|
+
1F900,1F978
|
653
|
+
1F97A,1F9CB
|
650
654
|
1F9CD,1FA53
|
651
655
|
1FA60,1FA6D
|
652
|
-
1FA70,
|
656
|
+
1FA70,1FA74
|
653
657
|
1FA78,1FA7A
|
654
|
-
1FA80,
|
655
|
-
1FA90,
|
656
|
-
|
658
|
+
1FA80,1FA86
|
659
|
+
1FA90,1FAA8
|
660
|
+
1FAB0,1FAB6
|
661
|
+
1FAC0,1FAC2
|
662
|
+
1FAD0,1FAD6
|
663
|
+
1FB00,1FB92
|
664
|
+
1FB94,1FBCA
|
665
|
+
1FBF0,1FBF9
|
666
|
+
20000,2A6DD
|
657
667
|
2A700,2B734
|
658
668
|
2B740,2B81D
|
659
669
|
2B820,2CEA1
|
660
670
|
2CEB0,2EBE0
|
661
671
|
2F800,2FA1D
|
672
|
+
30000,3134A
|
662
673
|
E0001,E0001
|
663
674
|
E0020,E007F
|
664
675
|
E0100,E01EF
|
@@ -44,6 +44,7 @@ AE,AE
|
|
44
44
|
2699,2699
|
45
45
|
269B,269C
|
46
46
|
26A0,26A1
|
47
|
+
26A7,26A7
|
47
48
|
26AA,26AB
|
48
49
|
26B0,26B1
|
49
50
|
26BD,26BE
|
@@ -130,22 +131,22 @@ AE,AE
|
|
130
131
|
1F5FA,1F64F
|
131
132
|
1F680,1F6C5
|
132
133
|
1F6CB,1F6D2
|
133
|
-
1F6D5,
|
134
|
+
1F6D5,1F6D7
|
134
135
|
1F6E0,1F6E5
|
135
136
|
1F6E9,1F6E9
|
136
137
|
1F6EB,1F6EC
|
137
138
|
1F6F0,1F6F0
|
138
|
-
1F6F3,
|
139
|
+
1F6F3,1F6FC
|
139
140
|
1F7E0,1F7EB
|
140
|
-
|
141
|
+
1F90C,1F93A
|
141
142
|
1F93C,1F945
|
142
|
-
1F947,
|
143
|
-
|
144
|
-
1F97A,1F9A2
|
145
|
-
1F9A5,1F9AA
|
146
|
-
1F9AE,1F9CA
|
143
|
+
1F947,1F978
|
144
|
+
1F97A,1F9CB
|
147
145
|
1F9CD,1F9FF
|
148
|
-
1FA70,
|
146
|
+
1FA70,1FA74
|
149
147
|
1FA78,1FA7A
|
150
|
-
1FA80,
|
151
|
-
1FA90,
|
148
|
+
1FA80,1FA86
|
149
|
+
1FA90,1FAA8
|
150
|
+
1FAB0,1FAB6
|
151
|
+
1FAC0,1FAC2
|
152
|
+
1FAD0,1FAD6
|
@@ -6,9 +6,13 @@ class CharacterSet
|
|
6
6
|
new(Array(ranges).flat_map(&:to_a))
|
7
7
|
end
|
8
8
|
|
9
|
-
def of(
|
10
|
-
|
11
|
-
|
9
|
+
def of(*strings)
|
10
|
+
new_set = new
|
11
|
+
strings.each do |str|
|
12
|
+
raise ArgumentError, 'pass a String' unless str.respond_to?(:codepoints)
|
13
|
+
str.codepoints.each { |cp| new_set << cp }
|
14
|
+
end
|
15
|
+
new_set
|
12
16
|
end
|
13
17
|
end
|
14
18
|
|
@@ -22,13 +22,14 @@ class CharacterSet
|
|
22
22
|
|
23
23
|
# Allow some methods to take an Enum just as well as another CharacterSet.
|
24
24
|
# Tested by ruby-spec.
|
25
|
-
%w[& + - ^ | difference
|
25
|
+
%w[& + - ^ | difference disjoint? intersect? intersection
|
26
|
+
subtract union].each do |method|
|
26
27
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
27
28
|
def #{method}(arg)
|
28
29
|
if arg.is_a?(CharacterSet)
|
29
|
-
super
|
30
|
+
super(arg)
|
30
31
|
elsif arg.respond_to?(:each)
|
31
|
-
super(
|
32
|
+
super(self.class.new(arg.to_a))
|
32
33
|
else
|
33
34
|
raise ArgumentError, 'pass an enumerable'
|
34
35
|
end
|
@@ -36,7 +36,7 @@ class CharacterSet
|
|
36
36
|
end
|
37
37
|
|
38
38
|
def of_expression(expression)
|
39
|
-
ExpressionConverter.convert(expression)
|
39
|
+
ExpressionConverter.convert(expression, self)
|
40
40
|
end
|
41
41
|
|
42
42
|
def require_optional_dependency(name, method)
|
@@ -90,6 +90,14 @@ class CharacterSet
|
|
90
90
|
Writer.write_surrogate_alternation(bmp_part.ranges, astral_part.ranges)
|
91
91
|
end
|
92
92
|
|
93
|
+
def secure_token(length = 32)
|
94
|
+
CharacterSet.require_optional_dependency('securerandom', __method__)
|
95
|
+
cps = to_a
|
96
|
+
len = cps.count
|
97
|
+
1.upto(length).map { cps[SecureRandom.random_number(len)] }.pack('U*')
|
98
|
+
end
|
99
|
+
alias random_token secure_token
|
100
|
+
|
93
101
|
def inspect
|
94
102
|
len = length
|
95
103
|
"#<#{klass.name}: {\#{first(5) * ', '}\#{'...' if len > 5}} (size: \#{len})>"
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: character_set
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2021-
|
11
|
+
date: 2021-12-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: sorted_set
|
@@ -100,14 +100,14 @@ dependencies:
|
|
100
100
|
requirements:
|
101
101
|
- - "~>"
|
102
102
|
- !ruby/object:Gem::Version
|
103
|
-
version: '1
|
103
|
+
version: '2.1'
|
104
104
|
type: :development
|
105
105
|
prerelease: false
|
106
106
|
version_requirements: !ruby/object:Gem::Requirement
|
107
107
|
requirements:
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
|
-
version: '1
|
110
|
+
version: '2.1'
|
111
111
|
- !ruby/object:Gem::Dependency
|
112
112
|
name: regexp_property_values
|
113
113
|
requirement: !ruby/object:Gem::Requirement
|
@@ -150,6 +150,20 @@ dependencies:
|
|
150
150
|
- - "~>"
|
151
151
|
- !ruby/object:Gem::Version
|
152
152
|
version: 0.2.12
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: gouteur
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: 1.0.0
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: 1.0.0
|
153
167
|
- !ruby/object:Gem::Dependency
|
154
168
|
name: rubocop
|
155
169
|
requirement: !ruby/object:Gem::Requirement
|
@@ -173,9 +187,11 @@ extensions:
|
|
173
187
|
extra_rdoc_files: []
|
174
188
|
files:
|
175
189
|
- ".gitattributes"
|
190
|
+
- ".github/workflows/gouteur.yml"
|
176
191
|
- ".github/workflows/lint.yml"
|
177
192
|
- ".github/workflows/tests.yml"
|
178
193
|
- ".gitignore"
|
194
|
+
- ".gouteur.yml"
|
179
195
|
- ".rspec"
|
180
196
|
- ".rubocop.yml"
|
181
197
|
- BENCHMARK.md
|
@@ -253,7 +269,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
253
269
|
- !ruby/object:Gem::Version
|
254
270
|
version: '0'
|
255
271
|
requirements: []
|
256
|
-
rubygems_version: 3.
|
272
|
+
rubygems_version: 3.3.0.dev
|
257
273
|
signing_key:
|
258
274
|
specification_version: 4
|
259
275
|
summary: Build, read, write and compare sets of Unicode codepoints.
|