character_set 1.7.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/gouteur.yml +1 -1
- data/.github/workflows/lint.yml +1 -1
- data/.github/workflows/tests.yml +3 -1
- data/.rubocop.yml +3 -0
- data/CHANGELOG.md +9 -0
- data/Gemfile +7 -6
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/ext/character_set/character_set.c +13 -0
- data/lib/character_set/core_ext/regexp_ext.rb +8 -0
- data/lib/character_set/expression_converter.rb +37 -54
- data/lib/character_set/ruby_fallback/character_set_methods.rb +0 -4
- data/lib/character_set/ruby_fallback/set_methods.rb +3 -4
- data/lib/character_set/ruby_fallback/vendored_set_classes.rb +325 -432
- data/lib/character_set/ruby_fallback.rb +17 -1
- data/lib/character_set/set_method_adapters.rb +1 -1
- data/lib/character_set/shared_methods.rb +5 -1
- data/lib/character_set/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ebb6792f685df02534f1ef04a92d7f0c5fdcb482e5aaa4856d7a39726e17f007
|
4
|
+
data.tar.gz: c6630aab9b6506c46a970ba83c257cd753f8f76760b6ce8d2639f51efba83eeb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c773a0546d05939d0b295e50355c6efe870a1ed74901d63c24097ff598d4a43bcd00ce2d03fb492a48fd9c03968a79ee78b789d92836843d6621dca3e8f313c
|
7
|
+
data.tar.gz: 560d3c3aa3f7e4daac3b6d2c89fb9dd6840777fa4d5896fb33564023ef745d81a7e4d0e51fe0ba42f6cd4504bc0b088657cd4ef1ab15d213aa1bb096ba404542
|
data/.github/workflows/lint.yml
CHANGED
data/.github/workflows/tests.yml
CHANGED
@@ -12,7 +12,7 @@ jobs:
|
|
12
12
|
|
13
13
|
strategy:
|
14
14
|
matrix:
|
15
|
-
ruby: [ '2.
|
15
|
+
ruby: [ '2.4', '2.7', '3.0', '3.1', '3.2', '3.3', 'ruby-head', 'jruby-head' ]
|
16
16
|
|
17
17
|
steps:
|
18
18
|
- uses: actions/checkout@v2
|
@@ -24,3 +24,5 @@ jobs:
|
|
24
24
|
run: bundle install --jobs 4
|
25
25
|
- name: Test with Rake
|
26
26
|
run: bundle exec rake
|
27
|
+
- uses: codecov/codecov-action@v3
|
28
|
+
if: matrix.ruby == '3.2'
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
6
6
|
|
7
7
|
## [Unreleased]
|
8
8
|
|
9
|
+
## [1.8.0] - 2024-01-07
|
10
|
+
|
11
|
+
### Added
|
12
|
+
|
13
|
+
- support for `#<=>` and `#join`, which were added to `set` in the meantime
|
14
|
+
- support for getting the (overall) character set of a Regexp with multiple expressions
|
15
|
+
- support for global and local case-insensitivity in Regexp inputs
|
16
|
+
- `Regexp#{covered_by_character_set?,uses_character_set?}` methods (if core ext is used)
|
17
|
+
|
9
18
|
## [1.7.0] - 2023-05-12
|
10
19
|
|
11
20
|
### Added
|
data/Gemfile
CHANGED
@@ -7,14 +7,15 @@ gemspec
|
|
7
7
|
|
8
8
|
gem 'benchmark-ips', '~> 2.7'
|
9
9
|
gem 'get_process_mem', '~> 0.2.3'
|
10
|
-
gem 'rake', '~> 13.
|
10
|
+
gem 'rake', '~> 13.1'
|
11
11
|
gem 'rake-compiler', '~> 1.1'
|
12
12
|
gem 'range_compressor', '~> 1.0'
|
13
|
-
gem 'regexp_parser', '~> 2.
|
14
|
-
gem 'regexp_property_values', '~> 1.
|
13
|
+
gem 'regexp_parser', '~> 2.9'
|
14
|
+
gem 'regexp_property_values', '~> 1.5'
|
15
15
|
gem 'rspec', '~> 3.8'
|
16
|
-
|
17
|
-
|
16
|
+
gem 'warning', '~> 1.3'
|
17
|
+
if RUBY_VERSION.to_f >= 3.0
|
18
18
|
gem 'gouteur', '~> 1.0.0'
|
19
|
-
gem 'rubocop', '~> 1.
|
19
|
+
gem 'rubocop', '~> 1.59'
|
20
|
+
gem 'simplecov-cobertura', require: false
|
20
21
|
end
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
[](http://badge.fury.io/rb/character_set)
|
4
4
|
[](https://github.com/jaynetics/character_set/actions)
|
5
5
|
[](https://github.com/jaynetics/character_set/actions)
|
6
|
-
[](https://codecov.io/gh/jaynetics/character_set)
|
7
7
|
|
8
8
|
This is a C-extended Ruby gem to work with sets of Unicode codepoints.
|
9
9
|
|
@@ -43,7 +43,7 @@ CharacterSet.parse('[a-c]')
|
|
43
43
|
CharacterSet.parse('\U00000061-\U00000063')
|
44
44
|
```
|
45
45
|
|
46
|
-
If the gems [`regexp_parser`](https://github.com/ammar/regexp_parser) and [`regexp_property_values`](https://github.com/jaynetics/regexp_property_values) are installed, `Regexp` and unicode property names can also be read.
|
46
|
+
If the gems [`regexp_parser`](https://github.com/ammar/regexp_parser) and [`regexp_property_values`](https://github.com/jaynetics/regexp_property_values) are installed, `Regexp` instances and unicode property names can also be read.
|
47
47
|
|
48
48
|
```ruby
|
49
49
|
CharacterSet.of(/./) # => #<CharacterSet (size: 1112064)>
|
@@ -675,6 +675,18 @@ cs_method_proper_superset_p(VALUE self, VALUE other)
|
|
675
675
|
return (is_superset && is_proper) ? Qtrue : Qfalse;
|
676
676
|
}
|
677
677
|
|
678
|
+
static VALUE
|
679
|
+
cs_method_spaceship_operator(VALUE self, VALUE other)
|
680
|
+
{
|
681
|
+
if (cs_method_eql_p(self, other))
|
682
|
+
return INT2FIX(0);
|
683
|
+
if (cs_method_proper_subset_p(self, other))
|
684
|
+
return INT2FIX(-1);
|
685
|
+
if (cs_method_proper_superset_p(self, other))
|
686
|
+
return INT2FIX(1);
|
687
|
+
return Qnil;
|
688
|
+
}
|
689
|
+
|
678
690
|
// *******************************
|
679
691
|
// `CharacterSet`-specific methods
|
680
692
|
// *******************************
|
@@ -1324,6 +1336,7 @@ void Init_character_set()
|
|
1324
1336
|
rb_define_method(cs, ">=", cs_method_superset_p, 1);
|
1325
1337
|
rb_define_method(cs, "proper_superset?", cs_method_proper_superset_p, 1);
|
1326
1338
|
rb_define_method(cs, ">", cs_method_proper_superset_p, 1);
|
1339
|
+
rb_define_method(cs, "<=>", cs_method_spaceship_operator, 1);
|
1327
1340
|
|
1328
1341
|
// `CharacterSet`-specific methods
|
1329
1342
|
|
@@ -4,6 +4,14 @@ class CharacterSet
|
|
4
4
|
def character_set
|
5
5
|
CharacterSet.of_regexp(self)
|
6
6
|
end
|
7
|
+
|
8
|
+
def covered_by_character_set?(other)
|
9
|
+
other.superset?(character_set)
|
10
|
+
end
|
11
|
+
|
12
|
+
def uses_character_set?(other)
|
13
|
+
other.intersect?(character_set)
|
14
|
+
end
|
7
15
|
end
|
8
16
|
end
|
9
17
|
end
|
@@ -4,86 +4,61 @@ class CharacterSet
|
|
4
4
|
|
5
5
|
Error = Class.new(ArgumentError)
|
6
6
|
|
7
|
-
def convert(expression, to = CharacterSet)
|
7
|
+
def convert(expression, to = CharacterSet, acc = [])
|
8
8
|
CharacterSet.require_optional_dependency('regexp_parser', __method__)
|
9
9
|
|
10
10
|
case expression
|
11
|
-
when Regexp::Expression::Root
|
12
|
-
if expression.count != 1
|
13
|
-
raise Error, 'Pass a Regexp with exactly one expression, e.g. /[a-z]/'
|
14
|
-
end
|
15
|
-
convert(expression[0], to)
|
16
|
-
|
17
11
|
when Regexp::Expression::CharacterSet
|
18
|
-
content = expression.map { |subexp| convert(subexp, to) }.reduce(:+)
|
19
|
-
content
|
20
|
-
expression.negative? ? content.inversion : content
|
12
|
+
content = expression.map { |subexp| convert(subexp, to) }.reduce(:+) || to[]
|
13
|
+
acc << (expression.negative? ? content.inversion : content)
|
21
14
|
|
22
15
|
when Regexp::Expression::CharacterSet::Intersection
|
23
|
-
expression.map { |subexp| convert(subexp, to) }.reduce(:&)
|
24
|
-
|
25
|
-
when Regexp::Expression::CharacterSet::IntersectedSequence
|
26
|
-
expression.map { |subexp| convert(subexp, to) }.reduce(:+) || to[]
|
16
|
+
acc << expression.map { |subexp| convert(subexp, to) }.reduce(:&)
|
27
17
|
|
28
18
|
when Regexp::Expression::CharacterSet::Range
|
29
19
|
start, finish = expression.map { |subexp| convert(subexp, to) }
|
30
|
-
to.new((start.min)..(finish.max))
|
20
|
+
acc << to.new((start.min)..(finish.max))
|
21
|
+
|
22
|
+
when Regexp::Expression::Subexpression # root, group, alternation, etc.
|
23
|
+
expression.each { |subexp| convert(subexp, to, acc) }
|
31
24
|
|
32
25
|
when Regexp::Expression::CharacterType::Any
|
33
|
-
to.unicode
|
26
|
+
acc << to.unicode
|
34
27
|
|
35
28
|
when Regexp::Expression::CharacterType::Base
|
36
29
|
/(?<negative>non)?(?<base_name>.+)/ =~ expression.token
|
37
30
|
content =
|
38
31
|
if expression.unicode_classes?
|
39
|
-
# in u-mode, type shortcuts match the same as \p{<long type name>}
|
40
|
-
|
32
|
+
# in u-mode, most type shortcuts match the same as \p{<long type name>}
|
33
|
+
if base_name == 'linebreak'
|
34
|
+
to.from_ranges(10..13, 133..133, 8232..8233)
|
35
|
+
else
|
36
|
+
to.of_property(base_name)
|
37
|
+
end
|
41
38
|
else
|
42
39
|
# in normal mode, types match only ascii chars
|
43
40
|
case base_name.to_sym
|
44
|
-
when :digit
|
45
|
-
when :hex
|
46
|
-
when :
|
47
|
-
when :
|
41
|
+
when :digit then to.from_ranges(48..57)
|
42
|
+
when :hex then to.from_ranges(48..57, 65..70, 97..102)
|
43
|
+
when :linebreak then to.from_ranges(10..13)
|
44
|
+
when :space then to.from_ranges(9..13, 32..32)
|
45
|
+
when :word then to.from_ranges(48..57, 65..90, 95..95, 97..122)
|
48
46
|
else raise Error, "Unsupported CharacterType #{base_name}"
|
49
47
|
end
|
50
48
|
end
|
51
|
-
negative ? content.inversion : content
|
49
|
+
acc << (negative ? content.inversion : content)
|
52
50
|
|
53
51
|
when Regexp::Expression::EscapeSequence::CodepointList
|
54
|
-
to.new(expression.codepoints)
|
52
|
+
content = to.new(expression.codepoints)
|
53
|
+
acc << (expression.i? ? content.case_insensitive : content)
|
55
54
|
|
56
55
|
when Regexp::Expression::EscapeSequence::Base
|
57
|
-
to[expression.codepoint]
|
58
|
-
|
59
|
-
when Regexp::Expression::Group::Capture,
|
60
|
-
Regexp::Expression::Group::Passive,
|
61
|
-
Regexp::Expression::Group::Named,
|
62
|
-
Regexp::Expression::Group::Atomic,
|
63
|
-
Regexp::Expression::Group::Options
|
64
|
-
case expression.count
|
65
|
-
when 0 then to[]
|
66
|
-
when 1 then convert(expression.first, to)
|
67
|
-
else
|
68
|
-
raise Error, 'Groups must contain exactly one expression, e.g. ([a-z])'
|
69
|
-
end
|
70
|
-
|
71
|
-
when Regexp::Expression::Alternation # rubocop:disable Lint/DuplicateBranch
|
72
|
-
expression.map { |subexp| convert(subexp, to) }.reduce(:+)
|
73
|
-
|
74
|
-
when Regexp::Expression::Alternative
|
75
|
-
case expression.count
|
76
|
-
when 0 then to[]
|
77
|
-
when 1 then convert(expression.first, to)
|
78
|
-
else
|
79
|
-
raise Error, 'Alternatives must contain exactly one expression'
|
80
|
-
end
|
56
|
+
content = to[expression.codepoint]
|
57
|
+
acc << (expression.i? ? content.case_insensitive : content)
|
81
58
|
|
82
59
|
when Regexp::Expression::Literal
|
83
|
-
|
84
|
-
|
85
|
-
end
|
86
|
-
to[expression.text.ord]
|
60
|
+
content = to[*expression.text.chars]
|
61
|
+
acc << (expression.i? ? content.case_insensitive : content)
|
87
62
|
|
88
63
|
when Regexp::Expression::UnicodeProperty::Base,
|
89
64
|
Regexp::Expression::PosixClass
|
@@ -91,14 +66,22 @@ class CharacterSet
|
|
91
66
|
if expression.type == :posixclass && expression.ascii_classes?
|
92
67
|
content = content.ascii_part
|
93
68
|
end
|
94
|
-
expression.negative? ? content.inversion : content
|
69
|
+
acc << (expression.negative? ? content.inversion : content)
|
70
|
+
|
71
|
+
when Regexp::Expression::Anchor::Base,
|
72
|
+
Regexp::Expression::Backreference::Base,
|
73
|
+
Regexp::Expression::Keep::Mark,
|
74
|
+
Regexp::Expression::Quantifier
|
75
|
+
# ignore zero-length and repeat expressions
|
95
76
|
|
96
77
|
when Regexp::Expression::Base
|
97
78
|
raise Error, "Unsupported expression class `#{expression.class}`"
|
98
79
|
|
99
80
|
else
|
100
|
-
raise Error,
|
81
|
+
raise Error, 'Pass an expression (result of Regexp::Parser.parse)'
|
101
82
|
end
|
83
|
+
|
84
|
+
acc.reduce(:+) || to[]
|
102
85
|
end
|
103
86
|
end
|
104
87
|
end
|
@@ -122,10 +122,6 @@ class CharacterSet
|
|
122
122
|
raise ArgumentError, 'pass a String' unless obj.respond_to?(:codepoints)
|
123
123
|
obj.encode('utf-8')
|
124
124
|
end
|
125
|
-
|
126
|
-
def make_new_str(original, &block)
|
127
|
-
utf8_str!(original).each_codepoint.with_object('', &block)
|
128
|
-
end
|
129
125
|
end
|
130
126
|
end
|
131
127
|
end
|
@@ -11,7 +11,7 @@ class CharacterSet
|
|
11
11
|
RUBY
|
12
12
|
end
|
13
13
|
|
14
|
-
%i[< <= > >= === disjoint? include? intersect? member?
|
14
|
+
%i[< <= <=> > >= === disjoint? include? intersect? member?
|
15
15
|
proper_subset? proper_superset? subset? superset?].each do |mthd|
|
16
16
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
17
17
|
def #{mthd}(enum, &block)
|
@@ -23,9 +23,8 @@ class CharacterSet
|
|
23
23
|
RUBY
|
24
24
|
end
|
25
25
|
|
26
|
-
%i[<< add add? clear
|
27
|
-
|
28
|
-
select! subtract].each do |mthd|
|
26
|
+
%i[<< add add? clear delete delete? delete_if each filter! keep_if
|
27
|
+
reject! select! subtract].each do |mthd|
|
29
28
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
30
29
|
def #{mthd}(*args, &block)
|
31
30
|
result = @__set.#{mthd}(*args, &block)
|
@@ -1,492 +1,385 @@
|
|
1
|
-
# set
|
2
|
-
#
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
class CharacterSet
|
11
|
-
module RubyFallback
|
12
|
-
if RUBY_PLATFORM[/java/i]
|
13
|
-
# Vendoring is not needed for JRuby which has sorted_set in the stdlib.
|
14
|
-
require 'set'
|
15
|
-
|
16
|
-
Set = ::Set
|
17
|
-
SortedSet = ::SortedSet
|
18
|
-
else
|
19
|
-
# set, vendored from https://github.com/ruby/set/blob/master/lib/set.rb,
|
20
|
-
# with comments removed and linted.
|
21
|
-
class Set
|
22
|
-
include Enumerable
|
23
|
-
|
24
|
-
def self.[](*ary)
|
25
|
-
new(ary)
|
26
|
-
end
|
27
|
-
|
28
|
-
def initialize(enum = nil, &block)
|
29
|
-
@hash = Hash.new(false)
|
30
|
-
|
31
|
-
enum.nil? and return
|
32
|
-
|
33
|
-
if block
|
34
|
-
do_with_enum(enum) { |o| add(block[o]) }
|
35
|
-
else
|
36
|
-
merge(enum)
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
def compare_by_identity
|
41
|
-
if @hash.respond_to?(:compare_by_identity)
|
42
|
-
@hash.compare_by_identity
|
43
|
-
self
|
44
|
-
else
|
45
|
-
raise NotImplementedError, "#{self.class.name}\##{__method__} is not implemented"
|
46
|
-
end
|
47
|
-
end
|
1
|
+
# set, vendored from https://github.com/ruby/set/blob/master/lib/set.rb,
|
2
|
+
# with comments removed and linted.
|
3
|
+
class CharacterSet::RubyFallback::Set
|
4
|
+
Set = self
|
5
|
+
include Enumerable
|
6
|
+
|
7
|
+
def self.[](*ary)
|
8
|
+
new(ary)
|
9
|
+
end
|
48
10
|
|
49
|
-
|
50
|
-
|
51
|
-
end
|
11
|
+
def initialize(enum = nil, &block)
|
12
|
+
@hash = Hash.new(false)
|
52
13
|
|
53
|
-
|
54
|
-
if enum.respond_to?(:each_entry)
|
55
|
-
enum.each_entry(&block) if block
|
56
|
-
elsif enum.respond_to?(:each)
|
57
|
-
enum.each(&block) if block
|
58
|
-
else
|
59
|
-
raise ArgumentError, "value must be enumerable"
|
60
|
-
end
|
61
|
-
end
|
62
|
-
private :do_with_enum
|
63
|
-
|
64
|
-
def initialize_dup(orig)
|
65
|
-
super
|
66
|
-
@hash = orig.instance_variable_get(:@hash).dup
|
67
|
-
end
|
14
|
+
enum.nil? and return
|
68
15
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
def initialize_clone(orig)
|
76
|
-
super
|
77
|
-
@hash = orig.instance_variable_get(:@hash).clone
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
def freeze
|
82
|
-
@hash.freeze
|
83
|
-
super
|
84
|
-
end
|
16
|
+
if block
|
17
|
+
do_with_enum(enum) { |o| add(block[o]) }
|
18
|
+
else
|
19
|
+
merge(enum)
|
20
|
+
end
|
21
|
+
end
|
85
22
|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
23
|
+
def do_with_enum(enum, &block)
|
24
|
+
if enum.respond_to?(:each_entry)
|
25
|
+
enum.each_entry(&block) if block
|
26
|
+
elsif enum.respond_to?(:each)
|
27
|
+
enum.each(&block) if block
|
28
|
+
else
|
29
|
+
raise ArgumentError, "value must be enumerable"
|
30
|
+
end
|
31
|
+
end
|
32
|
+
private :do_with_enum
|
90
33
|
|
91
|
-
|
92
|
-
|
93
|
-
|
34
|
+
def initialize_dup(orig)
|
35
|
+
super
|
36
|
+
@hash = orig.instance_variable_get(:@hash).dup
|
37
|
+
end
|
94
38
|
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
39
|
+
if Kernel.instance_method(:initialize_clone).arity != 1
|
40
|
+
def initialize_clone(orig, **options)
|
41
|
+
super
|
42
|
+
@hash = orig.instance_variable_get(:@hash).clone(**options)
|
43
|
+
end
|
44
|
+
else
|
45
|
+
def initialize_clone(orig)
|
46
|
+
super
|
47
|
+
@hash = orig.instance_variable_get(:@hash).clone
|
48
|
+
end
|
49
|
+
end
|
99
50
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
else
|
105
|
-
do_with_enum(enum)
|
106
|
-
clear
|
107
|
-
merge(enum)
|
108
|
-
end
|
109
|
-
end
|
51
|
+
def freeze
|
52
|
+
@hash.freeze
|
53
|
+
super
|
54
|
+
end
|
110
55
|
|
111
|
-
|
112
|
-
|
113
|
-
|
56
|
+
def size
|
57
|
+
@hash.size
|
58
|
+
end
|
59
|
+
alias length size
|
114
60
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
end
|
61
|
+
def empty?
|
62
|
+
@hash.empty?
|
63
|
+
end
|
119
64
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
raise ArgumentError, "tried to flatten recursive Set"
|
125
|
-
end
|
126
|
-
|
127
|
-
seen.add(e_id)
|
128
|
-
flatten_merge(e, seen)
|
129
|
-
seen.delete(e_id)
|
130
|
-
else
|
131
|
-
add(e)
|
132
|
-
end
|
133
|
-
}
|
134
|
-
|
135
|
-
self
|
136
|
-
end
|
137
|
-
protected :flatten_merge
|
65
|
+
def clear
|
66
|
+
@hash.clear
|
67
|
+
self
|
68
|
+
end
|
138
69
|
|
139
|
-
|
140
|
-
|
141
|
-
|
70
|
+
def to_a
|
71
|
+
@hash.keys
|
72
|
+
end
|
142
73
|
|
143
|
-
|
144
|
-
|
145
|
-
|
74
|
+
def include?(o)
|
75
|
+
@hash[o]
|
76
|
+
end
|
77
|
+
alias member? include?
|
78
|
+
|
79
|
+
def superset?(set)
|
80
|
+
case
|
81
|
+
when set.instance_of?(self.class) && @hash.respond_to?(:>=)
|
82
|
+
@hash >= set.instance_variable_get(:@hash)
|
83
|
+
when set.is_a?(Set)
|
84
|
+
size >= set.size && set.all? { |o| include?(o) }
|
85
|
+
else
|
86
|
+
raise ArgumentError, "value must be a set"
|
87
|
+
end
|
88
|
+
end
|
89
|
+
alias >= superset?
|
90
|
+
|
91
|
+
def proper_superset?(set)
|
92
|
+
case
|
93
|
+
when set.instance_of?(self.class) && @hash.respond_to?(:>)
|
94
|
+
@hash > set.instance_variable_get(:@hash)
|
95
|
+
when set.is_a?(Set)
|
96
|
+
size > set.size && set.all? { |o| include?(o) }
|
97
|
+
else
|
98
|
+
raise ArgumentError, "value must be a set"
|
99
|
+
end
|
100
|
+
end
|
101
|
+
alias > proper_superset?
|
102
|
+
|
103
|
+
def subset?(set)
|
104
|
+
case
|
105
|
+
when set.instance_of?(self.class) && @hash.respond_to?(:<=)
|
106
|
+
@hash <= set.instance_variable_get(:@hash)
|
107
|
+
when set.is_a?(Set)
|
108
|
+
size <= set.size && all? { |o| set.include?(o) }
|
109
|
+
else
|
110
|
+
raise ArgumentError, "value must be a set"
|
111
|
+
end
|
112
|
+
end
|
113
|
+
alias <= subset?
|
114
|
+
|
115
|
+
def proper_subset?(set)
|
116
|
+
case
|
117
|
+
when set.instance_of?(self.class) && @hash.respond_to?(:<)
|
118
|
+
@hash < set.instance_variable_get(:@hash)
|
119
|
+
when set.is_a?(Set)
|
120
|
+
size < set.size && all? { |o| set.include?(o) }
|
121
|
+
else
|
122
|
+
raise ArgumentError, "value must be a set"
|
123
|
+
end
|
124
|
+
end
|
125
|
+
alias < proper_subset?
|
146
126
|
|
147
|
-
|
148
|
-
|
149
|
-
end
|
150
|
-
alias member? include?
|
151
|
-
|
152
|
-
def superset?(set)
|
153
|
-
case
|
154
|
-
when set.instance_of?(self.class) && @hash.respond_to?(:>=)
|
155
|
-
@hash >= set.instance_variable_get(:@hash)
|
156
|
-
when set.is_a?(Set)
|
157
|
-
size >= set.size && set.all? { |o| include?(o) }
|
158
|
-
else
|
159
|
-
raise ArgumentError, "value must be a set"
|
160
|
-
end
|
161
|
-
end
|
162
|
-
alias >= superset?
|
163
|
-
|
164
|
-
def proper_superset?(set)
|
165
|
-
case
|
166
|
-
when set.instance_of?(self.class) && @hash.respond_to?(:>)
|
167
|
-
@hash > set.instance_variable_get(:@hash)
|
168
|
-
when set.is_a?(Set)
|
169
|
-
size > set.size && set.all? { |o| include?(o) }
|
170
|
-
else
|
171
|
-
raise ArgumentError, "value must be a set"
|
172
|
-
end
|
173
|
-
end
|
174
|
-
alias > proper_superset?
|
175
|
-
|
176
|
-
def subset?(set)
|
177
|
-
case
|
178
|
-
when set.instance_of?(self.class) && @hash.respond_to?(:<=)
|
179
|
-
@hash <= set.instance_variable_get(:@hash)
|
180
|
-
when set.is_a?(Set)
|
181
|
-
size <= set.size && all? { |o| set.include?(o) }
|
182
|
-
else
|
183
|
-
raise ArgumentError, "value must be a set"
|
184
|
-
end
|
185
|
-
end
|
186
|
-
alias <= subset?
|
187
|
-
|
188
|
-
def proper_subset?(set)
|
189
|
-
case
|
190
|
-
when set.instance_of?(self.class) && @hash.respond_to?(:<)
|
191
|
-
@hash < set.instance_variable_get(:@hash)
|
192
|
-
when set.is_a?(Set)
|
193
|
-
size < set.size && all? { |o| set.include?(o) }
|
194
|
-
else
|
195
|
-
raise ArgumentError, "value must be a set"
|
196
|
-
end
|
197
|
-
end
|
198
|
-
alias < proper_subset?
|
127
|
+
def <=>(set)
|
128
|
+
return unless set.is_a?(Set)
|
199
129
|
|
200
|
-
|
201
|
-
|
130
|
+
case size <=> set.size
|
131
|
+
when -1 then -1 if proper_subset?(set)
|
132
|
+
when +1 then +1 if proper_superset?(set)
|
133
|
+
else 0 if self.==(set)
|
134
|
+
end
|
135
|
+
end
|
202
136
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
137
|
+
def intersect?(set)
|
138
|
+
case set
|
139
|
+
when Set
|
140
|
+
if size < set.size
|
141
|
+
any? { |o| set.include?(o) }
|
142
|
+
else
|
143
|
+
set.any? { |o| include?(o) }
|
144
|
+
end
|
145
|
+
when Enumerable
|
146
|
+
set.any? { |o| include?(o) }
|
147
|
+
else
|
148
|
+
raise ArgumentError, "value must be enumerable"
|
149
|
+
end
|
150
|
+
end
|
209
151
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
if size < set.size
|
214
|
-
any? { |o| set.include?(o) }
|
215
|
-
else
|
216
|
-
set.any? { |o| include?(o) }
|
217
|
-
end
|
218
|
-
when Enumerable
|
219
|
-
set.any? { |o| include?(o) }
|
220
|
-
else
|
221
|
-
raise ArgumentError, "value must be enumerable"
|
222
|
-
end
|
223
|
-
end
|
152
|
+
def disjoint?(set)
|
153
|
+
!intersect?(set)
|
154
|
+
end
|
224
155
|
|
225
|
-
|
226
|
-
|
227
|
-
|
156
|
+
def each(&block)
|
157
|
+
block_given? or return enum_for(__method__) { size }
|
158
|
+
@hash.each_key(&block)
|
159
|
+
self
|
160
|
+
end
|
228
161
|
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
162
|
+
def add(o)
|
163
|
+
@hash[o] = true
|
164
|
+
self
|
165
|
+
end
|
166
|
+
alias << add
|
234
167
|
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
end
|
239
|
-
alias << add
|
168
|
+
def add?(o)
|
169
|
+
add(o) unless include?(o)
|
170
|
+
end
|
240
171
|
|
241
|
-
|
242
|
-
|
243
|
-
|
172
|
+
def delete(o)
|
173
|
+
@hash.delete(o)
|
174
|
+
self
|
175
|
+
end
|
244
176
|
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
end
|
177
|
+
def delete?(o)
|
178
|
+
delete(o) if include?(o)
|
179
|
+
end
|
249
180
|
|
250
|
-
|
251
|
-
|
252
|
-
|
181
|
+
def delete_if
|
182
|
+
block_given? or return enum_for(__method__) { size }
|
183
|
+
select { |o| yield o }.each { |o| @hash.delete(o) }
|
184
|
+
self
|
185
|
+
end
|
253
186
|
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
187
|
+
def keep_if
|
188
|
+
block_given? or return enum_for(__method__) { size }
|
189
|
+
reject { |o| yield o }.each { |o| @hash.delete(o) }
|
190
|
+
self
|
191
|
+
end
|
259
192
|
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
193
|
+
def reject!(&block)
|
194
|
+
block_given? or return enum_for(__method__) { size }
|
195
|
+
n = size
|
196
|
+
delete_if(&block)
|
197
|
+
self if size != n
|
198
|
+
end
|
265
199
|
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
alias map! collect!
|
200
|
+
def select!(&block)
|
201
|
+
block_given? or return enum_for(__method__) { size }
|
202
|
+
n = size
|
203
|
+
keep_if(&block)
|
204
|
+
self if size != n
|
205
|
+
end
|
273
206
|
|
274
|
-
|
275
|
-
block_given? or return enum_for(__method__) { size }
|
276
|
-
n = size
|
277
|
-
delete_if(&block)
|
278
|
-
self if size != n
|
279
|
-
end
|
207
|
+
alias filter! select!
|
280
208
|
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
209
|
+
def merge(*enums, **_rest)
|
210
|
+
enums.each do |enum|
|
211
|
+
if enum.instance_of?(self.class)
|
212
|
+
@hash.update(enum.instance_variable_get(:@hash))
|
213
|
+
else
|
214
|
+
do_with_enum(enum) { |o| add(o) }
|
215
|
+
end
|
216
|
+
end
|
287
217
|
|
288
|
-
|
218
|
+
self
|
219
|
+
end
|
289
220
|
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
else
|
295
|
-
do_with_enum(enum) { |o| add(o) }
|
296
|
-
end
|
297
|
-
end
|
221
|
+
def subtract(enum)
|
222
|
+
do_with_enum(enum) { |o| delete(o) }
|
223
|
+
self
|
224
|
+
end
|
298
225
|
|
299
|
-
|
300
|
-
|
226
|
+
def |(enum)
|
227
|
+
dup.merge(enum)
|
228
|
+
end
|
229
|
+
alias + |
|
230
|
+
alias union |
|
301
231
|
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
232
|
+
def -(enum)
|
233
|
+
dup.subtract(enum)
|
234
|
+
end
|
235
|
+
alias difference -
|
236
|
+
|
237
|
+
def &(enum)
|
238
|
+
n = self.class.new
|
239
|
+
if enum.is_a?(Set)
|
240
|
+
if enum.size > size
|
241
|
+
each { |o| n.add(o) if enum.include?(o) }
|
242
|
+
else
|
243
|
+
enum.each { |o| n.add(o) if include?(o) }
|
244
|
+
end
|
245
|
+
else
|
246
|
+
do_with_enum(enum) { |o| n.add(o) if include?(o) }
|
247
|
+
end
|
248
|
+
n
|
249
|
+
end
|
250
|
+
alias intersection &
|
306
251
|
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
252
|
+
def ^(enum)
|
253
|
+
n = Set.new(enum)
|
254
|
+
each { |o| n.add(o) unless n.delete?(o) }
|
255
|
+
n
|
256
|
+
end
|
312
257
|
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
enum.each { |o| n.add(o) if include?(o) }
|
325
|
-
end
|
326
|
-
else
|
327
|
-
do_with_enum(enum) { |o| n.add(o) if include?(o) }
|
328
|
-
end
|
329
|
-
n
|
330
|
-
end
|
331
|
-
alias intersection &
|
258
|
+
def ==(other)
|
259
|
+
if self.equal?(other)
|
260
|
+
true
|
261
|
+
elsif other.instance_of?(self.class)
|
262
|
+
@hash == other.instance_variable_get(:@hash)
|
263
|
+
elsif other.is_a?(Set) && self.size == other.size
|
264
|
+
other.all? { |o| @hash.include?(o) }
|
265
|
+
else
|
266
|
+
false
|
267
|
+
end
|
268
|
+
end
|
332
269
|
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
n
|
337
|
-
end
|
270
|
+
def hash
|
271
|
+
@hash.hash
|
272
|
+
end
|
338
273
|
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
@hash == other.instance_variable_get(:@hash)
|
344
|
-
elsif other.is_a?(Set) && self.size == other.size
|
345
|
-
other.all? { |o| @hash.include?(o) }
|
346
|
-
else
|
347
|
-
false
|
348
|
-
end
|
349
|
-
end
|
274
|
+
def eql?(o)
|
275
|
+
return false unless o.is_a?(Set)
|
276
|
+
@hash.eql?(o.instance_variable_get(:@hash))
|
277
|
+
end
|
350
278
|
|
351
|
-
|
352
|
-
@hash.hash
|
353
|
-
end
|
279
|
+
alias === include?
|
354
280
|
|
355
|
-
|
356
|
-
|
357
|
-
@hash.eql?(o.instance_variable_get(:@hash))
|
358
|
-
end
|
281
|
+
def classify
|
282
|
+
block_given? or return enum_for(__method__) { size }
|
359
283
|
|
360
|
-
|
361
|
-
if @hash.respond_to?(:rehash)
|
362
|
-
@hash.rehash
|
363
|
-
else
|
364
|
-
raise FrozenError, "can't modify frozen #{self.class.name}" if frozen?
|
365
|
-
end
|
366
|
-
self
|
367
|
-
end
|
368
|
-
alias === include?
|
284
|
+
h = {}
|
369
285
|
|
370
|
-
|
371
|
-
|
286
|
+
each { |i|
|
287
|
+
(h[yield(i)] ||= self.class.new).add(i)
|
288
|
+
}
|
372
289
|
|
373
|
-
|
290
|
+
h
|
291
|
+
end
|
374
292
|
|
375
|
-
|
376
|
-
|
377
|
-
}
|
293
|
+
def divide(&func)
|
294
|
+
func or return enum_for(__method__) { size }
|
378
295
|
|
379
|
-
|
380
|
-
|
296
|
+
if func.arity == 2
|
297
|
+
require 'tsort'
|
381
298
|
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
if func.arity == 2
|
386
|
-
require 'tsort'
|
387
|
-
|
388
|
-
class << dig = {}
|
389
|
-
include TSort
|
390
|
-
|
391
|
-
alias tsort_each_node each_key
|
392
|
-
def tsort_each_child(node, &block)
|
393
|
-
fetch(node).each(&block)
|
394
|
-
end
|
395
|
-
end
|
396
|
-
|
397
|
-
each { |u|
|
398
|
-
dig[u] = a = []
|
399
|
-
each{ |v| func.call(u, v) and a << v }
|
400
|
-
}
|
401
|
-
|
402
|
-
set = Set.new()
|
403
|
-
dig.each_strongly_connected_component { |css|
|
404
|
-
set.add(self.class.new(css))
|
405
|
-
}
|
406
|
-
set
|
407
|
-
else
|
408
|
-
Set.new(classify(&func).values)
|
409
|
-
end
|
410
|
-
end
|
299
|
+
class << dig = {}
|
300
|
+
include TSort
|
411
301
|
|
412
|
-
|
413
|
-
|
302
|
+
alias tsort_each_node each_key
|
303
|
+
def tsort_each_child(node, &block)
|
304
|
+
fetch(node).each(&block)
|
414
305
|
end
|
415
306
|
end
|
416
307
|
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
@keys = nil
|
422
|
-
super
|
423
|
-
end
|
308
|
+
each { |u|
|
309
|
+
dig[u] = a = []
|
310
|
+
each{ |v| func.call(u, v) and a << v }
|
311
|
+
}
|
424
312
|
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
313
|
+
set = Set.new()
|
314
|
+
dig.each_strongly_connected_component { |css|
|
315
|
+
set.add(self.class.new(css))
|
316
|
+
}
|
317
|
+
set
|
318
|
+
else
|
319
|
+
Set.new(classify(&func).values)
|
320
|
+
end
|
321
|
+
end
|
322
|
+
end
|
429
323
|
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
324
|
+
# sorted_set without rbtree dependency, vendored from
|
325
|
+
# https://github.com/ruby/set/blob/72f08c4/lib/set.rb#L731-L800
|
326
|
+
class CharacterSet::RubyFallback::SortedSet < CharacterSet::RubyFallback::Set
|
327
|
+
def initialize(*args)
|
328
|
+
@keys = nil
|
329
|
+
super
|
330
|
+
end
|
434
331
|
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
end
|
440
|
-
alias << add
|
332
|
+
def clear
|
333
|
+
@keys = nil
|
334
|
+
super
|
335
|
+
end
|
441
336
|
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
337
|
+
def add(o)
|
338
|
+
@keys = nil
|
339
|
+
super
|
340
|
+
end
|
341
|
+
alias << add
|
447
342
|
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
self
|
454
|
-
end
|
343
|
+
def delete(o)
|
344
|
+
@keys = nil
|
345
|
+
@hash.delete(o)
|
346
|
+
self
|
347
|
+
end
|
455
348
|
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
349
|
+
def delete_if
|
350
|
+
block_given? or return enum_for(__method__) { size }
|
351
|
+
n = @hash.size
|
352
|
+
super
|
353
|
+
@keys = nil if @hash.size != n
|
354
|
+
self
|
355
|
+
end
|
463
356
|
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
357
|
+
def keep_if
|
358
|
+
block_given? or return enum_for(__method__) { size }
|
359
|
+
n = @hash.size
|
360
|
+
super
|
361
|
+
@keys = nil if @hash.size != n
|
362
|
+
self
|
363
|
+
end
|
468
364
|
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
end
|
365
|
+
def merge(enum)
|
366
|
+
@keys = nil
|
367
|
+
super
|
368
|
+
end
|
474
369
|
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
370
|
+
def each(&block)
|
371
|
+
block or return enum_for(__method__) { size }
|
372
|
+
to_a.each(&block)
|
373
|
+
self
|
374
|
+
end
|
479
375
|
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
376
|
+
def to_a
|
377
|
+
(@keys = @hash.keys).sort! unless @keys
|
378
|
+
@keys.dup
|
379
|
+
end
|
484
380
|
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
end
|
489
|
-
end
|
490
|
-
end
|
381
|
+
def freeze
|
382
|
+
to_a
|
383
|
+
super
|
491
384
|
end
|
492
385
|
end
|
@@ -1,6 +1,5 @@
|
|
1
1
|
require 'character_set/ruby_fallback/set_methods'
|
2
2
|
require 'character_set/ruby_fallback/character_set_methods'
|
3
|
-
require 'character_set/ruby_fallback/vendored_set_classes'
|
4
3
|
|
5
4
|
class CharacterSet
|
6
5
|
module RubyFallback
|
@@ -17,3 +16,20 @@ class CharacterSet
|
|
17
16
|
end
|
18
17
|
end
|
19
18
|
end
|
19
|
+
|
20
|
+
if RUBY_PLATFORM[/java/i]
|
21
|
+
# JRuby has sorted_set in the stdlib.
|
22
|
+
require 'set'
|
23
|
+
CharacterSet::RubyFallback::Set = ::Set
|
24
|
+
CharacterSet::RubyFallback::SortedSet = ::SortedSet
|
25
|
+
else
|
26
|
+
# For other rubies, set/sorted_set are vendored due to dependency issues:
|
27
|
+
#
|
28
|
+
# - issues with default vs. installed gems such as [#2]
|
29
|
+
# - issues with the sorted_set dependency rb_tree
|
30
|
+
# - long-standing issues in recent versions of sorted_set
|
31
|
+
#
|
32
|
+
# The RubyFallback, and thus these set classes, are only used for testing,
|
33
|
+
# and for exotic rubies which use neither C nor Java.
|
34
|
+
require 'character_set/ruby_fallback/vendored_set_classes'
|
35
|
+
end
|
@@ -22,7 +22,7 @@ class CharacterSet
|
|
22
22
|
|
23
23
|
# Allow some methods to take an Enum just as well as another CharacterSet.
|
24
24
|
# Tested by ruby-spec.
|
25
|
-
%w[& + - ^ | difference disjoint? intersect? intersection
|
25
|
+
%w[& + - ^ | <=> difference disjoint? intersect? intersection
|
26
26
|
subtract union].each do |method|
|
27
27
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
28
28
|
def #{method}(arg)
|
@@ -165,9 +165,13 @@ class CharacterSet
|
|
165
165
|
end
|
166
166
|
|
167
167
|
def divide(&func)
|
168
|
-
require 'character_set/ruby_fallback
|
168
|
+
require 'character_set/ruby_fallback'
|
169
169
|
CharacterSet::RubyFallback::Set.new(to_a).divide(&func)
|
170
170
|
end
|
171
|
+
|
172
|
+
def join(separator = '')
|
173
|
+
to_a(true).join(separator)
|
174
|
+
end
|
171
175
|
RUBY
|
172
176
|
|
173
177
|
# CharacterSet-specific section methods
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: character_set
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-01-07 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|
@@ -106,7 +106,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
106
106
|
- !ruby/object:Gem::Version
|
107
107
|
version: '0'
|
108
108
|
requirements: []
|
109
|
-
rubygems_version: 3.
|
109
|
+
rubygems_version: 3.5.0.dev
|
110
110
|
signing_key:
|
111
111
|
specification_version: 4
|
112
112
|
summary: Build, read, write and compare sets of Unicode codepoints.
|