character_set 1.7.0 → 1.8.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/gouteur.yml +1 -1
- data/.github/workflows/lint.yml +1 -1
- data/.github/workflows/tests.yml +3 -1
- data/.rubocop.yml +3 -0
- data/CHANGELOG.md +9 -0
- data/Gemfile +7 -6
- data/LICENSE.txt +1 -1
- data/README.md +2 -2
- data/ext/character_set/character_set.c +13 -0
- data/lib/character_set/core_ext/regexp_ext.rb +8 -0
- data/lib/character_set/expression_converter.rb +37 -54
- data/lib/character_set/ruby_fallback/character_set_methods.rb +0 -4
- data/lib/character_set/ruby_fallback/set_methods.rb +3 -4
- data/lib/character_set/ruby_fallback/vendored_set_classes.rb +325 -432
- data/lib/character_set/ruby_fallback.rb +17 -1
- data/lib/character_set/set_method_adapters.rb +1 -1
- data/lib/character_set/shared_methods.rb +5 -1
- data/lib/character_set/version.rb +1 -1
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ebb6792f685df02534f1ef04a92d7f0c5fdcb482e5aaa4856d7a39726e17f007
|
4
|
+
data.tar.gz: c6630aab9b6506c46a970ba83c257cd753f8f76760b6ce8d2639f51efba83eeb
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 4c773a0546d05939d0b295e50355c6efe870a1ed74901d63c24097ff598d4a43bcd00ce2d03fb492a48fd9c03968a79ee78b789d92836843d6621dca3e8f313c
|
7
|
+
data.tar.gz: 560d3c3aa3f7e4daac3b6d2c89fb9dd6840777fa4d5896fb33564023ef745d81a7e4d0e51fe0ba42f6cd4504bc0b088657cd4ef1ab15d213aa1bb096ba404542
|
data/.github/workflows/lint.yml
CHANGED
data/.github/workflows/tests.yml
CHANGED
@@ -12,7 +12,7 @@ jobs:
|
|
12
12
|
|
13
13
|
strategy:
|
14
14
|
matrix:
|
15
|
-
ruby: [ '2.
|
15
|
+
ruby: [ '2.4', '2.7', '3.0', '3.1', '3.2', '3.3', 'ruby-head', 'jruby-head' ]
|
16
16
|
|
17
17
|
steps:
|
18
18
|
- uses: actions/checkout@v2
|
@@ -24,3 +24,5 @@ jobs:
|
|
24
24
|
run: bundle install --jobs 4
|
25
25
|
- name: Test with Rake
|
26
26
|
run: bundle exec rake
|
27
|
+
- uses: codecov/codecov-action@v3
|
28
|
+
if: matrix.ruby == '3.2'
|
data/.rubocop.yml
CHANGED
data/CHANGELOG.md
CHANGED
@@ -6,6 +6,15 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
|
|
6
6
|
|
7
7
|
## [Unreleased]
|
8
8
|
|
9
|
+
## [1.8.0] - 2024-01-07
|
10
|
+
|
11
|
+
### Added
|
12
|
+
|
13
|
+
- support for `#<=>` and `#join`, which were added to `set` in the meantime
|
14
|
+
- support for getting the (overall) character set of a Regexp with multiple expressions
|
15
|
+
- support for global and local case-insensitivity in Regexp inputs
|
16
|
+
- `Regexp#{covered_by_character_set?,uses_character_set?}` methods (if core ext is used)
|
17
|
+
|
9
18
|
## [1.7.0] - 2023-05-12
|
10
19
|
|
11
20
|
### Added
|
data/Gemfile
CHANGED
@@ -7,14 +7,15 @@ gemspec
|
|
7
7
|
|
8
8
|
gem 'benchmark-ips', '~> 2.7'
|
9
9
|
gem 'get_process_mem', '~> 0.2.3'
|
10
|
-
gem 'rake', '~> 13.
|
10
|
+
gem 'rake', '~> 13.1'
|
11
11
|
gem 'rake-compiler', '~> 1.1'
|
12
12
|
gem 'range_compressor', '~> 1.0'
|
13
|
-
gem 'regexp_parser', '~> 2.
|
14
|
-
gem 'regexp_property_values', '~> 1.
|
13
|
+
gem 'regexp_parser', '~> 2.9'
|
14
|
+
gem 'regexp_property_values', '~> 1.5'
|
15
15
|
gem 'rspec', '~> 3.8'
|
16
|
-
|
17
|
-
|
16
|
+
gem 'warning', '~> 1.3'
|
17
|
+
if RUBY_VERSION.to_f >= 3.0
|
18
18
|
gem 'gouteur', '~> 1.0.0'
|
19
|
-
gem 'rubocop', '~> 1.
|
19
|
+
gem 'rubocop', '~> 1.59'
|
20
|
+
gem 'simplecov-cobertura', require: false
|
20
21
|
end
|
data/LICENSE.txt
CHANGED
data/README.md
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
[![Gem Version](https://badge.fury.io/rb/character_set.svg)](http://badge.fury.io/rb/character_set)
|
4
4
|
[![Build Status](https://github.com/jaynetics/character_set/workflows/tests/badge.svg)](https://github.com/jaynetics/character_set/actions)
|
5
5
|
[![Build Status](https://github.com/jaynetics/character_set/workflows/gouteur/badge.svg)](https://github.com/jaynetics/character_set/actions)
|
6
|
-
[![
|
6
|
+
[![Coverage](https://codecov.io/gh/jaynetics/character_set/branch/main/graph/badge.svg?token=oY7gcWNbIN)](https://codecov.io/gh/jaynetics/character_set)
|
7
7
|
|
8
8
|
This is a C-extended Ruby gem to work with sets of Unicode codepoints.
|
9
9
|
|
@@ -43,7 +43,7 @@ CharacterSet.parse('[a-c]')
|
|
43
43
|
CharacterSet.parse('\U00000061-\U00000063')
|
44
44
|
```
|
45
45
|
|
46
|
-
If the gems [`regexp_parser`](https://github.com/ammar/regexp_parser) and [`regexp_property_values`](https://github.com/jaynetics/regexp_property_values) are installed, `Regexp` and unicode property names can also be read.
|
46
|
+
If the gems [`regexp_parser`](https://github.com/ammar/regexp_parser) and [`regexp_property_values`](https://github.com/jaynetics/regexp_property_values) are installed, `Regexp` instances and unicode property names can also be read.
|
47
47
|
|
48
48
|
```ruby
|
49
49
|
CharacterSet.of(/./) # => #<CharacterSet (size: 1112064)>
|
@@ -675,6 +675,18 @@ cs_method_proper_superset_p(VALUE self, VALUE other)
|
|
675
675
|
return (is_superset && is_proper) ? Qtrue : Qfalse;
|
676
676
|
}
|
677
677
|
|
678
|
+
static VALUE
|
679
|
+
cs_method_spaceship_operator(VALUE self, VALUE other)
|
680
|
+
{
|
681
|
+
if (cs_method_eql_p(self, other))
|
682
|
+
return INT2FIX(0);
|
683
|
+
if (cs_method_proper_subset_p(self, other))
|
684
|
+
return INT2FIX(-1);
|
685
|
+
if (cs_method_proper_superset_p(self, other))
|
686
|
+
return INT2FIX(1);
|
687
|
+
return Qnil;
|
688
|
+
}
|
689
|
+
|
678
690
|
// *******************************
|
679
691
|
// `CharacterSet`-specific methods
|
680
692
|
// *******************************
|
@@ -1324,6 +1336,7 @@ void Init_character_set()
|
|
1324
1336
|
rb_define_method(cs, ">=", cs_method_superset_p, 1);
|
1325
1337
|
rb_define_method(cs, "proper_superset?", cs_method_proper_superset_p, 1);
|
1326
1338
|
rb_define_method(cs, ">", cs_method_proper_superset_p, 1);
|
1339
|
+
rb_define_method(cs, "<=>", cs_method_spaceship_operator, 1);
|
1327
1340
|
|
1328
1341
|
// `CharacterSet`-specific methods
|
1329
1342
|
|
@@ -4,6 +4,14 @@ class CharacterSet
|
|
4
4
|
def character_set
|
5
5
|
CharacterSet.of_regexp(self)
|
6
6
|
end
|
7
|
+
|
8
|
+
def covered_by_character_set?(other)
|
9
|
+
other.superset?(character_set)
|
10
|
+
end
|
11
|
+
|
12
|
+
def uses_character_set?(other)
|
13
|
+
other.intersect?(character_set)
|
14
|
+
end
|
7
15
|
end
|
8
16
|
end
|
9
17
|
end
|
@@ -4,86 +4,61 @@ class CharacterSet
|
|
4
4
|
|
5
5
|
Error = Class.new(ArgumentError)
|
6
6
|
|
7
|
-
def convert(expression, to = CharacterSet)
|
7
|
+
def convert(expression, to = CharacterSet, acc = [])
|
8
8
|
CharacterSet.require_optional_dependency('regexp_parser', __method__)
|
9
9
|
|
10
10
|
case expression
|
11
|
-
when Regexp::Expression::Root
|
12
|
-
if expression.count != 1
|
13
|
-
raise Error, 'Pass a Regexp with exactly one expression, e.g. /[a-z]/'
|
14
|
-
end
|
15
|
-
convert(expression[0], to)
|
16
|
-
|
17
11
|
when Regexp::Expression::CharacterSet
|
18
|
-
content = expression.map { |subexp| convert(subexp, to) }.reduce(:+)
|
19
|
-
content
|
20
|
-
expression.negative? ? content.inversion : content
|
12
|
+
content = expression.map { |subexp| convert(subexp, to) }.reduce(:+) || to[]
|
13
|
+
acc << (expression.negative? ? content.inversion : content)
|
21
14
|
|
22
15
|
when Regexp::Expression::CharacterSet::Intersection
|
23
|
-
expression.map { |subexp| convert(subexp, to) }.reduce(:&)
|
24
|
-
|
25
|
-
when Regexp::Expression::CharacterSet::IntersectedSequence
|
26
|
-
expression.map { |subexp| convert(subexp, to) }.reduce(:+) || to[]
|
16
|
+
acc << expression.map { |subexp| convert(subexp, to) }.reduce(:&)
|
27
17
|
|
28
18
|
when Regexp::Expression::CharacterSet::Range
|
29
19
|
start, finish = expression.map { |subexp| convert(subexp, to) }
|
30
|
-
to.new((start.min)..(finish.max))
|
20
|
+
acc << to.new((start.min)..(finish.max))
|
21
|
+
|
22
|
+
when Regexp::Expression::Subexpression # root, group, alternation, etc.
|
23
|
+
expression.each { |subexp| convert(subexp, to, acc) }
|
31
24
|
|
32
25
|
when Regexp::Expression::CharacterType::Any
|
33
|
-
to.unicode
|
26
|
+
acc << to.unicode
|
34
27
|
|
35
28
|
when Regexp::Expression::CharacterType::Base
|
36
29
|
/(?<negative>non)?(?<base_name>.+)/ =~ expression.token
|
37
30
|
content =
|
38
31
|
if expression.unicode_classes?
|
39
|
-
# in u-mode, type shortcuts match the same as \p{<long type name>}
|
40
|
-
|
32
|
+
# in u-mode, most type shortcuts match the same as \p{<long type name>}
|
33
|
+
if base_name == 'linebreak'
|
34
|
+
to.from_ranges(10..13, 133..133, 8232..8233)
|
35
|
+
else
|
36
|
+
to.of_property(base_name)
|
37
|
+
end
|
41
38
|
else
|
42
39
|
# in normal mode, types match only ascii chars
|
43
40
|
case base_name.to_sym
|
44
|
-
when :digit
|
45
|
-
when :hex
|
46
|
-
when :
|
47
|
-
when :
|
41
|
+
when :digit then to.from_ranges(48..57)
|
42
|
+
when :hex then to.from_ranges(48..57, 65..70, 97..102)
|
43
|
+
when :linebreak then to.from_ranges(10..13)
|
44
|
+
when :space then to.from_ranges(9..13, 32..32)
|
45
|
+
when :word then to.from_ranges(48..57, 65..90, 95..95, 97..122)
|
48
46
|
else raise Error, "Unsupported CharacterType #{base_name}"
|
49
47
|
end
|
50
48
|
end
|
51
|
-
negative ? content.inversion : content
|
49
|
+
acc << (negative ? content.inversion : content)
|
52
50
|
|
53
51
|
when Regexp::Expression::EscapeSequence::CodepointList
|
54
|
-
to.new(expression.codepoints)
|
52
|
+
content = to.new(expression.codepoints)
|
53
|
+
acc << (expression.i? ? content.case_insensitive : content)
|
55
54
|
|
56
55
|
when Regexp::Expression::EscapeSequence::Base
|
57
|
-
to[expression.codepoint]
|
58
|
-
|
59
|
-
when Regexp::Expression::Group::Capture,
|
60
|
-
Regexp::Expression::Group::Passive,
|
61
|
-
Regexp::Expression::Group::Named,
|
62
|
-
Regexp::Expression::Group::Atomic,
|
63
|
-
Regexp::Expression::Group::Options
|
64
|
-
case expression.count
|
65
|
-
when 0 then to[]
|
66
|
-
when 1 then convert(expression.first, to)
|
67
|
-
else
|
68
|
-
raise Error, 'Groups must contain exactly one expression, e.g. ([a-z])'
|
69
|
-
end
|
70
|
-
|
71
|
-
when Regexp::Expression::Alternation # rubocop:disable Lint/DuplicateBranch
|
72
|
-
expression.map { |subexp| convert(subexp, to) }.reduce(:+)
|
73
|
-
|
74
|
-
when Regexp::Expression::Alternative
|
75
|
-
case expression.count
|
76
|
-
when 0 then to[]
|
77
|
-
when 1 then convert(expression.first, to)
|
78
|
-
else
|
79
|
-
raise Error, 'Alternatives must contain exactly one expression'
|
80
|
-
end
|
56
|
+
content = to[expression.codepoint]
|
57
|
+
acc << (expression.i? ? content.case_insensitive : content)
|
81
58
|
|
82
59
|
when Regexp::Expression::Literal
|
83
|
-
|
84
|
-
|
85
|
-
end
|
86
|
-
to[expression.text.ord]
|
60
|
+
content = to[*expression.text.chars]
|
61
|
+
acc << (expression.i? ? content.case_insensitive : content)
|
87
62
|
|
88
63
|
when Regexp::Expression::UnicodeProperty::Base,
|
89
64
|
Regexp::Expression::PosixClass
|
@@ -91,14 +66,22 @@ class CharacterSet
|
|
91
66
|
if expression.type == :posixclass && expression.ascii_classes?
|
92
67
|
content = content.ascii_part
|
93
68
|
end
|
94
|
-
expression.negative? ? content.inversion : content
|
69
|
+
acc << (expression.negative? ? content.inversion : content)
|
70
|
+
|
71
|
+
when Regexp::Expression::Anchor::Base,
|
72
|
+
Regexp::Expression::Backreference::Base,
|
73
|
+
Regexp::Expression::Keep::Mark,
|
74
|
+
Regexp::Expression::Quantifier
|
75
|
+
# ignore zero-length and repeat expressions
|
95
76
|
|
96
77
|
when Regexp::Expression::Base
|
97
78
|
raise Error, "Unsupported expression class `#{expression.class}`"
|
98
79
|
|
99
80
|
else
|
100
|
-
raise Error,
|
81
|
+
raise Error, 'Pass an expression (result of Regexp::Parser.parse)'
|
101
82
|
end
|
83
|
+
|
84
|
+
acc.reduce(:+) || to[]
|
102
85
|
end
|
103
86
|
end
|
104
87
|
end
|
@@ -122,10 +122,6 @@ class CharacterSet
|
|
122
122
|
raise ArgumentError, 'pass a String' unless obj.respond_to?(:codepoints)
|
123
123
|
obj.encode('utf-8')
|
124
124
|
end
|
125
|
-
|
126
|
-
def make_new_str(original, &block)
|
127
|
-
utf8_str!(original).each_codepoint.with_object('', &block)
|
128
|
-
end
|
129
125
|
end
|
130
126
|
end
|
131
127
|
end
|
@@ -11,7 +11,7 @@ class CharacterSet
|
|
11
11
|
RUBY
|
12
12
|
end
|
13
13
|
|
14
|
-
%i[< <= > >= === disjoint? include? intersect? member?
|
14
|
+
%i[< <= <=> > >= === disjoint? include? intersect? member?
|
15
15
|
proper_subset? proper_superset? subset? superset?].each do |mthd|
|
16
16
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
17
17
|
def #{mthd}(enum, &block)
|
@@ -23,9 +23,8 @@ class CharacterSet
|
|
23
23
|
RUBY
|
24
24
|
end
|
25
25
|
|
26
|
-
%i[<< add add? clear
|
27
|
-
|
28
|
-
select! subtract].each do |mthd|
|
26
|
+
%i[<< add add? clear delete delete? delete_if each filter! keep_if
|
27
|
+
reject! select! subtract].each do |mthd|
|
29
28
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
30
29
|
def #{mthd}(*args, &block)
|
31
30
|
result = @__set.#{mthd}(*args, &block)
|
@@ -1,492 +1,385 @@
|
|
1
|
-
# set
|
2
|
-
#
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
class CharacterSet
|
11
|
-
module RubyFallback
|
12
|
-
if RUBY_PLATFORM[/java/i]
|
13
|
-
# Vendoring is not needed for JRuby which has sorted_set in the stdlib.
|
14
|
-
require 'set'
|
15
|
-
|
16
|
-
Set = ::Set
|
17
|
-
SortedSet = ::SortedSet
|
18
|
-
else
|
19
|
-
# set, vendored from https://github.com/ruby/set/blob/master/lib/set.rb,
|
20
|
-
# with comments removed and linted.
|
21
|
-
class Set
|
22
|
-
include Enumerable
|
23
|
-
|
24
|
-
def self.[](*ary)
|
25
|
-
new(ary)
|
26
|
-
end
|
27
|
-
|
28
|
-
def initialize(enum = nil, &block)
|
29
|
-
@hash = Hash.new(false)
|
30
|
-
|
31
|
-
enum.nil? and return
|
32
|
-
|
33
|
-
if block
|
34
|
-
do_with_enum(enum) { |o| add(block[o]) }
|
35
|
-
else
|
36
|
-
merge(enum)
|
37
|
-
end
|
38
|
-
end
|
39
|
-
|
40
|
-
def compare_by_identity
|
41
|
-
if @hash.respond_to?(:compare_by_identity)
|
42
|
-
@hash.compare_by_identity
|
43
|
-
self
|
44
|
-
else
|
45
|
-
raise NotImplementedError, "#{self.class.name}\##{__method__} is not implemented"
|
46
|
-
end
|
47
|
-
end
|
1
|
+
# set, vendored from https://github.com/ruby/set/blob/master/lib/set.rb,
|
2
|
+
# with comments removed and linted.
|
3
|
+
class CharacterSet::RubyFallback::Set
|
4
|
+
Set = self
|
5
|
+
include Enumerable
|
6
|
+
|
7
|
+
def self.[](*ary)
|
8
|
+
new(ary)
|
9
|
+
end
|
48
10
|
|
49
|
-
|
50
|
-
|
51
|
-
end
|
11
|
+
def initialize(enum = nil, &block)
|
12
|
+
@hash = Hash.new(false)
|
52
13
|
|
53
|
-
|
54
|
-
if enum.respond_to?(:each_entry)
|
55
|
-
enum.each_entry(&block) if block
|
56
|
-
elsif enum.respond_to?(:each)
|
57
|
-
enum.each(&block) if block
|
58
|
-
else
|
59
|
-
raise ArgumentError, "value must be enumerable"
|
60
|
-
end
|
61
|
-
end
|
62
|
-
private :do_with_enum
|
63
|
-
|
64
|
-
def initialize_dup(orig)
|
65
|
-
super
|
66
|
-
@hash = orig.instance_variable_get(:@hash).dup
|
67
|
-
end
|
14
|
+
enum.nil? and return
|
68
15
|
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
def initialize_clone(orig)
|
76
|
-
super
|
77
|
-
@hash = orig.instance_variable_get(:@hash).clone
|
78
|
-
end
|
79
|
-
end
|
80
|
-
|
81
|
-
def freeze
|
82
|
-
@hash.freeze
|
83
|
-
super
|
84
|
-
end
|
16
|
+
if block
|
17
|
+
do_with_enum(enum) { |o| add(block[o]) }
|
18
|
+
else
|
19
|
+
merge(enum)
|
20
|
+
end
|
21
|
+
end
|
85
22
|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
23
|
+
def do_with_enum(enum, &block)
|
24
|
+
if enum.respond_to?(:each_entry)
|
25
|
+
enum.each_entry(&block) if block
|
26
|
+
elsif enum.respond_to?(:each)
|
27
|
+
enum.each(&block) if block
|
28
|
+
else
|
29
|
+
raise ArgumentError, "value must be enumerable"
|
30
|
+
end
|
31
|
+
end
|
32
|
+
private :do_with_enum
|
90
33
|
|
91
|
-
|
92
|
-
|
93
|
-
|
34
|
+
def initialize_dup(orig)
|
35
|
+
super
|
36
|
+
@hash = orig.instance_variable_get(:@hash).dup
|
37
|
+
end
|
94
38
|
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
39
|
+
if Kernel.instance_method(:initialize_clone).arity != 1
|
40
|
+
def initialize_clone(orig, **options)
|
41
|
+
super
|
42
|
+
@hash = orig.instance_variable_get(:@hash).clone(**options)
|
43
|
+
end
|
44
|
+
else
|
45
|
+
def initialize_clone(orig)
|
46
|
+
super
|
47
|
+
@hash = orig.instance_variable_get(:@hash).clone
|
48
|
+
end
|
49
|
+
end
|
99
50
|
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
else
|
105
|
-
do_with_enum(enum)
|
106
|
-
clear
|
107
|
-
merge(enum)
|
108
|
-
end
|
109
|
-
end
|
51
|
+
def freeze
|
52
|
+
@hash.freeze
|
53
|
+
super
|
54
|
+
end
|
110
55
|
|
111
|
-
|
112
|
-
|
113
|
-
|
56
|
+
def size
|
57
|
+
@hash.size
|
58
|
+
end
|
59
|
+
alias length size
|
114
60
|
|
115
|
-
|
116
|
-
|
117
|
-
|
118
|
-
end
|
61
|
+
def empty?
|
62
|
+
@hash.empty?
|
63
|
+
end
|
119
64
|
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
raise ArgumentError, "tried to flatten recursive Set"
|
125
|
-
end
|
126
|
-
|
127
|
-
seen.add(e_id)
|
128
|
-
flatten_merge(e, seen)
|
129
|
-
seen.delete(e_id)
|
130
|
-
else
|
131
|
-
add(e)
|
132
|
-
end
|
133
|
-
}
|
134
|
-
|
135
|
-
self
|
136
|
-
end
|
137
|
-
protected :flatten_merge
|
65
|
+
def clear
|
66
|
+
@hash.clear
|
67
|
+
self
|
68
|
+
end
|
138
69
|
|
139
|
-
|
140
|
-
|
141
|
-
|
70
|
+
def to_a
|
71
|
+
@hash.keys
|
72
|
+
end
|
142
73
|
|
143
|
-
|
144
|
-
|
145
|
-
|
74
|
+
def include?(o)
|
75
|
+
@hash[o]
|
76
|
+
end
|
77
|
+
alias member? include?
|
78
|
+
|
79
|
+
def superset?(set)
|
80
|
+
case
|
81
|
+
when set.instance_of?(self.class) && @hash.respond_to?(:>=)
|
82
|
+
@hash >= set.instance_variable_get(:@hash)
|
83
|
+
when set.is_a?(Set)
|
84
|
+
size >= set.size && set.all? { |o| include?(o) }
|
85
|
+
else
|
86
|
+
raise ArgumentError, "value must be a set"
|
87
|
+
end
|
88
|
+
end
|
89
|
+
alias >= superset?
|
90
|
+
|
91
|
+
def proper_superset?(set)
|
92
|
+
case
|
93
|
+
when set.instance_of?(self.class) && @hash.respond_to?(:>)
|
94
|
+
@hash > set.instance_variable_get(:@hash)
|
95
|
+
when set.is_a?(Set)
|
96
|
+
size > set.size && set.all? { |o| include?(o) }
|
97
|
+
else
|
98
|
+
raise ArgumentError, "value must be a set"
|
99
|
+
end
|
100
|
+
end
|
101
|
+
alias > proper_superset?
|
102
|
+
|
103
|
+
def subset?(set)
|
104
|
+
case
|
105
|
+
when set.instance_of?(self.class) && @hash.respond_to?(:<=)
|
106
|
+
@hash <= set.instance_variable_get(:@hash)
|
107
|
+
when set.is_a?(Set)
|
108
|
+
size <= set.size && all? { |o| set.include?(o) }
|
109
|
+
else
|
110
|
+
raise ArgumentError, "value must be a set"
|
111
|
+
end
|
112
|
+
end
|
113
|
+
alias <= subset?
|
114
|
+
|
115
|
+
def proper_subset?(set)
|
116
|
+
case
|
117
|
+
when set.instance_of?(self.class) && @hash.respond_to?(:<)
|
118
|
+
@hash < set.instance_variable_get(:@hash)
|
119
|
+
when set.is_a?(Set)
|
120
|
+
size < set.size && all? { |o| set.include?(o) }
|
121
|
+
else
|
122
|
+
raise ArgumentError, "value must be a set"
|
123
|
+
end
|
124
|
+
end
|
125
|
+
alias < proper_subset?
|
146
126
|
|
147
|
-
|
148
|
-
|
149
|
-
end
|
150
|
-
alias member? include?
|
151
|
-
|
152
|
-
def superset?(set)
|
153
|
-
case
|
154
|
-
when set.instance_of?(self.class) && @hash.respond_to?(:>=)
|
155
|
-
@hash >= set.instance_variable_get(:@hash)
|
156
|
-
when set.is_a?(Set)
|
157
|
-
size >= set.size && set.all? { |o| include?(o) }
|
158
|
-
else
|
159
|
-
raise ArgumentError, "value must be a set"
|
160
|
-
end
|
161
|
-
end
|
162
|
-
alias >= superset?
|
163
|
-
|
164
|
-
def proper_superset?(set)
|
165
|
-
case
|
166
|
-
when set.instance_of?(self.class) && @hash.respond_to?(:>)
|
167
|
-
@hash > set.instance_variable_get(:@hash)
|
168
|
-
when set.is_a?(Set)
|
169
|
-
size > set.size && set.all? { |o| include?(o) }
|
170
|
-
else
|
171
|
-
raise ArgumentError, "value must be a set"
|
172
|
-
end
|
173
|
-
end
|
174
|
-
alias > proper_superset?
|
175
|
-
|
176
|
-
def subset?(set)
|
177
|
-
case
|
178
|
-
when set.instance_of?(self.class) && @hash.respond_to?(:<=)
|
179
|
-
@hash <= set.instance_variable_get(:@hash)
|
180
|
-
when set.is_a?(Set)
|
181
|
-
size <= set.size && all? { |o| set.include?(o) }
|
182
|
-
else
|
183
|
-
raise ArgumentError, "value must be a set"
|
184
|
-
end
|
185
|
-
end
|
186
|
-
alias <= subset?
|
187
|
-
|
188
|
-
def proper_subset?(set)
|
189
|
-
case
|
190
|
-
when set.instance_of?(self.class) && @hash.respond_to?(:<)
|
191
|
-
@hash < set.instance_variable_get(:@hash)
|
192
|
-
when set.is_a?(Set)
|
193
|
-
size < set.size && all? { |o| set.include?(o) }
|
194
|
-
else
|
195
|
-
raise ArgumentError, "value must be a set"
|
196
|
-
end
|
197
|
-
end
|
198
|
-
alias < proper_subset?
|
127
|
+
def <=>(set)
|
128
|
+
return unless set.is_a?(Set)
|
199
129
|
|
200
|
-
|
201
|
-
|
130
|
+
case size <=> set.size
|
131
|
+
when -1 then -1 if proper_subset?(set)
|
132
|
+
when +1 then +1 if proper_superset?(set)
|
133
|
+
else 0 if self.==(set)
|
134
|
+
end
|
135
|
+
end
|
202
136
|
|
203
|
-
|
204
|
-
|
205
|
-
|
206
|
-
|
207
|
-
|
208
|
-
|
137
|
+
def intersect?(set)
|
138
|
+
case set
|
139
|
+
when Set
|
140
|
+
if size < set.size
|
141
|
+
any? { |o| set.include?(o) }
|
142
|
+
else
|
143
|
+
set.any? { |o| include?(o) }
|
144
|
+
end
|
145
|
+
when Enumerable
|
146
|
+
set.any? { |o| include?(o) }
|
147
|
+
else
|
148
|
+
raise ArgumentError, "value must be enumerable"
|
149
|
+
end
|
150
|
+
end
|
209
151
|
|
210
|
-
|
211
|
-
|
212
|
-
|
213
|
-
if size < set.size
|
214
|
-
any? { |o| set.include?(o) }
|
215
|
-
else
|
216
|
-
set.any? { |o| include?(o) }
|
217
|
-
end
|
218
|
-
when Enumerable
|
219
|
-
set.any? { |o| include?(o) }
|
220
|
-
else
|
221
|
-
raise ArgumentError, "value must be enumerable"
|
222
|
-
end
|
223
|
-
end
|
152
|
+
def disjoint?(set)
|
153
|
+
!intersect?(set)
|
154
|
+
end
|
224
155
|
|
225
|
-
|
226
|
-
|
227
|
-
|
156
|
+
def each(&block)
|
157
|
+
block_given? or return enum_for(__method__) { size }
|
158
|
+
@hash.each_key(&block)
|
159
|
+
self
|
160
|
+
end
|
228
161
|
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
162
|
+
def add(o)
|
163
|
+
@hash[o] = true
|
164
|
+
self
|
165
|
+
end
|
166
|
+
alias << add
|
234
167
|
|
235
|
-
|
236
|
-
|
237
|
-
|
238
|
-
end
|
239
|
-
alias << add
|
168
|
+
def add?(o)
|
169
|
+
add(o) unless include?(o)
|
170
|
+
end
|
240
171
|
|
241
|
-
|
242
|
-
|
243
|
-
|
172
|
+
def delete(o)
|
173
|
+
@hash.delete(o)
|
174
|
+
self
|
175
|
+
end
|
244
176
|
|
245
|
-
|
246
|
-
|
247
|
-
|
248
|
-
end
|
177
|
+
def delete?(o)
|
178
|
+
delete(o) if include?(o)
|
179
|
+
end
|
249
180
|
|
250
|
-
|
251
|
-
|
252
|
-
|
181
|
+
def delete_if
|
182
|
+
block_given? or return enum_for(__method__) { size }
|
183
|
+
select { |o| yield o }.each { |o| @hash.delete(o) }
|
184
|
+
self
|
185
|
+
end
|
253
186
|
|
254
|
-
|
255
|
-
|
256
|
-
|
257
|
-
|
258
|
-
|
187
|
+
def keep_if
|
188
|
+
block_given? or return enum_for(__method__) { size }
|
189
|
+
reject { |o| yield o }.each { |o| @hash.delete(o) }
|
190
|
+
self
|
191
|
+
end
|
259
192
|
|
260
|
-
|
261
|
-
|
262
|
-
|
263
|
-
|
264
|
-
|
193
|
+
def reject!(&block)
|
194
|
+
block_given? or return enum_for(__method__) { size }
|
195
|
+
n = size
|
196
|
+
delete_if(&block)
|
197
|
+
self if size != n
|
198
|
+
end
|
265
199
|
|
266
|
-
|
267
|
-
|
268
|
-
|
269
|
-
|
270
|
-
|
271
|
-
|
272
|
-
alias map! collect!
|
200
|
+
def select!(&block)
|
201
|
+
block_given? or return enum_for(__method__) { size }
|
202
|
+
n = size
|
203
|
+
keep_if(&block)
|
204
|
+
self if size != n
|
205
|
+
end
|
273
206
|
|
274
|
-
|
275
|
-
block_given? or return enum_for(__method__) { size }
|
276
|
-
n = size
|
277
|
-
delete_if(&block)
|
278
|
-
self if size != n
|
279
|
-
end
|
207
|
+
alias filter! select!
|
280
208
|
|
281
|
-
|
282
|
-
|
283
|
-
|
284
|
-
|
285
|
-
|
286
|
-
|
209
|
+
def merge(*enums, **_rest)
|
210
|
+
enums.each do |enum|
|
211
|
+
if enum.instance_of?(self.class)
|
212
|
+
@hash.update(enum.instance_variable_get(:@hash))
|
213
|
+
else
|
214
|
+
do_with_enum(enum) { |o| add(o) }
|
215
|
+
end
|
216
|
+
end
|
287
217
|
|
288
|
-
|
218
|
+
self
|
219
|
+
end
|
289
220
|
|
290
|
-
|
291
|
-
|
292
|
-
|
293
|
-
|
294
|
-
else
|
295
|
-
do_with_enum(enum) { |o| add(o) }
|
296
|
-
end
|
297
|
-
end
|
221
|
+
def subtract(enum)
|
222
|
+
do_with_enum(enum) { |o| delete(o) }
|
223
|
+
self
|
224
|
+
end
|
298
225
|
|
299
|
-
|
300
|
-
|
226
|
+
def |(enum)
|
227
|
+
dup.merge(enum)
|
228
|
+
end
|
229
|
+
alias + |
|
230
|
+
alias union |
|
301
231
|
|
302
|
-
|
303
|
-
|
304
|
-
|
305
|
-
|
232
|
+
def -(enum)
|
233
|
+
dup.subtract(enum)
|
234
|
+
end
|
235
|
+
alias difference -
|
236
|
+
|
237
|
+
def &(enum)
|
238
|
+
n = self.class.new
|
239
|
+
if enum.is_a?(Set)
|
240
|
+
if enum.size > size
|
241
|
+
each { |o| n.add(o) if enum.include?(o) }
|
242
|
+
else
|
243
|
+
enum.each { |o| n.add(o) if include?(o) }
|
244
|
+
end
|
245
|
+
else
|
246
|
+
do_with_enum(enum) { |o| n.add(o) if include?(o) }
|
247
|
+
end
|
248
|
+
n
|
249
|
+
end
|
250
|
+
alias intersection &
|
306
251
|
|
307
|
-
|
308
|
-
|
309
|
-
|
310
|
-
|
311
|
-
|
252
|
+
def ^(enum)
|
253
|
+
n = Set.new(enum)
|
254
|
+
each { |o| n.add(o) unless n.delete?(o) }
|
255
|
+
n
|
256
|
+
end
|
312
257
|
|
313
|
-
|
314
|
-
|
315
|
-
|
316
|
-
|
317
|
-
|
318
|
-
|
319
|
-
|
320
|
-
|
321
|
-
|
322
|
-
|
323
|
-
|
324
|
-
enum.each { |o| n.add(o) if include?(o) }
|
325
|
-
end
|
326
|
-
else
|
327
|
-
do_with_enum(enum) { |o| n.add(o) if include?(o) }
|
328
|
-
end
|
329
|
-
n
|
330
|
-
end
|
331
|
-
alias intersection &
|
258
|
+
def ==(other)
|
259
|
+
if self.equal?(other)
|
260
|
+
true
|
261
|
+
elsif other.instance_of?(self.class)
|
262
|
+
@hash == other.instance_variable_get(:@hash)
|
263
|
+
elsif other.is_a?(Set) && self.size == other.size
|
264
|
+
other.all? { |o| @hash.include?(o) }
|
265
|
+
else
|
266
|
+
false
|
267
|
+
end
|
268
|
+
end
|
332
269
|
|
333
|
-
|
334
|
-
|
335
|
-
|
336
|
-
n
|
337
|
-
end
|
270
|
+
def hash
|
271
|
+
@hash.hash
|
272
|
+
end
|
338
273
|
|
339
|
-
|
340
|
-
|
341
|
-
|
342
|
-
|
343
|
-
@hash == other.instance_variable_get(:@hash)
|
344
|
-
elsif other.is_a?(Set) && self.size == other.size
|
345
|
-
other.all? { |o| @hash.include?(o) }
|
346
|
-
else
|
347
|
-
false
|
348
|
-
end
|
349
|
-
end
|
274
|
+
def eql?(o)
|
275
|
+
return false unless o.is_a?(Set)
|
276
|
+
@hash.eql?(o.instance_variable_get(:@hash))
|
277
|
+
end
|
350
278
|
|
351
|
-
|
352
|
-
@hash.hash
|
353
|
-
end
|
279
|
+
alias === include?
|
354
280
|
|
355
|
-
|
356
|
-
|
357
|
-
@hash.eql?(o.instance_variable_get(:@hash))
|
358
|
-
end
|
281
|
+
def classify
|
282
|
+
block_given? or return enum_for(__method__) { size }
|
359
283
|
|
360
|
-
|
361
|
-
if @hash.respond_to?(:rehash)
|
362
|
-
@hash.rehash
|
363
|
-
else
|
364
|
-
raise FrozenError, "can't modify frozen #{self.class.name}" if frozen?
|
365
|
-
end
|
366
|
-
self
|
367
|
-
end
|
368
|
-
alias === include?
|
284
|
+
h = {}
|
369
285
|
|
370
|
-
|
371
|
-
|
286
|
+
each { |i|
|
287
|
+
(h[yield(i)] ||= self.class.new).add(i)
|
288
|
+
}
|
372
289
|
|
373
|
-
|
290
|
+
h
|
291
|
+
end
|
374
292
|
|
375
|
-
|
376
|
-
|
377
|
-
}
|
293
|
+
def divide(&func)
|
294
|
+
func or return enum_for(__method__) { size }
|
378
295
|
|
379
|
-
|
380
|
-
|
296
|
+
if func.arity == 2
|
297
|
+
require 'tsort'
|
381
298
|
|
382
|
-
|
383
|
-
|
384
|
-
|
385
|
-
if func.arity == 2
|
386
|
-
require 'tsort'
|
387
|
-
|
388
|
-
class << dig = {}
|
389
|
-
include TSort
|
390
|
-
|
391
|
-
alias tsort_each_node each_key
|
392
|
-
def tsort_each_child(node, &block)
|
393
|
-
fetch(node).each(&block)
|
394
|
-
end
|
395
|
-
end
|
396
|
-
|
397
|
-
each { |u|
|
398
|
-
dig[u] = a = []
|
399
|
-
each{ |v| func.call(u, v) and a << v }
|
400
|
-
}
|
401
|
-
|
402
|
-
set = Set.new()
|
403
|
-
dig.each_strongly_connected_component { |css|
|
404
|
-
set.add(self.class.new(css))
|
405
|
-
}
|
406
|
-
set
|
407
|
-
else
|
408
|
-
Set.new(classify(&func).values)
|
409
|
-
end
|
410
|
-
end
|
299
|
+
class << dig = {}
|
300
|
+
include TSort
|
411
301
|
|
412
|
-
|
413
|
-
|
302
|
+
alias tsort_each_node each_key
|
303
|
+
def tsort_each_child(node, &block)
|
304
|
+
fetch(node).each(&block)
|
414
305
|
end
|
415
306
|
end
|
416
307
|
|
417
|
-
|
418
|
-
|
419
|
-
|
420
|
-
|
421
|
-
@keys = nil
|
422
|
-
super
|
423
|
-
end
|
308
|
+
each { |u|
|
309
|
+
dig[u] = a = []
|
310
|
+
each{ |v| func.call(u, v) and a << v }
|
311
|
+
}
|
424
312
|
|
425
|
-
|
426
|
-
|
427
|
-
|
428
|
-
|
313
|
+
set = Set.new()
|
314
|
+
dig.each_strongly_connected_component { |css|
|
315
|
+
set.add(self.class.new(css))
|
316
|
+
}
|
317
|
+
set
|
318
|
+
else
|
319
|
+
Set.new(classify(&func).values)
|
320
|
+
end
|
321
|
+
end
|
322
|
+
end
|
429
323
|
|
430
|
-
|
431
|
-
|
432
|
-
|
433
|
-
|
324
|
+
# sorted_set without rbtree dependency, vendored from
|
325
|
+
# https://github.com/ruby/set/blob/72f08c4/lib/set.rb#L731-L800
|
326
|
+
class CharacterSet::RubyFallback::SortedSet < CharacterSet::RubyFallback::Set
|
327
|
+
def initialize(*args)
|
328
|
+
@keys = nil
|
329
|
+
super
|
330
|
+
end
|
434
331
|
|
435
|
-
|
436
|
-
|
437
|
-
|
438
|
-
|
439
|
-
end
|
440
|
-
alias << add
|
332
|
+
def clear
|
333
|
+
@keys = nil
|
334
|
+
super
|
335
|
+
end
|
441
336
|
|
442
|
-
|
443
|
-
|
444
|
-
|
445
|
-
|
446
|
-
|
337
|
+
def add(o)
|
338
|
+
@keys = nil
|
339
|
+
super
|
340
|
+
end
|
341
|
+
alias << add
|
447
342
|
|
448
|
-
|
449
|
-
|
450
|
-
|
451
|
-
|
452
|
-
|
453
|
-
self
|
454
|
-
end
|
343
|
+
def delete(o)
|
344
|
+
@keys = nil
|
345
|
+
@hash.delete(o)
|
346
|
+
self
|
347
|
+
end
|
455
348
|
|
456
|
-
|
457
|
-
|
458
|
-
|
459
|
-
|
460
|
-
|
461
|
-
|
462
|
-
|
349
|
+
def delete_if
|
350
|
+
block_given? or return enum_for(__method__) { size }
|
351
|
+
n = @hash.size
|
352
|
+
super
|
353
|
+
@keys = nil if @hash.size != n
|
354
|
+
self
|
355
|
+
end
|
463
356
|
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
357
|
+
def keep_if
|
358
|
+
block_given? or return enum_for(__method__) { size }
|
359
|
+
n = @hash.size
|
360
|
+
super
|
361
|
+
@keys = nil if @hash.size != n
|
362
|
+
self
|
363
|
+
end
|
468
364
|
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
end
|
365
|
+
def merge(enum)
|
366
|
+
@keys = nil
|
367
|
+
super
|
368
|
+
end
|
474
369
|
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
370
|
+
def each(&block)
|
371
|
+
block or return enum_for(__method__) { size }
|
372
|
+
to_a.each(&block)
|
373
|
+
self
|
374
|
+
end
|
479
375
|
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
376
|
+
def to_a
|
377
|
+
(@keys = @hash.keys).sort! unless @keys
|
378
|
+
@keys.dup
|
379
|
+
end
|
484
380
|
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
end
|
489
|
-
end
|
490
|
-
end
|
381
|
+
def freeze
|
382
|
+
to_a
|
383
|
+
super
|
491
384
|
end
|
492
385
|
end
|
@@ -1,6 +1,5 @@
|
|
1
1
|
require 'character_set/ruby_fallback/set_methods'
|
2
2
|
require 'character_set/ruby_fallback/character_set_methods'
|
3
|
-
require 'character_set/ruby_fallback/vendored_set_classes'
|
4
3
|
|
5
4
|
class CharacterSet
|
6
5
|
module RubyFallback
|
@@ -17,3 +16,20 @@ class CharacterSet
|
|
17
16
|
end
|
18
17
|
end
|
19
18
|
end
|
19
|
+
|
20
|
+
if RUBY_PLATFORM[/java/i]
|
21
|
+
# JRuby has sorted_set in the stdlib.
|
22
|
+
require 'set'
|
23
|
+
CharacterSet::RubyFallback::Set = ::Set
|
24
|
+
CharacterSet::RubyFallback::SortedSet = ::SortedSet
|
25
|
+
else
|
26
|
+
# For other rubies, set/sorted_set are vendored due to dependency issues:
|
27
|
+
#
|
28
|
+
# - issues with default vs. installed gems such as [#2]
|
29
|
+
# - issues with the sorted_set dependency rb_tree
|
30
|
+
# - long-standing issues in recent versions of sorted_set
|
31
|
+
#
|
32
|
+
# The RubyFallback, and thus these set classes, are only used for testing,
|
33
|
+
# and for exotic rubies which use neither C nor Java.
|
34
|
+
require 'character_set/ruby_fallback/vendored_set_classes'
|
35
|
+
end
|
@@ -22,7 +22,7 @@ class CharacterSet
|
|
22
22
|
|
23
23
|
# Allow some methods to take an Enum just as well as another CharacterSet.
|
24
24
|
# Tested by ruby-spec.
|
25
|
-
%w[& + - ^ | difference disjoint? intersect? intersection
|
25
|
+
%w[& + - ^ | <=> difference disjoint? intersect? intersection
|
26
26
|
subtract union].each do |method|
|
27
27
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
28
28
|
def #{method}(arg)
|
@@ -165,9 +165,13 @@ class CharacterSet
|
|
165
165
|
end
|
166
166
|
|
167
167
|
def divide(&func)
|
168
|
-
require 'character_set/ruby_fallback
|
168
|
+
require 'character_set/ruby_fallback'
|
169
169
|
CharacterSet::RubyFallback::Set.new(to_a).divide(&func)
|
170
170
|
end
|
171
|
+
|
172
|
+
def join(separator = '')
|
173
|
+
to_a(true).join(separator)
|
174
|
+
end
|
171
175
|
RUBY
|
172
176
|
|
173
177
|
# CharacterSet-specific section methods
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: character_set
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.8.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2024-01-07 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description:
|
14
14
|
email:
|
@@ -106,7 +106,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
106
106
|
- !ruby/object:Gem::Version
|
107
107
|
version: '0'
|
108
108
|
requirements: []
|
109
|
-
rubygems_version: 3.
|
109
|
+
rubygems_version: 3.5.0.dev
|
110
110
|
signing_key:
|
111
111
|
specification_version: 4
|
112
112
|
summary: Build, read, write and compare sets of Unicode codepoints.
|