character_set 1.2.0-java → 1.5.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitattributes +3 -0
- data/.github/workflows/gouteur.yml +20 -0
- data/.github/workflows/lint.yml +29 -0
- data/.github/workflows/tests.yml +22 -0
- data/.gitignore +1 -0
- data/.gouteur.yml +2 -0
- data/.rubocop.yml +17 -0
- data/BENCHMARK.md +53 -17
- data/CHANGELOG.md +54 -0
- data/README.md +51 -12
- data/Rakefile +20 -18
- data/benchmarks/count_in.rb +13 -0
- data/benchmarks/delete_in.rb +1 -1
- data/benchmarks/scan.rb +13 -0
- data/benchmarks/shared.rb +5 -0
- data/benchmarks/z_add.rb +12 -0
- data/benchmarks/z_delete.rb +12 -0
- data/benchmarks/z_merge.rb +15 -0
- data/benchmarks/z_minmax.rb +12 -0
- data/bin/console +2 -0
- data/character_set.gemspec +17 -4
- data/ext/character_set/character_set.c +969 -415
- data/ext/character_set/unicode_casefold_table.h +44 -1
- data/ext/character_set/unicode_casefold_table.h.tmpl +11 -0
- data/lib/character_set/character.rb +1 -1
- data/lib/character_set/core_ext/regexp_ext.rb +1 -1
- data/lib/character_set/core_ext/string_ext.rb +3 -1
- data/lib/character_set/expression_converter.rb +41 -43
- data/lib/character_set/parser.rb +1 -1
- data/lib/character_set/predefined_sets/any.cps +1 -0
- data/lib/character_set/predefined_sets/ascii.cps +1 -0
- data/lib/character_set/predefined_sets/ascii_alnum.cps +3 -0
- data/lib/character_set/predefined_sets/ascii_letter.cps +2 -0
- data/lib/character_set/predefined_sets/assigned.cps +677 -0
- data/lib/character_set/predefined_sets/bmp.cps +2 -0
- data/lib/character_set/predefined_sets/crypt.cps +2 -0
- data/lib/character_set/predefined_sets/emoji.cps +152 -0
- data/lib/character_set/predefined_sets/newline.cps +3 -0
- data/lib/character_set/predefined_sets/surrogate.cps +1 -0
- data/lib/character_set/predefined_sets/unicode.cps +2 -0
- data/lib/character_set/predefined_sets/url_fragment.cps +8 -0
- data/lib/character_set/predefined_sets/url_host.cps +10 -0
- data/lib/character_set/predefined_sets/url_path.cps +7 -0
- data/lib/character_set/predefined_sets/url_query.cps +8 -0
- data/lib/character_set/predefined_sets/whitespace.cps +10 -0
- data/lib/character_set/predefined_sets.rb +25 -260
- data/lib/character_set/ruby_fallback/character_set_methods.rb +60 -9
- data/lib/character_set/ruby_fallback/set_methods.rb +25 -17
- data/lib/character_set/ruby_fallback.rb +5 -3
- data/lib/character_set/set_method_adapters.rb +4 -3
- data/lib/character_set/shared_methods.rb +69 -50
- data/lib/character_set/version.rb +1 -1
- data/lib/character_set/writer.rb +98 -27
- metadata +114 -17
- data/.travis.yml +0 -8
- data/lib/character_set/ruby_fallback/plane_methods.rb +0 -27
data/lib/character_set/ruby_fallback.rb
CHANGED
@@ -1,12 +1,14 @@
|
|
1
|
-
|
1
|
+
if RUBY_VERSION.to_f >= 3.0 && !RUBY_PLATFORM[/java/i]
|
2
|
+
require 'sorted_set'
|
3
|
+
else
|
4
|
+
require 'set'
|
5
|
+
end
|
2
6
|
require 'character_set/ruby_fallback/set_methods'
|
3
|
-
require 'character_set/ruby_fallback/plane_methods'
|
4
7
|
require 'character_set/ruby_fallback/character_set_methods'
|
5
8
|
|
6
9
|
class CharacterSet
|
7
10
|
module RubyFallback
|
8
11
|
include CharacterSet::RubyFallback::SetMethods
|
9
|
-
include CharacterSet::RubyFallback::PlaneMethods
|
10
12
|
include CharacterSet::RubyFallback::CharacterSetMethods
|
11
13
|
|
12
14
|
def self.prepended(klass)
|
data/lib/character_set/set_method_adapters.rb
CHANGED
@@ -22,13 +22,14 @@ class CharacterSet
|
|
22
22
|
|
23
23
|
# Allow some methods to take an Enum just as well as another CharacterSet.
|
24
24
|
# Tested by ruby-spec.
|
25
|
-
%w[& + - ^ | difference
|
25
|
+
%w[& + - ^ | difference disjoint? intersect? intersection
|
26
|
+
subtract union].each do |method|
|
26
27
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
27
28
|
def #{method}(arg)
|
28
29
|
if arg.is_a?(CharacterSet)
|
29
|
-
super
|
30
|
+
super(arg)
|
30
31
|
elsif arg.respond_to?(:each)
|
31
|
-
super(
|
32
|
+
super(self.class.new(arg.to_a))
|
32
33
|
else
|
33
34
|
raise ArgumentError, 'pass an enumerable'
|
34
35
|
end
|
data/lib/character_set/shared_methods.rb
CHANGED
@@ -22,33 +22,29 @@ class CharacterSet
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def of_property(property_name)
|
25
|
-
require_optional_dependency('regexp_property_values')
|
25
|
+
require_optional_dependency('regexp_property_values', __method__)
|
26
26
|
|
27
27
|
property = RegexpPropertyValues[property_name.to_s]
|
28
28
|
from_ranges(*property.matched_ranges)
|
29
29
|
end
|
30
30
|
|
31
31
|
def of_regexp(regexp)
|
32
|
-
require_optional_dependency('regexp_parser')
|
32
|
+
require_optional_dependency('regexp_parser', __method__)
|
33
33
|
|
34
34
|
root = ::Regexp::Parser.parse(regexp)
|
35
35
|
of_expression(root)
|
36
36
|
end
|
37
37
|
|
38
38
|
def of_expression(expression)
|
39
|
-
ExpressionConverter.convert(expression)
|
39
|
+
ExpressionConverter.convert(expression, self)
|
40
40
|
end
|
41
41
|
|
42
|
-
def require_optional_dependency(name)
|
42
|
+
def require_optional_dependency(name, method)
|
43
43
|
required_optional_dependencies[name] ||= begin
|
44
44
|
require name
|
45
45
|
true
|
46
46
|
rescue ::LoadError
|
47
|
-
|
48
|
-
loc.absolute_path.to_s.include?('/lib/character_set')
|
49
|
-
end
|
50
|
-
method = entry_point && entry_point.label
|
51
|
-
raise LoadError, 'You must the install the optional dependency '\
|
47
|
+
raise LoadError, 'You must install the optional dependency '\
|
52
48
|
"'\#{name}' to use the method `\#{method}'."
|
53
49
|
end
|
54
50
|
end
|
@@ -70,37 +66,64 @@ class CharacterSet
|
|
70
66
|
merge(enum)
|
71
67
|
end
|
72
68
|
|
73
|
-
#
|
69
|
+
# CharacterSet-specific conversion methods
|
70
|
+
|
71
|
+
def assigned_part
|
72
|
+
self & self.class.assigned
|
73
|
+
end
|
74
|
+
|
75
|
+
def valid_part
|
76
|
+
self - self.class.surrogate
|
77
|
+
end
|
78
|
+
|
79
|
+
# CharacterSet-specific stringification methods
|
74
80
|
|
75
81
|
def to_s(opts = {}, &block)
|
76
82
|
Writer.write(ranges, opts, &block)
|
77
83
|
end
|
78
84
|
|
85
|
+
def to_s_with_surrogate_ranges
|
86
|
+
Writer.write_surrogate_ranges(bmp_part.ranges, astral_part.ranges)
|
87
|
+
end
|
88
|
+
|
79
89
|
def to_s_with_surrogate_alternation
|
80
90
|
Writer.write_surrogate_alternation(bmp_part.ranges, astral_part.ranges)
|
81
91
|
end
|
82
92
|
|
93
|
+
def secure_token(length = 32)
|
94
|
+
CharacterSet.require_optional_dependency('securerandom', __method__)
|
95
|
+
cps = to_a
|
96
|
+
len = cps.count
|
97
|
+
1.upto(length).map { cps[SecureRandom.random_number(len)] }.pack('U*')
|
98
|
+
end
|
99
|
+
alias random_token secure_token
|
100
|
+
|
83
101
|
def inspect
|
84
102
|
len = length
|
85
|
-
"
|
103
|
+
"#<#{klass.name}: {\#{first(5) * ', '}\#{'...' if len > 5}} (size: \#{len})>"
|
86
104
|
end
|
87
105
|
|
88
|
-
#
|
106
|
+
# C-extension adapter methods. Need overriding in pure fallback.
|
107
|
+
# Parsing kwargs in C is slower, verbose, and kinda deprecated.
|
89
108
|
|
90
|
-
def
|
91
|
-
|
109
|
+
def inversion(include_surrogates: false, upto: 0x10FFFF)
|
110
|
+
ext_inversion(include_surrogates, upto)
|
92
111
|
end
|
93
112
|
|
94
|
-
def
|
95
|
-
|
113
|
+
def section(from:, upto: 0x10FFFF)
|
114
|
+
ext_section(from, upto)
|
96
115
|
end
|
97
116
|
|
98
|
-
def
|
99
|
-
|
117
|
+
def count_in_section(from:, upto: 0x10FFFF)
|
118
|
+
ext_count_in_section(from, upto)
|
100
119
|
end
|
101
120
|
|
102
|
-
def
|
103
|
-
|
121
|
+
def section?(from:, upto: 0x10FFFF)
|
122
|
+
ext_section?(from, upto)
|
123
|
+
end
|
124
|
+
|
125
|
+
def section_ratio(from:, upto: 0x10FFFF)
|
126
|
+
ext_section_ratio(from, upto)
|
104
127
|
end
|
105
128
|
|
106
129
|
#
|
@@ -136,42 +159,38 @@ class CharacterSet
|
|
136
159
|
end
|
137
160
|
|
138
161
|
def divide(&func)
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
require 'tsort'
|
162
|
+
CharacterSet.require_optional_dependency('set', __method__)
|
163
|
+
Set.new(to_a).divide(&func)
|
164
|
+
end
|
165
|
+
RUBY
|
144
166
|
|
145
|
-
|
146
|
-
include TSort
|
167
|
+
# CharacterSet-specific section methods
|
147
168
|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
169
|
+
{
|
170
|
+
ascii: 0..0x7F,
|
171
|
+
bmp: 0..0xFFFF,
|
172
|
+
astral: 0x10000..0x10FFFF,
|
173
|
+
}.each do |section_name, range|
|
174
|
+
klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
175
|
+
def #{section_name}_part
|
176
|
+
section(from: #{range.begin}, upto: #{range.end})
|
177
|
+
end
|
153
178
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
end
|
179
|
+
def #{section_name}_part?
|
180
|
+
section?(from: #{range.begin}, upto: #{range.end})
|
181
|
+
end
|
158
182
|
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
set
|
164
|
-
else
|
165
|
-
Set.new(classify(&func).values)
|
183
|
+
def #{section_name}_only?
|
184
|
+
#{range.begin == 0 ?
|
185
|
+
"!section?(from: #{range.end}, upto: 0x10FFFF)" :
|
186
|
+
"!section?(from: 0, upto: #{range.begin})"}
|
166
187
|
end
|
167
|
-
end
|
168
188
|
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
RUBY
|
189
|
+
def #{section_name}_ratio
|
190
|
+
section_ratio(from: #{range.begin}, upto: #{range.end})
|
191
|
+
end
|
192
|
+
RUBY
|
193
|
+
end
|
175
194
|
end # self.included
|
176
195
|
end # SharedMethods
|
177
196
|
end
|
data/lib/character_set/writer.rb
CHANGED
@@ -1,37 +1,108 @@
|
|
1
1
|
class CharacterSet
|
2
2
|
module Writer
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
3
|
+
class << self
|
4
|
+
def write(codepoint_ranges, opts = {}, &block)
|
5
|
+
content = codepoint_ranges.map do |range|
|
6
|
+
if range.size > 2 && opts[:abbreviate] != false
|
7
|
+
bounds = [range.min, range.max]
|
8
|
+
bounds.map { |cp| write_codepoint(cp, opts, &block) }.join('-')
|
9
|
+
else
|
10
|
+
range.map { |cp| write_codepoint(cp, opts, &block) }.join
|
11
|
+
end
|
12
|
+
end.join
|
13
|
+
opts[:in_brackets] ? "[#{content}]" : content
|
14
|
+
end
|
15
|
+
|
16
|
+
def write_codepoint(codepoint, opts = {}, &block)
|
17
|
+
Character.new(codepoint).escape(opts, &block)
|
18
|
+
end
|
19
|
+
|
20
|
+
def write_surrogate_ranges(bmp_ranges, astral_ranges)
|
21
|
+
astral_branches = surrogate_range_expressions(astral_ranges)
|
22
|
+
bmp_set_with_alternatives(bmp_ranges, astral_branches)
|
23
|
+
end
|
24
|
+
|
25
|
+
def write_surrogate_alternation(bmp_ranges, astral_ranges)
|
26
|
+
astral_branches = surrogate_pairs(astral_ranges)
|
27
|
+
bmp_set_with_alternatives(bmp_ranges, astral_branches)
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def surrogate_range_expressions(astral_ranges)
|
33
|
+
compressed_surrogate_range_pairs(astral_ranges).map do |hi_ranges, lo_ranges|
|
34
|
+
[hi_ranges, lo_ranges].map do |ranges|
|
35
|
+
use_brackets = ranges.size > 1 || ranges.first.size > 1
|
36
|
+
write(ranges, format: :js, in_brackets: use_brackets)
|
37
|
+
end.join
|
11
38
|
end
|
12
|
-
end
|
13
|
-
|
14
|
-
|
39
|
+
end
|
40
|
+
|
41
|
+
def compressed_surrogate_range_pairs(astral_ranges)
|
42
|
+
halves = astral_ranges.flat_map { |range| surrogate_half_ranges(range) }
|
15
43
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
44
|
+
# compress high surrogate codepoint ranges with common low range half
|
45
|
+
with_common_lo = halves.group_by(&:last).map do |lo_range, pairs|
|
46
|
+
hi_ranges = pairs.map(&:first)
|
47
|
+
compressed_hi_ranges = hi_ranges.each_with_object([]) do |range, arr|
|
48
|
+
prev = arr.last
|
49
|
+
if prev.nil? || prev.max + 1 < range.min # first or gap
|
50
|
+
arr << range
|
51
|
+
else # continuous codepoints, expand previous range
|
52
|
+
arr[-1] = (prev.min)..(range.max)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
[compressed_hi_ranges, lo_range]
|
56
|
+
end
|
57
|
+
|
58
|
+
# compress low surrogate codepoint ranges with common high ranges
|
59
|
+
with_common_lo.each_with_object({}) do |(hi_ranges, lo_range), hash|
|
60
|
+
(hash[hi_ranges] ||= []) << lo_range
|
61
|
+
end
|
23
62
|
end
|
24
|
-
end
|
25
63
|
|
26
|
-
|
27
|
-
|
28
|
-
|
64
|
+
def surrogate_half_ranges(astral_range)
|
65
|
+
hi_min, lo_min = surrogate_pair_codepoints(astral_range.min)
|
66
|
+
hi_max, lo_max = surrogate_pair_codepoints(astral_range.max)
|
67
|
+
hi_count = 1 + hi_max - hi_min
|
68
|
+
return [[hi_min..hi_min, lo_min..lo_max]] if hi_count == 1
|
69
|
+
|
70
|
+
ranges = []
|
71
|
+
|
72
|
+
# first high surrogate might be partially covered (if lo_min > 0xDC00)
|
73
|
+
ranges << [hi_min..hi_min, lo_min..0xDFFF]
|
74
|
+
|
75
|
+
# any high surrogates in between are fully covered
|
76
|
+
ranges << [(hi_min + 1)..(hi_max - 1), 0xDC00..0xDFFF] if hi_count > 2
|
29
77
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
78
|
+
# last high surrogate might be partially covered (if lo_max < 0xDFFF)
|
79
|
+
ranges << [hi_max..hi_max, 0xDC00..lo_max]
|
80
|
+
|
81
|
+
ranges
|
82
|
+
end
|
83
|
+
|
84
|
+
def surrogate_pair_codepoints(astral_codepoint)
|
85
|
+
base = astral_codepoint - 0x10000
|
86
|
+
high = base / 1024 + 0xD800
|
87
|
+
low = base % 1024 + 0xDC00
|
88
|
+
[high, low]
|
89
|
+
end
|
90
|
+
|
91
|
+
def bmp_set_with_alternatives(bmp_ranges, alternatives)
|
92
|
+
bmp_set = write(bmp_ranges, format: :js, in_brackets: true)
|
93
|
+
return bmp_set if alternatives.empty? && bmp_ranges.any?
|
94
|
+
|
95
|
+
"(?:#{((bmp_ranges.any? ? [bmp_set] : []) + alternatives).join('|')})"
|
96
|
+
end
|
97
|
+
|
98
|
+
def surrogate_pairs(astral_ranges)
|
99
|
+
astral_ranges.flat_map { |range| range.map { |cp| surrogate_pair(cp) } }
|
100
|
+
end
|
101
|
+
|
102
|
+
def surrogate_pair(astral_codepoint)
|
103
|
+
surrogate_pair_codepoints(astral_codepoint)
|
104
|
+
.map { |half| write_codepoint(half, format: :js) }.join
|
105
|
+
end
|
35
106
|
end
|
36
107
|
end
|
37
108
|
end
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: character_set
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.5.0
|
5
5
|
platform: java
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-12-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: sorted_set
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: benchmark-ips
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -24,34 +38,48 @@ dependencies:
|
|
24
38
|
- - "~>"
|
25
39
|
- !ruby/object:Gem::Version
|
26
40
|
version: '2.7'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: get_process_mem
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.2.3
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.2.3
|
27
55
|
- !ruby/object:Gem::Dependency
|
28
56
|
name: rake
|
29
57
|
requirement: !ruby/object:Gem::Requirement
|
30
58
|
requirements:
|
31
59
|
- - "~>"
|
32
60
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
61
|
+
version: '13.0'
|
34
62
|
type: :development
|
35
63
|
prerelease: false
|
36
64
|
version_requirements: !ruby/object:Gem::Requirement
|
37
65
|
requirements:
|
38
66
|
- - "~>"
|
39
67
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
68
|
+
version: '13.0'
|
41
69
|
- !ruby/object:Gem::Dependency
|
42
70
|
name: rake-compiler
|
43
71
|
requirement: !ruby/object:Gem::Requirement
|
44
72
|
requirements:
|
45
73
|
- - "~>"
|
46
74
|
- !ruby/object:Gem::Version
|
47
|
-
version: '1.
|
75
|
+
version: '1.1'
|
48
76
|
type: :development
|
49
77
|
prerelease: false
|
50
78
|
version_requirements: !ruby/object:Gem::Requirement
|
51
79
|
requirements:
|
52
80
|
- - "~>"
|
53
81
|
- !ruby/object:Gem::Version
|
54
|
-
version: '1.
|
82
|
+
version: '1.1'
|
55
83
|
- !ruby/object:Gem::Dependency
|
56
84
|
name: range_compressor
|
57
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -72,28 +100,28 @@ dependencies:
|
|
72
100
|
requirements:
|
73
101
|
- - "~>"
|
74
102
|
- !ruby/object:Gem::Version
|
75
|
-
version: '1
|
103
|
+
version: '2.1'
|
76
104
|
type: :development
|
77
105
|
prerelease: false
|
78
106
|
version_requirements: !ruby/object:Gem::Requirement
|
79
107
|
requirements:
|
80
108
|
- - "~>"
|
81
109
|
- !ruby/object:Gem::Version
|
82
|
-
version: '1
|
110
|
+
version: '2.1'
|
83
111
|
- !ruby/object:Gem::Dependency
|
84
112
|
name: regexp_property_values
|
85
113
|
requirement: !ruby/object:Gem::Requirement
|
86
114
|
requirements:
|
87
115
|
- - "~>"
|
88
116
|
- !ruby/object:Gem::Version
|
89
|
-
version: 0
|
117
|
+
version: '1.0'
|
90
118
|
type: :development
|
91
119
|
prerelease: false
|
92
120
|
version_requirements: !ruby/object:Gem::Requirement
|
93
121
|
requirements:
|
94
122
|
- - "~>"
|
95
123
|
- !ruby/object:Gem::Version
|
96
|
-
version: 0
|
124
|
+
version: '1.0'
|
97
125
|
- !ruby/object:Gem::Dependency
|
98
126
|
name: rspec
|
99
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,6 +136,48 @@ dependencies:
|
|
108
136
|
- - "~>"
|
109
137
|
- !ruby/object:Gem::Version
|
110
138
|
version: '3.8'
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: codecov
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: 0.2.12
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: 0.2.12
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: gouteur
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: 1.0.0
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: 1.0.0
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: rubocop
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - "~>"
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '1.8'
|
174
|
+
type: :development
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - "~>"
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '1.8'
|
111
181
|
- !ruby/object:Gem::Dependency
|
112
182
|
name: range_compressor
|
113
183
|
requirement: !ruby/object:Gem::Requirement
|
@@ -122,33 +192,45 @@ dependencies:
|
|
122
192
|
- - "~>"
|
123
193
|
- !ruby/object:Gem::Version
|
124
194
|
version: '1.0'
|
125
|
-
description:
|
195
|
+
description:
|
126
196
|
email:
|
127
197
|
- janosch84@gmail.com
|
128
198
|
executables: []
|
129
199
|
extensions: []
|
130
200
|
extra_rdoc_files: []
|
131
201
|
files:
|
202
|
+
- ".gitattributes"
|
203
|
+
- ".github/workflows/gouteur.yml"
|
204
|
+
- ".github/workflows/lint.yml"
|
205
|
+
- ".github/workflows/tests.yml"
|
132
206
|
- ".gitignore"
|
207
|
+
- ".gouteur.yml"
|
133
208
|
- ".rspec"
|
134
|
-
- ".travis.yml"
|
209
|
+
- ".rubocop.yml"
|
135
210
|
- BENCHMARK.md
|
136
211
|
- CHANGELOG.md
|
137
212
|
- Gemfile
|
138
213
|
- LICENSE.txt
|
139
214
|
- README.md
|
140
215
|
- Rakefile
|
216
|
+
- benchmarks/count_in.rb
|
141
217
|
- benchmarks/cover.rb
|
142
218
|
- benchmarks/delete_in.rb
|
143
219
|
- benchmarks/keep_in.rb
|
220
|
+
- benchmarks/scan.rb
|
144
221
|
- benchmarks/shared.rb
|
145
222
|
- benchmarks/used_by.rb
|
223
|
+
- benchmarks/z_add.rb
|
224
|
+
- benchmarks/z_delete.rb
|
225
|
+
- benchmarks/z_merge.rb
|
226
|
+
- benchmarks/z_minmax.rb
|
146
227
|
- bin/console
|
147
228
|
- bin/setup
|
148
229
|
- character_set.gemspec
|
149
230
|
- ext/character_set/character_set.c
|
150
231
|
- ext/character_set/extconf.rb
|
151
232
|
- ext/character_set/unicode_casefold_table.h
|
233
|
+
- ext/character_set/unicode_casefold_table.h.tmpl
|
152
234
|
- lib/character_set.rb
|
153
235
|
- lib/character_set/character.rb
|
154
236
|
- lib/character_set/core_ext.rb
|
@@ -157,10 +239,25 @@ files:
|
|
157
239
|
- lib/character_set/expression_converter.rb
|
158
240
|
- lib/character_set/parser.rb
|
159
241
|
- lib/character_set/predefined_sets.rb
|
242
|
+
- lib/character_set/predefined_sets/any.cps
|
243
|
+
- lib/character_set/predefined_sets/ascii.cps
|
244
|
+
- lib/character_set/predefined_sets/ascii_alnum.cps
|
245
|
+
- lib/character_set/predefined_sets/ascii_letter.cps
|
246
|
+
- lib/character_set/predefined_sets/assigned.cps
|
247
|
+
- lib/character_set/predefined_sets/bmp.cps
|
248
|
+
- lib/character_set/predefined_sets/crypt.cps
|
249
|
+
- lib/character_set/predefined_sets/emoji.cps
|
250
|
+
- lib/character_set/predefined_sets/newline.cps
|
251
|
+
- lib/character_set/predefined_sets/surrogate.cps
|
252
|
+
- lib/character_set/predefined_sets/unicode.cps
|
253
|
+
- lib/character_set/predefined_sets/url_fragment.cps
|
254
|
+
- lib/character_set/predefined_sets/url_host.cps
|
255
|
+
- lib/character_set/predefined_sets/url_path.cps
|
256
|
+
- lib/character_set/predefined_sets/url_query.cps
|
257
|
+
- lib/character_set/predefined_sets/whitespace.cps
|
160
258
|
- lib/character_set/pure.rb
|
161
259
|
- lib/character_set/ruby_fallback.rb
|
162
260
|
- lib/character_set/ruby_fallback/character_set_methods.rb
|
163
|
-
- lib/character_set/ruby_fallback/plane_methods.rb
|
164
261
|
- lib/character_set/ruby_fallback/set_methods.rb
|
165
262
|
- lib/character_set/set_method_adapters.rb
|
166
263
|
- lib/character_set/shared_methods.rb
|
@@ -170,7 +267,7 @@ homepage: https://github.com/jaynetics/character_set
|
|
170
267
|
licenses:
|
171
268
|
- MIT
|
172
269
|
metadata: {}
|
173
|
-
post_install_message:
|
270
|
+
post_install_message:
|
174
271
|
rdoc_options: []
|
175
272
|
require_paths:
|
176
273
|
- lib
|
@@ -185,8 +282,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
185
282
|
- !ruby/object:Gem::Version
|
186
283
|
version: '0'
|
187
284
|
requirements: []
|
188
|
-
rubygems_version: 3.0.
|
189
|
-
signing_key:
|
285
|
+
rubygems_version: 3.3.0.dev
|
286
|
+
signing_key:
|
190
287
|
specification_version: 4
|
191
288
|
summary: Build, read, write and compare sets of Unicode codepoints.
|
192
289
|
test_files: []
|
data/.travis.yml
DELETED
data/lib/character_set/ruby_fallback/plane_methods.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
class CharacterSet
|
2
|
-
module RubyFallback
|
3
|
-
module PlaneMethods
|
4
|
-
def bmp_part
|
5
|
-
dup.keep_if { |cp| cp < 0x10000 }
|
6
|
-
end
|
7
|
-
|
8
|
-
def astral_part
|
9
|
-
dup.keep_if { |cp| cp >= 0x10000 }
|
10
|
-
end
|
11
|
-
|
12
|
-
def planes
|
13
|
-
plane_set = {}
|
14
|
-
plane_size = 0x10000.to_f
|
15
|
-
each do |cp|
|
16
|
-
plane = (cp / plane_size).floor
|
17
|
-
plane_set[plane] = true
|
18
|
-
end
|
19
|
-
plane_set.keys
|
20
|
-
end
|
21
|
-
|
22
|
-
def member_in_plane?(num)
|
23
|
-
((num * 0x10000)...((num + 1) * 0x10000)).any? { |cp| include?(cp) }
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|