character_set 1.2.0 → 1.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitattributes +3 -0
- data/.github/workflows/gouteur.yml +20 -0
- data/.github/workflows/lint.yml +29 -0
- data/.github/workflows/tests.yml +22 -0
- data/.gitignore +1 -0
- data/.gouteur.yml +2 -0
- data/.rubocop.yml +17 -0
- data/BENCHMARK.md +53 -17
- data/CHANGELOG.md +54 -0
- data/README.md +51 -12
- data/Rakefile +20 -18
- data/benchmarks/count_in.rb +13 -0
- data/benchmarks/delete_in.rb +1 -1
- data/benchmarks/scan.rb +13 -0
- data/benchmarks/shared.rb +5 -0
- data/benchmarks/z_add.rb +12 -0
- data/benchmarks/z_delete.rb +12 -0
- data/benchmarks/z_merge.rb +15 -0
- data/benchmarks/z_minmax.rb +12 -0
- data/bin/console +2 -0
- data/character_set.gemspec +17 -4
- data/ext/character_set/character_set.c +969 -415
- data/ext/character_set/unicode_casefold_table.h +44 -1
- data/ext/character_set/unicode_casefold_table.h.tmpl +11 -0
- data/lib/character_set/character.rb +1 -1
- data/lib/character_set/core_ext/regexp_ext.rb +1 -1
- data/lib/character_set/core_ext/string_ext.rb +3 -1
- data/lib/character_set/expression_converter.rb +41 -43
- data/lib/character_set/parser.rb +1 -1
- data/lib/character_set/predefined_sets/any.cps +1 -0
- data/lib/character_set/predefined_sets/ascii.cps +1 -0
- data/lib/character_set/predefined_sets/ascii_alnum.cps +3 -0
- data/lib/character_set/predefined_sets/ascii_letter.cps +2 -0
- data/lib/character_set/predefined_sets/assigned.cps +677 -0
- data/lib/character_set/predefined_sets/bmp.cps +2 -0
- data/lib/character_set/predefined_sets/crypt.cps +2 -0
- data/lib/character_set/predefined_sets/emoji.cps +152 -0
- data/lib/character_set/predefined_sets/newline.cps +3 -0
- data/lib/character_set/predefined_sets/surrogate.cps +1 -0
- data/lib/character_set/predefined_sets/unicode.cps +2 -0
- data/lib/character_set/predefined_sets/url_fragment.cps +8 -0
- data/lib/character_set/predefined_sets/url_host.cps +10 -0
- data/lib/character_set/predefined_sets/url_path.cps +7 -0
- data/lib/character_set/predefined_sets/url_query.cps +8 -0
- data/lib/character_set/predefined_sets/whitespace.cps +10 -0
- data/lib/character_set/predefined_sets.rb +25 -260
- data/lib/character_set/ruby_fallback/character_set_methods.rb +60 -9
- data/lib/character_set/ruby_fallback/set_methods.rb +25 -17
- data/lib/character_set/ruby_fallback.rb +5 -3
- data/lib/character_set/set_method_adapters.rb +4 -3
- data/lib/character_set/shared_methods.rb +69 -50
- data/lib/character_set/version.rb +1 -1
- data/lib/character_set/writer.rb +98 -27
- metadata +114 -17
- data/.travis.yml +0 -8
- data/lib/character_set/ruby_fallback/plane_methods.rb +0 -27
data/lib/character_set/ruby_fallback.rb
CHANGED
@@ -1,12 +1,14 @@
|
|
1
|
-
|
1
|
+
if RUBY_VERSION.to_f >= 3.0 && !RUBY_PLATFORM[/java/i]
|
2
|
+
require 'sorted_set'
|
3
|
+
else
|
4
|
+
require 'set'
|
5
|
+
end
|
2
6
|
require 'character_set/ruby_fallback/set_methods'
|
3
|
-
require 'character_set/ruby_fallback/plane_methods'
|
4
7
|
require 'character_set/ruby_fallback/character_set_methods'
|
5
8
|
|
6
9
|
class CharacterSet
|
7
10
|
module RubyFallback
|
8
11
|
include CharacterSet::RubyFallback::SetMethods
|
9
|
-
include CharacterSet::RubyFallback::PlaneMethods
|
10
12
|
include CharacterSet::RubyFallback::CharacterSetMethods
|
11
13
|
|
12
14
|
def self.prepended(klass)
|
data/lib/character_set/set_method_adapters.rb
CHANGED
@@ -22,13 +22,14 @@ class CharacterSet
|
|
22
22
|
|
23
23
|
# Allow some methods to take an Enum just as well as another CharacterSet.
|
24
24
|
# Tested by ruby-spec.
|
25
|
-
%w[& + - ^ | difference
|
25
|
+
%w[& + - ^ | difference disjoint? intersect? intersection
|
26
|
+
subtract union].each do |method|
|
26
27
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
27
28
|
def #{method}(arg)
|
28
29
|
if arg.is_a?(CharacterSet)
|
29
|
-
super
|
30
|
+
super(arg)
|
30
31
|
elsif arg.respond_to?(:each)
|
31
|
-
super(
|
32
|
+
super(self.class.new(arg.to_a))
|
32
33
|
else
|
33
34
|
raise ArgumentError, 'pass an enumerable'
|
34
35
|
end
|
data/lib/character_set/shared_methods.rb
CHANGED
@@ -22,33 +22,29 @@ class CharacterSet
|
|
22
22
|
end
|
23
23
|
|
24
24
|
def of_property(property_name)
|
25
|
-
require_optional_dependency('regexp_property_values')
|
25
|
+
require_optional_dependency('regexp_property_values', __method__)
|
26
26
|
|
27
27
|
property = RegexpPropertyValues[property_name.to_s]
|
28
28
|
from_ranges(*property.matched_ranges)
|
29
29
|
end
|
30
30
|
|
31
31
|
def of_regexp(regexp)
|
32
|
-
require_optional_dependency('regexp_parser')
|
32
|
+
require_optional_dependency('regexp_parser', __method__)
|
33
33
|
|
34
34
|
root = ::Regexp::Parser.parse(regexp)
|
35
35
|
of_expression(root)
|
36
36
|
end
|
37
37
|
|
38
38
|
def of_expression(expression)
|
39
|
-
ExpressionConverter.convert(expression)
|
39
|
+
ExpressionConverter.convert(expression, self)
|
40
40
|
end
|
41
41
|
|
42
|
-
def require_optional_dependency(name)
|
42
|
+
def require_optional_dependency(name, method)
|
43
43
|
required_optional_dependencies[name] ||= begin
|
44
44
|
require name
|
45
45
|
true
|
46
46
|
rescue ::LoadError
|
47
|
-
|
48
|
-
loc.absolute_path.to_s.include?('/lib/character_set')
|
49
|
-
end
|
50
|
-
method = entry_point && entry_point.label
|
51
|
-
raise LoadError, 'You must the install the optional dependency '\
|
47
|
+
raise LoadError, 'You must install the optional dependency '\
|
52
48
|
"'\#{name}' to use the method `\#{method}'."
|
53
49
|
end
|
54
50
|
end
|
@@ -70,37 +66,64 @@ class CharacterSet
|
|
70
66
|
merge(enum)
|
71
67
|
end
|
72
68
|
|
73
|
-
#
|
69
|
+
# CharacterSet-specific conversion methods
|
70
|
+
|
71
|
+
def assigned_part
|
72
|
+
self & self.class.assigned
|
73
|
+
end
|
74
|
+
|
75
|
+
def valid_part
|
76
|
+
self - self.class.surrogate
|
77
|
+
end
|
78
|
+
|
79
|
+
# CharacterSet-specific stringification methods
|
74
80
|
|
75
81
|
def to_s(opts = {}, &block)
|
76
82
|
Writer.write(ranges, opts, &block)
|
77
83
|
end
|
78
84
|
|
85
|
+
def to_s_with_surrogate_ranges
|
86
|
+
Writer.write_surrogate_ranges(bmp_part.ranges, astral_part.ranges)
|
87
|
+
end
|
88
|
+
|
79
89
|
def to_s_with_surrogate_alternation
|
80
90
|
Writer.write_surrogate_alternation(bmp_part.ranges, astral_part.ranges)
|
81
91
|
end
|
82
92
|
|
93
|
+
def secure_token(length = 32)
|
94
|
+
CharacterSet.require_optional_dependency('securerandom', __method__)
|
95
|
+
cps = to_a
|
96
|
+
len = cps.count
|
97
|
+
1.upto(length).map { cps[SecureRandom.random_number(len)] }.pack('U*')
|
98
|
+
end
|
99
|
+
alias random_token secure_token
|
100
|
+
|
83
101
|
def inspect
|
84
102
|
len = length
|
85
|
-
"
|
103
|
+
"#<#{klass.name}: {\#{first(5) * ', '}\#{'...' if len > 5}} (size: \#{len})>"
|
86
104
|
end
|
87
105
|
|
88
|
-
#
|
106
|
+
# C-extension adapter methods. Need overriding in pure fallback.
|
107
|
+
# Parsing kwargs in C is slower, verbose, and kinda deprecated.
|
89
108
|
|
90
|
-
def
|
91
|
-
|
109
|
+
def inversion(include_surrogates: false, upto: 0x10FFFF)
|
110
|
+
ext_inversion(include_surrogates, upto)
|
92
111
|
end
|
93
112
|
|
94
|
-
def
|
95
|
-
|
113
|
+
def section(from:, upto: 0x10FFFF)
|
114
|
+
ext_section(from, upto)
|
96
115
|
end
|
97
116
|
|
98
|
-
def
|
99
|
-
|
117
|
+
def count_in_section(from:, upto: 0x10FFFF)
|
118
|
+
ext_count_in_section(from, upto)
|
100
119
|
end
|
101
120
|
|
102
|
-
def
|
103
|
-
|
121
|
+
def section?(from:, upto: 0x10FFFF)
|
122
|
+
ext_section?(from, upto)
|
123
|
+
end
|
124
|
+
|
125
|
+
def section_ratio(from:, upto: 0x10FFFF)
|
126
|
+
ext_section_ratio(from, upto)
|
104
127
|
end
|
105
128
|
|
106
129
|
#
|
@@ -136,42 +159,38 @@ class CharacterSet
|
|
136
159
|
end
|
137
160
|
|
138
161
|
def divide(&func)
|
139
|
-
|
140
|
-
|
141
|
-
|
142
|
-
|
143
|
-
require 'tsort'
|
162
|
+
CharacterSet.require_optional_dependency('set', __method__)
|
163
|
+
Set.new(to_a).divide(&func)
|
164
|
+
end
|
165
|
+
RUBY
|
144
166
|
|
145
|
-
|
146
|
-
include TSort
|
167
|
+
# CharacterSet-specific section methods
|
147
168
|
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
152
|
-
|
169
|
+
{
|
170
|
+
ascii: 0..0x7F,
|
171
|
+
bmp: 0..0xFFFF,
|
172
|
+
astral: 0x10000..0x10FFFF,
|
173
|
+
}.each do |section_name, range|
|
174
|
+
klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
175
|
+
def #{section_name}_part
|
176
|
+
section(from: #{range.begin}, upto: #{range.end})
|
177
|
+
end
|
153
178
|
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
end
|
179
|
+
def #{section_name}_part?
|
180
|
+
section?(from: #{range.begin}, upto: #{range.end})
|
181
|
+
end
|
158
182
|
|
159
|
-
|
160
|
-
|
161
|
-
|
162
|
-
|
163
|
-
set
|
164
|
-
else
|
165
|
-
Set.new(classify(&func).values)
|
183
|
+
def #{section_name}_only?
|
184
|
+
#{range.begin == 0 ?
|
185
|
+
"!section?(from: #{range.end}, upto: 0x10FFFF)" :
|
186
|
+
"!section?(from: 0, upto: #{range.begin})"}
|
166
187
|
end
|
167
|
-
end
|
168
188
|
|
169
|
-
|
170
|
-
|
171
|
-
|
172
|
-
|
173
|
-
|
174
|
-
RUBY
|
189
|
+
def #{section_name}_ratio
|
190
|
+
section_ratio(from: #{range.begin}, upto: #{range.end})
|
191
|
+
end
|
192
|
+
RUBY
|
193
|
+
end
|
175
194
|
end # self.included
|
176
195
|
end # SharedMethods
|
177
196
|
end
|
data/lib/character_set/writer.rb
CHANGED
@@ -1,37 +1,108 @@
|
|
1
1
|
class CharacterSet
|
2
2
|
module Writer
|
3
|
-
|
4
|
-
|
5
|
-
|
6
|
-
|
7
|
-
|
8
|
-
|
9
|
-
|
10
|
-
|
3
|
+
class << self
|
4
|
+
def write(codepoint_ranges, opts = {}, &block)
|
5
|
+
content = codepoint_ranges.map do |range|
|
6
|
+
if range.size > 2 && opts[:abbreviate] != false
|
7
|
+
bounds = [range.min, range.max]
|
8
|
+
bounds.map { |cp| write_codepoint(cp, opts, &block) }.join('-')
|
9
|
+
else
|
10
|
+
range.map { |cp| write_codepoint(cp, opts, &block) }.join
|
11
|
+
end
|
12
|
+
end.join
|
13
|
+
opts[:in_brackets] ? "[#{content}]" : content
|
14
|
+
end
|
15
|
+
|
16
|
+
def write_codepoint(codepoint, opts = {}, &block)
|
17
|
+
Character.new(codepoint).escape(opts, &block)
|
18
|
+
end
|
19
|
+
|
20
|
+
def write_surrogate_ranges(bmp_ranges, astral_ranges)
|
21
|
+
astral_branches = surrogate_range_expressions(astral_ranges)
|
22
|
+
bmp_set_with_alternatives(bmp_ranges, astral_branches)
|
23
|
+
end
|
24
|
+
|
25
|
+
def write_surrogate_alternation(bmp_ranges, astral_ranges)
|
26
|
+
astral_branches = surrogate_pairs(astral_ranges)
|
27
|
+
bmp_set_with_alternatives(bmp_ranges, astral_branches)
|
28
|
+
end
|
29
|
+
|
30
|
+
private
|
31
|
+
|
32
|
+
def surrogate_range_expressions(astral_ranges)
|
33
|
+
compressed_surrogate_range_pairs(astral_ranges).map do |hi_ranges, lo_ranges|
|
34
|
+
[hi_ranges, lo_ranges].map do |ranges|
|
35
|
+
use_brackets = ranges.size > 1 || ranges.first.size > 1
|
36
|
+
write(ranges, format: :js, in_brackets: use_brackets)
|
37
|
+
end.join
|
11
38
|
end
|
12
|
-
end
|
13
|
-
|
14
|
-
|
39
|
+
end
|
40
|
+
|
41
|
+
def compressed_surrogate_range_pairs(astral_ranges)
|
42
|
+
halves = astral_ranges.flat_map { |range| surrogate_half_ranges(range) }
|
15
43
|
|
16
|
-
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
|
22
|
-
|
44
|
+
# compress high surrogate codepoint ranges with common low range half
|
45
|
+
with_common_lo = halves.group_by(&:last).map do |lo_range, pairs|
|
46
|
+
hi_ranges = pairs.map(&:first)
|
47
|
+
compressed_hi_ranges = hi_ranges.each_with_object([]) do |range, arr|
|
48
|
+
prev = arr.last
|
49
|
+
if prev.nil? || prev.max + 1 < range.min # first or gap
|
50
|
+
arr << range
|
51
|
+
else # continuous codepoints, expand previous range
|
52
|
+
arr[-1] = (prev.min)..(range.max)
|
53
|
+
end
|
54
|
+
end
|
55
|
+
[compressed_hi_ranges, lo_range]
|
56
|
+
end
|
57
|
+
|
58
|
+
# compress low surrogate codepoint ranges with common high ranges
|
59
|
+
with_common_lo.each_with_object({}) do |(hi_ranges, lo_range), hash|
|
60
|
+
(hash[hi_ranges] ||= []) << lo_range
|
61
|
+
end
|
23
62
|
end
|
24
|
-
end
|
25
63
|
|
26
|
-
|
27
|
-
|
28
|
-
|
64
|
+
def surrogate_half_ranges(astral_range)
|
65
|
+
hi_min, lo_min = surrogate_pair_codepoints(astral_range.min)
|
66
|
+
hi_max, lo_max = surrogate_pair_codepoints(astral_range.max)
|
67
|
+
hi_count = 1 + hi_max - hi_min
|
68
|
+
return [[hi_min..hi_min, lo_min..lo_max]] if hi_count == 1
|
69
|
+
|
70
|
+
ranges = []
|
71
|
+
|
72
|
+
# first high surrogate might be partially covered (if lo_min > 0xDC00)
|
73
|
+
ranges << [hi_min..hi_min, lo_min..0xDFFF]
|
74
|
+
|
75
|
+
# any high surrogates in between are fully covered
|
76
|
+
ranges << [(hi_min + 1)..(hi_max - 1), 0xDC00..0xDFFF] if hi_count > 2
|
29
77
|
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
78
|
+
# last high surrogate might be partially covered (if lo_max < 0xDFFF)
|
79
|
+
ranges << [hi_max..hi_max, 0xDC00..lo_max]
|
80
|
+
|
81
|
+
ranges
|
82
|
+
end
|
83
|
+
|
84
|
+
def surrogate_pair_codepoints(astral_codepoint)
|
85
|
+
base = astral_codepoint - 0x10000
|
86
|
+
high = base / 1024 + 0xD800
|
87
|
+
low = base % 1024 + 0xDC00
|
88
|
+
[high, low]
|
89
|
+
end
|
90
|
+
|
91
|
+
def bmp_set_with_alternatives(bmp_ranges, alternatives)
|
92
|
+
bmp_set = write(bmp_ranges, format: :js, in_brackets: true)
|
93
|
+
return bmp_set if alternatives.empty? && bmp_ranges.any?
|
94
|
+
|
95
|
+
"(?:#{((bmp_ranges.any? ? [bmp_set] : []) + alternatives).join('|')})"
|
96
|
+
end
|
97
|
+
|
98
|
+
def surrogate_pairs(astral_ranges)
|
99
|
+
astral_ranges.flat_map { |range| range.map { |cp| surrogate_pair(cp) } }
|
100
|
+
end
|
101
|
+
|
102
|
+
def surrogate_pair(astral_codepoint)
|
103
|
+
surrogate_pair_codepoints(astral_codepoint)
|
104
|
+
.map { |half| write_codepoint(half, format: :js) }.join
|
105
|
+
end
|
35
106
|
end
|
36
107
|
end
|
37
108
|
end
|
metadata
CHANGED
@@ -1,15 +1,29 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: character_set
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.0
|
4
|
+
version: 1.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Janosch Müller
|
8
|
-
autorequire:
|
8
|
+
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2021-12-05 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
|
+
- !ruby/object:Gem::Dependency
|
14
|
+
name: sorted_set
|
15
|
+
requirement: !ruby/object:Gem::Requirement
|
16
|
+
requirements:
|
17
|
+
- - "~>"
|
18
|
+
- !ruby/object:Gem::Version
|
19
|
+
version: '1.0'
|
20
|
+
type: :runtime
|
21
|
+
prerelease: false
|
22
|
+
version_requirements: !ruby/object:Gem::Requirement
|
23
|
+
requirements:
|
24
|
+
- - "~>"
|
25
|
+
- !ruby/object:Gem::Version
|
26
|
+
version: '1.0'
|
13
27
|
- !ruby/object:Gem::Dependency
|
14
28
|
name: benchmark-ips
|
15
29
|
requirement: !ruby/object:Gem::Requirement
|
@@ -24,34 +38,48 @@ dependencies:
|
|
24
38
|
- - "~>"
|
25
39
|
- !ruby/object:Gem::Version
|
26
40
|
version: '2.7'
|
41
|
+
- !ruby/object:Gem::Dependency
|
42
|
+
name: get_process_mem
|
43
|
+
requirement: !ruby/object:Gem::Requirement
|
44
|
+
requirements:
|
45
|
+
- - "~>"
|
46
|
+
- !ruby/object:Gem::Version
|
47
|
+
version: 0.2.3
|
48
|
+
type: :development
|
49
|
+
prerelease: false
|
50
|
+
version_requirements: !ruby/object:Gem::Requirement
|
51
|
+
requirements:
|
52
|
+
- - "~>"
|
53
|
+
- !ruby/object:Gem::Version
|
54
|
+
version: 0.2.3
|
27
55
|
- !ruby/object:Gem::Dependency
|
28
56
|
name: rake
|
29
57
|
requirement: !ruby/object:Gem::Requirement
|
30
58
|
requirements:
|
31
59
|
- - "~>"
|
32
60
|
- !ruby/object:Gem::Version
|
33
|
-
version: '
|
61
|
+
version: '13.0'
|
34
62
|
type: :development
|
35
63
|
prerelease: false
|
36
64
|
version_requirements: !ruby/object:Gem::Requirement
|
37
65
|
requirements:
|
38
66
|
- - "~>"
|
39
67
|
- !ruby/object:Gem::Version
|
40
|
-
version: '
|
68
|
+
version: '13.0'
|
41
69
|
- !ruby/object:Gem::Dependency
|
42
70
|
name: rake-compiler
|
43
71
|
requirement: !ruby/object:Gem::Requirement
|
44
72
|
requirements:
|
45
73
|
- - "~>"
|
46
74
|
- !ruby/object:Gem::Version
|
47
|
-
version: '1.
|
75
|
+
version: '1.1'
|
48
76
|
type: :development
|
49
77
|
prerelease: false
|
50
78
|
version_requirements: !ruby/object:Gem::Requirement
|
51
79
|
requirements:
|
52
80
|
- - "~>"
|
53
81
|
- !ruby/object:Gem::Version
|
54
|
-
version: '1.
|
82
|
+
version: '1.1'
|
55
83
|
- !ruby/object:Gem::Dependency
|
56
84
|
name: range_compressor
|
57
85
|
requirement: !ruby/object:Gem::Requirement
|
@@ -72,28 +100,28 @@ dependencies:
|
|
72
100
|
requirements:
|
73
101
|
- - "~>"
|
74
102
|
- !ruby/object:Gem::Version
|
75
|
-
version: '1
|
103
|
+
version: '2.1'
|
76
104
|
type: :development
|
77
105
|
prerelease: false
|
78
106
|
version_requirements: !ruby/object:Gem::Requirement
|
79
107
|
requirements:
|
80
108
|
- - "~>"
|
81
109
|
- !ruby/object:Gem::Version
|
82
|
-
version: '1
|
110
|
+
version: '2.1'
|
83
111
|
- !ruby/object:Gem::Dependency
|
84
112
|
name: regexp_property_values
|
85
113
|
requirement: !ruby/object:Gem::Requirement
|
86
114
|
requirements:
|
87
115
|
- - "~>"
|
88
116
|
- !ruby/object:Gem::Version
|
89
|
-
version: 0
|
117
|
+
version: '1.0'
|
90
118
|
type: :development
|
91
119
|
prerelease: false
|
92
120
|
version_requirements: !ruby/object:Gem::Requirement
|
93
121
|
requirements:
|
94
122
|
- - "~>"
|
95
123
|
- !ruby/object:Gem::Version
|
96
|
-
version: 0
|
124
|
+
version: '1.0'
|
97
125
|
- !ruby/object:Gem::Dependency
|
98
126
|
name: rspec
|
99
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -108,7 +136,49 @@ dependencies:
|
|
108
136
|
- - "~>"
|
109
137
|
- !ruby/object:Gem::Version
|
110
138
|
version: '3.8'
|
111
|
-
|
139
|
+
- !ruby/object:Gem::Dependency
|
140
|
+
name: codecov
|
141
|
+
requirement: !ruby/object:Gem::Requirement
|
142
|
+
requirements:
|
143
|
+
- - "~>"
|
144
|
+
- !ruby/object:Gem::Version
|
145
|
+
version: 0.2.12
|
146
|
+
type: :development
|
147
|
+
prerelease: false
|
148
|
+
version_requirements: !ruby/object:Gem::Requirement
|
149
|
+
requirements:
|
150
|
+
- - "~>"
|
151
|
+
- !ruby/object:Gem::Version
|
152
|
+
version: 0.2.12
|
153
|
+
- !ruby/object:Gem::Dependency
|
154
|
+
name: gouteur
|
155
|
+
requirement: !ruby/object:Gem::Requirement
|
156
|
+
requirements:
|
157
|
+
- - "~>"
|
158
|
+
- !ruby/object:Gem::Version
|
159
|
+
version: 1.0.0
|
160
|
+
type: :development
|
161
|
+
prerelease: false
|
162
|
+
version_requirements: !ruby/object:Gem::Requirement
|
163
|
+
requirements:
|
164
|
+
- - "~>"
|
165
|
+
- !ruby/object:Gem::Version
|
166
|
+
version: 1.0.0
|
167
|
+
- !ruby/object:Gem::Dependency
|
168
|
+
name: rubocop
|
169
|
+
requirement: !ruby/object:Gem::Requirement
|
170
|
+
requirements:
|
171
|
+
- - "~>"
|
172
|
+
- !ruby/object:Gem::Version
|
173
|
+
version: '1.8'
|
174
|
+
type: :development
|
175
|
+
prerelease: false
|
176
|
+
version_requirements: !ruby/object:Gem::Requirement
|
177
|
+
requirements:
|
178
|
+
- - "~>"
|
179
|
+
- !ruby/object:Gem::Version
|
180
|
+
version: '1.8'
|
181
|
+
description:
|
112
182
|
email:
|
113
183
|
- janosch84@gmail.com
|
114
184
|
executables: []
|
@@ -116,26 +186,38 @@ extensions:
|
|
116
186
|
- ext/character_set/extconf.rb
|
117
187
|
extra_rdoc_files: []
|
118
188
|
files:
|
189
|
+
- ".gitattributes"
|
190
|
+
- ".github/workflows/gouteur.yml"
|
191
|
+
- ".github/workflows/lint.yml"
|
192
|
+
- ".github/workflows/tests.yml"
|
119
193
|
- ".gitignore"
|
194
|
+
- ".gouteur.yml"
|
120
195
|
- ".rspec"
|
121
|
-
- ".
|
196
|
+
- ".rubocop.yml"
|
122
197
|
- BENCHMARK.md
|
123
198
|
- CHANGELOG.md
|
124
199
|
- Gemfile
|
125
200
|
- LICENSE.txt
|
126
201
|
- README.md
|
127
202
|
- Rakefile
|
203
|
+
- benchmarks/count_in.rb
|
128
204
|
- benchmarks/cover.rb
|
129
205
|
- benchmarks/delete_in.rb
|
130
206
|
- benchmarks/keep_in.rb
|
207
|
+
- benchmarks/scan.rb
|
131
208
|
- benchmarks/shared.rb
|
132
209
|
- benchmarks/used_by.rb
|
210
|
+
- benchmarks/z_add.rb
|
211
|
+
- benchmarks/z_delete.rb
|
212
|
+
- benchmarks/z_merge.rb
|
213
|
+
- benchmarks/z_minmax.rb
|
133
214
|
- bin/console
|
134
215
|
- bin/setup
|
135
216
|
- character_set.gemspec
|
136
217
|
- ext/character_set/character_set.c
|
137
218
|
- ext/character_set/extconf.rb
|
138
219
|
- ext/character_set/unicode_casefold_table.h
|
220
|
+
- ext/character_set/unicode_casefold_table.h.tmpl
|
139
221
|
- lib/character_set.rb
|
140
222
|
- lib/character_set/character.rb
|
141
223
|
- lib/character_set/core_ext.rb
|
@@ -144,10 +226,25 @@ files:
|
|
144
226
|
- lib/character_set/expression_converter.rb
|
145
227
|
- lib/character_set/parser.rb
|
146
228
|
- lib/character_set/predefined_sets.rb
|
229
|
+
- lib/character_set/predefined_sets/any.cps
|
230
|
+
- lib/character_set/predefined_sets/ascii.cps
|
231
|
+
- lib/character_set/predefined_sets/ascii_alnum.cps
|
232
|
+
- lib/character_set/predefined_sets/ascii_letter.cps
|
233
|
+
- lib/character_set/predefined_sets/assigned.cps
|
234
|
+
- lib/character_set/predefined_sets/bmp.cps
|
235
|
+
- lib/character_set/predefined_sets/crypt.cps
|
236
|
+
- lib/character_set/predefined_sets/emoji.cps
|
237
|
+
- lib/character_set/predefined_sets/newline.cps
|
238
|
+
- lib/character_set/predefined_sets/surrogate.cps
|
239
|
+
- lib/character_set/predefined_sets/unicode.cps
|
240
|
+
- lib/character_set/predefined_sets/url_fragment.cps
|
241
|
+
- lib/character_set/predefined_sets/url_host.cps
|
242
|
+
- lib/character_set/predefined_sets/url_path.cps
|
243
|
+
- lib/character_set/predefined_sets/url_query.cps
|
244
|
+
- lib/character_set/predefined_sets/whitespace.cps
|
147
245
|
- lib/character_set/pure.rb
|
148
246
|
- lib/character_set/ruby_fallback.rb
|
149
247
|
- lib/character_set/ruby_fallback/character_set_methods.rb
|
150
|
-
- lib/character_set/ruby_fallback/plane_methods.rb
|
151
248
|
- lib/character_set/ruby_fallback/set_methods.rb
|
152
249
|
- lib/character_set/set_method_adapters.rb
|
153
250
|
- lib/character_set/shared_methods.rb
|
@@ -157,7 +254,7 @@ homepage: https://github.com/jaynetics/character_set
|
|
157
254
|
licenses:
|
158
255
|
- MIT
|
159
256
|
metadata: {}
|
160
|
-
post_install_message:
|
257
|
+
post_install_message:
|
161
258
|
rdoc_options: []
|
162
259
|
require_paths:
|
163
260
|
- lib
|
@@ -172,8 +269,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
172
269
|
- !ruby/object:Gem::Version
|
173
270
|
version: '0'
|
174
271
|
requirements: []
|
175
|
-
rubygems_version: 3.0.
|
176
|
-
signing_key:
|
272
|
+
rubygems_version: 3.3.0.dev
|
273
|
+
signing_key:
|
177
274
|
specification_version: 4
|
178
275
|
summary: Build, read, write and compare sets of Unicode codepoints.
|
179
276
|
test_files: []
|
data/.travis.yml
DELETED
data/lib/character_set/ruby_fallback/plane_methods.rb
DELETED
@@ -1,27 +0,0 @@
|
|
1
|
-
class CharacterSet
|
2
|
-
module RubyFallback
|
3
|
-
module PlaneMethods
|
4
|
-
def bmp_part
|
5
|
-
dup.keep_if { |cp| cp < 0x10000 }
|
6
|
-
end
|
7
|
-
|
8
|
-
def astral_part
|
9
|
-
dup.keep_if { |cp| cp >= 0x10000 }
|
10
|
-
end
|
11
|
-
|
12
|
-
def planes
|
13
|
-
plane_set = {}
|
14
|
-
plane_size = 0x10000.to_f
|
15
|
-
each do |cp|
|
16
|
-
plane = (cp / plane_size).floor
|
17
|
-
plane_set[plane] = true
|
18
|
-
end
|
19
|
-
plane_set.keys
|
20
|
-
end
|
21
|
-
|
22
|
-
def member_in_plane?(num)
|
23
|
-
((num * 0x10000)...((num + 1) * 0x10000)).any? { |cp| include?(cp) }
|
24
|
-
end
|
25
|
-
end
|
26
|
-
end
|
27
|
-
end
|