character_set 1.2.0 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/.gitattributes +3 -0
  3. data/.github/workflows/gouteur.yml +20 -0
  4. data/.github/workflows/lint.yml +29 -0
  5. data/.github/workflows/tests.yml +22 -0
  6. data/.gitignore +1 -0
  7. data/.gouteur.yml +2 -0
  8. data/.rubocop.yml +17 -0
  9. data/BENCHMARK.md +53 -17
  10. data/CHANGELOG.md +54 -0
  11. data/README.md +51 -12
  12. data/Rakefile +20 -18
  13. data/benchmarks/count_in.rb +13 -0
  14. data/benchmarks/delete_in.rb +1 -1
  15. data/benchmarks/scan.rb +13 -0
  16. data/benchmarks/shared.rb +5 -0
  17. data/benchmarks/z_add.rb +12 -0
  18. data/benchmarks/z_delete.rb +12 -0
  19. data/benchmarks/z_merge.rb +15 -0
  20. data/benchmarks/z_minmax.rb +12 -0
  21. data/bin/console +2 -0
  22. data/character_set.gemspec +17 -4
  23. data/ext/character_set/character_set.c +969 -415
  24. data/ext/character_set/unicode_casefold_table.h +44 -1
  25. data/ext/character_set/unicode_casefold_table.h.tmpl +11 -0
  26. data/lib/character_set/character.rb +1 -1
  27. data/lib/character_set/core_ext/regexp_ext.rb +1 -1
  28. data/lib/character_set/core_ext/string_ext.rb +3 -1
  29. data/lib/character_set/expression_converter.rb +41 -43
  30. data/lib/character_set/parser.rb +1 -1
  31. data/lib/character_set/predefined_sets/any.cps +1 -0
  32. data/lib/character_set/predefined_sets/ascii.cps +1 -0
  33. data/lib/character_set/predefined_sets/ascii_alnum.cps +3 -0
  34. data/lib/character_set/predefined_sets/ascii_letter.cps +2 -0
  35. data/lib/character_set/predefined_sets/assigned.cps +677 -0
  36. data/lib/character_set/predefined_sets/bmp.cps +2 -0
  37. data/lib/character_set/predefined_sets/crypt.cps +2 -0
  38. data/lib/character_set/predefined_sets/emoji.cps +152 -0
  39. data/lib/character_set/predefined_sets/newline.cps +3 -0
  40. data/lib/character_set/predefined_sets/surrogate.cps +1 -0
  41. data/lib/character_set/predefined_sets/unicode.cps +2 -0
  42. data/lib/character_set/predefined_sets/url_fragment.cps +8 -0
  43. data/lib/character_set/predefined_sets/url_host.cps +10 -0
  44. data/lib/character_set/predefined_sets/url_path.cps +7 -0
  45. data/lib/character_set/predefined_sets/url_query.cps +8 -0
  46. data/lib/character_set/predefined_sets/whitespace.cps +10 -0
  47. data/lib/character_set/predefined_sets.rb +25 -260
  48. data/lib/character_set/ruby_fallback/character_set_methods.rb +60 -9
  49. data/lib/character_set/ruby_fallback/set_methods.rb +25 -17
  50. data/lib/character_set/ruby_fallback.rb +5 -3
  51. data/lib/character_set/set_method_adapters.rb +4 -3
  52. data/lib/character_set/shared_methods.rb +69 -50
  53. data/lib/character_set/version.rb +1 -1
  54. data/lib/character_set/writer.rb +98 -27
  55. metadata +114 -17
  56. data/.travis.yml +0 -8
  57. data/lib/character_set/ruby_fallback/plane_methods.rb +0 -27
@@ -1,12 +1,14 @@
1
- require 'set'
1
+ if RUBY_VERSION.to_f >= 3.0 && !RUBY_PLATFORM[/java/i]
2
+ require 'sorted_set'
3
+ else
4
+ require 'set'
5
+ end
2
6
  require 'character_set/ruby_fallback/set_methods'
3
- require 'character_set/ruby_fallback/plane_methods'
4
7
  require 'character_set/ruby_fallback/character_set_methods'
5
8
 
6
9
  class CharacterSet
7
10
  module RubyFallback
8
11
  include CharacterSet::RubyFallback::SetMethods
9
- include CharacterSet::RubyFallback::PlaneMethods
10
12
  include CharacterSet::RubyFallback::CharacterSetMethods
11
13
 
12
14
  def self.prepended(klass)
@@ -22,13 +22,14 @@ class CharacterSet
22
22
 
23
23
  # Allow some methods to take an Enum just as well as another CharacterSet.
24
24
  # Tested by ruby-spec.
25
- %w[& + - ^ | difference intersection subtract union].each do |method|
25
+ %w[& + - ^ | difference disjoint? intersect? intersection
26
+ subtract union].each do |method|
26
27
  class_eval <<-RUBY, __FILE__, __LINE__ + 1
27
28
  def #{method}(arg)
28
29
  if arg.is_a?(CharacterSet)
29
- super
30
+ super(arg)
30
31
  elsif arg.respond_to?(:each)
31
- super(CharacterSet.new(arg.to_a))
32
+ super(self.class.new(arg.to_a))
32
33
  else
33
34
  raise ArgumentError, 'pass an enumerable'
34
35
  end
@@ -22,33 +22,29 @@ class CharacterSet
22
22
  end
23
23
 
24
24
  def of_property(property_name)
25
- require_optional_dependency('regexp_property_values')
25
+ require_optional_dependency('regexp_property_values', __method__)
26
26
 
27
27
  property = RegexpPropertyValues[property_name.to_s]
28
28
  from_ranges(*property.matched_ranges)
29
29
  end
30
30
 
31
31
  def of_regexp(regexp)
32
- require_optional_dependency('regexp_parser')
32
+ require_optional_dependency('regexp_parser', __method__)
33
33
 
34
34
  root = ::Regexp::Parser.parse(regexp)
35
35
  of_expression(root)
36
36
  end
37
37
 
38
38
  def of_expression(expression)
39
- ExpressionConverter.convert(expression)
39
+ ExpressionConverter.convert(expression, self)
40
40
  end
41
41
 
42
- def require_optional_dependency(name)
42
+ def require_optional_dependency(name, method)
43
43
  required_optional_dependencies[name] ||= begin
44
44
  require name
45
45
  true
46
46
  rescue ::LoadError
47
- entry_point = caller_locations.reverse.find do |loc|
48
- loc.absolute_path.to_s.include?('/lib/character_set')
49
- end
50
- method = entry_point && entry_point.label
51
- raise LoadError, 'You must the install the optional dependency '\
47
+ raise LoadError, 'You must install the optional dependency '\
52
48
  "'\#{name}' to use the method `\#{method}'."
53
49
  end
54
50
  end
@@ -70,37 +66,64 @@ class CharacterSet
70
66
  merge(enum)
71
67
  end
72
68
 
73
- # stringification methods
69
+ # CharacterSet-specific conversion methods
70
+
71
+ def assigned_part
72
+ self & self.class.assigned
73
+ end
74
+
75
+ def valid_part
76
+ self - self.class.surrogate
77
+ end
78
+
79
+ # CharacterSet-specific stringification methods
74
80
 
75
81
  def to_s(opts = {}, &block)
76
82
  Writer.write(ranges, opts, &block)
77
83
  end
78
84
 
85
+ def to_s_with_surrogate_ranges
86
+ Writer.write_surrogate_ranges(bmp_part.ranges, astral_part.ranges)
87
+ end
88
+
79
89
  def to_s_with_surrogate_alternation
80
90
  Writer.write_surrogate_alternation(bmp_part.ranges, astral_part.ranges)
81
91
  end
82
92
 
93
+ def secure_token(length = 32)
94
+ CharacterSet.require_optional_dependency('securerandom', __method__)
95
+ cps = to_a
96
+ len = cps.count
97
+ 1.upto(length).map { cps[SecureRandom.random_number(len)] }.pack('U*')
98
+ end
99
+ alias random_token secure_token
100
+
83
101
  def inspect
84
102
  len = length
85
- "#<CharacterSet: {\#{first(5) * ', '}\#{'...' if len > 5}} (size: \#{len})>"
103
+ "#<#{klass.name}: {\#{first(5) * ', '}\#{'...' if len > 5}} (size: \#{len})>"
86
104
  end
87
105
 
88
- # unicode-plane-related methods
106
+ # C-extension adapter methods. Need overriding in pure fallback.
107
+ # Parsing kwargs in C is slower, verbose, and kinda deprecated.
89
108
 
90
- def bmp_part?
91
- !bmp_part.empty?
109
+ def inversion(include_surrogates: false, upto: 0x10FFFF)
110
+ ext_inversion(include_surrogates, upto)
92
111
  end
93
112
 
94
- def astral_part?
95
- !astral_part.empty?
113
+ def section(from:, upto: 0x10FFFF)
114
+ ext_section(from, upto)
96
115
  end
97
116
 
98
- def bmp_ratio
99
- bmp_part.count / count.to_f
117
+ def count_in_section(from:, upto: 0x10FFFF)
118
+ ext_count_in_section(from, upto)
100
119
  end
101
120
 
102
- def astral_ratio
103
- astral_part.count / count.to_f
121
+ def section?(from:, upto: 0x10FFFF)
122
+ ext_section?(from, upto)
123
+ end
124
+
125
+ def section_ratio(from:, upto: 0x10FFFF)
126
+ ext_section_ratio(from, upto)
104
127
  end
105
128
 
106
129
  #
@@ -136,42 +159,38 @@ class CharacterSet
136
159
  end
137
160
 
138
161
  def divide(&func)
139
- block_given? or return enum_for(__method__) { size }
140
- require 'set'
141
-
142
- if func.arity == 2
143
- require 'tsort'
162
+ CharacterSet.require_optional_dependency('set', __method__)
163
+ Set.new(to_a).divide(&func)
164
+ end
165
+ RUBY
144
166
 
145
- class << dig = {}
146
- include TSort
167
+ # CharacterSet-specific section methods
147
168
 
148
- alias tsort_each_node each_key
149
- def tsort_each_child(node, &block)
150
- fetch(node).each(&block)
151
- end
152
- end
169
+ {
170
+ ascii: 0..0x7F,
171
+ bmp: 0..0xFFFF,
172
+ astral: 0x10000..0x10FFFF,
173
+ }.each do |section_name, range|
174
+ klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
175
+ def #{section_name}_part
176
+ section(from: #{range.begin}, upto: #{range.end})
177
+ end
153
178
 
154
- each do |u|
155
- dig[u] = a = []
156
- each{ |v| a << v if yield(u, v) }
157
- end
179
+ def #{section_name}_part?
180
+ section?(from: #{range.begin}, upto: #{range.end})
181
+ end
158
182
 
159
- set = Set.new
160
- dig.each_strongly_connected_component do |css|
161
- set.add(self.class.new(css))
162
- end
163
- set
164
- else
165
- Set.new(classify(&func).values)
183
+ def #{section_name}_only?
184
+ #{range.begin == 0 ?
185
+ "!section?(from: #{range.end}, upto: 0x10FFFF)" :
186
+ "!section?(from: 0, upto: #{range.begin})"}
166
187
  end
167
- end
168
188
 
169
- # C-extension adapter method. Needs overriding in pure fallback.
170
- # Parsing kwargs in C is slower, verbose, and kinda deprecated.
171
- def inversion(include_surrogates: false, upto: 0x10FFFF)
172
- ext_inversion(include_surrogates, upto)
173
- end
174
- RUBY
189
+ def #{section_name}_ratio
190
+ section_ratio(from: #{range.begin}, upto: #{range.end})
191
+ end
192
+ RUBY
193
+ end
175
194
  end # self.included
176
195
  end # SharedMethods
177
196
  end
@@ -1,3 +1,3 @@
1
1
  class CharacterSet
2
- VERSION = '1.2.0'
2
+ VERSION = '1.5.0'
3
3
  end
@@ -1,37 +1,108 @@
1
1
  class CharacterSet
2
2
  module Writer
3
- module_function
4
-
5
- def write(codepoint_ranges, opts = {}, &block)
6
- content = codepoint_ranges.map do |range|
7
- if range.size > 2 && opts[:abbreviate] != false
8
- range.minmax.map { |cp| Character.new(cp).escape(opts, &block) }.join('-')
9
- else
10
- range.map { |cp| Character.new(cp).escape(opts, &block) }.join
3
+ class << self
4
+ def write(codepoint_ranges, opts = {}, &block)
5
+ content = codepoint_ranges.map do |range|
6
+ if range.size > 2 && opts[:abbreviate] != false
7
+ bounds = [range.min, range.max]
8
+ bounds.map { |cp| write_codepoint(cp, opts, &block) }.join('-')
9
+ else
10
+ range.map { |cp| write_codepoint(cp, opts, &block) }.join
11
+ end
12
+ end.join
13
+ opts[:in_brackets] ? "[#{content}]" : content
14
+ end
15
+
16
+ def write_codepoint(codepoint, opts = {}, &block)
17
+ Character.new(codepoint).escape(opts, &block)
18
+ end
19
+
20
+ def write_surrogate_ranges(bmp_ranges, astral_ranges)
21
+ astral_branches = surrogate_range_expressions(astral_ranges)
22
+ bmp_set_with_alternatives(bmp_ranges, astral_branches)
23
+ end
24
+
25
+ def write_surrogate_alternation(bmp_ranges, astral_ranges)
26
+ astral_branches = surrogate_pairs(astral_ranges)
27
+ bmp_set_with_alternatives(bmp_ranges, astral_branches)
28
+ end
29
+
30
+ private
31
+
32
+ def surrogate_range_expressions(astral_ranges)
33
+ compressed_surrogate_range_pairs(astral_ranges).map do |hi_ranges, lo_ranges|
34
+ [hi_ranges, lo_ranges].map do |ranges|
35
+ use_brackets = ranges.size > 1 || ranges.first.size > 1
36
+ write(ranges, format: :js, in_brackets: use_brackets)
37
+ end.join
11
38
  end
12
- end.join
13
- opts[:in_brackets] ? "[#{content}]" : content
14
- end
39
+ end
40
+
41
+ def compressed_surrogate_range_pairs(astral_ranges)
42
+ halves = astral_ranges.flat_map { |range| surrogate_half_ranges(range) }
15
43
 
16
- def write_surrogate_alternation(bmp_ranges, astral_ranges)
17
- bmp_set = write(bmp_ranges, format: :js, in_brackets: true)
18
- if astral_ranges.empty?
19
- bmp_set
20
- else
21
- surrogate_pairs = surrogate_pairs(astral_ranges)
22
- "(?:#{((bmp_ranges.any? ? [bmp_set] : []) + surrogate_pairs) * '|'})"
44
+ # compress high surrogate codepoint ranges with common low range half
45
+ with_common_lo = halves.group_by(&:last).map do |lo_range, pairs|
46
+ hi_ranges = pairs.map(&:first)
47
+ compressed_hi_ranges = hi_ranges.each_with_object([]) do |range, arr|
48
+ prev = arr.last
49
+ if prev.nil? || prev.max + 1 < range.min # first or gap
50
+ arr << range
51
+ else # continuous codepoints, expand previous range
52
+ arr[-1] = (prev.min)..(range.max)
53
+ end
54
+ end
55
+ [compressed_hi_ranges, lo_range]
56
+ end
57
+
58
+ # compress low surrogate codepoint ranges with common high ranges
59
+ with_common_lo.each_with_object({}) do |(hi_ranges, lo_range), hash|
60
+ (hash[hi_ranges] ||= []) << lo_range
61
+ end
23
62
  end
24
- end
25
63
 
26
- def surrogate_pairs(astral_ranges)
27
- astral_ranges.flat_map { |range| range.map { |cp| surrogate_pair(cp) } }
28
- end
64
+ def surrogate_half_ranges(astral_range)
65
+ hi_min, lo_min = surrogate_pair_codepoints(astral_range.min)
66
+ hi_max, lo_max = surrogate_pair_codepoints(astral_range.max)
67
+ hi_count = 1 + hi_max - hi_min
68
+ return [[hi_min..hi_min, lo_min..lo_max]] if hi_count == 1
69
+
70
+ ranges = []
71
+
72
+ # first high surrogate might be partially covered (if lo_min > 0xDC00)
73
+ ranges << [hi_min..hi_min, lo_min..0xDFFF]
74
+
75
+ # any high surrogates in between are fully covered
76
+ ranges << [(hi_min + 1)..(hi_max - 1), 0xDC00..0xDFFF] if hi_count > 2
29
77
 
30
- def surrogate_pair(astral_codepoint)
31
- base = astral_codepoint - 0x10000
32
- high = ((base / 1024).floor + 0xD800).to_s(16)
33
- low = (base % 1024 + 0xDC00).to_s(16)
34
- "\\u#{high}\\u#{low}"
78
+ # last high surrogate might be partially covered (if lo_max < 0xDFFF)
79
+ ranges << [hi_max..hi_max, 0xDC00..lo_max]
80
+
81
+ ranges
82
+ end
83
+
84
+ def surrogate_pair_codepoints(astral_codepoint)
85
+ base = astral_codepoint - 0x10000
86
+ high = base / 1024 + 0xD800
87
+ low = base % 1024 + 0xDC00
88
+ [high, low]
89
+ end
90
+
91
+ def bmp_set_with_alternatives(bmp_ranges, alternatives)
92
+ bmp_set = write(bmp_ranges, format: :js, in_brackets: true)
93
+ return bmp_set if alternatives.empty? && bmp_ranges.any?
94
+
95
+ "(?:#{((bmp_ranges.any? ? [bmp_set] : []) + alternatives).join('|')})"
96
+ end
97
+
98
+ def surrogate_pairs(astral_ranges)
99
+ astral_ranges.flat_map { |range| range.map { |cp| surrogate_pair(cp) } }
100
+ end
101
+
102
+ def surrogate_pair(astral_codepoint)
103
+ surrogate_pair_codepoints(astral_codepoint)
104
+ .map { |half| write_codepoint(half, format: :js) }.join
105
+ end
35
106
  end
36
107
  end
37
108
  end
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: character_set
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.0
4
+ version: 1.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Janosch Müller
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-04-02 00:00:00.000000000 Z
11
+ date: 2021-12-05 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: sorted_set
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: benchmark-ips
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -24,34 +38,48 @@ dependencies:
24
38
  - - "~>"
25
39
  - !ruby/object:Gem::Version
26
40
  version: '2.7'
41
+ - !ruby/object:Gem::Dependency
42
+ name: get_process_mem
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: 0.2.3
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: 0.2.3
27
55
  - !ruby/object:Gem::Dependency
28
56
  name: rake
29
57
  requirement: !ruby/object:Gem::Requirement
30
58
  requirements:
31
59
  - - "~>"
32
60
  - !ruby/object:Gem::Version
33
- version: '12.0'
61
+ version: '13.0'
34
62
  type: :development
35
63
  prerelease: false
36
64
  version_requirements: !ruby/object:Gem::Requirement
37
65
  requirements:
38
66
  - - "~>"
39
67
  - !ruby/object:Gem::Version
40
- version: '12.0'
68
+ version: '13.0'
41
69
  - !ruby/object:Gem::Dependency
42
70
  name: rake-compiler
43
71
  requirement: !ruby/object:Gem::Requirement
44
72
  requirements:
45
73
  - - "~>"
46
74
  - !ruby/object:Gem::Version
47
- version: '1.0'
75
+ version: '1.1'
48
76
  type: :development
49
77
  prerelease: false
50
78
  version_requirements: !ruby/object:Gem::Requirement
51
79
  requirements:
52
80
  - - "~>"
53
81
  - !ruby/object:Gem::Version
54
- version: '1.0'
82
+ version: '1.1'
55
83
  - !ruby/object:Gem::Dependency
56
84
  name: range_compressor
57
85
  requirement: !ruby/object:Gem::Requirement
@@ -72,28 +100,28 @@ dependencies:
72
100
  requirements:
73
101
  - - "~>"
74
102
  - !ruby/object:Gem::Version
75
- version: '1.3'
103
+ version: '2.1'
76
104
  type: :development
77
105
  prerelease: false
78
106
  version_requirements: !ruby/object:Gem::Requirement
79
107
  requirements:
80
108
  - - "~>"
81
109
  - !ruby/object:Gem::Version
82
- version: '1.3'
110
+ version: '2.1'
83
111
  - !ruby/object:Gem::Dependency
84
112
  name: regexp_property_values
85
113
  requirement: !ruby/object:Gem::Requirement
86
114
  requirements:
87
115
  - - "~>"
88
116
  - !ruby/object:Gem::Version
89
- version: 0.3.5
117
+ version: '1.0'
90
118
  type: :development
91
119
  prerelease: false
92
120
  version_requirements: !ruby/object:Gem::Requirement
93
121
  requirements:
94
122
  - - "~>"
95
123
  - !ruby/object:Gem::Version
96
- version: 0.3.5
124
+ version: '1.0'
97
125
  - !ruby/object:Gem::Dependency
98
126
  name: rspec
99
127
  requirement: !ruby/object:Gem::Requirement
@@ -108,7 +136,49 @@ dependencies:
108
136
  - - "~>"
109
137
  - !ruby/object:Gem::Version
110
138
  version: '3.8'
111
- description:
139
+ - !ruby/object:Gem::Dependency
140
+ name: codecov
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: 0.2.12
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: 0.2.12
153
+ - !ruby/object:Gem::Dependency
154
+ name: gouteur
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: 1.0.0
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: 1.0.0
167
+ - !ruby/object:Gem::Dependency
168
+ name: rubocop
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: '1.8'
174
+ type: :development
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: '1.8'
181
+ description:
112
182
  email:
113
183
  - janosch84@gmail.com
114
184
  executables: []
@@ -116,26 +186,38 @@ extensions:
116
186
  - ext/character_set/extconf.rb
117
187
  extra_rdoc_files: []
118
188
  files:
189
+ - ".gitattributes"
190
+ - ".github/workflows/gouteur.yml"
191
+ - ".github/workflows/lint.yml"
192
+ - ".github/workflows/tests.yml"
119
193
  - ".gitignore"
194
+ - ".gouteur.yml"
120
195
  - ".rspec"
121
- - ".travis.yml"
196
+ - ".rubocop.yml"
122
197
  - BENCHMARK.md
123
198
  - CHANGELOG.md
124
199
  - Gemfile
125
200
  - LICENSE.txt
126
201
  - README.md
127
202
  - Rakefile
203
+ - benchmarks/count_in.rb
128
204
  - benchmarks/cover.rb
129
205
  - benchmarks/delete_in.rb
130
206
  - benchmarks/keep_in.rb
207
+ - benchmarks/scan.rb
131
208
  - benchmarks/shared.rb
132
209
  - benchmarks/used_by.rb
210
+ - benchmarks/z_add.rb
211
+ - benchmarks/z_delete.rb
212
+ - benchmarks/z_merge.rb
213
+ - benchmarks/z_minmax.rb
133
214
  - bin/console
134
215
  - bin/setup
135
216
  - character_set.gemspec
136
217
  - ext/character_set/character_set.c
137
218
  - ext/character_set/extconf.rb
138
219
  - ext/character_set/unicode_casefold_table.h
220
+ - ext/character_set/unicode_casefold_table.h.tmpl
139
221
  - lib/character_set.rb
140
222
  - lib/character_set/character.rb
141
223
  - lib/character_set/core_ext.rb
@@ -144,10 +226,25 @@ files:
144
226
  - lib/character_set/expression_converter.rb
145
227
  - lib/character_set/parser.rb
146
228
  - lib/character_set/predefined_sets.rb
229
+ - lib/character_set/predefined_sets/any.cps
230
+ - lib/character_set/predefined_sets/ascii.cps
231
+ - lib/character_set/predefined_sets/ascii_alnum.cps
232
+ - lib/character_set/predefined_sets/ascii_letter.cps
233
+ - lib/character_set/predefined_sets/assigned.cps
234
+ - lib/character_set/predefined_sets/bmp.cps
235
+ - lib/character_set/predefined_sets/crypt.cps
236
+ - lib/character_set/predefined_sets/emoji.cps
237
+ - lib/character_set/predefined_sets/newline.cps
238
+ - lib/character_set/predefined_sets/surrogate.cps
239
+ - lib/character_set/predefined_sets/unicode.cps
240
+ - lib/character_set/predefined_sets/url_fragment.cps
241
+ - lib/character_set/predefined_sets/url_host.cps
242
+ - lib/character_set/predefined_sets/url_path.cps
243
+ - lib/character_set/predefined_sets/url_query.cps
244
+ - lib/character_set/predefined_sets/whitespace.cps
147
245
  - lib/character_set/pure.rb
148
246
  - lib/character_set/ruby_fallback.rb
149
247
  - lib/character_set/ruby_fallback/character_set_methods.rb
150
- - lib/character_set/ruby_fallback/plane_methods.rb
151
248
  - lib/character_set/ruby_fallback/set_methods.rb
152
249
  - lib/character_set/set_method_adapters.rb
153
250
  - lib/character_set/shared_methods.rb
@@ -157,7 +254,7 @@ homepage: https://github.com/jaynetics/character_set
157
254
  licenses:
158
255
  - MIT
159
256
  metadata: {}
160
- post_install_message:
257
+ post_install_message:
161
258
  rdoc_options: []
162
259
  require_paths:
163
260
  - lib
@@ -172,8 +269,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
172
269
  - !ruby/object:Gem::Version
173
270
  version: '0'
174
271
  requirements: []
175
- rubygems_version: 3.0.3
176
- signing_key:
272
+ rubygems_version: 3.3.0.dev
273
+ signing_key:
177
274
  specification_version: 4
178
275
  summary: Build, read, write and compare sets of Unicode codepoints.
179
276
  test_files: []
data/.travis.yml DELETED
@@ -1,8 +0,0 @@
1
- sudo: false
2
- language: ruby
3
- rvm:
4
- - 2.1
5
- - 2.4
6
- - 2.5
7
- - 2.6
8
- - jruby-9.1.9.0
@@ -1,27 +0,0 @@
1
- class CharacterSet
2
- module RubyFallback
3
- module PlaneMethods
4
- def bmp_part
5
- dup.keep_if { |cp| cp < 0x10000 }
6
- end
7
-
8
- def astral_part
9
- dup.keep_if { |cp| cp >= 0x10000 }
10
- end
11
-
12
- def planes
13
- plane_set = {}
14
- plane_size = 0x10000.to_f
15
- each do |cp|
16
- plane = (cp / plane_size).floor
17
- plane_set[plane] = true
18
- end
19
- plane_set.keys
20
- end
21
-
22
- def member_in_plane?(num)
23
- ((num * 0x10000)...((num + 1) * 0x10000)).any? { |cp| include?(cp) }
24
- end
25
- end
26
- end
27
- end