character_set 1.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,103 @@
1
class CharacterSet
  module RubyFallback
    # Set-like behaviour for the pure-Ruby implementation. Every method in
    # here operates on the set stored in @__set, which must have been
    # assigned before any of these methods is called.
    module SetMethods
      # Plain delegators: everything from Enumerable plus a few size queries
      # is forwarded untouched to the wrapped set.
      (Enumerable.instance_methods + %w[empty? length size]).each do |name|
        class_eval <<-RUBY, __FILE__, __LINE__ + 1
          def #{name}(*args, &block)
            @__set.#{name}(*args, &block)
          end
        RUBY
      end

      # Comparisons: when the argument is another character set, compare
      # against its wrapped set instead of the wrapper object.
      comparison_methods = %w[
        < <= > >= disjoint? intersect? proper_subset? proper_superset?
        subset? superset?
      ]
      comparison_methods.each do |name|
        class_eval <<-RUBY, __FILE__, __LINE__ + 1
          def #{name}(enum, &block)
            wrapped = enum.is_a?(CharacterSet) || enum.is_a?(CharacterSet::Pure)
            other = wrapped ? enum.instance_variable_get(:@__set) : enum
            @__set.#{name}(other, &block)
          end
        RUBY
      end

      # Mutators and queries: whenever the wrapped set answers with a Set,
      # return the receiver instead so that call chains stay on this object.
      # Any other result (true, false, nil, Integer, ...) is passed through.
      self_returning_methods = %w[
        << === add add? clear collect! delete delete? delete_if
        each filter! hash include? map! member? keep_if reject!
        select! subtract
      ]
      self_returning_methods.each do |name|
        class_eval <<-RUBY, __FILE__, __LINE__ + 1
          def #{name}(*args, &block)
            outcome = @__set.#{name}(*args, &block)
            return self if outcome.is_a?(Set)
            outcome
          end
        RUBY
      end

      # Set algebra: String members of the argument are turned into their
      # codepoints, and the result is wrapped in a new instance of the
      # receiver's class.
      %w[& + - ^ | difference intersection union].each do |name|
        class_eval <<-RUBY, __FILE__, __LINE__ + 1
          def #{name}(enum, &block)
            enum = enum.map { |el| el.is_a?(String) ? el.ord : el } if enum.respond_to?(:map)
            combined = @__set.#{name}(enum, &block)
            self.class.new(combined.to_a)
          end
        RUBY
      end

      # Apply (un)tainting to the wrapped set first, then to the receiver.
      %w[taint untaint].each do |name|
        class_eval <<-RUBY, __FILE__, __LINE__ + 1
          def #{name}
            @__set.#{name}
            super
          end
        RUBY
      end

      # Not defined when RUBY_PLATFORM mentions java.
      if RUBY_PLATFORM !~ /java/i
        # Freezes the wrapped set together with the receiver.
        def freeze
          # NOTE(review): #to_a is called before freezing, presumably so the
          # wrapped set can build any cached array while it is still
          # mutable - confirm against the set implementation in use.
          @__set.to_a
          @__set.freeze
          super
        end
      end

      # Adds every element of +other+ and returns the receiver.
      # Raises ArgumentError unless +other+ responds to #each.
      def merge(other)
        unless other.respond_to?(:each)
          raise ArgumentError, 'pass an Enumerable'
        end

        # pass through #add to use the checks in SetMethodAdapters
        other.each { |element| add(element) }
        self
      end

      # Identity wins; instances of exactly the same class compare their
      # wrapped sets; other kinds of the receiver's class are compared by
      # size and membership; everything else is unequal.
      def ==(other)
        return true if equal?(other)
        return @__set == other.instance_variable_get(:@__set) if other.instance_of?(self.class)
        return false unless other.is_a?(self.class) && size == other.size

        other.all? { |cp| @__set.include?(cp) }
      end

      # True only for objects of the receiver's class (or a subclass) whose
      # wrapped set is #eql? to the receiver's.
      def eql?(other)
        other.is_a?(self.class) &&
          @__set.eql?(other.instance_variable_get(:@__set))
      end

      # Gives a #dup its own copy of the wrapped set.
      def initialize_dup(orig)
        super
        source = orig.instance_variable_get(:@__set)
        @__set = source.dup
      end

      # Gives a #clone its own clone of the wrapped set.
      def initialize_clone(orig)
        super
        source = orig.instance_variable_get(:@__set)
        @__set = source.clone
      end

      # Returns the members as an Array of codepoints, or as an Array of
      # UTF-8 Strings when +stringify+ is truthy.
      def to_a(stringify = false)
        codepoints = @__set.to_a
        return codepoints unless stringify

        codepoints.map { |cp| cp.chr('utf-8') }
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'set'
2
+ require 'character_set/ruby_fallback/set_methods'
3
+ require 'character_set/ruby_fallback/plane_methods'
4
+ require 'character_set/ruby_fallback/character_set_methods'
5
+
6
class CharacterSet
  # Pure-Ruby implementation layer. It bundles the fallback method modules
  # and prepares the internal set that those modules operate on.
  module RubyFallback
    # Keep these as separate includes: their order decides method lookup.
    include SetMethods
    include PlaneMethods
    include CharacterSetMethods

    # Hook: a class that prepends this module also gets the fallback
    # class-level methods.
    def self.prepended(klass)
      klass.extend(CharacterSetMethods::ClassMethods)
    end

    # Creates the (initially empty) sorted set first, then hands +enum+ on
    # to the next initializer in the ancestor chain.
    def initialize(enum = [])
      @__set = SortedSet.new
      super(enum)
    end
  end
end
@@ -0,0 +1,39 @@
1
class CharacterSet
  # Argument adapters that sit in front of the set methods and normalise
  # what is passed to them before calling +super+.
  module SetMethodAdapters
    # Allow some methods to work with String in addition to Integer args
    # (the internal representation is geared towards codepoint Integers).
    # Integers outside 0..0x10FFFF and all other argument types raise an
    # ArgumentError.
    %w[add add? << delete delete? include? member? ===].each do |name|
      class_eval <<-RUBY, __FILE__, __LINE__ + 1
        def #{name}(arg)
          case arg
          when Integer
            unless arg >= 0 && arg <= 0x10FFFF
              raise ArgumentError, 'pass an Integer between 0 and 0x10FFFF'
            end
            super(arg)
          when String
            super(arg.ord)
          else
            raise ArgumentError, 'pass a String or an Integer'
          end
        end
      RUBY
    end

    # Allow some methods to take an Enum just as well as another CharacterSet.
    # Tested by ruby-spec.
    %w[& + - ^ | difference intersection subtract union].each do |name|
      class_eval <<-RUBY, __FILE__, __LINE__ + 1
        def #{name}(arg)
          return super if arg.is_a?(CharacterSet)
          raise ArgumentError, 'pass an enumerable' unless arg.respond_to?(:each)

          super(CharacterSet.new(arg.to_a))
        end
      RUBY
    end
  end
end
@@ -0,0 +1,155 @@
1
+ #
2
+ # Various methods shared by the pure-Ruby and the extended implementation.
3
+ #
4
+ # Many of these methods are hotspots, so they are defined directly on
5
+ # the including classes for better performance.
6
+ #
7
class CharacterSet
  module SharedMethods
    # Hook: evaluates the shared method definitions directly in the body of
    # the including class rather than leaving them on this module. The
    # heredoc is quoted, so its content is taken literally (no interpolation
    # or escape processing happens before evaluation).
    def self.included(klass)
      klass.class_eval <<-'RUBY', __FILE__, __LINE__ + 1
        # Builds a set from the given arguments, e.g. CharacterSet[97, 98].
        def self.[](*args)
          new(args)
        end

        # Builds a set from a bracket expression String. An expression that
        # starts with "[^" yields the inversion of the listed codepoints.
        def self.parse(string)
          set = new(Parser.codepoints_from_bracket_expression(string))
          return set.inversion if string.start_with?('[^')
          set
        end

        # Builds a set of all codepoints matched by the named property.
        # The regexp_property_values library is required on demand.
        def self.of_property(property_name)
          @regexp_property_values_required ||= require 'regexp_property_values'

          from_ranges(*RegexpPropertyValues[property_name.to_s].matched_ranges)
        end

        # Parses the regexp and converts the resulting expression tree.
        # The regexp_parser library is required on demand.
        def self.of_regexp(regexp)
          @regexp_parser_required ||= require 'regexp_parser'

          of_expression(::Regexp::Parser.parse(regexp))
        end

        # Converts an already parsed expression.
        def self.of_expression(expression)
          ExpressionConverter.convert(expression)
        end

        # Fills the new set with the codepoints extracted from +enumerable+.
        def initialize(enumerable = [])
          codepoints = Parser.codepoints_from_enumerable(enumerable)
          merge(codepoints)
        end

        # Swaps the content of the receiver for that of +enum+. Anything
        # that is not exactly an Array, CharacterSet or Range is converted
        # to a new set first - before the receiver is cleared.
        def replace(enum)
          mergeable = [Array, CharacterSet, Range].include?(enum.class)
          source = mergeable ? enum : self.class.new(enum)
          clear
          merge(source)
        end

        # stringification methods

        def to_s(opts = {}, &block)
          Writer.write(ranges, opts, &block)
        end

        def to_s_with_surrogate_alternation
          Writer.write_surrogate_alternation(bmp_part.ranges, astral_part.ranges)
        end

        # Shows at most the first five members, followed by the total size.
        def inspect
          total = length
          preview = first(5).join(', ')
          ellipsis = total > 5 ? '...' : ''
          "#<CharacterSet: {#{preview}#{ellipsis}} (size: #{total})>"
        end

        # unicode-plane-related methods

        def bmp_part?
          !bmp_part.empty?
        end

        def astral_part?
          !astral_part.empty?
        end

        def bmp_ratio
          bmp_part.count / count.to_f
        end

        def astral_ratio
          astral_part.count / count.to_f
        end

        #
        # The following methods are here for `Set` compatibility, but they are
        # comparatively slow. Prefer others.
        #

        # Replaces each member with the block's result for it.
        # Returns a sized Enumerator when called without a block.
        def map!
          return enum_for(__method__) { size } unless block_given?
          mapped = []
          each { |cp| mapped << yield(cp) }
          replace(mapped)
        end
        alias collect! map!

        # Like #delete_if, but returns nil if nothing was removed.
        def reject!(&block)
          return enum_for(__method__) { size } unless block_given?
          size_before = size
          delete_if(&block)
          size == size_before ? nil : self
        end

        # Like #keep_if, but returns nil if nothing was removed.
        def select!(&block)
          return enum_for(__method__) { size } unless block_given?
          size_before = size
          keep_if(&block)
          size == size_before ? nil : self
        end
        alias filter! select!

        # Groups the members by the block's return value. Returns a Hash
        # that maps each return value to a set of the receiver's class.
        def classify
          return enum_for(__method__) { size } unless block_given?
          groups = {}
          each do |cp|
            key = yield(cp)
            groups[key] ||= self.class.new
            groups[key].add(cp)
          end
          groups
        end

        # Splits the receiver into a Set of subsets. With a one-argument
        # block, members are grouped by the block's return value. With a
        # two-argument block, the block decides whether two members are
        # linked, and each strongly connected component becomes a subset.
        def divide(&func)
          return enum_for(__method__) { size } unless block_given?
          require 'set'

          return Set.new(classify(&func).values) unless func.arity == 2

          require 'tsort'

          # A Hash of member => linked members, made sortable by TSort.
          graph = {}
          class << graph
            include TSort

            alias tsort_each_node each_key
            def tsort_each_child(node, &block)
              fetch(node).each(&block)
            end
          end

          each do |u|
            linked = graph[u] = []
            each { |v| linked << v if yield(u, v) }
          end

          components = Set.new
          graph.each_strongly_connected_component do |members|
            components.add(self.class.new(members))
          end
          components
        end

        # C-extension adapter method. Needs overriding in pure fallback.
        # Parsing kwargs in C is slower, verbose, and kinda deprecated.
        def inversion(include_surrogates: false, upto: 0x10FFFF)
          ext_inversion(include_surrogates, upto)
        end
      RUBY
    end
  end
end
@@ -0,0 +1,3 @@
1
class CharacterSet
  # Version of this library. Frozen so that the shared constant cannot be
  # mutated in place by any code that reads it.
  VERSION = '1.0.0'.freeze
end
@@ -0,0 +1,37 @@
1
class CharacterSet
  # Turns ranges of codepoints into their String representation.
  # All methods are module functions, i.e. callable as Writer.write(...).
  module Writer
    module_function

    # Writes the given ranges as one String.
    #
    # codepoint_ranges - Enumerable of Integer ranges.
    # opts             - Hash of options. :abbreviate (anything but false
    #                    writes ranges of more than two codepoints as
    #                    "first-last") and :in_brackets (truthy wraps the
    #                    result in "[...]") are read here; the whole Hash is
    #                    also handed to Character#escape.
    # block            - optional, handed to Character#escape.
    #
    # Returns a String.
    def write(codepoint_ranges, opts = {}, &block)
      content = codepoint_ranges.map do |range|
        if range.size > 2 && opts[:abbreviate] != false
          # Use #min and #max rather than #minmax: before Ruby 2.7,
          # Range#minmax is Enumerable#minmax and walks the entire range,
          # which is needlessly slow for large ranges.
          [range.min, range.max]
            .map { |cp| Character.new(cp).escape(opts, &block) }.join('-')
        else
          range.map { |cp| Character.new(cp).escape(opts, &block) }.join
        end
      end.join
      opts[:in_brackets] ? "[#{content}]" : content
    end

    # Writes an expression in which astral codepoints are spelled out as
    # surrogate pairs, alternated with a bracket expression for the
    # BMP ranges (written with format: :js).
    #
    # Returns only the bracket expression if there are no astral ranges;
    # otherwise a "(?:...|...)" group that omits the bracket expression if
    # there are no BMP ranges.
    def write_surrogate_alternation(bmp_ranges, astral_ranges)
      bmp_set = write(bmp_ranges, format: :js, in_brackets: true)
      return bmp_set if astral_ranges.empty?

      alternatives = bmp_ranges.any? ? [bmp_set] : []
      alternatives += surrogate_pairs(astral_ranges)
      "(?:#{alternatives.join('|')})"
    end

    # Returns one surrogate pair String for every codepoint in the ranges.
    def surrogate_pairs(astral_ranges)
      astral_ranges.flat_map { |range| range.map { |cp| surrogate_pair(cp) } }
    end

    # Returns the codepoint as two "\uXXXX" escapes (lowercase hex):
    # the high surrogate followed by the low surrogate.
    def surrogate_pair(astral_codepoint)
      base = astral_codepoint - 0x10000
      # Integer division already rounds down, no extra #floor needed.
      high = (base / 1024 + 0xD800).to_s(16)
      low  = (base % 1024 + 0xDC00).to_s(16)
      "\\u#{high}\\u#{low}"
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'character_set/character'
2
+ require 'character_set/common_sets'
3
+ require 'character_set/expression_converter'
4
+ require 'character_set/parser'
5
+ require 'character_set/set_method_adapters'
6
+ require 'character_set/shared_methods'
7
+ require 'character_set/version'
8
+ require 'character_set/writer'
9
+
10
class CharacterSet
  # Try to load 'character_set/character_set' (presumably the compiled
  # extension - confirm against the build setup). If it cannot be loaded,
  # use the pure-Ruby fallback instead.
  begin
    require('character_set/character_set')
  rescue LoadError
    require('character_set/ruby_fallback')
    prepend(RubyFallback)
  end

  # The order of these calls determines method lookup; do not reorder.
  prepend(SetMethodAdapters)
  include(Enumerable)
  include(SharedMethods)
  extend(CommonSets)
end
metadata ADDED
@@ -0,0 +1,193 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: character_set
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Janosch Müller
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-09-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: range_compressor
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: benchmark-ips
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '2.7'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '2.7'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.16'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.16'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '10.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake-compiler
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: regexp_parser
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '1.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: regexp_property_values
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 0.3.2
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 0.3.2
111
+ - !ruby/object:Gem::Dependency
112
+ name: rspec
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '3.0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '3.0'
125
+ description:
126
+ email:
127
+ - janosch84@gmail.com
128
+ executables: []
129
+ extensions:
130
+ - ext/character_set/extconf.rb
131
+ extra_rdoc_files: []
132
+ files:
133
+ - ".gitignore"
134
+ - ".rspec"
135
+ - ".travis.yml"
136
+ - BENCHMARK.md
137
+ - Gemfile
138
+ - LICENSE.txt
139
+ - README.md
140
+ - Rakefile
141
+ - benchmarks/cover.rb
142
+ - benchmarks/delete_in.rb
143
+ - benchmarks/keep_in.rb
144
+ - benchmarks/shared.rb
145
+ - benchmarks/used_by.rb
146
+ - bin/console
147
+ - bin/setup
148
+ - character_set.gemspec
149
+ - ext/character_set/character_set.c
150
+ - ext/character_set/extconf.rb
151
+ - ext/character_set/unicode_casefold_table.h
152
+ - lib/character_set.rb
153
+ - lib/character_set/character.rb
154
+ - lib/character_set/common_sets.rb
155
+ - lib/character_set/core_ext.rb
156
+ - lib/character_set/core_ext/regexp_ext.rb
157
+ - lib/character_set/core_ext/string_ext.rb
158
+ - lib/character_set/expression_converter.rb
159
+ - lib/character_set/parser.rb
160
+ - lib/character_set/pure.rb
161
+ - lib/character_set/ruby_fallback.rb
162
+ - lib/character_set/ruby_fallback/character_set_methods.rb
163
+ - lib/character_set/ruby_fallback/plane_methods.rb
164
+ - lib/character_set/ruby_fallback/set_methods.rb
165
+ - lib/character_set/set_method_adapters.rb
166
+ - lib/character_set/shared_methods.rb
167
+ - lib/character_set/version.rb
168
+ - lib/character_set/writer.rb
169
+ homepage: https://github.com/janosch-x/character_set
170
+ licenses:
171
+ - MIT
172
+ metadata: {}
173
+ post_install_message:
174
+ rdoc_options: []
175
+ require_paths:
176
+ - lib
177
+ required_ruby_version: !ruby/object:Gem::Requirement
178
+ requirements:
179
+ - - ">="
180
+ - !ruby/object:Gem::Version
181
+ version: 2.1.0
182
+ required_rubygems_version: !ruby/object:Gem::Requirement
183
+ requirements:
184
+ - - ">="
185
+ - !ruby/object:Gem::Version
186
+ version: '0'
187
+ requirements: []
188
+ rubyforge_project:
189
+ rubygems_version: 2.2.2
190
+ signing_key:
191
+ specification_version: 4
192
+ summary: Build, read, write and compare sets of Unicode codepoints.
193
+ test_files: []