character_set 1.0.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,103 @@
1
class CharacterSet
  module RubyFallback
    # Set-style API of the pure-Ruby implementation. Most methods are generated
    # with class_eval and forward to the set object stored in @__set.
    module SetMethods
      # Plain delegation: the inner set's result is returned unchanged.
      Enumerable.instance_methods.concat(%w[empty? length size]).each do |mthd|
        class_eval <<-RUBY, __FILE__, __LINE__ + 1
          def #{mthd}(*args, &block)
            @__set.#{mthd}(*args, &block)
          end
        RUBY
      end

      # Comparisons: when the argument is a CharacterSet (or CharacterSet::Pure),
      # it is unwrapped to its inner set before being handed to the inner set.
      %w[< <= > >= disjoint? intersect? proper_subset? proper_superset?
         subset? superset?].each do |mthd|
        class_eval <<-RUBY, __FILE__, __LINE__ + 1
          def #{mthd}(enum, &block)
            # CharacterSet::Pure lives in a file that is not necessarily loaded,
            # so only consult it when the constant exists; referencing it
            # unconditionally raises NameError for non-CharacterSet arguments.
            if enum.is_a?(CharacterSet) ||
               (defined?(CharacterSet::Pure) && enum.is_a?(CharacterSet::Pure))
              enum = enum.instance_variable_get(:@__set)
            end
            @__set.#{mthd}(enum, &block)
          end
        RUBY
      end

      # Mutators and membership checks: whenever the inner set returns a Set
      # (typically itself), return the wrapper instead so the inner object
      # does not leak; any other result is passed through.
      %w[<< === add add? clear collect! delete delete? delete_if
         each filter! hash include? map! member? keep_if reject!
         select! subtract].each do |mthd|
        class_eval <<-RUBY, __FILE__, __LINE__ + 1
          def #{mthd}(*args, &block)
            result = @__set.#{mthd}(*args, &block)
            result.is_a?(Set) ? self : result
          end
        RUBY
      end

      # Set algebra: String elements of the argument are converted to their
      # codepoints, and the result is wrapped in a new instance of the
      # receiver's class.
      %w[& + - ^ | difference intersection union].each do |mthd|
        class_eval <<-RUBY, __FILE__, __LINE__ + 1
          def #{mthd}(enum, &block)
            if enum.respond_to?(:map)
              enum = enum.map { |el| el.is_a?(String) ? el.ord : el }
            end
            self.class.new(@__set.#{mthd}(enum, &block).to_a)
          end
        RUBY
      end

      # Apply (un)tainting to the inner set as well as to the wrapper.
      %w[taint untaint].each do |mthd|
        class_eval <<-RUBY, __FILE__, __LINE__ + 1
          def #{mthd}
            @__set.#{mthd}
            super
          end
        RUBY
      end

      # Custom #freeze is not defined when RUBY_PLATFORM matches /java/i.
      unless RUBY_PLATFORM[/java/i]
        # Freezes the inner set together with the wrapper.
        def freeze
          # NOTE(review): to_a is called for its side effect only - presumably
          # to let the inner set build lazily cached state before it is
          # frozen; confirm against the set class in use.
          @__set.to_a
          @__set.freeze
          super
        end
      end

      # Adds every element of +other+ and returns self.
      # Raises ArgumentError unless +other+ responds to #each.
      def merge(other)
        raise ArgumentError, 'pass an Enumerable' unless other.respond_to?(:each)
        # pass through #add to use the checks in SetMethodAdapters
        other.each { |e| add(e) }
        self
      end

      # True for the same object, for an instance of exactly the same class
      # with an equal inner set, or for an instance of a subclass that has the
      # same size and only contains members of this set.
      def ==(other)
        if equal?(other)
          true
        elsif other.instance_of?(self.class)
          @__set == other.instance_variable_get(:@__set)
        elsif other.is_a?(self.class) && size == other.size
          other.all? { |cp| @__set.include?(cp) }
        else
          false
        end
      end

      # Hash-key equality: +other+ must be of the receiver's class (or a
      # subclass) and its inner set must be eql? to this one.
      def eql?(other)
        return false unless other.is_a?(self.class)
        @__set.eql?(other.instance_variable_get(:@__set))
      end

      # Gives a #dup its own copy of the inner set.
      def initialize_dup(orig)
        super
        @__set = orig.instance_variable_get(:@__set).dup
      end

      # Gives a #clone its own clone of the inner set.
      def initialize_clone(orig)
        super
        @__set = orig.instance_variable_get(:@__set).clone
      end

      # Returns the members as an Array of Integers, or as an Array of
      # one-character UTF-8 Strings when +stringify+ is truthy.
      def to_a(stringify = false)
        result = @__set.to_a
        stringify ? result.map { |cp| cp.chr('utf-8') } : result
      end
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'set'
2
+ require 'character_set/ruby_fallback/set_methods'
3
+ require 'character_set/ruby_fallback/plane_methods'
4
+ require 'character_set/ruby_fallback/character_set_methods'
5
+
6
# Pure-Ruby backend. lib/character_set.rb prepends this module to CharacterSet
# when requiring 'character_set/character_set' raises LoadError.
+ class CharacterSet
7
+ module RubyFallback
# The three includes below are order-sensitive: each later include is
# consulted before the earlier ones during method lookup.
8
+ include CharacterSet::RubyFallback::SetMethods
9
+ include CharacterSet::RubyFallback::PlaneMethods
10
+ include CharacterSet::RubyFallback::CharacterSetMethods
11
+
# Hook called when this module is prepended to a class: adds the fallback's
# class-level methods (CharacterSetMethods::ClassMethods) to that class.
12
+ def self.prepended(klass)
13
+ klass.extend CharacterSet::RubyFallback::CharacterSetMethods::ClassMethods
14
+ end
15
+
# Creates the backing SortedSet, then forwards +enum+ (implicit-argument
# super) to the next #initialize in the ancestor chain. The set is assigned
# first so that it already exists when that initializer runs.
16
+ def initialize(enum = [])
17
+ @__set = SortedSet.new
18
+ super
19
+ end
20
+ end
21
+ end
@@ -0,0 +1,39 @@
1
class CharacterSet
  # Argument-normalizing wrappers. Each generated method validates or converts
  # its argument and then hands over to the wrapped implementation via super.
  module SetMethodAdapters
    # Element-level methods accept a String (its first codepoint is used) or
    # an Integer within the Unicode codepoint range; anything else raises
    # ArgumentError. The internal representation works on codepoint Integers.
    %w[add add? << delete delete? include? member? ===].each do |name|
      class_eval <<-RUBY, __FILE__, __LINE__ + 1
        def #{name}(arg)
          case arg
          when String then return super(arg.ord)
          when Integer then nil
          else raise ArgumentError, 'pass a String or an Integer'
          end

          unless arg.between?(0, 0x10FFFF)
            raise ArgumentError, 'pass an Integer between 0 and 0x10FFFF'
          end
          super(arg)
        end
      RUBY
    end

    # Set-level methods accept another CharacterSet as-is, or any object that
    # responds to #each, which is first wrapped in a CharacterSet.
    # Tested by ruby-spec.
    %w[& + - ^ | difference intersection subtract union].each do |name|
      class_eval <<-RUBY, __FILE__, __LINE__ + 1
        def #{name}(arg)
          return super if arg.is_a?(CharacterSet)
          raise ArgumentError, 'pass an enumerable' unless arg.respond_to?(:each)

          super(CharacterSet.new(arg.to_a))
        end
      RUBY
    end
  end
end
@@ -0,0 +1,155 @@
1
+ #
2
+ # Various methods shared by the pure-Ruby and the extended implementation.
3
+ #
4
+ # Many of these methods are hotspots, so they are defined directly on
5
+ # the including classes for better performance.
6
+ #
7
class CharacterSet
  module SharedMethods
    # Include hook: evaluates every shared class and instance method directly
    # in +klass+ instead of leaving them on this module.
    def self.included(klass)
      klass.class_eval <<-RUBY, __FILE__, __LINE__ + 1
        # Builds a set from the given arguments.
        def self.[](*args)
          new(args)
        end

        # Builds a set from a bracket expression string; a leading '[^'
        # yields the inversion of the parsed codepoints.
        def self.parse(string)
          codepoints = Parser.codepoints_from_bracket_expression(string)
          result = new(codepoints)
          string.start_with?('[^') ? result.inversion : result
        end

        # Builds a set from the ranges matched by the named property.
        # Lazily requires the 'regexp_property_values' library.
        def self.of_property(property_name)
          # Kernel#require returns false when the library is already loaded,
          # so memoize an explicit true rather than its return value.
          @regexp_property_values_required ||= begin
            require 'regexp_property_values'
            true
          end

          property = RegexpPropertyValues[property_name.to_s]
          from_ranges(*property.matched_ranges)
        end

        # Builds a set from a regexp by parsing it with 'regexp_parser'
        # (required lazily) and converting the resulting expression tree.
        def self.of_regexp(regexp)
          # See of_property for why the flag is set to true explicitly.
          @regexp_parser_required ||= begin
            require 'regexp_parser'
            true
          end

          root = ::Regexp::Parser.parse(regexp)
          of_expression(root)
        end

        # Converts an already parsed expression via ExpressionConverter.
        def self.of_expression(expression)
          ExpressionConverter.convert(expression)
        end

        # Fills the new set with the codepoints the Parser extracts from
        # +enumerable+.
        def initialize(enumerable = [])
          merge(Parser.codepoints_from_enumerable(enumerable))
        end

        # Replaces the contents with those of +enum+. Anything that is not
        # exactly an Array, CharacterSet or Range is first converted through
        # the receiver's class.
        def replace(enum)
          unless [Array, CharacterSet, Range].include?(enum.class)
            enum = self.class.new(enum)
          end
          clear
          merge(enum)
        end

        # stringification methods

        def to_s(opts = {}, &block)
          Writer.write(ranges, opts, &block)
        end

        def to_s_with_surrogate_alternation
          Writer.write_surrogate_alternation(bmp_part.ranges, astral_part.ranges)
        end

        # Shows at most the first five members, followed by '...' when there
        # are more, plus the total size.
        def inspect
          len = length
          "#<CharacterSet: {\#{first(5) * ', '}\#{'...' if len > 5}} (size: \#{len})>"
        end

        # unicode-plane-related methods

        def bmp_part?
          !bmp_part.empty?
        end

        def astral_part?
          !astral_part.empty?
        end

        def bmp_ratio
          bmp_part.count / count.to_f
        end

        def astral_ratio
          astral_part.count / count.to_f
        end

        #
        # The following methods are here for `Set` compatibility, but they are
        # comparatively slow. Prefer others.
        #
        def map!
          block_given? or return enum_for(__method__) { size }
          arr = []
          each { |cp| arr << yield(cp) }
          replace(arr)
        end
        alias collect! map!

        # Returns self if any member was removed, nil otherwise.
        def reject!(&block)
          block_given? or return enum_for(__method__) { size }
          old_size = size
          delete_if(&block)
          self if size != old_size
        end

        # Returns self if any member was removed, nil otherwise.
        def select!(&block)
          block_given? or return enum_for(__method__) { size }
          old_size = size
          keep_if(&block)
          self if size != old_size
        end
        alias filter! select!

        # Groups members by the block's return value into a Hash of sets.
        def classify
          block_given? or return enum_for(__method__) { size }
          each_with_object({}) { |cp, h| (h[yield(cp)] ||= self.class.new).add(cp) }
        end

        # Splits the set into a Set of subsets. With a two-argument block,
        # members are grouped into the strongly connected components of the
        # relation the block defines; otherwise by the block's return value.
        def divide(&func)
          block_given? or return enum_for(__method__) { size }
          require 'set'

          if func.arity == 2
            require 'tsort'

            class << dig = {}
              include TSort

              alias tsort_each_node each_key
              def tsort_each_child(node, &block)
                fetch(node).each(&block)
              end
            end

            each do |u|
              dig[u] = a = []
              each{ |v| a << v if yield(u, v) }
            end

            set = Set.new
            dig.each_strongly_connected_component do |css|
              set.add(self.class.new(css))
            end
            set
          else
            Set.new(classify(&func).values)
          end
        end

        # C-extension adapter method. Needs overriding in pure fallback.
        # Parsing kwargs in C is slower, verbose, and kinda deprecated.
        def inversion(include_surrogates: false, upto: 0x10FFFF)
          ext_inversion(include_surrogates, upto)
        end
      RUBY
    end
  end
end
@@ -0,0 +1,3 @@
1
class CharacterSet
  # Version of the gem. Frozen so the constant's value cannot be mutated.
  VERSION = '1.0.0'.freeze
end
@@ -0,0 +1,37 @@
1
class CharacterSet
  # Turns codepoint ranges into their string representation.
  module Writer
    module_function

    # Writes +codepoint_ranges+ as one string.
    #
    # Ranges with more than two members are abbreviated as "first-last" unless
    # opts[:abbreviate] is false; all other ranges are written member by
    # member. The result is wrapped in square brackets when opts[:in_brackets]
    # is truthy. +opts+ and +block+ are passed on to Character#escape.
    def write(codepoint_ranges, opts = {}, &block)
      content = codepoint_ranges.map do |range|
        if range.size > 2 && opts[:abbreviate] != false
          # min/max instead of minmax: Range#minmax enumerates every member
          # on Rubies older than 2.7.
          [range.min, range.max].map { |cp| Character.new(cp).escape(opts, &block) }.join('-')
        else
          range.map { |cp| Character.new(cp).escape(opts, &block) }.join
        end
      end.join
      opts[:in_brackets] ? "[#{content}]" : content
    end

    # Writes the BMP ranges as a bracket expression in :js format. If there
    # are astral ranges, returns a non-capturing alternation of that bracket
    # expression (omitted when there are no BMP ranges) and one surrogate
    # pair per astral codepoint.
    def write_surrogate_alternation(bmp_ranges, astral_ranges)
      bmp_set = write(bmp_ranges, format: :js, in_brackets: true)
      return bmp_set if astral_ranges.empty?

      alternatives = bmp_ranges.any? ? [bmp_set] : []
      alternatives.concat(surrogate_pairs(astral_ranges))
      "(?:#{alternatives.join('|')})"
    end

    # Returns the surrogate pair of every codepoint in the given ranges.
    def surrogate_pairs(astral_ranges)
      astral_ranges.flat_map { |range| range.map { |cp| surrogate_pair(cp) } }
    end

    # Returns +astral_codepoint+ as two lowercase-hex \u escapes: the high
    # surrogate followed by the low surrogate.
    def surrogate_pair(astral_codepoint)
      high_offset, low_offset = (astral_codepoint - 0x10000).divmod(1024)
      high = (high_offset + 0xD800).to_s(16)
      low = (low_offset + 0xDC00).to_s(16)
      "\\u#{high}\\u#{low}"
    end
  end
end
@@ -0,0 +1,21 @@
1
+ require 'character_set/character'
2
+ require 'character_set/common_sets'
3
+ require 'character_set/expression_converter'
4
+ require 'character_set/parser'
5
+ require 'character_set/set_method_adapters'
6
+ require 'character_set/shared_methods'
7
+ require 'character_set/version'
8
+ require 'character_set/writer'
9
+
10
# Assembles the CharacterSet class from a storage backend and the shared mixins.
+ class CharacterSet
# Try to load 'character_set/character_set' (presumably the compiled
# extension listed in the gemspec - confirm); if that raises LoadError,
# load the pure-Ruby fallback and prepend it instead.
11
+ begin
12
+ require 'character_set/character_set'
13
+ rescue LoadError
14
+ require 'character_set/ruby_fallback'
15
+ prepend RubyFallback
16
+ end
# Prepended last, so these adapters come first in method lookup and reach
# the backend's methods through super.
17
+ prepend SetMethodAdapters
18
+ include Enumerable
# SharedMethods defines its methods directly on this class via its
# included hook.
19
+ include SharedMethods
20
+ extend CommonSets
21
+ end
metadata ADDED
@@ -0,0 +1,193 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: character_set
3
+ version: !ruby/object:Gem::Version
4
+ version: 1.0.0
5
+ platform: ruby
6
+ authors:
7
+ - Janosch Müller
8
+ autorequire:
9
+ bindir: bin
10
+ cert_chain: []
11
+ date: 2018-09-02 00:00:00.000000000 Z
12
+ dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: range_compressor
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
27
+ - !ruby/object:Gem::Dependency
28
+ name: benchmark-ips
29
+ requirement: !ruby/object:Gem::Requirement
30
+ requirements:
31
+ - - "~>"
32
+ - !ruby/object:Gem::Version
33
+ version: '2.7'
34
+ type: :development
35
+ prerelease: false
36
+ version_requirements: !ruby/object:Gem::Requirement
37
+ requirements:
38
+ - - "~>"
39
+ - !ruby/object:Gem::Version
40
+ version: '2.7'
41
+ - !ruby/object:Gem::Dependency
42
+ name: bundler
43
+ requirement: !ruby/object:Gem::Requirement
44
+ requirements:
45
+ - - "~>"
46
+ - !ruby/object:Gem::Version
47
+ version: '1.16'
48
+ type: :development
49
+ prerelease: false
50
+ version_requirements: !ruby/object:Gem::Requirement
51
+ requirements:
52
+ - - "~>"
53
+ - !ruby/object:Gem::Version
54
+ version: '1.16'
55
+ - !ruby/object:Gem::Dependency
56
+ name: rake
57
+ requirement: !ruby/object:Gem::Requirement
58
+ requirements:
59
+ - - "~>"
60
+ - !ruby/object:Gem::Version
61
+ version: '10.0'
62
+ type: :development
63
+ prerelease: false
64
+ version_requirements: !ruby/object:Gem::Requirement
65
+ requirements:
66
+ - - "~>"
67
+ - !ruby/object:Gem::Version
68
+ version: '10.0'
69
+ - !ruby/object:Gem::Dependency
70
+ name: rake-compiler
71
+ requirement: !ruby/object:Gem::Requirement
72
+ requirements:
73
+ - - "~>"
74
+ - !ruby/object:Gem::Version
75
+ version: '1.0'
76
+ type: :development
77
+ prerelease: false
78
+ version_requirements: !ruby/object:Gem::Requirement
79
+ requirements:
80
+ - - "~>"
81
+ - !ruby/object:Gem::Version
82
+ version: '1.0'
83
+ - !ruby/object:Gem::Dependency
84
+ name: regexp_parser
85
+ requirement: !ruby/object:Gem::Requirement
86
+ requirements:
87
+ - - "~>"
88
+ - !ruby/object:Gem::Version
89
+ version: '1.0'
90
+ type: :development
91
+ prerelease: false
92
+ version_requirements: !ruby/object:Gem::Requirement
93
+ requirements:
94
+ - - "~>"
95
+ - !ruby/object:Gem::Version
96
+ version: '1.0'
97
+ - !ruby/object:Gem::Dependency
98
+ name: regexp_property_values
99
+ requirement: !ruby/object:Gem::Requirement
100
+ requirements:
101
+ - - "~>"
102
+ - !ruby/object:Gem::Version
103
+ version: 0.3.2
104
+ type: :development
105
+ prerelease: false
106
+ version_requirements: !ruby/object:Gem::Requirement
107
+ requirements:
108
+ - - "~>"
109
+ - !ruby/object:Gem::Version
110
+ version: 0.3.2
111
+ - !ruby/object:Gem::Dependency
112
+ name: rspec
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: '3.0'
118
+ type: :development
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: '3.0'
125
+ description:
126
+ email:
127
+ - janosch84@gmail.com
128
+ executables: []
129
+ extensions:
130
+ - ext/character_set/extconf.rb
131
+ extra_rdoc_files: []
132
+ files:
133
+ - ".gitignore"
134
+ - ".rspec"
135
+ - ".travis.yml"
136
+ - BENCHMARK.md
137
+ - Gemfile
138
+ - LICENSE.txt
139
+ - README.md
140
+ - Rakefile
141
+ - benchmarks/cover.rb
142
+ - benchmarks/delete_in.rb
143
+ - benchmarks/keep_in.rb
144
+ - benchmarks/shared.rb
145
+ - benchmarks/used_by.rb
146
+ - bin/console
147
+ - bin/setup
148
+ - character_set.gemspec
149
+ - ext/character_set/character_set.c
150
+ - ext/character_set/extconf.rb
151
+ - ext/character_set/unicode_casefold_table.h
152
+ - lib/character_set.rb
153
+ - lib/character_set/character.rb
154
+ - lib/character_set/common_sets.rb
155
+ - lib/character_set/core_ext.rb
156
+ - lib/character_set/core_ext/regexp_ext.rb
157
+ - lib/character_set/core_ext/string_ext.rb
158
+ - lib/character_set/expression_converter.rb
159
+ - lib/character_set/parser.rb
160
+ - lib/character_set/pure.rb
161
+ - lib/character_set/ruby_fallback.rb
162
+ - lib/character_set/ruby_fallback/character_set_methods.rb
163
+ - lib/character_set/ruby_fallback/plane_methods.rb
164
+ - lib/character_set/ruby_fallback/set_methods.rb
165
+ - lib/character_set/set_method_adapters.rb
166
+ - lib/character_set/shared_methods.rb
167
+ - lib/character_set/version.rb
168
+ - lib/character_set/writer.rb
169
+ homepage: https://github.com/janosch-x/character_set
170
+ licenses:
171
+ - MIT
172
+ metadata: {}
173
+ post_install_message:
174
+ rdoc_options: []
175
+ require_paths:
176
+ - lib
177
+ required_ruby_version: !ruby/object:Gem::Requirement
178
+ requirements:
179
+ - - ">="
180
+ - !ruby/object:Gem::Version
181
+ version: 2.1.0
182
+ required_rubygems_version: !ruby/object:Gem::Requirement
183
+ requirements:
184
+ - - ">="
185
+ - !ruby/object:Gem::Version
186
+ version: '0'
187
+ requirements: []
188
+ rubyforge_project:
189
+ rubygems_version: 2.2.2
190
+ signing_key:
191
+ specification_version: 4
192
+ summary: Build, read, write and compare sets of Unicode codepoints.
193
+ test_files: []