character_set 1.1.1 → 1.4.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/.gitattributes +3 -0
  3. data/.github/workflows/lint.yml +29 -0
  4. data/.github/workflows/tests.yml +22 -0
  5. data/.gitignore +1 -0
  6. data/.rubocop.yml +11 -0
  7. data/BENCHMARK.md +53 -17
  8. data/CHANGELOG.md +47 -0
  9. data/README.md +38 -14
  10. data/Rakefile +60 -36
  11. data/benchmarks/count_in.rb +13 -0
  12. data/benchmarks/delete_in.rb +1 -1
  13. data/benchmarks/scan.rb +13 -0
  14. data/benchmarks/shared.rb +5 -0
  15. data/benchmarks/z_add.rb +12 -0
  16. data/benchmarks/z_delete.rb +12 -0
  17. data/benchmarks/z_merge.rb +15 -0
  18. data/benchmarks/z_minmax.rb +12 -0
  19. data/bin/console +2 -0
  20. data/character_set.gemspec +17 -6
  21. data/ext/character_set/character_set.c +963 -414
  22. data/ext/character_set/unicode_casefold_table.h +10 -2
  23. data/ext/character_set/unicode_casefold_table.h.tmpl +11 -0
  24. data/lib/character_set/character.rb +1 -1
  25. data/lib/character_set/core_ext/regexp_ext.rb +1 -1
  26. data/lib/character_set/core_ext/string_ext.rb +3 -1
  27. data/lib/character_set/expression_converter.rb +25 -27
  28. data/lib/character_set/parser.rb +1 -1
  29. data/lib/character_set/predefined_sets.rb +25 -260
  30. data/lib/character_set/predefined_sets/any.cps +1 -0
  31. data/lib/character_set/predefined_sets/ascii.cps +1 -0
  32. data/lib/character_set/predefined_sets/ascii_alnum.cps +3 -0
  33. data/lib/character_set/predefined_sets/ascii_letter.cps +2 -0
  34. data/lib/character_set/predefined_sets/assigned.cps +666 -0
  35. data/lib/character_set/predefined_sets/bmp.cps +2 -0
  36. data/lib/character_set/predefined_sets/crypt.cps +2 -0
  37. data/lib/character_set/predefined_sets/emoji.cps +151 -0
  38. data/lib/character_set/predefined_sets/newline.cps +3 -0
  39. data/lib/character_set/predefined_sets/surrogate.cps +1 -0
  40. data/lib/character_set/predefined_sets/unicode.cps +2 -0
  41. data/lib/character_set/predefined_sets/url_fragment.cps +8 -0
  42. data/lib/character_set/predefined_sets/url_host.cps +10 -0
  43. data/lib/character_set/predefined_sets/url_path.cps +7 -0
  44. data/lib/character_set/predefined_sets/url_query.cps +8 -0
  45. data/lib/character_set/predefined_sets/whitespace.cps +10 -0
  46. data/lib/character_set/ruby_fallback.rb +5 -3
  47. data/lib/character_set/ruby_fallback/character_set_methods.rb +53 -6
  48. data/lib/character_set/ruby_fallback/set_methods.rb +25 -17
  49. data/lib/character_set/shared_methods.rb +60 -49
  50. data/lib/character_set/version.rb +1 -1
  51. data/lib/character_set/writer.rb +98 -27
  52. metadata +88 -22
  53. data/.travis.yml +0 -11
  54. data/lib/character_set/ruby_fallback/plane_methods.rb +0 -27
@@ -1,3 +1,3 @@
1
1
  class CharacterSet
2
- VERSION = '1.1.1'
2
+ VERSION = '1.4.1'
3
3
  end
@@ -1,37 +1,108 @@
1
1
  class CharacterSet
2
2
  module Writer
3
- module_function
4
-
5
- def write(codepoint_ranges, opts = {}, &block)
6
- content = codepoint_ranges.map do |range|
7
- if range.size > 2 && opts[:abbreviate] != false
8
- range.minmax.map { |cp| Character.new(cp).escape(opts, &block) }.join('-')
9
- else
10
- range.map { |cp| Character.new(cp).escape(opts, &block) }.join
3
+ class << self
4
+ def write(codepoint_ranges, opts = {}, &block)
5
+ content = codepoint_ranges.map do |range|
6
+ if range.size > 2 && opts[:abbreviate] != false
7
+ bounds = [range.min, range.max]
8
+ bounds.map { |cp| write_codepoint(cp, opts, &block) }.join('-')
9
+ else
10
+ range.map { |cp| write_codepoint(cp, opts, &block) }.join
11
+ end
12
+ end.join
13
+ opts[:in_brackets] ? "[#{content}]" : content
14
+ end
15
+
16
+ def write_codepoint(codepoint, opts = {}, &block)
17
+ Character.new(codepoint).escape(opts, &block)
18
+ end
19
+
20
+ def write_surrogate_ranges(bmp_ranges, astral_ranges)
21
+ astral_branches = surrogate_range_expressions(astral_ranges)
22
+ bmp_set_with_alternatives(bmp_ranges, astral_branches)
23
+ end
24
+
25
+ def write_surrogate_alternation(bmp_ranges, astral_ranges)
26
+ astral_branches = surrogate_pairs(astral_ranges)
27
+ bmp_set_with_alternatives(bmp_ranges, astral_branches)
28
+ end
29
+
30
+ private
31
+
32
+ def surrogate_range_expressions(astral_ranges)
33
+ compressed_surrogate_range_pairs(astral_ranges).map do |hi_ranges, lo_ranges|
34
+ [hi_ranges, lo_ranges].map do |ranges|
35
+ use_brackets = ranges.size > 1 || ranges.first.size > 1
36
+ write(ranges, format: :js, in_brackets: use_brackets)
37
+ end.join
11
38
  end
12
- end.join
13
- opts[:in_brackets] ? "[#{content}]" : content
14
- end
39
+ end
40
+
41
+ def compressed_surrogate_range_pairs(astral_ranges)
42
+ halves = astral_ranges.flat_map { |range| surrogate_half_ranges(range) }
15
43
 
16
- def write_surrogate_alternation(bmp_ranges, astral_ranges)
17
- bmp_set = write(bmp_ranges, format: :js, in_brackets: true)
18
- if astral_ranges.empty?
19
- bmp_set
20
- else
21
- surrogate_pairs = surrogate_pairs(astral_ranges)
22
- "(?:#{((bmp_ranges.any? ? [bmp_set] : []) + surrogate_pairs) * '|'})"
44
+ # compress high surrogate codepoint ranges with common low range half
45
+ with_common_lo = halves.group_by(&:last).map do |lo_range, pairs|
46
+ hi_ranges = pairs.map(&:first)
47
+ compressed_hi_ranges = hi_ranges.each_with_object([]) do |range, arr|
48
+ prev = arr.last
49
+ if prev.nil? || prev.max + 1 < range.min # first or gap
50
+ arr << range
51
+ else # continuous codepoints, expand previous range
52
+ arr[-1] = (prev.min)..(range.max)
53
+ end
54
+ end
55
+ [compressed_hi_ranges, lo_range]
56
+ end
57
+
58
+ # compress low surrogate codepoint ranges with common high ranges
59
+ with_common_lo.each_with_object({}) do |(hi_ranges, lo_range), hash|
60
+ (hash[hi_ranges] ||= []) << lo_range
61
+ end
23
62
  end
24
- end
25
63
 
26
- def surrogate_pairs(astral_ranges)
27
- astral_ranges.flat_map { |range| range.map { |cp| surrogate_pair(cp) } }
28
- end
64
+ def surrogate_half_ranges(astral_range)
65
+ hi_min, lo_min = surrogate_pair_codepoints(astral_range.min)
66
+ hi_max, lo_max = surrogate_pair_codepoints(astral_range.max)
67
+ hi_count = 1 + hi_max - hi_min
68
+ return [[hi_min..hi_min, lo_min..lo_max]] if hi_count == 1
69
+
70
+ ranges = []
71
+
72
+ # first high surrogate might be partially covered (if lo_min > 0xDC00)
73
+ ranges << [hi_min..hi_min, lo_min..0xDFFF]
74
+
75
+ # any high surrogates in between are fully covered
76
+ ranges << [(hi_min + 1)..(hi_max - 1), 0xDC00..0xDFFF] if hi_count > 2
29
77
 
30
- def surrogate_pair(astral_codepoint)
31
- base = astral_codepoint - 0x10000
32
- high = ((base / 1024).floor + 0xD800).to_s(16)
33
- low = (base % 1024 + 0xDC00).to_s(16)
34
- "\\u#{high}\\u#{low}"
78
+ # last high surrogate might be partially covered (if lo_max < 0xDFFF)
79
+ ranges << [hi_max..hi_max, 0xDC00..lo_max]
80
+
81
+ ranges
82
+ end
83
+
84
+ def surrogate_pair_codepoints(astral_codepoint)
85
+ base = astral_codepoint - 0x10000
86
+ high = base / 1024 + 0xD800
87
+ low = base % 1024 + 0xDC00
88
+ [high, low]
89
+ end
90
+
91
+ def bmp_set_with_alternatives(bmp_ranges, alternatives)
92
+ bmp_set = write(bmp_ranges, format: :js, in_brackets: true)
93
+ return bmp_set if alternatives.empty? && bmp_ranges.any?
94
+
95
+ "(?:#{((bmp_ranges.any? ? [bmp_set] : []) + alternatives).join('|')})"
96
+ end
97
+
98
+ def surrogate_pairs(astral_ranges)
99
+ astral_ranges.flat_map { |range| range.map { |cp| surrogate_pair(cp) } }
100
+ end
101
+
102
+ def surrogate_pair(astral_codepoint)
103
+ surrogate_pair_codepoints(astral_codepoint)
104
+ .map { |half| write_codepoint(half, format: :js) }.join
105
+ end
35
106
  end
36
107
  end
37
108
  end
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: character_set
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.4.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Janosch Müller
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-24 00:00:00.000000000 Z
11
+ date: 2021-01-11 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: sorted_set
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: benchmark-ips
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -25,47 +39,47 @@ dependencies:
25
39
  - !ruby/object:Gem::Version
26
40
  version: '2.7'
27
41
  - !ruby/object:Gem::Dependency
28
- name: bundler
42
+ name: get_process_mem
29
43
  requirement: !ruby/object:Gem::Requirement
30
44
  requirements:
31
45
  - - "~>"
32
46
  - !ruby/object:Gem::Version
33
- version: '1.16'
47
+ version: 0.2.3
34
48
  type: :development
35
49
  prerelease: false
36
50
  version_requirements: !ruby/object:Gem::Requirement
37
51
  requirements:
38
52
  - - "~>"
39
53
  - !ruby/object:Gem::Version
40
- version: '1.16'
54
+ version: 0.2.3
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rake
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
59
  - - "~>"
46
60
  - !ruby/object:Gem::Version
47
- version: '12.0'
61
+ version: '13.0'
48
62
  type: :development
49
63
  prerelease: false
50
64
  version_requirements: !ruby/object:Gem::Requirement
51
65
  requirements:
52
66
  - - "~>"
53
67
  - !ruby/object:Gem::Version
54
- version: '12.0'
68
+ version: '13.0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: rake-compiler
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
73
  - - "~>"
60
74
  - !ruby/object:Gem::Version
61
- version: '1.0'
75
+ version: '1.1'
62
76
  type: :development
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
66
80
  - - "~>"
67
81
  - !ruby/object:Gem::Version
68
- version: '1.0'
82
+ version: '1.1'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: range_compressor
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -86,28 +100,28 @@ dependencies:
86
100
  requirements:
87
101
  - - "~>"
88
102
  - !ruby/object:Gem::Version
89
- version: '1.1'
103
+ version: '1.6'
90
104
  type: :development
91
105
  prerelease: false
92
106
  version_requirements: !ruby/object:Gem::Requirement
93
107
  requirements:
94
108
  - - "~>"
95
109
  - !ruby/object:Gem::Version
96
- version: '1.1'
110
+ version: '1.6'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: regexp_property_values
99
113
  requirement: !ruby/object:Gem::Requirement
100
114
  requirements:
101
115
  - - "~>"
102
116
  - !ruby/object:Gem::Version
103
- version: 0.3.4
117
+ version: '1.0'
104
118
  type: :development
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
107
121
  requirements:
108
122
  - - "~>"
109
123
  - !ruby/object:Gem::Version
110
- version: 0.3.4
124
+ version: '1.0'
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: rspec
113
127
  requirement: !ruby/object:Gem::Requirement
@@ -122,7 +136,35 @@ dependencies:
122
136
  - - "~>"
123
137
  - !ruby/object:Gem::Version
124
138
  version: '3.8'
125
- description:
139
+ - !ruby/object:Gem::Dependency
140
+ name: codecov
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: 0.2.12
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: 0.2.12
153
+ - !ruby/object:Gem::Dependency
154
+ name: rubocop
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: '1.8'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: '1.8'
167
+ description:
126
168
  email:
127
169
  - janosch84@gmail.com
128
170
  executables: []
@@ -130,26 +172,36 @@ extensions:
130
172
  - ext/character_set/extconf.rb
131
173
  extra_rdoc_files: []
132
174
  files:
175
+ - ".gitattributes"
176
+ - ".github/workflows/lint.yml"
177
+ - ".github/workflows/tests.yml"
133
178
  - ".gitignore"
134
179
  - ".rspec"
135
- - ".travis.yml"
180
+ - ".rubocop.yml"
136
181
  - BENCHMARK.md
137
182
  - CHANGELOG.md
138
183
  - Gemfile
139
184
  - LICENSE.txt
140
185
  - README.md
141
186
  - Rakefile
187
+ - benchmarks/count_in.rb
142
188
  - benchmarks/cover.rb
143
189
  - benchmarks/delete_in.rb
144
190
  - benchmarks/keep_in.rb
191
+ - benchmarks/scan.rb
145
192
  - benchmarks/shared.rb
146
193
  - benchmarks/used_by.rb
194
+ - benchmarks/z_add.rb
195
+ - benchmarks/z_delete.rb
196
+ - benchmarks/z_merge.rb
197
+ - benchmarks/z_minmax.rb
147
198
  - bin/console
148
199
  - bin/setup
149
200
  - character_set.gemspec
150
201
  - ext/character_set/character_set.c
151
202
  - ext/character_set/extconf.rb
152
203
  - ext/character_set/unicode_casefold_table.h
204
+ - ext/character_set/unicode_casefold_table.h.tmpl
153
205
  - lib/character_set.rb
154
206
  - lib/character_set/character.rb
155
207
  - lib/character_set/core_ext.rb
@@ -158,20 +210,35 @@ files:
158
210
  - lib/character_set/expression_converter.rb
159
211
  - lib/character_set/parser.rb
160
212
  - lib/character_set/predefined_sets.rb
213
+ - lib/character_set/predefined_sets/any.cps
214
+ - lib/character_set/predefined_sets/ascii.cps
215
+ - lib/character_set/predefined_sets/ascii_alnum.cps
216
+ - lib/character_set/predefined_sets/ascii_letter.cps
217
+ - lib/character_set/predefined_sets/assigned.cps
218
+ - lib/character_set/predefined_sets/bmp.cps
219
+ - lib/character_set/predefined_sets/crypt.cps
220
+ - lib/character_set/predefined_sets/emoji.cps
221
+ - lib/character_set/predefined_sets/newline.cps
222
+ - lib/character_set/predefined_sets/surrogate.cps
223
+ - lib/character_set/predefined_sets/unicode.cps
224
+ - lib/character_set/predefined_sets/url_fragment.cps
225
+ - lib/character_set/predefined_sets/url_host.cps
226
+ - lib/character_set/predefined_sets/url_path.cps
227
+ - lib/character_set/predefined_sets/url_query.cps
228
+ - lib/character_set/predefined_sets/whitespace.cps
161
229
  - lib/character_set/pure.rb
162
230
  - lib/character_set/ruby_fallback.rb
163
231
  - lib/character_set/ruby_fallback/character_set_methods.rb
164
- - lib/character_set/ruby_fallback/plane_methods.rb
165
232
  - lib/character_set/ruby_fallback/set_methods.rb
166
233
  - lib/character_set/set_method_adapters.rb
167
234
  - lib/character_set/shared_methods.rb
168
235
  - lib/character_set/version.rb
169
236
  - lib/character_set/writer.rb
170
- homepage: https://github.com/janosch-x/character_set
237
+ homepage: https://github.com/jaynetics/character_set
171
238
  licenses:
172
239
  - MIT
173
240
  metadata: {}
174
- post_install_message:
241
+ post_install_message:
175
242
  rdoc_options: []
176
243
  require_paths:
177
244
  - lib
@@ -186,9 +253,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
186
253
  - !ruby/object:Gem::Version
187
254
  version: '0'
188
255
  requirements: []
189
- rubyforge_project:
190
- rubygems_version: 2.7.6
191
- signing_key:
256
+ rubygems_version: 3.2.3
257
+ signing_key:
192
258
  specification_version: 4
193
259
  summary: Build, read, write and compare sets of Unicode codepoints.
194
260
  test_files: []
@@ -1,11 +0,0 @@
1
- sudo: false
2
- language: ruby
3
- rvm:
4
- - 2.1
5
- - 2.4
6
- - 2.5
7
- - 2.6
8
- - jruby-9.1.9.0
9
- before_install:
10
- - gem update --system
11
- - gem install bundler
@@ -1,27 +0,0 @@
1
- class CharacterSet
2
- module RubyFallback
3
- module PlaneMethods
4
- def bmp_part
5
- dup.keep_if { |cp| cp < 0x10000 }
6
- end
7
-
8
- def astral_part
9
- dup.keep_if { |cp| cp >= 0x10000 }
10
- end
11
-
12
- def planes
13
- plane_set = {}
14
- plane_size = 0x10000.to_f
15
- each do |cp|
16
- plane = (cp / plane_size).floor
17
- plane_set[plane] = true
18
- end
19
- plane_set.keys
20
- end
21
-
22
- def member_in_plane?(num)
23
- ((num * 0x10000)...((num + 1) * 0x10000)).any? { |cp| include?(cp) }
24
- end
25
- end
26
- end
27
- end