character_set 1.1.1-java → 1.4.1-java

Sign up to get free protection for your applications and to get access to all the features.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/.gitattributes +3 -0
  3. data/.github/workflows/lint.yml +29 -0
  4. data/.github/workflows/tests.yml +22 -0
  5. data/.gitignore +1 -0
  6. data/.rubocop.yml +11 -0
  7. data/BENCHMARK.md +53 -17
  8. data/CHANGELOG.md +47 -0
  9. data/README.md +38 -14
  10. data/Rakefile +60 -36
  11. data/benchmarks/count_in.rb +13 -0
  12. data/benchmarks/delete_in.rb +1 -1
  13. data/benchmarks/scan.rb +13 -0
  14. data/benchmarks/shared.rb +5 -0
  15. data/benchmarks/z_add.rb +12 -0
  16. data/benchmarks/z_delete.rb +12 -0
  17. data/benchmarks/z_merge.rb +15 -0
  18. data/benchmarks/z_minmax.rb +12 -0
  19. data/bin/console +2 -0
  20. data/character_set.gemspec +17 -6
  21. data/ext/character_set/character_set.c +963 -414
  22. data/ext/character_set/unicode_casefold_table.h +10 -2
  23. data/ext/character_set/unicode_casefold_table.h.tmpl +11 -0
  24. data/lib/character_set/character.rb +1 -1
  25. data/lib/character_set/core_ext/regexp_ext.rb +1 -1
  26. data/lib/character_set/core_ext/string_ext.rb +3 -1
  27. data/lib/character_set/expression_converter.rb +25 -27
  28. data/lib/character_set/parser.rb +1 -1
  29. data/lib/character_set/predefined_sets.rb +25 -260
  30. data/lib/character_set/predefined_sets/any.cps +1 -0
  31. data/lib/character_set/predefined_sets/ascii.cps +1 -0
  32. data/lib/character_set/predefined_sets/ascii_alnum.cps +3 -0
  33. data/lib/character_set/predefined_sets/ascii_letter.cps +2 -0
  34. data/lib/character_set/predefined_sets/assigned.cps +666 -0
  35. data/lib/character_set/predefined_sets/bmp.cps +2 -0
  36. data/lib/character_set/predefined_sets/crypt.cps +2 -0
  37. data/lib/character_set/predefined_sets/emoji.cps +151 -0
  38. data/lib/character_set/predefined_sets/newline.cps +3 -0
  39. data/lib/character_set/predefined_sets/surrogate.cps +1 -0
  40. data/lib/character_set/predefined_sets/unicode.cps +2 -0
  41. data/lib/character_set/predefined_sets/url_fragment.cps +8 -0
  42. data/lib/character_set/predefined_sets/url_host.cps +10 -0
  43. data/lib/character_set/predefined_sets/url_path.cps +7 -0
  44. data/lib/character_set/predefined_sets/url_query.cps +8 -0
  45. data/lib/character_set/predefined_sets/whitespace.cps +10 -0
  46. data/lib/character_set/ruby_fallback.rb +5 -3
  47. data/lib/character_set/ruby_fallback/character_set_methods.rb +53 -6
  48. data/lib/character_set/ruby_fallback/set_methods.rb +25 -17
  49. data/lib/character_set/shared_methods.rb +60 -49
  50. data/lib/character_set/version.rb +1 -1
  51. data/lib/character_set/writer.rb +98 -27
  52. metadata +102 -22
  53. data/.travis.yml +0 -11
  54. data/lib/character_set/ruby_fallback/plane_methods.rb +0 -27
@@ -1,3 +1,3 @@
1
1
  class CharacterSet
2
- VERSION = '1.1.1'
2
+ VERSION = '1.4.1'
3
3
  end
@@ -1,37 +1,108 @@
1
1
  class CharacterSet
2
2
  module Writer
3
- module_function
4
-
5
- def write(codepoint_ranges, opts = {}, &block)
6
- content = codepoint_ranges.map do |range|
7
- if range.size > 2 && opts[:abbreviate] != false
8
- range.minmax.map { |cp| Character.new(cp).escape(opts, &block) }.join('-')
9
- else
10
- range.map { |cp| Character.new(cp).escape(opts, &block) }.join
3
+ class << self
4
+ def write(codepoint_ranges, opts = {}, &block)
5
+ content = codepoint_ranges.map do |range|
6
+ if range.size > 2 && opts[:abbreviate] != false
7
+ bounds = [range.min, range.max]
8
+ bounds.map { |cp| write_codepoint(cp, opts, &block) }.join('-')
9
+ else
10
+ range.map { |cp| write_codepoint(cp, opts, &block) }.join
11
+ end
12
+ end.join
13
+ opts[:in_brackets] ? "[#{content}]" : content
14
+ end
15
+
16
+ def write_codepoint(codepoint, opts = {}, &block)
17
+ Character.new(codepoint).escape(opts, &block)
18
+ end
19
+
20
+ def write_surrogate_ranges(bmp_ranges, astral_ranges)
21
+ astral_branches = surrogate_range_expressions(astral_ranges)
22
+ bmp_set_with_alternatives(bmp_ranges, astral_branches)
23
+ end
24
+
25
+ def write_surrogate_alternation(bmp_ranges, astral_ranges)
26
+ astral_branches = surrogate_pairs(astral_ranges)
27
+ bmp_set_with_alternatives(bmp_ranges, astral_branches)
28
+ end
29
+
30
+ private
31
+
32
+ def surrogate_range_expressions(astral_ranges)
33
+ compressed_surrogate_range_pairs(astral_ranges).map do |hi_ranges, lo_ranges|
34
+ [hi_ranges, lo_ranges].map do |ranges|
35
+ use_brackets = ranges.size > 1 || ranges.first.size > 1
36
+ write(ranges, format: :js, in_brackets: use_brackets)
37
+ end.join
11
38
  end
12
- end.join
13
- opts[:in_brackets] ? "[#{content}]" : content
14
- end
39
+ end
40
+
41
+ def compressed_surrogate_range_pairs(astral_ranges)
42
+ halves = astral_ranges.flat_map { |range| surrogate_half_ranges(range) }
15
43
 
16
- def write_surrogate_alternation(bmp_ranges, astral_ranges)
17
- bmp_set = write(bmp_ranges, format: :js, in_brackets: true)
18
- if astral_ranges.empty?
19
- bmp_set
20
- else
21
- surrogate_pairs = surrogate_pairs(astral_ranges)
22
- "(?:#{((bmp_ranges.any? ? [bmp_set] : []) + surrogate_pairs) * '|'})"
44
+ # compress high surrogate codepoint ranges with common low range half
45
+ with_common_lo = halves.group_by(&:last).map do |lo_range, pairs|
46
+ hi_ranges = pairs.map(&:first)
47
+ compressed_hi_ranges = hi_ranges.each_with_object([]) do |range, arr|
48
+ prev = arr.last
49
+ if prev.nil? || prev.max + 1 < range.min # first or gap
50
+ arr << range
51
+ else # continuous codepoints, expand previous range
52
+ arr[-1] = (prev.min)..(range.max)
53
+ end
54
+ end
55
+ [compressed_hi_ranges, lo_range]
56
+ end
57
+
58
+ # compress low surrogate codepoint ranges with common high ranges
59
+ with_common_lo.each_with_object({}) do |(hi_ranges, lo_range), hash|
60
+ (hash[hi_ranges] ||= []) << lo_range
61
+ end
23
62
  end
24
- end
25
63
 
26
- def surrogate_pairs(astral_ranges)
27
- astral_ranges.flat_map { |range| range.map { |cp| surrogate_pair(cp) } }
28
- end
64
+ def surrogate_half_ranges(astral_range)
65
+ hi_min, lo_min = surrogate_pair_codepoints(astral_range.min)
66
+ hi_max, lo_max = surrogate_pair_codepoints(astral_range.max)
67
+ hi_count = 1 + hi_max - hi_min
68
+ return [[hi_min..hi_min, lo_min..lo_max]] if hi_count == 1
69
+
70
+ ranges = []
71
+
72
+ # first high surrogate might be partially covered (if lo_min > 0xDC00)
73
+ ranges << [hi_min..hi_min, lo_min..0xDFFF]
74
+
75
+ # any high surrogates in between are fully covered
76
+ ranges << [(hi_min + 1)..(hi_max - 1), 0xDC00..0xDFFF] if hi_count > 2
29
77
 
30
- def surrogate_pair(astral_codepoint)
31
- base = astral_codepoint - 0x10000
32
- high = ((base / 1024).floor + 0xD800).to_s(16)
33
- low = (base % 1024 + 0xDC00).to_s(16)
34
- "\\u#{high}\\u#{low}"
78
+ # last high surrogate might be partially covered (if lo_max < 0xDFFF)
79
+ ranges << [hi_max..hi_max, 0xDC00..lo_max]
80
+
81
+ ranges
82
+ end
83
+
84
+ def surrogate_pair_codepoints(astral_codepoint)
85
+ base = astral_codepoint - 0x10000
86
+ high = base / 1024 + 0xD800
87
+ low = base % 1024 + 0xDC00
88
+ [high, low]
89
+ end
90
+
91
+ def bmp_set_with_alternatives(bmp_ranges, alternatives)
92
+ bmp_set = write(bmp_ranges, format: :js, in_brackets: true)
93
+ return bmp_set if alternatives.empty? && bmp_ranges.any?
94
+
95
+ "(?:#{((bmp_ranges.any? ? [bmp_set] : []) + alternatives).join('|')})"
96
+ end
97
+
98
+ def surrogate_pairs(astral_ranges)
99
+ astral_ranges.flat_map { |range| range.map { |cp| surrogate_pair(cp) } }
100
+ end
101
+
102
+ def surrogate_pair(astral_codepoint)
103
+ surrogate_pair_codepoints(astral_codepoint)
104
+ .map { |half| write_codepoint(half, format: :js) }.join
105
+ end
35
106
  end
36
107
  end
37
108
  end
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: character_set
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.4.1
5
5
  platform: java
6
6
  authors:
7
7
  - Janosch Müller
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-24 00:00:00.000000000 Z
11
+ date: 2021-01-11 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: sorted_set
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: benchmark-ips
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -25,47 +39,47 @@ dependencies:
25
39
  - !ruby/object:Gem::Version
26
40
  version: '2.7'
27
41
  - !ruby/object:Gem::Dependency
28
- name: bundler
42
+ name: get_process_mem
29
43
  requirement: !ruby/object:Gem::Requirement
30
44
  requirements:
31
45
  - - "~>"
32
46
  - !ruby/object:Gem::Version
33
- version: '1.16'
47
+ version: 0.2.3
34
48
  type: :development
35
49
  prerelease: false
36
50
  version_requirements: !ruby/object:Gem::Requirement
37
51
  requirements:
38
52
  - - "~>"
39
53
  - !ruby/object:Gem::Version
40
- version: '1.16'
54
+ version: 0.2.3
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rake
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
59
  - - "~>"
46
60
  - !ruby/object:Gem::Version
47
- version: '12.0'
61
+ version: '13.0'
48
62
  type: :development
49
63
  prerelease: false
50
64
  version_requirements: !ruby/object:Gem::Requirement
51
65
  requirements:
52
66
  - - "~>"
53
67
  - !ruby/object:Gem::Version
54
- version: '12.0'
68
+ version: '13.0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: rake-compiler
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
73
  - - "~>"
60
74
  - !ruby/object:Gem::Version
61
- version: '1.0'
75
+ version: '1.1'
62
76
  type: :development
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
66
80
  - - "~>"
67
81
  - !ruby/object:Gem::Version
68
- version: '1.0'
82
+ version: '1.1'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: range_compressor
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -86,28 +100,28 @@ dependencies:
86
100
  requirements:
87
101
  - - "~>"
88
102
  - !ruby/object:Gem::Version
89
- version: '1.1'
103
+ version: '1.6'
90
104
  type: :development
91
105
  prerelease: false
92
106
  version_requirements: !ruby/object:Gem::Requirement
93
107
  requirements:
94
108
  - - "~>"
95
109
  - !ruby/object:Gem::Version
96
- version: '1.1'
110
+ version: '1.6'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: regexp_property_values
99
113
  requirement: !ruby/object:Gem::Requirement
100
114
  requirements:
101
115
  - - "~>"
102
116
  - !ruby/object:Gem::Version
103
- version: 0.3.4
117
+ version: '1.0'
104
118
  type: :development
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
107
121
  requirements:
108
122
  - - "~>"
109
123
  - !ruby/object:Gem::Version
110
- version: 0.3.4
124
+ version: '1.0'
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: rspec
113
127
  requirement: !ruby/object:Gem::Requirement
@@ -122,33 +136,85 @@ dependencies:
122
136
  - - "~>"
123
137
  - !ruby/object:Gem::Version
124
138
  version: '3.8'
125
- description:
139
+ - !ruby/object:Gem::Dependency
140
+ name: codecov
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: 0.2.12
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: 0.2.12
153
+ - !ruby/object:Gem::Dependency
154
+ name: rubocop
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: '1.8'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: '1.8'
167
+ - !ruby/object:Gem::Dependency
168
+ name: range_compressor
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: '1.0'
174
+ type: :runtime
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: '1.0'
181
+ description:
126
182
  email:
127
183
  - janosch84@gmail.com
128
184
  executables: []
129
185
  extensions: []
130
186
  extra_rdoc_files: []
131
187
  files:
188
+ - ".gitattributes"
189
+ - ".github/workflows/lint.yml"
190
+ - ".github/workflows/tests.yml"
132
191
  - ".gitignore"
133
192
  - ".rspec"
134
- - ".travis.yml"
193
+ - ".rubocop.yml"
135
194
  - BENCHMARK.md
136
195
  - CHANGELOG.md
137
196
  - Gemfile
138
197
  - LICENSE.txt
139
198
  - README.md
140
199
  - Rakefile
200
+ - benchmarks/count_in.rb
141
201
  - benchmarks/cover.rb
142
202
  - benchmarks/delete_in.rb
143
203
  - benchmarks/keep_in.rb
204
+ - benchmarks/scan.rb
144
205
  - benchmarks/shared.rb
145
206
  - benchmarks/used_by.rb
207
+ - benchmarks/z_add.rb
208
+ - benchmarks/z_delete.rb
209
+ - benchmarks/z_merge.rb
210
+ - benchmarks/z_minmax.rb
146
211
  - bin/console
147
212
  - bin/setup
148
213
  - character_set.gemspec
149
214
  - ext/character_set/character_set.c
150
215
  - ext/character_set/extconf.rb
151
216
  - ext/character_set/unicode_casefold_table.h
217
+ - ext/character_set/unicode_casefold_table.h.tmpl
152
218
  - lib/character_set.rb
153
219
  - lib/character_set/character.rb
154
220
  - lib/character_set/core_ext.rb
@@ -157,20 +223,35 @@ files:
157
223
  - lib/character_set/expression_converter.rb
158
224
  - lib/character_set/parser.rb
159
225
  - lib/character_set/predefined_sets.rb
226
+ - lib/character_set/predefined_sets/any.cps
227
+ - lib/character_set/predefined_sets/ascii.cps
228
+ - lib/character_set/predefined_sets/ascii_alnum.cps
229
+ - lib/character_set/predefined_sets/ascii_letter.cps
230
+ - lib/character_set/predefined_sets/assigned.cps
231
+ - lib/character_set/predefined_sets/bmp.cps
232
+ - lib/character_set/predefined_sets/crypt.cps
233
+ - lib/character_set/predefined_sets/emoji.cps
234
+ - lib/character_set/predefined_sets/newline.cps
235
+ - lib/character_set/predefined_sets/surrogate.cps
236
+ - lib/character_set/predefined_sets/unicode.cps
237
+ - lib/character_set/predefined_sets/url_fragment.cps
238
+ - lib/character_set/predefined_sets/url_host.cps
239
+ - lib/character_set/predefined_sets/url_path.cps
240
+ - lib/character_set/predefined_sets/url_query.cps
241
+ - lib/character_set/predefined_sets/whitespace.cps
160
242
  - lib/character_set/pure.rb
161
243
  - lib/character_set/ruby_fallback.rb
162
244
  - lib/character_set/ruby_fallback/character_set_methods.rb
163
- - lib/character_set/ruby_fallback/plane_methods.rb
164
245
  - lib/character_set/ruby_fallback/set_methods.rb
165
246
  - lib/character_set/set_method_adapters.rb
166
247
  - lib/character_set/shared_methods.rb
167
248
  - lib/character_set/version.rb
168
249
  - lib/character_set/writer.rb
169
- homepage: https://github.com/janosch-x/character_set
250
+ homepage: https://github.com/jaynetics/character_set
170
251
  licenses:
171
252
  - MIT
172
253
  metadata: {}
173
- post_install_message:
254
+ post_install_message:
174
255
  rdoc_options: []
175
256
  require_paths:
176
257
  - lib
@@ -185,9 +266,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
185
266
  - !ruby/object:Gem::Version
186
267
  version: '0'
187
268
  requirements: []
188
- rubyforge_project:
189
- rubygems_version: 2.7.6
190
- signing_key:
269
+ rubygems_version: 3.2.3
270
+ signing_key:
191
271
  specification_version: 4
192
272
  summary: Build, read, write and compare sets of Unicode codepoints.
193
273
  test_files: []
@@ -1,11 +0,0 @@
1
- sudo: false
2
- language: ruby
3
- rvm:
4
- - 2.1
5
- - 2.4
6
- - 2.5
7
- - 2.6
8
- - jruby-9.1.9.0
9
- before_install:
10
- - gem update --system
11
- - gem install bundler
@@ -1,27 +0,0 @@
1
- class CharacterSet
2
- module RubyFallback
3
- module PlaneMethods
4
- def bmp_part
5
- dup.keep_if { |cp| cp < 0x10000 }
6
- end
7
-
8
- def astral_part
9
- dup.keep_if { |cp| cp >= 0x10000 }
10
- end
11
-
12
- def planes
13
- plane_set = {}
14
- plane_size = 0x10000.to_f
15
- each do |cp|
16
- plane = (cp / plane_size).floor
17
- plane_set[plane] = true
18
- end
19
- plane_set.keys
20
- end
21
-
22
- def member_in_plane?(num)
23
- ((num * 0x10000)...((num + 1) * 0x10000)).any? { |cp| include?(cp) }
24
- end
25
- end
26
- end
27
- end