character_set 1.1.1-java → 1.4.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54)
  1. checksums.yaml +4 -4
  2. data/.gitattributes +3 -0
  3. data/.github/workflows/lint.yml +29 -0
  4. data/.github/workflows/tests.yml +22 -0
  5. data/.gitignore +1 -0
  6. data/.rubocop.yml +11 -0
  7. data/BENCHMARK.md +53 -17
  8. data/CHANGELOG.md +47 -0
  9. data/README.md +38 -14
  10. data/Rakefile +60 -36
  11. data/benchmarks/count_in.rb +13 -0
  12. data/benchmarks/delete_in.rb +1 -1
  13. data/benchmarks/scan.rb +13 -0
  14. data/benchmarks/shared.rb +5 -0
  15. data/benchmarks/z_add.rb +12 -0
  16. data/benchmarks/z_delete.rb +12 -0
  17. data/benchmarks/z_merge.rb +15 -0
  18. data/benchmarks/z_minmax.rb +12 -0
  19. data/bin/console +2 -0
  20. data/character_set.gemspec +17 -6
  21. data/ext/character_set/character_set.c +963 -414
  22. data/ext/character_set/unicode_casefold_table.h +10 -2
  23. data/ext/character_set/unicode_casefold_table.h.tmpl +11 -0
  24. data/lib/character_set/character.rb +1 -1
  25. data/lib/character_set/core_ext/regexp_ext.rb +1 -1
  26. data/lib/character_set/core_ext/string_ext.rb +3 -1
  27. data/lib/character_set/expression_converter.rb +25 -27
  28. data/lib/character_set/parser.rb +1 -1
  29. data/lib/character_set/predefined_sets.rb +25 -260
  30. data/lib/character_set/predefined_sets/any.cps +1 -0
  31. data/lib/character_set/predefined_sets/ascii.cps +1 -0
  32. data/lib/character_set/predefined_sets/ascii_alnum.cps +3 -0
  33. data/lib/character_set/predefined_sets/ascii_letter.cps +2 -0
  34. data/lib/character_set/predefined_sets/assigned.cps +666 -0
  35. data/lib/character_set/predefined_sets/bmp.cps +2 -0
  36. data/lib/character_set/predefined_sets/crypt.cps +2 -0
  37. data/lib/character_set/predefined_sets/emoji.cps +151 -0
  38. data/lib/character_set/predefined_sets/newline.cps +3 -0
  39. data/lib/character_set/predefined_sets/surrogate.cps +1 -0
  40. data/lib/character_set/predefined_sets/unicode.cps +2 -0
  41. data/lib/character_set/predefined_sets/url_fragment.cps +8 -0
  42. data/lib/character_set/predefined_sets/url_host.cps +10 -0
  43. data/lib/character_set/predefined_sets/url_path.cps +7 -0
  44. data/lib/character_set/predefined_sets/url_query.cps +8 -0
  45. data/lib/character_set/predefined_sets/whitespace.cps +10 -0
  46. data/lib/character_set/ruby_fallback.rb +5 -3
  47. data/lib/character_set/ruby_fallback/character_set_methods.rb +53 -6
  48. data/lib/character_set/ruby_fallback/set_methods.rb +25 -17
  49. data/lib/character_set/shared_methods.rb +60 -49
  50. data/lib/character_set/version.rb +1 -1
  51. data/lib/character_set/writer.rb +98 -27
  52. metadata +102 -22
  53. data/.travis.yml +0 -11
  54. data/lib/character_set/ruby_fallback/plane_methods.rb +0 -27
@@ -1,3 +1,3 @@
1
1
  class CharacterSet
2
- VERSION = '1.1.1'
2
+ VERSION = '1.4.1'
3
3
  end
@@ -1,37 +1,108 @@
1
1
  class CharacterSet
2
2
  module Writer
3
- module_function
4
-
5
- def write(codepoint_ranges, opts = {}, &block)
6
- content = codepoint_ranges.map do |range|
7
- if range.size > 2 && opts[:abbreviate] != false
8
- range.minmax.map { |cp| Character.new(cp).escape(opts, &block) }.join('-')
9
- else
10
- range.map { |cp| Character.new(cp).escape(opts, &block) }.join
3
+ class << self
4
+ def write(codepoint_ranges, opts = {}, &block)
5
+ content = codepoint_ranges.map do |range|
6
+ if range.size > 2 && opts[:abbreviate] != false
7
+ bounds = [range.min, range.max]
8
+ bounds.map { |cp| write_codepoint(cp, opts, &block) }.join('-')
9
+ else
10
+ range.map { |cp| write_codepoint(cp, opts, &block) }.join
11
+ end
12
+ end.join
13
+ opts[:in_brackets] ? "[#{content}]" : content
14
+ end
15
+
16
+ def write_codepoint(codepoint, opts = {}, &block)
17
+ Character.new(codepoint).escape(opts, &block)
18
+ end
19
+
20
+ def write_surrogate_ranges(bmp_ranges, astral_ranges)
21
+ astral_branches = surrogate_range_expressions(astral_ranges)
22
+ bmp_set_with_alternatives(bmp_ranges, astral_branches)
23
+ end
24
+
25
+ def write_surrogate_alternation(bmp_ranges, astral_ranges)
26
+ astral_branches = surrogate_pairs(astral_ranges)
27
+ bmp_set_with_alternatives(bmp_ranges, astral_branches)
28
+ end
29
+
30
+ private
31
+
32
+ def surrogate_range_expressions(astral_ranges)
33
+ compressed_surrogate_range_pairs(astral_ranges).map do |hi_ranges, lo_ranges|
34
+ [hi_ranges, lo_ranges].map do |ranges|
35
+ use_brackets = ranges.size > 1 || ranges.first.size > 1
36
+ write(ranges, format: :js, in_brackets: use_brackets)
37
+ end.join
11
38
  end
12
- end.join
13
- opts[:in_brackets] ? "[#{content}]" : content
14
- end
39
+ end
40
+
41
+ def compressed_surrogate_range_pairs(astral_ranges)
42
+ halves = astral_ranges.flat_map { |range| surrogate_half_ranges(range) }
15
43
 
16
- def write_surrogate_alternation(bmp_ranges, astral_ranges)
17
- bmp_set = write(bmp_ranges, format: :js, in_brackets: true)
18
- if astral_ranges.empty?
19
- bmp_set
20
- else
21
- surrogate_pairs = surrogate_pairs(astral_ranges)
22
- "(?:#{((bmp_ranges.any? ? [bmp_set] : []) + surrogate_pairs) * '|'})"
44
+ # compress high surrogate codepoint ranges with common low range half
45
+ with_common_lo = halves.group_by(&:last).map do |lo_range, pairs|
46
+ hi_ranges = pairs.map(&:first)
47
+ compressed_hi_ranges = hi_ranges.each_with_object([]) do |range, arr|
48
+ prev = arr.last
49
+ if prev.nil? || prev.max + 1 < range.min # first or gap
50
+ arr << range
51
+ else # continuous codepoints, expand previous range
52
+ arr[-1] = (prev.min)..(range.max)
53
+ end
54
+ end
55
+ [compressed_hi_ranges, lo_range]
56
+ end
57
+
58
+ # compress low surrogate codepoint ranges with common high ranges
59
+ with_common_lo.each_with_object({}) do |(hi_ranges, lo_range), hash|
60
+ (hash[hi_ranges] ||= []) << lo_range
61
+ end
23
62
  end
24
- end
25
63
 
26
- def surrogate_pairs(astral_ranges)
27
- astral_ranges.flat_map { |range| range.map { |cp| surrogate_pair(cp) } }
28
- end
64
+ def surrogate_half_ranges(astral_range)
65
+ hi_min, lo_min = surrogate_pair_codepoints(astral_range.min)
66
+ hi_max, lo_max = surrogate_pair_codepoints(astral_range.max)
67
+ hi_count = 1 + hi_max - hi_min
68
+ return [[hi_min..hi_min, lo_min..lo_max]] if hi_count == 1
69
+
70
+ ranges = []
71
+
72
+ # first high surrogate might be partially covered (if lo_min > 0xDC00)
73
+ ranges << [hi_min..hi_min, lo_min..0xDFFF]
74
+
75
+ # any high surrogates in between are fully covered
76
+ ranges << [(hi_min + 1)..(hi_max - 1), 0xDC00..0xDFFF] if hi_count > 2
29
77
 
30
- def surrogate_pair(astral_codepoint)
31
- base = astral_codepoint - 0x10000
32
- high = ((base / 1024).floor + 0xD800).to_s(16)
33
- low = (base % 1024 + 0xDC00).to_s(16)
34
- "\\u#{high}\\u#{low}"
78
+ # last high surrogate might be partially covered (if lo_max < 0xDFFF)
79
+ ranges << [hi_max..hi_max, 0xDC00..lo_max]
80
+
81
+ ranges
82
+ end
83
+
84
+ def surrogate_pair_codepoints(astral_codepoint)
85
+ base = astral_codepoint - 0x10000
86
+ high = base / 1024 + 0xD800
87
+ low = base % 1024 + 0xDC00
88
+ [high, low]
89
+ end
90
+
91
+ def bmp_set_with_alternatives(bmp_ranges, alternatives)
92
+ bmp_set = write(bmp_ranges, format: :js, in_brackets: true)
93
+ return bmp_set if alternatives.empty? && bmp_ranges.any?
94
+
95
+ "(?:#{((bmp_ranges.any? ? [bmp_set] : []) + alternatives).join('|')})"
96
+ end
97
+
98
+ def surrogate_pairs(astral_ranges)
99
+ astral_ranges.flat_map { |range| range.map { |cp| surrogate_pair(cp) } }
100
+ end
101
+
102
+ def surrogate_pair(astral_codepoint)
103
+ surrogate_pair_codepoints(astral_codepoint)
104
+ .map { |half| write_codepoint(half, format: :js) }.join
105
+ end
35
106
  end
36
107
  end
37
108
  end
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: character_set
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.4.1
5
5
  platform: java
6
6
  authors:
7
7
  - Janosch Müller
8
- autorequire:
8
+ autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-09-24 00:00:00.000000000 Z
11
+ date: 2021-01-11 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: sorted_set
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: '1.0'
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: '1.0'
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: benchmark-ips
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -25,47 +39,47 @@ dependencies:
25
39
  - !ruby/object:Gem::Version
26
40
  version: '2.7'
27
41
  - !ruby/object:Gem::Dependency
28
- name: bundler
42
+ name: get_process_mem
29
43
  requirement: !ruby/object:Gem::Requirement
30
44
  requirements:
31
45
  - - "~>"
32
46
  - !ruby/object:Gem::Version
33
- version: '1.16'
47
+ version: 0.2.3
34
48
  type: :development
35
49
  prerelease: false
36
50
  version_requirements: !ruby/object:Gem::Requirement
37
51
  requirements:
38
52
  - - "~>"
39
53
  - !ruby/object:Gem::Version
40
- version: '1.16'
54
+ version: 0.2.3
41
55
  - !ruby/object:Gem::Dependency
42
56
  name: rake
43
57
  requirement: !ruby/object:Gem::Requirement
44
58
  requirements:
45
59
  - - "~>"
46
60
  - !ruby/object:Gem::Version
47
- version: '12.0'
61
+ version: '13.0'
48
62
  type: :development
49
63
  prerelease: false
50
64
  version_requirements: !ruby/object:Gem::Requirement
51
65
  requirements:
52
66
  - - "~>"
53
67
  - !ruby/object:Gem::Version
54
- version: '12.0'
68
+ version: '13.0'
55
69
  - !ruby/object:Gem::Dependency
56
70
  name: rake-compiler
57
71
  requirement: !ruby/object:Gem::Requirement
58
72
  requirements:
59
73
  - - "~>"
60
74
  - !ruby/object:Gem::Version
61
- version: '1.0'
75
+ version: '1.1'
62
76
  type: :development
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
66
80
  - - "~>"
67
81
  - !ruby/object:Gem::Version
68
- version: '1.0'
82
+ version: '1.1'
69
83
  - !ruby/object:Gem::Dependency
70
84
  name: range_compressor
71
85
  requirement: !ruby/object:Gem::Requirement
@@ -86,28 +100,28 @@ dependencies:
86
100
  requirements:
87
101
  - - "~>"
88
102
  - !ruby/object:Gem::Version
89
- version: '1.1'
103
+ version: '1.6'
90
104
  type: :development
91
105
  prerelease: false
92
106
  version_requirements: !ruby/object:Gem::Requirement
93
107
  requirements:
94
108
  - - "~>"
95
109
  - !ruby/object:Gem::Version
96
- version: '1.1'
110
+ version: '1.6'
97
111
  - !ruby/object:Gem::Dependency
98
112
  name: regexp_property_values
99
113
  requirement: !ruby/object:Gem::Requirement
100
114
  requirements:
101
115
  - - "~>"
102
116
  - !ruby/object:Gem::Version
103
- version: 0.3.4
117
+ version: '1.0'
104
118
  type: :development
105
119
  prerelease: false
106
120
  version_requirements: !ruby/object:Gem::Requirement
107
121
  requirements:
108
122
  - - "~>"
109
123
  - !ruby/object:Gem::Version
110
- version: 0.3.4
124
+ version: '1.0'
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: rspec
113
127
  requirement: !ruby/object:Gem::Requirement
@@ -122,33 +136,85 @@ dependencies:
122
136
  - - "~>"
123
137
  - !ruby/object:Gem::Version
124
138
  version: '3.8'
125
- description:
139
+ - !ruby/object:Gem::Dependency
140
+ name: codecov
141
+ requirement: !ruby/object:Gem::Requirement
142
+ requirements:
143
+ - - "~>"
144
+ - !ruby/object:Gem::Version
145
+ version: 0.2.12
146
+ type: :development
147
+ prerelease: false
148
+ version_requirements: !ruby/object:Gem::Requirement
149
+ requirements:
150
+ - - "~>"
151
+ - !ruby/object:Gem::Version
152
+ version: 0.2.12
153
+ - !ruby/object:Gem::Dependency
154
+ name: rubocop
155
+ requirement: !ruby/object:Gem::Requirement
156
+ requirements:
157
+ - - "~>"
158
+ - !ruby/object:Gem::Version
159
+ version: '1.8'
160
+ type: :development
161
+ prerelease: false
162
+ version_requirements: !ruby/object:Gem::Requirement
163
+ requirements:
164
+ - - "~>"
165
+ - !ruby/object:Gem::Version
166
+ version: '1.8'
167
+ - !ruby/object:Gem::Dependency
168
+ name: range_compressor
169
+ requirement: !ruby/object:Gem::Requirement
170
+ requirements:
171
+ - - "~>"
172
+ - !ruby/object:Gem::Version
173
+ version: '1.0'
174
+ type: :runtime
175
+ prerelease: false
176
+ version_requirements: !ruby/object:Gem::Requirement
177
+ requirements:
178
+ - - "~>"
179
+ - !ruby/object:Gem::Version
180
+ version: '1.0'
181
+ description:
126
182
  email:
127
183
  - janosch84@gmail.com
128
184
  executables: []
129
185
  extensions: []
130
186
  extra_rdoc_files: []
131
187
  files:
188
+ - ".gitattributes"
189
+ - ".github/workflows/lint.yml"
190
+ - ".github/workflows/tests.yml"
132
191
  - ".gitignore"
133
192
  - ".rspec"
134
- - ".travis.yml"
193
+ - ".rubocop.yml"
135
194
  - BENCHMARK.md
136
195
  - CHANGELOG.md
137
196
  - Gemfile
138
197
  - LICENSE.txt
139
198
  - README.md
140
199
  - Rakefile
200
+ - benchmarks/count_in.rb
141
201
  - benchmarks/cover.rb
142
202
  - benchmarks/delete_in.rb
143
203
  - benchmarks/keep_in.rb
204
+ - benchmarks/scan.rb
144
205
  - benchmarks/shared.rb
145
206
  - benchmarks/used_by.rb
207
+ - benchmarks/z_add.rb
208
+ - benchmarks/z_delete.rb
209
+ - benchmarks/z_merge.rb
210
+ - benchmarks/z_minmax.rb
146
211
  - bin/console
147
212
  - bin/setup
148
213
  - character_set.gemspec
149
214
  - ext/character_set/character_set.c
150
215
  - ext/character_set/extconf.rb
151
216
  - ext/character_set/unicode_casefold_table.h
217
+ - ext/character_set/unicode_casefold_table.h.tmpl
152
218
  - lib/character_set.rb
153
219
  - lib/character_set/character.rb
154
220
  - lib/character_set/core_ext.rb
@@ -157,20 +223,35 @@ files:
157
223
  - lib/character_set/expression_converter.rb
158
224
  - lib/character_set/parser.rb
159
225
  - lib/character_set/predefined_sets.rb
226
+ - lib/character_set/predefined_sets/any.cps
227
+ - lib/character_set/predefined_sets/ascii.cps
228
+ - lib/character_set/predefined_sets/ascii_alnum.cps
229
+ - lib/character_set/predefined_sets/ascii_letter.cps
230
+ - lib/character_set/predefined_sets/assigned.cps
231
+ - lib/character_set/predefined_sets/bmp.cps
232
+ - lib/character_set/predefined_sets/crypt.cps
233
+ - lib/character_set/predefined_sets/emoji.cps
234
+ - lib/character_set/predefined_sets/newline.cps
235
+ - lib/character_set/predefined_sets/surrogate.cps
236
+ - lib/character_set/predefined_sets/unicode.cps
237
+ - lib/character_set/predefined_sets/url_fragment.cps
238
+ - lib/character_set/predefined_sets/url_host.cps
239
+ - lib/character_set/predefined_sets/url_path.cps
240
+ - lib/character_set/predefined_sets/url_query.cps
241
+ - lib/character_set/predefined_sets/whitespace.cps
160
242
  - lib/character_set/pure.rb
161
243
  - lib/character_set/ruby_fallback.rb
162
244
  - lib/character_set/ruby_fallback/character_set_methods.rb
163
- - lib/character_set/ruby_fallback/plane_methods.rb
164
245
  - lib/character_set/ruby_fallback/set_methods.rb
165
246
  - lib/character_set/set_method_adapters.rb
166
247
  - lib/character_set/shared_methods.rb
167
248
  - lib/character_set/version.rb
168
249
  - lib/character_set/writer.rb
169
- homepage: https://github.com/janosch-x/character_set
250
+ homepage: https://github.com/jaynetics/character_set
170
251
  licenses:
171
252
  - MIT
172
253
  metadata: {}
173
- post_install_message:
254
+ post_install_message:
174
255
  rdoc_options: []
175
256
  require_paths:
176
257
  - lib
@@ -185,9 +266,8 @@ required_rubygems_version: !ruby/object:Gem::Requirement
185
266
  - !ruby/object:Gem::Version
186
267
  version: '0'
187
268
  requirements: []
188
- rubyforge_project:
189
- rubygems_version: 2.7.6
190
- signing_key:
269
+ rubygems_version: 3.2.3
270
+ signing_key:
191
271
  specification_version: 4
192
272
  summary: Build, read, write and compare sets of Unicode codepoints.
193
273
  test_files: []
@@ -1,11 +0,0 @@
1
- sudo: false
2
- language: ruby
3
- rvm:
4
- - 2.1
5
- - 2.4
6
- - 2.5
7
- - 2.6
8
- - jruby-9.1.9.0
9
- before_install:
10
- - gem update --system
11
- - gem install bundler
@@ -1,27 +0,0 @@
1
- class CharacterSet
2
- module RubyFallback
3
- module PlaneMethods
4
- def bmp_part
5
- dup.keep_if { |cp| cp < 0x10000 }
6
- end
7
-
8
- def astral_part
9
- dup.keep_if { |cp| cp >= 0x10000 }
10
- end
11
-
12
- def planes
13
- plane_set = {}
14
- plane_size = 0x10000.to_f
15
- each do |cp|
16
- plane = (cp / plane_size).floor
17
- plane_set[plane] = true
18
- end
19
- plane_set.keys
20
- end
21
-
22
- def member_in_plane?(num)
23
- ((num * 0x10000)...((num + 1) * 0x10000)).any? { |cp| include?(cp) }
24
- end
25
- end
26
- end
27
- end