character_set 1.4.0 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (49) hide show
  1. checksums.yaml +4 -4
  2. data/.gitattributes +1 -1
  3. data/.github/workflows/gouteur.yml +20 -0
  4. data/.github/workflows/lint.yml +29 -0
  5. data/.github/workflows/tests.yml +28 -0
  6. data/.gitignore +1 -0
  7. data/.gouteur.yml +2 -0
  8. data/.rubocop.yml +20 -0
  9. data/BENCHMARK.md +35 -31
  10. data/CHANGELOG.md +64 -1
  11. data/Gemfile +15 -0
  12. data/LICENSE.txt +1 -1
  13. data/README.md +25 -9
  14. data/Rakefile +2 -120
  15. data/character_set.gemspec +0 -10
  16. data/ext/character_set/character_set.c +123 -121
  17. data/ext/character_set/unicode_casefold_table.h +44 -1
  18. data/lib/character_set/core_ext/regexp_ext.rb +9 -1
  19. data/lib/character_set/core_ext/string_ext.rb +2 -2
  20. data/lib/character_set/expression_converter.rb +40 -56
  21. data/lib/character_set/parser.rb +8 -4
  22. data/lib/character_set/predefined_sets/assigned.cps +110 -78
  23. data/lib/character_set/predefined_sets/emoji.cps +16 -14
  24. data/lib/character_set/predefined_sets.rb +11 -0
  25. data/lib/character_set/ruby_fallback/character_set_methods.rb +17 -21
  26. data/lib/character_set/ruby_fallback/set_methods.rb +9 -16
  27. data/lib/character_set/ruby_fallback/vendored_set_classes.rb +385 -0
  28. data/lib/character_set/ruby_fallback.rb +18 -2
  29. data/lib/character_set/set_method_adapters.rb +4 -3
  30. data/lib/character_set/shared_methods.rb +25 -11
  31. data/lib/character_set/version.rb +1 -1
  32. data/tasks/benchmark.rake +20 -0
  33. data/{benchmarks → tasks/benchmarks}/delete_in.rb +5 -1
  34. data/{benchmarks → tasks/benchmarks}/keep_in.rb +5 -1
  35. data/tasks/benchmarks/shared.rb +28 -0
  36. data/tasks/sync_casefold_data.rake +20 -0
  37. data/tasks/sync_predefined_sets.rake +9 -0
  38. data/tasks/sync_ruby_spec.rake +65 -0
  39. metadata +29 -146
  40. data/.travis.yml +0 -9
  41. data/benchmarks/shared.rb +0 -26
  42. /data/{benchmarks → tasks/benchmarks}/count_in.rb +0 -0
  43. /data/{benchmarks → tasks/benchmarks}/cover.rb +0 -0
  44. /data/{benchmarks → tasks/benchmarks}/scan.rb +0 -0
  45. /data/{benchmarks → tasks/benchmarks}/used_by.rb +0 -0
  46. /data/{benchmarks → tasks/benchmarks}/z_add.rb +0 -0
  47. /data/{benchmarks → tasks/benchmarks}/z_delete.rb +0 -0
  48. /data/{benchmarks → tasks/benchmarks}/z_merge.rb +0 -0
  49. /data/{benchmarks → tasks/benchmarks}/z_minmax.rb +0 -0
@@ -0,0 +1,385 @@
1
+ # set, vendored from https://github.com/ruby/set/blob/master/lib/set.rb,
2
+ # with comments removed and linted.
3
+ class CharacterSet::RubyFallback::Set
4
+ Set = self
5
+ include Enumerable
6
+
7
+ def self.[](*ary)
8
+ new(ary)
9
+ end
10
+
11
+ def initialize(enum = nil, &block)
12
+ @hash = Hash.new(false)
13
+
14
+ enum.nil? and return
15
+
16
+ if block
17
+ do_with_enum(enum) { |o| add(block[o]) }
18
+ else
19
+ merge(enum)
20
+ end
21
+ end
22
+
23
+ def do_with_enum(enum, &block)
24
+ if enum.respond_to?(:each_entry)
25
+ enum.each_entry(&block) if block
26
+ elsif enum.respond_to?(:each)
27
+ enum.each(&block) if block
28
+ else
29
+ raise ArgumentError, "value must be enumerable"
30
+ end
31
+ end
32
+ private :do_with_enum
33
+
34
+ def initialize_dup(orig)
35
+ super
36
+ @hash = orig.instance_variable_get(:@hash).dup
37
+ end
38
+
39
+ if Kernel.instance_method(:initialize_clone).arity != 1
40
+ def initialize_clone(orig, **options)
41
+ super
42
+ @hash = orig.instance_variable_get(:@hash).clone(**options)
43
+ end
44
+ else
45
+ def initialize_clone(orig)
46
+ super
47
+ @hash = orig.instance_variable_get(:@hash).clone
48
+ end
49
+ end
50
+
51
+ def freeze
52
+ @hash.freeze
53
+ super
54
+ end
55
+
56
+ def size
57
+ @hash.size
58
+ end
59
+ alias length size
60
+
61
+ def empty?
62
+ @hash.empty?
63
+ end
64
+
65
+ def clear
66
+ @hash.clear
67
+ self
68
+ end
69
+
70
+ def to_a
71
+ @hash.keys
72
+ end
73
+
74
+ def include?(o)
75
+ @hash[o]
76
+ end
77
+ alias member? include?
78
+
79
+ def superset?(set)
80
+ case
81
+ when set.instance_of?(self.class) && @hash.respond_to?(:>=)
82
+ @hash >= set.instance_variable_get(:@hash)
83
+ when set.is_a?(Set)
84
+ size >= set.size && set.all? { |o| include?(o) }
85
+ else
86
+ raise ArgumentError, "value must be a set"
87
+ end
88
+ end
89
+ alias >= superset?
90
+
91
+ def proper_superset?(set)
92
+ case
93
+ when set.instance_of?(self.class) && @hash.respond_to?(:>)
94
+ @hash > set.instance_variable_get(:@hash)
95
+ when set.is_a?(Set)
96
+ size > set.size && set.all? { |o| include?(o) }
97
+ else
98
+ raise ArgumentError, "value must be a set"
99
+ end
100
+ end
101
+ alias > proper_superset?
102
+
103
+ def subset?(set)
104
+ case
105
+ when set.instance_of?(self.class) && @hash.respond_to?(:<=)
106
+ @hash <= set.instance_variable_get(:@hash)
107
+ when set.is_a?(Set)
108
+ size <= set.size && all? { |o| set.include?(o) }
109
+ else
110
+ raise ArgumentError, "value must be a set"
111
+ end
112
+ end
113
+ alias <= subset?
114
+
115
+ def proper_subset?(set)
116
+ case
117
+ when set.instance_of?(self.class) && @hash.respond_to?(:<)
118
+ @hash < set.instance_variable_get(:@hash)
119
+ when set.is_a?(Set)
120
+ size < set.size && all? { |o| set.include?(o) }
121
+ else
122
+ raise ArgumentError, "value must be a set"
123
+ end
124
+ end
125
+ alias < proper_subset?
126
+
127
+ def <=>(set)
128
+ return unless set.is_a?(Set)
129
+
130
+ case size <=> set.size
131
+ when -1 then -1 if proper_subset?(set)
132
+ when +1 then +1 if proper_superset?(set)
133
+ else 0 if self.==(set)
134
+ end
135
+ end
136
+
137
+ def intersect?(set)
138
+ case set
139
+ when Set
140
+ if size < set.size
141
+ any? { |o| set.include?(o) }
142
+ else
143
+ set.any? { |o| include?(o) }
144
+ end
145
+ when Enumerable
146
+ set.any? { |o| include?(o) }
147
+ else
148
+ raise ArgumentError, "value must be enumerable"
149
+ end
150
+ end
151
+
152
+ def disjoint?(set)
153
+ !intersect?(set)
154
+ end
155
+
156
+ def each(&block)
157
+ block_given? or return enum_for(__method__) { size }
158
+ @hash.each_key(&block)
159
+ self
160
+ end
161
+
162
+ def add(o)
163
+ @hash[o] = true
164
+ self
165
+ end
166
+ alias << add
167
+
168
+ def add?(o)
169
+ add(o) unless include?(o)
170
+ end
171
+
172
+ def delete(o)
173
+ @hash.delete(o)
174
+ self
175
+ end
176
+
177
+ def delete?(o)
178
+ delete(o) if include?(o)
179
+ end
180
+
181
+ def delete_if
182
+ block_given? or return enum_for(__method__) { size }
183
+ select { |o| yield o }.each { |o| @hash.delete(o) }
184
+ self
185
+ end
186
+
187
+ def keep_if
188
+ block_given? or return enum_for(__method__) { size }
189
+ reject { |o| yield o }.each { |o| @hash.delete(o) }
190
+ self
191
+ end
192
+
193
+ def reject!(&block)
194
+ block_given? or return enum_for(__method__) { size }
195
+ n = size
196
+ delete_if(&block)
197
+ self if size != n
198
+ end
199
+
200
+ def select!(&block)
201
+ block_given? or return enum_for(__method__) { size }
202
+ n = size
203
+ keep_if(&block)
204
+ self if size != n
205
+ end
206
+
207
+ alias filter! select!
208
+
209
+ def merge(*enums, **_rest)
210
+ enums.each do |enum|
211
+ if enum.instance_of?(self.class)
212
+ @hash.update(enum.instance_variable_get(:@hash))
213
+ else
214
+ do_with_enum(enum) { |o| add(o) }
215
+ end
216
+ end
217
+
218
+ self
219
+ end
220
+
221
+ def subtract(enum)
222
+ do_with_enum(enum) { |o| delete(o) }
223
+ self
224
+ end
225
+
226
+ def |(enum)
227
+ dup.merge(enum)
228
+ end
229
+ alias + |
230
+ alias union |
231
+
232
+ def -(enum)
233
+ dup.subtract(enum)
234
+ end
235
+ alias difference -
236
+
237
+ def &(enum)
238
+ n = self.class.new
239
+ if enum.is_a?(Set)
240
+ if enum.size > size
241
+ each { |o| n.add(o) if enum.include?(o) }
242
+ else
243
+ enum.each { |o| n.add(o) if include?(o) }
244
+ end
245
+ else
246
+ do_with_enum(enum) { |o| n.add(o) if include?(o) }
247
+ end
248
+ n
249
+ end
250
+ alias intersection &
251
+
252
+ def ^(enum)
253
+ n = Set.new(enum)
254
+ each { |o| n.add(o) unless n.delete?(o) }
255
+ n
256
+ end
257
+
258
+ def ==(other)
259
+ if self.equal?(other)
260
+ true
261
+ elsif other.instance_of?(self.class)
262
+ @hash == other.instance_variable_get(:@hash)
263
+ elsif other.is_a?(Set) && self.size == other.size
264
+ other.all? { |o| @hash.include?(o) }
265
+ else
266
+ false
267
+ end
268
+ end
269
+
270
+ def hash
271
+ @hash.hash
272
+ end
273
+
274
+ def eql?(o)
275
+ return false unless o.is_a?(Set)
276
+ @hash.eql?(o.instance_variable_get(:@hash))
277
+ end
278
+
279
+ alias === include?
280
+
281
+ def classify
282
+ block_given? or return enum_for(__method__) { size }
283
+
284
+ h = {}
285
+
286
+ each { |i|
287
+ (h[yield(i)] ||= self.class.new).add(i)
288
+ }
289
+
290
+ h
291
+ end
292
+
293
+ def divide(&func)
294
+ func or return enum_for(__method__) { size }
295
+
296
+ if func.arity == 2
297
+ require 'tsort'
298
+
299
+ class << dig = {}
300
+ include TSort
301
+
302
+ alias tsort_each_node each_key
303
+ def tsort_each_child(node, &block)
304
+ fetch(node).each(&block)
305
+ end
306
+ end
307
+
308
+ each { |u|
309
+ dig[u] = a = []
310
+ each{ |v| func.call(u, v) and a << v }
311
+ }
312
+
313
+ set = Set.new()
314
+ dig.each_strongly_connected_component { |css|
315
+ set.add(self.class.new(css))
316
+ }
317
+ set
318
+ else
319
+ Set.new(classify(&func).values)
320
+ end
321
+ end
322
+ end
323
+
324
+ # sorted_set without rbtree dependency, vendored from
325
+ # https://github.com/ruby/set/blob/72f08c4/lib/set.rb#L731-L800
326
+ class CharacterSet::RubyFallback::SortedSet < CharacterSet::RubyFallback::Set
327
+ def initialize(*args)
328
+ @keys = nil
329
+ super
330
+ end
331
+
332
+ def clear
333
+ @keys = nil
334
+ super
335
+ end
336
+
337
+ def add(o)
338
+ @keys = nil
339
+ super
340
+ end
341
+ alias << add
342
+
343
+ def delete(o)
344
+ @keys = nil
345
+ @hash.delete(o)
346
+ self
347
+ end
348
+
349
+ def delete_if
350
+ block_given? or return enum_for(__method__) { size }
351
+ n = @hash.size
352
+ super
353
+ @keys = nil if @hash.size != n
354
+ self
355
+ end
356
+
357
+ def keep_if
358
+ block_given? or return enum_for(__method__) { size }
359
+ n = @hash.size
360
+ super
361
+ @keys = nil if @hash.size != n
362
+ self
363
+ end
364
+
365
+ def merge(enum)
366
+ @keys = nil
367
+ super
368
+ end
369
+
370
+ def each(&block)
371
+ block or return enum_for(__method__) { size }
372
+ to_a.each(&block)
373
+ self
374
+ end
375
+
376
+ def to_a
377
+ (@keys = @hash.keys).sort! unless @keys
378
+ @keys.dup
379
+ end
380
+
381
+ def freeze
382
+ to_a
383
+ super
384
+ end
385
+ end
@@ -1,4 +1,3 @@
1
- require 'set'
2
1
  require 'character_set/ruby_fallback/set_methods'
3
2
  require 'character_set/ruby_fallback/character_set_methods'
4
3
 
@@ -12,8 +11,25 @@ class CharacterSet
12
11
  end
13
12
 
14
13
  def initialize(enum = [])
15
- @__set = SortedSet.new
14
+ @__set = CharacterSet::RubyFallback::SortedSet.new
16
15
  super
17
16
  end
18
17
  end
19
18
  end
19
+
20
+ if RUBY_PLATFORM[/java/i]
21
+ # JRuby has sorted_set in the stdlib.
22
+ require 'set'
23
+ CharacterSet::RubyFallback::Set = ::Set
24
+ CharacterSet::RubyFallback::SortedSet = ::SortedSet
25
+ else
26
+ # For other rubies, set/sorted_set are vendored due to dependency issues:
27
+ #
28
+ # - issues with default vs. installed gems such as [#2]
29
+ # - issues with the sorted_set dependency rb_tree
30
+ # - long-standing issues in recent versions of sorted_set
31
+ #
32
+ # The RubyFallback, and thus these set classes, are only used for testing,
33
+ # and for exotic rubies which use neither C nor Java.
34
+ require 'character_set/ruby_fallback/vendored_set_classes'
35
+ end
@@ -22,13 +22,14 @@ class CharacterSet
22
22
 
23
23
  # Allow some methods to take an Enum just as well as another CharacterSet.
24
24
  # Tested by ruby-spec.
25
- %w[& + - ^ | difference intersection subtract union].each do |method|
25
+ %w[& + - ^ | <=> difference disjoint? intersect? intersection
26
+ subtract union].each do |method|
26
27
  class_eval <<-RUBY, __FILE__, __LINE__ + 1
27
28
  def #{method}(arg)
28
29
  if arg.is_a?(CharacterSet)
29
- super
30
+ super(arg)
30
31
  elsif arg.respond_to?(:each)
31
- super(CharacterSet.new(arg.to_a))
32
+ super(self.class.new(arg.to_a))
32
33
  else
33
34
  raise ArgumentError, 'pass an enumerable'
34
35
  end
@@ -15,6 +15,12 @@ class CharacterSet
15
15
  new(Array(args))
16
16
  end
17
17
 
18
+ def of(*args)
19
+ args.map do |arg|
20
+ arg.is_a?(Regexp) ? of_regexp(arg) : of_string(arg)
21
+ end.reduce(:merge) || new
22
+ end
23
+
18
24
  def parse(string)
19
25
  codepoints = Parser.codepoints_from_bracket_expression(string)
20
26
  result = new(codepoints)
@@ -22,33 +28,29 @@ class CharacterSet
22
28
  end
23
29
 
24
30
  def of_property(property_name)
25
- require_optional_dependency('regexp_property_values')
31
+ require_optional_dependency('regexp_property_values', __method__)
26
32
 
27
33
  property = RegexpPropertyValues[property_name.to_s]
28
34
  from_ranges(*property.matched_ranges)
29
35
  end
30
36
 
31
37
  def of_regexp(regexp)
32
- require_optional_dependency('regexp_parser')
38
+ require_optional_dependency('regexp_parser', __method__)
33
39
 
34
40
  root = ::Regexp::Parser.parse(regexp)
35
41
  of_expression(root)
36
42
  end
37
43
 
38
44
  def of_expression(expression)
39
- ExpressionConverter.convert(expression)
45
+ ExpressionConverter.convert(expression, self)
40
46
  end
41
47
 
42
- def require_optional_dependency(name)
48
+ def require_optional_dependency(name, method)
43
49
  required_optional_dependencies[name] ||= begin
44
50
  require name
45
51
  true
46
52
  rescue ::LoadError
47
- entry_point = caller_locations.reverse.find do |loc|
48
- loc.absolute_path.to_s.include?('/lib/character_set')
49
- end
50
- method = entry_point && entry_point.label
51
- raise LoadError, 'You must the install the optional dependency '\
53
+ raise LoadError, 'You must install the optional dependency '\
52
54
  "'\#{name}' to use the method `\#{method}'."
53
55
  end
54
56
  end
@@ -94,6 +96,14 @@ class CharacterSet
94
96
  Writer.write_surrogate_alternation(bmp_part.ranges, astral_part.ranges)
95
97
  end
96
98
 
99
+ def secure_token(length = 32)
100
+ CharacterSet.require_optional_dependency('securerandom', __method__)
101
+ cps = to_a
102
+ len = cps.count
103
+ 1.upto(length).map { cps[SecureRandom.random_number(len)] }.pack('U*')
104
+ end
105
+ alias random_token secure_token
106
+
97
107
  def inspect
98
108
  len = length
99
109
  "#<#{klass.name}: {\#{first(5) * ', '}\#{'...' if len > 5}} (size: \#{len})>"
@@ -155,8 +165,12 @@ class CharacterSet
155
165
  end
156
166
 
157
167
  def divide(&func)
158
- require 'set'
159
- Set.new(to_a).divide(&func)
168
+ require 'character_set/ruby_fallback'
169
+ CharacterSet::RubyFallback::Set.new(to_a).divide(&func)
170
+ end
171
+
172
+ def join(separator = '')
173
+ to_a(true).join(separator)
160
174
  end
161
175
  RUBY
162
176
 
@@ -1,3 +1,3 @@
1
1
  class CharacterSet
2
- VERSION = '1.4.0'
2
+ VERSION = '1.8.0'
3
3
  end
@@ -0,0 +1,20 @@
1
+ desc 'Run all IPS benchmarks'
2
+ task :benchmark do
3
+ Dir["#{__dir__}/benchmarks/*.rb"].sort.each { |file| load(file) }
4
+ end
5
+
6
+ namespace :benchmark do
7
+ desc 'Run all IPS benchmarks and store the comparison results in BENCHMARK.md'
8
+ task :write_to_file do
9
+ Rake.application[:benchmark].invoke
10
+
11
+ # extract comparison results from reports
12
+ results = $benchmark_results
13
+ .map { |caption, report| "```\n#{caption}\n\n#{report[/(?<=Comparison:).+/m].strip}\n```" }
14
+ .join("\n")
15
+ .gsub(/ \(±[^)]+\) |(?<=same-ish).*/, '') # remove some noise
16
+
17
+ File.write "#{__dir__}/../BENCHMARK.md",
18
+ "Results of `rake:benchmark` on #{RUBY_DESCRIPTION}\n\n#{results}\n"
19
+ end
20
+ end
@@ -2,24 +2,28 @@ require_relative './shared'
2
2
 
3
3
  str = 'Lorem ipsum et dolorem'
4
4
  rx = /\s/
5
+ trt = "\t\n\v\f\r\s"
5
6
  cs = CharacterSet.whitespace
6
7
 
7
8
  benchmark(
8
- caption: 'Removing whitespace',
9
+ caption: 'Removing ASCII whitespace',
9
10
  cases: {
10
11
  'String#gsub' => -> { str.gsub(rx, '') },
12
+ 'String#tr' => -> { str.tr(trt, '') },
11
13
  'CharacterSet#delete_in' => -> { cs.delete_in(str) },
12
14
  }
13
15
  )
14
16
 
15
17
  str = 'Lörem ipsüm ⛷ et dölörem'
16
18
  rx = /[\s\p{emoji}äüö]/
19
+ trt = "\t\n\v\f\r\s😀-🙏äüö"
17
20
  cs = CharacterSet.whitespace + CharacterSet.emoji + CharacterSet['ä', 'ö', 'ü']
18
21
 
19
22
  benchmark(
20
23
  caption: 'Removing whitespace, emoji and umlauts',
21
24
  cases: {
22
25
  'String#gsub' => -> { str.gsub(rx, '') },
26
+ 'String#tr' => -> { str.tr(trt, '') },
23
27
  'CharacterSet#delete_in' => -> { cs.delete_in(str) },
24
28
  }
25
29
  )
@@ -2,24 +2,28 @@ require_relative './shared'
2
2
 
3
3
  str = 'Lorem ipsum et dolorem'
4
4
  rx = /\S/
5
+ trt = "\u{0080}-\u{10FFFF}" # approximation
5
6
  cs = CharacterSet.whitespace
6
7
 
7
8
  benchmark(
8
9
  caption: 'Removing non-whitespace',
9
10
  cases: {
10
11
  'String#gsub' => -> { str.gsub(rx, '') },
12
+ 'String#tr' => -> { str.tr(trt, '') },
11
13
  'CharacterSet#keep_in' => -> { cs.keep_in(str) },
12
14
  }
13
15
  )
14
16
 
15
17
  str = 'Lorem ipsum ⛷ et dolorem'
16
18
  rx = /\p{^emoji}/
19
+ trt = "\u0000-\u{1F599}\u{1F650}-\u{10FFFF}"
17
20
  cs = CharacterSet.emoji
18
21
 
19
22
  benchmark(
20
- caption: 'Extracting emoji',
23
+ caption: 'Keeping only emoji',
21
24
  cases: {
22
25
  'String#gsub' => -> { str.gsub(rx, '') },
26
+ 'String#tr' => -> { str.tr(trt, '') },
23
27
  'CharacterSet#keep_in' => -> { cs.keep_in(str) },
24
28
  }
25
29
  )
@@ -0,0 +1,28 @@
1
+ require 'benchmark/ips'
2
+ require_relative '../../lib/character_set'
3
+ if RUBY_VERSION.to_f >= 3.0 && !RUBY_PLATFORM[/java/i]
4
+ require 'sorted_set'
5
+ else
6
+ require 'set'
7
+ end
8
+
9
+ def benchmark(caption: nil, cases: {})
10
+ with_stdouts($stdout, string_io = StringIO.new) do
11
+ puts caption
12
+ Benchmark.ips do |x|
13
+ cases.each { |label, callable| x.report(label, &callable) }
14
+ x.compare!
15
+ end
16
+ end
17
+ ($benchmark_results ||= {})[caption] = string_io.string
18
+ end
19
+
20
+ def with_stdouts(*ios)
21
+ old_stdout = $stdout
22
+ ios.define_singleton_method(:method_missing) { |*args| each { |io| io.send(*args) } }
23
+ ios.define_singleton_method(:respond_to?) { |*args| IO.respond_to?(*args) }
24
+ $stdout = ios
25
+ yield
26
+ ensure
27
+ $stdout = old_stdout
28
+ end
@@ -0,0 +1,20 @@
1
+ desc 'Download unicode casefold data and write new C header file'
2
+ task :sync_casefold_data do
3
+ src_path = './CaseFolding.txt'
4
+ dst_path = "#{__dir__}/../ext/character_set/unicode_casefold_table.h"
5
+
6
+ `wget http://www.unicode.org/Public/UNIDATA/CaseFolding.txt`
7
+
8
+ mapping = File.foreach(src_path).each_with_object({}) do |line, hash|
9
+ from, type, to = line.split(/\s*;\s*/).first(3)
10
+ # type 'C' stands for 'common', excludes mappings to multiple chars
11
+ hash[from] = to if type == 'C'
12
+ end.sort
13
+
14
+ content = File.read(dst_path + '.tmpl')
15
+ .sub(/(CASEFOLD_COUNT )0/, "\\1#{mapping.count}")
16
+ .sub('{}', ['{', mapping.map { |a, b| "{0x#{a},0x#{b}}," }, '}'].join("\n"))
17
+
18
+ File.write(dst_path, content)
19
+ File.unlink(src_path)
20
+ end
@@ -0,0 +1,9 @@
1
+ desc 'Update codepoint data for predefined sets, based on Onigmo'
2
+ task :sync_predefined_sets do
3
+ %w[assigned emoji whitespace].each do |prop|
4
+ require 'regexp_property_values'
5
+ ranges = RegexpPropertyValues[prop].matched_ranges
6
+ str = ranges.map { |r| "#{r.min.to_s(16)},#{r.max.to_s(16)}\n" }.join.upcase
7
+ File.write("#{__dir__}/../lib/character_set/predefined_sets/#{prop}.cps", str, mode: 'w')
8
+ end
9
+ end