character_set 1.6.0-java → 1.7.0-java
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/BENCHMARK.md +32 -32
- data/CHANGELOG.md +15 -1
- data/README.md +1 -1
- data/Rakefile +2 -123
- data/character_set.gemspec +0 -7
- data/ext/character_set/character_set.c +64 -43
- data/lib/character_set/parser.rb +8 -4
- data/lib/character_set/predefined_sets/assigned.cps +73 -52
- data/lib/character_set/predefined_sets/emoji.cps +10 -9
- data/lib/character_set/ruby_fallback/character_set_methods.rb +15 -14
- data/lib/character_set/ruby_fallback/set_methods.rb +4 -18
- data/lib/character_set/ruby_fallback/vendored_set_classes.rb +492 -0
- data/lib/character_set/ruby_fallback.rb +2 -6
- data/lib/character_set/shared_methods.rb +2 -2
- data/lib/character_set/version.rb +1 -1
- data/tasks/benchmark.rake +20 -0
- data/tasks/benchmarks/shared.rb +28 -0
- data/tasks/sync_casefold_data.rake +20 -0
- data/tasks/sync_predefined_sets.rake +9 -0
- data/tasks/sync_ruby_spec.rake +65 -0
- metadata +19 -28
- data/benchmarks/shared.rb +0 -30
- /data/{benchmarks → tasks/benchmarks}/count_in.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/cover.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/delete_in.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/keep_in.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/scan.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/used_by.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_add.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_delete.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_merge.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_minmax.rb +0 -0
@@ -0,0 +1,492 @@
|
|
1
|
+
# set and sorted_set are vendored due to various dependency issues:
|
2
|
+
#
|
3
|
+
# - issues with default vs. installed gems such as [#2]
|
4
|
+
# - issues with the sorted_set dependency rb_tree
|
5
|
+
# - long-standing issues in recent versions of sorted_set
|
6
|
+
#
|
7
|
+
# The RubyFallback (and thus these set classes) is only used for testing,
|
8
|
+
# and for exotic rubies which use neither C nor Java.
|
9
|
+
|
10
|
+
class CharacterSet
|
11
|
+
module RubyFallback
|
12
|
+
if RUBY_PLATFORM[/java/i]
|
13
|
+
# Vendoring is not needed for JRuby, which has sorted_set in the stdlib.
|
14
|
+
require 'set'
|
15
|
+
|
16
|
+
Set = ::Set
|
17
|
+
SortedSet = ::SortedSet
|
18
|
+
else
|
19
|
+
# set, vendored from https://github.com/ruby/set/blob/master/lib/set.rb,
|
20
|
+
# with comments removed and linted.
|
21
|
+
class Set
|
22
|
+
include Enumerable
|
23
|
+
|
24
|
+
def self.[](*ary)
|
25
|
+
new(ary)
|
26
|
+
end
|
27
|
+
|
28
|
+
def initialize(enum = nil, &block)
|
29
|
+
@hash = Hash.new(false)
|
30
|
+
|
31
|
+
enum.nil? and return
|
32
|
+
|
33
|
+
if block
|
34
|
+
do_with_enum(enum) { |o| add(block[o]) }
|
35
|
+
else
|
36
|
+
merge(enum)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def compare_by_identity
|
41
|
+
if @hash.respond_to?(:compare_by_identity)
|
42
|
+
@hash.compare_by_identity
|
43
|
+
self
|
44
|
+
else
|
45
|
+
raise NotImplementedError, "#{self.class.name}\##{__method__} is not implemented"
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def compare_by_identity?
|
50
|
+
@hash.respond_to?(:compare_by_identity?) && @hash.compare_by_identity?
|
51
|
+
end
|
52
|
+
|
53
|
+
def do_with_enum(enum, &block)
|
54
|
+
if enum.respond_to?(:each_entry)
|
55
|
+
enum.each_entry(&block) if block
|
56
|
+
elsif enum.respond_to?(:each)
|
57
|
+
enum.each(&block) if block
|
58
|
+
else
|
59
|
+
raise ArgumentError, "value must be enumerable"
|
60
|
+
end
|
61
|
+
end
|
62
|
+
private :do_with_enum
|
63
|
+
|
64
|
+
def initialize_dup(orig)
|
65
|
+
super
|
66
|
+
@hash = orig.instance_variable_get(:@hash).dup
|
67
|
+
end
|
68
|
+
|
69
|
+
if Kernel.instance_method(:initialize_clone).arity != 1
|
70
|
+
def initialize_clone(orig, **options)
|
71
|
+
super
|
72
|
+
@hash = orig.instance_variable_get(:@hash).clone(**options)
|
73
|
+
end
|
74
|
+
else
|
75
|
+
def initialize_clone(orig)
|
76
|
+
super
|
77
|
+
@hash = orig.instance_variable_get(:@hash).clone
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def freeze
|
82
|
+
@hash.freeze
|
83
|
+
super
|
84
|
+
end
|
85
|
+
|
86
|
+
def size
|
87
|
+
@hash.size
|
88
|
+
end
|
89
|
+
alias length size
|
90
|
+
|
91
|
+
def empty?
|
92
|
+
@hash.empty?
|
93
|
+
end
|
94
|
+
|
95
|
+
def clear
|
96
|
+
@hash.clear
|
97
|
+
self
|
98
|
+
end
|
99
|
+
|
100
|
+
def replace(enum)
|
101
|
+
if enum.instance_of?(self.class)
|
102
|
+
@hash.replace(enum.instance_variable_get(:@hash))
|
103
|
+
self
|
104
|
+
else
|
105
|
+
do_with_enum(enum)
|
106
|
+
clear
|
107
|
+
merge(enum)
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
def to_a
|
112
|
+
@hash.keys
|
113
|
+
end
|
114
|
+
|
115
|
+
def to_set(klass = Set, *args, &block)
|
116
|
+
return self if instance_of?(Set) && klass == Set && block.nil? && args.empty?
|
117
|
+
klass.new(self, *args, &block)
|
118
|
+
end
|
119
|
+
|
120
|
+
def flatten_merge(set, seen = Set.new)
|
121
|
+
set.each { |e|
|
122
|
+
if e.is_a?(Set)
|
123
|
+
if seen.include?(e_id = e.object_id)
|
124
|
+
raise ArgumentError, "tried to flatten recursive Set"
|
125
|
+
end
|
126
|
+
|
127
|
+
seen.add(e_id)
|
128
|
+
flatten_merge(e, seen)
|
129
|
+
seen.delete(e_id)
|
130
|
+
else
|
131
|
+
add(e)
|
132
|
+
end
|
133
|
+
}
|
134
|
+
|
135
|
+
self
|
136
|
+
end
|
137
|
+
protected :flatten_merge
|
138
|
+
|
139
|
+
def flatten
|
140
|
+
self.class.new.flatten_merge(self)
|
141
|
+
end
|
142
|
+
|
143
|
+
def flatten!
|
144
|
+
replace(flatten()) if any? { |e| e.is_a?(Set) }
|
145
|
+
end
|
146
|
+
|
147
|
+
def include?(o)
|
148
|
+
@hash[o]
|
149
|
+
end
|
150
|
+
alias member? include?
|
151
|
+
|
152
|
+
def superset?(set)
|
153
|
+
case
|
154
|
+
when set.instance_of?(self.class) && @hash.respond_to?(:>=)
|
155
|
+
@hash >= set.instance_variable_get(:@hash)
|
156
|
+
when set.is_a?(Set)
|
157
|
+
size >= set.size && set.all? { |o| include?(o) }
|
158
|
+
else
|
159
|
+
raise ArgumentError, "value must be a set"
|
160
|
+
end
|
161
|
+
end
|
162
|
+
alias >= superset?
|
163
|
+
|
164
|
+
def proper_superset?(set)
|
165
|
+
case
|
166
|
+
when set.instance_of?(self.class) && @hash.respond_to?(:>)
|
167
|
+
@hash > set.instance_variable_get(:@hash)
|
168
|
+
when set.is_a?(Set)
|
169
|
+
size > set.size && set.all? { |o| include?(o) }
|
170
|
+
else
|
171
|
+
raise ArgumentError, "value must be a set"
|
172
|
+
end
|
173
|
+
end
|
174
|
+
alias > proper_superset?
|
175
|
+
|
176
|
+
def subset?(set)
|
177
|
+
case
|
178
|
+
when set.instance_of?(self.class) && @hash.respond_to?(:<=)
|
179
|
+
@hash <= set.instance_variable_get(:@hash)
|
180
|
+
when set.is_a?(Set)
|
181
|
+
size <= set.size && all? { |o| set.include?(o) }
|
182
|
+
else
|
183
|
+
raise ArgumentError, "value must be a set"
|
184
|
+
end
|
185
|
+
end
|
186
|
+
alias <= subset?
|
187
|
+
|
188
|
+
def proper_subset?(set)
|
189
|
+
case
|
190
|
+
when set.instance_of?(self.class) && @hash.respond_to?(:<)
|
191
|
+
@hash < set.instance_variable_get(:@hash)
|
192
|
+
when set.is_a?(Set)
|
193
|
+
size < set.size && all? { |o| set.include?(o) }
|
194
|
+
else
|
195
|
+
raise ArgumentError, "value must be a set"
|
196
|
+
end
|
197
|
+
end
|
198
|
+
alias < proper_subset?
|
199
|
+
|
200
|
+
def <=>(set)
|
201
|
+
return unless set.is_a?(Set)
|
202
|
+
|
203
|
+
case size <=> set.size
|
204
|
+
when -1 then -1 if proper_subset?(set)
|
205
|
+
when +1 then +1 if proper_superset?(set)
|
206
|
+
else 0 if self.==(set)
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
210
|
+
def intersect?(set)
|
211
|
+
case set
|
212
|
+
when Set
|
213
|
+
if size < set.size
|
214
|
+
any? { |o| set.include?(o) }
|
215
|
+
else
|
216
|
+
set.any? { |o| include?(o) }
|
217
|
+
end
|
218
|
+
when Enumerable
|
219
|
+
set.any? { |o| include?(o) }
|
220
|
+
else
|
221
|
+
raise ArgumentError, "value must be enumerable"
|
222
|
+
end
|
223
|
+
end
|
224
|
+
|
225
|
+
def disjoint?(set)
|
226
|
+
!intersect?(set)
|
227
|
+
end
|
228
|
+
|
229
|
+
def each(&block)
|
230
|
+
block_given? or return enum_for(__method__) { size }
|
231
|
+
@hash.each_key(&block)
|
232
|
+
self
|
233
|
+
end
|
234
|
+
|
235
|
+
def add(o)
|
236
|
+
@hash[o] = true
|
237
|
+
self
|
238
|
+
end
|
239
|
+
alias << add
|
240
|
+
|
241
|
+
def add?(o)
|
242
|
+
add(o) unless include?(o)
|
243
|
+
end
|
244
|
+
|
245
|
+
def delete(o)
|
246
|
+
@hash.delete(o)
|
247
|
+
self
|
248
|
+
end
|
249
|
+
|
250
|
+
def delete?(o)
|
251
|
+
delete(o) if include?(o)
|
252
|
+
end
|
253
|
+
|
254
|
+
def delete_if
|
255
|
+
block_given? or return enum_for(__method__) { size }
|
256
|
+
select { |o| yield o }.each { |o| @hash.delete(o) }
|
257
|
+
self
|
258
|
+
end
|
259
|
+
|
260
|
+
def keep_if
|
261
|
+
block_given? or return enum_for(__method__) { size }
|
262
|
+
reject { |o| yield o }.each { |o| @hash.delete(o) }
|
263
|
+
self
|
264
|
+
end
|
265
|
+
|
266
|
+
def collect!
|
267
|
+
block_given? or return enum_for(__method__) { size }
|
268
|
+
set = self.class.new
|
269
|
+
each { |o| set << yield(o) }
|
270
|
+
replace(set)
|
271
|
+
end
|
272
|
+
alias map! collect!
|
273
|
+
|
274
|
+
def reject!(&block)
|
275
|
+
block_given? or return enum_for(__method__) { size }
|
276
|
+
n = size
|
277
|
+
delete_if(&block)
|
278
|
+
self if size != n
|
279
|
+
end
|
280
|
+
|
281
|
+
def select!(&block)
|
282
|
+
block_given? or return enum_for(__method__) { size }
|
283
|
+
n = size
|
284
|
+
keep_if(&block)
|
285
|
+
self if size != n
|
286
|
+
end
|
287
|
+
|
288
|
+
alias filter! select!
|
289
|
+
|
290
|
+
def merge(*enums, **_rest)
|
291
|
+
enums.each do |enum|
|
292
|
+
if enum.instance_of?(self.class)
|
293
|
+
@hash.update(enum.instance_variable_get(:@hash))
|
294
|
+
else
|
295
|
+
do_with_enum(enum) { |o| add(o) }
|
296
|
+
end
|
297
|
+
end
|
298
|
+
|
299
|
+
self
|
300
|
+
end
|
301
|
+
|
302
|
+
def subtract(enum)
|
303
|
+
do_with_enum(enum) { |o| delete(o) }
|
304
|
+
self
|
305
|
+
end
|
306
|
+
|
307
|
+
def |(enum)
|
308
|
+
dup.merge(enum)
|
309
|
+
end
|
310
|
+
alias + |
|
311
|
+
alias union |
|
312
|
+
|
313
|
+
def -(enum)
|
314
|
+
dup.subtract(enum)
|
315
|
+
end
|
316
|
+
alias difference -
|
317
|
+
|
318
|
+
def &(enum)
|
319
|
+
n = self.class.new
|
320
|
+
if enum.is_a?(Set)
|
321
|
+
if enum.size > size
|
322
|
+
each { |o| n.add(o) if enum.include?(o) }
|
323
|
+
else
|
324
|
+
enum.each { |o| n.add(o) if include?(o) }
|
325
|
+
end
|
326
|
+
else
|
327
|
+
do_with_enum(enum) { |o| n.add(o) if include?(o) }
|
328
|
+
end
|
329
|
+
n
|
330
|
+
end
|
331
|
+
alias intersection &
|
332
|
+
|
333
|
+
def ^(enum)
|
334
|
+
n = Set.new(enum)
|
335
|
+
each { |o| n.add(o) unless n.delete?(o) }
|
336
|
+
n
|
337
|
+
end
|
338
|
+
|
339
|
+
def ==(other)
|
340
|
+
if self.equal?(other)
|
341
|
+
true
|
342
|
+
elsif other.instance_of?(self.class)
|
343
|
+
@hash == other.instance_variable_get(:@hash)
|
344
|
+
elsif other.is_a?(Set) && self.size == other.size
|
345
|
+
other.all? { |o| @hash.include?(o) }
|
346
|
+
else
|
347
|
+
false
|
348
|
+
end
|
349
|
+
end
|
350
|
+
|
351
|
+
def hash
|
352
|
+
@hash.hash
|
353
|
+
end
|
354
|
+
|
355
|
+
def eql?(o)
|
356
|
+
return false unless o.is_a?(Set)
|
357
|
+
@hash.eql?(o.instance_variable_get(:@hash))
|
358
|
+
end
|
359
|
+
|
360
|
+
def reset
|
361
|
+
if @hash.respond_to?(:rehash)
|
362
|
+
@hash.rehash
|
363
|
+
else
|
364
|
+
raise FrozenError, "can't modify frozen #{self.class.name}" if frozen?
|
365
|
+
end
|
366
|
+
self
|
367
|
+
end
|
368
|
+
alias === include?
|
369
|
+
|
370
|
+
def classify
|
371
|
+
block_given? or return enum_for(__method__) { size }
|
372
|
+
|
373
|
+
h = {}
|
374
|
+
|
375
|
+
each { |i|
|
376
|
+
(h[yield(i)] ||= self.class.new).add(i)
|
377
|
+
}
|
378
|
+
|
379
|
+
h
|
380
|
+
end
|
381
|
+
|
382
|
+
def divide(&func)
|
383
|
+
func or return enum_for(__method__) { size }
|
384
|
+
|
385
|
+
if func.arity == 2
|
386
|
+
require 'tsort'
|
387
|
+
|
388
|
+
class << dig = {}
|
389
|
+
include TSort
|
390
|
+
|
391
|
+
alias tsort_each_node each_key
|
392
|
+
def tsort_each_child(node, &block)
|
393
|
+
fetch(node).each(&block)
|
394
|
+
end
|
395
|
+
end
|
396
|
+
|
397
|
+
each { |u|
|
398
|
+
dig[u] = a = []
|
399
|
+
each{ |v| func.call(u, v) and a << v }
|
400
|
+
}
|
401
|
+
|
402
|
+
set = Set.new()
|
403
|
+
dig.each_strongly_connected_component { |css|
|
404
|
+
set.add(self.class.new(css))
|
405
|
+
}
|
406
|
+
set
|
407
|
+
else
|
408
|
+
Set.new(classify(&func).values)
|
409
|
+
end
|
410
|
+
end
|
411
|
+
|
412
|
+
def join(separator=nil)
|
413
|
+
to_a.join(separator)
|
414
|
+
end
|
415
|
+
end
|
416
|
+
|
417
|
+
# sorted_set without rbtree dependency, vendored from
|
418
|
+
# https://github.com/ruby/set/blob/72f08c4/lib/set.rb#L731-L800
|
419
|
+
class SortedSet < Set
|
420
|
+
def initialize(*args)
|
421
|
+
@keys = nil
|
422
|
+
super
|
423
|
+
end
|
424
|
+
|
425
|
+
def clear
|
426
|
+
@keys = nil
|
427
|
+
super
|
428
|
+
end
|
429
|
+
|
430
|
+
def replace(enum)
|
431
|
+
@keys = nil
|
432
|
+
super
|
433
|
+
end
|
434
|
+
|
435
|
+
def add(o)
|
436
|
+
o.respond_to?(:<=>) or raise ArgumentError, "value must respond to <=>"
|
437
|
+
@keys = nil
|
438
|
+
super
|
439
|
+
end
|
440
|
+
alias << add
|
441
|
+
|
442
|
+
def delete(o)
|
443
|
+
@keys = nil
|
444
|
+
@hash.delete(o)
|
445
|
+
self
|
446
|
+
end
|
447
|
+
|
448
|
+
def delete_if
|
449
|
+
block_given? or return enum_for(__method__) { size }
|
450
|
+
n = @hash.size
|
451
|
+
super
|
452
|
+
@keys = nil if @hash.size != n
|
453
|
+
self
|
454
|
+
end
|
455
|
+
|
456
|
+
def keep_if
|
457
|
+
block_given? or return enum_for(__method__) { size }
|
458
|
+
n = @hash.size
|
459
|
+
super
|
460
|
+
@keys = nil if @hash.size != n
|
461
|
+
self
|
462
|
+
end
|
463
|
+
|
464
|
+
def merge(enum)
|
465
|
+
@keys = nil
|
466
|
+
super
|
467
|
+
end
|
468
|
+
|
469
|
+
def each(&block)
|
470
|
+
block or return enum_for(__method__) { size }
|
471
|
+
to_a.each(&block)
|
472
|
+
self
|
473
|
+
end
|
474
|
+
|
475
|
+
def to_a
|
476
|
+
(@keys = @hash.keys).sort! unless @keys
|
477
|
+
@keys.dup
|
478
|
+
end
|
479
|
+
|
480
|
+
def freeze
|
481
|
+
to_a
|
482
|
+
super
|
483
|
+
end
|
484
|
+
|
485
|
+
def rehash
|
486
|
+
@keys = nil
|
487
|
+
super
|
488
|
+
end
|
489
|
+
end
|
490
|
+
end
|
491
|
+
end
|
492
|
+
end
|
@@ -1,10 +1,6 @@
|
|
1
|
-
if RUBY_VERSION.to_f >= 3.0 && !RUBY_PLATFORM[/java/i]
|
2
|
-
require 'sorted_set'
|
3
|
-
else
|
4
|
-
require 'set'
|
5
|
-
end
|
6
1
|
require 'character_set/ruby_fallback/set_methods'
|
7
2
|
require 'character_set/ruby_fallback/character_set_methods'
|
3
|
+
require 'character_set/ruby_fallback/vendored_set_classes'
|
8
4
|
|
9
5
|
class CharacterSet
|
10
6
|
module RubyFallback
|
@@ -16,7 +12,7 @@ class CharacterSet
|
|
16
12
|
end
|
17
13
|
|
18
14
|
def initialize(enum = [])
|
19
|
-
@__set = SortedSet.new
|
15
|
+
@__set = CharacterSet::RubyFallback::SortedSet.new
|
20
16
|
super
|
21
17
|
end
|
22
18
|
end
|
@@ -165,8 +165,8 @@ class CharacterSet
|
|
165
165
|
end
|
166
166
|
|
167
167
|
def divide(&func)
|
168
|
-
|
169
|
-
Set.new(to_a).divide(&func)
|
168
|
+
require 'character_set/ruby_fallback/vendored_set_classes'
|
169
|
+
CharacterSet::RubyFallback::Set.new(to_a).divide(&func)
|
170
170
|
end
|
171
171
|
RUBY
|
172
172
|
|
@@ -0,0 +1,20 @@
|
|
1
|
+
desc 'Run all IPS benchmarks'
|
2
|
+
task :benchmark do
|
3
|
+
Dir["#{__dir__}/benchmarks/*.rb"].sort.each { |file| load(file) }
|
4
|
+
end
|
5
|
+
|
6
|
+
namespace :benchmark do
|
7
|
+
desc 'Run all IPS benchmarks and store the comparison results in BENCHMARK.md'
|
8
|
+
task :write_to_file do
|
9
|
+
Rake.application[:benchmark].invoke
|
10
|
+
|
11
|
+
# extract comparison results from reports
|
12
|
+
results = $benchmark_results
|
13
|
+
.map { |caption, report| "```\n#{caption}\n\n#{report[/(?<=Comparison:).+/m].strip}\n```" }
|
14
|
+
.join("\n")
|
15
|
+
.gsub(/ \(±[^)]+\) |(?<=same-ish).*/, '') # remove some noise
|
16
|
+
|
17
|
+
File.write "#{__dir__}/../BENCHMARK.md",
|
18
|
+
"Results of `rake:benchmark` on #{RUBY_DESCRIPTION}\n\n#{results}\n"
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'benchmark/ips'
|
2
|
+
require_relative '../../lib/character_set'
|
3
|
+
if RUBY_VERSION.to_f >= 3.0 && !RUBY_PLATFORM[/java/i]
|
4
|
+
require 'sorted_set'
|
5
|
+
else
|
6
|
+
require 'set'
|
7
|
+
end
|
8
|
+
|
9
|
+
def benchmark(caption: nil, cases: {})
|
10
|
+
with_stdouts($stdout, string_io = StringIO.new) do
|
11
|
+
puts caption
|
12
|
+
Benchmark.ips do |x|
|
13
|
+
cases.each { |label, callable| x.report(label, &callable) }
|
14
|
+
x.compare!
|
15
|
+
end
|
16
|
+
end
|
17
|
+
($benchmark_results ||= {})[caption] = string_io.string
|
18
|
+
end
|
19
|
+
|
20
|
+
def with_stdouts(*ios)
|
21
|
+
old_stdout = $stdout
|
22
|
+
ios.define_singleton_method(:method_missing) { |*args| each { |io| io.send(*args) } }
|
23
|
+
ios.define_singleton_method(:respond_to?) { |*args| IO.respond_to?(*args) }
|
24
|
+
$stdout = ios
|
25
|
+
yield
|
26
|
+
ensure
|
27
|
+
$stdout = old_stdout
|
28
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
desc 'Download unicode casefold data and write new C header file'
|
2
|
+
task :sync_casefold_data do
|
3
|
+
src_path = './CaseFolding.txt'
|
4
|
+
dst_path = "#{__dir__}/../ext/character_set/unicode_casefold_table.h"
|
5
|
+
|
6
|
+
`wget http://www.unicode.org/Public/UNIDATA/CaseFolding.txt`
|
7
|
+
|
8
|
+
mapping = File.foreach(src_path).each_with_object({}) do |line, hash|
|
9
|
+
from, type, to = line.split(/\s*;\s*/).first(3)
|
10
|
+
# type 'C' stands for 'common', excludes mappings to multiple chars
|
11
|
+
hash[from] = to if type == 'C'
|
12
|
+
end.sort
|
13
|
+
|
14
|
+
content = File.read(dst_path + '.tmpl')
|
15
|
+
.sub(/(CASEFOLD_COUNT )0/, "\\1#{mapping.count}")
|
16
|
+
.sub('{}', ['{', mapping.map { |a, b| "{0x#{a},0x#{b}}," }, '}'].join("\n"))
|
17
|
+
|
18
|
+
File.write(dst_path, content)
|
19
|
+
File.unlink(src_path)
|
20
|
+
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
desc 'Update codepoint data for predefined sets, based on Onigmo'
|
2
|
+
task :sync_predefined_sets do
|
3
|
+
%w[assigned emoji whitespace].each do |prop|
|
4
|
+
require 'regexp_property_values'
|
5
|
+
ranges = RegexpPropertyValues[prop].matched_ranges
|
6
|
+
str = ranges.map { |r| "#{r.min.to_s(16)},#{r.max.to_s(16)}\n" }.join.upcase
|
7
|
+
File.write("#{__dir__}/../lib/character_set/predefined_sets/#{prop}.cps", str, mode: 'w')
|
8
|
+
end
|
9
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
desc 'Download relevant ruby/spec tests, adapt to CharacterSet and its variants'
|
2
|
+
task :sync_ruby_spec do
|
3
|
+
require 'fileutils'
|
4
|
+
|
5
|
+
variants = {
|
6
|
+
'CharacterSet' => "#{__dir__}/../spec/ruby-spec/library/character_set",
|
7
|
+
'CharacterSet::Pure' => "#{__dir__}/../spec/ruby-spec/library/character_set_pure",
|
8
|
+
}
|
9
|
+
|
10
|
+
# download fresh specs from ruby/spec repository
|
11
|
+
variants.each do |_, dir|
|
12
|
+
FileUtils.rm_rf(dir)
|
13
|
+
`svn export https://github.com/ruby/spec/trunk/library/set/sortedset #{dir}`
|
14
|
+
end
|
15
|
+
|
16
|
+
# make copies for each CharacterSet variant
|
17
|
+
base = variants.first[1]
|
18
|
+
variants.each_value { |dir| FileUtils.copy_entry(base, dir) unless dir == base }
|
19
|
+
|
20
|
+
# adapt specs to work with CharacterSet
|
21
|
+
variants.each do |class_name, dir|
|
22
|
+
Dir["#{dir}/**/*.rb"].each do |spec|
|
23
|
+
# ignore some tests that do not apply or are covered otherwise
|
24
|
+
if spec =~ %r{/(classify|divide|flatten|initialize|pretty_print)}
|
25
|
+
File.delete(spec)
|
26
|
+
next
|
27
|
+
end
|
28
|
+
|
29
|
+
adapted_content =
|
30
|
+
File.read(spec).
|
31
|
+
# adapt class name
|
32
|
+
gsub('SortedSet', (spec['/shared/'] ? 'variant' : class_name)).
|
33
|
+
gsub(/(it_behaves_like :[^,\n]+), (:[^,\n]+)/, "\\1, #{class_name}, \\2").
|
34
|
+
# get shared specs from a single shared dir at the parent level
|
35
|
+
gsub(/(require_relative ['"])(shared\/)/, '\1../\2').
|
36
|
+
# make 'mspec' syntax rspec-compatible
|
37
|
+
gsub(/describe (.*), shared.*$/, 'shared_examples \1 do |variant, method|').
|
38
|
+
gsub(/be_(false|true)/, 'be \1').
|
39
|
+
gsub('stub!', 'stub').
|
40
|
+
gsub('mock', 'double').
|
41
|
+
gsub('@method', 'method').
|
42
|
+
# remove unneeded requires
|
43
|
+
gsub(/require 'set'\n/, '').
|
44
|
+
gsub(/require.*spec_helper.*\n/, '').
|
45
|
+
gsub(/\A\n+/, '').
|
46
|
+
# make examples use Integers/codepoints
|
47
|
+
gsub(/1\.0|"cat"|"dog"|"hello"|"test"/, '0').
|
48
|
+
gsub('"one"', '1').
|
49
|
+
gsub('"two"', '2').
|
50
|
+
gsub('"three"', '3').
|
51
|
+
gsub('"four"', '4').
|
52
|
+
gsub('"five"', '5').
|
53
|
+
gsub(/x.(size|length) == 3/, 'x != 3').
|
54
|
+
gsub(/x.(size|length) != 3/, 'x == 3').
|
55
|
+
gsub(/(add)\(\d\)(\.to_a \}.should raise)/, '\1(:foo)\2')
|
56
|
+
|
57
|
+
File.open(spec, 'w') { |f| f.puts adapted_content }
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
# keep only one copy of the shared specs, at the parent level
|
62
|
+
FileUtils.rm_rf(base + '/../shared')
|
63
|
+
FileUtils.mv(base + '/shared', base + '/../')
|
64
|
+
variants.each_value { |dir| FileUtils.rm_rf(dir + '/shared') }
|
65
|
+
end
|