character_set 1.6.0-java → 1.7.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/BENCHMARK.md +32 -32
- data/CHANGELOG.md +15 -1
- data/README.md +1 -1
- data/Rakefile +2 -123
- data/character_set.gemspec +0 -7
- data/ext/character_set/character_set.c +64 -43
- data/lib/character_set/parser.rb +8 -4
- data/lib/character_set/predefined_sets/assigned.cps +73 -52
- data/lib/character_set/predefined_sets/emoji.cps +10 -9
- data/lib/character_set/ruby_fallback/character_set_methods.rb +15 -14
- data/lib/character_set/ruby_fallback/set_methods.rb +4 -18
- data/lib/character_set/ruby_fallback/vendored_set_classes.rb +492 -0
- data/lib/character_set/ruby_fallback.rb +2 -6
- data/lib/character_set/shared_methods.rb +2 -2
- data/lib/character_set/version.rb +1 -1
- data/tasks/benchmark.rake +20 -0
- data/tasks/benchmarks/shared.rb +28 -0
- data/tasks/sync_casefold_data.rake +20 -0
- data/tasks/sync_predefined_sets.rake +9 -0
- data/tasks/sync_ruby_spec.rake +65 -0
- metadata +19 -28
- data/benchmarks/shared.rb +0 -30
- /data/{benchmarks → tasks/benchmarks}/count_in.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/cover.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/delete_in.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/keep_in.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/scan.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/used_by.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_add.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_delete.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_merge.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_minmax.rb +0 -0
@@ -0,0 +1,492 @@
|
|
1
|
+
# set and sorted_set are vendored due to various dependency issues:
|
2
|
+
#
|
3
|
+
# - issues with default vs. installed gems such as [#2]
|
4
|
+
# - issues with the sorted_set dependency rbtree
|
5
|
+
# - long-standing issues in recent versions of sorted_set
|
6
|
+
#
|
7
|
+
# The RubyFallback (and thus these set classes) is only used for testing,
|
8
|
+
# and for exotic rubies which use neither C nor Java.
|
9
|
+
|
10
|
+
class CharacterSet
|
11
|
+
module RubyFallback
|
12
|
+
if RUBY_PLATFORM[/java/i]
|
13
|
+
# Vendoring is not needed for JRuby, which has sorted_set in the stdlib.
|
14
|
+
require 'set'
|
15
|
+
|
16
|
+
Set = ::Set
|
17
|
+
SortedSet = ::SortedSet
|
18
|
+
else
|
19
|
+
# set, vendored from https://github.com/ruby/set/blob/master/lib/set.rb,
|
20
|
+
# with comments removed and linted.
|
21
|
+
class Set
|
22
|
+
include Enumerable
|
23
|
+
|
24
|
+
def self.[](*ary)
|
25
|
+
new(ary)
|
26
|
+
end
|
27
|
+
|
28
|
+
def initialize(enum = nil, &block)
|
29
|
+
@hash = Hash.new(false)
|
30
|
+
|
31
|
+
enum.nil? and return
|
32
|
+
|
33
|
+
if block
|
34
|
+
do_with_enum(enum) { |o| add(block[o]) }
|
35
|
+
else
|
36
|
+
merge(enum)
|
37
|
+
end
|
38
|
+
end
|
39
|
+
|
40
|
+
def compare_by_identity
|
41
|
+
if @hash.respond_to?(:compare_by_identity)
|
42
|
+
@hash.compare_by_identity
|
43
|
+
self
|
44
|
+
else
|
45
|
+
raise NotImplementedError, "#{self.class.name}\##{__method__} is not implemented"
|
46
|
+
end
|
47
|
+
end
|
48
|
+
|
49
|
+
def compare_by_identity?
|
50
|
+
@hash.respond_to?(:compare_by_identity?) && @hash.compare_by_identity?
|
51
|
+
end
|
52
|
+
|
53
|
+
def do_with_enum(enum, &block)
|
54
|
+
if enum.respond_to?(:each_entry)
|
55
|
+
enum.each_entry(&block) if block
|
56
|
+
elsif enum.respond_to?(:each)
|
57
|
+
enum.each(&block) if block
|
58
|
+
else
|
59
|
+
raise ArgumentError, "value must be enumerable"
|
60
|
+
end
|
61
|
+
end
|
62
|
+
private :do_with_enum
|
63
|
+
|
64
|
+
def initialize_dup(orig)
|
65
|
+
super
|
66
|
+
@hash = orig.instance_variable_get(:@hash).dup
|
67
|
+
end
|
68
|
+
|
69
|
+
if Kernel.instance_method(:initialize_clone).arity != 1
|
70
|
+
def initialize_clone(orig, **options)
|
71
|
+
super
|
72
|
+
@hash = orig.instance_variable_get(:@hash).clone(**options)
|
73
|
+
end
|
74
|
+
else
|
75
|
+
def initialize_clone(orig)
|
76
|
+
super
|
77
|
+
@hash = orig.instance_variable_get(:@hash).clone
|
78
|
+
end
|
79
|
+
end
|
80
|
+
|
81
|
+
def freeze
|
82
|
+
@hash.freeze
|
83
|
+
super
|
84
|
+
end
|
85
|
+
|
86
|
+
def size
|
87
|
+
@hash.size
|
88
|
+
end
|
89
|
+
alias length size
|
90
|
+
|
91
|
+
def empty?
|
92
|
+
@hash.empty?
|
93
|
+
end
|
94
|
+
|
95
|
+
def clear
|
96
|
+
@hash.clear
|
97
|
+
self
|
98
|
+
end
|
99
|
+
|
100
|
+
def replace(enum)
|
101
|
+
if enum.instance_of?(self.class)
|
102
|
+
@hash.replace(enum.instance_variable_get(:@hash))
|
103
|
+
self
|
104
|
+
else
|
105
|
+
do_with_enum(enum)
|
106
|
+
clear
|
107
|
+
merge(enum)
|
108
|
+
end
|
109
|
+
end
|
110
|
+
|
111
|
+
def to_a
|
112
|
+
@hash.keys
|
113
|
+
end
|
114
|
+
|
115
|
+
def to_set(klass = Set, *args, &block)
|
116
|
+
return self if instance_of?(Set) && klass == Set && block.nil? && args.empty?
|
117
|
+
klass.new(self, *args, &block)
|
118
|
+
end
|
119
|
+
|
120
|
+
def flatten_merge(set, seen = Set.new)
|
121
|
+
set.each { |e|
|
122
|
+
if e.is_a?(Set)
|
123
|
+
if seen.include?(e_id = e.object_id)
|
124
|
+
raise ArgumentError, "tried to flatten recursive Set"
|
125
|
+
end
|
126
|
+
|
127
|
+
seen.add(e_id)
|
128
|
+
flatten_merge(e, seen)
|
129
|
+
seen.delete(e_id)
|
130
|
+
else
|
131
|
+
add(e)
|
132
|
+
end
|
133
|
+
}
|
134
|
+
|
135
|
+
self
|
136
|
+
end
|
137
|
+
protected :flatten_merge
|
138
|
+
|
139
|
+
def flatten
|
140
|
+
self.class.new.flatten_merge(self)
|
141
|
+
end
|
142
|
+
|
143
|
+
def flatten!
|
144
|
+
replace(flatten()) if any? { |e| e.is_a?(Set) }
|
145
|
+
end
|
146
|
+
|
147
|
+
def include?(o)
|
148
|
+
@hash[o]
|
149
|
+
end
|
150
|
+
alias member? include?
|
151
|
+
|
152
|
+
def superset?(set)
|
153
|
+
case
|
154
|
+
when set.instance_of?(self.class) && @hash.respond_to?(:>=)
|
155
|
+
@hash >= set.instance_variable_get(:@hash)
|
156
|
+
when set.is_a?(Set)
|
157
|
+
size >= set.size && set.all? { |o| include?(o) }
|
158
|
+
else
|
159
|
+
raise ArgumentError, "value must be a set"
|
160
|
+
end
|
161
|
+
end
|
162
|
+
alias >= superset?
|
163
|
+
|
164
|
+
def proper_superset?(set)
|
165
|
+
case
|
166
|
+
when set.instance_of?(self.class) && @hash.respond_to?(:>)
|
167
|
+
@hash > set.instance_variable_get(:@hash)
|
168
|
+
when set.is_a?(Set)
|
169
|
+
size > set.size && set.all? { |o| include?(o) }
|
170
|
+
else
|
171
|
+
raise ArgumentError, "value must be a set"
|
172
|
+
end
|
173
|
+
end
|
174
|
+
alias > proper_superset?
|
175
|
+
|
176
|
+
def subset?(set)
|
177
|
+
case
|
178
|
+
when set.instance_of?(self.class) && @hash.respond_to?(:<=)
|
179
|
+
@hash <= set.instance_variable_get(:@hash)
|
180
|
+
when set.is_a?(Set)
|
181
|
+
size <= set.size && all? { |o| set.include?(o) }
|
182
|
+
else
|
183
|
+
raise ArgumentError, "value must be a set"
|
184
|
+
end
|
185
|
+
end
|
186
|
+
alias <= subset?
|
187
|
+
|
188
|
+
def proper_subset?(set)
|
189
|
+
case
|
190
|
+
when set.instance_of?(self.class) && @hash.respond_to?(:<)
|
191
|
+
@hash < set.instance_variable_get(:@hash)
|
192
|
+
when set.is_a?(Set)
|
193
|
+
size < set.size && all? { |o| set.include?(o) }
|
194
|
+
else
|
195
|
+
raise ArgumentError, "value must be a set"
|
196
|
+
end
|
197
|
+
end
|
198
|
+
alias < proper_subset?
|
199
|
+
|
200
|
+
def <=>(set)
|
201
|
+
return unless set.is_a?(Set)
|
202
|
+
|
203
|
+
case size <=> set.size
|
204
|
+
when -1 then -1 if proper_subset?(set)
|
205
|
+
when +1 then +1 if proper_superset?(set)
|
206
|
+
else 0 if self.==(set)
|
207
|
+
end
|
208
|
+
end
|
209
|
+
|
210
|
+
def intersect?(set)
|
211
|
+
case set
|
212
|
+
when Set
|
213
|
+
if size < set.size
|
214
|
+
any? { |o| set.include?(o) }
|
215
|
+
else
|
216
|
+
set.any? { |o| include?(o) }
|
217
|
+
end
|
218
|
+
when Enumerable
|
219
|
+
set.any? { |o| include?(o) }
|
220
|
+
else
|
221
|
+
raise ArgumentError, "value must be enumerable"
|
222
|
+
end
|
223
|
+
end
|
224
|
+
|
225
|
+
def disjoint?(set)
|
226
|
+
!intersect?(set)
|
227
|
+
end
|
228
|
+
|
229
|
+
def each(&block)
|
230
|
+
block_given? or return enum_for(__method__) { size }
|
231
|
+
@hash.each_key(&block)
|
232
|
+
self
|
233
|
+
end
|
234
|
+
|
235
|
+
def add(o)
|
236
|
+
@hash[o] = true
|
237
|
+
self
|
238
|
+
end
|
239
|
+
alias << add
|
240
|
+
|
241
|
+
def add?(o)
|
242
|
+
add(o) unless include?(o)
|
243
|
+
end
|
244
|
+
|
245
|
+
def delete(o)
|
246
|
+
@hash.delete(o)
|
247
|
+
self
|
248
|
+
end
|
249
|
+
|
250
|
+
def delete?(o)
|
251
|
+
delete(o) if include?(o)
|
252
|
+
end
|
253
|
+
|
254
|
+
def delete_if
|
255
|
+
block_given? or return enum_for(__method__) { size }
|
256
|
+
select { |o| yield o }.each { |o| @hash.delete(o) }
|
257
|
+
self
|
258
|
+
end
|
259
|
+
|
260
|
+
def keep_if
|
261
|
+
block_given? or return enum_for(__method__) { size }
|
262
|
+
reject { |o| yield o }.each { |o| @hash.delete(o) }
|
263
|
+
self
|
264
|
+
end
|
265
|
+
|
266
|
+
def collect!
|
267
|
+
block_given? or return enum_for(__method__) { size }
|
268
|
+
set = self.class.new
|
269
|
+
each { |o| set << yield(o) }
|
270
|
+
replace(set)
|
271
|
+
end
|
272
|
+
alias map! collect!
|
273
|
+
|
274
|
+
def reject!(&block)
|
275
|
+
block_given? or return enum_for(__method__) { size }
|
276
|
+
n = size
|
277
|
+
delete_if(&block)
|
278
|
+
self if size != n
|
279
|
+
end
|
280
|
+
|
281
|
+
def select!(&block)
|
282
|
+
block_given? or return enum_for(__method__) { size }
|
283
|
+
n = size
|
284
|
+
keep_if(&block)
|
285
|
+
self if size != n
|
286
|
+
end
|
287
|
+
|
288
|
+
alias filter! select!
|
289
|
+
|
290
|
+
def merge(*enums, **_rest)
|
291
|
+
enums.each do |enum|
|
292
|
+
if enum.instance_of?(self.class)
|
293
|
+
@hash.update(enum.instance_variable_get(:@hash))
|
294
|
+
else
|
295
|
+
do_with_enum(enum) { |o| add(o) }
|
296
|
+
end
|
297
|
+
end
|
298
|
+
|
299
|
+
self
|
300
|
+
end
|
301
|
+
|
302
|
+
def subtract(enum)
|
303
|
+
do_with_enum(enum) { |o| delete(o) }
|
304
|
+
self
|
305
|
+
end
|
306
|
+
|
307
|
+
def |(enum)
|
308
|
+
dup.merge(enum)
|
309
|
+
end
|
310
|
+
alias + |
|
311
|
+
alias union |
|
312
|
+
|
313
|
+
def -(enum)
|
314
|
+
dup.subtract(enum)
|
315
|
+
end
|
316
|
+
alias difference -
|
317
|
+
|
318
|
+
def &(enum)
|
319
|
+
n = self.class.new
|
320
|
+
if enum.is_a?(Set)
|
321
|
+
if enum.size > size
|
322
|
+
each { |o| n.add(o) if enum.include?(o) }
|
323
|
+
else
|
324
|
+
enum.each { |o| n.add(o) if include?(o) }
|
325
|
+
end
|
326
|
+
else
|
327
|
+
do_with_enum(enum) { |o| n.add(o) if include?(o) }
|
328
|
+
end
|
329
|
+
n
|
330
|
+
end
|
331
|
+
alias intersection &
|
332
|
+
|
333
|
+
def ^(enum)
|
334
|
+
n = Set.new(enum)
|
335
|
+
each { |o| n.add(o) unless n.delete?(o) }
|
336
|
+
n
|
337
|
+
end
|
338
|
+
|
339
|
+
def ==(other)
|
340
|
+
if self.equal?(other)
|
341
|
+
true
|
342
|
+
elsif other.instance_of?(self.class)
|
343
|
+
@hash == other.instance_variable_get(:@hash)
|
344
|
+
elsif other.is_a?(Set) && self.size == other.size
|
345
|
+
other.all? { |o| @hash.include?(o) }
|
346
|
+
else
|
347
|
+
false
|
348
|
+
end
|
349
|
+
end
|
350
|
+
|
351
|
+
def hash
|
352
|
+
@hash.hash
|
353
|
+
end
|
354
|
+
|
355
|
+
def eql?(o)
|
356
|
+
return false unless o.is_a?(Set)
|
357
|
+
@hash.eql?(o.instance_variable_get(:@hash))
|
358
|
+
end
|
359
|
+
|
360
|
+
def reset
|
361
|
+
if @hash.respond_to?(:rehash)
|
362
|
+
@hash.rehash
|
363
|
+
else
|
364
|
+
raise FrozenError, "can't modify frozen #{self.class.name}" if frozen?
|
365
|
+
end
|
366
|
+
self
|
367
|
+
end
|
368
|
+
alias === include?
|
369
|
+
|
370
|
+
def classify
|
371
|
+
block_given? or return enum_for(__method__) { size }
|
372
|
+
|
373
|
+
h = {}
|
374
|
+
|
375
|
+
each { |i|
|
376
|
+
(h[yield(i)] ||= self.class.new).add(i)
|
377
|
+
}
|
378
|
+
|
379
|
+
h
|
380
|
+
end
|
381
|
+
|
382
|
+
def divide(&func)
|
383
|
+
func or return enum_for(__method__) { size }
|
384
|
+
|
385
|
+
if func.arity == 2
|
386
|
+
require 'tsort'
|
387
|
+
|
388
|
+
class << dig = {}
|
389
|
+
include TSort
|
390
|
+
|
391
|
+
alias tsort_each_node each_key
|
392
|
+
def tsort_each_child(node, &block)
|
393
|
+
fetch(node).each(&block)
|
394
|
+
end
|
395
|
+
end
|
396
|
+
|
397
|
+
each { |u|
|
398
|
+
dig[u] = a = []
|
399
|
+
each{ |v| func.call(u, v) and a << v }
|
400
|
+
}
|
401
|
+
|
402
|
+
set = Set.new()
|
403
|
+
dig.each_strongly_connected_component { |css|
|
404
|
+
set.add(self.class.new(css))
|
405
|
+
}
|
406
|
+
set
|
407
|
+
else
|
408
|
+
Set.new(classify(&func).values)
|
409
|
+
end
|
410
|
+
end
|
411
|
+
|
412
|
+
def join(separator=nil)
|
413
|
+
to_a.join(separator)
|
414
|
+
end
|
415
|
+
end
|
416
|
+
|
417
|
+
# sorted_set without rbtree dependency, vendored from
|
418
|
+
# https://github.com/ruby/set/blob/72f08c4/lib/set.rb#L731-L800
|
419
|
+
class SortedSet < Set
|
420
|
+
def initialize(*args)
|
421
|
+
@keys = nil
|
422
|
+
super
|
423
|
+
end
|
424
|
+
|
425
|
+
def clear
|
426
|
+
@keys = nil
|
427
|
+
super
|
428
|
+
end
|
429
|
+
|
430
|
+
def replace(enum)
|
431
|
+
@keys = nil
|
432
|
+
super
|
433
|
+
end
|
434
|
+
|
435
|
+
def add(o)
|
436
|
+
o.respond_to?(:<=>) or raise ArgumentError, "value must respond to <=>"
|
437
|
+
@keys = nil
|
438
|
+
super
|
439
|
+
end
|
440
|
+
alias << add
|
441
|
+
|
442
|
+
def delete(o)
|
443
|
+
@keys = nil
|
444
|
+
@hash.delete(o)
|
445
|
+
self
|
446
|
+
end
|
447
|
+
|
448
|
+
def delete_if
|
449
|
+
block_given? or return enum_for(__method__) { size }
|
450
|
+
n = @hash.size
|
451
|
+
super
|
452
|
+
@keys = nil if @hash.size != n
|
453
|
+
self
|
454
|
+
end
|
455
|
+
|
456
|
+
def keep_if
|
457
|
+
block_given? or return enum_for(__method__) { size }
|
458
|
+
n = @hash.size
|
459
|
+
super
|
460
|
+
@keys = nil if @hash.size != n
|
461
|
+
self
|
462
|
+
end
|
463
|
+
|
464
|
+
def merge(enum)
|
465
|
+
@keys = nil
|
466
|
+
super
|
467
|
+
end
|
468
|
+
|
469
|
+
def each(&block)
|
470
|
+
block or return enum_for(__method__) { size }
|
471
|
+
to_a.each(&block)
|
472
|
+
self
|
473
|
+
end
|
474
|
+
|
475
|
+
def to_a
|
476
|
+
(@keys = @hash.keys).sort! unless @keys
|
477
|
+
@keys.dup
|
478
|
+
end
|
479
|
+
|
480
|
+
def freeze
|
481
|
+
to_a
|
482
|
+
super
|
483
|
+
end
|
484
|
+
|
485
|
+
def rehash
|
486
|
+
@keys = nil
|
487
|
+
super
|
488
|
+
end
|
489
|
+
end
|
490
|
+
end
|
491
|
+
end
|
492
|
+
end
|
@@ -1,10 +1,6 @@
|
|
1
|
-
if RUBY_VERSION.to_f >= 3.0 && !RUBY_PLATFORM[/java/i]
|
2
|
-
require 'sorted_set'
|
3
|
-
else
|
4
|
-
require 'set'
|
5
|
-
end
|
6
1
|
require 'character_set/ruby_fallback/set_methods'
|
7
2
|
require 'character_set/ruby_fallback/character_set_methods'
|
3
|
+
require 'character_set/ruby_fallback/vendored_set_classes'
|
8
4
|
|
9
5
|
class CharacterSet
|
10
6
|
module RubyFallback
|
@@ -16,7 +12,7 @@ class CharacterSet
|
|
16
12
|
end
|
17
13
|
|
18
14
|
def initialize(enum = [])
|
19
|
-
@__set = SortedSet.new
|
15
|
+
@__set = CharacterSet::RubyFallback::SortedSet.new
|
20
16
|
super
|
21
17
|
end
|
22
18
|
end
|
@@ -165,8 +165,8 @@ class CharacterSet
|
|
165
165
|
end
|
166
166
|
|
167
167
|
def divide(&func)
|
168
|
-
|
169
|
-
Set.new(to_a).divide(&func)
|
168
|
+
require 'character_set/ruby_fallback/vendored_set_classes'
|
169
|
+
CharacterSet::RubyFallback::Set.new(to_a).divide(&func)
|
170
170
|
end
|
171
171
|
RUBY
|
172
172
|
|
@@ -0,0 +1,20 @@
|
|
1
|
+
desc 'Run all IPS benchmarks'
|
2
|
+
task :benchmark do
|
3
|
+
Dir["#{__dir__}/benchmarks/*.rb"].sort.each { |file| load(file) }
|
4
|
+
end
|
5
|
+
|
6
|
+
namespace :benchmark do
|
7
|
+
desc 'Run all IPS benchmarks and store the comparison results in BENCHMARK.md'
|
8
|
+
task :write_to_file do
|
9
|
+
Rake.application[:benchmark].invoke
|
10
|
+
|
11
|
+
# extract comparison results from reports
|
12
|
+
results = $benchmark_results
|
13
|
+
.map { |caption, report| "```\n#{caption}\n\n#{report[/(?<=Comparison:).+/m].strip}\n```" }
|
14
|
+
.join("\n")
|
15
|
+
.gsub(/ \(±[^)]+\) |(?<=same-ish).*/, '') # remove some noise
|
16
|
+
|
17
|
+
File.write "#{__dir__}/../BENCHMARK.md",
|
18
|
+
"Results of `rake:benchmark` on #{RUBY_DESCRIPTION}\n\n#{results}\n"
|
19
|
+
end
|
20
|
+
end
|
@@ -0,0 +1,28 @@
|
|
1
|
+
require 'benchmark/ips'
|
2
|
+
require_relative '../../lib/character_set'
|
3
|
+
if RUBY_VERSION.to_f >= 3.0 && !RUBY_PLATFORM[/java/i]
|
4
|
+
require 'sorted_set'
|
5
|
+
else
|
6
|
+
require 'set'
|
7
|
+
end
|
8
|
+
|
9
|
+
def benchmark(caption: nil, cases: {})
|
10
|
+
with_stdouts($stdout, string_io = StringIO.new) do
|
11
|
+
puts caption
|
12
|
+
Benchmark.ips do |x|
|
13
|
+
cases.each { |label, callable| x.report(label, &callable) }
|
14
|
+
x.compare!
|
15
|
+
end
|
16
|
+
end
|
17
|
+
($benchmark_results ||= {})[caption] = string_io.string
|
18
|
+
end
|
19
|
+
|
20
|
+
def with_stdouts(*ios)
|
21
|
+
old_stdout = $stdout
|
22
|
+
ios.define_singleton_method(:method_missing) { |*args| each { |io| io.send(*args) } }
|
23
|
+
ios.define_singleton_method(:respond_to?) { |*args| IO.respond_to?(*args) }
|
24
|
+
$stdout = ios
|
25
|
+
yield
|
26
|
+
ensure
|
27
|
+
$stdout = old_stdout
|
28
|
+
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
desc 'Download unicode casefold data and write new C header file'
|
2
|
+
task :sync_casefold_data do
|
3
|
+
src_path = './CaseFolding.txt'
|
4
|
+
dst_path = "#{__dir__}/../ext/character_set/unicode_casefold_table.h"
|
5
|
+
|
6
|
+
`wget http://www.unicode.org/Public/UNIDATA/CaseFolding.txt`
|
7
|
+
|
8
|
+
mapping = File.foreach(src_path).each_with_object({}) do |line, hash|
|
9
|
+
from, type, to = line.split(/\s*;\s*/).first(3)
|
10
|
+
# type 'C' stands for 'common', excludes mappings to multiple chars
|
11
|
+
hash[from] = to if type == 'C'
|
12
|
+
end.sort
|
13
|
+
|
14
|
+
content = File.read(dst_path + '.tmpl')
|
15
|
+
.sub(/(CASEFOLD_COUNT )0/, "\\1#{mapping.count}")
|
16
|
+
.sub('{}', ['{', mapping.map { |a, b| "{0x#{a},0x#{b}}," }, '}'].join("\n"))
|
17
|
+
|
18
|
+
File.write(dst_path, content)
|
19
|
+
File.unlink(src_path)
|
20
|
+
end
|
@@ -0,0 +1,9 @@
|
|
1
|
+
desc 'Update codepoint data for predefined sets, based on Onigmo'
|
2
|
+
task :sync_predefined_sets do
|
3
|
+
%w[assigned emoji whitespace].each do |prop|
|
4
|
+
require 'regexp_property_values'
|
5
|
+
ranges = RegexpPropertyValues[prop].matched_ranges
|
6
|
+
str = ranges.map { |r| "#{r.min.to_s(16)},#{r.max.to_s(16)}\n" }.join.upcase
|
7
|
+
File.write("#{__dir__}/../lib/character_set/predefined_sets/#{prop}.cps", str, mode: 'w')
|
8
|
+
end
|
9
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
desc 'Download relevant ruby/spec tests, adapt to CharacterSet and its variants'
|
2
|
+
task :sync_ruby_spec do
|
3
|
+
require 'fileutils'
|
4
|
+
|
5
|
+
variants = {
|
6
|
+
'CharacterSet' => "#{__dir__}/../spec/ruby-spec/library/character_set",
|
7
|
+
'CharacterSet::Pure' => "#{__dir__}/../spec/ruby-spec/library/character_set_pure",
|
8
|
+
}
|
9
|
+
|
10
|
+
# download fresh specs from ruby/spec repository
|
11
|
+
variants.each do |_, dir|
|
12
|
+
FileUtils.rm_rf(dir)
|
13
|
+
`svn export https://github.com/ruby/spec/trunk/library/set/sortedset #{dir}`
|
14
|
+
end
|
15
|
+
|
16
|
+
# make copies for each CharacterSet variant
|
17
|
+
base = variants.first[1]
|
18
|
+
variants.each_value { |dir| FileUtils.copy_entry(base, dir) unless dir == base }
|
19
|
+
|
20
|
+
# adapt specs to work with CharacterSet
|
21
|
+
variants.each do |class_name, dir|
|
22
|
+
Dir["#{dir}/**/*.rb"].each do |spec|
|
23
|
+
# ignore some tests that do not apply or are covered otherwise
|
24
|
+
if spec =~ %r{/(classify|divide|flatten|initialize|pretty_print)}
|
25
|
+
File.delete(spec)
|
26
|
+
next
|
27
|
+
end
|
28
|
+
|
29
|
+
adapted_content =
|
30
|
+
File.read(spec).
|
31
|
+
# adapt class name
|
32
|
+
gsub('SortedSet', (spec['/shared/'] ? 'variant' : class_name)).
|
33
|
+
gsub(/(it_behaves_like :[^,\n]+), (:[^,\n]+)/, "\\1, #{class_name}, \\2").
|
34
|
+
# get shared specs from a single shared dir at the parent level
|
35
|
+
gsub(/(require_relative ['"])(shared\/)/, '\1../\2').
|
36
|
+
# make 'mspec' syntax rspec-compatible
|
37
|
+
gsub(/describe (.*), shared.*$/, 'shared_examples \1 do |variant, method|').
|
38
|
+
gsub(/be_(false|true)/, 'be \1').
|
39
|
+
gsub('stub!', 'stub').
|
40
|
+
gsub('mock', 'double').
|
41
|
+
gsub('@method', 'method').
|
42
|
+
# remove unneeded requires
|
43
|
+
gsub(/require 'set'\n/, '').
|
44
|
+
gsub(/require.*spec_helper.*\n/, '').
|
45
|
+
gsub(/\A\n+/, '').
|
46
|
+
# make examples use Integers/codepoints
|
47
|
+
gsub(/1\.0|"cat"|"dog"|"hello"|"test"/, '0').
|
48
|
+
gsub('"one"', '1').
|
49
|
+
gsub('"two"', '2').
|
50
|
+
gsub('"three"', '3').
|
51
|
+
gsub('"four"', '4').
|
52
|
+
gsub('"five"', '5').
|
53
|
+
gsub(/x.(size|length) == 3/, 'x != 3').
|
54
|
+
gsub(/x.(size|length) != 3/, 'x == 3').
|
55
|
+
gsub(/(add)\(\d\)(\.to_a \}.should raise)/, '\1(:foo)\2')
|
56
|
+
|
57
|
+
File.open(spec, 'w') { |f| f.puts adapted_content }
|
58
|
+
end
|
59
|
+
end
|
60
|
+
|
61
|
+
# keep only one copy of the shared specs, at the parent level
|
62
|
+
FileUtils.rm_rf(base + '/../shared')
|
63
|
+
FileUtils.mv(base + '/shared', base + '/../')
|
64
|
+
variants.each_value { |dir| FileUtils.rm_rf(dir + '/shared') }
|
65
|
+
end
|