character_set 1.6.0-java → 1.7.0-java
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/BENCHMARK.md +32 -32
- data/CHANGELOG.md +15 -1
- data/README.md +1 -1
- data/Rakefile +2 -123
- data/character_set.gemspec +0 -7
- data/ext/character_set/character_set.c +64 -43
- data/lib/character_set/parser.rb +8 -4
- data/lib/character_set/predefined_sets/assigned.cps +73 -52
- data/lib/character_set/predefined_sets/emoji.cps +10 -9
- data/lib/character_set/ruby_fallback/character_set_methods.rb +15 -14
- data/lib/character_set/ruby_fallback/set_methods.rb +4 -18
- data/lib/character_set/ruby_fallback/vendored_set_classes.rb +492 -0
- data/lib/character_set/ruby_fallback.rb +2 -6
- data/lib/character_set/shared_methods.rb +2 -2
- data/lib/character_set/version.rb +1 -1
- data/tasks/benchmark.rake +20 -0
- data/tasks/benchmarks/shared.rb +28 -0
- data/tasks/sync_casefold_data.rake +20 -0
- data/tasks/sync_predefined_sets.rake +9 -0
- data/tasks/sync_ruby_spec.rake +65 -0
- metadata +19 -28
- data/benchmarks/shared.rb +0 -30
- /data/{benchmarks → tasks/benchmarks}/count_in.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/cover.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/delete_in.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/keep_in.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/scan.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/used_by.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_add.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_delete.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_merge.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_minmax.rb +0 -0
@@ -10,8 +10,7 @@
|
|
10
10
|
591,5C7
|
11
11
|
5D0,5EA
|
12
12
|
5EF,5F4
|
13
|
-
600,
|
14
|
-
61E,70D
|
13
|
+
600,70D
|
15
14
|
70F,74A
|
16
15
|
74D,7B1
|
17
16
|
7C0,7FA
|
@@ -20,9 +19,9 @@
|
|
20
19
|
840,85B
|
21
20
|
85E,85E
|
22
21
|
860,86A
|
23
|
-
|
24
|
-
|
25
|
-
|
22
|
+
870,88E
|
23
|
+
890,891
|
24
|
+
898,983
|
26
25
|
985,98C
|
27
26
|
98F,990
|
28
27
|
993,9A8
|
@@ -100,11 +99,12 @@ C00,C0C
|
|
100
99
|
C0E,C10
|
101
100
|
C12,C28
|
102
101
|
C2A,C39
|
103
|
-
|
102
|
+
C3C,C44
|
104
103
|
C46,C48
|
105
104
|
C4A,C4D
|
106
105
|
C55,C56
|
107
106
|
C58,C5A
|
107
|
+
C5D,C5D
|
108
108
|
C60,C63
|
109
109
|
C66,C6F
|
110
110
|
C77,C8C
|
@@ -116,7 +116,7 @@ CBC,CC4
|
|
116
116
|
CC6,CC8
|
117
117
|
CCA,CCD
|
118
118
|
CD5,CD6
|
119
|
-
|
119
|
+
CDD,CDE
|
120
120
|
CE0,CE3
|
121
121
|
CE6,CEF
|
122
122
|
CF1,CF2
|
@@ -183,9 +183,8 @@ FCE,FDA
|
|
183
183
|
13F8,13FD
|
184
184
|
1400,169C
|
185
185
|
16A0,16F8
|
186
|
-
1700,
|
187
|
-
|
188
|
-
1720,1736
|
186
|
+
1700,1715
|
187
|
+
171F,1736
|
189
188
|
1740,1753
|
190
189
|
1760,176C
|
191
190
|
176E,1770
|
@@ -193,8 +192,7 @@ FCE,FDA
|
|
193
192
|
1780,17DD
|
194
193
|
17E0,17E9
|
195
194
|
17F0,17F9
|
196
|
-
1800,
|
197
|
-
1810,1819
|
195
|
+
1800,1819
|
198
196
|
1820,1878
|
199
197
|
1880,18AA
|
200
198
|
18B0,18F5
|
@@ -213,9 +211,9 @@ FCE,FDA
|
|
213
211
|
1A7F,1A89
|
214
212
|
1A90,1A99
|
215
213
|
1AA0,1AAD
|
216
|
-
1AB0,
|
217
|
-
1B00,
|
218
|
-
1B50,
|
214
|
+
1AB0,1ACE
|
215
|
+
1B00,1B4C
|
216
|
+
1B50,1B7E
|
219
217
|
1B80,1BF3
|
220
218
|
1BFC,1C37
|
221
219
|
1C3B,1C49
|
@@ -223,8 +221,7 @@ FCE,FDA
|
|
223
221
|
1C90,1CBA
|
224
222
|
1CBD,1CC7
|
225
223
|
1CD0,1CFA
|
226
|
-
1D00,
|
227
|
-
1DFB,1F15
|
224
|
+
1D00,1F15
|
228
225
|
1F18,1F1D
|
229
226
|
1F20,1F45
|
230
227
|
1F48,1F4D
|
@@ -244,16 +241,14 @@ FCE,FDA
|
|
244
241
|
2066,2071
|
245
242
|
2074,208E
|
246
243
|
2090,209C
|
247
|
-
20A0,
|
244
|
+
20A0,20C0
|
248
245
|
20D0,20F0
|
249
246
|
2100,218B
|
250
247
|
2190,2426
|
251
248
|
2440,244A
|
252
249
|
2460,2B73
|
253
250
|
2B76,2B95
|
254
|
-
2B97,
|
255
|
-
2C30,2C5E
|
256
|
-
2C60,2CF3
|
251
|
+
2B97,2CF3
|
257
252
|
2CF9,2D25
|
258
253
|
2D27,2D27
|
259
254
|
2D2D,2D2D
|
@@ -268,7 +263,7 @@ FCE,FDA
|
|
268
263
|
2DC8,2DCE
|
269
264
|
2DD0,2DD6
|
270
265
|
2DD8,2DDE
|
271
|
-
2DE0,
|
266
|
+
2DE0,2E5D
|
272
267
|
2E80,2E99
|
273
268
|
2E9B,2EF3
|
274
269
|
2F00,2FD5
|
@@ -280,14 +275,15 @@ FCE,FDA
|
|
280
275
|
3131,318E
|
281
276
|
3190,31E3
|
282
277
|
31F0,321E
|
283
|
-
3220,
|
284
|
-
A000,A48C
|
278
|
+
3220,A48C
|
285
279
|
A490,A4C6
|
286
280
|
A4D0,A62B
|
287
281
|
A640,A6F7
|
288
|
-
A700,
|
289
|
-
|
290
|
-
|
282
|
+
A700,A7CA
|
283
|
+
A7D0,A7D1
|
284
|
+
A7D3,A7D3
|
285
|
+
A7D5,A7D9
|
286
|
+
A7F2,A82C
|
291
287
|
A830,A839
|
292
288
|
A840,A877
|
293
289
|
A880,A8C5
|
@@ -322,12 +318,11 @@ FB38,FB3C
|
|
322
318
|
FB3E,FB3E
|
323
319
|
FB40,FB41
|
324
320
|
FB43,FB44
|
325
|
-
FB46,
|
326
|
-
FBD3,
|
327
|
-
FD50,FD8F
|
321
|
+
FB46,FBC2
|
322
|
+
FBD3,FD8F
|
328
323
|
FD92,FDC7
|
329
|
-
|
330
|
-
|
324
|
+
FDCF,FDCF
|
325
|
+
FDF0,FE19
|
331
326
|
FE20,FE52
|
332
327
|
FE54,FE66
|
333
328
|
FE68,FE6B
|
@@ -370,10 +365,20 @@ FFF9,FFFD
|
|
370
365
|
104D8,104FB
|
371
366
|
10500,10527
|
372
367
|
10530,10563
|
373
|
-
1056F,
|
368
|
+
1056F,1057A
|
369
|
+
1057C,1058A
|
370
|
+
1058C,10592
|
371
|
+
10594,10595
|
372
|
+
10597,105A1
|
373
|
+
105A3,105B1
|
374
|
+
105B3,105B9
|
375
|
+
105BB,105BC
|
374
376
|
10600,10736
|
375
377
|
10740,10755
|
376
378
|
10760,10767
|
379
|
+
10780,10785
|
380
|
+
10787,107B0
|
381
|
+
107B2,107BA
|
377
382
|
10800,10805
|
378
383
|
10808,10808
|
379
384
|
1080A,10835
|
@@ -417,11 +422,12 @@ FFF9,FFFD
|
|
417
422
|
10EB0,10EB1
|
418
423
|
10F00,10F27
|
419
424
|
10F30,10F59
|
425
|
+
10F70,10F89
|
420
426
|
10FB0,10FCB
|
421
427
|
10FE0,10FF6
|
422
428
|
11000,1104D
|
423
|
-
11052,
|
424
|
-
1107F,
|
429
|
+
11052,11075
|
430
|
+
1107F,110C2
|
425
431
|
110CD,110CD
|
426
432
|
110D0,110E8
|
427
433
|
110F0,110F9
|
@@ -463,11 +469,11 @@ FFF9,FFFD
|
|
463
469
|
11600,11644
|
464
470
|
11650,11659
|
465
471
|
11660,1166C
|
466
|
-
11680,
|
472
|
+
11680,116B9
|
467
473
|
116C0,116C9
|
468
474
|
11700,1171A
|
469
475
|
1171D,1172B
|
470
|
-
11730,
|
476
|
+
11730,11746
|
471
477
|
11800,1183B
|
472
478
|
118A0,118F2
|
473
479
|
118FF,11906
|
@@ -483,7 +489,7 @@ FFF9,FFFD
|
|
483
489
|
119DA,119E4
|
484
490
|
11A00,11A47
|
485
491
|
11A50,11AA2
|
486
|
-
|
492
|
+
11AB0,11AF8
|
487
493
|
11C00,11C08
|
488
494
|
11C0A,11C36
|
489
495
|
11C38,11C45
|
@@ -511,13 +517,15 @@ FFF9,FFFD
|
|
511
517
|
12400,1246E
|
512
518
|
12470,12474
|
513
519
|
12480,12543
|
520
|
+
12F90,12FF2
|
514
521
|
13000,1342E
|
515
522
|
13430,13438
|
516
523
|
14400,14646
|
517
524
|
16800,16A38
|
518
525
|
16A40,16A5E
|
519
526
|
16A60,16A69
|
520
|
-
16A6E,
|
527
|
+
16A6E,16ABE
|
528
|
+
16AC0,16AC9
|
521
529
|
16AD0,16AED
|
522
530
|
16AF0,16AF5
|
523
531
|
16B00,16B45
|
@@ -534,7 +542,10 @@ FFF9,FFFD
|
|
534
542
|
17000,187F7
|
535
543
|
18800,18CD5
|
536
544
|
18D00,18D08
|
537
|
-
|
545
|
+
1AFF0,1AFF3
|
546
|
+
1AFF5,1AFFB
|
547
|
+
1AFFD,1AFFE
|
548
|
+
1B000,1B122
|
538
549
|
1B150,1B152
|
539
550
|
1B164,1B167
|
540
551
|
1B170,1B2FB
|
@@ -543,9 +554,12 @@ FFF9,FFFD
|
|
543
554
|
1BC80,1BC88
|
544
555
|
1BC90,1BC99
|
545
556
|
1BC9C,1BCA3
|
557
|
+
1CF00,1CF2D
|
558
|
+
1CF30,1CF46
|
559
|
+
1CF50,1CFC3
|
546
560
|
1D000,1D0F5
|
547
561
|
1D100,1D126
|
548
|
-
1D129,
|
562
|
+
1D129,1D1EA
|
549
563
|
1D200,1D245
|
550
564
|
1D2E0,1D2F3
|
551
565
|
1D300,1D356
|
@@ -573,6 +587,7 @@ FFF9,FFFD
|
|
573
587
|
1D7CE,1DA8B
|
574
588
|
1DA9B,1DA9F
|
575
589
|
1DAA1,1DAAF
|
590
|
+
1DF00,1DF1E
|
576
591
|
1E000,1E006
|
577
592
|
1E008,1E018
|
578
593
|
1E01B,1E021
|
@@ -582,8 +597,13 @@ FFF9,FFFD
|
|
582
597
|
1E130,1E13D
|
583
598
|
1E140,1E149
|
584
599
|
1E14E,1E14F
|
600
|
+
1E290,1E2AE
|
585
601
|
1E2C0,1E2F9
|
586
602
|
1E2FF,1E2FF
|
603
|
+
1E7E0,1E7E6
|
604
|
+
1E7E8,1E7EB
|
605
|
+
1E7ED,1E7EE
|
606
|
+
1E7F0,1E7FE
|
587
607
|
1E800,1E8C4
|
588
608
|
1E8C7,1E8D6
|
589
609
|
1E900,1E94B
|
@@ -638,33 +658,34 @@ FFF9,FFFD
|
|
638
658
|
1F250,1F251
|
639
659
|
1F260,1F265
|
640
660
|
1F300,1F6D7
|
641
|
-
|
661
|
+
1F6DD,1F6EC
|
642
662
|
1F6F0,1F6FC
|
643
663
|
1F700,1F773
|
644
664
|
1F780,1F7D8
|
645
665
|
1F7E0,1F7EB
|
666
|
+
1F7F0,1F7F0
|
646
667
|
1F800,1F80B
|
647
668
|
1F810,1F847
|
648
669
|
1F850,1F859
|
649
670
|
1F860,1F887
|
650
671
|
1F890,1F8AD
|
651
672
|
1F8B0,1F8B1
|
652
|
-
1F900,
|
653
|
-
1F97A,1F9CB
|
654
|
-
1F9CD,1FA53
|
673
|
+
1F900,1FA53
|
655
674
|
1FA60,1FA6D
|
656
675
|
1FA70,1FA74
|
657
|
-
1FA78,
|
676
|
+
1FA78,1FA7C
|
658
677
|
1FA80,1FA86
|
659
|
-
1FA90,
|
660
|
-
1FAB0,
|
661
|
-
1FAC0,
|
662
|
-
1FAD0,
|
678
|
+
1FA90,1FAAC
|
679
|
+
1FAB0,1FABA
|
680
|
+
1FAC0,1FAC5
|
681
|
+
1FAD0,1FAD9
|
682
|
+
1FAE0,1FAE7
|
683
|
+
1FAF0,1FAF6
|
663
684
|
1FB00,1FB92
|
664
685
|
1FB94,1FBCA
|
665
686
|
1FBF0,1FBF9
|
666
|
-
20000,
|
667
|
-
2A700,
|
687
|
+
20000,2A6DF
|
688
|
+
2A700,2B738
|
668
689
|
2B740,2B81D
|
669
690
|
2B820,2CEA1
|
670
691
|
2CEB0,2EBE0
|
@@ -132,21 +132,22 @@ AE,AE
|
|
132
132
|
1F680,1F6C5
|
133
133
|
1F6CB,1F6D2
|
134
134
|
1F6D5,1F6D7
|
135
|
-
|
135
|
+
1F6DD,1F6E5
|
136
136
|
1F6E9,1F6E9
|
137
137
|
1F6EB,1F6EC
|
138
138
|
1F6F0,1F6F0
|
139
139
|
1F6F3,1F6FC
|
140
140
|
1F7E0,1F7EB
|
141
|
+
1F7F0,1F7F0
|
141
142
|
1F90C,1F93A
|
142
143
|
1F93C,1F945
|
143
|
-
1F947,
|
144
|
-
1F97A,1F9CB
|
145
|
-
1F9CD,1F9FF
|
144
|
+
1F947,1F9FF
|
146
145
|
1FA70,1FA74
|
147
|
-
1FA78,
|
146
|
+
1FA78,1FA7C
|
148
147
|
1FA80,1FA86
|
149
|
-
1FA90,
|
150
|
-
1FAB0,
|
151
|
-
1FAC0,
|
152
|
-
1FAD0,
|
148
|
+
1FA90,1FAAC
|
149
|
+
1FAB0,1FABA
|
150
|
+
1FAC0,1FAC5
|
151
|
+
1FAD0,1FAD9
|
152
|
+
1FAE0,1FAE7
|
153
|
+
1FAF0,1FAF6
|
@@ -8,7 +8,7 @@ class CharacterSet
|
|
8
8
|
|
9
9
|
def of_string(str)
|
10
10
|
raise ArgumentError, 'pass a String' unless str.respond_to?(:codepoints)
|
11
|
-
str.
|
11
|
+
str.encode('utf-8').each_codepoint.with_object(new) { |cp, set| set << cp }
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
@@ -40,16 +40,18 @@ class CharacterSet
|
|
40
40
|
end
|
41
41
|
|
42
42
|
def count_in(string)
|
43
|
-
|
43
|
+
utf8_str!(string).each_codepoint.count { |cp| include?(cp) }
|
44
44
|
end
|
45
45
|
|
46
46
|
def cover?(string)
|
47
|
-
|
47
|
+
utf8_str!(string).each_codepoint { |cp| return false unless include?(cp) }
|
48
48
|
true
|
49
49
|
end
|
50
50
|
|
51
51
|
def delete_in(string)
|
52
|
-
|
52
|
+
utf8_str!(string).each_codepoint.with_object('') do |cp, new_str|
|
53
|
+
include?(cp) || (new_str << cp)
|
54
|
+
end.encode(string.encoding)
|
53
55
|
end
|
54
56
|
|
55
57
|
def delete_in!(string)
|
@@ -58,7 +60,9 @@ class CharacterSet
|
|
58
60
|
end
|
59
61
|
|
60
62
|
def keep_in(string)
|
61
|
-
|
63
|
+
utf8_str!(string).each_codepoint.with_object('') do |cp, new_str|
|
64
|
+
include?(cp) && (new_str << cp)
|
65
|
+
end.encode(string.encoding)
|
62
66
|
end
|
63
67
|
|
64
68
|
def keep_in!(string)
|
@@ -67,14 +71,13 @@ class CharacterSet
|
|
67
71
|
end
|
68
72
|
|
69
73
|
def scan(string)
|
70
|
-
|
71
|
-
|
72
|
-
include?(cp) ? arr.push(cp.chr(encoding)) : arr
|
74
|
+
utf8_str!(string).each_codepoint.with_object([]) do |cp, arr|
|
75
|
+
arr.push(cp.chr('utf-8')) if include?(cp)
|
73
76
|
end
|
74
77
|
end
|
75
78
|
|
76
79
|
def used_by?(string)
|
77
|
-
|
80
|
+
utf8_str!(string).each_codepoint { |cp| return true if include?(cp) }
|
78
81
|
false
|
79
82
|
end
|
80
83
|
|
@@ -115,15 +118,13 @@ class CharacterSet
|
|
115
118
|
num >= 0 && num <= 16 or raise ArgumentError, 'plane must be between 0 and 16'
|
116
119
|
end
|
117
120
|
|
118
|
-
def
|
121
|
+
def utf8_str!(obj)
|
119
122
|
raise ArgumentError, 'pass a String' unless obj.respond_to?(:codepoints)
|
120
|
-
obj
|
123
|
+
obj.encode('utf-8')
|
121
124
|
end
|
122
125
|
|
123
126
|
def make_new_str(original, &block)
|
124
|
-
|
125
|
-
.each_codepoint
|
126
|
-
.each_with_object(''.encode(original.encoding), &block)
|
127
|
+
utf8_str!(original).each_codepoint.with_object('', &block)
|
127
128
|
end
|
128
129
|
end
|
129
130
|
end
|
@@ -3,7 +3,7 @@ class CharacterSet
|
|
3
3
|
module SetMethods
|
4
4
|
(Enumerable.instance_methods -
|
5
5
|
%i[include? member? to_a] +
|
6
|
-
%i[empty? length size]).each do |mthd|
|
6
|
+
%i[empty? hash length size]).each do |mthd|
|
7
7
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
8
8
|
def #{mthd}(*args, &block)
|
9
9
|
@__set.#{mthd}(*args, &block)
|
@@ -11,8 +11,8 @@ class CharacterSet
|
|
11
11
|
RUBY
|
12
12
|
end
|
13
13
|
|
14
|
-
%i[< <= > >= disjoint?
|
15
|
-
subset? superset?].each do |mthd|
|
14
|
+
%i[< <= > >= === disjoint? include? intersect? member?
|
15
|
+
proper_subset? proper_superset? subset? superset?].each do |mthd|
|
16
16
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
17
17
|
def #{mthd}(enum, &block)
|
18
18
|
if enum.is_a?(CharacterSet) || enum.is_a?(CharacterSet::Pure)
|
@@ -34,15 +34,6 @@ class CharacterSet
|
|
34
34
|
RUBY
|
35
35
|
end
|
36
36
|
|
37
|
-
# revert if https://github.com/knu/sorted_set/issues/2 is resolved
|
38
|
-
%i[=== include? member?].each do |mthd|
|
39
|
-
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
40
|
-
def #{mthd}(*args, &block)
|
41
|
-
!!@__set.#{mthd}(*args, &block)
|
42
|
-
end
|
43
|
-
RUBY
|
44
|
-
end
|
45
|
-
|
46
37
|
%i[& + - ^ | difference intersection union].each do |mthd|
|
47
38
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
48
39
|
def #{mthd}(enum, &block)
|
@@ -83,13 +74,8 @@ class CharacterSet
|
|
83
74
|
|
84
75
|
def eql?(other)
|
85
76
|
return false unless other.is_a?(self.class)
|
86
|
-
# revert if https://github.com/knu/sorted_set/issues/3 is resolved
|
87
|
-
hash == other.hash
|
88
|
-
end
|
89
77
|
|
90
|
-
|
91
|
-
def hash
|
92
|
-
@__set.to_a.hash
|
78
|
+
@__set.eql?(other.instance_variable_get(:@__set))
|
93
79
|
end
|
94
80
|
|
95
81
|
def initialize_dup(orig)
|