character_set 1.4.0 → 1.8.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitattributes +1 -1
- data/.github/workflows/gouteur.yml +20 -0
- data/.github/workflows/lint.yml +29 -0
- data/.github/workflows/tests.yml +28 -0
- data/.gitignore +1 -0
- data/.gouteur.yml +2 -0
- data/.rubocop.yml +20 -0
- data/BENCHMARK.md +35 -31
- data/CHANGELOG.md +64 -1
- data/Gemfile +15 -0
- data/LICENSE.txt +1 -1
- data/README.md +25 -9
- data/Rakefile +2 -120
- data/character_set.gemspec +0 -10
- data/ext/character_set/character_set.c +123 -121
- data/ext/character_set/unicode_casefold_table.h +44 -1
- data/lib/character_set/core_ext/regexp_ext.rb +9 -1
- data/lib/character_set/core_ext/string_ext.rb +2 -2
- data/lib/character_set/expression_converter.rb +40 -56
- data/lib/character_set/parser.rb +8 -4
- data/lib/character_set/predefined_sets/assigned.cps +110 -78
- data/lib/character_set/predefined_sets/emoji.cps +16 -14
- data/lib/character_set/predefined_sets.rb +11 -0
- data/lib/character_set/ruby_fallback/character_set_methods.rb +17 -21
- data/lib/character_set/ruby_fallback/set_methods.rb +9 -16
- data/lib/character_set/ruby_fallback/vendored_set_classes.rb +385 -0
- data/lib/character_set/ruby_fallback.rb +18 -2
- data/lib/character_set/set_method_adapters.rb +4 -3
- data/lib/character_set/shared_methods.rb +25 -11
- data/lib/character_set/version.rb +1 -1
- data/tasks/benchmark.rake +20 -0
- data/{benchmarks → tasks/benchmarks}/delete_in.rb +5 -1
- data/{benchmarks → tasks/benchmarks}/keep_in.rb +5 -1
- data/tasks/benchmarks/shared.rb +28 -0
- data/tasks/sync_casefold_data.rake +20 -0
- data/tasks/sync_predefined_sets.rake +9 -0
- data/tasks/sync_ruby_spec.rake +65 -0
- metadata +29 -146
- data/.travis.yml +0 -9
- data/benchmarks/shared.rb +0 -26
- /data/{benchmarks → tasks/benchmarks}/count_in.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/cover.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/scan.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/used_by.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_add.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_delete.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_merge.rb +0 -0
- /data/{benchmarks → tasks/benchmarks}/z_minmax.rb +0 -0
data/lib/character_set/parser.rb
CHANGED
@@ -4,11 +4,15 @@ class CharacterSet
|
|
4
4
|
|
5
5
|
def codepoints_from_enumerable(object)
|
6
6
|
raise ArgumentError, 'pass an Enumerable' unless object.respond_to?(:each)
|
7
|
+
|
7
8
|
# Use #each to check first element (only this works for all Enumerables)
|
8
|
-
object.each do |
|
9
|
-
|
10
|
-
|
11
|
-
|
9
|
+
object.each do |el| # rubocop:disable Lint/UnreachableLoop
|
10
|
+
if el.is_a?(Integer) && el >= 0 && el < 0x110000
|
11
|
+
return object
|
12
|
+
elsif el.is_a?(String) && el.length == 1
|
13
|
+
return object.to_a.join.encode('utf-8').codepoints
|
14
|
+
end
|
15
|
+
raise ArgumentError, "#{el.inspect} is not valid as a codepoint"
|
12
16
|
end
|
13
17
|
end
|
14
18
|
|
@@ -10,8 +10,7 @@
|
|
10
10
|
591,5C7
|
11
11
|
5D0,5EA
|
12
12
|
5EF,5F4
|
13
|
-
600,
|
14
|
-
61E,70D
|
13
|
+
600,70D
|
15
14
|
70F,74A
|
16
15
|
74D,7B1
|
17
16
|
7C0,7FA
|
@@ -20,9 +19,9 @@
|
|
20
19
|
840,85B
|
21
20
|
85E,85E
|
22
21
|
860,86A
|
23
|
-
|
24
|
-
|
25
|
-
|
22
|
+
870,88E
|
23
|
+
890,891
|
24
|
+
898,983
|
26
25
|
985,98C
|
27
26
|
98F,990
|
28
27
|
993,9A8
|
@@ -76,7 +75,7 @@ B35,B39
|
|
76
75
|
B3C,B44
|
77
76
|
B47,B48
|
78
77
|
B4B,B4D
|
79
|
-
|
78
|
+
B55,B57
|
80
79
|
B5C,B5D
|
81
80
|
B5F,B63
|
82
81
|
B66,B77
|
@@ -100,11 +99,12 @@ C00,C0C
|
|
100
99
|
C0E,C10
|
101
100
|
C12,C28
|
102
101
|
C2A,C39
|
103
|
-
|
102
|
+
C3C,C44
|
104
103
|
C46,C48
|
105
104
|
C4A,C4D
|
106
105
|
C55,C56
|
107
106
|
C58,C5A
|
107
|
+
C5D,C5D
|
108
108
|
C60,C63
|
109
109
|
C66,C6F
|
110
110
|
C77,C8C
|
@@ -116,19 +116,18 @@ CBC,CC4
|
|
116
116
|
CC6,CC8
|
117
117
|
CCA,CCD
|
118
118
|
CD5,CD6
|
119
|
-
|
119
|
+
CDD,CDE
|
120
120
|
CE0,CE3
|
121
121
|
CE6,CEF
|
122
122
|
CF1,CF2
|
123
|
-
D00,
|
124
|
-
D05,D0C
|
123
|
+
D00,D0C
|
125
124
|
D0E,D10
|
126
125
|
D12,D44
|
127
126
|
D46,D48
|
128
127
|
D4A,D4F
|
129
128
|
D54,D63
|
130
129
|
D66,D7F
|
131
|
-
|
130
|
+
D81,D83
|
132
131
|
D85,D96
|
133
132
|
D9A,DB1
|
134
133
|
DB3,DBB
|
@@ -184,9 +183,8 @@ FCE,FDA
|
|
184
183
|
13F8,13FD
|
185
184
|
1400,169C
|
186
185
|
16A0,16F8
|
187
|
-
1700,
|
188
|
-
|
189
|
-
1720,1736
|
186
|
+
1700,1715
|
187
|
+
171F,1736
|
190
188
|
1740,1753
|
191
189
|
1760,176C
|
192
190
|
176E,1770
|
@@ -194,8 +192,7 @@ FCE,FDA
|
|
194
192
|
1780,17DD
|
195
193
|
17E0,17E9
|
196
194
|
17F0,17F9
|
197
|
-
1800,
|
198
|
-
1810,1819
|
195
|
+
1800,1819
|
199
196
|
1820,1878
|
200
197
|
1880,18AA
|
201
198
|
18B0,18F5
|
@@ -214,9 +211,9 @@ FCE,FDA
|
|
214
211
|
1A7F,1A89
|
215
212
|
1A90,1A99
|
216
213
|
1AA0,1AAD
|
217
|
-
1AB0,
|
218
|
-
1B00,
|
219
|
-
1B50,
|
214
|
+
1AB0,1ACE
|
215
|
+
1B00,1B4C
|
216
|
+
1B50,1B7E
|
220
217
|
1B80,1BF3
|
221
218
|
1BFC,1C37
|
222
219
|
1C3B,1C49
|
@@ -224,8 +221,7 @@ FCE,FDA
|
|
224
221
|
1C90,1CBA
|
225
222
|
1CBD,1CC7
|
226
223
|
1CD0,1CFA
|
227
|
-
1D00,
|
228
|
-
1DFB,1F15
|
224
|
+
1D00,1F15
|
229
225
|
1F18,1F1D
|
230
226
|
1F20,1F45
|
231
227
|
1F48,1F4D
|
@@ -245,16 +241,14 @@ FCE,FDA
|
|
245
241
|
2066,2071
|
246
242
|
2074,208E
|
247
243
|
2090,209C
|
248
|
-
20A0,
|
244
|
+
20A0,20C0
|
249
245
|
20D0,20F0
|
250
246
|
2100,218B
|
251
247
|
2190,2426
|
252
248
|
2440,244A
|
253
249
|
2460,2B73
|
254
250
|
2B76,2B95
|
255
|
-
|
256
|
-
2C30,2C5E
|
257
|
-
2C60,2CF3
|
251
|
+
2B97,2CF3
|
258
252
|
2CF9,2D25
|
259
253
|
2D27,2D27
|
260
254
|
2D2D,2D2D
|
@@ -269,7 +263,7 @@ FCE,FDA
|
|
269
263
|
2DC8,2DCE
|
270
264
|
2DD0,2DD6
|
271
265
|
2DD8,2DDE
|
272
|
-
2DE0,
|
266
|
+
2DE0,2E5D
|
273
267
|
2E80,2E99
|
274
268
|
2E9B,2EF3
|
275
269
|
2F00,2FD5
|
@@ -279,18 +273,17 @@ FCE,FDA
|
|
279
273
|
3099,30FF
|
280
274
|
3105,312F
|
281
275
|
3131,318E
|
282
|
-
3190,
|
283
|
-
31C0,31E3
|
276
|
+
3190,31E3
|
284
277
|
31F0,321E
|
285
|
-
3220,
|
286
|
-
4DC0,9FEF
|
287
|
-
A000,A48C
|
278
|
+
3220,A48C
|
288
279
|
A490,A4C6
|
289
280
|
A4D0,A62B
|
290
281
|
A640,A6F7
|
291
|
-
A700,
|
292
|
-
|
293
|
-
|
282
|
+
A700,A7CA
|
283
|
+
A7D0,A7D1
|
284
|
+
A7D3,A7D3
|
285
|
+
A7D5,A7D9
|
286
|
+
A7F2,A82C
|
294
287
|
A830,A839
|
295
288
|
A840,A877
|
296
289
|
A880,A8C5
|
@@ -310,7 +303,7 @@ AB09,AB0E
|
|
310
303
|
AB11,AB16
|
311
304
|
AB20,AB26
|
312
305
|
AB28,AB2E
|
313
|
-
AB30,
|
306
|
+
AB30,AB6B
|
314
307
|
AB70,ABED
|
315
308
|
ABF0,ABF9
|
316
309
|
AC00,D7A3
|
@@ -325,12 +318,11 @@ FB38,FB3C
|
|
325
318
|
FB3E,FB3E
|
326
319
|
FB40,FB41
|
327
320
|
FB43,FB44
|
328
|
-
FB46,
|
329
|
-
FBD3,
|
330
|
-
FD50,FD8F
|
321
|
+
FB46,FBC2
|
322
|
+
FBD3,FD8F
|
331
323
|
FD92,FDC7
|
332
|
-
|
333
|
-
|
324
|
+
FDCF,FDCF
|
325
|
+
FDF0,FE19
|
334
326
|
FE20,FE52
|
335
327
|
FE54,FE66
|
336
328
|
FE68,FE6B
|
@@ -355,7 +347,7 @@ FFF9,FFFD
|
|
355
347
|
10100,10102
|
356
348
|
10107,10133
|
357
349
|
10137,1018E
|
358
|
-
10190,
|
350
|
+
10190,1019C
|
359
351
|
101A0,101A0
|
360
352
|
101D0,101FD
|
361
353
|
10280,1029C
|
@@ -373,10 +365,20 @@ FFF9,FFFD
|
|
373
365
|
104D8,104FB
|
374
366
|
10500,10527
|
375
367
|
10530,10563
|
376
|
-
1056F,
|
368
|
+
1056F,1057A
|
369
|
+
1057C,1058A
|
370
|
+
1058C,10592
|
371
|
+
10594,10595
|
372
|
+
10597,105A1
|
373
|
+
105A3,105B1
|
374
|
+
105B3,105B9
|
375
|
+
105BB,105BC
|
377
376
|
10600,10736
|
378
377
|
10740,10755
|
379
378
|
10760,10767
|
379
|
+
10780,10785
|
380
|
+
10787,107B0
|
381
|
+
107B2,107BA
|
380
382
|
10800,10805
|
381
383
|
10808,10808
|
382
384
|
1080A,10835
|
@@ -415,20 +417,24 @@ FFF9,FFFD
|
|
415
417
|
10CFA,10D27
|
416
418
|
10D30,10D39
|
417
419
|
10E60,10E7E
|
420
|
+
10E80,10EA9
|
421
|
+
10EAB,10EAD
|
422
|
+
10EB0,10EB1
|
418
423
|
10F00,10F27
|
419
424
|
10F30,10F59
|
425
|
+
10F70,10F89
|
426
|
+
10FB0,10FCB
|
420
427
|
10FE0,10FF6
|
421
428
|
11000,1104D
|
422
|
-
11052,
|
423
|
-
1107F,
|
429
|
+
11052,11075
|
430
|
+
1107F,110C2
|
424
431
|
110CD,110CD
|
425
432
|
110D0,110E8
|
426
433
|
110F0,110F9
|
427
434
|
11100,11134
|
428
|
-
11136,
|
435
|
+
11136,11147
|
429
436
|
11150,11176
|
430
|
-
11180,
|
431
|
-
111D0,111DF
|
437
|
+
11180,111DF
|
432
438
|
111E1,111F4
|
433
439
|
11200,11211
|
434
440
|
11213,1123E
|
@@ -454,9 +460,8 @@ FFF9,FFFD
|
|
454
460
|
1135D,11363
|
455
461
|
11366,1136C
|
456
462
|
11370,11374
|
457
|
-
11400,
|
458
|
-
|
459
|
-
1145D,1145F
|
463
|
+
11400,1145B
|
464
|
+
1145D,11461
|
460
465
|
11480,114C7
|
461
466
|
114D0,114D9
|
462
467
|
11580,115B5
|
@@ -464,20 +469,27 @@ FFF9,FFFD
|
|
464
469
|
11600,11644
|
465
470
|
11650,11659
|
466
471
|
11660,1166C
|
467
|
-
11680,
|
472
|
+
11680,116B9
|
468
473
|
116C0,116C9
|
469
474
|
11700,1171A
|
470
475
|
1171D,1172B
|
471
|
-
11730,
|
476
|
+
11730,11746
|
472
477
|
11800,1183B
|
473
478
|
118A0,118F2
|
474
|
-
118FF,
|
479
|
+
118FF,11906
|
480
|
+
11909,11909
|
481
|
+
1190C,11913
|
482
|
+
11915,11916
|
483
|
+
11918,11935
|
484
|
+
11937,11938
|
485
|
+
1193B,11946
|
486
|
+
11950,11959
|
475
487
|
119A0,119A7
|
476
488
|
119AA,119D7
|
477
489
|
119DA,119E4
|
478
490
|
11A00,11A47
|
479
491
|
11A50,11AA2
|
480
|
-
|
492
|
+
11AB0,11AF8
|
481
493
|
11C00,11C08
|
482
494
|
11C0A,11C36
|
483
495
|
11C38,11C45
|
@@ -499,18 +511,21 @@ FFF9,FFFD
|
|
499
511
|
11D93,11D98
|
500
512
|
11DA0,11DA9
|
501
513
|
11EE0,11EF8
|
514
|
+
11FB0,11FB0
|
502
515
|
11FC0,11FF1
|
503
516
|
11FFF,12399
|
504
517
|
12400,1246E
|
505
518
|
12470,12474
|
506
519
|
12480,12543
|
520
|
+
12F90,12FF2
|
507
521
|
13000,1342E
|
508
522
|
13430,13438
|
509
523
|
14400,14646
|
510
524
|
16800,16A38
|
511
525
|
16A40,16A5E
|
512
526
|
16A60,16A69
|
513
|
-
16A6E,
|
527
|
+
16A6E,16ABE
|
528
|
+
16AC0,16AC9
|
514
529
|
16AD0,16AED
|
515
530
|
16AF0,16AF5
|
516
531
|
16B00,16B45
|
@@ -522,10 +537,15 @@ FFF9,FFFD
|
|
522
537
|
16F00,16F4A
|
523
538
|
16F4F,16F87
|
524
539
|
16F8F,16F9F
|
525
|
-
16FE0,
|
540
|
+
16FE0,16FE4
|
541
|
+
16FF0,16FF1
|
526
542
|
17000,187F7
|
527
|
-
18800,
|
528
|
-
|
543
|
+
18800,18CD5
|
544
|
+
18D00,18D08
|
545
|
+
1AFF0,1AFF3
|
546
|
+
1AFF5,1AFFB
|
547
|
+
1AFFD,1AFFE
|
548
|
+
1B000,1B122
|
529
549
|
1B150,1B152
|
530
550
|
1B164,1B167
|
531
551
|
1B170,1B2FB
|
@@ -534,9 +554,12 @@ FFF9,FFFD
|
|
534
554
|
1BC80,1BC88
|
535
555
|
1BC90,1BC99
|
536
556
|
1BC9C,1BCA3
|
557
|
+
1CF00,1CF2D
|
558
|
+
1CF30,1CF46
|
559
|
+
1CF50,1CFC3
|
537
560
|
1D000,1D0F5
|
538
561
|
1D100,1D126
|
539
|
-
1D129,
|
562
|
+
1D129,1D1EA
|
540
563
|
1D200,1D245
|
541
564
|
1D2E0,1D2F3
|
542
565
|
1D300,1D356
|
@@ -564,6 +587,7 @@ FFF9,FFFD
|
|
564
587
|
1D7CE,1DA8B
|
565
588
|
1DA9B,1DA9F
|
566
589
|
1DAA1,1DAAF
|
590
|
+
1DF00,1DF1E
|
567
591
|
1E000,1E006
|
568
592
|
1E008,1E018
|
569
593
|
1E01B,1E021
|
@@ -573,8 +597,13 @@ FFF9,FFFD
|
|
573
597
|
1E130,1E13D
|
574
598
|
1E140,1E149
|
575
599
|
1E14E,1E14F
|
600
|
+
1E290,1E2AE
|
576
601
|
1E2C0,1E2F9
|
577
602
|
1E2FF,1E2FF
|
603
|
+
1E7E0,1E7E6
|
604
|
+
1E7E8,1E7EB
|
605
|
+
1E7ED,1E7EE
|
606
|
+
1E7F0,1E7FE
|
578
607
|
1E800,1E8C4
|
579
608
|
1E8C7,1E8D6
|
580
609
|
1E900,1E94B
|
@@ -622,43 +651,46 @@ FFF9,FFFD
|
|
622
651
|
1F0B1,1F0BF
|
623
652
|
1F0C1,1F0CF
|
624
653
|
1F0D1,1F0F5
|
625
|
-
1F100,
|
626
|
-
1F110,1F16C
|
627
|
-
1F170,1F1AC
|
654
|
+
1F100,1F1AD
|
628
655
|
1F1E6,1F202
|
629
656
|
1F210,1F23B
|
630
657
|
1F240,1F248
|
631
658
|
1F250,1F251
|
632
659
|
1F260,1F265
|
633
|
-
1F300,
|
634
|
-
|
635
|
-
1F6F0,
|
660
|
+
1F300,1F6D7
|
661
|
+
1F6DD,1F6EC
|
662
|
+
1F6F0,1F6FC
|
636
663
|
1F700,1F773
|
637
664
|
1F780,1F7D8
|
638
665
|
1F7E0,1F7EB
|
666
|
+
1F7F0,1F7F0
|
639
667
|
1F800,1F80B
|
640
668
|
1F810,1F847
|
641
669
|
1F850,1F859
|
642
670
|
1F860,1F887
|
643
671
|
1F890,1F8AD
|
644
|
-
|
645
|
-
|
646
|
-
1F973,1F976
|
647
|
-
1F97A,1F9A2
|
648
|
-
1F9A5,1F9AA
|
649
|
-
1F9AE,1F9CA
|
650
|
-
1F9CD,1FA53
|
672
|
+
1F8B0,1F8B1
|
673
|
+
1F900,1FA53
|
651
674
|
1FA60,1FA6D
|
652
|
-
1FA70,
|
653
|
-
1FA78,
|
654
|
-
1FA80,
|
655
|
-
1FA90,
|
656
|
-
|
657
|
-
|
675
|
+
1FA70,1FA74
|
676
|
+
1FA78,1FA7C
|
677
|
+
1FA80,1FA86
|
678
|
+
1FA90,1FAAC
|
679
|
+
1FAB0,1FABA
|
680
|
+
1FAC0,1FAC5
|
681
|
+
1FAD0,1FAD9
|
682
|
+
1FAE0,1FAE7
|
683
|
+
1FAF0,1FAF6
|
684
|
+
1FB00,1FB92
|
685
|
+
1FB94,1FBCA
|
686
|
+
1FBF0,1FBF9
|
687
|
+
20000,2A6DF
|
688
|
+
2A700,2B738
|
658
689
|
2B740,2B81D
|
659
690
|
2B820,2CEA1
|
660
691
|
2CEB0,2EBE0
|
661
692
|
2F800,2FA1D
|
693
|
+
30000,3134A
|
662
694
|
E0001,E0001
|
663
695
|
E0020,E007F
|
664
696
|
E0100,E01EF
|
@@ -44,6 +44,7 @@ AE,AE
|
|
44
44
|
2699,2699
|
45
45
|
269B,269C
|
46
46
|
26A0,26A1
|
47
|
+
26A7,26A7
|
47
48
|
26AA,26AB
|
48
49
|
26B0,26B1
|
49
50
|
26BD,26BE
|
@@ -130,22 +131,23 @@ AE,AE
|
|
130
131
|
1F5FA,1F64F
|
131
132
|
1F680,1F6C5
|
132
133
|
1F6CB,1F6D2
|
133
|
-
1F6D5,
|
134
|
-
|
134
|
+
1F6D5,1F6D7
|
135
|
+
1F6DD,1F6E5
|
135
136
|
1F6E9,1F6E9
|
136
137
|
1F6EB,1F6EC
|
137
138
|
1F6F0,1F6F0
|
138
|
-
1F6F3,
|
139
|
+
1F6F3,1F6FC
|
139
140
|
1F7E0,1F7EB
|
140
|
-
|
141
|
+
1F7F0,1F7F0
|
142
|
+
1F90C,1F93A
|
141
143
|
1F93C,1F945
|
142
|
-
1F947,
|
143
|
-
|
144
|
-
|
145
|
-
|
146
|
-
|
147
|
-
|
148
|
-
|
149
|
-
|
150
|
-
|
151
|
-
|
144
|
+
1F947,1F9FF
|
145
|
+
1FA70,1FA74
|
146
|
+
1FA78,1FA7C
|
147
|
+
1FA80,1FA86
|
148
|
+
1FA90,1FAAC
|
149
|
+
1FAB0,1FABA
|
150
|
+
1FAC0,1FAC5
|
151
|
+
1FAD0,1FAD9
|
152
|
+
1FAE0,1FAE7
|
153
|
+
1FAF0,1FAF6
|
@@ -22,6 +22,17 @@ class CharacterSet
|
|
22
22
|
alias valid unicode
|
23
23
|
|
24
24
|
def build_from_cps_file(path)
|
25
|
+
if defined?(Ractor) && Ractor.current != Ractor.main
|
26
|
+
raise <<-EOS.gsub(/^ */, '')
|
27
|
+
CharacterSet's predefined sets are lazy-loaded.
|
28
|
+
Pre-load them to use them in Ractors. E.g.:
|
29
|
+
|
30
|
+
CharacterSet.ascii # pre-load
|
31
|
+
Ractor.new { CharacterSet.ascii.size }.take # => 128
|
32
|
+
Ractor.new { 'abc'.keep_character_set(:ascii) }.take # => 'abc'
|
33
|
+
EOS
|
34
|
+
end
|
35
|
+
|
25
36
|
File.readlines(path).inject(new) do |set, line|
|
26
37
|
range_start, range_end = line.split(',')
|
27
38
|
set.merge((range_start.to_i(16))..(range_end.to_i(16)))
|
@@ -6,9 +6,9 @@ class CharacterSet
|
|
6
6
|
new(Array(ranges).flat_map(&:to_a))
|
7
7
|
end
|
8
8
|
|
9
|
-
def
|
10
|
-
raise ArgumentError, 'pass a String' unless
|
11
|
-
|
9
|
+
def of_string(str)
|
10
|
+
raise ArgumentError, 'pass a String' unless str.respond_to?(:codepoints)
|
11
|
+
str.encode('utf-8').each_codepoint.with_object(new) { |cp, set| set << cp }
|
12
12
|
end
|
13
13
|
end
|
14
14
|
|
@@ -31,7 +31,7 @@ class CharacterSet
|
|
31
31
|
end
|
32
32
|
|
33
33
|
def ranges
|
34
|
-
CharacterSet.require_optional_dependency('range_compressor')
|
34
|
+
CharacterSet.require_optional_dependency('range_compressor', __method__)
|
35
35
|
RangeCompressor.compress(self)
|
36
36
|
end
|
37
37
|
|
@@ -40,16 +40,18 @@ class CharacterSet
|
|
40
40
|
end
|
41
41
|
|
42
42
|
def count_in(string)
|
43
|
-
|
43
|
+
utf8_str!(string).each_codepoint.count { |cp| include?(cp) }
|
44
44
|
end
|
45
45
|
|
46
46
|
def cover?(string)
|
47
|
-
|
47
|
+
utf8_str!(string).each_codepoint { |cp| return false unless include?(cp) }
|
48
48
|
true
|
49
49
|
end
|
50
50
|
|
51
51
|
def delete_in(string)
|
52
|
-
|
52
|
+
utf8_str!(string).each_codepoint.with_object('') do |cp, new_str|
|
53
|
+
include?(cp) || (new_str << cp)
|
54
|
+
end.encode(string.encoding)
|
53
55
|
end
|
54
56
|
|
55
57
|
def delete_in!(string)
|
@@ -58,7 +60,9 @@ class CharacterSet
|
|
58
60
|
end
|
59
61
|
|
60
62
|
def keep_in(string)
|
61
|
-
|
63
|
+
utf8_str!(string).each_codepoint.with_object('') do |cp, new_str|
|
64
|
+
include?(cp) && (new_str << cp)
|
65
|
+
end.encode(string.encoding)
|
62
66
|
end
|
63
67
|
|
64
68
|
def keep_in!(string)
|
@@ -67,14 +71,13 @@ class CharacterSet
|
|
67
71
|
end
|
68
72
|
|
69
73
|
def scan(string)
|
70
|
-
|
71
|
-
|
72
|
-
include?(cp) ? arr.push(cp.chr(encoding)) : arr
|
74
|
+
utf8_str!(string).each_codepoint.with_object([]) do |cp, arr|
|
75
|
+
arr.push(cp.chr('utf-8')) if include?(cp)
|
73
76
|
end
|
74
77
|
end
|
75
78
|
|
76
79
|
def used_by?(string)
|
77
|
-
|
80
|
+
utf8_str!(string).each_codepoint { |cp| return true if include?(cp) }
|
78
81
|
false
|
79
82
|
end
|
80
83
|
|
@@ -115,16 +118,9 @@ class CharacterSet
|
|
115
118
|
num >= 0 && num <= 16 or raise ArgumentError, 'plane must be between 0 and 16'
|
116
119
|
end
|
117
120
|
|
118
|
-
def
|
121
|
+
def utf8_str!(obj)
|
119
122
|
raise ArgumentError, 'pass a String' unless obj.respond_to?(:codepoints)
|
120
|
-
obj
|
121
|
-
end
|
122
|
-
|
123
|
-
def make_new_str(original, &block)
|
124
|
-
new_string = str!(original)
|
125
|
-
.each_codepoint
|
126
|
-
.each_with_object(''.encode(original.encoding), &block)
|
127
|
-
original.tainted? ? new_string.taint : new_string
|
123
|
+
obj.encode('utf-8')
|
128
124
|
end
|
129
125
|
end
|
130
126
|
end
|
@@ -1,7 +1,9 @@
|
|
1
1
|
class CharacterSet
|
2
2
|
module RubyFallback
|
3
3
|
module SetMethods
|
4
|
-
Enumerable.instance_methods
|
4
|
+
(Enumerable.instance_methods -
|
5
|
+
%i[include? member? to_a] +
|
6
|
+
%i[empty? hash length size]).each do |mthd|
|
5
7
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
6
8
|
def #{mthd}(*args, &block)
|
7
9
|
@__set.#{mthd}(*args, &block)
|
@@ -9,8 +11,8 @@ class CharacterSet
|
|
9
11
|
RUBY
|
10
12
|
end
|
11
13
|
|
12
|
-
%
|
13
|
-
subset? superset?].each do |mthd|
|
14
|
+
%i[< <= <=> > >= === disjoint? include? intersect? member?
|
15
|
+
proper_subset? proper_superset? subset? superset?].each do |mthd|
|
14
16
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
15
17
|
def #{mthd}(enum, &block)
|
16
18
|
if enum.is_a?(CharacterSet) || enum.is_a?(CharacterSet::Pure)
|
@@ -21,9 +23,8 @@ class CharacterSet
|
|
21
23
|
RUBY
|
22
24
|
end
|
23
25
|
|
24
|
-
%
|
25
|
-
|
26
|
-
select! subtract].each do |mthd|
|
26
|
+
%i[<< add add? clear delete delete? delete_if each filter! keep_if
|
27
|
+
reject! select! subtract].each do |mthd|
|
27
28
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
28
29
|
def #{mthd}(*args, &block)
|
29
30
|
result = @__set.#{mthd}(*args, &block)
|
@@ -32,7 +33,7 @@ class CharacterSet
|
|
32
33
|
RUBY
|
33
34
|
end
|
34
35
|
|
35
|
-
%
|
36
|
+
%i[& + - ^ | difference intersection union].each do |mthd|
|
36
37
|
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
37
38
|
def #{mthd}(enum, &block)
|
38
39
|
if enum.respond_to?(:map)
|
@@ -43,15 +44,6 @@ class CharacterSet
|
|
43
44
|
RUBY
|
44
45
|
end
|
45
46
|
|
46
|
-
%w[taint untaint].each do |mthd|
|
47
|
-
class_eval <<-RUBY, __FILE__, __LINE__ + 1
|
48
|
-
def #{mthd}
|
49
|
-
@__set.#{mthd}
|
50
|
-
super
|
51
|
-
end
|
52
|
-
RUBY
|
53
|
-
end
|
54
|
-
|
55
47
|
unless RUBY_PLATFORM[/java/i]
|
56
48
|
def freeze
|
57
49
|
@__set.to_a
|
@@ -81,6 +73,7 @@ class CharacterSet
|
|
81
73
|
|
82
74
|
def eql?(other)
|
83
75
|
return false unless other.is_a?(self.class)
|
76
|
+
|
84
77
|
@__set.eql?(other.instance_variable_get(:@__set))
|
85
78
|
end
|
86
79
|
|