character_set 1.4.0 → 1.8.0

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in their respective public registries.
Files changed (49)
  1. checksums.yaml +4 -4
  2. data/.gitattributes +1 -1
  3. data/.github/workflows/gouteur.yml +20 -0
  4. data/.github/workflows/lint.yml +29 -0
  5. data/.github/workflows/tests.yml +28 -0
  6. data/.gitignore +1 -0
  7. data/.gouteur.yml +2 -0
  8. data/.rubocop.yml +20 -0
  9. data/BENCHMARK.md +35 -31
  10. data/CHANGELOG.md +64 -1
  11. data/Gemfile +15 -0
  12. data/LICENSE.txt +1 -1
  13. data/README.md +25 -9
  14. data/Rakefile +2 -120
  15. data/character_set.gemspec +0 -10
  16. data/ext/character_set/character_set.c +123 -121
  17. data/ext/character_set/unicode_casefold_table.h +44 -1
  18. data/lib/character_set/core_ext/regexp_ext.rb +9 -1
  19. data/lib/character_set/core_ext/string_ext.rb +2 -2
  20. data/lib/character_set/expression_converter.rb +40 -56
  21. data/lib/character_set/parser.rb +8 -4
  22. data/lib/character_set/predefined_sets/assigned.cps +110 -78
  23. data/lib/character_set/predefined_sets/emoji.cps +16 -14
  24. data/lib/character_set/predefined_sets.rb +11 -0
  25. data/lib/character_set/ruby_fallback/character_set_methods.rb +17 -21
  26. data/lib/character_set/ruby_fallback/set_methods.rb +9 -16
  27. data/lib/character_set/ruby_fallback/vendored_set_classes.rb +385 -0
  28. data/lib/character_set/ruby_fallback.rb +18 -2
  29. data/lib/character_set/set_method_adapters.rb +4 -3
  30. data/lib/character_set/shared_methods.rb +25 -11
  31. data/lib/character_set/version.rb +1 -1
  32. data/tasks/benchmark.rake +20 -0
  33. data/{benchmarks → tasks/benchmarks}/delete_in.rb +5 -1
  34. data/{benchmarks → tasks/benchmarks}/keep_in.rb +5 -1
  35. data/tasks/benchmarks/shared.rb +28 -0
  36. data/tasks/sync_casefold_data.rake +20 -0
  37. data/tasks/sync_predefined_sets.rake +9 -0
  38. data/tasks/sync_ruby_spec.rake +65 -0
  39. metadata +29 -146
  40. data/.travis.yml +0 -9
  41. data/benchmarks/shared.rb +0 -26
  42. /data/{benchmarks → tasks/benchmarks}/count_in.rb +0 -0
  43. /data/{benchmarks → tasks/benchmarks}/cover.rb +0 -0
  44. /data/{benchmarks → tasks/benchmarks}/scan.rb +0 -0
  45. /data/{benchmarks → tasks/benchmarks}/used_by.rb +0 -0
  46. /data/{benchmarks → tasks/benchmarks}/z_add.rb +0 -0
  47. /data/{benchmarks → tasks/benchmarks}/z_delete.rb +0 -0
  48. /data/{benchmarks → tasks/benchmarks}/z_merge.rb +0 -0
  49. /data/{benchmarks → tasks/benchmarks}/z_minmax.rb +0 -0
@@ -4,11 +4,15 @@ class CharacterSet
4
4
 
5
5
  def codepoints_from_enumerable(object)
6
6
  raise ArgumentError, 'pass an Enumerable' unless object.respond_to?(:each)
7
+
7
8
  # Use #each to check first element (only this works for all Enumerables)
8
- object.each do |e|
9
- return object if e.is_a?(Integer) && e >= 0 && e < 0x110000
10
- return object.map(&:ord) if e.is_a?(String) && e.length == 1
11
- raise ArgumentError, "#{e.inspect} is not valid as a codepoint"
9
+ object.each do |el| # rubocop:disable Lint/UnreachableLoop
10
+ if el.is_a?(Integer) && el >= 0 && el < 0x110000
11
+ return object
12
+ elsif el.is_a?(String) && el.length == 1
13
+ return object.to_a.join.encode('utf-8').codepoints
14
+ end
15
+ raise ArgumentError, "#{el.inspect} is not valid as a codepoint"
12
16
  end
13
17
  end
14
18
 
@@ -10,8 +10,7 @@
10
10
  591,5C7
11
11
  5D0,5EA
12
12
  5EF,5F4
13
- 600,61C
14
- 61E,70D
13
+ 600,70D
15
14
  70F,74A
16
15
  74D,7B1
17
16
  7C0,7FA
@@ -20,9 +19,9 @@
20
19
  840,85B
21
20
  85E,85E
22
21
  860,86A
23
- 8A0,8B4
24
- 8B6,8BD
25
- 8D3,983
22
+ 870,88E
23
+ 890,891
24
+ 898,983
26
25
  985,98C
27
26
  98F,990
28
27
  993,9A8
@@ -76,7 +75,7 @@ B35,B39
76
75
  B3C,B44
77
76
  B47,B48
78
77
  B4B,B4D
79
- B56,B57
78
+ B55,B57
80
79
  B5C,B5D
81
80
  B5F,B63
82
81
  B66,B77
@@ -100,11 +99,12 @@ C00,C0C
100
99
  C0E,C10
101
100
  C12,C28
102
101
  C2A,C39
103
- C3D,C44
102
+ C3C,C44
104
103
  C46,C48
105
104
  C4A,C4D
106
105
  C55,C56
107
106
  C58,C5A
107
+ C5D,C5D
108
108
  C60,C63
109
109
  C66,C6F
110
110
  C77,C8C
@@ -116,19 +116,18 @@ CBC,CC4
116
116
  CC6,CC8
117
117
  CCA,CCD
118
118
  CD5,CD6
119
- CDE,CDE
119
+ CDD,CDE
120
120
  CE0,CE3
121
121
  CE6,CEF
122
122
  CF1,CF2
123
- D00,D03
124
- D05,D0C
123
+ D00,D0C
125
124
  D0E,D10
126
125
  D12,D44
127
126
  D46,D48
128
127
  D4A,D4F
129
128
  D54,D63
130
129
  D66,D7F
131
- D82,D83
130
+ D81,D83
132
131
  D85,D96
133
132
  D9A,DB1
134
133
  DB3,DBB
@@ -184,9 +183,8 @@ FCE,FDA
184
183
  13F8,13FD
185
184
  1400,169C
186
185
  16A0,16F8
187
- 1700,170C
188
- 170E,1714
189
- 1720,1736
186
+ 1700,1715
187
+ 171F,1736
190
188
  1740,1753
191
189
  1760,176C
192
190
  176E,1770
@@ -194,8 +192,7 @@ FCE,FDA
194
192
  1780,17DD
195
193
  17E0,17E9
196
194
  17F0,17F9
197
- 1800,180E
198
- 1810,1819
195
+ 1800,1819
199
196
  1820,1878
200
197
  1880,18AA
201
198
  18B0,18F5
@@ -214,9 +211,9 @@ FCE,FDA
214
211
  1A7F,1A89
215
212
  1A90,1A99
216
213
  1AA0,1AAD
217
- 1AB0,1ABE
218
- 1B00,1B4B
219
- 1B50,1B7C
214
+ 1AB0,1ACE
215
+ 1B00,1B4C
216
+ 1B50,1B7E
220
217
  1B80,1BF3
221
218
  1BFC,1C37
222
219
  1C3B,1C49
@@ -224,8 +221,7 @@ FCE,FDA
224
221
  1C90,1CBA
225
222
  1CBD,1CC7
226
223
  1CD0,1CFA
227
- 1D00,1DF9
228
- 1DFB,1F15
224
+ 1D00,1F15
229
225
  1F18,1F1D
230
226
  1F20,1F45
231
227
  1F48,1F4D
@@ -245,16 +241,14 @@ FCE,FDA
245
241
  2066,2071
246
242
  2074,208E
247
243
  2090,209C
248
- 20A0,20BF
244
+ 20A0,20C0
249
245
  20D0,20F0
250
246
  2100,218B
251
247
  2190,2426
252
248
  2440,244A
253
249
  2460,2B73
254
250
  2B76,2B95
255
- 2B98,2C2E
256
- 2C30,2C5E
257
- 2C60,2CF3
251
+ 2B97,2CF3
258
252
  2CF9,2D25
259
253
  2D27,2D27
260
254
  2D2D,2D2D
@@ -269,7 +263,7 @@ FCE,FDA
269
263
  2DC8,2DCE
270
264
  2DD0,2DD6
271
265
  2DD8,2DDE
272
- 2DE0,2E4F
266
+ 2DE0,2E5D
273
267
  2E80,2E99
274
268
  2E9B,2EF3
275
269
  2F00,2FD5
@@ -279,18 +273,17 @@ FCE,FDA
279
273
  3099,30FF
280
274
  3105,312F
281
275
  3131,318E
282
- 3190,31BA
283
- 31C0,31E3
276
+ 3190,31E3
284
277
  31F0,321E
285
- 3220,4DB5
286
- 4DC0,9FEF
287
- A000,A48C
278
+ 3220,A48C
288
279
  A490,A4C6
289
280
  A4D0,A62B
290
281
  A640,A6F7
291
- A700,A7BF
292
- A7C2,A7C6
293
- A7F7,A82B
282
+ A700,A7CA
283
+ A7D0,A7D1
284
+ A7D3,A7D3
285
+ A7D5,A7D9
286
+ A7F2,A82C
294
287
  A830,A839
295
288
  A840,A877
296
289
  A880,A8C5
@@ -310,7 +303,7 @@ AB09,AB0E
310
303
  AB11,AB16
311
304
  AB20,AB26
312
305
  AB28,AB2E
313
- AB30,AB67
306
+ AB30,AB6B
314
307
  AB70,ABED
315
308
  ABF0,ABF9
316
309
  AC00,D7A3
@@ -325,12 +318,11 @@ FB38,FB3C
325
318
  FB3E,FB3E
326
319
  FB40,FB41
327
320
  FB43,FB44
328
- FB46,FBC1
329
- FBD3,FD3F
330
- FD50,FD8F
321
+ FB46,FBC2
322
+ FBD3,FD8F
331
323
  FD92,FDC7
332
- FDF0,FDFD
333
- FE00,FE19
324
+ FDCF,FDCF
325
+ FDF0,FE19
334
326
  FE20,FE52
335
327
  FE54,FE66
336
328
  FE68,FE6B
@@ -355,7 +347,7 @@ FFF9,FFFD
355
347
  10100,10102
356
348
  10107,10133
357
349
  10137,1018E
358
- 10190,1019B
350
+ 10190,1019C
359
351
  101A0,101A0
360
352
  101D0,101FD
361
353
  10280,1029C
@@ -373,10 +365,20 @@ FFF9,FFFD
373
365
  104D8,104FB
374
366
  10500,10527
375
367
  10530,10563
376
- 1056F,1056F
368
+ 1056F,1057A
369
+ 1057C,1058A
370
+ 1058C,10592
371
+ 10594,10595
372
+ 10597,105A1
373
+ 105A3,105B1
374
+ 105B3,105B9
375
+ 105BB,105BC
377
376
  10600,10736
378
377
  10740,10755
379
378
  10760,10767
379
+ 10780,10785
380
+ 10787,107B0
381
+ 107B2,107BA
380
382
  10800,10805
381
383
  10808,10808
382
384
  1080A,10835
@@ -415,20 +417,24 @@ FFF9,FFFD
415
417
  10CFA,10D27
416
418
  10D30,10D39
417
419
  10E60,10E7E
420
+ 10E80,10EA9
421
+ 10EAB,10EAD
422
+ 10EB0,10EB1
418
423
  10F00,10F27
419
424
  10F30,10F59
425
+ 10F70,10F89
426
+ 10FB0,10FCB
420
427
  10FE0,10FF6
421
428
  11000,1104D
422
- 11052,1106F
423
- 1107F,110C1
429
+ 11052,11075
430
+ 1107F,110C2
424
431
  110CD,110CD
425
432
  110D0,110E8
426
433
  110F0,110F9
427
434
  11100,11134
428
- 11136,11146
435
+ 11136,11147
429
436
  11150,11176
430
- 11180,111CD
431
- 111D0,111DF
437
+ 11180,111DF
432
438
  111E1,111F4
433
439
  11200,11211
434
440
  11213,1123E
@@ -454,9 +460,8 @@ FFF9,FFFD
454
460
  1135D,11363
455
461
  11366,1136C
456
462
  11370,11374
457
- 11400,11459
458
- 1145B,1145B
459
- 1145D,1145F
463
+ 11400,1145B
464
+ 1145D,11461
460
465
  11480,114C7
461
466
  114D0,114D9
462
467
  11580,115B5
@@ -464,20 +469,27 @@ FFF9,FFFD
464
469
  11600,11644
465
470
  11650,11659
466
471
  11660,1166C
467
- 11680,116B8
472
+ 11680,116B9
468
473
  116C0,116C9
469
474
  11700,1171A
470
475
  1171D,1172B
471
- 11730,1173F
476
+ 11730,11746
472
477
  11800,1183B
473
478
  118A0,118F2
474
- 118FF,118FF
479
+ 118FF,11906
480
+ 11909,11909
481
+ 1190C,11913
482
+ 11915,11916
483
+ 11918,11935
484
+ 11937,11938
485
+ 1193B,11946
486
+ 11950,11959
475
487
  119A0,119A7
476
488
  119AA,119D7
477
489
  119DA,119E4
478
490
  11A00,11A47
479
491
  11A50,11AA2
480
- 11AC0,11AF8
492
+ 11AB0,11AF8
481
493
  11C00,11C08
482
494
  11C0A,11C36
483
495
  11C38,11C45
@@ -499,18 +511,21 @@ FFF9,FFFD
499
511
  11D93,11D98
500
512
  11DA0,11DA9
501
513
  11EE0,11EF8
514
+ 11FB0,11FB0
502
515
  11FC0,11FF1
503
516
  11FFF,12399
504
517
  12400,1246E
505
518
  12470,12474
506
519
  12480,12543
520
+ 12F90,12FF2
507
521
  13000,1342E
508
522
  13430,13438
509
523
  14400,14646
510
524
  16800,16A38
511
525
  16A40,16A5E
512
526
  16A60,16A69
513
- 16A6E,16A6F
527
+ 16A6E,16ABE
528
+ 16AC0,16AC9
514
529
  16AD0,16AED
515
530
  16AF0,16AF5
516
531
  16B00,16B45
@@ -522,10 +537,15 @@ FFF9,FFFD
522
537
  16F00,16F4A
523
538
  16F4F,16F87
524
539
  16F8F,16F9F
525
- 16FE0,16FE3
540
+ 16FE0,16FE4
541
+ 16FF0,16FF1
526
542
  17000,187F7
527
- 18800,18AF2
528
- 1B000,1B11E
543
+ 18800,18CD5
544
+ 18D00,18D08
545
+ 1AFF0,1AFF3
546
+ 1AFF5,1AFFB
547
+ 1AFFD,1AFFE
548
+ 1B000,1B122
529
549
  1B150,1B152
530
550
  1B164,1B167
531
551
  1B170,1B2FB
@@ -534,9 +554,12 @@ FFF9,FFFD
534
554
  1BC80,1BC88
535
555
  1BC90,1BC99
536
556
  1BC9C,1BCA3
557
+ 1CF00,1CF2D
558
+ 1CF30,1CF46
559
+ 1CF50,1CFC3
537
560
  1D000,1D0F5
538
561
  1D100,1D126
539
- 1D129,1D1E8
562
+ 1D129,1D1EA
540
563
  1D200,1D245
541
564
  1D2E0,1D2F3
542
565
  1D300,1D356
@@ -564,6 +587,7 @@ FFF9,FFFD
564
587
  1D7CE,1DA8B
565
588
  1DA9B,1DA9F
566
589
  1DAA1,1DAAF
590
+ 1DF00,1DF1E
567
591
  1E000,1E006
568
592
  1E008,1E018
569
593
  1E01B,1E021
@@ -573,8 +597,13 @@ FFF9,FFFD
573
597
  1E130,1E13D
574
598
  1E140,1E149
575
599
  1E14E,1E14F
600
+ 1E290,1E2AE
576
601
  1E2C0,1E2F9
577
602
  1E2FF,1E2FF
603
+ 1E7E0,1E7E6
604
+ 1E7E8,1E7EB
605
+ 1E7ED,1E7EE
606
+ 1E7F0,1E7FE
578
607
  1E800,1E8C4
579
608
  1E8C7,1E8D6
580
609
  1E900,1E94B
@@ -622,43 +651,46 @@ FFF9,FFFD
622
651
  1F0B1,1F0BF
623
652
  1F0C1,1F0CF
624
653
  1F0D1,1F0F5
625
- 1F100,1F10C
626
- 1F110,1F16C
627
- 1F170,1F1AC
654
+ 1F100,1F1AD
628
655
  1F1E6,1F202
629
656
  1F210,1F23B
630
657
  1F240,1F248
631
658
  1F250,1F251
632
659
  1F260,1F265
633
- 1F300,1F6D5
634
- 1F6E0,1F6EC
635
- 1F6F0,1F6FA
660
+ 1F300,1F6D7
661
+ 1F6DD,1F6EC
662
+ 1F6F0,1F6FC
636
663
  1F700,1F773
637
664
  1F780,1F7D8
638
665
  1F7E0,1F7EB
666
+ 1F7F0,1F7F0
639
667
  1F800,1F80B
640
668
  1F810,1F847
641
669
  1F850,1F859
642
670
  1F860,1F887
643
671
  1F890,1F8AD
644
- 1F900,1F90B
645
- 1F90D,1F971
646
- 1F973,1F976
647
- 1F97A,1F9A2
648
- 1F9A5,1F9AA
649
- 1F9AE,1F9CA
650
- 1F9CD,1FA53
672
+ 1F8B0,1F8B1
673
+ 1F900,1FA53
651
674
  1FA60,1FA6D
652
- 1FA70,1FA73
653
- 1FA78,1FA7A
654
- 1FA80,1FA82
655
- 1FA90,1FA95
656
- 20000,2A6D6
657
- 2A700,2B734
675
+ 1FA70,1FA74
676
+ 1FA78,1FA7C
677
+ 1FA80,1FA86
678
+ 1FA90,1FAAC
679
+ 1FAB0,1FABA
680
+ 1FAC0,1FAC5
681
+ 1FAD0,1FAD9
682
+ 1FAE0,1FAE7
683
+ 1FAF0,1FAF6
684
+ 1FB00,1FB92
685
+ 1FB94,1FBCA
686
+ 1FBF0,1FBF9
687
+ 20000,2A6DF
688
+ 2A700,2B738
658
689
  2B740,2B81D
659
690
  2B820,2CEA1
660
691
  2CEB0,2EBE0
661
692
  2F800,2FA1D
693
+ 30000,3134A
662
694
  E0001,E0001
663
695
  E0020,E007F
664
696
  E0100,E01EF
@@ -44,6 +44,7 @@ AE,AE
44
44
  2699,2699
45
45
  269B,269C
46
46
  26A0,26A1
47
+ 26A7,26A7
47
48
  26AA,26AB
48
49
  26B0,26B1
49
50
  26BD,26BE
@@ -130,22 +131,23 @@ AE,AE
130
131
  1F5FA,1F64F
131
132
  1F680,1F6C5
132
133
  1F6CB,1F6D2
133
- 1F6D5,1F6D5
134
- 1F6E0,1F6E5
134
+ 1F6D5,1F6D7
135
+ 1F6DD,1F6E5
135
136
  1F6E9,1F6E9
136
137
  1F6EB,1F6EC
137
138
  1F6F0,1F6F0
138
- 1F6F3,1F6FA
139
+ 1F6F3,1F6FC
139
140
  1F7E0,1F7EB
140
- 1F90D,1F93A
141
+ 1F7F0,1F7F0
142
+ 1F90C,1F93A
141
143
  1F93C,1F945
142
- 1F947,1F971
143
- 1F973,1F976
144
- 1F97A,1F9A2
145
- 1F9A5,1F9AA
146
- 1F9AE,1F9CA
147
- 1F9CD,1F9FF
148
- 1FA70,1FA73
149
- 1FA78,1FA7A
150
- 1FA80,1FA82
151
- 1FA90,1FA95
144
+ 1F947,1F9FF
145
+ 1FA70,1FA74
146
+ 1FA78,1FA7C
147
+ 1FA80,1FA86
148
+ 1FA90,1FAAC
149
+ 1FAB0,1FABA
150
+ 1FAC0,1FAC5
151
+ 1FAD0,1FAD9
152
+ 1FAE0,1FAE7
153
+ 1FAF0,1FAF6
@@ -22,6 +22,17 @@ class CharacterSet
22
22
  alias valid unicode
23
23
 
24
24
  def build_from_cps_file(path)
25
+ if defined?(Ractor) && Ractor.current != Ractor.main
26
+ raise <<-EOS.gsub(/^ */, '')
27
+ CharacterSet's predefined sets are lazy-loaded.
28
+ Pre-load them to use them in Ractors. E.g.:
29
+
30
+ CharacterSet.ascii # pre-load
31
+ Ractor.new { CharacterSet.ascii.size }.take # => 128
32
+ Ractor.new { 'abc'.keep_character_set(:ascii) }.take # => 'abc'
33
+ EOS
34
+ end
35
+
25
36
  File.readlines(path).inject(new) do |set, line|
26
37
  range_start, range_end = line.split(',')
27
38
  set.merge((range_start.to_i(16))..(range_end.to_i(16)))
@@ -6,9 +6,9 @@ class CharacterSet
6
6
  new(Array(ranges).flat_map(&:to_a))
7
7
  end
8
8
 
9
- def of(string)
10
- raise ArgumentError, 'pass a String' unless string.is_a?(String)
11
- new(string.codepoints)
9
+ def of_string(str)
10
+ raise ArgumentError, 'pass a String' unless str.respond_to?(:codepoints)
11
+ str.encode('utf-8').each_codepoint.with_object(new) { |cp, set| set << cp }
12
12
  end
13
13
  end
14
14
 
@@ -31,7 +31,7 @@ class CharacterSet
31
31
  end
32
32
 
33
33
  def ranges
34
- CharacterSet.require_optional_dependency('range_compressor')
34
+ CharacterSet.require_optional_dependency('range_compressor', __method__)
35
35
  RangeCompressor.compress(self)
36
36
  end
37
37
 
@@ -40,16 +40,18 @@ class CharacterSet
40
40
  end
41
41
 
42
42
  def count_in(string)
43
- str!(string).each_codepoint.count { |cp| include?(cp) }
43
+ utf8_str!(string).each_codepoint.count { |cp| include?(cp) }
44
44
  end
45
45
 
46
46
  def cover?(string)
47
- str!(string).each_codepoint { |cp| return false unless include?(cp) }
47
+ utf8_str!(string).each_codepoint { |cp| return false unless include?(cp) }
48
48
  true
49
49
  end
50
50
 
51
51
  def delete_in(string)
52
- make_new_str(string) { |cp, new_str| include?(cp) || (new_str << cp) }
52
+ utf8_str!(string).each_codepoint.with_object('') do |cp, new_str|
53
+ include?(cp) || (new_str << cp)
54
+ end.encode(string.encoding)
53
55
  end
54
56
 
55
57
  def delete_in!(string)
@@ -58,7 +60,9 @@ class CharacterSet
58
60
  end
59
61
 
60
62
  def keep_in(string)
61
- make_new_str(string) { |cp, new_str| include?(cp) && (new_str << cp) }
63
+ utf8_str!(string).each_codepoint.with_object('') do |cp, new_str|
64
+ include?(cp) && (new_str << cp)
65
+ end.encode(string.encoding)
62
66
  end
63
67
 
64
68
  def keep_in!(string)
@@ -67,14 +71,13 @@ class CharacterSet
67
71
  end
68
72
 
69
73
  def scan(string)
70
- encoding = str!(string).encoding
71
- string.each_codepoint.inject([]) do |arr, cp|
72
- include?(cp) ? arr.push(cp.chr(encoding)) : arr
74
+ utf8_str!(string).each_codepoint.with_object([]) do |cp, arr|
75
+ arr.push(cp.chr('utf-8')) if include?(cp)
73
76
  end
74
77
  end
75
78
 
76
79
  def used_by?(string)
77
- str!(string).each_codepoint { |cp| return true if include?(cp) }
80
+ utf8_str!(string).each_codepoint { |cp| return true if include?(cp) }
78
81
  false
79
82
  end
80
83
 
@@ -115,16 +118,9 @@ class CharacterSet
115
118
  num >= 0 && num <= 16 or raise ArgumentError, 'plane must be between 0 and 16'
116
119
  end
117
120
 
118
- def str!(obj)
121
+ def utf8_str!(obj)
119
122
  raise ArgumentError, 'pass a String' unless obj.respond_to?(:codepoints)
120
- obj
121
- end
122
-
123
- def make_new_str(original, &block)
124
- new_string = str!(original)
125
- .each_codepoint
126
- .each_with_object(''.encode(original.encoding), &block)
127
- original.tainted? ? new_string.taint : new_string
123
+ obj.encode('utf-8')
128
124
  end
129
125
  end
130
126
  end
@@ -1,7 +1,9 @@
1
1
  class CharacterSet
2
2
  module RubyFallback
3
3
  module SetMethods
4
- Enumerable.instance_methods.concat(%w[empty? length size]).each do |mthd|
4
+ (Enumerable.instance_methods -
5
+ %i[include? member? to_a] +
6
+ %i[empty? hash length size]).each do |mthd|
5
7
  class_eval <<-RUBY, __FILE__, __LINE__ + 1
6
8
  def #{mthd}(*args, &block)
7
9
  @__set.#{mthd}(*args, &block)
@@ -9,8 +11,8 @@ class CharacterSet
9
11
  RUBY
10
12
  end
11
13
 
12
- %w[< <= > >= disjoint? intersect? proper_subset? proper_superset?
13
- subset? superset?].each do |mthd|
14
+ %i[< <= <=> > >= === disjoint? include? intersect? member?
15
+ proper_subset? proper_superset? subset? superset?].each do |mthd|
14
16
  class_eval <<-RUBY, __FILE__, __LINE__ + 1
15
17
  def #{mthd}(enum, &block)
16
18
  if enum.is_a?(CharacterSet) || enum.is_a?(CharacterSet::Pure)
@@ -21,9 +23,8 @@ class CharacterSet
21
23
  RUBY
22
24
  end
23
25
 
24
- %w[<< === add add? clear collect! delete delete? delete_if
25
- each filter! hash include? map! member? keep_if reject!
26
- select! subtract].each do |mthd|
26
+ %i[<< add add? clear delete delete? delete_if each filter! keep_if
27
+ reject! select! subtract].each do |mthd|
27
28
  class_eval <<-RUBY, __FILE__, __LINE__ + 1
28
29
  def #{mthd}(*args, &block)
29
30
  result = @__set.#{mthd}(*args, &block)
@@ -32,7 +33,7 @@ class CharacterSet
32
33
  RUBY
33
34
  end
34
35
 
35
- %w[& + - ^ | difference intersection union].each do |mthd|
36
+ %i[& + - ^ | difference intersection union].each do |mthd|
36
37
  class_eval <<-RUBY, __FILE__, __LINE__ + 1
37
38
  def #{mthd}(enum, &block)
38
39
  if enum.respond_to?(:map)
@@ -43,15 +44,6 @@ class CharacterSet
43
44
  RUBY
44
45
  end
45
46
 
46
- %w[taint untaint].each do |mthd|
47
- class_eval <<-RUBY, __FILE__, __LINE__ + 1
48
- def #{mthd}
49
- @__set.#{mthd}
50
- super
51
- end
52
- RUBY
53
- end
54
-
55
47
  unless RUBY_PLATFORM[/java/i]
56
48
  def freeze
57
49
  @__set.to_a
@@ -81,6 +73,7 @@ class CharacterSet
81
73
 
82
74
  def eql?(other)
83
75
  return false unless other.is_a?(self.class)
76
+
84
77
  @__set.eql?(other.instance_variable_get(:@__set))
85
78
  end
86
79