character_set 1.1.1-java → 1.4.1-java

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (54) hide show
  1. checksums.yaml +4 -4
  2. data/.gitattributes +3 -0
  3. data/.github/workflows/lint.yml +29 -0
  4. data/.github/workflows/tests.yml +22 -0
  5. data/.gitignore +1 -0
  6. data/.rubocop.yml +11 -0
  7. data/BENCHMARK.md +53 -17
  8. data/CHANGELOG.md +47 -0
  9. data/README.md +38 -14
  10. data/Rakefile +60 -36
  11. data/benchmarks/count_in.rb +13 -0
  12. data/benchmarks/delete_in.rb +1 -1
  13. data/benchmarks/scan.rb +13 -0
  14. data/benchmarks/shared.rb +5 -0
  15. data/benchmarks/z_add.rb +12 -0
  16. data/benchmarks/z_delete.rb +12 -0
  17. data/benchmarks/z_merge.rb +15 -0
  18. data/benchmarks/z_minmax.rb +12 -0
  19. data/bin/console +2 -0
  20. data/character_set.gemspec +17 -6
  21. data/ext/character_set/character_set.c +963 -414
  22. data/ext/character_set/unicode_casefold_table.h +10 -2
  23. data/ext/character_set/unicode_casefold_table.h.tmpl +11 -0
  24. data/lib/character_set/character.rb +1 -1
  25. data/lib/character_set/core_ext/regexp_ext.rb +1 -1
  26. data/lib/character_set/core_ext/string_ext.rb +3 -1
  27. data/lib/character_set/expression_converter.rb +25 -27
  28. data/lib/character_set/parser.rb +1 -1
  29. data/lib/character_set/predefined_sets.rb +25 -260
  30. data/lib/character_set/predefined_sets/any.cps +1 -0
  31. data/lib/character_set/predefined_sets/ascii.cps +1 -0
  32. data/lib/character_set/predefined_sets/ascii_alnum.cps +3 -0
  33. data/lib/character_set/predefined_sets/ascii_letter.cps +2 -0
  34. data/lib/character_set/predefined_sets/assigned.cps +666 -0
  35. data/lib/character_set/predefined_sets/bmp.cps +2 -0
  36. data/lib/character_set/predefined_sets/crypt.cps +2 -0
  37. data/lib/character_set/predefined_sets/emoji.cps +151 -0
  38. data/lib/character_set/predefined_sets/newline.cps +3 -0
  39. data/lib/character_set/predefined_sets/surrogate.cps +1 -0
  40. data/lib/character_set/predefined_sets/unicode.cps +2 -0
  41. data/lib/character_set/predefined_sets/url_fragment.cps +8 -0
  42. data/lib/character_set/predefined_sets/url_host.cps +10 -0
  43. data/lib/character_set/predefined_sets/url_path.cps +7 -0
  44. data/lib/character_set/predefined_sets/url_query.cps +8 -0
  45. data/lib/character_set/predefined_sets/whitespace.cps +10 -0
  46. data/lib/character_set/ruby_fallback.rb +5 -3
  47. data/lib/character_set/ruby_fallback/character_set_methods.rb +53 -6
  48. data/lib/character_set/ruby_fallback/set_methods.rb +25 -17
  49. data/lib/character_set/shared_methods.rb +60 -49
  50. data/lib/character_set/version.rb +1 -1
  51. data/lib/character_set/writer.rb +98 -27
  52. metadata +102 -22
  53. data/.travis.yml +0 -11
  54. data/lib/character_set/ruby_fallback/plane_methods.rb +0 -27
@@ -0,0 +1,3 @@
1
+ 30,39
2
+ 41,5A
3
+ 61,7A
@@ -0,0 +1,666 @@
1
+ 0,377
2
+ 37A,37F
3
+ 384,38A
4
+ 38C,38C
5
+ 38E,3A1
6
+ 3A3,52F
7
+ 531,556
8
+ 559,58A
9
+ 58D,58F
10
+ 591,5C7
11
+ 5D0,5EA
12
+ 5EF,5F4
13
+ 600,61C
14
+ 61E,70D
15
+ 70F,74A
16
+ 74D,7B1
17
+ 7C0,7FA
18
+ 7FD,82D
19
+ 830,83E
20
+ 840,85B
21
+ 85E,85E
22
+ 860,86A
23
+ 8A0,8B4
24
+ 8B6,8BD
25
+ 8D3,983
26
+ 985,98C
27
+ 98F,990
28
+ 993,9A8
29
+ 9AA,9B0
30
+ 9B2,9B2
31
+ 9B6,9B9
32
+ 9BC,9C4
33
+ 9C7,9C8
34
+ 9CB,9CE
35
+ 9D7,9D7
36
+ 9DC,9DD
37
+ 9DF,9E3
38
+ 9E6,9FE
39
+ A01,A03
40
+ A05,A0A
41
+ A0F,A10
42
+ A13,A28
43
+ A2A,A30
44
+ A32,A33
45
+ A35,A36
46
+ A38,A39
47
+ A3C,A3C
48
+ A3E,A42
49
+ A47,A48
50
+ A4B,A4D
51
+ A51,A51
52
+ A59,A5C
53
+ A5E,A5E
54
+ A66,A76
55
+ A81,A83
56
+ A85,A8D
57
+ A8F,A91
58
+ A93,AA8
59
+ AAA,AB0
60
+ AB2,AB3
61
+ AB5,AB9
62
+ ABC,AC5
63
+ AC7,AC9
64
+ ACB,ACD
65
+ AD0,AD0
66
+ AE0,AE3
67
+ AE6,AF1
68
+ AF9,AFF
69
+ B01,B03
70
+ B05,B0C
71
+ B0F,B10
72
+ B13,B28
73
+ B2A,B30
74
+ B32,B33
75
+ B35,B39
76
+ B3C,B44
77
+ B47,B48
78
+ B4B,B4D
79
+ B56,B57
80
+ B5C,B5D
81
+ B5F,B63
82
+ B66,B77
83
+ B82,B83
84
+ B85,B8A
85
+ B8E,B90
86
+ B92,B95
87
+ B99,B9A
88
+ B9C,B9C
89
+ B9E,B9F
90
+ BA3,BA4
91
+ BA8,BAA
92
+ BAE,BB9
93
+ BBE,BC2
94
+ BC6,BC8
95
+ BCA,BCD
96
+ BD0,BD0
97
+ BD7,BD7
98
+ BE6,BFA
99
+ C00,C0C
100
+ C0E,C10
101
+ C12,C28
102
+ C2A,C39
103
+ C3D,C44
104
+ C46,C48
105
+ C4A,C4D
106
+ C55,C56
107
+ C58,C5A
108
+ C60,C63
109
+ C66,C6F
110
+ C77,C8C
111
+ C8E,C90
112
+ C92,CA8
113
+ CAA,CB3
114
+ CB5,CB9
115
+ CBC,CC4
116
+ CC6,CC8
117
+ CCA,CCD
118
+ CD5,CD6
119
+ CDE,CDE
120
+ CE0,CE3
121
+ CE6,CEF
122
+ CF1,CF2
123
+ D00,D03
124
+ D05,D0C
125
+ D0E,D10
126
+ D12,D44
127
+ D46,D48
128
+ D4A,D4F
129
+ D54,D63
130
+ D66,D7F
131
+ D82,D83
132
+ D85,D96
133
+ D9A,DB1
134
+ DB3,DBB
135
+ DBD,DBD
136
+ DC0,DC6
137
+ DCA,DCA
138
+ DCF,DD4
139
+ DD6,DD6
140
+ DD8,DDF
141
+ DE6,DEF
142
+ DF2,DF4
143
+ E01,E3A
144
+ E3F,E5B
145
+ E81,E82
146
+ E84,E84
147
+ E86,E8A
148
+ E8C,EA3
149
+ EA5,EA5
150
+ EA7,EBD
151
+ EC0,EC4
152
+ EC6,EC6
153
+ EC8,ECD
154
+ ED0,ED9
155
+ EDC,EDF
156
+ F00,F47
157
+ F49,F6C
158
+ F71,F97
159
+ F99,FBC
160
+ FBE,FCC
161
+ FCE,FDA
162
+ 1000,10C5
163
+ 10C7,10C7
164
+ 10CD,10CD
165
+ 10D0,1248
166
+ 124A,124D
167
+ 1250,1256
168
+ 1258,1258
169
+ 125A,125D
170
+ 1260,1288
171
+ 128A,128D
172
+ 1290,12B0
173
+ 12B2,12B5
174
+ 12B8,12BE
175
+ 12C0,12C0
176
+ 12C2,12C5
177
+ 12C8,12D6
178
+ 12D8,1310
179
+ 1312,1315
180
+ 1318,135A
181
+ 135D,137C
182
+ 1380,1399
183
+ 13A0,13F5
184
+ 13F8,13FD
185
+ 1400,169C
186
+ 16A0,16F8
187
+ 1700,170C
188
+ 170E,1714
189
+ 1720,1736
190
+ 1740,1753
191
+ 1760,176C
192
+ 176E,1770
193
+ 1772,1773
194
+ 1780,17DD
195
+ 17E0,17E9
196
+ 17F0,17F9
197
+ 1800,180E
198
+ 1810,1819
199
+ 1820,1878
200
+ 1880,18AA
201
+ 18B0,18F5
202
+ 1900,191E
203
+ 1920,192B
204
+ 1930,193B
205
+ 1940,1940
206
+ 1944,196D
207
+ 1970,1974
208
+ 1980,19AB
209
+ 19B0,19C9
210
+ 19D0,19DA
211
+ 19DE,1A1B
212
+ 1A1E,1A5E
213
+ 1A60,1A7C
214
+ 1A7F,1A89
215
+ 1A90,1A99
216
+ 1AA0,1AAD
217
+ 1AB0,1ABE
218
+ 1B00,1B4B
219
+ 1B50,1B7C
220
+ 1B80,1BF3
221
+ 1BFC,1C37
222
+ 1C3B,1C49
223
+ 1C4D,1C88
224
+ 1C90,1CBA
225
+ 1CBD,1CC7
226
+ 1CD0,1CFA
227
+ 1D00,1DF9
228
+ 1DFB,1F15
229
+ 1F18,1F1D
230
+ 1F20,1F45
231
+ 1F48,1F4D
232
+ 1F50,1F57
233
+ 1F59,1F59
234
+ 1F5B,1F5B
235
+ 1F5D,1F5D
236
+ 1F5F,1F7D
237
+ 1F80,1FB4
238
+ 1FB6,1FC4
239
+ 1FC6,1FD3
240
+ 1FD6,1FDB
241
+ 1FDD,1FEF
242
+ 1FF2,1FF4
243
+ 1FF6,1FFE
244
+ 2000,2064
245
+ 2066,2071
246
+ 2074,208E
247
+ 2090,209C
248
+ 20A0,20BF
249
+ 20D0,20F0
250
+ 2100,218B
251
+ 2190,2426
252
+ 2440,244A
253
+ 2460,2B73
254
+ 2B76,2B95
255
+ 2B98,2C2E
256
+ 2C30,2C5E
257
+ 2C60,2CF3
258
+ 2CF9,2D25
259
+ 2D27,2D27
260
+ 2D2D,2D2D
261
+ 2D30,2D67
262
+ 2D6F,2D70
263
+ 2D7F,2D96
264
+ 2DA0,2DA6
265
+ 2DA8,2DAE
266
+ 2DB0,2DB6
267
+ 2DB8,2DBE
268
+ 2DC0,2DC6
269
+ 2DC8,2DCE
270
+ 2DD0,2DD6
271
+ 2DD8,2DDE
272
+ 2DE0,2E4F
273
+ 2E80,2E99
274
+ 2E9B,2EF3
275
+ 2F00,2FD5
276
+ 2FF0,2FFB
277
+ 3000,303F
278
+ 3041,3096
279
+ 3099,30FF
280
+ 3105,312F
281
+ 3131,318E
282
+ 3190,31BA
283
+ 31C0,31E3
284
+ 31F0,321E
285
+ 3220,4DB5
286
+ 4DC0,9FEF
287
+ A000,A48C
288
+ A490,A4C6
289
+ A4D0,A62B
290
+ A640,A6F7
291
+ A700,A7BF
292
+ A7C2,A7C6
293
+ A7F7,A82B
294
+ A830,A839
295
+ A840,A877
296
+ A880,A8C5
297
+ A8CE,A8D9
298
+ A8E0,A953
299
+ A95F,A97C
300
+ A980,A9CD
301
+ A9CF,A9D9
302
+ A9DE,A9FE
303
+ AA00,AA36
304
+ AA40,AA4D
305
+ AA50,AA59
306
+ AA5C,AAC2
307
+ AADB,AAF6
308
+ AB01,AB06
309
+ AB09,AB0E
310
+ AB11,AB16
311
+ AB20,AB26
312
+ AB28,AB2E
313
+ AB30,AB67
314
+ AB70,ABED
315
+ ABF0,ABF9
316
+ AC00,D7A3
317
+ D7B0,D7C6
318
+ D7CB,D7FB
319
+ D800,FA6D
320
+ FA70,FAD9
321
+ FB00,FB06
322
+ FB13,FB17
323
+ FB1D,FB36
324
+ FB38,FB3C
325
+ FB3E,FB3E
326
+ FB40,FB41
327
+ FB43,FB44
328
+ FB46,FBC1
329
+ FBD3,FD3F
330
+ FD50,FD8F
331
+ FD92,FDC7
332
+ FDF0,FDFD
333
+ FE00,FE19
334
+ FE20,FE52
335
+ FE54,FE66
336
+ FE68,FE6B
337
+ FE70,FE74
338
+ FE76,FEFC
339
+ FEFF,FEFF
340
+ FF01,FFBE
341
+ FFC2,FFC7
342
+ FFCA,FFCF
343
+ FFD2,FFD7
344
+ FFDA,FFDC
345
+ FFE0,FFE6
346
+ FFE8,FFEE
347
+ FFF9,FFFD
348
+ 10000,1000B
349
+ 1000D,10026
350
+ 10028,1003A
351
+ 1003C,1003D
352
+ 1003F,1004D
353
+ 10050,1005D
354
+ 10080,100FA
355
+ 10100,10102
356
+ 10107,10133
357
+ 10137,1018E
358
+ 10190,1019B
359
+ 101A0,101A0
360
+ 101D0,101FD
361
+ 10280,1029C
362
+ 102A0,102D0
363
+ 102E0,102FB
364
+ 10300,10323
365
+ 1032D,1034A
366
+ 10350,1037A
367
+ 10380,1039D
368
+ 1039F,103C3
369
+ 103C8,103D5
370
+ 10400,1049D
371
+ 104A0,104A9
372
+ 104B0,104D3
373
+ 104D8,104FB
374
+ 10500,10527
375
+ 10530,10563
376
+ 1056F,1056F
377
+ 10600,10736
378
+ 10740,10755
379
+ 10760,10767
380
+ 10800,10805
381
+ 10808,10808
382
+ 1080A,10835
383
+ 10837,10838
384
+ 1083C,1083C
385
+ 1083F,10855
386
+ 10857,1089E
387
+ 108A7,108AF
388
+ 108E0,108F2
389
+ 108F4,108F5
390
+ 108FB,1091B
391
+ 1091F,10939
392
+ 1093F,1093F
393
+ 10980,109B7
394
+ 109BC,109CF
395
+ 109D2,10A03
396
+ 10A05,10A06
397
+ 10A0C,10A13
398
+ 10A15,10A17
399
+ 10A19,10A35
400
+ 10A38,10A3A
401
+ 10A3F,10A48
402
+ 10A50,10A58
403
+ 10A60,10A9F
404
+ 10AC0,10AE6
405
+ 10AEB,10AF6
406
+ 10B00,10B35
407
+ 10B39,10B55
408
+ 10B58,10B72
409
+ 10B78,10B91
410
+ 10B99,10B9C
411
+ 10BA9,10BAF
412
+ 10C00,10C48
413
+ 10C80,10CB2
414
+ 10CC0,10CF2
415
+ 10CFA,10D27
416
+ 10D30,10D39
417
+ 10E60,10E7E
418
+ 10F00,10F27
419
+ 10F30,10F59
420
+ 10FE0,10FF6
421
+ 11000,1104D
422
+ 11052,1106F
423
+ 1107F,110C1
424
+ 110CD,110CD
425
+ 110D0,110E8
426
+ 110F0,110F9
427
+ 11100,11134
428
+ 11136,11146
429
+ 11150,11176
430
+ 11180,111CD
431
+ 111D0,111DF
432
+ 111E1,111F4
433
+ 11200,11211
434
+ 11213,1123E
435
+ 11280,11286
436
+ 11288,11288
437
+ 1128A,1128D
438
+ 1128F,1129D
439
+ 1129F,112A9
440
+ 112B0,112EA
441
+ 112F0,112F9
442
+ 11300,11303
443
+ 11305,1130C
444
+ 1130F,11310
445
+ 11313,11328
446
+ 1132A,11330
447
+ 11332,11333
448
+ 11335,11339
449
+ 1133B,11344
450
+ 11347,11348
451
+ 1134B,1134D
452
+ 11350,11350
453
+ 11357,11357
454
+ 1135D,11363
455
+ 11366,1136C
456
+ 11370,11374
457
+ 11400,11459
458
+ 1145B,1145B
459
+ 1145D,1145F
460
+ 11480,114C7
461
+ 114D0,114D9
462
+ 11580,115B5
463
+ 115B8,115DD
464
+ 11600,11644
465
+ 11650,11659
466
+ 11660,1166C
467
+ 11680,116B8
468
+ 116C0,116C9
469
+ 11700,1171A
470
+ 1171D,1172B
471
+ 11730,1173F
472
+ 11800,1183B
473
+ 118A0,118F2
474
+ 118FF,118FF
475
+ 119A0,119A7
476
+ 119AA,119D7
477
+ 119DA,119E4
478
+ 11A00,11A47
479
+ 11A50,11AA2
480
+ 11AC0,11AF8
481
+ 11C00,11C08
482
+ 11C0A,11C36
483
+ 11C38,11C45
484
+ 11C50,11C6C
485
+ 11C70,11C8F
486
+ 11C92,11CA7
487
+ 11CA9,11CB6
488
+ 11D00,11D06
489
+ 11D08,11D09
490
+ 11D0B,11D36
491
+ 11D3A,11D3A
492
+ 11D3C,11D3D
493
+ 11D3F,11D47
494
+ 11D50,11D59
495
+ 11D60,11D65
496
+ 11D67,11D68
497
+ 11D6A,11D8E
498
+ 11D90,11D91
499
+ 11D93,11D98
500
+ 11DA0,11DA9
501
+ 11EE0,11EF8
502
+ 11FC0,11FF1
503
+ 11FFF,12399
504
+ 12400,1246E
505
+ 12470,12474
506
+ 12480,12543
507
+ 13000,1342E
508
+ 13430,13438
509
+ 14400,14646
510
+ 16800,16A38
511
+ 16A40,16A5E
512
+ 16A60,16A69
513
+ 16A6E,16A6F
514
+ 16AD0,16AED
515
+ 16AF0,16AF5
516
+ 16B00,16B45
517
+ 16B50,16B59
518
+ 16B5B,16B61
519
+ 16B63,16B77
520
+ 16B7D,16B8F
521
+ 16E40,16E9A
522
+ 16F00,16F4A
523
+ 16F4F,16F87
524
+ 16F8F,16F9F
525
+ 16FE0,16FE3
526
+ 17000,187F7
527
+ 18800,18AF2
528
+ 1B000,1B11E
529
+ 1B150,1B152
530
+ 1B164,1B167
531
+ 1B170,1B2FB
532
+ 1BC00,1BC6A
533
+ 1BC70,1BC7C
534
+ 1BC80,1BC88
535
+ 1BC90,1BC99
536
+ 1BC9C,1BCA3
537
+ 1D000,1D0F5
538
+ 1D100,1D126
539
+ 1D129,1D1E8
540
+ 1D200,1D245
541
+ 1D2E0,1D2F3
542
+ 1D300,1D356
543
+ 1D360,1D378
544
+ 1D400,1D454
545
+ 1D456,1D49C
546
+ 1D49E,1D49F
547
+ 1D4A2,1D4A2
548
+ 1D4A5,1D4A6
549
+ 1D4A9,1D4AC
550
+ 1D4AE,1D4B9
551
+ 1D4BB,1D4BB
552
+ 1D4BD,1D4C3
553
+ 1D4C5,1D505
554
+ 1D507,1D50A
555
+ 1D50D,1D514
556
+ 1D516,1D51C
557
+ 1D51E,1D539
558
+ 1D53B,1D53E
559
+ 1D540,1D544
560
+ 1D546,1D546
561
+ 1D54A,1D550
562
+ 1D552,1D6A5
563
+ 1D6A8,1D7CB
564
+ 1D7CE,1DA8B
565
+ 1DA9B,1DA9F
566
+ 1DAA1,1DAAF
567
+ 1E000,1E006
568
+ 1E008,1E018
569
+ 1E01B,1E021
570
+ 1E023,1E024
571
+ 1E026,1E02A
572
+ 1E100,1E12C
573
+ 1E130,1E13D
574
+ 1E140,1E149
575
+ 1E14E,1E14F
576
+ 1E2C0,1E2F9
577
+ 1E2FF,1E2FF
578
+ 1E800,1E8C4
579
+ 1E8C7,1E8D6
580
+ 1E900,1E94B
581
+ 1E950,1E959
582
+ 1E95E,1E95F
583
+ 1EC71,1ECB4
584
+ 1ED01,1ED3D
585
+ 1EE00,1EE03
586
+ 1EE05,1EE1F
587
+ 1EE21,1EE22
588
+ 1EE24,1EE24
589
+ 1EE27,1EE27
590
+ 1EE29,1EE32
591
+ 1EE34,1EE37
592
+ 1EE39,1EE39
593
+ 1EE3B,1EE3B
594
+ 1EE42,1EE42
595
+ 1EE47,1EE47
596
+ 1EE49,1EE49
597
+ 1EE4B,1EE4B
598
+ 1EE4D,1EE4F
599
+ 1EE51,1EE52
600
+ 1EE54,1EE54
601
+ 1EE57,1EE57
602
+ 1EE59,1EE59
603
+ 1EE5B,1EE5B
604
+ 1EE5D,1EE5D
605
+ 1EE5F,1EE5F
606
+ 1EE61,1EE62
607
+ 1EE64,1EE64
608
+ 1EE67,1EE6A
609
+ 1EE6C,1EE72
610
+ 1EE74,1EE77
611
+ 1EE79,1EE7C
612
+ 1EE7E,1EE7E
613
+ 1EE80,1EE89
614
+ 1EE8B,1EE9B
615
+ 1EEA1,1EEA3
616
+ 1EEA5,1EEA9
617
+ 1EEAB,1EEBB
618
+ 1EEF0,1EEF1
619
+ 1F000,1F02B
620
+ 1F030,1F093
621
+ 1F0A0,1F0AE
622
+ 1F0B1,1F0BF
623
+ 1F0C1,1F0CF
624
+ 1F0D1,1F0F5
625
+ 1F100,1F10C
626
+ 1F110,1F16C
627
+ 1F170,1F1AC
628
+ 1F1E6,1F202
629
+ 1F210,1F23B
630
+ 1F240,1F248
631
+ 1F250,1F251
632
+ 1F260,1F265
633
+ 1F300,1F6D5
634
+ 1F6E0,1F6EC
635
+ 1F6F0,1F6FA
636
+ 1F700,1F773
637
+ 1F780,1F7D8
638
+ 1F7E0,1F7EB
639
+ 1F800,1F80B
640
+ 1F810,1F847
641
+ 1F850,1F859
642
+ 1F860,1F887
643
+ 1F890,1F8AD
644
+ 1F900,1F90B
645
+ 1F90D,1F971
646
+ 1F973,1F976
647
+ 1F97A,1F9A2
648
+ 1F9A5,1F9AA
649
+ 1F9AE,1F9CA
650
+ 1F9CD,1FA53
651
+ 1FA60,1FA6D
652
+ 1FA70,1FA73
653
+ 1FA78,1FA7A
654
+ 1FA80,1FA82
655
+ 1FA90,1FA95
656
+ 20000,2A6D6
657
+ 2A700,2B734
658
+ 2B740,2B81D
659
+ 2B820,2CEA1
660
+ 2CEB0,2EBE0
661
+ 2F800,2FA1D
662
+ E0001,E0001
663
+ E0020,E007F
664
+ E0100,E01EF
665
+ F0000,FFFFD
666
+ 100000,10FFFD