unicode_script_detector 0.0.6 → 0.0.7
This diff shows the content of publicly available package versions that have been released to one of the supported registries. It is provided for informational purposes only and reflects the changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/unicode_script_detector/scripts.rb +113 -69
- data/lib/unicode_script_detector/version.rb +1 -1
- metadata +1 -1
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: a0febe236b556e42077b401d8e117b3996c6065dcbb33c974a572a8af64d14a4
|
|
4
|
+
data.tar.gz: f8ef874b90e0ca8e387bc16d5fc947745fcbe34bbd8ea53c328f98d8c3d8eddd
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: c5b931f1a8f527900f9c37ba0af67ca4e1fbfc0a0fde2f382fc1c2853a2c8813654f1d334f692e39882421eb3a4825f8e056d91d92d58fb4c2c1c22266c347b3
|
|
7
|
+
data.tar.gz: 69ac1e2314cefd944af0958cba15764afdf05e6e6eb314b68bc75172c7d1085be00725d40df5fe7f1af23abccc0243fa5d134c0b590da62d83913dce6570f0c1
|
data/lib/unicode_script_detector/scripts.rb
CHANGED
|
@@ -1,16 +1,6 @@
|
|
|
1
1
|
module UnicodeScriptDetector
|
|
2
2
|
class Scripts
|
|
3
3
|
LIST = [
|
|
4
|
-
{
|
|
5
|
-
script: :Whitespace,
|
|
6
|
-
name: "Whitespace",
|
|
7
|
-
regex: /\s/
|
|
8
|
-
},
|
|
9
|
-
{
|
|
10
|
-
script: :Digit,
|
|
11
|
-
name: "Digit",
|
|
12
|
-
regex: /\d/
|
|
13
|
-
},
|
|
14
4
|
{
|
|
15
5
|
script: :Adlam,
|
|
16
6
|
name: "Adlam",
|
|
@@ -23,7 +13,7 @@ module UnicodeScriptDetector
|
|
|
23
13
|
},
|
|
24
14
|
{
|
|
25
15
|
script: :Anatolian_Hieroglyphs,
|
|
26
|
-
name: "
|
|
16
|
+
name: "Anatolian Hieroglyphs",
|
|
27
17
|
regex: /\p{Anatolian_Hieroglyphs}/,
|
|
28
18
|
},
|
|
29
19
|
{
|
|
@@ -53,7 +43,7 @@ module UnicodeScriptDetector
|
|
|
53
43
|
},
|
|
54
44
|
{
|
|
55
45
|
script: :Bassa_Vah,
|
|
56
|
-
name: "
|
|
46
|
+
name: "Bassa Vah",
|
|
57
47
|
regex: /\p{Bassa_Vah}/,
|
|
58
48
|
},
|
|
59
49
|
{
|
|
@@ -98,7 +88,7 @@ module UnicodeScriptDetector
|
|
|
98
88
|
},
|
|
99
89
|
{
|
|
100
90
|
script: :Canadian_Aboriginal,
|
|
101
|
-
name: "
|
|
91
|
+
name: "Canadian Aboriginal",
|
|
102
92
|
regex: /\p{Canadian_Aboriginal}/,
|
|
103
93
|
},
|
|
104
94
|
{
|
|
@@ -108,7 +98,7 @@ module UnicodeScriptDetector
|
|
|
108
98
|
},
|
|
109
99
|
{
|
|
110
100
|
script: :Caucasian_Albanian,
|
|
111
|
-
name: "
|
|
101
|
+
name: "Caucasian Albanian",
|
|
112
102
|
regex: /\p{Caucasian_Albanian}/,
|
|
113
103
|
},
|
|
114
104
|
{
|
|
@@ -148,7 +138,7 @@ module UnicodeScriptDetector
|
|
|
148
138
|
},
|
|
149
139
|
{
|
|
150
140
|
script: :Cypro_Minoan,
|
|
151
|
-
name: "
|
|
141
|
+
name: "Cypro Minoan",
|
|
152
142
|
regex: /\p{Cypro_Minoan}/,
|
|
153
143
|
},
|
|
154
144
|
{
|
|
@@ -168,7 +158,7 @@ module UnicodeScriptDetector
|
|
|
168
158
|
},
|
|
169
159
|
{
|
|
170
160
|
script: :Dives_Akuru,
|
|
171
|
-
name: "
|
|
161
|
+
name: "Dives Akuru",
|
|
172
162
|
regex: /\p{Dives_Akuru}/,
|
|
173
163
|
},
|
|
174
164
|
{
|
|
@@ -183,8 +173,8 @@ module UnicodeScriptDetector
|
|
|
183
173
|
},
|
|
184
174
|
{
|
|
185
175
|
script: :Egyptian_Hieroglyphs,
|
|
186
|
-
name: "
|
|
187
|
-
regex: /\p{Egyptian_Hieroglyphs}/,
|
|
176
|
+
name: "Egyptian Hieroglyphs",
|
|
177
|
+
regex: /\p{Egyptian_Hieroglyphs}|[\u{13460}-\u{1355F}]/,
|
|
188
178
|
},
|
|
189
179
|
{
|
|
190
180
|
script: :Elbasan,
|
|
@@ -201,7 +191,11 @@ module UnicodeScriptDetector
|
|
|
201
191
|
name: "Ethiopic",
|
|
202
192
|
regex: /\p{Ethiopic}/,
|
|
203
193
|
},
|
|
204
|
-
|
|
194
|
+
{
|
|
195
|
+
script: :Garay,
|
|
196
|
+
name: "Garay",
|
|
197
|
+
regex: /[\u{10D40}-\u{10D8F}]/,
|
|
198
|
+
},
|
|
205
199
|
{
|
|
206
200
|
script: :Georgian,
|
|
207
201
|
name: "Georgian",
|
|
@@ -234,7 +228,7 @@ module UnicodeScriptDetector
|
|
|
234
228
|
},
|
|
235
229
|
{
|
|
236
230
|
script: :Gunjala_Gondi,
|
|
237
|
-
name: "
|
|
231
|
+
name: "Gunjala Gondi",
|
|
238
232
|
regex: /\p{Gunjala_Gondi}/,
|
|
239
233
|
},
|
|
240
234
|
{
|
|
@@ -242,11 +236,15 @@ module UnicodeScriptDetector
|
|
|
242
236
|
name: "Gurmukhi",
|
|
243
237
|
regex: /\p{Gurmukhi}/,
|
|
244
238
|
},
|
|
245
|
-
|
|
239
|
+
{
|
|
240
|
+
script: :Gurung_Khema,
|
|
241
|
+
name: "Gurung Khema",
|
|
242
|
+
regex: /[\u{16100}-\u{1613F}]/,
|
|
243
|
+
},
|
|
246
244
|
{
|
|
247
245
|
script: :Han,
|
|
248
246
|
name: "Han",
|
|
249
|
-
regex: /\p{Han}/,
|
|
247
|
+
regex: /\p{Han}|[\u{323B0}-\u{3347F}]/,
|
|
250
248
|
},
|
|
251
249
|
{
|
|
252
250
|
script: :Hangul,
|
|
@@ -255,7 +253,7 @@ module UnicodeScriptDetector
|
|
|
255
253
|
},
|
|
256
254
|
{
|
|
257
255
|
script: :Hanifi_Rohingya,
|
|
258
|
-
name: "
|
|
256
|
+
name: "Hanifi Rohingya",
|
|
259
257
|
regex: /\p{Hanifi_Rohingya}/,
|
|
260
258
|
},
|
|
261
259
|
{
|
|
@@ -280,7 +278,7 @@ module UnicodeScriptDetector
|
|
|
280
278
|
},
|
|
281
279
|
{
|
|
282
280
|
script: :Imperial_Aramaic,
|
|
283
|
-
name: "
|
|
281
|
+
name: "Imperial Aramaic",
|
|
284
282
|
regex: /\p{Imperial_Aramaic}/,
|
|
285
283
|
},
|
|
286
284
|
{
|
|
@@ -290,12 +288,12 @@ module UnicodeScriptDetector
|
|
|
290
288
|
},
|
|
291
289
|
{
|
|
292
290
|
script: :Inscriptional_Pahlavi,
|
|
293
|
-
name: "
|
|
291
|
+
name: "Inscriptional Pahlavi",
|
|
294
292
|
regex: /\p{Inscriptional_Pahlavi}/,
|
|
295
293
|
},
|
|
296
294
|
{
|
|
297
295
|
script: :Inscriptional_Parthian,
|
|
298
|
-
name: "
|
|
296
|
+
name: "Inscriptional Parthian",
|
|
299
297
|
regex: /\p{Inscriptional_Parthian}/,
|
|
300
298
|
},
|
|
301
299
|
{
|
|
@@ -318,7 +316,6 @@ module UnicodeScriptDetector
|
|
|
318
316
|
name: "Katakana",
|
|
319
317
|
regex: /\p{Katakana}/,
|
|
320
318
|
},
|
|
321
|
-
|
|
322
319
|
{
|
|
323
320
|
script: :Kawi,
|
|
324
321
|
name: "Kawi",
|
|
@@ -326,7 +323,7 @@ module UnicodeScriptDetector
|
|
|
326
323
|
},
|
|
327
324
|
{
|
|
328
325
|
script: :Kayah_Li,
|
|
329
|
-
name: "
|
|
326
|
+
name: "Kayah Li",
|
|
330
327
|
regex: /\p{Kayah_Li}/,
|
|
331
328
|
},
|
|
332
329
|
{
|
|
@@ -336,7 +333,7 @@ module UnicodeScriptDetector
|
|
|
336
333
|
},
|
|
337
334
|
{
|
|
338
335
|
script: :Khitan_Small_Script,
|
|
339
|
-
name: "
|
|
336
|
+
name: "Khitan Small Script",
|
|
340
337
|
regex: /\p{Khitan_Small_Script}/,
|
|
341
338
|
},
|
|
342
339
|
{
|
|
@@ -354,7 +351,11 @@ module UnicodeScriptDetector
|
|
|
354
351
|
name: "Khudawadi",
|
|
355
352
|
regex: /\p{Khudawadi}/,
|
|
356
353
|
},
|
|
357
|
-
|
|
354
|
+
{
|
|
355
|
+
script: :Kirat_Rai,
|
|
356
|
+
name: "Kirat Rai",
|
|
357
|
+
regex: /[\u{16D40}-\u{16D7F}]/,
|
|
358
|
+
},
|
|
358
359
|
{
|
|
359
360
|
script: :Lao,
|
|
360
361
|
name: "Lao",
|
|
@@ -377,12 +378,12 @@ module UnicodeScriptDetector
|
|
|
377
378
|
},
|
|
378
379
|
{
|
|
379
380
|
script: :Linear_A,
|
|
380
|
-
name: "
|
|
381
|
+
name: "Linear A",
|
|
381
382
|
regex: /\p{Linear_A}/,
|
|
382
383
|
},
|
|
383
384
|
{
|
|
384
385
|
script: :Linear_B,
|
|
385
|
-
name: "
|
|
386
|
+
name: "Linear B",
|
|
386
387
|
regex: /\p{Linear_B}/,
|
|
387
388
|
},
|
|
388
389
|
{
|
|
@@ -432,7 +433,7 @@ module UnicodeScriptDetector
|
|
|
432
433
|
},
|
|
433
434
|
{
|
|
434
435
|
script: :Masaram_Gondi,
|
|
435
|
-
name: "
|
|
436
|
+
name: "Masaram Gondi",
|
|
436
437
|
regex: /\p{Masaram_Gondi}/,
|
|
437
438
|
},
|
|
438
439
|
{
|
|
@@ -442,22 +443,22 @@ module UnicodeScriptDetector
|
|
|
442
443
|
},
|
|
443
444
|
{
|
|
444
445
|
script: :Meetei_Mayek,
|
|
445
|
-
name: "
|
|
446
|
+
name: "Meetei Mayek",
|
|
446
447
|
regex: /\p{Meetei_Mayek}/,
|
|
447
448
|
},
|
|
448
449
|
{
|
|
449
450
|
script: :Mende_Kikakui,
|
|
450
|
-
name: "
|
|
451
|
+
name: "Mende Kikakui",
|
|
451
452
|
regex: /\p{Mende_Kikakui}/,
|
|
452
453
|
},
|
|
453
454
|
{
|
|
454
455
|
script: :Meroitic_Cursive,
|
|
455
|
-
name: "
|
|
456
|
+
name: "Meroitic Cursive",
|
|
456
457
|
regex: /\p{Meroitic_Cursive}/,
|
|
457
458
|
},
|
|
458
459
|
{
|
|
459
460
|
script: :Meroitic_Hieroglyphs,
|
|
460
|
-
name: "
|
|
461
|
+
name: "Meroitic Hieroglyphs",
|
|
461
462
|
regex: /\p{Meroitic_Hieroglyphs}/,
|
|
462
463
|
},
|
|
463
464
|
{
|
|
@@ -488,7 +489,7 @@ module UnicodeScriptDetector
|
|
|
488
489
|
{
|
|
489
490
|
script: :Myanmar,
|
|
490
491
|
name: "Myanmar",
|
|
491
|
-
regex: /\p{Myanmar}/,
|
|
492
|
+
regex: /\p{Myanmar}|[\u{116D0}-\u{116FF}]/,
|
|
492
493
|
},
|
|
493
494
|
{
|
|
494
495
|
script: :Nabataean,
|
|
@@ -497,7 +498,7 @@ module UnicodeScriptDetector
|
|
|
497
498
|
},
|
|
498
499
|
{
|
|
499
500
|
script: :Nag_Mundari,
|
|
500
|
-
name: "
|
|
501
|
+
name: "Nag Mundari",
|
|
501
502
|
regex: /\p{Nag_Mundari}/,
|
|
502
503
|
},
|
|
503
504
|
{
|
|
@@ -507,7 +508,7 @@ module UnicodeScriptDetector
|
|
|
507
508
|
},
|
|
508
509
|
{
|
|
509
510
|
script: :New_Tai_Lue,
|
|
510
|
-
name: "
|
|
511
|
+
name: "New Tai Lue",
|
|
511
512
|
regex: /\p{New_Tai_Lue}/,
|
|
512
513
|
},
|
|
513
514
|
{
|
|
@@ -527,7 +528,7 @@ module UnicodeScriptDetector
|
|
|
527
528
|
},
|
|
528
529
|
{
|
|
529
530
|
script: :Nyiakeng_Puachue_Hmong,
|
|
530
|
-
name: "
|
|
531
|
+
name: "Nyiakeng Puachue Hmong",
|
|
531
532
|
regex: /\p{Nyiakeng_Puachue_Hmong}/,
|
|
532
533
|
},
|
|
533
534
|
{
|
|
@@ -537,53 +538,57 @@ module UnicodeScriptDetector
|
|
|
537
538
|
},
|
|
538
539
|
{
|
|
539
540
|
script: :Ol_Chiki,
|
|
540
|
-
name: "
|
|
541
|
+
name: "Ol Chiki",
|
|
541
542
|
regex: /\p{Ol_Chiki}/,
|
|
542
543
|
},
|
|
543
|
-
|
|
544
|
+
{
|
|
545
|
+
script: :Ol_Onal,
|
|
546
|
+
name: "Ol Onal",
|
|
547
|
+
regex: /[\u{1E5D0}-\u{1E5FF}]/,
|
|
548
|
+
},
|
|
544
549
|
{
|
|
545
550
|
script: :Old_Hungarian,
|
|
546
|
-
name: "
|
|
551
|
+
name: "Old Hungarian",
|
|
547
552
|
regex: /\p{Old_Hungarian}/,
|
|
548
553
|
},
|
|
549
554
|
{
|
|
550
555
|
script: :Old_Italic,
|
|
551
|
-
name: "
|
|
556
|
+
name: "Old Italic",
|
|
552
557
|
regex: /\p{Old_Italic}/,
|
|
553
558
|
},
|
|
554
559
|
{
|
|
555
560
|
script: :Old_North_Arabian,
|
|
556
|
-
name: "
|
|
561
|
+
name: "Old North Arabian",
|
|
557
562
|
regex: /\p{Old_North_Arabian}/,
|
|
558
563
|
},
|
|
559
564
|
{
|
|
560
565
|
script: :Old_Permic,
|
|
561
|
-
name: "
|
|
566
|
+
name: "Old Permic",
|
|
562
567
|
regex: /\p{Old_Permic}/,
|
|
563
568
|
},
|
|
564
569
|
{
|
|
565
570
|
script: :Old_Persian,
|
|
566
|
-
name: "
|
|
571
|
+
name: "Old Persian",
|
|
567
572
|
regex: /\p{Old_Persian}/,
|
|
568
573
|
},
|
|
569
574
|
{
|
|
570
575
|
script: :Old_Sogdian,
|
|
571
|
-
name: "
|
|
576
|
+
name: "Old Sogdian",
|
|
572
577
|
regex: /\p{Old_Sogdian}/,
|
|
573
578
|
},
|
|
574
579
|
{
|
|
575
580
|
script: :Old_South_Arabian,
|
|
576
|
-
name: "
|
|
581
|
+
name: "Old South Arabian",
|
|
577
582
|
regex: /\p{Old_South_Arabian}/,
|
|
578
583
|
},
|
|
579
584
|
{
|
|
580
585
|
script: :Old_Turkic,
|
|
581
|
-
name: "
|
|
586
|
+
name: "Old Turkic",
|
|
582
587
|
regex: /\p{Old_Turkic}/,
|
|
583
588
|
},
|
|
584
589
|
{
|
|
585
590
|
script: :Old_Uyghur,
|
|
586
|
-
name: "
|
|
591
|
+
name: "Old Uyghur",
|
|
587
592
|
regex: /\p{Old_Uyghur}/,
|
|
588
593
|
},
|
|
589
594
|
{
|
|
@@ -603,7 +608,7 @@ module UnicodeScriptDetector
|
|
|
603
608
|
},
|
|
604
609
|
{
|
|
605
610
|
script: :Pahawh_Hmong,
|
|
606
|
-
name: "
|
|
611
|
+
name: "Pahawh Hmong",
|
|
607
612
|
regex: /\p{Pahawh_Hmong}/,
|
|
608
613
|
},
|
|
609
614
|
{
|
|
@@ -613,12 +618,12 @@ module UnicodeScriptDetector
|
|
|
613
618
|
},
|
|
614
619
|
{
|
|
615
620
|
script: :Pau_Cin_Hau,
|
|
616
|
-
name: "
|
|
621
|
+
name: "Pau Cin Hau",
|
|
617
622
|
regex: /\p{Pau_Cin_Hau}/,
|
|
618
623
|
},
|
|
619
624
|
{
|
|
620
625
|
script: :Phags_Pa,
|
|
621
|
-
name: "
|
|
626
|
+
name: "Phags Pa",
|
|
622
627
|
regex: /\p{Phags_Pa}/,
|
|
623
628
|
},
|
|
624
629
|
{
|
|
@@ -628,7 +633,7 @@ module UnicodeScriptDetector
|
|
|
628
633
|
},
|
|
629
634
|
{
|
|
630
635
|
script: :Psalter_Pahlavi,
|
|
631
|
-
name: "
|
|
636
|
+
name: "Psalter Pahlavi",
|
|
632
637
|
regex: /\p{Psalter_Pahlavi}/,
|
|
633
638
|
},
|
|
634
639
|
{
|
|
@@ -654,7 +659,7 @@ module UnicodeScriptDetector
|
|
|
654
659
|
{
|
|
655
660
|
script: :Sharada,
|
|
656
661
|
name: "Sharada",
|
|
657
|
-
regex: /\p{Sharada}/,
|
|
662
|
+
regex: /\p{Sharada}|[\u{11B60}-\u{11B7F}]/,
|
|
658
663
|
},
|
|
659
664
|
{
|
|
660
665
|
script: :Shavian,
|
|
@@ -666,6 +671,11 @@ module UnicodeScriptDetector
|
|
|
666
671
|
name: "Siddham",
|
|
667
672
|
regex: /\p{Siddham}/,
|
|
668
673
|
},
|
|
674
|
+
{
|
|
675
|
+
script: :Sidetic,
|
|
676
|
+
name: "Sidetic",
|
|
677
|
+
regex: /[\u{10940}-\u{1095F}]/
|
|
678
|
+
},
|
|
669
679
|
{
|
|
670
680
|
script: :SignWriting,
|
|
671
681
|
name: "SignWriting",
|
|
@@ -683,7 +693,7 @@ module UnicodeScriptDetector
|
|
|
683
693
|
},
|
|
684
694
|
{
|
|
685
695
|
script: :Sora_Sompeng,
|
|
686
|
-
name: "
|
|
696
|
+
name: "Sora Sompeng",
|
|
687
697
|
regex: /\p{Sora_Sompeng}/,
|
|
688
698
|
},
|
|
689
699
|
{
|
|
@@ -696,10 +706,14 @@ module UnicodeScriptDetector
|
|
|
696
706
|
name: "Sundanese",
|
|
697
707
|
regex: /\p{Sundanese}/,
|
|
698
708
|
},
|
|
699
|
-
|
|
709
|
+
{
|
|
710
|
+
script: :Sunuwar,
|
|
711
|
+
name: "Sunuwar",
|
|
712
|
+
regex: /[\u{11BC0}-\u{11BFF}]/,
|
|
713
|
+
},
|
|
700
714
|
{
|
|
701
715
|
script: :Syloti_Nagri,
|
|
702
|
-
name: "
|
|
716
|
+
name: "Syloti Nagri",
|
|
703
717
|
regex: /\p{Syloti_Nagri}/,
|
|
704
718
|
},
|
|
705
719
|
{
|
|
@@ -719,19 +733,24 @@ module UnicodeScriptDetector
|
|
|
719
733
|
},
|
|
720
734
|
{
|
|
721
735
|
script: :Tai_Le,
|
|
722
|
-
name: "
|
|
736
|
+
name: "Tai Le",
|
|
723
737
|
regex: /\p{Tai_Le}/,
|
|
724
738
|
},
|
|
725
739
|
{
|
|
726
740
|
script: :Tai_Tham,
|
|
727
|
-
name: "
|
|
741
|
+
name: "Tai Tham",
|
|
728
742
|
regex: /\p{Tai_Tham}/,
|
|
729
743
|
},
|
|
730
744
|
{
|
|
731
745
|
script: :Tai_Viet,
|
|
732
|
-
name: "
|
|
746
|
+
name: "Tai Viet",
|
|
733
747
|
regex: /\p{Tai_Viet}/,
|
|
734
748
|
},
|
|
749
|
+
{
|
|
750
|
+
script: :Tai_Yo,
|
|
751
|
+
name: "Tai Yo",
|
|
752
|
+
regex: /[\u{1E6C0}-\u{1E6FF}]/,
|
|
753
|
+
},
|
|
735
754
|
{
|
|
736
755
|
script: :Takri,
|
|
737
756
|
name: "Takri",
|
|
@@ -750,7 +769,7 @@ module UnicodeScriptDetector
|
|
|
750
769
|
{
|
|
751
770
|
script: :Tangut,
|
|
752
771
|
name: "Tangut",
|
|
753
|
-
regex: /\p{Tangut}/,
|
|
772
|
+
regex: /\p{Tangut}|[\u{18D80}-\u{18DFF}]/,
|
|
754
773
|
},
|
|
755
774
|
{
|
|
756
775
|
script: :Telugu,
|
|
@@ -782,13 +801,26 @@ module UnicodeScriptDetector
|
|
|
782
801
|
name: "Tirhuta",
|
|
783
802
|
regex: /\p{Tirhuta}/,
|
|
784
803
|
},
|
|
785
|
-
|
|
804
|
+
{
|
|
805
|
+
script: :Todhri,
|
|
806
|
+
name: "Todhri",
|
|
807
|
+
regex: /[\u{105C0}-\u{105FF}]/,
|
|
808
|
+
},
|
|
809
|
+
{
|
|
810
|
+
script: :Tolong_Siki,
|
|
811
|
+
name: "Tolong Siki",
|
|
812
|
+
regex: /[\u{11DB0}-\u{11DEF}]/,
|
|
813
|
+
},
|
|
786
814
|
{
|
|
787
815
|
script: :Toto,
|
|
788
816
|
name: "Toto",
|
|
789
817
|
regex: /\p{Toto}/,
|
|
790
818
|
},
|
|
791
|
-
|
|
819
|
+
{
|
|
820
|
+
script: :Tulu_Tigalari,
|
|
821
|
+
name: "Tulu Tigalari",
|
|
822
|
+
regex: /[\u{11380}-\u{113FF}]/,
|
|
823
|
+
},
|
|
792
824
|
{
|
|
793
825
|
script: :Ugaritic,
|
|
794
826
|
name: "Ugaritic",
|
|
@@ -816,7 +848,7 @@ module UnicodeScriptDetector
|
|
|
816
848
|
},
|
|
817
849
|
{
|
|
818
850
|
script: :Warang_Citi,
|
|
819
|
-
name: "
|
|
851
|
+
name: "Warang Citi",
|
|
820
852
|
regex: /\p{Warang_Citi}/,
|
|
821
853
|
},
|
|
822
854
|
{
|
|
@@ -831,9 +863,21 @@ module UnicodeScriptDetector
|
|
|
831
863
|
},
|
|
832
864
|
{
|
|
833
865
|
script: :Zanabazar_Square,
|
|
834
|
-
name: "
|
|
866
|
+
name: "Zanabazar Square",
|
|
835
867
|
regex: /\p{Zanabazar_Square}/,
|
|
836
868
|
},
|
|
869
|
+
|
|
870
|
+
#Special characters
|
|
871
|
+
{
|
|
872
|
+
script: :Whitespace,
|
|
873
|
+
name: "Whitespace",
|
|
874
|
+
regex: /\s/
|
|
875
|
+
},
|
|
876
|
+
{
|
|
877
|
+
script: :Digit,
|
|
878
|
+
name: "Digit",
|
|
879
|
+
regex: /\d/
|
|
880
|
+
},
|
|
837
881
|
{
|
|
838
882
|
script: :Emoji,
|
|
839
883
|
name: "Emoji",
|
|
@@ -847,7 +891,7 @@ module UnicodeScriptDetector
|
|
|
847
891
|
{
|
|
848
892
|
script: :Common,
|
|
849
893
|
name: "Common",
|
|
850
|
-
regex: /\p{Common}/,
|
|
894
|
+
regex: /\p{Common}|[\u{1CEC0}-\u{1CEFF}]|[\u{1CC00}-\u{1CEBF}]/,
|
|
851
895
|
},
|
|
852
896
|
]
|
|
853
897
|
end
|