unicode_script_detector 0.0.6 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 58ac1a16218a1a336bb9c2fa6c75c5519a69a6c846c1007ffd236d991f3ef223
4
- data.tar.gz: 11c82345a0b3990737b1c1a85c97d6c7552644a10e0c1d9265a4a46e4a0a9ce3
3
+ metadata.gz: a0febe236b556e42077b401d8e117b3996c6065dcbb33c974a572a8af64d14a4
4
+ data.tar.gz: f8ef874b90e0ca8e387bc16d5fc947745fcbe34bbd8ea53c328f98d8c3d8eddd
5
5
  SHA512:
6
- metadata.gz: baad85a939325ce58dc96c132e103b81bf1c9cd0f4a3120675bb6db7ec81bad305759b40da55e8d868128baa3d5bbdf3e28d044ae6f735d0ebce5ed4f89bb190
7
- data.tar.gz: cfc04d633c2bdbbb0810c505d1cb6d671045b4b0c3651584be63231cb9b722a413c65c59487dbf5953374646af7bdc953ec7e66c4b71c7c1ad069004c8d9dd46
6
+ metadata.gz: c5b931f1a8f527900f9c37ba0af67ca4e1fbfc0a0fde2f382fc1c2853a2c8813654f1d334f692e39882421eb3a4825f8e056d91d92d58fb4c2c1c22266c347b3
7
+ data.tar.gz: 69ac1e2314cefd944af0958cba15764afdf05e6e6eb314b68bc75172c7d1085be00725d40df5fe7f1af23abccc0243fa5d134c0b590da62d83913dce6570f0c1
@@ -1,16 +1,6 @@
1
1
  module UnicodeScriptDetector
2
2
  class Scripts
3
3
  LIST = [
4
- {
5
- script: :Whitespace,
6
- name: "Whitespace",
7
- regex: /\s/
8
- },
9
- {
10
- script: :Digit,
11
- name: "Digit",
12
- regex: /\d/
13
- },
14
4
  {
15
5
  script: :Adlam,
16
6
  name: "Adlam",
@@ -23,7 +13,7 @@ module UnicodeScriptDetector
23
13
  },
24
14
  {
25
15
  script: :Anatolian_Hieroglyphs,
26
- name: "Anatolian_Hieroglyphs",
16
+ name: "Anatolian Hieroglyphs",
27
17
  regex: /\p{Anatolian_Hieroglyphs}/,
28
18
  },
29
19
  {
@@ -53,7 +43,7 @@ module UnicodeScriptDetector
53
43
  },
54
44
  {
55
45
  script: :Bassa_Vah,
56
- name: "Bassa_Vah",
46
+ name: "Bassa Vah",
57
47
  regex: /\p{Bassa_Vah}/,
58
48
  },
59
49
  {
@@ -98,7 +88,7 @@ module UnicodeScriptDetector
98
88
  },
99
89
  {
100
90
  script: :Canadian_Aboriginal,
101
- name: "Canadian_Aboriginal",
91
+ name: "Canadian Aboriginal",
102
92
  regex: /\p{Canadian_Aboriginal}/,
103
93
  },
104
94
  {
@@ -108,7 +98,7 @@ module UnicodeScriptDetector
108
98
  },
109
99
  {
110
100
  script: :Caucasian_Albanian,
111
- name: "Caucasian_Albanian",
101
+ name: "Caucasian Albanian",
112
102
  regex: /\p{Caucasian_Albanian}/,
113
103
  },
114
104
  {
@@ -148,7 +138,7 @@ module UnicodeScriptDetector
148
138
  },
149
139
  {
150
140
  script: :Cypro_Minoan,
151
- name: "Cypro_Minoan",
141
+ name: "Cypro Minoan",
152
142
  regex: /\p{Cypro_Minoan}/,
153
143
  },
154
144
  {
@@ -168,7 +158,7 @@ module UnicodeScriptDetector
168
158
  },
169
159
  {
170
160
  script: :Dives_Akuru,
171
- name: "Dives_Akuru",
161
+ name: "Dives Akuru",
172
162
  regex: /\p{Dives_Akuru}/,
173
163
  },
174
164
  {
@@ -183,8 +173,8 @@ module UnicodeScriptDetector
183
173
  },
184
174
  {
185
175
  script: :Egyptian_Hieroglyphs,
186
- name: "Egyptian_Hieroglyphs",
187
- regex: /\p{Egyptian_Hieroglyphs}/,
176
+ name: "Egyptian Hieroglyphs",
177
+ regex: /\p{Egyptian_Hieroglyphs}|[\u{13460}-\u{1355F}]/,
188
178
  },
189
179
  {
190
180
  script: :Elbasan,
@@ -201,7 +191,11 @@ module UnicodeScriptDetector
201
191
  name: "Ethiopic",
202
192
  regex: /\p{Ethiopic}/,
203
193
  },
204
-
194
+ {
195
+ script: :Garay,
196
+ name: "Garay",
197
+ regex: /[\u{10D40}-\u{10D8F}]/,
198
+ },
205
199
  {
206
200
  script: :Georgian,
207
201
  name: "Georgian",
@@ -234,7 +228,7 @@ module UnicodeScriptDetector
234
228
  },
235
229
  {
236
230
  script: :Gunjala_Gondi,
237
- name: "Gunjala_Gondi",
231
+ name: "Gunjala Gondi",
238
232
  regex: /\p{Gunjala_Gondi}/,
239
233
  },
240
234
  {
@@ -242,11 +236,15 @@ module UnicodeScriptDetector
242
236
  name: "Gurmukhi",
243
237
  regex: /\p{Gurmukhi}/,
244
238
  },
245
-
239
+ {
240
+ script: :Gurung_Khema,
241
+ name: "Gurung Khema",
242
+ regex: /[\u{16100}-\u{1613F}]/,
243
+ },
246
244
  {
247
245
  script: :Han,
248
246
  name: "Han",
249
- regex: /\p{Han}/,
247
+ regex: /\p{Han}|[\u{323B0}-\u{3347F}]/,
250
248
  },
251
249
  {
252
250
  script: :Hangul,
@@ -255,7 +253,7 @@ module UnicodeScriptDetector
255
253
  },
256
254
  {
257
255
  script: :Hanifi_Rohingya,
258
- name: "Hanifi_Rohingya",
256
+ name: "Hanifi Rohingya",
259
257
  regex: /\p{Hanifi_Rohingya}/,
260
258
  },
261
259
  {
@@ -280,7 +278,7 @@ module UnicodeScriptDetector
280
278
  },
281
279
  {
282
280
  script: :Imperial_Aramaic,
283
- name: "Imperial_Aramaic",
281
+ name: "Imperial Aramaic",
284
282
  regex: /\p{Imperial_Aramaic}/,
285
283
  },
286
284
  {
@@ -290,12 +288,12 @@ module UnicodeScriptDetector
290
288
  },
291
289
  {
292
290
  script: :Inscriptional_Pahlavi,
293
- name: "Inscriptional_Pahlavi",
291
+ name: "Inscriptional Pahlavi",
294
292
  regex: /\p{Inscriptional_Pahlavi}/,
295
293
  },
296
294
  {
297
295
  script: :Inscriptional_Parthian,
298
- name: "Inscriptional_Parthian",
296
+ name: "Inscriptional Parthian",
299
297
  regex: /\p{Inscriptional_Parthian}/,
300
298
  },
301
299
  {
@@ -318,7 +316,6 @@ module UnicodeScriptDetector
318
316
  name: "Katakana",
319
317
  regex: /\p{Katakana}/,
320
318
  },
321
-
322
319
  {
323
320
  script: :Kawi,
324
321
  name: "Kawi",
@@ -326,7 +323,7 @@ module UnicodeScriptDetector
326
323
  },
327
324
  {
328
325
  script: :Kayah_Li,
329
- name: "Kayah_Li",
326
+ name: "Kayah Li",
330
327
  regex: /\p{Kayah_Li}/,
331
328
  },
332
329
  {
@@ -336,7 +333,7 @@ module UnicodeScriptDetector
336
333
  },
337
334
  {
338
335
  script: :Khitan_Small_Script,
339
- name: "Khitan_Small_Script",
336
+ name: "Khitan Small Script",
340
337
  regex: /\p{Khitan_Small_Script}/,
341
338
  },
342
339
  {
@@ -354,7 +351,11 @@ module UnicodeScriptDetector
354
351
  name: "Khudawadi",
355
352
  regex: /\p{Khudawadi}/,
356
353
  },
357
-
354
+ {
355
+ script: :Kirat_Rai,
356
+ name: "Kirat Rai",
357
+ regex: /[\u{16D40}-\u{16D7F}]/,
358
+ },
358
359
  {
359
360
  script: :Lao,
360
361
  name: "Lao",
@@ -377,12 +378,12 @@ module UnicodeScriptDetector
377
378
  },
378
379
  {
379
380
  script: :Linear_A,
380
- name: "Linear_A",
381
+ name: "Linear A",
381
382
  regex: /\p{Linear_A}/,
382
383
  },
383
384
  {
384
385
  script: :Linear_B,
385
- name: "Linear_B",
386
+ name: "Linear B",
386
387
  regex: /\p{Linear_B}/,
387
388
  },
388
389
  {
@@ -432,7 +433,7 @@ module UnicodeScriptDetector
432
433
  },
433
434
  {
434
435
  script: :Masaram_Gondi,
435
- name: "Masaram_Gondi",
436
+ name: "Masaram Gondi",
436
437
  regex: /\p{Masaram_Gondi}/,
437
438
  },
438
439
  {
@@ -442,22 +443,22 @@ module UnicodeScriptDetector
442
443
  },
443
444
  {
444
445
  script: :Meetei_Mayek,
445
- name: "Meetei_Mayek",
446
+ name: "Meetei Mayek",
446
447
  regex: /\p{Meetei_Mayek}/,
447
448
  },
448
449
  {
449
450
  script: :Mende_Kikakui,
450
- name: "Mende_Kikakui",
451
+ name: "Mende Kikakui",
451
452
  regex: /\p{Mende_Kikakui}/,
452
453
  },
453
454
  {
454
455
  script: :Meroitic_Cursive,
455
- name: "Meroitic_Cursive",
456
+ name: "Meroitic Cursive",
456
457
  regex: /\p{Meroitic_Cursive}/,
457
458
  },
458
459
  {
459
460
  script: :Meroitic_Hieroglyphs,
460
- name: "Meroitic_Hieroglyphs",
461
+ name: "Meroitic Hieroglyphs",
461
462
  regex: /\p{Meroitic_Hieroglyphs}/,
462
463
  },
463
464
  {
@@ -488,7 +489,7 @@ module UnicodeScriptDetector
488
489
  {
489
490
  script: :Myanmar,
490
491
  name: "Myanmar",
491
- regex: /\p{Myanmar}/,
492
+ regex: /\p{Myanmar}|[\u{116D0}-\u{116FF}]/,
492
493
  },
493
494
  {
494
495
  script: :Nabataean,
@@ -497,7 +498,7 @@ module UnicodeScriptDetector
497
498
  },
498
499
  {
499
500
  script: :Nag_Mundari,
500
- name: "Nag_Mundari",
501
+ name: "Nag Mundari",
501
502
  regex: /\p{Nag_Mundari}/,
502
503
  },
503
504
  {
@@ -507,7 +508,7 @@ module UnicodeScriptDetector
507
508
  },
508
509
  {
509
510
  script: :New_Tai_Lue,
510
- name: "New_Tai_Lue",
511
+ name: "New Tai Lue",
511
512
  regex: /\p{New_Tai_Lue}/,
512
513
  },
513
514
  {
@@ -527,7 +528,7 @@ module UnicodeScriptDetector
527
528
  },
528
529
  {
529
530
  script: :Nyiakeng_Puachue_Hmong,
530
- name: "Nyiakeng_Puachue_Hmong",
531
+ name: "Nyiakeng Puachue Hmong",
531
532
  regex: /\p{Nyiakeng_Puachue_Hmong}/,
532
533
  },
533
534
  {
@@ -537,53 +538,57 @@ module UnicodeScriptDetector
537
538
  },
538
539
  {
539
540
  script: :Ol_Chiki,
540
- name: "Ol_Chiki",
541
+ name: "Ol Chiki",
541
542
  regex: /\p{Ol_Chiki}/,
542
543
  },
543
-
544
+ {
545
+ script: :Ol_Onal,
546
+ name: "Ol Onal",
547
+ regex: /[\u{1E5D0}-\u{1E5FF}]/,
548
+ },
544
549
  {
545
550
  script: :Old_Hungarian,
546
- name: "Old_Hungarian",
551
+ name: "Old Hungarian",
547
552
  regex: /\p{Old_Hungarian}/,
548
553
  },
549
554
  {
550
555
  script: :Old_Italic,
551
- name: "Old_Italic",
556
+ name: "Old Italic",
552
557
  regex: /\p{Old_Italic}/,
553
558
  },
554
559
  {
555
560
  script: :Old_North_Arabian,
556
- name: "Old_North_Arabian",
561
+ name: "Old North Arabian",
557
562
  regex: /\p{Old_North_Arabian}/,
558
563
  },
559
564
  {
560
565
  script: :Old_Permic,
561
- name: "Old_Permic",
566
+ name: "Old Permic",
562
567
  regex: /\p{Old_Permic}/,
563
568
  },
564
569
  {
565
570
  script: :Old_Persian,
566
- name: "Old_Persian",
571
+ name: "Old Persian",
567
572
  regex: /\p{Old_Persian}/,
568
573
  },
569
574
  {
570
575
  script: :Old_Sogdian,
571
- name: "Old_Sogdian",
576
+ name: "Old Sogdian",
572
577
  regex: /\p{Old_Sogdian}/,
573
578
  },
574
579
  {
575
580
  script: :Old_South_Arabian,
576
- name: "Old_South_Arabian",
581
+ name: "Old South Arabian",
577
582
  regex: /\p{Old_South_Arabian}/,
578
583
  },
579
584
  {
580
585
  script: :Old_Turkic,
581
- name: "Old_Turkic",
586
+ name: "Old Turkic",
582
587
  regex: /\p{Old_Turkic}/,
583
588
  },
584
589
  {
585
590
  script: :Old_Uyghur,
586
- name: "Old_Uyghur",
591
+ name: "Old Uyghur",
587
592
  regex: /\p{Old_Uyghur}/,
588
593
  },
589
594
  {
@@ -603,7 +608,7 @@ module UnicodeScriptDetector
603
608
  },
604
609
  {
605
610
  script: :Pahawh_Hmong,
606
- name: "Pahawh_Hmong",
611
+ name: "Pahawh Hmong",
607
612
  regex: /\p{Pahawh_Hmong}/,
608
613
  },
609
614
  {
@@ -613,12 +618,12 @@ module UnicodeScriptDetector
613
618
  },
614
619
  {
615
620
  script: :Pau_Cin_Hau,
616
- name: "Pau_Cin_Hau",
621
+ name: "Pau Cin Hau",
617
622
  regex: /\p{Pau_Cin_Hau}/,
618
623
  },
619
624
  {
620
625
  script: :Phags_Pa,
621
- name: "Phags_Pa",
626
+ name: "Phags Pa",
622
627
  regex: /\p{Phags_Pa}/,
623
628
  },
624
629
  {
@@ -628,7 +633,7 @@ module UnicodeScriptDetector
628
633
  },
629
634
  {
630
635
  script: :Psalter_Pahlavi,
631
- name: "Psalter_Pahlavi",
636
+ name: "Psalter Pahlavi",
632
637
  regex: /\p{Psalter_Pahlavi}/,
633
638
  },
634
639
  {
@@ -654,7 +659,7 @@ module UnicodeScriptDetector
654
659
  {
655
660
  script: :Sharada,
656
661
  name: "Sharada",
657
- regex: /\p{Sharada}/,
662
+ regex: /\p{Sharada}|[\u{11B60}-\u{11B7F}]/,
658
663
  },
659
664
  {
660
665
  script: :Shavian,
@@ -666,6 +671,11 @@ module UnicodeScriptDetector
666
671
  name: "Siddham",
667
672
  regex: /\p{Siddham}/,
668
673
  },
674
+ {
675
+ script: :Sidetic,
676
+ name: "Sidetic",
677
+ regex: /[\u{10940}-\u{1095F}]/
678
+ },
669
679
  {
670
680
  script: :SignWriting,
671
681
  name: "SignWriting",
@@ -683,7 +693,7 @@ module UnicodeScriptDetector
683
693
  },
684
694
  {
685
695
  script: :Sora_Sompeng,
686
- name: "Sora_Sompeng",
696
+ name: "Sora Sompeng",
687
697
  regex: /\p{Sora_Sompeng}/,
688
698
  },
689
699
  {
@@ -696,10 +706,14 @@ module UnicodeScriptDetector
696
706
  name: "Sundanese",
697
707
  regex: /\p{Sundanese}/,
698
708
  },
699
-
709
+ {
710
+ script: :Sunuwar,
711
+ name: "Sunuwar",
712
+ regex: /[\u{11BC0}-\u{11BFF}]/,
713
+ },
700
714
  {
701
715
  script: :Syloti_Nagri,
702
- name: "Syloti_Nagri",
716
+ name: "Syloti Nagri",
703
717
  regex: /\p{Syloti_Nagri}/,
704
718
  },
705
719
  {
@@ -719,19 +733,24 @@ module UnicodeScriptDetector
719
733
  },
720
734
  {
721
735
  script: :Tai_Le,
722
- name: "Tai_Le",
736
+ name: "Tai Le",
723
737
  regex: /\p{Tai_Le}/,
724
738
  },
725
739
  {
726
740
  script: :Tai_Tham,
727
- name: "Tai_Tham",
741
+ name: "Tai Tham",
728
742
  regex: /\p{Tai_Tham}/,
729
743
  },
730
744
  {
731
745
  script: :Tai_Viet,
732
- name: "Tai_Viet",
746
+ name: "Tai Viet",
733
747
  regex: /\p{Tai_Viet}/,
734
748
  },
749
+ {
750
+ script: :Tai_Yo,
751
+ name: "Tai Yo",
752
+ regex: /[\u{1E6C0}-\u{1E6FF}]/,
753
+ },
735
754
  {
736
755
  script: :Takri,
737
756
  name: "Takri",
@@ -750,7 +769,7 @@ module UnicodeScriptDetector
750
769
  {
751
770
  script: :Tangut,
752
771
  name: "Tangut",
753
- regex: /\p{Tangut}/,
772
+ regex: /\p{Tangut}|[\u{18D80}-\u{18DFF}]/,
754
773
  },
755
774
  {
756
775
  script: :Telugu,
@@ -782,13 +801,26 @@ module UnicodeScriptDetector
782
801
  name: "Tirhuta",
783
802
  regex: /\p{Tirhuta}/,
784
803
  },
785
-
804
+ {
805
+ script: :Todhri,
806
+ name: "Todhri",
807
+ regex: /[\u{105C0}-\u{105FF}]/,
808
+ },
809
+ {
810
+ script: :Tolong_Siki,
811
+ name: "Tolong Siki",
812
+ regex: /[\u{11DB0}-\u{11DEF}]/,
813
+ },
786
814
  {
787
815
  script: :Toto,
788
816
  name: "Toto",
789
817
  regex: /\p{Toto}/,
790
818
  },
791
-
819
+ {
820
+ script: :Tulu_Tigalari,
821
+ name: "Tulu Tigalari",
822
+ regex: /[\u{11380}-\u{113FF}]/,
823
+ },
792
824
  {
793
825
  script: :Ugaritic,
794
826
  name: "Ugaritic",
@@ -816,7 +848,7 @@ module UnicodeScriptDetector
816
848
  },
817
849
  {
818
850
  script: :Warang_Citi,
819
- name: "Warang_Citi",
851
+ name: "Warang Citi",
820
852
  regex: /\p{Warang_Citi}/,
821
853
  },
822
854
  {
@@ -831,9 +863,21 @@ module UnicodeScriptDetector
831
863
  },
832
864
  {
833
865
  script: :Zanabazar_Square,
834
- name: "Zanabazar_Square",
866
+ name: "Zanabazar Square",
835
867
  regex: /\p{Zanabazar_Square}/,
836
868
  },
869
+
870
+ #Special characters
871
+ {
872
+ script: :Whitespace,
873
+ name: "Whitespace",
874
+ regex: /\s/
875
+ },
876
+ {
877
+ script: :Digit,
878
+ name: "Digit",
879
+ regex: /\d/
880
+ },
837
881
  {
838
882
  script: :Emoji,
839
883
  name: "Emoji",
@@ -847,7 +891,7 @@ module UnicodeScriptDetector
847
891
  {
848
892
  script: :Common,
849
893
  name: "Common",
850
- regex: /\p{Common}/,
894
+ regex: /\p{Common}|[\u{1CEC0}-\u{1CEFF}]|[\u{1CC00}-\u{1CEBF}]/,
851
895
  },
852
896
  ]
853
897
  end
@@ -1,3 +1,3 @@
1
1
  module UnicodeScriptDetector
2
- VERSION = "0.0.6"
2
+ VERSION = "0.0.7"
3
3
  end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode_script_detector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.6
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Arendsen