unicode_script_detector 0.0.3 → 0.0.4

This diff shows the changes between publicly available versions of a package that have been released to one of the supported registries. It is provided for informational purposes only and reflects the package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 369f6039d5c7793db0e9f7d39d0815fedc07ee0a827b6a7b758404321b24f7f2
4
- data.tar.gz: 1af7b6b33e9bdae2a27b21230ca5f5b879aaa1c9d802c7d7ec5c824f1766eaf9
3
+ metadata.gz: d80a777771e5076bfb67e1b63ea3a0900f9ca53b16b0e90224608b60c06c78bc
4
+ data.tar.gz: 266e4ca3ad0728cacbd4a4f37819f271b22205a893f5750c14c862bda8d5cdc2
5
5
  SHA512:
6
- metadata.gz: 2028fbfdb634b896d4fa91f3b0f3f0a14af75611200f147d5a2fbafede24102d74bc8725e4b826d38651832db068aa9b8dc0e8e621be041f8386455310d1a31d
7
- data.tar.gz: ad0dc5d58cbcb42638b1571969029ddef45f6041da20f9927bea468cd0a4fe6fa681a9710500eb2370924cce997b855a073c97a9d404289898cbfdce47391ae7
6
+ metadata.gz: a0d6741c40ae5adfde691ec742b1bc5440e2b45f265d448cda9b6c40978843404b05419e82ebad258ef40e752bcaffb26c985e625cb52025de7ae37b7aee505a
7
+ data.tar.gz: 2c81a22c0cb25ec024eb875f4da5a19891bff344acf7daaaa8a4155caf89e0ba77021d5b475b3b3f8e986eaf1b6be8bf8e20747a88afdbd0bfdd083d344c2680
@@ -1,15 +1,30 @@
1
1
  module UnicodeScriptDetector
2
2
  class Scripts
3
3
  LIST = [
4
+ {
5
+ script: :Whitespace,
6
+ name: "Whitespace",
7
+ regex: /\s/
8
+ },
4
9
  {
5
10
  script: :Digit,
6
11
  name: "Digit",
7
- regex: /\d/,
12
+ regex: /\d/
8
13
  },
9
- {
10
- script: :Whitespace,
11
- name: "Whitespace",
12
- regex: /\s/,
14
+ {
15
+ script: :Adlam,
16
+ name: "Adlam",
17
+ regex: /\p{Adlam}/,
18
+ },
19
+ {
20
+ script: :Ahom,
21
+ name: "Ahom",
22
+ regex: /\p{Ahom}/,
23
+ },
24
+ {
25
+ script: :Anatolian_Hieroglyphs,
26
+ name: "Anatolian_Hieroglyphs",
27
+ regex: /\p{Anatolian_Hieroglyphs}/,
13
28
  },
14
29
  {
15
30
  script: :Arabic,
@@ -21,6 +36,11 @@ module UnicodeScriptDetector
21
36
  name: "Armenian",
22
37
  regex: /\p{Armenian}/,
23
38
  },
39
+ {
40
+ script: :Avestan,
41
+ name: "Avestan",
42
+ regex: /\p{Avestan}/,
43
+ },
24
44
  {
25
45
  script: :Balinese,
26
46
  name: "Balinese",
@@ -31,6 +51,11 @@ module UnicodeScriptDetector
31
51
  name: "Bamum",
32
52
  regex: /\p{Bamum}/,
33
53
  },
54
+ {
55
+ script: :Bassa_Vah,
56
+ name: "Bassa_Vah",
57
+ regex: /\p{Bassa_Vah}/,
58
+ },
34
59
  {
35
60
  script: :Batak,
36
61
  name: "Batak",
@@ -41,6 +66,11 @@ module UnicodeScriptDetector
41
66
  name: "Bengali",
42
67
  regex: /\p{Bengali}/,
43
68
  },
69
+ {
70
+ script: :Bhaiksuki,
71
+ name: "Bhaiksuki",
72
+ regex: /\p{Bhaiksuki}/,
73
+ },
44
74
  {
45
75
  script: :Bopomofo,
46
76
  name: "Bopomofo",
@@ -76,6 +106,11 @@ module UnicodeScriptDetector
76
106
  name: "Carian",
77
107
  regex: /\p{Carian}/,
78
108
  },
109
+ {
110
+ script: :Caucasian_Albanian,
111
+ name: "Caucasian_Albanian",
112
+ regex: /\p{Caucasian_Albanian}/,
113
+ },
79
114
  {
80
115
  script: :Chakma,
81
116
  name: "Chakma",
@@ -91,6 +126,11 @@ module UnicodeScriptDetector
91
126
  name: "Cherokee",
92
127
  regex: /\p{Cherokee}/,
93
128
  },
129
+ {
130
+ script: :Chorasmian,
131
+ name: "Chorasmian",
132
+ regex: /\p{Chorasmian}/,
133
+ },
94
134
  {
95
135
  script: :Coptic,
96
136
  name: "Coptic",
@@ -106,6 +146,11 @@ module UnicodeScriptDetector
106
146
  name: "Cypriot",
107
147
  regex: /\p{Cypriot}/,
108
148
  },
149
+ {
150
+ script: :Cypro_Minoan,
151
+ name: "Cypro_Minoan",
152
+ regex: /\p{Cypro_Minoan}/,
153
+ },
109
154
  {
110
155
  script: :Cyrillic,
111
156
  name: "Cyrillic",
@@ -121,16 +166,42 @@ module UnicodeScriptDetector
121
166
  name: "Devanagari",
122
167
  regex: /\p{Devanagari}/,
123
168
  },
169
+ {
170
+ script: :Dives_Akuru,
171
+ name: "Dives_Akuru",
172
+ regex: /\p{Dives_Akuru}/,
173
+ },
174
+ {
175
+ script: :Dogra,
176
+ name: "Dogra",
177
+ regex: /\p{Dogra}/,
178
+ },
179
+ {
180
+ script: :Duployan,
181
+ name: "Duployan",
182
+ regex: /\p{Duployan}/,
183
+ },
124
184
  {
125
185
  script: :Egyptian_Hieroglyphs,
126
186
  name: "Egyptian_Hieroglyphs",
127
187
  regex: /\p{Egyptian_Hieroglyphs}/,
128
188
  },
189
+ {
190
+ script: :Elbasan,
191
+ name: "Elbasan",
192
+ regex: /\p{Elbasan}/,
193
+ },
194
+ {
195
+ script: :Elymaic,
196
+ name: "Elymaic",
197
+ regex: /\p{Elymaic}/,
198
+ },
129
199
  {
130
200
  script: :Ethiopic,
131
201
  name: "Ethiopic",
132
202
  regex: /\p{Ethiopic}/,
133
203
  },
204
+
134
205
  {
135
206
  script: :Georgian,
136
207
  name: "Georgian",
@@ -146,6 +217,11 @@ module UnicodeScriptDetector
146
217
  name: "Gothic",
147
218
  regex: /\p{Gothic}/,
148
219
  },
220
+ {
221
+ script: :Grantha,
222
+ name: "Grantha",
223
+ regex: /\p{Grantha}/,
224
+ },
149
225
  {
150
226
  script: :Greek,
151
227
  name: "Greek",
@@ -156,11 +232,17 @@ module UnicodeScriptDetector
156
232
  name: "Gujarati",
157
233
  regex: /\p{Gujarati}/,
158
234
  },
235
+ {
236
+ script: :Gunjala_Gondi,
237
+ name: "Gunjala_Gondi",
238
+ regex: /\p{Gunjala_Gondi}/,
239
+ },
159
240
  {
160
241
  script: :Gurmukhi,
161
242
  name: "Gurmukhi",
162
243
  regex: /\p{Gurmukhi}/,
163
244
  },
245
+
164
246
  {
165
247
  script: :Han,
166
248
  name: "Han",
@@ -171,11 +253,21 @@ module UnicodeScriptDetector
171
253
  name: "Hangul",
172
254
  regex: /\p{Hangul}/,
173
255
  },
256
+ {
257
+ script: :Hanifi_Rohingya,
258
+ name: "Hanifi_Rohingya",
259
+ regex: /\p{Hanifi_Rohingya}/,
260
+ },
174
261
  {
175
262
  script: :Hanunoo,
176
263
  name: "Hanunoo",
177
264
  regex: /\p{Hanunoo}/,
178
265
  },
266
+ {
267
+ script: :Hatran,
268
+ name: "Hatran",
269
+ regex: /\p{Hatran}/,
270
+ },
179
271
  {
180
272
  script: :Hebrew,
181
273
  name: "Hebrew",
@@ -226,6 +318,12 @@ module UnicodeScriptDetector
226
318
  name: "Katakana",
227
319
  regex: /\p{Katakana}/,
228
320
  },
321
+
322
+ {
323
+ script: :Kawi,
324
+ name: "Kawi",
325
+ regex: /\p{Kawi}/,
326
+ },
229
327
  {
230
328
  script: :Kayah_Li,
231
329
  name: "Kayah_Li",
@@ -236,11 +334,27 @@ module UnicodeScriptDetector
236
334
  name: "Kharoshthi",
237
335
  regex: /\p{Kharoshthi}/,
238
336
  },
337
+ {
338
+ script: :Khitan_Small_Script,
339
+ name: "Khitan_Small_Script",
340
+ regex: /\p{Khitan_Small_Script}/,
341
+ },
239
342
  {
240
343
  script: :Khmer,
241
344
  name: "Khmer",
242
345
  regex: /\p{Khmer}/,
243
346
  },
347
+ {
348
+ script: :Khojki,
349
+ name: "Khojki",
350
+ regex: /\p{Khojki}/,
351
+ },
352
+ {
353
+ script: :Khudawadi,
354
+ name: "Khudawadi",
355
+ regex: /\p{Khudawadi}/,
356
+ },
357
+
244
358
  {
245
359
  script: :Lao,
246
360
  name: "Lao",
@@ -261,11 +375,21 @@ module UnicodeScriptDetector
261
375
  name: "Limbu",
262
376
  regex: /\p{Limbu}/,
263
377
  },
378
+ {
379
+ script: :Linear_A,
380
+ name: "Linear_A",
381
+ regex: /\p{Linear_A}/,
382
+ },
264
383
  {
265
384
  script: :Linear_B,
266
385
  name: "Linear_B",
267
386
  regex: /\p{Linear_B}/,
268
387
  },
388
+ {
389
+ script: :Lisu,
390
+ name: "Lisu",
391
+ regex: /\p{Lisu}/,
392
+ },
269
393
  {
270
394
  script: :Lycian,
271
395
  name: "Lycian",
@@ -276,6 +400,16 @@ module UnicodeScriptDetector
276
400
  name: "Lydian",
277
401
  regex: /\p{Lydian}/,
278
402
  },
403
+ {
404
+ script: :Mahajani,
405
+ name: "Mahajani",
406
+ regex: /\p{Mahajani}/,
407
+ },
408
+ {
409
+ script: :Makasar,
410
+ name: "Makasar",
411
+ regex: /\p{Makasar}/,
412
+ },
279
413
  {
280
414
  script: :Malayalam,
281
415
  name: "Malayalam",
@@ -286,11 +420,36 @@ module UnicodeScriptDetector
286
420
  name: "Mandaic",
287
421
  regex: /\p{Mandaic}/,
288
422
  },
423
+ {
424
+ script: :Manichaean,
425
+ name: "Manichaean",
426
+ regex: /\p{Manichaean}/,
427
+ },
428
+ {
429
+ script: :Marchen,
430
+ name: "Marchen",
431
+ regex: /\p{Marchen}/,
432
+ },
433
+ {
434
+ script: :Masaram_Gondi,
435
+ name: "Masaram_Gondi",
436
+ regex: /\p{Masaram_Gondi}/,
437
+ },
438
+ {
439
+ script: :Medefaidrin,
440
+ name: "Medefaidrin",
441
+ regex: /\p{Medefaidrin}/,
442
+ },
289
443
  {
290
444
  script: :Meetei_Mayek,
291
445
  name: "Meetei_Mayek",
292
446
  regex: /\p{Meetei_Mayek}/,
293
447
  },
448
+ {
449
+ script: :Mende_Kikakui,
450
+ name: "Mende_Kikakui",
451
+ regex: /\p{Mende_Kikakui}/,
452
+ },
294
453
  {
295
454
  script: :Meroitic_Cursive,
296
455
  name: "Meroitic_Cursive",
@@ -306,26 +465,71 @@ module UnicodeScriptDetector
306
465
  name: "Miao",
307
466
  regex: /\p{Miao}/,
308
467
  },
468
+ {
469
+ script: :Modi,
470
+ name: "Modi",
471
+ regex: /\p{Modi}/,
472
+ },
309
473
  {
310
474
  script: :Mongolian,
311
475
  name: "Mongolian",
312
476
  regex: /\p{Mongolian}/,
313
477
  },
478
+ {
479
+ script: :Mro,
480
+ name: "Mro",
481
+ regex: /\p{Mro}/,
482
+ },
483
+ {
484
+ script: :Multani,
485
+ name: "Multani",
486
+ regex: /\p{Multani}/,
487
+ },
314
488
  {
315
489
  script: :Myanmar,
316
490
  name: "Myanmar",
317
491
  regex: /\p{Myanmar}/,
318
492
  },
493
+ {
494
+ script: :Nabataean,
495
+ name: "Nabataean",
496
+ regex: /\p{Nabataean}/,
497
+ },
498
+ {
499
+ script: :Nag_Mundari,
500
+ name: "Nag_Mundari",
501
+ regex: /\p{Nag_Mundari}/,
502
+ },
503
+ {
504
+ script: :Nandinagari,
505
+ name: "Nandinagari",
506
+ regex: /\p{Nandinagari}/,
507
+ },
319
508
  {
320
509
  script: :New_Tai_Lue,
321
510
  name: "New_Tai_Lue",
322
511
  regex: /\p{New_Tai_Lue}/,
323
512
  },
513
+ {
514
+ script: :Newa,
515
+ name: "Newa",
516
+ regex: /\p{Newa}/,
517
+ },
324
518
  {
325
519
  script: :Nko,
326
520
  name: "Nko",
327
521
  regex: /\p{Nko}/,
328
522
  },
523
+ {
524
+ script: :Nushu,
525
+ name: "Nushu",
526
+ regex: /\p{Nushu}/,
527
+ },
528
+ {
529
+ script: :Nyiakeng_Puachue_Hmong,
530
+ name: "Nyiakeng_Puachue_Hmong",
531
+ regex: /\p{Nyiakeng_Puachue_Hmong}/,
532
+ },
329
533
  {
330
534
  script: :Ogham,
331
535
  name: "Ogham",
@@ -336,16 +540,37 @@ module UnicodeScriptDetector
336
540
  name: "Ol_Chiki",
337
541
  regex: /\p{Ol_Chiki}/,
338
542
  },
543
+
544
+ {
545
+ script: :Old_Hungarian,
546
+ name: "Old_Hungarian",
547
+ regex: /\p{Old_Hungarian}/,
548
+ },
339
549
  {
340
550
  script: :Old_Italic,
341
551
  name: "Old_Italic",
342
552
  regex: /\p{Old_Italic}/,
343
553
  },
554
+ {
555
+ script: :Old_North_Arabian,
556
+ name: "Old_North_Arabian",
557
+ regex: /\p{Old_North_Arabian}/,
558
+ },
559
+ {
560
+ script: :Old_Permic,
561
+ name: "Old_Permic",
562
+ regex: /\p{Old_Permic}/,
563
+ },
344
564
  {
345
565
  script: :Old_Persian,
346
566
  name: "Old_Persian",
347
567
  regex: /\p{Old_Persian}/,
348
568
  },
569
+ {
570
+ script: :Old_Sogdian,
571
+ name: "Old_Sogdian",
572
+ regex: /\p{Old_Sogdian}/,
573
+ },
349
574
  {
350
575
  script: :Old_South_Arabian,
351
576
  name: "Old_South_Arabian",
@@ -356,16 +581,41 @@ module UnicodeScriptDetector
356
581
  name: "Old_Turkic",
357
582
  regex: /\p{Old_Turkic}/,
358
583
  },
584
+ {
585
+ script: :Old_Uyghur,
586
+ name: "Old_Uyghur",
587
+ regex: /\p{Old_Uyghur}/,
588
+ },
359
589
  {
360
590
  script: :Oriya,
361
591
  name: "Oriya",
362
592
  regex: /\p{Oriya}/,
363
593
  },
594
+ {
595
+ script: :Osage,
596
+ name: "Osage",
597
+ regex: /\p{Osage}/,
598
+ },
364
599
  {
365
600
  script: :Osmanya,
366
601
  name: "Osmanya",
367
602
  regex: /\p{Osmanya}/,
368
603
  },
604
+ {
605
+ script: :Pahawh_Hmong,
606
+ name: "Pahawh_Hmong",
607
+ regex: /\p{Pahawh_Hmong}/,
608
+ },
609
+ {
610
+ script: :Palmyrene,
611
+ name: "Palmyrene",
612
+ regex: /\p{Palmyrene}/,
613
+ },
614
+ {
615
+ script: :Pau_Cin_Hau,
616
+ name: "Pau_Cin_Hau",
617
+ regex: /\p{Pau_Cin_Hau}/,
618
+ },
369
619
  {
370
620
  script: :Phags_Pa,
371
621
  name: "Phags_Pa",
@@ -376,6 +626,11 @@ module UnicodeScriptDetector
376
626
  name: "Phoenician",
377
627
  regex: /\p{Phoenician}/,
378
628
  },
629
+ {
630
+ script: :Psalter_Pahlavi,
631
+ name: "Psalter_Pahlavi",
632
+ regex: /\p{Psalter_Pahlavi}/,
633
+ },
379
634
  {
380
635
  script: :Rejang,
381
636
  name: "Rejang",
@@ -386,6 +641,11 @@ module UnicodeScriptDetector
386
641
  name: "Runic",
387
642
  regex: /\p{Runic}/,
388
643
  },
644
+ {
645
+ script: :Samaritan,
646
+ name: "Samaritan",
647
+ regex: /\p{Samaritan}/,
648
+ },
389
649
  {
390
650
  script: :Saurashtra,
391
651
  name: "Saurashtra",
@@ -401,21 +661,42 @@ module UnicodeScriptDetector
401
661
  name: "Shavian",
402
662
  regex: /\p{Shavian}/,
403
663
  },
664
+ {
665
+ script: :Siddham,
666
+ name: "Siddham",
667
+ regex: /\p{Siddham}/,
668
+ },
669
+ {
670
+ script: :SignWriting,
671
+ name: "SignWriting",
672
+ regex: /\p{SignWriting}/,
673
+ },
404
674
  {
405
675
  script: :Sinhala,
406
676
  name: "Sinhala",
407
677
  regex: /\p{Sinhala}/,
408
678
  },
679
+ {
680
+ script: :Sogdian,
681
+ name: "Sogdian",
682
+ regex: /\p{Sogdian}/,
683
+ },
409
684
  {
410
685
  script: :Sora_Sompeng,
411
686
  name: "Sora_Sompeng",
412
687
  regex: /\p{Sora_Sompeng}/,
413
688
  },
689
+ {
690
+ script: :Soyombo,
691
+ name: "Soyombo",
692
+ regex: /\p{Soyombo}/,
693
+ },
414
694
  {
415
695
  script: :Sundanese,
416
696
  name: "Sundanese",
417
697
  regex: /\p{Sundanese}/,
418
698
  },
699
+
419
700
  {
420
701
  script: :Syloti_Nagri,
421
702
  name: "Syloti_Nagri",
@@ -461,6 +742,16 @@ module UnicodeScriptDetector
461
742
  name: "Tamil",
462
743
  regex: /\p{Tamil}/,
463
744
  },
745
+ {
746
+ script: :Tangsa,
747
+ name: "Tangsa",
748
+ regex: /\p{Tangsa}/,
749
+ },
750
+ {
751
+ script: :Tangut,
752
+ name: "Tangut",
753
+ regex: /\p{Tangut}/,
754
+ },
464
755
  {
465
756
  script: :Telugu,
466
757
  name: "Telugu",
@@ -486,25 +777,67 @@ module UnicodeScriptDetector
486
777
  name: "Tifinagh",
487
778
  regex: /\p{Tifinagh}/,
488
779
  },
780
+ {
781
+ script: :Tirhuta,
782
+ name: "Tirhuta",
783
+ regex: /\p{Tirhuta}/,
784
+ },
785
+
786
+ {
787
+ script: :Toto,
788
+ name: "Toto",
789
+ regex: /\p{Toto}/,
790
+ },
791
+
489
792
  {
490
793
  script: :Ugaritic,
491
794
  name: "Ugaritic",
492
795
  regex: /\p{Ugaritic}/,
493
796
  },
797
+ {
798
+ script: :Unknown,
799
+ name: "Unknown",
800
+ regex: /\p{Unknown}/,
801
+ },
494
802
  {
495
803
  script: :Vai,
496
804
  name: "Vai",
497
805
  regex: /\p{Vai}/,
498
806
  },
807
+ {
808
+ script: :Vithkuqi,
809
+ name: "Vithkuqi",
810
+ regex: /\p{Vithkuqi}/,
811
+ },
812
+ {
813
+ script: :Wancho,
814
+ name: "Wancho",
815
+ regex: /\p{Wancho}/,
816
+ },
817
+ {
818
+ script: :Warang_Citi,
819
+ name: "Warang_Citi",
820
+ regex: /\p{Warang_Citi}/,
821
+ },
822
+ {
823
+ script: :Yezidi,
824
+ name: "Yezidi",
825
+ regex: /\p{Yezidi}/,
826
+ },
499
827
  {
500
828
  script: :Yi,
501
829
  name: "Yi",
502
830
  regex: /\p{Yi}/,
503
831
  },
504
- {
505
- script: :Emoji,
506
- name: "Emoji",
507
- regex: /\p{Emoji}/,
832
+ {
833
+ script: :Zanabazar_Square,
834
+ name: "Zanabazar_Square",
835
+ regex: /\p{Zanabazar_Square}/,
836
+ },
837
+ {
838
+ script: :Emoji,
839
+ name: "Emoji",
840
+ regex: /\p{Emoji}/,
508
841
  },
509
842
  {
510
843
  script: :Common,
@@ -513,4 +846,4 @@ module UnicodeScriptDetector
513
846
  },
514
847
  ]
515
848
  end
516
- end
849
+ end
@@ -1,3 +1,3 @@
1
1
  module UnicodeScriptDetector
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.4"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode_script_detector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.4
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Arendsen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-12-02 00:00:00.000000000 Z
11
+ date: 2024-12-04 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: zeitwerk
@@ -79,7 +79,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
79
79
  - !ruby/object:Gem::Version
80
80
  version: '0'
81
81
  requirements: []
82
- rubygems_version: 3.5.11
82
+ rubygems_version: 3.5.23
83
83
  signing_key:
84
84
  specification_version: 4
85
85
  summary: Unicode Script Detector