unicode_script_detector 0.0.3 → 0.0.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 369f6039d5c7793db0e9f7d39d0815fedc07ee0a827b6a7b758404321b24f7f2
4
- data.tar.gz: 1af7b6b33e9bdae2a27b21230ca5f5b879aaa1c9d802c7d7ec5c824f1766eaf9
3
+ metadata.gz: 4795cdb246ac34ccb0ee5183ff0e704d25e4e67410acee321f36f4446dd28356
4
+ data.tar.gz: 0c7b9c4c835718f2fc7509225204e38c0a7148949c5745f1448b299c27e0e88d
5
5
  SHA512:
6
- metadata.gz: 2028fbfdb634b896d4fa91f3b0f3f0a14af75611200f147d5a2fbafede24102d74bc8725e4b826d38651832db068aa9b8dc0e8e621be041f8386455310d1a31d
7
- data.tar.gz: ad0dc5d58cbcb42638b1571969029ddef45f6041da20f9927bea468cd0a4fe6fa681a9710500eb2370924cce997b855a073c97a9d404289898cbfdce47391ae7
6
+ metadata.gz: 5da7422c57295f4ac3dee3ac9ccfaa99b5586418de956a88876035541da023e9fa4afe609a4aa79d4c3a1a5f9b1ffe64370984657844c06fc6a575578beb5ee2
7
+ data.tar.gz: aa9fecf48386b6eb5a0074cbbec8819af80153c6111e042debd9e9c312145bc11a936b3003a406370a28e8a27a48b2bd0409c0cf043b26fe465f4c58ee9669e2
data/README.md CHANGED
@@ -20,23 +20,45 @@ $ gem install unicode_script_detector
20
20
  UnicodeScriptDetector.detect_characters "Hel6б"
21
21
 
22
22
  #Output:
23
- [#<UnicodeScriptDetector::Character:0x00007768fefdead8 @char="H", @name="Latin", @script=:Latin>,
24
- #<UnicodeScriptDetector::Character:0x00007768fefdea10 @char="e", @name="Latin", @script=:Latin>,
25
- #<UnicodeScriptDetector::Character:0x00007768fefde970 @char="l", @name="Latin", @script=:Latin>,
26
- #<UnicodeScriptDetector::Character:0x00007768fefde8d0 @char="6", @name="Digit", @script=:Digit>,
27
- #<UnicodeScriptDetector::Character:0x00007768fefde830 @char="б", @name="Cyrillic", @script=:Cyrillic>]
23
+ [
24
+ #<UnicodeScriptDetector::Character:0x00007768fefdead8 @char="H", @name="Latin", @script=:Latin>,
25
+ #<UnicodeScriptDetector::Character:0x00007768fefdea10 @char="e", @name="Latin", @script=:Latin>,
26
+ #<UnicodeScriptDetector::Character:0x00007768fefde970 @char="l", @name="Latin", @script=:Latin>,
27
+ #<UnicodeScriptDetector::Character:0x00007768fefde8d0 @char="6", @name="Digit", @script=:Digit>,
28
+ #<UnicodeScriptDetector::Character:0x00007768fefde830 @char="б", @name="Cyrillic", @script=:Cyrillic>
29
+ ]
28
30
  ```
29
31
 
30
32
  ## Detect if a string contains certain scripts
31
33
  ```ruby
32
34
  # This will return true because it contains Latin and Cyrillic
33
- UnicodeScriptDetector.contains? "Hellб🔥", [:Latin, :Cyrillic]
35
+ UnicodeScriptDetector.contains? "Helб🔥", [:Latin, :Cyrillic]
34
36
  ```
35
37
 
36
38
  ## Detect if a string contains only certain scripts
37
39
  ```ruby
38
40
  # This will return false because it contains an Emoji as well
39
- UnicodeScriptDetector.contains_only? "Hellб🔥", [:Latin, :Cyrillic]
41
+ UnicodeScriptDetector.contains_only? "Helб🔥", [:Latin, :Cyrillic]
42
+ ```
43
+
44
+ ## Detect all the characters of a string, grouped by the script
45
+ ```ruby
46
+ UnicodeScriptDetector.script_groups("Hel6б how are you?").each do |group|
47
+ puts "#{group.name}: #{group.text} (#{group.length} characters)"
48
+ end
49
+
50
+ #Output:
51
+
52
+ Latin: Hel (3 characters)
53
+ Digit: 6 (1 characters)
54
+ Cyrillic: б (1 characters)
55
+ Whitespace: (1 characters)
56
+ Latin: how (3 characters)
57
+ Whitespace: (1 characters)
58
+ Latin: are (3 characters)
59
+ Whitespace: (1 characters)
60
+ Latin: you (3 characters)
61
+ Punctuation: ? (1 characters)
40
62
  ```
41
63
 
42
64
  ## Development
@@ -47,4 +69,4 @@ Run the tests with `bin/test`.
47
69
  You're welcome to contribute to this project. See https://github.com/davidarendsen/unicode_script_detector.
48
70
 
49
71
  ## License
50
- This software is released under the [MIT license](LICENSE).
72
+ This software is released under the [MIT license](LICENSE).
@@ -15,5 +15,17 @@ module UnicodeScriptDetector
15
15
  def hiragana?
16
16
  @script === :Hiragana
17
17
  end
18
+
19
+ def punctuation?
20
+ @script === :Punctuation
21
+ end
22
+
23
+ def emoji?
24
+ @script === :Emoji
25
+ end
26
+
27
+ def digit?
28
+ @script === :Digit
29
+ end
18
30
  end
19
- end
31
+ end
@@ -41,5 +41,15 @@ module UnicodeScriptDetector
41
41
 
42
42
  @scripts.uniq.sort == scripts.uniq.sort
43
43
  end
44
+
45
+ def script_groups
46
+ @characters
47
+ .chunk { |char| char.script }
48
+ .map { |script, chars| ScriptGroup.new(script, chars) }
49
+ end
50
+
51
+ def grouped_scripts_hash
52
+ script_groups.map { |group| [group.script, group.text] }.to_h
53
+ end
44
54
  end
45
- end
55
+ end
@@ -0,0 +1,19 @@
1
+ module UnicodeScriptDetector
2
+ class ScriptGroup
3
+ attr_reader :script, :characters, :text
4
+
5
+ def initialize(script, characters)
6
+ @script = script
7
+ @characters = characters
8
+ @text = characters.map(&:char).join
9
+ end
10
+
11
+ def length
12
+ @characters.length
13
+ end
14
+
15
+ def name
16
+ @characters.first&.name
17
+ end
18
+ end
19
+ end
@@ -1,15 +1,35 @@
1
1
  module UnicodeScriptDetector
2
2
  class Scripts
3
3
  LIST = [
4
+ {
5
+ script: :Whitespace,
6
+ name: "Whitespace",
7
+ regex: /\s/
8
+ },
4
9
  {
5
10
  script: :Digit,
6
11
  name: "Digit",
7
- regex: /\d/,
12
+ regex: /\d/
8
13
  },
9
14
  {
10
- script: :Whitespace,
11
- name: "Whitespace",
12
- regex: /\s/,
15
+ script: :Punctuation,
16
+ name: "Punctuation",
17
+ regex: /[[:punct:]]/
18
+ },
19
+ {
20
+ script: :Adlam,
21
+ name: "Adlam",
22
+ regex: /\p{Adlam}/,
23
+ },
24
+ {
25
+ script: :Ahom,
26
+ name: "Ahom",
27
+ regex: /\p{Ahom}/,
28
+ },
29
+ {
30
+ script: :Anatolian_Hieroglyphs,
31
+ name: "Anatolian_Hieroglyphs",
32
+ regex: /\p{Anatolian_Hieroglyphs}/,
13
33
  },
14
34
  {
15
35
  script: :Arabic,
@@ -21,6 +41,11 @@ module UnicodeScriptDetector
21
41
  name: "Armenian",
22
42
  regex: /\p{Armenian}/,
23
43
  },
44
+ {
45
+ script: :Avestan,
46
+ name: "Avestan",
47
+ regex: /\p{Avestan}/,
48
+ },
24
49
  {
25
50
  script: :Balinese,
26
51
  name: "Balinese",
@@ -31,6 +56,11 @@ module UnicodeScriptDetector
31
56
  name: "Bamum",
32
57
  regex: /\p{Bamum}/,
33
58
  },
59
+ {
60
+ script: :Bassa_Vah,
61
+ name: "Bassa_Vah",
62
+ regex: /\p{Bassa_Vah}/,
63
+ },
34
64
  {
35
65
  script: :Batak,
36
66
  name: "Batak",
@@ -41,6 +71,11 @@ module UnicodeScriptDetector
41
71
  name: "Bengali",
42
72
  regex: /\p{Bengali}/,
43
73
  },
74
+ {
75
+ script: :Bhaiksuki,
76
+ name: "Bhaiksuki",
77
+ regex: /\p{Bhaiksuki}/,
78
+ },
44
79
  {
45
80
  script: :Bopomofo,
46
81
  name: "Bopomofo",
@@ -76,6 +111,11 @@ module UnicodeScriptDetector
76
111
  name: "Carian",
77
112
  regex: /\p{Carian}/,
78
113
  },
114
+ {
115
+ script: :Caucasian_Albanian,
116
+ name: "Caucasian_Albanian",
117
+ regex: /\p{Caucasian_Albanian}/,
118
+ },
79
119
  {
80
120
  script: :Chakma,
81
121
  name: "Chakma",
@@ -91,6 +131,11 @@ module UnicodeScriptDetector
91
131
  name: "Cherokee",
92
132
  regex: /\p{Cherokee}/,
93
133
  },
134
+ {
135
+ script: :Chorasmian,
136
+ name: "Chorasmian",
137
+ regex: /\p{Chorasmian}/,
138
+ },
94
139
  {
95
140
  script: :Coptic,
96
141
  name: "Coptic",
@@ -106,6 +151,11 @@ module UnicodeScriptDetector
106
151
  name: "Cypriot",
107
152
  regex: /\p{Cypriot}/,
108
153
  },
154
+ {
155
+ script: :Cypro_Minoan,
156
+ name: "Cypro_Minoan",
157
+ regex: /\p{Cypro_Minoan}/,
158
+ },
109
159
  {
110
160
  script: :Cyrillic,
111
161
  name: "Cyrillic",
@@ -121,16 +171,42 @@ module UnicodeScriptDetector
121
171
  name: "Devanagari",
122
172
  regex: /\p{Devanagari}/,
123
173
  },
174
+ {
175
+ script: :Dives_Akuru,
176
+ name: "Dives_Akuru",
177
+ regex: /\p{Dives_Akuru}/,
178
+ },
179
+ {
180
+ script: :Dogra,
181
+ name: "Dogra",
182
+ regex: /\p{Dogra}/,
183
+ },
184
+ {
185
+ script: :Duployan,
186
+ name: "Duployan",
187
+ regex: /\p{Duployan}/,
188
+ },
124
189
  {
125
190
  script: :Egyptian_Hieroglyphs,
126
191
  name: "Egyptian_Hieroglyphs",
127
192
  regex: /\p{Egyptian_Hieroglyphs}/,
128
193
  },
194
+ {
195
+ script: :Elbasan,
196
+ name: "Elbasan",
197
+ regex: /\p{Elbasan}/,
198
+ },
199
+ {
200
+ script: :Elymaic,
201
+ name: "Elymaic",
202
+ regex: /\p{Elymaic}/,
203
+ },
129
204
  {
130
205
  script: :Ethiopic,
131
206
  name: "Ethiopic",
132
207
  regex: /\p{Ethiopic}/,
133
208
  },
209
+
134
210
  {
135
211
  script: :Georgian,
136
212
  name: "Georgian",
@@ -146,6 +222,11 @@ module UnicodeScriptDetector
146
222
  name: "Gothic",
147
223
  regex: /\p{Gothic}/,
148
224
  },
225
+ {
226
+ script: :Grantha,
227
+ name: "Grantha",
228
+ regex: /\p{Grantha}/,
229
+ },
149
230
  {
150
231
  script: :Greek,
151
232
  name: "Greek",
@@ -156,11 +237,17 @@ module UnicodeScriptDetector
156
237
  name: "Gujarati",
157
238
  regex: /\p{Gujarati}/,
158
239
  },
240
+ {
241
+ script: :Gunjala_Gondi,
242
+ name: "Gunjala_Gondi",
243
+ regex: /\p{Gunjala_Gondi}/,
244
+ },
159
245
  {
160
246
  script: :Gurmukhi,
161
247
  name: "Gurmukhi",
162
248
  regex: /\p{Gurmukhi}/,
163
249
  },
250
+
164
251
  {
165
252
  script: :Han,
166
253
  name: "Han",
@@ -171,11 +258,21 @@ module UnicodeScriptDetector
171
258
  name: "Hangul",
172
259
  regex: /\p{Hangul}/,
173
260
  },
261
+ {
262
+ script: :Hanifi_Rohingya,
263
+ name: "Hanifi_Rohingya",
264
+ regex: /\p{Hanifi_Rohingya}/,
265
+ },
174
266
  {
175
267
  script: :Hanunoo,
176
268
  name: "Hanunoo",
177
269
  regex: /\p{Hanunoo}/,
178
270
  },
271
+ {
272
+ script: :Hatran,
273
+ name: "Hatran",
274
+ regex: /\p{Hatran}/,
275
+ },
179
276
  {
180
277
  script: :Hebrew,
181
278
  name: "Hebrew",
@@ -226,6 +323,12 @@ module UnicodeScriptDetector
226
323
  name: "Katakana",
227
324
  regex: /\p{Katakana}/,
228
325
  },
326
+
327
+ {
328
+ script: :Kawi,
329
+ name: "Kawi",
330
+ regex: /\p{Kawi}/,
331
+ },
229
332
  {
230
333
  script: :Kayah_Li,
231
334
  name: "Kayah_Li",
@@ -236,11 +339,27 @@ module UnicodeScriptDetector
236
339
  name: "Kharoshthi",
237
340
  regex: /\p{Kharoshthi}/,
238
341
  },
342
+ {
343
+ script: :Khitan_Small_Script,
344
+ name: "Khitan_Small_Script",
345
+ regex: /\p{Khitan_Small_Script}/,
346
+ },
239
347
  {
240
348
  script: :Khmer,
241
349
  name: "Khmer",
242
350
  regex: /\p{Khmer}/,
243
351
  },
352
+ {
353
+ script: :Khojki,
354
+ name: "Khojki",
355
+ regex: /\p{Khojki}/,
356
+ },
357
+ {
358
+ script: :Khudawadi,
359
+ name: "Khudawadi",
360
+ regex: /\p{Khudawadi}/,
361
+ },
362
+
244
363
  {
245
364
  script: :Lao,
246
365
  name: "Lao",
@@ -261,11 +380,21 @@ module UnicodeScriptDetector
261
380
  name: "Limbu",
262
381
  regex: /\p{Limbu}/,
263
382
  },
383
+ {
384
+ script: :Linear_A,
385
+ name: "Linear_A",
386
+ regex: /\p{Linear_A}/,
387
+ },
264
388
  {
265
389
  script: :Linear_B,
266
390
  name: "Linear_B",
267
391
  regex: /\p{Linear_B}/,
268
392
  },
393
+ {
394
+ script: :Lisu,
395
+ name: "Lisu",
396
+ regex: /\p{Lisu}/,
397
+ },
269
398
  {
270
399
  script: :Lycian,
271
400
  name: "Lycian",
@@ -276,6 +405,16 @@ module UnicodeScriptDetector
276
405
  name: "Lydian",
277
406
  regex: /\p{Lydian}/,
278
407
  },
408
+ {
409
+ script: :Mahajani,
410
+ name: "Mahajani",
411
+ regex: /\p{Mahajani}/,
412
+ },
413
+ {
414
+ script: :Makasar,
415
+ name: "Makasar",
416
+ regex: /\p{Makasar}/,
417
+ },
279
418
  {
280
419
  script: :Malayalam,
281
420
  name: "Malayalam",
@@ -286,11 +425,36 @@ module UnicodeScriptDetector
286
425
  name: "Mandaic",
287
426
  regex: /\p{Mandaic}/,
288
427
  },
428
+ {
429
+ script: :Manichaean,
430
+ name: "Manichaean",
431
+ regex: /\p{Manichaean}/,
432
+ },
433
+ {
434
+ script: :Marchen,
435
+ name: "Marchen",
436
+ regex: /\p{Marchen}/,
437
+ },
438
+ {
439
+ script: :Masaram_Gondi,
440
+ name: "Masaram_Gondi",
441
+ regex: /\p{Masaram_Gondi}/,
442
+ },
443
+ {
444
+ script: :Medefaidrin,
445
+ name: "Medefaidrin",
446
+ regex: /\p{Medefaidrin}/,
447
+ },
289
448
  {
290
449
  script: :Meetei_Mayek,
291
450
  name: "Meetei_Mayek",
292
451
  regex: /\p{Meetei_Mayek}/,
293
452
  },
453
+ {
454
+ script: :Mende_Kikakui,
455
+ name: "Mende_Kikakui",
456
+ regex: /\p{Mende_Kikakui}/,
457
+ },
294
458
  {
295
459
  script: :Meroitic_Cursive,
296
460
  name: "Meroitic_Cursive",
@@ -306,26 +470,71 @@ module UnicodeScriptDetector
306
470
  name: "Miao",
307
471
  regex: /\p{Miao}/,
308
472
  },
473
+ {
474
+ script: :Modi,
475
+ name: "Modi",
476
+ regex: /\p{Modi}/,
477
+ },
309
478
  {
310
479
  script: :Mongolian,
311
480
  name: "Mongolian",
312
481
  regex: /\p{Mongolian}/,
313
482
  },
483
+ {
484
+ script: :Mro,
485
+ name: "Mro",
486
+ regex: /\p{Mro}/,
487
+ },
488
+ {
489
+ script: :Multani,
490
+ name: "Multani",
491
+ regex: /\p{Multani}/,
492
+ },
314
493
  {
315
494
  script: :Myanmar,
316
495
  name: "Myanmar",
317
496
  regex: /\p{Myanmar}/,
318
497
  },
498
+ {
499
+ script: :Nabataean,
500
+ name: "Nabataean",
501
+ regex: /\p{Nabataean}/,
502
+ },
503
+ {
504
+ script: :Nag_Mundari,
505
+ name: "Nag_Mundari",
506
+ regex: /\p{Nag_Mundari}/,
507
+ },
508
+ {
509
+ script: :Nandinagari,
510
+ name: "Nandinagari",
511
+ regex: /\p{Nandinagari}/,
512
+ },
319
513
  {
320
514
  script: :New_Tai_Lue,
321
515
  name: "New_Tai_Lue",
322
516
  regex: /\p{New_Tai_Lue}/,
323
517
  },
518
+ {
519
+ script: :Newa,
520
+ name: "Newa",
521
+ regex: /\p{Newa}/,
522
+ },
324
523
  {
325
524
  script: :Nko,
326
525
  name: "Nko",
327
526
  regex: /\p{Nko}/,
328
527
  },
528
+ {
529
+ script: :Nushu,
530
+ name: "Nushu",
531
+ regex: /\p{Nushu}/,
532
+ },
533
+ {
534
+ script: :Nyiakeng_Puachue_Hmong,
535
+ name: "Nyiakeng_Puachue_Hmong",
536
+ regex: /\p{Nyiakeng_Puachue_Hmong}/,
537
+ },
329
538
  {
330
539
  script: :Ogham,
331
540
  name: "Ogham",
@@ -336,16 +545,37 @@ module UnicodeScriptDetector
336
545
  name: "Ol_Chiki",
337
546
  regex: /\p{Ol_Chiki}/,
338
547
  },
548
+
549
+ {
550
+ script: :Old_Hungarian,
551
+ name: "Old_Hungarian",
552
+ regex: /\p{Old_Hungarian}/,
553
+ },
339
554
  {
340
555
  script: :Old_Italic,
341
556
  name: "Old_Italic",
342
557
  regex: /\p{Old_Italic}/,
343
558
  },
559
+ {
560
+ script: :Old_North_Arabian,
561
+ name: "Old_North_Arabian",
562
+ regex: /\p{Old_North_Arabian}/,
563
+ },
564
+ {
565
+ script: :Old_Permic,
566
+ name: "Old_Permic",
567
+ regex: /\p{Old_Permic}/,
568
+ },
344
569
  {
345
570
  script: :Old_Persian,
346
571
  name: "Old_Persian",
347
572
  regex: /\p{Old_Persian}/,
348
573
  },
574
+ {
575
+ script: :Old_Sogdian,
576
+ name: "Old_Sogdian",
577
+ regex: /\p{Old_Sogdian}/,
578
+ },
349
579
  {
350
580
  script: :Old_South_Arabian,
351
581
  name: "Old_South_Arabian",
@@ -356,16 +586,41 @@ module UnicodeScriptDetector
356
586
  name: "Old_Turkic",
357
587
  regex: /\p{Old_Turkic}/,
358
588
  },
589
+ {
590
+ script: :Old_Uyghur,
591
+ name: "Old_Uyghur",
592
+ regex: /\p{Old_Uyghur}/,
593
+ },
359
594
  {
360
595
  script: :Oriya,
361
596
  name: "Oriya",
362
597
  regex: /\p{Oriya}/,
363
598
  },
599
+ {
600
+ script: :Osage,
601
+ name: "Osage",
602
+ regex: /\p{Osage}/,
603
+ },
364
604
  {
365
605
  script: :Osmanya,
366
606
  name: "Osmanya",
367
607
  regex: /\p{Osmanya}/,
368
608
  },
609
+ {
610
+ script: :Pahawh_Hmong,
611
+ name: "Pahawh_Hmong",
612
+ regex: /\p{Pahawh_Hmong}/,
613
+ },
614
+ {
615
+ script: :Palmyrene,
616
+ name: "Palmyrene",
617
+ regex: /\p{Palmyrene}/,
618
+ },
619
+ {
620
+ script: :Pau_Cin_Hau,
621
+ name: "Pau_Cin_Hau",
622
+ regex: /\p{Pau_Cin_Hau}/,
623
+ },
369
624
  {
370
625
  script: :Phags_Pa,
371
626
  name: "Phags_Pa",
@@ -376,6 +631,11 @@ module UnicodeScriptDetector
376
631
  name: "Phoenician",
377
632
  regex: /\p{Phoenician}/,
378
633
  },
634
+ {
635
+ script: :Psalter_Pahlavi,
636
+ name: "Psalter_Pahlavi",
637
+ regex: /\p{Psalter_Pahlavi}/,
638
+ },
379
639
  {
380
640
  script: :Rejang,
381
641
  name: "Rejang",
@@ -386,6 +646,11 @@ module UnicodeScriptDetector
386
646
  name: "Runic",
387
647
  regex: /\p{Runic}/,
388
648
  },
649
+ {
650
+ script: :Samaritan,
651
+ name: "Samaritan",
652
+ regex: /\p{Samaritan}/,
653
+ },
389
654
  {
390
655
  script: :Saurashtra,
391
656
  name: "Saurashtra",
@@ -401,21 +666,42 @@ module UnicodeScriptDetector
401
666
  name: "Shavian",
402
667
  regex: /\p{Shavian}/,
403
668
  },
669
+ {
670
+ script: :Siddham,
671
+ name: "Siddham",
672
+ regex: /\p{Siddham}/,
673
+ },
674
+ {
675
+ script: :SignWriting,
676
+ name: "SignWriting",
677
+ regex: /\p{SignWriting}/,
678
+ },
404
679
  {
405
680
  script: :Sinhala,
406
681
  name: "Sinhala",
407
682
  regex: /\p{Sinhala}/,
408
683
  },
684
+ {
685
+ script: :Sogdian,
686
+ name: "Sogdian",
687
+ regex: /\p{Sogdian}/,
688
+ },
409
689
  {
410
690
  script: :Sora_Sompeng,
411
691
  name: "Sora_Sompeng",
412
692
  regex: /\p{Sora_Sompeng}/,
413
693
  },
694
+ {
695
+ script: :Soyombo,
696
+ name: "Soyombo",
697
+ regex: /\p{Soyombo}/,
698
+ },
414
699
  {
415
700
  script: :Sundanese,
416
701
  name: "Sundanese",
417
702
  regex: /\p{Sundanese}/,
418
703
  },
704
+
419
705
  {
420
706
  script: :Syloti_Nagri,
421
707
  name: "Syloti_Nagri",
@@ -461,6 +747,16 @@ module UnicodeScriptDetector
461
747
  name: "Tamil",
462
748
  regex: /\p{Tamil}/,
463
749
  },
750
+ {
751
+ script: :Tangsa,
752
+ name: "Tangsa",
753
+ regex: /\p{Tangsa}/,
754
+ },
755
+ {
756
+ script: :Tangut,
757
+ name: "Tangut",
758
+ regex: /\p{Tangut}/,
759
+ },
464
760
  {
465
761
  script: :Telugu,
466
762
  name: "Telugu",
@@ -486,25 +782,67 @@ module UnicodeScriptDetector
486
782
  name: "Tifinagh",
487
783
  regex: /\p{Tifinagh}/,
488
784
  },
785
+ {
786
+ script: :Tirhuta,
787
+ name: "Tirhuta",
788
+ regex: /\p{Tirhuta}/,
789
+ },
790
+
791
+ {
792
+ script: :Toto,
793
+ name: "Toto",
794
+ regex: /\p{Toto}/,
795
+ },
796
+
489
797
  {
490
798
  script: :Ugaritic,
491
799
  name: "Ugaritic",
492
800
  regex: /\p{Ugaritic}/,
493
801
  },
802
+ {
803
+ script: :Unknown,
804
+ name: "Unknown",
805
+ regex: /\p{Unknown}/,
806
+ },
494
807
  {
495
808
  script: :Vai,
496
809
  name: "Vai",
497
810
  regex: /\p{Vai}/,
498
811
  },
812
+ {
813
+ script: :Vithkuqi,
814
+ name: "Vithkuqi",
815
+ regex: /\p{Vithkuqi}/,
816
+ },
817
+ {
818
+ script: :Wancho,
819
+ name: "Wancho",
820
+ regex: /\p{Wancho}/,
821
+ },
822
+ {
823
+ script: :Warang_Citi,
824
+ name: "Warang_Citi",
825
+ regex: /\p{Warang_Citi}/,
826
+ },
827
+ {
828
+ script: :Yezidi,
829
+ name: "Yezidi",
830
+ regex: /\p{Yezidi}/,
831
+ },
499
832
  {
500
833
  script: :Yi,
501
834
  name: "Yi",
502
835
  regex: /\p{Yi}/,
503
836
  },
504
- {
505
- script: :Emoji,
506
- name: "Emoji",
507
- regex: /\p{Emoji}/,
837
+ {
838
+ script: :Zanabazar_Square,
839
+ name: "Zanabazar_Square",
840
+ regex: /\p{Zanabazar_Square}/,
841
+ },
842
+ {
843
+ script: :Emoji,
844
+ name: "Emoji",
845
+ regex: /\p{Emoji}/,
508
846
  },
509
847
  {
510
848
  script: :Common,
@@ -513,4 +851,4 @@ module UnicodeScriptDetector
513
851
  },
514
852
  ]
515
853
  end
516
- end
854
+ end
@@ -1,3 +1,3 @@
1
1
  module UnicodeScriptDetector
2
- VERSION = "0.0.3"
2
+ VERSION = "0.0.5"
3
3
  end
@@ -9,6 +9,10 @@ module UnicodeScriptDetector
9
9
  UnicodeScriptDetector::Detector.new(string).characters
10
10
  end
11
11
 
12
+ def script_groups(string)
13
+ UnicodeScriptDetector::Detector.new(string).script_groups
14
+ end
15
+
12
16
  def contains?(string, scripts)
13
17
  UnicodeScriptDetector::Detector.new(string).contains?(scripts)
14
18
  end
@@ -17,4 +21,4 @@ module UnicodeScriptDetector
17
21
  UnicodeScriptDetector::Detector.new(string).contains_only?(scripts)
18
22
  end
19
23
  end
20
- end
24
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode_script_detector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.3
4
+ version: 0.0.5
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Arendsen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-12-02 00:00:00.000000000 Z
11
+ date: 2025-12-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: zeitwerk
@@ -55,6 +55,7 @@ files:
55
55
  - lib/unicode_script_detector.rb
56
56
  - lib/unicode_script_detector/character.rb
57
57
  - lib/unicode_script_detector/detector.rb
58
+ - lib/unicode_script_detector/script_group.rb
58
59
  - lib/unicode_script_detector/scripts.rb
59
60
  - lib/unicode_script_detector/version.rb
60
61
  homepage: https://github.com/davidarendsen/unicode_script_detector