unicode_script_detector 0.0.5 → 0.0.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4795cdb246ac34ccb0ee5183ff0e704d25e4e67410acee321f36f4446dd28356
4
- data.tar.gz: 0c7b9c4c835718f2fc7509225204e38c0a7148949c5745f1448b299c27e0e88d
3
+ metadata.gz: a0febe236b556e42077b401d8e117b3996c6065dcbb33c974a572a8af64d14a4
4
+ data.tar.gz: f8ef874b90e0ca8e387bc16d5fc947745fcbe34bbd8ea53c328f98d8c3d8eddd
5
5
  SHA512:
6
- metadata.gz: 5da7422c57295f4ac3dee3ac9ccfaa99b5586418de956a88876035541da023e9fa4afe609a4aa79d4c3a1a5f9b1ffe64370984657844c06fc6a575578beb5ee2
7
- data.tar.gz: aa9fecf48386b6eb5a0074cbbec8819af80153c6111e042debd9e9c312145bc11a936b3003a406370a28e8a27a48b2bd0409c0cf043b26fe465f4c58ee9669e2
6
+ metadata.gz: c5b931f1a8f527900f9c37ba0af67ca4e1fbfc0a0fde2f382fc1c2853a2c8813654f1d334f692e39882421eb3a4825f8e056d91d92d58fb4c2c1c22266c347b3
7
+ data.tar.gz: 69ac1e2314cefd944af0958cba15764afdf05e6e6eb314b68bc75172c7d1085be00725d40df5fe7f1af23abccc0243fa5d134c0b590da62d83913dce6570f0c1
@@ -1,55 +1,65 @@
1
1
  module UnicodeScriptDetector
2
2
  class Detector
3
- attr_reader :characters, :scripts
3
+ attr_reader :scripts
4
4
 
5
5
  def initialize(string)
6
6
  @string = string
7
- @characters = []
8
- @scripts = []
7
+ @char_scripts = []
8
+ @script_names = []
9
9
 
10
10
  detect_scripts
11
11
  end
12
12
 
13
13
  def scripts
14
- @scripts.uniq
15
- end
16
-
17
- def detect_scripts
18
- @string.chars.each_with_index do |char, index|
19
- detected = false
20
- Scripts::LIST.each_with_index do |script_data, index|
21
- if char.match?(script_data[:regex])
22
- @characters << Character.new(char, script_data[:script], script_data[:name])
23
- @scripts << script_data[:script]
24
- detected = true
25
- break
26
- end
27
- end
28
- @characters << Character.new(char, :Other, "Other") unless detected
29
- @scripts << :Other unless detected
30
- end
14
+ @char_scripts.uniq
31
15
  end
32
16
 
33
17
  def contains?(scripts)
34
- return @scripts.include?(scripts) if scripts.is_a?(Symbol)
35
-
36
- scripts.all? { |script| @scripts.include?(script) }
18
+ return @char_scripts.include?(scripts) if scripts.is_a?(Symbol)
19
+ scripts.all? { |script| @char_scripts.include?(script) }
37
20
  end
38
21
 
39
22
  def contains_only?(scripts)
40
- return @scripts.uniq == [scripts] if scripts.is_a?(Symbol)
41
-
42
- @scripts.uniq.sort == scripts.uniq.sort
23
+ return @char_scripts.uniq == [scripts] if scripts.is_a?(Symbol)
24
+ @char_scripts.uniq.sort == scripts.uniq.sort
43
25
  end
44
26
 
45
27
  def script_groups
46
- @characters
47
- .chunk { |char| char.script }
48
- .map { |script, chars| ScriptGroup.new(script, chars) }
28
+ @string.chars
29
+ .zip(@char_scripts, @script_names)
30
+ .chunk { |_, script, _| script }
31
+ .map do |script, char_data|
32
+ chars = char_data.map(&:first)
33
+ name = char_data.first[2]
34
+ ScriptGroup.new(script, chars, name)
35
+ end
49
36
  end
50
37
 
51
38
  def grouped_scripts_hash
52
39
  script_groups.map { |group| [group.script, group.text] }.to_h
53
40
  end
41
+
42
+ def characters
43
+ @characters ||= @string.chars.zip(@char_scripts, @script_names).map do |char, script, name|
44
+ Character.new(char, script, name)
45
+ end
46
+ end
47
+
48
+ private
49
+ def detect_scripts
50
+ @string.chars.each do |char|
51
+ script_info = find_script_for_char(char)
52
+ @char_scripts << script_info[:script]
53
+ @script_names << script_info[:name]
54
+ end
55
+ end
56
+
57
+ def find_script_for_char(char)
58
+ Scripts::LIST.each do |script_data|
59
+ return script_data if char.match?(script_data[:regex])
60
+ end
61
+ { script: :Other, name: "Other" }
62
+ end
63
+
54
64
  end
55
65
  end
@@ -1,19 +1,20 @@
1
1
  module UnicodeScriptDetector
2
2
  class ScriptGroup
3
- attr_reader :script, :characters, :text
3
+ attr_reader :script, :text, :name
4
4
 
5
- def initialize(script, characters)
5
+ def initialize(script, chars, name)
6
6
  @script = script
7
- @characters = characters
8
- @text = characters.map(&:char).join
7
+ @chars = chars
8
+ @text = chars.join
9
+ @name = name
9
10
  end
10
11
 
11
12
  def length
12
- @characters.length
13
+ @chars.length
13
14
  end
14
15
 
15
- def name
16
- @characters.first&.name
16
+ def characters
17
+ @characters ||= @chars.map { |char| Character.new(char, @script, @name) }
17
18
  end
18
19
  end
19
20
  end
@@ -1,21 +1,6 @@
1
1
  module UnicodeScriptDetector
2
2
  class Scripts
3
3
  LIST = [
4
- {
5
- script: :Whitespace,
6
- name: "Whitespace",
7
- regex: /\s/
8
- },
9
- {
10
- script: :Digit,
11
- name: "Digit",
12
- regex: /\d/
13
- },
14
- {
15
- script: :Punctuation,
16
- name: "Punctuation",
17
- regex: /[[:punct:]]/
18
- },
19
4
  {
20
5
  script: :Adlam,
21
6
  name: "Adlam",
@@ -28,7 +13,7 @@ module UnicodeScriptDetector
28
13
  },
29
14
  {
30
15
  script: :Anatolian_Hieroglyphs,
31
- name: "Anatolian_Hieroglyphs",
16
+ name: "Anatolian Hieroglyphs",
32
17
  regex: /\p{Anatolian_Hieroglyphs}/,
33
18
  },
34
19
  {
@@ -58,7 +43,7 @@ module UnicodeScriptDetector
58
43
  },
59
44
  {
60
45
  script: :Bassa_Vah,
61
- name: "Bassa_Vah",
46
+ name: "Bassa Vah",
62
47
  regex: /\p{Bassa_Vah}/,
63
48
  },
64
49
  {
@@ -103,7 +88,7 @@ module UnicodeScriptDetector
103
88
  },
104
89
  {
105
90
  script: :Canadian_Aboriginal,
106
- name: "Canadian_Aboriginal",
91
+ name: "Canadian Aboriginal",
107
92
  regex: /\p{Canadian_Aboriginal}/,
108
93
  },
109
94
  {
@@ -113,7 +98,7 @@ module UnicodeScriptDetector
113
98
  },
114
99
  {
115
100
  script: :Caucasian_Albanian,
116
- name: "Caucasian_Albanian",
101
+ name: "Caucasian Albanian",
117
102
  regex: /\p{Caucasian_Albanian}/,
118
103
  },
119
104
  {
@@ -153,7 +138,7 @@ module UnicodeScriptDetector
153
138
  },
154
139
  {
155
140
  script: :Cypro_Minoan,
156
- name: "Cypro_Minoan",
141
+ name: "Cypro Minoan",
157
142
  regex: /\p{Cypro_Minoan}/,
158
143
  },
159
144
  {
@@ -173,7 +158,7 @@ module UnicodeScriptDetector
173
158
  },
174
159
  {
175
160
  script: :Dives_Akuru,
176
- name: "Dives_Akuru",
161
+ name: "Dives Akuru",
177
162
  regex: /\p{Dives_Akuru}/,
178
163
  },
179
164
  {
@@ -188,8 +173,8 @@ module UnicodeScriptDetector
188
173
  },
189
174
  {
190
175
  script: :Egyptian_Hieroglyphs,
191
- name: "Egyptian_Hieroglyphs",
192
- regex: /\p{Egyptian_Hieroglyphs}/,
176
+ name: "Egyptian Hieroglyphs",
177
+ regex: /\p{Egyptian_Hieroglyphs}|[\u{13460}-\u{1355F}]/,
193
178
  },
194
179
  {
195
180
  script: :Elbasan,
@@ -206,7 +191,11 @@ module UnicodeScriptDetector
206
191
  name: "Ethiopic",
207
192
  regex: /\p{Ethiopic}/,
208
193
  },
209
-
194
+ {
195
+ script: :Garay,
196
+ name: "Garay",
197
+ regex: /[\u{10D40}-\u{10D8F}]/,
198
+ },
210
199
  {
211
200
  script: :Georgian,
212
201
  name: "Georgian",
@@ -239,7 +228,7 @@ module UnicodeScriptDetector
239
228
  },
240
229
  {
241
230
  script: :Gunjala_Gondi,
242
- name: "Gunjala_Gondi",
231
+ name: "Gunjala Gondi",
243
232
  regex: /\p{Gunjala_Gondi}/,
244
233
  },
245
234
  {
@@ -247,11 +236,15 @@ module UnicodeScriptDetector
247
236
  name: "Gurmukhi",
248
237
  regex: /\p{Gurmukhi}/,
249
238
  },
250
-
239
+ {
240
+ script: :Gurung_Khema,
241
+ name: "Gurung Khema",
242
+ regex: /[\u{16100}-\u{1613F}]/,
243
+ },
251
244
  {
252
245
  script: :Han,
253
246
  name: "Han",
254
- regex: /\p{Han}/,
247
+ regex: /\p{Han}|[\u{323B0}-\u{3347F}]/,
255
248
  },
256
249
  {
257
250
  script: :Hangul,
@@ -260,7 +253,7 @@ module UnicodeScriptDetector
260
253
  },
261
254
  {
262
255
  script: :Hanifi_Rohingya,
263
- name: "Hanifi_Rohingya",
256
+ name: "Hanifi Rohingya",
264
257
  regex: /\p{Hanifi_Rohingya}/,
265
258
  },
266
259
  {
@@ -285,7 +278,7 @@ module UnicodeScriptDetector
285
278
  },
286
279
  {
287
280
  script: :Imperial_Aramaic,
288
- name: "Imperial_Aramaic",
281
+ name: "Imperial Aramaic",
289
282
  regex: /\p{Imperial_Aramaic}/,
290
283
  },
291
284
  {
@@ -295,12 +288,12 @@ module UnicodeScriptDetector
295
288
  },
296
289
  {
297
290
  script: :Inscriptional_Pahlavi,
298
- name: "Inscriptional_Pahlavi",
291
+ name: "Inscriptional Pahlavi",
299
292
  regex: /\p{Inscriptional_Pahlavi}/,
300
293
  },
301
294
  {
302
295
  script: :Inscriptional_Parthian,
303
- name: "Inscriptional_Parthian",
296
+ name: "Inscriptional Parthian",
304
297
  regex: /\p{Inscriptional_Parthian}/,
305
298
  },
306
299
  {
@@ -323,7 +316,6 @@ module UnicodeScriptDetector
323
316
  name: "Katakana",
324
317
  regex: /\p{Katakana}/,
325
318
  },
326
-
327
319
  {
328
320
  script: :Kawi,
329
321
  name: "Kawi",
@@ -331,7 +323,7 @@ module UnicodeScriptDetector
331
323
  },
332
324
  {
333
325
  script: :Kayah_Li,
334
- name: "Kayah_Li",
326
+ name: "Kayah Li",
335
327
  regex: /\p{Kayah_Li}/,
336
328
  },
337
329
  {
@@ -341,7 +333,7 @@ module UnicodeScriptDetector
341
333
  },
342
334
  {
343
335
  script: :Khitan_Small_Script,
344
- name: "Khitan_Small_Script",
336
+ name: "Khitan Small Script",
345
337
  regex: /\p{Khitan_Small_Script}/,
346
338
  },
347
339
  {
@@ -359,7 +351,11 @@ module UnicodeScriptDetector
359
351
  name: "Khudawadi",
360
352
  regex: /\p{Khudawadi}/,
361
353
  },
362
-
354
+ {
355
+ script: :Kirat_Rai,
356
+ name: "Kirat Rai",
357
+ regex: /[\u{16D40}-\u{16D7F}]/,
358
+ },
363
359
  {
364
360
  script: :Lao,
365
361
  name: "Lao",
@@ -382,12 +378,12 @@ module UnicodeScriptDetector
382
378
  },
383
379
  {
384
380
  script: :Linear_A,
385
- name: "Linear_A",
381
+ name: "Linear A",
386
382
  regex: /\p{Linear_A}/,
387
383
  },
388
384
  {
389
385
  script: :Linear_B,
390
- name: "Linear_B",
386
+ name: "Linear B",
391
387
  regex: /\p{Linear_B}/,
392
388
  },
393
389
  {
@@ -437,7 +433,7 @@ module UnicodeScriptDetector
437
433
  },
438
434
  {
439
435
  script: :Masaram_Gondi,
440
- name: "Masaram_Gondi",
436
+ name: "Masaram Gondi",
441
437
  regex: /\p{Masaram_Gondi}/,
442
438
  },
443
439
  {
@@ -447,22 +443,22 @@ module UnicodeScriptDetector
447
443
  },
448
444
  {
449
445
  script: :Meetei_Mayek,
450
- name: "Meetei_Mayek",
446
+ name: "Meetei Mayek",
451
447
  regex: /\p{Meetei_Mayek}/,
452
448
  },
453
449
  {
454
450
  script: :Mende_Kikakui,
455
- name: "Mende_Kikakui",
451
+ name: "Mende Kikakui",
456
452
  regex: /\p{Mende_Kikakui}/,
457
453
  },
458
454
  {
459
455
  script: :Meroitic_Cursive,
460
- name: "Meroitic_Cursive",
456
+ name: "Meroitic Cursive",
461
457
  regex: /\p{Meroitic_Cursive}/,
462
458
  },
463
459
  {
464
460
  script: :Meroitic_Hieroglyphs,
465
- name: "Meroitic_Hieroglyphs",
461
+ name: "Meroitic Hieroglyphs",
466
462
  regex: /\p{Meroitic_Hieroglyphs}/,
467
463
  },
468
464
  {
@@ -493,7 +489,7 @@ module UnicodeScriptDetector
493
489
  {
494
490
  script: :Myanmar,
495
491
  name: "Myanmar",
496
- regex: /\p{Myanmar}/,
492
+ regex: /\p{Myanmar}|[\u{116D0}-\u{116FF}]/,
497
493
  },
498
494
  {
499
495
  script: :Nabataean,
@@ -502,7 +498,7 @@ module UnicodeScriptDetector
502
498
  },
503
499
  {
504
500
  script: :Nag_Mundari,
505
- name: "Nag_Mundari",
501
+ name: "Nag Mundari",
506
502
  regex: /\p{Nag_Mundari}/,
507
503
  },
508
504
  {
@@ -512,7 +508,7 @@ module UnicodeScriptDetector
512
508
  },
513
509
  {
514
510
  script: :New_Tai_Lue,
515
- name: "New_Tai_Lue",
511
+ name: "New Tai Lue",
516
512
  regex: /\p{New_Tai_Lue}/,
517
513
  },
518
514
  {
@@ -532,7 +528,7 @@ module UnicodeScriptDetector
532
528
  },
533
529
  {
534
530
  script: :Nyiakeng_Puachue_Hmong,
535
- name: "Nyiakeng_Puachue_Hmong",
531
+ name: "Nyiakeng Puachue Hmong",
536
532
  regex: /\p{Nyiakeng_Puachue_Hmong}/,
537
533
  },
538
534
  {
@@ -542,53 +538,57 @@ module UnicodeScriptDetector
542
538
  },
543
539
  {
544
540
  script: :Ol_Chiki,
545
- name: "Ol_Chiki",
541
+ name: "Ol Chiki",
546
542
  regex: /\p{Ol_Chiki}/,
547
543
  },
548
-
544
+ {
545
+ script: :Ol_Onal,
546
+ name: "Ol Onal",
547
+ regex: /[\u{1E5D0}-\u{1E5FF}]/,
548
+ },
549
549
  {
550
550
  script: :Old_Hungarian,
551
- name: "Old_Hungarian",
551
+ name: "Old Hungarian",
552
552
  regex: /\p{Old_Hungarian}/,
553
553
  },
554
554
  {
555
555
  script: :Old_Italic,
556
- name: "Old_Italic",
556
+ name: "Old Italic",
557
557
  regex: /\p{Old_Italic}/,
558
558
  },
559
559
  {
560
560
  script: :Old_North_Arabian,
561
- name: "Old_North_Arabian",
561
+ name: "Old North Arabian",
562
562
  regex: /\p{Old_North_Arabian}/,
563
563
  },
564
564
  {
565
565
  script: :Old_Permic,
566
- name: "Old_Permic",
566
+ name: "Old Permic",
567
567
  regex: /\p{Old_Permic}/,
568
568
  },
569
569
  {
570
570
  script: :Old_Persian,
571
- name: "Old_Persian",
571
+ name: "Old Persian",
572
572
  regex: /\p{Old_Persian}/,
573
573
  },
574
574
  {
575
575
  script: :Old_Sogdian,
576
- name: "Old_Sogdian",
576
+ name: "Old Sogdian",
577
577
  regex: /\p{Old_Sogdian}/,
578
578
  },
579
579
  {
580
580
  script: :Old_South_Arabian,
581
- name: "Old_South_Arabian",
581
+ name: "Old South Arabian",
582
582
  regex: /\p{Old_South_Arabian}/,
583
583
  },
584
584
  {
585
585
  script: :Old_Turkic,
586
- name: "Old_Turkic",
586
+ name: "Old Turkic",
587
587
  regex: /\p{Old_Turkic}/,
588
588
  },
589
589
  {
590
590
  script: :Old_Uyghur,
591
- name: "Old_Uyghur",
591
+ name: "Old Uyghur",
592
592
  regex: /\p{Old_Uyghur}/,
593
593
  },
594
594
  {
@@ -608,7 +608,7 @@ module UnicodeScriptDetector
608
608
  },
609
609
  {
610
610
  script: :Pahawh_Hmong,
611
- name: "Pahawh_Hmong",
611
+ name: "Pahawh Hmong",
612
612
  regex: /\p{Pahawh_Hmong}/,
613
613
  },
614
614
  {
@@ -618,12 +618,12 @@ module UnicodeScriptDetector
618
618
  },
619
619
  {
620
620
  script: :Pau_Cin_Hau,
621
- name: "Pau_Cin_Hau",
621
+ name: "Pau Cin Hau",
622
622
  regex: /\p{Pau_Cin_Hau}/,
623
623
  },
624
624
  {
625
625
  script: :Phags_Pa,
626
- name: "Phags_Pa",
626
+ name: "Phags Pa",
627
627
  regex: /\p{Phags_Pa}/,
628
628
  },
629
629
  {
@@ -633,7 +633,7 @@ module UnicodeScriptDetector
633
633
  },
634
634
  {
635
635
  script: :Psalter_Pahlavi,
636
- name: "Psalter_Pahlavi",
636
+ name: "Psalter Pahlavi",
637
637
  regex: /\p{Psalter_Pahlavi}/,
638
638
  },
639
639
  {
@@ -659,7 +659,7 @@ module UnicodeScriptDetector
659
659
  {
660
660
  script: :Sharada,
661
661
  name: "Sharada",
662
- regex: /\p{Sharada}/,
662
+ regex: /\p{Sharada}|[\u{11B60}-\u{11B7F}]/,
663
663
  },
664
664
  {
665
665
  script: :Shavian,
@@ -671,6 +671,11 @@ module UnicodeScriptDetector
671
671
  name: "Siddham",
672
672
  regex: /\p{Siddham}/,
673
673
  },
674
+ {
675
+ script: :Sidetic,
676
+ name: "Sidetic",
677
+ regex: /[\u{10940}-\u{1095F}]/
678
+ },
674
679
  {
675
680
  script: :SignWriting,
676
681
  name: "SignWriting",
@@ -688,7 +693,7 @@ module UnicodeScriptDetector
688
693
  },
689
694
  {
690
695
  script: :Sora_Sompeng,
691
- name: "Sora_Sompeng",
696
+ name: "Sora Sompeng",
692
697
  regex: /\p{Sora_Sompeng}/,
693
698
  },
694
699
  {
@@ -701,10 +706,14 @@ module UnicodeScriptDetector
701
706
  name: "Sundanese",
702
707
  regex: /\p{Sundanese}/,
703
708
  },
704
-
709
+ {
710
+ script: :Sunuwar,
711
+ name: "Sunuwar",
712
+ regex: /[\u{11BC0}-\u{11BFF}]/,
713
+ },
705
714
  {
706
715
  script: :Syloti_Nagri,
707
- name: "Syloti_Nagri",
716
+ name: "Syloti Nagri",
708
717
  regex: /\p{Syloti_Nagri}/,
709
718
  },
710
719
  {
@@ -724,19 +733,24 @@ module UnicodeScriptDetector
724
733
  },
725
734
  {
726
735
  script: :Tai_Le,
727
- name: "Tai_Le",
736
+ name: "Tai Le",
728
737
  regex: /\p{Tai_Le}/,
729
738
  },
730
739
  {
731
740
  script: :Tai_Tham,
732
- name: "Tai_Tham",
741
+ name: "Tai Tham",
733
742
  regex: /\p{Tai_Tham}/,
734
743
  },
735
744
  {
736
745
  script: :Tai_Viet,
737
- name: "Tai_Viet",
746
+ name: "Tai Viet",
738
747
  regex: /\p{Tai_Viet}/,
739
748
  },
749
+ {
750
+ script: :Tai_Yo,
751
+ name: "Tai Yo",
752
+ regex: /[\u{1E6C0}-\u{1E6FF}]/,
753
+ },
740
754
  {
741
755
  script: :Takri,
742
756
  name: "Takri",
@@ -755,7 +769,7 @@ module UnicodeScriptDetector
755
769
  {
756
770
  script: :Tangut,
757
771
  name: "Tangut",
758
- regex: /\p{Tangut}/,
772
+ regex: /\p{Tangut}|[\u{18D80}-\u{18DFF}]/,
759
773
  },
760
774
  {
761
775
  script: :Telugu,
@@ -787,13 +801,26 @@ module UnicodeScriptDetector
787
801
  name: "Tirhuta",
788
802
  regex: /\p{Tirhuta}/,
789
803
  },
790
-
804
+ {
805
+ script: :Todhri,
806
+ name: "Todhri",
807
+ regex: /[\u{105C0}-\u{105FF}]/,
808
+ },
809
+ {
810
+ script: :Tolong_Siki,
811
+ name: "Tolong Siki",
812
+ regex: /[\u{11DB0}-\u{11DEF}]/,
813
+ },
791
814
  {
792
815
  script: :Toto,
793
816
  name: "Toto",
794
817
  regex: /\p{Toto}/,
795
818
  },
796
-
819
+ {
820
+ script: :Tulu_Tigalari,
821
+ name: "Tulu Tigalari",
822
+ regex: /[\u{11380}-\u{113FF}]/,
823
+ },
797
824
  {
798
825
  script: :Ugaritic,
799
826
  name: "Ugaritic",
@@ -821,7 +848,7 @@ module UnicodeScriptDetector
821
848
  },
822
849
  {
823
850
  script: :Warang_Citi,
824
- name: "Warang_Citi",
851
+ name: "Warang Citi",
825
852
  regex: /\p{Warang_Citi}/,
826
853
  },
827
854
  {
@@ -836,18 +863,35 @@ module UnicodeScriptDetector
836
863
  },
837
864
  {
838
865
  script: :Zanabazar_Square,
839
- name: "Zanabazar_Square",
866
+ name: "Zanabazar Square",
840
867
  regex: /\p{Zanabazar_Square}/,
841
868
  },
869
+
870
+ #Special characters
871
+ {
872
+ script: :Whitespace,
873
+ name: "Whitespace",
874
+ regex: /\s/
875
+ },
876
+ {
877
+ script: :Digit,
878
+ name: "Digit",
879
+ regex: /\d/
880
+ },
842
881
  {
843
882
  script: :Emoji,
844
883
  name: "Emoji",
845
- regex: /\p{Emoji}/,
884
+ regex: /\p{Emoji_Presentation}/,
885
+ },
886
+ {
887
+ script: :Punctuation,
888
+ name: "Punctuation",
889
+ regex: /[[:punct:]]/
846
890
  },
847
891
  {
848
892
  script: :Common,
849
893
  name: "Common",
850
- regex: /\p{Common}/,
894
+ regex: /\p{Common}|[\u{1CEC0}-\u{1CEFF}]|[\u{1CC00}-\u{1CEBF}]/,
851
895
  },
852
896
  ]
853
897
  end
@@ -1,3 +1,3 @@
1
1
  module UnicodeScriptDetector
2
- VERSION = "0.0.5"
2
+ VERSION = "0.0.7"
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode_script_detector
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.5
4
+ version: 0.0.7
5
5
  platform: ruby
6
6
  authors:
7
7
  - David Arendsen
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2025-12-31 00:00:00.000000000 Z
11
+ date: 2026-01-02 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: zeitwerk