unicode-script 0.1.0 → 0.1.6

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents exactly as they appear in the respective public registry.
checksums.yaml CHANGED
@@ -1,15 +1,7 @@
1
1
  ---
2
- !binary "U0hBMQ==":
3
- metadata.gz: !binary |-
4
- N2M5NTcxNWNkMDVkYWFiMDg3M2U1ZDRjMjEwODhhMGQ1MjI5M2MyNw==
5
- data.tar.gz: !binary |-
6
- MTM2NzVkNDBmZGRmMWE3MTk4YTA5ODIzMDQxN2VmNDQzZWQ5MGJkNg==
2
+ SHA1:
3
+ metadata.gz: beeafac906b9c14b8d3510ad8bfee403d7d648f2
4
+ data.tar.gz: 36bdbfa5841bebe1ba16a5e31238ef53bd94f873
7
5
  SHA512:
8
- metadata.gz: !binary |-
9
- ZTg5ZWVlMmJkYjA0MGNlMGJjZDk2YTI1ZWMyYjEyODIzZmUyZTI1N2JlZDI3
10
- ZTgzYmQyMGE0MGU0OGVkZGJkMGE0YzE1NzY4MzA5MTg5NzAwOWI1YjMxZmJj
11
- MzYwNWJjN2Q1YjNkZDJiNTgzZTE3OTNlZWM3Yjg0NjM4NTYxODQ=
12
- data.tar.gz: !binary |-
13
- NzAzMzY1ODRhZWFkYzczMjUwZGYyOThmMGVjNDhiNWUxOGY5M2FmOTY2MDVm
14
- YmRmOWU2Njk3MGRmYjJmNDIwYmZkMzJiZGIyYzg3OTBmYjg1YjhjNmUzOWM4
15
- YTk1MGRkMjQ2OGViOTQwYTJlM2NiNmNjNGE3YzAwNDcwNDhmOTA=
6
+ metadata.gz: 3d3c9bf74deb4bf4dc3f1207ada3f0151824560ef9de8e14bdac665df2251dcd872341a3f94483d2705e24e750db987742e3422da380a43fdeb3423eadca34f4
7
+ data.tar.gz: 47680ec779b08865201ac5f1e1fa0e7f2f5b3e34eaaafe4d79cc06a19ebd176a62c32dce2839cff5c41c035dbdc0abe2499a6dd2560651238c403b18950b1a5c
@@ -1,161 +1,161 @@
1
1
  module UnicodeScript
2
- CHARTS = [{:name => 'Armenian', :range => (0x0530..0x058F)},
3
- {:name => 'Coptic', :range => (0x2C80..0x2CFF)},
4
- {:name => 'Greek and Coptic', :range => (0x0370..0x03FF)},
5
- {:name => 'Cypriot Syllabary', :range => (0x10800..0x1083F)},
6
- {:name => 'Cyrilic', :range => (0x0400..0x04FF)},
7
- {:name => 'Cyrilic Supplement', :range => (0x0500..0x052F)},
8
- {:name => 'Cyrillic Extended-A', :range => (0x2DE0..0x2DFF)},
9
- {:name => 'Cyrillic Extended-B', :range => (0xA640..0xA69F)},
10
- {:name => 'Georgian', :range => (0x10A0..0x10FF)},
11
- {:name => 'Georgian Supplement', :range => (0x2D00..0x2D2F)},
12
- {:name => 'Hiragana', :range => (0x3040..0x309F)},
13
- {:name => 'Glagolitic', :range => (0x2C00..0x2C5F)},
14
- {:name => 'Gothic', :range => (0x10330..0x1034F)},
15
- {:name => 'Greek Extended', :range => (0x1F00..0x1FFF)},
16
- {:name => 'Basic Latin', :range => (0x0000..0x007F)},
17
- {:name => 'C1 Controls and Latin-1 Supplement', :range => (0x0080..0x00FF)},
18
- {:name => 'Latin Extended-A', :range => (0x0100..0x017F)},
19
- {:name => 'Latin Extended-B', :range => (0x0180..0x024F)},
20
- {:name => 'Latin Extended-C', :range => (0x2C60..0x2C7F)},
21
- {:name => 'Latin Extended-D', :range => (0xA720..0xA7FF)},
22
- {:name => 'Latin Extended Additional', :range => (0x1E00..0x1EFF)},
23
- {:name => 'Fullwidth ASCII', :range => (0x0020..0x007E)},
24
- {:name => 'Halfwidth CJK punctuation', :range => (0x3000..0x303F)},
25
- {:name => 'Halfwidth Hangul', :range => (0x3130..0x318F)},
26
- {:name => 'Linear B Syllabary', :range => (0x10000..0x1007F)},
27
- {:name => 'Linear B Ideograms', :range => (0x10080..0x100FF)},
28
- {:name => 'Ogham', :range => (0x1680..0x169F)},
29
- {:name => 'Old Italic', :range => (0x10300..0x1032F)},
30
- {:name => 'Phaistos Disc', :range => (0x101D0..0x101FF)},
31
- {:name => 'Runic', :range => (0x16A0..0x16FF)},
32
- {:name => 'Shavian', :range => (0x10450..0x1047F)},
33
- {:name => 'IPA Extensions', :range => (0x0250..0x02AF)},
34
- {:name => 'Phonetic Extensions', :range => (0x1D00..0x1D7F)},
35
- {:name => 'Phonetic Extensions Supplement', :range => (0x1D80..0x1DBF)},
36
- {:name => 'Modifier Tone Letters', :range => (0xA700..0xA71F)},
37
- {:name => 'Spacing Modifier Letters', :range => (0x02B0..0x02FF)},
38
- {:name => 'Superscripts and Subscripts', :range => (0x2070..0x209F)},
39
- {:name => 'Combining Diacritical Marks', :range => (0x0300..0x036F)},
40
- {:name => 'Combining Diacritical Marks Supplement', :range => (0x1DC0..0x1DFF)},
41
- {:name => 'Combining Half Marks', :range => (0xFE20..0xFE2F)},
42
- {:name => 'Bamum', :range => (0xA6A0..0xA6FF)},
43
- {:name => 'Bamum Supplement', :range => (0x16800..0x16A3F)},
44
- {:name => 'Egyptian Hieroglyphs', :range => (0x13000..0x1342F)},
45
- {:name => 'Ethiopic', :range => (0x1200..0x137F)},
46
- {:name => 'Ethiopic Supplement', :range => (0x1380..0x139F)},
47
- {:name => 'Ethiopic Extended', :range => (0x2D80..0x2DDF)},
48
- {:name => 'Ethiopic Extended-A', :range => (0xAB00..0xAB2F)},
49
- {:name => 'Meroitic Cursive', :range => (0x109A0..0x109FF)},
50
- {:name => 'Meroitic Hieroglyphs', :range => (0x10980..0x1099F)},
51
- {:name => 'NKo', :range => (0x07C0..0x07FF)},
52
- {:name => 'Osmanya', :range => (0x10480..0x104AF)},
53
- {:name => 'Tifinagh', :range => (0x2D30..0x2D7F)},
54
- {:name => 'Vai', :range => (0xA500..0xA63F)},
55
- {:name => 'Arabic', :range => (0x0600..0x06FF)},
56
- {:name => 'Arabic Supplement', :range => (0x0750..0x077F)},
57
- {:name => 'Arabic Extended-A', :range => (0x08A0..0x08FF)},
58
- {:name => 'Arabic Presentation Forms-A', :range => (0xFB50..0xFDFF)},
59
- {:name => 'Arabic Presentation Forms-B', :range => (0xFE70..0xFEFF)},
60
- {:name => 'Imperial Aramaic', :range => (0x10840..0x1085F)},
61
- {:name => 'Avestan', :range => (0x10B00..0x10B3F)},
62
- {:name => 'Carian', :range => (0x102A0..0x102DF)},
63
- {:name => 'Cuneiform', :range => (0x12000..0x123FF)},
64
- {:name => 'Cuneiform Numbers and Punctuation', :range => (0x12400..0x1247F)},
65
- {:name => 'Old Persian', :range => (0x103A0..0x103DF)},
66
- {:name => 'Ugaritic', :range => (0x10380..0x1039F)},
67
- {:name => 'Hebrew', :range => (0x0590..0x05FF)},
68
- {:name => 'Lycian', :range => (0x10280..0x1029F)},
69
- {:name => 'Lydian', :range => (0x10920..0x1093F)},
70
- {:name => 'Mandaic', :range => (0x0840..0x085F)},
71
- {:name => 'Old South Arabian', :range => (0x10A60..0x10A7F)},
72
- {:name => 'Inscriptional Pahlavi', :range => (0x10B60..0x10B7F)},
73
- {:name => 'Inscriptional Parthian', :range => (0x10B40..0x10B5F)},
74
- {:name => 'Phoenician', :range => (0x10900..0x1091F)},
75
- {:name => 'Samaritan', :range => (0x0800..0x083F)},
76
- {:name => 'Syriac', :range => (0x0700..0x074F)},
77
- {:name => 'Mongolian', :range => (0x1800..0x18AF)},
78
- {:name => 'Old Turkic', :range => (0x10C00..0x10C4F)},
79
- {:name => 'Phags-pa', :range => (0xA840..0xA87F)},
80
- {:name => 'Tibetan', :range => (0x0F00..0x0FFF)},
81
- {:name => 'Bengali', :range => (0x0980..0x09FF)},
82
- {:name => 'Brahmi', :range => (0x11000..0x1107F)},
83
- {:name => 'Chakma', :range => (0x11100..0x1114F)},
84
- {:name => 'Devanagari', :range => (0x0900..0x097F)},
85
- {:name => 'Devanagari Extended', :range => (0xA8E0..0xA8FF)},
86
- {:name => 'Gujarati', :range => (0x0A80..0x0AFF)},
87
- {:name => 'Gurmukhi', :range => (0x0A00..0x0A7F)},
88
- {:name => 'Kaithi', :range => (0x11080..0x110CF)},
89
- {:name => 'Kannada', :range => (0x0C80..0x0CFF)},
90
- {:name => 'Kharoshthi', :range => (0x10A00..0x10A5F)},
91
- {:name => 'Lepcha', :range => (0x1C00..0x1C4F)},
92
- {:name => 'Limbu', :range => (0x1900..0x194F)},
93
- {:name => 'Malayalam', :range => (0x0D00..0x0D7F)},
94
- {:name => 'Meetei Mayek', :range => (0xABC0..0xABFF)},
95
- {:name => 'Meetei Mayek Extensions', :range => (0xAAE0..0xAAFF)},
96
- {:name => 'Ol Chiki', :range => (0x1C50..0x1C7F)},
97
- {:name => 'Oriya', :range => (0x0B00..0x0B7F)},
98
- {:name => 'Saurashtra', :range => (0xA880..0xA8DF)},
99
- {:name => 'Sharada', :range => (0x11180..0x111DF)},
100
- {:name => 'Sinhala', :range => (0x0D80..0x0DFF)},
101
- {:name => 'Sora Sompeng', :range => (0x110D0..0x110FF)},
102
- {:name => 'Syloti Nagri', :range => (0xA800..0xA82F)},
103
- {:name => 'Takri', :range => (0x11680..0x116CF)},
104
- {:name => 'Tamil', :range => (0x0B80..0x0BFF)},
105
- {:name => 'Telugu', :range => (0x0C00..0x0C7F)},
106
- {:name => 'Thaana', :range => (0x0780..0x07BF)},
107
- {:name => 'Vedic Extensions', :range => (0x1CD0..0x1CFF)},
108
- {:name => 'Balinese', :range => (0x1B00..0x1B7F)},
109
- {:name => 'Batak', :range => (0x1BC0..0x1BFF)},
110
- {:name => 'Buginese', :range => (0x1A00..0x1A1F)},
111
- {:name => 'Cham', :range => (0xAA00..0xAA5F)},
112
- {:name => 'Javanese', :range => (0xA980..0xA9DF)},
113
- {:name => 'Kayah Li', :range => (0xA900..0xA92F)},
114
- {:name => 'Khmer', :range => (0x1780..0x17FF)},
115
- {:name => 'Khmer Symbols', :range => (0x19E0..0x19FF)},
116
- {:name => 'Lao', :range => (0x0E80..0x0EFF)},
117
- {:name => 'Myanmar', :range => (0x1000..0x109F)},
118
- {:name => 'Myanmar Extended-A', :range => (0xAA60..0xAA7F)},
119
- {:name => 'New Tai Lue', :range => (0x1980..0x19DF)},
120
- {:name => 'Rejang', :range => (0xA930..0xA95F)},
121
- {:name => 'Sundanese', :range => (0x1B80..0x1BBF)},
122
- {:name => 'Sundanese Supplement', :range => (0x1CC0..0x1CCF)},
123
- {:name => 'Tai Le', :range => (0x1950..0x197F)},
124
- {:name => 'Tai Tham', :range => (0x1A20..0x1AAF)},
125
- {:name => 'Tai Viet', :range => (0xAA80..0xAADF)},
126
- {:name => 'Thai', :range => (0x0E00..0x0E7F)},
127
- {:name => 'Buhid', :range => (0x1740..0x175F)},
128
- {:name => 'Hanunoo', :range => (0x1720..0x173F)},
129
- {:name => 'Tagalog', :range => (0x1700..0x171F)},
130
- {:name => 'Tagbanwa', :range => (0x1760..0x177F)},
131
- {:name => 'Bopomofo', :range => (0x3100..0x312F)},
132
- {:name => 'Bopomofo Extended', :range => (0x31A0..0x31BF)},
133
- {:name => 'CJK Unified Ideographs', :range => (0x4E00..0x9FCC)},
134
- {:name => 'CJK Unified Ideographs Extension A', :range => (0x3400..0x4DB5)},
135
- {:name => 'CJK Unified Ideographs Extension B', :range => (0x20000..0x2A6D6)},
136
- {:name => 'CJK Unified Ideographs Extension C', :range => (0x2A700..0x2B734)},
137
- {:name => 'CJK Unified Ideographs Extension D', :range => (0x2B740..0x2B81D)},
138
- {:name => 'CJK Compatibility Ideographs', :range => (0xF900..0xFAFF)},
139
- {:name => 'CJK Compatibility Ideographs Supplement', :range => (0x2F800..0x2FA1F)},
140
- {:name => 'Kangxi Radicals', :range => (0x2F00..0x2FDF)},
141
- {:name => 'CJK Radicals Supplement', :range => (0x2E80..0x2EFF)},
142
- {:name => 'CJK Strokes', :range => (0x31C0..0x31EF)},
143
- {:name => 'Hangul Jamo', :range => (0x1100..0x11FF)},
144
- {:name => 'Hangul Jamo Extended-A', :range => (0xA960..0xA97F)},
145
- {:name => 'Hangul Jamo Extended-B', :range => (0xD7B0..0xD7FF)},
146
- {:name => 'Hangul Compatibility Jamo', :range => (0x3130..0x318F)},
147
- {:name => 'Hiragana', :range => (0x3040..0x309F)},
148
- {:name => 'Katakana', :range => (0x30A0..0x30FF)},
149
- {:name => 'Katakana Phonetic Extensions', :range => (0x31F0..0x31FF)},
150
- {:name => 'Kana Supplement', :range => (0x1B000..0x1B0FF)},
151
- {:name => 'Kanbun', :range => (0x3190..0x319F)},
152
- {:name => 'Lisu', :range => (0xA4D0..0xA4FF)},
153
- {:name => 'Miao', :range => (0x16F00..0x16F9F)},
154
- {:name => 'Yi Syllables', :range => (0xA000..0xA48F)},
155
- {:name => 'Yi Radicals', :range => (0xA490..0xA4CF)},
156
- {:name => 'Cherokee', :range => (0x13A0..0x13FF)},
157
- {:name => 'Deseret', :range => (0x10400..0x1044F)},
158
- {:name => 'Unified Canadian Aboriginal Syllabics', :range => (0x1400..0x167F)},
159
- {:name => 'Unified Canadian Aboriginal Syllabics Extended', :range => (0x18B0..0x18FF)}
160
- ]
2
+ CHARTS = {'armenian' => (0x0530..0x058f),
3
+ 'coptic' => (0x2c80..0x2cff),
4
+ 'greek and coptic' => (0x0370..0x03ff),
5
+ 'cypriot syllabary' => (0x10800..0x1083f),
6
+ 'cyrilic' => (0x0400..0x04ff),
7
+ 'cyrilic supplement' => (0x0500..0x052f),
8
+ 'cyrillic extended-a' => (0x2de0..0x2dff),
9
+ 'cyrillic extended-b' => (0xa640..0xa69f),
10
+ 'georgian' => (0x10a0..0x10ff),
11
+ 'georgian supplement' => (0x2d00..0x2d2f),
12
+ 'hiragana' => (0x3040..0x309f),
13
+ 'glagolitic' => (0x2c00..0x2c5f),
14
+ 'gothic' => (0x10330..0x1034f),
15
+ 'greek extended' => (0x1f00..0x1fff),
16
+ 'basic latin' => (0x0000..0x007f),
17
+ 'c1 controls and latin-1 supplement' => (0x0080..0x00ff),
18
+ 'latin extended-a' => (0x0100..0x017f),
19
+ 'latin extended-b' => (0x0180..0x024f),
20
+ 'latin extended-c' => (0x2c60..0x2c7f),
21
+ 'latin extended-d' => (0xa720..0xa7ff),
22
+ 'latin extended additional' => (0x1e00..0x1eff),
23
+ 'fullwidth ascii' => (0x0020..0x007e),
24
+ 'halfwidth cjk punctuation' => (0x3000..0x303f),
25
+ 'halfwidth hangul' => (0x3130..0x318f),
26
+ 'linear b syllabary' => (0x10000..0x1007f),
27
+ 'linear b ideograms' => (0x10080..0x100ff),
28
+ 'ogham' => (0x1680..0x169f),
29
+ 'old italic' => (0x10300..0x1032f),
30
+ 'phaistos disc' => (0x101d0..0x101ff),
31
+ 'runic' => (0x16a0..0x16ff),
32
+ 'shavian' => (0x10450..0x1047f),
33
+ 'ipa extensions' => (0x0250..0x02af),
34
+ 'phonetic extensions' => (0x1d00..0x1d7f),
35
+ 'phonetic extensions supplement' => (0x1d80..0x1dbf),
36
+ 'modifier tone letters' => (0xa700..0xa71f),
37
+ 'spacing modifier letters' => (0x02b0..0x02ff),
38
+ 'superscripts and subscripts' => (0x2070..0x209f),
39
+ 'combining diacritical marks' => (0x0300..0x036f),
40
+ 'combining diacritical marks supplement' => (0x1dc0..0x1dff),
41
+ 'combining half marks' => (0xfe20..0xfe2f),
42
+ 'bamum' => (0xa6a0..0xa6ff),
43
+ 'bamum supplement' => (0x16800..0x16a3f),
44
+ 'egyptian hieroglyphs' => (0x13000..0x1342f),
45
+ 'ethiopic' => (0x1200..0x137f),
46
+ 'ethiopic supplement' => (0x1380..0x139f),
47
+ 'ethiopic extended' => (0x2d80..0x2ddf),
48
+ 'ethiopic extended-a' => (0xab00..0xab2f),
49
+ 'meroitic cursive' => (0x109a0..0x109ff),
50
+ 'meroitic hieroglyphs' => (0x10980..0x1099f),
51
+ 'nko' => (0x07c0..0x07ff),
52
+ 'osmanya' => (0x10480..0x104af),
53
+ 'tifinagh' => (0x2d30..0x2d7f),
54
+ 'vai' => (0xa500..0xa63f),
55
+ 'arabic' => (0x0600..0x06ff),
56
+ 'arabic supplement' => (0x0750..0x077f),
57
+ 'arabic extended-a' => (0x08a0..0x08ff),
58
+ 'arabic presentation forms-a' => (0xfb50..0xfdff),
59
+ 'arabic presentation forms-b' => (0xfe70..0xfeff),
60
+ 'imperial aramaic' => (0x10840..0x1085f),
61
+ 'avestan' => (0x10b00..0x10b3f),
62
+ 'carian' => (0x102a0..0x102df),
63
+ 'cuneiform' => (0x12000..0x123ff),
64
+ 'cuneiform numbers and punctuation' => (0x12400..0x1247f),
65
+ 'old persian' => (0x103a0..0x103df),
66
+ 'ugaritic' => (0x10380..0x1039f),
67
+ 'hebrew' => (0x0590..0x05ff),
68
+ 'lycian' => (0x10280..0x1029f),
69
+ 'lydian' => (0x10920..0x1093f),
70
+ 'mandaic' => (0x0840..0x085f),
71
+ 'old south arabian' => (0x10a60..0x10a7f),
72
+ 'inscriptional pahlavi' => (0x10b60..0x10b7f),
73
+ 'inscriptional parthian' => (0x10b40..0x10b5f),
74
+ 'phoenician' => (0x10900..0x1091f),
75
+ 'samaritan' => (0x0800..0x083f),
76
+ 'syriac' => (0x0700..0x074f),
77
+ 'mongolian' => (0x1800..0x18af),
78
+ 'old turkic' => (0x10c00..0x10c4f),
79
+ 'phags-pa' => (0xa840..0xa87f),
80
+ 'tibetan' => (0x0f00..0x0fff),
81
+ 'bengali' => (0x0980..0x09ff),
82
+ 'brahmi' => (0x11000..0x1107f),
83
+ 'chakma' => (0x11100..0x1114f),
84
+ 'devanagari' => (0x0900..0x097f),
85
+ 'devanagari extended' => (0xa8e0..0xa8ff),
86
+ 'gujarati' => (0x0a80..0x0aff),
87
+ 'gurmukhi' => (0x0a00..0x0a7f),
88
+ 'kaithi' => (0x11080..0x110cf),
89
+ 'kannada' => (0x0c80..0x0cff),
90
+ 'kharoshthi' => (0x10a00..0x10a5f),
91
+ 'lepcha' => (0x1c00..0x1c4f),
92
+ 'limbu' => (0x1900..0x194f),
93
+ 'malayalam' => (0x0d00..0x0d7f),
94
+ 'meetei mayek' => (0xabc0..0xabff),
95
+ 'meetei mayek extensions' => (0xaae0..0xaaff),
96
+ 'ol chiki' => (0x1c50..0x1c7f),
97
+ 'oriya' => (0x0b00..0x0b7f),
98
+ 'saurashtra' => (0xa880..0xa8df),
99
+ 'sharada' => (0x11180..0x111df),
100
+ 'sinhala' => (0x0d80..0x0dff),
101
+ 'sora sompeng' => (0x110d0..0x110ff),
102
+ 'syloti nagri' => (0xa800..0xa82f),
103
+ 'takri' => (0x11680..0x116cf),
104
+ 'tamil' => (0x0b80..0x0bff),
105
+ 'telugu' => (0x0c00..0x0c7f),
106
+ 'thaana' => (0x0780..0x07bf),
107
+ 'vedic extensions' => (0x1cd0..0x1cff),
108
+ 'balinese' => (0x1b00..0x1b7f),
109
+ 'batak' => (0x1bc0..0x1bff),
110
+ 'buginese' => (0x1a00..0x1a1f),
111
+ 'cham' => (0xaa00..0xaa5f),
112
+ 'javanese' => (0xa980..0xa9df),
113
+ 'kayah li' => (0xa900..0xa92f),
114
+ 'khmer' => (0x1780..0x17ff),
115
+ 'khmer symbols' => (0x19e0..0x19ff),
116
+ 'lao' => (0x0e80..0x0eff),
117
+ 'myanmar' => (0x1000..0x109f),
118
+ 'myanmar extended-a' => (0xaa60..0xaa7f),
119
+ 'new tai lue' => (0x1980..0x19df),
120
+ 'rejang' => (0xa930..0xa95f),
121
+ 'sundanese' => (0x1b80..0x1bbf),
122
+ 'sundanese supplement' => (0x1cc0..0x1ccf),
123
+ 'tai le' => (0x1950..0x197f),
124
+ 'tai tham' => (0x1a20..0x1aaf),
125
+ 'tai viet' => (0xaa80..0xaadf),
126
+ 'thai' => (0x0e00..0x0e7f),
127
+ 'buhid' => (0x1740..0x175f),
128
+ 'hanunoo' => (0x1720..0x173f),
129
+ 'tagalog' => (0x1700..0x171f),
130
+ 'tagbanwa' => (0x1760..0x177f),
131
+ 'bopomofo' => (0x3100..0x312f),
132
+ 'bopomofo extended' => (0x31a0..0x31bf),
133
+ 'cjk unified ideographs' => (0x4e00..0x9fcc),
134
+ 'cjk unified ideographs extension a' => (0x3400..0x4db5),
135
+ 'cjk unified ideographs extension b' => (0x20000..0x2a6d6),
136
+ 'cjk unified ideographs extension c' => (0x2a700..0x2b734),
137
+ 'cjk unified ideographs extension d' => (0x2b740..0x2b81d),
138
+ 'cjk compatibility ideographs' => (0xf900..0xfaff),
139
+ 'cjk compatibility ideographs supplement' => (0x2f800..0x2fa1f),
140
+ 'kangxi radicals' => (0x2f00..0x2fdf),
141
+ 'cjk radicals supplement' => (0x2e80..0x2eff),
142
+ 'cjk strokes' => (0x31c0..0x31ef),
143
+ 'hangul jamo' => (0x1100..0x11ff),
144
+ 'hangul jamo extended-a' => (0xa960..0xa97f),
145
+ 'hangul jamo extended-b' => (0xd7b0..0xd7ff),
146
+ 'hangul compatibility jamo' => (0x3130..0x318f),
147
+ 'hiragana' => (0x3040..0x309f),
148
+ 'katakana' => (0x30a0..0x30ff),
149
+ 'katakana phonetic extensions' => (0x31f0..0x31ff),
150
+ 'kana supplement' => (0x1b000..0x1b0ff),
151
+ 'kanbun' => (0x3190..0x319f),
152
+ 'lisu' => (0xa4d0..0xa4ff),
153
+ 'miao' => (0x16f00..0x16f9f),
154
+ 'yi syllables' => (0xa000..0xa48f),
155
+ 'yi radicals' => (0xa490..0xa4cf),
156
+ 'cherokee' => (0x13a0..0x13ff),
157
+ 'deseret' => (0x10400..0x1044f),
158
+ 'unified canadian aboriginal syllabics' => (0x1400..0x167f),
159
+ 'unified canadian aboriginal syllabics extended' => (0x18b0..0x18ff)
160
+ }
161
161
  end
@@ -1,46 +1,54 @@
1
1
  module UnicodeScript
2
2
 
3
- def self.detect string
4
- res = []
5
- string.tr!(' ','')
6
- string.codepoints.each do |c|
7
- script = find_script(c)
8
- index = res.find_index{|v| v[:script] == script}
9
- if script
10
- if index
11
- res[index][:value].push(c.chr)
12
- else
13
- res.push({:script => script, :value => [].push(c.chr)})
14
- end
3
+ def self.detect(string)
4
+ res = []
5
+ string.tr!(' ','')
6
+ string.codepoints.each do |c|
7
+ script = find_script(c)
8
+ index = res.find_index{|v| v[:script] == script}
9
+ if script
10
+ if index
11
+ res[index][:value].push(c.chr)
12
+ else
13
+ res.push({script: script, value: [].push(c.chr)})
15
14
  end
16
-
17
- end
18
- res.each do |r|
19
- r[:value] = r[:value].join('')
20
15
  end
21
- res
16
+
22
17
  end
23
-
24
-
25
- def self.method_missing method, val
26
- CHARTS.each do |c|
27
- if c[:name].downcase == method.to_s.chop
28
- val.codepoints.each do |p|
29
- return false if !(c[:range].include?(p))
30
- end
31
- return true
32
- end
18
+ res.each do |r|
19
+ r[:value] = r[:value].join('')
20
+ end
21
+ res
22
+ end
23
+
24
+ def self.method_missing(method, val)
25
+ script_name = method.to_s.gsub('_', ' ').chop
26
+ puts script_name
27
+ if charted? script_name
28
+ val.codepoints.each do |point|
29
+ return false if !(CHARTS[script_name].include?(point))
33
30
  end
31
+ return true
32
+ else
34
33
  super
35
34
  end
36
-
37
- private
38
-
39
- def self.find_script codepoint
40
- CHARTS.each do |c|
41
- return c[:name] if c[:range].include? codepoint
42
- end
43
- nil
35
+ end
36
+
37
+ def self.respond_to_missing?(method, include_private = false)
38
+ charted?(method.to_s.gsub('_', ' ').chop) || super
39
+ end
40
+
41
+ private
42
+
43
+ def self.charted? script
44
+ CHARTS.has_key?(script)
45
+ end
46
+
47
+ def self.find_script(codepoint)
48
+ CHARTS.each do |k, v|
49
+ return k if v.include? codepoint
44
50
  end
51
+ nil
52
+ end
45
53
 
46
- end
54
+ end
@@ -2,8 +2,7 @@ module UnicodeScript
2
2
  module Version
3
3
  MAJOR = 0
4
4
  MINOR = 1
5
- TINY = 0
5
+ TINY = 6
6
6
  STRING = [Version::MAJOR, Version::MINOR, Version::TINY].compact * '.'
7
7
  end
8
-
9
- end
8
+ end
@@ -6,16 +6,16 @@ describe 'UnicodeScript' do
6
6
  h = 'ひらがな'
7
7
  k = 'カタカナ'
8
8
  mixed = "東京 Tokyo"
9
- UnicodeScript.detect(h).should eq([{:script => 'Hiragana', :value => 'ひらがな'}])
10
- UnicodeScript.detect(k).should eq([{:script => 'Katakana', :value => 'カタカナ'}])
11
- UnicodeScript.detect(mixed).should eq([{:script => 'CJK Unified Ideographs', :value => '東京'},
12
- {:script => 'Basic Latin', :value => 'Tokyo'}])
9
+ UnicodeScript.detect(h).should eq([{:script => 'hiragana', :value => 'ひらがな'}])
10
+ UnicodeScript.detect(k).should eq([{:script => 'katakana', :value => 'カタカナ'}])
11
+ UnicodeScript.detect(mixed).should eq([{:script => 'cjk unified ideographs', :value => '東京'},
12
+ {:script => 'basic latin', :value => 'Tokyo'}])
13
13
  end
14
-
14
+
15
15
  it 'should be able to check whether string belongs to certain script' do
16
- h = 'ひらがな'
16
+ h = '漢字'
17
17
  mixed = 'ひらaaaがな'
18
- UnicodeScript.hiragana?(h).should eq(true)
18
+ UnicodeScript.cjk_unified_ideographs?(h).should eq(true)
19
19
  UnicodeScript.hiragana?(mixed).should eq(false)
20
20
  end
21
- end
21
+ end
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unicode-script
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.1.0
4
+ version: 0.1.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - yuri-gg
@@ -14,28 +14,28 @@ dependencies:
14
14
  name: rake
15
15
  requirement: !ruby/object:Gem::Requirement
16
16
  requirements:
17
- - - ! '>='
17
+ - - ">="
18
18
  - !ruby/object:Gem::Version
19
19
  version: '0'
20
20
  type: :development
21
21
  prerelease: false
22
22
  version_requirements: !ruby/object:Gem::Requirement
23
23
  requirements:
24
- - - ! '>='
24
+ - - ">="
25
25
  - !ruby/object:Gem::Version
26
26
  version: '0'
27
27
  - !ruby/object:Gem::Dependency
28
28
  name: rspec
29
29
  requirement: !ruby/object:Gem::Requirement
30
30
  requirements:
31
- - - ! '>='
31
+ - - ">="
32
32
  - !ruby/object:Gem::Version
33
33
  version: '0'
34
34
  type: :development
35
35
  prerelease: false
36
36
  version_requirements: !ruby/object:Gem::Requirement
37
37
  requirements:
38
- - - ! '>='
38
+ - - ">="
39
39
  - !ruby/object:Gem::Version
40
40
  version: '0'
41
41
  description: Small utility that allows you to detect scripts (languages) in unicode
@@ -61,17 +61,17 @@ require_paths:
61
61
  - lib
62
62
  required_ruby_version: !ruby/object:Gem::Requirement
63
63
  requirements:
64
- - - ! '>='
64
+ - - ">="
65
65
  - !ruby/object:Gem::Version
66
66
  version: '0'
67
67
  required_rubygems_version: !ruby/object:Gem::Requirement
68
68
  requirements:
69
- - - ! '>='
69
+ - - ">="
70
70
  - !ruby/object:Gem::Version
71
71
  version: '0'
72
72
  requirements: []
73
73
  rubyforge_project:
74
- rubygems_version: 2.1.11
74
+ rubygems_version: 2.2.0.preview.1
75
75
  signing_key:
76
76
  specification_version: 4
77
77
  summary: Unicode script detector