unihan2 0.0.4 → 0.1.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/unihan2.rb +226 -58
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a5eb38245b07a071dd3489121782a109ec34c6bf3371822c6d610df11b035257
|
4
|
+
data.tar.gz: 0f8349355c0c597c3fc1dbfe7d191bb0a38d54a08fb534f415ca31884753c8f2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d5e69eea0311f7b462c9c5b3a39f0d5a2bc6b2dbf35098f6a4dd01064c81e6670a3a5c07ddbbf4c95ca56c029d323069f5032ab89b0b7d1ab497e1e18e5c7a12
|
7
|
+
data.tar.gz: 3f5b644491711e5da436dc780b67e331fa425df852f115b9245f0b6f945c523f7d732907aefeb2756fa99764bc2fb618deaae7b50b221db7360b47b5b38959a4
|
data/lib/unihan2.rb
CHANGED
@@ -16,6 +16,8 @@ class Unihan2
|
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
19
|
+
# Listing of Characters Covered by the Unihan Database
|
20
|
+
# https://www.unicode.org/reports/tr38/tr38-29.html#BlockListing
|
19
21
|
def self.unicode_version(code)
|
20
22
|
if code.is_a? Integer
|
21
23
|
i = code
|
@@ -23,63 +25,10 @@ class Unihan2
|
|
23
25
|
i = code.hex
|
24
26
|
end
|
25
27
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
when 0x4DB6..0x4DBF # Unicode 13.0: Extension A
|
31
|
-
13.0
|
32
|
-
when 0x4E00..0x9FA5 # CJK Unified Ideographs
|
33
|
-
1.1
|
34
|
-
when 0x9FA6..0x9FBB # CJK Unified Ideographs
|
35
|
-
4.1
|
36
|
-
when 0x9FBC..0x9FC3 # CJK Unified Ideographs
|
37
|
-
5.1
|
38
|
-
when 0x9FC4..0x9FCB # CJK Unified Ideographs
|
39
|
-
5.2
|
40
|
-
when 0x9FCC # CJK Unified Ideographs
|
41
|
-
6.1
|
42
|
-
when 0x9FCD..0x9FD5 # CJK Unified Ideographs
|
43
|
-
8.0
|
44
|
-
when 0x9FD6..0x9FEA # CJK Unified Ideographs
|
45
|
-
10.0
|
46
|
-
when 0x9FEB..0x9FEF # CJK Unified Ideographs
|
47
|
-
11.0
|
48
|
-
when 0x9FF0..0x9FFC # CJK Unified Ideographs
|
49
|
-
13.0
|
50
|
-
end
|
51
|
-
elsif i < 0x20000
|
52
|
-
case i
|
53
|
-
when 0xF900..0xFA2D # CJK Compatibility Ideographs
|
54
|
-
1.1
|
55
|
-
when 0xFA2E..0xFA2F # CJK Compatibility Ideographs
|
56
|
-
6.1
|
57
|
-
when 0xFA30..0xFA6A # CJK Compatibility Ideographs
|
58
|
-
3.2
|
59
|
-
when 0xFA6B..0xFA6D # CJK Compatibility Ideographs
|
60
|
-
5.2
|
61
|
-
when 0xFA70..0xFAD9 # CJK Compatibility Ideographs
|
62
|
-
4.1
|
63
|
-
end
|
64
|
-
else
|
65
|
-
case i
|
66
|
-
when 0x20000..0x2A6D6 # Extension B
|
67
|
-
3.1
|
68
|
-
when 0x2A6D7..0x2A6DD # Extension B
|
69
|
-
13.0
|
70
|
-
when 0x2A700..0x2B734 # extension C
|
71
|
-
5.2
|
72
|
-
when 0x2B740..0x2B81D # extension D
|
73
|
-
6.0
|
74
|
-
when 0x2B820..0x2CEA1 # extension E
|
75
|
-
8.0
|
76
|
-
when 0x2CEB0..0x2EBE0 # extension F
|
77
|
-
10.0
|
78
|
-
when 0x2F800..0x2FA1D # Unicode 3.1: CJK Compatibility Supplement
|
79
|
-
3.1
|
80
|
-
when 0x30000..0x3134A # extension G
|
81
|
-
13.0
|
82
|
-
end
|
28
|
+
case i
|
29
|
+
when 0..0xFFFF then uv0(i)
|
30
|
+
when 0x20000..0x2FFFF then uv2(i)
|
31
|
+
when 0x30000..0x3134A then 13.0
|
83
32
|
end
|
84
33
|
end
|
85
34
|
|
@@ -90,4 +39,223 @@ class Unihan2
|
|
90
39
|
@strokes[char]
|
91
40
|
end
|
92
41
|
|
93
|
-
|
42
|
+
private
|
43
|
+
|
44
|
+
def self.uv0(i)
|
45
|
+
case i
|
46
|
+
when 0..0x0FFF then uv00(i)
|
47
|
+
when 0x1E00..0x1EFF
|
48
|
+
case i
|
49
|
+
when 0x1E00..0x1E9A then 1.1
|
50
|
+
when 0x1E9B then 2.0
|
51
|
+
when 0x1E9C..0x1E9F then 5.1
|
52
|
+
when 0x1EA0..0x1EF9 then 1.1
|
53
|
+
when 0x1EFA..0x1EFF then 5.1
|
54
|
+
end
|
55
|
+
when 0x2000..0x2FFF then uv02(i)
|
56
|
+
when 0x3000..0x33FF then uv03(i)
|
57
|
+
when 0x3400..0x9FA5
|
58
|
+
case i
|
59
|
+
when 0x3400..0x4DB5 then 3.0 # CJK Extension A 中日韓統一表意文字擴充 A 區
|
60
|
+
when 0x4DB6..0x4DBF then 13.0 # Extension A
|
61
|
+
when 0x4E00..0x9FA5 then 1.1 # CJK Unified Ideographs
|
62
|
+
end
|
63
|
+
when 0x9FA6..0x9FFF
|
64
|
+
case i
|
65
|
+
when 0x9FA6..0x9FBB then 4.1 # CJK Unified Ideographs
|
66
|
+
when 0x9FBC..0x9FC3 then 5.1 # CJK Unified Ideographs
|
67
|
+
when 0x9FC4..0x9FCB then 5.2 # CJK Unified Ideographs
|
68
|
+
when 0x9FCC then 6.1 # CJK Unified Ideographs
|
69
|
+
when 0x9FCD..0x9FD5 then 8.0 # CJK Unified Ideographs
|
70
|
+
when 0x9FD6..0x9FEA then 10.0 # CJK Unified Ideographs
|
71
|
+
when 0x9FEB..0x9FEF then 11.0 # CJK Unified Ideographs
|
72
|
+
when 0x9FF0..0x9FFC then 13.0 # CJK Unified Ideographs
|
73
|
+
end
|
74
|
+
when 0xA000..0xDFFF
|
75
|
+
case i
|
76
|
+
when 0xA000..0xA48C then 3.0 # Yi Syllables 彝族文字區
|
77
|
+
when 0xAC00..0xD7A3 then 2.0 # Hangul Syllables 韓文拼音
|
78
|
+
end
|
79
|
+
when 0xF000..0xFFFF
|
80
|
+
case i
|
81
|
+
when 0xF900..0xFA2D then 1.1 # CJK Compatibility Ideographs
|
82
|
+
when 0xFA2E..0xFA2F then 6.1 # CJK Compatibility Ideographs
|
83
|
+
when 0xFA30..0xFA6A then 3.2 # CJK Compatibility Ideographs
|
84
|
+
when 0xFA6B..0xFA6D then 5.2 # CJK Compatibility Ideographs
|
85
|
+
when 0xFA70..0xFAD9 then 4.1 # CJK Compatibility Ideographs
|
86
|
+
when 0xFE10..0xFE19 then 4.1 # Vertical Forms 中文直排標點
|
87
|
+
when 0xFE20..0xFE23 then 1.1 # Combining Half Marks
|
88
|
+
when 0xFE24..0xFE26 then 5.1 # Combining Half Marks
|
89
|
+
when 0xFE30..0xFE44 then 1.0 # CJK Compatibility Forms 兼容性表格
|
90
|
+
when 0xFE45..0xFE46 then 3.2 # CJK Compatibility Forms 兼容性表格
|
91
|
+
when 0xFE47..0xFE48 then 4.0 # CJK Compatibility Forms 兼容性表格
|
92
|
+
when 0xFE49..0xFE4F then 1.0 # CJK Compatibility Forms 兼容性表格
|
93
|
+
when 0xFE50..0xFE52 then 1.0 # Small Form Variants
|
94
|
+
when 0xFE54..0xFE66 then 1.0 # Small Form Variants
|
95
|
+
when 0xFE68..0xFE6B then 1.0 # Small Form Variants
|
96
|
+
when 0xFF01..0xFF5E then 1.0 # Halfwidth and Fullwidth Forms
|
97
|
+
when 0xFF5F..0xFF60 then 3.2 # Halfwidth and Fullwidth Forms
|
98
|
+
when 0xFF61..0xFF9F then 1.0 # Halfwidth and Fullwidth Forms
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
102
|
+
|
103
|
+
def self.uv00(i)
|
104
|
+
case i
|
105
|
+
when 0..0x01FF
|
106
|
+
case i
|
107
|
+
when 0..0x017E then 1.0
|
108
|
+
when 0x017F then 1.1
|
109
|
+
when 0x0180..0x01F0 then 1.0
|
110
|
+
when 0x01F1..0x01F5 then 1.1
|
111
|
+
when 0x01F6..0x01F9 then 3.0
|
112
|
+
when 0x01FA..0x0217 then 1.1
|
113
|
+
end
|
114
|
+
when 0x0200..0x02FF
|
115
|
+
case i
|
116
|
+
when 0x0218..0x021F then 3.0
|
117
|
+
when 0x0220 then 3.2
|
118
|
+
when 0x0221 then 4.0
|
119
|
+
when 0x0222..0x0233 then 3.0
|
120
|
+
when 0x0234..0x0236 then 4.0
|
121
|
+
when 0x0237..0x0241 then 4.1
|
122
|
+
when 0x0242..0x024F then 5.0
|
123
|
+
when 0x0250..0x02A8 then 1.0
|
124
|
+
when 0x02A9..0x02AD then 3.0
|
125
|
+
when 0x02AE..0x02AF then 4.0
|
126
|
+
when 0x02B0..0x02DE then 1.0
|
127
|
+
when 0x02DF then 3.0
|
128
|
+
when 0x02E0..0x02E9 then 1.0
|
129
|
+
when 0x02EA..0x02EE then 3.0
|
130
|
+
when 0x02EF..0x02FF then 4.0
|
131
|
+
end
|
132
|
+
when 0x0300..0x0341 then 1.0
|
133
|
+
when 0x0400..0x04FF
|
134
|
+
case i
|
135
|
+
when 0x0401..0x040C then 1.0
|
136
|
+
when 0x040D then 3.0
|
137
|
+
when 0x040E..0x044F then 1.0
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|
141
|
+
|
142
|
+
def self.uv02(i)
|
143
|
+
case i
|
144
|
+
when 0x2000..0x20FF
|
145
|
+
case i
|
146
|
+
when 0x2000..0x202E then 1.0
|
147
|
+
when 0x2045..0x2046 then 1.1
|
148
|
+
when 0x2047 then 3.2
|
149
|
+
when 0x2048..0x204F then 3.0
|
150
|
+
end
|
151
|
+
when 0x2100..0x21FF
|
152
|
+
case i
|
153
|
+
when 0x2100..0x2138 then 1.0
|
154
|
+
when 0x2153..0x2182 then 1.0
|
155
|
+
when 0x2190..0x21EA then 1.0
|
156
|
+
end
|
157
|
+
when 0x2200..0x22F1 then 1.0
|
158
|
+
when 0x2400..0x24FF
|
159
|
+
case i
|
160
|
+
when 0x2460..0x24EA then 1.0
|
161
|
+
when 0x24EB..0x24FE then 3.2
|
162
|
+
when 0x24FF then 4.0
|
163
|
+
end
|
164
|
+
when 0x2500..0x25FF
|
165
|
+
case i
|
166
|
+
when 0x2500..0x2595 then 1.0
|
167
|
+
when 0x2596..0x259F then 3.2
|
168
|
+
when 0x25A0..0x25EE then 1.0
|
169
|
+
when 0x25EF then 1.1
|
170
|
+
end
|
171
|
+
when 0x2600..0x26FF
|
172
|
+
case i
|
173
|
+
when 0x2600..0x2613 then 1.0
|
174
|
+
when 0x261A..0x266F then 1.0
|
175
|
+
end
|
176
|
+
when 0x2E00..0x2FFF
|
177
|
+
case i
|
178
|
+
when 0x2E80..0x2EF3 then 3.0
|
179
|
+
when 0x2F00..0x2FD5 then 3.0
|
180
|
+
when 0x2FF0..0x2FFB then 3.0
|
181
|
+
end
|
182
|
+
end
|
183
|
+
end
|
184
|
+
|
185
|
+
def self.uv03(i)
|
186
|
+
case i
|
187
|
+
when 0x3000..0x30FF
|
188
|
+
case i
|
189
|
+
when 0x3000..0x3036 then 1.0 # CJK Symbols and Punctuation
|
190
|
+
when 0x3037 then 1.1 # CJK Symbols and Punctuation
|
191
|
+
when 0x3038..0x303A then 3.0 # CJK Symbols and Punctuation
|
192
|
+
when 0x303B..0x303D then 3.2 # CJK Symbols and Punctuation
|
193
|
+
when 0x303E then 3.0 # CJK Symbols and Punctuation
|
194
|
+
when 0x303F then 1.0 # CJK Symbols and Punctuation
|
195
|
+
when 0x3041..0x3094 then 1.0 # Hiragana 日文平假名
|
196
|
+
when 0x3095..0x3096 then 3.2 # Hiragana 日文平假名
|
197
|
+
when 0x3099..0x309E then 1.0 # Hiragana 日文平假名
|
198
|
+
when 0x309F then 3.2 # Hiragana 日文平假名
|
199
|
+
when 0x30A0 then 3.2 # Katakana 日文片假名
|
200
|
+
when 0x30A1..0x30F6 then 1.0 # Katakana 日文片假名
|
201
|
+
when 0x30F7..0x30FA then 1.1 # Katakana 日文片假名
|
202
|
+
when 0x30FB..0x30FE then 1.0 # Katakana 日文片假名
|
203
|
+
when 0x30FF then 3.2 # Katakana 日文片假名 (Unicode 3.2)
|
204
|
+
end
|
205
|
+
when 0x3100..0x31FF
|
206
|
+
case i
|
207
|
+
when 0x3105..0x312C then 1.0 # Bopomofo 注音符號
|
208
|
+
when 0x312D then 5.1 # Bopomofo 上下顛倒的 'ㄓ'
|
209
|
+
when 0x3131..0x318E then 1.0 # Hangul Compatibility Jamo 韓文
|
210
|
+
when 0x3190..0x319F then 1.0 # Kanbun 在上方的小漢字
|
211
|
+
when 0x31A0..0x31B7 then 3.0 # Bopomofo Extended 注音擴展
|
212
|
+
when 0x31B8..0x31BA then 6.0 # Bopomofo Extended 注音擴展
|
213
|
+
when 0x31C0..0x31CF then 4.1 # CJK Strokes 筆劃 (基本筆劃, 如撇, 勾, 點...)
|
214
|
+
when 0x31D0..0x31E3 then 5.1 # CJK Strokes 筆劃 (基本筆劃, 如撇, 勾, 點...)
|
215
|
+
when 0x31F0..0x31FF then 3.2 # Katakana Phonetic Extensions 日文片假名語音擴展
|
216
|
+
end
|
217
|
+
when 0x3200..0x32FF
|
218
|
+
case i
|
219
|
+
when 0x3200..0x321C then 1.0 # Enclosed CJK Letters and Months 括號韓文
|
220
|
+
when 0x321D..0x321E then 4.0 # Enclosed CJK Letters and Months 括號韓文
|
221
|
+
when 0x3220..0x3243 then 1.0 # Enclosed CJK Letters and Months 括號一~十及漢字
|
222
|
+
when 0x3244..0x324F then 5.2 # Enclosed CJK Letters and Months 圓圈中有字及10~80
|
223
|
+
when 0x3250 then 4.0 # Enclosed CJK Letters and Months 'PTE' 組成一字
|
224
|
+
when 0x3251..0x325F then 3.2 # Enclosed CJK Letters and Months 圓圈 21~35
|
225
|
+
when 0x3260..0x327B then 1.0 # Enclosed CJK Letters and Months 圓圈韓文
|
226
|
+
when 0x327C..0x327D then 4.0 # Enclosed CJK Letters and Months 圓圈韓文
|
227
|
+
when 0x327E then 4.1 # Enclosed CJK Letters and Months 圓圈韓文
|
228
|
+
when 0x327F..0x32B0 then 1.0 # Enclosed CJK Letters and Months 圓圈一~十及漢字
|
229
|
+
when 0x32B1..0x32BF then 3.2 # Enclosed CJK Letters and Months 圓圈 36~50
|
230
|
+
when 0x32C0..0x32CB then 1.1 # Enclosed CJK Letters and Months 1月~12月
|
231
|
+
when 0x32CC..0x32CF then 4.0 # Enclosed CJK Letters and Months 多英文組成一個字
|
232
|
+
when 0x32D0..0x32FE then 1.0 # Enclosed CJK Letters and Months 圓圈日文
|
233
|
+
end
|
234
|
+
when 0x3300..0x33FF
|
235
|
+
case i
|
236
|
+
when 0x3300..0x3357 then 1.0 # CJK Compatibility 多個日文組成一字
|
237
|
+
when 0x3358..0x3376 then 1.1 # CJK Compatibility 0点~24点 及多英文組成一字
|
238
|
+
when 0x3377..0x337A then 4.0 # CJK Compatibility 多英文組成一字
|
239
|
+
when 0x337B..0x33DD then 1.0 # CJK Compatibility 多日本漢字及多英文組成一字
|
240
|
+
when 0x33DE..0x33DF then 4.0 # CJK Compatibility 多英文組成一字
|
241
|
+
when 0x33E0..0x33FE then 1.1 # CJK Compatibility 1日~31日
|
242
|
+
when 0x33FF then 4.0 # CJK Compatibility 'gal' 組成一字
|
243
|
+
end
|
244
|
+
end
|
245
|
+
end
|
246
|
+
|
247
|
+
def self.uv2(i)
|
248
|
+
case i
|
249
|
+
when 0x20000..0x2A6D6 then 3.1 # Extension B
|
250
|
+
when 0x2A6D7..0x2A6DD then 13.0 # Extension B
|
251
|
+
when 0x2A700..0x2B734 then 5.2 # extension C
|
252
|
+
when 0x2B740..0x2B81D then 6.0 # extension D
|
253
|
+
when 0x2B820..0x2CEA1 then 8.0 # extension E
|
254
|
+
when 0x2CEB0..0x2EBE0 then 10.0 # extension F
|
255
|
+
when 0x2F800..0x2FA1D then 3.1 # Unicode 3.1: CJK Compatibility Supplement
|
256
|
+
end
|
257
|
+
end
|
258
|
+
|
259
|
+
private_class_method :uv0, :uv00, :uv02, :uv03, :uv2
|
260
|
+
|
261
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unihan2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ray Chou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-12-
|
11
|
+
date: 2020-12-10 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Unihan Database Utilities
|
14
14
|
email: zhoubx@gmail.com
|