unihan2 0.0.4 → 0.1.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/unihan2.rb +226 -58
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a5eb38245b07a071dd3489121782a109ec34c6bf3371822c6d610df11b035257
|
4
|
+
data.tar.gz: 0f8349355c0c597c3fc1dbfe7d191bb0a38d54a08fb534f415ca31884753c8f2
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: d5e69eea0311f7b462c9c5b3a39f0d5a2bc6b2dbf35098f6a4dd01064c81e6670a3a5c07ddbbf4c95ca56c029d323069f5032ab89b0b7d1ab497e1e18e5c7a12
|
7
|
+
data.tar.gz: 3f5b644491711e5da436dc780b67e331fa425df852f115b9245f0b6f945c523f7d732907aefeb2756fa99764bc2fb618deaae7b50b221db7360b47b5b38959a4
|
data/lib/unihan2.rb
CHANGED
@@ -16,6 +16,8 @@ class Unihan2
|
|
16
16
|
end
|
17
17
|
end
|
18
18
|
|
19
|
+
# Listing of Characters Covered by the Unihan Database
|
20
|
+
# https://www.unicode.org/reports/tr38/tr38-29.html#BlockListing
|
19
21
|
def self.unicode_version(code)
|
20
22
|
if code.is_a? Integer
|
21
23
|
i = code
|
@@ -23,63 +25,10 @@ class Unihan2
|
|
23
25
|
i = code.hex
|
24
26
|
end
|
25
27
|
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
when 0x4DB6..0x4DBF # Unicode 13.0: Extension A
|
31
|
-
13.0
|
32
|
-
when 0x4E00..0x9FA5 # CJK Unified Ideographs
|
33
|
-
1.1
|
34
|
-
when 0x9FA6..0x9FBB # CJK Unified Ideographs
|
35
|
-
4.1
|
36
|
-
when 0x9FBC..0x9FC3 # CJK Unified Ideographs
|
37
|
-
5.1
|
38
|
-
when 0x9FC4..0x9FCB # CJK Unified Ideographs
|
39
|
-
5.2
|
40
|
-
when 0x9FCC # CJK Unified Ideographs
|
41
|
-
6.1
|
42
|
-
when 0x9FCD..0x9FD5 # CJK Unified Ideographs
|
43
|
-
8.0
|
44
|
-
when 0x9FD6..0x9FEA # CJK Unified Ideographs
|
45
|
-
10.0
|
46
|
-
when 0x9FEB..0x9FEF # CJK Unified Ideographs
|
47
|
-
11.0
|
48
|
-
when 0x9FF0..0x9FFC # CJK Unified Ideographs
|
49
|
-
13.0
|
50
|
-
end
|
51
|
-
elsif i < 0x20000
|
52
|
-
case i
|
53
|
-
when 0xF900..0xFA2D # CJK Compatibility Ideographs
|
54
|
-
1.1
|
55
|
-
when 0xFA2E..0xFA2F # CJK Compatibility Ideographs
|
56
|
-
6.1
|
57
|
-
when 0xFA30..0xFA6A # CJK Compatibility Ideographs
|
58
|
-
3.2
|
59
|
-
when 0xFA6B..0xFA6D # CJK Compatibility Ideographs
|
60
|
-
5.2
|
61
|
-
when 0xFA70..0xFAD9 # CJK Compatibility Ideographs
|
62
|
-
4.1
|
63
|
-
end
|
64
|
-
else
|
65
|
-
case i
|
66
|
-
when 0x20000..0x2A6D6 # Extension B
|
67
|
-
3.1
|
68
|
-
when 0x2A6D7..0x2A6DD # Extension B
|
69
|
-
13.0
|
70
|
-
when 0x2A700..0x2B734 # extension C
|
71
|
-
5.2
|
72
|
-
when 0x2B740..0x2B81D # extension D
|
73
|
-
6.0
|
74
|
-
when 0x2B820..0x2CEA1 # extension E
|
75
|
-
8.0
|
76
|
-
when 0x2CEB0..0x2EBE0 # extension F
|
77
|
-
10.0
|
78
|
-
when 0x2F800..0x2FA1D # Unicode 3.1: CJK Compatibility Supplement
|
79
|
-
3.1
|
80
|
-
when 0x30000..0x3134A # extension G
|
81
|
-
13.0
|
82
|
-
end
|
28
|
+
case i
|
29
|
+
when 0..0xFFFF then uv0(i)
|
30
|
+
when 0x20000..0x2FFFF then uv2(i)
|
31
|
+
when 0x30000..0x3134A then 13.0
|
83
32
|
end
|
84
33
|
end
|
85
34
|
|
@@ -90,4 +39,223 @@ class Unihan2
|
|
90
39
|
@strokes[char]
|
91
40
|
end
|
92
41
|
|
93
|
-
|
42
|
+
private
|
43
|
+
|
44
|
+
# Looks up the Unicode version for a code point in U+0000..U+FFFF.
#
# Three sub-ranges are handed off to dedicated helpers (uv00, uv02, uv03);
# every other listed range is answered directly from the flat table below.
# The ranges are disjoint, so the order of the clauses does not affect the
# result.
#
# @param i [Integer] code point
# @return [Float, nil] version number (for example 1.1 or 13.0), or nil
#   when the code point is in none of the listed ranges
def self.uv0(i)
  case i
  when 0x0000..0x0FFF then uv00(i)

  when 0x1E00..0x1E9A then 1.1
  when 0x1E9B         then 2.0
  when 0x1E9C..0x1E9F then 5.1
  when 0x1EA0..0x1EF9 then 1.1
  when 0x1EFA..0x1EFF then 5.1

  when 0x2000..0x2FFF then uv02(i)
  when 0x3000..0x33FF then uv03(i)

  # CJK Unified Ideographs Extension A
  when 0x3400..0x4DB5 then 3.0
  when 0x4DB6..0x4DBF then 13.0

  # CJK Unified Ideographs
  when 0x4E00..0x9FA5 then 1.1
  when 0x9FA6..0x9FBB then 4.1
  when 0x9FBC..0x9FC3 then 5.1
  when 0x9FC4..0x9FCB then 5.2
  when 0x9FCC         then 6.1
  when 0x9FCD..0x9FD5 then 8.0
  when 0x9FD6..0x9FEA then 10.0
  when 0x9FEB..0x9FEF then 11.0
  when 0x9FF0..0x9FFC then 13.0

  when 0xA000..0xA48C then 3.0 # Yi Syllables
  when 0xAC00..0xD7A3 then 2.0 # Hangul Syllables

  # CJK Compatibility Ideographs
  when 0xF900..0xFA2D then 1.1
  when 0xFA2E..0xFA2F then 6.1
  when 0xFA30..0xFA6A then 3.2
  when 0xFA6B..0xFA6D then 5.2
  when 0xFA70..0xFAD9 then 4.1

  # Vertical Forms (punctuation for vertically set Chinese text)
  when 0xFE10..0xFE19 then 4.1

  # Combining Half Marks
  when 0xFE20..0xFE23 then 1.1
  when 0xFE24..0xFE26 then 5.1

  # CJK Compatibility Forms
  when 0xFE30..0xFE44 then 1.0
  when 0xFE45..0xFE46 then 3.2
  when 0xFE47..0xFE48 then 4.0
  when 0xFE49..0xFE4F then 1.0

  # Small Form Variants
  when 0xFE50..0xFE52 then 1.0
  when 0xFE54..0xFE66 then 1.0
  when 0xFE68..0xFE6B then 1.0

  # Halfwidth and Fullwidth Forms
  when 0xFF01..0xFF5E then 1.0
  when 0xFF5F..0xFF60 then 3.2
  when 0xFF61..0xFF9F then 1.0
  end
end
|
102
|
+
|
103
|
+
# Looks up the Unicode version for a code point in U+0000..U+0FFF.
#
# The table is a single flat `case` on purpose. An earlier two-level layout
# first bucketed code points into 0x0000..0x01FF and 0x0200..0x02FF and then
# matched inside the bucket; the range 0x01FA..0x0217 straddles that bucket
# boundary, so U+0200..U+0217 were never matched and came back as nil.
# With one flat table a range can span any boundary safely.
#
# @param i [Integer] code point
# @return [Float, nil] version number (for example 1.0 or 3.2), or nil when
#   the code point is in none of the listed ranges
def self.uv00(i)
  case i
  when 0x0000..0x017E then 1.0
  when 0x017F         then 1.1
  when 0x0180..0x01F0 then 1.0
  when 0x01F1..0x01F5 then 1.1
  when 0x01F6..0x01F9 then 3.0
  when 0x01FA..0x0217 then 1.1
  when 0x0218..0x021F then 3.0
  when 0x0220         then 3.2
  when 0x0221         then 4.0
  when 0x0222..0x0233 then 3.0
  when 0x0234..0x0236 then 4.0
  when 0x0237..0x0241 then 4.1
  when 0x0242..0x024F then 5.0
  when 0x0250..0x02A8 then 1.0
  when 0x02A9..0x02AD then 3.0
  when 0x02AE..0x02AF then 4.0
  when 0x02B0..0x02DE then 1.0
  when 0x02DF         then 3.0
  when 0x02E0..0x02E9 then 1.0
  when 0x02EA..0x02EE then 3.0
  when 0x02EF..0x02FF then 4.0
  when 0x0300..0x0341 then 1.0
  when 0x0401..0x040C then 1.0
  when 0x040D         then 3.0
  when 0x040E..0x044F then 1.0
  end
end
|
141
|
+
|
142
|
+
# Looks up the Unicode version for a code point in U+2000..U+2FFF.
#
# The listed ranges are disjoint and are kept in ascending code point order.
#
# @param i [Integer] code point
# @return [Float, nil] version number, or nil when the code point is in
#   none of the listed ranges
def self.uv02(i)
  case i
  # 0x2000..0x20FF
  when 0x2000..0x202E then 1.0
  when 0x2045..0x2046 then 1.1
  when 0x2047         then 3.2
  when 0x2048..0x204F then 3.0

  # 0x2100..0x22FF
  when 0x2100..0x2138 then 1.0
  when 0x2153..0x2182 then 1.0
  when 0x2190..0x21EA then 1.0
  when 0x2200..0x22F1 then 1.0

  # 0x2400..0x24FF
  when 0x2460..0x24EA then 1.0
  when 0x24EB..0x24FE then 3.2
  when 0x24FF         then 4.0

  # 0x2500..0x25FF
  when 0x2500..0x2595 then 1.0
  when 0x2596..0x259F then 3.2
  when 0x25A0..0x25EE then 1.0
  when 0x25EF         then 1.1

  # 0x2600..0x26FF
  when 0x2600..0x2613 then 1.0
  when 0x261A..0x266F then 1.0

  # 0x2E00..0x2FFF
  when 0x2E80..0x2EF3 then 3.0
  when 0x2F00..0x2FD5 then 3.0
  when 0x2FF0..0x2FFB then 3.0
  end
end
|
184
|
+
|
185
|
+
# Looks up the Unicode version for a code point in U+3000..U+33FF.
#
# The listed ranges are disjoint and are kept in ascending code point order,
# grouped under the block names used in the original annotations.
#
# @param i [Integer] code point
# @return [Float, nil] version number, or nil when the code point is in
#   none of the listed ranges
def self.uv03(i)
  case i
  # CJK Symbols and Punctuation
  when 0x3000..0x3036 then 1.0
  when 0x3037         then 1.1
  when 0x3038..0x303A then 3.0
  when 0x303B..0x303D then 3.2
  when 0x303E         then 3.0
  when 0x303F         then 1.0

  # Hiragana
  when 0x3041..0x3094 then 1.0
  when 0x3095..0x3096 then 3.2
  when 0x3099..0x309E then 1.0
  when 0x309F         then 3.2

  # Katakana
  when 0x30A0         then 3.2
  when 0x30A1..0x30F6 then 1.0
  when 0x30F7..0x30FA then 1.1
  when 0x30FB..0x30FE then 1.0
  when 0x30FF         then 3.2

  # Bopomofo (phonetic symbols)
  when 0x3105..0x312C then 1.0
  when 0x312D         then 5.1 # upside-down 'ㄓ'

  # Hangul Compatibility Jamo (Korean)
  when 0x3131..0x318E then 1.0

  # Kanbun (small Han characters set above the line)
  when 0x3190..0x319F then 1.0

  # Bopomofo Extended
  when 0x31A0..0x31B7 then 3.0
  when 0x31B8..0x31BA then 6.0

  # CJK Strokes (basic strokes such as left-falling, hook, dot, ...)
  when 0x31C0..0x31CF then 4.1
  when 0x31D0..0x31E3 then 5.1

  # Katakana Phonetic Extensions
  when 0x31F0..0x31FF then 3.2

  # Enclosed CJK Letters and Months
  when 0x3200..0x321C then 1.0 # parenthesized Korean
  when 0x321D..0x321E then 4.0 # parenthesized Korean
  when 0x3220..0x3243 then 1.0 # parenthesized numerals one to ten and Han characters
  when 0x3244..0x324F then 5.2 # circled characters and circled 10 to 80
  when 0x3250         then 4.0 # 'PTE' combined into one character
  when 0x3251..0x325F then 3.2 # circled 21 to 35
  when 0x3260..0x327B then 1.0 # circled Korean
  when 0x327C..0x327D then 4.0 # circled Korean
  when 0x327E         then 4.1 # circled Korean
  when 0x327F..0x32B0 then 1.0 # circled numerals one to ten and Han characters
  when 0x32B1..0x32BF then 3.2 # circled 36 to 50
  when 0x32C0..0x32CB then 1.1 # months 1 to 12
  when 0x32CC..0x32CF then 4.0 # several Latin letters combined into one character
  when 0x32D0..0x32FE then 1.0 # circled Japanese

  # CJK Compatibility
  when 0x3300..0x3357 then 1.0 # several Japanese characters combined into one
  when 0x3358..0x3376 then 1.1 # hours 0 to 24, and several Latin letters combined into one
  when 0x3377..0x337A then 4.0 # several Latin letters combined into one
  when 0x337B..0x33DD then 1.0 # several Japanese Han characters / Latin letters combined into one
  when 0x33DE..0x33DF then 4.0 # several Latin letters combined into one
  when 0x33E0..0x33FE then 1.1 # days 1 to 31
  when 0x33FF         then 4.0 # 'gal' combined into one character
  end
end
|
246
|
+
|
247
|
+
# Looks up the Unicode version for a code point in U+20000..U+2FFFF.
#
# Clauses are grouped by version; the ranges are disjoint, so grouping does
# not change which clause matches.
#
# @param i [Integer] code point
# @return [Float, nil] version number, or nil when the code point is in
#   none of the listed ranges
def self.uv2(i)
  case i
  when 0x20000..0x2A6D6, # Extension B
       0x2F800..0x2FA1D  # CJK Compatibility Supplement
    3.1
  when 0x2A700..0x2B734 then 5.2  # Extension C
  when 0x2B740..0x2B81D then 6.0  # Extension D
  when 0x2B820..0x2CEA1 then 8.0  # Extension E
  when 0x2CEB0..0x2EBE0 then 10.0 # Extension F
  when 0x2A6D7..0x2A6DD then 13.0 # Extension B
  end
end
|
258
|
+
|
259
|
+
private_class_method :uv0, :uv00, :uv02, :uv03, :uv2
|
260
|
+
|
261
|
+
end
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: unihan2
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.1.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ray Chou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-12-
|
11
|
+
date: 2020-12-10 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Unihan Database Utilities
|
14
14
|
email: zhoubx@gmail.com
|