unihan2 0.0.4 → 0.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/unihan2.rb +226 -58
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bdca2031f4b271b10f7e955fac3a1f74df5ccbd0d9ed7f731b5b6469a6b80280
4
- data.tar.gz: d9e0d288ec3380057b1680c1e6bc4c36375a155f05a6dad6ea8c8266882665ed
3
+ metadata.gz: a5eb38245b07a071dd3489121782a109ec34c6bf3371822c6d610df11b035257
4
+ data.tar.gz: 0f8349355c0c597c3fc1dbfe7d191bb0a38d54a08fb534f415ca31884753c8f2
5
5
  SHA512:
6
- metadata.gz: b77a5a7d65066e30e144643a4e39790ee91c35f51f3a38d7993d5cd8c57d0000a49afe119b0a30a50fde03172218dc87beda7fc0993ae6b0136e05db9ed730b3
7
- data.tar.gz: b30b2d5f59c3e09733708f71604bcfbebb1c3e3a59956ad93e12268c2aa6b287781369bf52007821dae70e552cc59a476fdfeae4e00f390e1a128ca19a3a5fe6
6
+ metadata.gz: d5e69eea0311f7b462c9c5b3a39f0d5a2bc6b2dbf35098f6a4dd01064c81e6670a3a5c07ddbbf4c95ca56c029d323069f5032ab89b0b7d1ab497e1e18e5c7a12
7
+ data.tar.gz: 3f5b644491711e5da436dc780b67e331fa425df852f115b9245f0b6f945c523f7d732907aefeb2756fa99764bc2fb618deaae7b50b221db7360b47b5b38959a4
@@ -16,6 +16,8 @@ class Unihan2
16
16
  end
17
17
  end
18
18
 
19
+ # Listing of Characters Covered by the Unihan Database
20
+ # https://www.unicode.org/reports/tr38/tr38-29.html#BlockListing
19
21
  def self.unicode_version(code)
20
22
  if code.is_a? Integer
21
23
  i = code
@@ -23,63 +25,10 @@ class Unihan2
23
25
  i = code.hex
24
26
  end
25
27
 
26
- if i < 0xF000
27
- case i
28
- when 0x3400..0x4DB5 # Unicode 3.0: Extension A
29
- 3.0
30
- when 0x4DB6..0x4DBF # Unicode 13.0: Extension A
31
- 13.0
32
- when 0x4E00..0x9FA5 # CJK Unified Ideographs
33
- 1.1
34
- when 0x9FA6..0x9FBB # CJK Unified Ideographs
35
- 4.1
36
- when 0x9FBC..0x9FC3 # CJK Unified Ideographs
37
- 5.1
38
- when 0x9FC4..0x9FCB # CJK Unified Ideographs
39
- 5.2
40
- when 0x9FCC # CJK Unified Ideographs
41
- 6.1
42
- when 0x9FCD..0x9FD5 # CJK Unified Ideographs
43
- 8.0
44
- when 0x9FD6..0x9FEA # CJK Unified Ideographs
45
- 10.0
46
- when 0x9FEB..0x9FEF # CJK Unified Ideographs
47
- 11.0
48
- when 0x9FF0..0x9FFC # CJK Unified Ideographs
49
- 13.0
50
- end
51
- elsif i < 0x20000
52
- case i
53
- when 0xF900..0xFA2D # CJK Compatibility Ideographs
54
- 1.1
55
- when 0xFA2E..0xFA2F # CJK Compatibility Ideographs
56
- 6.1
57
- when 0xFA30..0xFA6A # CJK Compatibility Ideographs
58
- 3.2
59
- when 0xFA6B..0xFA6D # CJK Compatibility Ideographs
60
- 5.2
61
- when 0xFA70..0xFAD9 # CJK Compatibility Ideographs
62
- 4.1
63
- end
64
- else
65
- case i
66
- when 0x20000..0x2A6D6 # Extension B
67
- 3.1
68
- when 0x2A6D7..0x2A6DD # Extension B
69
- 13.0
70
- when 0x2A700..0x2B734 # extension C
71
- 5.2
72
- when 0x2B740..0x2B81D # extension D
73
- 6.0
74
- when 0x2B820..0x2CEA1 # extension E
75
- 8.0
76
- when 0x2CEB0..0x2EBE0 # extension F
77
- 10.0
78
- when 0x2F800..0x2FA1D # Unicode 3.1: CJK Compatibility Supplement
79
- 3.1
80
- when 0x30000..0x3134A # extension G
81
- 13.0
82
- end
28
+ case i
29
+ when 0..0xFFFF then uv0(i)
30
+ when 0x20000..0x2FFFF then uv2(i)
31
+ when 0x30000..0x3134A then 13.0
83
32
  end
84
33
  end
85
34
 
@@ -90,4 +39,223 @@ class Unihan2
90
39
  @strokes[char]
91
40
  end
92
41
 
93
- end
42
+ private
43
+
44
# Looks up the Unicode version that introduced a code point in the range
# U+0000..U+FFFF.
#
# i - Integer code point.
#
# Returns the version as a Float (for example 1.1 or 13.0), or nil when the
# code point is not covered by any range listed here or in the helpers.
def self.uv0(i)
  case i
  # Ranges delegated to dedicated helper tables.
  when 0..0x0FFF      then uv00(i)
  when 0x2000..0x2FFF then uv02(i)
  when 0x3000..0x33FF then uv03(i)

  # U+1E00..U+1EFF
  when 0x1E00..0x1E9A then 1.1
  when 0x1E9B         then 2.0
  when 0x1E9C..0x1E9F then 5.1
  when 0x1EA0..0x1EF9 then 1.1
  when 0x1EFA..0x1EFF then 5.1

  # CJK Unified Ideographs Extension A
  when 0x3400..0x4DB5 then 3.0
  when 0x4DB6..0x4DBF then 13.0

  # CJK Unified Ideographs
  when 0x4E00..0x9FA5 then 1.1
  when 0x9FA6..0x9FBB then 4.1
  when 0x9FBC..0x9FC3 then 5.1
  when 0x9FC4..0x9FCB then 5.2
  when 0x9FCC         then 6.1
  when 0x9FCD..0x9FD5 then 8.0
  when 0x9FD6..0x9FEA then 10.0
  when 0x9FEB..0x9FEF then 11.0
  when 0x9FF0..0x9FFC then 13.0

  when 0xA000..0xA48C then 3.0 # Yi Syllables
  when 0xAC00..0xD7A3 then 2.0 # Hangul Syllables

  # CJK Compatibility Ideographs
  when 0xF900..0xFA2D then 1.1
  when 0xFA2E..0xFA2F then 6.1
  when 0xFA30..0xFA6A then 3.2
  when 0xFA6B..0xFA6D then 5.2
  when 0xFA70..0xFAD9 then 4.1

  # Vertical Forms (punctuation for vertical Chinese text)
  when 0xFE10..0xFE19 then 4.1

  # Combining Half Marks
  when 0xFE20..0xFE23 then 1.1
  when 0xFE24..0xFE26 then 5.1

  # CJK Compatibility Forms
  when 0xFE30..0xFE44 then 1.0
  when 0xFE45..0xFE46 then 3.2
  when 0xFE47..0xFE48 then 4.0
  when 0xFE49..0xFE4F then 1.0

  # Small Form Variants
  when 0xFE50..0xFE52, 0xFE54..0xFE66, 0xFE68..0xFE6B then 1.0

  # Halfwidth and Fullwidth Forms
  when 0xFF01..0xFF5E then 1.0
  when 0xFF5F..0xFF60 then 3.2
  when 0xFF61..0xFF9F then 1.0
  end
end
102
+
103
# Looks up the Unicode version that introduced a code point in the range
# U+0000..U+0FFF.
#
# i - Integer code point.
#
# Returns the version as a Float (for example 1.0 or 4.1), or nil when the
# code point is not covered by any range listed here.
def self.uv00(i)
  case i
  # The first bucket ends at 0x0217 rather than 0x01FF: the 0x01FA..0x0217
  # row crosses the 0x0200 boundary, and splitting the buckets at 0x0200
  # made 0x0200..0x0217 unreachable (they fell into the next bucket, whose
  # rows start at 0x0218, and yielded nil).
  when 0..0x0217
    case i
    when 0..0x017E      then 1.0
    when 0x017F         then 1.1
    when 0x0180..0x01F0 then 1.0
    when 0x01F1..0x01F5 then 1.1
    when 0x01F6..0x01F9 then 3.0
    when 0x01FA..0x0217 then 1.1
    end
  when 0x0218..0x02FF
    case i
    when 0x0218..0x021F then 3.0
    when 0x0220         then 3.2
    when 0x0221         then 4.0
    when 0x0222..0x0233 then 3.0
    when 0x0234..0x0236 then 4.0
    when 0x0237..0x0241 then 4.1
    when 0x0242..0x024F then 5.0
    when 0x0250..0x02A8 then 1.0
    when 0x02A9..0x02AD then 3.0
    when 0x02AE..0x02AF then 4.0
    when 0x02B0..0x02DE then 1.0
    when 0x02DF         then 3.0
    when 0x02E0..0x02E9 then 1.0
    when 0x02EA..0x02EE then 3.0
    when 0x02EF..0x02FF then 4.0
    end
  when 0x0300..0x0341 then 1.0
  when 0x0400..0x04FF
    case i
    when 0x0401..0x040C then 1.0
    when 0x040D         then 3.0
    when 0x040E..0x044F then 1.0
    end
  end
end
141
+
142
# Looks up the Unicode version that introduced a code point in the range
# U+2000..U+2FFF.
#
# i - Integer code point.
#
# Returns the version as a Float, or nil when the code point is not covered
# by any range listed here.
def self.uv02(i)
  case i
  # U+2000..U+20FF
  when 0x2000..0x202E then 1.0
  when 0x2045..0x2046 then 1.1
  when 0x2047         then 3.2
  when 0x2048..0x204F then 3.0

  # U+2100..U+21FF
  when 0x2100..0x2138, 0x2153..0x2182, 0x2190..0x21EA then 1.0

  # U+2200..U+22FF
  when 0x2200..0x22F1 then 1.0

  # U+2400..U+24FF
  when 0x2460..0x24EA then 1.0
  when 0x24EB..0x24FE then 3.2
  when 0x24FF         then 4.0

  # U+2500..U+25FF
  when 0x2500..0x2595 then 1.0
  when 0x2596..0x259F then 3.2
  when 0x25A0..0x25EE then 1.0
  when 0x25EF         then 1.1

  # U+2600..U+26FF
  when 0x2600..0x2613, 0x261A..0x266F then 1.0

  # U+2E00..U+2FFF
  when 0x2E80..0x2EF3, 0x2F00..0x2FD5, 0x2FF0..0x2FFB then 3.0
  end
end
184
+
185
# Looks up the Unicode version that introduced a code point in the range
# U+3000..U+33FF.
#
# i - Integer code point.
#
# Returns the version as a Float, or nil when the code point is not covered
# by any range listed here.
def self.uv03(i)
  case i
  # CJK Symbols and Punctuation
  when 0x3000..0x3036 then 1.0
  when 0x3037         then 1.1
  when 0x3038..0x303A then 3.0
  when 0x303B..0x303D then 3.2
  when 0x303E         then 3.0
  when 0x303F         then 1.0

  # Hiragana
  when 0x3041..0x3094 then 1.0
  when 0x3095..0x3096 then 3.2
  when 0x3099..0x309E then 1.0
  when 0x309F         then 3.2

  # Katakana
  when 0x30A0         then 3.2
  when 0x30A1..0x30F6 then 1.0
  when 0x30F7..0x30FA then 1.1
  when 0x30FB..0x30FE then 1.0
  when 0x30FF         then 3.2

  # Bopomofo
  when 0x3105..0x312C then 1.0
  when 0x312D         then 5.1  # upside-down form of the letter ZH

  when 0x3131..0x318E then 1.0  # Hangul Compatibility Jamo (Korean)
  when 0x3190..0x319F then 1.0  # Kanbun (small raised Han characters)

  # Bopomofo Extended
  when 0x31A0..0x31B7 then 3.0
  when 0x31B8..0x31BA then 6.0

  # CJK Strokes (basic strokes such as left-falling, hook, dot)
  when 0x31C0..0x31CF then 4.1
  when 0x31D0..0x31E3 then 5.1

  when 0x31F0..0x31FF then 3.2  # Katakana Phonetic Extensions

  # Enclosed CJK Letters and Months
  when 0x3200..0x321C then 1.0  # parenthesized Hangul
  when 0x321D..0x321E then 4.0  # parenthesized Hangul
  when 0x3220..0x3243 then 1.0  # parenthesized numerals one to ten and Han characters
  when 0x3244..0x324F then 5.2  # circled characters and circled 10 to 80
  when 0x3250         then 4.0  # 'PTE' combined into one character
  when 0x3251..0x325F then 3.2  # circled 21 to 35
  when 0x3260..0x327B then 1.0  # circled Hangul
  when 0x327C..0x327D then 4.0  # circled Hangul
  when 0x327E         then 4.1  # circled Hangul
  when 0x327F..0x32B0 then 1.0  # circled numerals one to ten and Han characters
  when 0x32B1..0x32BF then 3.2  # circled 36 to 50
  when 0x32C0..0x32CB then 1.1  # months 1 to 12
  when 0x32CC..0x32CF then 4.0  # several Latin letters combined into one character
  when 0x32D0..0x32FE then 1.0  # circled Japanese

  # CJK Compatibility
  when 0x3300..0x3357 then 1.0  # several Japanese kana combined into one character
  when 0x3358..0x3376 then 1.1  # hours 0 to 24 and Latin-letter combinations
  when 0x3377..0x337A then 4.0  # several Latin letters combined into one character
  when 0x337B..0x33DD then 1.0  # Japanese kanji combinations and Latin-letter combinations
  when 0x33DE..0x33DF then 4.0  # several Latin letters combined into one character
  when 0x33E0..0x33FE then 1.1  # days 1 to 31
  when 0x33FF         then 4.0  # 'gal' combined into one character
  end
end
246
+
247
# Looks up the Unicode version that introduced a code point in the range
# U+20000..U+2FFFF.
#
# i - Integer code point.
#
# Returns the version as a Float, or nil when the code point is not covered
# by any range listed here.
def self.uv2(i)
  table = [
    [0x20000..0x2A6D6, 3.1],  # CJK Unified Ideographs Extension B
    [0x2A6D7..0x2A6DD, 13.0], # Extension B
    [0x2A700..0x2B734, 5.2],  # Extension C
    [0x2B740..0x2B81D, 6.0],  # Extension D
    [0x2B820..0x2CEA1, 8.0],  # Extension E
    [0x2CEB0..0x2EBE0, 10.0], # Extension F
    [0x2F800..0x2FA1D, 3.1]   # CJK Compatibility Supplement
  ]

  # Same membership test that `case`/`when` applies to a Range.
  table.each do |span, version|
    return version if span === i
  end

  nil
end
258
+
259
+ private_class_method :uv0, :uv00, :uv02, :uv03, :uv2
260
+
261
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unihan2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-07 00:00:00.000000000 Z
11
+ date: 2020-12-10 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Unihan Database Utilities
14
14
  email: zhoubx@gmail.com