unihan2 0.0.4 → 0.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3) hide show
  1. checksums.yaml +4 -4
  2. data/lib/unihan2.rb +226 -58
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: bdca2031f4b271b10f7e955fac3a1f74df5ccbd0d9ed7f731b5b6469a6b80280
4
- data.tar.gz: d9e0d288ec3380057b1680c1e6bc4c36375a155f05a6dad6ea8c8266882665ed
3
+ metadata.gz: a5eb38245b07a071dd3489121782a109ec34c6bf3371822c6d610df11b035257
4
+ data.tar.gz: 0f8349355c0c597c3fc1dbfe7d191bb0a38d54a08fb534f415ca31884753c8f2
5
5
  SHA512:
6
- metadata.gz: b77a5a7d65066e30e144643a4e39790ee91c35f51f3a38d7993d5cd8c57d0000a49afe119b0a30a50fde03172218dc87beda7fc0993ae6b0136e05db9ed730b3
7
- data.tar.gz: b30b2d5f59c3e09733708f71604bcfbebb1c3e3a59956ad93e12268c2aa6b287781369bf52007821dae70e552cc59a476fdfeae4e00f390e1a128ca19a3a5fe6
6
+ metadata.gz: d5e69eea0311f7b462c9c5b3a39f0d5a2bc6b2dbf35098f6a4dd01064c81e6670a3a5c07ddbbf4c95ca56c029d323069f5032ab89b0b7d1ab497e1e18e5c7a12
7
+ data.tar.gz: 3f5b644491711e5da436dc780b67e331fa425df852f115b9245f0b6f945c523f7d732907aefeb2756fa99764bc2fb618deaae7b50b221db7360b47b5b38959a4
@@ -16,6 +16,8 @@ class Unihan2
16
16
  end
17
17
  end
18
18
 
19
+ # Listing of Characters Covered by the Unihan Database
20
+ # https://www.unicode.org/reports/tr38/tr38-29.html#BlockListing
19
21
  def self.unicode_version(code)
20
22
  if code.is_a? Integer
21
23
  i = code
@@ -23,63 +25,10 @@ class Unihan2
23
25
  i = code.hex
24
26
  end
25
27
 
26
- if i < 0xF000
27
- case i
28
- when 0x3400..0x4DB5 # Unicode 3.0: Extension A
29
- 3.0
30
- when 0x4DB6..0x4DBF # Unicode 13.0: Extension A
31
- 13.0
32
- when 0x4E00..0x9FA5 # CJK Unified Ideographs
33
- 1.1
34
- when 0x9FA6..0x9FBB # CJK Unified Ideographs
35
- 4.1
36
- when 0x9FBC..0x9FC3 # CJK Unified Ideographs
37
- 5.1
38
- when 0x9FC4..0x9FCB # CJK Unified Ideographs
39
- 5.2
40
- when 0x9FCC # CJK Unified Ideographs
41
- 6.1
42
- when 0x9FCD..0x9FD5 # CJK Unified Ideographs
43
- 8.0
44
- when 0x9FD6..0x9FEA # CJK Unified Ideographs
45
- 10.0
46
- when 0x9FEB..0x9FEF # CJK Unified Ideographs
47
- 11.0
48
- when 0x9FF0..0x9FFC # CJK Unified Ideographs
49
- 13.0
50
- end
51
- elsif i < 0x20000
52
- case i
53
- when 0xF900..0xFA2D # CJK Compatibility Ideographs
54
- 1.1
55
- when 0xFA2E..0xFA2F # CJK Compatibility Ideographs
56
- 6.1
57
- when 0xFA30..0xFA6A # CJK Compatibility Ideographs
58
- 3.2
59
- when 0xFA6B..0xFA6D # CJK Compatibility Ideographs
60
- 5.2
61
- when 0xFA70..0xFAD9 # CJK Compatibility Ideographs
62
- 4.1
63
- end
64
- else
65
- case i
66
- when 0x20000..0x2A6D6 # Extension B
67
- 3.1
68
- when 0x2A6D7..0x2A6DD # Extension B
69
- 13.0
70
- when 0x2A700..0x2B734 # extension C
71
- 5.2
72
- when 0x2B740..0x2B81D # extension D
73
- 6.0
74
- when 0x2B820..0x2CEA1 # extension E
75
- 8.0
76
- when 0x2CEB0..0x2EBE0 # extension F
77
- 10.0
78
- when 0x2F800..0x2FA1D # Unicode 3.1: CJK Compatibility Supplement
79
- 3.1
80
- when 0x30000..0x3134A # extension G
81
- 13.0
82
- end
28
+ case i
29
+ when 0..0xFFFF then uv0(i)
30
+ when 0x20000..0x2FFFF then uv2(i)
31
+ when 0x30000..0x3134A then 13.0
83
32
  end
84
33
  end
85
34
 
@@ -90,4 +39,223 @@ class Unihan2
90
39
  @strokes[char]
91
40
  end
92
41
 
93
- end
42
+ private
43
+
44
# Looks up the Unicode version that introduced a code point of the Basic
# Multilingual Plane.
#
# i - code point as an Integer; the caller passes values in 0..0xFFFF.
#
# Returns the version as a Float (for example 1.1 or 13.0), or nil when the
# code point is not covered by any listed range.
def self.uv0(i)
  # Three sub-ranges have lookup methods of their own.
  return uv00(i) if (0..0x0FFF) === i
  return uv02(i) if (0x2000..0x2FFF) === i
  return uv03(i) if (0x3000..0x33FF) === i

  listed = [
    # Latin Extended Additional
    [0x1E00..0x1E9A, 1.1],
    [0x1E9B,         2.0],
    [0x1E9C..0x1E9F, 5.1],
    [0x1EA0..0x1EF9, 1.1],
    [0x1EFA..0x1EFF, 5.1],
    # CJK Unified Ideographs Extension A
    [0x3400..0x4DB5, 3.0],
    [0x4DB6..0x4DBF, 13.0],
    # CJK Unified Ideographs
    [0x4E00..0x9FA5, 1.1],
    [0x9FA6..0x9FBB, 4.1],
    [0x9FBC..0x9FC3, 5.1],
    [0x9FC4..0x9FCB, 5.2],
    [0x9FCC,         6.1],
    [0x9FCD..0x9FD5, 8.0],
    [0x9FD6..0x9FEA, 10.0],
    [0x9FEB..0x9FEF, 11.0],
    [0x9FF0..0x9FFC, 13.0],
    # Yi Syllables
    [0xA000..0xA48C, 3.0],
    # Hangul Syllables
    [0xAC00..0xD7A3, 2.0],
    # CJK Compatibility Ideographs
    [0xF900..0xFA2D, 1.1],
    [0xFA2E..0xFA2F, 6.1],
    [0xFA30..0xFA6A, 3.2],
    [0xFA6B..0xFA6D, 5.2],
    [0xFA70..0xFAD9, 4.1],
    # Vertical Forms (punctuation for vertical Chinese text)
    [0xFE10..0xFE19, 4.1],
    # Combining Half Marks
    [0xFE20..0xFE23, 1.1],
    [0xFE24..0xFE26, 5.1],
    # CJK Compatibility Forms
    [0xFE30..0xFE44, 1.0],
    [0xFE45..0xFE46, 3.2],
    [0xFE47..0xFE48, 4.0],
    [0xFE49..0xFE4F, 1.0],
    # Small Form Variants
    [0xFE50..0xFE52, 1.0],
    [0xFE54..0xFE66, 1.0],
    [0xFE68..0xFE6B, 1.0],
    # Halfwidth and Fullwidth Forms
    [0xFF01..0xFF5E, 1.0],
    [0xFF5F..0xFF60, 3.2],
    [0xFF61..0xFF9F, 1.0]
  ]

  # Keys are either a Range or a single Integer; === handles both.
  hit = listed.find { |span, _| span === i }
  hit && hit.last
end
102
+
103
# Looks up the Unicode version that introduced a code point in
# U+0000..U+0FFF.
#
# i - code point as an Integer.
#
# Returns the version as a Float (for example 1.0 or 3.2), or nil when the
# code point is not covered by any listed range.
def self.uv00(i)
  case i
  when 0..0x01FF
    case i
    when 0..0x017E then 1.0
    when 0x017F then 1.1
    when 0x0180..0x01F0 then 1.0
    when 0x01F1..0x01F5 then 1.1
    when 0x01F6..0x01F9 then 3.0
    # The 1.1 run U+01FA..U+0217 crosses the 0x0200 boundary of the outer
    # case, so it is split in two; the second half is listed in the
    # 0x0200..0x02FF branch below.
    when 0x01FA..0x01FF then 1.1
    end
  when 0x0200..0x02FF
    case i
    # Second half of the U+01FA..U+0217 run. Without this entry these
    # code points matched the outer branch but no inner range, giving nil.
    when 0x0200..0x0217 then 1.1
    when 0x0218..0x021F then 3.0
    when 0x0220 then 3.2
    when 0x0221 then 4.0
    when 0x0222..0x0233 then 3.0
    when 0x0234..0x0236 then 4.0
    when 0x0237..0x0241 then 4.1
    when 0x0242..0x024F then 5.0
    when 0x0250..0x02A8 then 1.0
    when 0x02A9..0x02AD then 3.0
    when 0x02AE..0x02AF then 4.0
    when 0x02B0..0x02DE then 1.0
    when 0x02DF then 3.0
    when 0x02E0..0x02E9 then 1.0
    when 0x02EA..0x02EE then 3.0
    when 0x02EF..0x02FF then 4.0
    end
  when 0x0300..0x0341 then 1.0
  when 0x0400..0x04FF
    case i
    when 0x0401..0x040C then 1.0
    when 0x040D then 3.0
    when 0x040E..0x044F then 1.0
    end
  end
end
141
+
142
# Looks up the Unicode version that introduced a code point in
# U+2000..U+2FFF.
#
# i - code point as an Integer.
#
# Returns the version as a Float, or nil when the code point is not covered
# by any listed range. The listed ranges are disjoint, so they are grouped
# by the version they map to.
def self.uv02(i)
  case i
  when 0x2000..0x202E,
       0x2100..0x2138, 0x2153..0x2182, 0x2190..0x21EA,
       0x2200..0x22F1,
       0x2460..0x24EA,
       0x2500..0x2595, 0x25A0..0x25EE,
       0x2600..0x2613, 0x261A..0x266F
    1.0
  when 0x2045..0x2046, 0x25EF
    1.1
  when 0x2048..0x204F,
       0x2E80..0x2EF3, 0x2F00..0x2FD5, 0x2FF0..0x2FFB
    3.0
  when 0x2047, 0x24EB..0x24FE, 0x2596..0x259F
    3.2
  when 0x24FF
    4.0
  end
end
184
+
185
# Looks up the Unicode version that introduced a code point in
# U+3000..U+33FF.
#
# i - code point as an Integer.
#
# Returns the version as a Float, or nil when the code point is not covered
# by any listed range.
def self.uv03(i)
  ages = {
    # CJK Symbols and Punctuation
    (0x3000..0x3036) => 1.0,
    0x3037           => 1.1,
    (0x3038..0x303A) => 3.0,
    (0x303B..0x303D) => 3.2,
    0x303E           => 3.0,
    0x303F           => 1.0,
    # Hiragana
    (0x3041..0x3094) => 1.0,
    (0x3095..0x3096) => 3.2,
    (0x3099..0x309E) => 1.0,
    0x309F           => 3.2,
    # Katakana
    0x30A0           => 3.2,
    (0x30A1..0x30F6) => 1.0,
    (0x30F7..0x30FA) => 1.1,
    (0x30FB..0x30FE) => 1.0,
    0x30FF           => 3.2,
    # Bopomofo (0x312D is the upside-down form of one Bopomofo letter)
    (0x3105..0x312C) => 1.0,
    0x312D           => 5.1,
    # Hangul Compatibility Jamo
    (0x3131..0x318E) => 1.0,
    # Kanbun (small raised Han annotation marks)
    (0x3190..0x319F) => 1.0,
    # Bopomofo Extended
    (0x31A0..0x31B7) => 3.0,
    (0x31B8..0x31BA) => 6.0,
    # CJK Strokes (basic strokes such as left-falling, hook, dot)
    (0x31C0..0x31CF) => 4.1,
    (0x31D0..0x31E3) => 5.1,
    # Katakana Phonetic Extensions
    (0x31F0..0x31FF) => 3.2,
    # Enclosed CJK Letters and Months
    (0x3200..0x321C) => 1.0, # parenthesized Hangul
    (0x321D..0x321E) => 4.0, # parenthesized Hangul
    (0x3220..0x3243) => 1.0, # parenthesized numerals one to ten and Han characters
    (0x3244..0x324F) => 5.2, # circled characters and circled 10 to 80
    0x3250           => 4.0, # 'PTE' as a single character
    (0x3251..0x325F) => 3.2, # circled 21 to 35
    (0x3260..0x327B) => 1.0, # circled Hangul
    (0x327C..0x327D) => 4.0, # circled Hangul
    0x327E           => 4.1, # circled Hangul
    (0x327F..0x32B0) => 1.0, # circled numerals one to ten and Han characters
    (0x32B1..0x32BF) => 3.2, # circled 36 to 50
    (0x32C0..0x32CB) => 1.1, # months, January to December
    (0x32CC..0x32CF) => 4.0, # several Latin letters as a single character
    (0x32D0..0x32FE) => 1.0, # circled Japanese kana
    # CJK Compatibility
    (0x3300..0x3357) => 1.0, # several Japanese kana as a single character
    (0x3358..0x3376) => 1.1, # hours 0 to 24, and Latin letter groups
    (0x3377..0x337A) => 4.0, # several Latin letters as a single character
    (0x337B..0x33DD) => 1.0, # Japanese kanji groups and Latin letter groups
    (0x33DE..0x33DF) => 4.0, # several Latin letters as a single character
    (0x33E0..0x33FE) => 1.1, # days of the month, 1 to 31
    0x33FF           => 4.0  # 'gal' as a single character
  }

  # Keys are either a Range or a single Integer; === handles both.
  ages.each do |span, version|
    return version if span === i
  end
  nil
end
246
+
247
# Looks up the Unicode version that introduced a code point in
# U+20000..U+2FFFF.
#
# i - code point as an Integer.
#
# Returns the version as a Float, or nil when the code point is not covered
# by any listed range.
def self.uv2(i)
  blocks = [
    [0x20000..0x2A6D6, 3.1],  # CJK Extension B
    [0x2A6D7..0x2A6DD, 13.0], # CJK Extension B
    [0x2A700..0x2B734, 5.2],  # CJK Extension C
    [0x2B740..0x2B81D, 6.0],  # CJK Extension D
    [0x2B820..0x2CEA1, 8.0],  # CJK Extension E
    [0x2CEB0..0x2EBE0, 10.0], # CJK Extension F
    [0x2F800..0x2FA1D, 3.1]   # CJK Compatibility Ideographs Supplement
  ]

  _span, version = blocks.find { |span, _| span === i }
  version
end
258
+
259
+ private_class_method :uv0, :uv00, :uv02, :uv03, :uv2
260
+
261
+ end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: unihan2
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.4
4
+ version: 0.1.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-12-07 00:00:00.000000000 Z
11
+ date: 2020-12-10 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Unihan Database Utilities
14
14
  email: zhoubx@gmail.com