cbeta 3.2.0 → 3.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/cbeta/gaiji.rb +7 -2
- data/lib/cbeta/unicode_service.rb +19 -0
- data/lib/cbeta/xml_document.rb +46 -6
- data/lib/cbeta.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d6371fac001d191b3245ba02d1b3287cedfe46213bea091d9259d9d24c2454b7
|
4
|
+
data.tar.gz: f71e61e1598462fd02fed5eb8e1c0111ee259d1b3bb85424ce869a9f0ea0dad8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fcc4e69ae85ef617ab214e489842d6c82a4c61fa7326c5f3d9ada35f49e71b602bb4f7c4153aeb9e3aa5c978a52ac19cc926842ea2535e6518b4c518e80f274a
|
7
|
+
data.tar.gz: 837dc5642b1285cf602a66572908d0ac4b3b3a32e7a509ee1c19337ea5f9363b3cbf9b1d59cab65565f45dd39808eff511383521c633ecc08fe480c9c360a6bb
|
data/lib/cbeta/gaiji.rb
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
require 'json'
|
2
|
+
require 'unihan2'
|
2
3
|
|
3
4
|
# 存取 CBETA 缺字資料庫
|
4
5
|
class CBETA::Gaiji
|
5
6
|
|
6
7
|
# 載入 CBETA 缺字資料庫
|
7
8
|
def initialize
|
9
|
+
@us = CBETA::UnicodeService.new
|
8
10
|
folder = File.join(File.dirname(__FILE__), '../data')
|
9
11
|
fn = File.join(folder, 'cbeta_gaiji.json')
|
10
12
|
@gaijis = JSON.parse(File.read(fn))
|
@@ -73,9 +75,12 @@ class CBETA::Gaiji
|
|
73
75
|
|
74
76
|
if gid.start_with? 'CB'
|
75
77
|
cb_priority.each do |k|
|
76
|
-
|
78
|
+
case k
|
79
|
+
when 'PUA'
|
77
80
|
return CBETA.pua(gid)
|
78
|
-
|
81
|
+
when 'uni_char', 'norm_uni_char'
|
82
|
+
return g[k] if @us.level2?(g[k])
|
83
|
+
else
|
79
84
|
return g[k] unless g[k].empty?
|
80
85
|
end
|
81
86
|
end
|
data/lib/cbeta/unicode_service.rb
ADDED
@@ -0,0 +1,19 @@
|
|
1
|
+
class CBETA::UnicodeService
|
2
|
+
def initialize
|
3
|
+
@u2 = Unihan2.new
|
4
|
+
end
|
5
|
+
|
6
|
+
def level1?(code)
|
7
|
+
return false if code.nil?
|
8
|
+
# Unicode 3.0 以內 在 mobile 可以正確顯示
|
9
|
+
v = @u2.ver(code)
|
10
|
+
raise "Unihan2.ver 回傳 nil, code: #{code}" if v.nil?
|
11
|
+
v <= 3
|
12
|
+
end
|
13
|
+
|
14
|
+
def level2?(code)
|
15
|
+
return false if code.nil?
|
16
|
+
# Unicode 10 以內 在 desktop 有字型可以顯示
|
17
|
+
@u2.ver(code) <= 10
|
18
|
+
end
|
19
|
+
end
|
data/lib/cbeta/xml_document.rb
CHANGED
@@ -3,6 +3,8 @@ require 'nokogiri'
|
|
3
3
|
class CBETA::XMLDocument
|
4
4
|
PASS = %w(back graphic mulu rdg sic teiHeader)
|
5
5
|
|
6
|
+
attr_reader :doc
|
7
|
+
|
6
8
|
def initialize(string_or_io)
|
7
9
|
@doc = Nokogiri::XML(string_or_io)
|
8
10
|
@doc.remove_namespaces!
|
@@ -44,9 +46,21 @@ class CBETA::XMLDocument
|
|
44
46
|
' '
|
45
47
|
end
|
46
48
|
|
49
|
+
def e_caption(e)
|
50
|
+
traverse(e) + "\n"
|
51
|
+
end
|
52
|
+
|
47
53
|
def e_cell(e)
|
48
54
|
traverse(e) + "\n"
|
49
55
|
end
|
56
|
+
|
57
|
+
def e_cit(e)
|
58
|
+
traverse(e)
|
59
|
+
end
|
60
|
+
|
61
|
+
def e_closer(e)
|
62
|
+
traverse(e) + "\n"
|
63
|
+
end
|
50
64
|
|
51
65
|
def e_corr(e)
|
52
66
|
traverse(e)
|
@@ -63,6 +77,10 @@ class CBETA::XMLDocument
|
|
63
77
|
def e_div(e)
|
64
78
|
traverse(e)
|
65
79
|
end
|
80
|
+
|
81
|
+
def e_docAuthor(e)
|
82
|
+
traverse(e)
|
83
|
+
end
|
66
84
|
|
67
85
|
def e_docNumber(e)
|
68
86
|
traverse(e) + "\n"
|
@@ -73,6 +91,12 @@ class CBETA::XMLDocument
|
|
73
91
|
end
|
74
92
|
|
75
93
|
def e_figure(e)
|
94
|
+
r = traverse(e)
|
95
|
+
r << "\n" unless r.empty?
|
96
|
+
r
|
97
|
+
end
|
98
|
+
|
99
|
+
def e_figDesc(e)
|
76
100
|
traverse(e) + "\n"
|
77
101
|
end
|
78
102
|
|
@@ -88,14 +112,19 @@ class CBETA::XMLDocument
|
|
88
112
|
cb_priority = %w(uni_char composition)
|
89
113
|
end
|
90
114
|
|
91
|
-
gid = e['ref']
|
92
|
-
|
93
|
-
|
94
|
-
|
115
|
+
gid = e['ref'].delete_prefix('#')
|
116
|
+
|
117
|
+
unless @gaiji.key?(gid)
|
118
|
+
raise "在 CBETA 缺字庫中找不到此缺字碼: #{gid}"
|
119
|
+
end
|
120
|
+
|
121
|
+
@gaiji.to_s(gid, cb_priority:)
|
95
122
|
end
|
96
123
|
|
97
124
|
def e_head(e)
|
98
|
-
traverse(e)
|
125
|
+
r = traverse(e)
|
126
|
+
r << "\n" unless r.empty?
|
127
|
+
r
|
99
128
|
end
|
100
129
|
|
101
130
|
def e_hi(e)
|
@@ -178,6 +207,14 @@ class CBETA::XMLDocument
|
|
178
207
|
''
|
179
208
|
end
|
180
209
|
|
210
|
+
def e_quote(e)
|
211
|
+
traverse(e)
|
212
|
+
end
|
213
|
+
|
214
|
+
def e_ref(e)
|
215
|
+
traverse(e)
|
216
|
+
end
|
217
|
+
|
181
218
|
def e_reg(e)
|
182
219
|
r = ''
|
183
220
|
choice = e.at_xpath('ancestor::choice')
|
@@ -189,6 +226,10 @@ class CBETA::XMLDocument
|
|
189
226
|
traverse(e) + "\n"
|
190
227
|
end
|
191
228
|
|
229
|
+
def e_seg(e)
|
230
|
+
traverse(e)
|
231
|
+
end
|
232
|
+
|
192
233
|
def e_sg(e)
|
193
234
|
'(' + traverse(e) + ')'
|
194
235
|
end
|
@@ -266,7 +307,6 @@ class CBETA::XMLDocument
|
|
266
307
|
r
|
267
308
|
end
|
268
309
|
|
269
|
-
|
270
310
|
def handle_node(e)
|
271
311
|
return '' if e.comment?
|
272
312
|
return handle_text(e) if e.text?
|
data/lib/cbeta.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cbeta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.0
|
4
|
+
version: 3.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ray Chou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-03-
|
11
|
+
date: 2024-03-14 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
|
14
14
|
email: zhoubx@gmail.com
|
@@ -31,6 +31,7 @@ files:
|
|
31
31
|
- lib/cbeta/p5a_to_simple_html.rb
|
32
32
|
- lib/cbeta/p5a_to_text.rb
|
33
33
|
- lib/cbeta/p5a_validator.rb
|
34
|
+
- lib/cbeta/unicode_service.rb
|
34
35
|
- lib/cbeta/xml_document.rb
|
35
36
|
- lib/data/canons.csv
|
36
37
|
- lib/data/categories.json
|