cbeta 3.2.0 → 3.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cbeta/gaiji.rb +7 -2
- data/lib/cbeta/unicode_service.rb +19 -0
- data/lib/cbeta/xml_document.rb +46 -6
- data/lib/cbeta.rb +1 -0
- metadata +3 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: d6371fac001d191b3245ba02d1b3287cedfe46213bea091d9259d9d24c2454b7
|
4
|
+
data.tar.gz: f71e61e1598462fd02fed5eb8e1c0111ee259d1b3bb85424ce869a9f0ea0dad8
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: fcc4e69ae85ef617ab214e489842d6c82a4c61fa7326c5f3d9ada35f49e71b602bb4f7c4153aeb9e3aa5c978a52ac19cc926842ea2535e6518b4c518e80f274a
|
7
|
+
data.tar.gz: 837dc5642b1285cf602a66572908d0ac4b3b3a32e7a509ee1c19337ea5f9363b3cbf9b1d59cab65565f45dd39808eff511383521c633ecc08fe480c9c360a6bb
|
data/lib/cbeta/gaiji.rb
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
require 'json'
|
2
|
+
require 'unihan2'
|
2
3
|
|
3
4
|
# 存取 CBETA 缺字資料庫
|
4
5
|
class CBETA::Gaiji
|
5
6
|
|
6
7
|
# 載入 CBETA 缺字資料庫
|
7
8
|
def initialize
|
9
|
+
@us = CBETA::UnicodeService.new
|
8
10
|
folder = File.join(File.dirname(__FILE__), '../data')
|
9
11
|
fn = File.join(folder, 'cbeta_gaiji.json')
|
10
12
|
@gaijis = JSON.parse(File.read(fn))
|
@@ -73,9 +75,12 @@ class CBETA::Gaiji
|
|
73
75
|
|
74
76
|
if gid.start_with? 'CB'
|
75
77
|
cb_priority.each do |k|
|
76
|
-
|
78
|
+
case k
|
79
|
+
when 'PUA'
|
77
80
|
return CBETA.pua(gid)
|
78
|
-
|
81
|
+
when 'uni_char', 'norm_uni_char'
|
82
|
+
return g[k] if @us.level2?(g[k])
|
83
|
+
else
|
79
84
|
return g[k] unless g[k].empty?
|
80
85
|
end
|
81
86
|
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# Classifies characters by the Unicode version reported by Unihan2#ver,
# so callers can decide whether a character is safe to emit as-is.
class CBETA::UnicodeService
  def initialize
    @u2 = Unihan2.new
  end

  # Characters within Unicode 3.0 display correctly on mobile.
  #
  # Returns false for nil; otherwise true when the reported version is <= 3.
  # Raises RuntimeError when Unihan2 has no version for +code+.
  def level1?(code)
    return false if code.nil?

    version_of(code) <= 3
  end

  # Characters within Unicode 10 have desktop fonts that can display them.
  #
  # Returns false for nil; otherwise true when the reported version is <= 10.
  # Raises RuntimeError when Unihan2 has no version for +code+ (previously
  # this surfaced as a NoMethodError on nil, unlike level1?).
  def level2?(code)
    return false if code.nil?

    version_of(code) <= 10
  end

  private

  # Looks up the Unicode version for +code+, failing loudly with the
  # offending code when Unihan2 returns nil.
  def version_of(code)
    v = @u2.ver(code)
    raise "Unihan2.ver 回傳 nil, code: #{code}" if v.nil?

    v
  end
end
|
data/lib/cbeta/xml_document.rb
CHANGED
@@ -3,6 +3,8 @@ require 'nokogiri'
|
|
3
3
|
class CBETA::XMLDocument
|
4
4
|
PASS = %w(back graphic mulu rdg sic teiHeader)
|
5
5
|
|
6
|
+
attr_reader :doc
|
7
|
+
|
6
8
|
def initialize(string_or_io)
|
7
9
|
@doc = Nokogiri::XML(string_or_io)
|
8
10
|
@doc.remove_namespaces!
|
@@ -44,9 +46,21 @@ class CBETA::XMLDocument
|
|
44
46
|
' '
|
45
47
|
end
|
46
48
|
|
49
|
+
# Returns the traversed content of +e+ followed by a newline.
def e_caption(e) = traverse(e) + "\n"
|
52
|
+
|
47
53
|
# Returns the traversed content of +e+ followed by a newline.
def e_cell(e) = traverse(e) + "\n"
|
56
|
+
|
57
|
+
# Returns the traversed content of +e+ unchanged.
def e_cit(e) = traverse(e)
|
60
|
+
|
61
|
+
# Returns the traversed content of +e+ followed by a newline.
def e_closer(e) = traverse(e) + "\n"
|
50
64
|
|
51
65
|
def e_corr(e)
|
52
66
|
traverse(e)
|
@@ -63,6 +77,10 @@ class CBETA::XMLDocument
|
|
63
77
|
# Returns the traversed content of +e+ unchanged.
def e_div(e) = traverse(e)
|
80
|
+
|
81
|
+
# Returns the traversed content of +e+ unchanged.
def e_docAuthor(e) = traverse(e)
|
66
84
|
|
67
85
|
def e_docNumber(e)
|
68
86
|
traverse(e) + "\n"
|
@@ -73,6 +91,12 @@ class CBETA::XMLDocument
|
|
73
91
|
end
|
74
92
|
|
75
93
|
# Returns the traversed content of +e+, terminated by a newline only when
# that content is non-empty. The newline is appended in place to the string
# returned by traverse.
def e_figure(e)
  traverse(e).tap { |text| text << "\n" unless text.empty? }
end
|
98
|
+
|
99
|
+
# Returns the traversed content of +e+ followed by a newline.
def e_figDesc(e) = traverse(e) + "\n"
|
78
102
|
|
@@ -88,14 +112,19 @@ class CBETA::XMLDocument
|
|
88
112
|
cb_priority = %w(uni_char composition)
|
89
113
|
end
|
90
114
|
|
91
|
-
gid = e['ref']
|
92
|
-
|
93
|
-
|
94
|
-
|
115
|
+
gid = e['ref'].delete_prefix('#')
|
116
|
+
|
117
|
+
unless @gaiji.key?(gid)
|
118
|
+
raise "在 CBETA 缺字庫中找不到此缺字碼: #{gid}"
|
119
|
+
end
|
120
|
+
|
121
|
+
@gaiji.to_s(gid, cb_priority:)
|
95
122
|
end
|
96
123
|
|
97
124
|
# Returns the traversed content of +e+, terminated by a newline only when
# that content is non-empty. The newline is appended in place to the string
# returned by traverse.
def e_head(e)
  traverse(e).tap { |text| text << "\n" unless text.empty? }
end
|
100
129
|
|
101
130
|
def e_hi(e)
|
@@ -178,6 +207,14 @@ class CBETA::XMLDocument
|
|
178
207
|
''
|
179
208
|
end
|
180
209
|
|
210
|
+
# Returns the traversed content of +e+ unchanged.
def e_quote(e) = traverse(e)
|
213
|
+
|
214
|
+
# Returns the traversed content of +e+ unchanged.
def e_ref(e) = traverse(e)
|
217
|
+
|
181
218
|
def e_reg(e)
|
182
219
|
r = ''
|
183
220
|
choice = e.at_xpath('ancestor::choice')
|
@@ -189,6 +226,10 @@ class CBETA::XMLDocument
|
|
189
226
|
traverse(e) + "\n"
|
190
227
|
end
|
191
228
|
|
229
|
+
# Returns the traversed content of +e+ unchanged.
def e_seg(e) = traverse(e)
|
232
|
+
|
192
233
|
# Returns the traversed content of +e+ wrapped in ASCII parentheses.
def e_sg(e) = '(' + traverse(e) + ')'
|
@@ -266,7 +307,6 @@ class CBETA::XMLDocument
|
|
266
307
|
r
|
267
308
|
end
|
268
309
|
|
269
|
-
|
270
310
|
def handle_node(e)
|
271
311
|
return '' if e.comment?
|
272
312
|
return handle_text(e) if e.text?
|
data/lib/cbeta.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cbeta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 3.2.0
|
4
|
+
version: 3.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ray Chou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2024-03-
|
11
|
+
date: 2024-03-14 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
|
14
14
|
email: zhoubx@gmail.com
|
@@ -31,6 +31,7 @@ files:
|
|
31
31
|
- lib/cbeta/p5a_to_simple_html.rb
|
32
32
|
- lib/cbeta/p5a_to_text.rb
|
33
33
|
- lib/cbeta/p5a_validator.rb
|
34
|
+
- lib/cbeta/unicode_service.rb
|
34
35
|
- lib/cbeta/xml_document.rb
|
35
36
|
- lib/data/canons.csv
|
36
37
|
- lib/data/categories.json
|