cbeta 3.2.0 → 3.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 982e4d10689bc1fa6f71d8c27e880cbfdcf248c5cbdb8994ad65bf6d2c5e5259
4
- data.tar.gz: 7c6cfa28c5f48f1f84bb4ad2921e45ac1db1f2039626999a1fd620bbe91c9564
3
+ metadata.gz: d6371fac001d191b3245ba02d1b3287cedfe46213bea091d9259d9d24c2454b7
4
+ data.tar.gz: f71e61e1598462fd02fed5eb8e1c0111ee259d1b3bb85424ce869a9f0ea0dad8
5
5
  SHA512:
6
- metadata.gz: b1ac00d3090b9a19df3e3d25d4cf1c2dc4f50a37e6de3cbbdf4b09a9cd6cb68b1fa7700e825cad8352628f7a9bab8ed34a916abbd4d8380a68d5783e6aef8513
7
- data.tar.gz: b1a6f2ebb218737938ae7bc35eb5bc648cc544c8ac964647054335bf15f218e17db7a337e8b529060eccd69094872df6725ebed18b0e7cb0681253f3eccdf23d
6
+ metadata.gz: fcc4e69ae85ef617ab214e489842d6c82a4c61fa7326c5f3d9ada35f49e71b602bb4f7c4153aeb9e3aa5c978a52ac19cc926842ea2535e6518b4c518e80f274a
7
+ data.tar.gz: 837dc5642b1285cf602a66572908d0ac4b3b3a32e7a509ee1c19337ea5f9363b3cbf9b1d59cab65565f45dd39808eff511383521c633ecc08fe480c9c360a6bb
data/lib/cbeta/gaiji.rb CHANGED
@@ -1,10 +1,12 @@
1
1
  require 'json'
2
+ require 'unihan2'
2
3
 
3
4
  # 存取 CBETA 缺字資料庫
4
5
  class CBETA::Gaiji
5
6
 
6
7
  # 載入 CBETA 缺字資料庫
7
8
  def initialize
9
+ @us = CBETA::UnicodeService.new
8
10
  folder = File.join(File.dirname(__FILE__), '../data')
9
11
  fn = File.join(folder, 'cbeta_gaiji.json')
10
12
  @gaijis = JSON.parse(File.read(fn))
@@ -73,9 +75,12 @@ class CBETA::Gaiji
73
75
 
74
76
  if gid.start_with? 'CB'
75
77
  cb_priority.each do |k|
76
- if k == 'PUA'
78
+ case k
79
+ when 'PUA'
77
80
  return CBETA.pua(gid)
78
- elsif g.key? k
81
+ when 'uni_char', 'norm_uni_char'
82
+ return g[k] if @us.level2?(g[k])
83
+ else
79
84
  return g[k] unless g[k].empty?
80
85
  end
81
86
  end
@@ -0,0 +1,19 @@
1
+ class CBETA::UnicodeService
2
+ def initialize
3
+ @u2 = Unihan2.new
4
+ end
5
+
6
+ def level1?(code)
7
+ return false if code.nil?
8
+ # Unicode 3.0 以內 在 mobile 可以正確顯示
9
+ v = @u2.ver(code)
10
+ raise "Unihan2.ver 回傳 nil, code: #{code}" if v.nil?
11
+ v <= 3
12
+ end
13
+
14
+ def level2?(code)
15
+ return false if code.nil?
16
+ # Unicode 10 以內 在 desktop 有字型可以顯示
17
+ @u2.ver(code) <= 10
18
+ end
19
+ end
@@ -3,6 +3,8 @@ require 'nokogiri'
3
3
  class CBETA::XMLDocument
4
4
  PASS = %w(back graphic mulu rdg sic teiHeader)
5
5
 
6
+ attr_reader :doc
7
+
6
8
  def initialize(string_or_io)
7
9
  @doc = Nokogiri::XML(string_or_io)
8
10
  @doc.remove_namespaces!
@@ -44,9 +46,21 @@ class CBETA::XMLDocument
44
46
  ' '
45
47
  end
46
48
 
49
+ def e_caption(e)
50
+ traverse(e) + "\n"
51
+ end
52
+
47
53
  def e_cell(e)
48
54
  traverse(e) + "\n"
49
55
  end
56
+
57
+ def e_cit(e)
58
+ traverse(e)
59
+ end
60
+
61
+ def e_closer(e)
62
+ traverse(e) + "\n"
63
+ end
50
64
 
51
65
  def e_corr(e)
52
66
  traverse(e)
@@ -63,6 +77,10 @@ class CBETA::XMLDocument
63
77
  def e_div(e)
64
78
  traverse(e)
65
79
  end
80
+
81
+ def e_docAuthor(e)
82
+ traverse(e)
83
+ end
66
84
 
67
85
  def e_docNumber(e)
68
86
  traverse(e) + "\n"
@@ -73,6 +91,12 @@ class CBETA::XMLDocument
73
91
  end
74
92
 
75
93
  def e_figure(e)
94
+ r = traverse(e)
95
+ r << "\n" unless r.empty?
96
+ r
97
+ end
98
+
99
+ def e_figDesc(e)
76
100
  traverse(e) + "\n"
77
101
  end
78
102
 
@@ -88,14 +112,19 @@ class CBETA::XMLDocument
88
112
  cb_priority = %w(uni_char composition)
89
113
  end
90
114
 
91
- gid = e['ref'][1..-1]
92
- r = @gaiji.to_s(gid, cb_priority:)
93
- abort "Line:#{__LINE__} 缺字處理失敗:#{gid}" if r.nil?
94
- r
115
+ gid = e['ref'].delete_prefix('#')
116
+
117
+ unless @gaiji.key?(gid)
118
+ raise "在 CBETA 缺字庫中找不到此缺字碼: #{gid}"
119
+ end
120
+
121
+ @gaiji.to_s(gid, cb_priority:)
95
122
  end
96
123
 
97
124
  def e_head(e)
98
- traverse(e) + "\n"
125
+ r = traverse(e)
126
+ r << "\n" unless r.empty?
127
+ r
99
128
  end
100
129
 
101
130
  def e_hi(e)
@@ -178,6 +207,14 @@ class CBETA::XMLDocument
178
207
  ''
179
208
  end
180
209
 
210
+ def e_quote(e)
211
+ traverse(e)
212
+ end
213
+
214
+ def e_ref(e)
215
+ traverse(e)
216
+ end
217
+
181
218
  def e_reg(e)
182
219
  r = ''
183
220
  choice = e.at_xpath('ancestor::choice')
@@ -189,6 +226,10 @@ class CBETA::XMLDocument
189
226
  traverse(e) + "\n"
190
227
  end
191
228
 
229
+ def e_seg(e)
230
+ traverse(e)
231
+ end
232
+
192
233
  def e_sg(e)
193
234
  '(' + traverse(e) + ')'
194
235
  end
@@ -266,7 +307,6 @@ class CBETA::XMLDocument
266
307
  r
267
308
  end
268
309
 
269
-
270
310
  def handle_node(e)
271
311
  return '' if e.comment?
272
312
  return handle_text(e) if e.text?
data/lib/cbeta.rb CHANGED
@@ -234,4 +234,5 @@ require 'cbeta/p5a_to_simple_html'
234
234
  require 'cbeta/p5a_to_text'
235
235
  require 'cbeta/p5a_validator'
236
236
  require 'cbeta/html_to_text'
237
+ require 'cbeta/unicode_service'
237
238
  require 'cbeta/xml_document'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.0
4
+ version: 3.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-03-09 00:00:00.000000000 Z
11
+ date: 2024-03-14 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com
@@ -31,6 +31,7 @@ files:
31
31
  - lib/cbeta/p5a_to_simple_html.rb
32
32
  - lib/cbeta/p5a_to_text.rb
33
33
  - lib/cbeta/p5a_validator.rb
34
+ - lib/cbeta/unicode_service.rb
34
35
  - lib/cbeta/xml_document.rb
35
36
  - lib/data/canons.csv
36
37
  - lib/data/categories.json