cbeta 3.2.0 → 3.3.0

This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 982e4d10689bc1fa6f71d8c27e880cbfdcf248c5cbdb8994ad65bf6d2c5e5259
4
- data.tar.gz: 7c6cfa28c5f48f1f84bb4ad2921e45ac1db1f2039626999a1fd620bbe91c9564
3
+ metadata.gz: d6371fac001d191b3245ba02d1b3287cedfe46213bea091d9259d9d24c2454b7
4
+ data.tar.gz: f71e61e1598462fd02fed5eb8e1c0111ee259d1b3bb85424ce869a9f0ea0dad8
5
5
  SHA512:
6
- metadata.gz: b1ac00d3090b9a19df3e3d25d4cf1c2dc4f50a37e6de3cbbdf4b09a9cd6cb68b1fa7700e825cad8352628f7a9bab8ed34a916abbd4d8380a68d5783e6aef8513
7
- data.tar.gz: b1a6f2ebb218737938ae7bc35eb5bc648cc544c8ac964647054335bf15f218e17db7a337e8b529060eccd69094872df6725ebed18b0e7cb0681253f3eccdf23d
6
+ metadata.gz: fcc4e69ae85ef617ab214e489842d6c82a4c61fa7326c5f3d9ada35f49e71b602bb4f7c4153aeb9e3aa5c978a52ac19cc926842ea2535e6518b4c518e80f274a
7
+ data.tar.gz: 837dc5642b1285cf602a66572908d0ac4b3b3a32e7a509ee1c19337ea5f9363b3cbf9b1d59cab65565f45dd39808eff511383521c633ecc08fe480c9c360a6bb
data/lib/cbeta/gaiji.rb CHANGED
@@ -1,10 +1,12 @@
1
1
  require 'json'
2
+ require 'unihan2'
2
3
 
3
4
  # 存取 CBETA 缺字資料庫
4
5
  class CBETA::Gaiji
5
6
 
6
7
  # 載入 CBETA 缺字資料庫
7
8
  def initialize
9
+ @us = CBETA::UnicodeService.new
8
10
  folder = File.join(File.dirname(__FILE__), '../data')
9
11
  fn = File.join(folder, 'cbeta_gaiji.json')
10
12
  @gaijis = JSON.parse(File.read(fn))
@@ -73,9 +75,12 @@ class CBETA::Gaiji
73
75
 
74
76
  if gid.start_with? 'CB'
75
77
  cb_priority.each do |k|
76
- if k == 'PUA'
78
+ case k
79
+ when 'PUA'
77
80
  return CBETA.pua(gid)
78
- elsif g.key? k
81
+ when 'uni_char', 'norm_uni_char'
82
+ return g[k] if @us.level2?(g[k])
83
+ else
79
84
  return g[k] unless g[k].empty?
80
85
  end
81
86
  end
@@ -0,0 +1,19 @@
1
+ class CBETA::UnicodeService
2
+ def initialize
3
+ @u2 = Unihan2.new
4
+ end
5
+
6
+ def level1?(code)
7
+ return false if code.nil?
8
+ # Unicode 3.0 以內 在 mobile 可以正確顯示
9
+ v = @u2.ver(code)
10
+ raise "Unihan2.ver 回傳 nil, code: #{code}" if v.nil?
11
+ v <= 3
12
+ end
13
+
14
+ def level2?(code)
15
+ return false if code.nil?
16
+ # Unicode 10 以內 在 desktop 有字型可以顯示
17
+ @u2.ver(code) <= 10
18
+ end
19
+ end
@@ -3,6 +3,8 @@ require 'nokogiri'
3
3
  class CBETA::XMLDocument
4
4
  PASS = %w(back graphic mulu rdg sic teiHeader)
5
5
 
6
+ attr_reader :doc
7
+
6
8
  def initialize(string_or_io)
7
9
  @doc = Nokogiri::XML(string_or_io)
8
10
  @doc.remove_namespaces!
@@ -44,9 +46,21 @@ class CBETA::XMLDocument
44
46
  ' '
45
47
  end
46
48
 
49
+ def e_caption(e)
50
+ traverse(e) + "\n"
51
+ end
52
+
47
53
  def e_cell(e)
48
54
  traverse(e) + "\n"
49
55
  end
56
+
57
+ def e_cit(e)
58
+ traverse(e)
59
+ end
60
+
61
+ def e_closer(e)
62
+ traverse(e) + "\n"
63
+ end
50
64
 
51
65
  def e_corr(e)
52
66
  traverse(e)
@@ -63,6 +77,10 @@ class CBETA::XMLDocument
63
77
  def e_div(e)
64
78
  traverse(e)
65
79
  end
80
+
81
+ def e_docAuthor(e)
82
+ traverse(e)
83
+ end
66
84
 
67
85
  def e_docNumber(e)
68
86
  traverse(e) + "\n"
@@ -73,6 +91,12 @@ class CBETA::XMLDocument
73
91
  end
74
92
 
75
93
  def e_figure(e)
94
+ r = traverse(e)
95
+ r << "\n" unless r.empty?
96
+ r
97
+ end
98
+
99
+ def e_figDesc(e)
76
100
  traverse(e) + "\n"
77
101
  end
78
102
 
@@ -88,14 +112,19 @@ class CBETA::XMLDocument
88
112
  cb_priority = %w(uni_char composition)
89
113
  end
90
114
 
91
- gid = e['ref'][1..-1]
92
- r = @gaiji.to_s(gid, cb_priority:)
93
- abort "Line:#{__LINE__} 缺字處理失敗:#{gid}" if r.nil?
94
- r
115
+ gid = e['ref'].delete_prefix('#')
116
+
117
+ unless @gaiji.key?(gid)
118
+ raise "在 CBETA 缺字庫中找不到此缺字碼: #{gid}"
119
+ end
120
+
121
+ @gaiji.to_s(gid, cb_priority:)
95
122
  end
96
123
 
97
124
  def e_head(e)
98
- traverse(e) + "\n"
125
+ r = traverse(e)
126
+ r << "\n" unless r.empty?
127
+ r
99
128
  end
100
129
 
101
130
  def e_hi(e)
@@ -178,6 +207,14 @@ class CBETA::XMLDocument
178
207
  ''
179
208
  end
180
209
 
210
+ def e_quote(e)
211
+ traverse(e)
212
+ end
213
+
214
+ def e_ref(e)
215
+ traverse(e)
216
+ end
217
+
181
218
  def e_reg(e)
182
219
  r = ''
183
220
  choice = e.at_xpath('ancestor::choice')
@@ -189,6 +226,10 @@ class CBETA::XMLDocument
189
226
  traverse(e) + "\n"
190
227
  end
191
228
 
229
+ def e_seg(e)
230
+ traverse(e)
231
+ end
232
+
192
233
  def e_sg(e)
193
234
  '(' + traverse(e) + ')'
194
235
  end
@@ -266,7 +307,6 @@ class CBETA::XMLDocument
266
307
  r
267
308
  end
268
309
 
269
-
270
310
  def handle_node(e)
271
311
  return '' if e.comment?
272
312
  return handle_text(e) if e.text?
data/lib/cbeta.rb CHANGED
@@ -234,4 +234,5 @@ require 'cbeta/p5a_to_simple_html'
234
234
  require 'cbeta/p5a_to_text'
235
235
  require 'cbeta/p5a_validator'
236
236
  require 'cbeta/html_to_text'
237
+ require 'cbeta/unicode_service'
237
238
  require 'cbeta/xml_document'
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.2.0
4
+ version: 3.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-03-09 00:00:00.000000000 Z
11
+ date: 2024-03-14 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com
@@ -31,6 +31,7 @@ files:
31
31
  - lib/cbeta/p5a_to_simple_html.rb
32
32
  - lib/cbeta/p5a_to_text.rb
33
33
  - lib/cbeta/p5a_validator.rb
34
+ - lib/cbeta/unicode_service.rb
34
35
  - lib/cbeta/xml_document.rb
35
36
  - lib/data/canons.csv
36
37
  - lib/data/categories.json