cbeta 3.1.3 → 3.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4bb29e86cf68e6503bb935c35f6499132f8106fc43e11de6a2e733df44901009
4
- data.tar.gz: 454dcfbffbc118c6071c4f7932fc8c18118bcd7bbc60e3797f7386920e4f7b77
3
+ metadata.gz: 5d32b2ace3526aa8492fcfb42bbbebf08edfa1a1df1d3ef5aecc8e409e7423a9
4
+ data.tar.gz: 67bb568adb4710f114216161e16a6c7c5e0a0c34af6b770a42fc1e26143a3b03
5
5
  SHA512:
6
- metadata.gz: d2ddd9c064712fe4bc74912d936479046080b97fe68b9884af164be26802efdc0a6455d7171e7f2854df5ce3106a8112441589dbcd4563388f44dbb3567a018d
7
- data.tar.gz: c5d210953964044077cac5b264fdf07fe0a3e0fe04a3b74c2dadf36ec28e0117e5b5292b7d6333075d621ca67894b078c63e0bf0667bf0ade266e2a27bba764b
6
+ metadata.gz: 71ade7189b3f3bf90f9436e1d756c88f72164285972d7cab8cb81bb2ed27d2c13a048e2909cfb58f8250288e5d31bee888acedf6cf4ea225db5e7ea5605db575
7
+ data.tar.gz: f84625e849635021b7ba2c10b7b54725830bce6786eb40fe8b3696b8b732274ec89cbbb4fbedb9e494be88f221e7252c23d90fe83fe8e8b81edc8189bf9969f0
data/lib/cbeta/gaiji.rb CHANGED
@@ -69,6 +69,8 @@ class CBETA::Gaiji
69
69
  end
70
70
 
71
71
  g = @gaijis[gid]
72
+ return nil if g.nil?
73
+
72
74
  if gid.start_with? 'CB'
73
75
  cb_priority.each do |k|
74
76
  if k == 'PUA'
data/lib/cbeta/xml_document.rb ADDED
@@ -0,0 +1,340 @@
1
+ require 'nokogiri'
2
+
# Converts a CBETA TEI XML document into plain text.
#
# The input is parsed with Nokogiri and its namespaces are removed. The tree
# is then walked depth-first: an element <foo> is rendered by the private
# method e_foo, text nodes are rendered by handle_text, and comments as well
# as every element listed in PASS are dropped together with their content.
class CBETA::XMLDocument
  # Elements whose whole subtree is left out of the output.
  PASS = %w[back graphic mulu rdg sic teiHeader].freeze

  # Elements rendered as nothing more than the text of their children.
  TRANSPARENT_ELEMENTS = %w[
    app body cit corr date dialog div docAuthor hi jhead lem quote ref seg sp tt
  ].freeze

  # Elements rendered as the text of their children followed by a newline.
  LINE_ENDING_ELEMENTS = %w[
    byline caption cell closer docNumber event figDesc juan row table
  ].freeze

  # Elements rendered as their children's text plus a newline, but only when
  # that text is not empty.
  OPTIONAL_LINE_ELEMENTS = %w[figure head].freeze

  # Elements that contribute nothing; their children are not visited.
  SILENT_ELEMENTS = %w[milestone pb].freeze

  # Values of note/@place that are rendered in the running text.
  INLINE_NOTE_PLACES = %w[inline inline2 interlinear].freeze

  private_constant :TRANSPARENT_ELEMENTS, :LINE_ENDING_ELEMENTS,
                   :OPTIONAL_LINE_ELEMENTS, :SILENT_ELEMENTS,
                   :INLINE_NOTE_PLACES

  # The parsed Nokogiri document (namespaces already removed).
  attr_reader :doc

  # @param string_or_io [String, IO] XML source, handed to Nokogiri::XML as is
  def initialize(string_or_io)
    @doc = Nokogiri::XML(string_or_io)
    @doc.remove_namespaces!
    @gaiji = CBETA::Gaiji.new
  end

  # Renders the whole document as plain text.
  #
  # All traversal state is reset on every call, so the method can be called
  # repeatedly on the same instance.
  #
  # @return [String] the text; empty when the document has no root element
  # @raise [RuntimeError] when a <g> element references an unknown gaiji id
  # @raise [NoMethodError] when an element has no e_* handler
  def to_text
    @format = 'text'
    @gaiji_norm = [true]
    @next_line_buf = +''
    @p_type = nil
    @lg_type = nil

    root = @doc.root
    return +'' if root.nil?

    text = traverse(root)

    # Second-row text of a double-line <tt> is normally emitted by the next
    # <lb>. Flush whatever is still pending so the last line is not lost.
    unless @next_line_buf.empty?
      text << @next_line_buf << "\n"
      @next_line_buf = +''
    end
    text
  end

  private

  # The four tables above generate the trivial handlers. They are made
  # private explicitly because they are created with define_method.
  TRANSPARENT_ELEMENTS.each do |name|
    define_method("e_#{name}") { |e| traverse(e) }
    private "e_#{name}"
  end

  LINE_ENDING_ELEMENTS.each do |name|
    define_method("e_#{name}") { |e| traverse(e) + "\n" }
    private "e_#{name}"
  end

  OPTIONAL_LINE_ELEMENTS.each do |name|
    define_method("e_#{name}") do |e|
      text = traverse(e)
      text << "\n" unless text.empty?
      text
    end
    private "e_#{name}"
  end

  SILENT_ELEMENTS.each do |name|
    define_method("e_#{name}") { |_e| '' }
    private "e_#{name}"
  end

  # <anchor type="circle"> becomes a circle mark; any other anchor is dropped.
  def e_anchor(e)
    e['type'] == 'circle' ? '◎' : ''
  end

  # <caesura> is rendered as a single space.
  def e_caesura(_e)
    ' '
  end

  # <foreign> is skipped when its place attribute mentions "foot".
  def e_foreign(e)
    return '' if e['place']&.include?('foot')

    traverse(e)
  end

  # <g ref="#ID"> is replaced by the string the gaiji table gives for ID.
  # The candidate representations depend on whether normalisation is active
  # for the innermost enclosing <term>/<text>.
  #
  # @raise [RuntimeError] when the id is not in the gaiji table
  def e_g(e)
    cb_priority =
      if @gaiji_norm.last
        %w[uni_char norm_uni_char norm_big5_char composition]
      else
        %w[uni_char composition]
      end

    # A missing ref attribute yields an empty id and therefore takes the
    # same "unknown gaiji" error path below instead of failing on nil.
    gid = e['ref'].to_s.delete_prefix('#')

    unless @gaiji.key?(gid)
      raise "在 CBETA 缺字庫中找不到此缺字碼: #{gid}"
    end

    @gaiji.to_s(gid, cb_priority:)
  end

  # <item> starts on a new line, indented one space per enclosing <list>
  # beyond the first.
  def e_item(e)
    depth = e.xpath('ancestor::list').size
    # Clamp so an <item> outside any <list> does not ask for a negative
    # repeat count (String#* raises ArgumentError for that).
    text = "\n" + (' ' * [depth - 1, 0].max) + traverse(e)

    # NOTE(review): the n label is prepended in front of the leading newline,
    # so it lands at the end of the previous line. Kept as is; confirm that
    # this is the intended layout.
    text = e['n'] + text if e.key?('n')
    text
  end

  # <l> ends with a newline unless the enclosing <lg> has type "abnormal".
  def e_l(e)
    text = traverse(e)
    text << "\n" unless @lg_type == 'abnormal'
    text
  end

  # <lb> (except type="old") emits a newline inside <p type="pre"> and then
  # flushes the text buffered by e_t for the following line.
  def e_lb(e)
    return '' if e['type'] == 'old'

    out = +''
    out << "\n" if @p_type == 'pre'
    unless @next_line_buf.empty?
      out << @next_line_buf << "\n"
      @next_line_buf = +''
    end
    out
  end

  # <lg> records its type for the <l> elements it contains (see e_l) and
  # restores the previous value afterwards.
  def e_lg(e)
    outer_type = @lg_type
    @lg_type = e['type']
    traverse(e)
  ensure
    @lg_type = outer_type
  end

  # <list> is followed by a blank line unless it is nested directly in <item>.
  def e_list(e)
    text = traverse(e)
    text << "\n\n" unless e.parent.name == 'item'
    text
  end

  # <note> is rendered in parentheses when its place is exactly one of
  # INLINE_NOTE_PLACES; every other note is dropped.
  def e_note(e)
    return '' unless INLINE_NOTE_PLACES.include?(e['place'])

    "(#{traverse(e)})"
  end

  # <p> publishes its type to e_lb while its children are rendered, then
  # restores the enclosing paragraph's type so nesting does not clobber it.
  def e_p(e)
    outer_type = @p_type
    @p_type = e['type']
    traverse(e) + "\n"
  ensure
    @p_type = outer_type
  end

  # <reg> is rendered only when it is not inside a <choice>.
  def e_reg(e)
    e.at_xpath('ancestor::choice') ? '' : traverse(e)
  end

  # <sg> is rendered in parentheses.
  def e_sg(e)
    '(' + traverse(e) + ')'
  end

  # <space quantity="N"> becomes N spaces; a missing, zero or negative
  # quantity yields the empty string.
  def e_space(e)
    ' ' * [e['quantity'].to_i, 0].max
  end

  # <t> is one row of a <tt> group.
  #
  # When the group is not laid out as two parallel lines the text is returned
  # unchanged. Otherwise the first <t> stays on the current line and the
  # second is buffered until the next <lb> (see e_lb).
  def e_t(e)
    return '' if e['place']&.include?('foot')

    text = traverse(e)

    # Not a two-line parallel layout: emit the text as is.
    tt = e.at_xpath('ancestor::tt')
    if tt
      return text if %w[app single-line].include?(tt['type'])
      return text if tt['place'] == 'inline' || tt['rend'] == 'normal'
    end

    # Two-line parallel layout: dispatch on the position among sibling <t>.
    case e.xpath('../t').index(e)
    when 0
      text + ' '
    when 1
      @next_line_buf << text << ' '
      ''
    else
      text
    end
  end

  # <term> may switch gaiji normalisation off with behaviour="no-norm".
  def e_term(e)
    traverse_with_norm(e)
  end

  # <text> may switch gaiji normalisation off with behaviour="no-norm".
  def e_text(e)
    traverse_with_norm(e)
  end

  # <unclear> with no readable content is shown as a block character.
  def e_unclear(e)
    text = traverse(e)
    text.empty? ? '▆' : text
  end

  # Renders the children of e with the element's own normalisation setting
  # pushed on the stack that e_g consults; the stack is always popped again.
  def traverse_with_norm(e)
    @gaiji_norm.push(e['behaviour'] != 'no-norm')
    traverse(e)
  ensure
    @gaiji_norm.pop
  end

  # Renders a single node: comments and PASS elements yield nothing, text
  # nodes go to handle_text, everything else to the matching e_* method.
  #
  # @raise [NoMethodError] when no e_* method exists for the node
  def handle_node(e)
    return '' if e.comment?
    return handle_text(e) if e.text?
    return '' if PASS.include?(e.name)

    handler = "e_#{e.name}"
    unless respond_to?(handler, true)
      raise NoMethodError,
            "#{self.class} has no handler #{handler} for <#{e.name}> (XML line #{e.line})"
    end

    send(handler, e)
  end

  # Renders a text node. Text directly inside <app> is dropped, and all line
  # breaks are removed because CBETA XML contains superfluous ones.
  def handle_text(e)
    return '' if e.parent.name == 'app'

    text = e.content.delete("\n\r")

    if @format == 'html'
      # Loaded here because this file does not require cgi at the top.
      require 'cgi'
      text = CGI.escapeHTML(text) # turn & into &amp;
    end

    text
  end

  # Concatenates the rendering of every child of e, in document order.
  def traverse(e)
    e.children.each_with_object(+'') { |child, out| out << handle_node(child) }
  end
end
data/lib/cbeta.rb CHANGED
@@ -234,3 +234,4 @@ require 'cbeta/p5a_to_simple_html'
234
234
  require 'cbeta/p5a_to_text'
235
235
  require 'cbeta/p5a_validator'
236
236
  require 'cbeta/html_to_text'
237
+ require 'cbeta/xml_document'
data/lib/data/cbeta_gaiji.json CHANGED
@@ -174568,5 +174568,17 @@
174568
174568
  "composition": "[弓*并]",
174569
174569
  "moe_variant_id": "C03427",
174570
174570
  "pua": "U+F87C6"
174571
+ },
174572
+ "CB34759": {
174573
+ "composition": "[△@▲]",
174574
+ "pua": "U+F87C7"
174575
+ },
174576
+ "CB34760": {
174577
+ "unicode": "20B4F",
174578
+ "uni_char": "𠭏",
174579
+ "composition": "[山/〦/中/又]",
174580
+ "norm_big5_char": "事",
174581
+ "moe_variant_id": "A00048-003",
174582
+ "pua": "U+F87C8"
174571
174583
  }
174572
174584
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.3
4
+ version: 3.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-02-06 00:00:00.000000000 Z
11
+ date: 2024-03-11 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com
@@ -31,6 +31,7 @@ files:
31
31
  - lib/cbeta/p5a_to_simple_html.rb
32
32
  - lib/cbeta/p5a_to_text.rb
33
33
  - lib/cbeta/p5a_validator.rb
34
+ - lib/cbeta/xml_document.rb
34
35
  - lib/data/canons.csv
35
36
  - lib/data/categories.json
36
37
  - lib/data/cbeta_gaiji.json
@@ -57,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
57
58
  - !ruby/object:Gem::Version
58
59
  version: '0'
59
60
  requirements: []
60
- rubygems_version: 3.4.22
61
+ rubygems_version: 3.5.6
61
62
  signing_key:
62
63
  specification_version: 4
63
64
  summary: CBETA Tools