cbeta 3.1.3 → 3.2.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4bb29e86cf68e6503bb935c35f6499132f8106fc43e11de6a2e733df44901009
4
- data.tar.gz: 454dcfbffbc118c6071c4f7932fc8c18118bcd7bbc60e3797f7386920e4f7b77
3
+ metadata.gz: 5d32b2ace3526aa8492fcfb42bbbebf08edfa1a1df1d3ef5aecc8e409e7423a9
4
+ data.tar.gz: 67bb568adb4710f114216161e16a6c7c5e0a0c34af6b770a42fc1e26143a3b03
5
5
  SHA512:
6
- metadata.gz: d2ddd9c064712fe4bc74912d936479046080b97fe68b9884af164be26802efdc0a6455d7171e7f2854df5ce3106a8112441589dbcd4563388f44dbb3567a018d
7
- data.tar.gz: c5d210953964044077cac5b264fdf07fe0a3e0fe04a3b74c2dadf36ec28e0117e5b5292b7d6333075d621ca67894b078c63e0bf0667bf0ade266e2a27bba764b
6
+ metadata.gz: 71ade7189b3f3bf90f9436e1d756c88f72164285972d7cab8cb81bb2ed27d2c13a048e2909cfb58f8250288e5d31bee888acedf6cf4ea225db5e7ea5605db575
7
+ data.tar.gz: f84625e849635021b7ba2c10b7b54725830bce6786eb40fe8b3696b8b732274ec89cbbb4fbedb9e494be88f221e7252c23d90fe83fe8e8b81edc8189bf9969f0
data/lib/cbeta/gaiji.rb CHANGED
@@ -69,6 +69,8 @@ class CBETA::Gaiji
69
69
  end
70
70
 
71
71
  g = @gaijis[gid]
72
+ return nil if g.nil?
73
+
72
74
  if gid.start_with? 'CB'
73
75
  cb_priority.each do |k|
74
76
  if k == 'PUA'
@@ -0,0 +1,340 @@
1
+ require 'nokogiri'
2
+
3
# Converts a CBETA TEI XML document into plain text.
#
# The document is parsed with Nokogiri (namespaces removed) and walked
# recursively; every element <foo> is rendered by the private handler
# method `e_foo`, and the handler results are concatenated.
#
# Usage:
#   CBETA::XMLDocument.new(File.read(path)).to_text
class CBETA::XMLDocument
  # Elements whose whole subtree is omitted from the output.
  PASS = %w[back graphic mulu rdg sic teiHeader].freeze

  # Values of <note place="..."> that are rendered inline, in parentheses.
  INLINE_NOTE_PLACES = %w[inline inline2 interlinear].freeze

  # @return [Nokogiri::XML::Document] the parsed, namespace-free document
  attr_reader :doc

  # @param string_or_io [String, IO] XML source, passed straight to Nokogiri::XML
  def initialize(string_or_io)
    @doc = Nokogiri::XML(string_or_io)
    @doc.remove_namespaces!
    @gaiji = CBETA::Gaiji.new
  end

  # Renders the whole document as plain text.
  #
  # @return [String] the text; empty when the document has no root element
  # @raise [RuntimeError] when a <g> element has no ref or refers to an unknown gaiji
  # @raise [NoMethodError] when an element has no `e_<name>` handler
  def to_text
    @format = 'text'
    @gaiji_norm = [true] # stack; the top entry says whether gaiji are normalized
    @next_line_buf = +''
    @p_type = nil
    @lg_type = nil

    root = @doc.root
    return '' if root.nil? # nothing was parsed (empty or unparseable input)

    out = traverse(root)

    # Text of a second <t> is held back until the next <lb>. Emit whatever is
    # still pending at the end of the document instead of silently dropping it.
    unless @next_line_buf.empty?
      out << @next_line_buf << "\n"
      @next_line_buf = +''
    end
    out
  end

  private

  # --- Handlers that only concatenate the text of their children -----------

  def e_app(e) = traverse(e)
  def e_body(e) = traverse(e)
  def e_cit(e) = traverse(e)
  def e_corr(e) = traverse(e)
  def e_date(e) = traverse(e)
  def e_dialog(e) = traverse(e)
  def e_div(e) = traverse(e)
  def e_docAuthor(e) = traverse(e)
  def e_hi(e) = traverse(e)
  def e_jhead(e) = traverse(e)
  def e_lem(e) = traverse(e)
  def e_lg(e) = traverse(e)
  def e_quote(e) = traverse(e)
  def e_ref(e) = traverse(e)
  def e_seg(e) = traverse(e)
  def e_sp(e) = traverse(e) # speech
  def e_tt(e) = traverse(e)

  # --- Handlers that always end their text with a newline ------------------

  def e_byline(e) = traverse(e) + "\n"
  def e_caption(e) = traverse(e) + "\n"
  def e_cell(e) = traverse(e) + "\n"
  def e_closer(e) = traverse(e) + "\n"
  def e_docNumber(e) = traverse(e) + "\n"
  def e_event(e) = traverse(e) + "\n"
  def e_figDesc(e) = traverse(e) + "\n"
  def e_juan(e) = traverse(e) + "\n"
  def e_row(e) = traverse(e) + "\n"
  def e_table(e) = traverse(e) + "\n"

  # --- Handlers that end with a newline only when they produced text -------

  def e_figure(e) = newline_unless_empty(traverse(e))
  def e_head(e) = newline_unless_empty(traverse(e))

  # --- Handlers that produce fixed output ----------------------------------

  def e_caesura(_e) = ' '
  def e_milestone(_e) = ''
  def e_pb(_e) = ''

  # --- Handlers with their own logic ---------------------------------------

  # <anchor type="circle"> is rendered as a circle mark; other anchors as nothing.
  def e_anchor(e)
    e['type'] == 'circle' ? '◎' : ''
  end

  # Skipped when its `place` attribute mentions "foot".
  def e_foreign(e)
    return '' if e['place']&.include?('foot')

    traverse(e)
  end

  # Renders a gaiji reference (<g ref="#CB...">) through the gaiji table.
  #
  # @raise [RuntimeError] when `ref` is missing or not present in the gaiji table
  def e_g(e)
    cb_priority =
      if @gaiji_norm.last
        %w[uni_char norm_uni_char norm_big5_char composition]
      else
        %w[uni_char composition]
      end

    ref = e['ref']
    # Without this guard a missing attribute fails with NoMethodError on nil.
    raise "<g> element has no ref attribute (line #{e.line})" if ref.nil?

    gid = ref.delete_prefix('#')
    raise "在 CBETA 缺字庫中找不到此缺字碼: #{gid}" unless @gaiji.key?(gid)

    @gaiji.to_s(gid, cb_priority:)
  end

  # Starts a new line, indented by one space per enclosing <list> beyond the first.
  def e_item(e)
    depth = e.xpath('ancestor::list').size
    r = +"\n"
    # An <item> outside any <list> has depth 0; clamp so String#* never
    # receives a negative count (which raises ArgumentError).
    r << ' ' * [depth - 1, 0].max
    r << traverse(e)
    # NOTE(review): the `n` label is put in front of the leading line break,
    # i.e. at the end of the previous line — kept as is; confirm this is intended.
    r = e['n'] + r if e.key?('n')
    r
  end

  # A verse line; followed by a newline unless @lg_type is 'abnormal'.
  def e_l(e)
    r = traverse(e)
    # NOTE(review): nothing in this class ever assigns @lg_type a value, so the
    # newline is always appended — confirm whether e_lg should set it.
    r << "\n" unless @lg_type == 'abnormal'
    r
  end

  # A line break: emits a newline inside <p type="pre"> and flushes the
  # text buffered by e_t. <lb type="old"> produces nothing.
  def e_lb(e)
    return '' if e['type'] == 'old'

    r = +''
    r << "\n" if @p_type == 'pre'
    unless @next_line_buf.empty?
      r << @next_line_buf << "\n"
      @next_line_buf = +''
    end
    r
  end

  # A list is followed by a blank line unless it is nested directly in an <item>.
  def e_list(e)
    r = traverse(e)
    r << "\n\n" unless e.parent.name == 'item'
    r
  end

  # Only inline notes are kept, wrapped in parentheses.
  def e_note(e)
    # Exact match: a substring test against "inline inline2 interlinear"
    # would also accept values such as "" or "line".
    return '' unless INLINE_NOTE_PLACES.include?(e['place'])

    "(#{traverse(e)})"
  end

  # A paragraph; its `type` is visible to e_lb while the children are rendered.
  def e_p(e)
    outer_type = @p_type
    @p_type = e['type']
    traverse(e) << "\n"
  ensure
    # Restore rather than reset to nil so that a <p> nested inside another
    # one does not wipe the outer paragraph's type.
    @p_type = outer_type
  end

  # <reg> is rendered only when it is not inside a <choice>.
  def e_reg(e)
    return '' if e.at_xpath('ancestor::choice')

    traverse(e)
  end

  def e_sg(e)
    "(#{traverse(e)})"
  end

  # Emits `quantity` spaces; a missing, zero or negative quantity gives ''.
  def e_space(e)
    ' ' * [e['quantity'].to_i, 0].max
  end

  # One <t> of a <tt> group.
  def e_t(e)
    return '' if e['place']&.include?('foot')

    r = traverse(e)

    # Not a two-line parallel layout: return the text unchanged.
    tt = e.at_xpath('ancestor::tt')
    if tt
      return r if %w[app single-line].include?(tt['type'])
      return r if tt['place'] == 'inline'
      return r if tt['rend'] == 'normal'
    end

    # Two-line parallel layout: the first <t> stays on the current line, the
    # second one is buffered and emitted at the next <lb>.
    case e.xpath('../t').index(e)
    when 0
      r + ' '
    when 1
      @next_line_buf << r << ' '
      ''
    else
      r
    end
  end

  def e_term(e) = with_gaiji_norm(e) { traverse(e) }
  def e_text(e) = with_gaiji_norm(e) { traverse(e) }

  # Renders a block mark when the unclear passage has no text of its own.
  def e_unclear(e)
    r = traverse(e)
    r.empty? ? '▆' : r
  end

  # --- Helpers -------------------------------------------------------------

  # Appends a newline to +text+ (in place) unless it is empty.
  def newline_unless_empty(text)
    text << "\n" unless text.empty?
    text
  end

  # Runs the block with gaiji normalization switched off when +e+ carries
  # behaviour="no-norm" (on otherwise); the previous setting is restored
  # afterwards, even if the block raises.
  def with_gaiji_norm(e)
    @gaiji_norm.push(e['behaviour'] != 'no-norm')
    yield
  ensure
    @gaiji_norm.pop
  end

  # Renders a single node.
  #
  # @raise [NoMethodError] when an element has no `e_<name>` handler
  def handle_node(e)
    return '' if e.comment?
    return handle_text(e) if e.text? || e.cdata? # CDATA is character data too
    return '' unless e.element? # processing instructions, entity references, ...
    return '' if PASS.include?(e.name)

    handler = "e_#{e.name}"
    unless respond_to?(handler, true)
      raise NoMethodError, "no handler for element <#{e.name}> (line #{e.line})"
    end

    send(handler, e)
  end

  # Renders a text node; text that is a direct child of <app> is dropped.
  def handle_text(e)
    s = e.content.chomp
    return '' if s.empty?
    return '' if e.parent.name == 'app'

    # CBETA XML contains superfluous line breaks inside running text.
    r = s.delete("\n\r")

    # NOTE(review): @format is only ever set to 'text' in this class, and no
    # `require 'cgi'` is visible here — confirm CGI is loaded before adding
    # an HTML output mode.
    r = CGI.escapeHTML(r) if @format == 'html' # turns & into &amp;

    r
  end

  # Concatenates the rendering of every child node of +e+.
  #
  # @return [String] a new, mutable string
  def traverse(e)
    e.children.each_with_object(+'') { |child, out| out << handle_node(child) }
  end
end
data/lib/cbeta.rb CHANGED
@@ -234,3 +234,4 @@ require 'cbeta/p5a_to_simple_html'
234
234
  require 'cbeta/p5a_to_text'
235
235
  require 'cbeta/p5a_validator'
236
236
  require 'cbeta/html_to_text'
237
+ require 'cbeta/xml_document'
@@ -174568,5 +174568,17 @@
174568
174568
  "composition": "[弓*并]",
174569
174569
  "moe_variant_id": "C03427",
174570
174570
  "pua": "U+F87C6"
174571
+ },
174572
+ "CB34759": {
174573
+ "composition": "[△@▲]",
174574
+ "pua": "U+F87C7"
174575
+ },
174576
+ "CB34760": {
174577
+ "unicode": "20B4F",
174578
+ "uni_char": "𠭏",
174579
+ "composition": "[山/〦/中/又]",
174580
+ "norm_big5_char": "事",
174581
+ "moe_variant_id": "A00048-003",
174582
+ "pua": "U+F87C8"
174571
174583
  }
174572
174584
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.3
4
+ version: 3.2.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-02-06 00:00:00.000000000 Z
11
+ date: 2024-03-11 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com
@@ -31,6 +31,7 @@ files:
31
31
  - lib/cbeta/p5a_to_simple_html.rb
32
32
  - lib/cbeta/p5a_to_text.rb
33
33
  - lib/cbeta/p5a_validator.rb
34
+ - lib/cbeta/xml_document.rb
34
35
  - lib/data/canons.csv
35
36
  - lib/data/categories.json
36
37
  - lib/data/cbeta_gaiji.json
@@ -57,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
57
58
  - !ruby/object:Gem::Version
58
59
  version: '0'
59
60
  requirements: []
60
- rubygems_version: 3.4.22
61
+ rubygems_version: 3.5.6
61
62
  signing_key:
62
63
  specification_version: 4
63
64
  summary: CBETA Tools