cbeta 3.1.3 → 3.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4bb29e86cf68e6503bb935c35f6499132f8106fc43e11de6a2e733df44901009
4
- data.tar.gz: 454dcfbffbc118c6071c4f7932fc8c18118bcd7bbc60e3797f7386920e4f7b77
3
+ metadata.gz: 982e4d10689bc1fa6f71d8c27e880cbfdcf248c5cbdb8994ad65bf6d2c5e5259
4
+ data.tar.gz: 7c6cfa28c5f48f1f84bb4ad2921e45ac1db1f2039626999a1fd620bbe91c9564
5
5
  SHA512:
6
- metadata.gz: d2ddd9c064712fe4bc74912d936479046080b97fe68b9884af164be26802efdc0a6455d7171e7f2854df5ce3106a8112441589dbcd4563388f44dbb3567a018d
7
- data.tar.gz: c5d210953964044077cac5b264fdf07fe0a3e0fe04a3b74c2dadf36ec28e0117e5b5292b7d6333075d621ca67894b078c63e0bf0667bf0ade266e2a27bba764b
6
+ metadata.gz: b1ac00d3090b9a19df3e3d25d4cf1c2dc4f50a37e6de3cbbdf4b09a9cd6cb68b1fa7700e825cad8352628f7a9bab8ed34a916abbd4d8380a68d5783e6aef8513
7
+ data.tar.gz: b1a6f2ebb218737938ae7bc35eb5bc648cc544c8ac964647054335bf15f218e17db7a337e8b529060eccd69094872df6725ebed18b0e7cb0681253f3eccdf23d
data/lib/cbeta/gaiji.rb CHANGED
@@ -69,6 +69,8 @@ class CBETA::Gaiji
69
69
  end
70
70
 
71
71
  g = @gaijis[gid]
72
+ return nil if g.nil?
73
+
72
74
  if gid.start_with? 'CB'
73
75
  cb_priority.each do |k|
74
76
  if k == 'PUA'
@@ -0,0 +1,300 @@
1
+ require 'nokogiri'
2
+
3
# Converts a CBETA TEI XML document into plain text.
#
# The document is parsed with Nokogiri, its namespaces are removed, and the
# element tree is walked depth-first. An element <foo> is rendered by the
# private method e_foo, which returns the text for that element (usually by
# recursing into its children through #traverse). Elements listed in PASS are
# dropped together with everything below them.
#
# Usage:
#   CBETA::XMLDocument.new(xml_string_or_io).to_text
class CBETA::XMLDocument
  # Elements whose whole subtree is ignored.
  PASS = %w[back graphic mulu rdg sic teiHeader].freeze

  # Elements rendered as the plain concatenation of their children.
  TRANSPARENT_ELEMENTS = %w[app body corr date dialog div hi jhead lem lg sp tt].freeze

  # Elements rendered as their children followed by a single line break.
  LINE_ELEMENTS = %w[byline cell docNumber event figure head juan row table].freeze

  # <note place="..."> values that keep the note in the output.
  INLINE_NOTE_PLACES = %w[inline inline2 interlinear].freeze

  # <tt type="..."> values for which <t> children are emitted in place.
  SINGLE_LINE_TT_TYPES = %w[app single-line].freeze

  # @param string_or_io [String, IO] XML source, handed to Nokogiri::XML as is
  def initialize(string_or_io)
    @doc = Nokogiri::XML(string_or_io)
    @doc.remove_namespaces!
    @gaiji = CBETA::Gaiji.new
  end

  # Renders the document as plain text.
  #
  # @return [String] the rendered text; '' when the document has no root element
  def to_text
    @format = 'text'
    @gaiji_norm = [true]      # stack; the top entry says whether gaiji are normalized
    @next_line_buf = +''      # text held back by e_t until the next <lb>
    @p_type = nil
    @lg_type = nil

    root = @doc.root
    return '' if root.nil?

    traverse(root)
  end

  private

  # Pass-through handlers: e_app, e_body, e_corr, ...
  TRANSPARENT_ELEMENTS.each do |tag|
    define_method("e_#{tag}") { |e| traverse(e) }
    private "e_#{tag}"
  end

  # Handlers that end their content with a line break: e_byline, e_cell, ...
  LINE_ELEMENTS.each do |tag|
    define_method("e_#{tag}") { |e| traverse(e) + "\n" }
    private "e_#{tag}"
  end

  # <anchor type="circle"> becomes a circle mark; any other anchor is dropped.
  def e_anchor(e)
    e['type'] == 'circle' ? '◎' : ''
  end

  def e_caesura(e)
    ' '
  end

  # Content whose place attribute mentions 'foot' is left out.
  def e_foreign(e)
    return '' if foot_placed?(e)

    traverse(e)
  end

  # Resolves a gaiji reference through CBETA::Gaiji#to_s.
  #
  # The first character of the ref attribute is dropped to obtain the gaiji id
  # (presumably a leading '#' - confirm against the XML). The process is
  # aborted when the id is missing or CBETA::Gaiji returns nil for it.
  def e_g(e)
    cb_priority =
      if @gaiji_norm.last
        %w[uni_char norm_uni_char norm_big5_char composition]
      else
        %w[uni_char composition]
      end

    gid = e['ref'].to_s[1..]
    r = @gaiji.to_s(gid, cb_priority:) unless gid.nil? || gid.empty?
    abort "Line:#{__LINE__} 缺字處理失敗:#{gid}" if r.nil?
    r
  end

  # Starts a new line, indents by one space per enclosing <list> beyond the
  # first, and puts the n attribute (when present) in front of the line break.
  def e_item(e)
    list_level = e.xpath('ancestor::list').size
    # Clamp: an <item> outside any <list> would give a negative repeat count,
    # and String#* raises ArgumentError for that.
    indent = ' ' * [list_level - 1, 0].max
    "#{e['n']}\n#{indent}#{traverse(e)}"
  end

  # NOTE(review): @lg_type is only ever set to nil inside this class, so the
  # line break is currently always appended.
  def e_l(e)
    r = traverse(e)
    @lg_type == 'abnormal' ? r : r + "\n"
  end

  # Line beginning. Emits a line break inside <p type="pre"> and flushes the
  # text that e_t buffered for the following line.
  def e_lb(e)
    return '' if e['type'] == 'old'

    r = +''
    r << "\n" if @p_type == 'pre'
    unless @next_line_buf.empty?
      r << @next_line_buf << "\n"
      @next_line_buf = +''
    end
    r
  end

  # A list ends with a blank line unless it is nested directly in an <item>.
  def e_list(e)
    r = traverse(e)
    e.parent.name == 'item' ? r : r + "\n\n"
  end

  def e_milestone(e)
    ''
  end

  # Keeps only notes placed inline/interlinear, wrapped in parentheses.
  # The place attribute is compared as a whole word; a substring test would
  # also accept values such as "" or "line".
  def e_note(e)
    return '' unless INLINE_NOTE_PLACES.include?(e['place'])

    "(#{traverse(e)})"
  end

  # Records the paragraph type while the children are rendered (e_lb reads
  # it), then restores the enclosing value so nested <p> elements do not
  # clobber it.
  def e_p(e)
    outer_type = @p_type
    @p_type = e['type']
    traverse(e) + "\n"
  ensure
    @p_type = outer_type
  end

  def e_pb(e)
    ''
  end

  # A <reg> inside a <choice> is dropped; elsewhere its content is kept.
  def e_reg(e)
    e.at_xpath('ancestor::choice').nil? ? traverse(e) : ''
  end

  def e_sg(e)
    "(#{traverse(e)})"
  end

  # One space per unit of the quantity attribute; a missing, zero or negative
  # quantity yields the empty string.
  def e_space(e)
    ' ' * [e['quantity'].to_i, 0].max
  end

  # Renders one <t> of a <tt> group.
  #
  # When the enclosing <tt> is not laid out as two parallel lines the content
  # is emitted in place. Otherwise the first <t> is emitted followed by a
  # space, the second is buffered until the next <lb> (see e_lb), and any
  # further <t> is emitted unchanged.
  def e_t(e)
    return '' if foot_placed?(e)

    r = traverse(e)

    # Not a two-line parallel layout: emit as is.
    tt = e.at_xpath('ancestor::tt')
    return r if tt && single_line_tt?(tt)

    # Two-line parallel layout.
    case e.xpath('../t').index(e)
    when 0
      r + ' '
    when 1
      @next_line_buf << r + ' '
      ''
    else
      r
    end
  end

  def e_term(e)
    with_gaiji_norm(e)
  end

  def e_text(e)
    with_gaiji_norm(e)
  end

  # An <unclear> without content is shown as a block character.
  def e_unclear(e)
    r = traverse(e)
    r.empty? ? '▆' : r
  end

  # True when the element's place attribute contains 'foot'.
  def foot_placed?(e)
    e['place'].to_s.include?('foot')
  end

  # True when the <t> children of this <tt> are emitted in place.
  def single_line_tt?(tt)
    SINGLE_LINE_TT_TYPES.include?(tt['type']) ||
      tt['place'] == 'inline' ||
      tt['rend'] == 'normal'
  end

  # Renders the children of e with gaiji normalization switched off when
  # behaviour="no-norm" and on otherwise; the previous setting is restored
  # afterwards.
  def with_gaiji_norm(e)
    @gaiji_norm.push(e['behaviour'] != 'no-norm')
    traverse(e)
  ensure
    @gaiji_norm.pop
  end

  # Renders a single node.
  #
  # Comments and processing instructions produce nothing, text and CDATA go
  # through handle_text, PASS elements are skipped, and every other node is
  # dispatched to the method e_<node name>.
  #
  # @raise [NoMethodError] when no handler exists for the node
  def handle_node(e)
    return '' if e.comment? || e.processing_instruction?
    return handle_text(e) if e.text? || e.cdata?
    return '' if PASS.include?(e.name)

    handler = "e_#{e.name}"
    unless respond_to?(handler, true)
      raise NoMethodError, "#{self.class}: no handler for <#{e.name}> (line #{e.line})"
    end

    send(handler, e)
  end

  # Renders a text node. Text directly inside <app> is dropped.
  def handle_text(e)
    return '' if e.parent.name == 'app'

    # CBETA XML contains superfluous line breaks between pieces of text.
    r = e.content.delete("\r\n")
    return r unless @format == 'html'

    # NOTE(review): nothing in this class sets @format to 'html'; cgi is
    # loaded lazily so this branch works if that ever changes.
    require 'cgi'
    CGI.escapeHTML(r) # turn & into &amp;
  end

  # Concatenates the rendering of every child of e.
  def traverse(e)
    e.children.each_with_object(+'') do |child, out|
      out << handle_node(child)
    end
  end
end
data/lib/cbeta.rb CHANGED
@@ -234,3 +234,4 @@ require 'cbeta/p5a_to_simple_html'
234
234
  require 'cbeta/p5a_to_text'
235
235
  require 'cbeta/p5a_validator'
236
236
  require 'cbeta/html_to_text'
237
+ require 'cbeta/xml_document'
@@ -174568,5 +174568,17 @@
174568
174568
  "composition": "[弓*并]",
174569
174569
  "moe_variant_id": "C03427",
174570
174570
  "pua": "U+F87C6"
174571
+ },
174572
+ "CB34759": {
174573
+ "composition": "[△@▲]",
174574
+ "pua": "U+F87C7"
174575
+ },
174576
+ "CB34760": {
174577
+ "unicode": "20B4F",
174578
+ "uni_char": "𠭏",
174579
+ "composition": "[山/〦/中/又]",
174580
+ "norm_big5_char": "事",
174581
+ "moe_variant_id": "A00048-003",
174582
+ "pua": "U+F87C8"
174571
174583
  }
174572
174584
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.3
4
+ version: 3.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-02-06 00:00:00.000000000 Z
11
+ date: 2024-03-09 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com
@@ -31,6 +31,7 @@ files:
31
31
  - lib/cbeta/p5a_to_simple_html.rb
32
32
  - lib/cbeta/p5a_to_text.rb
33
33
  - lib/cbeta/p5a_validator.rb
34
+ - lib/cbeta/xml_document.rb
34
35
  - lib/data/canons.csv
35
36
  - lib/data/categories.json
36
37
  - lib/data/cbeta_gaiji.json
@@ -57,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
57
58
  - !ruby/object:Gem::Version
58
59
  version: '0'
59
60
  requirements: []
60
- rubygems_version: 3.4.22
61
+ rubygems_version: 3.5.6
61
62
  signing_key:
62
63
  specification_version: 4
63
64
  summary: CBETA Tools