cbeta 3.1.3 → 3.2.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 4bb29e86cf68e6503bb935c35f6499132f8106fc43e11de6a2e733df44901009
4
- data.tar.gz: 454dcfbffbc118c6071c4f7932fc8c18118bcd7bbc60e3797f7386920e4f7b77
3
+ metadata.gz: 982e4d10689bc1fa6f71d8c27e880cbfdcf248c5cbdb8994ad65bf6d2c5e5259
4
+ data.tar.gz: 7c6cfa28c5f48f1f84bb4ad2921e45ac1db1f2039626999a1fd620bbe91c9564
5
5
  SHA512:
6
- metadata.gz: d2ddd9c064712fe4bc74912d936479046080b97fe68b9884af164be26802efdc0a6455d7171e7f2854df5ce3106a8112441589dbcd4563388f44dbb3567a018d
7
- data.tar.gz: c5d210953964044077cac5b264fdf07fe0a3e0fe04a3b74c2dadf36ec28e0117e5b5292b7d6333075d621ca67894b078c63e0bf0667bf0ade266e2a27bba764b
6
+ metadata.gz: b1ac00d3090b9a19df3e3d25d4cf1c2dc4f50a37e6de3cbbdf4b09a9cd6cb68b1fa7700e825cad8352628f7a9bab8ed34a916abbd4d8380a68d5783e6aef8513
7
+ data.tar.gz: b1a6f2ebb218737938ae7bc35eb5bc648cc544c8ac964647054335bf15f218e17db7a337e8b529060eccd69094872df6725ebed18b0e7cb0681253f3eccdf23d
data/lib/cbeta/gaiji.rb CHANGED
@@ -69,6 +69,8 @@ class CBETA::Gaiji
69
69
  end
70
70
 
71
71
  g = @gaijis[gid]
72
+ return nil if g.nil?
73
+
72
74
  if gid.start_with? 'CB'
73
75
  cb_priority.each do |k|
74
76
  if k == 'PUA'
@@ -0,0 +1,300 @@
1
+ require 'nokogiri'
2
+
3
# Renders a CBETA XML document as plain text.
#
# The document is parsed with Nokogiri and its namespaces are removed. Every
# element is rendered by a private handler named +e_<element name>+:
# #handle_node dispatches to the handler by name and #traverse concatenates
# the output of a node's children. Elements listed in PASS are skipped
# together with their whole subtree.
class CBETA::XMLDocument
  # Element names whose subtree produces no output.
  PASS = %w[back graphic mulu rdg sic teiHeader].freeze

  # <note place="..."> values that are rendered inline, wrapped in parentheses.
  INLINE_NOTE_PLACES = %w[inline inline2 interlinear].freeze
  private_constant :INLINE_NOTE_PLACES

  # @param string_or_io [String, IO] XML source handed to Nokogiri::XML
  def initialize(string_or_io)
    @doc = Nokogiri::XML(string_or_io)
    @doc.remove_namespaces!
    @gaiji = CBETA::Gaiji.new
  end

  # Converts the whole document to plain text.
  #
  # Text still held in @next_line_buf when the traversal ends (i.e. no <lb>
  # followed the buffered <t>) is not part of the result.
  #
  # @return [String]
  def to_text
    @format = 'text'
    @gaiji_norm = [true] # stack; the last entry decides whether gaiji are normalized
    @next_line_buf = +''
    traverse(@doc.root)
  end

  private

  # <anchor type="circle"> becomes a circle mark; any other anchor is dropped.
  def e_anchor(e)
    e['type'] == 'circle' ? '◎' : ''
  end

  def e_app(e)
    traverse(e)
  end

  def e_body(e)
    traverse(e)
  end

  def e_byline(e)
    traverse(e) + "\n"
  end

  def e_caesura(e)
    '  '
  end

  def e_cell(e)
    traverse(e) + "\n"
  end

  def e_corr(e)
    traverse(e)
  end

  def e_date(e)
    traverse(e)
  end

  def e_dialog(e)
    traverse(e)
  end

  def e_div(e)
    traverse(e)
  end

  def e_docNumber(e)
    traverse(e) + "\n"
  end

  def e_event(e)
    traverse(e) + "\n"
  end

  def e_figure(e)
    traverse(e) + "\n"
  end

  # Foreign text whose place attribute contains "foot" is omitted.
  def e_foreign(e)
    return '' if e['place']&.include?('foot')

    traverse(e)
  end

  # Resolves a gaiji reference to a printable string.
  #
  # The lookup priority depends on the top of the @gaiji_norm stack:
  # normalized forms are only tried while normalization is enabled.
  # Aborts the process when the gaiji table yields nothing for the id.
  def e_g(e)
    cb_priority =
      if @gaiji_norm.last
        %w[uni_char norm_uni_char norm_big5_char composition]
      else
        %w[uni_char composition]
      end

    gid = e['ref'][1..-1] # drop the first character of the ref value
    r = @gaiji.to_s(gid, cb_priority: cb_priority)
    abort "Line:#{__LINE__} 缺字處理失敗:#{gid}" if r.nil?
    r
  end

  def e_head(e)
    traverse(e) + "\n"
  end

  def e_hi(e)
    traverse(e)
  end

  # Renders a list item on its own line, indented by two spaces per
  # enclosing <list> beyond the first. When the item has an "n" attribute,
  # its value is placed in front of the leading newline.
  def e_item(e)
    list_level = e.xpath('ancestor::list').size
    # Clamp at zero: an <item> without any <list> ancestor would otherwise
    # ask String#* for a negative repeat count and raise ArgumentError.
    indent = '  ' * [list_level - 1, 0].max

    r = "\n#{indent}#{traverse(e)}"
    r = e['n'] + r if e.key?('n')
    r
  end

  def e_jhead(e)
    traverse(e)
  end

  def e_juan(e)
    traverse(e) + "\n"
  end

  # Renders a verse line followed by a newline.
  #
  # NOTE(review): @lg_type is never assigned in this class, so the newline
  # is currently always appended - confirm whether e_lg should set it.
  def e_l(e)
    r = traverse(e)
    r << "\n" unless @lg_type == 'abnormal'
    r
  end

  # Line break: emits nothing by itself except inside <p type="pre">, and
  # flushes text that e_t buffered for the following line.
  def e_lb(e)
    return '' if e['type'] == 'old'

    r = +''
    r << "\n" if @p_type == 'pre'
    unless @next_line_buf.empty?
      r << @next_line_buf << "\n"
      @next_line_buf = +''
    end
    r
  end

  def e_lem(e)
    traverse(e)
  end

  def e_lg(e)
    traverse(e)
  end

  # A list nested directly inside an <item> gets no trailing blank line.
  def e_list(e)
    r = traverse(e)
    r << "\n\n" unless e.parent.name == 'item'
    r
  end

  def e_milestone(e)
    ''
  end

  # Only inline-style notes are kept, wrapped in parentheses; every other
  # note (including one without a place attribute) is dropped.
  def e_note(e)
    # Exact match on the attribute value; a substring test would also accept
    # values such as "" or "line".
    return '' unless INLINE_NOTE_PLACES.include?(e['place'])

    "(#{traverse(e)})"
  end

  # Remembers the paragraph type while its children are rendered so that
  # e_lb can honour type="pre".
  def e_p(e)
    @p_type = e['type']
    r = traverse(e) + "\n"
    @p_type = nil
    r
  end

  def e_pb(e)
    ''
  end

  # A <reg> is rendered only when it is not inside a <choice>.
  def e_reg(e)
    e.at_xpath('ancestor::choice').nil? ? traverse(e) : ''
  end

  def e_row(e)
    traverse(e) + "\n"
  end

  def e_sg(e)
    '(' + traverse(e) + ')'
  end

  # speech
  def e_sp(e)
    traverse(e)
  end

  # Emits as many spaces as the quantity attribute says. A missing,
  # non-numeric or negative quantity yields an empty string.
  def e_space(e)
    ' ' * [e['quantity'].to_i, 0].max
  end

  # Renders one <t> of a <tt> group.
  #
  # When the enclosing <tt> is not laid out as two parallel lines, the text
  # is returned unchanged. Otherwise the first <t> is emitted in place, the
  # second one is buffered until the next <lb>, and any further <t> is
  # returned as is.
  def e_t(e)
    return '' if e['place']&.include?('foot')

    r = traverse(e)

    # Not a two-line parallel layout: return the text unchanged.
    tt = e.at_xpath('ancestor::tt')
    if tt
      return r if %w[app single-line].include?(tt['type'])
      return r if tt['place'] == 'inline'
      return r if tt['rend'] == 'normal'
    end

    # Two-line parallel layout, keyed on the position among sibling <t>.
    case e.xpath('../t').index(e)
    when 0
      r + ' '
    when 1
      @next_line_buf << r << ' '
      ''
    else
      r
    end
  end

  def e_table(e)
    traverse(e) + "\n"
  end

  def e_term(e)
    traverse_with_gaiji_norm(e)
  end

  def e_text(e)
    traverse_with_gaiji_norm(e)
  end

  def e_tt(e)
    traverse(e)
  end

  # Unreadable text with no content is shown as a block placeholder.
  def e_unclear(e)
    r = traverse(e)
    r.empty? ? '▆' : r
  end

  # Renders a single node: comments and PASS elements yield nothing, text
  # nodes go through handle_text, and every other node is dispatched to its
  # e_<name> handler.
  #
  # @raise [NoMethodError] when no handler exists for the node's name
  def handle_node(e)
    return '' if e.comment?
    return handle_text(e) if e.text?
    return '' if PASS.include?(e.name)

    handler = "e_#{e.name}"
    unless respond_to?(handler, true)
      raise NoMethodError, "#{self.class}: no handler for <#{e.name}> element"
    end

    send(handler, e)
  end

  # Renders a text node. Text directly under <app> is dropped, and line
  # breaks inside the text are removed.
  def handle_text(e)
    s = e.content.chomp
    return '' if s.empty?
    return '' if e.parent.name == 'app'

    # CBETA XML contains superfluous line breaks between characters.
    r = s.delete("\r\n")

    # NOTE(review): nothing in this class sets @format to 'html', and CGI is
    # not required here - this branch presumably relies on 'cgi' being
    # loaded elsewhere; confirm before enabling HTML output.
    r = CGI.escapeHTML(r) if @format == 'html' # turns & into &amp;

    r
  end

  # Concatenates the rendering of every child of +e+, in document order.
  def traverse(e)
    e.children.each_with_object(+'') do |child, out|
      out << handle_node(child)
    end
  end

  # Renders the children of +e+ with gaiji normalization switched off when
  # the element carries behaviour="no-norm", restoring the previous setting
  # afterwards.
  def traverse_with_gaiji_norm(e)
    @gaiji_norm.push(e['behaviour'] != 'no-norm')
    traverse(e)
  ensure
    @gaiji_norm.pop
  end
end
data/lib/cbeta.rb CHANGED
@@ -234,3 +234,4 @@ require 'cbeta/p5a_to_simple_html'
234
234
  require 'cbeta/p5a_to_text'
235
235
  require 'cbeta/p5a_validator'
236
236
  require 'cbeta/html_to_text'
237
+ require 'cbeta/xml_document'
@@ -174568,5 +174568,17 @@
174568
174568
  "composition": "[弓*并]",
174569
174569
  "moe_variant_id": "C03427",
174570
174570
  "pua": "U+F87C6"
174571
+ },
174572
+ "CB34759": {
174573
+ "composition": "[△@▲]",
174574
+ "pua": "U+F87C7"
174575
+ },
174576
+ "CB34760": {
174577
+ "unicode": "20B4F",
174578
+ "uni_char": "𠭏",
174579
+ "composition": "[山/〦/中/又]",
174580
+ "norm_big5_char": "事",
174581
+ "moe_variant_id": "A00048-003",
174582
+ "pua": "U+F87C8"
174571
174583
  }
174572
174584
  }
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 3.1.3
4
+ version: 3.2.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2024-02-06 00:00:00.000000000 Z
11
+ date: 2024-03-09 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com
@@ -31,6 +31,7 @@ files:
31
31
  - lib/cbeta/p5a_to_simple_html.rb
32
32
  - lib/cbeta/p5a_to_text.rb
33
33
  - lib/cbeta/p5a_validator.rb
34
+ - lib/cbeta/xml_document.rb
34
35
  - lib/data/canons.csv
35
36
  - lib/data/categories.json
36
37
  - lib/data/cbeta_gaiji.json
@@ -57,7 +58,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
57
58
  - !ruby/object:Gem::Version
58
59
  version: '0'
59
60
  requirements: []
60
- rubygems_version: 3.4.22
61
+ rubygems_version: 3.5.6
61
62
  signing_key:
62
63
  specification_version: 4
63
64
  summary: CBETA Tools