cbeta 2.2.6 → 2.2.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 388e5affee54e7c33d2afd0c1451330cc719898a
4
- data.tar.gz: 11293a7c5ad31752a8ad2ee189bcf515c10072f5
3
+ metadata.gz: a3f8edd88730de817c9ecec347b516f552cd148a
4
+ data.tar.gz: 3f9b2eb3e8010094ce3ce47b84e261319d8ccac5
5
5
  SHA512:
6
- metadata.gz: 0a24c14869c9d85cad5dc1ce61237f5e2cd97b7e382fceaf0df68a732604f3c06a13815019a9fcb75da68abd4c5a4b0ea1dbccc1c538d1f1b95d27cc62b5d871
7
- data.tar.gz: f6b0d1a6f73a9cebeddf5e4809c0d382edaa88a3ba0dab11c9b6c9355abc2d38157acab7cc4caf23f7c2c90bc87523852ddbd525269feb5a41a10fb3602fe436
6
+ metadata.gz: 7e05cadab483733f7a5c531966fca41fcd453e1da9ae44f64433bf04e72d3ae9b129ea7b4dc0f94c3e7f304d9d5755ec192a572cd463a6528d3088ed4bcab5f0
7
+ data.tar.gz: 15f31074085847f332c3633f7de85706167c059709ae48a889081b9ebbe26f9e32cf626a07c8492cb29c6aadd2089b11e2dae16a66406d8c44038961047a55eb
@@ -6,8 +6,41 @@
6
6
  require 'csv'
7
7
 
8
8
  class CBETA
9
+ CANON = 'DA|GA|GB|[A-Z]'
9
10
  DATA = File.join(File.dirname(__FILE__), 'data')
10
11
  PUNCS = '.[]。,、?「」『』《》<>〈〉〔〕[]【】〖〗'
12
+
13
+ # 由 行首資訊 取得 藏經 ID
14
+ # @param linehead[String] 行首資訊, 例如 "T01n0001_p0001a01" 或 "GA009n0008_p0003a01"
15
+ # @return [String] 藏經 ID,例如 "T" 或 "GA"
16
+ def self.get_canon_id_from_linehead(linehead)
17
+ linehead.sub(/^(#{CANON}).*$/, '\1')
18
+ end
19
+
20
+ # 由 冊號 取得 藏經 ID
21
+ # @param vol[String] 冊號, 例如 "T01" 或 "GA009"
22
+ # @return [String] 藏經 ID,例如 "T" 或 "GA"
23
+ def self.get_canon_from_vol(vol)
24
+ vol.sub(/^(#{CANON}).*$/, '\1')
25
+ end
26
+
27
+ # 由 行首資訊 取得 XML檔相對路徑
28
+ # @param linehead[String] 行首資訊, 例如 "GA009n0008_p0003a01"
29
+ # @return [String] XML檔相對路徑,例如 "GA/GA009/GA009n0008.xml"
30
+ def self.linehead_to_xml_file_path(linehead)
31
+ if m = linehead.match(/^(?<work>(?<vol>(?<canon>#{CANON})\d+)n\d+[a-zA-Z]?).*$/)
32
+ File.join(m[:canon], m[:vol], m[:work]+'.xml')
33
+ else
34
+ nil
35
+ end
36
+ end
37
+
38
+ # 由 XML檔主檔名 取得 典籍編號
39
+ # @param fn[String] 檔名, 例如 "T01n0001" 或 "GA009n0008"
40
+ # @return [String] 典籍編號,例如 "T0001" 或 "GA0008"
41
+ def self.get_work_id_from_file_basename(fn)
42
+ fn.sub(/^(#{CANON})\d{2,3}n(.*)$/, '\1\2')
43
+ end
11
44
 
12
45
  # 將行首資訊轉為引用格式
13
46
  #
@@ -18,7 +51,7 @@ class CBETA
18
51
  # CBETA.linehead_to_s('T85n2838_p1291a03')
19
52
  # # return "T85, no. 2838, p. 1291, a03"
20
53
  def self.linehead_to_s(linehead)
21
- linehead.match(/^([A-Z]\d+)n(.*)_p(\d+)([a-z]\d+)$/) {
54
+ linehead.match(/^((?:#{CANON})\d+)n(.*)_p(\d+)([a-z]\d+)$/) {
22
55
  return "#{$1}, no. #{$2}, p. #{$3}, #{$4}"
23
56
  }
24
57
  nil
@@ -68,12 +101,12 @@ class CBETA
68
101
  s = File.read(fn)
69
102
  @categories = JSON.parse(s)
70
103
  end
71
-
104
+
72
105
  # @param id [String] 藏經 ID, 例如大正藏的 ID 是 "T"
73
106
  # @return [String] 藏經短名,例如 "大正藏"
74
- def get_canon_nickname(id)
75
- return nil unless @canon_nickname.key? id
76
- @canon_nickname[id]
107
+ def get_canon_nickname(id)
108
+ return nil unless @canon_nickname.key? id
109
+ @canon_nickname[id]
77
110
  end
78
111
 
79
112
  # 取得藏經略符
@@ -75,7 +75,6 @@ class CBETA::Gaiji
75
75
 
76
76
  def char_to_hash(char)
77
77
  r = {}
78
- id = char['id']
79
78
  field_mapping = {
80
79
  'big5' => 'big5',
81
80
  'Character in the Siddham font' => 'char_in_siddham_font',
@@ -103,7 +103,7 @@ class CBETA::HTMLToText
103
103
 
104
104
  def prepare_folder()
105
105
  folder = File.join(@out_root, @corpus, @vol)
106
- FileUtils.remove_dir(folder, force=true)
106
+ FileUtils.remove_dir(folder, true)
107
107
  FileUtils.mkdir_p(folder)
108
108
  folder
109
109
  end
@@ -540,10 +540,7 @@ class CBETA::P5aToHTML
540
540
  text.gsub!(/(<a class='noteAnchor'[^>]*><\/a>)(<div class="lg-cell"[^>]*>)/, '\2\1')
541
541
 
542
542
  juans = text.split(/(<juan \d+>)/)
543
- open = false
544
- fo = nil
545
543
  juan_no = nil
546
- fn = ''
547
544
  buf = ''
548
545
  # 一卷一檔
549
546
  juans.each { |j|
@@ -618,9 +615,9 @@ class CBETA::P5aToHTML
618
615
  abort "未處理底本" if @orig.nil?
619
616
 
620
617
  @vol = vol
621
- @series = vol[0]
618
+ @series = CBETA.get_canon_from_vol(vol)
622
619
  @out_folder = File.join(@out_root, @series, vol)
623
- FileUtils.remove_dir(@out_folder, force=true)
620
+ FileUtils.remove_dir(@out_folder, true)
624
621
  FileUtils::mkdir_p @out_folder
625
622
 
626
623
  source = File.join(@xml_root, @series, vol)
@@ -631,7 +628,7 @@ class CBETA::P5aToHTML
631
628
 
632
629
  def handle_vols(v1, v2)
633
630
  puts "convert volumns: #{v1}..#{v2}"
634
- @series = v1[0]
631
+ @series = CBETA.get_canon_from_vol(v1)
635
632
  folder = File.join(@xml_root, @series)
636
633
  Dir.foreach(folder) { |vol|
637
634
  next if vol < v1
@@ -677,16 +674,11 @@ class CBETA::P5aToHTML
677
674
  end
678
675
 
679
676
  def linehead_exist_in_cbeta(s)
680
- @xml_root
681
- corpus = s[0]
682
- if s.match(/^(([A-Z]\d+)n\d+[a-zA-Z]?).*$/)
683
- sutra = $1
684
- vol = $2
685
- path = File.join(@xml_root, corpus, vol, sutra+'.xml')
686
- return File.exist? path
687
- else
688
- return false
689
- end
677
+ fn = CBETA.linehead_to_xml_file_path(s)
678
+ return false if fn.nil?
679
+
680
+ path = File.join(@xml_root, fn)
681
+ File.exist? path
690
682
  end
691
683
 
692
684
  def open_xml(fn)
@@ -106,10 +106,7 @@ class CBETA::P5aToHTMLForEveryEdition
106
106
  text.gsub!(/(<a class='noteAnchor'[^>]*><\/a>)(<div class="lg-cell"[^>]*>)/, '\2\1')
107
107
 
108
108
  juans = text.split(/(<juan \d+>)/)
109
- open = false
110
- fo = nil
111
109
  juan_no = nil
112
- fn = ''
113
110
  buf = ''
114
111
  # 一卷一檔
115
112
  juans.each { |j|
@@ -122,8 +119,7 @@ class CBETA::P5aToHTMLForEveryEdition
122
119
  buf = ''
123
120
  end
124
121
  }
125
- end
126
-
122
+ end
127
123
 
128
124
  def convert_vol(vol)
129
125
  puts "convert volumn: #{vol}"
@@ -414,12 +410,12 @@ class CBETA::P5aToHTMLForEveryEdition
414
410
  def handle_lem(e)
415
411
  r = ''
416
412
  content = traverse(e)
417
- w = e['wit']
418
- if w.include? 'CBETA' and not w.include? @orig
413
+ wit = e['wit']
414
+ if wit.include? 'CBETA' and not wit.include? @orig
419
415
  n = @notes_dila[@juan].size + 1
420
416
  r = "<a class='noteAnchor dila' href='#dila_note#{n}'></a>"
421
417
  r += "<span class='cbeta'>%s</span>" % content
422
- r = "<r w='#{w}' l='#{@lb}'>#{r}</r>"
418
+ r = "<r w='#{wit}' l='#{@lb}'>#{r}</r>"
423
419
 
424
420
  note = lem_note_cf(e)
425
421
  note += lem_note_rdg(e)
@@ -625,7 +621,6 @@ class CBETA::P5aToHTMLForEveryEdition
625
621
 
626
622
  def handle_rdg(e)
627
623
  r = traverse(e)
628
- w = e['wit'].scan(/【.*?】/)
629
624
  "<r w='#{e['wit']}' l='#{@lb}' w='#{@char_count}'>#{r}</r>"
630
625
  end
631
626
 
@@ -788,16 +783,11 @@ class CBETA::P5aToHTMLForEveryEdition
788
783
  end
789
784
 
790
785
  def linehead_exist_in_cbeta(s)
791
- @xml_root
792
- corpus = s[0]
793
- if s.match(/^(([A-Z]\d+)n\d+[a-zA-Z]?).*$/)
794
- sutra = $1
795
- vol = $2
796
- path = File.join(@xml_root, corpus, vol, sutra+'.xml')
797
- return File.exist? path
798
- else
799
- return false
800
- end
786
+ fn = CBETA.linehead_to_xml_file_path(s)
787
+ return false if fn.nil?
788
+
789
+ path = File.join(@xml_root, fn)
790
+ File.exist? path
801
791
  end
802
792
 
803
793
  def open_xml(fn)
@@ -872,10 +862,9 @@ class CBETA::P5aToHTMLForEveryEdition
872
862
  else
873
863
  work = @sutra_no.sub(/^([A-Z]{1,2})\d{2,3}n(.*)$/, '\1\2')
874
864
  end
875
- canon = work[0]
876
865
  juan = "%03d" % juan_no
877
866
  folder = File.join(@out_folder, work, juan)
878
- FileUtils.remove_dir(folder, force=true)
867
+ FileUtils.remove_dir(folder, true)
879
868
  FileUtils.makedirs folder
880
869
 
881
870
  @editions.each do |ed|
@@ -198,8 +198,6 @@ class CBETA::P5aToHTMLForPDF
198
198
  end
199
199
 
200
200
  def handle_anchor(e)
201
- id = e['id']
202
-
203
201
  if e.has_attribute?('type')
204
202
  if e['type'] == 'circle'
205
203
  return '◎'
@@ -496,7 +494,6 @@ class CBETA::P5aToHTMLForPDF
496
494
  end
497
495
 
498
496
  def handle_note(e)
499
- n = e['n']
500
497
  if e.has_attribute?('type')
501
498
  t = e['type']
502
499
  if %w(equivalent orig orig_biao orig_ke mod rest).include? t
@@ -638,13 +635,9 @@ class CBETA::P5aToHTMLForPDF
638
635
  abort "未處理底本" if @orig.nil?
639
636
 
640
637
  @vol = vol
641
- if vol.start_with? 'DA'
642
- @series = 'DA'
643
- else
644
- @series = vol[0]
645
- end
638
+ @series = CBETA.get_canon_from_vol(vol)
646
639
  @out_folder = File.join(@out_root, @series, vol)
647
- FileUtils.remove_dir(@out_folder, force=true)
640
+ FileUtils.remove_dir(@out_folder, true)
648
641
  FileUtils::mkdir_p @out_folder
649
642
 
650
643
  source = File.join(@xml_root, @series, vol)
@@ -657,7 +650,7 @@ class CBETA::P5aToHTMLForPDF
657
650
 
658
651
  def handle_vols(v1, v2)
659
652
  puts "convert volumns: #{v1}..#{v2}"
660
- @series = v1[0]
653
+ @series = CBETA.get_canon_from_vol(v1)
661
654
  folder = File.join(@xml_root, @series)
662
655
  Dir.foreach(folder) { |vol|
663
656
  next if vol < v1
@@ -119,7 +119,6 @@ class CBETA::P5aToSimpleHTML
119
119
  gid = e['ref'][1..-1]
120
120
  g = @gaijis[gid]
121
121
  abort "Line:#{__LINE__} 無缺字資料:#{gid}" if g.nil?
122
- zzs = g['zzs']
123
122
 
124
123
  if gid.start_with?('SD') # 悉曇字
125
124
  case gid
@@ -257,10 +256,7 @@ class CBETA::P5aToSimpleHTML
257
256
  FileUtils.makedirs @out_sutra
258
257
 
259
258
  juans = text.split(/(<juan \d+>)/)
260
- open = false
261
- fo = nil
262
259
  juan_no = nil
263
- fn = ''
264
260
  buf = ''
265
261
  # 一卷一檔
266
262
  juans.each { |j|
@@ -324,9 +320,9 @@ class CBETA::P5aToSimpleHTML
324
320
  @orig_short = @orig.sub(/^【(.*)】$/, '\1')
325
321
 
326
322
  @vol = vol
327
- @series = vol[0]
323
+ @series = CBETA.get_canon_from_vol(vol)
328
324
  @out_vol = File.join(@output_root, @series, vol)
329
- FileUtils.remove_dir(@out_vol, force=true)
325
+ FileUtils.remove_dir(@out_vol, true)
330
326
  FileUtils.makedirs @out_vol
331
327
 
332
328
  source = File.join(@xml_root, @series, vol)
@@ -337,7 +333,7 @@ class CBETA::P5aToSimpleHTML
337
333
 
338
334
  def handle_vols(v1, v2)
339
335
  puts "convert volumns: #{v1}..#{v2}"
340
- @series = v1[0]
336
+ @series = CBETA.get_canon_from_vol(v1)
341
337
  folder = File.join(@xml_root, @series)
342
338
  Dir.foreach(folder) { |vol|
343
339
  next if vol < v1
@@ -70,8 +70,8 @@ class CBETA::P5aToText
70
70
  return convert_all if target.nil?
71
71
 
72
72
  arg = target.upcase
73
- if arg.size == 1
74
- handle_collection(arg)
73
+ if arg.size <= 2
74
+ handle_canon(arg)
75
75
  else
76
76
  if arg.include? '..'
77
77
  arg.match(/^([^\.]+?)\.\.([^\.]+)$/) {
@@ -124,7 +124,7 @@ class CBETA::P5aToText
124
124
  def convert_all
125
125
  Dir.entries(@xml_root).sort.each do |c|
126
126
  next unless c.match(/^[A-Z]$/)
127
- handle_collection(c)
127
+ handle_canon(c)
128
128
  end
129
129
  end
130
130
 
@@ -138,7 +138,7 @@ class CBETA::P5aToText
138
138
  r
139
139
  end
140
140
 
141
- def handle_anchor(e)
141
+ def e_anchor(e)
142
142
  if e.has_attribute?('type')
143
143
  if e['type'] == 'circle'
144
144
  return '◎'
@@ -148,53 +148,43 @@ class CBETA::P5aToText
148
148
  ''
149
149
  end
150
150
 
151
- def handle_app(e)
151
+ def e_app(e)
152
152
  traverse(e)
153
153
  end
154
154
 
155
- def handle_byline(e)
155
+ def e_byline(e)
156
156
  r = traverse(e)
157
157
  r += @settings[:format]=='app' ? "\t" : "\n"
158
158
  r
159
159
  end
160
160
 
161
- def handle_cell(e)
161
+ def e_cell(e)
162
162
  r = traverse(e)
163
163
  r += @settings[:format]=='app' ? "\t" : "\n"
164
164
  r
165
165
  end
166
166
 
167
- def handle_collection(c)
168
- @series = c
169
- puts 'handle_collection ' + c
170
- folder = File.join(@xml_root, @series)
171
- Dir.entries(folder).sort.each do |vol|
172
- next if vol.start_with? '.'
173
- handle_vol(vol)
174
- end
175
- end
176
-
177
- def handle_corr(e)
167
+ def e_corr(e)
178
168
  "<r w='【CBETA】'>%s</r>" % traverse(e)
179
169
  end
180
170
 
181
- def handle_div(e)
171
+ def e_div(e)
182
172
  traverse(e)
183
173
  end
184
174
 
185
- def handle_docNumber(e)
175
+ def e_docNumber(e)
186
176
  r = traverse(e)
187
177
  r += @settings[:format] == 'app' ? "\t" : "\n"
188
178
  r
189
179
  end
190
180
 
191
- def handle_figure(e)
181
+ def e_figure(e)
192
182
  r = traverse(e)
193
183
  r += @settings[:format] == 'app' ? "\t" : "\n"
194
184
  r
195
185
  end
196
186
 
197
- def handle_g(e)
187
+ def e_g(e)
198
188
  # if 悉曇字、蘭札體
199
189
  # 使用 Unicode PUA
200
190
  # else if 有 <mapping type="unicode">
@@ -215,7 +205,6 @@ class CBETA::P5aToText
215
205
 
216
206
  g = @gaijis[gid]
217
207
  abort "Line:#{__LINE__} 無缺字資料:#{gid}" if g.nil?
218
- zzs = g['zzs']
219
208
 
220
209
  if gid.start_with?('SD') # 悉曇字
221
210
  case gid
@@ -240,28 +229,28 @@ class CBETA::P5aToText
240
229
  [0xf0000 + gid[2..-1].to_i].pack 'U'
241
230
  end
242
231
 
243
- def handle_graphic(e)
232
+ def e_graphic(e)
244
233
  ''
245
234
  end
246
235
 
247
- def handle_head(e)
236
+ def e_head(e)
248
237
  r = traverse(e)
249
238
  r += @settings[:format] == 'app' ? "\t" : "\n"
250
239
  r
251
240
  end
252
241
 
253
- def handle_item(e)
242
+ def e_item(e)
254
243
  r = traverse(e)
255
244
  r += @settings[:format] == 'app' ? "\t" : "\n"
256
245
  end
257
246
 
258
- def handle_juan(e)
247
+ def e_juan(e)
259
248
  r = traverse(e)
260
249
  r += @settings[:format] == 'app' ? "\t" : "\n"
261
250
  r
262
251
  end
263
252
 
264
- def handle_l(e)
253
+ def e_l(e)
265
254
  r = traverse(e)
266
255
  if @settings[:format] == 'app'
267
256
  r += "\t"
@@ -271,7 +260,7 @@ class CBETA::P5aToText
271
260
  r
272
261
  end
273
262
 
274
- def handle_lb(e)
263
+ def e_lb(e)
275
264
  r = ''
276
265
  if @settings[:format] == 'app'
277
266
  r += "\n#{e['n']}║"
@@ -283,7 +272,7 @@ class CBETA::P5aToText
283
272
  r
284
273
  end
285
274
 
286
- def handle_lem(e)
275
+ def e_lem(e)
287
276
  # 沒有 rdg 的版本,用字同 lem
288
277
  editions = Set.new @editions
289
278
  e.xpath('./following-sibling::rdg').each do |rdg|
@@ -296,17 +285,17 @@ class CBETA::P5aToText
296
285
  "<r w='#{w}'>%s</r>" % traverse(e)
297
286
  end
298
287
 
299
- def handle_lg(e)
288
+ def e_lg(e)
300
289
  traverse(e)
301
290
  end
302
291
 
303
- def handle_list(e)
292
+ def e_list(e)
304
293
  r = ''
305
294
  r += "\n" unless @settings[:format] == 'app'
306
295
  r + traverse(e)
307
296
  end
308
297
 
309
- def handle_milestone(e)
298
+ def e_milestone(e)
310
299
  r = ''
311
300
  if e['unit'] == 'juan'
312
301
  @juan = e['n'].to_i
@@ -315,55 +304,11 @@ class CBETA::P5aToText
315
304
  r
316
305
  end
317
306
 
318
- def handle_mulu(e)
307
+ def e_mulu(e)
319
308
  ''
320
309
  end
321
310
 
322
- def handle_node(e)
323
- return '' if e.comment?
324
- return handle_text(e) if e.text?
325
- return '' if PASS.include?(e.name)
326
- r = case e.name
327
- when 'anchor' then handle_anchor(e)
328
- when 'app' then handle_app(e)
329
- when 'back' then ''
330
- when 'byline' then handle_byline(e)
331
- when 'cell' then handle_cell(e)
332
- when 'corr' then handle_corr(e)
333
- when 'div' then handle_div(e)
334
- when 'docNumber' then handle_docNumber(e)
335
- when 'figure' then handle_figure(e)
336
- when 'foreign' then ''
337
- when 'g' then handle_g(e)
338
- when 'graphic' then handle_graphic(e)
339
- when 'head' then handle_head(e)
340
- when 'item' then handle_item(e)
341
- when 'juan' then handle_juan(e)
342
- when 'l' then handle_l(e)
343
- when 'lb' then handle_lb(e)
344
- when 'lem' then handle_lem(e)
345
- when 'lg' then handle_lg(e)
346
- when 'list' then handle_list(e)
347
- when 'mulu' then handle_mulu(e)
348
- when 'note' then handle_note(e)
349
- when 'milestone' then handle_milestone(e)
350
- when 'p' then handle_p(e)
351
- when 'rdg' then handle_rdg(e)
352
- when 'reg' then ''
353
- when 'row' then handle_row(e)
354
- when 'sic' then handle_sic(e)
355
- when 'sg' then handle_sg(e)
356
- when 'tt' then handle_tt(e)
357
- when 't' then handle_t(e)
358
- when 'table' then handle_table(e)
359
- when 'teiHeader' then ''
360
- when 'unclear' then '▆'
361
- else traverse(e)
362
- end
363
- r
364
- end
365
-
366
- def handle_note(e)
311
+ def e_note(e)
367
312
  if e.has_attribute?('place') && e['place']=='inline'
368
313
  r = traverse(e)
369
314
  return "(#{r})"
@@ -371,28 +316,108 @@ class CBETA::P5aToText
371
316
  ''
372
317
  end
373
318
 
374
- def handle_p(e)
319
+ def e_p(e)
375
320
  r = traverse(e)
376
321
  r += @settings[:format] == 'app' ? "\t" : "\n"
377
322
  r
378
323
  end
379
324
 
380
- def handle_rdg(e)
325
+ def e_rdg(e)
381
326
  "<r w='#{e['wit']}'>%s</r>" % traverse(e)
382
327
  end
383
328
 
384
- def handle_row(e)
329
+ def e_row(e)
385
330
  traverse(e)
386
331
  end
387
332
 
388
- def handle_sg(e)
333
+ def e_sg(e)
389
334
  '(' + traverse(e) + ')'
390
335
  end
391
336
 
392
- def handle_sic(e)
337
+ def e_sic(e)
393
338
  "<r w='#{@orig}'>" + traverse(e) + "</r>"
394
339
  end
395
340
 
341
+ def e_t(e)
342
+ if e.has_attribute? 'place'
343
+ return '' if e['place'].include? 'foot'
344
+ end
345
+ r = traverse(e)
346
+
347
+ # 不是雙行對照
348
+ return r if @tt_type == 'app'
349
+
350
+ # 處理雙行對照
351
+ i = e.xpath('../t').index(e)
352
+ case i
353
+ when 0
354
+ return r + ' '
355
+ when 1
356
+ @next_line_buf += r + ' '
357
+ return ''
358
+ else
359
+ return r
360
+ end
361
+ end
362
+
363
+ def e_table(e)
364
+ traverse(e)
365
+ end
366
+
367
+ def handle_canon(c)
368
+ @canon = c
369
+ puts 'handle_canon ' + c
370
+ folder = File.join(@xml_root, @canon)
371
+ Dir.entries(folder).sort.each do |vol|
372
+ next if vol.start_with? '.'
373
+ handle_vol(vol)
374
+ end
375
+ end
376
+
377
+ def handle_node(e)
378
+ return '' if e.comment?
379
+ return handle_text(e) if e.text?
380
+ return '' if PASS.include?(e.name)
381
+ r = case e.name
382
+ when 'anchor' then e_anchor(e)
383
+ when 'app' then e_app(e)
384
+ when 'back' then ''
385
+ when 'byline' then e_byline(e)
386
+ when 'cell' then e_cell(e)
387
+ when 'corr' then e_corr(e)
388
+ when 'div' then e_div(e)
389
+ when 'docNumber' then e_docNumber(e)
390
+ when 'figure' then e_figure(e)
391
+ when 'foreign' then ''
392
+ when 'g' then e_g(e)
393
+ when 'graphic' then e_graphic(e)
394
+ when 'head' then e_head(e)
395
+ when 'item' then e_item(e)
396
+ when 'juan' then e_juan(e)
397
+ when 'l' then e_l(e)
398
+ when 'lb' then e_lb(e)
399
+ when 'lem' then e_lem(e)
400
+ when 'lg' then e_lg(e)
401
+ when 'list' then e_list(e)
402
+ when 'mulu' then e_mulu(e)
403
+ when 'note' then e_note(e)
404
+ when 'milestone' then e_milestone(e)
405
+ when 'p' then e_p(e)
406
+ when 'rdg' then e_rdg(e)
407
+ when 'reg' then ''
408
+ when 'row' then e_row(e)
409
+ when 'sic' then e_sic(e)
410
+ when 'sg' then e_sg(e)
411
+ when 'tt' then e_tt(e)
412
+ when 't' then e_t(e)
413
+ when 'table' then e_table(e)
414
+ when 'teiHeader' then ''
415
+ when 'unclear' then '▆'
416
+ else traverse(e)
417
+ end
418
+ r
419
+ end
420
+
396
421
  def handle_sutra(xml_fn)
397
422
  puts "convert sutra #{xml_fn}"
398
423
  @dila_note = 0
@@ -418,10 +443,7 @@ class CBETA::P5aToText
418
443
  FileUtils.makedirs @out_sutra
419
444
 
420
445
  juans = text.split(/(<juan \d+>)/)
421
- open = false
422
- fo = nil
423
446
  juan_no = nil
424
- fn = ''
425
447
  buf = ''
426
448
  # 一卷一檔
427
449
  juans.each { |j|
@@ -438,32 +460,6 @@ class CBETA::P5aToText
438
460
  }
439
461
  end
440
462
 
441
- def handle_t(e)
442
- if e.has_attribute? 'place'
443
- return '' if e['place'].include? 'foot'
444
- end
445
- r = traverse(e)
446
-
447
- # 不是雙行對照
448
- return r if @tt_type == 'app'
449
-
450
- # 處理雙行對照
451
- i = e.xpath('../t').index(e)
452
- case i
453
- when 0
454
- return r + ' '
455
- when 1
456
- @next_line_buf += r + ' '
457
- return ''
458
- else
459
- return r
460
- end
461
- end
462
-
463
- def handle_table(e)
464
- traverse(e)
465
- end
466
-
467
463
  def handle_text(e)
468
464
  s = e.content().chomp
469
465
  return '' if s.empty?
@@ -476,7 +472,7 @@ class CBETA::P5aToText
476
472
  CGI.escapeHTML(r)
477
473
  end
478
474
 
479
- def handle_tt(e)
475
+ def e_tt(e)
480
476
  @tt_type = e['type']
481
477
  traverse(e)
482
478
  end
@@ -488,12 +484,12 @@ class CBETA::P5aToText
488
484
  abort "未處理底本" if @orig.nil?
489
485
 
490
486
  @vol = vol
491
- @series = vol[0]
492
- @out_vol = File.join(@output_root, @series, vol)
493
- FileUtils.remove_dir(@out_vol, force=true)
487
+ @canon = CBETA.get_canon_from_vol(vol)
488
+ @out_vol = File.join(@output_root, @canon, vol)
489
+ FileUtils.remove_dir(@out_vol, true)
494
490
  FileUtils.makedirs @out_vol
495
491
 
496
- source = File.join(@xml_root, @series, vol)
492
+ source = File.join(@xml_root, @canon, vol)
497
493
  Dir.entries(source).sort.each { |f|
498
494
  next if f.start_with? '.'
499
495
  fn = File.join(source, f)
@@ -503,8 +499,8 @@ class CBETA::P5aToText
503
499
 
504
500
  def handle_vols(v1, v2)
505
501
  puts "convert volumns: #{v1}..#{v2}"
506
- @series = v1[0]
507
- folder = File.join(@xml_root, @series)
502
+ @canon = get_canon_from_vol(v1)
503
+ folder = File.join(@xml_root, @canon)
508
504
  Dir.entries(folder).sort.each do |vol|
509
505
  next if vol < v1
510
506
  next if vol > v2
@@ -85,7 +85,7 @@ class CBETA::P5aValidator
85
85
  def check_well_form(xml)
86
86
  r = ''
87
87
  begin
88
- doc = Nokogiri::XML(xml) { |config| config.strict }
88
+ Nokogiri::XML(xml) { |config| config.strict }
89
89
  rescue Nokogiri::XML::SyntaxError => e
90
90
  r = "caught exception: #{e}"
91
91
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.6
4
+ version: 2.2.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-29 00:00:00.000000000 Z
11
+ date: 2016-08-31 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com