cbeta 2.2.6 → 2.2.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 388e5affee54e7c33d2afd0c1451330cc719898a
4
- data.tar.gz: 11293a7c5ad31752a8ad2ee189bcf515c10072f5
3
+ metadata.gz: a3f8edd88730de817c9ecec347b516f552cd148a
4
+ data.tar.gz: 3f9b2eb3e8010094ce3ce47b84e261319d8ccac5
5
5
  SHA512:
6
- metadata.gz: 0a24c14869c9d85cad5dc1ce61237f5e2cd97b7e382fceaf0df68a732604f3c06a13815019a9fcb75da68abd4c5a4b0ea1dbccc1c538d1f1b95d27cc62b5d871
7
- data.tar.gz: f6b0d1a6f73a9cebeddf5e4809c0d382edaa88a3ba0dab11c9b6c9355abc2d38157acab7cc4caf23f7c2c90bc87523852ddbd525269feb5a41a10fb3602fe436
6
+ metadata.gz: 7e05cadab483733f7a5c531966fca41fcd453e1da9ae44f64433bf04e72d3ae9b129ea7b4dc0f94c3e7f304d9d5755ec192a572cd463a6528d3088ed4bcab5f0
7
+ data.tar.gz: 15f31074085847f332c3633f7de85706167c059709ae48a889081b9ebbe26f9e32cf626a07c8492cb29c6aadd2089b11e2dae16a66406d8c44038961047a55eb
@@ -6,8 +6,41 @@
6
6
  require 'csv'
7
7
 
8
8
  class CBETA
9
+ CANON = 'DA|GA|GB|[A-Z]'
9
10
  DATA = File.join(File.dirname(__FILE__), 'data')
10
11
  PUNCS = '.[]。,、?「」『』《》<>〈〉〔〕[]【】〖〗'
12
+
13
+ # 由 行首資訊 取得 藏經 ID
14
+ # @param linehead[String] 行首資訊, 例如 "T01n0001_p0001a01" 或 "GA009n0008_p0003a01"
15
+ # @return [String] 藏經 ID,例如 "T" 或 "GA"
16
+ def self.get_canon_id_from_linehead(linehead)
17
+ linehead.sub(/^(#{CANON}).*$/, '\1')
18
+ end
19
+
20
+ # 由 冊號 取得 藏經 ID
21
+ # @param vol[String] 冊號, 例如 "T01" 或 "GA009"
22
+ # @return [String] 藏經 ID,例如 "T" 或 "GA"
23
+ def self.get_canon_from_vol(vol)
24
+ vol.sub(/^(#{CANON}).*$/, '\1')
25
+ end
26
+
27
+ # 由 行首資訊 取得 XML檔相對路徑
28
+ # @param linehead[String] 行首資訊, 例如 "GA009n0008_p0003a01"
29
+ # @return [String] XML檔相對路徑,例如 "GA/GA009/GA009n0008.xml"
30
+ def self.linehead_to_xml_file_path(linehead)
31
+ if m = linehead.match(/^(?<work>(?<vol>(?<canon>#{CANON})\d+)n\d+[a-zA-Z]?).*$/)
32
+ File.join(m[:canon], m[:vol], m[:work]+'.xml')
33
+ else
34
+ nil
35
+ end
36
+ end
37
+
38
+ # 由 XML檔主檔名 取得 典籍編號
39
+ # @param fn[String] 檔名, 例如 "T01n0001" 或 "GA009n0008"
40
+ # @return [String] 典籍編號,例如 "T0001" 或 "GA0008"
41
+ def self.get_work_id_from_file_basename(fn)
42
+ fn.sub(/^(#{CANON})\d{2,3}n(.*)$/, '\1\2')
43
+ end
11
44
 
12
45
  # 將行首資訊轉為引用格式
13
46
  #
@@ -18,7 +51,7 @@ class CBETA
18
51
  # CBETA.linehead_to_s('T85n2838_p1291a03')
19
52
  # # return "T85, no. 2838, p. 1291, a03"
20
53
  def self.linehead_to_s(linehead)
21
- linehead.match(/^([A-Z]\d+)n(.*)_p(\d+)([a-z]\d+)$/) {
54
+ linehead.match(/^((?:#{CANON})\d+)n(.*)_p(\d+)([a-z]\d+)$/) {
22
55
  return "#{$1}, no. #{$2}, p. #{$3}, #{$4}"
23
56
  }
24
57
  nil
@@ -68,12 +101,12 @@ class CBETA
68
101
  s = File.read(fn)
69
102
  @categories = JSON.parse(s)
70
103
  end
71
-
104
+
72
105
  # @param id [String] 藏經 ID, 例如大正藏的 ID 是 "T"
73
106
  # @return [String] 藏經短名,例如 "大正藏"
74
- def get_canon_nickname(id)
75
- return nil unless @canon_nickname.key? id
76
- @canon_nickname[id]
107
+ def get_canon_nickname(id)
108
+ return nil unless @canon_nickname.key? id
109
+ @canon_nickname[id]
77
110
  end
78
111
 
79
112
  # 取得藏經略符
@@ -75,7 +75,6 @@ class CBETA::Gaiji
75
75
 
76
76
  def char_to_hash(char)
77
77
  r = {}
78
- id = char['id']
79
78
  field_mapping = {
80
79
  'big5' => 'big5',
81
80
  'Character in the Siddham font' => 'char_in_siddham_font',
@@ -103,7 +103,7 @@ class CBETA::HTMLToText
103
103
 
104
104
  def prepare_folder()
105
105
  folder = File.join(@out_root, @corpus, @vol)
106
- FileUtils.remove_dir(folder, force=true)
106
+ FileUtils.remove_dir(folder, true)
107
107
  FileUtils.mkdir_p(folder)
108
108
  folder
109
109
  end
@@ -540,10 +540,7 @@ class CBETA::P5aToHTML
540
540
  text.gsub!(/(<a class='noteAnchor'[^>]*><\/a>)(<div class="lg-cell"[^>]*>)/, '\2\1')
541
541
 
542
542
  juans = text.split(/(<juan \d+>)/)
543
- open = false
544
- fo = nil
545
543
  juan_no = nil
546
- fn = ''
547
544
  buf = ''
548
545
  # 一卷一檔
549
546
  juans.each { |j|
@@ -618,9 +615,9 @@ class CBETA::P5aToHTML
618
615
  abort "未處理底本" if @orig.nil?
619
616
 
620
617
  @vol = vol
621
- @series = vol[0]
618
+ @series = CBETA.get_canon_from_vol(vol)
622
619
  @out_folder = File.join(@out_root, @series, vol)
623
- FileUtils.remove_dir(@out_folder, force=true)
620
+ FileUtils.remove_dir(@out_folder, true)
624
621
  FileUtils::mkdir_p @out_folder
625
622
 
626
623
  source = File.join(@xml_root, @series, vol)
@@ -631,7 +628,7 @@ class CBETA::P5aToHTML
631
628
 
632
629
  def handle_vols(v1, v2)
633
630
  puts "convert volumns: #{v1}..#{v2}"
634
- @series = v1[0]
631
+ @series = CBETA.get_canon_from_vol(v1)
635
632
  folder = File.join(@xml_root, @series)
636
633
  Dir.foreach(folder) { |vol|
637
634
  next if vol < v1
@@ -677,16 +674,11 @@ class CBETA::P5aToHTML
677
674
  end
678
675
 
679
676
  def linehead_exist_in_cbeta(s)
680
- @xml_root
681
- corpus = s[0]
682
- if s.match(/^(([A-Z]\d+)n\d+[a-zA-Z]?).*$/)
683
- sutra = $1
684
- vol = $2
685
- path = File.join(@xml_root, corpus, vol, sutra+'.xml')
686
- return File.exist? path
687
- else
688
- return false
689
- end
677
+ fn = CBETA.linehead_to_xml_file_path(s)
678
+ return false if fn.nil?
679
+
680
+ path = File.join(@xml_root, fn)
681
+ File.exist? path
690
682
  end
691
683
 
692
684
  def open_xml(fn)
@@ -106,10 +106,7 @@ class CBETA::P5aToHTMLForEveryEdition
106
106
  text.gsub!(/(<a class='noteAnchor'[^>]*><\/a>)(<div class="lg-cell"[^>]*>)/, '\2\1')
107
107
 
108
108
  juans = text.split(/(<juan \d+>)/)
109
- open = false
110
- fo = nil
111
109
  juan_no = nil
112
- fn = ''
113
110
  buf = ''
114
111
  # 一卷一檔
115
112
  juans.each { |j|
@@ -122,8 +119,7 @@ class CBETA::P5aToHTMLForEveryEdition
122
119
  buf = ''
123
120
  end
124
121
  }
125
- end
126
-
122
+ end
127
123
 
128
124
  def convert_vol(vol)
129
125
  puts "convert volumn: #{vol}"
@@ -414,12 +410,12 @@ class CBETA::P5aToHTMLForEveryEdition
414
410
  def handle_lem(e)
415
411
  r = ''
416
412
  content = traverse(e)
417
- w = e['wit']
418
- if w.include? 'CBETA' and not w.include? @orig
413
+ wit = e['wit']
414
+ if wit.include? 'CBETA' and not wit.include? @orig
419
415
  n = @notes_dila[@juan].size + 1
420
416
  r = "<a class='noteAnchor dila' href='#dila_note#{n}'></a>"
421
417
  r += "<span class='cbeta'>%s</span>" % content
422
- r = "<r w='#{w}' l='#{@lb}'>#{r}</r>"
418
+ r = "<r w='#{wit}' l='#{@lb}'>#{r}</r>"
423
419
 
424
420
  note = lem_note_cf(e)
425
421
  note += lem_note_rdg(e)
@@ -625,7 +621,6 @@ class CBETA::P5aToHTMLForEveryEdition
625
621
 
626
622
  def handle_rdg(e)
627
623
  r = traverse(e)
628
- w = e['wit'].scan(/【.*?】/)
629
624
  "<r w='#{e['wit']}' l='#{@lb}' w='#{@char_count}'>#{r}</r>"
630
625
  end
631
626
 
@@ -788,16 +783,11 @@ class CBETA::P5aToHTMLForEveryEdition
788
783
  end
789
784
 
790
785
  def linehead_exist_in_cbeta(s)
791
- @xml_root
792
- corpus = s[0]
793
- if s.match(/^(([A-Z]\d+)n\d+[a-zA-Z]?).*$/)
794
- sutra = $1
795
- vol = $2
796
- path = File.join(@xml_root, corpus, vol, sutra+'.xml')
797
- return File.exist? path
798
- else
799
- return false
800
- end
786
+ fn = CBETA.linehead_to_xml_file_path(s)
787
+ return false if fn.nil?
788
+
789
+ path = File.join(@xml_root, fn)
790
+ File.exist? path
801
791
  end
802
792
 
803
793
  def open_xml(fn)
@@ -872,10 +862,9 @@ class CBETA::P5aToHTMLForEveryEdition
872
862
  else
873
863
  work = @sutra_no.sub(/^([A-Z]{1,2})\d{2,3}n(.*)$/, '\1\2')
874
864
  end
875
- canon = work[0]
876
865
  juan = "%03d" % juan_no
877
866
  folder = File.join(@out_folder, work, juan)
878
- FileUtils.remove_dir(folder, force=true)
867
+ FileUtils.remove_dir(folder, true)
879
868
  FileUtils.makedirs folder
880
869
 
881
870
  @editions.each do |ed|
@@ -198,8 +198,6 @@ class CBETA::P5aToHTMLForPDF
198
198
  end
199
199
 
200
200
  def handle_anchor(e)
201
- id = e['id']
202
-
203
201
  if e.has_attribute?('type')
204
202
  if e['type'] == 'circle'
205
203
  return '◎'
@@ -496,7 +494,6 @@ class CBETA::P5aToHTMLForPDF
496
494
  end
497
495
 
498
496
  def handle_note(e)
499
- n = e['n']
500
497
  if e.has_attribute?('type')
501
498
  t = e['type']
502
499
  if %w(equivalent orig orig_biao orig_ke mod rest).include? t
@@ -638,13 +635,9 @@ class CBETA::P5aToHTMLForPDF
638
635
  abort "未處理底本" if @orig.nil?
639
636
 
640
637
  @vol = vol
641
- if vol.start_with? 'DA'
642
- @series = 'DA'
643
- else
644
- @series = vol[0]
645
- end
638
+ @series = CBETA.get_canon_from_vol(vol)
646
639
  @out_folder = File.join(@out_root, @series, vol)
647
- FileUtils.remove_dir(@out_folder, force=true)
640
+ FileUtils.remove_dir(@out_folder, true)
648
641
  FileUtils::mkdir_p @out_folder
649
642
 
650
643
  source = File.join(@xml_root, @series, vol)
@@ -657,7 +650,7 @@ class CBETA::P5aToHTMLForPDF
657
650
 
658
651
  def handle_vols(v1, v2)
659
652
  puts "convert volumns: #{v1}..#{v2}"
660
- @series = v1[0]
653
+ @series = CBETA.get_canon_from_vol(v1)
661
654
  folder = File.join(@xml_root, @series)
662
655
  Dir.foreach(folder) { |vol|
663
656
  next if vol < v1
@@ -119,7 +119,6 @@ class CBETA::P5aToSimpleHTML
119
119
  gid = e['ref'][1..-1]
120
120
  g = @gaijis[gid]
121
121
  abort "Line:#{__LINE__} 無缺字資料:#{gid}" if g.nil?
122
- zzs = g['zzs']
123
122
 
124
123
  if gid.start_with?('SD') # 悉曇字
125
124
  case gid
@@ -257,10 +256,7 @@ class CBETA::P5aToSimpleHTML
257
256
  FileUtils.makedirs @out_sutra
258
257
 
259
258
  juans = text.split(/(<juan \d+>)/)
260
- open = false
261
- fo = nil
262
259
  juan_no = nil
263
- fn = ''
264
260
  buf = ''
265
261
  # 一卷一檔
266
262
  juans.each { |j|
@@ -324,9 +320,9 @@ class CBETA::P5aToSimpleHTML
324
320
  @orig_short = @orig.sub(/^【(.*)】$/, '\1')
325
321
 
326
322
  @vol = vol
327
- @series = vol[0]
323
+ @series = CBETA.get_canon_from_vol(vol)
328
324
  @out_vol = File.join(@output_root, @series, vol)
329
- FileUtils.remove_dir(@out_vol, force=true)
325
+ FileUtils.remove_dir(@out_vol, true)
330
326
  FileUtils.makedirs @out_vol
331
327
 
332
328
  source = File.join(@xml_root, @series, vol)
@@ -337,7 +333,7 @@ class CBETA::P5aToSimpleHTML
337
333
 
338
334
  def handle_vols(v1, v2)
339
335
  puts "convert volumns: #{v1}..#{v2}"
340
- @series = v1[0]
336
+ @series = CBETA.get_canon_from_vol(v1)
341
337
  folder = File.join(@xml_root, @series)
342
338
  Dir.foreach(folder) { |vol|
343
339
  next if vol < v1
@@ -70,8 +70,8 @@ class CBETA::P5aToText
70
70
  return convert_all if target.nil?
71
71
 
72
72
  arg = target.upcase
73
- if arg.size == 1
74
- handle_collection(arg)
73
+ if arg.size <= 2
74
+ handle_canon(arg)
75
75
  else
76
76
  if arg.include? '..'
77
77
  arg.match(/^([^\.]+?)\.\.([^\.]+)$/) {
@@ -124,7 +124,7 @@ class CBETA::P5aToText
124
124
  def convert_all
125
125
  Dir.entries(@xml_root).sort.each do |c|
126
126
  next unless c.match(/^[A-Z]$/)
127
- handle_collection(c)
127
+ handle_canon(c)
128
128
  end
129
129
  end
130
130
 
@@ -138,7 +138,7 @@ class CBETA::P5aToText
138
138
  r
139
139
  end
140
140
 
141
- def handle_anchor(e)
141
+ def e_anchor(e)
142
142
  if e.has_attribute?('type')
143
143
  if e['type'] == 'circle'
144
144
  return '◎'
@@ -148,53 +148,43 @@ class CBETA::P5aToText
148
148
  ''
149
149
  end
150
150
 
151
- def handle_app(e)
151
+ def e_app(e)
152
152
  traverse(e)
153
153
  end
154
154
 
155
- def handle_byline(e)
155
+ def e_byline(e)
156
156
  r = traverse(e)
157
157
  r += @settings[:format]=='app' ? "\t" : "\n"
158
158
  r
159
159
  end
160
160
 
161
- def handle_cell(e)
161
+ def e_cell(e)
162
162
  r = traverse(e)
163
163
  r += @settings[:format]=='app' ? "\t" : "\n"
164
164
  r
165
165
  end
166
166
 
167
- def handle_collection(c)
168
- @series = c
169
- puts 'handle_collection ' + c
170
- folder = File.join(@xml_root, @series)
171
- Dir.entries(folder).sort.each do |vol|
172
- next if vol.start_with? '.'
173
- handle_vol(vol)
174
- end
175
- end
176
-
177
- def handle_corr(e)
167
+ def e_corr(e)
178
168
  "<r w='【CBETA】'>%s</r>" % traverse(e)
179
169
  end
180
170
 
181
- def handle_div(e)
171
+ def e_div(e)
182
172
  traverse(e)
183
173
  end
184
174
 
185
- def handle_docNumber(e)
175
+ def e_docNumber(e)
186
176
  r = traverse(e)
187
177
  r += @settings[:format] == 'app' ? "\t" : "\n"
188
178
  r
189
179
  end
190
180
 
191
- def handle_figure(e)
181
+ def e_figure(e)
192
182
  r = traverse(e)
193
183
  r += @settings[:format] == 'app' ? "\t" : "\n"
194
184
  r
195
185
  end
196
186
 
197
- def handle_g(e)
187
+ def e_g(e)
198
188
  # if 悉曇字、蘭札體
199
189
  # 使用 Unicode PUA
200
190
  # else if 有 <mapping type="unicode">
@@ -215,7 +205,6 @@ class CBETA::P5aToText
215
205
 
216
206
  g = @gaijis[gid]
217
207
  abort "Line:#{__LINE__} 無缺字資料:#{gid}" if g.nil?
218
- zzs = g['zzs']
219
208
 
220
209
  if gid.start_with?('SD') # 悉曇字
221
210
  case gid
@@ -240,28 +229,28 @@ class CBETA::P5aToText
240
229
  [0xf0000 + gid[2..-1].to_i].pack 'U'
241
230
  end
242
231
 
243
- def handle_graphic(e)
232
+ def e_graphic(e)
244
233
  ''
245
234
  end
246
235
 
247
- def handle_head(e)
236
+ def e_head(e)
248
237
  r = traverse(e)
249
238
  r += @settings[:format] == 'app' ? "\t" : "\n"
250
239
  r
251
240
  end
252
241
 
253
- def handle_item(e)
242
+ def e_item(e)
254
243
  r = traverse(e)
255
244
  r += @settings[:format] == 'app' ? "\t" : "\n"
256
245
  end
257
246
 
258
- def handle_juan(e)
247
+ def e_juan(e)
259
248
  r = traverse(e)
260
249
  r += @settings[:format] == 'app' ? "\t" : "\n"
261
250
  r
262
251
  end
263
252
 
264
- def handle_l(e)
253
+ def e_l(e)
265
254
  r = traverse(e)
266
255
  if @settings[:format] == 'app'
267
256
  r += "\t"
@@ -271,7 +260,7 @@ class CBETA::P5aToText
271
260
  r
272
261
  end
273
262
 
274
- def handle_lb(e)
263
+ def e_lb(e)
275
264
  r = ''
276
265
  if @settings[:format] == 'app'
277
266
  r += "\n#{e['n']}║"
@@ -283,7 +272,7 @@ class CBETA::P5aToText
283
272
  r
284
273
  end
285
274
 
286
- def handle_lem(e)
275
+ def e_lem(e)
287
276
  # 沒有 rdg 的版本,用字同 lem
288
277
  editions = Set.new @editions
289
278
  e.xpath('./following-sibling::rdg').each do |rdg|
@@ -296,17 +285,17 @@ class CBETA::P5aToText
296
285
  "<r w='#{w}'>%s</r>" % traverse(e)
297
286
  end
298
287
 
299
- def handle_lg(e)
288
+ def e_lg(e)
300
289
  traverse(e)
301
290
  end
302
291
 
303
- def handle_list(e)
292
+ def e_list(e)
304
293
  r = ''
305
294
  r += "\n" unless @settings[:format] == 'app'
306
295
  r + traverse(e)
307
296
  end
308
297
 
309
- def handle_milestone(e)
298
+ def e_milestone(e)
310
299
  r = ''
311
300
  if e['unit'] == 'juan'
312
301
  @juan = e['n'].to_i
@@ -315,55 +304,11 @@ class CBETA::P5aToText
315
304
  r
316
305
  end
317
306
 
318
- def handle_mulu(e)
307
+ def e_mulu(e)
319
308
  ''
320
309
  end
321
310
 
322
- def handle_node(e)
323
- return '' if e.comment?
324
- return handle_text(e) if e.text?
325
- return '' if PASS.include?(e.name)
326
- r = case e.name
327
- when 'anchor' then handle_anchor(e)
328
- when 'app' then handle_app(e)
329
- when 'back' then ''
330
- when 'byline' then handle_byline(e)
331
- when 'cell' then handle_cell(e)
332
- when 'corr' then handle_corr(e)
333
- when 'div' then handle_div(e)
334
- when 'docNumber' then handle_docNumber(e)
335
- when 'figure' then handle_figure(e)
336
- when 'foreign' then ''
337
- when 'g' then handle_g(e)
338
- when 'graphic' then handle_graphic(e)
339
- when 'head' then handle_head(e)
340
- when 'item' then handle_item(e)
341
- when 'juan' then handle_juan(e)
342
- when 'l' then handle_l(e)
343
- when 'lb' then handle_lb(e)
344
- when 'lem' then handle_lem(e)
345
- when 'lg' then handle_lg(e)
346
- when 'list' then handle_list(e)
347
- when 'mulu' then handle_mulu(e)
348
- when 'note' then handle_note(e)
349
- when 'milestone' then handle_milestone(e)
350
- when 'p' then handle_p(e)
351
- when 'rdg' then handle_rdg(e)
352
- when 'reg' then ''
353
- when 'row' then handle_row(e)
354
- when 'sic' then handle_sic(e)
355
- when 'sg' then handle_sg(e)
356
- when 'tt' then handle_tt(e)
357
- when 't' then handle_t(e)
358
- when 'table' then handle_table(e)
359
- when 'teiHeader' then ''
360
- when 'unclear' then '▆'
361
- else traverse(e)
362
- end
363
- r
364
- end
365
-
366
- def handle_note(e)
311
+ def e_note(e)
367
312
  if e.has_attribute?('place') && e['place']=='inline'
368
313
  r = traverse(e)
369
314
  return "(#{r})"
@@ -371,28 +316,108 @@ class CBETA::P5aToText
371
316
  ''
372
317
  end
373
318
 
374
- def handle_p(e)
319
+ def e_p(e)
375
320
  r = traverse(e)
376
321
  r += @settings[:format] == 'app' ? "\t" : "\n"
377
322
  r
378
323
  end
379
324
 
380
- def handle_rdg(e)
325
+ def e_rdg(e)
381
326
  "<r w='#{e['wit']}'>%s</r>" % traverse(e)
382
327
  end
383
328
 
384
- def handle_row(e)
329
+ def e_row(e)
385
330
  traverse(e)
386
331
  end
387
332
 
388
- def handle_sg(e)
333
+ def e_sg(e)
389
334
  '(' + traverse(e) + ')'
390
335
  end
391
336
 
392
- def handle_sic(e)
337
+ def e_sic(e)
393
338
  "<r w='#{@orig}'>" + traverse(e) + "</r>"
394
339
  end
395
340
 
341
+ def e_t(e)
342
+ if e.has_attribute? 'place'
343
+ return '' if e['place'].include? 'foot'
344
+ end
345
+ r = traverse(e)
346
+
347
+ # 不是雙行對照
348
+ return r if @tt_type == 'app'
349
+
350
+ # 處理雙行對照
351
+ i = e.xpath('../t').index(e)
352
+ case i
353
+ when 0
354
+ return r + ' '
355
+ when 1
356
+ @next_line_buf += r + ' '
357
+ return ''
358
+ else
359
+ return r
360
+ end
361
+ end
362
+
363
+ def e_table(e)
364
+ traverse(e)
365
+ end
366
+
367
+ def handle_canon(c)
368
+ @canon = c
369
+ puts 'handle_canon ' + c
370
+ folder = File.join(@xml_root, @canon)
371
+ Dir.entries(folder).sort.each do |vol|
372
+ next if vol.start_with? '.'
373
+ handle_vol(vol)
374
+ end
375
+ end
376
+
377
+ def handle_node(e)
378
+ return '' if e.comment?
379
+ return handle_text(e) if e.text?
380
+ return '' if PASS.include?(e.name)
381
+ r = case e.name
382
+ when 'anchor' then e_anchor(e)
383
+ when 'app' then e_app(e)
384
+ when 'back' then ''
385
+ when 'byline' then e_byline(e)
386
+ when 'cell' then e_cell(e)
387
+ when 'corr' then e_corr(e)
388
+ when 'div' then e_div(e)
389
+ when 'docNumber' then e_docNumber(e)
390
+ when 'figure' then e_figure(e)
391
+ when 'foreign' then ''
392
+ when 'g' then e_g(e)
393
+ when 'graphic' then e_graphic(e)
394
+ when 'head' then e_head(e)
395
+ when 'item' then e_item(e)
396
+ when 'juan' then e_juan(e)
397
+ when 'l' then e_l(e)
398
+ when 'lb' then e_lb(e)
399
+ when 'lem' then e_lem(e)
400
+ when 'lg' then e_lg(e)
401
+ when 'list' then e_list(e)
402
+ when 'mulu' then e_mulu(e)
403
+ when 'note' then e_note(e)
404
+ when 'milestone' then e_milestone(e)
405
+ when 'p' then e_p(e)
406
+ when 'rdg' then e_rdg(e)
407
+ when 'reg' then ''
408
+ when 'row' then e_row(e)
409
+ when 'sic' then e_sic(e)
410
+ when 'sg' then e_sg(e)
411
+ when 'tt' then e_tt(e)
412
+ when 't' then e_t(e)
413
+ when 'table' then e_table(e)
414
+ when 'teiHeader' then ''
415
+ when 'unclear' then '▆'
416
+ else traverse(e)
417
+ end
418
+ r
419
+ end
420
+
396
421
  def handle_sutra(xml_fn)
397
422
  puts "convert sutra #{xml_fn}"
398
423
  @dila_note = 0
@@ -418,10 +443,7 @@ class CBETA::P5aToText
418
443
  FileUtils.makedirs @out_sutra
419
444
 
420
445
  juans = text.split(/(<juan \d+>)/)
421
- open = false
422
- fo = nil
423
446
  juan_no = nil
424
- fn = ''
425
447
  buf = ''
426
448
  # 一卷一檔
427
449
  juans.each { |j|
@@ -438,32 +460,6 @@ class CBETA::P5aToText
438
460
  }
439
461
  end
440
462
 
441
- def handle_t(e)
442
- if e.has_attribute? 'place'
443
- return '' if e['place'].include? 'foot'
444
- end
445
- r = traverse(e)
446
-
447
- # 不是雙行對照
448
- return r if @tt_type == 'app'
449
-
450
- # 處理雙行對照
451
- i = e.xpath('../t').index(e)
452
- case i
453
- when 0
454
- return r + ' '
455
- when 1
456
- @next_line_buf += r + ' '
457
- return ''
458
- else
459
- return r
460
- end
461
- end
462
-
463
- def handle_table(e)
464
- traverse(e)
465
- end
466
-
467
463
  def handle_text(e)
468
464
  s = e.content().chomp
469
465
  return '' if s.empty?
@@ -476,7 +472,7 @@ class CBETA::P5aToText
476
472
  CGI.escapeHTML(r)
477
473
  end
478
474
 
479
- def handle_tt(e)
475
+ def e_tt(e)
480
476
  @tt_type = e['type']
481
477
  traverse(e)
482
478
  end
@@ -488,12 +484,12 @@ class CBETA::P5aToText
488
484
  abort "未處理底本" if @orig.nil?
489
485
 
490
486
  @vol = vol
491
- @series = vol[0]
492
- @out_vol = File.join(@output_root, @series, vol)
493
- FileUtils.remove_dir(@out_vol, force=true)
487
+ @canon = CBETA.get_canon_from_vol(vol)
488
+ @out_vol = File.join(@output_root, @canon, vol)
489
+ FileUtils.remove_dir(@out_vol, true)
494
490
  FileUtils.makedirs @out_vol
495
491
 
496
- source = File.join(@xml_root, @series, vol)
492
+ source = File.join(@xml_root, @canon, vol)
497
493
  Dir.entries(source).sort.each { |f|
498
494
  next if f.start_with? '.'
499
495
  fn = File.join(source, f)
@@ -503,8 +499,8 @@ class CBETA::P5aToText
503
499
 
504
500
  def handle_vols(v1, v2)
505
501
  puts "convert volumns: #{v1}..#{v2}"
506
- @series = v1[0]
507
- folder = File.join(@xml_root, @series)
502
+ @canon = get_canon_from_vol(v1)
503
+ folder = File.join(@xml_root, @canon)
508
504
  Dir.entries(folder).sort.each do |vol|
509
505
  next if vol < v1
510
506
  next if vol > v2
@@ -85,7 +85,7 @@ class CBETA::P5aValidator
85
85
  def check_well_form(xml)
86
86
  r = ''
87
87
  begin
88
- doc = Nokogiri::XML(xml) { |config| config.strict }
88
+ Nokogiri::XML(xml) { |config| config.strict }
89
89
  rescue Nokogiri::XML::SyntaxError => e
90
90
  r = "caught exception: #{e}"
91
91
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.2.6
4
+ version: 2.2.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2016-07-29 00:00:00.000000000 Z
11
+ date: 2016-08-31 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com