cbeta 1.0.0 → 1.1.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: e7b3afd9fe14f3c71a3f519aa0a15dc5b2e198ab
4
- data.tar.gz: 898fddd049d4f076edb446c6f2751a10e41e248e
3
+ metadata.gz: 74ddab76ff4ed1e2d344968065e14c1a8927c3ad
4
+ data.tar.gz: 63d97041bacf9b3e45db780f1338a3ccd98b0540
5
5
  SHA512:
6
- metadata.gz: bd71006daf93b24dc46bd1afe004d1726cc6ef0b6de6159357ec01a3c255e732642f551037ffc8dc05be7e2997599bbb2ca015dcbc05bc497c029c903a0fca3c
7
- data.tar.gz: f860ffec038a6924de638861a46e94c8bcb325a8114fb00aad19f898036c2722657229e4b25f83ac8b19143f65b100212041f4e51a95c3a208f632e163f7c276
6
+ metadata.gz: de3374f578bc6025e02f88b232c00ad7414eb9d0d23209b7661236a7d9f203b326e8e6d79dee96c853abac2765a7eafd4dad5d14f840edac480f01c26112e8d7
7
+ data.tar.gz: 1aacbf6fa0137843b85af1e9cfb21b1d0af44230edf66720c92b696350683feb7c3c8f8c855c0e1c1fd12c9f254cfd8d03926d53a2b749060603ddef7efd1649
data/lib/cbeta.rb CHANGED
@@ -43,6 +43,10 @@ class CBETA
43
43
  next if row['abbreviation'].empty?
44
44
  @canon_abbr[row['id']] = row['abbreviation']
45
45
  end
46
+
47
+ fn = File.join(File.dirname(__FILE__), 'data/categories.json')
48
+ s = File.read(fn)
49
+ @categories = JSON.parse(s)
46
50
  end
47
51
 
48
52
  # 取得藏經略符
@@ -71,12 +75,24 @@ class CBETA
71
75
  return nil if r.nil?
72
76
  r.sub(/^【(.*?)】$/, '\1')
73
77
  end
78
+
79
+ # 傳入經號,取得部類
80
+ # @param book_id [String] Book ID (經號), ex. "T0220"
81
+ # @return [String] 部類名稱,例如 "阿含部類"
82
+ #
83
+ # @example
84
+ # cbeta = CBETA.new
85
+ # cbeta.get_category('T0220') # return '般若部類'
86
+ def get_category(book_id)
87
+ @categories[book_id]
88
+ end
74
89
  end
75
90
 
76
91
  require 'cbeta/gaiji'
77
92
  require 'cbeta/bm_to_text'
78
93
  require 'cbeta/p5a_to_epub'
79
94
  require 'cbeta/p5a_to_html'
95
+ require 'cbeta/p5a_to_html_for_every_edition'
80
96
  require 'cbeta/p5a_to_simple_html'
81
97
  require 'cbeta/p5a_to_text'
82
98
  require 'cbeta/p5a_validator'
@@ -27,6 +27,10 @@ class CBETA::P5aToEPUB
27
27
  # @param temp_folder [String] 供 EPUB 暫存工作檔案的路徑
28
28
  # @option opts [Integer] :epub_version (3) EPUB 版本
29
29
  # @option opts [String] :graphic_base 圖檔路徑
30
+ # * graphic_base/covers: 封面圖檔位置
31
+ # * graphic_base/figures: 插圖圖檔位置
32
+ # * graphic_base/sd-gif: 悉曇字圖檔位置
33
+ # * graphic_base/rj-gif: 蘭札體圖檔位置
30
34
  # @option opts [String] :front_page 內文前可以加一份 HTML 檔,例如「編輯說明」
31
35
  # @option opts [String] :front_page_title 加在目錄的 front_page 標題
32
36
  # @option opts [String] :back_page 內文後可以加一份 HTML 檔,例如「版權聲明」
@@ -75,8 +79,8 @@ class CBETA::P5aToEPUB
75
79
  create_epub(output_path)
76
80
  end
77
81
 
78
- # 將某個資料夾下的每個 xml 檔都轉為一個對應的 EPUB。
79
- # 資料夾可以是巢狀,全部都會遞迴處理。
82
+ # 將某個資料夾下的每部作品都轉為一個對應的 EPUB。
83
+ # 跨冊的作品也會合成一個 EPUB。
80
84
  #
81
85
  # @example
82
86
  # require 'cbeta'
@@ -87,19 +91,10 @@ class CBETA::P5aToEPUB
87
91
  # c = CBETA::P5aToEPUB.new(TEMP, IMG)
88
92
  # c.convert_folder('/Users/ray/Documents/Projects/D道安/xml-p5a/DA', '/temp/cbeta-epub/DA')
89
93
  def convert_folder(input_folder, output_folder)
90
- FileUtils.remove_dir(output_folder, force=true)
91
- FileUtils::mkdir_p output_folder
92
- Dir.foreach(input_folder) do |f|
93
- next if f.start_with? '.'
94
- p1 = File.join(input_folder, f)
95
- if File.file?(p1)
96
- f.sub!(/.xml$/, '.epub')
97
- p2 = File.join(output_folder, f)
98
- convert_file(p1, p2)
99
- else
100
- p2 = File.join(output_folder, f)
101
- convert_folder(p1, p2)
102
- end
94
+ @todo = {}
95
+ prepare_todo_list(input_folder, output_folder)
96
+ @todo.each_pair do |k, v|
97
+ convert_sutra(k, v[:xml_files], v[:epub])
103
98
  end
104
99
  end
105
100
 
@@ -129,13 +124,17 @@ class CBETA::P5aToEPUB
129
124
  # ]
130
125
  #
131
126
  # c = CBETA::P5aToEPUB.new(TEMP)
132
- # c.convert_sutra('T0220', '大般若經', xml_files, '/temp/cbeta-epub/T0220.epub')
133
- def convert_sutra(book_id, title, xml_files, out)
127
+ # c.convert_sutra('T0220', xml_files, '/temp/cbeta-epub/T0220.epub')
128
+ def convert_sutra(book_id, xml_files, out)
134
129
  @book_id = book_id
135
130
  sutra_init
136
131
  xml_files.each { |f| handle_file(f) }
137
132
 
138
- @title = title
133
+ if xml_files.size > 1
134
+ @title.sub!(/^(.*)\(.*?\)$/, '\1')
135
+ @title.sub!(/^(.*?)((.*?))+$/, '\1')
136
+ puts @title
137
+ end
139
138
  create_epub(out)
140
139
  end
141
140
 
@@ -199,6 +198,15 @@ class CBETA::P5aToEPUB
199
198
  }
200
199
  }
201
200
  builder.book.version = @settings[:epub_version]
201
+
202
+ canon = book_id.sub(/^([A-Z]{1,2}).*$/, '\1')
203
+ cover = File.join(settings[:graphic_base], 'covers', canon, "#{book_id}.jpg")
204
+ if File.exist? cover
205
+ File.open(cover) do |io|
206
+ builder.book.add_item(cover, io).cover_image
207
+ end
208
+ end
209
+
202
210
  builder.generate_epub(output_path)
203
211
  puts "output: #{output_path}"
204
212
  end
@@ -791,6 +799,31 @@ eos
791
799
  text
792
800
  end
793
801
 
802
+ def prepare_todo_list(input_folder, output_folder)
803
+ Dir.foreach(input_folder) do |f|
804
+ next if f.start_with? '.'
805
+ p1 = File.join(input_folder, f)
806
+ if File.file?(p1)
807
+ work = f.sub(/^([A-Z]{1,2})\d{2,3}n(.*)\.xml$/, '\1\2')
808
+ work = 'T0220' if work.start_with? 'T0220'
809
+ unless @todo.key? work
810
+ @todo[work] = { xml_files: [] }
811
+ end
812
+ hash = @todo[work]
813
+ hash[:xml_files] << p1
814
+
815
+ folders = output_folder.split('/')
816
+ folders.pop if folders[-1].match(/^[A-Z]{1,2}\d{2,3}$/)
817
+ folder = folders.join('/')
818
+ FileUtils::mkdir_p folder
819
+ hash[:epub] = File.join(folder, "#{work}.epub")
820
+ else
821
+ p2 = File.join(output_folder, f)
822
+ prepare_todo_list(p1, p2)
823
+ end
824
+ end
825
+ end
826
+
794
827
  def remove_empty_nav(node_list)
795
828
  node_list.each do |n|
796
829
  if n[:nav].empty?
@@ -0,0 +1,756 @@
1
+ require 'cgi'
2
+ require 'date'
3
+ require 'fileutils'
4
+ require 'json'
5
+ require 'nokogiri'
6
+ require 'set'
7
+
8
+ # Convert CBETA XML P5a to HTML for every edition
9
+ #
10
+ # CBETA XML P5a 可由此取得: https://github.com/cbeta-git/xml-p5a
11
+ #
12
+ # 轉檔規則請參考: http://wiki.ddbc.edu.tw/pages/CBETA_XML_P5a_轉_HTML
13
+ class CBETA::P5aToHTMLForEveryEdition
14
+ # 內容不輸出的元素
15
+ PASS=['back', 'teiHeader']
16
+
17
+ # 某版用字缺的符號
18
+ MISSING = '-'
19
+
20
+ private_constant :PASS, :MISSING
21
+
22
+ # @param xml_root [String] 來源 CBETA XML P5a 路徑
23
+ # @param out_root [String] 輸出 HTML 路徑
24
+ def initialize(xml_root, out_root)
25
+ @xml_root = xml_root
26
+ @out_root = out_root
27
+ @cbeta = CBETA.new
28
+ @gaijis = CBETA::Gaiji.new
29
+ end
30
+
31
+ # 將 CBETA XML P5a 轉為 HTML
32
+ #
33
+ # @example Convert 大正藏第一冊:
34
+ #
35
+ # x2h = CBETA::P5aToHTMLForEveryEdition.new('/PATH/TO/CBETA/XML/P5a', '/OUTPUT/FOLDER')
36
+ # x2h.convert('T01')
37
+ #
38
+ # @example Convert 大正藏全部:
39
+ #
40
+ # x2h = CBETA::P5aToHTMLForEveryEdition.new('/PATH/TO/CBETA/XML/P5a', '/OUTPUT/FOLDER')
41
+ # x2h.convert('T')
42
+ #
43
+ # @example Convert 大正藏第五冊至第七冊:
44
+ #
45
+ # x2h = CBETA::P5aToHTMLForEveryEdition.new('/PATH/TO/CBETA/XML/P5a', '/OUTPUT/FOLDER')
46
+ # x2h.convert('T05..T07')
47
+ #
48
+ # T 是大正藏的 ID, CBETA 的藏經 ID 系統請參考: http://www.cbeta.org/format/id.php
49
+ def convert(target=nil)
50
+ return convert_all if target.nil?
51
+
52
+ arg = target.upcase
53
+ if arg.size == 1
54
+ handle_collection(arg)
55
+ else
56
+ if arg.include? '..'
57
+ arg.match(/^([^\.]+?)\.\.([^\.]+)$/) {
58
+ handle_vols($1, $2)
59
+ }
60
+ else
61
+ handle_vol(arg)
62
+ end
63
+ end
64
+ end
65
+
66
+ private
67
+
68
+ def convert_all
69
+ Dir.foreach(@xml_root) { |c|
70
+ next unless c.match(/^[A-Z]$/)
71
+ handle_collection(c)
72
+ }
73
+ end
74
+
75
+ def handle_anchor(e)
76
+ id = e['id']
77
+ if e.has_attribute?('id')
78
+ if id.start_with?('nkr_note_orig')
79
+ note = @notes[id]
80
+ note_text = traverse(note)
81
+ n = id[/^nkr_note_orig_(.*)$/, 1]
82
+ @back[@juan] += "<span class='footnote' id='n#{n}'>#{note_text}</span>\n"
83
+ return "<a class='noteAnchor' href='#n#{n}'></a>"
84
+ elsif id.start_with? 'fx'
85
+ return "<span class='star'>[*]</span>"
86
+ end
87
+ end
88
+
89
+ if e.has_attribute?('type')
90
+ if e['type'] == 'circle'
91
+ return '◎'
92
+ end
93
+ end
94
+
95
+ ''
96
+ end
97
+
98
+ def handle_app(e)
99
+ r = ''
100
+ if e['type'] == 'star'
101
+ c = e['corresp'][1..-1]
102
+ r = "<a class='noteAnchor star' href='#n#{c}'></a>"
103
+ end
104
+ r + traverse(e)
105
+ end
106
+
107
+ def handle_byline(e)
108
+ r = '<p class="byline">'
109
+ r += "<span class='lineInfo'>#{@lb}</span>"
110
+ r += traverse(e)
111
+ r + '</p>'
112
+ end
113
+
114
+ def handle_cell(e)
115
+ doc = Nokogiri::XML::Document.new
116
+ cell = doc.create_element('div')
117
+ cell['class'] = 'bip-table-cell'
118
+ cell['rowspan'] = e['rows'] if e.key? 'rows'
119
+ cell['colspan'] = e['cols'] if e.key? 'cols'
120
+ cell.inner_html = traverse(e)
121
+ to_html(cell)
122
+ end
123
+
124
+ def handle_collection(c)
125
+ @series = c
126
+ puts 'handle_collection ' + c
127
+ folder = File.join(@xml_root, @series)
128
+ Dir.foreach(folder) { |vol|
129
+ next if ['.', '..', '.DS_Store'].include? vol
130
+ handle_vol(vol)
131
+ }
132
+ end
133
+
134
+ def handle_corr(e)
135
+ "<r w='【CBETA】' l='#{@lb}' w='#{@char_count}'>%s</r>" % traverse(e)
136
+ end
137
+
138
+ def handle_div(e)
139
+ @div_count += 1
140
+ n = @div_count
141
+ if e.has_attribute? 'type'
142
+ @open_divs << e
143
+ r = traverse(e)
144
+ @open_divs.pop
145
+ return "<!-- begin div#{n}--><div class='div-#{e['type']}'>#{r}</div><!-- end of div#{n} -->"
146
+ else
147
+ return traverse(e)
148
+ end
149
+ end
150
+
151
+ def handle_figure(e)
152
+ "<p class='figure'>%s</p>" % traverse(e)
153
+ end
154
+
155
+ def handle_g(e, mode)
156
+ # if 有 <mapping type="unicode">
157
+ # if 不在 Unicode Extension C, D, E 範圍裡
158
+ # 直接採用
159
+ # else
160
+ # 預設呈現 unicode, 但仍包缺字資訊,供點選開 popup
161
+ # else if 有 <mapping type="normal_unicode">
162
+ # 預設呈現 normal_unicode, 但仍包缺字資訊,供點選開 popup
163
+ # else if 有 normalized form
164
+ # 預設呈現 normalized form, 但仍包缺字資訊,供點選開 popup
165
+ # else
166
+ # 預設呈現組字式, 但仍包缺字資訊,供點選開 popup
167
+ gid = e['ref'][1..-1]
168
+ g = @gaijis[gid]
169
+ abort "Line:#{__LINE__} 無缺字資料:#{gid}" if g.nil?
170
+ zzs = g['zzs']
171
+
172
+ if mode == 'txt'
173
+ return g['roman'] if gid.start_with?('SD')
174
+ if zzs.nil?
175
+ abort "缺組字式:#{g}"
176
+ else
177
+ return zzs
178
+ end
179
+ end
180
+
181
+ @char_count += 1
182
+
183
+ if gid.start_with?('SD')
184
+ case gid
185
+ when 'SD-E35A'
186
+ return '('
187
+ when 'SD-E35B'
188
+ return ')'
189
+ else
190
+ return "<span class='siddam' roman='#{g['roman']}' code='#{gid}' char='#{g['sd-char']}'/>"
191
+ end
192
+ end
193
+
194
+ if gid.start_with?('RJ')
195
+ return "<span class='ranja' roman='#{g['roman']}' code='#{gid}' char='#{g['rj-char']}'/>"
196
+ end
197
+
198
+ default = ''
199
+ if g.has_key?('unicode')
200
+ #if @unicode1.include?(g['unicode'])
201
+ # 如果在 unicode ext-C, ext-D, ext-E 範圍內
202
+ if (0x2A700..0x2CEAF).include? g['unicode'].hex
203
+ default = g['unicode-char']
204
+ else
205
+ return g['unicode-char'] # 直接採用 unicode
206
+ end
207
+ end
208
+
209
+ nor = ''
210
+ if g.has_key?('normal_unicode')
211
+ nor = g['normal_unicode']
212
+ default = nor if default.empty?
213
+ end
214
+
215
+ if g.has_key?('normal')
216
+ nor += ', ' unless nor==''
217
+ nor += g['normal']
218
+ default = g['normal'] if default.empty?
219
+ end
220
+
221
+ default = zzs if default.empty?
222
+
223
+ href = 'http://dict.cbeta.org/dict_word/gaiji-cb/%s/%s.gif' % [gid[2, 2], gid]
224
+ unless @back[@juan].include?(href)
225
+ @back[@juan] += "<span id='#{gid}' class='gaijiInfo' figure_url='#{href}' zzs='#{zzs}' nor='#{nor}'>#{default}</span>\n"
226
+ end
227
+ "<a class='gaijiAnchor' href='##{gid}'>#{default}</a>"
228
+ end
229
+
230
+ def handle_graphic(e)
231
+ url = File.basename(e['url'])
232
+ "<span imgsrc='#{url}' class='graphic'></span>"
233
+ end
234
+
235
+ def handle_head(e)
236
+ r = ''
237
+ unless e['type'] == 'added'
238
+ i = @open_divs.size
239
+ r = "<p class='head' data-head-level='#{i}'>%s</p>" % traverse(e)
240
+ end
241
+ r
242
+ end
243
+
244
+ def handle_item(e)
245
+ "<li>%s</li>\n" % traverse(e)
246
+ end
247
+
248
+ def handle_juan(e)
249
+ "<p class='juan'>%s</p>" % traverse(e)
250
+ end
251
+
252
+ def handle_l(e)
253
+ if @lg_type == 'abnormal'
254
+ return traverse(e)
255
+ end
256
+
257
+ @in_l = true
258
+
259
+ doc = Nokogiri::XML::Document.new
260
+ cell = doc.create_element('div')
261
+ cell['class'] = 'lg-cell'
262
+ cell.inner_html = traverse(e)
263
+
264
+ if @first_l
265
+ parent = e.parent()
266
+ if parent.has_attribute?('rend')
267
+ indent = parent['rend'].scan(/text-indent:[^:]*/)
268
+ unless indent.empty?
269
+ cell['style'] = indent[0]
270
+ end
271
+ end
272
+ @first_l = false
273
+ end
274
+ r = to_html(cell)
275
+
276
+ unless @lg_row_open
277
+ r = "\n<div class='lg-row'>" + r
278
+ @lg_row_open = true
279
+ end
280
+ @in_l = false
281
+ r
282
+ end
283
+
284
+ def handle_lb(e)
285
+ # 卍續藏有 X 跟 R 兩種 lb, 只處理 X
286
+ return '' if e['ed'] != @series
287
+
288
+ @char_count = 1
289
+ @lb = e['n']
290
+ line_head = @sutra_no + '_p' + e['n']
291
+ r = ''
292
+ #if e.parent.name == 'lg' and $lg_row_open
293
+ if @lg_row_open && !@in_l
294
+ # 每行偈頌放在一個 lg-row 裡面
295
+ # T46n1937, p. 914a01, l 包雙行夾註跨行
296
+ # T20n1092, 337c16, lb 在 l 中間,不結束 lg-row
297
+ r += "</div><!-- end of lg-row -->"
298
+ @lg_row_open = false
299
+ end
300
+ r += "<span class='lb' id='#{line_head}'>#{line_head}</span>"
301
+ unless @next_line_buf.empty?
302
+ r += @next_line_buf
303
+ @next_line_buf = ''
304
+ end
305
+ r
306
+ end
307
+
308
+ def handle_lem(e)
309
+ w = e['wit'].scan(/【.*?】/)
310
+ @editions.merge w
311
+ w = w.join(' ')
312
+
313
+ r = traverse(e)
314
+ "<r w='#{w}' l='#{@lb}' w='#{@char_count}'>#{r}</r>"
315
+ end
316
+
317
+ def handle_lg(e)
318
+ r = ''
319
+ @lg_type = e['type']
320
+ if @lg_type == 'abnormal'
321
+ r = "<p class='lg-abnormal'>" + traverse(e) + "</p>"
322
+ else
323
+ @first_l = true
324
+ doc = Nokogiri::XML::Document.new
325
+ node = doc.create_element('div')
326
+ node['class'] = 'lg'
327
+ if e.has_attribute?('rend')
328
+ rend = e['rend'].gsub(/text-indent:[^:]*/, '')
329
+ node['style'] = rend
330
+ end
331
+ @lg_row_open = false
332
+ node.inner_html = traverse(e)
333
+ if @lg_row_open
334
+ node.inner_html += '</div><!-- end of lg -->'
335
+ @lg_row_open = false
336
+ end
337
+ r = "\n" + to_html(node)
338
+ end
339
+ r
340
+ end
341
+
342
+ def handle_list(e)
343
+ "<ul>%s</ul>" % traverse(e)
344
+ end
345
+
346
+ def handle_milestone(e)
347
+ r = ''
348
+ if e['unit'] == 'juan'
349
+
350
+ r += "</div>" * @open_divs.size # 如果有 div 跨卷,要先結束, ex: T55n2154, p. 680a29, 跨 19, 20 兩卷
351
+ @juan = e['n'].to_i
352
+ @back[@juan] = @back[0]
353
+ r += "<juan #{@juan}>"
354
+ @open_divs.each { |d|
355
+ r += "<div class='div-#{d['type']}'>"
356
+ }
357
+ end
358
+ r
359
+ end
360
+
361
+ def handle_mulu(e)
362
+ r = ''
363
+ if e['type'] == '品'
364
+ @pass << false
365
+ r = "<mulu class='pin' s='%s'/>" % traverse(e, 'txt')
366
+ @pass.pop
367
+ end
368
+ r
369
+ end
370
+
371
+ def handle_node(e, mode)
372
+ return '' if e.comment?
373
+ return handle_text(e, mode) if e.text?
374
+ return '' if PASS.include?(e.name)
375
+ r = case e.name
376
+ when 'anchor' then handle_anchor(e)
377
+ when 'app' then handle_app(e)
378
+ when 'byline' then handle_byline(e)
379
+ when 'cell' then handle_cell(e)
380
+ when 'corr' then handle_corr(e)
381
+ when 'div' then handle_div(e)
382
+ when 'figure' then handle_figure(e)
383
+ when 'foreign' then ''
384
+ when 'g' then handle_g(e, mode)
385
+ when 'graphic' then handle_graphic(e)
386
+ when 'head' then handle_head(e)
387
+ when 'item' then handle_item(e)
388
+ when 'juan' then handle_juan(e)
389
+ when 'l' then handle_l(e)
390
+ when 'lb' then handle_lb(e)
391
+ when 'lem' then handle_lem(e)
392
+ when 'lg' then handle_lg(e)
393
+ when 'list' then handle_list(e)
394
+ when 'mulu' then handle_mulu(e)
395
+ when 'note' then handle_note(e)
396
+ when 'milestone' then handle_milestone(e)
397
+ when 'p' then handle_p(e)
398
+ when 'rdg' then handle_rdg(e)
399
+ when 'reg' then ''
400
+ when 'row' then handle_row(e)
401
+ when 'sic' then handle_sic(e)
402
+ when 'sg' then handle_sg(e)
403
+ when 't' then handle_t(e)
404
+ when 'tt' then handle_tt(e)
405
+ when 'table' then handle_table(e)
406
+ else traverse(e)
407
+ end
408
+ r
409
+ end
410
+
411
+ def handle_note(e)
412
+ n = e['n']
413
+ if e.has_attribute?('type')
414
+ t = e['type']
415
+ case t
416
+ when 'equivalent'
417
+ return ''
418
+ when 'orig'
419
+ return handle_note_orig(e)
420
+ when 'orig_biao'
421
+ return handle_note_orig(e, 'biao')
422
+ when 'orig_ke'
423
+ return handle_note_orig(e, 'ke')
424
+ when 'mod'
425
+ @pass << false
426
+ s = traverse(e)
427
+ @pass.pop
428
+ @back[@juan] += "<span class='footnote_cb' id='n#{n}'>#{s}</span>\n"
429
+ return "<a class='noteAnchor' href='#n#{n}'></a>"
430
+ when 'rest'
431
+ return ''
432
+ else
433
+ return '' if t.start_with?('cf')
434
+ end
435
+ end
436
+
437
+ if e.has_attribute?('resp')
438
+ return '' if e['resp'].start_with? 'CBETA'
439
+ end
440
+
441
+ if e.has_attribute?('place') && e['place']=='inline'
442
+ r = traverse(e)
443
+ return "<span class='doube-line-note'>#{r}</span>"
444
+ else
445
+ return traverse(e)
446
+ end
447
+ end
448
+
449
+ def handle_note_orig(e, anchor_type=nil)
450
+ n = e['n']
451
+ @pass << false
452
+ s = traverse(e)
453
+ @pass.pop
454
+ @back[@juan] += "<span class='footnote_orig' id='n#{n}'>#{s}</span>\n"
455
+
456
+ if @mod_notes.include? n
457
+ return ''
458
+ else
459
+ label = case anchor_type
460
+ when 'biao' then " data-label='標#{n[-2..-1]}'"
461
+ when 'ke' then " data-label='科#{n[-2..-1]}'"
462
+ else ''
463
+ end
464
+ return "<a class='noteAnchor' href='#n#{n}'#{label}></a>"
465
+ end
466
+ end
467
+
468
+ def handle_p(e)
469
+ r = '<p>'
470
+ r += "<span class='lineInfo'>#{@lb}</span>"
471
+ r += traverse(e)
472
+ r + '</p>'
473
+ end
474
+
475
+ def handle_rdg(e)
476
+ r = traverse(e)
477
+ w = e['wit'].scan(/【.*?】/)
478
+ @editions.merge w
479
+ "<r w='#{e['wit']}' l='#{@lb}' w='#{@char_count}'>#{r}</r>"
480
+ end
481
+
482
+ def handle_row(e)
483
+ "<div class='bip-table-row'>" + traverse(e) + "</div>"
484
+ end
485
+
486
+ def handle_sg(e)
487
+ '(' + traverse(e) + ')'
488
+ end
489
+
490
+ def handle_sic(e)
491
+ "<r w='#{@orig}' l='#{@lb}' w='#{@char_count}'>" + traverse(e) + "</r>"
492
+ end
493
+
494
+ def handle_sutra(xml_fn)
495
+ puts "convert sutra #{xml_fn}"
496
+ @editions = Set.new ["【CBETA】"]
497
+ @back = { 0 => '' }
498
+ @char_count = 1
499
+ @dila_note = 0
500
+ @div_count = 0
501
+ @in_l = false
502
+ @juan = 0
503
+ @lg_row_open = false
504
+ @mod_notes = Set.new
505
+ @next_line_buf = ''
506
+ @open_divs = []
507
+ @sutra_no = File.basename(xml_fn, ".xml")
508
+
509
+ text = parse_xml(xml_fn)
510
+
511
+ # 註標移到 lg-cell 裡面,不然以 table 呈現 lg 會有問題
512
+ text.gsub!(/(<a class='noteAnchor'[^>]*><\/a>)(<div class="lg-cell"[^>]*>)/, '\2\1')
513
+
514
+ juans = text.split(/(<juan \d+>)/)
515
+ open = false
516
+ fo = nil
517
+ juan_no = nil
518
+ fn = ''
519
+ buf = ''
520
+ # 一卷一檔
521
+ juans.each { |j|
522
+ if j =~ /<juan (\d+)>$/
523
+ juan_no = $1.to_i
524
+ elsif juan_no.nil?
525
+ buf = j
526
+ else
527
+ write_juan(juan_no, buf+j)
528
+ buf = ''
529
+ end
530
+ }
531
+ end
532
+
533
+ def handle_t(e)
534
+ if e.has_attribute? 'place'
535
+ return '' if e['place'].include? 'foot'
536
+ end
537
+ r = traverse(e)
538
+
539
+ # <tt type="app"> 不是 悉漢雙行對照
540
+ return r if @tt_type == 'app'
541
+
542
+ # 處理雙行對照
543
+ i = e.xpath('../t').index(e)
544
+ case i
545
+ when 0
546
+ return r + ' '
547
+ when 1
548
+ @next_line_buf += r + ' '
549
+ return ''
550
+ else
551
+ return r
552
+ end
553
+ end
554
+
555
+ def handle_tt(e)
556
+ @tt_type = e['type']
557
+ traverse(e)
558
+ end
559
+
560
+ def handle_table(e)
561
+ "<div class='bip-table'>" + traverse(e) + "</div>"
562
+ end
563
+
564
+ def handle_text(e, mode)
565
+ s = e.content().chomp
566
+ return '' if s.empty?
567
+ return '' if e.parent.name == 'app'
568
+
569
+ # cbeta xml 文字之間會有多餘的換行
570
+ r = s.gsub(/[\n\r]/, '')
571
+
572
+ text_size = r.size
573
+
574
+ # 把 & 轉為 &amp;
575
+ r = CGI.escapeHTML(r)
576
+
577
+ # 正文區的文字外面要包 span
578
+ if @pass.last and mode=='html'
579
+ r = "<span class='t' l='#{@lb}' w='#{@char_count}'>#{r}</span>"
580
+ @char_count += text_size
581
+ end
582
+ r
583
+ end
584
+
585
+ def handle_vol(vol)
586
+ puts "convert volumn: #{vol}"
587
+
588
+ @orig = @cbeta.get_canon_symbol(vol[0])
589
+ abort "未處理底本" if @orig.nil?
590
+
591
+ @vol = vol
592
+ @series = vol[0]
593
+ @out_folder = File.join(@out_root, @series)
594
+ FileUtils::mkdir_p @out_folder
595
+
596
+ source = File.join(@xml_root, @series, vol)
597
+ Dir[source+"/*"].each { |f|
598
+ handle_sutra(f)
599
+ }
600
+ end
601
+
602
+ def handle_vols(v1, v2)
603
+ puts "convert volumns: #{v1}..#{v2}"
604
+ @series = v1[0]
605
+ folder = File.join(@xml_root, @series)
606
+ Dir.foreach(folder) { |vol|
607
+ next if vol < v1
608
+ next if vol > v2
609
+ handle_vol(vol)
610
+ }
611
+ end
612
+
613
+ def lem_note_cf(e)
614
+ # ex: T32n1670A.xml, p. 703a16
615
+ # <note type="cf1">K30n1002_p0257a01-a23</note>
616
+ refs = []
617
+ e.xpath('./note').each { |n|
618
+ if n.key?('type') and n['type'].start_with? 'cf'
619
+ s = n.content
620
+ if linehead_exist_in_cbeta(s)
621
+ s = "<span class='note_cf'>#{s}</span>"
622
+ end
623
+ refs << s
624
+ end
625
+ }
626
+ if refs.empty?
627
+ ''
628
+ else
629
+ '修訂依據:' + refs.join(';') + '。'
630
+ end
631
+ end
632
+
633
+ def lem_note_rdg(lem)
634
+ r = ''
635
+ app = lem.parent
636
+ @pass << false
637
+ app.xpath('rdg').each { |rdg|
638
+ if rdg['wit'].include? @orig
639
+ s = traverse(rdg, 'back')
640
+ s = MISSING if s.empty?
641
+ r += @orig + s
642
+ end
643
+ }
644
+ @pass.pop
645
+ r += '。' unless r.empty?
646
+ r
647
+ end
648
+
649
+ def linehead_exist_in_cbeta(s)
650
+ @xml_root
651
+ corpus = s[0]
652
+ if s.match(/^(([A-Z]\d+)n\d+[a-zA-Z]?).*$/)
653
+ sutra = $1
654
+ vol = $2
655
+ path = File.join(@xml_root, corpus, vol, sutra+'.xml')
656
+ return File.exist? path
657
+ else
658
+ return false
659
+ end
660
+ end
661
+
662
+ def open_xml(fn)
663
+ s = File.read(fn)
664
+
665
+ if fn.include? 'T16n0657'
666
+ # 這個地方 雙行夾註 跨兩行偈頌
667
+ # 把 lb 移到 note 結束之前
668
+ # 讓 lg-row 先結束,再結束雙行夾註
669
+ s.sub!(/(<\/note>)(\n<lb n="0206b29" ed="T"\/>)/, '\2\1')
670
+ end
671
+
672
+ # <milestone unit="juan"> 前面的 lb 屬於新的這一卷
673
+ s.gsub!(%r{((?:<pb [^>]+>\n?)?(?:<lb [^>]+>\n?)+)(<milestone [^>]*unit="juan"[^/>]*/>)}, '\2\1')
674
+
675
+ doc = Nokogiri::XML(s)
676
+ doc.remove_namespaces!()
677
+ doc
678
+ end
679
+
680
+ def read_mod_notes(doc)
681
+ doc.xpath("//note[@type='mod']").each { |e|
682
+ @mod_notes << e['n']
683
+ }
684
+ end
685
+
686
+ def parse_xml(xml_fn)
687
+ @pass = [false]
688
+
689
+ doc = open_xml(xml_fn)
690
+
691
+ e = doc.xpath("//titleStmt/title")[0]
692
+ @title = traverse(e, 'txt')
693
+ @title = @title.split()[-1]
694
+
695
+ read_mod_notes(doc)
696
+
697
+ root = doc.root()
698
+ body = root.xpath("text/body")[0]
699
+ @pass = [true]
700
+
701
+ text = traverse(body)
702
+ text
703
+ end
704
+
705
+ def to_html(e)
706
+ e.to_xml(encoding: 'UTF-8', :save_with => Nokogiri::XML::Node::SaveOptions::AS_XML)
707
+ end
708
+
709
+ def traverse(e, mode='html')
710
+ r = ''
711
+ e.children.each { |c|
712
+ s = handle_node(c, mode)
713
+ r += s
714
+ }
715
+ r
716
+ end
717
+
718
+ def write_juan(juan_no, html)
719
+ if @sutra_no.match(/^(T05|T06|T07)n0220/)
720
+ work = "T0220"
721
+ else
722
+ work = @sutra_no.sub(/^([A-Z])\d{2,3}n(.*)$/, '\1\2')
723
+ end
724
+ canon = work[0]
725
+ juan = "%03d" % juan_no
726
+ folder = File.join(@out_folder, work, juan)
727
+ FileUtils.remove_dir(folder, force=true)
728
+ FileUtils.makedirs folder
729
+ @editions.each do |ed|
730
+ frag = Nokogiri::HTML.fragment("<div id='body'>#{html}</div>")
731
+ frag.search("r").each do |node|
732
+ if node['w'] == ed
733
+ node.add_previous_sibling node.inner_html
734
+ end
735
+ node.remove
736
+ end
737
+ text = frag.to_html
738
+
739
+ fn = ed.sub(/^【(.*)】$/, '\1') + '.htm'
740
+ output_path = File.join(folder, fn)
741
+ text = <<eos
742
+ <html>
743
+ <head>
744
+ <meta http-equiv="Content-Type" content="text/html; charset=utf-8" />
745
+ <meta name="filename" content="#{fn}" />
746
+ <title>#{@title}</title>
747
+ </head>
748
+ <body>
749
+ #{text}
750
+ </body></html>
751
+ eos
752
+ File.write(output_path, text)
753
+ end
754
+ end
755
+
756
+ end