cbeta 1.3.6 → 2.0.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 593f41e3ed434a6e6cecb82d6ab138060bdc411b
4
- data.tar.gz: e66b25b39df32cf7dac8616742903865eb4dc1f3
3
+ metadata.gz: ff4869a9d67955fc89ddd9c5f6f8e88db19f598d
4
+ data.tar.gz: 716234a36f10298fa572d6c7fac10115316918ce
5
5
  SHA512:
6
- metadata.gz: b346f87c1eddb9908eebd99c55c7c5a2d92acf6e131b88323a9905ea0159134a22ee446d15dbb0160228d990dc723dfca5be39bcefb793915484a845d8c64ebe
7
- data.tar.gz: 657358264f56a387e2e80d84a1474b66f220e5cb4428e7995d6dfec867250777bd11f42045b9bdda20c1c01ed27322988e791c02560233a82b11f02ddd911c04
6
+ metadata.gz: 64cf5990e9579b23e70a861696862afaafb8738e30819cb3617e92b23a54753e33542a41a0473b7368a5cd1752ec517b9ab84a8bd9da3e017c9e901918825857
7
+ data.tar.gz: b42dadbd97ceb255e1300231b4f43148e1eebcce2c17a837ed91535ea6c9eed3e66021d3298770c698c1fe2520d3d9c5e723f39f301af7113fb119e71c94770f
data/lib/cbeta.rb CHANGED
@@ -121,7 +121,6 @@ require 'cbeta/bm_to_text'
121
121
  require 'cbeta/char_count'
122
122
  require 'cbeta/char_freq'
123
123
  require 'cbeta/html_to_pdf'
124
- require 'cbeta/p5a_to_epub'
125
124
  require 'cbeta/p5a_to_html'
126
125
  require 'cbeta/p5a_to_html_for_every_edition'
127
126
  require 'cbeta/p5a_to_html_for_pdf'
@@ -64,6 +64,23 @@ class CBETA::P5aToHTMLForEveryEdition
64
64
  end
65
65
 
66
66
  private
67
+
68
+ def before_parse_xml(xml_fn)
69
+ @back = { 0 => '' }
70
+ @back_orig = { 0 => '' }
71
+ @char_count = 1
72
+ @dila_note = 0
73
+ @div_count = 0
74
+ @in_l = false
75
+ @juan = 0
76
+ @lg_row_open = false
77
+ @mod_notes = Set.new
78
+ @next_line_buf = ''
79
+ @notes_mod = {}
80
+ @notes_orig = {}
81
+ @open_divs = []
82
+ @sutra_no = File.basename(xml_fn, ".xml")
83
+ end
67
84
 
68
85
  def convert_all
69
86
  Dir.entries(@xml_root).sort.each do |c|
@@ -71,6 +88,15 @@ class CBETA::P5aToHTMLForEveryEdition
71
88
  handle_collection(c)
72
89
  end
73
90
  end
91
+
92
+ def get_editions(doc)
93
+ r = Set.new [@orig, "【CBETA】"] # 至少有底本及 CBETA 兩個版本
94
+ doc.xpath('//lem|//rdg').each do |e|
95
+ w = e['wit'].scan(/【.*?】/)
96
+ r.merge w
97
+ end
98
+ r
99
+ end
74
100
 
75
101
  def handle_anchor(e)
76
102
  id = e['id']
@@ -309,12 +335,17 @@ class CBETA::P5aToHTMLForEveryEdition
309
335
  end
310
336
 
311
337
  def handle_lem(e)
312
- w = e['wit'].scan(/【.*?】/)
313
- @editions.merge w
314
- w = w.join(' ')
338
+ # 沒有 rdg 的版本,用字同 lem
339
+ editions = Set.new @editions
340
+ e.xpath('./following-sibling::rdg').each do |rdg|
341
+ rdg['wit'].scan(/【.*?】/).each do |w|
342
+ editions.delete w
343
+ end
344
+ end
345
+ w = editions.to_a.join(' ')
315
346
 
316
347
  r = traverse(e)
317
- "<r w='#{w}' l='#{@lb}' w='#{@char_count}'>#{r}</r>"
348
+ "<r w='#{w}' l='#{@lb}'>#{r}</r>"
318
349
  end
319
350
 
320
351
  def handle_lg(e)
@@ -485,7 +516,6 @@ class CBETA::P5aToHTMLForEveryEdition
485
516
  def handle_rdg(e)
486
517
  r = traverse(e)
487
518
  w = e['wit'].scan(/【.*?】/)
488
- @editions.merge w
489
519
  "<r w='#{e['wit']}' l='#{@lb}' w='#{@char_count}'>#{r}</r>"
490
520
  end
491
521
 
@@ -503,21 +533,8 @@ class CBETA::P5aToHTMLForEveryEdition
503
533
 
504
534
  def handle_sutra(xml_fn)
505
535
  puts "convert sutra #{xml_fn}"
506
- @editions = Set.new [@orig, "【CBETA】"] # 至少有底本及 CBETA 兩個版本
507
- @back = { 0 => '' }
508
- @back_orig = { 0 => '' }
509
- @char_count = 1
510
- @dila_note = 0
511
- @div_count = 0
512
- @in_l = false
513
- @juan = 0
514
- @lg_row_open = false
515
- @mod_notes = Set.new
516
- @next_line_buf = ''
517
- @notes_mod = {}
518
- @notes_orig = {}
519
- @open_divs = []
520
- @sutra_no = File.basename(xml_fn, ".xml")
536
+
537
+ before_parse_xml(xml_fn)
521
538
 
522
539
  text = parse_xml(xml_fn)
523
540
 
@@ -600,6 +617,7 @@ class CBETA::P5aToHTMLForEveryEdition
600
617
 
601
618
  @orig = @cbeta.get_canon_symbol(vol[0])
602
619
  abort "未處理底本" if @orig.nil?
620
+ @orig_short = @orig.sub(/^【(.*)】$/, '\1')
603
621
 
604
622
  @vol = vol
605
623
  @series = vol[0]
@@ -693,6 +711,8 @@ class CBETA::P5aToHTMLForEveryEdition
693
711
  root = doc.root()
694
712
  body = root.xpath("text/body")[0]
695
713
  @pass = [true]
714
+
715
+ @editions = get_editions(doc)
696
716
 
697
717
  text = traverse(body)
698
718
  text
@@ -725,7 +745,7 @@ class CBETA::P5aToHTMLForEveryEdition
725
745
  @editions.each do |ed|
726
746
  frag = Nokogiri::HTML.fragment("<div id='body'>#{html}</div>")
727
747
  frag.search("r").each do |node|
728
- if node['w'] == ed
748
+ if node['w'].include? ed
729
749
  node.add_previous_sibling node.inner_html
730
750
  end
731
751
  node.remove
@@ -734,7 +754,11 @@ class CBETA::P5aToHTMLForEveryEdition
734
754
 
735
755
  back = html_back(juan_no, ed)
736
756
 
737
- fn = ed.sub(/^【(.*)】$/, '\1') + '.htm'
757
+ fn = ed.sub(/^【(.*)】$/, '\1')
758
+ if fn != 'CBETA' and fn != @orig_short
759
+ fn = @orig_short + '→' + fn
760
+ end
761
+ fn += '.htm'
738
762
  output_path = File.join(folder, fn)
739
763
  text = <<eos
740
764
  <html>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.6
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-14 00:00:00.000000000 Z
11
+ date: 2015-12-16 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com
@@ -23,7 +23,6 @@ files:
23
23
  - lib/cbeta/gaiji.rb
24
24
  - lib/cbeta/html_to_pdf.rb
25
25
  - lib/cbeta/html_to_text.rb
26
- - lib/cbeta/p5a_to_epub.rb
27
26
  - lib/cbeta/p5a_to_html.rb
28
27
  - lib/cbeta/p5a_to_html_for_every_edition.rb
29
28
  - lib/cbeta/p5a_to_html_for_pdf.rb
@@ -1,872 +0,0 @@
1
- require 'cgi'
2
- require 'date'
3
- require 'fileutils'
4
- require 'json'
5
- require 'nokogiri'
6
- require 'set'
7
- require 'gepub'
8
- require 'pp'
9
-
10
- # Convert CBETA XML P5a to EPUB
11
- #
12
- # CBETA XML P5a 可由此取得: https://github.com/cbeta-git/xml-p5a
13
- class CBETA::P5aToEPUB
14
- # 內容不輸出的元素
15
- PASS=['back', 'teiHeader']
16
-
17
- # 某版用字缺的符號
18
- MISSING = '-'
19
-
20
- SCRIPT_FOLDER = File.dirname(__FILE__)
21
- NAV_TEMPLATE = File.read(File.join(SCRIPT_FOLDER, '../data/epub-nav.xhtml'))
22
- MAIN = 'main.xhtml'
23
- DATA = File.join(SCRIPT_FOLDER, '../data')
24
-
25
- private_constant :PASS, :MISSING, :SCRIPT_FOLDER, :NAV_TEMPLATE, :MAIN, :DATA
26
-
27
- # @param temp_folder [String] 供 EPUB 暫存工作檔案的路徑
28
- # @option opts [Integer] :epub_version (3) EPUB 版本
29
- # @option opts [String] :graphic_base 圖檔路徑
30
- # * graphic_base/covers: 封面圖檔位置
31
- # * graphic_base/figures: 插圖圖檔位置
32
- # * graphic_base/sd-gif: 悉曇字圖檔位置
33
- # * graphic_base/rj-gif: 蘭札體圖檔位置
34
- # @option opts [String] :front_page 內文前可以加一份 HTML 檔,例如「編輯說明」
35
- # @option opts [String] :front_page_title 加在目錄的 front_page 標題
36
- # @option opts [String] :back_page 內文後可以加一份 HTML 檔,例如「版權聲明」
37
- # @option opts [String] :back_page_title 加在目錄的 back_page 標題
38
- # @option opts [Boolean] :juan_toc 目次中是否要有卷目次,預設為 true
39
- #
40
- # @example
41
- # options = {
42
- # epub_version: 3,
43
- # front_page: '/path/to/front_page.xhtml',
44
- # front_page_title: '編輯說明',
45
- # back_page: '/path/to/back_page.xhtml',
46
- # back_page_title: '贊助資訊',
47
- # graphic_base: '/path/to/graphic/files/root'
48
- # }
49
- # c = CBETA::P5aToEPUB.new('/path/to/temp/working/folder', options)
50
- # c.convert_folder('/path/to/xml/root', '/path/for/output/epubs')
51
- def initialize(temp_folder, opts={})
52
- @temp_folder = temp_folder
53
- @settings = {
54
- epub_version: 3,
55
- juan_toc: true
56
- }
57
- @settings.merge!(opts)
58
- @cbeta = CBETA.new
59
- @gaijis = CBETA::Gaiji.new
60
-
61
- # 載入 unicode 1.1 字集列表
62
- fn = File.join(DATA, 'unicode-1.1.json')
63
- json = File.read(fn)
64
- @unicode1 = JSON.parse(json)
65
- end
66
-
67
- # 將某個 xml 轉為一個 EPUB
68
- # @param input_path [String] 輸入 XML 檔路徑
69
- # @param output_path [String] 輸出 EPUB 檔路徑
70
- def convert_file(input_path, output_path)
71
- return false unless input_path.end_with? '.xml'
72
-
73
- @book_id = File.basename(input_path, ".xml")
74
-
75
- sutra_init
76
-
77
- handle_file(input_path)
78
- create_epub(output_path)
79
- end
80
-
81
- # 將某個資料夾下的每部作品都轉為一個對應的 EPUB。
82
- # 跨冊的作品也會合成一個 EPUB。
83
- #
84
- # @example
85
- # require 'cbeta'
86
- #
87
- # TEMP = '/temp/epub-work'
88
- # IMG = '/Users/ray/Documents/Projects/D道安/figures'
89
- #
90
- # c = CBETA::P5aToEPUB.new(TEMP, graphic_base: IMG)
91
- # c.convert_folder('/Users/ray/Documents/Projects/D道安/xml-p5a/DA', '/temp/cbeta-epub/DA')
92
- def convert_folder(input_folder, output_folder)
93
- puts "convert folder: #{input_folder} to #{output_folder}"
94
- @todo = {}
95
-
96
- # 先檢視整個資料夾,哪些是要多檔合一
97
- prepare_todo_list(input_folder, output_folder)
98
-
99
- @todo.each_pair do |k, v|
100
- convert_sutra(k, v[:xml_files], v[:epub])
101
- end
102
- end
103
-
104
- # 將多個 xml 檔案合成一個 EPUB
105
- #
106
- # @example 大般若經 跨三冊 合成一個 EPUB
107
- # require 'cbeta'
108
- #
109
- # TEMP = '/temp/epub-work'
110
- #
111
- # xml_files = [
112
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/05/T05n0220a.xml',
113
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/06/T06n0220b.xml',
114
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220c.xml',
115
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220d.xml',
116
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220e.xml',
117
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220f.xml',
118
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220g.xml',
119
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220h.xml',
120
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220i.xml',
121
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220j.xml',
122
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220k.xml',
123
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220l.xml',
124
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220m.xml',
125
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220n.xml',
126
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220o.xml',
127
- # ]
128
- #
129
- # c = CBETA::P5aToEPUB.new(TEMP)
130
- # c.convert_sutra('T0220', xml_files, '/temp/cbeta-epub/T0220.epub')
131
- def convert_sutra(book_id, xml_files, out)
132
- @book_id = book_id
133
- sutra_init
134
- xml_files.each { |f| handle_file(f) }
135
-
136
- if xml_files.size > 1
137
- @title.sub!(/^(.*)\(.*?\)$/, '\1')
138
- @title.sub!(/^(.*?)((.*?))+$/, '\1')
139
- end
140
- create_epub(out)
141
- end
142
-
143
- private
144
-
145
- def clear_temp_folder
146
- FileUtils.remove_dir(@temp_folder, force=true)
147
- FileUtils::mkdir_p @temp_folder
148
- end
149
-
150
- def copy_static_files(src, dest)
151
- dest = File.join(@temp_folder, dest)
152
- FileUtils.copy(src, dest)
153
- end
154
-
155
- def create_epub(output_path)
156
- if @settings[:front_page]
157
- copy_static_files(@settings[:front_page], 'front.xhtml')
158
- end
159
-
160
- if @settings[:back_page]
161
- copy_static_files(@settings[:back_page], 'back.xhtml')
162
- end
163
-
164
- src = File.join(DATA, 'epub.css')
165
- copy_static_files(src, 'cbeta.css')
166
-
167
- create_html_by_juan
168
- create_nav_html
169
-
170
- title = @title
171
- book_id = @book_id
172
- creator = @author
173
- builder = GEPUB::Builder.new {
174
- language 'zh-TW'
175
- unique_identifier "http://www.cbeta.org/#{book_id}", 'BookID', 'URL'
176
- title title
177
-
178
- creator creator
179
-
180
- contributors 'DILA'
181
-
182
- date Date.today.to_s
183
- }
184
-
185
- juan_dir = File.join(@temp_folder, 'juans')
186
- settings = @settings
187
- # in resources block, you can define resources by its relative path and datasource.
188
- # item creator methods are: files, file.
189
- builder.resources(:workdir => @temp_folder) {
190
- glob 'img/*'
191
- file 'cbeta.css'
192
-
193
- # this is navigation document.
194
- nav 'nav.xhtml'
195
-
196
- # ordered item. will be added to spine.
197
- ordered {
198
- file 'front.xhtml' if settings[:front_page]
199
-
200
- Dir.entries(juan_dir).sort.each do |f|
201
- next if f.start_with? '.'
202
- file "juans/#{f}"
203
- end
204
-
205
- file 'back.xhtml' if settings[:back_page]
206
- }
207
- }
208
- builder.book.version = @settings[:epub_version]
209
-
210
- canon = book_id.sub(/^([A-Z]{1,2}).*$/, '\1')
211
- cover = File.join(settings[:graphic_base], 'covers', canon, "#{book_id}.jpg")
212
- if File.exist? cover
213
- File.open(cover) do |io|
214
- builder.book.add_item(cover, io).cover_image
215
- end
216
- end
217
-
218
- builder.generate_epub(output_path)
219
- puts "output: #{output_path}\n\n"
220
- end
221
-
222
- def create_html_by_juan
223
- juans = @main_text.split(/(<juan \d+>)/)
224
- open = false
225
- fo = nil
226
- juan_no = nil
227
- fn = ''
228
- buf = ''
229
- # 一卷一檔
230
- juans.each do |j|
231
- if j =~ /<juan (\d+)>$/
232
- juan_no = $1.to_i
233
- fn = "%03d.xhtml" % juan_no
234
- output_path = File.join(@temp_folder, 'juans', fn)
235
- fo = File.open(output_path, 'w')
236
- open = true
237
- s = <<eos
238
- <html xmlns="http://www.w3.org/1999/xhtml">
239
- <head>
240
- <meta charset="utf-8" />
241
- <title>#{@title}</title>
242
- <link rel="stylesheet" type="text/css" href="../cbeta.css" />
243
- </head>
244
- <body>
245
- <div id='body'>
246
- eos
247
- fo.write(s)
248
- fo.write(buf)
249
- buf = ''
250
- elsif open
251
- fo.write(j + "\n</div><!-- end of div[@id='body'] -->\n")
252
- fo.write('</body></html>')
253
- fo.close
254
- else
255
- buf = j
256
- end
257
- end
258
- end
259
-
260
- def create_nav_html
261
- if @settings[:back_page_title]
262
- s = @settings[:back_page_title]
263
- @nav_root_ol.add_child("<li><a href='back.xhtml'>#{s}</a></li>")
264
- end
265
-
266
- #s = @nav_root_ol.to_xml(indent: 2, encoding: 'UTF-8', pertty: true, :save_with => Nokogiri::XML::Node::SaveOptions::AS_XML)
267
- s = @nav_root_ol.to_xml
268
-
269
- #s += "" % @toc_juan
270
-
271
- fn = File.join(@temp_folder, 'nav.xhtml')
272
- s = NAV_TEMPLATE % s
273
- File.write(fn, s)
274
- end
275
-
276
- def handle_anchor(e)
277
- if e.has_attribute?('type')
278
- if e['type'] == 'circle'
279
- return '◎'
280
- end
281
- end
282
-
283
- ''
284
- end
285
-
286
- def handle_app(e)
287
- traverse(e)
288
- end
289
-
290
- def handle_byline(e)
291
- r = '<p class="byline">'
292
- r += traverse(e)
293
- r + '</p>'
294
- end
295
-
296
- def handle_cell(e)
297
- doc = Nokogiri::XML::Document.new
298
- cell = doc.create_element('td')
299
- cell['rowspan'] = e['rows'] if e.key? 'rows'
300
- cell['colspan'] = e['cols'] if e.key? 'cols'
301
- cell.inner_html = traverse(e)
302
- to_html(cell) + "\n"
303
- end
304
-
305
- def handle_corr(e)
306
- "<span class='corr'>" + traverse(e) + "</span>"
307
- end
308
-
309
- def handle_div(e)
310
- if e.has_attribute? 'type'
311
- @open_divs << e
312
- r = traverse(e)
313
- @open_divs.pop
314
- return "<div class='div-#{e['type']}'>#{r}</div>"
315
- else
316
- return traverse(e)
317
- end
318
- end
319
-
320
- def handle_figure(e)
321
- "<div class='figure'>%s</div>" % traverse(e)
322
- end
323
-
324
- def handle_g(e, mode)
325
- # if 有 <mapping type="unicode">
326
- # if 不在 Unicode Extension C, D, E 範圍裡
327
- # 直接採用
328
- # else
329
- # 預設呈現 unicode, 但仍包缺字資訊,供點選開 popup
330
- # else if 有 <mapping type="normal_unicode">
331
- # 預設呈現 normal_unicode, 但仍包缺字資訊,供點選開 popup
332
- # else if 有 normalized form
333
- # 預設呈現 normalized form, 但仍包缺字資訊,供點選開 popup
334
- # else
335
- # 預設呈現組字式, 但仍包缺字資訊,供點選開 popup
336
- gid = e['ref'][1..-1]
337
- g = @gaijis[gid]
338
- abort "Line:#{__LINE__} 無缺字資料:#{gid}" if g.nil?
339
- zzs = g['zzs']
340
-
341
-
342
- if gid.start_with?('SD')
343
- case gid
344
- when 'SD-E35A'
345
- return '('
346
- when 'SD-E35B'
347
- return ')'
348
- else
349
- return g['roman'] if g.key? 'roman'
350
-
351
- if mode == 'txt'
352
- puts "警告:純文字模式出現悉曇字:#{gid}"
353
- return gid
354
- else
355
- # 如果沒有羅馬轉寫就顯示圖檔
356
- src = File.join(@settings[:graphic_base], 'sd-gif', gid[3..4], gid+'.gif')
357
- basename = File.basename(src)
358
- dest = File.join(@temp_folder, 'img', basename)
359
- FileUtils.copy(src, dest)
360
- return "<img src='../img/#{basename}' />"
361
- end
362
- end
363
- end
364
-
365
- if gid.start_with?('RJ')
366
- return g['roman'] if g.key? 'roman'
367
-
368
- if mode == 'txt'
369
- puts "警告:純文字模式出現蘭札體:#{gid}"
370
- return gid
371
- else
372
- # 如果沒有羅馬轉寫就顯示圖檔
373
- src = File.join(@settings[:graphic_base], 'rj-gif', gid[3..4], gid+'.gif')
374
- basename = File.basename(src)
375
- dest = File.join(@temp_folder, 'img', basename)
376
- FileUtils.copy(src, dest)
377
- return "<img src='../img/#{basename}' />"
378
- end
379
- end
380
-
381
- if mode == 'txt'
382
- abort "缺組字式:#{g}" if zzs.nil?
383
- return zzs
384
- end
385
-
386
- default = ''
387
- if g.has_key?('unicode')
388
- if @unicode1.include?(g['unicode'])
389
- return g['unicode-char'] # unicode 1.1 直接用
390
- end
391
- end
392
-
393
- zzs
394
- end
395
-
396
- def handle_graphic(e)
397
- url = e['url']
398
- url.sub!(/^.*(figures\/.*)$/, '\1')
399
-
400
- src = File.join(@settings[:graphic_base], url)
401
- basename = File.basename(src)
402
- dest = File.join(@temp_folder, 'img', basename)
403
- FileUtils.copy(src, dest)
404
-
405
- "<img src='../img/#{basename}' />"
406
- end
407
-
408
- def handle_head(e)
409
- r = ''
410
- unless e['type'] == 'added'
411
- i = @open_divs.size
412
- r = "<p class='h#{i}'>%s</p>" % traverse(e)
413
- end
414
- r
415
- end
416
-
417
- def handle_item(e)
418
- "<li>%s</li>\n" % traverse(e)
419
- end
420
-
421
- def handle_juan(e)
422
- "<p class='juan'>%s</p>" % traverse(e)
423
- end
424
-
425
- def handle_l(e)
426
- if @lg_type == 'abnormal'
427
- return traverse(e)
428
- end
429
-
430
- @in_l = true
431
-
432
- doc = Nokogiri::XML::Document.new
433
- cell = doc.create_element('div')
434
- cell['class'] = 'lg-cell'
435
- cell.inner_html = traverse(e)
436
-
437
- if @first_l
438
- parent = e.parent()
439
- if parent.has_attribute?('rend')
440
- indent = parent['rend'].scan(/text-indent:[^:]*/)
441
- unless indent.empty?
442
- cell['style'] = indent[0]
443
- end
444
- end
445
- @first_l = false
446
- end
447
- r = to_html(cell)
448
-
449
- unless @lg_row_open
450
- r = "\n<div class='lg-row'>" + r
451
- @lg_row_open = true
452
- end
453
- @in_l = false
454
- r
455
- end
456
-
457
- def handle_lb(e)
458
- # 卍續藏有 X 跟 R 兩種 lb, 只處理 X
459
- return '' if e['ed'] != @series
460
-
461
- @lb = e['n']
462
- r = ''
463
- #if e.parent.name == 'lg' and $lg_row_open
464
- if @lg_row_open && !@in_l
465
- # 每行偈頌放在一個 lg-row 裡面
466
- # T46n1937, p. 914a01, l 包雙行夾註跨行
467
- # T20n1092, 337c16, lb 在 l 中間,不結束 lg-row
468
- r += "</div><!-- end of lg-row -->"
469
- @lg_row_open = false
470
- end
471
- unless @next_line_buf.empty?
472
- r += @next_line_buf
473
- @next_line_buf = ''
474
- end
475
- r
476
- end
477
-
478
- def handle_lem(e)
479
- r = ''
480
- w = e['wit']
481
- if w.include? 'CBETA' and not w.include? @orig
482
- r = "<span class='corr'>%s</span>" % traverse(e)
483
- else
484
- r = traverse(e)
485
- end
486
- r
487
- end
488
-
489
- def handle_lg(e)
490
- r = ''
491
- @lg_type = e['type']
492
- if @lg_type == 'abnormal'
493
- r = "<p class='lg-abnormal'>" + traverse(e) + "</p>"
494
- else
495
- @first_l = true
496
- doc = Nokogiri::XML::Document.new
497
- node = doc.create_element('div')
498
- node['class'] = 'lg'
499
- if e.has_attribute?('rend')
500
- rend = e['rend'].gsub(/text-indent:[^:]*/, '')
501
- node['style'] = rend
502
- end
503
- @lg_row_open = false
504
- node.inner_html = traverse(e)
505
- if @lg_row_open
506
- node.inner_html += '</div><!-- end of lg -->'
507
- @lg_row_open = false
508
- end
509
- r = "\n" + to_html(node)
510
- end
511
- r
512
- end
513
-
514
- def handle_list(e)
515
- doc = Nokogiri::XML::Document.new
516
- node = doc.create_element('ul')
517
- node.inner_html = traverse(e)
518
- if e.key? 'rendition'
519
- node['class'] = e['rendition']
520
- end
521
- to_html(node)
522
- end
523
-
524
- def handle_milestone(e)
525
- r = ''
526
- if e['unit'] == 'juan'
527
- r += "</div>" * @open_divs.size # 如果有 div 跨卷,要先結束, ex: T55n2154, p. 680a29, 跨 19, 20 兩卷
528
- @juan += 1
529
- r += "<juan #{@juan}>"
530
- @open_divs.each { |d|
531
- r += "<div class='#{d['type']}'>"
532
- }
533
- end
534
- r
535
- end
536
-
537
- def handle_mulu(e)
538
- @mulu_count += 1
539
- fn = "juans/%03d.xhtml" % @juan
540
- if e['type'] == '卷'
541
- if @settings[:juan_toc]
542
- label = e['n']
543
- @juan_nav.add_child("<li><a href='#{fn}#mulu#{@mulu_count}'>#{label}</a></li>")
544
- end
545
- else
546
- level = e['level'].to_i
547
- while @current_nav.size > (level+1)
548
- @current_nav.pop
549
- end
550
-
551
- label = traverse(e, 'txt')
552
- li = @current_nav.last.add_child("<li><a href='#{fn}#mulu#{@mulu_count}'>#{label}</a></li>").first
553
- ol = li.add_child('<ol></ol>').first
554
- @current_nav << ol
555
- end
556
- "<a id='mulu#{@mulu_count}' />"
557
- end
558
-
559
- def handle_node(e, mode)
560
- return '' if e.comment?
561
- return handle_text(e, mode) if e.text?
562
- return '' if PASS.include?(e.name)
563
-
564
- r = case e.name
565
- when 'anchor' then handle_anchor(e)
566
- when 'app' then handle_app(e)
567
- when 'byline' then handle_byline(e)
568
- when 'cell' then handle_cell(e)
569
- when 'corr' then handle_corr(e)
570
- when 'div' then handle_div(e)
571
- when 'figure' then handle_figure(e)
572
- when 'foreign' then ''
573
- when 'g' then handle_g(e, mode)
574
- when 'graphic' then handle_graphic(e)
575
- when 'head' then handle_head(e)
576
- when 'item' then handle_item(e)
577
- when 'juan' then handle_juan(e)
578
- when 'l' then handle_l(e)
579
- when 'lb' then handle_lb(e)
580
- when 'lem' then handle_lem(e)
581
- when 'lg' then handle_lg(e)
582
- when 'list' then handle_list(e)
583
- when 'mulu' then handle_mulu(e)
584
- when 'note' then handle_note(e)
585
- when 'milestone' then handle_milestone(e)
586
- when 'p' then handle_p(e)
587
- when 'rdg' then ''
588
- when 'reg' then ''
589
- when 'row' then handle_row(e)
590
- when 'sic' then ''
591
- when 'sg' then handle_sg(e)
592
- when 't' then handle_t(e)
593
- when 'tt' then handle_tt(e)
594
- when 'table' then handle_table(e)
595
- else traverse(e)
596
- end
597
- r
598
- end
599
-
600
- def handle_note(e)
601
- n = e['n']
602
- if e.has_attribute?('type')
603
- t = e['type']
604
- case t
605
- when 'equivalent'
606
- return ''
607
- when 'orig'
608
- return ''
609
- when 'orig_biao'
610
- return ''
611
- when 'orig_ke'
612
- return ''
613
- when 'mod'
614
- return ""
615
- when 'rest'
616
- return ''
617
- else
618
- return '' if t.start_with?('cf')
619
- end
620
- end
621
-
622
- if e.has_attribute?('resp')
623
- return '' if e['resp'].start_with? 'CBETA'
624
- end
625
-
626
- if e.has_attribute?('place') && e['place']=='inline'
627
- r = traverse(e)
628
- return "(#{r})"
629
- else
630
- return traverse(e)
631
- end
632
- end
633
-
634
- def handle_p(e)
635
- r = "<div class='p'>\n"
636
- r += traverse(e)
637
- r + "</div>\n"
638
- end
639
-
640
- def handle_row(e)
641
- "<tr>" + traverse(e) + "</tr>\n"
642
- end
643
-
644
- def handle_sg(e)
645
- '(' + traverse(e) + ')'
646
- end
647
-
648
- def handle_file(xml_fn)
649
- puts "read #{xml_fn}"
650
- @in_l = false
651
- @lg_row_open = false
652
- @mod_notes = Set.new
653
- @next_line_buf = ''
654
- @open_divs = []
655
-
656
- if @book_id.start_with? 'DA'
657
- @orig = nil?
658
- else
659
- @orig = @cbeta.get_canon_abbr(@book_id[0])
660
- abort "未處理底本: #{@book_id[0]}" if @orig.nil?
661
- end
662
-
663
- text = parse_xml(xml_fn)
664
-
665
- # 註標移到 lg-cell 裡面,不然以 table 呈現 lg 會有問題
666
- text.gsub!(/(<a class='noteAnchor'[^>]*><\/a>)(<div class="lg-cell"[^>]*>)/, '\2\1')
667
-
668
- @main_text += text
669
- end
670
-
671
- def handle_t(e)
672
- if e.has_attribute? 'place'
673
- return '' if e['place'].include? 'foot'
674
- end
675
- r = traverse(e)
676
-
677
- # <tt type="app"> 不是 悉漢雙行對照
678
- return r if @tt_type == 'app'
679
-
680
- # 處理雙行對照
681
- i = e.xpath('../t').index(e)
682
- case i
683
- when 0
684
- return r + ' '
685
- when 1
686
- @next_line_buf += r + ' '
687
- return ''
688
- else
689
- return r
690
- end
691
- end
692
-
693
- def handle_tt(e)
694
- @tt_type = e['type']
695
- traverse(e)
696
- end
697
-
698
- def handle_table(e)
699
- "<table>" + traverse(e) + "</table>"
700
- end
701
-
702
- def handle_text(e, mode)
703
- s = e.content().chomp
704
- return '' if s.empty?
705
- return '' if e.parent.name == 'app'
706
-
707
- # cbeta xml 文字之間會有多餘的換行
708
- r = s.gsub(/[\n\r]/, '')
709
-
710
- # 把 & 轉為 &amp;
711
- CGI.escapeHTML(r)
712
- end
713
-
714
- def lem_note_cf(e)
715
- # ex: T32n1670A.xml, p. 703a16
716
- # <note type="cf1">K30n1002_p0257a01-a23</note>
717
- refs = []
718
- e.xpath('./note').each { |n|
719
- if n.key?('type') and n['type'].start_with? 'cf'
720
- refs << n.content
721
- end
722
- }
723
- if refs.empty?
724
- ''
725
- else
726
- '修訂依據:' + refs.join(';') + '。'
727
- end
728
- end
729
-
730
- def lem_note_rdg(lem)
731
- r = ''
732
- app = lem.parent
733
- @pass << false
734
- app.xpath('rdg').each { |rdg|
735
- if rdg['wit'].include? @orig
736
- s = traverse(rdg, 'back')
737
- s = MISSING if s.empty?
738
- r += @orig + s
739
- end
740
- }
741
- @pass.pop
742
- r += '。' unless r.empty?
743
- r
744
- end
745
-
746
- def sutra_init
747
- clear_temp_folder
748
-
749
- s = NAV_TEMPLATE % '<ol></ol>'
750
- @nav_doc = Nokogiri::XML(s)
751
-
752
- @nav_doc.remove_namespaces!()
753
- @nav_root_ol = @nav_doc.at_xpath('//ol')
754
- @current_nav = [@nav_root_ol]
755
-
756
- if @settings[:front_page_title]
757
- @nav_root_ol.add_child("<li><a href='front.xhtml'>編輯說明</a></li>")
758
- end
759
-
760
- li = @nav_root_ol.add_child("<li><a href='#'>章節目次</a></li>").first
761
- ol = li.add_child('<ol></ol>').first
762
- @current_nav << ol
763
-
764
- if @settings[:juan_toc]
765
- li = @nav_root_ol.add_child("<li><a href='#'>卷目次</a></li>").first
766
- @juan_nav = li.add_child('<ol></ol>').first
767
- end
768
-
769
- @mulu_count = 0
770
- @main_text = ''
771
- @dila_note = 0
772
- @toc_juan = '' # 卷目次
773
- @juan = 0
774
-
775
- FileUtils::mkdir_p File.join(@temp_folder, 'img')
776
- FileUtils::mkdir_p File.join(@temp_folder, 'juans')
777
- end
778
-
779
- def open_xml(fn)
780
- s = File.read(fn)
781
-
782
- if fn.include? 'T16n0657'
783
- # 這個地方 雙行夾註 跨兩行偈頌
784
- # 把 lb 移到 note 結束之前
785
- # 讓 lg-row 先結束,再結束雙行夾註
786
- s.sub!(/(<\/note>)(\n<lb n="0206b29" ed="T"\/>)/, '\2\1')
787
- end
788
-
789
- # <milestone unit="juan"> 前面的 lb 屬於新的這一卷
790
- s.gsub!(%r{((?:<pb [^>]+>\n?)?(?:<lb [^>]+>\n?)+)(<milestone [^>]*unit="juan"[^/>]*/>)}, '\2\1')
791
-
792
- doc = Nokogiri::XML(s)
793
- doc.remove_namespaces!()
794
- doc
795
- end
796
-
797
- def read_mod_notes(doc)
798
- doc.xpath("//note[@type='mod']").each { |e|
799
- @mod_notes << e['n']
800
- }
801
- end
802
-
803
- def parse_xml(xml_fn)
804
- @pass = [false]
805
-
806
- doc = open_xml(xml_fn)
807
-
808
- e = doc.xpath("//titleStmt/title")[0]
809
- @title = traverse(e, 'txt')
810
- @title = @title.split()[-1]
811
-
812
- @author = doc.at_xpath("//titleStmt/author").text
813
-
814
- read_mod_notes(doc)
815
-
816
- root = doc.root()
817
- body = root.xpath("text/body")[0]
818
- @pass = [true]
819
-
820
- text = traverse(body)
821
- text
822
- end
823
-
824
- def prepare_todo_list(input_folder, output_folder)
825
- Dir.foreach(input_folder) do |f|
826
- next if f.start_with? '.'
827
- p1 = File.join(input_folder, f)
828
- if File.file?(p1)
829
- work = f.sub(/^([A-Z]{1,2})\d{2,3}n(.*)\.xml$/, '\1\2')
830
- work = 'T0220' if work.start_with? 'T0220'
831
- unless @todo.key? work
832
- @todo[work] = { xml_files: [] }
833
- end
834
- hash = @todo[work]
835
- hash[:xml_files] << p1
836
-
837
- folders = output_folder.split('/')
838
- folders.pop if folders[-1].match(/^[A-Z]{1,2}\d{2,3}$/)
839
- folder = folders.join('/')
840
- FileUtils::mkdir_p folder
841
- hash[:epub] = File.join(folder, "#{work}.epub")
842
- else
843
- p2 = File.join(output_folder, f)
844
- prepare_todo_list(p1, p2)
845
- end
846
- end
847
- end
848
-
849
- def remove_empty_nav(node_list)
850
- node_list.each do |n|
851
- if n[:nav].empty?
852
- n.delete(:nav)
853
- else
854
- remove_empty_nav(n[:nav])
855
- end
856
- end
857
- end
858
-
859
- def to_html(e)
860
- e.to_xml(encoding: 'UTF-8', pertty: true, :save_with => Nokogiri::XML::Node::SaveOptions::AS_XML)
861
- end
862
-
863
- def traverse(e, mode='html')
864
- r = ''
865
- e.children.each { |c|
866
- s = handle_node(c, mode)
867
- r += s
868
- }
869
- r
870
- end
871
-
872
- end