cbeta 1.3.6 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 593f41e3ed434a6e6cecb82d6ab138060bdc411b
4
- data.tar.gz: e66b25b39df32cf7dac8616742903865eb4dc1f3
3
+ metadata.gz: ff4869a9d67955fc89ddd9c5f6f8e88db19f598d
4
+ data.tar.gz: 716234a36f10298fa572d6c7fac10115316918ce
5
5
  SHA512:
6
- metadata.gz: b346f87c1eddb9908eebd99c55c7c5a2d92acf6e131b88323a9905ea0159134a22ee446d15dbb0160228d990dc723dfca5be39bcefb793915484a845d8c64ebe
7
- data.tar.gz: 657358264f56a387e2e80d84a1474b66f220e5cb4428e7995d6dfec867250777bd11f42045b9bdda20c1c01ed27322988e791c02560233a82b11f02ddd911c04
6
+ metadata.gz: 64cf5990e9579b23e70a861696862afaafb8738e30819cb3617e92b23a54753e33542a41a0473b7368a5cd1752ec517b9ab84a8bd9da3e017c9e901918825857
7
+ data.tar.gz: b42dadbd97ceb255e1300231b4f43148e1eebcce2c17a837ed91535ea6c9eed3e66021d3298770c698c1fe2520d3d9c5e723f39f301af7113fb119e71c94770f
data/lib/cbeta.rb CHANGED
@@ -121,7 +121,6 @@ require 'cbeta/bm_to_text'
121
121
  require 'cbeta/char_count'
122
122
  require 'cbeta/char_freq'
123
123
  require 'cbeta/html_to_pdf'
124
- require 'cbeta/p5a_to_epub'
125
124
  require 'cbeta/p5a_to_html'
126
125
  require 'cbeta/p5a_to_html_for_every_edition'
127
126
  require 'cbeta/p5a_to_html_for_pdf'
@@ -64,6 +64,23 @@ class CBETA::P5aToHTMLForEveryEdition
64
64
  end
65
65
 
66
66
  private
67
+
68
+ def before_parse_xml(xml_fn)
69
+ @back = { 0 => '' }
70
+ @back_orig = { 0 => '' }
71
+ @char_count = 1
72
+ @dila_note = 0
73
+ @div_count = 0
74
+ @in_l = false
75
+ @juan = 0
76
+ @lg_row_open = false
77
+ @mod_notes = Set.new
78
+ @next_line_buf = ''
79
+ @notes_mod = {}
80
+ @notes_orig = {}
81
+ @open_divs = []
82
+ @sutra_no = File.basename(xml_fn, ".xml")
83
+ end
67
84
 
68
85
  def convert_all
69
86
  Dir.entries(@xml_root).sort.each do |c|
@@ -71,6 +88,15 @@ class CBETA::P5aToHTMLForEveryEdition
71
88
  handle_collection(c)
72
89
  end
73
90
  end
91
+
92
+ def get_editions(doc)
93
+ r = Set.new [@orig, "【CBETA】"] # 至少有底本及 CBETA 兩個版本
94
+ doc.xpath('//lem|//rdg').each do |e|
95
+ w = e['wit'].scan(/【.*?】/)
96
+ r.merge w
97
+ end
98
+ r
99
+ end
74
100
 
75
101
  def handle_anchor(e)
76
102
  id = e['id']
@@ -309,12 +335,17 @@ class CBETA::P5aToHTMLForEveryEdition
309
335
  end
310
336
 
311
337
  def handle_lem(e)
312
- w = e['wit'].scan(/【.*?】/)
313
- @editions.merge w
314
- w = w.join(' ')
338
+ # 沒有 rdg 的版本,用字同 lem
339
+ editions = Set.new @editions
340
+ e.xpath('./following-sibling::rdg').each do |rdg|
341
+ rdg['wit'].scan(/【.*?】/).each do |w|
342
+ editions.delete w
343
+ end
344
+ end
345
+ w = editions.to_a.join(' ')
315
346
 
316
347
  r = traverse(e)
317
- "<r w='#{w}' l='#{@lb}' w='#{@char_count}'>#{r}</r>"
348
+ "<r w='#{w}' l='#{@lb}'>#{r}</r>"
318
349
  end
319
350
 
320
351
  def handle_lg(e)
@@ -485,7 +516,6 @@ class CBETA::P5aToHTMLForEveryEdition
485
516
  def handle_rdg(e)
486
517
  r = traverse(e)
487
518
  w = e['wit'].scan(/【.*?】/)
488
- @editions.merge w
489
519
  "<r w='#{e['wit']}' l='#{@lb}' w='#{@char_count}'>#{r}</r>"
490
520
  end
491
521
 
@@ -503,21 +533,8 @@ class CBETA::P5aToHTMLForEveryEdition
503
533
 
504
534
  def handle_sutra(xml_fn)
505
535
  puts "convert sutra #{xml_fn}"
506
- @editions = Set.new [@orig, "【CBETA】"] # 至少有底本及 CBETA 兩個版本
507
- @back = { 0 => '' }
508
- @back_orig = { 0 => '' }
509
- @char_count = 1
510
- @dila_note = 0
511
- @div_count = 0
512
- @in_l = false
513
- @juan = 0
514
- @lg_row_open = false
515
- @mod_notes = Set.new
516
- @next_line_buf = ''
517
- @notes_mod = {}
518
- @notes_orig = {}
519
- @open_divs = []
520
- @sutra_no = File.basename(xml_fn, ".xml")
536
+
537
+ before_parse_xml(xml_fn)
521
538
 
522
539
  text = parse_xml(xml_fn)
523
540
 
@@ -600,6 +617,7 @@ class CBETA::P5aToHTMLForEveryEdition
600
617
 
601
618
  @orig = @cbeta.get_canon_symbol(vol[0])
602
619
  abort "未處理底本" if @orig.nil?
620
+ @orig_short = @orig.sub(/^【(.*)】$/, '\1')
603
621
 
604
622
  @vol = vol
605
623
  @series = vol[0]
@@ -693,6 +711,8 @@ class CBETA::P5aToHTMLForEveryEdition
693
711
  root = doc.root()
694
712
  body = root.xpath("text/body")[0]
695
713
  @pass = [true]
714
+
715
+ @editions = get_editions(doc)
696
716
 
697
717
  text = traverse(body)
698
718
  text
@@ -725,7 +745,7 @@ class CBETA::P5aToHTMLForEveryEdition
725
745
  @editions.each do |ed|
726
746
  frag = Nokogiri::HTML.fragment("<div id='body'>#{html}</div>")
727
747
  frag.search("r").each do |node|
728
- if node['w'] == ed
748
+ if node['w'].include? ed
729
749
  node.add_previous_sibling node.inner_html
730
750
  end
731
751
  node.remove
@@ -734,7 +754,11 @@ class CBETA::P5aToHTMLForEveryEdition
734
754
 
735
755
  back = html_back(juan_no, ed)
736
756
 
737
- fn = ed.sub(/^【(.*)】$/, '\1') + '.htm'
757
+ fn = ed.sub(/^【(.*)】$/, '\1')
758
+ if fn != 'CBETA' and fn != @orig_short
759
+ fn = @orig_short + '→' + fn
760
+ end
761
+ fn += '.htm'
738
762
  output_path = File.join(folder, fn)
739
763
  text = <<eos
740
764
  <html>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.6
4
+ version: 2.0.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-14 00:00:00.000000000 Z
11
+ date: 2015-12-16 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com
@@ -23,7 +23,6 @@ files:
23
23
  - lib/cbeta/gaiji.rb
24
24
  - lib/cbeta/html_to_pdf.rb
25
25
  - lib/cbeta/html_to_text.rb
26
- - lib/cbeta/p5a_to_epub.rb
27
26
  - lib/cbeta/p5a_to_html.rb
28
27
  - lib/cbeta/p5a_to_html_for_every_edition.rb
29
28
  - lib/cbeta/p5a_to_html_for_pdf.rb
@@ -1,872 +0,0 @@
1
- require 'cgi'
2
- require 'date'
3
- require 'fileutils'
4
- require 'json'
5
- require 'nokogiri'
6
- require 'set'
7
- require 'gepub'
8
- require 'pp'
9
-
10
- # Convert CBETA XML P5a to EPUB
11
- #
12
- # CBETA XML P5a 可由此取得: https://github.com/cbeta-git/xml-p5a
13
- class CBETA::P5aToEPUB
14
- # 內容不輸出的元素
15
- PASS=['back', 'teiHeader']
16
-
17
- # 某版用字缺的符號
18
- MISSING = '-'
19
-
20
- SCRIPT_FOLDER = File.dirname(__FILE__)
21
- NAV_TEMPLATE = File.read(File.join(SCRIPT_FOLDER, '../data/epub-nav.xhtml'))
22
- MAIN = 'main.xhtml'
23
- DATA = File.join(SCRIPT_FOLDER, '../data')
24
-
25
- private_constant :PASS, :MISSING, :SCRIPT_FOLDER, :NAV_TEMPLATE, :MAIN, :DATA
26
-
27
- # @param temp_folder [String] 供 EPUB 暫存工作檔案的路徑
28
- # @option opts [Integer] :epub_version (3) EPUB 版本
29
- # @option opts [String] :graphic_base 圖檔路徑
30
- # * graphic_base/covers: 封面圖檔位置
31
- # * graphic_base/figures: 插圖圖檔位置
32
- # * graphic_base/sd-gif: 悉曇字圖檔位置
33
- # * graphic_base/rj-gif: 蘭札體圖檔位置
34
- # @option opts [String] :front_page 內文前可以加一份 HTML 檔,例如「編輯說明」
35
- # @option opts [String] :front_page_title 加在目錄的 front_page 標題
36
- # @option opts [String] :back_page 內文後可以加一份 HTML 檔,例如「版權聲明」
37
- # @option opts [String] :back_page_title 加在目錄的 back_page 標題
38
- # @option opts [Boolean] :juan_toc 目次中是否要有卷目次,預設為 true
39
- #
40
- # @example
41
- # options = {
42
- # epub_version: 3,
43
- # front_page: '/path/to/front_page.xhtml',
44
- # front_page_title: '編輯說明',
45
- # back_page: '/path/to/back_page.xhtml',
46
- # back_page_title: '贊助資訊',
47
- # graphic_base: '/path/to/grphic/files/root'
48
- # }
49
- # c = CBETA::P5aToEPUB.new('/path/to/temp/working/folder', options)
50
- # c.convert_folder('/path/to/xml/roo', '/path/for/output/epubs')
51
- def initialize(temp_folder, opts={})
52
- @temp_folder = temp_folder
53
- @settings = {
54
- epub_version: 3,
55
- juan_toc: true
56
- }
57
- @settings.merge!(opts)
58
- @cbeta = CBETA.new
59
- @gaijis = CBETA::Gaiji.new
60
-
61
- # 載入 unicode 1.1 字集列表
62
- fn = File.join(DATA, 'unicode-1.1.json')
63
- json = File.read(fn)
64
- @unicode1 = JSON.parse(json)
65
- end
66
-
67
- # 將某個 xml 轉為一個 EPUB
68
- # @param input_path [String] 輸入 XML 檔路徑
69
- # @param output_paath [String] 輸出 EPUB 檔路徑
70
- def convert_file(input_path, output_path)
71
- return false unless input_path.end_with? '.xml'
72
-
73
- @book_id = File.basename(input_path, ".xml")
74
-
75
- sutra_init
76
-
77
- handle_file(input_path)
78
- create_epub(output_path)
79
- end
80
-
81
- # 將某個資料夾下的每部作品都轉為一個對應的 EPUB。
82
- # 跨冊的作品也會合成一個 EPUB。
83
- #
84
- # @example
85
- # require 'cbeta'
86
- #
87
- # TEMP = '/temp/epub-work'
88
- # IMG = '/Users/ray/Documents/Projects/D道安/figures'
89
- #
90
- # c = CBETA::P5aToEPUB.new(TEMP, IMG)
91
- # c.convert_folder('/Users/ray/Documents/Projects/D道安/xml-p5a/DA', '/temp/cbeta-epub/DA')
92
- def convert_folder(input_folder, output_folder)
93
- puts "convert folder: #{input_folder} to #{output_folder}"
94
- @todo = {}
95
-
96
- # 先檢視整個資料夾,哪些是要多檔合一
97
- prepare_todo_list(input_folder, output_folder)
98
-
99
- @todo.each_pair do |k, v|
100
- convert_sutra(k, v[:xml_files], v[:epub])
101
- end
102
- end
103
-
104
- # 將多個 xml 檔案合成一個 EPUB
105
- #
106
- # @example 大般若經 跨三冊 合成一個 EPUB
107
- # require 'cbeta'
108
- #
109
- # TEMP = '/temp/epub-work'
110
- #
111
- # xml_files = [
112
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/05/T05n0220a.xml',
113
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/06/T06n0220b.xml',
114
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220c.xml',
115
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220d.xml',
116
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220e.xml',
117
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220f.xml',
118
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220g.xml',
119
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220h.xml',
120
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220i.xml',
121
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220j.xml',
122
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220k.xml',
123
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220l.xml',
124
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220m.xml',
125
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220n.xml',
126
- # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220o.xml',
127
- # ]
128
- #
129
- # c = CBETA::P5aToEPUB.new(TEMP)
130
- # c.convert_sutra('T0220', xml_files, '/temp/cbeta-epub/T0220.epub')
131
- def convert_sutra(book_id, xml_files, out)
132
- @book_id = book_id
133
- sutra_init
134
- xml_files.each { |f| handle_file(f) }
135
-
136
- if xml_files.size > 1
137
- @title.sub!(/^(.*)\(.*?\)$/, '\1')
138
- @title.sub!(/^(.*?)((.*?))+$/, '\1')
139
- end
140
- create_epub(out)
141
- end
142
-
143
- private
144
-
145
- def clear_temp_folder
146
- FileUtils.remove_dir(@temp_folder, force=true)
147
- FileUtils::mkdir_p @temp_folder
148
- end
149
-
150
- def copy_static_files(src, dest)
151
- dest = File.join(@temp_folder, dest)
152
- FileUtils.copy(src, dest)
153
- end
154
-
155
- def create_epub(output_path)
156
- if @settings[:front_page]
157
- copy_static_files(@settings[:front_page], 'front.xhtml')
158
- end
159
-
160
- if @settings[:back_page]
161
- copy_static_files(@settings[:back_page], 'back.xhtml')
162
- end
163
-
164
- src = File.join(DATA, 'epub.css')
165
- copy_static_files(src, 'cbeta.css')
166
-
167
- create_html_by_juan
168
- create_nav_html
169
-
170
- title = @title
171
- book_id = @book_id
172
- creator = @author
173
- builder = GEPUB::Builder.new {
174
- language 'zh-TW'
175
- unique_identifier "http://www.cbeta.org/#{book_id}", 'BookID', 'URL'
176
- title title
177
-
178
- creator creator
179
-
180
- contributors 'DILA'
181
-
182
- date Date.today.to_s
183
- }
184
-
185
- juan_dir = File.join(@temp_folder, 'juans')
186
- settings = @settings
187
- # in resources block, you can define resources by its relative path and datasource.
188
- # item creator methods are: files, file.
189
- builder.resources(:workdir => @temp_folder) {
190
- glob 'img/*'
191
- file 'cbeta.css'
192
-
193
- # this is navigation document.
194
- nav 'nav.xhtml'
195
-
196
- # ordered item. will be added to spine.
197
- ordered {
198
- file 'front.xhtml' if settings[:front_page]
199
-
200
- Dir.entries(juan_dir).sort.each do |f|
201
- next if f.start_with? '.'
202
- file "juans/#{f}"
203
- end
204
-
205
- file 'back.xhtml' if settings[:back_page]
206
- }
207
- }
208
- builder.book.version = @settings[:epub_version]
209
-
210
- canon = book_id.sub(/^([A-Z]{1,2}).*$/, '\1')
211
- cover = File.join(settings[:graphic_base], 'covers', canon, "#{book_id}.jpg")
212
- if File.exist? cover
213
- File.open(cover) do |io|
214
- builder.book.add_item(cover, io).cover_image
215
- end
216
- end
217
-
218
- builder.generate_epub(output_path)
219
- puts "output: #{output_path}\n\n"
220
- end
221
-
222
- def create_html_by_juan
223
- juans = @main_text.split(/(<juan \d+>)/)
224
- open = false
225
- fo = nil
226
- juan_no = nil
227
- fn = ''
228
- buf = ''
229
- # 一卷一檔
230
- juans.each do |j|
231
- if j =~ /<juan (\d+)>$/
232
- juan_no = $1.to_i
233
- fn = "%03d.xhtml" % juan_no
234
- output_path = File.join(@temp_folder, 'juans', fn)
235
- fo = File.open(output_path, 'w')
236
- open = true
237
- s = <<eos
238
- <html xmlns="http://www.w3.org/1999/xhtml">
239
- <head>
240
- <meta charset="utf-8" />
241
- <title>#{@title}</title>
242
- <link rel="stylesheet" type="text/css" href="../cbeta.css" />
243
- </head>
244
- <body>
245
- <div id='body'>
246
- eos
247
- fo.write(s)
248
- fo.write(buf)
249
- buf = ''
250
- elsif open
251
- fo.write(j + "\n</div><!-- end of div[@id='body'] -->\n")
252
- fo.write('</body></html>')
253
- fo.close
254
- else
255
- buf = j
256
- end
257
- end
258
- end
259
-
260
- def create_nav_html
261
- if @settings[:back_page_title]
262
- s = @settings[:back_page_title]
263
- @nav_root_ol.add_child("<li><a href='back.xhtml'>#{s}</a></li>")
264
- end
265
-
266
- #s = @nav_root_ol.to_xml(indent: 2, encoding: 'UTF-8', pertty: true, :save_with => Nokogiri::XML::Node::SaveOptions::AS_XML)
267
- s = @nav_root_ol.to_xml
268
-
269
- #s += "" % @toc_juan
270
-
271
- fn = File.join(@temp_folder, 'nav.xhtml')
272
- s = NAV_TEMPLATE % s
273
- File.write(fn, s)
274
- end
275
-
276
- def handle_anchor(e)
277
- if e.has_attribute?('type')
278
- if e['type'] == 'circle'
279
- return '◎'
280
- end
281
- end
282
-
283
- ''
284
- end
285
-
286
- def handle_app(e)
287
- traverse(e)
288
- end
289
-
290
- def handle_byline(e)
291
- r = '<p class="byline">'
292
- r += traverse(e)
293
- r + '</p>'
294
- end
295
-
296
- def handle_cell(e)
297
- doc = Nokogiri::XML::Document.new
298
- cell = doc.create_element('td')
299
- cell['rowspan'] = e['rows'] if e.key? 'rows'
300
- cell['colspan'] = e['cols'] if e.key? 'cols'
301
- cell.inner_html = traverse(e)
302
- to_html(cell) + "\n"
303
- end
304
-
305
- def handle_corr(e)
306
- "<span class='corr'>" + traverse(e) + "</span>"
307
- end
308
-
309
- def handle_div(e)
310
- if e.has_attribute? 'type'
311
- @open_divs << e
312
- r = traverse(e)
313
- @open_divs.pop
314
- return "<div class='div-#{e['type']}'>#{r}</div>"
315
- else
316
- return traverse(e)
317
- end
318
- end
319
-
320
- def handle_figure(e)
321
- "<div class='figure'>%s</div>" % traverse(e)
322
- end
323
-
324
- def handle_g(e, mode)
325
- # if 有 <mapping type="unicode">
326
- # if 不在 Unicode Extension C, D, E 範圍裡
327
- # 直接採用
328
- # else
329
- # 預設呈現 unicode, 但仍包缺字資訊,供點選開 popup
330
- # else if 有 <mapping type="normal_unicode">
331
- # 預設呈現 normal_unicode, 但仍包缺字資訊,供點選開 popup
332
- # else if 有 normalized form
333
- # 預設呈現 normalized form, 但仍包缺字資訊,供點選開 popup
334
- # else
335
- # 預設呈現組字式, 但仍包缺字資訊,供點選開 popup
336
- gid = e['ref'][1..-1]
337
- g = @gaijis[gid]
338
- abort "Line:#{__LINE__} 無缺字資料:#{gid}" if g.nil?
339
- zzs = g['zzs']
340
-
341
-
342
- if gid.start_with?('SD')
343
- case gid
344
- when 'SD-E35A'
345
- return '('
346
- when 'SD-E35B'
347
- return ')'
348
- else
349
- return g['roman'] if g.key? 'roman'
350
-
351
- if mode == 'txt'
352
- puts "警告:純文字模式出現悉曇字:#{gid}"
353
- return gid
354
- else
355
- # 如果沒有羅馬轉寫就顯示圖檔
356
- src = File.join(@settings[:graphic_base], 'sd-gif', gid[3..4], gid+'.gif')
357
- basename = File.basename(src)
358
- dest = File.join(@temp_folder, 'img', basename)
359
- FileUtils.copy(src, dest)
360
- return "<img src='../img/#{basename}' />"
361
- end
362
- end
363
- end
364
-
365
- if gid.start_with?('RJ')
366
- return g['roman'] if g.key? 'roman'
367
-
368
- if mode == 'txt'
369
- puts "警告:純文字模式出現蘭札體:#{gid}"
370
- return gid
371
- else
372
- # 如果沒有羅馬轉寫就顯示圖檔
373
- src = File.join(@settings[:graphic_base], 'rj-gif', gid[3..4], gid+'.gif')
374
- basename = File.basename(src)
375
- dest = File.join(@temp_folder, 'img', basename)
376
- FileUtils.copy(src, dest)
377
- return "<img src='../img/#{basename}' />"
378
- end
379
- end
380
-
381
- if mode == 'txt'
382
- abort "缺組字式:#{g}" if zzs.nil?
383
- return zzs
384
- end
385
-
386
- default = ''
387
- if g.has_key?('unicode')
388
- if @unicode1.include?(g['unicode'])
389
- return g['unicode-char'] # unicode 1.1 直接用
390
- end
391
- end
392
-
393
- zzs
394
- end
395
-
396
- def handle_graphic(e)
397
- url = e['url']
398
- url.sub!(/^.*(figures\/.*)$/, '\1')
399
-
400
- src = File.join(@settings[:graphic_base], url)
401
- basename = File.basename(src)
402
- dest = File.join(@temp_folder, 'img', basename)
403
- FileUtils.copy(src, dest)
404
-
405
- "<img src='../img/#{basename}' />"
406
- end
407
-
408
- def handle_head(e)
409
- r = ''
410
- unless e['type'] == 'added'
411
- i = @open_divs.size
412
- r = "<p class='h#{i}'>%s</p>" % traverse(e)
413
- end
414
- r
415
- end
416
-
417
- def handle_item(e)
418
- "<li>%s</li>\n" % traverse(e)
419
- end
420
-
421
- def handle_juan(e)
422
- "<p class='juan'>%s</p>" % traverse(e)
423
- end
424
-
425
- def handle_l(e)
426
- if @lg_type == 'abnormal'
427
- return traverse(e)
428
- end
429
-
430
- @in_l = true
431
-
432
- doc = Nokogiri::XML::Document.new
433
- cell = doc.create_element('div')
434
- cell['class'] = 'lg-cell'
435
- cell.inner_html = traverse(e)
436
-
437
- if @first_l
438
- parent = e.parent()
439
- if parent.has_attribute?('rend')
440
- indent = parent['rend'].scan(/text-indent:[^:]*/)
441
- unless indent.empty?
442
- cell['style'] = indent[0]
443
- end
444
- end
445
- @first_l = false
446
- end
447
- r = to_html(cell)
448
-
449
- unless @lg_row_open
450
- r = "\n<div class='lg-row'>" + r
451
- @lg_row_open = true
452
- end
453
- @in_l = false
454
- r
455
- end
456
-
457
- def handle_lb(e)
458
- # 卍續藏有 X 跟 R 兩種 lb, 只處理 X
459
- return '' if e['ed'] != @series
460
-
461
- @lb = e['n']
462
- r = ''
463
- #if e.parent.name == 'lg' and $lg_row_open
464
- if @lg_row_open && !@in_l
465
- # 每行偈頌放在一個 lg-row 裡面
466
- # T46n1937, p. 914a01, l 包雙行夾註跨行
467
- # T20n1092, 337c16, lb 在 l 中間,不結束 lg-row
468
- r += "</div><!-- end of lg-row -->"
469
- @lg_row_open = false
470
- end
471
- unless @next_line_buf.empty?
472
- r += @next_line_buf
473
- @next_line_buf = ''
474
- end
475
- r
476
- end
477
-
478
- def handle_lem(e)
479
- r = ''
480
- w = e['wit']
481
- if w.include? 'CBETA' and not w.include? @orig
482
- r = "<span class='corr'>%s</span>" % traverse(e)
483
- else
484
- r = traverse(e)
485
- end
486
- r
487
- end
488
-
489
- def handle_lg(e)
490
- r = ''
491
- @lg_type = e['type']
492
- if @lg_type == 'abnormal'
493
- r = "<p class='lg-abnormal'>" + traverse(e) + "</p>"
494
- else
495
- @first_l = true
496
- doc = Nokogiri::XML::Document.new
497
- node = doc.create_element('div')
498
- node['class'] = 'lg'
499
- if e.has_attribute?('rend')
500
- rend = e['rend'].gsub(/text-indent:[^:]*/, '')
501
- node['style'] = rend
502
- end
503
- @lg_row_open = false
504
- node.inner_html = traverse(e)
505
- if @lg_row_open
506
- node.inner_html += '</div><!-- end of lg -->'
507
- @lg_row_open = false
508
- end
509
- r = "\n" + to_html(node)
510
- end
511
- r
512
- end
513
-
514
- def handle_list(e)
515
- doc = Nokogiri::XML::Document.new
516
- node = doc.create_element('ul')
517
- node.inner_html = traverse(e)
518
- if e.key? 'rendition'
519
- node['class'] = e['rendition']
520
- end
521
- to_html(node)
522
- end
523
-
524
- def handle_milestone(e)
525
- r = ''
526
- if e['unit'] == 'juan'
527
- r += "</div>" * @open_divs.size # 如果有 div 跨卷,要先結束, ex: T55n2154, p. 680a29, 跨 19, 20 兩卷
528
- @juan += 1
529
- r += "<juan #{@juan}>"
530
- @open_divs.each { |d|
531
- r += "<div class='#{d['type']}'>"
532
- }
533
- end
534
- r
535
- end
536
-
537
- def handle_mulu(e)
538
- @mulu_count += 1
539
- fn = "juans/%03d.xhtml" % @juan
540
- if e['type'] == '卷'
541
- if @settings[:juan_toc]
542
- label = e['n']
543
- @juan_nav.add_child("<li><a href='#{fn}#mulu#{@mulu_count}'>#{label}</a></li>")
544
- end
545
- else
546
- level = e['level'].to_i
547
- while @current_nav.size > (level+1)
548
- @current_nav.pop
549
- end
550
-
551
- label = traverse(e, 'txt')
552
- li = @current_nav.last.add_child("<li><a href='#{fn}#mulu#{@mulu_count}'>#{label}</a></li>").first
553
- ol = li.add_child('<ol></ol>').first
554
- @current_nav << ol
555
- end
556
- "<a id='mulu#{@mulu_count}' />"
557
- end
558
-
559
- def handle_node(e, mode)
560
- return '' if e.comment?
561
- return handle_text(e, mode) if e.text?
562
- return '' if PASS.include?(e.name)
563
-
564
- r = case e.name
565
- when 'anchor' then handle_anchor(e)
566
- when 'app' then handle_app(e)
567
- when 'byline' then handle_byline(e)
568
- when 'cell' then handle_cell(e)
569
- when 'corr' then handle_corr(e)
570
- when 'div' then handle_div(e)
571
- when 'figure' then handle_figure(e)
572
- when 'foreign' then ''
573
- when 'g' then handle_g(e, mode)
574
- when 'graphic' then handle_graphic(e)
575
- when 'head' then handle_head(e)
576
- when 'item' then handle_item(e)
577
- when 'juan' then handle_juan(e)
578
- when 'l' then handle_l(e)
579
- when 'lb' then handle_lb(e)
580
- when 'lem' then handle_lem(e)
581
- when 'lg' then handle_lg(e)
582
- when 'list' then handle_list(e)
583
- when 'mulu' then handle_mulu(e)
584
- when 'note' then handle_note(e)
585
- when 'milestone' then handle_milestone(e)
586
- when 'p' then handle_p(e)
587
- when 'rdg' then ''
588
- when 'reg' then ''
589
- when 'row' then handle_row(e)
590
- when 'sic' then ''
591
- when 'sg' then handle_sg(e)
592
- when 't' then handle_t(e)
593
- when 'tt' then handle_tt(e)
594
- when 'table' then handle_table(e)
595
- else traverse(e)
596
- end
597
- r
598
- end
599
-
600
- def handle_note(e)
601
- n = e['n']
602
- if e.has_attribute?('type')
603
- t = e['type']
604
- case t
605
- when 'equivalent'
606
- return ''
607
- when 'orig'
608
- return ''
609
- when 'orig_biao'
610
- return ''
611
- when 'orig_ke'
612
- return ''
613
- when 'mod'
614
- return ""
615
- when 'rest'
616
- return ''
617
- else
618
- return '' if t.start_with?('cf')
619
- end
620
- end
621
-
622
- if e.has_attribute?('resp')
623
- return '' if e['resp'].start_with? 'CBETA'
624
- end
625
-
626
- if e.has_attribute?('place') && e['place']=='inline'
627
- r = traverse(e)
628
- return "(#{r})"
629
- else
630
- return traverse(e)
631
- end
632
- end
633
-
634
- def handle_p(e)
635
- r = "<div class='p'>\n"
636
- r += traverse(e)
637
- r + "</div>\n"
638
- end
639
-
640
- def handle_row(e)
641
- "<tr>" + traverse(e) + "</tr>\n"
642
- end
643
-
644
- def handle_sg(e)
645
- '(' + traverse(e) + ')'
646
- end
647
-
648
- def handle_file(xml_fn)
649
- puts "read #{xml_fn}"
650
- @in_l = false
651
- @lg_row_open = false
652
- @mod_notes = Set.new
653
- @next_line_buf = ''
654
- @open_divs = []
655
-
656
- if @book_id.start_with? 'DA'
657
- @orig = nil?
658
- else
659
- @orig = @cbeta.get_canon_abbr(@book_id[0])
660
- abort "未處理底本: #{@book_id[0]}" if @orig.nil?
661
- end
662
-
663
- text = parse_xml(xml_fn)
664
-
665
- # 註標移到 lg-cell 裡面,不然以 table 呈現 lg 會有問題
666
- text.gsub!(/(<a class='noteAnchor'[^>]*><\/a>)(<div class="lg-cell"[^>]*>)/, '\2\1')
667
-
668
- @main_text += text
669
- end
670
-
671
- def handle_t(e)
672
- if e.has_attribute? 'place'
673
- return '' if e['place'].include? 'foot'
674
- end
675
- r = traverse(e)
676
-
677
- # <tt type="app"> 不是 悉漢雙行對照
678
- return r if @tt_type == 'app'
679
-
680
- # 處理雙行對照
681
- i = e.xpath('../t').index(e)
682
- case i
683
- when 0
684
- return r + ' '
685
- when 1
686
- @next_line_buf += r + ' '
687
- return ''
688
- else
689
- return r
690
- end
691
- end
692
-
693
- def handle_tt(e)
694
- @tt_type = e['type']
695
- traverse(e)
696
- end
697
-
698
- def handle_table(e)
699
- "<table>" + traverse(e) + "</table>"
700
- end
701
-
702
- def handle_text(e, mode)
703
- s = e.content().chomp
704
- return '' if s.empty?
705
- return '' if e.parent.name == 'app'
706
-
707
- # cbeta xml 文字之間會有多餘的換行
708
- r = s.gsub(/[\n\r]/, '')
709
-
710
- # 把 & 轉為 &amp;
711
- CGI.escapeHTML(r)
712
- end
713
-
714
- def lem_note_cf(e)
715
- # ex: T32n1670A.xml, p. 703a16
716
- # <note type="cf1">K30n1002_p0257a01-a23</note>
717
- refs = []
718
- e.xpath('./note').each { |n|
719
- if n.key?('type') and n['type'].start_with? 'cf'
720
- refs << n.content
721
- end
722
- }
723
- if refs.empty?
724
- ''
725
- else
726
- '修訂依據:' + refs.join(';') + '。'
727
- end
728
- end
729
-
730
- def lem_note_rdg(lem)
731
- r = ''
732
- app = lem.parent
733
- @pass << false
734
- app.xpath('rdg').each { |rdg|
735
- if rdg['wit'].include? @orig
736
- s = traverse(rdg, 'back')
737
- s = MISSING if s.empty?
738
- r += @orig + s
739
- end
740
- }
741
- @pass.pop
742
- r += '。' unless r.empty?
743
- r
744
- end
745
-
746
- def sutra_init
747
- clear_temp_folder
748
-
749
- s = NAV_TEMPLATE % '<ol></ol>'
750
- @nav_doc = Nokogiri::XML(s)
751
-
752
- @nav_doc.remove_namespaces!()
753
- @nav_root_ol = @nav_doc.at_xpath('//ol')
754
- @current_nav = [@nav_root_ol]
755
-
756
- if @settings[:front_page_title]
757
- @nav_root_ol.add_child("<li><a href='front.xhtml'>編輯說明</a></li>")
758
- end
759
-
760
- li = @nav_root_ol.add_child("<li><a href='#'>章節目次</a></li>").first
761
- ol = li.add_child('<ol></ol>').first
762
- @current_nav << ol
763
-
764
- if @settings[:juan_toc]
765
- li = @nav_root_ol.add_child("<li><a href='#'>卷目次</a></li>").first
766
- @juan_nav = li.add_child('<ol></ol>').first
767
- end
768
-
769
- @mulu_count = 0
770
- @main_text = ''
771
- @dila_note = 0
772
- @toc_juan = '' # 卷目次
773
- @juan = 0
774
-
775
- FileUtils::mkdir_p File.join(@temp_folder, 'img')
776
- FileUtils::mkdir_p File.join(@temp_folder, 'juans')
777
- end
778
-
779
- def open_xml(fn)
780
- s = File.read(fn)
781
-
782
- if fn.include? 'T16n0657'
783
- # 這個地方 雙行夾註 跨兩行偈頌
784
- # 把 lb 移到 note 結束之前
785
- # 讓 lg-row 先結束,再結束雙行夾註
786
- s.sub!(/(<\/note>)(\n<lb n="0206b29" ed="T"\/>)/, '\2\1')
787
- end
788
-
789
- # <milestone unit="juan"> 前面的 lb 屬於新的這一卷
790
- s.gsub!(%r{((?:<pb [^>]+>\n?)?(?:<lb [^>]+>\n?)+)(<milestone [^>]*unit="juan"[^/>]*/>)}, '\2\1')
791
-
792
- doc = Nokogiri::XML(s)
793
- doc.remove_namespaces!()
794
- doc
795
- end
796
-
797
- def read_mod_notes(doc)
798
- doc.xpath("//note[@type='mod']").each { |e|
799
- @mod_notes << e['n']
800
- }
801
- end
802
-
803
- def parse_xml(xml_fn)
804
- @pass = [false]
805
-
806
- doc = open_xml(xml_fn)
807
-
808
- e = doc.xpath("//titleStmt/title")[0]
809
- @title = traverse(e, 'txt')
810
- @title = @title.split()[-1]
811
-
812
- @author = doc.at_xpath("//titleStmt/author").text
813
-
814
- read_mod_notes(doc)
815
-
816
- root = doc.root()
817
- body = root.xpath("text/body")[0]
818
- @pass = [true]
819
-
820
- text = traverse(body)
821
- text
822
- end
823
-
824
- def prepare_todo_list(input_folder, output_folder)
825
- Dir.foreach(input_folder) do |f|
826
- next if f.start_with? '.'
827
- p1 = File.join(input_folder, f)
828
- if File.file?(p1)
829
- work = f.sub(/^([A-Z]{1,2})\d{2,3}n(.*)\.xml$/, '\1\2')
830
- work = 'T0220' if work.start_with? 'T0220'
831
- unless @todo.key? work
832
- @todo[work] = { xml_files: [] }
833
- end
834
- hash = @todo[work]
835
- hash[:xml_files] << p1
836
-
837
- folders = output_folder.split('/')
838
- folders.pop if folders[-1].match(/^[A-Z]{1,2}\d{2,3}$/)
839
- folder = folders.join('/')
840
- FileUtils::mkdir_p folder
841
- hash[:epub] = File.join(folder, "#{work}.epub")
842
- else
843
- p2 = File.join(output_folder, f)
844
- prepare_todo_list(p1, p2)
845
- end
846
- end
847
- end
848
-
849
- def remove_empty_nav(node_list)
850
- node_list.each do |n|
851
- if n[:nav].empty?
852
- n.delete(:nav)
853
- else
854
- remove_empty_nav(n[:nav])
855
- end
856
- end
857
- end
858
-
859
- def to_html(e)
860
- e.to_xml(encoding: 'UTF-8', pertty: true, :save_with => Nokogiri::XML::Node::SaveOptions::AS_XML)
861
- end
862
-
863
- def traverse(e, mode='html')
864
- r = ''
865
- e.children.each { |c|
866
- s = handle_node(c, mode)
867
- r += s
868
- }
869
- r
870
- end
871
-
872
- end