cbeta 0.4.3 → 0.5.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1c29d0e70255be20d828cecb8f3feff8f0d4b448
4
- data.tar.gz: f90911d3955acb1f491e1e7c61fd1797617aa8f2
3
+ metadata.gz: 416026a7921c0c2bb6c602be134095f4079951fe
4
+ data.tar.gz: d72055681fb1c8ff22a4317ed0310fc368381e1a
5
5
  SHA512:
6
- metadata.gz: 2a545c0cb3fdc26d8b79a79172112b967f225aadaf03afe41b4117acfc44a67f7626cb1f07cb3262d0b6b4041978af9f87f15f9b6d4b574472f5da6c81153d21
7
- data.tar.gz: fcad67ccefa4b4250a6d236671373cc4586f48d4da49b60bfddb659ae1fc0246e613f31e580ad6958b0cd0a1a358b68f9b37f34764a95efbe43f94e8711e8b7e
6
+ metadata.gz: 0d71cdacc97c1d6111cc2dc1dc9b5190d2398f05b9a45280744b7475579dd8f612efa382c0cfd16f07429391f99e330a8fe1c3db1c48d770076f90abe20d15c4
7
+ data.tar.gz: bd62c4cdfe0def5b62d7440f0ec029c2535e1384d0ac9865e2ba91a643db46b6a1fed02ed8a4c4435d9e26b878d8acfc7e3c91994bac50899b7e8ec4413b8717
@@ -35,7 +35,7 @@ class CBETA
35
35
 
36
36
  # 載入藏經資料
37
37
  def initialize()
38
- fn = File.join(File.dirname(__FILE__), 'canons.csv')
38
+ fn = File.join(File.dirname(__FILE__), 'data/canons.csv')
39
39
  text = File.read(fn)
40
40
  @canon_abbr = {}
41
41
  CSV.parse(text, :headers => true) do |row|
@@ -61,6 +61,7 @@ end
61
61
 
62
62
  require 'cbeta/gaiji'
63
63
  require 'cbeta/bm_to_text'
64
+ require 'cbeta/p5a_to_epub'
64
65
  require 'cbeta/p5a_to_html'
65
66
  require 'cbeta/p5a_to_simple_html'
66
67
  require 'cbeta/p5a_to_text'
@@ -4,7 +4,7 @@ require 'json'
4
4
  class CBETA::Gaiji
5
5
  # 載入 CBETA 缺字資料庫
6
6
  def initialize()
7
- fn = File.join(File.dirname(__FILE__), 'gaiji.json')
7
+ fn = File.join(File.dirname(__FILE__), '../data/gaiji.json')
8
8
  @gaijis = JSON.parse(File.read(fn))
9
9
  end
10
10
 
@@ -0,0 +1,780 @@
1
+ require 'cgi'
2
+ require 'date'
3
+ require 'fileutils'
4
+ require 'json'
5
+ require 'nokogiri'
6
+ require 'set'
7
+ require 'gepub'
8
+ require 'pp'
9
+
10
+ # Convert CBETA XML P5a to EPUB
11
+ #
12
+ # CBETA XML P5a 可由此取得: https://github.com/cbeta-git/xml-p5a
13
+ class CBETA::P5aToEPUB
14
+ # 內容不輸出的元素
15
+ PASS=['back', 'teiHeader']
16
+
17
+ # 某版用字缺的符號
18
+ MISSING = '-'
19
+
20
+ NAV_TEMPLATE = File.read(File.join(File.dirname(__FILE__), '../data/epub-nav.xhtml'))
21
+ MAIN = 'main.xhtml'
22
+
23
+ # @param temp_folder [String] 供 EPUB 暫存工作檔案的路徑
24
+ # @param graphic_base [String] 存放圖片的路徑
25
+ def initialize(temp_folder, graphic_base)
26
+ @temp_folder = temp_folder
27
+ @graphic_base = graphic_base
28
+ @cbeta = CBETA.new
29
+ @gaijis = CBETA::Gaiji.new
30
+ end
31
+
32
+ # 將某個 xml 轉為一個 EPUB
33
+ def convert_file(input_path, output_path)
34
+ return false unless input_path.end_with? '.xml'
35
+
36
+ FileUtils.remove_dir(@temp_folder, force=true)
37
+ FileUtils::mkdir_p @temp_folder
38
+
39
+ @book_id = File.basename(input_path, ".xml")
40
+
41
+ sutra_init
42
+
43
+ handle_file(input_path)
44
+ create_epub(output_path)
45
+ end
46
+
47
+ # 將某個資料夾下的每個 xml 檔都轉為一個對應的 EPUB。
48
+ # 資料夾可以是巢狀,全部都會遞迴處理。
49
+ #
50
+ # @example
51
+ # require 'cbeta'
52
+ #
53
+ # TEMP = '/temp/epub-work'
54
+ # IMG = '/Users/ray/Documents/Projects/D道安/figures'
55
+ #
56
+ # c = CBETA::P5aToEPUB.new(TEMP, IMG)
57
+ # c.convert_folder('/Users/ray/Documents/Projects/D道安/xml-p5a/DA', '/temp/cbeta-epub/DA')
58
# Convert every XML file found under +input_folder+ into an EPUB placed at
# the matching location under +output_folder+. Sub-folders are handled
# recursively. +output_folder+ is removed and recreated before converting.
#
# @param input_folder [String] folder containing CBETA XML P5a files
# @param output_folder [String] folder that receives the generated EPUB files
def convert_folder(input_folder, output_folder)
  # Second argument is `force`: ignore errors such as a missing folder.
  FileUtils.remove_dir(output_folder, true)
  FileUtils.mkdir_p(output_folder)

  Dir.foreach(input_folder) do |f|
    next if f.start_with?('.')

    src = File.join(input_folder, f)
    if File.file?(src)
      # Swap only a literal ".xml" suffix; the previous pattern /.xml$/ also
      # matched names such as "fooxml" because the dot was not escaped.
      dest = File.join(output_folder, f.sub(/\.xml\z/, '.epub'))
      convert_file(src, dest)
    else
      convert_folder(src, File.join(output_folder, f))
    end
  end
end
74
+
75
+ # 將多個 xml 檔案合成一個 EPUB
76
+ #
77
+ # @example 大般若經 跨三冊 合成一個 EPUB
78
+ # require 'cbeta'
79
+ #
80
+ # TEMP = '/temp/epub-work'
81
+ #
82
+ # xml_files = [
83
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/05/T05n0220a.xml',
84
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/06/T06n0220b.xml',
85
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220c.xml',
86
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220d.xml',
87
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220e.xml',
88
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220f.xml',
89
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220g.xml',
90
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220h.xml',
91
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220i.xml',
92
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220j.xml',
93
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220k.xml',
94
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220l.xml',
95
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220m.xml',
96
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220n.xml',
97
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220o.xml',
98
+ # ]
99
+ #
100
+ # c = CBETA::P5aToEPUB.new(TEMP, '/path/to/figures')
101
+ # c.convert_sutra('T0220', '大般若經', xml_files, '/temp/cbeta-epub/T0220.epub')
102
# Merge several XML files into a single EPUB.
#
# @param book_id [String] identifier stored in @book_id and used for the EPUB unique identifier
# @param title [String] title of the generated EPUB
# @param xml_files [Array<String>] paths of the XML files, in reading order
# @param out [String] path of the EPUB file to write
def convert_sutra(book_id, title, xml_files, out)
  # Start from an empty work folder, as convert_file does; otherwise files
  # left over from an earlier conversion (e.g. img/*) end up in this EPUB.
  FileUtils.remove_dir(@temp_folder, true)
  FileUtils.mkdir_p(@temp_folder)

  @book_id = book_id
  sutra_init
  xml_files.each { |f| handle_file(f) }

  # Assigned after the files are processed so that the caller's title is the
  # value in @title when create_epub runs.
  @title = title
  create_epub(out)
end
110
+
111
+ private
112
+
113
+ def copy_static_files(src, dest)
114
+ p1 = File.join(File.dirname(__FILE__), '../data', src)
115
+ p2 = File.join(@temp_folder, dest)
116
+ FileUtils.copy(p1, p2)
117
+ end
118
+
119
+ def create_epub(output_path)
120
+ copy_static_files('epub-readme.xhtml', 'readme.xhtml')
121
+ copy_static_files('epub-donate.xhtml', 'donate.xhtml')
122
+ create_main_html
123
+ create_nav_html
124
+
125
+ title = @title
126
+ book_id = @book_id
127
+ builder = GEPUB::Builder.new {
128
+ language 'zh-TW'
129
+ unique_identifier "http://www.cbeta.org/#{book_id}", 'BookID', 'URL'
130
+ title title
131
+
132
+ creator 'CBETA'
133
+
134
+ contributors 'DILA'
135
+
136
+ date Date.today.to_s
137
+ }
138
+
139
+ # in resources block, you can define resources by its relative path and datasource.
140
+ # item creator methods are: files, file.
141
+ builder.resources(:workdir => @temp_folder) {
142
+ glob 'img/*'
143
+
144
+ # this is navigation document.
145
+ nav 'nav.xhtml'
146
+
147
+ # ordered item. will be added to spine.
148
+ ordered {
149
+ file 'readme.xhtml'
150
+ file 'main.xhtml'
151
+ file 'donate.xhtml'
152
+ }
153
+ }
154
+ builder.generate_epub(output_path)
155
+ puts "output: #{output_path}"
156
+ end
157
+
158
+ def create_main_html
159
+ fn = File.join(@temp_folder, MAIN)
160
+ s = <<eos
161
+ <html xmlns="http://www.w3.org/1999/xhtml">
162
+ <head>
163
+ <meta charset="utf-8" />
164
+ <title>#{@title}</title>
165
+ </head>
166
+ <body>
167
+ <div id='body'>
168
+ eos
169
+ s += @main_text + "\n</div><!-- end of div[@id='body'] -->\n"
170
+ s += "<div id='back'>\n" + @back + "</div></body></html>\n"
171
+ File.write(fn, s)
172
+ end
173
+
174
+ def create_nav_html
175
+ @nav_root_ol.add_child("<li><a href='donate.xhtml'>贊助資訊</a></li>")
176
+
177
+ fn = File.join(@temp_folder, 'nav.xhtml')
178
+ s = NAV_TEMPLATE % to_html(@nav_root_ol)
179
+ File.write(fn, s)
180
+ end
181
+
182
+ def handle_anchor(e)
183
+ id = e['id']
184
+ if e.has_attribute?('id')
185
+ if id.start_with?('nkr_note_orig')
186
+ note = @notes[id]
187
+ note_text = traverse(note)
188
+ n = id[/^nkr_note_orig_(.*)$/, 1]
189
+ @back += "<span class='footnote' id='n#{n}'>#{note_text}</span>\n"
190
+ return "<a class='noteAnchor' href='#n#{n}'></a>"
191
+ elsif id.start_with? 'fx'
192
+ return "<span class='star'>[*]</span>"
193
+ end
194
+ end
195
+
196
+ if e.has_attribute?('type')
197
+ if e['type'] == 'circle'
198
+ return '◎'
199
+ end
200
+ end
201
+
202
+ ''
203
+ end
204
+
205
+ def handle_app(e)
206
+ r = ''
207
+ if e['type'] == 'star'
208
+ c = e['corresp'][1..-1]
209
+ r = "<a class='noteAnchor star' href='#n#{c}'></a>"
210
+ end
211
+ r + traverse(e)
212
+ end
213
+
214
+ def handle_byline(e)
215
+ r = '<p class="byline">'
216
+ r += "<span class='lineInfo'>#{@lb}</span>"
217
+ r += traverse(e)
218
+ r + '</p>'
219
+ end
220
+
221
+ def handle_cell(e)
222
+ doc = Nokogiri::XML::Document.new
223
+ cell = doc.create_element('td')
224
+ cell['rowspan'] = e['rows'] if e.key? 'rows'
225
+ cell['colspan'] = e['cols'] if e.key? 'cols'
226
+ cell.inner_html = traverse(e)
227
+ to_html(cell) + "\n"
228
+ end
229
+
230
+ def handle_corr(e)
231
+ r = ''
232
+ if e.parent.name == 'choice'
233
+ sic = e.parent.at_xpath('sic')
234
+ unless sic.nil?
235
+ @dila_note += 1
236
+ r = "<a class='noteAnchor dila' href='#dila_note#{@dila_note}'></a>"
237
+
238
+ note = @orig
239
+ sic_text = traverse(sic, 'back')
240
+ if sic_text.empty?
241
+ note += MISSING
242
+ else
243
+ note += sic_text
244
+ end
245
+ @back += "<span class='footnote_dila' id='dila_note#{@dila_note}'>#{note}</span>\n"
246
+ end
247
+ end
248
+ r + "<span class='cbeta'>%s</span>" % traverse(e)
249
+ end
250
+
251
+ def handle_div(e)
252
+ if e.has_attribute? 'type'
253
+ @open_divs << e
254
+ r = traverse(e)
255
+ @open_divs.pop
256
+ return "<div class='div-#{e['type']}'>#{r}</div>"
257
+ else
258
+ return traverse(e)
259
+ end
260
+ end
261
+
262
+ def handle_figure(e)
263
+ "<div class='figure'>%s</div>" % traverse(e)
264
+ end
265
+
266
+ def handle_g(e, mode)
267
+ # if 有 <mapping type="unicode">
268
+ # if 不在 Unicode Extension C, D, E 範圍裡
269
+ # 直接採用
270
+ # else
271
+ # 預設呈現 unicode, 但仍包缺字資訊,供點選開 popup
272
+ # else if 有 <mapping type="normal_unicode">
273
+ # 預設呈現 normal_unicode, 但仍包缺字資訊,供點選開 popup
274
+ # else if 有 normalized form
275
+ # 預設呈現 normalized form, 但仍包缺字資訊,供點選開 popup
276
+ # else
277
+ # 預設呈現組字式, 但仍包缺字資訊,供點選開 popup
278
+ gid = e['ref'][1..-1]
279
+ g = @gaijis[gid]
280
+ abort "Line:#{__LINE__} 無缺字資料:#{gid}" if g.nil?
281
+ zzs = g['zzs']
282
+
283
+ if mode == 'txt'
284
+ return g['roman'] if gid.start_with?('SD')
285
+ if zzs.nil?
286
+ abort "缺組字式:#{g}"
287
+ else
288
+ return zzs
289
+ end
290
+ end
291
+
292
+ if gid.start_with?('SD')
293
+ case gid
294
+ when 'SD-E35A'
295
+ return '('
296
+ when 'SD-E35B'
297
+ return ')'
298
+ else
299
+ return "<span class='siddam' roman='#{g['roman']}' code='#{gid}' char='#{g['sd-char']}'/>"
300
+ end
301
+ end
302
+
303
+ if gid.start_with?('RJ')
304
+ return "<span class='ranja' roman='#{g['roman']}' code='#{gid}' char='#{g['rj-char']}'/>"
305
+ end
306
+
307
+ default = ''
308
+ if g.has_key?('unicode')
309
+ #if @unicode1.include?(g['unicode'])
310
+ # 如果在 unicode ext-C, ext-D, ext-E 範圍內
311
+ if (0x2A700..0x2CEAF).include? g['unicode'].hex
312
+ default = g['unicode-char']
313
+ else
314
+ return g['unicode-char'] # 直接採用 unicode
315
+ end
316
+ end
317
+
318
+ nor = ''
319
+ if g.has_key?('normal_unicode')
320
+ nor = g['normal_unicode']
321
+ default = nor if default.empty?
322
+ end
323
+
324
+ if g.has_key?('normal')
325
+ nor += ', ' unless nor==''
326
+ nor += g['normal']
327
+ default = g['normal'] if default.empty?
328
+ end
329
+
330
+ default = zzs if default.empty?
331
+
332
+ href = 'http://dict.cbeta.org/dict_word/gaiji-cb/%s/%s.gif' % [gid[2, 2], gid]
333
+ unless @back.include?(href)
334
+ @back += "<span id='#{gid}' class='gaijiInfo' figure_url='#{href}' zzs='#{zzs}' nor='#{nor}'>#{default}</span>\n"
335
+ end
336
+ "<a class='gaijiAnchor' href='##{gid}'>#{default}</a>"
337
+ end
338
+
339
+ def handle_graphic(e)
340
+ url = e['url']
341
+ url.sub!(/^.*figures\/(.*)$/, '\1')
342
+
343
+ src = File.join(@graphic_base, url)
344
+ basename = File.basename(src)
345
+ dest = File.join(@temp_folder, 'img', basename)
346
+ FileUtils.copy(src, dest)
347
+
348
+ "<img src='img/#{basename}' />"
349
+ end
350
+
351
+ def handle_head(e)
352
+ r = ''
353
+ unless e['type'] == 'added'
354
+ i = @open_divs.size
355
+ r = "<p class='head' data-head-level='#{i}'>%s</p>" % traverse(e)
356
+ end
357
+ r
358
+ end
359
+
360
+ def handle_item(e)
361
+ "<li>%s</li>\n" % traverse(e)
362
+ end
363
+
364
+ def handle_juan(e)
365
+ "<p class='juan'>%s</p>" % traverse(e)
366
+ end
367
+
368
+ def handle_l(e)
369
+ if @lg_type == 'abnormal'
370
+ return traverse(e)
371
+ end
372
+
373
+ @in_l = true
374
+
375
+ doc = Nokogiri::XML::Document.new
376
+ cell = doc.create_element('div')
377
+ cell['class'] = 'lg-cell'
378
+ cell.inner_html = traverse(e)
379
+
380
+ if @first_l
381
+ parent = e.parent()
382
+ if parent.has_attribute?('rend')
383
+ indent = parent['rend'].scan(/text-indent:[^:]*/)
384
+ unless indent.empty?
385
+ cell['style'] = indent[0]
386
+ end
387
+ end
388
+ @first_l = false
389
+ end
390
+ r = to_html(cell)
391
+
392
+ unless @lg_row_open
393
+ r = "\n<div class='lg-row'>" + r
394
+ @lg_row_open = true
395
+ end
396
+ @in_l = false
397
+ r
398
+ end
399
+
400
+ def handle_lb(e)
401
+ # 卍續藏有 X 跟 R 兩種 lb, 只處理 X
402
+ return '' if e['ed'] != @series
403
+
404
+ @lb = e['n']
405
+ r = ''
406
+ #if e.parent.name == 'lg' and $lg_row_open
407
+ if @lg_row_open && !@in_l
408
+ # 每行偈頌放在一個 lg-row 裡面
409
+ # T46n1937, p. 914a01, l 包雙行夾註跨行
410
+ # T20n1092, 337c16, lb 在 l 中間,不結束 lg-row
411
+ r += "</div><!-- end of lg-row -->"
412
+ @lg_row_open = false
413
+ end
414
+ unless @next_line_buf.empty?
415
+ r += @next_line_buf
416
+ @next_line_buf = ''
417
+ end
418
+ r
419
+ end
420
+
421
+ def handle_lem(e)
422
+ r = ''
423
+ w = e['wit']
424
+ if w.include? 'CBETA' and not w.include? @orig
425
+ @dila_note += 1
426
+ r = "<a class='noteAnchor dila' href='#dila_note#{@dila_note}'></a>"
427
+ r += "<span class='cbeta'>%s</span>" % traverse(e)
428
+
429
+ note = lem_note_cf(e)
430
+ note += lem_note_rdg(e)
431
+ @back += "<span class='footnote_dila' id='dila_note#{@dila_note}'>#{note}</span>\n"
432
+ else
433
+ r = traverse(e)
434
+ end
435
+ r
436
+ end
437
+
438
+ def handle_lg(e)
439
+ r = ''
440
+ @lg_type = e['type']
441
+ if @lg_type == 'abnormal'
442
+ r = "<p class='lg-abnormal'>" + traverse(e) + "</p>"
443
+ else
444
+ @first_l = true
445
+ doc = Nokogiri::XML::Document.new
446
+ node = doc.create_element('div')
447
+ node['class'] = 'lg'
448
+ if e.has_attribute?('rend')
449
+ rend = e['rend'].gsub(/text-indent:[^:]*/, '')
450
+ node['style'] = rend
451
+ end
452
+ @lg_row_open = false
453
+ node.inner_html = traverse(e)
454
+ if @lg_row_open
455
+ node.inner_html += '</div><!-- end of lg -->'
456
+ @lg_row_open = false
457
+ end
458
+ r = "\n" + to_html(node)
459
+ end
460
+ r
461
+ end
462
+
463
+ def handle_list(e)
464
+ "<ul>%s</ul>" % traverse(e)
465
+ end
466
+
467
+ def handle_milestone(e)
468
+ ''
469
+ end
470
+
471
# Handle a <mulu> (table-of-contents) element: add an entry to the EPUB
# navigation tree and return an anchor to embed in the main text.
#
# Entries whose type is '卷' produce no output and no navigation entry.
#
# @param e [Nokogiri::XML::Element] the mulu element
# @return [String] HTML anchor the navigation entry links to, or '' when skipped
def handle_mulu(e)
  return '' if e['type'] == '卷'

  # A missing or zero level would empty @current_nav and leave nothing to
  # attach to, so treat it as a top-level entry.
  level = [e['level'].to_i, 1].max
  @current_nav.pop while @current_nav.size > level

  label = traverse(e, 'txt')
  @mulu_count += 1
  # Link into the main content document (MAIN). The previous code
  # interpolated @main_html, which is never assigned, so the links came out
  # as bare '#muluN' fragments.
  li = @current_nav.last.add_child("<li><a href='#{MAIN}#mulu#{@mulu_count}'>#{label}</a></li>").first
  ol = li.add_child('<ol></ol>').first
  @current_nav << ol
  "<a id='mulu#{@mulu_count}' />"
end
486
+
487
+ def handle_node(e, mode)
488
+ return '' if e.comment?
489
+ return handle_text(e, mode) if e.text?
490
+ return '' if PASS.include?(e.name)
491
+ r = case e.name
492
+ when 'anchor' then handle_anchor(e)
493
+ when 'app' then handle_app(e)
494
+ when 'byline' then handle_byline(e)
495
+ when 'cell' then handle_cell(e)
496
+ when 'corr' then handle_corr(e)
497
+ when 'div' then handle_div(e)
498
+ when 'figure' then handle_figure(e)
499
+ when 'foreign' then ''
500
+ when 'g' then handle_g(e, mode)
501
+ when 'graphic' then handle_graphic(e)
502
+ when 'head' then handle_head(e)
503
+ when 'item' then handle_item(e)
504
+ when 'juan' then handle_juan(e)
505
+ when 'l' then handle_l(e)
506
+ when 'lb' then handle_lb(e)
507
+ when 'lem' then handle_lem(e)
508
+ when 'lg' then handle_lg(e)
509
+ when 'list' then handle_list(e)
510
+ when 'mulu' then handle_mulu(e)
511
+ when 'note' then handle_note(e)
512
+ when 'milestone' then handle_milestone(e)
513
+ when 'p' then handle_p(e)
514
+ when 'rdg' then ''
515
+ when 'reg' then ''
516
+ when 'row' then handle_row(e)
517
+ when 'sic' then ''
518
+ when 'sg' then handle_sg(e)
519
+ when 't' then handle_t(e)
520
+ when 'tt' then handle_tt(e)
521
+ when 'table' then handle_table(e)
522
+ else traverse(e)
523
+ end
524
+ r
525
+ end
526
+
527
+ def handle_note(e)
528
+ n = e['n']
529
+ if e.has_attribute?('type')
530
+ t = e['type']
531
+ case t
532
+ when 'equivalent'
533
+ return ''
534
+ when 'orig'
535
+ return handle_note_orig(e)
536
+ when 'orig_biao'
537
+ return handle_note_orig(e, 'biao')
538
+ when 'orig_ke'
539
+ return handle_note_orig(e, 'ke')
540
+ when 'mod'
541
+ @pass << false
542
+ s = traverse(e)
543
+ @pass.pop
544
+ @back += "<span class='footnote_cb' id='n#{n}'>#{s}</span>\n"
545
+ return "<a class='noteAnchor' href='#n#{n}'></a>"
546
+ when 'rest'
547
+ return ''
548
+ else
549
+ return '' if t.start_with?('cf')
550
+ end
551
+ end
552
+
553
+ if e.has_attribute?('resp')
554
+ return '' if e['resp'].start_with? 'CBETA'
555
+ end
556
+
557
+ if e.has_attribute?('place') && e['place']=='inline'
558
+ r = traverse(e)
559
+ return "<span class='doube-line-note'>#{r}</span>"
560
+ else
561
+ return traverse(e)
562
+ end
563
+ end
564
+
565
+ def handle_note_orig(e, anchor_type=nil)
566
+ n = e['n']
567
+ @pass << false
568
+ s = traverse(e)
569
+ @pass.pop
570
+ @back += "<span class='footnote_orig' id='n#{n}'>#{s}</span>\n"
571
+
572
+ if @mod_notes.include? n
573
+ return ''
574
+ else
575
+ label = case anchor_type
576
+ when 'biao' then " data-label='標#{n[-2..-1]}'"
577
+ when 'ke' then " data-label='科#{n[-2..-1]}'"
578
+ else ''
579
+ end
580
+ return "<a class='noteAnchor' href='#n#{n}'#{label}></a>"
581
+ end
582
+ end
583
+
584
+ def handle_p(e)
585
+ r = "<div class='p'>\n"
586
+ r += traverse(e)
587
+ r + "</div>\n"
588
+ end
589
+
590
+ def handle_row(e)
591
+ "<tr>" + traverse(e) + "</tr>\n"
592
+ end
593
+
594
+ def handle_sg(e)
595
+ '(' + traverse(e) + ')'
596
+ end
597
+
598
# Parse one XML file and append its rendered body to @main_text.
#
# Resets the per-file state (@in_l, @lg_row_open, @mod_notes,
# @next_line_buf, @open_divs) and sets @orig from @book_id before parsing.
#
# @param xml_fn [String] path of the XML file to read
def handle_file(xml_fn)
  puts "read #{xml_fn}"
  @in_l = false
  @lg_row_open = false
  @mod_notes = Set.new
  @next_line_buf = ''
  @open_divs = []

  if @book_id.start_with? 'DA'
    # Books whose id starts with 'DA' get no canon abbreviation. The
    # previous code wrote `nil?`, which calls self.nil? and stored false.
    @orig = nil
  else
    @orig = @cbeta.get_canon_abbr(@book_id[0])
    abort "未處理底本: #{@book_id[0]}" if @orig.nil?
  end

  text = parse_xml(xml_fn)

  # Move a note anchor that directly precedes an lg-cell inside that cell;
  # otherwise rendering the lg as a table has problems.
  text.gsub!(/(<a class='noteAnchor'[^>]*><\/a>)(<div class="lg-cell"[^>]*>)/, '\2\1')

  @main_text += text
end
620
+
621
+ def handle_t(e)
622
+ if e.has_attribute? 'place'
623
+ return '' if e['place'].include? 'foot'
624
+ end
625
+ r = traverse(e)
626
+
627
+ # <tt type="app"> 不是 悉漢雙行對照
628
+ return r if @tt_type == 'app'
629
+
630
+ # 處理雙行對照
631
+ i = e.xpath('../t').index(e)
632
+ case i
633
+ when 0
634
+ return r + ' '
635
+ when 1
636
+ @next_line_buf += r + ' '
637
+ return ''
638
+ else
639
+ return r
640
+ end
641
+ end
642
+
643
+ def handle_tt(e)
644
+ @tt_type = e['type']
645
+ traverse(e)
646
+ end
647
+
648
+ def handle_table(e)
649
+ "<table>" + traverse(e) + "</table>"
650
+ end
651
+
652
+ def handle_text(e, mode)
653
+ s = e.content().chomp
654
+ return '' if s.empty?
655
+ return '' if e.parent.name == 'app'
656
+
657
+ # cbeta xml 文字之間會有多餘的換行
658
+ r = s.gsub(/[\n\r]/, '')
659
+
660
+ # 把 & 轉為 &amp;
661
+ CGI.escapeHTML(r)
662
+ end
663
+
664
# Build the "revision basis" sentence for a <lem> element from its direct
# <note> children whose type starts with 'cf'.
#
# Example: T32n1670A.xml, p. 703a16
#   <note type="cf1">K30n1002_p0257a01-a23</note>
#
# @param e [Nokogiri::XML::Element] the lem element
# @return [String] the sentence, or '' when there is no such note
def lem_note_cf(e)
  cited = e.xpath('./note')
           .select { |note| note.key?('type') && note['type'].start_with?('cf') }
           .map { |note| note.content }
  return '' if cited.empty?

  '修訂依據:' + cited.join(';') + '。'
end
679
+
680
+ def lem_note_rdg(lem)
681
+ r = ''
682
+ app = lem.parent
683
+ @pass << false
684
+ app.xpath('rdg').each { |rdg|
685
+ if rdg['wit'].include? @orig
686
+ s = traverse(rdg, 'back')
687
+ s = MISSING if s.empty?
688
+ r += @orig + s
689
+ end
690
+ }
691
+ @pass.pop
692
+ r += '。' unless r.empty?
693
+ r
694
+ end
695
+
696
+ def sutra_init
697
+ s = NAV_TEMPLATE % '<ol></ol>'
698
+ @nav_doc = Nokogiri::XML(s)
699
+
700
+ @nav_doc.remove_namespaces!()
701
+ @nav_root_ol = @nav_doc.at_xpath('//ol')
702
+ @current_nav = [@nav_root_ol]
703
+
704
+ @nav_root_ol.add_child("<li><a href='readme.xhtml'>編輯說明</a></li>")
705
+
706
+ @mulu_count = 0
707
+ @main_text = ''
708
+ @back = ''
709
+ @dila_note = 0
710
+
711
+ FileUtils::mkdir_p File.join(@temp_folder, 'img')
712
+ end
713
+
714
+ def open_xml(fn)
715
+ s = File.read(fn)
716
+
717
+ if fn.include? 'T16n0657'
718
+ # 這個地方 雙行夾註 跨兩行偈頌
719
+ # 把 lb 移到 note 結束之前
720
+ # 讓 lg-row 先結束,再結束雙行夾註
721
+ s.sub!(/(<\/note>)(\n<lb n="0206b29" ed="T"\/>)/, '\2\1')
722
+ end
723
+
724
+ # <milestone unit="juan"> 前面的 lb 屬於新的這一卷
725
+ s.gsub!(%r{((?:<pb [^>]+>\n?)?(?:<lb [^>]+>\n?)+)(<milestone [^>]*unit="juan"[^/>]*/>)}, '\2\1')
726
+
727
+ doc = Nokogiri::XML(s)
728
+ doc.remove_namespaces!()
729
+ doc
730
+ end
731
+
732
+ def read_mod_notes(doc)
733
+ doc.xpath("//note[@type='mod']").each { |e|
734
+ @mod_notes << e['n']
735
+ }
736
+ end
737
+
738
+ def parse_xml(xml_fn)
739
+ @pass = [false]
740
+
741
+ doc = open_xml(xml_fn)
742
+
743
+ e = doc.xpath("//titleStmt/title")[0]
744
+ @title = traverse(e, 'txt')
745
+ @title = @title.split()[-1]
746
+
747
+ read_mod_notes(doc)
748
+
749
+ root = doc.root()
750
+ body = root.xpath("text/body")[0]
751
+ @pass = [true]
752
+
753
+ text = traverse(body)
754
+ text
755
+ end
756
+
757
# Recursively drop the :nav key from every entry whose :nav list is empty.
# The entries are modified in place.
#
# @param node_list [Array<Hash>] entries that each carry a :nav array
# @return [Array<Hash>] the same +node_list+ object
def remove_empty_nav(node_list)
  node_list.each do |entry|
    children = entry[:nav]
    if children.empty?
      entry.delete(:nav)
      next
    end
    remove_empty_nav(children)
  end
end
766
+
767
# Serialize a node to a UTF-8 string using Nokogiri's AS_XML save option.
#
# The misspelled `pertty: true` option was dropped: it is not an option
# to_xml reads, so it had no effect on the output.
#
# @param e [Nokogiri::XML::Node] node to serialize
# @return [String] serialized markup
def to_html(e)
  e.to_xml(encoding: 'UTF-8', save_with: Nokogiri::XML::Node::SaveOptions::AS_XML)
end
770
+
771
# Render every child of +e+ through handle_node and concatenate the results
# in document order.
#
# @param e [#children] node whose children are rendered
# @param mode [String] rendering mode passed on to handle_node
# @return [String] concatenated output, '' when there are no children
def traverse(e, mode='html')
  e.children.inject('') { |rendered, child| rendered + handle_node(child, mode) }
end
779
+
780
+ end
@@ -5,18 +5,17 @@ require 'json'
5
5
  require 'nokogiri'
6
6
  require 'set'
7
7
 
8
- # 內容不輸出的元素
9
- PASS=['back', 'teiHeader']
10
-
11
- # 某版用字缺的符號
12
- MISSING = '-'
13
-
14
8
  # Convert CBETA XML P5a to HTML
15
9
  #
16
10
  # CBETA XML P5a 可由此取得: https://github.com/cbeta-git/xml-p5a
17
11
  #
18
12
  # 轉檔規則請參考: http://wiki.ddbc.edu.tw/pages/CBETA_XML_P5a_轉_HTML
19
13
  class CBETA::P5aToHTML
14
+ # 內容不輸出的元素
15
+ PASS=['back', 'teiHeader']
16
+
17
+ # 某版用字缺的符號
18
+ MISSING = '-'
20
19
 
21
20
  # @param xml_root [String] 來源 CBETA XML P5a 路徑
22
21
  # @param out_root [String] 輸出 HTML 路徑
@@ -25,11 +24,6 @@ class CBETA::P5aToHTML
25
24
  @out_root = out_root
26
25
  @cbeta = CBETA.new
27
26
  @gaijis = CBETA::Gaiji.new
28
-
29
- # 載入 unicode 1.1 字集列表
30
- #fn = File.join(File.dirname(__FILE__), 'unicode-1.1.json')
31
- #json = File.read(fn)
32
- #@unicode1 = JSON.parse(json)
33
27
  end
34
28
 
35
29
  # 將 CBETA XML P5a 轉為 HTML
@@ -22,11 +22,6 @@ class CBETA::P5aToSimpleHTML
22
22
  @output_root = output_root
23
23
  @cbeta = CBETA.new
24
24
  @gaijis = CBETA::Gaiji.new
25
-
26
- # 載入 unicode 1.1 字集列表
27
- fn = File.join(File.dirname(__FILE__), 'unicode-1.1.json')
28
- json = File.read(fn)
29
- @unicode1 = JSON.parse(json)
30
25
  end
31
26
 
32
27
  # 將 CBETA XML P5a 轉為 Text
@@ -25,11 +25,6 @@ class CBETA::P5aToText
25
25
  @format = format
26
26
  @cbeta = CBETA.new
27
27
  @gaijis = CBETA::Gaiji.new
28
-
29
- # 載入 unicode 1.1 字集列表
30
- fn = File.join(File.dirname(__FILE__), 'unicode-1.1.json')
31
- json = File.read(fn)
32
- @unicode1 = JSON.parse(json)
33
28
  end
34
29
 
35
30
  # 將 CBETA XML P5a 轉為 Text
File without changes
@@ -0,0 +1,23 @@
1
+ <?xml version="1.0" encoding="utf-8" standalone="no"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
3
+ "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
4
+
5
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="zh-TW" xmlns:xml="http://www.w3.org/XML/1998/namespace">
6
+ <head>
7
+ <link href="../Styles/stylesheet.css" rel="stylesheet" type="text/css" />
8
+
9
+ <title>贊助</title>
10
+ </head>
11
+
12
+ <body>
13
+ <p><b>歡迎隨喜贊助</b></p>
14
+
15
+ <p><b>劃撥捐款</b></p>
16
+
17
+ <p>郵政劃撥帳號:19624224</p>
18
+
19
+ <p>戶名:財團法人智諭老和尚教育紀念基金會</p>
20
+
21
+ <p>若欲指定特殊用途者,請特別註明,我們會專款專用。</p>
22
+ </body>
23
+ </html>
@@ -0,0 +1,11 @@
1
+ <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
2
+ <head>
3
+ <meta charset="utf-8" />
4
+ </head>
5
+ <body>
6
+ <nav epub:type="toc" id="toc">
7
+ <h1>Table of contents</h1>
8
+ %s
9
+ </nav>
10
+ </body>
11
+ </html>
@@ -0,0 +1,27 @@
1
+ <?xml version="1.0" encoding="utf-8" standalone="no"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
3
+ "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
4
+
5
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="zh-TW" xmlns:xml="http://www.w3.org/XML/1998/namespace">
6
+ <head>
7
+ <link href="../Styles/stylesheet.css" rel="stylesheet" type="text/css" />
8
+
9
+ <title>說明</title>
10
+ </head>
11
+
12
+ <body>
13
+ <div>
14
+ <h2>編輯說明</h2>
15
+
16
+ <ul>
17
+ <li>本電子書以<a href="http://www.seeland.org.tw/www/zhiyu/index.html">「西蓮淨苑智諭老和尚著作全集」</a>為資料來源。</li>
18
+
19
+ <li>漢字呈現以 Unicode 1.1 為基礎,不在此範圍的字則採用 <a href="http://www.cbeta.org/format/rare-rule.php">組字式</a> 表達。</li>
20
+
21
+ <li><span style="line-height: 1.6em;">若有發現任何問題,歡迎來函</span> <a href="mailto:seeland77@gmail.com" style="line-height: 1.6em;">seeland77@gmail.com</a> <span style="line-height: 1.6em;">回報。</span><br /></li>
22
+
23
+ <li>版權所有,歡迎自由流通,但禁止營利使用。</li>
24
+ </ul><br />
25
+ </div>
26
+ </body>
27
+ </html>
File without changes
File without changes
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.3
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-20 00:00:00.000000000 Z
11
+ date: 2015-08-22 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com
@@ -16,16 +16,20 @@ executables: []
16
16
  extensions: []
17
17
  extra_rdoc_files: []
18
18
  files:
19
- - lib/canons.csv
20
19
  - lib/cbeta.rb
21
20
  - lib/cbeta/bm_to_text.rb
22
- - lib/cbeta/gaiji.json
23
21
  - lib/cbeta/gaiji.rb
24
22
  - lib/cbeta/html_to_text.rb
23
+ - lib/cbeta/p5a_to_epub.rb
25
24
  - lib/cbeta/p5a_to_html.rb
26
25
  - lib/cbeta/p5a_to_simple_html.rb
27
26
  - lib/cbeta/p5a_to_text.rb
28
- - lib/cbeta/unicode-1.1.json
27
+ - lib/data/canons.csv
28
+ - lib/data/epub-donate.xhtml
29
+ - lib/data/epub-nav.xhtml
30
+ - lib/data/epub-readme.xhtml
31
+ - lib/data/gaiji.json
32
+ - lib/data/unicode-1.1.json
29
33
  homepage: https://github.com/RayCHOU/ruby-cbeta
30
34
  licenses:
31
35
  - MIT