cbeta 0.4.3 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 1c29d0e70255be20d828cecb8f3feff8f0d4b448
4
- data.tar.gz: f90911d3955acb1f491e1e7c61fd1797617aa8f2
3
+ metadata.gz: 416026a7921c0c2bb6c602be134095f4079951fe
4
+ data.tar.gz: d72055681fb1c8ff22a4317ed0310fc368381e1a
5
5
  SHA512:
6
- metadata.gz: 2a545c0cb3fdc26d8b79a79172112b967f225aadaf03afe41b4117acfc44a67f7626cb1f07cb3262d0b6b4041978af9f87f15f9b6d4b574472f5da6c81153d21
7
- data.tar.gz: fcad67ccefa4b4250a6d236671373cc4586f48d4da49b60bfddb659ae1fc0246e613f31e580ad6958b0cd0a1a358b68f9b37f34764a95efbe43f94e8711e8b7e
6
+ metadata.gz: 0d71cdacc97c1d6111cc2dc1dc9b5190d2398f05b9a45280744b7475579dd8f612efa382c0cfd16f07429391f99e330a8fe1c3db1c48d770076f90abe20d15c4
7
+ data.tar.gz: bd62c4cdfe0def5b62d7440f0ec029c2535e1384d0ac9865e2ba91a643db46b6a1fed02ed8a4c4435d9e26b878d8acfc7e3c91994bac50899b7e8ec4413b8717
@@ -35,7 +35,7 @@ class CBETA
35
35
 
36
36
  # 載入藏經資料
37
37
  def initialize()
38
- fn = File.join(File.dirname(__FILE__), 'canons.csv')
38
+ fn = File.join(File.dirname(__FILE__), 'data/canons.csv')
39
39
  text = File.read(fn)
40
40
  @canon_abbr = {}
41
41
  CSV.parse(text, :headers => true) do |row|
@@ -61,6 +61,7 @@ end
61
61
 
62
62
  require 'cbeta/gaiji'
63
63
  require 'cbeta/bm_to_text'
64
+ require 'cbeta/p5a_to_epub'
64
65
  require 'cbeta/p5a_to_html'
65
66
  require 'cbeta/p5a_to_simple_html'
66
67
  require 'cbeta/p5a_to_text'
@@ -4,7 +4,7 @@ require 'json'
4
4
  class CBETA::Gaiji
5
5
  # 載入 CBETA 缺字資料庫
6
6
  def initialize()
7
- fn = File.join(File.dirname(__FILE__), 'gaiji.json')
7
+ fn = File.join(File.dirname(__FILE__), '../data/gaiji.json')
8
8
  @gaijis = JSON.parse(File.read(fn))
9
9
  end
10
10
 
@@ -0,0 +1,780 @@
1
+ require 'cgi'
2
+ require 'date'
3
+ require 'fileutils'
4
+ require 'json'
5
+ require 'nokogiri'
6
+ require 'set'
7
+ require 'gepub'
8
+ require 'pp'
9
+
10
+ # Convert CBETA XML P5a to EPUB
11
+ #
12
+ # CBETA XML P5a 可由此取得: https://github.com/cbeta-git/xml-p5a
13
+ class CBETA::P5aToEPUB
14
+ # 內容不輸出的元素
15
+ PASS=['back', 'teiHeader']
16
+
17
+ # 某版用字缺的符號
18
+ MISSING = '-'
19
+
20
+ NAV_TEMPLATE = File.read(File.join(File.dirname(__FILE__), '../data/epub-nav.xhtml'))
21
+ MAIN = 'main.xhtml'
22
+
23
+ # @param temp_folder [String] 供 EPUB 暫存工作檔案的路徑
24
+ # @param graphic_base [String] 存放圖片的路徑
25
+ def initialize(temp_folder, graphic_base)
26
+ @temp_folder = temp_folder
27
+ @graphic_base = graphic_base
28
+ @cbeta = CBETA.new
29
+ @gaijis = CBETA::Gaiji.new
30
+ end
31
+
32
+ # 將某個 xml 轉為一個 EPUB
33
+ def convert_file(input_path, output_path)
34
+ return false unless input_path.end_with? '.xml'
35
+
36
+ FileUtils.remove_dir(@temp_folder, force=true)
37
+ FileUtils::mkdir_p @temp_folder
38
+
39
+ @book_id = File.basename(input_path, ".xml")
40
+
41
+ sutra_init
42
+
43
+ handle_file(input_path)
44
+ create_epub(output_path)
45
+ end
46
+
47
+ # 將某個資料夾下的每個 xml 檔都轉為一個對應的 EPUB。
48
+ # 資料夾可以是巢狀,全部都會遞迴處理。
49
+ #
50
+ # @example
51
+ # require 'cbeta'
52
+ #
53
+ # TEMP = '/temp/epub-work'
54
+ # IMG = '/Users/ray/Documents/Projects/D道安/figures'
55
+ #
56
+ # c = CBETA::P5aToEPUB.new(TEMP, IMG)
57
+ # c.convert_folder('/Users/ray/Documents/Projects/D道安/xml-p5a/DA', '/temp/cbeta-epub/DA')
58
+ def convert_folder(input_folder, output_folder)
59
+ FileUtils.remove_dir(output_folder, force=true)
60
+ FileUtils::mkdir_p output_folder
61
+ Dir.foreach(input_folder) do |f|
62
+ next if f.start_with? '.'
63
+ p1 = File.join(input_folder, f)
64
+ if File.file?(p1)
65
+ f.sub!(/.xml$/, '.epub')
66
+ p2 = File.join(output_folder, f)
67
+ convert_file(p1, p2)
68
+ else
69
+ p2 = File.join(output_folder, f)
70
+ convert_folder(p1, p2)
71
+ end
72
+ end
73
+ end
74
+
75
+ # 將多個 xml 檔案合成一個 EPUB
76
+ #
77
+ # @example 大般若經 跨三冊 合成一個 EPUB
78
+ # require 'cbeta'
79
+ #
80
+ # TEMP = '/temp/epub-work'
81
+ #
82
+ # xml_files = [
83
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/05/T05n0220a.xml',
84
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/06/T06n0220b.xml',
85
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220c.xml',
86
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220d.xml',
87
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220e.xml',
88
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220f.xml',
89
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220g.xml',
90
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220h.xml',
91
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220i.xml',
92
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220j.xml',
93
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220k.xml',
94
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220l.xml',
95
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220m.xml',
96
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220n.xml',
97
+ # '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220o.xml',
98
+ # ]
99
+ #
100
+ # c = CBETA::P5aToEPUB.new(TEMP, '/path/to/figures')
101
+ # c.convert_sutra('T0220', '大般若經', xml_files, '/temp/cbeta-epub/T0220.epub')
102
+ def convert_sutra(book_id, title, xml_files, out)
103
+ @book_id = book_id
104
+ sutra_init
105
+ xml_files.each { |f| handle_file(f) }
106
+
107
+ @title = title
108
+ create_epub(out)
109
+ end
110
+
111
+ private
112
+
113
+ def copy_static_files(src, dest)
114
+ p1 = File.join(File.dirname(__FILE__), '../data', src)
115
+ p2 = File.join(@temp_folder, dest)
116
+ FileUtils.copy(p1, p2)
117
+ end
118
+
119
+ def create_epub(output_path)
120
+ copy_static_files('epub-readme.xhtml', 'readme.xhtml')
121
+ copy_static_files('epub-donate.xhtml', 'donate.xhtml')
122
+ create_main_html
123
+ create_nav_html
124
+
125
+ title = @title
126
+ book_id = @book_id
127
+ builder = GEPUB::Builder.new {
128
+ language 'zh-TW'
129
+ unique_identifier "http://www.cbeta.org/#{book_id}", 'BookID', 'URL'
130
+ title title
131
+
132
+ creator 'CBETA'
133
+
134
+ contributors 'DILA'
135
+
136
+ date Date.today.to_s
137
+ }
138
+
139
+ # in resources block, you can define resources by its relative path and datasource.
140
+ # item creator methods are: files, file.
141
+ builder.resources(:workdir => @temp_folder) {
142
+ glob 'img/*'
143
+
144
+ # this is navigation document.
145
+ nav 'nav.xhtml'
146
+
147
+ # ordered item. will be added to spine.
148
+ ordered {
149
+ file 'readme.xhtml'
150
+ file 'main.xhtml'
151
+ file 'donate.xhtml'
152
+ }
153
+ }
154
+ builder.generate_epub(output_path)
155
+ puts "output: #{output_path}"
156
+ end
157
+
158
+ def create_main_html
159
+ fn = File.join(@temp_folder, MAIN)
160
+ s = <<eos
161
+ <html xmlns="http://www.w3.org/1999/xhtml">
162
+ <head>
163
+ <meta charset="utf-8" />
164
+ <title>#{@title}</title>
165
+ </head>
166
+ <body>
167
+ <div id='body'>
168
+ eos
169
+ s += @main_text + "\n</div><!-- end of div[@id='body'] -->\n"
170
+ s += "<div id='back'>\n" + @back + "</div></body></html>\n"
171
+ File.write(fn, s)
172
+ end
173
+
174
+ def create_nav_html
175
+ @nav_root_ol.add_child("<li><a href='donate.xhtml'>贊助資訊</a></li>")
176
+
177
+ fn = File.join(@temp_folder, 'nav.xhtml')
178
+ s = NAV_TEMPLATE % to_html(@nav_root_ol)
179
+ File.write(fn, s)
180
+ end
181
+
182
+ def handle_anchor(e) # renders an <anchor>: original-note anchors become footnote links, 'fx' anchors a star marker, type="circle" a ◎, anything else ''
183
+ id = e['id']
184
+ if e.has_attribute?('id')
185
+ if id.start_with?('nkr_note_orig')
186
+ note = @notes[id] # NOTE(review): @notes is not assigned anywhere in this class as shown — confirm where it is populated, otherwise this raises NoMethodError on nil
187
+ note_text = traverse(note)
188
+ n = id[/^nkr_note_orig_(.*)$/, 1]
189
+ @back += "<span class='footnote' id='n#{n}'>#{note_text}</span>\n"
190
+ return "<a class='noteAnchor' href='#n#{n}'></a>"
191
+ elsif id.start_with? 'fx'
192
+ return "<span class='star'>[*]</span>"
193
+ end
194
+ end
195
+
196
+ if e.has_attribute?('type')
197
+ if e['type'] == 'circle'
198
+ return '◎'
199
+ end
200
+ end
201
+
202
+ ''
203
+ end
204
+
205
+ def handle_app(e)
206
+ r = ''
207
+ if e['type'] == 'star'
208
+ c = e['corresp'][1..-1]
209
+ r = "<a class='noteAnchor star' href='#n#{c}'></a>"
210
+ end
211
+ r + traverse(e)
212
+ end
213
+
214
+ def handle_byline(e)
215
+ r = '<p class="byline">'
216
+ r += "<span class='lineInfo'>#{@lb}</span>"
217
+ r += traverse(e)
218
+ r + '</p>'
219
+ end
220
+
221
+ def handle_cell(e)
222
+ doc = Nokogiri::XML::Document.new
223
+ cell = doc.create_element('td')
224
+ cell['rowspan'] = e['rows'] if e.key? 'rows'
225
+ cell['colspan'] = e['cols'] if e.key? 'cols'
226
+ cell.inner_html = traverse(e)
227
+ to_html(cell) + "\n"
228
+ end
229
+
230
+ def handle_corr(e)
231
+ r = ''
232
+ if e.parent.name == 'choice'
233
+ sic = e.parent.at_xpath('sic')
234
+ unless sic.nil?
235
+ @dila_note += 1
236
+ r = "<a class='noteAnchor dila' href='#dila_note#{@dila_note}'></a>"
237
+
238
+ note = @orig
239
+ sic_text = traverse(sic, 'back')
240
+ if sic_text.empty?
241
+ note += MISSING
242
+ else
243
+ note += sic_text
244
+ end
245
+ @back += "<span class='footnote_dila' id='dila_note#{@dila_note}'>#{note}</span>\n"
246
+ end
247
+ end
248
+ r + "<span class='cbeta'>%s</span>" % traverse(e)
249
+ end
250
+
251
+ def handle_div(e)
252
+ if e.has_attribute? 'type'
253
+ @open_divs << e
254
+ r = traverse(e)
255
+ @open_divs.pop
256
+ return "<div class='div-#{e['type']}'>#{r}</div>"
257
+ else
258
+ return traverse(e)
259
+ end
260
+ end
261
+
262
+ def handle_figure(e)
263
+ "<div class='figure'>%s</div>" % traverse(e)
264
+ end
265
+
266
+ def handle_g(e, mode)
267
+ # if 有 <mapping type="unicode">
268
+ # if 不在 Unicode Extension C, D, E 範圍裡
269
+ # 直接採用
270
+ # else
271
+ # 預設呈現 unicode, 但仍包缺字資訊,供點選開 popup
272
+ # else if 有 <mapping type="normal_unicode">
273
+ # 預設呈現 normal_unicode, 但仍包缺字資訊,供點選開 popup
274
+ # else if 有 normalized form
275
+ # 預設呈現 normalized form, 但仍包缺字資訊,供點選開 popup
276
+ # else
277
+ # 預設呈現組字式, 但仍包缺字資訊,供點選開 popup
278
+ gid = e['ref'][1..-1]
279
+ g = @gaijis[gid]
280
+ abort "Line:#{__LINE__} 無缺字資料:#{gid}" if g.nil?
281
+ zzs = g['zzs']
282
+
283
+ if mode == 'txt'
284
+ return g['roman'] if gid.start_with?('SD')
285
+ if zzs.nil?
286
+ abort "缺組字式:#{g}"
287
+ else
288
+ return zzs
289
+ end
290
+ end
291
+
292
+ if gid.start_with?('SD')
293
+ case gid
294
+ when 'SD-E35A'
295
+ return '('
296
+ when 'SD-E35B'
297
+ return ')'
298
+ else
299
+ return "<span class='siddam' roman='#{g['roman']}' code='#{gid}' char='#{g['sd-char']}'/>"
300
+ end
301
+ end
302
+
303
+ if gid.start_with?('RJ')
304
+ return "<span class='ranja' roman='#{g['roman']}' code='#{gid}' char='#{g['rj-char']}'/>"
305
+ end
306
+
307
+ default = ''
308
+ if g.has_key?('unicode')
309
+ #if @unicode1.include?(g['unicode'])
310
+ # 如果在 unicode ext-C, ext-D, ext-E 範圍內
311
+ if (0x2A700..0x2CEAF).include? g['unicode'].hex
312
+ default = g['unicode-char']
313
+ else
314
+ return g['unicode-char'] # 直接採用 unicode
315
+ end
316
+ end
317
+
318
+ nor = ''
319
+ if g.has_key?('normal_unicode')
320
+ nor = g['normal_unicode']
321
+ default = nor if default.empty?
322
+ end
323
+
324
+ if g.has_key?('normal')
325
+ nor += ', ' unless nor==''
326
+ nor += g['normal']
327
+ default = g['normal'] if default.empty?
328
+ end
329
+
330
+ default = zzs if default.empty?
331
+
332
+ href = 'http://dict.cbeta.org/dict_word/gaiji-cb/%s/%s.gif' % [gid[2, 2], gid]
333
+ unless @back.include?(href)
334
+ @back += "<span id='#{gid}' class='gaijiInfo' figure_url='#{href}' zzs='#{zzs}' nor='#{nor}'>#{default}</span>\n"
335
+ end
336
+ "<a class='gaijiAnchor' href='##{gid}'>#{default}</a>"
337
+ end
338
+
339
+ def handle_graphic(e)
340
+ url = e['url']
341
+ url.sub!(/^.*figures\/(.*)$/, '\1')
342
+
343
+ src = File.join(@graphic_base, url)
344
+ basename = File.basename(src)
345
+ dest = File.join(@temp_folder, 'img', basename)
346
+ FileUtils.copy(src, dest)
347
+
348
+ "<img src='img/#{basename}' />"
349
+ end
350
+
351
+ def handle_head(e)
352
+ r = ''
353
+ unless e['type'] == 'added'
354
+ i = @open_divs.size
355
+ r = "<p class='head' data-head-level='#{i}'>%s</p>" % traverse(e)
356
+ end
357
+ r
358
+ end
359
+
360
+ def handle_item(e)
361
+ "<li>%s</li>\n" % traverse(e)
362
+ end
363
+
364
+ def handle_juan(e)
365
+ "<p class='juan'>%s</p>" % traverse(e)
366
+ end
367
+
368
+ def handle_l(e)
369
+ if @lg_type == 'abnormal'
370
+ return traverse(e)
371
+ end
372
+
373
+ @in_l = true
374
+
375
+ doc = Nokogiri::XML::Document.new
376
+ cell = doc.create_element('div')
377
+ cell['class'] = 'lg-cell'
378
+ cell.inner_html = traverse(e)
379
+
380
+ if @first_l
381
+ parent = e.parent()
382
+ if parent.has_attribute?('rend')
383
+ indent = parent['rend'].scan(/text-indent:[^:]*/)
384
+ unless indent.empty?
385
+ cell['style'] = indent[0]
386
+ end
387
+ end
388
+ @first_l = false
389
+ end
390
+ r = to_html(cell)
391
+
392
+ unless @lg_row_open
393
+ r = "\n<div class='lg-row'>" + r
394
+ @lg_row_open = true
395
+ end
396
+ @in_l = false
397
+ r
398
+ end
399
+
400
+ def handle_lb(e) # handles a line-break element: records its line number, closes an open lg-row, and flushes text buffered for the next line
401
+ # The Xuzangjing (卍續藏) has two kinds of lb, X and R; only X is handled
402
+ return '' if e['ed'] != @series # NOTE(review): @series is never assigned in this class as shown, so this returns '' for every lb that has an ed attribute — confirm where it should be set
403
+
404
+ @lb = e['n']
405
+ r = ''
406
+ #if e.parent.name == 'lg' and $lg_row_open
407
+ if @lg_row_open && !@in_l
408
+ # Each verse line is placed in its own lg-row
409
+ # T46n1937, p. 914a01: an l contains a double-line inline note that spans lines
410
+ # T20n1092, 337c16: the lb sits inside an l, so the lg-row is not closed
411
+ r += "</div><!-- end of lg-row -->"
412
+ @lg_row_open = false
413
+ end
414
+ unless @next_line_buf.empty?
415
+ r += @next_line_buf
416
+ @next_line_buf = ''
417
+ end
418
+ r
419
+ end
420
+
421
+ def handle_lem(e)
422
+ r = ''
423
+ w = e['wit']
424
+ if w.include? 'CBETA' and not w.include? @orig
425
+ @dila_note += 1
426
+ r = "<a class='noteAnchor dila' href='#dila_note#{@dila_note}'></a>"
427
+ r += "<span class='cbeta'>%s</span>" % traverse(e)
428
+
429
+ note = lem_note_cf(e)
430
+ note += lem_note_rdg(e)
431
+ @back += "<span class='footnote_dila' id='dila_note#{@dila_note}'>#{note}</span>\n"
432
+ else
433
+ r = traverse(e)
434
+ end
435
+ r
436
+ end
437
+
438
+ def handle_lg(e)
439
+ r = ''
440
+ @lg_type = e['type']
441
+ if @lg_type == 'abnormal'
442
+ r = "<p class='lg-abnormal'>" + traverse(e) + "</p>"
443
+ else
444
+ @first_l = true
445
+ doc = Nokogiri::XML::Document.new
446
+ node = doc.create_element('div')
447
+ node['class'] = 'lg'
448
+ if e.has_attribute?('rend')
449
+ rend = e['rend'].gsub(/text-indent:[^:]*/, '')
450
+ node['style'] = rend
451
+ end
452
+ @lg_row_open = false
453
+ node.inner_html = traverse(e)
454
+ if @lg_row_open
455
+ node.inner_html += '</div><!-- end of lg -->'
456
+ @lg_row_open = false
457
+ end
458
+ r = "\n" + to_html(node)
459
+ end
460
+ r
461
+ end
462
+
463
+ def handle_list(e)
464
+ "<ul>%s</ul>" % traverse(e)
465
+ end
466
+
467
+ def handle_milestone(e)
468
+ ''
469
+ end
470
+
471
+ def handle_mulu(e)
472
+ return '' if e['type'] == '卷'
473
+
474
+ level = e['level'].to_i
475
+ while @current_nav.size > level
476
+ @current_nav.pop
477
+ end
478
+
479
+ label = traverse(e, 'txt')
480
+ @mulu_count += 1
481
+ li = @current_nav.last.add_child("<li><a href='#{@main_html}#mulu#{@mulu_count}'>#{label}</a></li>").first
482
+ ol = li.add_child('<ol></ol>').first
483
+ @current_nav << ol
484
+ "<a id='mulu#{@mulu_count}' />"
485
+ end
486
+
487
+ def handle_node(e, mode)
488
+ return '' if e.comment?
489
+ return handle_text(e, mode) if e.text?
490
+ return '' if PASS.include?(e.name)
491
+ r = case e.name
492
+ when 'anchor' then handle_anchor(e)
493
+ when 'app' then handle_app(e)
494
+ when 'byline' then handle_byline(e)
495
+ when 'cell' then handle_cell(e)
496
+ when 'corr' then handle_corr(e)
497
+ when 'div' then handle_div(e)
498
+ when 'figure' then handle_figure(e)
499
+ when 'foreign' then ''
500
+ when 'g' then handle_g(e, mode)
501
+ when 'graphic' then handle_graphic(e)
502
+ when 'head' then handle_head(e)
503
+ when 'item' then handle_item(e)
504
+ when 'juan' then handle_juan(e)
505
+ when 'l' then handle_l(e)
506
+ when 'lb' then handle_lb(e)
507
+ when 'lem' then handle_lem(e)
508
+ when 'lg' then handle_lg(e)
509
+ when 'list' then handle_list(e)
510
+ when 'mulu' then handle_mulu(e)
511
+ when 'note' then handle_note(e)
512
+ when 'milestone' then handle_milestone(e)
513
+ when 'p' then handle_p(e)
514
+ when 'rdg' then ''
515
+ when 'reg' then ''
516
+ when 'row' then handle_row(e)
517
+ when 'sic' then ''
518
+ when 'sg' then handle_sg(e)
519
+ when 't' then handle_t(e)
520
+ when 'tt' then handle_tt(e)
521
+ when 'table' then handle_table(e)
522
+ else traverse(e)
523
+ end
524
+ r
525
+ end
526
+
527
+ def handle_note(e)
528
+ n = e['n']
529
+ if e.has_attribute?('type')
530
+ t = e['type']
531
+ case t
532
+ when 'equivalent'
533
+ return ''
534
+ when 'orig'
535
+ return handle_note_orig(e)
536
+ when 'orig_biao'
537
+ return handle_note_orig(e, 'biao')
538
+ when 'orig_ke'
539
+ return handle_note_orig(e, 'ke')
540
+ when 'mod'
541
+ @pass << false
542
+ s = traverse(e)
543
+ @pass.pop
544
+ @back += "<span class='footnote_cb' id='n#{n}'>#{s}</span>\n"
545
+ return "<a class='noteAnchor' href='#n#{n}'></a>"
546
+ when 'rest'
547
+ return ''
548
+ else
549
+ return '' if t.start_with?('cf')
550
+ end
551
+ end
552
+
553
+ if e.has_attribute?('resp')
554
+ return '' if e['resp'].start_with? 'CBETA'
555
+ end
556
+
557
+ if e.has_attribute?('place') && e['place']=='inline'
558
+ r = traverse(e)
559
+ return "<span class='doube-line-note'>#{r}</span>"
560
+ else
561
+ return traverse(e)
562
+ end
563
+ end
564
+
565
+ def handle_note_orig(e, anchor_type=nil)
566
+ n = e['n']
567
+ @pass << false
568
+ s = traverse(e)
569
+ @pass.pop
570
+ @back += "<span class='footnote_orig' id='n#{n}'>#{s}</span>\n"
571
+
572
+ if @mod_notes.include? n
573
+ return ''
574
+ else
575
+ label = case anchor_type
576
+ when 'biao' then " data-label='標#{n[-2..-1]}'"
577
+ when 'ke' then " data-label='科#{n[-2..-1]}'"
578
+ else ''
579
+ end
580
+ return "<a class='noteAnchor' href='#n#{n}'#{label}></a>"
581
+ end
582
+ end
583
+
584
+ def handle_p(e)
585
+ r = "<div class='p'>\n"
586
+ r += traverse(e)
587
+ r + "</div>\n"
588
+ end
589
+
590
+ def handle_row(e)
591
+ "<tr>" + traverse(e) + "</tr>\n"
592
+ end
593
+
594
+ def handle_sg(e)
595
+ '(' + traverse(e) + ')'
596
+ end
597
+
598
+ def handle_file(xml_fn)
599
+ puts "read #{xml_fn}"
600
+ @in_l = false
601
+ @lg_row_open = false
602
+ @mod_notes = Set.new
603
+ @next_line_buf = ''
604
+ @open_divs = []
605
+
606
+ if @book_id.start_with? 'DA'
607
+ @orig = nil?
608
+ else
609
+ @orig = @cbeta.get_canon_abbr(@book_id[0])
610
+ abort "未處理底本: #{@book_id[0]}" if @orig.nil?
611
+ end
612
+
613
+ text = parse_xml(xml_fn)
614
+
615
+ # 註標移到 lg-cell 裡面,不然以 table 呈現 lg 會有問題
616
+ text.gsub!(/(<a class='noteAnchor'[^>]*><\/a>)(<div class="lg-cell"[^>]*>)/, '\2\1')
617
+
618
+ @main_text += text
619
+ end
620
+
621
+ def handle_t(e)
622
+ if e.has_attribute? 'place'
623
+ return '' if e['place'].include? 'foot'
624
+ end
625
+ r = traverse(e)
626
+
627
+ # <tt type="app"> 不是 悉漢雙行對照
628
+ return r if @tt_type == 'app'
629
+
630
+ # 處理雙行對照
631
+ i = e.xpath('../t').index(e)
632
+ case i
633
+ when 0
634
+ return r + ' '
635
+ when 1
636
+ @next_line_buf += r + ' '
637
+ return ''
638
+ else
639
+ return r
640
+ end
641
+ end
642
+
643
+ def handle_tt(e)
644
+ @tt_type = e['type']
645
+ traverse(e)
646
+ end
647
+
648
+ def handle_table(e)
649
+ "<table>" + traverse(e) + "</table>"
650
+ end
651
+
652
+ def handle_text(e, mode)
653
+ s = e.content().chomp
654
+ return '' if s.empty?
655
+ return '' if e.parent.name == 'app'
656
+
657
+ # cbeta xml 文字之間會有多餘的換行
658
+ r = s.gsub(/[\n\r]/, '')
659
+
660
+ # 把 & 轉為 &amp;
661
+ CGI.escapeHTML(r)
662
+ end
663
+
664
+ def lem_note_cf(e)
665
+ # ex: T32n1670A.xml, p. 703a16
666
+ # <note type="cf1">K30n1002_p0257a01-a23</note>
667
+ refs = []
668
+ e.xpath('./note').each { |n|
669
+ if n.key?('type') and n['type'].start_with? 'cf'
670
+ refs << n.content
671
+ end
672
+ }
673
+ if refs.empty?
674
+ ''
675
+ else
676
+ '修訂依據:' + refs.join(';') + '。'
677
+ end
678
+ end
679
+
680
+ def lem_note_rdg(lem)
681
+ r = ''
682
+ app = lem.parent
683
+ @pass << false
684
+ app.xpath('rdg').each { |rdg|
685
+ if rdg['wit'].include? @orig
686
+ s = traverse(rdg, 'back')
687
+ s = MISSING if s.empty?
688
+ r += @orig + s
689
+ end
690
+ }
691
+ @pass.pop
692
+ r += '。' unless r.empty?
693
+ r
694
+ end
695
+
696
+ def sutra_init
697
+ s = NAV_TEMPLATE % '<ol></ol>'
698
+ @nav_doc = Nokogiri::XML(s)
699
+
700
+ @nav_doc.remove_namespaces!()
701
+ @nav_root_ol = @nav_doc.at_xpath('//ol')
702
+ @current_nav = [@nav_root_ol]
703
+
704
+ @nav_root_ol.add_child("<li><a href='readme.xhtml'>編輯說明</a></li>")
705
+
706
+ @mulu_count = 0
707
+ @main_text = ''
708
+ @back = ''
709
+ @dila_note = 0
710
+
711
+ FileUtils::mkdir_p File.join(@temp_folder, 'img')
712
+ end
713
+
714
+ def open_xml(fn)
715
+ s = File.read(fn)
716
+
717
+ if fn.include? 'T16n0657'
718
+ # 這個地方 雙行夾註 跨兩行偈頌
719
+ # 把 lb 移到 note 結束之前
720
+ # 讓 lg-row 先結束,再結束雙行夾註
721
+ s.sub!(/(<\/note>)(\n<lb n="0206b29" ed="T"\/>)/, '\2\1')
722
+ end
723
+
724
+ # <milestone unit="juan"> 前面的 lb 屬於新的這一卷
725
+ s.gsub!(%r{((?:<pb [^>]+>\n?)?(?:<lb [^>]+>\n?)+)(<milestone [^>]*unit="juan"[^/>]*/>)}, '\2\1')
726
+
727
+ doc = Nokogiri::XML(s)
728
+ doc.remove_namespaces!()
729
+ doc
730
+ end
731
+
732
+ def read_mod_notes(doc)
733
+ doc.xpath("//note[@type='mod']").each { |e|
734
+ @mod_notes << e['n']
735
+ }
736
+ end
737
+
738
+ def parse_xml(xml_fn)
739
+ @pass = [false]
740
+
741
+ doc = open_xml(xml_fn)
742
+
743
+ e = doc.xpath("//titleStmt/title")[0]
744
+ @title = traverse(e, 'txt')
745
+ @title = @title.split()[-1]
746
+
747
+ read_mod_notes(doc)
748
+
749
+ root = doc.root()
750
+ body = root.xpath("text/body")[0]
751
+ @pass = [true]
752
+
753
+ text = traverse(body)
754
+ text
755
+ end
756
+
757
+ def remove_empty_nav(node_list)
758
+ node_list.each do |n|
759
+ if n[:nav].empty?
760
+ n.delete(:nav)
761
+ else
762
+ remove_empty_nav(n[:nav])
763
+ end
764
+ end
765
+ end
766
+
767
+ def to_html(e)
768
+ e.to_xml(encoding: 'UTF-8', pertty: true, :save_with => Nokogiri::XML::Node::SaveOptions::AS_XML)
769
+ end
770
+
771
+ def traverse(e, mode='html')
772
+ r = ''
773
+ e.children.each { |c|
774
+ s = handle_node(c, mode)
775
+ r += s
776
+ }
777
+ r
778
+ end
779
+
780
+ end
@@ -5,18 +5,17 @@ require 'json'
5
5
  require 'nokogiri'
6
6
  require 'set'
7
7
 
8
- # 內容不輸出的元素
9
- PASS=['back', 'teiHeader']
10
-
11
- # 某版用字缺的符號
12
- MISSING = '-'
13
-
14
8
  # Convert CBETA XML P5a to HTML
15
9
  #
16
10
  # CBETA XML P5a 可由此取得: https://github.com/cbeta-git/xml-p5a
17
11
  #
18
12
  # 轉檔規則請參考: http://wiki.ddbc.edu.tw/pages/CBETA_XML_P5a_轉_HTML
19
13
  class CBETA::P5aToHTML
14
+ # 內容不輸出的元素
15
+ PASS=['back', 'teiHeader']
16
+
17
+ # 某版用字缺的符號
18
+ MISSING = '-'
20
19
 
21
20
  # @param xml_root [String] 來源 CBETA XML P5a 路徑
22
21
  # @param out_root [String] 輸出 HTML 路徑
@@ -25,11 +24,6 @@ class CBETA::P5aToHTML
25
24
  @out_root = out_root
26
25
  @cbeta = CBETA.new
27
26
  @gaijis = CBETA::Gaiji.new
28
-
29
- # 載入 unicode 1.1 字集列表
30
- #fn = File.join(File.dirname(__FILE__), 'unicode-1.1.json')
31
- #json = File.read(fn)
32
- #@unicode1 = JSON.parse(json)
33
27
  end
34
28
 
35
29
  # 將 CBETA XML P5a 轉為 HTML
@@ -22,11 +22,6 @@ class CBETA::P5aToSimpleHTML
22
22
  @output_root = output_root
23
23
  @cbeta = CBETA.new
24
24
  @gaijis = CBETA::Gaiji.new
25
-
26
- # 載入 unicode 1.1 字集列表
27
- fn = File.join(File.dirname(__FILE__), 'unicode-1.1.json')
28
- json = File.read(fn)
29
- @unicode1 = JSON.parse(json)
30
25
  end
31
26
 
32
27
  # 將 CBETA XML P5a 轉為 Text
@@ -25,11 +25,6 @@ class CBETA::P5aToText
25
25
  @format = format
26
26
  @cbeta = CBETA.new
27
27
  @gaijis = CBETA::Gaiji.new
28
-
29
- # 載入 unicode 1.1 字集列表
30
- fn = File.join(File.dirname(__FILE__), 'unicode-1.1.json')
31
- json = File.read(fn)
32
- @unicode1 = JSON.parse(json)
33
28
  end
34
29
 
35
30
  # 將 CBETA XML P5a 轉為 Text
File without changes
@@ -0,0 +1,23 @@
1
+ <?xml version="1.0" encoding="utf-8" standalone="no"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
3
+ "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
4
+
5
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="zh-TW" xmlns:xml="http://www.w3.org/XML/1998/namespace">
6
+ <head>
7
+ <link href="../Styles/stylesheet.css" rel="stylesheet" type="text/css" />
8
+
9
+ <title>贊助</title>
10
+ </head>
11
+
12
+ <body>
13
+ <p><b>歡迎隨喜贊助</b></p>
14
+
15
+ <p><b>劃撥捐款</b></p>
16
+
17
+ <p>郵政劃撥帳號:19624224</p>
18
+
19
+ <p>戶名:財團法人智諭老和尚教育紀念基金會</p>
20
+
21
+ <p>若欲指定特殊用途者,請特別註明,我們會專款專用。</p>
22
+ </body>
23
+ </html>
@@ -0,0 +1,11 @@
1
+ <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
2
+ <head>
3
+ <meta charset="utf-8" />
4
+ </head>
5
+ <body>
6
+ <nav epub:type="toc" id="toc">
7
+ <h1>Table of contents</h1>
8
+ %s
9
+ </nav>
10
+ </body>
11
+ </html>
@@ -0,0 +1,27 @@
1
+ <?xml version="1.0" encoding="utf-8" standalone="no"?>
2
+ <!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
3
+ "http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
4
+
5
+ <html xmlns="http://www.w3.org/1999/xhtml" xml:lang="zh-TW" xmlns:xml="http://www.w3.org/XML/1998/namespace">
6
+ <head>
7
+ <link href="../Styles/stylesheet.css" rel="stylesheet" type="text/css" />
8
+
9
+ <title>說明</title>
10
+ </head>
11
+
12
+ <body>
13
+ <div>
14
+ <h2>編輯說明</h2>
15
+
16
+ <ul>
17
+ <li>本電子書以<a href="http://www.seeland.org.tw/www/zhiyu/index.html">「西蓮淨苑智諭老和尚著作全集」</a>為資料來源。</li>
18
+
19
+ <li>漢字呈現以 Unicode 1.1 為基礎,不在此範圍的字則採用 <a href="http://www.cbeta.org/format/rare-rule.php">組字式</a> 表達。</li>
20
+
21
+ <li><span style="line-height: 1.6em;">若有發現任何問題,歡迎來函</span> <a href="mailto:seeland77@gmail.com" style="line-height: 1.6em;">seeland77@gmail.com</a> <span style="line-height: 1.6em;">回報。</span><br /></li>
22
+
23
+ <li>版權所有,歡迎自由流通,但禁止營利使用。</li>
24
+ </ul><br />
25
+ </div>
26
+ </body>
27
+ </html>
File without changes
File without changes
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.3
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-08-20 00:00:00.000000000 Z
11
+ date: 2015-08-22 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com
@@ -16,16 +16,20 @@ executables: []
16
16
  extensions: []
17
17
  extra_rdoc_files: []
18
18
  files:
19
- - lib/canons.csv
20
19
  - lib/cbeta.rb
21
20
  - lib/cbeta/bm_to_text.rb
22
- - lib/cbeta/gaiji.json
23
21
  - lib/cbeta/gaiji.rb
24
22
  - lib/cbeta/html_to_text.rb
23
+ - lib/cbeta/p5a_to_epub.rb
25
24
  - lib/cbeta/p5a_to_html.rb
26
25
  - lib/cbeta/p5a_to_simple_html.rb
27
26
  - lib/cbeta/p5a_to_text.rb
28
- - lib/cbeta/unicode-1.1.json
27
+ - lib/data/canons.csv
28
+ - lib/data/epub-donate.xhtml
29
+ - lib/data/epub-nav.xhtml
30
+ - lib/data/epub-readme.xhtml
31
+ - lib/data/gaiji.json
32
+ - lib/data/unicode-1.1.json
29
33
  homepage: https://github.com/RayCHOU/ruby-cbeta
30
34
  licenses:
31
35
  - MIT