cbeta 0.4.3 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cbeta.rb +2 -1
- data/lib/cbeta/gaiji.rb +1 -1
- data/lib/cbeta/p5a_to_epub.rb +780 -0
- data/lib/cbeta/p5a_to_html.rb +5 -11
- data/lib/cbeta/p5a_to_simple_html.rb +0 -5
- data/lib/cbeta/p5a_to_text.rb +0 -5
- data/lib/{canons.csv → data/canons.csv} +0 -0
- data/lib/data/epub-donate.xhtml +23 -0
- data/lib/data/epub-nav.xhtml +11 -0
- data/lib/data/epub-readme.xhtml +27 -0
- data/lib/{cbeta → data}/gaiji.json +0 -0
- data/lib/{cbeta → data}/unicode-1.1.json +0 -0
- metadata +9 -5
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 416026a7921c0c2bb6c602be134095f4079951fe
|
4
|
+
data.tar.gz: d72055681fb1c8ff22a4317ed0310fc368381e1a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 0d71cdacc97c1d6111cc2dc1dc9b5190d2398f05b9a45280744b7475579dd8f612efa382c0cfd16f07429391f99e330a8fe1c3db1c48d770076f90abe20d15c4
|
7
|
+
data.tar.gz: bd62c4cdfe0def5b62d7440f0ec029c2535e1384d0ac9865e2ba91a643db46b6a1fed02ed8a4c4435d9e26b878d8acfc7e3c91994bac50899b7e8ec4413b8717
|
data/lib/cbeta.rb
CHANGED
@@ -35,7 +35,7 @@ class CBETA
|
|
35
35
|
|
36
36
|
# 載入藏經資料
|
37
37
|
def initialize()
|
38
|
-
fn = File.join(File.dirname(__FILE__), 'canons.csv')
|
38
|
+
fn = File.join(File.dirname(__FILE__), 'data/canons.csv')
|
39
39
|
text = File.read(fn)
|
40
40
|
@canon_abbr = {}
|
41
41
|
CSV.parse(text, :headers => true) do |row|
|
@@ -61,6 +61,7 @@ end
|
|
61
61
|
|
62
62
|
require 'cbeta/gaiji'
|
63
63
|
require 'cbeta/bm_to_text'
|
64
|
+
require 'cbeta/p5a_to_epub'
|
64
65
|
require 'cbeta/p5a_to_html'
|
65
66
|
require 'cbeta/p5a_to_simple_html'
|
66
67
|
require 'cbeta/p5a_to_text'
|
data/lib/cbeta/gaiji.rb
CHANGED
(the +1 -1 hunk for this file is missing from this capture)
data/lib/cbeta/p5a_to_epub.rb
ADDED
@@ -0,0 +1,780 @@
|
|
1
|
+
require 'cgi'
|
2
|
+
require 'date'
|
3
|
+
require 'fileutils'
|
4
|
+
require 'json'
|
5
|
+
require 'nokogiri'
|
6
|
+
require 'set'
|
7
|
+
require 'gepub'
|
8
|
+
require 'pp'
|
9
|
+
|
10
|
+
# Convert CBETA XML P5a to EPUB
|
11
|
+
#
|
12
|
+
# CBETA XML P5a 可由此取得: https://github.com/cbeta-git/xml-p5a
|
13
|
+
class CBETA::P5aToEPUB
|
14
|
+
# 內容不輸出的元素
|
15
|
+
PASS=['back', 'teiHeader']
|
16
|
+
|
17
|
+
# 某版用字缺的符號
|
18
|
+
MISSING = '-'
|
19
|
+
|
20
|
+
NAV_TEMPLATE = File.read(File.join(File.dirname(__FILE__), '../data/epub-nav.xhtml'))
|
21
|
+
MAIN = 'main.xhtml'
|
22
|
+
|
23
|
+
# Build a converter and load the shared CBETA lookup tables.
#
# @param temp_folder [String] working directory for intermediate EPUB files
# @param graphic_base [String] directory that holds the figure images
def initialize(temp_folder, graphic_base)
  @temp_folder, @graphic_base = temp_folder, graphic_base
  @cbeta = CBETA.new
  @gaijis = CBETA::Gaiji.new
end
|
31
|
+
|
32
|
+
# Convert a single CBETA XML P5a file into one EPUB.
#
# The working folder is wiped and recreated before every conversion.
#
# @param input_path [String] source XML path; anything not ending in ".xml" is skipped
# @param output_path [String] path of the EPUB file to write
# @return [false, Object] false when the input is not an .xml file,
#   otherwise whatever create_epub returns
def convert_file(input_path, output_path)
  return false unless input_path.end_with?('.xml')

  # The second positional argument is `force`: errors (e.g. the folder does
  # not exist yet) are ignored. The original spelled it `force=true`, which
  # also created a stray local variable.
  FileUtils.remove_dir(@temp_folder, true)
  FileUtils.mkdir_p(@temp_folder)

  @book_id = File.basename(input_path, '.xml')

  sutra_init
  handle_file(input_path)
  create_epub(output_path)
end
|
46
|
+
|
47
|
+
# Convert every XML file under a folder into a matching EPUB.
# Nested folders are processed recursively and mirrored in the output.
# The output folder is deleted and recreated first.
#
# @example
#   require 'cbeta'
#
#   TEMP = '/temp/epub-work'
#   IMG = '/Users/ray/Documents/Projects/D道安/figures'
#
#   c = CBETA::P5aToEPUB.new(TEMP, IMG)
#   c.convert_folder('/Users/ray/Documents/Projects/D道安/xml-p5a/DA', '/temp/cbeta-epub/DA')
#
# @param input_folder [String] folder containing CBETA XML P5a files
# @param output_folder [String] folder that receives the EPUB files
def convert_folder(input_folder, output_folder)
  # `true` is the positional `force` flag: ignore a missing folder.
  FileUtils.remove_dir(output_folder, true)
  FileUtils.mkdir_p(output_folder)

  Dir.foreach(input_folder) do |name|
    # Skips ".", ".." and hidden entries.
    next if name.start_with?('.')

    source = File.join(input_folder, name)
    if File.file?(source)
      # Only real ".xml" files are converted; the old /.xml$/ pattern had an
      # unescaped dot and rewrote the yielded name in place.
      next unless name.end_with?('.xml')

      target = File.join(output_folder, File.basename(name, '.xml') + '.epub')
      convert_file(source, target)
    else
      convert_folder(source, File.join(output_folder, name))
    end
  end
end
|
74
|
+
|
75
|
+
# Merge several XML files into one EPUB.
#
# The working folder is wiped first (as convert_file does) so that images
# left over from an earlier conversion are not packed into this EPUB.
#
# @example The Mahaprajnaparamita Sutra spans three volumes; build one EPUB
#   require 'cbeta'
#
#   TEMP = '/temp/epub-work'
#   IMG = '/path/to/figures'
#
#   xml_files = [
#     '/Users/ray/git-repos/cbeta-xml-p5a/T/05/T05n0220a.xml',
#     '/Users/ray/git-repos/cbeta-xml-p5a/T/06/T06n0220b.xml',
#     '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220c.xml',
#     '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220d.xml',
#     '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220e.xml',
#     '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220f.xml',
#     '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220g.xml',
#     '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220h.xml',
#     '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220i.xml',
#     '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220j.xml',
#     '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220k.xml',
#     '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220l.xml',
#     '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220m.xml',
#     '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220n.xml',
#     '/Users/ray/git-repos/cbeta-xml-p5a/T/07/T07n0220o.xml',
#   ]
#
#   c = CBETA::P5aToEPUB.new(TEMP, IMG)
#   c.convert_sutra('T0220', '大般若經', xml_files, '/temp/cbeta-epub/T0220.epub')
#
# @param book_id [String] identifier used in the EPUB unique id, e.g. "T0220"
# @param title [String] title of the resulting book
# @param xml_files [Array<String>] source XML paths, in reading order
# @param out [String] path of the EPUB file to write
def convert_sutra(book_id, title, xml_files, out)
  # `true` is the positional `force` flag: ignore a missing folder.
  FileUtils.remove_dir(@temp_folder, true)
  FileUtils.mkdir_p(@temp_folder)

  @book_id = book_id
  sutra_init
  xml_files.each { |f| handle_file(f) }

  # Set the title after the files are handled so the caller's title is the one used.
  @title = title
  create_epub(out)
end
|
110
|
+
|
111
|
+
private
|
112
|
+
|
113
|
+
# Copy one bundled file from the gem's data folder into the working folder.
#
# @param src [String] file name inside ../data (relative to this source file)
# @param dest [String] file name to create inside the working folder
def copy_static_files(src, dest)
  data_dir = File.join(File.dirname(__FILE__), '../data')
  FileUtils.copy(File.join(data_dir, src), File.join(@temp_folder, dest))
end
|
118
|
+
|
119
|
+
# Assemble the EPUB from the files prepared in the working folder.
#
# Copies the static readme/donate pages, writes main.xhtml and nav.xhtml,
# then lets GEPUB package everything.
#
# @param output_path [String] path of the EPUB file to write
def create_epub(output_path)
  [%w[epub-readme.xhtml readme.xhtml], %w[epub-donate.xhtml donate.xhtml]].each do |src, dest|
    copy_static_files(src, dest)
  end
  create_main_html
  create_nav_html

  # Instance variables are copied into locals before entering the builder
  # block, as the original does (presumably because GEPUB evaluates the block
  # in its own context -- TODO confirm).
  book_title = @title
  identifier = "http://www.cbeta.org/#{@book_id}"
  builder = GEPUB::Builder.new do
    language 'zh-TW'
    unique_identifier identifier, 'BookID', 'URL'
    title book_title

    creator 'CBETA'

    contributors 'DILA'

    date Date.today.to_s
  end

  # Resources are declared by paths relative to the working folder.
  builder.resources(:workdir => @temp_folder) do
    glob 'img/*'

    # Navigation document.
    nav 'nav.xhtml'

    # Ordered items are added to the spine in this order.
    ordered do
      file 'readme.xhtml'
      file 'main.xhtml'
      file 'donate.xhtml'
    end
  end
  builder.generate_epub(output_path)
  puts "output: #{output_path}"
end
|
157
|
+
|
158
|
+
# Write main.xhtml: the accumulated body text followed by the "back" div
# that collects footnotes and gaiji information.
def create_main_html
  head = <<HTML_HEAD
<html xmlns="http://www.w3.org/1999/xhtml">
<head>
<meta charset="utf-8" />
<title>#{@title}</title>
</head>
<body>
<div id='body'>
HTML_HEAD
  body_part = @main_text + "\n</div><!-- end of div[@id='body'] -->\n"
  back_part = "<div id='back'>\n" + @back + "</div></body></html>\n"
  File.write(File.join(@temp_folder, MAIN), head + body_part + back_part)
end
|
173
|
+
|
174
|
+
# Write nav.xhtml: append the donation entry to the navigation list and
# substitute the serialized list into NAV_TEMPLATE.
def create_nav_html
  @nav_root_ol.add_child("<li><a href='donate.xhtml'>贊助資訊</a></li>")

  nav_markup = format(NAV_TEMPLATE, to_html(@nav_root_ol))
  File.write(File.join(@temp_folder, 'nav.xhtml'), nav_markup)
end
|
181
|
+
|
182
|
+
# Render an <anchor> element.
#
# * id starting with "nkr_note_orig": emit a note anchor and append the note
#   text to the back matter
# * id starting with "fx": emit a star marker
# * type "circle": emit the circle symbol
# * anything else: emit nothing
#
# @param e [Nokogiri::XML::Element]
# @return [String] HTML fragment
def handle_anchor(e)
  if e.has_attribute?('id')
    anchor_id = e['id']
    if anchor_id.start_with?('nkr_note_orig')
      # NOTE(review): @notes is not assigned anywhere in this class, so this
      # branch would raise on nil -- confirm where it should be populated.
      note = @notes[anchor_id]
      note_text = traverse(note)
      n = anchor_id[/^nkr_note_orig_(.*)$/, 1]
      @back += "<span class='footnote' id='n#{n}'>#{note_text}</span>\n"
      return "<a class='noteAnchor' href='#n#{n}'></a>"
    elsif anchor_id.start_with?('fx')
      return "<span class='star'>[*]</span>"
    end
  end

  return '◎' if e.has_attribute?('type') && e['type'] == 'circle'

  ''
end
|
204
|
+
|
205
|
+
# Render an <app> element; a "star" app is prefixed with a note anchor that
# points at the note named by its corresp attribute (leading "#" dropped).
#
# @param e [Nokogiri::XML::Element]
# @return [String] HTML fragment
def handle_app(e)
  prefix =
    if e['type'] == 'star'
      "<a class='noteAnchor star' href='#n#{e['corresp'][1..-1]}'></a>"
    else
      ''
    end
  prefix + traverse(e)
end
|
213
|
+
|
214
|
+
# Render a <byline> as a paragraph that starts with the current line number.
#
# @param e [Nokogiri::XML::Element]
# @return [String] HTML fragment
def handle_byline(e)
  # @lb is read before the children are traversed, as in the original.
  opening = %(<p class="byline"><span class='lineInfo'>#{@lb}</span>)
  opening + traverse(e) + '</p>'
end
|
220
|
+
|
221
|
+
# Render a table <cell> as <td>, mapping rows/cols to rowspan/colspan.
#
# @param e [Nokogiri::XML::Element]
# @return [String] serialized <td> followed by a newline
def handle_cell(e)
  td = Nokogiri::XML::Document.new.create_element('td')
  { 'rows' => 'rowspan', 'cols' => 'colspan' }.each do |tei_attr, html_attr|
    td[html_attr] = e[tei_attr] if e.key?(tei_attr)
  end
  td.inner_html = traverse(e)
  to_html(td) + "\n"
end
|
229
|
+
|
230
|
+
# Render a <corr>. Inside a <choice> that also has a <sic>, a DILA note is
# appended to the back matter recording the original reading (or MISSING
# when the reading is empty), and an anchor to that note is emitted.
#
# @param e [Nokogiri::XML::Element]
# @return [String] HTML fragment
def handle_corr(e)
  anchor = ''
  sic = e.parent.name == 'choice' ? e.parent.at_xpath('sic') : nil
  unless sic.nil?
    @dila_note += 1
    anchor = "<a class='noteAnchor dila' href='#dila_note#{@dila_note}'></a>"

    sic_text = traverse(sic, 'back')
    note = @orig + (sic_text.empty? ? MISSING : sic_text)
    @back += "<span class='footnote_dila' id='dila_note#{@dila_note}'>#{note}</span>\n"
  end
  anchor + "<span class='cbeta'>%s</span>" % traverse(e)
end
|
250
|
+
|
251
|
+
# Render a <div>. A typed div becomes an HTML div and is tracked on
# @open_divs while its children are processed; an untyped div is transparent.
#
# @param e [Nokogiri::XML::Element]
# @return [String] HTML fragment
def handle_div(e)
  return traverse(e) unless e.has_attribute?('type')

  @open_divs.push(e)
  inner = traverse(e)
  @open_divs.pop
  "<div class='div-#{e['type']}'>#{inner}</div>"
end
|
261
|
+
|
262
|
+
# Wrap the rendered children of a <figure> in a div.
#
# @param e [Nokogiri::XML::Element]
# @return [String] HTML fragment
def handle_figure(e)
  "<div class='figure'>#{traverse(e)}</div>"
end
|
265
|
+
|
266
|
+
# Render a <g> (gaiji, i.e. a character outside the base character set).
#
# Selection rules:
#   if there is a unicode mapping
#     outside Unicode Ext-C/D/E -> use the character directly
#     otherwise                 -> show it, but keep the gaiji info for a popup
#   else if there is a normal_unicode mapping -> show it, keep gaiji info
#   else if there is a normalized form        -> show it, keep gaiji info
#   else                                      -> show the composition formula
#
# In 'txt' mode only the romanization (Siddham) or composition formula is returned.
#
# @param e [Nokogiri::XML::Element] element whose ref is "#" + gaiji id
# @param mode [String] 'txt' for plain text, anything else for HTML
# @return [String]
def handle_g(e, mode)
  gid = e['ref'][1..-1]
  g = @gaijis[gid]
  abort "Line:#{__LINE__} 無缺字資料:#{gid}" if g.nil?
  zzs = g['zzs']
  siddham = gid.start_with?('SD')

  if mode == 'txt'
    return g['roman'] if siddham
    abort "缺組字式:#{g}" if zzs.nil?
    return zzs
  end

  if siddham
    return '(' if gid == 'SD-E35A'
    return ')' if gid == 'SD-E35B'
    return "<span class='siddam' roman='#{g['roman']}' code='#{gid}' char='#{g['sd-char']}'/>"
  end

  if gid.start_with?('RJ')
    return "<span class='ranja' roman='#{g['roman']}' code='#{gid}' char='#{g['rj-char']}'/>"
  end

  default = ''
  if g.has_key?('unicode')
    # Outside Ext-C/D/E: the character can be used as is.
    return g['unicode-char'] unless (0x2A700..0x2CEAF).include?(g['unicode'].hex)
    default = g['unicode-char']
  end

  nor = ''
  if g.has_key?('normal_unicode')
    nor = g['normal_unicode']
    default = nor if default.empty?
  end

  if g.has_key?('normal')
    # `+=` builds a new string, so the gaiji table entry is never mutated.
    nor += ', ' unless nor == ''
    nor += g['normal']
    default = g['normal'] if default.empty?
  end

  default = zzs if default.empty?

  href = 'http://dict.cbeta.org/dict_word/gaiji-cb/%s/%s.gif' % [gid[2, 2], gid]
  # Each gaiji is described once in the back matter.
  unless @back.include?(href)
    @back += "<span id='#{gid}' class='gaijiInfo' figure_url='#{href}' zzs='#{zzs}' nor='#{nor}'>#{default}</span>\n"
  end
  "<a class='gaijiAnchor' href='##{gid}'>#{default}</a>"
end
|
338
|
+
|
339
|
+
# Copy the image referenced by a <graphic> into the EPUB img folder and
# return an <img> tag pointing at the copy.
#
# @param e [Nokogiri::XML::Element] element with a url attribute
# @return [String] HTML fragment
def handle_graphic(e)
  # Keep only the part of the url after ".../figures/".
  relative = e['url'].sub(/^.*figures\/(.*)$/, '\1')

  src = File.join(@graphic_base, relative)
  basename = File.basename(src)
  FileUtils.copy(src, File.join(@temp_folder, 'img', basename))

  "<img src='img/#{basename}' />"
end
|
350
|
+
|
351
|
+
# Render a <head> as a paragraph tagged with the current div nesting depth.
# Heads of type "added" are dropped.
#
# @param e [Nokogiri::XML::Element]
# @return [String] HTML fragment, or '' for added heads
def handle_head(e)
  return '' if e['type'] == 'added'

  "<p class='head' data-head-level='#{@open_divs.size}'>#{traverse(e)}</p>"
end
|
359
|
+
|
360
|
+
# Render a list <item> as <li>.
#
# @param e [Nokogiri::XML::Element]
# @return [String] HTML fragment ending in a newline
def handle_item(e)
  "<li>#{traverse(e)}</li>\n"
end
|
363
|
+
|
364
|
+
# Render a <juan> (fascicle heading) as a paragraph.
#
# @param e [Nokogiri::XML::Element]
# @return [String] HTML fragment
def handle_juan(e)
  "<p class='juan'>#{traverse(e)}</p>"
end
|
367
|
+
|
368
|
+
# Render one verse line <l> as an "lg-cell" div, opening an "lg-row" div
# when none is open. Inside an "abnormal" <lg> the line is passed through.
#
# @param e [Nokogiri::XML::Element]
# @return [String] HTML fragment
def handle_l(e)
  return traverse(e) if @lg_type == 'abnormal'

  @in_l = true

  cell = Nokogiri::XML::Document.new.create_element('div')
  cell['class'] = 'lg-cell'
  cell.inner_html = traverse(e)

  if @first_l
    # The text-indent of the enclosing <lg> is applied to its first cell only.
    lg = e.parent
    if lg.has_attribute?('rend')
      indent = lg['rend'][/text-indent:[^:]*/]
      cell['style'] = indent if indent
    end
    @first_l = false
  end
  html = to_html(cell)

  unless @lg_row_open
    html = "\n<div class='lg-row'>" + html
    @lg_row_open = true
  end
  @in_l = false
  html
end
|
399
|
+
|
400
|
+
# Handle a line break <lb>: remember its line number, close an open verse
# row, and flush text buffered for the next line.
#
# The Xuzangjing (X) carries both X and R line breaks; only those of the
# book's own edition are processed.
#
# @param e [Nokogiri::XML::Element] element with ed and n attributes
# @return [String] HTML fragment (possibly empty)
def handle_lb(e)
  # @series is not assigned anywhere in this class, so the original
  # comparison rejected every <lb> that has an ed attribute. Fall back to the
  # leading capital letters of the book id (e.g. "T05n0220a" -> "T").
  # NOTE(review): assumes the ed attribute equals that prefix -- confirm.
  series = @series || @book_id.to_s[/\A[A-Z]+/]
  return '' if e['ed'] != series

  @lb = e['n']
  r = ''
  if @lg_row_open && !@in_l
    # Each verse line lives in its own lg-row.
    # T46n1937, p. 914a01: an <l> wraps an inline note that spans lines.
    # T20n1092, 337c16: an <lb> inside an <l> must not close the row.
    r += "</div><!-- end of lg-row -->"
    @lg_row_open = false
  end
  unless @next_line_buf.empty?
    r += @next_line_buf
    @next_line_buf = ''
  end
  r
end
|
420
|
+
|
421
|
+
# Render a <lem>. When the lemma is a CBETA emendation that the base edition
# does not share, a DILA note listing the basis and the base edition's
# reading is appended to the back matter and an anchor is emitted.
#
# @param e [Nokogiri::XML::Element] element with a wit attribute
# @return [String] HTML fragment
def handle_lem(e)
  wit = e['wit']
  return traverse(e) unless wit.include?('CBETA') && !wit.include?(@orig)

  @dila_note += 1
  html = "<a class='noteAnchor dila' href='#dila_note#{@dila_note}'></a>"
  html += "<span class='cbeta'>%s</span>" % traverse(e)

  note = lem_note_cf(e) + lem_note_rdg(e)
  @back += "<span class='footnote_dila' id='dila_note#{@dila_note}'>#{note}</span>\n"
  html
end
|
437
|
+
|
438
|
+
# Render a verse group <lg>. An "abnormal" group becomes a plain paragraph;
# otherwise it becomes an "lg" div whose rows and cells are produced by the
# <l>/<lb> handlers.
#
# @param e [Nokogiri::XML::Element]
# @return [String] HTML fragment
def handle_lg(e)
  @lg_type = e['type']
  return "<p class='lg-abnormal'>" + traverse(e) + "</p>" if @lg_type == 'abnormal'

  @first_l = true
  node = Nokogiri::XML::Document.new.create_element('div')
  node['class'] = 'lg'
  # text-indent is stripped here; handle_l applies it to the first cell.
  node['style'] = e['rend'].gsub(/text-indent:[^:]*/, '') if e.has_attribute?('rend')

  @lg_row_open = false
  node.inner_html = traverse(e)
  if @lg_row_open
    # A row left open by the last <l> is closed here.
    node.inner_html += '</div><!-- end of lg -->'
    @lg_row_open = false
  end
  "\n" + to_html(node)
end
|
462
|
+
|
463
|
+
# Render a <list> as an unordered HTML list.
#
# @param e [Nokogiri::XML::Element]
# @return [String] HTML fragment
def handle_list(e)
  "<ul>#{traverse(e)}</ul>"
end
|
466
|
+
|
467
|
+
# <milestone> elements produce no output in the EPUB.
#
# @param e [Nokogiri::XML::Element] ignored
# @return [String] always the empty string
def handle_milestone(e)
  ''
end
|
470
|
+
|
471
|
+
# Handle a <mulu> (table-of-contents marker): add an entry to the navigation
# tree at the depth given by its level attribute and emit the anchor that the
# entry links to. Entries of type "卷" (fascicle) are skipped.
#
# @param e [Nokogiri::XML::Element] element with level and type attributes
# @return [String] anchor HTML, or '' for fascicle entries
def handle_mulu(e)
  return '' if e['type'] == '卷'

  # A missing or zero level must not pop the root <ol> off the stack;
  # @current_nav.last would then be nil.
  level = [e['level'].to_i, 1].max
  @current_nav.pop while @current_nav.size > level

  label = traverse(e, 'txt')
  @mulu_count += 1

  # @main_html is not assigned anywhere in this class, so links used to come
  # out as "#muluN" and resolve inside nav.xhtml. Point them at the main
  # document instead.
  target = @main_html || MAIN
  li = @current_nav.last.add_child("<li><a href='#{target}#mulu#{@mulu_count}'>#{label}</a></li>").first
  # Every entry gets an <ol> so that deeper levels can be nested under it.
  @current_nav << li.add_child('<ol></ol>').first
  "<a id='mulu#{@mulu_count}' />"
end
|
486
|
+
|
487
|
+
# Dispatch one XML node to its handler.
#
# Comments and elements listed in PASS yield nothing, text nodes go to
# handle_text, and elements without a dedicated handler are traversed.
#
# @param e [Nokogiri::XML::Node]
# @param mode [String] output mode, forwarded to handle_text / handle_g
# @return [String] rendered fragment
def handle_node(e, mode)
  return '' if e.comment?
  return handle_text(e, mode) if e.text?
  return '' if PASS.include?(e.name)

  case e.name
  when 'foreign', 'rdg', 'reg', 'sic' then ''
  when 'anchor'    then handle_anchor(e)
  when 'app'       then handle_app(e)
  when 'byline'    then handle_byline(e)
  when 'cell'      then handle_cell(e)
  when 'corr'      then handle_corr(e)
  when 'div'       then handle_div(e)
  when 'figure'    then handle_figure(e)
  when 'g'         then handle_g(e, mode)
  when 'graphic'   then handle_graphic(e)
  when 'head'      then handle_head(e)
  when 'item'      then handle_item(e)
  when 'juan'      then handle_juan(e)
  when 'l'         then handle_l(e)
  when 'lb'        then handle_lb(e)
  when 'lem'       then handle_lem(e)
  when 'lg'        then handle_lg(e)
  when 'list'      then handle_list(e)
  when 'mulu'      then handle_mulu(e)
  when 'note'      then handle_note(e)
  when 'milestone' then handle_milestone(e)
  when 'p'         then handle_p(e)
  when 'row'       then handle_row(e)
  when 'sg'        then handle_sg(e)
  when 't'         then handle_t(e)
  when 'tt'        then handle_tt(e)
  when 'table'     then handle_table(e)
  else traverse(e)
  end
end
|
526
|
+
|
527
|
+
# Render a <note> according to its type:
#
# * equivalent / rest / cf*  -> nothing
# * orig / orig_biao / orig_ke -> delegated to handle_note_orig
# * mod -> footnote in the back matter plus an anchor
#
# Untyped (or otherwise typed) notes whose resp starts with "CBETA" are
# dropped; inline notes are wrapped in a span; anything else is traversed.
#
# @param e [Nokogiri::XML::Element]
# @return [String] HTML fragment
def handle_note(e)
  n = e['n']
  if e.has_attribute?('type')
    t = e['type']
    case t
    when 'equivalent', 'rest'
      return ''
    when 'orig'
      return handle_note_orig(e)
    when 'orig_biao'
      return handle_note_orig(e, 'biao')
    when 'orig_ke'
      return handle_note_orig(e, 'ke')
    when 'mod'
      @pass << false
      content = traverse(e)
      @pass.pop
      @back += "<span class='footnote_cb' id='n#{n}'>#{content}</span>\n"
      return "<a class='noteAnchor' href='#n#{n}'></a>"
    else
      return '' if t.start_with?('cf')
    end
  end

  return '' if e.has_attribute?('resp') && e['resp'].start_with?('CBETA')

  content = traverse(e)
  if e.has_attribute?('place') && e['place'] == 'inline'
    "<span class='doube-line-note'>#{content}</span>"
  else
    content
  end
end
|
564
|
+
|
565
|
+
# Record an original-edition note in the back matter and return its anchor.
# No anchor is emitted when a "mod" note with the same number exists.
#
# @param e [Nokogiri::XML::Element] element with an n attribute
# @param anchor_type [String, nil] 'biao' or 'ke' adds a data-label built
#   from the last two characters of n
# @return [String] anchor HTML, or '' when superseded by a mod note
def handle_note_orig(e, anchor_type=nil)
  n = e['n']
  @pass << false
  content = traverse(e)
  @pass.pop
  @back += "<span class='footnote_orig' id='n#{n}'>#{content}</span>\n"

  return '' if @mod_notes.include?(n)

  label =
    case anchor_type
    when 'biao' then " data-label='標#{n[-2..-1]}'"
    when 'ke'   then " data-label='科#{n[-2..-1]}'"
    else ''
    end
  "<a class='noteAnchor' href='#n#{n}'#{label}></a>"
end
|
583
|
+
|
584
|
+
# Render a <p> as a div with class "p".
#
# @param e [Nokogiri::XML::Element]
# @return [String] HTML fragment ending in a newline
def handle_p(e)
  "<div class='p'>\n#{traverse(e)}</div>\n"
end
|
589
|
+
|
590
|
+
# Render a table <row> as <tr>.
#
# @param e [Nokogiri::XML::Element]
# @return [String] HTML fragment ending in a newline
def handle_row(e)
  "<tr>#{traverse(e)}</tr>\n"
end
|
593
|
+
|
594
|
+
# Render an <sg> element with its content in parentheses.
#
# @param e [Nokogiri::XML::Element]
# @return [String]
def handle_sg(e)
  "(#{traverse(e)})"
end
|
597
|
+
|
598
|
+
# Convert one XML file and append the result to @main_text.
#
# Resets the per-file parser state, resolves the abbreviation of the base
# edition from the first character of the book id, parses the file and
# post-processes the generated HTML.
#
# @param xml_fn [String] path of the CBETA XML P5a file
# @return [String] the accumulated main text
def handle_file(xml_fn)
  puts "read #{xml_fn}"
  @in_l = false
  @lg_row_open = false
  @mod_notes = Set.new
  @next_line_buf = ''
  @open_divs = []

  if @book_id.start_with?('DA')
    # Was `@orig = nil?`, which calls self.nil? and stores false.
    # NOTE(review): handle_corr, handle_lem and lem_note_rdg use @orig as a
    # String; DA texts that contain <choice>/<app> would still fail there.
    @orig = nil
  else
    @orig = @cbeta.get_canon_abbr(@book_id[0])
    abort "未處理底本: #{@book_id[0]}" if @orig.nil?
  end

  text = parse_xml(xml_fn)

  # Move a note anchor into the following lg-cell; otherwise it would sit
  # between cells and break the table-like verse layout.
  text.gsub!(/(<a class='noteAnchor'[^>]*><\/a>)(<div class="lg-cell"[^>]*>)/, '\2\1')

  @main_text += text
end
|
620
|
+
|
621
|
+
# Render a <t> inside a <tt>.
#
# Footnote-placed <t> elements are dropped. Inside <tt type="app"> the text
# is returned as is (this is not a two-line Siddham/Chinese alignment).
# Otherwise the first <t> is emitted followed by an ideographic space, the
# second is buffered for the next line, and any further ones are emitted as is.
#
# @param e [Nokogiri::XML::Element]
# @return [String]
def handle_t(e)
  return '' if e.has_attribute?('place') && e['place'].include?('foot')

  text = traverse(e)
  return text if @tt_type == 'app'

  # Position of this <t> among its sibling <t> elements.
  case e.xpath('../t').index(e)
  when 0
    text + '　'
  when 1
    @next_line_buf += text + '　'
    ''
  else
    text
  end
end
|
642
|
+
|
643
|
+
# Render a <tt>: remember its type for the nested <t> handlers, then
# process the children.
#
# @param e [Nokogiri::XML::Element]
# @return [String]
def handle_tt(e)
  @tt_type = e['type']
  traverse(e)
end
|
647
|
+
|
648
|
+
# Render a <table>.
#
# @param e [Nokogiri::XML::Element]
# @return [String] HTML fragment
def handle_table(e)
  "<table>#{traverse(e)}</table>"
end
|
651
|
+
|
652
|
+
# Render a text node: drop text that is a direct child of <app>, remove
# line breaks, and HTML-escape the rest.
#
# @param e [Nokogiri::XML::Text]
# @param mode [String] unused here
# @return [String] escaped text
def handle_text(e, mode)
  raw = e.content.chomp
  return '' if raw.empty?
  return '' if e.parent.name == 'app'

  # CBETA XML has superfluous line breaks between characters.
  joined = raw.delete("\n\r")

  # Escape markup characters, e.g. "&" becomes "&amp;".
  CGI.escapeHTML(joined)
end
|
663
|
+
|
664
|
+
# Build the "basis of emendation" sentence from the cf* notes directly
# under a <lem>.
#
# Example: T32n1670A.xml, p. 703a16
#   <note type="cf1">K30n1002_p0257a01-a23</note>
#
# @param e [Nokogiri::XML::Element] the <lem> element
# @return [String] the sentence, or '' when there are no cf notes
def lem_note_cf(e)
  refs = e.xpath('./note')
          .select { |n| n.key?('type') && n['type'].start_with?('cf') }
          .map { |n| n.content }
  return '' if refs.empty?

  '修訂依據:' + refs.join(';') + '。'
end
|
679
|
+
|
680
|
+
# Collect the base edition's readings from the <rdg> siblings of a <lem>.
# Each matching reading is emitted as the edition abbreviation followed by
# its text (MISSING when the reading is empty).
#
# @param lem [Nokogiri::XML::Element] the <lem> element
# @return [String] the readings followed by a full stop, or ''
def lem_note_rdg(lem)
  @pass << false
  pieces = lem.parent.xpath('rdg').select { |rdg| rdg['wit'].include?(@orig) }.map do |rdg|
    reading = traverse(rdg, 'back')
    reading = MISSING if reading.empty?
    @orig + reading
  end
  @pass.pop

  summary = pieces.inject('') { |acc, piece| acc + piece }
  summary += '。' unless summary.empty?
  summary
end
|
695
|
+
|
696
|
+
# Reset the per-book state: a fresh navigation document (seeded with the
# readme entry), empty text buffers and counters, and the img work folder.
def sutra_init
  @nav_doc = Nokogiri::XML(NAV_TEMPLATE % '<ol></ol>')

  @nav_doc.remove_namespaces!
  @nav_root_ol = @nav_doc.at_xpath('//ol')
  @current_nav = [@nav_root_ol]

  @nav_root_ol.add_child("<li><a href='readme.xhtml'>編輯說明</a></li>")

  @mulu_count = 0
  @main_text = ''
  @back = ''
  @dila_note = 0

  FileUtils.mkdir_p(File.join(@temp_folder, 'img'))
end
|
713
|
+
|
714
|
+
# Read an XML file, patch known markup problems in the raw text, and parse
# it into a namespace-free Nokogiri document.
#
# @param fn [String] path of the XML file
# @return [Nokogiri::XML::Document]
def open_xml(fn)
  xml = File.read(fn)

  if fn.include?('T16n0657')
    # Here an inline (double-line) note spans two verse lines.
    # Move the <lb> in front of the end of the note, so that the lg-row
    # is closed before the note is.
    xml.sub!(/(<\/note>)(\n<lb n="0206b29" ed="T"\/>)/, '\2\1')
  end

  # The <lb> (and <pb>) right before <milestone unit="juan"> belong to the
  # new fascicle, so move them after the milestone.
  xml.gsub!(%r{((?:<pb [^>]+>\n?)?(?:<lb [^>]+>\n?)+)(<milestone [^>]*unit="juan"[^/>]*/>)}, '\2\1')

  parsed = Nokogiri::XML(xml)
  parsed.remove_namespaces!
  parsed
end
|
731
|
+
|
732
|
+
# Remember the numbers of all "mod" notes in the document, so that the
# anchors of the matching "orig" notes can be suppressed.
#
# @param doc [Nokogiri::XML::Document]
def read_mod_notes(doc)
  doc.xpath("//note[@type='mod']").each do |note|
    @mod_notes.add(note['n'])
  end
end
|
737
|
+
|
738
|
+
# Parse one XML file: take the title (last whitespace-separated word of the
# titleStmt title), collect the mod notes, and render the body.
#
# @param xml_fn [String] path of the XML file
# @return [String] HTML rendered from text/body
def parse_xml(xml_fn)
  @pass = [false]

  doc = open_xml(xml_fn)

  title_node = doc.xpath("//titleStmt/title")[0]
  @title = traverse(title_node, 'txt')
  @title = @title.split[-1]

  read_mod_notes(doc)

  body = doc.root.xpath("text/body")[0]
  @pass = [true]

  traverse(body)
end
|
756
|
+
|
757
|
+
# Recursively delete empty :nav lists from a tree of hashes.
#
# @param node_list [Array<Hash>] entries that each carry a :nav array
# @return [Array<Hash>] the same list, modified in place
def remove_empty_nav(node_list)
  node_list.each do |entry|
    children = entry[:nav]
    if children.empty?
      entry.delete(:nav)
    else
      remove_empty_nav(children)
    end
  end
end
|
766
|
+
|
767
|
+
# Serialize a node as UTF-8 XML text.
#
# The misspelled `pertty: true` option has been dropped.
# NOTE(review): assumed to be dead -- Nokogiri's to_xml documents no
# `pretty`/`pertty` key; confirm unknown keys are ignored before relying on it.
#
# @param e [Nokogiri::XML::Node]
# @return [String]
def to_html(e)
  e.to_xml(encoding: 'UTF-8', save_with: Nokogiri::XML::Node::SaveOptions::AS_XML)
end
|
770
|
+
|
771
|
+
# Render all children of a node and concatenate the results in order.
#
# @param e [Nokogiri::XML::Node]
# @param mode [String] output mode handed to handle_node
# @return [String]
def traverse(e, mode='html')
  e.children.inject('') do |out, child|
    out + handle_node(child, mode)
  end
end
|
779
|
+
|
780
|
+
end
|
data/lib/cbeta/p5a_to_html.rb
CHANGED
@@ -5,18 +5,17 @@ require 'json'
|
|
5
5
|
require 'nokogiri'
|
6
6
|
require 'set'
|
7
7
|
|
8
|
-
# 內容不輸出的元素
|
9
|
-
PASS=['back', 'teiHeader']
|
10
|
-
|
11
|
-
# 某版用字缺的符號
|
12
|
-
MISSING = '-'
|
13
|
-
|
14
8
|
# Convert CBETA XML P5a to HTML
|
15
9
|
#
|
16
10
|
# CBETA XML P5a 可由此取得: https://github.com/cbeta-git/xml-p5a
|
17
11
|
#
|
18
12
|
# 轉檔規則請參考: http://wiki.ddbc.edu.tw/pages/CBETA_XML_P5a_轉_HTML
|
19
13
|
class CBETA::P5aToHTML
|
14
|
+
# 內容不輸出的元素
|
15
|
+
PASS=['back', 'teiHeader']
|
16
|
+
|
17
|
+
# 某版用字缺的符號
|
18
|
+
MISSING = '-'
|
20
19
|
|
21
20
|
# @param xml_root [String] 來源 CBETA XML P5a 路徑
|
22
21
|
# @param out_root [String] 輸出 HTML 路徑
|
@@ -25,11 +24,6 @@ class CBETA::P5aToHTML
|
|
25
24
|
@out_root = out_root
|
26
25
|
@cbeta = CBETA.new
|
27
26
|
@gaijis = CBETA::Gaiji.new
|
28
|
-
|
29
|
-
# 載入 unicode 1.1 字集列表
|
30
|
-
#fn = File.join(File.dirname(__FILE__), 'unicode-1.1.json')
|
31
|
-
#json = File.read(fn)
|
32
|
-
#@unicode1 = JSON.parse(json)
|
33
27
|
end
|
34
28
|
|
35
29
|
# 將 CBETA XML P5a 轉為 HTML
|
@@ -22,11 +22,6 @@ class CBETA::P5aToSimpleHTML
|
|
22
22
|
@output_root = output_root
|
23
23
|
@cbeta = CBETA.new
|
24
24
|
@gaijis = CBETA::Gaiji.new
|
25
|
-
|
26
|
-
# 載入 unicode 1.1 字集列表
|
27
|
-
fn = File.join(File.dirname(__FILE__), 'unicode-1.1.json')
|
28
|
-
json = File.read(fn)
|
29
|
-
@unicode1 = JSON.parse(json)
|
30
25
|
end
|
31
26
|
|
32
27
|
# 將 CBETA XML P5a 轉為 Text
|
data/lib/cbeta/p5a_to_text.rb
CHANGED
@@ -25,11 +25,6 @@ class CBETA::P5aToText
|
|
25
25
|
@format = format
|
26
26
|
@cbeta = CBETA.new
|
27
27
|
@gaijis = CBETA::Gaiji.new
|
28
|
-
|
29
|
-
# 載入 unicode 1.1 字集列表
|
30
|
-
fn = File.join(File.dirname(__FILE__), 'unicode-1.1.json')
|
31
|
-
json = File.read(fn)
|
32
|
-
@unicode1 = JSON.parse(json)
|
33
28
|
end
|
34
29
|
|
35
30
|
# 將 CBETA XML P5a 轉為 Text
|
File without changes
|
@@ -0,0 +1,23 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8" standalone="no"?>
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
|
3
|
+
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
4
|
+
|
5
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="zh-TW" xmlns:xml="http://www.w3.org/XML/1998/namespace">
|
6
|
+
<head>
|
7
|
+
<link href="../Styles/stylesheet.css" rel="stylesheet" type="text/css" />
|
8
|
+
|
9
|
+
<title>贊助</title>
|
10
|
+
</head>
|
11
|
+
|
12
|
+
<body>
|
13
|
+
<p><b>歡迎隨喜贊助</b></p>
|
14
|
+
|
15
|
+
<p><b>劃撥捐款</b></p>
|
16
|
+
|
17
|
+
<p>郵政劃撥帳號:19624224</p>
|
18
|
+
|
19
|
+
<p>戶名:財團法人智諭老和尚教育紀念基金會</p>
|
20
|
+
|
21
|
+
<p>若欲指定特殊用途者,請特別註明,我們會專款專用。</p>
|
22
|
+
</body>
|
23
|
+
</html>
|
@@ -0,0 +1,27 @@
|
|
1
|
+
<?xml version="1.0" encoding="utf-8" standalone="no"?>
|
2
|
+
<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.1//EN"
|
3
|
+
"http://www.w3.org/TR/xhtml11/DTD/xhtml11.dtd">
|
4
|
+
|
5
|
+
<html xmlns="http://www.w3.org/1999/xhtml" xml:lang="zh-TW" xmlns:xml="http://www.w3.org/XML/1998/namespace">
|
6
|
+
<head>
|
7
|
+
<link href="../Styles/stylesheet.css" rel="stylesheet" type="text/css" />
|
8
|
+
|
9
|
+
<title>說明</title>
|
10
|
+
</head>
|
11
|
+
|
12
|
+
<body>
|
13
|
+
<div>
|
14
|
+
<h2>編輯說明</h2>
|
15
|
+
|
16
|
+
<ul>
|
17
|
+
<li>本電子書以<a href="http://www.seeland.org.tw/www/zhiyu/index.html">「西蓮淨苑智諭老和尚著作全集」</a>為資料來源。</li>
|
18
|
+
|
19
|
+
<li>漢字呈現以 Unicode 1.1 為基礎,不在此範圍的字則採用 <a href="http://www.cbeta.org/format/rare-rule.php">組字式</a> 表達。</li>
|
20
|
+
|
21
|
+
<li><span style="line-height: 1.6em;">若有發現任何問題,歡迎來函</span> <a href="mailto:seeland77@gmail.com" style="line-height: 1.6em;">seeland77@gmail.com</a> <span style="line-height: 1.6em;">回報。</span><br /></li>
|
22
|
+
|
23
|
+
<li>版權所有,歡迎自由流通,但禁止營利使用。</li>
|
24
|
+
</ul><br />
|
25
|
+
</div>
|
26
|
+
</body>
|
27
|
+
</html>
|
File without changes
|
File without changes
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cbeta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.4.3
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ray Chou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-08-
|
11
|
+
date: 2015-08-22 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
|
14
14
|
email: zhoubx@gmail.com
|
@@ -16,16 +16,20 @@ executables: []
|
|
16
16
|
extensions: []
|
17
17
|
extra_rdoc_files: []
|
18
18
|
files:
|
19
|
-
- lib/canons.csv
|
20
19
|
- lib/cbeta.rb
|
21
20
|
- lib/cbeta/bm_to_text.rb
|
22
|
-
- lib/cbeta/gaiji.json
|
23
21
|
- lib/cbeta/gaiji.rb
|
24
22
|
- lib/cbeta/html_to_text.rb
|
23
|
+
- lib/cbeta/p5a_to_epub.rb
|
25
24
|
- lib/cbeta/p5a_to_html.rb
|
26
25
|
- lib/cbeta/p5a_to_simple_html.rb
|
27
26
|
- lib/cbeta/p5a_to_text.rb
|
28
|
-
- lib/cbeta/unicode-1.1.json
|
27
|
+
- lib/data/canons.csv
|
28
|
+
- lib/data/epub-donate.xhtml
|
29
|
+
- lib/data/epub-nav.xhtml
|
30
|
+
- lib/data/epub-readme.xhtml
|
31
|
+
- lib/data/gaiji.json
|
32
|
+
- lib/data/unicode-1.1.json
|
29
33
|
homepage: https://github.com/RayCHOU/ruby-cbeta
|
30
34
|
licenses:
|
31
35
|
- MIT
|