cbeta 2.1.1 → 2.1.2
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/cbeta/html_to_pdf.rb +12 -33
- data/lib/cbeta/p5a_parser.rb +6 -5
- data/lib/cbeta/p5a_to_html_for_pdf.rb +133 -31
- data/lib/data/html-for-pdf.css +4 -0
- data/lib/data/pdf-template.htm +2 -6
- metadata +3 -4
- data/lib/data/epub-nav.xhtml +0 -11
- data/lib/data/epub.css +0 -57
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 833cb0a1b76d79d40438f7d5f9b5dc49ce464f99
|
4
|
+
data.tar.gz: 550bd5306dec92d4454c47cd7cb8bfed5065328d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 02c9a174e77a60db9fbda71d119965d36ef3048ebf5d485f657bfde32ba80a5b8e3c7cbc1a60db5ef97a259c7b83cb2a7aca03d37d3133682e87413c0b5f4387
|
7
|
+
data.tar.gz: 1bd1732b427d9778d22ee7b62d2546b4b6be93b29c2ca3a87ca2ca7bd32a34708782af21fc89252fb5bdc321acafd5005152b7e4d38f45308487798dc95796ac
|
data/lib/cbeta/html_to_pdf.rb
CHANGED
@@ -35,7 +35,7 @@ class CBETA::HTMLToPDF
|
|
35
35
|
return convert_all if target.nil?
|
36
36
|
|
37
37
|
arg = target.upcase
|
38
|
-
if arg.size
|
38
|
+
if arg.size <= 2
|
39
39
|
convert_collection(arg)
|
40
40
|
else
|
41
41
|
if arg.include? '..'
|
@@ -49,46 +49,25 @@ class CBETA::HTMLToPDF
|
|
49
49
|
end
|
50
50
|
|
51
51
|
def convert_collection(c)
|
52
|
-
@series = c
|
52
|
+
@canon = c
|
53
53
|
puts 'convert_collection ' + c
|
54
|
-
folder = File.join(@input, @series)
|
55
|
-
Dir.foreach(folder) { |vol|
|
56
|
-
next if ['.', '..', '.DS_Store'].include? vol
|
57
|
-
convert_vol(vol)
|
58
|
-
}
|
59
|
-
end
|
60
|
-
|
61
|
-
def convert_file(html_fn, pdf_fn)
|
62
|
-
puts "convert file: #{html_fn} to #{pdf_fn}"
|
63
|
-
cmd = @converter % { in: html_fn, out: pdf_fn}
|
64
|
-
`#{cmd}`
|
65
|
-
end
|
66
|
-
|
67
|
-
def convert_vol(arg)
|
68
|
-
vol = arg.upcase
|
69
|
-
canon = vol[0]
|
70
|
-
vol_folder = File.join(@input, canon, vol)
|
71
54
|
|
72
|
-
output_folder = File.join(@output, canon
|
55
|
+
output_folder = File.join(@output, @canon)
|
73
56
|
FileUtils.mkdir_p(output_folder) unless Dir.exist? output_folder
|
74
57
|
|
75
|
-
|
58
|
+
folder = File.join(@input, @canon)
|
59
|
+
Dir.foreach(folder) { |f|
|
76
60
|
next if f.start_with? '.'
|
77
|
-
src = File.join(
|
61
|
+
src = File.join(folder, f, 'main.htm')
|
78
62
|
dest = File.join(output_folder, "#{f}.pdf")
|
79
63
|
convert_file(src, dest)
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
def convert_vols(v1, v2)
|
84
|
-
puts "convert volumns: #{v1}..#{v2}"
|
85
|
-
@series = v1[0]
|
86
|
-
folder = File.join(@input, @series)
|
87
|
-
Dir.foreach(folder) { |vol|
|
88
|
-
next if vol < v1
|
89
|
-
next if vol > v2
|
90
|
-
convert_vol(vol)
|
91
64
|
}
|
92
65
|
end
|
93
66
|
|
67
|
+
def convert_file(html_fn, pdf_fn)
|
68
|
+
puts "convert file: #{html_fn} to #{pdf_fn}"
|
69
|
+
cmd = @converter % { in: html_fn, out: pdf_fn}
|
70
|
+
`#{cmd}`
|
71
|
+
end
|
72
|
+
|
94
73
|
end
|
data/lib/cbeta/p5a_parser.rb
CHANGED
@@ -22,11 +22,12 @@ class CBETA::P5aParser
|
|
22
22
|
# @param e [Nokogiri::XML::Element]
|
23
23
|
# @param mode [String] 'html' or 'text', default value: 'html'
|
24
24
|
# @return [Hash]
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
# *
|
29
|
-
#
|
25
|
+
# 回傳
|
26
|
+
# * :content [String] 要放在本文中的文字, 如果 mode=='html', 那麼本文文字會包含 footnote anchor
|
27
|
+
# * :footnote_text [String] 要放在 footnote 的文字
|
28
|
+
# * :footnote_resp [String]
|
29
|
+
# * 'orig': 表示這個註解是底本的註
|
30
|
+
# * 'CBETA': 表示這個註解是 CBETA 修訂過的註
|
30
31
|
def handle_note(e, mode='html')
|
31
32
|
r = {
|
32
33
|
content: '',
|
@@ -28,8 +28,10 @@ class CBETA::P5aToHTMLForPDF
|
|
28
28
|
# @option opts [String] :front_page_title 加在目錄的 front_page 標題
|
29
29
|
# @option opts [String] :back_page 內文後可以加一段 HTML,例如「版權聲明」
|
30
30
|
# @option opts [String] :back_page_title 加在目錄的 back_page 標題
|
31
|
+
# @option opts [Boolean] :toc 要不要放目次, 預設會有目次
|
31
32
|
def initialize(xml_root, out_root, opts={})
|
32
33
|
@config = {
|
34
|
+
toc: true
|
33
35
|
}
|
34
36
|
@config.merge!(opts)
|
35
37
|
|
@@ -61,8 +63,8 @@ class CBETA::P5aToHTMLForPDF
|
|
61
63
|
return convert_all if target.nil?
|
62
64
|
|
63
65
|
arg = target.upcase
|
64
|
-
if arg.size
|
65
|
-
|
66
|
+
if arg.size <= 2
|
67
|
+
convert_collection(arg)
|
66
68
|
else
|
67
69
|
if arg.include? '..'
|
68
70
|
arg.match(/^([^\.]+?)\.\.([^\.]+)$/) {
|
@@ -76,6 +78,17 @@ class CBETA::P5aToHTMLForPDF
|
|
76
78
|
|
77
79
|
private
|
78
80
|
|
81
|
+
def before_convert_work(work_id)
|
82
|
+
@nav_doc = Nokogiri::XML('<ul></ul>')
|
83
|
+
@nav_doc.remove_namespaces!()
|
84
|
+
@nav_root = @nav_doc.at_xpath('/ul')
|
85
|
+
@current_nav = [@nav_root]
|
86
|
+
@mulu_count = 0
|
87
|
+
|
88
|
+
@output_folder_work = File.join(@out_root, @series, work_id)
|
89
|
+
FileUtils.mkdir_p(@output_folder_work) unless Dir.exist? @output_folder_work
|
90
|
+
end
|
91
|
+
|
79
92
|
def before_parse_xml(xml_fn)
|
80
93
|
@div_count = 0
|
81
94
|
@in_l = false
|
@@ -83,33 +96,102 @@ class CBETA::P5aToHTMLForPDF
|
|
83
96
|
@t_buf1 = []
|
84
97
|
@t_buf2 = []
|
85
98
|
@open_divs = []
|
86
|
-
@sutra_no = File.basename(xml_fn, ".xml")
|
87
|
-
|
88
|
-
@output_folder_sutra = File.join(@out_folder, @sutra_no)
|
89
|
-
FileUtils.mkdir_p(@output_folder_sutra) unless Dir.exist? @output_folder_sutra
|
99
|
+
@sutra_no = File.basename(xml_fn, ".xml")
|
90
100
|
|
91
101
|
src = File.join(CBETA::DATA, 'html-for-pdf.css')
|
92
|
-
|
93
|
-
FileUtils.copy(src, dest)
|
94
|
-
|
95
|
-
@nav_doc = Nokogiri::XML('<ul></ul>')
|
96
|
-
@nav_doc.remove_namespaces!()
|
97
|
-
@nav_root = @nav_doc.at_xpath('/ul')
|
98
|
-
@current_nav = [@nav_root]
|
99
|
-
@mulu_count = 0
|
102
|
+
copy_file(src)
|
100
103
|
|
101
104
|
if @config[:front_page_title]
|
102
105
|
s = @config[:front_page_title]
|
103
106
|
@nav_root.add_child("<li><a href='#front'>#{s}</a></li>")
|
104
|
-
end
|
107
|
+
end
|
105
108
|
end
|
106
109
|
|
107
110
|
def convert_all
|
108
111
|
Dir.foreach(@xml_root) { |c|
|
109
112
|
next unless c.match(/^[A-Z]$/)
|
110
|
-
|
113
|
+
convert_collection(c)
|
111
114
|
}
|
112
115
|
end
|
116
|
+
|
117
|
+
def convert_collection(c)
|
118
|
+
@series = c
|
119
|
+
puts 'handle_collection ' + c
|
120
|
+
folder = File.join(@xml_root, @series)
|
121
|
+
@works = {}
|
122
|
+
prepare_work_list(folder)
|
123
|
+
@works.each do |work_id, xml_files|
|
124
|
+
convert_work(work_id, xml_files)
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
def convert_work(work_id, xml_files)
|
129
|
+
puts "convert work #{work_id}"
|
130
|
+
|
131
|
+
before_convert_work(work_id)
|
132
|
+
|
133
|
+
# 目次
|
134
|
+
if @config[:back_page_title]
|
135
|
+
s = @config[:back_page_title]
|
136
|
+
@nav_root.add_child("<li><a href='#back'>#{s}</a></li>")
|
137
|
+
end
|
138
|
+
|
139
|
+
@cover = nil
|
140
|
+
if @config.key? :graphic_base
|
141
|
+
cover = File.join(@config[:graphic_base], 'covers', @series, "#{work_id}.jpg")
|
142
|
+
if File.exist? cover
|
143
|
+
@cover = "<div id='cover'><img src='#{work_id}.jpg' /></div>"
|
144
|
+
copy_file(cover)
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
if @config.key? :front_page
|
149
|
+
s = File.read(@config[:front_page])
|
150
|
+
@front = "<div id='front'>#{s}</div>"
|
151
|
+
end
|
152
|
+
|
153
|
+
if @config.key? :back_page
|
154
|
+
s = File.read(@config[:back_page])
|
155
|
+
@back = "<div id='back'>#{s}</div>"
|
156
|
+
end
|
157
|
+
|
158
|
+
@text = ''
|
159
|
+
xml_files.each do |fn|
|
160
|
+
@text += convert_xml_file(fn)
|
161
|
+
end
|
162
|
+
|
163
|
+
if @config[:toc]
|
164
|
+
@toc = to_html(@nav_root)
|
165
|
+
@toc.gsub!('<ul/>', '')
|
166
|
+
@toc = "<div><h1>目次</h1>#{@toc}</div>"
|
167
|
+
else
|
168
|
+
@toc = ''
|
169
|
+
end
|
170
|
+
|
171
|
+
fn = File.join(CBETA::DATA, 'pdf-template.htm')
|
172
|
+
template = File.read(fn)
|
173
|
+
output = template % {
|
174
|
+
cover: @cover,
|
175
|
+
toc: @toc,
|
176
|
+
front: @front,
|
177
|
+
text: @text,
|
178
|
+
back: @back
|
179
|
+
}
|
180
|
+
|
181
|
+
fn = File.join(@output_folder_work, 'main.htm')
|
182
|
+
File.write(fn, output)
|
183
|
+
end
|
184
|
+
|
185
|
+
def convert_xml_file(xml_fn)
|
186
|
+
before_parse_xml(xml_fn)
|
187
|
+
parse_xml(xml_fn)
|
188
|
+
end
|
189
|
+
|
190
|
+
def copy_file(src)
|
191
|
+
basename = File.basename(src)
|
192
|
+
dest = File.join(@output_folder_work, basename)
|
193
|
+
FileUtils.copy(src, dest)
|
194
|
+
end
|
113
195
|
|
114
196
|
def handle_anchor(e)
|
115
197
|
id = e['id']
|
@@ -141,15 +223,6 @@ class CBETA::P5aToHTMLForPDF
|
|
141
223
|
to_html(cell)
|
142
224
|
end
|
143
225
|
|
144
|
-
def handle_collection(c)
|
145
|
-
@series = c
|
146
|
-
puts 'handle_collection ' + c
|
147
|
-
folder = File.join(@xml_root, @series)
|
148
|
-
Dir.foreach(folder) { |vol|
|
149
|
-
next if ['.', '..', '.DS_Store'].include? vol
|
150
|
-
handle_vol(vol)
|
151
|
-
}
|
152
|
-
end
|
153
226
|
|
154
227
|
def handle_corr(e)
|
155
228
|
"<span class='corr'>%s</span>" % traverse(e)
|
@@ -201,8 +274,7 @@ class CBETA::P5aToHTMLForPDF
|
|
201
274
|
else
|
202
275
|
fn = "#{gid}.gif"
|
203
276
|
src = File.join(@config[:graphic_base], 'sd-gif', gid[3..4], fn)
|
204
|
-
|
205
|
-
FileUtils.copy(src, dest)
|
277
|
+
copy_file(src)
|
206
278
|
return "<img src='#{fn}'/>"
|
207
279
|
end
|
208
280
|
end
|
@@ -210,7 +282,7 @@ class CBETA::P5aToHTMLForPDF
|
|
210
282
|
if gid.start_with?('RJ')
|
211
283
|
fn = "#{gid}.gif"
|
212
284
|
src = File.join(@config[:graphic_base], 'rj-gif', gid[3..4], fn)
|
213
|
-
|
285
|
+
copy_file(src)
|
214
286
|
return "<img src='#{fn}'/>"
|
215
287
|
end
|
216
288
|
|
@@ -229,9 +301,9 @@ class CBETA::P5aToHTMLForPDF
|
|
229
301
|
url.sub!(/^.*(figures\/.*)$/, '\1')
|
230
302
|
|
231
303
|
src = File.join(@config[:graphic_base], url)
|
304
|
+
copy_file(src)
|
305
|
+
|
232
306
|
fn = File.basename(src)
|
233
|
-
dest = File.join(@output_folder_sutra, fn)
|
234
|
-
FileUtils.copy(src, dest)
|
235
307
|
"<img src='#{fn}'/>"
|
236
308
|
end
|
237
309
|
|
@@ -464,6 +536,10 @@ class CBETA::P5aToHTMLForPDF
|
|
464
536
|
@toc = to_html(@nav_root)
|
465
537
|
@toc.gsub!('<ul/>', '')
|
466
538
|
|
539
|
+
if @config.key? :graphic_base
|
540
|
+
|
541
|
+
end
|
542
|
+
|
467
543
|
if @config.key? :front_page
|
468
544
|
s = File.read(@config[:front_page])
|
469
545
|
@front = "<div id='front'>#{s}</div>"
|
@@ -541,7 +617,11 @@ class CBETA::P5aToHTMLForPDF
|
|
541
617
|
abort "未處理底本" if @orig.nil?
|
542
618
|
|
543
619
|
@vol = vol
|
544
|
-
|
620
|
+
if vol.start_with? 'DA'
|
621
|
+
@series = 'DA'
|
622
|
+
else
|
623
|
+
@series = vol[0]
|
624
|
+
end
|
545
625
|
@out_folder = File.join(@out_root, @series, vol)
|
546
626
|
FileUtils.remove_dir(@out_folder, force=true)
|
547
627
|
FileUtils::mkdir_p @out_folder
|
@@ -591,6 +671,11 @@ class CBETA::P5aToHTMLForPDF
|
|
591
671
|
|
592
672
|
@author = doc.at_xpath("//titleStmt/author").text
|
593
673
|
|
674
|
+
if @cover.nil?
|
675
|
+
@cover = "<p class='title'>#{@title}</p>\n"
|
676
|
+
@cover += "<p class='author'>#{@author}</p>"
|
677
|
+
end
|
678
|
+
|
594
679
|
e = doc.at_xpath("//editionStmt/edition/date")
|
595
680
|
abort "找不到版本日期" if e.nil?
|
596
681
|
@edition_date = e.text.sub(/\$Date: (.*?) \$$/, '\1')
|
@@ -627,6 +712,23 @@ class CBETA::P5aToHTMLForPDF
|
|
627
712
|
|
628
713
|
r + "<table>\n"
|
629
714
|
end
|
715
|
+
|
716
|
+
def prepare_work_list(input_folder)
|
717
|
+
Dir.foreach(input_folder) do |f|
|
718
|
+
next if f.start_with? '.'
|
719
|
+
p1 = File.join(input_folder, f)
|
720
|
+
if File.file?(p1)
|
721
|
+
work = f.sub(/^([A-Z]{1,2})\d{2,3}n(.*)\.xml$/, '\1\2')
|
722
|
+
work = 'T0220' if work.start_with? 'T0220'
|
723
|
+
unless @works.key? work
|
724
|
+
@works[work] = []
|
725
|
+
end
|
726
|
+
@works[work] << p1
|
727
|
+
else
|
728
|
+
prepare_work_list(p1)
|
729
|
+
end
|
730
|
+
end
|
731
|
+
end
|
630
732
|
|
631
733
|
def to_html(e)
|
632
734
|
e.to_xml(encoding: 'UTF-8', :save_with => Nokogiri::XML::Node::SaveOptions::AS_XML)
|
data/lib/data/html-for-pdf.css
CHANGED
data/lib/data/pdf-template.htm
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cbeta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.1
|
4
|
+
version: 2.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ray Chou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-12-
|
11
|
+
date: 2015-12-25 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
|
14
14
|
email: zhoubx@gmail.com
|
@@ -32,8 +32,6 @@ files:
|
|
32
32
|
- lib/cbeta/p5a_validator.rb
|
33
33
|
- lib/data/canons.csv
|
34
34
|
- lib/data/categories.json
|
35
|
-
- lib/data/epub-nav.xhtml
|
36
|
-
- lib/data/epub.css
|
37
35
|
- lib/data/gaiji.json
|
38
36
|
- lib/data/html-for-pdf.css
|
39
37
|
- lib/data/pdf-template.htm
|
@@ -63,3 +61,4 @@ signing_key:
|
|
63
61
|
specification_version: 4
|
64
62
|
summary: CBETA Tools
|
65
63
|
test_files: []
|
64
|
+
has_rdoc:
|
data/lib/data/epub-nav.xhtml
DELETED
data/lib/data/epub.css
DELETED
@@ -1,57 +0,0 @@
|
|
1
|
-
div.p {
|
2
|
-
margin-bottom: 20px;
|
3
|
-
line-height: 1.4;
|
4
|
-
text-indent: 2em;
|
5
|
-
}
|
6
|
-
p.byline {
|
7
|
-
text-align: right;
|
8
|
-
}
|
9
|
-
p.h1 {
|
10
|
-
text-indent: 2em;
|
11
|
-
font-weight: bold;
|
12
|
-
}
|
13
|
-
p.h2 {
|
14
|
-
text-indent: 3em;
|
15
|
-
font-weight: bold;
|
16
|
-
}
|
17
|
-
p.h3 {
|
18
|
-
text-indent: 4em;
|
19
|
-
font-weight: bold;
|
20
|
-
}
|
21
|
-
p.h4 {
|
22
|
-
text-indent: 2em;
|
23
|
-
font-weight: bold;
|
24
|
-
}
|
25
|
-
p.h5 {
|
26
|
-
text-indent: 3em;
|
27
|
-
font-weight: bold;
|
28
|
-
}
|
29
|
-
p.h6 {
|
30
|
-
text-indent: 4em;
|
31
|
-
font-weight: bold;
|
32
|
-
}
|
33
|
-
p.h7 {
|
34
|
-
text-indent: 2em;
|
35
|
-
font-weight: bold;
|
36
|
-
}
|
37
|
-
p.h8 {
|
38
|
-
text-indent: 2em;
|
39
|
-
font-weight: bold;
|
40
|
-
}
|
41
|
-
span.corr {
|
42
|
-
color: red;
|
43
|
-
}
|
44
|
-
table {
|
45
|
-
border-collapse: collapse;
|
46
|
-
}
|
47
|
-
th, td {
|
48
|
-
border: solid;
|
49
|
-
border-width: 1px;
|
50
|
-
padding: 5px;
|
51
|
-
word-wrap: break-word;
|
52
|
-
word-break: break-all;
|
53
|
-
text-indent: 0;
|
54
|
-
}
|
55
|
-
ul.simple {
|
56
|
-
list-style-type: none;
|
57
|
-
}
|