cbeta 2.1.1 → 2.1.2
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/cbeta/html_to_pdf.rb +12 -33
- data/lib/cbeta/p5a_parser.rb +6 -5
- data/lib/cbeta/p5a_to_html_for_pdf.rb +133 -31
- data/lib/data/html-for-pdf.css +4 -0
- data/lib/data/pdf-template.htm +2 -6
- metadata +3 -4
- data/lib/data/epub-nav.xhtml +0 -11
- data/lib/data/epub.css +0 -57
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 833cb0a1b76d79d40438f7d5f9b5dc49ce464f99
|
4
|
+
data.tar.gz: 550bd5306dec92d4454c47cd7cb8bfed5065328d
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 02c9a174e77a60db9fbda71d119965d36ef3048ebf5d485f657bfde32ba80a5b8e3c7cbc1a60db5ef97a259c7b83cb2a7aca03d37d3133682e87413c0b5f4387
|
7
|
+
data.tar.gz: 1bd1732b427d9778d22ee7b62d2546b4b6be93b29c2ca3a87ca2ca7bd32a34708782af21fc89252fb5bdc321acafd5005152b7e4d38f45308487798dc95796ac
|
data/lib/cbeta/html_to_pdf.rb
CHANGED
@@ -35,7 +35,7 @@ class CBETA::HTMLToPDF
|
|
35
35
|
return convert_all if target.nil?
|
36
36
|
|
37
37
|
arg = target.upcase
|
38
|
-
if arg.size
|
38
|
+
if arg.size <= 2
|
39
39
|
convert_collection(arg)
|
40
40
|
else
|
41
41
|
if arg.include? '..'
|
@@ -49,46 +49,25 @@ class CBETA::HTMLToPDF
|
|
49
49
|
end
|
50
50
|
|
51
51
|
def convert_collection(c)
|
52
|
-
@
|
52
|
+
@canon = c
|
53
53
|
puts 'convert_collection ' + c
|
54
|
-
folder = File.join(@input, @series)
|
55
|
-
Dir.foreach(folder) { |vol|
|
56
|
-
next if ['.', '..', '.DS_Store'].include? vol
|
57
|
-
convert_vol(vol)
|
58
|
-
}
|
59
|
-
end
|
60
|
-
|
61
|
-
def convert_file(html_fn, pdf_fn)
|
62
|
-
puts "convert file: #{html_fn} to #{pdf_fn}"
|
63
|
-
cmd = @converter % { in: html_fn, out: pdf_fn}
|
64
|
-
`#{cmd}`
|
65
|
-
end
|
66
|
-
|
67
|
-
def convert_vol(arg)
|
68
|
-
vol = arg.upcase
|
69
|
-
canon = vol[0]
|
70
|
-
vol_folder = File.join(@input, canon, vol)
|
71
54
|
|
72
|
-
output_folder = File.join(@output, canon
|
55
|
+
output_folder = File.join(@output, @canon)
|
73
56
|
FileUtils.mkdir_p(output_folder) unless Dir.exist? output_folder
|
74
57
|
|
75
|
-
|
58
|
+
folder = File.join(@input, @canon)
|
59
|
+
Dir.foreach(folder) { |f|
|
76
60
|
next if f.start_with? '.'
|
77
|
-
src = File.join(
|
61
|
+
src = File.join(folder, f, 'main.htm')
|
78
62
|
dest = File.join(output_folder, "#{f}.pdf")
|
79
63
|
convert_file(src, dest)
|
80
|
-
end
|
81
|
-
end
|
82
|
-
|
83
|
-
def convert_vols(v1, v2)
|
84
|
-
puts "convert volumns: #{v1}..#{v2}"
|
85
|
-
@series = v1[0]
|
86
|
-
folder = File.join(@input, @series)
|
87
|
-
Dir.foreach(folder) { |vol|
|
88
|
-
next if vol < v1
|
89
|
-
next if vol > v2
|
90
|
-
convert_vol(vol)
|
91
64
|
}
|
92
65
|
end
|
93
66
|
|
67
|
+
def convert_file(html_fn, pdf_fn)
|
68
|
+
puts "convert file: #{html_fn} to #{pdf_fn}"
|
69
|
+
cmd = @converter % { in: html_fn, out: pdf_fn}
|
70
|
+
`#{cmd}`
|
71
|
+
end
|
72
|
+
|
94
73
|
end
|
data/lib/cbeta/p5a_parser.rb
CHANGED
@@ -22,11 +22,12 @@ class CBETA::P5aParser
|
|
22
22
|
# @param e [Nokogiri::XML::Element]
|
23
23
|
# @param mode [String] 'html' or 'text', default value: 'html'
|
24
24
|
# @return [Hash]
|
25
|
-
#
|
26
|
-
#
|
27
|
-
#
|
28
|
-
# *
|
29
|
-
#
|
25
|
+
# 回傳
|
26
|
+
# * :content [String] 要放在本文中的文字, 如果 mode=='html', 那麼本文文字會包含 footnote anchor
|
27
|
+
# * :footnote_text [String] 要放在 footnote 的文字
|
28
|
+
# * :footnote_resp [String]
|
29
|
+
# * 'orig': 表示這個註解是底本的註
|
30
|
+
# * 'CBETA': 表示這個註解是 CBETA 修訂過的註
|
30
31
|
def handle_note(e, mode='html')
|
31
32
|
r = {
|
32
33
|
content: '',
|
@@ -28,8 +28,10 @@ class CBETA::P5aToHTMLForPDF
|
|
28
28
|
# @option opts [String] :front_page_title 加在目錄的 front_page 標題
|
29
29
|
# @option opts [String] :back_page 內文後可以加一段 HTML,例如「版權聲明」
|
30
30
|
# @option opts [String] :back_page_title 加在目錄的 back_page 標題
|
31
|
+
# @option opts [Boolean] :toc 要不要放目次, 預設會有目次
|
31
32
|
def initialize(xml_root, out_root, opts={})
|
32
33
|
@config = {
|
34
|
+
toc: true
|
33
35
|
}
|
34
36
|
@config.merge!(opts)
|
35
37
|
|
@@ -61,8 +63,8 @@ class CBETA::P5aToHTMLForPDF
|
|
61
63
|
return convert_all if target.nil?
|
62
64
|
|
63
65
|
arg = target.upcase
|
64
|
-
if arg.size
|
65
|
-
|
66
|
+
if arg.size <= 2
|
67
|
+
convert_collection(arg)
|
66
68
|
else
|
67
69
|
if arg.include? '..'
|
68
70
|
arg.match(/^([^\.]+?)\.\.([^\.]+)$/) {
|
@@ -76,6 +78,17 @@ class CBETA::P5aToHTMLForPDF
|
|
76
78
|
|
77
79
|
private
|
78
80
|
|
81
|
+
def before_convert_work(work_id)
|
82
|
+
@nav_doc = Nokogiri::XML('<ul></ul>')
|
83
|
+
@nav_doc.remove_namespaces!()
|
84
|
+
@nav_root = @nav_doc.at_xpath('/ul')
|
85
|
+
@current_nav = [@nav_root]
|
86
|
+
@mulu_count = 0
|
87
|
+
|
88
|
+
@output_folder_work = File.join(@out_root, @series, work_id)
|
89
|
+
FileUtils.mkdir_p(@output_folder_work) unless Dir.exist? @output_folder_work
|
90
|
+
end
|
91
|
+
|
79
92
|
def before_parse_xml(xml_fn)
|
80
93
|
@div_count = 0
|
81
94
|
@in_l = false
|
@@ -83,33 +96,102 @@ class CBETA::P5aToHTMLForPDF
|
|
83
96
|
@t_buf1 = []
|
84
97
|
@t_buf2 = []
|
85
98
|
@open_divs = []
|
86
|
-
@sutra_no = File.basename(xml_fn, ".xml")
|
87
|
-
|
88
|
-
@output_folder_sutra = File.join(@out_folder, @sutra_no)
|
89
|
-
FileUtils.mkdir_p(@output_folder_sutra) unless Dir.exist? @output_folder_sutra
|
99
|
+
@sutra_no = File.basename(xml_fn, ".xml")
|
90
100
|
|
91
101
|
src = File.join(CBETA::DATA, 'html-for-pdf.css')
|
92
|
-
|
93
|
-
FileUtils.copy(src, dest)
|
94
|
-
|
95
|
-
@nav_doc = Nokogiri::XML('<ul></ul>')
|
96
|
-
@nav_doc.remove_namespaces!()
|
97
|
-
@nav_root = @nav_doc.at_xpath('/ul')
|
98
|
-
@current_nav = [@nav_root]
|
99
|
-
@mulu_count = 0
|
102
|
+
copy_file(src)
|
100
103
|
|
101
104
|
if @config[:front_page_title]
|
102
105
|
s = @config[:front_page_title]
|
103
106
|
@nav_root.add_child("<li><a href='#front'>#{s}</a></li>")
|
104
|
-
end
|
107
|
+
end
|
105
108
|
end
|
106
109
|
|
107
110
|
def convert_all
|
108
111
|
Dir.foreach(@xml_root) { |c|
|
109
112
|
next unless c.match(/^[A-Z]$/)
|
110
|
-
|
113
|
+
convert_collection(c)
|
111
114
|
}
|
112
115
|
end
|
116
|
+
|
117
|
+
def convert_collection(c)
|
118
|
+
@series = c
|
119
|
+
puts 'handle_collection ' + c
|
120
|
+
folder = File.join(@xml_root, @series)
|
121
|
+
@works = {}
|
122
|
+
prepare_work_list(folder)
|
123
|
+
@works.each do |work_id, xml_files|
|
124
|
+
convert_work(work_id, xml_files)
|
125
|
+
end
|
126
|
+
end
|
127
|
+
|
128
|
+
def convert_work(work_id, xml_files)
|
129
|
+
puts "convert work #{work_id}"
|
130
|
+
|
131
|
+
before_convert_work(work_id)
|
132
|
+
|
133
|
+
# 目次
|
134
|
+
if @config[:back_page_title]
|
135
|
+
s = @config[:back_page_title]
|
136
|
+
@nav_root.add_child("<li><a href='#back'>#{s}</a></li>")
|
137
|
+
end
|
138
|
+
|
139
|
+
@cover = nil
|
140
|
+
if @config.key? :graphic_base
|
141
|
+
cover = File.join(@config[:graphic_base], 'covers', @series, "#{work_id}.jpg")
|
142
|
+
if File.exist? cover
|
143
|
+
@cover = "<div id='cover'><img src='#{work_id}.jpg' /></div>"
|
144
|
+
copy_file(cover)
|
145
|
+
end
|
146
|
+
end
|
147
|
+
|
148
|
+
if @config.key? :front_page
|
149
|
+
s = File.read(@config[:front_page])
|
150
|
+
@front = "<div id='front'>#{s}</div>"
|
151
|
+
end
|
152
|
+
|
153
|
+
if @config.key? :back_page
|
154
|
+
s = File.read(@config[:back_page])
|
155
|
+
@back = "<div id='back'>#{s}</div>"
|
156
|
+
end
|
157
|
+
|
158
|
+
@text = ''
|
159
|
+
xml_files.each do |fn|
|
160
|
+
@text += convert_xml_file(fn)
|
161
|
+
end
|
162
|
+
|
163
|
+
if @config[:toc]
|
164
|
+
@toc = to_html(@nav_root)
|
165
|
+
@toc.gsub!('<ul/>', '')
|
166
|
+
@toc = "<div><h1>目次</h1>#{@toc}</div>"
|
167
|
+
else
|
168
|
+
@toc = ''
|
169
|
+
end
|
170
|
+
|
171
|
+
fn = File.join(CBETA::DATA, 'pdf-template.htm')
|
172
|
+
template = File.read(fn)
|
173
|
+
output = template % {
|
174
|
+
cover: @cover,
|
175
|
+
toc: @toc,
|
176
|
+
front: @front,
|
177
|
+
text: @text,
|
178
|
+
back: @back
|
179
|
+
}
|
180
|
+
|
181
|
+
fn = File.join(@output_folder_work, 'main.htm')
|
182
|
+
File.write(fn, output)
|
183
|
+
end
|
184
|
+
|
185
|
+
def convert_xml_file(xml_fn)
|
186
|
+
before_parse_xml(xml_fn)
|
187
|
+
parse_xml(xml_fn)
|
188
|
+
end
|
189
|
+
|
190
|
+
def copy_file(src)
|
191
|
+
basename = File.basename(src)
|
192
|
+
dest = File.join(@output_folder_work, basename)
|
193
|
+
FileUtils.copy(src, dest)
|
194
|
+
end
|
113
195
|
|
114
196
|
def handle_anchor(e)
|
115
197
|
id = e['id']
|
@@ -141,15 +223,6 @@ class CBETA::P5aToHTMLForPDF
|
|
141
223
|
to_html(cell)
|
142
224
|
end
|
143
225
|
|
144
|
-
def handle_collection(c)
|
145
|
-
@series = c
|
146
|
-
puts 'handle_collection ' + c
|
147
|
-
folder = File.join(@xml_root, @series)
|
148
|
-
Dir.foreach(folder) { |vol|
|
149
|
-
next if ['.', '..', '.DS_Store'].include? vol
|
150
|
-
handle_vol(vol)
|
151
|
-
}
|
152
|
-
end
|
153
226
|
|
154
227
|
def handle_corr(e)
|
155
228
|
"<span class='corr'>%s</span>" % traverse(e)
|
@@ -201,8 +274,7 @@ class CBETA::P5aToHTMLForPDF
|
|
201
274
|
else
|
202
275
|
fn = "#{gid}.gif"
|
203
276
|
src = File.join(@config[:graphic_base], 'sd-gif', gid[3..4], fn)
|
204
|
-
|
205
|
-
FileUtils.copy(src, dest)
|
277
|
+
copy_file(src)
|
206
278
|
return "<img src='#{fn}'/>"
|
207
279
|
end
|
208
280
|
end
|
@@ -210,7 +282,7 @@ class CBETA::P5aToHTMLForPDF
|
|
210
282
|
if gid.start_with?('RJ')
|
211
283
|
fn = "#{gid}.gif"
|
212
284
|
src = File.join(@config[:graphic_base], 'rj-gif', gid[3..4], fn)
|
213
|
-
|
285
|
+
copy_file(src)
|
214
286
|
return "<img src='#{fn}'/>"
|
215
287
|
end
|
216
288
|
|
@@ -229,9 +301,9 @@ class CBETA::P5aToHTMLForPDF
|
|
229
301
|
url.sub!(/^.*(figures\/.*)$/, '\1')
|
230
302
|
|
231
303
|
src = File.join(@config[:graphic_base], url)
|
304
|
+
copy_file(src)
|
305
|
+
|
232
306
|
fn = File.basename(src)
|
233
|
-
dest = File.join(@output_folder_sutra, fn)
|
234
|
-
FileUtils.copy(src, dest)
|
235
307
|
"<img src='#{fn}'/>"
|
236
308
|
end
|
237
309
|
|
@@ -464,6 +536,10 @@ class CBETA::P5aToHTMLForPDF
|
|
464
536
|
@toc = to_html(@nav_root)
|
465
537
|
@toc.gsub!('<ul/>', '')
|
466
538
|
|
539
|
+
if @config.key? :graphic_base
|
540
|
+
|
541
|
+
end
|
542
|
+
|
467
543
|
if @config.key? :front_page
|
468
544
|
s = File.read(@config[:front_page])
|
469
545
|
@front = "<div id='front'>#{s}</div>"
|
@@ -541,7 +617,11 @@ class CBETA::P5aToHTMLForPDF
|
|
541
617
|
abort "未處理底本" if @orig.nil?
|
542
618
|
|
543
619
|
@vol = vol
|
544
|
-
|
620
|
+
if vol.start_with? 'DA'
|
621
|
+
@series = 'DA'
|
622
|
+
else
|
623
|
+
@series = vol[0]
|
624
|
+
end
|
545
625
|
@out_folder = File.join(@out_root, @series, vol)
|
546
626
|
FileUtils.remove_dir(@out_folder, force=true)
|
547
627
|
FileUtils::mkdir_p @out_folder
|
@@ -591,6 +671,11 @@ class CBETA::P5aToHTMLForPDF
|
|
591
671
|
|
592
672
|
@author = doc.at_xpath("//titleStmt/author").text
|
593
673
|
|
674
|
+
if @cover.nil?
|
675
|
+
@cover = "<p class='title'>#{@title}</p>\n"
|
676
|
+
@cover += "<p class='author'>#{@author}</p>"
|
677
|
+
end
|
678
|
+
|
594
679
|
e = doc.at_xpath("//editionStmt/edition/date")
|
595
680
|
abort "找不到版本日期" if e.nil?
|
596
681
|
@edition_date = e.text.sub(/\$Date: (.*?) \$$/, '\1')
|
@@ -627,6 +712,23 @@ class CBETA::P5aToHTMLForPDF
|
|
627
712
|
|
628
713
|
r + "<table>\n"
|
629
714
|
end
|
715
|
+
|
716
|
+
def prepare_work_list(input_folder)
|
717
|
+
Dir.foreach(input_folder) do |f|
|
718
|
+
next if f.start_with? '.'
|
719
|
+
p1 = File.join(input_folder, f)
|
720
|
+
if File.file?(p1)
|
721
|
+
work = f.sub(/^([A-Z]{1,2})\d{2,3}n(.*)\.xml$/, '\1\2')
|
722
|
+
work = 'T0220' if work.start_with? 'T0220'
|
723
|
+
unless @works.key? work
|
724
|
+
@works[work] = []
|
725
|
+
end
|
726
|
+
@works[work] << p1
|
727
|
+
else
|
728
|
+
prepare_work_list(p1)
|
729
|
+
end
|
730
|
+
end
|
731
|
+
end
|
630
732
|
|
631
733
|
def to_html(e)
|
632
734
|
e.to_xml(encoding: 'UTF-8', :save_with => Nokogiri::XML::Node::SaveOptions::AS_XML)
|
data/lib/data/html-for-pdf.css
CHANGED
data/lib/data/pdf-template.htm
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: cbeta
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 2.1.
|
4
|
+
version: 2.1.2
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ray Chou
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-12-
|
11
|
+
date: 2015-12-25 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
|
14
14
|
email: zhoubx@gmail.com
|
@@ -32,8 +32,6 @@ files:
|
|
32
32
|
- lib/cbeta/p5a_validator.rb
|
33
33
|
- lib/data/canons.csv
|
34
34
|
- lib/data/categories.json
|
35
|
-
- lib/data/epub-nav.xhtml
|
36
|
-
- lib/data/epub.css
|
37
35
|
- lib/data/gaiji.json
|
38
36
|
- lib/data/html-for-pdf.css
|
39
37
|
- lib/data/pdf-template.htm
|
@@ -63,3 +61,4 @@ signing_key:
|
|
63
61
|
specification_version: 4
|
64
62
|
summary: CBETA Tools
|
65
63
|
test_files: []
|
64
|
+
has_rdoc:
|
data/lib/data/epub-nav.xhtml
DELETED
data/lib/data/epub.css
DELETED
@@ -1,57 +0,0 @@
|
|
1
|
-
div.p {
|
2
|
-
margin-bottom: 20px;
|
3
|
-
line-height: 1.4;
|
4
|
-
text-indent: 2em;
|
5
|
-
}
|
6
|
-
p.byline {
|
7
|
-
text-align: right;
|
8
|
-
}
|
9
|
-
p.h1 {
|
10
|
-
text-indent: 2em;
|
11
|
-
font-weight: bold;
|
12
|
-
}
|
13
|
-
p.h2 {
|
14
|
-
text-indent: 3em;
|
15
|
-
font-weight: bold;
|
16
|
-
}
|
17
|
-
p.h3 {
|
18
|
-
text-indent: 4em;
|
19
|
-
font-weight: bold;
|
20
|
-
}
|
21
|
-
p.h4 {
|
22
|
-
text-indent: 2em;
|
23
|
-
font-weight: bold;
|
24
|
-
}
|
25
|
-
p.h5 {
|
26
|
-
text-indent: 3em;
|
27
|
-
font-weight: bold;
|
28
|
-
}
|
29
|
-
p.h6 {
|
30
|
-
text-indent: 4em;
|
31
|
-
font-weight: bold;
|
32
|
-
}
|
33
|
-
p.h7 {
|
34
|
-
text-indent: 2em;
|
35
|
-
font-weight: bold;
|
36
|
-
}
|
37
|
-
p.h8 {
|
38
|
-
text-indent: 2em;
|
39
|
-
font-weight: bold;
|
40
|
-
}
|
41
|
-
span.corr {
|
42
|
-
color: red;
|
43
|
-
}
|
44
|
-
table {
|
45
|
-
border-collapse: collapse;
|
46
|
-
}
|
47
|
-
th, td {
|
48
|
-
border: solid;
|
49
|
-
border-width: 1px;
|
50
|
-
padding: 5px;
|
51
|
-
word-wrap: break-word;
|
52
|
-
word-break: break-all;
|
53
|
-
text-indent: 0;
|
54
|
-
}
|
55
|
-
ul.simple {
|
56
|
-
list-style-type: none;
|
57
|
-
}
|