cbeta 2.1.1 → 2.1.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f4f9d8df5d6d472d3cd24eab6cc0b33086d14c14
4
- data.tar.gz: dcebae46e56c9895cd7bbcd0162161218d8b710a
3
+ metadata.gz: 833cb0a1b76d79d40438f7d5f9b5dc49ce464f99
4
+ data.tar.gz: 550bd5306dec92d4454c47cd7cb8bfed5065328d
5
5
  SHA512:
6
- metadata.gz: e10fd20296adc40ca814984d90a3c2c23c0b5d46763f7ae80ccdd89f5b407a68014446eefa0b9671f58be315fa6af30aeddf1e01e7e8ca77072f99e4b8857ee4
7
- data.tar.gz: 11c4590682a0f6e85b4be01e1c85a2ed19f21d38ec0881376f2903442f88c650c07a5a3910fb608c894d44a3d649b857f855dcd475cb9ede769aa8eb8cf89d75
6
+ metadata.gz: 02c9a174e77a60db9fbda71d119965d36ef3048ebf5d485f657bfde32ba80a5b8e3c7cbc1a60db5ef97a259c7b83cb2a7aca03d37d3133682e87413c0b5f4387
7
+ data.tar.gz: 1bd1732b427d9778d22ee7b62d2546b4b6be93b29c2ca3a87ca2ca7bd32a34708782af21fc89252fb5bdc321acafd5005152b7e4d38f45308487798dc95796ac
@@ -35,7 +35,7 @@ class CBETA::HTMLToPDF
35
35
  return convert_all if target.nil?
36
36
 
37
37
  arg = target.upcase
38
- if arg.size == 1
38
+ if arg.size <= 2
39
39
  convert_collection(arg)
40
40
  else
41
41
  if arg.include? '..'
@@ -49,46 +49,25 @@ class CBETA::HTMLToPDF
49
49
  end
50
50
 
51
51
  def convert_collection(c)
52
- @series = c
52
+ @canon = c
53
53
  puts 'convert_collection ' + c
54
- folder = File.join(@input, @series)
55
- Dir.foreach(folder) { |vol|
56
- next if ['.', '..', '.DS_Store'].include? vol
57
- convert_vol(vol)
58
- }
59
- end
60
-
61
- def convert_file(html_fn, pdf_fn)
62
- puts "convert file: #{html_fn} to #{pdf_fn}"
63
- cmd = @converter % { in: html_fn, out: pdf_fn}
64
- `#{cmd}`
65
- end
66
-
67
- def convert_vol(arg)
68
- vol = arg.upcase
69
- canon = vol[0]
70
- vol_folder = File.join(@input, canon, vol)
71
54
 
72
- output_folder = File.join(@output, canon, vol)
55
+ output_folder = File.join(@output, @canon)
73
56
  FileUtils.mkdir_p(output_folder) unless Dir.exist? output_folder
74
57
 
75
- Dir.entries(vol_folder).sort.each do |f|
58
+ folder = File.join(@input, @canon)
59
+ Dir.foreach(folder) { |f|
76
60
  next if f.start_with? '.'
77
- src = File.join(vol_folder, f, 'main.htm')
61
+ src = File.join(folder, f, 'main.htm')
78
62
  dest = File.join(output_folder, "#{f}.pdf")
79
63
  convert_file(src, dest)
80
- end
81
- end
82
-
83
- def convert_vols(v1, v2)
84
- puts "convert volumns: #{v1}..#{v2}"
85
- @series = v1[0]
86
- folder = File.join(@input, @series)
87
- Dir.foreach(folder) { |vol|
88
- next if vol < v1
89
- next if vol > v2
90
- convert_vol(vol)
91
64
  }
92
65
  end
93
66
 
67
+ def convert_file(html_fn, pdf_fn)
68
+ puts "convert file: #{html_fn} to #{pdf_fn}"
69
+ cmd = @converter % { in: html_fn, out: pdf_fn}
70
+ `#{cmd}`
71
+ end
72
+
94
73
  end
@@ -22,11 +22,12 @@ class CBETA::P5aParser
22
22
  # @param e [Nokogiri::XML::Element]
23
23
  # @param mode [String] 'html' or 'text', default value: 'html'
24
24
  # @return [Hash]
25
- # * :content [String] 要放在本文中的文字, 如果 mode=='html', 那麼本文文字會包含 footnote anchor
26
- # * :footnote_text [String] 要放在 footnote 的文字
27
- # * :footnote_resp [String]
28
- # * 'orig': 表示這個註解是底本的註
29
- # * 'CBETA': 表示這個註解是 CBETA 修訂過的註
25
+ # 回傳
26
+ # * :content [String] 要放在本文中的文字, 如果 mode=='html', 那麼本文文字會包含 footnote anchor
27
+ # * :footnote_text [String] 要放在 footnote 的文字
28
+ # * :footnote_resp [String]
29
+ # * 'orig': 表示這個註解是底本的註
30
+ # * 'CBETA': 表示這個註解是 CBETA 修訂過的註
30
31
  def handle_note(e, mode='html')
31
32
  r = {
32
33
  content: '',
@@ -28,8 +28,10 @@ class CBETA::P5aToHTMLForPDF
28
28
  # @option opts [String] :front_page_title 加在目錄的 front_page 標題
29
29
  # @option opts [String] :back_page 內文後可以加一段 HTML,例如「版權聲明」
30
30
  # @option opts [String] :back_page_title 加在目錄的 back_page 標題
31
+ # @option opts [Boolean] :toc 要不要放目次, 預設會有目次
31
32
  def initialize(xml_root, out_root, opts={})
32
33
  @config = {
34
+ toc: true
33
35
  }
34
36
  @config.merge!(opts)
35
37
 
@@ -61,8 +63,8 @@ class CBETA::P5aToHTMLForPDF
61
63
  return convert_all if target.nil?
62
64
 
63
65
  arg = target.upcase
64
- if arg.size == 1
65
- handle_collection(arg)
66
+ if arg.size <= 2
67
+ convert_collection(arg)
66
68
  else
67
69
  if arg.include? '..'
68
70
  arg.match(/^([^\.]+?)\.\.([^\.]+)$/) {
@@ -76,6 +78,17 @@ class CBETA::P5aToHTMLForPDF
76
78
 
77
79
  private
78
80
 
81
+ def before_convert_work(work_id)
82
+ @nav_doc = Nokogiri::XML('<ul></ul>')
83
+ @nav_doc.remove_namespaces!()
84
+ @nav_root = @nav_doc.at_xpath('/ul')
85
+ @current_nav = [@nav_root]
86
+ @mulu_count = 0
87
+
88
+ @output_folder_work = File.join(@out_root, @series, work_id)
89
+ FileUtils.mkdir_p(@output_folder_work) unless Dir.exist? @output_folder_work
90
+ end
91
+
79
92
  def before_parse_xml(xml_fn)
80
93
  @div_count = 0
81
94
  @in_l = false
@@ -83,33 +96,102 @@ class CBETA::P5aToHTMLForPDF
83
96
  @t_buf1 = []
84
97
  @t_buf2 = []
85
98
  @open_divs = []
86
- @sutra_no = File.basename(xml_fn, ".xml")
87
-
88
- @output_folder_sutra = File.join(@out_folder, @sutra_no)
89
- FileUtils.mkdir_p(@output_folder_sutra) unless Dir.exist? @output_folder_sutra
99
+ @sutra_no = File.basename(xml_fn, ".xml")
90
100
 
91
101
  src = File.join(CBETA::DATA, 'html-for-pdf.css')
92
- dest = File.join(@output_folder_sutra, 'html-for-pdf.css')
93
- FileUtils.copy(src, dest)
94
-
95
- @nav_doc = Nokogiri::XML('<ul></ul>')
96
- @nav_doc.remove_namespaces!()
97
- @nav_root = @nav_doc.at_xpath('/ul')
98
- @current_nav = [@nav_root]
99
- @mulu_count = 0
102
+ copy_file(src)
100
103
 
101
104
  if @config[:front_page_title]
102
105
  s = @config[:front_page_title]
103
106
  @nav_root.add_child("<li><a href='#front'>#{s}</a></li>")
104
- end
107
+ end
105
108
  end
106
109
 
107
110
  def convert_all
108
111
  Dir.foreach(@xml_root) { |c|
109
112
  next unless c.match(/^[A-Z]$/)
110
- handle_collection(c)
113
+ convert_collection(c)
111
114
  }
112
115
  end
116
+
117
+ def convert_collection(c)
118
+ @series = c
119
+ puts 'handle_collection ' + c
120
+ folder = File.join(@xml_root, @series)
121
+ @works = {}
122
+ prepare_work_list(folder)
123
+ @works.each do |work_id, xml_files|
124
+ convert_work(work_id, xml_files)
125
+ end
126
+ end
127
+
128
+ def convert_work(work_id, xml_files)
129
+ puts "convert work #{work_id}"
130
+
131
+ before_convert_work(work_id)
132
+
133
+ # 目次
134
+ if @config[:back_page_title]
135
+ s = @config[:back_page_title]
136
+ @nav_root.add_child("<li><a href='#back'>#{s}</a></li>")
137
+ end
138
+
139
+ @cover = nil
140
+ if @config.key? :graphic_base
141
+ cover = File.join(@config[:graphic_base], 'covers', @series, "#{work_id}.jpg")
142
+ if File.exist? cover
143
+ @cover = "<div id='cover'><img src='#{work_id}.jpg' /></div>"
144
+ copy_file(cover)
145
+ end
146
+ end
147
+
148
+ if @config.key? :front_page
149
+ s = File.read(@config[:front_page])
150
+ @front = "<div id='front'>#{s}</div>"
151
+ end
152
+
153
+ if @config.key? :back_page
154
+ s = File.read(@config[:back_page])
155
+ @back = "<div id='back'>#{s}</div>"
156
+ end
157
+
158
+ @text = ''
159
+ xml_files.each do |fn|
160
+ @text += convert_xml_file(fn)
161
+ end
162
+
163
+ if @config[:toc]
164
+ @toc = to_html(@nav_root)
165
+ @toc.gsub!('<ul/>', '')
166
+ @toc = "<div><h1>目次</h1>#{@toc}</div>"
167
+ else
168
+ @toc = ''
169
+ end
170
+
171
+ fn = File.join(CBETA::DATA, 'pdf-template.htm')
172
+ template = File.read(fn)
173
+ output = template % {
174
+ cover: @cover,
175
+ toc: @toc,
176
+ front: @front,
177
+ text: @text,
178
+ back: @back
179
+ }
180
+
181
+ fn = File.join(@output_folder_work, 'main.htm')
182
+ File.write(fn, output)
183
+ end
184
+
185
+ def convert_xml_file(xml_fn)
186
+ before_parse_xml(xml_fn)
187
+ parse_xml(xml_fn)
188
+ end
189
+
190
+ def copy_file(src)
191
+ basename = File.basename(src)
192
+ dest = File.join(@output_folder_work, basename)
193
+ FileUtils.copy(src, dest)
194
+ end
113
195
 
114
196
  def handle_anchor(e)
115
197
  id = e['id']
@@ -141,15 +223,6 @@ class CBETA::P5aToHTMLForPDF
141
223
  to_html(cell)
142
224
  end
143
225
 
144
- def handle_collection(c)
145
- @series = c
146
- puts 'handle_collection ' + c
147
- folder = File.join(@xml_root, @series)
148
- Dir.foreach(folder) { |vol|
149
- next if ['.', '..', '.DS_Store'].include? vol
150
- handle_vol(vol)
151
- }
152
- end
153
226
 
154
227
  def handle_corr(e)
155
228
  "<span class='corr'>%s</span>" % traverse(e)
@@ -201,8 +274,7 @@ class CBETA::P5aToHTMLForPDF
201
274
  else
202
275
  fn = "#{gid}.gif"
203
276
  src = File.join(@config[:graphic_base], 'sd-gif', gid[3..4], fn)
204
- dest = File.join(@output_folder_sutra, fn)
205
- FileUtils.copy(src, dest)
277
+ copy_file(src)
206
278
  return "<img src='#{fn}'/>"
207
279
  end
208
280
  end
@@ -210,7 +282,7 @@ class CBETA::P5aToHTMLForPDF
210
282
  if gid.start_with?('RJ')
211
283
  fn = "#{gid}.gif"
212
284
  src = File.join(@config[:graphic_base], 'rj-gif', gid[3..4], fn)
213
- dest = File.join(@output_folder_sutra, fn)
285
+ copy_file(src)
214
286
  return "<img src='#{fn}'/>"
215
287
  end
216
288
 
@@ -229,9 +301,9 @@ class CBETA::P5aToHTMLForPDF
229
301
  url.sub!(/^.*(figures\/.*)$/, '\1')
230
302
 
231
303
  src = File.join(@config[:graphic_base], url)
304
+ copy_file(src)
305
+
232
306
  fn = File.basename(src)
233
- dest = File.join(@output_folder_sutra, fn)
234
- FileUtils.copy(src, dest)
235
307
  "<img src='#{fn}'/>"
236
308
  end
237
309
 
@@ -464,6 +536,10 @@ class CBETA::P5aToHTMLForPDF
464
536
  @toc = to_html(@nav_root)
465
537
  @toc.gsub!('<ul/>', '')
466
538
 
539
+ if @config.key? :graphic_base
540
+
541
+ end
542
+
467
543
  if @config.key? :front_page
468
544
  s = File.read(@config[:front_page])
469
545
  @front = "<div id='front'>#{s}</div>"
@@ -541,7 +617,11 @@ class CBETA::P5aToHTMLForPDF
541
617
  abort "未處理底本" if @orig.nil?
542
618
 
543
619
  @vol = vol
544
- @series = vol[0]
620
+ if vol.start_with? 'DA'
621
+ @series = 'DA'
622
+ else
623
+ @series = vol[0]
624
+ end
545
625
  @out_folder = File.join(@out_root, @series, vol)
546
626
  FileUtils.remove_dir(@out_folder, force=true)
547
627
  FileUtils::mkdir_p @out_folder
@@ -591,6 +671,11 @@ class CBETA::P5aToHTMLForPDF
591
671
 
592
672
  @author = doc.at_xpath("//titleStmt/author").text
593
673
 
674
+ if @cover.nil?
675
+ @cover = "<p class='title'>#{@title}</p>\n"
676
+ @cover += "<p class='author'>#{@author}</p>"
677
+ end
678
+
594
679
  e = doc.at_xpath("//editionStmt/edition/date")
595
680
  abort "找不到版本日期" if e.nil?
596
681
  @edition_date = e.text.sub(/\$Date: (.*?) \$$/, '\1')
@@ -627,6 +712,23 @@ class CBETA::P5aToHTMLForPDF
627
712
 
628
713
  r + "<table>\n"
629
714
  end
715
+
716
+ def prepare_work_list(input_folder)
717
+ Dir.foreach(input_folder) do |f|
718
+ next if f.start_with? '.'
719
+ p1 = File.join(input_folder, f)
720
+ if File.file?(p1)
721
+ work = f.sub(/^([A-Z]{1,2})\d{2,3}n(.*)\.xml$/, '\1\2')
722
+ work = 'T0220' if work.start_with? 'T0220'
723
+ unless @works.key? work
724
+ @works[work] = []
725
+ end
726
+ @works[work] << p1
727
+ else
728
+ prepare_work_list(p1)
729
+ end
730
+ end
731
+ end
630
732
 
631
733
  def to_html(e)
632
734
  e.to_xml(encoding: 'UTF-8', :save_with => Nokogiri::XML::Node::SaveOptions::AS_XML)
@@ -30,6 +30,10 @@ div.p {
30
30
  margin-bottom: 1em;
31
31
  margin-top: 1em;
32
32
  line-height: 1.4;
33
+ text-indent: 2em;
34
+ }
35
+ li div.p {
36
+ text-indent: 0;
33
37
  }
34
38
  p.h1 {
35
39
  margin-left: 1em;
@@ -5,12 +5,8 @@
5
5
  <link rel=stylesheet type='text/css' href='html-for-pdf.css'>
6
6
  </head>
7
7
  <body>
8
- <p class='title'>%{title}</p>
9
- <p class='author'>%{author}</p>
10
- <div>
11
- <h1>目次</h1>
12
- %{toc}
13
- </div>
8
+ %{cover}
9
+ %{toc}
14
10
  %{front}
15
11
  %{text}
16
12
  %{back}
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.1
4
+ version: 2.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-24 00:00:00.000000000 Z
11
+ date: 2015-12-25 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com
@@ -32,8 +32,6 @@ files:
32
32
  - lib/cbeta/p5a_validator.rb
33
33
  - lib/data/canons.csv
34
34
  - lib/data/categories.json
35
- - lib/data/epub-nav.xhtml
36
- - lib/data/epub.css
37
35
  - lib/data/gaiji.json
38
36
  - lib/data/html-for-pdf.css
39
37
  - lib/data/pdf-template.htm
@@ -63,3 +61,4 @@ signing_key:
63
61
  specification_version: 4
64
62
  summary: CBETA Tools
65
63
  test_files: []
64
+ has_rdoc:
@@ -1,11 +0,0 @@
1
- <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
2
- <head>
3
- <meta charset="utf-8" />
4
- </head>
5
- <body>
6
- <nav epub:type="toc" id="toc">
7
- <h1>Table of contents</h1>
8
- %s
9
- </nav>
10
- </body>
11
- </html>
@@ -1,57 +0,0 @@
1
- div.p {
2
- margin-bottom: 20px;
3
- line-height: 1.4;
4
- text-indent: 2em;
5
- }
6
- p.byline {
7
- text-align: right;
8
- }
9
- p.h1 {
10
- text-indent: 2em;
11
- font-weight: bold;
12
- }
13
- p.h2 {
14
- text-indent: 3em;
15
- font-weight: bold;
16
- }
17
- p.h3 {
18
- text-indent: 4em;
19
- font-weight: bold;
20
- }
21
- p.h4 {
22
- text-indent: 2em;
23
- font-weight: bold;
24
- }
25
- p.h5 {
26
- text-indent: 3em;
27
- font-weight: bold;
28
- }
29
- p.h6 {
30
- text-indent: 4em;
31
- font-weight: bold;
32
- }
33
- p.h7 {
34
- text-indent: 2em;
35
- font-weight: bold;
36
- }
37
- p.h8 {
38
- text-indent: 2em;
39
- font-weight: bold;
40
- }
41
- span.corr {
42
- color: red;
43
- }
44
- table {
45
- border-collapse: collapse;
46
- }
47
- th, td {
48
- border: solid;
49
- border-width: 1px;
50
- padding: 5px;
51
- word-wrap: break-word;
52
- word-break: break-all;
53
- text-indent: 0;
54
- }
55
- ul.simple {
56
- list-style-type: none;
57
- }