cbeta 2.1.1 → 2.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: f4f9d8df5d6d472d3cd24eab6cc0b33086d14c14
4
- data.tar.gz: dcebae46e56c9895cd7bbcd0162161218d8b710a
3
+ metadata.gz: 833cb0a1b76d79d40438f7d5f9b5dc49ce464f99
4
+ data.tar.gz: 550bd5306dec92d4454c47cd7cb8bfed5065328d
5
5
  SHA512:
6
- metadata.gz: e10fd20296adc40ca814984d90a3c2c23c0b5d46763f7ae80ccdd89f5b407a68014446eefa0b9671f58be315fa6af30aeddf1e01e7e8ca77072f99e4b8857ee4
7
- data.tar.gz: 11c4590682a0f6e85b4be01e1c85a2ed19f21d38ec0881376f2903442f88c650c07a5a3910fb608c894d44a3d649b857f855dcd475cb9ede769aa8eb8cf89d75
6
+ metadata.gz: 02c9a174e77a60db9fbda71d119965d36ef3048ebf5d485f657bfde32ba80a5b8e3c7cbc1a60db5ef97a259c7b83cb2a7aca03d37d3133682e87413c0b5f4387
7
+ data.tar.gz: 1bd1732b427d9778d22ee7b62d2546b4b6be93b29c2ca3a87ca2ca7bd32a34708782af21fc89252fb5bdc321acafd5005152b7e4d38f45308487798dc95796ac
@@ -35,7 +35,7 @@ class CBETA::HTMLToPDF
35
35
  return convert_all if target.nil?
36
36
 
37
37
  arg = target.upcase
38
- if arg.size == 1
38
+ if arg.size <= 2
39
39
  convert_collection(arg)
40
40
  else
41
41
  if arg.include? '..'
@@ -49,46 +49,25 @@ class CBETA::HTMLToPDF
49
49
  end
50
50
 
51
51
  def convert_collection(c)
52
- @series = c
52
+ @canon = c
53
53
  puts 'convert_collection ' + c
54
- folder = File.join(@input, @series)
55
- Dir.foreach(folder) { |vol|
56
- next if ['.', '..', '.DS_Store'].include? vol
57
- convert_vol(vol)
58
- }
59
- end
60
-
61
- def convert_file(html_fn, pdf_fn)
62
- puts "convert file: #{html_fn} to #{pdf_fn}"
63
- cmd = @converter % { in: html_fn, out: pdf_fn}
64
- `#{cmd}`
65
- end
66
-
67
- def convert_vol(arg)
68
- vol = arg.upcase
69
- canon = vol[0]
70
- vol_folder = File.join(@input, canon, vol)
71
54
 
72
- output_folder = File.join(@output, canon, vol)
55
+ output_folder = File.join(@output, @canon)
73
56
  FileUtils.mkdir_p(output_folder) unless Dir.exist? output_folder
74
57
 
75
- Dir.entries(vol_folder).sort.each do |f|
58
+ folder = File.join(@input, @canon)
59
+ Dir.foreach(folder) { |f|
76
60
  next if f.start_with? '.'
77
- src = File.join(vol_folder, f, 'main.htm')
61
+ src = File.join(folder, f, 'main.htm')
78
62
  dest = File.join(output_folder, "#{f}.pdf")
79
63
  convert_file(src, dest)
80
- end
81
- end
82
-
83
- def convert_vols(v1, v2)
84
- puts "convert volumns: #{v1}..#{v2}"
85
- @series = v1[0]
86
- folder = File.join(@input, @series)
87
- Dir.foreach(folder) { |vol|
88
- next if vol < v1
89
- next if vol > v2
90
- convert_vol(vol)
91
64
  }
92
65
  end
93
66
 
67
+ def convert_file(html_fn, pdf_fn)
68
+ puts "convert file: #{html_fn} to #{pdf_fn}"
69
+ cmd = @converter % { in: html_fn, out: pdf_fn}
70
+ `#{cmd}`
71
+ end
72
+
94
73
  end
@@ -22,11 +22,12 @@ class CBETA::P5aParser
22
22
  # @param e [Nokogiri::XML::Element]
23
23
  # @param mode [String] 'html' or 'text', default value: 'html'
24
24
  # @return [Hash]
25
- # * :content [String] 要放在本文中的文字, 如果 mode=='html', 那麼本文文字會包含 footnote anchor
26
- # * :footnote_text [String] 要放在 footnote 的文字
27
- # * :footnote_resp [String]
28
- # * 'orig': 表示這個註解是底本的註
29
- # * 'CBETA': 表示這個註解是 CBETA 修訂過的註
25
+ # 回傳
26
+ # * :content [String] 要放在本文中的文字, 如果 mode=='html', 那麼本文文字會包含 footnote anchor
27
+ # * :footnote_text [String] 要放在 footnote 的文字
28
+ # * :footnote_resp [String]
29
+ # * 'orig': 表示這個註解是底本的註
30
+ # * 'CBETA': 表示這個註解是 CBETA 修訂過的註
30
31
  def handle_note(e, mode='html')
31
32
  r = {
32
33
  content: '',
@@ -28,8 +28,10 @@ class CBETA::P5aToHTMLForPDF
28
28
  # @option opts [String] :front_page_title 加在目錄的 front_page 標題
29
29
  # @option opts [String] :back_page 內文後可以加一段 HTML,例如「版權聲明」
30
30
  # @option opts [String] :back_page_title 加在目錄的 back_page 標題
31
+ # @option opts [Boolean] :toc 要不要放目次, 預設會有目次
31
32
  def initialize(xml_root, out_root, opts={})
32
33
  @config = {
34
+ toc: true
33
35
  }
34
36
  @config.merge!(opts)
35
37
 
@@ -61,8 +63,8 @@ class CBETA::P5aToHTMLForPDF
61
63
  return convert_all if target.nil?
62
64
 
63
65
  arg = target.upcase
64
- if arg.size == 1
65
- handle_collection(arg)
66
+ if arg.size <= 2
67
+ convert_collection(arg)
66
68
  else
67
69
  if arg.include? '..'
68
70
  arg.match(/^([^\.]+?)\.\.([^\.]+)$/) {
@@ -76,6 +78,17 @@ class CBETA::P5aToHTMLForPDF
76
78
 
77
79
  private
78
80
 
81
+ def before_convert_work(work_id)
82
+ @nav_doc = Nokogiri::XML('<ul></ul>')
83
+ @nav_doc.remove_namespaces!()
84
+ @nav_root = @nav_doc.at_xpath('/ul')
85
+ @current_nav = [@nav_root]
86
+ @mulu_count = 0
87
+
88
+ @output_folder_work = File.join(@out_root, @series, work_id)
89
+ FileUtils.mkdir_p(@output_folder_work) unless Dir.exist? @output_folder_work
90
+ end
91
+
79
92
  def before_parse_xml(xml_fn)
80
93
  @div_count = 0
81
94
  @in_l = false
@@ -83,33 +96,102 @@ class CBETA::P5aToHTMLForPDF
83
96
  @t_buf1 = []
84
97
  @t_buf2 = []
85
98
  @open_divs = []
86
- @sutra_no = File.basename(xml_fn, ".xml")
87
-
88
- @output_folder_sutra = File.join(@out_folder, @sutra_no)
89
- FileUtils.mkdir_p(@output_folder_sutra) unless Dir.exist? @output_folder_sutra
99
+ @sutra_no = File.basename(xml_fn, ".xml")
90
100
 
91
101
  src = File.join(CBETA::DATA, 'html-for-pdf.css')
92
- dest = File.join(@output_folder_sutra, 'html-for-pdf.css')
93
- FileUtils.copy(src, dest)
94
-
95
- @nav_doc = Nokogiri::XML('<ul></ul>')
96
- @nav_doc.remove_namespaces!()
97
- @nav_root = @nav_doc.at_xpath('/ul')
98
- @current_nav = [@nav_root]
99
- @mulu_count = 0
102
+ copy_file(src)
100
103
 
101
104
  if @config[:front_page_title]
102
105
  s = @config[:front_page_title]
103
106
  @nav_root.add_child("<li><a href='#front'>#{s}</a></li>")
104
- end
107
+ end
105
108
  end
106
109
 
107
110
  def convert_all
108
111
  Dir.foreach(@xml_root) { |c|
109
112
  next unless c.match(/^[A-Z]$/)
110
- handle_collection(c)
113
+ convert_collection(c)
111
114
  }
112
115
  end
116
+
117
+ def convert_collection(c)
118
+ @series = c
119
+ puts 'handle_collection ' + c
120
+ folder = File.join(@xml_root, @series)
121
+ @works = {}
122
+ prepare_work_list(folder)
123
+ @works.each do |work_id, xml_files|
124
+ convert_work(work_id, xml_files)
125
+ end
126
+ end
127
+
128
+ def convert_work(work_id, xml_files)
129
+ puts "convert work #{work_id}"
130
+
131
+ before_convert_work(work_id)
132
+
133
+ # 目次
134
+ if @config[:back_page_title]
135
+ s = @config[:back_page_title]
136
+ @nav_root.add_child("<li><a href='#back'>#{s}</a></li>")
137
+ end
138
+
139
+ @cover = nil
140
+ if @config.key? :graphic_base
141
+ cover = File.join(@config[:graphic_base], 'covers', @series, "#{work_id}.jpg")
142
+ if File.exist? cover
143
+ @cover = "<div id='cover'><img src='#{work_id}.jpg' /></div>"
144
+ copy_file(cover)
145
+ end
146
+ end
147
+
148
+ if @config.key? :front_page
149
+ s = File.read(@config[:front_page])
150
+ @front = "<div id='front'>#{s}</div>"
151
+ end
152
+
153
+ if @config.key? :back_page
154
+ s = File.read(@config[:back_page])
155
+ @back = "<div id='back'>#{s}</div>"
156
+ end
157
+
158
+ @text = ''
159
+ xml_files.each do |fn|
160
+ @text += convert_xml_file(fn)
161
+ end
162
+
163
+ if @config[:toc]
164
+ @toc = to_html(@nav_root)
165
+ @toc.gsub!('<ul/>', '')
166
+ @toc = "<div><h1>目次</h1>#{@toc}</div>"
167
+ else
168
+ @toc = ''
169
+ end
170
+
171
+ fn = File.join(CBETA::DATA, 'pdf-template.htm')
172
+ template = File.read(fn)
173
+ output = template % {
174
+ cover: @cover,
175
+ toc: @toc,
176
+ front: @front,
177
+ text: @text,
178
+ back: @back
179
+ }
180
+
181
+ fn = File.join(@output_folder_work, 'main.htm')
182
+ File.write(fn, output)
183
+ end
184
+
185
+ def convert_xml_file(xml_fn)
186
+ before_parse_xml(xml_fn)
187
+ parse_xml(xml_fn)
188
+ end
189
+
190
+ def copy_file(src)
191
+ basename = File.basename(src)
192
+ dest = File.join(@output_folder_work, basename)
193
+ FileUtils.copy(src, dest)
194
+ end
113
195
 
114
196
  def handle_anchor(e)
115
197
  id = e['id']
@@ -141,15 +223,6 @@ class CBETA::P5aToHTMLForPDF
141
223
  to_html(cell)
142
224
  end
143
225
 
144
- def handle_collection(c)
145
- @series = c
146
- puts 'handle_collection ' + c
147
- folder = File.join(@xml_root, @series)
148
- Dir.foreach(folder) { |vol|
149
- next if ['.', '..', '.DS_Store'].include? vol
150
- handle_vol(vol)
151
- }
152
- end
153
226
 
154
227
  def handle_corr(e)
155
228
  "<span class='corr'>%s</span>" % traverse(e)
@@ -201,8 +274,7 @@ class CBETA::P5aToHTMLForPDF
201
274
  else
202
275
  fn = "#{gid}.gif"
203
276
  src = File.join(@config[:graphic_base], 'sd-gif', gid[3..4], fn)
204
- dest = File.join(@output_folder_sutra, fn)
205
- FileUtils.copy(src, dest)
277
+ copy_file(src)
206
278
  return "<img src='#{fn}'/>"
207
279
  end
208
280
  end
@@ -210,7 +282,7 @@ class CBETA::P5aToHTMLForPDF
210
282
  if gid.start_with?('RJ')
211
283
  fn = "#{gid}.gif"
212
284
  src = File.join(@config[:graphic_base], 'rj-gif', gid[3..4], fn)
213
- dest = File.join(@output_folder_sutra, fn)
285
+ copy_file(src)
214
286
  return "<img src='#{fn}'/>"
215
287
  end
216
288
 
@@ -229,9 +301,9 @@ class CBETA::P5aToHTMLForPDF
229
301
  url.sub!(/^.*(figures\/.*)$/, '\1')
230
302
 
231
303
  src = File.join(@config[:graphic_base], url)
304
+ copy_file(src)
305
+
232
306
  fn = File.basename(src)
233
- dest = File.join(@output_folder_sutra, fn)
234
- FileUtils.copy(src, dest)
235
307
  "<img src='#{fn}'/>"
236
308
  end
237
309
 
@@ -464,6 +536,10 @@ class CBETA::P5aToHTMLForPDF
464
536
  @toc = to_html(@nav_root)
465
537
  @toc.gsub!('<ul/>', '')
466
538
 
539
+ if @config.key? :graphic_base
540
+
541
+ end
542
+
467
543
  if @config.key? :front_page
468
544
  s = File.read(@config[:front_page])
469
545
  @front = "<div id='front'>#{s}</div>"
@@ -541,7 +617,11 @@ class CBETA::P5aToHTMLForPDF
541
617
  abort "未處理底本" if @orig.nil?
542
618
 
543
619
  @vol = vol
544
- @series = vol[0]
620
+ if vol.start_with? 'DA'
621
+ @series = 'DA'
622
+ else
623
+ @series = vol[0]
624
+ end
545
625
  @out_folder = File.join(@out_root, @series, vol)
546
626
  FileUtils.remove_dir(@out_folder, force=true)
547
627
  FileUtils::mkdir_p @out_folder
@@ -591,6 +671,11 @@ class CBETA::P5aToHTMLForPDF
591
671
 
592
672
  @author = doc.at_xpath("//titleStmt/author").text
593
673
 
674
+ if @cover.nil?
675
+ @cover = "<p class='title'>#{@title}</p>\n"
676
+ @cover += "<p class='author'>#{@author}</p>"
677
+ end
678
+
594
679
  e = doc.at_xpath("//editionStmt/edition/date")
595
680
  abort "找不到版本日期" if e.nil?
596
681
  @edition_date = e.text.sub(/\$Date: (.*?) \$$/, '\1')
@@ -627,6 +712,23 @@ class CBETA::P5aToHTMLForPDF
627
712
 
628
713
  r + "<table>\n"
629
714
  end
715
+
716
+ def prepare_work_list(input_folder)
717
+ Dir.foreach(input_folder) do |f|
718
+ next if f.start_with? '.'
719
+ p1 = File.join(input_folder, f)
720
+ if File.file?(p1)
721
+ work = f.sub(/^([A-Z]{1,2})\d{2,3}n(.*)\.xml$/, '\1\2')
722
+ work = 'T0220' if work.start_with? 'T0220'
723
+ unless @works.key? work
724
+ @works[work] = []
725
+ end
726
+ @works[work] << p1
727
+ else
728
+ prepare_work_list(p1)
729
+ end
730
+ end
731
+ end
630
732
 
631
733
  def to_html(e)
632
734
  e.to_xml(encoding: 'UTF-8', :save_with => Nokogiri::XML::Node::SaveOptions::AS_XML)
@@ -30,6 +30,10 @@ div.p {
30
30
  margin-bottom: 1em;
31
31
  margin-top: 1em;
32
32
  line-height: 1.4;
33
+ text-indent: 2em;
34
+ }
35
+ li div.p {
36
+ text-indent: 0;
33
37
  }
34
38
  p.h1 {
35
39
  margin-left: 1em;
@@ -5,12 +5,8 @@
5
5
  <link rel=stylesheet type='text/css' href='html-for-pdf.css'>
6
6
  </head>
7
7
  <body>
8
- <p class='title'>%{title}</p>
9
- <p class='author'>%{author}</p>
10
- <div>
11
- <h1>目次</h1>
12
- %{toc}
13
- </div>
8
+ %{cover}
9
+ %{toc}
14
10
  %{front}
15
11
  %{text}
16
12
  %{back}
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 2.1.1
4
+ version: 2.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-12-24 00:00:00.000000000 Z
11
+ date: 2015-12-25 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com
@@ -32,8 +32,6 @@ files:
32
32
  - lib/cbeta/p5a_validator.rb
33
33
  - lib/data/canons.csv
34
34
  - lib/data/categories.json
35
- - lib/data/epub-nav.xhtml
36
- - lib/data/epub.css
37
35
  - lib/data/gaiji.json
38
36
  - lib/data/html-for-pdf.css
39
37
  - lib/data/pdf-template.htm
@@ -63,3 +61,4 @@ signing_key:
63
61
  specification_version: 4
64
62
  summary: CBETA Tools
65
63
  test_files: []
64
+ has_rdoc:
@@ -1,11 +0,0 @@
1
- <html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
2
- <head>
3
- <meta charset="utf-8" />
4
- </head>
5
- <body>
6
- <nav epub:type="toc" id="toc">
7
- <h1>Table of contents</h1>
8
- %s
9
- </nav>
10
- </body>
11
- </html>
@@ -1,57 +0,0 @@
1
- div.p {
2
- margin-bottom: 20px;
3
- line-height: 1.4;
4
- text-indent: 2em;
5
- }
6
- p.byline {
7
- text-align: right;
8
- }
9
- p.h1 {
10
- text-indent: 2em;
11
- font-weight: bold;
12
- }
13
- p.h2 {
14
- text-indent: 3em;
15
- font-weight: bold;
16
- }
17
- p.h3 {
18
- text-indent: 4em;
19
- font-weight: bold;
20
- }
21
- p.h4 {
22
- text-indent: 2em;
23
- font-weight: bold;
24
- }
25
- p.h5 {
26
- text-indent: 3em;
27
- font-weight: bold;
28
- }
29
- p.h6 {
30
- text-indent: 4em;
31
- font-weight: bold;
32
- }
33
- p.h7 {
34
- text-indent: 2em;
35
- font-weight: bold;
36
- }
37
- p.h8 {
38
- text-indent: 2em;
39
- font-weight: bold;
40
- }
41
- span.corr {
42
- color: red;
43
- }
44
- table {
45
- border-collapse: collapse;
46
- }
47
- th, td {
48
- border: solid;
49
- border-width: 1px;
50
- padding: 5px;
51
- word-wrap: break-word;
52
- word-break: break-all;
53
- text-indent: 0;
54
- }
55
- ul.simple {
56
- list-style-type: none;
57
- }