cbeta 1.3.1 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a21aa8cd67f37fc6d5ca9d87720b1d5d0da3f7f1
4
- data.tar.gz: bb7fb6cb3ab5d8b36492cc96728aa5a7fc9a7286
3
+ metadata.gz: c227a83872ae6d070b3c37ba8ba0895bd46242e2
4
+ data.tar.gz: c89bdb2a9e7c3410c6f4de3e1a4ed760fc73fe2c
5
5
  SHA512:
6
- metadata.gz: cb8bbfd96ad22c331e01059303ca539cb6718aae0caa551eba4a8064a7854d9f29bbd3bdb447bd06dd6a6f14b14363d16edb7b4f0932bab7fcfb0feccb9ba9bf
7
- data.tar.gz: 2a4394f918a235d08790409488e5dc1a1fd280239d9e1df2ece6dc99628ffc67b73a5beeef199d35a5fe10f4c781cf7ee19910585ff3aa13719b6163b0725d46
6
+ metadata.gz: 08f20461b411e57c6dabba538faf7f64c73cdc2cd7d8296a33fca0202091a7f36004862a4f6b3b11b9cc99776bec64651b29aa83966692340b821d5d7623f3b5
7
+ data.tar.gz: 7b0751f4610efb54b7dd959a827d5ffa81f61ebd9a5ada6c2f3dfc31de6fb22fe5ad259cd7b537d4e29ac656d590802e18db537e6fb97a9924ebbb37fff5ea86
@@ -1,11 +1,16 @@
1
- require 'wicked_pdf'
2
-
3
1
  class CBETA::HTMLToPDF
4
2
  # @param input [String] folder of source HTML, HTML can be produced by CBETA::P5aToHTMLForPDF.
5
3
  # @param output [String] output folder
6
- def initialize(input, output)
4
+ # @param converter [String] shell command to convert HTML to PDF
5
+ # * suggestion: http://www.princexml.com/
6
+ # * wkhtmltopdf has font problem to display unicode extb characters
7
+ #
8
+ # @example
9
+ # c = CBETA::HTMLToPDF.new('/temp/cbeta-html', '/temp/cbeta-pdf', "prince %{in} -o %{out}")
10
+ def initialize(input, output, converter)
7
11
  @input = input
8
12
  @output = output
13
+ @converter = converter
9
14
  end
10
15
 
11
16
  # Convert CBETA HTML to PDF
@@ -55,11 +60,8 @@ class CBETA::HTMLToPDF
55
60
 
56
61
  def convert_file(html_fn, pdf_fn)
57
62
  puts "convert file: #{html_fn} to #{pdf_fn}"
58
- pdf = WickedPdf.new.pdf_from_html_file(html_fn)
59
-
60
- File.open(pdf_fn, 'wb') do |file|
61
- file << pdf
62
- end
63
+ cmd = @converter % { in: html_fn, out: pdf_fn}
64
+ `#{cmd}`
63
65
  end
64
66
 
65
67
  def convert_vol(arg)
@@ -278,7 +278,9 @@ class CBETA::P5aToHTML
278
278
  cell['class'] = 'lg-cell'
279
279
  cell.inner_html = traverse(e)
280
280
 
281
- if @first_l
281
+ if e.key? 'rend'
282
+ cell['style'] = e['rend']
283
+ elsif @first_l
282
284
  parent = e.parent()
283
285
  if parent.has_attribute?('rend')
284
286
  indent = parent['rend'].scan(/text-indent:[^:]*/)
@@ -286,8 +288,9 @@ class CBETA::P5aToHTML
286
288
  cell['style'] = indent[0]
287
289
  end
288
290
  end
289
- @first_l = false
290
291
  end
292
+ @first_l = false
293
+
291
294
  r = to_html(cell)
292
295
 
293
296
  unless @lg_row_open
@@ -4,6 +4,7 @@ require 'fileutils'
4
4
  require 'json'
5
5
  require 'nokogiri'
6
6
  require 'set'
7
+ require 'erb'
7
8
 
8
9
  # Convert CBETA XML P5a to HTML for PDF
9
10
  #
@@ -23,6 +24,10 @@ class CBETA::P5aToHTMLForPDF
23
24
  # * graphic_base/figures: 插圖圖檔位置
24
25
  # * graphic_base/sd-gif: images for Siddham (悉曇字)
25
26
  # * graphic_base/rj-gif: images for Ranjana (蘭札體)
27
+ # @option opts [String] :front_page 內文前可以加一段 HTML,例如「編輯說明」
28
+ # @option opts [String] :front_page_title 加在目錄的 front_page 標題
29
+ # @option opts [String] :back_page 內文後可以加一段 HTML,例如「版權聲明」
30
+ # @option opts [String] :back_page_title 加在目錄的 back_page 標題
26
31
  def initialize(xml_root, out_root, opts={})
27
32
  @config = {
28
33
  }
@@ -70,6 +75,34 @@ class CBETA::P5aToHTMLForPDF
70
75
  end
71
76
 
72
77
  private
78
+
79
+ def before_parse_xml(xml_fn)
80
+ @div_count = 0
81
+ @in_l = false
82
+ @lg_row_open = false
83
+ @t_buf1 = []
84
+ @t_buf2 = []
85
+ @open_divs = []
86
+ @sutra_no = File.basename(xml_fn, ".xml")
87
+
88
+ @output_folder_sutra = File.join(@out_folder, @sutra_no)
89
+ FileUtils.mkdir_p(@output_folder_sutra) unless Dir.exist? @output_folder_sutra
90
+
91
+ src = File.join(CBETA::DATA, 'html-for-pdf.css')
92
+ dest = File.join(@output_folder_sutra, 'html-for-pdf.css')
93
+ FileUtils.copy(src, dest)
94
+
95
+ @nav_doc = Nokogiri::XML('<ul></ul>')
96
+ @nav_doc.remove_namespaces!()
97
+ @nav_root = @nav_doc.at_xpath('/ul')
98
+ @current_nav = [@nav_root]
99
+ @mulu_count = 0
100
+
101
+ if @config[:front_page_title]
102
+ s = @config[:front_page_title]
103
+ @nav_root.add_child("<li><a href='#front'>#{s}</a></li>")
104
+ end
105
+ end
73
106
 
74
107
  def convert_all
75
108
  Dir.foreach(@xml_root) { |c|
@@ -119,7 +152,7 @@ class CBETA::P5aToHTMLForPDF
119
152
  end
120
153
 
121
154
  def handle_corr(e)
122
- traverse(e)
155
+ "<span class='corr'>%s</span>" % traverse(e)
123
156
  end
124
157
 
125
158
  def handle_div(e)
@@ -133,22 +166,18 @@ class CBETA::P5aToHTMLForPDF
133
166
  end
134
167
  end
135
168
 
169
+ def handle_doc_number(e)
170
+ "<p>%s</p>" % traverse(e)
171
+ end
172
+
136
173
  def handle_figure(e)
137
174
  "<div class='figure'>%s</div>" % traverse(e)
138
175
  end
139
176
 
140
177
  def handle_g(e, mode)
141
- # if 有 <mapping type="unicode">
142
- # if 不在 Unicode Extension C, D, E 範圍裡
143
- # 直接採用
144
- # else
145
- # 預設呈現 unicode, 但仍包缺字資訊,供點選開 popup
146
- # else if 有 <mapping type="normal_unicode">
147
- # 預設呈現 normal_unicode, 但仍包缺字資訊,供點選開 popup
148
- # else if 有 normalized form
149
- # 預設呈現 normalized form, 但仍包缺字資訊,供點選開 popup
150
- # else
151
- # 預設呈現組字式, 但仍包缺字資訊,供點選開 popup
178
+ # 悉曇字、蘭札體 使用圖檔
179
+ # 如果有對應的 unicode 且不在 Unicode Extension C, D, E 範圍裡,直接採用 unicode
180
+ # 呈現組字式
152
181
  gid = e['ref'][1..-1]
153
182
  g = @gaijis[gid]
154
183
  abort "Line:#{__LINE__} 無缺字資料:#{gid}" if g.nil?
@@ -163,8 +192,6 @@ class CBETA::P5aToHTMLForPDF
163
192
  end
164
193
  end
165
194
 
166
- @char_count += 1
167
-
168
195
  if gid.start_with?('SD')
169
196
  case gid
170
197
  when 'SD-E35A'
@@ -188,14 +215,12 @@ class CBETA::P5aToHTMLForPDF
188
215
  end
189
216
 
190
217
  if g.has_key?('unicode')
191
- if @unicode1.include?(g['unicode'])
218
+ # 如果不在 unicode ext-C, ext-D, ext-E 範圍內
219
+ unless (0x2A700..0x2CEAF).include? g['unicode'].hex
192
220
  return g['unicode-char'] # 直接採用 unicode
193
221
  end
194
222
  end
195
223
 
196
- return g['normal_unicode'] if g.has_key?('normal_unicode')
197
- return g['normal'] if g.has_key?('normal')
198
-
199
224
  zzs
200
225
  end
201
226
 
@@ -217,7 +242,11 @@ class CBETA::P5aToHTMLForPDF
217
242
  return traverse(e)
218
243
  else
219
244
  i = @open_divs.size
220
- return "<p class='h#{i}'>%s</p>" % traverse(e)
245
+ if i <= 6
246
+ return "<p class='h#{i}'>%s</p>" % traverse(e)
247
+ else
248
+ return "<p class='h#{i}'>%s</p>" % traverse(e)
249
+ end
221
250
  end
222
251
  end
223
252
 
@@ -239,9 +268,11 @@ class CBETA::P5aToHTMLForPDF
239
268
  doc = Nokogiri::XML::Document.new
240
269
  cell = doc.create_element('div')
241
270
  cell['class'] = 'lg-cell'
242
- cell.inner_html = traverse(e)
271
+ cell.inner_html = traverse(e) + ' '
243
272
 
244
- if @first_l
273
+ if e.key? 'rend'
274
+ cell['style'] = e['rend']
275
+ elsif @first_l
245
276
  parent = e.parent()
246
277
  if parent.has_attribute?('rend')
247
278
  indent = parent['rend'].scan(/text-indent:[^:]*/)
@@ -249,8 +280,8 @@ class CBETA::P5aToHTMLForPDF
249
280
  cell['style'] = indent[0]
250
281
  end
251
282
  end
252
- @first_l = false
253
283
  end
284
+ @first_l = false
254
285
  r = to_html(cell)
255
286
 
256
287
  unless @lg_row_open
@@ -280,7 +311,14 @@ class CBETA::P5aToHTMLForPDF
280
311
  end
281
312
 
282
313
  def handle_lem(e)
283
- traverse(e)
314
+ r = ''
315
+ w = e['wit']
316
+ if w.include? 'CBETA' and not w.include? @orig
317
+ r = "<span class='corr'>%s</span>" % traverse(e)
318
+ else
319
+ r = traverse(e)
320
+ end
321
+ r
284
322
  end
285
323
 
286
324
  def handle_lg(e)
@@ -317,7 +355,20 @@ class CBETA::P5aToHTMLForPDF
317
355
  end
318
356
 
319
357
  def handle_mulu(e)
320
- ''
358
+ return '' if e['type']=='卷'
359
+ @mulu_count += 1
360
+ level = e['level'].to_i
361
+ while @current_nav.size > level
362
+ @current_nav.pop
363
+ end
364
+
365
+ label = traverse(e, 'txt')
366
+ li = @current_nav.last.add_child("<li><a href='#mulu#{@mulu_count}'>#{label}</a></li>").first
367
+ ul = li.add_child('<ul></ul>').first
368
+ @current_nav << ul
369
+
370
+ # mulu 標記裡要有東西,prince 才會產生 pdf bookmark
371
+ "<a id='mulu#{@mulu_count}'></a><mulu#{level} title='#{label}'>&nbsp;</mulu#{level}>"
321
372
  end
322
373
 
323
374
  def handle_node(e, mode)
@@ -330,6 +381,7 @@ class CBETA::P5aToHTMLForPDF
330
381
  when 'byline' then handle_byline(e)
331
382
  when 'cell' then handle_cell(e)
332
383
  when 'corr' then handle_corr(e)
384
+ when 'docNumber' then handle_doc_number(e)
333
385
  when 'div' then handle_div(e)
334
386
  when 'figure' then handle_figure(e)
335
387
  when 'foreign' then ''
@@ -399,37 +451,42 @@ class CBETA::P5aToHTMLForPDF
399
451
 
400
452
  def handle_sutra(xml_fn)
401
453
  puts "convert sutra #{xml_fn}"
402
- @back = { 0 => '' }
403
- @char_count = 1
404
- @dila_note = 0
405
- @div_count = 0
406
- @in_l = false
407
- @juan = 0
408
- @lg_row_open = false
409
- @t_buf1 = []
410
- @t_buf2 = []
411
- @open_divs = []
412
- @sutra_no = File.basename(xml_fn, ".xml")
413
454
 
414
- @output_folder_sutra = File.join(@out_folder, @sutra_no)
415
- FileUtils.mkdir_p(@output_folder_sutra) unless Dir.exist? @output_folder_sutra
455
+ before_parse_xml(xml_fn)
456
+
457
+ @text = parse_xml(xml_fn)
416
458
 
417
- src = File.join(CBETA::DATA, 'html-for-pdf.css')
418
- dest = File.join(@output_folder_sutra, 'html-for-pdf.css')
419
- FileUtils.copy(src, dest)
459
+ # 目次
460
+ if @config[:back_page_title]
461
+ s = @config[:back_page_title]
462
+ @nav_root.add_child("<li><a href='#back'>#{s}</a></li>")
463
+ end
464
+ @toc = to_html(@nav_root)
465
+ @toc.gsub!('<ul/>', '')
466
+
467
+ if @config.key? :front_page
468
+ s = File.read(@config[:front_page])
469
+ @front = "<div id='front'>#{s}</div>"
470
+ end
471
+
472
+ if @config.key? :back_page
473
+ s = File.read(@config[:back_page])
474
+ @back = "<div id='back'>#{s}</div>"
475
+ end
420
476
 
421
- text = parse_xml(xml_fn)
422
- text = "
423
- <html>
424
- <head>
425
- <meta http-equiv='Content-Type' content='text/html; charset=utf-8' />
426
- <link rel=stylesheet type='text/css' href='html-for-pdf.css'>
427
- </head>
428
- <body>#{text}</body>
429
- </html>"
477
+ fn = File.join(CBETA::DATA, 'pdf-template.htm')
478
+ template = File.read(fn)
479
+ output = template % {
480
+ title: @title,
481
+ author: @author,
482
+ toc: @toc,
483
+ front: @front,
484
+ text: @text,
485
+ back: @back
486
+ }
430
487
 
431
488
  fn = File.join(@output_folder_sutra, 'main.htm')
432
- File.write(fn, text)
489
+ File.write(fn, output)
433
490
  end
434
491
 
435
492
  def handle_t(e)
@@ -470,7 +527,7 @@ class CBETA::P5aToHTMLForPDF
470
527
 
471
528
  # cbeta xml 文字之間會有多餘的換行
472
529
  r = s.gsub(/[\n\r]/, '')
473
-
530
+
474
531
  # 把 & 轉為 &amp;
475
532
  r = CGI.escapeHTML(r)
476
533
 
@@ -532,6 +589,8 @@ class CBETA::P5aToHTMLForPDF
532
589
  @title = traverse(e, 'txt')
533
590
  @title = @title.split()[-1]
534
591
 
592
+ @author = doc.at_xpath("//titleStmt/author").text
593
+
535
594
  e = doc.at_xpath("//editionStmt/edition/date")
536
595
  abort "找不到版本日期" if e.nil?
537
596
  @edition_date = e.text.sub(/\$Date: (.*?) \$$/, '\1')
@@ -1,13 +1,24 @@
1
1
  @font-face {
2
- font-family: 'Songti';
3
- /* src: url('/Library/Fonts/Songti.ttc'); */
4
- src: url('/Library/Fonts/华文仿宋.ttf');
2
+ font-family: extb;
3
+ font-style: normal;
4
+ font-weight: normal;
5
+ src: url("/Library/Fonts/Microsoft/PMingLiU-ExtB.ttf")
6
+ }
7
+ @font-face {
8
+ font-family: PMingLiU;
9
+ font-style: normal;
10
+ font-weight: normal;
11
+ src: url("/Library/Fonts/Microsoft/PMingLiU.ttf")
12
+ }
13
+ a {
14
+ text-decoration: none;
5
15
  }
6
16
  body {
7
- font-family: Songti, PMingLiU-ExtB;
17
+ font-family: PMingLiU, extb;
8
18
  }
9
19
  div.lg {
10
20
  display: table;
21
+ margin-left: 1em;
11
22
  }
12
23
  div.lg-cell {
13
24
  display: table-cell;
@@ -16,37 +27,76 @@ div.lg-row {
16
27
  display: table-row;
17
28
  }
18
29
  div.p {
19
- margin-bottom: 20px;
30
+ margin-bottom: 1em;
31
+ margin-top: 1em;
20
32
  line-height: 1.4;
21
- text-indent: 2em;
22
- }
23
- p.byline {
24
- text-align: right;
25
33
  }
26
34
  p.h1 {
27
- text-indent: 2em;
35
+ margin-left: 1em;
36
+ font-size: 1.2em;
28
37
  font-weight: bold;
29
38
  }
30
39
  p.h2 {
31
- text-indent: 3em;
40
+ margin-left: 2em;
41
+ font-size: 1.2em;
32
42
  font-weight: bold;
33
43
  }
34
44
  p.h3 {
35
- text-indent: 4em;
45
+ margin-left: 3em;
46
+ font-size: 1.2em;
36
47
  font-weight: bold;
37
48
  }
38
49
  p.h4 {
39
- text-indent: 2em;
50
+ margin-left: 2em;
51
+ font-size: 1.2em;
40
52
  font-weight: bold;
41
53
  }
42
54
  p.h5 {
43
- text-indent: 3em;
55
+ margin-left: 3em;
56
+ font-size: 1.2em;
44
57
  font-weight: bold;
45
58
  }
46
59
  p.h6 {
47
- text-indent: 4em;
60
+ margin-left: 4em;
61
+ font-size: 1.2em;
48
62
  font-weight: bold;
49
63
  }
64
+
65
+ /*
66
+ mulu 標記是用來產生 pdf bookmark 用的
67
+ 參考: http://www.princexml.com/doc/pdf-bookmarks/
68
+ */
69
+ mulu1 {
70
+ prince-bookmark-level: 1;
71
+ prince-bookmark-label: attr(title)
72
+ }
73
+ mulu2 {
74
+ prince-bookmark-level: 2;
75
+ prince-bookmark-label: attr(title)
76
+ }
77
+ mulu3 {
78
+ prince-bookmark-level: 3;
79
+ prince-bookmark-label: attr(title)
80
+ }
81
+ mulu4 {
82
+ prince-bookmark-level: 4;
83
+ prince-bookmark-label: attr(title)
84
+ }
85
+ mulu5 {
86
+ prince-bookmark-level: 5;
87
+ prince-bookmark-label: attr(title)
88
+ }
89
+ mulu6 {
90
+ prince-bookmark-level: 6;
91
+ prince-bookmark-label: attr(title)
92
+ }
93
+ p.author {
94
+ font-size: 1.6em;
95
+ text-align: center;
96
+ }
97
+ p.byline {
98
+ text-align: right;
99
+ }
50
100
  p.h7 {
51
101
  text-indent: 2em;
52
102
  font-weight: bold;
@@ -55,9 +105,16 @@ p.h8 {
55
105
  text-indent: 2em;
56
106
  font-weight: bold;
57
107
  }
108
+ p.title {
109
+ font-size: 2em;
110
+ text-align: center;
111
+ }
58
112
  span.corr {
59
113
  color: red;
60
114
  }
115
+ span.extb {
116
+ font-family: extb;
117
+ }
61
118
  table {
62
119
  border-collapse: collapse;
63
120
  }
@@ -0,0 +1,18 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
5
+ <link rel=stylesheet type='text/css' href='html-for-pdf.css'>
6
+ </head>
7
+ <body>
8
+ <p class='title'>%{title}</p>
9
+ <p class='author'>%{author}</p>
10
+ <div>
11
+ <h1>目次</h1>
12
+ %{toc}
13
+ </div>
14
+ %{front}
15
+ %{text}
16
+ %{back}
17
+ </body>
18
+ </html>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.1
4
+ version: 1.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-04 00:00:00.000000000 Z
11
+ date: 2015-11-13 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com
@@ -36,6 +36,7 @@ files:
36
36
  - lib/data/epub.css
37
37
  - lib/data/gaiji.json
38
38
  - lib/data/html-for-pdf.css
39
+ - lib/data/pdf-template.htm
39
40
  - lib/data/unicode-1.1.json
40
41
  homepage: https://github.com/RayCHOU/ruby-cbeta
41
42
  licenses: