cbeta 1.3.1 → 1.3.2

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: a21aa8cd67f37fc6d5ca9d87720b1d5d0da3f7f1
4
- data.tar.gz: bb7fb6cb3ab5d8b36492cc96728aa5a7fc9a7286
3
+ metadata.gz: c227a83872ae6d070b3c37ba8ba0895bd46242e2
4
+ data.tar.gz: c89bdb2a9e7c3410c6f4de3e1a4ed760fc73fe2c
5
5
  SHA512:
6
- metadata.gz: cb8bbfd96ad22c331e01059303ca539cb6718aae0caa551eba4a8064a7854d9f29bbd3bdb447bd06dd6a6f14b14363d16edb7b4f0932bab7fcfb0feccb9ba9bf
7
- data.tar.gz: 2a4394f918a235d08790409488e5dc1a1fd280239d9e1df2ece6dc99628ffc67b73a5beeef199d35a5fe10f4c781cf7ee19910585ff3aa13719b6163b0725d46
6
+ metadata.gz: 08f20461b411e57c6dabba538faf7f64c73cdc2cd7d8296a33fca0202091a7f36004862a4f6b3b11b9cc99776bec64651b29aa83966692340b821d5d7623f3b5
7
+ data.tar.gz: 7b0751f4610efb54b7dd959a827d5ffa81f61ebd9a5ada6c2f3dfc31de6fb22fe5ad259cd7b537d4e29ac656d590802e18db537e6fb97a9924ebbb37fff5ea86
@@ -1,11 +1,16 @@
1
- require 'wicked_pdf'
2
-
3
1
  class CBETA::HTMLToPDF
4
2
  # @param input [String] folder of source HTML; the HTML can be produced by CBETA::P5aToHTMLForPDF.
5
3
  # @param output [String] output folder
6
- def initialize(input, output)
4
+ # @param converter [String] shell command to convert HTML to PDF
5
+ # * suggestion: http://www.princexml.com/
6
+ # * wkhtmltopdf has font problems when displaying Unicode Ext-B characters
7
+ #
8
+ # @example
9
+ # c = CBETA::HTMLToPDF.new('/temp/cbeta-html', '/temp/cbeta-pdf', "prince %{in} -o %{out}")
10
+ def initialize(input, output, converter)
7
11
  @input = input
8
12
  @output = output
13
+ @converter = converter
9
14
  end
10
15
 
11
16
  # Convert CBETA HTML to PDF
@@ -55,11 +60,8 @@ class CBETA::HTMLToPDF
55
60
 
56
61
  def convert_file(html_fn, pdf_fn)
57
62
  puts "convert file: #{html_fn} to #{pdf_fn}"
58
- pdf = WickedPdf.new.pdf_from_html_file(html_fn)
59
-
60
- File.open(pdf_fn, 'wb') do |file|
61
- file << pdf
62
- end
63
+ cmd = @converter % { in: html_fn, out: pdf_fn}
64
+ `#{cmd}`
63
65
  end
64
66
 
65
67
  def convert_vol(arg)
@@ -278,7 +278,9 @@ class CBETA::P5aToHTML
278
278
  cell['class'] = 'lg-cell'
279
279
  cell.inner_html = traverse(e)
280
280
 
281
- if @first_l
281
+ if e.key? 'rend'
282
+ cell['style'] = e['rend']
283
+ elsif @first_l
282
284
  parent = e.parent()
283
285
  if parent.has_attribute?('rend')
284
286
  indent = parent['rend'].scan(/text-indent:[^:]*/)
@@ -286,8 +288,9 @@ class CBETA::P5aToHTML
286
288
  cell['style'] = indent[0]
287
289
  end
288
290
  end
289
- @first_l = false
290
291
  end
292
+ @first_l = false
293
+
291
294
  r = to_html(cell)
292
295
 
293
296
  unless @lg_row_open
@@ -4,6 +4,7 @@ require 'fileutils'
4
4
  require 'json'
5
5
  require 'nokogiri'
6
6
  require 'set'
7
+ require 'erb'
7
8
 
8
9
  # Convert CBETA XML P5a to HTML for PDF
9
10
  #
@@ -23,6 +24,10 @@ class CBETA::P5aToHTMLForPDF
23
24
  # * graphic_base/figures: 插圖圖檔位置
24
25
  # * graphic_base/sd-gif: images for Siddham (悉曇字)
25
26
  # * graphic_base/rj-gif: images for Ranjana (蘭札體)
27
+ # @option opts [String] :front_page 內文前可以加一段 HTML,例如「編輯說明」
28
+ # @option opts [String] :front_page_title 加在目錄的 front_page 標題
29
+ # @option opts [String] :back_page 內文後可以加一段 HTML,例如「版權聲明」
30
+ # @option opts [String] :back_page_title 加在目錄的 back_page 標題
26
31
  def initialize(xml_root, out_root, opts={})
27
32
  @config = {
28
33
  }
@@ -70,6 +75,34 @@ class CBETA::P5aToHTMLForPDF
70
75
  end
71
76
 
72
77
  private
78
+
79
+ def before_parse_xml(xml_fn)
80
+ @div_count = 0
81
+ @in_l = false
82
+ @lg_row_open = false
83
+ @t_buf1 = []
84
+ @t_buf2 = []
85
+ @open_divs = []
86
+ @sutra_no = File.basename(xml_fn, ".xml")
87
+
88
+ @output_folder_sutra = File.join(@out_folder, @sutra_no)
89
+ FileUtils.mkdir_p(@output_folder_sutra) unless Dir.exist? @output_folder_sutra
90
+
91
+ src = File.join(CBETA::DATA, 'html-for-pdf.css')
92
+ dest = File.join(@output_folder_sutra, 'html-for-pdf.css')
93
+ FileUtils.copy(src, dest)
94
+
95
+ @nav_doc = Nokogiri::XML('<ul></ul>')
96
+ @nav_doc.remove_namespaces!()
97
+ @nav_root = @nav_doc.at_xpath('/ul')
98
+ @current_nav = [@nav_root]
99
+ @mulu_count = 0
100
+
101
+ if @config[:front_page_title]
102
+ s = @config[:front_page_title]
103
+ @nav_root.add_child("<li><a href='#front'>#{s}</a></li>")
104
+ end
105
+ end
73
106
 
74
107
  def convert_all
75
108
  Dir.foreach(@xml_root) { |c|
@@ -119,7 +152,7 @@ class CBETA::P5aToHTMLForPDF
119
152
  end
120
153
 
121
154
  def handle_corr(e)
122
- traverse(e)
155
+ "<span class='corr'>%s</span>" % traverse(e)
123
156
  end
124
157
 
125
158
  def handle_div(e)
@@ -133,22 +166,18 @@ class CBETA::P5aToHTMLForPDF
133
166
  end
134
167
  end
135
168
 
169
+ def handle_doc_number(e)
170
+ "<p>%s</p>" % traverse(e)
171
+ end
172
+
136
173
  def handle_figure(e)
137
174
  "<div class='figure'>%s</div>" % traverse(e)
138
175
  end
139
176
 
140
177
  def handle_g(e, mode)
141
- # if 有 <mapping type="unicode">
142
- # if 不在 Unicode Extension C, D, E 範圍裡
143
- # 直接採用
144
- # else
145
- # 預設呈現 unicode, 但仍包缺字資訊,供點選開 popup
146
- # else if 有 <mapping type="normal_unicode">
147
- # 預設呈現 normal_unicode, 但仍包缺字資訊,供點選開 popup
148
- # else if 有 normalized form
149
- # 預設呈現 normalized form, 但仍包缺字資訊,供點選開 popup
150
- # else
151
- # 預設呈現組字式, 但仍包缺字資訊,供點選開 popup
178
+ # 悉曇字、蘭札體 使用圖檔
179
+ # 如果有對應的 unicode 且不在 Unicode Extension C, D, E 範圍裡,直接採用 unicode
180
+ # 呈現組字式
152
181
  gid = e['ref'][1..-1]
153
182
  g = @gaijis[gid]
154
183
  abort "Line:#{__LINE__} 無缺字資料:#{gid}" if g.nil?
@@ -163,8 +192,6 @@ class CBETA::P5aToHTMLForPDF
163
192
  end
164
193
  end
165
194
 
166
- @char_count += 1
167
-
168
195
  if gid.start_with?('SD')
169
196
  case gid
170
197
  when 'SD-E35A'
@@ -188,14 +215,12 @@ class CBETA::P5aToHTMLForPDF
188
215
  end
189
216
 
190
217
  if g.has_key?('unicode')
191
- if @unicode1.include?(g['unicode'])
218
+ # 如果不在 unicode ext-C, ext-D, ext-E 範圍內
219
+ unless (0x2A700..0x2CEAF).include? g['unicode'].hex
192
220
  return g['unicode-char'] # 直接採用 unicode
193
221
  end
194
222
  end
195
223
 
196
- return g['normal_unicode'] if g.has_key?('normal_unicode')
197
- return g['normal'] if g.has_key?('normal')
198
-
199
224
  zzs
200
225
  end
201
226
 
@@ -217,7 +242,11 @@ class CBETA::P5aToHTMLForPDF
217
242
  return traverse(e)
218
243
  else
219
244
  i = @open_divs.size
220
- return "<p class='h#{i}'>%s</p>" % traverse(e)
245
+ if i <= 6
246
+ return "<p class='h#{i}'>%s</p>" % traverse(e)
247
+ else
248
+ return "<p class='h#{i}'>%s</p>" % traverse(e)
249
+ end
221
250
  end
222
251
  end
223
252
 
@@ -239,9 +268,11 @@ class CBETA::P5aToHTMLForPDF
239
268
  doc = Nokogiri::XML::Document.new
240
269
  cell = doc.create_element('div')
241
270
  cell['class'] = 'lg-cell'
242
- cell.inner_html = traverse(e)
271
+ cell.inner_html = traverse(e) + ' '
243
272
 
244
- if @first_l
273
+ if e.key? 'rend'
274
+ cell['style'] = e['rend']
275
+ elsif @first_l
245
276
  parent = e.parent()
246
277
  if parent.has_attribute?('rend')
247
278
  indent = parent['rend'].scan(/text-indent:[^:]*/)
@@ -249,8 +280,8 @@ class CBETA::P5aToHTMLForPDF
249
280
  cell['style'] = indent[0]
250
281
  end
251
282
  end
252
- @first_l = false
253
283
  end
284
+ @first_l = false
254
285
  r = to_html(cell)
255
286
 
256
287
  unless @lg_row_open
@@ -280,7 +311,14 @@ class CBETA::P5aToHTMLForPDF
280
311
  end
281
312
 
282
313
  def handle_lem(e)
283
- traverse(e)
314
+ r = ''
315
+ w = e['wit']
316
+ if w.include? 'CBETA' and not w.include? @orig
317
+ r = "<span class='corr'>%s</span>" % traverse(e)
318
+ else
319
+ r = traverse(e)
320
+ end
321
+ r
284
322
  end
285
323
 
286
324
  def handle_lg(e)
@@ -317,7 +355,20 @@ class CBETA::P5aToHTMLForPDF
317
355
  end
318
356
 
319
357
  def handle_mulu(e)
320
- ''
358
+ return '' if e['type']=='卷'
359
+ @mulu_count += 1
360
+ level = e['level'].to_i
361
+ while @current_nav.size > level
362
+ @current_nav.pop
363
+ end
364
+
365
+ label = traverse(e, 'txt')
366
+ li = @current_nav.last.add_child("<li><a href='#mulu#{@mulu_count}'>#{label}</a></li>").first
367
+ ul = li.add_child('<ul></ul>').first
368
+ @current_nav << ul
369
+
370
+ # mulu 標記裡要有東西,prince 才會產生 pdf bookmark
371
+ "<a id='mulu#{@mulu_count}'></a><mulu#{level} title='#{label}'>&nbsp;</mulu#{level}>"
321
372
  end
322
373
 
323
374
  def handle_node(e, mode)
@@ -330,6 +381,7 @@ class CBETA::P5aToHTMLForPDF
330
381
  when 'byline' then handle_byline(e)
331
382
  when 'cell' then handle_cell(e)
332
383
  when 'corr' then handle_corr(e)
384
+ when 'docNumber' then handle_doc_number(e)
333
385
  when 'div' then handle_div(e)
334
386
  when 'figure' then handle_figure(e)
335
387
  when 'foreign' then ''
@@ -399,37 +451,42 @@ class CBETA::P5aToHTMLForPDF
399
451
 
400
452
  def handle_sutra(xml_fn)
401
453
  puts "convert sutra #{xml_fn}"
402
- @back = { 0 => '' }
403
- @char_count = 1
404
- @dila_note = 0
405
- @div_count = 0
406
- @in_l = false
407
- @juan = 0
408
- @lg_row_open = false
409
- @t_buf1 = []
410
- @t_buf2 = []
411
- @open_divs = []
412
- @sutra_no = File.basename(xml_fn, ".xml")
413
454
 
414
- @output_folder_sutra = File.join(@out_folder, @sutra_no)
415
- FileUtils.mkdir_p(@output_folder_sutra) unless Dir.exist? @output_folder_sutra
455
+ before_parse_xml(xml_fn)
456
+
457
+ @text = parse_xml(xml_fn)
416
458
 
417
- src = File.join(CBETA::DATA, 'html-for-pdf.css')
418
- dest = File.join(@output_folder_sutra, 'html-for-pdf.css')
419
- FileUtils.copy(src, dest)
459
+ # 目次
460
+ if @config[:back_page_title]
461
+ s = @config[:back_page_title]
462
+ @nav_root.add_child("<li><a href='#back'>#{s}</a></li>")
463
+ end
464
+ @toc = to_html(@nav_root)
465
+ @toc.gsub!('<ul/>', '')
466
+
467
+ if @config.key? :front_page
468
+ s = File.read(@config[:front_page])
469
+ @front = "<div id='front'>#{s}</div>"
470
+ end
471
+
472
+ if @config.key? :back_page
473
+ s = File.read(@config[:back_page])
474
+ @back = "<div id='back'>#{s}</div>"
475
+ end
420
476
 
421
- text = parse_xml(xml_fn)
422
- text = "
423
- <html>
424
- <head>
425
- <meta http-equiv='Content-Type' content='text/html; charset=utf-8' />
426
- <link rel=stylesheet type='text/css' href='html-for-pdf.css'>
427
- </head>
428
- <body>#{text}</body>
429
- </html>"
477
+ fn = File.join(CBETA::DATA, 'pdf-template.htm')
478
+ template = File.read(fn)
479
+ output = template % {
480
+ title: @title,
481
+ author: @author,
482
+ toc: @toc,
483
+ front: @front,
484
+ text: @text,
485
+ back: @back
486
+ }
430
487
 
431
488
  fn = File.join(@output_folder_sutra, 'main.htm')
432
- File.write(fn, text)
489
+ File.write(fn, output)
433
490
  end
434
491
 
435
492
  def handle_t(e)
@@ -470,7 +527,7 @@ class CBETA::P5aToHTMLForPDF
470
527
 
471
528
  # cbeta xml 文字之間會有多餘的換行
472
529
  r = s.gsub(/[\n\r]/, '')
473
-
530
+
474
531
  # 把 & 轉為 &amp;
475
532
  r = CGI.escapeHTML(r)
476
533
 
@@ -532,6 +589,8 @@ class CBETA::P5aToHTMLForPDF
532
589
  @title = traverse(e, 'txt')
533
590
  @title = @title.split()[-1]
534
591
 
592
+ @author = doc.at_xpath("//titleStmt/author").text
593
+
535
594
  e = doc.at_xpath("//editionStmt/edition/date")
536
595
  abort "找不到版本日期" if e.nil?
537
596
  @edition_date = e.text.sub(/\$Date: (.*?) \$$/, '\1')
@@ -1,13 +1,24 @@
1
1
  @font-face {
2
- font-family: 'Songti';
3
- /* src: url('/Library/Fonts/Songti.ttc'); */
4
- src: url('/Library/Fonts/华文仿宋.ttf');
2
+ font-family: extb;
3
+ font-style: normal;
4
+ font-weight: normal;
5
+ src: url("/Library/Fonts/Microsoft/PMingLiU-ExtB.ttf")
6
+ }
7
+ @font-face {
8
+ font-family: PMingLiU;
9
+ font-style: normal;
10
+ font-weight: normal;
11
+ src: url("/Library/Fonts/Microsoft/PMingLiU.ttf")
12
+ }
13
+ a {
14
+ text-decoration: none;
5
15
  }
6
16
  body {
7
- font-family: Songti, PMingLiU-ExtB;
17
+ font-family: PMingLiU, extb;
8
18
  }
9
19
  div.lg {
10
20
  display: table;
21
+ margin-left: 1em;
11
22
  }
12
23
  div.lg-cell {
13
24
  display: table-cell;
@@ -16,37 +27,76 @@ div.lg-row {
16
27
  display: table-row;
17
28
  }
18
29
  div.p {
19
- margin-bottom: 20px;
30
+ margin-bottom: 1em;
31
+ margin-top: 1em;
20
32
  line-height: 1.4;
21
- text-indent: 2em;
22
- }
23
- p.byline {
24
- text-align: right;
25
33
  }
26
34
  p.h1 {
27
- text-indent: 2em;
35
+ margin-left: 1em;
36
+ font-size: 1.2em;
28
37
  font-weight: bold;
29
38
  }
30
39
  p.h2 {
31
- text-indent: 3em;
40
+ margin-left: 2em;
41
+ font-size: 1.2em;
32
42
  font-weight: bold;
33
43
  }
34
44
  p.h3 {
35
- text-indent: 4em;
45
+ margin-left: 3em;
46
+ font-size: 1.2em;
36
47
  font-weight: bold;
37
48
  }
38
49
  p.h4 {
39
- text-indent: 2em;
50
+ margin-left: 2em;
51
+ font-size: 1.2em;
40
52
  font-weight: bold;
41
53
  }
42
54
  p.h5 {
43
- text-indent: 3em;
55
+ margin-left: 3em;
56
+ font-size: 1.2em;
44
57
  font-weight: bold;
45
58
  }
46
59
  p.h6 {
47
- text-indent: 4em;
60
+ margin-left: 4em;
61
+ font-size: 1.2em;
48
62
  font-weight: bold;
49
63
  }
64
+
65
+ /*
66
+ mulu 標記是用來產生 pdf bookmark 用的
67
+ 參考: http://www.princexml.com/doc/pdf-bookmarks/
68
+ */
69
+ mulu1 {
70
+ prince-bookmark-level: 1;
71
+ prince-bookmark-label: attr(title)
72
+ }
73
+ mulu2 {
74
+ prince-bookmark-level: 2;
75
+ prince-bookmark-label: attr(title)
76
+ }
77
+ mulu3 {
78
+ prince-bookmark-level: 3;
79
+ prince-bookmark-label: attr(title)
80
+ }
81
+ mulu4 {
82
+ prince-bookmark-level: 4;
83
+ prince-bookmark-label: attr(title)
84
+ }
85
+ mulu5 {
86
+ prince-bookmark-level: 5;
87
+ prince-bookmark-label: attr(title)
88
+ }
89
+ mulu6 {
90
+ prince-bookmark-level: 6;
91
+ prince-bookmark-label: attr(title)
92
+ }
93
+ p.author {
94
+ font-size: 1.6em;
95
+ text-align: center;
96
+ }
97
+ p.byline {
98
+ text-align: right;
99
+ }
50
100
  p.h7 {
51
101
  text-indent: 2em;
52
102
  font-weight: bold;
@@ -55,9 +105,16 @@ p.h8 {
55
105
  text-indent: 2em;
56
106
  font-weight: bold;
57
107
  }
108
+ p.title {
109
+ font-size: 2em;
110
+ text-align: center;
111
+ }
58
112
  span.corr {
59
113
  color: red;
60
114
  }
115
+ span.extb {
116
+ font-family: extb;
117
+ }
61
118
  table {
62
119
  border-collapse: collapse;
63
120
  }
@@ -0,0 +1,18 @@
1
+ <!DOCTYPE html>
2
+ <html>
3
+ <head>
4
+ <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
5
+ <link rel=stylesheet type='text/css' href='html-for-pdf.css'>
6
+ </head>
7
+ <body>
8
+ <p class='title'>%{title}</p>
9
+ <p class='author'>%{author}</p>
10
+ <div>
11
+ <h1>目次</h1>
12
+ %{toc}
13
+ </div>
14
+ %{front}
15
+ %{text}
16
+ %{back}
17
+ </body>
18
+ </html>
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: cbeta
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.3.1
4
+ version: 1.3.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ray Chou
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-04 00:00:00.000000000 Z
11
+ date: 2015-11-13 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
14
14
  email: zhoubx@gmail.com
@@ -36,6 +36,7 @@ files:
36
36
  - lib/data/epub.css
37
37
  - lib/data/gaiji.json
38
38
  - lib/data/html-for-pdf.css
39
+ - lib/data/pdf-template.htm
39
40
  - lib/data/unicode-1.1.json
40
41
  homepage: https://github.com/RayCHOU/ruby-cbeta
41
42
  licenses: