RubyGems - cbeta - Versions diffs - 1.3.1 → 1.3.2 - Mend

cbeta 1.3.1 → 1.3.2

Files changed (7) hide show

checksums.yaml +4 -4
data/lib/cbeta/html_to_pdf.rb +10 -8
data/lib/cbeta/p5a_to_html.rb +5 -2
data/lib/cbeta/p5a_to_html_for_pdf.rb +110 -51
data/lib/data/html-for-pdf.css +72 -15
data/lib/data/pdf-template.htm +18 -0
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: a21aa8cd67f37fc6d5ca9d87720b1d5d0da3f7f1
-  data.tar.gz: bb7fb6cb3ab5d8b36492cc96728aa5a7fc9a7286
+  metadata.gz: c227a83872ae6d070b3c37ba8ba0895bd46242e2
+  data.tar.gz: c89bdb2a9e7c3410c6f4de3e1a4ed760fc73fe2c
 SHA512:
-  metadata.gz: cb8bbfd96ad22c331e01059303ca539cb6718aae0caa551eba4a8064a7854d9f29bbd3bdb447bd06dd6a6f14b14363d16edb7b4f0932bab7fcfb0feccb9ba9bf
-  data.tar.gz: 2a4394f918a235d08790409488e5dc1a1fd280239d9e1df2ece6dc99628ffc67b73a5beeef199d35a5fe10f4c781cf7ee19910585ff3aa13719b6163b0725d46
+  metadata.gz: 08f20461b411e57c6dabba538faf7f64c73cdc2cd7d8296a33fca0202091a7f36004862a4f6b3b11b9cc99776bec64651b29aa83966692340b821d5d7623f3b5
+  data.tar.gz: 7b0751f4610efb54b7dd959a827d5ffa81f61ebd9a5ada6c2f3dfc31de6fb22fe5ad259cd7b537d4e29ac656d590802e18db537e6fb97a9924ebbb37fff5ea86

data/lib/cbeta/html_to_pdf.rb CHANGED Viewed

@@ -1,11 +1,16 @@
-require 'wicked_pdf'
 class CBETA::HTMLToPDF
   # @param input [String] folder of source HTML, HTML can be produced by CBETA::P5aToHTMLForPDF.
   # @param output [String] output folder
-  def initialize(input, output)
+  # @param converter [String] shell command to convert HTML to PDF
+  #   * suggestion: http://www.princexml.com/
+  #   * wkhtmltopdf has font problem to display unicode extb characters
+  #
+  # @example
+  #   c = CBETA::HTMLToPDF.new('/temp/cbeta-html', '/temp/cbeta-pdf', "prince %{in} -o %{out}")
+  def initialize(input, output, converter)
     @input = input
     @output = output
+    @converter = converter
   end
   # Convert CBETA HTML to PDF
@@ -55,11 +60,8 @@ class CBETA::HTMLToPDF
   def convert_file(html_fn, pdf_fn)
     puts "convert file: #{html_fn} to #{pdf_fn}"
-    pdf = WickedPdf.new.pdf_from_html_file(html_fn)
-    File.open(pdf_fn, 'wb') do |file|
-      file << pdf
-    end
+    cmd = @converter % { in: html_fn, out: pdf_fn}
+    `#{cmd}`
   end
   def convert_vol(arg)

data/lib/cbeta/p5a_to_html.rb CHANGED Viewed

@@ -278,7 +278,9 @@ class CBETA::P5aToHTML
     cell['class'] = 'lg-cell'
     cell.inner_html = traverse(e)
-    if @first_l
+    if e.key? 'rend'
+      cell['style'] = e['rend']
+    elsif @first_l
       parent = e.parent()
       if parent.has_attribute?('rend')
         indent = parent['rend'].scan(/text-indent:[^:]*/)
@@ -286,8 +288,9 @@ class CBETA::P5aToHTML
           cell['style'] = indent[0]
         end
       end
-      @first_l = false
     end
+    @first_l = false
     r = to_html(cell)
     unless @lg_row_open

data/lib/cbeta/p5a_to_html_for_pdf.rb CHANGED Viewed

@@ -4,6 +4,7 @@ require 'fileutils'
 require 'json'
 require 'nokogiri'
 require 'set'
+require 'erb'
 # Convert CBETA XML P5a to HTML for PDF
 #
@@ -23,6 +24,10 @@ class CBETA::P5aToHTMLForPDF
   #   * graphic_base/figures: 插圖圖檔位置
   #   * graphic_base/sd-gif: images for Siddham (悉曇字)
   #   * graphic_base/rj-gif: images for Ranjana (蘭札體)
+  # @option opts [String] :front_page 內文前可以加一段 HTML，例如「編輯說明」
+  # @option opts [String] :front_page_title 加在目錄的 front_page 標題
+  # @option opts [String] :back_page 內文後可以加一段 HTML，例如「版權聲明」
+  # @option opts [String] :back_page_title 加在目錄的 back_page 標題
   def initialize(xml_root, out_root, opts={})
     @config = {
     }
@@ -70,6 +75,34 @@ class CBETA::P5aToHTMLForPDF
   end
   private
+  def before_parse_xml(xml_fn)
+    @div_count = 0
+    @in_l = false
+    @lg_row_open = false
+    @t_buf1 = []
+    @t_buf2 = []
+    @open_divs = []
+    @sutra_no = File.basename(xml_fn, ".xml")
+    @output_folder_sutra = File.join(@out_folder, @sutra_no)
+    FileUtils.mkdir_p(@output_folder_sutra) unless Dir.exist? @output_folder_sutra
+    src = File.join(CBETA::DATA, 'html-for-pdf.css')
+    dest = File.join(@output_folder_sutra, 'html-for-pdf.css')
+    FileUtils.copy(src, dest)
+    @nav_doc = Nokogiri::XML('<ul></ul>')
+    @nav_doc.remove_namespaces!()
+    @nav_root = @nav_doc.at_xpath('/ul')
+    @current_nav = [@nav_root]
+    @mulu_count = 0
+    if @config[:front_page_title]
+      s = @config[:front_page_title]
+      @nav_root.add_child("<li><a href='#front'>#{s}</a></li>")
+    end
+  end
   def convert_all
     Dir.foreach(@xml_root) { |c|
@@ -119,7 +152,7 @@ class CBETA::P5aToHTMLForPDF
   end
   def handle_corr(e)
-    traverse(e)
+    "<span class='corr'>%s</span>" % traverse(e)
   end
   def handle_div(e)
@@ -133,22 +166,18 @@ class CBETA::P5aToHTMLForPDF
     end
   end
+  def handle_doc_number(e)
+    "<p>%s</p>" % traverse(e)
+  end
   def handle_figure(e)
     "<div class='figure'>%s</div>" % traverse(e)
   end
   def handle_g(e, mode)
-    # if 有 <mapping type="unicode">
-    #   if 不在 Unicode Extension C, D, E 範圍裡
-    #     直接採用
-    #   else
-    #     預設呈現 unicode, 但仍包缺字資訊，供點選開 popup
-    # else if 有 <mapping type="normal_unicode">
-    #   預設呈現 normal_unicode, 但仍包缺字資訊，供點選開 popup
-    # else if 有 normalized form
-    #   預設呈現 normalized form, 但仍包缺字資訊，供點選開 popup
-    # else
-    #   預設呈現組字式, 但仍包缺字資訊，供點選開 popup
+    # 悉曇字、蘭札體 使用圖檔
+    # 如果有對應的 unicode 且不在 Unicode Extension C, D, E 範圍裡，直接採用 unicode
+    # 呈現組字式
     gid = e['ref'][1..-1]
     g = @gaijis[gid]
     abort "Line:#{__LINE__} 無缺字資料:#{gid}" if g.nil?
@@ -163,8 +192,6 @@ class CBETA::P5aToHTMLForPDF
       end
     end
-    @char_count += 1
     if gid.start_with?('SD')
       case gid
       when 'SD-E35A'
@@ -188,14 +215,12 @@ class CBETA::P5aToHTMLForPDF
     end
     if g.has_key?('unicode')
-      if @unicode1.include?(g['unicode'])
+      # 如果不在 unicode ext-C, ext-D, ext-E 範圍內
+      unless (0x2A700..0x2CEAF).include? g['unicode'].hex
         return g['unicode-char'] # 直接採用 unicode
       end
     end
-    return g['normal_unicode'] if g.has_key?('normal_unicode')
-    return g['normal'] if g.has_key?('normal')
     zzs
   end
@@ -217,7 +242,11 @@ class CBETA::P5aToHTMLForPDF
       return traverse(e)
     else
       i = @open_divs.size
-      return "<p class='h#{i}'>%s</p>" % traverse(e)
+      if i <= 6
+        return "<p class='h#{i}'>%s</p>" % traverse(e)
+      else
+        return "<p class='h#{i}'>%s</p>" % traverse(e)
+      end
     end
   end
@@ -239,9 +268,11 @@ class CBETA::P5aToHTMLForPDF
     doc = Nokogiri::XML::Document.new
     cell = doc.create_element('div')
     cell['class'] = 'lg-cell'
-    cell.inner_html = traverse(e)
+    cell.inner_html = traverse(e) + '　'
-    if @first_l
+    if e.key? 'rend'
+      cell['style'] = e['rend']
+    elsif @first_l
       parent = e.parent()
       if parent.has_attribute?('rend')
         indent = parent['rend'].scan(/text-indent:[^:]*/)
@@ -249,8 +280,8 @@ class CBETA::P5aToHTMLForPDF
           cell['style'] = indent[0]
         end
       end
-      @first_l = false
     end
+    @first_l = false
     r = to_html(cell)
     unless @lg_row_open
@@ -280,7 +311,14 @@ class CBETA::P5aToHTMLForPDF
   end
   def handle_lem(e)
-    traverse(e)
+    r = ''
+    w = e['wit']
+    if w.include? 'CBETA' and not w.include? @orig
+      r = "<span class='corr'>%s</span>" % traverse(e)
+    else
+      r = traverse(e)
+    end
+    r
   end
   def handle_lg(e)
@@ -317,7 +355,20 @@ class CBETA::P5aToHTMLForPDF
   end
   def handle_mulu(e)
-    ''
+    return '' if e['type']=='卷'
+    @mulu_count += 1
+    level = e['level'].to_i
+    while @current_nav.size > level
+      @current_nav.pop
+    end
+    label = traverse(e, 'txt')
+    li = @current_nav.last.add_child("<li><a href='#mulu#{@mulu_count}'>#{label}</a></li>").first
+    ul = li.add_child('<ul></ul>').first
+    @current_nav << ul
+    # mulu 標記裡要有東西，prince 才會產生 pdf bookmark
+    "<a id='mulu#{@mulu_count}'></a><mulu#{level} title='#{label}'>&nbsp;</mulu#{level}>"
   end
   def handle_node(e, mode)
@@ -330,6 +381,7 @@ class CBETA::P5aToHTMLForPDF
     when 'byline'    then handle_byline(e)
     when 'cell'      then handle_cell(e)
     when 'corr'      then handle_corr(e)
+    when 'docNumber' then handle_doc_number(e)
     when 'div'       then handle_div(e)
     when 'figure'    then handle_figure(e)
     when 'foreign'   then ''
@@ -399,37 +451,42 @@ class CBETA::P5aToHTMLForPDF
   def handle_sutra(xml_fn)
     puts "convert sutra #{xml_fn}"
-    @back = { 0 => '' }
-    @char_count = 1
-    @dila_note = 0
-    @div_count = 0
-    @in_l = false
-    @juan = 0
-    @lg_row_open = false
-    @t_buf1 = []
-    @t_buf2 = []
-    @open_divs = []
-    @sutra_no = File.basename(xml_fn, ".xml")
-    @output_folder_sutra = File.join(@out_folder, @sutra_no)
-    FileUtils.mkdir_p(@output_folder_sutra) unless Dir.exist? @output_folder_sutra
+    before_parse_xml(xml_fn)
+    @text = parse_xml(xml_fn)
-    src = File.join(CBETA::DATA, 'html-for-pdf.css')
-    dest = File.join(@output_folder_sutra, 'html-for-pdf.css')
-    FileUtils.copy(src, dest)
+    # 目次
+    if @config[:back_page_title]
+      s = @config[:back_page_title]
+      @nav_root.add_child("<li><a href='#back'>#{s}</a></li>")
+    end
+    @toc = to_html(@nav_root)
+    @toc.gsub!('<ul/>', '')
+    if @config.key? :front_page
+      s = File.read(@config[:front_page])
+      @front = "<div id='front'>#{s}</div>"
+    end
+    if @config.key? :back_page
+      s = File.read(@config[:back_page])
+      @back = "<div id='back'>#{s}</div>"
+    end
-    text = parse_xml(xml_fn)
-    text = "
-<html>
-<head>
-  <meta http-equiv='Content-Type' content='text/html; charset=utf-8' />
-  <link rel=stylesheet type='text/css' href='html-for-pdf.css'>
-</head>
-<body>#{text}</body>
-</html>"
+    fn = File.join(CBETA::DATA, 'pdf-template.htm')
+    template = File.read(fn)
+    output = template % {
+      title: @title,
+      author: @author,
+      toc: @toc,
+      front: @front,
+      text: @text,
+      back: @back
+    }
     fn = File.join(@output_folder_sutra, 'main.htm')
-    File.write(fn, text)
+    File.write(fn, output)
   end
   def handle_t(e)
@@ -470,7 +527,7 @@ class CBETA::P5aToHTMLForPDF
     # cbeta xml 文字之間會有多餘的換行
     r = s.gsub(/[\n\r]/, '')
     # 把 & 轉為 &amp;
     r = CGI.escapeHTML(r)
@@ -532,6 +589,8 @@ class CBETA::P5aToHTMLForPDF
     @title = traverse(e, 'txt')
     @title = @title.split()[-1]
+    @author = doc.at_xpath("//titleStmt/author").text
     e = doc.at_xpath("//editionStmt/edition/date")
     abort "找不到版本日期" if e.nil?
     @edition_date = e.text.sub(/\$Date: (.*?) \$$/, '\1')

data/lib/data/html-for-pdf.css CHANGED Viewed

@@ -1,13 +1,24 @@
 @font-face {
-    font-family: 'Songti';
-    /* src: url('/Library/Fonts/Songti.ttc'); */
-		src: url('/Library/Fonts/华文仿宋.ttf');
+    font-family: extb;
+    font-style: normal;
+    font-weight: normal;
+    src: url("/Library/Fonts/Microsoft/PMingLiU-ExtB.ttf")
+}
+@font-face {
+    font-family: PMingLiU;
+    font-style: normal;
+    font-weight: normal;
+    src: url("/Library/Fonts/Microsoft/PMingLiU.ttf")
+}
+a {
+    text-decoration: none;
 }
 body {
-	font-family: Songti, PMingLiU-ExtB;
+	font-family: PMingLiU, extb;
 }
 div.lg {
 	display: table;
+	margin-left: 1em;
 }
 div.lg-cell {
 	display: table-cell;
@@ -16,37 +27,76 @@ div.lg-row {
 	display: table-row;
 }
 div.p {
-	margin-bottom: 20px;
+	margin-bottom: 1em;
+	margin-top: 1em;
 	line-height: 1.4;
-	text-indent: 2em;
-}
-p.byline {
-	text-align: right;
 }
 p.h1 {
-	text-indent: 2em;
+	margin-left: 1em;
+	font-size: 1.2em;
 	font-weight: bold;
 }
 p.h2 {
-	text-indent: 3em;
+	margin-left: 2em;
+	font-size: 1.2em;
 	font-weight: bold;
 }
 p.h3 {
-	text-indent: 4em;
+	margin-left: 3em;
+	font-size: 1.2em;
 	font-weight: bold;
 }
 p.h4 {
-	text-indent: 2em;
+	margin-left: 2em;
+	font-size: 1.2em;
 	font-weight: bold;
 }
 p.h5 {
-	text-indent: 3em;
+	margin-left: 3em;
+	font-size: 1.2em;
 	font-weight: bold;
 }
 p.h6 {
-	text-indent: 4em;
+	margin-left: 4em;
+	font-size: 1.2em;
 	font-weight: bold;
 }
+/*
+  mulu 標記是用來產生 pdf bookmark 用的
+  參考： http://www.princexml.com/doc/pdf-bookmarks/
+*/
+mulu1 {
+    prince-bookmark-level: 1;
+    prince-bookmark-label: attr(title)
+}
+mulu2 {
+    prince-bookmark-level: 2;
+    prince-bookmark-label: attr(title)
+}
+mulu3 {
+    prince-bookmark-level: 3;
+    prince-bookmark-label: attr(title)
+}
+mulu4 {
+    prince-bookmark-level: 4;
+    prince-bookmark-label: attr(title)
+}
+mulu5 {
+    prince-bookmark-level: 5;
+    prince-bookmark-label: attr(title)
+}
+mulu6 {
+    prince-bookmark-level: 6;
+    prince-bookmark-label: attr(title)
+}
+p.author {
+	font-size: 1.6em;
+	text-align: center;
+}
+p.byline {
+	text-align: right;
+}
 p.h7 {
 	text-indent: 2em;
 	font-weight: bold;
@@ -55,9 +105,16 @@ p.h8 {
 	text-indent: 2em;
 	font-weight: bold;
 }
+p.title {
+	font-size: 2em;
+	text-align: center;
+}
 span.corr {
 	color: red;
 }
+span.extb {
+	font-family: extb;
+}
 table {
 	border-collapse: collapse;
 }

data/lib/data/pdf-template.htm ADDED Viewed

@@ -0,0 +1,18 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+	<link rel=stylesheet type='text/css' href='html-for-pdf.css'>
+</head>
+<body>
+  <p class='title'>%{title}</p>
+  <p class='author'>%{author}</p>
+	<div>
+		<h1>目次</h1>
+		%{toc}
+	</div>
+	%{front}
+  %{text}
+	%{back}
+</body>
+</html>

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: cbeta
 version: !ruby/object:Gem::Version
-  version: 1.3.1
+  version: 1.3.2
 platform: ruby
 authors:
 - Ray Chou
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-11-04 00:00:00.000000000 Z
+date: 2015-11-13 00:00:00.000000000 Z
 dependencies: []
 description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
 email: zhoubx@gmail.com
@@ -36,6 +36,7 @@ files:
 - lib/data/epub.css
 - lib/data/gaiji.json
 - lib/data/html-for-pdf.css
+- lib/data/pdf-template.htm
 - lib/data/unicode-1.1.json
 homepage: https://github.com/RayCHOU/ruby-cbeta
 licenses: