RubyGems - cbeta - Versions diffs - 1.3.1 → 1.3.2 - Mend

cbeta 1.3.1 → 1.3.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml +4 -4
data/lib/cbeta/html_to_pdf.rb +10 -8
data/lib/cbeta/p5a_to_html.rb +5 -2
data/lib/cbeta/p5a_to_html_for_pdf.rb +110 -51
data/lib/data/html-for-pdf.css +72 -15
data/lib/data/pdf-template.htm +18 -0
metadata +3 -2

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: a21aa8cd67f37fc6d5ca9d87720b1d5d0da3f7f1
-  data.tar.gz: bb7fb6cb3ab5d8b36492cc96728aa5a7fc9a7286
+  metadata.gz: c227a83872ae6d070b3c37ba8ba0895bd46242e2
+  data.tar.gz: c89bdb2a9e7c3410c6f4de3e1a4ed760fc73fe2c
 SHA512:
-  metadata.gz: cb8bbfd96ad22c331e01059303ca539cb6718aae0caa551eba4a8064a7854d9f29bbd3bdb447bd06dd6a6f14b14363d16edb7b4f0932bab7fcfb0feccb9ba9bf
-  data.tar.gz: 2a4394f918a235d08790409488e5dc1a1fd280239d9e1df2ece6dc99628ffc67b73a5beeef199d35a5fe10f4c781cf7ee19910585ff3aa13719b6163b0725d46
+  metadata.gz: 08f20461b411e57c6dabba538faf7f64c73cdc2cd7d8296a33fca0202091a7f36004862a4f6b3b11b9cc99776bec64651b29aa83966692340b821d5d7623f3b5
+  data.tar.gz: 7b0751f4610efb54b7dd959a827d5ffa81f61ebd9a5ada6c2f3dfc31de6fb22fe5ad259cd7b537d4e29ac656d590802e18db537e6fb97a9924ebbb37fff5ea86

data/lib/cbeta/html_to_pdf.rb CHANGED Viewed

@@ -1,11 +1,16 @@
-require 'wicked_pdf'
 class CBETA::HTMLToPDF
   # @param input [String] folder of source HTML, HTML can be produced by CBETA::P5aToHTMLForPDF.
   # @param output [String] output folder
-  def initialize(input, output)
+  # @param converter [String] shell command to convert HTML to PDF
+  #   * suggestion: http://www.princexml.com/
+  #   * wkhtmltopdf has font problem to display unicode extb characters
+  #
+  # @example
+  #   c = CBETA::HTMLToPDF.new('/temp/cbeta-html', '/temp/cbeta-pdf', "prince %{in} -o %{out}")
+  def initialize(input, output, converter)
     @input = input
     @output = output
+    @converter = converter
   end
   # Convert CBETA HTML to PDF
@@ -55,11 +60,8 @@ class CBETA::HTMLToPDF
   def convert_file(html_fn, pdf_fn)
     puts "convert file: #{html_fn} to #{pdf_fn}"
-    pdf = WickedPdf.new.pdf_from_html_file(html_fn)
-    File.open(pdf_fn, 'wb') do |file|
-      file << pdf
-    end
+    cmd = @converter % { in: html_fn, out: pdf_fn}
+    `#{cmd}`
   end
   def convert_vol(arg)

data/lib/cbeta/p5a_to_html.rb CHANGED Viewed

@@ -278,7 +278,9 @@ class CBETA::P5aToHTML
     cell['class'] = 'lg-cell'
     cell.inner_html = traverse(e)
-    if @first_l
+    if e.key? 'rend'
+      cell['style'] = e['rend']
+    elsif @first_l
       parent = e.parent()
       if parent.has_attribute?('rend')
         indent = parent['rend'].scan(/text-indent:[^:]*/)
@@ -286,8 +288,9 @@ class CBETA::P5aToHTML
           cell['style'] = indent[0]
         end
       end
-      @first_l = false
     end
+    @first_l = false
     r = to_html(cell)
     unless @lg_row_open

data/lib/cbeta/p5a_to_html_for_pdf.rb CHANGED Viewed

@@ -4,6 +4,7 @@ require 'fileutils'
 require 'json'
 require 'nokogiri'
 require 'set'
+require 'erb'
 # Convert CBETA XML P5a to HTML for PDF
 #
@@ -23,6 +24,10 @@ class CBETA::P5aToHTMLForPDF
   #   * graphic_base/figures: 插圖圖檔位置
   #   * graphic_base/sd-gif: images for Siddham (悉曇字)
   #   * graphic_base/rj-gif: images for Ranjana (蘭札體)
+  # @option opts [String] :front_page 內文前可以加一段 HTML，例如「編輯說明」
+  # @option opts [String] :front_page_title 加在目錄的 front_page 標題
+  # @option opts [String] :back_page 內文後可以加一段 HTML，例如「版權聲明」
+  # @option opts [String] :back_page_title 加在目錄的 back_page 標題
   def initialize(xml_root, out_root, opts={})
     @config = {
     }
@@ -70,6 +75,34 @@ class CBETA::P5aToHTMLForPDF
   end
   private
+  def before_parse_xml(xml_fn)
+    @div_count = 0
+    @in_l = false
+    @lg_row_open = false
+    @t_buf1 = []
+    @t_buf2 = []
+    @open_divs = []
+    @sutra_no = File.basename(xml_fn, ".xml")
+    @output_folder_sutra = File.join(@out_folder, @sutra_no)
+    FileUtils.mkdir_p(@output_folder_sutra) unless Dir.exist? @output_folder_sutra
+    src = File.join(CBETA::DATA, 'html-for-pdf.css')
+    dest = File.join(@output_folder_sutra, 'html-for-pdf.css')
+    FileUtils.copy(src, dest)
+    @nav_doc = Nokogiri::XML('<ul></ul>')
+    @nav_doc.remove_namespaces!()
+    @nav_root = @nav_doc.at_xpath('/ul')
+    @current_nav = [@nav_root]
+    @mulu_count = 0
+    if @config[:front_page_title]
+      s = @config[:front_page_title]
+      @nav_root.add_child("<li><a href='#front'>#{s}</a></li>")
+    end
+  end
   def convert_all
     Dir.foreach(@xml_root) { |c|
@@ -119,7 +152,7 @@ class CBETA::P5aToHTMLForPDF
   end
   def handle_corr(e)
-    traverse(e)
+    "<span class='corr'>%s</span>" % traverse(e)
   end
   def handle_div(e)
@@ -133,22 +166,18 @@ class CBETA::P5aToHTMLForPDF
     end
   end
+  def handle_doc_number(e)
+    "<p>%s</p>" % traverse(e)
+  end
   def handle_figure(e)
     "<div class='figure'>%s</div>" % traverse(e)
   end
   def handle_g(e, mode)
-    # if 有 <mapping type="unicode">
-    #   if 不在 Unicode Extension C, D, E 範圍裡
-    #     直接採用
-    #   else
-    #     預設呈現 unicode, 但仍包缺字資訊，供點選開 popup
-    # else if 有 <mapping type="normal_unicode">
-    #   預設呈現 normal_unicode, 但仍包缺字資訊，供點選開 popup
-    # else if 有 normalized form
-    #   預設呈現 normalized form, 但仍包缺字資訊，供點選開 popup
-    # else
-    #   預設呈現組字式, 但仍包缺字資訊，供點選開 popup
+    # 悉曇字、蘭札體 使用圖檔
+    # 如果有對應的 unicode 且不在 Unicode Extension C, D, E 範圍裡，直接採用 unicode
+    # 呈現組字式
     gid = e['ref'][1..-1]
     g = @gaijis[gid]
     abort "Line:#{__LINE__} 無缺字資料:#{gid}" if g.nil?
@@ -163,8 +192,6 @@ class CBETA::P5aToHTMLForPDF
       end
     end
-    @char_count += 1
     if gid.start_with?('SD')
       case gid
       when 'SD-E35A'
@@ -188,14 +215,12 @@ class CBETA::P5aToHTMLForPDF
     end
     if g.has_key?('unicode')
-      if @unicode1.include?(g['unicode'])
+      # 如果不在 unicode ext-C, ext-D, ext-E 範圍內
+      unless (0x2A700..0x2CEAF).include? g['unicode'].hex
         return g['unicode-char'] # 直接採用 unicode
       end
     end
-    return g['normal_unicode'] if g.has_key?('normal_unicode')
-    return g['normal'] if g.has_key?('normal')
     zzs
   end
@@ -217,7 +242,11 @@ class CBETA::P5aToHTMLForPDF
       return traverse(e)
     else
       i = @open_divs.size
-      return "<p class='h#{i}'>%s</p>" % traverse(e)
+      if i <= 6
+        return "<p class='h#{i}'>%s</p>" % traverse(e)
+      else
+        return "<p class='h#{i}'>%s</p>" % traverse(e)
+      end
     end
   end
@@ -239,9 +268,11 @@ class CBETA::P5aToHTMLForPDF
     doc = Nokogiri::XML::Document.new
     cell = doc.create_element('div')
     cell['class'] = 'lg-cell'
-    cell.inner_html = traverse(e)
+    cell.inner_html = traverse(e) + '　'
-    if @first_l
+    if e.key? 'rend'
+      cell['style'] = e['rend']
+    elsif @first_l
       parent = e.parent()
       if parent.has_attribute?('rend')
         indent = parent['rend'].scan(/text-indent:[^:]*/)
@@ -249,8 +280,8 @@ class CBETA::P5aToHTMLForPDF
           cell['style'] = indent[0]
         end
       end
-      @first_l = false
     end
+    @first_l = false
     r = to_html(cell)
     unless @lg_row_open
@@ -280,7 +311,14 @@ class CBETA::P5aToHTMLForPDF
   end
   def handle_lem(e)
-    traverse(e)
+    r = ''
+    w = e['wit']
+    if w.include? 'CBETA' and not w.include? @orig
+      r = "<span class='corr'>%s</span>" % traverse(e)
+    else
+      r = traverse(e)
+    end
+    r
   end
   def handle_lg(e)
@@ -317,7 +355,20 @@ class CBETA::P5aToHTMLForPDF
   end
   def handle_mulu(e)
-    ''
+    return '' if e['type']=='卷'
+    @mulu_count += 1
+    level = e['level'].to_i
+    while @current_nav.size > level
+      @current_nav.pop
+    end
+    label = traverse(e, 'txt')
+    li = @current_nav.last.add_child("<li><a href='#mulu#{@mulu_count}'>#{label}</a></li>").first
+    ul = li.add_child('<ul></ul>').first
+    @current_nav << ul
+    # mulu 標記裡要有東西，prince 才會產生 pdf bookmark
+    "<a id='mulu#{@mulu_count}'></a><mulu#{level} title='#{label}'>&nbsp;</mulu#{level}>"
   end
   def handle_node(e, mode)
@@ -330,6 +381,7 @@ class CBETA::P5aToHTMLForPDF
     when 'byline'    then handle_byline(e)
     when 'cell'      then handle_cell(e)
     when 'corr'      then handle_corr(e)
+    when 'docNumber' then handle_doc_number(e)
     when 'div'       then handle_div(e)
     when 'figure'    then handle_figure(e)
     when 'foreign'   then ''
@@ -399,37 +451,42 @@ class CBETA::P5aToHTMLForPDF
   def handle_sutra(xml_fn)
     puts "convert sutra #{xml_fn}"
-    @back = { 0 => '' }
-    @char_count = 1
-    @dila_note = 0
-    @div_count = 0
-    @in_l = false
-    @juan = 0
-    @lg_row_open = false
-    @t_buf1 = []
-    @t_buf2 = []
-    @open_divs = []
-    @sutra_no = File.basename(xml_fn, ".xml")
-    @output_folder_sutra = File.join(@out_folder, @sutra_no)
-    FileUtils.mkdir_p(@output_folder_sutra) unless Dir.exist? @output_folder_sutra
+    before_parse_xml(xml_fn)
+    @text = parse_xml(xml_fn)
-    src = File.join(CBETA::DATA, 'html-for-pdf.css')
-    dest = File.join(@output_folder_sutra, 'html-for-pdf.css')
-    FileUtils.copy(src, dest)
+    # 目次
+    if @config[:back_page_title]
+      s = @config[:back_page_title]
+      @nav_root.add_child("<li><a href='#back'>#{s}</a></li>")
+    end
+    @toc = to_html(@nav_root)
+    @toc.gsub!('<ul/>', '')
+    if @config.key? :front_page
+      s = File.read(@config[:front_page])
+      @front = "<div id='front'>#{s}</div>"
+    end
+    if @config.key? :back_page
+      s = File.read(@config[:back_page])
+      @back = "<div id='back'>#{s}</div>"
+    end
-    text = parse_xml(xml_fn)
-    text = "
-<html>
-<head>
-  <meta http-equiv='Content-Type' content='text/html; charset=utf-8' />
-  <link rel=stylesheet type='text/css' href='html-for-pdf.css'>
-</head>
-<body>#{text}</body>
-</html>"
+    fn = File.join(CBETA::DATA, 'pdf-template.htm')
+    template = File.read(fn)
+    output = template % {
+      title: @title,
+      author: @author,
+      toc: @toc,
+      front: @front,
+      text: @text,
+      back: @back
+    }
     fn = File.join(@output_folder_sutra, 'main.htm')
-    File.write(fn, text)
+    File.write(fn, output)
   end
   def handle_t(e)
@@ -470,7 +527,7 @@ class CBETA::P5aToHTMLForPDF
     # cbeta xml 文字之間會有多餘的換行
     r = s.gsub(/[\n\r]/, '')
     # 把 & 轉為 &amp;
     r = CGI.escapeHTML(r)
@@ -532,6 +589,8 @@ class CBETA::P5aToHTMLForPDF
     @title = traverse(e, 'txt')
     @title = @title.split()[-1]
+    @author = doc.at_xpath("//titleStmt/author").text
     e = doc.at_xpath("//editionStmt/edition/date")
     abort "找不到版本日期" if e.nil?
     @edition_date = e.text.sub(/\$Date: (.*?) \$$/, '\1')

data/lib/data/html-for-pdf.css CHANGED Viewed

@@ -1,13 +1,24 @@
 @font-face {
-    font-family: 'Songti';
-    /* src: url('/Library/Fonts/Songti.ttc'); */
-		src: url('/Library/Fonts/华文仿宋.ttf');
+    font-family: extb;
+    font-style: normal;
+    font-weight: normal;
+    src: url("/Library/Fonts/Microsoft/PMingLiU-ExtB.ttf")
+}
+@font-face {
+    font-family: PMingLiU;
+    font-style: normal;
+    font-weight: normal;
+    src: url("/Library/Fonts/Microsoft/PMingLiU.ttf")
+}
+a {
+    text-decoration: none;
 }
 body {
-	font-family: Songti, PMingLiU-ExtB;
+	font-family: PMingLiU, extb;
 }
 div.lg {
 	display: table;
+	margin-left: 1em;
 }
 div.lg-cell {
 	display: table-cell;
@@ -16,37 +27,76 @@ div.lg-row {
 	display: table-row;
 }
 div.p {
-	margin-bottom: 20px;
+	margin-bottom: 1em;
+	margin-top: 1em;
 	line-height: 1.4;
-	text-indent: 2em;
-}
-p.byline {
-	text-align: right;
 }
 p.h1 {
-	text-indent: 2em;
+	margin-left: 1em;
+	font-size: 1.2em;
 	font-weight: bold;
 }
 p.h2 {
-	text-indent: 3em;
+	margin-left: 2em;
+	font-size: 1.2em;
 	font-weight: bold;
 }
 p.h3 {
-	text-indent: 4em;
+	margin-left: 3em;
+	font-size: 1.2em;
 	font-weight: bold;
 }
 p.h4 {
-	text-indent: 2em;
+	margin-left: 2em;
+	font-size: 1.2em;
 	font-weight: bold;
 }
 p.h5 {
-	text-indent: 3em;
+	margin-left: 3em;
+	font-size: 1.2em;
 	font-weight: bold;
 }
 p.h6 {
-	text-indent: 4em;
+	margin-left: 4em;
+	font-size: 1.2em;
 	font-weight: bold;
 }
+/*
+  mulu 標記是用來產生 pdf bookmark 用的
+  參考： http://www.princexml.com/doc/pdf-bookmarks/
+*/
+mulu1 {
+    prince-bookmark-level: 1;
+    prince-bookmark-label: attr(title)
+}
+mulu2 {
+    prince-bookmark-level: 2;
+    prince-bookmark-label: attr(title)
+}
+mulu3 {
+    prince-bookmark-level: 3;
+    prince-bookmark-label: attr(title)
+}
+mulu4 {
+    prince-bookmark-level: 4;
+    prince-bookmark-label: attr(title)
+}
+mulu5 {
+    prince-bookmark-level: 5;
+    prince-bookmark-label: attr(title)
+}
+mulu6 {
+    prince-bookmark-level: 6;
+    prince-bookmark-label: attr(title)
+}
+p.author {
+	font-size: 1.6em;
+	text-align: center;
+}
+p.byline {
+	text-align: right;
+}
 p.h7 {
 	text-indent: 2em;
 	font-weight: bold;
@@ -55,9 +105,16 @@ p.h8 {
 	text-indent: 2em;
 	font-weight: bold;
 }
+p.title {
+	font-size: 2em;
+	text-align: center;
+}
 span.corr {
 	color: red;
 }
+span.extb {
+	font-family: extb;
+}
 table {
 	border-collapse: collapse;
 }

data/lib/data/pdf-template.htm ADDED Viewed

@@ -0,0 +1,18 @@
+<!DOCTYPE html>
+<html>
+<head>
+  <meta http-equiv="Content-Type" content="text/html; charset=UTF-8">
+	<link rel=stylesheet type='text/css' href='html-for-pdf.css'>
+</head>
+<body>
+  <p class='title'>%{title}</p>
+  <p class='author'>%{author}</p>
+	<div>
+		<h1>目次</h1>
+		%{toc}
+	</div>
+	%{front}
+  %{text}
+	%{back}
+</body>
+</html>

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: cbeta
 version: !ruby/object:Gem::Version
-  version: 1.3.1
+  version: 1.3.2
 platform: ruby
 authors:
 - Ray Chou
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-11-04 00:00:00.000000000 Z
+date: 2015-11-13 00:00:00.000000000 Z
 dependencies: []
 description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
 email: zhoubx@gmail.com
@@ -36,6 +36,7 @@ files:
 - lib/data/epub.css
 - lib/data/gaiji.json
 - lib/data/html-for-pdf.css
+- lib/data/pdf-template.htm
 - lib/data/unicode-1.1.json
 homepage: https://github.com/RayCHOU/ruby-cbeta
 licenses: