RubyGems - cbeta - Versions diffs - 2.1.1 → 2.1.2 - Mend

cbeta 2.1.1 → 2.1.2

Files changed (9) hide show

checksums.yaml +4 -4
data/lib/cbeta/html_to_pdf.rb +12 -33
data/lib/cbeta/p5a_parser.rb +6 -5
data/lib/cbeta/p5a_to_html_for_pdf.rb +133 -31
data/lib/data/html-for-pdf.css +4 -0
data/lib/data/pdf-template.htm +2 -6
metadata +3 -4
data/lib/data/epub-nav.xhtml +0 -11
data/lib/data/epub.css +0 -57

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: f4f9d8df5d6d472d3cd24eab6cc0b33086d14c14
-  data.tar.gz: dcebae46e56c9895cd7bbcd0162161218d8b710a
+  metadata.gz: 833cb0a1b76d79d40438f7d5f9b5dc49ce464f99
+  data.tar.gz: 550bd5306dec92d4454c47cd7cb8bfed5065328d
 SHA512:
-  metadata.gz: e10fd20296adc40ca814984d90a3c2c23c0b5d46763f7ae80ccdd89f5b407a68014446eefa0b9671f58be315fa6af30aeddf1e01e7e8ca77072f99e4b8857ee4
-  data.tar.gz: 11c4590682a0f6e85b4be01e1c85a2ed19f21d38ec0881376f2903442f88c650c07a5a3910fb608c894d44a3d649b857f855dcd475cb9ede769aa8eb8cf89d75
+  metadata.gz: 02c9a174e77a60db9fbda71d119965d36ef3048ebf5d485f657bfde32ba80a5b8e3c7cbc1a60db5ef97a259c7b83cb2a7aca03d37d3133682e87413c0b5f4387
+  data.tar.gz: 1bd1732b427d9778d22ee7b62d2546b4b6be93b29c2ca3a87ca2ca7bd32a34708782af21fc89252fb5bdc321acafd5005152b7e4d38f45308487798dc95796ac

data/lib/cbeta/html_to_pdf.rb CHANGED

@@ -35,7 +35,7 @@ class CBETA::HTMLToPDF
     return convert_all if target.nil?
     arg = target.upcase
-    if arg.size == 1
+    if arg.size <= 2
       convert_collection(arg)
     else
       if arg.include? '..'
@@ -49,46 +49,25 @@ class CBETA::HTMLToPDF
   end
   def convert_collection(c)
-    @series = c
+    @canon = c
     puts 'convert_collection ' + c
-    folder = File.join(@input, @series)
-    Dir.foreach(folder) { |vol|
-      next if ['.', '..', '.DS_Store'].include? vol
-      convert_vol(vol)
-    }
-  end
-  def convert_file(html_fn, pdf_fn)
-    puts "convert file: #{html_fn} to #{pdf_fn}"
-    cmd = @converter % { in: html_fn, out: pdf_fn}
-    `#{cmd}`
-  end
-  def convert_vol(arg)
-    vol = arg.upcase
-    canon = vol[0]
-    vol_folder = File.join(@input, canon, vol)
-    output_folder = File.join(@output, canon, vol)
+    output_folder = File.join(@output, @canon)
     FileUtils.mkdir_p(output_folder) unless Dir.exist? output_folder
-    Dir.entries(vol_folder).sort.each do |f|
+    folder = File.join(@input, @canon)
+    Dir.foreach(folder) { |f|
       next if f.start_with? '.'
-      src = File.join(vol_folder, f, 'main.htm')
+      src = File.join(folder, f, 'main.htm')
       dest = File.join(output_folder, "#{f}.pdf")
       convert_file(src, dest)
-    end
-  end
-  def convert_vols(v1, v2)
-    puts "convert volumns: #{v1}..#{v2}"
-    @series = v1[0]
-    folder = File.join(@input, @series)
-    Dir.foreach(folder) { |vol|
-      next if vol < v1
-      next if vol > v2
-      convert_vol(vol)
     }
   end
+  def convert_file(html_fn, pdf_fn)
+    puts "convert file: #{html_fn} to #{pdf_fn}"
+    cmd = @converter % { in: html_fn, out: pdf_fn}
+    `#{cmd}`
+  end
 end

data/lib/cbeta/p5a_parser.rb CHANGED

@@ -22,11 +22,12 @@ class CBETA::P5aParser
   # @param e [Nokogiri::XML::Element]
   # @param mode [String] 'html' or 'text', default value: 'html'
   # @return [Hash]
-  #   * :content [String] 要放在本文中的文字, 如果 mode=='html', 那麼本文文字會包含 footnote anchor
-  #   * :footnote_text [String] 要放在 footnote 的文字
-  #   * :footnote_resp [String]
-  #     * 'orig': 表示這個註解是底本的註
-  #     * 'CBETA': 表示這個註解是 CBETA 修訂過的註
+  #   回傳
+  #     * :content [String] 要放在本文中的文字, 如果 mode=='html', 那麼本文文字會包含 footnote anchor
+  #     * :footnote_text [String] 要放在 footnote 的文字
+  #     * :footnote_resp [String]
+  #       * 'orig': 表示這個註解是底本的註
+  #       * 'CBETA': 表示這個註解是 CBETA 修訂過的註
   def handle_note(e, mode='html')
     r = {
       content: '',

data/lib/cbeta/p5a_to_html_for_pdf.rb CHANGED

@@ -28,8 +28,10 @@ class CBETA::P5aToHTMLForPDF
   # @option opts [String] :front_page_title 加在目錄的 front_page 標題
   # @option opts [String] :back_page 內文後可以加一段 HTML，例如「版權聲明」
   # @option opts [String] :back_page_title 加在目錄的 back_page 標題
+  # @option opts [Boolean] :toc 要不要放目次, 預設會有目次
   def initialize(xml_root, out_root, opts={})
     @config = {
+      toc: true
     }
     @config.merge!(opts)
@@ -61,8 +63,8 @@ class CBETA::P5aToHTMLForPDF
     return convert_all if target.nil?
     arg = target.upcase
-    if arg.size == 1
-      handle_collection(arg)
+    if arg.size <= 2
+      convert_collection(arg)
     else
       if arg.include? '..'
         arg.match(/^([^\.]+?)\.\.([^\.]+)$/) {
@@ -76,6 +78,17 @@ class CBETA::P5aToHTMLForPDF
   private
+  def before_convert_work(work_id)
+    @nav_doc = Nokogiri::XML('<ul></ul>')
+    @nav_doc.remove_namespaces!()
+    @nav_root = @nav_doc.at_xpath('/ul')
+    @current_nav = [@nav_root]
+    @mulu_count = 0
+    @output_folder_work = File.join(@out_root, @series, work_id)
+    FileUtils.mkdir_p(@output_folder_work) unless Dir.exist? @output_folder_work
+  end
   def before_parse_xml(xml_fn)
     @div_count = 0
     @in_l = false
@@ -83,33 +96,102 @@ class CBETA::P5aToHTMLForPDF
     @t_buf1 = []
     @t_buf2 = []
     @open_divs = []
-    @sutra_no = File.basename(xml_fn, ".xml")
-    @output_folder_sutra = File.join(@out_folder, @sutra_no)
-    FileUtils.mkdir_p(@output_folder_sutra) unless Dir.exist? @output_folder_sutra
+    @sutra_no = File.basename(xml_fn, ".xml")
     src = File.join(CBETA::DATA, 'html-for-pdf.css')
-    dest = File.join(@output_folder_sutra, 'html-for-pdf.css')
-    FileUtils.copy(src, dest)
-    @nav_doc = Nokogiri::XML('<ul></ul>')
-    @nav_doc.remove_namespaces!()
-    @nav_root = @nav_doc.at_xpath('/ul')
-    @current_nav = [@nav_root]
-    @mulu_count = 0
+    copy_file(src)
     if @config[:front_page_title]
       s = @config[:front_page_title]
       @nav_root.add_child("<li><a href='#front'>#{s}</a></li>")
-    end
+    end
   end
   def convert_all
     Dir.foreach(@xml_root) { |c|
       next unless c.match(/^[A-Z]$/)
-      handle_collection(c)
+      convert_collection(c)
     }
   end
+  def convert_collection(c)
+    @series = c
+    puts 'handle_collection ' + c
+    folder = File.join(@xml_root, @series)
+    @works = {}
+    prepare_work_list(folder)
+    @works.each do |work_id, xml_files|
+      convert_work(work_id, xml_files)
+    end
+  end
+  def convert_work(work_id, xml_files)
+    puts "convert work #{work_id}"
+    before_convert_work(work_id)
+    # 目次
+    if @config[:back_page_title]
+      s = @config[:back_page_title]
+      @nav_root.add_child("<li><a href='#back'>#{s}</a></li>")
+    end
+    @cover = nil
+    if @config.key? :graphic_base
+      cover = File.join(@config[:graphic_base], 'covers', @series, "#{work_id}.jpg")
+      if File.exist? cover
+        @cover = "<div id='cover'><img src='#{work_id}.jpg' /></div>"
+        copy_file(cover)
+      end
+    end
+    if @config.key? :front_page
+      s = File.read(@config[:front_page])
+      @front = "<div id='front'>#{s}</div>"
+    end
+    if @config.key? :back_page
+      s = File.read(@config[:back_page])
+      @back = "<div id='back'>#{s}</div>"
+    end
+    @text = ''
+    xml_files.each do |fn|
+      @text += convert_xml_file(fn)
+    end
+    if @config[:toc]
+      @toc = to_html(@nav_root)
+      @toc.gsub!('<ul/>', '')
+    	@toc = "<div><h1>目次</h1>#{@toc}</div>"
+    else
+      @toc = ''
+    end
+    fn = File.join(CBETA::DATA, 'pdf-template.htm')
+    template = File.read(fn)
+    output = template % {
+      cover: @cover,
+      toc: @toc,
+      front: @front,
+      text: @text,
+      back: @back
+    }
+    fn = File.join(@output_folder_work, 'main.htm')
+    File.write(fn, output)
+  end
+  def convert_xml_file(xml_fn)
+    before_parse_xml(xml_fn)
+    parse_xml(xml_fn)
+  end
+  def copy_file(src)
+    basename = File.basename(src)
+    dest = File.join(@output_folder_work, basename)
+    FileUtils.copy(src, dest)
+  end
   def handle_anchor(e)
     id = e['id']
@@ -141,15 +223,6 @@ class CBETA::P5aToHTMLForPDF
     to_html(cell)
   end
-  def handle_collection(c)
-    @series = c
-    puts 'handle_collection ' + c
-    folder = File.join(@xml_root, @series)
-    Dir.foreach(folder) { |vol|
-      next if ['.', '..', '.DS_Store'].include? vol
-      handle_vol(vol)
-    }
-  end
   def handle_corr(e)
     "<span class='corr'>%s</span>" % traverse(e)
@@ -201,8 +274,7 @@ class CBETA::P5aToHTMLForPDF
       else
         fn = "#{gid}.gif"
         src = File.join(@config[:graphic_base], 'sd-gif', gid[3..4], fn)
-        dest = File.join(@output_folder_sutra, fn)
-        FileUtils.copy(src, dest)
+        copy_file(src)
         return "<img src='#{fn}'/>"
       end
     end
@@ -210,7 +282,7 @@ class CBETA::P5aToHTMLForPDF
     if gid.start_with?('RJ')
       fn = "#{gid}.gif"
       src = File.join(@config[:graphic_base], 'rj-gif', gid[3..4], fn)
-      dest = File.join(@output_folder_sutra, fn)
+      copy_file(src)
       return "<img src='#{fn}'/>"
     end
@@ -229,9 +301,9 @@ class CBETA::P5aToHTMLForPDF
     url.sub!(/^.*(figures\/.*)$/, '\1')
     src = File.join(@config[:graphic_base], url)
+    copy_file(src)
     fn = File.basename(src)
-    dest = File.join(@output_folder_sutra, fn)
-    FileUtils.copy(src, dest)
     "<img src='#{fn}'/>"
   end
@@ -464,6 +536,10 @@ class CBETA::P5aToHTMLForPDF
     @toc = to_html(@nav_root)
     @toc.gsub!('<ul/>', '')
+    if @config.key? :graphic_base
+    end
     if @config.key? :front_page
       s = File.read(@config[:front_page])
       @front = "<div id='front'>#{s}</div>"
@@ -541,7 +617,11 @@ class CBETA::P5aToHTMLForPDF
     abort "未處理底本" if @orig.nil?
     @vol = vol
-    @series = vol[0]
+    if vol.start_with? 'DA'
+      @series = 'DA'
+    else
+      @series = vol[0]
+    end
     @out_folder = File.join(@out_root, @series, vol)
     FileUtils.remove_dir(@out_folder, force=true)
     FileUtils::mkdir_p @out_folder
@@ -591,6 +671,11 @@ class CBETA::P5aToHTMLForPDF
     @author = doc.at_xpath("//titleStmt/author").text
+    if @cover.nil?
+      @cover = "<p class='title'>#{@title}</p>\n"
+      @cover += "<p class='author'>#{@author}</p>"
+    end
     e = doc.at_xpath("//editionStmt/edition/date")
     abort "找不到版本日期" if e.nil?
     @edition_date = e.text.sub(/\$Date: (.*?) \$$/, '\1')
@@ -627,6 +712,23 @@ class CBETA::P5aToHTMLForPDF
     r + "<table>\n"
   end
+  def prepare_work_list(input_folder)
+    Dir.foreach(input_folder) do |f|
+      next if f.start_with? '.'
+      p1 = File.join(input_folder, f)
+      if File.file?(p1)
+        work = f.sub(/^([A-Z]{1,2})\d{2,3}n(.*)\.xml$/, '\1\2')
+        work = 'T0220' if work.start_with? 'T0220'
+        unless @works.key? work
+          @works[work] = []
+        end
+        @works[work] << p1
+      else
+        prepare_work_list(p1)
+      end
+    end
+  end
   def to_html(e)
     e.to_xml(encoding: 'UTF-8', :save_with => Nokogiri::XML::Node::SaveOptions::AS_XML)

data/lib/data/html-for-pdf.css CHANGED

@@ -30,6 +30,10 @@ div.p {
 	margin-bottom: 1em;
 	margin-top: 1em;
 	line-height: 1.4;
+	text-indent: 2em;
+}
+li div.p {
+	text-indent: 0;
 }
 p.h1 {
 	margin-left: 1em;

data/lib/data/pdf-template.htm CHANGED

@@ -5,12 +5,8 @@
 	<link rel=stylesheet type='text/css' href='html-for-pdf.css'>
 </head>
 <body>
-  <p class='title'>%{title}</p>
-  <p class='author'>%{author}</p>
-	<div>
-		<h1>目次</h1>
-		%{toc}
-	</div>
+	%{cover}
+	%{toc}
 	%{front}
   %{text}
 	%{back}

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: cbeta
 version: !ruby/object:Gem::Version
-  version: 2.1.1
+  version: 2.1.2
 platform: ruby
 authors:
 - Ray Chou
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-12-24 00:00:00.000000000 Z
+date: 2015-12-25 00:00:00.000000000 Z
 dependencies: []
 description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
 email: zhoubx@gmail.com
@@ -32,8 +32,6 @@ files:
 - lib/cbeta/p5a_validator.rb
 - lib/data/canons.csv
 - lib/data/categories.json
-- lib/data/epub-nav.xhtml
-- lib/data/epub.css
 - lib/data/gaiji.json
 - lib/data/html-for-pdf.css
 - lib/data/pdf-template.htm
@@ -63,3 +61,4 @@ signing_key:
 specification_version: 4
 summary: CBETA Tools
 test_files: []
+has_rdoc:

data/lib/data/epub-nav.xhtml DELETED

@@ -1,11 +0,0 @@
-<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
-<head>
-	<meta charset="utf-8" />
-</head>
-<body>
-<nav epub:type="toc" id="toc">
-  <h1>Table of contents</h1>
-	%s
-</nav>
-</body>
-</html>

data/lib/data/epub.css DELETED

@@ -1,57 +0,0 @@
-div.p {
-	margin-bottom: 20px;
-	line-height: 1.4;
-	text-indent: 2em;
-}
-p.byline {
-	text-align: right;
-}
-p.h1 {
-	text-indent: 2em;
-	font-weight: bold;
-}
-p.h2 {
-	text-indent: 3em;
-	font-weight: bold;
-}
-p.h3 {
-	text-indent: 4em;
-	font-weight: bold;
-}
-p.h4 {
-	text-indent: 2em;
-	font-weight: bold;
-}
-p.h5 {
-	text-indent: 3em;
-	font-weight: bold;
-}
-p.h6 {
-	text-indent: 4em;
-	font-weight: bold;
-}
-p.h7 {
-	text-indent: 2em;
-	font-weight: bold;
-}
-p.h8 {
-	text-indent: 2em;
-	font-weight: bold;
-}
-span.corr {
-	color: red;
-}
-table {
-	border-collapse: collapse;
-}
-th, td {
-	border: solid;
-	border-width: 1px;
-	padding: 5px;
-	word-wrap: break-word;
-	word-break: break-all;
-	text-indent: 0;
-}
-ul.simple {
-	list-style-type: none;
-}