RubyGems - cbeta - Versions diffs - 2.1.1 → 2.1.2 - Mend

cbeta 2.1.1 → 2.1.2

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in the respective public registry.

Files changed (9)

checksums.yaml +4 -4
data/lib/cbeta/html_to_pdf.rb +12 -33
data/lib/cbeta/p5a_parser.rb +6 -5
data/lib/cbeta/p5a_to_html_for_pdf.rb +133 -31
data/lib/data/html-for-pdf.css +4 -0
data/lib/data/pdf-template.htm +2 -6
metadata +3 -4
data/lib/data/epub-nav.xhtml +0 -11
data/lib/data/epub.css +0 -57

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: f4f9d8df5d6d472d3cd24eab6cc0b33086d14c14
-  data.tar.gz: dcebae46e56c9895cd7bbcd0162161218d8b710a
+  metadata.gz: 833cb0a1b76d79d40438f7d5f9b5dc49ce464f99
+  data.tar.gz: 550bd5306dec92d4454c47cd7cb8bfed5065328d
 SHA512:
-  metadata.gz: e10fd20296adc40ca814984d90a3c2c23c0b5d46763f7ae80ccdd89f5b407a68014446eefa0b9671f58be315fa6af30aeddf1e01e7e8ca77072f99e4b8857ee4
-  data.tar.gz: 11c4590682a0f6e85b4be01e1c85a2ed19f21d38ec0881376f2903442f88c650c07a5a3910fb608c894d44a3d649b857f855dcd475cb9ede769aa8eb8cf89d75
+  metadata.gz: 02c9a174e77a60db9fbda71d119965d36ef3048ebf5d485f657bfde32ba80a5b8e3c7cbc1a60db5ef97a259c7b83cb2a7aca03d37d3133682e87413c0b5f4387
+  data.tar.gz: 1bd1732b427d9778d22ee7b62d2546b4b6be93b29c2ca3a87ca2ca7bd32a34708782af21fc89252fb5bdc321acafd5005152b7e4d38f45308487798dc95796ac

data/lib/cbeta/html_to_pdf.rb CHANGED

@@ -35,7 +35,7 @@ class CBETA::HTMLToPDF
     return convert_all if target.nil?
     arg = target.upcase
-    if arg.size == 1
+    if arg.size <= 2
       convert_collection(arg)
     else
       if arg.include? '..'
@@ -49,46 +49,25 @@ class CBETA::HTMLToPDF
   end
   def convert_collection(c)
-    @series = c
+    @canon = c
     puts 'convert_collection ' + c
-    folder = File.join(@input, @series)
-    Dir.foreach(folder) { |vol|
-      next if ['.', '..', '.DS_Store'].include? vol
-      convert_vol(vol)
-    }
-  end
-  def convert_file(html_fn, pdf_fn)
-    puts "convert file: #{html_fn} to #{pdf_fn}"
-    cmd = @converter % { in: html_fn, out: pdf_fn}
-    `#{cmd}`
-  end
-  def convert_vol(arg)
-    vol = arg.upcase
-    canon = vol[0]
-    vol_folder = File.join(@input, canon, vol)
-    output_folder = File.join(@output, canon, vol)
+    output_folder = File.join(@output, @canon)
     FileUtils.mkdir_p(output_folder) unless Dir.exist? output_folder
-    Dir.entries(vol_folder).sort.each do |f|
+    folder = File.join(@input, @canon)
+    Dir.foreach(folder) { |f|
       next if f.start_with? '.'
-      src = File.join(vol_folder, f, 'main.htm')
+      src = File.join(folder, f, 'main.htm')
       dest = File.join(output_folder, "#{f}.pdf")
       convert_file(src, dest)
-    end
-  end
-  def convert_vols(v1, v2)
-    puts "convert volumns: #{v1}..#{v2}"
-    @series = v1[0]
-    folder = File.join(@input, @series)
-    Dir.foreach(folder) { |vol|
-      next if vol < v1
-      next if vol > v2
-      convert_vol(vol)
     }
   end
+  def convert_file(html_fn, pdf_fn)
+    puts "convert file: #{html_fn} to #{pdf_fn}"
+    cmd = @converter % { in: html_fn, out: pdf_fn}
+    `#{cmd}`
+  end
 end

data/lib/cbeta/p5a_parser.rb CHANGED

@@ -22,11 +22,12 @@ class CBETA::P5aParser
   # @param e [Nokogiri::XML::Element]
   # @param mode [String] 'html' or 'text', default value: 'html'
   # @return [Hash]
-  #   * :content [String] 要放在本文中的文字, 如果 mode=='html', 那麼本文文字會包含 footnote anchor
-  #   * :footnote_text [String] 要放在 footnote 的文字
-  #   * :footnote_resp [String]
-  #     * 'orig': 表示這個註解是底本的註
-  #     * 'CBETA': 表示這個註解是 CBETA 修訂過的註
+  #   回傳
+  #     * :content [String] 要放在本文中的文字, 如果 mode=='html', 那麼本文文字會包含 footnote anchor
+  #     * :footnote_text [String] 要放在 footnote 的文字
+  #     * :footnote_resp [String]
+  #       * 'orig': 表示這個註解是底本的註
+  #       * 'CBETA': 表示這個註解是 CBETA 修訂過的註
   def handle_note(e, mode='html')
     r = {
       content: '',

data/lib/cbeta/p5a_to_html_for_pdf.rb CHANGED

@@ -28,8 +28,10 @@ class CBETA::P5aToHTMLForPDF
   # @option opts [String] :front_page_title 加在目錄的 front_page 標題
   # @option opts [String] :back_page 內文後可以加一段 HTML，例如「版權聲明」
   # @option opts [String] :back_page_title 加在目錄的 back_page 標題
+  # @option opts [Boolean] :toc 要不要放目次, 預設會有目次
   def initialize(xml_root, out_root, opts={})
     @config = {
+      toc: true
     }
     @config.merge!(opts)
@@ -61,8 +63,8 @@ class CBETA::P5aToHTMLForPDF
     return convert_all if target.nil?
     arg = target.upcase
-    if arg.size == 1
-      handle_collection(arg)
+    if arg.size <= 2
+      convert_collection(arg)
     else
       if arg.include? '..'
         arg.match(/^([^\.]+?)\.\.([^\.]+)$/) {
@@ -76,6 +78,17 @@ class CBETA::P5aToHTMLForPDF
   private
+  def before_convert_work(work_id)
+    @nav_doc = Nokogiri::XML('<ul></ul>')
+    @nav_doc.remove_namespaces!()
+    @nav_root = @nav_doc.at_xpath('/ul')
+    @current_nav = [@nav_root]
+    @mulu_count = 0
+    @output_folder_work = File.join(@out_root, @series, work_id)
+    FileUtils.mkdir_p(@output_folder_work) unless Dir.exist? @output_folder_work
+  end
   def before_parse_xml(xml_fn)
     @div_count = 0
     @in_l = false
@@ -83,33 +96,102 @@ class CBETA::P5aToHTMLForPDF
     @t_buf1 = []
     @t_buf2 = []
     @open_divs = []
-    @sutra_no = File.basename(xml_fn, ".xml")
-    @output_folder_sutra = File.join(@out_folder, @sutra_no)
-    FileUtils.mkdir_p(@output_folder_sutra) unless Dir.exist? @output_folder_sutra
+    @sutra_no = File.basename(xml_fn, ".xml")
     src = File.join(CBETA::DATA, 'html-for-pdf.css')
-    dest = File.join(@output_folder_sutra, 'html-for-pdf.css')
-    FileUtils.copy(src, dest)
-    @nav_doc = Nokogiri::XML('<ul></ul>')
-    @nav_doc.remove_namespaces!()
-    @nav_root = @nav_doc.at_xpath('/ul')
-    @current_nav = [@nav_root]
-    @mulu_count = 0
+    copy_file(src)
     if @config[:front_page_title]
       s = @config[:front_page_title]
       @nav_root.add_child("<li><a href='#front'>#{s}</a></li>")
-    end
+    end
   end
   def convert_all
     Dir.foreach(@xml_root) { |c|
       next unless c.match(/^[A-Z]$/)
-      handle_collection(c)
+      convert_collection(c)
     }
   end
+  def convert_collection(c)
+    @series = c
+    puts 'handle_collection ' + c
+    folder = File.join(@xml_root, @series)
+    @works = {}
+    prepare_work_list(folder)
+    @works.each do |work_id, xml_files|
+      convert_work(work_id, xml_files)
+    end
+  end
+  def convert_work(work_id, xml_files)
+    puts "convert work #{work_id}"
+    before_convert_work(work_id)
+    # 目次
+    if @config[:back_page_title]
+      s = @config[:back_page_title]
+      @nav_root.add_child("<li><a href='#back'>#{s}</a></li>")
+    end
+    @cover = nil
+    if @config.key? :graphic_base
+      cover = File.join(@config[:graphic_base], 'covers', @series, "#{work_id}.jpg")
+      if File.exist? cover
+        @cover = "<div id='cover'><img src='#{work_id}.jpg' /></div>"
+        copy_file(cover)
+      end
+    end
+    if @config.key? :front_page
+      s = File.read(@config[:front_page])
+      @front = "<div id='front'>#{s}</div>"
+    end
+    if @config.key? :back_page
+      s = File.read(@config[:back_page])
+      @back = "<div id='back'>#{s}</div>"
+    end
+    @text = ''
+    xml_files.each do |fn|
+      @text += convert_xml_file(fn)
+    end
+    if @config[:toc]
+      @toc = to_html(@nav_root)
+      @toc.gsub!('<ul/>', '')
+    	@toc = "<div><h1>目次</h1>#{@toc}</div>"
+    else
+      @toc = ''
+    end
+    fn = File.join(CBETA::DATA, 'pdf-template.htm')
+    template = File.read(fn)
+    output = template % {
+      cover: @cover,
+      toc: @toc,
+      front: @front,
+      text: @text,
+      back: @back
+    }
+    fn = File.join(@output_folder_work, 'main.htm')
+    File.write(fn, output)
+  end
+  def convert_xml_file(xml_fn)
+    before_parse_xml(xml_fn)
+    parse_xml(xml_fn)
+  end
+  def copy_file(src)
+    basename = File.basename(src)
+    dest = File.join(@output_folder_work, basename)
+    FileUtils.copy(src, dest)
+  end
   def handle_anchor(e)
     id = e['id']
@@ -141,15 +223,6 @@ class CBETA::P5aToHTMLForPDF
     to_html(cell)
   end
-  def handle_collection(c)
-    @series = c
-    puts 'handle_collection ' + c
-    folder = File.join(@xml_root, @series)
-    Dir.foreach(folder) { |vol|
-      next if ['.', '..', '.DS_Store'].include? vol
-      handle_vol(vol)
-    }
-  end
   def handle_corr(e)
     "<span class='corr'>%s</span>" % traverse(e)
@@ -201,8 +274,7 @@ class CBETA::P5aToHTMLForPDF
       else
         fn = "#{gid}.gif"
         src = File.join(@config[:graphic_base], 'sd-gif', gid[3..4], fn)
-        dest = File.join(@output_folder_sutra, fn)
-        FileUtils.copy(src, dest)
+        copy_file(src)
         return "<img src='#{fn}'/>"
       end
     end
@@ -210,7 +282,7 @@ class CBETA::P5aToHTMLForPDF
     if gid.start_with?('RJ')
       fn = "#{gid}.gif"
       src = File.join(@config[:graphic_base], 'rj-gif', gid[3..4], fn)
-      dest = File.join(@output_folder_sutra, fn)
+      copy_file(src)
       return "<img src='#{fn}'/>"
     end
@@ -229,9 +301,9 @@ class CBETA::P5aToHTMLForPDF
     url.sub!(/^.*(figures\/.*)$/, '\1')
     src = File.join(@config[:graphic_base], url)
+    copy_file(src)
     fn = File.basename(src)
-    dest = File.join(@output_folder_sutra, fn)
-    FileUtils.copy(src, dest)
     "<img src='#{fn}'/>"
   end
@@ -464,6 +536,10 @@ class CBETA::P5aToHTMLForPDF
     @toc = to_html(@nav_root)
     @toc.gsub!('<ul/>', '')
+    if @config.key? :graphic_base
+    end
     if @config.key? :front_page
       s = File.read(@config[:front_page])
       @front = "<div id='front'>#{s}</div>"
@@ -541,7 +617,11 @@ class CBETA::P5aToHTMLForPDF
     abort "未處理底本" if @orig.nil?
     @vol = vol
-    @series = vol[0]
+    if vol.start_with? 'DA'
+      @series = 'DA'
+    else
+      @series = vol[0]
+    end
     @out_folder = File.join(@out_root, @series, vol)
     FileUtils.remove_dir(@out_folder, force=true)
     FileUtils::mkdir_p @out_folder
@@ -591,6 +671,11 @@ class CBETA::P5aToHTMLForPDF
     @author = doc.at_xpath("//titleStmt/author").text
+    if @cover.nil?
+      @cover = "<p class='title'>#{@title}</p>\n"
+      @cover += "<p class='author'>#{@author}</p>"
+    end
     e = doc.at_xpath("//editionStmt/edition/date")
     abort "找不到版本日期" if e.nil?
     @edition_date = e.text.sub(/\$Date: (.*?) \$$/, '\1')
@@ -627,6 +712,23 @@ class CBETA::P5aToHTMLForPDF
     r + "<table>\n"
   end
+  def prepare_work_list(input_folder)
+    Dir.foreach(input_folder) do |f|
+      next if f.start_with? '.'
+      p1 = File.join(input_folder, f)
+      if File.file?(p1)
+        work = f.sub(/^([A-Z]{1,2})\d{2,3}n(.*)\.xml$/, '\1\2')
+        work = 'T0220' if work.start_with? 'T0220'
+        unless @works.key? work
+          @works[work] = []
+        end
+        @works[work] << p1
+      else
+        prepare_work_list(p1)
+      end
+    end
+  end
   def to_html(e)
     e.to_xml(encoding: 'UTF-8', :save_with => Nokogiri::XML::Node::SaveOptions::AS_XML)

data/lib/data/html-for-pdf.css CHANGED

@@ -30,6 +30,10 @@ div.p {
 	margin-bottom: 1em;
 	margin-top: 1em;
 	line-height: 1.4;
+	text-indent: 2em;
+}
+li div.p {
+	text-indent: 0;
 }
 p.h1 {
 	margin-left: 1em;

data/lib/data/pdf-template.htm CHANGED

@@ -5,12 +5,8 @@
 	<link rel=stylesheet type='text/css' href='html-for-pdf.css'>
 </head>
 <body>
-  <p class='title'>%{title}</p>
-  <p class='author'>%{author}</p>
-	<div>
-		<h1>目次</h1>
-		%{toc}
-	</div>
+	%{cover}
+	%{toc}
 	%{front}
   %{text}
 	%{back}

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: cbeta
 version: !ruby/object:Gem::Version
-  version: 2.1.1
+  version: 2.1.2
 platform: ruby
 authors:
 - Ray Chou
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2015-12-24 00:00:00.000000000 Z
+date: 2015-12-25 00:00:00.000000000 Z
 dependencies: []
 description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
 email: zhoubx@gmail.com
@@ -32,8 +32,6 @@ files:
 - lib/cbeta/p5a_validator.rb
 - lib/data/canons.csv
 - lib/data/categories.json
-- lib/data/epub-nav.xhtml
-- lib/data/epub.css
 - lib/data/gaiji.json
 - lib/data/html-for-pdf.css
 - lib/data/pdf-template.htm
@@ -63,3 +61,4 @@ signing_key:
 specification_version: 4
 summary: CBETA Tools
 test_files: []
+has_rdoc:

data/lib/data/epub-nav.xhtml DELETED

@@ -1,11 +0,0 @@
-<html xmlns="http://www.w3.org/1999/xhtml" xmlns:epub="http://www.idpf.org/2007/ops">
-<head>
-	<meta charset="utf-8" />
-</head>
-<body>
-<nav epub:type="toc" id="toc">
-  <h1>Table of contents</h1>
-	%s
-</nav>
-</body>
-</html>

data/lib/data/epub.css DELETED

@@ -1,57 +0,0 @@
-div.p {
-	margin-bottom: 20px;
-	line-height: 1.4;
-	text-indent: 2em;
-}
-p.byline {
-	text-align: right;
-}
-p.h1 {
-	text-indent: 2em;
-	font-weight: bold;
-}
-p.h2 {
-	text-indent: 3em;
-	font-weight: bold;
-}
-p.h3 {
-	text-indent: 4em;
-	font-weight: bold;
-}
-p.h4 {
-	text-indent: 2em;
-	font-weight: bold;
-}
-p.h5 {
-	text-indent: 3em;
-	font-weight: bold;
-}
-p.h6 {
-	text-indent: 4em;
-	font-weight: bold;
-}
-p.h7 {
-	text-indent: 2em;
-	font-weight: bold;
-}
-p.h8 {
-	text-indent: 2em;
-	font-weight: bold;
-}
-span.corr {
-	color: red;
-}
-table {
-	border-collapse: collapse;
-}
-th, td {
-	border: solid;
-	border-width: 1px;
-	padding: 5px;
-	word-wrap: break-word;
-	word-break: break-all;
-	text-indent: 0;
-}
-ul.simple {
-	list-style-type: none;
-}