RubyGems - cbeta - Versions diffs - 2.2.22 → 2.2.26 - Mend

cbeta 2.2.22 → 2.2.26

This diff shows the differences between the contents of two publicly available package versions, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between those versions as they appear in the public registry.

Files changed (5)

checksums.yaml +4 -4
data/lib/cbeta.rb +10 -0
data/lib/cbeta/p5a_to_html.rb +1 -1
data/lib/cbeta/p5a_to_html_for_every_edition.rb +134 -125
metadata +2 -2

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: c051fafb22469b41ce99482cb6c7e42c2b57a934
-  data.tar.gz: b844d9fd876dcda4236f3c0d74b21baa234fec5c
+  metadata.gz: d19eb4a198b323a6ee4601eee34ec46d56d41c50
+  data.tar.gz: 359ce18558763b5b65ac05682c4619d5fe106b7a
 SHA512:
-  metadata.gz: 74c281e563811da223290c5cd35fc5246bae9f25267d634c553010ed3e9e7a9d7451d745b54408dbda05bec0218e1ac553fcb447b34c7ab37dd6bdbe806782cc
-  data.tar.gz: 25a7190bde65648a11d5ea65f476b247a406c50736df9f66c1f9b90c12f7f14e3af7581023e4a397bd2fa4270134f9c9e66df257abed8bda7f752af8b8fdcaf7
+  metadata.gz: b25957a15c65e3e49cf6ef6f33a6e8bbec950cac63ebbb4a164286c55ce16080b4824d4b033773d5407d0b86fe9c0bc398eb5685ac7a09f9ee48c7d936b81cf4
+  data.tar.gz: 4af74f6871288f1bc15a8b858ccc03fd688cec5a6621578e9a38764c1f25f8dd3051f6088bd095e546d05f96ea336368b25aa2656384d06674a2e85036c08562

data/lib/cbeta.rb CHANGED

@@ -32,6 +32,16 @@ class CBETA
     vol.sub(/^(#{CANON}).*$/, '\1')
   end
+  # @param file_basename[String] XML檔主檔名, 例如 "T01n0001" 或 "T25n1510a"
+  # @param lb[String] 例如 "0001a01" 或 "0757b29"
+  # @return [String] CBETA 行首資訊，例如 "T01n0001_p0001a01" 或 "T25n1510ap0757b29"
+  def self.get_linehead(file_basename, lb)
+    r = file_basename
+    r += '_' if file_basename.match(/\d$/)
+    r += 'p' + lb
+    r
+  end
   # 由 冊號 及 典籍編號 取得 XML 主檔名
   # @param vol[String] 冊號, 例如 "T01" 或 "GA009"
   # @param work[String] 典籍編號, 例如 "T0001" 或 "GA0008"

data/lib/cbeta/p5a_to_html.rb CHANGED

@@ -307,7 +307,7 @@ class CBETA::P5aToHTML
     @char_count = 1
     @lb = e['n']
-    line_head = @sutra_no + '_p' + @lb
+    line_head = CBETA.get_linehad(@sutra_no, @lb)
     r = ''
     #if e.parent.name == 'lg' and $lg_row_open

data/lib/cbeta/p5a_to_html_for_every_edition.rb CHANGED

@@ -72,7 +72,7 @@ class CBETA::P5aToHTMLForEveryEdition
   def convert_all
     Dir.entries(@xml_root).sort.each do |c|
-      next unless c.match(/^#{CBETA.CANON}$/)
+      next unless c.match(/^#{CBETA::CANON}$/)
       convert_canon(c)
     end
   end
@@ -139,29 +139,7 @@ class CBETA::P5aToHTMLForEveryEdition
     end
   end
-  def filter_html(html, ed)
-    frag = Nokogiri::HTML.fragment(html)
-    frag.search("r").each do |node|
-      if node['w'].include? ed
-        html_only_this_edition = filter_html(node.inner_html, ed)
-        node.add_previous_sibling html_only_this_edition
-      end
-      node.remove
-    end
-    frag.to_html
-  end
-  def get_editions(doc)
-    r = Set.new [@orig, "【CBETA】"] # 至少有底本及 CBETA 兩個版本
-    doc.xpath('//lem|//rdg').each do |e|
-      w = e['wit'].scan(/【.*?】/)
-      r.merge w
-    end
-    r
-  end
-  def handle_anchor(e)
+  def e_anchor(e)
     id = e['id']
     if e.has_attribute?('id')
       if id.start_with?('nkr_note_orig')
@@ -184,7 +162,7 @@ class CBETA::P5aToHTMLForEveryEdition
     ''
   end
-  def handle_app(e)
+  def e_app(e)
     r = ''
     if e['type'] == 'star'
       c = e['corresp'][1..-1]
@@ -193,14 +171,14 @@ class CBETA::P5aToHTMLForEveryEdition
     r + traverse(e)
   end
-  def handle_byline(e)
+  def e_byline(e)
     r = '<p class="byline">'
     r += line_info
     r += traverse(e)
     r + '</p>'
   end
-  def handle_cell(e)
+  def e_cell(e)
     doc = Nokogiri::XML::Document.new
     cell = doc.create_element('div')
     cell['class'] = 'bip-table-cell'
@@ -210,7 +188,7 @@ class CBETA::P5aToHTMLForEveryEdition
     to_html(cell)
   end
-  def handle_corr(e)
+  def e_corr(e)
     r = ''
     if e.parent.name == 'choice'
       sic = e.parent.at_xpath('sic')
@@ -231,7 +209,7 @@ class CBETA::P5aToHTMLForEveryEdition
     r + "<r w='【CBETA】' l='#{@lb}'><span class='cbeta'>%s</span></r>" % traverse(e)
   end
-  def handle_div(e)
+  def e_div(e)
     @div_count += 1
     n = @div_count
     if e.has_attribute? 'type'
@@ -244,11 +222,11 @@ class CBETA::P5aToHTMLForEveryEdition
     end
   end
-  def handle_figure(e)
+  def e_figure(e)
     "<p class='figure'>%s</p>" % traverse(e)
   end
-  def handle_g(e, mode)
+  def e_g(e, mode)
     # if 有 <mapping type="unicode">
     #   if 不在 Unicode Extension C, D, E 範圍裡
     #     直接採用
@@ -326,12 +304,12 @@ class CBETA::P5aToHTMLForEveryEdition
     "<a class='gaijiAnchor' href='##{gid}'>#{default}</a>"
   end
-  def handle_graphic(e)
+  def e_graphic(e)
     url = File.basename(e['url'])
     "<span imgsrc='#{url}' class='graphic'></span>"
   end
-  def handle_head(e)
+  def e_head(e)
     r = ''
     unless e['type'] == 'added'
       i = @open_divs.size
@@ -340,15 +318,15 @@ class CBETA::P5aToHTMLForEveryEdition
     r
   end
-  def handle_item(e)
+  def e_item(e)
     "<li>%s</li>\n" % traverse(e)
   end
-  def handle_juan(e)
+  def e_juan(e)
     "<p class='juan'>%s</p>" % traverse(e)
   end
-  def handle_l(e)
+  def e_l(e)
     if @lg_type == 'abnormal'
       return traverse(e)
     end
@@ -380,13 +358,13 @@ class CBETA::P5aToHTMLForEveryEdition
     r
   end
-  def handle_lb(e)
+  def e_lb(e)
     # 卍續藏有 X 跟 R 兩種 lb, 只處理 X
     return '' if e['ed'] != @series
     @char_count = 1
     @lb = e['n']
-    line_head = @sutra_no + '_p' + e['n']
+    line_head = CBETA.get_linehead(@sutra_no, e['n'])
     r = ''
     #if e.parent.name == 'lg' and $lg_row_open
     if @lg_row_open && !@in_l
@@ -408,7 +386,7 @@ class CBETA::P5aToHTMLForEveryEdition
     r
   end
-  def handle_lem(e)
+  def e_lem(e)
     r = ''
     content = traverse(e)
     wit = e['wit']
@@ -436,7 +414,7 @@ class CBETA::P5aToHTMLForEveryEdition
     r + ("<r w='#{w}' l='#{@lb}'>%s</r>" % content)
   end
-  def handle_lg(e)
+  def e_lg(e)
     r = ''
     @lg_type = e['type']
     if @lg_type == 'abnormal'
@@ -461,11 +439,11 @@ class CBETA::P5aToHTMLForEveryEdition
     r
   end
-  def handle_list(e)
+  def e_list(e)
     "<ul>%s</ul>" % traverse(e)
   end
-  def handle_milestone(e)
+  def e_milestone(e)
     r = ''
     if e['unit'] == 'juan'
@@ -484,7 +462,7 @@ class CBETA::P5aToHTMLForEveryEdition
     r
   end
-  def handle_mulu(e)
+  def e_mulu(e)
     r = ''
     if e['type'] == '品'
       @pass << false
@@ -493,49 +471,9 @@ class CBETA::P5aToHTMLForEveryEdition
     end
     r
   end
-  def handle_node(e, mode)
-    return '' if e.comment?
-    return handle_text(e, mode) if e.text?
-    return '' if PASS.include?(e.name)
-    r = case e.name
-    when 'anchor'    then handle_anchor(e)
-    when 'app'       then handle_app(e)
-    when 'byline'    then handle_byline(e)
-    when 'cell'      then handle_cell(e)
-    when 'corr'      then handle_corr(e)
-    when 'div'       then handle_div(e)
-    when 'figure'    then handle_figure(e)
-    when 'foreign'   then ''
-    when 'g'         then handle_g(e, mode)
-    when 'graphic'   then handle_graphic(e)
-    when 'head'      then handle_head(e)
-    when 'item'      then handle_item(e)
-    when 'juan'      then handle_juan(e)
-    when 'l'         then handle_l(e)
-    when 'lb'        then handle_lb(e)
-    when 'lem'       then handle_lem(e)
-    when 'lg'        then handle_lg(e)
-    when 'list'      then handle_list(e)
-    when 'mulu'      then handle_mulu(e)
-    when 'note'      then handle_note(e)
-    when 'milestone' then handle_milestone(e)
-    when 'p'         then handle_p(e)
-    when 'rdg'       then handle_rdg(e)
-    when 'reg'       then ''
-    when 'row'       then handle_row(e)
-    when 'sic'       then handle_sic(e)
-    when 'sg'        then handle_sg(e)
-    when 't'         then handle_t(e)
-    when 'tt'        then handle_tt(e)
-    when 'table'     then handle_table(e)
-    when 'unclear'   then handle_unclear(e)
-    else traverse(e)
-    end
-    r
-  end
-  def handle_note(e)
+  def e_note(e)
     n = e['n']
     if e.has_attribute?('type')
       t = e['type']
@@ -580,36 +518,8 @@ class CBETA::P5aToHTMLForEveryEdition
     end
   end
-  def handle_note_orig(e, anchor_type=nil)
-    n = e['n']
-    @pass << false
-    s = traverse(e)
-    @pass.pop
-    @notes_orig[@juan][n] = s
-    @notes_mod[@juan][n] = s
-    c = @series
-    # 如果 CBETA 沒有修訂，就跟底本的註一樣
-    # 但是 CBETA 修訂後的編號，有時會加上 a, b
-    # T01n0026, p. 506b07, 大正藏校勘 0506007, CBETA 拆為 0506007a, 0506007b
-    c += " cb" unless @mod_notes.include?(n) or @mod_notes.include?(n+'a')
-    label = case anchor_type
-    when 'biao' then " data-label='標#{n[-2..-1]}'"
-    when 'ke'   then " data-label='科#{n[-2..-1]}'"
-    else ''
-    end
-    s = "<a class='noteAnchor #{c}' href='#n#{n}'#{label}></a>"
-    r = "<r w='#{@orig}'>#{s}</r>"
-    unless @mod_notes.include?(n)
-      r += "<r w='【CBETA】'>#{s}</r>"
-    end
-    r
-  end
-  def handle_p(e)
+  def e_p(e)
     if e.key? 'type'
       r = "<p class='%s'>" % e['type']
     else
@@ -620,24 +530,24 @@ class CBETA::P5aToHTMLForEveryEdition
     r + '</p>'
   end
-  def handle_rdg(e)
+  def e_rdg(e)
     r = traverse(e)
     "<r w='#{e['wit']}' l='#{@lb}' w='#{@char_count}'>#{r}</r>"
   end
-  def handle_row(e)
+  def e_row(e)
     "<div class='bip-table-row'>" + traverse(e) + "</div>"
   end
-  def handle_sg(e)
+  def e_sg(e)
     '(' + traverse(e) + ')'
   end
-  def handle_sic(e)
+  def e_sic(e)
     "<r w='#{@orig}' l='#{@lb}'>" + traverse(e) + "</r>"
   end
-  def handle_t(e)
+  def e_t(e)
     if e.has_attribute? 'place'
       return '' if e['place'].include? 'foot'
     end
@@ -659,15 +569,112 @@ class CBETA::P5aToHTMLForEveryEdition
     end
   end
-  def handle_tt(e)
+  def e_tt(e)
     @tt_type = e['type']
     traverse(e)
   end
-  def handle_table(e)
+  def e_table(e)
     "<div class='bip-table'>" + traverse(e) + "</div>"
   end
+  def e_unclear(e)
+    '▆'
+  end
+  def filter_html(html, ed)
+    progress "filter html ed: #{ed}"
+    frag = Nokogiri::HTML.fragment(html)
+    frag.search("r").each do |node|
+      if node['w'].include? ed
+        html_only_this_edition = filter_html(node.inner_html, ed)
+        node.add_previous_sibling html_only_this_edition
+      end
+      node.remove
+    end
+    frag.to_html
+  end
+  def get_editions(doc)
+    r = Set.new [@orig, "【CBETA】"] # 至少有底本及 CBETA 兩個版本
+    doc.xpath('//lem|//rdg').each do |e|
+      w = e['wit'].scan(/【.*?】/)
+      r.merge w
+    end
+    r
+  end
+  def handle_node(e, mode)
+    return '' if e.comment?
+    return handle_text(e, mode) if e.text?
+    return '' if PASS.include?(e.name)
+    r = case e.name
+    when 'anchor'    then e_anchor(e)
+    when 'app'       then e_app(e)
+    when 'byline'    then e_byline(e)
+    when 'cell'      then e_cell(e)
+    when 'corr'      then e_corr(e)
+    when 'div'       then e_div(e)
+    when 'figure'    then e_figure(e)
+    when 'foreign'   then ''
+    when 'g'         then e_g(e, mode)
+    when 'graphic'   then e_graphic(e)
+    when 'head'      then e_head(e)
+    when 'item'      then e_item(e)
+    when 'juan'      then e_juan(e)
+    when 'l'         then e_l(e)
+    when 'lb'        then e_lb(e)
+    when 'lem'       then e_lem(e)
+    when 'lg'        then e_lg(e)
+    when 'list'      then e_list(e)
+    when 'mulu'      then e_mulu(e)
+    when 'note'      then e_note(e)
+    when 'milestone' then e_milestone(e)
+    when 'p'         then e_p(e)
+    when 'rdg'       then e_rdg(e)
+    when 'reg'       then ''
+    when 'row'       then e_row(e)
+    when 'sic'       then e_sic(e)
+    when 'sg'        then e_sg(e)
+    when 't'         then e_t(e)
+    when 'tt'        then e_tt(e)
+    when 'table'     then e_table(e)
+    when 'unclear'   then e_unclear(e)
+    else traverse(e)
+    end
+    r
+  end
+  def handle_note_orig(e, anchor_type=nil)
+    n = e['n']
+    @pass << false
+    s = traverse(e)
+    @pass.pop
+    @notes_orig[@juan][n] = s
+    @notes_mod[@juan][n] = s
+    c = @series
+    # 如果 CBETA 沒有修訂，就跟底本的註一樣
+    # 但是 CBETA 修訂後的編號，有時會加上 a, b
+    # T01n0026, p. 506b07, 大正藏校勘 0506007, CBETA 拆為 0506007a, 0506007b
+    c += " cb" unless @mod_notes.include?(n) or @mod_notes.include?(n+'a')
+    label = case anchor_type
+    when 'biao' then " data-label='標#{n[-2..-1]}'"
+    when 'ke'   then " data-label='科#{n[-2..-1]}'"
+    else ''
+    end
+    s = "<a class='noteAnchor #{c}' href='#n#{n}'#{label}></a>"
+    r = "<r w='#{@orig}'>#{s}</r>"
+    unless @mod_notes.include?(n)
+      r += "<r w='【CBETA】'>#{s}</r>"
+    end
+    r
+  end
   def handle_text(e, mode)
     s = e.content().chomp
     return '' if s.empty?
@@ -689,11 +696,8 @@ class CBETA::P5aToHTMLForEveryEdition
     r
   end
-  def handle_unclear(e)
-    '▆'
-  end
   def html_back(juan_no, ed)
+    progress "html back, juan: #{juan_no}, ed: #{ed}"
     r = ''
     case ed
     when '【CBETA】'
@@ -843,6 +847,11 @@ class CBETA::P5aToHTMLForEveryEdition
     text = traverse(body)
     text
   end
+  def progress(msg)
+    puts Time.now.strftime("%Y-%m-%d %H:%M:%S")
+    puts msg
+  end
   def to_html(e)
     e.to_xml(encoding: 'UTF-8', :save_with => Nokogiri::XML::Node::SaveOptions::AS_XML)

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: cbeta
 version: !ruby/object:Gem::Version
-  version: 2.2.22
+  version: 2.2.26
 platform: ruby
 authors:
 - Ray Chou
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2016-12-03 00:00:00.000000000 Z
+date: 2016-12-12 00:00:00.000000000 Z
 dependencies: []
 description: Ruby gem for use Chinese Buddhist Text resources made by CBETA (http://www.cbeta.org).
 email: zhoubx@gmail.com