RubyGems - isodoc - Versions diffs - 0.4.5 → 0.5.5 - Mend

isodoc 0.4.5 → 0.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (31)

checksums.yaml +4 -4
data/bin/rspec +18 -0
data/isodoc.gemspec +1 -1
data/lib/isodoc.rb +34 -5
data/lib/isodoc/blocks.rb +62 -50
data/lib/isodoc/cleanup.rb +34 -10
data/lib/isodoc/html.rb +31 -16
data/lib/isodoc/i18n-en.yaml +72 -0
data/lib/isodoc/i18n-fr.yaml +65 -0
data/lib/isodoc/i18n-zh-Hans.yaml +64 -0
data/lib/isodoc/i18n.rb +90 -0
data/lib/isodoc/inline.rb +25 -18
data/lib/isodoc/iso2wordhtml.rb +30 -7
data/lib/isodoc/lists.rb +29 -9
data/lib/isodoc/metadata.rb +54 -38
data/lib/isodoc/notes.rb +32 -32
data/lib/isodoc/postprocessing.rb +65 -46
data/lib/isodoc/references.rb +63 -29
data/lib/isodoc/section.rb +94 -44
data/lib/isodoc/table.rb +19 -19
data/lib/isodoc/terms.rb +5 -6
data/lib/isodoc/utils.rb +48 -5
data/lib/isodoc/version.rb +1 -1
data/lib/isodoc/xref_gen.rb +87 -75
data/spec/isodoc/blocks_spec.rb +618 -0
data/spec/isodoc/lists_spec.rb +227 -0
data/spec/isodoc/section_spec.rb +419 -0
data/spec/isodoc/table_spec.rb +135 -0
data/spec/isodoc/xref_spec.rb +1073 -0
data/spec/spec_helper.rb +26 -0
metadata +17 -6

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA1:
-  metadata.gz: 92ad102a0e0bc53916aaec5d7a4f23d71828f3d4
-  data.tar.gz: 7dbbd1969e0e1714c9074ac455a335939f634335
+  metadata.gz: 32b644d32aaf9d2170de736e389cec4a7b0b1a4d
+  data.tar.gz: afc5f22b859a533e4244758b80a3f4acc9658924
 SHA512:
-  metadata.gz: 1b570dec93a170716366cf0d433d20dc614026ebe2224b02e659216fccf006b1680a1126e763a953f8d61c8154a0c9b133dfd989b05e25e675b5b3aa1f10d980
-  data.tar.gz: b9499b701f2eaca4a27d71ff85ff27926bd58e43a2b255dd429180eaf8b8c72704f8ba3768b2eb894f41128136f887308e6f2a29e89c000b7f02cf2dcb391e51
+  metadata.gz: 22698380e197c7f94fa0e690dfa181bc2312aa130aeab631fa6cd8ca515c1cd6c312b3f37b56823eef43d079c24ee1c2b539173c4dcd5f13c156dec975fdfce8
+  data.tar.gz: 6b95287839fe12f19e777cfdf62bc06b317247e79b1f6cf084b4ab6b938b561d763aebf9f6ec7bcd0ce280a3cea19ded061cd2fbb8e086c5591221ea9c768a4e

data/bin/rspec ADDED

@@ -0,0 +1,18 @@
+#!/usr/bin/env ruby
+# This file was generated by Bundler.
+#
+# The application 'rspec' is installed as part of a gem, and
+# this file is here to facilitate running it.
+#
+require "pathname"
+ENV["BUNDLE_GEMFILE"] ||= File.expand_path(
+  "../../Gemfile", Pathname.new(__FILE__).realpath
+)
+require "rubygems"
+require "bundler/setup"
+load Gem.bin_path("rspec-core", "rspec")

data/isodoc.gemspec CHANGED

@@ -32,7 +32,7 @@ Gem::Specification.new do |spec|
   spec.add_dependency "htmlentities", "~> 4.3.4"
   spec.add_dependency "image_size"
   spec.add_dependency "mime-types"
-  spec.add_dependency "nokogiri", "= 1.8.1"
+  spec.add_dependency "nokogiri"
   spec.add_dependency "ruby-xslt"
   spec.add_dependency "thread_safe"
   spec.add_dependency "uuidtools"

data/lib/isodoc.rb CHANGED

@@ -23,6 +23,7 @@ require_relative "isodoc/inline"
 require_relative "isodoc/notes"
 require_relative "isodoc/xref_gen"
 require_relative "isodoc/html"
+require_relative "isodoc/i18n"
 require "pp"
 module IsoDoc
@@ -36,6 +37,9 @@ module IsoDoc
     # wordcoverpage: Cover page for Word
     # htmlintropage: Introductory page for HTML
     # wordintropage: Introductory page for Word
+    # i18nyaml: YAML file for internationalisation of text
+    # ulstyle: list style in Word CSS for unordered lists
+    # olstyle: list style in Word CSS for ordered lists
     def initialize(options)
       @htmlstylesheet = options[:htmlstylesheet]
       @wordstylesheet = options[:wordstylesheet]
@@ -45,6 +49,9 @@ module IsoDoc
       @wordcoverpage = options[:wordcoverpage]
       @htmlintropage = options[:htmlintropage]
       @wordintropage = options[:wordintropage]
+      @i18nyaml = options[:i18nyaml]
+      @ulstyle = options[:ulstyle]
+      @olstyle = options[:olstyle]
       @termdomain = ""
       @termexample = false
       @note = false
@@ -59,19 +66,41 @@ module IsoDoc
       @in_table = false
       @in_figure = false
       @seen_footnote = Set.new
+      @c = HTMLEntities.new
+      @openmathdelim = "`"
+      @closemathdelim = "`"
+      @lang = "en"
+      @script = "Latn"
     end
-    def convert(filename)
-      docxml = Nokogiri::XML(File.read(filename))
-      filename, dir = init_file(filename)
-      docxml.root.default_namespace = ""
-      result = noko do |xml|
+    def convert1(docxml, filename, dir)
+      noko do |xml|
         xml.html do |html|
           html.parent.add_namespace("epub", "http://www.idpf.org/2007/ops")
           html_header(html, docxml, filename, dir)
           make_body(html, docxml)
         end
       end.join("\n")
+    end
+    def convert_init(file, filename, debug)
+      docxml = Nokogiri::XML(file)
+      filename, dir = init_file(filename, debug)
+      docxml.root.default_namespace = ""
+      i18n_init(docxml&.at(ns("//bibdata/language"))&.text || "en",
+                docxml&.at(ns("//bibdata/script"))&.text || "Latn")
+      [docxml, filename, dir]
+    end
+    def convert(filename, debug = false)
+      convert_file(File.read(filename), filename, debug)
+    end
+    def convert_file(file, filename, debug)
+      @openmathdelim, @closemathdelim = extract_delims(file)
+      docxml, filename, dir = convert_init(file, filename, debug)
+      result = convert1(docxml, filename, dir)
+      return result if debug
       postprocess(result, filename, dir)
     end
   end

data/lib/isodoc/blocks.rb CHANGED

@@ -1,30 +1,11 @@
 module IsoDoc
   class Convert
-    #attr_accessor :termdomain, :termexample, :sourcecode, :note
-    def set_termdomain(termdomain)
-      @termdomain = termdomain
-    end
-    def get_termexample
-      @termexample
-    end
-    def set_termexample(value)
-      @termexample = value
-    end
-    def in_sourcecode
-      @sourcecode
-    end
-    def is_note
-      @note
-    end
+    @annotation = false
     def note_label(node)
-      n = get_anchors()[node["id"]]
-      return "NOTE" if n.nil?
-      n[:label]
+      n = get_anchors[node["id"]]
+      return @note_lbl if n.nil? || n[:label].empty?
+      l10n("#{@note_lbl} #{n[:label]}")
     end
     def note_p_parse(node, div)
@@ -36,17 +17,21 @@ module IsoDoc
       node.element_children[1..-1].each { |n| parse(n, div) }
     end
+    def note_parse1(node, div)
+      div.p **{ class: "Note" } do |p|
+        p << note_label(node)
+        insert_tab(p, 1)
+      end
+      node.children.each { |n| parse(n, div) }
+    end
     def note_parse(node, out)
       @note = true
       out.div **{ id: node["id"], class: "Note" } do |div|
         if node.first_element_child.name == "p"
           note_p_parse(node, div)
         else
-          div.p **{ class: "Note" } do |p|
-            p << note_label(node)
-            insert_tab(p, 1)
-          end
-          node.children.each { |n| parse(n, div) }
+          note_parse1(node, div)
         end
       end
       @note = false
@@ -55,15 +40,15 @@ module IsoDoc
     def figure_name_parse(node, div, name)
       div.p **{ class: "FigureTitle", align: "center" } do |p|
         p.b do |b|
-          b << "#{get_anchors()[node['id']][:label]}&nbsp;&mdash; "
-          b << name.text
+          b << l10n("#{@figure_lbl} #{get_anchors[node['id']][:label]}")
+          b << "&nbsp;&mdash; #{name.text}" if name
         end
       end
     end
     def figure_key(out)
-      out.p do |p|
-        p.b { |b| b << "Key" }
+      out.p do |p|
+        p.b { |b| b << @key_lbl }
       end
     end
@@ -80,17 +65,40 @@ module IsoDoc
       @in_figure = false
     end
-        def example_parse(node, out)
-      name = node.at(ns("./name"))
-      out.div **attr_code(id: node["id"], class: "figure") do |div|
+    def example_label(node)
+      n = get_anchors[node["id"]]
+      return @example_lbl if n.nil? || n[:label].empty?
+      l10n("#{@example_lbl} #{n[:label]}")
+    end
+    EXAMPLE_TBL_ATTR =
+      { width: "110pt", valign: "top",
+        style: "width:82.8pt;padding:.75pt .75pt .75pt .75pt" }.freeze
+    # used if we are boxing examples
+    def example_div_parse(node, out)
+      out.div **attr_code(id: node["id"], class: "example") do |div|
+        out.p { |p| p << example_label(node) }
         node.children.each do |n|
-          parse(n, div) unless n.name == "name"
+          parse(n, div)
         end
-        figure_name_parse(node, div, name) if name
       end
     end
-    def sourcecode_name_parse(node, div, name)
+    def example_parse(node, out)
+      out.table **attr_code(id: node["id"], class: "example") do |t|
+        t.tr do |tr|
+          tr.td **EXAMPLE_TBL_ATTR do |td|
+            td << example_label(node)
+          end
+          tr.td **{ valign: "top" } do |td|
+            node.children.each { |n| parse(n, td) }
+          end
+        end
+      end
+    end
+    def sourcecode_name_parse(_node, div, name)
       div.p **{ class: "FigureTitle", align: "center" } do |p|
         p.b do |b|
           b << name.text
@@ -111,9 +119,13 @@ module IsoDoc
     end
     def annotation_parse(node, out)
-      out.p **{ class: "Sourcecode" } do |li|
-        node.children.each { |n| parse(n, li) }
+      @sourcecode = false
+      @annotation = true
+      out.span **{ class: "zzMoveToFollowing" } do |s|
+        s  << "&lt;#{node.at(ns("//callout[@target='#{node['id']}']")).text}&gt; "
       end
+        node.children.each { |n| parse(n, out) }
+      @annotation = false
     end
     def admonition_parse(node, out)
@@ -127,29 +139,29 @@ module IsoDoc
     end
     def formula_where(dl, out)
-             out.p { |p| p << "where" }
-        parse(dl, out)
+      return unless dl
+      out.p { |p| p << @where_lbl }
+      parse(dl, out)
     end
     def formula_parse(node, out)
-      dl = node.at(ns("./dl"))
       out.div **attr_code(id: node["id"], class: "formula") do |div|
         parse(node.at(ns("./stem")), out)
         insert_tab(div, 1)
-        div << "(#{get_anchors()[node['id']][:label]})"
+        div << "(#{get_anchors[node['id']][:label]})"
       end
-        formula_where(dl, out) if dl
+      formula_where(node.at(ns("./dl")), out)
     end
     def para_attrs(node)
       classtype = nil
       classtype = "Note" if @note
-      # classtype = "MsoFootnoteText" if in_footnote
       classtype = "MsoCommentText" if in_comment
+      classtype = "Sourcecode" if @annotation
       attrs = { class: classtype, id: node["id"] }
       unless node["align"].nil?
         attrs[:align] = node["align"] unless node["align"] == "justify"
-        attrs[:style] = "text-align:#{node["align"]}"
+        attrs[:style] = "text-align:#{node['align']}"
       end
       attrs
     end
@@ -169,7 +181,7 @@ module IsoDoc
       source = node.at(ns("./source"))
       out.p **{ class: "QuoteAttribution" } do |p|
         p << "&mdash; #{author.text}, " if author
-        eref_parse(source, p)
+        eref_parse(source, p) if source
       end
     end
@@ -177,8 +189,8 @@ module IsoDoc
       attrs = para_attrs(node)
       attrs[:class] = "Quote"
       out.div **attr_code(attrs) do |p|
-        node.children.each do
-          |n| parse(n, p) unless ["author", "source"].include? n.name
+        node.children.each do |n|
+          parse(n, p) unless ["author", "source"].include? n.name
         end
         quote_attribution(node, out)
       end

data/lib/isodoc/cleanup.rb CHANGED

@@ -11,8 +11,8 @@ module IsoDoc
       inline_header_cleanup(docxml)
       figure_cleanup(docxml)
       table_cleanup(docxml)
+      symbols_cleanup(docxml)
       admonition_cleanup(docxml)
-      docxml
     end
     def admonition_cleanup(docxml)
@@ -21,12 +21,13 @@ module IsoDoc
         n = title.next_element
         n&.children&.first&.add_previous_sibling(title.text + "&mdash;")
       end
+      docxml
     end
     def figure_get_or_make_dl(t)
       dl = t.at(".//dl")
       if dl.nil?
-        t.add_child("<p><b>Key</b></p><dl></dl>")
+        t.add_child("<p><b>#{@key_lbl}</b></p><dl></dl>")
         dl = t.at(".//dl")
       end
       dl
@@ -62,13 +63,12 @@ module IsoDoc
     def inline_header_cleanup(docxml)
       docxml.xpath('//span[@class="zzMoveToFollowing"]').each do |x|
+        x.delete("class")
         n = x.next_element
         if n.nil?
-          html = Nokogiri::XML.fragment("<p></p>")
-          html.parent = x.parent
-          x.parent = html
+          x.name = "p"
         else
-          n.children.first.add_previous_sibling(x.remove)
+          n.children.first.previous = x.remove
         end
       end
     end
@@ -86,10 +86,8 @@ module IsoDoc
       n&.children&.first&.add_previous_sibling(fn.remove)
     end
-    TABLE_WITH_FOOTNOTES = "//table[descendant::aside]".freeze
     def table_footnote_cleanup(docxml)
-      docxml.xpath(TABLE_WITH_FOOTNOTES).each do |t|
+      docxml.xpath("//table[descendant::aside]").each do |t|
         t.xpath(".//aside").each do |a|
           merge_fnref_into_fn_text(a)
           a.name = "div"
@@ -139,12 +137,38 @@ module IsoDoc
                    "[ancestor::*[@class = 'Note']]").each do |p|
         p["class"] = "Note"
       end
     end
     def table_cleanup(docxml)
       table_footnote_cleanup(docxml)
       table_note_cleanup(docxml)
     end
+    # We assume AsciiMath. Indices sort after letter but before any following
+    # letter (x, x_m, x_1, xa); we use colon to force that sort order.
+    # Numbers sort *after* letters; we use thorn to force that sort order.
+    def symbol_key(x)
+      HTMLEntities.new.decode(x.text).gsub(/_/, ":").gsub(/`/, "").
+        gsub(/[0-9]+/, "þ\\1")
+    end
+    def extract_symbols_list(dl)
+      dl_out = []
+      dl.xpath("./dt | ./dd").each do |dtd|
+        if dtd.name == "dt"
+          dl_out << { dt: dtd.remove, key: symbol_key(dtd) }
+        else
+          dl_out.last[:dd] = dtd.remove
+        end
+      end
+      dl_out
+    end
+    def symbols_cleanup(docxml)
+      dl = docxml.at("//div[@class = 'Symbols']/dl") || return
+      dl_out = extract_symbols_list(dl)
+      dl_out.sort! { |a, b| a[:key] <=> b[:key] }
+      dl.replace(dl_out.map { |d| d[:dt].to_s + d[:dd].to_s }.join("\n"))
+    end
   end
 end

data/lib/isodoc/html.rb CHANGED

@@ -1,8 +1,6 @@
 module IsoDoc
   class Convert
     def toHTML(result, filename)
-      # result = html_cleanup(Nokogiri::HTML(result)).to_xml
       result = from_xhtml(html_cleanup(to_xhtml(result)))
       result = populate_template(result, :html)
       File.open("#{filename}.html", "w") do |f|
@@ -11,19 +9,37 @@ module IsoDoc
     end
     def html_cleanup(x)
-      footnote_backlinks(move_images(html_footnote_filter(htmlPreface(htmlstyle(x)))))
+      footnote_backlinks(
+        move_images(html_footnote_filter(html_preface(htmlstyle(x))))
+      )
     end
-    def htmlPreface(docxml)
+    MATHJAX_ADDR =
+      "https://cdnjs.cloudflare.com/ajax/libs/mathjax/2.7.1/MathJax.js".freeze
+    MATHJAX = <<~"MATHJAX".freeze
+      <script type="text/x-mathjax-config">
+        MathJax.Hub.Config({
+          asciimath2jax: {
+            delimiters: [['OPEN', 'CLOSE']]
+          }
+       });
+      </script>
+      <script src="#{MATHJAX_ADDR}?config=AM_HTMLorMML"></script>
+    MATHJAX
+    def mathjax(open, close)
+      MATHJAX.gsub("OPEN", open).gsub("CLOSE", close)
+    end
+    def html_preface(docxml)
       cover = Nokogiri::HTML(File.read(@htmlcoverpage, encoding: "UTF-8"))
       d = docxml.at('//div[@class="WordSection1"]')
-      d.children.first.add_previous_sibling cover.to_xml(encoding: 'US-ASCII')
+      d.children.first.add_previous_sibling cover.to_xml(encoding: "US-ASCII")
       cover = Nokogiri::HTML(File.read(@htmlintropage, encoding: "UTF-8"))
       d = docxml.at('//div[@class="WordSection2"]')
-      d.children.first.add_previous_sibling cover.to_xml(encoding: 'US-ASCII')
-      body = docxml.at("//*[local-name() = 'body']")
-      body << '<script src="https://cdn.mathjax.org/mathjax/latest/'\
-        'MathJax.js?config=AM_HTMLorMML"></script>'
+      d.children.first.add_previous_sibling cover.to_xml(encoding: "US-ASCII")
+      docxml.at("//*[local-name() = 'body']") << mathjax(@openmathdelim,
+                                                         @closemathdelim)
       docxml
     end
@@ -45,8 +61,7 @@ module IsoDoc
       docxml
     end
-    def update_footnote_filter(docxml, x, i, seen)
-      fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || return
+    def update_footnote_filter(fn, x, i, seen)
       if seen[fn.text]
         x.at("./sup").content = seen[fn.text][:num].to_s
         fn.remove unless x["href"] == seen[fn.text][:href]
@@ -63,7 +78,8 @@ module IsoDoc
       seen = {}
       i = 1
       docxml.xpath('//a[@epub:type = "footnote"]').each do |x|
-        i, seen = update_footnote_filter(docxml, x, i, seen)
+        fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || next
+        i, seen = update_footnote_filter(fn, x, i, seen)
       end
       docxml
     end
@@ -73,22 +89,21 @@ module IsoDoc
       docxml.xpath('//a[@epub:type = "footnote"]').each_with_index do |x, i|
         next if seen[x["href"]]
         seen[x["href"]] = true
-        sup = x.at("./sup").text
         fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || next
         x["id"] || x["id"] = "_footnote#{i + 1}"
-        fn.elements.first.children.first.
-          add_previous_sibling("<a href='##{x['id']}'>#{sup}) </a>")
+        fn.elements.first.children.first.previous =
+          "<a href='##{x['id']}'>#{x.at('./sup').text}) </a>"
       end
       docxml
     end
+    # presupposes that the image source is local
     def move_images(docxml)
       system "rm -r _images; mkdir _images"
       docxml.xpath("//*[local-name() = 'img']").each do |i|
         matched = /\.(?<suffix>\S+)$/.match i["src"]
         uuid = UUIDTools::UUID.random_create.to_s
         new_full_filename = File.join("_images", "#{uuid}.#{matched[:suffix]}")
-        # presupposes that the image source is local
         system "cp #{i['src']} #{new_full_filename}"
         i["src"] = new_full_filename
         i["width"], i["height"] = Html2Doc.image_resize(i, 800, 1200)