RubyGems - isodoc - Versions diffs - 1.7.5 → 1.8.0 - Mend

isodoc 1.7.5 → 1.8.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (45)

checksums.yaml +4 -4
data/isodoc.gemspec +8 -7
data/lib/isodoc/class_utils.rb +25 -2
data/lib/isodoc/convert.rb +2 -0
data/lib/isodoc/function/cleanup.rb +4 -0
data/lib/isodoc/function/to_word_html.rb +2 -1
data/lib/isodoc/function/utils.rb +34 -14
data/lib/isodoc/html_function/comments.rb +107 -111
data/lib/isodoc/html_function/footnotes.rb +68 -67
data/lib/isodoc/html_function/html.rb +113 -103
data/lib/isodoc/presentation_function/block.rb +1 -69
data/lib/isodoc/presentation_function/image.rb +112 -0
data/lib/isodoc/presentation_function/inline.rb +16 -78
data/lib/isodoc/presentation_function/terms.rb +179 -0
data/lib/isodoc/presentation_xml_convert.rb +11 -4
data/lib/isodoc/version.rb +1 -1
data/lib/isodoc/word_function/body.rb +176 -174
data/lib/isodoc/word_function/comments.rb +117 -112
data/lib/isodoc/word_function/footnotes.rb +88 -86
data/lib/isodoc/word_function/inline.rb +42 -67
data/lib/isodoc/word_function/postprocess.rb +184 -176
data/lib/isodoc/word_function/postprocess_cover.rb +121 -110
data/lib/isodoc/xref/xref_gen.rb +153 -150
data/lib/isodoc/xref/xref_sect_gen.rb +134 -129
data/lib/isodoc/xslfo_convert.rb +11 -7
data/lib/isodoc-yaml/i18n-ar.yaml +22 -0
data/lib/isodoc-yaml/i18n-de.yaml +20 -0
data/lib/isodoc-yaml/i18n-en.yaml +20 -0
data/lib/isodoc-yaml/i18n-es.yaml +20 -0
data/lib/isodoc-yaml/i18n-fr.yaml +20 -0
data/lib/isodoc-yaml/i18n-ru.yaml +21 -1
data/lib/isodoc-yaml/i18n-zh-Hans.yaml +21 -0
data/lib/metanorma/output/xslfo.rb +4 -11
data/spec/assets/i18n.yaml +3 -1
data/spec/assets/odf.svg +1 -4
data/spec/isodoc/blocks_spec.rb +229 -157
data/spec/isodoc/i18n_spec.rb +8 -8
data/spec/isodoc/inline_spec.rb +285 -32
data/spec/isodoc/postproc_spec.rb +38 -0
data/spec/isodoc/presentation_xml_spec.rb +60 -0
data/spec/isodoc/section_spec.rb +11 -10
data/spec/isodoc/terms_spec.rb +354 -34
data/spec/isodoc/xref_spec.rb +4 -4
data/spec/isodoc/xslfo_convert_spec.rb +34 -9
metadata +49 -33

data/lib/isodoc/word_function/postprocess.rb CHANGED

@@ -1,229 +1,237 @@
 require "fileutils"
 require_relative "./postprocess_cover"
-module IsoDoc::WordFunction
-  module Postprocess
-    # add namespaces for Word fragments
-    WORD_NOKOHEAD = <<~HERE.freeze
-      <!DOCTYPE html SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
-      <html xmlns="http://www.w3.org/1999/xhtml"
-      xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office"
-      xmlns:w="urn:schemas-microsoft-com:office:word"
-      xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
-      <head> <title></title> <meta charset="UTF-8" /> </head>
-      <body> </body> </html>
-    HERE
-    def to_word_xhtml_fragment(xml)
-      doc = ::Nokogiri::XML.parse(WORD_NOKOHEAD)
-      ::Nokogiri::XML::DocumentFragment.new(doc, xml, doc.root)
-    end
+module IsoDoc
+  module WordFunction
+    module Postprocess
+      # add namespaces for Word fragments
+      WORD_NOKOHEAD = <<~HERE.freeze
+        <!DOCTYPE html SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+        <html xmlns="http://www.w3.org/1999/xhtml"
+        xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office"
+        xmlns:w="urn:schemas-microsoft-com:office:word"
+        xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
+        <head> <title></title> <meta charset="UTF-8" /> </head>
+        <body> </body> </html>
+      HERE
+      def to_word_xhtml_fragment(xml)
+        doc = ::Nokogiri::XML.parse(WORD_NOKOHEAD)
+        ::Nokogiri::XML::DocumentFragment.new(doc, xml, doc.root)
+      end
-    def table_note_cleanup(docxml)
-      super
-      # preempt html2doc putting MsoNormal there
-      docxml.xpath("//p[not(self::*[@class])][ancestor::*[@class = 'Note']]")
-        .each do |p|
-        p["class"] = "Note"
+      def table_note_cleanup(docxml)
+        super
+        # preempt html2doc putting MsoNormal there
+        docxml.xpath("//p[not(self::*[@class])][ancestor::*[@class = 'Note']]")
+          .each do |p|
+          p["class"] = "Note"
+        end
       end
-    end
-    def postprocess(result, filename, dir)
-      filename = filename.sub(/\.doc$/, "")
-      header = generate_header(filename, dir)
-      result = from_xhtml(cleanup(to_xhtml(textcleanup(result))))
-      toWord(result, filename, dir, header)
-      @files_to_delete.each { |f| FileUtils.rm_f f }
-    end
+      def postprocess(result, filename, dir)
+        filename = filename.sub(/\.doc$/, "")
+        header = generate_header(filename, dir)
+        result = from_xhtml(cleanup(to_xhtml(textcleanup(result))))
+        toWord(result, filename, dir, header)
+        @files_to_delete.each { |f| FileUtils.rm_f f }
+      end
-    def toWord(result, filename, dir, header)
-      result = from_xhtml(word_cleanup(to_xhtml(result)))
-      @wordstylesheet = wordstylesheet_update
-      Html2Doc.process(
-        result,
-        filename: filename,
-        stylesheet: @wordstylesheet&.path,
-        header_file: header&.path, dir: dir,
-        asciimathdelims: [@openmathdelim, @closemathdelim],
-        liststyles: { ul: @ulstyle, ol: @olstyle }
-      )
-      header&.unlink
-      @wordstylesheet.unlink if @wordstylesheet.is_a?(Tempfile)
-    end
+      def toWord(result, filename, dir, header)
+        result = from_xhtml(word_cleanup(to_xhtml(result)))
+        @wordstylesheet = wordstylesheet_update
+        Html2Doc.process(
+          result,
+          filename: filename,
+          imagedir: @localdir,
+          stylesheet: @wordstylesheet&.path,
+          header_file: header&.path, dir: dir,
+          asciimathdelims: [@openmathdelim, @closemathdelim],
+          liststyles: { ul: @ulstyle, ol: @olstyle }
+        )
+        header&.unlink
+        @wordstylesheet.unlink if @wordstylesheet.is_a?(Tempfile)
+      end
-    def wordstylesheet_update
-      return if @wordstylesheet.nil?
+      def wordstylesheet_update
+        return if @wordstylesheet.nil?
-      f = File.open(@wordstylesheet.path, "a")
-      @landscapestyle.empty? or f.write(@landscapestyle)
-      if @wordstylesheet_override && @wordstylesheet
-        f.write(@wordstylesheet_override.read)
-        @wordstylesheet_override.close
-      elsif @wordstylesheet_override && !@wordstylesheet
-        @wordstylesheet = @wordstylesheet_override
+        f = File.open(@wordstylesheet.path, "a")
+        @landscapestyle.empty? or f.write(@landscapestyle)
+        if @wordstylesheet_override && @wordstylesheet
+          f.write(@wordstylesheet_override.read)
+          @wordstylesheet_override.close
+        elsif @wordstylesheet_override && !@wordstylesheet
+          @wordstylesheet = @wordstylesheet_override
+        end
+        f.close
+        @wordstylesheet
       end
-      f.close
-      @wordstylesheet
-    end
-    def word_admonition_images(docxml)
-      docxml.xpath("//div[@class = 'Admonition']//img").each do |i|
-        i["width"], i["height"] =
-          Html2Doc.image_resize(i, image_localfile(i), @maxheight, 300)
+      def word_admonition_images(docxml)
+        docxml.xpath("//div[@class = 'Admonition']//img").each do |i|
+          i["width"], i["height"] =
+            Html2Doc.image_resize(i, image_localfile(i), @maxheight, 300)
+        end
       end
-    end
-    def word_cleanup(docxml)
-      word_annex_cleanup(docxml)
-      word_preface(docxml)
-      word_nested_tables(docxml)
-      word_colgroup(docxml)
-      word_table_align(docxml)
-      word_table_separator(docxml)
-      word_admonition_images(docxml)
-      word_list_continuations(docxml)
-      word_example_cleanup(docxml)
-      word_pseudocode_cleanup(docxml)
-      word_image_caption(docxml)
-      word_section_breaks(docxml)
-      authority_cleanup(docxml)
-      word_footnote_format(docxml)
-      docxml
-    end
+      def word_cleanup(docxml)
+        word_annex_cleanup(docxml)
+        word_preface(docxml)
+        word_nested_tables(docxml)
+        word_colgroup(docxml)
+        word_table_align(docxml)
+        word_table_separator(docxml)
+        word_admonition_images(docxml)
+        word_list_continuations(docxml)
+        word_example_cleanup(docxml)
+        word_pseudocode_cleanup(docxml)
+        word_image_caption(docxml)
+        word_section_breaks(docxml)
+        authority_cleanup(docxml)
+        word_footnote_format(docxml)
+        docxml
+      end
-    def word_colgroup(docxml)
-      cells2d = {}
-      docxml.xpath("//table[colgroup]").each do |t|
-        w = colgroup_widths(t)
-        t.xpath(".//tr").each_with_index { |_tr, r| cells2d[r] = {} }
-        t.xpath(".//tr").each_with_index do |tr, r|
-          tr.xpath("./td | ./th").each_with_index do |td, _i|
-            x = 0
-            rs = td&.attr("rowspan")&.to_i || 1
-            cs = td&.attr("colspan")&.to_i || 1
-            while cells2d[r][x]
-              x += 1
-            end
-            (r..(r + rs - 1)).each do |y2|
-              (x..(x + cs - 1)).each do |x2|
-                cells2d[y2][x2] = 1
+      def word_colgroup(docxml)
+        cells2d = {}
+        docxml.xpath("//table[colgroup]").each do |t|
+          w = colgroup_widths(t)
+          t.xpath(".//tr").each_with_index { |_tr, r| cells2d[r] = {} }
+          t.xpath(".//tr").each_with_index do |tr, r|
+            tr.xpath("./td | ./th").each_with_index do |td, _i|
+              x = 0
+              rs = td&.attr("rowspan")&.to_i || 1
+              cs = td&.attr("colspan")&.to_i || 1
+              while cells2d[r][x]
+                x += 1
               end
+              (r..(r + rs - 1)).each do |y2|
+                (x..(x + cs - 1)).each do |x2|
+                  cells2d[y2][x2] = 1
+                end
+              end
+              width = (x..(x + cs - 1)).each_with_object({ width: 0 }) do |z, m|
+                m[:width] += w[z]
+              end
+              td["width"] = "#{width[:width]}%"
+              x += cs
             end
-            width = (x..(x + cs - 1)).each_with_object({ width: 0 }) do |z, m|
-              m[:width] += w[z]
-            end
-            td["width"] = "#{width[:width]}%"
-            x += cs
           end
         end
       end
-    end
-    # assume percentages
-    def colgroup_widths(table)
-      table.xpath("./colgroup/col").each_with_object([]) do |c, m|
-        m << c["width"].sub(/%$/, "").to_f
+      # assume percentages
+      def colgroup_widths(table)
+        table.xpath("./colgroup/col").each_with_object([]) do |c, m|
+          m << c["width"].sub(/%$/, "").to_f
+        end
       end
-    end
-    def word_nested_tables(docxml)
-      docxml.xpath("//table").each do |t|
-        t.xpath(".//table").reverse.each do |tt|
-          t.next = tt.remove
+      def word_nested_tables(docxml)
+        docxml.xpath("//table").each do |t|
+          t.xpath(".//table").reverse.each do |tt|
+            t.next = tt.remove
+          end
         end
       end
-    end
-    def style_update(node, css)
-      return unless node
+      def style_update(node, css)
+        return unless node
-      node["style"] = node["style"] ? node["style"].sub(/;?$/, ";#{css}") : css
-    end
+        node["style"] =
+          node["style"] ? node["style"].sub(/;?$/, ";#{css}") : css
+      end
-    def word_image_caption(docxml)
-      docxml.xpath("//p[@class = 'FigureTitle' or @class = 'SourceTitle']")
-        .each do |t|
-        if t&.previous_element&.name == "img"
-          img = t.previous_element
-          t.previous_element.swap("<p class=\'figure\'>#{img.to_xml}</p>")
+      def word_image_caption(docxml)
+        docxml.xpath("//p[@class = 'FigureTitle' or @class = 'SourceTitle']")
+          .each do |t|
+          if t&.previous_element&.name == "img"
+            img = t.previous_element
+            t.previous_element.swap("<p class=\'figure\'>#{img.to_xml}</p>")
+          end
+          style_update(t&.previous_element, "page-break-after:avoid;")
         end
-        style_update(t&.previous_element, "page-break-after:avoid;")
       end
-    end
-    def word_list_continuations(docxml)
-      list_add(docxml.xpath("//ul[not(ancestor::ul) and not(ancestor::ol)]"), 1)
-      list_add(docxml.xpath("//ol[not(ancestor::ul) and not(ancestor::ol)]"), 1)
-    end
+      def word_list_continuations(docxml)
+        list_add(docxml.xpath("//ul[not(ancestor::ul) and not(ancestor::ol)]"),
+                 1)
+        list_add(docxml.xpath("//ol[not(ancestor::ul) and not(ancestor::ol)]"),
+                 1)
+      end
-    def list_add(xpath, lvl)
-      xpath.each do |list|
-        (list.xpath(".//li") - list.xpath(".//ol//li | .//ul//li")).each do |l|
-          l.xpath("./p | ./div | ./table").each_with_index do |p, i|
-            next if i.zero?
+      def list_add(xpath, lvl)
+        xpath.each do |list|
+          (list.xpath(".//li") - list.xpath(".//ol//li | .//ul//li")).each do |l|
+            l.xpath("./p | ./div | ./table").each_with_index do |p, i|
+              next if i.zero?
-            p.wrap(%{<div class="ListContLevel#{lvl}"/>})
+              p.wrap(%{<div class="ListContLevel#{lvl}"/>})
+            end
+            list_add(l.xpath(".//ul") - l.xpath(".//ul//ul | .//ol//ul"),
+                     lvl + 1)
+            list_add(l.xpath(".//ol") - l.xpath(".//ul//ol | .//ol//ol"),
+                     lvl + 1)
           end
-          list_add(l.xpath(".//ul") - l.xpath(".//ul//ul | .//ol//ul"), lvl + 1)
-          list_add(l.xpath(".//ol") - l.xpath(".//ul//ol | .//ol//ol"), lvl + 1)
         end
       end
-    end
-    def word_table_align(docxml)
-      docxml.xpath("//td[@align]/p | //th[@align]/p").each do |p|
-        next if p["align"]
+      def word_table_align(docxml)
+        docxml.xpath("//td[@align]/p | //th[@align]/p").each do |p|
+          next if p["align"]
-        style_update(p, "text-align: #{p.parent['align']}")
+          style_update(p, "text-align: #{p.parent['align']}")
+        end
       end
-    end
-    def word_table_separator(docxml)
-      docxml.xpath("//p[@class = 'TableTitle']").each do |t|
-        next unless t.children.empty?
+      def word_table_separator(docxml)
+        docxml.xpath("//p[@class = 'TableTitle']").each do |t|
+          next unless t.children.empty?
-        t["style"] = t["style"].sub(/;?$/, ";font-size:0pt;")
-        t.children = "&nbsp;"
+          t["style"] = t["style"].sub(/;?$/, ";font-size:0pt;")
+          t.children = "&nbsp;"
+        end
       end
-    end
-    def word_annex_cleanup(docxml); end
+      def word_annex_cleanup(docxml); end
-    def word_example_cleanup(docxml)
-      docxml.xpath("//div[@class = 'example']//p[not(@class)]").each do |p|
-        p["class"] = "example"
+      def word_example_cleanup(docxml)
+        docxml.xpath("//div[@class = 'example']//p[not(@class)]").each do |p|
+          p["class"] = "example"
+        end
       end
-    end
-    def word_pseudocode_cleanup(docxml)
-      docxml.xpath("//div[@class = 'pseudocode']//p[not(@class)]").each do |p|
-        p["class"] = "pseudocode"
+      def word_pseudocode_cleanup(docxml)
+        docxml.xpath("//div[@class = 'pseudocode']//p[not(@class)]").each do |p|
+          p["class"] = "pseudocode"
+        end
       end
-    end
-    # applies for <div class="WordSectionN_M"><p><pagebreak/></p>...
-    def word_remove_pb_before_annex(docxml)
-      docxml.xpath("//div[p/br]").each do |d|
-        /^WordSection\d+_\d+$/.match(d["class"]) or next
-        d.elements[0].name == "p" && !d.elements[0].elements.empty? or next
-        d.elements[0].elements[0].name == "br" &&
-          d.elements[0].elements[0]["style"] ==
-            "mso-special-character:line-break;page-break-before:always" or next
-        d.elements[0].remove
+      # applies for <div class="WordSectionN_M"><p><pagebreak/></p>...
+      def word_remove_pb_before_annex(docxml)
+        docxml.xpath("//div[p/br]").each do |d|
+          /^WordSection\d+_\d+$/.match(d["class"]) or next
+          d.elements[0].name == "p" && !d.elements[0].elements.empty? or next
+          d.elements[0].elements[0].name == "br" &&
+            d.elements[0].elements[0]["style"] ==
+              "mso-special-character:line-break;page-break-before:always" or next
+          d.elements[0].remove
+        end
       end
-    end
-    def word_footnote_format(docxml)
-      # the content is in a[@epub:type = 'footnote']//sup, but in Word,
-      # we need to inject content around the autonumbered footnote reference
-      docxml.xpath("//a[@epub:type = 'footnote']").each do |x|
-        footnote_reference_format(x)
-      end
-      docxml.xpath("//a[@class = 'TableFootnoteRef'] | "\
-                   "//span[@class = 'TableFootnoteRef']").each do |x|
-        table_footnote_reference_format(x)
+      def word_footnote_format(docxml)
+        # the content is in a[@epub:type = 'footnote']//sup, but in Word,
+        # we need to inject content around the autonumbered footnote reference
+        docxml.xpath("//a[@epub:type = 'footnote']").each do |x|
+          footnote_reference_format(x)
+        end
+        docxml.xpath("//a[@class = 'TableFootnoteRef'] | "\
+                     "//span[@class = 'TableFootnoteRef']").each do |x|
+          table_footnote_reference_format(x)
+        end
+        docxml
       end
-      docxml
     end
   end
 end