RubyGems - isodoc - Versions diffs - 0.0.1 - Mend

isodoc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

checksums.yaml +7 -0
data/.hound.yml +3 -0
data/.oss-guides.rubocop.yml +1077 -0
data/.rubocop.ribose.yml +65 -0
data/.rubocop.tb.yml +640 -0
data/.rubocop.yml +15 -0
data/Gemfile +6 -0
data/README.adoc +32 -0
data/isodoc.gemspec +51 -0
data/lib/isodoc.rb +74 -0
data/lib/isodoc/blocks.rb +184 -0
data/lib/isodoc/cleanup.rb +155 -0
data/lib/isodoc/html.rb +44 -0
data/lib/isodoc/inline.rb +211 -0
data/lib/isodoc/iso2wordhtml.rb +143 -0
data/lib/isodoc/lists.rb +54 -0
data/lib/isodoc/metadata.rb +99 -0
data/lib/isodoc/postprocessing.rb +156 -0
data/lib/isodoc/references.rb +129 -0
data/lib/isodoc/section.rb +136 -0
data/lib/isodoc/table.rb +99 -0
data/lib/isodoc/terms.rb +74 -0
data/lib/isodoc/utils.rb +88 -0
data/lib/isodoc/version.rb +3 -0
data/lib/isodoc/xref_gen.rb +204 -0
metadata +338 -0

data/lib/isodoc/metadata.rb ADDED

@@ -0,0 +1,99 @@
+require "htmlentities"
+module IsoDoc
+  class Convert
+    # Returns the metadata collection held in @meta, the same object that
+    # set_metadata writes into.
+    def get_metadata
+      @meta
+    end
+    # Stores value under key in @meta.
+    # NOTE(review): @meta is not initialised in this file; presumably it is
+    # set up elsewhere in the class before this is called - confirm.
+    def set_metadata(key, value)
+      @meta[key] = value
+    end
+    def author(isoxml, _out)
+      # tc = isoxml.at(ns("//technical-committee"))
+      tc_num = isoxml.at(ns("//technical-committee/@number"))
+      # sc = isoxml.at(ns("//subcommittee"))
+      sc_num = isoxml.at(ns("//subcommittee/@number"))
+      # wg = isoxml.at(ns("//workgroup"))
+      wg_num = isoxml.at(ns("//workgroup/@number"))
+      secretariat = isoxml.at(ns("//secretariat"))
+      set_metadata(:tc, "XXXX")
+      set_metadata(:sc, "XXXX")
+      set_metadata(:wg, "XXXX")
+      set_metadata(:secretariat, "XXXX")
+      set_metadata(:tc,  tc_num.text) if tc_num
+      set_metadata(:sc, sc_num.text) if sc_num
+      set_metadata(:wg, wg_num.text) if wg_num
+      set_metadata(:secretariat, secretariat.text) if secretariat
+    end
+    def id(isoxml, _out)
+      docnumber = isoxml.at(ns("//project-number"))
+      partnumber = isoxml.at(ns("//project-number/@part"))
+      documentstatus = isoxml.at(ns("//status/stage"))
+      dn = docnumber.text
+      dn += "-#{partnumber.text}" if partnumber
+      if documentstatus
+        set_metadata(:stage, documentstatus.text)
+        abbr = stage_abbreviation(documentstatus.text)
+        set_metadata(:stageabbr, abbr)
+        documentstatus.text.to_i < 60 and
+          dn = abbr + " " + dn
+      end
+      set_metadata(:docnumber, dn)
+    end
+    def draftinfo(draft, revdate)
+      draftinfo = ""
+      if draft
+        draftinfo = " (draft #{draft.text}"
+        draftinfo += ", #{revdate.text}" if revdate
+        draftinfo += ")"
+      end
+      draftinfo
+    end
+    def version(isoxml, _out)
+      yr = isoxml.at(ns("//copyright/from"))
+      set_metadata(:docyear, yr.text)
+      draft = isoxml.at(ns("//version/draft"))
+      set_metadata(:draft, draft.nil? ? nil : draft.text)
+      revdate = isoxml.at(ns("//version/revision-date"))
+      set_metadata(:revdate, revdate.nil? ? nil : revdate.text)
+      draftinfo = draftinfo(draft, revdate)
+      set_metadata(:draftinfo, draftinfo(draft, revdate))
+    end
+    def compose_title(main, intro, part, partnumber)
+      c = HTMLEntities.new
+      main = c.encode(main.text, :hexadecimal)
+      intro &&
+        main = "#{c.encode(intro.text, :hexadecimal)}&nbsp;&mdash; #{main}"
+      part &&
+        main = "#{main}&nbsp;&mdash; Part&nbsp;#{partnumber}: "\
+        "#{c.encode(part.text, :hexadecimal)}"
+      main
+    end
+    def title(isoxml, _out)
+      intro = isoxml.at(ns("//title[@language='en']/title-intro"))
+      main = isoxml.at(ns("//title[@language='en']/title-main"))
+      part = isoxml.at(ns("//title[@language='en']/title-part"))
+      partnumber = isoxml.at(ns("//id/project-number/@part"))
+      main = compose_title(main, intro, part, partnumber)
+      set_metadata(:doctitle, main)
+    end
+    def subtitle(isoxml, _out)
+      intro = isoxml.at(ns("//title[@language='fr']/title-intro"))
+      main = isoxml.at(ns("//title[@language='fr']/title-main"))
+      part = isoxml.at(ns("//title[@language='fr']/title-part"))
+      partnumber = isoxml.at(ns("//id/project-number/@part"))
+      main = compose_title(main, intro, part, partnumber)
+      set_metadata(:docsubtitle, main)
+    end
+  end
+end

data/lib/isodoc/postprocessing.rb ADDED

@@ -0,0 +1,156 @@
+require "html2doc"
+require "htmlentities"
+require "nokogiri"
+require "pp"
+module IsoDoc
+  class Convert
+    def postprocess(result, filename, dir)
+      generate_header(filename, dir)
+      result = from_xhtml(cleanup(to_xhtml(result)))
+      toWord(result, filename, dir)
+      toHTML(result, filename)
+    end
+    def toWord(result, filename, dir)
+      result = from_xhtml(wordCleanup(to_xhtml(result)))
+      result = populate_template(result)
+      Html2Doc.process(result, filename, @wordstylesheet, "header.html",
+                       dir, ['`', '`'])
+    end
+    def wordCleanup(docxml)
+      wordPreface(docxml)
+      wordAnnexCleanup(docxml)
+      docxml
+    end
+    # force Annex h2 to be p.h2Annex, so it is not picked up by ToC
+    def wordAnnexCleanup(docxml)
+      d = docxml.xpath("//h2[ancestor::*[@class = 'Section3']]").each do |h2|
+        h2.name = "p"
+        h2["class"] = "h2Annex"
+      end
+    end
+    def wordPreface(docxml)
+      cover = to_xhtml_fragment(File.read(@wordcoverpage, encoding: "UTF-8"))
+      d = docxml.at('//div[@class="WordSection1"]')
+      d.children.first.add_previous_sibling cover.to_xml(encoding: 'US-ASCII')
+      intro = to_xhtml_fragment(
+        File.read(@wordintropage, encoding: "UTF-8").
+        sub(/WORDTOC/, makeWordToC(docxml)))
+      d = docxml.at('//div[@class="WordSection2"]')
+      d.children.first.add_previous_sibling intro.to_xml(encoding: 'US-ASCII')
+    end
+    def populate_template(docxml)
+      meta = get_metadata
+      docxml.
+        gsub(/DOCYEAR/, meta[:docyear]).
+        gsub(/DOCNUMBER/, meta[:docnumber]).
+        gsub(/TCNUM/, meta[:tc]).
+        gsub(/SCNUM/, meta[:sc]).
+        gsub(/WGNUM/, meta[:wg]).
+        gsub(/DOCTITLE/, meta[:doctitle]).
+        gsub(/DOCSUBTITLE/, meta[:docsubtitle]).
+        gsub(/SECRETARIAT/, meta[:secretariat]).
+        gsub(/[ ]?DRAFTINFO/, meta[:draftinfo]).
+        gsub(/\[TERMREF\]\s*/, "[SOURCE: ").
+        gsub(/\s*\[\/TERMREF\]\s*/, "]").
+        gsub(/\s*\[ISOSECTION\]/, ", ").
+        gsub(/\s*\[MODIFICATION\]/, ", modified &mdash; ").
+        gsub(%r{WD/CD/DIS/FDIS}, meta[:stageabbr])
+    end
+    def generate_header(filename, dir)
+      header = File.read(@header, encoding: "UTF-8").
+        gsub(/FILENAME/, filename).
+        gsub(/DOCYEAR/, get_metadata()[:docyear]).
+        gsub(/[ ]?DRAFTINFO/, get_metadata()[:draftinfo]).
+        gsub(/DOCNUMBER/, get_metadata()[:docnumber])
+      File.open("header.html", "w") do |f|
+        f.write(header)
+      end
+    end
+    # Strictly speaking these steps are preprocessing, but they are kept
+    # here because they are extraneous to the main HTML file.
+    # Generates the anchor names for docxml, then emits the HTML head
+    # through define_head.
+    def html_header(html, docxml, filename, dir)
+      anchor_names docxml
+      define_head html, filename, dir
+    end
+    # Emits the HTML head: a title holding filename, and a style element
+    # holding the stylesheet read from @standardstylesheet, with every
+    # FILENAME placeholder replaced by filename.
+    # isodoc.css overrides any CSS injected by Html2Doc, which
+    # is inserted before this CSS.
+    # The dir parameter is accepted but not used here.
+    def define_head(html, filename, dir)
+      html.head do |head|
+        head.title { |t| t << filename }
+        head.style do |style|
+          stylesheet = File.read(@standardstylesheet).
+            gsub("FILENAME", filename)
+          # the stylesheet is emitted as a comment node inside <style>
+          style.comment "\n#{stylesheet}\n"
+        end
+      end
+    end
+    def titlepage(_docxml, div)
+      titlepage = File.read(@wordcoverpage, encoding: "UTF-8")
+      div.parent.add_child titlepage
+    end
+    def wordTocEntry(toclevel, heading)
+      bookmark = Random.rand(1000000000)
+      <<~TOC
+      <p class="MsoToc#{toclevel}"><span class="MsoHyperlink"><span
+      lang="EN-GB" style='mso-no-proof:yes'>
+      <a href="#_Toc#{bookmark}">#{heading}<span lang="EN-GB"
+      class="MsoTocTextSpan">
+        <span style='mso-tab-count:1 dotted'>. </span>
+        </span><span lang="EN-GB" class="MsoTocTextSpan">
+        <span style='mso-element:field-begin'></span></span>
+        <span lang="EN-GB"
+        class="MsoTocTextSpan"> PAGEREF _Toc#{bookmark} \\h </span>
+          <span lang="EN-GB" class="MsoTocTextSpan"><span
+          style='mso-element:field-separator'></span></span><span
+          lang="EN-GB" class="MsoTocTextSpan">1</span>
+          <span lang="EN-GB"
+          class="MsoTocTextSpan"></span><span
+          lang="EN-GB" class="MsoTocTextSpan"><span
+          style='mso-element:field-end'></span></span></a></span></span></p>
+      TOC
+    end
+    # Opening of the Word TOC field (field-begin, the TOC instruction with
+    # its switches, field-separator). makeWordToC inserts it right after
+    # the first MsoToc1 paragraph tag. Frozen, as it is a shared constant.
+    WORD_TOC_PREFACE = <<~TOC.freeze
+      <span lang="EN-GB"><span
+        style='mso-element:field-begin'></span><span
+        style='mso-spacerun:yes'>&#xA0;</span>TOC
+        \\o &quot;1-2&quot; \\h \\z \\u <span
+        style='mso-element:field-separator'></span></span>
+    TOC
+    # Closing paragraph of the Word TOC field (field-end). makeWordToC
+    # appends it after the last entry. Frozen, as it is a shared constant.
+    WORD_TOC_SUFFIX = <<~TOC.freeze
+      <p class="MsoToc1"><span lang="EN-GB"><span
+        style='mso-element:field-end'></span></span><span
+        lang="EN-GB"><o:p>&nbsp;</o:p></span></p>
+    TOC
+    def header_strip(h)
+      h.to_s.gsub(%r{<br/>}, " ").
+        sub(/<h[12][^>]*>/, "").sub(%r{</h[12]>}, "")
+    end
+    def makeWordToC(docxml)
+      toc = ""
+      docxml.xpath("//h1 | //h2[not(ancestor::*[@class = 'Section3'])]").
+        each do |h|
+        toc += wordTocEntry(h.name == "h1" ? 1 : 2, header_strip(h))
+      end
+      toc.sub(/(<p class="MsoToc1">)/,
+              %{\\1#{WORD_TOC_PREFACE}}) + WORD_TOC_SUFFIX
+    end
+  end
+end

data/lib/isodoc/references.rb ADDED

@@ -0,0 +1,129 @@
+module IsoDoc
+  class Convert
+    def iso_bibitem_ref_code(b)
+      isocode = b.at(ns("./docidentifier"))
+      isodate = b.at(ns("./publishdate"))
+      reference = "ISO #{isocode.text}"
+      reference += ": #{isodate.text}" if isodate
+      reference
+    end
+    def date_note_process(b, ref)
+      date_note = b.xpath(ns("./note[text()][contains(.,'ISO DATE:')]"))
+      unless date_note.empty?
+        date_note.first.content =
+          date_note.first.content.gsub(/ISO DATE: /, "")
+        date_note.wrap("<p></p>")
+        footnote_parse(date_note.first, ref)
+      end
+    end
+    # Emits one paragraph in list for ISO bibliographic item b. The
+    # paragraph carries the id of b and, when biblio is truthy, the class
+    # Biblio. In that case the entry also starts with "[ordinal]" and a
+    # tab, and a comma separates the reference code from the name. The
+    # name of the item is set in italics.
+    def iso_bibitem_entry(list, b, ordinal, biblio)
+      attrs = { id: b["id"], class: biblio ? "Biblio" : nil }
+      list.p **attr_code(attrs) do |ref|
+        if biblio
+          ref << "[#{ordinal}]"
+          insert_tab(ref, 1)
+        end
+        ref << iso_bibitem_ref_code(b)
+        # a date note, if any, becomes a footnote right after the code
+        date_note_process(b, ref)
+        ref << ", " if biblio
+        ref.i { |i| i << " #{b.at(ns('./name')).text}" }
+      end
+    end
+    def ref_entry_code(r, ordinal, t)
+      if /^\d+$/.match?(t)
+        r << "[#{t}]"
+        insert_tab(r, 1)
+      else
+        r << "[#{ordinal}]"
+        insert_tab(r, 1)
+        r << "#{t},"
+      end
+    end
+    def ref_entry(list, b, ordinal, bibliography)
+      ref = b.at(ns("./ref"))
+      para = b.at(ns("./p"))
+      list.p **attr_code("id": ref["id"], class: "Biblio") do |r|
+        ref_entry_code(r, ordinal, ref.text.gsub(/[\[\]]/, ""))
+        para.children.each { |n| parse(n, r) }
+      end
+    end
+    def noniso_bibitem(list, b, ordinal, bibliography)
+      ref = b.at(ns("./docidentifier"))
+      para = b.at(ns("./formatted"))
+      list.p **attr_code("id": b["id"], class: "Biblio") do |r|
+        ref_entry_code(r, ordinal, ref.text.gsub(/[\[\]]/, ""))
+        para.children.each { |n| parse(n, r) }
+      end
+    end
+    def split_bibitems(f)
+      iso_bibitem = []
+      non_iso_bibitem = []
+      f.xpath(ns("./bibitem")).each do |x|
+        if x.at(ns("./publisher/affiliation[name = 'ISO']")).nil?
+          non_iso_bibitem << x
+        else
+          iso_bibitem << x
+        end
+      end
+      { iso: iso_bibitem, noniso: non_iso_bibitem }
+    end
+    def biblio_list(f, div, bibliography)
+      bibitems = split_bibitems(f)
+      bibitems[:iso].each_with_index do |b, i|
+        iso_bibitem_entry(div, b, (i + 1), bibliography)
+      end
+      bibitems[:noniso].each_with_index do |b, i|
+        noniso_bibitem(div, b, (i + 1 + bibitems[:iso].size), bibliography)
+      end
+    end
+    # Boilerplate that norm_ref_preface emits when the normative references
+    # section contains references. Frozen, as it is a shared constant.
+    NORM_WITH_REFS_PREF = <<~BOILERPLATE.freeze
+          The following documents are referred to in the text in such a way
+          that some or all of their content constitutes requirements of this
+          document. For dated references, only the edition cited applies.
+          For undated references, the latest edition of the referenced
+          document (including any amendments) applies.
+    BOILERPLATE
+    # Boilerplate that norm_ref_preface emits when the normative references
+    # section contains no references. Frozen, as it is a shared constant.
+    NORM_EMPTY_PREF =
+      "There are no normative references in this document.".freeze
+    def norm_ref_preface(f, div)
+      refs = f.elements.select do |e|
+        ["reference", "bibitem"].include? e.name
+      end
+      pref = refs.empty? ? NORM_EMPTY_PREF : NORM_WITH_REFS_PREF
+      div.p pref
+    end
+    def norm_ref(isoxml, out)
+      q = "//sections/references[title = 'Normative References']"
+      f = isoxml.at(ns(q)) or return
+      out.div do |div|
+        clause_name("2.", "Normative References", div, false)
+        norm_ref_preface(f, div)
+        biblio_list(f, div, false)
+      end
+    end
+    def bibliography(isoxml, out)
+      q = "//sections/references[title = 'Bibliography']"
+      f = isoxml.at(ns(q)) or return
+      page_break(out)
+      out.div do |div|
+        div.h1 "Bibliography", **{ class: "Section3" }
+        f.elements.reject do |e|
+          ["reference", "title", "bibitem"].include? e.name
+        end.each { |e| parse(e, div) }
+        biblio_list(f, div, true)
+      end
+    end
+  end
+end

data/lib/isodoc/section.rb ADDED

@@ -0,0 +1,136 @@
+module IsoDoc
+  class Convert
+    def clause_parse(node, out)
+      out.div **attr_code("id": node["id"]) do |s|
+        node.children.each do |c1|
+          if c1.name == "title"
+            if node["inline-header"]
+              out.span **{ class: "zzMoveToFollowing" } do |s|
+                s.b do |b|
+                  b << "#{get_anchors()[node['id']][:label]}. #{c1.text} "
+                end
+              end
+            else
+              s.send "h#{get_anchors()[node['id']][:level]}" do |h|
+                h << "#{get_anchors()[node['id']][:label]}. #{c1.text}"
+              end
+            end
+          else
+            parse(c1, s)
+          end
+        end
+      end
+    end
+    def clause_name(num, title, div, inline_header)
+      if inline_header
+        div.span **{ class: "zzMoveToFollowing" } do |s|
+          s.b do |b|
+            b << num
+            b << title + " "
+          end
+        end
+      else
+        div.h1 do |h1|
+          h1 << num
+          insert_tab(h1, 1)
+          h1 << title
+        end
+      end
+    end
+    def clause(isoxml, out)
+      isoxml.xpath(ns("//clause[parent::sections]")).each do |c|
+        next if c.at(ns("./title")).text == "Scope"
+        out.div **attr_code("id": c["id"]) do |s|
+          c.elements.each do |c1|
+            if c1.name == "title"
+              clause_name("#{get_anchors()[c['id']][:label]}.",
+                          c1.text, s, c["inline-header"])
+            else
+              parse(c1, s)
+            end
+          end
+        end
+      end
+    end
+    def annex_name(annex, name, div)
+      div.h1 **{ class: "Annex" } do |t|
+        t << "#{get_anchors()[annex['id']][:label]}<br/><br/>"
+        t << "<b>#{name.text}</b>"
+      end
+    end
+    def annex(isoxml, out)
+      isoxml.xpath(ns("//annex")).each do |c|
+        page_break(out)
+        out.div **attr_code("id": c["id"], class: "Section3" ) do |s|
+          #s1.div **{ class: "annex" } do |s|
+            c.elements.each do |c1|
+              if c1.name == "title" then annex_name(c, c1, s)
+              else
+                parse(c1, s)
+              end
+            end
+          # end
+        end
+      end
+    end
+    def scope(isoxml, out)
+      f = isoxml.at(ns("//clause[title = 'Scope']")) || return
+      out.div do |div|
+        clause_name("1.", "Scope", div, false)
+        f.elements.each do |e|
+          parse(e, div) unless e.name == "title"
+        end
+      end
+    end
+    def terms_defs(isoxml, out)
+      f = isoxml.at(ns("//terms")) || return
+      out.div do |div|
+        clause_name("3.", "Terms and Definitions", div, false)
+        f.elements.each do |e|
+          parse(e, div) unless e.name == "title"
+        end
+      end
+    end
+    def symbols_abbrevs(isoxml, out)
+      f = isoxml.at(ns("//symbols-abbrevs")) || return
+      out.div do |div|
+        clause_name("4.", "Symbols and Abbreviations", div, false)
+        f.elements.each do |e|
+          parse(e, div) unless e.name == "title"
+        end
+      end
+    end
+    def introduction(isoxml, out)
+      f = isoxml.at(ns("//content[title = 'Introduction']")) || return
+      title_attr = { class: "IntroTitle" }
+      page_break(out)
+      out.div **{ class: "Section3" } do |div|
+        div.h1 "Introduction", **attr_code(title_attr)
+        f.elements.each do |e|
+          if e.name == "patent-notice"
+            e.elements.each { |e1| parse(e1, div) }
+          else
+            parse(e, div) unless e.name == "title"
+          end
+        end
+      end
+    end
+    def foreword(isoxml, out)
+      f = isoxml.at(ns("//content[title = 'Foreword']")) || return
+      page_break(out)
+      out.div do |s|
+        s.h1 **{ class: "ForewordTitle" } { |h1| h1 << "Foreword" }
+        f.elements.each { |e| parse(e, s) unless e.name == "title" }
+      end
+    end
+  end
+end