RubyGems - isodoc - Versions diffs - 0.0.1 - Mend

isodoc 0.0.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (26) hide show

checksums.yaml +7 -0
data/.hound.yml +3 -0
data/.oss-guides.rubocop.yml +1077 -0
data/.rubocop.ribose.yml +65 -0
data/.rubocop.tb.yml +640 -0
data/.rubocop.yml +15 -0
data/Gemfile +6 -0
data/README.adoc +32 -0
data/isodoc.gemspec +51 -0
data/lib/isodoc.rb +74 -0
data/lib/isodoc/blocks.rb +184 -0
data/lib/isodoc/cleanup.rb +155 -0
data/lib/isodoc/html.rb +44 -0
data/lib/isodoc/inline.rb +211 -0
data/lib/isodoc/iso2wordhtml.rb +143 -0
data/lib/isodoc/lists.rb +54 -0
data/lib/isodoc/metadata.rb +99 -0
data/lib/isodoc/postprocessing.rb +156 -0
data/lib/isodoc/references.rb +129 -0
data/lib/isodoc/section.rb +136 -0
data/lib/isodoc/table.rb +99 -0
data/lib/isodoc/terms.rb +74 -0
data/lib/isodoc/utils.rb +88 -0
data/lib/isodoc/version.rb +3 -0
data/lib/isodoc/xref_gen.rb +204 -0
metadata +338 -0

data/.rubocop.yml ADDED

@@ -0,0 +1,15 @@
+# This project follows the Ribose OSS style guide.
+# https://github.com/riboseinc/oss-guides
+# All project-specific additions and overrides should be specified in this file.
+inherit_from:
+  # Thoughtbot's style guide from: https://github.com/thoughtbot/guides
+  - ".rubocop.tb.yml"
+  # Overrides from Ribose
+  - ".rubocop.ribose.yml"
+AllCops:
+  DisplayCopNames: false
+  StyleGuideCopsOnly: false
+  TargetRubyVersion: 2.4
+Rails:
+  Enabled: true

data/Gemfile ADDED

@@ -0,0 +1,6 @@
+source "https://rubygems.org"
+# Specify your gem's dependencies in isodoc.gemspec
+gem "html2doc",
+      git: "https://github.com/riboseinc/html2doc.git"
+gemspec

data/README.adoc ADDED

@@ -0,0 +1,32 @@
+= isodoc
+This Gem converts documents in the https://github.com/riboseinc/isodoc-models[ISODoc document model] into HTML and Microsoft Word.
+The Gem provides a class, `IsoDoc::Convert`, which is instantiated with a hash of file locations:
+htmlstylesheet:: Generic stylesheet for HTML
+wordstylesheet:: Generic stylesheet for Word
+standardstylesheet:: Stylesheet specific to Standard
+header:: Header file for Word
+htmlcoverpage:: Cover page for HTML
+wordcoverpage:: Cover page for Word
+htmlintropage:: Introductory page for HTML
+wordintropage:: Introductory page for Word
+e.g.
+[source,ruby]
+--
+        IsoDoc::Convert.new(
+          htmlstylesheet: html_doc_path("htmlstyle.css"),
+          wordstylesheet: nil,
+          standardstylesheet: html_doc_path("isodoc.css"),
+          header: html_doc_path("header.html"),
+          htmlcoverpage: html_doc_path("iso_titlepage.html"),
+          wordcoverpage: html_doc_path("iso_titlepage.html"),
+          htmlintropage: html_doc_path("iso_intro.html"),
+          wordintropage: html_doc_path("iso_intro.html"),
+        )
+--
+NOTE: Cover page and Intro page must be XHTML fragments, not HTML fragments. In particular, unlike Word HTML, all HTML attributes need to be quoted: `<p class="MsoToc2">`, not `<p class=MsoToc2>`.

data/isodoc.gemspec ADDED

@@ -0,0 +1,51 @@
+# coding: utf-8
+lib = File.expand_path("../lib", __FILE__)
+$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
+require "isodoc/version"
+Gem::Specification.new do |spec|
+  spec.name          = "isodoc"
+  spec.version       = IsoDoc::VERSION
+  spec.authors       = ["Ribose Inc."]
+  spec.email         = ["open.source@ribose.com"]
+  spec.summary       = "Convert documents in IsoDoc into Word and HTML "\
+    "in AsciiDoc."
+  spec.description   = <<~DESCRIPTION
+    isodoc converts documents in the IsoDoc document model into
+    Microsoft Word and HTML.
+    This gem is in active development.
+  DESCRIPTION
+  spec.homepage      = "https://github.com/riboseinc/isodoc"
+  spec.license       = "MIT"
+  spec.bindir        = "bin"
+  spec.require_paths = ["lib"]
+  spec.files         = `git ls-files`.split("\n")
+  spec.test_files    = `git ls-files -- {spec}/*`.split("\n")
+  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
+  spec.add_dependency "asciimath"
+  spec.add_dependency "htmlentities", "~> 4.3.4"
+  spec.add_dependency "image_size"
+  spec.add_dependency "mime-types"
+  spec.add_dependency "nokogiri", "= 1.8.1"
+  spec.add_dependency "ruby-xslt"
+  spec.add_dependency "thread_safe"
+  spec.add_dependency "uuidtools"
+  spec.add_dependency "html2doc"
+  spec.add_development_dependency "bundler", "~> 1.15"
+  spec.add_development_dependency "byebug", "~> 9.1"
+  spec.add_development_dependency "equivalent-xml", "~> 0.6"
+  spec.add_development_dependency "guard", "~> 2.14"
+  spec.add_development_dependency "guard-rspec", "~> 4.7"
+  spec.add_development_dependency "rake", "~> 12.0"
+  spec.add_development_dependency "rspec", "~> 3.6"
+  spec.add_development_dependency "rubocop", "~> 0.50"
+  spec.add_development_dependency "simplecov", "~> 0.15"
+  spec.add_development_dependency "timecop", "~> 0.9"
+end

data/lib/isodoc.rb ADDED

@@ -0,0 +1,74 @@
+require_relative "isodoc/version"
+require "nokogiri"
+require "asciimath"
+require "xml/xslt"
+require "uuidtools"
+require "base64"
+require "mime/types"
+require "image_size"
+require "set"
+require_relative "isodoc/iso2wordhtml"
+require_relative "isodoc/cleanup"
+require_relative "isodoc/postprocessing"
+require_relative "isodoc/utils"
+require_relative "isodoc/metadata"
+require_relative "isodoc/section"
+require_relative "isodoc/references"
+require_relative "isodoc/terms"
+require_relative "isodoc/blocks"
+require_relative "isodoc/lists"
+require_relative "isodoc/table"
+require_relative "isodoc/inline"
+require_relative "isodoc/xref_gen"
+require_relative "isodoc/html"
+require "pp"
+# Namespace for the isodoc gem.
+module IsoDoc
+  # Converter that turns an IsoDoc XML file into HTML markup and hands it to
+  # postprocess. The class is reopened by the other files under isodoc/.
+  class Convert
+    # Builds a converter from a hash of options. Recognised keys:
+    # htmlstylesheet: Generic stylesheet for HTML
+    # wordstylesheet: Generic stylesheet for Word
+    # standardstylesheet: Stylesheet specific to Standard
+    # header: Header file for Word
+    # htmlcoverpage: Cover page for HTML
+    # wordcoverpage: Cover page for Word
+    # htmlintropage: Introductory page for HTML
+    # wordintropage: Introductory page for Word
+    # Keys that are absent are stored as nil. The remaining instance
+    # variables are per-conversion state, reset to their initial values here.
+    def initialize(options)
+      @htmlstylesheet = options[:htmlstylesheet]
+      @wordstylesheet = options[:wordstylesheet]
+      @standardstylesheet = options[:standardstylesheet]
+      @header = options[:header]
+      @htmlcoverpage = options[:htmlcoverpage]
+      @wordcoverpage = options[:wordcoverpage]
+      @htmlintropage = options[:htmlintropage]
+      @wordintropage = options[:wordintropage]
+      @termdomain = ""
+      @termexample = false
+      @note = false
+      @sourcecode = false
+      @anchors = {}
+      @meta = {}
+      @footnotes = []
+      @comments = []
+      @in_footnote = false
+      @in_table = false
+      @in_figure = false
+      @seen_footnote = Set.new
+    end
+    # Reads and parses the XML file at +filename+, builds an <html> tree
+    # (header and body) from it, and passes the serialised result to
+    # postprocess. Returns whatever postprocess returns.
+    def convert(filename)
+      docxml = Nokogiri::XML(File.read(filename))
+      # init_file is defined elsewhere; it returns a (possibly adjusted)
+      # filename and a directory, both forwarded to the later steps.
+      filename, dir = init_file(filename)
+      # Blank the root's default namespace before the tree is traversed.
+      docxml.root.default_namespace = ""
+      result = noko do |xml|
+        xml.html do |html|
+          html_header(html, docxml, filename, dir)
+          make_body(html, docxml)
+        end
+      end.join("\n")
+      postprocess(result, filename, dir)
+    end
+  end
+end

data/lib/isodoc/blocks.rb ADDED

@@ -0,0 +1,184 @@
+module IsoDoc
+  class Convert
+    # Hand-written accessors for conversion state flags
+    # (kept instead of: attr_accessor :termdomain, :termexample, :sourcecode, :note)
+    # Stores the term domain; para_parse emits it in front of the next
+    # paragraph and then clears it.
+    def set_termdomain(termdomain)
+      @termdomain = termdomain
+    end
+    # Returns the current value of the @termexample flag.
+    def get_termexample
+      @termexample
+    end
+    # Sets the @termexample flag to +value+.
+    def set_termexample(value)
+      @termexample = value
+    end
+    # Returns the @sourcecode flag, which sourcecode_parse sets to true while
+    # it processes the children of a sourcecode element.
+    def in_sourcecode
+      @sourcecode
+    end
+    # Returns the @note flag, which note_parse sets to true while it
+    # processes a note.
+    def is_note
+      @note
+    end
+    def note_label(node)
+      n = get_anchors()[node["id"]]
+      return "NOTE" if n.nil?
+      n[:label]
+    end
+    def note_p_parse(node, div)
+      div.p **{ class: "Note" } do |p|
+        p << note_label(node)
+        insert_tab(p, 1)
+        node.first_element_child.children.each { |n| parse(n, p) }
+      end
+      node.element_children[1..-1].each { |n| parse(n, div) }
+    end
+    def note_parse(node, out)
+      @note = true
+      out.div **{ id: node["id"], class: "Note" } do |div|
+        if node.first_element_child.name == "p"
+          note_p_parse(node, div)
+        else
+          div.p **{ class: "Note" } do |p|
+            p << note_label(node)
+            insert_tab(p, 1)
+          end
+          node.children.each { |n| parse(n, div) }
+        end
+      end
+      @note = false
+    end
+    def figure_name_parse(node, div, name)
+      div.p **{ class: "FigureTitle", align: "center" } do |p|
+        p.b do |b|
+          b << "#{get_anchors()[node['id']][:label]}&nbsp;&mdash; "
+          b << name.text
+        end
+      end
+    end
+    def figure_key(out)
+      out.p do |p|
+        p.b { |b| b << "Key" }
+      end
+    end
+    def figure_parse(node, out)
+      @in_figure = true
+      name = node.at(ns("./name"))
+      out.div **attr_code(id: node["id"], class: "figure") do |div|
+        node.children.each do |n|
+          figure_key(out) if n.name == "dl"
+          parse(n, div) unless n.name == "name"
+        end
+        figure_name_parse(node, div, name) if name
+      end
+      @in_figure = false
+    end
+    def sourcecode_name_parse(node, div, name)
+      div.p **{ class: "FigureTitle", align: "center" } do |p|
+        p.b do |b|
+          b << name.text
+        end
+      end
+    end
+    def sourcecode_parse(node, out)
+      name = node.at(ns("./name"))
+      out.p **attr_code(id: node["id"], class: "Sourcecode") do |div|
+        @sourcecode = true
+        node.children.each do |n|
+          parse(n, div) unless n.name == "name"
+        end
+        @sourcecode = false
+        sourcecode_name_parse(node, div, name) if name
+      end
+    end
+    def annotation_parse(node, out)
+      out.p **{ class: "Sourcecode" } do |li|
+        node.children.each { |n| parse(n, li) }
+      end
+    end
+    def admonition_parse(node, out)
+      name = node["type"]
+      out.div **{ class: "Admonition" } do |t|
+        t.p.b { |b| b << name.upcase } if name
+        node.children.each do |n|
+          parse(n, t)
+        end
+      end
+    end
+    def formula_parse(node, out)
+      dl = node.at(ns("./dl"))
+      out.div **attr_code(id: node["id"], class: "formula") do |div|
+        parse(node.at(ns("./stem")), out)
+        insert_tab(div, 1)
+        div << "(#{get_anchors()[node['id']][:label]})"
+      end
+      if dl
+        out.p { |p| p << "where" }
+        parse(dl, out)
+      end
+    end
+    def para_attrs(node)
+      classtype = nil
+      classtype = "Note" if @note
+      classtype = "MsoFootnoteText" if in_footnote
+      attrs = { class: classtype }
+      unless node["align"].nil?
+        attrs[:align] = node["align"] unless node["align"] == "justify"
+        attrs[:style] = "text-align:#{node["align"]}"
+      end
+      attrs
+    end
+    def para_parse(node, out)
+      out.p **attr_code(para_attrs(node)) do |p|
+        unless @termdomain.empty?
+          p << "&lt;#{@termdomain}&gt; "
+          @termdomain = ""
+        end
+        node.children.each { |n| parse(n, p) }
+      end
+    end
+    def quote_attribution(node, out)
+      author = node.at(ns("./author/fullname/"))
+      source = node.at(ns("./source"))
+      # TODO implement
+    end
+    def quote_parse(node, out)
+      attrs = para_attrs(node)
+      attrs[:class] = "Quote"
+      out.p **attr_code(attrs) do |p|
+        node.children.each do
+          |n| parse(n, p) unless ["author", "source"].include? n.name
+        end
+        quote_attribution(node, out)
+      end
+    end
+    def image_title_parse(out, caption)
+      unless caption.nil?
+        out.p **{ class: "FigureTitle", align: "center" } do |p|
+          p.b { |b| b << caption.to_s }
+        end
+      end
+    end
+    def image_parse(url, out, caption)
+      out.img **attr_code(src: url)
+      image_title_parse(out, caption)
+    end
+  end
+end

data/lib/isodoc/cleanup.rb ADDED

@@ -0,0 +1,155 @@
+require "html2doc"
+require "htmlentities"
+require "nokogiri"
+require "pp"
+module IsoDoc
+  class Convert
+    def cleanup(docxml)
+      comment_cleanup(docxml)
+      footnote_cleanup(docxml)
+      inline_header_cleanup(docxml)
+      figure_cleanup(docxml)
+      table_cleanup(docxml)
+      docxml
+    end
+    def figure_get_or_make_dl(t)
+      dl = t.at(".//dl")
+      if dl.nil?
+        t.add_child("<p><b>Key</b></p><dl></dl>")
+        dl = t.at(".//dl")
+      end
+      dl
+    end
+    FIGURE_WITH_FOOTNOTES =
+      "//div[@class = 'figure'][descendant::aside]"\
+      "[not(descendant::div[@class = 'figure'])]".freeze
+    def figure_aside_process(f, aside, key)
+      # get rid of footnote link, it is in diagram
+      f.at("./a[@class='zzFootnote']").remove
+      fnref = f.at(".//a[@class='zzFootnote']")
+      dt = key.add_child("<dt></dt>").first
+      dd = key.add_child("<dd></dd>").first
+      fnref.parent = dt
+      aside.xpath(".//p").each do |a|
+        a.delete("class")
+        a.parent = dd
+      end
+    end
+    def figure_cleanup(docxml)
+      # move footnotes into key, and get rid of footnote reference
+      # since it is in diagram
+      docxml.xpath(FIGURE_WITH_FOOTNOTES).each do |f|
+        key = figure_get_or_make_dl(f)
+        f.xpath(".//aside").each do |aside|
+          figure_aside_process(f, aside, key)
+        end
+      end
+    end
+    def inline_header_cleanup(docxml)
+      docxml.xpath('//span[@class="zzMoveToFollowing"]').each do |x|
+        n = x.next_element
+        if n.nil?
+          html = Nokogiri::XML.fragment("<p></p>")
+          html.parent = x.parent
+          x.parent = html
+        else
+          n.children.first.add_previous_sibling(x.remove)
+        end
+      end
+    end
+    def comment_cleanup(docxml)
+      docxml.xpath('//div/span[@style="MsoCommentReference"]').
+        each do |x|
+        prev = x.previous_element
+        if !prev.nil?
+          x.parent = prev
+        end
+      end
+      docxml
+    end
+    def footnote_cleanup(docxml)
+      docxml.xpath('//div[@style="mso-element:footnote"]/a').
+        each do |x|
+        n = x.next_element
+        if !n.nil?
+          n.children.first.add_previous_sibling(x.remove)
+        end
+      end
+      docxml
+    end
+    def merge_fnref_into_fn_text(a)
+      fn = a.at('.//a[@class="zzFootnote"]')
+      n = fn.next_element
+      n.children.first.add_previous_sibling(fn.remove) unless n.nil?
+    end
+    TABLE_WITH_FOOTNOTES = "//table[descendant::aside]".freeze
+    def table_footnote_cleanup(docxml)
+      docxml.xpath(TABLE_WITH_FOOTNOTES).each do |t|
+        t.xpath(".//aside").each do |a|
+          merge_fnref_into_fn_text(a)
+          a.name = "div"
+          a["class"] = "Note"
+          t << a.remove
+        end
+      end
+    end
+    def remove_bottom_border(td)
+      td["style"] =
+        td["style"].gsub(/border-bottom:[^;]+;/, "border-bottom:0pt;").
+        gsub(/mso-border-bottom-alt:[^;]+;/, "mso-border-bottom-alt:0pt;")
+    end
+    def table_get_or_make_tfoot(t)
+      tfoot = t.at(".//tfoot")
+      if tfoot.nil?
+        t.add_child("<tfoot></tfoot>")
+        tfoot = t.at(".//tfoot")
+      else
+        # nuke its bottom border
+        tfoot.xpath(".//td | .//th").each do |td|
+          remove_bottom_border(td)
+        end
+      end
+      tfoot
+    end
+    def new_fullcolspan_row(t, tfoot)
+      # how many columns in the table?
+      cols = 0
+      t.at(".//tr").xpath("./td | ./th").each do |td|
+        cols += ( td["colspan"] ? td["colspan"].to_i : 1 )
+      end
+      style = %{border-top:0pt;mso-border-top-alt:0pt;
+      border-bottom:#{SW} 1.5pt;mso-border-bottom-alt:#{SW} 1.5pt;}
+      tfoot.add_child("<tr><td colspan='#{cols}' style='#{style}'/></tr>")
+      tfoot.xpath(".//td").last
+    end
+    def table_note_cleanup(docxml)
+      docxml.xpath("//table[div[@class = 'Note']]").each do |t|
+        tfoot = table_get_or_make_tfoot(t)
+        insert_here = new_fullcolspan_row(t, tfoot)
+        t.xpath("div[@class = 'Note']").each do |d|
+          d.parent = insert_here
+        end
+      end
+    end
+    # Runs the table clean-up passes. Footnotes go first because
+    # table_footnote_cleanup creates the Note divs that table_note_cleanup
+    # then moves into the table footer.
+    def table_cleanup(docxml)
+      table_footnote_cleanup(docxml)
+      table_note_cleanup(docxml)
+    end
+  end
+end