RubyGems - isodoc - Versions diffs - 0.5.5 → 0.5.7 - Mend

isodoc 0.5.5 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (57)

checksums.yaml +4 -4
data/CODE_OF_CONDUCT.md +46 -0
data/LICENSE +25 -0
data/README.adoc +1 -1
data/Rakefile +6 -0
data/isodoc.gemspec +1 -0
data/lib/isodoc.rb +4 -95
data/lib/isodoc/cleanup.rb +14 -10
data/lib/isodoc/{notes.rb → comments.rb} +0 -73
data/lib/isodoc/convert.rb +97 -0
data/lib/isodoc/footnotes.rb +74 -0
data/lib/isodoc/html.rb +41 -4
data/lib/isodoc/i18n-en.yaml +1 -0
data/lib/isodoc/i18n-fr.yaml +1 -0
data/lib/isodoc/i18n-zh-Hans.yaml +1 -0
data/lib/isodoc/i18n.rb +1 -0
data/lib/isodoc/inline.rb +4 -12
data/lib/isodoc/iso2wordhtml.rb +26 -13
data/lib/isodoc/metadata.rb +23 -10
data/lib/isodoc/references.rb +20 -22
data/lib/isodoc/section.rb +4 -3
data/lib/isodoc/table.rb +0 -2
data/lib/isodoc/terms.rb +2 -13
data/lib/isodoc/utils.rb +24 -3
data/lib/isodoc/version.rb +1 -1
data/lib/isodoc/wordconvert/comments.rb +155 -0
data/lib/isodoc/wordconvert/convert.rb +31 -0
data/lib/isodoc/wordconvert/footnotes.rb +80 -0
data/lib/isodoc/wordconvert/wordconvertmodule.rb +212 -0
data/lib/isodoc/xref_gen.rb +50 -79
data/lib/isodoc/xref_sect_gen.rb +82 -0
data/spec/assets/header.html +7 -0
data/spec/assets/html.css +2 -0
data/spec/assets/htmlcover.html +4 -0
data/spec/assets/htmlintro.html +5 -0
data/spec/assets/i18n.yaml +2 -0
data/spec/assets/iso.xml +8 -0
data/spec/assets/rice_image1.png +0 -0
data/spec/assets/std.css +2 -0
data/spec/assets/word.css +2 -0
data/spec/assets/wordcover.html +3 -0
data/spec/assets/wordintro.html +4 -0
data/spec/isodoc/blocks_spec.rb +130 -47
data/spec/isodoc/cleanup_spec.rb +693 -0
data/spec/isodoc/footnotes_spec.rb +282 -0
data/spec/isodoc/i18n_spec.rb +662 -0
data/spec/isodoc/inline_spec.rb +344 -0
data/spec/isodoc/lists_spec.rb +81 -18
data/spec/isodoc/metadata_spec.rb +141 -0
data/spec/isodoc/postproc_spec.rb +444 -0
data/spec/isodoc/ref_spec.rb +158 -0
data/spec/isodoc/section_spec.rb +275 -112
data/spec/isodoc/table_spec.rb +146 -8
data/spec/isodoc/terms_spec.rb +118 -0
data/spec/isodoc/xref_spec.rb +490 -114
metadata +46 -4
data/lib/isodoc/postprocessing.rb +0 -176

data/spec/isodoc/metadata_spec.rb ADDED

@@ -0,0 +1,141 @@
+require "spec_helper"
+require "nokogiri"
+RSpec.describe IsoDoc do
+  it "processes IsoXML metadata" do # Convert#info on bibdata with every date type, a single publisher and a secretariat; the result is sorted before comparison
+    expect(Hash[IsoDoc::Convert.new({}).info(Nokogiri::XML(<<~"INPUT"), nil).sort]).to be_equivalent_to <<~"OUTPUT"
+    <iso-standard xmlns="http://riboseinc.com/isoxml">
+    <bibdata type="international-standard">
+  <title>
+    <title-intro language="en" format="text/plain">Cereals and pulses</title-intro>
+    <title-main language="en" format="text/plain">Specifications and test methods</title-main>
+    <title-part language="en" format="text/plain">Rice</title-part>
+  </title>
+  <title>
+    <title-intro language="fr" format="text/plain">Céréales et légumineuses</title-intro>
+    <title-main language="fr" format="text/plain">Spécification et méthodes d'essai</title-main>
+    <title-part language="fr" format="text/plain">Riz</title-part>
+  </title>
+  <docidentifier>
+    <project-number part="1">17301</project-number>
+    <tc-document-number>17301</tc-document-number>
+  </docidentifier>
+  <date type="published"><from>2011</from></date>
+  <date type="accessed"><from>2012</from></date>
+  <date type="created"><from>2010</from><to>2011</to></date>
+  <date type="activated"><from>2013</from></date>
+  <date type="obsoleted"><from>2014</from></date>
+  <contributor>
+    <role type="author"/>
+    <organization>
+      <abbreviation>ISO</abbreviation>
+    </organization>
+  </contributor>
+  <contributor>
+    <role type="publisher"/>
+    <organization>
+      <abbreviation>ISO</abbreviation>
+    </organization>
+  </contributor>
+  <language>en</language>
+  <script>Latn</script>
+  <status>
+    <stage>30</stage>
+    <substage>92</substage>
+  </status>
+  <copyright>
+    <from>2016</from>
+    <owner>
+      <organization>
+        <abbreviation>ISO</abbreviation>
+      </organization>
+    </owner>
+  </copyright>
+  <editorialgroup>
+    <technical-committee number="34">Food products</technical-committee>
+    <subcommittee number="4">Cereals and pulses</subcommittee>
+    <workgroup number="3">Rice Group</workgroup>
+    <secretariat>GB</secretariat>
+  </editorialgroup>
+</bibdata><version>
+  <edition>2</edition>
+  <revision-date>2016-05-01</revision-date>
+</version>
+</iso-standard>
+INPUT
+       {:accesseddate=>"2012", :activateddate=>"2013", :agency=>"ISO", :createddate=>"2010&ndash;2011", :docnumber=>"CD 17301-1", :docsubtitle=>"C&#xe9;r&#xe9;ales et l&#xe9;gumineuses&nbsp;&mdash; Sp&#xe9;cification et m&#xe9;thodes d&#x27;essai&nbsp;&mdash; Partie&nbsp;1: Riz", :doctitle=>"Cereals and pulses&nbsp;&mdash; Specifications and test methods&nbsp;&mdash; Part&nbsp;1: Rice", :docyear=>"2016", :draft=>nil, :draftinfo=>"", :editorialgroup=>["TC 34", "SC 4", "WG 3"], :obsoleteddate=>"2014", :obsoletes=>nil, :obsoletes_part=>nil, :publisheddate=>"2011", :revdate=>"2016-05-01", :sc=>"SC 4", :secretariat=>"GB", :stage=>"30", :stageabbr=>"CD", :tc=>"TC 34", :wg=>"WG 3"}
+OUTPUT
+  end
+  it "processes IsoXML metadata" do
+    expect(Hash[IsoDoc::Convert.new({}).info(Nokogiri::XML(<<~"INPUT"), nil).sort]).to be_equivalent_to <<~"OUTPUT"
+    <iso-standard xmlns="http://riboseinc.com/isoxml">
+    <bibdata type="international-standard">
+  <title>
+    <title-intro language="en" format="text/plain">Cereals and pulses</title-intro>
+    <title-main language="en" format="text/plain">Specifications and test methods</title-main>
+    <title-part language="en" format="text/plain">Rice</title-part>
+  </title>
+  <title>
+    <title-intro language="fr" format="text/plain">Céréales et légumineuses</title-intro>
+    <title-main language="fr" format="text/plain">Spécification et méthodes d'essai</title-main>
+    <title-part language="fr" format="text/plain">Riz</title-part>
+  </title>
+  <docidentifier>
+    <project-number part="1" subpart="3">17301</project-number>
+    <tc-document-number>17301</tc-document-number>
+  </docidentifier>
+  <contributor>
+    <role type="author"/>
+    <organization>
+      <name>ISO</name>
+    </organization>
+  </contributor>
+  <contributor>
+    <role type="publisher"/>
+    <organization>
+      <abbreviation>ISO</abbreviation>
+     </organization>
+  </contributor>
+  <contributor>
+    <role type="publisher"/>
+    <organization>
+      <abbreviation>IEC</abbreviation>
+    </organization>
+  </contributor>
+  <language>en</language>
+  <script>Latn</script>
+  <status>
+    <stage>30</stage>
+    <substage>92</substage>
+  </status>
+  <copyright>
+    <from>2016</from>
+    <owner>
+      <organization>
+        <name>International Organization for Standardization</name>
+      </organization>
+    </owner>
+  </copyright>
+  <relation type="obsoletes">
+    <locality type="clause"><referenceFrom>3.1</referenceFrom></locality>
+    <docidentifier>IEC 8121</docidentifier>
+  </relation>
+  <editorialgroup>
+    <technical-committee number="34" type="ABC">Food products</technical-committee>
+    <subcommittee number="4" type="DEF">Cereals and pulses</subcommittee>
+    <workgroup number="3" type="GHI">Rice Group</workgroup>
+  </editorialgroup>
+</bibdata><version>
+  <edition>2</edition>
+  <revision-date>2016-05-01</revision-date>
+  <draft>12</draft>
+</version>
+</iso-standard>
+INPUT
+       {:accesseddate=>"XXX", :activateddate=>"XXX", :agency=>"ISO/IEC", :createddate=>"XXX", :docnumber=>"CD 17301-1-3", :docsubtitle=>"C&#xe9;r&#xe9;ales et l&#xe9;gumineuses&nbsp;&mdash; Sp&#xe9;cification et m&#xe9;thodes d&#x27;essai&nbsp;&mdash; Partie&nbsp;1&ndash;3: Riz", :doctitle=>"Cereals and pulses&nbsp;&mdash; Specifications and test methods&nbsp;&mdash; Part&nbsp;1&ndash;3: Rice", :docyear=>"2016", :draft=>"12", :draftinfo=>" ( 12, 2016-05-01)", :editorialgroup=>["ABC 34", "DEF 4", "GHI 3"], :obsoleteddate=>"XXX", :obsoletes=>"IEC 8121", :obsoletes_part=>"3.1", :publisheddate=>"XXX", :revdate=>"2016-05-01", :sc=>"DEF 4", :secretariat=>"XXXX", :stage=>"30", :stageabbr=>"CD", :tc=>"ABC 34", :wg=>"GHI 3"}
+OUTPUT
+  end
+end

data/spec/isodoc/postproc_spec.rb ADDED

@@ -0,0 +1,444 @@
+require "spec_helper"
+RSpec.describe IsoDoc do
+  it "generates HTML output docs with null configuration" do # only the word/html stylesheets are configured; checks title, stylesheet text, MathJax URL and delimiters in test.html
+    system "rm -f test.doc"
+    system "rm -f test.html"
+    IsoDoc::Convert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert_file(<<~"INPUT", "test", false)
+        <iso-standard xmlns="http://riboseinc.com/isoxml">
+    <foreword>
+    <note>
+  <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
+</note>
+    </foreword>
+    </iso-standard>
+    INPUT
+    expect(File.exist?("test.html")).to be true
+    html = File.read("test.html")
+    expect(html).to match(%r{<title>test</title><style>})
+    expect(html).to match(/another empty stylesheet/)
+    expect(html).to match(%r{cdnjs\.cloudflare\.com/ajax/libs/mathjax/2\.7\.1/MathJax\.js})
+    expect(html).to match(/delimiters: \[\['\(#\(', '\)#\)'\]\]/)
+  end
+  it "generates Word output docs with null configuration" do # same options through WordConvert; test.doc must exist and match /one empty stylesheet/ (presumably the text of spec/assets/word.css; confirm)
+    system "rm -f test.doc"
+    system "rm -f test.html"
+    IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert_file(<<~"INPUT", "test", false)
+        <iso-standard xmlns="http://riboseinc.com/isoxml">
+    <foreword>
+    <note>
+  <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
+</note>
+    </foreword>
+    </iso-standard>
+    INPUT
+    expect(File.exist?("test.doc")).to be true
+    word = File.read("test.doc")
+    expect(word).to match(/one empty stylesheet/)
+  end
+  it "generates HTML output docs with null configuration from file" do # convert() is given the input path; the output is expected beside it as spec/assets/iso.html
+    system "rm -f spec/assets/iso.doc"
+    system "rm -f spec/assets/iso.html"
+    IsoDoc::Convert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("spec/assets/iso.xml", false)
+    expect(File.exist?("spec/assets/iso.html")).to be true
+    html = File.read("spec/assets/iso.html")
+    expect(html).to match(/another empty stylesheet/)
+  end
+  it "generates Word output docs with null configuration from file" do # WordConvert#convert on the input path; the output is expected beside it as spec/assets/iso.doc
+    system "rm -f spec/assets/iso.doc"
+    IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("spec/assets/iso.xml", false)
+    expect(File.exist?("spec/assets/iso.doc")).to be true
+    word = File.read("spec/assets/iso.doc")
+    expect(word).to match(/one empty stylesheet/)
+  end
+  it "generates HTML output docs with complete configuration" do # every stylesheet/cover/intro/i18n option is set; the "Enkonduko" heading presumably comes from spec/assets/i18n.yaml (confirm)
+    system "rm -f test.doc"
+    system "rm -f test.html"
+    IsoDoc::Convert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", standardstylesheet: "spec/assets/std.css", header: "spec/assets/header.html", htmlcoverpage: "spec/assets/htmlcover.html", htmlintropage: "spec/assets/htmlintro.html", wordcoverpage: "spec/assets/wordcover.html", wordintropage: "spec/assets/wordintro.html", i18nyaml: "spec/assets/i18n.yaml", ulstyle: "l1", olstyle: "l2"}).convert_file(<<~"INPUT", "test", false)
+        <iso-standard xmlns="http://riboseinc.com/isoxml">
+    <foreword>
+    <note>
+  <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
+</note>
+    </foreword>
+    </iso-standard>
+    INPUT
+    html = File.read("test.html")
+    expect(html).to match(/a third empty stylesheet/)
+    expect(html).to match(/an empty html cover page/)
+    expect(html).to match(/an empty html intro page/)
+    expect(html).to match(%r{Enkonduko</h1>})
+  end
+  it "generates Word output docs with complete configuration" do # same full option set through WordConvert; also checks the title and the test_files/header.html reference in test.doc
+    system "rm -f test.doc"
+    system "rm -f test.html"
+    IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", standardstylesheet: "spec/assets/std.css", header: "spec/assets/header.html", htmlcoverpage: "spec/assets/htmlcover.html", htmlintropage: "spec/assets/htmlintro.html", wordcoverpage: "spec/assets/wordcover.html", wordintropage: "spec/assets/wordintro.html", i18nyaml: "spec/assets/i18n.yaml", ulstyle: "l1", olstyle: "l2"}).convert_file(<<~"INPUT", "test", false)
+        <iso-standard xmlns="http://riboseinc.com/isoxml">
+    <foreword>
+    <note>
+  <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
+</note>
+    </foreword>
+    </iso-standard>
+    INPUT
+    word = File.read("test.doc")
+    expect(word).to match(/a third empty stylesheet/)
+    expect(word).to match(/<title>test<\/title>/)
+    expect(word).to match(/test_files\/header.html/)
+    expect(word).to match(/an empty word cover page/)
+    expect(word).to match(/an empty word intro page/)
+    expect(word).to match(%r{Enkonduko</h1>})
+  end
+  it "converts definition lists to tables for Word" do # only WordSection2 of test.doc is compared: text before it and from the WordSection3 section break onwards is cut away
+    system "rm -f test.doc"
+    system "rm -f test.html"
+    IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert_file(<<~"INPUT", "test", false)
+     <iso-standard xmlns="http://riboseinc.com/isoxml">
+    <foreword>
+    <dl>
+    <dt>Term</dt>
+    <dd>Definition</dd>
+    <dt>Term 2</dt>
+    <dd>Definition 2</dd>
+    </dl>
+    </foreword>
+    </iso-standard>
+    INPUT
+    word = File.read("test.doc").sub(/^.*<div class="WordSection2">/m, '<div class="WordSection2">').
+      sub(%r{<br clear="all" class="section"/>\s*<div class="WordSection3">.*$}m, "")
+    expect(word).to be_equivalent_to <<~"OUTPUT"
+        <div class="WordSection2">
+                <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
+                <div>
+                  <h1 class="ForewordTitle">Foreword</h1>
+                  <table class="dl">
+                    <tr>
+                      <td valign="top" align="left">
+                        <p style="text-align: left;" class="MsoNormal">Term</p>
+                      </td>
+                      <td valign="top">Definition</td>
+                    </tr>
+                    <tr>
+                      <td valign="top" align="left">
+                        <p style="text-align: left;" class="MsoNormal">Term 2</p>
+                      </td>
+                      <td valign="top">Definition 2</td>
+                    </tr>
+                  </table>
+                </div>
+                <p class="MsoNormal">&#xA0;</p>
+              </div>
+    OUTPUT
+  end
+  it "converts annex subheadings to h2Annex class for Word" do # NOTE(review): the <subsection> in the input below is never closed; the expected output presumably relies on parser recovery (confirm)
+    system "rm -f test.doc"
+    system "rm -f test.html"
+    IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert_file(<<~"INPUT", "test", false)
+    <iso-standard xmlns="http://riboseinc.com/isoxml">
+    <annex id="P" inline-header="false" obligation="normative">
+         <title>Annex</title>
+         <subsection id="Q" inline-header="false" obligation="normative">
+         <title>Annex A.1</title>
+    </annex>
+    </iso-standard>
+    INPUT
+    word = File.read("test.doc").sub(/^.*<div class="WordSection3">/m, '<div class="WordSection3">').
+      sub(%r{<div style="mso-element:footnote-list"/>.*$}m, "")
+    expect(word).to be_equivalent_to <<~"OUTPUT"
+           <div class="WordSection3">
+               <p class="zzSTDTitle1"></p>
+               <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
+               <div class="Section3"><a name="P" id="P"></a>
+                 <h1 class="Annex"><b>Annex A</b><br/>(normative)<br/><br/><b>Annex</b></h1>
+                 <div><a name="Q" id="Q"></a>
+            <p class="h2Annex">A.1. Annex A.1</p>
+       </div>
+               </div>
+             </div>
+    OUTPUT
+  end
+  it "populates Word template with terms reference labels" do # only WordSection3 of test.doc is compared; text from the footnote-list div onwards is cut away
+    system "rm -f test.doc"
+    system "rm -f test.html"
+    IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert_file(<<~"INPUT", "test", false)
+        <iso-standard xmlns="http://riboseinc.com/isoxml">
+    <sections>
+    <terms id="_terms_and_definitions" obligation="normative"><title>Terms and Definitions</title>
+<term id="paddy1"><preferred>paddy</preferred>
+<definition><p id="_eb29b35e-123e-4d1c-b50b-2714d41e747f">rice retaining its husk after threshing</p></definition>
+<termsource status="modified">
+  <origin bibitemid="ISO7301" type="inline" citeas="ISO 7301: 2011"><locality type="clause"><referenceFrom>3.1</referenceFrom></locality></origin>
+    <modification>
+    <p id="_e73a417d-ad39-417d-a4c8-20e4e2529489">The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here</p>
+  </modification>
+</termsource></term>
+</terms>
+</sections>
+</iso-standard>
+    INPUT
+    word = File.read("test.doc").sub(/^.*<div class="WordSection3">/m, '<div class="WordSection3">').
+      sub(%r{<div style="mso-element:footnote-list"/>.*$}m, "")
+    expect(word).to be_equivalent_to <<~"OUTPUT"
+           <div class="WordSection3">
+               <p class="zzSTDTitle1"></p>
+               <div><a name="_terms_and_definitions" id="_terms_and_definitions"></a><h1>3.<span style="mso-tab-count:1">&#xA0; </span>Terms and Definitions</h1><p class="MsoNormal">For the purposes of this document,
+           the following terms and definitions apply.</p>
+       <p class="MsoNormal">ISO and IEC maintain terminological databases for use in
+       standardization at the following addresses:</p>
+       <ul>
+       <li class="MsoNormal"> <p class="MsoNormal">ISO Online browsing platform: available at
+         <a href="http://www.iso.org/obp">http://www.iso.org/obp</a></p> </li>
+       <li class="MsoNormal"> <p class="MsoNormal">IEC Electropedia: available at
+         <a href="http://www.electropedia.org">http://www.electropedia.org</a>
+       </p> </li> </ul>
+       <p class="TermNum"><a name="paddy1" id="paddy1"></a>3.1</p><p class="Terms">paddy</p>
+       <p class="MsoNormal"><a name="_eb29b35e-123e-4d1c-b50b-2714d41e747f" id="_eb29b35e-123e-4d1c-b50b-2714d41e747f"></a>rice retaining its husk after threshing</p>
+       <p class="MsoNormal">[SOURCE: <a href="#ISO7301">ISO 7301: 2011, 3.1</a>, modified &mdash; The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here]</p></div>
+             </div>
+    OUTPUT
+  end
+  it "populates Word header" do # the expected base64 payload decodes to text containing "START DOC ID: 1000-1: END DOC ID" and "FILENAME: test"
+    system "rm -f test.doc"
+    IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", header: "spec/assets/header.html"}).convert_file(<<~"INPUT", "test", false)
+        <iso-standard xmlns="http://riboseinc.com/isoxml">
+               <bibdata type="article">
+                        <docidentifier>
+           <project-number part="1">1000</project-number>
+         </docidentifier>
+        </bibdata>
+</iso-standard>
+    INPUT
+    word = File.read("test.doc").sub(%r{^.*Content-Location: file:///C:/Doc/test_files/header.html}m, "Content-Location: file:///C:/Doc/test_files/header.html").
+      sub(/------=_NextPart.*$/m, "")
+    expect(word).to be_equivalent_to <<~"OUTPUT"
+Content-Location: file:///C:/Doc/test_files/header.html
+Content-Transfer-Encoding: base64
+Content-Type: text/html charset="utf-8"
+Ci8qIGFuIGVtcHR5IGhlYWRlciAqLwoKU1RBUlQgRE9DIElEOiAxMDAwLTE6IEVORCBET0MgSUQK
+CkZJTEVOQU1FOiB0ZXN0Cgo=
+    OUTPUT
+  end
+  it "populates Word ToC" do # digits of the _Toc bookmark names are stripped before comparing; the bookmark and footnote inside the 4.1 title are expected to be absent from its ToC entry
+    system "rm -f test.doc"
+    IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", wordintropage: "spec/assets/wordintro.html"}).convert_file(<<~"INPUT", "test", false)
+        <iso-standard xmlns="http://riboseinc.com/isoxml">
+        <sections>
+               <clause inline-header="false" obligation="normative"><title>Clause 4</title><subsection id="N" inline-header="false" obligation="normative">
+         <title>Introduction<bookmark id="Q"/> to this<fn reference="1">
+  <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p>
+</fn></title>
+       </subsection>
+       <subsection id="O" inline-header="false" obligation="normative">
+         <title>Clause 4.2</title>
+         <p>A<fn reference="1">
+  <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p>
+</fn></p>
+       </subsection></clause>
+        </sections>
+        </iso-standard>
+    INPUT
+    word = File.read("test.doc").sub(/^.*<div class="WordSection2">/m, '<div class="WordSection2">').
+      sub(%r{<br clear="all" class="section"/>\s*<div class="WordSection3">.*$}m, "")
+    expect(word.gsub(/_Toc\d\d+/, "_Toc")).to be_equivalent_to <<~'OUTPUT'
+           <div class="WordSection2">
+       /* an empty word intro page */
+       <p class="MsoToc1"><span lang="EN-GB" xml:lang="EN-GB"><span style="mso-element:field-begin"></span><span style="mso-spacerun:yes">&#xA0;</span>TOC
+         \o "1-2" \h \z \u <span style="mso-element:field-separator"></span></span>
+       <span class="MsoHyperlink"><span lang="EN-GB" style="mso-no-proof:yes" xml:lang="EN-GB">
+       <a href="#_Toc">4.<span style="mso-tab-count:1">&#xA0; </span>Clause 4<span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">
+       <span style="mso-tab-count:1 dotted">. </span>
+       </span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">
+       <span style="mso-element:field-begin"></span></span>
+       <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"> PAGEREF _Toc \h </span>
+         <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"><span style="mso-element:field-separator"></span></span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">1</span>
+         <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"></span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"><span style="mso-element:field-end"></span></span></a></span></span></p>
+       <p class="MsoToc2">
+         <span class="MsoHyperlink">
+           <span lang="EN-GB" style="mso-no-proof:yes" xml:lang="EN-GB">
+       <a href="#_Toc">4.1. Introduction to this<span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">
+       <span style="mso-tab-count:1 dotted">. </span>
+       </span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">
+       <span style="mso-element:field-begin"></span></span>
+       <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"> PAGEREF _Toc \h </span>
+         <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"><span style="mso-element:field-separator"></span></span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">1</span>
+         <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"></span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"><span style="mso-element:field-end"></span></span></a></span>
+         </span>
+       </p>
+       <p class="MsoToc2">
+         <span class="MsoHyperlink">
+           <span lang="EN-GB" style="mso-no-proof:yes" xml:lang="EN-GB">
+       <a href="#_Toc">4.2. Clause 4.2<span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">
+       <span style="mso-tab-count:1 dotted">. </span>
+       </span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">
+       <span style="mso-element:field-begin"></span></span>
+       <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"> PAGEREF _Toc \h </span>
+         <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"><span style="mso-element:field-separator"></span></span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">1</span>
+         <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"></span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"><span style="mso-element:field-end"></span></span></a></span>
+         </span>
+       </p>
+       <p class="MsoToc1">
+         <span lang="EN-GB" xml:lang="EN-GB">
+           <span style="mso-element:field-end"></span>
+         </span>
+         <span lang="EN-GB" xml:lang="EN-GB">
+           <p class="MsoNormal">&#xA0;</p>
+         </span>
+       </p>
+               <p class="MsoNormal">&#xA0;</p>
+             </div>
+    OUTPUT
+  end
+  it "reorders footnote numbers in HTML" do # input footnotes are referenced as 3, 2, 1; the expected output shows them as 1, 2, 2, with the last one linking to ftn2 again
+    system "rm -f test.html"
+    IsoDoc::Convert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", wordintropage: "spec/assets/wordintro.html"}).convert_file(<<~"INPUT", "test", false)
+        <iso-standard xmlns="http://riboseinc.com/isoxml">
+        <sections>
+               <clause inline-header="false" obligation="normative"><title>Clause 4</title><fn reference="3">
+  <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">This is a footnote.</p>
+</fn><subsection id="N" inline-header="false" obligation="normative">
+         <title>Introduction to this<fn reference="2">
+  <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p>
+</fn></title>
+       </subsection>
+       <subsection id="O" inline-header="false" obligation="normative">
+         <title>Clause 4.2</title>
+         <p>A<fn reference="1">
+  <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p>
+</fn></p>
+       </subsection></clause>
+        </sections>
+        </iso-standard>
+    INPUT
+    html = File.read("test.html").sub(/^.*<div class="WordSection3">/m, '<div class="WordSection3">').
+      sub(%r{<script type="text/x-mathjax-config">.*$}m, "")
+    expect(html).to be_equivalent_to <<~"OUTPUT"
+           <div class="WordSection3">
+               <p class="zzSTDTitle1"></p>
+               <div>
+                 <h1>4.&#xA0; Clause 4</h1>
+                 <a href="#ftn3" epub:type="footnote" id="_footnote1">
+                   <sup>1</sup>
+                 </a>
+                 <div id="N">
+                <h2>4.1. Introduction to this<a href="#ftn2" epub:type="footnote" id="_footnote2"><sup>2</sup></a></h2>
+              </div>
+                 <div id="O">
+                <h2>4.2. Clause 4.2</h2>
+                <p>A<a href="#ftn2" epub:type="footnote"><sup>2</sup></a></p>
+              </div>
+               </div>
+               <aside id="ftn3">
+         <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6"><a href="#_footnote1">1) </a>This is a footnote.</p>
+       </aside>
+               <aside id="ftn2">
+         <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6"><a href="#_footnote2">2) </a>Formerly denoted as 15 % (m/m).</p>
+       </aside>
+             </div>
+    OUTPUT
+  end
+  it "moves images in HTML" do
+    system "rm -f test.html"
+    system "rm -rf _images"
+    IsoDoc::Convert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert_file(<<~"INPUT", "test", false)
+        <iso-standard xmlns="http://riboseinc.com/isoxml">
+        <foreword>
+         <figure id="_">
+         <name>Split-it-right sample divider</name>
+                  <image src="spec/assets/rice_image1.png" id="_" imagetype="PNG"/>
+       </figure>
+       </foreword>
+        </iso-standard>
+    INPUT
+    html = File.read("test.html").sub(/^.*<div class="WordSection2">/m, '<div class="WordSection2">').
+      sub(%r{<div class="WordSection3">.*$}m, "")
+    expect(`ls _images`).to match(/\.png$/)
+    expect(html.gsub(/\/[0-9a-f-]+\.png/, "/_.png")).to be_equivalent_to <<~"OUTPUT"
+        <div class="WordSection2">
+        <br />
+        <div>
+          <h1 class="ForewordTitle">Foreword</h1>
+          <div id="_" class="figure">
+           <img src="_images/_.png" width="800" height="673" />
+<p class="FigureTitle" align="center"><b>Figure 1&#xA0;&#x2014; Split-it-right sample divider</b></p></div>
+        </div>
+        <p>&#xA0;</p>
+      </div>
+      <br />
+    OUTPUT
+  end
+  it "populates HTML ToC" do # quoted "#id" hrefs are masked to an unquoted #_ before comparing; NOTE(review): the first entry is expected as "5." although its subclauses are 4.1/4.2 (confirm this is intended)
+    system "rm -f test.html"
+    IsoDoc::Convert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", htmlintropage: "spec/assets/htmlintro.html"}).convert_file(<<~"INPUT", "test", false)
+        <iso-standard xmlns="http://riboseinc.com/isoxml">
+        <sections>
+               <clause inline-header="false" obligation="normative"><title>Clause 4</title><subsection id="N" inline-header="false" obligation="normative">
+         <title>Introduction<bookmark id="Q"/> to this<fn reference="1">
+  <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p>
+</fn></title>
+       </subsection>
+       <subsection id="O" inline-header="false" obligation="normative">
+         <title>Clause 4.2</title>
+         <p>A<fn reference="1">
+  <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p>
+</fn></p>
+       </subsection></clause>
+               <clause inline-header="false" obligation="normative"><title>Clause 5</title></clause>
+        </sections>
+        </iso-standard>
+    INPUT
+    html = File.read("test.html").sub(/^.*<div class="WordSection2">/m, '<div class="WordSection2">').
+      sub(%r{<div class="WordSection3">.*$}m, "")
+    expect(html.gsub(/"#[a-f0-9-]+"/, "#_")).to be_equivalent_to <<~"OUTPUT"
+       <div class="WordSection2">
+       <p>/* an empty html intro page */
+       </p>
+       <ul><li><a href=#_>5.&#xA0; Clause 4</a></li><ul><li><a href=#_>4.1. Introduction to this</a></li><li><a href=#_>4.2. Clause 4.2</a></li></ul><li><a href=#_>5.&#xA0; Clause 5</a></li></ul>
+               <p>&#xA0;</p>
+             </div>
+             <br />
+    OUTPUT
+  end
+end