isodoc 0.5.5 → 0.5.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (57)
  1. checksums.yaml +4 -4
  2. data/CODE_OF_CONDUCT.md +46 -0
  3. data/LICENSE +25 -0
  4. data/README.adoc +1 -1
  5. data/Rakefile +6 -0
  6. data/isodoc.gemspec +1 -0
  7. data/lib/isodoc.rb +4 -95
  8. data/lib/isodoc/cleanup.rb +14 -10
  9. data/lib/isodoc/{notes.rb → comments.rb} +0 -73
  10. data/lib/isodoc/convert.rb +97 -0
  11. data/lib/isodoc/footnotes.rb +74 -0
  12. data/lib/isodoc/html.rb +41 -4
  13. data/lib/isodoc/i18n-en.yaml +1 -0
  14. data/lib/isodoc/i18n-fr.yaml +1 -0
  15. data/lib/isodoc/i18n-zh-Hans.yaml +1 -0
  16. data/lib/isodoc/i18n.rb +1 -0
  17. data/lib/isodoc/inline.rb +4 -12
  18. data/lib/isodoc/iso2wordhtml.rb +26 -13
  19. data/lib/isodoc/metadata.rb +23 -10
  20. data/lib/isodoc/references.rb +20 -22
  21. data/lib/isodoc/section.rb +4 -3
  22. data/lib/isodoc/table.rb +0 -2
  23. data/lib/isodoc/terms.rb +2 -13
  24. data/lib/isodoc/utils.rb +24 -3
  25. data/lib/isodoc/version.rb +1 -1
  26. data/lib/isodoc/wordconvert/comments.rb +155 -0
  27. data/lib/isodoc/wordconvert/convert.rb +31 -0
  28. data/lib/isodoc/wordconvert/footnotes.rb +80 -0
  29. data/lib/isodoc/wordconvert/wordconvertmodule.rb +212 -0
  30. data/lib/isodoc/xref_gen.rb +50 -79
  31. data/lib/isodoc/xref_sect_gen.rb +82 -0
  32. data/spec/assets/header.html +7 -0
  33. data/spec/assets/html.css +2 -0
  34. data/spec/assets/htmlcover.html +4 -0
  35. data/spec/assets/htmlintro.html +5 -0
  36. data/spec/assets/i18n.yaml +2 -0
  37. data/spec/assets/iso.xml +8 -0
  38. data/spec/assets/rice_image1.png +0 -0
  39. data/spec/assets/std.css +2 -0
  40. data/spec/assets/word.css +2 -0
  41. data/spec/assets/wordcover.html +3 -0
  42. data/spec/assets/wordintro.html +4 -0
  43. data/spec/isodoc/blocks_spec.rb +130 -47
  44. data/spec/isodoc/cleanup_spec.rb +693 -0
  45. data/spec/isodoc/footnotes_spec.rb +282 -0
  46. data/spec/isodoc/i18n_spec.rb +662 -0
  47. data/spec/isodoc/inline_spec.rb +344 -0
  48. data/spec/isodoc/lists_spec.rb +81 -18
  49. data/spec/isodoc/metadata_spec.rb +141 -0
  50. data/spec/isodoc/postproc_spec.rb +444 -0
  51. data/spec/isodoc/ref_spec.rb +158 -0
  52. data/spec/isodoc/section_spec.rb +275 -112
  53. data/spec/isodoc/table_spec.rb +146 -8
  54. data/spec/isodoc/terms_spec.rb +118 -0
  55. data/spec/isodoc/xref_spec.rb +490 -114
  56. metadata +46 -4
  57. data/lib/isodoc/postprocessing.rb +0 -176
--- /dev/null
+++ data/spec/isodoc/metadata_spec.rb
@@ -0,0 +1,141 @@
1
+ require "spec_helper"
2
+ require "nokogiri"
3
+
4
+ RSpec.describe IsoDoc do
5
+ it "processes IsoXML metadata" do
6
+ expect(Hash[IsoDoc::Convert.new({}).info(Nokogiri::XML(<<~"INPUT"), nil).sort]).to be_equivalent_to <<~"OUTPUT"
7
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
8
+ <bibdata type="international-standard">
9
+ <title>
10
+ <title-intro language="en" format="text/plain">Cereals and pulses</title-intro>
11
+ <title-main language="en" format="text/plain">Specifications and test methods</title-main>
12
+ <title-part language="en" format="text/plain">Rice</title-part>
13
+ </title>
14
+ <title>
15
+ <title-intro language="fr" format="text/plain">Céréales et légumineuses</title-intro>
16
+ <title-main language="fr" format="text/plain">Spécification et méthodes d'essai</title-main>
17
+ <title-part language="fr" format="text/plain">Riz</title-part>
18
+ </title>
19
+ <docidentifier>
20
+ <project-number part="1">17301</project-number>
21
+ <tc-document-number>17301</tc-document-number>
22
+ </docidentifier>
23
+ <date type="published"><from>2011</from></date>
24
+ <date type="accessed"><from>2012</from></date>
25
+ <date type="created"><from>2010</from><to>2011</to></date>
26
+ <date type="activated"><from>2013</from></date>
27
+ <date type="obsoleted"><from>2014</from></date>
28
+ <contributor>
29
+ <role type="author"/>
30
+ <organization>
31
+ <abbreviation>ISO</abbreviation>
32
+ </organization>
33
+ </contributor>
34
+ <contributor>
35
+ <role type="publisher"/>
36
+ <organization>
37
+ <abbreviation>ISO</abbreviation>
38
+ </organization>
39
+ </contributor>
40
+ <language>en</language>
41
+ <script>Latn</script>
42
+ <status>
43
+ <stage>30</stage>
44
+ <substage>92</substage>
45
+ </status>
46
+ <copyright>
47
+ <from>2016</from>
48
+ <owner>
49
+ <organization>
50
+ <abbreviation>ISO</abbreviation>
51
+ </organization>
52
+ </owner>
53
+ </copyright>
54
+ <editorialgroup>
55
+ <technical-committee number="34">Food products</technical-committee>
56
+ <subcommittee number="4">Cereals and pulses</subcommittee>
57
+ <workgroup number="3">Rice Group</workgroup>
58
+ <secretariat>GB</secretariat>
59
+ </editorialgroup>
60
+ </bibdata><version>
61
+ <edition>2</edition>
62
+ <revision-date>2016-05-01</revision-date>
63
+
64
+ </version>
65
+ </iso-standard>
66
+ INPUT
67
+ {:accesseddate=>"2012", :activateddate=>"2013", :agency=>"ISO", :createddate=>"2010&ndash;2011", :docnumber=>"CD 17301-1", :docsubtitle=>"C&#xe9;r&#xe9;ales et l&#xe9;gumineuses&nbsp;&mdash; Sp&#xe9;cification et m&#xe9;thodes d&#x27;essai&nbsp;&mdash; Partie&nbsp;1: Riz", :doctitle=>"Cereals and pulses&nbsp;&mdash; Specifications and test methods&nbsp;&mdash; Part&nbsp;1: Rice", :docyear=>"2016", :draft=>nil, :draftinfo=>"", :editorialgroup=>["TC 34", "SC 4", "WG 3"], :obsoleteddate=>"2014", :obsoletes=>nil, :obsoletes_part=>nil, :publisheddate=>"2011", :revdate=>"2016-05-01", :sc=>"SC 4", :secretariat=>"GB", :stage=>"30", :stageabbr=>"CD", :tc=>"TC 34", :wg=>"WG 3"}
68
+ OUTPUT
69
+ end
70
+
71
+ it "processes IsoXML metadata" do
72
+ expect(Hash[IsoDoc::Convert.new({}).info(Nokogiri::XML(<<~"INPUT"), nil).sort]).to be_equivalent_to <<~"OUTPUT"
73
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
74
+ <bibdata type="international-standard">
75
+ <title>
76
+ <title-intro language="en" format="text/plain">Cereals and pulses</title-intro>
77
+ <title-main language="en" format="text/plain">Specifications and test methods</title-main>
78
+ <title-part language="en" format="text/plain">Rice</title-part>
79
+ </title>
80
+ <title>
81
+ <title-intro language="fr" format="text/plain">Céréales et légumineuses</title-intro>
82
+ <title-main language="fr" format="text/plain">Spécification et méthodes d'essai</title-main>
83
+ <title-part language="fr" format="text/plain">Riz</title-part>
84
+ </title>
85
+ <docidentifier>
86
+ <project-number part="1" subpart="3">17301</project-number>
87
+ <tc-document-number>17301</tc-document-number>
88
+ </docidentifier>
89
+ <contributor>
90
+ <role type="author"/>
91
+ <organization>
92
+ <name>ISO</name>
93
+ </organization>
94
+ </contributor>
95
+ <contributor>
96
+ <role type="publisher"/>
97
+ <organization>
98
+ <abbreviation>ISO</abbreviation>
99
+ </organization>
100
+ </contributor>
101
+ <contributor>
102
+ <role type="publisher"/>
103
+ <organization>
104
+ <abbreviation>IEC</abbreviation>
105
+ </organization>
106
+ </contributor>
107
+ <language>en</language>
108
+ <script>Latn</script>
109
+ <status>
110
+ <stage>30</stage>
111
+ <substage>92</substage>
112
+ </status>
113
+ <copyright>
114
+ <from>2016</from>
115
+ <owner>
116
+ <organization>
117
+ <name>International Organization for Standardization</name>
118
+ </organization>
119
+ </owner>
120
+ </copyright>
121
+ <relation type="obsoletes">
122
+ <locality type="clause"><referenceFrom>3.1</referenceFrom></locality>
123
+ <docidentifier>IEC 8121</docidentifier>
124
+ </relation>
125
+ <editorialgroup>
126
+ <technical-committee number="34" type="ABC">Food products</technical-committee>
127
+ <subcommittee number="4" type="DEF">Cereals and pulses</subcommittee>
128
+ <workgroup number="3" type="GHI">Rice Group</workgroup>
129
+ </editorialgroup>
130
+ </bibdata><version>
131
+ <edition>2</edition>
132
+ <revision-date>2016-05-01</revision-date>
133
+ <draft>12</draft>
134
+ </version>
135
+ </iso-standard>
136
+ INPUT
137
+ {:accesseddate=>"XXX", :activateddate=>"XXX", :agency=>"ISO/IEC", :createddate=>"XXX", :docnumber=>"CD 17301-1-3", :docsubtitle=>"C&#xe9;r&#xe9;ales et l&#xe9;gumineuses&nbsp;&mdash; Sp&#xe9;cification et m&#xe9;thodes d&#x27;essai&nbsp;&mdash; Partie&nbsp;1&ndash;3: Riz", :doctitle=>"Cereals and pulses&nbsp;&mdash; Specifications and test methods&nbsp;&mdash; Part&nbsp;1&ndash;3: Rice", :docyear=>"2016", :draft=>"12", :draftinfo=>" ( 12, 2016-05-01)", :editorialgroup=>["ABC 34", "DEF 4", "GHI 3"], :obsoleteddate=>"XXX", :obsoletes=>"IEC 8121", :obsoletes_part=>"3.1", :publisheddate=>"XXX", :revdate=>"2016-05-01", :sc=>"DEF 4", :secretariat=>"XXXX", :stage=>"30", :stageabbr=>"CD", :tc=>"ABC 34", :wg=>"GHI 3"}
138
+ OUTPUT
139
+ end
140
+
141
+ end
--- /dev/null
+++ data/spec/isodoc/postproc_spec.rb
@@ -0,0 +1,444 @@
1
+ require "spec_helper"
2
+
3
+ RSpec.describe IsoDoc do
4
+ it "generates HTML output docs with null configuration" do
5
+ system "rm -f test.doc"
6
+ system "rm -f test.html"
7
+ IsoDoc::Convert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert_file(<<~"INPUT", "test", false)
8
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
9
+ <foreword>
10
+ <note>
11
+ <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
12
+ </note>
13
+ </foreword>
14
+ </iso-standard>
15
+ INPUT
16
+ expect(File.exist?("test.html")).to be true
17
+ html = File.read("test.html")
18
+ expect(html).to match(%r{<title>test</title><style>})
19
+ expect(html).to match(/another empty stylesheet/)
20
+ expect(html).to match(%r{cdnjs\.cloudflare\.com/ajax/libs/mathjax/2\.7\.1/MathJax\.js})
21
+ expect(html).to match(/delimiters: \[\['\(#\(', '\)#\)'\]\]/)
22
+ end
23
+
24
+ it "generates Word output docs with null configuration" do
25
+ system "rm -f test.doc"
26
+ system "rm -f test.html"
27
+ IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert_file(<<~"INPUT", "test", false)
28
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
29
+ <foreword>
30
+ <note>
31
+ <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
32
+ </note>
33
+ </foreword>
34
+ </iso-standard>
35
+ INPUT
36
+ expect(File.exist?("test.doc")).to be true
37
+ word = File.read("test.doc")
38
+ expect(word).to match(/one empty stylesheet/)
39
+ end
40
+
41
+ it "generates HTML output docs with null configuration from file" do
42
+ system "rm -f spec/assets/iso.doc"
43
+ system "rm -f spec/assets/iso.html"
44
+ IsoDoc::Convert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("spec/assets/iso.xml", false)
45
+ expect(File.exist?("spec/assets/iso.html")).to be true
46
+ html = File.read("spec/assets/iso.html")
47
+ expect(html).to match(/another empty stylesheet/)
48
+ end
49
+
50
+ it "generates Word output docs with null configuration from file" do
51
+ system "rm -f spec/assets/iso.doc"
52
+ IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("spec/assets/iso.xml", false)
53
+ expect(File.exist?("spec/assets/iso.doc")).to be true
54
+ word = File.read("spec/assets/iso.doc")
55
+ expect(word).to match(/one empty stylesheet/)
56
+ end
57
+
58
+ it "generates HTML output docs with complete configuration" do
59
+ system "rm -f test.doc"
60
+ system "rm -f test.html"
61
+ IsoDoc::Convert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", standardstylesheet: "spec/assets/std.css", header: "spec/assets/header.html", htmlcoverpage: "spec/assets/htmlcover.html", htmlintropage: "spec/assets/htmlintro.html", wordcoverpage: "spec/assets/wordcover.html", wordintropage: "spec/assets/wordintro.html", i18nyaml: "spec/assets/i18n.yaml", ulstyle: "l1", olstyle: "l2"}).convert_file(<<~"INPUT", "test", false)
62
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
63
+ <foreword>
64
+ <note>
65
+ <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
66
+ </note>
67
+ </foreword>
68
+ </iso-standard>
69
+ INPUT
70
+ html = File.read("test.html")
71
+ expect(html).to match(/a third empty stylesheet/)
72
+ expect(html).to match(/an empty html cover page/)
73
+ expect(html).to match(/an empty html intro page/)
74
+ expect(html).to match(%r{Enkonduko</h1>})
75
+ end
76
+
77
+ it "generates Word output docs with complete configuration" do
78
+ system "rm -f test.doc"
79
+ system "rm -f test.html"
80
+ IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", standardstylesheet: "spec/assets/std.css", header: "spec/assets/header.html", htmlcoverpage: "spec/assets/htmlcover.html", htmlintropage: "spec/assets/htmlintro.html", wordcoverpage: "spec/assets/wordcover.html", wordintropage: "spec/assets/wordintro.html", i18nyaml: "spec/assets/i18n.yaml", ulstyle: "l1", olstyle: "l2"}).convert_file(<<~"INPUT", "test", false)
81
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
82
+ <foreword>
83
+ <note>
84
+ <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
85
+ </note>
86
+ </foreword>
87
+ </iso-standard>
88
+ INPUT
89
+ word = File.read("test.doc")
90
+ expect(word).to match(/a third empty stylesheet/)
91
+ expect(word).to match(/<title>test<\/title>/)
92
+ expect(word).to match(/test_files\/header.html/)
93
+ expect(word).to match(/an empty word cover page/)
94
+ expect(word).to match(/an empty word intro page/)
95
+ expect(word).to match(%r{Enkonduko</h1>})
96
+ end
97
+
98
+ it "converts definition lists to tables for Word" do
99
+ system "rm -f test.doc"
100
+ system "rm -f test.html"
101
+ IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert_file(<<~"INPUT", "test", false)
102
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
103
+ <foreword>
104
+ <dl>
105
+ <dt>Term</dt>
106
+ <dd>Definition</dd>
107
+ <dt>Term 2</dt>
108
+ <dd>Definition 2</dd>
109
+ </dl>
110
+ </foreword>
111
+ </iso-standard>
112
+ INPUT
113
+ word = File.read("test.doc").sub(/^.*<div class="WordSection2">/m, '<div class="WordSection2">').
114
+ sub(%r{<br clear="all" class="section"/>\s*<div class="WordSection3">.*$}m, "")
115
+ expect(word).to be_equivalent_to <<~"OUTPUT"
116
+ <div class="WordSection2">
117
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
118
+ <div>
119
+ <h1 class="ForewordTitle">Foreword</h1>
120
+ <table class="dl">
121
+ <tr>
122
+ <td valign="top" align="left">
123
+ <p style="text-align: left;" class="MsoNormal">Term</p>
124
+ </td>
125
+ <td valign="top">Definition</td>
126
+ </tr>
127
+ <tr>
128
+ <td valign="top" align="left">
129
+ <p style="text-align: left;" class="MsoNormal">Term 2</p>
130
+ </td>
131
+ <td valign="top">Definition 2</td>
132
+ </tr>
133
+ </table>
134
+ </div>
135
+ <p class="MsoNormal">&#xA0;</p>
136
+ </div>
137
+ OUTPUT
138
+ end
139
+
140
+ it "converts annex subheadings to h2Annex class for Word" do
141
+ system "rm -f test.doc"
142
+ system "rm -f test.html"
143
+ IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert_file(<<~"INPUT", "test", false)
144
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
145
+ <annex id="P" inline-header="false" obligation="normative">
146
+ <title>Annex</title>
147
+ <subsection id="Q" inline-header="false" obligation="normative">
148
+ <title>Annex A.1</title>
149
+ </annex>
150
+ </iso-standard>
151
+ INPUT
152
+ word = File.read("test.doc").sub(/^.*<div class="WordSection3">/m, '<div class="WordSection3">').
153
+ sub(%r{<div style="mso-element:footnote-list"/>.*$}m, "")
154
+ expect(word).to be_equivalent_to <<~"OUTPUT"
155
+ <div class="WordSection3">
156
+ <p class="zzSTDTitle1"></p>
157
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
158
+ <div class="Section3"><a name="P" id="P"></a>
159
+ <h1 class="Annex"><b>Annex A</b><br/>(normative)<br/><br/><b>Annex</b></h1>
160
+ <div><a name="Q" id="Q"></a>
161
+ <p class="h2Annex">A.1. Annex A.1</p>
162
+ </div>
163
+ </div>
164
+ </div>
165
+ OUTPUT
166
+ end
167
+
168
+ it "populates Word template with terms reference labels" do
169
+ system "rm -f test.doc"
170
+ system "rm -f test.html"
171
+ IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert_file(<<~"INPUT", "test", false)
172
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
173
+ <sections>
174
+ <terms id="_terms_and_definitions" obligation="normative"><title>Terms and Definitions</title>
175
+
176
+ <term id="paddy1"><preferred>paddy</preferred>
177
+ <definition><p id="_eb29b35e-123e-4d1c-b50b-2714d41e747f">rice retaining its husk after threshing</p></definition>
178
+ <termsource status="modified">
179
+ <origin bibitemid="ISO7301" type="inline" citeas="ISO 7301: 2011"><locality type="clause"><referenceFrom>3.1</referenceFrom></locality></origin>
180
+ <modification>
181
+ <p id="_e73a417d-ad39-417d-a4c8-20e4e2529489">The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here</p>
182
+ </modification>
183
+ </termsource></term>
184
+
185
+ </terms>
186
+ </sections>
187
+ </iso-standard>
188
+
189
+ INPUT
190
+ word = File.read("test.doc").sub(/^.*<div class="WordSection3">/m, '<div class="WordSection3">').
191
+ sub(%r{<div style="mso-element:footnote-list"/>.*$}m, "")
192
+ expect(word).to be_equivalent_to <<~"OUTPUT"
193
+ <div class="WordSection3">
194
+ <p class="zzSTDTitle1"></p>
195
+ <div><a name="_terms_and_definitions" id="_terms_and_definitions"></a><h1>3.<span style="mso-tab-count:1">&#xA0; </span>Terms and Definitions</h1><p class="MsoNormal">For the purposes of this document,
196
+ the following terms and definitions apply.</p>
197
+ <p class="MsoNormal">ISO and IEC maintain terminological databases for use in
198
+ standardization at the following addresses:</p>
199
+
200
+ <ul>
201
+ <li class="MsoNormal"> <p class="MsoNormal">ISO Online browsing platform: available at
202
+ <a href="http://www.iso.org/obp">http://www.iso.org/obp</a></p> </li>
203
+ <li class="MsoNormal"> <p class="MsoNormal">IEC Electropedia: available at
204
+ <a href="http://www.electropedia.org">http://www.electropedia.org</a>
205
+ </p> </li> </ul>
206
+ <p class="TermNum"><a name="paddy1" id="paddy1"></a>3.1</p><p class="Terms">paddy</p>
207
+ <p class="MsoNormal"><a name="_eb29b35e-123e-4d1c-b50b-2714d41e747f" id="_eb29b35e-123e-4d1c-b50b-2714d41e747f"></a>rice retaining its husk after threshing</p>
208
+ <p class="MsoNormal">[SOURCE: <a href="#ISO7301">ISO 7301: 2011, 3.1</a>, modified &mdash; The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here]</p></div>
209
+ </div>
210
+ OUTPUT
211
+ end
212
+
213
+ it "populates Word header" do
214
+ system "rm -f test.doc"
215
+ IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", header: "spec/assets/header.html"}).convert_file(<<~"INPUT", "test", false)
216
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
217
+ <bibdata type="article">
218
+ <docidentifier>
219
+ <project-number part="1">1000</project-number>
220
+ </docidentifier>
221
+ </bibdata>
222
+ </iso-standard>
223
+
224
+ INPUT
225
+ word = File.read("test.doc").sub(%r{^.*Content-Location: file:///C:/Doc/test_files/header.html}m, "Content-Location: file:///C:/Doc/test_files/header.html").
226
+ sub(/------=_NextPart.*$/m, "")
227
+ expect(word).to be_equivalent_to <<~"OUTPUT"
228
+
229
+ Content-Location: file:///C:/Doc/test_files/header.html
230
+ Content-Transfer-Encoding: base64
231
+ Content-Type: text/html charset="utf-8"
232
+
233
+ Ci8qIGFuIGVtcHR5IGhlYWRlciAqLwoKU1RBUlQgRE9DIElEOiAxMDAwLTE6IEVORCBET0MgSUQK
234
+ CkZJTEVOQU1FOiB0ZXN0Cgo=
235
+
236
+ OUTPUT
237
+ end
238
+
239
+ it "populates Word ToC" do
240
+ system "rm -f test.doc"
241
+ IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", wordintropage: "spec/assets/wordintro.html"}).convert_file(<<~"INPUT", "test", false)
242
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
243
+ <sections>
244
+ <clause inline-header="false" obligation="normative"><title>Clause 4</title><subsection id="N" inline-header="false" obligation="normative">
245
+
246
+ <title>Introduction<bookmark id="Q"/> to this<fn reference="1">
247
+ <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p>
248
+ </fn></title>
249
+ </subsection>
250
+ <subsection id="O" inline-header="false" obligation="normative">
251
+ <title>Clause 4.2</title>
252
+ <p>A<fn reference="1">
253
+ <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p>
254
+ </fn></p>
255
+ </subsection></clause>
256
+ </sections>
257
+ </iso-standard>
258
+
259
+ INPUT
260
+ word = File.read("test.doc").sub(/^.*<div class="WordSection2">/m, '<div class="WordSection2">').
261
+ sub(%r{<br clear="all" class="section"/>\s*<div class="WordSection3">.*$}m, "")
262
+ expect(word.gsub(/_Toc\d\d+/, "_Toc")).to be_equivalent_to <<~'OUTPUT'
263
+ <div class="WordSection2">
264
+ /* an empty word intro page */
265
+
266
+ <p class="MsoToc1"><span lang="EN-GB" xml:lang="EN-GB"><span style="mso-element:field-begin"></span><span style="mso-spacerun:yes">&#xA0;</span>TOC
267
+ \o "1-2" \h \z \u <span style="mso-element:field-separator"></span></span>
268
+ <span class="MsoHyperlink"><span lang="EN-GB" style="mso-no-proof:yes" xml:lang="EN-GB">
269
+ <a href="#_Toc">4.<span style="mso-tab-count:1">&#xA0; </span>Clause 4<span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">
270
+ <span style="mso-tab-count:1 dotted">. </span>
271
+ </span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">
272
+ <span style="mso-element:field-begin"></span></span>
273
+ <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"> PAGEREF _Toc \h </span>
274
+ <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"><span style="mso-element:field-separator"></span></span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">1</span>
275
+ <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"></span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"><span style="mso-element:field-end"></span></span></a></span></span></p>
276
+
277
+ <p class="MsoToc2">
278
+ <span class="MsoHyperlink">
279
+ <span lang="EN-GB" style="mso-no-proof:yes" xml:lang="EN-GB">
280
+ <a href="#_Toc">4.1. Introduction to this<span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">
281
+ <span style="mso-tab-count:1 dotted">. </span>
282
+ </span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">
283
+ <span style="mso-element:field-begin"></span></span>
284
+ <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"> PAGEREF _Toc \h </span>
285
+ <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"><span style="mso-element:field-separator"></span></span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">1</span>
286
+ <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"></span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"><span style="mso-element:field-end"></span></span></a></span>
287
+ </span>
288
+ </p>
289
+
290
+ <p class="MsoToc2">
291
+ <span class="MsoHyperlink">
292
+ <span lang="EN-GB" style="mso-no-proof:yes" xml:lang="EN-GB">
293
+ <a href="#_Toc">4.2. Clause 4.2<span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">
294
+ <span style="mso-tab-count:1 dotted">. </span>
295
+ </span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">
296
+ <span style="mso-element:field-begin"></span></span>
297
+ <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"> PAGEREF _Toc \h </span>
298
+ <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"><span style="mso-element:field-separator"></span></span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">1</span>
299
+ <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"></span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"><span style="mso-element:field-end"></span></span></a></span>
300
+ </span>
301
+ </p>
302
+
303
+ <p class="MsoToc1">
304
+ <span lang="EN-GB" xml:lang="EN-GB">
305
+ <span style="mso-element:field-end"></span>
306
+ </span>
307
+ <span lang="EN-GB" xml:lang="EN-GB">
308
+ <p class="MsoNormal">&#xA0;</p>
309
+ </span>
310
+ </p>
311
+
312
+
313
+ <p class="MsoNormal">&#xA0;</p>
314
+ </div>
315
+ OUTPUT
316
+ end
317
+
318
+ it "reorders footnote numbers in HTML" do
319
+ system "rm -f test.html"
320
+ IsoDoc::Convert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", wordintropage: "spec/assets/wordintro.html"}).convert_file(<<~"INPUT", "test", false)
321
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
322
+ <sections>
323
+ <clause inline-header="false" obligation="normative"><title>Clause 4</title><fn reference="3">
324
+ <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">This is a footnote.</p>
325
+ </fn><subsection id="N" inline-header="false" obligation="normative">
326
+
327
+ <title>Introduction to this<fn reference="2">
328
+ <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p>
329
+ </fn></title>
330
+ </subsection>
331
+ <subsection id="O" inline-header="false" obligation="normative">
332
+ <title>Clause 4.2</title>
333
+ <p>A<fn reference="1">
334
+ <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p>
335
+ </fn></p>
336
+ </subsection></clause>
337
+ </sections>
338
+ </iso-standard>
339
+ INPUT
340
+ html = File.read("test.html").sub(/^.*<div class="WordSection3">/m, '<div class="WordSection3">').
341
+ sub(%r{<script type="text/x-mathjax-config">.*$}m, "")
342
+ expect(html).to be_equivalent_to <<~"OUTPUT"
343
+ <div class="WordSection3">
344
+ <p class="zzSTDTitle1"></p>
345
+ <div>
346
+ <h1>4.&#xA0; Clause 4</h1>
347
+ <a href="#ftn3" epub:type="footnote" id="_footnote1">
348
+ <sup>1</sup>
349
+ </a>
350
+ <div id="N">
351
+
352
+ <h2>4.1. Introduction to this<a href="#ftn2" epub:type="footnote" id="_footnote2"><sup>2</sup></a></h2>
353
+ </div>
354
+ <div id="O">
355
+ <h2>4.2. Clause 4.2</h2>
356
+ <p>A<a href="#ftn2" epub:type="footnote"><sup>2</sup></a></p>
357
+ </div>
358
+ </div>
359
+ <aside id="ftn3">
360
+ <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6"><a href="#_footnote1">1) </a>This is a footnote.</p>
361
+ </aside>
362
+ <aside id="ftn2">
363
+ <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6"><a href="#_footnote2">2) </a>Formerly denoted as 15 % (m/m).</p>
364
+ </aside>
365
+
366
+ </div>
367
+ OUTPUT
368
+ end
369
+
370
+ it "moves images in HTML" do
371
+ system "rm -f test.html"
372
+ system "rm -rf _images"
373
+ IsoDoc::Convert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert_file(<<~"INPUT", "test", false)
374
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
375
+ <foreword>
376
+ <figure id="_">
377
+ <name>Split-it-right sample divider</name>
378
+ <image src="spec/assets/rice_image1.png" id="_" imagetype="PNG"/>
379
+ </figure>
380
+ </foreword>
381
+ </iso-standard>
382
+ INPUT
383
+ html = File.read("test.html").sub(/^.*<div class="WordSection2">/m, '<div class="WordSection2">').
384
+ sub(%r{<div class="WordSection3">.*$}m, "")
385
+ expect(`ls _images`).to match(/\.png$/)
386
+ expect(html.gsub(/\/[0-9a-f-]+\.png/, "/_.png")).to be_equivalent_to <<~"OUTPUT"
387
+ <div class="WordSection2">
388
+ <br />
389
+ <div>
390
+ <h1 class="ForewordTitle">Foreword</h1>
391
+ <div id="_" class="figure">
392
+
393
+ <img src="_images/_.png" width="800" height="673" />
394
+ <p class="FigureTitle" align="center"><b>Figure 1&#xA0;&#x2014; Split-it-right sample divider</b></p></div>
395
+ </div>
396
+ <p>&#xA0;</p>
397
+ </div>
398
+ <br />
399
+ OUTPUT
400
+
401
+ end
402
+
403
+ it "populates HTML ToC" do
404
+ system "rm -f test.html"
405
+ IsoDoc::Convert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", htmlintropage: "spec/assets/htmlintro.html"}).convert_file(<<~"INPUT", "test", false)
406
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
407
+ <sections>
408
+ <clause inline-header="false" obligation="normative"><title>Clause 4</title><subsection id="N" inline-header="false" obligation="normative">
409
+
410
+ <title>Introduction<bookmark id="Q"/> to this<fn reference="1">
411
+ <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p>
412
+ </fn></title>
413
+ </subsection>
414
+ <subsection id="O" inline-header="false" obligation="normative">
415
+ <title>Clause 4.2</title>
416
+ <p>A<fn reference="1">
417
+ <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p>
418
+ </fn></p>
419
+ </subsection></clause>
420
+ <clause inline-header="false" obligation="normative"><title>Clause 5</title></clause>
421
+ </sections>
422
+ </iso-standard>
423
+
424
+ INPUT
425
+ html = File.read("test.html").sub(/^.*<div class="WordSection2">/m, '<div class="WordSection2">').
426
+ sub(%r{<div class="WordSection3">.*$}m, "")
427
+ expect(html.gsub(/"#[a-f0-9-]+"/, "#_")).to be_equivalent_to <<~"OUTPUT"
428
+ <div class="WordSection2">
429
+
430
+ <p>/* an empty html intro page */
431
+
432
+ </p>
433
+ <ul><li><a href=#_>5.&#xA0; Clause 4</a></li><ul><li><a href=#_>4.1. Introduction to this</a></li><li><a href=#_>4.2. Clause 4.2</a></li></ul><li><a href=#_>5.&#xA0; Clause 5</a></li></ul>
434
+
435
+
436
+
437
+ <p>&#xA0;</p>
438
+ </div>
439
+ <br />
440
+
441
+ OUTPUT
442
+ end
443
+
444
+ end