isodoc 0.5.5 → 0.5.7

Sign up to get free protection for your applications and to get access to all the features.
Files changed (57) hide show
  1. checksums.yaml +4 -4
  2. data/CODE_OF_CONDUCT.md +46 -0
  3. data/LICENSE +25 -0
  4. data/README.adoc +1 -1
  5. data/Rakefile +6 -0
  6. data/isodoc.gemspec +1 -0
  7. data/lib/isodoc.rb +4 -95
  8. data/lib/isodoc/cleanup.rb +14 -10
  9. data/lib/isodoc/{notes.rb → comments.rb} +0 -73
  10. data/lib/isodoc/convert.rb +97 -0
  11. data/lib/isodoc/footnotes.rb +74 -0
  12. data/lib/isodoc/html.rb +41 -4
  13. data/lib/isodoc/i18n-en.yaml +1 -0
  14. data/lib/isodoc/i18n-fr.yaml +1 -0
  15. data/lib/isodoc/i18n-zh-Hans.yaml +1 -0
  16. data/lib/isodoc/i18n.rb +1 -0
  17. data/lib/isodoc/inline.rb +4 -12
  18. data/lib/isodoc/iso2wordhtml.rb +26 -13
  19. data/lib/isodoc/metadata.rb +23 -10
  20. data/lib/isodoc/references.rb +20 -22
  21. data/lib/isodoc/section.rb +4 -3
  22. data/lib/isodoc/table.rb +0 -2
  23. data/lib/isodoc/terms.rb +2 -13
  24. data/lib/isodoc/utils.rb +24 -3
  25. data/lib/isodoc/version.rb +1 -1
  26. data/lib/isodoc/wordconvert/comments.rb +155 -0
  27. data/lib/isodoc/wordconvert/convert.rb +31 -0
  28. data/lib/isodoc/wordconvert/footnotes.rb +80 -0
  29. data/lib/isodoc/wordconvert/wordconvertmodule.rb +212 -0
  30. data/lib/isodoc/xref_gen.rb +50 -79
  31. data/lib/isodoc/xref_sect_gen.rb +82 -0
  32. data/spec/assets/header.html +7 -0
  33. data/spec/assets/html.css +2 -0
  34. data/spec/assets/htmlcover.html +4 -0
  35. data/spec/assets/htmlintro.html +5 -0
  36. data/spec/assets/i18n.yaml +2 -0
  37. data/spec/assets/iso.xml +8 -0
  38. data/spec/assets/rice_image1.png +0 -0
  39. data/spec/assets/std.css +2 -0
  40. data/spec/assets/word.css +2 -0
  41. data/spec/assets/wordcover.html +3 -0
  42. data/spec/assets/wordintro.html +4 -0
  43. data/spec/isodoc/blocks_spec.rb +130 -47
  44. data/spec/isodoc/cleanup_spec.rb +693 -0
  45. data/spec/isodoc/footnotes_spec.rb +282 -0
  46. data/spec/isodoc/i18n_spec.rb +662 -0
  47. data/spec/isodoc/inline_spec.rb +344 -0
  48. data/spec/isodoc/lists_spec.rb +81 -18
  49. data/spec/isodoc/metadata_spec.rb +141 -0
  50. data/spec/isodoc/postproc_spec.rb +444 -0
  51. data/spec/isodoc/ref_spec.rb +158 -0
  52. data/spec/isodoc/section_spec.rb +275 -112
  53. data/spec/isodoc/table_spec.rb +146 -8
  54. data/spec/isodoc/terms_spec.rb +118 -0
  55. data/spec/isodoc/xref_spec.rb +490 -114
  56. metadata +46 -4
  57. data/lib/isodoc/postprocessing.rb +0 -176
@@ -0,0 +1,141 @@
1
+ require "spec_helper"
2
+ require "nokogiri"
3
+
4
+ RSpec.describe IsoDoc do
5
+ it "processes IsoXML metadata" do
6
+ expect(Hash[IsoDoc::Convert.new({}).info(Nokogiri::XML(<<~"INPUT"), nil).sort]).to be_equivalent_to <<~"OUTPUT"
7
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
8
+ <bibdata type="international-standard">
9
+ <title>
10
+ <title-intro language="en" format="text/plain">Cereals and pulses</title-intro>
11
+ <title-main language="en" format="text/plain">Specifications and test methods</title-main>
12
+ <title-part language="en" format="text/plain">Rice</title-part>
13
+ </title>
14
+ <title>
15
+ <title-intro language="fr" format="text/plain">Céréales et légumineuses</title-intro>
16
+ <title-main language="fr" format="text/plain">Spécification et méthodes d'essai</title-main>
17
+ <title-part language="fr" format="text/plain">Riz</title-part>
18
+ </title>
19
+ <docidentifier>
20
+ <project-number part="1">17301</project-number>
21
+ <tc-document-number>17301</tc-document-number>
22
+ </docidentifier>
23
+ <date type="published"><from>2011</from></date>
24
+ <date type="accessed"><from>2012</from></date>
25
+ <date type="created"><from>2010</from><to>2011</to></date>
26
+ <date type="activated"><from>2013</from></date>
27
+ <date type="obsoleted"><from>2014</from></date>
28
+ <contributor>
29
+ <role type="author"/>
30
+ <organization>
31
+ <abbreviation>ISO</abbreviation>
32
+ </organization>
33
+ </contributor>
34
+ <contributor>
35
+ <role type="publisher"/>
36
+ <organization>
37
+ <abbreviation>ISO</abbreviation>
38
+ </organization>
39
+ </contributor>
40
+ <language>en</language>
41
+ <script>Latn</script>
42
+ <status>
43
+ <stage>30</stage>
44
+ <substage>92</substage>
45
+ </status>
46
+ <copyright>
47
+ <from>2016</from>
48
+ <owner>
49
+ <organization>
50
+ <abbreviation>ISO</abbreviation>
51
+ </organization>
52
+ </owner>
53
+ </copyright>
54
+ <editorialgroup>
55
+ <technical-committee number="34">Food products</technical-committee>
56
+ <subcommittee number="4">Cereals and pulses</subcommittee>
57
+ <workgroup number="3">Rice Group</workgroup>
58
+ <secretariat>GB</secretariat>
59
+ </editorialgroup>
60
+ </bibdata><version>
61
+ <edition>2</edition>
62
+ <revision-date>2016-05-01</revision-date>
63
+
64
+ </version>
65
+ </iso-standard>
66
+ INPUT
67
+ {:accesseddate=>"2012", :activateddate=>"2013", :agency=>"ISO", :createddate=>"2010&ndash;2011", :docnumber=>"CD 17301-1", :docsubtitle=>"C&#xe9;r&#xe9;ales et l&#xe9;gumineuses&nbsp;&mdash; Sp&#xe9;cification et m&#xe9;thodes d&#x27;essai&nbsp;&mdash; Partie&nbsp;1: Riz", :doctitle=>"Cereals and pulses&nbsp;&mdash; Specifications and test methods&nbsp;&mdash; Part&nbsp;1: Rice", :docyear=>"2016", :draft=>nil, :draftinfo=>"", :editorialgroup=>["TC 34", "SC 4", "WG 3"], :obsoleteddate=>"2014", :obsoletes=>nil, :obsoletes_part=>nil, :publisheddate=>"2011", :revdate=>"2016-05-01", :sc=>"SC 4", :secretariat=>"GB", :stage=>"30", :stageabbr=>"CD", :tc=>"TC 34", :wg=>"WG 3"}
68
+ OUTPUT
69
+ end
70
+
71
+ it "processes IsoXML metadata" do
72
+ expect(Hash[IsoDoc::Convert.new({}).info(Nokogiri::XML(<<~"INPUT"), nil).sort]).to be_equivalent_to <<~"OUTPUT"
73
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
74
+ <bibdata type="international-standard">
75
+ <title>
76
+ <title-intro language="en" format="text/plain">Cereals and pulses</title-intro>
77
+ <title-main language="en" format="text/plain">Specifications and test methods</title-main>
78
+ <title-part language="en" format="text/plain">Rice</title-part>
79
+ </title>
80
+ <title>
81
+ <title-intro language="fr" format="text/plain">Céréales et légumineuses</title-intro>
82
+ <title-main language="fr" format="text/plain">Spécification et méthodes d'essai</title-main>
83
+ <title-part language="fr" format="text/plain">Riz</title-part>
84
+ </title>
85
+ <docidentifier>
86
+ <project-number part="1" subpart="3">17301</project-number>
87
+ <tc-document-number>17301</tc-document-number>
88
+ </docidentifier>
89
+ <contributor>
90
+ <role type="author"/>
91
+ <organization>
92
+ <name>ISO</name>
93
+ </organization>
94
+ </contributor>
95
+ <contributor>
96
+ <role type="publisher"/>
97
+ <organization>
98
+ <abbreviation>ISO</abbreviation>
99
+ </organization>
100
+ </contributor>
101
+ <contributor>
102
+ <role type="publisher"/>
103
+ <organization>
104
+ <abbreviation>IEC</abbreviation>
105
+ </organization>
106
+ </contributor>
107
+ <language>en</language>
108
+ <script>Latn</script>
109
+ <status>
110
+ <stage>30</stage>
111
+ <substage>92</substage>
112
+ </status>
113
+ <copyright>
114
+ <from>2016</from>
115
+ <owner>
116
+ <organization>
117
+ <name>International Organization for Standardization</name>
118
+ </organization>
119
+ </owner>
120
+ </copyright>
121
+ <relation type="obsoletes">
122
+ <locality type="clause"><referenceFrom>3.1</referenceFrom></locality>
123
+ <docidentifier>IEC 8121</docidentifier>
124
+ </relation>
125
+ <editorialgroup>
126
+ <technical-committee number="34" type="ABC">Food products</technical-committee>
127
+ <subcommittee number="4" type="DEF">Cereals and pulses</subcommittee>
128
+ <workgroup number="3" type="GHI">Rice Group</workgroup>
129
+ </editorialgroup>
130
+ </bibdata><version>
131
+ <edition>2</edition>
132
+ <revision-date>2016-05-01</revision-date>
133
+ <draft>12</draft>
134
+ </version>
135
+ </iso-standard>
136
+ INPUT
137
+ {:accesseddate=>"XXX", :activateddate=>"XXX", :agency=>"ISO/IEC", :createddate=>"XXX", :docnumber=>"CD 17301-1-3", :docsubtitle=>"C&#xe9;r&#xe9;ales et l&#xe9;gumineuses&nbsp;&mdash; Sp&#xe9;cification et m&#xe9;thodes d&#x27;essai&nbsp;&mdash; Partie&nbsp;1&ndash;3: Riz", :doctitle=>"Cereals and pulses&nbsp;&mdash; Specifications and test methods&nbsp;&mdash; Part&nbsp;1&ndash;3: Rice", :docyear=>"2016", :draft=>"12", :draftinfo=>" ( 12, 2016-05-01)", :editorialgroup=>["ABC 34", "DEF 4", "GHI 3"], :obsoleteddate=>"XXX", :obsoletes=>"IEC 8121", :obsoletes_part=>"3.1", :publisheddate=>"XXX", :revdate=>"2016-05-01", :sc=>"DEF 4", :secretariat=>"XXXX", :stage=>"30", :stageabbr=>"CD", :tc=>"ABC 34", :wg=>"GHI 3"}
138
+ OUTPUT
139
+ end
140
+
141
+ end
@@ -0,0 +1,444 @@
1
+ require "spec_helper"
2
+
3
+ RSpec.describe IsoDoc do
4
+ it "generates HTML output docs with null configuration" do
5
+ system "rm -f test.doc"
6
+ system "rm -f test.html"
7
+ IsoDoc::Convert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert_file(<<~"INPUT", "test", false)
8
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
9
+ <foreword>
10
+ <note>
11
+ <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
12
+ </note>
13
+ </foreword>
14
+ </iso-standard>
15
+ INPUT
16
+ expect(File.exist?("test.html")).to be true
17
+ html = File.read("test.html")
18
+ expect(html).to match(%r{<title>test</title><style>})
19
+ expect(html).to match(/another empty stylesheet/)
20
+ expect(html).to match(%r{cdnjs\.cloudflare\.com/ajax/libs/mathjax/2\.7\.1/MathJax\.js})
21
+ expect(html).to match(/delimiters: \[\['\(#\(', '\)#\)'\]\]/)
22
+ end
23
+
24
+ it "generates Word output docs with null configuration" do
25
+ system "rm -f test.doc"
26
+ system "rm -f test.html"
27
+ IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert_file(<<~"INPUT", "test", false)
28
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
29
+ <foreword>
30
+ <note>
31
+ <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
32
+ </note>
33
+ </foreword>
34
+ </iso-standard>
35
+ INPUT
36
+ expect(File.exist?("test.doc")).to be true
37
+ word = File.read("test.doc")
38
+ expect(word).to match(/one empty stylesheet/)
39
+ end
40
+
41
+ it "generates HTML output docs with null configuration from file" do
42
+ system "rm -f spec/assets/iso.doc"
43
+ system "rm -f spec/assets/iso.html"
44
+ IsoDoc::Convert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("spec/assets/iso.xml", false)
45
+ expect(File.exist?("spec/assets/iso.html")).to be true
46
+ html = File.read("spec/assets/iso.html")
47
+ expect(html).to match(/another empty stylesheet/)
48
+ end
49
+
50
+ it "generates Word output docs with null configuration from file" do
51
+ system "rm -f spec/assets/iso.doc"
52
+ IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert("spec/assets/iso.xml", false)
53
+ expect(File.exist?("spec/assets/iso.doc")).to be true
54
+ word = File.read("spec/assets/iso.doc")
55
+ expect(word).to match(/one empty stylesheet/)
56
+ end
57
+
58
+ it "generates HTML output docs with complete configuration" do
59
+ system "rm -f test.doc"
60
+ system "rm -f test.html"
61
+ IsoDoc::Convert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", standardstylesheet: "spec/assets/std.css", header: "spec/assets/header.html", htmlcoverpage: "spec/assets/htmlcover.html", htmlintropage: "spec/assets/htmlintro.html", wordcoverpage: "spec/assets/wordcover.html", wordintropage: "spec/assets/wordintro.html", i18nyaml: "spec/assets/i18n.yaml", ulstyle: "l1", olstyle: "l2"}).convert_file(<<~"INPUT", "test", false)
62
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
63
+ <foreword>
64
+ <note>
65
+ <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
66
+ </note>
67
+ </foreword>
68
+ </iso-standard>
69
+ INPUT
70
+ html = File.read("test.html")
71
+ expect(html).to match(/a third empty stylesheet/)
72
+ expect(html).to match(/an empty html cover page/)
73
+ expect(html).to match(/an empty html intro page/)
74
+ expect(html).to match(%r{Enkonduko</h1>})
75
+ end
76
+
77
+ it "generates Word output docs with complete configuration" do
78
+ system "rm -f test.doc"
79
+ system "rm -f test.html"
80
+ IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", standardstylesheet: "spec/assets/std.css", header: "spec/assets/header.html", htmlcoverpage: "spec/assets/htmlcover.html", htmlintropage: "spec/assets/htmlintro.html", wordcoverpage: "spec/assets/wordcover.html", wordintropage: "spec/assets/wordintro.html", i18nyaml: "spec/assets/i18n.yaml", ulstyle: "l1", olstyle: "l2"}).convert_file(<<~"INPUT", "test", false)
81
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
82
+ <foreword>
83
+ <note>
84
+ <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
85
+ </note>
86
+ </foreword>
87
+ </iso-standard>
88
+ INPUT
89
+ word = File.read("test.doc")
90
+ expect(word).to match(/a third empty stylesheet/)
91
+ expect(word).to match(/<title>test<\/title>/)
92
+ expect(word).to match(/test_files\/header.html/)
93
+ expect(word).to match(/an empty word cover page/)
94
+ expect(word).to match(/an empty word intro page/)
95
+ expect(word).to match(%r{Enkonduko</h1>})
96
+ end
97
+
98
+ it "converts definition lists to tables for Word" do
99
+ system "rm -f test.doc"
100
+ system "rm -f test.html"
101
+ IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert_file(<<~"INPUT", "test", false)
102
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
103
+ <foreword>
104
+ <dl>
105
+ <dt>Term</dt>
106
+ <dd>Definition</dd>
107
+ <dt>Term 2</dt>
108
+ <dd>Definition 2</dd>
109
+ </dl>
110
+ </foreword>
111
+ </iso-standard>
112
+ INPUT
113
+ word = File.read("test.doc").sub(/^.*<div class="WordSection2">/m, '<div class="WordSection2">').
114
+ sub(%r{<br clear="all" class="section"/>\s*<div class="WordSection3">.*$}m, "")
115
+ expect(word).to be_equivalent_to <<~"OUTPUT"
116
+ <div class="WordSection2">
117
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
118
+ <div>
119
+ <h1 class="ForewordTitle">Foreword</h1>
120
+ <table class="dl">
121
+ <tr>
122
+ <td valign="top" align="left">
123
+ <p style="text-align: left;" class="MsoNormal">Term</p>
124
+ </td>
125
+ <td valign="top">Definition</td>
126
+ </tr>
127
+ <tr>
128
+ <td valign="top" align="left">
129
+ <p style="text-align: left;" class="MsoNormal">Term 2</p>
130
+ </td>
131
+ <td valign="top">Definition 2</td>
132
+ </tr>
133
+ </table>
134
+ </div>
135
+ <p class="MsoNormal">&#xA0;</p>
136
+ </div>
137
+ OUTPUT
138
+ end
139
+
140
+ it "converts annex subheadings to h2Annex class for Word" do
141
+ system "rm -f test.doc"
142
+ system "rm -f test.html"
143
+ IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert_file(<<~"INPUT", "test", false)
144
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
145
+ <annex id="P" inline-header="false" obligation="normative">
146
+ <title>Annex</title>
147
+ <subsection id="Q" inline-header="false" obligation="normative">
148
+ <title>Annex A.1</title>
149
+ </annex>
150
+ </iso-standard>
151
+ INPUT
152
+ word = File.read("test.doc").sub(/^.*<div class="WordSection3">/m, '<div class="WordSection3">').
153
+ sub(%r{<div style="mso-element:footnote-list"/>.*$}m, "")
154
+ expect(word).to be_equivalent_to <<~"OUTPUT"
155
+ <div class="WordSection3">
156
+ <p class="zzSTDTitle1"></p>
157
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
158
+ <div class="Section3"><a name="P" id="P"></a>
159
+ <h1 class="Annex"><b>Annex A</b><br/>(normative)<br/><br/><b>Annex</b></h1>
160
+ <div><a name="Q" id="Q"></a>
161
+ <p class="h2Annex">A.1. Annex A.1</p>
162
+ </div>
163
+ </div>
164
+ </div>
165
+ OUTPUT
166
+ end
167
+
168
+ it "populates Word template with terms reference labels" do
169
+ system "rm -f test.doc"
170
+ system "rm -f test.html"
171
+ IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert_file(<<~"INPUT", "test", false)
172
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
173
+ <sections>
174
+ <terms id="_terms_and_definitions" obligation="normative"><title>Terms and Definitions</title>
175
+
176
+ <term id="paddy1"><preferred>paddy</preferred>
177
+ <definition><p id="_eb29b35e-123e-4d1c-b50b-2714d41e747f">rice retaining its husk after threshing</p></definition>
178
+ <termsource status="modified">
179
+ <origin bibitemid="ISO7301" type="inline" citeas="ISO 7301: 2011"><locality type="clause"><referenceFrom>3.1</referenceFrom></locality></origin>
180
+ <modification>
181
+ <p id="_e73a417d-ad39-417d-a4c8-20e4e2529489">The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here</p>
182
+ </modification>
183
+ </termsource></term>
184
+
185
+ </terms>
186
+ </sections>
187
+ </iso-standard>
188
+
189
+ INPUT
190
+ word = File.read("test.doc").sub(/^.*<div class="WordSection3">/m, '<div class="WordSection3">').
191
+ sub(%r{<div style="mso-element:footnote-list"/>.*$}m, "")
192
+ expect(word).to be_equivalent_to <<~"OUTPUT"
193
+ <div class="WordSection3">
194
+ <p class="zzSTDTitle1"></p>
195
+ <div><a name="_terms_and_definitions" id="_terms_and_definitions"></a><h1>3.<span style="mso-tab-count:1">&#xA0; </span>Terms and Definitions</h1><p class="MsoNormal">For the purposes of this document,
196
+ the following terms and definitions apply.</p>
197
+ <p class="MsoNormal">ISO and IEC maintain terminological databases for use in
198
+ standardization at the following addresses:</p>
199
+
200
+ <ul>
201
+ <li class="MsoNormal"> <p class="MsoNormal">ISO Online browsing platform: available at
202
+ <a href="http://www.iso.org/obp">http://www.iso.org/obp</a></p> </li>
203
+ <li class="MsoNormal"> <p class="MsoNormal">IEC Electropedia: available at
204
+ <a href="http://www.electropedia.org">http://www.electropedia.org</a>
205
+ </p> </li> </ul>
206
+ <p class="TermNum"><a name="paddy1" id="paddy1"></a>3.1</p><p class="Terms">paddy</p>
207
+ <p class="MsoNormal"><a name="_eb29b35e-123e-4d1c-b50b-2714d41e747f" id="_eb29b35e-123e-4d1c-b50b-2714d41e747f"></a>rice retaining its husk after threshing</p>
208
+ <p class="MsoNormal">[SOURCE: <a href="#ISO7301">ISO 7301: 2011, 3.1</a>, modified &mdash; The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here]</p></div>
209
+ </div>
210
+ OUTPUT
211
+ end
212
+
213
+ it "populates Word header" do
214
+ system "rm -f test.doc"
215
+ IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", header: "spec/assets/header.html"}).convert_file(<<~"INPUT", "test", false)
216
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
217
+ <bibdata type="article">
218
+ <docidentifier>
219
+ <project-number part="1">1000</project-number>
220
+ </docidentifier>
221
+ </bibdata>
222
+ </iso-standard>
223
+
224
+ INPUT
225
+ word = File.read("test.doc").sub(%r{^.*Content-Location: file:///C:/Doc/test_files/header.html}m, "Content-Location: file:///C:/Doc/test_files/header.html").
226
+ sub(/------=_NextPart.*$/m, "")
227
+ expect(word).to be_equivalent_to <<~"OUTPUT"
228
+
229
+ Content-Location: file:///C:/Doc/test_files/header.html
230
+ Content-Transfer-Encoding: base64
231
+ Content-Type: text/html charset="utf-8"
232
+
233
+ Ci8qIGFuIGVtcHR5IGhlYWRlciAqLwoKU1RBUlQgRE9DIElEOiAxMDAwLTE6IEVORCBET0MgSUQK
234
+ CkZJTEVOQU1FOiB0ZXN0Cgo=
235
+
236
+ OUTPUT
237
+ end
238
+
239
+ it "populates Word ToC" do
240
+ system "rm -f test.doc"
241
+ IsoDoc::WordConvert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", wordintropage: "spec/assets/wordintro.html"}).convert_file(<<~"INPUT", "test", false)
242
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
243
+ <sections>
244
+ <clause inline-header="false" obligation="normative"><title>Clause 4</title><subsection id="N" inline-header="false" obligation="normative">
245
+
246
+ <title>Introduction<bookmark id="Q"/> to this<fn reference="1">
247
+ <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p>
248
+ </fn></title>
249
+ </subsection>
250
+ <subsection id="O" inline-header="false" obligation="normative">
251
+ <title>Clause 4.2</title>
252
+ <p>A<fn reference="1">
253
+ <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p>
254
+ </fn></p>
255
+ </subsection></clause>
256
+ </sections>
257
+ </iso-standard>
258
+
259
+ INPUT
260
+ word = File.read("test.doc").sub(/^.*<div class="WordSection2">/m, '<div class="WordSection2">').
261
+ sub(%r{<br clear="all" class="section"/>\s*<div class="WordSection3">.*$}m, "")
262
+ expect(word.gsub(/_Toc\d\d+/, "_Toc")).to be_equivalent_to <<~'OUTPUT'
263
+ <div class="WordSection2">
264
+ /* an empty word intro page */
265
+
266
+ <p class="MsoToc1"><span lang="EN-GB" xml:lang="EN-GB"><span style="mso-element:field-begin"></span><span style="mso-spacerun:yes">&#xA0;</span>TOC
267
+ \o "1-2" \h \z \u <span style="mso-element:field-separator"></span></span>
268
+ <span class="MsoHyperlink"><span lang="EN-GB" style="mso-no-proof:yes" xml:lang="EN-GB">
269
+ <a href="#_Toc">4.<span style="mso-tab-count:1">&#xA0; </span>Clause 4<span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">
270
+ <span style="mso-tab-count:1 dotted">. </span>
271
+ </span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">
272
+ <span style="mso-element:field-begin"></span></span>
273
+ <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"> PAGEREF _Toc \h </span>
274
+ <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"><span style="mso-element:field-separator"></span></span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">1</span>
275
+ <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"></span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"><span style="mso-element:field-end"></span></span></a></span></span></p>
276
+
277
+ <p class="MsoToc2">
278
+ <span class="MsoHyperlink">
279
+ <span lang="EN-GB" style="mso-no-proof:yes" xml:lang="EN-GB">
280
+ <a href="#_Toc">4.1. Introduction to this<span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">
281
+ <span style="mso-tab-count:1 dotted">. </span>
282
+ </span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">
283
+ <span style="mso-element:field-begin"></span></span>
284
+ <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"> PAGEREF _Toc \h </span>
285
+ <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"><span style="mso-element:field-separator"></span></span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">1</span>
286
+ <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"></span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"><span style="mso-element:field-end"></span></span></a></span>
287
+ </span>
288
+ </p>
289
+
290
+ <p class="MsoToc2">
291
+ <span class="MsoHyperlink">
292
+ <span lang="EN-GB" style="mso-no-proof:yes" xml:lang="EN-GB">
293
+ <a href="#_Toc">4.2. Clause 4.2<span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">
294
+ <span style="mso-tab-count:1 dotted">. </span>
295
+ </span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">
296
+ <span style="mso-element:field-begin"></span></span>
297
+ <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"> PAGEREF _Toc \h </span>
298
+ <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"><span style="mso-element:field-separator"></span></span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB">1</span>
299
+ <span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"></span><span lang="EN-GB" class="MsoTocTextSpan" xml:lang="EN-GB"><span style="mso-element:field-end"></span></span></a></span>
300
+ </span>
301
+ </p>
302
+
303
+ <p class="MsoToc1">
304
+ <span lang="EN-GB" xml:lang="EN-GB">
305
+ <span style="mso-element:field-end"></span>
306
+ </span>
307
+ <span lang="EN-GB" xml:lang="EN-GB">
308
+ <p class="MsoNormal">&#xA0;</p>
309
+ </span>
310
+ </p>
311
+
312
+
313
+ <p class="MsoNormal">&#xA0;</p>
314
+ </div>
315
+ OUTPUT
316
+ end
317
+
318
+ it "reorders footnote numbers in HTML" do
319
+ system "rm -f test.html"
320
+ IsoDoc::Convert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", wordintropage: "spec/assets/wordintro.html"}).convert_file(<<~"INPUT", "test", false)
321
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
322
+ <sections>
323
+ <clause inline-header="false" obligation="normative"><title>Clause 4</title><fn reference="3">
324
+ <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">This is a footnote.</p>
325
+ </fn><subsection id="N" inline-header="false" obligation="normative">
326
+
327
+ <title>Introduction to this<fn reference="2">
328
+ <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p>
329
+ </fn></title>
330
+ </subsection>
331
+ <subsection id="O" inline-header="false" obligation="normative">
332
+ <title>Clause 4.2</title>
333
+ <p>A<fn reference="1">
334
+ <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p>
335
+ </fn></p>
336
+ </subsection></clause>
337
+ </sections>
338
+ </iso-standard>
339
+ INPUT
340
+ html = File.read("test.html").sub(/^.*<div class="WordSection3">/m, '<div class="WordSection3">').
341
+ sub(%r{<script type="text/x-mathjax-config">.*$}m, "")
342
+ expect(html).to be_equivalent_to <<~"OUTPUT"
343
+ <div class="WordSection3">
344
+ <p class="zzSTDTitle1"></p>
345
+ <div>
346
+ <h1>4.&#xA0; Clause 4</h1>
347
+ <a href="#ftn3" epub:type="footnote" id="_footnote1">
348
+ <sup>1</sup>
349
+ </a>
350
+ <div id="N">
351
+
352
+ <h2>4.1. Introduction to this<a href="#ftn2" epub:type="footnote" id="_footnote2"><sup>2</sup></a></h2>
353
+ </div>
354
+ <div id="O">
355
+ <h2>4.2. Clause 4.2</h2>
356
+ <p>A<a href="#ftn2" epub:type="footnote"><sup>2</sup></a></p>
357
+ </div>
358
+ </div>
359
+ <aside id="ftn3">
360
+ <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6"><a href="#_footnote1">1) </a>This is a footnote.</p>
361
+ </aside>
362
+ <aside id="ftn2">
363
+ <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6"><a href="#_footnote2">2) </a>Formerly denoted as 15 % (m/m).</p>
364
+ </aside>
365
+
366
+ </div>
367
+ OUTPUT
368
+ end
369
+
370
+ it "moves images in HTML" do
371
+ system "rm -f test.html"
372
+ system "rm -rf _images"
373
+ IsoDoc::Convert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css"}).convert_file(<<~"INPUT", "test", false)
374
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
375
+ <foreword>
376
+ <figure id="_">
377
+ <name>Split-it-right sample divider</name>
378
+ <image src="spec/assets/rice_image1.png" id="_" imagetype="PNG"/>
379
+ </figure>
380
+ </foreword>
381
+ </iso-standard>
382
+ INPUT
383
+ html = File.read("test.html").sub(/^.*<div class="WordSection2">/m, '<div class="WordSection2">').
384
+ sub(%r{<div class="WordSection3">.*$}m, "")
385
+ expect(`ls _images`).to match(/\.png$/)
386
+ expect(html.gsub(/\/[0-9a-f-]+\.png/, "/_.png")).to be_equivalent_to <<~"OUTPUT"
387
+ <div class="WordSection2">
388
+ <br />
389
+ <div>
390
+ <h1 class="ForewordTitle">Foreword</h1>
391
+ <div id="_" class="figure">
392
+
393
+ <img src="_images/_.png" width="800" height="673" />
394
+ <p class="FigureTitle" align="center"><b>Figure 1&#xA0;&#x2014; Split-it-right sample divider</b></p></div>
395
+ </div>
396
+ <p>&#xA0;</p>
397
+ </div>
398
+ <br />
399
+ OUTPUT
400
+
401
+ end
402
+
403
+ it "populates HTML ToC" do
404
+ system "rm -f test.html"
405
+ IsoDoc::Convert.new({wordstylesheet: "spec/assets/word.css", htmlstylesheet: "spec/assets/html.css", htmlintropage: "spec/assets/htmlintro.html"}).convert_file(<<~"INPUT", "test", false)
406
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
407
+ <sections>
408
+ <clause inline-header="false" obligation="normative"><title>Clause 4</title><subsection id="N" inline-header="false" obligation="normative">
409
+
410
+ <title>Introduction<bookmark id="Q"/> to this<fn reference="1">
411
+ <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p>
412
+ </fn></title>
413
+ </subsection>
414
+ <subsection id="O" inline-header="false" obligation="normative">
415
+ <title>Clause 4.2</title>
416
+ <p>A<fn reference="1">
417
+ <p id="_ff27c067-2785-4551-96cf-0a73530ff1e6">Formerly denoted as 15 % (m/m).</p>
418
+ </fn></p>
419
+ </subsection></clause>
420
+ <clause inline-header="false" obligation="normative"><title>Clause 5</title></clause>
421
+ </sections>
422
+ </iso-standard>
423
+
424
+ INPUT
425
+ html = File.read("test.html").sub(/^.*<div class="WordSection2">/m, '<div class="WordSection2">').
426
+ sub(%r{<div class="WordSection3">.*$}m, "")
427
+ expect(html.gsub(/"#[a-f0-9-]+"/, "#_")).to be_equivalent_to <<~"OUTPUT"
428
+ <div class="WordSection2">
429
+
430
+ <p>/* an empty html intro page */
431
+
432
+ </p>
433
+ <ul><li><a href=#_>5.&#xA0; Clause 4</a></li><ul><li><a href=#_>4.1. Introduction to this</a></li><li><a href=#_>4.2. Clause 4.2</a></li></ul><li><a href=#_>5.&#xA0; Clause 5</a></li></ul>
434
+
435
+
436
+
437
+ <p>&#xA0;</p>
438
+ </div>
439
+ <br />
440
+
441
+ OUTPUT
442
+ end
443
+
444
+ end