isodoc 0.4.5 → 0.5.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -1,5 +1,3 @@
1
- #require "uuidtools"
2
-
3
1
  module IsoDoc
4
2
  class Convert
5
3
  def ns(xpath)
@@ -11,7 +9,7 @@ module IsoDoc
11
9
 
12
10
  def insert_tab(out, n)
13
11
  out.span **attr_code(style: "mso-tab-count:#{n}") do |span|
14
- [1..n].each { |i| span << "&#xA0; " }
12
+ [1..n].each { span << "&#xA0; " }
15
13
  end
16
14
  end
17
15
 
@@ -31,7 +29,7 @@ module IsoDoc
31
29
  STAGE_ABBRS[stage.to_sym] || "??"
32
30
  end
33
31
 
34
- NOKOHEAD = <<~HERE
32
+ NOKOHEAD = <<~HERE.freeze
35
33
  <!DOCTYPE html SYSTEM
36
34
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
37
35
  <html xmlns="http://www.w3.org/1999/xhtml">
@@ -39,7 +37,6 @@ module IsoDoc
39
37
  <body> </body> </html>
40
38
  HERE
41
39
 
42
-
43
40
  # block for processing XML document fragments as XHTML,
44
41
  # to allow for HTMLentities
45
42
  def noko(&block)
@@ -76,5 +73,51 @@ module IsoDoc
76
73
  def from_xhtml(xml)
77
74
  xml.to_xml.sub(%r{ xmlns="http://www.w3.org/1999/xhtml"}, "")
78
75
  end
76
+
77
+ CLAUSE_ANCESTOR =
78
+ ".//ancestor::*[local-name() = 'subsection' or "\
79
+ "local-name() = 'foreword' or "\
80
+ "local-name() = 'introduction' or local-name() = 'terms' or "\
81
+ "local-name() = 'clause' or local-name() = 'references' or "\
82
+ "local-name() = 'annex']/@id".freeze
83
+
84
+ def get_clause_id(node)
85
+ clause = node.xpath(CLAUSE_ANCESTOR)
86
+ clause&.last&.text || nil
87
+ end
88
+
89
+ NOTE_CONTAINER_ANCESTOR =
90
+ ".//ancestor::*[local-name() = 'subsection' or "\
91
+ "local-name() = 'foreword' or "\
92
+ "local-name() = 'introduction' or local-name() = 'terms' or "\
93
+ "local-name() = 'clause' or local-name() = 'references' or "\
94
+ "local-name() = 'annex' or local-name() = 'formula' or "\
95
+ "local-name() = 'table' or local-name() = 'example' or "\
96
+ "local-name() = 'figure']/@id".freeze
97
+
98
+ def get_note_container_id(node)
99
+ container = node.xpath(NOTE_CONTAINER_ANCESTOR)
100
+ container&.last&.text || nil
101
+ end
102
+
103
+ def sentence_join(array)
104
+ return "" if array.nil? || array.empty?
105
+ if array.length == 1
106
+ array[0]
107
+ else
108
+ l10n("#{array[0..-2].join(', ')} #{@and_lbl} #{array.last}")
109
+ end
110
+ end
111
+
112
+ # avoid `; avoid {{ (Liquid Templates); avoid [[ (Javascript)
113
+ def extract_delims(text)
114
+ @openmathdelim = "(#("
115
+ @closemathdelim = ")#)"
116
+ while text.include?(@openmathdelim) || text.include?(@closemathdelim)
117
+ @openmathdelim += "("
118
+ @closemathdelim += ")"
119
+ end
120
+ [@openmathdelim, @closemathdelim]
121
+ end
79
122
  end
80
123
  end
@@ -1,3 +1,3 @@
1
1
  module IsoDoc
2
- VERSION = "0.4.5".freeze
2
+ VERSION = "0.5.5".freeze
3
3
  end
@@ -1,9 +1,5 @@
1
- #require "isodoc/utils"
2
-
3
1
  module IsoDoc
4
2
  class Convert
5
- #include ::IsoDoc::Utils
6
-
7
3
  @anchors = {}
8
4
 
9
5
  def get_anchors
@@ -24,46 +20,78 @@ module IsoDoc
24
20
  section_names(d.at(ns("//clause[title = 'Scope']")), "1", 1)
25
21
  section_names(d.at(ns(
26
22
  "//references[title = 'Normative References']")), "2", 1)
27
- section_names(d.at(ns("//terms")), "3", 1)
23
+ section_names(d.at(ns("//sections/terms")), "3", 1)
28
24
  middle_section_asset_names(d)
29
25
  end
30
26
 
31
27
  def middle_section_asset_names(d)
32
28
  middle_sections = "//clause[title = 'Scope'] | "\
33
- "//references[title = 'Normative References'] | //terms | "\
34
- "//symbols-abbrevs | //clause[parent::sections]"
29
+ "//foreword | //introduction | "\
30
+ "//references[title = 'Normative References'] | //sections/terms | "\
31
+ "//sections/symbols-abbrevs | //clause[parent::sections]"
35
32
  sequential_asset_names(d.xpath(ns(middle_sections)))
36
33
  end
37
34
 
38
- def clause_names(docxml,sect_num)
35
+ def clause_names(docxml, sect_num)
39
36
  q = "//clause[parent::sections][not(xmlns:title = 'Scope')]"
40
37
  docxml.xpath(ns(q)).each_with_index do |c, i|
41
38
  section_names(c, (i + sect_num).to_s, 1)
42
39
  end
43
40
  end
44
41
 
42
+ def termnote_label(n)
43
+ @termnote_lbl.gsub(/%/, n.to_s)
44
+ end
45
+
45
46
  def termnote_anchor_names(docxml)
46
47
  docxml.xpath(ns("//term[termnote]")).each do |t|
47
48
  t.xpath(ns("./termnote")).each_with_index do |n, i|
48
- @anchors[n["id"]] = { label: "Note #{i + 1} to entry",
49
- xref: "#{@anchors[t["id"]][:xref]},"\
50
- "Note #{i + 1}" }
49
+ @anchors[n["id"]] =
50
+ { label: termnote_label(i + 1),
51
+ xref: l10n("#{@anchors[t['id']][:xref]}, "\
52
+ "#{@note_xref_lbl} #{i + 1}") }
51
53
  end
52
54
  end
53
55
  end
54
56
 
55
- def table_note_anchor_names(docxml)
56
- docxml.xpath(ns("//table[note]")).each do |t|
57
- t.xpath(ns("./note")).each_with_index do |n, i|
58
- @anchors[n["id"]] = { label: "NOTE #{i + 1}",
59
- xref: "#{@anchors[t["id"]][:xref]},"\
60
- "Note #{i + 1}" }
57
+ SECTIONS_XPATH =
58
+ "//foreword | //introduction | //sections/terms | //annex | "\
59
+ "//sections/clause | //references[not(ancestor::references)]".freeze
60
+
61
+ CHILD_NOTES_XPATH =
62
+ "./*[not(self::xmlns:subsection)]//xmlns:note | ./xmlns:note".freeze
63
+
64
+ def note_anchor_names(sections)
65
+ sections.each do |s|
66
+ notes = s.xpath(CHILD_NOTES_XPATH)
67
+ notes.each_with_index do |n, i|
68
+ next if @anchors[n["id"]]
69
+ next if n["id"].nil?
70
+ idx = notes.size == 1 ? "" : " #{i + 1}"
71
+ @anchors[n["id"]] = anchor_struct(idx, s, @note_xref_lbl)
72
+ end
73
+ note_anchor_names(s.xpath(ns("./subsection")))
74
+ end
75
+ end
76
+
77
+ CHILD_EXAMPLES_XPATH =
78
+ "./*[not(self::xmlns:subsection)]//xmlns:example | "\
79
+ "./xmlns:example".freeze
80
+
81
+ def example_anchor_names(sections)
82
+ sections.each do |s|
83
+ notes = s.xpath(CHILD_EXAMPLES_XPATH)
84
+ notes.each_with_index do |n, i|
85
+ next if @anchors[n["id"]]
86
+ idx = notes.size == 1 ? "" : " #{i + 1}"
87
+ @anchors[n["id"]] = anchor_struct(idx, s, @example_xref_lbl)
61
88
  end
89
+ example_anchor_names(s.xpath(ns("./subsection")))
62
90
  end
63
91
  end
64
92
 
65
93
  def middle_anchor_names(docxml)
66
- symbols_abbrevs = docxml.at(ns("//symbols-abbrevs"))
94
+ symbols_abbrevs = docxml.at(ns("//sections/symbols-abbrevs"))
67
95
  sect_num = 4
68
96
  if symbols_abbrevs
69
97
  section_names(symbols_abbrevs, sect_num.to_s, 1)
@@ -78,59 +106,65 @@ module IsoDoc
78
106
  initial_anchor_names(docxml)
79
107
  middle_anchor_names(docxml)
80
108
  back_anchor_names(docxml)
81
- table_note_anchor_names(docxml)
109
+ # preempt clause notes with all other types of note
110
+ note_anchor_names(docxml.xpath(ns("//table | //example | //formula | "\
111
+ "//figure")))
112
+ note_anchor_names(docxml.xpath(ns(SECTIONS_XPATH)))
113
+ example_anchor_names(docxml.xpath(ns(SECTIONS_XPATH)))
82
114
  end
83
115
 
84
116
  def sequential_figure_names(clause)
85
117
  i = j = 0
86
118
  clause.xpath(ns(".//figure")).each do |t|
87
- label = "Figure #{i}" + ( j.zero? ? "" : "-#{j}" )
88
- if t.parent.name == "figure"
89
- j += 1
119
+ if t.parent.name == "figure" then j += 1
90
120
  else
91
121
  j = 0
92
122
  i += 1
93
123
  end
94
- label = "Figure #{i}" + ( j.zero? ? "" : "-#{j}" )
95
- @anchors[t["id"]] = { label: label, xref: label }
124
+ label = i.to_s + (j.zero? ? "" : "-#{j}")
125
+ @anchors[t["id"]] = anchor_struct(label, nil, @figure_lbl)
96
126
  end
97
127
  end
98
128
 
129
+ def anchor_struct(lbl, container, elem)
130
+ ret = { label: lbl.to_s }
131
+ ret[:xref] =
132
+ elem == "Formula" ? l10n("#{elem} (#{lbl})") : l10n("#{elem} #{lbl}")
133
+ ret[:xref].gsub!(/ $/, "")
134
+ ret[:container] = get_clause_id(container) unless container.nil?
135
+ ret
136
+ end
137
+
99
138
  def sequential_asset_names(clause)
100
139
  clause.xpath(ns(".//table")).each_with_index do |t, i|
101
- @anchors[t["id"]] = { label: "Table #{i + 1}",
102
- xref: "Table #{i + 1}" }
140
+ @anchors[t["id"]] = anchor_struct(i + 1, nil, @table_lbl)
103
141
  end
104
142
  sequential_figure_names(clause)
105
143
  clause.xpath(ns(".//formula")).each_with_index do |t, i|
106
- @anchors[t["id"]] = { label: (i + 1).to_s,
107
- xref: "Formula #{i + 1}" }
144
+ @anchors[t["id"]] = anchor_struct(i + 1, t, @formula_lbl)
108
145
  end
109
146
  end
110
147
 
111
148
  def hierarchical_figure_names(clause, num)
112
149
  i = j = 0
113
150
  clause.xpath(ns(".//figure")).each do |t|
114
- if t.parent.name == "figure"
115
- j += 1
151
+ if t.parent.name == "figure" then j += 1
116
152
  else
117
153
  j = 0
118
154
  i += 1
119
155
  end
120
- label = "Figure #{num}.#{i}" + ( j.zero? ? "" : "-#{j}" )
121
- @anchors[t["id"]] = { label: label, xref: label }
156
+ label = "#{num}.#{i}" + (j.zero? ? "" : "-#{j}")
157
+ @anchors[t["id"]] = anchor_struct(label, nil, @figure_lbl)
122
158
  end
123
159
  end
124
160
 
125
161
  def hierarchical_asset_names(clause, num)
126
162
  clause.xpath(ns(".//table")).each_with_index do |t, i|
127
- @anchors[t["id"]] = { label: "Table #{num}.#{i + 1}",
128
- xref: "Table #{num}.#{i + 1}" }
163
+ @anchors[t["id"]] = anchor_struct("#{num}.#{i + 1}", nil, @table_lbl)
129
164
  end
130
165
  hierarchical_figure_names(clause, num)
131
166
  clause.xpath(ns(".//formula")).each_with_index do |t, i|
132
- @anchors[t["id"]] = { label: "#{num}.#{i + 1}",
133
- xref: "Formula #{num}.#{i + 1}" }
167
+ @anchors[t["id"]] = anchor_struct("#{num}.#{i + 1}", t, @formula_lbl)
134
168
  end
135
169
  end
136
170
 
@@ -141,30 +175,32 @@ module IsoDoc
141
175
  end
142
176
  end
143
177
 
144
- def section_names(clause, num, level)
145
- @anchors[clause["id"]] = { label: num, xref: "Clause #{num}",
146
- level: level }
147
- clause.xpath(ns("./subsection | ./term")).each_with_index do |c, i|
148
- section_names1(c, "#{num}.#{i + 1}", level + 1)
178
+ def section_names(clause, num, lvl)
179
+ return if clause.nil?
180
+ @anchors[clause["id"]] =
181
+ { label: num, xref: l10n("#{@clause_lbl} #{num}"), level: lvl }
182
+ clause.xpath(ns("./subsection | ./term | ./terms | ./symbols-abbrevs")).
183
+ each_with_index do |c, i|
184
+ section_names1(c, "#{num}.#{i + 1}", lvl + 1)
149
185
  end
150
186
  end
151
187
 
152
188
  def section_names1(clause, num, level)
153
- @anchors[clause["id"]] =
154
- { label: num, level: level,
155
- xref: clause.name == "term" ? num : "Clause #{num}" }
156
- clause.xpath(ns("./subsection ")).
189
+ @anchors[clause["id"]] =
190
+ { label: num, level: level, xref: num }
191
+ # subclauses are not prefixed with "Clause"
192
+ clause.xpath(ns("./subsection | ./terms | ./term | ./symbols-abbrevs")).
157
193
  each_with_index do |c, i|
158
194
  section_names1(c, "#{num}.#{i + 1}", level + 1)
159
195
  end
160
196
  end
161
197
 
162
198
  def annex_names(clause, num)
163
- obligation = "(Informative)"
164
- obligation = "(Normative)" if clause["subtype"] == "normative"
165
- label = "<b>Annex #{num}</b><br/>#{obligation}"
166
- @anchors[clause["id"]] = { label: label,
167
- xref: "Annex #{num}", level: 1 }
199
+ obl = l10n("(#{@inform_annex_lbl})")
200
+ obl = l10n("(#{@norm_annex_lbl})") if clause["obligation"] == "normative"
201
+ label = l10n("<b>#{@annex_lbl} #{num}</b><br/>#{obl}")
202
+ @anchors[clause["id"]] =
203
+ { label: label, xref: "#{@annex_lbl} #{num}", level: 1 }
168
204
  clause.xpath(ns("./subsection")).each_with_index do |c, i|
169
205
  annex_names1(c, "#{num}.#{i + 1}", 2)
170
206
  end
@@ -172,34 +208,10 @@ module IsoDoc
172
208
  end
173
209
 
174
210
  def annex_names1(clause, num, level)
175
- @anchors[clause["id"]] = { label: num,
176
- xref: num,
177
- level: level }
211
+ @anchors[clause["id"]] = { label: num, xref: num, level: level }
178
212
  clause.xpath(ns(".//subsection")).each_with_index do |c, i|
179
213
  annex_names1(c, "#{num}.#{i + 1}", level + 1)
180
214
  end
181
215
  end
182
-
183
- def format_ref(ref, isopub)
184
- return "ISO #{ref}" if isopub
185
- return "[#{ref}]" if /^\d+$/.match?(ref) && !/^\[.*\]$/.match?(ref)
186
- ref
187
- end
188
-
189
- def reference_names(ref)
190
- isopub = ref.at(ns(ISO_PUBLISHER_XPATH))
191
- docid = ref.at(ns("./docidentifier"))
192
- return ref_names(ref) unless docid
193
- date = ref.at(ns("./date[@type = 'published']"))
194
- reference = format_ref(docid.text, isopub)
195
- reference += ": #{date.text}" if date && isopub
196
- @anchors[ref["id"]] = { xref: reference }
197
- end
198
-
199
- def ref_names(ref)
200
- linkend = ref.text
201
- linkend.gsub!(/[\[\]]/, "") unless /^\[\d+\]$/.match? linkend
202
- @anchors[ref["id"]] = { xref: linkend }
203
- end
204
216
  end
205
217
  end
@@ -0,0 +1,618 @@
1
+ require "spec_helper"
2
+
3
+ RSpec.describe IsoDoc do
4
+ it "processes unlabelled notes" do
5
+ expect(IsoDoc::Convert.new({}).convert_file(<<~"INPUT", "test", true)).to be_equivalent_to <<~"OUTPUT"
6
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
7
+ <foreword>
8
+ <note>
9
+ <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
10
+ </note>
11
+ </foreword>
12
+ </iso-standard>
13
+ INPUT
14
+ <html xmlns:epub="http://www.idpf.org/2007/ops">
15
+ <head>
16
+ <title>test</title>
17
+ <body lang="EN-US" link="blue" vlink="#954F72">
18
+ <div class="WordSection1">
19
+ <p>&#160;</p>
20
+ </div>
21
+ <br clear="all" class="section"/>
22
+ <div class="WordSection2">
23
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
24
+ <div>
25
+ <h1 class="ForewordTitle">Foreword</h1>
26
+ <div id="" class="Note">
27
+ <p class="Note">NOTE<span style="mso-tab-count:1">&#160; </span>These results are based on a study carried out on three different types of kernel.</p>
28
+ </div>
29
+ </div>
30
+ <p>&#160;</p>
31
+ </div>
32
+ <br clear="all" class="section"/>
33
+ <div class="WordSection3">
34
+ <p class="zzSTDTitle1"/>
35
+ </div>
36
+ </body>
37
+ </head>
38
+ </html>
39
+ OUTPUT
40
+ end
41
+
42
+ it "processes labelled notes" do
43
+ expect(IsoDoc::Convert.new({}).convert_file(<<~"INPUT", "test", true)).to be_equivalent_to <<~"OUTPUT"
44
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
45
+ <foreword>
46
+ <note id="note1">
47
+ <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
48
+ </note>
49
+ <note id="note2">
50
+ <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83a">These results are based on a study carried out on three different types of kernel.</p>
51
+ </note>
52
+ </foreword>
53
+ </iso-standard>
54
+ INPUT
55
+ <html xmlns:epub="http://www.idpf.org/2007/ops">
56
+ <head>
57
+ <title>test</title>
58
+ <body lang="EN-US" link="blue" vlink="#954F72">
59
+ <div class="WordSection1">
60
+ <p>&#160;</p>
61
+ </div>
62
+ <br clear="all" class="section"/>
63
+ <div class="WordSection2">
64
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
65
+ <div>
66
+ <h1 class="ForewordTitle">Foreword</h1>
67
+ <div id="note1" class="Note">
68
+ <p class="Note">NOTE 1<span style="mso-tab-count:1">&#160; </span>These results are based on a study carried out on three different types of kernel.</p>
69
+ </div>
70
+ <div id="note2" class="Note">
71
+ <p class="Note">NOTE 2<span style="mso-tab-count:1">&#160; </span>These results are based on a study carried out on three different types of kernel.</p>
72
+ </div>
73
+ </div>
74
+ <p>&#160;</p>
75
+ </div>
76
+ <br clear="all" class="section"/>
77
+ <div class="WordSection3">
78
+ <p class="zzSTDTitle1"/>
79
+ </div>
80
+ </body>
81
+ </head>
82
+ </html>
83
+ OUTPUT
84
+ end
85
+
86
+ it "processes multi-para notes" do
87
+ expect(IsoDoc::Convert.new({}).convert_file(<<~"INPUT", "test", true)).to be_equivalent_to <<~"OUTPUT"
88
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
89
+ <foreword>
90
+ <note>
91
+ <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83f">These results are based on a study carried out on three different types of kernel.</p>
92
+ <p id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83a">These results are based on a study carried out on three different types of kernel.</p>
93
+ </note>
94
+ </foreword>
95
+ </iso-standard>
96
+ INPUT
97
+ <html xmlns:epub="http://www.idpf.org/2007/ops">
98
+ <head>
99
+ <title>test</title>
100
+ <body lang="EN-US" link="blue" vlink="#954F72">
101
+ <div class="WordSection1">
102
+ <p>&#160;</p>
103
+ </div>
104
+ <br clear="all" class="section"/>
105
+ <div class="WordSection2">
106
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
107
+ <div>
108
+ <h1 class="ForewordTitle">Foreword</h1>
109
+ <div id="" class="Note">
110
+ <p class="Note">NOTE<span style="mso-tab-count:1">&#160; </span>These results are based on a study carried out on three different types of kernel.</p>
111
+ <p class="Note" id="_f06fd0d1-a203-4f3d-a515-0bdba0f8d83a">These results are based on a study carried out on three different types of kernel.</p>
112
+ </div>
113
+ </div>
114
+ <p>&#160;</p>
115
+ </div>
116
+ <br clear="all" class="section"/>
117
+ <div class="WordSection3">
118
+ <p class="zzSTDTitle1"/>
119
+ </div>
120
+ </body>
121
+ </head>
122
+ </html>
123
+ OUTPUT
124
+ end
125
+
126
+ it "processes non-para notes" do
127
+ expect(IsoDoc::Convert.new({}).convert_file(<<~"INPUT", "test", true)).to be_equivalent_to <<~"OUTPUT"
128
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
129
+ <foreword>
130
+ <note>
131
+ <dl>
132
+ <dt>A</dt>
133
+ <dd><p>B</p></dd>
134
+ </dl>
135
+ <ul>
136
+ <li>C</li></ul>
137
+ </note>
138
+ </foreword>
139
+ </iso-standard>
140
+ INPUT
141
+ <html xmlns:epub="http://www.idpf.org/2007/ops">
142
+ <head>
143
+ <title>test</title>
144
+ <body lang="EN-US" link="blue" vlink="#954F72">
145
+ <div class="WordSection1">
146
+ <p>&#160;</p>
147
+ </div>
148
+ <br clear="all" class="section"/>
149
+ <div class="WordSection2">
150
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
151
+ <div>
152
+ <h1 class="ForewordTitle">Foreword</h1>
153
+ <div id="" class="Note"><p class="Note">NOTE<span style="mso-tab-count:1">&#160; </span></p>
154
+ <dl><dt><p class="Note">A</p></dt><dd><p class="Note">B</p></dd></dl>
155
+ <ul>
156
+ <li>C</li></ul>
157
+ </div>
158
+ </div>
159
+ <p>&#160;</p>
160
+ </div>
161
+ <br clear="all" class="section"/>
162
+ <div class="WordSection3">
163
+ <p class="zzSTDTitle1"/>
164
+ </div>
165
+ </body>
166
+ </head>
167
+ </html>
168
+
169
+ OUTPUT
170
+ end
171
+
172
+ it "processes figures" do
173
+ expect(IsoDoc::Convert.new({}).convert_file(<<~"INPUT", "test", true)).to be_equivalent_to <<~"OUTPUT"
174
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
175
+ <foreword>
176
+ <figure id="figureA-1">
177
+ <name>Split-it-right sample divider</name>
178
+ <image src="rice_images/rice_image1.png" id="_8357ede4-6d44-4672-bac4-9a85e82ab7f0" imagetype="PNG"/>
179
+ <dl>
180
+ <dt>A</dt>
181
+ <dd><p>B</p></dd>
182
+ </dl>
183
+ </figure>
184
+ </foreword>
185
+ </iso-standard>
186
+ INPUT
187
+ <html xmlns:epub="http://www.idpf.org/2007/ops">
188
+ <head>
189
+ <title>test</title>
190
+ <body lang="EN-US" link="blue" vlink="#954F72">
191
+ <div class="WordSection1">
192
+ <p>&#160;</p>
193
+ </div>
194
+ <br clear="all" class="section"/>
195
+ <div class="WordSection2">
196
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
197
+ <div>
198
+ <h1 class="ForewordTitle">Foreword</h1>
199
+ <div id="figureA-1" class="figure">
200
+
201
+ <img src="rice_images/rice_image1.png"/>
202
+ <p><b>Key</b></p><dl><dt><p>A</p></dt><dd><p>B</p></dd></dl>
203
+ <p class="FigureTitle" align="center"><b>Figure 1&#160;&#8212; Split-it-right sample divider</b></p></div>
204
+ </div>
205
+ <p>&#160;</p>
206
+ </div>
207
+ <br clear="all" class="section"/>
208
+ <div class="WordSection3">
209
+ <p class="zzSTDTitle1"/>
210
+ </div>
211
+ </body>
212
+ </head>
213
+ </html>
214
+ OUTPUT
215
+ end
216
+
217
+ it "processes examples" do
218
+ expect(IsoDoc::Convert.new({}).convert_file(<<~"INPUT", "test", true)).to be_equivalent_to <<~"OUTPUT"
219
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
220
+ <foreword>
221
+ <example id="samplecode">
222
+ <p>Hello</p>
223
+ </example>
224
+ </foreword>
225
+ </iso-standard>
226
+ INPUT
227
+ <html xmlns:epub="http://www.idpf.org/2007/ops">
228
+ <head>
229
+ <title>test</title>
230
+ <body lang="EN-US" link="blue" vlink="#954F72">
231
+ <div class="WordSection1">
232
+ <p>&#160;</p>
233
+ </div>
234
+ <br clear="all" class="section"/>
235
+ <div class="WordSection2">
236
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
237
+ <div>
238
+ <h1 class="ForewordTitle">Foreword</h1>
239
+ <table id="samplecode" class="example">
240
+ <tr>
241
+ <td width="110pt" valign="top" style="width:82.8pt;padding:.75pt .75pt .75pt .75pt">EXAMPLE</td>
242
+ <td valign="top">
243
+ <p>Hello</p>
244
+ </td>
245
+ </tr>
246
+ </table>
247
+ </div>
248
+ <p>&#160;</p>
249
+ </div>
250
+ <br clear="all" class="section"/>
251
+ <div class="WordSection3">
252
+ <p class="zzSTDTitle1"/>
253
+ </div>
254
+ </body>
255
+ </head>
256
+ </html>
257
+ OUTPUT
258
+ end
259
+
260
+
261
+ it "processes sequences of examples" do
262
+ expect(IsoDoc::Convert.new({}).convert_file(<<~"INPUT", "test", true)).to be_equivalent_to <<~"OUTPUT"
263
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
264
+ <foreword>
265
+ <example id="samplecode">
266
+ <p>Hello</p>
267
+ </example>
268
+ <example id="samplecode2>
269
+ <p>Hello</p>
270
+ </example>
271
+ </foreword>
272
+ </iso-standard>
273
+ INPUT
274
+ <html xmlns:epub="http://www.idpf.org/2007/ops">
275
+ <head>
276
+ <title>test</title>
277
+ <body lang="EN-US" link="blue" vlink="#954F72">
278
+ <div class="WordSection1">
279
+ <p>&#160;</p>
280
+ </div>
281
+ <br clear="all" class="section"/>
282
+ <div class="WordSection2">
283
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
284
+ <div>
285
+ <h1 class="ForewordTitle">Foreword</h1>
286
+ <table id="samplecode" class="example">
287
+ <tr>
288
+ <td width="110pt" valign="top" style="width:82.8pt;padding:.75pt .75pt .75pt .75pt">EXAMPLE 1</td>
289
+ <td valign="top">
290
+ <p>Hello</p>
291
+ </td>
292
+ </tr>
293
+ </table>
294
+ <table id="samplecode2&gt; " class="example">
295
+ <tr>
296
+ <td width="110pt" valign="top" style="width:82.8pt;padding:.75pt .75pt .75pt .75pt">EXAMPLE 2</td>
297
+ <td valign="top"/>
298
+ </tr>
299
+ </table>
300
+ <p>Hello</p>
301
+ </div>
302
+ <p>&#160;</p>
303
+ </div>
304
+ <br clear="all" class="section"/>
305
+ <div class="WordSection3">
306
+ <p class="zzSTDTitle1"/>
307
+ </div>
308
+ </body>
309
+ </head>
310
+ </html>
311
+ OUTPUT
312
+ end
313
+
314
+ it "processes sourcecode" do
315
+ expect(IsoDoc::Convert.new({}).convert_file(<<~"INPUT", "test", true)).to be_equivalent_to <<~"OUTPUT"
316
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
317
+ <foreword>
318
+ <sourcecode id="samplecode">
319
+ <name>Ruby code</name>
320
+ puts x
321
+ </sourcecode>
322
+ </foreword>
323
+ </iso-standard>
324
+ INPUT
325
+ <html xmlns:epub="http://www.idpf.org/2007/ops">
326
+ <head>
327
+ <title>test</title>
328
+ <body lang="EN-US" link="blue" vlink="#954F72">
329
+ <div class="WordSection1">
330
+ <p>&#160;</p>
331
+ </div>
332
+ <br clear="all" class="section"/>
333
+ <div class="WordSection2">
334
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
335
+ <div>
336
+ <h1 class="ForewordTitle">Foreword</h1>
337
+ <p id="samplecode" class="Sourcecode"><br/>&#160;&#160;&#160;&#160;<br/>&#160;&#160;puts&#160;x<br/><p class="FigureTitle" align="center"><b>Ruby code</b></p></p>
338
+ </div>
339
+ <p>&#160;</p>
340
+ </div>
341
+ <br clear="all" class="section"/>
342
+ <div class="WordSection3">
343
+ <p class="zzSTDTitle1"/>
344
+ </div>
345
+ </body>
346
+ </head>
347
+ </html>
348
+ OUTPUT
349
+ end
350
+
351
+ it "processes sourcecode with annotations" do
352
+ expect(IsoDoc::Convert.new({}).convert_file(<<~"INPUT", "test", true)).to be_equivalent_to <<~"OUTPUT"
353
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
354
+ <foreword>
355
+ <sourcecode id="_">puts "Hello, world." <callout target="A">1</callout>
356
+ %w{a b c}.each do |x|
357
+ puts x <callout target="B">2</callout>
358
+ end<annotation id="A">
359
+ <p id="_">This is one callout</p>
360
+ </annotation><annotation id="B">
361
+ <p id="_">This is another callout</p>
362
+ </annotation></sourcecode>
363
+ </foreword>
364
+ </iso-standard>
365
+ INPUT
366
+ <html xmlns:epub="http://www.idpf.org/2007/ops">
367
+ <head>
368
+ <title>test</title>
369
+ <body lang="EN-US" link="blue" vlink="#954F72">
370
+ <div class="WordSection1">
371
+ <p>&#160;</p>
372
+ </div>
373
+ <br clear="all" class="section"/>
374
+ <div class="WordSection2">
375
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
376
+ <div>
377
+ <h1 class="ForewordTitle">Foreword</h1>
378
+ <p id="_" class="Sourcecode">puts&#160;"Hello,&#160;world."&#160; &lt;1&gt;<br/>&#160;&#160;&#160;%w{a&#160;b&#160;c}.each&#160;do&#160;|x|<br/>&#160;&#160;&#160;&#160;&#160;puts&#160;x&#160; &lt;2&gt;<br/>&#160;&#160;&#160;end<span class="zzMoveToFollowing">&lt;1&gt; </span>
379
+ <p class="Sourcecode" id="_">This is one callout</p>
380
+ <span class="zzMoveToFollowing">&lt;2&gt; </span>
381
+ <p class="Sourcecode" id="_">This is another callout</p>
382
+ </p>
383
+ </div>
384
+ <p>&#160;</p>
385
+ </div>
386
+ <br clear="all" class="section"/>
387
+ <div class="WordSection3">
388
+ <p class="zzSTDTitle1"/>
389
+ </div>
390
+ </body>
391
+ </head>
392
+ </html>
393
+ OUTPUT
394
+ end
395
+
396
+ it "processes admonitions" do
397
+ expect(IsoDoc::Convert.new({}).convert_file(<<~"INPUT", "test", true)).to be_equivalent_to <<~"OUTPUT"
398
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
399
+ <foreword>
400
+ <admonition id="_70234f78-64e5-4dfc-8b6f-f3f037348b6a" type="caution">
401
+ <p id="_e94663cc-2473-4ccc-9a72-983a74d989f2">Only use paddy or parboiled rice for the determination of husked rice yield.</p>
402
+ </admonition>
403
+ </foreword>
404
+ </iso-standard>
405
+ INPUT
406
+ <html xmlns:epub="http://www.idpf.org/2007/ops">
407
+ <head>
408
+ <title>test</title>
409
+ <body lang="EN-US" link="blue" vlink="#954F72">
410
+ <div class="WordSection1">
411
+ <p>&#160;</p>
412
+ </div>
413
+ <br clear="all" class="section"/>
414
+ <div class="WordSection2">
415
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
416
+ <div>
417
+ <h1 class="ForewordTitle">Foreword</h1>
418
+ <div class="Admonition"><title>CAUTION</title>
419
+ <p id="_e94663cc-2473-4ccc-9a72-983a74d989f2">Only use paddy or parboiled rice for the determination of husked rice yield.</p>
420
+ </div>
421
+ </div>
422
+ <p>&#160;</p>
423
+ </div>
424
+ <br clear="all" class="section"/>
425
+ <div class="WordSection3">
426
+ <p class="zzSTDTitle1"/>
427
+ </div>
428
+ </body>
429
+ </head>
430
+ </html>
431
+ OUTPUT
432
+ end
433
+
434
+ it "processes formulae" do
435
+ expect(IsoDoc::Convert.new({}).convert_file(<<~"INPUT", "test", true)).to be_equivalent_to <<~"OUTPUT"
436
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
437
+ <foreword>
438
+ <formula id="_be9158af-7e93-4ee2-90c5-26d31c181934">
439
+ <stem type="AsciiMath">r = 1 %</stem>
440
+ <dl id="_e4fe94fe-1cde-49d9-b1ad-743293b7e21d">
441
+ <dt>
442
+ <stem type="AsciiMath">r</stem>
443
+ </dt>
444
+ <dd>
445
+ <p id="_1b99995d-ff03-40f5-8f2e-ab9665a69b77">is the repeatability limit.</p>
446
+ </dd>
447
+ </dl></formula>
448
+ </foreword>
449
+ </iso-standard>
450
+ INPUT
451
+ <html xmlns:epub="http://www.idpf.org/2007/ops">
452
+ <head>
453
+ <title>test</title>
454
+ <body lang="EN-US" link="blue" vlink="#954F72">
455
+ <div class="WordSection1">
456
+ <p>&#160;</p>
457
+ </div>
458
+ <br clear="all" class="section"/>
459
+ <div class="WordSection2">
460
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
461
+ <div>
462
+ <h1 class="ForewordTitle">Foreword</h1>
463
+ <div id="_be9158af-7e93-4ee2-90c5-26d31c181934" class="formula"><span class="stem">(#(r = 1 %)#)</span><span style="mso-tab-count:1">&#160; </span>(1)</div>
464
+ <p>where</p>
465
+ <dl>
466
+ <dt>
467
+ <span class="stem">(#(r)#)</span>
468
+ </dt>
469
+ <dd>
470
+ <p id="_1b99995d-ff03-40f5-8f2e-ab9665a69b77">is the repeatability limit.</p>
471
+ </dd>
472
+ </dl>
473
+ </div>
474
+ <p>&#160;</p>
475
+ </div>
476
+ <br clear="all" class="section"/>
477
+ <div class="WordSection3">
478
+ <p class="zzSTDTitle1"/>
479
+ </div>
480
+ </body>
481
+ </head>
482
+ </html>
483
+ OUTPUT
484
+ end
485
+
486
+ it "processes paragraph alignments" do
487
+ expect(IsoDoc::Convert.new({}).convert_file(<<~"INPUT", "test", true)).to be_equivalent_to <<~"OUTPUT"
488
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
489
+ <foreword>
490
+ <p align="left" id="_08bfe952-d57f-4150-9c95-5d52098cc2a8">Vache Equipment<br/>
491
+ Fictitious<br/>
492
+ World
493
+ </foreword>
494
+ </iso-standard>
495
+ INPUT
496
+ <html xmlns:epub="http://www.idpf.org/2007/ops">
497
+ <head>
498
+ <title>test</title>
499
+ <body lang="EN-US" link="blue" vlink="#954F72">
500
+ <div class="WordSection1">
501
+ <p>&#160;</p>
502
+ </div>
503
+ <br clear="all" class="section"/>
504
+ <div class="WordSection2">
505
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
506
+ <div>
507
+ <h1 class="ForewordTitle">Foreword</h1>
508
+ <p id="_08bfe952-d57f-4150-9c95-5d52098cc2a8" align="left" style="text-align:left">Vache Equipment<br/>
509
+ Fictitious<br/>
510
+ World
511
+ </p>
512
+ </div>
513
+ <p>&#160;</p>
514
+ </div>
515
+ <br clear="all" class="section"/>
516
+ <div class="WordSection3">
517
+ <p class="zzSTDTitle1"/>
518
+ </div>
519
+ </body>
520
+ </head>
521
+ </html>
522
+ OUTPUT
523
+ end
524
+
525
+ it "processes blockquotes" do
526
+ expect(IsoDoc::Convert.new({}).convert_file(<<~"INPUT", "test", true)).to be_equivalent_to <<~"OUTPUT"
527
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
528
+ <foreword>
529
+ <quote id="_044bd364-c832-4b78-8fea-92242402a1d1">
530
+ <source type="inline" bibitemid="ISO7301" citeas="ISO 7301: 2011"><locality type="clause"><referenceFrom>1</referenceFrom></locality></source>
531
+ <author>ISO</author>
532
+ <p id="_d4fd0a61-f300-4285-abe6-602707590e53">This International Standard gives the minimum specifications for rice (<em>Oryza sativa</em> L.) which is subject to international trade. It is applicable to the following types: husked rice and milled rice, parboiled or not, intended for direct human consumption. It is neither applicable to other products derived from rice, nor to waxy rice (glutinous rice).</p>
533
+ </quote>
534
+
535
+ </foreword>
536
+ </iso-standard>
537
+ INPUT
538
+ <html xmlns:epub="http://www.idpf.org/2007/ops">
539
+ <head>
540
+ <title>test</title>
541
+ <body lang="EN-US" link="blue" vlink="#954F72">
542
+ <div class="WordSection1">
543
+ <p>&#160;</p>
544
+ </div>
545
+ <br clear="all" class="section"/>
546
+ <div class="WordSection2">
547
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
548
+ <div>
549
+ <h1 class="ForewordTitle">Foreword</h1>
550
+ <div class="Quote" id="_044bd364-c832-4b78-8fea-92242402a1d1">
551
+
552
+
553
+ <p id="_d4fd0a61-f300-4285-abe6-602707590e53">This International Standard gives the minimum specifications for rice (<i>Oryza sativa</i> L.) which is subject to international trade. It is applicable to the following types: husked rice and milled rice, parboiled or not, intended for direct human consumption. It is neither applicable to other products derived from rice, nor to waxy rice (glutinous rice).</p>
554
+ <p class="QuoteAttribution">&#8212; ISO, <a href="#ISO7301">ISO 7301: 2011, Clause 1</a></p></div>
555
+ </div>
556
+ <p>&#160;</p>
557
+ </div>
558
+ <br clear="all" class="section"/>
559
+ <div class="WordSection3">
560
+ <p class="zzSTDTitle1"/>
561
+ </div>
562
+ </body>
563
+ </head>
564
+ </html>
565
+ OUTPUT
566
+ end
567
+
568
+ it "processes term domains" do
569
+ expect(IsoDoc::Convert.new({}).convert_file(<<~"INPUT", "test", true)).to be_equivalent_to <<~"OUTPUT"
570
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
571
+ <sections>
572
+ <terms>
573
+ <term id="_extraneous_matter"><preferred>extraneous matter</preferred><admitted>EM</admitted>
574
+ <domain>rice</domain>
575
+ <definition><p id="_318b3939-be09-46c4-a284-93f9826b981e">organic and inorganic components other than whole or broken kernels</p></definition>
576
+ </term>
577
+ </terms>
578
+ </sections>
579
+ </iso-standard>
580
+ INPUT
581
+ <html xmlns:epub="http://www.idpf.org/2007/ops">
582
+ <head>
583
+ <title>test</title>
584
+ <body lang="EN-US" link="blue" vlink="#954F72">
585
+ <div class="WordSection1">
586
+ <p>&#160;</p>
587
+ </div>
588
+ <br clear="all" class="section"/>
589
+ <div class="WordSection2">
590
+ <p>&#160;</p>
591
+ </div>
592
+ <br clear="all" class="section"/>
593
+ <div class="WordSection3">
594
+ <p class="zzSTDTitle1"/>
595
+ <div><h1>3.<span style="mso-tab-count:1">&#160; </span>Terms and Definitions</h1><p>For the purposes of this document,
596
+ the following terms and definitions apply.</p>
597
+ <p>ISO and IEC maintain terminological databases for use in
598
+ standardization at the following addresses:</p>
599
+
600
+ <ul>
601
+ <li> <p>ISO Online browsing platform: available at
602
+ <a href="http://www.iso.org/obp">http://www.iso.org/obp</a></p> </li>
603
+ <li> <p>IEC Electropedia: available at
604
+ <a href="http://www.electropedia.org">http://www.electropedia.org</a>
605
+ </p> </li> </ul>
606
+ <p class="TermNum" id="_extraneous_matter">3.1</p><p class="Terms">extraneous matter</p><p class="AltTerms">EM</p>
607
+
608
+ <p id="_318b3939-be09-46c4-a284-93f9826b981e">&lt;rice&gt; organic and inorganic components other than whole or broken kernels</p>
609
+ </div>
610
+ </div>
611
+ </body>
612
+ </head>
613
+ </html>
614
+ OUTPUT
615
+ end
616
+
617
+
618
+ end