isodoc 1.0.23 → 1.0.28
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/macos.yml +10 -2
- data/.github/workflows/ubuntu.yml +13 -3
- data/.github/workflows/windows.yml +10 -2
- data/isodoc.gemspec +1 -1
- data/lib/isodoc-yaml/i18n-en.yaml +3 -1
- data/lib/isodoc-yaml/i18n-fr.yaml +3 -1
- data/lib/isodoc-yaml/i18n-zh-Hans.yaml +3 -1
- data/lib/isodoc/base_style/reset.scss +1 -1
- data/lib/isodoc/convert.rb +1 -0
- data/lib/isodoc/function/blocks.rb +6 -1
- data/lib/isodoc/function/cleanup.rb +16 -2
- data/lib/isodoc/function/i18n.rb +5 -5
- data/lib/isodoc/function/inline.rb +77 -79
- data/lib/isodoc/function/inline_simple.rb +72 -0
- data/lib/isodoc/function/references.rb +49 -37
- data/lib/isodoc/function/section.rb +19 -8
- data/lib/isodoc/function/table.rb +0 -1
- data/lib/isodoc/function/to_word_html.rb +23 -13
- data/lib/isodoc/function/utils.rb +11 -5
- data/lib/isodoc/function/xref_gen.rb +2 -1
- data/lib/isodoc/function/xref_sect_gen.rb +24 -24
- data/lib/isodoc/headlesshtml_convert.rb +5 -0
- data/lib/isodoc/html_convert.rb +5 -0
- data/lib/isodoc/html_function/footnotes.rb +3 -3
- data/lib/isodoc/html_function/html.rb +15 -0
- data/lib/isodoc/html_function/postprocess.rb +6 -5
- data/lib/isodoc/metadata.rb +10 -3
- data/lib/isodoc/metadata_date.rb +19 -7
- data/lib/isodoc/pdf_convert.rb +5 -0
- data/lib/isodoc/version.rb +1 -1
- data/lib/isodoc/word_convert.rb +5 -0
- data/lib/isodoc/word_function/body.rb +0 -4
- data/lib/isodoc/word_function/footnotes.rb +3 -3
- data/lib/isodoc/word_function/postprocess.rb +13 -2
- data/lib/isodoc/xslfo_convert.rb +5 -0
- data/spec/assets/i18n.yaml +4 -1
- data/spec/isodoc/blocks_spec.rb +59 -8
- data/spec/isodoc/cleanup_spec.rb +317 -25
- data/spec/isodoc/footnotes_spec.rb +20 -5
- data/spec/isodoc/i18n_spec.rb +12 -12
- data/spec/isodoc/inline_spec.rb +118 -5
- data/spec/isodoc/metadata_spec.rb +8 -3
- data/spec/isodoc/postproc_spec.rb +34 -12
- data/spec/isodoc/ref_spec.rb +120 -51
- data/spec/isodoc/section_spec.rb +236 -207
- data/spec/isodoc/table_spec.rb +24 -24
- data/spec/isodoc/terms_spec.rb +50 -6
- data/spec/isodoc/xref_spec.rb +53 -26
- metadata +5 -4
@@ -14,6 +14,11 @@ module IsoDoc
|
|
14
14
|
"_headlessimages"
|
15
15
|
end
|
16
16
|
|
17
|
+
def initialize(options)
|
18
|
+
@format = :html
|
19
|
+
super
|
20
|
+
end
|
21
|
+
|
17
22
|
def convert(filename, file = nil, debug = false)
|
18
23
|
file = File.read(filename, encoding: "utf-8") if file.nil?
|
19
24
|
@openmathdelim, @closemathdelim = extract_delims(file)
|
data/lib/isodoc/html_convert.rb
CHANGED
@@ -48,7 +48,7 @@ module IsoDoc::HtmlFunction
|
|
48
48
|
end
|
49
49
|
|
50
50
|
def table_footnote_parse(node, out)
|
51
|
-
fn = node["reference"]
|
51
|
+
fn = node["reference"] || UUIDTools::UUID.random_create.to_s
|
52
52
|
tid = get_table_ancestor_id(node)
|
53
53
|
make_table_footnote_link(out, tid + fn, fn)
|
54
54
|
# do not output footnote text if we have already seen it for this table
|
@@ -64,8 +64,8 @@ module IsoDoc::HtmlFunction
|
|
64
64
|
def footnote_parse(node, out)
|
65
65
|
return table_footnote_parse(node, out) if (@in_table || @in_figure) &&
|
66
66
|
!node.ancestors.map {|m| m.name }.include?("name")
|
67
|
-
fn = node["reference"]
|
68
|
-
attrs = {
|
67
|
+
fn = node["reference"] || UUIDTools::UUID.random_create.to_s
|
68
|
+
attrs = { class: "FootnoteRef", href: "#fn:#{fn}" }
|
69
69
|
out.a **attrs do |a|
|
70
70
|
a.sup { |sup| sup << fn }
|
71
71
|
end
|
@@ -3,6 +3,18 @@ require "base64"
|
|
3
3
|
|
4
4
|
module IsoDoc::HtmlFunction
|
5
5
|
module Html
|
6
|
+
def convert1(docxml, filename, dir)
|
7
|
+
anchor_names docxml
|
8
|
+
noko do |xml|
|
9
|
+
xml.html **{ lang: "#{@lang}" } do |html|
|
10
|
+
info docxml, nil
|
11
|
+
populate_css()
|
12
|
+
html.head { |head| define_head head, filename, dir }
|
13
|
+
make_body(html, docxml)
|
14
|
+
end
|
15
|
+
end.join("\n")
|
16
|
+
end
|
17
|
+
|
6
18
|
def make_body1(body, _docxml)
|
7
19
|
body.div **{ class: "title-section" } do |div1|
|
8
20
|
div1.p { |p| p << " " } # placeholder
|
@@ -97,5 +109,8 @@ module IsoDoc::HtmlFunction
|
|
97
109
|
end
|
98
110
|
sourcecode_name_parse(node, out, name)
|
99
111
|
end
|
112
|
+
|
113
|
+
def table_long_strings_cleanup(docxml)
|
114
|
+
end
|
100
115
|
end
|
101
116
|
end
|
@@ -108,7 +108,7 @@ module IsoDoc::HtmlFunction
|
|
108
108
|
idx = docxml.at("//div[@id = 'toc']") or return docxml
|
109
109
|
toc = "<ul>"
|
110
110
|
path = toclevel_classes.map do |l|
|
111
|
-
"//main//#{l}[not(@class = 'TermNum')][not(@class = 'noTOC')][
|
111
|
+
"//main//#{l}[not(@class = 'TermNum')][not(@class = 'noTOC')][text()]"
|
112
112
|
end
|
113
113
|
docxml.xpath(path.join(" | ")).each_with_index do |h, tocidx|
|
114
114
|
h["id"] ||= "toc#{tocidx}"
|
@@ -159,7 +159,8 @@ module IsoDoc::HtmlFunction
|
|
159
159
|
def inject_script(doc)
|
160
160
|
return doc unless @scripts
|
161
161
|
scripts = File.read(@scripts, encoding: "UTF-8")
|
162
|
-
doc.
|
162
|
+
a = doc.split(%r{</body>})
|
163
|
+
a[0] + scripts + "</body>" + a[1]
|
163
164
|
end
|
164
165
|
|
165
166
|
def update_footnote_filter(fn, x, i, seen)
|
@@ -178,7 +179,7 @@ module IsoDoc::HtmlFunction
|
|
178
179
|
def html_footnote_filter(docxml)
|
179
180
|
seen = {}
|
180
181
|
i = 1
|
181
|
-
docxml.xpath('//a[@
|
182
|
+
docxml.xpath('//a[@class = "FootnoteRef"]').each do |x|
|
182
183
|
fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || next
|
183
184
|
i, seen = update_footnote_filter(fn, x, i, seen)
|
184
185
|
end
|
@@ -187,7 +188,7 @@ module IsoDoc::HtmlFunction
|
|
187
188
|
|
188
189
|
def footnote_backlinks(docxml)
|
189
190
|
seen = {}
|
190
|
-
docxml.xpath('//a[@
|
191
|
+
docxml.xpath('//a[@class = "FootnoteRef"]').each_with_index do |x, i|
|
191
192
|
seen[x["href"]] and next or seen[x["href"]] = true
|
192
193
|
fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || next
|
193
194
|
xdup = x.dup
|
@@ -200,7 +201,7 @@ module IsoDoc::HtmlFunction
|
|
200
201
|
end
|
201
202
|
|
202
203
|
def footnote_format(docxml)
|
203
|
-
docxml.xpath("//a[@
|
204
|
+
docxml.xpath("//a[@class = 'FootnoteRef']/sup").each do |x|
|
204
205
|
footnote_reference_format(x)
|
205
206
|
end
|
206
207
|
docxml.xpath("//a[@class = 'TableFootnoteRef'] | "\
|
data/lib/isodoc/metadata.rb
CHANGED
@@ -3,7 +3,8 @@ require_relative "./metadata_date"
|
|
3
3
|
module IsoDoc
|
4
4
|
class Metadata
|
5
5
|
DATETYPES = %w{published accessed created implemented obsoleted confirmed
|
6
|
-
updated issued received transmitted copied unchanged circulated
|
6
|
+
updated issued received transmitted copied unchanged circulated vote-started
|
7
|
+
vote-ended}.freeze
|
7
8
|
|
8
9
|
def ns(xpath)
|
9
10
|
Common::ns(xpath)
|
@@ -15,7 +16,7 @@ module IsoDoc
|
|
15
16
|
|
16
17
|
def initialize(lang, script, labels)
|
17
18
|
@metadata = {}
|
18
|
-
DATETYPES.each { |w| @metadata["#{w}date".to_sym] = "XXX" }
|
19
|
+
DATETYPES.each { |w| @metadata["#{w.gsub(/-/, "_")}date".to_sym] = "XXX" }
|
19
20
|
@lang = lang
|
20
21
|
@script = script
|
21
22
|
@c = HTMLEntities.new
|
@@ -84,7 +85,7 @@ module IsoDoc
|
|
84
85
|
|
85
86
|
def bibdate(isoxml, _out)
|
86
87
|
isoxml.xpath(ns("//bibdata/date")).each do |d|
|
87
|
-
set("#{d['type']}date".to_sym, Common::date_range(d))
|
88
|
+
set("#{d['type'].gsub(/-/, "_")}date".to_sym, Common::date_range(d))
|
88
89
|
end
|
89
90
|
end
|
90
91
|
|
@@ -217,5 +218,11 @@ module IsoDoc
|
|
217
218
|
a = xml.at(ns("//bibdata/uri[@type = 'pdf']")) and set(:pdf, a.text)
|
218
219
|
a = xml.at(ns("//bibdata/uri[@type = 'doc']")) and set(:doc, a.text)
|
219
220
|
end
|
221
|
+
|
222
|
+
def keywords(isoxml, _out)
|
223
|
+
ret = []
|
224
|
+
isoxml.xpath(ns("//bibdata/keyword")).each { |kw| ret << kw.text }
|
225
|
+
set(:keywords, ret)
|
226
|
+
end
|
220
227
|
end
|
221
228
|
end
|
data/lib/isodoc/metadata_date.rb
CHANGED
@@ -14,14 +14,26 @@ module IsoDoc
|
|
14
14
|
"10": @labels["month_october"],
|
15
15
|
"11": @labels["month_november"],
|
16
16
|
"12": @labels["month_december"],
|
17
|
-
|
17
|
+
}
|
18
18
|
end
|
19
19
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
20
|
+
def monthyr(isodate)
|
21
|
+
m = /(?<yr>\d\d\d\d)-(?<mo>\d\d)/.match isodate
|
22
|
+
return isodate unless m && m[:yr] && m[:mo]
|
23
|
+
IsoDoc::Function::I18n::l10n("#{months[m[:mo].to_sym]} #{m[:yr]}",
|
24
|
+
@lang, @script)
|
25
|
+
end
|
26
|
+
|
27
|
+
def MMMddyyyy(isodate)
|
28
|
+
return nil if isodate.nil?
|
29
|
+
arr = isodate.split("-")
|
30
|
+
date = if arr.size == 1 and (/^\d+$/.match isodate)
|
31
|
+
Date.new(*arr.map(&:to_i)).strftime("%Y")
|
32
|
+
elsif arr.size == 2
|
33
|
+
Date.new(*arr.map(&:to_i)).strftime("%B %Y")
|
34
|
+
else
|
35
|
+
Date.parse(isodate).strftime("%B %d, %Y")
|
36
|
+
end
|
37
|
+
end
|
26
38
|
end
|
27
39
|
end
|
data/lib/isodoc/pdf_convert.rb
CHANGED
@@ -22,6 +22,11 @@ module IsoDoc
|
|
22
22
|
"_pdfimages"
|
23
23
|
end
|
24
24
|
|
25
|
+
def initialize(options)
|
26
|
+
@format = :pdf
|
27
|
+
super
|
28
|
+
end
|
29
|
+
|
25
30
|
def convert(filename, file = nil, debug = false)
|
26
31
|
file = File.read(filename, encoding: "utf-8") if file.nil?
|
27
32
|
@openmathdelim, @closemathdelim = extract_delims(file)
|
data/lib/isodoc/version.rb
CHANGED
data/lib/isodoc/word_convert.rb
CHANGED
@@ -204,10 +204,6 @@ module IsoDoc::WordFunction
|
|
204
204
|
alt: node["alt"],
|
205
205
|
title: node["title"],
|
206
206
|
width: node["width"] }
|
207
|
-
if node["height"] == "auto" || node["width"] == "auto"
|
208
|
-
attrs[:height] = nil
|
209
|
-
attrs[:width] = nil
|
210
|
-
end
|
211
207
|
out.img **attr_code(attrs)
|
212
208
|
image_title_parse(out, caption)
|
213
209
|
end
|
@@ -56,7 +56,7 @@ module IsoDoc::WordFunction
|
|
56
56
|
end
|
57
57
|
|
58
58
|
def table_footnote_parse(node, out)
|
59
|
-
fn = node["reference"]
|
59
|
+
fn = node["reference"] || UUIDTools::UUID.random_create.to_s
|
60
60
|
tid = get_table_ancestor_id(node)
|
61
61
|
make_table_footnote_link(out, tid + fn, fn)
|
62
62
|
# do not output footnote text if we have already seen it for this table
|
@@ -80,11 +80,11 @@ module IsoDoc::WordFunction
|
|
80
80
|
def footnote_parse(node, out)
|
81
81
|
return table_footnote_parse(node, out) if (@in_table || @in_figure) &&
|
82
82
|
!node.ancestors.map {|m| m.name }.include?("name")
|
83
|
-
fn = node["reference"]
|
83
|
+
fn = node["reference"] || UUIDTools::UUID.random_create.to_s
|
84
84
|
return seen_footnote_parse(node, out, fn) if @seen_footnote.include?(fn)
|
85
85
|
@fn_bookmarks[fn] = bookmarkid
|
86
86
|
out.span **{style: "mso-bookmark:_Ref#{@fn_bookmarks[fn]}"} do |s|
|
87
|
-
s.a **{ "epub:type": "footnote", href: "#ftn#{fn}" } do |a|
|
87
|
+
s.a **{ "class": "FootnoteRef", "epub:type": "footnote", href: "#ftn#{fn}" } do |a|
|
88
88
|
a.sup { |sup| sup << fn }
|
89
89
|
end
|
90
90
|
end
|
@@ -39,7 +39,6 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
|
|
39
39
|
end
|
40
40
|
|
41
41
|
def toWord(result, filename, dir, header)
|
42
|
-
#result = populate_template(result, :word)
|
43
42
|
result = from_xhtml(word_cleanup(to_xhtml(result)))
|
44
43
|
unless @landscapestyle.empty?
|
45
44
|
@wordstylesheet&.open
|
@@ -196,6 +195,8 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
|
|
196
195
|
@landscapestyle = ""
|
197
196
|
word_section_breaks1(docxml, "WordSection2")
|
198
197
|
word_section_breaks1(docxml, "WordSection3")
|
198
|
+
word_remove_pb_before_annex(docxml)
|
199
|
+
docxml.xpath("//br[@orientation]").each { |br| br.delete("orientation") }
|
199
200
|
end
|
200
201
|
|
201
202
|
def word_section_breaks1(docxml, sect)
|
@@ -203,7 +204,6 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
|
|
203
204
|
each_with_index do |br, i|
|
204
205
|
@landscapestyle += "\ndiv.#{sect}_#{i} {page:#{sect}"\
|
205
206
|
"#{br["orientation"] == "landscape" ? "L" : "P"};}\n"
|
206
|
-
br.delete("orientation")
|
207
207
|
split_at_section_break(docxml, sect, br, i)
|
208
208
|
end
|
209
209
|
end
|
@@ -219,6 +219,17 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
|
|
219
219
|
end
|
220
220
|
end
|
221
221
|
|
222
|
+
# applies for <div class="WordSectionN_M"><p><pagebreak/></p>...
|
223
|
+
def word_remove_pb_before_annex(docxml)
|
224
|
+
docxml.xpath("//div[p/br]").each do |d|
|
225
|
+
/^WordSection\d+_\d+$/.match(d["class"]) or next
|
226
|
+
d.elements[0].name == "p" && !d.elements[0].elements.empty? or next
|
227
|
+
d.elements[0].elements[0].name == "br" && d.elements[0].elements[0]["style"] ==
|
228
|
+
"mso-special-character:line-break;page-break-before:always" or next
|
229
|
+
d.elements[0].remove
|
230
|
+
end
|
231
|
+
end
|
232
|
+
|
222
233
|
def word_footnote_format(docxml)
|
223
234
|
# the content is in a[@epub:type = 'footnote']//sup, but in Word,
|
224
235
|
# we need to inject content around the autonumbered footnote reference
|
data/lib/isodoc/xslfo_convert.rb
CHANGED
@@ -13,6 +13,11 @@ module IsoDoc
|
|
13
13
|
"_pdfimages"
|
14
14
|
end
|
15
15
|
|
16
|
+
def initialize(options)
|
17
|
+
@format = :pdf
|
18
|
+
super
|
19
|
+
end
|
20
|
+
|
16
21
|
def convert(filename, file = nil, debug = false)
|
17
22
|
file = File.read(filename, encoding: "utf-8") if file.nil?
|
18
23
|
docxml, outname_html, dir = convert_init(file, filename, debug)
|
data/spec/assets/i18n.yaml
CHANGED
data/spec/isodoc/blocks_spec.rb
CHANGED
@@ -355,7 +355,7 @@ B
|
|
355
355
|
</div></aside>
|
356
356
|
<p style='page-break-after:avoid;'><b>Key</b></p><dl><dt><p>A</p></dt><dd><p>B</p></dd></dl>
|
357
357
|
<p class="FigureTitle" style="text-align:center;">Figure 1 — Split-it-right <i>sample</i> divider
|
358
|
-
<a
|
358
|
+
<a class='FootnoteRef' href='#fn:1'>
|
359
359
|
<sup>1</sup>
|
360
360
|
</a>
|
361
361
|
</p></div>
|
@@ -417,8 +417,8 @@ B
|
|
417
417
|
<div id="figureA-1" class="figure">
|
418
418
|
|
419
419
|
<img src="rice_images/rice_image1.png" height="20" width="30" alt="alttext" title="titletext"/>
|
420
|
-
<img src="rice_images/rice_image1.png"/>
|
421
|
-
<img src=
|
420
|
+
<img src="rice_images/rice_image1.png" height='20' width='auto'/>
|
421
|
+
<img src='_.gif' height='20' width='auto'/>
|
422
422
|
<a href="#_" class="TableFootnoteRef">a</a><aside><div id="ftn_"><span><span id="_" class="TableFootnoteRef">a</span><span style="mso-tab-count:1">  </span></span>
|
423
423
|
<p id="_">The time <span class="stem">(#(t_90)#)</span> was estimated to be 18,2 min for this example.</p>
|
424
424
|
</div></aside>
|
@@ -426,7 +426,7 @@ B
|
|
426
426
|
<p class='FigureTitle' style='text-align:center;'>
|
427
427
|
Figure 1 — Split-it-right sample divider
|
428
428
|
<span style='mso-bookmark:_Ref'>
|
429
|
-
<a href='#ftn1' epub:type='footnote'>
|
429
|
+
<a href='#ftn1' epub:type='footnote' class='FootnoteRef'>
|
430
430
|
<sup>1</sup>
|
431
431
|
</a>
|
432
432
|
</span>
|
@@ -681,7 +681,7 @@ Que?
|
|
681
681
|
<br/>
|
682
682
|
<div>
|
683
683
|
<h1 class="ForewordTitle">Foreword</h1>
|
684
|
-
<div class="Admonition"><p class="AdmonitionTitle" style="text-align:center;">CAUTION</p>
|
684
|
+
<div class="Admonition" id='_70234f78-64e5-4dfc-8b6f-f3f037348b6a'><p class="AdmonitionTitle" style="text-align:center;">CAUTION</p>
|
685
685
|
<p id="_e94663cc-2473-4ccc-9a72-983a74d989f2">Only use paddy or parboiled rice for the determination of husked rice yield.</p>
|
686
686
|
</div>
|
687
687
|
</div>
|
@@ -707,7 +707,7 @@ Que?
|
|
707
707
|
<br/>
|
708
708
|
<div>
|
709
709
|
<h1 class="ForewordTitle">Foreword</h1>
|
710
|
-
<div class="Admonition"><p class="AdmonitionTitle" style="text-align:center;">Title</p>
|
710
|
+
<div class="Admonition" id="_70234f78-64e5-4dfc-8b6f-f3f037348b6a"><p class="AdmonitionTitle" style="text-align:center;">Title</p>
|
711
711
|
<p id="_e94663cc-2473-4ccc-9a72-983a74d989f2">Only use paddy or parboiled rice for the determination of husked rice yield.</p>
|
712
712
|
</div>
|
713
713
|
</div>
|
@@ -969,7 +969,7 @@ World
|
|
969
969
|
INPUT
|
970
970
|
#{HTML_HDR}
|
971
971
|
<p class="zzSTDTitle1"/>
|
972
|
-
<div><h1>1. 
|
972
|
+
<div><h1>1.  </h1>
|
973
973
|
<p class="TermNum" id="_extraneous_matter">1.1.</p><p class="Terms" style="text-align:left;">extraneous matter</p><p class="AltTerms" style="text-align:left;">EM</p>
|
974
974
|
|
975
975
|
<p id="_318b3939-be09-46c4-a284-93f9826b981e"><rice> organic and inorganic components other than whole or broken kernels</p>
|
@@ -1030,7 +1030,7 @@ World
|
|
1030
1030
|
</import>
|
1031
1031
|
</permission>
|
1032
1032
|
</foreword></preface>
|
1033
|
-
<bibliography><references id="_bibliography" obligation="informative">
|
1033
|
+
<bibliography><references id="_bibliography" obligation="informative" normative="false">
|
1034
1034
|
<title>Bibliography</title>
|
1035
1035
|
<bibitem id="rfc2616" type="standard"> <fetched>2020-03-27</fetched> <title format="text/plain" language="en" script="Latn">Hypertext Transfer Protocol — HTTP/1.1</title> <uri type="xml">https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2616.xml</uri> <uri type="src">https://www.rfc-editor.org/info/rfc2616</uri> <docidentifier type="IETF">RFC 2616</docidentifier> <docidentifier type="rfc-anchor">RFC2616</docidentifier> <docidentifier type="DOI">10.17487/RFC2616</docidentifier> <date type="published"> <on>1999-06</on> </date> <contributor> <role type="author"/> <person> <name> <completename language="en">R. Fielding</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">J. Gettys</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">J. Mogul</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">H. Frystyk</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">L. Masinter</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">P. Leach</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">T. Berners-Lee</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <language>en</language> <script>Latn</script> <abstract format="text/plain" language="en" script="Latn">HTTP has been in use by the World-Wide Web global information initiative since 1990. This specification defines the protocol referred to as “HTTP/1.1”, and is an update to RFC 2068. [STANDARDS-TRACK]</abstract> <series type="main"> <title format="text/plain" language="en" script="Latn">RFC</title> <number>2616</number> </series> <place>Fremont, CA</place></bibitem>
|
1036
1036
|
|
@@ -1462,5 +1462,56 @@ INPUT
|
|
1462
1462
|
OUTPUT
|
1463
1463
|
end
|
1464
1464
|
|
1465
|
+
it "processes passthrough with compatible format" do
|
1466
|
+
FileUtils.rm_f "test.html"
|
1467
|
+
IsoDoc::HtmlConvert.new({}).convert("test", <<~"INPUT", false)
|
1468
|
+
<iso-standard xmlns="http://riboseinc.com/isoxml">
|
1469
|
+
<preface><foreword>
|
1470
|
+
<passthrough format="html,rfc"><A></passthrough><em>Hello</em><passthrough format="html,rfc"></A></passthrough>
|
1471
|
+
</foreword></preface>
|
1472
|
+
</iso-standard>
|
1473
|
+
INPUT
|
1474
|
+
expect(( File.read("test.html").gsub(%r{^.*<h1 class="ForewordTitle">Foreword</h1>}m, "").gsub(%r{</div>.*}m, ""))).to be_equivalent_to xmlpp(<<~"OUTPUT")
|
1475
|
+
<A><i>Hello</i></A>
|
1476
|
+
OUTPUT
|
1477
|
+
end
|
1478
|
+
|
1479
|
+
it "aborts if passthrough results in malformed XML" do
|
1480
|
+
FileUtils.rm_f "test.html"
|
1481
|
+
FileUtils.rm_f "test.html.err"
|
1482
|
+
begin
|
1483
|
+
expect { IsoDoc::HtmlConvert.new({}).convert("test", <<~"INPUT", false) }.to raise_error(SystemExit)
|
1484
|
+
<iso-standard xmlns="http://riboseinc.com/isoxml">
|
1485
|
+
<preface><foreword>
|
1486
|
+
<passthrough format="html,rfc"><A></passthrough><em>Hello</em>
|
1487
|
+
</foreword></preface>
|
1488
|
+
</iso-standard>
|
1489
|
+
INPUT
|
1490
|
+
rescue SystemExit
|
1491
|
+
end
|
1492
|
+
expect(File.exist?("test.html.err")).to be true
|
1493
|
+
end
|
1494
|
+
|
1495
|
+
it "ignore passthrough with incompatible format" do
|
1496
|
+
expect(xmlpp(IsoDoc::HtmlConvert.new({}).convert("test", <<~"INPUT", true))).to be_equivalent_to xmlpp(<<~"OUTPUT")
|
1497
|
+
<iso-standard xmlns="http://riboseinc.com/isoxml">
|
1498
|
+
<preface><foreword>
|
1499
|
+
<passthrough format="doc,rfc"><A></passthrough>
|
1500
|
+
</foreword></preface>
|
1501
|
+
</iso-standard>
|
1502
|
+
INPUT
|
1503
|
+
#{HTML_HDR}
|
1504
|
+
<br/>
|
1505
|
+
<div>
|
1506
|
+
<h1 class='ForewordTitle'>Foreword</h1>
|
1507
|
+
</div>
|
1508
|
+
<p class='zzSTDTitle1'/>
|
1509
|
+
</div>
|
1510
|
+
</body>
|
1511
|
+
</html>
|
1512
|
+
OUTPUT
|
1513
|
+
end
|
1514
|
+
|
1515
|
+
|
1465
1516
|
|
1466
1517
|
end
|
data/spec/isodoc/cleanup_spec.rb
CHANGED
@@ -154,9 +154,9 @@ RSpec.describe IsoDoc do
|
|
154
154
|
<br/>
|
155
155
|
<div>
|
156
156
|
<h1 class="ForewordTitle">Foreword</h1>
|
157
|
-
<p>A.<a
|
158
|
-
<p>B.<a
|
159
|
-
<p>C.<a
|
157
|
+
<p>A.<a class="FootnoteRef" href="#fn:2" epub:type="footnote"><sup>2</sup></a></p>
|
158
|
+
<p>B.<a class="FootnoteRef" href="#fn:2" epub:type="footnote"><sup>2</sup></a></p>
|
159
|
+
<p>C.<a class="FootnoteRef" href="#fn:1" epub:type="footnote"><sup>1</sup></a></p>
|
160
160
|
</div>
|
161
161
|
<p class="zzSTDTitle1"/>
|
162
162
|
<aside id="fn:2" class="footnote">
|
@@ -173,9 +173,9 @@ RSpec.describe IsoDoc do
|
|
173
173
|
<br/>
|
174
174
|
<div>
|
175
175
|
<h1 class="ForewordTitle">Foreword</h1>
|
176
|
-
<p>A.<a
|
177
|
-
<p>B.<a
|
178
|
-
<p>C.<a
|
176
|
+
<p>A.<a class="FootnoteRef" href="#fn:2" epub:type="footnote"><sup>1</sup></a></p>
|
177
|
+
<p>B.<a class="FootnoteRef" href="#fn:2" epub:type="footnote"><sup>2</sup></a></p>
|
178
|
+
<p>C.<a class="FootnoteRef" href="#fn:1" epub:type="footnote"><sup>3</sup></a></p>
|
179
179
|
</div>
|
180
180
|
<p class="zzSTDTitle1"/>
|
181
181
|
<aside id="fn:2" class="footnote">
|
@@ -715,7 +715,7 @@ INPUT
|
|
715
715
|
OUTPUT
|
716
716
|
end
|
717
717
|
|
718
|
-
it "
|
718
|
+
it "does not break up very long strings in tables by default" do
|
719
719
|
expect(xmlpp(IsoDoc::HtmlConvert.new({}).cleanup(Nokogiri::XML(<<~"INPUT")).to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
|
720
720
|
<html xmlns:epub="http://www.idpf.org/2007/ops">
|
721
721
|
<head>
|
@@ -739,6 +739,7 @@ INPUT
|
|
739
739
|
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
|
740
740
|
<td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">Rice sample</td>
|
741
741
|
</tr>
|
742
|
+
</thead>
|
742
743
|
<tbody>
|
743
744
|
<tr>
|
744
745
|
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
|
@@ -775,26 +776,102 @@ INPUT
|
|
775
776
|
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
|
776
777
|
<td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>Rice sample</td>
|
777
778
|
</tr>
|
779
|
+
</thead>
|
778
780
|
<tbody>
|
779
781
|
<tr>
|
780
782
|
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
781
|
-
http://www.example.com/
|
782
|
-
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAA/
|
783
|
-
BBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
783
|
+
http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
784
784
|
</td>
|
785
785
|
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
786
|
-
http://www.example.com/
|
787
|
-
|
788
|
-
|
786
|
+
http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB </td>
|
787
|
+
<td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
|
788
|
+
www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
789
|
+
</td>
|
790
|
+
</tr>
|
791
|
+
</tbody>
|
792
|
+
</table>
|
793
|
+
</div>
|
794
|
+
</div>
|
795
|
+
</body>
|
796
|
+
</head>
|
797
|
+
</html>
|
798
|
+
OUTPUT
|
799
|
+
end
|
800
|
+
|
801
|
+
it "does not break up very long strings in tables on request in HTML" do
|
802
|
+
expect(xmlpp(IsoDoc::HtmlConvert.new({break_up_urls_in_tables: "true"}).cleanup(Nokogiri::XML(<<~"INPUT")).to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
|
803
|
+
<html xmlns:epub="http://www.idpf.org/2007/ops">
|
804
|
+
<head>
|
805
|
+
<title>test</title>
|
806
|
+
<body lang="EN-US" link="blue" vlink="#954F72">
|
807
|
+
<div class="WordSection1">
|
808
|
+
<p> </p>
|
809
|
+
</div>
|
810
|
+
<br clear="all" class="section"/>
|
811
|
+
<div class="WordSection2">
|
812
|
+
<br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
|
813
|
+
<div>
|
814
|
+
<h1 class="ForewordTitle">Foreword</h1>
|
815
|
+
<p class="TableTitle" align="center">
|
816
|
+
<b>Table 1 — Repeatability and reproducibility of husked rice yield</b>
|
817
|
+
</p>
|
818
|
+
<table id="tableD-1" class="MsoISOTable" border="1" cellspacing="0" cellpadding="0">
|
819
|
+
<thead>
|
820
|
+
<tr>
|
821
|
+
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
|
822
|
+
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
|
823
|
+
<td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">Rice sample</td>
|
824
|
+
</tr>
|
825
|
+
</thead>
|
826
|
+
<tbody>
|
827
|
+
<tr>
|
828
|
+
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
|
829
|
+
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
|
830
|
+
<td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
|
831
|
+
</tr>
|
832
|
+
</tbody>
|
833
|
+
</table>
|
834
|
+
</div>
|
835
|
+
</div>
|
836
|
+
</body>
|
837
|
+
</html>
|
838
|
+
INPUT
|
839
|
+
<?xml version='1.0'?>
|
840
|
+
<html xmlns:epub='http://www.idpf.org/2007/ops'>
|
841
|
+
<head>
|
842
|
+
<title>test</title>
|
843
|
+
<body lang='EN-US' link='blue' vlink='#954F72'>
|
844
|
+
<div class='WordSection1'>
|
845
|
+
<p> </p>
|
846
|
+
</div>
|
847
|
+
<br clear='all' class='section'/>
|
848
|
+
<div class='WordSection2'>
|
849
|
+
<br clear='all' style='mso-special-character:line-break;page-break-before:always'/>
|
850
|
+
<div>
|
851
|
+
<h1 class='ForewordTitle'>Foreword</h1>
|
852
|
+
<p class='TableTitle' align='center'>
|
853
|
+
<b>Table 1 — Repeatability and reproducibility of husked rice yield</b>
|
854
|
+
</p>
|
855
|
+
<table id='tableD-1' class='MsoISOTable' border='1' cellspacing='0' cellpadding='0'>
|
856
|
+
<thead>
|
857
|
+
<tr>
|
858
|
+
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
|
859
|
+
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
|
860
|
+
<td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>Rice sample</td>
|
861
|
+
</tr>
|
862
|
+
</thead>
|
863
|
+
<tbody>
|
864
|
+
<tr>
|
865
|
+
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
866
|
+
http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
789
867
|
</td>
|
868
|
+
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
869
|
+
http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
|
790
870
|
<td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
|
791
|
-
www.example.com/
|
792
|
-
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
|
793
|
-
ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
871
|
+
www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
794
872
|
</td>
|
795
873
|
</tr>
|
796
874
|
</tbody>
|
797
|
-
</thead>
|
798
875
|
</table>
|
799
876
|
</div>
|
800
877
|
</div>
|
@@ -804,7 +881,8 @@ ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
|
804
881
|
OUTPUT
|
805
882
|
end
|
806
883
|
|
807
|
-
|
884
|
+
|
885
|
+
it "does not break up very long strings in tables by default (Word)" do
|
808
886
|
expect(xmlpp(IsoDoc::WordConvert.new({}).cleanup(Nokogiri::XML(<<~"INPUT")).to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
|
809
887
|
<html xmlns:epub="http://www.idpf.org/2007/ops">
|
810
888
|
<head>
|
@@ -828,16 +906,17 @@ ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
|
828
906
|
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
|
829
907
|
<td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">Rice sample</td>
|
830
908
|
</tr>
|
909
|
+
</thead>
|
831
910
|
<tbody>
|
832
911
|
<tr>
|
833
912
|
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
834
|
-
http://www.example.com
|
913
|
+
http://www.example.com/&AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
835
914
|
</td>
|
836
915
|
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
837
|
-
http://www.example.com/
|
916
|
+
http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
838
917
|
</td>
|
839
918
|
<td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
|
840
|
-
www.example.com/
|
919
|
+
www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
841
920
|
</td>
|
842
921
|
</tr>
|
843
922
|
</tbody>
|
@@ -870,20 +949,20 @@ ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
|
870
949
|
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
|
871
950
|
<td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>Rice sample</td>
|
872
951
|
</tr>
|
952
|
+
</thead>
|
873
953
|
<tbody>
|
874
954
|
<tr>
|
875
955
|
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
876
|
-
http://www.example.com
|
956
|
+
http://www.example.com/&AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
877
957
|
</td>
|
878
958
|
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
879
|
-
http://www.example.com/
|
959
|
+
http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
880
960
|
</td>
|
881
961
|
<td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
|
882
|
-
www.example.com/
|
962
|
+
www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
883
963
|
</td>
|
884
964
|
</tr>
|
885
965
|
</tbody>
|
886
|
-
</thead>
|
887
966
|
</table>
|
888
967
|
</div>
|
889
968
|
</div>
|
@@ -893,4 +972,217 @@ ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
|
893
972
|
OUTPUT
|
894
973
|
end
|
895
974
|
|
975
|
+
it "breaks up very long strings in tables on request (Word)" do
|
976
|
+
expect(xmlpp(IsoDoc::WordConvert.new({break_up_urls_in_tables: "true"}).cleanup(Nokogiri::XML(<<~"INPUT")).to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
|
977
|
+
<html xmlns:epub="http://www.idpf.org/2007/ops">
|
978
|
+
<head>
|
979
|
+
<title>test</title>
|
980
|
+
<body lang="EN-US" link="blue" vlink="#954F72">
|
981
|
+
<div class="WordSection1">
|
982
|
+
<p> </p>
|
983
|
+
</div>
|
984
|
+
<br clear="all" class="section"/>
|
985
|
+
<div class="WordSection2">
|
986
|
+
<br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
|
987
|
+
<div>
|
988
|
+
<h1 class="ForewordTitle">Foreword</h1>
|
989
|
+
<p class="TableTitle" align="center">
|
990
|
+
<b>Table 1 — Repeatability and reproducibility of husked rice yield</b>
|
991
|
+
</p>
|
992
|
+
<table id="tableD-1" class="MsoISOTable" border="1" cellspacing="0" cellpadding="0">
|
993
|
+
<thead>
|
994
|
+
<tr>
|
995
|
+
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
|
996
|
+
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
|
997
|
+
<td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">Rice sample</td>
|
998
|
+
</tr>
|
999
|
+
</thead>
|
1000
|
+
<tbody>
|
1001
|
+
<tr>
|
1002
|
+
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
|
1003
|
+
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
|
1004
|
+
<td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
|
1005
|
+
</tr>
|
1006
|
+
</tbody>
|
1007
|
+
</table>
|
1008
|
+
</div>
|
1009
|
+
</div>
|
1010
|
+
</body>
|
1011
|
+
</html>
|
1012
|
+
INPUT
|
1013
|
+
<?xml version='1.0'?>
|
1014
|
+
<html xmlns:epub='http://www.idpf.org/2007/ops'>
|
1015
|
+
<head>
|
1016
|
+
<title>test</title>
|
1017
|
+
<body lang='EN-US' link='blue' vlink='#954F72'>
|
1018
|
+
<div class='WordSection1'>
|
1019
|
+
<p> </p>
|
1020
|
+
</div>
|
1021
|
+
<br clear='all' class='section'/>
|
1022
|
+
<div class='WordSection2'>
|
1023
|
+
<br clear='all' style='mso-special-character:line-break;page-break-before:always'/>
|
1024
|
+
<div>
|
1025
|
+
<h1 class='ForewordTitle'>Foreword</h1>
|
1026
|
+
<p class='TableTitle' align='center'>
|
1027
|
+
<b>Table 1 — Repeatability and reproducibility of husked rice yield</b>
|
1028
|
+
</p>
|
1029
|
+
<table id='tableD-1' class='MsoISOTable' border='1' cellspacing='0' cellpadding='0'>
|
1030
|
+
<thead>
|
1031
|
+
<tr>
|
1032
|
+
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
|
1033
|
+
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
|
1034
|
+
<td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>Rice sample</td>
|
1035
|
+
</tr>
|
1036
|
+
</thead>
|
1037
|
+
<tbody>
|
1038
|
+
<tr>
|
1039
|
+
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
1040
|
+
http://www.example.com/ AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAA/ BBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
1041
|
+
</td>
|
1042
|
+
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
1043
|
+
http://www.example.com/ AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAABBBBBBBBBBBBBBBBBBBBBB BBBBBB
|
1044
|
+
</td>
|
1045
|
+
<td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
|
1046
|
+
www.example.com/ AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
1047
|
+
</td>
|
1048
|
+
</tr>
|
1049
|
+
</tbody>
|
1050
|
+
</table>
|
1051
|
+
</div>
|
1052
|
+
</div>
|
1053
|
+
</body>
|
1054
|
+
</head>
|
1055
|
+
</html>
|
1056
|
+
OUTPUT
|
1057
|
+
end
|
1058
|
+
|
1059
|
+
it "cleans up term sources" do
|
1060
|
+
c = IsoDoc::HtmlConvert.new({i18nyaml: "spec/assets/i18n.yaml"})
|
1061
|
+
c.i18n_init("en", "Latn")
|
1062
|
+
expect(xmlpp(c.textcleanup(<<~"INPUT").to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
|
1063
|
+
#{HTML_HDR}
|
1064
|
+
<p class="zzSTDTitle1"/>
|
1065
|
+
<div id="_terms_and_definitions"><h1>1.  Terms and Definitions</h1><p>For the purposes of this document,
|
1066
|
+
the following terms and definitions apply.</p>
|
1067
|
+
<p class="TermNum" id="paddy1">1.1.</p><p class="Terms" style="text-align:left;">paddy</p>
|
1068
|
+
|
1069
|
+
<p id="_eb29b35e-123e-4d1c-b50b-2714d41e747f"><rice> rice retaining its husk after threshing</p>
|
1070
|
+
<div id="_bd57bbf1-f948-4bae-b0ce-73c00431f892" class="example"><p class="example-title">EXAMPLE 1</p>
|
1071
|
+
<p id="_65c9a509-9a89-4b54-a890-274126aeb55c">Foreign seeds, husks, bran, sand, dust.</p>
|
1072
|
+
<ul>
|
1073
|
+
<li>A</li>
|
1074
|
+
</ul>
|
1075
|
+
</div>
|
1076
|
+
<div id="_bd57bbf1-f948-4bae-b0ce-73c00431f894" class="example"><p class="example-title">EXAMPLE 2</p>
|
1077
|
+
<ul>
|
1078
|
+
<li>A</li>
|
1079
|
+
</ul>
|
1080
|
+
</div>
|
1081
|
+
|
1082
|
+
<p>[TERMREF]
|
1083
|
+
<a href="#ISO7301">ISO 7301:2011, Clause 3.1</a>
|
1084
|
+
[MODIFICATION]The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here
|
1085
|
+
[/TERMREF]</p>
|
1086
|
+
<p>[TERMREF] Termbase IEV, term ID xyz [/TERMREF]</p>
|
1087
|
+
<p>[TERMREF] Termbase IEV, term ID xyz [MODIFICATION]with adjustments [/TERMREF]</p>
|
1088
|
+
<p class="TermNum" id="paddy">1.2.</p><p class="Terms" style="text-align:left;">paddy</p><p class="AltTerms" style="text-align:left;">paddy rice</p>
|
1089
|
+
<p class="AltTerms" style="text-align:left;">rough rice</p>
|
1090
|
+
<p class="DeprecatedTerms" style="text-align:left;">DEPRECATED: cargo rice</p>
|
1091
|
+
<p id="_eb29b35e-123e-4d1c-b50b-2714d41e747f">rice retaining its husk after threshing</p>
|
1092
|
+
<div id="_bd57bbf1-f948-4bae-b0ce-73c00431f893" class="example"><p class="example-title">EXAMPLE</p>
|
1093
|
+
<ul>
|
1094
|
+
<li>A</li>
|
1095
|
+
</ul>
|
1096
|
+
</div>
|
1097
|
+
<div class="Note"><p>Note 1 to entry: The starch of waxy rice consists almost entirely of amylopectin. The kernels have a tendency to stick together after cooking.</p></div>
|
1098
|
+
<div class="Note"><p>Note 2 to entry: <ul><li>A</li></ul><p id="_19830f33-e46c-42cc-94ca-a5ef101132d5">The starch of waxy rice consists almost entirely of amylopectin. The kernels have a tendency to stick together after cooking.</p></p></div>
|
1099
|
+
<p>[TERMREF]
|
1100
|
+
<a href="#ISO7301">ISO 7301:2011, Clause 3.1</a>
|
1101
|
+
[/TERMREF]</p></div>
|
1102
|
+
</div>
|
1103
|
+
</body>
|
1104
|
+
</html>
|
1105
|
+
INPUT
|
1106
|
+
<html xmlns:epub='http://www.idpf.org/2007/ops' lang='en'>
|
1107
|
+
<head/>
|
1108
|
+
<body lang='en'>
|
1109
|
+
<div class='title-section'>
|
1110
|
+
<p> </p>
|
1111
|
+
</div>
|
1112
|
+
<br/>
|
1113
|
+
<div class='prefatory-section'>
|
1114
|
+
<p> </p>
|
1115
|
+
</div>
|
1116
|
+
<br/>
|
1117
|
+
<div class='main-section'>
|
1118
|
+
<p class='zzSTDTitle1'/>
|
1119
|
+
<div id='_terms_and_definitions'>
|
1120
|
+
<h1>1.  Terms and Definitions</h1>
|
1121
|
+
<p>For the purposes of this document, the following terms and definitions apply.</p>
|
1122
|
+
<p class='TermNum' id='paddy1'>1.1.</p>
|
1123
|
+
<p class='Terms' style='text-align:left;'>paddy</p>
|
1124
|
+
<p id='_eb29b35e-123e-4d1c-b50b-2714d41e747f'><rice> rice retaining its husk after threshing</p>
|
1125
|
+
<div id='_bd57bbf1-f948-4bae-b0ce-73c00431f892' class='example'>
|
1126
|
+
<p class='example-title'>EXAMPLE 1</p>
|
1127
|
+
<p id='_65c9a509-9a89-4b54-a890-274126aeb55c'>Foreign seeds, husks, bran, sand, dust.</p>
|
1128
|
+
<ul>
|
1129
|
+
<li>A</li>
|
1130
|
+
</ul>
|
1131
|
+
</div>
|
1132
|
+
<div id='_bd57bbf1-f948-4bae-b0ce-73c00431f894' class='example'>
|
1133
|
+
<p class='example-title'>EXAMPLE 2</p>
|
1134
|
+
<ul>
|
1135
|
+
<li>A</li>
|
1136
|
+
</ul>
|
1137
|
+
</div>
|
1138
|
+
<p>
|
1139
|
+
[SOURCE:
|
1140
|
+
<a href='#ISO7301'>ISO 7301:2011, Clause 3.1</a>
|
1141
|
+
, modified — The term "cargo rice" is shown as deprecated, and
|
1142
|
+
Note 1 to entry is not included here; Termbase IEV, term ID xyz;
|
1143
|
+
Termbase IEV, term ID xyz, modified — with adjustments]
|
1144
|
+
</p>
|
1145
|
+
<p class='TermNum' id='paddy'>1.2.</p>
|
1146
|
+
<p class='Terms' style='text-align:left;'>paddy</p>
|
1147
|
+
<p class='AltTerms' style='text-align:left;'>paddy rice</p>
|
1148
|
+
<p class='AltTerms' style='text-align:left;'>rough rice</p>
|
1149
|
+
<p class='DeprecatedTerms' style='text-align:left;'>DEPRECATED: cargo rice</p>
|
1150
|
+
<p id='_eb29b35e-123e-4d1c-b50b-2714d41e747f'>rice retaining its husk after threshing</p>
|
1151
|
+
<div id='_bd57bbf1-f948-4bae-b0ce-73c00431f893' class='example'>
|
1152
|
+
<p class='example-title'>EXAMPLE</p>
|
1153
|
+
<ul>
|
1154
|
+
<li>A</li>
|
1155
|
+
</ul>
|
1156
|
+
</div>
|
1157
|
+
<div class='Note'>
|
1158
|
+
<p>
|
1159
|
+
Note 1 to entry: The starch of waxy rice consists almost entirely of
|
1160
|
+
amylopectin. The kernels have a tendency to stick together after
|
1161
|
+
cooking.
|
1162
|
+
</p>
|
1163
|
+
</div>
|
1164
|
+
<div class='Note'>
|
1165
|
+
<p>
|
1166
|
+
Note 2 to entry:
|
1167
|
+
<ul>
|
1168
|
+
<li>A</li>
|
1169
|
+
</ul>
|
1170
|
+
<p id='_19830f33-e46c-42cc-94ca-a5ef101132d5'>
|
1171
|
+
The starch of waxy rice consists almost entirely of amylopectin.
|
1172
|
+
The kernels have a tendency to stick together after cooking.
|
1173
|
+
</p>
|
1174
|
+
</p>
|
1175
|
+
</div>
|
1176
|
+
<p>
|
1177
|
+
[SOURCE:
|
1178
|
+
<a href='#ISO7301'>ISO 7301:2011, Clause 3.1</a>
|
1179
|
+
]
|
1180
|
+
</p>
|
1181
|
+
</div>
|
1182
|
+
</div>
|
1183
|
+
</body>
|
1184
|
+
</html>
|
1185
|
+
OUTPUT
|
1186
|
+
end
|
1187
|
+
|
896
1188
|
end
|