isodoc 1.0.23 → 1.0.28
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/macos.yml +10 -2
- data/.github/workflows/ubuntu.yml +13 -3
- data/.github/workflows/windows.yml +10 -2
- data/isodoc.gemspec +1 -1
- data/lib/isodoc-yaml/i18n-en.yaml +3 -1
- data/lib/isodoc-yaml/i18n-fr.yaml +3 -1
- data/lib/isodoc-yaml/i18n-zh-Hans.yaml +3 -1
- data/lib/isodoc/base_style/reset.scss +1 -1
- data/lib/isodoc/convert.rb +1 -0
- data/lib/isodoc/function/blocks.rb +6 -1
- data/lib/isodoc/function/cleanup.rb +16 -2
- data/lib/isodoc/function/i18n.rb +5 -5
- data/lib/isodoc/function/inline.rb +77 -79
- data/lib/isodoc/function/inline_simple.rb +72 -0
- data/lib/isodoc/function/references.rb +49 -37
- data/lib/isodoc/function/section.rb +19 -8
- data/lib/isodoc/function/table.rb +0 -1
- data/lib/isodoc/function/to_word_html.rb +23 -13
- data/lib/isodoc/function/utils.rb +11 -5
- data/lib/isodoc/function/xref_gen.rb +2 -1
- data/lib/isodoc/function/xref_sect_gen.rb +24 -24
- data/lib/isodoc/headlesshtml_convert.rb +5 -0
- data/lib/isodoc/html_convert.rb +5 -0
- data/lib/isodoc/html_function/footnotes.rb +3 -3
- data/lib/isodoc/html_function/html.rb +15 -0
- data/lib/isodoc/html_function/postprocess.rb +6 -5
- data/lib/isodoc/metadata.rb +10 -3
- data/lib/isodoc/metadata_date.rb +19 -7
- data/lib/isodoc/pdf_convert.rb +5 -0
- data/lib/isodoc/version.rb +1 -1
- data/lib/isodoc/word_convert.rb +5 -0
- data/lib/isodoc/word_function/body.rb +0 -4
- data/lib/isodoc/word_function/footnotes.rb +3 -3
- data/lib/isodoc/word_function/postprocess.rb +13 -2
- data/lib/isodoc/xslfo_convert.rb +5 -0
- data/spec/assets/i18n.yaml +4 -1
- data/spec/isodoc/blocks_spec.rb +59 -8
- data/spec/isodoc/cleanup_spec.rb +317 -25
- data/spec/isodoc/footnotes_spec.rb +20 -5
- data/spec/isodoc/i18n_spec.rb +12 -12
- data/spec/isodoc/inline_spec.rb +118 -5
- data/spec/isodoc/metadata_spec.rb +8 -3
- data/spec/isodoc/postproc_spec.rb +34 -12
- data/spec/isodoc/ref_spec.rb +120 -51
- data/spec/isodoc/section_spec.rb +236 -207
- data/spec/isodoc/table_spec.rb +24 -24
- data/spec/isodoc/terms_spec.rb +50 -6
- data/spec/isodoc/xref_spec.rb +53 -26
- metadata +5 -4
@@ -14,6 +14,11 @@ module IsoDoc
|
|
14
14
|
"_headlessimages"
|
15
15
|
end
|
16
16
|
|
17
|
+
def initialize(options)
|
18
|
+
@format = :html
|
19
|
+
super
|
20
|
+
end
|
21
|
+
|
17
22
|
def convert(filename, file = nil, debug = false)
|
18
23
|
file = File.read(filename, encoding: "utf-8") if file.nil?
|
19
24
|
@openmathdelim, @closemathdelim = extract_delims(file)
|
data/lib/isodoc/html_convert.rb
CHANGED
@@ -48,7 +48,7 @@ module IsoDoc::HtmlFunction
|
|
48
48
|
end
|
49
49
|
|
50
50
|
def table_footnote_parse(node, out)
|
51
|
-
fn = node["reference"]
|
51
|
+
fn = node["reference"] || UUIDTools::UUID.random_create.to_s
|
52
52
|
tid = get_table_ancestor_id(node)
|
53
53
|
make_table_footnote_link(out, tid + fn, fn)
|
54
54
|
# do not output footnote text if we have already seen it for this table
|
@@ -64,8 +64,8 @@ module IsoDoc::HtmlFunction
|
|
64
64
|
def footnote_parse(node, out)
|
65
65
|
return table_footnote_parse(node, out) if (@in_table || @in_figure) &&
|
66
66
|
!node.ancestors.map {|m| m.name }.include?("name")
|
67
|
-
fn = node["reference"]
|
68
|
-
attrs = {
|
67
|
+
fn = node["reference"] || UUIDTools::UUID.random_create.to_s
|
68
|
+
attrs = { class: "FootnoteRef", href: "#fn:#{fn}" }
|
69
69
|
out.a **attrs do |a|
|
70
70
|
a.sup { |sup| sup << fn }
|
71
71
|
end
|
@@ -3,6 +3,18 @@ require "base64"
|
|
3
3
|
|
4
4
|
module IsoDoc::HtmlFunction
|
5
5
|
module Html
|
6
|
+
def convert1(docxml, filename, dir)
|
7
|
+
anchor_names docxml
|
8
|
+
noko do |xml|
|
9
|
+
xml.html **{ lang: "#{@lang}" } do |html|
|
10
|
+
info docxml, nil
|
11
|
+
populate_css()
|
12
|
+
html.head { |head| define_head head, filename, dir }
|
13
|
+
make_body(html, docxml)
|
14
|
+
end
|
15
|
+
end.join("\n")
|
16
|
+
end
|
17
|
+
|
6
18
|
def make_body1(body, _docxml)
|
7
19
|
body.div **{ class: "title-section" } do |div1|
|
8
20
|
div1.p { |p| p << " " } # placeholder
|
@@ -97,5 +109,8 @@ module IsoDoc::HtmlFunction
|
|
97
109
|
end
|
98
110
|
sourcecode_name_parse(node, out, name)
|
99
111
|
end
|
112
|
+
|
113
|
+
def table_long_strings_cleanup(docxml)
|
114
|
+
end
|
100
115
|
end
|
101
116
|
end
|
@@ -108,7 +108,7 @@ module IsoDoc::HtmlFunction
|
|
108
108
|
idx = docxml.at("//div[@id = 'toc']") or return docxml
|
109
109
|
toc = "<ul>"
|
110
110
|
path = toclevel_classes.map do |l|
|
111
|
-
"//main//#{l}[not(@class = 'TermNum')][not(@class = 'noTOC')][
|
111
|
+
"//main//#{l}[not(@class = 'TermNum')][not(@class = 'noTOC')][text()]"
|
112
112
|
end
|
113
113
|
docxml.xpath(path.join(" | ")).each_with_index do |h, tocidx|
|
114
114
|
h["id"] ||= "toc#{tocidx}"
|
@@ -159,7 +159,8 @@ module IsoDoc::HtmlFunction
|
|
159
159
|
def inject_script(doc)
|
160
160
|
return doc unless @scripts
|
161
161
|
scripts = File.read(@scripts, encoding: "UTF-8")
|
162
|
-
doc.
|
162
|
+
a = doc.split(%r{</body>})
|
163
|
+
a[0] + scripts + "</body>" + a[1]
|
163
164
|
end
|
164
165
|
|
165
166
|
def update_footnote_filter(fn, x, i, seen)
|
@@ -178,7 +179,7 @@ module IsoDoc::HtmlFunction
|
|
178
179
|
def html_footnote_filter(docxml)
|
179
180
|
seen = {}
|
180
181
|
i = 1
|
181
|
-
docxml.xpath('//a[@
|
182
|
+
docxml.xpath('//a[@class = "FootnoteRef"]').each do |x|
|
182
183
|
fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || next
|
183
184
|
i, seen = update_footnote_filter(fn, x, i, seen)
|
184
185
|
end
|
@@ -187,7 +188,7 @@ module IsoDoc::HtmlFunction
|
|
187
188
|
|
188
189
|
def footnote_backlinks(docxml)
|
189
190
|
seen = {}
|
190
|
-
docxml.xpath('//a[@
|
191
|
+
docxml.xpath('//a[@class = "FootnoteRef"]').each_with_index do |x, i|
|
191
192
|
seen[x["href"]] and next or seen[x["href"]] = true
|
192
193
|
fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || next
|
193
194
|
xdup = x.dup
|
@@ -200,7 +201,7 @@ module IsoDoc::HtmlFunction
|
|
200
201
|
end
|
201
202
|
|
202
203
|
def footnote_format(docxml)
|
203
|
-
docxml.xpath("//a[@
|
204
|
+
docxml.xpath("//a[@class = 'FootnoteRef']/sup").each do |x|
|
204
205
|
footnote_reference_format(x)
|
205
206
|
end
|
206
207
|
docxml.xpath("//a[@class = 'TableFootnoteRef'] | "\
|
data/lib/isodoc/metadata.rb
CHANGED
@@ -3,7 +3,8 @@ require_relative "./metadata_date"
|
|
3
3
|
module IsoDoc
|
4
4
|
class Metadata
|
5
5
|
DATETYPES = %w{published accessed created implemented obsoleted confirmed
|
6
|
-
updated issued received transmitted copied unchanged circulated
|
6
|
+
updated issued received transmitted copied unchanged circulated vote-started
|
7
|
+
vote-ended}.freeze
|
7
8
|
|
8
9
|
def ns(xpath)
|
9
10
|
Common::ns(xpath)
|
@@ -15,7 +16,7 @@ module IsoDoc
|
|
15
16
|
|
16
17
|
def initialize(lang, script, labels)
|
17
18
|
@metadata = {}
|
18
|
-
DATETYPES.each { |w| @metadata["#{w}date".to_sym] = "XXX" }
|
19
|
+
DATETYPES.each { |w| @metadata["#{w.gsub(/-/, "_")}date".to_sym] = "XXX" }
|
19
20
|
@lang = lang
|
20
21
|
@script = script
|
21
22
|
@c = HTMLEntities.new
|
@@ -84,7 +85,7 @@ module IsoDoc
|
|
84
85
|
|
85
86
|
def bibdate(isoxml, _out)
|
86
87
|
isoxml.xpath(ns("//bibdata/date")).each do |d|
|
87
|
-
set("#{d['type']}date".to_sym, Common::date_range(d))
|
88
|
+
set("#{d['type'].gsub(/-/, "_")}date".to_sym, Common::date_range(d))
|
88
89
|
end
|
89
90
|
end
|
90
91
|
|
@@ -217,5 +218,11 @@ module IsoDoc
|
|
217
218
|
a = xml.at(ns("//bibdata/uri[@type = 'pdf']")) and set(:pdf, a.text)
|
218
219
|
a = xml.at(ns("//bibdata/uri[@type = 'doc']")) and set(:doc, a.text)
|
219
220
|
end
|
221
|
+
|
222
|
+
def keywords(isoxml, _out)
|
223
|
+
ret = []
|
224
|
+
isoxml.xpath(ns("//bibdata/keyword")).each { |kw| ret << kw.text }
|
225
|
+
set(:keywords, ret)
|
226
|
+
end
|
220
227
|
end
|
221
228
|
end
|
data/lib/isodoc/metadata_date.rb
CHANGED
@@ -14,14 +14,26 @@ module IsoDoc
|
|
14
14
|
"10": @labels["month_october"],
|
15
15
|
"11": @labels["month_november"],
|
16
16
|
"12": @labels["month_december"],
|
17
|
-
|
17
|
+
}
|
18
18
|
end
|
19
19
|
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
20
|
+
def monthyr(isodate)
|
21
|
+
m = /(?<yr>\d\d\d\d)-(?<mo>\d\d)/.match isodate
|
22
|
+
return isodate unless m && m[:yr] && m[:mo]
|
23
|
+
IsoDoc::Function::I18n::l10n("#{months[m[:mo].to_sym]} #{m[:yr]}",
|
24
|
+
@lang, @script)
|
25
|
+
end
|
26
|
+
|
27
|
+
def MMMddyyyy(isodate)
|
28
|
+
return nil if isodate.nil?
|
29
|
+
arr = isodate.split("-")
|
30
|
+
date = if arr.size == 1 and (/^\d+$/.match isodate)
|
31
|
+
Date.new(*arr.map(&:to_i)).strftime("%Y")
|
32
|
+
elsif arr.size == 2
|
33
|
+
Date.new(*arr.map(&:to_i)).strftime("%B %Y")
|
34
|
+
else
|
35
|
+
Date.parse(isodate).strftime("%B %d, %Y")
|
36
|
+
end
|
37
|
+
end
|
26
38
|
end
|
27
39
|
end
|
data/lib/isodoc/pdf_convert.rb
CHANGED
@@ -22,6 +22,11 @@ module IsoDoc
|
|
22
22
|
"_pdfimages"
|
23
23
|
end
|
24
24
|
|
25
|
+
def initialize(options)
|
26
|
+
@format = :pdf
|
27
|
+
super
|
28
|
+
end
|
29
|
+
|
25
30
|
def convert(filename, file = nil, debug = false)
|
26
31
|
file = File.read(filename, encoding: "utf-8") if file.nil?
|
27
32
|
@openmathdelim, @closemathdelim = extract_delims(file)
|
data/lib/isodoc/version.rb
CHANGED
data/lib/isodoc/word_convert.rb
CHANGED
@@ -204,10 +204,6 @@ module IsoDoc::WordFunction
|
|
204
204
|
alt: node["alt"],
|
205
205
|
title: node["title"],
|
206
206
|
width: node["width"] }
|
207
|
-
if node["height"] == "auto" || node["width"] == "auto"
|
208
|
-
attrs[:height] = nil
|
209
|
-
attrs[:width] = nil
|
210
|
-
end
|
211
207
|
out.img **attr_code(attrs)
|
212
208
|
image_title_parse(out, caption)
|
213
209
|
end
|
@@ -56,7 +56,7 @@ module IsoDoc::WordFunction
|
|
56
56
|
end
|
57
57
|
|
58
58
|
def table_footnote_parse(node, out)
|
59
|
-
fn = node["reference"]
|
59
|
+
fn = node["reference"] || UUIDTools::UUID.random_create.to_s
|
60
60
|
tid = get_table_ancestor_id(node)
|
61
61
|
make_table_footnote_link(out, tid + fn, fn)
|
62
62
|
# do not output footnote text if we have already seen it for this table
|
@@ -80,11 +80,11 @@ module IsoDoc::WordFunction
|
|
80
80
|
def footnote_parse(node, out)
|
81
81
|
return table_footnote_parse(node, out) if (@in_table || @in_figure) &&
|
82
82
|
!node.ancestors.map {|m| m.name }.include?("name")
|
83
|
-
fn = node["reference"]
|
83
|
+
fn = node["reference"] || UUIDTools::UUID.random_create.to_s
|
84
84
|
return seen_footnote_parse(node, out, fn) if @seen_footnote.include?(fn)
|
85
85
|
@fn_bookmarks[fn] = bookmarkid
|
86
86
|
out.span **{style: "mso-bookmark:_Ref#{@fn_bookmarks[fn]}"} do |s|
|
87
|
-
s.a **{ "epub:type": "footnote", href: "#ftn#{fn}" } do |a|
|
87
|
+
s.a **{ "class": "FootnoteRef", "epub:type": "footnote", href: "#ftn#{fn}" } do |a|
|
88
88
|
a.sup { |sup| sup << fn }
|
89
89
|
end
|
90
90
|
end
|
@@ -39,7 +39,6 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
|
|
39
39
|
end
|
40
40
|
|
41
41
|
def toWord(result, filename, dir, header)
|
42
|
-
#result = populate_template(result, :word)
|
43
42
|
result = from_xhtml(word_cleanup(to_xhtml(result)))
|
44
43
|
unless @landscapestyle.empty?
|
45
44
|
@wordstylesheet&.open
|
@@ -196,6 +195,8 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
|
|
196
195
|
@landscapestyle = ""
|
197
196
|
word_section_breaks1(docxml, "WordSection2")
|
198
197
|
word_section_breaks1(docxml, "WordSection3")
|
198
|
+
word_remove_pb_before_annex(docxml)
|
199
|
+
docxml.xpath("//br[@orientation]").each { |br| br.delete("orientation") }
|
199
200
|
end
|
200
201
|
|
201
202
|
def word_section_breaks1(docxml, sect)
|
@@ -203,7 +204,6 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
|
|
203
204
|
each_with_index do |br, i|
|
204
205
|
@landscapestyle += "\ndiv.#{sect}_#{i} {page:#{sect}"\
|
205
206
|
"#{br["orientation"] == "landscape" ? "L" : "P"};}\n"
|
206
|
-
br.delete("orientation")
|
207
207
|
split_at_section_break(docxml, sect, br, i)
|
208
208
|
end
|
209
209
|
end
|
@@ -219,6 +219,17 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
|
|
219
219
|
end
|
220
220
|
end
|
221
221
|
|
222
|
+
# applies for <div class="WordSectionN_M"><p><pagebreak/></p>...
|
223
|
+
def word_remove_pb_before_annex(docxml)
|
224
|
+
docxml.xpath("//div[p/br]").each do |d|
|
225
|
+
/^WordSection\d+_\d+$/.match(d["class"]) or next
|
226
|
+
d.elements[0].name == "p" && !d.elements[0].elements.empty? or next
|
227
|
+
d.elements[0].elements[0].name == "br" && d.elements[0].elements[0]["style"] ==
|
228
|
+
"mso-special-character:line-break;page-break-before:always" or next
|
229
|
+
d.elements[0].remove
|
230
|
+
end
|
231
|
+
end
|
232
|
+
|
222
233
|
def word_footnote_format(docxml)
|
223
234
|
# the content is in a[@epub:type = 'footnote']//sup, but in Word,
|
224
235
|
# we need to inject content around the autonumbered footnote reference
|
data/lib/isodoc/xslfo_convert.rb
CHANGED
@@ -13,6 +13,11 @@ module IsoDoc
|
|
13
13
|
"_pdfimages"
|
14
14
|
end
|
15
15
|
|
16
|
+
def initialize(options)
|
17
|
+
@format = :pdf
|
18
|
+
super
|
19
|
+
end
|
20
|
+
|
16
21
|
def convert(filename, file = nil, debug = false)
|
17
22
|
file = File.read(filename, encoding: "utf-8") if file.nil?
|
18
23
|
docxml, outname_html, dir = convert_init(file, filename, debug)
|
data/spec/assets/i18n.yaml
CHANGED
data/spec/isodoc/blocks_spec.rb
CHANGED
@@ -355,7 +355,7 @@ B
|
|
355
355
|
</div></aside>
|
356
356
|
<p style='page-break-after:avoid;'><b>Key</b></p><dl><dt><p>A</p></dt><dd><p>B</p></dd></dl>
|
357
357
|
<p class="FigureTitle" style="text-align:center;">Figure 1 — Split-it-right <i>sample</i> divider
|
358
|
-
<a
|
358
|
+
<a class='FootnoteRef' href='#fn:1'>
|
359
359
|
<sup>1</sup>
|
360
360
|
</a>
|
361
361
|
</p></div>
|
@@ -417,8 +417,8 @@ B
|
|
417
417
|
<div id="figureA-1" class="figure">
|
418
418
|
|
419
419
|
<img src="rice_images/rice_image1.png" height="20" width="30" alt="alttext" title="titletext"/>
|
420
|
-
<img src="rice_images/rice_image1.png"/>
|
421
|
-
<img src=
|
420
|
+
<img src="rice_images/rice_image1.png" height='20' width='auto'/>
|
421
|
+
<img src='_.gif' height='20' width='auto'/>
|
422
422
|
<a href="#_" class="TableFootnoteRef">a</a><aside><div id="ftn_"><span><span id="_" class="TableFootnoteRef">a</span><span style="mso-tab-count:1">  </span></span>
|
423
423
|
<p id="_">The time <span class="stem">(#(t_90)#)</span> was estimated to be 18,2 min for this example.</p>
|
424
424
|
</div></aside>
|
@@ -426,7 +426,7 @@ B
|
|
426
426
|
<p class='FigureTitle' style='text-align:center;'>
|
427
427
|
Figure 1 — Split-it-right sample divider
|
428
428
|
<span style='mso-bookmark:_Ref'>
|
429
|
-
<a href='#ftn1' epub:type='footnote'>
|
429
|
+
<a href='#ftn1' epub:type='footnote' class='FootnoteRef'>
|
430
430
|
<sup>1</sup>
|
431
431
|
</a>
|
432
432
|
</span>
|
@@ -681,7 +681,7 @@ Que?
|
|
681
681
|
<br/>
|
682
682
|
<div>
|
683
683
|
<h1 class="ForewordTitle">Foreword</h1>
|
684
|
-
<div class="Admonition"><p class="AdmonitionTitle" style="text-align:center;">CAUTION</p>
|
684
|
+
<div class="Admonition" id='_70234f78-64e5-4dfc-8b6f-f3f037348b6a'><p class="AdmonitionTitle" style="text-align:center;">CAUTION</p>
|
685
685
|
<p id="_e94663cc-2473-4ccc-9a72-983a74d989f2">Only use paddy or parboiled rice for the determination of husked rice yield.</p>
|
686
686
|
</div>
|
687
687
|
</div>
|
@@ -707,7 +707,7 @@ Que?
|
|
707
707
|
<br/>
|
708
708
|
<div>
|
709
709
|
<h1 class="ForewordTitle">Foreword</h1>
|
710
|
-
<div class="Admonition"><p class="AdmonitionTitle" style="text-align:center;">Title</p>
|
710
|
+
<div class="Admonition" id="_70234f78-64e5-4dfc-8b6f-f3f037348b6a"><p class="AdmonitionTitle" style="text-align:center;">Title</p>
|
711
711
|
<p id="_e94663cc-2473-4ccc-9a72-983a74d989f2">Only use paddy or parboiled rice for the determination of husked rice yield.</p>
|
712
712
|
</div>
|
713
713
|
</div>
|
@@ -969,7 +969,7 @@ World
|
|
969
969
|
INPUT
|
970
970
|
#{HTML_HDR}
|
971
971
|
<p class="zzSTDTitle1"/>
|
972
|
-
<div><h1>1. 
|
972
|
+
<div><h1>1.  </h1>
|
973
973
|
<p class="TermNum" id="_extraneous_matter">1.1.</p><p class="Terms" style="text-align:left;">extraneous matter</p><p class="AltTerms" style="text-align:left;">EM</p>
|
974
974
|
|
975
975
|
<p id="_318b3939-be09-46c4-a284-93f9826b981e"><rice> organic and inorganic components other than whole or broken kernels</p>
|
@@ -1030,7 +1030,7 @@ World
|
|
1030
1030
|
</import>
|
1031
1031
|
</permission>
|
1032
1032
|
</foreword></preface>
|
1033
|
-
<bibliography><references id="_bibliography" obligation="informative">
|
1033
|
+
<bibliography><references id="_bibliography" obligation="informative" normative="false">
|
1034
1034
|
<title>Bibliography</title>
|
1035
1035
|
<bibitem id="rfc2616" type="standard"> <fetched>2020-03-27</fetched> <title format="text/plain" language="en" script="Latn">Hypertext Transfer Protocol — HTTP/1.1</title> <uri type="xml">https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2616.xml</uri> <uri type="src">https://www.rfc-editor.org/info/rfc2616</uri> <docidentifier type="IETF">RFC 2616</docidentifier> <docidentifier type="rfc-anchor">RFC2616</docidentifier> <docidentifier type="DOI">10.17487/RFC2616</docidentifier> <date type="published"> <on>1999-06</on> </date> <contributor> <role type="author"/> <person> <name> <completename language="en">R. Fielding</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">J. Gettys</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">J. Mogul</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">H. Frystyk</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">L. Masinter</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">P. Leach</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">T. Berners-Lee</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <language>en</language> <script>Latn</script> <abstract format="text/plain" language="en" script="Latn">HTTP has been in use by the World-Wide Web global information initiative since 1990. This specification defines the protocol referred to as “HTTP/1.1”, and is an update to RFC 2068. [STANDARDS-TRACK]</abstract> <series type="main"> <title format="text/plain" language="en" script="Latn">RFC</title> <number>2616</number> </series> <place>Fremont, CA</place></bibitem>
|
1036
1036
|
|
@@ -1462,5 +1462,56 @@ INPUT
|
|
1462
1462
|
OUTPUT
|
1463
1463
|
end
|
1464
1464
|
|
1465
|
+
it "processes passthrough with compatible format" do
|
1466
|
+
FileUtils.rm_f "test.html"
|
1467
|
+
IsoDoc::HtmlConvert.new({}).convert("test", <<~"INPUT", false)
|
1468
|
+
<iso-standard xmlns="http://riboseinc.com/isoxml">
|
1469
|
+
<preface><foreword>
|
1470
|
+
<passthrough format="html,rfc"><A></passthrough><em>Hello</em><passthrough format="html,rfc"></A></passthrough>
|
1471
|
+
</foreword></preface>
|
1472
|
+
</iso-standard>
|
1473
|
+
INPUT
|
1474
|
+
expect(( File.read("test.html").gsub(%r{^.*<h1 class="ForewordTitle">Foreword</h1>}m, "").gsub(%r{</div>.*}m, ""))).to be_equivalent_to xmlpp(<<~"OUTPUT")
|
1475
|
+
<A><i>Hello</i></A>
|
1476
|
+
OUTPUT
|
1477
|
+
end
|
1478
|
+
|
1479
|
+
it "aborts if passthrough results in malformed XML" do
|
1480
|
+
FileUtils.rm_f "test.html"
|
1481
|
+
FileUtils.rm_f "test.html.err"
|
1482
|
+
begin
|
1483
|
+
expect { IsoDoc::HtmlConvert.new({}).convert("test", <<~"INPUT", false) }.to raise_error(SystemExit)
|
1484
|
+
<iso-standard xmlns="http://riboseinc.com/isoxml">
|
1485
|
+
<preface><foreword>
|
1486
|
+
<passthrough format="html,rfc"><A></passthrough><em>Hello</em>
|
1487
|
+
</foreword></preface>
|
1488
|
+
</iso-standard>
|
1489
|
+
INPUT
|
1490
|
+
rescue SystemExit
|
1491
|
+
end
|
1492
|
+
expect(File.exist?("test.html.err")).to be true
|
1493
|
+
end
|
1494
|
+
|
1495
|
+
it "ignore passthrough with incompatible format" do
|
1496
|
+
expect(xmlpp(IsoDoc::HtmlConvert.new({}).convert("test", <<~"INPUT", true))).to be_equivalent_to xmlpp(<<~"OUTPUT")
|
1497
|
+
<iso-standard xmlns="http://riboseinc.com/isoxml">
|
1498
|
+
<preface><foreword>
|
1499
|
+
<passthrough format="doc,rfc"><A></passthrough>
|
1500
|
+
</foreword></preface>
|
1501
|
+
</iso-standard>
|
1502
|
+
INPUT
|
1503
|
+
#{HTML_HDR}
|
1504
|
+
<br/>
|
1505
|
+
<div>
|
1506
|
+
<h1 class='ForewordTitle'>Foreword</h1>
|
1507
|
+
</div>
|
1508
|
+
<p class='zzSTDTitle1'/>
|
1509
|
+
</div>
|
1510
|
+
</body>
|
1511
|
+
</html>
|
1512
|
+
OUTPUT
|
1513
|
+
end
|
1514
|
+
|
1515
|
+
|
1465
1516
|
|
1466
1517
|
end
|
data/spec/isodoc/cleanup_spec.rb
CHANGED
@@ -154,9 +154,9 @@ RSpec.describe IsoDoc do
|
|
154
154
|
<br/>
|
155
155
|
<div>
|
156
156
|
<h1 class="ForewordTitle">Foreword</h1>
|
157
|
-
<p>A.<a
|
158
|
-
<p>B.<a
|
159
|
-
<p>C.<a
|
157
|
+
<p>A.<a class="FootnoteRef" href="#fn:2" epub:type="footnote"><sup>2</sup></a></p>
|
158
|
+
<p>B.<a class="FootnoteRef" href="#fn:2" epub:type="footnote"><sup>2</sup></a></p>
|
159
|
+
<p>C.<a class="FootnoteRef" href="#fn:1" epub:type="footnote"><sup>1</sup></a></p>
|
160
160
|
</div>
|
161
161
|
<p class="zzSTDTitle1"/>
|
162
162
|
<aside id="fn:2" class="footnote">
|
@@ -173,9 +173,9 @@ RSpec.describe IsoDoc do
|
|
173
173
|
<br/>
|
174
174
|
<div>
|
175
175
|
<h1 class="ForewordTitle">Foreword</h1>
|
176
|
-
<p>A.<a
|
177
|
-
<p>B.<a
|
178
|
-
<p>C.<a
|
176
|
+
<p>A.<a class="FootnoteRef" href="#fn:2" epub:type="footnote"><sup>1</sup></a></p>
|
177
|
+
<p>B.<a class="FootnoteRef" href="#fn:2" epub:type="footnote"><sup>2</sup></a></p>
|
178
|
+
<p>C.<a class="FootnoteRef" href="#fn:1" epub:type="footnote"><sup>3</sup></a></p>
|
179
179
|
</div>
|
180
180
|
<p class="zzSTDTitle1"/>
|
181
181
|
<aside id="fn:2" class="footnote">
|
@@ -715,7 +715,7 @@ INPUT
|
|
715
715
|
OUTPUT
|
716
716
|
end
|
717
717
|
|
718
|
-
it "
|
718
|
+
it "does not break up very long strings in tables by default" do
|
719
719
|
expect(xmlpp(IsoDoc::HtmlConvert.new({}).cleanup(Nokogiri::XML(<<~"INPUT")).to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
|
720
720
|
<html xmlns:epub="http://www.idpf.org/2007/ops">
|
721
721
|
<head>
|
@@ -739,6 +739,7 @@ INPUT
|
|
739
739
|
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
|
740
740
|
<td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">Rice sample</td>
|
741
741
|
</tr>
|
742
|
+
</thead>
|
742
743
|
<tbody>
|
743
744
|
<tr>
|
744
745
|
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
|
@@ -775,26 +776,102 @@ INPUT
|
|
775
776
|
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
|
776
777
|
<td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>Rice sample</td>
|
777
778
|
</tr>
|
779
|
+
</thead>
|
778
780
|
<tbody>
|
779
781
|
<tr>
|
780
782
|
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
781
|
-
http://www.example.com/
|
782
|
-
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAA/
|
783
|
-
BBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
783
|
+
http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
784
784
|
</td>
|
785
785
|
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
786
|
-
http://www.example.com/
|
787
|
-
|
788
|
-
|
786
|
+
http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB </td>
|
787
|
+
<td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
|
788
|
+
www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
789
|
+
</td>
|
790
|
+
</tr>
|
791
|
+
</tbody>
|
792
|
+
</table>
|
793
|
+
</div>
|
794
|
+
</div>
|
795
|
+
</body>
|
796
|
+
</head>
|
797
|
+
</html>
|
798
|
+
OUTPUT
|
799
|
+
end
|
800
|
+
|
801
|
+
it "does not break up very long strings in tables on request in HTML" do
|
802
|
+
expect(xmlpp(IsoDoc::HtmlConvert.new({break_up_urls_in_tables: "true"}).cleanup(Nokogiri::XML(<<~"INPUT")).to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
|
803
|
+
<html xmlns:epub="http://www.idpf.org/2007/ops">
|
804
|
+
<head>
|
805
|
+
<title>test</title>
|
806
|
+
<body lang="EN-US" link="blue" vlink="#954F72">
|
807
|
+
<div class="WordSection1">
|
808
|
+
<p> </p>
|
809
|
+
</div>
|
810
|
+
<br clear="all" class="section"/>
|
811
|
+
<div class="WordSection2">
|
812
|
+
<br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
|
813
|
+
<div>
|
814
|
+
<h1 class="ForewordTitle">Foreword</h1>
|
815
|
+
<p class="TableTitle" align="center">
|
816
|
+
<b>Table 1 — Repeatability and reproducibility of husked rice yield</b>
|
817
|
+
</p>
|
818
|
+
<table id="tableD-1" class="MsoISOTable" border="1" cellspacing="0" cellpadding="0">
|
819
|
+
<thead>
|
820
|
+
<tr>
|
821
|
+
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
|
822
|
+
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
|
823
|
+
<td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">Rice sample</td>
|
824
|
+
</tr>
|
825
|
+
</thead>
|
826
|
+
<tbody>
|
827
|
+
<tr>
|
828
|
+
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
|
829
|
+
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
|
830
|
+
<td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
|
831
|
+
</tr>
|
832
|
+
</tbody>
|
833
|
+
</table>
|
834
|
+
</div>
|
835
|
+
</div>
|
836
|
+
</body>
|
837
|
+
</html>
|
838
|
+
INPUT
|
839
|
+
<?xml version='1.0'?>
|
840
|
+
<html xmlns:epub='http://www.idpf.org/2007/ops'>
|
841
|
+
<head>
|
842
|
+
<title>test</title>
|
843
|
+
<body lang='EN-US' link='blue' vlink='#954F72'>
|
844
|
+
<div class='WordSection1'>
|
845
|
+
<p> </p>
|
846
|
+
</div>
|
847
|
+
<br clear='all' class='section'/>
|
848
|
+
<div class='WordSection2'>
|
849
|
+
<br clear='all' style='mso-special-character:line-break;page-break-before:always'/>
|
850
|
+
<div>
|
851
|
+
<h1 class='ForewordTitle'>Foreword</h1>
|
852
|
+
<p class='TableTitle' align='center'>
|
853
|
+
<b>Table 1 — Repeatability and reproducibility of husked rice yield</b>
|
854
|
+
</p>
|
855
|
+
<table id='tableD-1' class='MsoISOTable' border='1' cellspacing='0' cellpadding='0'>
|
856
|
+
<thead>
|
857
|
+
<tr>
|
858
|
+
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
|
859
|
+
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
|
860
|
+
<td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>Rice sample</td>
|
861
|
+
</tr>
|
862
|
+
</thead>
|
863
|
+
<tbody>
|
864
|
+
<tr>
|
865
|
+
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
866
|
+
http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
789
867
|
</td>
|
868
|
+
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
869
|
+
http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
|
790
870
|
<td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
|
791
|
-
www.example.com/
|
792
|
-
AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
|
793
|
-
ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
871
|
+
www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
794
872
|
</td>
|
795
873
|
</tr>
|
796
874
|
</tbody>
|
797
|
-
</thead>
|
798
875
|
</table>
|
799
876
|
</div>
|
800
877
|
</div>
|
@@ -804,7 +881,8 @@ ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
|
804
881
|
OUTPUT
|
805
882
|
end
|
806
883
|
|
807
|
-
|
884
|
+
|
885
|
+
it "does not break up very long strings in tables by default (Word)" do
|
808
886
|
expect(xmlpp(IsoDoc::WordConvert.new({}).cleanup(Nokogiri::XML(<<~"INPUT")).to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
|
809
887
|
<html xmlns:epub="http://www.idpf.org/2007/ops">
|
810
888
|
<head>
|
@@ -828,16 +906,17 @@ ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
|
828
906
|
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
|
829
907
|
<td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">Rice sample</td>
|
830
908
|
</tr>
|
909
|
+
</thead>
|
831
910
|
<tbody>
|
832
911
|
<tr>
|
833
912
|
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
834
|
-
http://www.example.com
|
913
|
+
http://www.example.com/&AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
835
914
|
</td>
|
836
915
|
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
837
|
-
http://www.example.com/
|
916
|
+
http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
838
917
|
</td>
|
839
918
|
<td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
|
840
|
-
www.example.com/
|
919
|
+
www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
841
920
|
</td>
|
842
921
|
</tr>
|
843
922
|
</tbody>
|
@@ -870,20 +949,20 @@ ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
|
870
949
|
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
|
871
950
|
<td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>Rice sample</td>
|
872
951
|
</tr>
|
952
|
+
</thead>
|
873
953
|
<tbody>
|
874
954
|
<tr>
|
875
955
|
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
876
|
-
http://www.example.com
|
956
|
+
http://www.example.com/&AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
877
957
|
</td>
|
878
958
|
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
879
|
-
http://www.example.com/
|
959
|
+
http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
880
960
|
</td>
|
881
961
|
<td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
|
882
|
-
www.example.com/
|
962
|
+
www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
883
963
|
</td>
|
884
964
|
</tr>
|
885
965
|
</tbody>
|
886
|
-
</thead>
|
887
966
|
</table>
|
888
967
|
</div>
|
889
968
|
</div>
|
@@ -893,4 +972,217 @@ ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
|
893
972
|
OUTPUT
|
894
973
|
end
|
895
974
|
|
975
|
+
it "breaks up very long strings in tables on request (Word)" do
|
976
|
+
expect(xmlpp(IsoDoc::WordConvert.new({break_up_urls_in_tables: "true"}).cleanup(Nokogiri::XML(<<~"INPUT")).to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
|
977
|
+
<html xmlns:epub="http://www.idpf.org/2007/ops">
|
978
|
+
<head>
|
979
|
+
<title>test</title>
|
980
|
+
<body lang="EN-US" link="blue" vlink="#954F72">
|
981
|
+
<div class="WordSection1">
|
982
|
+
<p> </p>
|
983
|
+
</div>
|
984
|
+
<br clear="all" class="section"/>
|
985
|
+
<div class="WordSection2">
|
986
|
+
<br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
|
987
|
+
<div>
|
988
|
+
<h1 class="ForewordTitle">Foreword</h1>
|
989
|
+
<p class="TableTitle" align="center">
|
990
|
+
<b>Table 1 — Repeatability and reproducibility of husked rice yield</b>
|
991
|
+
</p>
|
992
|
+
<table id="tableD-1" class="MsoISOTable" border="1" cellspacing="0" cellpadding="0">
|
993
|
+
<thead>
|
994
|
+
<tr>
|
995
|
+
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
|
996
|
+
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
|
997
|
+
<td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">Rice sample</td>
|
998
|
+
</tr>
|
999
|
+
</thead>
|
1000
|
+
<tbody>
|
1001
|
+
<tr>
|
1002
|
+
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
|
1003
|
+
<td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
|
1004
|
+
<td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
|
1005
|
+
</tr>
|
1006
|
+
</tbody>
|
1007
|
+
</table>
|
1008
|
+
</div>
|
1009
|
+
</div>
|
1010
|
+
</body>
|
1011
|
+
</html>
|
1012
|
+
INPUT
|
1013
|
+
<?xml version='1.0'?>
|
1014
|
+
<html xmlns:epub='http://www.idpf.org/2007/ops'>
|
1015
|
+
<head>
|
1016
|
+
<title>test</title>
|
1017
|
+
<body lang='EN-US' link='blue' vlink='#954F72'>
|
1018
|
+
<div class='WordSection1'>
|
1019
|
+
<p> </p>
|
1020
|
+
</div>
|
1021
|
+
<br clear='all' class='section'/>
|
1022
|
+
<div class='WordSection2'>
|
1023
|
+
<br clear='all' style='mso-special-character:line-break;page-break-before:always'/>
|
1024
|
+
<div>
|
1025
|
+
<h1 class='ForewordTitle'>Foreword</h1>
|
1026
|
+
<p class='TableTitle' align='center'>
|
1027
|
+
<b>Table 1 — Repeatability and reproducibility of husked rice yield</b>
|
1028
|
+
</p>
|
1029
|
+
<table id='tableD-1' class='MsoISOTable' border='1' cellspacing='0' cellpadding='0'>
|
1030
|
+
<thead>
|
1031
|
+
<tr>
|
1032
|
+
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
|
1033
|
+
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
|
1034
|
+
<td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>Rice sample</td>
|
1035
|
+
</tr>
|
1036
|
+
</thead>
|
1037
|
+
<tbody>
|
1038
|
+
<tr>
|
1039
|
+
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
1040
|
+
http://www.example.com/ AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAA/ BBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
1041
|
+
</td>
|
1042
|
+
<td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
|
1043
|
+
http://www.example.com/ AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAABBBBBBBBBBBBBBBBBBBBBB BBBBBB
|
1044
|
+
</td>
|
1045
|
+
<td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
|
1046
|
+
www.example.com/ AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
|
1047
|
+
</td>
|
1048
|
+
</tr>
|
1049
|
+
</tbody>
|
1050
|
+
</table>
|
1051
|
+
</div>
|
1052
|
+
</div>
|
1053
|
+
</body>
|
1054
|
+
</head>
|
1055
|
+
</html>
|
1056
|
+
OUTPUT
|
1057
|
+
end
|
1058
|
+
|
1059
|
+
it "cleans up term sources" do
|
1060
|
+
c = IsoDoc::HtmlConvert.new({i18nyaml: "spec/assets/i18n.yaml"})
|
1061
|
+
c.i18n_init("en", "Latn")
|
1062
|
+
expect(xmlpp(c.textcleanup(<<~"INPUT").to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
|
1063
|
+
#{HTML_HDR}
|
1064
|
+
<p class="zzSTDTitle1"/>
|
1065
|
+
<div id="_terms_and_definitions"><h1>1.  Terms and Definitions</h1><p>For the purposes of this document,
|
1066
|
+
the following terms and definitions apply.</p>
|
1067
|
+
<p class="TermNum" id="paddy1">1.1.</p><p class="Terms" style="text-align:left;">paddy</p>
|
1068
|
+
|
1069
|
+
<p id="_eb29b35e-123e-4d1c-b50b-2714d41e747f"><rice> rice retaining its husk after threshing</p>
|
1070
|
+
<div id="_bd57bbf1-f948-4bae-b0ce-73c00431f892" class="example"><p class="example-title">EXAMPLE 1</p>
|
1071
|
+
<p id="_65c9a509-9a89-4b54-a890-274126aeb55c">Foreign seeds, husks, bran, sand, dust.</p>
|
1072
|
+
<ul>
|
1073
|
+
<li>A</li>
|
1074
|
+
</ul>
|
1075
|
+
</div>
|
1076
|
+
<div id="_bd57bbf1-f948-4bae-b0ce-73c00431f894" class="example"><p class="example-title">EXAMPLE 2</p>
|
1077
|
+
<ul>
|
1078
|
+
<li>A</li>
|
1079
|
+
</ul>
|
1080
|
+
</div>
|
1081
|
+
|
1082
|
+
<p>[TERMREF]
|
1083
|
+
<a href="#ISO7301">ISO 7301:2011, Clause 3.1</a>
|
1084
|
+
[MODIFICATION]The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here
|
1085
|
+
[/TERMREF]</p>
|
1086
|
+
<p>[TERMREF] Termbase IEV, term ID xyz [/TERMREF]</p>
|
1087
|
+
<p>[TERMREF] Termbase IEV, term ID xyz [MODIFICATION]with adjustments [/TERMREF]</p>
|
1088
|
+
<p class="TermNum" id="paddy">1.2.</p><p class="Terms" style="text-align:left;">paddy</p><p class="AltTerms" style="text-align:left;">paddy rice</p>
|
1089
|
+
<p class="AltTerms" style="text-align:left;">rough rice</p>
|
1090
|
+
<p class="DeprecatedTerms" style="text-align:left;">DEPRECATED: cargo rice</p>
|
1091
|
+
<p id="_eb29b35e-123e-4d1c-b50b-2714d41e747f">rice retaining its husk after threshing</p>
|
1092
|
+
<div id="_bd57bbf1-f948-4bae-b0ce-73c00431f893" class="example"><p class="example-title">EXAMPLE</p>
|
1093
|
+
<ul>
|
1094
|
+
<li>A</li>
|
1095
|
+
</ul>
|
1096
|
+
</div>
|
1097
|
+
<div class="Note"><p>Note 1 to entry: The starch of waxy rice consists almost entirely of amylopectin. The kernels have a tendency to stick together after cooking.</p></div>
|
1098
|
+
<div class="Note"><p>Note 2 to entry: <ul><li>A</li></ul><p id="_19830f33-e46c-42cc-94ca-a5ef101132d5">The starch of waxy rice consists almost entirely of amylopectin. The kernels have a tendency to stick together after cooking.</p></p></div>
|
1099
|
+
<p>[TERMREF]
|
1100
|
+
<a href="#ISO7301">ISO 7301:2011, Clause 3.1</a>
|
1101
|
+
[/TERMREF]</p></div>
|
1102
|
+
</div>
|
1103
|
+
</body>
|
1104
|
+
</html>
|
1105
|
+
INPUT
|
1106
|
+
<html xmlns:epub='http://www.idpf.org/2007/ops' lang='en'>
|
1107
|
+
<head/>
|
1108
|
+
<body lang='en'>
|
1109
|
+
<div class='title-section'>
|
1110
|
+
<p> </p>
|
1111
|
+
</div>
|
1112
|
+
<br/>
|
1113
|
+
<div class='prefatory-section'>
|
1114
|
+
<p> </p>
|
1115
|
+
</div>
|
1116
|
+
<br/>
|
1117
|
+
<div class='main-section'>
|
1118
|
+
<p class='zzSTDTitle1'/>
|
1119
|
+
<div id='_terms_and_definitions'>
|
1120
|
+
<h1>1.  Terms and Definitions</h1>
|
1121
|
+
<p>For the purposes of this document, the following terms and definitions apply.</p>
|
1122
|
+
<p class='TermNum' id='paddy1'>1.1.</p>
|
1123
|
+
<p class='Terms' style='text-align:left;'>paddy</p>
|
1124
|
+
<p id='_eb29b35e-123e-4d1c-b50b-2714d41e747f'><rice> rice retaining its husk after threshing</p>
|
1125
|
+
<div id='_bd57bbf1-f948-4bae-b0ce-73c00431f892' class='example'>
|
1126
|
+
<p class='example-title'>EXAMPLE 1</p>
|
1127
|
+
<p id='_65c9a509-9a89-4b54-a890-274126aeb55c'>Foreign seeds, husks, bran, sand, dust.</p>
|
1128
|
+
<ul>
|
1129
|
+
<li>A</li>
|
1130
|
+
</ul>
|
1131
|
+
</div>
|
1132
|
+
<div id='_bd57bbf1-f948-4bae-b0ce-73c00431f894' class='example'>
|
1133
|
+
<p class='example-title'>EXAMPLE 2</p>
|
1134
|
+
<ul>
|
1135
|
+
<li>A</li>
|
1136
|
+
</ul>
|
1137
|
+
</div>
|
1138
|
+
<p>
|
1139
|
+
[SOURCE:
|
1140
|
+
<a href='#ISO7301'>ISO 7301:2011, Clause 3.1</a>
|
1141
|
+
, modified — The term "cargo rice" is shown as deprecated, and
|
1142
|
+
Note 1 to entry is not included here; Termbase IEV, term ID xyz;
|
1143
|
+
Termbase IEV, term ID xyz, modified — with adjustments]
|
1144
|
+
</p>
|
1145
|
+
<p class='TermNum' id='paddy'>1.2.</p>
|
1146
|
+
<p class='Terms' style='text-align:left;'>paddy</p>
|
1147
|
+
<p class='AltTerms' style='text-align:left;'>paddy rice</p>
|
1148
|
+
<p class='AltTerms' style='text-align:left;'>rough rice</p>
|
1149
|
+
<p class='DeprecatedTerms' style='text-align:left;'>DEPRECATED: cargo rice</p>
|
1150
|
+
<p id='_eb29b35e-123e-4d1c-b50b-2714d41e747f'>rice retaining its husk after threshing</p>
|
1151
|
+
<div id='_bd57bbf1-f948-4bae-b0ce-73c00431f893' class='example'>
|
1152
|
+
<p class='example-title'>EXAMPLE</p>
|
1153
|
+
<ul>
|
1154
|
+
<li>A</li>
|
1155
|
+
</ul>
|
1156
|
+
</div>
|
1157
|
+
<div class='Note'>
|
1158
|
+
<p>
|
1159
|
+
Note 1 to entry: The starch of waxy rice consists almost entirely of
|
1160
|
+
amylopectin. The kernels have a tendency to stick together after
|
1161
|
+
cooking.
|
1162
|
+
</p>
|
1163
|
+
</div>
|
1164
|
+
<div class='Note'>
|
1165
|
+
<p>
|
1166
|
+
Note 2 to entry:
|
1167
|
+
<ul>
|
1168
|
+
<li>A</li>
|
1169
|
+
</ul>
|
1170
|
+
<p id='_19830f33-e46c-42cc-94ca-a5ef101132d5'>
|
1171
|
+
The starch of waxy rice consists almost entirely of amylopectin.
|
1172
|
+
The kernels have a tendency to stick together after cooking.
|
1173
|
+
</p>
|
1174
|
+
</p>
|
1175
|
+
</div>
|
1176
|
+
<p>
|
1177
|
+
[SOURCE:
|
1178
|
+
<a href='#ISO7301'>ISO 7301:2011, Clause 3.1</a>
|
1179
|
+
]
|
1180
|
+
</p>
|
1181
|
+
</div>
|
1182
|
+
</div>
|
1183
|
+
</body>
|
1184
|
+
</html>
|
1185
|
+
OUTPUT
|
1186
|
+
end
|
1187
|
+
|
896
1188
|
end
|