isodoc 1.0.23 → 1.0.28

Sign up to get free protection for your applications and to get access to all the features.
Files changed (50) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/macos.yml +10 -2
  3. data/.github/workflows/ubuntu.yml +13 -3
  4. data/.github/workflows/windows.yml +10 -2
  5. data/isodoc.gemspec +1 -1
  6. data/lib/isodoc-yaml/i18n-en.yaml +3 -1
  7. data/lib/isodoc-yaml/i18n-fr.yaml +3 -1
  8. data/lib/isodoc-yaml/i18n-zh-Hans.yaml +3 -1
  9. data/lib/isodoc/base_style/reset.scss +1 -1
  10. data/lib/isodoc/convert.rb +1 -0
  11. data/lib/isodoc/function/blocks.rb +6 -1
  12. data/lib/isodoc/function/cleanup.rb +16 -2
  13. data/lib/isodoc/function/i18n.rb +5 -5
  14. data/lib/isodoc/function/inline.rb +77 -79
  15. data/lib/isodoc/function/inline_simple.rb +72 -0
  16. data/lib/isodoc/function/references.rb +49 -37
  17. data/lib/isodoc/function/section.rb +19 -8
  18. data/lib/isodoc/function/table.rb +0 -1
  19. data/lib/isodoc/function/to_word_html.rb +23 -13
  20. data/lib/isodoc/function/utils.rb +11 -5
  21. data/lib/isodoc/function/xref_gen.rb +2 -1
  22. data/lib/isodoc/function/xref_sect_gen.rb +24 -24
  23. data/lib/isodoc/headlesshtml_convert.rb +5 -0
  24. data/lib/isodoc/html_convert.rb +5 -0
  25. data/lib/isodoc/html_function/footnotes.rb +3 -3
  26. data/lib/isodoc/html_function/html.rb +15 -0
  27. data/lib/isodoc/html_function/postprocess.rb +6 -5
  28. data/lib/isodoc/metadata.rb +10 -3
  29. data/lib/isodoc/metadata_date.rb +19 -7
  30. data/lib/isodoc/pdf_convert.rb +5 -0
  31. data/lib/isodoc/version.rb +1 -1
  32. data/lib/isodoc/word_convert.rb +5 -0
  33. data/lib/isodoc/word_function/body.rb +0 -4
  34. data/lib/isodoc/word_function/footnotes.rb +3 -3
  35. data/lib/isodoc/word_function/postprocess.rb +13 -2
  36. data/lib/isodoc/xslfo_convert.rb +5 -0
  37. data/spec/assets/i18n.yaml +4 -1
  38. data/spec/isodoc/blocks_spec.rb +59 -8
  39. data/spec/isodoc/cleanup_spec.rb +317 -25
  40. data/spec/isodoc/footnotes_spec.rb +20 -5
  41. data/spec/isodoc/i18n_spec.rb +12 -12
  42. data/spec/isodoc/inline_spec.rb +118 -5
  43. data/spec/isodoc/metadata_spec.rb +8 -3
  44. data/spec/isodoc/postproc_spec.rb +34 -12
  45. data/spec/isodoc/ref_spec.rb +120 -51
  46. data/spec/isodoc/section_spec.rb +236 -207
  47. data/spec/isodoc/table_spec.rb +24 -24
  48. data/spec/isodoc/terms_spec.rb +50 -6
  49. data/spec/isodoc/xref_spec.rb +53 -26
  50. metadata +5 -4
@@ -14,6 +14,11 @@ module IsoDoc
14
14
  "_headlessimages"
15
15
  end
16
16
 
17
+ def initialize(options)
18
+ @format = :html
19
+ super
20
+ end
21
+
17
22
  def convert(filename, file = nil, debug = false)
18
23
  file = File.read(filename, encoding: "utf-8") if file.nil?
19
24
  @openmathdelim, @closemathdelim = extract_delims(file)
@@ -14,6 +14,11 @@ module IsoDoc
14
14
  "_htmlimages"
15
15
  end
16
16
 
17
+ def initialize(options)
18
+ @format = :html
19
+ super
20
+ end
21
+
17
22
  def convert(filename, file = nil, debug = false)
18
23
  ret = super
19
24
  Dir.exists?(tmpimagedir) and Dir["#{tmpimagedir}/*"].empty? and
@@ -48,7 +48,7 @@ module IsoDoc::HtmlFunction
48
48
  end
49
49
 
50
50
  def table_footnote_parse(node, out)
51
- fn = node["reference"]
51
+ fn = node["reference"] || UUIDTools::UUID.random_create.to_s
52
52
  tid = get_table_ancestor_id(node)
53
53
  make_table_footnote_link(out, tid + fn, fn)
54
54
  # do not output footnote text if we have already seen it for this table
@@ -64,8 +64,8 @@ module IsoDoc::HtmlFunction
64
64
  def footnote_parse(node, out)
65
65
  return table_footnote_parse(node, out) if (@in_table || @in_figure) &&
66
66
  !node.ancestors.map {|m| m.name }.include?("name")
67
- fn = node["reference"]
68
- attrs = { "epub:type": "footnote", rel: "footnote", href: "#fn:#{fn}" }
67
+ fn = node["reference"] || UUIDTools::UUID.random_create.to_s
68
+ attrs = { class: "FootnoteRef", href: "#fn:#{fn}" }
69
69
  out.a **attrs do |a|
70
70
  a.sup { |sup| sup << fn }
71
71
  end
@@ -3,6 +3,18 @@ require "base64"
3
3
 
4
4
  module IsoDoc::HtmlFunction
5
5
  module Html
6
+ def convert1(docxml, filename, dir)
7
+ anchor_names docxml
8
+ noko do |xml|
9
+ xml.html **{ lang: "#{@lang}" } do |html|
10
+ info docxml, nil
11
+ populate_css()
12
+ html.head { |head| define_head head, filename, dir }
13
+ make_body(html, docxml)
14
+ end
15
+ end.join("\n")
16
+ end
17
+
6
18
  def make_body1(body, _docxml)
7
19
  body.div **{ class: "title-section" } do |div1|
8
20
  div1.p { |p| p << "&nbsp;" } # placeholder
@@ -97,5 +109,8 @@ module IsoDoc::HtmlFunction
97
109
  end
98
110
  sourcecode_name_parse(node, out, name)
99
111
  end
112
+
113
+ def table_long_strings_cleanup(docxml)
114
+ end
100
115
  end
101
116
  end
@@ -108,7 +108,7 @@ module IsoDoc::HtmlFunction
108
108
  idx = docxml.at("//div[@id = 'toc']") or return docxml
109
109
  toc = "<ul>"
110
110
  path = toclevel_classes.map do |l|
111
- "//main//#{l}[not(@class = 'TermNum')][not(@class = 'noTOC')][not(text())]"
111
+ "//main//#{l}[not(@class = 'TermNum')][not(@class = 'noTOC')][text()]"
112
112
  end
113
113
  docxml.xpath(path.join(" | ")).each_with_index do |h, tocidx|
114
114
  h["id"] ||= "toc#{tocidx}"
@@ -159,7 +159,8 @@ module IsoDoc::HtmlFunction
159
159
  def inject_script(doc)
160
160
  return doc unless @scripts
161
161
  scripts = File.read(@scripts, encoding: "UTF-8")
162
- doc.sub("</body>", scripts + "\n</body>")
162
+ a = doc.split(%r{</body>})
163
+ a[0] + scripts + "</body>" + a[1]
163
164
  end
164
165
 
165
166
  def update_footnote_filter(fn, x, i, seen)
@@ -178,7 +179,7 @@ module IsoDoc::HtmlFunction
178
179
  def html_footnote_filter(docxml)
179
180
  seen = {}
180
181
  i = 1
181
- docxml.xpath('//a[@epub:type = "footnote"]').each do |x|
182
+ docxml.xpath('//a[@class = "FootnoteRef"]').each do |x|
182
183
  fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || next
183
184
  i, seen = update_footnote_filter(fn, x, i, seen)
184
185
  end
@@ -187,7 +188,7 @@ module IsoDoc::HtmlFunction
187
188
 
188
189
  def footnote_backlinks(docxml)
189
190
  seen = {}
190
- docxml.xpath('//a[@epub:type = "footnote"]').each_with_index do |x, i|
191
+ docxml.xpath('//a[@class = "FootnoteRef"]').each_with_index do |x, i|
191
192
  seen[x["href"]] and next or seen[x["href"]] = true
192
193
  fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || next
193
194
  xdup = x.dup
@@ -200,7 +201,7 @@ module IsoDoc::HtmlFunction
200
201
  end
201
202
 
202
203
  def footnote_format(docxml)
203
- docxml.xpath("//a[@epub:type = 'footnote']/sup").each do |x|
204
+ docxml.xpath("//a[@class = 'FootnoteRef']/sup").each do |x|
204
205
  footnote_reference_format(x)
205
206
  end
206
207
  docxml.xpath("//a[@class = 'TableFootnoteRef'] | "\
@@ -3,7 +3,8 @@ require_relative "./metadata_date"
3
3
  module IsoDoc
4
4
  class Metadata
5
5
  DATETYPES = %w{published accessed created implemented obsoleted confirmed
6
- updated issued received transmitted copied unchanged circulated}.freeze
6
+ updated issued received transmitted copied unchanged circulated vote-started
7
+ vote-ended}.freeze
7
8
 
8
9
  def ns(xpath)
9
10
  Common::ns(xpath)
@@ -15,7 +16,7 @@ module IsoDoc
15
16
 
16
17
  def initialize(lang, script, labels)
17
18
  @metadata = {}
18
- DATETYPES.each { |w| @metadata["#{w}date".to_sym] = "XXX" }
19
+ DATETYPES.each { |w| @metadata["#{w.gsub(/-/, "_")}date".to_sym] = "XXX" }
19
20
  @lang = lang
20
21
  @script = script
21
22
  @c = HTMLEntities.new
@@ -84,7 +85,7 @@ module IsoDoc
84
85
 
85
86
  def bibdate(isoxml, _out)
86
87
  isoxml.xpath(ns("//bibdata/date")).each do |d|
87
- set("#{d['type']}date".to_sym, Common::date_range(d))
88
+ set("#{d['type'].gsub(/-/, "_")}date".to_sym, Common::date_range(d))
88
89
  end
89
90
  end
90
91
 
@@ -217,5 +218,11 @@ module IsoDoc
217
218
  a = xml.at(ns("//bibdata/uri[@type = 'pdf']")) and set(:pdf, a.text)
218
219
  a = xml.at(ns("//bibdata/uri[@type = 'doc']")) and set(:doc, a.text)
219
220
  end
221
+
222
+ def keywords(isoxml, _out)
223
+ ret = []
224
+ isoxml.xpath(ns("//bibdata/keyword")).each { |kw| ret << kw.text }
225
+ set(:keywords, ret)
226
+ end
220
227
  end
221
228
  end
@@ -14,14 +14,26 @@ module IsoDoc
14
14
  "10": @labels["month_october"],
15
15
  "11": @labels["month_november"],
16
16
  "12": @labels["month_december"],
17
- }
17
+ }
18
18
  end
19
19
 
20
- def monthyr(isodate)
21
- m = /(?<yr>\d\d\d\d)-(?<mo>\d\d)/.match isodate
22
- return isodate unless m && m[:yr] && m[:mo]
23
- IsoDoc::Function::I18n::l10n("#{months[m[:mo].to_sym]} #{m[:yr]}",
24
- @lang, @script)
25
- end
20
+ def monthyr(isodate)
21
+ m = /(?<yr>\d\d\d\d)-(?<mo>\d\d)/.match isodate
22
+ return isodate unless m && m[:yr] && m[:mo]
23
+ IsoDoc::Function::I18n::l10n("#{months[m[:mo].to_sym]} #{m[:yr]}",
24
+ @lang, @script)
25
+ end
26
+
27
+ def MMMddyyyy(isodate)
28
+ return nil if isodate.nil?
29
+ arr = isodate.split("-")
30
+ date = if arr.size == 1 and (/^\d+$/.match isodate)
31
+ Date.new(*arr.map(&:to_i)).strftime("%Y")
32
+ elsif arr.size == 2
33
+ Date.new(*arr.map(&:to_i)).strftime("%B %Y")
34
+ else
35
+ Date.parse(isodate).strftime("%B %d, %Y")
36
+ end
37
+ end
26
38
  end
27
39
  end
@@ -22,6 +22,11 @@ module IsoDoc
22
22
  "_pdfimages"
23
23
  end
24
24
 
25
+ def initialize(options)
26
+ @format = :pdf
27
+ super
28
+ end
29
+
25
30
  def convert(filename, file = nil, debug = false)
26
31
  file = File.read(filename, encoding: "utf-8") if file.nil?
27
32
  @openmathdelim, @closemathdelim = extract_delims(file)
@@ -1,3 +1,3 @@
1
1
  module IsoDoc
2
- VERSION = "1.0.23".freeze
2
+ VERSION = "1.0.28".freeze
3
3
  end
@@ -30,6 +30,11 @@ module IsoDoc
30
30
  include WordFunction::Body
31
31
  include WordFunction::Postprocess
32
32
 
33
+ def initialize(options)
34
+ @format = :doc
35
+ super
36
+ end
37
+
33
38
  def convert(filename, file = nil, debug = false)
34
39
  ret = super
35
40
  FileUtils.rm_rf tmpimagedir
@@ -204,10 +204,6 @@ module IsoDoc::WordFunction
204
204
  alt: node["alt"],
205
205
  title: node["title"],
206
206
  width: node["width"] }
207
- if node["height"] == "auto" || node["width"] == "auto"
208
- attrs[:height] = nil
209
- attrs[:width] = nil
210
- end
211
207
  out.img **attr_code(attrs)
212
208
  image_title_parse(out, caption)
213
209
  end
@@ -56,7 +56,7 @@ module IsoDoc::WordFunction
56
56
  end
57
57
 
58
58
  def table_footnote_parse(node, out)
59
- fn = node["reference"]
59
+ fn = node["reference"] || UUIDTools::UUID.random_create.to_s
60
60
  tid = get_table_ancestor_id(node)
61
61
  make_table_footnote_link(out, tid + fn, fn)
62
62
  # do not output footnote text if we have already seen it for this table
@@ -80,11 +80,11 @@ module IsoDoc::WordFunction
80
80
  def footnote_parse(node, out)
81
81
  return table_footnote_parse(node, out) if (@in_table || @in_figure) &&
82
82
  !node.ancestors.map {|m| m.name }.include?("name")
83
- fn = node["reference"]
83
+ fn = node["reference"] || UUIDTools::UUID.random_create.to_s
84
84
  return seen_footnote_parse(node, out, fn) if @seen_footnote.include?(fn)
85
85
  @fn_bookmarks[fn] = bookmarkid
86
86
  out.span **{style: "mso-bookmark:_Ref#{@fn_bookmarks[fn]}"} do |s|
87
- s.a **{ "epub:type": "footnote", href: "#ftn#{fn}" } do |a|
87
+ s.a **{ "class": "FootnoteRef", "epub:type": "footnote", href: "#ftn#{fn}" } do |a|
88
88
  a.sup { |sup| sup << fn }
89
89
  end
90
90
  end
@@ -39,7 +39,6 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
39
39
  end
40
40
 
41
41
  def toWord(result, filename, dir, header)
42
- #result = populate_template(result, :word)
43
42
  result = from_xhtml(word_cleanup(to_xhtml(result)))
44
43
  unless @landscapestyle.empty?
45
44
  @wordstylesheet&.open
@@ -196,6 +195,8 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
196
195
  @landscapestyle = ""
197
196
  word_section_breaks1(docxml, "WordSection2")
198
197
  word_section_breaks1(docxml, "WordSection3")
198
+ word_remove_pb_before_annex(docxml)
199
+ docxml.xpath("//br[@orientation]").each { |br| br.delete("orientation") }
199
200
  end
200
201
 
201
202
  def word_section_breaks1(docxml, sect)
@@ -203,7 +204,6 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
203
204
  each_with_index do |br, i|
204
205
  @landscapestyle += "\ndiv.#{sect}_#{i} {page:#{sect}"\
205
206
  "#{br["orientation"] == "landscape" ? "L" : "P"};}\n"
206
- br.delete("orientation")
207
207
  split_at_section_break(docxml, sect, br, i)
208
208
  end
209
209
  end
@@ -219,6 +219,17 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
219
219
  end
220
220
  end
221
221
 
222
+ # applies for <div class="WordSectionN_M"><p><pagebreak/></p>...
223
+ def word_remove_pb_before_annex(docxml)
224
+ docxml.xpath("//div[p/br]").each do |d|
225
+ /^WordSection\d+_\d+$/.match(d["class"]) or next
226
+ d.elements[0].name == "p" && !d.elements[0].elements.empty? or next
227
+ d.elements[0].elements[0].name == "br" && d.elements[0].elements[0]["style"] ==
228
+ "mso-special-character:line-break;page-break-before:always" or next
229
+ d.elements[0].remove
230
+ end
231
+ end
232
+
222
233
  def word_footnote_format(docxml)
223
234
  # the content is in a[@epub:type = 'footnote']//sup, but in Word,
224
235
  # we need to inject content around the autonumbered footnote reference
@@ -13,6 +13,11 @@ module IsoDoc
13
13
  "_pdfimages"
14
14
  end
15
15
 
16
+ def initialize(options)
17
+ @format = :pdf
18
+ super
19
+ end
20
+
16
21
  def convert(filename, file = nil, debug = false)
17
22
  file = File.read(filename, encoding: "utf-8") if file.nil?
18
23
  docxml, outname_html, dir = convert_init(file, filename, debug)
@@ -1,2 +1,5 @@
1
1
  foreword: Enkonduko
2
-
2
+ clause: klaŭzo
3
+ table: Tabelo
4
+ source: SOURCE
5
+ modified: modified
@@ -355,7 +355,7 @@ B
355
355
  </div></aside>
356
356
  <p style='page-break-after:avoid;'><b>Key</b></p><dl><dt><p>A</p></dt><dd><p>B</p></dd></dl>
357
357
  <p class="FigureTitle" style="text-align:center;">Figure 1&#160;&#8212; Split-it-right <i>sample</i> divider
358
- <a rel='footnote' href='#fn:1' epub:type='footnote'>
358
+ <a class='FootnoteRef' href='#fn:1'>
359
359
  <sup>1</sup>
360
360
  </a>
361
361
  </p></div>
@@ -417,8 +417,8 @@ B
417
417
  <div id="figureA-1" class="figure">
418
418
 
419
419
  <img src="rice_images/rice_image1.png" height="20" width="30" alt="alttext" title="titletext"/>
420
- <img src="rice_images/rice_image1.png"/>
421
- <img src="_.gif"/>
420
+ <img src="rice_images/rice_image1.png" height='20' width='auto'/>
421
+ <img src='_.gif' height='20' width='auto'/>
422
422
  <a href="#_" class="TableFootnoteRef">a</a><aside><div id="ftn_"><span><span id="_" class="TableFootnoteRef">a</span><span style="mso-tab-count:1">&#160; </span></span>
423
423
  <p id="_">The time <span class="stem">(#(t_90)#)</span> was estimated to be 18,2 min for this example.</p>
424
424
  </div></aside>
@@ -426,7 +426,7 @@ B
426
426
  <p class='FigureTitle' style='text-align:center;'>
427
427
  Figure 1&#160;&#8212; Split-it-right sample divider
428
428
  <span style='mso-bookmark:_Ref'>
429
- <a href='#ftn1' epub:type='footnote'>
429
+ <a href='#ftn1' epub:type='footnote' class='FootnoteRef'>
430
430
  <sup>1</sup>
431
431
  </a>
432
432
  </span>
@@ -681,7 +681,7 @@ Que?
681
681
  <br/>
682
682
  <div>
683
683
  <h1 class="ForewordTitle">Foreword</h1>
684
- <div class="Admonition"><p class="AdmonitionTitle" style="text-align:center;">CAUTION</p>
684
+ <div class="Admonition" id='_70234f78-64e5-4dfc-8b6f-f3f037348b6a'><p class="AdmonitionTitle" style="text-align:center;">CAUTION</p>
685
685
  <p id="_e94663cc-2473-4ccc-9a72-983a74d989f2">Only use paddy or parboiled rice for the determination of husked rice yield.</p>
686
686
  </div>
687
687
  </div>
@@ -707,7 +707,7 @@ Que?
707
707
  <br/>
708
708
  <div>
709
709
  <h1 class="ForewordTitle">Foreword</h1>
710
- <div class="Admonition"><p class="AdmonitionTitle" style="text-align:center;">Title</p>
710
+ <div class="Admonition" id="_70234f78-64e5-4dfc-8b6f-f3f037348b6a"><p class="AdmonitionTitle" style="text-align:center;">Title</p>
711
711
  <p id="_e94663cc-2473-4ccc-9a72-983a74d989f2">Only use paddy or parboiled rice for the determination of husked rice yield.</p>
712
712
  </div>
713
713
  </div>
@@ -969,7 +969,7 @@ World

969
969
  INPUT
970
970
  #{HTML_HDR}
971
971
  <p class="zzSTDTitle1"/>
972
- <div><h1>1.&#160; Terms and definitions</h1>
972
+ <div><h1>1.&#160; </h1>
973
973
  <p class="TermNum" id="_extraneous_matter">1.1.</p><p class="Terms" style="text-align:left;">extraneous matter</p><p class="AltTerms" style="text-align:left;">EM</p>
974
974
 
975
975
  <p id="_318b3939-be09-46c4-a284-93f9826b981e">&lt;rice&gt; organic and inorganic components other than whole or broken kernels</p>
@@ -1030,7 +1030,7 @@ World

1030
1030
  </import>
1031
1031
  </permission>
1032
1032
  </foreword></preface>
1033
- <bibliography><references id="_bibliography" obligation="informative">
1033
+ <bibliography><references id="_bibliography" obligation="informative" normative="false">
1034
1034
  <title>Bibliography</title>
1035
1035
  <bibitem id="rfc2616" type="standard"> <fetched>2020-03-27</fetched> <title format="text/plain" language="en" script="Latn">Hypertext Transfer Protocol — HTTP/1.1</title> <uri type="xml">https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2616.xml</uri> <uri type="src">https://www.rfc-editor.org/info/rfc2616</uri> <docidentifier type="IETF">RFC 2616</docidentifier> <docidentifier type="rfc-anchor">RFC2616</docidentifier> <docidentifier type="DOI">10.17487/RFC2616</docidentifier> <date type="published"> <on>1999-06</on> </date> <contributor> <role type="author"/> <person> <name> <completename language="en">R. Fielding</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">J. Gettys</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">J. Mogul</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">H. Frystyk</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">L. Masinter</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">P. Leach</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">T. Berners-Lee</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <language>en</language> <script>Latn</script> <abstract format="text/plain" language="en" script="Latn">HTTP has been in use by the World-Wide Web global information initiative since 1990. This specification defines the protocol referred to as “HTTP/1.1”, and is an update to RFC 2068. [STANDARDS-TRACK]</abstract> <series type="main"> <title format="text/plain" language="en" script="Latn">RFC</title> <number>2616</number> </series> <place>Fremont, CA</place></bibitem>
1036
1036
 
@@ -1462,5 +1462,56 @@ INPUT
1462
1462
  OUTPUT
1463
1463
  end
1464
1464
 
1465
+ it "processes passthrough with compatible format" do
1466
+ FileUtils.rm_f "test.html"
1467
+ IsoDoc::HtmlConvert.new({}).convert("test", <<~"INPUT", false)
1468
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
1469
+ <preface><foreword>
1470
+ <passthrough format="html,rfc">&lt;A&gt;</passthrough><em>Hello</em><passthrough format="html,rfc">&lt;/A&gt;</passthrough>
1471
+ </foreword></preface>
1472
+ </iso-standard>
1473
+ INPUT
1474
+ expect(( File.read("test.html").gsub(%r{^.*<h1 class="ForewordTitle">Foreword</h1>}m, "").gsub(%r{</div>.*}m, ""))).to be_equivalent_to xmlpp(<<~"OUTPUT")
1475
+ <A><i>Hello</i></A>
1476
+ OUTPUT
1477
+ end
1478
+
1479
+ it "aborts if passthrough results in malformed XML" do
1480
+ FileUtils.rm_f "test.html"
1481
+ FileUtils.rm_f "test.html.err"
1482
+ begin
1483
+ expect { IsoDoc::HtmlConvert.new({}).convert("test", <<~"INPUT", false) }.to raise_error(SystemExit)
1484
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
1485
+ <preface><foreword>
1486
+ <passthrough format="html,rfc">&lt;A&gt;</passthrough><em>Hello</em>
1487
+ </foreword></preface>
1488
+ </iso-standard>
1489
+ INPUT
1490
+ rescue SystemExit
1491
+ end
1492
+ expect(File.exist?("test.html.err")).to be true
1493
+ end
1494
+
1495
+ it "ignore passthrough with incompatible format" do
1496
+ expect(xmlpp(IsoDoc::HtmlConvert.new({}).convert("test", <<~"INPUT", true))).to be_equivalent_to xmlpp(<<~"OUTPUT")
1497
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
1498
+ <preface><foreword>
1499
+ <passthrough format="doc,rfc">&lt;A&gt;</passthrough>
1500
+ </foreword></preface>
1501
+ </iso-standard>
1502
+ INPUT
1503
+ #{HTML_HDR}
1504
+ <br/>
1505
+ <div>
1506
+ <h1 class='ForewordTitle'>Foreword</h1>
1507
+ </div>
1508
+ <p class='zzSTDTitle1'/>
1509
+ </div>
1510
+ </body>
1511
+ </html>
1512
+ OUTPUT
1513
+ end
1514
+
1515
+
1465
1516
 
1466
1517
  end
@@ -154,9 +154,9 @@ RSpec.describe IsoDoc do
154
154
  <br/>
155
155
  <div>
156
156
  <h1 class="ForewordTitle">Foreword</h1>
157
- <p>A.<a rel="footnote" href="#fn:2" epub:type="footnote"><sup>2</sup></a></p>
158
- <p>B.<a rel="footnote" href="#fn:2" epub:type="footnote"><sup>2</sup></a></p>
159
- <p>C.<a rel="footnote" href="#fn:1" epub:type="footnote"><sup>1</sup></a></p>
157
+ <p>A.<a class="FootnoteRef" href="#fn:2" epub:type="footnote"><sup>2</sup></a></p>
158
+ <p>B.<a class="FootnoteRef" href="#fn:2" epub:type="footnote"><sup>2</sup></a></p>
159
+ <p>C.<a class="FootnoteRef" href="#fn:1" epub:type="footnote"><sup>1</sup></a></p>
160
160
  </div>
161
161
  <p class="zzSTDTitle1"/>
162
162
  <aside id="fn:2" class="footnote">
@@ -173,9 +173,9 @@ RSpec.describe IsoDoc do
173
173
  <br/>
174
174
  <div>
175
175
  <h1 class="ForewordTitle">Foreword</h1>
176
- <p>A.<a rel="footnote" href="#fn:2" epub:type="footnote"><sup>1</sup></a></p>
177
- <p>B.<a rel="footnote" href="#fn:2" epub:type="footnote"><sup>2</sup></a></p>
178
- <p>C.<a rel="footnote" href="#fn:1" epub:type="footnote"><sup>3</sup></a></p>
176
+ <p>A.<a class="FootnoteRef" href="#fn:2" epub:type="footnote"><sup>1</sup></a></p>
177
+ <p>B.<a class="FootnoteRef" href="#fn:2" epub:type="footnote"><sup>2</sup></a></p>
178
+ <p>C.<a class="FootnoteRef" href="#fn:1" epub:type="footnote"><sup>3</sup></a></p>
179
179
  </div>
180
180
  <p class="zzSTDTitle1"/>
181
181
  <aside id="fn:2" class="footnote">
@@ -715,7 +715,7 @@ INPUT
715
715
  OUTPUT
716
716
  end
717
717
 
718
- it "breaks up very long strings in tables" do
718
+ it "does not break up very long strings in tables by default" do
719
719
  expect(xmlpp(IsoDoc::HtmlConvert.new({}).cleanup(Nokogiri::XML(<<~"INPUT")).to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
720
720
  <html xmlns:epub="http://www.idpf.org/2007/ops">
721
721
  <head>
@@ -739,6 +739,7 @@ INPUT
739
739
  <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
740
740
  <td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">Rice sample</td>
741
741
  </tr>
742
+ </thead>
742
743
  <tbody>
743
744
  <tr>
744
745
  <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
@@ -775,26 +776,102 @@ INPUT
775
776
  <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
776
777
  <td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>Rice sample</td>
777
778
  </tr>
779
+ </thead>
778
780
  <tbody>
779
781
  <tr>
780
782
  <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
781
- http://www.example.com/
782
- AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAA/
783
- BBBBBBBBBBBBBBBBBBBBBBBBBBBB
783
+ http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB
784
784
  </td>
785
785
  <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
786
- http://www.example.com/
787
- AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
788
- AAAAAAAABBBBBBBBBBBBBBBBBBBBBB BBBBBB
786
+ http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB </td>
787
+ <td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
788
+ www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
789
+ </td>
790
+ </tr>
791
+ </tbody>
792
+ </table>
793
+ </div>
794
+ </div>
795
+ </body>
796
+ </head>
797
+ </html>
798
+ OUTPUT
799
+ end
800
+
801
+ it "does not break up very long strings in tables on request in HTML" do
802
+ expect(xmlpp(IsoDoc::HtmlConvert.new({break_up_urls_in_tables: "true"}).cleanup(Nokogiri::XML(<<~"INPUT")).to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
803
+ <html xmlns:epub="http://www.idpf.org/2007/ops">
804
+ <head>
805
+ <title>test</title>
806
+ <body lang="EN-US" link="blue" vlink="#954F72">
807
+ <div class="WordSection1">
808
+ <p>&#160;</p>
809
+ </div>
810
+ <br clear="all" class="section"/>
811
+ <div class="WordSection2">
812
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
813
+ <div>
814
+ <h1 class="ForewordTitle">Foreword</h1>
815
+ <p class="TableTitle" align="center">
816
+ <b>Table 1&#160;&#8212; Repeatability and reproducibility of husked rice yield</b>
817
+ </p>
818
+ <table id="tableD-1" class="MsoISOTable" border="1" cellspacing="0" cellpadding="0">
819
+ <thead>
820
+ <tr>
821
+ <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
822
+ <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
823
+ <td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">Rice sample</td>
824
+ </tr>
825
+ </thead>
826
+ <tbody>
827
+ <tr>
828
+ <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
829
+ <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
830
+ <td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
831
+ </tr>
832
+ </tbody>
833
+ </table>
834
+ </div>
835
+ </div>
836
+ </body>
837
+ </html>
838
+ INPUT
839
+ <?xml version='1.0'?>
840
+ <html xmlns:epub='http://www.idpf.org/2007/ops'>
841
+ <head>
842
+ <title>test</title>
843
+ <body lang='EN-US' link='blue' vlink='#954F72'>
844
+ <div class='WordSection1'>
845
+ <p>&#xA0;</p>
846
+ </div>
847
+ <br clear='all' class='section'/>
848
+ <div class='WordSection2'>
849
+ <br clear='all' style='mso-special-character:line-break;page-break-before:always'/>
850
+ <div>
851
+ <h1 class='ForewordTitle'>Foreword</h1>
852
+ <p class='TableTitle' align='center'>
853
+ <b>Table 1&#xA0;&#x2014; Repeatability and reproducibility of husked rice yield</b>
854
+ </p>
855
+ <table id='tableD-1' class='MsoISOTable' border='1' cellspacing='0' cellpadding='0'>
856
+ <thead>
857
+ <tr>
858
+ <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
859
+ <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
860
+ <td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>Rice sample</td>
861
+ </tr>
862
+ </thead>
863
+ <tbody>
864
+ <tr>
865
+ <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
866
+ http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB
789
867
  </td>
868
+ <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
869
+ http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
790
870
  <td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
791
- www.example.com/
792
- AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
793
- ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
871
+ www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
794
872
  </td>
795
873
  </tr>
796
874
  </tbody>
797
- </thead>
798
875
  </table>
799
876
  </div>
800
877
  </div>
@@ -804,7 +881,8 @@ ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
804
881
  OUTPUT
805
882
  end
806
883
 
807
- it "breaks up very long strings in tables (Word)" do
884
+
885
+ it "does not break up very long strings in tables by default (Word)" do
808
886
  expect(xmlpp(IsoDoc::WordConvert.new({}).cleanup(Nokogiri::XML(<<~"INPUT")).to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
809
887
  <html xmlns:epub="http://www.idpf.org/2007/ops">
810
888
  <head>
@@ -828,16 +906,17 @@ ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
828
906
  <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
829
907
  <td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">Rice sample</td>
830
908
  </tr>
909
+ </thead>
831
910
  <tbody>
832
911
  <tr>
833
912
  <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
834
- http://www.example.com/ &amp;AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAA/ BBBBBBBBBBBBBBBBBBBBBBBBBBBB
913
+ http://www.example.com/&amp;AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB
835
914
  </td>
836
915
  <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
837
- http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAABBBBBBBBBBBBBBBBBBBBBB BBBBBB
916
+ http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
838
917
  </td>
839
918
  <td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
840
- www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
919
+ www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
841
920
  </td>
842
921
  </tr>
843
922
  </tbody>
@@ -870,20 +949,20 @@ ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
870
949
  <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
871
950
  <td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>Rice sample</td>
872
951
  </tr>
952
+ </thead>
873
953
  <tbody>
874
954
  <tr>
875
955
  <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
876
- http://www.example.com/ &amp;AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAA/ BBBBBBBBBBBBBBBBBBBBBBBBBBBB
956
+ http://www.example.com/&amp;AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB
877
957
  </td>
878
958
  <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
879
- http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAABBBBBBBBBBBBBBBBBBBBBB BBBBBB
959
+ http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
880
960
  </td>
881
961
  <td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
882
- www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
962
+ www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
883
963
  </td>
884
964
  </tr>
885
965
  </tbody>
886
- </thead>
887
966
  </table>
888
967
  </div>
889
968
  </div>
@@ -893,4 +972,217 @@ ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
893
972
  OUTPUT
894
973
  end
895
974
 
975
+ it "breaks up very long strings in tables on request (Word)" do
976
+ expect(xmlpp(IsoDoc::WordConvert.new({break_up_urls_in_tables: "true"}).cleanup(Nokogiri::XML(<<~"INPUT")).to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
977
+ <html xmlns:epub="http://www.idpf.org/2007/ops">
978
+ <head>
979
+ <title>test</title>
980
+ <body lang="EN-US" link="blue" vlink="#954F72">
981
+ <div class="WordSection1">
982
+ <p>&#160;</p>
983
+ </div>
984
+ <br clear="all" class="section"/>
985
+ <div class="WordSection2">
986
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
987
+ <div>
988
+ <h1 class="ForewordTitle">Foreword</h1>
989
+ <p class="TableTitle" align="center">
990
+ <b>Table 1&#160;&#8212; Repeatability and reproducibility of husked rice yield</b>
991
+ </p>
992
+ <table id="tableD-1" class="MsoISOTable" border="1" cellspacing="0" cellpadding="0">
993
+ <thead>
994
+ <tr>
995
+ <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
996
+ <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
997
+ <td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">Rice sample</td>
998
+ </tr>
999
+ </thead>
1000
+ <tbody>
1001
+ <tr>
1002
+ <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
1003
+ <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
1004
+ <td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
1005
+ </tr>
1006
+ </tbody>
1007
+ </table>
1008
+ </div>
1009
+ </div>
1010
+ </body>
1011
+ </html>
1012
+ INPUT
1013
+ <?xml version='1.0'?>
1014
+ <html xmlns:epub='http://www.idpf.org/2007/ops'>
1015
+ <head>
1016
+ <title>test</title>
1017
+ <body lang='EN-US' link='blue' vlink='#954F72'>
1018
+ <div class='WordSection1'>
1019
+ <p>&#xA0;</p>
1020
+ </div>
1021
+ <br clear='all' class='section'/>
1022
+ <div class='WordSection2'>
1023
+ <br clear='all' style='mso-special-character:line-break;page-break-before:always'/>
1024
+ <div>
1025
+ <h1 class='ForewordTitle'>Foreword</h1>
1026
+ <p class='TableTitle' align='center'>
1027
+ <b>Table 1&#xA0;&#x2014; Repeatability and reproducibility of husked rice yield</b>
1028
+ </p>
1029
+ <table id='tableD-1' class='MsoISOTable' border='1' cellspacing='0' cellpadding='0'>
1030
+ <thead>
1031
+ <tr>
1032
+ <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
1033
+ <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
1034
+ <td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>Rice sample</td>
1035
+ </tr>
1036
+ </thead>
1037
+ <tbody>
1038
+ <tr>
1039
+ <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
1040
+ http://www.example.com/ AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAA/ BBBBBBBBBBBBBBBBBBBBBBBBBBBB
1041
+ </td>
1042
+ <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
1043
+ http://www.example.com/ AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAABBBBBBBBBBBBBBBBBBBBBB BBBBBB
1044
+ </td>
1045
+ <td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
1046
+ www.example.com/ AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
1047
+ </td>
1048
+ </tr>
1049
+ </tbody>
1050
+ </table>
1051
+ </div>
1052
+ </div>
1053
+ </body>
1054
+ </head>
1055
+ </html>
1056
+ OUTPUT
1057
+ end
1058
+
1059
+ it "cleans up term sources" do
1060
+ c = IsoDoc::HtmlConvert.new({i18nyaml: "spec/assets/i18n.yaml"})
1061
+ c.i18n_init("en", "Latn")
1062
+ expect(xmlpp(c.textcleanup(<<~"INPUT").to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
1063
+ #{HTML_HDR}
1064
+ <p class="zzSTDTitle1"/>
1065
+ <div id="_terms_and_definitions"><h1>1.&#160; Terms and Definitions</h1><p>For the purposes of this document,
1066
+ the following terms and definitions apply.</p>
1067
+ <p class="TermNum" id="paddy1">1.1.</p><p class="Terms" style="text-align:left;">paddy</p>
1068
+
1069
+ <p id="_eb29b35e-123e-4d1c-b50b-2714d41e747f">&lt;rice&gt; rice retaining its husk after threshing</p>
1070
+ <div id="_bd57bbf1-f948-4bae-b0ce-73c00431f892" class="example"><p class="example-title">EXAMPLE 1</p>
1071
+ <p id="_65c9a509-9a89-4b54-a890-274126aeb55c">Foreign seeds, husks, bran, sand, dust.</p>
1072
+ <ul>
1073
+ <li>A</li>
1074
+ </ul>
1075
+ </div>
1076
+ <div id="_bd57bbf1-f948-4bae-b0ce-73c00431f894" class="example"><p class="example-title">EXAMPLE 2</p>
1077
+ <ul>
1078
+ <li>A</li>
1079
+ </ul>
1080
+ </div>
1081
+
1082
+ <p>[TERMREF]
1083
+ <a href="#ISO7301">ISO 7301:2011, Clause 3.1</a>
1084
+ [MODIFICATION]The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here
1085
+ [/TERMREF]</p>
1086
+ <p>[TERMREF] Termbase IEV, term ID xyz [/TERMREF]</p>
1087
+ <p>[TERMREF] Termbase IEV, term ID xyz [MODIFICATION]with adjustments [/TERMREF]</p>
1088
+ <p class="TermNum" id="paddy">1.2.</p><p class="Terms" style="text-align:left;">paddy</p><p class="AltTerms" style="text-align:left;">paddy rice</p>
1089
+ <p class="AltTerms" style="text-align:left;">rough rice</p>
1090
+ <p class="DeprecatedTerms" style="text-align:left;">DEPRECATED: cargo rice</p>
1091
+ <p id="_eb29b35e-123e-4d1c-b50b-2714d41e747f">rice retaining its husk after threshing</p>
1092
+ <div id="_bd57bbf1-f948-4bae-b0ce-73c00431f893" class="example"><p class="example-title">EXAMPLE</p>
1093
+ <ul>
1094
+ <li>A</li>
1095
+ </ul>
1096
+ </div>
1097
+ <div class="Note"><p>Note 1 to entry: The starch of waxy rice consists almost entirely of amylopectin. The kernels have a tendency to stick together after cooking.</p></div>
1098
+ <div class="Note"><p>Note 2 to entry: <ul><li>A</li></ul><p id="_19830f33-e46c-42cc-94ca-a5ef101132d5">The starch of waxy rice consists almost entirely of amylopectin. The kernels have a tendency to stick together after cooking.</p></p></div>
1099
+ <p>[TERMREF]
1100
+ <a href="#ISO7301">ISO 7301:2011, Clause 3.1</a>
1101
+ [/TERMREF]</p></div>
1102
+ </div>
1103
+ </body>
1104
+ </html>
1105
+ INPUT
1106
+ <html xmlns:epub='http://www.idpf.org/2007/ops' lang='en'>
1107
+ <head/>
1108
+ <body lang='en'>
1109
+ <div class='title-section'>
1110
+ <p>&#160;</p>
1111
+ </div>
1112
+ <br/>
1113
+ <div class='prefatory-section'>
1114
+ <p>&#160;</p>
1115
+ </div>
1116
+ <br/>
1117
+ <div class='main-section'>
1118
+ <p class='zzSTDTitle1'/>
1119
+ <div id='_terms_and_definitions'>
1120
+ <h1>1.&#160; Terms and Definitions</h1>
1121
+ <p>For the purposes of this document, the following terms and definitions apply.</p>
1122
+ <p class='TermNum' id='paddy1'>1.1.</p>
1123
+ <p class='Terms' style='text-align:left;'>paddy</p>
1124
+ <p id='_eb29b35e-123e-4d1c-b50b-2714d41e747f'>&lt;rice&gt; rice retaining its husk after threshing</p>
1125
+ <div id='_bd57bbf1-f948-4bae-b0ce-73c00431f892' class='example'>
1126
+ <p class='example-title'>EXAMPLE 1</p>
1127
+ <p id='_65c9a509-9a89-4b54-a890-274126aeb55c'>Foreign seeds, husks, bran, sand, dust.</p>
1128
+ <ul>
1129
+ <li>A</li>
1130
+ </ul>
1131
+ </div>
1132
+ <div id='_bd57bbf1-f948-4bae-b0ce-73c00431f894' class='example'>
1133
+ <p class='example-title'>EXAMPLE 2</p>
1134
+ <ul>
1135
+ <li>A</li>
1136
+ </ul>
1137
+ </div>
1138
+ <p>
1139
+ [SOURCE:
1140
+ <a href='#ISO7301'>ISO 7301:2011, Clause 3.1</a>
1141
+ , modified &mdash; The term "cargo rice" is shown as deprecated, and
1142
+ Note 1 to entry is not included here; Termbase IEV, term ID xyz;
1143
+ Termbase IEV, term ID xyz, modified &mdash; with adjustments]
1144
+ </p>
1145
+ <p class='TermNum' id='paddy'>1.2.</p>
1146
+ <p class='Terms' style='text-align:left;'>paddy</p>
1147
+ <p class='AltTerms' style='text-align:left;'>paddy rice</p>
1148
+ <p class='AltTerms' style='text-align:left;'>rough rice</p>
1149
+ <p class='DeprecatedTerms' style='text-align:left;'>DEPRECATED: cargo rice</p>
1150
+ <p id='_eb29b35e-123e-4d1c-b50b-2714d41e747f'>rice retaining its husk after threshing</p>
1151
+ <div id='_bd57bbf1-f948-4bae-b0ce-73c00431f893' class='example'>
1152
+ <p class='example-title'>EXAMPLE</p>
1153
+ <ul>
1154
+ <li>A</li>
1155
+ </ul>
1156
+ </div>
1157
+ <div class='Note'>
1158
+ <p>
1159
+ Note 1 to entry: The starch of waxy rice consists almost entirely of
1160
+ amylopectin. The kernels have a tendency to stick together after
1161
+ cooking.
1162
+ </p>
1163
+ </div>
1164
+ <div class='Note'>
1165
+ <p>
1166
+ Note 2 to entry:
1167
+ <ul>
1168
+ <li>A</li>
1169
+ </ul>
1170
+ <p id='_19830f33-e46c-42cc-94ca-a5ef101132d5'>
1171
+ The starch of waxy rice consists almost entirely of amylopectin.
1172
+ The kernels have a tendency to stick together after cooking.
1173
+ </p>
1174
+ </p>
1175
+ </div>
1176
+ <p>
1177
+ [SOURCE:
1178
+ <a href='#ISO7301'>ISO 7301:2011, Clause 3.1</a>
1179
+ ]
1180
+ </p>
1181
+ </div>
1182
+ </div>
1183
+ </body>
1184
+ </html>
1185
+ OUTPUT
1186
+ end
1187
+
896
1188
  end