isodoc 1.0.23 → 1.0.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (50)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/macos.yml +10 -2
  3. data/.github/workflows/ubuntu.yml +13 -3
  4. data/.github/workflows/windows.yml +10 -2
  5. data/isodoc.gemspec +1 -1
  6. data/lib/isodoc-yaml/i18n-en.yaml +3 -1
  7. data/lib/isodoc-yaml/i18n-fr.yaml +3 -1
  8. data/lib/isodoc-yaml/i18n-zh-Hans.yaml +3 -1
  9. data/lib/isodoc/base_style/reset.scss +1 -1
  10. data/lib/isodoc/convert.rb +1 -0
  11. data/lib/isodoc/function/blocks.rb +6 -1
  12. data/lib/isodoc/function/cleanup.rb +16 -2
  13. data/lib/isodoc/function/i18n.rb +5 -5
  14. data/lib/isodoc/function/inline.rb +77 -79
  15. data/lib/isodoc/function/inline_simple.rb +72 -0
  16. data/lib/isodoc/function/references.rb +49 -37
  17. data/lib/isodoc/function/section.rb +19 -8
  18. data/lib/isodoc/function/table.rb +0 -1
  19. data/lib/isodoc/function/to_word_html.rb +23 -13
  20. data/lib/isodoc/function/utils.rb +11 -5
  21. data/lib/isodoc/function/xref_gen.rb +2 -1
  22. data/lib/isodoc/function/xref_sect_gen.rb +24 -24
  23. data/lib/isodoc/headlesshtml_convert.rb +5 -0
  24. data/lib/isodoc/html_convert.rb +5 -0
  25. data/lib/isodoc/html_function/footnotes.rb +3 -3
  26. data/lib/isodoc/html_function/html.rb +15 -0
  27. data/lib/isodoc/html_function/postprocess.rb +6 -5
  28. data/lib/isodoc/metadata.rb +10 -3
  29. data/lib/isodoc/metadata_date.rb +19 -7
  30. data/lib/isodoc/pdf_convert.rb +5 -0
  31. data/lib/isodoc/version.rb +1 -1
  32. data/lib/isodoc/word_convert.rb +5 -0
  33. data/lib/isodoc/word_function/body.rb +0 -4
  34. data/lib/isodoc/word_function/footnotes.rb +3 -3
  35. data/lib/isodoc/word_function/postprocess.rb +13 -2
  36. data/lib/isodoc/xslfo_convert.rb +5 -0
  37. data/spec/assets/i18n.yaml +4 -1
  38. data/spec/isodoc/blocks_spec.rb +59 -8
  39. data/spec/isodoc/cleanup_spec.rb +317 -25
  40. data/spec/isodoc/footnotes_spec.rb +20 -5
  41. data/spec/isodoc/i18n_spec.rb +12 -12
  42. data/spec/isodoc/inline_spec.rb +118 -5
  43. data/spec/isodoc/metadata_spec.rb +8 -3
  44. data/spec/isodoc/postproc_spec.rb +34 -12
  45. data/spec/isodoc/ref_spec.rb +120 -51
  46. data/spec/isodoc/section_spec.rb +236 -207
  47. data/spec/isodoc/table_spec.rb +24 -24
  48. data/spec/isodoc/terms_spec.rb +50 -6
  49. data/spec/isodoc/xref_spec.rb +53 -26
  50. metadata +5 -4
@@ -14,6 +14,11 @@ module IsoDoc
14
14
  "_headlessimages"
15
15
  end
16
16
 
17
+ def initialize(options)
18
+ @format = :html
19
+ super
20
+ end
21
+
17
22
  def convert(filename, file = nil, debug = false)
18
23
  file = File.read(filename, encoding: "utf-8") if file.nil?
19
24
  @openmathdelim, @closemathdelim = extract_delims(file)
@@ -14,6 +14,11 @@ module IsoDoc
14
14
  "_htmlimages"
15
15
  end
16
16
 
17
+ def initialize(options)
18
+ @format = :html
19
+ super
20
+ end
21
+
17
22
  def convert(filename, file = nil, debug = false)
18
23
  ret = super
19
24
  Dir.exists?(tmpimagedir) and Dir["#{tmpimagedir}/*"].empty? and
@@ -48,7 +48,7 @@ module IsoDoc::HtmlFunction
48
48
  end
49
49
 
50
50
  def table_footnote_parse(node, out)
51
- fn = node["reference"]
51
+ fn = node["reference"] || UUIDTools::UUID.random_create.to_s
52
52
  tid = get_table_ancestor_id(node)
53
53
  make_table_footnote_link(out, tid + fn, fn)
54
54
  # do not output footnote text if we have already seen it for this table
@@ -64,8 +64,8 @@ module IsoDoc::HtmlFunction
64
64
  def footnote_parse(node, out)
65
65
  return table_footnote_parse(node, out) if (@in_table || @in_figure) &&
66
66
  !node.ancestors.map {|m| m.name }.include?("name")
67
- fn = node["reference"]
68
- attrs = { "epub:type": "footnote", rel: "footnote", href: "#fn:#{fn}" }
67
+ fn = node["reference"] || UUIDTools::UUID.random_create.to_s
68
+ attrs = { class: "FootnoteRef", href: "#fn:#{fn}" }
69
69
  out.a **attrs do |a|
70
70
  a.sup { |sup| sup << fn }
71
71
  end
@@ -3,6 +3,18 @@ require "base64"
3
3
 
4
4
  module IsoDoc::HtmlFunction
5
5
  module Html
6
+ def convert1(docxml, filename, dir)
7
+ anchor_names docxml
8
+ noko do |xml|
9
+ xml.html **{ lang: "#{@lang}" } do |html|
10
+ info docxml, nil
11
+ populate_css()
12
+ html.head { |head| define_head head, filename, dir }
13
+ make_body(html, docxml)
14
+ end
15
+ end.join("\n")
16
+ end
17
+
6
18
  def make_body1(body, _docxml)
7
19
  body.div **{ class: "title-section" } do |div1|
8
20
  div1.p { |p| p << "&nbsp;" } # placeholder
@@ -97,5 +109,8 @@ module IsoDoc::HtmlFunction
97
109
  end
98
110
  sourcecode_name_parse(node, out, name)
99
111
  end
112
+
113
+ def table_long_strings_cleanup(docxml)
114
+ end
100
115
  end
101
116
  end
@@ -108,7 +108,7 @@ module IsoDoc::HtmlFunction
108
108
  idx = docxml.at("//div[@id = 'toc']") or return docxml
109
109
  toc = "<ul>"
110
110
  path = toclevel_classes.map do |l|
111
- "//main//#{l}[not(@class = 'TermNum')][not(@class = 'noTOC')][not(text())]"
111
+ "//main//#{l}[not(@class = 'TermNum')][not(@class = 'noTOC')][text()]"
112
112
  end
113
113
  docxml.xpath(path.join(" | ")).each_with_index do |h, tocidx|
114
114
  h["id"] ||= "toc#{tocidx}"
@@ -159,7 +159,8 @@ module IsoDoc::HtmlFunction
159
159
  def inject_script(doc)
160
160
  return doc unless @scripts
161
161
  scripts = File.read(@scripts, encoding: "UTF-8")
162
- doc.sub("</body>", scripts + "\n</body>")
162
+ a = doc.split(%r{</body>})
163
+ a[0] + scripts + "</body>" + a[1]
163
164
  end
164
165
 
165
166
  def update_footnote_filter(fn, x, i, seen)
@@ -178,7 +179,7 @@ module IsoDoc::HtmlFunction
178
179
  def html_footnote_filter(docxml)
179
180
  seen = {}
180
181
  i = 1
181
- docxml.xpath('//a[@epub:type = "footnote"]').each do |x|
182
+ docxml.xpath('//a[@class = "FootnoteRef"]').each do |x|
182
183
  fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || next
183
184
  i, seen = update_footnote_filter(fn, x, i, seen)
184
185
  end
@@ -187,7 +188,7 @@ module IsoDoc::HtmlFunction
187
188
 
188
189
  def footnote_backlinks(docxml)
189
190
  seen = {}
190
- docxml.xpath('//a[@epub:type = "footnote"]').each_with_index do |x, i|
191
+ docxml.xpath('//a[@class = "FootnoteRef"]').each_with_index do |x, i|
191
192
  seen[x["href"]] and next or seen[x["href"]] = true
192
193
  fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || next
193
194
  xdup = x.dup
@@ -200,7 +201,7 @@ module IsoDoc::HtmlFunction
200
201
  end
201
202
 
202
203
  def footnote_format(docxml)
203
- docxml.xpath("//a[@epub:type = 'footnote']/sup").each do |x|
204
+ docxml.xpath("//a[@class = 'FootnoteRef']/sup").each do |x|
204
205
  footnote_reference_format(x)
205
206
  end
206
207
  docxml.xpath("//a[@class = 'TableFootnoteRef'] | "\
@@ -3,7 +3,8 @@ require_relative "./metadata_date"
3
3
  module IsoDoc
4
4
  class Metadata
5
5
  DATETYPES = %w{published accessed created implemented obsoleted confirmed
6
- updated issued received transmitted copied unchanged circulated}.freeze
6
+ updated issued received transmitted copied unchanged circulated vote-started
7
+ vote-ended}.freeze
7
8
 
8
9
  def ns(xpath)
9
10
  Common::ns(xpath)
@@ -15,7 +16,7 @@ module IsoDoc
15
16
 
16
17
  def initialize(lang, script, labels)
17
18
  @metadata = {}
18
- DATETYPES.each { |w| @metadata["#{w}date".to_sym] = "XXX" }
19
+ DATETYPES.each { |w| @metadata["#{w.gsub(/-/, "_")}date".to_sym] = "XXX" }
19
20
  @lang = lang
20
21
  @script = script
21
22
  @c = HTMLEntities.new
@@ -84,7 +85,7 @@ module IsoDoc
84
85
 
85
86
  def bibdate(isoxml, _out)
86
87
  isoxml.xpath(ns("//bibdata/date")).each do |d|
87
- set("#{d['type']}date".to_sym, Common::date_range(d))
88
+ set("#{d['type'].gsub(/-/, "_")}date".to_sym, Common::date_range(d))
88
89
  end
89
90
  end
90
91
 
@@ -217,5 +218,11 @@ module IsoDoc
217
218
  a = xml.at(ns("//bibdata/uri[@type = 'pdf']")) and set(:pdf, a.text)
218
219
  a = xml.at(ns("//bibdata/uri[@type = 'doc']")) and set(:doc, a.text)
219
220
  end
221
+
222
+ def keywords(isoxml, _out)
223
+ ret = []
224
+ isoxml.xpath(ns("//bibdata/keyword")).each { |kw| ret << kw.text }
225
+ set(:keywords, ret)
226
+ end
220
227
  end
221
228
  end
@@ -14,14 +14,26 @@ module IsoDoc
14
14
  "10": @labels["month_october"],
15
15
  "11": @labels["month_november"],
16
16
  "12": @labels["month_december"],
17
- }
17
+ }
18
18
  end
19
19
 
20
- def monthyr(isodate)
21
- m = /(?<yr>\d\d\d\d)-(?<mo>\d\d)/.match isodate
22
- return isodate unless m && m[:yr] && m[:mo]
23
- IsoDoc::Function::I18n::l10n("#{months[m[:mo].to_sym]} #{m[:yr]}",
24
- @lang, @script)
25
- end
20
+ def monthyr(isodate)
21
+ m = /(?<yr>\d\d\d\d)-(?<mo>\d\d)/.match isodate
22
+ return isodate unless m && m[:yr] && m[:mo]
23
+ IsoDoc::Function::I18n::l10n("#{months[m[:mo].to_sym]} #{m[:yr]}",
24
+ @lang, @script)
25
+ end
26
+
27
+ def MMMddyyyy(isodate)
28
+ return nil if isodate.nil?
29
+ arr = isodate.split("-")
30
+ date = if arr.size == 1 and (/^\d+$/.match isodate)
31
+ Date.new(*arr.map(&:to_i)).strftime("%Y")
32
+ elsif arr.size == 2
33
+ Date.new(*arr.map(&:to_i)).strftime("%B %Y")
34
+ else
35
+ Date.parse(isodate).strftime("%B %d, %Y")
36
+ end
37
+ end
26
38
  end
27
39
  end
@@ -22,6 +22,11 @@ module IsoDoc
22
22
  "_pdfimages"
23
23
  end
24
24
 
25
+ def initialize(options)
26
+ @format = :pdf
27
+ super
28
+ end
29
+
25
30
  def convert(filename, file = nil, debug = false)
26
31
  file = File.read(filename, encoding: "utf-8") if file.nil?
27
32
  @openmathdelim, @closemathdelim = extract_delims(file)
@@ -1,3 +1,3 @@
1
1
  module IsoDoc
2
- VERSION = "1.0.23".freeze
2
+ VERSION = "1.0.28".freeze
3
3
  end
@@ -30,6 +30,11 @@ module IsoDoc
30
30
  include WordFunction::Body
31
31
  include WordFunction::Postprocess
32
32
 
33
+ def initialize(options)
34
+ @format = :doc
35
+ super
36
+ end
37
+
33
38
  def convert(filename, file = nil, debug = false)
34
39
  ret = super
35
40
  FileUtils.rm_rf tmpimagedir
@@ -204,10 +204,6 @@ module IsoDoc::WordFunction
204
204
  alt: node["alt"],
205
205
  title: node["title"],
206
206
  width: node["width"] }
207
- if node["height"] == "auto" || node["width"] == "auto"
208
- attrs[:height] = nil
209
- attrs[:width] = nil
210
- end
211
207
  out.img **attr_code(attrs)
212
208
  image_title_parse(out, caption)
213
209
  end
@@ -56,7 +56,7 @@ module IsoDoc::WordFunction
56
56
  end
57
57
 
58
58
  def table_footnote_parse(node, out)
59
- fn = node["reference"]
59
+ fn = node["reference"] || UUIDTools::UUID.random_create.to_s
60
60
  tid = get_table_ancestor_id(node)
61
61
  make_table_footnote_link(out, tid + fn, fn)
62
62
  # do not output footnote text if we have already seen it for this table
@@ -80,11 +80,11 @@ module IsoDoc::WordFunction
80
80
  def footnote_parse(node, out)
81
81
  return table_footnote_parse(node, out) if (@in_table || @in_figure) &&
82
82
  !node.ancestors.map {|m| m.name }.include?("name")
83
- fn = node["reference"]
83
+ fn = node["reference"] || UUIDTools::UUID.random_create.to_s
84
84
  return seen_footnote_parse(node, out, fn) if @seen_footnote.include?(fn)
85
85
  @fn_bookmarks[fn] = bookmarkid
86
86
  out.span **{style: "mso-bookmark:_Ref#{@fn_bookmarks[fn]}"} do |s|
87
- s.a **{ "epub:type": "footnote", href: "#ftn#{fn}" } do |a|
87
+ s.a **{ "class": "FootnoteRef", "epub:type": "footnote", href: "#ftn#{fn}" } do |a|
88
88
  a.sup { |sup| sup << fn }
89
89
  end
90
90
  end
@@ -39,7 +39,6 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
39
39
  end
40
40
 
41
41
  def toWord(result, filename, dir, header)
42
- #result = populate_template(result, :word)
43
42
  result = from_xhtml(word_cleanup(to_xhtml(result)))
44
43
  unless @landscapestyle.empty?
45
44
  @wordstylesheet&.open
@@ -196,6 +195,8 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
196
195
  @landscapestyle = ""
197
196
  word_section_breaks1(docxml, "WordSection2")
198
197
  word_section_breaks1(docxml, "WordSection3")
198
+ word_remove_pb_before_annex(docxml)
199
+ docxml.xpath("//br[@orientation]").each { |br| br.delete("orientation") }
199
200
  end
200
201
 
201
202
  def word_section_breaks1(docxml, sect)
@@ -203,7 +204,6 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
203
204
  each_with_index do |br, i|
204
205
  @landscapestyle += "\ndiv.#{sect}_#{i} {page:#{sect}"\
205
206
  "#{br["orientation"] == "landscape" ? "L" : "P"};}\n"
206
- br.delete("orientation")
207
207
  split_at_section_break(docxml, sect, br, i)
208
208
  end
209
209
  end
@@ -219,6 +219,17 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
219
219
  end
220
220
  end
221
221
 
222
+ # applies for <div class="WordSectionN_M"><p><pagebreak/></p>...
223
+ def word_remove_pb_before_annex(docxml)
224
+ docxml.xpath("//div[p/br]").each do |d|
225
+ /^WordSection\d+_\d+$/.match(d["class"]) or next
226
+ d.elements[0].name == "p" && !d.elements[0].elements.empty? or next
227
+ d.elements[0].elements[0].name == "br" && d.elements[0].elements[0]["style"] ==
228
+ "mso-special-character:line-break;page-break-before:always" or next
229
+ d.elements[0].remove
230
+ end
231
+ end
232
+
222
233
  def word_footnote_format(docxml)
223
234
  # the content is in a[@epub:type = 'footnote']//sup, but in Word,
224
235
  # we need to inject content around the autonumbered footnote reference
@@ -13,6 +13,11 @@ module IsoDoc
13
13
  "_pdfimages"
14
14
  end
15
15
 
16
+ def initialize(options)
17
+ @format = :pdf
18
+ super
19
+ end
20
+
16
21
  def convert(filename, file = nil, debug = false)
17
22
  file = File.read(filename, encoding: "utf-8") if file.nil?
18
23
  docxml, outname_html, dir = convert_init(file, filename, debug)
@@ -1,2 +1,5 @@
1
1
  foreword: Enkonduko
2
-
2
+ clause: klaŭzo
3
+ table: Tabelo
4
+ source: SOURCE
5
+ modified: modified
@@ -355,7 +355,7 @@ B
355
355
  </div></aside>
356
356
  <p style='page-break-after:avoid;'><b>Key</b></p><dl><dt><p>A</p></dt><dd><p>B</p></dd></dl>
357
357
  <p class="FigureTitle" style="text-align:center;">Figure 1&#160;&#8212; Split-it-right <i>sample</i> divider
358
- <a rel='footnote' href='#fn:1' epub:type='footnote'>
358
+ <a class='FootnoteRef' href='#fn:1'>
359
359
  <sup>1</sup>
360
360
  </a>
361
361
  </p></div>
@@ -417,8 +417,8 @@ B
417
417
  <div id="figureA-1" class="figure">
418
418
 
419
419
  <img src="rice_images/rice_image1.png" height="20" width="30" alt="alttext" title="titletext"/>
420
- <img src="rice_images/rice_image1.png"/>
421
- <img src="_.gif"/>
420
+ <img src="rice_images/rice_image1.png" height='20' width='auto'/>
421
+ <img src='_.gif' height='20' width='auto'/>
422
422
  <a href="#_" class="TableFootnoteRef">a</a><aside><div id="ftn_"><span><span id="_" class="TableFootnoteRef">a</span><span style="mso-tab-count:1">&#160; </span></span>
423
423
  <p id="_">The time <span class="stem">(#(t_90)#)</span> was estimated to be 18,2 min for this example.</p>
424
424
  </div></aside>
@@ -426,7 +426,7 @@ B
426
426
  <p class='FigureTitle' style='text-align:center;'>
427
427
  Figure 1&#160;&#8212; Split-it-right sample divider
428
428
  <span style='mso-bookmark:_Ref'>
429
- <a href='#ftn1' epub:type='footnote'>
429
+ <a href='#ftn1' epub:type='footnote' class='FootnoteRef'>
430
430
  <sup>1</sup>
431
431
  </a>
432
432
  </span>
@@ -681,7 +681,7 @@ Que?
681
681
  <br/>
682
682
  <div>
683
683
  <h1 class="ForewordTitle">Foreword</h1>
684
- <div class="Admonition"><p class="AdmonitionTitle" style="text-align:center;">CAUTION</p>
684
+ <div class="Admonition" id='_70234f78-64e5-4dfc-8b6f-f3f037348b6a'><p class="AdmonitionTitle" style="text-align:center;">CAUTION</p>
685
685
  <p id="_e94663cc-2473-4ccc-9a72-983a74d989f2">Only use paddy or parboiled rice for the determination of husked rice yield.</p>
686
686
  </div>
687
687
  </div>
@@ -707,7 +707,7 @@ Que?
707
707
  <br/>
708
708
  <div>
709
709
  <h1 class="ForewordTitle">Foreword</h1>
710
- <div class="Admonition"><p class="AdmonitionTitle" style="text-align:center;">Title</p>
710
+ <div class="Admonition" id="_70234f78-64e5-4dfc-8b6f-f3f037348b6a"><p class="AdmonitionTitle" style="text-align:center;">Title</p>
711
711
  <p id="_e94663cc-2473-4ccc-9a72-983a74d989f2">Only use paddy or parboiled rice for the determination of husked rice yield.</p>
712
712
  </div>
713
713
  </div>
@@ -969,7 +969,7 @@ World

969
969
  INPUT
970
970
  #{HTML_HDR}
971
971
  <p class="zzSTDTitle1"/>
972
- <div><h1>1.&#160; Terms and definitions</h1>
972
+ <div><h1>1.&#160; </h1>
973
973
  <p class="TermNum" id="_extraneous_matter">1.1.</p><p class="Terms" style="text-align:left;">extraneous matter</p><p class="AltTerms" style="text-align:left;">EM</p>
974
974
 
975
975
  <p id="_318b3939-be09-46c4-a284-93f9826b981e">&lt;rice&gt; organic and inorganic components other than whole or broken kernels</p>
@@ -1030,7 +1030,7 @@ World

1030
1030
  </import>
1031
1031
  </permission>
1032
1032
  </foreword></preface>
1033
- <bibliography><references id="_bibliography" obligation="informative">
1033
+ <bibliography><references id="_bibliography" obligation="informative" normative="false">
1034
1034
  <title>Bibliography</title>
1035
1035
  <bibitem id="rfc2616" type="standard"> <fetched>2020-03-27</fetched> <title format="text/plain" language="en" script="Latn">Hypertext Transfer Protocol — HTTP/1.1</title> <uri type="xml">https://xml2rfc.tools.ietf.org/public/rfc/bibxml/reference.RFC.2616.xml</uri> <uri type="src">https://www.rfc-editor.org/info/rfc2616</uri> <docidentifier type="IETF">RFC 2616</docidentifier> <docidentifier type="rfc-anchor">RFC2616</docidentifier> <docidentifier type="DOI">10.17487/RFC2616</docidentifier> <date type="published"> <on>1999-06</on> </date> <contributor> <role type="author"/> <person> <name> <completename language="en">R. Fielding</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">J. Gettys</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">J. Mogul</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">H. Frystyk</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">L. Masinter</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">P. 
Leach</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <contributor> <role type="author"/> <person> <name> <completename language="en">T. Berners-Lee</completename> </name> <affiliation> <organization> <name>IETF</name> <abbreviation>IETF</abbreviation> </organization> </affiliation> </person> </contributor> <language>en</language> <script>Latn</script> <abstract format="text/plain" language="en" script="Latn">HTTP has been in use by the World-Wide Web global information initiative since 1990. This specification defines the protocol referred to as “HTTP/1.1”, and is an update to RFC 2068. [STANDARDS-TRACK]</abstract> <series type="main"> <title format="text/plain" language="en" script="Latn">RFC</title> <number>2616</number> </series> <place>Fremont, CA</place></bibitem>
1036
1036
 
@@ -1462,5 +1462,56 @@ INPUT
1462
1462
  OUTPUT
1463
1463
  end
1464
1464
 
1465
+ it "processes passthrough with compatible format" do
1466
+ FileUtils.rm_f "test.html"
1467
+ IsoDoc::HtmlConvert.new({}).convert("test", <<~"INPUT", false)
1468
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
1469
+ <preface><foreword>
1470
+ <passthrough format="html,rfc">&lt;A&gt;</passthrough><em>Hello</em><passthrough format="html,rfc">&lt;/A&gt;</passthrough>
1471
+ </foreword></preface>
1472
+ </iso-standard>
1473
+ INPUT
1474
+ expect(( File.read("test.html").gsub(%r{^.*<h1 class="ForewordTitle">Foreword</h1>}m, "").gsub(%r{</div>.*}m, ""))).to be_equivalent_to xmlpp(<<~"OUTPUT")
1475
+ <A><i>Hello</i></A>
1476
+ OUTPUT
1477
+ end
1478
+
1479
+ it "aborts if passthrough results in malformed XML" do
1480
+ FileUtils.rm_f "test.html"
1481
+ FileUtils.rm_f "test.html.err"
1482
+ begin
1483
+ expect { IsoDoc::HtmlConvert.new({}).convert("test", <<~"INPUT", false) }.to raise_error(SystemExit)
1484
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
1485
+ <preface><foreword>
1486
+ <passthrough format="html,rfc">&lt;A&gt;</passthrough><em>Hello</em>
1487
+ </foreword></preface>
1488
+ </iso-standard>
1489
+ INPUT
1490
+ rescue SystemExit
1491
+ end
1492
+ expect(File.exist?("test.html.err")).to be true
1493
+ end
1494
+
1495
+ it "ignore passthrough with incompatible format" do
1496
+ expect(xmlpp(IsoDoc::HtmlConvert.new({}).convert("test", <<~"INPUT", true))).to be_equivalent_to xmlpp(<<~"OUTPUT")
1497
+ <iso-standard xmlns="http://riboseinc.com/isoxml">
1498
+ <preface><foreword>
1499
+ <passthrough format="doc,rfc">&lt;A&gt;</passthrough>
1500
+ </foreword></preface>
1501
+ </iso-standard>
1502
+ INPUT
1503
+ #{HTML_HDR}
1504
+ <br/>
1505
+ <div>
1506
+ <h1 class='ForewordTitle'>Foreword</h1>
1507
+ </div>
1508
+ <p class='zzSTDTitle1'/>
1509
+ </div>
1510
+ </body>
1511
+ </html>
1512
+ OUTPUT
1513
+ end
1514
+
1515
+
1465
1516
 
1466
1517
  end
@@ -154,9 +154,9 @@ RSpec.describe IsoDoc do
154
154
  <br/>
155
155
  <div>
156
156
  <h1 class="ForewordTitle">Foreword</h1>
157
- <p>A.<a rel="footnote" href="#fn:2" epub:type="footnote"><sup>2</sup></a></p>
158
- <p>B.<a rel="footnote" href="#fn:2" epub:type="footnote"><sup>2</sup></a></p>
159
- <p>C.<a rel="footnote" href="#fn:1" epub:type="footnote"><sup>1</sup></a></p>
157
+ <p>A.<a class="FootnoteRef" href="#fn:2" epub:type="footnote"><sup>2</sup></a></p>
158
+ <p>B.<a class="FootnoteRef" href="#fn:2" epub:type="footnote"><sup>2</sup></a></p>
159
+ <p>C.<a class="FootnoteRef" href="#fn:1" epub:type="footnote"><sup>1</sup></a></p>
160
160
  </div>
161
161
  <p class="zzSTDTitle1"/>
162
162
  <aside id="fn:2" class="footnote">
@@ -173,9 +173,9 @@ RSpec.describe IsoDoc do
173
173
  <br/>
174
174
  <div>
175
175
  <h1 class="ForewordTitle">Foreword</h1>
176
- <p>A.<a rel="footnote" href="#fn:2" epub:type="footnote"><sup>1</sup></a></p>
177
- <p>B.<a rel="footnote" href="#fn:2" epub:type="footnote"><sup>2</sup></a></p>
178
- <p>C.<a rel="footnote" href="#fn:1" epub:type="footnote"><sup>3</sup></a></p>
176
+ <p>A.<a class="FootnoteRef" href="#fn:2" epub:type="footnote"><sup>1</sup></a></p>
177
+ <p>B.<a class="FootnoteRef" href="#fn:2" epub:type="footnote"><sup>2</sup></a></p>
178
+ <p>C.<a class="FootnoteRef" href="#fn:1" epub:type="footnote"><sup>3</sup></a></p>
179
179
  </div>
180
180
  <p class="zzSTDTitle1"/>
181
181
  <aside id="fn:2" class="footnote">
@@ -715,7 +715,7 @@ INPUT
715
715
  OUTPUT
716
716
  end
717
717
 
718
- it "breaks up very long strings in tables" do
718
+ it "does not break up very long strings in tables by default" do
719
719
  expect(xmlpp(IsoDoc::HtmlConvert.new({}).cleanup(Nokogiri::XML(<<~"INPUT")).to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
720
720
  <html xmlns:epub="http://www.idpf.org/2007/ops">
721
721
  <head>
@@ -739,6 +739,7 @@ INPUT
739
739
  <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
740
740
  <td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">Rice sample</td>
741
741
  </tr>
742
+ </thead>
742
743
  <tbody>
743
744
  <tr>
744
745
  <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
@@ -775,26 +776,102 @@ INPUT
775
776
  <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
776
777
  <td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>Rice sample</td>
777
778
  </tr>
779
+ </thead>
778
780
  <tbody>
779
781
  <tr>
780
782
  <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
781
- http://www.example.com/
782
- AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAA/
783
- BBBBBBBBBBBBBBBBBBBBBBBBBBBB
783
+ http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB
784
784
  </td>
785
785
  <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
786
- http://www.example.com/
787
- AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
788
- AAAAAAAABBBBBBBBBBBBBBBBBBBBBB BBBBBB
786
+ http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB </td>
787
+ <td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
788
+ www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
789
+ </td>
790
+ </tr>
791
+ </tbody>
792
+ </table>
793
+ </div>
794
+ </div>
795
+ </body>
796
+ </head>
797
+ </html>
798
+ OUTPUT
799
+ end
800
+
801
+ it "does not break up very long strings in tables on request in HTML" do
802
+ expect(xmlpp(IsoDoc::HtmlConvert.new({break_up_urls_in_tables: "true"}).cleanup(Nokogiri::XML(<<~"INPUT")).to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
803
+ <html xmlns:epub="http://www.idpf.org/2007/ops">
804
+ <head>
805
+ <title>test</title>
806
+ <body lang="EN-US" link="blue" vlink="#954F72">
807
+ <div class="WordSection1">
808
+ <p>&#160;</p>
809
+ </div>
810
+ <br clear="all" class="section"/>
811
+ <div class="WordSection2">
812
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
813
+ <div>
814
+ <h1 class="ForewordTitle">Foreword</h1>
815
+ <p class="TableTitle" align="center">
816
+ <b>Table 1&#160;&#8212; Repeatability and reproducibility of husked rice yield</b>
817
+ </p>
818
+ <table id="tableD-1" class="MsoISOTable" border="1" cellspacing="0" cellpadding="0">
819
+ <thead>
820
+ <tr>
821
+ <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
822
+ <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
823
+ <td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">Rice sample</td>
824
+ </tr>
825
+ </thead>
826
+ <tbody>
827
+ <tr>
828
+ <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
829
+ <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
830
+ <td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
831
+ </tr>
832
+ </tbody>
833
+ </table>
834
+ </div>
835
+ </div>
836
+ </body>
837
+ </html>
838
+ INPUT
839
+ <?xml version='1.0'?>
840
+ <html xmlns:epub='http://www.idpf.org/2007/ops'>
841
+ <head>
842
+ <title>test</title>
843
+ <body lang='EN-US' link='blue' vlink='#954F72'>
844
+ <div class='WordSection1'>
845
+ <p>&#xA0;</p>
846
+ </div>
847
+ <br clear='all' class='section'/>
848
+ <div class='WordSection2'>
849
+ <br clear='all' style='mso-special-character:line-break;page-break-before:always'/>
850
+ <div>
851
+ <h1 class='ForewordTitle'>Foreword</h1>
852
+ <p class='TableTitle' align='center'>
853
+ <b>Table 1&#xA0;&#x2014; Repeatability and reproducibility of husked rice yield</b>
854
+ </p>
855
+ <table id='tableD-1' class='MsoISOTable' border='1' cellspacing='0' cellpadding='0'>
856
+ <thead>
857
+ <tr>
858
+ <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
859
+ <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
860
+ <td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>Rice sample</td>
861
+ </tr>
862
+ </thead>
863
+ <tbody>
864
+ <tr>
865
+ <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
866
+ http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB
789
867
  </td>
868
+ <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
869
+ http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
790
870
  <td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
791
- www.example.com/
792
- AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA
793
- ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
871
+ www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
794
872
  </td>
795
873
  </tr>
796
874
  </tbody>
797
- </thead>
798
875
  </table>
799
876
  </div>
800
877
  </div>
@@ -804,7 +881,8 @@ ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
804
881
  OUTPUT
805
882
  end
806
883
 
807
- it "breaks up very long strings in tables (Word)" do
884
+
885
+ it "does not break up very long strings in tables by default (Word)" do
808
886
  expect(xmlpp(IsoDoc::WordConvert.new({}).cleanup(Nokogiri::XML(<<~"INPUT")).to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
809
887
  <html xmlns:epub="http://www.idpf.org/2007/ops">
810
888
  <head>
@@ -828,16 +906,17 @@ ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
828
906
  <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
829
907
  <td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">Rice sample</td>
830
908
  </tr>
909
+ </thead>
831
910
  <tbody>
832
911
  <tr>
833
912
  <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
834
- http://www.example.com/ &amp;AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAA/ BBBBBBBBBBBBBBBBBBBBBBBBBBBB
913
+ http://www.example.com/&amp;AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB
835
914
  </td>
836
915
  <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
837
- http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAABBBBBBBBBBBBBBBBBBBBBB BBBBBB
916
+ http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
838
917
  </td>
839
918
  <td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
840
- www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
919
+ www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
841
920
  </td>
842
921
  </tr>
843
922
  </tbody>
@@ -870,20 +949,20 @@ ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
870
949
  <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
871
950
  <td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>Rice sample</td>
872
951
  </tr>
952
+ </thead>
873
953
  <tbody>
874
954
  <tr>
875
955
  <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
876
- http://www.example.com/ &amp;AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAA/ BBBBBBBBBBBBBBBBBBBBBBBBBBBB
956
+ http://www.example.com/&amp;AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB
877
957
  </td>
878
958
  <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
879
- http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAABBBBBBBBBBBBBBBBBBBBBB BBBBBB
959
+ http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
880
960
  </td>
881
961
  <td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
882
- www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
962
+ www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB
883
963
  </td>
884
964
  </tr>
885
965
  </tbody>
886
- </thead>
887
966
  </table>
888
967
  </div>
889
968
  </div>
@@ -893,4 +972,217 @@ ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
893
972
  OUTPUT
894
973
  end
895
974
 
975
+ it "breaks up very long strings in tables on request (Word)" do
976
+ expect(xmlpp(IsoDoc::WordConvert.new({break_up_urls_in_tables: "true"}).cleanup(Nokogiri::XML(<<~"INPUT")).to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
977
+ <html xmlns:epub="http://www.idpf.org/2007/ops">
978
+ <head>
979
+ <title>test</title>
980
+ <body lang="EN-US" link="blue" vlink="#954F72">
981
+ <div class="WordSection1">
982
+ <p>&#160;</p>
983
+ </div>
984
+ <br clear="all" class="section"/>
985
+ <div class="WordSection2">
986
+ <br clear="all" style="mso-special-character:line-break;page-break-before:always"/>
987
+ <div>
988
+ <h1 class="ForewordTitle">Foreword</h1>
989
+ <p class="TableTitle" align="center">
990
+ <b>Table 1&#160;&#8212; Repeatability and reproducibility of husked rice yield</b>
991
+ </p>
992
+ <table id="tableD-1" class="MsoISOTable" border="1" cellspacing="0" cellpadding="0">
993
+ <thead>
994
+ <tr>
995
+ <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
996
+ <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">Description</td>
997
+ <td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">Rice sample</td>
998
+ </tr>
999
+ </thead>
1000
+ <tbody>
1001
+ <tr>
1002
+ <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA/BBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
1003
+ <td align="left" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;">http://www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
1004
+ <td align="center" style="border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;">www.example.com/AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAABBBBBBBBBBBBBBBBBBBBBBBBBBBB</td>
1005
+ </tr>
1006
+ </tbody>
1007
+ </table>
1008
+ </div>
1009
+ </div>
1010
+ </body>
1011
+ </html>
1012
+ INPUT
1013
+ <?xml version='1.0'?>
1014
+ <html xmlns:epub='http://www.idpf.org/2007/ops'>
1015
+ <head>
1016
+ <title>test</title>
1017
+ <body lang='EN-US' link='blue' vlink='#954F72'>
1018
+ <div class='WordSection1'>
1019
+ <p>&#xA0;</p>
1020
+ </div>
1021
+ <br clear='all' class='section'/>
1022
+ <div class='WordSection2'>
1023
+ <br clear='all' style='mso-special-character:line-break;page-break-before:always'/>
1024
+ <div>
1025
+ <h1 class='ForewordTitle'>Foreword</h1>
1026
+ <p class='TableTitle' align='center'>
1027
+ <b>Table 1&#xA0;&#x2014; Repeatability and reproducibility of husked rice yield</b>
1028
+ </p>
1029
+ <table id='tableD-1' class='MsoISOTable' border='1' cellspacing='0' cellpadding='0'>
1030
+ <thead>
1031
+ <tr>
1032
+ <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
1033
+ <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>Description</td>
1034
+ <td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>Rice sample</td>
1035
+ </tr>
1036
+ </thead>
1037
+ <tbody>
1038
+ <tr>
1039
+ <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
1040
+ http://www.example.com/ AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAA/ BBBBBBBBBBBBBBBBBBBBBBBBBBBB
1041
+ </td>
1042
+ <td align='left' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.5pt;'>
1043
+ http://www.example.com/ AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA AAAAAAAABBBBBBBBBBBBBBBBBBBBBB BBBBBB
1044
+ </td>
1045
+ <td align='center' style='border-top:solid windowtext 1.5pt;border-bottom:solid windowtext 1.0pt;'>
1046
+ www.example.com/ AAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAAA ABBBBBBBBBBBBBBBBBBBBBBBBBBBB
1047
+ </td>
1048
+ </tr>
1049
+ </tbody>
1050
+ </table>
1051
+ </div>
1052
+ </div>
1053
+ </body>
1054
+ </head>
1055
+ </html>
1056
+ OUTPUT
1057
+ end
1058
+
1059
+ it "cleans up term sources" do
1060
+ c = IsoDoc::HtmlConvert.new({i18nyaml: "spec/assets/i18n.yaml"})
1061
+ c.i18n_init("en", "Latn")
1062
+ expect(xmlpp(c.textcleanup(<<~"INPUT").to_s)).to be_equivalent_to xmlpp(<<~"OUTPUT")
1063
+ #{HTML_HDR}
1064
+ <p class="zzSTDTitle1"/>
1065
+ <div id="_terms_and_definitions"><h1>1.&#160; Terms and Definitions</h1><p>For the purposes of this document,
1066
+ the following terms and definitions apply.</p>
1067
+ <p class="TermNum" id="paddy1">1.1.</p><p class="Terms" style="text-align:left;">paddy</p>
1068
+
1069
+ <p id="_eb29b35e-123e-4d1c-b50b-2714d41e747f">&lt;rice&gt; rice retaining its husk after threshing</p>
1070
+ <div id="_bd57bbf1-f948-4bae-b0ce-73c00431f892" class="example"><p class="example-title">EXAMPLE 1</p>
1071
+ <p id="_65c9a509-9a89-4b54-a890-274126aeb55c">Foreign seeds, husks, bran, sand, dust.</p>
1072
+ <ul>
1073
+ <li>A</li>
1074
+ </ul>
1075
+ </div>
1076
+ <div id="_bd57bbf1-f948-4bae-b0ce-73c00431f894" class="example"><p class="example-title">EXAMPLE 2</p>
1077
+ <ul>
1078
+ <li>A</li>
1079
+ </ul>
1080
+ </div>
1081
+
1082
+ <p>[TERMREF]
1083
+ <a href="#ISO7301">ISO 7301:2011, Clause 3.1</a>
1084
+ [MODIFICATION]The term "cargo rice" is shown as deprecated, and Note 1 to entry is not included here
1085
+ [/TERMREF]</p>
1086
+ <p>[TERMREF] Termbase IEV, term ID xyz [/TERMREF]</p>
1087
+ <p>[TERMREF] Termbase IEV, term ID xyz [MODIFICATION]with adjustments [/TERMREF]</p>
1088
+ <p class="TermNum" id="paddy">1.2.</p><p class="Terms" style="text-align:left;">paddy</p><p class="AltTerms" style="text-align:left;">paddy rice</p>
1089
+ <p class="AltTerms" style="text-align:left;">rough rice</p>
1090
+ <p class="DeprecatedTerms" style="text-align:left;">DEPRECATED: cargo rice</p>
1091
+ <p id="_eb29b35e-123e-4d1c-b50b-2714d41e747f">rice retaining its husk after threshing</p>
1092
+ <div id="_bd57bbf1-f948-4bae-b0ce-73c00431f893" class="example"><p class="example-title">EXAMPLE</p>
1093
+ <ul>
1094
+ <li>A</li>
1095
+ </ul>
1096
+ </div>
1097
+ <div class="Note"><p>Note 1 to entry: The starch of waxy rice consists almost entirely of amylopectin. The kernels have a tendency to stick together after cooking.</p></div>
1098
+ <div class="Note"><p>Note 2 to entry: <ul><li>A</li></ul><p id="_19830f33-e46c-42cc-94ca-a5ef101132d5">The starch of waxy rice consists almost entirely of amylopectin. The kernels have a tendency to stick together after cooking.</p></p></div>
1099
+ <p>[TERMREF]
1100
+ <a href="#ISO7301">ISO 7301:2011, Clause 3.1</a>
1101
+ [/TERMREF]</p></div>
1102
+ </div>
1103
+ </body>
1104
+ </html>
1105
+ INPUT
1106
+ <html xmlns:epub='http://www.idpf.org/2007/ops' lang='en'>
1107
+ <head/>
1108
+ <body lang='en'>
1109
+ <div class='title-section'>
1110
+ <p>&#160;</p>
1111
+ </div>
1112
+ <br/>
1113
+ <div class='prefatory-section'>
1114
+ <p>&#160;</p>
1115
+ </div>
1116
+ <br/>
1117
+ <div class='main-section'>
1118
+ <p class='zzSTDTitle1'/>
1119
+ <div id='_terms_and_definitions'>
1120
+ <h1>1.&#160; Terms and Definitions</h1>
1121
+ <p>For the purposes of this document, the following terms and definitions apply.</p>
1122
+ <p class='TermNum' id='paddy1'>1.1.</p>
1123
+ <p class='Terms' style='text-align:left;'>paddy</p>
1124
+ <p id='_eb29b35e-123e-4d1c-b50b-2714d41e747f'>&lt;rice&gt; rice retaining its husk after threshing</p>
1125
+ <div id='_bd57bbf1-f948-4bae-b0ce-73c00431f892' class='example'>
1126
+ <p class='example-title'>EXAMPLE 1</p>
1127
+ <p id='_65c9a509-9a89-4b54-a890-274126aeb55c'>Foreign seeds, husks, bran, sand, dust.</p>
1128
+ <ul>
1129
+ <li>A</li>
1130
+ </ul>
1131
+ </div>
1132
+ <div id='_bd57bbf1-f948-4bae-b0ce-73c00431f894' class='example'>
1133
+ <p class='example-title'>EXAMPLE 2</p>
1134
+ <ul>
1135
+ <li>A</li>
1136
+ </ul>
1137
+ </div>
1138
+ <p>
1139
+ [SOURCE:
1140
+ <a href='#ISO7301'>ISO 7301:2011, Clause 3.1</a>
1141
+ , modified &mdash; The term "cargo rice" is shown as deprecated, and
1142
+ Note 1 to entry is not included here; Termbase IEV, term ID xyz;
1143
+ Termbase IEV, term ID xyz, modified &mdash; with adjustments]
1144
+ </p>
1145
+ <p class='TermNum' id='paddy'>1.2.</p>
1146
+ <p class='Terms' style='text-align:left;'>paddy</p>
1147
+ <p class='AltTerms' style='text-align:left;'>paddy rice</p>
1148
+ <p class='AltTerms' style='text-align:left;'>rough rice</p>
1149
+ <p class='DeprecatedTerms' style='text-align:left;'>DEPRECATED: cargo rice</p>
1150
+ <p id='_eb29b35e-123e-4d1c-b50b-2714d41e747f'>rice retaining its husk after threshing</p>
1151
+ <div id='_bd57bbf1-f948-4bae-b0ce-73c00431f893' class='example'>
1152
+ <p class='example-title'>EXAMPLE</p>
1153
+ <ul>
1154
+ <li>A</li>
1155
+ </ul>
1156
+ </div>
1157
+ <div class='Note'>
1158
+ <p>
1159
+ Note 1 to entry: The starch of waxy rice consists almost entirely of
1160
+ amylopectin. The kernels have a tendency to stick together after
1161
+ cooking.
1162
+ </p>
1163
+ </div>
1164
+ <div class='Note'>
1165
+ <p>
1166
+ Note 2 to entry:
1167
+ <ul>
1168
+ <li>A</li>
1169
+ </ul>
1170
+ <p id='_19830f33-e46c-42cc-94ca-a5ef101132d5'>
1171
+ The starch of waxy rice consists almost entirely of amylopectin.
1172
+ The kernels have a tendency to stick together after cooking.
1173
+ </p>
1174
+ </p>
1175
+ </div>
1176
+ <p>
1177
+ [SOURCE:
1178
+ <a href='#ISO7301'>ISO 7301:2011, Clause 3.1</a>
1179
+ ]
1180
+ </p>
1181
+ </div>
1182
+ </div>
1183
+ </body>
1184
+ </html>
1185
+ OUTPUT
1186
+ end
1187
+
896
1188
  end