isodoc 1.3.0 → 1.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/rake.yml +14 -0
  3. data/isodoc.gemspec +2 -2
  4. data/lib/isodoc-yaml/i18n-en.yaml +55 -0
  5. data/lib/isodoc-yaml/i18n-fr.yaml +56 -0
  6. data/lib/isodoc/convert.rb +2 -1
  7. data/lib/isodoc/function/inline_simple.rb +10 -1
  8. data/lib/isodoc/function/section.rb +1 -1
  9. data/lib/isodoc/function/table.rb +10 -0
  10. data/lib/isodoc/function/to_word_html.rb +4 -2
  11. data/lib/isodoc/function/utils.rb +4 -4
  12. data/lib/isodoc/gem_tasks.rb +4 -0
  13. data/lib/isodoc/html_function/html.rb +7 -0
  14. data/lib/isodoc/html_function/mathvariant_to_plain.rb +82 -0
  15. data/lib/isodoc/html_function/postprocess.rb +32 -19
  16. data/lib/isodoc/metadata_contributor.rb +4 -3
  17. data/lib/isodoc/presentation_function/inline.rb +5 -1
  18. data/lib/isodoc/presentation_function/section.rb +9 -0
  19. data/lib/isodoc/presentation_xml_convert.rb +2 -0
  20. data/lib/isodoc/version.rb +1 -1
  21. data/lib/isodoc/word_function/inline.rb +2 -2
  22. data/lib/isodoc/word_function/postprocess.rb +38 -80
  23. data/lib/isodoc/word_function/postprocess_cover.rb +55 -0
  24. data/lib/isodoc/word_function/table.rb +10 -0
  25. data/lib/isodoc/xref.rb +1 -0
  26. data/lib/isodoc/xref/xref_counter.rb +44 -12
  27. data/lib/isodoc/xref/xref_gen.rb +18 -0
  28. data/lib/isodoc/xref/xref_sect_gen.rb +34 -27
  29. data/spec/isodoc/blocks_spec.rb +26 -73
  30. data/spec/isodoc/cleanup_spec.rb +0 -1
  31. data/spec/isodoc/inline_spec.rb +14 -14
  32. data/spec/isodoc/metadata_spec.rb +3 -1
  33. data/spec/isodoc/postproc_spec.rb +441 -3
  34. data/spec/isodoc/presentation_xml_spec.rb +5 -5
  35. data/spec/isodoc/table_spec.rb +28 -0
  36. data/spec/isodoc/xref_spec.rb +455 -2
  37. metadata +9 -8
@@ -17,9 +17,10 @@ module IsoDoc
17
17
  def extract_person_affiliations(authors)
18
18
  authors.reduce([]) do |m, a|
19
19
  name = a&.at(ns('./affiliation/organization/name'))&.text
20
- location = a&.at(ns('./affiliation/organization/address/'\
21
- 'formattedAddress'))&.text
22
- m << (!name.nil? && !location.nil? ? "#{name}, #{location}" :
20
+ subdivs = a&.xpath(ns('./affiliation/organization/subdivision'))&.map(&:text)&.join(", ")
21
+ name and subdivs and !subdivs.empty? and name = l10n("#{name}, #{subdivs}", @lang, @script)
22
+ location = a&.at(ns('./affiliation/organization/address/formattedAddress'))&.text
23
+ m << (!name.nil? && !location.nil? ? l10n("#{name}, #{location}", @lang, @script) :
23
24
  (name || location || ''))
24
25
  m
25
26
  end
@@ -211,7 +211,11 @@ module IsoDoc
211
211
  def mathml1(f, locale)
212
212
  localize_maths(f, locale)
213
213
  return unless f.elements.size == 1 && f.elements.first.name == "mn"
214
- f.replace(f.at("./m:mn", MATHML).children)
214
+ if f.parent.name == "stem"
215
+ f.parent.replace(f.at("./m:mn", MATHML).children)
216
+ else
217
+ f.replace(f.at("./m:mn", MATHML).children)
218
+ end
215
219
  end
216
220
 
217
221
  def variant(docxml)
@@ -42,5 +42,14 @@ module IsoDoc
42
42
  lbl = @xrefs.get[f["id"]][:label] or return
43
43
  prefix_name(f, "", "#{lbl}#{clausedelim}", "name")
44
44
  end
45
+
46
+ def references(docxml)
47
+ end
48
+
49
+ def index(docxml)
50
+ docxml.xpath(ns("//index | //index-xref")).each do |f|
51
+ f.remove
52
+ end
53
+ end
45
54
  end
46
55
  end
@@ -30,6 +30,8 @@ module IsoDoc
30
30
  clause docxml
31
31
  annex docxml
32
32
  term docxml
33
+ references docxml
34
+ index docxml
33
35
  end
34
36
 
35
37
  def block(docxml)
@@ -1,3 +1,3 @@
1
1
  module IsoDoc
2
- VERSION = "1.3.0".freeze
2
+ VERSION = "1.5.0".freeze
3
3
  end
@@ -24,7 +24,7 @@ module IsoDoc::WordFunction
24
24
 
25
25
  def imgsrc(node)
26
26
  ret = svg_to_emf(node) and return ret
27
- return node["src"] unless %r{^data:image/}.match node["src"]
27
+ return node["src"] unless %r{^data:}.match node["src"]
28
28
  save_dataimage(node["src"])
29
29
  end
30
30
 
@@ -45,7 +45,7 @@ module IsoDoc::WordFunction
45
45
  def svg_to_emf(node)
46
46
  return unless node["mimetype"] == "image/svg+xml"
47
47
  uri = node["src"]
48
- %r{^data:image/}.match(uri) and uri = save_dataimage(uri)
48
+ %r{^data:}.match(uri) and uri = save_dataimage(uri)
49
49
  ret = svg_to_emf_filename(uri)
50
50
  File.exists?(ret) and return ret
51
51
  exe = inkscape_installed? or return nil
@@ -5,11 +5,9 @@ module IsoDoc::WordFunction
5
5
  module Postprocess
6
6
  # add namespaces for Word fragments
7
7
  WORD_NOKOHEAD = <<~HERE.freeze
8
- <!DOCTYPE html SYSTEM
9
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
8
+ <!DOCTYPE html SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
10
9
  <html xmlns="http://www.w3.org/1999/xhtml"
11
- xmlns:v="urn:schemas-microsoft-com:vml"
12
- xmlns:o="urn:schemas-microsoft-com:office:office"
10
+ xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office"
13
11
  xmlns:w="urn:schemas-microsoft-com:office:word"
14
12
  xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
15
13
  <head> <title></title> <meta charset="UTF-8" /> </head>
@@ -18,15 +16,13 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
18
16
 
19
17
  def to_word_xhtml_fragment(xml)
20
18
  doc = ::Nokogiri::XML.parse(WORD_NOKOHEAD)
21
- fragment = ::Nokogiri::XML::DocumentFragment.new(doc, xml, doc.root)
22
- fragment
19
+ ::Nokogiri::XML::DocumentFragment.new(doc, xml, doc.root)
23
20
  end
24
21
 
25
22
  def table_note_cleanup(docxml)
26
23
  super
27
24
  # preempt html2doc putting MsoNormal there
28
- docxml.xpath("//p[not(self::*[@class])]"\
29
- "[ancestor::*[@class = 'Note']]").each do |p|
25
+ docxml.xpath("//p[not(self::*[@class])][ancestor::*[@class = 'Note']]").each do |p|
30
26
  p["class"] = "Note"
31
27
  end
32
28
  end
@@ -56,8 +52,7 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
56
52
 
57
53
  def word_admonition_images(docxml)
58
54
  docxml.xpath("//div[@class = 'Admonition']//img").each do |i|
59
- i["width"], i["height"] =
60
- Html2Doc.image_resize(i, image_localfile(i), @maxheight, 300)
55
+ i["width"], i["height"] = Html2Doc.image_resize(i, image_localfile(i), @maxheight, 300)
61
56
  end
62
57
  end
63
58
 
@@ -65,6 +60,7 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
65
60
  word_annex_cleanup(docxml)
66
61
  word_preface(docxml)
67
62
  word_nested_tables(docxml)
63
+ word_colgroup(docxml)
68
64
  word_table_align(docxml)
69
65
  word_table_separator(docxml)
70
66
  word_admonition_images(docxml)
@@ -78,28 +74,44 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
78
74
  docxml
79
75
  end
80
76
 
81
- def word_nested_tables(docxml)
82
- docxml.xpath("//table").each do |t|
83
- t.xpath(".//table").reverse.each do |tt|
84
- t.next = tt.remove
77
+ def word_colgroup(docxml)
78
+ cells2d = {}
79
+ docxml.xpath("//table[colgroup]").each do |t|
80
+ w = colgroup_widths(t)
81
+ t.xpath(".//tr").each_with_index { |tr, r| cells2d[r] = {} }
82
+ t.xpath(".//tr").each_with_index do |tr, r|
83
+ tr.xpath("./td | ./th").each_with_index do |td, i|
84
+ x = 0
85
+ rs = td&.attr("rowspan")&.to_i || 1
86
+ cs = td&.attr("colspan")&.to_i || 1
87
+ while cells2d[r][x] do
88
+ x += 1
89
+ end
90
+ for y2 in r..(r + rs - 1)
91
+ for x2 in x..(x + cs - 1)
92
+ cells2d[y2][x2] = 1
93
+ end
94
+ end
95
+ width = (x..(x+cs-1)).each_with_object({width: 0}) { |z, m| m[:width] += w[z] }
96
+ td["width"] = "#{width[:width]}%"
97
+ x += cs
98
+ end
85
99
  end
86
100
  end
87
101
  end
88
102
 
89
- def authority_cleanup1(docxml, klass)
90
- dest = docxml.at("//div[@id = 'boilerplate-#{klass}-destination']")
91
- auth = docxml.at("//div[@id = 'boilerplate-#{klass}' or @class = 'boilerplate-#{klass}']")
92
- auth&.xpath(".//h1[not(text())] | .//h2[not(text())]")&.each { |h| h.remove }
93
- auth&.xpath(".//h1 | .//h2")&.each do |h|
94
- h.name = "p"
95
- h["class"] = "TitlePageSubhead"
103
+ # assume percentages
104
+ def colgroup_widths(t)
105
+ t.xpath("./colgroup/col").each_with_object([]) do |c, m|
106
+ m << c["width"].sub(/%$/, "").to_f
96
107
  end
97
- dest and auth and dest.replace(auth.remove)
98
108
  end
99
109
 
100
- def authority_cleanup(docxml)
101
- %w(copyright license legal feedback).each do |t|
102
- authority_cleanup1(docxml, t)
110
+ def word_nested_tables(docxml)
111
+ docxml.xpath("//table").each do |t|
112
+ t.xpath(".//table").reverse.each do |tt|
113
+ t.next = tt.remove
114
+ end
103
115
  end
104
116
  end
105
117
 
@@ -144,19 +156,6 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
144
156
  end
145
157
  end
146
158
 
147
- =begin
148
- EMPTY_PARA = "<p style='margin-top:0cm;margin-right:0cm;"\
149
- "margin-bottom:0cm;margin-left:0.0pt;margin-bottom:.0001pt;"\
150
- "line-height:1.0pt;mso-line-height-rule:exactly'>"\
151
- "<span lang=EN-GB style='display:none;mso-hide:all'>&nbsp;</span></p>"
152
-
153
- def table_after_table(docxml)
154
- docxml.xpath("//table[following-sibling::*[1]/self::table]").each do |t|
155
- t.add_next_sibling(EMPTY_PARA)
156
- end
157
- end
158
- =end
159
-
160
159
  def word_table_separator(docxml)
161
160
  docxml.xpath("//p[@class = 'TableTitle']").each do |t|
162
161
  next unless t.children.empty?
@@ -180,46 +179,6 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
180
179
  end
181
180
  end
182
181
 
183
- def generate_header(filename, _dir)
184
- return nil unless @header
185
- template = IsoDoc::Common.liquid(File.read(@header, encoding: "UTF-8"))
186
- meta = @meta.get.merge(@labels || {}).merge(@meta.labels || {})
187
- meta[:filename] = filename
188
- params = meta.map { |k, v| [k.to_s, v] }.to_h
189
- Tempfile.open(%w(header html), :encoding => "utf-8") do |f|
190
- f.write(template.render(params))
191
- f
192
- end
193
- end
194
-
195
- def word_section_breaks(docxml)
196
- @landscapestyle = ""
197
- word_section_breaks1(docxml, "WordSection2")
198
- word_section_breaks1(docxml, "WordSection3")
199
- word_remove_pb_before_annex(docxml)
200
- docxml.xpath("//br[@orientation]").each { |br| br.delete("orientation") }
201
- end
202
-
203
- def word_section_breaks1(docxml, sect)
204
- docxml.xpath("//div[@class = '#{sect}']//br[@orientation]").reverse.
205
- each_with_index do |br, i|
206
- @landscapestyle += "\ndiv.#{sect}_#{i} {page:#{sect}"\
207
- "#{br["orientation"] == "landscape" ? "L" : "P"};}\n"
208
- split_at_section_break(docxml, sect, br, i)
209
- end
210
- end
211
-
212
- def split_at_section_break(docxml, sect, br, i)
213
- move = br.parent.xpath("following::node()") &
214
- br.document.xpath("//div[@class = '#{sect}']//*")
215
- ins = docxml.at("//div[@class = '#{sect}']").
216
- after("<div class='#{sect}_#{i}'/>").next_element
217
- move.each do |m|
218
- next if m.at("./ancestor::div[@class = '#{sect}_#{i}']")
219
- ins << m.remove
220
- end
221
- end
222
-
223
182
  # applies for <div class="WordSectionN_M"><p><pagebreak/></p>...
224
183
  def word_remove_pb_before_annex(docxml)
225
184
  docxml.xpath("//div[p/br]").each do |d|
@@ -237,8 +196,7 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
237
196
  docxml.xpath("//a[@epub:type = 'footnote']").each do |x|
238
197
  footnote_reference_format(x)
239
198
  end
240
- docxml.xpath("//a[@class = 'TableFootnoteRef'] | "\
241
- "//span[@class = 'TableFootnoteRef']").each do |x|
199
+ docxml.xpath("//a[@class = 'TableFootnoteRef'] | //span[@class = 'TableFootnoteRef']").each do |x|
242
200
  table_footnote_reference_format(x)
243
201
  end
244
202
  docxml
@@ -75,5 +75,60 @@ module IsoDoc::WordFunction
75
75
  toc.sub(/(<p class="MsoToc1">)/,
76
76
  %{\\1#{word_toc_preface(level)}}) + WORD_TOC_SUFFIX1
77
77
  end
78
+
79
+ def authority_cleanup1(docxml, klass)
80
+ dest = docxml.at("//div[@id = 'boilerplate-#{klass}-destination']")
81
+ auth = docxml.at("//div[@id = 'boilerplate-#{klass}' or @class = 'boilerplate-#{klass}']")
82
+ auth&.xpath(".//h1[not(text())] | .//h2[not(text())]")&.each { |h| h.remove }
83
+ auth&.xpath(".//h1 | .//h2")&.each do |h|
84
+ h.name = "p"
85
+ h["class"] = "TitlePageSubhead"
86
+ end
87
+ dest and auth and dest.replace(auth.remove)
88
+ end
89
+
90
+ def authority_cleanup(docxml)
91
+ %w(copyright license legal feedback).each do |t|
92
+ authority_cleanup1(docxml, t)
93
+ end
94
+ end
95
+
96
+ def generate_header(filename, _dir)
97
+ return nil unless @header
98
+ template = IsoDoc::Common.liquid(File.read(@header, encoding: "UTF-8"))
99
+ meta = @meta.get.merge(@labels ? { labels: @labels } : {}).merge(@meta.labels ? { labels: @meta.labels } : {})
100
+ meta[:filename] = filename
101
+ params = meta.map { |k, v| [k.to_s, v] }.to_h
102
+ Tempfile.open(%w(header html), :encoding => "utf-8") do |f|
103
+ f.write(template.render(params))
104
+ f
105
+ end
106
+ end
107
+
108
+ def word_section_breaks(docxml)
109
+ @landscapestyle = ""
110
+ word_section_breaks1(docxml, "WordSection2")
111
+ word_section_breaks1(docxml, "WordSection3")
112
+ word_remove_pb_before_annex(docxml)
113
+ docxml.xpath("//br[@orientation]").each { |br| br.delete("orientation") }
114
+ end
115
+
116
+ def word_section_breaks1(docxml, sect)
117
+ docxml.xpath("//div[@class = '#{sect}']//br[@orientation]").reverse.
118
+ each_with_index do |br, i|
119
+ @landscapestyle += "\ndiv.#{sect}_#{i} {page:#{sect}#{br["orientation"] == "landscape" ? "L" : "P"};}\n"
120
+ split_at_section_break(docxml, sect, br, i)
121
+ end
122
+ end
123
+
124
+ def split_at_section_break(docxml, sect, br, i)
125
+ move = br.parent.xpath("following::node()") &
126
+ br.document.xpath("//div[@class = '#{sect}']//*")
127
+ ins = docxml.at("//div[@class = '#{sect}']").after("<div class='#{sect}_#{i}'/>").next_element
128
+ move.each do |m|
129
+ next if m.at("./ancestor::div[@class = '#{sect}_#{i}']")
130
+ ins << m.remove
131
+ end
132
+ end
78
133
  end
79
134
  end
@@ -43,11 +43,21 @@ module IsoDoc::WordFunction
43
43
  }))
44
44
  end
45
45
 
46
+ def colgroup(node, t)
47
+ colgroup = node.at(ns("./colgroup")) or return
48
+ t.colgroup do |cg|
49
+ colgroup.xpath(ns("./col")).each do |c|
50
+ cg.col **{ width: c["width"] }
51
+ end
52
+ end
53
+ end
54
+
46
55
  def table_parse(node, out)
47
56
  @in_table = true
48
57
  table_title_parse(node, out)
49
58
  out.div **{ align: "center", class: "table_container" } do |div|
50
59
  div.table **table_attrs(node) do |t|
60
+ colgroup(node, t)
51
61
  thead_parse(node, t)
52
62
  tbody_parse(node, t)
53
63
  tfoot_parse(node, t)
@@ -49,6 +49,7 @@ module IsoDoc
49
49
  note_anchor_names(docxml.xpath(ns(SECTIONS_XPATH)))
50
50
  example_anchor_names(docxml.xpath(ns(SECTIONS_XPATH)))
51
51
  list_anchor_names(docxml.xpath(ns(SECTIONS_XPATH)))
52
+ bookmark_anchor_names(docxml.xpath(ns(SECTIONS_XPATH)))
52
53
  end
53
54
 
54
55
  def ns(xpath)
@@ -2,41 +2,56 @@ require "roman-numerals"
2
2
 
3
3
  module IsoDoc::XrefGen
4
4
  class Counter
5
- def initialize(num = 0)
5
+ def initialize(num = 0, opts = {numerals: :arabic})
6
6
  @num = num
7
7
  @letter = ""
8
8
  @subseq = ""
9
9
  @letter_override = nil
10
10
  @number_override = nil
11
+ @style = opts[:numerals]
11
12
  @base = ""
13
+ if num.is_a? String
14
+ if /^\d+$/.match(num)
15
+ @num = num.to_i
16
+ else
17
+ @num = nil
18
+ @base = num[0..-2]
19
+ @letter = num[-1]
20
+ end
21
+ end
12
22
  end
13
23
 
14
24
  def new_subseq_increment(node)
15
25
  @subseq = node["subsequence"]
16
- @num += 1
26
+ @num += 1 unless @num.nil?
17
27
  @letter = node["subsequence"] ? "a" : ""
18
28
  @base = ""
19
29
  if node["number"]
20
- /^(?<b>.*?)(?<n>\d*)(?<a>[a-z]*)$/ =~ node["number"]
30
+ /^(?<b>.*?)(?<n>\d*)(?<a>[a-zA-Z]*)$/ =~ node["number"]
21
31
  if !n.empty? || !a.empty?
22
32
  @letter_override = @letter = a unless a.empty?
23
33
  @number_override = @num = n.to_i unless n.empty?
24
34
  @base = b
25
35
  else
26
36
  @letter_override = node["number"]
27
- @letter = @letter_override if /^[a-z]$/.match(@letter_override)
37
+ @letter = @letter_override if /^[a-zA-Z]$/.match(@letter_override)
28
38
  end
29
39
  end
30
40
  end
31
41
 
32
42
  def sequence_increment(node)
33
43
  if node["number"]
34
- @base = ""
35
- @number_override = node["number"]
44
+ @base = @letter_override = @number_override = ""
36
45
  /^(?<b>.*?)(?<n>\d+)$/ =~ node["number"]
37
- unless n.nil? || n.empty?
46
+ if blank?(n)
47
+ @num = nil
48
+ @base = node["number"][0..-2]
49
+ @letter = @letter_override = node["number"][-1]
50
+ else
51
+ @number_override = node["number"]
38
52
  @num = n.to_i
39
53
  @base = b
54
+ @letter = ""
40
55
  end
41
56
  else
42
57
  @num += 1
@@ -47,9 +62,20 @@ module IsoDoc::XrefGen
47
62
  if node["number"]
48
63
  @base = ""
49
64
  @letter_override = node["number"]
50
- /^(?<b>.*?)(?<n>\d*)(?<a>[a-z]+)$/ =~ node["number"]
51
- unless a.empty?
52
- @letter = a
65
+ /^(?<b>.*?)(?<n>\d*)(?<a>[a-zA-Z])$/ =~ node["number"]
66
+ if blank?(a)
67
+ if /^\d+$/.match(node["number"])
68
+ @letter_override = @letter = ""
69
+ @number_override = @num = node["number"].to_i
70
+ else
71
+ /^(?<b>.*)(?<a>[a-zA-Z])$/ =~ node["number"]
72
+ unless blank?(a)
73
+ @letter = @letter_override = a
74
+ @base = b
75
+ end
76
+ end
77
+ else
78
+ @letter_override = @letter = a
53
79
  @base = b
54
80
  @number_override = @num = n.to_i unless n.empty?
55
81
  end
@@ -58,11 +84,15 @@ module IsoDoc::XrefGen
58
84
  end
59
85
  end
60
86
 
87
+ def blank?(x)
88
+ x.nil? || x.empty?
89
+ end
90
+
61
91
  def increment(node)
62
92
  return self if node["unnumbered"]
63
93
  @letter_override = nil
64
94
  @number_override = nil
65
- if node["subsequence"] != @subseq
95
+ if node["subsequence"] != @subseq && !(blank?(node["subsequence"]) && blank?(@subseq))
66
96
  new_subseq_increment(node)
67
97
  elsif @letter.empty?
68
98
  sequence_increment(node)
@@ -73,7 +103,9 @@ module IsoDoc::XrefGen
73
103
  end
74
104
 
75
105
  def print
76
- "#{@base}#{@number_override || @num}#{@letter_override || @letter}"
106
+ num = @number_override || @num
107
+ num_out = @style == :roman && !num.nil? ? RomanNumerals.to_roman(num) : num
108
+ "#{@base}#{num_out}#{@letter_override || @letter}"
77
109
  end
78
110
 
79
111
  def ol_type(list, depth)