isodoc 1.3.0 → 1.5.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (37)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/rake.yml +14 -0
  3. data/isodoc.gemspec +2 -2
  4. data/lib/isodoc-yaml/i18n-en.yaml +55 -0
  5. data/lib/isodoc-yaml/i18n-fr.yaml +56 -0
  6. data/lib/isodoc/convert.rb +2 -1
  7. data/lib/isodoc/function/inline_simple.rb +10 -1
  8. data/lib/isodoc/function/section.rb +1 -1
  9. data/lib/isodoc/function/table.rb +10 -0
  10. data/lib/isodoc/function/to_word_html.rb +4 -2
  11. data/lib/isodoc/function/utils.rb +4 -4
  12. data/lib/isodoc/gem_tasks.rb +4 -0
  13. data/lib/isodoc/html_function/html.rb +7 -0
  14. data/lib/isodoc/html_function/mathvariant_to_plain.rb +82 -0
  15. data/lib/isodoc/html_function/postprocess.rb +32 -19
  16. data/lib/isodoc/metadata_contributor.rb +4 -3
  17. data/lib/isodoc/presentation_function/inline.rb +5 -1
  18. data/lib/isodoc/presentation_function/section.rb +9 -0
  19. data/lib/isodoc/presentation_xml_convert.rb +2 -0
  20. data/lib/isodoc/version.rb +1 -1
  21. data/lib/isodoc/word_function/inline.rb +2 -2
  22. data/lib/isodoc/word_function/postprocess.rb +38 -80
  23. data/lib/isodoc/word_function/postprocess_cover.rb +55 -0
  24. data/lib/isodoc/word_function/table.rb +10 -0
  25. data/lib/isodoc/xref.rb +1 -0
  26. data/lib/isodoc/xref/xref_counter.rb +44 -12
  27. data/lib/isodoc/xref/xref_gen.rb +18 -0
  28. data/lib/isodoc/xref/xref_sect_gen.rb +34 -27
  29. data/spec/isodoc/blocks_spec.rb +26 -73
  30. data/spec/isodoc/cleanup_spec.rb +0 -1
  31. data/spec/isodoc/inline_spec.rb +14 -14
  32. data/spec/isodoc/metadata_spec.rb +3 -1
  33. data/spec/isodoc/postproc_spec.rb +441 -3
  34. data/spec/isodoc/presentation_xml_spec.rb +5 -5
  35. data/spec/isodoc/table_spec.rb +28 -0
  36. data/spec/isodoc/xref_spec.rb +455 -2
  37. metadata +9 -8
@@ -17,9 +17,10 @@ module IsoDoc
17
17
  def extract_person_affiliations(authors)
18
18
  authors.reduce([]) do |m, a|
19
19
  name = a&.at(ns('./affiliation/organization/name'))&.text
20
- location = a&.at(ns('./affiliation/organization/address/'\
21
- 'formattedAddress'))&.text
22
- m << (!name.nil? && !location.nil? ? "#{name}, #{location}" :
20
+ subdivs = a&.xpath(ns('./affiliation/organization/subdivision'))&.map(&:text)&.join(", ")
21
+ name and subdivs and !subdivs.empty? and name = l10n("#{name}, #{subdivs}", @lang, @script)
22
+ location = a&.at(ns('./affiliation/organization/address/formattedAddress'))&.text
23
+ m << (!name.nil? && !location.nil? ? l10n("#{name}, #{location}", @lang, @script) :
23
24
  (name || location || ''))
24
25
  m
25
26
  end
@@ -211,7 +211,11 @@ module IsoDoc
211
211
  def mathml1(f, locale)
212
212
  localize_maths(f, locale)
213
213
  return unless f.elements.size == 1 && f.elements.first.name == "mn"
214
- f.replace(f.at("./m:mn", MATHML).children)
214
+ if f.parent.name == "stem"
215
+ f.parent.replace(f.at("./m:mn", MATHML).children)
216
+ else
217
+ f.replace(f.at("./m:mn", MATHML).children)
218
+ end
215
219
  end
216
220
 
217
221
  def variant(docxml)
@@ -42,5 +42,14 @@ module IsoDoc
42
42
  lbl = @xrefs.get[f["id"]][:label] or return
43
43
  prefix_name(f, "", "#{lbl}#{clausedelim}", "name")
44
44
  end
45
+
46
+ def references(docxml)
47
+ end
48
+
49
+ def index(docxml)
50
+ docxml.xpath(ns("//index | //index-xref")).each do |f|
51
+ f.remove
52
+ end
53
+ end
45
54
  end
46
55
  end
@@ -30,6 +30,8 @@ module IsoDoc
30
30
  clause docxml
31
31
  annex docxml
32
32
  term docxml
33
+ references docxml
34
+ index docxml
33
35
  end
34
36
 
35
37
  def block(docxml)
@@ -1,3 +1,3 @@
1
1
  module IsoDoc
2
- VERSION = "1.3.0".freeze
2
+ VERSION = "1.5.0".freeze
3
3
  end
@@ -24,7 +24,7 @@ module IsoDoc::WordFunction
24
24
 
25
25
  def imgsrc(node)
26
26
  ret = svg_to_emf(node) and return ret
27
- return node["src"] unless %r{^data:image/}.match node["src"]
27
+ return node["src"] unless %r{^data:}.match node["src"]
28
28
  save_dataimage(node["src"])
29
29
  end
30
30
 
@@ -45,7 +45,7 @@ module IsoDoc::WordFunction
45
45
  def svg_to_emf(node)
46
46
  return unless node["mimetype"] == "image/svg+xml"
47
47
  uri = node["src"]
48
- %r{^data:image/}.match(uri) and uri = save_dataimage(uri)
48
+ %r{^data:}.match(uri) and uri = save_dataimage(uri)
49
49
  ret = svg_to_emf_filename(uri)
50
50
  File.exists?(ret) and return ret
51
51
  exe = inkscape_installed? or return nil
@@ -5,11 +5,9 @@ module IsoDoc::WordFunction
5
5
  module Postprocess
6
6
  # add namespaces for Word fragments
7
7
  WORD_NOKOHEAD = <<~HERE.freeze
8
- <!DOCTYPE html SYSTEM
9
- "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
8
+ <!DOCTYPE html SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
10
9
  <html xmlns="http://www.w3.org/1999/xhtml"
11
- xmlns:v="urn:schemas-microsoft-com:vml"
12
- xmlns:o="urn:schemas-microsoft-com:office:office"
10
+ xmlns:v="urn:schemas-microsoft-com:vml" xmlns:o="urn:schemas-microsoft-com:office:office"
13
11
  xmlns:w="urn:schemas-microsoft-com:office:word"
14
12
  xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
15
13
  <head> <title></title> <meta charset="UTF-8" /> </head>
@@ -18,15 +16,13 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
18
16
 
19
17
  def to_word_xhtml_fragment(xml)
20
18
  doc = ::Nokogiri::XML.parse(WORD_NOKOHEAD)
21
- fragment = ::Nokogiri::XML::DocumentFragment.new(doc, xml, doc.root)
22
- fragment
19
+ ::Nokogiri::XML::DocumentFragment.new(doc, xml, doc.root)
23
20
  end
24
21
 
25
22
  def table_note_cleanup(docxml)
26
23
  super
27
24
  # preempt html2doc putting MsoNormal there
28
- docxml.xpath("//p[not(self::*[@class])]"\
29
- "[ancestor::*[@class = 'Note']]").each do |p|
25
+ docxml.xpath("//p[not(self::*[@class])][ancestor::*[@class = 'Note']]").each do |p|
30
26
  p["class"] = "Note"
31
27
  end
32
28
  end
@@ -56,8 +52,7 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
56
52
 
57
53
  def word_admonition_images(docxml)
58
54
  docxml.xpath("//div[@class = 'Admonition']//img").each do |i|
59
- i["width"], i["height"] =
60
- Html2Doc.image_resize(i, image_localfile(i), @maxheight, 300)
55
+ i["width"], i["height"] = Html2Doc.image_resize(i, image_localfile(i), @maxheight, 300)
61
56
  end
62
57
  end
63
58
 
@@ -65,6 +60,7 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
65
60
  word_annex_cleanup(docxml)
66
61
  word_preface(docxml)
67
62
  word_nested_tables(docxml)
63
+ word_colgroup(docxml)
68
64
  word_table_align(docxml)
69
65
  word_table_separator(docxml)
70
66
  word_admonition_images(docxml)
@@ -78,28 +74,44 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
78
74
  docxml
79
75
  end
80
76
 
81
- def word_nested_tables(docxml)
82
- docxml.xpath("//table").each do |t|
83
- t.xpath(".//table").reverse.each do |tt|
84
- t.next = tt.remove
77
+ def word_colgroup(docxml)
78
+ cells2d = {}
79
+ docxml.xpath("//table[colgroup]").each do |t|
80
+ w = colgroup_widths(t)
81
+ t.xpath(".//tr").each_with_index { |tr, r| cells2d[r] = {} }
82
+ t.xpath(".//tr").each_with_index do |tr, r|
83
+ tr.xpath("./td | ./th").each_with_index do |td, i|
84
+ x = 0
85
+ rs = td&.attr("rowspan")&.to_i || 1
86
+ cs = td&.attr("colspan")&.to_i || 1
87
+ while cells2d[r][x] do
88
+ x += 1
89
+ end
90
+ for y2 in r..(r + rs - 1)
91
+ for x2 in x..(x + cs - 1)
92
+ cells2d[y2][x2] = 1
93
+ end
94
+ end
95
+ width = (x..(x+cs-1)).each_with_object({width: 0}) { |z, m| m[:width] += w[z] }
96
+ td["width"] = "#{width[:width]}%"
97
+ x += cs
98
+ end
85
99
  end
86
100
  end
87
101
  end
88
102
 
89
- def authority_cleanup1(docxml, klass)
90
- dest = docxml.at("//div[@id = 'boilerplate-#{klass}-destination']")
91
- auth = docxml.at("//div[@id = 'boilerplate-#{klass}' or @class = 'boilerplate-#{klass}']")
92
- auth&.xpath(".//h1[not(text())] | .//h2[not(text())]")&.each { |h| h.remove }
93
- auth&.xpath(".//h1 | .//h2")&.each do |h|
94
- h.name = "p"
95
- h["class"] = "TitlePageSubhead"
103
+ # assume percentages
104
+ def colgroup_widths(t)
105
+ t.xpath("./colgroup/col").each_with_object([]) do |c, m|
106
+ m << c["width"].sub(/%$/, "").to_f
96
107
  end
97
- dest and auth and dest.replace(auth.remove)
98
108
  end
99
109
 
100
- def authority_cleanup(docxml)
101
- %w(copyright license legal feedback).each do |t|
102
- authority_cleanup1(docxml, t)
110
+ def word_nested_tables(docxml)
111
+ docxml.xpath("//table").each do |t|
112
+ t.xpath(".//table").reverse.each do |tt|
113
+ t.next = tt.remove
114
+ end
103
115
  end
104
116
  end
105
117
 
@@ -144,19 +156,6 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
144
156
  end
145
157
  end
146
158
 
147
- =begin
148
- EMPTY_PARA = "<p style='margin-top:0cm;margin-right:0cm;"\
149
- "margin-bottom:0cm;margin-left:0.0pt;margin-bottom:.0001pt;"\
150
- "line-height:1.0pt;mso-line-height-rule:exactly'>"\
151
- "<span lang=EN-GB style='display:none;mso-hide:all'>&nbsp;</span></p>"
152
-
153
- def table_after_table(docxml)
154
- docxml.xpath("//table[following-sibling::*[1]/self::table]").each do |t|
155
- t.add_next_sibling(EMPTY_PARA)
156
- end
157
- end
158
- =end
159
-
160
159
  def word_table_separator(docxml)
161
160
  docxml.xpath("//p[@class = 'TableTitle']").each do |t|
162
161
  next unless t.children.empty?
@@ -180,46 +179,6 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
180
179
  end
181
180
  end
182
181
 
183
- def generate_header(filename, _dir)
184
- return nil unless @header
185
- template = IsoDoc::Common.liquid(File.read(@header, encoding: "UTF-8"))
186
- meta = @meta.get.merge(@labels || {}).merge(@meta.labels || {})
187
- meta[:filename] = filename
188
- params = meta.map { |k, v| [k.to_s, v] }.to_h
189
- Tempfile.open(%w(header html), :encoding => "utf-8") do |f|
190
- f.write(template.render(params))
191
- f
192
- end
193
- end
194
-
195
- def word_section_breaks(docxml)
196
- @landscapestyle = ""
197
- word_section_breaks1(docxml, "WordSection2")
198
- word_section_breaks1(docxml, "WordSection3")
199
- word_remove_pb_before_annex(docxml)
200
- docxml.xpath("//br[@orientation]").each { |br| br.delete("orientation") }
201
- end
202
-
203
- def word_section_breaks1(docxml, sect)
204
- docxml.xpath("//div[@class = '#{sect}']//br[@orientation]").reverse.
205
- each_with_index do |br, i|
206
- @landscapestyle += "\ndiv.#{sect}_#{i} {page:#{sect}"\
207
- "#{br["orientation"] == "landscape" ? "L" : "P"};}\n"
208
- split_at_section_break(docxml, sect, br, i)
209
- end
210
- end
211
-
212
- def split_at_section_break(docxml, sect, br, i)
213
- move = br.parent.xpath("following::node()") &
214
- br.document.xpath("//div[@class = '#{sect}']//*")
215
- ins = docxml.at("//div[@class = '#{sect}']").
216
- after("<div class='#{sect}_#{i}'/>").next_element
217
- move.each do |m|
218
- next if m.at("./ancestor::div[@class = '#{sect}_#{i}']")
219
- ins << m.remove
220
- end
221
- end
222
-
223
182
  # applies for <div class="WordSectionN_M"><p><pagebreak/></p>...
224
183
  def word_remove_pb_before_annex(docxml)
225
184
  docxml.xpath("//div[p/br]").each do |d|
@@ -237,8 +196,7 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
237
196
  docxml.xpath("//a[@epub:type = 'footnote']").each do |x|
238
197
  footnote_reference_format(x)
239
198
  end
240
- docxml.xpath("//a[@class = 'TableFootnoteRef'] | "\
241
- "//span[@class = 'TableFootnoteRef']").each do |x|
199
+ docxml.xpath("//a[@class = 'TableFootnoteRef'] | //span[@class = 'TableFootnoteRef']").each do |x|
242
200
  table_footnote_reference_format(x)
243
201
  end
244
202
  docxml
@@ -75,5 +75,60 @@ module IsoDoc::WordFunction
75
75
  toc.sub(/(<p class="MsoToc1">)/,
76
76
  %{\\1#{word_toc_preface(level)}}) + WORD_TOC_SUFFIX1
77
77
  end
78
+
79
+ def authority_cleanup1(docxml, klass)
80
+ dest = docxml.at("//div[@id = 'boilerplate-#{klass}-destination']")
81
+ auth = docxml.at("//div[@id = 'boilerplate-#{klass}' or @class = 'boilerplate-#{klass}']")
82
+ auth&.xpath(".//h1[not(text())] | .//h2[not(text())]")&.each { |h| h.remove }
83
+ auth&.xpath(".//h1 | .//h2")&.each do |h|
84
+ h.name = "p"
85
+ h["class"] = "TitlePageSubhead"
86
+ end
87
+ dest and auth and dest.replace(auth.remove)
88
+ end
89
+
90
+ def authority_cleanup(docxml)
91
+ %w(copyright license legal feedback).each do |t|
92
+ authority_cleanup1(docxml, t)
93
+ end
94
+ end
95
+
96
+ def generate_header(filename, _dir)
97
+ return nil unless @header
98
+ template = IsoDoc::Common.liquid(File.read(@header, encoding: "UTF-8"))
99
+ meta = @meta.get.merge(@labels ? { labels: @labels } : {}).merge(@meta.labels ? { labels: @meta.labels } : {})
100
+ meta[:filename] = filename
101
+ params = meta.map { |k, v| [k.to_s, v] }.to_h
102
+ Tempfile.open(%w(header html), :encoding => "utf-8") do |f|
103
+ f.write(template.render(params))
104
+ f
105
+ end
106
+ end
107
+
108
+ def word_section_breaks(docxml)
109
+ @landscapestyle = ""
110
+ word_section_breaks1(docxml, "WordSection2")
111
+ word_section_breaks1(docxml, "WordSection3")
112
+ word_remove_pb_before_annex(docxml)
113
+ docxml.xpath("//br[@orientation]").each { |br| br.delete("orientation") }
114
+ end
115
+
116
+ def word_section_breaks1(docxml, sect)
117
+ docxml.xpath("//div[@class = '#{sect}']//br[@orientation]").reverse.
118
+ each_with_index do |br, i|
119
+ @landscapestyle += "\ndiv.#{sect}_#{i} {page:#{sect}#{br["orientation"] == "landscape" ? "L" : "P"};}\n"
120
+ split_at_section_break(docxml, sect, br, i)
121
+ end
122
+ end
123
+
124
+ def split_at_section_break(docxml, sect, br, i)
125
+ move = br.parent.xpath("following::node()") &
126
+ br.document.xpath("//div[@class = '#{sect}']//*")
127
+ ins = docxml.at("//div[@class = '#{sect}']").after("<div class='#{sect}_#{i}'/>").next_element
128
+ move.each do |m|
129
+ next if m.at("./ancestor::div[@class = '#{sect}_#{i}']")
130
+ ins << m.remove
131
+ end
132
+ end
78
133
  end
79
134
  end
@@ -43,11 +43,21 @@ module IsoDoc::WordFunction
43
43
  }))
44
44
  end
45
45
 
46
+ def colgroup(node, t)
47
+ colgroup = node.at(ns("./colgroup")) or return
48
+ t.colgroup do |cg|
49
+ colgroup.xpath(ns("./col")).each do |c|
50
+ cg.col **{ width: c["width"] }
51
+ end
52
+ end
53
+ end
54
+
46
55
  def table_parse(node, out)
47
56
  @in_table = true
48
57
  table_title_parse(node, out)
49
58
  out.div **{ align: "center", class: "table_container" } do |div|
50
59
  div.table **table_attrs(node) do |t|
60
+ colgroup(node, t)
51
61
  thead_parse(node, t)
52
62
  tbody_parse(node, t)
53
63
  tfoot_parse(node, t)
@@ -49,6 +49,7 @@ module IsoDoc
49
49
  note_anchor_names(docxml.xpath(ns(SECTIONS_XPATH)))
50
50
  example_anchor_names(docxml.xpath(ns(SECTIONS_XPATH)))
51
51
  list_anchor_names(docxml.xpath(ns(SECTIONS_XPATH)))
52
+ bookmark_anchor_names(docxml.xpath(ns(SECTIONS_XPATH)))
52
53
  end
53
54
 
54
55
  def ns(xpath)
@@ -2,41 +2,56 @@ require "roman-numerals"
2
2
 
3
3
  module IsoDoc::XrefGen
4
4
  class Counter
5
- def initialize(num = 0)
5
+ def initialize(num = 0, opts = {numerals: :arabic})
6
6
  @num = num
7
7
  @letter = ""
8
8
  @subseq = ""
9
9
  @letter_override = nil
10
10
  @number_override = nil
11
+ @style = opts[:numerals]
11
12
  @base = ""
13
+ if num.is_a? String
14
+ if /^\d+$/.match(num)
15
+ @num = num.to_i
16
+ else
17
+ @num = nil
18
+ @base = num[0..-2]
19
+ @letter = num[-1]
20
+ end
21
+ end
12
22
  end
13
23
 
14
24
  def new_subseq_increment(node)
15
25
  @subseq = node["subsequence"]
16
- @num += 1
26
+ @num += 1 unless @num.nil?
17
27
  @letter = node["subsequence"] ? "a" : ""
18
28
  @base = ""
19
29
  if node["number"]
20
- /^(?<b>.*?)(?<n>\d*)(?<a>[a-z]*)$/ =~ node["number"]
30
+ /^(?<b>.*?)(?<n>\d*)(?<a>[a-zA-Z]*)$/ =~ node["number"]
21
31
  if !n.empty? || !a.empty?
22
32
  @letter_override = @letter = a unless a.empty?
23
33
  @number_override = @num = n.to_i unless n.empty?
24
34
  @base = b
25
35
  else
26
36
  @letter_override = node["number"]
27
- @letter = @letter_override if /^[a-z]$/.match(@letter_override)
37
+ @letter = @letter_override if /^[a-zA-Z]$/.match(@letter_override)
28
38
  end
29
39
  end
30
40
  end
31
41
 
32
42
  def sequence_increment(node)
33
43
  if node["number"]
34
- @base = ""
35
- @number_override = node["number"]
44
+ @base = @letter_override = @number_override = ""
36
45
  /^(?<b>.*?)(?<n>\d+)$/ =~ node["number"]
37
- unless n.nil? || n.empty?
46
+ if blank?(n)
47
+ @num = nil
48
+ @base = node["number"][0..-2]
49
+ @letter = @letter_override = node["number"][-1]
50
+ else
51
+ @number_override = node["number"]
38
52
  @num = n.to_i
39
53
  @base = b
54
+ @letter = ""
40
55
  end
41
56
  else
42
57
  @num += 1
@@ -47,9 +62,20 @@ module IsoDoc::XrefGen
47
62
  if node["number"]
48
63
  @base = ""
49
64
  @letter_override = node["number"]
50
- /^(?<b>.*?)(?<n>\d*)(?<a>[a-z]+)$/ =~ node["number"]
51
- unless a.empty?
52
- @letter = a
65
+ /^(?<b>.*?)(?<n>\d*)(?<a>[a-zA-Z])$/ =~ node["number"]
66
+ if blank?(a)
67
+ if /^\d+$/.match(node["number"])
68
+ @letter_override = @letter = ""
69
+ @number_override = @num = node["number"].to_i
70
+ else
71
+ /^(?<b>.*)(?<a>[a-zA-Z])$/ =~ node["number"]
72
+ unless blank?(a)
73
+ @letter = @letter_override = a
74
+ @base = b
75
+ end
76
+ end
77
+ else
78
+ @letter_override = @letter = a
53
79
  @base = b
54
80
  @number_override = @num = n.to_i unless n.empty?
55
81
  end
@@ -58,11 +84,15 @@ module IsoDoc::XrefGen
58
84
  end
59
85
  end
60
86
 
87
+ def blank?(x)
88
+ x.nil? || x.empty?
89
+ end
90
+
61
91
  def increment(node)
62
92
  return self if node["unnumbered"]
63
93
  @letter_override = nil
64
94
  @number_override = nil
65
- if node["subsequence"] != @subseq
95
+ if node["subsequence"] != @subseq && !(blank?(node["subsequence"]) && blank?(@subseq))
66
96
  new_subseq_increment(node)
67
97
  elsif @letter.empty?
68
98
  sequence_increment(node)
@@ -73,7 +103,9 @@ module IsoDoc::XrefGen
73
103
  end
74
104
 
75
105
  def print
76
- "#{@base}#{@number_override || @num}#{@letter_override || @letter}"
106
+ num = @number_override || @num
107
+ num_out = @style == :roman && !num.nil? ? RomanNumerals.to_roman(num) : num
108
+ "#{@base}#{num_out}#{@letter_override || @letter}"
77
109
  end
78
110
 
79
111
  def ol_type(list, depth)