isodoc 1.0.26 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (64)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/macos.yml +12 -8
  3. data/.github/workflows/ubuntu.yml +26 -16
  4. data/.github/workflows/windows.yml +12 -8
  5. data/isodoc.gemspec +2 -2
  6. data/lib/isodoc.rb +2 -0
  7. data/lib/isodoc/common.rb +0 -4
  8. data/lib/isodoc/convert.rb +18 -8
  9. data/lib/isodoc/function/blocks.rb +43 -54
  10. data/lib/isodoc/function/blocks_example_note.rb +108 -0
  11. data/lib/isodoc/function/cleanup.rb +14 -2
  12. data/lib/isodoc/function/i18n.rb +1 -0
  13. data/lib/isodoc/function/inline.rb +76 -82
  14. data/lib/isodoc/function/inline_simple.rb +72 -0
  15. data/lib/isodoc/function/lists.rb +12 -6
  16. data/lib/isodoc/function/references.rb +65 -57
  17. data/lib/isodoc/function/reqt.rb +14 -5
  18. data/lib/isodoc/function/section.rb +8 -11
  19. data/lib/isodoc/function/table.rb +4 -5
  20. data/lib/isodoc/function/terms.rb +3 -3
  21. data/lib/isodoc/function/to_word_html.rb +22 -13
  22. data/lib/isodoc/function/utils.rb +9 -3
  23. data/lib/isodoc/headlesshtml_convert.rb +7 -6
  24. data/lib/isodoc/html_convert.rb +2 -1
  25. data/lib/isodoc/html_function/footnotes.rb +1 -1
  26. data/lib/isodoc/html_function/html.rb +16 -1
  27. data/lib/isodoc/html_function/postprocess.rb +6 -5
  28. data/lib/isodoc/metadata.rb +6 -0
  29. data/lib/isodoc/pdf_convert.rb +8 -6
  30. data/lib/isodoc/presentation_xml_convert.rb +29 -0
  31. data/lib/isodoc/version.rb +1 -1
  32. data/lib/isodoc/word_convert.rb +2 -1
  33. data/lib/isodoc/word_function/body.rb +14 -48
  34. data/lib/isodoc/word_function/footnotes.rb +1 -1
  35. data/lib/isodoc/word_function/inline.rb +75 -0
  36. data/lib/isodoc/word_function/postprocess.rb +1 -0
  37. data/lib/isodoc/word_function/table.rb +3 -3
  38. data/lib/isodoc/xref.rb +59 -0
  39. data/lib/isodoc/{function → xref}/xref_anchor.rb +10 -21
  40. data/lib/isodoc/xref/xref_counter.rb +74 -0
  41. data/lib/isodoc/{function → xref}/xref_gen.rb +9 -22
  42. data/lib/isodoc/{function → xref}/xref_gen_seq.rb +41 -32
  43. data/lib/isodoc/{function → xref}/xref_sect_gen.rb +33 -23
  44. data/lib/isodoc/xslfo_convert.rb +16 -4
  45. data/spec/assets/i18n.yaml +4 -1
  46. data/spec/assets/odf.emf +0 -0
  47. data/spec/assets/odf.svg +4 -0
  48. data/spec/assets/odf1.svg +4 -0
  49. data/spec/isodoc/blocks_spec.rb +240 -59
  50. data/spec/isodoc/cleanup_spec.rb +139 -17
  51. data/spec/isodoc/footnotes_spec.rb +20 -5
  52. data/spec/isodoc/inline_spec.rb +296 -1
  53. data/spec/isodoc/lists_spec.rb +8 -8
  54. data/spec/isodoc/metadata_spec.rb +110 -3
  55. data/spec/isodoc/postproc_spec.rb +10 -14
  56. data/spec/isodoc/presentation_xml_spec.rb +20 -0
  57. data/spec/isodoc/ref_spec.rb +119 -50
  58. data/spec/isodoc/section_spec.rb +84 -18
  59. data/spec/isodoc/table_spec.rb +28 -28
  60. data/spec/isodoc/terms_spec.rb +7 -7
  61. data/spec/isodoc/xref_spec.rb +177 -57
  62. metadata +24 -17
  63. data/lib/isodoc/function/blocks_example.rb +0 -53
  64. data/lib/isodoc/function/xref_counter.rb +0 -50
@@ -49,7 +49,12 @@ module IsoDoc::Function
49
49
  /^(\&[^ \t\r\n#;]+;)/.match(t) ?
50
50
  HTMLEntities.new.encode(HTMLEntities.new.decode(t), :hexadecimal) : t
51
51
  end.join("")
52
- Nokogiri::XML.parse(xml)
52
+ begin
53
+ Nokogiri::XML.parse(xml) { |config| config.strict }
54
+ rescue Nokogiri::XML::SyntaxError => e
55
+ File.open("#{@filename}.#{@format}.err", "w:UTF-8") { |f| f.write xml }
56
+ abort "Malformed Output XML for #{@format}: #{e} (see #{@filename}.#{@format}.err)"
57
+ end
53
58
  end
54
59
 
55
60
  def to_xhtml_fragment(xml)
@@ -116,7 +121,7 @@ module IsoDoc::Function
116
121
  h1.traverse do |x|
117
122
  x.replace(" ") if x.name == "span" && /mso-tab-count/.match(x["style"])
118
123
  x.remove if x.name == "span" && x["class"] == "MsoCommentReference"
119
- x.remove if x.name == "a" && x["epub:type"] == "footnote"
124
+ x.remove if x.name == "a" && x["class"] == "FootnoteRef"
120
125
  x.remove if x.name == "span" && /mso-bookmark/.match(x["style"])
121
126
  x.replace(x.children) if x.name == "a"
122
127
  end
@@ -142,7 +147,7 @@ module IsoDoc::Function
142
147
  end
143
148
 
144
149
  def populate_template(docxml, _format = nil)
145
- meta = @meta.get.merge(@labels || {})
150
+ meta = @meta.get.merge(@labels || {}).merge(@meta.labels || {})
146
151
  template = liquid(docxml)
147
152
  template.render(meta.map { |k, v| [k.to_s, empty2nil(v)] }.to_h).
148
153
  gsub('&lt;', '&#x3c;').gsub('&gt;', '&#x3e;').gsub('&amp;', '&#x26;')
@@ -150,6 +155,7 @@ module IsoDoc::Function
150
155
 
151
156
  def save_dataimage(uri, relative_dir = true)
152
157
  %r{^data:image/(?<imgtype>[^;]+);base64,(?<imgdata>.+)$} =~ uri
158
+ imgtype.sub!(/\+[a-z0-9]+$/, "") # svg+xml
153
159
  imgtype = "png" unless /^[a-z0-9]+$/.match imgtype
154
160
  Tempfile.open(["image", ".#{imgtype}"]) do |f|
155
161
  f.binmode
@@ -16,18 +16,19 @@ module IsoDoc
16
16
 
17
17
  def initialize(options)
18
18
  @format = :html
19
+ @suffix = "headless.html"
19
20
  super
20
21
  end
21
22
 
22
- def convert(filename, file = nil, debug = false)
23
- file = File.read(filename, encoding: "utf-8") if file.nil?
23
+ def convert(input_filename, file = nil, debug = false, output_filename = nil)
24
+ file = File.read(input_filename, encoding: "utf-8") if file.nil?
24
25
  @openmathdelim, @closemathdelim = extract_delims(file)
25
- docxml, outname_html, dir = convert_init(file, filename, debug)
26
- result = convert1(docxml, outname_html, dir)
26
+ docxml, filename, dir = convert_init(file, input_filename, debug)
27
+ result = convert1(docxml, filename, dir)
27
28
  return result if debug
28
- postprocess(result, filename + ".tmp", dir)
29
+ postprocess(result, filename + ".tmp.html", dir)
29
30
  FileUtils.rm_rf dir
30
- strip_head(filename + ".tmp.html", outname_html + ".headless.html")
31
+ strip_head(filename + ".tmp.html", output_filename || "#{filename}.#{@suffix}")
31
32
  FileUtils.rm_rf ["#{filename}.tmp.html", tmpimagedir]
32
33
  end
33
34
 
@@ -16,10 +16,11 @@ module IsoDoc
16
16
 
17
17
  def initialize(options)
18
18
  @format = :html
19
+ @suffix = "html"
19
20
  super
20
21
  end
21
22
 
22
- def convert(filename, file = nil, debug = false)
23
+ def convert(filename, file = nil, debug = false, output_filename = nil)
23
24
  ret = super
24
25
  Dir.exists?(tmpimagedir) and Dir["#{tmpimagedir}/*"].empty? and
25
26
  FileUtils.rm_r tmpimagedir
@@ -65,7 +65,7 @@ module IsoDoc::HtmlFunction
65
65
  return table_footnote_parse(node, out) if (@in_table || @in_figure) &&
66
66
  !node.ancestors.map {|m| m.name }.include?("name")
67
67
  fn = node["reference"] || UUIDTools::UUID.random_create.to_s
68
- attrs = { "epub:type": "footnote", rel: "footnote", href: "#fn:#{fn}" }
68
+ attrs = { class: "FootnoteRef", href: "#fn:#{fn}" }
69
69
  out.a **attrs do |a|
70
70
  a.sup { |sup| sup << fn }
71
71
  end
@@ -3,6 +3,18 @@ require "base64"
3
3
 
4
4
  module IsoDoc::HtmlFunction
5
5
  module Html
6
+ def convert1(docxml, filename, dir)
7
+ @xrefs.parse docxml
8
+ noko do |xml|
9
+ xml.html **{ lang: "#{@lang}" } do |html|
10
+ info docxml, nil
11
+ populate_css()
12
+ html.head { |head| define_head head, filename, dir }
13
+ make_body(html, docxml)
14
+ end
15
+ end.join("\n")
16
+ end
17
+
6
18
  def make_body1(body, _docxml)
7
19
  body.div **{ class: "title-section" } do |div1|
8
20
  div1.p { |p| p << "&nbsp;" } # placeholder
@@ -90,12 +102,15 @@ module IsoDoc::HtmlFunction
90
102
  def sourcecode_parse(node, out)
91
103
  name = node.at(ns("./name"))
92
104
  class1 = "prettyprint #{sourcecodelang(node&.at(ns('./@lang'))&.value)}"
93
- out.pre **attr_code(id: node["id"], class: class1) do |div|
105
+ out.pre **sourcecode_attrs(node).merge(class: class1) do |div|
94
106
  @sourcecode = true
95
107
  node.children.each { |n| parse(n, div) unless n.name == "name" }
96
108
  @sourcecode = false
97
109
  end
98
110
  sourcecode_name_parse(node, out, name)
99
111
  end
112
+
113
+ def table_long_strings_cleanup(docxml)
114
+ end
100
115
  end
101
116
  end
@@ -18,7 +18,7 @@ module IsoDoc::HtmlFunction
18
18
  #result = populate_template(result, :html)
19
19
  result = from_xhtml(move_images(to_xhtml(result)))
20
20
  result = html5(script_cdata(inject_script(result)))
21
- File.open("#{filename}.html", "w:UTF-8") { |f| f.write(result) }
21
+ File.open(filename, "w:UTF-8") { |f| f.write(result) }
22
22
  end
23
23
 
24
24
  def html5(doc)
@@ -159,7 +159,8 @@ module IsoDoc::HtmlFunction
159
159
  def inject_script(doc)
160
160
  return doc unless @scripts
161
161
  scripts = File.read(@scripts, encoding: "UTF-8")
162
- doc.sub("</body>", scripts + "\n</body>")
162
+ a = doc.split(%r{</body>})
163
+ a[0] + scripts + "</body>" + a[1]
163
164
  end
164
165
 
165
166
  def update_footnote_filter(fn, x, i, seen)
@@ -178,7 +179,7 @@ module IsoDoc::HtmlFunction
178
179
  def html_footnote_filter(docxml)
179
180
  seen = {}
180
181
  i = 1
181
- docxml.xpath('//a[@epub:type = "footnote"]').each do |x|
182
+ docxml.xpath('//a[@class = "FootnoteRef"]').each do |x|
182
183
  fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || next
183
184
  i, seen = update_footnote_filter(fn, x, i, seen)
184
185
  end
@@ -187,7 +188,7 @@ module IsoDoc::HtmlFunction
187
188
 
188
189
  def footnote_backlinks(docxml)
189
190
  seen = {}
190
- docxml.xpath('//a[@epub:type = "footnote"]').each_with_index do |x, i|
191
+ docxml.xpath('//a[@class = "FootnoteRef"]').each_with_index do |x, i|
191
192
  seen[x["href"]] and next or seen[x["href"]] = true
192
193
  fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || next
193
194
  xdup = x.dup
@@ -200,7 +201,7 @@ module IsoDoc::HtmlFunction
200
201
  end
201
202
 
202
203
  def footnote_format(docxml)
203
- docxml.xpath("//a[@epub:type = 'footnote']/sup").each do |x|
204
+ docxml.xpath("//a[@class = 'FootnoteRef']/sup").each do |x|
204
205
  footnote_reference_format(x)
205
206
  end
206
207
  docxml.xpath("//a[@class = 'TableFootnoteRef'] | "\
@@ -218,5 +218,11 @@ module IsoDoc
218
218
  a = xml.at(ns("//bibdata/uri[@type = 'pdf']")) and set(:pdf, a.text)
219
219
  a = xml.at(ns("//bibdata/uri[@type = 'doc']")) and set(:doc, a.text)
220
220
  end
221
+
222
+ def keywords(isoxml, _out)
223
+ ret = []
224
+ isoxml.xpath(ns("//bibdata/keyword")).each { |kw| ret << kw.text }
225
+ set(:keywords, ret)
226
+ end
221
227
  end
222
228
  end
@@ -24,19 +24,21 @@ module IsoDoc
24
24
 
25
25
  def initialize(options)
26
26
  @format = :pdf
27
+ @suffix = "pdf"
27
28
  super
28
29
  end
29
30
 
30
- def convert(filename, file = nil, debug = false)
31
- file = File.read(filename, encoding: "utf-8") if file.nil?
31
+ def convert(input_filename, file = nil, debug = false, output_filename = nil)
32
+ file = File.read(input_filename, encoding: "utf-8") if file.nil?
32
33
  @openmathdelim, @closemathdelim = extract_delims(file)
33
- docxml, outname_html, dir = convert_init(file, filename, debug)
34
+ docxml, filename, dir = convert_init(file, input_filename, debug)
34
35
  result = convert1(docxml, filename, dir)
35
36
  return result if debug
36
- postprocess(result, filename, dir)
37
+ postprocess(result, filename + ".tmp.html", dir)
37
38
  FileUtils.rm_rf dir
38
- ::Metanorma::Output::Pdf.new.convert("#{filename}.html", outname_html + ".pdf")
39
- FileUtils.rm_rf ["#{filename}.html", tmpimagedir]
39
+ ::Metanorma::Output::Pdf.new.convert("#{filename}.tmp.html",
40
+ output_filename || "#{filename}.#{@suffix}")
41
+ FileUtils.rm_rf ["#{filename}.tmp.html", tmpimagedir]
40
42
  end
41
43
 
42
44
  def xref_parse(node, out)
@@ -0,0 +1,29 @@
1
+ module IsoDoc
2
+ class PresentationXMLConvert < ::IsoDoc::Convert
3
+ def initialize(options)
4
+ @format = :presentation
5
+ @suffix = "presentation.xml"
6
+ super
7
+ end
8
+
9
+ def convert1(docxml, filename, dir)
10
+ @xrefs.parse docxml
11
+ info docxml, nil
12
+ docxml.to_xml
13
+ end
14
+
15
+ def postprocess(result, filename, dir)
16
+ #result = from_xhtml(cleanup(to_xhtml(textcleanup(result))))
17
+ toXML(result, filename)
18
+ @files_to_delete.each { |f| FileUtils.rm_rf f }
19
+ end
20
+
21
+ def toXML(result, filename)
22
+ #result = (from_xhtml(html_cleanup(to_xhtml(result))))
23
+ #result = from_xhtml(move_images(to_xhtml(result)))
24
+ #result = html5(script_cdata(inject_script(result)))
25
+ File.open(filename, "w:UTF-8") { |f| f.write(result) }
26
+ end
27
+ end
28
+ end
29
+
@@ -1,3 +1,3 @@
1
1
  module IsoDoc
2
- VERSION = "1.0.26".freeze
2
+ VERSION = "1.1.1".freeze
3
3
  end
@@ -32,10 +32,11 @@ module IsoDoc
32
32
 
33
33
  def initialize(options)
34
34
  @format = :doc
35
+ @suffix = "doc"
35
36
  super
36
37
  end
37
38
 
38
- def convert(filename, file = nil, debug = false)
39
+ def convert(filename, file = nil, debug = false, output_filename = nil)
39
40
  ret = super
40
41
  FileUtils.rm_rf tmpimagedir
41
42
  ret
@@ -1,4 +1,5 @@
1
1
  require_relative "./table.rb"
2
+ require_relative "./inline.rb"
2
3
 
3
4
  module IsoDoc::WordFunction
4
5
  module Body
@@ -69,28 +70,6 @@ module IsoDoc::WordFunction
69
70
  node.xpath(ns("./note")).each { |n| parse(n, out) }
70
71
  end
71
72
 
72
- def section_break(body)
73
- body.p do |p|
74
- p.br **{ clear: "all", class: "section" }
75
- end
76
- end
77
-
78
- def page_break(out)
79
- out.p do |p|
80
- p.br **{ clear: "all",
81
- style: "mso-special-character:line-break;"\
82
- "page-break-before:always" }
83
- end
84
- end
85
-
86
- def pagebreak_parse(node, out)
87
- return page_break(out) if node["orientation"].nil?
88
- out.p do |p|
89
- p.br **{clear: "all", class: "section",
90
- orientation: node["orientation"] }
91
- end
92
- end
93
-
94
73
  WORD_DT_ATTRS = {class: @note ? "Note" : nil, align: "left",
95
74
  style: "margin-left:0pt;text-align:left;"}.freeze
96
75
 
@@ -174,10 +153,10 @@ module IsoDoc::WordFunction
174
153
  end
175
154
 
176
155
  def termnote_parse(node, out)
177
- out.div **{ class: "Note" } do |div|
156
+ out.div **note_attrs(node) do |div|
178
157
  first = node.first_element_child
179
158
  div.p **{ class: "Note" } do |p|
180
- anchor = get_anchors[node['id']]
159
+ anchor = @xrefs.get[node['id']]
181
160
  p << "#{anchor&.dig(:label) || '???'}: "
182
161
  para_then_remainder(first, node, p, div)
183
162
  end
@@ -185,41 +164,23 @@ module IsoDoc::WordFunction
185
164
  end
186
165
 
187
166
  def para_attrs(node)
188
- attrs = { class: para_class(node), id: node["id"] }
167
+ attrs = { class: para_class(node), id: node["id"], style: "" }
189
168
  unless node["align"].nil?
190
169
  attrs[:align] = node["align"] unless node["align"] == "justify"
191
- attrs[:style] = "text-align:#{node['align']}"
170
+ attrs[:style] += "text-align:#{node['align']};"
192
171
  end
172
+ attrs[:style] += "#{keep_style(node)}"
173
+ attrs[:style] = nil if attrs[:style].empty?
193
174
  attrs
194
175
  end
195
176
 
196
- def imgsrc(uri)
197
- return uri unless %r{^data:image/}.match uri
198
- save_dataimage(uri)
199
- end
200
-
201
- def image_parse(node, out, caption)
202
- attrs = { src: imgsrc(node["src"]),
203
- height: node["height"],
204
- alt: node["alt"],
205
- title: node["title"],
206
- width: node["width"] }
207
- out.img **attr_code(attrs)
208
- image_title_parse(out, caption)
209
- end
210
-
211
- def xref_parse(node, out)
212
- target = /#/.match(node["target"]) ? node["target"].sub(/#/, ".doc#") :
213
- "##{node["target"]}"
214
- out.a(**{ "href": target }) { |l| l << get_linkend(node) }
215
- end
216
-
217
177
  def example_table_attr(node)
218
178
  super.merge({
219
179
  style: "mso-table-lspace:15.0cm;margin-left:423.0pt;"\
220
180
  "mso-table-rspace:15.0cm;margin-right:423.0pt;"\
221
181
  "mso-table-anchor-horizontal:column;"\
222
- "mso-table-overlap:never;border-collapse:collapse;"
182
+ "mso-table-overlap:never;border-collapse:collapse;"\
183
+ "#{keep_style(node)}"
223
184
  })
224
185
  end
225
186
 
@@ -240,5 +201,10 @@ module IsoDoc::WordFunction
240
201
  node.children.each { |n| parse(n, li) }
241
202
  end
242
203
  end
204
+
205
+ def suffix_url(url)
206
+ return url if %r{^http[s]?://}.match(url)
207
+ url.sub(/#{File.extname(url)}$/, ".doc")
208
+ end
243
209
  end
244
210
  end
@@ -84,7 +84,7 @@ module IsoDoc::WordFunction
84
84
  return seen_footnote_parse(node, out, fn) if @seen_footnote.include?(fn)
85
85
  @fn_bookmarks[fn] = bookmarkid
86
86
  out.span **{style: "mso-bookmark:_Ref#{@fn_bookmarks[fn]}"} do |s|
87
- s.a **{ "epub:type": "footnote", href: "#ftn#{fn}" } do |a|
87
+ s.a **{ "class": "FootnoteRef", "epub:type": "footnote", href: "#ftn#{fn}" } do |a|
88
88
  a.sup { |sup| sup << fn }
89
89
  end
90
90
  end
@@ -0,0 +1,75 @@
1
+ module IsoDoc::WordFunction
2
+ module Body
3
+ def section_break(body)
4
+ body.p do |p|
5
+ p.br **{ clear: "all", class: "section" }
6
+ end
7
+ end
8
+
9
+ def page_break(out)
10
+ out.p do |p|
11
+ p.br **{ clear: "all",
12
+ style: "mso-special-character:line-break;"\
13
+ "page-break-before:always" }
14
+ end
15
+ end
16
+
17
+ def pagebreak_parse(node, out)
18
+ return page_break(out) if node["orientation"].nil?
19
+ out.p do |p|
20
+ p.br **{clear: "all", class: "section",
21
+ orientation: node["orientation"] }
22
+ end
23
+ end
24
+
25
+ def imgsrc(node)
26
+ ret = svg_to_emf(node) and return ret
27
+ return node["src"] unless %r{^data:image/}.match node["src"]
28
+ save_dataimage(node["src"])
29
+ end
30
+
31
+ def image_parse(node, out, caption)
32
+ attrs = { src: imgsrc(node),
33
+ height: node["height"],
34
+ alt: node["alt"],
35
+ title: node["title"],
36
+ width: node["width"] }
37
+ out.img **attr_code(attrs)
38
+ image_title_parse(out, caption)
39
+ end
40
+
41
+ def svg_to_emf_filename(uri)
42
+ File.join(File.dirname(uri), File.basename(uri, ".*")) + ".emf"
43
+ end
44
+
45
+ def svg_to_emf(node)
46
+ return unless node["mimetype"] == "image/svg+xml"
47
+ uri = node["src"]
48
+ %r{^data:image/}.match(uri) and uri = save_dataimage(uri)
49
+ ret = svg_to_emf_filename(uri)
50
+ File.exists?(ret) and return ret
51
+ exe = inkscape_installed? or return nil
52
+ system %(#{exe} --export-type="emf" #{uri}) and
53
+ return ret
54
+ nil
55
+ end
56
+
57
+ def inkscape_installed?
58
+ cmd = "inkscape"
59
+ exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : ['']
60
+ ENV['PATH'].split(File::PATH_SEPARATOR).each do |path|
61
+ exts.each do |ext|
62
+ exe = File.join(path, "#{cmd}#{ext}")
63
+ return exe if File.executable?(exe) && !File.directory?(exe)
64
+ end
65
+ end
66
+ nil
67
+ end
68
+
69
+ def xref_parse(node, out)
70
+ target = /#/.match(node["target"]) ? node["target"].sub(/#/, ".doc#") :
71
+ "##{node["target"]}"
72
+ out.a(**{ "href": target }) { |l| l << get_linkend(node) }
73
+ end
74
+ end
75
+ end