isodoc 1.0.26 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (64)
  1. checksums.yaml +4 -4
  2. data/.github/workflows/macos.yml +12 -8
  3. data/.github/workflows/ubuntu.yml +26 -16
  4. data/.github/workflows/windows.yml +12 -8
  5. data/isodoc.gemspec +2 -2
  6. data/lib/isodoc.rb +2 -0
  7. data/lib/isodoc/common.rb +0 -4
  8. data/lib/isodoc/convert.rb +18 -8
  9. data/lib/isodoc/function/blocks.rb +43 -54
  10. data/lib/isodoc/function/blocks_example_note.rb +108 -0
  11. data/lib/isodoc/function/cleanup.rb +14 -2
  12. data/lib/isodoc/function/i18n.rb +1 -0
  13. data/lib/isodoc/function/inline.rb +76 -82
  14. data/lib/isodoc/function/inline_simple.rb +72 -0
  15. data/lib/isodoc/function/lists.rb +12 -6
  16. data/lib/isodoc/function/references.rb +65 -57
  17. data/lib/isodoc/function/reqt.rb +14 -5
  18. data/lib/isodoc/function/section.rb +8 -11
  19. data/lib/isodoc/function/table.rb +4 -5
  20. data/lib/isodoc/function/terms.rb +3 -3
  21. data/lib/isodoc/function/to_word_html.rb +22 -13
  22. data/lib/isodoc/function/utils.rb +9 -3
  23. data/lib/isodoc/headlesshtml_convert.rb +7 -6
  24. data/lib/isodoc/html_convert.rb +2 -1
  25. data/lib/isodoc/html_function/footnotes.rb +1 -1
  26. data/lib/isodoc/html_function/html.rb +16 -1
  27. data/lib/isodoc/html_function/postprocess.rb +6 -5
  28. data/lib/isodoc/metadata.rb +6 -0
  29. data/lib/isodoc/pdf_convert.rb +8 -6
  30. data/lib/isodoc/presentation_xml_convert.rb +29 -0
  31. data/lib/isodoc/version.rb +1 -1
  32. data/lib/isodoc/word_convert.rb +2 -1
  33. data/lib/isodoc/word_function/body.rb +14 -48
  34. data/lib/isodoc/word_function/footnotes.rb +1 -1
  35. data/lib/isodoc/word_function/inline.rb +75 -0
  36. data/lib/isodoc/word_function/postprocess.rb +1 -0
  37. data/lib/isodoc/word_function/table.rb +3 -3
  38. data/lib/isodoc/xref.rb +59 -0
  39. data/lib/isodoc/{function → xref}/xref_anchor.rb +10 -21
  40. data/lib/isodoc/xref/xref_counter.rb +74 -0
  41. data/lib/isodoc/{function → xref}/xref_gen.rb +9 -22
  42. data/lib/isodoc/{function → xref}/xref_gen_seq.rb +41 -32
  43. data/lib/isodoc/{function → xref}/xref_sect_gen.rb +33 -23
  44. data/lib/isodoc/xslfo_convert.rb +16 -4
  45. data/spec/assets/i18n.yaml +4 -1
  46. data/spec/assets/odf.emf +0 -0
  47. data/spec/assets/odf.svg +4 -0
  48. data/spec/assets/odf1.svg +4 -0
  49. data/spec/isodoc/blocks_spec.rb +240 -59
  50. data/spec/isodoc/cleanup_spec.rb +139 -17
  51. data/spec/isodoc/footnotes_spec.rb +20 -5
  52. data/spec/isodoc/inline_spec.rb +296 -1
  53. data/spec/isodoc/lists_spec.rb +8 -8
  54. data/spec/isodoc/metadata_spec.rb +110 -3
  55. data/spec/isodoc/postproc_spec.rb +10 -14
  56. data/spec/isodoc/presentation_xml_spec.rb +20 -0
  57. data/spec/isodoc/ref_spec.rb +119 -50
  58. data/spec/isodoc/section_spec.rb +84 -18
  59. data/spec/isodoc/table_spec.rb +28 -28
  60. data/spec/isodoc/terms_spec.rb +7 -7
  61. data/spec/isodoc/xref_spec.rb +177 -57
  62. metadata +24 -17
  63. data/lib/isodoc/function/blocks_example.rb +0 -53
  64. data/lib/isodoc/function/xref_counter.rb +0 -50
@@ -49,7 +49,12 @@ module IsoDoc::Function
49
49
  /^(\&[^ \t\r\n#;]+;)/.match(t) ?
50
50
  HTMLEntities.new.encode(HTMLEntities.new.decode(t), :hexadecimal) : t
51
51
  end.join("")
52
- Nokogiri::XML.parse(xml)
52
+ begin
53
+ Nokogiri::XML.parse(xml) { |config| config.strict }
54
+ rescue Nokogiri::XML::SyntaxError => e
55
+ File.open("#{@filename}.#{@format}.err", "w:UTF-8") { |f| f.write xml }
56
+ abort "Malformed Output XML for #{@format}: #{e} (see #{@filename}.#{@format}.err)"
57
+ end
53
58
  end
54
59
 
55
60
  def to_xhtml_fragment(xml)
@@ -116,7 +121,7 @@ module IsoDoc::Function
116
121
  h1.traverse do |x|
117
122
  x.replace(" ") if x.name == "span" && /mso-tab-count/.match(x["style"])
118
123
  x.remove if x.name == "span" && x["class"] == "MsoCommentReference"
119
- x.remove if x.name == "a" && x["epub:type"] == "footnote"
124
+ x.remove if x.name == "a" && x["class"] == "FootnoteRef"
120
125
  x.remove if x.name == "span" && /mso-bookmark/.match(x["style"])
121
126
  x.replace(x.children) if x.name == "a"
122
127
  end
@@ -142,7 +147,7 @@ module IsoDoc::Function
142
147
  end
143
148
 
144
149
  def populate_template(docxml, _format = nil)
145
- meta = @meta.get.merge(@labels || {})
150
+ meta = @meta.get.merge(@labels || {}).merge(@meta.labels || {})
146
151
  template = liquid(docxml)
147
152
  template.render(meta.map { |k, v| [k.to_s, empty2nil(v)] }.to_h).
148
153
  gsub('&lt;', '&#x3c;').gsub('&gt;', '&#x3e;').gsub('&amp;', '&#x26;')
@@ -150,6 +155,7 @@ module IsoDoc::Function
150
155
 
151
156
  def save_dataimage(uri, relative_dir = true)
152
157
  %r{^data:image/(?<imgtype>[^;]+);base64,(?<imgdata>.+)$} =~ uri
158
+ imgtype.sub!(/\+[a-z0-9]+$/, "") # svg+xml
153
159
  imgtype = "png" unless /^[a-z0-9]+$/.match imgtype
154
160
  Tempfile.open(["image", ".#{imgtype}"]) do |f|
155
161
  f.binmode
@@ -16,18 +16,19 @@ module IsoDoc
16
16
 
17
17
  def initialize(options)
18
18
  @format = :html
19
+ @suffix = "headless.html"
19
20
  super
20
21
  end
21
22
 
22
- def convert(filename, file = nil, debug = false)
23
- file = File.read(filename, encoding: "utf-8") if file.nil?
23
+ def convert(input_filename, file = nil, debug = false, output_filename = nil)
24
+ file = File.read(input_filename, encoding: "utf-8") if file.nil?
24
25
  @openmathdelim, @closemathdelim = extract_delims(file)
25
- docxml, outname_html, dir = convert_init(file, filename, debug)
26
- result = convert1(docxml, outname_html, dir)
26
+ docxml, filename, dir = convert_init(file, input_filename, debug)
27
+ result = convert1(docxml, filename, dir)
27
28
  return result if debug
28
- postprocess(result, filename + ".tmp", dir)
29
+ postprocess(result, filename + ".tmp.html", dir)
29
30
  FileUtils.rm_rf dir
30
- strip_head(filename + ".tmp.html", outname_html + ".headless.html")
31
+ strip_head(filename + ".tmp.html", output_filename || "#{filename}.#{@suffix}")
31
32
  FileUtils.rm_rf ["#{filename}.tmp.html", tmpimagedir]
32
33
  end
33
34
 
@@ -16,10 +16,11 @@ module IsoDoc
16
16
 
17
17
  def initialize(options)
18
18
  @format = :html
19
+ @suffix = "html"
19
20
  super
20
21
  end
21
22
 
22
- def convert(filename, file = nil, debug = false)
23
+ def convert(filename, file = nil, debug = false, output_filename = nil)
23
24
  ret = super
24
25
  Dir.exists?(tmpimagedir) and Dir["#{tmpimagedir}/*"].empty? and
25
26
  FileUtils.rm_r tmpimagedir
@@ -65,7 +65,7 @@ module IsoDoc::HtmlFunction
65
65
  return table_footnote_parse(node, out) if (@in_table || @in_figure) &&
66
66
  !node.ancestors.map {|m| m.name }.include?("name")
67
67
  fn = node["reference"] || UUIDTools::UUID.random_create.to_s
68
- attrs = { "epub:type": "footnote", rel: "footnote", href: "#fn:#{fn}" }
68
+ attrs = { class: "FootnoteRef", href: "#fn:#{fn}" }
69
69
  out.a **attrs do |a|
70
70
  a.sup { |sup| sup << fn }
71
71
  end
@@ -3,6 +3,18 @@ require "base64"
3
3
 
4
4
  module IsoDoc::HtmlFunction
5
5
  module Html
6
+ def convert1(docxml, filename, dir)
7
+ @xrefs.parse docxml
8
+ noko do |xml|
9
+ xml.html **{ lang: "#{@lang}" } do |html|
10
+ info docxml, nil
11
+ populate_css()
12
+ html.head { |head| define_head head, filename, dir }
13
+ make_body(html, docxml)
14
+ end
15
+ end.join("\n")
16
+ end
17
+
6
18
  def make_body1(body, _docxml)
7
19
  body.div **{ class: "title-section" } do |div1|
8
20
  div1.p { |p| p << "&nbsp;" } # placeholder
@@ -90,12 +102,15 @@ module IsoDoc::HtmlFunction
90
102
  def sourcecode_parse(node, out)
91
103
  name = node.at(ns("./name"))
92
104
  class1 = "prettyprint #{sourcecodelang(node&.at(ns('./@lang'))&.value)}"
93
- out.pre **attr_code(id: node["id"], class: class1) do |div|
105
+ out.pre **sourcecode_attrs(node).merge(class: class1) do |div|
94
106
  @sourcecode = true
95
107
  node.children.each { |n| parse(n, div) unless n.name == "name" }
96
108
  @sourcecode = false
97
109
  end
98
110
  sourcecode_name_parse(node, out, name)
99
111
  end
112
+
113
+ def table_long_strings_cleanup(docxml)
114
+ end
100
115
  end
101
116
  end
@@ -18,7 +18,7 @@ module IsoDoc::HtmlFunction
18
18
  #result = populate_template(result, :html)
19
19
  result = from_xhtml(move_images(to_xhtml(result)))
20
20
  result = html5(script_cdata(inject_script(result)))
21
- File.open("#{filename}.html", "w:UTF-8") { |f| f.write(result) }
21
+ File.open(filename, "w:UTF-8") { |f| f.write(result) }
22
22
  end
23
23
 
24
24
  def html5(doc)
@@ -159,7 +159,8 @@ module IsoDoc::HtmlFunction
159
159
  def inject_script(doc)
160
160
  return doc unless @scripts
161
161
  scripts = File.read(@scripts, encoding: "UTF-8")
162
- doc.sub("</body>", scripts + "\n</body>")
162
+ a = doc.split(%r{</body>})
163
+ a[0] + scripts + "</body>" + a[1]
163
164
  end
164
165
 
165
166
  def update_footnote_filter(fn, x, i, seen)
@@ -178,7 +179,7 @@ module IsoDoc::HtmlFunction
178
179
  def html_footnote_filter(docxml)
179
180
  seen = {}
180
181
  i = 1
181
- docxml.xpath('//a[@epub:type = "footnote"]').each do |x|
182
+ docxml.xpath('//a[@class = "FootnoteRef"]').each do |x|
182
183
  fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || next
183
184
  i, seen = update_footnote_filter(fn, x, i, seen)
184
185
  end
@@ -187,7 +188,7 @@ module IsoDoc::HtmlFunction
187
188
 
188
189
  def footnote_backlinks(docxml)
189
190
  seen = {}
190
- docxml.xpath('//a[@epub:type = "footnote"]').each_with_index do |x, i|
191
+ docxml.xpath('//a[@class = "FootnoteRef"]').each_with_index do |x, i|
191
192
  seen[x["href"]] and next or seen[x["href"]] = true
192
193
  fn = docxml.at(%<//*[@id = '#{x['href'].sub(/^#/, '')}']>) || next
193
194
  xdup = x.dup
@@ -200,7 +201,7 @@ module IsoDoc::HtmlFunction
200
201
  end
201
202
 
202
203
  def footnote_format(docxml)
203
- docxml.xpath("//a[@epub:type = 'footnote']/sup").each do |x|
204
+ docxml.xpath("//a[@class = 'FootnoteRef']/sup").each do |x|
204
205
  footnote_reference_format(x)
205
206
  end
206
207
  docxml.xpath("//a[@class = 'TableFootnoteRef'] | "\
@@ -218,5 +218,11 @@ module IsoDoc
218
218
  a = xml.at(ns("//bibdata/uri[@type = 'pdf']")) and set(:pdf, a.text)
219
219
  a = xml.at(ns("//bibdata/uri[@type = 'doc']")) and set(:doc, a.text)
220
220
  end
221
+
222
+ def keywords(isoxml, _out)
223
+ ret = []
224
+ isoxml.xpath(ns("//bibdata/keyword")).each { |kw| ret << kw.text }
225
+ set(:keywords, ret)
226
+ end
221
227
  end
222
228
  end
@@ -24,19 +24,21 @@ module IsoDoc
24
24
 
25
25
  def initialize(options)
26
26
  @format = :pdf
27
+ @suffix = "pdf"
27
28
  super
28
29
  end
29
30
 
30
- def convert(filename, file = nil, debug = false)
31
- file = File.read(filename, encoding: "utf-8") if file.nil?
31
+ def convert(input_filename, file = nil, debug = false, output_filename = nil)
32
+ file = File.read(input_filename, encoding: "utf-8") if file.nil?
32
33
  @openmathdelim, @closemathdelim = extract_delims(file)
33
- docxml, outname_html, dir = convert_init(file, filename, debug)
34
+ docxml, filename, dir = convert_init(file, input_filename, debug)
34
35
  result = convert1(docxml, filename, dir)
35
36
  return result if debug
36
- postprocess(result, filename, dir)
37
+ postprocess(result, filename + ".tmp.html", dir)
37
38
  FileUtils.rm_rf dir
38
- ::Metanorma::Output::Pdf.new.convert("#{filename}.html", outname_html + ".pdf")
39
- FileUtils.rm_rf ["#{filename}.html", tmpimagedir]
39
+ ::Metanorma::Output::Pdf.new.convert("#{filename}.tmp.html",
40
+ output_filename || "#{filename}.#{@suffix}")
41
+ FileUtils.rm_rf ["#{filename}.tmp.html", tmpimagedir]
40
42
  end
41
43
 
42
44
  def xref_parse(node, out)
@@ -0,0 +1,29 @@
1
+ module IsoDoc
2
+ class PresentationXMLConvert < ::IsoDoc::Convert
3
+ def initialize(options)
4
+ @format = :presentation
5
+ @suffix = "presentation.xml"
6
+ super
7
+ end
8
+
9
+ def convert1(docxml, filename, dir)
10
+ @xrefs.parse docxml
11
+ info docxml, nil
12
+ docxml.to_xml
13
+ end
14
+
15
+ def postprocess(result, filename, dir)
16
+ #result = from_xhtml(cleanup(to_xhtml(textcleanup(result))))
17
+ toXML(result, filename)
18
+ @files_to_delete.each { |f| FileUtils.rm_rf f }
19
+ end
20
+
21
+ def toXML(result, filename)
22
+ #result = (from_xhtml(html_cleanup(to_xhtml(result))))
23
+ #result = from_xhtml(move_images(to_xhtml(result)))
24
+ #result = html5(script_cdata(inject_script(result)))
25
+ File.open(filename, "w:UTF-8") { |f| f.write(result) }
26
+ end
27
+ end
28
+ end
29
+
@@ -1,3 +1,3 @@
1
1
  module IsoDoc
2
- VERSION = "1.0.26".freeze
2
+ VERSION = "1.1.1".freeze
3
3
  end
@@ -32,10 +32,11 @@ module IsoDoc
32
32
 
33
33
  def initialize(options)
34
34
  @format = :doc
35
+ @suffix = "doc"
35
36
  super
36
37
  end
37
38
 
38
- def convert(filename, file = nil, debug = false)
39
+ def convert(filename, file = nil, debug = false, output_filename = nil)
39
40
  ret = super
40
41
  FileUtils.rm_rf tmpimagedir
41
42
  ret
@@ -1,4 +1,5 @@
1
1
  require_relative "./table.rb"
2
+ require_relative "./inline.rb"
2
3
 
3
4
  module IsoDoc::WordFunction
4
5
  module Body
@@ -69,28 +70,6 @@ module IsoDoc::WordFunction
69
70
  node.xpath(ns("./note")).each { |n| parse(n, out) }
70
71
  end
71
72
 
72
- def section_break(body)
73
- body.p do |p|
74
- p.br **{ clear: "all", class: "section" }
75
- end
76
- end
77
-
78
- def page_break(out)
79
- out.p do |p|
80
- p.br **{ clear: "all",
81
- style: "mso-special-character:line-break;"\
82
- "page-break-before:always" }
83
- end
84
- end
85
-
86
- def pagebreak_parse(node, out)
87
- return page_break(out) if node["orientation"].nil?
88
- out.p do |p|
89
- p.br **{clear: "all", class: "section",
90
- orientation: node["orientation"] }
91
- end
92
- end
93
-
94
73
  WORD_DT_ATTRS = {class: @note ? "Note" : nil, align: "left",
95
74
  style: "margin-left:0pt;text-align:left;"}.freeze
96
75
 
@@ -174,10 +153,10 @@ module IsoDoc::WordFunction
174
153
  end
175
154
 
176
155
  def termnote_parse(node, out)
177
- out.div **{ class: "Note" } do |div|
156
+ out.div **note_attrs(node) do |div|
178
157
  first = node.first_element_child
179
158
  div.p **{ class: "Note" } do |p|
180
- anchor = get_anchors[node['id']]
159
+ anchor = @xrefs.get[node['id']]
181
160
  p << "#{anchor&.dig(:label) || '???'}: "
182
161
  para_then_remainder(first, node, p, div)
183
162
  end
@@ -185,41 +164,23 @@ module IsoDoc::WordFunction
185
164
  end
186
165
 
187
166
  def para_attrs(node)
188
- attrs = { class: para_class(node), id: node["id"] }
167
+ attrs = { class: para_class(node), id: node["id"], style: "" }
189
168
  unless node["align"].nil?
190
169
  attrs[:align] = node["align"] unless node["align"] == "justify"
191
- attrs[:style] = "text-align:#{node['align']}"
170
+ attrs[:style] += "text-align:#{node['align']};"
192
171
  end
172
+ attrs[:style] += "#{keep_style(node)}"
173
+ attrs[:style] = nil if attrs[:style].empty?
193
174
  attrs
194
175
  end
195
176
 
196
- def imgsrc(uri)
197
- return uri unless %r{^data:image/}.match uri
198
- save_dataimage(uri)
199
- end
200
-
201
- def image_parse(node, out, caption)
202
- attrs = { src: imgsrc(node["src"]),
203
- height: node["height"],
204
- alt: node["alt"],
205
- title: node["title"],
206
- width: node["width"] }
207
- out.img **attr_code(attrs)
208
- image_title_parse(out, caption)
209
- end
210
-
211
- def xref_parse(node, out)
212
- target = /#/.match(node["target"]) ? node["target"].sub(/#/, ".doc#") :
213
- "##{node["target"]}"
214
- out.a(**{ "href": target }) { |l| l << get_linkend(node) }
215
- end
216
-
217
177
  def example_table_attr(node)
218
178
  super.merge({
219
179
  style: "mso-table-lspace:15.0cm;margin-left:423.0pt;"\
220
180
  "mso-table-rspace:15.0cm;margin-right:423.0pt;"\
221
181
  "mso-table-anchor-horizontal:column;"\
222
- "mso-table-overlap:never;border-collapse:collapse;"
182
+ "mso-table-overlap:never;border-collapse:collapse;"\
183
+ "#{keep_style(node)}"
223
184
  })
224
185
  end
225
186
 
@@ -240,5 +201,10 @@ module IsoDoc::WordFunction
240
201
  node.children.each { |n| parse(n, li) }
241
202
  end
242
203
  end
204
+
205
+ def suffix_url(url)
206
+ return url if %r{^http[s]?://}.match(url)
207
+ url.sub(/#{File.extname(url)}$/, ".doc")
208
+ end
243
209
  end
244
210
  end
@@ -84,7 +84,7 @@ module IsoDoc::WordFunction
84
84
  return seen_footnote_parse(node, out, fn) if @seen_footnote.include?(fn)
85
85
  @fn_bookmarks[fn] = bookmarkid
86
86
  out.span **{style: "mso-bookmark:_Ref#{@fn_bookmarks[fn]}"} do |s|
87
- s.a **{ "epub:type": "footnote", href: "#ftn#{fn}" } do |a|
87
+ s.a **{ "class": "FootnoteRef", "epub:type": "footnote", href: "#ftn#{fn}" } do |a|
88
88
  a.sup { |sup| sup << fn }
89
89
  end
90
90
  end
@@ -0,0 +1,75 @@
1
+ module IsoDoc::WordFunction
2
+ module Body
3
+ def section_break(body)
4
+ body.p do |p|
5
+ p.br **{ clear: "all", class: "section" }
6
+ end
7
+ end
8
+
9
+ def page_break(out)
10
+ out.p do |p|
11
+ p.br **{ clear: "all",
12
+ style: "mso-special-character:line-break;"\
13
+ "page-break-before:always" }
14
+ end
15
+ end
16
+
17
+ def pagebreak_parse(node, out)
18
+ return page_break(out) if node["orientation"].nil?
19
+ out.p do |p|
20
+ p.br **{clear: "all", class: "section",
21
+ orientation: node["orientation"] }
22
+ end
23
+ end
24
+
25
+ def imgsrc(node)
26
+ ret = svg_to_emf(node) and return ret
27
+ return node["src"] unless %r{^data:image/}.match node["src"]
28
+ save_dataimage(node["src"])
29
+ end
30
+
31
+ def image_parse(node, out, caption)
32
+ attrs = { src: imgsrc(node),
33
+ height: node["height"],
34
+ alt: node["alt"],
35
+ title: node["title"],
36
+ width: node["width"] }
37
+ out.img **attr_code(attrs)
38
+ image_title_parse(out, caption)
39
+ end
40
+
41
+ def svg_to_emf_filename(uri)
42
+ File.join(File.dirname(uri), File.basename(uri, ".*")) + ".emf"
43
+ end
44
+
45
+ def svg_to_emf(node)
46
+ return unless node["mimetype"] == "image/svg+xml"
47
+ uri = node["src"]
48
+ %r{^data:image/}.match(uri) and uri = save_dataimage(uri)
49
+ ret = svg_to_emf_filename(uri)
50
+ File.exists?(ret) and return ret
51
+ exe = inkscape_installed? or return nil
52
+ system %(#{exe} --export-type="emf" #{uri}) and
53
+ return ret
54
+ nil
55
+ end
56
+
57
+ def inkscape_installed?
58
+ cmd = "inkscape"
59
+ exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : ['']
60
+ ENV['PATH'].split(File::PATH_SEPARATOR).each do |path|
61
+ exts.each do |ext|
62
+ exe = File.join(path, "#{cmd}#{ext}")
63
+ return exe if File.executable?(exe) && !File.directory?(exe)
64
+ end
65
+ end
66
+ nil
67
+ end
68
+
69
+ def xref_parse(node, out)
70
+ target = /#/.match(node["target"]) ? node["target"].sub(/#/, ".doc#") :
71
+ "##{node["target"]}"
72
+ out.a(**{ "href": target }) { |l| l << get_linkend(node) }
73
+ end
74
+ end
75
+ end