isodoc 1.0.25 → 1.1.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (66) hide show
  1. checksums.yaml +4 -4
  2. data/.github/workflows/macos.yml +12 -2
  3. data/.github/workflows/ubuntu.yml +15 -3
  4. data/.github/workflows/windows.yml +12 -2
  5. data/isodoc.gemspec +2 -2
  6. data/lib/isodoc.rb +2 -0
  7. data/lib/isodoc/common.rb +0 -4
  8. data/lib/isodoc/convert.rb +18 -8
  9. data/lib/isodoc/function/blocks.rb +46 -52
  10. data/lib/isodoc/function/blocks_example_note.rb +108 -0
  11. data/lib/isodoc/function/cleanup.rb +14 -2
  12. data/lib/isodoc/function/i18n.rb +1 -0
  13. data/lib/isodoc/function/inline.rb +76 -82
  14. data/lib/isodoc/function/inline_simple.rb +72 -0
  15. data/lib/isodoc/function/lists.rb +12 -6
  16. data/lib/isodoc/function/references.rb +49 -53
  17. data/lib/isodoc/function/reqt.rb +14 -5
  18. data/lib/isodoc/function/section.rb +10 -12
  19. data/lib/isodoc/function/table.rb +4 -5
  20. data/lib/isodoc/function/terms.rb +3 -3
  21. data/lib/isodoc/function/to_word_html.rb +22 -12
  22. data/lib/isodoc/function/utils.rb +9 -3
  23. data/lib/isodoc/headlesshtml_convert.rb +12 -6
  24. data/lib/isodoc/html_convert.rb +7 -1
  25. data/lib/isodoc/html_function/footnotes.rb +1 -1
  26. data/lib/isodoc/html_function/html.rb +16 -1
  27. data/lib/isodoc/html_function/postprocess.rb +6 -5
  28. data/lib/isodoc/metadata.rb +6 -0
  29. data/lib/isodoc/metadata_date.rb +19 -7
  30. data/lib/isodoc/pdf_convert.rb +13 -6
  31. data/lib/isodoc/presentation_xml_convert.rb +29 -0
  32. data/lib/isodoc/version.rb +1 -1
  33. data/lib/isodoc/word_convert.rb +7 -1
  34. data/lib/isodoc/word_function/body.rb +14 -48
  35. data/lib/isodoc/word_function/footnotes.rb +1 -1
  36. data/lib/isodoc/word_function/inline.rb +75 -0
  37. data/lib/isodoc/word_function/postprocess.rb +1 -0
  38. data/lib/isodoc/word_function/table.rb +3 -3
  39. data/lib/isodoc/xref.rb +59 -0
  40. data/lib/isodoc/{function → xref}/xref_anchor.rb +10 -21
  41. data/lib/isodoc/xref/xref_counter.rb +74 -0
  42. data/lib/isodoc/{function → xref}/xref_gen.rb +11 -23
  43. data/lib/isodoc/{function → xref}/xref_gen_seq.rb +41 -32
  44. data/lib/isodoc/{function → xref}/xref_sect_gen.rb +54 -40
  45. data/lib/isodoc/xslfo_convert.rb +21 -4
  46. data/spec/assets/i18n.yaml +4 -1
  47. data/spec/assets/odf.emf +0 -0
  48. data/spec/assets/odf.svg +4 -0
  49. data/spec/assets/odf1.svg +4 -0
  50. data/spec/isodoc/blocks_spec.rb +271 -48
  51. data/spec/isodoc/cleanup_spec.rb +139 -17
  52. data/spec/isodoc/footnotes_spec.rb +20 -5
  53. data/spec/isodoc/i18n_spec.rb +8 -8
  54. data/spec/isodoc/inline_spec.rb +299 -4
  55. data/spec/isodoc/lists_spec.rb +8 -8
  56. data/spec/isodoc/metadata_spec.rb +110 -3
  57. data/spec/isodoc/postproc_spec.rb +10 -14
  58. data/spec/isodoc/presentation_xml_spec.rb +20 -0
  59. data/spec/isodoc/ref_spec.rb +121 -52
  60. data/spec/isodoc/section_spec.rb +232 -208
  61. data/spec/isodoc/table_spec.rb +28 -28
  62. data/spec/isodoc/terms_spec.rb +7 -7
  63. data/spec/isodoc/xref_spec.rb +206 -59
  64. metadata +24 -17
  65. data/lib/isodoc/function/blocks_example.rb +0 -53
  66. data/lib/isodoc/function/xref_counter.rb +0 -50
@@ -22,16 +22,23 @@ module IsoDoc
22
22
  "_pdfimages"
23
23
  end
24
24
 
25
- def convert(filename, file = nil, debug = false)
26
- file = File.read(filename, encoding: "utf-8") if file.nil?
25
+ def initialize(options)
26
+ @format = :pdf
27
+ @suffix = "pdf"
28
+ super
29
+ end
30
+
31
+ def convert(input_filename, file = nil, debug = false, output_filename = nil)
32
+ file = File.read(input_filename, encoding: "utf-8") if file.nil?
27
33
  @openmathdelim, @closemathdelim = extract_delims(file)
28
- docxml, outname_html, dir = convert_init(file, filename, debug)
34
+ docxml, filename, dir = convert_init(file, input_filename, debug)
29
35
  result = convert1(docxml, filename, dir)
30
36
  return result if debug
31
- postprocess(result, filename, dir)
37
+ postprocess(result, filename + ".tmp.html", dir)
32
38
  FileUtils.rm_rf dir
33
- ::Metanorma::Output::Pdf.new.convert("#{filename}.html", outname_html + ".pdf")
34
- FileUtils.rm_rf ["#{filename}.html", tmpimagedir]
39
+ ::Metanorma::Output::Pdf.new.convert("#{filename}.tmp.html",
40
+ output_filename || "#{filename}.#{@suffix}")
41
+ FileUtils.rm_rf ["#{filename}.tmp.html", tmpimagedir]
35
42
  end
36
43
 
37
44
  def xref_parse(node, out)
@@ -0,0 +1,29 @@
1
+ module IsoDoc
2
+ class PresentationXMLConvert < ::IsoDoc::Convert
3
+ def initialize(options)
4
+ @format = :presentation
5
+ @suffix = "presentation.xml"
6
+ super
7
+ end
8
+
9
+ def convert1(docxml, filename, dir)
10
+ @xrefs.parse docxml
11
+ info docxml, nil
12
+ docxml.to_xml
13
+ end
14
+
15
+ def postprocess(result, filename, dir)
16
+ #result = from_xhtml(cleanup(to_xhtml(textcleanup(result))))
17
+ toXML(result, filename)
18
+ @files_to_delete.each { |f| FileUtils.rm_rf f }
19
+ end
20
+
21
+ def toXML(result, filename)
22
+ #result = (from_xhtml(html_cleanup(to_xhtml(result))))
23
+ #result = from_xhtml(move_images(to_xhtml(result)))
24
+ #result = html5(script_cdata(inject_script(result)))
25
+ File.open(filename, "w:UTF-8") { |f| f.write(result) }
26
+ end
27
+ end
28
+ end
29
+
@@ -1,3 +1,3 @@
1
1
  module IsoDoc
2
- VERSION = "1.0.25".freeze
2
+ VERSION = "1.1.0".freeze
3
3
  end
@@ -30,7 +30,13 @@ module IsoDoc
30
30
  include WordFunction::Body
31
31
  include WordFunction::Postprocess
32
32
 
33
- def convert(filename, file = nil, debug = false)
33
+ def initialize(options)
34
+ @format = :doc
35
+ @suffix = "doc"
36
+ super
37
+ end
38
+
39
+ def convert(filename, file = nil, debug = false, output_filename = nil)
34
40
  ret = super
35
41
  FileUtils.rm_rf tmpimagedir
36
42
  ret
@@ -1,4 +1,5 @@
1
1
  require_relative "./table.rb"
2
+ require_relative "./inline.rb"
2
3
 
3
4
  module IsoDoc::WordFunction
4
5
  module Body
@@ -69,28 +70,6 @@ module IsoDoc::WordFunction
69
70
  node.xpath(ns("./note")).each { |n| parse(n, out) }
70
71
  end
71
72
 
72
- def section_break(body)
73
- body.p do |p|
74
- p.br **{ clear: "all", class: "section" }
75
- end
76
- end
77
-
78
- def page_break(out)
79
- out.p do |p|
80
- p.br **{ clear: "all",
81
- style: "mso-special-character:line-break;"\
82
- "page-break-before:always" }
83
- end
84
- end
85
-
86
- def pagebreak_parse(node, out)
87
- return page_break(out) if node["orientation"].nil?
88
- out.p do |p|
89
- p.br **{clear: "all", class: "section",
90
- orientation: node["orientation"] }
91
- end
92
- end
93
-
94
73
  WORD_DT_ATTRS = {class: @note ? "Note" : nil, align: "left",
95
74
  style: "margin-left:0pt;text-align:left;"}.freeze
96
75
 
@@ -174,10 +153,10 @@ module IsoDoc::WordFunction
174
153
  end
175
154
 
176
155
  def termnote_parse(node, out)
177
- out.div **{ class: "Note" } do |div|
156
+ out.div **note_attrs(node) do |div|
178
157
  first = node.first_element_child
179
158
  div.p **{ class: "Note" } do |p|
180
- anchor = get_anchors[node['id']]
159
+ anchor = @xrefs.get[node['id']]
181
160
  p << "#{anchor&.dig(:label) || '???'}: "
182
161
  para_then_remainder(first, node, p, div)
183
162
  end
@@ -185,41 +164,23 @@ module IsoDoc::WordFunction
185
164
  end
186
165
 
187
166
  def para_attrs(node)
188
- attrs = { class: para_class(node), id: node["id"] }
167
+ attrs = { class: para_class(node), id: node["id"], style: "" }
189
168
  unless node["align"].nil?
190
169
  attrs[:align] = node["align"] unless node["align"] == "justify"
191
- attrs[:style] = "text-align:#{node['align']}"
170
+ attrs[:style] += "text-align:#{node['align']};"
192
171
  end
172
+ attrs[:style] += "#{keep_style(node)}"
173
+ attrs[:style] = nil if attrs[:style].empty?
193
174
  attrs
194
175
  end
195
176
 
196
- def imgsrc(uri)
197
- return uri unless %r{^data:image/}.match uri
198
- save_dataimage(uri)
199
- end
200
-
201
- def image_parse(node, out, caption)
202
- attrs = { src: imgsrc(node["src"]),
203
- height: node["height"],
204
- alt: node["alt"],
205
- title: node["title"],
206
- width: node["width"] }
207
- out.img **attr_code(attrs)
208
- image_title_parse(out, caption)
209
- end
210
-
211
- def xref_parse(node, out)
212
- target = /#/.match(node["target"]) ? node["target"].sub(/#/, ".doc#") :
213
- "##{node["target"]}"
214
- out.a(**{ "href": target }) { |l| l << get_linkend(node) }
215
- end
216
-
217
177
  def example_table_attr(node)
218
178
  super.merge({
219
179
  style: "mso-table-lspace:15.0cm;margin-left:423.0pt;"\
220
180
  "mso-table-rspace:15.0cm;margin-right:423.0pt;"\
221
181
  "mso-table-anchor-horizontal:column;"\
222
- "mso-table-overlap:never;border-collapse:collapse;"
182
+ "mso-table-overlap:never;border-collapse:collapse;"\
183
+ "#{keep_style(node)}"
223
184
  })
224
185
  end
225
186
 
@@ -240,5 +201,10 @@ module IsoDoc::WordFunction
240
201
  node.children.each { |n| parse(n, li) }
241
202
  end
242
203
  end
204
+
205
+ def suffix_url(url)
206
+ return url if %r{^http[s]?://}.match(url)
207
+ url.sub(/#{File.extname(url)}$/, ".doc")
208
+ end
243
209
  end
244
210
  end
@@ -84,7 +84,7 @@ module IsoDoc::WordFunction
84
84
  return seen_footnote_parse(node, out, fn) if @seen_footnote.include?(fn)
85
85
  @fn_bookmarks[fn] = bookmarkid
86
86
  out.span **{style: "mso-bookmark:_Ref#{@fn_bookmarks[fn]}"} do |s|
87
- s.a **{ "epub:type": "footnote", href: "#ftn#{fn}" } do |a|
87
+ s.a **{ "class": "FootnoteRef", "epub:type": "footnote", href: "#ftn#{fn}" } do |a|
88
88
  a.sup { |sup| sup << fn }
89
89
  end
90
90
  end
@@ -0,0 +1,75 @@
1
+ module IsoDoc::WordFunction
2
+ module Body
3
+ def section_break(body)
4
+ body.p do |p|
5
+ p.br **{ clear: "all", class: "section" }
6
+ end
7
+ end
8
+
9
+ def page_break(out)
10
+ out.p do |p|
11
+ p.br **{ clear: "all",
12
+ style: "mso-special-character:line-break;"\
13
+ "page-break-before:always" }
14
+ end
15
+ end
16
+
17
+ def pagebreak_parse(node, out)
18
+ return page_break(out) if node["orientation"].nil?
19
+ out.p do |p|
20
+ p.br **{clear: "all", class: "section",
21
+ orientation: node["orientation"] }
22
+ end
23
+ end
24
+
25
+ def imgsrc(node)
26
+ ret = svg_to_emf(node) and return ret
27
+ return node["src"] unless %r{^data:image/}.match node["src"]
28
+ save_dataimage(node["src"])
29
+ end
30
+
31
+ def image_parse(node, out, caption)
32
+ attrs = { src: imgsrc(node),
33
+ height: node["height"],
34
+ alt: node["alt"],
35
+ title: node["title"],
36
+ width: node["width"] }
37
+ out.img **attr_code(attrs)
38
+ image_title_parse(out, caption)
39
+ end
40
+
41
+ def svg_to_emf_filename(uri)
42
+ File.join(File.dirname(uri), File.basename(uri, ".*")) + ".emf"
43
+ end
44
+
45
+ def svg_to_emf(node)
46
+ return unless node["mimetype"] == "image/svg+xml"
47
+ uri = node["src"]
48
+ %r{^data:image/}.match(uri) and uri = save_dataimage(uri)
49
+ ret = svg_to_emf_filename(uri)
50
+ File.exists?(ret) and return ret
51
+ exe = inkscape_installed? or return nil
52
+ system %(#{exe} --export-type="emf" #{uri}) and
53
+ return ret
54
+ nil
55
+ end
56
+
57
+ def inkscape_installed?
58
+ cmd = "inkscape"
59
+ exts = ENV['PATHEXT'] ? ENV['PATHEXT'].split(';') : ['']
60
+ ENV['PATH'].split(File::PATH_SEPARATOR).each do |path|
61
+ exts.each do |ext|
62
+ exe = File.join(path, "#{cmd}#{ext}")
63
+ return exe if File.executable?(exe) && !File.directory?(exe)
64
+ end
65
+ end
66
+ nil
67
+ end
68
+
69
+ def xref_parse(node, out)
70
+ target = /#/.match(node["target"]) ? node["target"].sub(/#/, ".doc#") :
71
+ "##{node["target"]}"
72
+ out.a(**{ "href": target }) { |l| l << get_linkend(node) }
73
+ end
74
+ end
75
+ end
@@ -32,6 +32,7 @@ xmlns:m="http://schemas.microsoft.com/office/2004/12/omml">
32
32
  end
33
33
 
34
34
  def postprocess(result, filename, dir)
35
+ filename = filename.sub(/\.doc$/, "")
35
36
  header = generate_header(filename, dir)
36
37
  result = from_xhtml(cleanup(to_xhtml(textcleanup(result))))
37
38
  toWord(result, filename, dir, header)
@@ -33,12 +33,12 @@ module IsoDoc::WordFunction
33
33
  align: td["align"], style: style.gsub(/\n/, "") }
34
34
  end
35
35
 
36
- def make_table_attr(node)
36
+ def table_attrs(node)
37
37
  super.merge(attr_code({
38
38
  summary: node["summary"],
39
39
  width: node["width"],
40
40
  style: "mso-table-anchor-horizontal:column;"\
41
- "mso-table-overlap:never;border-spacing:0;border-width:1px;"
41
+ "mso-table-overlap:never;border-spacing:0;border-width:1px;#{keep_style(node)}"
42
42
  }))
43
43
  end
44
44
 
@@ -46,7 +46,7 @@ module IsoDoc::WordFunction
46
46
  @in_table = true
47
47
  table_title_parse(node, out)
48
48
  out.div **{ align: "center", class: "table_container" } do |div|
49
- div.table **make_table_attr(node) do |t|
49
+ div.table **table_attrs(node) do |t|
50
50
  thead_parse(node, t)
51
51
  tbody_parse(node, t)
52
52
  tfoot_parse(node, t)
@@ -0,0 +1,59 @@
1
+ require_relative "xref/xref_anchor"
2
+ require_relative "xref/xref_counter"
3
+ require_relative "xref/xref_gen_seq"
4
+ require_relative "xref/xref_gen"
5
+ require_relative "xref/xref_sect_gen"
6
+ require_relative "class_utils"
7
+
8
+ module IsoDoc
9
+ class Xref
10
+ include XrefGen::Anchor
11
+ include XrefGen::Blocks
12
+ include XrefGen::Sections
13
+
14
+ def initialize(lang, script, klass, labels, options = {})
15
+ @anchors = {}
16
+ @lang = lang
17
+ @script = script
18
+ @klass = klass
19
+ @labels = labels
20
+ @options = options
21
+ end
22
+
23
+ def get
24
+ @anchors
25
+ end
26
+
27
+ def anchor(id, lbl, warning = true)
28
+ return nil if id.nil? || id.empty?
29
+ unless @anchors[id]
30
+ if warning
31
+ @seen ||= Seen_Anchor.instance
32
+ @seen.seen(id) or warn "No label has been processed for ID #{id}"
33
+ @seen.add(id)
34
+ return "[#{id}]"
35
+ end
36
+ end
37
+ @anchors.dig(id, lbl)
38
+ end
39
+
40
+ # extract names for all anchors, xref and label
41
+ def parse(docxml)
42
+ initial_anchor_names(docxml)
43
+ back_anchor_names(docxml)
44
+ # preempt clause notes with all other types of note (ISO default)
45
+ note_anchor_names(docxml.xpath(ns("//table | //figure")))
46
+ note_anchor_names(docxml.xpath(ns(SECTIONS_XPATH)))
47
+ example_anchor_names(docxml.xpath(ns(SECTIONS_XPATH)))
48
+ list_anchor_names(docxml.xpath(ns(SECTIONS_XPATH)))
49
+ end
50
+
51
+ def ns(xpath)
52
+ Common::ns(xpath)
53
+ end
54
+
55
+ def l10n(a, lang = @lang, script = @script)
56
+ IsoDoc::Function::I18n::l10n(a, lang, script)
57
+ end
58
+ end
59
+ end
@@ -1,7 +1,7 @@
1
1
  require "singleton"
2
2
 
3
- module IsoDoc::Function
4
- module XrefGen
3
+ module IsoDoc::XrefGen
4
+ module Anchor
5
5
  class Seen_Anchor
6
6
  include Singleton
7
7
 
@@ -12,34 +12,23 @@ module IsoDoc::Function
12
12
  def seen(x)
13
13
  @seen.has_key?(x)
14
14
  end
15
-
15
+
16
16
  def add(x)
17
17
  @seen[x] = true
18
18
  end
19
19
  end
20
20
 
21
- @anchors = {}
21
+ def initialize()
22
+ @anchors = {}
23
+ end
22
24
 
23
25
  def get_anchors
24
26
  @anchors
25
27
  end
26
28
 
27
- def anchor(id, lbl, warning = true)
28
- return nil if id.nil? || id.empty?
29
- unless @anchors[id]
30
- if warning
31
- @seen ||= Seen_Anchor.instance
32
- @seen.seen(id) or warn "No label has been processed for ID #{id}"
33
- @seen.add(id)
34
- return "[#{id}]"
35
- end
36
- end
37
- @anchors.dig(id, lbl)
38
- end
39
-
40
29
  def anchor_struct_label(lbl, elem)
41
30
  case elem
42
- when @appendix_lbl then l10n("#{elem} #{lbl}")
31
+ when @labels["appendix"] then l10n("#{elem} #{lbl}")
43
32
  else
44
33
  lbl.to_s
45
34
  end
@@ -47,8 +36,8 @@ module IsoDoc::Function
47
36
 
48
37
  def anchor_struct_xref(lbl, elem)
49
38
  case elem
50
- when @formula_lbl then l10n("#{elem} (#{lbl})")
51
- when @inequality_lbl then l10n("#{elem} (#{lbl})")
39
+ when @labels["formula"] then l10n("#{elem} (#{lbl})")
40
+ when @labels["inequality"] then l10n("#{elem} (#{lbl})")
52
41
  else
53
42
  l10n("#{elem} #{lbl}")
54
43
  end
@@ -59,7 +48,7 @@ module IsoDoc::Function
59
48
  ret[:label] = unnumbered == "true" ? nil : anchor_struct_label(lbl, elem)
60
49
  ret[:xref] = anchor_struct_xref(unnumbered == "true" ? "(??)" : lbl, elem)
61
50
  ret[:xref].gsub!(/ $/, "")
62
- ret[:container] = get_clause_id(container) unless container.nil?
51
+ ret[:container] = @klass.get_clause_id(container) unless container.nil?
63
52
  ret[:type] = type
64
53
  ret
65
54
  end
@@ -0,0 +1,74 @@
1
+ require "roman-numerals"
2
+
3
+ module IsoDoc::XrefGen
4
+ class Counter
5
+ def initialize
6
+ @num = 0
7
+ @letter = ""
8
+ @subseq = ""
9
+ @letter_override = nil
10
+ @number_override = nil
11
+ end
12
+
13
+ def new_subseq_increment(node)
14
+ @subseq = node["subsequence"]
15
+ @num += 1
16
+ @letter = node["subsequence"] ? "a" : ""
17
+ if node["number"]
18
+ /^(?<n>\d*)(?<a>[a-z]*)$/ =~ node["number"]
19
+ if n || a
20
+ @letter_override = @letter = a if a
21
+ @number_override = @num = n.to_i if n
22
+ else
23
+ @letter_override = node["number"]
24
+ @letter = @letter_override if /^[a-z]$/.match(@letter_override)
25
+ end
26
+ end
27
+ end
28
+
29
+ def sequence_increment(node)
30
+ if node["number"]
31
+ @number_override = node["number"]
32
+ @num = @number_override.to_i if /^\d+$/.match(@number_override)
33
+ else
34
+ @num += 1
35
+ end
36
+ end
37
+
38
+ def subsequence_increment(node)
39
+ if node["number"]
40
+ @letter_override = node["number"]
41
+ @letter = @letter_override if /^[a-z]$/.match(@letter_override)
42
+ else
43
+ @letter = (@letter.ord + 1).chr.to_s
44
+ end
45
+ end
46
+
47
+ def increment(node)
48
+ return self if node["unnumbered"]
49
+ @letter_override = nil
50
+ @number_override = nil
51
+ if node["subsequence"] != @subseq
52
+ new_subseq_increment(node)
53
+ elsif @letter.empty?
54
+ sequence_increment(node)
55
+ else
56
+ subsequence_increment(node)
57
+ end
58
+ self
59
+ end
60
+
61
+ def print
62
+ "#{@number_override || @num}#{@letter_override || @letter}"
63
+ end
64
+
65
+ def listlabel(depth)
66
+ return @num.to_s if [2, 7].include? depth
67
+ return (96 + @num).chr.to_s if [1, 6].include? depth
68
+ return (64 + @num).chr.to_s if [4, 9].include? depth
69
+ return RomanNumerals.to_roman(@num).downcase if [3, 8].include? depth
70
+ return RomanNumerals.to_roman(@num).upcase if [5, 10].include? depth
71
+ return @num.to_s
72
+ end
73
+ end
74
+ end