html2doc 1.1.1 → 1.1.2

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: b92a72c9d0ee6005e38ab8de1f0cbc48455819955d34eb349fef1244504a9971
4
- data.tar.gz: 52b07e9c1720bc2bf7a7196e0f650c62b81bf534535795bb2a136f2e79829416
3
+ metadata.gz: 64215843fa5fb0918c0c24d6023f466b3602d36fe155ff49687f2a9e447e6681
4
+ data.tar.gz: 2fd0022ce58dd6106f27825b593b7d6c632913414699067cc70ba09936d47810
5
5
  SHA512:
6
- metadata.gz: af4b84183859fd83ac500c6c1ac28e76d49ec7fb48c8a4cf49bc3543909b913e78c8293918b34e21e9e68f2dc78a500824337677ae1aea54bcf7cbecebd9363d
7
- data.tar.gz: 9e0bd48390458ec57dcc9650643a9332a549148a1297b37962e37399453d95132e39c69cb94ea13fcc70f9d6bc273fccf742da61e6f4a3025710182df1d65144
6
+ metadata.gz: fc525ead15dbbdbfede252026ae44451cd5b45fe8225c70cdefcccafd3e5a903a32d8e6dfdb396f67c58d11e8b2c48a36562bf8e4f439181f99a8f49b3799202
7
+ data.tar.gz: 243b6c5efa00090e1248ad420d9641882fcc956fb8734333267459db9dbaffbf0742488164f4c72975ce4bb9b4e62458eb0c0d5dc03bb3cd7990863d77ba40a3
data/bin/html2doc CHANGED
@@ -24,5 +24,5 @@ Html2Doc.process(
24
24
  File.read(ARGV[0], encoding: "utf-8"),
25
25
  filename: ARGV[0].gsub(/\.html?$/, ""),
26
26
  stylesheet: options[:stylesheet],
27
- header: options[:header]
27
+ header: options[:header],
28
28
  )
data/bin/rspec CHANGED
@@ -1,5 +1,5 @@
1
1
  #!/usr/bin/env ruby
2
-
2
+
3
3
  # This file was generated by Bundler.
4
4
  #
5
5
  # The application 'rspec' is installed as part of a gem, and
data/html2doc.gemspec CHANGED
@@ -1,5 +1,4 @@
1
-
2
- lib = File.expand_path("../lib", __FILE__)
1
+ lib = File.expand_path("lib", __dir__)
3
2
  $LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
4
3
  require "html2doc/version"
5
4
 
@@ -16,8 +15,8 @@ Gem::Specification.new do |spec|
16
15
  This gem is in active development.
17
16
  DESCRIPTION
18
17
 
19
- spec.homepage = "https://github.com/metanorma/html2doc"
20
- spec.licenses = ["CC-BY-SA-3.0", "BSD-2-Clause"]
18
+ spec.homepage = "https://github.com/metanorma/html2doc"
19
+ spec.licenses = ["CC-BY-SA-3.0", "BSD-2-Clause"]
21
20
 
22
21
  spec.bindir = "bin"
23
22
  spec.require_paths = ["lib"]
@@ -25,14 +24,14 @@ Gem::Specification.new do |spec|
25
24
  spec.test_files = `git ls-files -- {spec}/*`.split("\n")
26
25
  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
27
26
 
27
+ spec.add_dependency "asciimath", "~> 2.0.2"
28
28
  spec.add_dependency "htmlentities", "~> 4.3.4"
29
29
  spec.add_dependency "image_size"
30
30
  spec.add_dependency "mime-types"
31
- spec.add_dependency "nokogiri", "~> 1.10.4"
31
+ spec.add_dependency "nokogiri", "~> 1.10"
32
+ spec.add_dependency "plane1converter", "~> 0.0.1"
32
33
  spec.add_dependency "thread_safe"
33
34
  spec.add_dependency "uuidtools"
34
- spec.add_dependency "asciimath", "~> 2.0.2"
35
- spec.add_dependency "plane1converter", "~> 0.0.1"
36
35
 
37
36
  spec.add_development_dependency "byebug", "~> 9.1"
38
37
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
@@ -40,8 +39,8 @@ Gem::Specification.new do |spec|
40
39
  spec.add_development_dependency "guard-rspec", "~> 4.7"
41
40
  spec.add_development_dependency "rake", "~> 12.0"
42
41
  spec.add_development_dependency "rspec", "~> 3.6"
42
+ spec.add_development_dependency "rspec-match_fuzzy", "~> 0.1.3"
43
43
  spec.add_development_dependency "rubocop", "~> 1.5.2"
44
44
  spec.add_development_dependency "simplecov", "~> 0.15"
45
45
  spec.add_development_dependency "timecop", "~> 0.9"
46
- spec.add_development_dependency "rspec-match_fuzzy", "~> 0.1.3"
47
46
  end
data/lib/html2doc.rb CHANGED
@@ -4,6 +4,3 @@ require_relative "html2doc/mime"
4
4
  require_relative "html2doc/notes"
5
5
  require_relative "html2doc/math"
6
6
  require_relative "html2doc/lists"
7
- #require_relative "asciimath/parser"
8
- #require_relative "asciimath/mathml"
9
- #require_relative "asciimath/html"
data/lib/html2doc/base.rb CHANGED
@@ -106,6 +106,7 @@ module Html2Doc
106
106
  doc.gsub!(%r{></o:lock>}, "/>")
107
107
  doc.gsub!(%r{></v:imagedata>}, "/>")
108
108
  doc.gsub!(%r{></w:wrap>}, "/>")
109
+ doc.gsub!(%r{<(/)?m:(span|em)\b}, "<\\1\\2")
109
110
  doc.gsub!(%r{&tab;|&amp;tab;},
110
111
  '<span style="mso-tab-count:1">&#xA0; </span>')
111
112
  doc.split(%r{(<m:oMath>|</m:oMath>)}).each_slice(4).map do |a|
@@ -127,7 +128,7 @@ module Html2Doc
127
128
  <meta http-equiv=Content-Type content="text/html; charset=utf-8"/>
128
129
  XML
129
130
 
130
- def self.define_head1(docxml, dir)
131
+ def self.define_head1(docxml, _dir)
131
132
  docxml.xpath("//*[local-name() = 'head']").each do |h|
132
133
  h.children.first.add_previous_sibling <<~XML
133
134
  #{PRINT_VIEW}
@@ -147,7 +148,7 @@ module Html2Doc
147
148
  end
148
149
  end
149
150
 
150
- def self.stylesheet(filename, header_filename, fn)
151
+ def self.stylesheet(_filename, _header_filename, fn)
151
152
  (fn.nil? || fn.empty?) and
152
153
  fn = File.join(File.dirname(__FILE__), "wordstyle.css")
153
154
  stylesheet = File.read(fn, encoding: "UTF-8")
@@ -2,30 +2,29 @@ require "uuidtools"
2
2
  require "asciimath"
3
3
  require "htmlentities"
4
4
  require "nokogiri"
5
- require "uuidtools"
6
5
 
7
6
  module Html2Doc
8
- def self.style_list(li, level, liststyle, listnumber)
7
+ def self.style_list(elem, level, liststyle, listnumber)
9
8
  return unless liststyle
10
9
 
11
- if li["style"]
12
- li["style"] += ";"
10
+ if elem["style"]
11
+ elem["style"] += ";"
13
12
  else
14
- li["style"] = ""
13
+ elem["style"] = ""
15
14
  end
16
- li["style"] += "mso-list:#{liststyle} level#{level} lfo#{listnumber};"
15
+ elem["style"] += "mso-list:#{liststyle} level#{level} lfo#{listnumber};"
17
16
  end
18
17
 
19
- def self.list_add1(li, liststyles, listtype, level)
18
+ def self.list_add1(elem, liststyles, listtype, level)
20
19
  if %i[ul ol].include? listtype
21
- list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
20
+ list_add(elem.xpath(".//ul") - elem.xpath(".//ul//ul | .//ol//ul"),
22
21
  liststyles, :ul, level + 1)
23
- list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
22
+ list_add(elem.xpath(".//ol") - elem.xpath(".//ul//ol | .//ol//ol"),
24
23
  liststyles, :ol, level + 1)
25
24
  else
26
- list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
25
+ list_add(elem.xpath(".//ul") - elem.xpath(".//ul//ul | .//ol//ul"),
27
26
  liststyles, listtype, level + 1)
28
- list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
27
+ list_add(elem.xpath(".//ol") - elem.xpath(".//ul//ol | .//ol//ol"),
29
28
  liststyles, listtype, level + 1)
30
29
  end
31
30
  end
@@ -47,45 +46,43 @@ module Html2Doc
47
46
  end
48
47
  end
49
48
 
50
- def self.list2para(u)
51
- return if u.xpath("./li").empty?
49
+ def self.list2para(list)
50
+ return if list.xpath("./li").empty?
52
51
 
53
- u.xpath("./li").first["class"] ||= "MsoListParagraphCxSpFirst"
54
- u.xpath("./li").last["class"] ||= "MsoListParagraphCxSpLast"
55
- u.xpath("./li/p").each { |p| p["class"] ||= "MsoListParagraphCxSpMiddle" }
56
- u.xpath("./li").each do |l|
52
+ list.xpath("./li").first["class"] ||= "MsoListParagraphCxSpFirst"
53
+ list.xpath("./li").last["class"] ||= "MsoListParagraphCxSpLast"
54
+ list.xpath("./li/p").each { |p| p["class"] ||= "MsoListParagraphCxSpMiddle" }
55
+ list.xpath("./li").each do |l|
57
56
  l.name = "p"
58
57
  l["class"] ||= "MsoListParagraphCxSpMiddle"
59
58
  l&.first_element_child&.name == "p" and
60
59
  l.first_element_child.replace(l.first_element_child.children)
61
60
  end
62
- u.replace(u.children)
61
+ list.replace(list.children)
63
62
  end
64
63
 
65
64
  TOPLIST = "[not(ancestor::ul) and not(ancestor::ol)]".freeze
66
65
 
67
- def self.lists1(docxml, liststyles, k)
68
- case k
66
+ def self.lists1(docxml, liststyles, style)
67
+ case style
69
68
  when :ul then list_add(docxml.xpath("//ul[not(@class)]#{TOPLIST}"),
70
69
  liststyles, :ul, 1)
71
70
  when :ol then list_add(docxml.xpath("//ol[not(@class)]#{TOPLIST}"),
72
71
  liststyles, :ol, 1)
73
72
  else
74
- list_add(docxml.xpath("//ol[@class = '#{k}']#{TOPLIST} | "\
75
- "//ul[@class = '#{k}']#{TOPLIST}"),
76
- liststyles, k, 1)
73
+ list_add(docxml.xpath("//ol[@class = '#{style}']#{TOPLIST} | "\
74
+ "//ul[@class = '#{style}']#{TOPLIST}"),
75
+ liststyles, style, 1)
77
76
  end
78
77
  end
79
78
 
80
79
  def self.lists_unstyled(docxml, liststyles)
81
- if liststyles.has_key?(:ul)
80
+ liststyles.has_key?(:ul) and
82
81
  list_add(docxml.xpath("//ul#{TOPLIST}[not(@seen)]"),
83
82
  liststyles, :ul, 1)
84
- end
85
- if liststyles.has_key?(:ol)
83
+ liststyles.has_key?(:ol) and
86
84
  list_add(docxml.xpath("//ol#{TOPLIST}[not(@seen)]"),
87
85
  liststyles, :ul, 1)
88
- end
89
86
  docxml.xpath("//ul[@seen] | //ol[@seen]").each do |l|
90
87
  l.delete("seen")
91
88
  end
data/lib/html2doc/math.rb CHANGED
@@ -25,13 +25,18 @@ module Html2Doc
25
25
 
26
26
  m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
27
27
  m.each_slice(4).map.with_index do |(*a), i|
28
- i % 500 == 0 && m.size > 1000 && i > 0 and
29
- warn "MathML #{i} of #{(m.size / 4).floor}"
28
+ progress_conv(i, 500, (m.size / 4).floor, 1000, "AsciiMath")
30
29
  a[2].nil? || a[2] = asciimath_to_mathml1(a[2])
31
30
  a.size > 1 ? a[0] + a[2] : a[0]
32
31
  end.join
33
32
  end
34
33
 
34
+ def self.progress_conv(idx, step, total, threshold, msg)
35
+ return unless (idx % step).zero? && total > threshold && idx.positive?
36
+
37
+ warn "#{msg} #{idx} of #{total}"
38
+ end
39
+
35
40
  def self.unwrap_accents(doc)
36
41
  doc.xpath("//*[@accent = 'true']").each do |x|
37
42
  x.elements.length > 1 or next
@@ -69,18 +74,20 @@ module Html2Doc
69
74
  math
70
75
  end
71
76
 
77
+ HTML_NS = 'xmlns="http://www.w3.org/1999/xhtml"'.freeze
78
+
72
79
  def self.unitalic(math)
73
80
  math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
74
- x.wrap("<span style='font-style:normal;'></span>")
81
+ x.wrap("<span #{HTML_NS} style='font-style:normal;'></span>")
75
82
  end
76
83
  math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
77
- x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>")
84
+ x.wrap("<span #{HTML_NS} class='nostem' style='font-weight:bold;'><em></em></span>")
78
85
  end
79
86
  math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
80
- x.wrap("<span class='nostem'><em></em></span>")
87
+ x.wrap("<span #{HTML_NS} class='nostem'><em></em></span>")
81
88
  end
82
89
  math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
83
- x.wrap("<span style='font-style:normal;font-weight:bold;'></span>")
90
+ x.wrap("<span #{HTML_NS} style='font-style:normal;font-weight:bold;'></span>")
84
91
  end
85
92
  math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
86
93
  to_plane1(x, :monospace)
@@ -128,20 +135,30 @@ module Html2Doc
128
135
  docnamespaces = docxml.collect_namespaces
129
136
  m = docxml.xpath("//*[local-name() = 'math']")
130
137
  m.each_with_index do |x, i|
131
- i % 100 == 0 && m.size > 500 && i > 0 and
132
- warn "Math OOXML #{i} of #{m.size}"
133
- element = ooxml_cleanup(x, docnamespaces)
134
- doc = Nokogiri::XML::Document::new
135
- doc.root = element
136
- ooxml = unitalic(esc_space(@xsltemplate.transform(doc))).to_s
137
- .gsub(/<\?[^>]+>\s*/, "")
138
- .gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
139
- .gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
140
- ooxml = uncenter(x, ooxml)
141
- x.swap(ooxml)
138
+ progress_conv(i, 100, m.size, 500, "Math OOXML")
139
+ mathml_to_ooml1(x, docnamespaces)
142
140
  end
143
141
  end
144
142
 
143
+ # We need span and em not to be namespaced. Word can't deal with explicit
144
+ # namespaces.
145
+ # We will end up stripping them out again under Nokogiri 1.11, which correctly
146
+ # insists on inheriting namespace from parent.
147
+ def self.ooml_clean(xml)
148
+ xml.to_s
149
+ .gsub(/<\?[^>]+>\s*/, "")
150
+ .gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
151
+ .gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
152
+ end
153
+
154
+ def self.mathml_to_ooml1(xml, docnamespaces)
155
+ doc = Nokogiri::XML::Document::new
156
+ doc.root = ooxml_cleanup(xml, docnamespaces)
157
+ ooxml = ooml_clean(unitalic(esc_space(@xsltemplate.transform(doc))))
158
+ ooxml = uncenter(xml, ooxml)
159
+ xml.swap(ooxml)
160
+ end
161
+
145
162
  # escape space as &#x32;; we are removing any spaces generated by
146
163
  # XML indentation
147
164
  def self.esc_space(xml)
@@ -157,7 +174,7 @@ module Html2Doc
157
174
  # left/right if parent is so tagged
158
175
  def self.uncenter(math, ooxml)
159
176
  alignnode = math.at(".//ancestor::*[@style][local-name() = 'p' or "\
160
- "local-name() = 'div' or local-name() = 'td']/@style")
177
+ "local-name() = 'div' or local-name() = 'td']/@style")
161
178
  return ooxml unless alignnode && (math.next == nil && math.previous == nil)
162
179
 
163
180
  %w(left right).each do |dir|
data/lib/html2doc/mime.rb CHANGED
@@ -43,7 +43,7 @@ module Html2Doc
43
43
  def self.mime_type(item)
44
44
  types = MIME::Types.type_for(item)
45
45
  type = types ? types.first.to_s : 'text/plain; charset="utf-8"'
46
- type = type + ' charset="utf-8"' if /^text/.match(type) && types
46
+ type = %(#{type} charset="utf-8") if /^text/.match(type) && types
47
47
  type
48
48
  end
49
49
 
@@ -77,14 +77,14 @@ module Html2Doc
77
77
  end
78
78
 
79
79
  # max width for Word document is 400, max height is 680
80
- def self.image_resize(i, path, maxheight, maxwidth)
81
- realSize = ImageSize.path(path).size
82
- s = [i["width"].to_i, i["height"].to_i]
83
- s = realSize if s[0].zero? && s[1].zero?
84
- return [nil, nil] if realSize.nil? || realSize[0].nil? || realSize[1].nil?
85
-
86
- s[1] = s[0] * realSize[1] / realSize[0] if s[1].zero? && !s[0].zero?
87
- s[0] = s[1] * realSize[0] / realSize[1] if s[0].zero? && !s[1].zero?
80
+ def self.image_resize(img, path, maxheight, maxwidth)
81
+ realsize = ImageSize.path(path).size
82
+ s = [img["width"].to_i, img["height"].to_i]
83
+ s = realsize if s[0].zero? && s[1].zero?
84
+ return [nil, nil] if realsize.nil? || realsize[0].nil? || realsize[1].nil?
85
+
86
+ s[1] = s[0] * realsize[1] / realsize[0] if s[1].zero? && !s[0].zero?
87
+ s[0] = s[1] * realsize[0] / realsize[1] if s[0].zero? && !s[1].zero?
88
88
  s = [(s[0] * maxheight / s[1]).ceil, maxheight] if s[1] > maxheight
89
89
  s = [maxwidth, (s[1] * maxwidth / s[0]).ceil] if s[0] > maxwidth
90
90
  s
@@ -100,16 +100,18 @@ module Html2Doc
100
100
  warn "#{src}: SVG not supported" if /\.svg$/i.match?(src)
101
101
  end
102
102
 
103
+ def self.localname(src, localdir)
104
+ %r{^([A-Z]:)?/}.match?(src) ? src : File.join(localdir, src)
105
+ end
106
+
103
107
  # only processes locally stored images
104
108
  def self.image_cleanup(docxml, dir, localdir)
105
109
  docxml.traverse do |i|
106
110
  next unless i.element? && %w(img v:imagedata).include?(i.name)
107
- #warnsvg(i["src"])
108
- next if /^http/.match i["src"]
111
+ next if /^http/.match? i["src"]
109
112
  next if %r{^data:(image|application)/[^;]+;base64}.match? i["src"]
110
113
 
111
- local_filename = %r{^([A-Z]:)?/}.match(i["src"]) ? i["src"] :
112
- File.join(localdir, i["src"])
114
+ local_filename = localname(i["src"], localdir)
113
115
  new_filename = "#{mkuuid}#{File.extname(i['src'])}"
114
116
  FileUtils.cp local_filename, File.join(dir, new_filename)
115
117
  i["width"], i["height"] = image_resize(i, local_filename, 680, 400)
@@ -130,12 +132,9 @@ module Html2Doc
130
132
  if a.size == 2 && !(/ src="https?:/.match a[1]) &&
131
133
  !(%r{ src="data:(image|application)/[^;]+;base64}.match a[1])
132
134
  m = / src=['"](?<src>[^"']+)['"]/.match a[1]
133
- #warnsvg(m[:src])
134
135
  m2 = /\.(?<suffix>[a-zA-Z_0-9]+)$/.match m[:src]
135
136
  new_filename = "#{mkuuid}.#{m2[:suffix]}"
136
- old_filename = %r{^([A-Z]:)?/}.match?(m[:src]) ? m[:src] :
137
- File.join(localdir, m[:src])
138
- FileUtils.cp old_filename, File.join(dir, new_filename)
137
+ FileUtils.cp localname(m[:src], localdir), File.join(dir, new_filename)
139
138
  a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='cid:#{new_filename}'")
140
139
  end
141
140
  a.join
@@ -56,19 +56,22 @@ module Html2Doc
56
56
 
57
57
  set_footnote_link_attrs(elem, idx)
58
58
  if elem.at("./span[@class = 'MsoFootnoteReference']")
59
- elem.children.each do |c|
60
- if c.name == "span" && c["class"] == "MsoFootnoteReference"
61
- c.replace(FN)
62
- else
63
- c.wrap("<span class='MsoFootnoteReference'></span>")
64
- end
65
- end
66
- else
67
- elem.children = FN
59
+ process_footnote_link1(elem)
60
+ else elem.children = FN
68
61
  end
69
62
  footnote << transform_footnote_text(note)
70
63
  end
71
64
 
65
+ def self.process_footnote_link1(elem)
66
+ elem.children.each do |c|
67
+ if c.name == "span" && c["class"] == "MsoFootnoteReference"
68
+ c.replace(FN)
69
+ else
70
+ c.wrap("<span class='MsoFootnoteReference'></span>")
71
+ end
72
+ end
73
+ end
74
+
72
75
  def self.transform_footnote_text(note)
73
76
  note["id"] = ""
74
77
  note.xpath(".//div").each { |div| div.replace(div.children) }
@@ -1,3 +1,3 @@
1
1
  module Html2Doc
2
- VERSION = "1.1.1".freeze
2
+ VERSION = "1.1.2".freeze
3
3
  end
@@ -64,11 +64,11 @@ WORD_HDR_END = <<~HDR.freeze
64
64
  </head>
65
65
  HDR
66
66
 
67
- def word_body(x, fn)
67
+ def word_body(xml, footnote)
68
68
  <<~BODY
69
69
  <body>
70
- #{x}
71
- #{fn}</body></html>
70
+ #{xml}
71
+ #{footnote}</body></html>
72
72
  BODY
73
73
  end
74
74
 
@@ -317,26 +317,36 @@ RSpec.describe Html2Doc do
317
317
  end
318
318
 
319
319
  it "processes a stylesheet in an HTML document with an empty head" do
320
- Html2Doc.process(html_input_empty_head(""), filename: "test", stylesheet: "lib/html2doc/wordstyle.css")
320
+ Html2Doc.process(html_input_empty_head(""),
321
+ filename: "test", stylesheet: "lib/html2doc/wordstyle.css")
322
+ word_hdr_end = WORD_HDR_END
323
+ .sub(%(<meta name="Originator" content="Me"/>\n), "")
324
+ .sub("</style>\n</head>", "</style></head>")
321
325
  expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
322
326
  .to match_fuzzy(<<~OUTPUT)
323
327
  #{WORD_HDR.sub('<title>blank</title>', '')}
324
328
  #{DEFAULT_STYLESHEET}
325
- #{WORD_HDR_END.sub('<meta name="Originator" content="Me"/>' + "\n", '').sub("</style>\n</head>", '</style></head>')}
329
+ #{word_hdr_end}
326
330
  #{word_body('', '<div style="mso-element:footnote-list"/>')} #{WORD_FTR1}
327
331
  OUTPUT
328
332
  end
329
333
 
330
334
  it "processes a header" do
331
- Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header.html")
335
+ Html2Doc.process(html_input(""),
336
+ filename: "test", header_file: "spec/header.html")
332
337
  html = guid_clean(File.read("test.doc", encoding: "utf-8"))
333
- hdr = Base64.decode64(html.sub(%r{^.*Content-Location: file:///C:/Doc/test_files/header.html}, "")
334
- .sub(%r{^.*Content-Type: text/html charset="utf-8"}m, "")
335
- .sub(%r{------=_NextPart_--.*$}m, "")).force_encoding("UTF-8")
338
+ hdr = Base64.decode64(
339
+ html
340
+ .sub(%r{^.*Content-Location: file:///C:/Doc/test_files/header.html}, "")
341
+ .sub(%r{^.*Content-Type: text/html charset="utf-8"}m, "")
342
+ .sub(%r{------=_NextPart_--.*$}m, ""),
343
+ ).force_encoding("UTF-8")
336
344
  # expect(hdr.gsub(/\xa0/, " ")).to match_fuzzy(HEADERHTML)
337
345
  expect(HTMLEntities.new.encode(hdr, :hexadecimal)
338
- .gsub(/&#x3c;/, "<").gsub(/&#x3e;/, ">").gsub(/&#x27;/, "'").gsub(/&#x22;/, '"')
339
- .gsub(/&#xd;/, "&#xa;").gsub(/&#xa;/, "\n")).to match_fuzzy(HEADERHTML)
346
+ .gsub(/&#x3c;/, "<").gsub(/&#x3e;/, ">")
347
+ .gsub(/&#x27;/, "'").gsub(/&#x22;/, '"')
348
+ .gsub(/&#xd;/, "&#xa;").gsub(/&#xa;/, "\n"))
349
+ .to match_fuzzy(HEADERHTML)
340
350
  expect(html.sub(%r{Content-ID: <header.html>.*$}m, ""))
341
351
  .to match_fuzzy(<<~OUTPUT)
342
352
  #{WORD_HDR} #{DEFAULT_STYLESHEET.gsub(/url\("[^"]+"\)/, 'url(cid:header.html)')}
@@ -345,7 +355,8 @@ RSpec.describe Html2Doc do
345
355
  end
346
356
 
347
357
  it "processes a header with an image" do
348
- Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header_img.html")
358
+ Html2Doc.process(html_input(""),
359
+ filename: "test", header_file: "spec/header_img.html")
349
360
  doc = guid_clean(File.read("test.doc", encoding: "utf-8"))
350
361
  expect(doc).to match(%r{Content-Type: image/png})
351
362
  expect(doc).to match(%r{iVBORw0KGgoAAAANSUhEUgAAA5cAAAN7CAYAAADRE24cAAAgAElEQVR4XuydB5gUxdaGC65gTogB})
@@ -354,9 +365,13 @@ RSpec.describe Html2Doc do
354
365
  it "processes a header with an image with absolute path" do
355
366
  doc = File.read("spec/header_img.html", encoding: "utf-8")
356
367
  File.open("spec/header_img1.html", "w:UTF-8") do |f|
357
- f.write doc.sub(%r{spec/19160-6.png}, File.expand_path(File.join(File.dirname(__FILE__), "19160-6.png")))
368
+ f.write(
369
+ doc.sub(%r{spec/19160-6.png},
370
+ File.expand_path(File.join(File.dirname(__FILE__), "19160-6.png"))),
371
+ )
358
372
  end
359
- Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header_img1.html")
373
+ Html2Doc.process(html_input(""),
374
+ filename: "test", header_file: "spec/header_img1.html")
360
375
  doc = guid_clean(File.read("test.doc", encoding: "utf-8"))
361
376
  expect(doc).to match(%r{Content-Type: image/png})
362
377
  expect(doc).to match(%r{iVBORw0KGgoAAAANSUhEUgAAA5cAAAN7CAYAAADRE24cAAAgAElEQVR4XuydB5gUxdaGC65gTogB})
@@ -375,43 +390,46 @@ RSpec.describe Html2Doc do
375
390
  end
376
391
 
377
392
  it "processes AsciiMath" do
378
- Html2Doc.process(html_input(%[<div>{{sum_(i=1)^n i^3=((n(n+1))/2)^2 text("integer"))}}</div>]), filename: "test", asciimathdelims: ["{{", "}}"])
393
+ Html2Doc.process(html_input(%[<div>{{sum_(i=1)^n i^3=((n(n+1))/2)^2 text("integer"))}}</div>]),
394
+ filename: "test", asciimathdelims: ["{{", "}}"])
379
395
  expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
380
396
  .to match_fuzzy(<<~OUTPUT)
381
397
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
382
398
  #{word_body(%{
383
- <div><m:oMath>
384
- <m:nary><m:naryPr><m:chr m:val="&#x2211;"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub><m:r><m:t>i=1</m:t></m:r></m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>i</m:t></m:r></m:e><m:sup><m:r><m:t>3</m:t></m:r></m:sup></m:sSup></m:e></m:nary><span style="font-style:normal;"><m:r><m:rPr><m:sty m:val="p"></m:sty></m:rPr><m:t>=</m:t></m:r></span><m:sSup><m:e><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:f><m:fPr><m:type m:val="bar"></m:type></m:fPr><m:num><m:r><m:t>n</m:t></m:r><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:r><m:t>n+1</m:t></m:r></m:e></m:d></m:num><m:den><m:r><m:t>2</m:t></m:r></m:den></m:f></m:e></m:d></m:e><m:sup><m:r><m:t>2</m:t></m:r></m:sup></m:sSup><m:r><m:rPr><m:nor></m:nor></m:rPr><m:t>"integer"</m:t></m:r><span style="font-style:normal;"><m:r><m:rPr><m:sty m:val="p"></m:sty></m:rPr><m:t>)</m:t></m:r></span>
385
- </m:oMath>
386
- </div>}, '<div style="mso-element:footnote-list"/>')}
399
+ <div><m:oMath>
400
+ <m:nary><m:naryPr><m:chr m:val="&#x2211;"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub><m:r><m:t>i=1</m:t></m:r></m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>i</m:t></m:r></m:e><m:sup><m:r><m:t>3</m:t></m:r></m:sup></m:sSup></m:e></m:nary><span style="font-style:normal;"><m:r><m:rPr><m:sty m:val="p"></m:sty></m:rPr><m:t>=</m:t></m:r></span><m:sSup><m:e><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:f><m:fPr><m:type m:val="bar"></m:type></m:fPr><m:num><m:r><m:t>n</m:t></m:r><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:r><m:t>n+1</m:t></m:r></m:e></m:d></m:num><m:den><m:r><m:t>2</m:t></m:r></m:den></m:f></m:e></m:d></m:e><m:sup><m:r><m:t>2</m:t></m:r></m:sup></m:sSup><m:r><m:rPr><m:nor></m:nor></m:rPr><m:t>"integer"</m:t></m:r><span style="font-style:normal;"><m:r><m:rPr><m:sty m:val="p"></m:sty></m:rPr><m:t>)</m:t></m:r></span>
401
+ </m:oMath>
402
+ </div>}, '<div style="mso-element:footnote-list"/>')}
387
403
  #{WORD_FTR1}
388
404
  OUTPUT
389
405
  end
390
406
 
391
407
  it "processes mstyle" do
392
- Html2Doc.process(html_input(%[<div>{{bb (-log_2 (p_u)) bb "BB" bbb "BBB" cc "CC" bcc "BCC" tt "TT" fr "FR" bfr "BFR" sf "SF" bsf "BSFα" sfi "SFI" sfbi "SFBIα" bii "BII" ii "II"}}</div>]), filename: "test", asciimathdelims: ["{{", "}}"])
408
+ Html2Doc.process(html_input(%[<div>{{bb (-log_2 (p_u)) bb "BB" bbb "BBB" cc "CC" bcc "BCC" tt "TT" fr "FR" bfr "BFR" sf "SF" bsf "BSFα" sfi "SFI" sfbi "SFBIα" bii "BII" ii "II"}}</div>]),
409
+ filename: "test", asciimathdelims: ["{{", "}}"])
393
410
  expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
394
411
  .to match_fuzzy(<<~OUTPUT)
395
412
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
396
413
  #{word_body(%{
397
- <div><m:oMath>
398
- <span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x2212;</m:t></m:r></span><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>log</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>2</m:t></m:r></span></m:sub></m:sSub><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>p</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>u</m:t></m:r></span></m:sub></m:sSub></m:e></m:d><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:nor></m:nor><m:sty m:val="b"></m:sty></m:rPr><m:t>BB</m:t></m:r></span><m:r><m:rPr><m:nor></m:nor><m:scr m:val="double-struck"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D539;&#x1D539;&#x1D539;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="script"></m:scr></m:rPr><m:t>&#x1D49E;&#x1D49E;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="script"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x1D4D1;&#x1D4D2;&#x1D4D2;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="monospace"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D683;&#x1D683;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="fraktur"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D509;&#x211C;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="fraktur"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x1D56D;&#x1D571;&#x1D57D;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D5B2;&#x1D5A5;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x1D5D5;&#x1D5E6;&#x1D5D9;&#x1D770;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr></m:rPr><m:t>&#x1D5B2;&#x1D5A5;&#x1D5A8;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="bi"></m:sty></m:rPr><m:t>&#x1D64E;&#x1D641;&#x1D63D;&#x1D644;&#x1D7AA;</m:t></m:r><span class="nostem" style="font-weight:bold;"><em></em><m:r><m:rPr><m:nor></m:nor><m:sty m:val="bi"></m:sty></m:rPr><m:t>BII</m:t></m:r></span><span class="nostem"><em></em><m:r><m:rPr><m:nor></m:nor><m:sty m:val="i"></m:sty></m:rPr><m:t>II</m:t></m:r></span>
399
- </m:oMath>
400
- </div>}, '<div style="mso-element:footnote-list"/>')}
414
+ <div><m:oMath>
415
+ <span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x2212;</m:t></m:r></span><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>log</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>2</m:t></m:r></span></m:sub></m:sSub><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>p</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>u</m:t></m:r></span></m:sub></m:sSub></m:e></m:d><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:nor></m:nor><m:sty m:val="b"></m:sty></m:rPr><m:t>BB</m:t></m:r></span><m:r><m:rPr><m:nor></m:nor><m:scr m:val="double-struck"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D539;&#x1D539;&#x1D539;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="script"></m:scr></m:rPr><m:t>&#x1D49E;&#x1D49E;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="script"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x1D4D1;&#x1D4D2;&#x1D4D2;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="monospace"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D683;&#x1D683;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="fraktur"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D509;&#x211C;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="fraktur"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x1D56D;&#x1D571;&#x1D57D;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D5B2;&#x1D5A5;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x1D5D5;&#x1D5E6;&#x1D5D9;&#x1D770;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr></m:rPr><m:t>&#x1D5B2;&#x1D5A5;&#x1D5A8;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="bi"></m:sty></m:rPr><m:t>&#x1D64E;&#x1D641;&#x1D63D;&#x1D644;&#x1D7AA;</m:t></m:r><span class="nostem" style="font-weight:bold;"><em></em><m:r><m:rPr><m:nor></m:nor><m:sty m:val="bi"></m:sty></m:rPr><m:t>BII</m:t></m:r></span><span class="nostem"><em></em><m:r><m:rPr><m:nor></m:nor><m:sty m:val="i"></m:sty></m:rPr><m:t>II</m:t></m:r></span>
416
+ </m:oMath>
417
+ </div>}, '<div style="mso-element:footnote-list"/>')}
401
418
  #{WORD_FTR1}
402
419
  OUTPUT
403
420
  end
404
421
 
405
422
  it "processes spaces in AsciiMath" do
406
- Html2Doc.process(html_input(%[<div>{{text " integer ")}}</div>]), filename: "test", asciimathdelims: ["{{", "}}"])
423
+ Html2Doc.process(html_input(%[<div>{{text " integer ")}}</div>]),
424
+ filename: "test", asciimathdelims: ["{{", "}}"])
407
425
  expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
408
426
  .to match_fuzzy(<<~OUTPUT)
409
427
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
410
428
  #{word_body('
411
- <div><m:oMath>
412
- <m:r><m:t>text</m:t></m:r><m:r><m:rPr><m:nor></m:nor></m:rPr><m:t>&#xA0;integer&#xA0;</m:t></m:r><span style="font-style:normal;"><m:r><m:rPr><m:sty m:val="p"></m:sty></m:rPr><m:t>)</m:t></m:r></span>
413
- </m:oMath>
414
- </div>', '<div style="mso-element:footnote-list"/>')}
429
+ <div><m:oMath>
430
+ <m:r><m:t>text</m:t></m:r><m:r><m:rPr><m:nor></m:nor></m:rPr><m:t>&#xA0;integer&#xA0;</m:t></m:r><span style="font-style:normal;"><m:r><m:rPr><m:sty m:val="p"></m:sty></m:rPr><m:t>)</m:t></m:r></span>
431
+ </m:oMath>
432
+ </div>', '<div style="mso-element:footnote-list"/>')}
415
433
  #{WORD_FTR1}
416
434
  OUTPUT
417
435
  end
@@ -419,14 +437,15 @@ RSpec.describe Html2Doc do
419
437
  it "processes spaces in MathML mtext" do
420
438
  Html2Doc.process(html_input("<div><math xmlns='http://www.w3.org/1998/Math/MathML'>
421
439
  <mrow><mi>H</mi><mtext> original </mtext><mi>J</mi></mrow>
422
- </math></div>"), filename: "test", asciimathdelims: ["{{", "}}"])
440
+ </math></div>"),
441
+ filename: "test", asciimathdelims: ["{{", "}}"])
423
442
  expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
424
443
  .to match_fuzzy(<<~OUTPUT)
425
444
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
426
445
  #{word_body('<div><m:oMath>
427
- <m:r><m:t>H</m:t></m:r><m:r><m:rPr><m:nor></m:nor></m:rPr><m:t>&#xA0;original&#xA0;</m:t></m:r><m:r><m:t>J</m:t></m:r>
428
- </m:oMath>
429
- </div>', '<div style="mso-element:footnote-list"/>')}
446
+ <m:r><m:t>H</m:t></m:r><m:r><m:rPr><m:nor></m:nor></m:rPr><m:t>&#xA0;original&#xA0;</m:t></m:r><m:r><m:t>J</m:t></m:r>
447
+ </m:oMath>
448
+ </div>', '<div style="mso-element:footnote-list"/>')}
430
449
  #{WORD_FTR1}
431
450
  OUTPUT
432
451
  end
@@ -439,58 +458,67 @@ RSpec.describe Html2Doc do
439
458
  .to match_fuzzy(<<~OUTPUT)
440
459
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
441
460
  #{word_body('<div><m:oMath>
442
- <m:acc><m:accPr><m:chr m:val="^"></m:chr></m:accPr><m:e><m:r><m:t>p</m:t></m:r></m:e></m:acc>
443
- </m:oMath>
444
- </div>', '<div style="mso-element:footnote-list"/>')}
461
+ <m:acc><m:accPr><m:chr m:val="^"></m:chr></m:accPr><m:e><m:r><m:t>p</m:t></m:r></m:e></m:acc>
462
+ </m:oMath>
463
+ </div>', '<div style="mso-element:footnote-list"/>')}
445
464
  #{WORD_FTR1}
446
465
  OUTPUT
447
466
  end
448
467
 
449
468
  it "left-aligns AsciiMath" do
450
- Html2Doc.process(html_input("<div style='text-align:left;'>{{sum_(i=1)^n i^3=((n(n+1))/2)^2}}</div>"), filename: "test", asciimathdelims: ["{{", "}}"])
469
+ Html2Doc.process(html_input("<div style='text-align:left;'>{{sum_(i=1)^n i^3=((n(n+1))/2)^2}}</div>"),
470
+ filename: "test", asciimathdelims: ["{{", "}}"])
451
471
  expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
452
472
  .to match_fuzzy(<<~OUTPUT)
453
473
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
454
474
  #{word_body(%{
455
- <div style="text-align:left;"><m:oMathPara><m:oMathParaPr><m:jc m:val="left"/></m:oMathParaPr><m:oMath>
456
- <m:nary><m:naryPr><m:chr m:val="&#x2211;"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub><m:r><m:t>i=1</m:t></m:r></m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>i</m:t></m:r></m:e><m:sup><m:r><m:t>3</m:t></m:r></m:sup></m:sSup></m:e></m:nary><span style="font-style:normal;"><m:r><m:rPr><m:sty m:val="p"></m:sty></m:rPr><m:t>=</m:t></m:r></span><m:sSup><m:e><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:f><m:fPr><m:type m:val="bar"></m:type></m:fPr><m:num><m:r><m:t>n</m:t></m:r><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:r><m:t>n+1</m:t></m:r></m:e></m:d></m:num><m:den><m:r><m:t>2</m:t></m:r></m:den></m:f></m:e></m:d></m:e><m:sup><m:r><m:t>2</m:t></m:r></m:sup></m:sSup>
457
- </m:oMath>
458
- </m:oMathPara></div>}, '<div style="mso-element:footnote-list"/>')}
475
+ <div style="text-align:left;"><m:oMathPara><m:oMathParaPr><m:jc m:val="left"/></m:oMathParaPr><m:oMath>
476
+ <m:nary><m:naryPr><m:chr m:val="&#x2211;"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub><m:r><m:t>i=1</m:t></m:r></m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>i</m:t></m:r></m:e><m:sup><m:r><m:t>3</m:t></m:r></m:sup></m:sSup></m:e></m:nary><span style="font-style:normal;"><m:r><m:rPr><m:sty m:val="p"></m:sty></m:rPr><m:t>=</m:t></m:r></span><m:sSup><m:e><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:f><m:fPr><m:type m:val="bar"></m:type></m:fPr><m:num><m:r><m:t>n</m:t></m:r><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:r><m:t>n+1</m:t></m:r></m:e></m:d></m:num><m:den><m:r><m:t>2</m:t></m:r></m:den></m:f></m:e></m:d></m:e><m:sup><m:r><m:t>2</m:t></m:r></m:sup></m:sSup>
477
+ </m:oMath>
478
+ </m:oMathPara></div>}, '<div style="mso-element:footnote-list"/>')}
459
479
  #{WORD_FTR1}
460
480
  OUTPUT
461
481
  end
462
482
 
463
483
  it "right-aligns AsciiMath" do
464
- Html2Doc.process(html_input("<div style='text-align:right;'>{{sum_(i=1)^n i^3=((n(n+1))/2)^2}}</div>"), filename: "test", asciimathdelims: ["{{", "}}"])
484
+ Html2Doc.process(html_input("<div style='text-align:right;'>{{sum_(i=1)^n i^3=((n(n+1))/2)^2}}</div>"),
485
+ filename: "test", asciimathdelims: ["{{", "}}"])
465
486
  expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
466
487
  .to match_fuzzy(<<~OUTPUT)
467
488
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
468
489
  #{word_body(%{
469
- <div style="text-align:right;"><m:oMathPara><m:oMathParaPr><m:jc m:val="right"/></m:oMathParaPr><m:oMath>
470
- <m:nary><m:naryPr><m:chr m:val="&#x2211;"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub><m:r><m:t>i=1</m:t></m:r></m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>i</m:t></m:r></m:e><m:sup><m:r><m:t>3</m:t></m:r></m:sup></m:sSup></m:e></m:nary><span style="font-style:normal;"><m:r><m:rPr><m:sty m:val="p"></m:sty></m:rPr><m:t>=</m:t></m:r></span><m:sSup><m:e><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:f><m:fPr><m:type m:val="bar"></m:type></m:fPr><m:num><m:r><m:t>n</m:t></m:r><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:r><m:t>n+1</m:t></m:r></m:e></m:d></m:num><m:den><m:r><m:t>2</m:t></m:r></m:den></m:f></m:e></m:d></m:e><m:sup><m:r><m:t>2</m:t></m:r></m:sup></m:sSup>
471
- </m:oMath>
472
- </m:oMathPara></div>}, '<div style="mso-element:footnote-list"/>')}
490
+ <div style="text-align:right;"><m:oMathPara><m:oMathParaPr><m:jc m:val="right"/></m:oMathParaPr><m:oMath>
491
+ <m:nary><m:naryPr><m:chr m:val="&#x2211;"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub><m:r><m:t>i=1</m:t></m:r></m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>i</m:t></m:r></m:e><m:sup><m:r><m:t>3</m:t></m:r></m:sup></m:sSup></m:e></m:nary><span style="font-style:normal;"><m:r><m:rPr><m:sty m:val="p"></m:sty></m:rPr><m:t>=</m:t></m:r></span><m:sSup><m:e><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:f><m:fPr><m:type m:val="bar"></m:type></m:fPr><m:num><m:r><m:t>n</m:t></m:r><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:r><m:t>n+1</m:t></m:r></m:e></m:d></m:num><m:den><m:r><m:t>2</m:t></m:r></m:den></m:f></m:e></m:d></m:e><m:sup><m:r><m:t>2</m:t></m:r></m:sup></m:sSup>
492
+ </m:oMath>
493
+ </m:oMathPara></div>}, '<div style="mso-element:footnote-list"/>')}
473
494
  #{WORD_FTR1}
474
495
  OUTPUT
475
496
  end
476
497
 
477
498
  it "raises error in processing of broken AsciiMath" do
478
499
  begin
479
- expect { Html2Doc.process(html_input(%[<div style='text-align:right;'>{{u_c = 6.6"unitsml(kHz)}}</div>]), filename: "test", asciimathdelims: ["{{", "}}"]) }.to output('parsing: u_c = 6.6"unitsml(kHz)').to_stderr
500
+ expect do
501
+ Html2Doc.process(html_input(%[<div style='text-align:right;'>{{u_c = 6.6"unitsml(kHz)}}</div>]),
502
+ filename: "test", asciimathdelims: ["{{", "}}"])
503
+ end.to output('parsing: u_c = 6.6"unitsml(kHz)').to_stderr
480
504
  rescue StandardError
481
505
  end
482
- expect { Html2Doc.process(html_input(%[<div style='text-align:right;'>{{u_c = 6.6"unitsml(kHz)}}</div>]), filename: "test", asciimathdelims: ["{{", "}}"]) }.to raise_error(StandardError)
506
+ expect do
507
+ Html2Doc.process(html_input(%[<div style='text-align:right;'>{{u_c = 6.6"unitsml(kHz)}}</div>]),
508
+ filename: "test", asciimathdelims: ["{{", "}}"])
509
+ end.to raise_error(StandardError)
483
510
  end
484
511
 
485
512
  it "wraps msup after munderover in MathML" do
486
513
  Html2Doc.process(html_input("<div><math xmlns='http://www.w3.org/1998/Math/MathML'>
487
- <munderover><mo>&#x2211;</mo><mrow><mi>i</mi><mo>=</mo><mn>0</mn></mrow><mrow><mi>n</mi></mrow></munderover><msup><mn>2</mn><mrow><mi>i</mi></mrow></msup></math></div>"), filename: "test", asciimathdelims: ["{{", "}}"])
514
+ <munderover><mo>&#x2211;</mo><mrow><mi>i</mi><mo>=</mo><mn>0</mn></mrow><mrow><mi>n</mi></mrow></munderover><msup><mn>2</mn><mrow><mi>i</mi></mrow></msup></math></div>"),
515
+ filename: "test", asciimathdelims: ["{{", "}}"])
488
516
  expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
489
517
  .to match_fuzzy(<<~OUTPUT)
490
518
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
491
519
  #{word_body('<div><m:oMath>
492
- <m:nary><m:naryPr><m:chr m:val="&#x2211;"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub><m:r><m:t>i=0</m:t></m:r></m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>2</m:t></m:r></m:e><m:sup><m:r><m:t>i</m:t></m:r></m:sup></m:sSup></m:e></m:nary></m:oMath>
493
- </div>', '<div style="mso-element:footnote-list"/>')}
520
+ <m:nary><m:naryPr><m:chr m:val="&#x2211;"></m:chr><m:limLoc m:val="undOvr"></m:limLoc><m:grow m:val="on"></m:grow><m:subHide m:val="off"></m:subHide><m:supHide m:val="off"></m:supHide></m:naryPr><m:sub><m:r><m:t>i=0</m:t></m:r></m:sub><m:sup><m:r><m:t>n</m:t></m:r></m:sup><m:e><m:sSup><m:e><m:r><m:t>2</m:t></m:r></m:e><m:sup><m:r><m:t>i</m:t></m:r></m:sup></m:sSup></m:e></m:nary></m:oMath>
521
+ </div>', '<div style="mso-element:footnote-list"/>')}
494
522
  #{WORD_FTR1}
495
523
  OUTPUT
496
524
  end
@@ -573,33 +601,43 @@ RSpec.describe Html2Doc do
573
601
 
574
602
  it "resizes images with missing or auto sizes" do
575
603
  image = { "src" => "spec/19160-8.jpg" }
576
- expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100)).to eq [30, 100]
604
+ expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100))
605
+ .to eq [30, 100]
577
606
  image["width"] = "20"
578
- expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100)).to eq [20, 65]
607
+ expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100))
608
+ .to eq [20, 65]
579
609
  image.delete("width")
580
610
  image["height"] = "50"
581
- expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100)).to eq [15, 50]
611
+ expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100))
612
+ .to eq [15, 50]
582
613
  image.delete("height")
583
614
  image["width"] = "500"
584
- expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100)).to eq [30, 100]
615
+ expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100))
616
+ .to eq [30, 100]
585
617
  image.delete("width")
586
618
  image["height"] = "500"
587
- expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100)).to eq [30, 100]
619
+ expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100))
620
+ .to eq [30, 100]
588
621
  image["width"] = "20"
589
622
  image["height"] = "auto"
590
- expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100)).to eq [20, 65]
623
+ expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100))
624
+ .to eq [20, 65]
591
625
  image["width"] = "auto"
592
626
  image["height"] = "50"
593
- expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100)).to eq [15, 50]
627
+ expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100))
628
+ .to eq [15, 50]
594
629
  image["width"] = "500"
595
630
  image["height"] = "auto"
596
- expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100)).to eq [30, 100]
631
+ expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100))
632
+ .to eq [30, 100]
597
633
  image["width"] = "auto"
598
634
  image["height"] = "500"
599
- expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100)).to eq [30, 100]
635
+ expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100))
636
+ .to eq [30, 100]
600
637
  image["width"] = "auto"
601
638
  image["height"] = "auto"
602
- expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100)).to eq [30, 100]
639
+ expect(Html2Doc.image_resize(image, "spec/19160-8.jpg", 100, 100))
640
+ .to eq [30, 100]
603
641
  end
604
642
 
605
643
  it "does not move images if they are external URLs" do
@@ -627,7 +665,8 @@ RSpec.describe Html2Doc do
627
665
 
628
666
  # it "warns about SVG" do
629
667
  # simple_body = '<img src="https://example.com/19160-6.svg">'
630
- # expect{ Html2Doc.process(html_input(simple_body), filename: "test") }.to output("https://example.com/19160-6.svg: SVG not supported\n").to_stderr
668
+ # expect{ Html2Doc.process(html_input(simple_body), filename: "test") }
669
+ # .to output("https://example.com/19160-6.svg: SVG not supported\n").to_stderr
631
670
  # end
632
671
 
633
672
  it "processes epub:type footnotes" do
@@ -638,15 +677,15 @@ RSpec.describe Html2Doc do
638
677
  Html2Doc.process(html_input(simple_body), filename: "test")
639
678
  expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
640
679
  .to match_fuzzy(<<~OUTPUT)
641
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
642
- #{word_body('<div>This is a very simple
643
- document<a epub:type="footnote" href="#_ftn1" style="mso-footnote-id:ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a> allegedly<a epub:type="footnote" href="#_ftn2" style="mso-footnote-id:ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a></div>',
644
- '<div style="mso-element:footnote-list"><div style="mso-element:footnote" id="ftn1">
645
- <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn1" href="#_ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Footnote</p></div>
646
- <div style="mso-element:footnote" id="ftn2">
647
- <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn2" href="#_ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Other Footnote</p></div>
648
- </div>')}
649
- #{WORD_FTR1}
680
+ #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
681
+ #{word_body('<div>This is a very simple
682
+ document<a epub:type="footnote" href="#_ftn1" style="mso-footnote-id:ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a> allegedly<a epub:type="footnote" href="#_ftn2" style="mso-footnote-id:ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a></div>',
683
+ '<div style="mso-element:footnote-list"><div style="mso-element:footnote" id="ftn1">
684
+ <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn1" href="#_ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Footnote</p></div>
685
+ <div style="mso-element:footnote" id="ftn2">
686
+ <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn2" href="#_ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Other Footnote</p></div>
687
+ </div>')}
688
+ #{WORD_FTR1}
650
689
  OUTPUT
651
690
  end
652
691
 
@@ -658,15 +697,15 @@ RSpec.describe Html2Doc do
658
697
  Html2Doc.process(html_input(simple_body), filename: "test")
659
698
  expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
660
699
  .to match_fuzzy(<<~OUTPUT)
661
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
662
- #{word_body('<div>This is a very simple
663
- document<a class="footnote" href="#_ftn1" style="mso-footnote-id:ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a> allegedly<a class="footnote" href="#_ftn2" style="mso-footnote-id:ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a></div>',
664
- '<div style="mso-element:footnote-list"><div style="mso-element:footnote" id="ftn1">
665
- <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn1" href="#_ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Footnote</p></div>
666
- <div style="mso-element:footnote" id="ftn2">
667
- <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn2" href="#_ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Other Footnote</p></div>
668
- </div>')}
669
- #{WORD_FTR1}
700
+ #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
701
+ #{word_body('<div>This is a very simple
702
+ document<a class="footnote" href="#_ftn1" style="mso-footnote-id:ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a> allegedly<a class="footnote" href="#_ftn2" style="mso-footnote-id:ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a></div>',
703
+ '<div style="mso-element:footnote-list"><div style="mso-element:footnote" id="ftn1">
704
+ <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn1" href="#_ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Footnote</p></div>
705
+ <div style="mso-element:footnote" id="ftn2">
706
+ <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn2" href="#_ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Other Footnote</p></div>
707
+ </div>')}
708
+ #{WORD_FTR1}
670
709
  OUTPUT
671
710
  end
672
711
 
@@ -678,15 +717,15 @@ RSpec.describe Html2Doc do
678
717
  Html2Doc.process(html_input(simple_body), filename: "test")
679
718
  expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
680
719
  .to match_fuzzy(<<~OUTPUT)
681
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
682
- #{word_body('<div>This is a very simple
683
- document<a class="footnote" href="#_ftn1" style="mso-footnote-id:ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference">(</span><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span><span class="MsoFootnoteReference">)</span></a> allegedly<a class="footnote" href="#_ftn2" style="mso-footnote-id:ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a></div>',
684
- '<div style="mso-element:footnote-list"><div style="mso-element:footnote" id="ftn1">
685
- <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn1" href="#_ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference">(</span><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span><span class="MsoFootnoteReference">)</span></a>Footnote</p></div>
686
- <div style="mso-element:footnote" id="ftn2">
687
- <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn2" href="#_ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Other Footnote</p></div>
688
- </div>')}
689
- #{WORD_FTR1}
720
+ #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
721
+ #{word_body('<div>This is a very simple
722
+ document<a class="footnote" href="#_ftn1" style="mso-footnote-id:ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference">(</span><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span><span class="MsoFootnoteReference">)</span></a> allegedly<a class="footnote" href="#_ftn2" style="mso-footnote-id:ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a></div>',
723
+ '<div style="mso-element:footnote-list"><div style="mso-element:footnote" id="ftn1">
724
+ <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn1" href="#_ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference">(</span><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span><span class="MsoFootnoteReference">)</span></a>Footnote</p></div>
725
+ <div style="mso-element:footnote" id="ftn2">
726
+ <p id="" class="MsoFootnoteText"><a style="mso-footnote-id:ftn2" href="#_ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Other Footnote</p></div>
727
+ </div>')}
728
+ #{WORD_FTR1}
690
729
  OUTPUT
691
730
  end
692
731
 
@@ -698,15 +737,15 @@ RSpec.describe Html2Doc do
698
737
  Html2Doc.process(html_input(simple_body), filename: "test")
699
738
  expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
700
739
  .to match_fuzzy(<<~OUTPUT)
701
- #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
702
- #{word_body('<div>This is a very simple
703
- document<a class="footnote" href="#_ftn1" style="mso-footnote-id:ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a> allegedly<a class="footnote" href="#_ftn2" style="mso-footnote-id:ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a></div>',
704
- '<div style="mso-element:footnote-list"><div style="mso-element:footnote" id="ftn1">
705
- <p class="MsoFootnoteText"><a style="mso-footnote-id:ftn1" href="#_ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Footnote</p></div>
706
- <div style="mso-element:footnote" id="ftn2">
707
- <p class="MsoFootnoteText"><a style="mso-footnote-id:ftn2" href="#_ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Other Footnote</p></div>
708
- </div>')}
709
- #{WORD_FTR1}
740
+ #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
741
+ #{word_body('<div>This is a very simple
742
+ document<a class="footnote" href="#_ftn1" style="mso-footnote-id:ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a> allegedly<a class="footnote" href="#_ftn2" style="mso-footnote-id:ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a></div>',
743
+ '<div style="mso-element:footnote-list"><div style="mso-element:footnote" id="ftn1">
744
+ <p class="MsoFootnoteText"><a style="mso-footnote-id:ftn1" href="#_ftn1" name="_ftnref1" title="" id="_ftnref1"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Footnote</p></div>
745
+ <div style="mso-element:footnote" id="ftn2">
746
+ <p class="MsoFootnoteText"><a style="mso-footnote-id:ftn2" href="#_ftn2" name="_ftnref2" title="" id="_ftnref2"><span class="MsoFootnoteReference"><span style="mso-special-character:footnote"></span></span></a>Other Footnote</p></div>
747
+ </div>')}
748
+ #{WORD_FTR1}
710
749
  OUTPUT
711
750
  end
712
751
 
@@ -715,13 +754,14 @@ RSpec.describe Html2Doc do
715
754
  <div><ul id="0">
716
755
  <li><div><p><ol id="1"><li><ul id="2"><li><p><ol id="3"><li><ol id="4"><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li><div><ul id="5"><li>C</li></ul></div>
717
756
  BODY
718
- Html2Doc.process(html_input(simple_body), filename: "test", liststyles: { ul: "l1", ol: "l2" })
757
+ Html2Doc.process(html_input(simple_body),
758
+ filename: "test", liststyles: { ul: "l1", ol: "l2" })
719
759
  expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
720
760
  .to match_fuzzy(<<~OUTPUT)
721
761
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
722
762
  #{word_body('<div>
723
- <p style="mso-list:l1 level1 lfo1;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l2 level2 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level4 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpFirst">A</p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpMiddle">B<p class="MsoListParagraphCxSpMiddle">B2</p></p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpLast">C</p></p></p></p></div></p><div><p style="mso-list:l1 level1 lfo2;" class="MsoListParagraphCxSpFirst">C</p></div>
724
- </div>',
763
+ <p style="mso-list:l1 level1 lfo1;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l2 level2 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level4 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpFirst">A</p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpMiddle">B<p class="MsoListParagraphCxSpMiddle">B2</p></p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpLast">C</p></p></p></p></div></p><div><p style="mso-list:l1 level1 lfo2;" class="MsoListParagraphCxSpFirst">C</p></div>
764
+ </div>',
725
765
  '<div style="mso-element:footnote-list"/>')}
726
766
  #{WORD_FTR1}
727
767
  OUTPUT
@@ -733,13 +773,14 @@ RSpec.describe Html2Doc do
733
773
  <ol id="1"><li><div><p><ol id="2"><li><ul id="3"><li><p><ol id="4"><li><ol id="5"><li>A</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ol>
734
774
  <ol id="6"><li><div><p><ol id="7"><li><ul id="8"><li><p><ol id="9"><li><ol id="10"><li>A</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ol></div>
735
775
  BODY
736
- Html2Doc.process(html_input(simple_body), filename: "test", liststyles: { ul: "l1", ol: "l2" })
776
+ Html2Doc.process(html_input(simple_body),
777
+ filename: "test", liststyles: { ul: "l1", ol: "l2" })
737
778
  expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
738
779
  .to match_fuzzy(<<~OUTPUT)
739
780
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
740
781
  #{word_body('<div>
741
- <p style="mso-list:l2 level1 lfo1;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l2 level2 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level4 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpFirst">A</p></p></p></p></div></p>
742
- <p style="mso-list:l2 level1 lfo2;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l2 level2 lfo2;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level4 lfo2;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level5 lfo2;" class="MsoListParagraphCxSpFirst">A</p></p></p></p></div></p></div>',
782
+ <p style="mso-list:l2 level1 lfo1;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l2 level2 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level4 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpFirst">A</p></p></p></p></div></p>
783
+ <p style="mso-list:l2 level1 lfo2;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l2 level2 lfo2;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level4 lfo2;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level5 lfo2;" class="MsoListParagraphCxSpFirst">A</p></p></p></p></div></p></div>',
743
784
  '<div style="mso-element:footnote-list"/>')}
744
785
  #{WORD_FTR1}
745
786
  OUTPUT
@@ -754,16 +795,18 @@ RSpec.describe Html2Doc do
754
795
  <div><ul class="other" id="10">
755
796
  <li><div><p><ol id="11"><li><ul id="12"><li><p><ol id="13"><li><ol id="14"><li>A</li><li><p>B</p><p>B2</p></li><li>C</li></ol></li></ol></p></li></ul></li></ol></p></div></li></ul></div>
756
797
  BODY
757
- Html2Doc.process(html_input(simple_body), filename: "test", liststyles: { ul: "l1", ol: "l2", steps: "l3" })
798
+ Html2Doc.process(html_input(simple_body),
799
+ filename: "test",
800
+ liststyles: { ul: "l1", ol: "l2", steps: "l3" })
758
801
  expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
759
802
  .to match_fuzzy(<<~OUTPUT)
760
803
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
761
804
  #{word_body('<div>
762
- <p style="mso-list:l3 level1 lfo2;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l3 level2 lfo2;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l3 level4 lfo2;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l3 level5 lfo2;" class="MsoListParagraphCxSpFirst">A</p><p style="mso-list:l3 level5 lfo2;" class="MsoListParagraphCxSpMiddle">B<p class="MsoListParagraphCxSpMiddle">B2</p></p><p style="mso-list:l3 level5 lfo2;" class="MsoListParagraphCxSpLast">C</p></p></p></p></div></p></div>
763
- <div>
764
- <p style="mso-list:l1 level1 lfo1;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l2 level2 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level4 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpFirst">A</p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpMiddle">B<p class="MsoListParagraphCxSpMiddle">B2</p></p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpLast">C</p></p></p></p></div></p></div>
765
- <div>
766
- <p style="mso-list:l1 level1 lfo3;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l2 level2 lfo3;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level4 lfo3;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level5 lfo3;" class="MsoListParagraphCxSpFirst">A</p><p style="mso-list:l2 level5 lfo3;" class="MsoListParagraphCxSpMiddle">B<p class="MsoListParagraphCxSpMiddle">B2</p></p><p style="mso-list:l2 level5 lfo3;" class="MsoListParagraphCxSpLast">C</p></p></p></p></div></p></div>',
805
+ <p style="mso-list:l3 level1 lfo2;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l3 level2 lfo2;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l3 level4 lfo2;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l3 level5 lfo2;" class="MsoListParagraphCxSpFirst">A</p><p style="mso-list:l3 level5 lfo2;" class="MsoListParagraphCxSpMiddle">B<p class="MsoListParagraphCxSpMiddle">B2</p></p><p style="mso-list:l3 level5 lfo2;" class="MsoListParagraphCxSpLast">C</p></p></p></p></div></p></div>
806
+ <div>
807
+ <p style="mso-list:l1 level1 lfo1;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l2 level2 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level4 lfo1;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpFirst">A</p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpMiddle">B<p class="MsoListParagraphCxSpMiddle">B2</p></p><p style="mso-list:l2 level5 lfo1;" class="MsoListParagraphCxSpLast">C</p></p></p></p></div></p></div>
808
+ <div>
809
+ <p style="mso-list:l1 level1 lfo3;" class="MsoListParagraphCxSpFirst"><div><p class="MsoNormal"><p style="mso-list:l2 level2 lfo3;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level4 lfo3;" class="MsoListParagraphCxSpFirst"><p style="mso-list:l2 level5 lfo3;" class="MsoListParagraphCxSpFirst">A</p><p style="mso-list:l2 level5 lfo3;" class="MsoListParagraphCxSpMiddle">B<p class="MsoListParagraphCxSpMiddle">B2</p></p><p style="mso-list:l2 level5 lfo3;" class="MsoListParagraphCxSpLast">C</p></p></p></p></div></p></div>',
767
810
  '<div style="mso-element:footnote-list"/>')}
768
811
  #{WORD_FTR1}
769
812
  OUTPUT
@@ -776,14 +819,15 @@ RSpec.describe Html2Doc do
776
819
  <p id="b"/>
777
820
  </div>
778
821
  BODY
779
- Html2Doc.process(html_input(simple_body), filename: "test", liststyles: { ul: "l1", ol: "l2" })
822
+ Html2Doc.process(html_input(simple_body),
823
+ filename: "test", liststyles: { ul: "l1", ol: "l2" })
780
824
  expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
781
825
  .to match_fuzzy(<<~OUTPUT)
782
826
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
783
827
  #{word_body('<div>
784
- <p class="MsoNormal"><a name="a" id="a"></a>Hello</p>
785
- <p class="MsoNormal"><a name="b" id="b"></a></p>
786
- </div>',
828
+ <p class="MsoNormal"><a name="a" id="a"></a>Hello</p>
829
+ <p class="MsoNormal"><a name="b" id="b"></a></p>
830
+ </div>',
787
831
  '<div style="mso-element:footnote-list"/>')}
788
832
  #{WORD_FTR1}
789
833
  OUTPUT
@@ -791,12 +835,14 @@ RSpec.describe Html2Doc do
791
835
 
792
836
  it "test image base64 image encoding" do
793
837
  simple_body = '<img src="19160-6.png">'
794
- Html2Doc.process(html_input(simple_body), filename: "spec/test", debug: true)
838
+ Html2Doc.process(html_input(simple_body),
839
+ filename: "spec/test", debug: true)
795
840
  testdoc = File.read("spec/test.doc", encoding: "utf-8")
796
841
  base64_image = testdoc[/image\/png\n\n(.*?)\n\n----/m, 1].gsub!("\n", "")
797
842
  base64_image_basename = testdoc[%r{Content-ID: <([0-9a-z\-]+)\.png}m, 1]
798
843
  doc_bin_image = Base64.strict_decode64(base64_image)
799
- file_bin_image = IO.read("spec/test_files/#{base64_image_basename}.png", mode: "rb")
844
+ file_bin_image = IO
845
+ .read("spec/test_files/#{base64_image_basename}.png", mode: "rb")
800
846
  expect(doc_bin_image).to eq file_bin_image
801
847
  FileUtils.rm_rf %w[spec/test_files spec/test.doc spec/test.htm]
802
848
  end
metadata CHANGED
@@ -1,15 +1,29 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2doc
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.1.1
4
+ version: 1.1.2
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-05-10 00:00:00.000000000 Z
11
+ date: 2021-05-24 00:00:00.000000000 Z
12
12
  dependencies:
13
+ - !ruby/object:Gem::Dependency
14
+ name: asciimath
15
+ requirement: !ruby/object:Gem::Requirement
16
+ requirements:
17
+ - - "~>"
18
+ - !ruby/object:Gem::Version
19
+ version: 2.0.2
20
+ type: :runtime
21
+ prerelease: false
22
+ version_requirements: !ruby/object:Gem::Requirement
23
+ requirements:
24
+ - - "~>"
25
+ - !ruby/object:Gem::Version
26
+ version: 2.0.2
13
27
  - !ruby/object:Gem::Dependency
14
28
  name: htmlentities
15
29
  requirement: !ruby/object:Gem::Requirement
@@ -58,30 +72,30 @@ dependencies:
58
72
  requirements:
59
73
  - - "~>"
60
74
  - !ruby/object:Gem::Version
61
- version: 1.10.4
75
+ version: '1.10'
62
76
  type: :runtime
63
77
  prerelease: false
64
78
  version_requirements: !ruby/object:Gem::Requirement
65
79
  requirements:
66
80
  - - "~>"
67
81
  - !ruby/object:Gem::Version
68
- version: 1.10.4
82
+ version: '1.10'
69
83
  - !ruby/object:Gem::Dependency
70
- name: thread_safe
84
+ name: plane1converter
71
85
  requirement: !ruby/object:Gem::Requirement
72
86
  requirements:
73
- - - ">="
87
+ - - "~>"
74
88
  - !ruby/object:Gem::Version
75
- version: '0'
89
+ version: 0.0.1
76
90
  type: :runtime
77
91
  prerelease: false
78
92
  version_requirements: !ruby/object:Gem::Requirement
79
93
  requirements:
80
- - - ">="
94
+ - - "~>"
81
95
  - !ruby/object:Gem::Version
82
- version: '0'
96
+ version: 0.0.1
83
97
  - !ruby/object:Gem::Dependency
84
- name: uuidtools
98
+ name: thread_safe
85
99
  requirement: !ruby/object:Gem::Requirement
86
100
  requirements:
87
101
  - - ">="
@@ -95,33 +109,19 @@ dependencies:
95
109
  - !ruby/object:Gem::Version
96
110
  version: '0'
97
111
  - !ruby/object:Gem::Dependency
98
- name: asciimath
99
- requirement: !ruby/object:Gem::Requirement
100
- requirements:
101
- - - "~>"
102
- - !ruby/object:Gem::Version
103
- version: 2.0.2
104
- type: :runtime
105
- prerelease: false
106
- version_requirements: !ruby/object:Gem::Requirement
107
- requirements:
108
- - - "~>"
109
- - !ruby/object:Gem::Version
110
- version: 2.0.2
111
- - !ruby/object:Gem::Dependency
112
- name: plane1converter
112
+ name: uuidtools
113
113
  requirement: !ruby/object:Gem::Requirement
114
114
  requirements:
115
- - - "~>"
115
+ - - ">="
116
116
  - !ruby/object:Gem::Version
117
- version: 0.0.1
117
+ version: '0'
118
118
  type: :runtime
119
119
  prerelease: false
120
120
  version_requirements: !ruby/object:Gem::Requirement
121
121
  requirements:
122
- - - "~>"
122
+ - - ">="
123
123
  - !ruby/object:Gem::Version
124
- version: 0.0.1
124
+ version: '0'
125
125
  - !ruby/object:Gem::Dependency
126
126
  name: byebug
127
127
  requirement: !ruby/object:Gem::Requirement
@@ -207,61 +207,61 @@ dependencies:
207
207
  - !ruby/object:Gem::Version
208
208
  version: '3.6'
209
209
  - !ruby/object:Gem::Dependency
210
- name: rubocop
210
+ name: rspec-match_fuzzy
211
211
  requirement: !ruby/object:Gem::Requirement
212
212
  requirements:
213
213
  - - "~>"
214
214
  - !ruby/object:Gem::Version
215
- version: 1.5.2
215
+ version: 0.1.3
216
216
  type: :development
217
217
  prerelease: false
218
218
  version_requirements: !ruby/object:Gem::Requirement
219
219
  requirements:
220
220
  - - "~>"
221
221
  - !ruby/object:Gem::Version
222
- version: 1.5.2
222
+ version: 0.1.3
223
223
  - !ruby/object:Gem::Dependency
224
- name: simplecov
224
+ name: rubocop
225
225
  requirement: !ruby/object:Gem::Requirement
226
226
  requirements:
227
227
  - - "~>"
228
228
  - !ruby/object:Gem::Version
229
- version: '0.15'
229
+ version: 1.5.2
230
230
  type: :development
231
231
  prerelease: false
232
232
  version_requirements: !ruby/object:Gem::Requirement
233
233
  requirements:
234
234
  - - "~>"
235
235
  - !ruby/object:Gem::Version
236
- version: '0.15'
236
+ version: 1.5.2
237
237
  - !ruby/object:Gem::Dependency
238
- name: timecop
238
+ name: simplecov
239
239
  requirement: !ruby/object:Gem::Requirement
240
240
  requirements:
241
241
  - - "~>"
242
242
  - !ruby/object:Gem::Version
243
- version: '0.9'
243
+ version: '0.15'
244
244
  type: :development
245
245
  prerelease: false
246
246
  version_requirements: !ruby/object:Gem::Requirement
247
247
  requirements:
248
248
  - - "~>"
249
249
  - !ruby/object:Gem::Version
250
- version: '0.9'
250
+ version: '0.15'
251
251
  - !ruby/object:Gem::Dependency
252
- name: rspec-match_fuzzy
252
+ name: timecop
253
253
  requirement: !ruby/object:Gem::Requirement
254
254
  requirements:
255
255
  - - "~>"
256
256
  - !ruby/object:Gem::Version
257
- version: 0.1.3
257
+ version: '0.9'
258
258
  type: :development
259
259
  prerelease: false
260
260
  version_requirements: !ruby/object:Gem::Requirement
261
261
  requirements:
262
262
  - - "~>"
263
263
  - !ruby/object:Gem::Version
264
- version: 0.1.3
264
+ version: '0.9'
265
265
  description: |
266
266
  Convert HTML document to Microsoft Word document.
267
267