html2doc 1.0.7 → 1.1.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/html2doc/math.rb CHANGED
@@ -9,23 +9,34 @@ module Html2Doc
9
9
  Nokogiri::XSLT(File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"),
10
10
  encoding: "utf-8"))
11
11
 
12
- def self.asciimath_to_mathml1(x)
13
- AsciiMath::MathMLBuilder.new(:msword => true).append_expression(
14
- AsciiMath.parse(HTMLEntities.new.decode(x)).ast).to_s.
15
- gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
12
+ def self.asciimath_to_mathml1(expr)
13
+ AsciiMath::MathMLBuilder.new(msword: true).append_expression(
14
+ AsciiMath.parse(HTMLEntities.new.decode(expr)).ast,
15
+ ).to_s
16
+ .gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
17
+ rescue StandardError => e
18
+ puts "parsing: #{expr}"
19
+ puts e.message
20
+ raise e
16
21
  end
17
22
 
18
23
  def self.asciimath_to_mathml(doc, delims)
19
24
  return doc if delims.nil? || delims.size < 2
25
+
20
26
  m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
21
27
  m.each_slice(4).map.with_index do |(*a), i|
22
- i % 500 == 0 && m.size > 1000 && i > 0 and
23
- warn "MathML #{i} of #{(m.size / 4).floor}"
28
+ progress_conv(i, 500, (m.size / 4).floor, 1000, "AsciiMath")
24
29
  a[2].nil? || a[2] = asciimath_to_mathml1(a[2])
25
30
  a.size > 1 ? a[0] + a[2] : a[0]
26
31
  end.join
27
32
  end
28
33
 
34
+ def self.progress_conv(idx, step, total, threshold, msg)
35
+ return unless (idx % step).zero? && total > threshold && idx.positive?
36
+
37
+ warn "#{msg} #{idx} of #{total}"
38
+ end
39
+
29
40
  def self.unwrap_accents(doc)
30
41
  doc.xpath("//*[@accent = 'true']").each do |x|
31
42
  x.elements.length > 1 or next
@@ -36,106 +47,124 @@ module Html2Doc
36
47
  end
37
48
 
38
49
  # random fixes to MathML input that OOXML needs to render properly
39
- def self.ooxml_cleanup(m, docnamespaces)
40
- m = unwrap_accents(mathml_preserve_space(
41
- mathml_insert_rows(m, docnamespaces), docnamespaces))
42
- m.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
43
- m
50
+ def self.ooxml_cleanup(math, docnamespaces)
51
+ math = unwrap_accents(
52
+ mathml_preserve_space(
53
+ mathml_insert_rows(math, docnamespaces), docnamespaces
54
+ ),
55
+ )
56
+ math.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
57
+ math
44
58
  end
45
59
 
46
- def self.mathml_insert_rows(m, docnamespaces)
47
- m.xpath(%w(msup msub msubsup munder mover munderover).
48
- map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
60
+ def self.mathml_insert_rows(math, docnamespaces)
61
+ math.xpath(%w(msup msub msubsup munder mover munderover)
62
+ .map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
49
63
  next unless x.next_element && x.next_element != "mrow"
64
+
50
65
  x.next_element.wrap("<mrow/>")
51
66
  end
52
- m
67
+ math
53
68
  end
54
69
 
55
- def self.mathml_preserve_space(m, docnamespaces)
56
- m.xpath(".//xmlns:mtext", docnamespaces).each do |x|
70
+ def self.mathml_preserve_space(math, docnamespaces)
71
+ math.xpath(".//xmlns:mtext", docnamespaces).each do |x|
57
72
  x.children = x.children.to_xml.gsub(/^\s/, "&#xA0;").gsub(/\s$/, "&#xA0;")
58
73
  end
59
- m
74
+ math
60
75
  end
61
76
 
62
- def self.unitalic(m)
63
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
64
- x.wrap("<span style='font-style:normal;'></span>")
77
+ HTML_NS = 'xmlns="http://www.w3.org/1999/xhtml"'.freeze
78
+
79
+ def self.unitalic(math)
80
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
81
+ x.wrap("<span #{HTML_NS} style='font-style:normal;'></span>")
65
82
  end
66
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
67
- x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>")
83
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
84
+ x.wrap("<span #{HTML_NS} class='nostem' style='font-weight:bold;'><em></em></span>")
68
85
  end
69
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
70
- x.wrap("<span class='nostem'><em></em></span>")
86
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
87
+ x.wrap("<span #{HTML_NS} class='nostem'><em></em></span>")
71
88
  end
72
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
73
- x.wrap("<span style='font-style:normal;font-weight:bold;'></span>")
89
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
90
+ x.wrap("<span #{HTML_NS} style='font-style:normal;font-weight:bold;'></span>")
74
91
  end
75
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
76
- toPlane1(x, :monospace)
92
+ math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
93
+ to_plane1(x, :monospace)
77
94
  end
78
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
79
- toPlane1(x, :doublestruck)
95
+ math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
96
+ to_plane1(x, :doublestruck)
80
97
  end
81
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
82
- toPlane1(x, :script)
98
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
99
+ to_plane1(x, :script)
83
100
  end
84
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
85
- toPlane1(x, :scriptbold)
101
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
102
+ to_plane1(x, :scriptbold)
86
103
  end
87
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
88
- toPlane1(x, :fraktur)
104
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
105
+ to_plane1(x, :fraktur)
89
106
  end
90
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
91
- toPlane1(x, :frakturbold)
107
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
108
+ to_plane1(x, :frakturbold)
92
109
  end
93
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
94
- toPlane1(x, :sans)
110
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
111
+ to_plane1(x, :sans)
95
112
  end
96
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
97
- toPlane1(x, :sansbold)
113
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
114
+ to_plane1(x, :sansbold)
98
115
  end
99
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
100
- toPlane1(x, :sansitalic)
116
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
117
+ to_plane1(x, :sansitalic)
101
118
  end
102
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
103
- toPlane1(x, :sansbolditalic)
119
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
120
+ to_plane1(x, :sansbolditalic)
104
121
  end
105
- m
122
+ math
106
123
  end
107
124
 
108
- def self.toPlane1(x, font)
109
- x.traverse do |n|
125
+ def self.to_plane1(xml, font)
126
+ xml.traverse do |n|
110
127
  next unless n.text?
128
+
111
129
  n.replace(Plane1Converter.conv(HTMLEntities.new.decode(n.text), font))
112
130
  end
113
- x
131
+ xml
114
132
  end
115
133
 
116
134
  def self.mathml_to_ooml(docxml)
117
135
  docnamespaces = docxml.collect_namespaces
118
136
  m = docxml.xpath("//*[local-name() = 'math']")
119
137
  m.each_with_index do |x, i|
120
- i % 100 == 0 && m.size > 500 && i > 0 and
121
- warn "Math OOXML #{i} of #{m.size}"
122
- element = ooxml_cleanup(x, docnamespaces)
123
- doc = Nokogiri::XML::Document::new()
124
- doc.root = element
125
- ooxml = (unitalic(esc_space(@xsltemplate.transform(doc)))).to_s.
126
- gsub(/<\?[^>]+>\s*/, "").
127
- gsub(/ xmlns(:[^=]+)?="[^"]+"/, "").
128
- gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
129
- ooxml = uncenter(x, ooxml)
130
- x.swap(ooxml)
138
+ progress_conv(i, 100, m.size, 500, "Math OOXML")
139
+ mathml_to_ooml1(x, docnamespaces)
131
140
  end
132
141
  end
133
142
 
134
- # escape space as &#x32;; we are removing any spaces generated by
143
+ # We need span and em not to be namespaced. Word can't deal with explicit
144
+ # namespaces.
145
+ # We will end up stripping them out again under Nokogiri 1.11, which correctly
146
+ # insists on inheriting namespace from parent.
147
+ def self.ooml_clean(xml)
148
+ xml.to_s
149
+ .gsub(/<\?[^>]+>\s*/, "")
150
+ .gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
151
+ .gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
152
+ end
153
+
154
+ def self.mathml_to_ooml1(xml, docnamespaces)
155
+ doc = Nokogiri::XML::Document::new
156
+ doc.root = ooxml_cleanup(xml, docnamespaces)
157
+ ooxml = ooml_clean(unitalic(esc_space(@xsltemplate.transform(doc))))
158
+ ooxml = uncenter(xml, ooxml)
159
+ xml.swap(ooxml)
160
+ end
161
+
162
+ # escape space as &#x32;; we are removing any spaces generated by
135
163
  # XML indentation
136
164
  def self.esc_space(xml)
137
165
  xml.traverse do |n|
138
166
  next unless n.text?
167
+
139
168
  n = n.text.gsub(/ /, "&#x32;")
140
169
  end
141
170
  xml
@@ -143,17 +172,15 @@ module Html2Doc
143
172
 
144
173
  # if OOXML has no siblings, by default it is centered; override this with
145
174
  # left/right if parent is so tagged
146
- def self.uncenter(m, ooxml)
147
- if m.next == nil && m.previous == nil
148
- alignnode = m.at(".//ancestor::*[@style][local-name() = 'p' or "\
149
- "local-name() = 'div' or local-name() = 'td']/@style")
150
- return ooxml unless alignnode
151
- if alignnode.text.include? ("text-align:left")
152
- ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
153
- "m:val='left'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
154
- elsif alignnode.text.include? ("text-align:right")
175
+ def self.uncenter(math, ooxml)
176
+ alignnode = math.at(".//ancestor::*[@style][local-name() = 'p' or "\
177
+ "local-name() = 'div' or local-name() = 'td']/@style")
178
+ return ooxml unless alignnode && (math.next == nil && math.previous == nil)
179
+
180
+ %w(left right).each do |dir|
181
+ if alignnode.text.include? ("text-align:#{dir}")
155
182
  ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
156
- "m:val='right'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
183
+ "m:val='#{dir}'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
157
184
  end
158
185
  end
159
186
  ooxml
data/lib/html2doc/mime.rb CHANGED
@@ -7,19 +7,20 @@ require "fileutils"
7
7
  module Html2Doc
8
8
  def self.mime_preamble(boundary, filename, result)
9
9
  <<~"PREAMBLE"
10
- MIME-Version: 1.0
11
- Content-Type: multipart/related; boundary="#{boundary}"
10
+ MIME-Version: 1.0
11
+ Content-Type: multipart/related; boundary="#{boundary}"
12
12
 
13
- --#{boundary}
14
- Content-Location: file:///C:/Doc/#{File.basename(filename)}.htm
15
- Content-Type: text/html; charset="utf-8"
13
+ --#{boundary}
14
+ Content-ID: <#{File.basename(filename)}>
15
+ Content-Disposition: inline; filename="#{File.basename(filename)}"
16
+ Content-Type: text/html; charset="utf-8"
16
17
 
17
- #{result}
18
+ #{result}
18
19
 
19
20
  PREAMBLE
20
21
  end
21
22
 
22
- def self.mime_attachment(boundary, filename, item, dir)
23
+ def self.mime_attachment(boundary, _filename, item, dir)
23
24
  content_type = mime_type(item)
24
25
  text_mode = %w[text application].any? { |p| content_type.start_with? p }
25
26
 
@@ -28,12 +29,13 @@ module Html2Doc
28
29
 
29
30
  encoded_file = Base64.strict_encode64(content).gsub(/(.{76})/, "\\1\n")
30
31
  <<~"FILE"
31
- --#{boundary}
32
- Content-Location: file:///C:/Doc/#{File.basename(filename)}_files/#{item}
33
- Content-Transfer-Encoding: base64
34
- Content-Type: #{content_type}
32
+ --#{boundary}
33
+ Content-ID: <#{File.basename(item)}>
34
+ Content-Disposition: inline; filename="#{File.basename(item)}"
35
+ Content-Transfer-Encoding: base64
36
+ Content-Type: #{content_type}
35
37
 
36
- #{encoded_file}
38
+ #{encoded_file}
37
39
 
38
40
  FILE
39
41
  end
@@ -41,7 +43,7 @@ module Html2Doc
41
43
  def self.mime_type(item)
42
44
  types = MIME::Types.type_for(item)
43
45
  type = types ? types.first.to_s : 'text/plain; charset="utf-8"'
44
- type = type + ' charset="utf-8"' if /^text/.match(type) && types
46
+ type = %(#{type} charset="utf-8") if /^text/.match(type) && types
45
47
  type
46
48
  end
47
49
 
@@ -52,25 +54,37 @@ module Html2Doc
52
54
 
53
55
  def self.mime_package(result, filename, dir)
54
56
  boundary = mime_boundary
55
- mhtml = mime_preamble(boundary, filename, result)
56
- mhtml += mime_attachment(boundary, filename, "filelist.xml", dir)
57
+ mhtml = mime_preamble(boundary, "#{filename}.htm", result)
58
+ mhtml += mime_attachment(boundary, "#{filename}.htm", "filelist.xml", dir)
57
59
  Dir.foreach(dir) do |item|
58
60
  next if item == "." || item == ".." || /^\./.match(item) ||
59
61
  item == "filelist.xml"
60
- mhtml += mime_attachment(boundary, filename, item, dir)
62
+
63
+ mhtml += mime_attachment(boundary, "#{filename}.htm", item, dir)
61
64
  end
62
65
  mhtml += "--#{boundary}--"
63
- File.open("#{filename}.doc", "w:UTF-8") { |f| f.write mhtml }
66
+ File.open("#{filename}.doc", "w:UTF-8") { |f| f.write contentid(mhtml) }
67
+ end
68
+
69
+ def self.contentid(mhtml)
70
+ mhtml.gsub %r{(<img[^>]*?src=")([^\"']+)(['"])}m do |m|
71
+ repl = "#{$1}cid:#{File.basename($2)}#{$3}"
72
+ /^data:|^https?:/.match($2) ? m : repl
73
+ end.gsub %r{(<v:imagedata[^>]*?src=")([^\"']+)(['"])}m do |m|
74
+ repl = "#{$1}cid:#{File.basename($2)}#{$3}"
75
+ /^data:|^https?:/.match($2) ? m : repl
76
+ end
64
77
  end
65
78
 
66
79
  # max width for Word document is 400, max height is 680
67
- def self.image_resize(i, path, maxheight, maxwidth)
68
- realSize = ImageSize.path(path).size
69
- s = [i["width"].to_i, i["height"].to_i]
70
- s = realSize if s[0].zero? && s[1].zero?
71
- return [nil, nil] if realSize.nil? || realSize[0].nil? || realSize[1].nil?
72
- s[1] = s[0] * realSize[1] / realSize[0] if s[1].zero? && !s[0].zero?
73
- s[0] = s[1] * realSize[0] / realSize[1] if s[0].zero? && !s[1].zero?
80
+ def self.image_resize(img, path, maxheight, maxwidth)
81
+ realsize = ImageSize.path(path).size
82
+ s = [img["width"].to_i, img["height"].to_i]
83
+ s = realsize if s[0].zero? && s[1].zero?
84
+ return [nil, nil] if realsize.nil? || realsize[0].nil? || realsize[1].nil?
85
+
86
+ s[1] = s[0] * realsize[1] / realsize[0] if s[1].zero? && !s[0].zero?
87
+ s[0] = s[1] * realsize[0] / realsize[1] if s[0].zero? && !s[1].zero?
74
88
  s = [(s[0] * maxheight / s[1]).ceil, maxheight] if s[1] > maxheight
75
89
  s = [maxwidth, (s[1] * maxwidth / s[0]).ceil] if s[0] > maxwidth
76
90
  s
@@ -83,19 +97,22 @@ module Html2Doc
83
97
  end
84
98
 
85
99
  def self.warnsvg(src)
86
- warn "#{src}: SVG not supported" if /\.svg$/i.match(src)
100
+ warn "#{src}: SVG not supported" if /\.svg$/i.match?(src)
101
+ end
102
+
103
+ def self.localname(src, localdir)
104
+ %r{^([A-Z]:)?/}.match?(src) ? src : File.join(localdir, src)
87
105
  end
88
106
 
89
107
  # only processes locally stored images
90
108
  def self.image_cleanup(docxml, dir, localdir)
91
109
  docxml.traverse do |i|
92
110
  next unless i.element? && %w(img v:imagedata).include?(i.name)
93
- #warnsvg(i["src"])
94
- next if /^http/.match i["src"]
95
- next if %r{^data:(image|application)/[^;]+;base64}.match i["src"]
96
- local_filename = %r{^([A-Z]:)?/}.match(i["src"]) ? i["src"] :
97
- File.join(localdir, i["src"])
98
- new_filename = "#{mkuuid}#{File.extname(i["src"])}"
111
+ next if /^http/.match? i["src"]
112
+ next if %r{^data:(image|application)/[^;]+;base64}.match? i["src"]
113
+
114
+ local_filename = localname(i["src"], localdir)
115
+ new_filename = "#{mkuuid}#{File.extname(i['src'])}"
99
116
  FileUtils.cp local_filename, File.join(dir, new_filename)
100
117
  i["width"], i["height"] = image_resize(i, local_filename, 680, 400)
101
118
  i["src"] = File.join(File.basename(dir), new_filename)
@@ -103,7 +120,7 @@ module Html2Doc
103
120
  docxml
104
121
  end
105
122
 
106
- # do not parse the header through Nokogiri, since it will contain
123
+ # do not parse the header through Nokogiri, since it will contain
107
124
  # non-XML like <![if !supportFootnotes]>
108
125
  def self.header_image_cleanup(doc, dir, filename, localdir)
109
126
  doc.split(%r{(<img [^>]*>|<v:imagedata [^>]*>)}).each_slice(2).map do |a|
@@ -111,16 +128,14 @@ module Html2Doc
111
128
  end.join
112
129
  end
113
130
 
114
- def self.header_image_cleanup1(a, dir, filename, localdir)
131
+ def self.header_image_cleanup1(a, dir, _filename, localdir)
115
132
  if a.size == 2 && !(/ src="https?:/.match a[1]) &&
116
133
  !(%r{ src="data:(image|application)/[^;]+;base64}.match a[1])
117
134
  m = / src=['"](?<src>[^"']+)['"]/.match a[1]
118
- #warnsvg(m[:src])
119
135
  m2 = /\.(?<suffix>[a-zA-Z_0-9]+)$/.match m[:src]
120
136
  new_filename = "#{mkuuid}.#{m2[:suffix]}"
121
- old_filename = %r{^([A-Z]:)?/}.match(m[:src]) ? m[:src] : File.join(localdir, m[:src])
122
- FileUtils.cp old_filename, File.join(dir, new_filename)
123
- a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='file:///C:/Doc/#{filename}_files/#{new_filename}'")
137
+ FileUtils.cp localname(m[:src], localdir), File.join(dir, new_filename)
138
+ a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='cid:#{new_filename}'")
124
139
  end
125
140
  a.join
126
141
  end
@@ -131,6 +146,7 @@ module Html2Doc
131
146
  <o:MainFile HRef="../#{filename}.htm"/>}
132
147
  Dir.entries(dir).sort.each do |item|
133
148
  next if item == "." || item == ".." || /^\./.match(item)
149
+
134
150
  f.write %{ <o:File HRef="#{item}"/>\n}
135
151
  end
136
152
  f.write("</xml>\n")
@@ -6,6 +6,7 @@ module Html2Doc
6
6
  fn = []
7
7
  docxml.xpath("//a").each do |a|
8
8
  next unless process_footnote_link(docxml, a, i, fn)
9
+
9
10
  i += 1
10
11
  end
11
12
  process_footnote_texts(docxml, fn)
@@ -22,13 +23,13 @@ module Html2Doc
22
23
  footnote_cleanup(docxml)
23
24
  end
24
25
 
25
- def self.footnote_div_to_p(f)
26
- if %w{div aside}.include? f.name
27
- if f.at(".//p")
28
- f.replace(f.children)
26
+ def self.footnote_div_to_p(elem)
27
+ if %w{div aside}.include? elem.name
28
+ if elem.at(".//p")
29
+ elem.replace(elem.children)
29
30
  else
30
- f.name = "p"
31
- f["class"] = "MsoFootnoteText"
31
+ elem.name = "p"
32
+ elem["class"] = "MsoFootnoteText"
32
33
  end
33
34
  end
34
35
  end
@@ -36,34 +37,39 @@ module Html2Doc
36
37
  FN = "<span class='MsoFootnoteReference'>"\
37
38
  "<span style='mso-special-character:footnote'/></span>".freeze
38
39
 
39
- def self.footnote_container(docxml, i)
40
- ref = docxml&.at("//a[@href='#_ftn#{i}']")&.children&.to_xml(indent: 0).
41
- gsub(/>\n</, "><") || FN
40
+ def self.footnote_container(docxml, idx)
41
+ ref = docxml&.at("//a[@href='#_ftn#{idx}']")&.children&.to_xml(indent: 0)
42
+ &.gsub(/>\n</, "><") || FN
42
43
  <<~DIV
43
- <div style='mso-element:footnote' id='ftn#{i}'>
44
- <a style='mso-footnote-id:ftn#{i}' href='#_ftn#{i}'
45
- name='_ftnref#{i}' title='' id='_ftnref#{i}'>#{ref.strip}</a></div>
44
+ <div style='mso-element:footnote' id='ftn#{idx}'>
45
+ <a style='mso-footnote-id:ftn#{idx}' href='#_ftn#{idx}'
46
+ name='_ftnref#{idx}' title='' id='_ftnref#{idx}'>#{ref.strip}</a></div>
46
47
  DIV
47
48
  end
48
49
 
49
- def self.process_footnote_link(docxml, a, i, fn)
50
- return false unless footnote?(a)
51
- href = a["href"].gsub(/^#/, "")
50
+ def self.process_footnote_link(docxml, elem, idx, footnote)
51
+ return false unless footnote?(elem)
52
+
53
+ href = elem["href"].gsub(/^#/, "")
52
54
  note = docxml.at("//*[@name = '#{href}' or @id = '#{href}']")
53
55
  return false if note.nil?
54
- set_footnote_link_attrs(a, i)
55
- if a.at("./span[@class = 'MsoFootnoteReference']")
56
- a.children.each do |c|
57
- if c.name == "span" and c["class"] == "MsoFootnoteReference"
58
- c.replace(FN)
59
- else
60
- c.wrap("<span class='MsoFootnoteReference'></span>")
61
- end
56
+
57
+ set_footnote_link_attrs(elem, idx)
58
+ if elem.at("./span[@class = 'MsoFootnoteReference']")
59
+ process_footnote_link1(elem)
60
+ else elem.children = FN
61
+ end
62
+ footnote << transform_footnote_text(note)
63
+ end
64
+
65
+ def self.process_footnote_link1(elem)
66
+ elem.children.each do |c|
67
+ if c.name == "span" && c["class"] == "MsoFootnoteReference"
68
+ c.replace(FN)
69
+ else
70
+ c.wrap("<span class='MsoFootnoteReference'></span>")
62
71
  end
63
- else
64
- a.children = FN
65
72
  end
66
- fn << transform_footnote_text(note)
67
73
  end
68
74
 
69
75
  def self.transform_footnote_text(note)
@@ -76,16 +82,16 @@ module Html2Doc
76
82
  note.remove
77
83
  end
78
84
 
79
- def self.footnote?(a)
80
- a["epub:type"]&.casecmp("footnote")&.zero? ||
81
- a["class"]&.casecmp("footnote")&.zero?
85
+ def self.footnote?(elem)
86
+ elem["epub:type"]&.casecmp("footnote")&.zero? ||
87
+ elem["class"]&.casecmp("footnote")&.zero?
82
88
  end
83
89
 
84
- def self.set_footnote_link_attrs(a, i)
85
- a["style"] = "mso-footnote-id:ftn#{i}"
86
- a["href"] = "#_ftn#{i}"
87
- a["name"] = "_ftnref#{i}"
88
- a["title"] = ""
90
+ def self.set_footnote_link_attrs(elem, idx)
91
+ elem["style"] = "mso-footnote-id:ftn#{idx}"
92
+ elem["href"] = "#_ftn#{idx}"
93
+ elem["name"] = "_ftnref#{idx}"
94
+ elem["title"] = ""
89
95
  end
90
96
 
91
97
  # We expect that the content of the footnote text received is one or
@@ -94,8 +100,8 @@ module Html2Doc
94
100
  # are present in the HTML, they need to have been cleaned out before
95
101
  # passing to this gem
96
102
  def self.footnote_cleanup(docxml)
97
- docxml.xpath('//div[@style="mso-element:footnote"]/a').
98
- each do |x|
103
+ docxml.xpath('//div[@style="mso-element:footnote"]/a')
104
+ .each do |x|
99
105
  n = x.next_element
100
106
  n&.children&.first&.add_previous_sibling(x.remove)
101
107
  end