html2doc 1.0.7 → 1.1.3

Sign up to get free protection for your applications and to get access to all the features.
data/lib/html2doc/math.rb CHANGED
@@ -9,23 +9,34 @@ module Html2Doc
9
9
  Nokogiri::XSLT(File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"),
10
10
  encoding: "utf-8"))
11
11
 
12
- def self.asciimath_to_mathml1(x)
13
- AsciiMath::MathMLBuilder.new(:msword => true).append_expression(
14
- AsciiMath.parse(HTMLEntities.new.decode(x)).ast).to_s.
15
- gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
12
+ def self.asciimath_to_mathml1(expr)
13
+ AsciiMath::MathMLBuilder.new(msword: true).append_expression(
14
+ AsciiMath.parse(HTMLEntities.new.decode(expr)).ast,
15
+ ).to_s
16
+ .gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
17
+ rescue StandardError => e
18
+ puts "parsing: #{expr}"
19
+ puts e.message
20
+ raise e
16
21
  end
17
22
 
18
23
  def self.asciimath_to_mathml(doc, delims)
19
24
  return doc if delims.nil? || delims.size < 2
25
+
20
26
  m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
21
27
  m.each_slice(4).map.with_index do |(*a), i|
22
- i % 500 == 0 && m.size > 1000 && i > 0 and
23
- warn "MathML #{i} of #{(m.size / 4).floor}"
28
+ progress_conv(i, 500, (m.size / 4).floor, 1000, "AsciiMath")
24
29
  a[2].nil? || a[2] = asciimath_to_mathml1(a[2])
25
30
  a.size > 1 ? a[0] + a[2] : a[0]
26
31
  end.join
27
32
  end
28
33
 
34
+ def self.progress_conv(idx, step, total, threshold, msg)
35
+ return unless (idx % step).zero? && total > threshold && idx.positive?
36
+
37
+ warn "#{msg} #{idx} of #{total}"
38
+ end
39
+
29
40
  def self.unwrap_accents(doc)
30
41
  doc.xpath("//*[@accent = 'true']").each do |x|
31
42
  x.elements.length > 1 or next
@@ -36,106 +47,124 @@ module Html2Doc
36
47
  end
37
48
 
38
49
  # random fixes to MathML input that OOXML needs to render properly
39
- def self.ooxml_cleanup(m, docnamespaces)
40
- m = unwrap_accents(mathml_preserve_space(
41
- mathml_insert_rows(m, docnamespaces), docnamespaces))
42
- m.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
43
- m
50
+ def self.ooxml_cleanup(math, docnamespaces)
51
+ math = unwrap_accents(
52
+ mathml_preserve_space(
53
+ mathml_insert_rows(math, docnamespaces), docnamespaces
54
+ ),
55
+ )
56
+ math.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
57
+ math
44
58
  end
45
59
 
46
- def self.mathml_insert_rows(m, docnamespaces)
47
- m.xpath(%w(msup msub msubsup munder mover munderover).
48
- map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
60
+ def self.mathml_insert_rows(math, docnamespaces)
61
+ math.xpath(%w(msup msub msubsup munder mover munderover)
62
+ .map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
49
63
  next unless x.next_element && x.next_element != "mrow"
64
+
50
65
  x.next_element.wrap("<mrow/>")
51
66
  end
52
- m
67
+ math
53
68
  end
54
69
 
55
- def self.mathml_preserve_space(m, docnamespaces)
56
- m.xpath(".//xmlns:mtext", docnamespaces).each do |x|
70
+ def self.mathml_preserve_space(math, docnamespaces)
71
+ math.xpath(".//xmlns:mtext", docnamespaces).each do |x|
57
72
  x.children = x.children.to_xml.gsub(/^\s/, "&#xA0;").gsub(/\s$/, "&#xA0;")
58
73
  end
59
- m
74
+ math
60
75
  end
61
76
 
62
- def self.unitalic(m)
63
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
64
- x.wrap("<span style='font-style:normal;'></span>")
77
+ HTML_NS = 'xmlns="http://www.w3.org/1999/xhtml"'.freeze
78
+
79
+ def self.unitalic(math)
80
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
81
+ x.wrap("<span #{HTML_NS} style='font-style:normal;'></span>")
65
82
  end
66
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
67
- x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>")
83
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
84
+ x.wrap("<span #{HTML_NS} class='nostem' style='font-weight:bold;'><em></em></span>")
68
85
  end
69
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
70
- x.wrap("<span class='nostem'><em></em></span>")
86
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
87
+ x.wrap("<span #{HTML_NS} class='nostem'><em></em></span>")
71
88
  end
72
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
73
- x.wrap("<span style='font-style:normal;font-weight:bold;'></span>")
89
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
90
+ x.wrap("<span #{HTML_NS} style='font-style:normal;font-weight:bold;'></span>")
74
91
  end
75
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
76
- toPlane1(x, :monospace)
92
+ math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
93
+ to_plane1(x, :monospace)
77
94
  end
78
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
79
- toPlane1(x, :doublestruck)
95
+ math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
96
+ to_plane1(x, :doublestruck)
80
97
  end
81
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
82
- toPlane1(x, :script)
98
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
99
+ to_plane1(x, :script)
83
100
  end
84
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
85
- toPlane1(x, :scriptbold)
101
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
102
+ to_plane1(x, :scriptbold)
86
103
  end
87
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
88
- toPlane1(x, :fraktur)
104
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
105
+ to_plane1(x, :fraktur)
89
106
  end
90
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
91
- toPlane1(x, :frakturbold)
107
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
108
+ to_plane1(x, :frakturbold)
92
109
  end
93
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
94
- toPlane1(x, :sans)
110
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
111
+ to_plane1(x, :sans)
95
112
  end
96
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
97
- toPlane1(x, :sansbold)
113
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
114
+ to_plane1(x, :sansbold)
98
115
  end
99
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
100
- toPlane1(x, :sansitalic)
116
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
117
+ to_plane1(x, :sansitalic)
101
118
  end
102
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
103
- toPlane1(x, :sansbolditalic)
119
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
120
+ to_plane1(x, :sansbolditalic)
104
121
  end
105
- m
122
+ math
106
123
  end
107
124
 
108
- def self.toPlane1(x, font)
109
- x.traverse do |n|
125
+ def self.to_plane1(xml, font)
126
+ xml.traverse do |n|
110
127
  next unless n.text?
128
+
111
129
  n.replace(Plane1Converter.conv(HTMLEntities.new.decode(n.text), font))
112
130
  end
113
- x
131
+ xml
114
132
  end
115
133
 
116
134
  def self.mathml_to_ooml(docxml)
117
135
  docnamespaces = docxml.collect_namespaces
118
136
  m = docxml.xpath("//*[local-name() = 'math']")
119
137
  m.each_with_index do |x, i|
120
- i % 100 == 0 && m.size > 500 && i > 0 and
121
- warn "Math OOXML #{i} of #{m.size}"
122
- element = ooxml_cleanup(x, docnamespaces)
123
- doc = Nokogiri::XML::Document::new()
124
- doc.root = element
125
- ooxml = (unitalic(esc_space(@xsltemplate.transform(doc)))).to_s.
126
- gsub(/<\?[^>]+>\s*/, "").
127
- gsub(/ xmlns(:[^=]+)?="[^"]+"/, "").
128
- gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
129
- ooxml = uncenter(x, ooxml)
130
- x.swap(ooxml)
138
+ progress_conv(i, 100, m.size, 500, "Math OOXML")
139
+ mathml_to_ooml1(x, docnamespaces)
131
140
  end
132
141
  end
133
142
 
134
- # escape space as &#x32;; we are removing any spaces generated by
143
+ # We need span and em not to be namespaced. Word can't deal with explicit
144
+ # namespaces.
145
+ # We will end up stripping them out again under Nokogiri 1.11, which correctly
146
+ # insists on inheriting namespace from parent.
147
+ def self.ooml_clean(xml)
148
+ xml.to_s
149
+ .gsub(/<\?[^>]+>\s*/, "")
150
+ .gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
151
+ .gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
152
+ end
153
+
154
+ def self.mathml_to_ooml1(xml, docnamespaces)
155
+ doc = Nokogiri::XML::Document::new
156
+ doc.root = ooxml_cleanup(xml, docnamespaces)
157
+ ooxml = ooml_clean(unitalic(esc_space(@xsltemplate.transform(doc))))
158
+ ooxml = uncenter(xml, ooxml)
159
+ xml.swap(ooxml)
160
+ end
161
+
162
+ # escape space as &#x32;; we are removing any spaces generated by
135
163
  # XML indentation
136
164
  def self.esc_space(xml)
137
165
  xml.traverse do |n|
138
166
  next unless n.text?
167
+
139
168
  n = n.text.gsub(/ /, "&#x32;")
140
169
  end
141
170
  xml
@@ -143,17 +172,15 @@ module Html2Doc
143
172
 
144
173
  # if the OMML has no siblings, by default it is centered; override this with
145
174
  # left/right if parent is so tagged
146
- def self.uncenter(m, ooxml)
147
- if m.next == nil && m.previous == nil
148
- alignnode = m.at(".//ancestor::*[@style][local-name() = 'p' or "\
149
- "local-name() = 'div' or local-name() = 'td']/@style")
150
- return ooxml unless alignnode
151
- if alignnode.text.include? ("text-align:left")
152
- ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
153
- "m:val='left'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
154
- elsif alignnode.text.include? ("text-align:right")
175
+ def self.uncenter(math, ooxml)
176
+ alignnode = math.at(".//ancestor::*[@style][local-name() = 'p' or "\
177
+ "local-name() = 'div' or local-name() = 'td']/@style")
178
+ return ooxml unless alignnode && (math.next == nil && math.previous == nil)
179
+
180
+ %w(left right).each do |dir|
181
+ if alignnode.text.include? ("text-align:#{dir}")
155
182
  ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
156
- "m:val='right'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
183
+ "m:val='#{dir}'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
157
184
  end
158
185
  end
159
186
  ooxml
data/lib/html2doc/mime.rb CHANGED
@@ -7,19 +7,20 @@ require "fileutils"
7
7
  module Html2Doc
8
8
  def self.mime_preamble(boundary, filename, result)
9
9
  <<~"PREAMBLE"
10
- MIME-Version: 1.0
11
- Content-Type: multipart/related; boundary="#{boundary}"
10
+ MIME-Version: 1.0
11
+ Content-Type: multipart/related; boundary="#{boundary}"
12
12
 
13
- --#{boundary}
14
- Content-Location: file:///C:/Doc/#{File.basename(filename)}.htm
15
- Content-Type: text/html; charset="utf-8"
13
+ --#{boundary}
14
+ Content-ID: <#{File.basename(filename)}>
15
+ Content-Disposition: inline; filename="#{File.basename(filename)}"
16
+ Content-Type: text/html; charset="utf-8"
16
17
 
17
- #{result}
18
+ #{result}
18
19
 
19
20
  PREAMBLE
20
21
  end
21
22
 
22
- def self.mime_attachment(boundary, filename, item, dir)
23
+ def self.mime_attachment(boundary, _filename, item, dir)
23
24
  content_type = mime_type(item)
24
25
  text_mode = %w[text application].any? { |p| content_type.start_with? p }
25
26
 
@@ -28,12 +29,13 @@ module Html2Doc
28
29
 
29
30
  encoded_file = Base64.strict_encode64(content).gsub(/(.{76})/, "\\1\n")
30
31
  <<~"FILE"
31
- --#{boundary}
32
- Content-Location: file:///C:/Doc/#{File.basename(filename)}_files/#{item}
33
- Content-Transfer-Encoding: base64
34
- Content-Type: #{content_type}
32
+ --#{boundary}
33
+ Content-ID: <#{File.basename(item)}>
34
+ Content-Disposition: inline; filename="#{File.basename(item)}"
35
+ Content-Transfer-Encoding: base64
36
+ Content-Type: #{content_type}
35
37
 
36
- #{encoded_file}
38
+ #{encoded_file}
37
39
 
38
40
  FILE
39
41
  end
@@ -41,7 +43,7 @@ module Html2Doc
41
43
  def self.mime_type(item)
42
44
  types = MIME::Types.type_for(item)
43
45
  type = types ? types.first.to_s : 'text/plain; charset="utf-8"'
44
- type = type + ' charset="utf-8"' if /^text/.match(type) && types
46
+ type = %(#{type} charset="utf-8") if /^text/.match(type) && types
45
47
  type
46
48
  end
47
49
 
@@ -52,25 +54,37 @@ module Html2Doc
52
54
 
53
55
  def self.mime_package(result, filename, dir)
54
56
  boundary = mime_boundary
55
- mhtml = mime_preamble(boundary, filename, result)
56
- mhtml += mime_attachment(boundary, filename, "filelist.xml", dir)
57
+ mhtml = mime_preamble(boundary, "#{filename}.htm", result)
58
+ mhtml += mime_attachment(boundary, "#{filename}.htm", "filelist.xml", dir)
57
59
  Dir.foreach(dir) do |item|
58
60
  next if item == "." || item == ".." || /^\./.match(item) ||
59
61
  item == "filelist.xml"
60
- mhtml += mime_attachment(boundary, filename, item, dir)
62
+
63
+ mhtml += mime_attachment(boundary, "#{filename}.htm", item, dir)
61
64
  end
62
65
  mhtml += "--#{boundary}--"
63
- File.open("#{filename}.doc", "w:UTF-8") { |f| f.write mhtml }
66
+ File.open("#{filename}.doc", "w:UTF-8") { |f| f.write contentid(mhtml) }
67
+ end
68
+
69
+ def self.contentid(mhtml)
70
+ mhtml.gsub %r{(<img[^>]*?src=")([^\"']+)(['"])}m do |m|
71
+ repl = "#{$1}cid:#{File.basename($2)}#{$3}"
72
+ /^data:|^https?:/.match($2) ? m : repl
73
+ end.gsub %r{(<v:imagedata[^>]*?src=")([^\"']+)(['"])}m do |m|
74
+ repl = "#{$1}cid:#{File.basename($2)}#{$3}"
75
+ /^data:|^https?:/.match($2) ? m : repl
76
+ end
64
77
  end
65
78
 
66
79
  # max width for Word document is 400, max height is 680
67
- def self.image_resize(i, path, maxheight, maxwidth)
68
- realSize = ImageSize.path(path).size
69
- s = [i["width"].to_i, i["height"].to_i]
70
- s = realSize if s[0].zero? && s[1].zero?
71
- return [nil, nil] if realSize.nil? || realSize[0].nil? || realSize[1].nil?
72
- s[1] = s[0] * realSize[1] / realSize[0] if s[1].zero? && !s[0].zero?
73
- s[0] = s[1] * realSize[0] / realSize[1] if s[0].zero? && !s[1].zero?
80
+ def self.image_resize(img, path, maxheight, maxwidth)
81
+ realsize = ImageSize.path(path).size
82
+ s = [img["width"].to_i, img["height"].to_i]
83
+ s = realsize if s[0].zero? && s[1].zero?
84
+ return [nil, nil] if realsize.nil? || realsize[0].nil? || realsize[1].nil?
85
+
86
+ s[1] = s[0] * realsize[1] / realsize[0] if s[1].zero? && !s[0].zero?
87
+ s[0] = s[1] * realsize[0] / realsize[1] if s[0].zero? && !s[1].zero?
74
88
  s = [(s[0] * maxheight / s[1]).ceil, maxheight] if s[1] > maxheight
75
89
  s = [maxwidth, (s[1] * maxwidth / s[0]).ceil] if s[0] > maxwidth
76
90
  s
@@ -83,19 +97,22 @@ module Html2Doc
83
97
  end
84
98
 
85
99
  def self.warnsvg(src)
86
- warn "#{src}: SVG not supported" if /\.svg$/i.match(src)
100
+ warn "#{src}: SVG not supported" if /\.svg$/i.match?(src)
101
+ end
102
+
103
+ def self.localname(src, localdir)
104
+ %r{^([A-Z]:)?/}.match?(src) ? src : File.join(localdir, src)
87
105
  end
88
106
 
89
107
  # only processes locally stored images
90
108
  def self.image_cleanup(docxml, dir, localdir)
91
109
  docxml.traverse do |i|
92
110
  next unless i.element? && %w(img v:imagedata).include?(i.name)
93
- #warnsvg(i["src"])
94
- next if /^http/.match i["src"]
95
- next if %r{^data:(image|application)/[^;]+;base64}.match i["src"]
96
- local_filename = %r{^([A-Z]:)?/}.match(i["src"]) ? i["src"] :
97
- File.join(localdir, i["src"])
98
- new_filename = "#{mkuuid}#{File.extname(i["src"])}"
111
+ next if /^http/.match? i["src"]
112
+ next if %r{^data:(image|application)/[^;]+;base64}.match? i["src"]
113
+
114
+ local_filename = localname(i["src"], localdir)
115
+ new_filename = "#{mkuuid}#{File.extname(i['src'])}"
99
116
  FileUtils.cp local_filename, File.join(dir, new_filename)
100
117
  i["width"], i["height"] = image_resize(i, local_filename, 680, 400)
101
118
  i["src"] = File.join(File.basename(dir), new_filename)
@@ -103,7 +120,7 @@ module Html2Doc
103
120
  docxml
104
121
  end
105
122
 
106
- # do not parse the header through Nokogiri, since it will contain
123
+ # do not parse the header through Nokogiri, since it will contain
107
124
  # non-XML like <![if !supportFootnotes]>
108
125
  def self.header_image_cleanup(doc, dir, filename, localdir)
109
126
  doc.split(%r{(<img [^>]*>|<v:imagedata [^>]*>)}).each_slice(2).map do |a|
@@ -111,16 +128,14 @@ module Html2Doc
111
128
  end.join
112
129
  end
113
130
 
114
- def self.header_image_cleanup1(a, dir, filename, localdir)
131
+ def self.header_image_cleanup1(a, dir, _filename, localdir)
115
132
  if a.size == 2 && !(/ src="https?:/.match a[1]) &&
116
133
  !(%r{ src="data:(image|application)/[^;]+;base64}.match a[1])
117
134
  m = / src=['"](?<src>[^"']+)['"]/.match a[1]
118
- #warnsvg(m[:src])
119
135
  m2 = /\.(?<suffix>[a-zA-Z_0-9]+)$/.match m[:src]
120
136
  new_filename = "#{mkuuid}.#{m2[:suffix]}"
121
- old_filename = %r{^([A-Z]:)?/}.match(m[:src]) ? m[:src] : File.join(localdir, m[:src])
122
- FileUtils.cp old_filename, File.join(dir, new_filename)
123
- a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='file:///C:/Doc/#{filename}_files/#{new_filename}'")
137
+ FileUtils.cp localname(m[:src], localdir), File.join(dir, new_filename)
138
+ a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='cid:#{new_filename}'")
124
139
  end
125
140
  a.join
126
141
  end
@@ -131,6 +146,7 @@ module Html2Doc
131
146
  <o:MainFile HRef="../#{filename}.htm"/>}
132
147
  Dir.entries(dir).sort.each do |item|
133
148
  next if item == "." || item == ".." || /^\./.match(item)
149
+
134
150
  f.write %{ <o:File HRef="#{item}"/>\n}
135
151
  end
136
152
  f.write("</xml>\n")
@@ -6,6 +6,7 @@ module Html2Doc
6
6
  fn = []
7
7
  docxml.xpath("//a").each do |a|
8
8
  next unless process_footnote_link(docxml, a, i, fn)
9
+
9
10
  i += 1
10
11
  end
11
12
  process_footnote_texts(docxml, fn)
@@ -22,13 +23,13 @@ module Html2Doc
22
23
  footnote_cleanup(docxml)
23
24
  end
24
25
 
25
- def self.footnote_div_to_p(f)
26
- if %w{div aside}.include? f.name
27
- if f.at(".//p")
28
- f.replace(f.children)
26
+ def self.footnote_div_to_p(elem)
27
+ if %w{div aside}.include? elem.name
28
+ if elem.at(".//p")
29
+ elem.replace(elem.children)
29
30
  else
30
- f.name = "p"
31
- f["class"] = "MsoFootnoteText"
31
+ elem.name = "p"
32
+ elem["class"] = "MsoFootnoteText"
32
33
  end
33
34
  end
34
35
  end
@@ -36,34 +37,39 @@ module Html2Doc
36
37
  FN = "<span class='MsoFootnoteReference'>"\
37
38
  "<span style='mso-special-character:footnote'/></span>".freeze
38
39
 
39
- def self.footnote_container(docxml, i)
40
- ref = docxml&.at("//a[@href='#_ftn#{i}']")&.children&.to_xml(indent: 0).
41
- gsub(/>\n</, "><") || FN
40
+ def self.footnote_container(docxml, idx)
41
+ ref = docxml&.at("//a[@href='#_ftn#{idx}']")&.children&.to_xml(indent: 0)
42
+ &.gsub(/>\n</, "><") || FN
42
43
  <<~DIV
43
- <div style='mso-element:footnote' id='ftn#{i}'>
44
- <a style='mso-footnote-id:ftn#{i}' href='#_ftn#{i}'
45
- name='_ftnref#{i}' title='' id='_ftnref#{i}'>#{ref.strip}</a></div>
44
+ <div style='mso-element:footnote' id='ftn#{idx}'>
45
+ <a style='mso-footnote-id:ftn#{idx}' href='#_ftn#{idx}'
46
+ name='_ftnref#{idx}' title='' id='_ftnref#{idx}'>#{ref.strip}</a></div>
46
47
  DIV
47
48
  end
48
49
 
49
- def self.process_footnote_link(docxml, a, i, fn)
50
- return false unless footnote?(a)
51
- href = a["href"].gsub(/^#/, "")
50
+ def self.process_footnote_link(docxml, elem, idx, footnote)
51
+ return false unless footnote?(elem)
52
+
53
+ href = elem["href"].gsub(/^#/, "")
52
54
  note = docxml.at("//*[@name = '#{href}' or @id = '#{href}']")
53
55
  return false if note.nil?
54
- set_footnote_link_attrs(a, i)
55
- if a.at("./span[@class = 'MsoFootnoteReference']")
56
- a.children.each do |c|
57
- if c.name == "span" and c["class"] == "MsoFootnoteReference"
58
- c.replace(FN)
59
- else
60
- c.wrap("<span class='MsoFootnoteReference'></span>")
61
- end
56
+
57
+ set_footnote_link_attrs(elem, idx)
58
+ if elem.at("./span[@class = 'MsoFootnoteReference']")
59
+ process_footnote_link1(elem)
60
+ else elem.children = FN
61
+ end
62
+ footnote << transform_footnote_text(note)
63
+ end
64
+
65
+ def self.process_footnote_link1(elem)
66
+ elem.children.each do |c|
67
+ if c.name == "span" && c["class"] == "MsoFootnoteReference"
68
+ c.replace(FN)
69
+ else
70
+ c.wrap("<span class='MsoFootnoteReference'></span>")
62
71
  end
63
- else
64
- a.children = FN
65
72
  end
66
- fn << transform_footnote_text(note)
67
73
  end
68
74
 
69
75
  def self.transform_footnote_text(note)
@@ -76,16 +82,16 @@ module Html2Doc
76
82
  note.remove
77
83
  end
78
84
 
79
- def self.footnote?(a)
80
- a["epub:type"]&.casecmp("footnote")&.zero? ||
81
- a["class"]&.casecmp("footnote")&.zero?
85
+ def self.footnote?(elem)
86
+ elem["epub:type"]&.casecmp("footnote")&.zero? ||
87
+ elem["class"]&.casecmp("footnote")&.zero?
82
88
  end
83
89
 
84
- def self.set_footnote_link_attrs(a, i)
85
- a["style"] = "mso-footnote-id:ftn#{i}"
86
- a["href"] = "#_ftn#{i}"
87
- a["name"] = "_ftnref#{i}"
88
- a["title"] = ""
90
+ def self.set_footnote_link_attrs(elem, idx)
91
+ elem["style"] = "mso-footnote-id:ftn#{idx}"
92
+ elem["href"] = "#_ftn#{idx}"
93
+ elem["name"] = "_ftnref#{idx}"
94
+ elem["title"] = ""
89
95
  end
90
96
 
91
97
  # We expect that the content of the footnote text received is one or
@@ -94,8 +100,8 @@ module Html2Doc
94
100
  # are present in the HTML, they need to have been cleaned out before
95
101
  # passing to this gem
96
102
  def self.footnote_cleanup(docxml)
97
- docxml.xpath('//div[@style="mso-element:footnote"]/a').
98
- each do |x|
103
+ docxml.xpath('//div[@style="mso-element:footnote"]/a')
104
+ .each do |x|
99
105
  n = x.next_element
100
106
  n&.children&.first&.add_previous_sibling(x.remove)
101
107
  end