html2doc 1.1.0 → 1.1.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/html2doc/math.rb CHANGED
@@ -9,29 +9,34 @@ module Html2Doc
9
9
  Nokogiri::XSLT(File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"),
10
10
  encoding: "utf-8"))
11
11
 
12
- def self.asciimath_to_mathml1(x)
13
- begin
14
- AsciiMath::MathMLBuilder.new(:msword => true).append_expression(
15
- AsciiMath.parse(HTMLEntities.new.decode(x)).ast).to_s.
16
- gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
17
- rescue StandardError => e
18
- puts "parsing: #{x}"
19
- puts e.message
20
- raise e
21
- end
12
+ def self.asciimath_to_mathml1(expr)
13
+ AsciiMath::MathMLBuilder.new(msword: true).append_expression(
14
+ AsciiMath.parse(HTMLEntities.new.decode(expr)).ast,
15
+ ).to_s
16
+ .gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
17
+ rescue StandardError => e
18
+ puts "parsing: #{expr}"
19
+ puts e.message
20
+ raise e
22
21
  end
23
22
 
24
23
  def self.asciimath_to_mathml(doc, delims)
25
24
  return doc if delims.nil? || delims.size < 2
25
+
26
26
  m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
27
27
  m.each_slice(4).map.with_index do |(*a), i|
28
- i % 500 == 0 && m.size > 1000 && i > 0 and
29
- warn "MathML #{i} of #{(m.size / 4).floor}"
28
+ progress_conv(i, 500, (m.size / 4).floor, 1000, "AsciiMath")
30
29
  a[2].nil? || a[2] = asciimath_to_mathml1(a[2])
31
30
  a.size > 1 ? a[0] + a[2] : a[0]
32
31
  end.join
33
32
  end
34
33
 
34
+ def self.progress_conv(idx, step, total, threshold, msg)
35
+ return unless (idx % step).zero? && total > threshold && idx.positive?
36
+
37
+ warn "#{msg} #{idx} of #{total}"
38
+ end
39
+
35
40
  def self.unwrap_accents(doc)
36
41
  doc.xpath("//*[@accent = 'true']").each do |x|
37
42
  x.elements.length > 1 or next
@@ -42,106 +47,124 @@ module Html2Doc
42
47
  end
43
48
 
44
49
  # random fixes to MathML input that OOXML needs to render properly
45
- def self.ooxml_cleanup(m, docnamespaces)
46
- m = unwrap_accents(mathml_preserve_space(
47
- mathml_insert_rows(m, docnamespaces), docnamespaces))
48
- m.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
49
- m
50
+ def self.ooxml_cleanup(math, docnamespaces)
51
+ math = unwrap_accents(
52
+ mathml_preserve_space(
53
+ mathml_insert_rows(math, docnamespaces), docnamespaces
54
+ ),
55
+ )
56
+ math.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
57
+ math
50
58
  end
51
59
 
52
- def self.mathml_insert_rows(m, docnamespaces)
53
- m.xpath(%w(msup msub msubsup munder mover munderover).
54
- map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
60
+ def self.mathml_insert_rows(math, docnamespaces)
61
+ math.xpath(%w(msup msub msubsup munder mover munderover)
62
+ .map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
55
63
  next unless x.next_element && x.next_element != "mrow"
64
+
56
65
  x.next_element.wrap("<mrow/>")
57
66
  end
58
- m
67
+ math
59
68
  end
60
69
 
61
- def self.mathml_preserve_space(m, docnamespaces)
62
- m.xpath(".//xmlns:mtext", docnamespaces).each do |x|
70
+ def self.mathml_preserve_space(math, docnamespaces)
71
+ math.xpath(".//xmlns:mtext", docnamespaces).each do |x|
63
72
  x.children = x.children.to_xml.gsub(/^\s/, "&#xA0;").gsub(/\s$/, "&#xA0;")
64
73
  end
65
- m
74
+ math
66
75
  end
67
76
 
68
- def self.unitalic(m)
69
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
70
- x.wrap("<span style='font-style:normal;'></span>")
77
+ HTML_NS = 'xmlns="http://www.w3.org/1999/xhtml"'.freeze
78
+
79
+ def self.unitalic(math)
80
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
81
+ x.wrap("<span #{HTML_NS} style='font-style:normal;'></span>")
71
82
  end
72
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
73
- x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>")
83
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
84
+ x.wrap("<span #{HTML_NS} class='nostem' style='font-weight:bold;'><em></em></span>")
74
85
  end
75
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
76
- x.wrap("<span class='nostem'><em></em></span>")
86
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
87
+ x.wrap("<span #{HTML_NS} class='nostem'><em></em></span>")
77
88
  end
78
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
79
- x.wrap("<span style='font-style:normal;font-weight:bold;'></span>")
89
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
90
+ x.wrap("<span #{HTML_NS} style='font-style:normal;font-weight:bold;'></span>")
80
91
  end
81
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
82
- toPlane1(x, :monospace)
92
+ math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
93
+ to_plane1(x, :monospace)
83
94
  end
84
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
85
- toPlane1(x, :doublestruck)
95
+ math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
96
+ to_plane1(x, :doublestruck)
86
97
  end
87
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
88
- toPlane1(x, :script)
98
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
99
+ to_plane1(x, :script)
89
100
  end
90
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
91
- toPlane1(x, :scriptbold)
101
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
102
+ to_plane1(x, :scriptbold)
92
103
  end
93
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
94
- toPlane1(x, :fraktur)
104
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
105
+ to_plane1(x, :fraktur)
95
106
  end
96
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
97
- toPlane1(x, :frakturbold)
107
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
108
+ to_plane1(x, :frakturbold)
98
109
  end
99
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
100
- toPlane1(x, :sans)
110
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
111
+ to_plane1(x, :sans)
101
112
  end
102
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
103
- toPlane1(x, :sansbold)
113
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
114
+ to_plane1(x, :sansbold)
104
115
  end
105
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
106
- toPlane1(x, :sansitalic)
116
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
117
+ to_plane1(x, :sansitalic)
107
118
  end
108
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
109
- toPlane1(x, :sansbolditalic)
119
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
120
+ to_plane1(x, :sansbolditalic)
110
121
  end
111
- m
122
+ math
112
123
  end
113
124
 
114
- def self.toPlane1(x, font)
115
- x.traverse do |n|
125
+ def self.to_plane1(xml, font)
126
+ xml.traverse do |n|
116
127
  next unless n.text?
128
+
117
129
  n.replace(Plane1Converter.conv(HTMLEntities.new.decode(n.text), font))
118
130
  end
119
- x
131
+ xml
120
132
  end
121
133
 
122
134
  def self.mathml_to_ooml(docxml)
123
135
  docnamespaces = docxml.collect_namespaces
124
136
  m = docxml.xpath("//*[local-name() = 'math']")
125
137
  m.each_with_index do |x, i|
126
- i % 100 == 0 && m.size > 500 && i > 0 and
127
- warn "Math OOXML #{i} of #{m.size}"
128
- element = ooxml_cleanup(x, docnamespaces)
129
- doc = Nokogiri::XML::Document::new()
130
- doc.root = element
131
- ooxml = (unitalic(esc_space(@xsltemplate.transform(doc)))).to_s.
132
- gsub(/<\?[^>]+>\s*/, "").
133
- gsub(/ xmlns(:[^=]+)?="[^"]+"/, "").
134
- gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
135
- ooxml = uncenter(x, ooxml)
136
- x.swap(ooxml)
138
+ progress_conv(i, 100, m.size, 500, "Math OOXML")
139
+ mathml_to_ooml1(x, docnamespaces)
137
140
  end
138
141
  end
139
142
 
140
- # escape space as &#x32;; we are removing any spaces generated by
143
+ # We need span and em not to be namespaced. Word can't deal with explicit
144
+ # namespaces.
145
+ # We will end up stripping them out again under Nokogiri 1.11, which correctly
146
+ # insists on inheriting namespace from parent.
147
+ def self.ooml_clean(xml)
148
+ xml.to_s
149
+ .gsub(/<\?[^>]+>\s*/, "")
150
+ .gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
151
+ .gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
152
+ end
153
+
154
+ def self.mathml_to_ooml1(xml, docnamespaces)
155
+ doc = Nokogiri::XML::Document::new
156
+ doc.root = ooxml_cleanup(xml, docnamespaces)
157
+ ooxml = ooml_clean(unitalic(esc_space(@xsltemplate.transform(doc))))
158
+ ooxml = uncenter(xml, ooxml)
159
+ xml.swap(ooxml)
160
+ end
161
+
162
+ # escape space as &#x32;; we are removing any spaces generated by
141
163
  # XML indentation
142
164
  def self.esc_space(xml)
143
165
  xml.traverse do |n|
144
166
  next unless n.text?
167
+
145
168
  n = n.text.gsub(/ /, "&#x32;")
146
169
  end
147
170
  xml
@@ -149,17 +172,15 @@ module Html2Doc
149
172
 
150
173
  # if oomml has no siblings, by default it is centered; override this with
151
174
  # left/right if parent is so tagged
152
- def self.uncenter(m, ooxml)
153
- if m.next == nil && m.previous == nil
154
- alignnode = m.at(".//ancestor::*[@style][local-name() = 'p' or "\
155
- "local-name() = 'div' or local-name() = 'td']/@style")
156
- return ooxml unless alignnode
157
- if alignnode.text.include? ("text-align:left")
158
- ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
159
- "m:val='left'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
160
- elsif alignnode.text.include? ("text-align:right")
175
+ def self.uncenter(math, ooxml)
176
+ alignnode = math.at(".//ancestor::*[@style][local-name() = 'p' or "\
177
+ "local-name() = 'div' or local-name() = 'td']/@style")
178
+ return ooxml unless alignnode && (math.next == nil && math.previous == nil)
179
+
180
+ %w(left right).each do |dir|
181
+ if alignnode.text.include? ("text-align:#{dir}")
161
182
  ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
162
- "m:val='right'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
183
+ "m:val='#{dir}'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
163
184
  end
164
185
  end
165
186
  ooxml
data/lib/html2doc/mime.rb CHANGED
@@ -7,20 +7,20 @@ require "fileutils"
7
7
  module Html2Doc
8
8
  def self.mime_preamble(boundary, filename, result)
9
9
  <<~"PREAMBLE"
10
- MIME-Version: 1.0
11
- Content-Type: multipart/related; boundary="#{boundary}"
10
+ MIME-Version: 1.0
11
+ Content-Type: multipart/related; boundary="#{boundary}"
12
12
 
13
- --#{boundary}
14
- Content-ID: <#{File.basename(filename)}>
15
- Content-Disposition: inline; filename="#{File.basename(filename)}"
16
- Content-Type: text/html; charset="utf-8"
13
+ --#{boundary}
14
+ Content-ID: <#{File.basename(filename)}>
15
+ Content-Disposition: inline; filename="#{File.basename(filename)}"
16
+ Content-Type: text/html; charset="utf-8"
17
17
 
18
- #{result}
18
+ #{result}
19
19
 
20
20
  PREAMBLE
21
21
  end
22
22
 
23
- def self.mime_attachment(boundary, filename, item, dir)
23
+ def self.mime_attachment(boundary, _filename, item, dir)
24
24
  content_type = mime_type(item)
25
25
  text_mode = %w[text application].any? { |p| content_type.start_with? p }
26
26
 
@@ -29,13 +29,13 @@ module Html2Doc
29
29
 
30
30
  encoded_file = Base64.strict_encode64(content).gsub(/(.{76})/, "\\1\n")
31
31
  <<~"FILE"
32
- --#{boundary}
33
- Content-ID: <#{File.basename(item)}>
34
- Content-Disposition: inline; filename="#{File.basename(item)}"
35
- Content-Transfer-Encoding: base64
36
- Content-Type: #{content_type}
32
+ --#{boundary}
33
+ Content-ID: <#{File.basename(item)}>
34
+ Content-Disposition: inline; filename="#{File.basename(item)}"
35
+ Content-Transfer-Encoding: base64
36
+ Content-Type: #{content_type}
37
37
 
38
- #{encoded_file}
38
+ #{encoded_file}
39
39
 
40
40
  FILE
41
41
  end
@@ -43,7 +43,7 @@ module Html2Doc
43
43
  def self.mime_type(item)
44
44
  types = MIME::Types.type_for(item)
45
45
  type = types ? types.first.to_s : 'text/plain; charset="utf-8"'
46
- type = type + ' charset="utf-8"' if /^text/.match(type) && types
46
+ type = %(#{type} charset="utf-8") if /^text/.match(type) && types
47
47
  type
48
48
  end
49
49
 
@@ -59,6 +59,7 @@ module Html2Doc
59
59
  Dir.foreach(dir) do |item|
60
60
  next if item == "." || item == ".." || /^\./.match(item) ||
61
61
  item == "filelist.xml"
62
+
62
63
  mhtml += mime_attachment(boundary, "#{filename}.htm", item, dir)
63
64
  end
64
65
  mhtml += "--#{boundary}--"
@@ -69,17 +70,21 @@ module Html2Doc
69
70
  mhtml.gsub %r{(<img[^>]*?src=")([^\"']+)(['"])}m do |m|
70
71
  repl = "#{$1}cid:#{File.basename($2)}#{$3}"
71
72
  /^data:|^https?:/.match($2) ? m : repl
73
+ end.gsub %r{(<v:imagedata[^>]*?src=")([^\"']+)(['"])}m do |m|
74
+ repl = "#{$1}cid:#{File.basename($2)}#{$3}"
75
+ /^data:|^https?:/.match($2) ? m : repl
72
76
  end
73
77
  end
74
78
 
75
79
  # max width for Word document is 400, max height is 680
76
- def self.image_resize(i, path, maxheight, maxwidth)
77
- realSize = ImageSize.path(path).size
78
- s = [i["width"].to_i, i["height"].to_i]
79
- s = realSize if s[0].zero? && s[1].zero?
80
- return [nil, nil] if realSize.nil? || realSize[0].nil? || realSize[1].nil?
81
- s[1] = s[0] * realSize[1] / realSize[0] if s[1].zero? && !s[0].zero?
82
- s[0] = s[1] * realSize[0] / realSize[1] if s[0].zero? && !s[1].zero?
80
+ def self.image_resize(img, path, maxheight, maxwidth)
81
+ realsize = ImageSize.path(path).size
82
+ s = [img["width"].to_i, img["height"].to_i]
83
+ s = realsize if s[0].zero? && s[1].zero?
84
+ return [nil, nil] if realsize.nil? || realsize[0].nil? || realsize[1].nil?
85
+
86
+ s[1] = s[0] * realsize[1] / realsize[0] if s[1].zero? && !s[0].zero?
87
+ s[0] = s[1] * realsize[0] / realsize[1] if s[0].zero? && !s[1].zero?
83
88
  s = [(s[0] * maxheight / s[1]).ceil, maxheight] if s[1] > maxheight
84
89
  s = [maxwidth, (s[1] * maxwidth / s[0]).ceil] if s[0] > maxwidth
85
90
  s
@@ -92,19 +97,22 @@ module Html2Doc
92
97
  end
93
98
 
94
99
  def self.warnsvg(src)
95
- warn "#{src}: SVG not supported" if /\.svg$/i.match(src)
100
+ warn "#{src}: SVG not supported" if /\.svg$/i.match?(src)
101
+ end
102
+
103
+ def self.localname(src, localdir)
104
+ %r{^([A-Z]:)?/}.match?(src) ? src : File.join(localdir, src)
96
105
  end
97
106
 
98
107
  # only processes locally stored images
99
108
  def self.image_cleanup(docxml, dir, localdir)
100
109
  docxml.traverse do |i|
101
110
  next unless i.element? && %w(img v:imagedata).include?(i.name)
102
- #warnsvg(i["src"])
103
- next if /^http/.match i["src"]
104
- next if %r{^data:(image|application)/[^;]+;base64}.match i["src"]
105
- local_filename = %r{^([A-Z]:)?/}.match(i["src"]) ? i["src"] :
106
- File.join(localdir, i["src"])
107
- new_filename = "#{mkuuid}#{File.extname(i["src"])}"
111
+ next if /^http/.match? i["src"]
112
+ next if %r{^data:(image|application)/[^;]+;base64}.match? i["src"]
113
+
114
+ local_filename = localname(i["src"], localdir)
115
+ new_filename = "#{mkuuid}#{File.extname(i['src'])}"
108
116
  FileUtils.cp local_filename, File.join(dir, new_filename)
109
117
  i["width"], i["height"] = image_resize(i, local_filename, 680, 400)
110
118
  i["src"] = File.join(File.basename(dir), new_filename)
@@ -112,7 +120,7 @@ module Html2Doc
112
120
  docxml
113
121
  end
114
122
 
115
- # do not parse the header through Nokogiri, since it will contain
123
+ # do not parse the header through Nokogiri, since it will contain
116
124
  # non-XML like <![if !supportFootnotes]>
117
125
  def self.header_image_cleanup(doc, dir, filename, localdir)
118
126
  doc.split(%r{(<img [^>]*>|<v:imagedata [^>]*>)}).each_slice(2).map do |a|
@@ -120,15 +128,13 @@ module Html2Doc
120
128
  end.join
121
129
  end
122
130
 
123
- def self.header_image_cleanup1(a, dir, filename, localdir)
131
+ def self.header_image_cleanup1(a, dir, _filename, localdir)
124
132
  if a.size == 2 && !(/ src="https?:/.match a[1]) &&
125
133
  !(%r{ src="data:(image|application)/[^;]+;base64}.match a[1])
126
134
  m = / src=['"](?<src>[^"']+)['"]/.match a[1]
127
- #warnsvg(m[:src])
128
135
  m2 = /\.(?<suffix>[a-zA-Z_0-9]+)$/.match m[:src]
129
136
  new_filename = "#{mkuuid}.#{m2[:suffix]}"
130
- old_filename = %r{^([A-Z]:)?/}.match(m[:src]) ? m[:src] : File.join(localdir, m[:src])
131
- FileUtils.cp old_filename, File.join(dir, new_filename)
137
+ FileUtils.cp localname(m[:src], localdir), File.join(dir, new_filename)
132
138
  a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='cid:#{new_filename}'")
133
139
  end
134
140
  a.join
@@ -140,6 +146,7 @@ module Html2Doc
140
146
  <o:MainFile HRef="../#{filename}.htm"/>}
141
147
  Dir.entries(dir).sort.each do |item|
142
148
  next if item == "." || item == ".." || /^\./.match(item)
149
+
143
150
  f.write %{ <o:File HRef="#{item}"/>\n}
144
151
  end
145
152
  f.write("</xml>\n")
@@ -6,6 +6,7 @@ module Html2Doc
6
6
  fn = []
7
7
  docxml.xpath("//a").each do |a|
8
8
  next unless process_footnote_link(docxml, a, i, fn)
9
+
9
10
  i += 1
10
11
  end
11
12
  process_footnote_texts(docxml, fn)
@@ -22,13 +23,13 @@ module Html2Doc
22
23
  footnote_cleanup(docxml)
23
24
  end
24
25
 
25
- def self.footnote_div_to_p(f)
26
- if %w{div aside}.include? f.name
27
- if f.at(".//p")
28
- f.replace(f.children)
26
+ def self.footnote_div_to_p(elem)
27
+ if %w{div aside}.include? elem.name
28
+ if elem.at(".//p")
29
+ elem.replace(elem.children)
29
30
  else
30
- f.name = "p"
31
- f["class"] = "MsoFootnoteText"
31
+ elem.name = "p"
32
+ elem["class"] = "MsoFootnoteText"
32
33
  end
33
34
  end
34
35
  end
@@ -36,34 +37,39 @@ module Html2Doc
36
37
  FN = "<span class='MsoFootnoteReference'>"\
37
38
  "<span style='mso-special-character:footnote'/></span>".freeze
38
39
 
39
- def self.footnote_container(docxml, i)
40
- ref = docxml&.at("//a[@href='#_ftn#{i}']")&.children&.to_xml(indent: 0).
41
- gsub(/>\n</, "><") || FN
40
+ def self.footnote_container(docxml, idx)
41
+ ref = docxml&.at("//a[@href='#_ftn#{idx}']")&.children&.to_xml(indent: 0)
42
+ &.gsub(/>\n</, "><") || FN
42
43
  <<~DIV
43
- <div style='mso-element:footnote' id='ftn#{i}'>
44
- <a style='mso-footnote-id:ftn#{i}' href='#_ftn#{i}'
45
- name='_ftnref#{i}' title='' id='_ftnref#{i}'>#{ref.strip}</a></div>
44
+ <div style='mso-element:footnote' id='ftn#{idx}'>
45
+ <a style='mso-footnote-id:ftn#{idx}' href='#_ftn#{idx}'
46
+ name='_ftnref#{idx}' title='' id='_ftnref#{idx}'>#{ref.strip}</a></div>
46
47
  DIV
47
48
  end
48
49
 
49
- def self.process_footnote_link(docxml, a, i, fn)
50
- return false unless footnote?(a)
51
- href = a["href"].gsub(/^#/, "")
50
+ def self.process_footnote_link(docxml, elem, idx, footnote)
51
+ return false unless footnote?(elem)
52
+
53
+ href = elem["href"].gsub(/^#/, "")
52
54
  note = docxml.at("//*[@name = '#{href}' or @id = '#{href}']")
53
55
  return false if note.nil?
54
- set_footnote_link_attrs(a, i)
55
- if a.at("./span[@class = 'MsoFootnoteReference']")
56
- a.children.each do |c|
57
- if c.name == "span" and c["class"] == "MsoFootnoteReference"
58
- c.replace(FN)
59
- else
60
- c.wrap("<span class='MsoFootnoteReference'></span>")
61
- end
56
+
57
+ set_footnote_link_attrs(elem, idx)
58
+ if elem.at("./span[@class = 'MsoFootnoteReference']")
59
+ process_footnote_link1(elem)
60
+ else elem.children = FN
61
+ end
62
+ footnote << transform_footnote_text(note)
63
+ end
64
+
65
+ def self.process_footnote_link1(elem)
66
+ elem.children.each do |c|
67
+ if c.name == "span" && c["class"] == "MsoFootnoteReference"
68
+ c.replace(FN)
69
+ else
70
+ c.wrap("<span class='MsoFootnoteReference'></span>")
62
71
  end
63
- else
64
- a.children = FN
65
72
  end
66
- fn << transform_footnote_text(note)
67
73
  end
68
74
 
69
75
  def self.transform_footnote_text(note)
@@ -76,16 +82,16 @@ module Html2Doc
76
82
  note.remove
77
83
  end
78
84
 
79
- def self.footnote?(a)
80
- a["epub:type"]&.casecmp("footnote")&.zero? ||
81
- a["class"]&.casecmp("footnote")&.zero?
85
+ def self.footnote?(elem)
86
+ elem["epub:type"]&.casecmp("footnote")&.zero? ||
87
+ elem["class"]&.casecmp("footnote")&.zero?
82
88
  end
83
89
 
84
- def self.set_footnote_link_attrs(a, i)
85
- a["style"] = "mso-footnote-id:ftn#{i}"
86
- a["href"] = "#_ftn#{i}"
87
- a["name"] = "_ftnref#{i}"
88
- a["title"] = ""
90
+ def self.set_footnote_link_attrs(elem, idx)
91
+ elem["style"] = "mso-footnote-id:ftn#{idx}"
92
+ elem["href"] = "#_ftn#{idx}"
93
+ elem["name"] = "_ftnref#{idx}"
94
+ elem["title"] = ""
89
95
  end
90
96
 
91
97
  # We expect that the content of the footnote text received is one or
@@ -94,8 +100,8 @@ module Html2Doc
94
100
  # are present in the HTML, they need to have been cleaned out before
95
101
  # passing to this gem
96
102
  def self.footnote_cleanup(docxml)
97
- docxml.xpath('//div[@style="mso-element:footnote"]/a').
98
- each do |x|
103
+ docxml.xpath('//div[@style="mso-element:footnote"]/a')
104
+ .each do |x|
99
105
  n = x.next_element
100
106
  n&.children&.first&.add_previous_sibling(x.remove)
101
107
  end
@@ -1,3 +1,3 @@
1
1
  module Html2Doc
2
- VERSION = "1.1.0".freeze
2
+ VERSION = "1.1.4".freeze
3
3
  end
data/lib/html2doc.rb CHANGED
@@ -4,6 +4,3 @@ require_relative "html2doc/mime"
4
4
  require_relative "html2doc/notes"
5
5
  require_relative "html2doc/math"
6
6
  require_relative "html2doc/lists"
7
- #require_relative "asciimath/parser"
8
- #require_relative "asciimath/mathml"
9
- #require_relative "asciimath/html"