html2doc 1.1.0 → 1.1.4

Sign up to get free protection for your applications and to get access to all the features.
data/lib/html2doc/math.rb CHANGED
@@ -9,29 +9,34 @@ module Html2Doc
9
9
  Nokogiri::XSLT(File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"),
10
10
  encoding: "utf-8"))
11
11
 
12
- def self.asciimath_to_mathml1(x)
13
- begin
14
- AsciiMath::MathMLBuilder.new(:msword => true).append_expression(
15
- AsciiMath.parse(HTMLEntities.new.decode(x)).ast).to_s.
16
- gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
17
- rescue StandardError => e
18
- puts "parsing: #{x}"
19
- puts e.message
20
- raise e
21
- end
12
+ def self.asciimath_to_mathml1(expr)
13
+ AsciiMath::MathMLBuilder.new(msword: true).append_expression(
14
+ AsciiMath.parse(HTMLEntities.new.decode(expr)).ast,
15
+ ).to_s
16
+ .gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
17
+ rescue StandardError => e
18
+ puts "parsing: #{expr}"
19
+ puts e.message
20
+ raise e
22
21
  end
23
22
 
24
23
  def self.asciimath_to_mathml(doc, delims)
25
24
  return doc if delims.nil? || delims.size < 2
25
+
26
26
  m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
27
27
  m.each_slice(4).map.with_index do |(*a), i|
28
- i % 500 == 0 && m.size > 1000 && i > 0 and
29
- warn "MathML #{i} of #{(m.size / 4).floor}"
28
+ progress_conv(i, 500, (m.size / 4).floor, 1000, "AsciiMath")
30
29
  a[2].nil? || a[2] = asciimath_to_mathml1(a[2])
31
30
  a.size > 1 ? a[0] + a[2] : a[0]
32
31
  end.join
33
32
  end
34
33
 
34
+ def self.progress_conv(idx, step, total, threshold, msg)
35
+ return unless (idx % step).zero? && total > threshold && idx.positive?
36
+
37
+ warn "#{msg} #{idx} of #{total}"
38
+ end
39
+
35
40
  def self.unwrap_accents(doc)
36
41
  doc.xpath("//*[@accent = 'true']").each do |x|
37
42
  x.elements.length > 1 or next
@@ -42,106 +47,124 @@ module Html2Doc
42
47
  end
43
48
 
44
49
  # random fixes to MathML input that OOXML needs to render properly
45
- def self.ooxml_cleanup(m, docnamespaces)
46
- m = unwrap_accents(mathml_preserve_space(
47
- mathml_insert_rows(m, docnamespaces), docnamespaces))
48
- m.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
49
- m
50
+ def self.ooxml_cleanup(math, docnamespaces)
51
+ math = unwrap_accents(
52
+ mathml_preserve_space(
53
+ mathml_insert_rows(math, docnamespaces), docnamespaces
54
+ ),
55
+ )
56
+ math.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
57
+ math
50
58
  end
51
59
 
52
- def self.mathml_insert_rows(m, docnamespaces)
53
- m.xpath(%w(msup msub msubsup munder mover munderover).
54
- map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
60
+ def self.mathml_insert_rows(math, docnamespaces)
61
+ math.xpath(%w(msup msub msubsup munder mover munderover)
62
+ .map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
55
63
  next unless x.next_element && x.next_element != "mrow"
64
+
56
65
  x.next_element.wrap("<mrow/>")
57
66
  end
58
- m
67
+ math
59
68
  end
60
69
 
61
- def self.mathml_preserve_space(m, docnamespaces)
62
- m.xpath(".//xmlns:mtext", docnamespaces).each do |x|
70
+ def self.mathml_preserve_space(math, docnamespaces)
71
+ math.xpath(".//xmlns:mtext", docnamespaces).each do |x|
63
72
  x.children = x.children.to_xml.gsub(/^\s/, "&#xA0;").gsub(/\s$/, "&#xA0;")
64
73
  end
65
- m
74
+ math
66
75
  end
67
76
 
68
- def self.unitalic(m)
69
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
70
- x.wrap("<span style='font-style:normal;'></span>")
77
+ HTML_NS = 'xmlns="http://www.w3.org/1999/xhtml"'.freeze
78
+
79
+ def self.unitalic(math)
80
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
81
+ x.wrap("<span #{HTML_NS} style='font-style:normal;'></span>")
71
82
  end
72
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
73
- x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>")
83
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
84
+ x.wrap("<span #{HTML_NS} class='nostem' style='font-weight:bold;'><em></em></span>")
74
85
  end
75
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
76
- x.wrap("<span class='nostem'><em></em></span>")
86
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
87
+ x.wrap("<span #{HTML_NS} class='nostem'><em></em></span>")
77
88
  end
78
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
79
- x.wrap("<span style='font-style:normal;font-weight:bold;'></span>")
89
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
90
+ x.wrap("<span #{HTML_NS} style='font-style:normal;font-weight:bold;'></span>")
80
91
  end
81
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
82
- toPlane1(x, :monospace)
92
+ math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
93
+ to_plane1(x, :monospace)
83
94
  end
84
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
85
- toPlane1(x, :doublestruck)
95
+ math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
96
+ to_plane1(x, :doublestruck)
86
97
  end
87
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
88
- toPlane1(x, :script)
98
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
99
+ to_plane1(x, :script)
89
100
  end
90
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
91
- toPlane1(x, :scriptbold)
101
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
102
+ to_plane1(x, :scriptbold)
92
103
  end
93
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
94
- toPlane1(x, :fraktur)
104
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
105
+ to_plane1(x, :fraktur)
95
106
  end
96
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
97
- toPlane1(x, :frakturbold)
107
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
108
+ to_plane1(x, :frakturbold)
98
109
  end
99
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
100
- toPlane1(x, :sans)
110
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
111
+ to_plane1(x, :sans)
101
112
  end
102
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
103
- toPlane1(x, :sansbold)
113
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
114
+ to_plane1(x, :sansbold)
104
115
  end
105
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
106
- toPlane1(x, :sansitalic)
116
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
117
+ to_plane1(x, :sansitalic)
107
118
  end
108
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
109
- toPlane1(x, :sansbolditalic)
119
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
120
+ to_plane1(x, :sansbolditalic)
110
121
  end
111
- m
122
+ math
112
123
  end
113
124
 
114
- def self.toPlane1(x, font)
115
- x.traverse do |n|
125
+ def self.to_plane1(xml, font)
126
+ xml.traverse do |n|
116
127
  next unless n.text?
128
+
117
129
  n.replace(Plane1Converter.conv(HTMLEntities.new.decode(n.text), font))
118
130
  end
119
- x
131
+ xml
120
132
  end
121
133
 
122
134
  def self.mathml_to_ooml(docxml)
123
135
  docnamespaces = docxml.collect_namespaces
124
136
  m = docxml.xpath("//*[local-name() = 'math']")
125
137
  m.each_with_index do |x, i|
126
- i % 100 == 0 && m.size > 500 && i > 0 and
127
- warn "Math OOXML #{i} of #{m.size}"
128
- element = ooxml_cleanup(x, docnamespaces)
129
- doc = Nokogiri::XML::Document::new()
130
- doc.root = element
131
- ooxml = (unitalic(esc_space(@xsltemplate.transform(doc)))).to_s.
132
- gsub(/<\?[^>]+>\s*/, "").
133
- gsub(/ xmlns(:[^=]+)?="[^"]+"/, "").
134
- gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
135
- ooxml = uncenter(x, ooxml)
136
- x.swap(ooxml)
138
+ progress_conv(i, 100, m.size, 500, "Math OOXML")
139
+ mathml_to_ooml1(x, docnamespaces)
137
140
  end
138
141
  end
139
142
 
140
- # escape space as &#x32;; we are removing any spaces generated by
143
+ # We need span and em not to be namespaced. Word can't deal with explicit
144
+ # namespaces.
145
+ # We will end up stripping them out again under Nokogiri 1.11, which correctly
146
+ # insists on inheriting namespace from parent.
147
+ def self.ooml_clean(xml)
148
+ xml.to_s
149
+ .gsub(/<\?[^>]+>\s*/, "")
150
+ .gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
151
+ .gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
152
+ end
153
+
154
+ def self.mathml_to_ooml1(xml, docnamespaces)
155
+ doc = Nokogiri::XML::Document::new
156
+ doc.root = ooxml_cleanup(xml, docnamespaces)
157
+ ooxml = ooml_clean(unitalic(esc_space(@xsltemplate.transform(doc))))
158
+ ooxml = uncenter(xml, ooxml)
159
+ xml.swap(ooxml)
160
+ end
161
+
162
+ # escape space as &#x32;; we are removing any spaces generated by
141
163
  # XML indentation
142
164
  def self.esc_space(xml)
143
165
  xml.traverse do |n|
144
166
  next unless n.text?
167
+
145
168
  n = n.text.gsub(/ /, "&#x32;")
146
169
  end
147
170
  xml
@@ -149,17 +172,15 @@ module Html2Doc
149
172
 
150
173
  # if oomml has no siblings, by default it is centered; override this with
151
174
  # left/right if parent is so tagged
152
- def self.uncenter(m, ooxml)
153
- if m.next == nil && m.previous == nil
154
- alignnode = m.at(".//ancestor::*[@style][local-name() = 'p' or "\
155
- "local-name() = 'div' or local-name() = 'td']/@style")
156
- return ooxml unless alignnode
157
- if alignnode.text.include? ("text-align:left")
158
- ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
159
- "m:val='left'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
160
- elsif alignnode.text.include? ("text-align:right")
175
+ def self.uncenter(math, ooxml)
176
+ alignnode = math.at(".//ancestor::*[@style][local-name() = 'p' or "\
177
+ "local-name() = 'div' or local-name() = 'td']/@style")
178
+ return ooxml unless alignnode && (math.next == nil && math.previous == nil)
179
+
180
+ %w(left right).each do |dir|
181
+ if alignnode.text.include? ("text-align:#{dir}")
161
182
  ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
162
- "m:val='right'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
183
+ "m:val='#{dir}'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
163
184
  end
164
185
  end
165
186
  ooxml
data/lib/html2doc/mime.rb CHANGED
@@ -7,20 +7,20 @@ require "fileutils"
7
7
  module Html2Doc
8
8
  def self.mime_preamble(boundary, filename, result)
9
9
  <<~"PREAMBLE"
10
- MIME-Version: 1.0
11
- Content-Type: multipart/related; boundary="#{boundary}"
10
+ MIME-Version: 1.0
11
+ Content-Type: multipart/related; boundary="#{boundary}"
12
12
 
13
- --#{boundary}
14
- Content-ID: <#{File.basename(filename)}>
15
- Content-Disposition: inline; filename="#{File.basename(filename)}"
16
- Content-Type: text/html; charset="utf-8"
13
+ --#{boundary}
14
+ Content-ID: <#{File.basename(filename)}>
15
+ Content-Disposition: inline; filename="#{File.basename(filename)}"
16
+ Content-Type: text/html; charset="utf-8"
17
17
 
18
- #{result}
18
+ #{result}
19
19
 
20
20
  PREAMBLE
21
21
  end
22
22
 
23
- def self.mime_attachment(boundary, filename, item, dir)
23
+ def self.mime_attachment(boundary, _filename, item, dir)
24
24
  content_type = mime_type(item)
25
25
  text_mode = %w[text application].any? { |p| content_type.start_with? p }
26
26
 
@@ -29,13 +29,13 @@ module Html2Doc
29
29
 
30
30
  encoded_file = Base64.strict_encode64(content).gsub(/(.{76})/, "\\1\n")
31
31
  <<~"FILE"
32
- --#{boundary}
33
- Content-ID: <#{File.basename(item)}>
34
- Content-Disposition: inline; filename="#{File.basename(item)}"
35
- Content-Transfer-Encoding: base64
36
- Content-Type: #{content_type}
32
+ --#{boundary}
33
+ Content-ID: <#{File.basename(item)}>
34
+ Content-Disposition: inline; filename="#{File.basename(item)}"
35
+ Content-Transfer-Encoding: base64
36
+ Content-Type: #{content_type}
37
37
 
38
- #{encoded_file}
38
+ #{encoded_file}
39
39
 
40
40
  FILE
41
41
  end
@@ -43,7 +43,7 @@ module Html2Doc
43
43
  def self.mime_type(item)
44
44
  types = MIME::Types.type_for(item)
45
45
  type = types ? types.first.to_s : 'text/plain; charset="utf-8"'
46
- type = type + ' charset="utf-8"' if /^text/.match(type) && types
46
+ type = %(#{type} charset="utf-8") if /^text/.match(type) && types
47
47
  type
48
48
  end
49
49
 
@@ -59,6 +59,7 @@ module Html2Doc
59
59
  Dir.foreach(dir) do |item|
60
60
  next if item == "." || item == ".." || /^\./.match(item) ||
61
61
  item == "filelist.xml"
62
+
62
63
  mhtml += mime_attachment(boundary, "#{filename}.htm", item, dir)
63
64
  end
64
65
  mhtml += "--#{boundary}--"
@@ -69,17 +70,21 @@ module Html2Doc
69
70
  mhtml.gsub %r{(<img[^>]*?src=")([^\"']+)(['"])}m do |m|
70
71
  repl = "#{$1}cid:#{File.basename($2)}#{$3}"
71
72
  /^data:|^https?:/.match($2) ? m : repl
73
+ end.gsub %r{(<v:imagedata[^>]*?src=")([^\"']+)(['"])}m do |m|
74
+ repl = "#{$1}cid:#{File.basename($2)}#{$3}"
75
+ /^data:|^https?:/.match($2) ? m : repl
72
76
  end
73
77
  end
74
78
 
75
79
  # max width for Word document is 400, max height is 680
76
- def self.image_resize(i, path, maxheight, maxwidth)
77
- realSize = ImageSize.path(path).size
78
- s = [i["width"].to_i, i["height"].to_i]
79
- s = realSize if s[0].zero? && s[1].zero?
80
- return [nil, nil] if realSize.nil? || realSize[0].nil? || realSize[1].nil?
81
- s[1] = s[0] * realSize[1] / realSize[0] if s[1].zero? && !s[0].zero?
82
- s[0] = s[1] * realSize[0] / realSize[1] if s[0].zero? && !s[1].zero?
80
+ def self.image_resize(img, path, maxheight, maxwidth)
81
+ realsize = ImageSize.path(path).size
82
+ s = [img["width"].to_i, img["height"].to_i]
83
+ s = realsize if s[0].zero? && s[1].zero?
84
+ return [nil, nil] if realsize.nil? || realsize[0].nil? || realsize[1].nil?
85
+
86
+ s[1] = s[0] * realsize[1] / realsize[0] if s[1].zero? && !s[0].zero?
87
+ s[0] = s[1] * realsize[0] / realsize[1] if s[0].zero? && !s[1].zero?
83
88
  s = [(s[0] * maxheight / s[1]).ceil, maxheight] if s[1] > maxheight
84
89
  s = [maxwidth, (s[1] * maxwidth / s[0]).ceil] if s[0] > maxwidth
85
90
  s
@@ -92,19 +97,22 @@ module Html2Doc
92
97
  end
93
98
 
94
99
  def self.warnsvg(src)
95
- warn "#{src}: SVG not supported" if /\.svg$/i.match(src)
100
+ warn "#{src}: SVG not supported" if /\.svg$/i.match?(src)
101
+ end
102
+
103
+ def self.localname(src, localdir)
104
+ %r{^([A-Z]:)?/}.match?(src) ? src : File.join(localdir, src)
96
105
  end
97
106
 
98
107
  # only processes locally stored images
99
108
  def self.image_cleanup(docxml, dir, localdir)
100
109
  docxml.traverse do |i|
101
110
  next unless i.element? && %w(img v:imagedata).include?(i.name)
102
- #warnsvg(i["src"])
103
- next if /^http/.match i["src"]
104
- next if %r{^data:(image|application)/[^;]+;base64}.match i["src"]
105
- local_filename = %r{^([A-Z]:)?/}.match(i["src"]) ? i["src"] :
106
- File.join(localdir, i["src"])
107
- new_filename = "#{mkuuid}#{File.extname(i["src"])}"
111
+ next if /^http/.match? i["src"]
112
+ next if %r{^data:(image|application)/[^;]+;base64}.match? i["src"]
113
+
114
+ local_filename = localname(i["src"], localdir)
115
+ new_filename = "#{mkuuid}#{File.extname(i['src'])}"
108
116
  FileUtils.cp local_filename, File.join(dir, new_filename)
109
117
  i["width"], i["height"] = image_resize(i, local_filename, 680, 400)
110
118
  i["src"] = File.join(File.basename(dir), new_filename)
@@ -112,7 +120,7 @@ module Html2Doc
112
120
  docxml
113
121
  end
114
122
 
115
- # do not parse the header through Nokogiri, since it will contain
123
+ # do not parse the header through Nokogiri, since it will contain
116
124
  # non-XML like <![if !supportFootnotes]>
117
125
  def self.header_image_cleanup(doc, dir, filename, localdir)
118
126
  doc.split(%r{(<img [^>]*>|<v:imagedata [^>]*>)}).each_slice(2).map do |a|
@@ -120,15 +128,13 @@ module Html2Doc
120
128
  end.join
121
129
  end
122
130
 
123
- def self.header_image_cleanup1(a, dir, filename, localdir)
131
+ def self.header_image_cleanup1(a, dir, _filename, localdir)
124
132
  if a.size == 2 && !(/ src="https?:/.match a[1]) &&
125
133
  !(%r{ src="data:(image|application)/[^;]+;base64}.match a[1])
126
134
  m = / src=['"](?<src>[^"']+)['"]/.match a[1]
127
- #warnsvg(m[:src])
128
135
  m2 = /\.(?<suffix>[a-zA-Z_0-9]+)$/.match m[:src]
129
136
  new_filename = "#{mkuuid}.#{m2[:suffix]}"
130
- old_filename = %r{^([A-Z]:)?/}.match(m[:src]) ? m[:src] : File.join(localdir, m[:src])
131
- FileUtils.cp old_filename, File.join(dir, new_filename)
137
+ FileUtils.cp localname(m[:src], localdir), File.join(dir, new_filename)
132
138
  a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='cid:#{new_filename}'")
133
139
  end
134
140
  a.join
@@ -140,6 +146,7 @@ module Html2Doc
140
146
  <o:MainFile HRef="../#{filename}.htm"/>}
141
147
  Dir.entries(dir).sort.each do |item|
142
148
  next if item == "." || item == ".." || /^\./.match(item)
149
+
143
150
  f.write %{ <o:File HRef="#{item}"/>\n}
144
151
  end
145
152
  f.write("</xml>\n")
data/lib/html2doc/notes.rb CHANGED
@@ -6,6 +6,7 @@ module Html2Doc
6
6
  fn = []
7
7
  docxml.xpath("//a").each do |a|
8
8
  next unless process_footnote_link(docxml, a, i, fn)
9
+
9
10
  i += 1
10
11
  end
11
12
  process_footnote_texts(docxml, fn)
@@ -22,13 +23,13 @@ module Html2Doc
22
23
  footnote_cleanup(docxml)
23
24
  end
24
25
 
25
- def self.footnote_div_to_p(f)
26
- if %w{div aside}.include? f.name
27
- if f.at(".//p")
28
- f.replace(f.children)
26
+ def self.footnote_div_to_p(elem)
27
+ if %w{div aside}.include? elem.name
28
+ if elem.at(".//p")
29
+ elem.replace(elem.children)
29
30
  else
30
- f.name = "p"
31
- f["class"] = "MsoFootnoteText"
31
+ elem.name = "p"
32
+ elem["class"] = "MsoFootnoteText"
32
33
  end
33
34
  end
34
35
  end
@@ -36,34 +37,39 @@ module Html2Doc
36
37
  FN = "<span class='MsoFootnoteReference'>"\
37
38
  "<span style='mso-special-character:footnote'/></span>".freeze
38
39
 
39
- def self.footnote_container(docxml, i)
40
- ref = docxml&.at("//a[@href='#_ftn#{i}']")&.children&.to_xml(indent: 0).
41
- gsub(/>\n</, "><") || FN
40
+ def self.footnote_container(docxml, idx)
41
+ ref = docxml&.at("//a[@href='#_ftn#{idx}']")&.children&.to_xml(indent: 0)
42
+ &.gsub(/>\n</, "><") || FN
42
43
  <<~DIV
43
- <div style='mso-element:footnote' id='ftn#{i}'>
44
- <a style='mso-footnote-id:ftn#{i}' href='#_ftn#{i}'
45
- name='_ftnref#{i}' title='' id='_ftnref#{i}'>#{ref.strip}</a></div>
44
+ <div style='mso-element:footnote' id='ftn#{idx}'>
45
+ <a style='mso-footnote-id:ftn#{idx}' href='#_ftn#{idx}'
46
+ name='_ftnref#{idx}' title='' id='_ftnref#{idx}'>#{ref.strip}</a></div>
46
47
  DIV
47
48
  end
48
49
 
49
- def self.process_footnote_link(docxml, a, i, fn)
50
- return false unless footnote?(a)
51
- href = a["href"].gsub(/^#/, "")
50
+ def self.process_footnote_link(docxml, elem, idx, footnote)
51
+ return false unless footnote?(elem)
52
+
53
+ href = elem["href"].gsub(/^#/, "")
52
54
  note = docxml.at("//*[@name = '#{href}' or @id = '#{href}']")
53
55
  return false if note.nil?
54
- set_footnote_link_attrs(a, i)
55
- if a.at("./span[@class = 'MsoFootnoteReference']")
56
- a.children.each do |c|
57
- if c.name == "span" and c["class"] == "MsoFootnoteReference"
58
- c.replace(FN)
59
- else
60
- c.wrap("<span class='MsoFootnoteReference'></span>")
61
- end
56
+
57
+ set_footnote_link_attrs(elem, idx)
58
+ if elem.at("./span[@class = 'MsoFootnoteReference']")
59
+ process_footnote_link1(elem)
60
+ else elem.children = FN
61
+ end
62
+ footnote << transform_footnote_text(note)
63
+ end
64
+
65
+ def self.process_footnote_link1(elem)
66
+ elem.children.each do |c|
67
+ if c.name == "span" && c["class"] == "MsoFootnoteReference"
68
+ c.replace(FN)
69
+ else
70
+ c.wrap("<span class='MsoFootnoteReference'></span>")
62
71
  end
63
- else
64
- a.children = FN
65
72
  end
66
- fn << transform_footnote_text(note)
67
73
  end
68
74
 
69
75
  def self.transform_footnote_text(note)
@@ -76,16 +82,16 @@ module Html2Doc
76
82
  note.remove
77
83
  end
78
84
 
79
- def self.footnote?(a)
80
- a["epub:type"]&.casecmp("footnote")&.zero? ||
81
- a["class"]&.casecmp("footnote")&.zero?
85
+ def self.footnote?(elem)
86
+ elem["epub:type"]&.casecmp("footnote")&.zero? ||
87
+ elem["class"]&.casecmp("footnote")&.zero?
82
88
  end
83
89
 
84
- def self.set_footnote_link_attrs(a, i)
85
- a["style"] = "mso-footnote-id:ftn#{i}"
86
- a["href"] = "#_ftn#{i}"
87
- a["name"] = "_ftnref#{i}"
88
- a["title"] = ""
90
+ def self.set_footnote_link_attrs(elem, idx)
91
+ elem["style"] = "mso-footnote-id:ftn#{idx}"
92
+ elem["href"] = "#_ftn#{idx}"
93
+ elem["name"] = "_ftnref#{idx}"
94
+ elem["title"] = ""
89
95
  end
90
96
 
91
97
  # We expect that the content of the footnote text received is one or
@@ -94,8 +100,8 @@ module Html2Doc
94
100
  # are present in the HTML, they need to have been cleaned out before
95
101
  # passing to this gem
96
102
  def self.footnote_cleanup(docxml)
97
- docxml.xpath('//div[@style="mso-element:footnote"]/a').
98
- each do |x|
103
+ docxml.xpath('//div[@style="mso-element:footnote"]/a')
104
+ .each do |x|
99
105
  n = x.next_element
100
106
  n&.children&.first&.add_previous_sibling(x.remove)
101
107
  end
data/lib/html2doc/version.rb CHANGED
@@ -1,3 +1,3 @@
1
1
  module Html2Doc
2
- VERSION = "1.1.0".freeze
2
+ VERSION = "1.1.4".freeze
3
3
  end
data/lib/html2doc.rb CHANGED
@@ -4,6 +4,3 @@ require_relative "html2doc/mime"
4
4
  require_relative "html2doc/notes"
5
5
  require_relative "html2doc/math"
6
6
  require_relative "html2doc/lists"
7
- #require_relative "asciimath/parser"
8
- #require_relative "asciimath/mathml"
9
- #require_relative "asciimath/html"