RubyGems - html2doc - Versions diffs - 1.0.7 → 1.1.3 - Mend

html2doc 1.0.7 → 1.1.3

This diff shows the differences between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.

Files changed (21) hide show

data/lib/html2doc/math.rb CHANGED Viewed

@@ -9,23 +9,34 @@ module Html2Doc
     Nokogiri::XSLT(File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"),
                              encoding: "utf-8"))
-  def self.asciimath_to_mathml1(x)
-    AsciiMath::MathMLBuilder.new(:msword => true).append_expression(
-      AsciiMath.parse(HTMLEntities.new.decode(x)).ast).to_s.
-        gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
+  def self.asciimath_to_mathml1(expr)
+    AsciiMath::MathMLBuilder.new(msword: true).append_expression(
+      AsciiMath.parse(HTMLEntities.new.decode(expr)).ast,
+    ).to_s
+      .gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
+  rescue StandardError => e
+    puts "parsing: #{expr}"
+    puts e.message
+    raise e
   end
   def self.asciimath_to_mathml(doc, delims)
     return doc if delims.nil? || delims.size < 2
     m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
     m.each_slice(4).map.with_index do |(*a), i|
-      i % 500 == 0 && m.size > 1000 && i > 0 and
-        warn "MathML #{i} of #{(m.size / 4).floor}"
+      progress_conv(i, 500, (m.size / 4).floor, 1000, "AsciiMath")
       a[2].nil? || a[2] = asciimath_to_mathml1(a[2])
       a.size > 1 ? a[0] + a[2] : a[0]
     end.join
   end
+  def self.progress_conv(idx, step, total, threshold, msg)
+    return unless (idx % step).zero? && total > threshold && idx.positive?
+    warn "#{msg} #{idx} of #{total}"
+  end
   def self.unwrap_accents(doc)
     doc.xpath("//*[@accent = 'true']").each do |x|
       x.elements.length > 1 or next
@@ -36,106 +47,124 @@ module Html2Doc
   end
   # random fixes to MathML input that OOXML needs to render properly
-  def self.ooxml_cleanup(m, docnamespaces)
-    m = unwrap_accents(mathml_preserve_space(
-      mathml_insert_rows(m, docnamespaces), docnamespaces))
-    m.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
-    m
+  def self.ooxml_cleanup(math, docnamespaces)
+    math = unwrap_accents(
+      mathml_preserve_space(
+        mathml_insert_rows(math, docnamespaces), docnamespaces
+      ),
+    )
+    math.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
+    math
   end
-  def self.mathml_insert_rows(m, docnamespaces)
-    m.xpath(%w(msup msub msubsup munder mover munderover).
-            map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
+  def self.mathml_insert_rows(math, docnamespaces)
+    math.xpath(%w(msup msub msubsup munder mover munderover)
+            .map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
       next unless x.next_element && x.next_element != "mrow"
       x.next_element.wrap("<mrow/>")
     end
-    m
+    math
   end
-  def self.mathml_preserve_space(m, docnamespaces)
-    m.xpath(".//xmlns:mtext", docnamespaces).each do |x|
+  def self.mathml_preserve_space(math, docnamespaces)
+    math.xpath(".//xmlns:mtext", docnamespaces).each do |x|
       x.children = x.children.to_xml.gsub(/^\s/, "&#xA0;").gsub(/\s$/, "&#xA0;")
     end
-    m
+    math
   end
-  def self.unitalic(m)
-    m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
-      x.wrap("<span style='font-style:normal;'></span>")
+  HTML_NS = 'xmlns="http://www.w3.org/1999/xhtml"'.freeze
+  def self.unitalic(math)
+    math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
+      x.wrap("<span #{HTML_NS} style='font-style:normal;'></span>")
     end
-    m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
-      x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>")
+    math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
+      x.wrap("<span #{HTML_NS} class='nostem' style='font-weight:bold;'><em></em></span>")
     end
-    m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
-      x.wrap("<span class='nostem'><em></em></span>")
+    math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
+      x.wrap("<span #{HTML_NS} class='nostem'><em></em></span>")
     end
-    m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
-      x.wrap("<span style='font-style:normal;font-weight:bold;'></span>")
+    math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
+      x.wrap("<span #{HTML_NS} style='font-style:normal;font-weight:bold;'></span>")
     end
-    m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
-      toPlane1(x, :monospace)
+    math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
+      to_plane1(x, :monospace)
     end
-    m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
-      toPlane1(x, :doublestruck)
+    math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
+      to_plane1(x, :doublestruck)
     end
-    m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
-      toPlane1(x, :script)
+    math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
+      to_plane1(x, :script)
     end
-    m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
-      toPlane1(x, :scriptbold)
+    math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
+      to_plane1(x, :scriptbold)
     end
-    m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
-      toPlane1(x, :fraktur)
+    math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
+      to_plane1(x, :fraktur)
     end
-    m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
-      toPlane1(x, :frakturbold)
+    math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
+      to_plane1(x, :frakturbold)
     end
-    m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
-      toPlane1(x, :sans)
+    math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
+      to_plane1(x, :sans)
     end
-    m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
-      toPlane1(x, :sansbold)
+    math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
+      to_plane1(x, :sansbold)
     end
-    m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
-      toPlane1(x, :sansitalic)
+    math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
+      to_plane1(x, :sansitalic)
     end
-    m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
-      toPlane1(x, :sansbolditalic)
+    math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
+      to_plane1(x, :sansbolditalic)
     end
-    m
+    math
   end
-  def self.toPlane1(x, font)
-    x.traverse do |n|
+  def self.to_plane1(xml, font)
+    xml.traverse do |n|
       next unless n.text?
       n.replace(Plane1Converter.conv(HTMLEntities.new.decode(n.text), font))
     end
-    x
+    xml
   end
   def self.mathml_to_ooml(docxml)
     docnamespaces = docxml.collect_namespaces
     m = docxml.xpath("//*[local-name() = 'math']")
     m.each_with_index do |x, i|
-      i % 100 == 0 && m.size > 500 && i > 0 and
-        warn "Math OOXML #{i} of #{m.size}"
-      element = ooxml_cleanup(x, docnamespaces)
-      doc = Nokogiri::XML::Document::new()
-      doc.root = element
-      ooxml = (unitalic(esc_space(@xsltemplate.transform(doc)))).to_s.
-        gsub(/<\?[^>]+>\s*/, "").
-        gsub(/ xmlns(:[^=]+)?="[^"]+"/, "").
-        gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
-      ooxml = uncenter(x, ooxml)
-      x.swap(ooxml)
+      progress_conv(i, 100, m.size, 500, "Math OOXML")
+      mathml_to_ooml1(x, docnamespaces)
     end
   end
-  # escape space as &#x32;; we are removing any spaces generated by
+  # We need span and em not to be namespaced. Word can't deal with explicit
+  # namespaces.
+  # We will end up stripping them out again under Nokogiri 1.11, which correctly
+  # insists on inheriting namespace from parent.
+  def self.ooml_clean(xml)
+    xml.to_s
+      .gsub(/<\?[^>]+>\s*/, "")
+      .gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
+      .gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
+  end
+  def self.mathml_to_ooml1(xml, docnamespaces)
+    doc = Nokogiri::XML::Document::new
+    doc.root = ooxml_cleanup(xml, docnamespaces)
+      ooxml = ooml_clean(unitalic(esc_space(@xsltemplate.transform(doc))))
+    ooxml = uncenter(xml, ooxml)
+    xml.swap(ooxml)
+  end
+  # escape space as &#x32;; we are removing any spaces generated by
   # XML indentation
   def self.esc_space(xml)
     xml.traverse do |n|
       next unless n.text?
       n = n.text.gsub(/ /, "&#x32;")
     end
     xml
@@ -143,17 +172,15 @@ module Html2Doc
   # if oomml has no siblings, by default it is centered; override this with
   # left/right if parent is so tagged
-  def self.uncenter(m, ooxml)
-    if m.next == nil && m.previous == nil
-      alignnode = m.at(".//ancestor::*[@style][local-name() = 'p' or "\
-                       "local-name() = 'div' or local-name() = 'td']/@style")
-      return ooxml unless alignnode
-      if alignnode.text.include? ("text-align:left")
-        ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
-          "m:val='left'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
-      elsif alignnode.text.include? ("text-align:right")
+  def self.uncenter(math, ooxml)
+    alignnode = math.at(".//ancestor::*[@style][local-name() = 'p' or "\
+                        "local-name() = 'div' or local-name() = 'td']/@style")
+    return ooxml unless alignnode && (math.next == nil && math.previous == nil)
+    %w(left right).each do |dir|
+      if alignnode.text.include? ("text-align:#{dir}")
         ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
-          "m:val='right'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
+          "m:val='#{dir}'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
       end
     end
     ooxml

data/lib/html2doc/mime.rb CHANGED Viewed

@@ -7,19 +7,20 @@ require "fileutils"
 module Html2Doc
   def self.mime_preamble(boundary, filename, result)
     <<~"PREAMBLE"
-    MIME-Version: 1.0
-    Content-Type: multipart/related; boundary="#{boundary}"
+      MIME-Version: 1.0
+      Content-Type: multipart/related; boundary="#{boundary}"
-    --#{boundary}
-    Content-Location: file:///C:/Doc/#{File.basename(filename)}.htm
-    Content-Type: text/html; charset="utf-8"
+      --#{boundary}
+      Content-ID: <#{File.basename(filename)}>
+      Content-Disposition: inline; filename="#{File.basename(filename)}"
+      Content-Type: text/html; charset="utf-8"
-    #{result}
+      #{result}
     PREAMBLE
   end
-  def self.mime_attachment(boundary, filename, item, dir)
+  def self.mime_attachment(boundary, _filename, item, dir)
     content_type = mime_type(item)
     text_mode = %w[text application].any? { |p| content_type.start_with? p }
@@ -28,12 +29,13 @@ module Html2Doc
     encoded_file = Base64.strict_encode64(content).gsub(/(.{76})/, "\\1\n")
     <<~"FILE"
-    --#{boundary}
-    Content-Location: file:///C:/Doc/#{File.basename(filename)}_files/#{item}
-    Content-Transfer-Encoding: base64
-    Content-Type: #{content_type}
+      --#{boundary}
+      Content-ID: <#{File.basename(item)}>
+      Content-Disposition: inline; filename="#{File.basename(item)}"
+      Content-Transfer-Encoding: base64
+      Content-Type: #{content_type}
-    #{encoded_file}
+      #{encoded_file}
     FILE
   end
@@ -41,7 +43,7 @@ module Html2Doc
   def self.mime_type(item)
     types = MIME::Types.type_for(item)
     type = types ? types.first.to_s : 'text/plain; charset="utf-8"'
-    type = type + ' charset="utf-8"' if /^text/.match(type) && types
+    type = %(#{type} charset="utf-8") if /^text/.match(type) && types
     type
   end
@@ -52,25 +54,37 @@ module Html2Doc
   def self.mime_package(result, filename, dir)
     boundary = mime_boundary
-    mhtml = mime_preamble(boundary, filename, result)
-    mhtml += mime_attachment(boundary, filename, "filelist.xml", dir)
+    mhtml = mime_preamble(boundary, "#{filename}.htm", result)
+    mhtml += mime_attachment(boundary, "#{filename}.htm", "filelist.xml", dir)
     Dir.foreach(dir) do |item|
       next if item == "." || item == ".." || /^\./.match(item) ||
         item == "filelist.xml"
-      mhtml += mime_attachment(boundary, filename, item, dir)
+      mhtml += mime_attachment(boundary, "#{filename}.htm", item, dir)
     end
     mhtml += "--#{boundary}--"
-    File.open("#{filename}.doc", "w:UTF-8") { |f| f.write mhtml }
+    File.open("#{filename}.doc", "w:UTF-8") { |f| f.write contentid(mhtml) }
+  end
+  def self.contentid(mhtml)
+    mhtml.gsub %r{(<img[^>]*?src=")([^\"']+)(['"])}m do |m|
+      repl = "#{$1}cid:#{File.basename($2)}#{$3}"
+      /^data:|^https?:/.match($2) ? m : repl
+    end.gsub %r{(<v:imagedata[^>]*?src=")([^\"']+)(['"])}m do |m|
+      repl = "#{$1}cid:#{File.basename($2)}#{$3}"
+      /^data:|^https?:/.match($2) ? m : repl
+    end
   end
   # max width for Word document is 400, max height is 680
-  def self.image_resize(i, path, maxheight, maxwidth)
-    realSize = ImageSize.path(path).size
-    s = [i["width"].to_i, i["height"].to_i]
-    s = realSize if s[0].zero? && s[1].zero?
-    return [nil, nil] if realSize.nil? || realSize[0].nil? || realSize[1].nil?
-    s[1] = s[0] * realSize[1] / realSize[0] if s[1].zero? && !s[0].zero?
-    s[0] = s[1] * realSize[0] / realSize[1] if s[0].zero? && !s[1].zero?
+  def self.image_resize(img, path, maxheight, maxwidth)
+    realsize = ImageSize.path(path).size
+    s = [img["width"].to_i, img["height"].to_i]
+    s = realsize if s[0].zero? && s[1].zero?
+    return [nil, nil] if realsize.nil? || realsize[0].nil? || realsize[1].nil?
+    s[1] = s[0] * realsize[1] / realsize[0] if s[1].zero? && !s[0].zero?
+    s[0] = s[1] * realsize[0] / realsize[1] if s[0].zero? && !s[1].zero?
     s = [(s[0] * maxheight / s[1]).ceil, maxheight] if s[1] > maxheight
     s = [maxwidth, (s[1] * maxwidth / s[0]).ceil] if s[0] > maxwidth
     s
@@ -83,19 +97,22 @@ module Html2Doc
   end
   def self.warnsvg(src)
-    warn "#{src}: SVG not supported" if /\.svg$/i.match(src)
+    warn "#{src}: SVG not supported" if /\.svg$/i.match?(src)
+  end
+  def self.localname(src, localdir)
+    %r{^([A-Z]:)?/}.match?(src) ? src : File.join(localdir, src)
   end
   # only processes locally stored images
   def self.image_cleanup(docxml, dir, localdir)
     docxml.traverse do |i|
       next unless i.element? && %w(img v:imagedata).include?(i.name)
-      #warnsvg(i["src"])
-      next if /^http/.match i["src"]
-      next if %r{^data:(image|application)/[^;]+;base64}.match i["src"]
-      local_filename = %r{^([A-Z]:)?/}.match(i["src"]) ? i["src"] :
-        File.join(localdir, i["src"])
-      new_filename = "#{mkuuid}#{File.extname(i["src"])}"
+      next if /^http/.match? i["src"]
+      next if %r{^data:(image|application)/[^;]+;base64}.match? i["src"]
+      local_filename = localname(i["src"], localdir)
+      new_filename = "#{mkuuid}#{File.extname(i['src'])}"
       FileUtils.cp local_filename, File.join(dir, new_filename)
       i["width"], i["height"] = image_resize(i, local_filename, 680, 400)
       i["src"] = File.join(File.basename(dir), new_filename)
@@ -103,7 +120,7 @@ module Html2Doc
     docxml
   end
-  # do not parse the header through Nokogiri, since it will contain
+  # do not parse the header through Nokogiri, since it will contain
   # non-XML like <![if !supportFootnotes]>
   def self.header_image_cleanup(doc, dir, filename, localdir)
     doc.split(%r{(<img [^>]*>|<v:imagedata [^>]*>)}).each_slice(2).map do |a|
@@ -111,16 +128,14 @@ module Html2Doc
     end.join
   end
-  def self.header_image_cleanup1(a, dir, filename, localdir)
+  def self.header_image_cleanup1(a, dir, _filename, localdir)
     if a.size == 2 && !(/ src="https?:/.match a[1]) &&
         !(%r{ src="data:(image|application)/[^;]+;base64}.match a[1])
       m = / src=['"](?<src>[^"']+)['"]/.match a[1]
-      #warnsvg(m[:src])
       m2 = /\.(?<suffix>[a-zA-Z_0-9]+)$/.match m[:src]
       new_filename = "#{mkuuid}.#{m2[:suffix]}"
-      old_filename = %r{^([A-Z]:)?/}.match(m[:src]) ? m[:src] : File.join(localdir, m[:src])
-      FileUtils.cp old_filename, File.join(dir, new_filename)
-      a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='file:///C:/Doc/#{filename}_files/#{new_filename}'")
+      FileUtils.cp localname(m[:src], localdir), File.join(dir, new_filename)
+      a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='cid:#{new_filename}'")
     end
     a.join
   end
@@ -131,6 +146,7 @@ module Html2Doc
         <o:MainFile HRef="../#{filename}.htm"/>}
       Dir.entries(dir).sort.each do |item|
         next if item == "." || item == ".." || /^\./.match(item)
         f.write %{  <o:File HRef="#{item}"/>\n}
       end
       f.write("</xml>\n")

data/lib/html2doc/notes.rb CHANGED Viewed

@@ -6,6 +6,7 @@ module Html2Doc
     fn = []
     docxml.xpath("//a").each do |a|
       next unless process_footnote_link(docxml, a, i, fn)
       i += 1
     end
     process_footnote_texts(docxml, fn)
@@ -22,13 +23,13 @@ module Html2Doc
     footnote_cleanup(docxml)
   end
-  def self.footnote_div_to_p(f)
-    if %w{div aside}.include? f.name
-      if f.at(".//p")
-        f.replace(f.children)
+  def self.footnote_div_to_p(elem)
+    if %w{div aside}.include? elem.name
+      if elem.at(".//p")
+        elem.replace(elem.children)
       else
-        f.name = "p"
-        f["class"] = "MsoFootnoteText"
+        elem.name = "p"
+        elem["class"] = "MsoFootnoteText"
       end
     end
   end
@@ -36,34 +37,39 @@ module Html2Doc
   FN = "<span class='MsoFootnoteReference'>"\
     "<span style='mso-special-character:footnote'/></span>".freeze
-  def self.footnote_container(docxml, i)
-    ref = docxml&.at("//a[@href='#_ftn#{i}']")&.children&.to_xml(indent: 0).
-      gsub(/>\n</, "><") || FN
+  def self.footnote_container(docxml, idx)
+    ref = docxml&.at("//a[@href='#_ftn#{idx}']")&.children&.to_xml(indent: 0)
+      &.gsub(/>\n</, "><") || FN
     <<~DIV
-      <div style='mso-element:footnote' id='ftn#{i}'>
-        <a style='mso-footnote-id:ftn#{i}' href='#_ftn#{i}'
-           name='_ftnref#{i}' title='' id='_ftnref#{i}'>#{ref.strip}</a></div>
+      <div style='mso-element:footnote' id='ftn#{idx}'>
+        <a style='mso-footnote-id:ftn#{idx}' href='#_ftn#{idx}'
+           name='_ftnref#{idx}' title='' id='_ftnref#{idx}'>#{ref.strip}</a></div>
     DIV
   end
-  def self.process_footnote_link(docxml, a, i, fn)
-    return false unless footnote?(a)
-    href = a["href"].gsub(/^#/, "")
+  def self.process_footnote_link(docxml, elem, idx, footnote)
+    return false unless footnote?(elem)
+    href = elem["href"].gsub(/^#/, "")
     note = docxml.at("//*[@name = '#{href}' or @id = '#{href}']")
     return false if note.nil?
-    set_footnote_link_attrs(a, i)
-    if a.at("./span[@class = 'MsoFootnoteReference']")
-      a.children.each do |c|
-        if c.name == "span" and c["class"] == "MsoFootnoteReference"
-          c.replace(FN)
-        else
-          c.wrap("<span class='MsoFootnoteReference'></span>")
-        end
+    set_footnote_link_attrs(elem, idx)
+    if elem.at("./span[@class = 'MsoFootnoteReference']")
+      process_footnote_link1(elem)
+    else elem.children = FN
+    end
+    footnote << transform_footnote_text(note)
+  end
+  def self.process_footnote_link1(elem)
+    elem.children.each do |c|
+      if c.name == "span" && c["class"] == "MsoFootnoteReference"
+        c.replace(FN)
+      else
+        c.wrap("<span class='MsoFootnoteReference'></span>")
       end
-    else
-      a.children = FN
     end
-    fn << transform_footnote_text(note)
   end
   def self.transform_footnote_text(note)
@@ -76,16 +82,16 @@ module Html2Doc
     note.remove
   end
-  def self.footnote?(a)
-    a["epub:type"]&.casecmp("footnote")&.zero? ||
-      a["class"]&.casecmp("footnote")&.zero?
+  def self.footnote?(elem)
+    elem["epub:type"]&.casecmp("footnote")&.zero? ||
+      elem["class"]&.casecmp("footnote")&.zero?
   end
-  def self.set_footnote_link_attrs(a, i)
-    a["style"] = "mso-footnote-id:ftn#{i}"
-    a["href"] = "#_ftn#{i}"
-    a["name"] = "_ftnref#{i}"
-    a["title"] = ""
+  def self.set_footnote_link_attrs(elem, idx)
+    elem["style"] = "mso-footnote-id:ftn#{idx}"
+    elem["href"] = "#_ftn#{idx}"
+    elem["name"] = "_ftnref#{idx}"
+    elem["title"] = ""
   end
   # We expect that the content of the footnote text received is one or
@@ -94,8 +100,8 @@ module Html2Doc
   # are present in the HTML, they need to have been cleaned out before
   # passing to this gem
   def self.footnote_cleanup(docxml)
-    docxml.xpath('//div[@style="mso-element:footnote"]/a').
-      each do |x|
+    docxml.xpath('//div[@style="mso-element:footnote"]/a')
+      .each do |x|
       n = x.next_element
       n&.children&.first&.add_previous_sibling(x.remove)
     end