RubyGems - html2doc - Versions diffs - 1.10.4 → 1.10.5 - Mend

html2doc 1.10.4 → 1.10.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (4)

checksums.yaml CHANGED

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 152467633084047452eb3055fb70033671821c5ba0b9918b9eff4a19373f8aea
-  data.tar.gz: 283ec00b5bb8660322c1cd0d55aa0f63bad3d4bbde9a1b23619d466ff681ae6f
+  metadata.gz: 891f686836c7fa66be96cbcde889b12c9a45df09ed3321e9b78fef625b8f4102
+  data.tar.gz: 0135175121e52e6ca97ed471b8659b36f79bcbf1ee72453bb51112b7dba738f3
 SHA512:
-  metadata.gz: 73004619c0d6f067037b411721fe580129a8a9ba781df3a180764b8283ab931ab8e82620605aeccffe9605ea9cee08b96caa9554fb176bbbd4adbd22339eaa78
-  data.tar.gz: a5fa1f0947456b12d2fa902a3beeddb05397b9d7f816ceb18214345b3c06364def154387eedf0878771ee7ef5dfde46b69a0f4faf4bbc1c10c1b7816503391f7
+  metadata.gz: 5d614fff856c3cadf255cb364aedb8d697a226fd1c8f6dd41431dd84c2b72aaf2b7616a7f0632a6ed30df1a8d6bbbbb0ef9a60b11f8103e63924f8c618dfae71
+  data.tar.gz: 9aa106cfb435df572e57303a3fad1c58531f8da9d4d728dd9a8fddbbbee6bf23360b3e1024722009b23710e1c052809dc3ea4eeaa197273b5b4efe8861847903

data/lib/html2doc/version.rb CHANGED

@@ -1,3 +1,3 @@
 class Html2Doc
-  VERSION = "1.10.4".freeze
+  VERSION = "1.10.5".freeze
 end

data/lib/html2doc/xml.rb CHANGED

@@ -15,19 +15,49 @@ class Html2Doc
     end
     xml = xml.gsub(/<!--\s*\[([^\<\]]+)\]>/, "<!-- MSWORD-COMMENT \\1 -->")
       .gsub(/<!\s*\[endif\]\s*-->/, "<!-- MSWORD-COMMENT-END -->")
+    # Escape & to &amp; in href attributes before XML parsing to prevent stripping
+    xml = escape_amp_in_hrefs(xml)
     Nokogiri::XML.parse(xml)
   end
+  # Escape plain & to &amp; in href attributes
+  # This prevents Nokogiri from stripping invalid HTML entities during XML parsing
+  def escape_amp_in_hrefs(html)
+    # Match href="..." and href='...' separately
+    html.gsub(/(href\s*=\s*")([^"]*)"|(href\s*=\s*')([^']*)'/) do
+      if Regexp.last_match(1)
+        "#{Regexp.last_match(1)}#{Regexp.last_match(2).gsub('&', '&amp;')}\""
+      else
+        "#{Regexp.last_match(3)}#{Regexp.last_match(4).gsub('&', '&amp;')}'"
+      end
+    end
+  end
   DOCTYPE = <<~DOCTYPE.freeze
     <!DOCTYPE html SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
   DOCTYPE
   def from_xhtml(xml)
-    xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "")
+    result = xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "")
       .sub(DOCTYPE, "").gsub(%{ />}, "/>")
       .gsub(/<!-- MSWORD-COMMENT (.+?) -->/, "<!--[\\1]>")
       .gsub(/<!-- MSWORD-COMMENT-END -->/, "<![endif]-->")
       .gsub("\n--&gt;\n", "\n-->\n")
+    # Unescape &amp; to & in href attributes for proper URL handling
+    unescape_amp_in_hrefs(result)
+  end
+  # Unescape &amp; to & in href attributes only
+  # This ensures URLs work correctly in Word while preserving &amp; in text
+  def unescape_amp_in_hrefs(html)
+    # Match href="..." and href='...' separately
+    html.gsub(/(href\s*=\s*")([^"]*)"|(href\s*=\s*')([^']*)'/) do
+      if Regexp.last_match(1)
+        "#{Regexp.last_match(1)}#{Regexp.last_match(2).gsub('&amp;', '&')}\""
+      else
+        "#{Regexp.last_match(3)}#{Regexp.last_match(4).gsub('&amp;', '&')}'"
+      end
+    end
   end
   def msword_fix(doc)

metadata CHANGED

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: html2doc
 version: !ruby/object:Gem::Version
-  version: 1.10.4
+  version: 1.10.5
 platform: ruby
 authors:
 - Ribose Inc.
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2026-03-02 00:00:00.000000000 Z
+date: 2026-03-16 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: base64