RubyGems - html2doc - Versions diffs - 1.5.3 → 1.5.4 - Mend

html2doc 1.5.3 → 1.5.4

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (5) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 114c21f3bbc33c244fb49577d4c85334789a388be61d65c04765d33ed1913208
-  data.tar.gz: 60c00e3a300eb16db1f2332393d5ff9a45f65c60a8a1d40a1cfaba2767744249
+  metadata.gz: 74b05f46f1fd365f9ff0766e95d884bd2959c01b92c70d4a080651adfc2e8d3c
+  data.tar.gz: f70eb009e705ff767b34922fc0444740be8dde80da8b78c503784e02be0e4560
 SHA512:
-  metadata.gz: 606e87a4dcfd0a3e270588461c6150dd38b1baad57bfc90b3965806477945deeae0b2eaeca87af9800b70d214133b11588a66f8205108e3af87f13464306b34a
-  data.tar.gz: ecc2ad3631a0cafb9ff3b7f04022b9a977efac71099f7b256f840ec6d15e418bb18f426c729d6a40e9a9cc196f3e02d4cf0c5c448a99c6097c2da9d6b2bfb4d0
+  metadata.gz: e3d93501d63bd27ed6e5245cb18dbc49013fcecd83bc57acf3a5d3c797636b928b91e148e33e8326f10f77f9b94a7175d85294eb86a1b4b2261aafb7dfe9d7a4
+  data.tar.gz: 4cbb8887089e622b9d9d1fd82dc4e5fd4e8e81a28a59dcf02ccce22cb3c9e6e7c4c7802177259557c268d697ced17ec09e4181e82c0dc851a613553e7f5b58c1

data/lib/html2doc/base.rb CHANGED Viewed

@@ -168,9 +168,7 @@ class Html2Doc
   end
   def stylesheet(_filename, _header_filename, cssname)
-    (cssname.nil? || cssname.empty?) and
-      cssname = File.join(File.dirname(__FILE__), "wordstyle.css")
-    stylesheet = File.read(cssname, encoding: "UTF-8")
+    stylesheet = read_stylesheet(cssname)
     xml = Nokogiri::XML("<style/>")
     # s = Nokogiri::XML::CDATA.new(xml, "\n#{stylesheet}\n")
     # xml.children.first << Nokogiri::XML::Comment.new(xml, s)
@@ -180,6 +178,12 @@ class Html2Doc
     xml.root.to_s
   end
+  def read_stylesheet(cssname)
+    (cssname.nil? || cssname.empty?) and
+      cssname = File.join(File.dirname(__FILE__), "wordstyle.css")
+    File.read(cssname, encoding: "UTF-8")
+  end
   def define_head(docxml)
     title = docxml.at("//*[local-name() = 'head']/*[local-name() = 'title']")
     head = docxml.at("//*[local-name() = 'head']")

data/lib/html2doc/mime.rb CHANGED Viewed

@@ -76,7 +76,6 @@ class Html2Doc
     end
   end
-  # max width for Word document is 400, max height is 680
   def image_resize(img, path, maxheight, maxwidth)
     s, realsize = get_image_size(img, path)
     return s if s[0] == nil && s[1] == nil
@@ -115,21 +114,89 @@ class Html2Doc
   # only processes locally stored images
   def image_cleanup(docxml, dir, localdir)
+    maxheight, maxwidth = page_dimensions(docxml)
     docxml.traverse do |i|
-      src = i["src"]
-      next unless i.element? && %w(img v:imagedata).include?(i.name)
-      next if src.nil? || src.empty? || /^http/.match?(src)
-      next if %r{^data:(image|application)/[^;]+;base64}.match? src
-      local_filename = localname(src, localdir)
-      new_filename = "#{mkuuid}#{File.extname(src)}"
-      FileUtils.cp local_filename, File.join(dir, new_filename)
-      i["width"], i["height"] = image_resize(i, local_filename, 680, 400)
-      i["src"] = File.join(File.basename(dir), new_filename)
+      skip_image_cleanup?(i) and next
+      local_filename = rename_image(i, dir, localdir)
+      i["width"], i["height"] = image_resize(i, local_filename, maxheight,
+                                             maxwidth)
     end
     docxml
   end
+  def rename_image(img, dir, localdir)
+    local_filename = localname(img["src"], localdir)
+    new_filename = "#{mkuuid}#{File.extname(img['src'])}"
+    FileUtils.cp local_filename, File.join(dir, new_filename)
+    img["src"] = File.join(File.basename(dir), new_filename)
+    local_filename
+  end
+  def skip_image_cleanup?(img)
+    src = img["src"]
+    return true unless img.element? && %w(img v:imagedata).include?(img.name)
+    return true if src.nil? || src.empty? || /^http/.match?(src) ||
+      %r{^data:(image|application)/[^;]+;base64}.match?(src)
+    false
+  end
+  # we are going to use the 2nd instance of @page in the Word CSS,
+  # skipping the cover page. Currently doesn't deal with Landscape.
+  # Scan both @stylesheet and docxml.to_xml (where @standardstylesheet has ended up)
+  # Allow 0.9 * height to fit caption
+  def page_dimensions(docxml)
+    stylesheet = read_stylesheet(@stylesheet)
+    page_size = find_page_size_in_doc(stylesheet, docxml.to_xml) or
+      return [680, 400]
+    m_size = /size:\s*(\S+)\s+(\S+)\s*;/.match(page_size) or return [680, 400]
+    m_marg = /margin:\s*(\S+)\s+(\S+)\s*(\S+)\s*(\S+)\s*;/.match(page_size) or
+      return [680, 400]
+    [0.9 * (units_to_px(m_size[2]) - units_to_px(m_marg[1]) - units_to_px(m_marg[3])),
+     units_to_px(m_size[1]) - units_to_px(m_marg[2]) - units_to_px(m_marg[4])]
+  rescue StandardError
+    [680, 400]
+  end
+  def find_page_size_in_doc(stylesheet, doc)
+    find_page_size(stylesheet, "WordSection2", false) ||
+      find_page_size(stylesheet, "WordSection3", false) ||
+      find_page_size(doc, "WordSection2", true) ||
+      find_page_size(doc, "WordSection3", true) ||
+      find_page_size(stylesheet, "", false) || find_page_size(doc, "", true)
+  end
+  # if in_xml, CSS is embedded in XML <style> tag
+  def find_page_size(stylesheet, klass, in_xml)
+    xml_found = false
+    found = false
+    ret = ""
+    stylesheet&.lines&.each do |l|
+      in_xml && l.include?("<style") and xml_found = true and found = false
+      in_xml && l.include?("</style>") and xml_found = false
+      /^\s*@page\s+#{klass}/.match?(l) and found = true
+      found && /^\s*\{?size:/.match?(l) and ret += l
+      found && /^\s*\{?margin:/.match?(l) and ret += l
+      if found && /}/.match?(l)
+        !ret.blank? && (!in_xml || xml_found) and return ret
+        ret = ""
+        found = false
+      end
+    end
+    nil
+  end
+  def units_to_px(measure)
+    m = /^(\S+)(pt|cm)/.match(measure)
+    ret = case m[2]
+          when "px" then (m[1].to_f * 0.75)
+          when "pt" then m[1].to_f
+          when "cm" then (m[1].to_f * 28.346456693)
+          when "in" then (m[1].to_f * 72)
+          end
+    ret.to_i
+  end
   # do not parse the header through Nokogiri, since it will contain
   # non-XML like <![if !supportFootnotes]>
   def header_image_cleanup(doc, dir, filename, localdir)

data/lib/html2doc/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 class Html2Doc
-  VERSION = "1.5.3".freeze
+  VERSION = "1.5.4".freeze
 end

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: html2doc
 version: !ruby/object:Gem::Version
-  version: 1.5.3
+  version: 1.5.4
 platform: ruby
 authors:
 - Ribose Inc.
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-04-10 00:00:00.000000000 Z
+date: 2023-05-19 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: htmlentities