RubyGems - html2doc - Versions diffs - 1.5.4 → 1.5.5 - Mend

html2doc 1.5.4 → 1.5.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (7)

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 74b05f46f1fd365f9ff0766e95d884bd2959c01b92c70d4a080651adfc2e8d3c
-  data.tar.gz: f70eb009e705ff767b34922fc0444740be8dde80da8b78c503784e02be0e4560
+  metadata.gz: 46856bf56ad5dd95f8f5781dc11049bb4600060c28c49715a262837ece8028bf
+  data.tar.gz: 866ba19867f233b45aeee436df719679623d671902b30476b61952f7a6357e1f
 SHA512:
-  metadata.gz: e3d93501d63bd27ed6e5245cb18dbc49013fcecd83bc57acf3a5d3c797636b928b91e148e33e8326f10f77f9b94a7175d85294eb86a1b4b2261aafb7dfe9d7a4
-  data.tar.gz: 4cbb8887089e622b9d9d1fd82dc4e5fd4e8e81a28a59dcf02ccce22cb3c9e6e7c4c7802177259557c268d697ced17ec09e4181e82c0dc851a613553e7f5b58c1
+  metadata.gz: b949f47c356437ce418f65ce7fd1c497648d0d0e960fe1e05d7318d280ddf6de23ddad8e5ab94a18f447b3eaba948b2a4db69ca2d00f0dcfebd692933a64c1da
+  data.tar.gz: 953999bd39aa1c1b6a0e1a34c939dcbdba01242c898f5a587eab546b4c9a4578e8051c9c68b49900390fcb3ab5d2de4ad6f0f1bce1af5a8ee6e7bd4daf300966

data/lib/html2doc/base.rb CHANGED Viewed

@@ -30,8 +30,7 @@ class Html2Doc
   end
   def process_header(headerfile)
-    return if headerfile.nil?
+    headerfile.nil? and return
     doc = File.read(headerfile, encoding: "utf-8")
     doc = header_image_cleanup(doc, @dir1, @filename,
                                File.dirname(@filename))
@@ -66,6 +65,7 @@ class Html2Doc
   end
   def cleanup(docxml)
+    locate_landscape(docxml)
     namespace(docxml.root)
     image_cleanup(docxml, @dir1, @imagedir)
     mathml_to_ooml(docxml)
@@ -76,76 +76,11 @@ class Html2Doc
     docxml
   end
-  NOKOHEAD = <<~HERE.freeze
-    <!DOCTYPE html SYSTEM
-    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
-    <html xmlns="http://www.w3.org/1999/xhtml">
-    <head> <title></title> <meta charset="UTF-8" /> </head>
-    <body> </body> </html>
-  HERE
-  def to_xhtml(xml)
-    xml.gsub!(/<\?xml[^>]*>/, "")
-    unless /<!DOCTYPE /.match? xml
-      xml = '<!DOCTYPE html SYSTEM
-          "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' + xml
-    end
-    xml = xml.gsub(/<!--\s*\[([^\]]+)\]>/, "<!-- MSWORD-COMMENT \\1 -->")
-      .gsub(/<!\s*\[endif\]\s*-->/, "<!-- MSWORD-COMMENT-END -->")
-    Nokogiri::XML.parse(xml)
-  end
-  DOCTYPE = <<~"DOCTYPE".freeze
-    <!DOCTYPE html SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
-  DOCTYPE
-  def from_xhtml(xml)
-    xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "")
-      .sub(DOCTYPE, "").gsub(%{ />}, "/>")
-      .gsub(/<!-- MSWORD-COMMENT (.+?) -->/, "<!--[\\1]>")
-      .gsub(/<!-- MSWORD-COMMENT-END -->/, "<![endif]-->")
-      .gsub("\n--&gt;\n", "\n-->\n")
-  end
-  def msword_fix(doc)
-    # brain damage in MSWord parser
-    doc.gsub!(%r{<w:DoNotOptimizeForBrowser></w:DoNotOptimizeForBrowser>},
-              "<w:DoNotOptimizeForBrowser/>")
-    doc.gsub!(%r{<span style="mso-special-character:footnote"/>},
-              '<span style="mso-special-character:footnote"></span>')
-    doc.gsub!(%r{<div style="mso-element:footnote-list"></div>},
-              '<div style="mso-element:footnote-list"/>')
-    doc.gsub!(%r{(<a style="mso-comment-reference:[^>/]+)/>}, "\\1></a>")
-    doc.gsub!(%r{<link rel="File-List"}, "<link rel=File-List")
-    doc.gsub!(%r{<meta http-equiv="Content-Type"},
-              "<meta http-equiv=Content-Type")
-    doc.gsub!(%r{></m:jc>}, "/>")
-    doc.gsub!(%r{></v:stroke>}, "/>")
-    doc.gsub!(%r{></v:f>}, "/>")
-    doc.gsub!(%r{></v:path>}, "/>")
-    doc.gsub!(%r{></o:lock>}, "/>")
-    doc.gsub!(%r{></v:imagedata>}, "/>")
-    doc.gsub!(%r{></w:wrap>}, "/>")
-    doc.gsub!(%r{<(/)?m:(span|em)\b}, "<\\1\\2")
-    doc.gsub!(%r{&tab;|&amp;tab;},
-              '<span style="mso-tab-count:1">&#xA0; </span>')
-    doc.split(%r{(<m:oMath>|</m:oMath>)}).each_slice(4).map do |a|
-      a.size > 2 and a[2] = a[2].gsub(/>\s+</, "><")
-      a
-    end.join
-  end
-  PRINT_VIEW = <<~XML.freeze
-    <xml>
-    <w:WordDocument>
-    <w:View>Print</w:View>
-    <w:Zoom>100</w:Zoom>
-    <w:DoNotOptimizeForBrowser/>
-    </w:WordDocument>
-    </xml>
-    <meta http-equiv='Content-Type' content="text/html; charset=utf-8"/>
-  XML
+  def locate_landscape(_docxml)
+    css = read_stylesheet(@stylesheet)
+    @landscape = css.scan(/div\.\S+\s+\{\s*page:\s*[^;]+L;\s*\}/m)
+      .map { |e| e.sub(/^div\.(\S+).*$/m, "\\1") }
+  end
   def define_head1(docxml, _dir)
     docxml.xpath("//*[local-name() = 'head']").each do |h|
@@ -174,7 +109,6 @@ class Html2Doc
     # xml.children.first << Nokogiri::XML::Comment.new(xml, s)
     xml.children.first << Nokogiri::XML::CDATA
       .new(xml, "\n<!--\n#{stylesheet}\n-->\n")
     xml.root.to_s
   end
@@ -199,30 +133,15 @@ class Html2Doc
       head.add_child css
     elsif title.nil?
       head.children.first.add_previous_sibling css
-    else
-      title.add_next_sibling css
+    else title.add_next_sibling css
     end
   end
-  def namespace(root)
-    {
-      o: "urn:schemas-microsoft-com:office:office",
-      w: "urn:schemas-microsoft-com:office:word",
-      v: "urn:schemas-microsoft-com:vml",
-      m: "http://schemas.microsoft.com/office/2004/12/omml",
-    }.each { |k, v| root.add_namespace_definition(k.to_s, v) }
-  end
-  def rootnamespace(root)
-    root.add_namespace(nil, "http://www.w3.org/TR/REC-html40")
-  end
   def bookmarks(docxml)
     docxml.xpath("//*[@id][not(@name)][not(@style = 'mso-element:footnote')]")
       .each do |x|
-      next if x["id"].empty? ||
-        %w(shapetype v:shapetype shape v:shape).include?(x.name)
+      (x["id"].empty? ||
+        %w(shapetype v:shapetype shape v:shape).include?(x.name)) and next
       if x.children.empty? then x.add_child("<a name='#{x['id']}'></a>")
       else x.children.first.previous = "<a name='#{x['id']}'></a>"
       end

data/lib/html2doc/mime.rb CHANGED Viewed

@@ -78,8 +78,7 @@ class Html2Doc
   def image_resize(img, path, maxheight, maxwidth)
     s, realsize = get_image_size(img, path)
-    return s if s[0] == nil && s[1] == nil
+    s[0] == nil && s[1] == nil and return s
     if img.name == "svg" && !img["viewBox"]
       img["viewBox"] = "0 0 #{s[0]} #{s[1]}"
     end
@@ -118,12 +117,24 @@ class Html2Doc
     docxml.traverse do |i|
       skip_image_cleanup?(i) and next
       local_filename = rename_image(i, dir, localdir)
-      i["width"], i["height"] = image_resize(i, local_filename, maxheight,
-                                             maxwidth)
+      i["width"], i["height"] =
+        if landscape?(i)
+          image_resize(i, local_filename, maxwidth, maxheight)
+        else
+          image_resize(i, local_filename, maxheight, maxwidth)
+        end
     end
     docxml
   end
+  def landscape?(img)
+    img.ancestors.each do |a|
+      a.name == "div" or next
+      @landscape.include?(a["class"]) and return true
+    end
+    false
+  end
   def rename_image(img, dir, localdir)
     local_filename = localname(img["src"], localdir)
     new_filename = "#{mkuuid}#{File.extname(img['src'])}"
@@ -134,10 +145,9 @@ class Html2Doc
   def skip_image_cleanup?(img)
     src = img["src"]
-    return true unless img.element? && %w(img v:imagedata).include?(img.name)
-    return true if src.nil? || src.empty? || /^http/.match?(src) ||
-      %r{^data:(image|application)/[^;]+;base64}.match?(src)
+    (img.element? && %w(img v:imagedata).include?(img.name)) or return true
+    (src.nil? || src.empty? || /^http/.match?(src) ||
+      %r{^data:(image|application)/[^;]+;base64}.match?(src)) and return true
     false
   end
@@ -222,8 +232,7 @@ class Html2Doc
       f.write %{<xml xmlns:o="urn:schemas-microsoft-com:office:office">
         <o:MainFile HRef="../#{filename}.htm"/>}
       Dir.entries(dir).sort.each do |item|
-        next if item == "." || item == ".." || /^\./.match(item)
+        (item == "." || item == ".." || /^\./.match(item)) and next
         f.write %{  <o:File HRef="#{item}"/>\n}
       end
       f.write("</xml>\n")

data/lib/html2doc/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 class Html2Doc
-  VERSION = "1.5.4".freeze
+  VERSION = "1.5.5".freeze
 end

data/lib/html2doc/xml.rb ADDED Viewed

@@ -0,0 +1,83 @@
+class Html2Doc
+  NOKOHEAD = <<~HERE.freeze
+    <!DOCTYPE html SYSTEM
+    "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+    <html xmlns="http://www.w3.org/1999/xhtml">
+    <head> <title></title> <meta charset="UTF-8" /> </head>
+    <body> </body> </html>
+  HERE
+  def to_xhtml(xml)
+    xml.gsub!(/<\?xml[^>]*>/, "")
+    unless /<!DOCTYPE /.match? xml
+      xml = '<!DOCTYPE html SYSTEM
+          "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' + xml
+    end
+    xml = xml.gsub(/<!--\s*\[([^\]]+)\]>/, "<!-- MSWORD-COMMENT \\1 -->")
+      .gsub(/<!\s*\[endif\]\s*-->/, "<!-- MSWORD-COMMENT-END -->")
+    Nokogiri::XML.parse(xml)
+  end
+  DOCTYPE = <<~DOCTYPE.freeze
+    <!DOCTYPE html SYSTEM "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+  DOCTYPE
+  def from_xhtml(xml)
+    xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "")
+      .sub(DOCTYPE, "").gsub(%{ />}, "/>")
+      .gsub(/<!-- MSWORD-COMMENT (.+?) -->/, "<!--[\\1]>")
+      .gsub(/<!-- MSWORD-COMMENT-END -->/, "<![endif]-->")
+      .gsub("\n--&gt;\n", "\n-->\n")
+  end
+  def msword_fix(doc)
+    # brain damage in MSWord parser
+    doc.gsub!(%r{<w:DoNotOptimizeForBrowser></w:DoNotOptimizeForBrowser>},
+              "<w:DoNotOptimizeForBrowser/>")
+    doc.gsub!(%r{<span style="mso-special-character:footnote"/>},
+              '<span style="mso-special-character:footnote"></span>')
+    doc.gsub!(%r{<div style="mso-element:footnote-list"></div>},
+              '<div style="mso-element:footnote-list"/>')
+    doc.gsub!(%r{(<a style="mso-comment-reference:[^>/]+)/>}, "\\1></a>")
+    doc.gsub!(%r{<link rel="File-List"}, "<link rel=File-List")
+    doc.gsub!(%r{<meta http-equiv="Content-Type"},
+              "<meta http-equiv=Content-Type")
+    doc.gsub!(%r{></m:jc>}, "/>")
+    doc.gsub!(%r{></v:stroke>}, "/>")
+    doc.gsub!(%r{></v:f>}, "/>")
+    doc.gsub!(%r{></v:path>}, "/>")
+    doc.gsub!(%r{></o:lock>}, "/>")
+    doc.gsub!(%r{></v:imagedata>}, "/>")
+    doc.gsub!(%r{></w:wrap>}, "/>")
+    doc.gsub!(%r{<(/)?m:(span|em)\b}, "<\\1\\2")
+    doc.gsub!(%r{&tab;|&amp;tab;},
+              '<span style="mso-tab-count:1">&#xA0; </span>')
+    doc.split(%r{(<m:oMath>|</m:oMath>)}).each_slice(4).map do |a|
+      a.size > 2 and a[2] = a[2].gsub(/>\s+</, "><")
+      a
+    end.join
+  end
+  PRINT_VIEW = <<~XML.freeze
+    <xml>
+    <w:WordDocument>
+    <w:View>Print</w:View>
+    <w:Zoom>100</w:Zoom>
+    <w:DoNotOptimizeForBrowser/>
+    </w:WordDocument>
+    </xml>
+    <meta http-equiv='Content-Type' content="text/html; charset=utf-8"/>
+  XML
+  def namespace(root)
+    { o: "urn:schemas-microsoft-com:office:office",
+      w: "urn:schemas-microsoft-com:office:word",
+      v: "urn:schemas-microsoft-com:vml",
+      m: "http://schemas.microsoft.com/office/2004/12/omml" }.each { |k, v| root.add_namespace_definition(k.to_s, v) }
+  end
+  def rootnamespace(root)
+    root.add_namespace(nil, "http://www.w3.org/TR/REC-html40")
+  end
+end

data/lib/html2doc.rb CHANGED Viewed

@@ -4,3 +4,4 @@ require_relative "html2doc/mime"
 require_relative "html2doc/notes"
 require_relative "html2doc/math"
 require_relative "html2doc/lists"
+require_relative "html2doc/xml"

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: html2doc
 version: !ruby/object:Gem::Version
-  version: 1.5.4
+  version: 1.5.5
 platform: ruby
 authors:
 - Ribose Inc.
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2023-05-19 00:00:00.000000000 Z
+date: 2023-06-29 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: htmlentities
@@ -293,6 +293,7 @@ files:
 - lib/html2doc/notes.rb
 - lib/html2doc/version.rb
 - lib/html2doc/wordstyle.css
+- lib/html2doc/xml.rb
 homepage: https://github.com/metanorma/html2doc
 licenses:
 - CC-BY-SA-3.0