RubyGems - html2doc - Versions diffs - 1.1.3 → 1.3.0 - Mend

html2doc 1.1.3 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (9) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: bfc410fe0fcf441579436aaf3befc8929176ee8f531e68606b4ca2252934e222
-  data.tar.gz: 8e7516c957ed9cb3de6bfffaab8b472333a5a727adce16c5fc5fda3e78c4767d
+  metadata.gz: a71b394c280e43e4c661958ef48e0d1a7e26f05e9988e3a697837bd972b5a2f5
+  data.tar.gz: 243ef6cab6e2674befed8cc1d3190bc3448cceae4360604bacb956fe9bb72efe
 SHA512:
-  metadata.gz: 623bd220a0631e9304ba49d586ea76824fd3a772b2afdfbf557903cafdecc942ba90a346a8947b818aa0d2e74043cedefb68a9ee859b4e1b4f76241236730086
-  data.tar.gz: 9dc7ea304707dd234fb5a2c16a5377583cce3b17839c90d51f5a172ebe704751a45ebe4dc832362d34ca1d7e44b96ce39f6afa943098fee7660db67c2049d6a0
+  metadata.gz: 80dab821665aeccf3c2f89a301af6fc63b911b79659c0c65ffceb4cbe7a1c637342b37d4803f3d41a842ace6d1c694d031d9fc38402a7adbce67d74c30bb15c6
+  data.tar.gz: 927dfe85cbbbf65da137465776dc1261364f6267e955b8d26f9fd5de994a79bea210b206dda32522fa758c7ffa0f50549f848d77694b83cbd506c28fc1111c78

data/.github/workflows/rake.yml CHANGED Viewed

@@ -10,23 +10,6 @@ on:
 jobs:
   rake:
-    name: Test on Ruby ${{ matrix.ruby }} ${{ matrix.os }}
-    runs-on: ${{ matrix.os }}
-    continue-on-error: ${{ matrix.experimental }}
-    strategy:
-      fail-fast: false
-      matrix:
-        ruby: [ '3.0', '2.7', '2.6', '2.5', '2.4' ]
-        os: [ ubuntu-latest, windows-latest, macos-latest ]
-        experimental: [ false ]
-    steps:
-      - uses: actions/checkout@v2
-        with:
-          submodules: true
-      - uses: ruby/setup-ruby@v1
-        with:
-          ruby-version: ${{ matrix.ruby }}
-          bundler-cache: true
-      - run: bundle exec rake
+    uses: metanorma/metanorma-build-scripts/.github/workflows/generic-rake.yml@main
+    secrets:
+      pat_token: ${{ secrets.METANORMA_CI_PAT_TOKEN }}

data/.rubocop.yml CHANGED Viewed

@@ -7,4 +7,4 @@ inherit_from:
 # ...
 AllCops:
-  TargetRubyVersion: 2.4
+  TargetRubyVersion: 2.5

data/README.adoc CHANGED Viewed

@@ -58,11 +58,12 @@ There are two other Microsoft Word vendors in the Ruby ecosystem.
 --
 require "html2doc"
-Html2Doc.process(result, filename: filename, stylesheet: stylesheet, header_filename: header_filename, dir: dir, asciimathdelims: asciimathdelims, liststyles: liststyles)
+Html2Doc.process(result, filename: filename, imagedir: imagedir, stylesheet: stylesheet, header_filename: header_filename, dir: dir, asciimathdelims: asciimathdelims, liststyles: liststyles)
 --
 result:: is the Html document to be converted into Word, as a string.
 filename:: is the name the document is to be saved as, without a file suffix
+imagedir:: base directory for local image file names in source XML
 stylesheet:: is the full path filename of the CSS stylesheet for Microsoft Word-specific styles. If this is not provided, the program will use the default stylesheet included in the gem, `lib/html2doc/wordstyle.css`. The stylesheet provided must match this stylesheet; you can obtain one by saving a Word document with your desired styles to HTML, and extracting the style definitions from the HTML document header.
 header_filename:: is the filename of the HTML document containing header and footer for the document, as well as footnote/endnote separators; if there is none, use nil. To generate your own such document, save a Word document with headers/footers and/or footnote/endnote separators as an HTML document; the `header.html` will be in the `{filename}.fld` folder generated along with the HTML. A sample file is available at https://github.com/metanorma/metanorma-iso/blob/master/lib/asciidoctor/iso/word/header.html
 dir:: is the folder that any ancillary files (images, headers, filelist) are to be saved to. If not provided, it will be created as `{filename}_files`. Anything in the directory will be attached to the Word document; so this folder should only contain the images that accompany the document. (If the images are elsewhere on the local drive, the gem will move them into the folder. External URL images are left alone, and are not downloaded.)

data/html2doc.gemspec CHANGED Viewed

@@ -22,18 +22,18 @@ Gem::Specification.new do |spec|
   spec.require_paths = ["lib"]
   spec.files         = `git ls-files`.split("\n")
   spec.test_files    = `git ls-files -- {spec}/*`.split("\n")
-  spec.required_ruby_version = Gem::Requirement.new(">= 2.4.0")
+  spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
   spec.add_dependency "asciimath", "~> 2.0.2"
   spec.add_dependency "htmlentities", "~> 4.3.4"
   spec.add_dependency "image_size"
   spec.add_dependency "mime-types"
-  spec.add_dependency "nokogiri", "~> 1.11"
+  spec.add_dependency "nokogiri", "~> 1.12"
   spec.add_dependency "plane1converter", "~> 0.0.1"
   spec.add_dependency "thread_safe"
   spec.add_dependency "uuidtools"
-  spec.add_development_dependency "byebug", "~> 9.1"
+  spec.add_development_dependency "debug"
   spec.add_development_dependency "equivalent-xml", "~> 0.6"
   spec.add_development_dependency "guard", "~> 2.14"
   spec.add_development_dependency "guard-rspec", "~> 4.7"

data/lib/html2doc/base.rb CHANGED Viewed

@@ -53,7 +53,7 @@ module Html2Doc
   def self.cleanup(docxml, hash)
     namespace(docxml.root)
-    image_cleanup(docxml, hash[:dir1], File.dirname(hash[:filename]))
+    image_cleanup(docxml, hash[:dir1], hash[:imagedir])
     mathml_to_ooml(docxml)
     lists(docxml, hash[:liststyles])
     footnotes(docxml)
@@ -76,6 +76,8 @@ module Html2Doc
       xml = '<!DOCTYPE html SYSTEM
           "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' + xml
     end
+    xml = xml.gsub(/<!--\s*\[([^]]+)\]>/, "<!-- MSWORD-COMMENT \\1 -->")
+      .gsub(/<!\s*\[endif\]\s*-->/, "<!-- MSWORD-COMMENT-END -->")
     Nokogiri::XML.parse(xml)
   end
@@ -85,12 +87,16 @@ module Html2Doc
   def self.from_xhtml(xml)
     xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "")
-      .sub(DOCTYPE, "")
-      .gsub(%{ />}, "/>")
+      .sub(DOCTYPE, "").gsub(%{ />}, "/>")
+      .gsub(/<!-- MSWORD-COMMENT (.+?) -->/, "<!--[\\1]>")
+      .gsub(/<!-- MSWORD-COMMENT-END -->/, "<![endif]-->")
+      .gsub("\n--&gt;\n", "\n-->\n")
   end
   def self.msword_fix(doc)
     # brain damage in MSWord parser
+    doc.gsub!(%r{<w:DoNotOptimizeForBrowser></w:DoNotOptimizeForBrowser>},
+              "<w:DoNotOptimizeForBrowser/>")
     doc.gsub!(%r{<span style="mso-special-character:footnote"/>},
               '<span style="mso-special-character:footnote"></span>')
     doc.gsub!(%r{<div style="mso-element:footnote-list"></div>},
@@ -116,7 +122,7 @@ module Html2Doc
   end
   PRINT_VIEW = <<~XML.freeze
-    <!--[if gte mso 9]>
     <xml>
     <w:WordDocument>
     <w:View>Print</w:View>
@@ -124,8 +130,7 @@ module Html2Doc
     <w:DoNotOptimizeForBrowser/>
     </w:WordDocument>
     </xml>
-    <![endif]-->
-    <meta http-equiv=Content-Type content="text/html; charset=utf-8"/>
+    <meta http-equiv='Content-Type' content="text/html; charset=utf-8"/>
   XML
   def self.define_head1(docxml, _dir)
@@ -148,12 +153,16 @@ module Html2Doc
     end
   end
-  def self.stylesheet(_filename, _header_filename, fn)
-    (fn.nil? || fn.empty?) and
-      fn = File.join(File.dirname(__FILE__), "wordstyle.css")
-    stylesheet = File.read(fn, encoding: "UTF-8")
+  def self.stylesheet(_filename, _header_filename, cssname)
+    (cssname.nil? || cssname.empty?) and
+      cssname = File.join(File.dirname(__FILE__), "wordstyle.css")
+    stylesheet = File.read(cssname, encoding: "UTF-8")
     xml = Nokogiri::XML("<style/>")
-    xml.children.first << Nokogiri::XML::Comment.new(xml, "\n#{stylesheet}\n")
+    #s = Nokogiri::XML::CDATA.new(xml, "\n#{stylesheet}\n")
+    #xml.children.first << Nokogiri::XML::Comment.new(xml, s)
+    xml.children.first << Nokogiri::XML::CDATA
+      .new(xml, "\n<!--\n#{stylesheet}\n-->\n")
     xml.root.to_s
   end

data/lib/html2doc/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Html2Doc
-  VERSION = "1.1.3".freeze
+  VERSION = "1.3.0".freeze
 end

data/spec/html2doc_spec.rb CHANGED Viewed

@@ -41,7 +41,7 @@ WORD_HDR = <<~HDR.freeze
   Content-Type: text/html; charset="utf-8"
   <?xml version="1.0"?>
-  <html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head><!--[if gte mso 9]>
+  <html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head>
   <xml>
   <w:WordDocument>
   <w:View>Print</w:View>
@@ -49,7 +49,6 @@ WORD_HDR = <<~HDR.freeze
   <w:DoNotOptimizeForBrowser/>
   </w:WordDocument>
   </xml>
-  <![endif]-->
   <meta http-equiv=Content-Type content="text/html; charset=utf-8"/>
     <link rel=File-List href="cid:filelist.xml"/>
@@ -278,6 +277,17 @@ RSpec.describe Html2Doc do
     expect(Html2Doc::VERSION).not_to be nil
   end
+  it "preserves Word HTML directives" do
+    Html2Doc.process(html_input(%[A<!--[if gte mso 9]>X<![endif]-->B]), filename: "test")
+    expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
+      .to match_fuzzy(<<~OUTPUT)
+        #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
+        #{word_body(%{A<!--[if gte mso 9]>X<![endif]-->B},
+                   '<div style="mso-element:footnote-list"/>')}
+        #{WORD_FTR1}
+      OUTPUT
+  end
   it "processes a blank document" do
     Html2Doc.process(html_input(""), filename: "test")
     expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
@@ -367,7 +377,8 @@ RSpec.describe Html2Doc do
     File.open("spec/header_img1.html", "w:UTF-8") do |f|
       f.write(
         doc.sub(%r{spec/19160-6.png},
-                File.expand_path(File.join(File.dirname(__FILE__), "19160-6.png"))),
+                File.expand_path(File.join(File.dirname(__FILE__),
+                                           "19160-6.png"))),
       )
     end
     Html2Doc.process(html_input(""),
@@ -565,7 +576,8 @@ RSpec.describe Html2Doc do
   it "resizes images for height, in a file in a subdirectory" do
     simple_body = '<img src="19160-6.png">'
-    Html2Doc.process(html_input(simple_body), filename: "spec/test")
+    Html2Doc.process(html_input(simple_body), filename: "spec/test",
+                                              imagedir: "spec")
     testdoc = File.read("spec/test.doc", encoding: "utf-8")
     expect(testdoc).to match(%r{Content-Type: image/png})
     expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
@@ -577,7 +589,7 @@ RSpec.describe Html2Doc do
   it "resizes images for width" do
     simple_body = '<img src="spec/19160-7.gif">'
-    Html2Doc.process(html_input(simple_body), filename: "test")
+    Html2Doc.process(html_input(simple_body), filename: "test", imagedir: ".")
     testdoc = File.read("test.doc", encoding: "utf-8")
     expect(testdoc).to match(%r{Content-Type: image/gif})
     expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
@@ -589,7 +601,7 @@ RSpec.describe Html2Doc do
   it "resizes images for height" do
     simple_body = '<img src="spec/19160-8.jpg">'
-    Html2Doc.process(html_input(simple_body), filename: "test")
+    Html2Doc.process(html_input(simple_body), filename: "test", imagedir: ".")
     testdoc = File.read("test.doc", encoding: "utf-8")
     expect(testdoc).to match(%r{Content-Type: image/jpeg})
     expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
@@ -642,7 +654,7 @@ RSpec.describe Html2Doc do
   it "does not move images if they are external URLs" do
     simple_body = '<img src="https://example.com/19160-6.png">'
-    Html2Doc.process(html_input(simple_body), filename: "test")
+    Html2Doc.process(html_input(simple_body), filename: "test", imagedir: ".")
     testdoc = File.read("test.doc", encoding: "utf-8")
     expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
       #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
@@ -653,7 +665,8 @@ RSpec.describe Html2Doc do
   it "deals with absolute image locations" do
     simple_body = %{<img src="#{__dir__}/19160-6.png">}
-    Html2Doc.process(html_input(simple_body), filename: "spec/test")
+    Html2Doc.process(html_input(simple_body), filename: "spec/test",
+                                              imagedir: ".")
     testdoc = File.read("spec/test.doc", encoding: "utf-8")
     expect(testdoc).to match(%r{Content-Type: image/png})
     expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
@@ -836,7 +849,7 @@ RSpec.describe Html2Doc do
   it "test image base64 image encoding" do
     simple_body = '<img src="19160-6.png">'
     Html2Doc.process(html_input(simple_body),
-                     filename: "spec/test", debug: true)
+                     filename: "spec/test", debug: true, imagedir: "spec")
     testdoc = File.read("spec/test.doc", encoding: "utf-8")
     base64_image = testdoc[/image\/png\n\n(.*?)\n\n----/m, 1].gsub!("\n", "")
     base64_image_basename = testdoc[%r{Content-ID: <([0-9a-z\-]+)\.png}m, 1]

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: html2doc
 version: !ruby/object:Gem::Version
-  version: 1.1.3
+  version: 1.3.0
 platform: ruby
 authors:
 - Ribose Inc.
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-06-07 00:00:00.000000000 Z
+date: 2022-01-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: asciimath
@@ -72,14 +72,14 @@ dependencies:
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.11'
+        version: '1.12'
   type: :runtime
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
     - - "~>"
       - !ruby/object:Gem::Version
-        version: '1.11'
+        version: '1.12'
 - !ruby/object:Gem::Dependency
   name: plane1converter
   requirement: !ruby/object:Gem::Requirement
@@ -123,19 +123,19 @@ dependencies:
       - !ruby/object:Gem::Version
         version: '0'
 - !ruby/object:Gem::Dependency
-  name: byebug
+  name: debug
   requirement: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '9.1'
+        version: '0'
   type: :development
   prerelease: false
   version_requirements: !ruby/object:Gem::Requirement
     requirements:
-    - - "~>"
+    - - ">="
       - !ruby/object:Gem::Version
-        version: '9.1'
+        version: '0'
 - !ruby/object:Gem::Dependency
   name: equivalent-xml
   requirement: !ruby/object:Gem::Requirement
@@ -327,14 +327,14 @@ required_ruby_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
-      version: 2.4.0
+      version: 2.5.0
 required_rubygems_version: !ruby/object:Gem::Requirement
   requirements:
   - - ">="
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.1.4
+rubygems_version: 3.2.32
 signing_key:
 specification_version: 4
 summary: Convert HTML document to Microsoft Word document