RubyGems - html2doc - Versions diffs - 1.2.1 → 1.3.0 - Mend

html2doc 1.2.1 → 1.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (6) hide show

checksums.yaml CHANGED Viewed

@@ -1,7 +1,7 @@
 ---
 SHA256:
-  metadata.gz: 99602f2e4d42bf9e809ccc26cdfdba8a602ead1dfbfb68876ca2e90683e091e9
-  data.tar.gz: aba931c818606124656a92a78760e8870291829fcd4476656bbd1fa656697855
+  metadata.gz: a71b394c280e43e4c661958ef48e0d1a7e26f05e9988e3a697837bd972b5a2f5
+  data.tar.gz: 243ef6cab6e2674befed8cc1d3190bc3448cceae4360604bacb956fe9bb72efe
 SHA512:
-  metadata.gz: 5e94fd597cd70658bd034f6a202c4010b31145b1da2375a59f6f35c648a278b6ad40221c85d6e19c25aaaf9da3e734c3d3fff33b6175226db4a1b468fea842ea
-  data.tar.gz: 30a8189d7440fa8742c2c83bc3bbff3ac2008e1fc7add0add4534bfaf341b1d80d12434b038051d729eb165aeca9199a1e79b4b9a430a397b386704a5121f461
+  metadata.gz: 80dab821665aeccf3c2f89a301af6fc63b911b79659c0c65ffceb4cbe7a1c637342b37d4803f3d41a842ace6d1c694d031d9fc38402a7adbce67d74c30bb15c6
+  data.tar.gz: 927dfe85cbbbf65da137465776dc1261364f6267e955b8d26f9fd5de994a79bea210b206dda32522fa758c7ffa0f50549f848d77694b83cbd506c28fc1111c78

data/.github/workflows/rake.yml CHANGED Viewed

@@ -10,23 +10,6 @@ on:
 jobs:
   rake:
-    name: Test on Ruby ${{ matrix.ruby }} ${{ matrix.os }}
-    runs-on: ${{ matrix.os }}
-    continue-on-error: ${{ matrix.experimental }}
-    strategy:
-      fail-fast: false
-      matrix:
-        ruby: [ '3.0', '2.7', '2.6', '2.5' ]
-        os: [ ubuntu-latest, windows-latest, macos-latest ]
-        experimental: [ false ]
-    steps:
-      - uses: actions/checkout@v2
-        with:
-          submodules: true
-      - uses: ruby/setup-ruby@v1
-        with:
-          ruby-version: ${{ matrix.ruby }}
-          bundler-cache: true
-      - run: bundle exec rake
+    uses: metanorma/metanorma-build-scripts/.github/workflows/generic-rake.yml@main
+    secrets:
+      pat_token: ${{ secrets.METANORMA_CI_PAT_TOKEN }}

data/lib/html2doc/base.rb CHANGED Viewed

@@ -76,6 +76,8 @@ module Html2Doc
       xml = '<!DOCTYPE html SYSTEM
           "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' + xml
     end
+    xml = xml.gsub(/<!--\s*\[([^]]+)\]>/, "<!-- MSWORD-COMMENT \\1 -->")
+      .gsub(/<!\s*\[endif\]\s*-->/, "<!-- MSWORD-COMMENT-END -->")
     Nokogiri::XML.parse(xml)
   end
@@ -85,12 +87,16 @@ module Html2Doc
   def self.from_xhtml(xml)
     xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "")
-      .sub(DOCTYPE, "")
-      .gsub(%{ />}, "/>")
+      .sub(DOCTYPE, "").gsub(%{ />}, "/>")
+      .gsub(/<!-- MSWORD-COMMENT (.+?) -->/, "<!--[\\1]>")
+      .gsub(/<!-- MSWORD-COMMENT-END -->/, "<![endif]-->")
+      .gsub("\n--&gt;\n", "\n-->\n")
   end
   def self.msword_fix(doc)
     # brain damage in MSWord parser
+    doc.gsub!(%r{<w:DoNotOptimizeForBrowser></w:DoNotOptimizeForBrowser>},
+              "<w:DoNotOptimizeForBrowser/>")
     doc.gsub!(%r{<span style="mso-special-character:footnote"/>},
               '<span style="mso-special-character:footnote"></span>')
     doc.gsub!(%r{<div style="mso-element:footnote-list"></div>},
@@ -116,7 +122,7 @@ module Html2Doc
   end
   PRINT_VIEW = <<~XML.freeze
-    <!--[if gte mso 9]>
     <xml>
     <w:WordDocument>
     <w:View>Print</w:View>
@@ -124,8 +130,7 @@ module Html2Doc
     <w:DoNotOptimizeForBrowser/>
     </w:WordDocument>
     </xml>
-    <![endif]-->
-    <meta http-equiv=Content-Type content="text/html; charset=utf-8"/>
+    <meta http-equiv='Content-Type' content="text/html; charset=utf-8"/>
   XML
   def self.define_head1(docxml, _dir)
@@ -148,12 +153,16 @@ module Html2Doc
     end
   end
-  def self.stylesheet(_filename, _header_filename, fn)
-    (fn.nil? || fn.empty?) and
-      fn = File.join(File.dirname(__FILE__), "wordstyle.css")
-    stylesheet = File.read(fn, encoding: "UTF-8")
+  def self.stylesheet(_filename, _header_filename, cssname)
+    (cssname.nil? || cssname.empty?) and
+      cssname = File.join(File.dirname(__FILE__), "wordstyle.css")
+    stylesheet = File.read(cssname, encoding: "UTF-8")
     xml = Nokogiri::XML("<style/>")
-    xml.children.first << Nokogiri::XML::Comment.new(xml, "\n#{stylesheet}\n")
+    #s = Nokogiri::XML::CDATA.new(xml, "\n#{stylesheet}\n")
+    #xml.children.first << Nokogiri::XML::Comment.new(xml, s)
+    xml.children.first << Nokogiri::XML::CDATA
+      .new(xml, "\n<!--\n#{stylesheet}\n-->\n")
     xml.root.to_s
   end

data/lib/html2doc/version.rb CHANGED Viewed

@@ -1,3 +1,3 @@
 module Html2Doc
-  VERSION = "1.2.1".freeze
+  VERSION = "1.3.0".freeze
 end

data/spec/html2doc_spec.rb CHANGED Viewed

@@ -41,7 +41,7 @@ WORD_HDR = <<~HDR.freeze
   Content-Type: text/html; charset="utf-8"
   <?xml version="1.0"?>
-  <html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head><!--[if gte mso 9]>
+  <html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head>
   <xml>
   <w:WordDocument>
   <w:View>Print</w:View>
@@ -49,7 +49,6 @@ WORD_HDR = <<~HDR.freeze
   <w:DoNotOptimizeForBrowser/>
   </w:WordDocument>
   </xml>
-  <![endif]-->
   <meta http-equiv=Content-Type content="text/html; charset=utf-8"/>
     <link rel=File-List href="cid:filelist.xml"/>
@@ -278,6 +277,17 @@ RSpec.describe Html2Doc do
     expect(Html2Doc::VERSION).not_to be nil
   end
+  it "preserves Word HTML directives" do
+    Html2Doc.process(html_input(%[A<!--[if gte mso 9]>X<![endif]-->B]), filename: "test")
+    expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
+      .to match_fuzzy(<<~OUTPUT)
+        #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
+        #{word_body(%{A<!--[if gte mso 9]>X<![endif]-->B},
+                   '<div style="mso-element:footnote-list"/>')}
+        #{WORD_FTR1}
+      OUTPUT
+  end
   it "processes a blank document" do
     Html2Doc.process(html_input(""), filename: "test")
     expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
@@ -367,7 +377,8 @@ RSpec.describe Html2Doc do
     File.open("spec/header_img1.html", "w:UTF-8") do |f|
       f.write(
         doc.sub(%r{spec/19160-6.png},
-                File.expand_path(File.join(File.dirname(__FILE__), "19160-6.png"))),
+                File.expand_path(File.join(File.dirname(__FILE__),
+                                           "19160-6.png"))),
       )
     end
     Html2Doc.process(html_input(""),
@@ -565,7 +576,8 @@ RSpec.describe Html2Doc do
   it "resizes images for height, in a file in a subdirectory" do
     simple_body = '<img src="19160-6.png">'
-    Html2Doc.process(html_input(simple_body), filename: "spec/test", imagedir: "spec")
+    Html2Doc.process(html_input(simple_body), filename: "spec/test",
+                                              imagedir: "spec")
     testdoc = File.read("spec/test.doc", encoding: "utf-8")
     expect(testdoc).to match(%r{Content-Type: image/png})
     expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
@@ -653,7 +665,8 @@ RSpec.describe Html2Doc do
   it "deals with absolute image locations" do
     simple_body = %{<img src="#{__dir__}/19160-6.png">}
-    Html2Doc.process(html_input(simple_body), filename: "spec/test", imagedir: ".")
+    Html2Doc.process(html_input(simple_body), filename: "spec/test",
+                                              imagedir: ".")
     testdoc = File.read("spec/test.doc", encoding: "utf-8")
     expect(testdoc).to match(%r{Content-Type: image/png})
     expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)

metadata CHANGED Viewed

@@ -1,14 +1,14 @@
 --- !ruby/object:Gem::Specification
 name: html2doc
 version: !ruby/object:Gem::Version
-  version: 1.2.1
+  version: 1.3.0
 platform: ruby
 authors:
 - Ribose Inc.
 autorequire:
 bindir: bin
 cert_chain: []
-date: 2021-11-29 00:00:00.000000000 Z
+date: 2022-01-22 00:00:00.000000000 Z
 dependencies:
 - !ruby/object:Gem::Dependency
   name: asciimath
@@ -334,7 +334,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
     - !ruby/object:Gem::Version
       version: '0'
 requirements: []
-rubygems_version: 3.2.22
+rubygems_version: 3.2.32
 signing_key:
 specification_version: 4
 summary: Convert HTML document to Microsoft Word document