html2doc 1.2.1 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 99602f2e4d42bf9e809ccc26cdfdba8a602ead1dfbfb68876ca2e90683e091e9
4
- data.tar.gz: aba931c818606124656a92a78760e8870291829fcd4476656bbd1fa656697855
3
+ metadata.gz: a71b394c280e43e4c661958ef48e0d1a7e26f05e9988e3a697837bd972b5a2f5
4
+ data.tar.gz: 243ef6cab6e2674befed8cc1d3190bc3448cceae4360604bacb956fe9bb72efe
5
5
  SHA512:
6
- metadata.gz: 5e94fd597cd70658bd034f6a202c4010b31145b1da2375a59f6f35c648a278b6ad40221c85d6e19c25aaaf9da3e734c3d3fff33b6175226db4a1b468fea842ea
7
- data.tar.gz: 30a8189d7440fa8742c2c83bc3bbff3ac2008e1fc7add0add4534bfaf341b1d80d12434b038051d729eb165aeca9199a1e79b4b9a430a397b386704a5121f461
6
+ metadata.gz: 80dab821665aeccf3c2f89a301af6fc63b911b79659c0c65ffceb4cbe7a1c637342b37d4803f3d41a842ace6d1c694d031d9fc38402a7adbce67d74c30bb15c6
7
+ data.tar.gz: 927dfe85cbbbf65da137465776dc1261364f6267e955b8d26f9fd5de994a79bea210b206dda32522fa758c7ffa0f50549f848d77694b83cbd506c28fc1111c78
@@ -10,23 +10,6 @@ on:
10
10
 
11
11
  jobs:
12
12
  rake:
13
- name: Test on Ruby ${{ matrix.ruby }} ${{ matrix.os }}
14
- runs-on: ${{ matrix.os }}
15
- continue-on-error: ${{ matrix.experimental }}
16
- strategy:
17
- fail-fast: false
18
- matrix:
19
- ruby: [ '3.0', '2.7', '2.6', '2.5' ]
20
- os: [ ubuntu-latest, windows-latest, macos-latest ]
21
- experimental: [ false ]
22
- steps:
23
- - uses: actions/checkout@v2
24
- with:
25
- submodules: true
26
-
27
- - uses: ruby/setup-ruby@v1
28
- with:
29
- ruby-version: ${{ matrix.ruby }}
30
- bundler-cache: true
31
-
32
- - run: bundle exec rake
13
+ uses: metanorma/metanorma-build-scripts/.github/workflows/generic-rake.yml@main
14
+ secrets:
15
+ pat_token: ${{ secrets.METANORMA_CI_PAT_TOKEN }}
data/lib/html2doc/base.rb CHANGED
@@ -76,6 +76,8 @@ module Html2Doc
76
76
  xml = '<!DOCTYPE html SYSTEM
77
77
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' + xml
78
78
  end
79
+ xml = xml.gsub(/<!--\s*\[([^]]+)\]>/, "<!-- MSWORD-COMMENT \\1 -->")
80
+ .gsub(/<!\s*\[endif\]\s*-->/, "<!-- MSWORD-COMMENT-END -->")
79
81
  Nokogiri::XML.parse(xml)
80
82
  end
81
83
 
@@ -85,12 +87,16 @@ module Html2Doc
85
87
 
86
88
  def self.from_xhtml(xml)
87
89
  xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "")
88
- .sub(DOCTYPE, "")
89
- .gsub(%{ />}, "/>")
90
+ .sub(DOCTYPE, "").gsub(%{ />}, "/>")
91
+ .gsub(/<!-- MSWORD-COMMENT (.+?) -->/, "<!--[\\1]>")
92
+ .gsub(/<!-- MSWORD-COMMENT-END -->/, "<![endif]-->")
93
+ .gsub("\n--&gt;\n", "\n-->\n")
90
94
  end
91
95
 
92
96
  def self.msword_fix(doc)
93
97
  # brain damage in MSWord parser
98
+ doc.gsub!(%r{<w:DoNotOptimizeForBrowser></w:DoNotOptimizeForBrowser>},
99
+ "<w:DoNotOptimizeForBrowser/>")
94
100
  doc.gsub!(%r{<span style="mso-special-character:footnote"/>},
95
101
  '<span style="mso-special-character:footnote"></span>')
96
102
  doc.gsub!(%r{<div style="mso-element:footnote-list"></div>},
@@ -116,7 +122,7 @@ module Html2Doc
116
122
  end
117
123
 
118
124
  PRINT_VIEW = <<~XML.freeze
119
- <!--[if gte mso 9]>
125
+
120
126
  <xml>
121
127
  <w:WordDocument>
122
128
  <w:View>Print</w:View>
@@ -124,8 +130,7 @@ module Html2Doc
124
130
  <w:DoNotOptimizeForBrowser/>
125
131
  </w:WordDocument>
126
132
  </xml>
127
- <![endif]-->
128
- <meta http-equiv=Content-Type content="text/html; charset=utf-8"/>
133
+ <meta http-equiv='Content-Type' content="text/html; charset=utf-8"/>
129
134
  XML
130
135
 
131
136
  def self.define_head1(docxml, _dir)
@@ -148,12 +153,16 @@ module Html2Doc
148
153
  end
149
154
  end
150
155
 
151
- def self.stylesheet(_filename, _header_filename, fn)
152
- (fn.nil? || fn.empty?) and
153
- fn = File.join(File.dirname(__FILE__), "wordstyle.css")
154
- stylesheet = File.read(fn, encoding: "UTF-8")
156
+ def self.stylesheet(_filename, _header_filename, cssname)
157
+ (cssname.nil? || cssname.empty?) and
158
+ cssname = File.join(File.dirname(__FILE__), "wordstyle.css")
159
+ stylesheet = File.read(cssname, encoding: "UTF-8")
155
160
  xml = Nokogiri::XML("<style/>")
156
- xml.children.first << Nokogiri::XML::Comment.new(xml, "\n#{stylesheet}\n")
161
+ #s = Nokogiri::XML::CDATA.new(xml, "\n#{stylesheet}\n")
162
+ #xml.children.first << Nokogiri::XML::Comment.new(xml, s)
163
+ xml.children.first << Nokogiri::XML::CDATA
164
+ .new(xml, "\n<!--\n#{stylesheet}\n-->\n")
165
+
157
166
  xml.root.to_s
158
167
  end
159
168
 
@@ -1,3 +1,3 @@
1
1
  module Html2Doc
2
- VERSION = "1.2.1".freeze
2
+ VERSION = "1.3.0".freeze
3
3
  end
@@ -41,7 +41,7 @@ WORD_HDR = <<~HDR.freeze
41
41
  Content-Type: text/html; charset="utf-8"
42
42
 
43
43
  <?xml version="1.0"?>
44
- <html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head><!--[if gte mso 9]>
44
+ <html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head>
45
45
  <xml>
46
46
  <w:WordDocument>
47
47
  <w:View>Print</w:View>
@@ -49,7 +49,6 @@ WORD_HDR = <<~HDR.freeze
49
49
  <w:DoNotOptimizeForBrowser/>
50
50
  </w:WordDocument>
51
51
  </xml>
52
- <![endif]-->
53
52
  <meta http-equiv=Content-Type content="text/html; charset=utf-8"/>
54
53
 
55
54
  <link rel=File-List href="cid:filelist.xml"/>
@@ -278,6 +277,17 @@ RSpec.describe Html2Doc do
278
277
  expect(Html2Doc::VERSION).not_to be nil
279
278
  end
280
279
 
280
+ it "preserves Word HTML directives" do
281
+ Html2Doc.process(html_input(%[A<!--[if gte mso 9]>X<![endif]-->B]), filename: "test")
282
+ expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
283
+ .to match_fuzzy(<<~OUTPUT)
284
+ #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
285
+ #{word_body(%{A<!--[if gte mso 9]>X<![endif]-->B},
286
+ '<div style="mso-element:footnote-list"/>')}
287
+ #{WORD_FTR1}
288
+ OUTPUT
289
+ end
290
+
281
291
  it "processes a blank document" do
282
292
  Html2Doc.process(html_input(""), filename: "test")
283
293
  expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
@@ -367,7 +377,8 @@ RSpec.describe Html2Doc do
367
377
  File.open("spec/header_img1.html", "w:UTF-8") do |f|
368
378
  f.write(
369
379
  doc.sub(%r{spec/19160-6.png},
370
- File.expand_path(File.join(File.dirname(__FILE__), "19160-6.png"))),
380
+ File.expand_path(File.join(File.dirname(__FILE__),
381
+ "19160-6.png"))),
371
382
  )
372
383
  end
373
384
  Html2Doc.process(html_input(""),
@@ -565,7 +576,8 @@ RSpec.describe Html2Doc do
565
576
 
566
577
  it "resizes images for height, in a file in a subdirectory" do
567
578
  simple_body = '<img src="19160-6.png">'
568
- Html2Doc.process(html_input(simple_body), filename: "spec/test", imagedir: "spec")
579
+ Html2Doc.process(html_input(simple_body), filename: "spec/test",
580
+ imagedir: "spec")
569
581
  testdoc = File.read("spec/test.doc", encoding: "utf-8")
570
582
  expect(testdoc).to match(%r{Content-Type: image/png})
571
583
  expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
@@ -653,7 +665,8 @@ RSpec.describe Html2Doc do
653
665
 
654
666
  it "deals with absolute image locations" do
655
667
  simple_body = %{<img src="#{__dir__}/19160-6.png">}
656
- Html2Doc.process(html_input(simple_body), filename: "spec/test", imagedir: ".")
668
+ Html2Doc.process(html_input(simple_body), filename: "spec/test",
669
+ imagedir: ".")
657
670
  testdoc = File.read("spec/test.doc", encoding: "utf-8")
658
671
  expect(testdoc).to match(%r{Content-Type: image/png})
659
672
  expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2doc
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.2.1
4
+ version: 1.3.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2021-11-29 00:00:00.000000000 Z
11
+ date: 2022-01-22 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: asciimath
@@ -334,7 +334,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
334
334
  - !ruby/object:Gem::Version
335
335
  version: '0'
336
336
  requirements: []
337
- rubygems_version: 3.2.22
337
+ rubygems_version: 3.2.32
338
338
  signing_key:
339
339
  specification_version: 4
340
340
  summary: Convert HTML document to Microsoft Word document