html2doc 1.2.1 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +3 -20
- data/lib/html2doc/base.rb +19 -10
- data/lib/html2doc/version.rb +1 -1
- data/spec/html2doc_spec.rb +18 -5
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a71b394c280e43e4c661958ef48e0d1a7e26f05e9988e3a697837bd972b5a2f5
|
4
|
+
data.tar.gz: 243ef6cab6e2674befed8cc1d3190bc3448cceae4360604bacb956fe9bb72efe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 80dab821665aeccf3c2f89a301af6fc63b911b79659c0c65ffceb4cbe7a1c637342b37d4803f3d41a842ace6d1c694d031d9fc38402a7adbce67d74c30bb15c6
|
7
|
+
data.tar.gz: 927dfe85cbbbf65da137465776dc1261364f6267e955b8d26f9fd5de994a79bea210b206dda32522fa758c7ffa0f50549f848d77694b83cbd506c28fc1111c78
|
data/.github/workflows/rake.yml
CHANGED
@@ -10,23 +10,6 @@ on:
|
|
10
10
|
|
11
11
|
jobs:
|
12
12
|
rake:
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
strategy:
|
17
|
-
fail-fast: false
|
18
|
-
matrix:
|
19
|
-
ruby: [ '3.0', '2.7', '2.6', '2.5' ]
|
20
|
-
os: [ ubuntu-latest, windows-latest, macos-latest ]
|
21
|
-
experimental: [ false ]
|
22
|
-
steps:
|
23
|
-
- uses: actions/checkout@v2
|
24
|
-
with:
|
25
|
-
submodules: true
|
26
|
-
|
27
|
-
- uses: ruby/setup-ruby@v1
|
28
|
-
with:
|
29
|
-
ruby-version: ${{ matrix.ruby }}
|
30
|
-
bundler-cache: true
|
31
|
-
|
32
|
-
- run: bundle exec rake
|
13
|
+
uses: metanorma/metanorma-build-scripts/.github/workflows/generic-rake.yml@main
|
14
|
+
secrets:
|
15
|
+
pat_token: ${{ secrets.METANORMA_CI_PAT_TOKEN }}
|
data/lib/html2doc/base.rb
CHANGED
@@ -76,6 +76,8 @@ module Html2Doc
|
|
76
76
|
xml = '<!DOCTYPE html SYSTEM
|
77
77
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' + xml
|
78
78
|
end
|
79
|
+
xml = xml.gsub(/<!--\s*\[([^]]+)\]>/, "<!-- MSWORD-COMMENT \\1 -->")
|
80
|
+
.gsub(/<!\s*\[endif\]\s*-->/, "<!-- MSWORD-COMMENT-END -->")
|
79
81
|
Nokogiri::XML.parse(xml)
|
80
82
|
end
|
81
83
|
|
@@ -85,12 +87,16 @@ module Html2Doc
|
|
85
87
|
|
86
88
|
def self.from_xhtml(xml)
|
87
89
|
xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "")
|
88
|
-
.sub(DOCTYPE, "")
|
89
|
-
.gsub(
|
90
|
+
.sub(DOCTYPE, "").gsub(%{ />}, "/>")
|
91
|
+
.gsub(/<!-- MSWORD-COMMENT (.+?) -->/, "<!--[\\1]>")
|
92
|
+
.gsub(/<!-- MSWORD-COMMENT-END -->/, "<![endif]-->")
|
93
|
+
.gsub("\n--&gt;\n", "\n-->\n")
|
90
94
|
end
|
91
95
|
|
92
96
|
def self.msword_fix(doc)
|
93
97
|
# brain damage in MSWord parser
|
98
|
+
doc.gsub!(%r{<w:DoNotOptimizeForBrowser></w:DoNotOptimizeForBrowser>},
|
99
|
+
"<w:DoNotOptimizeForBrowser/>")
|
94
100
|
doc.gsub!(%r{<span style="mso-special-character:footnote"/>},
|
95
101
|
'<span style="mso-special-character:footnote"></span>')
|
96
102
|
doc.gsub!(%r{<div style="mso-element:footnote-list"></div>},
|
@@ -116,7 +122,7 @@ module Html2Doc
|
|
116
122
|
end
|
117
123
|
|
118
124
|
PRINT_VIEW = <<~XML.freeze
|
119
|
-
|
125
|
+
|
120
126
|
<xml>
|
121
127
|
<w:WordDocument>
|
122
128
|
<w:View>Print</w:View>
|
@@ -124,8 +130,7 @@ module Html2Doc
|
|
124
130
|
<w:DoNotOptimizeForBrowser/>
|
125
131
|
</w:WordDocument>
|
126
132
|
</xml>
|
127
|
-
|
128
|
-
<meta http-equiv=Content-Type content="text/html; charset=utf-8"/>
|
133
|
+
<meta http-equiv='Content-Type' content="text/html; charset=utf-8"/>
|
129
134
|
XML
|
130
135
|
|
131
136
|
def self.define_head1(docxml, _dir)
|
@@ -148,12 +153,16 @@ module Html2Doc
|
|
148
153
|
end
|
149
154
|
end
|
150
155
|
|
151
|
-
def self.stylesheet(_filename, _header_filename,
|
152
|
-
(
|
153
|
-
|
154
|
-
stylesheet = File.read(
|
156
|
+
def self.stylesheet(_filename, _header_filename, cssname)
|
157
|
+
(cssname.nil? || cssname.empty?) and
|
158
|
+
cssname = File.join(File.dirname(__FILE__), "wordstyle.css")
|
159
|
+
stylesheet = File.read(cssname, encoding: "UTF-8")
|
155
160
|
xml = Nokogiri::XML("<style/>")
|
156
|
-
|
161
|
+
#s = Nokogiri::XML::CDATA.new(xml, "\n#{stylesheet}\n")
|
162
|
+
#xml.children.first << Nokogiri::XML::Comment.new(xml, s)
|
163
|
+
xml.children.first << Nokogiri::XML::CDATA
|
164
|
+
.new(xml, "\n<!--\n#{stylesheet}\n-->\n")
|
165
|
+
|
157
166
|
xml.root.to_s
|
158
167
|
end
|
159
168
|
|
data/lib/html2doc/version.rb
CHANGED
data/spec/html2doc_spec.rb
CHANGED
@@ -41,7 +41,7 @@ WORD_HDR = <<~HDR.freeze
|
|
41
41
|
Content-Type: text/html; charset="utf-8"
|
42
42
|
|
43
43
|
<?xml version="1.0"?>
|
44
|
-
<html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head
|
44
|
+
<html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head>
|
45
45
|
<xml>
|
46
46
|
<w:WordDocument>
|
47
47
|
<w:View>Print</w:View>
|
@@ -49,7 +49,6 @@ WORD_HDR = <<~HDR.freeze
|
|
49
49
|
<w:DoNotOptimizeForBrowser/>
|
50
50
|
</w:WordDocument>
|
51
51
|
</xml>
|
52
|
-
<![endif]-->
|
53
52
|
<meta http-equiv=Content-Type content="text/html; charset=utf-8"/>
|
54
53
|
|
55
54
|
<link rel=File-List href="cid:filelist.xml"/>
|
@@ -278,6 +277,17 @@ RSpec.describe Html2Doc do
|
|
278
277
|
expect(Html2Doc::VERSION).not_to be nil
|
279
278
|
end
|
280
279
|
|
280
|
+
it "preserves Word HTML directives" do
|
281
|
+
Html2Doc.process(html_input(%[A<!--[if gte mso 9]>X<![endif]-->B]), filename: "test")
|
282
|
+
expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
|
283
|
+
.to match_fuzzy(<<~OUTPUT)
|
284
|
+
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
285
|
+
#{word_body(%{A<!--[if gte mso 9]>X<![endif]-->B},
|
286
|
+
'<div style="mso-element:footnote-list"/>')}
|
287
|
+
#{WORD_FTR1}
|
288
|
+
OUTPUT
|
289
|
+
end
|
290
|
+
|
281
291
|
it "processes a blank document" do
|
282
292
|
Html2Doc.process(html_input(""), filename: "test")
|
283
293
|
expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
|
@@ -367,7 +377,8 @@ RSpec.describe Html2Doc do
|
|
367
377
|
File.open("spec/header_img1.html", "w:UTF-8") do |f|
|
368
378
|
f.write(
|
369
379
|
doc.sub(%r{spec/19160-6.png},
|
370
|
-
File.expand_path(File.join(File.dirname(__FILE__),
|
380
|
+
File.expand_path(File.join(File.dirname(__FILE__),
|
381
|
+
"19160-6.png"))),
|
371
382
|
)
|
372
383
|
end
|
373
384
|
Html2Doc.process(html_input(""),
|
@@ -565,7 +576,8 @@ RSpec.describe Html2Doc do
|
|
565
576
|
|
566
577
|
it "resizes images for height, in a file in a subdirectory" do
|
567
578
|
simple_body = '<img src="19160-6.png">'
|
568
|
-
Html2Doc.process(html_input(simple_body), filename: "spec/test",
|
579
|
+
Html2Doc.process(html_input(simple_body), filename: "spec/test",
|
580
|
+
imagedir: "spec")
|
569
581
|
testdoc = File.read("spec/test.doc", encoding: "utf-8")
|
570
582
|
expect(testdoc).to match(%r{Content-Type: image/png})
|
571
583
|
expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
|
@@ -653,7 +665,8 @@ RSpec.describe Html2Doc do
|
|
653
665
|
|
654
666
|
it "deals with absolute image locations" do
|
655
667
|
simple_body = %{<img src="#{__dir__}/19160-6.png">}
|
656
|
-
Html2Doc.process(html_input(simple_body), filename: "spec/test",
|
668
|
+
Html2Doc.process(html_input(simple_body), filename: "spec/test",
|
669
|
+
imagedir: ".")
|
657
670
|
testdoc = File.read("spec/test.doc", encoding: "utf-8")
|
658
671
|
expect(testdoc).to match(%r{Content-Type: image/png})
|
659
672
|
expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.2.1
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: asciimath
|
@@ -334,7 +334,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
334
334
|
- !ruby/object:Gem::Version
|
335
335
|
version: '0'
|
336
336
|
requirements: []
|
337
|
-
rubygems_version: 3.2.
|
337
|
+
rubygems_version: 3.2.32
|
338
338
|
signing_key:
|
339
339
|
specification_version: 4
|
340
340
|
summary: Convert HTML document to Microsoft Word document
|