html2doc 1.2.1 → 1.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +3 -20
- data/lib/html2doc/base.rb +19 -10
- data/lib/html2doc/version.rb +1 -1
- data/spec/html2doc_spec.rb +18 -5
- metadata +3 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: a71b394c280e43e4c661958ef48e0d1a7e26f05e9988e3a697837bd972b5a2f5
|
4
|
+
data.tar.gz: 243ef6cab6e2674befed8cc1d3190bc3448cceae4360604bacb956fe9bb72efe
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 80dab821665aeccf3c2f89a301af6fc63b911b79659c0c65ffceb4cbe7a1c637342b37d4803f3d41a842ace6d1c694d031d9fc38402a7adbce67d74c30bb15c6
|
7
|
+
data.tar.gz: 927dfe85cbbbf65da137465776dc1261364f6267e955b8d26f9fd5de994a79bea210b206dda32522fa758c7ffa0f50549f848d77694b83cbd506c28fc1111c78
|
data/.github/workflows/rake.yml
CHANGED
@@ -10,23 +10,6 @@ on:
|
|
10
10
|
|
11
11
|
jobs:
|
12
12
|
rake:
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
strategy:
|
17
|
-
fail-fast: false
|
18
|
-
matrix:
|
19
|
-
ruby: [ '3.0', '2.7', '2.6', '2.5' ]
|
20
|
-
os: [ ubuntu-latest, windows-latest, macos-latest ]
|
21
|
-
experimental: [ false ]
|
22
|
-
steps:
|
23
|
-
- uses: actions/checkout@v2
|
24
|
-
with:
|
25
|
-
submodules: true
|
26
|
-
|
27
|
-
- uses: ruby/setup-ruby@v1
|
28
|
-
with:
|
29
|
-
ruby-version: ${{ matrix.ruby }}
|
30
|
-
bundler-cache: true
|
31
|
-
|
32
|
-
- run: bundle exec rake
|
13
|
+
uses: metanorma/metanorma-build-scripts/.github/workflows/generic-rake.yml@main
|
14
|
+
secrets:
|
15
|
+
pat_token: ${{ secrets.METANORMA_CI_PAT_TOKEN }}
|
data/lib/html2doc/base.rb
CHANGED
@@ -76,6 +76,8 @@ module Html2Doc
|
|
76
76
|
xml = '<!DOCTYPE html SYSTEM
|
77
77
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' + xml
|
78
78
|
end
|
79
|
+
xml = xml.gsub(/<!--\s*\[([^]]+)\]>/, "<!-- MSWORD-COMMENT \\1 -->")
|
80
|
+
.gsub(/<!\s*\[endif\]\s*-->/, "<!-- MSWORD-COMMENT-END -->")
|
79
81
|
Nokogiri::XML.parse(xml)
|
80
82
|
end
|
81
83
|
|
@@ -85,12 +87,16 @@ module Html2Doc
|
|
85
87
|
|
86
88
|
def self.from_xhtml(xml)
|
87
89
|
xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "")
|
88
|
-
.sub(DOCTYPE, "")
|
89
|
-
.gsub(
|
90
|
+
.sub(DOCTYPE, "").gsub(%{ />}, "/>")
|
91
|
+
.gsub(/<!-- MSWORD-COMMENT (.+?) -->/, "<!--[\\1]>")
|
92
|
+
.gsub(/<!-- MSWORD-COMMENT-END -->/, "<![endif]-->")
|
93
|
+
.gsub("\n-->\n", "\n-->\n")
|
90
94
|
end
|
91
95
|
|
92
96
|
def self.msword_fix(doc)
|
93
97
|
# brain damage in MSWord parser
|
98
|
+
doc.gsub!(%r{<w:DoNotOptimizeForBrowser></w:DoNotOptimizeForBrowser>},
|
99
|
+
"<w:DoNotOptimizeForBrowser/>")
|
94
100
|
doc.gsub!(%r{<span style="mso-special-character:footnote"/>},
|
95
101
|
'<span style="mso-special-character:footnote"></span>')
|
96
102
|
doc.gsub!(%r{<div style="mso-element:footnote-list"></div>},
|
@@ -116,7 +122,7 @@ module Html2Doc
|
|
116
122
|
end
|
117
123
|
|
118
124
|
PRINT_VIEW = <<~XML.freeze
|
119
|
-
|
125
|
+
|
120
126
|
<xml>
|
121
127
|
<w:WordDocument>
|
122
128
|
<w:View>Print</w:View>
|
@@ -124,8 +130,7 @@ module Html2Doc
|
|
124
130
|
<w:DoNotOptimizeForBrowser/>
|
125
131
|
</w:WordDocument>
|
126
132
|
</xml>
|
127
|
-
|
128
|
-
<meta http-equiv=Content-Type content="text/html; charset=utf-8"/>
|
133
|
+
<meta http-equiv='Content-Type' content="text/html; charset=utf-8"/>
|
129
134
|
XML
|
130
135
|
|
131
136
|
def self.define_head1(docxml, _dir)
|
@@ -148,12 +153,16 @@ module Html2Doc
|
|
148
153
|
end
|
149
154
|
end
|
150
155
|
|
151
|
-
def self.stylesheet(_filename, _header_filename,
|
152
|
-
(
|
153
|
-
|
154
|
-
stylesheet = File.read(
|
156
|
+
def self.stylesheet(_filename, _header_filename, cssname)
|
157
|
+
(cssname.nil? || cssname.empty?) and
|
158
|
+
cssname = File.join(File.dirname(__FILE__), "wordstyle.css")
|
159
|
+
stylesheet = File.read(cssname, encoding: "UTF-8")
|
155
160
|
xml = Nokogiri::XML("<style/>")
|
156
|
-
|
161
|
+
#s = Nokogiri::XML::CDATA.new(xml, "\n#{stylesheet}\n")
|
162
|
+
#xml.children.first << Nokogiri::XML::Comment.new(xml, s)
|
163
|
+
xml.children.first << Nokogiri::XML::CDATA
|
164
|
+
.new(xml, "\n<!--\n#{stylesheet}\n-->\n")
|
165
|
+
|
157
166
|
xml.root.to_s
|
158
167
|
end
|
159
168
|
|
data/lib/html2doc/version.rb
CHANGED
data/spec/html2doc_spec.rb
CHANGED
@@ -41,7 +41,7 @@ WORD_HDR = <<~HDR.freeze
|
|
41
41
|
Content-Type: text/html; charset="utf-8"
|
42
42
|
|
43
43
|
<?xml version="1.0"?>
|
44
|
-
<html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head
|
44
|
+
<html xmlns:o="urn:schemas-microsoft-com:office:office" xmlns:w="urn:schemas-microsoft-com:office:word" xmlns:v="urn:schemas-microsoft-com:vml" xmlns:m="http://schemas.microsoft.com/office/2004/12/omml" xmlns="http://www.w3.org/TR/REC-html40"><head>
|
45
45
|
<xml>
|
46
46
|
<w:WordDocument>
|
47
47
|
<w:View>Print</w:View>
|
@@ -49,7 +49,6 @@ WORD_HDR = <<~HDR.freeze
|
|
49
49
|
<w:DoNotOptimizeForBrowser/>
|
50
50
|
</w:WordDocument>
|
51
51
|
</xml>
|
52
|
-
<![endif]-->
|
53
52
|
<meta http-equiv=Content-Type content="text/html; charset=utf-8"/>
|
54
53
|
|
55
54
|
<link rel=File-List href="cid:filelist.xml"/>
|
@@ -278,6 +277,17 @@ RSpec.describe Html2Doc do
|
|
278
277
|
expect(Html2Doc::VERSION).not_to be nil
|
279
278
|
end
|
280
279
|
|
280
|
+
it "preserves Word HTML directives" do
|
281
|
+
Html2Doc.process(html_input(%[A<!--[if gte mso 9]>X<![endif]-->B]), filename: "test")
|
282
|
+
expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
|
283
|
+
.to match_fuzzy(<<~OUTPUT)
|
284
|
+
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
285
|
+
#{word_body(%{A<!--[if gte mso 9]>X<![endif]-->B},
|
286
|
+
'<div style="mso-element:footnote-list"/>')}
|
287
|
+
#{WORD_FTR1}
|
288
|
+
OUTPUT
|
289
|
+
end
|
290
|
+
|
281
291
|
it "processes a blank document" do
|
282
292
|
Html2Doc.process(html_input(""), filename: "test")
|
283
293
|
expect(guid_clean(File.read("test.doc", encoding: "utf-8")))
|
@@ -367,7 +377,8 @@ RSpec.describe Html2Doc do
|
|
367
377
|
File.open("spec/header_img1.html", "w:UTF-8") do |f|
|
368
378
|
f.write(
|
369
379
|
doc.sub(%r{spec/19160-6.png},
|
370
|
-
File.expand_path(File.join(File.dirname(__FILE__),
|
380
|
+
File.expand_path(File.join(File.dirname(__FILE__),
|
381
|
+
"19160-6.png"))),
|
371
382
|
)
|
372
383
|
end
|
373
384
|
Html2Doc.process(html_input(""),
|
@@ -565,7 +576,8 @@ RSpec.describe Html2Doc do
|
|
565
576
|
|
566
577
|
it "resizes images for height, in a file in a subdirectory" do
|
567
578
|
simple_body = '<img src="19160-6.png">'
|
568
|
-
Html2Doc.process(html_input(simple_body), filename: "spec/test",
|
579
|
+
Html2Doc.process(html_input(simple_body), filename: "spec/test",
|
580
|
+
imagedir: "spec")
|
569
581
|
testdoc = File.read("spec/test.doc", encoding: "utf-8")
|
570
582
|
expect(testdoc).to match(%r{Content-Type: image/png})
|
571
583
|
expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
|
@@ -653,7 +665,8 @@ RSpec.describe Html2Doc do
|
|
653
665
|
|
654
666
|
it "deals with absolute image locations" do
|
655
667
|
simple_body = %{<img src="#{__dir__}/19160-6.png">}
|
656
|
-
Html2Doc.process(html_input(simple_body), filename: "spec/test",
|
668
|
+
Html2Doc.process(html_input(simple_body), filename: "spec/test",
|
669
|
+
imagedir: ".")
|
657
670
|
testdoc = File.read("spec/test.doc", encoding: "utf-8")
|
658
671
|
expect(testdoc).to match(%r{Content-Type: image/png})
|
659
672
|
expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.
|
4
|
+
version: 1.3.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date:
|
11
|
+
date: 2022-01-22 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: asciimath
|
@@ -334,7 +334,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
334
334
|
- !ruby/object:Gem::Version
|
335
335
|
version: '0'
|
336
336
|
requirements: []
|
337
|
-
rubygems_version: 3.2.
|
337
|
+
rubygems_version: 3.2.32
|
338
338
|
signing_key:
|
339
339
|
specification_version: 4
|
340
340
|
summary: Convert HTML document to Microsoft Word document
|