html2doc 0.8.2 → 0.8.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/html2doc/base.rb +2 -2
- data/lib/html2doc/math.rb +1 -1
- data/lib/html2doc/mathml2omml.xsl +3822 -0
- data/lib/html2doc/mime.rb +8 -5
- data/lib/html2doc/version.rb +1 -1
- data/spec/html2doc_spec.rb +14 -4
- metadata +3 -2
data/lib/html2doc/mime.rb
CHANGED
@@ -2,6 +2,7 @@ require "uuidtools"
|
|
2
2
|
require "base64"
|
3
3
|
require "mime/types"
|
4
4
|
require "image_size"
|
5
|
+
require "fileutils"
|
5
6
|
|
6
7
|
module Html2Doc
|
7
8
|
def self.mime_preamble(boundary, filename, result)
|
@@ -20,7 +21,7 @@ module Html2Doc
|
|
20
21
|
|
21
22
|
def self.mime_attachment(boundary, filename, item, dir)
|
22
23
|
encoded_file = Base64.strict_encode64(
|
23
|
-
File.read("#{dir}/#{item}"),
|
24
|
+
File.read("#{dir}/#{item}", encoding: "utf-8"),
|
24
25
|
).gsub(/(.{76})/, "\\1\n")
|
25
26
|
<<~"FILE"
|
26
27
|
--#{boundary}
|
@@ -55,7 +56,7 @@ module Html2Doc
|
|
55
56
|
mhtml += mime_attachment(boundary, filename, item, dir)
|
56
57
|
end
|
57
58
|
mhtml += "--#{boundary}--"
|
58
|
-
File.open("#{filename}.doc", "w") { |f| f.write mhtml }
|
59
|
+
File.open("#{filename}.doc", "w:UTF-8") { |f| f.write mhtml }
|
59
60
|
end
|
60
61
|
|
61
62
|
# max height for Word document is 400, max width is 680
|
@@ -72,13 +73,14 @@ module Html2Doc
|
|
72
73
|
|
73
74
|
IMAGE_PATH = "//*[local-name() = 'img' or local-name() = 'imagedata']".freeze
|
74
75
|
|
76
|
+
# only processes locally stored images
|
75
77
|
def self.image_cleanup(docxml, dir)
|
76
78
|
docxml.xpath(IMAGE_PATH).each do |i|
|
79
|
+
next if /^http/.match i["src"]
|
77
80
|
matched = /\.(?<suffix>\S+)$/.match i["src"]
|
78
81
|
uuid = UUIDTools::UUID.random_create.to_s
|
79
82
|
new_full_filename = File.join(dir, "#{uuid}.#{matched[:suffix]}")
|
80
|
-
|
81
|
-
system "cp #{i['src']} #{new_full_filename}"
|
83
|
+
FileUtils.cp i["src"], new_full_filename
|
82
84
|
i["width"], i["height"] = image_resize(i, 400, 680)
|
83
85
|
i["src"] = new_full_filename
|
84
86
|
end
|
@@ -100,7 +102,8 @@ module Html2Doc
|
|
100
102
|
uuid = UUIDTools::UUID.random_create.to_s
|
101
103
|
new_full_filename = "file:///C:/Doc/#{filename}_files/#{uuid}.#{matched2[:suffix]}"
|
102
104
|
dest_filename = File.join(dir, "#{uuid}.#{matched2[:suffix]}")
|
103
|
-
system "cp #{matched[:src]} #{dest_filename}"
|
105
|
+
#system "cp #{matched[:src]} #{dest_filename}"
|
106
|
+
FileUtils.cp matched[:src], dest_filename
|
104
107
|
a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='#{new_full_filename}'")
|
105
108
|
end
|
106
109
|
a.join
|
data/lib/html2doc/version.rb
CHANGED
data/spec/html2doc_spec.rb
CHANGED
@@ -364,7 +364,7 @@ RSpec.describe Html2Doc do
|
|
364
364
|
OUTPUT
|
365
365
|
end
|
366
366
|
|
367
|
-
|
367
|
+
it "processes a header with an image" do
|
368
368
|
Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header_img.html")
|
369
369
|
expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).to match(%r{Content-Type: image/png})
|
370
370
|
end
|
@@ -517,7 +517,7 @@ RSpec.describe Html2Doc do
|
|
517
517
|
expect(testdoc).to match(%r{Content-Type: image/png})
|
518
518
|
expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
|
519
519
|
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
520
|
-
#{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.png" width="
|
520
|
+
#{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.png" width="412" height="400"></img>', '<div style="mso-element:footnote-list"/>'))}
|
521
521
|
#{image_clean(WORD_FTR3)}
|
522
522
|
OUTPUT
|
523
523
|
end
|
@@ -529,7 +529,7 @@ RSpec.describe Html2Doc do
|
|
529
529
|
expect(testdoc).to match(%r{Content-Type: image/gif})
|
530
530
|
expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
|
531
531
|
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
532
|
-
#{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.gif" width="
|
532
|
+
#{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.gif" width="680" height="202"></img>', '<div style="mso-element:footnote-list"/>'))}
|
533
533
|
#{image_clean(WORD_FTR3).gsub(/image\.png/, "image.gif")}
|
534
534
|
OUTPUT
|
535
535
|
end
|
@@ -541,7 +541,7 @@ RSpec.describe Html2Doc do
|
|
541
541
|
expect(testdoc).to match(%r{Content-Type: image/jpeg})
|
542
542
|
expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
|
543
543
|
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
544
|
-
#{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.jpg" width="
|
544
|
+
#{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.jpg" width="122" height="400"></img>', '<div style="mso-element:footnote-list"/>'))}
|
545
545
|
#{image_clean(WORD_FTR3).gsub(/image\.png/, "image.jpg")}
|
546
546
|
OUTPUT
|
547
547
|
end
|
@@ -577,6 +577,16 @@ RSpec.describe Html2Doc do
|
|
577
577
|
expect(Html2Doc.image_resize(image, 100, 100)).to eq [30, 100]
|
578
578
|
end
|
579
579
|
|
580
|
+
it "does not move images if they are external URLs" do
|
581
|
+
simple_body = '<img src="https://example.com/19160-6.png">'
|
582
|
+
Html2Doc.process(html_input(simple_body), filename: "test")
|
583
|
+
testdoc = File.read("test.doc", encoding: "utf-8")
|
584
|
+
expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
|
585
|
+
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
586
|
+
#{image_clean(word_body('<img src="https://example.com/19160-6.png"></img>', '<div style="mso-element:footnote-list"/>'))}
|
587
|
+
#{image_clean(WORD_FTR1)}
|
588
|
+
OUTPUT
|
589
|
+
end
|
580
590
|
|
581
591
|
it "processes epub:type footnotes" do
|
582
592
|
simple_body = '<div>This is a very simple
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.2
|
4
|
+
version: 0.8.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-08-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -310,6 +310,7 @@ files:
|
|
310
310
|
- lib/html2doc/base.rb
|
311
311
|
- lib/html2doc/lists.rb
|
312
312
|
- lib/html2doc/math.rb
|
313
|
+
- lib/html2doc/mathml2omml.xsl
|
313
314
|
- lib/html2doc/mime.rb
|
314
315
|
- lib/html2doc/mml2omml.xsl
|
315
316
|
- lib/html2doc/notes.rb
|