html2doc 0.8.2 → 0.8.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/html2doc/base.rb +2 -2
- data/lib/html2doc/math.rb +1 -1
- data/lib/html2doc/mathml2omml.xsl +3822 -0
- data/lib/html2doc/mime.rb +8 -5
- data/lib/html2doc/version.rb +1 -1
- data/spec/html2doc_spec.rb +14 -4
- metadata +3 -2
data/lib/html2doc/mime.rb
CHANGED
@@ -2,6 +2,7 @@ require "uuidtools"
|
|
2
2
|
require "base64"
|
3
3
|
require "mime/types"
|
4
4
|
require "image_size"
|
5
|
+
require "fileutils"
|
5
6
|
|
6
7
|
module Html2Doc
|
7
8
|
def self.mime_preamble(boundary, filename, result)
|
@@ -20,7 +21,7 @@ module Html2Doc
|
|
20
21
|
|
21
22
|
def self.mime_attachment(boundary, filename, item, dir)
|
22
23
|
encoded_file = Base64.strict_encode64(
|
23
|
-
File.read("#{dir}/#{item}"),
|
24
|
+
File.read("#{dir}/#{item}", encoding: "utf-8"),
|
24
25
|
).gsub(/(.{76})/, "\\1\n")
|
25
26
|
<<~"FILE"
|
26
27
|
--#{boundary}
|
@@ -55,7 +56,7 @@ module Html2Doc
|
|
55
56
|
mhtml += mime_attachment(boundary, filename, item, dir)
|
56
57
|
end
|
57
58
|
mhtml += "--#{boundary}--"
|
58
|
-
File.open("#{filename}.doc", "w") { |f| f.write mhtml }
|
59
|
+
File.open("#{filename}.doc", "w:UTF-8") { |f| f.write mhtml }
|
59
60
|
end
|
60
61
|
|
61
62
|
# max height for Word document is 400, max width is 680
|
@@ -72,13 +73,14 @@ module Html2Doc
|
|
72
73
|
|
73
74
|
IMAGE_PATH = "//*[local-name() = 'img' or local-name() = 'imagedata']".freeze
|
74
75
|
|
76
|
+
# only processes locally stored images
|
75
77
|
def self.image_cleanup(docxml, dir)
|
76
78
|
docxml.xpath(IMAGE_PATH).each do |i|
|
79
|
+
next if /^http/.match i["src"]
|
77
80
|
matched = /\.(?<suffix>\S+)$/.match i["src"]
|
78
81
|
uuid = UUIDTools::UUID.random_create.to_s
|
79
82
|
new_full_filename = File.join(dir, "#{uuid}.#{matched[:suffix]}")
|
80
|
-
|
81
|
-
system "cp #{i['src']} #{new_full_filename}"
|
83
|
+
FileUtils.cp i["src"], new_full_filename
|
82
84
|
i["width"], i["height"] = image_resize(i, 400, 680)
|
83
85
|
i["src"] = new_full_filename
|
84
86
|
end
|
@@ -100,7 +102,8 @@ module Html2Doc
|
|
100
102
|
uuid = UUIDTools::UUID.random_create.to_s
|
101
103
|
new_full_filename = "file:///C:/Doc/#{filename}_files/#{uuid}.#{matched2[:suffix]}"
|
102
104
|
dest_filename = File.join(dir, "#{uuid}.#{matched2[:suffix]}")
|
103
|
-
system "cp #{matched[:src]} #{dest_filename}"
|
105
|
+
#system "cp #{matched[:src]} #{dest_filename}"
|
106
|
+
FileUtils.cp matched[:src], dest_filename
|
104
107
|
a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='#{new_full_filename}'")
|
105
108
|
end
|
106
109
|
a.join
|
data/lib/html2doc/version.rb
CHANGED
data/spec/html2doc_spec.rb
CHANGED
@@ -364,7 +364,7 @@ RSpec.describe Html2Doc do
|
|
364
364
|
OUTPUT
|
365
365
|
end
|
366
366
|
|
367
|
-
|
367
|
+
it "processes a header with an image" do
|
368
368
|
Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header_img.html")
|
369
369
|
expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).to match(%r{Content-Type: image/png})
|
370
370
|
end
|
@@ -517,7 +517,7 @@ RSpec.describe Html2Doc do
|
|
517
517
|
expect(testdoc).to match(%r{Content-Type: image/png})
|
518
518
|
expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
|
519
519
|
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
520
|
-
#{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.png" width="
|
520
|
+
#{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.png" width="412" height="400"></img>', '<div style="mso-element:footnote-list"/>'))}
|
521
521
|
#{image_clean(WORD_FTR3)}
|
522
522
|
OUTPUT
|
523
523
|
end
|
@@ -529,7 +529,7 @@ RSpec.describe Html2Doc do
|
|
529
529
|
expect(testdoc).to match(%r{Content-Type: image/gif})
|
530
530
|
expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
|
531
531
|
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
532
|
-
#{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.gif" width="
|
532
|
+
#{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.gif" width="680" height="202"></img>', '<div style="mso-element:footnote-list"/>'))}
|
533
533
|
#{image_clean(WORD_FTR3).gsub(/image\.png/, "image.gif")}
|
534
534
|
OUTPUT
|
535
535
|
end
|
@@ -541,7 +541,7 @@ RSpec.describe Html2Doc do
|
|
541
541
|
expect(testdoc).to match(%r{Content-Type: image/jpeg})
|
542
542
|
expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
|
543
543
|
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
544
|
-
#{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.jpg" width="
|
544
|
+
#{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.jpg" width="122" height="400"></img>', '<div style="mso-element:footnote-list"/>'))}
|
545
545
|
#{image_clean(WORD_FTR3).gsub(/image\.png/, "image.jpg")}
|
546
546
|
OUTPUT
|
547
547
|
end
|
@@ -577,6 +577,16 @@ RSpec.describe Html2Doc do
|
|
577
577
|
expect(Html2Doc.image_resize(image, 100, 100)).to eq [30, 100]
|
578
578
|
end
|
579
579
|
|
580
|
+
it "does not move images if they are external URLs" do
|
581
|
+
simple_body = '<img src="https://example.com/19160-6.png">'
|
582
|
+
Html2Doc.process(html_input(simple_body), filename: "test")
|
583
|
+
testdoc = File.read("test.doc", encoding: "utf-8")
|
584
|
+
expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
|
585
|
+
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
586
|
+
#{image_clean(word_body('<img src="https://example.com/19160-6.png"></img>', '<div style="mso-element:footnote-list"/>'))}
|
587
|
+
#{image_clean(WORD_FTR1)}
|
588
|
+
OUTPUT
|
589
|
+
end
|
580
590
|
|
581
591
|
it "processes epub:type footnotes" do
|
582
592
|
simple_body = '<div>This is a very simple
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.2
|
4
|
+
version: 0.8.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-08-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -310,6 +310,7 @@ files:
|
|
310
310
|
- lib/html2doc/base.rb
|
311
311
|
- lib/html2doc/lists.rb
|
312
312
|
- lib/html2doc/math.rb
|
313
|
+
- lib/html2doc/mathml2omml.xsl
|
313
314
|
- lib/html2doc/mime.rb
|
314
315
|
- lib/html2doc/mml2omml.xsl
|
315
316
|
- lib/html2doc/notes.rb
|