html2doc 0.8.2 → 0.8.3

Sign up for free protection for your applications and access to all features.
data/lib/html2doc/mime.rb CHANGED
@@ -2,6 +2,7 @@ require "uuidtools"
2
2
  require "base64"
3
3
  require "mime/types"
4
4
  require "image_size"
5
+ require "fileutils"
5
6
 
6
7
  module Html2Doc
7
8
  def self.mime_preamble(boundary, filename, result)
@@ -20,7 +21,7 @@ module Html2Doc
20
21
 
21
22
  def self.mime_attachment(boundary, filename, item, dir)
22
23
  encoded_file = Base64.strict_encode64(
23
- File.read("#{dir}/#{item}"),
24
+ File.read("#{dir}/#{item}", encoding: "utf-8"),
24
25
  ).gsub(/(.{76})/, "\\1\n")
25
26
  <<~"FILE"
26
27
  --#{boundary}
@@ -55,7 +56,7 @@ module Html2Doc
55
56
  mhtml += mime_attachment(boundary, filename, item, dir)
56
57
  end
57
58
  mhtml += "--#{boundary}--"
58
- File.open("#{filename}.doc", "w") { |f| f.write mhtml }
59
+ File.open("#{filename}.doc", "w:UTF-8") { |f| f.write mhtml }
59
60
  end
60
61
 
61
62
  # max height for Word document is 400, max width is 680
@@ -72,13 +73,14 @@ module Html2Doc
72
73
 
73
74
  IMAGE_PATH = "//*[local-name() = 'img' or local-name() = 'imagedata']".freeze
74
75
 
76
+ # only processes locally stored images
75
77
  def self.image_cleanup(docxml, dir)
76
78
  docxml.xpath(IMAGE_PATH).each do |i|
79
+ next if /^http/.match i["src"]
77
80
  matched = /\.(?<suffix>\S+)$/.match i["src"]
78
81
  uuid = UUIDTools::UUID.random_create.to_s
79
82
  new_full_filename = File.join(dir, "#{uuid}.#{matched[:suffix]}")
80
- # presupposes that the image source is local
81
- system "cp #{i['src']} #{new_full_filename}"
83
+ FileUtils.cp i["src"], new_full_filename
82
84
  i["width"], i["height"] = image_resize(i, 400, 680)
83
85
  i["src"] = new_full_filename
84
86
  end
@@ -100,7 +102,8 @@ module Html2Doc
100
102
  uuid = UUIDTools::UUID.random_create.to_s
101
103
  new_full_filename = "file:///C:/Doc/#{filename}_files/#{uuid}.#{matched2[:suffix]}"
102
104
  dest_filename = File.join(dir, "#{uuid}.#{matched2[:suffix]}")
103
- system "cp #{matched[:src]} #{dest_filename}"
105
+ #system "cp #{matched[:src]} #{dest_filename}"
106
+ FileUtils.cp matched[:src], dest_filename
104
107
  a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='#{new_full_filename}'")
105
108
  end
106
109
  a.join
@@ -1,3 +1,3 @@
1
1
  module Html2Doc
2
- VERSION = "0.8.2".freeze
2
+ VERSION = "0.8.3".freeze
3
3
  end
@@ -364,7 +364,7 @@ RSpec.describe Html2Doc do
364
364
  OUTPUT
365
365
  end
366
366
 
367
- it "processes a header with an image" do
367
+ it "processes a header with an image" do
368
368
  Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header_img.html")
369
369
  expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).to match(%r{Content-Type: image/png})
370
370
  end
@@ -517,7 +517,7 @@ RSpec.describe Html2Doc do
517
517
  expect(testdoc).to match(%r{Content-Type: image/png})
518
518
  expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
519
519
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
520
- #{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.png" width="400" height="387"></img>', '<div style="mso-element:footnote-list"/>'))}
520
+ #{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.png" width="412" height="400"></img>', '<div style="mso-element:footnote-list"/>'))}
521
521
  #{image_clean(WORD_FTR3)}
522
522
  OUTPUT
523
523
  end
@@ -529,7 +529,7 @@ RSpec.describe Html2Doc do
529
529
  expect(testdoc).to match(%r{Content-Type: image/gif})
530
530
  expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
531
531
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
532
- #{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.gif" width="400" height="118"></img>', '<div style="mso-element:footnote-list"/>'))}
532
+ #{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.gif" width="680" height="202"></img>', '<div style="mso-element:footnote-list"/>'))}
533
533
  #{image_clean(WORD_FTR3).gsub(/image\.png/, "image.gif")}
534
534
  OUTPUT
535
535
  end
@@ -541,7 +541,7 @@ RSpec.describe Html2Doc do
541
541
  expect(testdoc).to match(%r{Content-Type: image/jpeg})
542
542
  expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
543
543
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
544
- #{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.jpg" width="208" height="680"></img>', '<div style="mso-element:footnote-list"/>'))}
544
+ #{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.jpg" width="122" height="400"></img>', '<div style="mso-element:footnote-list"/>'))}
545
545
  #{image_clean(WORD_FTR3).gsub(/image\.png/, "image.jpg")}
546
546
  OUTPUT
547
547
  end
@@ -577,6 +577,16 @@ RSpec.describe Html2Doc do
577
577
  expect(Html2Doc.image_resize(image, 100, 100)).to eq [30, 100]
578
578
  end
579
579
 
580
+ it "does not move images if they are external URLs" do
581
+ simple_body = '<img src="https://example.com/19160-6.png">'
582
+ Html2Doc.process(html_input(simple_body), filename: "test")
583
+ testdoc = File.read("test.doc", encoding: "utf-8")
584
+ expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
585
+ #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
586
+ #{image_clean(word_body('<img src="https://example.com/19160-6.png"></img>', '<div style="mso-element:footnote-list"/>'))}
587
+ #{image_clean(WORD_FTR1)}
588
+ OUTPUT
589
+ end
580
590
 
581
591
  it "processes epub:type footnotes" do
582
592
  simple_body = '<div>This is a very simple
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2doc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.2
4
+ version: 0.8.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-07-14 00:00:00.000000000 Z
11
+ date: 2018-08-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities
@@ -310,6 +310,7 @@ files:
310
310
  - lib/html2doc/base.rb
311
311
  - lib/html2doc/lists.rb
312
312
  - lib/html2doc/math.rb
313
+ - lib/html2doc/mathml2omml.xsl
313
314
  - lib/html2doc/mime.rb
314
315
  - lib/html2doc/mml2omml.xsl
315
316
  - lib/html2doc/notes.rb