html2doc 0.8.2 → 0.8.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
data/lib/html2doc/mime.rb CHANGED
@@ -2,6 +2,7 @@ require "uuidtools"
2
2
  require "base64"
3
3
  require "mime/types"
4
4
  require "image_size"
5
+ require "fileutils"
5
6
 
6
7
  module Html2Doc
7
8
  def self.mime_preamble(boundary, filename, result)
@@ -20,7 +21,7 @@ module Html2Doc
20
21
 
21
22
  def self.mime_attachment(boundary, filename, item, dir)
22
23
  encoded_file = Base64.strict_encode64(
23
- File.read("#{dir}/#{item}"),
24
+ File.read("#{dir}/#{item}", encoding: "utf-8"),
24
25
  ).gsub(/(.{76})/, "\\1\n")
25
26
  <<~"FILE"
26
27
  --#{boundary}
@@ -55,7 +56,7 @@ module Html2Doc
55
56
  mhtml += mime_attachment(boundary, filename, item, dir)
56
57
  end
57
58
  mhtml += "--#{boundary}--"
58
- File.open("#{filename}.doc", "w") { |f| f.write mhtml }
59
+ File.open("#{filename}.doc", "w:UTF-8") { |f| f.write mhtml }
59
60
  end
60
61
 
61
62
  # max height for Word document is 400, max width is 680
@@ -72,13 +73,14 @@ module Html2Doc
72
73
 
73
74
  IMAGE_PATH = "//*[local-name() = 'img' or local-name() = 'imagedata']".freeze
74
75
 
76
+ # only processes locally stored images
75
77
  def self.image_cleanup(docxml, dir)
76
78
  docxml.xpath(IMAGE_PATH).each do |i|
79
+ next if /^http/.match i["src"]
77
80
  matched = /\.(?<suffix>\S+)$/.match i["src"]
78
81
  uuid = UUIDTools::UUID.random_create.to_s
79
82
  new_full_filename = File.join(dir, "#{uuid}.#{matched[:suffix]}")
80
- # presupposes that the image source is local
81
- system "cp #{i['src']} #{new_full_filename}"
83
+ FileUtils.cp i["src"], new_full_filename
82
84
  i["width"], i["height"] = image_resize(i, 400, 680)
83
85
  i["src"] = new_full_filename
84
86
  end
@@ -100,7 +102,8 @@ module Html2Doc
100
102
  uuid = UUIDTools::UUID.random_create.to_s
101
103
  new_full_filename = "file:///C:/Doc/#{filename}_files/#{uuid}.#{matched2[:suffix]}"
102
104
  dest_filename = File.join(dir, "#{uuid}.#{matched2[:suffix]}")
103
- system "cp #{matched[:src]} #{dest_filename}"
105
+ #system "cp #{matched[:src]} #{dest_filename}"
106
+ FileUtils.cp matched[:src], dest_filename
104
107
  a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='#{new_full_filename}'")
105
108
  end
106
109
  a.join
@@ -1,3 +1,3 @@
1
1
  module Html2Doc
2
- VERSION = "0.8.2".freeze
2
+ VERSION = "0.8.3".freeze
3
3
  end
@@ -364,7 +364,7 @@ RSpec.describe Html2Doc do
364
364
  OUTPUT
365
365
  end
366
366
 
367
- it "processes a header with an image" do
367
+ it "processes a header with an image" do
368
368
  Html2Doc.process(html_input(""), filename: "test", header_file: "spec/header_img.html")
369
369
  expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).to match(%r{Content-Type: image/png})
370
370
  end
@@ -517,7 +517,7 @@ RSpec.describe Html2Doc do
517
517
  expect(testdoc).to match(%r{Content-Type: image/png})
518
518
  expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
519
519
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
520
- #{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.png" width="400" height="387"></img>', '<div style="mso-element:footnote-list"/>'))}
520
+ #{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.png" width="412" height="400"></img>', '<div style="mso-element:footnote-list"/>'))}
521
521
  #{image_clean(WORD_FTR3)}
522
522
  OUTPUT
523
523
  end
@@ -529,7 +529,7 @@ RSpec.describe Html2Doc do
529
529
  expect(testdoc).to match(%r{Content-Type: image/gif})
530
530
  expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
531
531
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
532
- #{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.gif" width="400" height="118"></img>', '<div style="mso-element:footnote-list"/>'))}
532
+ #{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.gif" width="680" height="202"></img>', '<div style="mso-element:footnote-list"/>'))}
533
533
  #{image_clean(WORD_FTR3).gsub(/image\.png/, "image.gif")}
534
534
  OUTPUT
535
535
  end
@@ -541,7 +541,7 @@ RSpec.describe Html2Doc do
541
541
  expect(testdoc).to match(%r{Content-Type: image/jpeg})
542
542
  expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
543
543
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
544
- #{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.jpg" width="208" height="680"></img>', '<div style="mso-element:footnote-list"/>'))}
544
+ #{image_clean(word_body('<img src="test_files/cb7b0d19-891e-4634-815a-570d019d454c.jpg" width="122" height="400"></img>', '<div style="mso-element:footnote-list"/>'))}
545
545
  #{image_clean(WORD_FTR3).gsub(/image\.png/, "image.jpg")}
546
546
  OUTPUT
547
547
  end
@@ -577,6 +577,16 @@ RSpec.describe Html2Doc do
577
577
  expect(Html2Doc.image_resize(image, 100, 100)).to eq [30, 100]
578
578
  end
579
579
 
580
+ it "does not move images if they are external URLs" do
581
+ simple_body = '<img src="https://example.com/19160-6.png">'
582
+ Html2Doc.process(html_input(simple_body), filename: "test")
583
+ testdoc = File.read("test.doc", encoding: "utf-8")
584
+ expect(image_clean(guid_clean(testdoc))).to match_fuzzy(<<~OUTPUT)
585
+ #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
586
+ #{image_clean(word_body('<img src="https://example.com/19160-6.png"></img>', '<div style="mso-element:footnote-list"/>'))}
587
+ #{image_clean(WORD_FTR1)}
588
+ OUTPUT
589
+ end
580
590
 
581
591
  it "processes epub:type footnotes" do
582
592
  simple_body = '<div>This is a very simple
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2doc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.2
4
+ version: 0.8.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2018-07-14 00:00:00.000000000 Z
11
+ date: 2018-08-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities
@@ -310,6 +310,7 @@ files:
310
310
  - lib/html2doc/base.rb
311
311
  - lib/html2doc/lists.rb
312
312
  - lib/html2doc/math.rb
313
+ - lib/html2doc/mathml2omml.xsl
313
314
  - lib/html2doc/mime.rb
314
315
  - lib/html2doc/mml2omml.xsl
315
316
  - lib/html2doc/notes.rb