html2doc 1.5.3 → 1.5.4
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/html2doc/base.rb +7 -3
- data/lib/html2doc/mime.rb +78 -11
- data/lib/html2doc/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 74b05f46f1fd365f9ff0766e95d884bd2959c01b92c70d4a080651adfc2e8d3c
|
4
|
+
data.tar.gz: f70eb009e705ff767b34922fc0444740be8dde80da8b78c503784e02be0e4560
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e3d93501d63bd27ed6e5245cb18dbc49013fcecd83bc57acf3a5d3c797636b928b91e148e33e8326f10f77f9b94a7175d85294eb86a1b4b2261aafb7dfe9d7a4
|
7
|
+
data.tar.gz: 4cbb8887089e622b9d9d1fd82dc4e5fd4e8e81a28a59dcf02ccce22cb3c9e6e7c4c7802177259557c268d697ced17ec09e4181e82c0dc851a613553e7f5b58c1
|
data/lib/html2doc/base.rb
CHANGED
@@ -168,9 +168,7 @@ class Html2Doc
|
|
168
168
|
end
|
169
169
|
|
170
170
|
def stylesheet(_filename, _header_filename, cssname)
|
171
|
-
|
172
|
-
cssname = File.join(File.dirname(__FILE__), "wordstyle.css")
|
173
|
-
stylesheet = File.read(cssname, encoding: "UTF-8")
|
171
|
+
stylesheet = read_stylesheet(cssname)
|
174
172
|
xml = Nokogiri::XML("<style/>")
|
175
173
|
# s = Nokogiri::XML::CDATA.new(xml, "\n#{stylesheet}\n")
|
176
174
|
# xml.children.first << Nokogiri::XML::Comment.new(xml, s)
|
@@ -180,6 +178,12 @@ class Html2Doc
|
|
180
178
|
xml.root.to_s
|
181
179
|
end
|
182
180
|
|
181
|
+
# Read a CSS stylesheet as UTF-8 text.
#
# @param cssname [String, nil] path of the stylesheet to read; when nil or
#   empty, the wordstyle.css file located next to this source file is used
# @return [String] the stylesheet contents
# @raise [SystemCallError] if the file cannot be read (e.g. Errno::ENOENT)
def read_stylesheet(cssname)
  path = cssname
  # Fall back to the bundled default when the caller supplies no stylesheet.
  if path.nil? || path.empty?
    path = File.join(File.dirname(__FILE__), "wordstyle.css")
  end
  File.read(path, encoding: "UTF-8")
end
|
186
|
+
|
183
187
|
def define_head(docxml)
|
184
188
|
title = docxml.at("//*[local-name() = 'head']/*[local-name() = 'title']")
|
185
189
|
head = docxml.at("//*[local-name() = 'head']")
|
data/lib/html2doc/mime.rb
CHANGED
@@ -76,7 +76,6 @@ class Html2Doc
|
|
76
76
|
end
|
77
77
|
end
|
78
78
|
|
79
|
-
# max width for Word document is 400, max height is 680
|
80
79
|
def image_resize(img, path, maxheight, maxwidth)
|
81
80
|
s, realsize = get_image_size(img, path)
|
82
81
|
return s if s[0] == nil && s[1] == nil
|
@@ -115,21 +114,89 @@ class Html2Doc
|
|
115
114
|
|
116
115
|
# only processes locally stored images
|
117
116
|
def image_cleanup(docxml, dir, localdir)
|
117
|
+
maxheight, maxwidth = page_dimensions(docxml)
|
118
118
|
docxml.traverse do |i|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
local_filename = localname(src, localdir)
|
125
|
-
new_filename = "#{mkuuid}#{File.extname(src)}"
|
126
|
-
FileUtils.cp local_filename, File.join(dir, new_filename)
|
127
|
-
i["width"], i["height"] = image_resize(i, local_filename, 680, 400)
|
128
|
-
i["src"] = File.join(File.basename(dir), new_filename)
|
119
|
+
skip_image_cleanup?(i) and next
|
120
|
+
local_filename = rename_image(i, dir, localdir)
|
121
|
+
i["width"], i["height"] = image_resize(i, local_filename, maxheight,
|
122
|
+
maxwidth)
|
129
123
|
end
|
130
124
|
docxml
|
131
125
|
end
|
132
126
|
|
127
|
+
# Copy the image referenced by img["src"] into dir under a freshly generated
# name, and point img["src"] at the copy.
#
# The new name is the value of mkuuid followed by the original file
# extension; the rewritten src is that name prefixed with the basename of dir.
#
# @param img [#[], #[]=] image node whose "src" attribute is read and rewritten
# @param dir [String] directory that receives the copy
# @param localdir [String] passed through to localname to resolve the source
# @return the path that localname resolved for the original image
def rename_image(img, dir, localdir)
  src = img["src"]
  localname(src, localdir).tap do |source_path|
    target_name = "#{mkuuid}#{File.extname(src)}"
    FileUtils.cp(source_path, File.join(dir, target_name))
    img["src"] = File.join(File.basename(dir), target_name)
  end
end
|
134
|
+
|
135
|
+
# Decide whether a traversed node should be left alone by image cleanup.
#
# A node is skipped when it is not an <img> / <v:imagedata> element, when it
# has no usable src, or when its src starts with "http" or is a base64
# data: URI of type image/* or application/*.
#
# @param img node responding to [], element? and name
# @return [Boolean] true when the node must not be processed
def skip_image_cleanup?(img)
  source = img["src"]
  if img.element? && %w(img v:imagedata).include?(img.name)
    source.nil? || source.empty? || /^http/.match?(source) ||
      %r{^data:(image|application)/[^;]+;base64}.match?(source)
  else
    true
  end
end
|
143
|
+
|
144
|
+
# Work out the maximum image height and width from the page size declared in
# the Word CSS.
#
# The @page rule is looked up via find_page_size_in_doc, which prefers the
# second Word section (skipping the cover page). Landscape pages are not
# handled. Both the configured stylesheet (@stylesheet) and the serialised
# document (where @standardstylesheet has ended up) are scanned.
#
# Height is the second `size:` value minus the first and third `margin:`
# values, scaled by 0.9 to leave room for a caption; width is the first
# `size:` value minus the second and fourth `margin:` values.
#
# @param docxml document responding to to_xml
# @return [Array(Numeric, Numeric)] [max height, max width]; [680, 400] when
#   no usable @page rule is found or anything goes wrong while parsing it
def page_dimensions(docxml)
  stylesheet = read_stylesheet(@stylesheet)
  page_size = find_page_size_in_doc(stylesheet, docxml.to_xml)
  return [680, 400] unless page_size

  m_size = /size:\s*(\S+)\s+(\S+)\s*;/.match(page_size)
  return [680, 400] unless m_size

  # Require whitespace between all four margin values, so that a shorthand
  # margin with fewer values is rejected here instead of being split into
  # meaningless fragments.
  m_marg = /margin:\s*(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s*;/.match(page_size)
  return [680, 400] unless m_marg

  height = units_to_px(m_size[2]) - units_to_px(m_marg[1]) -
    units_to_px(m_marg[3])
  width = units_to_px(m_size[1]) - units_to_px(m_marg[2]) -
    units_to_px(m_marg[4])
  [0.9 * height, width]
rescue StandardError
  # Best effort: any failure reading or parsing the CSS yields the defaults.
  [680, 400]
end
|
160
|
+
|
161
|
+
# Locate the page size/margin declarations to use for image scaling.
#
# Lookups are tried in order and the first hit wins: WordSection2 then
# WordSection3 in the stylesheet, the same two in the document's embedded
# CSS, and finally any @page rule in the stylesheet, then in the document.
#
# @param stylesheet [String, nil] plain CSS text
# @param doc [String, nil] serialised document with CSS inside <style> tags
# @return the result of the first successful find_page_size call, or the
#   (falsy) result of the last attempt
def find_page_size_in_doc(stylesheet, doc)
  attempts = [
    [stylesheet, "WordSection2", false],
    [stylesheet, "WordSection3", false],
    [doc, "WordSection2", true],
    [doc, "WordSection3", true],
    [stylesheet, "", false],
    [doc, "", true],
  ]
  result = nil
  attempts.each do |css, klass, in_xml|
    result = find_page_size(css, klass, in_xml)
    break if result
  end
  result
end
|
168
|
+
|
169
|
+
# Extract the `size:` and `margin:` declaration lines of an @page rule.
#
# The text is scanned line by line. A rule starts on a line matching
# `@page <klass>` and ends on the next line containing "}". Only lines that
# begin (after optional whitespace and an optional "{") with `size:` or
# `margin:` are collected.
#
# @param stylesheet [String, nil] text to scan; nil yields nil
# @param klass [String] name following @page; "" matches any named @page rule
# @param in_xml [Boolean] when true the CSS is embedded in an XML <style>
#   tag, and a rule only counts if it closes while inside such a tag
# @return [String, nil] the collected declaration lines (line endings
#   included), or nil when no matching rule with declarations is found
def find_page_size(stylesheet, klass, in_xml)
  # Escape klass so that it is matched literally, and build the regex once.
  page_rule = /^\s*@page\s+#{Regexp.escape(klass)}/
  inside_style = false
  inside_rule = false
  declarations = +""
  stylesheet&.each_line do |line|
    if in_xml
      if line.include?("<style")
        inside_style = true
        inside_rule = false
      end
      inside_style = false if line.include?("</style>")
    end
    inside_rule = true if page_rule.match?(line)
    next unless inside_rule

    declarations << line if /^\s*\{?(size|margin):/.match?(line)
    next unless line.include?("}")

    # End of the rule. String#empty? is used because String#blank? is not
    # part of core Ruby.
    if !declarations.empty? && (!in_xml || inside_style)
      return declarations
    end

    declarations = +""
    inside_rule = false
  end
  nil
end
|
188
|
+
|
189
|
+
# Convert a CSS absolute length to a whole number of output units.
#
# Despite the method name, the factors normalise to points: pt is taken
# as-is, px is multiplied by 0.75, cm by 28.346456693 and in by 72. The
# result is truncated to an Integer.
#
# @param measure [String] a length such as "595.3pt", "21cm", "8.5in", "96px"
# @return [Integer] the converted, truncated length
# @raise [ArgumentError] if measure does not start with a number followed by
#   one of the supported units
def units_to_px(measure)
  # The unit alternation must list every unit handled below; otherwise the
  # px and in conversions can never be reached.
  m = /\A(\S+)(px|pt|cm|in)/.match(measure)
  raise ArgumentError, "unsupported CSS length: #{measure.inspect}" unless m

  factor = case m[2]
           when "px" then 0.75
           when "pt" then 1.0
           when "cm" then 28.346456693
           when "in" then 72.0
           end
  (m[1].to_f * factor).to_i
end
|
199
|
+
|
133
200
|
# do not parse the header through Nokogiri, since it will contain
|
134
201
|
# non-XML like <![if !supportFootnotes]>
|
135
202
|
def header_image_cleanup(doc, dir, filename, localdir)
|
data/lib/html2doc/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.3
|
4
|
+
version: 1.5.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-05-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|