html2doc 1.5.3 → 1.5.4
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/html2doc/base.rb +7 -3
- data/lib/html2doc/mime.rb +78 -11
- data/lib/html2doc/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 74b05f46f1fd365f9ff0766e95d884bd2959c01b92c70d4a080651adfc2e8d3c
|
4
|
+
data.tar.gz: f70eb009e705ff767b34922fc0444740be8dde80da8b78c503784e02be0e4560
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: e3d93501d63bd27ed6e5245cb18dbc49013fcecd83bc57acf3a5d3c797636b928b91e148e33e8326f10f77f9b94a7175d85294eb86a1b4b2261aafb7dfe9d7a4
|
7
|
+
data.tar.gz: 4cbb8887089e622b9d9d1fd82dc4e5fd4e8e81a28a59dcf02ccce22cb3c9e6e7c4c7802177259557c268d697ced17ec09e4181e82c0dc851a613553e7f5b58c1
|
data/lib/html2doc/base.rb
CHANGED
@@ -168,9 +168,7 @@ class Html2Doc
|
|
168
168
|
end
|
169
169
|
|
170
170
|
def stylesheet(_filename, _header_filename, cssname)
|
171
|
-
|
172
|
-
cssname = File.join(File.dirname(__FILE__), "wordstyle.css")
|
173
|
-
stylesheet = File.read(cssname, encoding: "UTF-8")
|
171
|
+
stylesheet = read_stylesheet(cssname)
|
174
172
|
xml = Nokogiri::XML("<style/>")
|
175
173
|
# s = Nokogiri::XML::CDATA.new(xml, "\n#{stylesheet}\n")
|
176
174
|
# xml.children.first << Nokogiri::XML::Comment.new(xml, s)
|
@@ -180,6 +178,12 @@ class Html2Doc
|
|
180
178
|
xml.root.to_s
|
181
179
|
end
|
182
180
|
|
181
|
+
# Returns the text of the CSS file at cssname, read as UTF-8.
# A nil or empty cssname selects the wordstyle.css file located in the
# same directory as this source file.
def read_stylesheet(cssname)
  path = cssname
  if path.nil? || path.empty?
    path = File.join(File.dirname(__FILE__), "wordstyle.css")
  end
  File.read(path, encoding: "UTF-8")
end
|
186
|
+
|
183
187
|
def define_head(docxml)
|
184
188
|
title = docxml.at("//*[local-name() = 'head']/*[local-name() = 'title']")
|
185
189
|
head = docxml.at("//*[local-name() = 'head']")
|
data/lib/html2doc/mime.rb
CHANGED
@@ -76,7 +76,6 @@ class Html2Doc
|
|
76
76
|
end
|
77
77
|
end
|
78
78
|
|
79
|
-
# max width for Word document is 400, max height is 680
|
80
79
|
def image_resize(img, path, maxheight, maxwidth)
|
81
80
|
s, realsize = get_image_size(img, path)
|
82
81
|
return s if s[0] == nil && s[1] == nil
|
@@ -115,21 +114,89 @@ class Html2Doc
|
|
115
114
|
|
116
115
|
# only processes locally stored images
|
117
116
|
def image_cleanup(docxml, dir, localdir)
|
117
|
+
maxheight, maxwidth = page_dimensions(docxml)
|
118
118
|
docxml.traverse do |i|
|
119
|
-
|
120
|
-
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
local_filename = localname(src, localdir)
|
125
|
-
new_filename = "#{mkuuid}#{File.extname(src)}"
|
126
|
-
FileUtils.cp local_filename, File.join(dir, new_filename)
|
127
|
-
i["width"], i["height"] = image_resize(i, local_filename, 680, 400)
|
128
|
-
i["src"] = File.join(File.basename(dir), new_filename)
|
119
|
+
skip_image_cleanup?(i) and next
|
120
|
+
local_filename = rename_image(i, dir, localdir)
|
121
|
+
i["width"], i["height"] = image_resize(i, local_filename, maxheight,
|
122
|
+
maxwidth)
|
129
123
|
end
|
130
124
|
docxml
|
131
125
|
end
|
132
126
|
|
127
|
+
# Copies the image referenced by img["src"] into dir under a freshly
# generated name (mkuuid plus the original file extension), then rewrites
# img["src"] to that new name prefixed with the base name of dir.
# Returns the path that localname resolved for the original src.
def rename_image(img, dir, localdir)
  original_src = img["src"]
  source_path = localname(original_src, localdir)
  target_name = "#{mkuuid}#{File.extname(original_src)}"
  FileUtils.cp source_path, File.join(dir, target_name)
  img["src"] = File.join(File.basename(dir), target_name)
  source_path
end
|
134
|
+
|
135
|
+
# Decides whether image cleanup should leave a node untouched.
# Returns true when img is not an element named img or v:imagedata,
# when its src attribute is nil or empty, when src matches /^http/
# (remote image), or when src is a base64 data: URI of type image/* or
# application/*. Returns false otherwise.
def skip_image_cleanup?(img)
  # Check the node kind first, so that src is never looked up on nodes
  # that are not elements (text, comments, the document itself).
  return true unless img.element? && %w(img v:imagedata).include?(img.name)

  src = img["src"]
  return true if src.nil? || src.empty?
  return true if /^http/.match?(src)

  %r{^data:(image|application)/[^;]+;base64}.match?(src)
end
|
143
|
+
|
144
|
+
# Returns [maxheight, maxwidth] for images, derived from the page size
# and margins declared in the Word CSS.
# We are going to use the 2nd instance of @page in the Word CSS,
# skipping the cover page. Currently doesn't deal with Landscape.
# Scan both @stylesheet and docxml.to_xml (where @standardstylesheet has
# ended up).
# Allow 0.9 * height to fit caption.
# Falls back to [680, 400] when no usable size:/margin: declaration is
# found, or when any step raises a StandardError.
def page_dimensions(docxml)
  fallback = [680, 400]
  stylesheet = read_stylesheet(@stylesheet)
  page_size = find_page_size_in_doc(stylesheet, docxml.to_xml) or
    return fallback
  m_size = /size:\s*(\S+)\s+(\S+)\s*;/.match(page_size) or return fallback
  # margin must carry four whitespace-separated values, read here as
  # top, right, bottom, left. The separators are mandatory (\s+) so that
  # a shorter list is rejected at this point instead of being split in
  # the middle of a value.
  m_marg = /margin:\s*(\S+)\s+(\S+)\s+(\S+)\s+(\S+)\s*;/.match(page_size) or
    return fallback
  width = units_to_px(m_size[1])
  height = units_to_px(m_size[2])
  top, right, bottom, left = (1..4).map { |i| units_to_px(m_marg[i]) }
  [0.9 * (height - top - bottom), width - right - left]
rescue StandardError
  fallback
end
|
160
|
+
|
161
|
+
# Looks for a usable @page rule, trying sources in a fixed order and
# stopping at the first hit: WordSection2 then WordSection3 in the
# stylesheet text, WordSection2 then WordSection3 in the document
# markup, and finally any @page rule in the stylesheet, then in the
# document. Returns the result of the last find_page_size call made.
def find_page_size_in_doc(stylesheet, doc)
  attempts = [
    [stylesheet, "WordSection2", false],
    [stylesheet, "WordSection3", false],
    [doc, "WordSection2", true],
    [doc, "WordSection3", true],
    [stylesheet, "", false],
    [doc, "", true],
  ]
  result = nil
  attempts.each do |text, klass, in_xml|
    result = find_page_size(text, klass, in_xml)
    break if result
  end
  result
end
|
168
|
+
|
169
|
+
# Scans CSS text line by line for an "@page <klass>" rule and returns the
# size: and margin: lines of the first such rule that contains either
# declaration, concatenated in the order they appear. Returns nil when
# no rule qualifies, or when stylesheet is nil.
# klass is interpolated into the matching regexp as is; an empty klass
# matches any @page rule.
# If in_xml, CSS is embedded in XML <style> tag: a rule then only counts
# if its closing brace is reached after a <style line and before the
# matching </style> line.
def find_page_size(stylesheet, klass, in_xml)
  page_rule = /^\s*@page\s+#{klass}/ # built once, not once per line
  xml_found = false
  found = false
  ret = ""
  stylesheet&.lines&.each do |l|
    in_xml && l.include?("<style") and xml_found = true and found = false
    in_xml && l.include?("</style>") and xml_found = false
    page_rule.match?(l) and found = true
    found && /^\s*\{?size:/.match?(l) and ret += l
    found && /^\s*\{?margin:/.match?(l) and ret += l
    if found && /}/.match?(l)
      # strip.empty? is used rather than blank?, which core Ruby does not
      # define on String.
      !ret.strip.empty? && (!in_xml || xml_found) and return ret
      ret = ""
      found = false
    end
  end
  nil
end
|
188
|
+
|
189
|
+
# Converts a CSS length such as "72pt", "1cm", "1in" or "100px" into an
# Integer, truncating any fraction. Despite the method name the result
# is on the pt scale: pt values pass through unchanged, px is multiplied
# by 0.75, cm by 28.346456693 and in by 72.
# Raises ArgumentError when measure carries none of those units (the
# unit list in the regexp must stay in step with the case below).
def units_to_px(measure)
  m = /^(\S+)(px|pt|cm|in)/.match(measure) or
    raise ArgumentError, "unsupported CSS length: #{measure.inspect}"
  value = m[1].to_f
  ret = case m[2]
        when "px" then value * 0.75
        when "pt" then value
        when "cm" then value * 28.346456693
        when "in" then value * 72
        end
  ret.to_i
end
|
199
|
+
|
133
200
|
# do not parse the header through Nokogiri, since it will contain
|
134
201
|
# non-XML like <![if !supportFootnotes]>
|
135
202
|
def header_image_cleanup(doc, dir, filename, localdir)
|
data/lib/html2doc/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.3
|
4
|
+
version: 1.5.4
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-05-19 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|