html2doc 1.1.1 → 1.2.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +1 -11
- data/.hound.yml +3 -1
- data/.rubocop.yml +4 -6
- data/README.adoc +2 -1
- data/bin/html2doc +1 -1
- data/bin/rspec +1 -1
- data/html2doc.gemspec +8 -9
- data/lib/html2doc/base.rb +4 -3
- data/lib/html2doc/lists.rb +24 -27
- data/lib/html2doc/math.rb +35 -18
- data/lib/html2doc/mime.rb +16 -17
- data/lib/html2doc/notes.rb +12 -9
- data/lib/html2doc/version.rb +1 -1
- data/lib/html2doc.rb +0 -3
- data/spec/html2doc_spec.rb +167 -121
- metadata +42 -42
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 64cb262d3176610f0887cc69bab66fa2a3a7ca8445f8ad493d9d65c455a091d6
|
4
|
+
data.tar.gz: 468dc7a8fb687cdbf6db1497cf9d9b5e164687b7d460a5eac1fb983b4673672b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 2a47bbe5df7ae0767ad2f4ccf52c1f96b8e27fc32d08b3b7b25e5051a3d229b29b9852a51c052a701990f9be6dbf0efc97795ea0c0ff4b3745b63f5a0c7adb4f
|
7
|
+
data.tar.gz: c4c10a84141889d820fd8d2afc273122b28372794edd9fdb3e60aee28773350d3e545f2a3efb75c4d28eff350d367b020d01d6e5cc2874a957ca612124e78fd4
|
data/.github/workflows/rake.yml
CHANGED
@@ -16,19 +16,9 @@ jobs:
|
|
16
16
|
strategy:
|
17
17
|
fail-fast: false
|
18
18
|
matrix:
|
19
|
-
ruby: [ '
|
19
|
+
ruby: [ '3.0', '2.7', '2.6', '2.5' ]
|
20
20
|
os: [ ubuntu-latest, windows-latest, macos-latest ]
|
21
21
|
experimental: [ false ]
|
22
|
-
include:
|
23
|
-
- ruby: '3.0'
|
24
|
-
os: 'ubuntu-latest'
|
25
|
-
experimental: true
|
26
|
-
- ruby: '3.0'
|
27
|
-
os: 'windows-latest'
|
28
|
-
experimental: true
|
29
|
-
- ruby: '3.0'
|
30
|
-
os: 'macos-latest'
|
31
|
-
experimental: true
|
32
22
|
steps:
|
33
23
|
- uses: actions/checkout@v2
|
34
24
|
with:
|
data/.hound.yml
CHANGED
data/.rubocop.yml
CHANGED
@@ -1,12 +1,10 @@
|
|
1
|
-
#
|
2
|
-
# https://github.com/
|
3
|
-
# All project-specific additions and overrides should be specified in this file.
|
1
|
+
# Auto-generated by Cimas: Do not edit it manually!
|
2
|
+
# See https://github.com/metanorma/cimas
|
4
3
|
inherit_from:
|
5
4
|
- https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
|
6
5
|
|
7
6
|
# local repo-specific modifications
|
7
|
+
# ...
|
8
8
|
|
9
9
|
AllCops:
|
10
|
-
|
11
|
-
StyleGuideCopsOnly: false
|
12
|
-
TargetRubyVersion: 2.4
|
10
|
+
TargetRubyVersion: 2.5
|
data/README.adoc
CHANGED
@@ -58,11 +58,12 @@ There are two other Microsoft Word vendors in the Ruby ecosystem.
|
|
58
58
|
--
|
59
59
|
require "html2doc"
|
60
60
|
|
61
|
-
Html2Doc.process(result, filename: filename, stylesheet: stylesheet, header_filename: header_filename, dir: dir, asciimathdelims: asciimathdelims, liststyles: liststyles)
|
61
|
+
Html2Doc.process(result, filename: filename, imagedir: imagedir, stylesheet: stylesheet, header_filename: header_filename, dir: dir, asciimathdelims: asciimathdelims, liststyles: liststyles)
|
62
62
|
--
|
63
63
|
|
64
64
|
result:: is the Html document to be converted into Word, as a string.
|
65
65
|
filename:: is the name the document is to be saved as, without a file suffix
|
66
|
+
imagedir:: base directory for local image file names in source XML
|
66
67
|
stylesheet:: is the full path filename of the CSS stylesheet for Microsoft Word-specific styles. If this is not provided, the program will use the default stylesheet included in the gem, `lib/html2doc/wordstyle.css`. The stylesheet provided must match this stylesheet; you can obtain one by saving a Word document with your desired styles to HTML, and extracting the style definitions from the HTML document header.
|
67
68
|
header_filename:: is the filename of the HTML document containing header and footer for the document, as well as footnote/endnote separators; if there is none, use nil. To generate your own such document, save a Word document with headers/footers and/or footnote/endnote separators as an HTML document; the `header.html` will be in the `{filename}.fld` folder generated along with the HTML. A sample file is available at https://github.com/metanorma/metanorma-iso/blob/master/lib/asciidoctor/iso/word/header.html
|
68
69
|
dir:: is the folder that any ancillary files (images, headers, filelist) are to be saved to. If not provided, it will be created as `{filename}_files`. Anything in the directory will be attached to the Word document; so this folder should only contain the images that accompany the document. (If the images are elsewhere on the local drive, the gem will move them into the folder. External URL images are left alone, and are not downloaded.)
|
data/bin/html2doc
CHANGED
data/bin/rspec
CHANGED
data/html2doc.gemspec
CHANGED
@@ -1,5 +1,4 @@
|
|
1
|
-
|
2
|
-
lib = File.expand_path("../lib", __FILE__)
|
1
|
+
lib = File.expand_path("lib", __dir__)
|
3
2
|
$LOAD_PATH.unshift(lib) unless $LOAD_PATH.include?(lib)
|
4
3
|
require "html2doc/version"
|
5
4
|
|
@@ -16,23 +15,23 @@ Gem::Specification.new do |spec|
|
|
16
15
|
This gem is in active development.
|
17
16
|
DESCRIPTION
|
18
17
|
|
19
|
-
spec.homepage
|
20
|
-
spec.licenses
|
18
|
+
spec.homepage = "https://github.com/metanorma/html2doc"
|
19
|
+
spec.licenses = ["CC-BY-SA-3.0", "BSD-2-Clause"]
|
21
20
|
|
22
21
|
spec.bindir = "bin"
|
23
22
|
spec.require_paths = ["lib"]
|
24
23
|
spec.files = `git ls-files`.split("\n")
|
25
24
|
spec.test_files = `git ls-files -- {spec}/*`.split("\n")
|
26
|
-
spec.required_ruby_version = Gem::Requirement.new(">= 2.
|
25
|
+
spec.required_ruby_version = Gem::Requirement.new(">= 2.5.0")
|
27
26
|
|
27
|
+
spec.add_dependency "asciimath", "~> 2.0.2"
|
28
28
|
spec.add_dependency "htmlentities", "~> 4.3.4"
|
29
29
|
spec.add_dependency "image_size"
|
30
30
|
spec.add_dependency "mime-types"
|
31
|
-
spec.add_dependency "nokogiri", "~> 1.
|
31
|
+
spec.add_dependency "nokogiri", "~> 1.12"
|
32
|
+
spec.add_dependency "plane1converter", "~> 0.0.1"
|
32
33
|
spec.add_dependency "thread_safe"
|
33
34
|
spec.add_dependency "uuidtools"
|
34
|
-
spec.add_dependency "asciimath", "~> 2.0.2"
|
35
|
-
spec.add_dependency "plane1converter", "~> 0.0.1"
|
36
35
|
|
37
36
|
spec.add_development_dependency "byebug", "~> 9.1"
|
38
37
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
@@ -40,8 +39,8 @@ Gem::Specification.new do |spec|
|
|
40
39
|
spec.add_development_dependency "guard-rspec", "~> 4.7"
|
41
40
|
spec.add_development_dependency "rake", "~> 12.0"
|
42
41
|
spec.add_development_dependency "rspec", "~> 3.6"
|
42
|
+
spec.add_development_dependency "rspec-match_fuzzy", "~> 0.1.3"
|
43
43
|
spec.add_development_dependency "rubocop", "~> 1.5.2"
|
44
44
|
spec.add_development_dependency "simplecov", "~> 0.15"
|
45
45
|
spec.add_development_dependency "timecop", "~> 0.9"
|
46
|
-
spec.add_development_dependency "rspec-match_fuzzy", "~> 0.1.3"
|
47
46
|
end
|
data/lib/html2doc/base.rb
CHANGED
@@ -53,7 +53,7 @@ module Html2Doc
|
|
53
53
|
|
54
54
|
def self.cleanup(docxml, hash)
|
55
55
|
namespace(docxml.root)
|
56
|
-
image_cleanup(docxml, hash[:dir1],
|
56
|
+
image_cleanup(docxml, hash[:dir1], hash[:imagedir])
|
57
57
|
mathml_to_ooml(docxml)
|
58
58
|
lists(docxml, hash[:liststyles])
|
59
59
|
footnotes(docxml)
|
@@ -106,6 +106,7 @@ module Html2Doc
|
|
106
106
|
doc.gsub!(%r{></o:lock>}, "/>")
|
107
107
|
doc.gsub!(%r{></v:imagedata>}, "/>")
|
108
108
|
doc.gsub!(%r{></w:wrap>}, "/>")
|
109
|
+
doc.gsub!(%r{<(/)?m:(span|em)\b}, "<\\1\\2")
|
109
110
|
doc.gsub!(%r{&tab;|&tab;},
|
110
111
|
'<span style="mso-tab-count:1">  </span>')
|
111
112
|
doc.split(%r{(<m:oMath>|</m:oMath>)}).each_slice(4).map do |a|
|
@@ -127,7 +128,7 @@ module Html2Doc
|
|
127
128
|
<meta http-equiv=Content-Type content="text/html; charset=utf-8"/>
|
128
129
|
XML
|
129
130
|
|
130
|
-
def self.define_head1(docxml,
|
131
|
+
def self.define_head1(docxml, _dir)
|
131
132
|
docxml.xpath("//*[local-name() = 'head']").each do |h|
|
132
133
|
h.children.first.add_previous_sibling <<~XML
|
133
134
|
#{PRINT_VIEW}
|
@@ -147,7 +148,7 @@ module Html2Doc
|
|
147
148
|
end
|
148
149
|
end
|
149
150
|
|
150
|
-
def self.stylesheet(
|
151
|
+
def self.stylesheet(_filename, _header_filename, fn)
|
151
152
|
(fn.nil? || fn.empty?) and
|
152
153
|
fn = File.join(File.dirname(__FILE__), "wordstyle.css")
|
153
154
|
stylesheet = File.read(fn, encoding: "UTF-8")
|
data/lib/html2doc/lists.rb
CHANGED
@@ -2,30 +2,29 @@ require "uuidtools"
|
|
2
2
|
require "asciimath"
|
3
3
|
require "htmlentities"
|
4
4
|
require "nokogiri"
|
5
|
-
require "uuidtools"
|
6
5
|
|
7
6
|
module Html2Doc
|
8
|
-
def self.style_list(
|
7
|
+
def self.style_list(elem, level, liststyle, listnumber)
|
9
8
|
return unless liststyle
|
10
9
|
|
11
|
-
if
|
12
|
-
|
10
|
+
if elem["style"]
|
11
|
+
elem["style"] += ";"
|
13
12
|
else
|
14
|
-
|
13
|
+
elem["style"] = ""
|
15
14
|
end
|
16
|
-
|
15
|
+
elem["style"] += "mso-list:#{liststyle} level#{level} lfo#{listnumber};"
|
17
16
|
end
|
18
17
|
|
19
|
-
def self.list_add1(
|
18
|
+
def self.list_add1(elem, liststyles, listtype, level)
|
20
19
|
if %i[ul ol].include? listtype
|
21
|
-
list_add(
|
20
|
+
list_add(elem.xpath(".//ul") - elem.xpath(".//ul//ul | .//ol//ul"),
|
22
21
|
liststyles, :ul, level + 1)
|
23
|
-
list_add(
|
22
|
+
list_add(elem.xpath(".//ol") - elem.xpath(".//ul//ol | .//ol//ol"),
|
24
23
|
liststyles, :ol, level + 1)
|
25
24
|
else
|
26
|
-
list_add(
|
25
|
+
list_add(elem.xpath(".//ul") - elem.xpath(".//ul//ul | .//ol//ul"),
|
27
26
|
liststyles, listtype, level + 1)
|
28
|
-
list_add(
|
27
|
+
list_add(elem.xpath(".//ol") - elem.xpath(".//ul//ol | .//ol//ol"),
|
29
28
|
liststyles, listtype, level + 1)
|
30
29
|
end
|
31
30
|
end
|
@@ -47,45 +46,43 @@ module Html2Doc
|
|
47
46
|
end
|
48
47
|
end
|
49
48
|
|
50
|
-
def self.list2para(
|
51
|
-
return if
|
49
|
+
def self.list2para(list)
|
50
|
+
return if list.xpath("./li").empty?
|
52
51
|
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
52
|
+
list.xpath("./li").first["class"] ||= "MsoListParagraphCxSpFirst"
|
53
|
+
list.xpath("./li").last["class"] ||= "MsoListParagraphCxSpLast"
|
54
|
+
list.xpath("./li/p").each { |p| p["class"] ||= "MsoListParagraphCxSpMiddle" }
|
55
|
+
list.xpath("./li").each do |l|
|
57
56
|
l.name = "p"
|
58
57
|
l["class"] ||= "MsoListParagraphCxSpMiddle"
|
59
58
|
l&.first_element_child&.name == "p" and
|
60
59
|
l.first_element_child.replace(l.first_element_child.children)
|
61
60
|
end
|
62
|
-
|
61
|
+
list.replace(list.children)
|
63
62
|
end
|
64
63
|
|
65
64
|
TOPLIST = "[not(ancestor::ul) and not(ancestor::ol)]".freeze
|
66
65
|
|
67
|
-
def self.lists1(docxml, liststyles,
|
68
|
-
case
|
66
|
+
def self.lists1(docxml, liststyles, style)
|
67
|
+
case style
|
69
68
|
when :ul then list_add(docxml.xpath("//ul[not(@class)]#{TOPLIST}"),
|
70
69
|
liststyles, :ul, 1)
|
71
70
|
when :ol then list_add(docxml.xpath("//ol[not(@class)]#{TOPLIST}"),
|
72
71
|
liststyles, :ol, 1)
|
73
72
|
else
|
74
|
-
list_add(docxml.xpath("//ol[@class = '#{
|
75
|
-
"//ul[@class = '#{
|
76
|
-
liststyles,
|
73
|
+
list_add(docxml.xpath("//ol[@class = '#{style}']#{TOPLIST} | "\
|
74
|
+
"//ul[@class = '#{style}']#{TOPLIST}"),
|
75
|
+
liststyles, style, 1)
|
77
76
|
end
|
78
77
|
end
|
79
78
|
|
80
79
|
def self.lists_unstyled(docxml, liststyles)
|
81
|
-
|
80
|
+
liststyles.has_key?(:ul) and
|
82
81
|
list_add(docxml.xpath("//ul#{TOPLIST}[not(@seen)]"),
|
83
82
|
liststyles, :ul, 1)
|
84
|
-
|
85
|
-
if liststyles.has_key?(:ol)
|
83
|
+
liststyles.has_key?(:ol) and
|
86
84
|
list_add(docxml.xpath("//ol#{TOPLIST}[not(@seen)]"),
|
87
85
|
liststyles, :ul, 1)
|
88
|
-
end
|
89
86
|
docxml.xpath("//ul[@seen] | //ol[@seen]").each do |l|
|
90
87
|
l.delete("seen")
|
91
88
|
end
|
data/lib/html2doc/math.rb
CHANGED
@@ -25,13 +25,18 @@ module Html2Doc
|
|
25
25
|
|
26
26
|
m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
|
27
27
|
m.each_slice(4).map.with_index do |(*a), i|
|
28
|
-
i
|
29
|
-
warn "MathML #{i} of #{(m.size / 4).floor}"
|
28
|
+
progress_conv(i, 500, (m.size / 4).floor, 1000, "AsciiMath")
|
30
29
|
a[2].nil? || a[2] = asciimath_to_mathml1(a[2])
|
31
30
|
a.size > 1 ? a[0] + a[2] : a[0]
|
32
31
|
end.join
|
33
32
|
end
|
34
33
|
|
34
|
+
def self.progress_conv(idx, step, total, threshold, msg)
|
35
|
+
return unless (idx % step).zero? && total > threshold && idx.positive?
|
36
|
+
|
37
|
+
warn "#{msg} #{idx} of #{total}"
|
38
|
+
end
|
39
|
+
|
35
40
|
def self.unwrap_accents(doc)
|
36
41
|
doc.xpath("//*[@accent = 'true']").each do |x|
|
37
42
|
x.elements.length > 1 or next
|
@@ -69,18 +74,20 @@ module Html2Doc
|
|
69
74
|
math
|
70
75
|
end
|
71
76
|
|
77
|
+
HTML_NS = 'xmlns="http://www.w3.org/1999/xhtml"'.freeze
|
78
|
+
|
72
79
|
def self.unitalic(math)
|
73
80
|
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
|
74
|
-
x.wrap("<span style='font-style:normal;'></span>")
|
81
|
+
x.wrap("<span #{HTML_NS} style='font-style:normal;'></span>")
|
75
82
|
end
|
76
83
|
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
|
77
|
-
x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>")
|
84
|
+
x.wrap("<span #{HTML_NS} class='nostem' style='font-weight:bold;'><em></em></span>")
|
78
85
|
end
|
79
86
|
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
|
80
|
-
x.wrap("<span class='nostem'><em></em></span>")
|
87
|
+
x.wrap("<span #{HTML_NS} class='nostem'><em></em></span>")
|
81
88
|
end
|
82
89
|
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
|
83
|
-
x.wrap("<span style='font-style:normal;font-weight:bold;'></span>")
|
90
|
+
x.wrap("<span #{HTML_NS} style='font-style:normal;font-weight:bold;'></span>")
|
84
91
|
end
|
85
92
|
math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
|
86
93
|
to_plane1(x, :monospace)
|
@@ -128,20 +135,30 @@ module Html2Doc
|
|
128
135
|
docnamespaces = docxml.collect_namespaces
|
129
136
|
m = docxml.xpath("//*[local-name() = 'math']")
|
130
137
|
m.each_with_index do |x, i|
|
131
|
-
i
|
132
|
-
|
133
|
-
element = ooxml_cleanup(x, docnamespaces)
|
134
|
-
doc = Nokogiri::XML::Document::new
|
135
|
-
doc.root = element
|
136
|
-
ooxml = unitalic(esc_space(@xsltemplate.transform(doc))).to_s
|
137
|
-
.gsub(/<\?[^>]+>\s*/, "")
|
138
|
-
.gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
|
139
|
-
.gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
|
140
|
-
ooxml = uncenter(x, ooxml)
|
141
|
-
x.swap(ooxml)
|
138
|
+
progress_conv(i, 100, m.size, 500, "Math OOXML")
|
139
|
+
mathml_to_ooml1(x, docnamespaces)
|
142
140
|
end
|
143
141
|
end
|
144
142
|
|
143
|
+
# We need span and em not to be namespaced. Word can't deal with explicit
|
144
|
+
# namespaces.
|
145
|
+
# We will end up stripping them out again under Nokogiri 1.11, which correctly
|
146
|
+
# insists on inheriting namespace from parent.
|
147
|
+
def self.ooml_clean(xml)
|
148
|
+
xml.to_s
|
149
|
+
.gsub(/<\?[^>]+>\s*/, "")
|
150
|
+
.gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
|
151
|
+
.gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
|
152
|
+
end
|
153
|
+
|
154
|
+
def self.mathml_to_ooml1(xml, docnamespaces)
|
155
|
+
doc = Nokogiri::XML::Document::new
|
156
|
+
doc.root = ooxml_cleanup(xml, docnamespaces)
|
157
|
+
ooxml = ooml_clean(unitalic(esc_space(@xsltemplate.transform(doc))))
|
158
|
+
ooxml = uncenter(xml, ooxml)
|
159
|
+
xml.swap(ooxml)
|
160
|
+
end
|
161
|
+
|
145
162
|
# escape space as 2; we are removing any spaces generated by
|
146
163
|
# XML indentation
|
147
164
|
def self.esc_space(xml)
|
@@ -157,7 +174,7 @@ module Html2Doc
|
|
157
174
|
# left/right if parent is so tagged
|
158
175
|
def self.uncenter(math, ooxml)
|
159
176
|
alignnode = math.at(".//ancestor::*[@style][local-name() = 'p' or "\
|
160
|
-
|
177
|
+
"local-name() = 'div' or local-name() = 'td']/@style")
|
161
178
|
return ooxml unless alignnode && (math.next == nil && math.previous == nil)
|
162
179
|
|
163
180
|
%w(left right).each do |dir|
|
data/lib/html2doc/mime.rb
CHANGED
@@ -43,7 +43,7 @@ module Html2Doc
|
|
43
43
|
def self.mime_type(item)
|
44
44
|
types = MIME::Types.type_for(item)
|
45
45
|
type = types ? types.first.to_s : 'text/plain; charset="utf-8"'
|
46
|
-
type = type
|
46
|
+
type = %(#{type} charset="utf-8") if /^text/.match(type) && types
|
47
47
|
type
|
48
48
|
end
|
49
49
|
|
@@ -77,14 +77,14 @@ module Html2Doc
|
|
77
77
|
end
|
78
78
|
|
79
79
|
# max width for Word document is 400, max height is 680
|
80
|
-
def self.image_resize(
|
81
|
-
|
82
|
-
s = [
|
83
|
-
s =
|
84
|
-
return [nil, nil] if
|
85
|
-
|
86
|
-
s[1] = s[0] *
|
87
|
-
s[0] = s[1] *
|
80
|
+
def self.image_resize(img, path, maxheight, maxwidth)
|
81
|
+
realsize = ImageSize.path(path).size
|
82
|
+
s = [img["width"].to_i, img["height"].to_i]
|
83
|
+
s = realsize if s[0].zero? && s[1].zero?
|
84
|
+
return [nil, nil] if realsize.nil? || realsize[0].nil? || realsize[1].nil?
|
85
|
+
|
86
|
+
s[1] = s[0] * realsize[1] / realsize[0] if s[1].zero? && !s[0].zero?
|
87
|
+
s[0] = s[1] * realsize[0] / realsize[1] if s[0].zero? && !s[1].zero?
|
88
88
|
s = [(s[0] * maxheight / s[1]).ceil, maxheight] if s[1] > maxheight
|
89
89
|
s = [maxwidth, (s[1] * maxwidth / s[0]).ceil] if s[0] > maxwidth
|
90
90
|
s
|
@@ -100,16 +100,18 @@ module Html2Doc
|
|
100
100
|
warn "#{src}: SVG not supported" if /\.svg$/i.match?(src)
|
101
101
|
end
|
102
102
|
|
103
|
+
def self.localname(src, localdir)
|
104
|
+
%r{^([A-Z]:)?/}.match?(src) ? src : File.join(localdir, src)
|
105
|
+
end
|
106
|
+
|
103
107
|
# only processes locally stored images
|
104
108
|
def self.image_cleanup(docxml, dir, localdir)
|
105
109
|
docxml.traverse do |i|
|
106
110
|
next unless i.element? && %w(img v:imagedata).include?(i.name)
|
107
|
-
|
108
|
-
next if /^http/.match i["src"]
|
111
|
+
next if /^http/.match? i["src"]
|
109
112
|
next if %r{^data:(image|application)/[^;]+;base64}.match? i["src"]
|
110
113
|
|
111
|
-
local_filename =
|
112
|
-
File.join(localdir, i["src"])
|
114
|
+
local_filename = localname(i["src"], localdir)
|
113
115
|
new_filename = "#{mkuuid}#{File.extname(i['src'])}"
|
114
116
|
FileUtils.cp local_filename, File.join(dir, new_filename)
|
115
117
|
i["width"], i["height"] = image_resize(i, local_filename, 680, 400)
|
@@ -130,12 +132,9 @@ module Html2Doc
|
|
130
132
|
if a.size == 2 && !(/ src="https?:/.match a[1]) &&
|
131
133
|
!(%r{ src="data:(image|application)/[^;]+;base64}.match a[1])
|
132
134
|
m = / src=['"](?<src>[^"']+)['"]/.match a[1]
|
133
|
-
#warnsvg(m[:src])
|
134
135
|
m2 = /\.(?<suffix>[a-zA-Z_0-9]+)$/.match m[:src]
|
135
136
|
new_filename = "#{mkuuid}.#{m2[:suffix]}"
|
136
|
-
|
137
|
-
File.join(localdir, m[:src])
|
138
|
-
FileUtils.cp old_filename, File.join(dir, new_filename)
|
137
|
+
FileUtils.cp localname(m[:src], localdir), File.join(dir, new_filename)
|
139
138
|
a[1].sub!(%r{ src=['"](?<src>[^"']+)['"]}, " src='cid:#{new_filename}'")
|
140
139
|
end
|
141
140
|
a.join
|
data/lib/html2doc/notes.rb
CHANGED
@@ -56,19 +56,22 @@ module Html2Doc
|
|
56
56
|
|
57
57
|
set_footnote_link_attrs(elem, idx)
|
58
58
|
if elem.at("./span[@class = 'MsoFootnoteReference']")
|
59
|
-
elem
|
60
|
-
|
61
|
-
c.replace(FN)
|
62
|
-
else
|
63
|
-
c.wrap("<span class='MsoFootnoteReference'></span>")
|
64
|
-
end
|
65
|
-
end
|
66
|
-
else
|
67
|
-
elem.children = FN
|
59
|
+
process_footnote_link1(elem)
|
60
|
+
else elem.children = FN
|
68
61
|
end
|
69
62
|
footnote << transform_footnote_text(note)
|
70
63
|
end
|
71
64
|
|
65
|
+
def self.process_footnote_link1(elem)
|
66
|
+
elem.children.each do |c|
|
67
|
+
if c.name == "span" && c["class"] == "MsoFootnoteReference"
|
68
|
+
c.replace(FN)
|
69
|
+
else
|
70
|
+
c.wrap("<span class='MsoFootnoteReference'></span>")
|
71
|
+
end
|
72
|
+
end
|
73
|
+
end
|
74
|
+
|
72
75
|
def self.transform_footnote_text(note)
|
73
76
|
note["id"] = ""
|
74
77
|
note.xpath(".//div").each { |div| div.replace(div.children) }
|
data/lib/html2doc/version.rb
CHANGED
data/lib/html2doc.rb
CHANGED