html2doc 0.0.1 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/README.adoc +7 -4
- data/html2doc.gemspec +3 -1
- data/lib/html2doc.rb +1 -0
- data/lib/html2doc/base.rb +110 -91
- data/lib/html2doc/mime.rb +0 -141
- data/lib/html2doc/version.rb +1 -1
- data/lib/html2doc/wordstyle.css +6 -0
- metadata +31 -3
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 878408b54af45f8693aade94aee06047dcd450a3
|
4
|
+
data.tar.gz: 66775cf77b38dc25490da74ac84a5dd5ade68650
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 04baa6214e38eb83f7bd687d42b4bc4db9a28ac80e9c78944dc0ec1150e8c3d9f3fb0ccbaf4df00040b17362405c7dc28765335c01a6d2e5a1e47314232c5b01
|
7
|
+
data.tar.gz: 0ad65befcc98b15e89bd6c94a8b18a54f709fbf01185b786dd1675a287473a4bb8ad05149647e383c130fcefaafc66048711a438eb557fe80fbd3fc08fded629
|
data/README.adoc
CHANGED
@@ -8,8 +8,10 @@ This work is driven by the Word document generation procedure documented in http
|
|
8
8
|
|
9
9
|
The gem currently does the following:
|
10
10
|
|
11
|
+
* Convert any AsciiMath and MathML to Word's native mathematical formatting language.
|
11
12
|
* Resize any images in the HTML file to fit within the maximum page size. (Word will otherwise crash on reading the document.)
|
12
13
|
* Generate a filelist.xml listing of all files to be bundled into the Word document.
|
14
|
+
* Assign the class `MsoNormal` to any paragraphs that do not have a class, so that they can be treated as Normal Style when editing the Word document.
|
13
15
|
* Inject Microsoft Word-specific CSS into the HTML document. The CSS file used is at `lib/html2doc/wordstyle.css`, and can be customised. (This generic CSS can be overridden by CSS already in the HTML document, since the generic CSS is injected at the top of the document.)
|
14
16
|
* Bundle up the images, the HTML file of the document proper, and the `header.html` file representing header/footer information, into a MIME file, and save that file to disk (so that Microsoft Word can deal with it as a Word file.)
|
15
17
|
|
@@ -19,7 +21,6 @@ Work being tracked at https://github.com/riboseinc/asciidoctor-iso/issues/47:
|
|
19
21
|
|
20
22
|
* Render footnotes
|
21
23
|
* Render (editorial) comments
|
22
|
-
* Render MathML, AsciiMath
|
23
24
|
|
24
25
|
== Constraints
|
25
26
|
|
@@ -33,12 +34,14 @@ TO DO: compare with https://github.com/MuhammetDilmac/Html2Docx (much simpler, b
|
|
33
34
|
--
|
34
35
|
require "html2doc"
|
35
36
|
|
36
|
-
Html2Doc.process(result, filename, header_filename, dir)
|
37
|
+
Html2Doc.process(result, filename, stylesheet, header_filename, dir, asciimathdelims = nil)
|
37
38
|
--
|
38
39
|
|
39
40
|
result:: is the Html document to be converted into Word, as a string.
|
40
41
|
filename:: is the name the document is to be saved as, without a file suffix
|
41
|
-
|
42
|
+
stylesheet:: is the full path filename of the CSS stylesheet for Microsoft Word-specific styles. If this is not provided (`nil`), the program will use the default stylesheet included in the gem, `lib/html2doc/wordstyle.css`. The stylesheet provided must match this stylesheet; you can obtain one by saving a Word document with your desired styles to HTML, and extracting the style definitions from the HTML document header.
|
43
|
+
header_filename:: is the filename of the HTML document containing header and footer for the document, as well as footnote/endnote separators; if there is none, use nil. To generate your own such document, save a Word document with headers/footers and/or footnote/endnote separators as an HTML document; the `header.html` will be in the `{filename}.fld` folder generated along with the HTML. A sample file is available at https://github.com/riboseinc/asciidoctor-iso/blob/master/lib/asciidoctor/iso/word/header.html
|
42
44
|
dir:: is the directory the document is to be saved to
|
45
|
+
asciimathdelims:: are the AsciiMath delimiters used in the text. If none are provided, no AsciiMath conversion is attempted.
|
43
46
|
|
44
|
-
Note that the local CSS file contains a variable `FILENAME` for the location of footnote/endnote separators and headers/footers, which are provided in the header HTML file. The gem replaces `FILENAME` with the file nane that the document will be saved as.
|
47
|
+
Note that the local CSS stylesheet file contains a variable `FILENAME` for the location of footnote/endnote separators and headers/footers, which are provided in the header HTML file. The gem replaces `FILENAME` with the file name that the document will be saved as. If you supply your own stylesheet and also wish to use separators or headers/footers, you will likewise need to replace the document name mentioned in your stylesheet with a `FILENAME` string.
|
data/html2doc.gemspec
CHANGED
@@ -10,7 +10,7 @@ Gem::Specification.new do |spec|
|
|
10
10
|
spec.email = ["open.source@ribose.com"]
|
11
11
|
|
12
12
|
spec.summary = "Convert HTML document to Microsoft Word document"
|
13
|
-
|
13
|
+
"in AsciiDoc."
|
14
14
|
spec.description = <<~DESCRIPTION
|
15
15
|
Convert HTML document to Microsoft Word document.
|
16
16
|
|
@@ -32,6 +32,8 @@ Gem::Specification.new do |spec|
|
|
32
32
|
spec.add_dependency "nokogiri", "~> 1.8.1"
|
33
33
|
spec.add_dependency "thread_safe"
|
34
34
|
spec.add_dependency "uuidtools"
|
35
|
+
spec.add_dependency "ruby-xslt"
|
36
|
+
spec.add_dependency "asciimath"
|
35
37
|
|
36
38
|
spec.add_development_dependency "bundler", "~> 1.15"
|
37
39
|
spec.add_development_dependency "byebug", "~> 9.1"
|
data/lib/html2doc.rb
CHANGED
data/lib/html2doc/base.rb
CHANGED
@@ -1,34 +1,82 @@
|
|
1
1
|
require "uuidtools"
|
2
|
+
require "asciimath"
|
2
3
|
require "nokogiri"
|
4
|
+
require "xml/xslt"
|
5
|
+
require "pp"
|
3
6
|
|
4
7
|
module Html2Doc
|
5
|
-
|
6
|
-
|
8
|
+
@xslt = XML::XSLT.new
|
9
|
+
@xslt.xsl = File.read(File.join(File.dirname(__FILE__), "mathml2omml.xsl"))
|
10
|
+
|
11
|
+
def self.process(result, filename, stylesheet, header_file, dir,
|
12
|
+
asciimathdelims = nil)
|
13
|
+
result = process_html(result, filename, stylesheet, header_file,
|
14
|
+
dir, asciimathdelims)
|
15
|
+
system "cp #{header_file} #{dir}/header.html" unless header_file.nil?
|
16
|
+
generate_filelist(filename, dir)
|
17
|
+
File.open("#{filename}.htm", "w") { |f| f.write(result) }
|
18
|
+
mime_package result, filename, dir
|
19
|
+
rm_temp_files(filename, dir)
|
20
|
+
end
|
21
|
+
|
22
|
+
def self.process_html(result, filename, stylesheet, header_file, dir, asciimathdelims)
|
23
|
+
docxml = Nokogiri::XML(asciimath_to_mathml(result, asciimathdelims))
|
24
|
+
define_head(cleanup(docxml, dir), dir, filename, stylesheet, header_file)
|
25
|
+
result = msword_fix(docxml.to_xml)
|
26
|
+
end
|
27
|
+
|
28
|
+
def self.rm_temp_files(filename, dir)
|
29
|
+
system "rm #{filename}.htm"
|
30
|
+
system "rm -r #{filename}_files"
|
31
|
+
end
|
32
|
+
|
33
|
+
def self.cleanup(docxml, dir)
|
34
|
+
image_cleanup(docxml, dir)
|
35
|
+
mathml_to_ooml(docxml)
|
36
|
+
msonormal(docxml)
|
37
|
+
docxml
|
38
|
+
end
|
39
|
+
|
40
|
+
def self.asciimath_to_mathml(doc, delims)
|
41
|
+
return doc if delims.nil? || delims.size < 2
|
42
|
+
doc.split(/(#{delims[0]}|#{delims[1]})/).each_slice(4).map do |a|
|
43
|
+
a[2].nil? || a[2] = AsciiMath.parse(a[2]).to_mathml.
|
44
|
+
gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
|
45
|
+
a.size > 1 ? a[0] + a[2] : a[0]
|
46
|
+
end.join
|
47
|
+
end
|
48
|
+
|
49
|
+
def self.mathml_to_ooml(docxml)
|
50
|
+
docxml.xpath("//*[local-name() = 'math']").each do |m|
|
51
|
+
@xslt.xml = m.to_s.gsub(/<math>/,
|
52
|
+
"<math xmlns='http://www.w3.org/1998/Math/MathML'>")
|
53
|
+
ooml = @xslt.serve.gsub(/<\?[^>]+>\s*/, "").
|
54
|
+
gsub(/ xmlns:[^=]+="[^"]+"/, "")# .gsub(%r{(</?)}, "\\1m:")
|
55
|
+
m.swap(ooml)
|
56
|
+
end
|
57
|
+
end
|
58
|
+
|
59
|
+
# preserve HTML escapes
|
60
|
+
def self.xhtml(result)
|
7
61
|
unless /<!DOCTYPE html/.match? result
|
8
62
|
result.gsub!(/<\?xml version="1.0"\?>/, "")
|
9
63
|
result = "<!DOCTYPE html SYSTEM " +
|
10
64
|
"'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>" + result
|
11
65
|
end
|
12
|
-
|
13
|
-
image_cleanup(docxml, dir)
|
14
|
-
define_head(docxml, dir, filename, header_file)
|
15
|
-
result = self.msword_fix(docxml.to_xml)
|
16
|
-
system "cp #{header_file} #{dir}/header.html" unless header_file.nil?
|
17
|
-
generate_filelist(filename, dir)
|
18
|
-
File.open("#{filename}.htm", "w") { |f| f.write(result) }
|
19
|
-
mime_package result, filename, dir
|
66
|
+
result
|
20
67
|
end
|
21
68
|
|
22
69
|
def self.msword_fix(r)
|
23
70
|
# brain damage in MSWord parser
|
24
|
-
r.gsub(%r{<span style="mso-special-character:footnote"/>},
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
71
|
+
r.gsub!(%r{<span style="mso-special-character:footnote"/>},
|
72
|
+
'<span style="mso-special-character:footnote"></span>')
|
73
|
+
r.gsub!(%r{(<a style="mso-comment-reference:[^>/]+)/>}, "\\1></a>")
|
74
|
+
r.gsub!(%r{<link rel="File-List"}, "<link rel=File-List")
|
75
|
+
r.gsub!(%r{<meta http-equiv="Content-Type"},
|
76
|
+
"<meta http-equiv=Content-Type")
|
77
|
+
r.gsub!(%r{&tab;|&tab;},
|
78
|
+
'<span style="mso-tab-count:1">  </span>')
|
79
|
+
r
|
32
80
|
end
|
33
81
|
|
34
82
|
def self.image_resize(orig_filename)
|
@@ -52,11 +100,8 @@ module Html2Doc
|
|
52
100
|
new_full_filename = File.join(dir, "#{uuid}.#{matched[:suffix]}")
|
53
101
|
# presupposes that the image source is local
|
54
102
|
system "cp #{i['src']} #{new_full_filename}"
|
55
|
-
# image_size = image_resize(i["src"])
|
56
103
|
i["width"], i["height"] = image_resize(i["src"])
|
57
104
|
i["src"] = new_full_filename
|
58
|
-
#i["height"] = image_size[1]
|
59
|
-
#i["width"] = image_size[0]
|
60
105
|
end
|
61
106
|
docxml
|
62
107
|
end
|
@@ -64,102 +109,67 @@ module Html2Doc
|
|
64
109
|
def self.define_head1(docxml, dir)
|
65
110
|
docxml.xpath("//*[local-name() = 'head']").each do |h|
|
66
111
|
h.children.first.add_previous_sibling <<~XML
|
67
|
-
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
-
|
72
|
-
|
73
|
-
|
74
|
-
|
75
|
-
|
76
|
-
|
77
|
-
|
112
|
+
<!--[if gte mso 9]>
|
113
|
+
<xml>
|
114
|
+
<w:WordDocument>
|
115
|
+
<w:View>Print</w:View>
|
116
|
+
<w:Zoom>100</w:Zoom>
|
117
|
+
<w:DoNotOptimizeForBrowser/>
|
118
|
+
</w:WordDocument>
|
119
|
+
</xml>
|
120
|
+
<![endif]-->
|
121
|
+
<meta http-equiv=Content-Type content="text/html; charset=utf-8"/>
|
122
|
+
<link rel="File-List" href="#{dir}/filelist.xml"/>
|
78
123
|
XML
|
79
124
|
end
|
80
125
|
end
|
81
126
|
|
82
|
-
def self.stylesheet
|
83
|
-
fn = File.join(File.dirname(__FILE__), "wordstyle.css")
|
84
|
-
stylesheet = File.read(fn, encoding: "UTF-8")
|
127
|
+
def self.filename_substitute(stylesheet, header_filename, filename)
|
85
128
|
if header_filename.nil?
|
86
129
|
stylesheet.gsub!(/\n[^\n]*FILENAME[^\n]*i\n/, "\n")
|
87
130
|
else
|
88
131
|
stylesheet.gsub!(/FILENAME/, filename)
|
89
132
|
end
|
133
|
+
stylesheet
|
134
|
+
end
|
135
|
+
|
136
|
+
def self.stylesheet(filename, header_filename, fn)
|
137
|
+
(fn.nil? || fn.empty?) &&
|
138
|
+
fn = File.join(File.dirname(__FILE__), "wordstyle.css")
|
139
|
+
stylesheet = File.read(fn, encoding: "UTF-8")
|
140
|
+
stylesheet = filename_substitute(stylesheet, header_filename, filename)
|
90
141
|
xml = Nokogiri::XML("<style/>")
|
91
142
|
xml.children.first << Nokogiri::XML::Comment.new(xml, "\n#{stylesheet}\n")
|
92
143
|
xml.root.to_s
|
93
144
|
end
|
94
145
|
|
95
|
-
def self.define_head(docxml, dir, filename, header_file)
|
146
|
+
def self.define_head(docxml, dir, filename, cssname, header_file)
|
96
147
|
title = docxml.at("//*[local-name() = 'head']/*[local-name() = 'title']")
|
97
148
|
head = docxml.at("//*[local-name() = 'head']")
|
149
|
+
css = stylesheet(filename, header_file, cssname)
|
98
150
|
if title.nil?
|
99
|
-
head.children.first.add_previous_sibling
|
151
|
+
head.children.first.add_previous_sibling css
|
100
152
|
else
|
101
|
-
title.add_next_sibling
|
153
|
+
title.add_next_sibling css
|
102
154
|
end
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
def self.mime_preamble(boundary, filename, result)
|
107
|
-
<<~"PREAMBLE"
|
108
|
-
MIME-Version: 1.0
|
109
|
-
Content-Type: multipart/related; boundary="#{boundary}"
|
110
|
-
|
111
|
-
--#{boundary}
|
112
|
-
Content-Location: file:///C:/Doc/#{filename}.htm
|
113
|
-
Content-Type: text/html; charset="utf-8"
|
114
|
-
|
115
|
-
#{result}
|
116
|
-
|
117
|
-
PREAMBLE
|
155
|
+
define_head1(docxml, dir)
|
156
|
+
namespace(docxml.root)
|
118
157
|
end
|
119
158
|
|
120
|
-
def self.
|
121
|
-
|
122
|
-
|
123
|
-
|
124
|
-
|
125
|
-
|
126
|
-
|
127
|
-
Content-Transfer-Encoding: base64
|
128
|
-
Content-Type: #{mime_type(item)}
|
129
|
-
|
130
|
-
#{encoded_file}
|
131
|
-
|
132
|
-
FILE
|
133
|
-
end
|
134
|
-
|
135
|
-
def self.mime_type(item)
|
136
|
-
types = MIME::Types.type_for(item)
|
137
|
-
type = types ? types.first.to_s : 'text/plain; charset="utf-8"'
|
138
|
-
type = type + ' charset="utf-8"' if /^text/.match?(type) && types
|
139
|
-
type
|
140
|
-
end
|
141
|
-
|
142
|
-
def self.mime_boundary
|
143
|
-
salt = UUIDTools::UUID.random_create.to_s.gsub(/-/, ".")[0..17]
|
144
|
-
"----=_NextPart_#{salt}"
|
145
|
-
end
|
146
|
-
|
147
|
-
def self.mime_package(result, filename, dir)
|
148
|
-
boundary = mime_boundary
|
149
|
-
mhtml = mime_preamble(boundary, filename, result)
|
150
|
-
Dir.foreach(dir) do |item|
|
151
|
-
next if item == "." || item == ".." || /^\./.match(item)
|
152
|
-
mhtml += mime_attachment(boundary, filename, item, dir)
|
153
|
-
end
|
154
|
-
mhtml += "--#{boundary}--"
|
155
|
-
File.open("#{filename}.doc", "w") { |f| f.write mhtml }
|
159
|
+
def self.namespace(root)
|
160
|
+
{
|
161
|
+
o: "urn:schemas-microsoft-com:office:office",
|
162
|
+
w: "urn:schemas-microsoft-com:office:word",
|
163
|
+
m: "http://schemas.microsoft.com/office/2004/12/omml",
|
164
|
+
}.each { |k, v| root.add_namespace_definition(k.to_s, v) }
|
165
|
+
root.add_namespace(nil, "http://www.w3.org/TR/REC-html40")
|
156
166
|
end
|
157
167
|
|
158
168
|
def self.generate_filelist(filename, dir)
|
159
169
|
File.open(File.join(dir, "filelist.xml"), "w") do |f|
|
160
170
|
f.write(<<~"XML")
|
161
|
-
|
162
|
-
|
171
|
+
<xml xmlns:o="urn:schemas-microsoft-com:office:office">
|
172
|
+
<o:MainFile HRef="../#{filename}.htm"/>
|
163
173
|
XML
|
164
174
|
Dir.foreach(dir) do |item|
|
165
175
|
next if item == "." || item == ".." || /^\./.match(item)
|
@@ -168,4 +178,13 @@ module Html2Doc
|
|
168
178
|
f.write("</xml>\n")
|
169
179
|
end
|
170
180
|
end
|
181
|
+
|
182
|
+
def self.msonormal(docxml)
|
183
|
+
docxml.xpath("//*[local-name() = 'p'][not(self::*[@class])]").each do |p|
|
184
|
+
p["class"] = "MsoNormal"
|
185
|
+
end
|
186
|
+
docxml.xpath("//*[local-name() = 'li'][not(self::*[@class])]").each do |p|
|
187
|
+
p["class"] = "MsoNormal"
|
188
|
+
end
|
189
|
+
end
|
171
190
|
end
|
data/lib/html2doc/mime.rb
CHANGED
@@ -2,124 +2,6 @@ require "uuidtools"
|
|
2
2
|
require "nokogiri"
|
3
3
|
|
4
4
|
module Html2Doc
|
5
|
-
def self.process(result, filename, header_file, dir)
|
6
|
-
docxml = Nokogiri::XML(xhtml(result))
|
7
|
-
cleanup(docxml, dir)
|
8
|
-
define_head(docxml, dir, filename, header_file)
|
9
|
-
result = self.msword_fix(docxml.to_xml)
|
10
|
-
system "cp #{header_file} #{dir}/header.html" unless header_file.nil?
|
11
|
-
generate_filelist(filename, dir)
|
12
|
-
File.open("#{filename}.htm", "w") { |f| f.write(result) }
|
13
|
-
mime_package result, filename, dir
|
14
|
-
end
|
15
|
-
|
16
|
-
def self.cleanup(docxml, dir)
|
17
|
-
image_cleanup(docxml, dir)
|
18
|
-
msonormal(docxml)
|
19
|
-
end
|
20
|
-
|
21
|
-
# preserve HTML escapes
|
22
|
-
def self.xhtml(result)
|
23
|
-
unless /<!DOCTYPE html/.match? result
|
24
|
-
result.gsub!(/<\?xml version="1.0"\?>/, "")
|
25
|
-
result = "<!DOCTYPE html SYSTEM " +
|
26
|
-
"'http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd'>" + result
|
27
|
-
end
|
28
|
-
result
|
29
|
-
end
|
30
|
-
|
31
|
-
def self.msword_fix(r)
|
32
|
-
# brain damage in MSWord parser
|
33
|
-
r.gsub!(%r{<span style="mso-special-character:footnote"/>},
|
34
|
-
'<span style="mso-special-character:footnote"></span>')
|
35
|
-
r.gsub!(%r{(<a style="mso-comment-reference:[^>/]+)/>}, "\\1></a>")
|
36
|
-
r.gsub!(%r{<link rel="File-List"}, "<link rel=File-List")
|
37
|
-
r.gsub!(%r{<meta http-equiv="Content-Type"},
|
38
|
-
"<meta http-equiv=Content-Type")
|
39
|
-
r.gsub!(%r{&tab;|&tab;},
|
40
|
-
'<span style="mso-tab-count:1">  </span>')
|
41
|
-
r
|
42
|
-
end
|
43
|
-
|
44
|
-
def self.image_resize(orig_filename)
|
45
|
-
image_size = ImageSize.path(orig_filename).size
|
46
|
-
# max width for Word document is 400, max height is 680
|
47
|
-
if image_size[0] > 400
|
48
|
-
image_size[1] = (image_size[1] * 400 / image_size[0]).ceil
|
49
|
-
image_size[0] = 400
|
50
|
-
end
|
51
|
-
if image_size[1] > 680
|
52
|
-
image_size[0] = (image_size[0] * 680 / image_size[1]).ceil
|
53
|
-
image_size[1] = 680
|
54
|
-
end
|
55
|
-
image_size
|
56
|
-
end
|
57
|
-
|
58
|
-
def self.image_cleanup(docxml, dir)
|
59
|
-
docxml.xpath("//*[local-name() = 'img']").each do |i|
|
60
|
-
matched = /\.(?<suffix>\S+)$/.match i["src"]
|
61
|
-
uuid = UUIDTools::UUID.random_create.to_s
|
62
|
-
new_full_filename = File.join(dir, "#{uuid}.#{matched[:suffix]}")
|
63
|
-
# presupposes that the image source is local
|
64
|
-
system "cp #{i['src']} #{new_full_filename}"
|
65
|
-
i["width"], i["height"] = image_resize(i["src"])
|
66
|
-
i["src"] = new_full_filename
|
67
|
-
end
|
68
|
-
docxml
|
69
|
-
end
|
70
|
-
|
71
|
-
def self.define_head1(docxml, dir)
|
72
|
-
docxml.xpath("//*[local-name() = 'head']").each do |h|
|
73
|
-
h.children.first.add_previous_sibling <<~XML
|
74
|
-
<!--[if gte mso 9]>
|
75
|
-
<xml>
|
76
|
-
<w:WordDocument>
|
77
|
-
<w:View>Print</w:View>
|
78
|
-
<w:Zoom>100</w:Zoom>
|
79
|
-
<w:DoNotOptimizeForBrowser/>
|
80
|
-
</w:WordDocument>
|
81
|
-
</xml>
|
82
|
-
<![endif]-->
|
83
|
-
<meta http-equiv=Content-Type content="text/html; charset=utf-8"/>
|
84
|
-
<link rel="File-List" href="#{dir}/filelist.xml"/>
|
85
|
-
XML
|
86
|
-
end
|
87
|
-
end
|
88
|
-
|
89
|
-
def self.stylesheet(filename, header_filename)
|
90
|
-
fn = File.join(File.dirname(__FILE__), "wordstyle.css")
|
91
|
-
stylesheet = File.read(fn, encoding: "UTF-8")
|
92
|
-
if header_filename.nil?
|
93
|
-
stylesheet.gsub!(/\n[^\n]*FILENAME[^\n]*i\n/, "\n")
|
94
|
-
else
|
95
|
-
stylesheet.gsub!(/FILENAME/, filename)
|
96
|
-
end
|
97
|
-
xml = Nokogiri::XML("<style/>")
|
98
|
-
xml.children.first << Nokogiri::XML::Comment.new(xml, "\n#{stylesheet}\n")
|
99
|
-
xml.root.to_s
|
100
|
-
end
|
101
|
-
|
102
|
-
def self.define_head(docxml, dir, filename, header_file)
|
103
|
-
title = docxml.at("//*[local-name() = 'head']/*[local-name() = 'title']")
|
104
|
-
head = docxml.at("//*[local-name() = 'head']")
|
105
|
-
if title.nil?
|
106
|
-
head.children.first.add_previous_sibling stylesheet(filename, header_file)
|
107
|
-
else
|
108
|
-
title.add_next_sibling stylesheet(filename, header_file)
|
109
|
-
end
|
110
|
-
define_head1(docxml, dir)
|
111
|
-
namespace(docxml.root)
|
112
|
-
end
|
113
|
-
|
114
|
-
def self.namespace(root)
|
115
|
-
{
|
116
|
-
o: "urn:schemas-microsoft-com:office:office",
|
117
|
-
w: "urn:schemas-microsoft-com:office:word",
|
118
|
-
m: "http://schemas.microsoft.com/office/2004/12/omml",
|
119
|
-
}.each { |k, v| root.add_namespace_definition(k.to_s, v) }
|
120
|
-
root.add_namespace(nil, "http://www.w3.org/TR/REC-html40")
|
121
|
-
end
|
122
|
-
|
123
5
|
def self.mime_preamble(boundary, filename, result)
|
124
6
|
<<~"PREAMBLE"
|
125
7
|
MIME-Version: 1.0
|
@@ -171,27 +53,4 @@ module Html2Doc
|
|
171
53
|
mhtml += "--#{boundary}--"
|
172
54
|
File.open("#{filename}.doc", "w") { |f| f.write mhtml }
|
173
55
|
end
|
174
|
-
|
175
|
-
def self.generate_filelist(filename, dir)
|
176
|
-
File.open(File.join(dir, "filelist.xml"), "w") do |f|
|
177
|
-
f.write(<<~"XML")
|
178
|
-
<xml xmlns:o="urn:schemas-microsoft-com:office:office">
|
179
|
-
<o:MainFile HRef="../#{filename}.htm"/>
|
180
|
-
XML
|
181
|
-
Dir.foreach(dir) do |item|
|
182
|
-
next if item == "." || item == ".." || /^\./.match(item)
|
183
|
-
f.write %{ <o:File HRef="#{item}"/>\n}
|
184
|
-
end
|
185
|
-
f.write("</xml>\n")
|
186
|
-
end
|
187
|
-
end
|
188
|
-
|
189
|
-
def self.msonormal(docxml)
|
190
|
-
docxml.xpath("//*[local-name() = 'p'][not(self::*[@class])]").each do |p|
|
191
|
-
p["class"] = "MsoNormal"
|
192
|
-
end
|
193
|
-
docxml.xpath("//*[local-name() = 'li'][not(self::*[@class])]").each do |p|
|
194
|
-
p["class"] = "MsoNormal"
|
195
|
-
end
|
196
|
-
end
|
197
56
|
end
|
data/lib/html2doc/version.rb
CHANGED
data/lib/html2doc/wordstyle.css
CHANGED
@@ -990,3 +990,9 @@ table.MsoNormalTable
|
|
990
990
|
mso-pagination:widow-orphan;
|
991
991
|
font-size:10.0pt;
|
992
992
|
font-family:"Cambria",serif;}
|
993
|
+
br.section
|
994
|
+
{page-break-before:always;
|
995
|
+
mso-break-type:section-break;}
|
996
|
+
br.pagebreak
|
997
|
+
{page-break-before:always;
|
998
|
+
mso-special-character:line-break;}
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0
|
4
|
+
version: 0.5.0
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-02-06 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -94,6 +94,34 @@ dependencies:
|
|
94
94
|
- - ">="
|
95
95
|
- !ruby/object:Gem::Version
|
96
96
|
version: '0'
|
97
|
+
- !ruby/object:Gem::Dependency
|
98
|
+
name: ruby-xslt
|
99
|
+
requirement: !ruby/object:Gem::Requirement
|
100
|
+
requirements:
|
101
|
+
- - ">="
|
102
|
+
- !ruby/object:Gem::Version
|
103
|
+
version: '0'
|
104
|
+
type: :runtime
|
105
|
+
prerelease: false
|
106
|
+
version_requirements: !ruby/object:Gem::Requirement
|
107
|
+
requirements:
|
108
|
+
- - ">="
|
109
|
+
- !ruby/object:Gem::Version
|
110
|
+
version: '0'
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: asciimath
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - ">="
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: '0'
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - ">="
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: '0'
|
97
125
|
- !ruby/object:Gem::Dependency
|
98
126
|
name: bundler
|
99
127
|
requirement: !ruby/object:Gem::Requirement
|
@@ -287,7 +315,7 @@ required_rubygems_version: !ruby/object:Gem::Requirement
|
|
287
315
|
version: '0'
|
288
316
|
requirements: []
|
289
317
|
rubyforge_project:
|
290
|
-
rubygems_version: 2.6.
|
318
|
+
rubygems_version: 2.6.12
|
291
319
|
signing_key:
|
292
320
|
specification_version: 4
|
293
321
|
summary: Convert HTML document to Microsoft Word document
|