html2doc 1.1.0 → 1.1.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +0 -2
- data/Gemfile +2 -2
- data/Rakefile +1 -1
- data/bin/html2doc +2 -3
- data/lib/html2doc/base.rb +45 -44
- data/lib/html2doc/lists.rb +33 -25
- data/lib/html2doc/math.rb +71 -67
- data/lib/html2doc/mime.rb +28 -20
- data/lib/html2doc/notes.rb +34 -31
- data/lib/html2doc/version.rb +1 -1
- data/spec/html2doc_spec.rb +495 -496
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b92a72c9d0ee6005e38ab8de1f0cbc48455819955d34eb349fef1244504a9971
|
4
|
+
data.tar.gz: 52b07e9c1720bc2bf7a7196e0f650c62b81bf534535795bb2a136f2e79829416
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af4b84183859fd83ac500c6c1ac28e76d49ec7fb48c8a4cf49bc3543909b913e78c8293918b34e21e9e68f2dc78a500824337677ae1aea54bcf7cbecebd9363d
|
7
|
+
data.tar.gz: 9e0bd48390458ec57dcc9650643a9332a549148a1297b37962e37399453d95132e39c69cb94ea13fcc70f9d6bc273fccf742da61e6f4a3025710182df1d65144
|
data/.rubocop.yml
CHANGED
data/Gemfile
CHANGED
@@ -10,6 +10,6 @@ end
|
|
10
10
|
|
11
11
|
gemspec
|
12
12
|
|
13
|
-
if File.exist?
|
14
|
-
eval File.read(
|
13
|
+
if File.exist? "Gemfile.devel"
|
14
|
+
eval File.read("Gemfile.devel"), nil, "Gemfile.devel" # rubocop:disable Security/Eval
|
15
15
|
end
|
data/Rakefile
CHANGED
data/bin/html2doc
CHANGED
@@ -21,9 +21,8 @@ if ARGV.length < 1
|
|
21
21
|
end
|
22
22
|
|
23
23
|
Html2Doc.process(
|
24
|
-
File.read(ARGV[0], encoding: "utf-8"),
|
24
|
+
File.read(ARGV[0], encoding: "utf-8"),
|
25
25
|
filename: ARGV[0].gsub(/\.html?$/, ""),
|
26
26
|
stylesheet: options[:stylesheet],
|
27
|
-
header: options[:header]
|
27
|
+
header: options[:header]
|
28
28
|
)
|
29
|
-
|
data/lib/html2doc/base.rb
CHANGED
@@ -2,8 +2,6 @@ require "uuidtools"
|
|
2
2
|
require "asciimath"
|
3
3
|
require "htmlentities"
|
4
4
|
require "nokogiri"
|
5
|
-
#require "xml/xslt"
|
6
|
-
require "pp"
|
7
5
|
require "fileutils"
|
8
6
|
|
9
7
|
module Html2Doc
|
@@ -19,15 +17,17 @@ module Html2Doc
|
|
19
17
|
|
20
18
|
def self.process_header(headerfile, hash)
|
21
19
|
return if headerfile.nil?
|
20
|
+
|
22
21
|
doc = File.read(headerfile, encoding: "utf-8")
|
23
|
-
doc = header_image_cleanup(doc, hash[:dir1], hash[:filename],
|
22
|
+
doc = header_image_cleanup(doc, hash[:dir1], hash[:filename],
|
23
|
+
File.dirname(hash[:filename]))
|
24
24
|
File.open("#{hash[:dir1]}/header.html", "w:UTF-8") { |f| f.write(doc) }
|
25
25
|
end
|
26
26
|
|
27
27
|
def self.clear_dir(dir)
|
28
28
|
Dir.foreach(dir) do |f|
|
29
29
|
fn = File.join(dir, f)
|
30
|
-
File.delete(fn) if f !=
|
30
|
+
File.delete(fn) if f != "." && f != ".."
|
31
31
|
end
|
32
32
|
dir
|
33
33
|
end
|
@@ -72,7 +72,7 @@ module Html2Doc
|
|
72
72
|
|
73
73
|
def self.to_xhtml(xml)
|
74
74
|
xml.gsub!(/<\?xml[^>]*>/, "")
|
75
|
-
unless /<!DOCTYPE /.match xml
|
75
|
+
unless /<!DOCTYPE /.match? xml
|
76
76
|
xml = '<!DOCTYPE html SYSTEM
|
77
77
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' + xml
|
78
78
|
end
|
@@ -84,34 +84,34 @@ module Html2Doc
|
|
84
84
|
DOCTYPE
|
85
85
|
|
86
86
|
def self.from_xhtml(xml)
|
87
|
-
xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "")
|
88
|
-
sub(DOCTYPE, "")
|
89
|
-
gsub(%{ />}, "/>")
|
87
|
+
xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "")
|
88
|
+
.sub(DOCTYPE, "")
|
89
|
+
.gsub(%{ />}, "/>")
|
90
90
|
end
|
91
91
|
|
92
|
-
def self.msword_fix(
|
92
|
+
def self.msword_fix(doc)
|
93
93
|
# brain damage in MSWord parser
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
94
|
+
doc.gsub!(%r{<span style="mso-special-character:footnote"/>},
|
95
|
+
'<span style="mso-special-character:footnote"></span>')
|
96
|
+
doc.gsub!(%r{<div style="mso-element:footnote-list"></div>},
|
97
|
+
'<div style="mso-element:footnote-list"/>')
|
98
|
+
doc.gsub!(%r{(<a style="mso-comment-reference:[^>/]+)/>}, "\\1></a>")
|
99
|
+
doc.gsub!(%r{<link rel="File-List"}, "<link rel=File-List")
|
100
|
+
doc.gsub!(%r{<meta http-equiv="Content-Type"},
|
101
|
+
"<meta http-equiv=Content-Type")
|
102
|
+
doc.gsub!(%r{></m:jc>}, "/>")
|
103
|
+
doc.gsub!(%r{></v:stroke>}, "/>")
|
104
|
+
doc.gsub!(%r{></v:f>}, "/>")
|
105
|
+
doc.gsub!(%r{></v:path>}, "/>")
|
106
|
+
doc.gsub!(%r{></o:lock>}, "/>")
|
107
|
+
doc.gsub!(%r{></v:imagedata>}, "/>")
|
108
|
+
doc.gsub!(%r{></w:wrap>}, "/>")
|
109
|
+
doc.gsub!(%r{&tab;|&tab;},
|
110
|
+
'<span style="mso-tab-count:1">  </span>')
|
111
|
+
doc.split(%r{(<m:oMath>|</m:oMath>)}).each_slice(4).map do |a|
|
111
112
|
a.size > 2 and a[2] = a[2].gsub(/>\s+</, "><")
|
112
113
|
a
|
113
114
|
end.join
|
114
|
-
r
|
115
115
|
end
|
116
116
|
|
117
117
|
PRINT_VIEW = <<~XML.freeze
|
@@ -130,27 +130,27 @@ module Html2Doc
|
|
130
130
|
def self.define_head1(docxml, dir)
|
131
131
|
docxml.xpath("//*[local-name() = 'head']").each do |h|
|
132
132
|
h.children.first.add_previous_sibling <<~XML
|
133
|
-
|
134
|
-
|
133
|
+
#{PRINT_VIEW}
|
134
|
+
<link rel="File-List" href="cid:filelist.xml"/>
|
135
135
|
XML
|
136
136
|
end
|
137
137
|
end
|
138
138
|
|
139
|
-
def self.filename_substitute(
|
140
|
-
if header_filename.nil?
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
/FILENAME/.match(m) ? "url(cid:header.html)" : m
|
139
|
+
def self.filename_substitute(head, header_filename)
|
140
|
+
return if header_filename.nil?
|
141
|
+
|
142
|
+
head.xpath(".//*[local-name() = 'style']").each do |s|
|
143
|
+
s1 = s.to_xml.gsub(/url\("[^"]+"\)/) do |m|
|
144
|
+
/FILENAME/.match?(m) ? "url(cid:header.html)" : m
|
145
145
|
end
|
146
|
+
s.replace(s1)
|
146
147
|
end
|
147
148
|
end
|
148
149
|
|
149
150
|
def self.stylesheet(filename, header_filename, fn)
|
150
|
-
(fn.nil? || fn.empty?)
|
151
|
+
(fn.nil? || fn.empty?) and
|
151
152
|
fn = File.join(File.dirname(__FILE__), "wordstyle.css")
|
152
153
|
stylesheet = File.read(fn, encoding: "UTF-8")
|
153
|
-
stylesheet = filename_substitute(stylesheet, header_filename, filename)
|
154
154
|
xml = Nokogiri::XML("<style/>")
|
155
155
|
xml.children.first << Nokogiri::XML::Comment.new(xml, "\n#{stylesheet}\n")
|
156
156
|
xml.root.to_s
|
@@ -161,6 +161,7 @@ module Html2Doc
|
|
161
161
|
head = docxml.at("//*[local-name() = 'head']")
|
162
162
|
css = stylesheet(hash[:filename], hash[:header_file], hash[:stylesheet])
|
163
163
|
add_stylesheet(head, title, css)
|
164
|
+
filename_substitute(head, hash[:header_file])
|
164
165
|
define_head1(docxml, hash[:dir1])
|
165
166
|
rootnamespace(docxml.root)
|
166
167
|
end
|
@@ -189,13 +190,13 @@ module Html2Doc
|
|
189
190
|
end
|
190
191
|
|
191
192
|
def self.bookmarks(docxml)
|
192
|
-
docxml.xpath("//*[@id][not(@name)][not(@style = 'mso-element:footnote')]")
|
193
|
-
|
194
|
-
next if
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
193
|
+
docxml.xpath("//*[@id][not(@name)][not(@style = 'mso-element:footnote')]")
|
194
|
+
.each do |x|
|
195
|
+
next if x["id"].empty? ||
|
196
|
+
%w(shapetype v:shapetype shape v:shape).include?(x.name)
|
197
|
+
|
198
|
+
if x.children.empty? then x.add_child("<a name='#{x['id']}'></a>")
|
199
|
+
else x.children.first.previous = "<a name='#{x['id']}'></a>"
|
199
200
|
end
|
200
201
|
x.delete("id")
|
201
202
|
end
|
data/lib/html2doc/lists.rb
CHANGED
@@ -7,6 +7,7 @@ require "uuidtools"
|
|
7
7
|
module Html2Doc
|
8
8
|
def self.style_list(li, level, liststyle, listnumber)
|
9
9
|
return unless liststyle
|
10
|
+
|
10
11
|
if li["style"]
|
11
12
|
li["style"] += ";"
|
12
13
|
else
|
@@ -16,37 +17,39 @@ module Html2Doc
|
|
16
17
|
end
|
17
18
|
|
18
19
|
def self.list_add1(li, liststyles, listtype, level)
|
19
|
-
if [
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
20
|
+
if %i[ul ol].include? listtype
|
21
|
+
list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
|
22
|
+
liststyles, :ul, level + 1)
|
23
|
+
list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
|
24
|
+
liststyles, :ol, level + 1)
|
25
|
+
else
|
26
|
+
list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
|
27
|
+
liststyles, listtype, level + 1)
|
28
|
+
list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
|
29
|
+
liststyles, listtype, level + 1)
|
30
|
+
end
|
30
31
|
end
|
31
32
|
|
32
33
|
def self.list_add(xpath, liststyles, listtype, level)
|
33
|
-
xpath.each_with_index do |
|
34
|
+
xpath.each_with_index do |l, _i|
|
34
35
|
@listnumber += 1 if level == 1
|
35
|
-
|
36
|
-
|
37
|
-
(
|
36
|
+
l["seen"] = true if level == 1
|
37
|
+
l["id"] ||= UUIDTools::UUID.random_create
|
38
|
+
(l.xpath(".//li") - l.xpath(".//ol//li | .//ul//li")).each do |li|
|
38
39
|
style_list(li, level, liststyles[listtype], @listnumber)
|
39
40
|
list_add1(li, liststyles, listtype, level)
|
40
41
|
end
|
41
|
-
|
42
|
-
|
43
|
-
|
42
|
+
l.xpath(".//ul[not(ancestor::li/ancestor::*/@id = '#{l['id']}')] | "\
|
43
|
+
".//ol[not(ancestor::li/ancestor::*/@id = '#{l['id']}')]")
|
44
|
+
.each do |li|
|
45
|
+
list_add1(li.parent, liststyles, listtype, level - 1)
|
44
46
|
end
|
45
47
|
end
|
46
48
|
end
|
47
49
|
|
48
50
|
def self.list2para(u)
|
49
51
|
return if u.xpath("./li").empty?
|
52
|
+
|
50
53
|
u.xpath("./li").first["class"] ||= "MsoListParagraphCxSpFirst"
|
51
54
|
u.xpath("./li").last["class"] ||= "MsoListParagraphCxSpLast"
|
52
55
|
u.xpath("./li/p").each { |p| p["class"] ||= "MsoListParagraphCxSpMiddle" }
|
@@ -64,21 +67,25 @@ module Html2Doc
|
|
64
67
|
def self.lists1(docxml, liststyles, k)
|
65
68
|
case k
|
66
69
|
when :ul then list_add(docxml.xpath("//ul[not(@class)]#{TOPLIST}"),
|
67
|
-
|
70
|
+
liststyles, :ul, 1)
|
68
71
|
when :ol then list_add(docxml.xpath("//ol[not(@class)]#{TOPLIST}"),
|
69
72
|
liststyles, :ol, 1)
|
70
73
|
else
|
71
|
-
list_add(docxml.xpath("//ol[@class = '#{k
|
72
|
-
"//ul[@class = '#{k
|
74
|
+
list_add(docxml.xpath("//ol[@class = '#{k}']#{TOPLIST} | "\
|
75
|
+
"//ul[@class = '#{k}']#{TOPLIST}"),
|
73
76
|
liststyles, k, 1)
|
74
77
|
end
|
75
78
|
end
|
76
79
|
|
77
80
|
def self.lists_unstyled(docxml, liststyles)
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
81
|
+
if liststyles.has_key?(:ul)
|
82
|
+
list_add(docxml.xpath("//ul#{TOPLIST}[not(@seen)]"),
|
83
|
+
liststyles, :ul, 1)
|
84
|
+
end
|
85
|
+
if liststyles.has_key?(:ol)
|
86
|
+
list_add(docxml.xpath("//ol#{TOPLIST}[not(@seen)]"),
|
87
|
+
liststyles, :ul, 1)
|
88
|
+
end
|
82
89
|
docxml.xpath("//ul[@seen] | //ol[@seen]").each do |l|
|
83
90
|
l.delete("seen")
|
84
91
|
end
|
@@ -86,6 +93,7 @@ module Html2Doc
|
|
86
93
|
|
87
94
|
def self.lists(docxml, liststyles)
|
88
95
|
return if liststyles.nil?
|
96
|
+
|
89
97
|
@listnumber = 0
|
90
98
|
liststyles.each_key { |k| lists1(docxml, liststyles, k) }
|
91
99
|
lists_unstyled(docxml, liststyles)
|
data/lib/html2doc/math.rb
CHANGED
@@ -9,20 +9,20 @@ module Html2Doc
|
|
9
9
|
Nokogiri::XSLT(File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"),
|
10
10
|
encoding: "utf-8"))
|
11
11
|
|
12
|
-
def self.asciimath_to_mathml1(
|
13
|
-
|
14
|
-
AsciiMath
|
15
|
-
|
16
|
-
gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
end
|
12
|
+
def self.asciimath_to_mathml1(expr)
|
13
|
+
AsciiMath::MathMLBuilder.new(msword: true).append_expression(
|
14
|
+
AsciiMath.parse(HTMLEntities.new.decode(expr)).ast,
|
15
|
+
).to_s
|
16
|
+
.gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
|
17
|
+
rescue StandardError => e
|
18
|
+
puts "parsing: #{expr}"
|
19
|
+
puts e.message
|
20
|
+
raise e
|
22
21
|
end
|
23
22
|
|
24
23
|
def self.asciimath_to_mathml(doc, delims)
|
25
24
|
return doc if delims.nil? || delims.size < 2
|
25
|
+
|
26
26
|
m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
|
27
27
|
m.each_slice(4).map.with_index do |(*a), i|
|
28
28
|
i % 500 == 0 && m.size > 1000 && i > 0 and
|
@@ -42,81 +42,86 @@ module Html2Doc
|
|
42
42
|
end
|
43
43
|
|
44
44
|
# random fixes to MathML input that OOXML needs to render properly
|
45
|
-
def self.ooxml_cleanup(
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
45
|
+
def self.ooxml_cleanup(math, docnamespaces)
|
46
|
+
math = unwrap_accents(
|
47
|
+
mathml_preserve_space(
|
48
|
+
mathml_insert_rows(math, docnamespaces), docnamespaces
|
49
|
+
),
|
50
|
+
)
|
51
|
+
math.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
|
52
|
+
math
|
50
53
|
end
|
51
54
|
|
52
|
-
def self.mathml_insert_rows(
|
53
|
-
|
54
|
-
map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
|
55
|
+
def self.mathml_insert_rows(math, docnamespaces)
|
56
|
+
math.xpath(%w(msup msub msubsup munder mover munderover)
|
57
|
+
.map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
|
55
58
|
next unless x.next_element && x.next_element != "mrow"
|
59
|
+
|
56
60
|
x.next_element.wrap("<mrow/>")
|
57
61
|
end
|
58
|
-
|
62
|
+
math
|
59
63
|
end
|
60
64
|
|
61
|
-
def self.mathml_preserve_space(
|
62
|
-
|
65
|
+
def self.mathml_preserve_space(math, docnamespaces)
|
66
|
+
math.xpath(".//xmlns:mtext", docnamespaces).each do |x|
|
63
67
|
x.children = x.children.to_xml.gsub(/^\s/, " ").gsub(/\s$/, " ")
|
64
68
|
end
|
65
|
-
|
69
|
+
math
|
66
70
|
end
|
67
71
|
|
68
|
-
def self.unitalic(
|
69
|
-
|
72
|
+
def self.unitalic(math)
|
73
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
|
70
74
|
x.wrap("<span style='font-style:normal;'></span>")
|
71
75
|
end
|
72
|
-
|
76
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
|
73
77
|
x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>")
|
74
78
|
end
|
75
|
-
|
79
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
|
76
80
|
x.wrap("<span class='nostem'><em></em></span>")
|
77
81
|
end
|
78
|
-
|
82
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
|
79
83
|
x.wrap("<span style='font-style:normal;font-weight:bold;'></span>")
|
80
84
|
end
|
81
|
-
|
82
|
-
|
85
|
+
math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
|
86
|
+
to_plane1(x, :monospace)
|
83
87
|
end
|
84
|
-
|
85
|
-
|
88
|
+
math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
|
89
|
+
to_plane1(x, :doublestruck)
|
86
90
|
end
|
87
|
-
|
88
|
-
|
91
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
|
92
|
+
to_plane1(x, :script)
|
89
93
|
end
|
90
|
-
|
91
|
-
|
94
|
+
math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
|
95
|
+
to_plane1(x, :scriptbold)
|
92
96
|
end
|
93
|
-
|
94
|
-
|
97
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
|
98
|
+
to_plane1(x, :fraktur)
|
95
99
|
end
|
96
|
-
|
97
|
-
|
100
|
+
math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
|
101
|
+
to_plane1(x, :frakturbold)
|
98
102
|
end
|
99
|
-
|
100
|
-
|
103
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
104
|
+
to_plane1(x, :sans)
|
101
105
|
end
|
102
|
-
|
103
|
-
|
106
|
+
math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
107
|
+
to_plane1(x, :sansbold)
|
104
108
|
end
|
105
|
-
|
106
|
-
|
109
|
+
math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
110
|
+
to_plane1(x, :sansitalic)
|
107
111
|
end
|
108
|
-
|
109
|
-
|
112
|
+
math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
113
|
+
to_plane1(x, :sansbolditalic)
|
110
114
|
end
|
111
|
-
|
115
|
+
math
|
112
116
|
end
|
113
117
|
|
114
|
-
def self.
|
115
|
-
|
118
|
+
def self.to_plane1(xml, font)
|
119
|
+
xml.traverse do |n|
|
116
120
|
next unless n.text?
|
121
|
+
|
117
122
|
n.replace(Plane1Converter.conv(HTMLEntities.new.decode(n.text), font))
|
118
123
|
end
|
119
|
-
|
124
|
+
xml
|
120
125
|
end
|
121
126
|
|
122
127
|
def self.mathml_to_ooml(docxml)
|
@@ -126,22 +131,23 @@ module Html2Doc
|
|
126
131
|
i % 100 == 0 && m.size > 500 && i > 0 and
|
127
132
|
warn "Math OOXML #{i} of #{m.size}"
|
128
133
|
element = ooxml_cleanup(x, docnamespaces)
|
129
|
-
doc = Nokogiri::XML::Document::new
|
134
|
+
doc = Nokogiri::XML::Document::new
|
130
135
|
doc.root = element
|
131
|
-
ooxml =
|
132
|
-
gsub(/<\?[^>]+>\s*/, "")
|
133
|
-
gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
|
134
|
-
gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
|
136
|
+
ooxml = unitalic(esc_space(@xsltemplate.transform(doc))).to_s
|
137
|
+
.gsub(/<\?[^>]+>\s*/, "")
|
138
|
+
.gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
|
139
|
+
.gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
|
135
140
|
ooxml = uncenter(x, ooxml)
|
136
141
|
x.swap(ooxml)
|
137
142
|
end
|
138
143
|
end
|
139
144
|
|
140
|
-
# escape space as 2; we are removing any spaces generated by
|
145
|
+
# escape space as 2; we are removing any spaces generated by
|
141
146
|
# XML indentation
|
142
147
|
def self.esc_space(xml)
|
143
148
|
xml.traverse do |n|
|
144
149
|
next unless n.text?
|
150
|
+
|
145
151
|
n = n.text.gsub(/ /, "2")
|
146
152
|
end
|
147
153
|
xml
|
@@ -149,17 +155,15 @@ module Html2Doc
|
|
149
155
|
|
150
156
|
# if oomml has no siblings, by default it is centered; override this with
|
151
157
|
# left/right if parent is so tagged
|
152
|
-
def self.uncenter(
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
"m:val='left'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
|
160
|
-
elsif alignnode.text.include? ("text-align:right")
|
158
|
+
def self.uncenter(math, ooxml)
|
159
|
+
alignnode = math.at(".//ancestor::*[@style][local-name() = 'p' or "\
|
160
|
+
"local-name() = 'div' or local-name() = 'td']/@style")
|
161
|
+
return ooxml unless alignnode && (math.next == nil && math.previous == nil)
|
162
|
+
|
163
|
+
%w(left right).each do |dir|
|
164
|
+
if alignnode.text.include? ("text-align:#{dir}")
|
161
165
|
ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
|
162
|
-
"m:val='
|
166
|
+
"m:val='#{dir}'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
|
163
167
|
end
|
164
168
|
end
|
165
169
|
ooxml
|