html2doc 1.1.0 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.rubocop.yml +0 -2
- data/Gemfile +2 -2
- data/Rakefile +1 -1
- data/bin/html2doc +2 -3
- data/lib/html2doc/base.rb +45 -44
- data/lib/html2doc/lists.rb +33 -25
- data/lib/html2doc/math.rb +71 -67
- data/lib/html2doc/mime.rb +28 -20
- data/lib/html2doc/notes.rb +34 -31
- data/lib/html2doc/version.rb +1 -1
- data/spec/html2doc_spec.rb +495 -496
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b92a72c9d0ee6005e38ab8de1f0cbc48455819955d34eb349fef1244504a9971
|
4
|
+
data.tar.gz: 52b07e9c1720bc2bf7a7196e0f650c62b81bf534535795bb2a136f2e79829416
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af4b84183859fd83ac500c6c1ac28e76d49ec7fb48c8a4cf49bc3543909b913e78c8293918b34e21e9e68f2dc78a500824337677ae1aea54bcf7cbecebd9363d
|
7
|
+
data.tar.gz: 9e0bd48390458ec57dcc9650643a9332a549148a1297b37962e37399453d95132e39c69cb94ea13fcc70f9d6bc273fccf742da61e6f4a3025710182df1d65144
|
data/.rubocop.yml
CHANGED
data/Gemfile
CHANGED
@@ -10,6 +10,6 @@ end
|
|
10
10
|
|
11
11
|
gemspec
|
12
12
|
|
13
|
-
if File.exist?
|
14
|
-
eval File.read(
|
13
|
+
if File.exist? "Gemfile.devel"
|
14
|
+
eval File.read("Gemfile.devel"), nil, "Gemfile.devel" # rubocop:disable Security/Eval
|
15
15
|
end
|
data/Rakefile
CHANGED
data/bin/html2doc
CHANGED
@@ -21,9 +21,8 @@ if ARGV.length < 1
|
|
21
21
|
end
|
22
22
|
|
23
23
|
Html2Doc.process(
|
24
|
-
File.read(ARGV[0], encoding: "utf-8"),
|
24
|
+
File.read(ARGV[0], encoding: "utf-8"),
|
25
25
|
filename: ARGV[0].gsub(/\.html?$/, ""),
|
26
26
|
stylesheet: options[:stylesheet],
|
27
|
-
header: options[:header]
|
27
|
+
header: options[:header]
|
28
28
|
)
|
29
|
-
|
data/lib/html2doc/base.rb
CHANGED
@@ -2,8 +2,6 @@ require "uuidtools"
|
|
2
2
|
require "asciimath"
|
3
3
|
require "htmlentities"
|
4
4
|
require "nokogiri"
|
5
|
-
#require "xml/xslt"
|
6
|
-
require "pp"
|
7
5
|
require "fileutils"
|
8
6
|
|
9
7
|
module Html2Doc
|
@@ -19,15 +17,17 @@ module Html2Doc
|
|
19
17
|
|
20
18
|
def self.process_header(headerfile, hash)
|
21
19
|
return if headerfile.nil?
|
20
|
+
|
22
21
|
doc = File.read(headerfile, encoding: "utf-8")
|
23
|
-
doc = header_image_cleanup(doc, hash[:dir1], hash[:filename],
|
22
|
+
doc = header_image_cleanup(doc, hash[:dir1], hash[:filename],
|
23
|
+
File.dirname(hash[:filename]))
|
24
24
|
File.open("#{hash[:dir1]}/header.html", "w:UTF-8") { |f| f.write(doc) }
|
25
25
|
end
|
26
26
|
|
27
27
|
def self.clear_dir(dir)
|
28
28
|
Dir.foreach(dir) do |f|
|
29
29
|
fn = File.join(dir, f)
|
30
|
-
File.delete(fn) if f !=
|
30
|
+
File.delete(fn) if f != "." && f != ".."
|
31
31
|
end
|
32
32
|
dir
|
33
33
|
end
|
@@ -72,7 +72,7 @@ module Html2Doc
|
|
72
72
|
|
73
73
|
def self.to_xhtml(xml)
|
74
74
|
xml.gsub!(/<\?xml[^>]*>/, "")
|
75
|
-
unless /<!DOCTYPE /.match xml
|
75
|
+
unless /<!DOCTYPE /.match? xml
|
76
76
|
xml = '<!DOCTYPE html SYSTEM
|
77
77
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' + xml
|
78
78
|
end
|
@@ -84,34 +84,34 @@ module Html2Doc
|
|
84
84
|
DOCTYPE
|
85
85
|
|
86
86
|
def self.from_xhtml(xml)
|
87
|
-
xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "")
|
88
|
-
sub(DOCTYPE, "")
|
89
|
-
gsub(%{ />}, "/>")
|
87
|
+
xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "")
|
88
|
+
.sub(DOCTYPE, "")
|
89
|
+
.gsub(%{ />}, "/>")
|
90
90
|
end
|
91
91
|
|
92
|
-
def self.msword_fix(
|
92
|
+
def self.msword_fix(doc)
|
93
93
|
# brain damage in MSWord parser
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
|
105
|
-
|
106
|
-
|
107
|
-
|
108
|
-
|
109
|
-
|
110
|
-
|
94
|
+
doc.gsub!(%r{<span style="mso-special-character:footnote"/>},
|
95
|
+
'<span style="mso-special-character:footnote"></span>')
|
96
|
+
doc.gsub!(%r{<div style="mso-element:footnote-list"></div>},
|
97
|
+
'<div style="mso-element:footnote-list"/>')
|
98
|
+
doc.gsub!(%r{(<a style="mso-comment-reference:[^>/]+)/>}, "\\1></a>")
|
99
|
+
doc.gsub!(%r{<link rel="File-List"}, "<link rel=File-List")
|
100
|
+
doc.gsub!(%r{<meta http-equiv="Content-Type"},
|
101
|
+
"<meta http-equiv=Content-Type")
|
102
|
+
doc.gsub!(%r{></m:jc>}, "/>")
|
103
|
+
doc.gsub!(%r{></v:stroke>}, "/>")
|
104
|
+
doc.gsub!(%r{></v:f>}, "/>")
|
105
|
+
doc.gsub!(%r{></v:path>}, "/>")
|
106
|
+
doc.gsub!(%r{></o:lock>}, "/>")
|
107
|
+
doc.gsub!(%r{></v:imagedata>}, "/>")
|
108
|
+
doc.gsub!(%r{></w:wrap>}, "/>")
|
109
|
+
doc.gsub!(%r{&tab;|&tab;},
|
110
|
+
'<span style="mso-tab-count:1">  </span>')
|
111
|
+
doc.split(%r{(<m:oMath>|</m:oMath>)}).each_slice(4).map do |a|
|
111
112
|
a.size > 2 and a[2] = a[2].gsub(/>\s+</, "><")
|
112
113
|
a
|
113
114
|
end.join
|
114
|
-
r
|
115
115
|
end
|
116
116
|
|
117
117
|
PRINT_VIEW = <<~XML.freeze
|
@@ -130,27 +130,27 @@ module Html2Doc
|
|
130
130
|
def self.define_head1(docxml, dir)
|
131
131
|
docxml.xpath("//*[local-name() = 'head']").each do |h|
|
132
132
|
h.children.first.add_previous_sibling <<~XML
|
133
|
-
|
134
|
-
|
133
|
+
#{PRINT_VIEW}
|
134
|
+
<link rel="File-List" href="cid:filelist.xml"/>
|
135
135
|
XML
|
136
136
|
end
|
137
137
|
end
|
138
138
|
|
139
|
-
def self.filename_substitute(
|
140
|
-
if header_filename.nil?
|
141
|
-
|
142
|
-
|
143
|
-
|
144
|
-
/FILENAME/.match(m) ? "url(cid:header.html)" : m
|
139
|
+
def self.filename_substitute(head, header_filename)
|
140
|
+
return if header_filename.nil?
|
141
|
+
|
142
|
+
head.xpath(".//*[local-name() = 'style']").each do |s|
|
143
|
+
s1 = s.to_xml.gsub(/url\("[^"]+"\)/) do |m|
|
144
|
+
/FILENAME/.match?(m) ? "url(cid:header.html)" : m
|
145
145
|
end
|
146
|
+
s.replace(s1)
|
146
147
|
end
|
147
148
|
end
|
148
149
|
|
149
150
|
def self.stylesheet(filename, header_filename, fn)
|
150
|
-
(fn.nil? || fn.empty?)
|
151
|
+
(fn.nil? || fn.empty?) and
|
151
152
|
fn = File.join(File.dirname(__FILE__), "wordstyle.css")
|
152
153
|
stylesheet = File.read(fn, encoding: "UTF-8")
|
153
|
-
stylesheet = filename_substitute(stylesheet, header_filename, filename)
|
154
154
|
xml = Nokogiri::XML("<style/>")
|
155
155
|
xml.children.first << Nokogiri::XML::Comment.new(xml, "\n#{stylesheet}\n")
|
156
156
|
xml.root.to_s
|
@@ -161,6 +161,7 @@ module Html2Doc
|
|
161
161
|
head = docxml.at("//*[local-name() = 'head']")
|
162
162
|
css = stylesheet(hash[:filename], hash[:header_file], hash[:stylesheet])
|
163
163
|
add_stylesheet(head, title, css)
|
164
|
+
filename_substitute(head, hash[:header_file])
|
164
165
|
define_head1(docxml, hash[:dir1])
|
165
166
|
rootnamespace(docxml.root)
|
166
167
|
end
|
@@ -189,13 +190,13 @@ module Html2Doc
|
|
189
190
|
end
|
190
191
|
|
191
192
|
def self.bookmarks(docxml)
|
192
|
-
docxml.xpath("//*[@id][not(@name)][not(@style = 'mso-element:footnote')]")
|
193
|
-
|
194
|
-
next if
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
|
193
|
+
docxml.xpath("//*[@id][not(@name)][not(@style = 'mso-element:footnote')]")
|
194
|
+
.each do |x|
|
195
|
+
next if x["id"].empty? ||
|
196
|
+
%w(shapetype v:shapetype shape v:shape).include?(x.name)
|
197
|
+
|
198
|
+
if x.children.empty? then x.add_child("<a name='#{x['id']}'></a>")
|
199
|
+
else x.children.first.previous = "<a name='#{x['id']}'></a>"
|
199
200
|
end
|
200
201
|
x.delete("id")
|
201
202
|
end
|
data/lib/html2doc/lists.rb
CHANGED
@@ -7,6 +7,7 @@ require "uuidtools"
|
|
7
7
|
module Html2Doc
|
8
8
|
def self.style_list(li, level, liststyle, listnumber)
|
9
9
|
return unless liststyle
|
10
|
+
|
10
11
|
if li["style"]
|
11
12
|
li["style"] += ";"
|
12
13
|
else
|
@@ -16,37 +17,39 @@ module Html2Doc
|
|
16
17
|
end
|
17
18
|
|
18
19
|
def self.list_add1(li, liststyles, listtype, level)
|
19
|
-
if [
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
20
|
+
if %i[ul ol].include? listtype
|
21
|
+
list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
|
22
|
+
liststyles, :ul, level + 1)
|
23
|
+
list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
|
24
|
+
liststyles, :ol, level + 1)
|
25
|
+
else
|
26
|
+
list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
|
27
|
+
liststyles, listtype, level + 1)
|
28
|
+
list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
|
29
|
+
liststyles, listtype, level + 1)
|
30
|
+
end
|
30
31
|
end
|
31
32
|
|
32
33
|
def self.list_add(xpath, liststyles, listtype, level)
|
33
|
-
xpath.each_with_index do |
|
34
|
+
xpath.each_with_index do |l, _i|
|
34
35
|
@listnumber += 1 if level == 1
|
35
|
-
|
36
|
-
|
37
|
-
(
|
36
|
+
l["seen"] = true if level == 1
|
37
|
+
l["id"] ||= UUIDTools::UUID.random_create
|
38
|
+
(l.xpath(".//li") - l.xpath(".//ol//li | .//ul//li")).each do |li|
|
38
39
|
style_list(li, level, liststyles[listtype], @listnumber)
|
39
40
|
list_add1(li, liststyles, listtype, level)
|
40
41
|
end
|
41
|
-
|
42
|
-
|
43
|
-
|
42
|
+
l.xpath(".//ul[not(ancestor::li/ancestor::*/@id = '#{l['id']}')] | "\
|
43
|
+
".//ol[not(ancestor::li/ancestor::*/@id = '#{l['id']}')]")
|
44
|
+
.each do |li|
|
45
|
+
list_add1(li.parent, liststyles, listtype, level - 1)
|
44
46
|
end
|
45
47
|
end
|
46
48
|
end
|
47
49
|
|
48
50
|
def self.list2para(u)
|
49
51
|
return if u.xpath("./li").empty?
|
52
|
+
|
50
53
|
u.xpath("./li").first["class"] ||= "MsoListParagraphCxSpFirst"
|
51
54
|
u.xpath("./li").last["class"] ||= "MsoListParagraphCxSpLast"
|
52
55
|
u.xpath("./li/p").each { |p| p["class"] ||= "MsoListParagraphCxSpMiddle" }
|
@@ -64,21 +67,25 @@ module Html2Doc
|
|
64
67
|
def self.lists1(docxml, liststyles, k)
|
65
68
|
case k
|
66
69
|
when :ul then list_add(docxml.xpath("//ul[not(@class)]#{TOPLIST}"),
|
67
|
-
|
70
|
+
liststyles, :ul, 1)
|
68
71
|
when :ol then list_add(docxml.xpath("//ol[not(@class)]#{TOPLIST}"),
|
69
72
|
liststyles, :ol, 1)
|
70
73
|
else
|
71
|
-
list_add(docxml.xpath("//ol[@class = '#{k
|
72
|
-
"//ul[@class = '#{k
|
74
|
+
list_add(docxml.xpath("//ol[@class = '#{k}']#{TOPLIST} | "\
|
75
|
+
"//ul[@class = '#{k}']#{TOPLIST}"),
|
73
76
|
liststyles, k, 1)
|
74
77
|
end
|
75
78
|
end
|
76
79
|
|
77
80
|
def self.lists_unstyled(docxml, liststyles)
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
81
|
+
if liststyles.has_key?(:ul)
|
82
|
+
list_add(docxml.xpath("//ul#{TOPLIST}[not(@seen)]"),
|
83
|
+
liststyles, :ul, 1)
|
84
|
+
end
|
85
|
+
if liststyles.has_key?(:ol)
|
86
|
+
list_add(docxml.xpath("//ol#{TOPLIST}[not(@seen)]"),
|
87
|
+
liststyles, :ul, 1)
|
88
|
+
end
|
82
89
|
docxml.xpath("//ul[@seen] | //ol[@seen]").each do |l|
|
83
90
|
l.delete("seen")
|
84
91
|
end
|
@@ -86,6 +93,7 @@ module Html2Doc
|
|
86
93
|
|
87
94
|
def self.lists(docxml, liststyles)
|
88
95
|
return if liststyles.nil?
|
96
|
+
|
89
97
|
@listnumber = 0
|
90
98
|
liststyles.each_key { |k| lists1(docxml, liststyles, k) }
|
91
99
|
lists_unstyled(docxml, liststyles)
|
data/lib/html2doc/math.rb
CHANGED
@@ -9,20 +9,20 @@ module Html2Doc
|
|
9
9
|
Nokogiri::XSLT(File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"),
|
10
10
|
encoding: "utf-8"))
|
11
11
|
|
12
|
-
def self.asciimath_to_mathml1(
|
13
|
-
|
14
|
-
AsciiMath
|
15
|
-
|
16
|
-
gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
|
17
|
-
|
18
|
-
|
19
|
-
|
20
|
-
|
21
|
-
end
|
12
|
+
def self.asciimath_to_mathml1(expr)
|
13
|
+
AsciiMath::MathMLBuilder.new(msword: true).append_expression(
|
14
|
+
AsciiMath.parse(HTMLEntities.new.decode(expr)).ast,
|
15
|
+
).to_s
|
16
|
+
.gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
|
17
|
+
rescue StandardError => e
|
18
|
+
puts "parsing: #{expr}"
|
19
|
+
puts e.message
|
20
|
+
raise e
|
22
21
|
end
|
23
22
|
|
24
23
|
def self.asciimath_to_mathml(doc, delims)
|
25
24
|
return doc if delims.nil? || delims.size < 2
|
25
|
+
|
26
26
|
m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
|
27
27
|
m.each_slice(4).map.with_index do |(*a), i|
|
28
28
|
i % 500 == 0 && m.size > 1000 && i > 0 and
|
@@ -42,81 +42,86 @@ module Html2Doc
|
|
42
42
|
end
|
43
43
|
|
44
44
|
# random fixes to MathML input that OOXML needs to render properly
|
45
|
-
def self.ooxml_cleanup(
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
45
|
+
def self.ooxml_cleanup(math, docnamespaces)
|
46
|
+
math = unwrap_accents(
|
47
|
+
mathml_preserve_space(
|
48
|
+
mathml_insert_rows(math, docnamespaces), docnamespaces
|
49
|
+
),
|
50
|
+
)
|
51
|
+
math.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
|
52
|
+
math
|
50
53
|
end
|
51
54
|
|
52
|
-
def self.mathml_insert_rows(
|
53
|
-
|
54
|
-
map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
|
55
|
+
def self.mathml_insert_rows(math, docnamespaces)
|
56
|
+
math.xpath(%w(msup msub msubsup munder mover munderover)
|
57
|
+
.map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
|
55
58
|
next unless x.next_element && x.next_element != "mrow"
|
59
|
+
|
56
60
|
x.next_element.wrap("<mrow/>")
|
57
61
|
end
|
58
|
-
|
62
|
+
math
|
59
63
|
end
|
60
64
|
|
61
|
-
def self.mathml_preserve_space(
|
62
|
-
|
65
|
+
def self.mathml_preserve_space(math, docnamespaces)
|
66
|
+
math.xpath(".//xmlns:mtext", docnamespaces).each do |x|
|
63
67
|
x.children = x.children.to_xml.gsub(/^\s/, " ").gsub(/\s$/, " ")
|
64
68
|
end
|
65
|
-
|
69
|
+
math
|
66
70
|
end
|
67
71
|
|
68
|
-
def self.unitalic(
|
69
|
-
|
72
|
+
def self.unitalic(math)
|
73
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
|
70
74
|
x.wrap("<span style='font-style:normal;'></span>")
|
71
75
|
end
|
72
|
-
|
76
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
|
73
77
|
x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>")
|
74
78
|
end
|
75
|
-
|
79
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
|
76
80
|
x.wrap("<span class='nostem'><em></em></span>")
|
77
81
|
end
|
78
|
-
|
82
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
|
79
83
|
x.wrap("<span style='font-style:normal;font-weight:bold;'></span>")
|
80
84
|
end
|
81
|
-
|
82
|
-
|
85
|
+
math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
|
86
|
+
to_plane1(x, :monospace)
|
83
87
|
end
|
84
|
-
|
85
|
-
|
88
|
+
math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
|
89
|
+
to_plane1(x, :doublestruck)
|
86
90
|
end
|
87
|
-
|
88
|
-
|
91
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
|
92
|
+
to_plane1(x, :script)
|
89
93
|
end
|
90
|
-
|
91
|
-
|
94
|
+
math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
|
95
|
+
to_plane1(x, :scriptbold)
|
92
96
|
end
|
93
|
-
|
94
|
-
|
97
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
|
98
|
+
to_plane1(x, :fraktur)
|
95
99
|
end
|
96
|
-
|
97
|
-
|
100
|
+
math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
|
101
|
+
to_plane1(x, :frakturbold)
|
98
102
|
end
|
99
|
-
|
100
|
-
|
103
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
104
|
+
to_plane1(x, :sans)
|
101
105
|
end
|
102
|
-
|
103
|
-
|
106
|
+
math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
107
|
+
to_plane1(x, :sansbold)
|
104
108
|
end
|
105
|
-
|
106
|
-
|
109
|
+
math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
110
|
+
to_plane1(x, :sansitalic)
|
107
111
|
end
|
108
|
-
|
109
|
-
|
112
|
+
math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
113
|
+
to_plane1(x, :sansbolditalic)
|
110
114
|
end
|
111
|
-
|
115
|
+
math
|
112
116
|
end
|
113
117
|
|
114
|
-
def self.
|
115
|
-
|
118
|
+
def self.to_plane1(xml, font)
|
119
|
+
xml.traverse do |n|
|
116
120
|
next unless n.text?
|
121
|
+
|
117
122
|
n.replace(Plane1Converter.conv(HTMLEntities.new.decode(n.text), font))
|
118
123
|
end
|
119
|
-
|
124
|
+
xml
|
120
125
|
end
|
121
126
|
|
122
127
|
def self.mathml_to_ooml(docxml)
|
@@ -126,22 +131,23 @@ module Html2Doc
|
|
126
131
|
i % 100 == 0 && m.size > 500 && i > 0 and
|
127
132
|
warn "Math OOXML #{i} of #{m.size}"
|
128
133
|
element = ooxml_cleanup(x, docnamespaces)
|
129
|
-
doc = Nokogiri::XML::Document::new
|
134
|
+
doc = Nokogiri::XML::Document::new
|
130
135
|
doc.root = element
|
131
|
-
ooxml =
|
132
|
-
gsub(/<\?[^>]+>\s*/, "")
|
133
|
-
gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
|
134
|
-
gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
|
136
|
+
ooxml = unitalic(esc_space(@xsltemplate.transform(doc))).to_s
|
137
|
+
.gsub(/<\?[^>]+>\s*/, "")
|
138
|
+
.gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
|
139
|
+
.gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
|
135
140
|
ooxml = uncenter(x, ooxml)
|
136
141
|
x.swap(ooxml)
|
137
142
|
end
|
138
143
|
end
|
139
144
|
|
140
|
-
# escape space as 2; we are removing any spaces generated by
|
145
|
+
# escape space as 2; we are removing any spaces generated by
|
141
146
|
# XML indentation
|
142
147
|
def self.esc_space(xml)
|
143
148
|
xml.traverse do |n|
|
144
149
|
next unless n.text?
|
150
|
+
|
145
151
|
n = n.text.gsub(/ /, "2")
|
146
152
|
end
|
147
153
|
xml
|
@@ -149,17 +155,15 @@ module Html2Doc
|
|
149
155
|
|
150
156
|
# if oomml has no siblings, by default it is centered; override this with
|
151
157
|
# left/right if parent is so tagged
|
152
|
-
def self.uncenter(
|
153
|
-
|
154
|
-
|
155
|
-
|
156
|
-
|
157
|
-
|
158
|
-
|
159
|
-
"m:val='left'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
|
160
|
-
elsif alignnode.text.include? ("text-align:right")
|
158
|
+
def self.uncenter(math, ooxml)
|
159
|
+
alignnode = math.at(".//ancestor::*[@style][local-name() = 'p' or "\
|
160
|
+
"local-name() = 'div' or local-name() = 'td']/@style")
|
161
|
+
return ooxml unless alignnode && (math.next == nil && math.previous == nil)
|
162
|
+
|
163
|
+
%w(left right).each do |dir|
|
164
|
+
if alignnode.text.include? ("text-align:#{dir}")
|
161
165
|
ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
|
162
|
-
"m:val='
|
166
|
+
"m:val='#{dir}'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
|
163
167
|
end
|
164
168
|
end
|
165
169
|
ooxml
|