html2doc 1.1.0 → 1.1.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f1d55301cebf98f75ddfd8966237d19c39bae1c9c8b4b01a39f124ca5da8e588
4
- data.tar.gz: a84cd4d87e2cf1c9f107300c883b6b6b85496e2067e76d7f0a282feecf6550c1
3
+ metadata.gz: b92a72c9d0ee6005e38ab8de1f0cbc48455819955d34eb349fef1244504a9971
4
+ data.tar.gz: 52b07e9c1720bc2bf7a7196e0f650c62b81bf534535795bb2a136f2e79829416
5
5
  SHA512:
6
- metadata.gz: 2ef21ec975f624420db8ee706ab1997ab7941bb7e9f75bfaecdde056346a08f73842cd151ad6f5c7a9711a0616ccc61177bd24f17693f8c31c6db70e7aa78088
7
- data.tar.gz: c204dfcb3f27a24f86908195d47287c74daaaa41d02de5f28fbb99167b17895f7b46e27971ae533c18874f09ed2d32d4205c6dcd9c6ee6b9e045a9dc06656bfa
6
+ metadata.gz: af4b84183859fd83ac500c6c1ac28e76d49ec7fb48c8a4cf49bc3543909b913e78c8293918b34e21e9e68f2dc78a500824337677ae1aea54bcf7cbecebd9363d
7
+ data.tar.gz: 9e0bd48390458ec57dcc9650643a9332a549148a1297b37962e37399453d95132e39c69cb94ea13fcc70f9d6bc273fccf742da61e6f4a3025710182df1d65144
data/.rubocop.yml CHANGED
@@ -10,5 +10,3 @@ AllCops:
10
10
  DisplayCopNames: false
11
11
  StyleGuideCopsOnly: false
12
12
  TargetRubyVersion: 2.4
13
- Rails:
14
- Enabled: true
data/Gemfile CHANGED
@@ -10,6 +10,6 @@ end
10
10
 
11
11
  gemspec
12
12
 
13
- if File.exist? 'Gemfile.devel'
14
- eval File.read('Gemfile.devel'), nil, 'Gemfile.devel' # rubocop:disable Security/Eval
13
+ if File.exist? "Gemfile.devel"
14
+ eval File.read("Gemfile.devel"), nil, "Gemfile.devel" # rubocop:disable Security/Eval
15
15
  end
data/Rakefile CHANGED
@@ -3,4 +3,4 @@ require "rspec/core/rake_task"
3
3
 
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
- task :default => :spec
6
+ task default: :spec
data/bin/html2doc CHANGED
@@ -21,9 +21,8 @@ if ARGV.length < 1
21
21
  end
22
22
 
23
23
  Html2Doc.process(
24
- File.read(ARGV[0], encoding: "utf-8"),
24
+ File.read(ARGV[0], encoding: "utf-8"),
25
25
  filename: ARGV[0].gsub(/\.html?$/, ""),
26
26
  stylesheet: options[:stylesheet],
27
- header: options[:header],
27
+ header: options[:header]
28
28
  )
29
-
data/lib/html2doc/base.rb CHANGED
@@ -2,8 +2,6 @@ require "uuidtools"
2
2
  require "asciimath"
3
3
  require "htmlentities"
4
4
  require "nokogiri"
5
- #require "xml/xslt"
6
- require "pp"
7
5
  require "fileutils"
8
6
 
9
7
  module Html2Doc
@@ -19,15 +17,17 @@ module Html2Doc
19
17
 
20
18
  def self.process_header(headerfile, hash)
21
19
  return if headerfile.nil?
20
+
22
21
  doc = File.read(headerfile, encoding: "utf-8")
23
- doc = header_image_cleanup(doc, hash[:dir1], hash[:filename], File.dirname(hash[:filename]))
22
+ doc = header_image_cleanup(doc, hash[:dir1], hash[:filename],
23
+ File.dirname(hash[:filename]))
24
24
  File.open("#{hash[:dir1]}/header.html", "w:UTF-8") { |f| f.write(doc) }
25
25
  end
26
26
 
27
27
  def self.clear_dir(dir)
28
28
  Dir.foreach(dir) do |f|
29
29
  fn = File.join(dir, f)
30
- File.delete(fn) if f != '.' && f != '..'
30
+ File.delete(fn) if f != "." && f != ".."
31
31
  end
32
32
  dir
33
33
  end
@@ -72,7 +72,7 @@ module Html2Doc
72
72
 
73
73
  def self.to_xhtml(xml)
74
74
  xml.gsub!(/<\?xml[^>]*>/, "")
75
- unless /<!DOCTYPE /.match xml
75
+ unless /<!DOCTYPE /.match? xml
76
76
  xml = '<!DOCTYPE html SYSTEM
77
77
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' + xml
78
78
  end
@@ -84,34 +84,34 @@ module Html2Doc
84
84
  DOCTYPE
85
85
 
86
86
  def self.from_xhtml(xml)
87
- xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "").
88
- sub(DOCTYPE, "").
89
- gsub(%{ />}, "/>")
87
+ xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "")
88
+ .sub(DOCTYPE, "")
89
+ .gsub(%{ />}, "/>")
90
90
  end
91
91
 
92
- def self.msword_fix(r)
92
+ def self.msword_fix(doc)
93
93
  # brain damage in MSWord parser
94
- r.gsub!(%r{<span style="mso-special-character:footnote"/>},
95
- '<span style="mso-special-character:footnote"></span>')
96
- r.gsub!(%r{<div style="mso-element:footnote-list"></div>},
97
- '<div style="mso-element:footnote-list"/>')
98
- r.gsub!(%r{(<a style="mso-comment-reference:[^>/]+)/>}, "\\1></a>")
99
- r.gsub!(%r{<link rel="File-List"}, "<link rel=File-List")
100
- r.gsub!(%r{<meta http-equiv="Content-Type"},
101
- "<meta http-equiv=Content-Type")
102
- r.gsub!(%r{></m:jc>}, "/>")
103
- r.gsub!(%r{></v:stroke>}, "/>")
104
- r.gsub!(%r{></v:f>}, "/>")
105
- r.gsub!(%r{></v:path>}, "/>")
106
- r.gsub!(%r{></o:lock>}, "/>")
107
- r.gsub!(%r{></v:imagedata>}, "/>")
108
- r.gsub!(%r{></w:wrap>}, "/>")
109
- r.gsub!(%r{&tab;|&amp;tab;}, '<span style="mso-tab-count:1">&#xA0; </span>')
110
- r = r.split(%r{(<m:oMath>|</m:oMath>)}).each_slice(4).map do |a|
94
+ doc.gsub!(%r{<span style="mso-special-character:footnote"/>},
95
+ '<span style="mso-special-character:footnote"></span>')
96
+ doc.gsub!(%r{<div style="mso-element:footnote-list"></div>},
97
+ '<div style="mso-element:footnote-list"/>')
98
+ doc.gsub!(%r{(<a style="mso-comment-reference:[^>/]+)/>}, "\\1></a>")
99
+ doc.gsub!(%r{<link rel="File-List"}, "<link rel=File-List")
100
+ doc.gsub!(%r{<meta http-equiv="Content-Type"},
101
+ "<meta http-equiv=Content-Type")
102
+ doc.gsub!(%r{></m:jc>}, "/>")
103
+ doc.gsub!(%r{></v:stroke>}, "/>")
104
+ doc.gsub!(%r{></v:f>}, "/>")
105
+ doc.gsub!(%r{></v:path>}, "/>")
106
+ doc.gsub!(%r{></o:lock>}, "/>")
107
+ doc.gsub!(%r{></v:imagedata>}, "/>")
108
+ doc.gsub!(%r{></w:wrap>}, "/>")
109
+ doc.gsub!(%r{&tab;|&amp;tab;},
110
+ '<span style="mso-tab-count:1">&#xA0; </span>')
111
+ doc.split(%r{(<m:oMath>|</m:oMath>)}).each_slice(4).map do |a|
111
112
  a.size > 2 and a[2] = a[2].gsub(/>\s+</, "><")
112
113
  a
113
114
  end.join
114
- r
115
115
  end
116
116
 
117
117
  PRINT_VIEW = <<~XML.freeze
@@ -130,27 +130,27 @@ module Html2Doc
130
130
  def self.define_head1(docxml, dir)
131
131
  docxml.xpath("//*[local-name() = 'head']").each do |h|
132
132
  h.children.first.add_previous_sibling <<~XML
133
- #{PRINT_VIEW}
134
- <link rel="File-List" href="cid:filelist.xml"/>
133
+ #{PRINT_VIEW}
134
+ <link rel="File-List" href="cid:filelist.xml"/>
135
135
  XML
136
136
  end
137
137
  end
138
138
 
139
- def self.filename_substitute(stylesheet, header_filename, filename)
140
- if header_filename.nil?
141
- stylesheet
142
- else
143
- stylesheet.gsub(/url\("[^"]+"\)/) do |m|
144
- /FILENAME/.match(m) ? "url(cid:header.html)" : m
139
+ def self.filename_substitute(head, header_filename)
140
+ return if header_filename.nil?
141
+
142
+ head.xpath(".//*[local-name() = 'style']").each do |s|
143
+ s1 = s.to_xml.gsub(/url\("[^"]+"\)/) do |m|
144
+ /FILENAME/.match?(m) ? "url(cid:header.html)" : m
145
145
  end
146
+ s.replace(s1)
146
147
  end
147
148
  end
148
149
 
149
150
  def self.stylesheet(filename, header_filename, fn)
150
- (fn.nil? || fn.empty?) &&
151
+ (fn.nil? || fn.empty?) and
151
152
  fn = File.join(File.dirname(__FILE__), "wordstyle.css")
152
153
  stylesheet = File.read(fn, encoding: "UTF-8")
153
- stylesheet = filename_substitute(stylesheet, header_filename, filename)
154
154
  xml = Nokogiri::XML("<style/>")
155
155
  xml.children.first << Nokogiri::XML::Comment.new(xml, "\n#{stylesheet}\n")
156
156
  xml.root.to_s
@@ -161,6 +161,7 @@ module Html2Doc
161
161
  head = docxml.at("//*[local-name() = 'head']")
162
162
  css = stylesheet(hash[:filename], hash[:header_file], hash[:stylesheet])
163
163
  add_stylesheet(head, title, css)
164
+ filename_substitute(head, hash[:header_file])
164
165
  define_head1(docxml, hash[:dir1])
165
166
  rootnamespace(docxml.root)
166
167
  end
@@ -189,13 +190,13 @@ module Html2Doc
189
190
  end
190
191
 
191
192
  def self.bookmarks(docxml)
192
- docxml.xpath("//*[@id][not(@name)][not(@style = 'mso-element:footnote')]").each do |x|
193
- next if x["id"].empty?
194
- next if %w(shapetype v:shapetype shape v:shape).include? x.name
195
- if x.children.empty?
196
- x.add_child("<a name='#{x["id"]}'></a>")
197
- else
198
- x.children.first.previous = "<a name='#{x["id"]}'></a>"
193
+ docxml.xpath("//*[@id][not(@name)][not(@style = 'mso-element:footnote')]")
194
+ .each do |x|
195
+ next if x["id"].empty? ||
196
+ %w(shapetype v:shapetype shape v:shape).include?(x.name)
197
+
198
+ if x.children.empty? then x.add_child("<a name='#{x['id']}'></a>")
199
+ else x.children.first.previous = "<a name='#{x['id']}'></a>"
199
200
  end
200
201
  x.delete("id")
201
202
  end
@@ -7,6 +7,7 @@ require "uuidtools"
7
7
  module Html2Doc
8
8
  def self.style_list(li, level, liststyle, listnumber)
9
9
  return unless liststyle
10
+
10
11
  if li["style"]
11
12
  li["style"] += ";"
12
13
  else
@@ -16,37 +17,39 @@ module Html2Doc
16
17
  end
17
18
 
18
19
  def self.list_add1(li, liststyles, listtype, level)
19
- if [:ul, :ol].include? listtype
20
- list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
21
- liststyles, :ul, level + 1)
22
- list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
23
- liststyles, :ol, level + 1)
24
- else
25
- list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
26
- liststyles, listtype, level + 1)
27
- list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
28
- liststyles, listtype, level + 1)
29
- end
20
+ if %i[ul ol].include? listtype
21
+ list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
22
+ liststyles, :ul, level + 1)
23
+ list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
24
+ liststyles, :ol, level + 1)
25
+ else
26
+ list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
27
+ liststyles, listtype, level + 1)
28
+ list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
29
+ liststyles, listtype, level + 1)
30
+ end
30
31
  end
31
32
 
32
33
  def self.list_add(xpath, liststyles, listtype, level)
33
- xpath.each_with_index do |list, i|
34
+ xpath.each_with_index do |l, _i|
34
35
  @listnumber += 1 if level == 1
35
- list["seen"] = true if level == 1
36
- list["id"] ||= UUIDTools::UUID.random_create
37
- (list.xpath(".//li") - list.xpath(".//ol//li | .//ul//li")).each do |li|
36
+ l["seen"] = true if level == 1
37
+ l["id"] ||= UUIDTools::UUID.random_create
38
+ (l.xpath(".//li") - l.xpath(".//ol//li | .//ul//li")).each do |li|
38
39
  style_list(li, level, liststyles[listtype], @listnumber)
39
40
  list_add1(li, liststyles, listtype, level)
40
41
  end
41
- list.xpath(".//ul[not(ancestor::li/ancestor::*/@id = '#{list['id']}')] | "\
42
- ".//ol[not(ancestor::li/ancestor::*/@id = '#{list['id']}')]").each do |li|
43
- list_add1(li.parent, liststyles, listtype, level-1)
42
+ l.xpath(".//ul[not(ancestor::li/ancestor::*/@id = '#{l['id']}')] | "\
43
+ ".//ol[not(ancestor::li/ancestor::*/@id = '#{l['id']}')]")
44
+ .each do |li|
45
+ list_add1(li.parent, liststyles, listtype, level - 1)
44
46
  end
45
47
  end
46
48
  end
47
49
 
48
50
  def self.list2para(u)
49
51
  return if u.xpath("./li").empty?
52
+
50
53
  u.xpath("./li").first["class"] ||= "MsoListParagraphCxSpFirst"
51
54
  u.xpath("./li").last["class"] ||= "MsoListParagraphCxSpLast"
52
55
  u.xpath("./li/p").each { |p| p["class"] ||= "MsoListParagraphCxSpMiddle" }
@@ -64,21 +67,25 @@ module Html2Doc
64
67
  def self.lists1(docxml, liststyles, k)
65
68
  case k
66
69
  when :ul then list_add(docxml.xpath("//ul[not(@class)]#{TOPLIST}"),
67
- liststyles, :ul, 1)
70
+ liststyles, :ul, 1)
68
71
  when :ol then list_add(docxml.xpath("//ol[not(@class)]#{TOPLIST}"),
69
72
  liststyles, :ol, 1)
70
73
  else
71
- list_add(docxml.xpath("//ol[@class = '#{k.to_s}']#{TOPLIST} | "\
72
- "//ul[@class = '#{k.to_s}']#{TOPLIST}"),
74
+ list_add(docxml.xpath("//ol[@class = '#{k}']#{TOPLIST} | "\
75
+ "//ul[@class = '#{k}']#{TOPLIST}"),
73
76
  liststyles, k, 1)
74
77
  end
75
78
  end
76
79
 
77
80
  def self.lists_unstyled(docxml, liststyles)
78
- list_add(docxml.xpath("//ul#{TOPLIST}[not(@seen)]"),
79
- liststyles, :ul, 1) if liststyles.has_key?(:ul)
80
- list_add(docxml.xpath("//ol#{TOPLIST}[not(@seen)]"),
81
- liststyles, :ul, 1) if liststyles.has_key?(:ol)
81
+ if liststyles.has_key?(:ul)
82
+ list_add(docxml.xpath("//ul#{TOPLIST}[not(@seen)]"),
83
+ liststyles, :ul, 1)
84
+ end
85
+ if liststyles.has_key?(:ol)
86
+ list_add(docxml.xpath("//ol#{TOPLIST}[not(@seen)]"),
87
+ liststyles, :ul, 1)
88
+ end
82
89
  docxml.xpath("//ul[@seen] | //ol[@seen]").each do |l|
83
90
  l.delete("seen")
84
91
  end
@@ -86,6 +93,7 @@ module Html2Doc
86
93
 
87
94
  def self.lists(docxml, liststyles)
88
95
  return if liststyles.nil?
96
+
89
97
  @listnumber = 0
90
98
  liststyles.each_key { |k| lists1(docxml, liststyles, k) }
91
99
  lists_unstyled(docxml, liststyles)
data/lib/html2doc/math.rb CHANGED
@@ -9,20 +9,20 @@ module Html2Doc
9
9
  Nokogiri::XSLT(File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"),
10
10
  encoding: "utf-8"))
11
11
 
12
- def self.asciimath_to_mathml1(x)
13
- begin
14
- AsciiMath::MathMLBuilder.new(:msword => true).append_expression(
15
- AsciiMath.parse(HTMLEntities.new.decode(x)).ast).to_s.
16
- gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
17
- rescue StandardError => e
18
- puts "parsing: #{x}"
19
- puts e.message
20
- raise e
21
- end
12
+ def self.asciimath_to_mathml1(expr)
13
+ AsciiMath::MathMLBuilder.new(msword: true).append_expression(
14
+ AsciiMath.parse(HTMLEntities.new.decode(expr)).ast,
15
+ ).to_s
16
+ .gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
17
+ rescue StandardError => e
18
+ puts "parsing: #{expr}"
19
+ puts e.message
20
+ raise e
22
21
  end
23
22
 
24
23
  def self.asciimath_to_mathml(doc, delims)
25
24
  return doc if delims.nil? || delims.size < 2
25
+
26
26
  m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
27
27
  m.each_slice(4).map.with_index do |(*a), i|
28
28
  i % 500 == 0 && m.size > 1000 && i > 0 and
@@ -42,81 +42,86 @@ module Html2Doc
42
42
  end
43
43
 
44
44
  # random fixes to MathML input that OOXML needs to render properly
45
- def self.ooxml_cleanup(m, docnamespaces)
46
- m = unwrap_accents(mathml_preserve_space(
47
- mathml_insert_rows(m, docnamespaces), docnamespaces))
48
- m.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
49
- m
45
+ def self.ooxml_cleanup(math, docnamespaces)
46
+ math = unwrap_accents(
47
+ mathml_preserve_space(
48
+ mathml_insert_rows(math, docnamespaces), docnamespaces
49
+ ),
50
+ )
51
+ math.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
52
+ math
50
53
  end
51
54
 
52
- def self.mathml_insert_rows(m, docnamespaces)
53
- m.xpath(%w(msup msub msubsup munder mover munderover).
54
- map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
55
+ def self.mathml_insert_rows(math, docnamespaces)
56
+ math.xpath(%w(msup msub msubsup munder mover munderover)
57
+ .map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
55
58
  next unless x.next_element && x.next_element != "mrow"
59
+
56
60
  x.next_element.wrap("<mrow/>")
57
61
  end
58
- m
62
+ math
59
63
  end
60
64
 
61
- def self.mathml_preserve_space(m, docnamespaces)
62
- m.xpath(".//xmlns:mtext", docnamespaces).each do |x|
65
+ def self.mathml_preserve_space(math, docnamespaces)
66
+ math.xpath(".//xmlns:mtext", docnamespaces).each do |x|
63
67
  x.children = x.children.to_xml.gsub(/^\s/, "&#xA0;").gsub(/\s$/, "&#xA0;")
64
68
  end
65
- m
69
+ math
66
70
  end
67
71
 
68
- def self.unitalic(m)
69
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
72
+ def self.unitalic(math)
73
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
70
74
  x.wrap("<span style='font-style:normal;'></span>")
71
75
  end
72
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
76
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
73
77
  x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>")
74
78
  end
75
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
79
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
76
80
  x.wrap("<span class='nostem'><em></em></span>")
77
81
  end
78
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
82
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
79
83
  x.wrap("<span style='font-style:normal;font-weight:bold;'></span>")
80
84
  end
81
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
82
- toPlane1(x, :monospace)
85
+ math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
86
+ to_plane1(x, :monospace)
83
87
  end
84
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
85
- toPlane1(x, :doublestruck)
88
+ math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
89
+ to_plane1(x, :doublestruck)
86
90
  end
87
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
88
- toPlane1(x, :script)
91
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
92
+ to_plane1(x, :script)
89
93
  end
90
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
91
- toPlane1(x, :scriptbold)
94
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
95
+ to_plane1(x, :scriptbold)
92
96
  end
93
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
94
- toPlane1(x, :fraktur)
97
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
98
+ to_plane1(x, :fraktur)
95
99
  end
96
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
97
- toPlane1(x, :frakturbold)
100
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
101
+ to_plane1(x, :frakturbold)
98
102
  end
99
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
100
- toPlane1(x, :sans)
103
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
104
+ to_plane1(x, :sans)
101
105
  end
102
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
103
- toPlane1(x, :sansbold)
106
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
107
+ to_plane1(x, :sansbold)
104
108
  end
105
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
106
- toPlane1(x, :sansitalic)
109
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
110
+ to_plane1(x, :sansitalic)
107
111
  end
108
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
109
- toPlane1(x, :sansbolditalic)
112
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
113
+ to_plane1(x, :sansbolditalic)
110
114
  end
111
- m
115
+ math
112
116
  end
113
117
 
114
- def self.toPlane1(x, font)
115
- x.traverse do |n|
118
+ def self.to_plane1(xml, font)
119
+ xml.traverse do |n|
116
120
  next unless n.text?
121
+
117
122
  n.replace(Plane1Converter.conv(HTMLEntities.new.decode(n.text), font))
118
123
  end
119
- x
124
+ xml
120
125
  end
121
126
 
122
127
  def self.mathml_to_ooml(docxml)
@@ -126,22 +131,23 @@ module Html2Doc
126
131
  i % 100 == 0 && m.size > 500 && i > 0 and
127
132
  warn "Math OOXML #{i} of #{m.size}"
128
133
  element = ooxml_cleanup(x, docnamespaces)
129
- doc = Nokogiri::XML::Document::new()
134
+ doc = Nokogiri::XML::Document::new
130
135
  doc.root = element
131
- ooxml = (unitalic(esc_space(@xsltemplate.transform(doc)))).to_s.
132
- gsub(/<\?[^>]+>\s*/, "").
133
- gsub(/ xmlns(:[^=]+)?="[^"]+"/, "").
134
- gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
136
+ ooxml = unitalic(esc_space(@xsltemplate.transform(doc))).to_s
137
+ .gsub(/<\?[^>]+>\s*/, "")
138
+ .gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
139
+ .gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
135
140
  ooxml = uncenter(x, ooxml)
136
141
  x.swap(ooxml)
137
142
  end
138
143
  end
139
144
 
140
- # escape space as &#x32;; we are removing any spaces generated by
145
+ # escape space as &#x32;; we are removing any spaces generated by
141
146
  # XML indentation
142
147
  def self.esc_space(xml)
143
148
  xml.traverse do |n|
144
149
  next unless n.text?
150
+
145
151
  n = n.text.gsub(/ /, "&#x32;")
146
152
  end
147
153
  xml
@@ -149,17 +155,15 @@ module Html2Doc
149
155
 
150
156
  # if oomml has no siblings, by default it is centered; override this with
151
157
  # left/right if parent is so tagged
152
- def self.uncenter(m, ooxml)
153
- if m.next == nil && m.previous == nil
154
- alignnode = m.at(".//ancestor::*[@style][local-name() = 'p' or "\
155
- "local-name() = 'div' or local-name() = 'td']/@style")
156
- return ooxml unless alignnode
157
- if alignnode.text.include? ("text-align:left")
158
- ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
159
- "m:val='left'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
160
- elsif alignnode.text.include? ("text-align:right")
158
+ def self.uncenter(math, ooxml)
159
+ alignnode = math.at(".//ancestor::*[@style][local-name() = 'p' or "\
160
+ "local-name() = 'div' or local-name() = 'td']/@style")
161
+ return ooxml unless alignnode && (math.next == nil && math.previous == nil)
162
+
163
+ %w(left right).each do |dir|
164
+ if alignnode.text.include? ("text-align:#{dir}")
161
165
  ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
162
- "m:val='right'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
166
+ "m:val='#{dir}'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
163
167
  end
164
168
  end
165
169
  ooxml