html2doc 1.1.0 → 1.1.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: f1d55301cebf98f75ddfd8966237d19c39bae1c9c8b4b01a39f124ca5da8e588
4
- data.tar.gz: a84cd4d87e2cf1c9f107300c883b6b6b85496e2067e76d7f0a282feecf6550c1
3
+ metadata.gz: b92a72c9d0ee6005e38ab8de1f0cbc48455819955d34eb349fef1244504a9971
4
+ data.tar.gz: 52b07e9c1720bc2bf7a7196e0f650c62b81bf534535795bb2a136f2e79829416
5
5
  SHA512:
6
- metadata.gz: 2ef21ec975f624420db8ee706ab1997ab7941bb7e9f75bfaecdde056346a08f73842cd151ad6f5c7a9711a0616ccc61177bd24f17693f8c31c6db70e7aa78088
7
- data.tar.gz: c204dfcb3f27a24f86908195d47287c74daaaa41d02de5f28fbb99167b17895f7b46e27971ae533c18874f09ed2d32d4205c6dcd9c6ee6b9e045a9dc06656bfa
6
+ metadata.gz: af4b84183859fd83ac500c6c1ac28e76d49ec7fb48c8a4cf49bc3543909b913e78c8293918b34e21e9e68f2dc78a500824337677ae1aea54bcf7cbecebd9363d
7
+ data.tar.gz: 9e0bd48390458ec57dcc9650643a9332a549148a1297b37962e37399453d95132e39c69cb94ea13fcc70f9d6bc273fccf742da61e6f4a3025710182df1d65144
data/.rubocop.yml CHANGED
@@ -10,5 +10,3 @@ AllCops:
10
10
  DisplayCopNames: false
11
11
  StyleGuideCopsOnly: false
12
12
  TargetRubyVersion: 2.4
13
- Rails:
14
- Enabled: true
data/Gemfile CHANGED
@@ -10,6 +10,6 @@ end
10
10
 
11
11
  gemspec
12
12
 
13
- if File.exist? 'Gemfile.devel'
14
- eval File.read('Gemfile.devel'), nil, 'Gemfile.devel' # rubocop:disable Security/Eval
13
+ if File.exist? "Gemfile.devel"
14
+ eval File.read("Gemfile.devel"), nil, "Gemfile.devel" # rubocop:disable Security/Eval
15
15
  end
data/Rakefile CHANGED
@@ -3,4 +3,4 @@ require "rspec/core/rake_task"
3
3
 
4
4
  RSpec::Core::RakeTask.new(:spec)
5
5
 
6
- task :default => :spec
6
+ task default: :spec
data/bin/html2doc CHANGED
@@ -21,9 +21,8 @@ if ARGV.length < 1
21
21
  end
22
22
 
23
23
  Html2Doc.process(
24
- File.read(ARGV[0], encoding: "utf-8"),
24
+ File.read(ARGV[0], encoding: "utf-8"),
25
25
  filename: ARGV[0].gsub(/\.html?$/, ""),
26
26
  stylesheet: options[:stylesheet],
27
- header: options[:header],
27
+ header: options[:header]
28
28
  )
29
-
data/lib/html2doc/base.rb CHANGED
@@ -2,8 +2,6 @@ require "uuidtools"
2
2
  require "asciimath"
3
3
  require "htmlentities"
4
4
  require "nokogiri"
5
- #require "xml/xslt"
6
- require "pp"
7
5
  require "fileutils"
8
6
 
9
7
  module Html2Doc
@@ -19,15 +17,17 @@ module Html2Doc
19
17
 
20
18
  def self.process_header(headerfile, hash)
21
19
  return if headerfile.nil?
20
+
22
21
  doc = File.read(headerfile, encoding: "utf-8")
23
- doc = header_image_cleanup(doc, hash[:dir1], hash[:filename], File.dirname(hash[:filename]))
22
+ doc = header_image_cleanup(doc, hash[:dir1], hash[:filename],
23
+ File.dirname(hash[:filename]))
24
24
  File.open("#{hash[:dir1]}/header.html", "w:UTF-8") { |f| f.write(doc) }
25
25
  end
26
26
 
27
27
  def self.clear_dir(dir)
28
28
  Dir.foreach(dir) do |f|
29
29
  fn = File.join(dir, f)
30
- File.delete(fn) if f != '.' && f != '..'
30
+ File.delete(fn) if f != "." && f != ".."
31
31
  end
32
32
  dir
33
33
  end
@@ -72,7 +72,7 @@ module Html2Doc
72
72
 
73
73
  def self.to_xhtml(xml)
74
74
  xml.gsub!(/<\?xml[^>]*>/, "")
75
- unless /<!DOCTYPE /.match xml
75
+ unless /<!DOCTYPE /.match? xml
76
76
  xml = '<!DOCTYPE html SYSTEM
77
77
  "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' + xml
78
78
  end
@@ -84,34 +84,34 @@ module Html2Doc
84
84
  DOCTYPE
85
85
 
86
86
  def self.from_xhtml(xml)
87
- xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "").
88
- sub(DOCTYPE, "").
89
- gsub(%{ />}, "/>")
87
+ xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "")
88
+ .sub(DOCTYPE, "")
89
+ .gsub(%{ />}, "/>")
90
90
  end
91
91
 
92
- def self.msword_fix(r)
92
+ def self.msword_fix(doc)
93
93
  # brain damage in MSWord parser
94
- r.gsub!(%r{<span style="mso-special-character:footnote"/>},
95
- '<span style="mso-special-character:footnote"></span>')
96
- r.gsub!(%r{<div style="mso-element:footnote-list"></div>},
97
- '<div style="mso-element:footnote-list"/>')
98
- r.gsub!(%r{(<a style="mso-comment-reference:[^>/]+)/>}, "\\1></a>")
99
- r.gsub!(%r{<link rel="File-List"}, "<link rel=File-List")
100
- r.gsub!(%r{<meta http-equiv="Content-Type"},
101
- "<meta http-equiv=Content-Type")
102
- r.gsub!(%r{></m:jc>}, "/>")
103
- r.gsub!(%r{></v:stroke>}, "/>")
104
- r.gsub!(%r{></v:f>}, "/>")
105
- r.gsub!(%r{></v:path>}, "/>")
106
- r.gsub!(%r{></o:lock>}, "/>")
107
- r.gsub!(%r{></v:imagedata>}, "/>")
108
- r.gsub!(%r{></w:wrap>}, "/>")
109
- r.gsub!(%r{&tab;|&amp;tab;}, '<span style="mso-tab-count:1">&#xA0; </span>')
110
- r = r.split(%r{(<m:oMath>|</m:oMath>)}).each_slice(4).map do |a|
94
+ doc.gsub!(%r{<span style="mso-special-character:footnote"/>},
95
+ '<span style="mso-special-character:footnote"></span>')
96
+ doc.gsub!(%r{<div style="mso-element:footnote-list"></div>},
97
+ '<div style="mso-element:footnote-list"/>')
98
+ doc.gsub!(%r{(<a style="mso-comment-reference:[^>/]+)/>}, "\\1></a>")
99
+ doc.gsub!(%r{<link rel="File-List"}, "<link rel=File-List")
100
+ doc.gsub!(%r{<meta http-equiv="Content-Type"},
101
+ "<meta http-equiv=Content-Type")
102
+ doc.gsub!(%r{></m:jc>}, "/>")
103
+ doc.gsub!(%r{></v:stroke>}, "/>")
104
+ doc.gsub!(%r{></v:f>}, "/>")
105
+ doc.gsub!(%r{></v:path>}, "/>")
106
+ doc.gsub!(%r{></o:lock>}, "/>")
107
+ doc.gsub!(%r{></v:imagedata>}, "/>")
108
+ doc.gsub!(%r{></w:wrap>}, "/>")
109
+ doc.gsub!(%r{&tab;|&amp;tab;},
110
+ '<span style="mso-tab-count:1">&#xA0; </span>')
111
+ doc.split(%r{(<m:oMath>|</m:oMath>)}).each_slice(4).map do |a|
111
112
  a.size > 2 and a[2] = a[2].gsub(/>\s+</, "><")
112
113
  a
113
114
  end.join
114
- r
115
115
  end
116
116
 
117
117
  PRINT_VIEW = <<~XML.freeze
@@ -130,27 +130,27 @@ module Html2Doc
130
130
  def self.define_head1(docxml, dir)
131
131
  docxml.xpath("//*[local-name() = 'head']").each do |h|
132
132
  h.children.first.add_previous_sibling <<~XML
133
- #{PRINT_VIEW}
134
- <link rel="File-List" href="cid:filelist.xml"/>
133
+ #{PRINT_VIEW}
134
+ <link rel="File-List" href="cid:filelist.xml"/>
135
135
  XML
136
136
  end
137
137
  end
138
138
 
139
- def self.filename_substitute(stylesheet, header_filename, filename)
140
- if header_filename.nil?
141
- stylesheet
142
- else
143
- stylesheet.gsub(/url\("[^"]+"\)/) do |m|
144
- /FILENAME/.match(m) ? "url(cid:header.html)" : m
139
+ def self.filename_substitute(head, header_filename)
140
+ return if header_filename.nil?
141
+
142
+ head.xpath(".//*[local-name() = 'style']").each do |s|
143
+ s1 = s.to_xml.gsub(/url\("[^"]+"\)/) do |m|
144
+ /FILENAME/.match?(m) ? "url(cid:header.html)" : m
145
145
  end
146
+ s.replace(s1)
146
147
  end
147
148
  end
148
149
 
149
150
  def self.stylesheet(filename, header_filename, fn)
150
- (fn.nil? || fn.empty?) &&
151
+ (fn.nil? || fn.empty?) and
151
152
  fn = File.join(File.dirname(__FILE__), "wordstyle.css")
152
153
  stylesheet = File.read(fn, encoding: "UTF-8")
153
- stylesheet = filename_substitute(stylesheet, header_filename, filename)
154
154
  xml = Nokogiri::XML("<style/>")
155
155
  xml.children.first << Nokogiri::XML::Comment.new(xml, "\n#{stylesheet}\n")
156
156
  xml.root.to_s
@@ -161,6 +161,7 @@ module Html2Doc
161
161
  head = docxml.at("//*[local-name() = 'head']")
162
162
  css = stylesheet(hash[:filename], hash[:header_file], hash[:stylesheet])
163
163
  add_stylesheet(head, title, css)
164
+ filename_substitute(head, hash[:header_file])
164
165
  define_head1(docxml, hash[:dir1])
165
166
  rootnamespace(docxml.root)
166
167
  end
@@ -189,13 +190,13 @@ module Html2Doc
189
190
  end
190
191
 
191
192
  def self.bookmarks(docxml)
192
- docxml.xpath("//*[@id][not(@name)][not(@style = 'mso-element:footnote')]").each do |x|
193
- next if x["id"].empty?
194
- next if %w(shapetype v:shapetype shape v:shape).include? x.name
195
- if x.children.empty?
196
- x.add_child("<a name='#{x["id"]}'></a>")
197
- else
198
- x.children.first.previous = "<a name='#{x["id"]}'></a>"
193
+ docxml.xpath("//*[@id][not(@name)][not(@style = 'mso-element:footnote')]")
194
+ .each do |x|
195
+ next if x["id"].empty? ||
196
+ %w(shapetype v:shapetype shape v:shape).include?(x.name)
197
+
198
+ if x.children.empty? then x.add_child("<a name='#{x['id']}'></a>")
199
+ else x.children.first.previous = "<a name='#{x['id']}'></a>"
199
200
  end
200
201
  x.delete("id")
201
202
  end
data/lib/html2doc/lists.rb CHANGED
@@ -7,6 +7,7 @@ require "uuidtools"
7
7
  module Html2Doc
8
8
  def self.style_list(li, level, liststyle, listnumber)
9
9
  return unless liststyle
10
+
10
11
  if li["style"]
11
12
  li["style"] += ";"
12
13
  else
@@ -16,37 +17,39 @@ module Html2Doc
16
17
  end
17
18
 
18
19
  def self.list_add1(li, liststyles, listtype, level)
19
- if [:ul, :ol].include? listtype
20
- list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
21
- liststyles, :ul, level + 1)
22
- list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
23
- liststyles, :ol, level + 1)
24
- else
25
- list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
26
- liststyles, listtype, level + 1)
27
- list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
28
- liststyles, listtype, level + 1)
29
- end
20
+ if %i[ul ol].include? listtype
21
+ list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
22
+ liststyles, :ul, level + 1)
23
+ list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
24
+ liststyles, :ol, level + 1)
25
+ else
26
+ list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
27
+ liststyles, listtype, level + 1)
28
+ list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
29
+ liststyles, listtype, level + 1)
30
+ end
30
31
  end
31
32
 
32
33
  def self.list_add(xpath, liststyles, listtype, level)
33
- xpath.each_with_index do |list, i|
34
+ xpath.each_with_index do |l, _i|
34
35
  @listnumber += 1 if level == 1
35
- list["seen"] = true if level == 1
36
- list["id"] ||= UUIDTools::UUID.random_create
37
- (list.xpath(".//li") - list.xpath(".//ol//li | .//ul//li")).each do |li|
36
+ l["seen"] = true if level == 1
37
+ l["id"] ||= UUIDTools::UUID.random_create
38
+ (l.xpath(".//li") - l.xpath(".//ol//li | .//ul//li")).each do |li|
38
39
  style_list(li, level, liststyles[listtype], @listnumber)
39
40
  list_add1(li, liststyles, listtype, level)
40
41
  end
41
- list.xpath(".//ul[not(ancestor::li/ancestor::*/@id = '#{list['id']}')] | "\
42
- ".//ol[not(ancestor::li/ancestor::*/@id = '#{list['id']}')]").each do |li|
43
- list_add1(li.parent, liststyles, listtype, level-1)
42
+ l.xpath(".//ul[not(ancestor::li/ancestor::*/@id = '#{l['id']}')] | "\
43
+ ".//ol[not(ancestor::li/ancestor::*/@id = '#{l['id']}')]")
44
+ .each do |li|
45
+ list_add1(li.parent, liststyles, listtype, level - 1)
44
46
  end
45
47
  end
46
48
  end
47
49
 
48
50
  def self.list2para(u)
49
51
  return if u.xpath("./li").empty?
52
+
50
53
  u.xpath("./li").first["class"] ||= "MsoListParagraphCxSpFirst"
51
54
  u.xpath("./li").last["class"] ||= "MsoListParagraphCxSpLast"
52
55
  u.xpath("./li/p").each { |p| p["class"] ||= "MsoListParagraphCxSpMiddle" }
@@ -64,21 +67,25 @@ module Html2Doc
64
67
  def self.lists1(docxml, liststyles, k)
65
68
  case k
66
69
  when :ul then list_add(docxml.xpath("//ul[not(@class)]#{TOPLIST}"),
67
- liststyles, :ul, 1)
70
+ liststyles, :ul, 1)
68
71
  when :ol then list_add(docxml.xpath("//ol[not(@class)]#{TOPLIST}"),
69
72
  liststyles, :ol, 1)
70
73
  else
71
- list_add(docxml.xpath("//ol[@class = '#{k.to_s}']#{TOPLIST} | "\
72
- "//ul[@class = '#{k.to_s}']#{TOPLIST}"),
74
+ list_add(docxml.xpath("//ol[@class = '#{k}']#{TOPLIST} | "\
75
+ "//ul[@class = '#{k}']#{TOPLIST}"),
73
76
  liststyles, k, 1)
74
77
  end
75
78
  end
76
79
 
77
80
  def self.lists_unstyled(docxml, liststyles)
78
- list_add(docxml.xpath("//ul#{TOPLIST}[not(@seen)]"),
79
- liststyles, :ul, 1) if liststyles.has_key?(:ul)
80
- list_add(docxml.xpath("//ol#{TOPLIST}[not(@seen)]"),
81
- liststyles, :ul, 1) if liststyles.has_key?(:ol)
81
+ if liststyles.has_key?(:ul)
82
+ list_add(docxml.xpath("//ul#{TOPLIST}[not(@seen)]"),
83
+ liststyles, :ul, 1)
84
+ end
85
+ if liststyles.has_key?(:ol)
86
+ list_add(docxml.xpath("//ol#{TOPLIST}[not(@seen)]"),
87
+ liststyles, :ul, 1)
88
+ end
82
89
  docxml.xpath("//ul[@seen] | //ol[@seen]").each do |l|
83
90
  l.delete("seen")
84
91
  end
@@ -86,6 +93,7 @@ module Html2Doc
86
93
 
87
94
  def self.lists(docxml, liststyles)
88
95
  return if liststyles.nil?
96
+
89
97
  @listnumber = 0
90
98
  liststyles.each_key { |k| lists1(docxml, liststyles, k) }
91
99
  lists_unstyled(docxml, liststyles)
data/lib/html2doc/math.rb CHANGED
@@ -9,20 +9,20 @@ module Html2Doc
9
9
  Nokogiri::XSLT(File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"),
10
10
  encoding: "utf-8"))
11
11
 
12
- def self.asciimath_to_mathml1(x)
13
- begin
14
- AsciiMath::MathMLBuilder.new(:msword => true).append_expression(
15
- AsciiMath.parse(HTMLEntities.new.decode(x)).ast).to_s.
16
- gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
17
- rescue StandardError => e
18
- puts "parsing: #{x}"
19
- puts e.message
20
- raise e
21
- end
12
+ def self.asciimath_to_mathml1(expr)
13
+ AsciiMath::MathMLBuilder.new(msword: true).append_expression(
14
+ AsciiMath.parse(HTMLEntities.new.decode(expr)).ast,
15
+ ).to_s
16
+ .gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
17
+ rescue StandardError => e
18
+ puts "parsing: #{expr}"
19
+ puts e.message
20
+ raise e
22
21
  end
23
22
 
24
23
  def self.asciimath_to_mathml(doc, delims)
25
24
  return doc if delims.nil? || delims.size < 2
25
+
26
26
  m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
27
27
  m.each_slice(4).map.with_index do |(*a), i|
28
28
  i % 500 == 0 && m.size > 1000 && i > 0 and
@@ -42,81 +42,86 @@ module Html2Doc
42
42
  end
43
43
 
44
44
  # random fixes to MathML input that OOXML needs to render properly
45
- def self.ooxml_cleanup(m, docnamespaces)
46
- m = unwrap_accents(mathml_preserve_space(
47
- mathml_insert_rows(m, docnamespaces), docnamespaces))
48
- m.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
49
- m
45
+ def self.ooxml_cleanup(math, docnamespaces)
46
+ math = unwrap_accents(
47
+ mathml_preserve_space(
48
+ mathml_insert_rows(math, docnamespaces), docnamespaces
49
+ ),
50
+ )
51
+ math.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
52
+ math
50
53
  end
51
54
 
52
- def self.mathml_insert_rows(m, docnamespaces)
53
- m.xpath(%w(msup msub msubsup munder mover munderover).
54
- map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
55
+ def self.mathml_insert_rows(math, docnamespaces)
56
+ math.xpath(%w(msup msub msubsup munder mover munderover)
57
+ .map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
55
58
  next unless x.next_element && x.next_element != "mrow"
59
+
56
60
  x.next_element.wrap("<mrow/>")
57
61
  end
58
- m
62
+ math
59
63
  end
60
64
 
61
- def self.mathml_preserve_space(m, docnamespaces)
62
- m.xpath(".//xmlns:mtext", docnamespaces).each do |x|
65
+ def self.mathml_preserve_space(math, docnamespaces)
66
+ math.xpath(".//xmlns:mtext", docnamespaces).each do |x|
63
67
  x.children = x.children.to_xml.gsub(/^\s/, "&#xA0;").gsub(/\s$/, "&#xA0;")
64
68
  end
65
- m
69
+ math
66
70
  end
67
71
 
68
- def self.unitalic(m)
69
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
72
+ def self.unitalic(math)
73
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
70
74
  x.wrap("<span style='font-style:normal;'></span>")
71
75
  end
72
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
76
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
73
77
  x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>")
74
78
  end
75
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
79
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
76
80
  x.wrap("<span class='nostem'><em></em></span>")
77
81
  end
78
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
82
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
79
83
  x.wrap("<span style='font-style:normal;font-weight:bold;'></span>")
80
84
  end
81
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
82
- toPlane1(x, :monospace)
85
+ math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
86
+ to_plane1(x, :monospace)
83
87
  end
84
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
85
- toPlane1(x, :doublestruck)
88
+ math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
89
+ to_plane1(x, :doublestruck)
86
90
  end
87
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
88
- toPlane1(x, :script)
91
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
92
+ to_plane1(x, :script)
89
93
  end
90
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
91
- toPlane1(x, :scriptbold)
94
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
95
+ to_plane1(x, :scriptbold)
92
96
  end
93
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
94
- toPlane1(x, :fraktur)
97
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
98
+ to_plane1(x, :fraktur)
95
99
  end
96
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
97
- toPlane1(x, :frakturbold)
100
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
101
+ to_plane1(x, :frakturbold)
98
102
  end
99
- m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
100
- toPlane1(x, :sans)
103
+ math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
104
+ to_plane1(x, :sans)
101
105
  end
102
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
103
- toPlane1(x, :sansbold)
106
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
107
+ to_plane1(x, :sansbold)
104
108
  end
105
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
106
- toPlane1(x, :sansitalic)
109
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
110
+ to_plane1(x, :sansitalic)
107
111
  end
108
- m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
109
- toPlane1(x, :sansbolditalic)
112
+ math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
113
+ to_plane1(x, :sansbolditalic)
110
114
  end
111
- m
115
+ math
112
116
  end
113
117
 
114
- def self.toPlane1(x, font)
115
- x.traverse do |n|
118
+ def self.to_plane1(xml, font)
119
+ xml.traverse do |n|
116
120
  next unless n.text?
121
+
117
122
  n.replace(Plane1Converter.conv(HTMLEntities.new.decode(n.text), font))
118
123
  end
119
- x
124
+ xml
120
125
  end
121
126
 
122
127
  def self.mathml_to_ooml(docxml)
@@ -126,22 +131,23 @@ module Html2Doc
126
131
  i % 100 == 0 && m.size > 500 && i > 0 and
127
132
  warn "Math OOXML #{i} of #{m.size}"
128
133
  element = ooxml_cleanup(x, docnamespaces)
129
- doc = Nokogiri::XML::Document::new()
134
+ doc = Nokogiri::XML::Document::new
130
135
  doc.root = element
131
- ooxml = (unitalic(esc_space(@xsltemplate.transform(doc)))).to_s.
132
- gsub(/<\?[^>]+>\s*/, "").
133
- gsub(/ xmlns(:[^=]+)?="[^"]+"/, "").
134
- gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
136
+ ooxml = unitalic(esc_space(@xsltemplate.transform(doc))).to_s
137
+ .gsub(/<\?[^>]+>\s*/, "")
138
+ .gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
139
+ .gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
135
140
  ooxml = uncenter(x, ooxml)
136
141
  x.swap(ooxml)
137
142
  end
138
143
  end
139
144
 
140
- # escape space as &#x32;; we are removing any spaces generated by
145
+ # escape space as &#x32;; we are removing any spaces generated by
141
146
  # XML indentation
142
147
  def self.esc_space(xml)
143
148
  xml.traverse do |n|
144
149
  next unless n.text?
150
+
145
151
  n = n.text.gsub(/ /, "&#x32;")
146
152
  end
147
153
  xml
@@ -149,17 +155,15 @@ module Html2Doc
149
155
 
150
156
  # if oomml has no siblings, by default it is centered; override this with
151
157
  # left/right if parent is so tagged
152
- def self.uncenter(m, ooxml)
153
- if m.next == nil && m.previous == nil
154
- alignnode = m.at(".//ancestor::*[@style][local-name() = 'p' or "\
155
- "local-name() = 'div' or local-name() = 'td']/@style")
156
- return ooxml unless alignnode
157
- if alignnode.text.include? ("text-align:left")
158
- ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
159
- "m:val='left'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
160
- elsif alignnode.text.include? ("text-align:right")
158
+ def self.uncenter(math, ooxml)
159
+ alignnode = math.at(".//ancestor::*[@style][local-name() = 'p' or "\
160
+ "local-name() = 'div' or local-name() = 'td']/@style")
161
+ return ooxml unless alignnode && (math.next == nil && math.previous == nil)
162
+
163
+ %w(left right).each do |dir|
164
+ if alignnode.text.include? ("text-align:#{dir}")
161
165
  ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
162
- "m:val='right'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
166
+ "m:val='#{dir}'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
163
167
  end
164
168
  end
165
169
  ooxml