html2doc 1.0.5 → 1.1.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.github/workflows/rake.yml +42 -0
- data/.gitignore +2 -0
- data/.rubocop.yml +6 -4
- data/Gemfile +2 -2
- data/README.adoc +1 -3
- data/Rakefile +1 -1
- data/bin/html2doc +2 -3
- data/html2doc.gemspec +4 -3
- data/lib/html2doc/base.rb +55 -45
- data/lib/html2doc/lists.rb +33 -25
- data/lib/html2doc/math.rb +97 -38
- data/lib/html2doc/mime.rb +42 -25
- data/lib/html2doc/mml2omml.xsl +9 -1
- data/lib/html2doc/notes.rb +34 -31
- data/lib/html2doc/version.rb +1 -1
- data/spec/html2doc_spec.rb +511 -484
- metadata +26 -16
- data/.github/workflows/macos.yml +0 -38
- data/.github/workflows/ubuntu.yml +0 -56
- data/.github/workflows/windows.yml +0 -40
- data/.rubocop.ribose.yml +0 -65
- data/.rubocop.tb.yml +0 -650
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: b92a72c9d0ee6005e38ab8de1f0cbc48455819955d34eb349fef1244504a9971
|
4
|
+
data.tar.gz: 52b07e9c1720bc2bf7a7196e0f650c62b81bf534535795bb2a136f2e79829416
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: af4b84183859fd83ac500c6c1ac28e76d49ec7fb48c8a4cf49bc3543909b913e78c8293918b34e21e9e68f2dc78a500824337677ae1aea54bcf7cbecebd9363d
|
7
|
+
data.tar.gz: 9e0bd48390458ec57dcc9650643a9332a549148a1297b37962e37399453d95132e39c69cb94ea13fcc70f9d6bc273fccf742da61e6f4a3025710182df1d65144
|
@@ -0,0 +1,42 @@
|
|
1
|
+
# Auto-generated by Cimas: Do not edit it manually!
|
2
|
+
# See https://github.com/metanorma/cimas
|
3
|
+
name: rake
|
4
|
+
|
5
|
+
on:
|
6
|
+
push:
|
7
|
+
branches: [ master, main ]
|
8
|
+
tags: [ v* ]
|
9
|
+
pull_request:
|
10
|
+
|
11
|
+
jobs:
|
12
|
+
rake:
|
13
|
+
name: Test on Ruby ${{ matrix.ruby }} ${{ matrix.os }}
|
14
|
+
runs-on: ${{ matrix.os }}
|
15
|
+
continue-on-error: ${{ matrix.experimental }}
|
16
|
+
strategy:
|
17
|
+
fail-fast: false
|
18
|
+
matrix:
|
19
|
+
ruby: [ '2.7', '2.6', '2.5', '2.4' ]
|
20
|
+
os: [ ubuntu-latest, windows-latest, macos-latest ]
|
21
|
+
experimental: [ false ]
|
22
|
+
include:
|
23
|
+
- ruby: '3.0'
|
24
|
+
os: 'ubuntu-latest'
|
25
|
+
experimental: true
|
26
|
+
- ruby: '3.0'
|
27
|
+
os: 'windows-latest'
|
28
|
+
experimental: true
|
29
|
+
- ruby: '3.0'
|
30
|
+
os: 'macos-latest'
|
31
|
+
experimental: true
|
32
|
+
steps:
|
33
|
+
- uses: actions/checkout@v2
|
34
|
+
with:
|
35
|
+
submodules: true
|
36
|
+
|
37
|
+
- uses: ruby/setup-ruby@v1
|
38
|
+
with:
|
39
|
+
ruby-version: ${{ matrix.ruby }}
|
40
|
+
bundler-cache: true
|
41
|
+
|
42
|
+
- run: bundle exec rake
|
data/.gitignore
CHANGED
data/.rubocop.yml
CHANGED
@@ -1,10 +1,12 @@
|
|
1
1
|
# This project follows the Ribose OSS style guide.
|
2
2
|
# https://github.com/riboseinc/oss-guides
|
3
3
|
# All project-specific additions and overrides should be specified in this file.
|
4
|
-
|
5
4
|
inherit_from:
|
6
5
|
- https://raw.githubusercontent.com/riboseinc/oss-guides/master/ci/rubocop.yml
|
6
|
+
|
7
|
+
# local repo-specific modifications
|
8
|
+
|
7
9
|
AllCops:
|
8
|
-
|
9
|
-
|
10
|
-
|
10
|
+
DisplayCopNames: false
|
11
|
+
StyleGuideCopsOnly: false
|
12
|
+
TargetRubyVersion: 2.4
|
data/Gemfile
CHANGED
@@ -10,6 +10,6 @@ end
|
|
10
10
|
|
11
11
|
gemspec
|
12
12
|
|
13
|
-
if File.exist?
|
14
|
-
eval File.read(
|
13
|
+
if File.exist? "Gemfile.devel"
|
14
|
+
eval File.read("Gemfile.devel"), nil, "Gemfile.devel" # rubocop:disable Security/Eval
|
15
15
|
end
|
data/README.adoc
CHANGED
@@ -3,9 +3,7 @@
|
|
3
3
|
https://github.com/metanorma/html2doc/workflows/main/badge.svg
|
4
4
|
|
5
5
|
image:https://img.shields.io/gem/v/html2doc.svg["Gem Version", link="https://rubygems.org/gems/html2doc"]
|
6
|
-
image:https://github.com/metanorma/html2doc/workflows/
|
7
|
-
image:https://github.com/metanorma/html2doc/workflows/macos/badge.svg["OSX Build Status", link="https://github.com/metanorma/html2doc/actions?query=workflow%3Amacos"]
|
8
|
-
image:https://github.com/metanorma/html2doc/workflows/windows/badge.svg["Windows Build Status", link="https://github.com/metanorma/html2doc/actions?query=workflow%3Awindows"]
|
6
|
+
image:https://github.com/metanorma/html2doc/workflows/rake/badge.svg["Build Status", link="https://github.com/metanorma/html2doc/actions?workflow=rake"]
|
9
7
|
image:https://codeclimate.com/github/metanorma/html2doc/badges/gpa.svg["Code Climate", link="https://codeclimate.com/github/metanorma/html2doc"]
|
10
8
|
image:https://img.shields.io/github/issues-pr-raw/metanorma/html2doc.svg["Pull Requests", link="https://github.com/metanorma/html2doc/pulls"]
|
11
9
|
image:https://img.shields.io/github/commits-since/metanorma/html2doc/latest.svg["Commits since latest",link="https://github.com/metanorma/html2doc/releases"]
|
data/Rakefile
CHANGED
data/bin/html2doc
CHANGED
@@ -21,9 +21,8 @@ if ARGV.length < 1
|
|
21
21
|
end
|
22
22
|
|
23
23
|
Html2Doc.process(
|
24
|
-
File.read(ARGV[0], encoding: "utf-8"),
|
24
|
+
File.read(ARGV[0], encoding: "utf-8"),
|
25
25
|
filename: ARGV[0].gsub(/\.html?$/, ""),
|
26
26
|
stylesheet: options[:stylesheet],
|
27
|
-
header: options[:header]
|
27
|
+
header: options[:header]
|
28
28
|
)
|
29
|
-
|
data/html2doc.gemspec
CHANGED
@@ -28,10 +28,11 @@ Gem::Specification.new do |spec|
|
|
28
28
|
spec.add_dependency "htmlentities", "~> 4.3.4"
|
29
29
|
spec.add_dependency "image_size"
|
30
30
|
spec.add_dependency "mime-types"
|
31
|
-
spec.add_dependency "nokogiri", "
|
31
|
+
spec.add_dependency "nokogiri", "~> 1.10.4"
|
32
32
|
spec.add_dependency "thread_safe"
|
33
33
|
spec.add_dependency "uuidtools"
|
34
|
-
spec.add_dependency "asciimath", "~> 2.0.
|
34
|
+
spec.add_dependency "asciimath", "~> 2.0.2"
|
35
|
+
spec.add_dependency "plane1converter", "~> 0.0.1"
|
35
36
|
|
36
37
|
spec.add_development_dependency "byebug", "~> 9.1"
|
37
38
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
@@ -39,7 +40,7 @@ Gem::Specification.new do |spec|
|
|
39
40
|
spec.add_development_dependency "guard-rspec", "~> 4.7"
|
40
41
|
spec.add_development_dependency "rake", "~> 12.0"
|
41
42
|
spec.add_development_dependency "rspec", "~> 3.6"
|
42
|
-
spec.add_development_dependency "rubocop", "
|
43
|
+
spec.add_development_dependency "rubocop", "~> 1.5.2"
|
43
44
|
spec.add_development_dependency "simplecov", "~> 0.15"
|
44
45
|
spec.add_development_dependency "timecop", "~> 0.9"
|
45
46
|
spec.add_development_dependency "rspec-match_fuzzy", "~> 0.1.3"
|
data/lib/html2doc/base.rb
CHANGED
@@ -2,8 +2,6 @@ require "uuidtools"
|
|
2
2
|
require "asciimath"
|
3
3
|
require "htmlentities"
|
4
4
|
require "nokogiri"
|
5
|
-
#require "xml/xslt"
|
6
|
-
require "pp"
|
7
5
|
require "fileutils"
|
8
6
|
|
9
7
|
module Html2Doc
|
@@ -19,16 +17,26 @@ module Html2Doc
|
|
19
17
|
|
20
18
|
def self.process_header(headerfile, hash)
|
21
19
|
return if headerfile.nil?
|
20
|
+
|
22
21
|
doc = File.read(headerfile, encoding: "utf-8")
|
23
|
-
doc = header_image_cleanup(doc, hash[:dir1], hash[:filename],
|
22
|
+
doc = header_image_cleanup(doc, hash[:dir1], hash[:filename],
|
23
|
+
File.dirname(hash[:filename]))
|
24
24
|
File.open("#{hash[:dir1]}/header.html", "w:UTF-8") { |f| f.write(doc) }
|
25
25
|
end
|
26
26
|
|
27
|
+
def self.clear_dir(dir)
|
28
|
+
Dir.foreach(dir) do |f|
|
29
|
+
fn = File.join(dir, f)
|
30
|
+
File.delete(fn) if f != "." && f != ".."
|
31
|
+
end
|
32
|
+
dir
|
33
|
+
end
|
34
|
+
|
27
35
|
def self.create_dir(filename, dir)
|
28
|
-
|
36
|
+
dir and return clear_dir(dir)
|
29
37
|
dir = "#{filename}_files"
|
30
38
|
Dir.mkdir(dir) unless File.exists?(dir)
|
31
|
-
dir
|
39
|
+
clear_dir(dir)
|
32
40
|
end
|
33
41
|
|
34
42
|
def self.process_html(result, hash)
|
@@ -64,7 +72,7 @@ module Html2Doc
|
|
64
72
|
|
65
73
|
def self.to_xhtml(xml)
|
66
74
|
xml.gsub!(/<\?xml[^>]*>/, "")
|
67
|
-
unless /<!DOCTYPE /.match xml
|
75
|
+
unless /<!DOCTYPE /.match? xml
|
68
76
|
xml = '<!DOCTYPE html SYSTEM
|
69
77
|
"http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">' + xml
|
70
78
|
end
|
@@ -76,34 +84,34 @@ module Html2Doc
|
|
76
84
|
DOCTYPE
|
77
85
|
|
78
86
|
def self.from_xhtml(xml)
|
79
|
-
xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "")
|
80
|
-
sub(DOCTYPE, "")
|
81
|
-
gsub(%{ />}, "/>")
|
87
|
+
xml.to_xml.sub(%{ xmlns="http://www.w3.org/1999/xhtml"}, "")
|
88
|
+
.sub(DOCTYPE, "")
|
89
|
+
.gsub(%{ />}, "/>")
|
82
90
|
end
|
83
91
|
|
84
|
-
def self.msword_fix(
|
92
|
+
def self.msword_fix(doc)
|
85
93
|
# brain damage in MSWord parser
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
|
97
|
-
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
94
|
+
doc.gsub!(%r{<span style="mso-special-character:footnote"/>},
|
95
|
+
'<span style="mso-special-character:footnote"></span>')
|
96
|
+
doc.gsub!(%r{<div style="mso-element:footnote-list"></div>},
|
97
|
+
'<div style="mso-element:footnote-list"/>')
|
98
|
+
doc.gsub!(%r{(<a style="mso-comment-reference:[^>/]+)/>}, "\\1></a>")
|
99
|
+
doc.gsub!(%r{<link rel="File-List"}, "<link rel=File-List")
|
100
|
+
doc.gsub!(%r{<meta http-equiv="Content-Type"},
|
101
|
+
"<meta http-equiv=Content-Type")
|
102
|
+
doc.gsub!(%r{></m:jc>}, "/>")
|
103
|
+
doc.gsub!(%r{></v:stroke>}, "/>")
|
104
|
+
doc.gsub!(%r{></v:f>}, "/>")
|
105
|
+
doc.gsub!(%r{></v:path>}, "/>")
|
106
|
+
doc.gsub!(%r{></o:lock>}, "/>")
|
107
|
+
doc.gsub!(%r{></v:imagedata>}, "/>")
|
108
|
+
doc.gsub!(%r{></w:wrap>}, "/>")
|
109
|
+
doc.gsub!(%r{&tab;|&tab;},
|
110
|
+
'<span style="mso-tab-count:1">  </span>')
|
111
|
+
doc.split(%r{(<m:oMath>|</m:oMath>)}).each_slice(4).map do |a|
|
103
112
|
a.size > 2 and a[2] = a[2].gsub(/>\s+</, "><")
|
104
113
|
a
|
105
114
|
end.join
|
106
|
-
r
|
107
115
|
end
|
108
116
|
|
109
117
|
PRINT_VIEW = <<~XML.freeze
|
@@ -122,26 +130,27 @@ module Html2Doc
|
|
122
130
|
def self.define_head1(docxml, dir)
|
123
131
|
docxml.xpath("//*[local-name() = 'head']").each do |h|
|
124
132
|
h.children.first.add_previous_sibling <<~XML
|
125
|
-
|
126
|
-
|
133
|
+
#{PRINT_VIEW}
|
134
|
+
<link rel="File-List" href="cid:filelist.xml"/>
|
127
135
|
XML
|
128
136
|
end
|
129
137
|
end
|
130
138
|
|
131
|
-
def self.filename_substitute(
|
132
|
-
if header_filename.nil?
|
133
|
-
|
134
|
-
|
135
|
-
|
139
|
+
def self.filename_substitute(head, header_filename)
|
140
|
+
return if header_filename.nil?
|
141
|
+
|
142
|
+
head.xpath(".//*[local-name() = 'style']").each do |s|
|
143
|
+
s1 = s.to_xml.gsub(/url\("[^"]+"\)/) do |m|
|
144
|
+
/FILENAME/.match?(m) ? "url(cid:header.html)" : m
|
145
|
+
end
|
146
|
+
s.replace(s1)
|
136
147
|
end
|
137
|
-
stylesheet
|
138
148
|
end
|
139
149
|
|
140
150
|
def self.stylesheet(filename, header_filename, fn)
|
141
|
-
(fn.nil? || fn.empty?)
|
151
|
+
(fn.nil? || fn.empty?) and
|
142
152
|
fn = File.join(File.dirname(__FILE__), "wordstyle.css")
|
143
153
|
stylesheet = File.read(fn, encoding: "UTF-8")
|
144
|
-
stylesheet = filename_substitute(stylesheet, header_filename, filename)
|
145
154
|
xml = Nokogiri::XML("<style/>")
|
146
155
|
xml.children.first << Nokogiri::XML::Comment.new(xml, "\n#{stylesheet}\n")
|
147
156
|
xml.root.to_s
|
@@ -152,6 +161,7 @@ module Html2Doc
|
|
152
161
|
head = docxml.at("//*[local-name() = 'head']")
|
153
162
|
css = stylesheet(hash[:filename], hash[:header_file], hash[:stylesheet])
|
154
163
|
add_stylesheet(head, title, css)
|
164
|
+
filename_substitute(head, hash[:header_file])
|
155
165
|
define_head1(docxml, hash[:dir1])
|
156
166
|
rootnamespace(docxml.root)
|
157
167
|
end
|
@@ -180,13 +190,13 @@ module Html2Doc
|
|
180
190
|
end
|
181
191
|
|
182
192
|
def self.bookmarks(docxml)
|
183
|
-
docxml.xpath("//*[@id][not(@name)][not(@style = 'mso-element:footnote')]")
|
184
|
-
|
185
|
-
next if
|
186
|
-
|
187
|
-
|
188
|
-
|
189
|
-
|
193
|
+
docxml.xpath("//*[@id][not(@name)][not(@style = 'mso-element:footnote')]")
|
194
|
+
.each do |x|
|
195
|
+
next if x["id"].empty? ||
|
196
|
+
%w(shapetype v:shapetype shape v:shape).include?(x.name)
|
197
|
+
|
198
|
+
if x.children.empty? then x.add_child("<a name='#{x['id']}'></a>")
|
199
|
+
else x.children.first.previous = "<a name='#{x['id']}'></a>"
|
190
200
|
end
|
191
201
|
x.delete("id")
|
192
202
|
end
|
data/lib/html2doc/lists.rb
CHANGED
@@ -7,6 +7,7 @@ require "uuidtools"
|
|
7
7
|
module Html2Doc
|
8
8
|
def self.style_list(li, level, liststyle, listnumber)
|
9
9
|
return unless liststyle
|
10
|
+
|
10
11
|
if li["style"]
|
11
12
|
li["style"] += ";"
|
12
13
|
else
|
@@ -16,37 +17,39 @@ module Html2Doc
|
|
16
17
|
end
|
17
18
|
|
18
19
|
def self.list_add1(li, liststyles, listtype, level)
|
19
|
-
if [
|
20
|
-
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
20
|
+
if %i[ul ol].include? listtype
|
21
|
+
list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
|
22
|
+
liststyles, :ul, level + 1)
|
23
|
+
list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
|
24
|
+
liststyles, :ol, level + 1)
|
25
|
+
else
|
26
|
+
list_add(li.xpath(".//ul") - li.xpath(".//ul//ul | .//ol//ul"),
|
27
|
+
liststyles, listtype, level + 1)
|
28
|
+
list_add(li.xpath(".//ol") - li.xpath(".//ul//ol | .//ol//ol"),
|
29
|
+
liststyles, listtype, level + 1)
|
30
|
+
end
|
30
31
|
end
|
31
32
|
|
32
33
|
def self.list_add(xpath, liststyles, listtype, level)
|
33
|
-
xpath.each_with_index do |
|
34
|
+
xpath.each_with_index do |l, _i|
|
34
35
|
@listnumber += 1 if level == 1
|
35
|
-
|
36
|
-
|
37
|
-
(
|
36
|
+
l["seen"] = true if level == 1
|
37
|
+
l["id"] ||= UUIDTools::UUID.random_create
|
38
|
+
(l.xpath(".//li") - l.xpath(".//ol//li | .//ul//li")).each do |li|
|
38
39
|
style_list(li, level, liststyles[listtype], @listnumber)
|
39
40
|
list_add1(li, liststyles, listtype, level)
|
40
41
|
end
|
41
|
-
|
42
|
-
|
43
|
-
|
42
|
+
l.xpath(".//ul[not(ancestor::li/ancestor::*/@id = '#{l['id']}')] | "\
|
43
|
+
".//ol[not(ancestor::li/ancestor::*/@id = '#{l['id']}')]")
|
44
|
+
.each do |li|
|
45
|
+
list_add1(li.parent, liststyles, listtype, level - 1)
|
44
46
|
end
|
45
47
|
end
|
46
48
|
end
|
47
49
|
|
48
50
|
def self.list2para(u)
|
49
51
|
return if u.xpath("./li").empty?
|
52
|
+
|
50
53
|
u.xpath("./li").first["class"] ||= "MsoListParagraphCxSpFirst"
|
51
54
|
u.xpath("./li").last["class"] ||= "MsoListParagraphCxSpLast"
|
52
55
|
u.xpath("./li/p").each { |p| p["class"] ||= "MsoListParagraphCxSpMiddle" }
|
@@ -64,21 +67,25 @@ module Html2Doc
|
|
64
67
|
def self.lists1(docxml, liststyles, k)
|
65
68
|
case k
|
66
69
|
when :ul then list_add(docxml.xpath("//ul[not(@class)]#{TOPLIST}"),
|
67
|
-
|
70
|
+
liststyles, :ul, 1)
|
68
71
|
when :ol then list_add(docxml.xpath("//ol[not(@class)]#{TOPLIST}"),
|
69
72
|
liststyles, :ol, 1)
|
70
73
|
else
|
71
|
-
list_add(docxml.xpath("//ol[@class = '#{k
|
72
|
-
"//ul[@class = '#{k
|
74
|
+
list_add(docxml.xpath("//ol[@class = '#{k}']#{TOPLIST} | "\
|
75
|
+
"//ul[@class = '#{k}']#{TOPLIST}"),
|
73
76
|
liststyles, k, 1)
|
74
77
|
end
|
75
78
|
end
|
76
79
|
|
77
80
|
def self.lists_unstyled(docxml, liststyles)
|
78
|
-
|
79
|
-
|
80
|
-
|
81
|
-
|
81
|
+
if liststyles.has_key?(:ul)
|
82
|
+
list_add(docxml.xpath("//ul#{TOPLIST}[not(@seen)]"),
|
83
|
+
liststyles, :ul, 1)
|
84
|
+
end
|
85
|
+
if liststyles.has_key?(:ol)
|
86
|
+
list_add(docxml.xpath("//ol#{TOPLIST}[not(@seen)]"),
|
87
|
+
liststyles, :ul, 1)
|
88
|
+
end
|
82
89
|
docxml.xpath("//ul[@seen] | //ol[@seen]").each do |l|
|
83
90
|
l.delete("seen")
|
84
91
|
end
|
@@ -86,6 +93,7 @@ module Html2Doc
|
|
86
93
|
|
87
94
|
def self.lists(docxml, liststyles)
|
88
95
|
return if liststyles.nil?
|
96
|
+
|
89
97
|
@listnumber = 0
|
90
98
|
liststyles.each_key { |k| lists1(docxml, liststyles, k) }
|
91
99
|
lists_unstyled(docxml, liststyles)
|
data/lib/html2doc/math.rb
CHANGED
@@ -2,20 +2,27 @@ require "uuidtools"
|
|
2
2
|
require "asciimath"
|
3
3
|
require "htmlentities"
|
4
4
|
require "nokogiri"
|
5
|
+
require "plane1converter"
|
5
6
|
|
6
7
|
module Html2Doc
|
7
8
|
@xsltemplate =
|
8
9
|
Nokogiri::XSLT(File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"),
|
9
10
|
encoding: "utf-8"))
|
10
11
|
|
11
|
-
def self.asciimath_to_mathml1(
|
12
|
-
AsciiMath::MathMLBuilder.new(:
|
13
|
-
AsciiMath.parse(HTMLEntities.new.decode(
|
14
|
-
|
12
|
+
def self.asciimath_to_mathml1(expr)
|
13
|
+
AsciiMath::MathMLBuilder.new(msword: true).append_expression(
|
14
|
+
AsciiMath.parse(HTMLEntities.new.decode(expr)).ast,
|
15
|
+
).to_s
|
16
|
+
.gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>")
|
17
|
+
rescue StandardError => e
|
18
|
+
puts "parsing: #{expr}"
|
19
|
+
puts e.message
|
20
|
+
raise e
|
15
21
|
end
|
16
22
|
|
17
23
|
def self.asciimath_to_mathml(doc, delims)
|
18
24
|
return doc if delims.nil? || delims.size < 2
|
25
|
+
|
19
26
|
m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
|
20
27
|
m.each_slice(4).map.with_index do |(*a), i|
|
21
28
|
i % 500 == 0 && m.size > 1000 && i > 0 and
|
@@ -25,43 +32,96 @@ module Html2Doc
|
|
25
32
|
end.join
|
26
33
|
end
|
27
34
|
|
35
|
+
def self.unwrap_accents(doc)
|
36
|
+
doc.xpath("//*[@accent = 'true']").each do |x|
|
37
|
+
x.elements.length > 1 or next
|
38
|
+
x.elements[1].name == "mrow" and
|
39
|
+
x.elements[1].replace(x.elements[1].children)
|
40
|
+
end
|
41
|
+
doc
|
42
|
+
end
|
43
|
+
|
28
44
|
# random fixes to MathML input that OOXML needs to render properly
|
29
|
-
def self.ooxml_cleanup(
|
30
|
-
|
31
|
-
|
32
|
-
|
45
|
+
def self.ooxml_cleanup(math, docnamespaces)
|
46
|
+
math = unwrap_accents(
|
47
|
+
mathml_preserve_space(
|
48
|
+
mathml_insert_rows(math, docnamespaces), docnamespaces
|
49
|
+
),
|
50
|
+
)
|
51
|
+
math.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
|
52
|
+
math
|
33
53
|
end
|
34
54
|
|
35
|
-
def self.mathml_insert_rows(
|
36
|
-
|
37
|
-
map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
|
55
|
+
def self.mathml_insert_rows(math, docnamespaces)
|
56
|
+
math.xpath(%w(msup msub msubsup munder mover munderover)
|
57
|
+
.map { |m| ".//xmlns:#{m}" }.join(" | "), docnamespaces).each do |x|
|
38
58
|
next unless x.next_element && x.next_element != "mrow"
|
59
|
+
|
39
60
|
x.next_element.wrap("<mrow/>")
|
40
61
|
end
|
41
|
-
|
62
|
+
math
|
42
63
|
end
|
43
64
|
|
44
|
-
def self.mathml_preserve_space(
|
45
|
-
|
65
|
+
def self.mathml_preserve_space(math, docnamespaces)
|
66
|
+
math.xpath(".//xmlns:mtext", docnamespaces).each do |x|
|
46
67
|
x.children = x.children.to_xml.gsub(/^\s/, " ").gsub(/\s$/, " ")
|
47
68
|
end
|
48
|
-
|
69
|
+
math
|
49
70
|
end
|
50
71
|
|
51
|
-
def self.unitalic(
|
52
|
-
|
72
|
+
def self.unitalic(math)
|
73
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
|
53
74
|
x.wrap("<span style='font-style:normal;'></span>")
|
54
75
|
end
|
55
|
-
|
56
|
-
x.wrap("<span style='font-
|
76
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
|
77
|
+
x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>")
|
57
78
|
end
|
58
|
-
|
79
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
|
59
80
|
x.wrap("<span class='nostem'><em></em></span>")
|
60
81
|
end
|
61
|
-
|
82
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
|
62
83
|
x.wrap("<span style='font-style:normal;font-weight:bold;'></span>")
|
63
84
|
end
|
64
|
-
m
|
85
|
+
math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
|
86
|
+
to_plane1(x, :monospace)
|
87
|
+
end
|
88
|
+
math.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
|
89
|
+
to_plane1(x, :doublestruck)
|
90
|
+
end
|
91
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
|
92
|
+
to_plane1(x, :script)
|
93
|
+
end
|
94
|
+
math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
|
95
|
+
to_plane1(x, :scriptbold)
|
96
|
+
end
|
97
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
|
98
|
+
to_plane1(x, :fraktur)
|
99
|
+
end
|
100
|
+
math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
|
101
|
+
to_plane1(x, :frakturbold)
|
102
|
+
end
|
103
|
+
math.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
104
|
+
to_plane1(x, :sans)
|
105
|
+
end
|
106
|
+
math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
107
|
+
to_plane1(x, :sansbold)
|
108
|
+
end
|
109
|
+
math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
110
|
+
to_plane1(x, :sansitalic)
|
111
|
+
end
|
112
|
+
math.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
113
|
+
to_plane1(x, :sansbolditalic)
|
114
|
+
end
|
115
|
+
math
|
116
|
+
end
|
117
|
+
|
118
|
+
def self.to_plane1(xml, font)
|
119
|
+
xml.traverse do |n|
|
120
|
+
next unless n.text?
|
121
|
+
|
122
|
+
n.replace(Plane1Converter.conv(HTMLEntities.new.decode(n.text), font))
|
123
|
+
end
|
124
|
+
xml
|
65
125
|
end
|
66
126
|
|
67
127
|
def self.mathml_to_ooml(docxml)
|
@@ -71,22 +131,23 @@ module Html2Doc
|
|
71
131
|
i % 100 == 0 && m.size > 500 && i > 0 and
|
72
132
|
warn "Math OOXML #{i} of #{m.size}"
|
73
133
|
element = ooxml_cleanup(x, docnamespaces)
|
74
|
-
doc = Nokogiri::XML::Document::new
|
134
|
+
doc = Nokogiri::XML::Document::new
|
75
135
|
doc.root = element
|
76
|
-
ooxml =
|
77
|
-
gsub(/<\?[^>]+>\s*/, "")
|
78
|
-
gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
|
79
|
-
gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
|
136
|
+
ooxml = unitalic(esc_space(@xsltemplate.transform(doc))).to_s
|
137
|
+
.gsub(/<\?[^>]+>\s*/, "")
|
138
|
+
.gsub(/ xmlns(:[^=]+)?="[^"]+"/, "")
|
139
|
+
.gsub(%r{<(/)?(?!span)(?!em)([a-z])}, "<\\1m:\\2")
|
80
140
|
ooxml = uncenter(x, ooxml)
|
81
141
|
x.swap(ooxml)
|
82
142
|
end
|
83
143
|
end
|
84
144
|
|
85
|
-
# escape space as 2; we are removing any spaces generated by
|
145
|
+
# escape space as 2; we are removing any spaces generated by
|
86
146
|
# XML indentation
|
87
147
|
def self.esc_space(xml)
|
88
148
|
xml.traverse do |n|
|
89
149
|
next unless n.text?
|
150
|
+
|
90
151
|
n = n.text.gsub(/ /, "2")
|
91
152
|
end
|
92
153
|
xml
|
@@ -94,17 +155,15 @@ module Html2Doc
|
|
94
155
|
|
95
156
|
# if oomml has no siblings, by default it is centered; override this with
|
96
157
|
# left/right if parent is so tagged
|
97
|
-
def self.uncenter(
|
98
|
-
|
99
|
-
|
100
|
-
|
101
|
-
|
102
|
-
|
103
|
-
|
104
|
-
"m:val='left'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
|
105
|
-
elsif alignnode.text.include? ("text-align:right")
|
158
|
+
def self.uncenter(math, ooxml)
|
159
|
+
alignnode = math.at(".//ancestor::*[@style][local-name() = 'p' or "\
|
160
|
+
"local-name() = 'div' or local-name() = 'td']/@style")
|
161
|
+
return ooxml unless alignnode && (math.next == nil && math.previous == nil)
|
162
|
+
|
163
|
+
%w(left right).each do |dir|
|
164
|
+
if alignnode.text.include? ("text-align:#{dir}")
|
106
165
|
ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
|
107
|
-
"m:val='
|
166
|
+
"m:val='#{dir}'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
|
108
167
|
end
|
109
168
|
end
|
110
169
|
ooxml
|