html2doc 0.8.8 → 0.8.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +5 -5
- data/lib/html2doc/base.rb +1 -1
- data/lib/html2doc/lists.rb +2 -3
- data/lib/html2doc/math.rb +14 -10
- data/lib/html2doc/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 825c44453c97ff2f83cdd4099e293b8ce78642139a56bed33488e05dfaa22d5f
|
4
|
+
data.tar.gz: e3643c17f4c31ad1b202df404e45c1b00f52591eefb52779369496bc573819ac
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6dff48ab65903fac3b334738bc340a044c0a16237aa1980b8151f8468796ed25222dd2f88259ce64cd604788a461f87f2425f2f0ef5e7b4d11e7c7b34be9b333
|
7
|
+
data.tar.gz: d8fa57c7f1d34798bf2865a728a99df6634db4dd793bc4fae9feebcb13dab0a408b265bd73975043f02f6e0ba94e726aae259efe59eadc94d0a5ad699e0c12e5
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
html2doc (0.8.8)
|
4
|
+
html2doc (0.8.9)
|
5
5
|
asciimath (~> 1.0.7)
|
6
6
|
htmlentities (~> 4.3.4)
|
7
7
|
image_size
|
@@ -14,7 +14,7 @@ PATH
|
|
14
14
|
GEM
|
15
15
|
remote: https://rubygems.org/
|
16
16
|
specs:
|
17
|
-
asciimath (1.0.
|
17
|
+
asciimath (1.0.8)
|
18
18
|
ast (2.4.0)
|
19
19
|
byebug (9.1.0)
|
20
20
|
coderay (1.1.2)
|
@@ -52,13 +52,13 @@ GEM
|
|
52
52
|
mime-types-data (3.2018.0812)
|
53
53
|
mini_portile2 (2.4.0)
|
54
54
|
nenv (0.3.0)
|
55
|
-
nokogiri (1.10.
|
55
|
+
nokogiri (1.10.1)
|
56
56
|
mini_portile2 (~> 2.4.0)
|
57
57
|
notiffany (0.1.1)
|
58
58
|
nenv (~> 0.1)
|
59
59
|
shellany (~> 0.0)
|
60
|
-
parallel (1.
|
61
|
-
parser (2.
|
60
|
+
parallel (1.13.0)
|
61
|
+
parser (2.6.0.0)
|
62
62
|
ast (~> 2.4.0)
|
63
63
|
powerpack (0.1.2)
|
64
64
|
pry (0.12.2)
|
data/lib/html2doc/base.rb
CHANGED
data/lib/html2doc/lists.rb
CHANGED
@@ -2,7 +2,6 @@ require "uuidtools"
|
|
2
2
|
require "asciimath"
|
3
3
|
require "htmlentities"
|
4
4
|
require "nokogiri"
|
5
|
-
require "xml/xslt"
|
6
5
|
require "pp"
|
7
6
|
|
8
7
|
module Html2Doc
|
@@ -29,8 +28,8 @@ module Html2Doc
|
|
29
28
|
|
30
29
|
def self.list2para(u)
|
31
30
|
return if u.xpath("./li").empty?
|
32
|
-
u.xpath("./li").
|
33
|
-
u.xpath("./li").
|
31
|
+
u.xpath("./li").first["class"] ||= "MsoListParagraphCxSpFirst"
|
32
|
+
u.xpath("./li").last["class"] ||= "MsoListParagraphCxSpLast"
|
34
33
|
u.xpath("./li/p").each { |p| p["class"] ||= "MsoListParagraphCxSpMiddle" }
|
35
34
|
u.xpath("./li").each do |l|
|
36
35
|
l.name = "p"
|
data/lib/html2doc/math.rb
CHANGED
@@ -3,12 +3,12 @@ require "asciimath"
|
|
3
3
|
require "htmlentities"
|
4
4
|
require "nokogiri"
|
5
5
|
require "xml/xslt"
|
6
|
-
require "pp"
|
7
6
|
|
8
7
|
module Html2Doc
|
9
8
|
@xslt = XML::XSLT.new
|
10
|
-
|
9
|
+
@xslt.xsl = File.read(File.join(File.dirname(__FILE__), "mathml2omml.xsl"))
|
11
10
|
@xslt.xsl = File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"), encoding: "utf-8")
|
11
|
+
@xsltemplate = Nokogiri::XSLT(File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"), encoding: "utf-8"))
|
12
12
|
|
13
13
|
def self.asciimath_to_mathml1(x)
|
14
14
|
AsciiMath.parse(HTMLEntities.new.decode(x)).to_mathml.
|
@@ -17,17 +17,18 @@ module Html2Doc
|
|
17
17
|
|
18
18
|
def self.asciimath_to_mathml(doc, delims)
|
19
19
|
return doc if delims.nil? || delims.size < 2
|
20
|
-
doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
|
21
|
-
|
20
|
+
m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
|
21
|
+
m.each_slice(4).map.with_index do |(*a), i|
|
22
|
+
warn "MathML #{i} of #{(m.size / 4).floor}" if i % 500 == 0 && m.size > 1000 && i > 0
|
22
23
|
a[2].nil? || a[2] = asciimath_to_mathml1(a[2])
|
23
24
|
a.size > 1 ? a[0] + a[2] : a[0]
|
24
25
|
end.join
|
25
26
|
end
|
26
27
|
|
27
28
|
# random fixes to MathML input that OOXML needs to render properly
|
28
|
-
def self.ooxml_cleanup(m)
|
29
|
+
def self.ooxml_cleanup(m, docnamespaces)
|
29
30
|
m.xpath(".//xmlns:msup[name(preceding-sibling::*[1])='munderover']",
|
30
|
-
|
31
|
+
docnamespaces).each do |x|
|
31
32
|
x1 = x.replace("<mrow></mrow>").first
|
32
33
|
x1.children = x
|
33
34
|
end
|
@@ -36,13 +37,16 @@ module Html2Doc
|
|
36
37
|
end
|
37
38
|
|
38
39
|
def self.mathml_to_ooml(docxml)
|
39
|
-
|
40
|
-
|
40
|
+
docnamespaces = docxml.collect_namespaces
|
41
|
+
m = docxml.xpath("//*[local-name() = 'math']")
|
42
|
+
m.each_with_index do |x, i|
|
43
|
+
warn "Math OOXML #{i} of #{m.size}" if i % 100 == 0 && m.size > 500 && i > 0
|
44
|
+
@xslt.xml = ooxml_cleanup(x, docnamespaces)
|
41
45
|
ooxml = @xslt.serve.gsub(/<\?[^>]+>\s*/, "").
|
42
46
|
gsub(/ xmlns(:[^=]+)?="[^"]+"/, "").
|
43
47
|
gsub(%r{<(/)?([a-z])}, "<\\1m:\\2")
|
44
|
-
ooxml = uncenter(
|
45
|
-
|
48
|
+
ooxml = uncenter(x, ooxml)
|
49
|
+
x.swap(ooxml)
|
46
50
|
end
|
47
51
|
end
|
48
52
|
|
data/lib/html2doc/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.8
|
4
|
+
version: 0.8.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-01-
|
11
|
+
date: 2019-01-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|