html2doc 0.8.8 → 0.8.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +5 -5
- data/lib/html2doc/base.rb +1 -1
- data/lib/html2doc/lists.rb +2 -3
- data/lib/html2doc/math.rb +14 -10
- data/lib/html2doc/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 825c44453c97ff2f83cdd4099e293b8ce78642139a56bed33488e05dfaa22d5f
|
4
|
+
data.tar.gz: e3643c17f4c31ad1b202df404e45c1b00f52591eefb52779369496bc573819ac
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 6dff48ab65903fac3b334738bc340a044c0a16237aa1980b8151f8468796ed25222dd2f88259ce64cd604788a461f87f2425f2f0ef5e7b4d11e7c7b34be9b333
|
7
|
+
data.tar.gz: d8fa57c7f1d34798bf2865a728a99df6634db4dd793bc4fae9feebcb13dab0a408b265bd73975043f02f6e0ba94e726aae259efe59eadc94d0a5ad699e0c12e5
|
data/Gemfile.lock
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
PATH
|
2
2
|
remote: .
|
3
3
|
specs:
|
4
|
-
html2doc (0.8.8)
|
4
|
+
html2doc (0.8.9)
|
5
5
|
asciimath (~> 1.0.7)
|
6
6
|
htmlentities (~> 4.3.4)
|
7
7
|
image_size
|
@@ -14,7 +14,7 @@ PATH
|
|
14
14
|
GEM
|
15
15
|
remote: https://rubygems.org/
|
16
16
|
specs:
|
17
|
-
asciimath (1.0.
|
17
|
+
asciimath (1.0.8)
|
18
18
|
ast (2.4.0)
|
19
19
|
byebug (9.1.0)
|
20
20
|
coderay (1.1.2)
|
@@ -52,13 +52,13 @@ GEM
|
|
52
52
|
mime-types-data (3.2018.0812)
|
53
53
|
mini_portile2 (2.4.0)
|
54
54
|
nenv (0.3.0)
|
55
|
-
nokogiri (1.10.
|
55
|
+
nokogiri (1.10.1)
|
56
56
|
mini_portile2 (~> 2.4.0)
|
57
57
|
notiffany (0.1.1)
|
58
58
|
nenv (~> 0.1)
|
59
59
|
shellany (~> 0.0)
|
60
|
-
parallel (1.
|
61
|
-
parser (2.
|
60
|
+
parallel (1.13.0)
|
61
|
+
parser (2.6.0.0)
|
62
62
|
ast (~> 2.4.0)
|
63
63
|
powerpack (0.1.2)
|
64
64
|
pry (0.12.2)
|
data/lib/html2doc/base.rb
CHANGED
data/lib/html2doc/lists.rb
CHANGED
@@ -2,7 +2,6 @@ require "uuidtools"
|
|
2
2
|
require "asciimath"
|
3
3
|
require "htmlentities"
|
4
4
|
require "nokogiri"
|
5
|
-
require "xml/xslt"
|
6
5
|
require "pp"
|
7
6
|
|
8
7
|
module Html2Doc
|
@@ -29,8 +28,8 @@ module Html2Doc
|
|
29
28
|
|
30
29
|
def self.list2para(u)
|
31
30
|
return if u.xpath("./li").empty?
|
32
|
-
u.xpath("./li").
|
33
|
-
u.xpath("./li").
|
31
|
+
u.xpath("./li").first["class"] ||= "MsoListParagraphCxSpFirst"
|
32
|
+
u.xpath("./li").last["class"] ||= "MsoListParagraphCxSpLast"
|
34
33
|
u.xpath("./li/p").each { |p| p["class"] ||= "MsoListParagraphCxSpMiddle" }
|
35
34
|
u.xpath("./li").each do |l|
|
36
35
|
l.name = "p"
|
data/lib/html2doc/math.rb
CHANGED
@@ -3,12 +3,12 @@ require "asciimath"
|
|
3
3
|
require "htmlentities"
|
4
4
|
require "nokogiri"
|
5
5
|
require "xml/xslt"
|
6
|
-
require "pp"
|
7
6
|
|
8
7
|
module Html2Doc
|
9
8
|
@xslt = XML::XSLT.new
|
10
|
-
|
9
|
+
@xslt.xsl = File.read(File.join(File.dirname(__FILE__), "mathml2omml.xsl"))
|
11
10
|
@xslt.xsl = File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"), encoding: "utf-8")
|
11
|
+
@xsltemplate = Nokogiri::XSLT(File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"), encoding: "utf-8"))
|
12
12
|
|
13
13
|
def self.asciimath_to_mathml1(x)
|
14
14
|
AsciiMath.parse(HTMLEntities.new.decode(x)).to_mathml.
|
@@ -17,17 +17,18 @@ module Html2Doc
|
|
17
17
|
|
18
18
|
def self.asciimath_to_mathml(doc, delims)
|
19
19
|
return doc if delims.nil? || delims.size < 2
|
20
|
-
doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
|
21
|
-
|
20
|
+
m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
|
21
|
+
m.each_slice(4).map.with_index do |(*a), i|
|
22
|
+
warn "MathML #{i} of #{(m.size / 4).floor}" if i % 500 == 0 && m.size > 1000 && i > 0
|
22
23
|
a[2].nil? || a[2] = asciimath_to_mathml1(a[2])
|
23
24
|
a.size > 1 ? a[0] + a[2] : a[0]
|
24
25
|
end.join
|
25
26
|
end
|
26
27
|
|
27
28
|
# random fixes to MathML input that OOXML needs to render properly
|
28
|
-
def self.ooxml_cleanup(m)
|
29
|
+
def self.ooxml_cleanup(m, docnamespaces)
|
29
30
|
m.xpath(".//xmlns:msup[name(preceding-sibling::*[1])='munderover']",
|
30
|
-
|
31
|
+
docnamespaces).each do |x|
|
31
32
|
x1 = x.replace("<mrow></mrow>").first
|
32
33
|
x1.children = x
|
33
34
|
end
|
@@ -36,13 +37,16 @@ module Html2Doc
|
|
36
37
|
end
|
37
38
|
|
38
39
|
def self.mathml_to_ooml(docxml)
|
39
|
-
|
40
|
-
|
40
|
+
docnamespaces = docxml.collect_namespaces
|
41
|
+
m = docxml.xpath("//*[local-name() = 'math']")
|
42
|
+
m.each_with_index do |x, i|
|
43
|
+
warn "Math OOXML #{i} of #{m.size}" if i % 100 == 0 && m.size > 500 && i > 0
|
44
|
+
@xslt.xml = ooxml_cleanup(x, docnamespaces)
|
41
45
|
ooxml = @xslt.serve.gsub(/<\?[^>]+>\s*/, "").
|
42
46
|
gsub(/ xmlns(:[^=]+)?="[^"]+"/, "").
|
43
47
|
gsub(%r{<(/)?([a-z])}, "<\\1m:\\2")
|
44
|
-
ooxml = uncenter(
|
45
|
-
|
48
|
+
ooxml = uncenter(x, ooxml)
|
49
|
+
x.swap(ooxml)
|
46
50
|
end
|
47
51
|
end
|
48
52
|
|
data/lib/html2doc/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.8.8
|
4
|
+
version: 0.8.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2019-01-
|
11
|
+
date: 2019-01-31 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|