html2doc 0.8.8 → 0.8.9

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c88de00bdeb2cbb88460c403e03cd10d623fb1152371129a6b9cf90c2f664a02
4
- data.tar.gz: 0c61af5fa5eb93dcc4730328055b0914dfe03de8794909779c6d99e1308a0a22
3
+ metadata.gz: 825c44453c97ff2f83cdd4099e293b8ce78642139a56bed33488e05dfaa22d5f
4
+ data.tar.gz: e3643c17f4c31ad1b202df404e45c1b00f52591eefb52779369496bc573819ac
5
5
  SHA512:
6
- metadata.gz: 73ddb8e6c7e4505df3127737c4302a364c1ade83b18c55274c7e0bc34a3640cbc5e4d02cd94eca0cd72eb6f2c505ee6b701388eaadbb1b45969d543df15b778d
7
- data.tar.gz: 15556cf840a5fe4de804e5de8d0eb0f23d470d1a7e620814e8e0a15239e8efba761be31601ea5d1a30cf806c9740eabe2ac9a8ae1382ebad158562a7c1081420
6
+ metadata.gz: 6dff48ab65903fac3b334738bc340a044c0a16237aa1980b8151f8468796ed25222dd2f88259ce64cd604788a461f87f2425f2f0ef5e7b4d11e7c7b34be9b333
7
+ data.tar.gz: d8fa57c7f1d34798bf2865a728a99df6634db4dd793bc4fae9feebcb13dab0a408b265bd73975043f02f6e0ba94e726aae259efe59eadc94d0a5ad699e0c12e5
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html2doc (0.8.8)
4
+ html2doc (0.8.9)
5
5
  asciimath (~> 1.0.7)
6
6
  htmlentities (~> 4.3.4)
7
7
  image_size
@@ -14,7 +14,7 @@ PATH
14
14
  GEM
15
15
  remote: https://rubygems.org/
16
16
  specs:
17
- asciimath (1.0.7)
17
+ asciimath (1.0.8)
18
18
  ast (2.4.0)
19
19
  byebug (9.1.0)
20
20
  coderay (1.1.2)
@@ -52,13 +52,13 @@ GEM
52
52
  mime-types-data (3.2018.0812)
53
53
  mini_portile2 (2.4.0)
54
54
  nenv (0.3.0)
55
- nokogiri (1.10.0)
55
+ nokogiri (1.10.1)
56
56
  mini_portile2 (~> 2.4.0)
57
57
  notiffany (0.1.1)
58
58
  nenv (~> 0.1)
59
59
  shellany (~> 0.0)
60
- parallel (1.12.1)
61
- parser (2.5.3.0)
60
+ parallel (1.13.0)
61
+ parser (2.6.0.0)
62
62
  ast (~> 2.4.0)
63
63
  powerpack (0.1.2)
64
64
  pry (0.12.2)
data/lib/html2doc/base.rb CHANGED
@@ -2,7 +2,7 @@ require "uuidtools"
2
2
  require "asciimath"
3
3
  require "htmlentities"
4
4
  require "nokogiri"
5
- require "xml/xslt"
5
+ #require "xml/xslt"
6
6
  require "pp"
7
7
  require "fileutils"
8
8
 
@@ -2,7 +2,6 @@ require "uuidtools"
2
2
  require "asciimath"
3
3
  require "htmlentities"
4
4
  require "nokogiri"
5
- require "xml/xslt"
6
5
  require "pp"
7
6
 
8
7
  module Html2Doc
@@ -29,8 +28,8 @@ module Html2Doc
29
28
 
30
29
  def self.list2para(u)
31
30
  return if u.xpath("./li").empty?
32
- u.xpath("./li").last["class"] = "MsoListParagraphCxSpLast"
33
- u.xpath("./li").first["class"] = "MsoListParagraphCxSpFirst"
31
+ u.xpath("./li").first["class"] ||= "MsoListParagraphCxSpFirst"
32
+ u.xpath("./li").last["class"] ||= "MsoListParagraphCxSpLast"
34
33
  u.xpath("./li/p").each { |p| p["class"] ||= "MsoListParagraphCxSpMiddle" }
35
34
  u.xpath("./li").each do |l|
36
35
  l.name = "p"
data/lib/html2doc/math.rb CHANGED
@@ -3,12 +3,12 @@ require "asciimath"
3
3
  require "htmlentities"
4
4
  require "nokogiri"
5
5
  require "xml/xslt"
6
- require "pp"
7
6
 
8
7
  module Html2Doc
9
8
  @xslt = XML::XSLT.new
10
- #@xslt.xsl = File.read(File.join(File.dirname(__FILE__), "mathml2omml.xsl"))
9
+ @xslt.xsl = File.read(File.join(File.dirname(__FILE__), "mathml2omml.xsl"))
11
10
  @xslt.xsl = File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"), encoding: "utf-8")
11
+ @xsltemplate = Nokogiri::XSLT(File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"), encoding: "utf-8"))
12
12
 
13
13
  def self.asciimath_to_mathml1(x)
14
14
  AsciiMath.parse(HTMLEntities.new.decode(x)).to_mathml.
@@ -17,17 +17,18 @@ module Html2Doc
17
17
 
18
18
  def self.asciimath_to_mathml(doc, delims)
19
19
  return doc if delims.nil? || delims.size < 2
20
- doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/).
21
- each_slice(4).map do |a|
20
+ m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
21
+ m.each_slice(4).map.with_index do |(*a), i|
22
+ warn "MathML #{i} of #{(m.size / 4).floor}" if i % 500 == 0 && m.size > 1000 && i > 0
22
23
  a[2].nil? || a[2] = asciimath_to_mathml1(a[2])
23
24
  a.size > 1 ? a[0] + a[2] : a[0]
24
25
  end.join
25
26
  end
26
27
 
27
28
  # random fixes to MathML input that OOXML needs to render properly
28
- def self.ooxml_cleanup(m)
29
+ def self.ooxml_cleanup(m, docnamespaces)
29
30
  m.xpath(".//xmlns:msup[name(preceding-sibling::*[1])='munderover']",
30
- m.document.collect_namespaces).each do |x|
31
+ docnamespaces).each do |x|
31
32
  x1 = x.replace("<mrow></mrow>").first
32
33
  x1.children = x
33
34
  end
@@ -36,13 +37,16 @@ module Html2Doc
36
37
  end
37
38
 
38
39
  def self.mathml_to_ooml(docxml)
39
- docxml.xpath("//*[local-name() = 'math']").each do |m|
40
- @xslt.xml = ooxml_cleanup(m)
40
+ docnamespaces = docxml.collect_namespaces
41
+ m = docxml.xpath("//*[local-name() = 'math']")
42
+ m.each_with_index do |x, i|
43
+ warn "Math OOXML #{i} of #{m.size}" if i % 100 == 0 && m.size > 500 && i > 0
44
+ @xslt.xml = ooxml_cleanup(x, docnamespaces)
41
45
  ooxml = @xslt.serve.gsub(/<\?[^>]+>\s*/, "").
42
46
  gsub(/ xmlns(:[^=]+)?="[^"]+"/, "").
43
47
  gsub(%r{<(/)?([a-z])}, "<\\1m:\\2")
44
- ooxml = uncenter(m, ooxml)
45
- m.swap(ooxml)
48
+ ooxml = uncenter(x, ooxml)
49
+ x.swap(ooxml)
46
50
  end
47
51
  end
48
52
 
@@ -1,3 +1,3 @@
1
1
  module Html2Doc
2
- VERSION = "0.8.8".freeze
2
+ VERSION = "0.8.9".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2doc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.8
4
+ version: 0.8.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-01-11 00:00:00.000000000 Z
11
+ date: 2019-01-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities