html2doc 0.8.8 → 0.8.9

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: c88de00bdeb2cbb88460c403e03cd10d623fb1152371129a6b9cf90c2f664a02
4
- data.tar.gz: 0c61af5fa5eb93dcc4730328055b0914dfe03de8794909779c6d99e1308a0a22
3
+ metadata.gz: 825c44453c97ff2f83cdd4099e293b8ce78642139a56bed33488e05dfaa22d5f
4
+ data.tar.gz: e3643c17f4c31ad1b202df404e45c1b00f52591eefb52779369496bc573819ac
5
5
  SHA512:
6
- metadata.gz: 73ddb8e6c7e4505df3127737c4302a364c1ade83b18c55274c7e0bc34a3640cbc5e4d02cd94eca0cd72eb6f2c505ee6b701388eaadbb1b45969d543df15b778d
7
- data.tar.gz: 15556cf840a5fe4de804e5de8d0eb0f23d470d1a7e620814e8e0a15239e8efba761be31601ea5d1a30cf806c9740eabe2ac9a8ae1382ebad158562a7c1081420
6
+ metadata.gz: 6dff48ab65903fac3b334738bc340a044c0a16237aa1980b8151f8468796ed25222dd2f88259ce64cd604788a461f87f2425f2f0ef5e7b4d11e7c7b34be9b333
7
+ data.tar.gz: d8fa57c7f1d34798bf2865a728a99df6634db4dd793bc4fae9feebcb13dab0a408b265bd73975043f02f6e0ba94e726aae259efe59eadc94d0a5ad699e0c12e5
data/Gemfile.lock CHANGED
@@ -1,7 +1,7 @@
1
1
  PATH
2
2
  remote: .
3
3
  specs:
4
- html2doc (0.8.8)
4
+ html2doc (0.8.9)
5
5
  asciimath (~> 1.0.7)
6
6
  htmlentities (~> 4.3.4)
7
7
  image_size
@@ -14,7 +14,7 @@ PATH
14
14
  GEM
15
15
  remote: https://rubygems.org/
16
16
  specs:
17
- asciimath (1.0.7)
17
+ asciimath (1.0.8)
18
18
  ast (2.4.0)
19
19
  byebug (9.1.0)
20
20
  coderay (1.1.2)
@@ -52,13 +52,13 @@ GEM
52
52
  mime-types-data (3.2018.0812)
53
53
  mini_portile2 (2.4.0)
54
54
  nenv (0.3.0)
55
- nokogiri (1.10.0)
55
+ nokogiri (1.10.1)
56
56
  mini_portile2 (~> 2.4.0)
57
57
  notiffany (0.1.1)
58
58
  nenv (~> 0.1)
59
59
  shellany (~> 0.0)
60
- parallel (1.12.1)
61
- parser (2.5.3.0)
60
+ parallel (1.13.0)
61
+ parser (2.6.0.0)
62
62
  ast (~> 2.4.0)
63
63
  powerpack (0.1.2)
64
64
  pry (0.12.2)
data/lib/html2doc/base.rb CHANGED
@@ -2,7 +2,7 @@ require "uuidtools"
2
2
  require "asciimath"
3
3
  require "htmlentities"
4
4
  require "nokogiri"
5
- require "xml/xslt"
5
+ #require "xml/xslt"
6
6
  require "pp"
7
7
  require "fileutils"
8
8
 
@@ -2,7 +2,6 @@ require "uuidtools"
2
2
  require "asciimath"
3
3
  require "htmlentities"
4
4
  require "nokogiri"
5
- require "xml/xslt"
6
5
  require "pp"
7
6
 
8
7
  module Html2Doc
@@ -29,8 +28,8 @@ module Html2Doc
29
28
 
30
29
  def self.list2para(u)
31
30
  return if u.xpath("./li").empty?
32
- u.xpath("./li").last["class"] = "MsoListParagraphCxSpLast"
33
- u.xpath("./li").first["class"] = "MsoListParagraphCxSpFirst"
31
+ u.xpath("./li").first["class"] ||= "MsoListParagraphCxSpFirst"
32
+ u.xpath("./li").last["class"] ||= "MsoListParagraphCxSpLast"
34
33
  u.xpath("./li/p").each { |p| p["class"] ||= "MsoListParagraphCxSpMiddle" }
35
34
  u.xpath("./li").each do |l|
36
35
  l.name = "p"
data/lib/html2doc/math.rb CHANGED
@@ -3,12 +3,12 @@ require "asciimath"
3
3
  require "htmlentities"
4
4
  require "nokogiri"
5
5
  require "xml/xslt"
6
- require "pp"
7
6
 
8
7
  module Html2Doc
9
8
  @xslt = XML::XSLT.new
10
- #@xslt.xsl = File.read(File.join(File.dirname(__FILE__), "mathml2omml.xsl"))
9
+ @xslt.xsl = File.read(File.join(File.dirname(__FILE__), "mathml2omml.xsl"))
11
10
  @xslt.xsl = File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"), encoding: "utf-8")
11
+ @xsltemplate = Nokogiri::XSLT(File.read(File.join(File.dirname(__FILE__), "mml2omml.xsl"), encoding: "utf-8"))
12
12
 
13
13
  def self.asciimath_to_mathml1(x)
14
14
  AsciiMath.parse(HTMLEntities.new.decode(x)).to_mathml.
@@ -17,17 +17,18 @@ module Html2Doc
17
17
 
18
18
  def self.asciimath_to_mathml(doc, delims)
19
19
  return doc if delims.nil? || delims.size < 2
20
- doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/).
21
- each_slice(4).map do |a|
20
+ m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
21
+ m.each_slice(4).map.with_index do |(*a), i|
22
+ warn "MathML #{i} of #{(m.size / 4).floor}" if i % 500 == 0 && m.size > 1000 && i > 0
22
23
  a[2].nil? || a[2] = asciimath_to_mathml1(a[2])
23
24
  a.size > 1 ? a[0] + a[2] : a[0]
24
25
  end.join
25
26
  end
26
27
 
27
28
  # random fixes to MathML input that OOXML needs to render properly
28
- def self.ooxml_cleanup(m)
29
+ def self.ooxml_cleanup(m, docnamespaces)
29
30
  m.xpath(".//xmlns:msup[name(preceding-sibling::*[1])='munderover']",
30
- m.document.collect_namespaces).each do |x|
31
+ docnamespaces).each do |x|
31
32
  x1 = x.replace("<mrow></mrow>").first
32
33
  x1.children = x
33
34
  end
@@ -36,13 +37,16 @@ module Html2Doc
36
37
  end
37
38
 
38
39
  def self.mathml_to_ooml(docxml)
39
- docxml.xpath("//*[local-name() = 'math']").each do |m|
40
- @xslt.xml = ooxml_cleanup(m)
40
+ docnamespaces = docxml.collect_namespaces
41
+ m = docxml.xpath("//*[local-name() = 'math']")
42
+ m.each_with_index do |x, i|
43
+ warn "Math OOXML #{i} of #{m.size}" if i % 100 == 0 && m.size > 500 && i > 0
44
+ @xslt.xml = ooxml_cleanup(x, docnamespaces)
41
45
  ooxml = @xslt.serve.gsub(/<\?[^>]+>\s*/, "").
42
46
  gsub(/ xmlns(:[^=]+)?="[^"]+"/, "").
43
47
  gsub(%r{<(/)?([a-z])}, "<\\1m:\\2")
44
- ooxml = uncenter(m, ooxml)
45
- m.swap(ooxml)
48
+ ooxml = uncenter(x, ooxml)
49
+ x.swap(ooxml)
46
50
  end
47
51
  end
48
52
 
@@ -1,3 +1,3 @@
1
1
  module Html2Doc
2
- VERSION = "0.8.8".freeze
2
+ VERSION = "0.8.9".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2doc
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.8.8
4
+ version: 0.8.9
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2019-01-11 00:00:00.000000000 Z
11
+ date: 2019-01-31 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities