html2doc 1.0.5 → 1.0.6
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/html2doc.gemspec +1 -0
- data/lib/html2doc/math.rb +55 -6
- data/lib/html2doc/mml2omml.xsl +9 -1
- data/lib/html2doc/version.rb +1 -1
- data/spec/html2doc_spec.rb +17 -2
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ed67d44964092ea651bf40f1e7d4843d9ed84b9e2e3c9a8fe51c77e6ed1b4e8c
|
4
|
+
data.tar.gz: 2bc1662270ac499f54710568ae9ed31f78113556278a0f34510d30683a08b48a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5503cb2408a86644d3481b82002fba01f333a841ff0de0113cdd15de99984fd6aa458ba8af82e521d3fdce79d6f27aae69b9a760be66e85b8de874681710c36b
|
7
|
+
data.tar.gz: b48073035e3000fa1cfc9b3eabf1a5d3e5e2f79dd61aa20881b73cb186d75a69c01e8c7951016bf7a997ab6b7e5911ad2e69b845ecc0783b15aeaa54c6f4f55a
|
data/html2doc.gemspec
CHANGED
@@ -32,6 +32,7 @@ Gem::Specification.new do |spec|
|
|
32
32
|
spec.add_dependency "thread_safe"
|
33
33
|
spec.add_dependency "uuidtools"
|
34
34
|
spec.add_dependency "asciimath", "~> 2.0.0"
|
35
|
+
spec.add_dependency "plane1converter", "~> 0.0.1"
|
35
36
|
|
36
37
|
spec.add_development_dependency "byebug", "~> 9.1"
|
37
38
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
data/lib/html2doc/math.rb
CHANGED
@@ -2,6 +2,7 @@ require "uuidtools"
|
|
2
2
|
require "asciimath"
|
3
3
|
require "htmlentities"
|
4
4
|
require "nokogiri"
|
5
|
+
require "plane1converter"
|
5
6
|
|
6
7
|
module Html2Doc
|
7
8
|
@xsltemplate =
|
@@ -25,9 +26,19 @@ module Html2Doc
|
|
25
26
|
end.join
|
26
27
|
end
|
27
28
|
|
29
|
+
def self.unwrap_accents(doc)
|
30
|
+
doc.xpath("//*[@accent = 'true']").each do |x|
|
31
|
+
x.elements.length > 1 or next
|
32
|
+
x.elements[1].name == "mrow" and
|
33
|
+
x.elements[1].replace(x.elements[1].children)
|
34
|
+
end
|
35
|
+
doc
|
36
|
+
end
|
37
|
+
|
28
38
|
# random fixes to MathML input that OOXML needs to render properly
|
29
39
|
def self.ooxml_cleanup(m, docnamespaces)
|
30
|
-
m = mathml_preserve_space(
|
40
|
+
m = unwrap_accents(mathml_preserve_space(
|
41
|
+
mathml_insert_rows(m, docnamespaces), docnamespaces))
|
31
42
|
m.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
|
32
43
|
m
|
33
44
|
end
|
@@ -49,21 +60,59 @@ module Html2Doc
|
|
49
60
|
end
|
50
61
|
|
51
62
|
def self.unitalic(m)
|
52
|
-
m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'p']]").each do |x|
|
63
|
+
m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
|
53
64
|
x.wrap("<span style='font-style:normal;'></span>")
|
54
65
|
end
|
55
|
-
m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'bi']]").each do |x|
|
56
|
-
x.wrap("<span style='font-
|
66
|
+
m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
|
67
|
+
x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>")
|
57
68
|
end
|
58
|
-
m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'i']]").each do |x|
|
69
|
+
m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
|
59
70
|
x.wrap("<span class='nostem'><em></em></span>")
|
60
71
|
end
|
61
|
-
m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'b']]").each do |x|
|
72
|
+
m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
|
62
73
|
x.wrap("<span style='font-style:normal;font-weight:bold;'></span>")
|
63
74
|
end
|
75
|
+
m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
|
76
|
+
toPlane1(x, :monospace)
|
77
|
+
end
|
78
|
+
m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
|
79
|
+
toPlane1(x, :doublestruck)
|
80
|
+
end
|
81
|
+
m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
|
82
|
+
toPlane1(x, :script)
|
83
|
+
end
|
84
|
+
m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
|
85
|
+
toPlane1(x, :scriptbold)
|
86
|
+
end
|
87
|
+
m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
|
88
|
+
toPlane1(x, :fraktur)
|
89
|
+
end
|
90
|
+
m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
|
91
|
+
toPlane1(x, :frakturbold)
|
92
|
+
end
|
93
|
+
m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
94
|
+
toPlane1(x, :sans)
|
95
|
+
end
|
96
|
+
m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
97
|
+
toPlane1(x, :sansbold)
|
98
|
+
end
|
99
|
+
m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
100
|
+
toPlane1(x, :sansitalic)
|
101
|
+
end
|
102
|
+
m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
103
|
+
toPlane1(x, :sansbolditalic)
|
104
|
+
end
|
64
105
|
m
|
65
106
|
end
|
66
107
|
|
108
|
+
def self.toPlane1(x, font)
|
109
|
+
x.traverse do |n|
|
110
|
+
next unless n.text?
|
111
|
+
n.replace(Plane1Converter.conv(HTMLEntities.new.decode(n.text), font))
|
112
|
+
end
|
113
|
+
x
|
114
|
+
end
|
115
|
+
|
67
116
|
def self.mathml_to_ooml(docxml)
|
68
117
|
docnamespaces = docxml.collect_namespaces
|
69
118
|
m = docxml.xpath("//*[local-name() = 'math']")
|
data/lib/html2doc/mml2omml.xsl
CHANGED
@@ -1087,8 +1087,16 @@
|
|
1087
1087
|
<xsl:attribute name="m:val">bi</xsl:attribute>
|
1088
1088
|
</sty>
|
1089
1089
|
</xsl:when>
|
1090
|
-
<xsl:when test="$font='monospace'"/>
|
1090
|
+
<xsl:when test="$font='monospace'">
|
1091
1091
|
<!-- We can't do monospace, so leave empty -->
|
1092
|
+
<!-- NN 2020 https://github.com/metanorma/html2doc/issues/47 no, we will -->
|
1093
|
+
<scr>
|
1094
|
+
<xsl:attribute name="m:val">monospace</xsl:attribute>
|
1095
|
+
</scr>
|
1096
|
+
<sty>
|
1097
|
+
<xsl:attribute name="m:val">p</xsl:attribute>
|
1098
|
+
</sty>
|
1099
|
+
</xsl:when>
|
1092
1100
|
<xsl:when test="$font='bold'">
|
1093
1101
|
<sty>
|
1094
1102
|
<xsl:attribute name="m:val">b</xsl:attribute>
|
data/lib/html2doc/version.rb
CHANGED
data/spec/html2doc_spec.rb
CHANGED
@@ -382,13 +382,13 @@ RSpec.describe Html2Doc do
|
|
382
382
|
end
|
383
383
|
|
384
384
|
it "processes mstyle" do
|
385
|
-
Html2Doc.process(html_input(%[<div>{{bb (-log_2 (p_u)) bb "
|
385
|
+
Html2Doc.process(html_input(%[<div>{{bb (-log_2 (p_u)) bb "BB" bbb "BBB" cc "CC" bcc "BCC" tt "TT" fr "FR" bfr "BFR" sf "SF" bsf "BSFα" sfi "SFI" sfbi "SFBIα" bii "BII" ii "II"}}</div>]), filename: "test", asciimathdelims: ["{{", "}}"])
|
386
386
|
expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
|
387
387
|
to match_fuzzy(<<~OUTPUT)
|
388
388
|
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
389
389
|
#{word_body(%{
|
390
390
|
<div><m:oMath>
|
391
|
-
<span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>−</m:t></m:r></span><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>log</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>2</m:t></m:r></span></m:sub></m:sSub><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>p</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>u</m:t></m:r></span></m:sub></m:sSub></m:e></m:d><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:nor></m:nor><m:sty m:val="b"></m:sty></m:rPr><m:t>
|
391
|
+
<span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>−</m:t></m:r></span><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>log</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>2</m:t></m:r></span></m:sub></m:sSub><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>p</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>u</m:t></m:r></span></m:sub></m:sSub></m:e></m:d><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:nor></m:nor><m:sty m:val="b"></m:sty></m:rPr><m:t>BB</m:t></m:r></span><m:r><m:rPr><m:nor></m:nor><m:scr m:val="double-struck"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>𝔹𝔹𝔹</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="script"></m:scr></m:rPr><m:t>𝒞𝒞</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="script"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>𝓑𝓒𝓒</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="monospace"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>𝚃𝚃</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="fraktur"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>𝔉ℜ</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="fraktur"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>𝕭𝕱𝕽</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>𝖲𝖥</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>𝗕𝗦𝗙𝝰</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr></m:rPr><m:t>𝖲𝖥𝖨</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="bi"></m:sty></m:rPr><m:t>𝙎𝙁𝘽𝙄𝞪</m:t></m:r><span class="nostem" 
style="font-weight:bold;"><em></em><m:r><m:rPr><m:nor></m:nor><m:sty m:val="bi"></m:sty></m:rPr><m:t>BII</m:t></m:r></span><span class="nostem"><em></em><m:r><m:rPr><m:nor></m:nor><m:sty m:val="i"></m:sty></m:rPr><m:t>II</m:t></m:r></span>
|
392
392
|
</m:oMath>
|
393
393
|
</div>}, '<div style="mso-element:footnote-list"/>')}
|
394
394
|
#{WORD_FTR1}
|
@@ -424,6 +424,21 @@ RSpec.describe Html2Doc do
|
|
424
424
|
OUTPUT
|
425
425
|
end
|
426
426
|
|
427
|
+
it "unwraps accent in MathML" do
|
428
|
+
Html2Doc.process(html_input("<div><math xmlns='http://www.w3.org/1998/Math/MathML'>
|
429
|
+
<mover accent='true'><mrow><mi>p</mi></mrow><mrow><mo>^</mo></mrow></mover>
|
430
|
+
</math></div>"), filename: "test", asciimathdelims: ["{{", "}}"])
|
431
|
+
expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
|
432
|
+
to match_fuzzy(<<~OUTPUT)
|
433
|
+
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
434
|
+
#{word_body('<div><m:oMath>
|
435
|
+
<m:acc><m:accPr><m:chr m:val="^"></m:chr></m:accPr><m:e><m:r><m:t>p</m:t></m:r></m:e></m:acc>
|
436
|
+
</m:oMath>
|
437
|
+
</div>', '<div style="mso-element:footnote-list"/>')}
|
438
|
+
#{WORD_FTR1}
|
439
|
+
OUTPUT
|
440
|
+
end
|
441
|
+
|
427
442
|
it "left-aligns AsciiMath" do
|
428
443
|
Html2Doc.process(html_input("<div style='text-align:left;'>{{sum_(i=1)^n i^3=((n(n+1))/2)^2}}</div>"), filename: "test", asciimathdelims: ["{{", "}}"])
|
429
444
|
expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.5
|
4
|
+
version: 1.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-09-
|
11
|
+
date: 2020-09-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: 2.0.0
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: plane1converter
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 0.0.1
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 0.0.1
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: byebug
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|