html2doc 1.0.5 → 1.0.6
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/html2doc.gemspec +1 -0
- data/lib/html2doc/math.rb +55 -6
- data/lib/html2doc/mml2omml.xsl +9 -1
- data/lib/html2doc/version.rb +1 -1
- data/spec/html2doc_spec.rb +17 -2
- metadata +16 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: ed67d44964092ea651bf40f1e7d4843d9ed84b9e2e3c9a8fe51c77e6ed1b4e8c
|
4
|
+
data.tar.gz: 2bc1662270ac499f54710568ae9ed31f78113556278a0f34510d30683a08b48a
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5503cb2408a86644d3481b82002fba01f333a841ff0de0113cdd15de99984fd6aa458ba8af82e521d3fdce79d6f27aae69b9a760be66e85b8de874681710c36b
|
7
|
+
data.tar.gz: b48073035e3000fa1cfc9b3eabf1a5d3e5e2f79dd61aa20881b73cb186d75a69c01e8c7951016bf7a997ab6b7e5911ad2e69b845ecc0783b15aeaa54c6f4f55a
|
data/html2doc.gemspec
CHANGED
@@ -32,6 +32,7 @@ Gem::Specification.new do |spec|
|
|
32
32
|
spec.add_dependency "thread_safe"
|
33
33
|
spec.add_dependency "uuidtools"
|
34
34
|
spec.add_dependency "asciimath", "~> 2.0.0"
|
35
|
+
spec.add_dependency "plane1converter", "~> 0.0.1"
|
35
36
|
|
36
37
|
spec.add_development_dependency "byebug", "~> 9.1"
|
37
38
|
spec.add_development_dependency "equivalent-xml", "~> 0.6"
|
data/lib/html2doc/math.rb
CHANGED
@@ -2,6 +2,7 @@ require "uuidtools"
|
|
2
2
|
require "asciimath"
|
3
3
|
require "htmlentities"
|
4
4
|
require "nokogiri"
|
5
|
+
require "plane1converter"
|
5
6
|
|
6
7
|
module Html2Doc
|
7
8
|
@xsltemplate =
|
@@ -25,9 +26,19 @@ module Html2Doc
|
|
25
26
|
end.join
|
26
27
|
end
|
27
28
|
|
29
|
+
def self.unwrap_accents(doc)
|
30
|
+
doc.xpath("//*[@accent = 'true']").each do |x|
|
31
|
+
x.elements.length > 1 or next
|
32
|
+
x.elements[1].name == "mrow" and
|
33
|
+
x.elements[1].replace(x.elements[1].children)
|
34
|
+
end
|
35
|
+
doc
|
36
|
+
end
|
37
|
+
|
28
38
|
# random fixes to MathML input that OOXML needs to render properly
|
29
39
|
def self.ooxml_cleanup(m, docnamespaces)
|
30
|
-
m = mathml_preserve_space(
|
40
|
+
m = unwrap_accents(mathml_preserve_space(
|
41
|
+
mathml_insert_rows(m, docnamespaces), docnamespaces))
|
31
42
|
m.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
|
32
43
|
m
|
33
44
|
end
|
@@ -49,21 +60,59 @@ module Html2Doc
|
|
49
60
|
end
|
50
61
|
|
51
62
|
def self.unitalic(m)
|
52
|
-
m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'p']]").each do |x|
|
63
|
+
m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
|
53
64
|
x.wrap("<span style='font-style:normal;'></span>")
|
54
65
|
end
|
55
|
-
m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'bi']]").each do |x|
|
56
|
-
x.wrap("<span style='font-
|
66
|
+
m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
|
67
|
+
x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>")
|
57
68
|
end
|
58
|
-
m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'i']]").each do |x|
|
69
|
+
m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
|
59
70
|
x.wrap("<span class='nostem'><em></em></span>")
|
60
71
|
end
|
61
|
-
m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'b']]").each do |x|
|
72
|
+
m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
|
62
73
|
x.wrap("<span style='font-style:normal;font-weight:bold;'></span>")
|
63
74
|
end
|
75
|
+
m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
|
76
|
+
toPlane1(x, :monospace)
|
77
|
+
end
|
78
|
+
m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
|
79
|
+
toPlane1(x, :doublestruck)
|
80
|
+
end
|
81
|
+
m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
|
82
|
+
toPlane1(x, :script)
|
83
|
+
end
|
84
|
+
m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
|
85
|
+
toPlane1(x, :scriptbold)
|
86
|
+
end
|
87
|
+
m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
|
88
|
+
toPlane1(x, :fraktur)
|
89
|
+
end
|
90
|
+
m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
|
91
|
+
toPlane1(x, :frakturbold)
|
92
|
+
end
|
93
|
+
m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
94
|
+
toPlane1(x, :sans)
|
95
|
+
end
|
96
|
+
m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
97
|
+
toPlane1(x, :sansbold)
|
98
|
+
end
|
99
|
+
m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
100
|
+
toPlane1(x, :sansitalic)
|
101
|
+
end
|
102
|
+
m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
|
103
|
+
toPlane1(x, :sansbolditalic)
|
104
|
+
end
|
64
105
|
m
|
65
106
|
end
|
66
107
|
|
108
|
+
def self.toPlane1(x, font)
|
109
|
+
x.traverse do |n|
|
110
|
+
next unless n.text?
|
111
|
+
n.replace(Plane1Converter.conv(HTMLEntities.new.decode(n.text), font))
|
112
|
+
end
|
113
|
+
x
|
114
|
+
end
|
115
|
+
|
67
116
|
def self.mathml_to_ooml(docxml)
|
68
117
|
docnamespaces = docxml.collect_namespaces
|
69
118
|
m = docxml.xpath("//*[local-name() = 'math']")
|
data/lib/html2doc/mml2omml.xsl
CHANGED
@@ -1087,8 +1087,16 @@
|
|
1087
1087
|
<xsl:attribute name="m:val">bi</xsl:attribute>
|
1088
1088
|
</sty>
|
1089
1089
|
</xsl:when>
|
1090
|
-
<xsl:when test="$font='monospace'"/>
|
1090
|
+
<xsl:when test="$font='monospace'">
|
1091
1091
|
<!-- We can't do monospace, so leave empty -->
|
1092
|
+
<!-- NN 2020 https://github.com/metanorma/html2doc/issues/47 no, we will -->
|
1093
|
+
<scr>
|
1094
|
+
<xsl:attribute name="m:val">monospace</xsl:attribute>
|
1095
|
+
</scr>
|
1096
|
+
<sty>
|
1097
|
+
<xsl:attribute name="m:val">p</xsl:attribute>
|
1098
|
+
</sty>
|
1099
|
+
</xsl:when>
|
1092
1100
|
<xsl:when test="$font='bold'">
|
1093
1101
|
<sty>
|
1094
1102
|
<xsl:attribute name="m:val">b</xsl:attribute>
|
data/lib/html2doc/version.rb
CHANGED
data/spec/html2doc_spec.rb
CHANGED
@@ -382,13 +382,13 @@ RSpec.describe Html2Doc do
|
|
382
382
|
end
|
383
383
|
|
384
384
|
it "processes mstyle" do
|
385
|
-
Html2Doc.process(html_input(%[<div>{{bb (-log_2 (p_u)) bb "
|
385
|
+
Html2Doc.process(html_input(%[<div>{{bb (-log_2 (p_u)) bb "BB" bbb "BBB" cc "CC" bcc "BCC" tt "TT" fr "FR" bfr "BFR" sf "SF" bsf "BSFα" sfi "SFI" sfbi "SFBIα" bii "BII" ii "II"}}</div>]), filename: "test", asciimathdelims: ["{{", "}}"])
|
386
386
|
expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
|
387
387
|
to match_fuzzy(<<~OUTPUT)
|
388
388
|
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
389
389
|
#{word_body(%{
|
390
390
|
<div><m:oMath>
|
391
|
-
<span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>−</m:t></m:r></span><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>log</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>2</m:t></m:r></span></m:sub></m:sSub><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>p</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>u</m:t></m:r></span></m:sub></m:sSub></m:e></m:d><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:nor></m:nor><m:sty m:val="b"></m:sty></m:rPr><m:t>
|
391
|
+
<span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>−</m:t></m:r></span><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>log</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>2</m:t></m:r></span></m:sub></m:sSub><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>p</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>u</m:t></m:r></span></m:sub></m:sSub></m:e></m:d><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:nor></m:nor><m:sty m:val="b"></m:sty></m:rPr><m:t>BB</m:t></m:r></span><m:r><m:rPr><m:nor></m:nor><m:scr m:val="double-struck"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>𝔹𝔹𝔹</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="script"></m:scr></m:rPr><m:t>𝒞𝒞</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="script"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>𝓑𝓒𝓒</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="monospace"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>𝚃𝚃</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="fraktur"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>𝔉ℜ</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="fraktur"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>𝕭𝕱𝕽</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>𝖲𝖥</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>𝗕𝗦𝗙𝝰</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr></m:rPr><m:t>𝖲𝖥𝖨</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="bi"></m:sty></m:rPr><m:t>𝙎𝙁𝘽𝙄𝞪</m:t></m:r><span class="nostem" 
style="font-weight:bold;"><em></em><m:r><m:rPr><m:nor></m:nor><m:sty m:val="bi"></m:sty></m:rPr><m:t>BII</m:t></m:r></span><span class="nostem"><em></em><m:r><m:rPr><m:nor></m:nor><m:sty m:val="i"></m:sty></m:rPr><m:t>II</m:t></m:r></span>
|
392
392
|
</m:oMath>
|
393
393
|
</div>}, '<div style="mso-element:footnote-list"/>')}
|
394
394
|
#{WORD_FTR1}
|
@@ -424,6 +424,21 @@ RSpec.describe Html2Doc do
|
|
424
424
|
OUTPUT
|
425
425
|
end
|
426
426
|
|
427
|
+
it "unwraps accent in MathML" do
|
428
|
+
Html2Doc.process(html_input("<div><math xmlns='http://www.w3.org/1998/Math/MathML'>
|
429
|
+
<mover accent='true'><mrow><mi>p</mi></mrow><mrow><mo>^</mo></mrow></mover>
|
430
|
+
</math></div>"), filename: "test", asciimathdelims: ["{{", "}}"])
|
431
|
+
expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
|
432
|
+
to match_fuzzy(<<~OUTPUT)
|
433
|
+
#{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
|
434
|
+
#{word_body('<div><m:oMath>
|
435
|
+
<m:acc><m:accPr><m:chr m:val="^"></m:chr></m:accPr><m:e><m:r><m:t>p</m:t></m:r></m:e></m:acc>
|
436
|
+
</m:oMath>
|
437
|
+
</div>', '<div style="mso-element:footnote-list"/>')}
|
438
|
+
#{WORD_FTR1}
|
439
|
+
OUTPUT
|
440
|
+
end
|
441
|
+
|
427
442
|
it "left-aligns AsciiMath" do
|
428
443
|
Html2Doc.process(html_input("<div style='text-align:left;'>{{sum_(i=1)^n i^3=((n(n+1))/2)^2}}</div>"), filename: "test", asciimathdelims: ["{{", "}}"])
|
429
444
|
expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.0.5
|
4
|
+
version: 1.0.6
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2020-09-
|
11
|
+
date: 2020-09-26 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|
@@ -108,6 +108,20 @@ dependencies:
|
|
108
108
|
- - "~>"
|
109
109
|
- !ruby/object:Gem::Version
|
110
110
|
version: 2.0.0
|
111
|
+
- !ruby/object:Gem::Dependency
|
112
|
+
name: plane1converter
|
113
|
+
requirement: !ruby/object:Gem::Requirement
|
114
|
+
requirements:
|
115
|
+
- - "~>"
|
116
|
+
- !ruby/object:Gem::Version
|
117
|
+
version: 0.0.1
|
118
|
+
type: :runtime
|
119
|
+
prerelease: false
|
120
|
+
version_requirements: !ruby/object:Gem::Requirement
|
121
|
+
requirements:
|
122
|
+
- - "~>"
|
123
|
+
- !ruby/object:Gem::Version
|
124
|
+
version: 0.0.1
|
111
125
|
- !ruby/object:Gem::Dependency
|
112
126
|
name: byebug
|
113
127
|
requirement: !ruby/object:Gem::Requirement
|