html2doc 1.0.5 → 1.0.6

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6616c37575b4bd09b8b7bf7a89beffece7388f3e2a4039610825a030ffb72318
4
- data.tar.gz: f29a3348e0b9260c4178af9a7f939e8ec39c2401cdc321346e5a6321e862cbad
3
+ metadata.gz: ed67d44964092ea651bf40f1e7d4843d9ed84b9e2e3c9a8fe51c77e6ed1b4e8c
4
+ data.tar.gz: 2bc1662270ac499f54710568ae9ed31f78113556278a0f34510d30683a08b48a
5
5
  SHA512:
6
- metadata.gz: 4dee0b6541178293833caf2d55e1ec382ea16692b4efe4455ff0fd627e89ef5e67be747248cefd6c56ff151d8f9ea46f54633241d90996055e3574604eed175e
7
- data.tar.gz: da5201fa1568f34e0638947d6dcfb5df1b7cf4303de1f74323122fcf92f6060cc7d5ddcc483cbfbfcb70798d12afc95c89b48d0274a34114fd8b18d7c1d58692
6
+ metadata.gz: 5503cb2408a86644d3481b82002fba01f333a841ff0de0113cdd15de99984fd6aa458ba8af82e521d3fdce79d6f27aae69b9a760be66e85b8de874681710c36b
7
+ data.tar.gz: b48073035e3000fa1cfc9b3eabf1a5d3e5e2f79dd61aa20881b73cb186d75a69c01e8c7951016bf7a997ab6b7e5911ad2e69b845ecc0783b15aeaa54c6f4f55a
@@ -32,6 +32,7 @@ Gem::Specification.new do |spec|
32
32
  spec.add_dependency "thread_safe"
33
33
  spec.add_dependency "uuidtools"
34
34
  spec.add_dependency "asciimath", "~> 2.0.0"
35
+ spec.add_dependency "plane1converter", "~> 0.0.1"
35
36
 
36
37
  spec.add_development_dependency "byebug", "~> 9.1"
37
38
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
@@ -2,6 +2,7 @@ require "uuidtools"
2
2
  require "asciimath"
3
3
  require "htmlentities"
4
4
  require "nokogiri"
5
+ require "plane1converter"
5
6
 
6
7
  module Html2Doc
7
8
  @xsltemplate =
@@ -25,9 +26,19 @@ module Html2Doc
25
26
  end.join
26
27
  end
27
28
 
29
+ def self.unwrap_accents(doc)
30
+ doc.xpath("//*[@accent = 'true']").each do |x|
31
+ x.elements.length > 1 or next
32
+ x.elements[1].name == "mrow" and
33
+ x.elements[1].replace(x.elements[1].children)
34
+ end
35
+ doc
36
+ end
37
+
28
38
  # random fixes to MathML input that OOXML needs to render properly
29
39
  def self.ooxml_cleanup(m, docnamespaces)
30
- m = mathml_preserve_space(mathml_insert_rows(m, docnamespaces), docnamespaces)
40
+ m = unwrap_accents(mathml_preserve_space(
41
+ mathml_insert_rows(m, docnamespaces), docnamespaces))
31
42
  m.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
32
43
  m
33
44
  end
@@ -49,21 +60,59 @@ module Html2Doc
49
60
  end
50
61
 
51
62
  def self.unitalic(m)
52
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'p']]").each do |x|
63
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
53
64
  x.wrap("<span style='font-style:normal;'></span>")
54
65
  end
55
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'bi']]").each do |x|
56
- x.wrap("<span style='font-style:italic;font-weight:bold;'></span>")
66
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
67
+ x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>")
57
68
  end
58
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'i']]").each do |x|
69
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
59
70
  x.wrap("<span class='nostem'><em></em></span>")
60
71
  end
61
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'b']]").each do |x|
72
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
62
73
  x.wrap("<span style='font-style:normal;font-weight:bold;'></span>")
63
74
  end
75
+ m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
76
+ toPlane1(x, :monospace)
77
+ end
78
+ m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
79
+ toPlane1(x, :doublestruck)
80
+ end
81
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
82
+ toPlane1(x, :script)
83
+ end
84
+ m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
85
+ toPlane1(x, :scriptbold)
86
+ end
87
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
88
+ toPlane1(x, :fraktur)
89
+ end
90
+ m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
91
+ toPlane1(x, :frakturbold)
92
+ end
93
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
94
+ toPlane1(x, :sans)
95
+ end
96
+ m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
97
+ toPlane1(x, :sansbold)
98
+ end
99
+ m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
100
+ toPlane1(x, :sansitalic)
101
+ end
102
+ m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
103
+ toPlane1(x, :sansbolditalic)
104
+ end
64
105
  m
65
106
  end
66
107
 
108
+ def self.toPlane1(x, font)
109
+ x.traverse do |n|
110
+ next unless n.text?
111
+ n.replace(Plane1Converter.conv(HTMLEntities.new.decode(n.text), font))
112
+ end
113
+ x
114
+ end
115
+
67
116
  def self.mathml_to_ooml(docxml)
68
117
  docnamespaces = docxml.collect_namespaces
69
118
  m = docxml.xpath("//*[local-name() = 'math']")
@@ -1087,8 +1087,16 @@
1087
1087
  <xsl:attribute name="m:val">bi</xsl:attribute>
1088
1088
  </sty>
1089
1089
  </xsl:when>
1090
- <xsl:when test="$font='monospace'"/>
1090
+ <xsl:when test="$font='monospace'">
1091
1091
  <!-- We can't do monospace, so leave empty -->
1092
+ <!-- NN 2020 https://github.com/metanorma/html2doc/issues/47 no, we will -->
1093
+ <scr>
1094
+ <xsl:attribute name="m:val">monospace</xsl:attribute>
1095
+ </scr>
1096
+ <sty>
1097
+ <xsl:attribute name="m:val">p</xsl:attribute>
1098
+ </sty>
1099
+ </xsl:when>
1092
1100
  <xsl:when test="$font='bold'">
1093
1101
  <sty>
1094
1102
  <xsl:attribute name="m:val">b</xsl:attribute>
@@ -1,3 +1,3 @@
1
1
  module Html2Doc
2
- VERSION = "1.0.5".freeze
2
+ VERSION = "1.0.6".freeze
3
3
  end
@@ -382,13 +382,13 @@ RSpec.describe Html2Doc do
382
382
  end
383
383
 
384
384
  it "processes mstyle" do
385
- Html2Doc.process(html_input(%[<div>{{bb (-log_2 (p_u)) bb "AA" bbb "AA" cc "AA" tt "AA" fr "AA" sf "AA" ii "AA"}}</div>]), filename: "test", asciimathdelims: ["{{", "}}"])
385
+ Html2Doc.process(html_input(%[<div>{{bb (-log_2 (p_u)) bb "BB" bbb "BBB" cc "CC" bcc "BCC" tt "TT" fr "FR" bfr "BFR" sf "SF" bsf "BSFα" sfi "SFI" sfbi "SFBIα" bii "BII" ii "II"}}</div>]), filename: "test", asciimathdelims: ["{{", "}}"])
386
386
  expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
387
387
  to match_fuzzy(<<~OUTPUT)
388
388
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
389
389
  #{word_body(%{
390
390
  <div><m:oMath>
391
- <span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x2212;</m:t></m:r></span><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>log</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>2</m:t></m:r></span></m:sub></m:sSub><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>p</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>u</m:t></m:r></span></m:sub></m:sSub></m:e></m:d><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:nor></m:nor><m:sty m:val="b"></m:sty></m:rPr><m:t>AA</m:t></m:r></span><span style="font-style:normal;"><m:r><m:rPr><m:nor></m:nor><m:scr m:val="double-struck"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>AA</m:t></m:r></span><m:r><m:rPr><m:nor></m:nor><m:scr m:val="script"></m:scr></m:rPr><m:t>AA</m:t></m:r><m:r><m:rPr><m:nor></m:nor></m:rPr><m:t>AA</m:t></m:r><span style="font-style:normal;"><m:r><m:rPr><m:nor></m:nor><m:scr m:val="fraktur"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>AA</m:t></m:r></span><span style="font-style:normal;"><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>AA</m:t></m:r></span><span class="nostem"><em></em><m:r><m:rPr><m:nor></m:nor><m:sty m:val="i"></m:sty></m:rPr><m:t>AA</m:t></m:r></span>
391
+ <span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x2212;</m:t></m:r></span><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>log</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>2</m:t></m:r></span></m:sub></m:sSub><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>p</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>u</m:t></m:r></span></m:sub></m:sSub></m:e></m:d><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:nor></m:nor><m:sty m:val="b"></m:sty></m:rPr><m:t>BB</m:t></m:r></span><m:r><m:rPr><m:nor></m:nor><m:scr m:val="double-struck"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D539;&#x1D539;&#x1D539;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="script"></m:scr></m:rPr><m:t>&#x1D49E;&#x1D49E;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="script"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x1D4D1;&#x1D4D2;&#x1D4D2;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="monospace"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D683;&#x1D683;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="fraktur"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D509;&#x211C;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="fraktur"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x1D56D;&#x1D571;&#x1D57D;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D5B2;&#x1D5A5;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x1D5D5;&#x1D5E6;&#x1D5D9;&#x1D770;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr></m:rPr><m:t>&#x1D5B2;&#x1D5A5;&#x1D5A8;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="bi"></m:sty></m:rPr><m:t>&#x1D64E;&#x1D641;&#x1D63D;&#x1D644;&#x1D7AA;</m:t></m:r><span class="nostem" style="font-weight:bold;"><em></em><m:r><m:rPr><m:nor></m:nor><m:sty m:val="bi"></m:sty></m:rPr><m:t>BII</m:t></m:r></span><span class="nostem"><em></em><m:r><m:rPr><m:nor></m:nor><m:sty m:val="i"></m:sty></m:rPr><m:t>II</m:t></m:r></span>
392
392
  </m:oMath>
393
393
  </div>}, '<div style="mso-element:footnote-list"/>')}
394
394
  #{WORD_FTR1}
@@ -424,6 +424,21 @@ RSpec.describe Html2Doc do
424
424
  OUTPUT
425
425
  end
426
426
 
427
+ it "unwraps accent in MathML" do
428
+ Html2Doc.process(html_input("<div><math xmlns='http://www.w3.org/1998/Math/MathML'>
429
+ <mover accent='true'><mrow><mi>p</mi></mrow><mrow><mo>^</mo></mrow></mover>
430
+ </math></div>"), filename: "test", asciimathdelims: ["{{", "}}"])
431
+ expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
432
+ to match_fuzzy(<<~OUTPUT)
433
+ #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
434
+ #{word_body('<div><m:oMath>
435
+ <m:acc><m:accPr><m:chr m:val="^"></m:chr></m:accPr><m:e><m:r><m:t>p</m:t></m:r></m:e></m:acc>
436
+ </m:oMath>
437
+ </div>', '<div style="mso-element:footnote-list"/>')}
438
+ #{WORD_FTR1}
439
+ OUTPUT
440
+ end
441
+
427
442
  it "left-aligns AsciiMath" do
428
443
  Html2Doc.process(html_input("<div style='text-align:left;'>{{sum_(i=1)^n i^3=((n(n+1))/2)^2}}</div>"), filename: "test", asciimathdelims: ["{{", "}}"])
429
444
  expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2doc
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-11 00:00:00.000000000 Z
11
+ date: 2020-09-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities
@@ -108,6 +108,20 @@ dependencies:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
110
  version: 2.0.0
111
+ - !ruby/object:Gem::Dependency
112
+ name: plane1converter
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 0.0.1
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 0.0.1
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: byebug
113
127
  requirement: !ruby/object:Gem::Requirement