html2doc 1.0.5 → 1.0.6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 6616c37575b4bd09b8b7bf7a89beffece7388f3e2a4039610825a030ffb72318
4
- data.tar.gz: f29a3348e0b9260c4178af9a7f939e8ec39c2401cdc321346e5a6321e862cbad
3
+ metadata.gz: ed67d44964092ea651bf40f1e7d4843d9ed84b9e2e3c9a8fe51c77e6ed1b4e8c
4
+ data.tar.gz: 2bc1662270ac499f54710568ae9ed31f78113556278a0f34510d30683a08b48a
5
5
  SHA512:
6
- metadata.gz: 4dee0b6541178293833caf2d55e1ec382ea16692b4efe4455ff0fd627e89ef5e67be747248cefd6c56ff151d8f9ea46f54633241d90996055e3574604eed175e
7
- data.tar.gz: da5201fa1568f34e0638947d6dcfb5df1b7cf4303de1f74323122fcf92f6060cc7d5ddcc483cbfbfcb70798d12afc95c89b48d0274a34114fd8b18d7c1d58692
6
+ metadata.gz: 5503cb2408a86644d3481b82002fba01f333a841ff0de0113cdd15de99984fd6aa458ba8af82e521d3fdce79d6f27aae69b9a760be66e85b8de874681710c36b
7
+ data.tar.gz: b48073035e3000fa1cfc9b3eabf1a5d3e5e2f79dd61aa20881b73cb186d75a69c01e8c7951016bf7a997ab6b7e5911ad2e69b845ecc0783b15aeaa54c6f4f55a
@@ -32,6 +32,7 @@ Gem::Specification.new do |spec|
32
32
  spec.add_dependency "thread_safe"
33
33
  spec.add_dependency "uuidtools"
34
34
  spec.add_dependency "asciimath", "~> 2.0.0"
35
+ spec.add_dependency "plane1converter", "~> 0.0.1"
35
36
 
36
37
  spec.add_development_dependency "byebug", "~> 9.1"
37
38
  spec.add_development_dependency "equivalent-xml", "~> 0.6"
@@ -2,6 +2,7 @@ require "uuidtools"
2
2
  require "asciimath"
3
3
  require "htmlentities"
4
4
  require "nokogiri"
5
+ require "plane1converter"
5
6
 
6
7
  module Html2Doc
7
8
  @xsltemplate =
@@ -25,9 +26,19 @@ module Html2Doc
25
26
  end.join
26
27
  end
27
28
 
29
+ def self.unwrap_accents(doc)
30
+ doc.xpath("//*[@accent = 'true']").each do |x|
31
+ x.elements.length > 1 or next
32
+ x.elements[1].name == "mrow" and
33
+ x.elements[1].replace(x.elements[1].children)
34
+ end
35
+ doc
36
+ end
37
+
28
38
  # random fixes to MathML input that OOXML needs to render properly
29
39
  def self.ooxml_cleanup(m, docnamespaces)
30
- m = mathml_preserve_space(mathml_insert_rows(m, docnamespaces), docnamespaces)
40
+ m = unwrap_accents(mathml_preserve_space(
41
+ mathml_insert_rows(m, docnamespaces), docnamespaces))
31
42
  m.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
32
43
  m
33
44
  end
@@ -49,21 +60,59 @@ module Html2Doc
49
60
  end
50
61
 
51
62
  def self.unitalic(m)
52
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'p']]").each do |x|
63
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'p']]").each do |x|
53
64
  x.wrap("<span style='font-style:normal;'></span>")
54
65
  end
55
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'bi']]").each do |x|
56
- x.wrap("<span style='font-style:italic;font-weight:bold;'></span>")
66
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'bi']]").each do |x|
67
+ x.wrap("<span class='nostem' style='font-weight:bold;'><em></em></span>")
57
68
  end
58
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'i']]").each do |x|
69
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'i']]").each do |x|
59
70
  x.wrap("<span class='nostem'><em></em></span>")
60
71
  end
61
- m.xpath(".//xmlns:r[xmlns:rPr/xmlns:sty[@m:val = 'b']]").each do |x|
72
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:scr)]/xmlns:sty[@m:val = 'b']]").each do |x|
62
73
  x.wrap("<span style='font-style:normal;font-weight:bold;'></span>")
63
74
  end
75
+ m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'monospace']]").each do |x|
76
+ toPlane1(x, :monospace)
77
+ end
78
+ m.xpath(".//xmlns:r[xmlns:rPr/xmlns:scr[@m:val = 'double-struck']]").each do |x|
79
+ toPlane1(x, :doublestruck)
80
+ end
81
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'script']]").each do |x|
82
+ toPlane1(x, :script)
83
+ end
84
+ m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'script']]").each do |x|
85
+ toPlane1(x, :scriptbold)
86
+ end
87
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
88
+ toPlane1(x, :fraktur)
89
+ end
90
+ m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'fraktur']]").each do |x|
91
+ toPlane1(x, :frakturbold)
92
+ end
93
+ m.xpath(".//xmlns:r[xmlns:rPr[not(xmlns:sty) or xmlns:sty/@m:val = 'p']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
94
+ toPlane1(x, :sans)
95
+ end
96
+ m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'b']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
97
+ toPlane1(x, :sansbold)
98
+ end
99
+ m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'i']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
100
+ toPlane1(x, :sansitalic)
101
+ end
102
+ m.xpath(".//xmlns:r[xmlns:rPr[xmlns:sty/@m:val = 'bi']/xmlns:scr[@m:val = 'sans-serif']]").each do |x|
103
+ toPlane1(x, :sansbolditalic)
104
+ end
64
105
  m
65
106
  end
66
107
 
108
+ def self.toPlane1(x, font)
109
+ x.traverse do |n|
110
+ next unless n.text?
111
+ n.replace(Plane1Converter.conv(HTMLEntities.new.decode(n.text), font))
112
+ end
113
+ x
114
+ end
115
+
67
116
  def self.mathml_to_ooml(docxml)
68
117
  docnamespaces = docxml.collect_namespaces
69
118
  m = docxml.xpath("//*[local-name() = 'math']")
@@ -1087,8 +1087,16 @@
1087
1087
  <xsl:attribute name="m:val">bi</xsl:attribute>
1088
1088
  </sty>
1089
1089
  </xsl:when>
1090
- <xsl:when test="$font='monospace'"/>
1090
+ <xsl:when test="$font='monospace'">
1091
1091
  <!-- We can't do monospace, so leave empty -->
1092
+ <!-- NN 2020 https://github.com/metanorma/html2doc/issues/47 no, we will -->
1093
+ <scr>
1094
+ <xsl:attribute name="m:val">monospace</xsl:attribute>
1095
+ </scr>
1096
+ <sty>
1097
+ <xsl:attribute name="m:val">p</xsl:attribute>
1098
+ </sty>
1099
+ </xsl:when>
1092
1100
  <xsl:when test="$font='bold'">
1093
1101
  <sty>
1094
1102
  <xsl:attribute name="m:val">b</xsl:attribute>
@@ -1,3 +1,3 @@
1
1
  module Html2Doc
2
- VERSION = "1.0.5".freeze
2
+ VERSION = "1.0.6".freeze
3
3
  end
@@ -382,13 +382,13 @@ RSpec.describe Html2Doc do
382
382
  end
383
383
 
384
384
  it "processes mstyle" do
385
- Html2Doc.process(html_input(%[<div>{{bb (-log_2 (p_u)) bb "AA" bbb "AA" cc "AA" tt "AA" fr "AA" sf "AA" ii "AA"}}</div>]), filename: "test", asciimathdelims: ["{{", "}}"])
385
+ Html2Doc.process(html_input(%[<div>{{bb (-log_2 (p_u)) bb "BB" bbb "BBB" cc "CC" bcc "BCC" tt "TT" fr "FR" bfr "BFR" sf "SF" bsf "BSFα" sfi "SFI" sfbi "SFBIα" bii "BII" ii "II"}}</div>]), filename: "test", asciimathdelims: ["{{", "}}"])
386
386
  expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
387
387
  to match_fuzzy(<<~OUTPUT)
388
388
  #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
389
389
  #{word_body(%{
390
390
  <div><m:oMath>
391
- <span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x2212;</m:t></m:r></span><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>log</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>2</m:t></m:r></span></m:sub></m:sSub><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>p</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>u</m:t></m:r></span></m:sub></m:sSub></m:e></m:d><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:nor></m:nor><m:sty m:val="b"></m:sty></m:rPr><m:t>AA</m:t></m:r></span><span style="font-style:normal;"><m:r><m:rPr><m:nor></m:nor><m:scr m:val="double-struck"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>AA</m:t></m:r></span><m:r><m:rPr><m:nor></m:nor><m:scr m:val="script"></m:scr></m:rPr><m:t>AA</m:t></m:r><m:r><m:rPr><m:nor></m:nor></m:rPr><m:t>AA</m:t></m:r><span style="font-style:normal;"><m:r><m:rPr><m:nor></m:nor><m:scr m:val="fraktur"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>AA</m:t></m:r></span><span style="font-style:normal;"><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>AA</m:t></m:r></span><span class="nostem"><em></em><m:r><m:rPr><m:nor></m:nor><m:sty m:val="i"></m:sty></m:rPr><m:t>AA</m:t></m:r></span>
391
+ <span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x2212;</m:t></m:r></span><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>log</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>2</m:t></m:r></span></m:sub></m:sSub><m:d><m:dPr><m:sepChr m:val=","></m:sepChr></m:dPr><m:e><m:sSub><m:e><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>p</m:t></m:r></span></m:e><m:sub><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:sty m:val="b"></m:sty></m:rPr><m:t>u</m:t></m:r></span></m:sub></m:sSub></m:e></m:d><span style="font-style:normal;font-weight:bold;"><m:r><m:rPr><m:nor></m:nor><m:sty m:val="b"></m:sty></m:rPr><m:t>BB</m:t></m:r></span><m:r><m:rPr><m:nor></m:nor><m:scr m:val="double-struck"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D539;&#x1D539;&#x1D539;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="script"></m:scr></m:rPr><m:t>&#x1D49E;&#x1D49E;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="script"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x1D4D1;&#x1D4D2;&#x1D4D2;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="monospace"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D683;&#x1D683;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="fraktur"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D509;&#x211C;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="fraktur"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x1D56D;&#x1D571;&#x1D57D;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="p"></m:sty></m:rPr><m:t>&#x1D5B2;&#x1D5A5;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="b"></m:sty></m:rPr><m:t>&#x1D5D5;&#x1D5E6;&#x1D5D9;&#x1D770;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr></m:rPr><m:t>&#x1D5B2;&#x1D5A5;&#x1D5A8;</m:t></m:r><m:r><m:rPr><m:nor></m:nor><m:scr m:val="sans-serif"></m:scr><m:sty m:val="bi"></m:sty></m:rPr><m:t>&#x1D64E;&#x1D641;&#x1D63D;&#x1D644;&#x1D7AA;</m:t></m:r><span class="nostem" style="font-weight:bold;"><em></em><m:r><m:rPr><m:nor></m:nor><m:sty m:val="bi"></m:sty></m:rPr><m:t>BII</m:t></m:r></span><span class="nostem"><em></em><m:r><m:rPr><m:nor></m:nor><m:sty m:val="i"></m:sty></m:rPr><m:t>II</m:t></m:r></span>
392
392
  </m:oMath>
393
393
  </div>}, '<div style="mso-element:footnote-list"/>')}
394
394
  #{WORD_FTR1}
@@ -424,6 +424,21 @@ RSpec.describe Html2Doc do
424
424
  OUTPUT
425
425
  end
426
426
 
427
+ it "unwraps accent in MathML" do
428
+ Html2Doc.process(html_input("<div><math xmlns='http://www.w3.org/1998/Math/MathML'>
429
+ <mover accent='true'><mrow><mi>p</mi></mrow><mrow><mo>^</mo></mrow></mover>
430
+ </math></div>"), filename: "test", asciimathdelims: ["{{", "}}"])
431
+ expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
432
+ to match_fuzzy(<<~OUTPUT)
433
+ #{WORD_HDR} #{DEFAULT_STYLESHEET} #{WORD_HDR_END}
434
+ #{word_body('<div><m:oMath>
435
+ <m:acc><m:accPr><m:chr m:val="^"></m:chr></m:accPr><m:e><m:r><m:t>p</m:t></m:r></m:e></m:acc>
436
+ </m:oMath>
437
+ </div>', '<div style="mso-element:footnote-list"/>')}
438
+ #{WORD_FTR1}
439
+ OUTPUT
440
+ end
441
+
427
442
  it "left-aligns AsciiMath" do
428
443
  Html2Doc.process(html_input("<div style='text-align:left;'>{{sum_(i=1)^n i^3=((n(n+1))/2)^2}}</div>"), filename: "test", asciimathdelims: ["{{", "}}"])
429
444
  expect(guid_clean(File.read("test.doc", encoding: "utf-8"))).
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2doc
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.0.5
4
+ version: 1.0.6
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2020-09-11 00:00:00.000000000 Z
11
+ date: 2020-09-26 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities
@@ -108,6 +108,20 @@ dependencies:
108
108
  - - "~>"
109
109
  - !ruby/object:Gem::Version
110
110
  version: 2.0.0
111
+ - !ruby/object:Gem::Dependency
112
+ name: plane1converter
113
+ requirement: !ruby/object:Gem::Requirement
114
+ requirements:
115
+ - - "~>"
116
+ - !ruby/object:Gem::Version
117
+ version: 0.0.1
118
+ type: :runtime
119
+ prerelease: false
120
+ version_requirements: !ruby/object:Gem::Requirement
121
+ requirements:
122
+ - - "~>"
123
+ - !ruby/object:Gem::Version
124
+ version: 0.0.1
111
125
  - !ruby/object:Gem::Dependency
112
126
  name: byebug
113
127
  requirement: !ruby/object:Gem::Requirement