html2doc 1.5.5 → 1.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 46856bf56ad5dd95f8f5781dc11049bb4600060c28c49715a262837ece8028bf
4
- data.tar.gz: 866ba19867f233b45aeee436df719679623d671902b30476b61952f7a6357e1f
3
+ metadata.gz: 6b08c4ee810280f83835a9884c17d54a04837195c1d133bedd48f8a103780316
4
+ data.tar.gz: f47560825288a3297700d048c3fff8c90ca61ba6bcb2b2e8fef3ac9749d9e094
5
5
  SHA512:
6
- metadata.gz: b949f47c356437ce418f65ce7fd1c497648d0d0e960fe1e05d7318d280ddf6de23ddad8e5ab94a18f447b3eaba948b2a4db69ca2d00f0dcfebd692933a64c1da
7
- data.tar.gz: 953999bd39aa1c1b6a0e1a34c939dcbdba01242c898f5a587eab546b4c9a4578e8051c9c68b49900390fcb3ab5d2de4ad6f0f1bce1af5a8ee6e7bd4daf300966
6
+ metadata.gz: 5a94da368aa84ae4abcbcf6c3bc401b349811b32b1d8097b82e96b479cd684d545ac66fc9fb6f41367db85bfeb86590a15bbac7af0e989c41a0e1af0e2c79966
7
+ data.tar.gz: 4dd0add251285b7c23a82b3b5d709d5421542b96ef0025357672616003d5e31ed35c4c064deb652925666a4a6e33962d6e73d72b137c8cea3592e5d4bb131574
data/lib/html2doc/base.rb CHANGED
@@ -53,7 +53,7 @@ class Html2Doc
53
53
  end
54
54
 
55
55
  def process_html(result)
56
- docxml = to_xhtml(asciimath_to_mathml(result, @asciimathdelims))
56
+ docxml = to_xhtml(result)
57
57
  define_head(cleanup(docxml))
58
58
  msword_fix(from_xhtml(docxml))
59
59
  end
data/lib/html2doc/math.rb CHANGED
@@ -5,29 +5,6 @@ require "nokogiri"
5
5
  require "plane1converter"
6
6
 
7
7
  class Html2Doc
8
- def asciimath_to_mathml1(expr, retain_asciimath)
9
- ret = Plurimath::Math.parse(HTMLEntities.new.decode(expr), "asciimath").to_mathml
10
- .gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>").strip
11
- retain_asciimath and
12
- ret += "<asciimath>#{@c.encode(@c.decode(expr), :basic)}</asciimath>"
13
- ret
14
- rescue StandardError => e
15
- puts "parsing: #{expr}"
16
- puts e.message
17
- raise e
18
- end
19
-
20
- def asciimath_to_mathml(doc, delims, retain_asciimath: false)
21
- return doc if delims.nil? || delims.size < 2
22
-
23
- m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
24
- m.each_slice(4).map.with_index do |(*a), i|
25
- progress_conv(i, 500, (m.size / 4).floor, 1000, "AsciiMath")
26
- a[2].nil? or a[2] = asciimath_to_mathml1(a[2], retain_asciimath)
27
- a.size > 1 ? a[0] + a[2] : a[0]
28
- end.join
29
- end
30
-
31
8
  def progress_conv(idx, step, total, threshold, msg)
32
9
  return unless (idx % step).zero? && total > threshold && idx.positive?
33
10
 
@@ -43,6 +20,8 @@ class Html2Doc
43
20
  doc
44
21
  end
45
22
 
23
+ MATHML_NS = "http://www.w3.org/1998/Math/MathML".freeze
24
+
46
25
  # random fixes to MathML input that OOXML needs to render properly
47
26
  def ooxml_cleanup(math, docnamespaces)
48
27
  math = unwrap_accents(
@@ -50,7 +29,7 @@ class Html2Doc
50
29
  mathml_insert_rows(math, docnamespaces), docnamespaces
51
30
  ),
52
31
  )
53
- math.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
32
+ math.add_namespace(nil, MATHML_NS)
54
33
  math
55
34
  end
56
35
 
@@ -151,8 +130,8 @@ class Html2Doc
151
130
  def mathml_to_ooml1(xml, docnamespaces)
152
131
  doc = Nokogiri::XML::Document::new
153
132
  doc.root = ooxml_cleanup(xml, docnamespaces)
154
- ooxml = ooml_clean(unitalic(esc_space(accent_tr(@xsltemplate.transform(doc)))))
155
- ooxml = uncenter(xml, ooxml)
133
+ ooxml = unitalic(esc_space(accent_tr(@xsltemplate.transform(doc))))
134
+ ooxml = ooml_clean(uncenter(xml, ooxml))
156
135
  xml.swap(ooxml)
157
136
  end
158
137
 
@@ -184,19 +163,33 @@ class Html2Doc
184
163
  xml
185
164
  end
186
165
 
166
+ OOXML_NS = "http://schemas.microsoft.com/office/2004/12/omml".freeze
167
+
168
+ def math_only_para?(node)
169
+ x = node.dup
170
+ x.xpath(".//m:math", "m" => MATHML_NS).each(&:remove)
171
+ x.xpath(".//m:oMathPara | .//m:oMath", "m" => OOXML_NS).each(&:remove)
172
+ x.text.strip.empty?
173
+ end
174
+
175
+ def math_block?(ooxml, mathml)
176
+ ooxml.name == "oMathPara" || mathml["displaystyle"] == "true"
177
+ end
178
+
179
+ STYLE_BEARING_NODE =
180
+ %w(p div td th li).map { |x| ".//ancestor::#{x}" }.join(" | ").freeze
181
+
187
182
  # if oomml has no siblings, by default it is centered; override this with
188
183
  # left/right if parent is so tagged
184
+ # also if ooml has mathPara already, or is in para with only oMath content
189
185
  def uncenter(math, ooxml)
190
- alignnode = math.at(".//ancestor::*[@style][local-name() = 'p' or "\
191
- "local-name() = 'div' or local-name() = 'td']/@style")
192
- return ooxml unless alignnode && (math.next == nil && math.previous == nil)
193
-
194
- %w(left right).each do |dir|
195
- if alignnode.text.include? ("text-align:#{dir}")
196
- ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
197
- "m:val='#{dir}'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
198
- end
199
- end
200
- ooxml
186
+ alignnode = math.xpath(STYLE_BEARING_NODE).last
187
+ ret = ooxml.root.to_xml(indent: 0)
188
+ (math_block?(ooxml, math) ||
189
+ !alignnode) || !math_only_para?(alignnode) and return ret
190
+ dir = "left"
191
+ alignnode["style"]&.include?("text-align:right") and dir = "right"
192
+ "<oMathPara><oMathParaPr><jc " \
193
+ "m:val='#{dir}'/></oMathParaPr>#{ret}</oMathPara>"
201
194
  end
202
195
  end
@@ -1,3 +1,3 @@
1
1
  class Html2Doc
2
- VERSION = "1.5.5".freeze
2
+ VERSION = "1.6.1".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2doc
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.5
4
+ version: 1.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-06-29 00:00:00.000000000 Z
11
+ date: 2023-08-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities