html2doc 1.5.5 → 1.6.1

Sign up to get free protection for your applications and to get access to all the features.
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA256:
3
- metadata.gz: 46856bf56ad5dd95f8f5781dc11049bb4600060c28c49715a262837ece8028bf
4
- data.tar.gz: 866ba19867f233b45aeee436df719679623d671902b30476b61952f7a6357e1f
3
+ metadata.gz: 6b08c4ee810280f83835a9884c17d54a04837195c1d133bedd48f8a103780316
4
+ data.tar.gz: f47560825288a3297700d048c3fff8c90ca61ba6bcb2b2e8fef3ac9749d9e094
5
5
  SHA512:
6
- metadata.gz: b949f47c356437ce418f65ce7fd1c497648d0d0e960fe1e05d7318d280ddf6de23ddad8e5ab94a18f447b3eaba948b2a4db69ca2d00f0dcfebd692933a64c1da
7
- data.tar.gz: 953999bd39aa1c1b6a0e1a34c939dcbdba01242c898f5a587eab546b4c9a4578e8051c9c68b49900390fcb3ab5d2de4ad6f0f1bce1af5a8ee6e7bd4daf300966
6
+ metadata.gz: 5a94da368aa84ae4abcbcf6c3bc401b349811b32b1d8097b82e96b479cd684d545ac66fc9fb6f41367db85bfeb86590a15bbac7af0e989c41a0e1af0e2c79966
7
+ data.tar.gz: 4dd0add251285b7c23a82b3b5d709d5421542b96ef0025357672616003d5e31ed35c4c064deb652925666a4a6e33962d6e73d72b137c8cea3592e5d4bb131574
data/lib/html2doc/base.rb CHANGED
@@ -53,7 +53,7 @@ class Html2Doc
53
53
  end
54
54
 
55
55
  def process_html(result)
56
- docxml = to_xhtml(asciimath_to_mathml(result, @asciimathdelims))
56
+ docxml = to_xhtml(result)
57
57
  define_head(cleanup(docxml))
58
58
  msword_fix(from_xhtml(docxml))
59
59
  end
data/lib/html2doc/math.rb CHANGED
@@ -5,29 +5,6 @@ require "nokogiri"
5
5
  require "plane1converter"
6
6
 
7
7
  class Html2Doc
8
- def asciimath_to_mathml1(expr, retain_asciimath)
9
- ret = Plurimath::Math.parse(HTMLEntities.new.decode(expr), "asciimath").to_mathml
10
- .gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>").strip
11
- retain_asciimath and
12
- ret += "<asciimath>#{@c.encode(@c.decode(expr), :basic)}</asciimath>"
13
- ret
14
- rescue StandardError => e
15
- puts "parsing: #{expr}"
16
- puts e.message
17
- raise e
18
- end
19
-
20
- def asciimath_to_mathml(doc, delims, retain_asciimath: false)
21
- return doc if delims.nil? || delims.size < 2
22
-
23
- m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
24
- m.each_slice(4).map.with_index do |(*a), i|
25
- progress_conv(i, 500, (m.size / 4).floor, 1000, "AsciiMath")
26
- a[2].nil? or a[2] = asciimath_to_mathml1(a[2], retain_asciimath)
27
- a.size > 1 ? a[0] + a[2] : a[0]
28
- end.join
29
- end
30
-
31
8
  def progress_conv(idx, step, total, threshold, msg)
32
9
  return unless (idx % step).zero? && total > threshold && idx.positive?
33
10
 
@@ -43,6 +20,8 @@ class Html2Doc
43
20
  doc
44
21
  end
45
22
 
23
+ MATHML_NS = "http://www.w3.org/1998/Math/MathML".freeze
24
+
46
25
  # random fixes to MathML input that OOXML needs to render properly
47
26
  def ooxml_cleanup(math, docnamespaces)
48
27
  math = unwrap_accents(
@@ -50,7 +29,7 @@ class Html2Doc
50
29
  mathml_insert_rows(math, docnamespaces), docnamespaces
51
30
  ),
52
31
  )
53
- math.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
32
+ math.add_namespace(nil, MATHML_NS)
54
33
  math
55
34
  end
56
35
 
@@ -151,8 +130,8 @@ class Html2Doc
151
130
  def mathml_to_ooml1(xml, docnamespaces)
152
131
  doc = Nokogiri::XML::Document::new
153
132
  doc.root = ooxml_cleanup(xml, docnamespaces)
154
- ooxml = ooml_clean(unitalic(esc_space(accent_tr(@xsltemplate.transform(doc)))))
155
- ooxml = uncenter(xml, ooxml)
133
+ ooxml = unitalic(esc_space(accent_tr(@xsltemplate.transform(doc))))
134
+ ooxml = ooml_clean(uncenter(xml, ooxml))
156
135
  xml.swap(ooxml)
157
136
  end
158
137
 
@@ -184,19 +163,33 @@ class Html2Doc
184
163
  xml
185
164
  end
186
165
 
166
+ OOXML_NS = "http://schemas.microsoft.com/office/2004/12/omml".freeze
167
+
168
+ def math_only_para?(node)
169
+ x = node.dup
170
+ x.xpath(".//m:math", "m" => MATHML_NS).each(&:remove)
171
+ x.xpath(".//m:oMathPara | .//m:oMath", "m" => OOXML_NS).each(&:remove)
172
+ x.text.strip.empty?
173
+ end
174
+
175
+ def math_block?(ooxml, mathml)
176
+ ooxml.name == "oMathPara" || mathml["displaystyle"] == "true"
177
+ end
178
+
179
+ STYLE_BEARING_NODE =
180
+ %w(p div td th li).map { |x| ".//ancestor::#{x}" }.join(" | ").freeze
181
+
187
182
  # if oomml has no siblings, by default it is centered; override this with
188
183
  # left/right if parent is so tagged
184
+ # also if ooml has mathPara already, or is in para with only oMath content
189
185
  def uncenter(math, ooxml)
190
- alignnode = math.at(".//ancestor::*[@style][local-name() = 'p' or "\
191
- "local-name() = 'div' or local-name() = 'td']/@style")
192
- return ooxml unless alignnode && (math.next == nil && math.previous == nil)
193
-
194
- %w(left right).each do |dir|
195
- if alignnode.text.include? ("text-align:#{dir}")
196
- ooxml = "<m:oMathPara><m:oMathParaPr><m:jc "\
197
- "m:val='#{dir}'/></m:oMathParaPr>#{ooxml}</m:oMathPara>"
198
- end
199
- end
200
- ooxml
186
+ alignnode = math.xpath(STYLE_BEARING_NODE).last
187
+ ret = ooxml.root.to_xml(indent: 0)
188
+ (math_block?(ooxml, math) ||
189
+ !alignnode) || !math_only_para?(alignnode) and return ret
190
+ dir = "left"
191
+ alignnode["style"]&.include?("text-align:right") and dir = "right"
192
+ "<oMathPara><oMathParaPr><jc " \
193
+ "m:val='#{dir}'/></oMathParaPr>#{ret}</oMathPara>"
201
194
  end
202
195
  end
@@ -1,3 +1,3 @@
1
1
  class Html2Doc
2
- VERSION = "1.5.5".freeze
2
+ VERSION = "1.6.1".freeze
3
3
  end
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: html2doc
3
3
  version: !ruby/object:Gem::Version
4
- version: 1.5.5
4
+ version: 1.6.1
5
5
  platform: ruby
6
6
  authors:
7
7
  - Ribose Inc.
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2023-06-29 00:00:00.000000000 Z
11
+ date: 2023-08-12 00:00:00.000000000 Z
12
12
  dependencies:
13
13
  - !ruby/object:Gem::Dependency
14
14
  name: htmlentities