html2doc 1.5.5 → 1.6.1
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/html2doc/base.rb +1 -1
- data/lib/html2doc/math.rb +30 -37
- data/lib/html2doc/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6b08c4ee810280f83835a9884c17d54a04837195c1d133bedd48f8a103780316
|
4
|
+
data.tar.gz: f47560825288a3297700d048c3fff8c90ca61ba6bcb2b2e8fef3ac9749d9e094
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5a94da368aa84ae4abcbcf6c3bc401b349811b32b1d8097b82e96b479cd684d545ac66fc9fb6f41367db85bfeb86590a15bbac7af0e989c41a0e1af0e2c79966
|
7
|
+
data.tar.gz: 4dd0add251285b7c23a82b3b5d709d5421542b96ef0025357672616003d5e31ed35c4c064deb652925666a4a6e33962d6e73d72b137c8cea3592e5d4bb131574
|
data/lib/html2doc/base.rb
CHANGED
data/lib/html2doc/math.rb
CHANGED
@@ -5,29 +5,6 @@ require "nokogiri"
|
|
5
5
|
require "plane1converter"
|
6
6
|
|
7
7
|
class Html2Doc
|
8
|
-
def asciimath_to_mathml1(expr, retain_asciimath)
|
9
|
-
ret = Plurimath::Math.parse(HTMLEntities.new.decode(expr), "asciimath").to_mathml
|
10
|
-
.gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>").strip
|
11
|
-
retain_asciimath and
|
12
|
-
ret += "<asciimath>#{@c.encode(@c.decode(expr), :basic)}</asciimath>"
|
13
|
-
ret
|
14
|
-
rescue StandardError => e
|
15
|
-
puts "parsing: #{expr}"
|
16
|
-
puts e.message
|
17
|
-
raise e
|
18
|
-
end
|
19
|
-
|
20
|
-
def asciimath_to_mathml(doc, delims, retain_asciimath: false)
|
21
|
-
return doc if delims.nil? || delims.size < 2
|
22
|
-
|
23
|
-
m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
|
24
|
-
m.each_slice(4).map.with_index do |(*a), i|
|
25
|
-
progress_conv(i, 500, (m.size / 4).floor, 1000, "AsciiMath")
|
26
|
-
a[2].nil? or a[2] = asciimath_to_mathml1(a[2], retain_asciimath)
|
27
|
-
a.size > 1 ? a[0] + a[2] : a[0]
|
28
|
-
end.join
|
29
|
-
end
|
30
|
-
|
31
8
|
def progress_conv(idx, step, total, threshold, msg)
|
32
9
|
return unless (idx % step).zero? && total > threshold && idx.positive?
|
33
10
|
|
@@ -43,6 +20,8 @@ class Html2Doc
|
|
43
20
|
doc
|
44
21
|
end
|
45
22
|
|
23
|
+
MATHML_NS = "http://www.w3.org/1998/Math/MathML".freeze
|
24
|
+
|
46
25
|
# random fixes to MathML input that OOXML needs to render properly
|
47
26
|
def ooxml_cleanup(math, docnamespaces)
|
48
27
|
math = unwrap_accents(
|
@@ -50,7 +29,7 @@ class Html2Doc
|
|
50
29
|
mathml_insert_rows(math, docnamespaces), docnamespaces
|
51
30
|
),
|
52
31
|
)
|
53
|
-
math.add_namespace(nil, "http://www.w3.org/1998/Math/MathML")
|
32
|
+
math.add_namespace(nil, MATHML_NS)
|
54
33
|
math
|
55
34
|
end
|
56
35
|
|
@@ -151,8 +130,8 @@ class Html2Doc
|
|
151
130
|
def mathml_to_ooml1(xml, docnamespaces)
|
152
131
|
doc = Nokogiri::XML::Document::new
|
153
132
|
doc.root = ooxml_cleanup(xml, docnamespaces)
|
154
|
-
ooxml =
|
155
|
-
ooxml = uncenter(xml, ooxml)
|
133
|
+
ooxml = unitalic(esc_space(accent_tr(@xsltemplate.transform(doc))))
|
134
|
+
ooxml = ooml_clean(uncenter(xml, ooxml))
|
156
135
|
xml.swap(ooxml)
|
157
136
|
end
|
158
137
|
|
@@ -184,19 +163,33 @@ class Html2Doc
|
|
184
163
|
xml
|
185
164
|
end
|
186
165
|
|
166
|
+
OOXML_NS = "http://schemas.microsoft.com/office/2004/12/omml".freeze
|
167
|
+
|
168
|
+
def math_only_para?(node)
|
169
|
+
x = node.dup
|
170
|
+
x.xpath(".//m:math", "m" => MATHML_NS).each(&:remove)
|
171
|
+
x.xpath(".//m:oMathPara | .//m:oMath", "m" => OOXML_NS).each(&:remove)
|
172
|
+
x.text.strip.empty?
|
173
|
+
end
|
174
|
+
|
175
|
+
def math_block?(ooxml, mathml)
|
176
|
+
ooxml.name == "oMathPara" || mathml["displaystyle"] == "true"
|
177
|
+
end
|
178
|
+
|
179
|
+
STYLE_BEARING_NODE =
|
180
|
+
%w(p div td th li).map { |x| ".//ancestor::#{x}" }.join(" | ").freeze
|
181
|
+
|
187
182
|
# if oomml has no siblings, by default it is centered; override this with
|
188
183
|
# left/right if parent is so tagged
|
184
|
+
# also if ooml has mathPara already, or is in para with only oMath content
|
189
185
|
def uncenter(math, ooxml)
|
190
|
-
alignnode = math.
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
end
|
199
|
-
end
|
200
|
-
ooxml
|
186
|
+
alignnode = math.xpath(STYLE_BEARING_NODE).last
|
187
|
+
ret = ooxml.root.to_xml(indent: 0)
|
188
|
+
(math_block?(ooxml, math) ||
|
189
|
+
!alignnode) || !math_only_para?(alignnode) and return ret
|
190
|
+
dir = "left"
|
191
|
+
alignnode["style"]&.include?("text-align:right") and dir = "right"
|
192
|
+
"<oMathPara><oMathParaPr><jc " \
|
193
|
+
"m:val='#{dir}'/></oMathParaPr>#{ret}</oMathPara>"
|
201
194
|
end
|
202
195
|
end
|
data/lib/html2doc/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.5
|
4
|
+
version: 1.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-08-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|