html2doc 1.5.5 → 1.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/html2doc/base.rb +1 -1
- data/lib/html2doc/math.rb +30 -37
- data/lib/html2doc/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 6b08c4ee810280f83835a9884c17d54a04837195c1d133bedd48f8a103780316
|
4
|
+
data.tar.gz: f47560825288a3297700d048c3fff8c90ca61ba6bcb2b2e8fef3ac9749d9e094
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 5a94da368aa84ae4abcbcf6c3bc401b349811b32b1d8097b82e96b479cd684d545ac66fc9fb6f41367db85bfeb86590a15bbac7af0e989c41a0e1af0e2c79966
|
7
|
+
data.tar.gz: 4dd0add251285b7c23a82b3b5d709d5421542b96ef0025357672616003d5e31ed35c4c064deb652925666a4a6e33962d6e73d72b137c8cea3592e5d4bb131574
|
data/lib/html2doc/base.rb
CHANGED
data/lib/html2doc/math.rb
CHANGED
@@ -5,29 +5,6 @@ require "nokogiri"
|
|
5
5
|
require "plane1converter"
|
6
6
|
|
7
7
|
class Html2Doc
|
8
|
-
def asciimath_to_mathml1(expr, retain_asciimath)
|
9
|
-
ret = Plurimath::Math.parse(HTMLEntities.new.decode(expr), "asciimath").to_mathml
|
10
|
-
.gsub(/<math>/, "<math xmlns='http://www.w3.org/1998/Math/MathML'>").strip
|
11
|
-
retain_asciimath and
|
12
|
-
ret += "<asciimath>#{@c.encode(@c.decode(expr), :basic)}</asciimath>"
|
13
|
-
ret
|
14
|
-
rescue StandardError => e
|
15
|
-
puts "parsing: #{expr}"
|
16
|
-
puts e.message
|
17
|
-
raise e
|
18
|
-
end
|
19
|
-
|
20
|
-
def asciimath_to_mathml(doc, delims, retain_asciimath: false)
|
21
|
-
return doc if delims.nil? || delims.size < 2
|
22
|
-
|
23
|
-
m = doc.split(/(#{Regexp.escape(delims[0])}|#{Regexp.escape(delims[1])})/)
|
24
|
-
m.each_slice(4).map.with_index do |(*a), i|
|
25
|
-
progress_conv(i, 500, (m.size / 4).floor, 1000, "AsciiMath")
|
26
|
-
a[2].nil? or a[2] = asciimath_to_mathml1(a[2], retain_asciimath)
|
27
|
-
a.size > 1 ? a[0] + a[2] : a[0]
|
28
|
-
end.join
|
29
|
-
end
|
30
|
-
|
31
8
|
def progress_conv(idx, step, total, threshold, msg)
|
32
9
|
return unless (idx % step).zero? && total > threshold && idx.positive?
|
33
10
|
|
@@ -43,6 +20,8 @@ class Html2Doc
|
|
43
20
|
doc
|
44
21
|
end
|
45
22
|
|
23
|
+
MATHML_NS = "http://www.w3.org/1998/Math/MathML".freeze
|
24
|
+
|
46
25
|
# random fixes to MathML input that OOXML needs to render properly
|
47
26
|
def ooxml_cleanup(math, docnamespaces)
|
48
27
|
math = unwrap_accents(
|
@@ -50,7 +29,7 @@ class Html2Doc
|
|
50
29
|
mathml_insert_rows(math, docnamespaces), docnamespaces
|
51
30
|
),
|
52
31
|
)
|
53
|
-
math.add_namespace(nil,
|
32
|
+
math.add_namespace(nil, MATHML_NS)
|
54
33
|
math
|
55
34
|
end
|
56
35
|
|
@@ -151,8 +130,8 @@ class Html2Doc
|
|
151
130
|
def mathml_to_ooml1(xml, docnamespaces)
|
152
131
|
doc = Nokogiri::XML::Document::new
|
153
132
|
doc.root = ooxml_cleanup(xml, docnamespaces)
|
154
|
-
ooxml =
|
155
|
-
ooxml = uncenter(xml, ooxml)
|
133
|
+
ooxml = unitalic(esc_space(accent_tr(@xsltemplate.transform(doc))))
|
134
|
+
ooxml = ooml_clean(uncenter(xml, ooxml))
|
156
135
|
xml.swap(ooxml)
|
157
136
|
end
|
158
137
|
|
@@ -184,19 +163,33 @@ class Html2Doc
|
|
184
163
|
xml
|
185
164
|
end
|
186
165
|
|
166
|
+
OOXML_NS = "http://schemas.microsoft.com/office/2004/12/omml".freeze
|
167
|
+
|
168
|
+
def math_only_para?(node)
|
169
|
+
x = node.dup
|
170
|
+
x.xpath(".//m:math", "m" => MATHML_NS).each(&:remove)
|
171
|
+
x.xpath(".//m:oMathPara | .//m:oMath", "m" => OOXML_NS).each(&:remove)
|
172
|
+
x.text.strip.empty?
|
173
|
+
end
|
174
|
+
|
175
|
+
def math_block?(ooxml, mathml)
|
176
|
+
ooxml.name == "oMathPara" || mathml["displaystyle"] == "true"
|
177
|
+
end
|
178
|
+
|
179
|
+
STYLE_BEARING_NODE =
|
180
|
+
%w(p div td th li).map { |x| ".//ancestor::#{x}" }.join(" | ").freeze
|
181
|
+
|
187
182
|
# if oomml has no siblings, by default it is centered; override this with
|
188
183
|
# left/right if parent is so tagged
|
184
|
+
# also if ooml has mathPara already, or is in para with only oMath content
|
189
185
|
def uncenter(math, ooxml)
|
190
|
-
alignnode = math.
|
191
|
-
|
192
|
-
|
193
|
-
|
194
|
-
|
195
|
-
|
196
|
-
|
197
|
-
|
198
|
-
end
|
199
|
-
end
|
200
|
-
ooxml
|
186
|
+
alignnode = math.xpath(STYLE_BEARING_NODE).last
|
187
|
+
ret = ooxml.root.to_xml(indent: 0)
|
188
|
+
(math_block?(ooxml, math) ||
|
189
|
+
!alignnode) || !math_only_para?(alignnode) and return ret
|
190
|
+
dir = "left"
|
191
|
+
alignnode["style"]&.include?("text-align:right") and dir = "right"
|
192
|
+
"<oMathPara><oMathParaPr><jc " \
|
193
|
+
"m:val='#{dir}'/></oMathParaPr>#{ret}</oMathPara>"
|
201
194
|
end
|
202
195
|
end
|
data/lib/html2doc/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: html2doc
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 1.5.5
|
4
|
+
version: 1.6.1
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Ribose Inc.
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2023-
|
11
|
+
date: 2023-08-12 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: htmlentities
|