plurimath 0.4.4 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +4 -0
- data/.rspec +3 -0
- data/.rspec-opal +11 -0
- data/Gemfile +3 -0
- data/Rakefile +11 -0
- data/lib/plurimath/asciimath/transform.rb +15 -0
- data/lib/plurimath/latex/constants.rb +3 -0
- data/lib/plurimath/latex/parse.rb +20 -11
- data/lib/plurimath/latex/transform.rb +24 -2
- data/lib/plurimath/math/core.rb +88 -0
- data/lib/plurimath/math/formula.rb +68 -24
- data/lib/plurimath/math/function/base.rb +8 -2
- data/lib/plurimath/math/function/binary_function.rb +36 -4
- data/lib/plurimath/math/function/color.rb +14 -0
- data/lib/plurimath/math/function/fenced.rb +27 -0
- data/lib/plurimath/math/function/floor.rb +1 -1
- data/lib/plurimath/math/function/font_style.rb +46 -0
- data/lib/plurimath/math/function/frac.rb +6 -0
- data/lib/plurimath/math/function/int.rb +7 -0
- data/lib/plurimath/math/function/left.rb +19 -1
- data/lib/plurimath/math/function/lim.rb +6 -0
- data/lib/plurimath/math/function/limits.rb +7 -0
- data/lib/plurimath/math/function/log.rb +6 -0
- data/lib/plurimath/math/function/menclose.rb +6 -0
- data/lib/plurimath/math/function/mod.rb +6 -0
- data/lib/plurimath/math/function/msgroup.rb +28 -0
- data/lib/plurimath/math/function/multiscript.rb +7 -0
- data/lib/plurimath/math/function/nary.rb +94 -0
- data/lib/plurimath/math/function/oint.rb +6 -0
- data/lib/plurimath/math/function/over.rb +6 -0
- data/lib/plurimath/math/function/overset.rb +6 -0
- data/lib/plurimath/math/function/power.rb +8 -2
- data/lib/plurimath/math/function/power_base.rb +10 -31
- data/lib/plurimath/math/function/prod.rb +19 -18
- data/lib/plurimath/math/function/right.rb +19 -1
- data/lib/plurimath/math/function/root.rb +6 -0
- data/lib/plurimath/math/function/rule.rb +7 -0
- data/lib/plurimath/math/function/semantics.rb +6 -0
- data/lib/plurimath/math/function/stackrel.rb +6 -0
- data/lib/plurimath/math/function/substack.rb +6 -0
- data/lib/plurimath/math/function/sum.rb +26 -25
- data/lib/plurimath/math/function/table.rb +52 -24
- data/lib/plurimath/math/function/td.rb +28 -0
- data/lib/plurimath/math/function/ternary_function.rb +44 -4
- data/lib/plurimath/math/function/text.rb +25 -3
- data/lib/plurimath/math/function/tr.rb +28 -0
- data/lib/plurimath/math/function/unary_function.rb +43 -3
- data/lib/plurimath/math/function/underover.rb +7 -55
- data/lib/plurimath/math/function/underset.rb +6 -0
- data/lib/plurimath/math/function/vec.rb +40 -0
- data/lib/plurimath/math/function.rb +7 -5
- data/lib/plurimath/math/number.rb +9 -5
- data/lib/plurimath/math/symbol.rb +13 -9
- data/lib/plurimath/math.rb +1 -3
- data/lib/plurimath/mathml/parser.rb +4 -4
- data/lib/plurimath/mathml/transform.rb +3 -4
- data/lib/plurimath/omml/parser.rb +19 -3
- data/lib/plurimath/omml/transform.rb +19 -14
- data/lib/plurimath/setup/oga.rb +5 -0
- data/lib/plurimath/setup/opal.rb.erb +8 -0
- data/lib/plurimath/setup/ox.rb +5 -0
- data/lib/plurimath/utility.rb +60 -34
- data/lib/plurimath/version.rb +1 -1
- data/lib/plurimath/xml_engine/oga.rb +246 -0
- data/lib/plurimath/xml_engine/ox.rb +29 -0
- data/lib/plurimath/xml_engine.rb +6 -0
- data/lib/plurimath.rb +12 -2
- metadata +11 -2
data/lib/plurimath/math.rb
CHANGED
@@ -19,9 +19,7 @@ require_relative "latex/parser"
|
|
19
19
|
require_relative "html/parser"
|
20
20
|
require_relative "omml/parser"
|
21
21
|
require_relative "utility"
|
22
|
-
require "ox"
|
23
22
|
require "yaml"
|
24
|
-
Ox.default_options = { encoding: "UTF-8" }
|
25
23
|
|
26
24
|
module Plurimath
|
27
25
|
module Math
|
@@ -72,7 +70,7 @@ module Plurimath
|
|
72
70
|
|
73
71
|
def type_error!
|
74
72
|
raise InvalidTypeError.new(
|
75
|
-
"`type` must be one of: `#{VALID_TYPES.keys.join('`, `')}`"
|
73
|
+
"`type` must be one of: `#{VALID_TYPES.keys.join('`, `')}`",
|
76
74
|
)
|
77
75
|
end
|
78
76
|
|
@@ -23,8 +23,8 @@ module Plurimath
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def parse
|
26
|
-
ox_nodes =
|
27
|
-
display_style = ox_nodes&.locate("
|
26
|
+
ox_nodes = Plurimath.xml_engine.load(text)
|
27
|
+
display_style = ox_nodes&.locate("mstyle/@displaystyle")&.first
|
28
28
|
nodes = parse_nodes(ox_nodes.nodes)
|
29
29
|
Math::Formula.new(
|
30
30
|
Transform.new.apply(nodes).flatten.compact,
|
@@ -34,7 +34,7 @@ module Plurimath
|
|
34
34
|
|
35
35
|
def parse_nodes(nodes)
|
36
36
|
nodes.map do |node|
|
37
|
-
next if
|
37
|
+
next if Plurimath.xml_engine.is_xml_comment?(node)
|
38
38
|
|
39
39
|
if node.is_a?(String)
|
40
40
|
node
|
@@ -47,7 +47,7 @@ module Plurimath
|
|
47
47
|
end
|
48
48
|
|
49
49
|
def validate_attributes(attributes)
|
50
|
-
attributes&.select! { |key, _| SUPPORTED_ATTRS.include?(key
|
50
|
+
attributes&.select! { |key, _| SUPPORTED_ATTRS.include?(key.to_s) }
|
51
51
|
attributes&.transform_keys(&:to_sym) if attributes&.any?
|
52
52
|
end
|
53
53
|
|
@@ -88,8 +88,7 @@ module Plurimath
|
|
88
88
|
|
89
89
|
rule(merror: sequence(:merror)) do
|
90
90
|
Math::Function::Merror.new(
|
91
|
-
merror
|
92
|
-
merror[1],
|
91
|
+
Utility.filter_values(merror),
|
93
92
|
)
|
94
93
|
end
|
95
94
|
|
@@ -247,7 +246,7 @@ module Plurimath
|
|
247
246
|
symbols = Constants::UNICODE_SYMBOLS.transform_keys(&:to_s)
|
248
247
|
text = entities.encode(mtext.flatten.join, :hexadecimal)
|
249
248
|
symbols.each do |code, string|
|
250
|
-
text.gsub
|
249
|
+
text = text.gsub(code.downcase, "unicode[:#{string}]")
|
251
250
|
end
|
252
251
|
Math::Function::Text.new(text)
|
253
252
|
end
|
@@ -257,7 +256,7 @@ module Plurimath
|
|
257
256
|
symbols = Constants::UNICODE_SYMBOLS.transform_keys(&:to_s)
|
258
257
|
text = entities.encode(ms.first, :hexadecimal)
|
259
258
|
symbols.each do |code, string|
|
260
|
-
text.gsub
|
259
|
+
text = text.gsub(code.downcase, "unicode[:#{string}]")
|
261
260
|
end
|
262
261
|
Math::Function::Text.new(text)
|
263
262
|
end
|
@@ -11,13 +11,29 @@ module Plurimath
|
|
11
11
|
mr
|
12
12
|
r
|
13
13
|
].freeze
|
14
|
+
SUPPORTED_FONTS = {
|
15
|
+
"sans-serif-bi": "sans-serif-bold-italic",
|
16
|
+
"double-struck": "double-struck",
|
17
|
+
"sans-serif-i": "sans-serif-italic",
|
18
|
+
"sans-serif-b": "bold-sans-serif",
|
19
|
+
"sans-serif-p": "sans-serif",
|
20
|
+
"fraktur-p": "fraktur",
|
21
|
+
"fraktur-b": "bold-fraktur",
|
22
|
+
"script-b": "bold-script",
|
23
|
+
"script-p": "script",
|
24
|
+
monospace: "monospace",
|
25
|
+
bi: "bold-italic",
|
26
|
+
p: "normal",
|
27
|
+
i: "italic",
|
28
|
+
b: "bold",
|
29
|
+
}.freeze
|
14
30
|
|
15
31
|
def initialize(text)
|
16
32
|
@text = text
|
17
33
|
end
|
18
34
|
|
19
35
|
def parse
|
20
|
-
nodes =
|
36
|
+
nodes = Plurimath.xml_engine.load(text)
|
21
37
|
@hash = { sequence: parse_nodes(nodes.nodes) }
|
22
38
|
nodes = JSON.parse(@hash.to_json, symbolize_names: true)
|
23
39
|
Math::Formula.new(
|
@@ -55,14 +71,14 @@ module Plurimath
|
|
55
71
|
end
|
56
72
|
|
57
73
|
def organize_table_td(node)
|
58
|
-
node.locate("e
|
74
|
+
node.locate("e/*").each do |child_node|
|
59
75
|
child_node.name = "mtd" if child_node.name == "r"
|
60
76
|
end
|
61
77
|
end
|
62
78
|
|
63
79
|
def organize_fonts(node)
|
64
80
|
attrs_arr = { val: [] }
|
65
|
-
node.locate("rPr
|
81
|
+
node.locate("rPr/*").each do |child|
|
66
82
|
attrs_arr[:val] << child.attributes["val"]
|
67
83
|
end
|
68
84
|
node.attributes.merge! attrs_arr
|
@@ -72,7 +72,7 @@ module Plurimath
|
|
72
72
|
font = flatten_row.shift
|
73
73
|
font.new(
|
74
74
|
Utility.filter_values(flatten_row),
|
75
|
-
Utility::
|
75
|
+
Utility::FONT_STYLES.key(font).to_s,
|
76
76
|
)
|
77
77
|
else
|
78
78
|
Utility.filter_values(flatten_row)
|
@@ -98,11 +98,17 @@ module Plurimath
|
|
98
98
|
open_paren = fenced.shift if fenced&.first&.class_name == "symbol"
|
99
99
|
close_paren = fenced.shift if fenced&.first&.class_name == "symbol"
|
100
100
|
fenced_value = fenced.compact
|
101
|
-
Math::Function::
|
102
|
-
open_paren
|
103
|
-
fenced_value
|
104
|
-
|
105
|
-
|
101
|
+
if fenced_value.length == 1 && fenced_value.first.is_a?(Math::Function::Table)
|
102
|
+
fenced_value.first.open_paren = open_paren&.value
|
103
|
+
fenced_value.first.close_paren = close_paren&.value
|
104
|
+
fenced_value
|
105
|
+
else
|
106
|
+
Math::Function::Fenced.new(
|
107
|
+
open_paren,
|
108
|
+
fenced_value,
|
109
|
+
close_paren,
|
110
|
+
)
|
111
|
+
end
|
106
112
|
end
|
107
113
|
|
108
114
|
rule(dPr: subtree(:dpr)) do
|
@@ -116,7 +122,7 @@ module Plurimath
|
|
116
122
|
font = flatten_mtd.shift
|
117
123
|
font.new(
|
118
124
|
Utility.filter_values(flatten_mtd),
|
119
|
-
Utility::
|
125
|
+
Utility::FONT_STYLES.rassoc(font).first.to_s,
|
120
126
|
)
|
121
127
|
else
|
122
128
|
flatten_mtd
|
@@ -133,7 +139,11 @@ module Plurimath
|
|
133
139
|
|
134
140
|
rule(rPr: subtree(:rpr)) do
|
135
141
|
if rpr.is_a?(Array)
|
136
|
-
Utility::
|
142
|
+
Utility::FONT_STYLES[
|
143
|
+
Omml::Parser::SUPPORTED_FONTS[
|
144
|
+
rpr&.join("-")&.to_sym,
|
145
|
+
]&.to_sym,
|
146
|
+
]
|
137
147
|
end
|
138
148
|
end
|
139
149
|
|
@@ -172,12 +182,7 @@ module Plurimath
|
|
172
182
|
ternary_class.parameter_three = Utility.filter_values(nary[3])
|
173
183
|
ternary_class
|
174
184
|
else
|
175
|
-
|
176
|
-
[
|
177
|
-
Utility.nary_fonts(nary),
|
178
|
-
Utility.filter_values(nary[3]),
|
179
|
-
],
|
180
|
-
)
|
185
|
+
Utility.nary_fonts(nary)
|
181
186
|
end
|
182
187
|
end
|
183
188
|
|
@@ -0,0 +1,8 @@
|
|
1
|
+
require 'plurimath/math/core'
|
2
|
+
require 'plurimath/math/function'
|
3
|
+
<% (
|
4
|
+
Dir[File.dirname(__dir__)+"/math/function/*.rb"] +
|
5
|
+
Dir[File.dirname(__dir__)+"/math/function/**/*.rb"]
|
6
|
+
).each do |f| %>
|
7
|
+
require 'plurimath/<%= f.split("lib/plurimath").last.gsub(/.rb$/,'') %>'
|
8
|
+
<% end %>
|
data/lib/plurimath/utility.rb
CHANGED
@@ -4,24 +4,30 @@ module Plurimath
|
|
4
4
|
class Utility
|
5
5
|
FONT_STYLES = {
|
6
6
|
"double-struck": Math::Function::FontStyle::DoubleStruck,
|
7
|
+
"sans-serif-bold-italic": Math::Function::FontStyle::SansSerifBoldItalic,
|
8
|
+
"sans-serif-italic": Math::Function::FontStyle::SansSerifItalic,
|
9
|
+
"bold-sans-serif": Math::Function::FontStyle::BoldSansSerif,
|
7
10
|
"sans-serif": Math::Function::FontStyle::SansSerif,
|
11
|
+
"bold-fraktur": Math::Function::FontStyle::BoldFraktur,
|
12
|
+
"bold-italic": Math::Function::FontStyle::BoldItalic,
|
13
|
+
"bold-script": Math::Function::FontStyle::BoldScript,
|
8
14
|
monospace: Math::Function::FontStyle::Monospace,
|
9
|
-
fraktur: Math::Function::FontStyle::Fraktur,
|
10
|
-
script: Math::Function::FontStyle::Script,
|
11
|
-
normal: Math::Function::FontStyle::Normal,
|
12
|
-
bold: Math::Function::FontStyle::Bold,
|
13
15
|
mathfrak: Math::Function::FontStyle::Fraktur,
|
14
16
|
mathcal: Math::Function::FontStyle::Script,
|
17
|
+
fraktur: Math::Function::FontStyle::Fraktur,
|
15
18
|
mathbb: Math::Function::FontStyle::DoubleStruck,
|
16
19
|
mathtt: Math::Function::FontStyle::Monospace,
|
17
20
|
mathsf: Math::Function::FontStyle::SansSerif,
|
18
21
|
mathrm: Math::Function::FontStyle::Normal,
|
19
22
|
textrm: Math::Function::FontStyle::Normal,
|
23
|
+
italic: Math::Function::FontStyle::Italic,
|
20
24
|
mathbf: Math::Function::FontStyle::Bold,
|
21
25
|
textbf: Math::Function::FontStyle::Bold,
|
26
|
+
script: Math::Function::FontStyle::Script,
|
27
|
+
normal: Math::Function::FontStyle::Normal,
|
28
|
+
bold: Math::Function::FontStyle::Bold,
|
22
29
|
bbb: Math::Function::FontStyle::DoubleStruck,
|
23
30
|
cal: Math::Function::FontStyle::Script,
|
24
|
-
bf: Math::Function::FontStyle::Bold,
|
25
31
|
sf: Math::Function::FontStyle::SansSerif,
|
26
32
|
tt: Math::Function::FontStyle::Monospace,
|
27
33
|
fr: Math::Function::FontStyle::Fraktur,
|
@@ -29,6 +35,7 @@ module Plurimath
|
|
29
35
|
cc: Math::Function::FontStyle::Script,
|
30
36
|
ii: Math::Function::FontStyle::Italic,
|
31
37
|
bb: Math::Function::FontStyle::Bold,
|
38
|
+
bf: Math::Function::FontStyle::Bold,
|
32
39
|
}.freeze
|
33
40
|
ALIGNMENT_LETTERS = {
|
34
41
|
c: "center",
|
@@ -72,22 +79,6 @@ module Plurimath
|
|
72
79
|
max
|
73
80
|
min
|
74
81
|
].freeze
|
75
|
-
OMML_FONTS = {
|
76
|
-
"sans-serif-bi": Math::Function::FontStyle::SansSerifBoldItalic,
|
77
|
-
"sans-serif-i": Math::Function::FontStyle::SansSerifItalic,
|
78
|
-
"sans-serif-b": Math::Function::FontStyle::BoldSansSerif,
|
79
|
-
"double-struck": Math::Function::FontStyle::DoubleStruck,
|
80
|
-
"sans-serif-p": Math::Function::FontStyle::SansSerif,
|
81
|
-
"fraktur-p": Math::Function::FontStyle::Fraktur,
|
82
|
-
"fraktur-b": Math::Function::FontStyle::BoldFraktur,
|
83
|
-
"script-b": Math::Function::FontStyle::BoldScript,
|
84
|
-
"script-p": Math::Function::FontStyle::Script,
|
85
|
-
monospace: Math::Function::FontStyle::Monospace,
|
86
|
-
bi: Math::Function::FontStyle::BoldItalic,
|
87
|
-
p: Math::Function::FontStyle::Normal,
|
88
|
-
i: Math::Function::FontStyle::Italic,
|
89
|
-
b: Math::Function::FontStyle::Bold,
|
90
|
-
}.freeze
|
91
82
|
PARENTHESIS = {
|
92
83
|
"〈": "〉",
|
93
84
|
"⌊": "⌋",
|
@@ -100,6 +91,12 @@ module Plurimath
|
|
100
91
|
"{": "}",
|
101
92
|
"[": "]",
|
102
93
|
}.freeze
|
94
|
+
TEXT_CLASSES = %w[
|
95
|
+
unicode
|
96
|
+
symbol
|
97
|
+
number
|
98
|
+
text
|
99
|
+
].freeze
|
103
100
|
|
104
101
|
class << self
|
105
102
|
def organize_table(array, column_align: nil, options: nil)
|
@@ -196,7 +193,7 @@ module Plurimath
|
|
196
193
|
def ox_element(node, attributes: [], namespace: "")
|
197
194
|
namespace = "#{namespace}:" unless namespace.empty?
|
198
195
|
|
199
|
-
element =
|
196
|
+
element = Plurimath.xml_engine.new_element("#{namespace}#{node}")
|
200
197
|
attributes&.each do |attr_key, attr_value|
|
201
198
|
element[attr_key] = attr_value
|
202
199
|
end
|
@@ -258,12 +255,14 @@ module Plurimath
|
|
258
255
|
|
259
256
|
def nary_fonts(nary)
|
260
257
|
narypr = nary.first.flatten.compact
|
261
|
-
subsup = narypr.any?("undOvr") ? "
|
258
|
+
subsup = narypr.any?("undOvr") ? "undOvr" : "subSup"
|
262
259
|
unicode = narypr.any?(Hash) ? narypr.first[:chr] : "∫"
|
263
|
-
|
260
|
+
Math::Function::Nary.new(
|
264
261
|
Math::Symbol.new(string_to_html_entity(unicode)),
|
265
|
-
nary[1],
|
266
|
-
nary[2],
|
262
|
+
filter_values(nary[1]),
|
263
|
+
filter_values(nary[2]),
|
264
|
+
filter_values(nary[3]),
|
265
|
+
{ type: subsup }
|
267
266
|
)
|
268
267
|
end
|
269
268
|
|
@@ -292,7 +291,8 @@ module Plurimath
|
|
292
291
|
end
|
293
292
|
|
294
293
|
def td_value(td_object)
|
295
|
-
|
294
|
+
str_classes = [String, Parslet::Slice]
|
295
|
+
if str_classes.include?(td_object.class) && td_object.to_s.empty?
|
296
296
|
return Math::Function::Text.new(nil)
|
297
297
|
end
|
298
298
|
|
@@ -461,7 +461,7 @@ module Plurimath
|
|
461
461
|
end
|
462
462
|
|
463
463
|
def left_right_objects(paren, function)
|
464
|
-
paren = if paren.to_s.match?(
|
464
|
+
paren = if paren.to_s.match?(/\\\{|\\\}/)
|
465
465
|
paren.to_s.gsub(/\\/, "")
|
466
466
|
else
|
467
467
|
Latex::Constants::LEFT_RIGHT_PARENTHESIS[paren.to_sym]
|
@@ -478,12 +478,7 @@ module Plurimath
|
|
478
478
|
def mrow_left_right(mrow = [])
|
479
479
|
object = mrow.first
|
480
480
|
!(
|
481
|
-
(
|
482
|
-
(
|
483
|
-
object.is_a?(Math::Function::TernaryFunction) && object.any_value_exist?
|
484
|
-
) &&
|
485
|
-
(mrow.length <= 2)
|
486
|
-
) ||
|
481
|
+
((object.is_a?(Math::Function::TernaryFunction) && object.any_value_exist?) && (mrow.length <= 2)) ||
|
487
482
|
(object.is_a?(Math::Function::UnaryFunction) && mrow.length == 1)
|
488
483
|
)
|
489
484
|
end
|
@@ -573,6 +568,37 @@ module Plurimath
|
|
573
568
|
end
|
574
569
|
end
|
575
570
|
end
|
571
|
+
|
572
|
+
def validate_math_zone(object)
|
573
|
+
return false unless object
|
574
|
+
|
575
|
+
if object.is_a?(Math::Formula)
|
576
|
+
filter_math_zone_values(object.value).find do |d|
|
577
|
+
!d.is_a?(Math::Function::Text)
|
578
|
+
end
|
579
|
+
else
|
580
|
+
!TEXT_CLASSES.include?(object.class_name)
|
581
|
+
end
|
582
|
+
end
|
583
|
+
|
584
|
+
def filter_math_zone_values(value)
|
585
|
+
return [] if value&.empty?
|
586
|
+
|
587
|
+
new_arr = []
|
588
|
+
temp_array = []
|
589
|
+
skip_index = nil
|
590
|
+
value.each_with_index do |obj, index|
|
591
|
+
object = obj.dup
|
592
|
+
next if index == skip_index
|
593
|
+
next temp_array << object.value if TEXT_CLASSES.include?(object.class_name)
|
594
|
+
|
595
|
+
new_arr << Math::Function::Text.new(temp_array.join(" ")) if temp_array.any?
|
596
|
+
temp_array = []
|
597
|
+
new_arr << object
|
598
|
+
end
|
599
|
+
new_arr << Math::Function::Text.new(temp_array.join(" ")) if temp_array.any?
|
600
|
+
new_arr
|
601
|
+
end
|
576
602
|
end
|
577
603
|
end
|
578
604
|
end
|
data/lib/plurimath/version.rb
CHANGED
@@ -0,0 +1,246 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "plurimath/xml_engine"
|
4
|
+
require "corelib/array/pack" if RUBY_ENGINE == "opal"
|
5
|
+
require "oga"
|
6
|
+
|
7
|
+
module Plurimath
|
8
|
+
module XMLEngine
|
9
|
+
class Oga
|
10
|
+
class << self
|
11
|
+
def new_element(name)
|
12
|
+
data = ::Oga::XML::Element.new(name: name)
|
13
|
+
Node.new(data)
|
14
|
+
end
|
15
|
+
|
16
|
+
def dump(data, indent: nil)
|
17
|
+
Dumper.new(data, indent: indent).dump.out
|
18
|
+
end
|
19
|
+
|
20
|
+
def load(data)
|
21
|
+
data = ::Oga::XML::Parser.new(data, html: true).parse
|
22
|
+
if data.xml_declaration
|
23
|
+
Document.new(data)
|
24
|
+
else
|
25
|
+
Document.new(data).nodes.first
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def is_xml_comment?(node)
|
30
|
+
node = node.unwrap if node.respond_to? :unwrap
|
31
|
+
node.is_a?(Comment)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
# Create API compatible with Ox, per Plurimath usage
|
36
|
+
class Wrapper
|
37
|
+
def initialize(value)
|
38
|
+
@wrapped = value
|
39
|
+
end
|
40
|
+
|
41
|
+
def unwrap
|
42
|
+
@wrapped
|
43
|
+
end
|
44
|
+
|
45
|
+
def ==(other)
|
46
|
+
self.class == other.class &&
|
47
|
+
@wrapped.inspect == other.unwrap.inspect
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
class Node < Wrapper
|
52
|
+
# Ox removes text nodes that are whitespace-only.
|
53
|
+
# There exists a weird edge case on which Plurimath depends:
|
54
|
+
# <mi> <!-- xxx --> π<!--GREEK SMALL LETTER PI--> </mi>
|
55
|
+
# If the last text node of an element that does not contain other
|
56
|
+
# elements is a whitespace, it preserves it. The first one can be
|
57
|
+
# safely removed.
|
58
|
+
def nodes
|
59
|
+
children = @wrapped.children
|
60
|
+
length = children.length
|
61
|
+
preserve_last = true
|
62
|
+
children.map.with_index do |i,idx|
|
63
|
+
if preserve_last && idx == length-1 && i.is_a?(::Oga::XML::Text)
|
64
|
+
i.text
|
65
|
+
elsif i.is_a? ::Oga::XML::Text
|
66
|
+
remove_indentation(i)
|
67
|
+
elsif i.is_a? ::Oga::XML::Comment
|
68
|
+
Node.new(i)
|
69
|
+
else
|
70
|
+
preserve_last = false
|
71
|
+
Node.new(i)
|
72
|
+
end
|
73
|
+
end.compact
|
74
|
+
end
|
75
|
+
|
76
|
+
def [](attr)
|
77
|
+
attr = attr.to_s
|
78
|
+
|
79
|
+
@wrapped.attributes.each do |e|
|
80
|
+
return e.value if [e.name, e.name.split(":").last].include? attr
|
81
|
+
end
|
82
|
+
|
83
|
+
nil
|
84
|
+
end
|
85
|
+
|
86
|
+
def []=(attr, value)
|
87
|
+
# Here we tap into the internal representation due to some likely
|
88
|
+
# bug in Oga
|
89
|
+
attr = ::Oga::XML::Attribute.new(name: attr.to_s)
|
90
|
+
attr.element = @wrapped
|
91
|
+
attr.instance_variable_set(:@value, value.to_s)
|
92
|
+
attr.instance_variable_set(:@decoded, true)
|
93
|
+
@wrapped.attributes << attr
|
94
|
+
end
|
95
|
+
|
96
|
+
def <<(other)
|
97
|
+
other = other.unwrap if other.respond_to? :unwrap
|
98
|
+
|
99
|
+
case other
|
100
|
+
when String
|
101
|
+
text = other
|
102
|
+
# Here we tap into the internal representation due to some likely
|
103
|
+
# bug in Oga
|
104
|
+
other = ::Oga::XML::Text.new
|
105
|
+
other.instance_variable_set(:@from_plurimath, true)
|
106
|
+
other.instance_variable_set(:@text, text)
|
107
|
+
other.instance_variable_set(:@decoded, true)
|
108
|
+
end
|
109
|
+
|
110
|
+
@wrapped.children << other.dup
|
111
|
+
self
|
112
|
+
end
|
113
|
+
|
114
|
+
def attributes
|
115
|
+
@wrapped.attributes.to_h do |e|
|
116
|
+
[e.name.split(":").last, e.value]
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
def locate(xpath)
|
121
|
+
@wrapped.xpath(xpath).map do |i|
|
122
|
+
case i
|
123
|
+
when ::Oga::XML::Text
|
124
|
+
i.text
|
125
|
+
when ::Oga::XML::Attribute
|
126
|
+
i.value
|
127
|
+
else
|
128
|
+
Node.new(i)
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
def name
|
134
|
+
@wrapped.name
|
135
|
+
end
|
136
|
+
|
137
|
+
def name=(new_name)
|
138
|
+
@wrapped.name = new_name
|
139
|
+
end
|
140
|
+
|
141
|
+
private
|
142
|
+
|
143
|
+
def remove_indentation(text)
|
144
|
+
from_us = text.instance_variable_get(:@from_plurimath)
|
145
|
+
!from_us && text.text.strip == "" ? nil : text.text
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
class Document < Node
|
150
|
+
end
|
151
|
+
|
152
|
+
Comment = ::Oga::XML::Comment
|
153
|
+
|
154
|
+
# Dump the tree just as if we were Ox. This is a limited implementation.
|
155
|
+
class Dumper
|
156
|
+
def initialize(tree, indent: nil)
|
157
|
+
@tree = tree
|
158
|
+
@indent = indent
|
159
|
+
@depth = 0
|
160
|
+
@out = ""
|
161
|
+
end
|
162
|
+
|
163
|
+
def dump(node = @tree)
|
164
|
+
case node
|
165
|
+
when Node
|
166
|
+
nodes = node.nodes
|
167
|
+
if nodes.length == 0
|
168
|
+
line_break
|
169
|
+
@out += "<#{node.unwrap.name}#{dump_attrs(node)}/>"
|
170
|
+
else
|
171
|
+
line_break
|
172
|
+
@out += "<#{node.unwrap.name}#{dump_attrs(node)}>"
|
173
|
+
@depth += 1
|
174
|
+
nodes.each { |i| dump(i) }
|
175
|
+
@depth -= 1
|
176
|
+
line_break unless nodes.last.is_a?(::String)
|
177
|
+
@out += "</#{node.unwrap.name}>"
|
178
|
+
end
|
179
|
+
when ::String
|
180
|
+
@out += entities(node)
|
181
|
+
end
|
182
|
+
|
183
|
+
line_break if node.object_id == @tree.object_id
|
184
|
+
|
185
|
+
self
|
186
|
+
end
|
187
|
+
|
188
|
+
attr_reader :out
|
189
|
+
|
190
|
+
ORD_AMP="&".ord
|
191
|
+
ORD_LT="<".ord
|
192
|
+
ORD_GT=">".ord
|
193
|
+
ORD_APOS="'".ord
|
194
|
+
ORD_QUOT='"'.ord
|
195
|
+
ORD_NEWLINE="\n".ord
|
196
|
+
ORD_CARRIAGERETURN="\r".ord
|
197
|
+
|
198
|
+
def self.entities(text,attr=false)
|
199
|
+
text.to_s.chars.map(&:ord).map do |i|
|
200
|
+
if i == ORD_AMP
|
201
|
+
"&amp;"
|
202
|
+
elsif i == ORD_LT
|
202
|
+
"&lt;"
|
204
|
+
elsif i == ORD_GT
|
204
|
+
"&gt;"
|
206
|
+
elsif i == ORD_QUOT && attr
|
206
|
+
"&quot;"
|
208
|
+
elsif i == ORD_NEWLINE || i == ORD_CARRIAGERETURN
|
209
|
+
i.chr("utf-8")
|
210
|
+
elsif i < 0x20
|
211
|
+
"&#x#{i.to_s(16).rjust(4, "0")};"
|
212
|
+
else
|
213
|
+
i.chr("utf-8")
|
214
|
+
end
|
215
|
+
end.join
|
216
|
+
end
|
217
|
+
|
218
|
+
private
|
219
|
+
|
220
|
+
def dump_attrs(node)
|
221
|
+
node.unwrap.attributes.map do |i|
|
222
|
+
# Currently, this is not part of the contract. But in the future
|
223
|
+
# it may be needed to also handle namespaces:
|
224
|
+
#
|
225
|
+
# if i.namespace
|
226
|
+
# %{ #{i.namespace.name}:#{i.name}="#{attr_entities i.value}"}
|
227
|
+
%{ #{i.name}="#{attr_entities i.value}"}
|
228
|
+
end.join
|
229
|
+
end
|
230
|
+
|
231
|
+
def entities(text)
|
232
|
+
self.class.entities(text)
|
233
|
+
end
|
234
|
+
|
235
|
+
def attr_entities(text)
|
236
|
+
self.class.entities(text, true)
|
237
|
+
end
|
238
|
+
|
239
|
+
def line_break
|
240
|
+
@out += "\n"
|
241
|
+
@out += " " * (@indent * @depth) if @indent
|
242
|
+
end
|
243
|
+
end
|
244
|
+
end
|
245
|
+
end
|
246
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "plurimath/xml_engine"
|
4
|
+
require "ox"
|
5
|
+
Ox.default_options = { encoding: "UTF-8" }
|
6
|
+
|
7
|
+
module Plurimath
|
8
|
+
module XMLEngine
|
9
|
+
class Ox
|
10
|
+
class << self
|
11
|
+
def new_element(name)
|
12
|
+
::Ox::Element.new(name)
|
13
|
+
end
|
14
|
+
|
15
|
+
def dump(data, **options)
|
16
|
+
::Ox.dump(data, **options)
|
17
|
+
end
|
18
|
+
|
19
|
+
def load(data)
|
20
|
+
::Ox.load(data, strip_namespace: true)
|
21
|
+
end
|
22
|
+
|
23
|
+
def is_xml_comment?(node)
|
24
|
+
node.is_a?(::Ox::Comment)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|