plurimath 0.4.4 → 0.6.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.gitignore +4 -0
- data/.rspec +3 -0
- data/.rspec-opal +11 -0
- data/Gemfile +3 -0
- data/Rakefile +11 -0
- data/lib/plurimath/asciimath/transform.rb +15 -0
- data/lib/plurimath/latex/constants.rb +3 -0
- data/lib/plurimath/latex/parse.rb +20 -11
- data/lib/plurimath/latex/transform.rb +24 -2
- data/lib/plurimath/math/core.rb +88 -0
- data/lib/plurimath/math/formula.rb +68 -24
- data/lib/plurimath/math/function/base.rb +8 -2
- data/lib/plurimath/math/function/binary_function.rb +36 -4
- data/lib/plurimath/math/function/color.rb +14 -0
- data/lib/plurimath/math/function/fenced.rb +27 -0
- data/lib/plurimath/math/function/floor.rb +1 -1
- data/lib/plurimath/math/function/font_style.rb +46 -0
- data/lib/plurimath/math/function/frac.rb +6 -0
- data/lib/plurimath/math/function/int.rb +7 -0
- data/lib/plurimath/math/function/left.rb +19 -1
- data/lib/plurimath/math/function/lim.rb +6 -0
- data/lib/plurimath/math/function/limits.rb +7 -0
- data/lib/plurimath/math/function/log.rb +6 -0
- data/lib/plurimath/math/function/menclose.rb +6 -0
- data/lib/plurimath/math/function/mod.rb +6 -0
- data/lib/plurimath/math/function/msgroup.rb +28 -0
- data/lib/plurimath/math/function/multiscript.rb +7 -0
- data/lib/plurimath/math/function/nary.rb +94 -0
- data/lib/plurimath/math/function/oint.rb +6 -0
- data/lib/plurimath/math/function/over.rb +6 -0
- data/lib/plurimath/math/function/overset.rb +6 -0
- data/lib/plurimath/math/function/power.rb +8 -2
- data/lib/plurimath/math/function/power_base.rb +10 -31
- data/lib/plurimath/math/function/prod.rb +19 -18
- data/lib/plurimath/math/function/right.rb +19 -1
- data/lib/plurimath/math/function/root.rb +6 -0
- data/lib/plurimath/math/function/rule.rb +7 -0
- data/lib/plurimath/math/function/semantics.rb +6 -0
- data/lib/plurimath/math/function/stackrel.rb +6 -0
- data/lib/plurimath/math/function/substack.rb +6 -0
- data/lib/plurimath/math/function/sum.rb +26 -25
- data/lib/plurimath/math/function/table.rb +52 -24
- data/lib/plurimath/math/function/td.rb +28 -0
- data/lib/plurimath/math/function/ternary_function.rb +44 -4
- data/lib/plurimath/math/function/text.rb +25 -3
- data/lib/plurimath/math/function/tr.rb +28 -0
- data/lib/plurimath/math/function/unary_function.rb +43 -3
- data/lib/plurimath/math/function/underover.rb +7 -55
- data/lib/plurimath/math/function/underset.rb +6 -0
- data/lib/plurimath/math/function/vec.rb +40 -0
- data/lib/plurimath/math/function.rb +7 -5
- data/lib/plurimath/math/number.rb +9 -5
- data/lib/plurimath/math/symbol.rb +13 -9
- data/lib/plurimath/math.rb +1 -3
- data/lib/plurimath/mathml/parser.rb +4 -4
- data/lib/plurimath/mathml/transform.rb +3 -4
- data/lib/plurimath/omml/parser.rb +19 -3
- data/lib/plurimath/omml/transform.rb +19 -14
- data/lib/plurimath/setup/oga.rb +5 -0
- data/lib/plurimath/setup/opal.rb.erb +8 -0
- data/lib/plurimath/setup/ox.rb +5 -0
- data/lib/plurimath/utility.rb +60 -34
- data/lib/plurimath/version.rb +1 -1
- data/lib/plurimath/xml_engine/oga.rb +246 -0
- data/lib/plurimath/xml_engine/ox.rb +29 -0
- data/lib/plurimath/xml_engine.rb +6 -0
- data/lib/plurimath.rb +12 -2
- metadata +11 -2
data/lib/plurimath/math.rb
CHANGED
@@ -19,9 +19,7 @@ require_relative "latex/parser"
|
|
19
19
|
require_relative "html/parser"
|
20
20
|
require_relative "omml/parser"
|
21
21
|
require_relative "utility"
|
22
|
-
require "ox"
|
23
22
|
require "yaml"
|
24
|
-
Ox.default_options = { encoding: "UTF-8" }
|
25
23
|
|
26
24
|
module Plurimath
|
27
25
|
module Math
|
@@ -72,7 +70,7 @@ module Plurimath
|
|
72
70
|
|
73
71
|
def type_error!
|
74
72
|
raise InvalidTypeError.new(
|
75
|
-
"`type` must be one of: `#{VALID_TYPES.keys.join('`, `')}`"
|
73
|
+
"`type` must be one of: `#{VALID_TYPES.keys.join('`, `')}`",
|
76
74
|
)
|
77
75
|
end
|
78
76
|
|
@@ -23,8 +23,8 @@ module Plurimath
|
|
23
23
|
end
|
24
24
|
|
25
25
|
def parse
|
26
|
-
ox_nodes =
|
27
|
-
display_style = ox_nodes&.locate("
|
26
|
+
ox_nodes = Plurimath.xml_engine.load(text)
|
27
|
+
display_style = ox_nodes&.locate("mstyle/@displaystyle")&.first
|
28
28
|
nodes = parse_nodes(ox_nodes.nodes)
|
29
29
|
Math::Formula.new(
|
30
30
|
Transform.new.apply(nodes).flatten.compact,
|
@@ -34,7 +34,7 @@ module Plurimath
|
|
34
34
|
|
35
35
|
def parse_nodes(nodes)
|
36
36
|
nodes.map do |node|
|
37
|
-
next if
|
37
|
+
next if Plurimath.xml_engine.is_xml_comment?(node)
|
38
38
|
|
39
39
|
if node.is_a?(String)
|
40
40
|
node
|
@@ -47,7 +47,7 @@ module Plurimath
|
|
47
47
|
end
|
48
48
|
|
49
49
|
def validate_attributes(attributes)
|
50
|
-
attributes&.select! { |key, _| SUPPORTED_ATTRS.include?(key
|
50
|
+
attributes&.select! { |key, _| SUPPORTED_ATTRS.include?(key.to_s) }
|
51
51
|
attributes&.transform_keys(&:to_sym) if attributes&.any?
|
52
52
|
end
|
53
53
|
|
@@ -88,8 +88,7 @@ module Plurimath
|
|
88
88
|
|
89
89
|
rule(merror: sequence(:merror)) do
|
90
90
|
Math::Function::Merror.new(
|
91
|
-
merror
|
92
|
-
merror[1],
|
91
|
+
Utility.filter_values(merror),
|
93
92
|
)
|
94
93
|
end
|
95
94
|
|
@@ -247,7 +246,7 @@ module Plurimath
|
|
247
246
|
symbols = Constants::UNICODE_SYMBOLS.transform_keys(&:to_s)
|
248
247
|
text = entities.encode(mtext.flatten.join, :hexadecimal)
|
249
248
|
symbols.each do |code, string|
|
250
|
-
text.gsub
|
249
|
+
text = text.gsub(code.downcase, "unicode[:#{string}]")
|
251
250
|
end
|
252
251
|
Math::Function::Text.new(text)
|
253
252
|
end
|
@@ -257,7 +256,7 @@ module Plurimath
|
|
257
256
|
symbols = Constants::UNICODE_SYMBOLS.transform_keys(&:to_s)
|
258
257
|
text = entities.encode(ms.first, :hexadecimal)
|
259
258
|
symbols.each do |code, string|
|
260
|
-
text.gsub
|
259
|
+
text = text.gsub(code.downcase, "unicode[:#{string}]")
|
261
260
|
end
|
262
261
|
Math::Function::Text.new(text)
|
263
262
|
end
|
@@ -11,13 +11,29 @@ module Plurimath
|
|
11
11
|
mr
|
12
12
|
r
|
13
13
|
].freeze
|
14
|
+
SUPPORTED_FONTS = {
|
15
|
+
"sans-serif-bi": "sans-serif-bold-italic",
|
16
|
+
"double-struck": "double-struck",
|
17
|
+
"sans-serif-i": "sans-serif-italic",
|
18
|
+
"sans-serif-b": "bold-sans-serif",
|
19
|
+
"sans-serif-p": "sans-serif",
|
20
|
+
"fraktur-p": "fraktur",
|
21
|
+
"fraktur-b": "bold-fraktur",
|
22
|
+
"script-b": "bold-script",
|
23
|
+
"script-p": "script",
|
24
|
+
monospace: "monospace",
|
25
|
+
bi: "bold-italic",
|
26
|
+
p: "normal",
|
27
|
+
i: "italic",
|
28
|
+
b: "bold",
|
29
|
+
}.freeze
|
14
30
|
|
15
31
|
def initialize(text)
|
16
32
|
@text = text
|
17
33
|
end
|
18
34
|
|
19
35
|
def parse
|
20
|
-
nodes =
|
36
|
+
nodes = Plurimath.xml_engine.load(text)
|
21
37
|
@hash = { sequence: parse_nodes(nodes.nodes) }
|
22
38
|
nodes = JSON.parse(@hash.to_json, symbolize_names: true)
|
23
39
|
Math::Formula.new(
|
@@ -55,14 +71,14 @@ module Plurimath
|
|
55
71
|
end
|
56
72
|
|
57
73
|
def organize_table_td(node)
|
58
|
-
node.locate("e
|
74
|
+
node.locate("e/*").each do |child_node|
|
59
75
|
child_node.name = "mtd" if child_node.name == "r"
|
60
76
|
end
|
61
77
|
end
|
62
78
|
|
63
79
|
def organize_fonts(node)
|
64
80
|
attrs_arr = { val: [] }
|
65
|
-
node.locate("rPr
|
81
|
+
node.locate("rPr/*").each do |child|
|
66
82
|
attrs_arr[:val] << child.attributes["val"]
|
67
83
|
end
|
68
84
|
node.attributes.merge! attrs_arr
|
@@ -72,7 +72,7 @@ module Plurimath
|
|
72
72
|
font = flatten_row.shift
|
73
73
|
font.new(
|
74
74
|
Utility.filter_values(flatten_row),
|
75
|
-
Utility::
|
75
|
+
Utility::FONT_STYLES.key(font).to_s,
|
76
76
|
)
|
77
77
|
else
|
78
78
|
Utility.filter_values(flatten_row)
|
@@ -98,11 +98,17 @@ module Plurimath
|
|
98
98
|
open_paren = fenced.shift if fenced&.first&.class_name == "symbol"
|
99
99
|
close_paren = fenced.shift if fenced&.first&.class_name == "symbol"
|
100
100
|
fenced_value = fenced.compact
|
101
|
-
Math::Function::
|
102
|
-
open_paren
|
103
|
-
fenced_value
|
104
|
-
|
105
|
-
|
101
|
+
if fenced_value.length == 1 && fenced_value.first.is_a?(Math::Function::Table)
|
102
|
+
fenced_value.first.open_paren = open_paren&.value
|
103
|
+
fenced_value.first.close_paren = close_paren&.value
|
104
|
+
fenced_value
|
105
|
+
else
|
106
|
+
Math::Function::Fenced.new(
|
107
|
+
open_paren,
|
108
|
+
fenced_value,
|
109
|
+
close_paren,
|
110
|
+
)
|
111
|
+
end
|
106
112
|
end
|
107
113
|
|
108
114
|
rule(dPr: subtree(:dpr)) do
|
@@ -116,7 +122,7 @@ module Plurimath
|
|
116
122
|
font = flatten_mtd.shift
|
117
123
|
font.new(
|
118
124
|
Utility.filter_values(flatten_mtd),
|
119
|
-
Utility::
|
125
|
+
Utility::FONT_STYLES.rassoc(font).first.to_s,
|
120
126
|
)
|
121
127
|
else
|
122
128
|
flatten_mtd
|
@@ -133,7 +139,11 @@ module Plurimath
|
|
133
139
|
|
134
140
|
rule(rPr: subtree(:rpr)) do
|
135
141
|
if rpr.is_a?(Array)
|
136
|
-
Utility::
|
142
|
+
Utility::FONT_STYLES[
|
143
|
+
Omml::Parser::SUPPORTED_FONTS[
|
144
|
+
rpr&.join("-")&.to_sym,
|
145
|
+
]&.to_sym,
|
146
|
+
]
|
137
147
|
end
|
138
148
|
end
|
139
149
|
|
@@ -172,12 +182,7 @@ module Plurimath
|
|
172
182
|
ternary_class.parameter_three = Utility.filter_values(nary[3])
|
173
183
|
ternary_class
|
174
184
|
else
|
175
|
-
|
176
|
-
[
|
177
|
-
Utility.nary_fonts(nary),
|
178
|
-
Utility.filter_values(nary[3]),
|
179
|
-
],
|
180
|
-
)
|
185
|
+
Utility.nary_fonts(nary)
|
181
186
|
end
|
182
187
|
end
|
183
188
|
|
@@ -0,0 +1,8 @@
|
|
1
|
+
require 'plurimath/math/core'
|
2
|
+
require 'plurimath/math/function'
|
3
|
+
<% (
|
4
|
+
Dir[File.dirname(__dir__)+"/math/function/*.rb"] +
|
5
|
+
Dir[File.dirname(__dir__)+"/math/function/**/*.rb"]
|
6
|
+
).each do |f| %>
|
7
|
+
require 'plurimath/<%= f.split("lib/plurimath").last.gsub(/.rb$/,'') %>'
|
8
|
+
<% end %>
|
data/lib/plurimath/utility.rb
CHANGED
@@ -4,24 +4,30 @@ module Plurimath
|
|
4
4
|
class Utility
|
5
5
|
FONT_STYLES = {
|
6
6
|
"double-struck": Math::Function::FontStyle::DoubleStruck,
|
7
|
+
"sans-serif-bold-italic": Math::Function::FontStyle::SansSerifBoldItalic,
|
8
|
+
"sans-serif-italic": Math::Function::FontStyle::SansSerifItalic,
|
9
|
+
"bold-sans-serif": Math::Function::FontStyle::BoldSansSerif,
|
7
10
|
"sans-serif": Math::Function::FontStyle::SansSerif,
|
11
|
+
"bold-fraktur": Math::Function::FontStyle::BoldFraktur,
|
12
|
+
"bold-italic": Math::Function::FontStyle::BoldItalic,
|
13
|
+
"bold-script": Math::Function::FontStyle::BoldScript,
|
8
14
|
monospace: Math::Function::FontStyle::Monospace,
|
9
|
-
fraktur: Math::Function::FontStyle::Fraktur,
|
10
|
-
script: Math::Function::FontStyle::Script,
|
11
|
-
normal: Math::Function::FontStyle::Normal,
|
12
|
-
bold: Math::Function::FontStyle::Bold,
|
13
15
|
mathfrak: Math::Function::FontStyle::Fraktur,
|
14
16
|
mathcal: Math::Function::FontStyle::Script,
|
17
|
+
fraktur: Math::Function::FontStyle::Fraktur,
|
15
18
|
mathbb: Math::Function::FontStyle::DoubleStruck,
|
16
19
|
mathtt: Math::Function::FontStyle::Monospace,
|
17
20
|
mathsf: Math::Function::FontStyle::SansSerif,
|
18
21
|
mathrm: Math::Function::FontStyle::Normal,
|
19
22
|
textrm: Math::Function::FontStyle::Normal,
|
23
|
+
italic: Math::Function::FontStyle::Italic,
|
20
24
|
mathbf: Math::Function::FontStyle::Bold,
|
21
25
|
textbf: Math::Function::FontStyle::Bold,
|
26
|
+
script: Math::Function::FontStyle::Script,
|
27
|
+
normal: Math::Function::FontStyle::Normal,
|
28
|
+
bold: Math::Function::FontStyle::Bold,
|
22
29
|
bbb: Math::Function::FontStyle::DoubleStruck,
|
23
30
|
cal: Math::Function::FontStyle::Script,
|
24
|
-
bf: Math::Function::FontStyle::Bold,
|
25
31
|
sf: Math::Function::FontStyle::SansSerif,
|
26
32
|
tt: Math::Function::FontStyle::Monospace,
|
27
33
|
fr: Math::Function::FontStyle::Fraktur,
|
@@ -29,6 +35,7 @@ module Plurimath
|
|
29
35
|
cc: Math::Function::FontStyle::Script,
|
30
36
|
ii: Math::Function::FontStyle::Italic,
|
31
37
|
bb: Math::Function::FontStyle::Bold,
|
38
|
+
bf: Math::Function::FontStyle::Bold,
|
32
39
|
}.freeze
|
33
40
|
ALIGNMENT_LETTERS = {
|
34
41
|
c: "center",
|
@@ -72,22 +79,6 @@ module Plurimath
|
|
72
79
|
max
|
73
80
|
min
|
74
81
|
].freeze
|
75
|
-
OMML_FONTS = {
|
76
|
-
"sans-serif-bi": Math::Function::FontStyle::SansSerifBoldItalic,
|
77
|
-
"sans-serif-i": Math::Function::FontStyle::SansSerifItalic,
|
78
|
-
"sans-serif-b": Math::Function::FontStyle::BoldSansSerif,
|
79
|
-
"double-struck": Math::Function::FontStyle::DoubleStruck,
|
80
|
-
"sans-serif-p": Math::Function::FontStyle::SansSerif,
|
81
|
-
"fraktur-p": Math::Function::FontStyle::Fraktur,
|
82
|
-
"fraktur-b": Math::Function::FontStyle::BoldFraktur,
|
83
|
-
"script-b": Math::Function::FontStyle::BoldScript,
|
84
|
-
"script-p": Math::Function::FontStyle::Script,
|
85
|
-
monospace: Math::Function::FontStyle::Monospace,
|
86
|
-
bi: Math::Function::FontStyle::BoldItalic,
|
87
|
-
p: Math::Function::FontStyle::Normal,
|
88
|
-
i: Math::Function::FontStyle::Italic,
|
89
|
-
b: Math::Function::FontStyle::Bold,
|
90
|
-
}.freeze
|
91
82
|
PARENTHESIS = {
|
92
83
|
"〈": "〉",
|
93
84
|
"⌊": "⌋",
|
@@ -100,6 +91,12 @@ module Plurimath
|
|
100
91
|
"{": "}",
|
101
92
|
"[": "]",
|
102
93
|
}.freeze
|
94
|
+
TEXT_CLASSES = %w[
|
95
|
+
unicode
|
96
|
+
symbol
|
97
|
+
number
|
98
|
+
text
|
99
|
+
].freeze
|
103
100
|
|
104
101
|
class << self
|
105
102
|
def organize_table(array, column_align: nil, options: nil)
|
@@ -196,7 +193,7 @@ module Plurimath
|
|
196
193
|
def ox_element(node, attributes: [], namespace: "")
|
197
194
|
namespace = "#{namespace}:" unless namespace.empty?
|
198
195
|
|
199
|
-
element =
|
196
|
+
element = Plurimath.xml_engine.new_element("#{namespace}#{node}")
|
200
197
|
attributes&.each do |attr_key, attr_value|
|
201
198
|
element[attr_key] = attr_value
|
202
199
|
end
|
@@ -258,12 +255,14 @@ module Plurimath
|
|
258
255
|
|
259
256
|
def nary_fonts(nary)
|
260
257
|
narypr = nary.first.flatten.compact
|
261
|
-
subsup = narypr.any?("undOvr") ? "
|
258
|
+
subsup = narypr.any?("undOvr") ? "undOvr" : "subSup"
|
262
259
|
unicode = narypr.any?(Hash) ? narypr.first[:chr] : "∫"
|
263
|
-
|
260
|
+
Math::Function::Nary.new(
|
264
261
|
Math::Symbol.new(string_to_html_entity(unicode)),
|
265
|
-
nary[1],
|
266
|
-
nary[2],
|
262
|
+
filter_values(nary[1]),
|
263
|
+
filter_values(nary[2]),
|
264
|
+
filter_values(nary[3]),
|
265
|
+
{ type: subsup }
|
267
266
|
)
|
268
267
|
end
|
269
268
|
|
@@ -292,7 +291,8 @@ module Plurimath
|
|
292
291
|
end
|
293
292
|
|
294
293
|
def td_value(td_object)
|
295
|
-
|
294
|
+
str_classes = [String, Parslet::Slice]
|
295
|
+
if str_classes.include?(td_object.class) && td_object.to_s.empty?
|
296
296
|
return Math::Function::Text.new(nil)
|
297
297
|
end
|
298
298
|
|
@@ -461,7 +461,7 @@ module Plurimath
|
|
461
461
|
end
|
462
462
|
|
463
463
|
def left_right_objects(paren, function)
|
464
|
-
paren = if paren.to_s.match?(
|
464
|
+
paren = if paren.to_s.match?(/\\\{|\\\}/)
|
465
465
|
paren.to_s.gsub(/\\/, "")
|
466
466
|
else
|
467
467
|
Latex::Constants::LEFT_RIGHT_PARENTHESIS[paren.to_sym]
|
@@ -478,12 +478,7 @@ module Plurimath
|
|
478
478
|
def mrow_left_right(mrow = [])
|
479
479
|
object = mrow.first
|
480
480
|
!(
|
481
|
-
(
|
482
|
-
(
|
483
|
-
object.is_a?(Math::Function::TernaryFunction) && object.any_value_exist?
|
484
|
-
) &&
|
485
|
-
(mrow.length <= 2)
|
486
|
-
) ||
|
481
|
+
((object.is_a?(Math::Function::TernaryFunction) && object.any_value_exist?) && (mrow.length <= 2)) ||
|
487
482
|
(object.is_a?(Math::Function::UnaryFunction) && mrow.length == 1)
|
488
483
|
)
|
489
484
|
end
|
@@ -573,6 +568,37 @@ module Plurimath
|
|
573
568
|
end
|
574
569
|
end
|
575
570
|
end
|
571
|
+
|
572
|
+
def validate_math_zone(object)
|
573
|
+
return false unless object
|
574
|
+
|
575
|
+
if object.is_a?(Math::Formula)
|
576
|
+
filter_math_zone_values(object.value).find do |d|
|
577
|
+
!d.is_a?(Math::Function::Text)
|
578
|
+
end
|
579
|
+
else
|
580
|
+
!TEXT_CLASSES.include?(object.class_name)
|
581
|
+
end
|
582
|
+
end
|
583
|
+
|
584
|
+
def filter_math_zone_values(value)
|
585
|
+
return [] if value&.empty?
|
586
|
+
|
587
|
+
new_arr = []
|
588
|
+
temp_array = []
|
589
|
+
skip_index = nil
|
590
|
+
value.each_with_index do |obj, index|
|
591
|
+
object = obj.dup
|
592
|
+
next if index == skip_index
|
593
|
+
next temp_array << object.value if TEXT_CLASSES.include?(object.class_name)
|
594
|
+
|
595
|
+
new_arr << Math::Function::Text.new(temp_array.join(" ")) if temp_array.any?
|
596
|
+
temp_array = []
|
597
|
+
new_arr << object
|
598
|
+
end
|
599
|
+
new_arr << Math::Function::Text.new(temp_array.join(" ")) if temp_array.any?
|
600
|
+
new_arr
|
601
|
+
end
|
576
602
|
end
|
577
603
|
end
|
578
604
|
end
|
data/lib/plurimath/version.rb
CHANGED
@@ -0,0 +1,246 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "plurimath/xml_engine"
|
4
|
+
require "corelib/array/pack" if RUBY_ENGINE == "opal"
|
5
|
+
require "oga"
|
6
|
+
|
7
|
+
module Plurimath
|
8
|
+
module XMLEngine
|
9
|
+
class Oga
|
10
|
+
class << self
|
11
|
+
def new_element(name)
|
12
|
+
data = ::Oga::XML::Element.new(name: name)
|
13
|
+
Node.new(data)
|
14
|
+
end
|
15
|
+
|
16
|
+
def dump(data, indent: nil)
|
17
|
+
Dumper.new(data, indent: indent).dump.out
|
18
|
+
end
|
19
|
+
|
20
|
+
def load(data)
|
21
|
+
data = ::Oga::XML::Parser.new(data, html: true).parse
|
22
|
+
if data.xml_declaration
|
23
|
+
Document.new(data)
|
24
|
+
else
|
25
|
+
Document.new(data).nodes.first
|
26
|
+
end
|
27
|
+
end
|
28
|
+
|
29
|
+
def is_xml_comment?(node)
|
30
|
+
node = node.unwrap if node.respond_to? :unwrap
|
31
|
+
node.is_a?(Comment)
|
32
|
+
end
|
33
|
+
end
|
34
|
+
|
35
|
+
# Create API compatible with Ox, per Plurimath usage
|
36
|
+
class Wrapper
|
37
|
+
def initialize(value)
|
38
|
+
@wrapped = value
|
39
|
+
end
|
40
|
+
|
41
|
+
def unwrap
|
42
|
+
@wrapped
|
43
|
+
end
|
44
|
+
|
45
|
+
def ==(other)
|
46
|
+
self.class == other.class &&
|
47
|
+
@wrapped.inspect == other.unwrap.inspect
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
class Node < Wrapper
|
52
|
+
# Ox removes text nodes that are whitespace-only.
|
53
|
+
# There exists a weird edge case on which Plurimath depends:
|
54
|
+
# <mi> <!-- xxx --> π<!--GREEK SMALL LETTER PI--> </mi>
|
55
|
+
# If the last text node of an element that does not contain other
|
56
|
+
# elements is a whitespace, it preserves it. The first one can be
|
57
|
+
# safely removed.
|
58
|
+
def nodes
|
59
|
+
children = @wrapped.children
|
60
|
+
length = children.length
|
61
|
+
preserve_last = true
|
62
|
+
children.map.with_index do |i,idx|
|
63
|
+
if preserve_last && idx == length-1 && i.is_a?(::Oga::XML::Text)
|
64
|
+
i.text
|
65
|
+
elsif i.is_a? ::Oga::XML::Text
|
66
|
+
remove_indentation(i)
|
67
|
+
elsif i.is_a? ::Oga::XML::Comment
|
68
|
+
Node.new(i)
|
69
|
+
else
|
70
|
+
preserve_last = false
|
71
|
+
Node.new(i)
|
72
|
+
end
|
73
|
+
end.compact
|
74
|
+
end
|
75
|
+
|
76
|
+
def [](attr)
|
77
|
+
attr = attr.to_s
|
78
|
+
|
79
|
+
@wrapped.attributes.each do |e|
|
80
|
+
return e.value if [e.name, e.name.split(":").last].include? attr
|
81
|
+
end
|
82
|
+
|
83
|
+
nil
|
84
|
+
end
|
85
|
+
|
86
|
+
def []=(attr, value)
|
87
|
+
# Here we tap into the internal representation due to some likely
|
88
|
+
# bug in Oga
|
89
|
+
attr = ::Oga::XML::Attribute.new(name: attr.to_s)
|
90
|
+
attr.element = @wrapped
|
91
|
+
attr.instance_variable_set(:@value, value.to_s)
|
92
|
+
attr.instance_variable_set(:@decoded, true)
|
93
|
+
@wrapped.attributes << attr
|
94
|
+
end
|
95
|
+
|
96
|
+
def <<(other)
|
97
|
+
other = other.unwrap if other.respond_to? :unwrap
|
98
|
+
|
99
|
+
case other
|
100
|
+
when String
|
101
|
+
text = other
|
102
|
+
# Here we tap into the internal representation due to some likely
|
103
|
+
# bug in Oga
|
104
|
+
other = ::Oga::XML::Text.new
|
105
|
+
other.instance_variable_set(:@from_plurimath, true)
|
106
|
+
other.instance_variable_set(:@text, text)
|
107
|
+
other.instance_variable_set(:@decoded, true)
|
108
|
+
end
|
109
|
+
|
110
|
+
@wrapped.children << other.dup
|
111
|
+
self
|
112
|
+
end
|
113
|
+
|
114
|
+
def attributes
|
115
|
+
@wrapped.attributes.to_h do |e|
|
116
|
+
[e.name.split(":").last, e.value]
|
117
|
+
end
|
118
|
+
end
|
119
|
+
|
120
|
+
def locate(xpath)
|
121
|
+
@wrapped.xpath(xpath).map do |i|
|
122
|
+
case i
|
123
|
+
when ::Oga::XML::Text
|
124
|
+
i.text
|
125
|
+
when ::Oga::XML::Attribute
|
126
|
+
i.value
|
127
|
+
else
|
128
|
+
Node.new(i)
|
129
|
+
end
|
130
|
+
end
|
131
|
+
end
|
132
|
+
|
133
|
+
def name
|
134
|
+
@wrapped.name
|
135
|
+
end
|
136
|
+
|
137
|
+
def name=(new_name)
|
138
|
+
@wrapped.name = new_name
|
139
|
+
end
|
140
|
+
|
141
|
+
private
|
142
|
+
|
143
|
+
def remove_indentation(text)
|
144
|
+
from_us = text.instance_variable_get(:@from_plurimath)
|
145
|
+
!from_us && text.text.strip == "" ? nil : text.text
|
146
|
+
end
|
147
|
+
end
|
148
|
+
|
149
|
+
class Document < Node
|
150
|
+
end
|
151
|
+
|
152
|
+
Comment = ::Oga::XML::Comment
|
153
|
+
|
154
|
+
# Dump the tree just as if we were Ox. This is a limited implementation.
|
155
|
+
class Dumper
|
156
|
+
def initialize(tree, indent: nil)
|
157
|
+
@tree = tree
|
158
|
+
@indent = indent
|
159
|
+
@depth = 0
|
160
|
+
@out = ""
|
161
|
+
end
|
162
|
+
|
163
|
+
def dump(node = @tree)
|
164
|
+
case node
|
165
|
+
when Node
|
166
|
+
nodes = node.nodes
|
167
|
+
if nodes.length == 0
|
168
|
+
line_break
|
169
|
+
@out += "<#{node.unwrap.name}#{dump_attrs(node)}/>"
|
170
|
+
else
|
171
|
+
line_break
|
172
|
+
@out += "<#{node.unwrap.name}#{dump_attrs(node)}>"
|
173
|
+
@depth += 1
|
174
|
+
nodes.each { |i| dump(i) }
|
175
|
+
@depth -= 1
|
176
|
+
line_break unless nodes.last.is_a?(::String)
|
177
|
+
@out += "</#{node.unwrap.name}>"
|
178
|
+
end
|
179
|
+
when ::String
|
180
|
+
@out += entities(node)
|
181
|
+
end
|
182
|
+
|
183
|
+
line_break if node.object_id == @tree.object_id
|
184
|
+
|
185
|
+
self
|
186
|
+
end
|
187
|
+
|
188
|
+
attr_reader :out
|
189
|
+
|
190
|
+
ORD_AMP="&".ord
|
191
|
+
ORD_LT="<".ord
|
192
|
+
ORD_GT=">".ord
|
193
|
+
ORD_APOS="'".ord
|
194
|
+
ORD_QUOT='"'.ord
|
195
|
+
ORD_NEWLINE="\n".ord
|
196
|
+
ORD_CARRIAGERETURN="\r".ord
|
197
|
+
|
198
|
+
def self.entities(text,attr=false)
|
199
|
+
text.to_s.chars.map(&:ord).map do |i|
|
200
|
+
if i == ORD_AMP
|
201
|
+
"&amp;"
|
202
|
+
elsif i == ORD_LT
|
203
|
+
"&lt;"
|
204
|
+
elsif i == ORD_GT
|
205
|
+
"&gt;"
|
206
|
+
elsif i == ORD_QUOT && attr
|
207
|
+
"&quot;"
|
208
|
+
elsif i == ORD_NEWLINE || i == ORD_CARRIAGERETURN
|
209
|
+
i.chr("utf-8")
|
210
|
+
elsif i < 0x20
|
211
|
+
"&#x#{i.to_s(16).rjust(4, "0")};"
|
212
|
+
else
|
213
|
+
i.chr("utf-8")
|
214
|
+
end
|
215
|
+
end.join
|
216
|
+
end
|
217
|
+
|
218
|
+
private
|
219
|
+
|
220
|
+
def dump_attrs(node)
|
221
|
+
node.unwrap.attributes.map do |i|
|
222
|
+
# Currently, this is not part of the contract. But in the future
|
223
|
+
# it may be needed to also handle namespaces:
|
224
|
+
#
|
225
|
+
# if i.namespace
|
226
|
+
# %{ #{i.namespace.name}:#{i.name}="#{attr_entities i.value}"}
|
227
|
+
%{ #{i.name}="#{attr_entities i.value}"}
|
228
|
+
end.join
|
229
|
+
end
|
230
|
+
|
231
|
+
def entities(text)
|
232
|
+
self.class.entities(text)
|
233
|
+
end
|
234
|
+
|
235
|
+
def attr_entities(text)
|
236
|
+
self.class.entities(text, true)
|
237
|
+
end
|
238
|
+
|
239
|
+
def line_break
|
240
|
+
@out += "\n"
|
241
|
+
@out += " " * (@indent * @depth) if @indent
|
242
|
+
end
|
243
|
+
end
|
244
|
+
end
|
245
|
+
end
|
246
|
+
end
|
@@ -0,0 +1,29 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require "plurimath/xml_engine"
|
4
|
+
require "ox"
|
5
|
+
Ox.default_options = { encoding: "UTF-8" }
|
6
|
+
|
7
|
+
module Plurimath
|
8
|
+
module XMLEngine
|
9
|
+
class Ox
|
10
|
+
class << self
|
11
|
+
def new_element(name)
|
12
|
+
::Ox::Element.new(name)
|
13
|
+
end
|
14
|
+
|
15
|
+
def dump(data, **options)
|
16
|
+
::Ox.dump(data, **options)
|
17
|
+
end
|
18
|
+
|
19
|
+
def load(data)
|
20
|
+
::Ox.load(data, strip_namespace: true)
|
21
|
+
end
|
22
|
+
|
23
|
+
def is_xml_comment?(node)
|
24
|
+
node.is_a?(::Ox::Comment)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
end
|
28
|
+
end
|
29
|
+
end
|