moxml 0.1.16 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -0
- data/.rubocop_todo.yml +49 -133
- data/README.adoc +18 -0
- data/lib/moxml/adapter/base.rb +65 -8
- data/lib/moxml/adapter/headed_ox.rb +2 -1
- data/lib/moxml/adapter/libxml.rb +16 -6
- data/lib/moxml/adapter/nokogiri.rb +13 -7
- data/lib/moxml/adapter/oga.rb +35 -90
- data/lib/moxml/adapter/ox.rb +69 -19
- data/lib/moxml/adapter/rexml.rb +26 -9
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/config.rb +17 -2
- data/lib/moxml/element.rb +12 -8
- data/lib/moxml/node.rb +4 -1
- data/lib/moxml/text.rb +6 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xpath/compiler.rb +40 -21
- data/lib/moxml/xpath/parser.rb +12 -7
- data/spec/integration/all_adapters_spec.rb +1 -0
- data/spec/integration/shared_examples/edge_cases.rb +85 -6
- data/spec/integration/shared_examples/entity_reference_whitespace.rb +124 -0
- data/spec/integration/shared_examples/high_level/document_builder_behavior.rb +8 -6
- data/spec/integration/shared_examples/integration_workflows.rb +1 -1
- data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +0 -7
- data/spec/integration/shared_examples/node_wrappers/namespace_behavior.rb +135 -0
- data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +0 -3
- data/spec/integration/shared_examples/node_wrappers/node_set_behavior.rb +3 -1
- data/spec/moxml/adapter/entity_restoration_spec.rb +97 -0
- data/spec/moxml/builder_spec.rb +16 -1
- data/spec/moxml/entity_preservation_spec.rb +130 -0
- data/spec/moxml/entity_reference_spec.rb +114 -0
- data/spec/moxml/entity_registry_spec.rb +68 -0
- data/spec/moxml/moxml_spec.rb +39 -0
- data/spec/moxml/xpath/axes_spec.rb +0 -1
- data/spec/moxml/xpath/compiler_spec.rb +0 -2
- data/spec/performance/benchmark_spec.rb +1 -1
- metadata +6 -12
- data/TODO.remaining/1-entity-reference-adapter-support.md +0 -157
- data/TODO.remaining/2-entity-restoration-model-driven.md +0 -169
- data/TODO.remaining/3-entity-reference-test-coverage.md +0 -170
- data/TODO.remaining/4-lenient-entities-mode.md +0 -106
- data/TODO.remaining/5-fixture-integrity.md +0 -65
- data/TODO.remaining/6-ox-element-ordering-bug.md +0 -36
- data/TODO.remaining/7-headed-ox-limitations.md +0 -95
- data/TODO.remaining/8-xpath-predicate-gaps.md +0 -68
- data/TODO.remaining/9-cleanup-hygiene.md +0 -42
- data/TODO.remaining/README.md +0 -54
|
@@ -32,12 +32,6 @@ RSpec.shared_examples "Moxml Edge Cases" do
|
|
|
32
32
|
|
|
33
33
|
describe "malformed content handling" do
|
|
34
34
|
it "handles CDATA with nested markers" do
|
|
35
|
-
if context.config.adapter_name == :ox
|
|
36
|
-
pending "Ox doesn't escape the end token"
|
|
37
|
-
end
|
|
38
|
-
if context.config.adapter_name == :headed_ox
|
|
39
|
-
skip "HeadedOx limitation: Ox doesn't escape CDATA end markers. See docs/_pages/headed-ox-limitations.adoc"
|
|
40
|
-
end
|
|
41
35
|
cdata_text = "]]>]]>]]>"
|
|
42
36
|
doc = context.create_document
|
|
43
37
|
cdata = doc.create_cdata(cdata_text)
|
|
@@ -173,6 +167,91 @@ RSpec.shared_examples "Moxml Edge Cases" do
|
|
|
173
167
|
end
|
|
174
168
|
end
|
|
175
169
|
|
|
170
|
+
describe "whitespace text node preservation" do
|
|
171
|
+
# Ox/HeadedOx do not generate whitespace-only text nodes in their parser,
|
|
172
|
+
# so these tests only apply to adapters that do (Nokogiri, OGA, REXML, LibXML)
|
|
173
|
+
let(:preserves_ws) { !%i[ox headed_ox].include?(context.config.adapter_name) }
|
|
174
|
+
|
|
175
|
+
it "preserves whitespace-only text nodes between sibling elements" do
|
|
176
|
+
unless preserves_ws
|
|
177
|
+
skip "Ox/HeadedOx parser does not generate whitespace-only text nodes"
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
xml = <<~XML
|
|
181
|
+
<root>
|
|
182
|
+
<a>1</a>
|
|
183
|
+
<b>2</b>
|
|
184
|
+
<c>3</c>
|
|
185
|
+
</root>
|
|
186
|
+
XML
|
|
187
|
+
|
|
188
|
+
doc = context.parse(xml)
|
|
189
|
+
children = doc.root.children
|
|
190
|
+
|
|
191
|
+
# Should have whitespace text nodes between elements
|
|
192
|
+
expect(children.size).to be > 3
|
|
193
|
+
|
|
194
|
+
# Whitespace text nodes should be Text nodes
|
|
195
|
+
ws_nodes = children.select { |c| c.is_a?(Moxml::Text) && c.content.strip.empty? }
|
|
196
|
+
expect(ws_nodes).not_to be_empty
|
|
197
|
+
|
|
198
|
+
# Element children should still be accessible
|
|
199
|
+
elements = children.select { |c| c.is_a?(Moxml::Element) }
|
|
200
|
+
expect(elements.map(&:name)).to eq(%w[a b c])
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
it "preserves inline whitespace text nodes between text and elements" do
|
|
204
|
+
xml = "<p>Figure <sub>A</sub>.1</p>"
|
|
205
|
+
doc = context.parse(xml)
|
|
206
|
+
|
|
207
|
+
children = doc.root.children
|
|
208
|
+
expect(children.size).to eq(3)
|
|
209
|
+
|
|
210
|
+
# First child: "Figure " text node
|
|
211
|
+
expect(children[0]).to be_a(Moxml::Text)
|
|
212
|
+
expect(children[0].content).to eq("Figure ")
|
|
213
|
+
|
|
214
|
+
# Second child: <sub> element
|
|
215
|
+
expect(children[1]).to be_a(Moxml::Element)
|
|
216
|
+
expect(children[1].name).to eq("sub")
|
|
217
|
+
expect(children[1].text).to eq("A")
|
|
218
|
+
|
|
219
|
+
# Third child: ".1" text node
|
|
220
|
+
expect(children[2]).to be_a(Moxml::Text)
|
|
221
|
+
expect(children[2].content).to eq(".1")
|
|
222
|
+
end
|
|
223
|
+
|
|
224
|
+
it "preserves space-only text node as meaningful content" do
|
|
225
|
+
xml = "<p>Hello <b>world</b>!</p>"
|
|
226
|
+
doc = context.parse(xml)
|
|
227
|
+
|
|
228
|
+
children = doc.root.children
|
|
229
|
+
expect(children.size).to eq(3)
|
|
230
|
+
|
|
231
|
+
expect(children[0].content).to eq("Hello ")
|
|
232
|
+
expect(children[1]).to be_a(Moxml::Element)
|
|
233
|
+
expect(children[2].content).to eq("!")
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
it "distinguishes whitespace text nodes from element children" do
|
|
237
|
+
unless preserves_ws
|
|
238
|
+
skip "Ox/HeadedOx parser does not generate whitespace-only text nodes"
|
|
239
|
+
end
|
|
240
|
+
|
|
241
|
+
xml = "<root> <child/> </root>"
|
|
242
|
+
doc = context.parse(xml)
|
|
243
|
+
|
|
244
|
+
children = doc.root.children
|
|
245
|
+
# " " before child, " " after child
|
|
246
|
+
expect(children.size).to eq(3)
|
|
247
|
+
expect(children[0]).to be_a(Moxml::Text)
|
|
248
|
+
expect(children[0].content).to eq(" ")
|
|
249
|
+
expect(children[1]).to be_a(Moxml::Element)
|
|
250
|
+
expect(children[2]).to be_a(Moxml::Text)
|
|
251
|
+
expect(children[2].content).to eq(" ")
|
|
252
|
+
end
|
|
253
|
+
end
|
|
254
|
+
|
|
176
255
|
describe "document structure edge cases" do
|
|
177
256
|
it "handles deeply nested elements" do
|
|
178
257
|
doc = context.create_document
|
|
@@ -0,0 +1,124 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
RSpec.shared_examples "Entity Reference Whitespace Preservation" do
|
|
4
|
+
let(:context) { Moxml.new }
|
|
5
|
+
|
|
6
|
+
describe "whitespace preservation around entities on parse round-trip" do
|
|
7
|
+
it "preserves spaces around entity references" do
|
|
8
|
+
doc = context.parse("<p>A &copy; B &mdash; C</p>")
|
|
9
|
+
xml = doc.root.to_xml
|
|
10
|
+
|
|
11
|
+
expect(xml).to include("A ").and include(" B ").and include(" C")
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
it "preserves entity references in serialized output" do
|
|
15
|
+
doc = context.parse("<p>Copyright &copy; 2024 &mdash; All rights reserved</p>")
|
|
16
|
+
xml = doc.root.to_xml
|
|
17
|
+
|
|
18
|
+
expect(xml).to include("&copy;")
|
|
19
|
+
expect(xml).to include("&mdash;")
|
|
20
|
+
expect(xml).to include("Copyright &copy; 2024 &mdash; All rights reserved")
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
it "preserves whitespace with single entity reference" do
|
|
24
|
+
doc = context.parse("<p>A &copy; B</p>")
|
|
25
|
+
|
|
26
|
+
expect(doc.root.inner_text).to include("A ").and include(" B")
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
it "preserves whitespace with entity at start of text" do
|
|
30
|
+
doc = context.parse("<p>&copy; start</p>")
|
|
31
|
+
|
|
32
|
+
expect(doc.root.inner_text).to include("©")
|
|
33
|
+
expect(doc.root.inner_text).to include(" start")
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
it "preserves whitespace with entity at end of text" do
|
|
37
|
+
doc = context.parse("<p>end &copy;</p>")
|
|
38
|
+
|
|
39
|
+
expect(doc.root.inner_text).to include("end ")
|
|
40
|
+
expect(doc.root.inner_text).to include("©")
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
describe "Builder entity reference whitespace" do
|
|
45
|
+
# REXML and LibXML store entity references outside the native DOM tree,
|
|
46
|
+
# so they cannot maintain positional ordering relative to text nodes.
|
|
47
|
+
before do
|
|
48
|
+
adapter_name = context.config.adapter.name
|
|
49
|
+
if adapter_name.include?("Rexml") || adapter_name.include?("Libxml")
|
|
50
|
+
skip "#{adapter_name} does not support inline entity reference nodes via Builder"
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
it "preserves spaces around entity references" do
|
|
55
|
+
doc = Moxml::Builder.new(context).build do
|
|
56
|
+
element "p" do
|
|
57
|
+
text "Copyright "
|
|
58
|
+
entity_reference "copy"
|
|
59
|
+
text " 2024 "
|
|
60
|
+
entity_reference "mdash"
|
|
61
|
+
text " All rights reserved"
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
xml = doc.root.to_xml
|
|
66
|
+
expect(xml).to include("Copyright &copy; 2024 &mdash; All rights reserved")
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
it "preserves whitespace-only text nodes adjacent to entity references" do
|
|
70
|
+
doc = Moxml::Builder.new(context).build do
|
|
71
|
+
element "p" do
|
|
72
|
+
entity_reference "copy"
|
|
73
|
+
text " "
|
|
74
|
+
entity_reference "mdash"
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
children = doc.root.children
|
|
79
|
+
types = children.map(&:class)
|
|
80
|
+
|
|
81
|
+
expect(types).to eq([
|
|
82
|
+
Moxml::EntityReference,
|
|
83
|
+
Moxml::Text,
|
|
84
|
+
Moxml::EntityReference,
|
|
85
|
+
])
|
|
86
|
+
expect(children[1].content).to eq(" ")
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
it "preserves multiple spaces between entity references" do
|
|
90
|
+
doc = Moxml::Builder.new(context).build do
|
|
91
|
+
element "p" do
|
|
92
|
+
text "A"
|
|
93
|
+
entity_reference "amp"
|
|
94
|
+
text " "
|
|
95
|
+
entity_reference "lt"
|
|
96
|
+
text "B"
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
children = doc.root.children
|
|
101
|
+
expect(children.length).to eq(5)
|
|
102
|
+
expect(children[2].content).to eq(" ")
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
describe "structural whitespace filtering" do
|
|
107
|
+
it "preserves whitespace text nodes between elements" do
|
|
108
|
+
xml = <<~XML
|
|
109
|
+
<root>
|
|
110
|
+
<child1/>
|
|
111
|
+
<child2/>
|
|
112
|
+
</root>
|
|
113
|
+
XML
|
|
114
|
+
|
|
115
|
+
doc = context.parse(xml)
|
|
116
|
+
children = doc.root.children
|
|
117
|
+
|
|
118
|
+
# Whitespace text nodes between elements are preserved
|
|
119
|
+
elements = children.select { |c| c.is_a?(Moxml::Element) }
|
|
120
|
+
expect(elements.length).to eq(2)
|
|
121
|
+
expect(elements.map(&:name)).to eq(%w[child1 child2])
|
|
122
|
+
end
|
|
123
|
+
end
|
|
124
|
+
end
|
|
@@ -32,12 +32,14 @@ RSpec.shared_examples "Moxml::DocumentBuilder" do
|
|
|
32
32
|
|
|
33
33
|
expect(doc.root.namespaces.count).to eq(1)
|
|
34
34
|
expect(doc.root.namespaces.first.uri).to eq("http://example.org")
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
expect(
|
|
38
|
-
expect(
|
|
39
|
-
expect(
|
|
40
|
-
expect(
|
|
35
|
+
# Whitespace text nodes are preserved between elements
|
|
36
|
+
non_ws_children = doc.root.children.reject { |c| c.is_a?(Moxml::Text) && c.content.strip.empty? }
|
|
37
|
+
expect(non_ws_children[0]).to be_a(Moxml::Comment)
|
|
38
|
+
expect(non_ws_children[1]).to be_a(Moxml::Element)
|
|
39
|
+
expect(non_ws_children[1].name).to eq("child")
|
|
40
|
+
expect(non_ws_children[1]["id"]).to eq("1")
|
|
41
|
+
expect(non_ws_children[1].children.find { |c| c.is_a?(Moxml::Cdata) }).to be_a(Moxml::Cdata)
|
|
42
|
+
expect(non_ws_children[2]).to be_a(Moxml::ProcessingInstruction)
|
|
41
43
|
end
|
|
42
44
|
end
|
|
43
45
|
end
|
|
@@ -113,7 +113,7 @@ RSpec.shared_examples "Moxml Integration" do
|
|
|
113
113
|
expect(attr).to eq("value")
|
|
114
114
|
|
|
115
115
|
# Test namespace override
|
|
116
|
-
deeper = a_element.children.
|
|
116
|
+
deeper = a_element.children.find { |c| c.is_a?(Moxml::Element) }
|
|
117
117
|
expect(deeper.namespace.uri).to eq("http://other.org")
|
|
118
118
|
end
|
|
119
119
|
end
|
|
@@ -36,13 +36,6 @@ RSpec.shared_examples "Moxml::Cdata" do
|
|
|
36
36
|
end
|
|
37
37
|
|
|
38
38
|
it "escapes CDATA end marker" do
|
|
39
|
-
# pending for Ox: https://github.com/ohler55/ox/issues/377
|
|
40
|
-
if context.config.adapter_name == :ox
|
|
41
|
-
pending "Ox doesn't escape the end token"
|
|
42
|
-
end
|
|
43
|
-
if context.config.adapter_name == :headed_ox
|
|
44
|
-
skip "HeadedOx limitation: Ox doesn't escape CDATA end markers. See docs/_pages/headed-ox-limitations.adoc"
|
|
45
|
-
end
|
|
46
39
|
cdata.content = "content]]>more"
|
|
47
40
|
expect(cdata.to_xml).to eq("<![CDATA[content]]]]><![CDATA[>more]]>")
|
|
48
41
|
end
|
|
@@ -88,6 +88,141 @@ RSpec.shared_examples "Moxml::Namespace" do
|
|
|
88
88
|
end
|
|
89
89
|
end
|
|
90
90
|
|
|
91
|
+
describe "namespace_definitions" do
|
|
92
|
+
it "returns only namespace declarations, not regular attributes" do
|
|
93
|
+
element.add_namespace("xs", "http://www.w3.org/2001/XMLSchema")
|
|
94
|
+
element["id"] = "test-id"
|
|
95
|
+
element["class"] = "foo"
|
|
96
|
+
|
|
97
|
+
ns_defs = element.namespaces
|
|
98
|
+
prefixes = ns_defs.map(&:prefix)
|
|
99
|
+
|
|
100
|
+
expect(prefixes).to include("xs")
|
|
101
|
+
expect(prefixes).not_to include("id", "class")
|
|
102
|
+
expect(ns_defs.size).to eq(1)
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
it "returns multiple prefixed namespace declarations" do
|
|
106
|
+
element.add_namespace("xs", "http://www.w3.org/2001/XMLSchema")
|
|
107
|
+
element.add_namespace("xsi", "http://www.w3.org/2001/XMLSchema-instance")
|
|
108
|
+
element.add_namespace("dc", "http://purl.org/dc/elements/1.1/")
|
|
109
|
+
element["id"] = "test-id"
|
|
110
|
+
|
|
111
|
+
ns_defs = element.namespaces
|
|
112
|
+
prefixes = ns_defs.map(&:prefix)
|
|
113
|
+
|
|
114
|
+
expect(prefixes).to contain_exactly("xs", "xsi", "dc")
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
it "returns both default and prefixed namespace declarations" do
|
|
118
|
+
element.add_namespace(nil, "http://example.org/default")
|
|
119
|
+
element.add_namespace("xs", "http://www.w3.org/2001/XMLSchema")
|
|
120
|
+
element["attr"] = "value"
|
|
121
|
+
|
|
122
|
+
ns_defs = element.namespaces
|
|
123
|
+
prefixes = ns_defs.map(&:prefix)
|
|
124
|
+
|
|
125
|
+
expect(ns_defs.size).to eq(2)
|
|
126
|
+
expect(prefixes).to include("xs")
|
|
127
|
+
expect(ns_defs.find { |ns| ns.prefix.nil? }).not_to be_nil
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
it "does not include namespaces from ancestor elements" do
|
|
131
|
+
root = doc.create_element("root")
|
|
132
|
+
root.add_namespace("xs", "http://www.w3.org/2001/XMLSchema")
|
|
133
|
+
child = doc.create_element("child")
|
|
134
|
+
child.add_namespace("dc", "http://purl.org/dc/elements/1.1/")
|
|
135
|
+
root.add_child(child)
|
|
136
|
+
|
|
137
|
+
ns_defs = child.namespaces
|
|
138
|
+
prefixes = ns_defs.map(&:prefix)
|
|
139
|
+
|
|
140
|
+
expect(prefixes).to contain_exactly("dc")
|
|
141
|
+
expect(prefixes).not_to include("xs")
|
|
142
|
+
end
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
describe "in_scope_namespaces" do
|
|
146
|
+
it "returns namespaces declared on the element itself" do
|
|
147
|
+
element.add_namespace("xs", "http://www.w3.org/2001/XMLSchema")
|
|
148
|
+
element.add_namespace("xsi", "http://www.w3.org/2001/XMLSchema-instance")
|
|
149
|
+
|
|
150
|
+
in_scope = element.in_scope_namespaces
|
|
151
|
+
prefixes = in_scope.map(&:prefix)
|
|
152
|
+
uris = in_scope.map(&:uri)
|
|
153
|
+
|
|
154
|
+
expect(prefixes).to include("xs", "xsi")
|
|
155
|
+
expect(uris).to include(
|
|
156
|
+
"http://www.w3.org/2001/XMLSchema",
|
|
157
|
+
"http://www.w3.org/2001/XMLSchema-instance",
|
|
158
|
+
)
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
it "inherits namespaces from ancestor elements" do
|
|
162
|
+
root = doc.create_element("root")
|
|
163
|
+
root.add_namespace("xs", "http://www.w3.org/2001/XMLSchema")
|
|
164
|
+
child = doc.create_element("child")
|
|
165
|
+
root.add_child(child)
|
|
166
|
+
|
|
167
|
+
in_scope = child.in_scope_namespaces
|
|
168
|
+
prefixes = in_scope.map(&:prefix)
|
|
169
|
+
|
|
170
|
+
expect(prefixes).to include("xs")
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
it "collects namespaces from multiple ancestor levels" do
|
|
174
|
+
root = doc.create_element("root")
|
|
175
|
+
root.add_namespace("xs", "http://www.w3.org/2001/XMLSchema")
|
|
176
|
+
middle = doc.create_element("middle")
|
|
177
|
+
middle.add_namespace("dc", "http://purl.org/dc/elements/1.1/")
|
|
178
|
+
root.add_child(middle)
|
|
179
|
+
leaf = doc.create_element("leaf")
|
|
180
|
+
middle.add_child(leaf)
|
|
181
|
+
|
|
182
|
+
in_scope = leaf.in_scope_namespaces
|
|
183
|
+
prefixes = in_scope.map(&:prefix)
|
|
184
|
+
|
|
185
|
+
expect(prefixes).to include("xs", "dc")
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
it "closest ancestor wins for duplicate prefixes" do
|
|
189
|
+
root = doc.create_element("root")
|
|
190
|
+
root.add_namespace("ns", "http://example.org/old")
|
|
191
|
+
child = doc.create_element("child")
|
|
192
|
+
child.add_namespace("ns", "http://example.org/new")
|
|
193
|
+
root.add_child(child)
|
|
194
|
+
|
|
195
|
+
in_scope = child.in_scope_namespaces
|
|
196
|
+
ns_match = in_scope.find { |ns| ns.prefix == "ns" }
|
|
197
|
+
|
|
198
|
+
expect(ns_match.uri).to eq("http://example.org/new")
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
it "includes default namespace" do
|
|
202
|
+
root = doc.create_element("root")
|
|
203
|
+
root.add_namespace(nil, "http://example.org/default")
|
|
204
|
+
child = doc.create_element("child")
|
|
205
|
+
root.add_child(child)
|
|
206
|
+
|
|
207
|
+
in_scope = child.in_scope_namespaces
|
|
208
|
+
default_ns = in_scope.find { |ns| ns.prefix.nil? }
|
|
209
|
+
|
|
210
|
+
expect(default_ns).not_to be_nil
|
|
211
|
+
expect(default_ns.uri).to eq("http://example.org/default")
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
it "returns empty array for element with no namespaces" do
|
|
215
|
+
lonely = doc.create_element("lonely")
|
|
216
|
+
expect(lonely.in_scope_namespaces).to eq([])
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
it "returns empty array for document root with no namespace declarations" do
|
|
220
|
+
root = doc.create_element("root")
|
|
221
|
+
doc.add_child(root)
|
|
222
|
+
expect(root.in_scope_namespaces).to eq([])
|
|
223
|
+
end
|
|
224
|
+
end
|
|
225
|
+
|
|
91
226
|
describe "inheritance" do
|
|
92
227
|
it "does not inherit parent namespaces" do
|
|
93
228
|
# https://stackoverflow.com/a/67347081
|
|
@@ -109,9 +109,6 @@ RSpec.shared_examples "Moxml::Node" do
|
|
|
109
109
|
if context.config.adapter_name == :ox
|
|
110
110
|
pending "Ox doesn't have a native XPath"
|
|
111
111
|
end
|
|
112
|
-
if context.config.adapter_name == :headed_ox
|
|
113
|
-
skip "HeadedOx limitation: Text content access from nested elements needs investigation. See docs/_pages/headed-ox-limitations.adoc"
|
|
114
|
-
end
|
|
115
112
|
|
|
116
113
|
node = doc.at_xpath("//b")
|
|
117
114
|
expect(node.text).to eq("1")
|
|
@@ -41,7 +41,9 @@ RSpec.shared_examples "Moxml::NodeSet" do
|
|
|
41
41
|
end
|
|
42
42
|
|
|
43
43
|
it "compares nodes" do
|
|
44
|
-
|
|
44
|
+
xpath_results = doc.xpath("//child")
|
|
45
|
+
element_children = doc.root.children.select { |c| c.is_a?(Moxml::Element) }
|
|
46
|
+
expect(xpath_results.map(&:native)).to eq(element_children.map(&:native))
|
|
45
47
|
end
|
|
46
48
|
end
|
|
47
49
|
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "spec_helper"
|
|
4
|
+
|
|
5
|
+
# Adapters that use DocumentBuilder for parse (entity restoration during parsing works)
|
|
6
|
+
DBUILD_ADAPTERS = %i[oga rexml].freeze
|
|
7
|
+
|
|
8
|
+
# All adapters with entity reference support
|
|
9
|
+
ENTITY_ADAPTERS = %i[nokogiri oga ox rexml].freeze
|
|
10
|
+
|
|
11
|
+
RSpec.shared_examples "standard entity preservation" do |adapter_name|
|
|
12
|
+
context "with #{adapter_name}", adapter: adapter_name do
|
|
13
|
+
it "preserves amp entity through round-trip" do
|
|
14
|
+
ctx = Moxml.new(adapter_name) { |c| c.restore_entities = true }
|
|
15
|
+
doc = ctx.parse("<p>a &amp; b</p>")
|
|
16
|
+
expect(doc.to_xml).to include("&amp;")
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
it "preserves lt entity through round-trip" do
|
|
20
|
+
ctx = Moxml.new(adapter_name) { |c| c.restore_entities = true }
|
|
21
|
+
doc = ctx.parse("<p>a &lt; b</p>")
|
|
22
|
+
expect(doc.to_xml).to include("&lt;")
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
it "preserves gt entity through round-trip" do
|
|
26
|
+
ctx = Moxml.new(adapter_name) { |c| c.restore_entities = true }
|
|
27
|
+
doc = ctx.parse("<p>a &gt; b</p>")
|
|
28
|
+
expect(doc.to_xml).to include("&gt;")
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
it "produces valid XML through round-trip" do
|
|
32
|
+
ctx = Moxml.new(adapter_name) { |c| c.restore_entities = true }
|
|
33
|
+
doc = ctx.parse("<p>&amp; text &amp;</p>")
|
|
34
|
+
output = doc.to_xml
|
|
35
|
+
expect { ctx.parse(output) }.not_to raise_error
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
RSpec.shared_examples "non-standard entity restoration via DocumentBuilder" do |adapter_name|
|
|
41
|
+
context "with #{adapter_name}", adapter: adapter_name do
|
|
42
|
+
let(:ctx) { Moxml.new(adapter_name) { |c| c.restore_entities = true } }
|
|
43
|
+
|
|
44
|
+
it "restores nbsp (U+00A0) from bundled W3C entity set" do
|
|
45
|
+
doc = ctx.parse("<p>\u00A0</p>")
|
|
46
|
+
expect(doc.to_xml).to include("&nbsp;")
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
it "restores copy (U+00A9) from bundled W3C entity set" do
|
|
50
|
+
doc = ctx.parse("<p>\u00A9</p>")
|
|
51
|
+
expect(doc.to_xml).to include("&copy;")
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
it "restores mdash (U+2014) from bundled W3C entity set" do
|
|
55
|
+
doc = ctx.parse("<p>\u2014</p>")
|
|
56
|
+
expect(doc.to_xml).to include("&mdash;")
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
it "restores multiple entities in a single text node" do
|
|
60
|
+
doc = ctx.parse("<p>before\u00A0middle\u00A9end</p>")
|
|
61
|
+
output = doc.to_xml
|
|
62
|
+
expect(output).to include("&nbsp;")
|
|
63
|
+
expect(output).to include("&copy;")
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
RSpec.shared_examples "restore_entities disabled" do |adapter_name|
|
|
69
|
+
context "with #{adapter_name}", adapter: adapter_name do
|
|
70
|
+
let(:ctx) { Moxml.new(adapter_name) { |c| c.restore_entities = false } }
|
|
71
|
+
|
|
72
|
+
it "does not create EntityReference nodes for standard entities" do
|
|
73
|
+
doc = ctx.parse("<p>a &amp; b</p>")
|
|
74
|
+
entity_children = doc.root.children.grep(Moxml::EntityReference)
|
|
75
|
+
expect(entity_children).to be_empty
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
it "does not create EntityReference nodes for non-standard characters" do
|
|
79
|
+
doc = ctx.parse("<p>\u00A0</p>")
|
|
80
|
+
entity_children = doc.root.children.grep(Moxml::EntityReference)
|
|
81
|
+
expect(entity_children).to be_empty
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
RSpec.describe "Entity restoration" do
|
|
87
|
+
ENTITY_ADAPTERS.each do |adapter_name|
|
|
88
|
+
it_behaves_like "standard entity preservation", adapter_name
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
DBUILD_ADAPTERS.each do |adapter_name|
|
|
92
|
+
it_behaves_like "non-standard entity restoration via DocumentBuilder",
|
|
93
|
+
adapter_name
|
|
94
|
+
|
|
95
|
+
it_behaves_like "restore_entities disabled", adapter_name
|
|
96
|
+
end
|
|
97
|
+
end
|
data/spec/moxml/builder_spec.rb
CHANGED
|
@@ -266,7 +266,22 @@ RSpec.describe Moxml::Builder do
|
|
|
266
266
|
builder.title("Hello") { builder.child }
|
|
267
267
|
end
|
|
268
268
|
end
|
|
269
|
-
end.to raise_error(ArgumentError,
|
|
269
|
+
end.to raise_error(ArgumentError,
|
|
270
|
+
/title: cannot combine text content with a block/)
|
|
271
|
+
end
|
|
272
|
+
end
|
|
273
|
+
|
|
274
|
+
describe "#entity_reference" do
|
|
275
|
+
it "creates entity references via DSL" do
|
|
276
|
+
doc = described_class.new(context).build do
|
|
277
|
+
element "p" do
|
|
278
|
+
entity_reference "nbsp"
|
|
279
|
+
end
|
|
280
|
+
end
|
|
281
|
+
ref = doc.root.children.first
|
|
282
|
+
expect(ref).to be_a(Moxml::EntityReference)
|
|
283
|
+
expect(ref.name).to eq("nbsp")
|
|
284
|
+
expect(doc.to_xml).to include("&nbsp;")
|
|
270
285
|
end
|
|
271
286
|
end
|
|
272
287
|
|