moxml 0.1.16 → 0.1.17
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -0
- data/.rubocop_todo.yml +49 -133
- data/README.adoc +18 -0
- data/lib/moxml/adapter/base.rb +65 -8
- data/lib/moxml/adapter/headed_ox.rb +2 -1
- data/lib/moxml/adapter/libxml.rb +16 -3
- data/lib/moxml/adapter/nokogiri.rb +14 -4
- data/lib/moxml/adapter/oga.rb +26 -87
- data/lib/moxml/adapter/ox.rb +69 -19
- data/lib/moxml/adapter/rexml.rb +24 -3
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/element.rb +12 -8
- data/lib/moxml/node.rb +4 -1
- data/lib/moxml/text.rb +6 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xpath/compiler.rb +40 -21
- data/lib/moxml/xpath/parser.rb +12 -7
- data/spec/integration/all_adapters_spec.rb +1 -0
- data/spec/integration/shared_examples/edge_cases.rb +0 -6
- data/spec/integration/shared_examples/entity_reference_whitespace.rb +122 -0
- data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +0 -7
- data/spec/integration/shared_examples/node_wrappers/namespace_behavior.rb +135 -0
- data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +0 -3
- data/spec/moxml/adapter/entity_restoration_spec.rb +97 -0
- data/spec/moxml/builder_spec.rb +16 -1
- data/spec/moxml/entity_preservation_spec.rb +130 -0
- data/spec/moxml/entity_reference_spec.rb +114 -0
- data/spec/moxml/entity_registry_spec.rb +68 -0
- data/spec/moxml/xpath/axes_spec.rb +0 -1
- data/spec/moxml/xpath/compiler_spec.rb +0 -2
- metadata +6 -12
- data/TODO.remaining/1-entity-reference-adapter-support.md +0 -157
- data/TODO.remaining/2-entity-restoration-model-driven.md +0 -169
- data/TODO.remaining/3-entity-reference-test-coverage.md +0 -170
- data/TODO.remaining/4-lenient-entities-mode.md +0 -106
- data/TODO.remaining/5-fixture-integrity.md +0 -65
- data/TODO.remaining/6-ox-element-ordering-bug.md +0 -36
- data/TODO.remaining/7-headed-ox-limitations.md +0 -95
- data/TODO.remaining/8-xpath-predicate-gaps.md +0 -68
- data/TODO.remaining/9-cleanup-hygiene.md +0 -42
- data/TODO.remaining/README.md +0 -54
|
@@ -0,0 +1,122 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
RSpec.shared_examples "Entity Reference Whitespace Preservation" do
|
|
4
|
+
let(:context) { Moxml.new }
|
|
5
|
+
|
|
6
|
+
describe "whitespace preservation around entities on parse round-trip" do
|
|
7
|
+
it "preserves spaces around entity references" do
|
|
8
|
+
doc = context.parse("<p>A &copy; B &mdash; C</p>")
|
|
9
|
+
xml = doc.root.to_xml
|
|
10
|
+
|
|
11
|
+
expect(xml).to include("A ").and include(" B ").and include(" C")
|
|
12
|
+
end
|
|
13
|
+
|
|
14
|
+
it "preserves entity references in serialized output" do
|
|
15
|
+
doc = context.parse("<p>Copyright &copy; 2024 &mdash; All rights reserved</p>")
|
|
16
|
+
xml = doc.root.to_xml
|
|
17
|
+
|
|
18
|
+
expect(xml).to include("&copy;")
|
|
19
|
+
expect(xml).to include("&mdash;")
|
|
20
|
+
expect(xml).to include("Copyright &copy; 2024 &mdash; All rights reserved")
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
it "preserves whitespace with single entity reference" do
|
|
24
|
+
doc = context.parse("<p>A &copy; B</p>")
|
|
25
|
+
|
|
26
|
+
expect(doc.root.inner_text).to include("A ").and include(" B")
|
|
27
|
+
end
|
|
28
|
+
|
|
29
|
+
it "preserves whitespace with entity at start of text" do
|
|
30
|
+
doc = context.parse("<p>&copy; start</p>")
|
|
31
|
+
|
|
32
|
+
expect(doc.root.inner_text).to include("©")
|
|
33
|
+
expect(doc.root.inner_text).to include(" start")
|
|
34
|
+
end
|
|
35
|
+
|
|
36
|
+
it "preserves whitespace with entity at end of text" do
|
|
37
|
+
doc = context.parse("<p>end &copy;</p>")
|
|
38
|
+
|
|
39
|
+
expect(doc.root.inner_text).to include("end ")
|
|
40
|
+
expect(doc.root.inner_text).to include("©")
|
|
41
|
+
end
|
|
42
|
+
end
|
|
43
|
+
|
|
44
|
+
describe "Builder entity reference whitespace" do
|
|
45
|
+
# REXML and LibXML store entity references outside the native DOM tree,
|
|
46
|
+
# so they cannot maintain positional ordering relative to text nodes.
|
|
47
|
+
before do
|
|
48
|
+
adapter_name = context.config.adapter.name
|
|
49
|
+
if adapter_name.include?("Rexml") || adapter_name.include?("Libxml")
|
|
50
|
+
skip "#{adapter_name} does not support inline entity reference nodes via Builder"
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
it "preserves spaces around entity references" do
|
|
55
|
+
doc = Moxml::Builder.new(context).build do
|
|
56
|
+
element "p" do
|
|
57
|
+
text "Copyright "
|
|
58
|
+
entity_reference "copy"
|
|
59
|
+
text " 2024 "
|
|
60
|
+
entity_reference "mdash"
|
|
61
|
+
text " All rights reserved"
|
|
62
|
+
end
|
|
63
|
+
end
|
|
64
|
+
|
|
65
|
+
xml = doc.root.to_xml
|
|
66
|
+
expect(xml).to include("Copyright &copy; 2024 &mdash; All rights reserved")
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
it "preserves whitespace-only text nodes adjacent to entity references" do
|
|
70
|
+
doc = Moxml::Builder.new(context).build do
|
|
71
|
+
element "p" do
|
|
72
|
+
entity_reference "copy"
|
|
73
|
+
text " "
|
|
74
|
+
entity_reference "mdash"
|
|
75
|
+
end
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
children = doc.root.children
|
|
79
|
+
types = children.map(&:class)
|
|
80
|
+
|
|
81
|
+
expect(types).to eq([
|
|
82
|
+
Moxml::EntityReference,
|
|
83
|
+
Moxml::Text,
|
|
84
|
+
Moxml::EntityReference,
|
|
85
|
+
])
|
|
86
|
+
expect(children[1].content).to eq(" ")
|
|
87
|
+
end
|
|
88
|
+
|
|
89
|
+
it "preserves multiple spaces between entity references" do
|
|
90
|
+
doc = Moxml::Builder.new(context).build do
|
|
91
|
+
element "p" do
|
|
92
|
+
text "A"
|
|
93
|
+
entity_reference "amp"
|
|
94
|
+
text " "
|
|
95
|
+
entity_reference "lt"
|
|
96
|
+
text "B"
|
|
97
|
+
end
|
|
98
|
+
end
|
|
99
|
+
|
|
100
|
+
children = doc.root.children
|
|
101
|
+
expect(children.length).to eq(5)
|
|
102
|
+
expect(children[2].content).to eq(" ")
|
|
103
|
+
end
|
|
104
|
+
end
|
|
105
|
+
|
|
106
|
+
describe "structural whitespace filtering" do
|
|
107
|
+
it "still filters whitespace between elements" do
|
|
108
|
+
xml = <<~XML
|
|
109
|
+
<root>
|
|
110
|
+
<child1/>
|
|
111
|
+
<child2/>
|
|
112
|
+
</root>
|
|
113
|
+
XML
|
|
114
|
+
|
|
115
|
+
doc = context.parse(xml)
|
|
116
|
+
children = doc.root.children
|
|
117
|
+
|
|
118
|
+
expect(children.length).to eq(2)
|
|
119
|
+
expect(children.all?(Moxml::Element)).to be true
|
|
120
|
+
end
|
|
121
|
+
end
|
|
122
|
+
end
|
|
@@ -36,13 +36,6 @@ RSpec.shared_examples "Moxml::Cdata" do
|
|
|
36
36
|
end
|
|
37
37
|
|
|
38
38
|
it "escapes CDATA end marker" do
|
|
39
|
-
# pending for Ox: https://github.com/ohler55/ox/issues/377
|
|
40
|
-
if context.config.adapter_name == :ox
|
|
41
|
-
pending "Ox doesn't escape the end token"
|
|
42
|
-
end
|
|
43
|
-
if context.config.adapter_name == :headed_ox
|
|
44
|
-
skip "HeadedOx limitation: Ox doesn't escape CDATA end markers. See docs/_pages/headed-ox-limitations.adoc"
|
|
45
|
-
end
|
|
46
39
|
cdata.content = "content]]>more"
|
|
47
40
|
expect(cdata.to_xml).to eq("<![CDATA[content]]]]><![CDATA[>more]]>")
|
|
48
41
|
end
|
|
@@ -88,6 +88,141 @@ RSpec.shared_examples "Moxml::Namespace" do
|
|
|
88
88
|
end
|
|
89
89
|
end
|
|
90
90
|
|
|
91
|
+
describe "namespace_definitions" do
|
|
92
|
+
it "returns only namespace declarations, not regular attributes" do
|
|
93
|
+
element.add_namespace("xs", "http://www.w3.org/2001/XMLSchema")
|
|
94
|
+
element["id"] = "test-id"
|
|
95
|
+
element["class"] = "foo"
|
|
96
|
+
|
|
97
|
+
ns_defs = element.namespaces
|
|
98
|
+
prefixes = ns_defs.map(&:prefix)
|
|
99
|
+
|
|
100
|
+
expect(prefixes).to include("xs")
|
|
101
|
+
expect(prefixes).not_to include("id", "class")
|
|
102
|
+
expect(ns_defs.size).to eq(1)
|
|
103
|
+
end
|
|
104
|
+
|
|
105
|
+
it "returns multiple prefixed namespace declarations" do
|
|
106
|
+
element.add_namespace("xs", "http://www.w3.org/2001/XMLSchema")
|
|
107
|
+
element.add_namespace("xsi", "http://www.w3.org/2001/XMLSchema-instance")
|
|
108
|
+
element.add_namespace("dc", "http://purl.org/dc/elements/1.1/")
|
|
109
|
+
element["id"] = "test-id"
|
|
110
|
+
|
|
111
|
+
ns_defs = element.namespaces
|
|
112
|
+
prefixes = ns_defs.map(&:prefix)
|
|
113
|
+
|
|
114
|
+
expect(prefixes).to contain_exactly("xs", "xsi", "dc")
|
|
115
|
+
end
|
|
116
|
+
|
|
117
|
+
it "returns both default and prefixed namespace declarations" do
|
|
118
|
+
element.add_namespace(nil, "http://example.org/default")
|
|
119
|
+
element.add_namespace("xs", "http://www.w3.org/2001/XMLSchema")
|
|
120
|
+
element["attr"] = "value"
|
|
121
|
+
|
|
122
|
+
ns_defs = element.namespaces
|
|
123
|
+
prefixes = ns_defs.map(&:prefix)
|
|
124
|
+
|
|
125
|
+
expect(ns_defs.size).to eq(2)
|
|
126
|
+
expect(prefixes).to include("xs")
|
|
127
|
+
expect(ns_defs.find { |ns| ns.prefix.nil? }).not_to be_nil
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
it "does not include namespaces from ancestor elements" do
|
|
131
|
+
root = doc.create_element("root")
|
|
132
|
+
root.add_namespace("xs", "http://www.w3.org/2001/XMLSchema")
|
|
133
|
+
child = doc.create_element("child")
|
|
134
|
+
child.add_namespace("dc", "http://purl.org/dc/elements/1.1/")
|
|
135
|
+
root.add_child(child)
|
|
136
|
+
|
|
137
|
+
ns_defs = child.namespaces
|
|
138
|
+
prefixes = ns_defs.map(&:prefix)
|
|
139
|
+
|
|
140
|
+
expect(prefixes).to contain_exactly("dc")
|
|
141
|
+
expect(prefixes).not_to include("xs")
|
|
142
|
+
end
|
|
143
|
+
end
|
|
144
|
+
|
|
145
|
+
describe "in_scope_namespaces" do
|
|
146
|
+
it "returns namespaces declared on the element itself" do
|
|
147
|
+
element.add_namespace("xs", "http://www.w3.org/2001/XMLSchema")
|
|
148
|
+
element.add_namespace("xsi", "http://www.w3.org/2001/XMLSchema-instance")
|
|
149
|
+
|
|
150
|
+
in_scope = element.in_scope_namespaces
|
|
151
|
+
prefixes = in_scope.map(&:prefix)
|
|
152
|
+
uris = in_scope.map(&:uri)
|
|
153
|
+
|
|
154
|
+
expect(prefixes).to include("xs", "xsi")
|
|
155
|
+
expect(uris).to include(
|
|
156
|
+
"http://www.w3.org/2001/XMLSchema",
|
|
157
|
+
"http://www.w3.org/2001/XMLSchema-instance",
|
|
158
|
+
)
|
|
159
|
+
end
|
|
160
|
+
|
|
161
|
+
it "inherits namespaces from ancestor elements" do
|
|
162
|
+
root = doc.create_element("root")
|
|
163
|
+
root.add_namespace("xs", "http://www.w3.org/2001/XMLSchema")
|
|
164
|
+
child = doc.create_element("child")
|
|
165
|
+
root.add_child(child)
|
|
166
|
+
|
|
167
|
+
in_scope = child.in_scope_namespaces
|
|
168
|
+
prefixes = in_scope.map(&:prefix)
|
|
169
|
+
|
|
170
|
+
expect(prefixes).to include("xs")
|
|
171
|
+
end
|
|
172
|
+
|
|
173
|
+
it "collects namespaces from multiple ancestor levels" do
|
|
174
|
+
root = doc.create_element("root")
|
|
175
|
+
root.add_namespace("xs", "http://www.w3.org/2001/XMLSchema")
|
|
176
|
+
middle = doc.create_element("middle")
|
|
177
|
+
middle.add_namespace("dc", "http://purl.org/dc/elements/1.1/")
|
|
178
|
+
root.add_child(middle)
|
|
179
|
+
leaf = doc.create_element("leaf")
|
|
180
|
+
middle.add_child(leaf)
|
|
181
|
+
|
|
182
|
+
in_scope = leaf.in_scope_namespaces
|
|
183
|
+
prefixes = in_scope.map(&:prefix)
|
|
184
|
+
|
|
185
|
+
expect(prefixes).to include("xs", "dc")
|
|
186
|
+
end
|
|
187
|
+
|
|
188
|
+
it "closest ancestor wins for duplicate prefixes" do
|
|
189
|
+
root = doc.create_element("root")
|
|
190
|
+
root.add_namespace("ns", "http://example.org/old")
|
|
191
|
+
child = doc.create_element("child")
|
|
192
|
+
child.add_namespace("ns", "http://example.org/new")
|
|
193
|
+
root.add_child(child)
|
|
194
|
+
|
|
195
|
+
in_scope = child.in_scope_namespaces
|
|
196
|
+
ns_match = in_scope.find { |ns| ns.prefix == "ns" }
|
|
197
|
+
|
|
198
|
+
expect(ns_match.uri).to eq("http://example.org/new")
|
|
199
|
+
end
|
|
200
|
+
|
|
201
|
+
it "includes default namespace" do
|
|
202
|
+
root = doc.create_element("root")
|
|
203
|
+
root.add_namespace(nil, "http://example.org/default")
|
|
204
|
+
child = doc.create_element("child")
|
|
205
|
+
root.add_child(child)
|
|
206
|
+
|
|
207
|
+
in_scope = child.in_scope_namespaces
|
|
208
|
+
default_ns = in_scope.find { |ns| ns.prefix.nil? }
|
|
209
|
+
|
|
210
|
+
expect(default_ns).not_to be_nil
|
|
211
|
+
expect(default_ns.uri).to eq("http://example.org/default")
|
|
212
|
+
end
|
|
213
|
+
|
|
214
|
+
it "returns empty array for element with no namespaces" do
|
|
215
|
+
lonely = doc.create_element("lonely")
|
|
216
|
+
expect(lonely.in_scope_namespaces).to eq([])
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
it "returns empty array for document root with no namespace declarations" do
|
|
220
|
+
root = doc.create_element("root")
|
|
221
|
+
doc.add_child(root)
|
|
222
|
+
expect(root.in_scope_namespaces).to eq([])
|
|
223
|
+
end
|
|
224
|
+
end
|
|
225
|
+
|
|
91
226
|
describe "inheritance" do
|
|
92
227
|
it "does not inherit parent namespaces" do
|
|
93
228
|
# https://stackoverflow.com/a/67347081
|
|
@@ -109,9 +109,6 @@ RSpec.shared_examples "Moxml::Node" do
|
|
|
109
109
|
if context.config.adapter_name == :ox
|
|
110
110
|
pending "Ox doesn't have a native XPath"
|
|
111
111
|
end
|
|
112
|
-
if context.config.adapter_name == :headed_ox
|
|
113
|
-
skip "HeadedOx limitation: Text content access from nested elements needs investigation. See docs/_pages/headed-ox-limitations.adoc"
|
|
114
|
-
end
|
|
115
112
|
|
|
116
113
|
node = doc.at_xpath("//b")
|
|
117
114
|
expect(node.text).to eq("1")
|
|
@@ -0,0 +1,97 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "spec_helper"
|
|
4
|
+
|
|
5
|
+
# Adapters that use DocumentBuilder for parse (entity restoration during parsing works)
|
|
6
|
+
DBUILD_ADAPTERS = %i[oga rexml].freeze
|
|
7
|
+
|
|
8
|
+
# All adapters with entity reference support
|
|
9
|
+
ENTITY_ADAPTERS = %i[nokogiri oga ox rexml].freeze
|
|
10
|
+
|
|
11
|
+
RSpec.shared_examples "standard entity preservation" do |adapter_name|
|
|
12
|
+
context "with #{adapter_name}", adapter: adapter_name do
|
|
13
|
+
it "preserves amp entity through round-trip" do
|
|
14
|
+
ctx = Moxml.new(adapter_name) { |c| c.restore_entities = true }
|
|
15
|
+
doc = ctx.parse("<p>a &amp; b</p>")
|
|
16
|
+
expect(doc.to_xml).to include("&amp;")
|
|
17
|
+
end
|
|
18
|
+
|
|
19
|
+
it "preserves lt entity through round-trip" do
|
|
20
|
+
ctx = Moxml.new(adapter_name) { |c| c.restore_entities = true }
|
|
21
|
+
doc = ctx.parse("<p>a &lt; b</p>")
|
|
22
|
+
expect(doc.to_xml).to include("&lt;")
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
it "preserves gt entity through round-trip" do
|
|
26
|
+
ctx = Moxml.new(adapter_name) { |c| c.restore_entities = true }
|
|
27
|
+
doc = ctx.parse("<p>a &gt; b</p>")
|
|
28
|
+
expect(doc.to_xml).to include("&gt;")
|
|
29
|
+
end
|
|
30
|
+
|
|
31
|
+
it "produces valid XML through round-trip" do
|
|
32
|
+
ctx = Moxml.new(adapter_name) { |c| c.restore_entities = true }
|
|
33
|
+
doc = ctx.parse("<p>&amp; text &amp;</p>")
|
|
34
|
+
output = doc.to_xml
|
|
35
|
+
expect { ctx.parse(output) }.not_to raise_error
|
|
36
|
+
end
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
RSpec.shared_examples "non-standard entity restoration via DocumentBuilder" do |adapter_name|
|
|
41
|
+
context "with #{adapter_name}", adapter: adapter_name do
|
|
42
|
+
let(:ctx) { Moxml.new(adapter_name) { |c| c.restore_entities = true } }
|
|
43
|
+
|
|
44
|
+
it "restores nbsp (U+00A0) from bundled W3C entity set" do
|
|
45
|
+
doc = ctx.parse("<p>\u00A0</p>")
|
|
46
|
+
expect(doc.to_xml).to include("&nbsp;")
|
|
47
|
+
end
|
|
48
|
+
|
|
49
|
+
it "restores copy (U+00A9) from bundled W3C entity set" do
|
|
50
|
+
doc = ctx.parse("<p>\u00A9</p>")
|
|
51
|
+
expect(doc.to_xml).to include("&copy;")
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
it "restores mdash (U+2014) from bundled W3C entity set" do
|
|
55
|
+
doc = ctx.parse("<p>\u2014</p>")
|
|
56
|
+
expect(doc.to_xml).to include("&mdash;")
|
|
57
|
+
end
|
|
58
|
+
|
|
59
|
+
it "restores multiple entities in a single text node" do
|
|
60
|
+
doc = ctx.parse("<p>before\u00A0middle\u00A9end</p>")
|
|
61
|
+
output = doc.to_xml
|
|
62
|
+
expect(output).to include("&nbsp;")
|
|
63
|
+
expect(output).to include("&copy;")
|
|
64
|
+
end
|
|
65
|
+
end
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
RSpec.shared_examples "restore_entities disabled" do |adapter_name|
|
|
69
|
+
context "with #{adapter_name}", adapter: adapter_name do
|
|
70
|
+
let(:ctx) { Moxml.new(adapter_name) { |c| c.restore_entities = false } }
|
|
71
|
+
|
|
72
|
+
it "does not create EntityReference nodes for standard entities" do
|
|
73
|
+
doc = ctx.parse("<p>a &amp; b</p>")
|
|
74
|
+
entity_children = doc.root.children.grep(Moxml::EntityReference)
|
|
75
|
+
expect(entity_children).to be_empty
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
it "does not create EntityReference nodes for non-standard characters" do
|
|
79
|
+
doc = ctx.parse("<p>\u00A0</p>")
|
|
80
|
+
entity_children = doc.root.children.grep(Moxml::EntityReference)
|
|
81
|
+
expect(entity_children).to be_empty
|
|
82
|
+
end
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
RSpec.describe "Entity restoration" do
|
|
87
|
+
ENTITY_ADAPTERS.each do |adapter_name|
|
|
88
|
+
it_behaves_like "standard entity preservation", adapter_name
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
DBUILD_ADAPTERS.each do |adapter_name|
|
|
92
|
+
it_behaves_like "non-standard entity restoration via DocumentBuilder",
|
|
93
|
+
adapter_name
|
|
94
|
+
|
|
95
|
+
it_behaves_like "restore_entities disabled", adapter_name
|
|
96
|
+
end
|
|
97
|
+
end
|
data/spec/moxml/builder_spec.rb
CHANGED
|
@@ -266,7 +266,22 @@ RSpec.describe Moxml::Builder do
|
|
|
266
266
|
builder.title("Hello") { builder.child }
|
|
267
267
|
end
|
|
268
268
|
end
|
|
269
|
-
end.to raise_error(ArgumentError,
|
|
269
|
+
end.to raise_error(ArgumentError,
|
|
270
|
+
/title: cannot combine text content with a block/)
|
|
271
|
+
end
|
|
272
|
+
end
|
|
273
|
+
|
|
274
|
+
describe "#entity_reference" do
|
|
275
|
+
it "creates entity references via DSL" do
|
|
276
|
+
doc = described_class.new(context).build do
|
|
277
|
+
element "p" do
|
|
278
|
+
entity_reference "nbsp"
|
|
279
|
+
end
|
|
280
|
+
end
|
|
281
|
+
ref = doc.root.children.first
|
|
282
|
+
expect(ref).to be_a(Moxml::EntityReference)
|
|
283
|
+
expect(ref.name).to eq("nbsp")
|
|
284
|
+
expect(doc.to_xml).to include("&nbsp;")
|
|
270
285
|
end
|
|
271
286
|
end
|
|
272
287
|
|
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "spec_helper"
|
|
4
|
+
|
|
5
|
+
RSpec.describe "Entity preservation across adapters" do
|
|
6
|
+
shared_examples "consistent entity handling" do
|
|
7
|
+
describe "text content with entities" do
|
|
8
|
+
it "preserves single entity in inner_text" do
|
|
9
|
+
doc = adapter.parse("<root>© 2024</root>")
|
|
10
|
+
expect(doc.root.inner_text).to eq("© 2024")
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
it "preserves multiple entities in inner_text" do
|
|
14
|
+
doc = adapter.parse("<root>© text — end</root>")
|
|
15
|
+
expect(doc.root.inner_text).to eq("© text — end")
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
it "preserves entity at start" do
|
|
19
|
+
doc = adapter.parse("<root>© start</root>")
|
|
20
|
+
expect(doc.root.inner_text).to include("©")
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
it "preserves entity at end" do
|
|
24
|
+
doc = adapter.parse("<root>end ©</root>")
|
|
25
|
+
expect(doc.root.inner_text).to include("©")
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
it "does not alter standard entities" do
|
|
29
|
+
doc = adapter.parse("<root>&amp; &lt; &gt;</root>")
|
|
30
|
+
expect(doc.root.inner_text).to eq("& < >")
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
describe "to_xml round-trip" do
|
|
35
|
+
it "preserves entity in serialized output" do
|
|
36
|
+
doc = adapter.parse("<root>© 2024</root>")
|
|
37
|
+
expect(doc.root.to_xml(declaration: false)).to include("&copy;")
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
it "preserves multiple entities in serialized output" do
|
|
41
|
+
doc = adapter.parse("<root>© text — end</root>")
|
|
42
|
+
xml = doc.root.to_xml(declaration: false)
|
|
43
|
+
expect(xml).to include("&copy;")
|
|
44
|
+
expect(xml).to include("&mdash;")
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
it "does not double-escape standard entities" do
|
|
48
|
+
doc = adapter.parse("<root>&amp; test</root>")
|
|
49
|
+
xml = doc.root.to_xml(declaration: false)
|
|
50
|
+
expect(xml).not_to include("&amp;amp;")
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
describe "attribute values with entities" do
|
|
55
|
+
it "preserves entity in attribute value" do
|
|
56
|
+
doc = adapter.parse('<root attr="© 2024"/>')
|
|
57
|
+
expect(doc.root["attr"]).to eq("© 2024")
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
it "preserves entity in attribute via Attribute#value" do
|
|
61
|
+
doc = adapter.parse('<root attr="© 2024"/>')
|
|
62
|
+
attr = doc.root.attributes.first
|
|
63
|
+
expect(attr.value).to eq("© 2024")
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
it "preserves entity in attribute to_xml" do
|
|
67
|
+
doc = adapter.parse('<root attr="© 2024"/>')
|
|
68
|
+
xml = doc.root.to_xml(declaration: false)
|
|
69
|
+
expect(xml).to include('attr="© 2024"')
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
describe "Text node content" do
|
|
74
|
+
it "preserves entity in Text#content" do
|
|
75
|
+
doc = adapter.parse("<root>© text</root>")
|
|
76
|
+
text_nodes = doc.root.children.grep(Moxml::Text)
|
|
77
|
+
combined = text_nodes.map(&:content).join
|
|
78
|
+
expect(combined).to include("©")
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
context "with nokogiri adapter" do
|
|
84
|
+
let(:adapter) { Moxml::Adapter::Nokogiri }
|
|
85
|
+
|
|
86
|
+
before { require "moxml/adapter/nokogiri" }
|
|
87
|
+
|
|
88
|
+
it_behaves_like "consistent entity handling"
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
context "with oga adapter" do
|
|
92
|
+
let(:adapter) { Moxml::Adapter::Oga }
|
|
93
|
+
|
|
94
|
+
before { require "moxml/adapter/oga" }
|
|
95
|
+
|
|
96
|
+
it_behaves_like "consistent entity handling"
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
context "with rexml adapter" do
|
|
100
|
+
let(:adapter) { Moxml::Adapter::Rexml }
|
|
101
|
+
|
|
102
|
+
before { require "moxml/adapter/rexml" }
|
|
103
|
+
|
|
104
|
+
it_behaves_like "consistent entity handling"
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
context "with ox adapter" do
|
|
108
|
+
let(:adapter) { Moxml::Adapter::Ox }
|
|
109
|
+
|
|
110
|
+
before { require "moxml/adapter/ox" }
|
|
111
|
+
|
|
112
|
+
it_behaves_like "consistent entity handling"
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
context "with headed_ox adapter" do
|
|
116
|
+
let(:adapter) { Moxml::Adapter::HeadedOx }
|
|
117
|
+
|
|
118
|
+
before { require "moxml/adapter/headed_ox" }
|
|
119
|
+
|
|
120
|
+
it_behaves_like "consistent entity handling"
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
context "with libxml adapter" do
|
|
124
|
+
let(:adapter) { Moxml::Adapter::Libxml }
|
|
125
|
+
|
|
126
|
+
before { require "moxml/adapter/libxml" }
|
|
127
|
+
|
|
128
|
+
it_behaves_like "consistent entity handling"
|
|
129
|
+
end
|
|
130
|
+
end
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "spec_helper"
|
|
4
|
+
|
|
5
|
+
RSpec.shared_examples "entity reference node" do |adapter_name|
|
|
6
|
+
context "with #{adapter_name} adapter", adapter: adapter_name do
|
|
7
|
+
let(:ctx) { Moxml.new(adapter_name) }
|
|
8
|
+
|
|
9
|
+
describe "creating entity references" do
|
|
10
|
+
it "creates an entity reference node" do
|
|
11
|
+
doc = ctx.create_document
|
|
12
|
+
ref = doc.create_entity_reference("nbsp")
|
|
13
|
+
expect(ref).to be_a(Moxml::EntityReference)
|
|
14
|
+
expect(ref.name).to eq("nbsp")
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
it "creates standard XML entity references" do
|
|
18
|
+
doc = ctx.create_document
|
|
19
|
+
%w[amp lt gt quot apos].each do |name|
|
|
20
|
+
ref = doc.create_entity_reference(name)
|
|
21
|
+
expect(ref.name).to eq(name)
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
it "raises ValidationError for invalid names" do
|
|
26
|
+
doc = ctx.create_document
|
|
27
|
+
expect do
|
|
28
|
+
doc.create_entity_reference("123invalid")
|
|
29
|
+
end.to raise_error(Moxml::ValidationError)
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
it "raises ValidationError for empty name" do
|
|
33
|
+
doc = ctx.create_document
|
|
34
|
+
expect do
|
|
35
|
+
doc.create_entity_reference("")
|
|
36
|
+
end.to raise_error(Moxml::ValidationError)
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
describe "node properties" do
|
|
41
|
+
it "has empty text content" do
|
|
42
|
+
doc = ctx.create_document
|
|
43
|
+
ref = doc.create_entity_reference("amp")
|
|
44
|
+
expect(ref.text).to eq("")
|
|
45
|
+
expect(ref.content).to eq("")
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
it "is recognized as entity_reference type" do
|
|
49
|
+
doc = ctx.create_document
|
|
50
|
+
ref = doc.create_entity_reference("copy")
|
|
51
|
+
expect(ref.entity_reference?).to be true
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
describe "serialization" do
|
|
56
|
+
it "serializes to entity syntax" do
|
|
57
|
+
doc = ctx.create_document
|
|
58
|
+
ref = doc.create_entity_reference("mdash")
|
|
59
|
+
expect(ref.to_xml).to eq("&mdash;")
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
it "serializes standard entities" do
|
|
63
|
+
doc = ctx.create_document
|
|
64
|
+
ref = doc.create_entity_reference("amp")
|
|
65
|
+
expect(ref.to_xml).to eq("&amp;")
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
describe "adding to document" do
|
|
70
|
+
it "survives add_child and retrieval" do
|
|
71
|
+
doc = ctx.create_document
|
|
72
|
+
root = doc.create_element("p")
|
|
73
|
+
doc.root = root
|
|
74
|
+
ref = doc.create_entity_reference("nbsp")
|
|
75
|
+
root.add_child(ref)
|
|
76
|
+
children = root.children
|
|
77
|
+
expect(children.size).to be >= 1
|
|
78
|
+
entity_child = children.find { |c| c.is_a?(Moxml::EntityReference) }
|
|
79
|
+
expect(entity_child).not_to be_nil
|
|
80
|
+
expect(entity_child.name).to eq("nbsp")
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
it "serializes within a document" do
|
|
84
|
+
doc = ctx.create_document
|
|
85
|
+
root = doc.create_element("p")
|
|
86
|
+
doc.root = root
|
|
87
|
+
root.add_child(doc.create_text("before"))
|
|
88
|
+
root.add_child(doc.create_entity_reference("nbsp"))
|
|
89
|
+
root.add_child(doc.create_text("after"))
|
|
90
|
+
output = doc.to_xml
|
|
91
|
+
expect(output).to include("&nbsp;")
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
it "preserves multiple entity references in sequence" do
|
|
95
|
+
doc = ctx.create_document
|
|
96
|
+
root = doc.create_element("p")
|
|
97
|
+
doc.root = root
|
|
98
|
+
root.add_child(doc.create_entity_reference("nbsp"))
|
|
99
|
+
root.add_child(doc.create_entity_reference("copy"))
|
|
100
|
+
root.add_child(doc.create_entity_reference("mdash"))
|
|
101
|
+
output = doc.to_xml
|
|
102
|
+
expect(output).to include("&nbsp;")
|
|
103
|
+
expect(output).to include("&copy;")
|
|
104
|
+
expect(output).to include("&mdash;")
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
RSpec.describe Moxml::EntityReference do
|
|
111
|
+
%i[nokogiri oga ox rexml].each do |adapter_name|
|
|
112
|
+
it_behaves_like "entity reference node", adapter_name
|
|
113
|
+
end
|
|
114
|
+
end
|