moxml 0.1.16 → 0.1.18
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.gitignore +6 -0
- data/.rubocop_todo.yml +49 -133
- data/README.adoc +18 -0
- data/lib/moxml/adapter/base.rb +65 -8
- data/lib/moxml/adapter/headed_ox.rb +2 -1
- data/lib/moxml/adapter/libxml.rb +16 -6
- data/lib/moxml/adapter/nokogiri.rb +13 -7
- data/lib/moxml/adapter/oga.rb +35 -90
- data/lib/moxml/adapter/ox.rb +69 -19
- data/lib/moxml/adapter/rexml.rb +26 -9
- data/lib/moxml/attribute.rb +6 -0
- data/lib/moxml/config.rb +17 -2
- data/lib/moxml/element.rb +12 -8
- data/lib/moxml/node.rb +4 -1
- data/lib/moxml/text.rb +6 -0
- data/lib/moxml/version.rb +1 -1
- data/lib/moxml/xpath/compiler.rb +40 -21
- data/lib/moxml/xpath/parser.rb +12 -7
- data/spec/integration/all_adapters_spec.rb +1 -0
- data/spec/integration/shared_examples/edge_cases.rb +85 -6
- data/spec/integration/shared_examples/entity_reference_whitespace.rb +124 -0
- data/spec/integration/shared_examples/high_level/document_builder_behavior.rb +8 -6
- data/spec/integration/shared_examples/integration_workflows.rb +1 -1
- data/spec/integration/shared_examples/node_wrappers/cdata_behavior.rb +0 -7
- data/spec/integration/shared_examples/node_wrappers/namespace_behavior.rb +135 -0
- data/spec/integration/shared_examples/node_wrappers/node_behavior.rb +0 -3
- data/spec/integration/shared_examples/node_wrappers/node_set_behavior.rb +3 -1
- data/spec/moxml/adapter/entity_restoration_spec.rb +97 -0
- data/spec/moxml/builder_spec.rb +16 -1
- data/spec/moxml/entity_preservation_spec.rb +130 -0
- data/spec/moxml/entity_reference_spec.rb +114 -0
- data/spec/moxml/entity_registry_spec.rb +68 -0
- data/spec/moxml/moxml_spec.rb +39 -0
- data/spec/moxml/xpath/axes_spec.rb +0 -1
- data/spec/moxml/xpath/compiler_spec.rb +0 -2
- data/spec/performance/benchmark_spec.rb +1 -1
- metadata +6 -12
- data/TODO.remaining/1-entity-reference-adapter-support.md +0 -157
- data/TODO.remaining/2-entity-restoration-model-driven.md +0 -169
- data/TODO.remaining/3-entity-reference-test-coverage.md +0 -170
- data/TODO.remaining/4-lenient-entities-mode.md +0 -106
- data/TODO.remaining/5-fixture-integrity.md +0 -65
- data/TODO.remaining/6-ox-element-ordering-bug.md +0 -36
- data/TODO.remaining/7-headed-ox-limitations.md +0 -95
- data/TODO.remaining/8-xpath-predicate-gaps.md +0 -68
- data/TODO.remaining/9-cleanup-hygiene.md +0 -42
- data/TODO.remaining/README.md +0 -54
|
@@ -0,0 +1,130 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "spec_helper"
|
|
4
|
+
|
|
5
|
+
RSpec.describe "Entity preservation across adapters" do
|
|
6
|
+
shared_examples "consistent entity handling" do
|
|
7
|
+
describe "text content with entities" do
|
|
8
|
+
it "preserves single entity in inner_text" do
|
|
9
|
+
doc = adapter.parse("<root>© 2024</root>")
|
|
10
|
+
expect(doc.root.inner_text).to eq("© 2024")
|
|
11
|
+
end
|
|
12
|
+
|
|
13
|
+
it "preserves multiple entities in inner_text" do
|
|
14
|
+
doc = adapter.parse("<root>© text — end</root>")
|
|
15
|
+
expect(doc.root.inner_text).to eq("© text — end")
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
it "preserves entity at start" do
|
|
19
|
+
doc = adapter.parse("<root>© start</root>")
|
|
20
|
+
expect(doc.root.inner_text).to include("©")
|
|
21
|
+
end
|
|
22
|
+
|
|
23
|
+
it "preserves entity at end" do
|
|
24
|
+
doc = adapter.parse("<root>end ©</root>")
|
|
25
|
+
expect(doc.root.inner_text).to include("©")
|
|
26
|
+
end
|
|
27
|
+
|
|
28
|
+
it "does not alter standard entities" do
|
|
29
|
+
doc = adapter.parse("<root>&amp; &lt; &gt;</root>")
|
|
30
|
+
expect(doc.root.inner_text).to eq("& < >")
|
|
31
|
+
end
|
|
32
|
+
end
|
|
33
|
+
|
|
34
|
+
describe "to_xml round-trip" do
|
|
35
|
+
it "preserves entity in serialized output" do
|
|
36
|
+
doc = adapter.parse("<root>© 2024</root>")
|
|
37
|
+
expect(doc.root.to_xml(declaration: false)).to include("©")
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
it "preserves multiple entities in serialized output" do
|
|
41
|
+
doc = adapter.parse("<root>© text — end</root>")
|
|
42
|
+
xml = doc.root.to_xml(declaration: false)
|
|
43
|
+
expect(xml).to include("©")
|
|
44
|
+
expect(xml).to include("—")
|
|
45
|
+
end
|
|
46
|
+
|
|
47
|
+
it "does not double-escape standard entities" do
|
|
48
|
+
doc = adapter.parse("<root>&amp; test</root>")
|
|
49
|
+
xml = doc.root.to_xml(declaration: false)
|
|
50
|
+
expect(xml).not_to include("&amp;amp;")
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
describe "attribute values with entities" do
|
|
55
|
+
it "preserves entity in attribute value" do
|
|
56
|
+
doc = adapter.parse('<root attr="© 2024"/>')
|
|
57
|
+
expect(doc.root["attr"]).to eq("© 2024")
|
|
58
|
+
end
|
|
59
|
+
|
|
60
|
+
it "preserves entity in attribute via Attribute#value" do
|
|
61
|
+
doc = adapter.parse('<root attr="© 2024"/>')
|
|
62
|
+
attr = doc.root.attributes.first
|
|
63
|
+
expect(attr.value).to eq("© 2024")
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
it "preserves entity in attribute to_xml" do
|
|
67
|
+
doc = adapter.parse('<root attr="© 2024"/>')
|
|
68
|
+
xml = doc.root.to_xml(declaration: false)
|
|
69
|
+
expect(xml).to include('attr="© 2024"')
|
|
70
|
+
end
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
describe "Text node content" do
|
|
74
|
+
it "preserves entity in Text#content" do
|
|
75
|
+
doc = adapter.parse("<root>© text</root>")
|
|
76
|
+
text_nodes = doc.root.children.grep(Moxml::Text)
|
|
77
|
+
combined = text_nodes.map(&:content).join
|
|
78
|
+
expect(combined).to include("©")
|
|
79
|
+
end
|
|
80
|
+
end
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
context "with nokogiri adapter" do
|
|
84
|
+
let(:adapter) { Moxml::Adapter::Nokogiri }
|
|
85
|
+
|
|
86
|
+
before { require "moxml/adapter/nokogiri" }
|
|
87
|
+
|
|
88
|
+
it_behaves_like "consistent entity handling"
|
|
89
|
+
end
|
|
90
|
+
|
|
91
|
+
context "with oga adapter" do
|
|
92
|
+
let(:adapter) { Moxml::Adapter::Oga }
|
|
93
|
+
|
|
94
|
+
before { require "moxml/adapter/oga" }
|
|
95
|
+
|
|
96
|
+
it_behaves_like "consistent entity handling"
|
|
97
|
+
end
|
|
98
|
+
|
|
99
|
+
context "with rexml adapter" do
|
|
100
|
+
let(:adapter) { Moxml::Adapter::Rexml }
|
|
101
|
+
|
|
102
|
+
before { require "moxml/adapter/rexml" }
|
|
103
|
+
|
|
104
|
+
it_behaves_like "consistent entity handling"
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
context "with ox adapter" do
|
|
108
|
+
let(:adapter) { Moxml::Adapter::Ox }
|
|
109
|
+
|
|
110
|
+
before { require "moxml/adapter/ox" }
|
|
111
|
+
|
|
112
|
+
it_behaves_like "consistent entity handling"
|
|
113
|
+
end
|
|
114
|
+
|
|
115
|
+
context "with headed_ox adapter" do
|
|
116
|
+
let(:adapter) { Moxml::Adapter::HeadedOx }
|
|
117
|
+
|
|
118
|
+
before { require "moxml/adapter/headed_ox" }
|
|
119
|
+
|
|
120
|
+
it_behaves_like "consistent entity handling"
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
context "with libxml adapter" do
|
|
124
|
+
let(:adapter) { Moxml::Adapter::Libxml }
|
|
125
|
+
|
|
126
|
+
before { require "moxml/adapter/libxml" }
|
|
127
|
+
|
|
128
|
+
it_behaves_like "consistent entity handling"
|
|
129
|
+
end
|
|
130
|
+
end
|
|
@@ -0,0 +1,114 @@
|
|
|
1
|
+
# frozen_string_literal: true
|
|
2
|
+
|
|
3
|
+
require "spec_helper"
|
|
4
|
+
|
|
5
|
+
RSpec.shared_examples "entity reference node" do |adapter_name|
|
|
6
|
+
context "with #{adapter_name} adapter", adapter: adapter_name do
|
|
7
|
+
let(:ctx) { Moxml.new(adapter_name) }
|
|
8
|
+
|
|
9
|
+
describe "creating entity references" do
|
|
10
|
+
it "creates an entity reference node" do
|
|
11
|
+
doc = ctx.create_document
|
|
12
|
+
ref = doc.create_entity_reference("nbsp")
|
|
13
|
+
expect(ref).to be_a(Moxml::EntityReference)
|
|
14
|
+
expect(ref.name).to eq("nbsp")
|
|
15
|
+
end
|
|
16
|
+
|
|
17
|
+
it "creates standard XML entity references" do
|
|
18
|
+
doc = ctx.create_document
|
|
19
|
+
%w[amp lt gt quot apos].each do |name|
|
|
20
|
+
ref = doc.create_entity_reference(name)
|
|
21
|
+
expect(ref.name).to eq(name)
|
|
22
|
+
end
|
|
23
|
+
end
|
|
24
|
+
|
|
25
|
+
it "raises ValidationError for invalid names" do
|
|
26
|
+
doc = ctx.create_document
|
|
27
|
+
expect do
|
|
28
|
+
doc.create_entity_reference("123invalid")
|
|
29
|
+
end.to raise_error(Moxml::ValidationError)
|
|
30
|
+
end
|
|
31
|
+
|
|
32
|
+
it "raises ValidationError for empty name" do
|
|
33
|
+
doc = ctx.create_document
|
|
34
|
+
expect do
|
|
35
|
+
doc.create_entity_reference("")
|
|
36
|
+
end.to raise_error(Moxml::ValidationError)
|
|
37
|
+
end
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
describe "node properties" do
|
|
41
|
+
it "has empty text content" do
|
|
42
|
+
doc = ctx.create_document
|
|
43
|
+
ref = doc.create_entity_reference("amp")
|
|
44
|
+
expect(ref.text).to eq("")
|
|
45
|
+
expect(ref.content).to eq("")
|
|
46
|
+
end
|
|
47
|
+
|
|
48
|
+
it "is recognized as entity_reference type" do
|
|
49
|
+
doc = ctx.create_document
|
|
50
|
+
ref = doc.create_entity_reference("copy")
|
|
51
|
+
expect(ref.entity_reference?).to be true
|
|
52
|
+
end
|
|
53
|
+
end
|
|
54
|
+
|
|
55
|
+
describe "serialization" do
|
|
56
|
+
it "serializes to entity syntax" do
|
|
57
|
+
doc = ctx.create_document
|
|
58
|
+
ref = doc.create_entity_reference("mdash")
|
|
59
|
+
expect(ref.to_xml).to eq("&mdash;")
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
it "serializes standard entities" do
|
|
63
|
+
doc = ctx.create_document
|
|
64
|
+
ref = doc.create_entity_reference("amp")
|
|
65
|
+
expect(ref.to_xml).to eq("&amp;")
|
|
66
|
+
end
|
|
67
|
+
end
|
|
68
|
+
|
|
69
|
+
describe "adding to document" do
|
|
70
|
+
it "survives add_child and retrieval" do
|
|
71
|
+
doc = ctx.create_document
|
|
72
|
+
root = doc.create_element("p")
|
|
73
|
+
doc.root = root
|
|
74
|
+
ref = doc.create_entity_reference("nbsp")
|
|
75
|
+
root.add_child(ref)
|
|
76
|
+
children = root.children
|
|
77
|
+
expect(children.size).to be >= 1
|
|
78
|
+
entity_child = children.find { |c| c.is_a?(Moxml::EntityReference) }
|
|
79
|
+
expect(entity_child).not_to be_nil
|
|
80
|
+
expect(entity_child.name).to eq("nbsp")
|
|
81
|
+
end
|
|
82
|
+
|
|
83
|
+
it "serializes within a document" do
|
|
84
|
+
doc = ctx.create_document
|
|
85
|
+
root = doc.create_element("p")
|
|
86
|
+
doc.root = root
|
|
87
|
+
root.add_child(doc.create_text("before"))
|
|
88
|
+
root.add_child(doc.create_entity_reference("nbsp"))
|
|
89
|
+
root.add_child(doc.create_text("after"))
|
|
90
|
+
output = doc.to_xml
|
|
91
|
+
expect(output).to include("&nbsp;")
|
|
92
|
+
end
|
|
93
|
+
|
|
94
|
+
it "preserves multiple entity references in sequence" do
|
|
95
|
+
doc = ctx.create_document
|
|
96
|
+
root = doc.create_element("p")
|
|
97
|
+
doc.root = root
|
|
98
|
+
root.add_child(doc.create_entity_reference("nbsp"))
|
|
99
|
+
root.add_child(doc.create_entity_reference("copy"))
|
|
100
|
+
root.add_child(doc.create_entity_reference("mdash"))
|
|
101
|
+
output = doc.to_xml
|
|
102
|
+
expect(output).to include("&nbsp;")
|
|
103
|
+
expect(output).to include("&copy;")
|
|
104
|
+
expect(output).to include("&mdash;")
|
|
105
|
+
end
|
|
106
|
+
end
|
|
107
|
+
end
|
|
108
|
+
end
|
|
109
|
+
|
|
110
|
+
RSpec.describe Moxml::EntityReference do
|
|
111
|
+
%i[nokogiri oga ox rexml].each do |adapter_name|
|
|
112
|
+
it_behaves_like "entity reference node", adapter_name
|
|
113
|
+
end
|
|
114
|
+
end
|
|
@@ -181,4 +181,72 @@ RSpec.describe Moxml::EntityRegistry do
|
|
|
181
181
|
expect(registry.load_all).to be(registry)
|
|
182
182
|
end
|
|
183
183
|
end
|
|
184
|
+
|
|
185
|
+
describe "#standard_entity?" do
|
|
186
|
+
it "returns true for the 5 standard XML entities" do
|
|
187
|
+
registry = described_class.new
|
|
188
|
+
expect(registry.standard_entity?(0x26)).to be true # amp
|
|
189
|
+
expect(registry.standard_entity?(0x3C)).to be true # lt
|
|
190
|
+
expect(registry.standard_entity?(0x3E)).to be true # gt
|
|
191
|
+
expect(registry.standard_entity?(0x22)).to be true # quot
|
|
192
|
+
expect(registry.standard_entity?(0x27)).to be true # apos
|
|
193
|
+
end
|
|
194
|
+
|
|
195
|
+
it "returns false for non-standard codepoints" do
|
|
196
|
+
registry = described_class.new
|
|
197
|
+
expect(registry.standard_entity?(0xA0)).to be false # nbsp
|
|
198
|
+
expect(registry.standard_entity?(0xA9)).to be false # copy
|
|
199
|
+
expect(registry.standard_entity?(0x30)).to be false # '0'
|
|
200
|
+
end
|
|
201
|
+
end
|
|
202
|
+
|
|
203
|
+
describe "#should_restore?" do
|
|
204
|
+
it "always restores the 5 standard XML entities regardless of config" do
|
|
205
|
+
registry = described_class.new
|
|
206
|
+
config = Moxml::Config.new(:nokogiri)
|
|
207
|
+
config.restore_entities = false
|
|
208
|
+
expect(registry.should_restore?(0x26, config: config)).to be true # amp
|
|
209
|
+
expect(registry.should_restore?(0x3C, config: config)).to be true # lt
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
it "restores non-standard entities when restore_entities is true and mode is lenient" do
|
|
213
|
+
registry = described_class.new
|
|
214
|
+
config = Moxml::Config.new(:nokogiri)
|
|
215
|
+
config.restore_entities = true
|
|
216
|
+
config.entity_restoration_mode = :lenient
|
|
217
|
+
expect(registry.should_restore?(0xA0, config: config)).to be true # nbsp
|
|
218
|
+
expect(registry.should_restore?(0xA9, config: config)).to be true # copy
|
|
219
|
+
end
|
|
220
|
+
|
|
221
|
+
it "does not restore non-standard entities when restore_entities is false" do
|
|
222
|
+
registry = described_class.new
|
|
223
|
+
config = Moxml::Config.new(:nokogiri)
|
|
224
|
+
config.restore_entities = false
|
|
225
|
+
expect(registry.should_restore?(0xA0, config: config)).to be false
|
|
226
|
+
end
|
|
227
|
+
|
|
228
|
+
it "returns false for codepoints not in the registry" do
|
|
229
|
+
registry = described_class.new(mode: :disabled)
|
|
230
|
+
config = Moxml::Config.new(:nokogiri)
|
|
231
|
+
config.restore_entities = true
|
|
232
|
+
expect(registry.should_restore?(0x30, config: config)).to be false # '0'
|
|
233
|
+
end
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
describe "#restorable_codepoints" do
|
|
237
|
+
it "returns the set of codepoints that could be restored" do
|
|
238
|
+
registry = described_class.new
|
|
239
|
+
codepoints = registry.restorable_codepoints
|
|
240
|
+
expect(codepoints).to be_a(Set)
|
|
241
|
+
expect(codepoints).to include(0x26) # amp
|
|
242
|
+
expect(codepoints).to include(0xA0) # nbsp
|
|
243
|
+
expect(codepoints.size).to be > 100
|
|
244
|
+
end
|
|
245
|
+
|
|
246
|
+
it "returns only standard codepoints for empty registry" do
|
|
247
|
+
registry = described_class.new(mode: :disabled)
|
|
248
|
+
codepoints = registry.restorable_codepoints
|
|
249
|
+
expect(codepoints).to eq(described_class::STANDARD_CODEPOINTS)
|
|
250
|
+
end
|
|
251
|
+
end
|
|
184
252
|
end
|
data/spec/moxml/moxml_spec.rb
CHANGED
|
@@ -2,6 +2,28 @@
|
|
|
2
2
|
|
|
3
3
|
# spec/moxml_spec.rb
|
|
4
4
|
RSpec.describe Moxml do
|
|
5
|
+
around do |example|
|
|
6
|
+
original_default = Moxml::Config.instance_variable_get(:@default)
|
|
7
|
+
original_default_adapter = Moxml::Config.instance_variable_get(:@default_adapter)
|
|
8
|
+
|
|
9
|
+
Moxml::Config.remove_instance_variable(:@default) if Moxml::Config.instance_variable_defined?(:@default)
|
|
10
|
+
Moxml::Config.remove_instance_variable(:@default_adapter) if Moxml::Config.instance_variable_defined?(:@default_adapter)
|
|
11
|
+
|
|
12
|
+
example.run
|
|
13
|
+
ensure
|
|
14
|
+
if original_default.nil?
|
|
15
|
+
Moxml::Config.remove_instance_variable(:@default) if Moxml::Config.instance_variable_defined?(:@default)
|
|
16
|
+
else
|
|
17
|
+
Moxml::Config.instance_variable_set(:@default, original_default)
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
if original_default_adapter.nil?
|
|
21
|
+
Moxml::Config.remove_instance_variable(:@default_adapter) if Moxml::Config.instance_variable_defined?(:@default_adapter)
|
|
22
|
+
else
|
|
23
|
+
Moxml::Config.instance_variable_set(:@default_adapter, original_default_adapter)
|
|
24
|
+
end
|
|
25
|
+
end
|
|
26
|
+
|
|
5
27
|
it "has a version number" do
|
|
6
28
|
expect(Moxml::VERSION).not_to be_nil
|
|
7
29
|
end
|
|
@@ -34,6 +56,23 @@ RSpec.describe Moxml do
|
|
|
34
56
|
expect(context.config.adapter_name).to eq(:nokogiri)
|
|
35
57
|
end
|
|
36
58
|
|
|
59
|
+
it "defaults to oga on Opal" do
|
|
60
|
+
stub_const("RUBY_ENGINE", "opal")
|
|
61
|
+
|
|
62
|
+
context = described_class.new
|
|
63
|
+
expect(context.config.adapter_name).to eq(:oga)
|
|
64
|
+
end
|
|
65
|
+
|
|
66
|
+
it "prefers ox when it is already loaded" do
|
|
67
|
+
allow(Object).to receive(:const_defined?).and_call_original
|
|
68
|
+
allow(Object).to receive(:const_defined?).with(:Nokogiri).and_return(false)
|
|
69
|
+
allow(Object).to receive(:const_defined?).with(:Ox).and_return(true)
|
|
70
|
+
allow(Object).to receive(:const_defined?).with(:Oga).and_return(false)
|
|
71
|
+
|
|
72
|
+
context = described_class.new
|
|
73
|
+
expect(context.config.adapter_name).to eq(:ox)
|
|
74
|
+
end
|
|
75
|
+
|
|
37
76
|
it "uses configured options from the block" do
|
|
38
77
|
described_class.configure do |config|
|
|
39
78
|
config.default_adapter = :oga
|
|
@@ -222,7 +222,6 @@ RSpec.describe "XPath Axes" do
|
|
|
222
222
|
end
|
|
223
223
|
|
|
224
224
|
it "combines attribute axis with wildcards" do
|
|
225
|
-
skip "HeadedOx limitation: Attribute wildcard (@*) not supported by XPath parser. See docs/_pages/headed-ox-limitations.adoc"
|
|
226
225
|
ast = Moxml::XPath::Parser.parse("//book/@*")
|
|
227
226
|
proc = Moxml::XPath::Compiler.compile_with_cache(ast)
|
|
228
227
|
result = proc.call(book_doc)
|
|
@@ -153,7 +153,6 @@ RSpec.describe Moxml::XPath::Compiler do
|
|
|
153
153
|
end
|
|
154
154
|
|
|
155
155
|
it "works with wildcards" do
|
|
156
|
-
skip "HeadedOx limitation: Wildcard count differs due to Ox's DOM structure. See docs/_pages/headed-ox-limitations.adoc"
|
|
157
156
|
ast = Moxml::XPath::Parser.parse("//*")
|
|
158
157
|
proc = described_class.compile_with_cache(ast)
|
|
159
158
|
result = proc.call(nested_doc)
|
|
@@ -189,7 +188,6 @@ RSpec.describe Moxml::XPath::Compiler do
|
|
|
189
188
|
end
|
|
190
189
|
|
|
191
190
|
it "works with wildcards" do
|
|
192
|
-
skip "HeadedOx limitation: Attribute wildcard (@*) not supported by XPath parser. See docs/_pages/headed-ox-limitations.adoc"
|
|
193
191
|
ast = Moxml::XPath::Parser.parse("/root/book/@*")
|
|
194
192
|
proc = described_class.compile_with_cache(ast)
|
|
195
193
|
result = proc.call(attr_doc)
|
|
@@ -27,7 +27,7 @@ RSpec.shared_examples "Performance Examples" do
|
|
|
27
27
|
{
|
|
28
28
|
nokogiri: { parser: 15, serializer: 1000 },
|
|
29
29
|
oga: { parser: 10, serializer: 100 },
|
|
30
|
-
rexml: { parser: 0, serializer:
|
|
30
|
+
rexml: { parser: 0, serializer: 5 },
|
|
31
31
|
ox: { parser: 2, serializer: 1000 },
|
|
32
32
|
headed_ox: { parser: 2, serializer: 1000 },
|
|
33
33
|
libxml: { parser: 10, serializer: 30 },
|
metadata
CHANGED
|
@@ -1,14 +1,14 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: moxml
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.1.16
|
|
4
|
+
version: 0.1.18
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Ribose Inc.
|
|
8
8
|
autorequire:
|
|
9
9
|
bindir: exe
|
|
10
10
|
cert_chain: []
|
|
11
|
-
date: 2026-04-
|
|
11
|
+
date: 2026-04-27 00:00:00.000000000 Z
|
|
12
12
|
dependencies: []
|
|
13
13
|
description: |
|
|
14
14
|
Moxml is a unified XML manipulation library that provides a common API
|
|
@@ -34,16 +34,6 @@ files:
|
|
|
34
34
|
- LICENSE.md
|
|
35
35
|
- README.adoc
|
|
36
36
|
- Rakefile
|
|
37
|
-
- TODO.remaining/1-entity-reference-adapter-support.md
|
|
38
|
-
- TODO.remaining/2-entity-restoration-model-driven.md
|
|
39
|
-
- TODO.remaining/3-entity-reference-test-coverage.md
|
|
40
|
-
- TODO.remaining/4-lenient-entities-mode.md
|
|
41
|
-
- TODO.remaining/5-fixture-integrity.md
|
|
42
|
-
- TODO.remaining/6-ox-element-ordering-bug.md
|
|
43
|
-
- TODO.remaining/7-headed-ox-limitations.md
|
|
44
|
-
- TODO.remaining/8-xpath-predicate-gaps.md
|
|
45
|
-
- TODO.remaining/9-cleanup-hygiene.md
|
|
46
|
-
- TODO.remaining/README.md
|
|
47
37
|
- benchmarks/.gitignore
|
|
48
38
|
- benchmarks/generate_report.rb
|
|
49
39
|
- bin/console
|
|
@@ -277,6 +267,7 @@ files:
|
|
|
277
267
|
- spec/integration/all_adapters_spec.rb
|
|
278
268
|
- spec/integration/headed_ox_integration_spec.rb
|
|
279
269
|
- spec/integration/shared_examples/edge_cases.rb
|
|
270
|
+
- spec/integration/shared_examples/entity_reference_whitespace.rb
|
|
280
271
|
- spec/integration/shared_examples/high_level/.gitkeep
|
|
281
272
|
- spec/integration/shared_examples/high_level/builder_behavior.rb
|
|
282
273
|
- spec/integration/shared_examples/high_level/context_behavior.rb
|
|
@@ -302,6 +293,7 @@ files:
|
|
|
302
293
|
- spec/moxml/adapter/.gitkeep
|
|
303
294
|
- spec/moxml/adapter/README.md
|
|
304
295
|
- spec/moxml/adapter/base_spec.rb
|
|
296
|
+
- spec/moxml/adapter/entity_restoration_spec.rb
|
|
305
297
|
- spec/moxml/adapter/headed_ox_spec.rb
|
|
306
298
|
- spec/moxml/adapter/libxml_spec.rb
|
|
307
299
|
- spec/moxml/adapter/nokogiri_spec.rb
|
|
@@ -325,6 +317,8 @@ files:
|
|
|
325
317
|
- spec/moxml/document_builder_spec.rb
|
|
326
318
|
- spec/moxml/document_spec.rb
|
|
327
319
|
- spec/moxml/element_spec.rb
|
|
320
|
+
- spec/moxml/entity_preservation_spec.rb
|
|
321
|
+
- spec/moxml/entity_reference_spec.rb
|
|
328
322
|
- spec/moxml/entity_registry_spec.rb
|
|
329
323
|
- spec/moxml/error_spec.rb
|
|
330
324
|
- spec/moxml/lazy_parse_spec.rb
|
|
@@ -1,157 +0,0 @@
|
|
|
1
|
-
# TODO 1: EntityReference Adapter Support for Ox, Oga, REXML, LibXML, HeadedOx
|
|
2
|
-
|
|
3
|
-
## Problem
|
|
4
|
-
|
|
5
|
-
Only the Nokogiri adapter implements `create_native_entity_reference` and maps
|
|
6
|
-
its native type to `:entity_reference` in `node_type`. The other 5 adapters
|
|
7
|
-
will raise `NotImplementedError` if `restore_entities` is enabled or if any
|
|
8
|
-
code path calls `create_entity_reference`. This makes the entire
|
|
9
|
-
EntityReference feature **non-functional** outside Nokogiri.
|
|
10
|
-
|
|
11
|
-
## Current State (verified)
|
|
12
|
-
|
|
13
|
-
| Adapter | `create_native_entity_reference` | `node_type` mapping | Serialization | Status |
|
|
14
|
-
|-----------|----------------------------------|---------------------|---------------|--------|
|
|
15
|
-
| Nokogiri | Done (`Nokogiri::XML::EntityReference.new`) | Done | Native | Working |
|
|
16
|
-
| Ox | Missing | Missing | Uses `Ox.dump` (C-level, won't handle custom types) | Broken |
|
|
17
|
-
| HeadedOx | Missing (inherits Ox) | Missing | Same as Ox | Broken |
|
|
18
|
-
| Oga | Missing | Missing | Uses `CustomizedOga::XmlGenerator` | Broken |
|
|
19
|
-
| REXML | Missing | Missing | Uses REXML's `write` | Broken |
|
|
20
|
-
| LibXML | Missing | Missing | Uses custom serializer with wrapper detection | Broken |
|
|
21
|
-
|
|
22
|
-
## Architecture
|
|
23
|
-
|
|
24
|
-
EntityReference follows the same pattern as other non-native node types in Moxml:
|
|
25
|
-
a **wrapper class** that represents what the underlying library cannot express natively.
|
|
26
|
-
|
|
27
|
-
Each adapter needs three things:
|
|
28
|
-
1. **Wrapper class** (`CustomizedXxx::EntityReference`) — holds the entity name
|
|
29
|
-
2. **`node_type` mapping** — so `Node.wrap` can create the correct Moxml type
|
|
30
|
-
3. **Serialization** — so `to_xml` outputs `&name;`
|
|
31
|
-
|
|
32
|
-
The existing pattern: `CustomizedOx::Text` extends `::Ox::Node`,
|
|
33
|
-
`CustomizedOx::Attribute` extends `::Ox::Node`. EntityReference should follow suit.
|
|
34
|
-
|
|
35
|
-
### Serialization Challenge for Ox
|
|
36
|
-
|
|
37
|
-
Ox's `serialize` calls `::Ox.dump(node)` which is C-level — it only handles
|
|
38
|
-
Ox native types. For EntityReference wrappers to survive serialization, we need
|
|
39
|
-
one of:
|
|
40
|
-
|
|
41
|
-
- **Option A**: Custom serialization in the adapter that walks the tree manually,
|
|
42
|
-
detecting EntityReference wrappers and emitting `&name;` directly.
|
|
43
|
-
- **Option B**: Convert EntityReferences to their text equivalent before calling
|
|
44
|
-
`Ox.dump`, restoring them in a post-processing step. This is fragile.
|
|
45
|
-
- **Option C**: Override `serialize` for Element nodes to handle children
|
|
46
|
-
individually, using `Ox.dump` for native children but handling wrappers
|
|
47
|
-
directly.
|
|
48
|
-
|
|
49
|
-
**Recommended: Option A** — it's how `CustomizedOga::XmlGenerator` already works
|
|
50
|
-
for Oga. A similar tree-walking serializer for Ox gives full control.
|
|
51
|
-
|
|
52
|
-
For LibXML, the existing serializer already checks `node.respond_to?(:to_xml)`
|
|
53
|
-
for wrapper classes, so adding an EntityReference wrapper with `to_xml` returning
|
|
54
|
-
`"&#{name};"` should integrate cleanly.
|
|
55
|
-
|
|
56
|
-
## Implementation Steps
|
|
57
|
-
|
|
58
|
-
### Ox Adapter
|
|
59
|
-
|
|
60
|
-
1. Create `lib/moxml/adapter/customized_ox/entity_reference.rb`:
|
|
61
|
-
```ruby
|
|
62
|
-
module Moxml::Adapter::CustomizedOx
|
|
63
|
-
class EntityReference < ::Ox::Node
|
|
64
|
-
attr_reader :name
|
|
65
|
-
|
|
66
|
-
def initialize(name)
|
|
67
|
-
@name = name
|
|
68
|
-
super() # Ox::Node requires no args or a value
|
|
69
|
-
end
|
|
70
|
-
|
|
71
|
-
def to_xml
|
|
72
|
-
"&#{@name};"
|
|
73
|
-
end
|
|
74
|
-
alias to_s to_xml
|
|
75
|
-
end
|
|
76
|
-
end
|
|
77
|
-
```
|
|
78
|
-
|
|
79
|
-
2. Add to `lib/moxml/adapter/ox.rb`:
|
|
80
|
-
- `create_native_entity_reference(name)` → `CustomizedOx::EntityReference.new(name)`
|
|
81
|
-
- `node_type`: add `when CustomizedOx::EntityReference then :entity_reference`
|
|
82
|
-
- `patch_node`: handle EntityReference wrapper in child list
|
|
83
|
-
- `entity_reference_name(node)`: return `node.name`
|
|
84
|
-
- Serialization: handle EntityReference children when walking the tree
|
|
85
|
-
|
|
86
|
-
3. Add to `lib/moxml/adapter/ox.rb` `unpatch_node`: return wrapper as-is
|
|
87
|
-
(it extends Ox::Node so it can stay in the tree)
|
|
88
|
-
|
|
89
|
-
### HeadedOx Adapter
|
|
90
|
-
|
|
91
|
-
HeadedOx inherits from Ox, so it gets Ox's EntityReference support
|
|
92
|
-
automatically once Ox is done. Verify that the XPath engine doesn't
|
|
93
|
-
break when encountering EntityReference nodes in the tree.
|
|
94
|
-
|
|
95
|
-
### Oga Adapter
|
|
96
|
-
|
|
97
|
-
1. Create `lib/moxml/adapter/customized_oga/entity_reference.rb`:
|
|
98
|
-
```ruby
|
|
99
|
-
module Moxml::Adapter::CustomizedOga
|
|
100
|
-
class EntityReference
|
|
101
|
-
attr_reader :name
|
|
102
|
-
|
|
103
|
-
def initialize(name)
|
|
104
|
-
@name = name
|
|
105
|
-
end
|
|
106
|
-
|
|
107
|
-
def to_xml
|
|
108
|
-
"&#{@name};"
|
|
109
|
-
end
|
|
110
|
-
end
|
|
111
|
-
end
|
|
112
|
-
```
|
|
113
|
-
|
|
114
|
-
2. Add to `lib/moxml/adapter/oga.rb`:
|
|
115
|
-
- `create_native_entity_reference(name)` → `CustomizedOga::EntityReference.new(name)`
|
|
116
|
-
- `node_type`: add `when CustomizedOga::EntityReference then :entity_reference`
|
|
117
|
-
- Update `CustomizedOga::XmlGenerator` to handle EntityReference children
|
|
118
|
-
- `entity_reference_name(node)`: return `node.name`
|
|
119
|
-
|
|
120
|
-
### REXML Adapter
|
|
121
|
-
|
|
122
|
-
1. Investigate: REXML has `REXML::Entity` and `REXML::EntityRef` classes.
|
|
123
|
-
Check if they can be used as native entity reference nodes, or if a
|
|
124
|
-
wrapper is needed.
|
|
125
|
-
|
|
126
|
-
2. Add to `lib/moxml/adapter/rexml.rb`:
|
|
127
|
-
- `create_native_entity_reference(name)` — native or wrapper
|
|
128
|
-
- `node_type`: add mapping
|
|
129
|
-
- `entity_reference_name(node)`
|
|
130
|
-
|
|
131
|
-
### LibXML Adapter
|
|
132
|
-
|
|
133
|
-
1. Investigate: LibXML Ruby has `LibXML::XML::Node::ENTITY_REF_NODE` constant
|
|
134
|
-
(value 5). Check if native entity reference nodes can be created.
|
|
135
|
-
|
|
136
|
-
2. Create `lib/moxml/adapter/customized_libxml/entity_reference.rb` if needed.
|
|
137
|
-
|
|
138
|
-
3. Add to `lib/moxml/adapter/libxml.rb`:
|
|
139
|
-
- `create_native_entity_reference(name)`
|
|
140
|
-
- `node_type`: add `ENTITY_REF_NODE` mapping or wrapper mapping
|
|
141
|
-
- `entity_reference_name(node)`
|
|
142
|
-
- The existing serializer already handles wrappers with `to_xml` —
|
|
143
|
-
verify EntityReference works in this path.
|
|
144
|
-
|
|
145
|
-
## Files to Create/Modify
|
|
146
|
-
|
|
147
|
-
### New Files
|
|
148
|
-
- `lib/moxml/adapter/customized_ox/entity_reference.rb`
|
|
149
|
-
- `lib/moxml/adapter/customized_oga/entity_reference.rb`
|
|
150
|
-
- Possibly: `lib/moxml/adapter/customized_libxml/entity_reference.rb`
|
|
151
|
-
|
|
152
|
-
### Modified Files
|
|
153
|
-
- `lib/moxml/adapter/ox.rb` — create_native_entity_reference, node_type, serialization
|
|
154
|
-
- `lib/moxml/adapter/oga.rb` — create_native_entity_reference, node_type, XmlGenerator
|
|
155
|
-
- `lib/moxml/adapter/rexml.rb` — create_native_entity_reference, node_type
|
|
156
|
-
- `lib/moxml/adapter/libxml.rb` — create_native_entity_reference, node_type
|
|
157
|
-
- `lib/moxml/adapter/headed_ox.rb` — verify inheritance works (likely no changes)
|