rxerces 0.6.1 → 0.7.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGES.md +17 -0
- data/README.md +29 -1
- data/benchmarks/xpath_validation_cache_benchmark.rb +157 -0
- data/benchmarks/xpath_validation_micro_benchmark.rb +168 -0
- data/e +0 -0
- data/ext/rxerces/rxerces.cpp +497 -22
- data/lib/rxerces/version.rb +1 -1
- data/lib/rxerces.rb +3 -2
- data/rxerces.gemspec +2 -1
- data/spec/document_spec.rb +184 -17
- data/spec/node_spec.rb +230 -58
- data/spec/nodeset_spec.rb +90 -0
- data/spec/rxerces_shared.rb +1 -1
- data/spec/rxerces_spec.rb +58 -0
- data/spec/schema_spec.rb +28 -1
- data/spec/spec_helper.rb +5 -0
- data/spec/xpath_cache_spec.rb +409 -0
- data/spec/xpath_spec.rb +306 -18
- data/tmp/arm64-darwin24/rxerces/3.4.8/rxerces.bundle.dSYM/Contents/Info.plist +20 -0
- data/tmp/arm64-darwin24/rxerces/3.4.8/rxerces.bundle.dSYM/Contents/Resources/Relocations/aarch64/rxerces.bundle.yml +5 -0
- data.tar.gz.sig +0 -0
- metadata +25 -4
- metadata.gz.sig +0 -0
- /data/{tmp/arm64-darwin24/rxerces/3.4.7 → ext/rxerces}/rxerces.bundle.dSYM/Contents/Info.plist +0 -0
- /data/{tmp/arm64-darwin24/rxerces/3.4.7 → ext/rxerces}/rxerces.bundle.dSYM/Contents/Resources/Relocations/aarch64/rxerces.bundle.yml +0 -0
data/lib/rxerces/version.rb
CHANGED
data/lib/rxerces.rb
CHANGED
|
@@ -5,9 +5,10 @@ require 'rxerces/version'
|
|
|
5
5
|
module RXerces
|
|
6
6
|
# Parse XML from a string
|
|
7
7
|
# @param string [String] XML string to parse
|
|
8
|
+
# @param options [Hash] parsing options
|
|
8
9
|
# @return [RXerces::XML::Document] parsed document
|
|
9
|
-
def self.XML(string)
|
|
10
|
-
RXerces::XML::Document.parse(string)
|
|
10
|
+
def self.XML(string, **options)
|
|
11
|
+
RXerces::XML::Document.parse(string, **options)
|
|
11
12
|
end
|
|
12
13
|
|
|
13
14
|
# Alias for compatibility
|
data/rxerces.gemspec
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Gem::Specification.new do |spec|
|
|
2
2
|
spec.name = "rxerces"
|
|
3
|
-
spec.version = "0.6.1"
|
|
3
|
+
spec.version = "0.7.0"
|
|
4
4
|
spec.author = "Daniel J. Berger"
|
|
5
5
|
spec.email = "djberg96@gmail.com"
|
|
6
6
|
spec.cert_chain = ["certs/djberg96_pub.pem"]
|
|
@@ -16,6 +16,7 @@ Gem::Specification.new do |spec|
|
|
|
16
16
|
spec.add_development_dependency "rake", "~> 13.0"
|
|
17
17
|
spec.add_development_dependency "rake-compiler", "~> 1.2"
|
|
18
18
|
spec.add_development_dependency "rspec", "~> 3.12"
|
|
19
|
+
spec.add_development_dependency "mkmf-lite", "~> 0.7.5"
|
|
19
20
|
|
|
20
21
|
spec.description = <<-EOF
|
|
21
22
|
A Ruby XML library with Nokogiri-compatible API, powered by Xerces-C
|
data/spec/document_spec.rb
CHANGED
|
@@ -27,6 +27,76 @@ RSpec.describe RXerces::XML::Document do
|
|
|
27
27
|
doc = RXerces::XML::Document.parse(complex_xml)
|
|
28
28
|
expect(doc).to be_a(RXerces::XML::Document)
|
|
29
29
|
end
|
|
30
|
+
|
|
31
|
+
it "handles empty documents" do
|
|
32
|
+
expect {
|
|
33
|
+
RXerces::XML::Document.parse('')
|
|
34
|
+
}.to raise_error(RuntimeError, /invalid document structure/)
|
|
35
|
+
end
|
|
36
|
+
|
|
37
|
+
it "handles documents with only XML declaration" do
|
|
38
|
+
expect {
|
|
39
|
+
RXerces::XML::Document.parse('<?xml version="1.0"?>')
|
|
40
|
+
}.to raise_error(RuntimeError, /invalid document structure/)
|
|
41
|
+
end
|
|
42
|
+
|
|
43
|
+
it "handles deeply nested elements" do
|
|
44
|
+
# Create XML with 50 levels of nesting
|
|
45
|
+
xml = '<root>'
|
|
46
|
+
50.times { |i| xml += "<level#{i}>" }
|
|
47
|
+
xml += 'deep content'
|
|
48
|
+
50.times { |i| xml += "</level#{49-i}>" }
|
|
49
|
+
xml += '</root>'
|
|
50
|
+
|
|
51
|
+
doc = RXerces::XML::Document.parse(xml)
|
|
52
|
+
expect(doc).to be_a(RXerces::XML::Document)
|
|
53
|
+
expect(doc.root.name).to eq('root')
|
|
54
|
+
|
|
55
|
+
# Verify we can access deeply nested content
|
|
56
|
+
deepest = doc.root
|
|
57
|
+
50.times { |i| deepest = deepest.element_children.first }
|
|
58
|
+
expect(deepest.text).to eq('deep content')
|
|
59
|
+
end
|
|
60
|
+
|
|
61
|
+
it "handles documents with CDATA sections" do
|
|
62
|
+
xml_with_cdata = '<root><![CDATA[<not>&parsed;]]></root>'
|
|
63
|
+
doc = RXerces::XML::Document.parse(xml_with_cdata)
|
|
64
|
+
expect(doc.root.text).to eq('<not>&parsed;')
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
it "handles documents with processing instructions" do
|
|
68
|
+
xml_with_pi = '<?xml-stylesheet type="text/xsl" href="style.xsl"?><root>content</root>'
|
|
69
|
+
doc = RXerces::XML::Document.parse(xml_with_pi)
|
|
70
|
+
expect(doc.root.text).to eq('content')
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
it "handles documents with comments" do
|
|
74
|
+
xml_with_comment = '<root><!-- This is a comment -->content</root>'
|
|
75
|
+
doc = RXerces::XML::Document.parse(xml_with_comment)
|
|
76
|
+
expect(doc.root.text).to eq('content')
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
it "handles mixed content" do
|
|
80
|
+
mixed_xml = '<root>text <b>bold</b> more text <i>italic</i> end</root>'
|
|
81
|
+
doc = RXerces::XML::Document.parse(mixed_xml)
|
|
82
|
+
expect(doc.root.children.length).to eq(5) # 3 text nodes + 2 elements
|
|
83
|
+
expect(doc.root.text).to eq('text bold more text italic end')
|
|
84
|
+
end
|
|
85
|
+
|
|
86
|
+
it "handles elements with many attributes" do
|
|
87
|
+
xml_many_attrs = '<root ' + (1..100).map { |i| "attr#{i}=\"value#{i}\"" }.join(' ') + '>content</root>'
|
|
88
|
+
doc = RXerces::XML::Document.parse(xml_many_attrs)
|
|
89
|
+
expect(doc.root.attributes.length).to eq(100)
|
|
90
|
+
expect(doc.root['attr50']).to eq('value50')
|
|
91
|
+
end
|
|
92
|
+
|
|
93
|
+
it "handles large text content" do
|
|
94
|
+
large_text = 'x' * 10000
|
|
95
|
+
xml_large = "<root>#{large_text}</root>"
|
|
96
|
+
doc = RXerces::XML::Document.parse(xml_large)
|
|
97
|
+
expect(doc.root.text.length).to eq(10000)
|
|
98
|
+
expect(doc.root.text).to start_with('xxxxx')
|
|
99
|
+
end
|
|
30
100
|
end
|
|
31
101
|
|
|
32
102
|
describe "#root" do
|
|
@@ -64,23 +134,7 @@ RSpec.describe RXerces::XML::Document do
|
|
|
64
134
|
end
|
|
65
135
|
end
|
|
66
136
|
|
|
67
|
-
describe "#css" do
|
|
68
|
-
# Check if Xalan support is compiled in (CSS requires XPath which needs Xalan)
|
|
69
|
-
xalan_available = begin
|
|
70
|
-
test_xml = '<root><item id="1">A</item><item id="2">B</item></root>'
|
|
71
|
-
test_doc = RXerces::XML::Document.parse(test_xml)
|
|
72
|
-
result = test_doc.xpath('//item[@id="1"]')
|
|
73
|
-
result.length == 1
|
|
74
|
-
rescue
|
|
75
|
-
false
|
|
76
|
-
end
|
|
77
|
-
|
|
78
|
-
before(:all) do
|
|
79
|
-
unless xalan_available
|
|
80
|
-
skip "Xalan-C not available - CSS selectors require Xalan-C library"
|
|
81
|
-
end
|
|
82
|
-
end
|
|
83
|
-
|
|
137
|
+
describe "#css", xalan: true do
|
|
84
138
|
let(:xml) do
|
|
85
139
|
<<-XML
|
|
86
140
|
<library>
|
|
@@ -234,4 +288,117 @@ RSpec.describe RXerces::XML::Document do
|
|
|
234
288
|
end
|
|
235
289
|
end
|
|
236
290
|
end
|
|
291
|
+
|
|
292
|
+
describe "parse options validation" do
|
|
293
|
+
let(:simple_xml) { '<root><child>test</child></root>' }
|
|
294
|
+
|
|
295
|
+
context "with valid options" do
|
|
296
|
+
it "accepts no options" do
|
|
297
|
+
expect {
|
|
298
|
+
RXerces::XML::Document.parse(simple_xml)
|
|
299
|
+
}.not_to raise_error
|
|
300
|
+
end
|
|
301
|
+
|
|
302
|
+
it "accepts nil options" do
|
|
303
|
+
expect {
|
|
304
|
+
RXerces::XML::Document.parse(simple_xml, nil)
|
|
305
|
+
}.not_to raise_error
|
|
306
|
+
end
|
|
307
|
+
|
|
308
|
+
it "accepts empty hash" do
|
|
309
|
+
expect {
|
|
310
|
+
RXerces::XML::Document.parse(simple_xml, {})
|
|
311
|
+
}.not_to raise_error
|
|
312
|
+
end
|
|
313
|
+
|
|
314
|
+
it "accepts allow_external_entities with symbol key" do
|
|
315
|
+
expect {
|
|
316
|
+
doc = RXerces::XML::Document.parse(simple_xml, allow_external_entities: false)
|
|
317
|
+
expect(doc).to be_a(RXerces::XML::Document)
|
|
318
|
+
}.not_to raise_error
|
|
319
|
+
end
|
|
320
|
+
|
|
321
|
+
it "accepts allow_external_entities with string key" do
|
|
322
|
+
expect {
|
|
323
|
+
doc = RXerces::XML::Document.parse(simple_xml, 'allow_external_entities' => false)
|
|
324
|
+
expect(doc).to be_a(RXerces::XML::Document)
|
|
325
|
+
}.not_to raise_error
|
|
326
|
+
end
|
|
327
|
+
|
|
328
|
+
it "accepts allow_external_entities set to true" do
|
|
329
|
+
expect {
|
|
330
|
+
doc = RXerces::XML::Document.parse(simple_xml, allow_external_entities: true)
|
|
331
|
+
expect(doc).to be_a(RXerces::XML::Document)
|
|
332
|
+
}.not_to raise_error
|
|
333
|
+
end
|
|
334
|
+
end
|
|
335
|
+
|
|
336
|
+
context "with invalid options" do
|
|
337
|
+
it "rejects unknown option keys" do
|
|
338
|
+
expect {
|
|
339
|
+
RXerces::XML::Document.parse(simple_xml, unknown_option: true)
|
|
340
|
+
}.to raise_error(ArgumentError, /Unknown option: unknown_option/)
|
|
341
|
+
end
|
|
342
|
+
|
|
343
|
+
it "rejects multiple unknown options and reports the first one" do
|
|
344
|
+
expect {
|
|
345
|
+
RXerces::XML::Document.parse(simple_xml, foo: 1, bar: 2)
|
|
346
|
+
}.to raise_error(ArgumentError, /Unknown option:/)
|
|
347
|
+
end
|
|
348
|
+
|
|
349
|
+
it "lists allowed options in error message" do
|
|
350
|
+
expect {
|
|
351
|
+
RXerces::XML::Document.parse(simple_xml, invalid: true)
|
|
352
|
+
}.to raise_error(ArgumentError, /Allowed options are: allow_external_entities/)
|
|
353
|
+
end
|
|
354
|
+
|
|
355
|
+
it "rejects options with both valid and invalid keys" do
|
|
356
|
+
expect {
|
|
357
|
+
RXerces::XML::Document.parse(simple_xml,
|
|
358
|
+
allow_external_entities: true,
|
|
359
|
+
invalid_key: false)
|
|
360
|
+
}.to raise_error(ArgumentError, /Unknown option: invalid_key/)
|
|
361
|
+
end
|
|
362
|
+
|
|
363
|
+
it "rejects non-string, non-symbol keys" do
|
|
364
|
+
expect {
|
|
365
|
+
RXerces::XML::Document.parse(simple_xml, 123 => true)
|
|
366
|
+
}.to raise_error(ArgumentError, /Option keys must be symbols or strings/)
|
|
367
|
+
end
|
|
368
|
+
|
|
369
|
+
it "rejects options that are not a hash" do
|
|
370
|
+
expect {
|
|
371
|
+
RXerces::XML::Document.parse(simple_xml, "not a hash")
|
|
372
|
+
}.to raise_error(TypeError)
|
|
373
|
+
end
|
|
374
|
+
|
|
375
|
+
it "rejects options array" do
|
|
376
|
+
expect {
|
|
377
|
+
RXerces::XML::Document.parse(simple_xml, [:allow_external_entities])
|
|
378
|
+
}.to raise_error(TypeError)
|
|
379
|
+
end
|
|
380
|
+
end
|
|
381
|
+
|
|
382
|
+
context "security" do
|
|
383
|
+
it "prevents typos from silently disabling security features" do
|
|
384
|
+
# If someone mistypes allow_external_entities, it should fail
|
|
385
|
+
# rather than silently ignoring the option
|
|
386
|
+
expect {
|
|
387
|
+
RXerces::XML::Document.parse(simple_xml, allow_external_entity: true)
|
|
388
|
+
}.to raise_error(ArgumentError, /Unknown option/)
|
|
389
|
+
end
|
|
390
|
+
|
|
391
|
+
it "prevents typos with underscores" do
|
|
392
|
+
expect {
|
|
393
|
+
RXerces::XML::Document.parse(simple_xml, allowexternalentities: true)
|
|
394
|
+
}.to raise_error(ArgumentError, /Unknown option/)
|
|
395
|
+
end
|
|
396
|
+
|
|
397
|
+
it "prevents similar-looking options" do
|
|
398
|
+
expect {
|
|
399
|
+
RXerces::XML::Document.parse(simple_xml, external_entities: true)
|
|
400
|
+
}.to raise_error(ArgumentError, /Unknown option/)
|
|
401
|
+
end
|
|
402
|
+
end
|
|
403
|
+
end
|
|
237
404
|
end
|
data/spec/node_spec.rb
CHANGED
|
@@ -65,6 +65,50 @@ RSpec.describe RXerces::XML::Node do
|
|
|
65
65
|
it "returns empty string for nodes without text" do
|
|
66
66
|
expect(root.text).to be_a(String)
|
|
67
67
|
end
|
|
68
|
+
|
|
69
|
+
context "edge cases" do
|
|
70
|
+
let(:edge_xml) do
|
|
71
|
+
<<-XML
|
|
72
|
+
<root>
|
|
73
|
+
<empty></empty>
|
|
74
|
+
<mixed>Hello <b>world</b>!</mixed>
|
|
75
|
+
<whitespace> </whitespace>
|
|
76
|
+
<cdata><![CDATA[<not>parsed</not>]]></cdata>
|
|
77
|
+
<multiline>
|
|
78
|
+
Line 1
|
|
79
|
+
Line 2
|
|
80
|
+
</multiline>
|
|
81
|
+
</root>
|
|
82
|
+
XML
|
|
83
|
+
end
|
|
84
|
+
let(:edge_doc) { RXerces::XML::Document.parse(edge_xml) }
|
|
85
|
+
let(:edge_root) { edge_doc.root }
|
|
86
|
+
|
|
87
|
+
it "returns empty string for empty elements" do
|
|
88
|
+
empty = edge_root.children.find { |n| n.name == 'empty' }
|
|
89
|
+
expect(empty.text).to eq('')
|
|
90
|
+
end
|
|
91
|
+
|
|
92
|
+
it "concatenates text from mixed content elements" do
|
|
93
|
+
mixed = edge_root.children.find { |n| n.name == 'mixed' }
|
|
94
|
+
expect(mixed.text).to eq('Hello world!')
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
it "preserves whitespace-only content" do
|
|
98
|
+
whitespace = edge_root.children.find { |n| n.name == 'whitespace' }
|
|
99
|
+
expect(whitespace.text).to eq(' ')
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
it "returns CDATA content as plain text" do
|
|
103
|
+
cdata = edge_root.children.find { |n| n.name == 'cdata' }
|
|
104
|
+
expect(cdata.text).to eq('<not>parsed</not>')
|
|
105
|
+
end
|
|
106
|
+
|
|
107
|
+
it "preserves multiline text content" do
|
|
108
|
+
multiline = edge_root.children.find { |n| n.name == 'multiline' }
|
|
109
|
+
expect(multiline.text).to include("Line 1\n Line 2")
|
|
110
|
+
end
|
|
111
|
+
end
|
|
68
112
|
end
|
|
69
113
|
|
|
70
114
|
describe "#content" do
|
|
@@ -142,6 +186,33 @@ RSpec.describe RXerces::XML::Node do
|
|
|
142
186
|
text_node = root.children.find { |n| n.is_a?(RXerces::XML::Text) }
|
|
143
187
|
expect(text_node.has_attribute?('anything')).to be false if text_node
|
|
144
188
|
end
|
|
189
|
+
|
|
190
|
+
context "edge cases" do
|
|
191
|
+
let(:attr_xml) do
|
|
192
|
+
<<-XML
|
|
193
|
+
<root>
|
|
194
|
+
<element empty="" xmlns:prefix="http://example.com" special_chars="a&b<c>">
|
|
195
|
+
content
|
|
196
|
+
</element>
|
|
197
|
+
</root>
|
|
198
|
+
XML
|
|
199
|
+
end
|
|
200
|
+
let(:attr_doc) { RXerces::XML::Document.parse(attr_xml) }
|
|
201
|
+
let(:element) { attr_doc.root.children.find { |n| n.is_a?(RXerces::XML::Element) } }
|
|
202
|
+
|
|
203
|
+
it "handles empty string attribute values" do
|
|
204
|
+
expect(element['empty']).to be_nil
|
|
205
|
+
expect(element.has_attribute?('empty')).to be false
|
|
206
|
+
end
|
|
207
|
+
|
|
208
|
+
it "handles attributes with special characters" do
|
|
209
|
+
expect(element['special_chars']).to eq('a&b<c>')
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
it "handles namespace prefix attributes" do
|
|
213
|
+
expect(element['xmlns:prefix']).to eq('http://example.com')
|
|
214
|
+
end
|
|
215
|
+
end
|
|
145
216
|
end
|
|
146
217
|
|
|
147
218
|
describe "#children" do
|
|
@@ -233,23 +304,7 @@ RSpec.describe RXerces::XML::Node do
|
|
|
233
304
|
expect(ancestors.any? { |a| a.name == '#document' }).to be false
|
|
234
305
|
end
|
|
235
306
|
|
|
236
|
-
context "with selector" do
|
|
237
|
-
# Check if Xalan support is compiled in (selectors require XPath which needs Xalan)
|
|
238
|
-
xalan_available = begin
|
|
239
|
-
test_xml = '<root><item id="1">A</item></root>'
|
|
240
|
-
test_doc = RXerces::XML::Document.parse(test_xml)
|
|
241
|
-
result = test_doc.xpath('//item[@id="1"]')
|
|
242
|
-
result.length == 1
|
|
243
|
-
rescue
|
|
244
|
-
false
|
|
245
|
-
end
|
|
246
|
-
|
|
247
|
-
before(:all) do
|
|
248
|
-
unless xalan_available
|
|
249
|
-
skip "Xalan-C not available - ancestor selectors require Xalan-C library"
|
|
250
|
-
end
|
|
251
|
-
end
|
|
252
|
-
|
|
307
|
+
context "with selector", xalan: true do
|
|
253
308
|
it "filters ancestors by tag name selector" do
|
|
254
309
|
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
255
310
|
city = person.children.find { |n| n.name == 'city' }
|
|
@@ -312,6 +367,45 @@ RSpec.describe RXerces::XML::Node do
|
|
|
312
367
|
end
|
|
313
368
|
end
|
|
314
369
|
|
|
370
|
+
describe "#attribute_nodes" do
|
|
371
|
+
it "returns an array of attribute nodes" do
|
|
372
|
+
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
373
|
+
attr_nodes = person.attribute_nodes
|
|
374
|
+
expect(attr_nodes).to be_an(Array)
|
|
375
|
+
expect(attr_nodes.length).to eq(2)
|
|
376
|
+
expect(attr_nodes.all? { |n| n.is_a?(RXerces::XML::Node) }).to be true
|
|
377
|
+
end
|
|
378
|
+
|
|
379
|
+
it "returns nodes with name and text/value" do
|
|
380
|
+
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
381
|
+
attr_nodes = person.attribute_nodes
|
|
382
|
+
|
|
383
|
+
id_node = attr_nodes.find { |n| n.name == 'id' }
|
|
384
|
+
expect(id_node).not_to be_nil
|
|
385
|
+
expect(id_node.text).to eq('1')
|
|
386
|
+
|
|
387
|
+
name_node = attr_nodes.find { |n| n.name == 'name' }
|
|
388
|
+
expect(name_node).not_to be_nil
|
|
389
|
+
expect(name_node.text).to eq('Alice')
|
|
390
|
+
end
|
|
391
|
+
|
|
392
|
+
it "returns empty array for elements without attributes" do
|
|
393
|
+
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
394
|
+
age = person.children.find { |n| n.name == 'age' }
|
|
395
|
+
attr_nodes = age.attribute_nodes
|
|
396
|
+
expect(attr_nodes).to be_an(Array)
|
|
397
|
+
expect(attr_nodes).to be_empty
|
|
398
|
+
end
|
|
399
|
+
|
|
400
|
+
it "returns empty array for text nodes" do
|
|
401
|
+
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
402
|
+
text_node = person.children.find { |n| n.is_a?(RXerces::XML::Text) }
|
|
403
|
+
attr_nodes = text_node.attribute_nodes
|
|
404
|
+
expect(attr_nodes).to be_an(Array)
|
|
405
|
+
expect(attr_nodes).to be_empty
|
|
406
|
+
end
|
|
407
|
+
end
|
|
408
|
+
|
|
315
409
|
describe "#next_sibling" do
|
|
316
410
|
it "returns the next sibling node" do
|
|
317
411
|
people = root.children.select { |n| n.is_a?(RXerces::XML::Element) }
|
|
@@ -554,25 +648,104 @@ RSpec.describe RXerces::XML::Node do
|
|
|
554
648
|
end
|
|
555
649
|
|
|
556
650
|
context "with nodes from different documents" do
|
|
557
|
-
it "
|
|
651
|
+
it "automatically imports node from different document" do
|
|
558
652
|
doc1 = RXerces::XML::Document.parse('<root><item>one</item></root>')
|
|
559
653
|
doc2 = RXerces::XML::Document.parse('<other><item>two</item></other>')
|
|
560
654
|
|
|
561
655
|
root1 = doc1.root
|
|
562
656
|
item2 = doc2.root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
563
657
|
|
|
658
|
+
# Should not raise an error - should import automatically
|
|
564
659
|
expect {
|
|
565
660
|
root1.add_child(item2)
|
|
566
|
-
}.
|
|
661
|
+
}.not_to raise_error
|
|
662
|
+
|
|
663
|
+
# Verify the node was added
|
|
664
|
+
items = doc1.xpath('//item')
|
|
665
|
+
expect(items.length).to eq(2)
|
|
567
666
|
end
|
|
568
667
|
|
|
569
|
-
it "
|
|
668
|
+
it "imports node with all its children (deep copy)" do
|
|
570
669
|
doc1 = RXerces::XML::Document.parse('<root></root>')
|
|
571
|
-
doc2 = RXerces::XML::Document.parse('<other><child
|
|
670
|
+
doc2 = RXerces::XML::Document.parse('<other><parent><child>text</child></parent></other>')
|
|
572
671
|
|
|
573
|
-
|
|
574
|
-
|
|
575
|
-
|
|
672
|
+
parent_node = doc2.xpath('//parent').first
|
|
673
|
+
|
|
674
|
+
# Add parent node from doc2 to doc1
|
|
675
|
+
doc1.root.add_child(parent_node)
|
|
676
|
+
|
|
677
|
+
# Verify the entire subtree was imported
|
|
678
|
+
expect(doc1.xpath('//parent').length).to eq(1)
|
|
679
|
+
expect(doc1.xpath('//parent/child').length).to eq(1)
|
|
680
|
+
expect(doc1.xpath('//parent/child').first.text).to eq('text')
|
|
681
|
+
end
|
|
682
|
+
|
|
683
|
+
it "preserves node content when importing" do
|
|
684
|
+
doc1 = RXerces::XML::Document.parse('<root></root>')
|
|
685
|
+
doc2 = RXerces::XML::Document.parse('<other><item attr="value">content</item></other>')
|
|
686
|
+
|
|
687
|
+
item = doc2.xpath('//item').first
|
|
688
|
+
doc1.root.add_child(item)
|
|
689
|
+
|
|
690
|
+
imported_item = doc1.xpath('//item').first
|
|
691
|
+
expect(imported_item.text).to eq('content')
|
|
692
|
+
expect(imported_item['attr']).to eq('value')
|
|
693
|
+
end
|
|
694
|
+
|
|
695
|
+
it "does not modify the original document when importing" do
|
|
696
|
+
doc1 = RXerces::XML::Document.parse('<root></root>')
|
|
697
|
+
doc2 = RXerces::XML::Document.parse('<other><item>original</item></other>')
|
|
698
|
+
|
|
699
|
+
item = doc2.xpath('//item').first
|
|
700
|
+
doc1.root.add_child(item)
|
|
701
|
+
|
|
702
|
+
# Original document should still have the item
|
|
703
|
+
# Note: importNode creates a copy, but the Ruby wrapper gets updated
|
|
704
|
+
# to point to the imported node, so we need to re-query doc2
|
|
705
|
+
expect(doc2.xpath('//item').length).to eq(1)
|
|
706
|
+
end
|
|
707
|
+
|
|
708
|
+
it "handles importing complex nested structures" do
|
|
709
|
+
doc1 = RXerces::XML::Document.parse('<root></root>')
|
|
710
|
+
doc2 = RXerces::XML::Document.parse(<<-XML)
|
|
711
|
+
<other>
|
|
712
|
+
<section id="s1">
|
|
713
|
+
<title>Section 1</title>
|
|
714
|
+
<paragraph>Content 1</paragraph>
|
|
715
|
+
<subsection>
|
|
716
|
+
<title>Subsection</title>
|
|
717
|
+
<paragraph>Nested content</paragraph>
|
|
718
|
+
</subsection>
|
|
719
|
+
</section>
|
|
720
|
+
</other>
|
|
721
|
+
XML
|
|
722
|
+
|
|
723
|
+
section = doc2.xpath('//section').first
|
|
724
|
+
doc1.root.add_child(section)
|
|
725
|
+
|
|
726
|
+
# Verify entire structure was imported
|
|
727
|
+
expect(doc1.xpath('//section').length).to eq(1)
|
|
728
|
+
expect(doc1.xpath('//section').first['id']).to eq('s1')
|
|
729
|
+
expect(doc1.xpath('//title').length).to eq(2)
|
|
730
|
+
expect(doc1.xpath('//paragraph').length).to eq(2)
|
|
731
|
+
expect(doc1.xpath('//subsection').length).to eq(1)
|
|
732
|
+
end
|
|
733
|
+
|
|
734
|
+
it "allows importing multiple nodes from different documents" do
|
|
735
|
+
doc1 = RXerces::XML::Document.parse('<root></root>')
|
|
736
|
+
doc2 = RXerces::XML::Document.parse('<other><item>A</item></other>')
|
|
737
|
+
doc3 = RXerces::XML::Document.parse('<another><item>B</item></another>')
|
|
738
|
+
|
|
739
|
+
item2 = doc2.xpath('//item').first
|
|
740
|
+
item3 = doc3.xpath('//item').first
|
|
741
|
+
|
|
742
|
+
doc1.root.add_child(item2)
|
|
743
|
+
doc1.root.add_child(item3)
|
|
744
|
+
|
|
745
|
+
items = doc1.xpath('//item')
|
|
746
|
+
expect(items.length).to eq(2)
|
|
747
|
+
expect(items[0].text).to eq('A')
|
|
748
|
+
expect(items[1].text).to eq('B')
|
|
576
749
|
end
|
|
577
750
|
end
|
|
578
751
|
|
|
@@ -674,6 +847,37 @@ RSpec.describe RXerces::XML::Node do
|
|
|
674
847
|
result = root.xpath('.//age')
|
|
675
848
|
expect(result).to be_a(RXerces::XML::NodeSet)
|
|
676
849
|
end
|
|
850
|
+
|
|
851
|
+
context "edge cases" do
|
|
852
|
+
it "returns empty NodeSet for nonexistent elements" do
|
|
853
|
+
result = root.xpath('.//nonexistent')
|
|
854
|
+
expect(result).to be_a(RXerces::XML::NodeSet)
|
|
855
|
+
expect(result.length).to eq(0)
|
|
856
|
+
end
|
|
857
|
+
|
|
858
|
+
it "raises error for invalid XPath syntax" do
|
|
859
|
+
expect {
|
|
860
|
+
root.xpath('.//[invalid')
|
|
861
|
+
}.to raise_error(ArgumentError, /XPath expression has unbalanced/)
|
|
862
|
+
end
|
|
863
|
+
|
|
864
|
+
it "handles very deep XPath expressions" do
|
|
865
|
+
# Create a deep nesting scenario
|
|
866
|
+
deep_xml = '<root>' + ('<level>' * 50) + '<deep>content</deep>' + ('</level>' * 50) + '</root>'
|
|
867
|
+
deep_doc = RXerces::XML::Document.parse(deep_xml)
|
|
868
|
+
result = deep_doc.xpath('//deep')
|
|
869
|
+
expect(result.length).to eq(1)
|
|
870
|
+
expect(result.first.text).to eq('content')
|
|
871
|
+
end
|
|
872
|
+
|
|
873
|
+
it "handles XPath with special characters in element names" do
|
|
874
|
+
special_xml = '<root><element-name_with.specials>content</element-name_with.specials></root>'
|
|
875
|
+
special_doc = RXerces::XML::Document.parse(special_xml)
|
|
876
|
+
result = special_doc.xpath('//element-name_with.specials')
|
|
877
|
+
expect(result.length).to eq(1)
|
|
878
|
+
expect(result.first.text).to eq('content')
|
|
879
|
+
end
|
|
880
|
+
end
|
|
677
881
|
end
|
|
678
882
|
|
|
679
883
|
describe "#inner_html" do
|
|
@@ -823,23 +1027,7 @@ RSpec.describe RXerces::XML::Node do
|
|
|
823
1027
|
end
|
|
824
1028
|
end
|
|
825
1029
|
|
|
826
|
-
describe "#at_css" do
|
|
827
|
-
# Check if Xalan support is compiled in (CSS requires XPath which needs Xalan)
|
|
828
|
-
xalan_available = begin
|
|
829
|
-
test_xml = '<root><item id="1">A</item><item id="2">B</item></root>'
|
|
830
|
-
test_doc = RXerces::XML::Document.parse(test_xml)
|
|
831
|
-
result = test_doc.xpath('//item[@id="1"]')
|
|
832
|
-
result.length == 1
|
|
833
|
-
rescue
|
|
834
|
-
false
|
|
835
|
-
end
|
|
836
|
-
|
|
837
|
-
before(:all) do
|
|
838
|
-
unless xalan_available
|
|
839
|
-
skip "Xalan-C not available - CSS selectors require Xalan-C library"
|
|
840
|
-
end
|
|
841
|
-
end
|
|
842
|
-
|
|
1030
|
+
describe "#at_css", xalan: true do
|
|
843
1031
|
it "is an alias for at (which uses CSS converted to XPath)" do
|
|
844
1032
|
xml = '<root><item class="foo">First</item><item class="bar">Second</item></root>'
|
|
845
1033
|
doc = RXerces::XML::Document.parse(xml)
|
|
@@ -863,23 +1051,7 @@ RSpec.describe RXerces::XML::Node do
|
|
|
863
1051
|
end
|
|
864
1052
|
end
|
|
865
1053
|
|
|
866
|
-
describe "#css" do
|
|
867
|
-
# Check if Xalan support is compiled in (CSS requires XPath which needs Xalan)
|
|
868
|
-
xalan_available = begin
|
|
869
|
-
test_xml = '<root><item id="1">A</item><item id="2">B</item></root>'
|
|
870
|
-
test_doc = RXerces::XML::Document.parse(test_xml)
|
|
871
|
-
result = test_doc.xpath('//item[@id="1"]')
|
|
872
|
-
result.length == 1
|
|
873
|
-
rescue
|
|
874
|
-
false
|
|
875
|
-
end
|
|
876
|
-
|
|
877
|
-
before(:all) do
|
|
878
|
-
unless xalan_available
|
|
879
|
-
skip "Xalan-C not available - CSS selectors require Xalan-C library"
|
|
880
|
-
end
|
|
881
|
-
end
|
|
882
|
-
|
|
1054
|
+
describe "#css", xalan: true do
|
|
883
1055
|
let(:xml) do
|
|
884
1056
|
<<-XML
|
|
885
1057
|
<library>
|