rxerces 0.4.0 → 0.5.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGES.md +12 -0
- data/README.md +14 -3
- data/ext/rxerces/rxerces.cpp +770 -3
- data/lib/rxerces/nokogiri.rb +26 -0
- data/lib/rxerces/version.rb +1 -1
- data/rxerces.gemspec +1 -1
- data/spec/document_spec.rb +59 -0
- data/spec/node_spec.rb +351 -4
- data/spec/nodeset_spec.rb +59 -0
- data/spec/nokogiri_compatibility_spec.rb +44 -0
- data/spec/rxerces_shared.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +1 -1
- metadata.gz.sig +0 -0
data/lib/rxerces/nokogiri.rb
CHANGED
|
@@ -21,6 +21,25 @@ module Nokogiri
|
|
|
21
21
|
Schema = RXerces::XML::Schema
|
|
22
22
|
end
|
|
23
23
|
|
|
24
|
+
# Nokogiri-compatible HTML module
|
|
25
|
+
# Since RXerces uses Xerces-C which is an XML parser,
|
|
26
|
+
# HTML parsing delegates to XML parsing
|
|
27
|
+
module HTML
|
|
28
|
+
# Parse HTML from a string - delegates to XML parsing
|
|
29
|
+
# @param string [String] HTML string to parse
|
|
30
|
+
# @return [RXerces::XML::Document] parsed document
|
|
31
|
+
def self.parse(string)
|
|
32
|
+
RXerces::XML::Document.parse(string)
|
|
33
|
+
end
|
|
34
|
+
|
|
35
|
+
# Alias Document class for compatibility
|
|
36
|
+
Document = RXerces::XML::Document
|
|
37
|
+
Node = RXerces::XML::Node
|
|
38
|
+
Element = RXerces::XML::Element
|
|
39
|
+
Text = RXerces::XML::Text
|
|
40
|
+
NodeSet = RXerces::XML::NodeSet
|
|
41
|
+
end
|
|
42
|
+
|
|
24
43
|
# Top-level parse method for compatibility
|
|
25
44
|
# @param string [String] XML string to parse
|
|
26
45
|
# @return [RXerces::XML::Document] parsed document
|
|
@@ -28,6 +47,13 @@ module Nokogiri
|
|
|
28
47
|
RXerces::XML::Document.parse(string)
|
|
29
48
|
end
|
|
30
49
|
|
|
50
|
+
# Top-level HTML parsing method
|
|
51
|
+
# @param string [String] HTML string to parse
|
|
52
|
+
# @return [RXerces::XML::Document] parsed document
|
|
53
|
+
def self.HTML(string)
|
|
54
|
+
RXerces::XML::Document.parse(string)
|
|
55
|
+
end
|
|
56
|
+
|
|
31
57
|
class << self
|
|
32
58
|
alias_method :parse, :XML
|
|
33
59
|
end
|
data/lib/rxerces/version.rb
CHANGED
data/rxerces.gemspec
CHANGED
data/spec/document_spec.rb
CHANGED
|
@@ -64,6 +64,65 @@ RSpec.describe RXerces::XML::Document do
|
|
|
64
64
|
end
|
|
65
65
|
end
|
|
66
66
|
|
|
67
|
+
describe "#css" do
|
|
68
|
+
# Check if Xalan support is compiled in (CSS requires XPath which needs Xalan)
|
|
69
|
+
xalan_available = begin
|
|
70
|
+
test_xml = '<root><item id="1">A</item><item id="2">B</item></root>'
|
|
71
|
+
test_doc = RXerces::XML::Document.parse(test_xml)
|
|
72
|
+
result = test_doc.xpath('//item[@id="1"]')
|
|
73
|
+
result.length == 1
|
|
74
|
+
rescue
|
|
75
|
+
false
|
|
76
|
+
end
|
|
77
|
+
|
|
78
|
+
before(:all) do
|
|
79
|
+
unless xalan_available
|
|
80
|
+
skip "Xalan-C not available - CSS selectors require Xalan-C library"
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
|
|
84
|
+
let(:xml) do
|
|
85
|
+
<<-XML
|
|
86
|
+
<library>
|
|
87
|
+
<book id="book1" class="fiction">
|
|
88
|
+
<title>1984</title>
|
|
89
|
+
</book>
|
|
90
|
+
<book id="book2" class="non-fiction">
|
|
91
|
+
<title>Sapiens</title>
|
|
92
|
+
</book>
|
|
93
|
+
</library>
|
|
94
|
+
XML
|
|
95
|
+
end
|
|
96
|
+
|
|
97
|
+
let(:doc) { RXerces::XML::Document.parse(xml) }
|
|
98
|
+
|
|
99
|
+
it "returns a NodeSet" do
|
|
100
|
+
result = doc.css('book')
|
|
101
|
+
expect(result).to be_a(RXerces::XML::NodeSet)
|
|
102
|
+
end
|
|
103
|
+
|
|
104
|
+
it "finds elements by tag name" do
|
|
105
|
+
books = doc.css('book')
|
|
106
|
+
expect(books.length).to eq(2)
|
|
107
|
+
end
|
|
108
|
+
|
|
109
|
+
it "finds elements by class" do
|
|
110
|
+
fiction = doc.css('.fiction')
|
|
111
|
+
expect(fiction.length).to eq(1)
|
|
112
|
+
end
|
|
113
|
+
|
|
114
|
+
it "finds elements by id" do
|
|
115
|
+
book = doc.css('#book1')
|
|
116
|
+
expect(book.length).to eq(1)
|
|
117
|
+
expect(book[0].xpath('.//title')[0].text.strip).to eq('1984')
|
|
118
|
+
end
|
|
119
|
+
|
|
120
|
+
it "finds elements with combined selectors" do
|
|
121
|
+
fiction_books = doc.css('book.fiction')
|
|
122
|
+
expect(fiction_books.length).to eq(1)
|
|
123
|
+
end
|
|
124
|
+
end
|
|
125
|
+
|
|
67
126
|
describe "#encoding" do
|
|
68
127
|
it "returns UTF-8 for documents without explicit encoding" do
|
|
69
128
|
doc = RXerces::XML::Document.parse(simple_xml)
|
data/spec/node_spec.rb
CHANGED
|
@@ -110,6 +110,40 @@ RSpec.describe RXerces::XML::Node do
|
|
|
110
110
|
end
|
|
111
111
|
end
|
|
112
112
|
|
|
113
|
+
describe "#get_attribute" do
|
|
114
|
+
it "is an alias for []" do
|
|
115
|
+
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
116
|
+
expect(person.get_attribute('id')).to eq('1')
|
|
117
|
+
expect(person.get_attribute('name')).to eq('Alice')
|
|
118
|
+
end
|
|
119
|
+
end
|
|
120
|
+
|
|
121
|
+
describe "#attribute" do
|
|
122
|
+
it "is an alias for []" do
|
|
123
|
+
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
124
|
+
expect(person.attribute('id')).to eq('1')
|
|
125
|
+
expect(person.attribute('name')).to eq('Alice')
|
|
126
|
+
end
|
|
127
|
+
end
|
|
128
|
+
|
|
129
|
+
describe "#has_attribute?" do
|
|
130
|
+
it "returns true when attribute exists" do
|
|
131
|
+
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
132
|
+
expect(person.has_attribute?('id')).to be true
|
|
133
|
+
expect(person.has_attribute?('name')).to be true
|
|
134
|
+
end
|
|
135
|
+
|
|
136
|
+
it "returns false when attribute does not exist" do
|
|
137
|
+
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
138
|
+
expect(person.has_attribute?('nonexistent')).to be false
|
|
139
|
+
end
|
|
140
|
+
|
|
141
|
+
it "returns false for non-element nodes" do
|
|
142
|
+
text_node = root.children.find { |n| n.is_a?(RXerces::XML::Text) }
|
|
143
|
+
expect(text_node.has_attribute?('anything')).to be false if text_node
|
|
144
|
+
end
|
|
145
|
+
end
|
|
146
|
+
|
|
113
147
|
describe "#children" do
|
|
114
148
|
it "returns an array of child nodes" do
|
|
115
149
|
children = root.children
|
|
@@ -158,6 +192,94 @@ RSpec.describe RXerces::XML::Node do
|
|
|
158
192
|
end
|
|
159
193
|
end
|
|
160
194
|
|
|
195
|
+
describe "#ancestors" do
|
|
196
|
+
it "returns an array of ancestor nodes" do
|
|
197
|
+
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
198
|
+
age = person.children.find { |n| n.name == 'age' }
|
|
199
|
+
ancestors = age.ancestors
|
|
200
|
+
|
|
201
|
+
expect(ancestors).to be_an(Array)
|
|
202
|
+
expect(ancestors.length).to eq(2)
|
|
203
|
+
expect(ancestors[0].name).to eq('person')
|
|
204
|
+
expect(ancestors[1].name).to eq('root')
|
|
205
|
+
end
|
|
206
|
+
|
|
207
|
+
it "returns ancestors in order from immediate parent to root" do
|
|
208
|
+
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
209
|
+
city = person.children.find { |n| n.name == 'city' }
|
|
210
|
+
ancestors = city.ancestors
|
|
211
|
+
|
|
212
|
+
expect(ancestors.map(&:name)).to eq(['person', 'root'])
|
|
213
|
+
end
|
|
214
|
+
|
|
215
|
+
it "returns empty array for root element" do
|
|
216
|
+
ancestors = root.ancestors
|
|
217
|
+
expect(ancestors).to be_an(Array)
|
|
218
|
+
expect(ancestors).to be_empty
|
|
219
|
+
end
|
|
220
|
+
|
|
221
|
+
it "returns only one ancestor for direct children of root" do
|
|
222
|
+
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
223
|
+
ancestors = person.ancestors
|
|
224
|
+
|
|
225
|
+
expect(ancestors.length).to eq(1)
|
|
226
|
+
expect(ancestors[0].name).to eq('root')
|
|
227
|
+
end
|
|
228
|
+
|
|
229
|
+
it "does not include the document node in ancestors" do
|
|
230
|
+
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
231
|
+
ancestors = person.ancestors
|
|
232
|
+
|
|
233
|
+
expect(ancestors.any? { |a| a.name == '#document' }).to be false
|
|
234
|
+
end
|
|
235
|
+
|
|
236
|
+
context "with selector" do
|
|
237
|
+
# Check if Xalan support is compiled in (selectors require XPath which needs Xalan)
|
|
238
|
+
xalan_available = begin
|
|
239
|
+
test_xml = '<root><item id="1">A</item></root>'
|
|
240
|
+
test_doc = RXerces::XML::Document.parse(test_xml)
|
|
241
|
+
result = test_doc.xpath('//item[@id="1"]')
|
|
242
|
+
result.length == 1
|
|
243
|
+
rescue
|
|
244
|
+
false
|
|
245
|
+
end
|
|
246
|
+
|
|
247
|
+
before(:all) do
|
|
248
|
+
unless xalan_available
|
|
249
|
+
skip "Xalan-C not available - ancestor selectors require Xalan-C library"
|
|
250
|
+
end
|
|
251
|
+
end
|
|
252
|
+
|
|
253
|
+
it "filters ancestors by tag name selector" do
|
|
254
|
+
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
255
|
+
city = person.children.find { |n| n.name == 'city' }
|
|
256
|
+
ancestors = city.ancestors('person')
|
|
257
|
+
|
|
258
|
+
expect(ancestors.length).to eq(1)
|
|
259
|
+
expect(ancestors[0].name).to eq('person')
|
|
260
|
+
end
|
|
261
|
+
|
|
262
|
+
it "filters ancestors by CSS class selector" do
|
|
263
|
+
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
264
|
+
city = person.children.find { |n| n.name == 'city' }
|
|
265
|
+
person_ancestors = city.ancestors('person[name]')
|
|
266
|
+
|
|
267
|
+
expect(person_ancestors.length).to eq(1)
|
|
268
|
+
expect(person_ancestors[0].name).to eq('person')
|
|
269
|
+
expect(person_ancestors[0]['name']).to eq('Alice')
|
|
270
|
+
end
|
|
271
|
+
|
|
272
|
+
it "returns empty array when no ancestors match selector" do
|
|
273
|
+
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
274
|
+
city = person.children.find { |n| n.name == 'city' }
|
|
275
|
+
ancestors = city.ancestors('nonexistent')
|
|
276
|
+
|
|
277
|
+
expect(ancestors).to be_an(Array)
|
|
278
|
+
expect(ancestors).to be_empty
|
|
279
|
+
end
|
|
280
|
+
end
|
|
281
|
+
end
|
|
282
|
+
|
|
161
283
|
describe "#attributes" do
|
|
162
284
|
it "returns a hash of attributes" do
|
|
163
285
|
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
@@ -281,6 +403,103 @@ RSpec.describe RXerces::XML::Node do
|
|
|
281
403
|
end
|
|
282
404
|
end
|
|
283
405
|
|
|
406
|
+
describe "#element_children" do
|
|
407
|
+
it "returns only element children, filtering out text nodes" do
|
|
408
|
+
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
409
|
+
element_children = person.element_children
|
|
410
|
+
|
|
411
|
+
expect(element_children).to be_an(Array)
|
|
412
|
+
expect(element_children.all? { |n| n.is_a?(RXerces::XML::Element) }).to be true
|
|
413
|
+
expect(element_children.length).to eq(2) # age and city elements
|
|
414
|
+
expect(element_children.map(&:name)).to match_array(['age', 'city'])
|
|
415
|
+
end
|
|
416
|
+
|
|
417
|
+
it "returns empty array for elements with no element children" do
|
|
418
|
+
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
419
|
+
age = person.element_children.find { |n| n.name == 'age' }
|
|
420
|
+
expect(age.element_children).to be_empty
|
|
421
|
+
end
|
|
422
|
+
|
|
423
|
+
it "returns empty array for text nodes" do
|
|
424
|
+
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
425
|
+
text_node = person.children.find { |n| n.is_a?(RXerces::XML::Text) }
|
|
426
|
+
expect(text_node.element_children).to be_empty if text_node
|
|
427
|
+
end
|
|
428
|
+
end
|
|
429
|
+
|
|
430
|
+
describe "#elements" do
|
|
431
|
+
it "is an alias for element_children" do
|
|
432
|
+
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
433
|
+
expect(person.elements.map(&:name)).to eq(person.element_children.map(&:name))
|
|
434
|
+
expect(person.elements.length).to eq(2)
|
|
435
|
+
end
|
|
436
|
+
end
|
|
437
|
+
|
|
438
|
+
describe "#next_element" do
|
|
439
|
+
it "returns the next element sibling, skipping text nodes" do
|
|
440
|
+
people = root.children.select { |n| n.is_a?(RXerces::XML::Element) }
|
|
441
|
+
first_person = people[0]
|
|
442
|
+
next_element = first_person.next_element
|
|
443
|
+
|
|
444
|
+
expect(next_element).to be_a(RXerces::XML::Element)
|
|
445
|
+
expect(next_element.name).to eq('person')
|
|
446
|
+
expect(next_element['id']).to eq('2')
|
|
447
|
+
end
|
|
448
|
+
|
|
449
|
+
it "returns nil when there is no next element" do
|
|
450
|
+
people = root.children.select { |n| n.is_a?(RXerces::XML::Element) }
|
|
451
|
+
last_person = people.last
|
|
452
|
+
expect(last_person.next_element).to be_nil
|
|
453
|
+
end
|
|
454
|
+
|
|
455
|
+
it "can navigate through all element siblings" do
|
|
456
|
+
first_element = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
457
|
+
siblings = []
|
|
458
|
+
current = first_element
|
|
459
|
+
|
|
460
|
+
while current
|
|
461
|
+
siblings << current
|
|
462
|
+
current = current.next_element
|
|
463
|
+
end
|
|
464
|
+
|
|
465
|
+
expect(siblings.length).to eq(2)
|
|
466
|
+
expect(siblings[0]['id']).to eq('1')
|
|
467
|
+
expect(siblings[1]['id']).to eq('2')
|
|
468
|
+
end
|
|
469
|
+
end
|
|
470
|
+
|
|
471
|
+
describe "#previous_element" do
|
|
472
|
+
it "returns the previous element sibling, skipping text nodes" do
|
|
473
|
+
people = root.children.select { |n| n.is_a?(RXerces::XML::Element) }
|
|
474
|
+
second_person = people[1]
|
|
475
|
+
prev_element = second_person.previous_element
|
|
476
|
+
|
|
477
|
+
expect(prev_element).to be_a(RXerces::XML::Element)
|
|
478
|
+
expect(prev_element.name).to eq('person')
|
|
479
|
+
expect(prev_element['id']).to eq('1')
|
|
480
|
+
end
|
|
481
|
+
|
|
482
|
+
it "returns nil when there is no previous element" do
|
|
483
|
+
first_element = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
484
|
+
expect(first_element.previous_element).to be_nil
|
|
485
|
+
end
|
|
486
|
+
|
|
487
|
+
it "can navigate backward through all element siblings" do
|
|
488
|
+
last_element = root.children.select { |n| n.is_a?(RXerces::XML::Element) }.last
|
|
489
|
+
siblings = []
|
|
490
|
+
current = last_element
|
|
491
|
+
|
|
492
|
+
while current
|
|
493
|
+
siblings.unshift(current)
|
|
494
|
+
current = current.previous_element
|
|
495
|
+
end
|
|
496
|
+
|
|
497
|
+
expect(siblings.length).to eq(2)
|
|
498
|
+
expect(siblings[0]['id']).to eq('1')
|
|
499
|
+
expect(siblings[1]['id']).to eq('2')
|
|
500
|
+
end
|
|
501
|
+
end
|
|
502
|
+
|
|
284
503
|
describe "#add_child" do
|
|
285
504
|
it "adds a text node from a string" do
|
|
286
505
|
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
@@ -547,13 +766,141 @@ RSpec.describe RXerces::XML::Node do
|
|
|
547
766
|
end
|
|
548
767
|
end
|
|
549
768
|
|
|
769
|
+
describe "#at_css" do
|
|
770
|
+
# Check if Xalan support is compiled in (CSS requires XPath which needs Xalan)
|
|
771
|
+
xalan_available = begin
|
|
772
|
+
test_xml = '<root><item id="1">A</item><item id="2">B</item></root>'
|
|
773
|
+
test_doc = RXerces::XML::Document.parse(test_xml)
|
|
774
|
+
result = test_doc.xpath('//item[@id="1"]')
|
|
775
|
+
result.length == 1
|
|
776
|
+
rescue
|
|
777
|
+
false
|
|
778
|
+
end
|
|
779
|
+
|
|
780
|
+
before(:all) do
|
|
781
|
+
unless xalan_available
|
|
782
|
+
skip "Xalan-C not available - CSS selectors require Xalan-C library"
|
|
783
|
+
end
|
|
784
|
+
end
|
|
785
|
+
|
|
786
|
+
it "is an alias for at (which uses CSS converted to XPath)" do
|
|
787
|
+
xml = '<root><item class="foo">First</item><item class="bar">Second</item></root>'
|
|
788
|
+
doc = RXerces::XML::Document.parse(xml)
|
|
789
|
+
result = doc.root.at_css('.foo')
|
|
790
|
+
expect(result).to be_a(RXerces::XML::Element)
|
|
791
|
+
expect(result.text).to eq('First')
|
|
792
|
+
end
|
|
793
|
+
|
|
794
|
+
it "returns the first matching element" do
|
|
795
|
+
xml = '<root><item>A</item><item>B</item></root>'
|
|
796
|
+
doc = RXerces::XML::Document.parse(xml)
|
|
797
|
+
result = doc.root.at_css('item')
|
|
798
|
+
expect(result.text).to eq('A')
|
|
799
|
+
end
|
|
800
|
+
|
|
801
|
+
it "returns nil when no match found" do
|
|
802
|
+
xml = '<root><item>A</item></root>'
|
|
803
|
+
doc = RXerces::XML::Document.parse(xml)
|
|
804
|
+
result = doc.root.at_css('nonexistent')
|
|
805
|
+
expect(result).to be_nil
|
|
806
|
+
end
|
|
807
|
+
end
|
|
808
|
+
|
|
550
809
|
describe "#css" do
|
|
551
|
-
|
|
552
|
-
|
|
810
|
+
# Check if Xalan support is compiled in (CSS requires XPath which needs Xalan)
|
|
811
|
+
xalan_available = begin
|
|
812
|
+
test_xml = '<root><item id="1">A</item><item id="2">B</item></root>'
|
|
813
|
+
test_doc = RXerces::XML::Document.parse(test_xml)
|
|
814
|
+
result = test_doc.xpath('//item[@id="1"]')
|
|
815
|
+
result.length == 1
|
|
816
|
+
rescue
|
|
817
|
+
false
|
|
818
|
+
end
|
|
819
|
+
|
|
820
|
+
before(:all) do
|
|
821
|
+
unless xalan_available
|
|
822
|
+
skip "Xalan-C not available - CSS selectors require Xalan-C library"
|
|
823
|
+
end
|
|
824
|
+
end
|
|
825
|
+
|
|
826
|
+
let(:xml) do
|
|
827
|
+
<<-XML
|
|
828
|
+
<library>
|
|
829
|
+
<book id="book1" class="fiction bestseller">
|
|
830
|
+
<title>1984</title>
|
|
831
|
+
<author>George Orwell</author>
|
|
832
|
+
</book>
|
|
833
|
+
<book id="book2" class="fiction">
|
|
834
|
+
<title>Brave New World</title>
|
|
835
|
+
<author>Aldous Huxley</author>
|
|
836
|
+
</book>
|
|
837
|
+
<book id="book3" class="non-fiction">
|
|
838
|
+
<title>Sapiens</title>
|
|
839
|
+
<author>Yuval Noah Harari</author>
|
|
840
|
+
</book>
|
|
841
|
+
</library>
|
|
842
|
+
XML
|
|
843
|
+
end
|
|
844
|
+
|
|
845
|
+
let(:doc) { RXerces::XML::Document.parse(xml) }
|
|
846
|
+
let(:root) { doc.root }
|
|
847
|
+
|
|
848
|
+
it "finds elements by tag name" do
|
|
849
|
+
books = root.css('book')
|
|
850
|
+
expect(books.length).to eq(3)
|
|
851
|
+
end
|
|
852
|
+
|
|
853
|
+
it "finds elements by class" do
|
|
854
|
+
fiction = root.css('.fiction')
|
|
855
|
+
expect(fiction.length).to eq(2)
|
|
856
|
+
end
|
|
857
|
+
|
|
858
|
+
it "finds elements by id" do
|
|
859
|
+
book = root.css('#book1')
|
|
860
|
+
expect(book.length).to eq(1)
|
|
861
|
+
expect(book[0].xpath('.//title')[0].text.strip).to eq('1984')
|
|
862
|
+
end
|
|
863
|
+
|
|
864
|
+
it "finds elements by tag and class" do
|
|
865
|
+
fiction_books = root.css('book.fiction')
|
|
866
|
+
expect(fiction_books.length).to eq(2)
|
|
553
867
|
end
|
|
554
868
|
|
|
555
|
-
it "
|
|
556
|
-
|
|
869
|
+
it "finds elements by tag and id" do
|
|
870
|
+
book = root.css('book#book2')
|
|
871
|
+
expect(book.length).to eq(1)
|
|
872
|
+
expect(book[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
|
|
873
|
+
end
|
|
874
|
+
|
|
875
|
+
it "finds elements with attribute selector" do
|
|
876
|
+
books_with_id = root.css('book[id]')
|
|
877
|
+
expect(books_with_id.length).to eq(3)
|
|
878
|
+
end
|
|
879
|
+
|
|
880
|
+
it "finds elements with attribute value selector" do
|
|
881
|
+
book = root.css('book[id=book3]')
|
|
882
|
+
expect(book.length).to eq(1)
|
|
883
|
+
expect(book[0].xpath('.//title')[0].text.strip).to eq('Sapiens')
|
|
884
|
+
end
|
|
885
|
+
|
|
886
|
+
it "handles descendant combinator" do
|
|
887
|
+
titles = root.css('library title')
|
|
888
|
+
expect(titles.length).to eq(3)
|
|
889
|
+
end
|
|
890
|
+
|
|
891
|
+
it "handles child combinator" do
|
|
892
|
+
books = root.css('library > book')
|
|
893
|
+
expect(books.length).to eq(3)
|
|
894
|
+
end
|
|
895
|
+
|
|
896
|
+
it "finds nested elements" do
|
|
897
|
+
authors = root.css('book author')
|
|
898
|
+
expect(authors.length).to eq(3)
|
|
899
|
+
end
|
|
900
|
+
|
|
901
|
+
it "combines multiple selectors" do
|
|
902
|
+
result = root.css('book.fiction title')
|
|
903
|
+
expect(result.length).to eq(2)
|
|
557
904
|
end
|
|
558
905
|
end
|
|
559
906
|
end
|
data/spec/nodeset_spec.rb
CHANGED
|
@@ -86,6 +86,65 @@ RSpec.describe RXerces::XML::NodeSet do
|
|
|
86
86
|
end
|
|
87
87
|
end
|
|
88
88
|
|
|
89
|
+
describe "#first" do
|
|
90
|
+
it "returns the first node" do
|
|
91
|
+
first = nodeset.first
|
|
92
|
+
expect(first).to be_a(RXerces::XML::Element)
|
|
93
|
+
expect(first.text.strip).to eq('First')
|
|
94
|
+
end
|
|
95
|
+
|
|
96
|
+
it "returns nil for empty nodeset" do
|
|
97
|
+
expect(empty_nodeset.first).to be_nil
|
|
98
|
+
end
|
|
99
|
+
end
|
|
100
|
+
|
|
101
|
+
describe "#last" do
|
|
102
|
+
it "returns the last node" do
|
|
103
|
+
last = nodeset.last
|
|
104
|
+
expect(last).to be_a(RXerces::XML::Element)
|
|
105
|
+
expect(last.text.strip).to eq('Third')
|
|
106
|
+
end
|
|
107
|
+
|
|
108
|
+
it "returns nil for empty nodeset" do
|
|
109
|
+
expect(empty_nodeset.last).to be_nil
|
|
110
|
+
end
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
describe "#empty?" do
|
|
114
|
+
it "returns false for non-empty nodeset" do
|
|
115
|
+
expect(nodeset.empty?).to be false
|
|
116
|
+
end
|
|
117
|
+
|
|
118
|
+
it "returns true for empty nodeset" do
|
|
119
|
+
expect(empty_nodeset.empty?).to be true
|
|
120
|
+
end
|
|
121
|
+
end
|
|
122
|
+
|
|
123
|
+
describe "#inner_html" do
|
|
124
|
+
it "returns concatenated inner_html of all nodes" do
|
|
125
|
+
result = nodeset.inner_html
|
|
126
|
+
expect(result).to be_a(String)
|
|
127
|
+
expect(result).to eq('FirstSecondThird')
|
|
128
|
+
end
|
|
129
|
+
|
|
130
|
+
it "returns empty string for empty nodeset" do
|
|
131
|
+
expect(empty_nodeset.inner_html).to eq('')
|
|
132
|
+
end
|
|
133
|
+
|
|
134
|
+
it "includes child elements in inner_html" do
|
|
135
|
+
xml_with_children = <<-XML
|
|
136
|
+
<root>
|
|
137
|
+
<div><span>A</span></div>
|
|
138
|
+
<div><span>B</span></div>
|
|
139
|
+
</root>
|
|
140
|
+
XML
|
|
141
|
+
doc = RXerces::XML::Document.parse(xml_with_children)
|
|
142
|
+
divs = doc.xpath('//div')
|
|
143
|
+
expect(divs.inner_html).to include('<span>A</span>')
|
|
144
|
+
expect(divs.inner_html).to include('<span>B</span>')
|
|
145
|
+
end
|
|
146
|
+
end
|
|
147
|
+
|
|
89
148
|
it "includes Enumerable" do
|
|
90
149
|
expect(RXerces::XML::NodeSet.ancestors).to include(Enumerable)
|
|
91
150
|
end
|
|
data/spec/nokogiri_compatibility_spec.rb
CHANGED
|
@@ -37,6 +37,50 @@ RSpec.describe "Nokogiri compatibility" do
|
|
|
37
37
|
end
|
|
38
38
|
end
|
|
39
39
|
|
|
40
|
+
describe "Nokogiri::HTML" do
|
|
41
|
+
it "exists" do
|
|
42
|
+
expect(defined?(Nokogiri::HTML)).to eq('constant')
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
describe ".parse" do
|
|
46
|
+
it "parses HTML" do
|
|
47
|
+
html = '<html><body><h1>Hello</h1></body></html>'
|
|
48
|
+
doc = Nokogiri::HTML.parse(html)
|
|
49
|
+
expect(doc).to be_a(RXerces::XML::Document)
|
|
50
|
+
end
|
|
51
|
+
end
|
|
52
|
+
end
|
|
53
|
+
|
|
54
|
+
describe "Nokogiri.HTML" do
|
|
55
|
+
it "parses HTML" do
|
|
56
|
+
html = '<html><body><h1>Hello</h1></body></html>'
|
|
57
|
+
doc = Nokogiri.HTML(html)
|
|
58
|
+
expect(doc).to be_a(RXerces::XML::Document)
|
|
59
|
+
end
|
|
60
|
+
end
|
|
61
|
+
|
|
62
|
+
describe "Nokogiri::HTML class aliases" do
|
|
63
|
+
it "aliases Document" do
|
|
64
|
+
expect(Nokogiri::HTML::Document).to eq(RXerces::XML::Document)
|
|
65
|
+
end
|
|
66
|
+
|
|
67
|
+
it "aliases Node" do
|
|
68
|
+
expect(Nokogiri::HTML::Node).to eq(RXerces::XML::Node)
|
|
69
|
+
end
|
|
70
|
+
|
|
71
|
+
it "aliases Element" do
|
|
72
|
+
expect(Nokogiri::HTML::Element).to eq(RXerces::XML::Element)
|
|
73
|
+
end
|
|
74
|
+
|
|
75
|
+
it "aliases Text" do
|
|
76
|
+
expect(Nokogiri::HTML::Text).to eq(RXerces::XML::Text)
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
it "aliases NodeSet" do
|
|
80
|
+
expect(Nokogiri::HTML::NodeSet).to eq(RXerces::XML::NodeSet)
|
|
81
|
+
end
|
|
82
|
+
end
|
|
83
|
+
|
|
40
84
|
describe "Nokogiri::XML::Document" do
|
|
41
85
|
it "is an alias for RXerces::XML::Document" do
|
|
42
86
|
expect(Nokogiri::XML::Document).to eq(RXerces::XML::Document)
|
data/spec/rxerces_shared.rb
CHANGED
data.tar.gz.sig
CHANGED
|
Binary file
|
metadata
CHANGED
metadata.gz.sig
CHANGED
|
Binary file
|