rxerces 0.4.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -21,6 +21,25 @@ module Nokogiri
21
21
  Schema = RXerces::XML::Schema
22
22
  end
23
23
 
24
+ # Nokogiri-compatible HTML module
25
+ # Since RXerces uses Xerces-C, which is an XML parser,
26
+ # HTML parsing delegates to XML parsing
27
+ module HTML
28
+ # Parse HTML from a string - delegates to XML parsing
29
+ # @param string [String] HTML string to parse
30
+ # @return [RXerces::XML::Document] parsed document
31
+ def self.parse(string)
32
+ RXerces::XML::Document.parse(string)
33
+ end
34
+
35
+ # Alias the Document, Node, Element, Text and NodeSet classes for compatibility
36
+ Document = RXerces::XML::Document
37
+ Node = RXerces::XML::Node
38
+ Element = RXerces::XML::Element
39
+ Text = RXerces::XML::Text
40
+ NodeSet = RXerces::XML::NodeSet
41
+ end
42
+
24
43
  # Top-level parse method for compatibility
25
44
  # @param string [String] XML string to parse
26
45
  # @return [RXerces::XML::Document] parsed document
@@ -28,6 +47,13 @@ module Nokogiri
28
47
  RXerces::XML::Document.parse(string)
29
48
  end
30
49
 
50
+ # Top-level HTML parsing method
51
+ # @param string [String] HTML string to parse
52
+ # @return [RXerces::XML::Document] parsed document
53
+ def self.HTML(string)
54
+ RXerces::XML::Document.parse(string)
55
+ end
56
+
31
57
  class << self
32
58
  alias_method :parse, :XML
33
59
  end
@@ -1,3 +1,3 @@
1
1
  module RXerces
2
- VERSION = "0.4.0".freeze
2
+ VERSION = "0.5.0".freeze
3
3
  end
data/rxerces.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "rxerces"
3
- spec.version = "0.4.0"
3
+ spec.version = "0.5.0"
4
4
  spec.author = "Daniel J. Berger"
5
5
  spec.email = "djberg96@gmail.com"
6
6
  spec.cert_chain = ["certs/djberg96_pub.pem"]
@@ -64,6 +64,65 @@ RSpec.describe RXerces::XML::Document do
64
64
  end
65
65
  end
66
66
 
67
+ describe "#css" do
68
+ # Check if Xalan support is compiled in (CSS requires XPath which needs Xalan)
69
+ xalan_available = begin
70
+ test_xml = '<root><item id="1">A</item><item id="2">B</item></root>'
71
+ test_doc = RXerces::XML::Document.parse(test_xml)
72
+ result = test_doc.xpath('//item[@id="1"]')
73
+ result.length == 1
74
+ rescue
75
+ false
76
+ end
77
+
78
+ before(:all) do
79
+ unless xalan_available
80
+ skip "Xalan-C not available - CSS selectors require Xalan-C library"
81
+ end
82
+ end
83
+
84
+ let(:xml) do
85
+ <<-XML
86
+ <library>
87
+ <book id="book1" class="fiction">
88
+ <title>1984</title>
89
+ </book>
90
+ <book id="book2" class="non-fiction">
91
+ <title>Sapiens</title>
92
+ </book>
93
+ </library>
94
+ XML
95
+ end
96
+
97
+ let(:doc) { RXerces::XML::Document.parse(xml) }
98
+
99
+ it "returns a NodeSet" do
100
+ result = doc.css('book')
101
+ expect(result).to be_a(RXerces::XML::NodeSet)
102
+ end
103
+
104
+ it "finds elements by tag name" do
105
+ books = doc.css('book')
106
+ expect(books.length).to eq(2)
107
+ end
108
+
109
+ it "finds elements by class" do
110
+ fiction = doc.css('.fiction')
111
+ expect(fiction.length).to eq(1)
112
+ end
113
+
114
+ it "finds elements by id" do
115
+ book = doc.css('#book1')
116
+ expect(book.length).to eq(1)
117
+ expect(book[0].xpath('.//title')[0].text.strip).to eq('1984')
118
+ end
119
+
120
+ it "finds elements with combined selectors" do
121
+ fiction_books = doc.css('book.fiction')
122
+ expect(fiction_books.length).to eq(1)
123
+ end
124
+ end
125
+
67
126
  describe "#encoding" do
68
127
  it "returns UTF-8 for documents without explicit encoding" do
69
128
  doc = RXerces::XML::Document.parse(simple_xml)
data/spec/node_spec.rb CHANGED
@@ -110,6 +110,40 @@ RSpec.describe RXerces::XML::Node do
110
110
  end
111
111
  end
112
112
 
113
+ describe "#get_attribute" do
114
+ it "is an alias for []" do
115
+ person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
116
+ expect(person.get_attribute('id')).to eq('1')
117
+ expect(person.get_attribute('name')).to eq('Alice')
118
+ end
119
+ end
120
+
121
+ describe "#attribute" do
122
+ it "is an alias for []" do
123
+ person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
124
+ expect(person.attribute('id')).to eq('1')
125
+ expect(person.attribute('name')).to eq('Alice')
126
+ end
127
+ end
128
+
129
+ describe "#has_attribute?" do
130
+ it "returns true when attribute exists" do
131
+ person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
132
+ expect(person.has_attribute?('id')).to be true
133
+ expect(person.has_attribute?('name')).to be true
134
+ end
135
+
136
+ it "returns false when attribute does not exist" do
137
+ person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
138
+ expect(person.has_attribute?('nonexistent')).to be false
139
+ end
140
+
141
+ it "returns false for non-element nodes" do
142
+ text_node = root.children.find { |n| n.is_a?(RXerces::XML::Text) }
143
+ expect(text_node.has_attribute?('anything')).to be false if text_node
144
+ end
145
+ end
146
+
113
147
  describe "#children" do
114
148
  it "returns an array of child nodes" do
115
149
  children = root.children
@@ -158,6 +192,94 @@ RSpec.describe RXerces::XML::Node do
158
192
  end
159
193
  end
160
194
 
195
+ describe "#ancestors" do
196
+ it "returns an array of ancestor nodes" do
197
+ person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
198
+ age = person.children.find { |n| n.name == 'age' }
199
+ ancestors = age.ancestors
200
+
201
+ expect(ancestors).to be_an(Array)
202
+ expect(ancestors.length).to eq(2)
203
+ expect(ancestors[0].name).to eq('person')
204
+ expect(ancestors[1].name).to eq('root')
205
+ end
206
+
207
+ it "returns ancestors in order from immediate parent to root" do
208
+ person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
209
+ city = person.children.find { |n| n.name == 'city' }
210
+ ancestors = city.ancestors
211
+
212
+ expect(ancestors.map(&:name)).to eq(['person', 'root'])
213
+ end
214
+
215
+ it "returns empty array for root element" do
216
+ ancestors = root.ancestors
217
+ expect(ancestors).to be_an(Array)
218
+ expect(ancestors).to be_empty
219
+ end
220
+
221
+ it "returns only one ancestor for direct children of root" do
222
+ person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
223
+ ancestors = person.ancestors
224
+
225
+ expect(ancestors.length).to eq(1)
226
+ expect(ancestors[0].name).to eq('root')
227
+ end
228
+
229
+ it "does not include the document node in ancestors" do
230
+ person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
231
+ ancestors = person.ancestors
232
+
233
+ expect(ancestors.any? { |a| a.name == '#document' }).to be false
234
+ end
235
+
236
+ context "with selector" do
237
+ # Check if Xalan support is compiled in (selectors require XPath which needs Xalan)
238
+ xalan_available = begin
239
+ test_xml = '<root><item id="1">A</item></root>'
240
+ test_doc = RXerces::XML::Document.parse(test_xml)
241
+ result = test_doc.xpath('//item[@id="1"]')
242
+ result.length == 1
243
+ rescue
244
+ false
245
+ end
246
+
247
+ before(:all) do
248
+ unless xalan_available
249
+ skip "Xalan-C not available - ancestor selectors require Xalan-C library"
250
+ end
251
+ end
252
+
253
+ it "filters ancestors by tag name selector" do
254
+ person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
255
+ city = person.children.find { |n| n.name == 'city' }
256
+ ancestors = city.ancestors('person')
257
+
258
+ expect(ancestors.length).to eq(1)
259
+ expect(ancestors[0].name).to eq('person')
260
+ end
261
+
262
+ it "filters ancestors by CSS class selector" do
263
+ person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
264
+ city = person.children.find { |n| n.name == 'city' }
265
+ person_ancestors = city.ancestors('person[name]')
266
+
267
+ expect(person_ancestors.length).to eq(1)
268
+ expect(person_ancestors[0].name).to eq('person')
269
+ expect(person_ancestors[0]['name']).to eq('Alice')
270
+ end
271
+
272
+ it "returns empty array when no ancestors match selector" do
273
+ person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
274
+ city = person.children.find { |n| n.name == 'city' }
275
+ ancestors = city.ancestors('nonexistent')
276
+
277
+ expect(ancestors).to be_an(Array)
278
+ expect(ancestors).to be_empty
279
+ end
280
+ end
281
+ end
282
+
161
283
  describe "#attributes" do
162
284
  it "returns a hash of attributes" do
163
285
  person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
@@ -281,6 +403,103 @@ RSpec.describe RXerces::XML::Node do
281
403
  end
282
404
  end
283
405
 
406
+ describe "#element_children" do
407
+ it "returns only element children, filtering out text nodes" do
408
+ person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
409
+ element_children = person.element_children
410
+
411
+ expect(element_children).to be_an(Array)
412
+ expect(element_children.all? { |n| n.is_a?(RXerces::XML::Element) }).to be true
413
+ expect(element_children.length).to eq(2) # age and city elements
414
+ expect(element_children.map(&:name)).to match_array(['age', 'city'])
415
+ end
416
+
417
+ it "returns empty array for elements with no element children" do
418
+ person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
419
+ age = person.element_children.find { |n| n.name == 'age' }
420
+ expect(age.element_children).to be_empty
421
+ end
422
+
423
+ it "returns empty array for text nodes" do
424
+ person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
425
+ text_node = person.children.find { |n| n.is_a?(RXerces::XML::Text) }
426
+ expect(text_node.element_children).to be_empty if text_node
427
+ end
428
+ end
429
+
430
+ describe "#elements" do
431
+ it "is an alias for element_children" do
432
+ person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
433
+ expect(person.elements.map(&:name)).to eq(person.element_children.map(&:name))
434
+ expect(person.elements.length).to eq(2)
435
+ end
436
+ end
437
+
438
+ describe "#next_element" do
439
+ it "returns the next element sibling, skipping text nodes" do
440
+ people = root.children.select { |n| n.is_a?(RXerces::XML::Element) }
441
+ first_person = people[0]
442
+ next_element = first_person.next_element
443
+
444
+ expect(next_element).to be_a(RXerces::XML::Element)
445
+ expect(next_element.name).to eq('person')
446
+ expect(next_element['id']).to eq('2')
447
+ end
448
+
449
+ it "returns nil when there is no next element" do
450
+ people = root.children.select { |n| n.is_a?(RXerces::XML::Element) }
451
+ last_person = people.last
452
+ expect(last_person.next_element).to be_nil
453
+ end
454
+
455
+ it "can navigate through all element siblings" do
456
+ first_element = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
457
+ siblings = []
458
+ current = first_element
459
+
460
+ while current
461
+ siblings << current
462
+ current = current.next_element
463
+ end
464
+
465
+ expect(siblings.length).to eq(2)
466
+ expect(siblings[0]['id']).to eq('1')
467
+ expect(siblings[1]['id']).to eq('2')
468
+ end
469
+ end
470
+
471
+ describe "#previous_element" do
472
+ it "returns the previous element sibling, skipping text nodes" do
473
+ people = root.children.select { |n| n.is_a?(RXerces::XML::Element) }
474
+ second_person = people[1]
475
+ prev_element = second_person.previous_element
476
+
477
+ expect(prev_element).to be_a(RXerces::XML::Element)
478
+ expect(prev_element.name).to eq('person')
479
+ expect(prev_element['id']).to eq('1')
480
+ end
481
+
482
+ it "returns nil when there is no previous element" do
483
+ first_element = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
484
+ expect(first_element.previous_element).to be_nil
485
+ end
486
+
487
+ it "can navigate backward through all element siblings" do
488
+ last_element = root.children.select { |n| n.is_a?(RXerces::XML::Element) }.last
489
+ siblings = []
490
+ current = last_element
491
+
492
+ while current
493
+ siblings.unshift(current)
494
+ current = current.previous_element
495
+ end
496
+
497
+ expect(siblings.length).to eq(2)
498
+ expect(siblings[0]['id']).to eq('1')
499
+ expect(siblings[1]['id']).to eq('2')
500
+ end
501
+ end
502
+
284
503
  describe "#add_child" do
285
504
  it "adds a text node from a string" do
286
505
  person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
@@ -547,13 +766,141 @@ RSpec.describe RXerces::XML::Node do
547
766
  end
548
767
  end
549
768
 
769
+ describe "#at_css" do
770
+ # Check if Xalan support is compiled in (CSS requires XPath which needs Xalan)
771
+ xalan_available = begin
772
+ test_xml = '<root><item id="1">A</item><item id="2">B</item></root>'
773
+ test_doc = RXerces::XML::Document.parse(test_xml)
774
+ result = test_doc.xpath('//item[@id="1"]')
775
+ result.length == 1
776
+ rescue
777
+ false
778
+ end
779
+
780
+ before(:all) do
781
+ unless xalan_available
782
+ skip "Xalan-C not available - CSS selectors require Xalan-C library"
783
+ end
784
+ end
785
+
786
+ it "is an alias for at (which uses CSS converted to XPath)" do
787
+ xml = '<root><item class="foo">First</item><item class="bar">Second</item></root>'
788
+ doc = RXerces::XML::Document.parse(xml)
789
+ result = doc.root.at_css('.foo')
790
+ expect(result).to be_a(RXerces::XML::Element)
791
+ expect(result.text).to eq('First')
792
+ end
793
+
794
+ it "returns the first matching element" do
795
+ xml = '<root><item>A</item><item>B</item></root>'
796
+ doc = RXerces::XML::Document.parse(xml)
797
+ result = doc.root.at_css('item')
798
+ expect(result.text).to eq('A')
799
+ end
800
+
801
+ it "returns nil when no match found" do
802
+ xml = '<root><item>A</item></root>'
803
+ doc = RXerces::XML::Document.parse(xml)
804
+ result = doc.root.at_css('nonexistent')
805
+ expect(result).to be_nil
806
+ end
807
+ end
808
+
550
809
  describe "#css" do
551
- it "raises NotImplementedError for CSS selectors" do
552
- expect { root.css('div.class') }.to raise_error(NotImplementedError, /CSS selectors are not supported/)
810
+ # Check if Xalan support is compiled in (CSS requires XPath which needs Xalan)
811
+ xalan_available = begin
812
+ test_xml = '<root><item id="1">A</item><item id="2">B</item></root>'
813
+ test_doc = RXerces::XML::Document.parse(test_xml)
814
+ result = test_doc.xpath('//item[@id="1"]')
815
+ result.length == 1
816
+ rescue
817
+ false
818
+ end
819
+
820
+ before(:all) do
821
+ unless xalan_available
822
+ skip "Xalan-C not available - CSS selectors require Xalan-C library"
823
+ end
824
+ end
825
+
826
+ let(:xml) do
827
+ <<-XML
828
+ <library>
829
+ <book id="book1" class="fiction bestseller">
830
+ <title>1984</title>
831
+ <author>George Orwell</author>
832
+ </book>
833
+ <book id="book2" class="fiction">
834
+ <title>Brave New World</title>
835
+ <author>Aldous Huxley</author>
836
+ </book>
837
+ <book id="book3" class="non-fiction">
838
+ <title>Sapiens</title>
839
+ <author>Yuval Noah Harari</author>
840
+ </book>
841
+ </library>
842
+ XML
843
+ end
844
+
845
+ let(:doc) { RXerces::XML::Document.parse(xml) }
846
+ let(:root) { doc.root }
847
+
848
+ it "finds elements by tag name" do
849
+ books = root.css('book')
850
+ expect(books.length).to eq(3)
851
+ end
852
+
853
+ it "finds elements by class" do
854
+ fiction = root.css('.fiction')
855
+ expect(fiction.length).to eq(2)
856
+ end
857
+
858
+ it "finds elements by id" do
859
+ book = root.css('#book1')
860
+ expect(book.length).to eq(1)
861
+ expect(book[0].xpath('.//title')[0].text.strip).to eq('1984')
862
+ end
863
+
864
+ it "finds elements by tag and class" do
865
+ fiction_books = root.css('book.fiction')
866
+ expect(fiction_books.length).to eq(2)
553
867
  end
554
868
 
555
- it "suggests using xpath instead" do
556
- expect { root.css('p') }.to raise_error(NotImplementedError, /Use xpath/)
869
+ it "finds elements by tag and id" do
870
+ book = root.css('book#book2')
871
+ expect(book.length).to eq(1)
872
+ expect(book[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
873
+ end
874
+
875
+ it "finds elements with attribute selector" do
876
+ books_with_id = root.css('book[id]')
877
+ expect(books_with_id.length).to eq(3)
878
+ end
879
+
880
+ it "finds elements with attribute value selector" do
881
+ book = root.css('book[id=book3]')
882
+ expect(book.length).to eq(1)
883
+ expect(book[0].xpath('.//title')[0].text.strip).to eq('Sapiens')
884
+ end
885
+
886
+ it "handles descendant combinator" do
887
+ titles = root.css('library title')
888
+ expect(titles.length).to eq(3)
889
+ end
890
+
891
+ it "handles child combinator" do
892
+ books = root.css('library > book')
893
+ expect(books.length).to eq(3)
894
+ end
895
+
896
+ it "finds nested elements" do
897
+ authors = root.css('book author')
898
+ expect(authors.length).to eq(3)
899
+ end
900
+
901
+ it "combines multiple selectors" do
902
+ result = root.css('book.fiction title')
903
+ expect(result.length).to eq(2)
557
904
  end
558
905
  end
559
906
  end
data/spec/nodeset_spec.rb CHANGED
@@ -86,6 +86,65 @@ RSpec.describe RXerces::XML::NodeSet do
86
86
  end
87
87
  end
88
88
 
89
+ describe "#first" do
90
+ it "returns the first node" do
91
+ first = nodeset.first
92
+ expect(first).to be_a(RXerces::XML::Element)
93
+ expect(first.text.strip).to eq('First')
94
+ end
95
+
96
+ it "returns nil for empty nodeset" do
97
+ expect(empty_nodeset.first).to be_nil
98
+ end
99
+ end
100
+
101
+ describe "#last" do
102
+ it "returns the last node" do
103
+ last = nodeset.last
104
+ expect(last).to be_a(RXerces::XML::Element)
105
+ expect(last.text.strip).to eq('Third')
106
+ end
107
+
108
+ it "returns nil for empty nodeset" do
109
+ expect(empty_nodeset.last).to be_nil
110
+ end
111
+ end
112
+
113
+ describe "#empty?" do
114
+ it "returns false for non-empty nodeset" do
115
+ expect(nodeset.empty?).to be false
116
+ end
117
+
118
+ it "returns true for empty nodeset" do
119
+ expect(empty_nodeset.empty?).to be true
120
+ end
121
+ end
122
+
123
+ describe "#inner_html" do
124
+ it "returns concatenated inner_html of all nodes" do
125
+ result = nodeset.inner_html
126
+ expect(result).to be_a(String)
127
+ expect(result).to eq('FirstSecondThird')
128
+ end
129
+
130
+ it "returns empty string for empty nodeset" do
131
+ expect(empty_nodeset.inner_html).to eq('')
132
+ end
133
+
134
+ it "includes child elements in inner_html" do
135
+ xml_with_children = <<-XML
136
+ <root>
137
+ <div><span>A</span></div>
138
+ <div><span>B</span></div>
139
+ </root>
140
+ XML
141
+ doc = RXerces::XML::Document.parse(xml_with_children)
142
+ divs = doc.xpath('//div')
143
+ expect(divs.inner_html).to include('<span>A</span>')
144
+ expect(divs.inner_html).to include('<span>B</span>')
145
+ end
146
+ end
147
+
89
148
  it "includes Enumerable" do
90
149
  expect(RXerces::XML::NodeSet.ancestors).to include(Enumerable)
91
150
  end
@@ -37,6 +37,50 @@ RSpec.describe "Nokogiri compatibility" do
37
37
  end
38
38
  end
39
39
 
40
+ describe "Nokogiri::HTML" do
41
+ it "exists" do
42
+ expect(defined?(Nokogiri::HTML)).to eq('constant')
43
+ end
44
+
45
+ describe ".parse" do
46
+ it "parses HTML" do
47
+ html = '<html><body><h1>Hello</h1></body></html>'
48
+ doc = Nokogiri::HTML.parse(html)
49
+ expect(doc).to be_a(RXerces::XML::Document)
50
+ end
51
+ end
52
+ end
53
+
54
+ describe "Nokogiri.HTML" do
55
+ it "parses HTML" do
56
+ html = '<html><body><h1>Hello</h1></body></html>'
57
+ doc = Nokogiri.HTML(html)
58
+ expect(doc).to be_a(RXerces::XML::Document)
59
+ end
60
+ end
61
+
62
+ describe "Nokogiri::HTML class aliases" do
63
+ it "aliases Document" do
64
+ expect(Nokogiri::HTML::Document).to eq(RXerces::XML::Document)
65
+ end
66
+
67
+ it "aliases Node" do
68
+ expect(Nokogiri::HTML::Node).to eq(RXerces::XML::Node)
69
+ end
70
+
71
+ it "aliases Element" do
72
+ expect(Nokogiri::HTML::Element).to eq(RXerces::XML::Element)
73
+ end
74
+
75
+ it "aliases Text" do
76
+ expect(Nokogiri::HTML::Text).to eq(RXerces::XML::Text)
77
+ end
78
+
79
+ it "aliases NodeSet" do
80
+ expect(Nokogiri::HTML::NodeSet).to eq(RXerces::XML::NodeSet)
81
+ end
82
+ end
83
+
40
84
  describe "Nokogiri::XML::Document" do
41
85
  it "is an alias for RXerces::XML::Document" do
42
86
  expect(Nokogiri::XML::Document).to eq(RXerces::XML::Document)
@@ -4,7 +4,7 @@ require 'rxerces'
4
4
 
5
5
  RSpec.shared_examples RXerces do
6
6
  example 'version number is set to the expected value' do
7
- expect(RXerces::VERSION).to eq('0.4.0')
7
+ expect(RXerces::VERSION).to eq('0.5.0')
8
8
  expect(RXerces::VERSION).to be_frozen
9
9
  end
10
10
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rxerces
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.4.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel J. Berger
metadata.gz.sig CHANGED
Binary file