RubyGems - rxerces - Versions diffs - 0.3.0 → 0.5.0 - Mend

rxerces 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Files changed (18) hide show

checksums.yaml +4 -4
checksums.yaml.gz.sig +0 -0
data/CHANGES.md +20 -0
data/README.md +40 -5
data/ext/rxerces/extconf.rb +42 -0
data/ext/rxerces/rxerces.cpp +980 -8
data/lib/rxerces/nokogiri.rb +26 -0
data/lib/rxerces/version.rb +1 -1
data/rxerces.gemspec +5 -2
data/spec/document_spec.rb +78 -0
data/spec/node_spec.rb +434 -0
data/spec/nodeset_spec.rb +59 -0
data/spec/nokogiri_compatibility_spec.rb +44 -0
data/spec/rxerces_shared.rb +1 -1
data/spec/xpath_spec.rb +252 -18
data.tar.gz.sig +0 -0
metadata +4 -3
metadata.gz.sig +2 -2

data/spec/nokogiri_compatibility_spec.rb CHANGED Viewed

@@ -37,6 +37,50 @@ RSpec.describe "Nokogiri compatibility" do
     end
   end
+  describe "Nokogiri::HTML" do
+    it "exists" do
+      expect(defined?(Nokogiri::HTML)).to eq('constant')
+    end
+    describe ".parse" do
+      it "parses HTML" do
+        html = '<html><body><h1>Hello</h1></body></html>'
+        doc = Nokogiri::HTML.parse(html)
+        expect(doc).to be_a(RXerces::XML::Document)
+      end
+    end
+  end
+  describe "Nokogiri.HTML" do
+    it "parses HTML" do
+      html = '<html><body><h1>Hello</h1></body></html>'
+      doc = Nokogiri.HTML(html)
+      expect(doc).to be_a(RXerces::XML::Document)
+    end
+  end
+  describe "Nokogiri::HTML class aliases" do
+    it "aliases Document" do
+      expect(Nokogiri::HTML::Document).to eq(RXerces::XML::Document)
+    end
+    it "aliases Node" do
+      expect(Nokogiri::HTML::Node).to eq(RXerces::XML::Node)
+    end
+    it "aliases Element" do
+      expect(Nokogiri::HTML::Element).to eq(RXerces::XML::Element)
+    end
+    it "aliases Text" do
+      expect(Nokogiri::HTML::Text).to eq(RXerces::XML::Text)
+    end
+    it "aliases NodeSet" do
+      expect(Nokogiri::HTML::NodeSet).to eq(RXerces::XML::NodeSet)
+    end
+  end
   describe "Nokogiri::XML::Document" do
     it "is an alias for RXerces::XML::Document" do
       expect(Nokogiri::XML::Document).to eq(RXerces::XML::Document)

data/spec/rxerces_shared.rb CHANGED Viewed

@@ -4,7 +4,7 @@ require 'rxerces'
 RSpec.shared_examples RXerces do
   example 'version number is set to the expected value' do
-    expect(RXerces::VERSION).to eq('0.3.0')
+    expect(RXerces::VERSION).to eq('0.5.0')
     expect(RXerces::VERSION).to be_frozen
   end
 end

data/spec/xpath_spec.rb CHANGED Viewed

@@ -141,24 +141,258 @@ RSpec.describe "XPath support" do
     end
   end
-  describe "XPath limitations" do
-    it "notes that Xerces-C uses XML Schema XPath subset" do
-      # Xerces-C implements the XML Schema XPath subset, not full XPath 1.0
-      # This means the following are NOT supported:
-      # - Attribute predicates like [@id="1"]
-      # - Functions like last(), position(), text()
-      # - Comparison operators in predicates
-      #
-      # However, basic path expressions work well:
-      # - // (descendant-or-self)
-      # - / (child)
-      # - . (self)
-      # - .. (parent)
-      # Basic paths work
-      expect(doc.xpath('//book').length).to eq(3)
-      expect(doc.xpath('/library/book').length).to eq(3)
-      expect(doc.xpath('//title').length).to eq(3)
+  describe "XPath 1.0 compliance with Xalan" do
+    # Check if Xalan support is compiled in
+    xalan_available = begin
+      # Try a feature that only works with Xalan (attribute predicates)
+      test_xml = '<root><item id="1">A</item><item id="2">B</item></root>'
+      test_doc = RXerces::XML::Document.parse(test_xml)
+      result = test_doc.xpath('//item[@id="1"]')
+      result.length == 1
+    rescue
+      false
+    end
+    before(:all) do
+      unless xalan_available
+        skip "Xalan-C not available - XPath 1.0 features require Xalan-C library"
+      end
+    end
+    describe "Attribute predicates" do
+      it "finds elements by attribute value" do
+        book = doc.xpath('//book[@id="1"]')
+        expect(book.length).to eq(1)
+        expect(book[0].xpath('.//title')[0].text.strip).to eq('1984')
+      end
+      it "finds elements by attribute equality" do
+        fiction_books = doc.xpath('//book[@category="fiction"]')
+        expect(fiction_books.length).to eq(2)
+      end
+      it "finds elements by attribute inequality" do
+        non_fiction = doc.xpath('//book[@category!="fiction"]')
+        expect(non_fiction.length).to eq(1)
+        expect(non_fiction[0].xpath('.//title')[0].text.strip).to eq('Sapiens')
+      end
+      it "supports multiple attribute predicates" do
+        book = doc.xpath('//book[@id="2"][@category="fiction"]')
+        expect(book.length).to eq(1)
+        expect(book[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
+      end
+    end
+    describe "Position and indexing functions" do
+      it "uses position() to find first element" do
+        first_book = doc.xpath('//book[position()=1]')
+        expect(first_book.length).to eq(1)
+        expect(first_book[0].xpath('.//title')[0].text.strip).to eq('1984')
+      end
+      it "uses last() to find last element" do
+        last_book = doc.xpath('//book[position()=last()]')
+        expect(last_book.length).to eq(1)
+        expect(last_book[0].xpath('.//title')[0].text.strip).to eq('Sapiens')
+      end
+      it "uses numeric predicates for indexing" do
+        second_book = doc.xpath('//book[2]')
+        expect(second_book.length).to eq(1)
+        expect(second_book[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
+      end
+      it "finds elements by position greater than" do
+        later_books = doc.xpath('//book[position()>1]')
+        expect(later_books.length).to eq(2)
+      end
+    end
+    describe "String functions" do
+      it "uses contains() function" do
+        books_with_new = doc.xpath('//book[contains(.//title, "New")]')
+        expect(books_with_new.length).to eq(1)
+        expect(books_with_new[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
+      end
+      it "uses starts-with() function" do
+        books_starting_with_1 = doc.xpath('//book[starts-with(.//title, "1")]')
+        expect(books_starting_with_1.length).to eq(1)
+        expect(books_starting_with_1[0].xpath('.//title')[0].text.strip).to eq('1984')
+      end
+      it "uses normalize-space() function" do
+        # Should find titles even with whitespace differences
+        result = doc.xpath('//title[normalize-space()="1984"]')
+        expect(result.length).to eq(1)
+      end
+      it "uses string-length() function" do
+        # Find books where title length is less than 10 characters
+        short_titles = doc.xpath('//book[string-length(.//title) < 10]')
+        expect(short_titles.length).to eq(2) # "1984" and "Sapiens"
+      end
+      it "uses concat() function" do
+        # This tests that concat works by checking if a book has matching text
+        # concat('19', '84') = '1984'
+        result = doc.xpath('//book[.//title = concat("19", "84")]')
+        expect(result.length).to eq(1)
+      end
+      it "uses substring() function" do
+        # Find books where first 5 chars of title is "Brave"
+        result = doc.xpath('//book[substring(.//title, 1, 5) = "Brave"]')
+        expect(result.length).to eq(1)
+        expect(result[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
+      end
+    end
+    describe "Numeric functions and comparisons" do
+      it "uses count() function" do
+        # Find library element that has exactly 3 book children
+        result = doc.xpath('/library[count(book) = 3]')
+        expect(result.length).to eq(1)
+      end
+      it "compares numeric values with >" do
+        expensive_books = doc.xpath('//book[.//price > 15]')
+        expect(expensive_books.length).to eq(2) # 15.99 and 18.99
+      end
+      it "compares numeric values with <" do
+        cheap_books = doc.xpath('//book[.//price < 16]')
+        expect(cheap_books.length).to eq(2) # 15.99 and 14.99
+      end
+      it "compares numeric values with >=" do
+        books_1950_or_later = doc.xpath('//book[.//year >= 1949]')
+        expect(books_1950_or_later.length).to eq(2) # 1949 and 2011
+      end
+      it "uses sum() function" do
+        # sum() returns a number, not a nodeset, so we can't call it directly
+        # Instead, test it within a predicate
+        result = doc.xpath('//library[sum(book/price) > 40]')
+        expect(result.length).to eq(1) # Total is 49.97
+      end
+      it "uses floor() function" do
+        # Find books where floor(price) = 15 (15.99 -> 15)
+        result = doc.xpath('//book[floor(.//price) = 15]')
+        expect(result.length).to eq(1)
+      end
+      it "uses ceiling() function" do
+        # Find books where ceiling(price) = 19 (18.99 -> 19)
+        result = doc.xpath('//book[ceiling(.//price) = 19]')
+        expect(result.length).to eq(1)
+      end
+      it "uses round() function" do
+        # Find books where round(price) = 15 (14.99 -> 15, 15.99 -> 16)
+        result = doc.xpath('//book[round(.//price) = 15]')
+        expect(result.length).to eq(1) # Only 14.99
+      end
+    end
+    describe "Boolean operators" do
+      it "uses 'and' operator" do
+        result = doc.xpath('//book[@category="fiction" and .//year < 1940]')
+        expect(result.length).to eq(1) # Only "Brave New World" (1932)
+      end
+      it "uses 'or' operator" do
+        result = doc.xpath('//book[@id="1" or @id="3"]')
+        expect(result.length).to eq(2)
+      end
+      it "uses 'not()' function" do
+        result = doc.xpath('//book[not(@category="fiction")]')
+        expect(result.length).to eq(1)
+        expect(result[0].xpath('.//title')[0].text.strip).to eq('Sapiens')
+      end
+      it "combines multiple boolean operators" do
+        result = doc.xpath('//book[@category="fiction" and .//price < 15.50]')
+        expect(result.length).to eq(1) # Only "Brave New World" (14.99)
+      end
+    end
+    describe "Axes" do
+      it "uses parent:: axis" do
+        # Find parent of first title
+        first_title = doc.xpath('//title[1]')
+        parent = first_title[0].xpath('parent::*')
+        expect(parent.length).to eq(1)
+        expect(parent[0].name).to eq('book')
+      end
+      it "uses ancestor:: axis" do
+        # Find all ancestors of a title element
+        first_title = doc.xpath('//title[1]')
+        ancestors = first_title[0].xpath('ancestor::*')
+        expect(ancestors.length).to eq(2) # book and library
+      end
+      it "uses following-sibling:: axis" do
+        # Find siblings after title
+        first_title = doc.xpath('//title[1]')
+        siblings = first_title[0].xpath('following-sibling::*')
+        expect(siblings.length).to eq(3) # author, year, price
+      end
+      it "uses preceding-sibling:: axis" do
+        # Find siblings before author
+        first_author = doc.xpath('//author[1]')
+        siblings = first_author[0].xpath('preceding-sibling::*')
+        expect(siblings.length).to eq(1) # title
+      end
+      it "uses descendant:: axis" do
+        root = doc.root
+        descendants = root.xpath('descendant::title')
+        expect(descendants.length).to eq(3)
+      end
+      it "uses self:: axis" do
+        books = doc.xpath('//book')
+        self_nodes = books[0].xpath('self::book')
+        expect(self_nodes.length).to eq(1)
+      end
+    end
+    describe "Complex predicates" do
+      it "chains multiple predicates" do
+        result = doc.xpath('//book[@category="fiction"][.//year > 1940]')
+        expect(result.length).to eq(1) # Only "1984" (1949)
+      end
+      it "uses nested predicates" do
+        result = doc.xpath('//library[book[@category="fiction"]]')
+        expect(result.length).to eq(1)
+      end
+      it "combines functions in predicates" do
+        result = doc.xpath('//book[contains(.//title, "World") and .//year < 1950]')
+        expect(result.length).to eq(1)
+        expect(result[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
+      end
+    end
+    describe "Text nodes" do
+      it "selects text nodes with text()" do
+        text_nodes = doc.xpath('//title/text()')
+        expect(text_nodes.length).to eq(3)
+      end
+      it "uses text() in predicates" do
+        result = doc.xpath('//title[text()="1984"]')
+        # Note: text() returns the raw text which includes whitespace
+        # This might not match due to whitespace, so we test it doesn't error
+        expect { result }.not_to raise_error
+      end
     end
   end
 end

data.tar.gz.sig CHANGED Viewed

Binary file

metadata CHANGED Viewed

@@ -1,7 +1,7 @@
 --- !ruby/object:Gem::Specification
 name: rxerces
 version: !ruby/object:Gem::Version
-  version: 0.3.0
+  version: 0.5.0
 platform: ruby
 authors:
 - Daniel J. Berger
@@ -78,8 +78,9 @@ dependencies:
     - - "~>"
       - !ruby/object:Gem::Version
         version: '3.12'
-description: A Ruby XML library with Nokogiri-compatible API, powered by Xerces-C
-  instead of libxml2
+description: |2
+      A Ruby XML library with Nokogiri-compatible API, powered by Xerces-C
+      instead of libxml2. It also optionally uses Xalan for Xpath 1.0 compliance.
 email: djberg96@gmail.com
 executables: []
 extensions:

metadata.gz.sig CHANGED Viewed

@@ -1,2 +1,2 @@
-Q� �q��o�{�;?��-6��%�:��-ܪB�h�y,�P�U}�#loyp�w��c����M@X+>��]U�.��<z5��a'�m�z]G���1laA�x���w�V�`���b6��U�����s��z��=u��$�6��Vj6��~��x:�����
-O��Е>d~�s��cYs����	������tHgd�%�Y��@TtcԁI��S
+�s43i��Q� ��u����:G�A\��|;��2Mu<~|α��"��
+"H����@�/�P���4|�*�����4].v�z��p��ިzџ���p��s`%˝Piaٌ\J�w���@�}��-_1��5~����d�2:���-"�n�<R�TcA�ѥ��?y�