rxerces 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -37,6 +37,50 @@ RSpec.describe "Nokogiri compatibility" do
37
37
  end
38
38
  end
39
39
 
40
+ describe "Nokogiri::HTML" do
41
+ it "exists" do
42
+ expect(defined?(Nokogiri::HTML)).to eq('constant')
43
+ end
44
+
45
+ describe ".parse" do
46
+ it "parses HTML" do
47
+ html = '<html><body><h1>Hello</h1></body></html>'
48
+ doc = Nokogiri::HTML.parse(html)
49
+ expect(doc).to be_a(RXerces::XML::Document)
50
+ end
51
+ end
52
+ end
53
+
54
+ describe "Nokogiri.HTML" do
55
+ it "parses HTML" do
56
+ html = '<html><body><h1>Hello</h1></body></html>'
57
+ doc = Nokogiri.HTML(html)
58
+ expect(doc).to be_a(RXerces::XML::Document)
59
+ end
60
+ end
61
+
62
+ describe "Nokogiri::HTML class aliases" do
63
+ it "aliases Document" do
64
+ expect(Nokogiri::HTML::Document).to eq(RXerces::XML::Document)
65
+ end
66
+
67
+ it "aliases Node" do
68
+ expect(Nokogiri::HTML::Node).to eq(RXerces::XML::Node)
69
+ end
70
+
71
+ it "aliases Element" do
72
+ expect(Nokogiri::HTML::Element).to eq(RXerces::XML::Element)
73
+ end
74
+
75
+ it "aliases Text" do
76
+ expect(Nokogiri::HTML::Text).to eq(RXerces::XML::Text)
77
+ end
78
+
79
+ it "aliases NodeSet" do
80
+ expect(Nokogiri::HTML::NodeSet).to eq(RXerces::XML::NodeSet)
81
+ end
82
+ end
83
+
40
84
  describe "Nokogiri::XML::Document" do
41
85
  it "is an alias for RXerces::XML::Document" do
42
86
  expect(Nokogiri::XML::Document).to eq(RXerces::XML::Document)
@@ -4,7 +4,7 @@ require 'rxerces'
4
4
 
5
5
  RSpec.shared_examples RXerces do
6
6
  example 'version number is set to the expected value' do
7
- expect(RXerces::VERSION).to eq('0.3.0')
7
+ expect(RXerces::VERSION).to eq('0.5.0')
8
8
  expect(RXerces::VERSION).to be_frozen
9
9
  end
10
10
  end
data/spec/xpath_spec.rb CHANGED
@@ -141,24 +141,258 @@ RSpec.describe "XPath support" do
141
141
  end
142
142
  end
143
143
 
144
- describe "XPath limitations" do
145
- it "notes that Xerces-C uses XML Schema XPath subset" do
146
- # Xerces-C implements the XML Schema XPath subset, not full XPath 1.0
147
- # This means the following are NOT supported:
148
- # - Attribute predicates like [@id="1"]
149
- # - Functions like last(), position(), text()
150
- # - Comparison operators in predicates
151
- #
152
- # However, basic path expressions work well:
153
- # - // (descendant-or-self)
154
- # - / (child)
155
- # - . (self)
156
- # - .. (parent)
157
-
158
- # Basic paths work
159
- expect(doc.xpath('//book').length).to eq(3)
160
- expect(doc.xpath('/library/book').length).to eq(3)
161
- expect(doc.xpath('//title').length).to eq(3)
144
+ describe "XPath 1.0 compliance with Xalan" do
145
+ # Check if Xalan support is compiled in
146
+ xalan_available = begin
147
+ # Try a feature that only works with Xalan (attribute predicates)
148
+ test_xml = '<root><item id="1">A</item><item id="2">B</item></root>'
149
+ test_doc = RXerces::XML::Document.parse(test_xml)
150
+ result = test_doc.xpath('//item[@id="1"]')
151
+ result.length == 1
152
+ rescue
153
+ false
154
+ end
155
+
156
+ before(:all) do
157
+ unless xalan_available
158
+ skip "Xalan-C not available - XPath 1.0 features require Xalan-C library"
159
+ end
160
+ end
161
+
162
+ describe "Attribute predicates" do
163
+ it "finds elements by attribute value" do
164
+ book = doc.xpath('//book[@id="1"]')
165
+ expect(book.length).to eq(1)
166
+ expect(book[0].xpath('.//title')[0].text.strip).to eq('1984')
167
+ end
168
+
169
+ it "finds elements by attribute equality" do
170
+ fiction_books = doc.xpath('//book[@category="fiction"]')
171
+ expect(fiction_books.length).to eq(2)
172
+ end
173
+
174
+ it "finds elements by attribute inequality" do
175
+ non_fiction = doc.xpath('//book[@category!="fiction"]')
176
+ expect(non_fiction.length).to eq(1)
177
+ expect(non_fiction[0].xpath('.//title')[0].text.strip).to eq('Sapiens')
178
+ end
179
+
180
+ it "supports multiple attribute predicates" do
181
+ book = doc.xpath('//book[@id="2"][@category="fiction"]')
182
+ expect(book.length).to eq(1)
183
+ expect(book[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
184
+ end
185
+ end
186
+
187
+ describe "Position and indexing functions" do
188
+ it "uses position() to find first element" do
189
+ first_book = doc.xpath('//book[position()=1]')
190
+ expect(first_book.length).to eq(1)
191
+ expect(first_book[0].xpath('.//title')[0].text.strip).to eq('1984')
192
+ end
193
+
194
+ it "uses last() to find last element" do
195
+ last_book = doc.xpath('//book[position()=last()]')
196
+ expect(last_book.length).to eq(1)
197
+ expect(last_book[0].xpath('.//title')[0].text.strip).to eq('Sapiens')
198
+ end
199
+
200
+ it "uses numeric predicates for indexing" do
201
+ second_book = doc.xpath('//book[2]')
202
+ expect(second_book.length).to eq(1)
203
+ expect(second_book[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
204
+ end
205
+
206
+ it "finds elements by position greater than" do
207
+ later_books = doc.xpath('//book[position()>1]')
208
+ expect(later_books.length).to eq(2)
209
+ end
210
+ end
211
+
212
+ describe "String functions" do
213
+ it "uses contains() function" do
214
+ books_with_new = doc.xpath('//book[contains(.//title, "New")]')
215
+ expect(books_with_new.length).to eq(1)
216
+ expect(books_with_new[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
217
+ end
218
+
219
+ it "uses starts-with() function" do
220
+ books_starting_with_1 = doc.xpath('//book[starts-with(.//title, "1")]')
221
+ expect(books_starting_with_1.length).to eq(1)
222
+ expect(books_starting_with_1[0].xpath('.//title')[0].text.strip).to eq('1984')
223
+ end
224
+
225
+ it "uses normalize-space() function" do
226
+ # Should find titles even with whitespace differences
227
+ result = doc.xpath('//title[normalize-space()="1984"]')
228
+ expect(result.length).to eq(1)
229
+ end
230
+
231
+ it "uses string-length() function" do
232
+ # Find books where title length is less than 10 characters
233
+ short_titles = doc.xpath('//book[string-length(.//title) < 10]')
234
+ expect(short_titles.length).to eq(2) # "1984" and "Sapiens"
235
+ end
236
+
237
+ it "uses concat() function" do
238
+ # This tests that concat works by checking if a book has matching text
239
+ # concat('19', '84') = '1984'
240
+ result = doc.xpath('//book[.//title = concat("19", "84")]')
241
+ expect(result.length).to eq(1)
242
+ end
243
+
244
+ it "uses substring() function" do
245
+ # Find books where the first 5 chars of the title are "Brave"
246
+ result = doc.xpath('//book[substring(.//title, 1, 5) = "Brave"]')
247
+ expect(result.length).to eq(1)
248
+ expect(result[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
249
+ end
250
+ end
251
+
252
+ describe "Numeric functions and comparisons" do
253
+ it "uses count() function" do
254
+ # Find library element that has exactly 3 book children
255
+ result = doc.xpath('/library[count(book) = 3]')
256
+ expect(result.length).to eq(1)
257
+ end
258
+
259
+ it "compares numeric values with >" do
260
+ expensive_books = doc.xpath('//book[.//price > 15]')
261
+ expect(expensive_books.length).to eq(2) # 15.99 and 18.99
262
+ end
263
+
264
+ it "compares numeric values with <" do
265
+ cheap_books = doc.xpath('//book[.//price < 16]')
266
+ expect(cheap_books.length).to eq(2) # 15.99 and 14.99
267
+ end
268
+
269
+ it "compares numeric values with >=" do
270
+ books_1950_or_later = doc.xpath('//book[.//year >= 1949]')
271
+ expect(books_1950_or_later.length).to eq(2) # 1949 and 2011
272
+ end
273
+
274
+ it "uses sum() function" do
275
+ # sum() returns a number, not a nodeset, so we can't call it directly
276
+ # Instead, test it within a predicate
277
+ result = doc.xpath('//library[sum(book/price) > 40]')
278
+ expect(result.length).to eq(1) # Total is 49.97
279
+ end
280
+
281
+ it "uses floor() function" do
282
+ # Find books where floor(price) = 15 (15.99 -> 15)
283
+ result = doc.xpath('//book[floor(.//price) = 15]')
284
+ expect(result.length).to eq(1)
285
+ end
286
+
287
+ it "uses ceiling() function" do
288
+ # Find books where ceiling(price) = 19 (18.99 -> 19)
289
+ result = doc.xpath('//book[ceiling(.//price) = 19]')
290
+ expect(result.length).to eq(1)
291
+ end
292
+
293
+ it "uses round() function" do
294
+ # Find books where round(price) = 15 (14.99 -> 15, 15.99 -> 16)
295
+ result = doc.xpath('//book[round(.//price) = 15]')
296
+ expect(result.length).to eq(1) # Only 14.99
297
+ end
298
+ end
299
+
300
+ describe "Boolean operators" do
301
+ it "uses 'and' operator" do
302
+ result = doc.xpath('//book[@category="fiction" and .//year < 1940]')
303
+ expect(result.length).to eq(1) # Only "Brave New World" (1932)
304
+ end
305
+
306
+ it "uses 'or' operator" do
307
+ result = doc.xpath('//book[@id="1" or @id="3"]')
308
+ expect(result.length).to eq(2)
309
+ end
310
+
311
+ it "uses 'not()' function" do
312
+ result = doc.xpath('//book[not(@category="fiction")]')
313
+ expect(result.length).to eq(1)
314
+ expect(result[0].xpath('.//title')[0].text.strip).to eq('Sapiens')
315
+ end
316
+
317
+ it "combines multiple boolean operators" do
318
+ result = doc.xpath('//book[@category="fiction" and .//price < 15.50]')
319
+ expect(result.length).to eq(1) # Only "Brave New World" (14.99)
320
+ end
321
+ end
322
+
323
+ describe "Axes" do
324
+ it "uses parent:: axis" do
325
+ # Find parent of first title
326
+ first_title = doc.xpath('//title[1]')
327
+ parent = first_title[0].xpath('parent::*')
328
+ expect(parent.length).to eq(1)
329
+ expect(parent[0].name).to eq('book')
330
+ end
331
+
332
+ it "uses ancestor:: axis" do
333
+ # Find all ancestors of a title element
334
+ first_title = doc.xpath('//title[1]')
335
+ ancestors = first_title[0].xpath('ancestor::*')
336
+ expect(ancestors.length).to eq(2) # book and library
337
+ end
338
+
339
+ it "uses following-sibling:: axis" do
340
+ # Find siblings after title
341
+ first_title = doc.xpath('//title[1]')
342
+ siblings = first_title[0].xpath('following-sibling::*')
343
+ expect(siblings.length).to eq(3) # author, year, price
344
+ end
345
+
346
+ it "uses preceding-sibling:: axis" do
347
+ # Find siblings before author
348
+ first_author = doc.xpath('//author[1]')
349
+ siblings = first_author[0].xpath('preceding-sibling::*')
350
+ expect(siblings.length).to eq(1) # title
351
+ end
352
+
353
+ it "uses descendant:: axis" do
354
+ root = doc.root
355
+ descendants = root.xpath('descendant::title')
356
+ expect(descendants.length).to eq(3)
357
+ end
358
+
359
+ it "uses self:: axis" do
360
+ books = doc.xpath('//book')
361
+ self_nodes = books[0].xpath('self::book')
362
+ expect(self_nodes.length).to eq(1)
363
+ end
364
+ end
365
+
366
+ describe "Complex predicates" do
367
+ it "chains multiple predicates" do
368
+ result = doc.xpath('//book[@category="fiction"][.//year > 1940]')
369
+ expect(result.length).to eq(1) # Only "1984" (1949)
370
+ end
371
+
372
+ it "uses nested predicates" do
373
+ result = doc.xpath('//library[book[@category="fiction"]]')
374
+ expect(result.length).to eq(1)
375
+ end
376
+
377
+ it "combines functions in predicates" do
378
+ result = doc.xpath('//book[contains(.//title, "World") and .//year < 1950]')
379
+ expect(result.length).to eq(1)
380
+ expect(result[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
381
+ end
382
+ end
383
+
384
+ describe "Text nodes" do
385
+ it "selects text nodes with text()" do
386
+ text_nodes = doc.xpath('//title/text()')
387
+ expect(text_nodes.length).to eq(3)
388
+ end
389
+
390
+ it "uses text() in predicates" do
391
+ result = doc.xpath('//title[text()="1984"]')
392
+ # Note: text() returns the raw text which includes whitespace
393
+ # This might not match due to whitespace, so we test it doesn't error
394
+ expect { result }.not_to raise_error
395
+ end
162
396
  end
163
397
  end
164
398
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rxerces
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.5.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel J. Berger
@@ -78,8 +78,9 @@ dependencies:
78
78
  - - "~>"
79
79
  - !ruby/object:Gem::Version
80
80
  version: '3.12'
81
- description: A Ruby XML library with Nokogiri-compatible API, powered by Xerces-C
82
- instead of libxml2
81
+ description: |2
82
+ A Ruby XML library with Nokogiri-compatible API, powered by Xerces-C
83
+ instead of libxml2. It also optionally uses Xalan for XPath 1.0 compliance.
83
84
  email: djberg96@gmail.com
84
85
  executables: []
85
86
  extensions:
metadata.gz.sig CHANGED
@@ -1,2 +1,2 @@
1
- Q� �q��o {�;?��-6��%�:��-ܪB�h�y,�P�U}�#loyp�w��c����M@X+>��]U�.��<z5��a'�m�z]G���1laA�x���w�V�`���b6��U�����s��z��=u��$�6��Vj6��~��x:�����
2
- O��Е>d~�s��cYs���� ������tHgd�%Y��@TtcԁI��S
1
+ �s43i��Q� ��u����:G�A\��|;��2Mu<~|�"��
2
+ "H����@�/�P���4|�*�����4].v�z��p��ިzџ���p��s`%˝Piaٌ\J�w���@�}��-_1��5~����d�2:���-"�n�<RTcA�ѥ��?y�