rxerces 0.2.0 → 0.4.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,76 @@
1
+ require 'spec_helper'
2
+
3
+ RSpec.describe RXerces::XML::Schema do
4
+ let(:simple_xsd) do
5
+ <<~XSD
6
+ <?xml version="1.0" encoding="UTF-8"?>
7
+ <xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
8
+ <xs:element name="root">
9
+ <xs:complexType>
10
+ <xs:sequence>
11
+ <xs:element name="name" type="xs:string"/>
12
+ <xs:element name="age" type="xs:integer"/>
13
+ </xs:sequence>
14
+ </xs:complexType>
15
+ </xs:element>
16
+ </xs:schema>
17
+ XSD
18
+ end
19
+
20
+ let(:valid_xml) do
21
+ <<~XML
22
+ <?xml version="1.0"?>
23
+ <root>
24
+ <name>John</name>
25
+ <age>30</age>
26
+ </root>
27
+ XML
28
+ end
29
+
30
+ let(:invalid_xml) do
31
+ <<~XML
32
+ <?xml version="1.0"?>
33
+ <root>
34
+ <name>John</name>
35
+ <age>not-a-number</age>
36
+ </root>
37
+ XML
38
+ end
39
+
40
+ describe '.from_string' do
41
+ it 'creates a schema from an XSD string' do
42
+ schema = described_class.from_string(simple_xsd)
43
+ expect(schema).to be_a(described_class)
44
+ end
45
+
46
+ # Note: Xerces-C parser is very tolerant of invalid XML
47
+ # So we just skip testing for invalid XML for now
48
+ end
49
+
50
+ describe '.from_document' do
51
+ it 'creates a schema from a Document' do
52
+ schema_doc = RXerces::XML::Document.parse(simple_xsd)
53
+ schema = described_class.from_document(schema_doc)
54
+ expect(schema).to be_a(described_class)
55
+ end
56
+ end
57
+
58
+ describe 'validation' do
59
+ let(:schema) { described_class.from_string(simple_xsd) }
60
+
61
+ it 'validates a valid document' do
62
+ doc = RXerces::XML::Document.parse(valid_xml)
63
+ errors = doc.validate(schema)
64
+ expect(errors).to be_a(Array)
65
+ expect(errors).to be_empty
66
+ end
67
+
68
+ it 'returns validation errors for an invalid document' do
69
+ doc = RXerces::XML::Document.parse(invalid_xml)
70
+ errors = doc.validate(schema)
71
+ expect(errors).to be_a(Array)
72
+ expect(errors).not_to be_empty
73
+ expect(errors.first).to include('not-a-number')
74
+ end
75
+ end
76
+ end
data/spec/xpath_spec.rb CHANGED
@@ -141,24 +141,258 @@ RSpec.describe "XPath support" do
141
141
  end
142
142
  end
143
143
 
144
- describe "XPath limitations" do
145
- it "notes that Xerces-C uses XML Schema XPath subset" do
146
- # Xerces-C implements the XML Schema XPath subset, not full XPath 1.0
147
- # This means the following are NOT supported:
148
- # - Attribute predicates like [@id="1"]
149
- # - Functions like last(), position(), text()
150
- # - Comparison operators in predicates
151
- #
152
- # However, basic path expressions work well:
153
- # - // (descendant-or-self)
154
- # - / (child)
155
- # - . (self)
156
- # - .. (parent)
157
-
158
- # Basic paths work
159
- expect(doc.xpath('//book').length).to eq(3)
160
- expect(doc.xpath('/library/book').length).to eq(3)
161
- expect(doc.xpath('//title').length).to eq(3)
144
+ describe "XPath 1.0 compliance with Xalan" do
145
+ # Check if Xalan support is compiled in
146
+ xalan_available = begin
147
+ # Try a feature that only works with Xalan (attribute predicates)
148
+ test_xml = '<root><item id="1">A</item><item id="2">B</item></root>'
149
+ test_doc = RXerces::XML::Document.parse(test_xml)
150
+ result = test_doc.xpath('//item[@id="1"]')
151
+ result.length == 1
152
+ rescue
153
+ false
154
+ end
155
+
156
+ before(:all) do
157
+ unless xalan_available
158
+ skip "Xalan-C not available - XPath 1.0 features require Xalan-C library"
159
+ end
160
+ end
161
+
162
+ describe "Attribute predicates" do
163
+ it "finds elements by attribute value" do
164
+ book = doc.xpath('//book[@id="1"]')
165
+ expect(book.length).to eq(1)
166
+ expect(book[0].xpath('.//title')[0].text.strip).to eq('1984')
167
+ end
168
+
169
+ it "finds elements by attribute equality" do
170
+ fiction_books = doc.xpath('//book[@category="fiction"]')
171
+ expect(fiction_books.length).to eq(2)
172
+ end
173
+
174
+ it "finds elements by attribute inequality" do
175
+ non_fiction = doc.xpath('//book[@category!="fiction"]')
176
+ expect(non_fiction.length).to eq(1)
177
+ expect(non_fiction[0].xpath('.//title')[0].text.strip).to eq('Sapiens')
178
+ end
179
+
180
+ it "supports multiple attribute predicates" do
181
+ book = doc.xpath('//book[@id="2"][@category="fiction"]')
182
+ expect(book.length).to eq(1)
183
+ expect(book[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
184
+ end
185
+ end
186
+
187
+ describe "Position and indexing functions" do
188
+ it "uses position() to find first element" do
189
+ first_book = doc.xpath('//book[position()=1]')
190
+ expect(first_book.length).to eq(1)
191
+ expect(first_book[0].xpath('.//title')[0].text.strip).to eq('1984')
192
+ end
193
+
194
+ it "uses last() to find last element" do
195
+ last_book = doc.xpath('//book[position()=last()]')
196
+ expect(last_book.length).to eq(1)
197
+ expect(last_book[0].xpath('.//title')[0].text.strip).to eq('Sapiens')
198
+ end
199
+
200
+ it "uses numeric predicates for indexing" do
201
+ second_book = doc.xpath('//book[2]')
202
+ expect(second_book.length).to eq(1)
203
+ expect(second_book[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
204
+ end
205
+
206
+ it "finds elements by position greater than" do
207
+ later_books = doc.xpath('//book[position()>1]')
208
+ expect(later_books.length).to eq(2)
209
+ end
210
+ end
211
+
212
+ describe "String functions" do
213
+ it "uses contains() function" do
214
+ books_with_new = doc.xpath('//book[contains(.//title, "New")]')
215
+ expect(books_with_new.length).to eq(1)
216
+ expect(books_with_new[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
217
+ end
218
+
219
+ it "uses starts-with() function" do
220
+ books_starting_with_1 = doc.xpath('//book[starts-with(.//title, "1")]')
221
+ expect(books_starting_with_1.length).to eq(1)
222
+ expect(books_starting_with_1[0].xpath('.//title')[0].text.strip).to eq('1984')
223
+ end
224
+
225
+ it "uses normalize-space() function" do
226
+ # Should find titles even with whitespace differences
227
+ result = doc.xpath('//title[normalize-space()="1984"]')
228
+ expect(result.length).to eq(1)
229
+ end
230
+
231
+ it "uses string-length() function" do
232
+ # Find books where title length is less than 10 characters
233
+ short_titles = doc.xpath('//book[string-length(.//title) < 10]')
234
+ expect(short_titles.length).to eq(2) # "1984" and "Sapiens"
235
+ end
236
+
237
+ it "uses concat() function" do
238
+ # This tests that concat works by checking if a book has matching text
239
+ # concat('19', '84') = '1984'
240
+ result = doc.xpath('//book[.//title = concat("19", "84")]')
241
+ expect(result.length).to eq(1)
242
+ end
243
+
244
+ it "uses substring() function" do
245
+ # Find books where first 5 chars of title is "Brave"
246
+ result = doc.xpath('//book[substring(.//title, 1, 5) = "Brave"]')
247
+ expect(result.length).to eq(1)
248
+ expect(result[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
249
+ end
250
+ end
251
+
252
+ describe "Numeric functions and comparisons" do
253
+ it "uses count() function" do
254
+ # Find library element that has exactly 3 book children
255
+ result = doc.xpath('/library[count(book) = 3]')
256
+ expect(result.length).to eq(1)
257
+ end
258
+
259
+ it "compares numeric values with >" do
260
+ expensive_books = doc.xpath('//book[.//price > 15]')
261
+ expect(expensive_books.length).to eq(2) # 15.99 and 18.99
262
+ end
263
+
264
+ it "compares numeric values with <" do
265
+ cheap_books = doc.xpath('//book[.//price < 16]')
266
+ expect(cheap_books.length).to eq(2) # 15.99 and 14.99
267
+ end
268
+
269
+ it "compares numeric values with >=" do
270
+ books_1950_or_later = doc.xpath('//book[.//year >= 1949]')
271
+ expect(books_1950_or_later.length).to eq(2) # 1949 and 2011
272
+ end
273
+
274
+ it "uses sum() function" do
275
+ # sum() returns a number, not a nodeset, so we can't call it directly
276
+ # Instead, test it within a predicate
277
+ result = doc.xpath('//library[sum(book/price) > 40]')
278
+ expect(result.length).to eq(1) # Total is 49.97
279
+ end
280
+
281
+ it "uses floor() function" do
282
+ # Find books where floor(price) = 15 (15.99 -> 15)
283
+ result = doc.xpath('//book[floor(.//price) = 15]')
284
+ expect(result.length).to eq(1)
285
+ end
286
+
287
+ it "uses ceiling() function" do
288
+ # Find books where ceiling(price) = 19 (18.99 -> 19)
289
+ result = doc.xpath('//book[ceiling(.//price) = 19]')
290
+ expect(result.length).to eq(1)
291
+ end
292
+
293
+ it "uses round() function" do
294
+ # Find books where round(price) = 15 (14.99 -> 15, 15.99 -> 16)
295
+ result = doc.xpath('//book[round(.//price) = 15]')
296
+ expect(result.length).to eq(1) # Only 14.99
297
+ end
298
+ end
299
+
300
+ describe "Boolean operators" do
301
+ it "uses 'and' operator" do
302
+ result = doc.xpath('//book[@category="fiction" and .//year < 1940]')
303
+ expect(result.length).to eq(1) # Only "Brave New World" (1932)
304
+ end
305
+
306
+ it "uses 'or' operator" do
307
+ result = doc.xpath('//book[@id="1" or @id="3"]')
308
+ expect(result.length).to eq(2)
309
+ end
310
+
311
+ it "uses 'not()' function" do
312
+ result = doc.xpath('//book[not(@category="fiction")]')
313
+ expect(result.length).to eq(1)
314
+ expect(result[0].xpath('.//title')[0].text.strip).to eq('Sapiens')
315
+ end
316
+
317
+ it "combines multiple boolean operators" do
318
+ result = doc.xpath('//book[@category="fiction" and .//price < 15.50]')
319
+ expect(result.length).to eq(1) # Only "Brave New World" (14.99)
320
+ end
321
+ end
322
+
323
+ describe "Axes" do
324
+ it "uses parent:: axis" do
325
+ # Find parent of first title
326
+ first_title = doc.xpath('//title[1]')
327
+ parent = first_title[0].xpath('parent::*')
328
+ expect(parent.length).to eq(1)
329
+ expect(parent[0].name).to eq('book')
330
+ end
331
+
332
+ it "uses ancestor:: axis" do
333
+ # Find all ancestors of a title element
334
+ first_title = doc.xpath('//title[1]')
335
+ ancestors = first_title[0].xpath('ancestor::*')
336
+ expect(ancestors.length).to eq(2) # book and library
337
+ end
338
+
339
+ it "uses following-sibling:: axis" do
340
+ # Find siblings after title
341
+ first_title = doc.xpath('//title[1]')
342
+ siblings = first_title[0].xpath('following-sibling::*')
343
+ expect(siblings.length).to eq(3) # author, year, price
344
+ end
345
+
346
+ it "uses preceding-sibling:: axis" do
347
+ # Find siblings before author
348
+ first_author = doc.xpath('//author[1]')
349
+ siblings = first_author[0].xpath('preceding-sibling::*')
350
+ expect(siblings.length).to eq(1) # title
351
+ end
352
+
353
+ it "uses descendant:: axis" do
354
+ root = doc.root
355
+ descendants = root.xpath('descendant::title')
356
+ expect(descendants.length).to eq(3)
357
+ end
358
+
359
+ it "uses self:: axis" do
360
+ books = doc.xpath('//book')
361
+ self_nodes = books[0].xpath('self::book')
362
+ expect(self_nodes.length).to eq(1)
363
+ end
364
+ end
365
+
366
+ describe "Complex predicates" do
367
+ it "chains multiple predicates" do
368
+ result = doc.xpath('//book[@category="fiction"][.//year > 1940]')
369
+ expect(result.length).to eq(1) # Only "1984" (1949)
370
+ end
371
+
372
+ it "uses nested predicates" do
373
+ result = doc.xpath('//library[book[@category="fiction"]]')
374
+ expect(result.length).to eq(1)
375
+ end
376
+
377
+ it "combines functions in predicates" do
378
+ result = doc.xpath('//book[contains(.//title, "World") and .//year < 1950]')
379
+ expect(result.length).to eq(1)
380
+ expect(result[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
381
+ end
382
+ end
383
+
384
+ describe "Text nodes" do
385
+ it "selects text nodes with text()" do
386
+ text_nodes = doc.xpath('//title/text()')
387
+ expect(text_nodes.length).to eq(3)
388
+ end
389
+
390
+ it "uses text() in predicates" do
391
+ result = doc.xpath('//title[text()="1984"]')
392
+ # Note: text() returns the raw text which includes whitespace
393
+ # This might not match due to whitespace, so we test it doesn't error
394
+ expect { result }.not_to raise_error
395
+ end
162
396
  end
163
397
  end
164
398
  end
data.tar.gz.sig CHANGED
Binary file
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: rxerces
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.2.0
4
+ version: 0.4.0
5
5
  platform: ruby
6
6
  authors:
7
7
  - Daniel J. Berger
@@ -78,8 +78,9 @@ dependencies:
78
78
  - - "~>"
79
79
  - !ruby/object:Gem::Version
80
80
  version: '3.12'
81
- description: A Ruby XML library with Nokogiri-compatible API, powered by Xerces-C
82
- instead of libxml2
81
+ description: |2
82
+ A Ruby XML library with Nokogiri-compatible API, powered by Xerces-C
83
+ instead of libxml2. It also optionally uses Xalan for Xpath 1.0 compliance.
83
84
  email: djberg96@gmail.com
84
85
  executables: []
85
86
  extensions:
@@ -93,6 +94,7 @@ files:
93
94
  - Rakefile
94
95
  - certs/djberg96_pub.pem
95
96
  - examples/basic_usage.rb
97
+ - examples/schema_example.rb
96
98
  - examples/simple_example.rb
97
99
  - examples/xpath_example.rb
98
100
  - ext/rxerces/extconf.rb
@@ -109,6 +111,7 @@ files:
109
111
  - spec/nokogiri_compatibility_spec.rb
110
112
  - spec/rxerces_shared.rb
111
113
  - spec/rxerces_spec.rb
114
+ - spec/schema_spec.rb
112
115
  - spec/spec_helper.rb
113
116
  - spec/xpath_spec.rb
114
117
  homepage: http://github.com/djberg96/rxerces
@@ -148,4 +151,5 @@ test_files:
148
151
  - spec/nodeset_spec.rb
149
152
  - spec/nokogiri_compatibility_spec.rb
150
153
  - spec/rxerces_spec.rb
154
+ - spec/schema_spec.rb
151
155
  - spec/xpath_spec.rb
metadata.gz.sig CHANGED
Binary file