rxerces 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGES.md +20 -0
- data/README.md +26 -2
- data/examples/schema_example.rb +107 -0
- data/ext/rxerces/extconf.rb +42 -0
- data/ext/rxerces/rxerces.cpp +834 -7
- data/lib/rxerces/nokogiri.rb +1 -0
- data/lib/rxerces/version.rb +1 -1
- data/rxerces.gemspec +5 -2
- data/spec/document_spec.rb +54 -0
- data/spec/node_spec.rb +448 -0
- data/spec/nokogiri_compatibility_spec.rb +69 -0
- data/spec/rxerces_shared.rb +1 -1
- data/spec/schema_spec.rb +76 -0
- data/spec/xpath_spec.rb +252 -18
- data.tar.gz.sig +0 -0
- metadata +7 -3
- metadata.gz.sig +0 -0
data/spec/schema_spec.rb
ADDED
|
@@ -0,0 +1,76 @@
|
|
|
1
|
+
require 'spec_helper'
|
|
2
|
+
|
|
3
|
+
RSpec.describe RXerces::XML::Schema do
|
|
4
|
+
let(:simple_xsd) do
|
|
5
|
+
<<~XSD
|
|
6
|
+
<?xml version="1.0" encoding="UTF-8"?>
|
|
7
|
+
<xs:schema xmlns:xs="http://www.w3.org/2001/XMLSchema">
|
|
8
|
+
<xs:element name="root">
|
|
9
|
+
<xs:complexType>
|
|
10
|
+
<xs:sequence>
|
|
11
|
+
<xs:element name="name" type="xs:string"/>
|
|
12
|
+
<xs:element name="age" type="xs:integer"/>
|
|
13
|
+
</xs:sequence>
|
|
14
|
+
</xs:complexType>
|
|
15
|
+
</xs:element>
|
|
16
|
+
</xs:schema>
|
|
17
|
+
XSD
|
|
18
|
+
end
|
|
19
|
+
|
|
20
|
+
let(:valid_xml) do
|
|
21
|
+
<<~XML
|
|
22
|
+
<?xml version="1.0"?>
|
|
23
|
+
<root>
|
|
24
|
+
<name>John</name>
|
|
25
|
+
<age>30</age>
|
|
26
|
+
</root>
|
|
27
|
+
XML
|
|
28
|
+
end
|
|
29
|
+
|
|
30
|
+
let(:invalid_xml) do
|
|
31
|
+
<<~XML
|
|
32
|
+
<?xml version="1.0"?>
|
|
33
|
+
<root>
|
|
34
|
+
<name>John</name>
|
|
35
|
+
<age>not-a-number</age>
|
|
36
|
+
</root>
|
|
37
|
+
XML
|
|
38
|
+
end
|
|
39
|
+
|
|
40
|
+
describe '.from_string' do
|
|
41
|
+
it 'creates a schema from an XSD string' do
|
|
42
|
+
schema = described_class.from_string(simple_xsd)
|
|
43
|
+
expect(schema).to be_a(described_class)
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
# Note: Xerces-C parser is very tolerant of invalid XML
|
|
47
|
+
# So we just skip testing for invalid XML for now
|
|
48
|
+
end
|
|
49
|
+
|
|
50
|
+
describe '.from_document' do
|
|
51
|
+
it 'creates a schema from a Document' do
|
|
52
|
+
schema_doc = RXerces::XML::Document.parse(simple_xsd)
|
|
53
|
+
schema = described_class.from_document(schema_doc)
|
|
54
|
+
expect(schema).to be_a(described_class)
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
58
|
+
describe 'validation' do
|
|
59
|
+
let(:schema) { described_class.from_string(simple_xsd) }
|
|
60
|
+
|
|
61
|
+
it 'validates a valid document' do
|
|
62
|
+
doc = RXerces::XML::Document.parse(valid_xml)
|
|
63
|
+
errors = doc.validate(schema)
|
|
64
|
+
expect(errors).to be_a(Array)
|
|
65
|
+
expect(errors).to be_empty
|
|
66
|
+
end
|
|
67
|
+
|
|
68
|
+
it 'returns validation errors for an invalid document' do
|
|
69
|
+
doc = RXerces::XML::Document.parse(invalid_xml)
|
|
70
|
+
errors = doc.validate(schema)
|
|
71
|
+
expect(errors).to be_a(Array)
|
|
72
|
+
expect(errors).not_to be_empty
|
|
73
|
+
expect(errors.first).to include('not-a-number')
|
|
74
|
+
end
|
|
75
|
+
end
|
|
76
|
+
end
|
data/spec/xpath_spec.rb
CHANGED
|
@@ -141,24 +141,258 @@ RSpec.describe "XPath support" do
|
|
|
141
141
|
end
|
|
142
142
|
end
|
|
143
143
|
|
|
144
|
-
describe "XPath
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
#
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
144
|
+
describe "XPath 1.0 compliance with Xalan" do
|
|
145
|
+
# Check if Xalan support is compiled in
|
|
146
|
+
xalan_available = begin
|
|
147
|
+
# Try a feature that only works with Xalan (attribute predicates)
|
|
148
|
+
test_xml = '<root><item id="1">A</item><item id="2">B</item></root>'
|
|
149
|
+
test_doc = RXerces::XML::Document.parse(test_xml)
|
|
150
|
+
result = test_doc.xpath('//item[@id="1"]')
|
|
151
|
+
result.length == 1
|
|
152
|
+
rescue
|
|
153
|
+
false
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
before(:all) do
|
|
157
|
+
unless xalan_available
|
|
158
|
+
skip "Xalan-C not available - XPath 1.0 features require Xalan-C library"
|
|
159
|
+
end
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
describe "Attribute predicates" do
|
|
163
|
+
it "finds elements by attribute value" do
|
|
164
|
+
book = doc.xpath('//book[@id="1"]')
|
|
165
|
+
expect(book.length).to eq(1)
|
|
166
|
+
expect(book[0].xpath('.//title')[0].text.strip).to eq('1984')
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
it "finds elements by attribute equality" do
|
|
170
|
+
fiction_books = doc.xpath('//book[@category="fiction"]')
|
|
171
|
+
expect(fiction_books.length).to eq(2)
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
it "finds elements by attribute inequality" do
|
|
175
|
+
non_fiction = doc.xpath('//book[@category!="fiction"]')
|
|
176
|
+
expect(non_fiction.length).to eq(1)
|
|
177
|
+
expect(non_fiction[0].xpath('.//title')[0].text.strip).to eq('Sapiens')
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
it "supports multiple attribute predicates" do
|
|
181
|
+
book = doc.xpath('//book[@id="2"][@category="fiction"]')
|
|
182
|
+
expect(book.length).to eq(1)
|
|
183
|
+
expect(book[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
|
|
184
|
+
end
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
describe "Position and indexing functions" do
|
|
188
|
+
it "uses position() to find first element" do
|
|
189
|
+
first_book = doc.xpath('//book[position()=1]')
|
|
190
|
+
expect(first_book.length).to eq(1)
|
|
191
|
+
expect(first_book[0].xpath('.//title')[0].text.strip).to eq('1984')
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
it "uses last() to find last element" do
|
|
195
|
+
last_book = doc.xpath('//book[position()=last()]')
|
|
196
|
+
expect(last_book.length).to eq(1)
|
|
197
|
+
expect(last_book[0].xpath('.//title')[0].text.strip).to eq('Sapiens')
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
it "uses numeric predicates for indexing" do
|
|
201
|
+
second_book = doc.xpath('//book[2]')
|
|
202
|
+
expect(second_book.length).to eq(1)
|
|
203
|
+
expect(second_book[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
it "finds elements by position greater than" do
|
|
207
|
+
later_books = doc.xpath('//book[position()>1]')
|
|
208
|
+
expect(later_books.length).to eq(2)
|
|
209
|
+
end
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
describe "String functions" do
|
|
213
|
+
it "uses contains() function" do
|
|
214
|
+
books_with_new = doc.xpath('//book[contains(.//title, "New")]')
|
|
215
|
+
expect(books_with_new.length).to eq(1)
|
|
216
|
+
expect(books_with_new[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
it "uses starts-with() function" do
|
|
220
|
+
books_starting_with_1 = doc.xpath('//book[starts-with(.//title, "1")]')
|
|
221
|
+
expect(books_starting_with_1.length).to eq(1)
|
|
222
|
+
expect(books_starting_with_1[0].xpath('.//title')[0].text.strip).to eq('1984')
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
it "uses normalize-space() function" do
|
|
226
|
+
# Should find titles even with whitespace differences
|
|
227
|
+
result = doc.xpath('//title[normalize-space()="1984"]')
|
|
228
|
+
expect(result.length).to eq(1)
|
|
229
|
+
end
|
|
230
|
+
|
|
231
|
+
it "uses string-length() function" do
|
|
232
|
+
# Find books where title length is less than 10 characters
|
|
233
|
+
short_titles = doc.xpath('//book[string-length(.//title) < 10]')
|
|
234
|
+
expect(short_titles.length).to eq(2) # "1984" and "Sapiens"
|
|
235
|
+
end
|
|
236
|
+
|
|
237
|
+
it "uses concat() function" do
|
|
238
|
+
# This tests that concat works by checking if a book has matching text
|
|
239
|
+
# concat('19', '84') = '1984'
|
|
240
|
+
result = doc.xpath('//book[.//title = concat("19", "84")]')
|
|
241
|
+
expect(result.length).to eq(1)
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
it "uses substring() function" do
|
|
245
|
+
# Find books where first 5 chars of title is "Brave"
|
|
246
|
+
result = doc.xpath('//book[substring(.//title, 1, 5) = "Brave"]')
|
|
247
|
+
expect(result.length).to eq(1)
|
|
248
|
+
expect(result[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
|
|
249
|
+
end
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
describe "Numeric functions and comparisons" do
|
|
253
|
+
it "uses count() function" do
|
|
254
|
+
# Find library element that has exactly 3 book children
|
|
255
|
+
result = doc.xpath('/library[count(book) = 3]')
|
|
256
|
+
expect(result.length).to eq(1)
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
it "compares numeric values with >" do
|
|
260
|
+
expensive_books = doc.xpath('//book[.//price > 15]')
|
|
261
|
+
expect(expensive_books.length).to eq(2) # 15.99 and 18.99
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
it "compares numeric values with <" do
|
|
265
|
+
cheap_books = doc.xpath('//book[.//price < 16]')
|
|
266
|
+
expect(cheap_books.length).to eq(2) # 15.99 and 14.99
|
|
267
|
+
end
|
|
268
|
+
|
|
269
|
+
it "compares numeric values with >=" do
|
|
270
|
+
books_1950_or_later = doc.xpath('//book[.//year >= 1949]')
|
|
271
|
+
expect(books_1950_or_later.length).to eq(2) # 1949 and 2011
|
|
272
|
+
end
|
|
273
|
+
|
|
274
|
+
it "uses sum() function" do
|
|
275
|
+
# sum() returns a number, not a nodeset, so we can't call it directly
|
|
276
|
+
# Instead, test it within a predicate
|
|
277
|
+
result = doc.xpath('//library[sum(book/price) > 40]')
|
|
278
|
+
expect(result.length).to eq(1) # Total is 49.97
|
|
279
|
+
end
|
|
280
|
+
|
|
281
|
+
it "uses floor() function" do
|
|
282
|
+
# Find books where floor(price) = 15 (15.99 -> 15)
|
|
283
|
+
result = doc.xpath('//book[floor(.//price) = 15]')
|
|
284
|
+
expect(result.length).to eq(1)
|
|
285
|
+
end
|
|
286
|
+
|
|
287
|
+
it "uses ceiling() function" do
|
|
288
|
+
# Find books where ceiling(price) = 19 (18.99 -> 19)
|
|
289
|
+
result = doc.xpath('//book[ceiling(.//price) = 19]')
|
|
290
|
+
expect(result.length).to eq(1)
|
|
291
|
+
end
|
|
292
|
+
|
|
293
|
+
it "uses round() function" do
|
|
294
|
+
# Find books where round(price) = 15 (14.99 -> 15, 15.99 -> 16)
|
|
295
|
+
result = doc.xpath('//book[round(.//price) = 15]')
|
|
296
|
+
expect(result.length).to eq(1) # Only 14.99
|
|
297
|
+
end
|
|
298
|
+
end
|
|
299
|
+
|
|
300
|
+
describe "Boolean operators" do
|
|
301
|
+
it "uses 'and' operator" do
|
|
302
|
+
result = doc.xpath('//book[@category="fiction" and .//year < 1940]')
|
|
303
|
+
expect(result.length).to eq(1) # Only "Brave New World" (1932)
|
|
304
|
+
end
|
|
305
|
+
|
|
306
|
+
it "uses 'or' operator" do
|
|
307
|
+
result = doc.xpath('//book[@id="1" or @id="3"]')
|
|
308
|
+
expect(result.length).to eq(2)
|
|
309
|
+
end
|
|
310
|
+
|
|
311
|
+
it "uses 'not()' function" do
|
|
312
|
+
result = doc.xpath('//book[not(@category="fiction")]')
|
|
313
|
+
expect(result.length).to eq(1)
|
|
314
|
+
expect(result[0].xpath('.//title')[0].text.strip).to eq('Sapiens')
|
|
315
|
+
end
|
|
316
|
+
|
|
317
|
+
it "combines multiple boolean operators" do
|
|
318
|
+
result = doc.xpath('//book[@category="fiction" and .//price < 15.50]')
|
|
319
|
+
expect(result.length).to eq(1) # Only "Brave New World" (14.99)
|
|
320
|
+
end
|
|
321
|
+
end
|
|
322
|
+
|
|
323
|
+
describe "Axes" do
|
|
324
|
+
it "uses parent:: axis" do
|
|
325
|
+
# Find parent of first title
|
|
326
|
+
first_title = doc.xpath('//title[1]')
|
|
327
|
+
parent = first_title[0].xpath('parent::*')
|
|
328
|
+
expect(parent.length).to eq(1)
|
|
329
|
+
expect(parent[0].name).to eq('book')
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
it "uses ancestor:: axis" do
|
|
333
|
+
# Find all ancestors of a title element
|
|
334
|
+
first_title = doc.xpath('//title[1]')
|
|
335
|
+
ancestors = first_title[0].xpath('ancestor::*')
|
|
336
|
+
expect(ancestors.length).to eq(2) # book and library
|
|
337
|
+
end
|
|
338
|
+
|
|
339
|
+
it "uses following-sibling:: axis" do
|
|
340
|
+
# Find siblings after title
|
|
341
|
+
first_title = doc.xpath('//title[1]')
|
|
342
|
+
siblings = first_title[0].xpath('following-sibling::*')
|
|
343
|
+
expect(siblings.length).to eq(3) # author, year, price
|
|
344
|
+
end
|
|
345
|
+
|
|
346
|
+
it "uses preceding-sibling:: axis" do
|
|
347
|
+
# Find siblings before author
|
|
348
|
+
first_author = doc.xpath('//author[1]')
|
|
349
|
+
siblings = first_author[0].xpath('preceding-sibling::*')
|
|
350
|
+
expect(siblings.length).to eq(1) # title
|
|
351
|
+
end
|
|
352
|
+
|
|
353
|
+
it "uses descendant:: axis" do
|
|
354
|
+
root = doc.root
|
|
355
|
+
descendants = root.xpath('descendant::title')
|
|
356
|
+
expect(descendants.length).to eq(3)
|
|
357
|
+
end
|
|
358
|
+
|
|
359
|
+
it "uses self:: axis" do
|
|
360
|
+
books = doc.xpath('//book')
|
|
361
|
+
self_nodes = books[0].xpath('self::book')
|
|
362
|
+
expect(self_nodes.length).to eq(1)
|
|
363
|
+
end
|
|
364
|
+
end
|
|
365
|
+
|
|
366
|
+
describe "Complex predicates" do
|
|
367
|
+
it "chains multiple predicates" do
|
|
368
|
+
result = doc.xpath('//book[@category="fiction"][.//year > 1940]')
|
|
369
|
+
expect(result.length).to eq(1) # Only "1984" (1949)
|
|
370
|
+
end
|
|
371
|
+
|
|
372
|
+
it "uses nested predicates" do
|
|
373
|
+
result = doc.xpath('//library[book[@category="fiction"]]')
|
|
374
|
+
expect(result.length).to eq(1)
|
|
375
|
+
end
|
|
376
|
+
|
|
377
|
+
it "combines functions in predicates" do
|
|
378
|
+
result = doc.xpath('//book[contains(.//title, "World") and .//year < 1950]')
|
|
379
|
+
expect(result.length).to eq(1)
|
|
380
|
+
expect(result[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
|
|
381
|
+
end
|
|
382
|
+
end
|
|
383
|
+
|
|
384
|
+
describe "Text nodes" do
|
|
385
|
+
it "selects text nodes with text()" do
|
|
386
|
+
text_nodes = doc.xpath('//title/text()')
|
|
387
|
+
expect(text_nodes.length).to eq(3)
|
|
388
|
+
end
|
|
389
|
+
|
|
390
|
+
it "uses text() in predicates" do
|
|
391
|
+
result = doc.xpath('//title[text()="1984"]')
|
|
392
|
+
# Note: text() returns the raw text which includes whitespace
|
|
393
|
+
# This might not match due to whitespace, so we test it doesn't error
|
|
394
|
+
expect { result }.not_to raise_error
|
|
395
|
+
end
|
|
162
396
|
end
|
|
163
397
|
end
|
|
164
398
|
end
|
data.tar.gz.sig
CHANGED
|
Binary file
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rxerces
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.2.0
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Daniel J. Berger
|
|
@@ -78,8 +78,9 @@ dependencies:
|
|
|
78
78
|
- - "~>"
|
|
79
79
|
- !ruby/object:Gem::Version
|
|
80
80
|
version: '3.12'
|
|
81
|
-
description:
|
|
82
|
-
|
|
81
|
+
description: |2
|
|
82
|
+
A Ruby XML library with Nokogiri-compatible API, powered by Xerces-C
|
|
83
|
+
instead of libxml2. It also optionally uses Xalan for Xpath 1.0 compliance.
|
|
83
84
|
email: djberg96@gmail.com
|
|
84
85
|
executables: []
|
|
85
86
|
extensions:
|
|
@@ -93,6 +94,7 @@ files:
|
|
|
93
94
|
- Rakefile
|
|
94
95
|
- certs/djberg96_pub.pem
|
|
95
96
|
- examples/basic_usage.rb
|
|
97
|
+
- examples/schema_example.rb
|
|
96
98
|
- examples/simple_example.rb
|
|
97
99
|
- examples/xpath_example.rb
|
|
98
100
|
- ext/rxerces/extconf.rb
|
|
@@ -109,6 +111,7 @@ files:
|
|
|
109
111
|
- spec/nokogiri_compatibility_spec.rb
|
|
110
112
|
- spec/rxerces_shared.rb
|
|
111
113
|
- spec/rxerces_spec.rb
|
|
114
|
+
- spec/schema_spec.rb
|
|
112
115
|
- spec/spec_helper.rb
|
|
113
116
|
- spec/xpath_spec.rb
|
|
114
117
|
homepage: http://github.com/djberg96/rxerces
|
|
@@ -148,4 +151,5 @@ test_files:
|
|
|
148
151
|
- spec/nodeset_spec.rb
|
|
149
152
|
- spec/nokogiri_compatibility_spec.rb
|
|
150
153
|
- spec/rxerces_spec.rb
|
|
154
|
+
- spec/schema_spec.rb
|
|
151
155
|
- spec/xpath_spec.rb
|
metadata.gz.sig
CHANGED
|
Binary file
|