rxerces 0.3.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +4 -4
- data/CHANGES.md +8 -0
- data/README.md +26 -2
- data/ext/rxerces/extconf.rb +42 -0
- data/ext/rxerces/rxerces.cpp +213 -8
- data/lib/rxerces/version.rb +1 -1
- data/rxerces.gemspec +5 -2
- data/spec/document_spec.rb +19 -0
- data/spec/node_spec.rb +87 -0
- data/spec/rxerces_shared.rb +1 -1
- data/spec/xpath_spec.rb +252 -18
- data.tar.gz.sig +0 -0
- metadata +4 -3
- metadata.gz.sig +0 -0
checksums.yaml
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
---
|
|
2
2
|
SHA256:
|
|
3
|
-
metadata.gz:
|
|
4
|
-
data.tar.gz:
|
|
3
|
+
metadata.gz: 5c6ba3896b05b5303ed0842c6ccc0376ce27e0325301dafe54472c73f9298ed9
|
|
4
|
+
data.tar.gz: 45844824ec1ca0ba91acf214cd1369466b40e1e0d7d1a653ed5ad7807a488e58
|
|
5
5
|
SHA512:
|
|
6
|
-
metadata.gz:
|
|
7
|
-
data.tar.gz:
|
|
6
|
+
metadata.gz: d9513b47e15bf8219c9cfc0a801763b656622e0f00c517d4e3cc3065b7775555d03ec1ad933eb309933e9bb9563ec234f66d19ce179693a265bac1a9a9d52f7e
|
|
7
|
+
data.tar.gz: 8df33c1af2dc29c325e2b82a148b2adaa75d61d1e80a7dba466b39cb0896ea9a26da3c53b564a84643330c70d88f239202ee87b3fedb7c2ca66e4b82e5eb4112
|
checksums.yaml.gz.sig
CHANGED
|
@@ -1,4 +1,4 @@
|
|
|
1
|
-
|
|
2
|
-
|
|
3
|
-
|
|
4
|
-
��
|
|
1
|
+
|
|
2
|
+
�J���A��h%۲���4z>@[re�j
|
|
3
|
+
|
|
4
|
+
�͏�G��"�!�/���l��D���n���G�"C�J�)��8o��7Xc��E��h��*����v�T�C��VT�(F,�l������E��&��7�V��N&����0���ueE<�O�����I�uJR�L���FL���A��#K�@�c�(����Å�v[O�����Icd�GDCxx��%��7X�+f%MY��(��������!+�/K����Ü�ZBd�AA�@�=_��@��yzF#%W�qZXi7�����=�29r����O���X}�4����P��fg^����Z?/Xw��C�o�>S���7�4���!%o��̇c^02cr@л���)W�2"N���
|
data/CHANGES.md
CHANGED
|
@@ -1,3 +1,11 @@
|
|
|
1
|
+
## 0.4.0 - 15-Dec-2025
|
|
2
|
+
* Now uses Xalan, if installed, for XPath 1.0 compliance.
|
|
3
|
+
* Added Node#search.
|
|
4
|
+
* Added Node#at and Node#at_xpath.
|
|
5
|
+
* Added Document#encoding.
|
|
6
|
+
* Added Node#namespace.
|
|
7
|
+
* Added a placeholder css method for now; it's on the TODO list.
|
|
8
|
+
|
|
1
9
|
## 0.3.0 - 14-Dec-2025
|
|
2
10
|
* Added Node#parent.
|
|
3
11
|
* Added Element#attributes.
|
data/README.md
CHANGED
|
@@ -39,6 +39,28 @@ sudo apt-get install libxerces-c-dev
|
|
|
39
39
|
sudo yum install xerces-c-devel
|
|
40
40
|
```
|
|
41
41
|
|
|
42
|
+
### Xalan
|
|
43
|
+
|
|
44
|
+
For XPath 1.0 compliance, you will need to install the Xalan library. Note
|
|
45
|
+
that this is optional, and rxerces will default to using the XPath support
|
|
46
|
+
from Xerces, which is more limited.
|
|
47
|
+
|
|
48
|
+
**Ubuntu/Debian:**
|
|
49
|
+
```bash
|
|
50
|
+
sudo apt-get install libxalan-c-dev
|
|
51
|
+
```
|
|
52
|
+
|
|
53
|
+
**Fedora/RHEL:**
|
|
54
|
+
```bash
|
|
55
|
+
sudo yum install xalan-c-devel
|
|
56
|
+
```
|
|
57
|
+
|
|
58
|
+
Note that macOS, contrary to what the documentation currently says, does not
|
|
59
|
+
have a Homebrew package for Xalan. You will need to either use MacPorts or clone
|
|
60
|
+
and build the code manually. I found that it required some tweaking to work:
|
|
61
|
+
|
|
62
|
+
https://github.com/apache/xalan-c/pull/44
|
|
63
|
+
|
|
42
64
|
### Install the Gem
|
|
43
65
|
|
|
44
66
|
Add this line to your application's Gemfile:
|
|
@@ -156,7 +178,7 @@ puts doc.to_s
|
|
|
156
178
|
|
|
157
179
|
### XPath Queries
|
|
158
180
|
|
|
159
|
-
RXerces supports XPath queries using Xerces-C's XPath implementation:
|
|
181
|
+
RXerces supports XPath queries using Xerces-C's XPath implementation by default:
|
|
160
182
|
|
|
161
183
|
```ruby
|
|
162
184
|
xml = <<-XML
|
|
@@ -206,6 +228,8 @@ Not supported:
|
|
|
206
228
|
|
|
207
229
|
For more complex queries, you can combine basic XPath with Ruby's `select` and `find` methods.
|
|
208
230
|
|
|
231
|
+
For full XPath 1.0 support, install the Xalan library.
|
|
232
|
+
|
|
209
233
|
## API Reference
|
|
210
234
|
|
|
211
235
|
### RXerces Module
|
|
@@ -270,7 +294,7 @@ bundle exec rake
|
|
|
270
294
|
|
|
271
295
|
- Uses Apache Xerces-C 3.x for XML parsing
|
|
272
296
|
- C++ extension compiled with Ruby's native extension API
|
|
273
|
-
- XPath support is basic (full XPath requires
|
|
297
|
+
- XPath support is basic by default (full XPath requires Xalan)
|
|
274
298
|
- Memory management handled by Ruby's GC and Xerces-C's DOM
|
|
275
299
|
|
|
276
300
|
## Differences from Nokogiri
|
data/ext/rxerces/extconf.rb
CHANGED
|
@@ -29,4 +29,46 @@ unless find_header('xercesc/util/PlatformUtils.hpp')
|
|
|
29
29
|
puts "Proceeding with compilation..."
|
|
30
30
|
end
|
|
31
31
|
|
|
32
|
+
# Check for Xalan-C (optional for enhanced XPath support)
|
|
33
|
+
# Use dir_config which handles --with-xalan-dir and --with-xalan-include/lib
|
|
34
|
+
dir_config('xalan')
|
|
35
|
+
|
|
36
|
+
# Also try to auto-detect in common locations
|
|
37
|
+
if RUBY_PLATFORM =~ /darwin/
|
|
38
|
+
homebrew_xalan = `brew --prefix xalan-c 2>/dev/null`.chomp
|
|
39
|
+
if !homebrew_xalan.empty? && File.directory?(homebrew_xalan)
|
|
40
|
+
$INCFLAGS << " -I#{homebrew_xalan}/include" unless $INCFLAGS.include?("-I#{homebrew_xalan}/include")
|
|
41
|
+
$LDFLAGS << " -L#{homebrew_xalan}/lib" unless $LDFLAGS.include?("-L#{homebrew_xalan}/lib")
|
|
42
|
+
end
|
|
43
|
+
end
|
|
44
|
+
|
|
45
|
+
# Try standard locations
|
|
46
|
+
xalan_found_prefix = nil
|
|
47
|
+
['/usr/local', '/opt/local', '/usr'].each do |prefix|
|
|
48
|
+
if File.directory?("#{prefix}/include/xalanc")
|
|
49
|
+
$INCFLAGS << " -I#{prefix}/include" unless $INCFLAGS.include?("-I#{prefix}/include")
|
|
50
|
+
$LDFLAGS << " -L#{prefix}/lib" unless $LDFLAGS.include?("-L#{prefix}/lib")
|
|
51
|
+
xalan_found_prefix = prefix
|
|
52
|
+
break
|
|
53
|
+
end
|
|
54
|
+
end
|
|
55
|
+
|
|
56
|
+
# Check for Xalan libraries
|
|
57
|
+
# Note: We skip the header check because Xalan C++ headers require C++ compilation
|
|
58
|
+
# which mkmf's find_header doesn't handle well. The library check is sufficient.
|
|
59
|
+
if have_library('xalanMsg') && have_library('xalan-c')
|
|
60
|
+
$CXXFLAGS << " -DHAVE_XALAN"
|
|
61
|
+
# Add rpath so the dynamic libraries can be found at runtime
|
|
62
|
+
if xalan_found_prefix
|
|
63
|
+
$LDFLAGS << " -Wl,-rpath,#{xalan_found_prefix}/lib"
|
|
64
|
+
end
|
|
65
|
+
puts "Xalan-C found: Full XPath 1.0 support enabled"
|
|
66
|
+
else
|
|
67
|
+
puts "Xalan-C not found: Using Xerces XPath subset"
|
|
68
|
+
puts "For full XPath 1.0 support, install Xalan-C:"
|
|
69
|
+
puts " macOS: Build from source (no homebrew formula available)"
|
|
70
|
+
puts " Linux: May be available via package manager"
|
|
71
|
+
puts " Or specify: --with-xalan-dir=/path/to/xalan"
|
|
72
|
+
end
|
|
73
|
+
|
|
32
74
|
create_makefile('rxerces/rxerces')
|
data/ext/rxerces/rxerces.cpp
CHANGED
|
@@ -13,7 +13,28 @@
|
|
|
13
13
|
#include <sstream>
|
|
14
14
|
#include <vector>
|
|
15
15
|
|
|
16
|
+
#ifdef HAVE_XALAN
|
|
17
|
+
#include <xalanc/XPath/XPathEvaluator.hpp>
|
|
18
|
+
#include <xalanc/XPath/NodeRefList.hpp>
|
|
19
|
+
#include <xalanc/XPath/XObject.hpp>
|
|
20
|
+
#include <xalanc/XPath/XObjectFactoryDefault.hpp>
|
|
21
|
+
#include <xalanc/XPath/XPathEnvSupportDefault.hpp>
|
|
22
|
+
#include <xalanc/XPath/XPathExecutionContextDefault.hpp>
|
|
23
|
+
#include <xalanc/XPath/XPathConstructionContextDefault.hpp>
|
|
24
|
+
#include <xalanc/XPath/ElementPrefixResolverProxy.hpp>
|
|
25
|
+
#include <xalanc/XPath/XPathFactoryDefault.hpp>
|
|
26
|
+
#include <xalanc/XPath/XPathProcessorImpl.hpp>
|
|
27
|
+
#include <xalanc/XPath/XPath.hpp>
|
|
28
|
+
#include <xalanc/XercesParserLiaison/XercesParserLiaison.hpp>
|
|
29
|
+
#include <xalanc/XercesParserLiaison/XercesDOMSupport.hpp>
|
|
30
|
+
#include <xalanc/XercesParserLiaison/XercesDocumentWrapper.hpp>
|
|
31
|
+
#include <xalanc/PlatformSupport/XalanMemoryManagerDefault.hpp>
|
|
32
|
+
#endif
|
|
33
|
+
|
|
16
34
|
using namespace xercesc;
|
|
35
|
+
#ifdef HAVE_XALAN
|
|
36
|
+
using namespace xalanc;
|
|
37
|
+
#endif
|
|
17
38
|
|
|
18
39
|
VALUE rb_mRXerces;
|
|
19
40
|
VALUE rb_mXML;
|
|
@@ -24,8 +45,11 @@ VALUE rb_cElement;
|
|
|
24
45
|
VALUE rb_cText;
|
|
25
46
|
VALUE rb_cSchema;
|
|
26
47
|
|
|
27
|
-
//
|
|
48
|
+
// Initialization flags
|
|
28
49
|
static bool xerces_initialized = false;
|
|
50
|
+
#ifdef HAVE_XALAN
|
|
51
|
+
static bool xalan_initialized = false;
|
|
52
|
+
#endif
|
|
29
53
|
|
|
30
54
|
// Helper class to manage XMLCh strings
|
|
31
55
|
class XStr {
|
|
@@ -246,7 +270,7 @@ static VALUE document_parse(VALUE klass, VALUE str) {
|
|
|
246
270
|
|
|
247
271
|
XercesDOMParser* parser = new XercesDOMParser();
|
|
248
272
|
parser->setValidationScheme(XercesDOMParser::Val_Never);
|
|
249
|
-
parser->setDoNamespaces(
|
|
273
|
+
parser->setDoNamespaces(true);
|
|
250
274
|
parser->setDoSchema(false);
|
|
251
275
|
|
|
252
276
|
try {
|
|
@@ -319,6 +343,25 @@ static VALUE document_to_s(VALUE self) {
|
|
|
319
343
|
return Qnil;
|
|
320
344
|
}
|
|
321
345
|
|
|
346
|
+
// document.encoding
|
|
347
|
+
static VALUE document_encoding(VALUE self) {
|
|
348
|
+
DocumentWrapper* wrapper;
|
|
349
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
|
|
350
|
+
|
|
351
|
+
if (!wrapper->doc) {
|
|
352
|
+
return Qnil;
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
const XMLCh* encoding = wrapper->doc->getXmlEncoding();
|
|
356
|
+
if (!encoding || XMLString::stringLen(encoding) == 0) {
|
|
357
|
+
// Default to UTF-8 if no encoding is specified
|
|
358
|
+
return rb_str_new_cstr("UTF-8");
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
CharStr utf8_encoding(encoding);
|
|
362
|
+
return rb_str_new_cstr(utf8_encoding.localForm());
|
|
363
|
+
}
|
|
364
|
+
|
|
322
365
|
// document.create_element(name)
|
|
323
366
|
static VALUE document_create_element(VALUE self, VALUE name) {
|
|
324
367
|
DocumentWrapper* doc_wrapper;
|
|
@@ -354,6 +397,107 @@ static VALUE document_create_element(VALUE self, VALUE name) {
|
|
|
354
397
|
return Qnil;
|
|
355
398
|
}
|
|
356
399
|
|
|
400
|
+
#ifdef HAVE_XALAN
|
|
401
|
+
// Helper function to execute XPath using Xalan for full XPath 1.0 support
|
|
402
|
+
static VALUE execute_xpath_with_xalan(DOMNode* context_node, const char* xpath_str, VALUE doc_ref) {
|
|
403
|
+
try {
|
|
404
|
+
// Initialize Xalan if needed
|
|
405
|
+
if (!xalan_initialized) {
|
|
406
|
+
XPathEvaluator::initialize();
|
|
407
|
+
XMLPlatformUtils::Initialize();
|
|
408
|
+
xalan_initialized = true;
|
|
409
|
+
}
|
|
410
|
+
|
|
411
|
+
// Get the document
|
|
412
|
+
DOMDocument* domDoc = context_node->getOwnerDocument();
|
|
413
|
+
if (!domDoc && context_node->getNodeType() == DOMNode::DOCUMENT_NODE) {
|
|
414
|
+
domDoc = static_cast<DOMDocument*>(context_node);
|
|
415
|
+
}
|
|
416
|
+
|
|
417
|
+
if (!domDoc) {
|
|
418
|
+
NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
|
|
419
|
+
wrapper->nodes_array = rb_ary_new();
|
|
420
|
+
return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
|
|
421
|
+
}
|
|
422
|
+
|
|
423
|
+
// Create Xalan support objects
|
|
424
|
+
XercesParserLiaison liaison;
|
|
425
|
+
XercesDOMSupport domSupport(liaison);
|
|
426
|
+
|
|
427
|
+
// Create Xalan document - this creates and returns a XercesDocumentWrapper
|
|
428
|
+
XalanDocument* xalanDoc = liaison.createDocument(domDoc, false, false, false);
|
|
429
|
+
if (!xalanDoc) {
|
|
430
|
+
rb_raise(rb_eRuntimeError, "Failed to create Xalan document wrapper");
|
|
431
|
+
}
|
|
432
|
+
|
|
433
|
+
// The document IS the wrapper
|
|
434
|
+
XercesDocumentWrapper* docWrapper = static_cast<XercesDocumentWrapper*>(xalanDoc);
|
|
435
|
+
|
|
436
|
+
// Map the context node to Xalan
|
|
437
|
+
XalanNode* xalanContextNode = docWrapper->mapNode(context_node);
|
|
438
|
+
if (!xalanContextNode) {
|
|
439
|
+
xalanContextNode = docWrapper;
|
|
440
|
+
}
|
|
441
|
+
|
|
442
|
+
// Set up XPath factories and contexts
|
|
443
|
+
XPathEnvSupportDefault envSupport;
|
|
444
|
+
XObjectFactoryDefault objectFactory;
|
|
445
|
+
XPathExecutionContextDefault executionContext(envSupport, domSupport, objectFactory);
|
|
446
|
+
XPathConstructionContextDefault constructionContext;
|
|
447
|
+
XPathFactoryDefault factory;
|
|
448
|
+
|
|
449
|
+
// Create XPath
|
|
450
|
+
XPathProcessorImpl processor;
|
|
451
|
+
XPath* xpath = factory.create();
|
|
452
|
+
|
|
453
|
+
// Compile XPath expression
|
|
454
|
+
ElementPrefixResolverProxy resolver(docWrapper->getDocumentElement(), envSupport, domSupport);
|
|
455
|
+
processor.initXPath(*xpath, constructionContext, XalanDOMString(xpath_str), resolver);
|
|
456
|
+
|
|
457
|
+
// Execute XPath query
|
|
458
|
+
const XObjectPtr result = xpath->execute(xalanContextNode, resolver, executionContext);
|
|
459
|
+
|
|
460
|
+
VALUE nodes_array = rb_ary_new();
|
|
461
|
+
|
|
462
|
+
if (result.get() != 0) {
|
|
463
|
+
// Check if result is a node set
|
|
464
|
+
const NodeRefListBase& nodeList = result->nodeset();
|
|
465
|
+
const NodeRefListBase::size_type length = nodeList.getLength();
|
|
466
|
+
|
|
467
|
+
for (NodeRefListBase::size_type i = 0; i < length; ++i) {
|
|
468
|
+
XalanNode* xalanNode = nodeList.item(i);
|
|
469
|
+
if (xalanNode) {
|
|
470
|
+
// Map back to Xerces DOM node
|
|
471
|
+
const DOMNode* domNode = docWrapper->mapNode(xalanNode);
|
|
472
|
+
if (domNode) {
|
|
473
|
+
rb_ary_push(nodes_array, wrap_node(const_cast<DOMNode*>(domNode), doc_ref));
|
|
474
|
+
}
|
|
475
|
+
}
|
|
476
|
+
}
|
|
477
|
+
}
|
|
478
|
+
|
|
479
|
+
factory.returnObject(xpath);
|
|
480
|
+
|
|
481
|
+
NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
|
|
482
|
+
wrapper->nodes_array = nodes_array;
|
|
483
|
+
return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
|
|
484
|
+
|
|
485
|
+
} catch (const XalanXPathException& e) {
|
|
486
|
+
CharStr msg(e.getMessage().c_str());
|
|
487
|
+
rb_raise(rb_eRuntimeError, "XPath error: %s", msg.localForm());
|
|
488
|
+
} catch (const XMLException& e) {
|
|
489
|
+
CharStr message(e.getMessage());
|
|
490
|
+
rb_raise(rb_eRuntimeError, "XML error: %s", message.localForm());
|
|
491
|
+
} catch (...) {
|
|
492
|
+
rb_raise(rb_eRuntimeError, "Unknown XPath error");
|
|
493
|
+
}
|
|
494
|
+
|
|
495
|
+
NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
|
|
496
|
+
wrapper->nodes_array = rb_ary_new();
|
|
497
|
+
return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
|
|
498
|
+
}
|
|
499
|
+
#endif
|
|
500
|
+
|
|
357
501
|
// document.xpath(path)
|
|
358
502
|
static VALUE document_xpath(VALUE self, VALUE path) {
|
|
359
503
|
DocumentWrapper* doc_wrapper;
|
|
@@ -368,6 +512,17 @@ static VALUE document_xpath(VALUE self, VALUE path) {
|
|
|
368
512
|
Check_Type(path, T_STRING);
|
|
369
513
|
const char* xpath_str = StringValueCStr(path);
|
|
370
514
|
|
|
515
|
+
#ifdef HAVE_XALAN
|
|
516
|
+
// Use Xalan for full XPath 1.0 support
|
|
517
|
+
DOMElement* root = doc_wrapper->doc->getDocumentElement();
|
|
518
|
+
if (!root) {
|
|
519
|
+
NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
|
|
520
|
+
wrapper->nodes_array = rb_ary_new();
|
|
521
|
+
return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
|
|
522
|
+
}
|
|
523
|
+
return execute_xpath_with_xalan(root, xpath_str, self);
|
|
524
|
+
#else
|
|
525
|
+
// Fall back to Xerces XPath subset
|
|
371
526
|
try {
|
|
372
527
|
DOMElement* root = doc_wrapper->doc->getDocumentElement();
|
|
373
528
|
if (!root) {
|
|
@@ -417,6 +572,7 @@ static VALUE document_xpath(VALUE self, VALUE path) {
|
|
|
417
572
|
NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
|
|
418
573
|
wrapper->nodes_array = rb_ary_new();
|
|
419
574
|
return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
|
|
575
|
+
#endif
|
|
420
576
|
}
|
|
421
577
|
|
|
422
578
|
// node.name
|
|
@@ -434,6 +590,24 @@ static VALUE node_name(VALUE self) {
|
|
|
434
590
|
return rb_str_new_cstr(utf8_name.localForm());
|
|
435
591
|
}
|
|
436
592
|
|
|
593
|
+
// node.namespace
|
|
594
|
+
static VALUE node_namespace(VALUE self) {
|
|
595
|
+
NodeWrapper* wrapper;
|
|
596
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
597
|
+
|
|
598
|
+
if (!wrapper->node) {
|
|
599
|
+
return Qnil;
|
|
600
|
+
}
|
|
601
|
+
|
|
602
|
+
const XMLCh* namespaceURI = wrapper->node->getNamespaceURI();
|
|
603
|
+
if (!namespaceURI || XMLString::stringLen(namespaceURI) == 0) {
|
|
604
|
+
return Qnil;
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
CharStr utf8_namespace(namespaceURI);
|
|
608
|
+
return rb_str_new_cstr(utf8_namespace.localForm());
|
|
609
|
+
}
|
|
610
|
+
|
|
437
611
|
// node.text / node.content
|
|
438
612
|
static VALUE node_text(VALUE self) {
|
|
439
613
|
NodeWrapper* wrapper;
|
|
@@ -880,6 +1054,11 @@ static VALUE node_xpath(VALUE self, VALUE path) {
|
|
|
880
1054
|
const char* xpath_str = StringValueCStr(path);
|
|
881
1055
|
VALUE doc_ref = rb_iv_get(self, "@document");
|
|
882
1056
|
|
|
1057
|
+
#ifdef HAVE_XALAN
|
|
1058
|
+
// Use Xalan for full XPath 1.0 support
|
|
1059
|
+
return execute_xpath_with_xalan(node_wrapper->node, xpath_str, doc_ref);
|
|
1060
|
+
#else
|
|
1061
|
+
// Fall back to Xerces XPath subset
|
|
883
1062
|
try {
|
|
884
1063
|
DOMDocument* doc = node_wrapper->node->getOwnerDocument();
|
|
885
1064
|
if (!doc) {
|
|
@@ -929,6 +1108,26 @@ static VALUE node_xpath(VALUE self, VALUE path) {
|
|
|
929
1108
|
NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
|
|
930
1109
|
wrapper->nodes_array = rb_ary_new();
|
|
931
1110
|
return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
|
|
1111
|
+
#endif
|
|
1112
|
+
}
|
|
1113
|
+
|
|
1114
|
+
// node.at_xpath(path) - returns first matching node or nil
|
|
1115
|
+
static VALUE node_at_xpath(VALUE self, VALUE path) {
|
|
1116
|
+
VALUE nodeset = node_xpath(self, path);
|
|
1117
|
+
NodeSetWrapper* wrapper;
|
|
1118
|
+
TypedData_Get_Struct(nodeset, NodeSetWrapper, &nodeset_type, wrapper);
|
|
1119
|
+
|
|
1120
|
+
if (RARRAY_LEN(wrapper->nodes_array) == 0) {
|
|
1121
|
+
return Qnil;
|
|
1122
|
+
}
|
|
1123
|
+
|
|
1124
|
+
return rb_ary_entry(wrapper->nodes_array, 0);
|
|
1125
|
+
}
|
|
1126
|
+
|
|
1127
|
+
// node.css(selector) - CSS selectors not supported
|
|
1128
|
+
static VALUE node_css(VALUE self, VALUE selector) {
|
|
1129
|
+
rb_raise(rb_eNotImpError, "CSS selectors are not supported. Use xpath() instead. Xerces-C only supports XPath queries.");
|
|
1130
|
+
return Qnil;
|
|
932
1131
|
}
|
|
933
1132
|
|
|
934
1133
|
// nodeset.length / nodeset.size
|
|
@@ -1162,17 +1361,19 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
|
|
|
1162
1361
|
rb_define_singleton_method(rb_cDocument, "parse", RUBY_METHOD_FUNC(document_parse), 1);
|
|
1163
1362
|
rb_define_method(rb_cDocument, "root", RUBY_METHOD_FUNC(document_root), 0);
|
|
1164
1363
|
rb_define_method(rb_cDocument, "to_s", RUBY_METHOD_FUNC(document_to_s), 0);
|
|
1165
|
-
|
|
1364
|
+
rb_define_alias(rb_cDocument, "to_xml", "to_s");
|
|
1166
1365
|
rb_define_method(rb_cDocument, "xpath", RUBY_METHOD_FUNC(document_xpath), 1);
|
|
1366
|
+
rb_define_method(rb_cDocument, "encoding", RUBY_METHOD_FUNC(document_encoding), 0);
|
|
1167
1367
|
rb_define_method(rb_cDocument, "create_element", RUBY_METHOD_FUNC(document_create_element), 1);
|
|
1168
1368
|
|
|
1169
1369
|
rb_cNode = rb_define_class_under(rb_mXML, "Node", rb_cObject);
|
|
1170
1370
|
rb_undef_alloc_func(rb_cNode);
|
|
1171
1371
|
rb_define_method(rb_cNode, "name", RUBY_METHOD_FUNC(node_name), 0);
|
|
1372
|
+
rb_define_method(rb_cNode, "namespace", RUBY_METHOD_FUNC(node_namespace), 0);
|
|
1172
1373
|
rb_define_method(rb_cNode, "text", RUBY_METHOD_FUNC(node_text), 0);
|
|
1173
|
-
|
|
1374
|
+
rb_define_alias(rb_cNode, "content", "text");
|
|
1174
1375
|
rb_define_method(rb_cNode, "text=", RUBY_METHOD_FUNC(node_text_set), 1);
|
|
1175
|
-
|
|
1376
|
+
rb_define_alias(rb_cNode, "content=", "text=");
|
|
1176
1377
|
rb_define_method(rb_cNode, "[]", RUBY_METHOD_FUNC(node_get_attribute), 1);
|
|
1177
1378
|
rb_define_method(rb_cNode, "[]=", RUBY_METHOD_FUNC(node_set_attribute), 2);
|
|
1178
1379
|
rb_define_method(rb_cNode, "children", RUBY_METHOD_FUNC(node_children), 0);
|
|
@@ -1182,12 +1383,16 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
|
|
|
1182
1383
|
rb_define_method(rb_cNode, "previous_sibling", RUBY_METHOD_FUNC(node_previous_sibling), 0);
|
|
1183
1384
|
rb_define_method(rb_cNode, "add_child", RUBY_METHOD_FUNC(node_add_child), 1);
|
|
1184
1385
|
rb_define_method(rb_cNode, "remove", RUBY_METHOD_FUNC(node_remove), 0);
|
|
1185
|
-
|
|
1386
|
+
rb_define_alias(rb_cNode, "unlink", "remove");
|
|
1186
1387
|
rb_define_method(rb_cNode, "inner_html", RUBY_METHOD_FUNC(node_inner_html), 0);
|
|
1187
|
-
|
|
1388
|
+
rb_define_alias(rb_cNode, "inner_xml", "inner_html");
|
|
1188
1389
|
rb_define_method(rb_cNode, "path", RUBY_METHOD_FUNC(node_path), 0);
|
|
1189
1390
|
rb_define_method(rb_cNode, "blank?", RUBY_METHOD_FUNC(node_blank_p), 0);
|
|
1190
1391
|
rb_define_method(rb_cNode, "xpath", RUBY_METHOD_FUNC(node_xpath), 1);
|
|
1392
|
+
rb_define_alias(rb_cNode, "search", "xpath");
|
|
1393
|
+
rb_define_method(rb_cNode, "at_xpath", RUBY_METHOD_FUNC(node_at_xpath), 1);
|
|
1394
|
+
rb_define_alias(rb_cNode, "at", "at_xpath");
|
|
1395
|
+
rb_define_method(rb_cNode, "css", RUBY_METHOD_FUNC(node_css), 1);
|
|
1191
1396
|
|
|
1192
1397
|
rb_cElement = rb_define_class_under(rb_mXML, "Element", rb_cNode);
|
|
1193
1398
|
rb_undef_alloc_func(rb_cElement);
|
|
@@ -1198,7 +1403,7 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
|
|
|
1198
1403
|
rb_cNodeSet = rb_define_class_under(rb_mXML, "NodeSet", rb_cObject);
|
|
1199
1404
|
rb_undef_alloc_func(rb_cNodeSet);
|
|
1200
1405
|
rb_define_method(rb_cNodeSet, "length", RUBY_METHOD_FUNC(nodeset_length), 0);
|
|
1201
|
-
|
|
1406
|
+
rb_define_alias(rb_cNodeSet, "size", "length");
|
|
1202
1407
|
rb_define_method(rb_cNodeSet, "[]", RUBY_METHOD_FUNC(nodeset_at), 1);
|
|
1203
1408
|
rb_define_method(rb_cNodeSet, "each", RUBY_METHOD_FUNC(nodeset_each), 0);
|
|
1204
1409
|
rb_define_method(rb_cNodeSet, "to_a", RUBY_METHOD_FUNC(nodeset_to_a), 0);
|
data/lib/rxerces/version.rb
CHANGED
data/rxerces.gemspec
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Gem::Specification.new do |spec|
|
|
2
2
|
spec.name = "rxerces"
|
|
3
|
-
spec.version = "0.
|
|
3
|
+
spec.version = "0.4.0"
|
|
4
4
|
spec.author = "Daniel J. Berger"
|
|
5
5
|
spec.email = "djberg96@gmail.com"
|
|
6
6
|
spec.cert_chain = ["certs/djberg96_pub.pem"]
|
|
@@ -17,7 +17,10 @@ Gem::Specification.new do |spec|
|
|
|
17
17
|
spec.add_development_dependency "rake-compiler", "~> 1.2"
|
|
18
18
|
spec.add_development_dependency "rspec", "~> 3.12"
|
|
19
19
|
|
|
20
|
-
spec.description =
|
|
20
|
+
spec.description = <<-EOF
|
|
21
|
+
A Ruby XML library with Nokogiri-compatible API, powered by Xerces-C
|
|
22
|
+
instead of libxml2. It also optionally uses Xalan for Xpath 1.0 compliance.
|
|
23
|
+
EOF
|
|
21
24
|
|
|
22
25
|
spec.metadata = {
|
|
23
26
|
'homepage_uri' => 'https://github.com/djberg96/rxerces',
|
data/spec/document_spec.rb
CHANGED
|
@@ -64,6 +64,25 @@ RSpec.describe RXerces::XML::Document do
|
|
|
64
64
|
end
|
|
65
65
|
end
|
|
66
66
|
|
|
67
|
+
describe "#encoding" do
|
|
68
|
+
it "returns UTF-8 for documents without explicit encoding" do
|
|
69
|
+
doc = RXerces::XML::Document.parse(simple_xml)
|
|
70
|
+
expect(doc.encoding).to eq('UTF-8')
|
|
71
|
+
end
|
|
72
|
+
|
|
73
|
+
it "returns the encoding specified in the XML declaration" do
|
|
74
|
+
xml_with_encoding = '<?xml version="1.0" encoding="ISO-8859-1"?><root><item>Test</item></root>'
|
|
75
|
+
doc = RXerces::XML::Document.parse(xml_with_encoding)
|
|
76
|
+
expect(doc.encoding).to eq('ISO-8859-1')
|
|
77
|
+
end
|
|
78
|
+
|
|
79
|
+
it "returns the encoding for UTF-16 documents" do
|
|
80
|
+
xml_with_encoding = '<?xml version="1.0" encoding="UTF-16"?><root><item>Test</item></root>'
|
|
81
|
+
doc = RXerces::XML::Document.parse(xml_with_encoding)
|
|
82
|
+
expect(doc.encoding).to eq('UTF-16')
|
|
83
|
+
end
|
|
84
|
+
end
|
|
85
|
+
|
|
67
86
|
describe "#create_element" do
|
|
68
87
|
let(:doc) { RXerces::XML::Document.parse(simple_xml) }
|
|
69
88
|
|
data/spec/node_spec.rb
CHANGED
|
@@ -29,6 +29,32 @@ RSpec.describe RXerces::XML::Node do
|
|
|
29
29
|
end
|
|
30
30
|
end
|
|
31
31
|
|
|
32
|
+
describe "#namespace" do
|
|
33
|
+
let(:ns_xml) do
|
|
34
|
+
<<-XML
|
|
35
|
+
<root xmlns="http://example.com/default">
|
|
36
|
+
<item>Default namespace</item>
|
|
37
|
+
</root>
|
|
38
|
+
XML
|
|
39
|
+
end
|
|
40
|
+
let(:ns_doc) { RXerces::XML::Document.parse(ns_xml) }
|
|
41
|
+
|
|
42
|
+
it "returns nil for nodes without a namespace" do
|
|
43
|
+
expect(root.namespace).to be_nil
|
|
44
|
+
end
|
|
45
|
+
|
|
46
|
+
it "returns the default namespace URI" do
|
|
47
|
+
ns_root = ns_doc.root
|
|
48
|
+
expect(ns_root.namespace).to eq('http://example.com/default')
|
|
49
|
+
end
|
|
50
|
+
|
|
51
|
+
it "returns the namespace for child elements inheriting default namespace" do
|
|
52
|
+
ns_root = ns_doc.root
|
|
53
|
+
item = ns_root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
54
|
+
expect(item.namespace).to eq('http://example.com/default')
|
|
55
|
+
end
|
|
56
|
+
end
|
|
57
|
+
|
|
32
58
|
describe "#text" do
|
|
33
59
|
it "returns text content" do
|
|
34
60
|
person = root.children.find { |n| n.is_a?(RXerces::XML::Element) }
|
|
@@ -469,4 +495,65 @@ RSpec.describe RXerces::XML::Node do
|
|
|
469
495
|
expect(text_node.blank?).to be true
|
|
470
496
|
end
|
|
471
497
|
end
|
|
498
|
+
|
|
499
|
+
describe "#search" do
|
|
500
|
+
it "is an alias for xpath" do
|
|
501
|
+
result1 = root.search('.//age')
|
|
502
|
+
result2 = root.xpath('.//age')
|
|
503
|
+
expect(result1.length).to eq(result2.length)
|
|
504
|
+
expect(result1.first.text).to eq(result2.first.text)
|
|
505
|
+
end
|
|
506
|
+
|
|
507
|
+
it "returns a NodeSet" do
|
|
508
|
+
result = root.search('.//person')
|
|
509
|
+
expect(result).to be_a(RXerces::XML::NodeSet)
|
|
510
|
+
end
|
|
511
|
+
|
|
512
|
+
it "finds nested elements" do
|
|
513
|
+
result = root.search('.//age')
|
|
514
|
+
expect(result.length).to eq(2)
|
|
515
|
+
expect(result.first.text).to eq('30')
|
|
516
|
+
end
|
|
517
|
+
end
|
|
518
|
+
|
|
519
|
+
describe "#at_xpath" do
|
|
520
|
+
it "returns the first matching node" do
|
|
521
|
+
result = root.at_xpath('.//age')
|
|
522
|
+
expect(result).to be_a(RXerces::XML::Element)
|
|
523
|
+
expect(result.text).to eq('30')
|
|
524
|
+
end
|
|
525
|
+
|
|
526
|
+
it "returns nil when no match found" do
|
|
527
|
+
result = root.at_xpath('.//nonexistent')
|
|
528
|
+
expect(result).to be_nil
|
|
529
|
+
end
|
|
530
|
+
|
|
531
|
+
it "returns only the first match when multiple exist" do
|
|
532
|
+
result = root.at_xpath('.//person')
|
|
533
|
+
expect(result['id']).to eq('1')
|
|
534
|
+
end
|
|
535
|
+
end
|
|
536
|
+
|
|
537
|
+
describe "#at" do
|
|
538
|
+
it "is an alias for at_xpath" do
|
|
539
|
+
result1 = root.at('.//age')
|
|
540
|
+
result2 = root.at_xpath('.//age')
|
|
541
|
+
expect(result1.text).to eq(result2.text)
|
|
542
|
+
end
|
|
543
|
+
|
|
544
|
+
it "returns the first matching element" do
|
|
545
|
+
result = root.at('.//city')
|
|
546
|
+
expect(result.text).to eq('New York')
|
|
547
|
+
end
|
|
548
|
+
end
|
|
549
|
+
|
|
550
|
+
describe "#css" do
|
|
551
|
+
it "raises NotImplementedError for CSS selectors" do
|
|
552
|
+
expect { root.css('div.class') }.to raise_error(NotImplementedError, /CSS selectors are not supported/)
|
|
553
|
+
end
|
|
554
|
+
|
|
555
|
+
it "suggests using xpath instead" do
|
|
556
|
+
expect { root.css('p') }.to raise_error(NotImplementedError, /Use xpath/)
|
|
557
|
+
end
|
|
558
|
+
end
|
|
472
559
|
end
|
data/spec/rxerces_shared.rb
CHANGED
data/spec/xpath_spec.rb
CHANGED
|
@@ -141,24 +141,258 @@ RSpec.describe "XPath support" do
|
|
|
141
141
|
end
|
|
142
142
|
end
|
|
143
143
|
|
|
144
|
-
describe "XPath
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
#
|
|
148
|
-
|
|
149
|
-
|
|
150
|
-
|
|
151
|
-
|
|
152
|
-
|
|
153
|
-
|
|
154
|
-
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
144
|
+
describe "XPath 1.0 compliance with Xalan" do
|
|
145
|
+
# Check if Xalan support is compiled in
|
|
146
|
+
xalan_available = begin
|
|
147
|
+
# Try a feature that only works with Xalan (attribute predicates)
|
|
148
|
+
test_xml = '<root><item id="1">A</item><item id="2">B</item></root>'
|
|
149
|
+
test_doc = RXerces::XML::Document.parse(test_xml)
|
|
150
|
+
result = test_doc.xpath('//item[@id="1"]')
|
|
151
|
+
result.length == 1
|
|
152
|
+
rescue
|
|
153
|
+
false
|
|
154
|
+
end
|
|
155
|
+
|
|
156
|
+
before(:all) do
|
|
157
|
+
unless xalan_available
|
|
158
|
+
skip "Xalan-C not available - XPath 1.0 features require Xalan-C library"
|
|
159
|
+
end
|
|
160
|
+
end
|
|
161
|
+
|
|
162
|
+
describe "Attribute predicates" do
|
|
163
|
+
it "finds elements by attribute value" do
|
|
164
|
+
book = doc.xpath('//book[@id="1"]')
|
|
165
|
+
expect(book.length).to eq(1)
|
|
166
|
+
expect(book[0].xpath('.//title')[0].text.strip).to eq('1984')
|
|
167
|
+
end
|
|
168
|
+
|
|
169
|
+
it "finds elements by attribute equality" do
|
|
170
|
+
fiction_books = doc.xpath('//book[@category="fiction"]')
|
|
171
|
+
expect(fiction_books.length).to eq(2)
|
|
172
|
+
end
|
|
173
|
+
|
|
174
|
+
it "finds elements by attribute inequality" do
|
|
175
|
+
non_fiction = doc.xpath('//book[@category!="fiction"]')
|
|
176
|
+
expect(non_fiction.length).to eq(1)
|
|
177
|
+
expect(non_fiction[0].xpath('.//title')[0].text.strip).to eq('Sapiens')
|
|
178
|
+
end
|
|
179
|
+
|
|
180
|
+
it "supports multiple attribute predicates" do
|
|
181
|
+
book = doc.xpath('//book[@id="2"][@category="fiction"]')
|
|
182
|
+
expect(book.length).to eq(1)
|
|
183
|
+
expect(book[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
|
|
184
|
+
end
|
|
185
|
+
end
|
|
186
|
+
|
|
187
|
+
describe "Position and indexing functions" do
|
|
188
|
+
it "uses position() to find first element" do
|
|
189
|
+
first_book = doc.xpath('//book[position()=1]')
|
|
190
|
+
expect(first_book.length).to eq(1)
|
|
191
|
+
expect(first_book[0].xpath('.//title')[0].text.strip).to eq('1984')
|
|
192
|
+
end
|
|
193
|
+
|
|
194
|
+
it "uses last() to find last element" do
|
|
195
|
+
last_book = doc.xpath('//book[position()=last()]')
|
|
196
|
+
expect(last_book.length).to eq(1)
|
|
197
|
+
expect(last_book[0].xpath('.//title')[0].text.strip).to eq('Sapiens')
|
|
198
|
+
end
|
|
199
|
+
|
|
200
|
+
it "uses numeric predicates for indexing" do
|
|
201
|
+
second_book = doc.xpath('//book[2]')
|
|
202
|
+
expect(second_book.length).to eq(1)
|
|
203
|
+
expect(second_book[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
|
|
204
|
+
end
|
|
205
|
+
|
|
206
|
+
it "finds elements by position greater than" do
|
|
207
|
+
later_books = doc.xpath('//book[position()>1]')
|
|
208
|
+
expect(later_books.length).to eq(2)
|
|
209
|
+
end
|
|
210
|
+
end
|
|
211
|
+
|
|
212
|
+
describe "String functions" do
|
|
213
|
+
it "uses contains() function" do
|
|
214
|
+
books_with_new = doc.xpath('//book[contains(.//title, "New")]')
|
|
215
|
+
expect(books_with_new.length).to eq(1)
|
|
216
|
+
expect(books_with_new[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
|
|
217
|
+
end
|
|
218
|
+
|
|
219
|
+
it "uses starts-with() function" do
|
|
220
|
+
books_starting_with_1 = doc.xpath('//book[starts-with(.//title, "1")]')
|
|
221
|
+
expect(books_starting_with_1.length).to eq(1)
|
|
222
|
+
expect(books_starting_with_1[0].xpath('.//title')[0].text.strip).to eq('1984')
|
|
223
|
+
end
|
|
224
|
+
|
|
225
|
+
it "uses normalize-space() function" do
|
|
226
|
+
# Should find titles even with whitespace differences
|
|
227
|
+
result = doc.xpath('//title[normalize-space()="1984"]')
|
|
228
|
+
expect(result.length).to eq(1)
|
|
229
|
+
end
|
|
230
|
+
|
|
231
|
+
it "uses string-length() function" do
|
|
232
|
+
# Find books where title length is less than 10 characters
|
|
233
|
+
short_titles = doc.xpath('//book[string-length(.//title) < 10]')
|
|
234
|
+
expect(short_titles.length).to eq(2) # "1984" and "Sapiens"
|
|
235
|
+
end
|
|
236
|
+
|
|
237
|
+
it "uses concat() function" do
|
|
238
|
+
# This tests that concat works by checking if a book has matching text
|
|
239
|
+
# concat('19', '84') = '1984'
|
|
240
|
+
result = doc.xpath('//book[.//title = concat("19", "84")]')
|
|
241
|
+
expect(result.length).to eq(1)
|
|
242
|
+
end
|
|
243
|
+
|
|
244
|
+
it "uses substring() function" do
|
|
245
|
+
# Find books where first 5 chars of title is "Brave"
|
|
246
|
+
result = doc.xpath('//book[substring(.//title, 1, 5) = "Brave"]')
|
|
247
|
+
expect(result.length).to eq(1)
|
|
248
|
+
expect(result[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
|
|
249
|
+
end
|
|
250
|
+
end
|
|
251
|
+
|
|
252
|
+
describe "Numeric functions and comparisons" do
|
|
253
|
+
it "uses count() function" do
|
|
254
|
+
# Find library element that has exactly 3 book children
|
|
255
|
+
result = doc.xpath('/library[count(book) = 3]')
|
|
256
|
+
expect(result.length).to eq(1)
|
|
257
|
+
end
|
|
258
|
+
|
|
259
|
+
it "compares numeric values with >" do
|
|
260
|
+
expensive_books = doc.xpath('//book[.//price > 15]')
|
|
261
|
+
expect(expensive_books.length).to eq(2) # 15.99 and 18.99
|
|
262
|
+
end
|
|
263
|
+
|
|
264
|
+
it "compares numeric values with <" do
|
|
265
|
+
cheap_books = doc.xpath('//book[.//price < 16]')
|
|
266
|
+
expect(cheap_books.length).to eq(2) # 15.99 and 14.99
|
|
267
|
+
end
|
|
268
|
+
|
|
269
|
+
it "compares numeric values with >=" do
|
|
270
|
+
books_1950_or_later = doc.xpath('//book[.//year >= 1949]')
|
|
271
|
+
expect(books_1950_or_later.length).to eq(2) # 1949 and 2011
|
|
272
|
+
end
|
|
273
|
+
|
|
274
|
+
it "uses sum() function" do
|
|
275
|
+
# sum() returns a number, not a nodeset, so we can't call it directly
|
|
276
|
+
# Instead, test it within a predicate
|
|
277
|
+
result = doc.xpath('//library[sum(book/price) > 40]')
|
|
278
|
+
expect(result.length).to eq(1) # Total is 49.97
|
|
279
|
+
end
|
|
280
|
+
|
|
281
|
+
it "uses floor() function" do
|
|
282
|
+
# Find books where floor(price) = 15 (15.99 -> 15)
|
|
283
|
+
result = doc.xpath('//book[floor(.//price) = 15]')
|
|
284
|
+
expect(result.length).to eq(1)
|
|
285
|
+
end
|
|
286
|
+
|
|
287
|
+
it "uses ceiling() function" do
|
|
288
|
+
# Find books where ceiling(price) = 19 (18.99 -> 19)
|
|
289
|
+
result = doc.xpath('//book[ceiling(.//price) = 19]')
|
|
290
|
+
expect(result.length).to eq(1)
|
|
291
|
+
end
|
|
292
|
+
|
|
293
|
+
it "uses round() function" do
|
|
294
|
+
# Find books where round(price) = 15 (14.99 -> 15, 15.99 -> 16)
|
|
295
|
+
result = doc.xpath('//book[round(.//price) = 15]')
|
|
296
|
+
expect(result.length).to eq(1) # Only 14.99
|
|
297
|
+
end
|
|
298
|
+
end
|
|
299
|
+
|
|
300
|
+
describe "Boolean operators" do
|
|
301
|
+
it "uses 'and' operator" do
|
|
302
|
+
result = doc.xpath('//book[@category="fiction" and .//year < 1940]')
|
|
303
|
+
expect(result.length).to eq(1) # Only "Brave New World" (1932)
|
|
304
|
+
end
|
|
305
|
+
|
|
306
|
+
it "uses 'or' operator" do
|
|
307
|
+
result = doc.xpath('//book[@id="1" or @id="3"]')
|
|
308
|
+
expect(result.length).to eq(2)
|
|
309
|
+
end
|
|
310
|
+
|
|
311
|
+
it "uses 'not()' function" do
|
|
312
|
+
result = doc.xpath('//book[not(@category="fiction")]')
|
|
313
|
+
expect(result.length).to eq(1)
|
|
314
|
+
expect(result[0].xpath('.//title')[0].text.strip).to eq('Sapiens')
|
|
315
|
+
end
|
|
316
|
+
|
|
317
|
+
it "combines multiple boolean operators" do
|
|
318
|
+
result = doc.xpath('//book[@category="fiction" and .//price < 15.50]')
|
|
319
|
+
expect(result.length).to eq(1) # Only "Brave New World" (14.99)
|
|
320
|
+
end
|
|
321
|
+
end
|
|
322
|
+
|
|
323
|
+
describe "Axes" do
|
|
324
|
+
it "uses parent:: axis" do
|
|
325
|
+
# Find parent of first title
|
|
326
|
+
first_title = doc.xpath('//title[1]')
|
|
327
|
+
parent = first_title[0].xpath('parent::*')
|
|
328
|
+
expect(parent.length).to eq(1)
|
|
329
|
+
expect(parent[0].name).to eq('book')
|
|
330
|
+
end
|
|
331
|
+
|
|
332
|
+
it "uses ancestor:: axis" do
|
|
333
|
+
# Find all ancestors of a title element
|
|
334
|
+
first_title = doc.xpath('//title[1]')
|
|
335
|
+
ancestors = first_title[0].xpath('ancestor::*')
|
|
336
|
+
expect(ancestors.length).to eq(2) # book and library
|
|
337
|
+
end
|
|
338
|
+
|
|
339
|
+
it "uses following-sibling:: axis" do
|
|
340
|
+
# Find siblings after title
|
|
341
|
+
first_title = doc.xpath('//title[1]')
|
|
342
|
+
siblings = first_title[0].xpath('following-sibling::*')
|
|
343
|
+
expect(siblings.length).to eq(3) # author, year, price
|
|
344
|
+
end
|
|
345
|
+
|
|
346
|
+
it "uses preceding-sibling:: axis" do
|
|
347
|
+
# Find siblings before author
|
|
348
|
+
first_author = doc.xpath('//author[1]')
|
|
349
|
+
siblings = first_author[0].xpath('preceding-sibling::*')
|
|
350
|
+
expect(siblings.length).to eq(1) # title
|
|
351
|
+
end
|
|
352
|
+
|
|
353
|
+
it "uses descendant:: axis" do
|
|
354
|
+
root = doc.root
|
|
355
|
+
descendants = root.xpath('descendant::title')
|
|
356
|
+
expect(descendants.length).to eq(3)
|
|
357
|
+
end
|
|
358
|
+
|
|
359
|
+
it "uses self:: axis" do
|
|
360
|
+
books = doc.xpath('//book')
|
|
361
|
+
self_nodes = books[0].xpath('self::book')
|
|
362
|
+
expect(self_nodes.length).to eq(1)
|
|
363
|
+
end
|
|
364
|
+
end
|
|
365
|
+
|
|
366
|
+
describe "Complex predicates" do
|
|
367
|
+
it "chains multiple predicates" do
|
|
368
|
+
result = doc.xpath('//book[@category="fiction"][.//year > 1940]')
|
|
369
|
+
expect(result.length).to eq(1) # Only "1984" (1949)
|
|
370
|
+
end
|
|
371
|
+
|
|
372
|
+
it "uses nested predicates" do
|
|
373
|
+
result = doc.xpath('//library[book[@category="fiction"]]')
|
|
374
|
+
expect(result.length).to eq(1)
|
|
375
|
+
end
|
|
376
|
+
|
|
377
|
+
it "combines functions in predicates" do
|
|
378
|
+
result = doc.xpath('//book[contains(.//title, "World") and .//year < 1950]')
|
|
379
|
+
expect(result.length).to eq(1)
|
|
380
|
+
expect(result[0].xpath('.//title')[0].text.strip).to eq('Brave New World')
|
|
381
|
+
end
|
|
382
|
+
end
|
|
383
|
+
|
|
384
|
+
describe "Text nodes" do
|
|
385
|
+
it "selects text nodes with text()" do
|
|
386
|
+
text_nodes = doc.xpath('//title/text()')
|
|
387
|
+
expect(text_nodes.length).to eq(3)
|
|
388
|
+
end
|
|
389
|
+
|
|
390
|
+
it "uses text() in predicates" do
|
|
391
|
+
result = doc.xpath('//title[text()="1984"]')
|
|
392
|
+
# Note: text() returns the raw text which includes whitespace
|
|
393
|
+
# This might not match due to whitespace, so we test it doesn't error
|
|
394
|
+
expect { result }.not_to raise_error
|
|
395
|
+
end
|
|
162
396
|
end
|
|
163
397
|
end
|
|
164
398
|
end
|
data.tar.gz.sig
CHANGED
|
Binary file
|
metadata
CHANGED
|
@@ -1,7 +1,7 @@
|
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
|
2
2
|
name: rxerces
|
|
3
3
|
version: !ruby/object:Gem::Version
|
|
4
|
-
version: 0.
|
|
4
|
+
version: 0.4.0
|
|
5
5
|
platform: ruby
|
|
6
6
|
authors:
|
|
7
7
|
- Daniel J. Berger
|
|
@@ -78,8 +78,9 @@ dependencies:
|
|
|
78
78
|
- - "~>"
|
|
79
79
|
- !ruby/object:Gem::Version
|
|
80
80
|
version: '3.12'
|
|
81
|
-
description:
|
|
82
|
-
|
|
81
|
+
description: |2
|
|
82
|
+
A Ruby XML library with Nokogiri-compatible API, powered by Xerces-C
|
|
83
|
+
instead of libxml2. It also optionally uses Xalan for XPath 1.0 compliance.
|
|
83
84
|
email: djberg96@gmail.com
|
|
84
85
|
executables: []
|
|
85
86
|
extensions:
|
metadata.gz.sig
CHANGED
|
Binary file
|