rxerces 0.2.0 → 0.4.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGES.md +20 -0
- data/README.md +26 -2
- data/examples/schema_example.rb +107 -0
- data/ext/rxerces/extconf.rb +42 -0
- data/ext/rxerces/rxerces.cpp +834 -7
- data/lib/rxerces/nokogiri.rb +1 -0
- data/lib/rxerces/version.rb +1 -1
- data/rxerces.gemspec +5 -2
- data/spec/document_spec.rb +54 -0
- data/spec/node_spec.rb +448 -0
- data/spec/nokogiri_compatibility_spec.rb +69 -0
- data/spec/rxerces_shared.rb +1 -1
- data/spec/schema_spec.rb +76 -0
- data/spec/xpath_spec.rb +252 -18
- data.tar.gz.sig +0 -0
- metadata +7 -3
- metadata.gz.sig +0 -0
data/ext/rxerces/rxerces.cpp
CHANGED
|
@@ -8,9 +8,33 @@
|
|
|
8
8
|
#include <xercesc/util/XercesDefs.hpp>
|
|
9
9
|
#include <xercesc/dom/DOMXPathResult.hpp>
|
|
10
10
|
#include <xercesc/dom/DOMXPathExpression.hpp>
|
|
11
|
+
#include <xercesc/sax/ErrorHandler.hpp>
|
|
12
|
+
#include <xercesc/sax/SAXParseException.hpp>
|
|
11
13
|
#include <sstream>
|
|
14
|
+
#include <vector>
|
|
15
|
+
|
|
16
|
+
#ifdef HAVE_XALAN
|
|
17
|
+
#include <xalanc/XPath/XPathEvaluator.hpp>
|
|
18
|
+
#include <xalanc/XPath/NodeRefList.hpp>
|
|
19
|
+
#include <xalanc/XPath/XObject.hpp>
|
|
20
|
+
#include <xalanc/XPath/XObjectFactoryDefault.hpp>
|
|
21
|
+
#include <xalanc/XPath/XPathEnvSupportDefault.hpp>
|
|
22
|
+
#include <xalanc/XPath/XPathExecutionContextDefault.hpp>
|
|
23
|
+
#include <xalanc/XPath/XPathConstructionContextDefault.hpp>
|
|
24
|
+
#include <xalanc/XPath/ElementPrefixResolverProxy.hpp>
|
|
25
|
+
#include <xalanc/XPath/XPathFactoryDefault.hpp>
|
|
26
|
+
#include <xalanc/XPath/XPathProcessorImpl.hpp>
|
|
27
|
+
#include <xalanc/XPath/XPath.hpp>
|
|
28
|
+
#include <xalanc/XercesParserLiaison/XercesParserLiaison.hpp>
|
|
29
|
+
#include <xalanc/XercesParserLiaison/XercesDOMSupport.hpp>
|
|
30
|
+
#include <xalanc/XercesParserLiaison/XercesDocumentWrapper.hpp>
|
|
31
|
+
#include <xalanc/PlatformSupport/XalanMemoryManagerDefault.hpp>
|
|
32
|
+
#endif
|
|
12
33
|
|
|
13
34
|
using namespace xercesc;
|
|
35
|
+
#ifdef HAVE_XALAN
|
|
36
|
+
using namespace xalanc;
|
|
37
|
+
#endif
|
|
14
38
|
|
|
15
39
|
VALUE rb_mRXerces;
|
|
16
40
|
VALUE rb_mXML;
|
|
@@ -19,9 +43,13 @@ VALUE rb_cNode;
|
|
|
19
43
|
VALUE rb_cNodeSet;
|
|
20
44
|
VALUE rb_cElement;
|
|
21
45
|
VALUE rb_cText;
|
|
46
|
+
VALUE rb_cSchema;
|
|
22
47
|
|
|
23
|
-
//
|
|
48
|
+
// Initialization flags
|
|
24
49
|
static bool xerces_initialized = false;
|
|
50
|
+
#ifdef HAVE_XALAN
|
|
51
|
+
static bool xalan_initialized = false;
|
|
52
|
+
#endif
|
|
25
53
|
|
|
26
54
|
// Helper class to manage XMLCh strings
|
|
27
55
|
class XStr {
|
|
@@ -82,6 +110,39 @@ typedef struct {
|
|
|
82
110
|
VALUE nodes_array;
|
|
83
111
|
} NodeSetWrapper;
|
|
84
112
|
|
|
113
|
+
// Native payload stored inside an RXerces::XML::Schema Ruby object.
// schemaContent points at a heap-allocated std::string holding the schema
// text; schema_free() deletes it when the Ruby object is reclaimed.
struct SchemaWrapper {
    std::string* schemaContent;
};
|
|
117
|
+
|
|
118
|
+
// Error handler for schema validation
|
|
119
|
+
class ValidationErrorHandler : public ErrorHandler {
|
|
120
|
+
public:
|
|
121
|
+
std::vector<std::string> errors;
|
|
122
|
+
|
|
123
|
+
void warning(const SAXParseException& e) {
|
|
124
|
+
char* msg = XMLString::transcode(e.getMessage());
|
|
125
|
+
errors.push_back(std::string("Warning: ") + msg);
|
|
126
|
+
XMLString::release(&msg);
|
|
127
|
+
}
|
|
128
|
+
|
|
129
|
+
void error(const SAXParseException& e) {
|
|
130
|
+
char* msg = XMLString::transcode(e.getMessage());
|
|
131
|
+
errors.push_back(std::string("Error: ") + msg);
|
|
132
|
+
XMLString::release(&msg);
|
|
133
|
+
}
|
|
134
|
+
|
|
135
|
+
void fatalError(const SAXParseException& e) {
|
|
136
|
+
char* msg = XMLString::transcode(e.getMessage());
|
|
137
|
+
errors.push_back(std::string("Fatal: ") + msg);
|
|
138
|
+
XMLString::release(&msg);
|
|
139
|
+
}
|
|
140
|
+
|
|
141
|
+
void resetErrors() {
|
|
142
|
+
errors.clear();
|
|
143
|
+
}
|
|
144
|
+
};
|
|
145
|
+
|
|
85
146
|
// Memory management functions
|
|
86
147
|
static void document_free(void* ptr) {
|
|
87
148
|
DocumentWrapper* wrapper = (DocumentWrapper*)ptr;
|
|
@@ -109,6 +170,16 @@ static void nodeset_free(void* ptr) {
|
|
|
109
170
|
}
|
|
110
171
|
}
|
|
111
172
|
|
|
173
|
+
static void schema_free(void* ptr) {
|
|
174
|
+
SchemaWrapper* wrapper = (SchemaWrapper*)ptr;
|
|
175
|
+
if (wrapper) {
|
|
176
|
+
if (wrapper->schemaContent) {
|
|
177
|
+
delete wrapper->schemaContent;
|
|
178
|
+
}
|
|
179
|
+
xfree(wrapper);
|
|
180
|
+
}
|
|
181
|
+
}
|
|
182
|
+
|
|
112
183
|
static size_t document_size(const void* ptr) {
|
|
113
184
|
return sizeof(DocumentWrapper);
|
|
114
185
|
}
|
|
@@ -121,6 +192,10 @@ static size_t nodeset_size(const void* ptr) {
|
|
|
121
192
|
return sizeof(NodeSetWrapper);
|
|
122
193
|
}
|
|
123
194
|
|
|
195
|
+
static size_t schema_size(const void* ptr) {
|
|
196
|
+
return sizeof(SchemaWrapper);
|
|
197
|
+
}
|
|
198
|
+
|
|
124
199
|
static const rb_data_type_t document_type = {
|
|
125
200
|
"RXerces::XML::Document",
|
|
126
201
|
{0, document_free, document_size},
|
|
@@ -142,6 +217,13 @@ static const rb_data_type_t nodeset_type = {
|
|
|
142
217
|
RUBY_TYPED_FREE_IMMEDIATELY
|
|
143
218
|
};
|
|
144
219
|
|
|
220
|
+
// Typed-data descriptor that ties SchemaWrapper to RXerces::XML::Schema.
// No mark function is registered; SchemaWrapper only holds a std::string*.
static const rb_data_type_t schema_type = {
    "RXerces::XML::Schema",      // type name
    {
        0,                       // mark
        schema_free,             // free
        schema_size              // size
    },
    0,                           // parent type
    0,                           // user data
    RUBY_TYPED_FREE_IMMEDIATELY  // flags
};
|
|
226
|
+
|
|
145
227
|
// Helper to create Ruby Node object from DOMNode
|
|
146
228
|
static VALUE wrap_node(DOMNode* node, VALUE doc_ref) {
|
|
147
229
|
if (!node) {
|
|
@@ -188,7 +270,7 @@ static VALUE document_parse(VALUE klass, VALUE str) {
|
|
|
188
270
|
|
|
189
271
|
XercesDOMParser* parser = new XercesDOMParser();
|
|
190
272
|
parser->setValidationScheme(XercesDOMParser::Val_Never);
|
|
191
|
-
parser->setDoNamespaces(
|
|
273
|
+
parser->setDoNamespaces(true);
|
|
192
274
|
parser->setDoSchema(false);
|
|
193
275
|
|
|
194
276
|
try {
|
|
@@ -261,6 +343,161 @@ static VALUE document_to_s(VALUE self) {
|
|
|
261
343
|
return Qnil;
|
|
262
344
|
}
|
|
263
345
|
|
|
346
|
+
// document.encoding
|
|
347
|
+
static VALUE document_encoding(VALUE self) {
|
|
348
|
+
DocumentWrapper* wrapper;
|
|
349
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
|
|
350
|
+
|
|
351
|
+
if (!wrapper->doc) {
|
|
352
|
+
return Qnil;
|
|
353
|
+
}
|
|
354
|
+
|
|
355
|
+
const XMLCh* encoding = wrapper->doc->getXmlEncoding();
|
|
356
|
+
if (!encoding || XMLString::stringLen(encoding) == 0) {
|
|
357
|
+
// Default to UTF-8 if no encoding is specified
|
|
358
|
+
return rb_str_new_cstr("UTF-8");
|
|
359
|
+
}
|
|
360
|
+
|
|
361
|
+
CharStr utf8_encoding(encoding);
|
|
362
|
+
return rb_str_new_cstr(utf8_encoding.localForm());
|
|
363
|
+
}
|
|
364
|
+
|
|
365
|
+
// document.create_element(name)
|
|
366
|
+
static VALUE document_create_element(VALUE self, VALUE name) {
|
|
367
|
+
DocumentWrapper* doc_wrapper;
|
|
368
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, doc_wrapper);
|
|
369
|
+
|
|
370
|
+
if (!doc_wrapper->doc) {
|
|
371
|
+
rb_raise(rb_eRuntimeError, "Cannot create element on null document");
|
|
372
|
+
}
|
|
373
|
+
|
|
374
|
+
Check_Type(name, T_STRING);
|
|
375
|
+
const char* element_name = StringValueCStr(name);
|
|
376
|
+
|
|
377
|
+
try {
|
|
378
|
+
XMLCh* element_name_xml = XMLString::transcode(element_name);
|
|
379
|
+
DOMElement* element = doc_wrapper->doc->createElement(element_name_xml);
|
|
380
|
+
XMLString::release(&element_name_xml);
|
|
381
|
+
|
|
382
|
+
if (!element) {
|
|
383
|
+
rb_raise(rb_eRuntimeError, "Failed to create element");
|
|
384
|
+
}
|
|
385
|
+
|
|
386
|
+
return wrap_node(element, self);
|
|
387
|
+
|
|
388
|
+
} catch (const DOMException& e) {
|
|
389
|
+
char* message = XMLString::transcode(e.getMessage());
|
|
390
|
+
VALUE rb_error = rb_str_new_cstr(message);
|
|
391
|
+
XMLString::release(&message);
|
|
392
|
+
rb_raise(rb_eRuntimeError, "Failed to create element: %s", StringValueCStr(rb_error));
|
|
393
|
+
} catch (...) {
|
|
394
|
+
rb_raise(rb_eRuntimeError, "Unknown error creating element");
|
|
395
|
+
}
|
|
396
|
+
|
|
397
|
+
return Qnil;
|
|
398
|
+
}
|
|
399
|
+
|
|
400
|
+
#ifdef HAVE_XALAN
// Helper function to execute XPath using Xalan for full XPath 1.0 support.
//
// context_node - Xerces node the expression is evaluated against.
// xpath_str    - XPath expression text.
// doc_ref      - Ruby Document object passed to wrap_node for every match.
//
// Returns a NodeSet wrapping the matching nodes; the set is empty when the
// context node has no owning document. Raises RuntimeError when Xalan or
// Xerces reports a failure (including a result that cannot be converted to a
// node set).
//
// Errors detected inside the try block are recorded in `error_message` and
// raised only after the block has been left. rb_raise does not return
// normally, so raising while the Xalan support objects (or a caught
// exception) are still alive would skip their cleanup.
// NOTE(review): wrap_node/rb_ary_push can still raise from inside the result
// loop (e.g. on allocation failure); that path is unchanged from before.
static VALUE execute_xpath_with_xalan(DOMNode* context_node, const char* xpath_str, VALUE doc_ref) {
    VALUE nodes_array = rb_ary_new();
    VALUE error_message = Qnil;

    try {
        // Initialize Xalan if needed. Xerces must be initialized first.
        if (!xalan_initialized) {
            XMLPlatformUtils::Initialize();
            XPathEvaluator::initialize();
            xalan_initialized = true;
        }

        // Get the document
        DOMDocument* domDoc = context_node->getOwnerDocument();
        if (!domDoc && context_node->getNodeType() == DOMNode::DOCUMENT_NODE) {
            domDoc = static_cast<DOMDocument*>(context_node);
        }

        if (domDoc) {
            // Create Xalan support objects
            XercesParserLiaison liaison;
            XercesDOMSupport domSupport(liaison);

            // Create Xalan document - this creates and returns a XercesDocumentWrapper
            XalanDocument* xalanDoc = liaison.createDocument(domDoc, false, false, false);

            if (!xalanDoc) {
                error_message = rb_str_new_cstr("Failed to create Xalan document wrapper");
            } else {
                // The document IS the wrapper
                XercesDocumentWrapper* docWrapper = static_cast<XercesDocumentWrapper*>(xalanDoc);

                // Map the context node to Xalan; fall back to the document itself
                XalanNode* xalanContextNode = docWrapper->mapNode(context_node);
                if (!xalanContextNode) {
                    xalanContextNode = docWrapper;
                }

                // Set up XPath factories and contexts
                XPathEnvSupportDefault envSupport;
                XObjectFactoryDefault objectFactory;
                XPathExecutionContextDefault executionContext(envSupport, domSupport, objectFactory);
                XPathConstructionContextDefault constructionContext;
                XPathFactoryDefault factory;

                // Create XPath
                XPathProcessorImpl processor;
                XPath* xpath = factory.create();

                // Compile XPath expression
                ElementPrefixResolverProxy resolver(docWrapper->getDocumentElement(), envSupport, domSupport);
                processor.initXPath(*xpath, constructionContext, XalanDOMString(xpath_str), resolver);

                // Execute XPath query
                const XObjectPtr result = xpath->execute(xalanContextNode, resolver, executionContext);

                if (result.get() != 0) {
                    const NodeRefListBase& nodeList = result->nodeset();
                    const NodeRefListBase::size_type length = nodeList.getLength();

                    for (NodeRefListBase::size_type i = 0; i < length; ++i) {
                        XalanNode* xalanNode = nodeList.item(i);
                        if (xalanNode) {
                            // Map back to Xerces DOM node
                            const DOMNode* domNode = docWrapper->mapNode(xalanNode);
                            if (domNode) {
                                rb_ary_push(nodes_array, wrap_node(const_cast<DOMNode*>(domNode), doc_ref));
                            }
                        }
                    }
                }

                factory.returnObject(xpath);
            }
        }
    } catch (const XalanXPathException& e) {
        CharStr msg(e.getMessage().c_str());
        error_message = rb_sprintf("XPath error: %s", msg.localForm());
    } catch (const XMLException& e) {
        CharStr message(e.getMessage());
        error_message = rb_sprintf("XML error: %s", message.localForm());
    } catch (...) {
        error_message = rb_str_new_cstr("Unknown XPath error");
    }

    if (!NIL_P(error_message)) {
        rb_raise(rb_eRuntimeError, "%s", StringValueCStr(error_message));
    }

    NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
    wrapper->nodes_array = nodes_array;
    return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
}
#endif
|
|
500
|
+
|
|
264
501
|
// document.xpath(path)
|
|
265
502
|
static VALUE document_xpath(VALUE self, VALUE path) {
|
|
266
503
|
DocumentWrapper* doc_wrapper;
|
|
@@ -275,6 +512,17 @@ static VALUE document_xpath(VALUE self, VALUE path) {
|
|
|
275
512
|
Check_Type(path, T_STRING);
|
|
276
513
|
const char* xpath_str = StringValueCStr(path);
|
|
277
514
|
|
|
515
|
+
#ifdef HAVE_XALAN
|
|
516
|
+
// Use Xalan for full XPath 1.0 support
|
|
517
|
+
DOMElement* root = doc_wrapper->doc->getDocumentElement();
|
|
518
|
+
if (!root) {
|
|
519
|
+
NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
|
|
520
|
+
wrapper->nodes_array = rb_ary_new();
|
|
521
|
+
return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
|
|
522
|
+
}
|
|
523
|
+
return execute_xpath_with_xalan(root, xpath_str, self);
|
|
524
|
+
#else
|
|
525
|
+
// Fall back to Xerces XPath subset
|
|
278
526
|
try {
|
|
279
527
|
DOMElement* root = doc_wrapper->doc->getDocumentElement();
|
|
280
528
|
if (!root) {
|
|
@@ -324,6 +572,7 @@ static VALUE document_xpath(VALUE self, VALUE path) {
|
|
|
324
572
|
NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
|
|
325
573
|
wrapper->nodes_array = rb_ary_new();
|
|
326
574
|
return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
|
|
575
|
+
#endif
|
|
327
576
|
}
|
|
328
577
|
|
|
329
578
|
// node.name
|
|
@@ -341,6 +590,24 @@ static VALUE node_name(VALUE self) {
|
|
|
341
590
|
return rb_str_new_cstr(utf8_name.localForm());
|
|
342
591
|
}
|
|
343
592
|
|
|
593
|
+
// node.namespace
|
|
594
|
+
static VALUE node_namespace(VALUE self) {
|
|
595
|
+
NodeWrapper* wrapper;
|
|
596
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
597
|
+
|
|
598
|
+
if (!wrapper->node) {
|
|
599
|
+
return Qnil;
|
|
600
|
+
}
|
|
601
|
+
|
|
602
|
+
const XMLCh* namespaceURI = wrapper->node->getNamespaceURI();
|
|
603
|
+
if (!namespaceURI || XMLString::stringLen(namespaceURI) == 0) {
|
|
604
|
+
return Qnil;
|
|
605
|
+
}
|
|
606
|
+
|
|
607
|
+
CharStr utf8_namespace(namespaceURI);
|
|
608
|
+
return rb_str_new_cstr(utf8_namespace.localForm());
|
|
609
|
+
}
|
|
610
|
+
|
|
344
611
|
// node.text / node.content
|
|
345
612
|
static VALUE node_text(VALUE self) {
|
|
346
613
|
NodeWrapper* wrapper;
|
|
@@ -443,6 +710,335 @@ static VALUE node_children(VALUE self) {
|
|
|
443
710
|
return children;
|
|
444
711
|
}
|
|
445
712
|
|
|
713
|
+
// node.parent
|
|
714
|
+
static VALUE node_parent(VALUE self) {
|
|
715
|
+
NodeWrapper* wrapper;
|
|
716
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
717
|
+
|
|
718
|
+
if (!wrapper->node) {
|
|
719
|
+
return Qnil;
|
|
720
|
+
}
|
|
721
|
+
|
|
722
|
+
DOMNode* parent = wrapper->node->getParentNode();
|
|
723
|
+
if (!parent) {
|
|
724
|
+
return Qnil;
|
|
725
|
+
}
|
|
726
|
+
|
|
727
|
+
VALUE doc_ref = rb_iv_get(self, "@document");
|
|
728
|
+
return wrap_node(parent, doc_ref);
|
|
729
|
+
}
|
|
730
|
+
|
|
731
|
+
// node.attributes - returns hash of all attributes (only for element nodes)
|
|
732
|
+
static VALUE node_attributes(VALUE self) {
|
|
733
|
+
NodeWrapper* wrapper;
|
|
734
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
735
|
+
|
|
736
|
+
if (!wrapper->node || wrapper->node->getNodeType() != DOMNode::ELEMENT_NODE) {
|
|
737
|
+
return rb_hash_new();
|
|
738
|
+
}
|
|
739
|
+
|
|
740
|
+
DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
|
|
741
|
+
DOMNamedNodeMap* attributes = element->getAttributes();
|
|
742
|
+
|
|
743
|
+
if (!attributes) {
|
|
744
|
+
return rb_hash_new();
|
|
745
|
+
}
|
|
746
|
+
|
|
747
|
+
VALUE hash = rb_hash_new();
|
|
748
|
+
XMLSize_t length = attributes->getLength();
|
|
749
|
+
|
|
750
|
+
for (XMLSize_t i = 0; i < length; i++) {
|
|
751
|
+
DOMNode* attr = attributes->item(i);
|
|
752
|
+
if (attr) {
|
|
753
|
+
const XMLCh* name = attr->getNodeName();
|
|
754
|
+
const XMLCh* value = attr->getNodeValue();
|
|
755
|
+
|
|
756
|
+
CharStr attr_name(name);
|
|
757
|
+
CharStr attr_value(value);
|
|
758
|
+
|
|
759
|
+
rb_hash_aset(hash,
|
|
760
|
+
rb_str_new_cstr(attr_name.localForm()),
|
|
761
|
+
rb_str_new_cstr(attr_value.localForm()));
|
|
762
|
+
}
|
|
763
|
+
}
|
|
764
|
+
|
|
765
|
+
return hash;
|
|
766
|
+
}
|
|
767
|
+
|
|
768
|
+
// node.next_sibling
|
|
769
|
+
static VALUE node_next_sibling(VALUE self) {
|
|
770
|
+
NodeWrapper* wrapper;
|
|
771
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
772
|
+
|
|
773
|
+
if (!wrapper->node) {
|
|
774
|
+
return Qnil;
|
|
775
|
+
}
|
|
776
|
+
|
|
777
|
+
DOMNode* next = wrapper->node->getNextSibling();
|
|
778
|
+
if (!next) {
|
|
779
|
+
return Qnil;
|
|
780
|
+
}
|
|
781
|
+
|
|
782
|
+
VALUE doc_ref = rb_iv_get(self, "@document");
|
|
783
|
+
return wrap_node(next, doc_ref);
|
|
784
|
+
}
|
|
785
|
+
|
|
786
|
+
// node.previous_sibling
|
|
787
|
+
static VALUE node_previous_sibling(VALUE self) {
|
|
788
|
+
NodeWrapper* wrapper;
|
|
789
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
790
|
+
|
|
791
|
+
if (!wrapper->node) {
|
|
792
|
+
return Qnil;
|
|
793
|
+
}
|
|
794
|
+
|
|
795
|
+
DOMNode* prev = wrapper->node->getPreviousSibling();
|
|
796
|
+
if (!prev) {
|
|
797
|
+
return Qnil;
|
|
798
|
+
}
|
|
799
|
+
|
|
800
|
+
VALUE doc_ref = rb_iv_get(self, "@document");
|
|
801
|
+
return wrap_node(prev, doc_ref);
|
|
802
|
+
}
|
|
803
|
+
|
|
804
|
+
// node.add_child(node_or_string) - adds a child node
|
|
805
|
+
static VALUE node_add_child(VALUE self, VALUE child) {
|
|
806
|
+
NodeWrapper* wrapper;
|
|
807
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
808
|
+
|
|
809
|
+
if (!wrapper->node) {
|
|
810
|
+
rb_raise(rb_eRuntimeError, "Cannot add child to null node");
|
|
811
|
+
}
|
|
812
|
+
|
|
813
|
+
DOMDocument* doc = wrapper->node->getOwnerDocument();
|
|
814
|
+
if (!doc) {
|
|
815
|
+
rb_raise(rb_eRuntimeError, "Node has no owner document");
|
|
816
|
+
}
|
|
817
|
+
|
|
818
|
+
DOMNode* child_node = NULL;
|
|
819
|
+
|
|
820
|
+
// Check if child is a string or a node
|
|
821
|
+
if (TYPE(child) == T_STRING) {
|
|
822
|
+
// Create a text node from the string
|
|
823
|
+
const char* text_str = StringValueCStr(child);
|
|
824
|
+
XMLCh* text_content = XMLString::transcode(text_str);
|
|
825
|
+
child_node = doc->createTextNode(text_content);
|
|
826
|
+
XMLString::release(&text_content);
|
|
827
|
+
} else {
|
|
828
|
+
// Assume it's a Node object
|
|
829
|
+
NodeWrapper* child_wrapper;
|
|
830
|
+
if (rb_obj_is_kind_of(child, rb_cNode)) {
|
|
831
|
+
TypedData_Get_Struct(child, NodeWrapper, &node_type, child_wrapper);
|
|
832
|
+
child_node = child_wrapper->node;
|
|
833
|
+
} else {
|
|
834
|
+
rb_raise(rb_eTypeError, "Argument must be a String or Node");
|
|
835
|
+
}
|
|
836
|
+
}
|
|
837
|
+
|
|
838
|
+
if (!child_node) {
|
|
839
|
+
rb_raise(rb_eRuntimeError, "Failed to create child node");
|
|
840
|
+
}
|
|
841
|
+
|
|
842
|
+
try {
|
|
843
|
+
wrapper->node->appendChild(child_node);
|
|
844
|
+
} catch (const DOMException& e) {
|
|
845
|
+
char* message = XMLString::transcode(e.getMessage());
|
|
846
|
+
VALUE rb_error = rb_str_new_cstr(message);
|
|
847
|
+
XMLString::release(&message);
|
|
848
|
+
rb_raise(rb_eRuntimeError, "Failed to add child: %s", StringValueCStr(rb_error));
|
|
849
|
+
}
|
|
850
|
+
|
|
851
|
+
return child;
|
|
852
|
+
}
|
|
853
|
+
|
|
854
|
+
// node.remove / node.unlink - removes node from its parent
|
|
855
|
+
static VALUE node_remove(VALUE self) {
|
|
856
|
+
NodeWrapper* wrapper;
|
|
857
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
858
|
+
|
|
859
|
+
if (!wrapper->node) {
|
|
860
|
+
rb_raise(rb_eRuntimeError, "Cannot remove null node");
|
|
861
|
+
}
|
|
862
|
+
|
|
863
|
+
DOMNode* parent = wrapper->node->getParentNode();
|
|
864
|
+
if (!parent) {
|
|
865
|
+
rb_raise(rb_eRuntimeError, "Node has no parent to remove from");
|
|
866
|
+
}
|
|
867
|
+
|
|
868
|
+
try {
|
|
869
|
+
parent->removeChild(wrapper->node);
|
|
870
|
+
} catch (const DOMException& e) {
|
|
871
|
+
char* message = XMLString::transcode(e.getMessage());
|
|
872
|
+
VALUE rb_error = rb_str_new_cstr(message);
|
|
873
|
+
XMLString::release(&message);
|
|
874
|
+
rb_raise(rb_eRuntimeError, "Failed to remove node: %s", StringValueCStr(rb_error));
|
|
875
|
+
}
|
|
876
|
+
|
|
877
|
+
return self;
|
|
878
|
+
}
|
|
879
|
+
|
|
880
|
+
// node.inner_html / node.inner_xml - returns XML content of children
|
|
881
|
+
static VALUE node_inner_html(VALUE self) {
|
|
882
|
+
NodeWrapper* wrapper;
|
|
883
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
884
|
+
|
|
885
|
+
if (!wrapper->node) {
|
|
886
|
+
return rb_str_new_cstr("");
|
|
887
|
+
}
|
|
888
|
+
|
|
889
|
+
try {
|
|
890
|
+
DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation(XStr("LS").unicodeForm());
|
|
891
|
+
DOMLSSerializer* serializer = ((DOMImplementationLS*)impl)->createLSSerializer();
|
|
892
|
+
|
|
893
|
+
// Build a string by serializing each child
|
|
894
|
+
std::string result;
|
|
895
|
+
DOMNodeList* children = wrapper->node->getChildNodes();
|
|
896
|
+
XMLSize_t count = children->getLength();
|
|
897
|
+
|
|
898
|
+
for (XMLSize_t i = 0; i < count; i++) {
|
|
899
|
+
DOMNode* child = children->item(i);
|
|
900
|
+
XMLCh* xml_str = serializer->writeToString(child);
|
|
901
|
+
CharStr utf8_str(xml_str);
|
|
902
|
+
result += utf8_str.localForm();
|
|
903
|
+
XMLString::release(&xml_str);
|
|
904
|
+
}
|
|
905
|
+
|
|
906
|
+
serializer->release();
|
|
907
|
+
return rb_str_new_cstr(result.c_str());
|
|
908
|
+
} catch (const DOMException& e) {
|
|
909
|
+
char* message = XMLString::transcode(e.getMessage());
|
|
910
|
+
VALUE rb_error = rb_str_new_cstr(message);
|
|
911
|
+
XMLString::release(&message);
|
|
912
|
+
rb_raise(rb_eRuntimeError, "Failed to serialize inner content: %s", StringValueCStr(rb_error));
|
|
913
|
+
} catch (...) {
|
|
914
|
+
rb_raise(rb_eRuntimeError, "Failed to serialize inner content");
|
|
915
|
+
}
|
|
916
|
+
|
|
917
|
+
return rb_str_new_cstr("");
|
|
918
|
+
}
|
|
919
|
+
|
|
920
|
+
// node.path - returns XPath to the node
|
|
921
|
+
static VALUE node_path(VALUE self) {
|
|
922
|
+
NodeWrapper* wrapper;
|
|
923
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
924
|
+
|
|
925
|
+
if (!wrapper->node) {
|
|
926
|
+
return rb_str_new_cstr("");
|
|
927
|
+
}
|
|
928
|
+
|
|
929
|
+
std::string path = "";
|
|
930
|
+
DOMNode* current = wrapper->node;
|
|
931
|
+
|
|
932
|
+
// Build path from current node to root
|
|
933
|
+
while (current && current->getNodeType() != DOMNode::DOCUMENT_NODE) {
|
|
934
|
+
std::string segment = "";
|
|
935
|
+
|
|
936
|
+
if (current->getNodeType() == DOMNode::ELEMENT_NODE) {
|
|
937
|
+
CharStr name(current->getNodeName());
|
|
938
|
+
segment = std::string(name.localForm());
|
|
939
|
+
|
|
940
|
+
// Count position among siblings with same name
|
|
941
|
+
int position = 1;
|
|
942
|
+
DOMNode* sibling = current->getPreviousSibling();
|
|
943
|
+
while (sibling) {
|
|
944
|
+
if (sibling->getNodeType() == DOMNode::ELEMENT_NODE &&
|
|
945
|
+
XMLString::equals(sibling->getNodeName(), current->getNodeName())) {
|
|
946
|
+
position++;
|
|
947
|
+
}
|
|
948
|
+
sibling = sibling->getPreviousSibling();
|
|
949
|
+
}
|
|
950
|
+
|
|
951
|
+
// Add position predicate
|
|
952
|
+
segment += "[" + std::to_string(position) + "]";
|
|
953
|
+
path = "/" + segment + path;
|
|
954
|
+
} else if (current->getNodeType() == DOMNode::TEXT_NODE) {
|
|
955
|
+
// Count position among text node siblings
|
|
956
|
+
int position = 1;
|
|
957
|
+
DOMNode* sibling = current->getPreviousSibling();
|
|
958
|
+
while (sibling) {
|
|
959
|
+
if (sibling->getNodeType() == DOMNode::TEXT_NODE) {
|
|
960
|
+
position++;
|
|
961
|
+
}
|
|
962
|
+
sibling = sibling->getPreviousSibling();
|
|
963
|
+
}
|
|
964
|
+
path = "/text()[" + std::to_string(position) + "]" + path;
|
|
965
|
+
}
|
|
966
|
+
|
|
967
|
+
current = current->getParentNode();
|
|
968
|
+
}
|
|
969
|
+
|
|
970
|
+
return rb_str_new_cstr(path.c_str());
|
|
971
|
+
}
|
|
972
|
+
|
|
973
|
+
// node.blank? - returns true if node has no meaningful content
|
|
974
|
+
static VALUE node_blank_p(VALUE self) {
|
|
975
|
+
NodeWrapper* wrapper;
|
|
976
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
977
|
+
|
|
978
|
+
if (!wrapper->node) {
|
|
979
|
+
return Qtrue;
|
|
980
|
+
}
|
|
981
|
+
|
|
982
|
+
// Text nodes are blank if they contain only whitespace
|
|
983
|
+
if (wrapper->node->getNodeType() == DOMNode::TEXT_NODE) {
|
|
984
|
+
const XMLCh* text_content = wrapper->node->getNodeValue();
|
|
985
|
+
if (!text_content) {
|
|
986
|
+
return Qtrue;
|
|
987
|
+
}
|
|
988
|
+
|
|
989
|
+
// Check if text contains only whitespace
|
|
990
|
+
CharStr utf8_text(text_content);
|
|
991
|
+
const char* str = utf8_text.localForm();
|
|
992
|
+
while (*str) {
|
|
993
|
+
if (!isspace((unsigned char)*str)) {
|
|
994
|
+
return Qfalse;
|
|
995
|
+
}
|
|
996
|
+
str++;
|
|
997
|
+
}
|
|
998
|
+
return Qtrue;
|
|
999
|
+
}
|
|
1000
|
+
|
|
1001
|
+
// Element nodes are blank if they have no child elements and no non-blank text
|
|
1002
|
+
if (wrapper->node->getNodeType() == DOMNode::ELEMENT_NODE) {
|
|
1003
|
+
DOMNodeList* children = wrapper->node->getChildNodes();
|
|
1004
|
+
XMLSize_t count = children->getLength();
|
|
1005
|
+
|
|
1006
|
+
if (count == 0) {
|
|
1007
|
+
return Qtrue;
|
|
1008
|
+
}
|
|
1009
|
+
|
|
1010
|
+
// Check if all children are blank text nodes
|
|
1011
|
+
for (XMLSize_t i = 0; i < count; i++) {
|
|
1012
|
+
DOMNode* child = children->item(i);
|
|
1013
|
+
|
|
1014
|
+
// If there's an element child, not blank
|
|
1015
|
+
if (child->getNodeType() == DOMNode::ELEMENT_NODE) {
|
|
1016
|
+
return Qfalse;
|
|
1017
|
+
}
|
|
1018
|
+
|
|
1019
|
+
// If there's a non-whitespace text node, not blank
|
|
1020
|
+
if (child->getNodeType() == DOMNode::TEXT_NODE) {
|
|
1021
|
+
const XMLCh* text_content = child->getNodeValue();
|
|
1022
|
+
if (text_content) {
|
|
1023
|
+
CharStr utf8_text(text_content);
|
|
1024
|
+
const char* str = utf8_text.localForm();
|
|
1025
|
+
while (*str) {
|
|
1026
|
+
if (!isspace((unsigned char)*str)) {
|
|
1027
|
+
return Qfalse;
|
|
1028
|
+
}
|
|
1029
|
+
str++;
|
|
1030
|
+
}
|
|
1031
|
+
}
|
|
1032
|
+
}
|
|
1033
|
+
}
|
|
1034
|
+
|
|
1035
|
+
return Qtrue;
|
|
1036
|
+
}
|
|
1037
|
+
|
|
1038
|
+
// Other node types are considered blank
|
|
1039
|
+
return Qtrue;
|
|
1040
|
+
}
|
|
1041
|
+
|
|
446
1042
|
// node.xpath(path)
|
|
447
1043
|
static VALUE node_xpath(VALUE self, VALUE path) {
|
|
448
1044
|
NodeWrapper* node_wrapper;
|
|
@@ -458,6 +1054,11 @@ static VALUE node_xpath(VALUE self, VALUE path) {
|
|
|
458
1054
|
const char* xpath_str = StringValueCStr(path);
|
|
459
1055
|
VALUE doc_ref = rb_iv_get(self, "@document");
|
|
460
1056
|
|
|
1057
|
+
#ifdef HAVE_XALAN
|
|
1058
|
+
// Use Xalan for full XPath 1.0 support
|
|
1059
|
+
return execute_xpath_with_xalan(node_wrapper->node, xpath_str, doc_ref);
|
|
1060
|
+
#else
|
|
1061
|
+
// Fall back to Xerces XPath subset
|
|
461
1062
|
try {
|
|
462
1063
|
DOMDocument* doc = node_wrapper->node->getOwnerDocument();
|
|
463
1064
|
if (!doc) {
|
|
@@ -507,6 +1108,26 @@ static VALUE node_xpath(VALUE self, VALUE path) {
|
|
|
507
1108
|
NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
|
|
508
1109
|
wrapper->nodes_array = rb_ary_new();
|
|
509
1110
|
return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
|
|
1111
|
+
#endif
|
|
1112
|
+
}
|
|
1113
|
+
|
|
1114
|
+
// node.at_xpath(path) - returns first matching node or nil
|
|
1115
|
+
static VALUE node_at_xpath(VALUE self, VALUE path) {
|
|
1116
|
+
VALUE nodeset = node_xpath(self, path);
|
|
1117
|
+
NodeSetWrapper* wrapper;
|
|
1118
|
+
TypedData_Get_Struct(nodeset, NodeSetWrapper, &nodeset_type, wrapper);
|
|
1119
|
+
|
|
1120
|
+
if (RARRAY_LEN(wrapper->nodes_array) == 0) {
|
|
1121
|
+
return Qnil;
|
|
1122
|
+
}
|
|
1123
|
+
|
|
1124
|
+
return rb_ary_entry(wrapper->nodes_array, 0);
|
|
1125
|
+
}
|
|
1126
|
+
|
|
1127
|
+
// node.css(selector) - CSS selectors not supported
|
|
1128
|
+
static VALUE node_css(VALUE self, VALUE selector) {
|
|
1129
|
+
rb_raise(rb_eNotImpError, "CSS selectors are not supported. Use xpath() instead. Xerces-C only supports XPath queries.");
|
|
1130
|
+
return Qnil;
|
|
510
1131
|
}
|
|
511
1132
|
|
|
512
1133
|
// nodeset.length / nodeset.size
|
|
@@ -550,7 +1171,188 @@ static VALUE nodeset_to_a(VALUE self) {
|
|
|
550
1171
|
return rb_ary_dup(wrapper->nodes_array);
|
|
551
1172
|
}
|
|
552
1173
|
|
|
553
|
-
|
|
1174
|
+
// Schema.from_document(schema_doc) or Schema.from_string(xsd_string)
|
|
1175
|
+
static VALUE schema_from_document(int argc, VALUE* argv, VALUE klass) {
|
|
1176
|
+
VALUE schema_source;
|
|
1177
|
+
rb_scan_args(argc, argv, "1", &schema_source);
|
|
1178
|
+
|
|
1179
|
+
// Ensure Xerces is initialized
|
|
1180
|
+
if (!xerces_initialized) {
|
|
1181
|
+
try {
|
|
1182
|
+
XMLPlatformUtils::Initialize();
|
|
1183
|
+
xerces_initialized = true;
|
|
1184
|
+
} catch (const XMLException& e) {
|
|
1185
|
+
char* message = XMLString::transcode(e.getMessage());
|
|
1186
|
+
VALUE rb_error = rb_str_new_cstr(message);
|
|
1187
|
+
XMLString::release(&message);
|
|
1188
|
+
rb_raise(rb_eRuntimeError, "Failed to initialize Xerces-C: %s", StringValueCStr(rb_error));
|
|
1189
|
+
}
|
|
1190
|
+
}
|
|
1191
|
+
|
|
1192
|
+
try {
|
|
1193
|
+
SchemaWrapper* wrapper = ALLOC(SchemaWrapper);
|
|
1194
|
+
wrapper->schemaContent = new std::string();
|
|
1195
|
+
|
|
1196
|
+
// Convert schema source to string
|
|
1197
|
+
std::string xsd_content;
|
|
1198
|
+
if (rb_obj_is_kind_of(schema_source, rb_cString)) {
|
|
1199
|
+
xsd_content = std::string(RSTRING_PTR(schema_source), RSTRING_LEN(schema_source));
|
|
1200
|
+
} else {
|
|
1201
|
+
// Assume it's a Document, call to_s
|
|
1202
|
+
VALUE str = rb_funcall(schema_source, rb_intern("to_s"), 0);
|
|
1203
|
+
xsd_content = std::string(RSTRING_PTR(str), RSTRING_LEN(str));
|
|
1204
|
+
}
|
|
1205
|
+
|
|
1206
|
+
// Store the schema content
|
|
1207
|
+
*wrapper->schemaContent = xsd_content;
|
|
1208
|
+
|
|
1209
|
+
// Validate that it's valid XML by trying to parse it
|
|
1210
|
+
XercesDOMParser* schemaParser = new XercesDOMParser();
|
|
1211
|
+
schemaParser->setValidationScheme(XercesDOMParser::Val_Never);
|
|
1212
|
+
schemaParser->setDoNamespaces(true);
|
|
1213
|
+
|
|
1214
|
+
// Parse the schema using MemBufInputSource
|
|
1215
|
+
MemBufInputSource schemaInput(
|
|
1216
|
+
(const XMLByte*)xsd_content.c_str(),
|
|
1217
|
+
xsd_content.length(),
|
|
1218
|
+
"schema"
|
|
1219
|
+
);
|
|
1220
|
+
|
|
1221
|
+
try {
|
|
1222
|
+
schemaParser->parse(schemaInput);
|
|
1223
|
+
} catch (...) {
|
|
1224
|
+
delete schemaParser;
|
|
1225
|
+
delete wrapper->schemaContent;
|
|
1226
|
+
xfree(wrapper);
|
|
1227
|
+
rb_raise(rb_eRuntimeError, "Schema parsing failed: Invalid XML");
|
|
1228
|
+
}
|
|
1229
|
+
|
|
1230
|
+
delete schemaParser;
|
|
1231
|
+
|
|
1232
|
+
VALUE rb_schema = TypedData_Wrap_Struct(klass, &schema_type, wrapper);
|
|
1233
|
+
return rb_schema;
|
|
1234
|
+
|
|
1235
|
+
} catch (const XMLException& e) {
|
|
1236
|
+
char* message = XMLString::transcode(e.getMessage());
|
|
1237
|
+
VALUE rb_error = rb_str_new_cstr(message);
|
|
1238
|
+
XMLString::release(&message);
|
|
1239
|
+
rb_raise(rb_eRuntimeError, "XMLException: %s", StringValueCStr(rb_error));
|
|
1240
|
+
} catch (const DOMException& e) {
|
|
1241
|
+
char* message = XMLString::transcode(e.getMessage());
|
|
1242
|
+
VALUE rb_error = rb_str_new_cstr(message);
|
|
1243
|
+
XMLString::release(&message);
|
|
1244
|
+
rb_raise(rb_eRuntimeError, "DOMException: %s", StringValueCStr(rb_error));
|
|
1245
|
+
} catch (...) {
|
|
1246
|
+
rb_raise(rb_eRuntimeError, "Unknown exception during schema parsing");
|
|
1247
|
+
}
|
|
1248
|
+
|
|
1249
|
+
return Qnil;
|
|
1250
|
+
}
|
|
1251
|
+
|
|
1252
|
+
// document.validate(schema) - returns array of error messages (empty if valid)
|
|
1253
|
+
static VALUE document_validate(VALUE self, VALUE rb_schema) {
|
|
1254
|
+
DocumentWrapper* doc_wrapper;
|
|
1255
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, doc_wrapper);
|
|
1256
|
+
|
|
1257
|
+
SchemaWrapper* schema_wrapper;
|
|
1258
|
+
TypedData_Get_Struct(rb_schema, SchemaWrapper, &schema_type, schema_wrapper);
|
|
1259
|
+
|
|
1260
|
+
try {
|
|
1261
|
+
// Serialize the document to UTF-8 for validation
|
|
1262
|
+
DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation(XMLString::transcode("LS"));
|
|
1263
|
+
DOMLSSerializer* serializer = ((DOMImplementationLS*)impl)->createLSSerializer();
|
|
1264
|
+
|
|
1265
|
+
// Use a MemBufFormatTarget to get UTF-8 encoded output
|
|
1266
|
+
MemBufFormatTarget target;
|
|
1267
|
+
DOMLSOutput* output = ((DOMImplementationLS*)impl)->createLSOutput();
|
|
1268
|
+
output->setByteStream(&target);
|
|
1269
|
+
|
|
1270
|
+
serializer->write(doc_wrapper->doc, output);
|
|
1271
|
+
|
|
1272
|
+
// Get the UTF-8 content
|
|
1273
|
+
std::string xml_content((const char*)target.getRawBuffer(), target.getLen());
|
|
1274
|
+
|
|
1275
|
+
output->release();
|
|
1276
|
+
serializer->release();
|
|
1277
|
+
|
|
1278
|
+
// Create a validating parser
|
|
1279
|
+
XercesDOMParser* validator = new XercesDOMParser();
|
|
1280
|
+
validator->setValidationScheme(XercesDOMParser::Val_Always);
|
|
1281
|
+
validator->setDoNamespaces(true);
|
|
1282
|
+
validator->setDoSchema(true);
|
|
1283
|
+
validator->setValidationSchemaFullChecking(true);
|
|
1284
|
+
|
|
1285
|
+
ValidationErrorHandler errorHandler;
|
|
1286
|
+
validator->setErrorHandler(&errorHandler);
|
|
1287
|
+
|
|
1288
|
+
// Create a combined input with both the schema and the document
|
|
1289
|
+
// First, we need to add schema location to the document
|
|
1290
|
+
std::string schema_location = "http://example.com/schema";
|
|
1291
|
+
|
|
1292
|
+
// Create memory buffers for both schema and document
|
|
1293
|
+
MemBufInputSource schemaSource(
|
|
1294
|
+
(const XMLByte*)schema_wrapper->schemaContent->c_str(),
|
|
1295
|
+
schema_wrapper->schemaContent->length(),
|
|
1296
|
+
"schema.xsd"
|
|
1297
|
+
);
|
|
1298
|
+
|
|
1299
|
+
// Load the schema grammar
|
|
1300
|
+
try {
|
|
1301
|
+
validator->loadGrammar(schemaSource, Grammar::SchemaGrammarType, true);
|
|
1302
|
+
validator->setExternalNoNamespaceSchemaLocation("schema.xsd");
|
|
1303
|
+
validator->useCachedGrammarInParse(true);
|
|
1304
|
+
} catch (...) {
|
|
1305
|
+
// If grammar loading fails, just note it
|
|
1306
|
+
errorHandler.errors.push_back("Warning: Schema grammar could not be loaded");
|
|
1307
|
+
}
|
|
1308
|
+
|
|
1309
|
+
// Now parse and validate the document
|
|
1310
|
+
MemBufInputSource docSource(
|
|
1311
|
+
(const XMLByte*)xml_content.c_str(),
|
|
1312
|
+
xml_content.length(),
|
|
1313
|
+
"document.xml"
|
|
1314
|
+
);
|
|
1315
|
+
|
|
1316
|
+
try {
|
|
1317
|
+
validator->parse(docSource);
|
|
1318
|
+
} catch (const XMLException& e) {
|
|
1319
|
+
char* message = XMLString::transcode(e.getMessage());
|
|
1320
|
+
errorHandler.errors.push_back(std::string("XMLException: ") + message);
|
|
1321
|
+
XMLString::release(&message);
|
|
1322
|
+
} catch (const DOMException& e) {
|
|
1323
|
+
char* message = XMLString::transcode(e.getMessage());
|
|
1324
|
+
errorHandler.errors.push_back(std::string("DOMException: ") + message);
|
|
1325
|
+
XMLString::release(&message);
|
|
1326
|
+
} catch (...) {
|
|
1327
|
+
errorHandler.errors.push_back("Unknown parsing exception");
|
|
1328
|
+
}
|
|
1329
|
+
|
|
1330
|
+
delete validator;
|
|
1331
|
+
|
|
1332
|
+
// Return array of error messages
|
|
1333
|
+
VALUE errors_array = rb_ary_new();
|
|
1334
|
+
for (const auto& err : errorHandler.errors) {
|
|
1335
|
+
rb_ary_push(errors_array, rb_str_new_cstr(err.c_str()));
|
|
1336
|
+
}
|
|
1337
|
+
|
|
1338
|
+
return errors_array;
|
|
1339
|
+
|
|
1340
|
+
} catch (const XMLException& e) {
|
|
1341
|
+
char* message = XMLString::transcode(e.getMessage());
|
|
1342
|
+
VALUE rb_error = rb_str_new_cstr(message);
|
|
1343
|
+
XMLString::release(&message);
|
|
1344
|
+
rb_raise(rb_eRuntimeError, "XMLException during validation: %s", StringValueCStr(rb_error));
|
|
1345
|
+
} catch (const DOMException& e) {
|
|
1346
|
+
char* message = XMLString::transcode(e.getMessage());
|
|
1347
|
+
VALUE rb_error = rb_str_new_cstr(message);
|
|
1348
|
+
XMLString::release(&message);
|
|
1349
|
+
rb_raise(rb_eRuntimeError, "DOMException during validation: %s", StringValueCStr(rb_error));
|
|
1350
|
+
} catch (...) {
|
|
1351
|
+
rb_raise(rb_eRuntimeError, "Unknown exception during validation");
|
|
1352
|
+
}
|
|
1353
|
+
|
|
1354
|
+
return Qnil;
|
|
1355
|
+
}extern "C" void Init_rxerces(void) {
|
|
554
1356
|
rb_mRXerces = rb_define_module("RXerces");
|
|
555
1357
|
rb_mXML = rb_define_module_under(rb_mRXerces, "XML");
|
|
556
1358
|
|
|
@@ -559,20 +1361,38 @@ extern "C" void Init_rxerces(void) {
|
|
|
559
1361
|
rb_define_singleton_method(rb_cDocument, "parse", RUBY_METHOD_FUNC(document_parse), 1);
|
|
560
1362
|
rb_define_method(rb_cDocument, "root", RUBY_METHOD_FUNC(document_root), 0);
|
|
561
1363
|
rb_define_method(rb_cDocument, "to_s", RUBY_METHOD_FUNC(document_to_s), 0);
|
|
562
|
-
|
|
1364
|
+
rb_define_alias(rb_cDocument, "to_xml", "to_s");
|
|
563
1365
|
rb_define_method(rb_cDocument, "xpath", RUBY_METHOD_FUNC(document_xpath), 1);
|
|
1366
|
+
rb_define_method(rb_cDocument, "encoding", RUBY_METHOD_FUNC(document_encoding), 0);
|
|
1367
|
+
rb_define_method(rb_cDocument, "create_element", RUBY_METHOD_FUNC(document_create_element), 1);
|
|
564
1368
|
|
|
565
1369
|
rb_cNode = rb_define_class_under(rb_mXML, "Node", rb_cObject);
|
|
566
1370
|
rb_undef_alloc_func(rb_cNode);
|
|
567
1371
|
rb_define_method(rb_cNode, "name", RUBY_METHOD_FUNC(node_name), 0);
|
|
1372
|
+
rb_define_method(rb_cNode, "namespace", RUBY_METHOD_FUNC(node_namespace), 0);
|
|
568
1373
|
rb_define_method(rb_cNode, "text", RUBY_METHOD_FUNC(node_text), 0);
|
|
569
|
-
|
|
1374
|
+
rb_define_alias(rb_cNode, "content", "text");
|
|
570
1375
|
rb_define_method(rb_cNode, "text=", RUBY_METHOD_FUNC(node_text_set), 1);
|
|
571
|
-
|
|
1376
|
+
rb_define_alias(rb_cNode, "content=", "text=");
|
|
572
1377
|
rb_define_method(rb_cNode, "[]", RUBY_METHOD_FUNC(node_get_attribute), 1);
|
|
573
1378
|
rb_define_method(rb_cNode, "[]=", RUBY_METHOD_FUNC(node_set_attribute), 2);
|
|
574
1379
|
rb_define_method(rb_cNode, "children", RUBY_METHOD_FUNC(node_children), 0);
|
|
1380
|
+
rb_define_method(rb_cNode, "parent", RUBY_METHOD_FUNC(node_parent), 0);
|
|
1381
|
+
rb_define_method(rb_cNode, "attributes", RUBY_METHOD_FUNC(node_attributes), 0);
|
|
1382
|
+
rb_define_method(rb_cNode, "next_sibling", RUBY_METHOD_FUNC(node_next_sibling), 0);
|
|
1383
|
+
rb_define_method(rb_cNode, "previous_sibling", RUBY_METHOD_FUNC(node_previous_sibling), 0);
|
|
1384
|
+
rb_define_method(rb_cNode, "add_child", RUBY_METHOD_FUNC(node_add_child), 1);
|
|
1385
|
+
rb_define_method(rb_cNode, "remove", RUBY_METHOD_FUNC(node_remove), 0);
|
|
1386
|
+
rb_define_alias(rb_cNode, "unlink", "remove");
|
|
1387
|
+
rb_define_method(rb_cNode, "inner_html", RUBY_METHOD_FUNC(node_inner_html), 0);
|
|
1388
|
+
rb_define_alias(rb_cNode, "inner_xml", "inner_html");
|
|
1389
|
+
rb_define_method(rb_cNode, "path", RUBY_METHOD_FUNC(node_path), 0);
|
|
1390
|
+
rb_define_method(rb_cNode, "blank?", RUBY_METHOD_FUNC(node_blank_p), 0);
|
|
575
1391
|
rb_define_method(rb_cNode, "xpath", RUBY_METHOD_FUNC(node_xpath), 1);
|
|
1392
|
+
rb_define_alias(rb_cNode, "search", "xpath");
|
|
1393
|
+
rb_define_method(rb_cNode, "at_xpath", RUBY_METHOD_FUNC(node_at_xpath), 1);
|
|
1394
|
+
rb_define_alias(rb_cNode, "at", "at_xpath");
|
|
1395
|
+
rb_define_method(rb_cNode, "css", RUBY_METHOD_FUNC(node_css), 1);
|
|
576
1396
|
|
|
577
1397
|
rb_cElement = rb_define_class_under(rb_mXML, "Element", rb_cNode);
|
|
578
1398
|
rb_undef_alloc_func(rb_cElement);
|
|
@@ -583,9 +1403,16 @@ extern "C" void Init_rxerces(void) {
|
|
|
583
1403
|
rb_cNodeSet = rb_define_class_under(rb_mXML, "NodeSet", rb_cObject);
|
|
584
1404
|
rb_undef_alloc_func(rb_cNodeSet);
|
|
585
1405
|
rb_define_method(rb_cNodeSet, "length", RUBY_METHOD_FUNC(nodeset_length), 0);
|
|
586
|
-
|
|
1406
|
+
rb_define_alias(rb_cNodeSet, "size", "length");
|
|
587
1407
|
rb_define_method(rb_cNodeSet, "[]", RUBY_METHOD_FUNC(nodeset_at), 1);
|
|
588
1408
|
rb_define_method(rb_cNodeSet, "each", RUBY_METHOD_FUNC(nodeset_each), 0);
|
|
589
1409
|
rb_define_method(rb_cNodeSet, "to_a", RUBY_METHOD_FUNC(nodeset_to_a), 0);
|
|
590
1410
|
rb_include_module(rb_cNodeSet, rb_mEnumerable);
|
|
1411
|
+
|
|
1412
|
+
rb_cSchema = rb_define_class_under(rb_mXML, "Schema", rb_cObject);
|
|
1413
|
+
rb_undef_alloc_func(rb_cSchema);
|
|
1414
|
+
rb_define_singleton_method(rb_cSchema, "from_document", RUBY_METHOD_FUNC(schema_from_document), -1);
|
|
1415
|
+
rb_define_singleton_method(rb_cSchema, "from_string", RUBY_METHOD_FUNC(schema_from_document), -1);
|
|
1416
|
+
|
|
1417
|
+
rb_define_method(rb_cDocument, "validate", RUBY_METHOD_FUNC(document_validate), 1);
|
|
591
1418
|
}
|