rxerces 0.2.0 → 0.3.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGES.md +12 -0
- data/examples/schema_example.rb +107 -0
- data/ext/rxerces/rxerces.cpp +623 -1
- data/lib/rxerces/nokogiri.rb +1 -0
- data/lib/rxerces/version.rb +1 -1
- data/rxerces.gemspec +1 -1
- data/spec/document_spec.rb +35 -0
- data/spec/node_spec.rb +361 -0
- data/spec/nokogiri_compatibility_spec.rb +69 -0
- data/spec/rxerces_shared.rb +1 -1
- data/spec/schema_spec.rb +76 -0
- data.tar.gz.sig +0 -0
- metadata +4 -1
- metadata.gz.sig +2 -2
data/ext/rxerces/rxerces.cpp
CHANGED
|
@@ -8,7 +8,10 @@
|
|
|
8
8
|
#include <xercesc/util/XercesDefs.hpp>
|
|
9
9
|
#include <xercesc/dom/DOMXPathResult.hpp>
|
|
10
10
|
#include <xercesc/dom/DOMXPathExpression.hpp>
|
|
11
|
+
#include <xercesc/sax/ErrorHandler.hpp>
|
|
12
|
+
#include <xercesc/sax/SAXParseException.hpp>
|
|
11
13
|
#include <sstream>
|
|
14
|
+
#include <vector>
|
|
12
15
|
|
|
13
16
|
using namespace xercesc;
|
|
14
17
|
|
|
@@ -19,6 +22,7 @@ VALUE rb_cNode;
|
|
|
19
22
|
VALUE rb_cNodeSet;
|
|
20
23
|
VALUE rb_cElement;
|
|
21
24
|
VALUE rb_cText;
|
|
25
|
+
VALUE rb_cSchema;
|
|
22
26
|
|
|
23
27
|
// Xerces initialization flag
|
|
24
28
|
static bool xerces_initialized = false;
|
|
@@ -82,6 +86,39 @@ typedef struct {
|
|
|
82
86
|
VALUE nodes_array;
|
|
83
87
|
} NodeSetWrapper;
|
|
84
88
|
|
|
89
|
+
// Wrapper structure for Schema
|
|
90
|
+
// Native payload carried by an RXerces::XML::Schema object.
// The XSD source text is kept on the heap; schema_free() deletes it
// together with the wrapper itself.
typedef struct {
    std::string* schemaContent;  // XSD document text, owned by this wrapper
} SchemaWrapper;
|
|
93
|
+
|
|
94
|
+
// Error handler for schema validation
|
|
95
|
+
class ValidationErrorHandler : public ErrorHandler {
|
|
96
|
+
public:
|
|
97
|
+
std::vector<std::string> errors;
|
|
98
|
+
|
|
99
|
+
void warning(const SAXParseException& e) {
|
|
100
|
+
char* msg = XMLString::transcode(e.getMessage());
|
|
101
|
+
errors.push_back(std::string("Warning: ") + msg);
|
|
102
|
+
XMLString::release(&msg);
|
|
103
|
+
}
|
|
104
|
+
|
|
105
|
+
void error(const SAXParseException& e) {
|
|
106
|
+
char* msg = XMLString::transcode(e.getMessage());
|
|
107
|
+
errors.push_back(std::string("Error: ") + msg);
|
|
108
|
+
XMLString::release(&msg);
|
|
109
|
+
}
|
|
110
|
+
|
|
111
|
+
void fatalError(const SAXParseException& e) {
|
|
112
|
+
char* msg = XMLString::transcode(e.getMessage());
|
|
113
|
+
errors.push_back(std::string("Fatal: ") + msg);
|
|
114
|
+
XMLString::release(&msg);
|
|
115
|
+
}
|
|
116
|
+
|
|
117
|
+
void resetErrors() {
|
|
118
|
+
errors.clear();
|
|
119
|
+
}
|
|
120
|
+
};
|
|
121
|
+
|
|
85
122
|
// Memory management functions
|
|
86
123
|
static void document_free(void* ptr) {
|
|
87
124
|
DocumentWrapper* wrapper = (DocumentWrapper*)ptr;
|
|
@@ -109,6 +146,16 @@ static void nodeset_free(void* ptr) {
|
|
|
109
146
|
}
|
|
110
147
|
}
|
|
111
148
|
|
|
149
|
+
static void schema_free(void* ptr) {
|
|
150
|
+
SchemaWrapper* wrapper = (SchemaWrapper*)ptr;
|
|
151
|
+
if (wrapper) {
|
|
152
|
+
if (wrapper->schemaContent) {
|
|
153
|
+
delete wrapper->schemaContent;
|
|
154
|
+
}
|
|
155
|
+
xfree(wrapper);
|
|
156
|
+
}
|
|
157
|
+
}
|
|
158
|
+
|
|
112
159
|
static size_t document_size(const void* ptr) {
|
|
113
160
|
return sizeof(DocumentWrapper);
|
|
114
161
|
}
|
|
@@ -121,6 +168,10 @@ static size_t nodeset_size(const void* ptr) {
|
|
|
121
168
|
return sizeof(NodeSetWrapper);
|
|
122
169
|
}
|
|
123
170
|
|
|
171
|
+
static size_t schema_size(const void* ptr) {
|
|
172
|
+
return sizeof(SchemaWrapper);
|
|
173
|
+
}
|
|
174
|
+
|
|
124
175
|
static const rb_data_type_t document_type = {
|
|
125
176
|
"RXerces::XML::Document",
|
|
126
177
|
{0, document_free, document_size},
|
|
@@ -142,6 +193,13 @@ static const rb_data_type_t nodeset_type = {
|
|
|
142
193
|
RUBY_TYPED_FREE_IMMEDIATELY
|
|
143
194
|
};
|
|
144
195
|
|
|
196
|
+
// Typed-data descriptor for RXerces::XML::Schema objects.
// No mark function is registered: SchemaWrapper holds no Ruby VALUEs.
static const rb_data_type_t schema_type = {
    "RXerces::XML::Schema",                 // type name
    { nullptr, schema_free, schema_size },  // mark (none), free, size
    nullptr,                                // parent type
    nullptr,                                // user data
    RUBY_TYPED_FREE_IMMEDIATELY             // flags
};
|
|
202
|
+
|
|
145
203
|
// Helper to create Ruby Node object from DOMNode
|
|
146
204
|
static VALUE wrap_node(DOMNode* node, VALUE doc_ref) {
|
|
147
205
|
if (!node) {
|
|
@@ -261,6 +319,41 @@ static VALUE document_to_s(VALUE self) {
|
|
|
261
319
|
return Qnil;
|
|
262
320
|
}
|
|
263
321
|
|
|
322
|
+
// document.create_element(name)
|
|
323
|
+
// document.create_element(name)
//
// Creates a new, unattached element owned by this document and returns it
// wrapped as a Ruby node (via wrap_node).
//
// name - Ruby String with the element name (TypeError otherwise).
//
// Raises RuntimeError when the document is null, when Xerces rejects the
// name (DOMException message is included), or on any other failure.
//
// All Ruby raises happen outside the try/catch: rb_raise unwinds with
// longjmp and must not leave a C++ handler or skip pending cleanup. The
// transcoded name is released on every path, including when createElement
// throws.
static VALUE document_create_element(VALUE self, VALUE name) {
    DocumentWrapper* doc_wrapper;
    TypedData_Get_Struct(self, DocumentWrapper, &document_type, doc_wrapper);

    if (!doc_wrapper->doc) {
        rb_raise(rb_eRuntimeError, "Cannot create element on null document");
    }

    Check_Type(name, T_STRING);
    const char* element_name = StringValueCStr(name);

    DOMElement* element = NULL;
    XMLCh* element_name_xml = NULL;
    VALUE dom_error = Qnil;
    bool unknown_failure = false;

    try {
        element_name_xml = XMLString::transcode(element_name);
        element = doc_wrapper->doc->createElement(element_name_xml);
    } catch (const DOMException& e) {
        char* message = XMLString::transcode(e.getMessage());
        dom_error = rb_str_new_cstr(message ? message : "");
        if (message) {
            XMLString::release(&message);
        }
    } catch (...) {
        unknown_failure = true;
    }

    // Released here so the buffer is freed whether or not createElement threw.
    if (element_name_xml) {
        XMLString::release(&element_name_xml);
    }

    if (!NIL_P(dom_error)) {
        rb_raise(rb_eRuntimeError, "Failed to create element: %s", StringValueCStr(dom_error));
    }
    if (unknown_failure) {
        rb_raise(rb_eRuntimeError, "Unknown error creating element");
    }
    if (!element) {
        rb_raise(rb_eRuntimeError, "Failed to create element");
    }

    return wrap_node(element, self);
}
|
|
356
|
+
|
|
264
357
|
// document.xpath(path)
|
|
265
358
|
static VALUE document_xpath(VALUE self, VALUE path) {
|
|
266
359
|
DocumentWrapper* doc_wrapper;
|
|
@@ -443,6 +536,335 @@ static VALUE node_children(VALUE self) {
|
|
|
443
536
|
return children;
|
|
444
537
|
}
|
|
445
538
|
|
|
539
|
+
// node.parent
|
|
540
|
+
// Returns the parent of this node wrapped as a Ruby node, or nil when the
// wrapper holds no node or the node has no parent. The new wrapper shares
// this node's @document reference.
static VALUE node_parent(VALUE self) {
    NodeWrapper* wrapper;
    TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);

    DOMNode* parent_node = wrapper->node ? wrapper->node->getParentNode() : NULL;
    if (parent_node == NULL) {
        return Qnil;
    }

    return wrap_node(parent_node, rb_iv_get(self, "@document"));
}
|
|
556
|
+
|
|
557
|
+
// node.attributes - returns hash of all attributes (only for element nodes)
|
|
558
|
+
// Builds a Ruby Hash mapping attribute name => attribute value (both Ruby
// Strings) for an element node. Any other node type, a null node, or an
// element without an attribute map yields an empty Hash.
static VALUE node_attributes(VALUE self) {
    NodeWrapper* wrapper;
    TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);

    VALUE result = rb_hash_new();

    DOMNode* node = wrapper->node;
    const bool is_element = node && node->getNodeType() == DOMNode::ELEMENT_NODE;
    if (!is_element) {
        return result;
    }

    DOMNamedNodeMap* attr_map = dynamic_cast<DOMElement*>(node)->getAttributes();
    if (!attr_map) {
        return result;
    }

    const XMLSize_t total = attr_map->getLength();
    for (XMLSize_t idx = 0; idx < total; ++idx) {
        DOMNode* attr = attr_map->item(idx);
        if (!attr) {
            continue;
        }

        CharStr key(attr->getNodeName());
        CharStr val(attr->getNodeValue());
        rb_hash_aset(result,
                     rb_str_new_cstr(key.localForm()),
                     rb_str_new_cstr(val.localForm()));
    }

    return result;
}
|
|
593
|
+
|
|
594
|
+
// node.next_sibling
|
|
595
|
+
// Returns the node immediately following this one under the same parent,
// wrapped as a Ruby node, or nil when there is no node or no such sibling.
static VALUE node_next_sibling(VALUE self) {
    NodeWrapper* wrapper;
    TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);

    DOMNode* following = wrapper->node ? wrapper->node->getNextSibling() : NULL;
    if (following == NULL) {
        return Qnil;
    }

    return wrap_node(following, rb_iv_get(self, "@document"));
}
|
|
611
|
+
|
|
612
|
+
// node.previous_sibling
|
|
613
|
+
// Returns the node immediately preceding this one under the same parent,
// wrapped as a Ruby node, or nil when there is no node or no such sibling.
static VALUE node_previous_sibling(VALUE self) {
    NodeWrapper* wrapper;
    TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);

    DOMNode* preceding = wrapper->node ? wrapper->node->getPreviousSibling() : NULL;
    if (preceding == NULL) {
        return Qnil;
    }

    return wrap_node(preceding, rb_iv_get(self, "@document"));
}
|
|
629
|
+
|
|
630
|
+
// node.add_child(node_or_string) - adds a child node
|
|
631
|
+
// node.add_child(node_or_string)
//
// Appends a child to this node. A Ruby String becomes a new text node
// created in the owner document; an RXerces node is appended as-is.
// Returns the argument that was passed in.
//
// Raises TypeError for any other argument type, and RuntimeError when this
// node is null, has no owner document, the child cannot be created, or
// Xerces rejects the append (DOMException message is included).
//
// Every Xerces call that can throw now runs inside the try block (the text
// node used to be created outside it, letting C++ exceptions escape into
// Ruby), the transcoded text buffer is released on all paths, and Ruby
// raises happen only after the try/catch has been left.
static VALUE node_add_child(VALUE self, VALUE child) {
    NodeWrapper* wrapper;
    TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);

    if (!wrapper->node) {
        rb_raise(rb_eRuntimeError, "Cannot add child to null node");
    }

    DOMDocument* doc = wrapper->node->getOwnerDocument();
    if (!doc) {
        rb_raise(rb_eRuntimeError, "Node has no owner document");
    }

    // Resolve the argument first: these Ruby calls may raise, and at this
    // point no C++ resource is held.
    const bool child_is_text = (TYPE(child) == T_STRING);
    const char* text_str = NULL;
    DOMNode* child_node = NULL;

    if (child_is_text) {
        text_str = StringValueCStr(child);
    } else if (rb_obj_is_kind_of(child, rb_cNode)) {
        NodeWrapper* child_wrapper;
        TypedData_Get_Struct(child, NodeWrapper, &node_type, child_wrapper);
        child_node = child_wrapper->node;
        if (!child_node) {
            rb_raise(rb_eRuntimeError, "Failed to create child node");
        }
    } else {
        rb_raise(rb_eTypeError, "Argument must be a String or Node");
    }

    XMLCh* text_content = NULL;
    VALUE dom_error = Qnil;
    bool unknown_failure = false;

    try {
        if (child_is_text) {
            text_content = XMLString::transcode(text_str);
            child_node = doc->createTextNode(text_content);
        }
        if (child_node) {
            wrapper->node->appendChild(child_node);
        }
    } catch (const DOMException& e) {
        char* message = XMLString::transcode(e.getMessage());
        dom_error = rb_str_new_cstr(message ? message : "");
        if (message) {
            XMLString::release(&message);
        }
    } catch (...) {
        unknown_failure = true;
    }

    if (text_content) {
        XMLString::release(&text_content);
    }

    if (!NIL_P(dom_error)) {
        rb_raise(rb_eRuntimeError, "Failed to add child: %s", StringValueCStr(dom_error));
    }
    if (unknown_failure) {
        rb_raise(rb_eRuntimeError, "Unknown error adding child");
    }
    if (!child_node) {
        rb_raise(rb_eRuntimeError, "Failed to create child node");
    }

    return child;
}
|
|
679
|
+
|
|
680
|
+
// node.remove / node.unlink - removes node from its parent
|
|
681
|
+
// node.remove / node.unlink
//
// Detaches this node from its parent and returns self.
//
// Raises RuntimeError when the wrapper holds no node, the node has no
// parent, or Xerces fails to remove it (DOMException message is included).
//
// The Ruby raise is issued after the try/catch has been left, so rb_raise
// never unwinds out of an active C++ handler; exceptions other than
// DOMException are also converted instead of escaping into Ruby.
static VALUE node_remove(VALUE self) {
    NodeWrapper* wrapper;
    TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);

    if (!wrapper->node) {
        rb_raise(rb_eRuntimeError, "Cannot remove null node");
    }

    DOMNode* parent = wrapper->node->getParentNode();
    if (!parent) {
        rb_raise(rb_eRuntimeError, "Node has no parent to remove from");
    }

    VALUE dom_error = Qnil;
    bool unknown_failure = false;

    try {
        parent->removeChild(wrapper->node);
    } catch (const DOMException& e) {
        char* message = XMLString::transcode(e.getMessage());
        dom_error = rb_str_new_cstr(message ? message : "");
        if (message) {
            XMLString::release(&message);
        }
    } catch (...) {
        unknown_failure = true;
    }

    if (!NIL_P(dom_error)) {
        rb_raise(rb_eRuntimeError, "Failed to remove node: %s", StringValueCStr(dom_error));
    }
    if (unknown_failure) {
        rb_raise(rb_eRuntimeError, "Unknown error removing node");
    }

    return self;
}
|
|
705
|
+
|
|
706
|
+
// node.inner_html / node.inner_xml - returns XML content of children
|
|
707
|
+
// node.inner_html / node.inner_xml
//
// Serializes each child of this node with a DOM LS serializer and returns
// the concatenated markup as a Ruby String. A wrapper without a node yields
// an empty String.
//
// Raises RuntimeError ("Failed to serialize inner content", with the
// DOMException message when available) if serialization fails.
//
// The serializer is released on every path (it used to leak when
// serialization threw), null results from Xerces are treated as failures
// rather than dereferenced, and Ruby raises are issued only after all C++
// objects in the inner scope have been destroyed.
static VALUE node_inner_html(VALUE self) {
    NodeWrapper* wrapper;
    TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);

    if (!wrapper->node) {
        return rb_str_new_cstr("");
    }

    VALUE result = Qnil;
    VALUE dom_error = Qnil;
    bool failed = false;

    {
        std::string buffer;
        DOMLSSerializer* serializer = NULL;

        try {
            DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation(XStr("LS").unicodeForm());
            if (impl) {
                serializer = ((DOMImplementationLS*)impl)->createLSSerializer();
            }

            if (!serializer) {
                failed = true;
            } else {
                DOMNodeList* children = wrapper->node->getChildNodes();
                const XMLSize_t count = children ? children->getLength() : 0;

                for (XMLSize_t i = 0; i < count && !failed; i++) {
                    XMLCh* xml_str = serializer->writeToString(children->item(i));
                    if (!xml_str) {
                        failed = true;
                        break;
                    }

                    try {
                        CharStr utf8_str(xml_str);
                        buffer += utf8_str.localForm();
                    } catch (...) {
                        XMLString::release(&xml_str);
                        throw;
                    }
                    XMLString::release(&xml_str);
                }
            }
        } catch (const DOMException& e) {
            failed = true;
            char* message = XMLString::transcode(e.getMessage());
            dom_error = rb_str_new_cstr(message ? message : "");
            if (message) {
                XMLString::release(&message);
            }
        } catch (...) {
            failed = true;
        }

        if (serializer) {
            serializer->release();
        }

        if (!failed) {
            result = rb_str_new_cstr(buffer.c_str());
        }
    }

    if (!NIL_P(dom_error)) {
        rb_raise(rb_eRuntimeError, "Failed to serialize inner content: %s", StringValueCStr(dom_error));
    }
    if (failed) {
        rb_raise(rb_eRuntimeError, "Failed to serialize inner content");
    }

    return result;
}
|
|
745
|
+
|
|
746
|
+
// node.path - returns XPath to the node
|
|
747
|
+
// Builds an XPath-like location string for this node by walking from the
// node up to (but not including) the document node.
//
// Element steps are "/name[k]", where k is 1 plus the number of preceding
// element siblings with the same node name. Text steps are "/text()[k]",
// where k is 1 plus the number of preceding text siblings. Nodes of any
// other type contribute no step. A wrapper without a node yields "".
static VALUE node_path(VALUE self) {
    NodeWrapper* wrapper;
    TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);

    if (!wrapper->node) {
        return rb_str_new_cstr("");
    }

    std::string xpath;

    for (DOMNode* cursor = wrapper->node;
         cursor && cursor->getNodeType() != DOMNode::DOCUMENT_NODE;
         cursor = cursor->getParentNode()) {

        const bool is_element = cursor->getNodeType() == DOMNode::ELEMENT_NODE;
        const bool is_text = !is_element && cursor->getNodeType() == DOMNode::TEXT_NODE;
        if (!is_element && !is_text) {
            continue;  // other node kinds add nothing to the path
        }

        // 1-based index among the preceding siblings of the same kind.
        int ordinal = 1;
        for (DOMNode* prev = cursor->getPreviousSibling(); prev; prev = prev->getPreviousSibling()) {
            if (is_element) {
                if (prev->getNodeType() == DOMNode::ELEMENT_NODE &&
                    XMLString::equals(prev->getNodeName(), cursor->getNodeName())) {
                    ++ordinal;
                }
            } else if (prev->getNodeType() == DOMNode::TEXT_NODE) {
                ++ordinal;
            }
        }

        std::string step;
        if (is_element) {
            CharStr tag(cursor->getNodeName());
            step = std::string(tag.localForm());
        } else {
            step = "text()";
        }

        xpath = "/" + step + "[" + std::to_string(ordinal) + "]" + xpath;
    }

    return rb_str_new_cstr(xpath.c_str());
}
|
|
798
|
+
|
|
799
|
+
// node.blank? - returns true if node has no meaningful content
|
|
800
|
+
// node.blank?
//
// Returns true when the node carries no meaningful content:
//   * no underlying node                          -> true
//   * text node                                   -> true iff its value is
//                                                    absent or all whitespace
//   * element node                                -> false if it has any
//                                                    element child or any text
//                                                    child with non-whitespace;
//                                                    true otherwise
//   * any other node type                         -> true
static VALUE node_blank_p(VALUE self) {
    NodeWrapper* wrapper;
    TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);

    // True when the text is absent or consists solely of isspace() characters.
    auto is_blank_text = [](const XMLCh* text) -> bool {
        if (!text) {
            return true;
        }
        CharStr local_text(text);
        for (const char* p = local_text.localForm(); *p; ++p) {
            if (!isspace((unsigned char)*p)) {
                return false;
            }
        }
        return true;
    };

    DOMNode* node = wrapper->node;
    if (!node) {
        return Qtrue;
    }

    if (node->getNodeType() == DOMNode::TEXT_NODE) {
        return is_blank_text(node->getNodeValue()) ? Qtrue : Qfalse;
    }

    if (node->getNodeType() != DOMNode::ELEMENT_NODE) {
        return Qtrue;
    }

    DOMNodeList* kids = node->getChildNodes();
    const XMLSize_t kid_count = kids->getLength();

    for (XMLSize_t i = 0; i < kid_count; ++i) {
        DOMNode* kid = kids->item(i);

        if (kid->getNodeType() == DOMNode::ELEMENT_NODE) {
            return Qfalse;
        }
        if (kid->getNodeType() == DOMNode::TEXT_NODE && !is_blank_text(kid->getNodeValue())) {
            return Qfalse;
        }
    }

    return Qtrue;
}
|
|
867
|
+
|
|
446
868
|
// node.xpath(path)
|
|
447
869
|
static VALUE node_xpath(VALUE self, VALUE path) {
|
|
448
870
|
NodeWrapper* node_wrapper;
|
|
@@ -550,7 +972,188 @@ static VALUE nodeset_to_a(VALUE self) {
|
|
|
550
972
|
return rb_ary_dup(wrapper->nodes_array);
|
|
551
973
|
}
|
|
552
974
|
|
|
553
|
-
|
|
975
|
+
// Schema.from_document(schema_doc) or Schema.from_string(xsd_string)
|
|
976
|
+
// Schema.from_document(schema_doc) / Schema.from_string(xsd_string)
//
// Builds a Schema object from XSD source. The single argument is either a
// Ruby String holding the XSD text or any object whose to_s returns it
// (e.g. a Document). The text is stored in the Schema and checked for XML
// well-formedness; the grammar itself is loaded later, during validation.
//
// Raises RuntimeError when Xerces cannot be initialized or the text is not
// well-formed XML, and TypeError when to_s does not return a String.
//
// Changes from the previous implementation:
//   * A parser without an error handler does not throw on malformed XML, so
//     the well-formedness check now inspects getErrorCount().
//   * The Ruby object is created first (TypedData_Make_Struct), so the
//     wrapper and its string are reclaimed by the GC via schema_free on any
//     error path instead of leaking.
//   * The to_s result is type-checked before its buffer is read.
//   * Ruby raises happen only after the C++ try/catch has been left.
static VALUE schema_from_document(int argc, VALUE* argv, VALUE klass) {
    VALUE schema_source;
    rb_scan_args(argc, argv, "1", &schema_source);

    // Ensure Xerces is initialized
    if (!xerces_initialized) {
        VALUE init_error = Qnil;
        try {
            XMLPlatformUtils::Initialize();
            xerces_initialized = true;
        } catch (const XMLException& e) {
            char* message = XMLString::transcode(e.getMessage());
            init_error = rb_str_new_cstr(message ? message : "");
            if (message) {
                XMLString::release(&message);
            }
        }
        if (!NIL_P(init_error)) {
            rb_raise(rb_eRuntimeError, "Failed to initialize Xerces-C: %s", StringValueCStr(init_error));
        }
    }

    // Obtain the XSD text as a Ruby String before any C++ resource exists;
    // these calls may raise.
    VALUE xsd = schema_source;
    if (!rb_obj_is_kind_of(xsd, rb_cString)) {
        xsd = rb_funcall(schema_source, rb_intern("to_s"), 0);
    }
    StringValue(xsd);

    // Zero-initialized wrapper owned by the new Ruby object from here on.
    SchemaWrapper* wrapper;
    VALUE rb_schema = TypedData_Make_Struct(klass, SchemaWrapper, &schema_type, wrapper);

    enum { FAIL_NONE, FAIL_INVALID_XML, FAIL_XML_EXCEPTION, FAIL_DOM_EXCEPTION, FAIL_UNKNOWN } failure = FAIL_NONE;
    VALUE failure_detail = Qnil;

    try {
        wrapper->schemaContent = new std::string(RSTRING_PTR(xsd), RSTRING_LEN(xsd));
        const std::string& xsd_content = *wrapper->schemaContent;

        XercesDOMParser schemaParser;
        schemaParser.setValidationScheme(XercesDOMParser::Val_Never);
        schemaParser.setDoNamespaces(true);

        MemBufInputSource schemaInput(
            (const XMLByte*)xsd_content.c_str(),
            xsd_content.length(),
            "schema"
        );

        bool well_formed = false;
        try {
            schemaParser.parse(schemaInput);
            // Without an error handler, parse errors are only counted.
            well_formed = (schemaParser.getErrorCount() == 0);
        } catch (...) {
            well_formed = false;
        }

        if (!well_formed) {
            failure = FAIL_INVALID_XML;
        }
    } catch (const XMLException& e) {
        failure = FAIL_XML_EXCEPTION;
        char* message = XMLString::transcode(e.getMessage());
        failure_detail = rb_str_new_cstr(message ? message : "");
        if (message) {
            XMLString::release(&message);
        }
    } catch (const DOMException& e) {
        failure = FAIL_DOM_EXCEPTION;
        char* message = XMLString::transcode(e.getMessage());
        failure_detail = rb_str_new_cstr(message ? message : "");
        if (message) {
            XMLString::release(&message);
        }
    } catch (...) {
        failure = FAIL_UNKNOWN;
    }

    RB_GC_GUARD(xsd);

    if (failure == FAIL_INVALID_XML) {
        rb_raise(rb_eRuntimeError, "Schema parsing failed: Invalid XML");
    }
    if (failure == FAIL_XML_EXCEPTION) {
        rb_raise(rb_eRuntimeError, "XMLException: %s", StringValueCStr(failure_detail));
    }
    if (failure == FAIL_DOM_EXCEPTION) {
        rb_raise(rb_eRuntimeError, "DOMException: %s", StringValueCStr(failure_detail));
    }
    if (failure == FAIL_UNKNOWN) {
        rb_raise(rb_eRuntimeError, "Unknown exception during schema parsing");
    }

    return rb_schema;
}
|
|
1052
|
+
|
|
1053
|
+
// document.validate(schema) - returns array of error messages (empty if valid)
|
|
1054
|
+
// document.validate(schema)
//
// Validates this document against an RXerces::XML::Schema and returns a
// Ruby Array of diagnostic strings (empty when nothing was reported).
// The document is serialized to a memory buffer and re-parsed with a
// schema-validating parser that has the schema's grammar preloaded.
//
// Diagnostics come from ValidationErrorHandler, plus
// "Warning: Schema grammar could not be loaded" if loading the grammar
// throws, and "XMLException: ...", "DOMException: ..." or
// "Unknown parsing exception" if the validating parse itself throws.
//
// Raises RuntimeError when the document or schema holds no data, when the
// DOM LS implementation is unavailable, or when serialization/setup throws.
//
// Changes from the previous implementation: the transcoded "LS" feature
// string is no longer leaked, the document, implementation and schema
// content are null-checked, the serializer/output are released on every
// path, the parser lives on the stack, the unused schema_location local is
// gone, and Ruby raises happen only after all C++ objects are destroyed.
static VALUE document_validate(VALUE self, VALUE rb_schema) {
    DocumentWrapper* doc_wrapper;
    TypedData_Get_Struct(self, DocumentWrapper, &document_type, doc_wrapper);

    SchemaWrapper* schema_wrapper;
    TypedData_Get_Struct(rb_schema, SchemaWrapper, &schema_type, schema_wrapper);

    if (!doc_wrapper->doc) {
        rb_raise(rb_eRuntimeError, "Cannot validate null document");
    }
    if (!schema_wrapper->schemaContent) {
        rb_raise(rb_eRuntimeError, "Schema has no content");
    }

    enum { FAIL_NONE, FAIL_NO_IMPL, FAIL_XML_EXCEPTION, FAIL_DOM_EXCEPTION, FAIL_UNKNOWN } failure = FAIL_NONE;
    VALUE failure_detail = Qnil;
    VALUE errors_array = rb_ary_new();

    {
        std::vector<std::string> messages;
        DOMLSSerializer* serializer = NULL;
        DOMLSOutput* output = NULL;

        try {
            // Serialize the document to a byte buffer for re-parsing.
            DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation(XStr("LS").unicodeForm());
            if (!impl) {
                failure = FAIL_NO_IMPL;
            } else {
                serializer = ((DOMImplementationLS*)impl)->createLSSerializer();
                output = ((DOMImplementationLS*)impl)->createLSOutput();

                MemBufFormatTarget target;
                output->setByteStream(&target);
                serializer->write(doc_wrapper->doc, output);

                std::string xml_content((const char*)target.getRawBuffer(), target.getLen());

                output->release();
                output = NULL;
                serializer->release();
                serializer = NULL;

                // Declared before the parser so it outlives it.
                ValidationErrorHandler errorHandler;

                XercesDOMParser validator;
                validator.setValidationScheme(XercesDOMParser::Val_Always);
                validator.setDoNamespaces(true);
                validator.setDoSchema(true);
                validator.setValidationSchemaFullChecking(true);
                validator.setErrorHandler(&errorHandler);

                MemBufInputSource schemaSource(
                    (const XMLByte*)schema_wrapper->schemaContent->c_str(),
                    schema_wrapper->schemaContent->length(),
                    "schema.xsd"
                );

                // Load and cache the schema grammar for the validating parse.
                try {
                    validator.loadGrammar(schemaSource, Grammar::SchemaGrammarType, true);
                    validator.setExternalNoNamespaceSchemaLocation("schema.xsd");
                    validator.useCachedGrammarInParse(true);
                } catch (...) {
                    // If grammar loading fails, just note it
                    errorHandler.errors.push_back("Warning: Schema grammar could not be loaded");
                }

                MemBufInputSource docSource(
                    (const XMLByte*)xml_content.c_str(),
                    xml_content.length(),
                    "document.xml"
                );

                try {
                    validator.parse(docSource);
                } catch (const XMLException& e) {
                    char* message = XMLString::transcode(e.getMessage());
                    errorHandler.errors.push_back(std::string("XMLException: ") + (message ? message : ""));
                    if (message) {
                        XMLString::release(&message);
                    }
                } catch (const DOMException& e) {
                    char* message = XMLString::transcode(e.getMessage());
                    errorHandler.errors.push_back(std::string("DOMException: ") + (message ? message : ""));
                    if (message) {
                        XMLString::release(&message);
                    }
                } catch (...) {
                    errorHandler.errors.push_back("Unknown parsing exception");
                }

                messages.swap(errorHandler.errors);
            }
        } catch (const XMLException& e) {
            failure = FAIL_XML_EXCEPTION;
            char* message = XMLString::transcode(e.getMessage());
            failure_detail = rb_str_new_cstr(message ? message : "");
            if (message) {
                XMLString::release(&message);
            }
        } catch (const DOMException& e) {
            failure = FAIL_DOM_EXCEPTION;
            char* message = XMLString::transcode(e.getMessage());
            failure_detail = rb_str_new_cstr(message ? message : "");
            if (message) {
                XMLString::release(&message);
            }
        } catch (...) {
            failure = FAIL_UNKNOWN;
        }

        // Still set only if serialization threw before the normal release.
        if (output) {
            output->release();
        }
        if (serializer) {
            serializer->release();
        }

        if (failure == FAIL_NONE) {
            for (const auto& err : messages) {
                rb_ary_push(errors_array, rb_str_new_cstr(err.c_str()));
            }
        }
    }

    if (failure == FAIL_NO_IMPL) {
        rb_raise(rb_eRuntimeError, "DOM LS implementation is not available");
    }
    if (failure == FAIL_XML_EXCEPTION) {
        rb_raise(rb_eRuntimeError, "XMLException during validation: %s", StringValueCStr(failure_detail));
    }
    if (failure == FAIL_DOM_EXCEPTION) {
        rb_raise(rb_eRuntimeError, "DOMException during validation: %s", StringValueCStr(failure_detail));
    }
    if (failure == FAIL_UNKNOWN) {
        rb_raise(rb_eRuntimeError, "Unknown exception during validation");
    }

    return errors_array;
}
extern "C" void Init_rxerces(void) {
|
|
554
1157
|
rb_mRXerces = rb_define_module("RXerces");
|
|
555
1158
|
rb_mXML = rb_define_module_under(rb_mRXerces, "XML");
|
|
556
1159
|
|
|
@@ -561,6 +1164,7 @@ extern "C" void Init_rxerces(void) {
|
|
|
561
1164
|
rb_define_method(rb_cDocument, "to_s", RUBY_METHOD_FUNC(document_to_s), 0);
|
|
562
1165
|
rb_define_method(rb_cDocument, "to_xml", RUBY_METHOD_FUNC(document_to_s), 0);
|
|
563
1166
|
rb_define_method(rb_cDocument, "xpath", RUBY_METHOD_FUNC(document_xpath), 1);
|
|
1167
|
+
rb_define_method(rb_cDocument, "create_element", RUBY_METHOD_FUNC(document_create_element), 1);
|
|
564
1168
|
|
|
565
1169
|
rb_cNode = rb_define_class_under(rb_mXML, "Node", rb_cObject);
|
|
566
1170
|
rb_undef_alloc_func(rb_cNode);
|
|
@@ -572,6 +1176,17 @@ extern "C" void Init_rxerces(void) {
|
|
|
572
1176
|
rb_define_method(rb_cNode, "[]", RUBY_METHOD_FUNC(node_get_attribute), 1);
|
|
573
1177
|
rb_define_method(rb_cNode, "[]=", RUBY_METHOD_FUNC(node_set_attribute), 2);
|
|
574
1178
|
rb_define_method(rb_cNode, "children", RUBY_METHOD_FUNC(node_children), 0);
|
|
1179
|
+
rb_define_method(rb_cNode, "parent", RUBY_METHOD_FUNC(node_parent), 0);
|
|
1180
|
+
rb_define_method(rb_cNode, "attributes", RUBY_METHOD_FUNC(node_attributes), 0);
|
|
1181
|
+
rb_define_method(rb_cNode, "next_sibling", RUBY_METHOD_FUNC(node_next_sibling), 0);
|
|
1182
|
+
rb_define_method(rb_cNode, "previous_sibling", RUBY_METHOD_FUNC(node_previous_sibling), 0);
|
|
1183
|
+
rb_define_method(rb_cNode, "add_child", RUBY_METHOD_FUNC(node_add_child), 1);
|
|
1184
|
+
rb_define_method(rb_cNode, "remove", RUBY_METHOD_FUNC(node_remove), 0);
|
|
1185
|
+
rb_define_method(rb_cNode, "unlink", RUBY_METHOD_FUNC(node_remove), 0);
|
|
1186
|
+
rb_define_method(rb_cNode, "inner_html", RUBY_METHOD_FUNC(node_inner_html), 0);
|
|
1187
|
+
rb_define_method(rb_cNode, "inner_xml", RUBY_METHOD_FUNC(node_inner_html), 0);
|
|
1188
|
+
rb_define_method(rb_cNode, "path", RUBY_METHOD_FUNC(node_path), 0);
|
|
1189
|
+
rb_define_method(rb_cNode, "blank?", RUBY_METHOD_FUNC(node_blank_p), 0);
|
|
575
1190
|
rb_define_method(rb_cNode, "xpath", RUBY_METHOD_FUNC(node_xpath), 1);
|
|
576
1191
|
|
|
577
1192
|
rb_cElement = rb_define_class_under(rb_mXML, "Element", rb_cNode);
|
|
@@ -588,4 +1203,11 @@ extern "C" void Init_rxerces(void) {
|
|
|
588
1203
|
rb_define_method(rb_cNodeSet, "each", RUBY_METHOD_FUNC(nodeset_each), 0);
|
|
589
1204
|
rb_define_method(rb_cNodeSet, "to_a", RUBY_METHOD_FUNC(nodeset_to_a), 0);
|
|
590
1205
|
rb_include_module(rb_cNodeSet, rb_mEnumerable);
|
|
1206
|
+
|
|
1207
|
+
rb_cSchema = rb_define_class_under(rb_mXML, "Schema", rb_cObject);
|
|
1208
|
+
rb_undef_alloc_func(rb_cSchema);
|
|
1209
|
+
rb_define_singleton_method(rb_cSchema, "from_document", RUBY_METHOD_FUNC(schema_from_document), -1);
|
|
1210
|
+
rb_define_singleton_method(rb_cSchema, "from_string", RUBY_METHOD_FUNC(schema_from_document), -1);
|
|
1211
|
+
|
|
1212
|
+
rb_define_method(rb_cDocument, "validate", RUBY_METHOD_FUNC(document_validate), 1);
|
|
591
1213
|
}
|