rxerces 0.1.0 → 0.3.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -8,7 +8,10 @@
8
8
  #include <xercesc/util/XercesDefs.hpp>
9
9
  #include <xercesc/dom/DOMXPathResult.hpp>
10
10
  #include <xercesc/dom/DOMXPathExpression.hpp>
11
+ #include <xercesc/sax/ErrorHandler.hpp>
12
+ #include <xercesc/sax/SAXParseException.hpp>
11
13
  #include <sstream>
14
+ #include <vector>
12
15
 
13
16
  using namespace xercesc;
14
17
 
@@ -19,6 +22,7 @@ VALUE rb_cNode;
19
22
  VALUE rb_cNodeSet;
20
23
  VALUE rb_cElement;
21
24
  VALUE rb_cText;
25
+ VALUE rb_cSchema;
22
26
 
23
27
  // Xerces initialization flag
24
28
  static bool xerces_initialized = false;
@@ -82,6 +86,39 @@ typedef struct {
82
86
  VALUE nodes_array;
83
87
  } NodeSetWrapper;
84
88
 
89
// Native payload stored behind an RXerces::XML::Schema object.
// schemaContent points at a heap-allocated copy of the schema text; it is
// destroyed by schema_free().
struct SchemaWrapper {
    std::string* schemaContent;
};
93
+
94
+ // Error handler for schema validation
95
+ class ValidationErrorHandler : public ErrorHandler {
96
+ public:
97
+ std::vector<std::string> errors;
98
+
99
+ void warning(const SAXParseException& e) {
100
+ char* msg = XMLString::transcode(e.getMessage());
101
+ errors.push_back(std::string("Warning: ") + msg);
102
+ XMLString::release(&msg);
103
+ }
104
+
105
+ void error(const SAXParseException& e) {
106
+ char* msg = XMLString::transcode(e.getMessage());
107
+ errors.push_back(std::string("Error: ") + msg);
108
+ XMLString::release(&msg);
109
+ }
110
+
111
+ void fatalError(const SAXParseException& e) {
112
+ char* msg = XMLString::transcode(e.getMessage());
113
+ errors.push_back(std::string("Fatal: ") + msg);
114
+ XMLString::release(&msg);
115
+ }
116
+
117
+ void resetErrors() {
118
+ errors.clear();
119
+ }
120
+ };
121
+
85
122
  // Memory management functions
86
123
  static void document_free(void* ptr) {
87
124
  DocumentWrapper* wrapper = (DocumentWrapper*)ptr;
@@ -109,6 +146,16 @@ static void nodeset_free(void* ptr) {
109
146
  }
110
147
  }
111
148
 
149
+ static void schema_free(void* ptr) {
150
+ SchemaWrapper* wrapper = (SchemaWrapper*)ptr;
151
+ if (wrapper) {
152
+ if (wrapper->schemaContent) {
153
+ delete wrapper->schemaContent;
154
+ }
155
+ xfree(wrapper);
156
+ }
157
+ }
158
+
112
159
  static size_t document_size(const void* ptr) {
113
160
  return sizeof(DocumentWrapper);
114
161
  }
@@ -121,6 +168,10 @@ static size_t nodeset_size(const void* ptr) {
121
168
  return sizeof(NodeSetWrapper);
122
169
  }
123
170
 
171
+ static size_t schema_size(const void* ptr) {
172
+ return sizeof(SchemaWrapper);
173
+ }
174
+
124
175
  static const rb_data_type_t document_type = {
125
176
  "RXerces::XML::Document",
126
177
  {0, document_free, document_size},
@@ -142,6 +193,13 @@ static const rb_data_type_t nodeset_type = {
142
193
  RUBY_TYPED_FREE_IMMEDIATELY
143
194
  };
144
195
 
196
+ static const rb_data_type_t schema_type = {
197
+ "RXerces::XML::Schema",
198
+ {0, schema_free, schema_size},
199
+ 0, 0,
200
+ RUBY_TYPED_FREE_IMMEDIATELY
201
+ };
202
+
145
203
  // Helper to create Ruby Node object from DOMNode
146
204
  static VALUE wrap_node(DOMNode* node, VALUE doc_ref) {
147
205
  if (!node) {
@@ -261,6 +319,41 @@ static VALUE document_to_s(VALUE self) {
261
319
  return Qnil;
262
320
  }
263
321
 
322
+ // document.create_element(name)
323
+ static VALUE document_create_element(VALUE self, VALUE name) {
324
+ DocumentWrapper* doc_wrapper;
325
+ TypedData_Get_Struct(self, DocumentWrapper, &document_type, doc_wrapper);
326
+
327
+ if (!doc_wrapper->doc) {
328
+ rb_raise(rb_eRuntimeError, "Cannot create element on null document");
329
+ }
330
+
331
+ Check_Type(name, T_STRING);
332
+ const char* element_name = StringValueCStr(name);
333
+
334
+ try {
335
+ XMLCh* element_name_xml = XMLString::transcode(element_name);
336
+ DOMElement* element = doc_wrapper->doc->createElement(element_name_xml);
337
+ XMLString::release(&element_name_xml);
338
+
339
+ if (!element) {
340
+ rb_raise(rb_eRuntimeError, "Failed to create element");
341
+ }
342
+
343
+ return wrap_node(element, self);
344
+
345
+ } catch (const DOMException& e) {
346
+ char* message = XMLString::transcode(e.getMessage());
347
+ VALUE rb_error = rb_str_new_cstr(message);
348
+ XMLString::release(&message);
349
+ rb_raise(rb_eRuntimeError, "Failed to create element: %s", StringValueCStr(rb_error));
350
+ } catch (...) {
351
+ rb_raise(rb_eRuntimeError, "Unknown error creating element");
352
+ }
353
+
354
+ return Qnil;
355
+ }
356
+
264
357
  // document.xpath(path)
265
358
  static VALUE document_xpath(VALUE self, VALUE path) {
266
359
  DocumentWrapper* doc_wrapper;
@@ -443,6 +536,335 @@ static VALUE node_children(VALUE self) {
443
536
  return children;
444
537
  }
445
538
 
539
+ // node.parent
540
+ static VALUE node_parent(VALUE self) {
541
+ NodeWrapper* wrapper;
542
+ TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
543
+
544
+ if (!wrapper->node) {
545
+ return Qnil;
546
+ }
547
+
548
+ DOMNode* parent = wrapper->node->getParentNode();
549
+ if (!parent) {
550
+ return Qnil;
551
+ }
552
+
553
+ VALUE doc_ref = rb_iv_get(self, "@document");
554
+ return wrap_node(parent, doc_ref);
555
+ }
556
+
557
+ // node.attributes - returns hash of all attributes (only for element nodes)
558
+ static VALUE node_attributes(VALUE self) {
559
+ NodeWrapper* wrapper;
560
+ TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
561
+
562
+ if (!wrapper->node || wrapper->node->getNodeType() != DOMNode::ELEMENT_NODE) {
563
+ return rb_hash_new();
564
+ }
565
+
566
+ DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
567
+ DOMNamedNodeMap* attributes = element->getAttributes();
568
+
569
+ if (!attributes) {
570
+ return rb_hash_new();
571
+ }
572
+
573
+ VALUE hash = rb_hash_new();
574
+ XMLSize_t length = attributes->getLength();
575
+
576
+ for (XMLSize_t i = 0; i < length; i++) {
577
+ DOMNode* attr = attributes->item(i);
578
+ if (attr) {
579
+ const XMLCh* name = attr->getNodeName();
580
+ const XMLCh* value = attr->getNodeValue();
581
+
582
+ CharStr attr_name(name);
583
+ CharStr attr_value(value);
584
+
585
+ rb_hash_aset(hash,
586
+ rb_str_new_cstr(attr_name.localForm()),
587
+ rb_str_new_cstr(attr_value.localForm()));
588
+ }
589
+ }
590
+
591
+ return hash;
592
+ }
593
+
594
+ // node.next_sibling
595
+ static VALUE node_next_sibling(VALUE self) {
596
+ NodeWrapper* wrapper;
597
+ TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
598
+
599
+ if (!wrapper->node) {
600
+ return Qnil;
601
+ }
602
+
603
+ DOMNode* next = wrapper->node->getNextSibling();
604
+ if (!next) {
605
+ return Qnil;
606
+ }
607
+
608
+ VALUE doc_ref = rb_iv_get(self, "@document");
609
+ return wrap_node(next, doc_ref);
610
+ }
611
+
612
+ // node.previous_sibling
613
+ static VALUE node_previous_sibling(VALUE self) {
614
+ NodeWrapper* wrapper;
615
+ TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
616
+
617
+ if (!wrapper->node) {
618
+ return Qnil;
619
+ }
620
+
621
+ DOMNode* prev = wrapper->node->getPreviousSibling();
622
+ if (!prev) {
623
+ return Qnil;
624
+ }
625
+
626
+ VALUE doc_ref = rb_iv_get(self, "@document");
627
+ return wrap_node(prev, doc_ref);
628
+ }
629
+
630
+ // node.add_child(node_or_string) - adds a child node
631
+ static VALUE node_add_child(VALUE self, VALUE child) {
632
+ NodeWrapper* wrapper;
633
+ TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
634
+
635
+ if (!wrapper->node) {
636
+ rb_raise(rb_eRuntimeError, "Cannot add child to null node");
637
+ }
638
+
639
+ DOMDocument* doc = wrapper->node->getOwnerDocument();
640
+ if (!doc) {
641
+ rb_raise(rb_eRuntimeError, "Node has no owner document");
642
+ }
643
+
644
+ DOMNode* child_node = NULL;
645
+
646
+ // Check if child is a string or a node
647
+ if (TYPE(child) == T_STRING) {
648
+ // Create a text node from the string
649
+ const char* text_str = StringValueCStr(child);
650
+ XMLCh* text_content = XMLString::transcode(text_str);
651
+ child_node = doc->createTextNode(text_content);
652
+ XMLString::release(&text_content);
653
+ } else {
654
+ // Assume it's a Node object
655
+ NodeWrapper* child_wrapper;
656
+ if (rb_obj_is_kind_of(child, rb_cNode)) {
657
+ TypedData_Get_Struct(child, NodeWrapper, &node_type, child_wrapper);
658
+ child_node = child_wrapper->node;
659
+ } else {
660
+ rb_raise(rb_eTypeError, "Argument must be a String or Node");
661
+ }
662
+ }
663
+
664
+ if (!child_node) {
665
+ rb_raise(rb_eRuntimeError, "Failed to create child node");
666
+ }
667
+
668
+ try {
669
+ wrapper->node->appendChild(child_node);
670
+ } catch (const DOMException& e) {
671
+ char* message = XMLString::transcode(e.getMessage());
672
+ VALUE rb_error = rb_str_new_cstr(message);
673
+ XMLString::release(&message);
674
+ rb_raise(rb_eRuntimeError, "Failed to add child: %s", StringValueCStr(rb_error));
675
+ }
676
+
677
+ return child;
678
+ }
679
+
680
+ // node.remove / node.unlink - removes node from its parent
681
+ static VALUE node_remove(VALUE self) {
682
+ NodeWrapper* wrapper;
683
+ TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
684
+
685
+ if (!wrapper->node) {
686
+ rb_raise(rb_eRuntimeError, "Cannot remove null node");
687
+ }
688
+
689
+ DOMNode* parent = wrapper->node->getParentNode();
690
+ if (!parent) {
691
+ rb_raise(rb_eRuntimeError, "Node has no parent to remove from");
692
+ }
693
+
694
+ try {
695
+ parent->removeChild(wrapper->node);
696
+ } catch (const DOMException& e) {
697
+ char* message = XMLString::transcode(e.getMessage());
698
+ VALUE rb_error = rb_str_new_cstr(message);
699
+ XMLString::release(&message);
700
+ rb_raise(rb_eRuntimeError, "Failed to remove node: %s", StringValueCStr(rb_error));
701
+ }
702
+
703
+ return self;
704
+ }
705
+
706
+ // node.inner_html / node.inner_xml - returns XML content of children
707
+ static VALUE node_inner_html(VALUE self) {
708
+ NodeWrapper* wrapper;
709
+ TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
710
+
711
+ if (!wrapper->node) {
712
+ return rb_str_new_cstr("");
713
+ }
714
+
715
+ try {
716
+ DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation(XStr("LS").unicodeForm());
717
+ DOMLSSerializer* serializer = ((DOMImplementationLS*)impl)->createLSSerializer();
718
+
719
+ // Build a string by serializing each child
720
+ std::string result;
721
+ DOMNodeList* children = wrapper->node->getChildNodes();
722
+ XMLSize_t count = children->getLength();
723
+
724
+ for (XMLSize_t i = 0; i < count; i++) {
725
+ DOMNode* child = children->item(i);
726
+ XMLCh* xml_str = serializer->writeToString(child);
727
+ CharStr utf8_str(xml_str);
728
+ result += utf8_str.localForm();
729
+ XMLString::release(&xml_str);
730
+ }
731
+
732
+ serializer->release();
733
+ return rb_str_new_cstr(result.c_str());
734
+ } catch (const DOMException& e) {
735
+ char* message = XMLString::transcode(e.getMessage());
736
+ VALUE rb_error = rb_str_new_cstr(message);
737
+ XMLString::release(&message);
738
+ rb_raise(rb_eRuntimeError, "Failed to serialize inner content: %s", StringValueCStr(rb_error));
739
+ } catch (...) {
740
+ rb_raise(rb_eRuntimeError, "Failed to serialize inner content");
741
+ }
742
+
743
+ return rb_str_new_cstr("");
744
+ }
745
+
746
+ // node.path - returns XPath to the node
747
+ static VALUE node_path(VALUE self) {
748
+ NodeWrapper* wrapper;
749
+ TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
750
+
751
+ if (!wrapper->node) {
752
+ return rb_str_new_cstr("");
753
+ }
754
+
755
+ std::string path = "";
756
+ DOMNode* current = wrapper->node;
757
+
758
+ // Build path from current node to root
759
+ while (current && current->getNodeType() != DOMNode::DOCUMENT_NODE) {
760
+ std::string segment = "";
761
+
762
+ if (current->getNodeType() == DOMNode::ELEMENT_NODE) {
763
+ CharStr name(current->getNodeName());
764
+ segment = std::string(name.localForm());
765
+
766
+ // Count position among siblings with same name
767
+ int position = 1;
768
+ DOMNode* sibling = current->getPreviousSibling();
769
+ while (sibling) {
770
+ if (sibling->getNodeType() == DOMNode::ELEMENT_NODE &&
771
+ XMLString::equals(sibling->getNodeName(), current->getNodeName())) {
772
+ position++;
773
+ }
774
+ sibling = sibling->getPreviousSibling();
775
+ }
776
+
777
+ // Add position predicate
778
+ segment += "[" + std::to_string(position) + "]";
779
+ path = "/" + segment + path;
780
+ } else if (current->getNodeType() == DOMNode::TEXT_NODE) {
781
+ // Count position among text node siblings
782
+ int position = 1;
783
+ DOMNode* sibling = current->getPreviousSibling();
784
+ while (sibling) {
785
+ if (sibling->getNodeType() == DOMNode::TEXT_NODE) {
786
+ position++;
787
+ }
788
+ sibling = sibling->getPreviousSibling();
789
+ }
790
+ path = "/text()[" + std::to_string(position) + "]" + path;
791
+ }
792
+
793
+ current = current->getParentNode();
794
+ }
795
+
796
+ return rb_str_new_cstr(path.c_str());
797
+ }
798
+
799
+ // node.blank? - returns true if node has no meaningful content
800
+ static VALUE node_blank_p(VALUE self) {
801
+ NodeWrapper* wrapper;
802
+ TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
803
+
804
+ if (!wrapper->node) {
805
+ return Qtrue;
806
+ }
807
+
808
+ // Text nodes are blank if they contain only whitespace
809
+ if (wrapper->node->getNodeType() == DOMNode::TEXT_NODE) {
810
+ const XMLCh* text_content = wrapper->node->getNodeValue();
811
+ if (!text_content) {
812
+ return Qtrue;
813
+ }
814
+
815
+ // Check if text contains only whitespace
816
+ CharStr utf8_text(text_content);
817
+ const char* str = utf8_text.localForm();
818
+ while (*str) {
819
+ if (!isspace((unsigned char)*str)) {
820
+ return Qfalse;
821
+ }
822
+ str++;
823
+ }
824
+ return Qtrue;
825
+ }
826
+
827
+ // Element nodes are blank if they have no child elements and no non-blank text
828
+ if (wrapper->node->getNodeType() == DOMNode::ELEMENT_NODE) {
829
+ DOMNodeList* children = wrapper->node->getChildNodes();
830
+ XMLSize_t count = children->getLength();
831
+
832
+ if (count == 0) {
833
+ return Qtrue;
834
+ }
835
+
836
+ // Check if all children are blank text nodes
837
+ for (XMLSize_t i = 0; i < count; i++) {
838
+ DOMNode* child = children->item(i);
839
+
840
+ // If there's an element child, not blank
841
+ if (child->getNodeType() == DOMNode::ELEMENT_NODE) {
842
+ return Qfalse;
843
+ }
844
+
845
+ // If there's a non-whitespace text node, not blank
846
+ if (child->getNodeType() == DOMNode::TEXT_NODE) {
847
+ const XMLCh* text_content = child->getNodeValue();
848
+ if (text_content) {
849
+ CharStr utf8_text(text_content);
850
+ const char* str = utf8_text.localForm();
851
+ while (*str) {
852
+ if (!isspace((unsigned char)*str)) {
853
+ return Qfalse;
854
+ }
855
+ str++;
856
+ }
857
+ }
858
+ }
859
+ }
860
+
861
+ return Qtrue;
862
+ }
863
+
864
+ // Other node types are considered blank
865
+ return Qtrue;
866
+ }
867
+
446
868
  // node.xpath(path)
447
869
  static VALUE node_xpath(VALUE self, VALUE path) {
448
870
  NodeWrapper* node_wrapper;
@@ -550,7 +972,188 @@ static VALUE nodeset_to_a(VALUE self) {
550
972
  return rb_ary_dup(wrapper->nodes_array);
551
973
  }
552
974
 
553
- extern "C" void Init_rxerces(void) {
975
+ // Schema.from_document(schema_doc) or Schema.from_string(xsd_string)
976
+ static VALUE schema_from_document(int argc, VALUE* argv, VALUE klass) {
977
+ VALUE schema_source;
978
+ rb_scan_args(argc, argv, "1", &schema_source);
979
+
980
+ // Ensure Xerces is initialized
981
+ if (!xerces_initialized) {
982
+ try {
983
+ XMLPlatformUtils::Initialize();
984
+ xerces_initialized = true;
985
+ } catch (const XMLException& e) {
986
+ char* message = XMLString::transcode(e.getMessage());
987
+ VALUE rb_error = rb_str_new_cstr(message);
988
+ XMLString::release(&message);
989
+ rb_raise(rb_eRuntimeError, "Failed to initialize Xerces-C: %s", StringValueCStr(rb_error));
990
+ }
991
+ }
992
+
993
+ try {
994
+ SchemaWrapper* wrapper = ALLOC(SchemaWrapper);
995
+ wrapper->schemaContent = new std::string();
996
+
997
+ // Convert schema source to string
998
+ std::string xsd_content;
999
+ if (rb_obj_is_kind_of(schema_source, rb_cString)) {
1000
+ xsd_content = std::string(RSTRING_PTR(schema_source), RSTRING_LEN(schema_source));
1001
+ } else {
1002
+ // Assume it's a Document, call to_s
1003
+ VALUE str = rb_funcall(schema_source, rb_intern("to_s"), 0);
1004
+ xsd_content = std::string(RSTRING_PTR(str), RSTRING_LEN(str));
1005
+ }
1006
+
1007
+ // Store the schema content
1008
+ *wrapper->schemaContent = xsd_content;
1009
+
1010
+ // Validate that it's valid XML by trying to parse it
1011
+ XercesDOMParser* schemaParser = new XercesDOMParser();
1012
+ schemaParser->setValidationScheme(XercesDOMParser::Val_Never);
1013
+ schemaParser->setDoNamespaces(true);
1014
+
1015
+ // Parse the schema using MemBufInputSource
1016
+ MemBufInputSource schemaInput(
1017
+ (const XMLByte*)xsd_content.c_str(),
1018
+ xsd_content.length(),
1019
+ "schema"
1020
+ );
1021
+
1022
+ try {
1023
+ schemaParser->parse(schemaInput);
1024
+ } catch (...) {
1025
+ delete schemaParser;
1026
+ delete wrapper->schemaContent;
1027
+ xfree(wrapper);
1028
+ rb_raise(rb_eRuntimeError, "Schema parsing failed: Invalid XML");
1029
+ }
1030
+
1031
+ delete schemaParser;
1032
+
1033
+ VALUE rb_schema = TypedData_Wrap_Struct(klass, &schema_type, wrapper);
1034
+ return rb_schema;
1035
+
1036
+ } catch (const XMLException& e) {
1037
+ char* message = XMLString::transcode(e.getMessage());
1038
+ VALUE rb_error = rb_str_new_cstr(message);
1039
+ XMLString::release(&message);
1040
+ rb_raise(rb_eRuntimeError, "XMLException: %s", StringValueCStr(rb_error));
1041
+ } catch (const DOMException& e) {
1042
+ char* message = XMLString::transcode(e.getMessage());
1043
+ VALUE rb_error = rb_str_new_cstr(message);
1044
+ XMLString::release(&message);
1045
+ rb_raise(rb_eRuntimeError, "DOMException: %s", StringValueCStr(rb_error));
1046
+ } catch (...) {
1047
+ rb_raise(rb_eRuntimeError, "Unknown exception during schema parsing");
1048
+ }
1049
+
1050
+ return Qnil;
1051
+ }
1052
+
1053
+ // document.validate(schema) - returns array of error messages (empty if valid)
1054
+ static VALUE document_validate(VALUE self, VALUE rb_schema) {
1055
+ DocumentWrapper* doc_wrapper;
1056
+ TypedData_Get_Struct(self, DocumentWrapper, &document_type, doc_wrapper);
1057
+
1058
+ SchemaWrapper* schema_wrapper;
1059
+ TypedData_Get_Struct(rb_schema, SchemaWrapper, &schema_type, schema_wrapper);
1060
+
1061
+ try {
1062
+ // Serialize the document to UTF-8 for validation
1063
+ DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation(XMLString::transcode("LS"));
1064
+ DOMLSSerializer* serializer = ((DOMImplementationLS*)impl)->createLSSerializer();
1065
+
1066
+ // Use a MemBufFormatTarget to get UTF-8 encoded output
1067
+ MemBufFormatTarget target;
1068
+ DOMLSOutput* output = ((DOMImplementationLS*)impl)->createLSOutput();
1069
+ output->setByteStream(&target);
1070
+
1071
+ serializer->write(doc_wrapper->doc, output);
1072
+
1073
+ // Get the UTF-8 content
1074
+ std::string xml_content((const char*)target.getRawBuffer(), target.getLen());
1075
+
1076
+ output->release();
1077
+ serializer->release();
1078
+
1079
+ // Create a validating parser
1080
+ XercesDOMParser* validator = new XercesDOMParser();
1081
+ validator->setValidationScheme(XercesDOMParser::Val_Always);
1082
+ validator->setDoNamespaces(true);
1083
+ validator->setDoSchema(true);
1084
+ validator->setValidationSchemaFullChecking(true);
1085
+
1086
+ ValidationErrorHandler errorHandler;
1087
+ validator->setErrorHandler(&errorHandler);
1088
+
1089
+ // Create a combined input with both the schema and the document
1090
+ // First, we need to add schema location to the document
1091
+ std::string schema_location = "http://example.com/schema";
1092
+
1093
+ // Create memory buffers for both schema and document
1094
+ MemBufInputSource schemaSource(
1095
+ (const XMLByte*)schema_wrapper->schemaContent->c_str(),
1096
+ schema_wrapper->schemaContent->length(),
1097
+ "schema.xsd"
1098
+ );
1099
+
1100
+ // Load the schema grammar
1101
+ try {
1102
+ validator->loadGrammar(schemaSource, Grammar::SchemaGrammarType, true);
1103
+ validator->setExternalNoNamespaceSchemaLocation("schema.xsd");
1104
+ validator->useCachedGrammarInParse(true);
1105
+ } catch (...) {
1106
+ // If grammar loading fails, just note it
1107
+ errorHandler.errors.push_back("Warning: Schema grammar could not be loaded");
1108
+ }
1109
+
1110
+ // Now parse and validate the document
1111
+ MemBufInputSource docSource(
1112
+ (const XMLByte*)xml_content.c_str(),
1113
+ xml_content.length(),
1114
+ "document.xml"
1115
+ );
1116
+
1117
+ try {
1118
+ validator->parse(docSource);
1119
+ } catch (const XMLException& e) {
1120
+ char* message = XMLString::transcode(e.getMessage());
1121
+ errorHandler.errors.push_back(std::string("XMLException: ") + message);
1122
+ XMLString::release(&message);
1123
+ } catch (const DOMException& e) {
1124
+ char* message = XMLString::transcode(e.getMessage());
1125
+ errorHandler.errors.push_back(std::string("DOMException: ") + message);
1126
+ XMLString::release(&message);
1127
+ } catch (...) {
1128
+ errorHandler.errors.push_back("Unknown parsing exception");
1129
+ }
1130
+
1131
+ delete validator;
1132
+
1133
+ // Return array of error messages
1134
+ VALUE errors_array = rb_ary_new();
1135
+ for (const auto& err : errorHandler.errors) {
1136
+ rb_ary_push(errors_array, rb_str_new_cstr(err.c_str()));
1137
+ }
1138
+
1139
+ return errors_array;
1140
+
1141
+ } catch (const XMLException& e) {
1142
+ char* message = XMLString::transcode(e.getMessage());
1143
+ VALUE rb_error = rb_str_new_cstr(message);
1144
+ XMLString::release(&message);
1145
+ rb_raise(rb_eRuntimeError, "XMLException during validation: %s", StringValueCStr(rb_error));
1146
+ } catch (const DOMException& e) {
1147
+ char* message = XMLString::transcode(e.getMessage());
1148
+ VALUE rb_error = rb_str_new_cstr(message);
1149
+ XMLString::release(&message);
1150
+ rb_raise(rb_eRuntimeError, "DOMException during validation: %s", StringValueCStr(rb_error));
1151
+ } catch (...) {
1152
+ rb_raise(rb_eRuntimeError, "Unknown exception during validation");
1153
+ }
1154
+
1155
+ return Qnil;
1156
+ }extern "C" void Init_rxerces(void) {
554
1157
  rb_mRXerces = rb_define_module("RXerces");
555
1158
  rb_mXML = rb_define_module_under(rb_mRXerces, "XML");
556
1159
 
@@ -561,6 +1164,7 @@ extern "C" void Init_rxerces(void) {
561
1164
  rb_define_method(rb_cDocument, "to_s", RUBY_METHOD_FUNC(document_to_s), 0);
562
1165
  rb_define_method(rb_cDocument, "to_xml", RUBY_METHOD_FUNC(document_to_s), 0);
563
1166
  rb_define_method(rb_cDocument, "xpath", RUBY_METHOD_FUNC(document_xpath), 1);
1167
+ rb_define_method(rb_cDocument, "create_element", RUBY_METHOD_FUNC(document_create_element), 1);
564
1168
 
565
1169
  rb_cNode = rb_define_class_under(rb_mXML, "Node", rb_cObject);
566
1170
  rb_undef_alloc_func(rb_cNode);
@@ -572,6 +1176,17 @@ extern "C" void Init_rxerces(void) {
572
1176
  rb_define_method(rb_cNode, "[]", RUBY_METHOD_FUNC(node_get_attribute), 1);
573
1177
  rb_define_method(rb_cNode, "[]=", RUBY_METHOD_FUNC(node_set_attribute), 2);
574
1178
  rb_define_method(rb_cNode, "children", RUBY_METHOD_FUNC(node_children), 0);
1179
+ rb_define_method(rb_cNode, "parent", RUBY_METHOD_FUNC(node_parent), 0);
1180
+ rb_define_method(rb_cNode, "attributes", RUBY_METHOD_FUNC(node_attributes), 0);
1181
+ rb_define_method(rb_cNode, "next_sibling", RUBY_METHOD_FUNC(node_next_sibling), 0);
1182
+ rb_define_method(rb_cNode, "previous_sibling", RUBY_METHOD_FUNC(node_previous_sibling), 0);
1183
+ rb_define_method(rb_cNode, "add_child", RUBY_METHOD_FUNC(node_add_child), 1);
1184
+ rb_define_method(rb_cNode, "remove", RUBY_METHOD_FUNC(node_remove), 0);
1185
+ rb_define_method(rb_cNode, "unlink", RUBY_METHOD_FUNC(node_remove), 0);
1186
+ rb_define_method(rb_cNode, "inner_html", RUBY_METHOD_FUNC(node_inner_html), 0);
1187
+ rb_define_method(rb_cNode, "inner_xml", RUBY_METHOD_FUNC(node_inner_html), 0);
1188
+ rb_define_method(rb_cNode, "path", RUBY_METHOD_FUNC(node_path), 0);
1189
+ rb_define_method(rb_cNode, "blank?", RUBY_METHOD_FUNC(node_blank_p), 0);
575
1190
  rb_define_method(rb_cNode, "xpath", RUBY_METHOD_FUNC(node_xpath), 1);
576
1191
 
577
1192
  rb_cElement = rb_define_class_under(rb_mXML, "Element", rb_cNode);
@@ -588,4 +1203,11 @@ extern "C" void Init_rxerces(void) {
588
1203
  rb_define_method(rb_cNodeSet, "each", RUBY_METHOD_FUNC(nodeset_each), 0);
589
1204
  rb_define_method(rb_cNodeSet, "to_a", RUBY_METHOD_FUNC(nodeset_to_a), 0);
590
1205
  rb_include_module(rb_cNodeSet, rb_mEnumerable);
1206
+
1207
+ rb_cSchema = rb_define_class_under(rb_mXML, "Schema", rb_cObject);
1208
+ rb_undef_alloc_func(rb_cSchema);
1209
+ rb_define_singleton_method(rb_cSchema, "from_document", RUBY_METHOD_FUNC(schema_from_document), -1);
1210
+ rb_define_singleton_method(rb_cSchema, "from_string", RUBY_METHOD_FUNC(schema_from_document), -1);
1211
+
1212
+ rb_define_method(rb_cDocument, "validate", RUBY_METHOD_FUNC(document_validate), 1);
591
1213
  }