rxerces 0.5.0 → 0.6.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -57,6 +57,41 @@ static VALUE node_css(VALUE self, VALUE selector);
57
57
  static VALUE node_xpath(VALUE self, VALUE path);
58
58
  static VALUE document_xpath(VALUE self, VALUE path);
59
59
 
60
+ // Initialize Xerces (and Xalan if available) exactly once
61
+ static void ensure_xerces_initialized() {
62
+ if (xerces_initialized) {
63
+ return;
64
+ }
65
+
66
+ try {
67
+ XMLPlatformUtils::Initialize();
68
+ #ifdef HAVE_XALAN
69
+ XPathEvaluator::initialize();
70
+ xalan_initialized = true;
71
+ #endif
72
+ xerces_initialized = true;
73
+ } catch (const XMLException& e) {
74
+ char* message = XMLString::transcode(e.getMessage());
75
+ std::string error_msg = std::string("Xerces initialization failed: ") + message;
76
+ XMLString::release(&message);
77
+ rb_raise(rb_eRuntimeError, "%s", error_msg.c_str());
78
+ }
79
+ }
80
+
81
+ // Cleanup function called at exit
82
+ static void cleanup_xerces() {
83
+ #ifdef HAVE_XALAN
84
+ if (xalan_initialized) {
85
+ XPathEvaluator::terminate();
86
+ xalan_initialized = false;
87
+ }
88
+ #endif
89
+ if (xerces_initialized) {
90
+ XMLPlatformUtils::Terminate();
91
+ xerces_initialized = false;
92
+ }
93
+ }
94
+
60
95
  // Helper class to manage XMLCh strings
61
96
  class XStr {
62
97
  public:
@@ -103,6 +138,7 @@ private:
103
138
  typedef struct {
104
139
  DOMDocument* doc;
105
140
  XercesDOMParser* parser;
141
+ std::vector<std::string>* parse_errors;
106
142
  } DocumentWrapper;
107
143
 
108
144
  // Wrapper structure for DOMNode
@@ -128,19 +164,34 @@ public:
128
164
 
129
165
  void warning(const SAXParseException& e) {
130
166
  char* msg = XMLString::transcode(e.getMessage());
131
- errors.push_back(std::string("Warning: ") + msg);
167
+ char buffer[512];
168
+ snprintf(buffer, sizeof(buffer), "Warning at line %lu, column %lu: %s",
169
+ (unsigned long)e.getLineNumber(),
170
+ (unsigned long)e.getColumnNumber(),
171
+ msg);
172
+ errors.push_back(buffer);
132
173
  XMLString::release(&msg);
133
174
  }
134
175
 
135
176
  void error(const SAXParseException& e) {
136
177
  char* msg = XMLString::transcode(e.getMessage());
137
- errors.push_back(std::string("Error: ") + msg);
178
+ char buffer[512];
179
+ snprintf(buffer, sizeof(buffer), "Error at line %lu, column %lu: %s",
180
+ (unsigned long)e.getLineNumber(),
181
+ (unsigned long)e.getColumnNumber(),
182
+ msg);
183
+ errors.push_back(buffer);
138
184
  XMLString::release(&msg);
139
185
  }
140
186
 
141
187
  void fatalError(const SAXParseException& e) {
142
188
  char* msg = XMLString::transcode(e.getMessage());
143
- errors.push_back(std::string("Fatal: ") + msg);
189
+ char buffer[512];
190
+ snprintf(buffer, sizeof(buffer), "Fatal error at line %lu, column %lu: %s",
191
+ (unsigned long)e.getLineNumber(),
192
+ (unsigned long)e.getColumnNumber(),
193
+ msg);
194
+ errors.push_back(buffer);
144
195
  XMLString::release(&msg);
145
196
  }
146
197
 
@@ -149,6 +200,55 @@ public:
149
200
  }
150
201
  };
151
202
 
203
+ // Error handler for parsing - stores errors but doesn't throw
204
+ class ParseErrorHandler : public ErrorHandler {
205
+ public:
206
+ std::vector<std::string>* errors;
207
+ bool has_fatal;
208
+
209
+ ParseErrorHandler(std::vector<std::string>* error_vec)
210
+ : errors(error_vec), has_fatal(false) {}
211
+
212
+ void warning(const SAXParseException& e) {
213
+ char* msg = XMLString::transcode(e.getMessage());
214
+ char buffer[512];
215
+ snprintf(buffer, sizeof(buffer), "Warning at line %lu, column %lu: %s",
216
+ (unsigned long)e.getLineNumber(),
217
+ (unsigned long)e.getColumnNumber(),
218
+ msg);
219
+ errors->push_back(buffer);
220
+ XMLString::release(&msg);
221
+ }
222
+
223
+ void error(const SAXParseException& e) {
224
+ char* msg = XMLString::transcode(e.getMessage());
225
+ char buffer[512];
226
+ snprintf(buffer, sizeof(buffer), "Error at line %lu, column %lu: %s",
227
+ (unsigned long)e.getLineNumber(),
228
+ (unsigned long)e.getColumnNumber(),
229
+ msg);
230
+ errors->push_back(buffer);
231
+ XMLString::release(&msg);
232
+ }
233
+
234
+ void fatalError(const SAXParseException& e) {
235
+ has_fatal = true;
236
+ char* msg = XMLString::transcode(e.getMessage());
237
+ char buffer[512];
238
+ snprintf(buffer, sizeof(buffer), "Fatal error at line %lu, column %lu: %s",
239
+ (unsigned long)e.getLineNumber(),
240
+ (unsigned long)e.getColumnNumber(),
241
+ msg);
242
+ errors->push_back(buffer);
243
+ XMLString::release(&msg);
244
+ }
245
+
246
+ void resetErrors() {
247
+ errors->clear();
248
+ has_fatal = false;
249
+ }
250
+ };
251
+
152
252
  // Memory management functions
153
253
  static void document_free(void* ptr) {
154
254
  DocumentWrapper* wrapper = (DocumentWrapper*)ptr;
@@ -156,6 +256,9 @@ static void document_free(void* ptr) {
156
256
  if (wrapper->parser) {
157
257
  delete wrapper->parser;
158
258
  }
259
+ if (wrapper->parse_errors) {
260
+ delete wrapper->parse_errors;
261
+ }
159
262
  // Document is owned by parser, so don't delete it separately
160
263
  xfree(wrapper);
161
264
  }
@@ -169,6 +272,13 @@ static void node_free(void* ptr) {
169
272
  }
170
273
  }
171
274
 
275
+ static void node_mark(void* ptr) {
276
+ NodeWrapper* wrapper = (NodeWrapper*)ptr;
277
+ if (wrapper) {
278
+ rb_gc_mark(wrapper->doc_ref);
279
+ }
280
+ }
281
+
172
282
  static void nodeset_free(void* ptr) {
173
283
  NodeSetWrapper* wrapper = (NodeSetWrapper*)ptr;
174
284
  if (wrapper) {
@@ -176,6 +286,13 @@ static void nodeset_free(void* ptr) {
176
286
  }
177
287
  }
178
288
 
289
+ static void nodeset_mark(void* ptr) {
290
+ NodeSetWrapper* wrapper = (NodeSetWrapper*)ptr;
291
+ if (wrapper) {
292
+ rb_gc_mark(wrapper->nodes_array);
293
+ }
294
+ }
295
+
179
296
  static void schema_free(void* ptr) {
180
297
  SchemaWrapper* wrapper = (SchemaWrapper*)ptr;
181
298
  if (wrapper) {
@@ -211,14 +328,14 @@ static const rb_data_type_t document_type = {
211
328
 
212
329
  static const rb_data_type_t node_type = {
213
330
  "RXerces::XML::Node",
214
- {0, node_free, node_size},
331
+ {node_mark, node_free, node_size},
215
332
  0, 0,
216
333
  RUBY_TYPED_FREE_IMMEDIATELY
217
334
  };
218
335
 
219
336
  static const rb_data_type_t nodeset_type = {
220
337
  "RXerces::XML::NodeSet",
221
- {0, nodeset_free, nodeset_size},
338
+ {nodeset_mark, nodeset_free, nodeset_size},
222
339
  0, 0,
223
340
  RUBY_TYPED_FREE_IMMEDIATELY
224
341
  };
@@ -254,22 +371,12 @@ static VALUE wrap_node(DOMNode* node, VALUE doc_ref) {
254
371
  break;
255
372
  }
256
373
 
257
- // Keep reference to document to prevent GC
258
- rb_iv_set(rb_node, "@document", doc_ref);
259
-
260
374
  return rb_node;
261
375
  }
262
376
 
263
377
  // RXerces::XML::Document.parse(string)
264
378
  static VALUE document_parse(VALUE klass, VALUE str) {
265
- if (!xerces_initialized) {
266
- try {
267
- XMLPlatformUtils::Initialize();
268
- xerces_initialized = true;
269
- } catch (const XMLException& e) {
270
- rb_raise(rb_eRuntimeError, "Xerces initialization failed");
271
- }
272
- }
379
+ ensure_xerces_initialized();
273
380
 
274
381
  Check_Type(str, T_STRING);
275
382
  const char* xml_str = StringValueCStr(str);
@@ -279,6 +386,11 @@ static VALUE document_parse(VALUE klass, VALUE str) {
279
386
  parser->setDoNamespaces(true);
280
387
  parser->setDoSchema(false);
281
388
 
389
+ // Set up error handler to capture parse errors
390
+ std::vector<std::string>* parse_errors = new std::vector<std::string>();
391
+ ParseErrorHandler error_handler(parse_errors);
392
+ parser->setErrorHandler(&error_handler);
393
+
282
394
  try {
283
395
  MemBufInputSource input((const XMLByte*)xml_str, strlen(xml_str), "memory");
284
396
  parser->parse(input);
@@ -288,18 +400,33 @@ static VALUE document_parse(VALUE klass, VALUE str) {
288
400
  DocumentWrapper* wrapper = ALLOC(DocumentWrapper);
289
401
  wrapper->doc = doc;
290
402
  wrapper->parser = parser;
403
+ wrapper->parse_errors = parse_errors;
291
404
 
292
405
  VALUE rb_doc = TypedData_Wrap_Struct(rb_cDocument, &document_type, wrapper);
406
+
407
+ // If there were fatal errors, raise an exception with details
408
+ if (error_handler.has_fatal && !parse_errors->empty()) {
409
+ std::string all_errors;
410
+ for (const auto& err : *parse_errors) {
411
+ if (!all_errors.empty()) all_errors += "\n";
412
+ all_errors += err;
413
+ }
414
+ rb_raise(rb_eRuntimeError, "XML parsing failed:\n%s", all_errors.c_str());
415
+ }
416
+
293
417
  return rb_doc;
294
418
  } catch (const XMLException& e) {
295
419
  CharStr message(e.getMessage());
420
+ delete parse_errors;
296
421
  delete parser;
297
422
  rb_raise(rb_eRuntimeError, "XML parsing error: %s", message.localForm());
298
423
  } catch (const DOMException& e) {
299
424
  CharStr message(e.getMessage());
425
+ delete parse_errors;
300
426
  delete parser;
301
427
  rb_raise(rb_eRuntimeError, "DOM error: %s", message.localForm());
302
428
  } catch (...) {
429
+ delete parse_errors;
303
430
  delete parser;
304
431
  rb_raise(rb_eRuntimeError, "Unknown XML parsing error");
305
432
  }
@@ -307,6 +434,22 @@ static VALUE document_parse(VALUE klass, VALUE str) {
307
434
  return Qnil;
308
435
  }
309
436
 
437
+ // document.errors - returns array of parse errors (warnings and errors)
438
+ static VALUE document_errors(VALUE self) {
439
+ DocumentWrapper* wrapper;
440
+ TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
441
+
442
+ VALUE errors_array = rb_ary_new();
443
+
444
+ if (wrapper->parse_errors) {
445
+ for (const auto& error : *wrapper->parse_errors) {
446
+ rb_ary_push(errors_array, rb_str_new2(error.c_str()));
447
+ }
448
+ }
449
+
450
+ return errors_array;
451
+ }
452
+
310
453
  // document.root
311
454
  static VALUE document_root(VALUE self) {
312
455
  DocumentWrapper* wrapper;
@@ -465,17 +608,103 @@ static VALUE document_create_element(VALUE self, VALUE name) {
465
608
  return Qnil;
466
609
  }
467
610
 
611
+ // document.children - returns all children (elements, text, comments, etc.)
612
+ static VALUE document_children(VALUE self) {
613
+ DocumentWrapper* wrapper;
614
+ TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
615
+
616
+ VALUE children = rb_ary_new();
617
+
618
+ if (!wrapper->doc) {
619
+ return children;
620
+ }
621
+
622
+ DOMNodeList* child_nodes = wrapper->doc->getChildNodes();
623
+ XMLSize_t count = child_nodes->getLength();
624
+
625
+ for (XMLSize_t i = 0; i < count; i++) {
626
+ DOMNode* child = child_nodes->item(i);
627
+ rb_ary_push(children, wrap_node(child, self));
628
+ }
629
+
630
+ return children;
631
+ }
632
+
633
+ // document.element_children - returns only element children (no text nodes, comments, etc.)
634
+ static VALUE document_element_children(VALUE self) {
635
+ DocumentWrapper* wrapper;
636
+ TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
637
+
638
+ VALUE children = rb_ary_new();
639
+
640
+ if (!wrapper->doc) {
641
+ return children;
642
+ }
643
+
644
+ DOMNodeList* child_nodes = wrapper->doc->getChildNodes();
645
+ XMLSize_t count = child_nodes->getLength();
646
+
647
+ for (XMLSize_t i = 0; i < count; i++) {
648
+ DOMNode* child = child_nodes->item(i);
649
+ if (child->getNodeType() == DOMNode::ELEMENT_NODE) {
650
+ rb_ary_push(children, wrap_node(child, self));
651
+ }
652
+ }
653
+
654
+ return children;
655
+ }
656
+
657
+ // document.first_element_child - returns first element child
658
+ static VALUE document_first_element_child(VALUE self) {
659
+ DocumentWrapper* wrapper;
660
+ TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
661
+
662
+ if (!wrapper->doc) {
663
+ return Qnil;
664
+ }
665
+
666
+ DOMNodeList* child_nodes = wrapper->doc->getChildNodes();
667
+ XMLSize_t count = child_nodes->getLength();
668
+
669
+ for (XMLSize_t i = 0; i < count; i++) {
670
+ DOMNode* child = child_nodes->item(i);
671
+ if (child->getNodeType() == DOMNode::ELEMENT_NODE) {
672
+ return wrap_node(child, self);
673
+ }
674
+ }
675
+
676
+ return Qnil;
677
+ }
678
+
679
+ // document.last_element_child - returns last element child
680
+ static VALUE document_last_element_child(VALUE self) {
681
+ DocumentWrapper* wrapper;
682
+ TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
683
+
684
+ if (!wrapper->doc) {
685
+ return Qnil;
686
+ }
687
+
688
+ DOMNodeList* child_nodes = wrapper->doc->getChildNodes();
689
+ XMLSize_t count = child_nodes->getLength();
690
+
691
+ // Search backwards for last element
692
+ for (XMLSize_t i = count; i > 0; i--) {
693
+ DOMNode* child = child_nodes->item(i - 1);
694
+ if (child->getNodeType() == DOMNode::ELEMENT_NODE) {
695
+ return wrap_node(child, self);
696
+ }
697
+ }
698
+
699
+ return Qnil;
700
+ }
701
+
468
702
  #ifdef HAVE_XALAN
469
703
  // Helper function to execute XPath using Xalan for full XPath 1.0 support
470
704
  static VALUE execute_xpath_with_xalan(DOMNode* context_node, const char* xpath_str, VALUE doc_ref) {
471
- try {
472
- // Initialize Xalan if needed
473
- if (!xalan_initialized) {
474
- XPathEvaluator::initialize();
475
- XMLPlatformUtils::Initialize();
476
- xalan_initialized = true;
477
- }
705
+ ensure_xerces_initialized();
478
706
 
707
+ try {
479
708
  // Get the document
480
709
  DOMDocument* domDoc = context_node->getOwnerDocument();
481
710
  if (!domDoc && context_node->getNodeType() == DOMNode::DOCUMENT_NODE) {
@@ -600,8 +829,9 @@ static VALUE document_xpath(VALUE self, VALUE path) {
600
829
  }
601
830
 
602
831
  DOMXPathNSResolver* resolver = doc_wrapper->doc->createNSResolver(root);
832
+ XStr xpath_xstr(xpath_str);
603
833
  DOMXPathExpression* expression = doc_wrapper->doc->createExpression(
604
- XStr(xpath_str).unicodeForm(), resolver);
834
+ xpath_xstr.unicodeForm(), resolver);
605
835
 
606
836
  DOMXPathResult* result = expression->evaluate(
607
837
  doc_wrapper->doc->getDocumentElement(),
@@ -643,6 +873,19 @@ static VALUE document_xpath(VALUE self, VALUE path) {
643
873
  #endif
644
874
  }
645
875
 
876
+ // document.at_xpath(path) - returns first matching node or nil
877
+ static VALUE document_at_xpath(VALUE self, VALUE path) {
878
+ VALUE nodeset = document_xpath(self, path);
879
+ NodeSetWrapper* wrapper;
880
+ TypedData_Get_Struct(nodeset, NodeSetWrapper, &nodeset_type, wrapper);
881
+
882
+ if (RARRAY_LEN(wrapper->nodes_array) == 0) {
883
+ return Qnil;
884
+ }
885
+
886
+ return rb_ary_entry(wrapper->nodes_array, 0);
887
+ }
888
+
646
889
  // document.css(selector) - Convert CSS to XPath and execute
647
890
  static VALUE document_css(VALUE self, VALUE selector) {
648
891
  Check_Type(selector, T_STRING);
@@ -840,7 +1083,8 @@ static VALUE node_text_set(VALUE self, VALUE text) {
840
1083
  Check_Type(text, T_STRING);
841
1084
  const char* text_str = StringValueCStr(text);
842
1085
 
843
- wrapper->node->setTextContent(XStr(text_str).unicodeForm());
1086
+ XStr text_xstr(text_str);
1087
+ wrapper->node->setTextContent(text_xstr.unicodeForm());
844
1088
 
845
1089
  return text;
846
1090
  }
@@ -858,7 +1102,8 @@ static VALUE node_get_attribute(VALUE self, VALUE attr_name) {
858
1102
  const char* attr_str = StringValueCStr(attr_name);
859
1103
 
860
1104
  DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
861
- const XMLCh* value = element->getAttribute(XStr(attr_str).unicodeForm());
1105
+ XStr attr_xstr(attr_str);
1106
+ const XMLCh* value = element->getAttribute(attr_xstr.unicodeForm());
862
1107
 
863
1108
  if (!value || XMLString::stringLen(value) == 0) {
864
1109
  return Qnil;
@@ -884,7 +1129,9 @@ static VALUE node_set_attribute(VALUE self, VALUE attr_name, VALUE attr_value) {
884
1129
  const char* value_str = StringValueCStr(attr_value);
885
1130
 
886
1131
  DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
887
- element->setAttribute(XStr(attr_str).unicodeForm(), XStr(value_str).unicodeForm());
1132
+ XStr attr_xstr(attr_str);
1133
+ XStr value_xstr(value_str);
1134
+ element->setAttribute(attr_xstr.unicodeForm(), value_xstr.unicodeForm());
888
1135
 
889
1136
  return attr_value;
890
1137
  }
@@ -902,7 +1149,8 @@ static VALUE node_has_attribute_p(VALUE self, VALUE attr_name) {
902
1149
  const char* attr_str = StringValueCStr(attr_name);
903
1150
 
904
1151
  DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
905
- const XMLCh* value = element->getAttribute(XStr(attr_str).unicodeForm());
1152
+ XStr attr_xstr(attr_str);
1153
+ const XMLCh* value = element->getAttribute(attr_xstr.unicodeForm());
906
1154
 
907
1155
  if (!value || XMLString::stringLen(value) == 0) {
908
1156
  return Qfalse;
@@ -916,13 +1164,14 @@ static VALUE node_children(VALUE self) {
916
1164
  NodeWrapper* wrapper;
917
1165
  TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
918
1166
 
919
- VALUE doc_ref = rb_iv_get(self, "@document");
920
1167
  VALUE children = rb_ary_new();
921
1168
 
922
1169
  if (!wrapper->node) {
923
1170
  return children;
924
1171
  }
925
1172
 
1173
+ VALUE doc_ref = wrapper->doc_ref;
1174
+
926
1175
  DOMNodeList* child_nodes = wrapper->node->getChildNodes();
927
1176
  XMLSize_t count = child_nodes->getLength();
928
1177
 
@@ -939,13 +1188,13 @@ static VALUE node_element_children(VALUE self) {
939
1188
  NodeWrapper* wrapper;
940
1189
  TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
941
1190
 
942
- VALUE doc_ref = rb_iv_get(self, "@document");
943
1191
  VALUE children = rb_ary_new();
944
1192
 
945
1193
  if (!wrapper->node) {
946
1194
  return children;
947
1195
  }
948
1196
 
1197
+ VALUE doc_ref = wrapper->doc_ref;
949
1198
  DOMNodeList* child_nodes = wrapper->node->getChildNodes();
950
1199
  XMLSize_t count = child_nodes->getLength();
951
1200
 
@@ -959,6 +1208,65 @@ static VALUE node_element_children(VALUE self) {
959
1208
  return children;
960
1209
  }
961
1210
 
1211
+ // node.first_element_child - returns first element child
1212
+ static VALUE node_first_element_child(VALUE self) {
1213
+ NodeWrapper* wrapper;
1214
+ TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
1215
+
1216
+ if (!wrapper->node) {
1217
+ return Qnil;
1218
+ }
1219
+
1220
+ VALUE doc_ref = wrapper->doc_ref;
1221
+ DOMNodeList* child_nodes = wrapper->node->getChildNodes();
1222
+ XMLSize_t count = child_nodes->getLength();
1223
+
1224
+ for (XMLSize_t i = 0; i < count; i++) {
1225
+ DOMNode* child = child_nodes->item(i);
1226
+ if (child->getNodeType() == DOMNode::ELEMENT_NODE) {
1227
+ return wrap_node(child, doc_ref);
1228
+ }
1229
+ }
1230
+
1231
+ return Qnil;
1232
+ }
1233
+
1234
+ // node.last_element_child - returns last element child
1235
+ static VALUE node_last_element_child(VALUE self) {
1236
+ NodeWrapper* wrapper;
1237
+ TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
1238
+
1239
+ if (!wrapper->node) {
1240
+ return Qnil;
1241
+ }
1242
+
1243
+ VALUE doc_ref = wrapper->doc_ref;
1244
+ DOMNodeList* child_nodes = wrapper->node->getChildNodes();
1245
+ XMLSize_t count = child_nodes->getLength();
1246
+
1247
+ // Search backwards for last element
1248
+ for (XMLSize_t i = count; i > 0; i--) {
1249
+ DOMNode* child = child_nodes->item(i - 1);
1250
+ if (child->getNodeType() == DOMNode::ELEMENT_NODE) {
1251
+ return wrap_node(child, doc_ref);
1252
+ }
1253
+ }
1254
+
1255
+ return Qnil;
1256
+ }
1257
+
1258
+ // node.document - returns the document that owns this node
1259
+ static VALUE node_document(VALUE self) {
1260
+ NodeWrapper* wrapper;
1261
+ TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
1262
+
1263
+ if (!wrapper->node) {
1264
+ return Qnil;
1265
+ }
1266
+
1267
+ return wrapper->doc_ref;
1268
+ }
1269
+
962
1270
  // node.parent
963
1271
  static VALUE node_parent(VALUE self) {
964
1272
  NodeWrapper* wrapper;
@@ -973,7 +1281,7 @@ static VALUE node_parent(VALUE self) {
973
1281
  return Qnil;
974
1282
  }
975
1283
 
976
- VALUE doc_ref = rb_iv_get(self, "@document");
1284
+ VALUE doc_ref = wrapper->doc_ref;
977
1285
  return wrap_node(parent, doc_ref);
978
1286
  }
979
1287
 
@@ -991,7 +1299,7 @@ static VALUE node_ancestors(int argc, VALUE* argv, VALUE self) {
991
1299
  return ancestors;
992
1300
  }
993
1301
 
994
- VALUE doc_ref = rb_iv_get(self, "@document");
1302
+ VALUE doc_ref = wrapper->doc_ref;
995
1303
  DOMNode* current = wrapper->node->getParentNode();
996
1304
 
997
1305
  // Walk up the tree, collecting all ancestors
@@ -1100,7 +1408,7 @@ static VALUE node_next_sibling(VALUE self) {
1100
1408
  return Qnil;
1101
1409
  }
1102
1410
 
1103
- VALUE doc_ref = rb_iv_get(self, "@document");
1411
+ VALUE doc_ref = wrapper->doc_ref;
1104
1412
  return wrap_node(next, doc_ref);
1105
1413
  }
1106
1414
 
@@ -1118,7 +1426,7 @@ static VALUE node_previous_sibling(VALUE self) {
1118
1426
  return Qnil;
1119
1427
  }
1120
1428
 
1121
- VALUE doc_ref = rb_iv_get(self, "@document");
1429
+ VALUE doc_ref = wrapper->doc_ref;
1122
1430
  return wrap_node(prev, doc_ref);
1123
1431
  }
1124
1432
 
@@ -1131,7 +1439,7 @@ static VALUE node_next_element(VALUE self) {
1131
1439
  return Qnil;
1132
1440
  }
1133
1441
 
1134
- VALUE doc_ref = rb_iv_get(self, "@document");
1442
+ VALUE doc_ref = wrapper->doc_ref;
1135
1443
  DOMNode* next = wrapper->node->getNextSibling();
1136
1444
 
1137
1445
  // Skip non-element nodes
@@ -1155,7 +1463,7 @@ static VALUE node_previous_element(VALUE self) {
1155
1463
  return Qnil;
1156
1464
  }
1157
1465
 
1158
- VALUE doc_ref = rb_iv_get(self, "@document");
1466
+ VALUE doc_ref = wrapper->doc_ref;
1159
1467
  DOMNode* prev = wrapper->node->getPreviousSibling();
1160
1468
 
1161
1469
  // Skip non-element nodes
@@ -1185,6 +1493,7 @@ static VALUE node_add_child(VALUE self, VALUE child) {
1185
1493
  }
1186
1494
 
1187
1495
  DOMNode* child_node = NULL;
1496
+ bool needs_import = false;
1188
1497
 
1189
1498
  // Check if child is a string or a node
1190
1499
  if (TYPE(child) == T_STRING) {
@@ -1199,6 +1508,13 @@ static VALUE node_add_child(VALUE self, VALUE child) {
1199
1508
  if (rb_obj_is_kind_of(child, rb_cNode)) {
1200
1509
  TypedData_Get_Struct(child, NodeWrapper, &node_type, child_wrapper);
1201
1510
  child_node = child_wrapper->node;
1511
+
1512
+ // Check if child belongs to a different document
1513
+ DOMDocument* child_doc = child_node->getOwnerDocument();
1514
+ if (child_doc && child_doc != doc) {
1515
+ rb_raise(rb_eRuntimeError,
1516
+ "Node belongs to a different document. Use importNode to adopt nodes from other documents.");
1517
+ }
1202
1518
  } else {
1203
1519
  rb_raise(rb_eTypeError, "Argument must be a String or Node");
1204
1520
  }
@@ -1209,12 +1525,24 @@ static VALUE node_add_child(VALUE self, VALUE child) {
1209
1525
  }
1210
1526
 
1211
1527
  try {
1528
+ // appendChild will automatically detach the node from its current parent if it has one
1212
1529
  wrapper->node->appendChild(child_node);
1213
1530
  } catch (const DOMException& e) {
1214
1531
  char* message = XMLString::transcode(e.getMessage());
1215
1532
  VALUE rb_error = rb_str_new_cstr(message);
1216
1533
  XMLString::release(&message);
1217
- rb_raise(rb_eRuntimeError, "Failed to add child: %s", StringValueCStr(rb_error));
1534
+
1535
+ // Provide more context for common errors
1536
+ unsigned short code = e.code;
1537
+ if (code == DOMException::WRONG_DOCUMENT_ERR) {
1538
+ rb_raise(rb_eRuntimeError, "Node belongs to a different document: %s", StringValueCStr(rb_error));
1539
+ } else if (code == DOMException::HIERARCHY_REQUEST_ERR) {
1540
+ rb_raise(rb_eRuntimeError, "Invalid hierarchy: cannot add this node as a child: %s", StringValueCStr(rb_error));
1541
+ } else if (code == DOMException::NO_MODIFICATION_ALLOWED_ERR) {
1542
+ rb_raise(rb_eRuntimeError, "Node is read-only: %s", StringValueCStr(rb_error));
1543
+ } else {
1544
+ rb_raise(rb_eRuntimeError, "Failed to add child: %s", StringValueCStr(rb_error));
1545
+ }
1218
1546
  }
1219
1547
 
1220
1548
  return child;
@@ -1256,7 +1584,8 @@ static VALUE node_inner_html(VALUE self) {
1256
1584
  }
1257
1585
 
1258
1586
  try {
1259
- DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation(XStr("LS").unicodeForm());
1587
+ XStr ls_name("LS");
1588
+ DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation(ls_name.unicodeForm());
1260
1589
  DOMLSSerializer* serializer = ((DOMImplementationLS*)impl)->createLSSerializer();
1261
1590
 
1262
1591
  // Build a string by serializing each child
@@ -1421,7 +1750,7 @@ static VALUE node_xpath(VALUE self, VALUE path) {
1421
1750
 
1422
1751
  Check_Type(path, T_STRING);
1423
1752
  const char* xpath_str = StringValueCStr(path);
1424
- VALUE doc_ref = rb_iv_get(self, "@document");
1753
+ VALUE doc_ref = node_wrapper->doc_ref;
1425
1754
 
1426
1755
  #ifdef HAVE_XALAN
1427
1756
  // Use Xalan for full XPath 1.0 support
@@ -1437,8 +1766,9 @@ static VALUE node_xpath(VALUE self, VALUE path) {
1437
1766
  }
1438
1767
 
1439
1768
  DOMXPathNSResolver* resolver = doc->createNSResolver(node_wrapper->node);
1769
+ XStr xpath_xstr(xpath_str);
1440
1770
  DOMXPathExpression* expression = doc->createExpression(
1441
- XStr(xpath_str).unicodeForm(), resolver);
1771
+ xpath_xstr.unicodeForm(), resolver);
1442
1772
 
1443
1773
  DOMXPathResult* result = expression->evaluate(
1444
1774
  node_wrapper->node,
@@ -1919,18 +2249,7 @@ static VALUE schema_from_document(int argc, VALUE* argv, VALUE klass) {
1919
2249
  VALUE schema_source;
1920
2250
  rb_scan_args(argc, argv, "1", &schema_source);
1921
2251
 
1922
- // Ensure Xerces is initialized
1923
- if (!xerces_initialized) {
1924
- try {
1925
- XMLPlatformUtils::Initialize();
1926
- xerces_initialized = true;
1927
- } catch (const XMLException& e) {
1928
- char* message = XMLString::transcode(e.getMessage());
1929
- VALUE rb_error = rb_str_new_cstr(message);
1930
- XMLString::release(&message);
1931
- rb_raise(rb_eRuntimeError, "Failed to initialize Xerces-C: %s", StringValueCStr(rb_error));
1932
- }
1933
- }
2252
+ ensure_xerces_initialized();
1934
2253
 
1935
2254
  try {
1936
2255
  SchemaWrapper* wrapper = ALLOC(SchemaWrapper);
@@ -2103,16 +2422,24 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
2103
2422
  rb_undef_alloc_func(rb_cDocument);
2104
2423
  rb_define_singleton_method(rb_cDocument, "parse", RUBY_METHOD_FUNC(document_parse), 1);
2105
2424
  rb_define_method(rb_cDocument, "root", RUBY_METHOD_FUNC(document_root), 0);
2425
+ rb_define_method(rb_cDocument, "errors", RUBY_METHOD_FUNC(document_errors), 0);
2106
2426
  rb_define_method(rb_cDocument, "to_s", RUBY_METHOD_FUNC(document_to_s), 0);
2107
2427
  rb_define_alias(rb_cDocument, "to_xml", "to_s");
2108
2428
  rb_define_method(rb_cDocument, "inspect", RUBY_METHOD_FUNC(document_inspect), 0);
2109
2429
  rb_define_method(rb_cDocument, "xpath", RUBY_METHOD_FUNC(document_xpath), 1);
2430
+ rb_define_method(rb_cDocument, "at_xpath", RUBY_METHOD_FUNC(document_at_xpath), 1);
2431
+ rb_define_alias(rb_cDocument, "at", "at_xpath");
2110
2432
  rb_define_method(rb_cDocument, "css", RUBY_METHOD_FUNC(document_css), 1);
2111
2433
  rb_define_method(rb_cDocument, "at_css", RUBY_METHOD_FUNC(document_at_css), 1);
2112
2434
  rb_define_method(rb_cDocument, "encoding", RUBY_METHOD_FUNC(document_encoding), 0);
2113
2435
  rb_define_method(rb_cDocument, "text", RUBY_METHOD_FUNC(document_text), 0);
2114
2436
  rb_define_alias(rb_cDocument, "content", "text");
2115
2437
  rb_define_method(rb_cDocument, "create_element", RUBY_METHOD_FUNC(document_create_element), 1);
2438
+ rb_define_method(rb_cDocument, "children", RUBY_METHOD_FUNC(document_children), 0);
2439
+ rb_define_method(rb_cDocument, "element_children", RUBY_METHOD_FUNC(document_element_children), 0);
2440
+ rb_define_alias(rb_cDocument, "elements", "element_children");
2441
+ rb_define_method(rb_cDocument, "first_element_child", RUBY_METHOD_FUNC(document_first_element_child), 0);
2442
+ rb_define_method(rb_cDocument, "last_element_child", RUBY_METHOD_FUNC(document_last_element_child), 0);
2116
2443
 
2117
2444
  rb_cNode = rb_define_class_under(rb_mXML, "Node", rb_cObject);
2118
2445
  rb_undef_alloc_func(rb_cNode);
@@ -2131,6 +2458,9 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
2131
2458
  rb_define_method(rb_cNode, "children", RUBY_METHOD_FUNC(node_children), 0);
2132
2459
  rb_define_method(rb_cNode, "element_children", RUBY_METHOD_FUNC(node_element_children), 0);
2133
2460
  rb_define_alias(rb_cNode, "elements", "element_children");
2461
+ rb_define_method(rb_cNode, "first_element_child", RUBY_METHOD_FUNC(node_first_element_child), 0);
2462
+ rb_define_method(rb_cNode, "last_element_child", RUBY_METHOD_FUNC(node_last_element_child), 0);
2463
+ rb_define_method(rb_cNode, "document", RUBY_METHOD_FUNC(node_document), 0);
2134
2464
  rb_define_method(rb_cNode, "parent", RUBY_METHOD_FUNC(node_parent), 0);
2135
2465
  rb_define_method(rb_cNode, "ancestors", RUBY_METHOD_FUNC(node_ancestors), -1);
2136
2466
  rb_define_method(rb_cNode, "attributes", RUBY_METHOD_FUNC(node_attributes), 0);
@@ -2182,4 +2512,7 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
2182
2512
  rb_define_singleton_method(rb_cSchema, "from_string", RUBY_METHOD_FUNC(schema_from_document), -1);
2183
2513
 
2184
2514
  rb_define_method(rb_cDocument, "validate", RUBY_METHOD_FUNC(document_validate), 1);
2515
+
2516
+ // Register cleanup handler
2517
+ atexit(cleanup_xerces);
2185
2518
  }