rxerces 0.5.0 → 0.6.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -57,6 +57,41 @@ static VALUE node_css(VALUE self, VALUE selector);
57
57
  static VALUE node_xpath(VALUE self, VALUE path);
58
58
  static VALUE document_xpath(VALUE self, VALUE path);
59
59
 
60
+ // Initialize Xerces (and Xalan if available) exactly once
61
+ static void ensure_xerces_initialized() {
62
+ if (xerces_initialized) {
63
+ return;
64
+ }
65
+
66
+ try {
67
+ XMLPlatformUtils::Initialize();
68
+ #ifdef HAVE_XALAN
69
+ XPathEvaluator::initialize();
70
+ xalan_initialized = true;
71
+ #endif
72
+ xerces_initialized = true;
73
+ } catch (const XMLException& e) {
74
+ char* message = XMLString::transcode(e.getMessage());
75
+ std::string error_msg = std::string("Xerces initialization failed: ") + message;
76
+ XMLString::release(&message);
77
+ rb_raise(rb_eRuntimeError, "%s", error_msg.c_str());
78
+ }
79
+ }
80
+
81
+ // Cleanup function called at exit
82
+ static void cleanup_xerces() {
83
+ #ifdef HAVE_XALAN
84
+ if (xalan_initialized) {
85
+ XPathEvaluator::terminate();
86
+ xalan_initialized = false;
87
+ }
88
+ #endif
89
+ if (xerces_initialized) {
90
+ XMLPlatformUtils::Terminate();
91
+ xerces_initialized = false;
92
+ }
93
+ }
94
+
60
95
  // Helper class to manage XMLCh strings
61
96
  class XStr {
62
97
  public:
@@ -103,6 +138,7 @@ private:
103
138
  typedef struct {
104
139
  DOMDocument* doc;
105
140
  XercesDOMParser* parser;
141
+ std::vector<std::string>* parse_errors;
106
142
  } DocumentWrapper;
107
143
 
108
144
  // Wrapper structure for DOMNode
@@ -128,19 +164,34 @@ public:
128
164
 
129
165
  void warning(const SAXParseException& e) {
130
166
  char* msg = XMLString::transcode(e.getMessage());
131
- errors.push_back(std::string("Warning: ") + msg);
167
+ char buffer[512];
168
+ snprintf(buffer, sizeof(buffer), "Warning at line %lu, column %lu: %s",
169
+ (unsigned long)e.getLineNumber(),
170
+ (unsigned long)e.getColumnNumber(),
171
+ msg);
172
+ errors.push_back(buffer);
132
173
  XMLString::release(&msg);
133
174
  }
134
175
 
135
176
  void error(const SAXParseException& e) {
136
177
  char* msg = XMLString::transcode(e.getMessage());
137
- errors.push_back(std::string("Error: ") + msg);
178
+ char buffer[512];
179
+ snprintf(buffer, sizeof(buffer), "Error at line %lu, column %lu: %s",
180
+ (unsigned long)e.getLineNumber(),
181
+ (unsigned long)e.getColumnNumber(),
182
+ msg);
183
+ errors.push_back(buffer);
138
184
  XMLString::release(&msg);
139
185
  }
140
186
 
141
187
  void fatalError(const SAXParseException& e) {
142
188
  char* msg = XMLString::transcode(e.getMessage());
143
- errors.push_back(std::string("Fatal: ") + msg);
189
+ char buffer[512];
190
+ snprintf(buffer, sizeof(buffer), "Fatal error at line %lu, column %lu: %s",
191
+ (unsigned long)e.getLineNumber(),
192
+ (unsigned long)e.getColumnNumber(),
193
+ msg);
194
+ errors.push_back(buffer);
144
195
  XMLString::release(&msg);
145
196
  }
146
197
 
@@ -149,6 +200,55 @@ public:
149
200
  }
150
201
  };
151
202
 
203
+ // Error handler for parsing - stores errors but doesn't throw
204
+ class ParseErrorHandler : public ErrorHandler {
205
+ public:
206
+ std::vector<std::string>* errors;
207
+ bool has_fatal;
208
+
209
+ ParseErrorHandler(std::vector<std::string>* error_vec)
210
+ : errors(error_vec), has_fatal(false) {}
211
+
212
+ void warning(const SAXParseException& e) {
213
+ char* msg = XMLString::transcode(e.getMessage());
214
+ char buffer[512];
215
+ snprintf(buffer, sizeof(buffer), "Warning at line %lu, column %lu: %s",
216
+ (unsigned long)e.getLineNumber(),
217
+ (unsigned long)e.getColumnNumber(),
218
+ msg);
219
+ errors->push_back(buffer);
220
+ XMLString::release(&msg);
221
+ }
222
+
223
+ void error(const SAXParseException& e) {
224
+ char* msg = XMLString::transcode(e.getMessage());
225
+ char buffer[512];
226
+ snprintf(buffer, sizeof(buffer), "Error at line %lu, column %lu: %s",
227
+ (unsigned long)e.getLineNumber(),
228
+ (unsigned long)e.getColumnNumber(),
229
+ msg);
230
+ errors->push_back(buffer);
231
+ XMLString::release(&msg);
232
+ }
233
+
234
+ void fatalError(const SAXParseException& e) {
235
+ has_fatal = true;
236
+ char* msg = XMLString::transcode(e.getMessage());
237
+ char buffer[512];
238
+ snprintf(buffer, sizeof(buffer), "Fatal error at line %lu, column %lu: %s",
239
+ (unsigned long)e.getLineNumber(),
240
+ (unsigned long)e.getColumnNumber(),
241
+ msg);
242
+ errors->push_back(buffer);
243
+ XMLString::release(&msg);
244
+ }
245
+
246
+ void resetErrors() {
247
+ errors->clear();
248
+ has_fatal = false;
249
+ }
250
+ };
251
+
152
252
  // Memory management functions
153
253
  static void document_free(void* ptr) {
154
254
  DocumentWrapper* wrapper = (DocumentWrapper*)ptr;
@@ -156,6 +256,9 @@ static void document_free(void* ptr) {
156
256
  if (wrapper->parser) {
157
257
  delete wrapper->parser;
158
258
  }
259
+ if (wrapper->parse_errors) {
260
+ delete wrapper->parse_errors;
261
+ }
159
262
  // Document is owned by parser, so don't delete it separately
160
263
  xfree(wrapper);
161
264
  }
@@ -169,6 +272,13 @@ static void node_free(void* ptr) {
169
272
  }
170
273
  }
171
274
 
275
+ static void node_mark(void* ptr) {
276
+ NodeWrapper* wrapper = (NodeWrapper*)ptr;
277
+ if (wrapper) {
278
+ rb_gc_mark(wrapper->doc_ref);
279
+ }
280
+ }
281
+
172
282
  static void nodeset_free(void* ptr) {
173
283
  NodeSetWrapper* wrapper = (NodeSetWrapper*)ptr;
174
284
  if (wrapper) {
@@ -176,6 +286,13 @@ static void nodeset_free(void* ptr) {
176
286
  }
177
287
  }
178
288
 
289
+ static void nodeset_mark(void* ptr) {
290
+ NodeSetWrapper* wrapper = (NodeSetWrapper*)ptr;
291
+ if (wrapper) {
292
+ rb_gc_mark(wrapper->nodes_array);
293
+ }
294
+ }
295
+
179
296
  static void schema_free(void* ptr) {
180
297
  SchemaWrapper* wrapper = (SchemaWrapper*)ptr;
181
298
  if (wrapper) {
@@ -211,14 +328,14 @@ static const rb_data_type_t document_type = {
211
328
 
212
329
  static const rb_data_type_t node_type = {
213
330
  "RXerces::XML::Node",
214
- {0, node_free, node_size},
331
+ {node_mark, node_free, node_size},
215
332
  0, 0,
216
333
  RUBY_TYPED_FREE_IMMEDIATELY
217
334
  };
218
335
 
219
336
  static const rb_data_type_t nodeset_type = {
220
337
  "RXerces::XML::NodeSet",
221
- {0, nodeset_free, nodeset_size},
338
+ {nodeset_mark, nodeset_free, nodeset_size},
222
339
  0, 0,
223
340
  RUBY_TYPED_FREE_IMMEDIATELY
224
341
  };
@@ -254,22 +371,12 @@ static VALUE wrap_node(DOMNode* node, VALUE doc_ref) {
254
371
  break;
255
372
  }
256
373
 
257
- // Keep reference to document to prevent GC
258
- rb_iv_set(rb_node, "@document", doc_ref);
259
-
260
374
  return rb_node;
261
375
  }
262
376
 
263
377
  // RXerces::XML::Document.parse(string)
264
378
  static VALUE document_parse(VALUE klass, VALUE str) {
265
- if (!xerces_initialized) {
266
- try {
267
- XMLPlatformUtils::Initialize();
268
- xerces_initialized = true;
269
- } catch (const XMLException& e) {
270
- rb_raise(rb_eRuntimeError, "Xerces initialization failed");
271
- }
272
- }
379
+ ensure_xerces_initialized();
273
380
 
274
381
  Check_Type(str, T_STRING);
275
382
  const char* xml_str = StringValueCStr(str);
@@ -279,6 +386,11 @@ static VALUE document_parse(VALUE klass, VALUE str) {
279
386
  parser->setDoNamespaces(true);
280
387
  parser->setDoSchema(false);
281
388
 
389
+ // Set up error handler to capture parse errors
390
+ std::vector<std::string>* parse_errors = new std::vector<std::string>();
391
+ ParseErrorHandler error_handler(parse_errors);
392
+ parser->setErrorHandler(&error_handler);
393
+
282
394
  try {
283
395
  MemBufInputSource input((const XMLByte*)xml_str, strlen(xml_str), "memory");
284
396
  parser->parse(input);
@@ -288,18 +400,33 @@ static VALUE document_parse(VALUE klass, VALUE str) {
288
400
  DocumentWrapper* wrapper = ALLOC(DocumentWrapper);
289
401
  wrapper->doc = doc;
290
402
  wrapper->parser = parser;
403
+ wrapper->parse_errors = parse_errors;
291
404
 
292
405
  VALUE rb_doc = TypedData_Wrap_Struct(rb_cDocument, &document_type, wrapper);
406
+
407
+ // If there were fatal errors, raise an exception with details
408
+ if (error_handler.has_fatal && !parse_errors->empty()) {
409
+ std::string all_errors;
410
+ for (const auto& err : *parse_errors) {
411
+ if (!all_errors.empty()) all_errors += "\n";
412
+ all_errors += err;
413
+ }
414
+ rb_raise(rb_eRuntimeError, "XML parsing failed:\n%s", all_errors.c_str());
415
+ }
416
+
293
417
  return rb_doc;
294
418
  } catch (const XMLException& e) {
295
419
  CharStr message(e.getMessage());
420
+ delete parse_errors;
296
421
  delete parser;
297
422
  rb_raise(rb_eRuntimeError, "XML parsing error: %s", message.localForm());
298
423
  } catch (const DOMException& e) {
299
424
  CharStr message(e.getMessage());
425
+ delete parse_errors;
300
426
  delete parser;
301
427
  rb_raise(rb_eRuntimeError, "DOM error: %s", message.localForm());
302
428
  } catch (...) {
429
+ delete parse_errors;
303
430
  delete parser;
304
431
  rb_raise(rb_eRuntimeError, "Unknown XML parsing error");
305
432
  }
@@ -307,6 +434,22 @@ static VALUE document_parse(VALUE klass, VALUE str) {
307
434
  return Qnil;
308
435
  }
309
436
 
437
+ // document.errors - returns array of parse errors (warnings and errors)
438
+ static VALUE document_errors(VALUE self) {
439
+ DocumentWrapper* wrapper;
440
+ TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
441
+
442
+ VALUE errors_array = rb_ary_new();
443
+
444
+ if (wrapper->parse_errors) {
445
+ for (const auto& error : *wrapper->parse_errors) {
446
+ rb_ary_push(errors_array, rb_str_new2(error.c_str()));
447
+ }
448
+ }
449
+
450
+ return errors_array;
451
+ }
452
+
310
453
  // document.root
311
454
  static VALUE document_root(VALUE self) {
312
455
  DocumentWrapper* wrapper;
@@ -468,14 +611,9 @@ static VALUE document_create_element(VALUE self, VALUE name) {
468
611
  #ifdef HAVE_XALAN
469
612
  // Helper function to execute XPath using Xalan for full XPath 1.0 support
470
613
  static VALUE execute_xpath_with_xalan(DOMNode* context_node, const char* xpath_str, VALUE doc_ref) {
471
- try {
472
- // Initialize Xalan if needed
473
- if (!xalan_initialized) {
474
- XPathEvaluator::initialize();
475
- XMLPlatformUtils::Initialize();
476
- xalan_initialized = true;
477
- }
614
+ ensure_xerces_initialized();
478
615
 
616
+ try {
479
617
  // Get the document
480
618
  DOMDocument* domDoc = context_node->getOwnerDocument();
481
619
  if (!domDoc && context_node->getNodeType() == DOMNode::DOCUMENT_NODE) {
@@ -600,8 +738,9 @@ static VALUE document_xpath(VALUE self, VALUE path) {
600
738
  }
601
739
 
602
740
  DOMXPathNSResolver* resolver = doc_wrapper->doc->createNSResolver(root);
741
+ XStr xpath_xstr(xpath_str);
603
742
  DOMXPathExpression* expression = doc_wrapper->doc->createExpression(
604
- XStr(xpath_str).unicodeForm(), resolver);
743
+ xpath_xstr.unicodeForm(), resolver);
605
744
 
606
745
  DOMXPathResult* result = expression->evaluate(
607
746
  doc_wrapper->doc->getDocumentElement(),
@@ -840,7 +979,8 @@ static VALUE node_text_set(VALUE self, VALUE text) {
840
979
  Check_Type(text, T_STRING);
841
980
  const char* text_str = StringValueCStr(text);
842
981
 
843
- wrapper->node->setTextContent(XStr(text_str).unicodeForm());
982
+ XStr text_xstr(text_str);
983
+ wrapper->node->setTextContent(text_xstr.unicodeForm());
844
984
 
845
985
  return text;
846
986
  }
@@ -858,7 +998,8 @@ static VALUE node_get_attribute(VALUE self, VALUE attr_name) {
858
998
  const char* attr_str = StringValueCStr(attr_name);
859
999
 
860
1000
  DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
861
- const XMLCh* value = element->getAttribute(XStr(attr_str).unicodeForm());
1001
+ XStr attr_xstr(attr_str);
1002
+ const XMLCh* value = element->getAttribute(attr_xstr.unicodeForm());
862
1003
 
863
1004
  if (!value || XMLString::stringLen(value) == 0) {
864
1005
  return Qnil;
@@ -884,7 +1025,9 @@ static VALUE node_set_attribute(VALUE self, VALUE attr_name, VALUE attr_value) {
884
1025
  const char* value_str = StringValueCStr(attr_value);
885
1026
 
886
1027
  DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
887
- element->setAttribute(XStr(attr_str).unicodeForm(), XStr(value_str).unicodeForm());
1028
+ XStr attr_xstr(attr_str);
1029
+ XStr value_xstr(value_str);
1030
+ element->setAttribute(attr_xstr.unicodeForm(), value_xstr.unicodeForm());
888
1031
 
889
1032
  return attr_value;
890
1033
  }
@@ -902,7 +1045,8 @@ static VALUE node_has_attribute_p(VALUE self, VALUE attr_name) {
902
1045
  const char* attr_str = StringValueCStr(attr_name);
903
1046
 
904
1047
  DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
905
- const XMLCh* value = element->getAttribute(XStr(attr_str).unicodeForm());
1048
+ XStr attr_xstr(attr_str);
1049
+ const XMLCh* value = element->getAttribute(attr_xstr.unicodeForm());
906
1050
 
907
1051
  if (!value || XMLString::stringLen(value) == 0) {
908
1052
  return Qfalse;
@@ -916,13 +1060,14 @@ static VALUE node_children(VALUE self) {
916
1060
  NodeWrapper* wrapper;
917
1061
  TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
918
1062
 
919
- VALUE doc_ref = rb_iv_get(self, "@document");
920
1063
  VALUE children = rb_ary_new();
921
1064
 
922
1065
  if (!wrapper->node) {
923
1066
  return children;
924
1067
  }
925
1068
 
1069
+ VALUE doc_ref = wrapper->doc_ref;
1070
+
926
1071
  DOMNodeList* child_nodes = wrapper->node->getChildNodes();
927
1072
  XMLSize_t count = child_nodes->getLength();
928
1073
 
@@ -939,13 +1084,13 @@ static VALUE node_element_children(VALUE self) {
939
1084
  NodeWrapper* wrapper;
940
1085
  TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
941
1086
 
942
- VALUE doc_ref = rb_iv_get(self, "@document");
943
1087
  VALUE children = rb_ary_new();
944
1088
 
945
1089
  if (!wrapper->node) {
946
1090
  return children;
947
1091
  }
948
1092
 
1093
+ VALUE doc_ref = wrapper->doc_ref;
949
1094
  DOMNodeList* child_nodes = wrapper->node->getChildNodes();
950
1095
  XMLSize_t count = child_nodes->getLength();
951
1096
 
@@ -973,7 +1118,7 @@ static VALUE node_parent(VALUE self) {
973
1118
  return Qnil;
974
1119
  }
975
1120
 
976
- VALUE doc_ref = rb_iv_get(self, "@document");
1121
+ VALUE doc_ref = wrapper->doc_ref;
977
1122
  return wrap_node(parent, doc_ref);
978
1123
  }
979
1124
 
@@ -991,7 +1136,7 @@ static VALUE node_ancestors(int argc, VALUE* argv, VALUE self) {
991
1136
  return ancestors;
992
1137
  }
993
1138
 
994
- VALUE doc_ref = rb_iv_get(self, "@document");
1139
+ VALUE doc_ref = wrapper->doc_ref;
995
1140
  DOMNode* current = wrapper->node->getParentNode();
996
1141
 
997
1142
  // Walk up the tree, collecting all ancestors
@@ -1100,7 +1245,7 @@ static VALUE node_next_sibling(VALUE self) {
1100
1245
  return Qnil;
1101
1246
  }
1102
1247
 
1103
- VALUE doc_ref = rb_iv_get(self, "@document");
1248
+ VALUE doc_ref = wrapper->doc_ref;
1104
1249
  return wrap_node(next, doc_ref);
1105
1250
  }
1106
1251
 
@@ -1118,7 +1263,7 @@ static VALUE node_previous_sibling(VALUE self) {
1118
1263
  return Qnil;
1119
1264
  }
1120
1265
 
1121
- VALUE doc_ref = rb_iv_get(self, "@document");
1266
+ VALUE doc_ref = wrapper->doc_ref;
1122
1267
  return wrap_node(prev, doc_ref);
1123
1268
  }
1124
1269
 
@@ -1131,7 +1276,7 @@ static VALUE node_next_element(VALUE self) {
1131
1276
  return Qnil;
1132
1277
  }
1133
1278
 
1134
- VALUE doc_ref = rb_iv_get(self, "@document");
1279
+ VALUE doc_ref = wrapper->doc_ref;
1135
1280
  DOMNode* next = wrapper->node->getNextSibling();
1136
1281
 
1137
1282
  // Skip non-element nodes
@@ -1155,7 +1300,7 @@ static VALUE node_previous_element(VALUE self) {
1155
1300
  return Qnil;
1156
1301
  }
1157
1302
 
1158
- VALUE doc_ref = rb_iv_get(self, "@document");
1303
+ VALUE doc_ref = wrapper->doc_ref;
1159
1304
  DOMNode* prev = wrapper->node->getPreviousSibling();
1160
1305
 
1161
1306
  // Skip non-element nodes
@@ -1185,6 +1330,7 @@ static VALUE node_add_child(VALUE self, VALUE child) {
1185
1330
  }
1186
1331
 
1187
1332
  DOMNode* child_node = NULL;
1333
+ bool needs_import = false;
1188
1334
 
1189
1335
  // Check if child is a string or a node
1190
1336
  if (TYPE(child) == T_STRING) {
@@ -1199,6 +1345,13 @@ static VALUE node_add_child(VALUE self, VALUE child) {
1199
1345
  if (rb_obj_is_kind_of(child, rb_cNode)) {
1200
1346
  TypedData_Get_Struct(child, NodeWrapper, &node_type, child_wrapper);
1201
1347
  child_node = child_wrapper->node;
1348
+
1349
+ // Check if child belongs to a different document
1350
+ DOMDocument* child_doc = child_node->getOwnerDocument();
1351
+ if (child_doc && child_doc != doc) {
1352
+ rb_raise(rb_eRuntimeError,
1353
+ "Node belongs to a different document. Use importNode to adopt nodes from other documents.");
1354
+ }
1202
1355
  } else {
1203
1356
  rb_raise(rb_eTypeError, "Argument must be a String or Node");
1204
1357
  }
@@ -1209,12 +1362,24 @@ static VALUE node_add_child(VALUE self, VALUE child) {
1209
1362
  }
1210
1363
 
1211
1364
  try {
1365
+ // appendChild will automatically detach the node from its current parent if it has one
1212
1366
  wrapper->node->appendChild(child_node);
1213
1367
  } catch (const DOMException& e) {
1214
1368
  char* message = XMLString::transcode(e.getMessage());
1215
1369
  VALUE rb_error = rb_str_new_cstr(message);
1216
1370
  XMLString::release(&message);
1217
- rb_raise(rb_eRuntimeError, "Failed to add child: %s", StringValueCStr(rb_error));
1371
+
1372
+ // Provide more context for common errors
1373
+ unsigned short code = e.code;
1374
+ if (code == DOMException::WRONG_DOCUMENT_ERR) {
1375
+ rb_raise(rb_eRuntimeError, "Node belongs to a different document: %s", StringValueCStr(rb_error));
1376
+ } else if (code == DOMException::HIERARCHY_REQUEST_ERR) {
1377
+ rb_raise(rb_eRuntimeError, "Invalid hierarchy: cannot add this node as a child: %s", StringValueCStr(rb_error));
1378
+ } else if (code == DOMException::NO_MODIFICATION_ALLOWED_ERR) {
1379
+ rb_raise(rb_eRuntimeError, "Node is read-only: %s", StringValueCStr(rb_error));
1380
+ } else {
1381
+ rb_raise(rb_eRuntimeError, "Failed to add child: %s", StringValueCStr(rb_error));
1382
+ }
1218
1383
  }
1219
1384
 
1220
1385
  return child;
@@ -1256,7 +1421,8 @@ static VALUE node_inner_html(VALUE self) {
1256
1421
  }
1257
1422
 
1258
1423
  try {
1259
- DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation(XStr("LS").unicodeForm());
1424
+ XStr ls_name("LS");
1425
+ DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation(ls_name.unicodeForm());
1260
1426
  DOMLSSerializer* serializer = ((DOMImplementationLS*)impl)->createLSSerializer();
1261
1427
 
1262
1428
  // Build a string by serializing each child
@@ -1421,7 +1587,7 @@ static VALUE node_xpath(VALUE self, VALUE path) {
1421
1587
 
1422
1588
  Check_Type(path, T_STRING);
1423
1589
  const char* xpath_str = StringValueCStr(path);
1424
- VALUE doc_ref = rb_iv_get(self, "@document");
1590
+ VALUE doc_ref = node_wrapper->doc_ref;
1425
1591
 
1426
1592
  #ifdef HAVE_XALAN
1427
1593
  // Use Xalan for full XPath 1.0 support
@@ -1437,8 +1603,9 @@ static VALUE node_xpath(VALUE self, VALUE path) {
1437
1603
  }
1438
1604
 
1439
1605
  DOMXPathNSResolver* resolver = doc->createNSResolver(node_wrapper->node);
1606
+ XStr xpath_xstr(xpath_str);
1440
1607
  DOMXPathExpression* expression = doc->createExpression(
1441
- XStr(xpath_str).unicodeForm(), resolver);
1608
+ xpath_xstr.unicodeForm(), resolver);
1442
1609
 
1443
1610
  DOMXPathResult* result = expression->evaluate(
1444
1611
  node_wrapper->node,
@@ -1919,18 +2086,7 @@ static VALUE schema_from_document(int argc, VALUE* argv, VALUE klass) {
1919
2086
  VALUE schema_source;
1920
2087
  rb_scan_args(argc, argv, "1", &schema_source);
1921
2088
 
1922
- // Ensure Xerces is initialized
1923
- if (!xerces_initialized) {
1924
- try {
1925
- XMLPlatformUtils::Initialize();
1926
- xerces_initialized = true;
1927
- } catch (const XMLException& e) {
1928
- char* message = XMLString::transcode(e.getMessage());
1929
- VALUE rb_error = rb_str_new_cstr(message);
1930
- XMLString::release(&message);
1931
- rb_raise(rb_eRuntimeError, "Failed to initialize Xerces-C: %s", StringValueCStr(rb_error));
1932
- }
1933
- }
2089
+ ensure_xerces_initialized();
1934
2090
 
1935
2091
  try {
1936
2092
  SchemaWrapper* wrapper = ALLOC(SchemaWrapper);
@@ -2103,6 +2259,7 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
2103
2259
  rb_undef_alloc_func(rb_cDocument);
2104
2260
  rb_define_singleton_method(rb_cDocument, "parse", RUBY_METHOD_FUNC(document_parse), 1);
2105
2261
  rb_define_method(rb_cDocument, "root", RUBY_METHOD_FUNC(document_root), 0);
2262
+ rb_define_method(rb_cDocument, "errors", RUBY_METHOD_FUNC(document_errors), 0);
2106
2263
  rb_define_method(rb_cDocument, "to_s", RUBY_METHOD_FUNC(document_to_s), 0);
2107
2264
  rb_define_alias(rb_cDocument, "to_xml", "to_s");
2108
2265
  rb_define_method(rb_cDocument, "inspect", RUBY_METHOD_FUNC(document_inspect), 0);
@@ -2182,4 +2339,7 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
2182
2339
  rb_define_singleton_method(rb_cSchema, "from_string", RUBY_METHOD_FUNC(schema_from_document), -1);
2183
2340
 
2184
2341
  rb_define_method(rb_cDocument, "validate", RUBY_METHOD_FUNC(document_validate), 1);
2342
+
2343
+ // Register cleanup handler
2344
+ atexit(cleanup_xerces);
2185
2345
  }
@@ -1,3 +1,3 @@
1
1
  module RXerces
2
- VERSION = "0.5.0".freeze
2
+ VERSION = "0.6.0".freeze
3
3
  end
data/rxerces.gemspec CHANGED
@@ -1,6 +1,6 @@
1
1
  Gem::Specification.new do |spec|
2
2
  spec.name = "rxerces"
3
- spec.version = "0.5.0"
3
+ spec.version = "0.6.0"
4
4
  spec.author = "Daniel J. Berger"
5
5
  spec.email = "djberg96@gmail.com"
6
6
  spec.cert_chain = ["certs/djberg96_pub.pem"]
@@ -176,4 +176,62 @@ RSpec.describe RXerces::XML::Document do
176
176
  expect(result.first.text).to eq('New content')
177
177
  end
178
178
  end
179
+
180
+ describe "#errors" do
181
+ it "returns empty array for valid XML" do
182
+ doc = RXerces::XML::Document.parse(simple_xml)
183
+ expect(doc.errors).to eq([])
184
+ end
185
+
186
+ it "returns empty array for complex valid XML" do
187
+ doc = RXerces::XML::Document.parse(complex_xml)
188
+ expect(doc.errors).to eq([])
189
+ end
190
+
191
+ context "with malformed XML" do
192
+ it "raises error and provides line/column information for unclosed tags" do
193
+ expect {
194
+ RXerces::XML::Document.parse('<root><item>test</root>')
195
+ }.to raise_error(RuntimeError, /Fatal error at line \d+, column \d+/)
196
+ end
197
+
198
+ it "raises error with detailed message for multiple errors" do
199
+ expect {
200
+ RXerces::XML::Document.parse('<root><item>test</item><unclosed>')
201
+ }.to raise_error(RuntimeError, /Fatal error at line/)
202
+ end
203
+
204
+ it "raises error for completely invalid XML" do
205
+ expect {
206
+ RXerces::XML::Document.parse('not xml at all')
207
+ }.to raise_error(RuntimeError, /Fatal error at line/)
208
+ end
209
+
210
+ it "raises error for mismatched tags" do
211
+ expect {
212
+ RXerces::XML::Document.parse('<root><item>test</other></root>')
213
+ }.to raise_error(RuntimeError, /Fatal error at line/)
214
+ end
215
+ end
216
+
217
+ context "error message format" do
218
+ it "includes line number in error message" do
219
+ expect {
220
+ RXerces::XML::Document.parse('<root><bad>')
221
+ }.to raise_error(RuntimeError, /line \d+/)
222
+ end
223
+
224
+ it "includes column number in error message" do
225
+ expect {
226
+ RXerces::XML::Document.parse('<root><bad>')
227
+ }.to raise_error(RuntimeError, /column \d+/)
228
+ end
229
+
230
+ it "describes the error type" do
231
+ expect {
232
+ RXerces::XML::Document.parse('<root><item>test</root>')
233
+ }.to raise_error(RuntimeError, /expected end of tag/)
234
+ end
235
+ end
236
+ end
179
237
  end
data/spec/node_spec.rb CHANGED
@@ -552,6 +552,63 @@ RSpec.describe RXerces::XML::Node do
552
552
  xml_output = simple_doc.to_s
553
553
  expect(xml_output).to include("Content")
554
554
  end
555
+
556
+ context "with nodes from different documents" do
557
+ it "raises error when adding node from different document" do
558
+ doc1 = RXerces::XML::Document.parse('<root><item>one</item></root>')
559
+ doc2 = RXerces::XML::Document.parse('<other><item>two</item></other>')
560
+
561
+ root1 = doc1.root
562
+ item2 = doc2.root.children.find { |n| n.is_a?(RXerces::XML::Element) }
563
+
564
+ expect {
565
+ root1.add_child(item2)
566
+ }.to raise_error(RuntimeError, /belongs to a different document/)
567
+ end
568
+
569
+ it "provides helpful error message mentioning importNode" do
570
+ doc1 = RXerces::XML::Document.parse('<root></root>')
571
+ doc2 = RXerces::XML::Document.parse('<other><child/></other>')
572
+
573
+ expect {
574
+ doc1.root.add_child(doc2.root.children.first)
575
+ }.to raise_error(RuntimeError, /importNode/)
576
+ end
577
+ end
578
+
579
+ context "when child already has a parent" do
580
+ it "moves node from one parent to another (detaches automatically)" do
581
+ doc = RXerces::XML::Document.parse('<root><parent1><child>text</child></parent1><parent2/></root>')
582
+ parent1 = doc.xpath('//parent1').first
583
+ parent2 = doc.xpath('//parent2').first
584
+ child = doc.xpath('//child').first
585
+
586
+ # Verify initial state
587
+ expect(parent1.children.select { |n| n.is_a?(RXerces::XML::Element) }.length).to eq(1)
588
+ expect(parent2.children.select { |n| n.is_a?(RXerces::XML::Element) }.length).to eq(0)
589
+
590
+ # Move child from parent1 to parent2
591
+ parent2.add_child(child)
592
+
593
+ # Child should now be under parent2, not parent1
594
+ expect(parent1.children.select { |n| n.is_a?(RXerces::XML::Element) }.length).to eq(0)
595
+ expect(parent2.children.select { |n| n.is_a?(RXerces::XML::Element) }.length).to eq(1)
596
+ expect(doc.xpath('//parent2/child').length).to eq(1)
597
+ expect(doc.xpath('//parent1/child').length).to eq(0)
598
+ end
599
+
600
+ it "preserves node content when moving" do
601
+ doc = RXerces::XML::Document.parse('<root><a><item>content</item></a><b/></root>')
602
+ a = doc.xpath('//a').first
603
+ b = doc.xpath('//b').first
604
+ item = doc.xpath('//item').first
605
+
606
+ b.add_child(item)
607
+
608
+ expect(item.text).to eq('content')
609
+ expect(doc.xpath('//b/item').first.text).to eq('content')
610
+ end
611
+ end
555
612
  end
556
613
 
557
614
  describe "#remove" do