rxerces 0.5.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGES.md +7 -0
- data/benchmarks/README.md +68 -0
- data/benchmarks/css_benchmark.rb +115 -0
- data/benchmarks/parse_benchmark.rb +103 -0
- data/benchmarks/run_all.rb +25 -0
- data/benchmarks/serialization_benchmark.rb +93 -0
- data/benchmarks/traversal_benchmark.rb +149 -0
- data/benchmarks/xpath_benchmark.rb +100 -0
- data/ext/rxerces/rxerces.cpp +212 -52
- data/lib/rxerces/version.rb +1 -1
- data/rxerces.gemspec +1 -1
- data/spec/document_spec.rb +58 -0
- data/spec/node_spec.rb +57 -0
- data/spec/rxerces_shared.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +8 -1
- metadata.gz.sig +0 -0
data/ext/rxerces/rxerces.cpp
CHANGED
|
@@ -57,6 +57,41 @@ static VALUE node_css(VALUE self, VALUE selector);
|
|
|
57
57
|
static VALUE node_xpath(VALUE self, VALUE path);
|
|
58
58
|
static VALUE document_xpath(VALUE self, VALUE path);
|
|
59
59
|
|
|
60
|
+
// Initialize Xerces (and Xalan if available) exactly once
|
|
61
|
+
static void ensure_xerces_initialized() {
|
|
62
|
+
if (xerces_initialized) {
|
|
63
|
+
return;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
try {
|
|
67
|
+
XMLPlatformUtils::Initialize();
|
|
68
|
+
#ifdef HAVE_XALAN
|
|
69
|
+
XPathEvaluator::initialize();
|
|
70
|
+
xalan_initialized = true;
|
|
71
|
+
#endif
|
|
72
|
+
xerces_initialized = true;
|
|
73
|
+
} catch (const XMLException& e) {
|
|
74
|
+
char* message = XMLString::transcode(e.getMessage());
|
|
75
|
+
std::string error_msg = std::string("Xerces initialization failed: ") + message;
|
|
76
|
+
XMLString::release(&message);
|
|
77
|
+
rb_raise(rb_eRuntimeError, "%s", error_msg.c_str());
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
// Cleanup function called at exit
|
|
82
|
+
static void cleanup_xerces() {
|
|
83
|
+
#ifdef HAVE_XALAN
|
|
84
|
+
if (xalan_initialized) {
|
|
85
|
+
XPathEvaluator::terminate();
|
|
86
|
+
xalan_initialized = false;
|
|
87
|
+
}
|
|
88
|
+
#endif
|
|
89
|
+
if (xerces_initialized) {
|
|
90
|
+
XMLPlatformUtils::Terminate();
|
|
91
|
+
xerces_initialized = false;
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
|
|
60
95
|
// Helper class to manage XMLCh strings
|
|
61
96
|
class XStr {
|
|
62
97
|
public:
|
|
@@ -103,6 +138,7 @@ private:
|
|
|
103
138
|
typedef struct {
|
|
104
139
|
DOMDocument* doc;
|
|
105
140
|
XercesDOMParser* parser;
|
|
141
|
+
std::vector<std::string>* parse_errors;
|
|
106
142
|
} DocumentWrapper;
|
|
107
143
|
|
|
108
144
|
// Wrapper structure for DOMNode
|
|
@@ -128,19 +164,34 @@ public:
|
|
|
128
164
|
|
|
129
165
|
void warning(const SAXParseException& e) {
|
|
130
166
|
char* msg = XMLString::transcode(e.getMessage());
|
|
131
|
-
|
|
167
|
+
char buffer[512];
|
|
168
|
+
snprintf(buffer, sizeof(buffer), "Warning at line %lu, column %lu: %s",
|
|
169
|
+
(unsigned long)e.getLineNumber(),
|
|
170
|
+
(unsigned long)e.getColumnNumber(),
|
|
171
|
+
msg);
|
|
172
|
+
errors.push_back(buffer);
|
|
132
173
|
XMLString::release(&msg);
|
|
133
174
|
}
|
|
134
175
|
|
|
135
176
|
void error(const SAXParseException& e) {
|
|
136
177
|
char* msg = XMLString::transcode(e.getMessage());
|
|
137
|
-
|
|
178
|
+
char buffer[512];
|
|
179
|
+
snprintf(buffer, sizeof(buffer), "Error at line %lu, column %lu: %s",
|
|
180
|
+
(unsigned long)e.getLineNumber(),
|
|
181
|
+
(unsigned long)e.getColumnNumber(),
|
|
182
|
+
msg);
|
|
183
|
+
errors.push_back(buffer);
|
|
138
184
|
XMLString::release(&msg);
|
|
139
185
|
}
|
|
140
186
|
|
|
141
187
|
void fatalError(const SAXParseException& e) {
|
|
142
188
|
char* msg = XMLString::transcode(e.getMessage());
|
|
143
|
-
|
|
189
|
+
char buffer[512];
|
|
190
|
+
snprintf(buffer, sizeof(buffer), "Fatal error at line %lu, column %lu: %s",
|
|
191
|
+
(unsigned long)e.getLineNumber(),
|
|
192
|
+
(unsigned long)e.getColumnNumber(),
|
|
193
|
+
msg);
|
|
194
|
+
errors.push_back(buffer);
|
|
144
195
|
XMLString::release(&msg);
|
|
145
196
|
}
|
|
146
197
|
|
|
@@ -149,6 +200,55 @@ public:
|
|
|
149
200
|
}
|
|
150
201
|
};
|
|
151
202
|
|
|
203
|
+
// Error handler for parsing - stores errors but doesn't throw
|
|
204
|
+
class ParseErrorHandler : public ErrorHandler {
|
|
205
|
+
public:
|
|
206
|
+
std::vector<std::string>* errors;
|
|
207
|
+
bool has_fatal;
|
|
208
|
+
|
|
209
|
+
ParseErrorHandler(std::vector<std::string>* error_vec)
|
|
210
|
+
: errors(error_vec), has_fatal(false) {}
|
|
211
|
+
|
|
212
|
+
void warning(const SAXParseException& e) {
|
|
213
|
+
char* msg = XMLString::transcode(e.getMessage());
|
|
214
|
+
char buffer[512];
|
|
215
|
+
snprintf(buffer, sizeof(buffer), "Warning at line %lu, column %lu: %s",
|
|
216
|
+
(unsigned long)e.getLineNumber(),
|
|
217
|
+
(unsigned long)e.getColumnNumber(),
|
|
218
|
+
msg);
|
|
219
|
+
errors->push_back(buffer);
|
|
220
|
+
XMLString::release(&msg);
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
void error(const SAXParseException& e) {
|
|
224
|
+
char* msg = XMLString::transcode(e.getMessage());
|
|
225
|
+
char buffer[512];
|
|
226
|
+
snprintf(buffer, sizeof(buffer), "Error at line %lu, column %lu: %s",
|
|
227
|
+
(unsigned long)e.getLineNumber(),
|
|
228
|
+
(unsigned long)e.getColumnNumber(),
|
|
229
|
+
msg);
|
|
230
|
+
errors->push_back(buffer);
|
|
231
|
+
XMLString::release(&msg);
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
void fatalError(const SAXParseException& e) {
|
|
235
|
+
has_fatal = true;
|
|
236
|
+
char* msg = XMLString::transcode(e.getMessage());
|
|
237
|
+
char buffer[512];
|
|
238
|
+
snprintf(buffer, sizeof(buffer), "Fatal error at line %lu, column %lu: %s",
|
|
239
|
+
(unsigned long)e.getLineNumber(),
|
|
240
|
+
(unsigned long)e.getColumnNumber(),
|
|
241
|
+
msg);
|
|
242
|
+
errors->push_back(buffer);
|
|
243
|
+
XMLString::release(&msg);
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
void resetErrors() {
|
|
247
|
+
errors->clear();
|
|
248
|
+
has_fatal = false;
|
|
249
|
+
}
|
|
250
|
+
};
|
|
251
|
+
|
|
152
252
|
// Memory management functions
|
|
153
253
|
static void document_free(void* ptr) {
|
|
154
254
|
DocumentWrapper* wrapper = (DocumentWrapper*)ptr;
|
|
@@ -156,6 +256,9 @@ static void document_free(void* ptr) {
|
|
|
156
256
|
if (wrapper->parser) {
|
|
157
257
|
delete wrapper->parser;
|
|
158
258
|
}
|
|
259
|
+
if (wrapper->parse_errors) {
|
|
260
|
+
delete wrapper->parse_errors;
|
|
261
|
+
}
|
|
159
262
|
// Document is owned by parser, so don't delete it separately
|
|
160
263
|
xfree(wrapper);
|
|
161
264
|
}
|
|
@@ -169,6 +272,13 @@ static void node_free(void* ptr) {
|
|
|
169
272
|
}
|
|
170
273
|
}
|
|
171
274
|
|
|
275
|
+
static void node_mark(void* ptr) {
|
|
276
|
+
NodeWrapper* wrapper = (NodeWrapper*)ptr;
|
|
277
|
+
if (wrapper) {
|
|
278
|
+
rb_gc_mark(wrapper->doc_ref);
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
|
|
172
282
|
static void nodeset_free(void* ptr) {
|
|
173
283
|
NodeSetWrapper* wrapper = (NodeSetWrapper*)ptr;
|
|
174
284
|
if (wrapper) {
|
|
@@ -176,6 +286,13 @@ static void nodeset_free(void* ptr) {
|
|
|
176
286
|
}
|
|
177
287
|
}
|
|
178
288
|
|
|
289
|
+
static void nodeset_mark(void* ptr) {
|
|
290
|
+
NodeSetWrapper* wrapper = (NodeSetWrapper*)ptr;
|
|
291
|
+
if (wrapper) {
|
|
292
|
+
rb_gc_mark(wrapper->nodes_array);
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
179
296
|
static void schema_free(void* ptr) {
|
|
180
297
|
SchemaWrapper* wrapper = (SchemaWrapper*)ptr;
|
|
181
298
|
if (wrapper) {
|
|
@@ -211,14 +328,14 @@ static const rb_data_type_t document_type = {
|
|
|
211
328
|
|
|
212
329
|
// TypedData descriptor for RXerces::XML::Node wrappers.
static const rb_data_type_t node_type = {
    "RXerces::XML::Node",               // wrap_struct_name
    {node_mark, node_free, node_size},  // dmark, dfree, dsize
    0, 0,                               // parent, data
    RUBY_TYPED_FREE_IMMEDIATELY         // flags
};
|
|
218
335
|
|
|
219
336
|
// TypedData descriptor for RXerces::XML::NodeSet wrappers.
static const rb_data_type_t nodeset_type = {
    "RXerces::XML::NodeSet",                     // wrap_struct_name
    {nodeset_mark, nodeset_free, nodeset_size},  // dmark, dfree, dsize
    0, 0,                                        // parent, data
    RUBY_TYPED_FREE_IMMEDIATELY                  // flags
};
|
|
@@ -254,22 +371,12 @@ static VALUE wrap_node(DOMNode* node, VALUE doc_ref) {
|
|
|
254
371
|
break;
|
|
255
372
|
}
|
|
256
373
|
|
|
257
|
-
// Keep reference to document to prevent GC
|
|
258
|
-
rb_iv_set(rb_node, "@document", doc_ref);
|
|
259
|
-
|
|
260
374
|
return rb_node;
|
|
261
375
|
}
|
|
262
376
|
|
|
263
377
|
// RXerces::XML::Document.parse(string)
|
|
264
378
|
static VALUE document_parse(VALUE klass, VALUE str) {
|
|
265
|
-
|
|
266
|
-
try {
|
|
267
|
-
XMLPlatformUtils::Initialize();
|
|
268
|
-
xerces_initialized = true;
|
|
269
|
-
} catch (const XMLException& e) {
|
|
270
|
-
rb_raise(rb_eRuntimeError, "Xerces initialization failed");
|
|
271
|
-
}
|
|
272
|
-
}
|
|
379
|
+
ensure_xerces_initialized();
|
|
273
380
|
|
|
274
381
|
Check_Type(str, T_STRING);
|
|
275
382
|
const char* xml_str = StringValueCStr(str);
|
|
@@ -279,6 +386,11 @@ static VALUE document_parse(VALUE klass, VALUE str) {
|
|
|
279
386
|
parser->setDoNamespaces(true);
|
|
280
387
|
parser->setDoSchema(false);
|
|
281
388
|
|
|
389
|
+
// Set up error handler to capture parse errors
|
|
390
|
+
std::vector<std::string>* parse_errors = new std::vector<std::string>();
|
|
391
|
+
ParseErrorHandler error_handler(parse_errors);
|
|
392
|
+
parser->setErrorHandler(&error_handler);
|
|
393
|
+
|
|
282
394
|
try {
|
|
283
395
|
MemBufInputSource input((const XMLByte*)xml_str, strlen(xml_str), "memory");
|
|
284
396
|
parser->parse(input);
|
|
@@ -288,18 +400,33 @@ static VALUE document_parse(VALUE klass, VALUE str) {
|
|
|
288
400
|
DocumentWrapper* wrapper = ALLOC(DocumentWrapper);
|
|
289
401
|
wrapper->doc = doc;
|
|
290
402
|
wrapper->parser = parser;
|
|
403
|
+
wrapper->parse_errors = parse_errors;
|
|
291
404
|
|
|
292
405
|
VALUE rb_doc = TypedData_Wrap_Struct(rb_cDocument, &document_type, wrapper);
|
|
406
|
+
|
|
407
|
+
// If there were fatal errors, raise an exception with details
|
|
408
|
+
if (error_handler.has_fatal && !parse_errors->empty()) {
|
|
409
|
+
std::string all_errors;
|
|
410
|
+
for (const auto& err : *parse_errors) {
|
|
411
|
+
if (!all_errors.empty()) all_errors += "\n";
|
|
412
|
+
all_errors += err;
|
|
413
|
+
}
|
|
414
|
+
rb_raise(rb_eRuntimeError, "XML parsing failed:\n%s", all_errors.c_str());
|
|
415
|
+
}
|
|
416
|
+
|
|
293
417
|
return rb_doc;
|
|
294
418
|
} catch (const XMLException& e) {
|
|
295
419
|
CharStr message(e.getMessage());
|
|
420
|
+
delete parse_errors;
|
|
296
421
|
delete parser;
|
|
297
422
|
rb_raise(rb_eRuntimeError, "XML parsing error: %s", message.localForm());
|
|
298
423
|
} catch (const DOMException& e) {
|
|
299
424
|
CharStr message(e.getMessage());
|
|
425
|
+
delete parse_errors;
|
|
300
426
|
delete parser;
|
|
301
427
|
rb_raise(rb_eRuntimeError, "DOM error: %s", message.localForm());
|
|
302
428
|
} catch (...) {
|
|
429
|
+
delete parse_errors;
|
|
303
430
|
delete parser;
|
|
304
431
|
rb_raise(rb_eRuntimeError, "Unknown XML parsing error");
|
|
305
432
|
}
|
|
@@ -307,6 +434,22 @@ static VALUE document_parse(VALUE klass, VALUE str) {
|
|
|
307
434
|
return Qnil;
|
|
308
435
|
}
|
|
309
436
|
|
|
437
|
+
// document.errors - returns array of parse errors (warnings and errors)
|
|
438
|
+
static VALUE document_errors(VALUE self) {
|
|
439
|
+
DocumentWrapper* wrapper;
|
|
440
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
|
|
441
|
+
|
|
442
|
+
VALUE errors_array = rb_ary_new();
|
|
443
|
+
|
|
444
|
+
if (wrapper->parse_errors) {
|
|
445
|
+
for (const auto& error : *wrapper->parse_errors) {
|
|
446
|
+
rb_ary_push(errors_array, rb_str_new2(error.c_str()));
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
return errors_array;
|
|
451
|
+
}
|
|
452
|
+
|
|
310
453
|
// document.root
|
|
311
454
|
static VALUE document_root(VALUE self) {
|
|
312
455
|
DocumentWrapper* wrapper;
|
|
@@ -468,14 +611,9 @@ static VALUE document_create_element(VALUE self, VALUE name) {
|
|
|
468
611
|
#ifdef HAVE_XALAN
|
|
469
612
|
// Helper function to execute XPath using Xalan for full XPath 1.0 support
|
|
470
613
|
static VALUE execute_xpath_with_xalan(DOMNode* context_node, const char* xpath_str, VALUE doc_ref) {
|
|
471
|
-
|
|
472
|
-
// Initialize Xalan if needed
|
|
473
|
-
if (!xalan_initialized) {
|
|
474
|
-
XPathEvaluator::initialize();
|
|
475
|
-
XMLPlatformUtils::Initialize();
|
|
476
|
-
xalan_initialized = true;
|
|
477
|
-
}
|
|
614
|
+
ensure_xerces_initialized();
|
|
478
615
|
|
|
616
|
+
try {
|
|
479
617
|
// Get the document
|
|
480
618
|
DOMDocument* domDoc = context_node->getOwnerDocument();
|
|
481
619
|
if (!domDoc && context_node->getNodeType() == DOMNode::DOCUMENT_NODE) {
|
|
@@ -600,8 +738,9 @@ static VALUE document_xpath(VALUE self, VALUE path) {
|
|
|
600
738
|
}
|
|
601
739
|
|
|
602
740
|
DOMXPathNSResolver* resolver = doc_wrapper->doc->createNSResolver(root);
|
|
741
|
+
XStr xpath_xstr(xpath_str);
|
|
603
742
|
DOMXPathExpression* expression = doc_wrapper->doc->createExpression(
|
|
604
|
-
|
|
743
|
+
xpath_xstr.unicodeForm(), resolver);
|
|
605
744
|
|
|
606
745
|
DOMXPathResult* result = expression->evaluate(
|
|
607
746
|
doc_wrapper->doc->getDocumentElement(),
|
|
@@ -840,7 +979,8 @@ static VALUE node_text_set(VALUE self, VALUE text) {
|
|
|
840
979
|
Check_Type(text, T_STRING);
|
|
841
980
|
const char* text_str = StringValueCStr(text);
|
|
842
981
|
|
|
843
|
-
|
|
982
|
+
XStr text_xstr(text_str);
|
|
983
|
+
wrapper->node->setTextContent(text_xstr.unicodeForm());
|
|
844
984
|
|
|
845
985
|
return text;
|
|
846
986
|
}
|
|
@@ -858,7 +998,8 @@ static VALUE node_get_attribute(VALUE self, VALUE attr_name) {
|
|
|
858
998
|
const char* attr_str = StringValueCStr(attr_name);
|
|
859
999
|
|
|
860
1000
|
DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
|
|
861
|
-
|
|
1001
|
+
XStr attr_xstr(attr_str);
|
|
1002
|
+
const XMLCh* value = element->getAttribute(attr_xstr.unicodeForm());
|
|
862
1003
|
|
|
863
1004
|
if (!value || XMLString::stringLen(value) == 0) {
|
|
864
1005
|
return Qnil;
|
|
@@ -884,7 +1025,9 @@ static VALUE node_set_attribute(VALUE self, VALUE attr_name, VALUE attr_value) {
|
|
|
884
1025
|
const char* value_str = StringValueCStr(attr_value);
|
|
885
1026
|
|
|
886
1027
|
DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
|
|
887
|
-
|
|
1028
|
+
XStr attr_xstr(attr_str);
|
|
1029
|
+
XStr value_xstr(value_str);
|
|
1030
|
+
element->setAttribute(attr_xstr.unicodeForm(), value_xstr.unicodeForm());
|
|
888
1031
|
|
|
889
1032
|
return attr_value;
|
|
890
1033
|
}
|
|
@@ -902,7 +1045,8 @@ static VALUE node_has_attribute_p(VALUE self, VALUE attr_name) {
|
|
|
902
1045
|
const char* attr_str = StringValueCStr(attr_name);
|
|
903
1046
|
|
|
904
1047
|
DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
|
|
905
|
-
|
|
1048
|
+
XStr attr_xstr(attr_str);
|
|
1049
|
+
const XMLCh* value = element->getAttribute(attr_xstr.unicodeForm());
|
|
906
1050
|
|
|
907
1051
|
if (!value || XMLString::stringLen(value) == 0) {
|
|
908
1052
|
return Qfalse;
|
|
@@ -916,13 +1060,14 @@ static VALUE node_children(VALUE self) {
|
|
|
916
1060
|
NodeWrapper* wrapper;
|
|
917
1061
|
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
918
1062
|
|
|
919
|
-
VALUE doc_ref = rb_iv_get(self, "@document");
|
|
920
1063
|
VALUE children = rb_ary_new();
|
|
921
1064
|
|
|
922
1065
|
if (!wrapper->node) {
|
|
923
1066
|
return children;
|
|
924
1067
|
}
|
|
925
1068
|
|
|
1069
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1070
|
+
|
|
926
1071
|
DOMNodeList* child_nodes = wrapper->node->getChildNodes();
|
|
927
1072
|
XMLSize_t count = child_nodes->getLength();
|
|
928
1073
|
|
|
@@ -939,13 +1084,13 @@ static VALUE node_element_children(VALUE self) {
|
|
|
939
1084
|
NodeWrapper* wrapper;
|
|
940
1085
|
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
941
1086
|
|
|
942
|
-
VALUE doc_ref = rb_iv_get(self, "@document");
|
|
943
1087
|
VALUE children = rb_ary_new();
|
|
944
1088
|
|
|
945
1089
|
if (!wrapper->node) {
|
|
946
1090
|
return children;
|
|
947
1091
|
}
|
|
948
1092
|
|
|
1093
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
949
1094
|
DOMNodeList* child_nodes = wrapper->node->getChildNodes();
|
|
950
1095
|
XMLSize_t count = child_nodes->getLength();
|
|
951
1096
|
|
|
@@ -973,7 +1118,7 @@ static VALUE node_parent(VALUE self) {
|
|
|
973
1118
|
return Qnil;
|
|
974
1119
|
}
|
|
975
1120
|
|
|
976
|
-
VALUE doc_ref =
|
|
1121
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
977
1122
|
return wrap_node(parent, doc_ref);
|
|
978
1123
|
}
|
|
979
1124
|
|
|
@@ -991,7 +1136,7 @@ static VALUE node_ancestors(int argc, VALUE* argv, VALUE self) {
|
|
|
991
1136
|
return ancestors;
|
|
992
1137
|
}
|
|
993
1138
|
|
|
994
|
-
VALUE doc_ref =
|
|
1139
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
995
1140
|
DOMNode* current = wrapper->node->getParentNode();
|
|
996
1141
|
|
|
997
1142
|
// Walk up the tree, collecting all ancestors
|
|
@@ -1100,7 +1245,7 @@ static VALUE node_next_sibling(VALUE self) {
|
|
|
1100
1245
|
return Qnil;
|
|
1101
1246
|
}
|
|
1102
1247
|
|
|
1103
|
-
VALUE doc_ref =
|
|
1248
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1104
1249
|
return wrap_node(next, doc_ref);
|
|
1105
1250
|
}
|
|
1106
1251
|
|
|
@@ -1118,7 +1263,7 @@ static VALUE node_previous_sibling(VALUE self) {
|
|
|
1118
1263
|
return Qnil;
|
|
1119
1264
|
}
|
|
1120
1265
|
|
|
1121
|
-
VALUE doc_ref =
|
|
1266
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1122
1267
|
return wrap_node(prev, doc_ref);
|
|
1123
1268
|
}
|
|
1124
1269
|
|
|
@@ -1131,7 +1276,7 @@ static VALUE node_next_element(VALUE self) {
|
|
|
1131
1276
|
return Qnil;
|
|
1132
1277
|
}
|
|
1133
1278
|
|
|
1134
|
-
VALUE doc_ref =
|
|
1279
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1135
1280
|
DOMNode* next = wrapper->node->getNextSibling();
|
|
1136
1281
|
|
|
1137
1282
|
// Skip non-element nodes
|
|
@@ -1155,7 +1300,7 @@ static VALUE node_previous_element(VALUE self) {
|
|
|
1155
1300
|
return Qnil;
|
|
1156
1301
|
}
|
|
1157
1302
|
|
|
1158
|
-
VALUE doc_ref =
|
|
1303
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1159
1304
|
DOMNode* prev = wrapper->node->getPreviousSibling();
|
|
1160
1305
|
|
|
1161
1306
|
// Skip non-element nodes
|
|
@@ -1185,6 +1330,7 @@ static VALUE node_add_child(VALUE self, VALUE child) {
|
|
|
1185
1330
|
}
|
|
1186
1331
|
|
|
1187
1332
|
DOMNode* child_node = NULL;
|
|
1333
|
+
bool needs_import = false;
|
|
1188
1334
|
|
|
1189
1335
|
// Check if child is a string or a node
|
|
1190
1336
|
if (TYPE(child) == T_STRING) {
|
|
@@ -1199,6 +1345,13 @@ static VALUE node_add_child(VALUE self, VALUE child) {
|
|
|
1199
1345
|
if (rb_obj_is_kind_of(child, rb_cNode)) {
|
|
1200
1346
|
TypedData_Get_Struct(child, NodeWrapper, &node_type, child_wrapper);
|
|
1201
1347
|
child_node = child_wrapper->node;
|
|
1348
|
+
|
|
1349
|
+
// Check if child belongs to a different document
|
|
1350
|
+
DOMDocument* child_doc = child_node->getOwnerDocument();
|
|
1351
|
+
if (child_doc && child_doc != doc) {
|
|
1352
|
+
rb_raise(rb_eRuntimeError,
|
|
1353
|
+
"Node belongs to a different document. Use importNode to adopt nodes from other documents.");
|
|
1354
|
+
}
|
|
1202
1355
|
} else {
|
|
1203
1356
|
rb_raise(rb_eTypeError, "Argument must be a String or Node");
|
|
1204
1357
|
}
|
|
@@ -1209,12 +1362,24 @@ static VALUE node_add_child(VALUE self, VALUE child) {
|
|
|
1209
1362
|
}
|
|
1210
1363
|
|
|
1211
1364
|
try {
|
|
1365
|
+
// appendChild will automatically detach the node from its current parent if it has one
|
|
1212
1366
|
wrapper->node->appendChild(child_node);
|
|
1213
1367
|
} catch (const DOMException& e) {
|
|
1214
1368
|
char* message = XMLString::transcode(e.getMessage());
|
|
1215
1369
|
VALUE rb_error = rb_str_new_cstr(message);
|
|
1216
1370
|
XMLString::release(&message);
|
|
1217
|
-
|
|
1371
|
+
|
|
1372
|
+
// Provide more context for common errors
|
|
1373
|
+
unsigned short code = e.code;
|
|
1374
|
+
if (code == DOMException::WRONG_DOCUMENT_ERR) {
|
|
1375
|
+
rb_raise(rb_eRuntimeError, "Node belongs to a different document: %s", StringValueCStr(rb_error));
|
|
1376
|
+
} else if (code == DOMException::HIERARCHY_REQUEST_ERR) {
|
|
1377
|
+
rb_raise(rb_eRuntimeError, "Invalid hierarchy: cannot add this node as a child: %s", StringValueCStr(rb_error));
|
|
1378
|
+
} else if (code == DOMException::NO_MODIFICATION_ALLOWED_ERR) {
|
|
1379
|
+
rb_raise(rb_eRuntimeError, "Node is read-only: %s", StringValueCStr(rb_error));
|
|
1380
|
+
} else {
|
|
1381
|
+
rb_raise(rb_eRuntimeError, "Failed to add child: %s", StringValueCStr(rb_error));
|
|
1382
|
+
}
|
|
1218
1383
|
}
|
|
1219
1384
|
|
|
1220
1385
|
return child;
|
|
@@ -1256,7 +1421,8 @@ static VALUE node_inner_html(VALUE self) {
|
|
|
1256
1421
|
}
|
|
1257
1422
|
|
|
1258
1423
|
try {
|
|
1259
|
-
|
|
1424
|
+
XStr ls_name("LS");
|
|
1425
|
+
DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation(ls_name.unicodeForm());
|
|
1260
1426
|
DOMLSSerializer* serializer = ((DOMImplementationLS*)impl)->createLSSerializer();
|
|
1261
1427
|
|
|
1262
1428
|
// Build a string by serializing each child
|
|
@@ -1421,7 +1587,7 @@ static VALUE node_xpath(VALUE self, VALUE path) {
|
|
|
1421
1587
|
|
|
1422
1588
|
Check_Type(path, T_STRING);
|
|
1423
1589
|
const char* xpath_str = StringValueCStr(path);
|
|
1424
|
-
VALUE doc_ref =
|
|
1590
|
+
VALUE doc_ref = node_wrapper->doc_ref;
|
|
1425
1591
|
|
|
1426
1592
|
#ifdef HAVE_XALAN
|
|
1427
1593
|
// Use Xalan for full XPath 1.0 support
|
|
@@ -1437,8 +1603,9 @@ static VALUE node_xpath(VALUE self, VALUE path) {
|
|
|
1437
1603
|
}
|
|
1438
1604
|
|
|
1439
1605
|
DOMXPathNSResolver* resolver = doc->createNSResolver(node_wrapper->node);
|
|
1606
|
+
XStr xpath_xstr(xpath_str);
|
|
1440
1607
|
DOMXPathExpression* expression = doc->createExpression(
|
|
1441
|
-
|
|
1608
|
+
xpath_xstr.unicodeForm(), resolver);
|
|
1442
1609
|
|
|
1443
1610
|
DOMXPathResult* result = expression->evaluate(
|
|
1444
1611
|
node_wrapper->node,
|
|
@@ -1919,18 +2086,7 @@ static VALUE schema_from_document(int argc, VALUE* argv, VALUE klass) {
|
|
|
1919
2086
|
VALUE schema_source;
|
|
1920
2087
|
rb_scan_args(argc, argv, "1", &schema_source);
|
|
1921
2088
|
|
|
1922
|
-
|
|
1923
|
-
if (!xerces_initialized) {
|
|
1924
|
-
try {
|
|
1925
|
-
XMLPlatformUtils::Initialize();
|
|
1926
|
-
xerces_initialized = true;
|
|
1927
|
-
} catch (const XMLException& e) {
|
|
1928
|
-
char* message = XMLString::transcode(e.getMessage());
|
|
1929
|
-
VALUE rb_error = rb_str_new_cstr(message);
|
|
1930
|
-
XMLString::release(&message);
|
|
1931
|
-
rb_raise(rb_eRuntimeError, "Failed to initialize Xerces-C: %s", StringValueCStr(rb_error));
|
|
1932
|
-
}
|
|
1933
|
-
}
|
|
2089
|
+
ensure_xerces_initialized();
|
|
1934
2090
|
|
|
1935
2091
|
try {
|
|
1936
2092
|
SchemaWrapper* wrapper = ALLOC(SchemaWrapper);
|
|
@@ -2103,6 +2259,7 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
|
|
|
2103
2259
|
rb_undef_alloc_func(rb_cDocument);
|
|
2104
2260
|
rb_define_singleton_method(rb_cDocument, "parse", RUBY_METHOD_FUNC(document_parse), 1);
|
|
2105
2261
|
rb_define_method(rb_cDocument, "root", RUBY_METHOD_FUNC(document_root), 0);
|
|
2262
|
+
rb_define_method(rb_cDocument, "errors", RUBY_METHOD_FUNC(document_errors), 0);
|
|
2106
2263
|
rb_define_method(rb_cDocument, "to_s", RUBY_METHOD_FUNC(document_to_s), 0);
|
|
2107
2264
|
rb_define_alias(rb_cDocument, "to_xml", "to_s");
|
|
2108
2265
|
rb_define_method(rb_cDocument, "inspect", RUBY_METHOD_FUNC(document_inspect), 0);
|
|
@@ -2182,4 +2339,7 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
|
|
|
2182
2339
|
rb_define_singleton_method(rb_cSchema, "from_string", RUBY_METHOD_FUNC(schema_from_document), -1);
|
|
2183
2340
|
|
|
2184
2341
|
rb_define_method(rb_cDocument, "validate", RUBY_METHOD_FUNC(document_validate), 1);
|
|
2342
|
+
|
|
2343
|
+
// Register cleanup handler
|
|
2344
|
+
atexit(cleanup_xerces);
|
|
2185
2345
|
}
|
data/lib/rxerces/version.rb
CHANGED
data/rxerces.gemspec
CHANGED
data/spec/document_spec.rb
CHANGED
|
@@ -176,4 +176,62 @@ RSpec.describe RXerces::XML::Document do
|
|
|
176
176
|
expect(result.first.text).to eq('New content')
|
|
177
177
|
end
|
|
178
178
|
end
|
|
179
|
+
|
|
180
|
+
# Document#errors and parse-failure reporting: well-formed documents are
# expected to report no errors, while malformed input is expected to make
# Document.parse raise a RuntimeError whose message carries the details.
describe "#errors" do
  it "returns empty array for valid XML" do
    expect(RXerces::XML::Document.parse(simple_xml).errors).to eq([])
  end

  it "returns empty array for complex valid XML" do
    expect(RXerces::XML::Document.parse(complex_xml).errors).to eq([])
  end

  context "with malformed XML" do
    it "raises error and provides line/column information for unclosed tags" do
      malformed = '<root><item>test</root>'
      expect { RXerces::XML::Document.parse(malformed) }
        .to raise_error(RuntimeError, /Fatal error at line \d+, column \d+/)
    end

    it "raises error with detailed message for multiple errors" do
      malformed = '<root><item>test</item><unclosed>'
      expect { RXerces::XML::Document.parse(malformed) }
        .to raise_error(RuntimeError, /Fatal error at line/)
    end

    it "raises error for completely invalid XML" do
      malformed = 'not xml at all'
      expect { RXerces::XML::Document.parse(malformed) }
        .to raise_error(RuntimeError, /Fatal error at line/)
    end

    it "raises error for mismatched tags" do
      malformed = '<root><item>test</other></root>'
      expect { RXerces::XML::Document.parse(malformed) }
        .to raise_error(RuntimeError, /Fatal error at line/)
    end
  end

  context "error message format" do
    it "includes line number in error message" do
      malformed = '<root><bad>'
      expect { RXerces::XML::Document.parse(malformed) }
        .to raise_error(RuntimeError, /line \d+/)
    end

    it "includes column number in error message" do
      malformed = '<root><bad>'
      expect { RXerces::XML::Document.parse(malformed) }
        .to raise_error(RuntimeError, /column \d+/)
    end

    it "describes the error type" do
      malformed = '<root><item>test</root>'
      expect { RXerces::XML::Document.parse(malformed) }
        .to raise_error(RuntimeError, /expected end of tag/)
    end
  end
end
|
|
179
237
|
end
|
data/spec/node_spec.rb
CHANGED
|
@@ -552,6 +552,63 @@ RSpec.describe RXerces::XML::Node do
|
|
|
552
552
|
xml_output = simple_doc.to_s
|
|
553
553
|
expect(xml_output).to include("Content")
|
|
554
554
|
end
|
|
555
|
+
|
|
556
|
+
# add_child is expected to reject a node owned by another document, with a
# message that points the caller at importNode.
context "with nodes from different documents" do
  it "raises error when adding node from different document" do
    target_root = RXerces::XML::Document.parse('<root><item>one</item></root>').root
    foreign_doc = RXerces::XML::Document.parse('<other><item>two</item></other>')
    foreign_item = foreign_doc.root.children.find do |node|
      node.is_a?(RXerces::XML::Element)
    end

    expect { target_root.add_child(foreign_item) }
      .to raise_error(RuntimeError, /belongs to a different document/)
  end

  it "provides helpful error message mentioning importNode" do
    target_doc = RXerces::XML::Document.parse('<root></root>')
    foreign_doc = RXerces::XML::Document.parse('<other><child/></other>')

    expect { target_doc.root.add_child(foreign_doc.root.children.first) }
      .to raise_error(RuntimeError, /importNode/)
  end
end
|
|
578
|
+
|
|
579
|
+
# add_child on a node that is already attached elsewhere in the same document
# is expected to move it: the node leaves its old parent and keeps its content.
context "when child already has a parent" do
  it "moves node from one parent to another (detaches automatically)" do
    doc = RXerces::XML::Document.parse('<root><parent1><child>text</child></parent1><parent2/></root>')
    parent1 = doc.xpath('//parent1').first
    parent2 = doc.xpath('//parent2').first
    child = doc.xpath('//child').first
    element_count = lambda do |node|
      node.children.count { |n| n.is_a?(RXerces::XML::Element) }
    end

    # Verify initial state
    expect(element_count.call(parent1)).to eq(1)
    expect(element_count.call(parent2)).to eq(0)

    # Move child from parent1 to parent2
    parent2.add_child(child)

    # Child should now be under parent2, not parent1
    expect(element_count.call(parent1)).to eq(0)
    expect(element_count.call(parent2)).to eq(1)
    expect(doc.xpath('//parent2/child').length).to eq(1)
    expect(doc.xpath('//parent1/child').length).to eq(0)
  end

  it "preserves node content when moving" do
    doc = RXerces::XML::Document.parse('<root><a><item>content</item></a><b/></root>')
    a = doc.xpath('//a').first
    destination = doc.xpath('//b').first
    item = doc.xpath('//item').first

    destination.add_child(item)

    expect(item.text).to eq('content')
    expect(doc.xpath('//b/item').first.text).to eq('content')
  end
end
|
|
555
612
|
end
|
|
556
613
|
|
|
557
614
|
describe "#remove" do
|