rxerces 0.5.0 → 0.6.1
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGES.md +11 -0
- data/benchmarks/README.md +68 -0
- data/benchmarks/css_benchmark.rb +115 -0
- data/benchmarks/parse_benchmark.rb +103 -0
- data/benchmarks/run_all.rb +25 -0
- data/benchmarks/serialization_benchmark.rb +93 -0
- data/benchmarks/traversal_benchmark.rb +149 -0
- data/benchmarks/xpath_benchmark.rb +100 -0
- data/ext/rxerces/rxerces.cpp +385 -52
- data/lib/rxerces/version.rb +1 -1
- data/rxerces.gemspec +1 -1
- data/spec/document_spec.rb +58 -0
- data/spec/node_spec.rb +57 -0
- data/spec/rxerces_shared.rb +1 -1
- data/tmp/arm64-darwin24/rxerces/3.4.7/rxerces.bundle.dSYM/Contents/Info.plist +20 -0
- data/tmp/arm64-darwin24/rxerces/3.4.7/rxerces.bundle.dSYM/Contents/Resources/Relocations/aarch64/rxerces.bundle.yml +5 -0
- data.tar.gz.sig +0 -0
- metadata +11 -2
- metadata.gz.sig +0 -0
data/ext/rxerces/rxerces.cpp
CHANGED
|
@@ -57,6 +57,41 @@ static VALUE node_css(VALUE self, VALUE selector);
|
|
|
57
57
|
static VALUE node_xpath(VALUE self, VALUE path);
|
|
58
58
|
static VALUE document_xpath(VALUE self, VALUE path);
|
|
59
59
|
|
|
60
|
+
// Initialize Xerces (and Xalan if available) exactly once
|
|
61
|
+
static void ensure_xerces_initialized() {
|
|
62
|
+
if (xerces_initialized) {
|
|
63
|
+
return;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
try {
|
|
67
|
+
XMLPlatformUtils::Initialize();
|
|
68
|
+
#ifdef HAVE_XALAN
|
|
69
|
+
XPathEvaluator::initialize();
|
|
70
|
+
xalan_initialized = true;
|
|
71
|
+
#endif
|
|
72
|
+
xerces_initialized = true;
|
|
73
|
+
} catch (const XMLException& e) {
|
|
74
|
+
char* message = XMLString::transcode(e.getMessage());
|
|
75
|
+
std::string error_msg = std::string("Xerces initialization failed: ") + message;
|
|
76
|
+
XMLString::release(&message);
|
|
77
|
+
rb_raise(rb_eRuntimeError, "%s", error_msg.c_str());
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
// Cleanup function called at exit
|
|
82
|
+
static void cleanup_xerces() {
|
|
83
|
+
#ifdef HAVE_XALAN
|
|
84
|
+
if (xalan_initialized) {
|
|
85
|
+
XPathEvaluator::terminate();
|
|
86
|
+
xalan_initialized = false;
|
|
87
|
+
}
|
|
88
|
+
#endif
|
|
89
|
+
if (xerces_initialized) {
|
|
90
|
+
XMLPlatformUtils::Terminate();
|
|
91
|
+
xerces_initialized = false;
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
|
|
60
95
|
// Helper class to manage XMLCh strings
|
|
61
96
|
class XStr {
|
|
62
97
|
public:
|
|
@@ -103,6 +138,7 @@ private:
|
|
|
103
138
|
typedef struct {
|
|
104
139
|
DOMDocument* doc;
|
|
105
140
|
XercesDOMParser* parser;
|
|
141
|
+
std::vector<std::string>* parse_errors;
|
|
106
142
|
} DocumentWrapper;
|
|
107
143
|
|
|
108
144
|
// Wrapper structure for DOMNode
|
|
@@ -128,19 +164,34 @@ public:
|
|
|
128
164
|
|
|
129
165
|
void warning(const SAXParseException& e) {
|
|
130
166
|
char* msg = XMLString::transcode(e.getMessage());
|
|
131
|
-
|
|
167
|
+
char buffer[512];
|
|
168
|
+
snprintf(buffer, sizeof(buffer), "Warning at line %lu, column %lu: %s",
|
|
169
|
+
(unsigned long)e.getLineNumber(),
|
|
170
|
+
(unsigned long)e.getColumnNumber(),
|
|
171
|
+
msg);
|
|
172
|
+
errors.push_back(buffer);
|
|
132
173
|
XMLString::release(&msg);
|
|
133
174
|
}
|
|
134
175
|
|
|
135
176
|
void error(const SAXParseException& e) {
|
|
136
177
|
char* msg = XMLString::transcode(e.getMessage());
|
|
137
|
-
|
|
178
|
+
char buffer[512];
|
|
179
|
+
snprintf(buffer, sizeof(buffer), "Error at line %lu, column %lu: %s",
|
|
180
|
+
(unsigned long)e.getLineNumber(),
|
|
181
|
+
(unsigned long)e.getColumnNumber(),
|
|
182
|
+
msg);
|
|
183
|
+
errors.push_back(buffer);
|
|
138
184
|
XMLString::release(&msg);
|
|
139
185
|
}
|
|
140
186
|
|
|
141
187
|
void fatalError(const SAXParseException& e) {
|
|
142
188
|
char* msg = XMLString::transcode(e.getMessage());
|
|
143
|
-
|
|
189
|
+
char buffer[512];
|
|
190
|
+
snprintf(buffer, sizeof(buffer), "Fatal error at line %lu, column %lu: %s",
|
|
191
|
+
(unsigned long)e.getLineNumber(),
|
|
192
|
+
(unsigned long)e.getColumnNumber(),
|
|
193
|
+
msg);
|
|
194
|
+
errors.push_back(buffer);
|
|
144
195
|
XMLString::release(&msg);
|
|
145
196
|
}
|
|
146
197
|
|
|
@@ -149,6 +200,55 @@ public:
|
|
|
149
200
|
}
|
|
150
201
|
};
|
|
151
202
|
|
|
203
|
+
// Error handler for parsing - stores errors but doesn't throw
|
|
204
|
+
class ParseErrorHandler : public ErrorHandler {
|
|
205
|
+
public:
|
|
206
|
+
std::vector<std::string>* errors;
|
|
207
|
+
bool has_fatal;
|
|
208
|
+
|
|
209
|
+
ParseErrorHandler(std::vector<std::string>* error_vec)
|
|
210
|
+
: errors(error_vec), has_fatal(false) {}
|
|
211
|
+
|
|
212
|
+
void warning(const SAXParseException& e) {
|
|
213
|
+
char* msg = XMLString::transcode(e.getMessage());
|
|
214
|
+
char buffer[512];
|
|
215
|
+
snprintf(buffer, sizeof(buffer), "Warning at line %lu, column %lu: %s",
|
|
216
|
+
(unsigned long)e.getLineNumber(),
|
|
217
|
+
(unsigned long)e.getColumnNumber(),
|
|
218
|
+
msg);
|
|
219
|
+
errors->push_back(buffer);
|
|
220
|
+
XMLString::release(&msg);
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
void error(const SAXParseException& e) {
|
|
224
|
+
char* msg = XMLString::transcode(e.getMessage());
|
|
225
|
+
char buffer[512];
|
|
226
|
+
snprintf(buffer, sizeof(buffer), "Error at line %lu, column %lu: %s",
|
|
227
|
+
(unsigned long)e.getLineNumber(),
|
|
228
|
+
(unsigned long)e.getColumnNumber(),
|
|
229
|
+
msg);
|
|
230
|
+
errors->push_back(buffer);
|
|
231
|
+
XMLString::release(&msg);
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
void fatalError(const SAXParseException& e) {
|
|
235
|
+
has_fatal = true;
|
|
236
|
+
char* msg = XMLString::transcode(e.getMessage());
|
|
237
|
+
char buffer[512];
|
|
238
|
+
snprintf(buffer, sizeof(buffer), "Fatal error at line %lu, column %lu: %s",
|
|
239
|
+
(unsigned long)e.getLineNumber(),
|
|
240
|
+
(unsigned long)e.getColumnNumber(),
|
|
241
|
+
msg);
|
|
242
|
+
errors->push_back(buffer);
|
|
243
|
+
XMLString::release(&msg);
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
void resetErrors() {
|
|
247
|
+
errors->clear();
|
|
248
|
+
has_fatal = false;
|
|
249
|
+
}
|
|
250
|
+
};
|
|
251
|
+
|
|
152
252
|
// Memory management functions
|
|
153
253
|
static void document_free(void* ptr) {
|
|
154
254
|
DocumentWrapper* wrapper = (DocumentWrapper*)ptr;
|
|
@@ -156,6 +256,9 @@ static void document_free(void* ptr) {
|
|
|
156
256
|
if (wrapper->parser) {
|
|
157
257
|
delete wrapper->parser;
|
|
158
258
|
}
|
|
259
|
+
if (wrapper->parse_errors) {
|
|
260
|
+
delete wrapper->parse_errors;
|
|
261
|
+
}
|
|
159
262
|
// Document is owned by parser, so don't delete it separately
|
|
160
263
|
xfree(wrapper);
|
|
161
264
|
}
|
|
@@ -169,6 +272,13 @@ static void node_free(void* ptr) {
|
|
|
169
272
|
}
|
|
170
273
|
}
|
|
171
274
|
|
|
275
|
+
static void node_mark(void* ptr) {
|
|
276
|
+
NodeWrapper* wrapper = (NodeWrapper*)ptr;
|
|
277
|
+
if (wrapper) {
|
|
278
|
+
rb_gc_mark(wrapper->doc_ref);
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
|
|
172
282
|
static void nodeset_free(void* ptr) {
|
|
173
283
|
NodeSetWrapper* wrapper = (NodeSetWrapper*)ptr;
|
|
174
284
|
if (wrapper) {
|
|
@@ -176,6 +286,13 @@ static void nodeset_free(void* ptr) {
|
|
|
176
286
|
}
|
|
177
287
|
}
|
|
178
288
|
|
|
289
|
+
static void nodeset_mark(void* ptr) {
|
|
290
|
+
NodeSetWrapper* wrapper = (NodeSetWrapper*)ptr;
|
|
291
|
+
if (wrapper) {
|
|
292
|
+
rb_gc_mark(wrapper->nodes_array);
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
179
296
|
static void schema_free(void* ptr) {
|
|
180
297
|
SchemaWrapper* wrapper = (SchemaWrapper*)ptr;
|
|
181
298
|
if (wrapper) {
|
|
@@ -211,14 +328,14 @@ static const rb_data_type_t document_type = {
|
|
|
211
328
|
|
|
212
329
|
// Typed-data descriptor for RXerces::XML::Node objects.
static const rb_data_type_t node_type = {
    "RXerces::XML::Node",                 // name reported for wrapped objects
    {node_mark, node_free, node_size},    // mark / free / size callbacks
    0, 0,                                 // no parent type, no extra data
    RUBY_TYPED_FREE_IMMEDIATELY
};
|
|
218
335
|
|
|
219
336
|
// Typed-data descriptor for RXerces::XML::NodeSet objects.
static const rb_data_type_t nodeset_type = {
    "RXerces::XML::NodeSet",                       // name reported for wrapped objects
    {nodeset_mark, nodeset_free, nodeset_size},    // mark / free / size callbacks
    0, 0,                                          // no parent type, no extra data
    RUBY_TYPED_FREE_IMMEDIATELY
};
|
|
@@ -254,22 +371,12 @@ static VALUE wrap_node(DOMNode* node, VALUE doc_ref) {
|
|
|
254
371
|
break;
|
|
255
372
|
}
|
|
256
373
|
|
|
257
|
-
// Keep reference to document to prevent GC
|
|
258
|
-
rb_iv_set(rb_node, "@document", doc_ref);
|
|
259
|
-
|
|
260
374
|
return rb_node;
|
|
261
375
|
}
|
|
262
376
|
|
|
263
377
|
// RXerces::XML::Document.parse(string)
|
|
264
378
|
static VALUE document_parse(VALUE klass, VALUE str) {
|
|
265
|
-
|
|
266
|
-
try {
|
|
267
|
-
XMLPlatformUtils::Initialize();
|
|
268
|
-
xerces_initialized = true;
|
|
269
|
-
} catch (const XMLException& e) {
|
|
270
|
-
rb_raise(rb_eRuntimeError, "Xerces initialization failed");
|
|
271
|
-
}
|
|
272
|
-
}
|
|
379
|
+
ensure_xerces_initialized();
|
|
273
380
|
|
|
274
381
|
Check_Type(str, T_STRING);
|
|
275
382
|
const char* xml_str = StringValueCStr(str);
|
|
@@ -279,6 +386,11 @@ static VALUE document_parse(VALUE klass, VALUE str) {
|
|
|
279
386
|
parser->setDoNamespaces(true);
|
|
280
387
|
parser->setDoSchema(false);
|
|
281
388
|
|
|
389
|
+
// Set up error handler to capture parse errors
|
|
390
|
+
std::vector<std::string>* parse_errors = new std::vector<std::string>();
|
|
391
|
+
ParseErrorHandler error_handler(parse_errors);
|
|
392
|
+
parser->setErrorHandler(&error_handler);
|
|
393
|
+
|
|
282
394
|
try {
|
|
283
395
|
MemBufInputSource input((const XMLByte*)xml_str, strlen(xml_str), "memory");
|
|
284
396
|
parser->parse(input);
|
|
@@ -288,18 +400,33 @@ static VALUE document_parse(VALUE klass, VALUE str) {
|
|
|
288
400
|
DocumentWrapper* wrapper = ALLOC(DocumentWrapper);
|
|
289
401
|
wrapper->doc = doc;
|
|
290
402
|
wrapper->parser = parser;
|
|
403
|
+
wrapper->parse_errors = parse_errors;
|
|
291
404
|
|
|
292
405
|
VALUE rb_doc = TypedData_Wrap_Struct(rb_cDocument, &document_type, wrapper);
|
|
406
|
+
|
|
407
|
+
// If there were fatal errors, raise an exception with details
|
|
408
|
+
if (error_handler.has_fatal && !parse_errors->empty()) {
|
|
409
|
+
std::string all_errors;
|
|
410
|
+
for (const auto& err : *parse_errors) {
|
|
411
|
+
if (!all_errors.empty()) all_errors += "\n";
|
|
412
|
+
all_errors += err;
|
|
413
|
+
}
|
|
414
|
+
rb_raise(rb_eRuntimeError, "XML parsing failed:\n%s", all_errors.c_str());
|
|
415
|
+
}
|
|
416
|
+
|
|
293
417
|
return rb_doc;
|
|
294
418
|
} catch (const XMLException& e) {
|
|
295
419
|
CharStr message(e.getMessage());
|
|
420
|
+
delete parse_errors;
|
|
296
421
|
delete parser;
|
|
297
422
|
rb_raise(rb_eRuntimeError, "XML parsing error: %s", message.localForm());
|
|
298
423
|
} catch (const DOMException& e) {
|
|
299
424
|
CharStr message(e.getMessage());
|
|
425
|
+
delete parse_errors;
|
|
300
426
|
delete parser;
|
|
301
427
|
rb_raise(rb_eRuntimeError, "DOM error: %s", message.localForm());
|
|
302
428
|
} catch (...) {
|
|
429
|
+
delete parse_errors;
|
|
303
430
|
delete parser;
|
|
304
431
|
rb_raise(rb_eRuntimeError, "Unknown XML parsing error");
|
|
305
432
|
}
|
|
@@ -307,6 +434,22 @@ static VALUE document_parse(VALUE klass, VALUE str) {
|
|
|
307
434
|
return Qnil;
|
|
308
435
|
}
|
|
309
436
|
|
|
437
|
+
// document.errors - returns array of parse errors (warnings and errors)
|
|
438
|
+
static VALUE document_errors(VALUE self) {
|
|
439
|
+
DocumentWrapper* wrapper;
|
|
440
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
|
|
441
|
+
|
|
442
|
+
VALUE errors_array = rb_ary_new();
|
|
443
|
+
|
|
444
|
+
if (wrapper->parse_errors) {
|
|
445
|
+
for (const auto& error : *wrapper->parse_errors) {
|
|
446
|
+
rb_ary_push(errors_array, rb_str_new2(error.c_str()));
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
return errors_array;
|
|
451
|
+
}
|
|
452
|
+
|
|
310
453
|
// document.root
|
|
311
454
|
static VALUE document_root(VALUE self) {
|
|
312
455
|
DocumentWrapper* wrapper;
|
|
@@ -465,17 +608,103 @@ static VALUE document_create_element(VALUE self, VALUE name) {
|
|
|
465
608
|
return Qnil;
|
|
466
609
|
}
|
|
467
610
|
|
|
611
|
+
// document.children - returns all children (elements, text, comments, etc.)
|
|
612
|
+
static VALUE document_children(VALUE self) {
|
|
613
|
+
DocumentWrapper* wrapper;
|
|
614
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
|
|
615
|
+
|
|
616
|
+
VALUE children = rb_ary_new();
|
|
617
|
+
|
|
618
|
+
if (!wrapper->doc) {
|
|
619
|
+
return children;
|
|
620
|
+
}
|
|
621
|
+
|
|
622
|
+
DOMNodeList* child_nodes = wrapper->doc->getChildNodes();
|
|
623
|
+
XMLSize_t count = child_nodes->getLength();
|
|
624
|
+
|
|
625
|
+
for (XMLSize_t i = 0; i < count; i++) {
|
|
626
|
+
DOMNode* child = child_nodes->item(i);
|
|
627
|
+
rb_ary_push(children, wrap_node(child, self));
|
|
628
|
+
}
|
|
629
|
+
|
|
630
|
+
return children;
|
|
631
|
+
}
|
|
632
|
+
|
|
633
|
+
// document.element_children - returns only element children (no text nodes, comments, etc.)
|
|
634
|
+
static VALUE document_element_children(VALUE self) {
|
|
635
|
+
DocumentWrapper* wrapper;
|
|
636
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
|
|
637
|
+
|
|
638
|
+
VALUE children = rb_ary_new();
|
|
639
|
+
|
|
640
|
+
if (!wrapper->doc) {
|
|
641
|
+
return children;
|
|
642
|
+
}
|
|
643
|
+
|
|
644
|
+
DOMNodeList* child_nodes = wrapper->doc->getChildNodes();
|
|
645
|
+
XMLSize_t count = child_nodes->getLength();
|
|
646
|
+
|
|
647
|
+
for (XMLSize_t i = 0; i < count; i++) {
|
|
648
|
+
DOMNode* child = child_nodes->item(i);
|
|
649
|
+
if (child->getNodeType() == DOMNode::ELEMENT_NODE) {
|
|
650
|
+
rb_ary_push(children, wrap_node(child, self));
|
|
651
|
+
}
|
|
652
|
+
}
|
|
653
|
+
|
|
654
|
+
return children;
|
|
655
|
+
}
|
|
656
|
+
|
|
657
|
+
// document.first_element_child - returns first element child
|
|
658
|
+
static VALUE document_first_element_child(VALUE self) {
|
|
659
|
+
DocumentWrapper* wrapper;
|
|
660
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
|
|
661
|
+
|
|
662
|
+
if (!wrapper->doc) {
|
|
663
|
+
return Qnil;
|
|
664
|
+
}
|
|
665
|
+
|
|
666
|
+
DOMNodeList* child_nodes = wrapper->doc->getChildNodes();
|
|
667
|
+
XMLSize_t count = child_nodes->getLength();
|
|
668
|
+
|
|
669
|
+
for (XMLSize_t i = 0; i < count; i++) {
|
|
670
|
+
DOMNode* child = child_nodes->item(i);
|
|
671
|
+
if (child->getNodeType() == DOMNode::ELEMENT_NODE) {
|
|
672
|
+
return wrap_node(child, self);
|
|
673
|
+
}
|
|
674
|
+
}
|
|
675
|
+
|
|
676
|
+
return Qnil;
|
|
677
|
+
}
|
|
678
|
+
|
|
679
|
+
// document.last_element_child - returns last element child
|
|
680
|
+
static VALUE document_last_element_child(VALUE self) {
|
|
681
|
+
DocumentWrapper* wrapper;
|
|
682
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
|
|
683
|
+
|
|
684
|
+
if (!wrapper->doc) {
|
|
685
|
+
return Qnil;
|
|
686
|
+
}
|
|
687
|
+
|
|
688
|
+
DOMNodeList* child_nodes = wrapper->doc->getChildNodes();
|
|
689
|
+
XMLSize_t count = child_nodes->getLength();
|
|
690
|
+
|
|
691
|
+
// Search backwards for last element
|
|
692
|
+
for (XMLSize_t i = count; i > 0; i--) {
|
|
693
|
+
DOMNode* child = child_nodes->item(i - 1);
|
|
694
|
+
if (child->getNodeType() == DOMNode::ELEMENT_NODE) {
|
|
695
|
+
return wrap_node(child, self);
|
|
696
|
+
}
|
|
697
|
+
}
|
|
698
|
+
|
|
699
|
+
return Qnil;
|
|
700
|
+
}
|
|
701
|
+
|
|
468
702
|
#ifdef HAVE_XALAN
|
|
469
703
|
// Helper function to execute XPath using Xalan for full XPath 1.0 support
|
|
470
704
|
static VALUE execute_xpath_with_xalan(DOMNode* context_node, const char* xpath_str, VALUE doc_ref) {
|
|
471
|
-
|
|
472
|
-
// Initialize Xalan if needed
|
|
473
|
-
if (!xalan_initialized) {
|
|
474
|
-
XPathEvaluator::initialize();
|
|
475
|
-
XMLPlatformUtils::Initialize();
|
|
476
|
-
xalan_initialized = true;
|
|
477
|
-
}
|
|
705
|
+
ensure_xerces_initialized();
|
|
478
706
|
|
|
707
|
+
try {
|
|
479
708
|
// Get the document
|
|
480
709
|
DOMDocument* domDoc = context_node->getOwnerDocument();
|
|
481
710
|
if (!domDoc && context_node->getNodeType() == DOMNode::DOCUMENT_NODE) {
|
|
@@ -600,8 +829,9 @@ static VALUE document_xpath(VALUE self, VALUE path) {
|
|
|
600
829
|
}
|
|
601
830
|
|
|
602
831
|
DOMXPathNSResolver* resolver = doc_wrapper->doc->createNSResolver(root);
|
|
832
|
+
XStr xpath_xstr(xpath_str);
|
|
603
833
|
DOMXPathExpression* expression = doc_wrapper->doc->createExpression(
|
|
604
|
-
|
|
834
|
+
xpath_xstr.unicodeForm(), resolver);
|
|
605
835
|
|
|
606
836
|
DOMXPathResult* result = expression->evaluate(
|
|
607
837
|
doc_wrapper->doc->getDocumentElement(),
|
|
@@ -643,6 +873,19 @@ static VALUE document_xpath(VALUE self, VALUE path) {
|
|
|
643
873
|
#endif
|
|
644
874
|
}
|
|
645
875
|
|
|
876
|
+
// document.at_xpath(path) - returns first matching node or nil
|
|
877
|
+
static VALUE document_at_xpath(VALUE self, VALUE path) {
|
|
878
|
+
VALUE nodeset = document_xpath(self, path);
|
|
879
|
+
NodeSetWrapper* wrapper;
|
|
880
|
+
TypedData_Get_Struct(nodeset, NodeSetWrapper, &nodeset_type, wrapper);
|
|
881
|
+
|
|
882
|
+
if (RARRAY_LEN(wrapper->nodes_array) == 0) {
|
|
883
|
+
return Qnil;
|
|
884
|
+
}
|
|
885
|
+
|
|
886
|
+
return rb_ary_entry(wrapper->nodes_array, 0);
|
|
887
|
+
}
|
|
888
|
+
|
|
646
889
|
// document.css(selector) - Convert CSS to XPath and execute
|
|
647
890
|
static VALUE document_css(VALUE self, VALUE selector) {
|
|
648
891
|
Check_Type(selector, T_STRING);
|
|
@@ -840,7 +1083,8 @@ static VALUE node_text_set(VALUE self, VALUE text) {
|
|
|
840
1083
|
Check_Type(text, T_STRING);
|
|
841
1084
|
const char* text_str = StringValueCStr(text);
|
|
842
1085
|
|
|
843
|
-
|
|
1086
|
+
XStr text_xstr(text_str);
|
|
1087
|
+
wrapper->node->setTextContent(text_xstr.unicodeForm());
|
|
844
1088
|
|
|
845
1089
|
return text;
|
|
846
1090
|
}
|
|
@@ -858,7 +1102,8 @@ static VALUE node_get_attribute(VALUE self, VALUE attr_name) {
|
|
|
858
1102
|
const char* attr_str = StringValueCStr(attr_name);
|
|
859
1103
|
|
|
860
1104
|
DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
|
|
861
|
-
|
|
1105
|
+
XStr attr_xstr(attr_str);
|
|
1106
|
+
const XMLCh* value = element->getAttribute(attr_xstr.unicodeForm());
|
|
862
1107
|
|
|
863
1108
|
if (!value || XMLString::stringLen(value) == 0) {
|
|
864
1109
|
return Qnil;
|
|
@@ -884,7 +1129,9 @@ static VALUE node_set_attribute(VALUE self, VALUE attr_name, VALUE attr_value) {
|
|
|
884
1129
|
const char* value_str = StringValueCStr(attr_value);
|
|
885
1130
|
|
|
886
1131
|
DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
|
|
887
|
-
|
|
1132
|
+
XStr attr_xstr(attr_str);
|
|
1133
|
+
XStr value_xstr(value_str);
|
|
1134
|
+
element->setAttribute(attr_xstr.unicodeForm(), value_xstr.unicodeForm());
|
|
888
1135
|
|
|
889
1136
|
return attr_value;
|
|
890
1137
|
}
|
|
@@ -902,7 +1149,8 @@ static VALUE node_has_attribute_p(VALUE self, VALUE attr_name) {
|
|
|
902
1149
|
const char* attr_str = StringValueCStr(attr_name);
|
|
903
1150
|
|
|
904
1151
|
DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
|
|
905
|
-
|
|
1152
|
+
XStr attr_xstr(attr_str);
|
|
1153
|
+
const XMLCh* value = element->getAttribute(attr_xstr.unicodeForm());
|
|
906
1154
|
|
|
907
1155
|
if (!value || XMLString::stringLen(value) == 0) {
|
|
908
1156
|
return Qfalse;
|
|
@@ -916,13 +1164,14 @@ static VALUE node_children(VALUE self) {
|
|
|
916
1164
|
NodeWrapper* wrapper;
|
|
917
1165
|
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
918
1166
|
|
|
919
|
-
VALUE doc_ref = rb_iv_get(self, "@document");
|
|
920
1167
|
VALUE children = rb_ary_new();
|
|
921
1168
|
|
|
922
1169
|
if (!wrapper->node) {
|
|
923
1170
|
return children;
|
|
924
1171
|
}
|
|
925
1172
|
|
|
1173
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1174
|
+
|
|
926
1175
|
DOMNodeList* child_nodes = wrapper->node->getChildNodes();
|
|
927
1176
|
XMLSize_t count = child_nodes->getLength();
|
|
928
1177
|
|
|
@@ -939,13 +1188,13 @@ static VALUE node_element_children(VALUE self) {
|
|
|
939
1188
|
NodeWrapper* wrapper;
|
|
940
1189
|
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
941
1190
|
|
|
942
|
-
VALUE doc_ref = rb_iv_get(self, "@document");
|
|
943
1191
|
VALUE children = rb_ary_new();
|
|
944
1192
|
|
|
945
1193
|
if (!wrapper->node) {
|
|
946
1194
|
return children;
|
|
947
1195
|
}
|
|
948
1196
|
|
|
1197
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
949
1198
|
DOMNodeList* child_nodes = wrapper->node->getChildNodes();
|
|
950
1199
|
XMLSize_t count = child_nodes->getLength();
|
|
951
1200
|
|
|
@@ -959,6 +1208,65 @@ static VALUE node_element_children(VALUE self) {
|
|
|
959
1208
|
return children;
|
|
960
1209
|
}
|
|
961
1210
|
|
|
1211
|
+
// node.first_element_child - returns first element child
|
|
1212
|
+
static VALUE node_first_element_child(VALUE self) {
|
|
1213
|
+
NodeWrapper* wrapper;
|
|
1214
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
1215
|
+
|
|
1216
|
+
if (!wrapper->node) {
|
|
1217
|
+
return Qnil;
|
|
1218
|
+
}
|
|
1219
|
+
|
|
1220
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1221
|
+
DOMNodeList* child_nodes = wrapper->node->getChildNodes();
|
|
1222
|
+
XMLSize_t count = child_nodes->getLength();
|
|
1223
|
+
|
|
1224
|
+
for (XMLSize_t i = 0; i < count; i++) {
|
|
1225
|
+
DOMNode* child = child_nodes->item(i);
|
|
1226
|
+
if (child->getNodeType() == DOMNode::ELEMENT_NODE) {
|
|
1227
|
+
return wrap_node(child, doc_ref);
|
|
1228
|
+
}
|
|
1229
|
+
}
|
|
1230
|
+
|
|
1231
|
+
return Qnil;
|
|
1232
|
+
}
|
|
1233
|
+
|
|
1234
|
+
// node.last_element_child - returns last element child
|
|
1235
|
+
static VALUE node_last_element_child(VALUE self) {
|
|
1236
|
+
NodeWrapper* wrapper;
|
|
1237
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
1238
|
+
|
|
1239
|
+
if (!wrapper->node) {
|
|
1240
|
+
return Qnil;
|
|
1241
|
+
}
|
|
1242
|
+
|
|
1243
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1244
|
+
DOMNodeList* child_nodes = wrapper->node->getChildNodes();
|
|
1245
|
+
XMLSize_t count = child_nodes->getLength();
|
|
1246
|
+
|
|
1247
|
+
// Search backwards for last element
|
|
1248
|
+
for (XMLSize_t i = count; i > 0; i--) {
|
|
1249
|
+
DOMNode* child = child_nodes->item(i - 1);
|
|
1250
|
+
if (child->getNodeType() == DOMNode::ELEMENT_NODE) {
|
|
1251
|
+
return wrap_node(child, doc_ref);
|
|
1252
|
+
}
|
|
1253
|
+
}
|
|
1254
|
+
|
|
1255
|
+
return Qnil;
|
|
1256
|
+
}
|
|
1257
|
+
|
|
1258
|
+
// node.document - returns the document that owns this node
|
|
1259
|
+
static VALUE node_document(VALUE self) {
|
|
1260
|
+
NodeWrapper* wrapper;
|
|
1261
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
1262
|
+
|
|
1263
|
+
if (!wrapper->node) {
|
|
1264
|
+
return Qnil;
|
|
1265
|
+
}
|
|
1266
|
+
|
|
1267
|
+
return wrapper->doc_ref;
|
|
1268
|
+
}
|
|
1269
|
+
|
|
962
1270
|
// node.parent
|
|
963
1271
|
static VALUE node_parent(VALUE self) {
|
|
964
1272
|
NodeWrapper* wrapper;
|
|
@@ -973,7 +1281,7 @@ static VALUE node_parent(VALUE self) {
|
|
|
973
1281
|
return Qnil;
|
|
974
1282
|
}
|
|
975
1283
|
|
|
976
|
-
VALUE doc_ref =
|
|
1284
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
977
1285
|
return wrap_node(parent, doc_ref);
|
|
978
1286
|
}
|
|
979
1287
|
|
|
@@ -991,7 +1299,7 @@ static VALUE node_ancestors(int argc, VALUE* argv, VALUE self) {
|
|
|
991
1299
|
return ancestors;
|
|
992
1300
|
}
|
|
993
1301
|
|
|
994
|
-
VALUE doc_ref =
|
|
1302
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
995
1303
|
DOMNode* current = wrapper->node->getParentNode();
|
|
996
1304
|
|
|
997
1305
|
// Walk up the tree, collecting all ancestors
|
|
@@ -1100,7 +1408,7 @@ static VALUE node_next_sibling(VALUE self) {
|
|
|
1100
1408
|
return Qnil;
|
|
1101
1409
|
}
|
|
1102
1410
|
|
|
1103
|
-
VALUE doc_ref =
|
|
1411
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1104
1412
|
return wrap_node(next, doc_ref);
|
|
1105
1413
|
}
|
|
1106
1414
|
|
|
@@ -1118,7 +1426,7 @@ static VALUE node_previous_sibling(VALUE self) {
|
|
|
1118
1426
|
return Qnil;
|
|
1119
1427
|
}
|
|
1120
1428
|
|
|
1121
|
-
VALUE doc_ref =
|
|
1429
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1122
1430
|
return wrap_node(prev, doc_ref);
|
|
1123
1431
|
}
|
|
1124
1432
|
|
|
@@ -1131,7 +1439,7 @@ static VALUE node_next_element(VALUE self) {
|
|
|
1131
1439
|
return Qnil;
|
|
1132
1440
|
}
|
|
1133
1441
|
|
|
1134
|
-
VALUE doc_ref =
|
|
1442
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1135
1443
|
DOMNode* next = wrapper->node->getNextSibling();
|
|
1136
1444
|
|
|
1137
1445
|
// Skip non-element nodes
|
|
@@ -1155,7 +1463,7 @@ static VALUE node_previous_element(VALUE self) {
|
|
|
1155
1463
|
return Qnil;
|
|
1156
1464
|
}
|
|
1157
1465
|
|
|
1158
|
-
VALUE doc_ref =
|
|
1466
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1159
1467
|
DOMNode* prev = wrapper->node->getPreviousSibling();
|
|
1160
1468
|
|
|
1161
1469
|
// Skip non-element nodes
|
|
@@ -1185,6 +1493,7 @@ static VALUE node_add_child(VALUE self, VALUE child) {
|
|
|
1185
1493
|
}
|
|
1186
1494
|
|
|
1187
1495
|
DOMNode* child_node = NULL;
|
|
1496
|
+
bool needs_import = false;
|
|
1188
1497
|
|
|
1189
1498
|
// Check if child is a string or a node
|
|
1190
1499
|
if (TYPE(child) == T_STRING) {
|
|
@@ -1199,6 +1508,13 @@ static VALUE node_add_child(VALUE self, VALUE child) {
|
|
|
1199
1508
|
if (rb_obj_is_kind_of(child, rb_cNode)) {
|
|
1200
1509
|
TypedData_Get_Struct(child, NodeWrapper, &node_type, child_wrapper);
|
|
1201
1510
|
child_node = child_wrapper->node;
|
|
1511
|
+
|
|
1512
|
+
// Check if child belongs to a different document
|
|
1513
|
+
DOMDocument* child_doc = child_node->getOwnerDocument();
|
|
1514
|
+
if (child_doc && child_doc != doc) {
|
|
1515
|
+
rb_raise(rb_eRuntimeError,
|
|
1516
|
+
"Node belongs to a different document. Use importNode to adopt nodes from other documents.");
|
|
1517
|
+
}
|
|
1202
1518
|
} else {
|
|
1203
1519
|
rb_raise(rb_eTypeError, "Argument must be a String or Node");
|
|
1204
1520
|
}
|
|
@@ -1209,12 +1525,24 @@ static VALUE node_add_child(VALUE self, VALUE child) {
|
|
|
1209
1525
|
}
|
|
1210
1526
|
|
|
1211
1527
|
try {
|
|
1528
|
+
// appendChild will automatically detach the node from its current parent if it has one
|
|
1212
1529
|
wrapper->node->appendChild(child_node);
|
|
1213
1530
|
} catch (const DOMException& e) {
|
|
1214
1531
|
char* message = XMLString::transcode(e.getMessage());
|
|
1215
1532
|
VALUE rb_error = rb_str_new_cstr(message);
|
|
1216
1533
|
XMLString::release(&message);
|
|
1217
|
-
|
|
1534
|
+
|
|
1535
|
+
// Provide more context for common errors
|
|
1536
|
+
unsigned short code = e.code;
|
|
1537
|
+
if (code == DOMException::WRONG_DOCUMENT_ERR) {
|
|
1538
|
+
rb_raise(rb_eRuntimeError, "Node belongs to a different document: %s", StringValueCStr(rb_error));
|
|
1539
|
+
} else if (code == DOMException::HIERARCHY_REQUEST_ERR) {
|
|
1540
|
+
rb_raise(rb_eRuntimeError, "Invalid hierarchy: cannot add this node as a child: %s", StringValueCStr(rb_error));
|
|
1541
|
+
} else if (code == DOMException::NO_MODIFICATION_ALLOWED_ERR) {
|
|
1542
|
+
rb_raise(rb_eRuntimeError, "Node is read-only: %s", StringValueCStr(rb_error));
|
|
1543
|
+
} else {
|
|
1544
|
+
rb_raise(rb_eRuntimeError, "Failed to add child: %s", StringValueCStr(rb_error));
|
|
1545
|
+
}
|
|
1218
1546
|
}
|
|
1219
1547
|
|
|
1220
1548
|
return child;
|
|
@@ -1256,7 +1584,8 @@ static VALUE node_inner_html(VALUE self) {
|
|
|
1256
1584
|
}
|
|
1257
1585
|
|
|
1258
1586
|
try {
|
|
1259
|
-
|
|
1587
|
+
XStr ls_name("LS");
|
|
1588
|
+
DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation(ls_name.unicodeForm());
|
|
1260
1589
|
DOMLSSerializer* serializer = ((DOMImplementationLS*)impl)->createLSSerializer();
|
|
1261
1590
|
|
|
1262
1591
|
// Build a string by serializing each child
|
|
@@ -1421,7 +1750,7 @@ static VALUE node_xpath(VALUE self, VALUE path) {
|
|
|
1421
1750
|
|
|
1422
1751
|
Check_Type(path, T_STRING);
|
|
1423
1752
|
const char* xpath_str = StringValueCStr(path);
|
|
1424
|
-
VALUE doc_ref =
|
|
1753
|
+
VALUE doc_ref = node_wrapper->doc_ref;
|
|
1425
1754
|
|
|
1426
1755
|
#ifdef HAVE_XALAN
|
|
1427
1756
|
// Use Xalan for full XPath 1.0 support
|
|
@@ -1437,8 +1766,9 @@ static VALUE node_xpath(VALUE self, VALUE path) {
|
|
|
1437
1766
|
}
|
|
1438
1767
|
|
|
1439
1768
|
DOMXPathNSResolver* resolver = doc->createNSResolver(node_wrapper->node);
|
|
1769
|
+
XStr xpath_xstr(xpath_str);
|
|
1440
1770
|
DOMXPathExpression* expression = doc->createExpression(
|
|
1441
|
-
|
|
1771
|
+
xpath_xstr.unicodeForm(), resolver);
|
|
1442
1772
|
|
|
1443
1773
|
DOMXPathResult* result = expression->evaluate(
|
|
1444
1774
|
node_wrapper->node,
|
|
@@ -1919,18 +2249,7 @@ static VALUE schema_from_document(int argc, VALUE* argv, VALUE klass) {
|
|
|
1919
2249
|
VALUE schema_source;
|
|
1920
2250
|
rb_scan_args(argc, argv, "1", &schema_source);
|
|
1921
2251
|
|
|
1922
|
-
|
|
1923
|
-
if (!xerces_initialized) {
|
|
1924
|
-
try {
|
|
1925
|
-
XMLPlatformUtils::Initialize();
|
|
1926
|
-
xerces_initialized = true;
|
|
1927
|
-
} catch (const XMLException& e) {
|
|
1928
|
-
char* message = XMLString::transcode(e.getMessage());
|
|
1929
|
-
VALUE rb_error = rb_str_new_cstr(message);
|
|
1930
|
-
XMLString::release(&message);
|
|
1931
|
-
rb_raise(rb_eRuntimeError, "Failed to initialize Xerces-C: %s", StringValueCStr(rb_error));
|
|
1932
|
-
}
|
|
1933
|
-
}
|
|
2252
|
+
ensure_xerces_initialized();
|
|
1934
2253
|
|
|
1935
2254
|
try {
|
|
1936
2255
|
SchemaWrapper* wrapper = ALLOC(SchemaWrapper);
|
|
@@ -2103,16 +2422,24 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
|
|
|
2103
2422
|
rb_undef_alloc_func(rb_cDocument);
|
|
2104
2423
|
rb_define_singleton_method(rb_cDocument, "parse", RUBY_METHOD_FUNC(document_parse), 1);
|
|
2105
2424
|
rb_define_method(rb_cDocument, "root", RUBY_METHOD_FUNC(document_root), 0);
|
|
2425
|
+
rb_define_method(rb_cDocument, "errors", RUBY_METHOD_FUNC(document_errors), 0);
|
|
2106
2426
|
rb_define_method(rb_cDocument, "to_s", RUBY_METHOD_FUNC(document_to_s), 0);
|
|
2107
2427
|
rb_define_alias(rb_cDocument, "to_xml", "to_s");
|
|
2108
2428
|
rb_define_method(rb_cDocument, "inspect", RUBY_METHOD_FUNC(document_inspect), 0);
|
|
2109
2429
|
rb_define_method(rb_cDocument, "xpath", RUBY_METHOD_FUNC(document_xpath), 1);
|
|
2430
|
+
rb_define_method(rb_cDocument, "at_xpath", RUBY_METHOD_FUNC(document_at_xpath), 1);
|
|
2431
|
+
rb_define_alias(rb_cDocument, "at", "at_xpath");
|
|
2110
2432
|
rb_define_method(rb_cDocument, "css", RUBY_METHOD_FUNC(document_css), 1);
|
|
2111
2433
|
rb_define_method(rb_cDocument, "at_css", RUBY_METHOD_FUNC(document_at_css), 1);
|
|
2112
2434
|
rb_define_method(rb_cDocument, "encoding", RUBY_METHOD_FUNC(document_encoding), 0);
|
|
2113
2435
|
rb_define_method(rb_cDocument, "text", RUBY_METHOD_FUNC(document_text), 0);
|
|
2114
2436
|
rb_define_alias(rb_cDocument, "content", "text");
|
|
2115
2437
|
rb_define_method(rb_cDocument, "create_element", RUBY_METHOD_FUNC(document_create_element), 1);
|
|
2438
|
+
rb_define_method(rb_cDocument, "children", RUBY_METHOD_FUNC(document_children), 0);
|
|
2439
|
+
rb_define_method(rb_cDocument, "element_children", RUBY_METHOD_FUNC(document_element_children), 0);
|
|
2440
|
+
rb_define_alias(rb_cDocument, "elements", "element_children");
|
|
2441
|
+
rb_define_method(rb_cDocument, "first_element_child", RUBY_METHOD_FUNC(document_first_element_child), 0);
|
|
2442
|
+
rb_define_method(rb_cDocument, "last_element_child", RUBY_METHOD_FUNC(document_last_element_child), 0);
|
|
2116
2443
|
|
|
2117
2444
|
rb_cNode = rb_define_class_under(rb_mXML, "Node", rb_cObject);
|
|
2118
2445
|
rb_undef_alloc_func(rb_cNode);
|
|
@@ -2131,6 +2458,9 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
|
|
|
2131
2458
|
rb_define_method(rb_cNode, "children", RUBY_METHOD_FUNC(node_children), 0);
|
|
2132
2459
|
rb_define_method(rb_cNode, "element_children", RUBY_METHOD_FUNC(node_element_children), 0);
|
|
2133
2460
|
rb_define_alias(rb_cNode, "elements", "element_children");
|
|
2461
|
+
rb_define_method(rb_cNode, "first_element_child", RUBY_METHOD_FUNC(node_first_element_child), 0);
|
|
2462
|
+
rb_define_method(rb_cNode, "last_element_child", RUBY_METHOD_FUNC(node_last_element_child), 0);
|
|
2463
|
+
rb_define_method(rb_cNode, "document", RUBY_METHOD_FUNC(node_document), 0);
|
|
2134
2464
|
rb_define_method(rb_cNode, "parent", RUBY_METHOD_FUNC(node_parent), 0);
|
|
2135
2465
|
rb_define_method(rb_cNode, "ancestors", RUBY_METHOD_FUNC(node_ancestors), -1);
|
|
2136
2466
|
rb_define_method(rb_cNode, "attributes", RUBY_METHOD_FUNC(node_attributes), 0);
|
|
@@ -2182,4 +2512,7 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
|
|
|
2182
2512
|
rb_define_singleton_method(rb_cSchema, "from_string", RUBY_METHOD_FUNC(schema_from_document), -1);
|
|
2183
2513
|
|
|
2184
2514
|
rb_define_method(rb_cDocument, "validate", RUBY_METHOD_FUNC(document_validate), 1);
|
|
2515
|
+
|
|
2516
|
+
// Register cleanup handler
|
|
2517
|
+
atexit(cleanup_xerces);
|
|
2185
2518
|
}
|