rxerces 0.4.0 → 0.6.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- checksums.yaml.gz.sig +0 -0
- data/CHANGES.md +19 -0
- data/README.md +14 -3
- data/benchmarks/README.md +68 -0
- data/benchmarks/css_benchmark.rb +115 -0
- data/benchmarks/parse_benchmark.rb +103 -0
- data/benchmarks/run_all.rb +25 -0
- data/benchmarks/serialization_benchmark.rb +93 -0
- data/benchmarks/traversal_benchmark.rb +149 -0
- data/benchmarks/xpath_benchmark.rb +100 -0
- data/ext/rxerces/rxerces.cpp +977 -50
- data/lib/rxerces/nokogiri.rb +26 -0
- data/lib/rxerces/version.rb +1 -1
- data/rxerces.gemspec +1 -1
- data/spec/document_spec.rb +117 -0
- data/spec/node_spec.rb +408 -4
- data/spec/nodeset_spec.rb +59 -0
- data/spec/nokogiri_compatibility_spec.rb +44 -0
- data/spec/rxerces_shared.rb +1 -1
- data.tar.gz.sig +0 -0
- metadata +8 -1
- metadata.gz.sig +0 -0
data/ext/rxerces/rxerces.cpp
CHANGED
|
@@ -51,6 +51,47 @@ static bool xerces_initialized = false;
|
|
|
51
51
|
static bool xalan_initialized = false;
|
|
52
52
|
#endif
|
|
53
53
|
|
|
54
|
+
// Forward declarations
|
|
55
|
+
static std::string css_to_xpath(const char* css);
|
|
56
|
+
static VALUE node_css(VALUE self, VALUE selector);
|
|
57
|
+
static VALUE node_xpath(VALUE self, VALUE path);
|
|
58
|
+
static VALUE document_xpath(VALUE self, VALUE path);
|
|
59
|
+
|
|
60
|
+
// Initialize Xerces (and Xalan if available) exactly once
|
|
61
|
+
static void ensure_xerces_initialized() {
|
|
62
|
+
if (xerces_initialized) {
|
|
63
|
+
return;
|
|
64
|
+
}
|
|
65
|
+
|
|
66
|
+
try {
|
|
67
|
+
XMLPlatformUtils::Initialize();
|
|
68
|
+
#ifdef HAVE_XALAN
|
|
69
|
+
XPathEvaluator::initialize();
|
|
70
|
+
xalan_initialized = true;
|
|
71
|
+
#endif
|
|
72
|
+
xerces_initialized = true;
|
|
73
|
+
} catch (const XMLException& e) {
|
|
74
|
+
char* message = XMLString::transcode(e.getMessage());
|
|
75
|
+
std::string error_msg = std::string("Xerces initialization failed: ") + message;
|
|
76
|
+
XMLString::release(&message);
|
|
77
|
+
rb_raise(rb_eRuntimeError, "%s", error_msg.c_str());
|
|
78
|
+
}
|
|
79
|
+
}
|
|
80
|
+
|
|
81
|
+
// Cleanup function called at exit
|
|
82
|
+
static void cleanup_xerces() {
|
|
83
|
+
#ifdef HAVE_XALAN
|
|
84
|
+
if (xalan_initialized) {
|
|
85
|
+
XPathEvaluator::terminate();
|
|
86
|
+
xalan_initialized = false;
|
|
87
|
+
}
|
|
88
|
+
#endif
|
|
89
|
+
if (xerces_initialized) {
|
|
90
|
+
XMLPlatformUtils::Terminate();
|
|
91
|
+
xerces_initialized = false;
|
|
92
|
+
}
|
|
93
|
+
}
|
|
94
|
+
|
|
54
95
|
// Helper class to manage XMLCh strings
|
|
55
96
|
class XStr {
|
|
56
97
|
public:
|
|
@@ -97,6 +138,7 @@ private:
|
|
|
97
138
|
typedef struct {
|
|
98
139
|
DOMDocument* doc;
|
|
99
140
|
XercesDOMParser* parser;
|
|
141
|
+
std::vector<std::string>* parse_errors;
|
|
100
142
|
} DocumentWrapper;
|
|
101
143
|
|
|
102
144
|
// Wrapper structure for DOMNode
|
|
@@ -122,19 +164,34 @@ public:
|
|
|
122
164
|
|
|
123
165
|
void warning(const SAXParseException& e) {
|
|
124
166
|
char* msg = XMLString::transcode(e.getMessage());
|
|
125
|
-
|
|
167
|
+
char buffer[512];
|
|
168
|
+
snprintf(buffer, sizeof(buffer), "Warning at line %lu, column %lu: %s",
|
|
169
|
+
(unsigned long)e.getLineNumber(),
|
|
170
|
+
(unsigned long)e.getColumnNumber(),
|
|
171
|
+
msg);
|
|
172
|
+
errors.push_back(buffer);
|
|
126
173
|
XMLString::release(&msg);
|
|
127
174
|
}
|
|
128
175
|
|
|
129
176
|
void error(const SAXParseException& e) {
|
|
130
177
|
char* msg = XMLString::transcode(e.getMessage());
|
|
131
|
-
|
|
178
|
+
char buffer[512];
|
|
179
|
+
snprintf(buffer, sizeof(buffer), "Error at line %lu, column %lu: %s",
|
|
180
|
+
(unsigned long)e.getLineNumber(),
|
|
181
|
+
(unsigned long)e.getColumnNumber(),
|
|
182
|
+
msg);
|
|
183
|
+
errors.push_back(buffer);
|
|
132
184
|
XMLString::release(&msg);
|
|
133
185
|
}
|
|
134
186
|
|
|
135
187
|
void fatalError(const SAXParseException& e) {
|
|
136
188
|
char* msg = XMLString::transcode(e.getMessage());
|
|
137
|
-
|
|
189
|
+
char buffer[512];
|
|
190
|
+
snprintf(buffer, sizeof(buffer), "Fatal error at line %lu, column %lu: %s",
|
|
191
|
+
(unsigned long)e.getLineNumber(),
|
|
192
|
+
(unsigned long)e.getColumnNumber(),
|
|
193
|
+
msg);
|
|
194
|
+
errors.push_back(buffer);
|
|
138
195
|
XMLString::release(&msg);
|
|
139
196
|
}
|
|
140
197
|
|
|
@@ -143,6 +200,55 @@ public:
|
|
|
143
200
|
}
|
|
144
201
|
};
|
|
145
202
|
|
|
203
|
+
// Error handler for parsing - stores errors but doesn't throw
|
|
204
|
+
class ParseErrorHandler : public ErrorHandler {
|
|
205
|
+
public:
|
|
206
|
+
std::vector<std::string>* errors;
|
|
207
|
+
bool has_fatal;
|
|
208
|
+
|
|
209
|
+
ParseErrorHandler(std::vector<std::string>* error_vec)
|
|
210
|
+
: errors(error_vec), has_fatal(false) {}
|
|
211
|
+
|
|
212
|
+
void warning(const SAXParseException& e) {
|
|
213
|
+
char* msg = XMLString::transcode(e.getMessage());
|
|
214
|
+
char buffer[512];
|
|
215
|
+
snprintf(buffer, sizeof(buffer), "Warning at line %lu, column %lu: %s",
|
|
216
|
+
(unsigned long)e.getLineNumber(),
|
|
217
|
+
(unsigned long)e.getColumnNumber(),
|
|
218
|
+
msg);
|
|
219
|
+
errors->push_back(buffer);
|
|
220
|
+
XMLString::release(&msg);
|
|
221
|
+
}
|
|
222
|
+
|
|
223
|
+
void error(const SAXParseException& e) {
|
|
224
|
+
char* msg = XMLString::transcode(e.getMessage());
|
|
225
|
+
char buffer[512];
|
|
226
|
+
snprintf(buffer, sizeof(buffer), "Error at line %lu, column %lu: %s",
|
|
227
|
+
(unsigned long)e.getLineNumber(),
|
|
228
|
+
(unsigned long)e.getColumnNumber(),
|
|
229
|
+
msg);
|
|
230
|
+
errors->push_back(buffer);
|
|
231
|
+
XMLString::release(&msg);
|
|
232
|
+
}
|
|
233
|
+
|
|
234
|
+
void fatalError(const SAXParseException& e) {
|
|
235
|
+
has_fatal = true;
|
|
236
|
+
char* msg = XMLString::transcode(e.getMessage());
|
|
237
|
+
char buffer[512];
|
|
238
|
+
snprintf(buffer, sizeof(buffer), "Fatal error at line %lu, column %lu: %s",
|
|
239
|
+
(unsigned long)e.getLineNumber(),
|
|
240
|
+
(unsigned long)e.getColumnNumber(),
|
|
241
|
+
msg);
|
|
242
|
+
errors->push_back(buffer);
|
|
243
|
+
XMLString::release(&msg);
|
|
244
|
+
}
|
|
245
|
+
|
|
246
|
+
void resetErrors() {
|
|
247
|
+
errors->clear();
|
|
248
|
+
has_fatal = false;
|
|
249
|
+
}
|
|
250
|
+
};
|
|
251
|
+
|
|
146
252
|
// Memory management functions
|
|
147
253
|
static void document_free(void* ptr) {
|
|
148
254
|
DocumentWrapper* wrapper = (DocumentWrapper*)ptr;
|
|
@@ -150,6 +256,9 @@ static void document_free(void* ptr) {
|
|
|
150
256
|
if (wrapper->parser) {
|
|
151
257
|
delete wrapper->parser;
|
|
152
258
|
}
|
|
259
|
+
if (wrapper->parse_errors) {
|
|
260
|
+
delete wrapper->parse_errors;
|
|
261
|
+
}
|
|
153
262
|
// Document is owned by parser, so don't delete it separately
|
|
154
263
|
xfree(wrapper);
|
|
155
264
|
}
|
|
@@ -163,6 +272,13 @@ static void node_free(void* ptr) {
|
|
|
163
272
|
}
|
|
164
273
|
}
|
|
165
274
|
|
|
275
|
+
static void node_mark(void* ptr) {
|
|
276
|
+
NodeWrapper* wrapper = (NodeWrapper*)ptr;
|
|
277
|
+
if (wrapper) {
|
|
278
|
+
rb_gc_mark(wrapper->doc_ref);
|
|
279
|
+
}
|
|
280
|
+
}
|
|
281
|
+
|
|
166
282
|
static void nodeset_free(void* ptr) {
|
|
167
283
|
NodeSetWrapper* wrapper = (NodeSetWrapper*)ptr;
|
|
168
284
|
if (wrapper) {
|
|
@@ -170,6 +286,13 @@ static void nodeset_free(void* ptr) {
|
|
|
170
286
|
}
|
|
171
287
|
}
|
|
172
288
|
|
|
289
|
+
static void nodeset_mark(void* ptr) {
|
|
290
|
+
NodeSetWrapper* wrapper = (NodeSetWrapper*)ptr;
|
|
291
|
+
if (wrapper) {
|
|
292
|
+
rb_gc_mark(wrapper->nodes_array);
|
|
293
|
+
}
|
|
294
|
+
}
|
|
295
|
+
|
|
173
296
|
static void schema_free(void* ptr) {
|
|
174
297
|
SchemaWrapper* wrapper = (SchemaWrapper*)ptr;
|
|
175
298
|
if (wrapper) {
|
|
@@ -205,14 +328,14 @@ static const rb_data_type_t document_type = {
|
|
|
205
328
|
|
|
206
329
|
static const rb_data_type_t node_type = {
|
|
207
330
|
"RXerces::XML::Node",
|
|
208
|
-
{
|
|
331
|
+
{node_mark, node_free, node_size},
|
|
209
332
|
0, 0,
|
|
210
333
|
RUBY_TYPED_FREE_IMMEDIATELY
|
|
211
334
|
};
|
|
212
335
|
|
|
213
336
|
static const rb_data_type_t nodeset_type = {
|
|
214
337
|
"RXerces::XML::NodeSet",
|
|
215
|
-
{
|
|
338
|
+
{nodeset_mark, nodeset_free, nodeset_size},
|
|
216
339
|
0, 0,
|
|
217
340
|
RUBY_TYPED_FREE_IMMEDIATELY
|
|
218
341
|
};
|
|
@@ -248,22 +371,12 @@ static VALUE wrap_node(DOMNode* node, VALUE doc_ref) {
|
|
|
248
371
|
break;
|
|
249
372
|
}
|
|
250
373
|
|
|
251
|
-
// Keep reference to document to prevent GC
|
|
252
|
-
rb_iv_set(rb_node, "@document", doc_ref);
|
|
253
|
-
|
|
254
374
|
return rb_node;
|
|
255
375
|
}
|
|
256
376
|
|
|
257
377
|
// RXerces::XML::Document.parse(string)
|
|
258
378
|
static VALUE document_parse(VALUE klass, VALUE str) {
|
|
259
|
-
|
|
260
|
-
try {
|
|
261
|
-
XMLPlatformUtils::Initialize();
|
|
262
|
-
xerces_initialized = true;
|
|
263
|
-
} catch (const XMLException& e) {
|
|
264
|
-
rb_raise(rb_eRuntimeError, "Xerces initialization failed");
|
|
265
|
-
}
|
|
266
|
-
}
|
|
379
|
+
ensure_xerces_initialized();
|
|
267
380
|
|
|
268
381
|
Check_Type(str, T_STRING);
|
|
269
382
|
const char* xml_str = StringValueCStr(str);
|
|
@@ -273,6 +386,11 @@ static VALUE document_parse(VALUE klass, VALUE str) {
|
|
|
273
386
|
parser->setDoNamespaces(true);
|
|
274
387
|
parser->setDoSchema(false);
|
|
275
388
|
|
|
389
|
+
// Set up error handler to capture parse errors
|
|
390
|
+
std::vector<std::string>* parse_errors = new std::vector<std::string>();
|
|
391
|
+
ParseErrorHandler error_handler(parse_errors);
|
|
392
|
+
parser->setErrorHandler(&error_handler);
|
|
393
|
+
|
|
276
394
|
try {
|
|
277
395
|
MemBufInputSource input((const XMLByte*)xml_str, strlen(xml_str), "memory");
|
|
278
396
|
parser->parse(input);
|
|
@@ -282,18 +400,33 @@ static VALUE document_parse(VALUE klass, VALUE str) {
|
|
|
282
400
|
DocumentWrapper* wrapper = ALLOC(DocumentWrapper);
|
|
283
401
|
wrapper->doc = doc;
|
|
284
402
|
wrapper->parser = parser;
|
|
403
|
+
wrapper->parse_errors = parse_errors;
|
|
285
404
|
|
|
286
405
|
VALUE rb_doc = TypedData_Wrap_Struct(rb_cDocument, &document_type, wrapper);
|
|
406
|
+
|
|
407
|
+
// If there were fatal errors, raise an exception with details
|
|
408
|
+
if (error_handler.has_fatal && !parse_errors->empty()) {
|
|
409
|
+
std::string all_errors;
|
|
410
|
+
for (const auto& err : *parse_errors) {
|
|
411
|
+
if (!all_errors.empty()) all_errors += "\n";
|
|
412
|
+
all_errors += err;
|
|
413
|
+
}
|
|
414
|
+
rb_raise(rb_eRuntimeError, "XML parsing failed:\n%s", all_errors.c_str());
|
|
415
|
+
}
|
|
416
|
+
|
|
287
417
|
return rb_doc;
|
|
288
418
|
} catch (const XMLException& e) {
|
|
289
419
|
CharStr message(e.getMessage());
|
|
420
|
+
delete parse_errors;
|
|
290
421
|
delete parser;
|
|
291
422
|
rb_raise(rb_eRuntimeError, "XML parsing error: %s", message.localForm());
|
|
292
423
|
} catch (const DOMException& e) {
|
|
293
424
|
CharStr message(e.getMessage());
|
|
425
|
+
delete parse_errors;
|
|
294
426
|
delete parser;
|
|
295
427
|
rb_raise(rb_eRuntimeError, "DOM error: %s", message.localForm());
|
|
296
428
|
} catch (...) {
|
|
429
|
+
delete parse_errors;
|
|
297
430
|
delete parser;
|
|
298
431
|
rb_raise(rb_eRuntimeError, "Unknown XML parsing error");
|
|
299
432
|
}
|
|
@@ -301,6 +434,22 @@ static VALUE document_parse(VALUE klass, VALUE str) {
|
|
|
301
434
|
return Qnil;
|
|
302
435
|
}
|
|
303
436
|
|
|
437
|
+
// document.errors - returns array of parse errors (warnings and errors)
|
|
438
|
+
static VALUE document_errors(VALUE self) {
|
|
439
|
+
DocumentWrapper* wrapper;
|
|
440
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
|
|
441
|
+
|
|
442
|
+
VALUE errors_array = rb_ary_new();
|
|
443
|
+
|
|
444
|
+
if (wrapper->parse_errors) {
|
|
445
|
+
for (const auto& error : *wrapper->parse_errors) {
|
|
446
|
+
rb_ary_push(errors_array, rb_str_new2(error.c_str()));
|
|
447
|
+
}
|
|
448
|
+
}
|
|
449
|
+
|
|
450
|
+
return errors_array;
|
|
451
|
+
}
|
|
452
|
+
|
|
304
453
|
// document.root
|
|
305
454
|
static VALUE document_root(VALUE self) {
|
|
306
455
|
DocumentWrapper* wrapper;
|
|
@@ -343,6 +492,45 @@ static VALUE document_to_s(VALUE self) {
|
|
|
343
492
|
return Qnil;
|
|
344
493
|
}
|
|
345
494
|
|
|
495
|
+
// document.inspect - human-readable representation
|
|
496
|
+
static VALUE document_inspect(VALUE self) {
|
|
497
|
+
DocumentWrapper* wrapper;
|
|
498
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
|
|
499
|
+
|
|
500
|
+
std::string result = "#<RXerces::XML::Document:0x";
|
|
501
|
+
|
|
502
|
+
// Add object ID
|
|
503
|
+
char buf[32];
|
|
504
|
+
snprintf(buf, sizeof(buf), "%016lx", (unsigned long)self);
|
|
505
|
+
result += buf;
|
|
506
|
+
|
|
507
|
+
if (!wrapper->doc) {
|
|
508
|
+
result += " (empty)>";
|
|
509
|
+
return rb_str_new_cstr(result.c_str());
|
|
510
|
+
}
|
|
511
|
+
|
|
512
|
+
// Add encoding
|
|
513
|
+
const XMLCh* encoding = wrapper->doc->getXmlEncoding();
|
|
514
|
+
if (encoding && XMLString::stringLen(encoding) > 0) {
|
|
515
|
+
CharStr utf8_encoding(encoding);
|
|
516
|
+
result += " encoding=\"";
|
|
517
|
+
result += utf8_encoding.localForm();
|
|
518
|
+
result += "\"";
|
|
519
|
+
}
|
|
520
|
+
|
|
521
|
+
// Add root element name
|
|
522
|
+
DOMElement* root = wrapper->doc->getDocumentElement();
|
|
523
|
+
if (root) {
|
|
524
|
+
CharStr rootName(root->getNodeName());
|
|
525
|
+
result += " root=<";
|
|
526
|
+
result += rootName.localForm();
|
|
527
|
+
result += ">";
|
|
528
|
+
}
|
|
529
|
+
|
|
530
|
+
result += ">";
|
|
531
|
+
return rb_str_new_cstr(result.c_str());
|
|
532
|
+
}
|
|
533
|
+
|
|
346
534
|
// document.encoding
|
|
347
535
|
static VALUE document_encoding(VALUE self) {
|
|
348
536
|
DocumentWrapper* wrapper;
|
|
@@ -362,6 +550,29 @@ static VALUE document_encoding(VALUE self) {
|
|
|
362
550
|
return rb_str_new_cstr(utf8_encoding.localForm());
|
|
363
551
|
}
|
|
364
552
|
|
|
553
|
+
// document.text / document.content - returns text content of entire document
|
|
554
|
+
static VALUE document_text(VALUE self) {
|
|
555
|
+
DocumentWrapper* wrapper;
|
|
556
|
+
TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
|
|
557
|
+
|
|
558
|
+
if (!wrapper->doc) {
|
|
559
|
+
return rb_str_new_cstr("");
|
|
560
|
+
}
|
|
561
|
+
|
|
562
|
+
DOMElement* root = wrapper->doc->getDocumentElement();
|
|
563
|
+
if (!root) {
|
|
564
|
+
return rb_str_new_cstr("");
|
|
565
|
+
}
|
|
566
|
+
|
|
567
|
+
const XMLCh* content = root->getTextContent();
|
|
568
|
+
if (!content) {
|
|
569
|
+
return rb_str_new_cstr("");
|
|
570
|
+
}
|
|
571
|
+
|
|
572
|
+
CharStr utf8_content(content);
|
|
573
|
+
return rb_str_new_cstr(utf8_content.localForm());
|
|
574
|
+
}
|
|
575
|
+
|
|
365
576
|
// document.create_element(name)
|
|
366
577
|
static VALUE document_create_element(VALUE self, VALUE name) {
|
|
367
578
|
DocumentWrapper* doc_wrapper;
|
|
@@ -400,14 +611,9 @@ static VALUE document_create_element(VALUE self, VALUE name) {
|
|
|
400
611
|
#ifdef HAVE_XALAN
|
|
401
612
|
// Helper function to execute XPath using Xalan for full XPath 1.0 support
|
|
402
613
|
static VALUE execute_xpath_with_xalan(DOMNode* context_node, const char* xpath_str, VALUE doc_ref) {
|
|
403
|
-
|
|
404
|
-
// Initialize Xalan if needed
|
|
405
|
-
if (!xalan_initialized) {
|
|
406
|
-
XPathEvaluator::initialize();
|
|
407
|
-
XMLPlatformUtils::Initialize();
|
|
408
|
-
xalan_initialized = true;
|
|
409
|
-
}
|
|
614
|
+
ensure_xerces_initialized();
|
|
410
615
|
|
|
616
|
+
try {
|
|
411
617
|
// Get the document
|
|
412
618
|
DOMDocument* domDoc = context_node->getOwnerDocument();
|
|
413
619
|
if (!domDoc && context_node->getNodeType() == DOMNode::DOCUMENT_NODE) {
|
|
@@ -532,8 +738,9 @@ static VALUE document_xpath(VALUE self, VALUE path) {
|
|
|
532
738
|
}
|
|
533
739
|
|
|
534
740
|
DOMXPathNSResolver* resolver = doc_wrapper->doc->createNSResolver(root);
|
|
741
|
+
XStr xpath_xstr(xpath_str);
|
|
535
742
|
DOMXPathExpression* expression = doc_wrapper->doc->createExpression(
|
|
536
|
-
|
|
743
|
+
xpath_xstr.unicodeForm(), resolver);
|
|
537
744
|
|
|
538
745
|
DOMXPathResult* result = expression->evaluate(
|
|
539
746
|
doc_wrapper->doc->getDocumentElement(),
|
|
@@ -575,6 +782,140 @@ static VALUE document_xpath(VALUE self, VALUE path) {
|
|
|
575
782
|
#endif
|
|
576
783
|
}
|
|
577
784
|
|
|
785
|
+
// document.css(selector) - Convert CSS to XPath and execute
|
|
786
|
+
static VALUE document_css(VALUE self, VALUE selector) {
|
|
787
|
+
Check_Type(selector, T_STRING);
|
|
788
|
+
const char* css_str = StringValueCStr(selector);
|
|
789
|
+
|
|
790
|
+
// Convert CSS to XPath
|
|
791
|
+
std::string xpath_str = css_to_xpath(css_str);
|
|
792
|
+
|
|
793
|
+
// Call the xpath method with converted selector
|
|
794
|
+
return document_xpath(self, rb_str_new2(xpath_str.c_str()));
|
|
795
|
+
}
|
|
796
|
+
|
|
797
|
+
// document.at_css(selector) - Returns first matching node
|
|
798
|
+
static VALUE document_at_css(VALUE self, VALUE selector) {
|
|
799
|
+
VALUE nodeset = document_css(self, selector);
|
|
800
|
+
|
|
801
|
+
NodeSetWrapper* wrapper;
|
|
802
|
+
TypedData_Get_Struct(nodeset, NodeSetWrapper, &nodeset_type, wrapper);
|
|
803
|
+
|
|
804
|
+
if (RARRAY_LEN(wrapper->nodes_array) == 0) {
|
|
805
|
+
return Qnil;
|
|
806
|
+
}
|
|
807
|
+
|
|
808
|
+
return rb_ary_entry(wrapper->nodes_array, 0);
|
|
809
|
+
}
|
|
810
|
+
|
|
811
|
+
// node.inspect - human-readable representation
|
|
812
|
+
static VALUE node_inspect(VALUE self) {
|
|
813
|
+
NodeWrapper* wrapper;
|
|
814
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
815
|
+
|
|
816
|
+
if (!wrapper->node) {
|
|
817
|
+
return rb_str_new_cstr("#<RXerces::XML::Node (nil)>");
|
|
818
|
+
}
|
|
819
|
+
|
|
820
|
+
DOMNode::NodeType nodeType = wrapper->node->getNodeType();
|
|
821
|
+
std::string result;
|
|
822
|
+
|
|
823
|
+
// Add object ID
|
|
824
|
+
char buf[32];
|
|
825
|
+
snprintf(buf, sizeof(buf), "%016lx", (unsigned long)self);
|
|
826
|
+
|
|
827
|
+
if (nodeType == DOMNode::ELEMENT_NODE) {
|
|
828
|
+
result = "#<RXerces::XML::Element:0x";
|
|
829
|
+
result += buf;
|
|
830
|
+
result += " <";
|
|
831
|
+
|
|
832
|
+
CharStr name(wrapper->node->getNodeName());
|
|
833
|
+
result += name.localForm();
|
|
834
|
+
|
|
835
|
+
// Add attributes
|
|
836
|
+
DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
|
|
837
|
+
if (element) {
|
|
838
|
+
DOMNamedNodeMap* attributes = element->getAttributes();
|
|
839
|
+
if (attributes && attributes->getLength() > 0) {
|
|
840
|
+
XMLSize_t attrLen = attributes->getLength();
|
|
841
|
+
if (attrLen > 3) attrLen = 3;
|
|
842
|
+
|
|
843
|
+
for (XMLSize_t i = 0; i < attrLen; i++) {
|
|
844
|
+
DOMNode* attr = attributes->item(i);
|
|
845
|
+
CharStr attrName(attr->getNodeName());
|
|
846
|
+
CharStr attrValue(attr->getNodeValue());
|
|
847
|
+
result += " ";
|
|
848
|
+
result += attrName.localForm();
|
|
849
|
+
result += "=\"";
|
|
850
|
+
result += attrValue.localForm();
|
|
851
|
+
result += "\"";
|
|
852
|
+
}
|
|
853
|
+
if (attributes->getLength() > 3) {
|
|
854
|
+
result += " ...";
|
|
855
|
+
}
|
|
856
|
+
}
|
|
857
|
+
}
|
|
858
|
+
|
|
859
|
+
result += ">";
|
|
860
|
+
|
|
861
|
+
// Add truncated text content
|
|
862
|
+
const XMLCh* textContent = wrapper->node->getTextContent();
|
|
863
|
+
if (textContent && XMLString::stringLen(textContent) > 0) {
|
|
864
|
+
CharStr text(textContent);
|
|
865
|
+
std::string textStr = text.localForm();
|
|
866
|
+
|
|
867
|
+
size_t start = textStr.find_first_not_of(" \t\n\r");
|
|
868
|
+
if (start != std::string::npos) {
|
|
869
|
+
size_t end = textStr.find_last_not_of(" \t\n\r");
|
|
870
|
+
textStr = textStr.substr(start, end - start + 1);
|
|
871
|
+
|
|
872
|
+
if (textStr.length() > 40) {
|
|
873
|
+
textStr = textStr.substr(0, 37) + "...";
|
|
874
|
+
}
|
|
875
|
+
|
|
876
|
+
result += "\"";
|
|
877
|
+
result += textStr;
|
|
878
|
+
result += "\"";
|
|
879
|
+
}
|
|
880
|
+
}
|
|
881
|
+
|
|
882
|
+
result += ">";
|
|
883
|
+
} else if (nodeType == DOMNode::TEXT_NODE) {
|
|
884
|
+
result = "#<RXerces::XML::Text:0x";
|
|
885
|
+
result += buf;
|
|
886
|
+
result += " \"";
|
|
887
|
+
|
|
888
|
+
const XMLCh* textContent = wrapper->node->getNodeValue();
|
|
889
|
+
if (textContent) {
|
|
890
|
+
CharStr text(textContent);
|
|
891
|
+
std::string textStr = text.localForm();
|
|
892
|
+
|
|
893
|
+
size_t start = textStr.find_first_not_of(" \t\n\r");
|
|
894
|
+
if (start != std::string::npos) {
|
|
895
|
+
size_t end = textStr.find_last_not_of(" \t\n\r");
|
|
896
|
+
textStr = textStr.substr(start, end - start + 1);
|
|
897
|
+
|
|
898
|
+
if (textStr.length() > 40) {
|
|
899
|
+
textStr = textStr.substr(0, 37) + "...";
|
|
900
|
+
}
|
|
901
|
+
|
|
902
|
+
result += textStr;
|
|
903
|
+
}
|
|
904
|
+
}
|
|
905
|
+
|
|
906
|
+
result += "\">";
|
|
907
|
+
} else {
|
|
908
|
+
result = "#<RXerces::XML::Node:0x";
|
|
909
|
+
result += buf;
|
|
910
|
+
result += " ";
|
|
911
|
+
CharStr name(wrapper->node->getNodeName());
|
|
912
|
+
result += name.localForm();
|
|
913
|
+
result += ">";
|
|
914
|
+
}
|
|
915
|
+
|
|
916
|
+
return rb_str_new_cstr(result.c_str());
|
|
917
|
+
}
|
|
918
|
+
|
|
578
919
|
// node.name
|
|
579
920
|
static VALUE node_name(VALUE self) {
|
|
580
921
|
NodeWrapper* wrapper;
|
|
@@ -638,7 +979,8 @@ static VALUE node_text_set(VALUE self, VALUE text) {
|
|
|
638
979
|
Check_Type(text, T_STRING);
|
|
639
980
|
const char* text_str = StringValueCStr(text);
|
|
640
981
|
|
|
641
|
-
|
|
982
|
+
XStr text_xstr(text_str);
|
|
983
|
+
wrapper->node->setTextContent(text_xstr.unicodeForm());
|
|
642
984
|
|
|
643
985
|
return text;
|
|
644
986
|
}
|
|
@@ -656,7 +998,8 @@ static VALUE node_get_attribute(VALUE self, VALUE attr_name) {
|
|
|
656
998
|
const char* attr_str = StringValueCStr(attr_name);
|
|
657
999
|
|
|
658
1000
|
DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
|
|
659
|
-
|
|
1001
|
+
XStr attr_xstr(attr_str);
|
|
1002
|
+
const XMLCh* value = element->getAttribute(attr_xstr.unicodeForm());
|
|
660
1003
|
|
|
661
1004
|
if (!value || XMLString::stringLen(value) == 0) {
|
|
662
1005
|
return Qnil;
|
|
@@ -682,23 +1025,49 @@ static VALUE node_set_attribute(VALUE self, VALUE attr_name, VALUE attr_value) {
|
|
|
682
1025
|
const char* value_str = StringValueCStr(attr_value);
|
|
683
1026
|
|
|
684
1027
|
DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
|
|
685
|
-
|
|
1028
|
+
XStr attr_xstr(attr_str);
|
|
1029
|
+
XStr value_xstr(value_str);
|
|
1030
|
+
element->setAttribute(attr_xstr.unicodeForm(), value_xstr.unicodeForm());
|
|
686
1031
|
|
|
687
1032
|
return attr_value;
|
|
688
1033
|
}
|
|
689
1034
|
|
|
1035
|
+
// node.has_attribute?(attribute_name)
|
|
1036
|
+
static VALUE node_has_attribute_p(VALUE self, VALUE attr_name) {
|
|
1037
|
+
NodeWrapper* wrapper;
|
|
1038
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
1039
|
+
|
|
1040
|
+
if (!wrapper->node || wrapper->node->getNodeType() != DOMNode::ELEMENT_NODE) {
|
|
1041
|
+
return Qfalse;
|
|
1042
|
+
}
|
|
1043
|
+
|
|
1044
|
+
Check_Type(attr_name, T_STRING);
|
|
1045
|
+
const char* attr_str = StringValueCStr(attr_name);
|
|
1046
|
+
|
|
1047
|
+
DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
|
|
1048
|
+
XStr attr_xstr(attr_str);
|
|
1049
|
+
const XMLCh* value = element->getAttribute(attr_xstr.unicodeForm());
|
|
1050
|
+
|
|
1051
|
+
if (!value || XMLString::stringLen(value) == 0) {
|
|
1052
|
+
return Qfalse;
|
|
1053
|
+
}
|
|
1054
|
+
|
|
1055
|
+
return Qtrue;
|
|
1056
|
+
}
|
|
1057
|
+
|
|
690
1058
|
// node.children
|
|
691
1059
|
static VALUE node_children(VALUE self) {
|
|
692
1060
|
NodeWrapper* wrapper;
|
|
693
1061
|
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
694
1062
|
|
|
695
|
-
VALUE doc_ref = rb_iv_get(self, "@document");
|
|
696
1063
|
VALUE children = rb_ary_new();
|
|
697
1064
|
|
|
698
1065
|
if (!wrapper->node) {
|
|
699
1066
|
return children;
|
|
700
1067
|
}
|
|
701
1068
|
|
|
1069
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1070
|
+
|
|
702
1071
|
DOMNodeList* child_nodes = wrapper->node->getChildNodes();
|
|
703
1072
|
XMLSize_t count = child_nodes->getLength();
|
|
704
1073
|
|
|
@@ -710,6 +1079,31 @@ static VALUE node_children(VALUE self) {
|
|
|
710
1079
|
return children;
|
|
711
1080
|
}
|
|
712
1081
|
|
|
1082
|
+
// node.element_children - returns only element children (no text nodes)
|
|
1083
|
+
static VALUE node_element_children(VALUE self) {
|
|
1084
|
+
NodeWrapper* wrapper;
|
|
1085
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
1086
|
+
|
|
1087
|
+
VALUE children = rb_ary_new();
|
|
1088
|
+
|
|
1089
|
+
if (!wrapper->node) {
|
|
1090
|
+
return children;
|
|
1091
|
+
}
|
|
1092
|
+
|
|
1093
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1094
|
+
DOMNodeList* child_nodes = wrapper->node->getChildNodes();
|
|
1095
|
+
XMLSize_t count = child_nodes->getLength();
|
|
1096
|
+
|
|
1097
|
+
for (XMLSize_t i = 0; i < count; i++) {
|
|
1098
|
+
DOMNode* child = child_nodes->item(i);
|
|
1099
|
+
if (child->getNodeType() == DOMNode::ELEMENT_NODE) {
|
|
1100
|
+
rb_ary_push(children, wrap_node(child, doc_ref));
|
|
1101
|
+
}
|
|
1102
|
+
}
|
|
1103
|
+
|
|
1104
|
+
return children;
|
|
1105
|
+
}
|
|
1106
|
+
|
|
713
1107
|
// node.parent
|
|
714
1108
|
static VALUE node_parent(VALUE self) {
|
|
715
1109
|
NodeWrapper* wrapper;
|
|
@@ -724,10 +1118,82 @@ static VALUE node_parent(VALUE self) {
|
|
|
724
1118
|
return Qnil;
|
|
725
1119
|
}
|
|
726
1120
|
|
|
727
|
-
VALUE doc_ref =
|
|
1121
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
728
1122
|
return wrap_node(parent, doc_ref);
|
|
729
1123
|
}
|
|
730
1124
|
|
|
1125
|
+
// node.ancestors(selector = nil) - returns an array of all ancestor nodes, optionally filtered by selector
|
|
1126
|
+
static VALUE node_ancestors(int argc, VALUE* argv, VALUE self) {
|
|
1127
|
+
VALUE selector;
|
|
1128
|
+
rb_scan_args(argc, argv, "01", &selector);
|
|
1129
|
+
|
|
1130
|
+
NodeWrapper* wrapper;
|
|
1131
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
1132
|
+
|
|
1133
|
+
VALUE ancestors = rb_ary_new();
|
|
1134
|
+
|
|
1135
|
+
if (!wrapper->node) {
|
|
1136
|
+
return ancestors;
|
|
1137
|
+
}
|
|
1138
|
+
|
|
1139
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1140
|
+
DOMNode* current = wrapper->node->getParentNode();
|
|
1141
|
+
|
|
1142
|
+
// Walk up the tree, collecting all ancestors
|
|
1143
|
+
while (current) {
|
|
1144
|
+
// Stop at the document node (don't include it in ancestors)
|
|
1145
|
+
if (current->getNodeType() == DOMNode::DOCUMENT_NODE) {
|
|
1146
|
+
break;
|
|
1147
|
+
}
|
|
1148
|
+
rb_ary_push(ancestors, wrap_node(current, doc_ref));
|
|
1149
|
+
current = current->getParentNode();
|
|
1150
|
+
}
|
|
1151
|
+
|
|
1152
|
+
// If selector is provided, filter the ancestors
|
|
1153
|
+
if (!NIL_P(selector)) {
|
|
1154
|
+
Check_Type(selector, T_STRING);
|
|
1155
|
+
const char* selector_str = StringValueCStr(selector);
|
|
1156
|
+
|
|
1157
|
+
// Convert CSS to XPath if needed (css_to_xpath adds // prefix)
|
|
1158
|
+
std::string xpath_str = css_to_xpath(selector_str);
|
|
1159
|
+
|
|
1160
|
+
// Get all matching nodes from the document
|
|
1161
|
+
VALUE all_matches = document_xpath(doc_ref, rb_str_new2(xpath_str.c_str()));
|
|
1162
|
+
|
|
1163
|
+
NodeSetWrapper* matches_wrapper;
|
|
1164
|
+
TypedData_Get_Struct(all_matches, NodeSetWrapper, &nodeset_type, matches_wrapper);
|
|
1165
|
+
|
|
1166
|
+
VALUE filtered = rb_ary_new();
|
|
1167
|
+
long ancestor_len = RARRAY_LEN(ancestors);
|
|
1168
|
+
long matches_len = RARRAY_LEN(matches_wrapper->nodes_array);
|
|
1169
|
+
|
|
1170
|
+
// For each ancestor, check if it's in the matches
|
|
1171
|
+
for (long i = 0; i < ancestor_len; i++) {
|
|
1172
|
+
VALUE ancestor = rb_ary_entry(ancestors, i);
|
|
1173
|
+
|
|
1174
|
+
NodeWrapper* ancestor_wrapper;
|
|
1175
|
+
TypedData_Get_Struct(ancestor, NodeWrapper, &node_type, ancestor_wrapper);
|
|
1176
|
+
|
|
1177
|
+
// Check if this ancestor node is in the matches
|
|
1178
|
+
for (long j = 0; j < matches_len; j++) {
|
|
1179
|
+
VALUE match = rb_ary_entry(matches_wrapper->nodes_array, j);
|
|
1180
|
+
NodeWrapper* match_wrapper;
|
|
1181
|
+
TypedData_Get_Struct(match, NodeWrapper, &node_type, match_wrapper);
|
|
1182
|
+
|
|
1183
|
+
// Compare the actual DOM nodes
|
|
1184
|
+
if (ancestor_wrapper->node == match_wrapper->node) {
|
|
1185
|
+
rb_ary_push(filtered, ancestor);
|
|
1186
|
+
break;
|
|
1187
|
+
}
|
|
1188
|
+
}
|
|
1189
|
+
}
|
|
1190
|
+
|
|
1191
|
+
return filtered;
|
|
1192
|
+
}
|
|
1193
|
+
|
|
1194
|
+
return ancestors;
|
|
1195
|
+
}
|
|
1196
|
+
|
|
731
1197
|
// node.attributes - returns hash of all attributes (only for element nodes)
|
|
732
1198
|
static VALUE node_attributes(VALUE self) {
|
|
733
1199
|
NodeWrapper* wrapper;
|
|
@@ -779,7 +1245,7 @@ static VALUE node_next_sibling(VALUE self) {
|
|
|
779
1245
|
return Qnil;
|
|
780
1246
|
}
|
|
781
1247
|
|
|
782
|
-
VALUE doc_ref =
|
|
1248
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
783
1249
|
return wrap_node(next, doc_ref);
|
|
784
1250
|
}
|
|
785
1251
|
|
|
@@ -797,7 +1263,55 @@ static VALUE node_previous_sibling(VALUE self) {
|
|
|
797
1263
|
return Qnil;
|
|
798
1264
|
}
|
|
799
1265
|
|
|
800
|
-
VALUE doc_ref =
|
|
1266
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1267
|
+
return wrap_node(prev, doc_ref);
|
|
1268
|
+
}
|
|
1269
|
+
|
|
1270
|
+
// node.next_element - next sibling that is an element (skipping text nodes)
|
|
1271
|
+
static VALUE node_next_element(VALUE self) {
|
|
1272
|
+
NodeWrapper* wrapper;
|
|
1273
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
1274
|
+
|
|
1275
|
+
if (!wrapper->node) {
|
|
1276
|
+
return Qnil;
|
|
1277
|
+
}
|
|
1278
|
+
|
|
1279
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1280
|
+
DOMNode* next = wrapper->node->getNextSibling();
|
|
1281
|
+
|
|
1282
|
+
// Skip non-element nodes
|
|
1283
|
+
while (next && next->getNodeType() != DOMNode::ELEMENT_NODE) {
|
|
1284
|
+
next = next->getNextSibling();
|
|
1285
|
+
}
|
|
1286
|
+
|
|
1287
|
+
if (!next) {
|
|
1288
|
+
return Qnil;
|
|
1289
|
+
}
|
|
1290
|
+
|
|
1291
|
+
return wrap_node(next, doc_ref);
|
|
1292
|
+
}
|
|
1293
|
+
|
|
1294
|
+
// node.previous_element - previous sibling that is an element (skipping text nodes)
|
|
1295
|
+
static VALUE node_previous_element(VALUE self) {
|
|
1296
|
+
NodeWrapper* wrapper;
|
|
1297
|
+
TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
|
|
1298
|
+
|
|
1299
|
+
if (!wrapper->node) {
|
|
1300
|
+
return Qnil;
|
|
1301
|
+
}
|
|
1302
|
+
|
|
1303
|
+
VALUE doc_ref = wrapper->doc_ref;
|
|
1304
|
+
DOMNode* prev = wrapper->node->getPreviousSibling();
|
|
1305
|
+
|
|
1306
|
+
// Skip non-element nodes
|
|
1307
|
+
while (prev && prev->getNodeType() != DOMNode::ELEMENT_NODE) {
|
|
1308
|
+
prev = prev->getPreviousSibling();
|
|
1309
|
+
}
|
|
1310
|
+
|
|
1311
|
+
if (!prev) {
|
|
1312
|
+
return Qnil;
|
|
1313
|
+
}
|
|
1314
|
+
|
|
801
1315
|
return wrap_node(prev, doc_ref);
|
|
802
1316
|
}
|
|
803
1317
|
|
|
@@ -816,6 +1330,7 @@ static VALUE node_add_child(VALUE self, VALUE child) {
|
|
|
816
1330
|
}
|
|
817
1331
|
|
|
818
1332
|
DOMNode* child_node = NULL;
|
|
1333
|
+
bool needs_import = false;
|
|
819
1334
|
|
|
820
1335
|
// Check if child is a string or a node
|
|
821
1336
|
if (TYPE(child) == T_STRING) {
|
|
@@ -830,6 +1345,13 @@ static VALUE node_add_child(VALUE self, VALUE child) {
|
|
|
830
1345
|
if (rb_obj_is_kind_of(child, rb_cNode)) {
|
|
831
1346
|
TypedData_Get_Struct(child, NodeWrapper, &node_type, child_wrapper);
|
|
832
1347
|
child_node = child_wrapper->node;
|
|
1348
|
+
|
|
1349
|
+
// Check if child belongs to a different document
|
|
1350
|
+
DOMDocument* child_doc = child_node->getOwnerDocument();
|
|
1351
|
+
if (child_doc && child_doc != doc) {
|
|
1352
|
+
rb_raise(rb_eRuntimeError,
|
|
1353
|
+
"Node belongs to a different document. Use importNode to adopt nodes from other documents.");
|
|
1354
|
+
}
|
|
833
1355
|
} else {
|
|
834
1356
|
rb_raise(rb_eTypeError, "Argument must be a String or Node");
|
|
835
1357
|
}
|
|
@@ -840,12 +1362,24 @@ static VALUE node_add_child(VALUE self, VALUE child) {
|
|
|
840
1362
|
}
|
|
841
1363
|
|
|
842
1364
|
try {
|
|
1365
|
+
// appendChild will automatically detach the node from its current parent if it has one
|
|
843
1366
|
wrapper->node->appendChild(child_node);
|
|
844
1367
|
} catch (const DOMException& e) {
|
|
845
1368
|
char* message = XMLString::transcode(e.getMessage());
|
|
846
1369
|
VALUE rb_error = rb_str_new_cstr(message);
|
|
847
1370
|
XMLString::release(&message);
|
|
848
|
-
|
|
1371
|
+
|
|
1372
|
+
// Provide more context for common errors
|
|
1373
|
+
unsigned short code = e.code;
|
|
1374
|
+
if (code == DOMException::WRONG_DOCUMENT_ERR) {
|
|
1375
|
+
rb_raise(rb_eRuntimeError, "Node belongs to a different document: %s", StringValueCStr(rb_error));
|
|
1376
|
+
} else if (code == DOMException::HIERARCHY_REQUEST_ERR) {
|
|
1377
|
+
rb_raise(rb_eRuntimeError, "Invalid hierarchy: cannot add this node as a child: %s", StringValueCStr(rb_error));
|
|
1378
|
+
} else if (code == DOMException::NO_MODIFICATION_ALLOWED_ERR) {
|
|
1379
|
+
rb_raise(rb_eRuntimeError, "Node is read-only: %s", StringValueCStr(rb_error));
|
|
1380
|
+
} else {
|
|
1381
|
+
rb_raise(rb_eRuntimeError, "Failed to add child: %s", StringValueCStr(rb_error));
|
|
1382
|
+
}
|
|
849
1383
|
}
|
|
850
1384
|
|
|
851
1385
|
return child;
|
|
@@ -887,7 +1421,8 @@ static VALUE node_inner_html(VALUE self) {
|
|
|
887
1421
|
}
|
|
888
1422
|
|
|
889
1423
|
try {
|
|
890
|
-
|
|
1424
|
+
XStr ls_name("LS");
|
|
1425
|
+
DOMImplementation* impl = DOMImplementationRegistry::getDOMImplementation(ls_name.unicodeForm());
|
|
891
1426
|
DOMLSSerializer* serializer = ((DOMImplementationLS*)impl)->createLSSerializer();
|
|
892
1427
|
|
|
893
1428
|
// Build a string by serializing each child
|
|
@@ -1052,7 +1587,7 @@ static VALUE node_xpath(VALUE self, VALUE path) {
|
|
|
1052
1587
|
|
|
1053
1588
|
Check_Type(path, T_STRING);
|
|
1054
1589
|
const char* xpath_str = StringValueCStr(path);
|
|
1055
|
-
VALUE doc_ref =
|
|
1590
|
+
VALUE doc_ref = node_wrapper->doc_ref;
|
|
1056
1591
|
|
|
1057
1592
|
#ifdef HAVE_XALAN
|
|
1058
1593
|
// Use Xalan for full XPath 1.0 support
|
|
@@ -1068,8 +1603,9 @@ static VALUE node_xpath(VALUE self, VALUE path) {
|
|
|
1068
1603
|
}
|
|
1069
1604
|
|
|
1070
1605
|
DOMXPathNSResolver* resolver = doc->createNSResolver(node_wrapper->node);
|
|
1606
|
+
XStr xpath_xstr(xpath_str);
|
|
1071
1607
|
DOMXPathExpression* expression = doc->createExpression(
|
|
1072
|
-
|
|
1608
|
+
xpath_xstr.unicodeForm(), resolver);
|
|
1073
1609
|
|
|
1074
1610
|
DOMXPathResult* result = expression->evaluate(
|
|
1075
1611
|
node_wrapper->node,
|
|
@@ -1124,10 +1660,185 @@ static VALUE node_at_xpath(VALUE self, VALUE path) {
|
|
|
1124
1660
|
return rb_ary_entry(wrapper->nodes_array, 0);
|
|
1125
1661
|
}
|
|
1126
1662
|
|
|
1127
|
-
// node.
|
|
1663
|
+
// node.at_css(selector) - returns first matching node or nil
|
|
1664
|
+
static VALUE node_at_css(VALUE self, VALUE selector) {
|
|
1665
|
+
VALUE nodeset = node_css(self, selector);
|
|
1666
|
+
NodeSetWrapper* wrapper;
|
|
1667
|
+
TypedData_Get_Struct(nodeset, NodeSetWrapper, &nodeset_type, wrapper);
|
|
1668
|
+
|
|
1669
|
+
if (RARRAY_LEN(wrapper->nodes_array) == 0) {
|
|
1670
|
+
return Qnil;
|
|
1671
|
+
}
|
|
1672
|
+
|
|
1673
|
+
return rb_ary_entry(wrapper->nodes_array, 0);
|
|
1674
|
+
}
|
|
1675
|
+
|
|
1676
|
+
// Quote `text` as an XPath 1.0 string literal. XPath 1.0 has no escape
// sequences, so use whichever quote character the text does not contain and
// fall back to concat() when it contains both.
static std::string css_xpath_literal(const std::string& text) {
    if (text.find('\'') == std::string::npos) {
        return "'" + text + "'";
    }
    if (text.find('"') == std::string::npos) {
        return "\"" + text + "\"";
    }

    // Split on single quotes: concat('left', "'", 'right', ...)
    std::string literal = "concat(";
    size_t pos = 0;
    for (;;) {
        const size_t quote = text.find('\'', pos);
        literal += "'";
        literal += text.substr(pos, quote == std::string::npos ? std::string::npos : quote - pos);
        literal += "'";
        if (quote == std::string::npos) {
            break;
        }
        literal += ", \"'\", ";
        pos = quote + 1;
    }
    literal += ")";
    return literal;
}

// Helper function to convert basic CSS selectors to XPath.
//
// Supported: tag, *, .class, #id, tag.class, tag#id, [attr], [attr=value]
// (quoted or unquoted value), the descendant combinator (whitespace) and the
// child combinator ('>', with or without surrounding whitespace).
//
// Not translated: selector groups (','), sibling combinators ('+', '~') and
// pseudo-classes. Attribute operators other than '=' (such as ^=, $=, *=, ~=)
// degrade to a plain attribute-presence test.
//
// An empty or all-whitespace selector yields "//*". Every result starts with
// "//", i.e. the first step is searched at any depth.
static std::string css_to_xpath(const char* css) {
    static const char kWhitespace[] = " \t\n\r";

    std::string selector(css ? css : "");
    const size_t first = selector.find_first_not_of(kWhitespace);
    if (first == std::string::npos) {
        return "//*";
    }
    const size_t last = selector.find_last_not_of(kWhitespace);
    selector = selector.substr(first, last - first + 1);

    std::string result;
    std::string axis = "//";   // separator written in front of the next step
    std::string tag;           // tag name collected for the step being read
    bool step_open = false;    // true once the current step's node test is written

    const auto is_space = [](char ch) {
        return ch == ' ' || ch == '\t' || ch == '\n' || ch == '\r';
    };
    // Characters that terminate an id or class name.
    const auto ends_name = [&is_space](char ch) {
        return is_space(ch) || ch == '#' || ch == '.' || ch == '[' ||
               ch == '>' || ch == '+' || ch == '~';
    };
    // Characters that terminate an attribute name inside [...].
    const auto ends_attr_name = [](char ch) {
        return ch == ']' || ch == '=' || ch == '!' || ch == '~' ||
               ch == '^' || ch == '$' || ch == '*';
    };
    // Write "<axis><tag or *>" exactly once per step, so that predicates are
    // always attached to a node test and a tag name is never dropped.
    const auto open_step = [&]() {
        if (!step_open) {
            result += axis;
            result += tag.empty() ? "*" : tag;
            tag.clear();
            step_open = true;
        }
    };

    const size_t len = selector.length();
    size_t i = 0;
    while (i < len) {
        const char c = selector[i];

        // Combinators: consume the whole run of whitespace and '>' so that
        // "a>b", "a > b" and "a >b" all become a single child step.
        if (is_space(c) || c == '>') {
            bool child = false;
            while (i < len && (is_space(selector[i]) || selector[i] == '>')) {
                if (selector[i] == '>') {
                    child = true;
                }
                ++i;
            }
            // A leading combinator has nothing to attach to; keep the initial "//".
            if (step_open || !tag.empty()) {
                open_step();
                axis = child ? "/" : "//";
                step_open = false;
            }
            continue;
        }

        // #id and .class predicates
        if (c == '#' || c == '.') {
            open_step();
            ++i;
            const size_t begin = i;
            while (i < len && !ends_name(selector[i])) {
                ++i;
            }
            const std::string name = selector.substr(begin, i - begin);
            if (c == '#') {
                result += "[@id=" + css_xpath_literal(name) + "]";
            } else {
                // Pad with spaces so only whole class tokens match.
                result += "[contains(concat(' ', @class, ' '), " +
                          css_xpath_literal(" " + name + " ") + ")]";
            }
            continue;
        }

        // [attr] and [attr=value] predicates
        if (c == '[') {
            open_step();
            ++i;
            const size_t name_begin = i;
            while (i < len && !ends_attr_name(selector[i])) {
                ++i;
            }
            result += "[@";
            result.append(selector, name_begin, i - name_begin);

            if (i < len && selector[i] == '=') {
                ++i;
                std::string value;
                if (i < len && (selector[i] == '"' || selector[i] == '\'')) {
                    const char quote = selector[i++];
                    while (i < len && selector[i] != quote) {
                        value += selector[i++];
                    }
                    if (i < len) {
                        ++i;  // skip closing quote
                    }
                } else {
                    while (i < len && selector[i] != ']') {
                        value += selector[i++];
                    }
                }
                result += "=" + css_xpath_literal(value);
            }

            // Drop anything left before the closing bracket, then consume it.
            while (i < len && selector[i] != ']') {
                ++i;
            }
            if (i < len) {
                ++i;
            }
            result += ']';
            continue;
        }

        // Any other character belongs to the tag name of a step that has not
        // been written yet; once predicates have started it is ignored.
        if (!step_open) {
            tag += c;
        }
        ++i;
    }

    // Flush the last step (also turns a bare "*" or a trailing combinator into "*").
    open_step();

    return result;
}
|
|
1831
|
+
|
|
1832
|
+
// node.css(selector) - Convert CSS to XPath and execute
|
|
1128
1833
|
static VALUE node_css(VALUE self, VALUE selector) {
|
|
1129
|
-
|
|
1130
|
-
|
|
1834
|
+
Check_Type(selector, T_STRING);
|
|
1835
|
+
const char* css_str = StringValueCStr(selector);
|
|
1836
|
+
|
|
1837
|
+
// Convert CSS to XPath
|
|
1838
|
+
std::string xpath_str = css_to_xpath(css_str);
|
|
1839
|
+
|
|
1840
|
+
// Call the xpath method with converted selector
|
|
1841
|
+
return node_xpath(self, rb_str_new2(xpath_str.c_str()));
|
|
1131
1842
|
}
|
|
1132
1843
|
|
|
1133
1844
|
// nodeset.length / nodeset.size
|
|
@@ -1171,23 +1882,211 @@ static VALUE nodeset_to_a(VALUE self) {
|
|
|
1171
1882
|
return rb_ary_dup(wrapper->nodes_array);
|
|
1172
1883
|
}
|
|
1173
1884
|
|
|
1885
|
+
// nodeset.first - returns first node or nil
|
|
1886
|
+
static VALUE nodeset_first(VALUE self) {
|
|
1887
|
+
NodeSetWrapper* wrapper;
|
|
1888
|
+
TypedData_Get_Struct(self, NodeSetWrapper, &nodeset_type, wrapper);
|
|
1889
|
+
|
|
1890
|
+
if (RARRAY_LEN(wrapper->nodes_array) == 0) {
|
|
1891
|
+
return Qnil;
|
|
1892
|
+
}
|
|
1893
|
+
|
|
1894
|
+
return rb_ary_entry(wrapper->nodes_array, 0);
|
|
1895
|
+
}
|
|
1896
|
+
|
|
1897
|
+
// nodeset.last - returns last node or nil
|
|
1898
|
+
static VALUE nodeset_last(VALUE self) {
|
|
1899
|
+
NodeSetWrapper* wrapper;
|
|
1900
|
+
TypedData_Get_Struct(self, NodeSetWrapper, &nodeset_type, wrapper);
|
|
1901
|
+
|
|
1902
|
+
long len = RARRAY_LEN(wrapper->nodes_array);
|
|
1903
|
+
if (len == 0) {
|
|
1904
|
+
return Qnil;
|
|
1905
|
+
}
|
|
1906
|
+
|
|
1907
|
+
return rb_ary_entry(wrapper->nodes_array, len - 1);
|
|
1908
|
+
}
|
|
1909
|
+
|
|
1910
|
+
// nodeset.empty? - returns true if nodeset is empty
|
|
1911
|
+
static VALUE nodeset_empty_p(VALUE self) {
|
|
1912
|
+
NodeSetWrapper* wrapper;
|
|
1913
|
+
TypedData_Get_Struct(self, NodeSetWrapper, &nodeset_type, wrapper);
|
|
1914
|
+
|
|
1915
|
+
return RARRAY_LEN(wrapper->nodes_array) == 0 ? Qtrue : Qfalse;
|
|
1916
|
+
}
|
|
1917
|
+
|
|
1918
|
+
// nodeset.inner_html - returns concatenated inner_html of all nodes
|
|
1919
|
+
static VALUE nodeset_inner_html(VALUE self) {
|
|
1920
|
+
NodeSetWrapper* wrapper;
|
|
1921
|
+
TypedData_Get_Struct(self, NodeSetWrapper, &nodeset_type, wrapper);
|
|
1922
|
+
|
|
1923
|
+
std::string result;
|
|
1924
|
+
long len = RARRAY_LEN(wrapper->nodes_array);
|
|
1925
|
+
|
|
1926
|
+
for (long i = 0; i < len; i++) {
|
|
1927
|
+
VALUE node = rb_ary_entry(wrapper->nodes_array, i);
|
|
1928
|
+
VALUE inner_html = rb_funcall(node, rb_intern("inner_html"), 0);
|
|
1929
|
+
result += StringValueCStr(inner_html);
|
|
1930
|
+
}
|
|
1931
|
+
|
|
1932
|
+
return rb_str_new_cstr(result.c_str());
|
|
1933
|
+
}
|
|
1934
|
+
|
|
1935
|
+
// nodeset.text - returns concatenated text content of all nodes
|
|
1936
|
+
static VALUE nodeset_text(VALUE self) {
|
|
1937
|
+
NodeSetWrapper* wrapper;
|
|
1938
|
+
TypedData_Get_Struct(self, NodeSetWrapper, &nodeset_type, wrapper);
|
|
1939
|
+
|
|
1940
|
+
std::string result;
|
|
1941
|
+
long len = RARRAY_LEN(wrapper->nodes_array);
|
|
1942
|
+
|
|
1943
|
+
for (long i = 0; i < len; i++) {
|
|
1944
|
+
VALUE node = rb_ary_entry(wrapper->nodes_array, i);
|
|
1945
|
+
NodeWrapper* node_wrapper;
|
|
1946
|
+
TypedData_Get_Struct(node, NodeWrapper, &node_type, node_wrapper);
|
|
1947
|
+
|
|
1948
|
+
if (node_wrapper->node) {
|
|
1949
|
+
const XMLCh* content = node_wrapper->node->getTextContent();
|
|
1950
|
+
if (content) {
|
|
1951
|
+
CharStr utf8_content(content);
|
|
1952
|
+
result += utf8_content.localForm();
|
|
1953
|
+
}
|
|
1954
|
+
}
|
|
1955
|
+
}
|
|
1956
|
+
|
|
1957
|
+
return rb_str_new_cstr(result.c_str());
|
|
1958
|
+
}
|
|
1959
|
+
|
|
1960
|
+
// nodeset.inspect / nodeset.to_s - human-readable representation
|
|
1961
|
+
static VALUE nodeset_inspect(VALUE self) {
|
|
1962
|
+
NodeSetWrapper* wrapper;
|
|
1963
|
+
TypedData_Get_Struct(self, NodeSetWrapper, &nodeset_type, wrapper);
|
|
1964
|
+
|
|
1965
|
+
long len = RARRAY_LEN(wrapper->nodes_array);
|
|
1966
|
+
std::string result = "#<RXerces::XML::NodeSet:0x";
|
|
1967
|
+
|
|
1968
|
+
// Add object ID
|
|
1969
|
+
char buf[32];
|
|
1970
|
+
snprintf(buf, sizeof(buf), "%016lx", (unsigned long)self);
|
|
1971
|
+
result += buf;
|
|
1972
|
+
result += " [";
|
|
1973
|
+
|
|
1974
|
+
for (long i = 0; i < len; i++) {
|
|
1975
|
+
if (i > 0) result += ", ";
|
|
1976
|
+
|
|
1977
|
+
VALUE node = rb_ary_entry(wrapper->nodes_array, i);
|
|
1978
|
+
NodeWrapper* node_wrapper;
|
|
1979
|
+
TypedData_Get_Struct(node, NodeWrapper, &node_type, node_wrapper);
|
|
1980
|
+
|
|
1981
|
+
if (!node_wrapper->node) {
|
|
1982
|
+
result += "nil";
|
|
1983
|
+
continue;
|
|
1984
|
+
}
|
|
1985
|
+
|
|
1986
|
+
DOMNode::NodeType nodeType = node_wrapper->node->getNodeType();
|
|
1987
|
+
|
|
1988
|
+
if (nodeType == DOMNode::ELEMENT_NODE) {
|
|
1989
|
+
// For elements, show: <tag attr="value">content</tag>
|
|
1990
|
+
CharStr name(node_wrapper->node->getNodeName());
|
|
1991
|
+
result += "<";
|
|
1992
|
+
result += name.localForm();
|
|
1993
|
+
|
|
1994
|
+
// Add first few attributes if present
|
|
1995
|
+
DOMElement* element = dynamic_cast<DOMElement*>(node_wrapper->node);
|
|
1996
|
+
if (element) {
|
|
1997
|
+
DOMNamedNodeMap* attributes = element->getAttributes();
|
|
1998
|
+
if (attributes && attributes->getLength() > 0) {
|
|
1999
|
+
XMLSize_t attrLen = attributes->getLength();
|
|
2000
|
+
if (attrLen > 3) attrLen = 3; // Limit to first 3 attributes
|
|
2001
|
+
|
|
2002
|
+
for (XMLSize_t j = 0; j < attrLen; j++) {
|
|
2003
|
+
DOMNode* attr = attributes->item(j);
|
|
2004
|
+
CharStr attrName(attr->getNodeName());
|
|
2005
|
+
CharStr attrValue(attr->getNodeValue());
|
|
2006
|
+
result += " ";
|
|
2007
|
+
result += attrName.localForm();
|
|
2008
|
+
result += "=\"";
|
|
2009
|
+
result += attrValue.localForm();
|
|
2010
|
+
result += "\"";
|
|
2011
|
+
}
|
|
2012
|
+
if (attributes->getLength() > 3) {
|
|
2013
|
+
result += " ...";
|
|
2014
|
+
}
|
|
2015
|
+
}
|
|
2016
|
+
}
|
|
2017
|
+
|
|
2018
|
+
// Show truncated text content
|
|
2019
|
+
const XMLCh* textContent = node_wrapper->node->getTextContent();
|
|
2020
|
+
if (textContent && XMLString::stringLen(textContent) > 0) {
|
|
2021
|
+
CharStr text(textContent);
|
|
2022
|
+
std::string textStr = text.localForm();
|
|
2023
|
+
|
|
2024
|
+
// Trim whitespace and truncate
|
|
2025
|
+
size_t start = textStr.find_first_not_of(" \t\n\r");
|
|
2026
|
+
if (start != std::string::npos) {
|
|
2027
|
+
size_t end = textStr.find_last_not_of(" \t\n\r");
|
|
2028
|
+
textStr = textStr.substr(start, end - start + 1);
|
|
2029
|
+
|
|
2030
|
+
if (textStr.length() > 30) {
|
|
2031
|
+
textStr = textStr.substr(0, 27) + "...";
|
|
2032
|
+
}
|
|
2033
|
+
|
|
2034
|
+
result += ">";
|
|
2035
|
+
result += textStr;
|
|
2036
|
+
result += "</";
|
|
2037
|
+
result += name.localForm();
|
|
2038
|
+
result += ">";
|
|
2039
|
+
} else {
|
|
2040
|
+
result += ">";
|
|
2041
|
+
}
|
|
2042
|
+
} else {
|
|
2043
|
+
result += ">";
|
|
2044
|
+
}
|
|
2045
|
+
} else if (nodeType == DOMNode::TEXT_NODE) {
|
|
2046
|
+
// For text nodes, show: text("content")
|
|
2047
|
+
const XMLCh* textContent = node_wrapper->node->getNodeValue();
|
|
2048
|
+
if (textContent) {
|
|
2049
|
+
CharStr text(textContent);
|
|
2050
|
+
std::string textStr = text.localForm();
|
|
2051
|
+
|
|
2052
|
+
// Trim and truncate
|
|
2053
|
+
size_t start = textStr.find_first_not_of(" \t\n\r");
|
|
2054
|
+
if (start != std::string::npos) {
|
|
2055
|
+
size_t end = textStr.find_last_not_of(" \t\n\r");
|
|
2056
|
+
textStr = textStr.substr(start, end - start + 1);
|
|
2057
|
+
|
|
2058
|
+
if (textStr.length() > 30) {
|
|
2059
|
+
textStr = textStr.substr(0, 27) + "...";
|
|
2060
|
+
}
|
|
2061
|
+
|
|
2062
|
+
result += "text(\"";
|
|
2063
|
+
result += textStr;
|
|
2064
|
+
result += "\")";
|
|
2065
|
+
} else {
|
|
2066
|
+
result += "text()";
|
|
2067
|
+
}
|
|
2068
|
+
} else {
|
|
2069
|
+
result += "text()";
|
|
2070
|
+
}
|
|
2071
|
+
} else {
|
|
2072
|
+
// For other nodes, just show the type
|
|
2073
|
+
CharStr name(node_wrapper->node->getNodeName());
|
|
2074
|
+
result += "#<";
|
|
2075
|
+
result += name.localForm();
|
|
2076
|
+
result += ">";
|
|
2077
|
+
}
|
|
2078
|
+
}
|
|
2079
|
+
|
|
2080
|
+
result += "]>";
|
|
2081
|
+
return rb_str_new_cstr(result.c_str());
|
|
2082
|
+
}
|
|
2083
|
+
|
|
1174
2084
|
// Schema.from_document(schema_doc) or Schema.from_string(xsd_string)
|
|
1175
2085
|
static VALUE schema_from_document(int argc, VALUE* argv, VALUE klass) {
|
|
1176
2086
|
VALUE schema_source;
|
|
1177
2087
|
rb_scan_args(argc, argv, "1", &schema_source);
|
|
1178
2088
|
|
|
1179
|
-
|
|
1180
|
-
if (!xerces_initialized) {
|
|
1181
|
-
try {
|
|
1182
|
-
XMLPlatformUtils::Initialize();
|
|
1183
|
-
xerces_initialized = true;
|
|
1184
|
-
} catch (const XMLException& e) {
|
|
1185
|
-
char* message = XMLString::transcode(e.getMessage());
|
|
1186
|
-
VALUE rb_error = rb_str_new_cstr(message);
|
|
1187
|
-
XMLString::release(&message);
|
|
1188
|
-
rb_raise(rb_eRuntimeError, "Failed to initialize Xerces-C: %s", StringValueCStr(rb_error));
|
|
1189
|
-
}
|
|
1190
|
-
}
|
|
2089
|
+
ensure_xerces_initialized();
|
|
1191
2090
|
|
|
1192
2091
|
try {
|
|
1193
2092
|
SchemaWrapper* wrapper = ALLOC(SchemaWrapper);
|
|
@@ -1360,14 +2259,21 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
|
|
|
1360
2259
|
rb_undef_alloc_func(rb_cDocument);
|
|
1361
2260
|
rb_define_singleton_method(rb_cDocument, "parse", RUBY_METHOD_FUNC(document_parse), 1);
|
|
1362
2261
|
rb_define_method(rb_cDocument, "root", RUBY_METHOD_FUNC(document_root), 0);
|
|
2262
|
+
rb_define_method(rb_cDocument, "errors", RUBY_METHOD_FUNC(document_errors), 0);
|
|
1363
2263
|
rb_define_method(rb_cDocument, "to_s", RUBY_METHOD_FUNC(document_to_s), 0);
|
|
1364
2264
|
rb_define_alias(rb_cDocument, "to_xml", "to_s");
|
|
2265
|
+
rb_define_method(rb_cDocument, "inspect", RUBY_METHOD_FUNC(document_inspect), 0);
|
|
1365
2266
|
rb_define_method(rb_cDocument, "xpath", RUBY_METHOD_FUNC(document_xpath), 1);
|
|
2267
|
+
rb_define_method(rb_cDocument, "css", RUBY_METHOD_FUNC(document_css), 1);
|
|
2268
|
+
rb_define_method(rb_cDocument, "at_css", RUBY_METHOD_FUNC(document_at_css), 1);
|
|
1366
2269
|
rb_define_method(rb_cDocument, "encoding", RUBY_METHOD_FUNC(document_encoding), 0);
|
|
2270
|
+
rb_define_method(rb_cDocument, "text", RUBY_METHOD_FUNC(document_text), 0);
|
|
2271
|
+
rb_define_alias(rb_cDocument, "content", "text");
|
|
1367
2272
|
rb_define_method(rb_cDocument, "create_element", RUBY_METHOD_FUNC(document_create_element), 1);
|
|
1368
2273
|
|
|
1369
2274
|
rb_cNode = rb_define_class_under(rb_mXML, "Node", rb_cObject);
|
|
1370
2275
|
rb_undef_alloc_func(rb_cNode);
|
|
2276
|
+
rb_define_method(rb_cNode, "inspect", RUBY_METHOD_FUNC(node_inspect), 0);
|
|
1371
2277
|
rb_define_method(rb_cNode, "name", RUBY_METHOD_FUNC(node_name), 0);
|
|
1372
2278
|
rb_define_method(rb_cNode, "namespace", RUBY_METHOD_FUNC(node_namespace), 0);
|
|
1373
2279
|
rb_define_method(rb_cNode, "text", RUBY_METHOD_FUNC(node_text), 0);
|
|
@@ -1376,11 +2282,19 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
|
|
|
1376
2282
|
rb_define_alias(rb_cNode, "content=", "text=");
|
|
1377
2283
|
rb_define_method(rb_cNode, "[]", RUBY_METHOD_FUNC(node_get_attribute), 1);
|
|
1378
2284
|
rb_define_method(rb_cNode, "[]=", RUBY_METHOD_FUNC(node_set_attribute), 2);
|
|
2285
|
+
rb_define_alias(rb_cNode, "get_attribute", "[]");
|
|
2286
|
+
rb_define_alias(rb_cNode, "attribute", "[]");
|
|
2287
|
+
rb_define_method(rb_cNode, "has_attribute?", RUBY_METHOD_FUNC(node_has_attribute_p), 1);
|
|
1379
2288
|
rb_define_method(rb_cNode, "children", RUBY_METHOD_FUNC(node_children), 0);
|
|
2289
|
+
rb_define_method(rb_cNode, "element_children", RUBY_METHOD_FUNC(node_element_children), 0);
|
|
2290
|
+
rb_define_alias(rb_cNode, "elements", "element_children");
|
|
1380
2291
|
rb_define_method(rb_cNode, "parent", RUBY_METHOD_FUNC(node_parent), 0);
|
|
2292
|
+
rb_define_method(rb_cNode, "ancestors", RUBY_METHOD_FUNC(node_ancestors), -1);
|
|
1381
2293
|
rb_define_method(rb_cNode, "attributes", RUBY_METHOD_FUNC(node_attributes), 0);
|
|
1382
2294
|
rb_define_method(rb_cNode, "next_sibling", RUBY_METHOD_FUNC(node_next_sibling), 0);
|
|
2295
|
+
rb_define_method(rb_cNode, "next_element", RUBY_METHOD_FUNC(node_next_element), 0);
|
|
1383
2296
|
rb_define_method(rb_cNode, "previous_sibling", RUBY_METHOD_FUNC(node_previous_sibling), 0);
|
|
2297
|
+
rb_define_method(rb_cNode, "previous_element", RUBY_METHOD_FUNC(node_previous_element), 0);
|
|
1384
2298
|
rb_define_method(rb_cNode, "add_child", RUBY_METHOD_FUNC(node_add_child), 1);
|
|
1385
2299
|
rb_define_method(rb_cNode, "remove", RUBY_METHOD_FUNC(node_remove), 0);
|
|
1386
2300
|
rb_define_alias(rb_cNode, "unlink", "remove");
|
|
@@ -1393,6 +2307,9 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
|
|
|
1393
2307
|
rb_define_method(rb_cNode, "at_xpath", RUBY_METHOD_FUNC(node_at_xpath), 1);
|
|
1394
2308
|
rb_define_alias(rb_cNode, "at", "at_xpath");
|
|
1395
2309
|
rb_define_method(rb_cNode, "css", RUBY_METHOD_FUNC(node_css), 1);
|
|
2310
|
+
rb_define_method(rb_cNode, "at_css", RUBY_METHOD_FUNC(node_at_css), 1);
|
|
2311
|
+
rb_define_alias(rb_cNode, "get_attribute", "[]");
|
|
2312
|
+
rb_define_alias(rb_cNode, "attribute", "[]");
|
|
1396
2313
|
|
|
1397
2314
|
rb_cElement = rb_define_class_under(rb_mXML, "Element", rb_cNode);
|
|
1398
2315
|
rb_undef_alloc_func(rb_cElement);
|
|
@@ -1405,8 +2322,15 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
|
|
|
1405
2322
|
rb_define_method(rb_cNodeSet, "length", RUBY_METHOD_FUNC(nodeset_length), 0);
|
|
1406
2323
|
rb_define_alias(rb_cNodeSet, "size", "length");
|
|
1407
2324
|
rb_define_method(rb_cNodeSet, "[]", RUBY_METHOD_FUNC(nodeset_at), 1);
|
|
2325
|
+
rb_define_method(rb_cNodeSet, "first", RUBY_METHOD_FUNC(nodeset_first), 0);
|
|
2326
|
+
rb_define_method(rb_cNodeSet, "last", RUBY_METHOD_FUNC(nodeset_last), 0);
|
|
2327
|
+
rb_define_method(rb_cNodeSet, "empty?", RUBY_METHOD_FUNC(nodeset_empty_p), 0);
|
|
1408
2328
|
rb_define_method(rb_cNodeSet, "each", RUBY_METHOD_FUNC(nodeset_each), 0);
|
|
1409
2329
|
rb_define_method(rb_cNodeSet, "to_a", RUBY_METHOD_FUNC(nodeset_to_a), 0);
|
|
2330
|
+
rb_define_method(rb_cNodeSet, "text", RUBY_METHOD_FUNC(nodeset_text), 0);
|
|
2331
|
+
rb_define_method(rb_cNodeSet, "inner_html", RUBY_METHOD_FUNC(nodeset_inner_html), 0);
|
|
2332
|
+
rb_define_method(rb_cNodeSet, "inspect", RUBY_METHOD_FUNC(nodeset_inspect), 0);
|
|
2333
|
+
rb_define_alias(rb_cNodeSet, "to_s", "inspect");
|
|
1410
2334
|
rb_include_module(rb_cNodeSet, rb_mEnumerable);
|
|
1411
2335
|
|
|
1412
2336
|
rb_cSchema = rb_define_class_under(rb_mXML, "Schema", rb_cObject);
|
|
@@ -1415,4 +2339,7 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
|
|
|
1415
2339
|
rb_define_singleton_method(rb_cSchema, "from_string", RUBY_METHOD_FUNC(schema_from_document), -1);
|
|
1416
2340
|
|
|
1417
2341
|
rb_define_method(rb_cDocument, "validate", RUBY_METHOD_FUNC(document_validate), 1);
|
|
2342
|
+
|
|
2343
|
+
// Register cleanup handler
|
|
2344
|
+
atexit(cleanup_xerces);
|
|
1418
2345
|
}
|