rxerces 0.3.0 → 0.5.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -13,7 +13,28 @@
13
13
  #include <sstream>
14
14
  #include <vector>
15
15
 
16
+ #ifdef HAVE_XALAN
17
+ #include <xalanc/XPath/XPathEvaluator.hpp>
18
+ #include <xalanc/XPath/NodeRefList.hpp>
19
+ #include <xalanc/XPath/XObject.hpp>
20
+ #include <xalanc/XPath/XObjectFactoryDefault.hpp>
21
+ #include <xalanc/XPath/XPathEnvSupportDefault.hpp>
22
+ #include <xalanc/XPath/XPathExecutionContextDefault.hpp>
23
+ #include <xalanc/XPath/XPathConstructionContextDefault.hpp>
24
+ #include <xalanc/XPath/ElementPrefixResolverProxy.hpp>
25
+ #include <xalanc/XPath/XPathFactoryDefault.hpp>
26
+ #include <xalanc/XPath/XPathProcessorImpl.hpp>
27
+ #include <xalanc/XPath/XPath.hpp>
28
+ #include <xalanc/XercesParserLiaison/XercesParserLiaison.hpp>
29
+ #include <xalanc/XercesParserLiaison/XercesDOMSupport.hpp>
30
+ #include <xalanc/XercesParserLiaison/XercesDocumentWrapper.hpp>
31
+ #include <xalanc/PlatformSupport/XalanMemoryManagerDefault.hpp>
32
+ #endif
33
+
16
34
  using namespace xercesc;
35
+ #ifdef HAVE_XALAN
36
+ using namespace xalanc;
37
+ #endif
17
38
 
18
39
  VALUE rb_mRXerces;
19
40
  VALUE rb_mXML;
@@ -24,8 +45,17 @@ VALUE rb_cElement;
24
45
  VALUE rb_cText;
25
46
  VALUE rb_cSchema;
26
47
 
27
- // Xerces initialization flag
48
+ // Initialization flags
28
49
  static bool xerces_initialized = false;
50
+ #ifdef HAVE_XALAN
51
+ static bool xalan_initialized = false;
52
+ #endif
53
+
54
+ // Forward declarations
55
+ static std::string css_to_xpath(const char* css);
56
+ static VALUE node_css(VALUE self, VALUE selector);
57
+ static VALUE node_xpath(VALUE self, VALUE path);
58
+ static VALUE document_xpath(VALUE self, VALUE path);
29
59
 
30
60
  // Helper class to manage XMLCh strings
31
61
  class XStr {
@@ -246,7 +276,7 @@ static VALUE document_parse(VALUE klass, VALUE str) {
246
276
 
247
277
  XercesDOMParser* parser = new XercesDOMParser();
248
278
  parser->setValidationScheme(XercesDOMParser::Val_Never);
249
- parser->setDoNamespaces(false);
279
+ parser->setDoNamespaces(true);
250
280
  parser->setDoSchema(false);
251
281
 
252
282
  try {
@@ -319,6 +349,87 @@ static VALUE document_to_s(VALUE self) {
319
349
  return Qnil;
320
350
  }
321
351
 
352
+ // document.inspect - human-readable representation
353
+ static VALUE document_inspect(VALUE self) {
354
+ DocumentWrapper* wrapper;
355
+ TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
356
+
357
+ std::string result = "#<RXerces::XML::Document:0x";
358
+
359
+ // Add object ID
360
+ char buf[32];
361
+ snprintf(buf, sizeof(buf), "%016lx", (unsigned long)self);
362
+ result += buf;
363
+
364
+ if (!wrapper->doc) {
365
+ result += " (empty)>";
366
+ return rb_str_new_cstr(result.c_str());
367
+ }
368
+
369
+ // Add encoding
370
+ const XMLCh* encoding = wrapper->doc->getXmlEncoding();
371
+ if (encoding && XMLString::stringLen(encoding) > 0) {
372
+ CharStr utf8_encoding(encoding);
373
+ result += " encoding=\"";
374
+ result += utf8_encoding.localForm();
375
+ result += "\"";
376
+ }
377
+
378
+ // Add root element name
379
+ DOMElement* root = wrapper->doc->getDocumentElement();
380
+ if (root) {
381
+ CharStr rootName(root->getNodeName());
382
+ result += " root=<";
383
+ result += rootName.localForm();
384
+ result += ">";
385
+ }
386
+
387
+ result += ">";
388
+ return rb_str_new_cstr(result.c_str());
389
+ }
390
+
391
+ // document.encoding
392
+ static VALUE document_encoding(VALUE self) {
393
+ DocumentWrapper* wrapper;
394
+ TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
395
+
396
+ if (!wrapper->doc) {
397
+ return Qnil;
398
+ }
399
+
400
+ const XMLCh* encoding = wrapper->doc->getXmlEncoding();
401
+ if (!encoding || XMLString::stringLen(encoding) == 0) {
402
+ // Default to UTF-8 if no encoding is specified
403
+ return rb_str_new_cstr("UTF-8");
404
+ }
405
+
406
+ CharStr utf8_encoding(encoding);
407
+ return rb_str_new_cstr(utf8_encoding.localForm());
408
+ }
409
+
410
+ // document.text / document.content - returns text content of entire document
411
+ static VALUE document_text(VALUE self) {
412
+ DocumentWrapper* wrapper;
413
+ TypedData_Get_Struct(self, DocumentWrapper, &document_type, wrapper);
414
+
415
+ if (!wrapper->doc) {
416
+ return rb_str_new_cstr("");
417
+ }
418
+
419
+ DOMElement* root = wrapper->doc->getDocumentElement();
420
+ if (!root) {
421
+ return rb_str_new_cstr("");
422
+ }
423
+
424
+ const XMLCh* content = root->getTextContent();
425
+ if (!content) {
426
+ return rb_str_new_cstr("");
427
+ }
428
+
429
+ CharStr utf8_content(content);
430
+ return rb_str_new_cstr(utf8_content.localForm());
431
+ }
432
+
322
433
  // document.create_element(name)
323
434
  static VALUE document_create_element(VALUE self, VALUE name) {
324
435
  DocumentWrapper* doc_wrapper;
@@ -354,6 +465,107 @@ static VALUE document_create_element(VALUE self, VALUE name) {
354
465
  return Qnil;
355
466
  }
356
467
 
468
+ #ifdef HAVE_XALAN
469
+ // Helper function to execute XPath using Xalan for full XPath 1.0 support
470
+ static VALUE execute_xpath_with_xalan(DOMNode* context_node, const char* xpath_str, VALUE doc_ref) {
471
+ try {
472
+ // Initialize Xalan if needed
473
+ if (!xalan_initialized) {
474
+ XPathEvaluator::initialize();
475
+ XMLPlatformUtils::Initialize();
476
+ xalan_initialized = true;
477
+ }
478
+
479
+ // Get the document
480
+ DOMDocument* domDoc = context_node->getOwnerDocument();
481
+ if (!domDoc && context_node->getNodeType() == DOMNode::DOCUMENT_NODE) {
482
+ domDoc = static_cast<DOMDocument*>(context_node);
483
+ }
484
+
485
+ if (!domDoc) {
486
+ NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
487
+ wrapper->nodes_array = rb_ary_new();
488
+ return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
489
+ }
490
+
491
+ // Create Xalan support objects
492
+ XercesParserLiaison liaison;
493
+ XercesDOMSupport domSupport(liaison);
494
+
495
+ // Create Xalan document - this creates and returns a XercesDocumentWrapper
496
+ XalanDocument* xalanDoc = liaison.createDocument(domDoc, false, false, false);
497
+ if (!xalanDoc) {
498
+ rb_raise(rb_eRuntimeError, "Failed to create Xalan document wrapper");
499
+ }
500
+
501
+ // The document IS the wrapper
502
+ XercesDocumentWrapper* docWrapper = static_cast<XercesDocumentWrapper*>(xalanDoc);
503
+
504
+ // Map the context node to Xalan
505
+ XalanNode* xalanContextNode = docWrapper->mapNode(context_node);
506
+ if (!xalanContextNode) {
507
+ xalanContextNode = docWrapper;
508
+ }
509
+
510
+ // Set up XPath factories and contexts
511
+ XPathEnvSupportDefault envSupport;
512
+ XObjectFactoryDefault objectFactory;
513
+ XPathExecutionContextDefault executionContext(envSupport, domSupport, objectFactory);
514
+ XPathConstructionContextDefault constructionContext;
515
+ XPathFactoryDefault factory;
516
+
517
+ // Create XPath
518
+ XPathProcessorImpl processor;
519
+ XPath* xpath = factory.create();
520
+
521
+ // Compile XPath expression
522
+ ElementPrefixResolverProxy resolver(docWrapper->getDocumentElement(), envSupport, domSupport);
523
+ processor.initXPath(*xpath, constructionContext, XalanDOMString(xpath_str), resolver);
524
+
525
+ // Execute XPath query
526
+ const XObjectPtr result = xpath->execute(xalanContextNode, resolver, executionContext);
527
+
528
+ VALUE nodes_array = rb_ary_new();
529
+
530
+ if (result.get() != 0) {
531
+ // Check if result is a node set
532
+ const NodeRefListBase& nodeList = result->nodeset();
533
+ const NodeRefListBase::size_type length = nodeList.getLength();
534
+
535
+ for (NodeRefListBase::size_type i = 0; i < length; ++i) {
536
+ XalanNode* xalanNode = nodeList.item(i);
537
+ if (xalanNode) {
538
+ // Map back to Xerces DOM node
539
+ const DOMNode* domNode = docWrapper->mapNode(xalanNode);
540
+ if (domNode) {
541
+ rb_ary_push(nodes_array, wrap_node(const_cast<DOMNode*>(domNode), doc_ref));
542
+ }
543
+ }
544
+ }
545
+ }
546
+
547
+ factory.returnObject(xpath);
548
+
549
+ NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
550
+ wrapper->nodes_array = nodes_array;
551
+ return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
552
+
553
+ } catch (const XalanXPathException& e) {
554
+ CharStr msg(e.getMessage().c_str());
555
+ rb_raise(rb_eRuntimeError, "XPath error: %s", msg.localForm());
556
+ } catch (const XMLException& e) {
557
+ CharStr message(e.getMessage());
558
+ rb_raise(rb_eRuntimeError, "XML error: %s", message.localForm());
559
+ } catch (...) {
560
+ rb_raise(rb_eRuntimeError, "Unknown XPath error");
561
+ }
562
+
563
+ NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
564
+ wrapper->nodes_array = rb_ary_new();
565
+ return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
566
+ }
567
+ #endif
568
+
357
569
  // document.xpath(path)
358
570
  static VALUE document_xpath(VALUE self, VALUE path) {
359
571
  DocumentWrapper* doc_wrapper;
@@ -368,6 +580,17 @@ static VALUE document_xpath(VALUE self, VALUE path) {
368
580
  Check_Type(path, T_STRING);
369
581
  const char* xpath_str = StringValueCStr(path);
370
582
 
583
+ #ifdef HAVE_XALAN
584
+ // Use Xalan for full XPath 1.0 support
585
+ DOMElement* root = doc_wrapper->doc->getDocumentElement();
586
+ if (!root) {
587
+ NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
588
+ wrapper->nodes_array = rb_ary_new();
589
+ return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
590
+ }
591
+ return execute_xpath_with_xalan(root, xpath_str, self);
592
+ #else
593
+ // Fall back to Xerces XPath subset
371
594
  try {
372
595
  DOMElement* root = doc_wrapper->doc->getDocumentElement();
373
596
  if (!root) {
@@ -417,6 +640,141 @@ static VALUE document_xpath(VALUE self, VALUE path) {
417
640
  NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
418
641
  wrapper->nodes_array = rb_ary_new();
419
642
  return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
643
+ #endif
644
+ }
645
+
646
+ // document.css(selector) - Convert CSS to XPath and execute
647
+ static VALUE document_css(VALUE self, VALUE selector) {
648
+ Check_Type(selector, T_STRING);
649
+ const char* css_str = StringValueCStr(selector);
650
+
651
+ // Convert CSS to XPath
652
+ std::string xpath_str = css_to_xpath(css_str);
653
+
654
+ // Call the xpath method with converted selector
655
+ return document_xpath(self, rb_str_new2(xpath_str.c_str()));
656
+ }
657
+
658
+ // document.at_css(selector) - Returns first matching node
659
+ static VALUE document_at_css(VALUE self, VALUE selector) {
660
+ VALUE nodeset = document_css(self, selector);
661
+
662
+ NodeSetWrapper* wrapper;
663
+ TypedData_Get_Struct(nodeset, NodeSetWrapper, &nodeset_type, wrapper);
664
+
665
+ if (RARRAY_LEN(wrapper->nodes_array) == 0) {
666
+ return Qnil;
667
+ }
668
+
669
+ return rb_ary_entry(wrapper->nodes_array, 0);
670
+ }
671
+
672
+ // node.inspect - human-readable representation
673
+ static VALUE node_inspect(VALUE self) {
674
+ NodeWrapper* wrapper;
675
+ TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
676
+
677
+ if (!wrapper->node) {
678
+ return rb_str_new_cstr("#<RXerces::XML::Node (nil)>");
679
+ }
680
+
681
+ DOMNode::NodeType nodeType = wrapper->node->getNodeType();
682
+ std::string result;
683
+
684
+ // Add object ID
685
+ char buf[32];
686
+ snprintf(buf, sizeof(buf), "%016lx", (unsigned long)self);
687
+
688
+ if (nodeType == DOMNode::ELEMENT_NODE) {
689
+ result = "#<RXerces::XML::Element:0x";
690
+ result += buf;
691
+ result += " <";
692
+
693
+ CharStr name(wrapper->node->getNodeName());
694
+ result += name.localForm();
695
+
696
+ // Add attributes
697
+ DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
698
+ if (element) {
699
+ DOMNamedNodeMap* attributes = element->getAttributes();
700
+ if (attributes && attributes->getLength() > 0) {
701
+ XMLSize_t attrLen = attributes->getLength();
702
+ if (attrLen > 3) attrLen = 3;
703
+
704
+ for (XMLSize_t i = 0; i < attrLen; i++) {
705
+ DOMNode* attr = attributes->item(i);
706
+ CharStr attrName(attr->getNodeName());
707
+ CharStr attrValue(attr->getNodeValue());
708
+ result += " ";
709
+ result += attrName.localForm();
710
+ result += "=\"";
711
+ result += attrValue.localForm();
712
+ result += "\"";
713
+ }
714
+ if (attributes->getLength() > 3) {
715
+ result += " ...";
716
+ }
717
+ }
718
+ }
719
+
720
+ result += ">";
721
+
722
+ // Add truncated text content
723
+ const XMLCh* textContent = wrapper->node->getTextContent();
724
+ if (textContent && XMLString::stringLen(textContent) > 0) {
725
+ CharStr text(textContent);
726
+ std::string textStr = text.localForm();
727
+
728
+ size_t start = textStr.find_first_not_of(" \t\n\r");
729
+ if (start != std::string::npos) {
730
+ size_t end = textStr.find_last_not_of(" \t\n\r");
731
+ textStr = textStr.substr(start, end - start + 1);
732
+
733
+ if (textStr.length() > 40) {
734
+ textStr = textStr.substr(0, 37) + "...";
735
+ }
736
+
737
+ result += "\"";
738
+ result += textStr;
739
+ result += "\"";
740
+ }
741
+ }
742
+
743
+ result += ">";
744
+ } else if (nodeType == DOMNode::TEXT_NODE) {
745
+ result = "#<RXerces::XML::Text:0x";
746
+ result += buf;
747
+ result += " \"";
748
+
749
+ const XMLCh* textContent = wrapper->node->getNodeValue();
750
+ if (textContent) {
751
+ CharStr text(textContent);
752
+ std::string textStr = text.localForm();
753
+
754
+ size_t start = textStr.find_first_not_of(" \t\n\r");
755
+ if (start != std::string::npos) {
756
+ size_t end = textStr.find_last_not_of(" \t\n\r");
757
+ textStr = textStr.substr(start, end - start + 1);
758
+
759
+ if (textStr.length() > 40) {
760
+ textStr = textStr.substr(0, 37) + "...";
761
+ }
762
+
763
+ result += textStr;
764
+ }
765
+ }
766
+
767
+ result += "\">";
768
+ } else {
769
+ result = "#<RXerces::XML::Node:0x";
770
+ result += buf;
771
+ result += " ";
772
+ CharStr name(wrapper->node->getNodeName());
773
+ result += name.localForm();
774
+ result += ">";
775
+ }
776
+
777
+ return rb_str_new_cstr(result.c_str());
420
778
  }
421
779
 
422
780
  // node.name
@@ -434,6 +792,24 @@ static VALUE node_name(VALUE self) {
434
792
  return rb_str_new_cstr(utf8_name.localForm());
435
793
  }
436
794
 
795
+ // node.namespace
796
+ static VALUE node_namespace(VALUE self) {
797
+ NodeWrapper* wrapper;
798
+ TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
799
+
800
+ if (!wrapper->node) {
801
+ return Qnil;
802
+ }
803
+
804
+ const XMLCh* namespaceURI = wrapper->node->getNamespaceURI();
805
+ if (!namespaceURI || XMLString::stringLen(namespaceURI) == 0) {
806
+ return Qnil;
807
+ }
808
+
809
+ CharStr utf8_namespace(namespaceURI);
810
+ return rb_str_new_cstr(utf8_namespace.localForm());
811
+ }
812
+
437
813
  // node.text / node.content
438
814
  static VALUE node_text(VALUE self) {
439
815
  NodeWrapper* wrapper;
@@ -513,6 +889,28 @@ static VALUE node_set_attribute(VALUE self, VALUE attr_name, VALUE attr_value) {
513
889
  return attr_value;
514
890
  }
515
891
 
892
+ // node.has_attribute?(attribute_name)
893
+ static VALUE node_has_attribute_p(VALUE self, VALUE attr_name) {
894
+ NodeWrapper* wrapper;
895
+ TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
896
+
897
+ if (!wrapper->node || wrapper->node->getNodeType() != DOMNode::ELEMENT_NODE) {
898
+ return Qfalse;
899
+ }
900
+
901
+ Check_Type(attr_name, T_STRING);
902
+ const char* attr_str = StringValueCStr(attr_name);
903
+
904
+ DOMElement* element = dynamic_cast<DOMElement*>(wrapper->node);
905
+ const XMLCh* value = element->getAttribute(XStr(attr_str).unicodeForm());
906
+
907
+ if (!value || XMLString::stringLen(value) == 0) {
908
+ return Qfalse;
909
+ }
910
+
911
+ return Qtrue;
912
+ }
913
+
516
914
  // node.children
517
915
  static VALUE node_children(VALUE self) {
518
916
  NodeWrapper* wrapper;
@@ -536,6 +934,31 @@ static VALUE node_children(VALUE self) {
536
934
  return children;
537
935
  }
538
936
 
937
+ // node.element_children - returns only element children (no text nodes)
938
+ static VALUE node_element_children(VALUE self) {
939
+ NodeWrapper* wrapper;
940
+ TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
941
+
942
+ VALUE doc_ref = rb_iv_get(self, "@document");
943
+ VALUE children = rb_ary_new();
944
+
945
+ if (!wrapper->node) {
946
+ return children;
947
+ }
948
+
949
+ DOMNodeList* child_nodes = wrapper->node->getChildNodes();
950
+ XMLSize_t count = child_nodes->getLength();
951
+
952
+ for (XMLSize_t i = 0; i < count; i++) {
953
+ DOMNode* child = child_nodes->item(i);
954
+ if (child->getNodeType() == DOMNode::ELEMENT_NODE) {
955
+ rb_ary_push(children, wrap_node(child, doc_ref));
956
+ }
957
+ }
958
+
959
+ return children;
960
+ }
961
+
539
962
  // node.parent
540
963
  static VALUE node_parent(VALUE self) {
541
964
  NodeWrapper* wrapper;
@@ -554,6 +977,78 @@ static VALUE node_parent(VALUE self) {
554
977
  return wrap_node(parent, doc_ref);
555
978
  }
556
979
 
980
+ // node.ancestors(selector = nil) - returns an array of all ancestor nodes, optionally filtered by selector
981
+ static VALUE node_ancestors(int argc, VALUE* argv, VALUE self) {
982
+ VALUE selector;
983
+ rb_scan_args(argc, argv, "01", &selector);
984
+
985
+ NodeWrapper* wrapper;
986
+ TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
987
+
988
+ VALUE ancestors = rb_ary_new();
989
+
990
+ if (!wrapper->node) {
991
+ return ancestors;
992
+ }
993
+
994
+ VALUE doc_ref = rb_iv_get(self, "@document");
995
+ DOMNode* current = wrapper->node->getParentNode();
996
+
997
+ // Walk up the tree, collecting all ancestors
998
+ while (current) {
999
+ // Stop at the document node (don't include it in ancestors)
1000
+ if (current->getNodeType() == DOMNode::DOCUMENT_NODE) {
1001
+ break;
1002
+ }
1003
+ rb_ary_push(ancestors, wrap_node(current, doc_ref));
1004
+ current = current->getParentNode();
1005
+ }
1006
+
1007
+ // If selector is provided, filter the ancestors
1008
+ if (!NIL_P(selector)) {
1009
+ Check_Type(selector, T_STRING);
1010
+ const char* selector_str = StringValueCStr(selector);
1011
+
1012
+ // Convert CSS to XPath if needed (css_to_xpath adds // prefix)
1013
+ std::string xpath_str = css_to_xpath(selector_str);
1014
+
1015
+ // Get all matching nodes from the document
1016
+ VALUE all_matches = document_xpath(doc_ref, rb_str_new2(xpath_str.c_str()));
1017
+
1018
+ NodeSetWrapper* matches_wrapper;
1019
+ TypedData_Get_Struct(all_matches, NodeSetWrapper, &nodeset_type, matches_wrapper);
1020
+
1021
+ VALUE filtered = rb_ary_new();
1022
+ long ancestor_len = RARRAY_LEN(ancestors);
1023
+ long matches_len = RARRAY_LEN(matches_wrapper->nodes_array);
1024
+
1025
+ // For each ancestor, check if it's in the matches
1026
+ for (long i = 0; i < ancestor_len; i++) {
1027
+ VALUE ancestor = rb_ary_entry(ancestors, i);
1028
+
1029
+ NodeWrapper* ancestor_wrapper;
1030
+ TypedData_Get_Struct(ancestor, NodeWrapper, &node_type, ancestor_wrapper);
1031
+
1032
+ // Check if this ancestor node is in the matches
1033
+ for (long j = 0; j < matches_len; j++) {
1034
+ VALUE match = rb_ary_entry(matches_wrapper->nodes_array, j);
1035
+ NodeWrapper* match_wrapper;
1036
+ TypedData_Get_Struct(match, NodeWrapper, &node_type, match_wrapper);
1037
+
1038
+ // Compare the actual DOM nodes
1039
+ if (ancestor_wrapper->node == match_wrapper->node) {
1040
+ rb_ary_push(filtered, ancestor);
1041
+ break;
1042
+ }
1043
+ }
1044
+ }
1045
+
1046
+ return filtered;
1047
+ }
1048
+
1049
+ return ancestors;
1050
+ }
1051
+
557
1052
  // node.attributes - returns hash of all attributes (only for element nodes)
558
1053
  static VALUE node_attributes(VALUE self) {
559
1054
  NodeWrapper* wrapper;
@@ -627,6 +1122,54 @@ static VALUE node_previous_sibling(VALUE self) {
627
1122
  return wrap_node(prev, doc_ref);
628
1123
  }
629
1124
 
1125
+ // node.next_element - next sibling that is an element (skipping text nodes)
1126
+ static VALUE node_next_element(VALUE self) {
1127
+ NodeWrapper* wrapper;
1128
+ TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
1129
+
1130
+ if (!wrapper->node) {
1131
+ return Qnil;
1132
+ }
1133
+
1134
+ VALUE doc_ref = rb_iv_get(self, "@document");
1135
+ DOMNode* next = wrapper->node->getNextSibling();
1136
+
1137
+ // Skip non-element nodes
1138
+ while (next && next->getNodeType() != DOMNode::ELEMENT_NODE) {
1139
+ next = next->getNextSibling();
1140
+ }
1141
+
1142
+ if (!next) {
1143
+ return Qnil;
1144
+ }
1145
+
1146
+ return wrap_node(next, doc_ref);
1147
+ }
1148
+
1149
+ // node.previous_element - previous sibling that is an element (skipping text nodes)
1150
+ static VALUE node_previous_element(VALUE self) {
1151
+ NodeWrapper* wrapper;
1152
+ TypedData_Get_Struct(self, NodeWrapper, &node_type, wrapper);
1153
+
1154
+ if (!wrapper->node) {
1155
+ return Qnil;
1156
+ }
1157
+
1158
+ VALUE doc_ref = rb_iv_get(self, "@document");
1159
+ DOMNode* prev = wrapper->node->getPreviousSibling();
1160
+
1161
+ // Skip non-element nodes
1162
+ while (prev && prev->getNodeType() != DOMNode::ELEMENT_NODE) {
1163
+ prev = prev->getPreviousSibling();
1164
+ }
1165
+
1166
+ if (!prev) {
1167
+ return Qnil;
1168
+ }
1169
+
1170
+ return wrap_node(prev, doc_ref);
1171
+ }
1172
+
630
1173
  // node.add_child(node_or_string) - adds a child node
631
1174
  static VALUE node_add_child(VALUE self, VALUE child) {
632
1175
  NodeWrapper* wrapper;
@@ -880,6 +1423,11 @@ static VALUE node_xpath(VALUE self, VALUE path) {
880
1423
  const char* xpath_str = StringValueCStr(path);
881
1424
  VALUE doc_ref = rb_iv_get(self, "@document");
882
1425
 
1426
+ #ifdef HAVE_XALAN
1427
+ // Use Xalan for full XPath 1.0 support
1428
+ return execute_xpath_with_xalan(node_wrapper->node, xpath_str, doc_ref);
1429
+ #else
1430
+ // Fall back to Xerces XPath subset
883
1431
  try {
884
1432
  DOMDocument* doc = node_wrapper->node->getOwnerDocument();
885
1433
  if (!doc) {
@@ -929,6 +1477,201 @@ static VALUE node_xpath(VALUE self, VALUE path) {
929
1477
  NodeSetWrapper* wrapper = ALLOC(NodeSetWrapper);
930
1478
  wrapper->nodes_array = rb_ary_new();
931
1479
  return TypedData_Wrap_Struct(rb_cNodeSet, &nodeset_type, wrapper);
1480
+ #endif
1481
+ }
1482
+
1483
+ // node.at_xpath(path) - returns first matching node or nil
1484
+ static VALUE node_at_xpath(VALUE self, VALUE path) {
1485
+ VALUE nodeset = node_xpath(self, path);
1486
+ NodeSetWrapper* wrapper;
1487
+ TypedData_Get_Struct(nodeset, NodeSetWrapper, &nodeset_type, wrapper);
1488
+
1489
+ if (RARRAY_LEN(wrapper->nodes_array) == 0) {
1490
+ return Qnil;
1491
+ }
1492
+
1493
+ return rb_ary_entry(wrapper->nodes_array, 0);
1494
+ }
1495
+
1496
+ // node.at_css(selector) - returns first matching node or nil
1497
+ static VALUE node_at_css(VALUE self, VALUE selector) {
1498
+ VALUE nodeset = node_css(self, selector);
1499
+ NodeSetWrapper* wrapper;
1500
+ TypedData_Get_Struct(nodeset, NodeSetWrapper, &nodeset_type, wrapper);
1501
+
1502
+ if (RARRAY_LEN(wrapper->nodes_array) == 0) {
1503
+ return Qnil;
1504
+ }
1505
+
1506
+ return rb_ary_entry(wrapper->nodes_array, 0);
1507
+ }
1508
+
1509
// Whitespace recognised by the CSS-to-XPath converter.
static bool css_is_space(char c) {
    return c == ' ' || c == '\t' || c == '\n' || c == '\r';
}

// True when `c` terminates an #id or .class name inside a compound selector.
static bool css_ends_ident(char c) {
    switch (c) {
        case '.': case '#': case '[': case '>': case '+': case '~': case ',':
            return true;
        default:
            return css_is_space(c);
    }
}

// True for the first character of a two-character attribute operator
// (^=  $=  *=  ~=  |=  !=).
static bool css_is_attr_op_prefix(char c) {
    return c == '^' || c == '$' || c == '*' || c == '~' || c == '|' || c == '!';
}

// Quotes `value` as an XPath 1.0 string literal. XPath has no escape
// sequences, so the quote character is chosen to suit the value and a value
// holding both quote kinds is assembled with concat().
static std::string css_xpath_literal(const std::string& value) {
    if (value.find('\'') == std::string::npos) {
        return "'" + value + "'";
    }
    if (value.find('"') == std::string::npos) {
        return "\"" + value + "\"";
    }

    std::string out = "concat(";
    size_t pos = 0;
    for (;;) {
        const size_t quote = value.find('\'', pos);
        const size_t count = (quote == std::string::npos) ? std::string::npos : quote - pos;
        out += "'" + value.substr(pos, count) + "'";
        if (quote == std::string::npos) {
            break;
        }
        out += ", \"'\", ";
        pos = quote + 1;
    }
    out += ")";
    return out;
}

// Builds the XPath predicate for one CSS attribute selector.
// `op` is '\0' for a presence test ([attr]), '=' for equality, or the first
// character of a two-character operator.
static std::string css_attr_predicate(const std::string& name, char op, const std::string& value) {
    const std::string attr = "@" + name;
    const std::string lit = css_xpath_literal(value);

    switch (op) {
        case '=':
            return "[" + attr + "=" + lit + "]";
        case '!':
            return "[not(" + attr + "=" + lit + ")]";
        case '^':
            // An empty operand matches nothing in CSS, but everything in XPath
            if (value.empty()) return "[false()]";
            return "[starts-with(" + attr + ", " + lit + ")]";
        case '*':
            if (value.empty()) return "[false()]";
            return "[contains(" + attr + ", " + lit + ")]";
        case '$':
            if (value.empty()) return "[false()]";
            // XPath 1.0 has no ends-with(): compare the attribute's tail
            return "[substring(" + attr + ", string-length(" + attr + ") - string-length(" + lit +
                   ") + 1)=" + lit + "]";
        case '~': {
            // Whitespace-separated word match, same shape as the class test
            bool has_space = false;
            for (size_t k = 0; k < value.length(); ++k) {
                if (css_is_space(value[k])) has_space = true;
            }
            if (value.empty() || has_space) return "[false()]";
            return "[contains(concat(' ', " + attr + ", ' '), " + css_xpath_literal(" " + value + " ") + ")]";
        }
        case '|':
            return "[" + attr + "=" + lit + " or starts-with(" + attr + ", " +
                   css_xpath_literal(value + "-") + ")]";
        default:
            return "[" + attr + "]";
    }
}

// Helper function to convert basic CSS selectors to XPath.
//
// Supported syntax:
//   tag, *, .class, #id, tag.class, tag#id
//   [attr], [attr=value] and the operators ^=  $=  *=  ~=  |=  !=
//   (values may be unquoted or wrapped in single or double quotes)
//   combinators: descendant (whitespace), child (>), adjacent sibling (+),
//   general sibling (~)
//   selector groups separated by commas (translated to an XPath union)
// Not supported: pseudo-classes/elements and CSS namespace prefixes; such
// characters are copied into the element name unchanged.
//
// The result always starts with "//", so it matches anywhere in the
// document. A null, empty or blank selector yields "//*".
static std::string css_to_xpath(const char* css) {
    std::string selector(css ? css : "");

    // Trim surrounding whitespace
    size_t first = 0;
    while (first < selector.length() && css_is_space(selector[first])) ++first;
    if (first == selector.length()) return "//*";
    size_t last = selector.length();
    while (css_is_space(selector[last - 1])) --last;
    selector = selector.substr(first, last - first);

    const size_t len = selector.length();
    std::string result = "//";
    std::string tag;            // element name collected for the current step
    bool step_emitted = false;  // node test of the current step already written?
    bool adjacent = false;      // current step follows a '+' combinator

    auto skip_spaces = [&](size_t pos) -> size_t {
        while (pos < len && css_is_space(selector[pos])) ++pos;
        return pos;
    };

    // Writes the node test of the current step exactly once. It must run
    // before any predicate is appended so that "[...]" never directly
    // follows a slash.
    auto emit_step = [&]() {
        if (step_emitted) return;
        const bool named = !tag.empty() && tag != "*";
        if (adjacent) {
            // a + b  =>  a/following-sibling::*[1][self::b]
            result += "*[1]";
            if (named) result += "[self::" + tag + "]";
        } else {
            result += named ? tag : std::string("*");
        }
        tag.clear();
        adjacent = false;
        step_emitted = true;
    };

    size_t i = 0;
    while (i < len) {
        const char c = selector[i];

        // Whitespace: descendant combinator, unless it merely pads an
        // explicit combinator or a comma
        if (css_is_space(c)) {
            i = skip_spaces(i);
            const char next = (i < len) ? selector[i] : '\0';
            if (next != '>' && next != '+' && next != '~' && next != ',') {
                emit_step();
                result += "//";
                step_emitted = false;
            }
            continue;
        }

        // Child and sibling combinators
        if (c == '>' || c == '+' || c == '~') {
            emit_step();
            result += (c == '>') ? "/" : "/following-sibling::";
            step_emitted = false;
            adjacent = (c == '+');
            i = skip_spaces(i + 1);
            continue;
        }

        // Selector group: start another absolute path in a union
        if (c == ',') {
            emit_step();
            result += " | //";
            step_emitted = false;
            i = skip_spaces(i + 1);
            continue;
        }

        // ID and class selectors
        if (c == '#' || c == '.') {
            emit_step();
            const size_t begin = ++i;
            while (i < len && !css_ends_ident(selector[i])) ++i;
            const std::string ident = selector.substr(begin, i - begin);
            if (c == '#') {
                result += "[@id=" + css_xpath_literal(ident) + "]";
            } else {
                result += "[contains(concat(' ', @class, ' '), " + css_xpath_literal(" " + ident + " ") + ")]";
            }
            continue;
        }

        // Attribute selectors
        if (c == '[') {
            emit_step();
            i = skip_spaces(i + 1);

            // Attribute name
            const size_t name_begin = i;
            while (i < len) {
                const char ch = selector[i];
                if (ch == ']' || ch == '=' || css_is_space(ch)) break;
                if (css_is_attr_op_prefix(ch) && i + 1 < len && selector[i + 1] == '=') break;
                ++i;
            }
            const std::string name = selector.substr(name_begin, i - name_begin);
            i = skip_spaces(i);

            // Operator
            char op = '\0';
            if (i < len && selector[i] == '=') {
                op = '=';
                ++i;
            } else if (i + 1 < len && css_is_attr_op_prefix(selector[i]) && selector[i + 1] == '=') {
                op = selector[i];
                i += 2;
            }

            // Value
            std::string value;
            if (op != '\0') {
                i = skip_spaces(i);
                if (i < len && (selector[i] == '"' || selector[i] == '\'')) {
                    const char quote = selector[i++];
                    const size_t value_begin = i;
                    while (i < len && selector[i] != quote) ++i;
                    value = selector.substr(value_begin, i - value_begin);
                    if (i < len) ++i;  // Skip closing quote
                } else {
                    // No quotes: read until ], dropping trailing whitespace
                    const size_t value_begin = i;
                    while (i < len && selector[i] != ']') ++i;
                    size_t value_end = i;
                    while (value_end > value_begin && css_is_space(selector[value_end - 1])) --value_end;
                    value = selector.substr(value_begin, value_end - value_begin);
                }
            }

            // Consume everything up to and including the closing bracket so
            // that parsing resumes in the normal state
            while (i < len && selector[i] != ']') ++i;
            if (i < len) ++i;

            result += css_attr_predicate(name, op, value);
            continue;
        }

        // Regular character - part of the element name. Characters that
        // follow an already emitted step are ignored.
        if (!step_emitted) {
            tag += c;
        }
        ++i;
    }

    // Flush the last step
    emit_step();
    return result;
}
1664
+
1665
+ // node.css(selector) - Convert CSS to XPath and execute
1666
+ static VALUE node_css(VALUE self, VALUE selector) {
1667
+ Check_Type(selector, T_STRING);
1668
+ const char* css_str = StringValueCStr(selector);
1669
+
1670
+ // Convert CSS to XPath
1671
+ std::string xpath_str = css_to_xpath(css_str);
1672
+
1673
+ // Call the xpath method with converted selector
1674
+ return node_xpath(self, rb_str_new2(xpath_str.c_str()));
932
1675
  }
933
1676
 
934
1677
  // nodeset.length / nodeset.size
@@ -972,6 +1715,205 @@ static VALUE nodeset_to_a(VALUE self) {
972
1715
  return rb_ary_dup(wrapper->nodes_array);
973
1716
  }
974
1717
 
1718
+ // nodeset.first - returns first node or nil
1719
+ static VALUE nodeset_first(VALUE self) {
1720
+ NodeSetWrapper* wrapper;
1721
+ TypedData_Get_Struct(self, NodeSetWrapper, &nodeset_type, wrapper);
1722
+
1723
+ if (RARRAY_LEN(wrapper->nodes_array) == 0) {
1724
+ return Qnil;
1725
+ }
1726
+
1727
+ return rb_ary_entry(wrapper->nodes_array, 0);
1728
+ }
1729
+
1730
+ // nodeset.last - returns last node or nil
1731
+ static VALUE nodeset_last(VALUE self) {
1732
+ NodeSetWrapper* wrapper;
1733
+ TypedData_Get_Struct(self, NodeSetWrapper, &nodeset_type, wrapper);
1734
+
1735
+ long len = RARRAY_LEN(wrapper->nodes_array);
1736
+ if (len == 0) {
1737
+ return Qnil;
1738
+ }
1739
+
1740
+ return rb_ary_entry(wrapper->nodes_array, len - 1);
1741
+ }
1742
+
1743
+ // nodeset.empty? - returns true if nodeset is empty
1744
+ static VALUE nodeset_empty_p(VALUE self) {
1745
+ NodeSetWrapper* wrapper;
1746
+ TypedData_Get_Struct(self, NodeSetWrapper, &nodeset_type, wrapper);
1747
+
1748
+ return RARRAY_LEN(wrapper->nodes_array) == 0 ? Qtrue : Qfalse;
1749
+ }
1750
+
1751
+ // nodeset.inner_html - returns concatenated inner_html of all nodes
1752
+ static VALUE nodeset_inner_html(VALUE self) {
1753
+ NodeSetWrapper* wrapper;
1754
+ TypedData_Get_Struct(self, NodeSetWrapper, &nodeset_type, wrapper);
1755
+
1756
+ std::string result;
1757
+ long len = RARRAY_LEN(wrapper->nodes_array);
1758
+
1759
+ for (long i = 0; i < len; i++) {
1760
+ VALUE node = rb_ary_entry(wrapper->nodes_array, i);
1761
+ VALUE inner_html = rb_funcall(node, rb_intern("inner_html"), 0);
1762
+ result += StringValueCStr(inner_html);
1763
+ }
1764
+
1765
+ return rb_str_new_cstr(result.c_str());
1766
+ }
1767
+
1768
+ // nodeset.text - returns concatenated text content of all nodes
1769
+ static VALUE nodeset_text(VALUE self) {
1770
+ NodeSetWrapper* wrapper;
1771
+ TypedData_Get_Struct(self, NodeSetWrapper, &nodeset_type, wrapper);
1772
+
1773
+ std::string result;
1774
+ long len = RARRAY_LEN(wrapper->nodes_array);
1775
+
1776
+ for (long i = 0; i < len; i++) {
1777
+ VALUE node = rb_ary_entry(wrapper->nodes_array, i);
1778
+ NodeWrapper* node_wrapper;
1779
+ TypedData_Get_Struct(node, NodeWrapper, &node_type, node_wrapper);
1780
+
1781
+ if (node_wrapper->node) {
1782
+ const XMLCh* content = node_wrapper->node->getTextContent();
1783
+ if (content) {
1784
+ CharStr utf8_content(content);
1785
+ result += utf8_content.localForm();
1786
+ }
1787
+ }
1788
+ }
1789
+
1790
+ return rb_str_new_cstr(result.c_str());
1791
+ }
1792
+
1793
+ // nodeset.inspect / nodeset.to_s - human-readable representation
1794
+ static VALUE nodeset_inspect(VALUE self) {
1795
+ NodeSetWrapper* wrapper;
1796
+ TypedData_Get_Struct(self, NodeSetWrapper, &nodeset_type, wrapper);
1797
+
1798
+ long len = RARRAY_LEN(wrapper->nodes_array);
1799
+ std::string result = "#<RXerces::XML::NodeSet:0x";
1800
+
1801
+ // Add object ID
1802
+ char buf[32];
1803
+ snprintf(buf, sizeof(buf), "%016lx", (unsigned long)self);
1804
+ result += buf;
1805
+ result += " [";
1806
+
1807
+ for (long i = 0; i < len; i++) {
1808
+ if (i > 0) result += ", ";
1809
+
1810
+ VALUE node = rb_ary_entry(wrapper->nodes_array, i);
1811
+ NodeWrapper* node_wrapper;
1812
+ TypedData_Get_Struct(node, NodeWrapper, &node_type, node_wrapper);
1813
+
1814
+ if (!node_wrapper->node) {
1815
+ result += "nil";
1816
+ continue;
1817
+ }
1818
+
1819
+ DOMNode::NodeType nodeType = node_wrapper->node->getNodeType();
1820
+
1821
+ if (nodeType == DOMNode::ELEMENT_NODE) {
1822
+ // For elements, show: <tag attr="value">content</tag>
1823
+ CharStr name(node_wrapper->node->getNodeName());
1824
+ result += "<";
1825
+ result += name.localForm();
1826
+
1827
+ // Add first few attributes if present
1828
+ DOMElement* element = dynamic_cast<DOMElement*>(node_wrapper->node);
1829
+ if (element) {
1830
+ DOMNamedNodeMap* attributes = element->getAttributes();
1831
+ if (attributes && attributes->getLength() > 0) {
1832
+ XMLSize_t attrLen = attributes->getLength();
1833
+ if (attrLen > 3) attrLen = 3; // Limit to first 3 attributes
1834
+
1835
+ for (XMLSize_t j = 0; j < attrLen; j++) {
1836
+ DOMNode* attr = attributes->item(j);
1837
+ CharStr attrName(attr->getNodeName());
1838
+ CharStr attrValue(attr->getNodeValue());
1839
+ result += " ";
1840
+ result += attrName.localForm();
1841
+ result += "=\"";
1842
+ result += attrValue.localForm();
1843
+ result += "\"";
1844
+ }
1845
+ if (attributes->getLength() > 3) {
1846
+ result += " ...";
1847
+ }
1848
+ }
1849
+ }
1850
+
1851
+ // Show truncated text content
1852
+ const XMLCh* textContent = node_wrapper->node->getTextContent();
1853
+ if (textContent && XMLString::stringLen(textContent) > 0) {
1854
+ CharStr text(textContent);
1855
+ std::string textStr = text.localForm();
1856
+
1857
+ // Trim whitespace and truncate
1858
+ size_t start = textStr.find_first_not_of(" \t\n\r");
1859
+ if (start != std::string::npos) {
1860
+ size_t end = textStr.find_last_not_of(" \t\n\r");
1861
+ textStr = textStr.substr(start, end - start + 1);
1862
+
1863
+ if (textStr.length() > 30) {
1864
+ textStr = textStr.substr(0, 27) + "...";
1865
+ }
1866
+
1867
+ result += ">";
1868
+ result += textStr;
1869
+ result += "</";
1870
+ result += name.localForm();
1871
+ result += ">";
1872
+ } else {
1873
+ result += ">";
1874
+ }
1875
+ } else {
1876
+ result += ">";
1877
+ }
1878
+ } else if (nodeType == DOMNode::TEXT_NODE) {
1879
+ // For text nodes, show: text("content")
1880
+ const XMLCh* textContent = node_wrapper->node->getNodeValue();
1881
+ if (textContent) {
1882
+ CharStr text(textContent);
1883
+ std::string textStr = text.localForm();
1884
+
1885
+ // Trim and truncate
1886
+ size_t start = textStr.find_first_not_of(" \t\n\r");
1887
+ if (start != std::string::npos) {
1888
+ size_t end = textStr.find_last_not_of(" \t\n\r");
1889
+ textStr = textStr.substr(start, end - start + 1);
1890
+
1891
+ if (textStr.length() > 30) {
1892
+ textStr = textStr.substr(0, 27) + "...";
1893
+ }
1894
+
1895
+ result += "text(\"";
1896
+ result += textStr;
1897
+ result += "\")";
1898
+ } else {
1899
+ result += "text()";
1900
+ }
1901
+ } else {
1902
+ result += "text()";
1903
+ }
1904
+ } else {
1905
+ // For other nodes, just show the type
1906
+ CharStr name(node_wrapper->node->getNodeName());
1907
+ result += "#<";
1908
+ result += name.localForm();
1909
+ result += ">";
1910
+ }
1911
+ }
1912
+
1913
+ result += "]>";
1914
+ return rb_str_new_cstr(result.c_str());
1915
+ }
1916
+
975
1917
  // Schema.from_document(schema_doc) or Schema.from_string(xsd_string)
976
1918
  static VALUE schema_from_document(int argc, VALUE* argv, VALUE klass) {
977
1919
  VALUE schema_source;
@@ -1162,32 +2104,55 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
1162
2104
  rb_define_singleton_method(rb_cDocument, "parse", RUBY_METHOD_FUNC(document_parse), 1);
1163
2105
  rb_define_method(rb_cDocument, "root", RUBY_METHOD_FUNC(document_root), 0);
1164
2106
  rb_define_method(rb_cDocument, "to_s", RUBY_METHOD_FUNC(document_to_s), 0);
1165
- rb_define_method(rb_cDocument, "to_xml", RUBY_METHOD_FUNC(document_to_s), 0);
2107
+ rb_define_alias(rb_cDocument, "to_xml", "to_s");
2108
+ rb_define_method(rb_cDocument, "inspect", RUBY_METHOD_FUNC(document_inspect), 0);
1166
2109
  rb_define_method(rb_cDocument, "xpath", RUBY_METHOD_FUNC(document_xpath), 1);
2110
+ rb_define_method(rb_cDocument, "css", RUBY_METHOD_FUNC(document_css), 1);
2111
+ rb_define_method(rb_cDocument, "at_css", RUBY_METHOD_FUNC(document_at_css), 1);
2112
+ rb_define_method(rb_cDocument, "encoding", RUBY_METHOD_FUNC(document_encoding), 0);
2113
+ rb_define_method(rb_cDocument, "text", RUBY_METHOD_FUNC(document_text), 0);
2114
+ rb_define_alias(rb_cDocument, "content", "text");
1167
2115
  rb_define_method(rb_cDocument, "create_element", RUBY_METHOD_FUNC(document_create_element), 1);
1168
2116
 
1169
2117
  rb_cNode = rb_define_class_under(rb_mXML, "Node", rb_cObject);
1170
2118
  rb_undef_alloc_func(rb_cNode);
2119
+ rb_define_method(rb_cNode, "inspect", RUBY_METHOD_FUNC(node_inspect), 0);
1171
2120
  rb_define_method(rb_cNode, "name", RUBY_METHOD_FUNC(node_name), 0);
2121
+ rb_define_method(rb_cNode, "namespace", RUBY_METHOD_FUNC(node_namespace), 0);
1172
2122
  rb_define_method(rb_cNode, "text", RUBY_METHOD_FUNC(node_text), 0);
1173
- rb_define_method(rb_cNode, "content", RUBY_METHOD_FUNC(node_text), 0);
2123
+ rb_define_alias(rb_cNode, "content", "text");
1174
2124
  rb_define_method(rb_cNode, "text=", RUBY_METHOD_FUNC(node_text_set), 1);
1175
- rb_define_method(rb_cNode, "content=", RUBY_METHOD_FUNC(node_text_set), 1);
2125
+ rb_define_alias(rb_cNode, "content=", "text=");
1176
2126
  rb_define_method(rb_cNode, "[]", RUBY_METHOD_FUNC(node_get_attribute), 1);
1177
2127
  rb_define_method(rb_cNode, "[]=", RUBY_METHOD_FUNC(node_set_attribute), 2);
2128
+ rb_define_alias(rb_cNode, "get_attribute", "[]");
2129
+ rb_define_alias(rb_cNode, "attribute", "[]");
2130
+ rb_define_method(rb_cNode, "has_attribute?", RUBY_METHOD_FUNC(node_has_attribute_p), 1);
1178
2131
  rb_define_method(rb_cNode, "children", RUBY_METHOD_FUNC(node_children), 0);
2132
+ rb_define_method(rb_cNode, "element_children", RUBY_METHOD_FUNC(node_element_children), 0);
2133
+ rb_define_alias(rb_cNode, "elements", "element_children");
1179
2134
  rb_define_method(rb_cNode, "parent", RUBY_METHOD_FUNC(node_parent), 0);
2135
+ rb_define_method(rb_cNode, "ancestors", RUBY_METHOD_FUNC(node_ancestors), -1);
1180
2136
  rb_define_method(rb_cNode, "attributes", RUBY_METHOD_FUNC(node_attributes), 0);
1181
2137
  rb_define_method(rb_cNode, "next_sibling", RUBY_METHOD_FUNC(node_next_sibling), 0);
2138
+ rb_define_method(rb_cNode, "next_element", RUBY_METHOD_FUNC(node_next_element), 0);
1182
2139
  rb_define_method(rb_cNode, "previous_sibling", RUBY_METHOD_FUNC(node_previous_sibling), 0);
2140
+ rb_define_method(rb_cNode, "previous_element", RUBY_METHOD_FUNC(node_previous_element), 0);
1183
2141
  rb_define_method(rb_cNode, "add_child", RUBY_METHOD_FUNC(node_add_child), 1);
1184
2142
  rb_define_method(rb_cNode, "remove", RUBY_METHOD_FUNC(node_remove), 0);
1185
- rb_define_method(rb_cNode, "unlink", RUBY_METHOD_FUNC(node_remove), 0);
2143
+ rb_define_alias(rb_cNode, "unlink", "remove");
1186
2144
  rb_define_method(rb_cNode, "inner_html", RUBY_METHOD_FUNC(node_inner_html), 0);
1187
- rb_define_method(rb_cNode, "inner_xml", RUBY_METHOD_FUNC(node_inner_html), 0);
2145
+ rb_define_alias(rb_cNode, "inner_xml", "inner_html");
1188
2146
  rb_define_method(rb_cNode, "path", RUBY_METHOD_FUNC(node_path), 0);
1189
2147
  rb_define_method(rb_cNode, "blank?", RUBY_METHOD_FUNC(node_blank_p), 0);
1190
2148
  rb_define_method(rb_cNode, "xpath", RUBY_METHOD_FUNC(node_xpath), 1);
2149
+ rb_define_alias(rb_cNode, "search", "xpath");
2150
+ rb_define_method(rb_cNode, "at_xpath", RUBY_METHOD_FUNC(node_at_xpath), 1);
2151
+ rb_define_alias(rb_cNode, "at", "at_xpath");
2152
+ rb_define_method(rb_cNode, "css", RUBY_METHOD_FUNC(node_css), 1);
2153
+ rb_define_method(rb_cNode, "at_css", RUBY_METHOD_FUNC(node_at_css), 1);
2154
+ rb_define_alias(rb_cNode, "get_attribute", "[]");
2155
+ rb_define_alias(rb_cNode, "attribute", "[]");
1191
2156
 
1192
2157
  rb_cElement = rb_define_class_under(rb_mXML, "Element", rb_cNode);
1193
2158
  rb_undef_alloc_func(rb_cElement);
@@ -1198,10 +2163,17 @@ static VALUE document_validate(VALUE self, VALUE rb_schema) {
1198
2163
  rb_cNodeSet = rb_define_class_under(rb_mXML, "NodeSet", rb_cObject);
1199
2164
  rb_undef_alloc_func(rb_cNodeSet);
1200
2165
  rb_define_method(rb_cNodeSet, "length", RUBY_METHOD_FUNC(nodeset_length), 0);
1201
- rb_define_method(rb_cNodeSet, "size", RUBY_METHOD_FUNC(nodeset_length), 0);
2166
+ rb_define_alias(rb_cNodeSet, "size", "length");
1202
2167
  rb_define_method(rb_cNodeSet, "[]", RUBY_METHOD_FUNC(nodeset_at), 1);
2168
+ rb_define_method(rb_cNodeSet, "first", RUBY_METHOD_FUNC(nodeset_first), 0);
2169
+ rb_define_method(rb_cNodeSet, "last", RUBY_METHOD_FUNC(nodeset_last), 0);
2170
+ rb_define_method(rb_cNodeSet, "empty?", RUBY_METHOD_FUNC(nodeset_empty_p), 0);
1203
2171
  rb_define_method(rb_cNodeSet, "each", RUBY_METHOD_FUNC(nodeset_each), 0);
1204
2172
  rb_define_method(rb_cNodeSet, "to_a", RUBY_METHOD_FUNC(nodeset_to_a), 0);
2173
+ rb_define_method(rb_cNodeSet, "text", RUBY_METHOD_FUNC(nodeset_text), 0);
2174
+ rb_define_method(rb_cNodeSet, "inner_html", RUBY_METHOD_FUNC(nodeset_inner_html), 0);
2175
+ rb_define_method(rb_cNodeSet, "inspect", RUBY_METHOD_FUNC(nodeset_inspect), 0);
2176
+ rb_define_alias(rb_cNodeSet, "to_s", "inspect");
1205
2177
  rb_include_module(rb_cNodeSet, rb_mEnumerable);
1206
2178
 
1207
2179
  rb_cSchema = rb_define_class_under(rb_mXML, "Schema", rb_cObject);