nokogiri 1.16.7-x86_64-linux → 1.17.0-x86_64-linux

Sign up to get free protection for your applications and to get access to all the features.
Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +11 -21
  3. data/README.md +4 -0
  4. data/dependencies.yml +6 -6
  5. data/ext/nokogiri/extconf.rb +191 -137
  6. data/ext/nokogiri/gumbo.c +69 -53
  7. data/ext/nokogiri/html4_document.c +10 -4
  8. data/ext/nokogiri/html4_element_description.c +18 -18
  9. data/ext/nokogiri/html4_sax_parser.c +40 -0
  10. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  11. data/ext/nokogiri/html4_sax_push_parser.c +25 -24
  12. data/ext/nokogiri/include/libexslt/exsltconfig.h +3 -3
  13. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +12 -19
  14. data/ext/nokogiri/include/libxml2/libxml/c14n.h +1 -12
  15. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +1 -1
  16. data/ext/nokogiri/include/libxml2/libxml/encoding.h +9 -0
  17. data/ext/nokogiri/include/libxml2/libxml/entities.h +12 -1
  18. data/ext/nokogiri/include/libxml2/libxml/hash.h +19 -0
  19. data/ext/nokogiri/include/libxml2/libxml/list.h +2 -2
  20. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +17 -0
  21. data/ext/nokogiri/include/libxml2/libxml/parser.h +60 -54
  22. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +9 -1
  23. data/ext/nokogiri/include/libxml2/libxml/pattern.h +6 -0
  24. data/ext/nokogiri/include/libxml2/libxml/tree.h +32 -12
  25. data/ext/nokogiri/include/libxml2/libxml/uri.h +11 -0
  26. data/ext/nokogiri/include/libxml2/libxml/valid.h +29 -2
  27. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +7 -0
  28. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +21 -4
  29. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +14 -0
  30. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +111 -15
  31. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +8 -45
  32. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +2 -0
  33. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +5 -0
  34. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +165 -1
  35. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +7 -171
  36. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +1 -0
  37. data/ext/nokogiri/include/libxml2/libxml/xpath.h +4 -0
  38. data/ext/nokogiri/include/libxslt/xsltInternals.h +3 -0
  39. data/ext/nokogiri/include/libxslt/xsltconfig.h +4 -37
  40. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  41. data/ext/nokogiri/nokogiri.c +9 -2
  42. data/ext/nokogiri/nokogiri.h +18 -33
  43. data/ext/nokogiri/xml_attr.c +1 -1
  44. data/ext/nokogiri/xml_cdata.c +2 -10
  45. data/ext/nokogiri/xml_comment.c +3 -8
  46. data/ext/nokogiri/xml_document.c +163 -156
  47. data/ext/nokogiri/xml_document_fragment.c +10 -25
  48. data/ext/nokogiri/xml_dtd.c +1 -1
  49. data/ext/nokogiri/xml_element_content.c +9 -9
  50. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  51. data/ext/nokogiri/xml_namespace.c +6 -6
  52. data/ext/nokogiri/xml_node.c +137 -105
  53. data/ext/nokogiri/xml_node_set.c +46 -44
  54. data/ext/nokogiri/xml_reader.c +54 -58
  55. data/ext/nokogiri/xml_relax_ng.c +35 -56
  56. data/ext/nokogiri/xml_sax_parser.c +156 -88
  57. data/ext/nokogiri/xml_sax_parser_context.c +213 -131
  58. data/ext/nokogiri/xml_sax_push_parser.c +68 -49
  59. data/ext/nokogiri/xml_schema.c +50 -85
  60. data/ext/nokogiri/xml_syntax_error.c +19 -11
  61. data/ext/nokogiri/xml_text.c +2 -4
  62. data/ext/nokogiri/xml_xpath_context.c +2 -2
  63. data/ext/nokogiri/xslt_stylesheet.c +8 -8
  64. data/lib/nokogiri/3.0/nokogiri.so +0 -0
  65. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  66. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  67. data/lib/nokogiri/3.3/nokogiri.so +0 -0
  68. data/lib/nokogiri/class_resolver.rb +1 -1
  69. data/lib/nokogiri/css/node.rb +6 -2
  70. data/lib/nokogiri/css/parser.rb +6 -4
  71. data/lib/nokogiri/css/parser.y +2 -2
  72. data/lib/nokogiri/css/parser_extras.rb +6 -66
  73. data/lib/nokogiri/css/selector_cache.rb +38 -0
  74. data/lib/nokogiri/css/tokenizer.rb +4 -4
  75. data/lib/nokogiri/css/tokenizer.rex +9 -8
  76. data/lib/nokogiri/css/xpath_visitor.rb +42 -6
  77. data/lib/nokogiri/css.rb +86 -20
  78. data/lib/nokogiri/decorators/slop.rb +3 -5
  79. data/lib/nokogiri/encoding_handler.rb +2 -2
  80. data/lib/nokogiri/html4/document.rb +44 -23
  81. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  82. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  83. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  84. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  85. data/lib/nokogiri/html4.rb +9 -14
  86. data/lib/nokogiri/html5/builder.rb +40 -0
  87. data/lib/nokogiri/html5/document.rb +61 -30
  88. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  89. data/lib/nokogiri/html5/node.rb +4 -4
  90. data/lib/nokogiri/html5.rb +114 -72
  91. data/lib/nokogiri/version/constant.rb +1 -1
  92. data/lib/nokogiri/xml/builder.rb +8 -1
  93. data/lib/nokogiri/xml/document.rb +70 -26
  94. data/lib/nokogiri/xml/document_fragment.rb +84 -13
  95. data/lib/nokogiri/xml/node.rb +82 -11
  96. data/lib/nokogiri/xml/node_set.rb +9 -7
  97. data/lib/nokogiri/xml/parse_options.rb +1 -1
  98. data/lib/nokogiri/xml/pp/node.rb +6 -1
  99. data/lib/nokogiri/xml/reader.rb +46 -13
  100. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  101. data/lib/nokogiri/xml/sax/document.rb +174 -83
  102. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  103. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  104. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  105. data/lib/nokogiri/xml/sax.rb +48 -0
  106. data/lib/nokogiri/xml/schema.rb +112 -45
  107. data/lib/nokogiri/xml/searchable.rb +6 -8
  108. data/lib/nokogiri/xml/syntax_error.rb +22 -0
  109. data/lib/nokogiri/xml.rb +13 -24
  110. data/lib/nokogiri/xslt.rb +3 -9
  111. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  112. metadata +8 -5
  113. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
@@ -38,8 +38,8 @@ _xml_node_update_references(void *ptr)
38
38
  }
39
39
  }
40
40
 
41
- static const rb_data_type_t nokogiri_node_type = {
42
- .wrap_struct_name = "Nokogiri::XML::Node",
41
+ static const rb_data_type_t xml_node_type = {
42
+ .wrap_struct_name = "xmlNode",
43
43
  .function = {
44
44
  .dmark = _xml_node_mark,
45
45
  .dcompact = _xml_node_update_references,
@@ -47,6 +47,24 @@ static const rb_data_type_t nokogiri_node_type = {
47
47
  .flags = RUBY_TYPED_FREE_IMMEDIATELY,
48
48
  };
49
49
 
50
+ static VALUE
51
+ _xml_node_alloc(VALUE klass)
52
+ {
53
+ return TypedData_Wrap_Struct(klass, &xml_node_type, NULL);
54
+ }
55
+
56
+ static void
57
+ _xml_node_data_ptr_set(VALUE rb_node, xmlNodePtr c_node)
58
+ {
59
+ assert(DATA_PTR(rb_node) == NULL);
60
+ assert(c_node->_private == NULL);
61
+
62
+ DATA_PTR(rb_node) = c_node;
63
+ c_node->_private = (void *)rb_node;
64
+
65
+ return;
66
+ }
67
+
50
68
  static void
51
69
  relink_namespace(xmlNodePtr reparented)
52
70
  {
@@ -141,7 +159,7 @@ relink_namespace(xmlNodePtr reparented)
141
159
  /* reparent. */
142
160
  if (NULL == reparented->ns) { return; }
143
161
 
144
- /* When a node gets reparented, walk it's children to make sure that */
162
+ /* When a node gets reparented, walk its children to make sure that */
145
163
  /* their namespaces are reparented as well. */
146
164
  child = reparented->children;
147
165
  while (NULL != child) {
@@ -944,51 +962,25 @@ internal_subset(VALUE self)
944
962
  return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
945
963
  }
946
964
 
947
- /*
948
- * :call-seq:
949
- * dup → Nokogiri::XML::Node
950
- * dup(depth) → Nokogiri::XML::Node
951
- * dup(depth, new_parent_doc) → Nokogiri::XML::Node
952
- *
953
- * Copy this node.
954
- *
955
- * [Parameters]
956
- * - +depth+ 0 is a shallow copy, 1 (the default) is a deep copy.
957
- * - +new_parent_doc+
958
- * The new node's parent Document. Defaults to the this node's document.
959
- *
960
- * [Returns] The new Nokogiri::XML::Node
961
- */
965
+ /* :nodoc: */
962
966
  static VALUE
963
- duplicate_node(int argc, VALUE *argv, VALUE self)
967
+ rb_xml_node_initialize_copy_with_args(VALUE rb_self, VALUE rb_other, VALUE rb_level, VALUE rb_new_parent_doc)
964
968
  {
965
- VALUE r_level, r_new_parent_doc;
966
- int level;
967
- int n_args;
968
- xmlDocPtr new_parent_doc;
969
- xmlNodePtr node, dup;
969
+ xmlNodePtr c_self, c_other;
970
+ int c_level;
971
+ xmlDocPtr c_new_parent_doc;
970
972
 
971
- Noko_Node_Get_Struct(self, xmlNode, node);
972
-
973
- n_args = rb_scan_args(argc, argv, "02", &r_level, &r_new_parent_doc);
974
-
975
- if (n_args < 1) {
976
- r_level = INT2NUM((long)1);
977
- }
978
- level = (int)NUM2INT(r_level);
979
-
980
- if (n_args < 2) {
981
- new_parent_doc = node->doc;
982
- } else {
983
- new_parent_doc = noko_xml_document_unwrap(r_new_parent_doc);
984
- }
973
+ Noko_Node_Get_Struct(rb_other, xmlNode, c_other);
974
+ c_level = (int)NUM2INT(rb_level);
975
+ c_new_parent_doc = noko_xml_document_unwrap(rb_new_parent_doc);
985
976
 
986
- dup = xmlDocCopyNode(node, new_parent_doc, level);
987
- if (dup == NULL) { return Qnil; }
977
+ c_self = xmlDocCopyNode(c_other, c_new_parent_doc, c_level);
978
+ if (c_self == NULL) { return Qnil; }
988
979
 
989
- noko_xml_document_pin_node(dup);
980
+ _xml_node_data_ptr_set(rb_self, c_self);
981
+ noko_xml_document_pin_node(c_self);
990
982
 
991
- return noko_xml_node_wrap(rb_obj_class(self), dup);
983
+ return rb_self;
992
984
  }
993
985
 
994
986
  /*
@@ -1074,17 +1066,10 @@ previous_element(VALUE self)
1074
1066
  xmlNodePtr node, sibling;
1075
1067
  Noko_Node_Get_Struct(self, xmlNode, node);
1076
1068
 
1077
- /*
1078
- * note that we don't use xmlPreviousElementSibling here because it's buggy pre-2.7.7.
1079
- */
1080
- sibling = node->prev;
1069
+ sibling = xmlPreviousElementSibling(node);
1081
1070
  if (!sibling) { return Qnil; }
1082
1071
 
1083
- while (sibling && sibling->type != XML_ELEMENT_NODE) {
1084
- sibling = sibling->prev;
1085
- }
1086
-
1087
- return sibling ? noko_xml_node_wrap(Qnil, sibling) : Qnil ;
1072
+ return noko_xml_node_wrap(Qnil, sibling);
1088
1073
  }
1089
1074
 
1090
1075
  /* :nodoc: */
@@ -1500,9 +1485,44 @@ node_type(VALUE self)
1500
1485
 
1501
1486
  /*
1502
1487
  * call-seq:
1503
- * content=
1488
+ * native_content=(input)
1489
+ *
1490
+ * Set the content of this node to +input+.
1491
+ *
1492
+ * [Parameters]
1493
+ * - +input+ (String) The new content for this node.
1494
+ *
1495
+ * ⚠ This method behaves differently depending on the node type. For Text, CDATA, Comment, and
1496
+ * ProcessingInstruction nodes, it treats the input as raw content, which means that the final DOM
1497
+ * will contain the entity-escaped version of the input (see example below). For Element and Attr
1498
+ * nodes, it treats the input as parsed content and expects it to be valid markup that is already
1499
+ * entity-escaped.
1500
+ *
1501
+ * 💡 Use Node#content= for a more consistent API across node types.
1502
+ *
1503
+ * [Example]
1504
+ * Note the behavior differences of this method between Text and Element nodes:
1504
1505
  *
1505
- * Set the content for this Node
1506
+ * doc = Nokogiri::HTML::Document.parse(<<~HTML)
1507
+ * <html>
1508
+ * <body>
1509
+ * <div id="first">asdf</div>
1510
+ * <div id="second">asdf</div>
1511
+ * HTML
1512
+ *
1513
+ * text_node = doc.at_css("div#first").children.first
1514
+ * div_node = doc.at_css("div#second")
1515
+ *
1516
+ * value = "You &amp; Me"
1517
+ *
1518
+ * text_node.native_content = value
1519
+ * div_node.native_content = value
1520
+ *
1521
+ * doc.css("div").to_html
1522
+ * # => "<div id=\"first\">You &amp;amp; Me</div>
1523
+ * # <div id=\"second\">You &amp; Me</div>"
1524
+ *
1525
+ * See also: #content=
1506
1526
  */
1507
1527
  static VALUE
1508
1528
  set_native_content(VALUE self, VALUE content)
@@ -1813,12 +1833,12 @@ output_escaped_string(VALUE out, xmlChar const *start, bool attr)
1813
1833
  ++next;
1814
1834
  continue;
1815
1835
  }
1816
- output_partial_string(out, (char const *)start, next - start);
1836
+ output_partial_string(out, (char const *)start, (size_t)(next - start));
1817
1837
  output_string(out, replacement);
1818
1838
  next += replaced_bytes;
1819
1839
  start = next;
1820
1840
  }
1821
- output_partial_string(out, (char const *)start, next - start);
1841
+ output_partial_string(out, (char const *)start, (size_t)(next - start));
1822
1842
  }
1823
1843
 
1824
1844
  static bool
@@ -1849,13 +1869,19 @@ is_one_of(xmlNodePtr node, char const *const *tagnames, size_t num_tagnames)
1849
1869
  if (name == NULL) { // fragments don't have a name
1850
1870
  return false;
1851
1871
  }
1872
+
1873
+ if (node->ns != NULL) {
1874
+ // if the node has a namespace, it's in a foreign context and is not one of the HTML tags we're
1875
+ // matching against.
1876
+ return false;
1877
+ }
1878
+
1852
1879
  for (size_t idx = 0; idx < num_tagnames; ++idx) {
1853
1880
  if (!strcmp(name, tagnames[idx])) {
1854
1881
  return true;
1855
1882
  }
1856
1883
  }
1857
1884
  return false;
1858
-
1859
1885
  }
1860
1886
 
1861
1887
  static void
@@ -1883,17 +1909,7 @@ output_node(
1883
1909
  // Add attributes.
1884
1910
  for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
1885
1911
  output_char(out, ' ');
1886
- output_attr_name(out, attr);
1887
- if (attr->children) {
1888
- output_string(out, "=\"");
1889
- xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1890
- output_escaped_string(out, value, true);
1891
- xmlFree(value);
1892
- output_char(out, '"');
1893
- } else {
1894
- // Output name=""
1895
- output_string(out, "=\"\"");
1896
- }
1912
+ output_node(out, (xmlNodePtr)attr, preserve_newline);
1897
1913
  }
1898
1914
  output_char(out, '>');
1899
1915
 
@@ -1911,6 +1927,22 @@ output_node(
1911
1927
  }
1912
1928
  break;
1913
1929
 
1930
+ case XML_ATTRIBUTE_NODE: {
1931
+ xmlAttrPtr attr = (xmlAttrPtr)node;
1932
+ output_attr_name(out, attr);
1933
+ if (attr->children) {
1934
+ output_string(out, "=\"");
1935
+ xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1936
+ output_escaped_string(out, value, true);
1937
+ xmlFree(value);
1938
+ output_char(out, '"');
1939
+ } else {
1940
+ // Output name=""
1941
+ output_string(out, "=\"\"");
1942
+ }
1943
+ }
1944
+ break;
1945
+
1914
1946
  case XML_TEXT_NODE:
1915
1947
  if (node->parent
1916
1948
  && is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS,
@@ -2026,11 +2058,11 @@ rb_xml_node_line_set(VALUE rb_node, VALUE rb_line_number)
2026
2058
  // libxml2 optionally uses xmlNode.psvi to store longer line numbers, but only for text nodes.
2027
2059
  // search for "psvi" in SAX2.c and tree.c to learn more.
2028
2060
  if (line_number < 65535) {
2029
- c_node->line = (short) line_number;
2061
+ c_node->line = (short unsigned)line_number;
2030
2062
  } else {
2031
2063
  c_node->line = 65535;
2032
2064
  if (c_node->type == XML_TEXT_NODE) {
2033
- c_node->psvi = (void *)(ptrdiff_t) line_number;
2065
+ c_node->psvi = (void *)(ptrdiff_t)line_number;
2034
2066
  }
2035
2067
  }
2036
2068
 
@@ -2090,7 +2122,7 @@ dump_html(VALUE self)
2090
2122
 
2091
2123
  buf = xmlBufferCreate() ;
2092
2124
  htmlNodeDump(buf, node->doc, node);
2093
- html = NOKOGIRI_STR_NEW2(buf->content);
2125
+ html = NOKOGIRI_STR_NEW2(xmlBufferContent(buf));
2094
2126
  xmlBufferFree(buf);
2095
2127
  return html ;
2096
2128
  }
@@ -2114,36 +2146,38 @@ compare(VALUE self, VALUE _other)
2114
2146
 
2115
2147
  /*
2116
2148
  * call-seq:
2117
- * process_xincludes(options)
2149
+ * process_xincludes(flags)
2118
2150
  *
2119
2151
  * Loads and substitutes all xinclude elements below the node. The
2120
- * parser context will be initialized with +options+.
2152
+ * parser context will be initialized with +flags+.
2121
2153
  */
2122
2154
  static VALUE
2123
- process_xincludes(VALUE self, VALUE options)
2155
+ noko_xml_node__process_xincludes(VALUE rb_node, VALUE rb_flags)
2124
2156
  {
2125
- int rcode ;
2126
- xmlNodePtr node;
2127
- VALUE error_list = rb_ary_new();
2157
+ int status ;
2158
+ xmlNodePtr c_node;
2159
+ VALUE rb_errors = rb_ary_new();
2160
+ libxmlStructuredErrorHandlerState handler_state;
2128
2161
 
2129
- Noko_Node_Get_Struct(self, xmlNode, node);
2162
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
2130
2163
 
2131
- xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
2132
- rcode = xmlXIncludeProcessTreeFlags(node, (int)NUM2INT(options));
2133
- xmlSetStructuredErrorFunc(NULL, NULL);
2164
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
2165
+
2166
+ status = xmlXIncludeProcessTreeFlags(c_node, (int)NUM2INT(rb_flags));
2134
2167
 
2135
- if (rcode < 0) {
2136
- xmlErrorConstPtr error;
2168
+ noko__structured_error_func_restore(&handler_state);
2137
2169
 
2138
- error = xmlGetLastError();
2139
- if (error) {
2140
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
2170
+ if (status < 0) {
2171
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
2172
+
2173
+ if (RB_TEST(exception)) {
2174
+ rb_exc_raise(exception);
2141
2175
  } else {
2142
2176
  rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
2143
2177
  }
2144
2178
  }
2145
2179
 
2146
- return self;
2180
+ return rb_node;
2147
2181
  }
2148
2182
 
2149
2183
 
@@ -2165,16 +2199,7 @@ in_context(VALUE self, VALUE _str, VALUE _options)
2165
2199
  node_children = node->children;
2166
2200
  doc_children = node->doc->children;
2167
2201
 
2168
- xmlSetStructuredErrorFunc((void *)err, Nokogiri_error_array_pusher);
2169
-
2170
- /* Twiddle global variable because of a bug in libxml2.
2171
- * http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
2172
- *
2173
- * TODO: this is fixed, and HTML_PARSE_NOIMPLIED is defined, in libxml2 2.7.7
2174
- */
2175
- #ifndef HTML_PARSE_NOIMPLIED
2176
- htmlHandleOmittedElem(0);
2177
- #endif
2202
+ xmlSetStructuredErrorFunc((void *)err, noko__error_array_pusher);
2178
2203
 
2179
2204
  /* This function adds a fake node to the child of +node+. If the parser
2180
2205
  * does not exit cleanly with XML_ERR_OK, the list is freed. This can
@@ -2204,10 +2229,6 @@ in_context(VALUE self, VALUE _str, VALUE _options)
2204
2229
  child_iter = child_iter->next;
2205
2230
  }
2206
2231
 
2207
- #ifndef HTML_PARSE_NOIMPLIED
2208
- htmlHandleOmittedElem(1);
2209
- #endif
2210
-
2211
2232
  xmlSetStructuredErrorFunc(NULL, NULL);
2212
2233
 
2213
2234
  /*
@@ -2256,6 +2277,15 @@ in_context(VALUE self, VALUE _str, VALUE _options)
2256
2277
  return noko_xml_node_set_wrap(set, doc);
2257
2278
  }
2258
2279
 
2280
+ /* :nodoc: */
2281
+ VALUE
2282
+ rb_xml_node_data_ptr_eh(VALUE self)
2283
+ {
2284
+ xmlNodePtr c_node;
2285
+ Noko_Node_Get_Struct(self, xmlNode, c_node);
2286
+ return c_node ? Qtrue : Qfalse;
2287
+ }
2288
+
2259
2289
  VALUE
2260
2290
  noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
2261
2291
  {
@@ -2321,8 +2351,8 @@ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
2321
2351
  }
2322
2352
  }
2323
2353
 
2324
- rb_node = TypedData_Wrap_Struct(rb_class, &nokogiri_node_type, c_node) ;
2325
- c_node->_private = (void *)rb_node;
2354
+ rb_node = _xml_node_alloc(rb_class);
2355
+ _xml_node_data_ptr_set(rb_node, c_node);
2326
2356
 
2327
2357
  if (node_has_a_document) {
2328
2358
  rb_document = DOC_RUBY_OBJECT(c_doc);
@@ -2358,7 +2388,7 @@ noko_init_xml_node(void)
2358
2388
  {
2359
2389
  cNokogiriXmlNode = rb_define_class_under(mNokogiriXml, "Node", rb_cObject);
2360
2390
 
2361
- rb_undef_alloc_func(cNokogiriXmlNode);
2391
+ rb_define_alloc_func(cNokogiriXmlNode, _xml_node_alloc);
2362
2392
 
2363
2393
  rb_define_singleton_method(cNokogiriXmlNode, "new", rb_xml_node_new, -1);
2364
2394
 
@@ -2372,8 +2402,8 @@ noko_init_xml_node(void)
2372
2402
  rb_define_method(cNokogiriXmlNode, "content", rb_xml_node_content, 0);
2373
2403
  rb_define_method(cNokogiriXmlNode, "create_external_subset", create_external_subset, 3);
2374
2404
  rb_define_method(cNokogiriXmlNode, "create_internal_subset", create_internal_subset, 3);
2405
+ rb_define_method(cNokogiriXmlNode, "data_ptr?", rb_xml_node_data_ptr_eh, 0);
2375
2406
  rb_define_method(cNokogiriXmlNode, "document", rb_xml_node_document, 0);
2376
- rb_define_method(cNokogiriXmlNode, "dup", duplicate_node, -1);
2377
2407
  rb_define_method(cNokogiriXmlNode, "element_children", rb_xml_node_element_children, 0);
2378
2408
  rb_define_method(cNokogiriXmlNode, "encode_special_chars", encode_special_chars, 1);
2379
2409
  rb_define_method(cNokogiriXmlNode, "external_subset", external_subset, 0);
@@ -2402,6 +2432,8 @@ noko_init_xml_node(void)
2402
2432
  rb_define_method(cNokogiriXmlNode, "previous_sibling", previous_sibling, 0);
2403
2433
  rb_define_method(cNokogiriXmlNode, "unlink", unlink_node, 0);
2404
2434
 
2435
+ rb_define_protected_method(cNokogiriXmlNode, "initialize_copy_with_args", rb_xml_node_initialize_copy_with_args, 3);
2436
+
2405
2437
  rb_define_private_method(cNokogiriXmlNode, "add_child_node", add_child, 1);
2406
2438
  rb_define_private_method(cNokogiriXmlNode, "add_next_sibling_node", add_next_sibling, 1);
2407
2439
  rb_define_private_method(cNokogiriXmlNode, "add_previous_sibling_node", add_previous_sibling, 1);
@@ -2412,7 +2444,7 @@ noko_init_xml_node(void)
2412
2444
  rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4);
2413
2445
  rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0);
2414
2446
  rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1);
2415
- rb_define_private_method(cNokogiriXmlNode, "process_xincludes", process_xincludes, 1);
2447
+ rb_define_private_method(cNokogiriXmlNode, "process_xincludes", noko_xml_node__process_xincludes, 1);
2416
2448
  rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1);
2417
2449
  rb_define_private_method(cNokogiriXmlNode, "set", set, 2);
2418
2450
  rb_define_private_method(cNokogiriXmlNode, "set_namespace", set_namespace, 1);
@@ -68,15 +68,8 @@ xml_node_set_deallocate(void *data)
68
68
  xmlFree(node_set);
69
69
  }
70
70
 
71
-
72
- static VALUE
73
- xml_node_set_allocate(VALUE klass)
74
- {
75
- return noko_xml_node_set_wrap(xmlXPathNodeSetCreate(NULL), Qnil);
76
- }
77
-
78
71
  static const rb_data_type_t xml_node_set_type = {
79
- .wrap_struct_name = "Nokogiri::XML::NodeSet",
72
+ .wrap_struct_name = "xmlNodeSet",
80
73
  .function = {
81
74
  .dmark = xml_node_set_mark,
82
75
  .dfree = xml_node_set_deallocate,
@@ -84,6 +77,33 @@ static const rb_data_type_t xml_node_set_type = {
84
77
  .flags = RUBY_TYPED_FREE_IMMEDIATELY,
85
78
  };
86
79
 
80
+ static VALUE
81
+ xml_node_set_allocate(VALUE klass)
82
+ {
83
+ return TypedData_Wrap_Struct(klass, &xml_node_set_type, xmlXPathNodeSetCreate(NULL));
84
+ }
85
+
86
+ /* :nodoc: */
87
+ static VALUE
88
+ rb_xml_node_set_initialize_copy(VALUE rb_self, VALUE rb_other)
89
+ {
90
+ xmlNodeSetPtr c_self, c_other;
91
+ VALUE rb_document;
92
+
93
+ TypedData_Get_Struct(rb_self, xmlNodeSet, &xml_node_set_type, c_self);
94
+ TypedData_Get_Struct(rb_other, xmlNodeSet, &xml_node_set_type, c_other);
95
+
96
+ xmlXPathNodeSetMerge(c_self, c_other);
97
+
98
+ rb_document = rb_iv_get(rb_other, "@document");
99
+ if (!NIL_P(rb_document)) {
100
+ rb_iv_set(rb_self, "@document", rb_document);
101
+ rb_funcall(rb_document, decorate, 1, rb_self);
102
+ }
103
+
104
+ return rb_self;
105
+ }
106
+
87
107
  static void
88
108
  xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val)
89
109
  {
@@ -112,27 +132,6 @@ xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val)
112
132
  cur->nodeTab[cur->nodeNr] = NULL;
113
133
  }
114
134
 
115
-
116
- /*
117
- * call-seq:
118
- * dup
119
- *
120
- * Duplicate this NodeSet. Note that the Nodes contained in the NodeSet are not
121
- * duplicated (similar to how Array and other Enumerable classes work).
122
- */
123
- static VALUE
124
- duplicate(VALUE rb_self)
125
- {
126
- xmlNodeSetPtr c_self;
127
- xmlNodeSetPtr dupl;
128
-
129
- TypedData_Get_Struct(rb_self, xmlNodeSet, &xml_node_set_type, c_self);
130
-
131
- dupl = xmlXPathNodeSetMerge(NULL, c_self);
132
-
133
- return noko_xml_node_set_wrap(dupl, rb_iv_get(rb_self, "@document"));
134
- }
135
-
136
135
  /*
137
136
  * call-seq:
138
137
  * length
@@ -453,19 +452,21 @@ noko_xml_node_set_wrap(xmlNodeSetPtr c_node_set, VALUE document)
453
452
  VALUE rb_node_set ;
454
453
 
455
454
  if (c_node_set == NULL) {
456
- c_node_set = xmlXPathNodeSetCreate(NULL);
455
+ rb_node_set = xml_node_set_allocate(cNokogiriXmlNodeSet);
456
+ } else {
457
+ rb_node_set = TypedData_Wrap_Struct(cNokogiriXmlNodeSet, &xml_node_set_type, c_node_set);
457
458
  }
458
459
 
459
- rb_node_set = TypedData_Wrap_Struct(cNokogiriXmlNodeSet, &xml_node_set_type, c_node_set);
460
-
461
460
  if (!NIL_P(document)) {
462
461
  rb_iv_set(rb_node_set, "@document", document);
463
462
  rb_funcall(document, decorate, 1, rb_node_set);
464
463
  }
465
464
 
466
- /* make sure we create ruby objects for all the results, so they'll be marked during the GC mark phase */
467
- for (j = 0 ; j < c_node_set->nodeNr ; j++) {
468
- noko_xml_node_wrap_node_set_result(c_node_set->nodeTab[j], rb_node_set);
465
+ if (c_node_set) {
466
+ /* create ruby objects for all the results, so they'll be marked during the GC mark phase */
467
+ for (j = 0 ; j < c_node_set->nodeNr ; j++) {
468
+ noko_xml_node_wrap_node_set_result(c_node_set->nodeTab[j], rb_node_set);
469
+ }
469
470
  }
470
471
 
471
472
  return rb_node_set ;
@@ -499,18 +500,19 @@ noko_init_xml_node_set(void)
499
500
 
500
501
  rb_define_alloc_func(cNokogiriXmlNodeSet, xml_node_set_allocate);
501
502
 
502
- rb_define_method(cNokogiriXmlNodeSet, "length", length, 0);
503
- rb_define_method(cNokogiriXmlNodeSet, "[]", slice, -1);
504
- rb_define_method(cNokogiriXmlNodeSet, "slice", slice, -1);
505
- rb_define_method(cNokogiriXmlNodeSet, "push", push, 1);
506
- rb_define_method(cNokogiriXmlNodeSet, "|", rb_xml_node_set_union, 1);
503
+ rb_define_method(cNokogiriXmlNodeSet, "&", intersection, 1);
507
504
  rb_define_method(cNokogiriXmlNodeSet, "-", minus, 1);
508
- rb_define_method(cNokogiriXmlNodeSet, "unlink", unlink_nodeset, 0);
509
- rb_define_method(cNokogiriXmlNodeSet, "to_a", to_array, 0);
510
- rb_define_method(cNokogiriXmlNodeSet, "dup", duplicate, 0);
505
+ rb_define_method(cNokogiriXmlNodeSet, "[]", slice, -1);
511
506
  rb_define_method(cNokogiriXmlNodeSet, "delete", delete, 1);
512
- rb_define_method(cNokogiriXmlNodeSet, "&", intersection, 1);
513
507
  rb_define_method(cNokogiriXmlNodeSet, "include?", include_eh, 1);
508
+ rb_define_method(cNokogiriXmlNodeSet, "length", length, 0);
509
+ rb_define_method(cNokogiriXmlNodeSet, "push", push, 1);
510
+ rb_define_method(cNokogiriXmlNodeSet, "slice", slice, -1);
511
+ rb_define_method(cNokogiriXmlNodeSet, "to_a", to_array, 0);
512
+ rb_define_method(cNokogiriXmlNodeSet, "unlink", unlink_nodeset, 0);
513
+ rb_define_method(cNokogiriXmlNodeSet, "|", rb_xml_node_set_union, 1);
514
+
515
+ rb_define_private_method(cNokogiriXmlNodeSet, "initialize_copy", rb_xml_node_set_initialize_copy, 1);
514
516
 
515
517
  decorate = rb_intern("decorate");
516
518
  }