nokogiri 1.16.8 → 1.18.2

Sign up to get free protection for your applications and to get access to all the features.
Files changed (95) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +11 -21
  3. data/LICENSE-DEPENDENCIES.md +6 -6
  4. data/README.md +8 -5
  5. data/dependencies.yml +6 -6
  6. data/ext/nokogiri/extconf.rb +188 -142
  7. data/ext/nokogiri/gumbo.c +69 -53
  8. data/ext/nokogiri/html4_document.c +10 -4
  9. data/ext/nokogiri/html4_element_description.c +18 -18
  10. data/ext/nokogiri/html4_sax_parser.c +40 -0
  11. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  12. data/ext/nokogiri/html4_sax_push_parser.c +25 -24
  13. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  14. data/ext/nokogiri/nokogiri.c +9 -2
  15. data/ext/nokogiri/nokogiri.h +18 -33
  16. data/ext/nokogiri/xml_attr.c +1 -1
  17. data/ext/nokogiri/xml_cdata.c +2 -10
  18. data/ext/nokogiri/xml_comment.c +3 -8
  19. data/ext/nokogiri/xml_document.c +163 -156
  20. data/ext/nokogiri/xml_document_fragment.c +10 -25
  21. data/ext/nokogiri/xml_dtd.c +1 -1
  22. data/ext/nokogiri/xml_element_content.c +9 -9
  23. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  24. data/ext/nokogiri/xml_namespace.c +6 -6
  25. data/ext/nokogiri/xml_node.c +134 -103
  26. data/ext/nokogiri/xml_node_set.c +46 -44
  27. data/ext/nokogiri/xml_reader.c +54 -58
  28. data/ext/nokogiri/xml_relax_ng.c +35 -56
  29. data/ext/nokogiri/xml_sax_parser.c +156 -88
  30. data/ext/nokogiri/xml_sax_parser_context.c +219 -131
  31. data/ext/nokogiri/xml_sax_push_parser.c +68 -49
  32. data/ext/nokogiri/xml_schema.c +50 -85
  33. data/ext/nokogiri/xml_syntax_error.c +19 -11
  34. data/ext/nokogiri/xml_text.c +2 -4
  35. data/ext/nokogiri/xml_xpath_context.c +103 -100
  36. data/ext/nokogiri/xslt_stylesheet.c +8 -8
  37. data/gumbo-parser/src/ascii.c +2 -2
  38. data/gumbo-parser/src/error.c +76 -48
  39. data/gumbo-parser/src/error.h +5 -1
  40. data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
  41. data/gumbo-parser/src/parser.c +63 -25
  42. data/gumbo-parser/src/tokenizer.c +6 -6
  43. data/lib/nokogiri/class_resolver.rb +1 -1
  44. data/lib/nokogiri/css/node.rb +6 -2
  45. data/lib/nokogiri/css/parser.rb +6 -4
  46. data/lib/nokogiri/css/parser.y +2 -2
  47. data/lib/nokogiri/css/parser_extras.rb +6 -66
  48. data/lib/nokogiri/css/selector_cache.rb +38 -0
  49. data/lib/nokogiri/css/tokenizer.rb +4 -4
  50. data/lib/nokogiri/css/tokenizer.rex +9 -8
  51. data/lib/nokogiri/css/xpath_visitor.rb +43 -6
  52. data/lib/nokogiri/css.rb +86 -20
  53. data/lib/nokogiri/decorators/slop.rb +3 -5
  54. data/lib/nokogiri/encoding_handler.rb +2 -2
  55. data/lib/nokogiri/html4/document.rb +44 -23
  56. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  57. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  58. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  59. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  60. data/lib/nokogiri/html4.rb +9 -14
  61. data/lib/nokogiri/html5/builder.rb +40 -0
  62. data/lib/nokogiri/html5/document.rb +61 -30
  63. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  64. data/lib/nokogiri/html5/node.rb +4 -4
  65. data/lib/nokogiri/html5.rb +114 -72
  66. data/lib/nokogiri/version/constant.rb +1 -1
  67. data/lib/nokogiri/xml/builder.rb +8 -1
  68. data/lib/nokogiri/xml/document.rb +70 -26
  69. data/lib/nokogiri/xml/document_fragment.rb +84 -13
  70. data/lib/nokogiri/xml/node.rb +82 -11
  71. data/lib/nokogiri/xml/node_set.rb +9 -7
  72. data/lib/nokogiri/xml/parse_options.rb +1 -1
  73. data/lib/nokogiri/xml/pp/node.rb +6 -1
  74. data/lib/nokogiri/xml/reader.rb +46 -13
  75. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  76. data/lib/nokogiri/xml/sax/document.rb +174 -83
  77. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  78. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  79. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  80. data/lib/nokogiri/xml/sax.rb +48 -0
  81. data/lib/nokogiri/xml/schema.rb +112 -45
  82. data/lib/nokogiri/xml/searchable.rb +38 -42
  83. data/lib/nokogiri/xml/syntax_error.rb +22 -0
  84. data/lib/nokogiri/xml/xpath_context.rb +14 -3
  85. data/lib/nokogiri/xml.rb +13 -24
  86. data/lib/nokogiri/xslt.rb +3 -9
  87. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  88. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  89. data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
  90. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  91. metadata +13 -12
  92. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
  93. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
  94. data/ports/archives/libxml2-2.12.9.tar.xz +0 -0
  95. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
@@ -38,8 +38,8 @@ _xml_node_update_references(void *ptr)
38
38
  }
39
39
  }
40
40
 
41
- static const rb_data_type_t nokogiri_node_type = {
42
- .wrap_struct_name = "Nokogiri::XML::Node",
41
+ static const rb_data_type_t xml_node_type = {
42
+ .wrap_struct_name = "xmlNode",
43
43
  .function = {
44
44
  .dmark = _xml_node_mark,
45
45
  .dcompact = _xml_node_update_references,
@@ -47,6 +47,24 @@ static const rb_data_type_t nokogiri_node_type = {
47
47
  .flags = RUBY_TYPED_FREE_IMMEDIATELY,
48
48
  };
49
49
 
50
+ static VALUE
51
+ _xml_node_alloc(VALUE klass)
52
+ {
53
+ return TypedData_Wrap_Struct(klass, &xml_node_type, NULL);
54
+ }
55
+
56
+ static void
57
+ _xml_node_data_ptr_set(VALUE rb_node, xmlNodePtr c_node)
58
+ {
59
+ assert(DATA_PTR(rb_node) == NULL);
60
+ assert(c_node->_private == NULL);
61
+
62
+ DATA_PTR(rb_node) = c_node;
63
+ c_node->_private = (void *)rb_node;
64
+
65
+ return;
66
+ }
67
+
50
68
  static void
51
69
  relink_namespace(xmlNodePtr reparented)
52
70
  {
@@ -141,7 +159,7 @@ relink_namespace(xmlNodePtr reparented)
141
159
  /* reparent. */
142
160
  if (NULL == reparented->ns) { return; }
143
161
 
144
- /* When a node gets reparented, walk it's children to make sure that */
162
+ /* When a node gets reparented, walk its children to make sure that */
145
163
  /* their namespaces are reparented as well. */
146
164
  child = reparented->children;
147
165
  while (NULL != child) {
@@ -944,51 +962,30 @@ internal_subset(VALUE self)
944
962
  return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
945
963
  }
946
964
 
947
- /*
948
- * :call-seq:
949
- * dup → Nokogiri::XML::Node
950
- * dup(depth) → Nokogiri::XML::Node
951
- * dup(depth, new_parent_doc) → Nokogiri::XML::Node
952
- *
953
- * Copy this node.
954
- *
955
- * [Parameters]
956
- * - +depth+ 0 is a shallow copy, 1 (the default) is a deep copy.
957
- * - +new_parent_doc+
958
- * The new node's parent Document. Defaults to the this node's document.
959
- *
960
- * [Returns] The new Nokogiri::XML::Node
961
- */
965
+ /* :nodoc: */
962
966
  static VALUE
963
- duplicate_node(int argc, VALUE *argv, VALUE self)
967
+ rb_xml_node_initialize_copy_with_args(VALUE rb_self, VALUE rb_other, VALUE rb_level, VALUE rb_new_parent_doc)
964
968
  {
965
- VALUE r_level, r_new_parent_doc;
966
- int level;
967
- int n_args;
968
- xmlDocPtr new_parent_doc;
969
- xmlNodePtr node, dup;
969
+ xmlNodePtr c_self, c_other;
970
+ int c_level;
971
+ xmlDocPtr c_new_parent_doc;
972
+ VALUE rb_node_cache;
970
973
 
971
- Noko_Node_Get_Struct(self, xmlNode, node);
974
+ Noko_Node_Get_Struct(rb_other, xmlNode, c_other);
975
+ c_level = (int)NUM2INT(rb_level);
976
+ c_new_parent_doc = noko_xml_document_unwrap(rb_new_parent_doc);
972
977
 
973
- n_args = rb_scan_args(argc, argv, "02", &r_level, &r_new_parent_doc);
978
+ c_self = xmlDocCopyNode(c_other, c_new_parent_doc, c_level);
979
+ if (c_self == NULL) { return Qnil; }
974
980
 
975
- if (n_args < 1) {
976
- r_level = INT2NUM((long)1);
977
- }
978
- level = (int)NUM2INT(r_level);
981
+ _xml_node_data_ptr_set(rb_self, c_self);
982
+ noko_xml_document_pin_node(c_self);
979
983
 
980
- if (n_args < 2) {
981
- new_parent_doc = node->doc;
982
- } else {
983
- new_parent_doc = noko_xml_document_unwrap(r_new_parent_doc);
984
- }
984
+ rb_node_cache = DOC_NODE_CACHE(c_new_parent_doc);
985
+ rb_ary_push(rb_node_cache, rb_self);
986
+ rb_funcall(rb_new_parent_doc, id_decorate, 1, rb_self);
985
987
 
986
- dup = xmlDocCopyNode(node, new_parent_doc, level);
987
- if (dup == NULL) { return Qnil; }
988
-
989
- noko_xml_document_pin_node(dup);
990
-
991
- return noko_xml_node_wrap(rb_obj_class(self), dup);
988
+ return rb_self;
992
989
  }
993
990
 
994
991
  /*
@@ -1074,17 +1071,10 @@ previous_element(VALUE self)
1074
1071
  xmlNodePtr node, sibling;
1075
1072
  Noko_Node_Get_Struct(self, xmlNode, node);
1076
1073
 
1077
- /*
1078
- * note that we don't use xmlPreviousElementSibling here because it's buggy pre-2.7.7.
1079
- */
1080
- sibling = node->prev;
1074
+ sibling = xmlPreviousElementSibling(node);
1081
1075
  if (!sibling) { return Qnil; }
1082
1076
 
1083
- while (sibling && sibling->type != XML_ELEMENT_NODE) {
1084
- sibling = sibling->prev;
1085
- }
1086
-
1087
- return sibling ? noko_xml_node_wrap(Qnil, sibling) : Qnil ;
1077
+ return noko_xml_node_wrap(Qnil, sibling);
1088
1078
  }
1089
1079
 
1090
1080
  /* :nodoc: */
@@ -1500,9 +1490,44 @@ node_type(VALUE self)
1500
1490
 
1501
1491
  /*
1502
1492
  * call-seq:
1503
- * content=
1493
+ * native_content=(input)
1494
+ *
1495
+ * Set the content of this node to +input+.
1496
+ *
1497
+ * [Parameters]
1498
+ * - +input+ (String) The new content for this node.
1499
+ *
1500
+ * ⚠ This method behaves differently depending on the node type. For Text, CDATA, Comment, and
1501
+ * ProcessingInstruction nodes, it treats the input as raw content, which means that the final DOM
1502
+ * will contain the entity-escaped version of the input (see example below). For Element and Attr
1503
+ * nodes, it treats the input as parsed content and expects it to be valid markup that is already
1504
+ * entity-escaped.
1504
1505
  *
1505
- * Set the content for this Node
1506
+ * 💡 Use Node#content= for a more consistent API across node types.
1507
+ *
1508
+ * [Example]
1509
+ * Note the behavior differences of this method between Text and Element nodes:
1510
+ *
1511
+ * doc = Nokogiri::HTML::Document.parse(<<~HTML)
1512
+ * <html>
1513
+ * <body>
1514
+ * <div id="first">asdf</div>
1515
+ * <div id="second">asdf</div>
1516
+ * HTML
1517
+ *
1518
+ * text_node = doc.at_css("div#first").children.first
1519
+ * div_node = doc.at_css("div#second")
1520
+ *
1521
+ * value = "You &amp; Me"
1522
+ *
1523
+ * text_node.native_content = value
1524
+ * div_node.native_content = value
1525
+ *
1526
+ * doc.css("div").to_html
1527
+ * # => "<div id=\"first\">You &amp;amp; Me</div>
1528
+ * # <div id=\"second\">You &amp; Me</div>"
1529
+ *
1530
+ * See also: #content=
1506
1531
  */
1507
1532
  static VALUE
1508
1533
  set_native_content(VALUE self, VALUE content)
@@ -1813,12 +1838,12 @@ output_escaped_string(VALUE out, xmlChar const *start, bool attr)
1813
1838
  ++next;
1814
1839
  continue;
1815
1840
  }
1816
- output_partial_string(out, (char const *)start, next - start);
1841
+ output_partial_string(out, (char const *)start, (size_t)(next - start));
1817
1842
  output_string(out, replacement);
1818
1843
  next += replaced_bytes;
1819
1844
  start = next;
1820
1845
  }
1821
- output_partial_string(out, (char const *)start, next - start);
1846
+ output_partial_string(out, (char const *)start, (size_t)(next - start));
1822
1847
  }
1823
1848
 
1824
1849
  static bool
@@ -1889,17 +1914,7 @@ output_node(
1889
1914
  // Add attributes.
1890
1915
  for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
1891
1916
  output_char(out, ' ');
1892
- output_attr_name(out, attr);
1893
- if (attr->children) {
1894
- output_string(out, "=\"");
1895
- xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1896
- output_escaped_string(out, value, true);
1897
- xmlFree(value);
1898
- output_char(out, '"');
1899
- } else {
1900
- // Output name=""
1901
- output_string(out, "=\"\"");
1902
- }
1917
+ output_node(out, (xmlNodePtr)attr, preserve_newline);
1903
1918
  }
1904
1919
  output_char(out, '>');
1905
1920
 
@@ -1917,6 +1932,22 @@ output_node(
1917
1932
  }
1918
1933
  break;
1919
1934
 
1935
+ case XML_ATTRIBUTE_NODE: {
1936
+ xmlAttrPtr attr = (xmlAttrPtr)node;
1937
+ output_attr_name(out, attr);
1938
+ if (attr->children) {
1939
+ output_string(out, "=\"");
1940
+ xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1941
+ output_escaped_string(out, value, true);
1942
+ xmlFree(value);
1943
+ output_char(out, '"');
1944
+ } else {
1945
+ // Output name=""
1946
+ output_string(out, "=\"\"");
1947
+ }
1948
+ }
1949
+ break;
1950
+
1920
1951
  case XML_TEXT_NODE:
1921
1952
  if (node->parent
1922
1953
  && is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS,
@@ -2032,11 +2063,11 @@ rb_xml_node_line_set(VALUE rb_node, VALUE rb_line_number)
2032
2063
  // libxml2 optionally uses xmlNode.psvi to store longer line numbers, but only for text nodes.
2033
2064
  // search for "psvi" in SAX2.c and tree.c to learn more.
2034
2065
  if (line_number < 65535) {
2035
- c_node->line = (short) line_number;
2066
+ c_node->line = (short unsigned)line_number;
2036
2067
  } else {
2037
2068
  c_node->line = 65535;
2038
2069
  if (c_node->type == XML_TEXT_NODE) {
2039
- c_node->psvi = (void *)(ptrdiff_t) line_number;
2070
+ c_node->psvi = (void *)(ptrdiff_t)line_number;
2040
2071
  }
2041
2072
  }
2042
2073
 
@@ -2096,7 +2127,7 @@ dump_html(VALUE self)
2096
2127
 
2097
2128
  buf = xmlBufferCreate() ;
2098
2129
  htmlNodeDump(buf, node->doc, node);
2099
- html = NOKOGIRI_STR_NEW2(buf->content);
2130
+ html = NOKOGIRI_STR_NEW2(xmlBufferContent(buf));
2100
2131
  xmlBufferFree(buf);
2101
2132
  return html ;
2102
2133
  }
@@ -2120,36 +2151,38 @@ compare(VALUE self, VALUE _other)
2120
2151
 
2121
2152
  /*
2122
2153
  * call-seq:
2123
- * process_xincludes(options)
2154
+ * process_xincludes(flags)
2124
2155
  *
2125
2156
  * Loads and substitutes all xinclude elements below the node. The
2126
- * parser context will be initialized with +options+.
2157
+ * parser context will be initialized with +flags+.
2127
2158
  */
2128
2159
  static VALUE
2129
- process_xincludes(VALUE self, VALUE options)
2160
+ noko_xml_node__process_xincludes(VALUE rb_node, VALUE rb_flags)
2130
2161
  {
2131
- int rcode ;
2132
- xmlNodePtr node;
2133
- VALUE error_list = rb_ary_new();
2162
+ int status ;
2163
+ xmlNodePtr c_node;
2164
+ VALUE rb_errors = rb_ary_new();
2165
+ libxmlStructuredErrorHandlerState handler_state;
2134
2166
 
2135
- Noko_Node_Get_Struct(self, xmlNode, node);
2167
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
2136
2168
 
2137
- xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
2138
- rcode = xmlXIncludeProcessTreeFlags(node, (int)NUM2INT(options));
2139
- xmlSetStructuredErrorFunc(NULL, NULL);
2169
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
2170
+
2171
+ status = xmlXIncludeProcessTreeFlags(c_node, (int)NUM2INT(rb_flags));
2172
+
2173
+ noko__structured_error_func_restore(&handler_state);
2140
2174
 
2141
- if (rcode < 0) {
2142
- xmlErrorConstPtr error;
2175
+ if (status < 0) {
2176
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
2143
2177
 
2144
- error = xmlGetLastError();
2145
- if (error) {
2146
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
2178
+ if (RB_TEST(exception)) {
2179
+ rb_exc_raise(exception);
2147
2180
  } else {
2148
2181
  rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
2149
2182
  }
2150
2183
  }
2151
2184
 
2152
- return self;
2185
+ return rb_node;
2153
2186
  }
2154
2187
 
2155
2188
 
@@ -2171,16 +2204,7 @@ in_context(VALUE self, VALUE _str, VALUE _options)
2171
2204
  node_children = node->children;
2172
2205
  doc_children = node->doc->children;
2173
2206
 
2174
- xmlSetStructuredErrorFunc((void *)err, Nokogiri_error_array_pusher);
2175
-
2176
- /* Twiddle global variable because of a bug in libxml2.
2177
- * http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
2178
- *
2179
- * TODO: this is fixed, and HTML_PARSE_NOIMPLIED is defined, in libxml2 2.7.7
2180
- */
2181
- #ifndef HTML_PARSE_NOIMPLIED
2182
- htmlHandleOmittedElem(0);
2183
- #endif
2207
+ xmlSetStructuredErrorFunc((void *)err, noko__error_array_pusher);
2184
2208
 
2185
2209
  /* This function adds a fake node to the child of +node+. If the parser
2186
2210
  * does not exit cleanly with XML_ERR_OK, the list is freed. This can
@@ -2210,10 +2234,6 @@ in_context(VALUE self, VALUE _str, VALUE _options)
2210
2234
  child_iter = child_iter->next;
2211
2235
  }
2212
2236
 
2213
- #ifndef HTML_PARSE_NOIMPLIED
2214
- htmlHandleOmittedElem(1);
2215
- #endif
2216
-
2217
2237
  xmlSetStructuredErrorFunc(NULL, NULL);
2218
2238
 
2219
2239
  /*
@@ -2262,6 +2282,15 @@ in_context(VALUE self, VALUE _str, VALUE _options)
2262
2282
  return noko_xml_node_set_wrap(set, doc);
2263
2283
  }
2264
2284
 
2285
+ /* :nodoc: */
2286
+ VALUE
2287
+ rb_xml_node_data_ptr_eh(VALUE self)
2288
+ {
2289
+ xmlNodePtr c_node;
2290
+ Noko_Node_Get_Struct(self, xmlNode, c_node);
2291
+ return c_node ? Qtrue : Qfalse;
2292
+ }
2293
+
2265
2294
  VALUE
2266
2295
  noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
2267
2296
  {
@@ -2327,8 +2356,8 @@ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
2327
2356
  }
2328
2357
  }
2329
2358
 
2330
- rb_node = TypedData_Wrap_Struct(rb_class, &nokogiri_node_type, c_node) ;
2331
- c_node->_private = (void *)rb_node;
2359
+ rb_node = _xml_node_alloc(rb_class);
2360
+ _xml_node_data_ptr_set(rb_node, c_node);
2332
2361
 
2333
2362
  if (node_has_a_document) {
2334
2363
  rb_document = DOC_RUBY_OBJECT(c_doc);
@@ -2364,7 +2393,7 @@ noko_init_xml_node(void)
2364
2393
  {
2365
2394
  cNokogiriXmlNode = rb_define_class_under(mNokogiriXml, "Node", rb_cObject);
2366
2395
 
2367
- rb_undef_alloc_func(cNokogiriXmlNode);
2396
+ rb_define_alloc_func(cNokogiriXmlNode, _xml_node_alloc);
2368
2397
 
2369
2398
  rb_define_singleton_method(cNokogiriXmlNode, "new", rb_xml_node_new, -1);
2370
2399
 
@@ -2378,8 +2407,8 @@ noko_init_xml_node(void)
2378
2407
  rb_define_method(cNokogiriXmlNode, "content", rb_xml_node_content, 0);
2379
2408
  rb_define_method(cNokogiriXmlNode, "create_external_subset", create_external_subset, 3);
2380
2409
  rb_define_method(cNokogiriXmlNode, "create_internal_subset", create_internal_subset, 3);
2410
+ rb_define_method(cNokogiriXmlNode, "data_ptr?", rb_xml_node_data_ptr_eh, 0);
2381
2411
  rb_define_method(cNokogiriXmlNode, "document", rb_xml_node_document, 0);
2382
- rb_define_method(cNokogiriXmlNode, "dup", duplicate_node, -1);
2383
2412
  rb_define_method(cNokogiriXmlNode, "element_children", rb_xml_node_element_children, 0);
2384
2413
  rb_define_method(cNokogiriXmlNode, "encode_special_chars", encode_special_chars, 1);
2385
2414
  rb_define_method(cNokogiriXmlNode, "external_subset", external_subset, 0);
@@ -2408,6 +2437,8 @@ noko_init_xml_node(void)
2408
2437
  rb_define_method(cNokogiriXmlNode, "previous_sibling", previous_sibling, 0);
2409
2438
  rb_define_method(cNokogiriXmlNode, "unlink", unlink_node, 0);
2410
2439
 
2440
+ rb_define_protected_method(cNokogiriXmlNode, "initialize_copy_with_args", rb_xml_node_initialize_copy_with_args, 3);
2441
+
2411
2442
  rb_define_private_method(cNokogiriXmlNode, "add_child_node", add_child, 1);
2412
2443
  rb_define_private_method(cNokogiriXmlNode, "add_next_sibling_node", add_next_sibling, 1);
2413
2444
  rb_define_private_method(cNokogiriXmlNode, "add_previous_sibling_node", add_previous_sibling, 1);
@@ -2418,7 +2449,7 @@ noko_init_xml_node(void)
2418
2449
  rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4);
2419
2450
  rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0);
2420
2451
  rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1);
2421
- rb_define_private_method(cNokogiriXmlNode, "process_xincludes", process_xincludes, 1);
2452
+ rb_define_private_method(cNokogiriXmlNode, "process_xincludes", noko_xml_node__process_xincludes, 1);
2422
2453
  rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1);
2423
2454
  rb_define_private_method(cNokogiriXmlNode, "set", set, 2);
2424
2455
  rb_define_private_method(cNokogiriXmlNode, "set_namespace", set_namespace, 1);
@@ -68,15 +68,8 @@ xml_node_set_deallocate(void *data)
68
68
  xmlFree(node_set);
69
69
  }
70
70
 
71
-
72
- static VALUE
73
- xml_node_set_allocate(VALUE klass)
74
- {
75
- return noko_xml_node_set_wrap(xmlXPathNodeSetCreate(NULL), Qnil);
76
- }
77
-
78
71
  static const rb_data_type_t xml_node_set_type = {
79
- .wrap_struct_name = "Nokogiri::XML::NodeSet",
72
+ .wrap_struct_name = "xmlNodeSet",
80
73
  .function = {
81
74
  .dmark = xml_node_set_mark,
82
75
  .dfree = xml_node_set_deallocate,
@@ -84,6 +77,33 @@ static const rb_data_type_t xml_node_set_type = {
84
77
  .flags = RUBY_TYPED_FREE_IMMEDIATELY,
85
78
  };
86
79
 
80
+ static VALUE
81
+ xml_node_set_allocate(VALUE klass)
82
+ {
83
+ return TypedData_Wrap_Struct(klass, &xml_node_set_type, xmlXPathNodeSetCreate(NULL));
84
+ }
85
+
86
+ /* :nodoc: */
87
+ static VALUE
88
+ rb_xml_node_set_initialize_copy(VALUE rb_self, VALUE rb_other)
89
+ {
90
+ xmlNodeSetPtr c_self, c_other;
91
+ VALUE rb_document;
92
+
93
+ TypedData_Get_Struct(rb_self, xmlNodeSet, &xml_node_set_type, c_self);
94
+ TypedData_Get_Struct(rb_other, xmlNodeSet, &xml_node_set_type, c_other);
95
+
96
+ xmlXPathNodeSetMerge(c_self, c_other);
97
+
98
+ rb_document = rb_iv_get(rb_other, "@document");
99
+ if (!NIL_P(rb_document)) {
100
+ rb_iv_set(rb_self, "@document", rb_document);
101
+ rb_funcall(rb_document, decorate, 1, rb_self);
102
+ }
103
+
104
+ return rb_self;
105
+ }
106
+
87
107
  static void
88
108
  xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val)
89
109
  {
@@ -112,27 +132,6 @@ xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val)
112
132
  cur->nodeTab[cur->nodeNr] = NULL;
113
133
  }
114
134
 
115
-
116
- /*
117
- * call-seq:
118
- * dup
119
- *
120
- * Duplicate this NodeSet. Note that the Nodes contained in the NodeSet are not
121
- * duplicated (similar to how Array and other Enumerable classes work).
122
- */
123
- static VALUE
124
- duplicate(VALUE rb_self)
125
- {
126
- xmlNodeSetPtr c_self;
127
- xmlNodeSetPtr dupl;
128
-
129
- TypedData_Get_Struct(rb_self, xmlNodeSet, &xml_node_set_type, c_self);
130
-
131
- dupl = xmlXPathNodeSetMerge(NULL, c_self);
132
-
133
- return noko_xml_node_set_wrap(dupl, rb_iv_get(rb_self, "@document"));
134
- }
135
-
136
135
  /*
137
136
  * call-seq:
138
137
  * length
@@ -453,19 +452,21 @@ noko_xml_node_set_wrap(xmlNodeSetPtr c_node_set, VALUE document)
453
452
  VALUE rb_node_set ;
454
453
 
455
454
  if (c_node_set == NULL) {
456
- c_node_set = xmlXPathNodeSetCreate(NULL);
455
+ rb_node_set = xml_node_set_allocate(cNokogiriXmlNodeSet);
456
+ } else {
457
+ rb_node_set = TypedData_Wrap_Struct(cNokogiriXmlNodeSet, &xml_node_set_type, c_node_set);
457
458
  }
458
459
 
459
- rb_node_set = TypedData_Wrap_Struct(cNokogiriXmlNodeSet, &xml_node_set_type, c_node_set);
460
-
461
460
  if (!NIL_P(document)) {
462
461
  rb_iv_set(rb_node_set, "@document", document);
463
462
  rb_funcall(document, decorate, 1, rb_node_set);
464
463
  }
465
464
 
466
- /* make sure we create ruby objects for all the results, so they'll be marked during the GC mark phase */
467
- for (j = 0 ; j < c_node_set->nodeNr ; j++) {
468
- noko_xml_node_wrap_node_set_result(c_node_set->nodeTab[j], rb_node_set);
465
+ if (c_node_set) {
466
+ /* create ruby objects for all the results, so they'll be marked during the GC mark phase */
467
+ for (j = 0 ; j < c_node_set->nodeNr ; j++) {
468
+ noko_xml_node_wrap_node_set_result(c_node_set->nodeTab[j], rb_node_set);
469
+ }
469
470
  }
470
471
 
471
472
  return rb_node_set ;
@@ -499,18 +500,19 @@ noko_init_xml_node_set(void)
499
500
 
500
501
  rb_define_alloc_func(cNokogiriXmlNodeSet, xml_node_set_allocate);
501
502
 
502
- rb_define_method(cNokogiriXmlNodeSet, "length", length, 0);
503
- rb_define_method(cNokogiriXmlNodeSet, "[]", slice, -1);
504
- rb_define_method(cNokogiriXmlNodeSet, "slice", slice, -1);
505
- rb_define_method(cNokogiriXmlNodeSet, "push", push, 1);
506
- rb_define_method(cNokogiriXmlNodeSet, "|", rb_xml_node_set_union, 1);
503
+ rb_define_method(cNokogiriXmlNodeSet, "&", intersection, 1);
507
504
  rb_define_method(cNokogiriXmlNodeSet, "-", minus, 1);
508
- rb_define_method(cNokogiriXmlNodeSet, "unlink", unlink_nodeset, 0);
509
- rb_define_method(cNokogiriXmlNodeSet, "to_a", to_array, 0);
510
- rb_define_method(cNokogiriXmlNodeSet, "dup", duplicate, 0);
505
+ rb_define_method(cNokogiriXmlNodeSet, "[]", slice, -1);
511
506
  rb_define_method(cNokogiriXmlNodeSet, "delete", delete, 1);
512
- rb_define_method(cNokogiriXmlNodeSet, "&", intersection, 1);
513
507
  rb_define_method(cNokogiriXmlNodeSet, "include?", include_eh, 1);
508
+ rb_define_method(cNokogiriXmlNodeSet, "length", length, 0);
509
+ rb_define_method(cNokogiriXmlNodeSet, "push", push, 1);
510
+ rb_define_method(cNokogiriXmlNodeSet, "slice", slice, -1);
511
+ rb_define_method(cNokogiriXmlNodeSet, "to_a", to_array, 0);
512
+ rb_define_method(cNokogiriXmlNodeSet, "unlink", unlink_nodeset, 0);
513
+ rb_define_method(cNokogiriXmlNodeSet, "|", rb_xml_node_set_union, 1);
514
+
515
+ rb_define_private_method(cNokogiriXmlNodeSet, "initialize_copy", rb_xml_node_set_initialize_copy, 1);
514
516
 
515
517
  decorate = rb_intern("decorate");
516
518
  }