nokogiri 1.16.8 → 1.18.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (95)
  1. checksums.yaml +4 -4
  2. data/Gemfile +14 -22
  3. data/LICENSE-DEPENDENCIES.md +6 -6
  4. data/README.md +8 -5
  5. data/dependencies.yml +6 -6
  6. data/ext/nokogiri/extconf.rb +188 -142
  7. data/ext/nokogiri/gumbo.c +69 -53
  8. data/ext/nokogiri/html4_document.c +10 -4
  9. data/ext/nokogiri/html4_element_description.c +18 -18
  10. data/ext/nokogiri/html4_sax_parser.c +40 -0
  11. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  12. data/ext/nokogiri/html4_sax_push_parser.c +25 -24
  13. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  14. data/ext/nokogiri/nokogiri.c +9 -2
  15. data/ext/nokogiri/nokogiri.h +18 -33
  16. data/ext/nokogiri/xml_attr.c +1 -1
  17. data/ext/nokogiri/xml_cdata.c +2 -10
  18. data/ext/nokogiri/xml_comment.c +3 -8
  19. data/ext/nokogiri/xml_document.c +163 -156
  20. data/ext/nokogiri/xml_document_fragment.c +10 -25
  21. data/ext/nokogiri/xml_dtd.c +1 -1
  22. data/ext/nokogiri/xml_element_content.c +9 -9
  23. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  24. data/ext/nokogiri/xml_namespace.c +6 -6
  25. data/ext/nokogiri/xml_node.c +134 -103
  26. data/ext/nokogiri/xml_node_set.c +46 -44
  27. data/ext/nokogiri/xml_reader.c +54 -58
  28. data/ext/nokogiri/xml_relax_ng.c +35 -56
  29. data/ext/nokogiri/xml_sax_parser.c +156 -88
  30. data/ext/nokogiri/xml_sax_parser_context.c +219 -131
  31. data/ext/nokogiri/xml_sax_push_parser.c +68 -49
  32. data/ext/nokogiri/xml_schema.c +50 -85
  33. data/ext/nokogiri/xml_syntax_error.c +19 -11
  34. data/ext/nokogiri/xml_text.c +2 -4
  35. data/ext/nokogiri/xml_xpath_context.c +103 -100
  36. data/ext/nokogiri/xslt_stylesheet.c +8 -8
  37. data/gumbo-parser/src/ascii.c +2 -2
  38. data/gumbo-parser/src/error.c +76 -48
  39. data/gumbo-parser/src/error.h +5 -1
  40. data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
  41. data/gumbo-parser/src/parser.c +63 -25
  42. data/gumbo-parser/src/tokenizer.c +6 -6
  43. data/lib/nokogiri/class_resolver.rb +1 -1
  44. data/lib/nokogiri/css/node.rb +6 -2
  45. data/lib/nokogiri/css/parser.rb +6 -4
  46. data/lib/nokogiri/css/parser.y +2 -2
  47. data/lib/nokogiri/css/parser_extras.rb +6 -66
  48. data/lib/nokogiri/css/selector_cache.rb +38 -0
  49. data/lib/nokogiri/css/tokenizer.rb +4 -4
  50. data/lib/nokogiri/css/tokenizer.rex +9 -8
  51. data/lib/nokogiri/css/xpath_visitor.rb +42 -6
  52. data/lib/nokogiri/css.rb +86 -20
  53. data/lib/nokogiri/decorators/slop.rb +3 -5
  54. data/lib/nokogiri/encoding_handler.rb +2 -2
  55. data/lib/nokogiri/html4/document.rb +44 -23
  56. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  57. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  58. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  59. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  60. data/lib/nokogiri/html4.rb +9 -14
  61. data/lib/nokogiri/html5/builder.rb +40 -0
  62. data/lib/nokogiri/html5/document.rb +61 -30
  63. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  64. data/lib/nokogiri/html5/node.rb +4 -4
  65. data/lib/nokogiri/html5.rb +114 -72
  66. data/lib/nokogiri/version/constant.rb +1 -1
  67. data/lib/nokogiri/xml/builder.rb +8 -1
  68. data/lib/nokogiri/xml/document.rb +70 -26
  69. data/lib/nokogiri/xml/document_fragment.rb +84 -13
  70. data/lib/nokogiri/xml/node.rb +82 -11
  71. data/lib/nokogiri/xml/node_set.rb +9 -7
  72. data/lib/nokogiri/xml/parse_options.rb +1 -1
  73. data/lib/nokogiri/xml/pp/node.rb +6 -1
  74. data/lib/nokogiri/xml/reader.rb +46 -13
  75. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  76. data/lib/nokogiri/xml/sax/document.rb +174 -83
  77. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  78. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  79. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  80. data/lib/nokogiri/xml/sax.rb +48 -0
  81. data/lib/nokogiri/xml/schema.rb +112 -45
  82. data/lib/nokogiri/xml/searchable.rb +38 -42
  83. data/lib/nokogiri/xml/syntax_error.rb +22 -0
  84. data/lib/nokogiri/xml/xpath_context.rb +14 -3
  85. data/lib/nokogiri/xml.rb +13 -24
  86. data/lib/nokogiri/xslt.rb +3 -9
  87. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  88. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  89. data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
  90. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  91. metadata +13 -12
  92. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
  93. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
  94. data/ports/archives/libxml2-2.12.9.tar.xz +0 -0
  95. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
@@ -38,8 +38,8 @@ _xml_node_update_references(void *ptr)
38
38
  }
39
39
  }
40
40
 
41
- static const rb_data_type_t nokogiri_node_type = {
42
- .wrap_struct_name = "Nokogiri::XML::Node",
41
+ static const rb_data_type_t xml_node_type = {
42
+ .wrap_struct_name = "xmlNode",
43
43
  .function = {
44
44
  .dmark = _xml_node_mark,
45
45
  .dcompact = _xml_node_update_references,
@@ -47,6 +47,24 @@ static const rb_data_type_t nokogiri_node_type = {
47
47
  .flags = RUBY_TYPED_FREE_IMMEDIATELY,
48
48
  };
49
49
 
50
+ static VALUE
51
+ _xml_node_alloc(VALUE klass)
52
+ {
53
+ return TypedData_Wrap_Struct(klass, &xml_node_type, NULL);
54
+ }
55
+
56
+ static void
57
+ _xml_node_data_ptr_set(VALUE rb_node, xmlNodePtr c_node)
58
+ {
59
+ assert(DATA_PTR(rb_node) == NULL);
60
+ assert(c_node->_private == NULL);
61
+
62
+ DATA_PTR(rb_node) = c_node;
63
+ c_node->_private = (void *)rb_node;
64
+
65
+ return;
66
+ }
67
+
50
68
  static void
51
69
  relink_namespace(xmlNodePtr reparented)
52
70
  {
@@ -141,7 +159,7 @@ relink_namespace(xmlNodePtr reparented)
141
159
  /* reparent. */
142
160
  if (NULL == reparented->ns) { return; }
143
161
 
144
- /* When a node gets reparented, walk it's children to make sure that */
162
+ /* When a node gets reparented, walk its children to make sure that */
145
163
  /* their namespaces are reparented as well. */
146
164
  child = reparented->children;
147
165
  while (NULL != child) {
@@ -944,51 +962,30 @@ internal_subset(VALUE self)
944
962
  return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
945
963
  }
946
964
 
947
- /*
948
- * :call-seq:
949
- * dup → Nokogiri::XML::Node
950
- * dup(depth) → Nokogiri::XML::Node
951
- * dup(depth, new_parent_doc) → Nokogiri::XML::Node
952
- *
953
- * Copy this node.
954
- *
955
- * [Parameters]
956
- * - +depth+ 0 is a shallow copy, 1 (the default) is a deep copy.
957
- * - +new_parent_doc+
958
- * The new node's parent Document. Defaults to the this node's document.
959
- *
960
- * [Returns] The new Nokogiri::XML::Node
961
- */
965
+ /* :nodoc: */
962
966
  static VALUE
963
- duplicate_node(int argc, VALUE *argv, VALUE self)
967
+ rb_xml_node_initialize_copy_with_args(VALUE rb_self, VALUE rb_other, VALUE rb_level, VALUE rb_new_parent_doc)
964
968
  {
965
- VALUE r_level, r_new_parent_doc;
966
- int level;
967
- int n_args;
968
- xmlDocPtr new_parent_doc;
969
- xmlNodePtr node, dup;
969
+ xmlNodePtr c_self, c_other;
970
+ int c_level;
971
+ xmlDocPtr c_new_parent_doc;
972
+ VALUE rb_node_cache;
970
973
 
971
- Noko_Node_Get_Struct(self, xmlNode, node);
974
+ Noko_Node_Get_Struct(rb_other, xmlNode, c_other);
975
+ c_level = (int)NUM2INT(rb_level);
976
+ c_new_parent_doc = noko_xml_document_unwrap(rb_new_parent_doc);
972
977
 
973
- n_args = rb_scan_args(argc, argv, "02", &r_level, &r_new_parent_doc);
978
+ c_self = xmlDocCopyNode(c_other, c_new_parent_doc, c_level);
979
+ if (c_self == NULL) { return Qnil; }
974
980
 
975
- if (n_args < 1) {
976
- r_level = INT2NUM((long)1);
977
- }
978
- level = (int)NUM2INT(r_level);
981
+ _xml_node_data_ptr_set(rb_self, c_self);
982
+ noko_xml_document_pin_node(c_self);
979
983
 
980
- if (n_args < 2) {
981
- new_parent_doc = node->doc;
982
- } else {
983
- new_parent_doc = noko_xml_document_unwrap(r_new_parent_doc);
984
- }
984
+ rb_node_cache = DOC_NODE_CACHE(c_new_parent_doc);
985
+ rb_ary_push(rb_node_cache, rb_self);
986
+ rb_funcall(rb_new_parent_doc, id_decorate, 1, rb_self);
985
987
 
986
- dup = xmlDocCopyNode(node, new_parent_doc, level);
987
- if (dup == NULL) { return Qnil; }
988
-
989
- noko_xml_document_pin_node(dup);
990
-
991
- return noko_xml_node_wrap(rb_obj_class(self), dup);
988
+ return rb_self;
992
989
  }
993
990
 
994
991
  /*
@@ -1074,17 +1071,10 @@ previous_element(VALUE self)
1074
1071
  xmlNodePtr node, sibling;
1075
1072
  Noko_Node_Get_Struct(self, xmlNode, node);
1076
1073
 
1077
- /*
1078
- * note that we don't use xmlPreviousElementSibling here because it's buggy pre-2.7.7.
1079
- */
1080
- sibling = node->prev;
1074
+ sibling = xmlPreviousElementSibling(node);
1081
1075
  if (!sibling) { return Qnil; }
1082
1076
 
1083
- while (sibling && sibling->type != XML_ELEMENT_NODE) {
1084
- sibling = sibling->prev;
1085
- }
1086
-
1087
- return sibling ? noko_xml_node_wrap(Qnil, sibling) : Qnil ;
1077
+ return noko_xml_node_wrap(Qnil, sibling);
1088
1078
  }
1089
1079
 
1090
1080
  /* :nodoc: */
@@ -1500,9 +1490,44 @@ node_type(VALUE self)
1500
1490
 
1501
1491
  /*
1502
1492
  * call-seq:
1503
- * content=
1493
+ * native_content=(input)
1494
+ *
1495
+ * Set the content of this node to +input+.
1496
+ *
1497
+ * [Parameters]
1498
+ * - +input+ (String) The new content for this node.
1499
+ *
1500
+ * ⚠ This method behaves differently depending on the node type. For Text, CDATA, Comment, and
1501
+ * ProcessingInstruction nodes, it treats the input as raw content, which means that the final DOM
1502
+ * will contain the entity-escaped version of the input (see example below). For Element and Attr
1503
+ * nodes, it treats the input as parsed content and expects it to be valid markup that is already
1504
+ * entity-escaped.
1504
1505
  *
1505
- * Set the content for this Node
1506
+ * 💡 Use Node#content= for a more consistent API across node types.
1507
+ *
1508
+ * [Example]
1509
+ * Note the behavior differences of this method between Text and Element nodes:
1510
+ *
1511
+ * doc = Nokogiri::HTML::Document.parse(<<~HTML)
1512
+ * <html>
1513
+ * <body>
1514
+ * <div id="first">asdf</div>
1515
+ * <div id="second">asdf</div>
1516
+ * HTML
1517
+ *
1518
+ * text_node = doc.at_css("div#first").children.first
1519
+ * div_node = doc.at_css("div#second")
1520
+ *
1521
+ * value = "You &amp; Me"
1522
+ *
1523
+ * text_node.native_content = value
1524
+ * div_node.native_content = value
1525
+ *
1526
+ * doc.css("div").to_html
1527
+ * # => "<div id=\"first\">You &amp;amp; Me</div>
1528
+ * # <div id=\"second\">You &amp; Me</div>"
1529
+ *
1530
+ * See also: #content=
1506
1531
  */
1507
1532
  static VALUE
1508
1533
  set_native_content(VALUE self, VALUE content)
@@ -1813,12 +1838,12 @@ output_escaped_string(VALUE out, xmlChar const *start, bool attr)
1813
1838
  ++next;
1814
1839
  continue;
1815
1840
  }
1816
- output_partial_string(out, (char const *)start, next - start);
1841
+ output_partial_string(out, (char const *)start, (size_t)(next - start));
1817
1842
  output_string(out, replacement);
1818
1843
  next += replaced_bytes;
1819
1844
  start = next;
1820
1845
  }
1821
- output_partial_string(out, (char const *)start, next - start);
1846
+ output_partial_string(out, (char const *)start, (size_t)(next - start));
1822
1847
  }
1823
1848
 
1824
1849
  static bool
@@ -1889,17 +1914,7 @@ output_node(
1889
1914
  // Add attributes.
1890
1915
  for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
1891
1916
  output_char(out, ' ');
1892
- output_attr_name(out, attr);
1893
- if (attr->children) {
1894
- output_string(out, "=\"");
1895
- xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1896
- output_escaped_string(out, value, true);
1897
- xmlFree(value);
1898
- output_char(out, '"');
1899
- } else {
1900
- // Output name=""
1901
- output_string(out, "=\"\"");
1902
- }
1917
+ output_node(out, (xmlNodePtr)attr, preserve_newline);
1903
1918
  }
1904
1919
  output_char(out, '>');
1905
1920
 
@@ -1917,6 +1932,22 @@ output_node(
1917
1932
  }
1918
1933
  break;
1919
1934
 
1935
+ case XML_ATTRIBUTE_NODE: {
1936
+ xmlAttrPtr attr = (xmlAttrPtr)node;
1937
+ output_attr_name(out, attr);
1938
+ if (attr->children) {
1939
+ output_string(out, "=\"");
1940
+ xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1941
+ output_escaped_string(out, value, true);
1942
+ xmlFree(value);
1943
+ output_char(out, '"');
1944
+ } else {
1945
+ // Output name=""
1946
+ output_string(out, "=\"\"");
1947
+ }
1948
+ }
1949
+ break;
1950
+
1920
1951
  case XML_TEXT_NODE:
1921
1952
  if (node->parent
1922
1953
  && is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS,
@@ -2032,11 +2063,11 @@ rb_xml_node_line_set(VALUE rb_node, VALUE rb_line_number)
2032
2063
  // libxml2 optionally uses xmlNode.psvi to store longer line numbers, but only for text nodes.
2033
2064
  // search for "psvi" in SAX2.c and tree.c to learn more.
2034
2065
  if (line_number < 65535) {
2035
- c_node->line = (short) line_number;
2066
+ c_node->line = (short unsigned)line_number;
2036
2067
  } else {
2037
2068
  c_node->line = 65535;
2038
2069
  if (c_node->type == XML_TEXT_NODE) {
2039
- c_node->psvi = (void *)(ptrdiff_t) line_number;
2070
+ c_node->psvi = (void *)(ptrdiff_t)line_number;
2040
2071
  }
2041
2072
  }
2042
2073
 
@@ -2096,7 +2127,7 @@ dump_html(VALUE self)
2096
2127
 
2097
2128
  buf = xmlBufferCreate() ;
2098
2129
  htmlNodeDump(buf, node->doc, node);
2099
- html = NOKOGIRI_STR_NEW2(buf->content);
2130
+ html = NOKOGIRI_STR_NEW2(xmlBufferContent(buf));
2100
2131
  xmlBufferFree(buf);
2101
2132
  return html ;
2102
2133
  }
@@ -2120,36 +2151,38 @@ compare(VALUE self, VALUE _other)
2120
2151
 
2121
2152
  /*
2122
2153
  * call-seq:
2123
- * process_xincludes(options)
2154
+ * process_xincludes(flags)
2124
2155
  *
2125
2156
  * Loads and substitutes all xinclude elements below the node. The
2126
- * parser context will be initialized with +options+.
2157
+ * parser context will be initialized with +flags+.
2127
2158
  */
2128
2159
  static VALUE
2129
- process_xincludes(VALUE self, VALUE options)
2160
+ noko_xml_node__process_xincludes(VALUE rb_node, VALUE rb_flags)
2130
2161
  {
2131
- int rcode ;
2132
- xmlNodePtr node;
2133
- VALUE error_list = rb_ary_new();
2162
+ int status ;
2163
+ xmlNodePtr c_node;
2164
+ VALUE rb_errors = rb_ary_new();
2165
+ libxmlStructuredErrorHandlerState handler_state;
2134
2166
 
2135
- Noko_Node_Get_Struct(self, xmlNode, node);
2167
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
2136
2168
 
2137
- xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
2138
- rcode = xmlXIncludeProcessTreeFlags(node, (int)NUM2INT(options));
2139
- xmlSetStructuredErrorFunc(NULL, NULL);
2169
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
2170
+
2171
+ status = xmlXIncludeProcessTreeFlags(c_node, (int)NUM2INT(rb_flags));
2172
+
2173
+ noko__structured_error_func_restore(&handler_state);
2140
2174
 
2141
- if (rcode < 0) {
2142
- xmlErrorConstPtr error;
2175
+ if (status < 0) {
2176
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
2143
2177
 
2144
- error = xmlGetLastError();
2145
- if (error) {
2146
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
2178
+ if (RB_TEST(exception)) {
2179
+ rb_exc_raise(exception);
2147
2180
  } else {
2148
2181
  rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
2149
2182
  }
2150
2183
  }
2151
2184
 
2152
- return self;
2185
+ return rb_node;
2153
2186
  }
2154
2187
 
2155
2188
 
@@ -2171,16 +2204,7 @@ in_context(VALUE self, VALUE _str, VALUE _options)
2171
2204
  node_children = node->children;
2172
2205
  doc_children = node->doc->children;
2173
2206
 
2174
- xmlSetStructuredErrorFunc((void *)err, Nokogiri_error_array_pusher);
2175
-
2176
- /* Twiddle global variable because of a bug in libxml2.
2177
- * http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
2178
- *
2179
- * TODO: this is fixed, and HTML_PARSE_NOIMPLIED is defined, in libxml2 2.7.7
2180
- */
2181
- #ifndef HTML_PARSE_NOIMPLIED
2182
- htmlHandleOmittedElem(0);
2183
- #endif
2207
+ xmlSetStructuredErrorFunc((void *)err, noko__error_array_pusher);
2184
2208
 
2185
2209
  /* This function adds a fake node to the child of +node+. If the parser
2186
2210
  * does not exit cleanly with XML_ERR_OK, the list is freed. This can
@@ -2210,10 +2234,6 @@ in_context(VALUE self, VALUE _str, VALUE _options)
2210
2234
  child_iter = child_iter->next;
2211
2235
  }
2212
2236
 
2213
- #ifndef HTML_PARSE_NOIMPLIED
2214
- htmlHandleOmittedElem(1);
2215
- #endif
2216
-
2217
2237
  xmlSetStructuredErrorFunc(NULL, NULL);
2218
2238
 
2219
2239
  /*
@@ -2262,6 +2282,15 @@ in_context(VALUE self, VALUE _str, VALUE _options)
2262
2282
  return noko_xml_node_set_wrap(set, doc);
2263
2283
  }
2264
2284
 
2285
+ /* :nodoc: */
2286
+ VALUE
2287
+ rb_xml_node_data_ptr_eh(VALUE self)
2288
+ {
2289
+ xmlNodePtr c_node;
2290
+ Noko_Node_Get_Struct(self, xmlNode, c_node);
2291
+ return c_node ? Qtrue : Qfalse;
2292
+ }
2293
+
2265
2294
  VALUE
2266
2295
  noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
2267
2296
  {
@@ -2327,8 +2356,8 @@ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
2327
2356
  }
2328
2357
  }
2329
2358
 
2330
- rb_node = TypedData_Wrap_Struct(rb_class, &nokogiri_node_type, c_node) ;
2331
- c_node->_private = (void *)rb_node;
2359
+ rb_node = _xml_node_alloc(rb_class);
2360
+ _xml_node_data_ptr_set(rb_node, c_node);
2332
2361
 
2333
2362
  if (node_has_a_document) {
2334
2363
  rb_document = DOC_RUBY_OBJECT(c_doc);
@@ -2364,7 +2393,7 @@ noko_init_xml_node(void)
2364
2393
  {
2365
2394
  cNokogiriXmlNode = rb_define_class_under(mNokogiriXml, "Node", rb_cObject);
2366
2395
 
2367
- rb_undef_alloc_func(cNokogiriXmlNode);
2396
+ rb_define_alloc_func(cNokogiriXmlNode, _xml_node_alloc);
2368
2397
 
2369
2398
  rb_define_singleton_method(cNokogiriXmlNode, "new", rb_xml_node_new, -1);
2370
2399
 
@@ -2378,8 +2407,8 @@ noko_init_xml_node(void)
2378
2407
  rb_define_method(cNokogiriXmlNode, "content", rb_xml_node_content, 0);
2379
2408
  rb_define_method(cNokogiriXmlNode, "create_external_subset", create_external_subset, 3);
2380
2409
  rb_define_method(cNokogiriXmlNode, "create_internal_subset", create_internal_subset, 3);
2410
+ rb_define_method(cNokogiriXmlNode, "data_ptr?", rb_xml_node_data_ptr_eh, 0);
2381
2411
  rb_define_method(cNokogiriXmlNode, "document", rb_xml_node_document, 0);
2382
- rb_define_method(cNokogiriXmlNode, "dup", duplicate_node, -1);
2383
2412
  rb_define_method(cNokogiriXmlNode, "element_children", rb_xml_node_element_children, 0);
2384
2413
  rb_define_method(cNokogiriXmlNode, "encode_special_chars", encode_special_chars, 1);
2385
2414
  rb_define_method(cNokogiriXmlNode, "external_subset", external_subset, 0);
@@ -2408,6 +2437,8 @@ noko_init_xml_node(void)
2408
2437
  rb_define_method(cNokogiriXmlNode, "previous_sibling", previous_sibling, 0);
2409
2438
  rb_define_method(cNokogiriXmlNode, "unlink", unlink_node, 0);
2410
2439
 
2440
+ rb_define_protected_method(cNokogiriXmlNode, "initialize_copy_with_args", rb_xml_node_initialize_copy_with_args, 3);
2441
+
2411
2442
  rb_define_private_method(cNokogiriXmlNode, "add_child_node", add_child, 1);
2412
2443
  rb_define_private_method(cNokogiriXmlNode, "add_next_sibling_node", add_next_sibling, 1);
2413
2444
  rb_define_private_method(cNokogiriXmlNode, "add_previous_sibling_node", add_previous_sibling, 1);
@@ -2418,7 +2449,7 @@ noko_init_xml_node(void)
2418
2449
  rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4);
2419
2450
  rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0);
2420
2451
  rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1);
2421
- rb_define_private_method(cNokogiriXmlNode, "process_xincludes", process_xincludes, 1);
2452
+ rb_define_private_method(cNokogiriXmlNode, "process_xincludes", noko_xml_node__process_xincludes, 1);
2422
2453
  rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1);
2423
2454
  rb_define_private_method(cNokogiriXmlNode, "set", set, 2);
2424
2455
  rb_define_private_method(cNokogiriXmlNode, "set_namespace", set_namespace, 1);
@@ -68,15 +68,8 @@ xml_node_set_deallocate(void *data)
68
68
  xmlFree(node_set);
69
69
  }
70
70
 
71
-
72
- static VALUE
73
- xml_node_set_allocate(VALUE klass)
74
- {
75
- return noko_xml_node_set_wrap(xmlXPathNodeSetCreate(NULL), Qnil);
76
- }
77
-
78
71
  static const rb_data_type_t xml_node_set_type = {
79
- .wrap_struct_name = "Nokogiri::XML::NodeSet",
72
+ .wrap_struct_name = "xmlNodeSet",
80
73
  .function = {
81
74
  .dmark = xml_node_set_mark,
82
75
  .dfree = xml_node_set_deallocate,
@@ -84,6 +77,33 @@ static const rb_data_type_t xml_node_set_type = {
84
77
  .flags = RUBY_TYPED_FREE_IMMEDIATELY,
85
78
  };
86
79
 
80
+ static VALUE
81
+ xml_node_set_allocate(VALUE klass)
82
+ {
83
+ return TypedData_Wrap_Struct(klass, &xml_node_set_type, xmlXPathNodeSetCreate(NULL));
84
+ }
85
+
86
+ /* :nodoc: */
87
+ static VALUE
88
+ rb_xml_node_set_initialize_copy(VALUE rb_self, VALUE rb_other)
89
+ {
90
+ xmlNodeSetPtr c_self, c_other;
91
+ VALUE rb_document;
92
+
93
+ TypedData_Get_Struct(rb_self, xmlNodeSet, &xml_node_set_type, c_self);
94
+ TypedData_Get_Struct(rb_other, xmlNodeSet, &xml_node_set_type, c_other);
95
+
96
+ xmlXPathNodeSetMerge(c_self, c_other);
97
+
98
+ rb_document = rb_iv_get(rb_other, "@document");
99
+ if (!NIL_P(rb_document)) {
100
+ rb_iv_set(rb_self, "@document", rb_document);
101
+ rb_funcall(rb_document, decorate, 1, rb_self);
102
+ }
103
+
104
+ return rb_self;
105
+ }
106
+
87
107
  static void
88
108
  xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val)
89
109
  {
@@ -112,27 +132,6 @@ xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val)
112
132
  cur->nodeTab[cur->nodeNr] = NULL;
113
133
  }
114
134
 
115
-
116
- /*
117
- * call-seq:
118
- * dup
119
- *
120
- * Duplicate this NodeSet. Note that the Nodes contained in the NodeSet are not
121
- * duplicated (similar to how Array and other Enumerable classes work).
122
- */
123
- static VALUE
124
- duplicate(VALUE rb_self)
125
- {
126
- xmlNodeSetPtr c_self;
127
- xmlNodeSetPtr dupl;
128
-
129
- TypedData_Get_Struct(rb_self, xmlNodeSet, &xml_node_set_type, c_self);
130
-
131
- dupl = xmlXPathNodeSetMerge(NULL, c_self);
132
-
133
- return noko_xml_node_set_wrap(dupl, rb_iv_get(rb_self, "@document"));
134
- }
135
-
136
135
  /*
137
136
  * call-seq:
138
137
  * length
@@ -453,19 +452,21 @@ noko_xml_node_set_wrap(xmlNodeSetPtr c_node_set, VALUE document)
453
452
  VALUE rb_node_set ;
454
453
 
455
454
  if (c_node_set == NULL) {
456
- c_node_set = xmlXPathNodeSetCreate(NULL);
455
+ rb_node_set = xml_node_set_allocate(cNokogiriXmlNodeSet);
456
+ } else {
457
+ rb_node_set = TypedData_Wrap_Struct(cNokogiriXmlNodeSet, &xml_node_set_type, c_node_set);
457
458
  }
458
459
 
459
- rb_node_set = TypedData_Wrap_Struct(cNokogiriXmlNodeSet, &xml_node_set_type, c_node_set);
460
-
461
460
  if (!NIL_P(document)) {
462
461
  rb_iv_set(rb_node_set, "@document", document);
463
462
  rb_funcall(document, decorate, 1, rb_node_set);
464
463
  }
465
464
 
466
- /* make sure we create ruby objects for all the results, so they'll be marked during the GC mark phase */
467
- for (j = 0 ; j < c_node_set->nodeNr ; j++) {
468
- noko_xml_node_wrap_node_set_result(c_node_set->nodeTab[j], rb_node_set);
465
+ if (c_node_set) {
466
+ /* create ruby objects for all the results, so they'll be marked during the GC mark phase */
467
+ for (j = 0 ; j < c_node_set->nodeNr ; j++) {
468
+ noko_xml_node_wrap_node_set_result(c_node_set->nodeTab[j], rb_node_set);
469
+ }
469
470
  }
470
471
 
471
472
  return rb_node_set ;
@@ -499,18 +500,19 @@ noko_init_xml_node_set(void)
499
500
 
500
501
  rb_define_alloc_func(cNokogiriXmlNodeSet, xml_node_set_allocate);
501
502
 
502
- rb_define_method(cNokogiriXmlNodeSet, "length", length, 0);
503
- rb_define_method(cNokogiriXmlNodeSet, "[]", slice, -1);
504
- rb_define_method(cNokogiriXmlNodeSet, "slice", slice, -1);
505
- rb_define_method(cNokogiriXmlNodeSet, "push", push, 1);
506
- rb_define_method(cNokogiriXmlNodeSet, "|", rb_xml_node_set_union, 1);
503
+ rb_define_method(cNokogiriXmlNodeSet, "&", intersection, 1);
507
504
  rb_define_method(cNokogiriXmlNodeSet, "-", minus, 1);
508
- rb_define_method(cNokogiriXmlNodeSet, "unlink", unlink_nodeset, 0);
509
- rb_define_method(cNokogiriXmlNodeSet, "to_a", to_array, 0);
510
- rb_define_method(cNokogiriXmlNodeSet, "dup", duplicate, 0);
505
+ rb_define_method(cNokogiriXmlNodeSet, "[]", slice, -1);
511
506
  rb_define_method(cNokogiriXmlNodeSet, "delete", delete, 1);
512
- rb_define_method(cNokogiriXmlNodeSet, "&", intersection, 1);
513
507
  rb_define_method(cNokogiriXmlNodeSet, "include?", include_eh, 1);
508
+ rb_define_method(cNokogiriXmlNodeSet, "length", length, 0);
509
+ rb_define_method(cNokogiriXmlNodeSet, "push", push, 1);
510
+ rb_define_method(cNokogiriXmlNodeSet, "slice", slice, -1);
511
+ rb_define_method(cNokogiriXmlNodeSet, "to_a", to_array, 0);
512
+ rb_define_method(cNokogiriXmlNodeSet, "unlink", unlink_nodeset, 0);
513
+ rb_define_method(cNokogiriXmlNodeSet, "|", rb_xml_node_set_union, 1);
514
+
515
+ rb_define_private_method(cNokogiriXmlNodeSet, "initialize_copy", rb_xml_node_set_initialize_copy, 1);
514
516
 
515
517
  decorate = rb_intern("decorate");
516
518
  }