nokogiri 1.15.7 → 1.18.7

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (102)
  1. checksums.yaml +4 -4
  2. data/Gemfile +12 -17
  3. data/LICENSE-DEPENDENCIES.md +6 -6
  4. data/README.md +11 -5
  5. data/dependencies.yml +9 -8
  6. data/ext/nokogiri/extconf.rb +191 -154
  7. data/ext/nokogiri/gumbo.c +69 -53
  8. data/ext/nokogiri/html4_document.c +10 -4
  9. data/ext/nokogiri/html4_element_description.c +18 -18
  10. data/ext/nokogiri/html4_sax_parser.c +40 -0
  11. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  12. data/ext/nokogiri/html4_sax_push_parser.c +26 -25
  13. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  14. data/ext/nokogiri/nokogiri.c +9 -2
  15. data/ext/nokogiri/nokogiri.h +25 -33
  16. data/ext/nokogiri/test_global_handlers.c +1 -1
  17. data/ext/nokogiri/xml_attr.c +1 -1
  18. data/ext/nokogiri/xml_cdata.c +3 -12
  19. data/ext/nokogiri/xml_comment.c +3 -8
  20. data/ext/nokogiri/xml_document.c +167 -156
  21. data/ext/nokogiri/xml_document_fragment.c +10 -25
  22. data/ext/nokogiri/xml_dtd.c +1 -1
  23. data/ext/nokogiri/xml_element_content.c +9 -9
  24. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  25. data/ext/nokogiri/xml_namespace.c +6 -10
  26. data/ext/nokogiri/xml_node.c +135 -107
  27. data/ext/nokogiri/xml_node_set.c +46 -44
  28. data/ext/nokogiri/xml_reader.c +74 -100
  29. data/ext/nokogiri/xml_relax_ng.c +35 -56
  30. data/ext/nokogiri/xml_sax_parser.c +156 -88
  31. data/ext/nokogiri/xml_sax_parser_context.c +220 -128
  32. data/ext/nokogiri/xml_sax_push_parser.c +69 -50
  33. data/ext/nokogiri/xml_schema.c +51 -87
  34. data/ext/nokogiri/xml_syntax_error.c +19 -11
  35. data/ext/nokogiri/xml_text.c +3 -6
  36. data/ext/nokogiri/xml_xpath_context.c +104 -104
  37. data/ext/nokogiri/xslt_stylesheet.c +16 -11
  38. data/gumbo-parser/Makefile +18 -0
  39. data/gumbo-parser/src/ascii.c +2 -2
  40. data/gumbo-parser/src/error.c +76 -48
  41. data/gumbo-parser/src/error.h +5 -1
  42. data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
  43. data/gumbo-parser/src/parser.c +66 -25
  44. data/gumbo-parser/src/tokenizer.c +7 -6
  45. data/lib/nokogiri/class_resolver.rb +1 -1
  46. data/lib/nokogiri/css/node.rb +6 -2
  47. data/lib/nokogiri/css/parser.rb +6 -4
  48. data/lib/nokogiri/css/parser.y +2 -2
  49. data/lib/nokogiri/css/parser_extras.rb +6 -66
  50. data/lib/nokogiri/css/selector_cache.rb +38 -0
  51. data/lib/nokogiri/css/tokenizer.rb +4 -4
  52. data/lib/nokogiri/css/tokenizer.rex +9 -8
  53. data/lib/nokogiri/css/xpath_visitor.rb +44 -27
  54. data/lib/nokogiri/css.rb +86 -20
  55. data/lib/nokogiri/decorators/slop.rb +3 -5
  56. data/lib/nokogiri/encoding_handler.rb +2 -2
  57. data/lib/nokogiri/html4/document.rb +45 -24
  58. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  59. data/lib/nokogiri/html4/encoding_reader.rb +2 -2
  60. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  61. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  62. data/lib/nokogiri/html4.rb +9 -14
  63. data/lib/nokogiri/html5/builder.rb +40 -0
  64. data/lib/nokogiri/html5/document.rb +61 -30
  65. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  66. data/lib/nokogiri/html5/node.rb +4 -4
  67. data/lib/nokogiri/html5.rb +114 -138
  68. data/lib/nokogiri/version/constant.rb +1 -1
  69. data/lib/nokogiri/version/info.rb +6 -5
  70. data/lib/nokogiri/xml/attr.rb +2 -2
  71. data/lib/nokogiri/xml/builder.rb +8 -1
  72. data/lib/nokogiri/xml/document.rb +73 -29
  73. data/lib/nokogiri/xml/document_fragment.rb +86 -15
  74. data/lib/nokogiri/xml/namespace.rb +1 -2
  75. data/lib/nokogiri/xml/node.rb +113 -35
  76. data/lib/nokogiri/xml/node_set.rb +12 -10
  77. data/lib/nokogiri/xml/parse_options.rb +1 -1
  78. data/lib/nokogiri/xml/pp/node.rb +6 -1
  79. data/lib/nokogiri/xml/reader.rb +51 -17
  80. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  81. data/lib/nokogiri/xml/sax/document.rb +174 -83
  82. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  83. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  84. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  85. data/lib/nokogiri/xml/sax.rb +48 -0
  86. data/lib/nokogiri/xml/schema.rb +112 -45
  87. data/lib/nokogiri/xml/searchable.rb +39 -43
  88. data/lib/nokogiri/xml/syntax_error.rb +23 -1
  89. data/lib/nokogiri/xml/xpath_context.rb +14 -3
  90. data/lib/nokogiri/xml.rb +14 -25
  91. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  92. data/lib/nokogiri/xslt.rb +4 -10
  93. data/lib/nokogiri.rb +1 -1
  94. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  95. data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
  96. data/ports/archives/libxml2-2.13.7.tar.xz +0 -0
  97. data/ports/archives/libxslt-1.1.43.tar.xz +0 -0
  98. metadata +13 -12
  99. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
  100. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
  101. data/ports/archives/libxml2-2.11.7.tar.xz +0 -0
  102. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
@@ -28,7 +28,6 @@ _xml_node_mark(void *ptr)
28
28
  }
29
29
  }
30
30
 
31
- #ifdef HAVE_RB_GC_LOCATION
32
31
  static void
33
32
  _xml_node_update_references(void *ptr)
34
33
  {
@@ -38,12 +37,9 @@ _xml_node_update_references(void *ptr)
38
37
  node->_private = (void *)rb_gc_location((VALUE)node->_private);
39
38
  }
40
39
  }
41
- #else
42
- # define _xml_node_update_references 0
43
- #endif
44
40
 
45
- static const rb_data_type_t nokogiri_node_type = {
46
- .wrap_struct_name = "Nokogiri::XML::Node",
41
+ static const rb_data_type_t xml_node_type = {
42
+ .wrap_struct_name = "xmlNode",
47
43
  .function = {
48
44
  .dmark = _xml_node_mark,
49
45
  .dcompact = _xml_node_update_references,
@@ -51,6 +47,24 @@ static const rb_data_type_t nokogiri_node_type = {
51
47
  .flags = RUBY_TYPED_FREE_IMMEDIATELY,
52
48
  };
53
49
 
50
+ static VALUE
51
+ _xml_node_alloc(VALUE klass)
52
+ {
53
+ return TypedData_Wrap_Struct(klass, &xml_node_type, NULL);
54
+ }
55
+
56
+ static void
57
+ _xml_node_data_ptr_set(VALUE rb_node, xmlNodePtr c_node)
58
+ {
59
+ assert(DATA_PTR(rb_node) == NULL);
60
+ assert(c_node->_private == NULL);
61
+
62
+ DATA_PTR(rb_node) = c_node;
63
+ c_node->_private = (void *)rb_node;
64
+
65
+ return;
66
+ }
67
+
54
68
  static void
55
69
  relink_namespace(xmlNodePtr reparented)
56
70
  {
@@ -145,7 +159,7 @@ relink_namespace(xmlNodePtr reparented)
145
159
  /* reparent. */
146
160
  if (NULL == reparented->ns) { return; }
147
161
 
148
- /* When a node gets reparented, walk it's children to make sure that */
162
+ /* When a node gets reparented, walk its children to make sure that */
149
163
  /* their namespaces are reparented as well. */
150
164
  child = reparented->children;
151
165
  while (NULL != child) {
@@ -948,51 +962,30 @@ internal_subset(VALUE self)
948
962
  return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
949
963
  }
950
964
 
951
- /*
952
- * :call-seq:
953
- * dup → Nokogiri::XML::Node
954
- * dup(depth) → Nokogiri::XML::Node
955
- * dup(depth, new_parent_doc) → Nokogiri::XML::Node
956
- *
957
- * Copy this node.
958
- *
959
- * [Parameters]
960
- * - +depth+ 0 is a shallow copy, 1 (the default) is a deep copy.
961
- * - +new_parent_doc+
962
- * The new node's parent Document. Defaults to the this node's document.
963
- *
964
- * [Returns] The new Nokgiri::XML::Node
965
- */
965
+ /* :nodoc: */
966
966
  static VALUE
967
- duplicate_node(int argc, VALUE *argv, VALUE self)
967
+ rb_xml_node_initialize_copy_with_args(VALUE rb_self, VALUE rb_other, VALUE rb_level, VALUE rb_new_parent_doc)
968
968
  {
969
- VALUE r_level, r_new_parent_doc;
970
- int level;
971
- int n_args;
972
- xmlDocPtr new_parent_doc;
973
- xmlNodePtr node, dup;
969
+ xmlNodePtr c_self, c_other;
970
+ int c_level;
971
+ xmlDocPtr c_new_parent_doc;
972
+ VALUE rb_node_cache;
974
973
 
975
- Noko_Node_Get_Struct(self, xmlNode, node);
974
+ Noko_Node_Get_Struct(rb_other, xmlNode, c_other);
975
+ c_level = (int)NUM2INT(rb_level);
976
+ c_new_parent_doc = noko_xml_document_unwrap(rb_new_parent_doc);
976
977
 
977
- n_args = rb_scan_args(argc, argv, "02", &r_level, &r_new_parent_doc);
978
+ c_self = xmlDocCopyNode(c_other, c_new_parent_doc, c_level);
979
+ if (c_self == NULL) { return Qnil; }
978
980
 
979
- if (n_args < 1) {
980
- r_level = INT2NUM((long)1);
981
- }
982
- level = (int)NUM2INT(r_level);
981
+ _xml_node_data_ptr_set(rb_self, c_self);
982
+ noko_xml_document_pin_node(c_self);
983
983
 
984
- if (n_args < 2) {
985
- new_parent_doc = node->doc;
986
- } else {
987
- new_parent_doc = noko_xml_document_unwrap(r_new_parent_doc);
988
- }
989
-
990
- dup = xmlDocCopyNode(node, new_parent_doc, level);
991
- if (dup == NULL) { return Qnil; }
984
+ rb_node_cache = DOC_NODE_CACHE(c_new_parent_doc);
985
+ rb_ary_push(rb_node_cache, rb_self);
986
+ rb_funcall(rb_new_parent_doc, id_decorate, 1, rb_self);
992
987
 
993
- noko_xml_document_pin_node(dup);
994
-
995
- return noko_xml_node_wrap(rb_obj_class(self), dup);
988
+ return rb_self;
996
989
  }
997
990
 
998
991
  /*
@@ -1078,17 +1071,10 @@ previous_element(VALUE self)
1078
1071
  xmlNodePtr node, sibling;
1079
1072
  Noko_Node_Get_Struct(self, xmlNode, node);
1080
1073
 
1081
- /*
1082
- * note that we don't use xmlPreviousElementSibling here because it's buggy pre-2.7.7.
1083
- */
1084
- sibling = node->prev;
1074
+ sibling = xmlPreviousElementSibling(node);
1085
1075
  if (!sibling) { return Qnil; }
1086
1076
 
1087
- while (sibling && sibling->type != XML_ELEMENT_NODE) {
1088
- sibling = sibling->prev;
1089
- }
1090
-
1091
- return sibling ? noko_xml_node_wrap(Qnil, sibling) : Qnil ;
1077
+ return noko_xml_node_wrap(Qnil, sibling);
1092
1078
  }
1093
1079
 
1094
1080
  /* :nodoc: */
@@ -1504,9 +1490,44 @@ node_type(VALUE self)
1504
1490
 
1505
1491
  /*
1506
1492
  * call-seq:
1507
- * content=
1493
+ * native_content=(input)
1494
+ *
1495
+ * Set the content of this node to +input+.
1496
+ *
1497
+ * [Parameters]
1498
+ * - +input+ (String) The new content for this node.
1508
1499
  *
1509
- * Set the content for this Node
1500
+ * This method behaves differently depending on the node type. For Text, CDATA, Comment, and
1501
+ * ProcessingInstruction nodes, it treats the input as raw content, which means that the final DOM
1502
+ * will contain the entity-escaped version of the input (see example below). For Element and Attr
1503
+ * nodes, it treats the input as parsed content and expects it to be valid markup that is already
1504
+ * entity-escaped.
1505
+ *
1506
+ * 💡 Use Node#content= for a more consistent API across node types.
1507
+ *
1508
+ * [Example]
1509
+ * Note the behavior differences of this method between Text and Element nodes:
1510
+ *
1511
+ * doc = Nokogiri::HTML::Document.parse(<<~HTML)
1512
+ * <html>
1513
+ * <body>
1514
+ * <div id="first">asdf</div>
1515
+ * <div id="second">asdf</div>
1516
+ * HTML
1517
+ *
1518
+ * text_node = doc.at_css("div#first").children.first
1519
+ * div_node = doc.at_css("div#second")
1520
+ *
1521
+ * value = "You &amp; Me"
1522
+ *
1523
+ * text_node.native_content = value
1524
+ * div_node.native_content = value
1525
+ *
1526
+ * doc.css("div").to_html
1527
+ * # => "<div id=\"first\">You &amp;amp; Me</div>
1528
+ * # <div id=\"second\">You &amp; Me</div>"
1529
+ *
1530
+ * See also: #content=
1510
1531
  */
1511
1532
  static VALUE
1512
1533
  set_native_content(VALUE self, VALUE content)
@@ -1817,12 +1838,12 @@ output_escaped_string(VALUE out, xmlChar const *start, bool attr)
1817
1838
  ++next;
1818
1839
  continue;
1819
1840
  }
1820
- output_partial_string(out, (char const *)start, next - start);
1841
+ output_partial_string(out, (char const *)start, (size_t)(next - start));
1821
1842
  output_string(out, replacement);
1822
1843
  next += replaced_bytes;
1823
1844
  start = next;
1824
1845
  }
1825
- output_partial_string(out, (char const *)start, next - start);
1846
+ output_partial_string(out, (char const *)start, (size_t)(next - start));
1826
1847
  }
1827
1848
 
1828
1849
  static bool
@@ -1893,17 +1914,7 @@ output_node(
1893
1914
  // Add attributes.
1894
1915
  for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
1895
1916
  output_char(out, ' ');
1896
- output_attr_name(out, attr);
1897
- if (attr->children) {
1898
- output_string(out, "=\"");
1899
- xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1900
- output_escaped_string(out, value, true);
1901
- xmlFree(value);
1902
- output_char(out, '"');
1903
- } else {
1904
- // Output name=""
1905
- output_string(out, "=\"\"");
1906
- }
1917
+ output_node(out, (xmlNodePtr)attr, preserve_newline);
1907
1918
  }
1908
1919
  output_char(out, '>');
1909
1920
 
@@ -1921,6 +1932,22 @@ output_node(
1921
1932
  }
1922
1933
  break;
1923
1934
 
1935
+ case XML_ATTRIBUTE_NODE: {
1936
+ xmlAttrPtr attr = (xmlAttrPtr)node;
1937
+ output_attr_name(out, attr);
1938
+ if (attr->children) {
1939
+ output_string(out, "=\"");
1940
+ xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1941
+ output_escaped_string(out, value, true);
1942
+ xmlFree(value);
1943
+ output_char(out, '"');
1944
+ } else {
1945
+ // Output name=""
1946
+ output_string(out, "=\"\"");
1947
+ }
1948
+ }
1949
+ break;
1950
+
1924
1951
  case XML_TEXT_NODE:
1925
1952
  if (node->parent
1926
1953
  && is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS,
@@ -2036,11 +2063,11 @@ rb_xml_node_line_set(VALUE rb_node, VALUE rb_line_number)
2036
2063
  // libxml2 optionally uses xmlNode.psvi to store longer line numbers, but only for text nodes.
2037
2064
  // search for "psvi" in SAX2.c and tree.c to learn more.
2038
2065
  if (line_number < 65535) {
2039
- c_node->line = (short) line_number;
2066
+ c_node->line = (short unsigned)line_number;
2040
2067
  } else {
2041
2068
  c_node->line = 65535;
2042
2069
  if (c_node->type == XML_TEXT_NODE) {
2043
- c_node->psvi = (void *)(ptrdiff_t) line_number;
2070
+ c_node->psvi = (void *)(ptrdiff_t)line_number;
2044
2071
  }
2045
2072
  }
2046
2073
 
@@ -2064,8 +2091,7 @@ rb_xml_node_new(int argc, VALUE *argv, VALUE klass)
2064
2091
  rb_raise(rb_eArgError, "document must be a Nokogiri::XML::Node");
2065
2092
  }
2066
2093
  if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) {
2067
- // TODO: deprecate allowing Node
2068
- NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri.");
2094
+ NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.13.0, remove in v1.17.0
2069
2095
  }
2070
2096
  Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node);
2071
2097
 
@@ -2101,7 +2127,7 @@ dump_html(VALUE self)
2101
2127
 
2102
2128
  buf = xmlBufferCreate() ;
2103
2129
  htmlNodeDump(buf, node->doc, node);
2104
- html = NOKOGIRI_STR_NEW2(buf->content);
2130
+ html = NOKOGIRI_STR_NEW2(xmlBufferContent(buf));
2105
2131
  xmlBufferFree(buf);
2106
2132
  return html ;
2107
2133
  }
@@ -2125,36 +2151,38 @@ compare(VALUE self, VALUE _other)
2125
2151
 
2126
2152
  /*
2127
2153
  * call-seq:
2128
- * process_xincludes(options)
2154
+ * process_xincludes(flags)
2129
2155
  *
2130
2156
  * Loads and substitutes all xinclude elements below the node. The
2131
- * parser context will be initialized with +options+.
2157
+ * parser context will be initialized with +flags+.
2132
2158
  */
2133
2159
  static VALUE
2134
- process_xincludes(VALUE self, VALUE options)
2160
+ noko_xml_node__process_xincludes(VALUE rb_node, VALUE rb_flags)
2135
2161
  {
2136
- int rcode ;
2137
- xmlNodePtr node;
2138
- VALUE error_list = rb_ary_new();
2162
+ int status ;
2163
+ xmlNodePtr c_node;
2164
+ VALUE rb_errors = rb_ary_new();
2165
+ libxmlStructuredErrorHandlerState handler_state;
2139
2166
 
2140
- Noko_Node_Get_Struct(self, xmlNode, node);
2167
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
2141
2168
 
2142
- xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
2143
- rcode = xmlXIncludeProcessTreeFlags(node, (int)NUM2INT(options));
2144
- xmlSetStructuredErrorFunc(NULL, NULL);
2169
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
2170
+
2171
+ status = xmlXIncludeProcessTreeFlags(c_node, (int)NUM2INT(rb_flags));
2145
2172
 
2146
- if (rcode < 0) {
2147
- xmlErrorPtr error;
2173
+ noko__structured_error_func_restore(&handler_state);
2148
2174
 
2149
- error = xmlGetLastError();
2150
- if (error) {
2151
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
2175
+ if (status < 0) {
2176
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
2177
+
2178
+ if (RB_TEST(exception)) {
2179
+ rb_exc_raise(exception);
2152
2180
  } else {
2153
2181
  rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
2154
2182
  }
2155
2183
  }
2156
2184
 
2157
- return self;
2185
+ return rb_node;
2158
2186
  }
2159
2187
 
2160
2188
 
@@ -2176,14 +2204,7 @@ in_context(VALUE self, VALUE _str, VALUE _options)
2176
2204
  node_children = node->children;
2177
2205
  doc_children = node->doc->children;
2178
2206
 
2179
- xmlSetStructuredErrorFunc((void *)err, Nokogiri_error_array_pusher);
2180
-
2181
- /* Twiddle global variable because of a bug in libxml2.
2182
- * http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
2183
- */
2184
- #ifndef HTML_PARSE_NOIMPLIED
2185
- htmlHandleOmittedElem(0);
2186
- #endif
2207
+ xmlSetStructuredErrorFunc((void *)err, noko__error_array_pusher);
2187
2208
 
2188
2209
  /* This function adds a fake node to the child of +node+. If the parser
2189
2210
  * does not exit cleanly with XML_ERR_OK, the list is freed. This can
@@ -2213,10 +2234,6 @@ in_context(VALUE self, VALUE _str, VALUE _options)
2213
2234
  child_iter = child_iter->next;
2214
2235
  }
2215
2236
 
2216
- #ifndef HTML_PARSE_NOIMPLIED
2217
- htmlHandleOmittedElem(1);
2218
- #endif
2219
-
2220
2237
  xmlSetStructuredErrorFunc(NULL, NULL);
2221
2238
 
2222
2239
  /*
@@ -2265,6 +2282,15 @@ in_context(VALUE self, VALUE _str, VALUE _options)
2265
2282
  return noko_xml_node_set_wrap(set, doc);
2266
2283
  }
2267
2284
 
2285
+ /* :nodoc: */
2286
+ VALUE
2287
+ rb_xml_node_data_ptr_eh(VALUE self)
2288
+ {
2289
+ xmlNodePtr c_node;
2290
+ Noko_Node_Get_Struct(self, xmlNode, c_node);
2291
+ return c_node ? Qtrue : Qfalse;
2292
+ }
2293
+
2268
2294
  VALUE
2269
2295
  noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
2270
2296
  {
@@ -2330,8 +2356,8 @@ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
2330
2356
  }
2331
2357
  }
2332
2358
 
2333
- rb_node = TypedData_Wrap_Struct(rb_class, &nokogiri_node_type, c_node) ;
2334
- c_node->_private = (void *)rb_node;
2359
+ rb_node = _xml_node_alloc(rb_class);
2360
+ _xml_node_data_ptr_set(rb_node, c_node);
2335
2361
 
2336
2362
  if (node_has_a_document) {
2337
2363
  rb_document = DOC_RUBY_OBJECT(c_doc);
@@ -2367,7 +2393,7 @@ noko_init_xml_node(void)
2367
2393
  {
2368
2394
  cNokogiriXmlNode = rb_define_class_under(mNokogiriXml, "Node", rb_cObject);
2369
2395
 
2370
- rb_undef_alloc_func(cNokogiriXmlNode);
2396
+ rb_define_alloc_func(cNokogiriXmlNode, _xml_node_alloc);
2371
2397
 
2372
2398
  rb_define_singleton_method(cNokogiriXmlNode, "new", rb_xml_node_new, -1);
2373
2399
 
@@ -2381,8 +2407,8 @@ noko_init_xml_node(void)
2381
2407
  rb_define_method(cNokogiriXmlNode, "content", rb_xml_node_content, 0);
2382
2408
  rb_define_method(cNokogiriXmlNode, "create_external_subset", create_external_subset, 3);
2383
2409
  rb_define_method(cNokogiriXmlNode, "create_internal_subset", create_internal_subset, 3);
2410
+ rb_define_method(cNokogiriXmlNode, "data_ptr?", rb_xml_node_data_ptr_eh, 0);
2384
2411
  rb_define_method(cNokogiriXmlNode, "document", rb_xml_node_document, 0);
2385
- rb_define_method(cNokogiriXmlNode, "dup", duplicate_node, -1);
2386
2412
  rb_define_method(cNokogiriXmlNode, "element_children", rb_xml_node_element_children, 0);
2387
2413
  rb_define_method(cNokogiriXmlNode, "encode_special_chars", encode_special_chars, 1);
2388
2414
  rb_define_method(cNokogiriXmlNode, "external_subset", external_subset, 0);
@@ -2411,6 +2437,8 @@ noko_init_xml_node(void)
2411
2437
  rb_define_method(cNokogiriXmlNode, "previous_sibling", previous_sibling, 0);
2412
2438
  rb_define_method(cNokogiriXmlNode, "unlink", unlink_node, 0);
2413
2439
 
2440
+ rb_define_protected_method(cNokogiriXmlNode, "initialize_copy_with_args", rb_xml_node_initialize_copy_with_args, 3);
2441
+
2414
2442
  rb_define_private_method(cNokogiriXmlNode, "add_child_node", add_child, 1);
2415
2443
  rb_define_private_method(cNokogiriXmlNode, "add_next_sibling_node", add_next_sibling, 1);
2416
2444
  rb_define_private_method(cNokogiriXmlNode, "add_previous_sibling_node", add_previous_sibling, 1);
@@ -2421,7 +2449,7 @@ noko_init_xml_node(void)
2421
2449
  rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4);
2422
2450
  rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0);
2423
2451
  rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1);
2424
- rb_define_private_method(cNokogiriXmlNode, "process_xincludes", process_xincludes, 1);
2452
+ rb_define_private_method(cNokogiriXmlNode, "process_xincludes", noko_xml_node__process_xincludes, 1);
2425
2453
  rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1);
2426
2454
  rb_define_private_method(cNokogiriXmlNode, "set", set, 2);
2427
2455
  rb_define_private_method(cNokogiriXmlNode, "set_namespace", set_namespace, 1);
@@ -68,15 +68,8 @@ xml_node_set_deallocate(void *data)
68
68
  xmlFree(node_set);
69
69
  }
70
70
 
71
-
72
- static VALUE
73
- xml_node_set_allocate(VALUE klass)
74
- {
75
- return noko_xml_node_set_wrap(xmlXPathNodeSetCreate(NULL), Qnil);
76
- }
77
-
78
71
  static const rb_data_type_t xml_node_set_type = {
79
- .wrap_struct_name = "Nokogiri::XML::NodeSet",
72
+ .wrap_struct_name = "xmlNodeSet",
80
73
  .function = {
81
74
  .dmark = xml_node_set_mark,
82
75
  .dfree = xml_node_set_deallocate,
@@ -84,6 +77,33 @@ static const rb_data_type_t xml_node_set_type = {
84
77
  .flags = RUBY_TYPED_FREE_IMMEDIATELY,
85
78
  };
86
79
 
80
+ static VALUE
81
+ xml_node_set_allocate(VALUE klass)
82
+ {
83
+ return TypedData_Wrap_Struct(klass, &xml_node_set_type, xmlXPathNodeSetCreate(NULL));
84
+ }
85
+
86
+ /* :nodoc: */
87
+ static VALUE
88
+ rb_xml_node_set_initialize_copy(VALUE rb_self, VALUE rb_other)
89
+ {
90
+ xmlNodeSetPtr c_self, c_other;
91
+ VALUE rb_document;
92
+
93
+ TypedData_Get_Struct(rb_self, xmlNodeSet, &xml_node_set_type, c_self);
94
+ TypedData_Get_Struct(rb_other, xmlNodeSet, &xml_node_set_type, c_other);
95
+
96
+ xmlXPathNodeSetMerge(c_self, c_other);
97
+
98
+ rb_document = rb_iv_get(rb_other, "@document");
99
+ if (!NIL_P(rb_document)) {
100
+ rb_iv_set(rb_self, "@document", rb_document);
101
+ rb_funcall(rb_document, decorate, 1, rb_self);
102
+ }
103
+
104
+ return rb_self;
105
+ }
106
+
87
107
  static void
88
108
  xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val)
89
109
  {
@@ -112,27 +132,6 @@ xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val)
112
132
  cur->nodeTab[cur->nodeNr] = NULL;
113
133
  }
114
134
 
115
-
116
- /*
117
- * call-seq:
118
- * dup
119
- *
120
- * Duplicate this NodeSet. Note that the Nodes contained in the NodeSet are not
121
- * duplicated (similar to how Array and other Enumerable classes work).
122
- */
123
- static VALUE
124
- duplicate(VALUE rb_self)
125
- {
126
- xmlNodeSetPtr c_self;
127
- xmlNodeSetPtr dupl;
128
-
129
- TypedData_Get_Struct(rb_self, xmlNodeSet, &xml_node_set_type, c_self);
130
-
131
- dupl = xmlXPathNodeSetMerge(NULL, c_self);
132
-
133
- return noko_xml_node_set_wrap(dupl, rb_iv_get(rb_self, "@document"));
134
- }
135
-
136
135
  /*
137
136
  * call-seq:
138
137
  * length
@@ -453,19 +452,21 @@ noko_xml_node_set_wrap(xmlNodeSetPtr c_node_set, VALUE document)
453
452
  VALUE rb_node_set ;
454
453
 
455
454
  if (c_node_set == NULL) {
456
- c_node_set = xmlXPathNodeSetCreate(NULL);
455
+ rb_node_set = xml_node_set_allocate(cNokogiriXmlNodeSet);
456
+ } else {
457
+ rb_node_set = TypedData_Wrap_Struct(cNokogiriXmlNodeSet, &xml_node_set_type, c_node_set);
457
458
  }
458
459
 
459
- rb_node_set = TypedData_Wrap_Struct(cNokogiriXmlNodeSet, &xml_node_set_type, c_node_set);
460
-
461
460
  if (!NIL_P(document)) {
462
461
  rb_iv_set(rb_node_set, "@document", document);
463
462
  rb_funcall(document, decorate, 1, rb_node_set);
464
463
  }
465
464
 
466
- /* make sure we create ruby objects for all the results, so they'll be marked during the GC mark phase */
467
- for (j = 0 ; j < c_node_set->nodeNr ; j++) {
468
- noko_xml_node_wrap_node_set_result(c_node_set->nodeTab[j], rb_node_set);
465
+ if (c_node_set) {
466
+ /* create ruby objects for all the results, so they'll be marked during the GC mark phase */
467
+ for (j = 0 ; j < c_node_set->nodeNr ; j++) {
468
+ noko_xml_node_wrap_node_set_result(c_node_set->nodeTab[j], rb_node_set);
469
+ }
469
470
  }
470
471
 
471
472
  return rb_node_set ;
@@ -499,18 +500,19 @@ noko_init_xml_node_set(void)
499
500
 
500
501
  rb_define_alloc_func(cNokogiriXmlNodeSet, xml_node_set_allocate);
501
502
 
502
- rb_define_method(cNokogiriXmlNodeSet, "length", length, 0);
503
- rb_define_method(cNokogiriXmlNodeSet, "[]", slice, -1);
504
- rb_define_method(cNokogiriXmlNodeSet, "slice", slice, -1);
505
- rb_define_method(cNokogiriXmlNodeSet, "push", push, 1);
506
- rb_define_method(cNokogiriXmlNodeSet, "|", rb_xml_node_set_union, 1);
503
+ rb_define_method(cNokogiriXmlNodeSet, "&", intersection, 1);
507
504
  rb_define_method(cNokogiriXmlNodeSet, "-", minus, 1);
508
- rb_define_method(cNokogiriXmlNodeSet, "unlink", unlink_nodeset, 0);
509
- rb_define_method(cNokogiriXmlNodeSet, "to_a", to_array, 0);
510
- rb_define_method(cNokogiriXmlNodeSet, "dup", duplicate, 0);
505
+ rb_define_method(cNokogiriXmlNodeSet, "[]", slice, -1);
511
506
  rb_define_method(cNokogiriXmlNodeSet, "delete", delete, 1);
512
- rb_define_method(cNokogiriXmlNodeSet, "&", intersection, 1);
513
507
  rb_define_method(cNokogiriXmlNodeSet, "include?", include_eh, 1);
508
+ rb_define_method(cNokogiriXmlNodeSet, "length", length, 0);
509
+ rb_define_method(cNokogiriXmlNodeSet, "push", push, 1);
510
+ rb_define_method(cNokogiriXmlNodeSet, "slice", slice, -1);
511
+ rb_define_method(cNokogiriXmlNodeSet, "to_a", to_array, 0);
512
+ rb_define_method(cNokogiriXmlNodeSet, "unlink", unlink_nodeset, 0);
513
+ rb_define_method(cNokogiriXmlNodeSet, "|", rb_xml_node_set_union, 1);
514
+
515
+ rb_define_private_method(cNokogiriXmlNodeSet, "initialize_copy", rb_xml_node_set_initialize_copy, 1);
514
516
 
515
517
  decorate = rb_intern("decorate");
516
518
  }