nokogiri 1.15.4 → 1.17.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (98)
  1. checksums.yaml +4 -4
  2. data/Gemfile +12 -19
  3. data/README.md +8 -1
  4. data/dependencies.yml +9 -8
  5. data/ext/nokogiri/extconf.rb +194 -141
  6. data/ext/nokogiri/gumbo.c +69 -53
  7. data/ext/nokogiri/html4_document.c +10 -4
  8. data/ext/nokogiri/html4_element_description.c +18 -18
  9. data/ext/nokogiri/html4_sax_parser.c +40 -0
  10. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  11. data/ext/nokogiri/html4_sax_push_parser.c +26 -25
  12. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  13. data/ext/nokogiri/nokogiri.c +9 -2
  14. data/ext/nokogiri/nokogiri.h +25 -33
  15. data/ext/nokogiri/test_global_handlers.c +1 -1
  16. data/ext/nokogiri/xml_attr.c +1 -1
  17. data/ext/nokogiri/xml_cdata.c +3 -12
  18. data/ext/nokogiri/xml_comment.c +3 -8
  19. data/ext/nokogiri/xml_document.c +167 -156
  20. data/ext/nokogiri/xml_document_fragment.c +10 -25
  21. data/ext/nokogiri/xml_dtd.c +1 -1
  22. data/ext/nokogiri/xml_element_content.c +9 -9
  23. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  24. data/ext/nokogiri/xml_namespace.c +6 -10
  25. data/ext/nokogiri/xml_node.c +142 -108
  26. data/ext/nokogiri/xml_node_set.c +46 -44
  27. data/ext/nokogiri/xml_reader.c +74 -100
  28. data/ext/nokogiri/xml_relax_ng.c +35 -56
  29. data/ext/nokogiri/xml_sax_parser.c +156 -88
  30. data/ext/nokogiri/xml_sax_parser_context.c +214 -128
  31. data/ext/nokogiri/xml_sax_push_parser.c +69 -50
  32. data/ext/nokogiri/xml_schema.c +51 -87
  33. data/ext/nokogiri/xml_syntax_error.c +19 -11
  34. data/ext/nokogiri/xml_text.c +3 -6
  35. data/ext/nokogiri/xml_xpath_context.c +4 -7
  36. data/ext/nokogiri/xslt_stylesheet.c +16 -11
  37. data/gumbo-parser/Makefile +18 -0
  38. data/gumbo-parser/src/error.c +76 -48
  39. data/gumbo-parser/src/error.h +5 -1
  40. data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
  41. data/gumbo-parser/src/parser.c +64 -23
  42. data/gumbo-parser/src/tokenizer.c +7 -6
  43. data/lib/nokogiri/class_resolver.rb +1 -1
  44. data/lib/nokogiri/css/node.rb +6 -2
  45. data/lib/nokogiri/css/parser.rb +6 -4
  46. data/lib/nokogiri/css/parser.y +2 -2
  47. data/lib/nokogiri/css/parser_extras.rb +6 -66
  48. data/lib/nokogiri/css/selector_cache.rb +38 -0
  49. data/lib/nokogiri/css/tokenizer.rb +4 -4
  50. data/lib/nokogiri/css/tokenizer.rex +9 -8
  51. data/lib/nokogiri/css/xpath_visitor.rb +43 -27
  52. data/lib/nokogiri/css.rb +86 -20
  53. data/lib/nokogiri/decorators/slop.rb +3 -5
  54. data/lib/nokogiri/encoding_handler.rb +2 -2
  55. data/lib/nokogiri/html4/document.rb +45 -24
  56. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  57. data/lib/nokogiri/html4/encoding_reader.rb +2 -2
  58. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  59. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  60. data/lib/nokogiri/html4.rb +9 -14
  61. data/lib/nokogiri/html5/builder.rb +40 -0
  62. data/lib/nokogiri/html5/document.rb +61 -30
  63. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  64. data/lib/nokogiri/html5/node.rb +4 -4
  65. data/lib/nokogiri/html5.rb +114 -138
  66. data/lib/nokogiri/version/constant.rb +1 -1
  67. data/lib/nokogiri/version/info.rb +6 -5
  68. data/lib/nokogiri/xml/attr.rb +2 -2
  69. data/lib/nokogiri/xml/builder.rb +8 -1
  70. data/lib/nokogiri/xml/document.rb +74 -31
  71. data/lib/nokogiri/xml/document_fragment.rb +86 -15
  72. data/lib/nokogiri/xml/namespace.rb +1 -2
  73. data/lib/nokogiri/xml/node.rb +113 -35
  74. data/lib/nokogiri/xml/node_set.rb +12 -10
  75. data/lib/nokogiri/xml/parse_options.rb +1 -1
  76. data/lib/nokogiri/xml/pp/node.rb +6 -1
  77. data/lib/nokogiri/xml/reader.rb +51 -17
  78. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  79. data/lib/nokogiri/xml/sax/document.rb +174 -83
  80. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  81. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  82. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  83. data/lib/nokogiri/xml/sax.rb +48 -0
  84. data/lib/nokogiri/xml/schema.rb +112 -45
  85. data/lib/nokogiri/xml/searchable.rb +9 -11
  86. data/lib/nokogiri/xml/syntax_error.rb +23 -1
  87. data/lib/nokogiri/xml.rb +14 -25
  88. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  89. data/lib/nokogiri/xslt.rb +4 -10
  90. data/lib/nokogiri.rb +1 -1
  91. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  92. data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
  93. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  94. metadata +15 -14
  95. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
  96. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
  97. data/ports/archives/libxml2-2.11.5.tar.xz +0 -0
  98. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
data/ext/nokogiri/xml_node.c
@@ -28,7 +28,6 @@ _xml_node_mark(void *ptr)
28
28
  }
29
29
  }
30
30
 
31
- #ifdef HAVE_RB_GC_LOCATION
32
31
  static void
33
32
  _xml_node_update_references(void *ptr)
34
33
  {
@@ -38,12 +37,9 @@ _xml_node_update_references(void *ptr)
38
37
  node->_private = (void *)rb_gc_location((VALUE)node->_private);
39
38
  }
40
39
  }
41
- #else
42
- # define _xml_node_update_references 0
43
- #endif
44
40
 
45
- static const rb_data_type_t nokogiri_node_type = {
46
- .wrap_struct_name = "Nokogiri::XML::Node",
41
+ static const rb_data_type_t xml_node_type = {
42
+ .wrap_struct_name = "xmlNode",
47
43
  .function = {
48
44
  .dmark = _xml_node_mark,
49
45
  .dcompact = _xml_node_update_references,
@@ -51,6 +47,24 @@ static const rb_data_type_t nokogiri_node_type = {
51
47
  .flags = RUBY_TYPED_FREE_IMMEDIATELY,
52
48
  };
53
49
 
50
+ static VALUE
51
+ _xml_node_alloc(VALUE klass)
52
+ {
53
+ return TypedData_Wrap_Struct(klass, &xml_node_type, NULL);
54
+ }
55
+
56
+ static void
57
+ _xml_node_data_ptr_set(VALUE rb_node, xmlNodePtr c_node)
58
+ {
59
+ assert(DATA_PTR(rb_node) == NULL);
60
+ assert(c_node->_private == NULL);
61
+
62
+ DATA_PTR(rb_node) = c_node;
63
+ c_node->_private = (void *)rb_node;
64
+
65
+ return;
66
+ }
67
+
54
68
  static void
55
69
  relink_namespace(xmlNodePtr reparented)
56
70
  {
@@ -145,7 +159,7 @@ relink_namespace(xmlNodePtr reparented)
145
159
  /* reparent. */
146
160
  if (NULL == reparented->ns) { return; }
147
161
 
148
- /* When a node gets reparented, walk it's children to make sure that */
162
+ /* When a node gets reparented, walk its children to make sure that */
149
163
  /* their namespaces are reparented as well. */
150
164
  child = reparented->children;
151
165
  while (NULL != child) {
@@ -948,51 +962,30 @@ internal_subset(VALUE self)
948
962
  return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
949
963
  }
950
964
 
951
- /*
952
- * :call-seq:
953
- * dup → Nokogiri::XML::Node
954
- * dup(depth) → Nokogiri::XML::Node
955
- * dup(depth, new_parent_doc) → Nokogiri::XML::Node
956
- *
957
- * Copy this node.
958
- *
959
- * [Parameters]
960
- * - +depth+ 0 is a shallow copy, 1 (the default) is a deep copy.
961
- * - +new_parent_doc+
962
- * The new node's parent Document. Defaults to the this node's document.
963
- *
964
- * [Returns] The new Nokgiri::XML::Node
965
- */
965
+ /* :nodoc: */
966
966
  static VALUE
967
- duplicate_node(int argc, VALUE *argv, VALUE self)
967
+ rb_xml_node_initialize_copy_with_args(VALUE rb_self, VALUE rb_other, VALUE rb_level, VALUE rb_new_parent_doc)
968
968
  {
969
- VALUE r_level, r_new_parent_doc;
970
- int level;
971
- int n_args;
972
- xmlDocPtr new_parent_doc;
973
- xmlNodePtr node, dup;
969
+ xmlNodePtr c_self, c_other;
970
+ int c_level;
971
+ xmlDocPtr c_new_parent_doc;
972
+ VALUE rb_node_cache;
974
973
 
975
- Noko_Node_Get_Struct(self, xmlNode, node);
976
-
977
- n_args = rb_scan_args(argc, argv, "02", &r_level, &r_new_parent_doc);
974
+ Noko_Node_Get_Struct(rb_other, xmlNode, c_other);
975
+ c_level = (int)NUM2INT(rb_level);
976
+ c_new_parent_doc = noko_xml_document_unwrap(rb_new_parent_doc);
978
977
 
979
- if (n_args < 1) {
980
- r_level = INT2NUM((long)1);
981
- }
982
- level = (int)NUM2INT(r_level);
978
+ c_self = xmlDocCopyNode(c_other, c_new_parent_doc, c_level);
979
+ if (c_self == NULL) { return Qnil; }
983
980
 
984
- if (n_args < 2) {
985
- new_parent_doc = node->doc;
986
- } else {
987
- new_parent_doc = noko_xml_document_unwrap(r_new_parent_doc);
988
- }
981
+ _xml_node_data_ptr_set(rb_self, c_self);
982
+ noko_xml_document_pin_node(c_self);
989
983
 
990
- dup = xmlDocCopyNode(node, new_parent_doc, level);
991
- if (dup == NULL) { return Qnil; }
984
+ rb_node_cache = DOC_NODE_CACHE(c_new_parent_doc);
985
+ rb_ary_push(rb_node_cache, rb_self);
986
+ rb_funcall(rb_new_parent_doc, id_decorate, 1, rb_self);
992
987
 
993
- noko_xml_document_pin_node(dup);
994
-
995
- return noko_xml_node_wrap(rb_obj_class(self), dup);
988
+ return rb_self;
996
989
  }
997
990
 
998
991
  /*
@@ -1078,17 +1071,10 @@ previous_element(VALUE self)
1078
1071
  xmlNodePtr node, sibling;
1079
1072
  Noko_Node_Get_Struct(self, xmlNode, node);
1080
1073
 
1081
- /*
1082
- * note that we don't use xmlPreviousElementSibling here because it's buggy pre-2.7.7.
1083
- */
1084
- sibling = node->prev;
1074
+ sibling = xmlPreviousElementSibling(node);
1085
1075
  if (!sibling) { return Qnil; }
1086
1076
 
1087
- while (sibling && sibling->type != XML_ELEMENT_NODE) {
1088
- sibling = sibling->prev;
1089
- }
1090
-
1091
- return sibling ? noko_xml_node_wrap(Qnil, sibling) : Qnil ;
1077
+ return noko_xml_node_wrap(Qnil, sibling);
1092
1078
  }
1093
1079
 
1094
1080
  /* :nodoc: */
@@ -1504,9 +1490,44 @@ node_type(VALUE self)
1504
1490
 
1505
1491
  /*
1506
1492
  * call-seq:
1507
- * content=
1493
+ * native_content=(input)
1494
+ *
1495
+ * Set the content of this node to +input+.
1496
+ *
1497
+ * [Parameters]
1498
+ * - +input+ (String) The new content for this node.
1499
+ *
1500
+ * ⚠ This method behaves differently depending on the node type. For Text, CDATA, Comment, and
1501
+ * ProcessingInstruction nodes, it treats the input as raw content, which means that the final DOM
1502
+ * will contain the entity-escaped version of the input (see example below). For Element and Attr
1503
+ * nodes, it treats the input as parsed content and expects it to be valid markup that is already
1504
+ * entity-escaped.
1505
+ *
1506
+ * 💡 Use Node#content= for a more consistent API across node types.
1507
+ *
1508
+ * [Example]
1509
+ * Note the behavior differences of this method between Text and Element nodes:
1508
1510
  *
1509
- * Set the content for this Node
1511
+ * doc = Nokogiri::HTML::Document.parse(<<~HTML)
1512
+ * <html>
1513
+ * <body>
1514
+ * <div id="first">asdf</div>
1515
+ * <div id="second">asdf</div>
1516
+ * HTML
1517
+ *
1518
+ * text_node = doc.at_css("div#first").children.first
1519
+ * div_node = doc.at_css("div#second")
1520
+ *
1521
+ * value = "You &amp; Me"
1522
+ *
1523
+ * text_node.native_content = value
1524
+ * div_node.native_content = value
1525
+ *
1526
+ * doc.css("div").to_html
1527
+ * # => "<div id=\"first\">You &amp;amp; Me</div>
1528
+ * # <div id=\"second\">You &amp; Me</div>"
1529
+ *
1530
+ * See also: #content=
1510
1531
  */
1511
1532
  static VALUE
1512
1533
  set_native_content(VALUE self, VALUE content)
@@ -1817,12 +1838,12 @@ output_escaped_string(VALUE out, xmlChar const *start, bool attr)
1817
1838
  ++next;
1818
1839
  continue;
1819
1840
  }
1820
- output_partial_string(out, (char const *)start, next - start);
1841
+ output_partial_string(out, (char const *)start, (size_t)(next - start));
1821
1842
  output_string(out, replacement);
1822
1843
  next += replaced_bytes;
1823
1844
  start = next;
1824
1845
  }
1825
- output_partial_string(out, (char const *)start, next - start);
1846
+ output_partial_string(out, (char const *)start, (size_t)(next - start));
1826
1847
  }
1827
1848
 
1828
1849
  static bool
@@ -1853,13 +1874,19 @@ is_one_of(xmlNodePtr node, char const *const *tagnames, size_t num_tagnames)
1853
1874
  if (name == NULL) { // fragments don't have a name
1854
1875
  return false;
1855
1876
  }
1877
+
1878
+ if (node->ns != NULL) {
1879
+ // if the node has a namespace, it's in a foreign context and is not one of the HTML tags we're
1880
+ // matching against.
1881
+ return false;
1882
+ }
1883
+
1856
1884
  for (size_t idx = 0; idx < num_tagnames; ++idx) {
1857
1885
  if (!strcmp(name, tagnames[idx])) {
1858
1886
  return true;
1859
1887
  }
1860
1888
  }
1861
1889
  return false;
1862
-
1863
1890
  }
1864
1891
 
1865
1892
  static void
@@ -1887,17 +1914,7 @@ output_node(
1887
1914
  // Add attributes.
1888
1915
  for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
1889
1916
  output_char(out, ' ');
1890
- output_attr_name(out, attr);
1891
- if (attr->children) {
1892
- output_string(out, "=\"");
1893
- xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1894
- output_escaped_string(out, value, true);
1895
- xmlFree(value);
1896
- output_char(out, '"');
1897
- } else {
1898
- // Output name=""
1899
- output_string(out, "=\"\"");
1900
- }
1917
+ output_node(out, (xmlNodePtr)attr, preserve_newline);
1901
1918
  }
1902
1919
  output_char(out, '>');
1903
1920
 
@@ -1915,6 +1932,22 @@ output_node(
1915
1932
  }
1916
1933
  break;
1917
1934
 
1935
+ case XML_ATTRIBUTE_NODE: {
1936
+ xmlAttrPtr attr = (xmlAttrPtr)node;
1937
+ output_attr_name(out, attr);
1938
+ if (attr->children) {
1939
+ output_string(out, "=\"");
1940
+ xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
1941
+ output_escaped_string(out, value, true);
1942
+ xmlFree(value);
1943
+ output_char(out, '"');
1944
+ } else {
1945
+ // Output name=""
1946
+ output_string(out, "=\"\"");
1947
+ }
1948
+ }
1949
+ break;
1950
+
1918
1951
  case XML_TEXT_NODE:
1919
1952
  if (node->parent
1920
1953
  && is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS,
@@ -2030,11 +2063,11 @@ rb_xml_node_line_set(VALUE rb_node, VALUE rb_line_number)
2030
2063
  // libxml2 optionally uses xmlNode.psvi to store longer line numbers, but only for text nodes.
2031
2064
  // search for "psvi" in SAX2.c and tree.c to learn more.
2032
2065
  if (line_number < 65535) {
2033
- c_node->line = (short) line_number;
2066
+ c_node->line = (short unsigned)line_number;
2034
2067
  } else {
2035
2068
  c_node->line = 65535;
2036
2069
  if (c_node->type == XML_TEXT_NODE) {
2037
- c_node->psvi = (void *)(ptrdiff_t) line_number;
2070
+ c_node->psvi = (void *)(ptrdiff_t)line_number;
2038
2071
  }
2039
2072
  }
2040
2073
 
@@ -2058,8 +2091,7 @@ rb_xml_node_new(int argc, VALUE *argv, VALUE klass)
2058
2091
  rb_raise(rb_eArgError, "document must be a Nokogiri::XML::Node");
2059
2092
  }
2060
2093
  if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) {
2061
- // TODO: deprecate allowing Node
2062
- NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri.");
2094
+ NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.13.0, remove in v1.17.0
2063
2095
  }
2064
2096
  Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node);
2065
2097
 
@@ -2095,7 +2127,7 @@ dump_html(VALUE self)
2095
2127
 
2096
2128
  buf = xmlBufferCreate() ;
2097
2129
  htmlNodeDump(buf, node->doc, node);
2098
- html = NOKOGIRI_STR_NEW2(buf->content);
2130
+ html = NOKOGIRI_STR_NEW2(xmlBufferContent(buf));
2099
2131
  xmlBufferFree(buf);
2100
2132
  return html ;
2101
2133
  }
@@ -2119,36 +2151,38 @@ compare(VALUE self, VALUE _other)
2119
2151
 
2120
2152
  /*
2121
2153
  * call-seq:
2122
- * process_xincludes(options)
2154
+ * process_xincludes(flags)
2123
2155
  *
2124
2156
  * Loads and substitutes all xinclude elements below the node. The
2125
- * parser context will be initialized with +options+.
2157
+ * parser context will be initialized with +flags+.
2126
2158
  */
2127
2159
  static VALUE
2128
- process_xincludes(VALUE self, VALUE options)
2160
+ noko_xml_node__process_xincludes(VALUE rb_node, VALUE rb_flags)
2129
2161
  {
2130
- int rcode ;
2131
- xmlNodePtr node;
2132
- VALUE error_list = rb_ary_new();
2162
+ int status ;
2163
+ xmlNodePtr c_node;
2164
+ VALUE rb_errors = rb_ary_new();
2165
+ libxmlStructuredErrorHandlerState handler_state;
2133
2166
 
2134
- Noko_Node_Get_Struct(self, xmlNode, node);
2167
+ Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
2135
2168
 
2136
- xmlSetStructuredErrorFunc((void *)error_list, Nokogiri_error_array_pusher);
2137
- rcode = xmlXIncludeProcessTreeFlags(node, (int)NUM2INT(options));
2138
- xmlSetStructuredErrorFunc(NULL, NULL);
2169
+ noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
2170
+
2171
+ status = xmlXIncludeProcessTreeFlags(c_node, (int)NUM2INT(rb_flags));
2172
+
2173
+ noko__structured_error_func_restore(&handler_state);
2139
2174
 
2140
- if (rcode < 0) {
2141
- xmlErrorPtr error;
2175
+ if (status < 0) {
2176
+ VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
2142
2177
 
2143
- error = xmlGetLastError();
2144
- if (error) {
2145
- rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
2178
+ if (RB_TEST(exception)) {
2179
+ rb_exc_raise(exception);
2146
2180
  } else {
2147
2181
  rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
2148
2182
  }
2149
2183
  }
2150
2184
 
2151
- return self;
2185
+ return rb_node;
2152
2186
  }
2153
2187
 
2154
2188
 
@@ -2170,14 +2204,7 @@ in_context(VALUE self, VALUE _str, VALUE _options)
2170
2204
  node_children = node->children;
2171
2205
  doc_children = node->doc->children;
2172
2206
 
2173
- xmlSetStructuredErrorFunc((void *)err, Nokogiri_error_array_pusher);
2174
-
2175
- /* Twiddle global variable because of a bug in libxml2.
2176
- * http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
2177
- */
2178
- #ifndef HTML_PARSE_NOIMPLIED
2179
- htmlHandleOmittedElem(0);
2180
- #endif
2207
+ xmlSetStructuredErrorFunc((void *)err, noko__error_array_pusher);
2181
2208
 
2182
2209
  /* This function adds a fake node to the child of +node+. If the parser
2183
2210
  * does not exit cleanly with XML_ERR_OK, the list is freed. This can
@@ -2207,10 +2234,6 @@ in_context(VALUE self, VALUE _str, VALUE _options)
2207
2234
  child_iter = child_iter->next;
2208
2235
  }
2209
2236
 
2210
- #ifndef HTML_PARSE_NOIMPLIED
2211
- htmlHandleOmittedElem(1);
2212
- #endif
2213
-
2214
2237
  xmlSetStructuredErrorFunc(NULL, NULL);
2215
2238
 
2216
2239
  /*
@@ -2259,6 +2282,15 @@ in_context(VALUE self, VALUE _str, VALUE _options)
2259
2282
  return noko_xml_node_set_wrap(set, doc);
2260
2283
  }
2261
2284
 
2285
+ /* :nodoc: */
2286
+ VALUE
2287
+ rb_xml_node_data_ptr_eh(VALUE self)
2288
+ {
2289
+ xmlNodePtr c_node;
2290
+ Noko_Node_Get_Struct(self, xmlNode, c_node);
2291
+ return c_node ? Qtrue : Qfalse;
2292
+ }
2293
+
2262
2294
  VALUE
2263
2295
  noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
2264
2296
  {
@@ -2324,8 +2356,8 @@ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
2324
2356
  }
2325
2357
  }
2326
2358
 
2327
- rb_node = TypedData_Wrap_Struct(rb_class, &nokogiri_node_type, c_node) ;
2328
- c_node->_private = (void *)rb_node;
2359
+ rb_node = _xml_node_alloc(rb_class);
2360
+ _xml_node_data_ptr_set(rb_node, c_node);
2329
2361
 
2330
2362
  if (node_has_a_document) {
2331
2363
  rb_document = DOC_RUBY_OBJECT(c_doc);
@@ -2361,7 +2393,7 @@ noko_init_xml_node(void)
2361
2393
  {
2362
2394
  cNokogiriXmlNode = rb_define_class_under(mNokogiriXml, "Node", rb_cObject);
2363
2395
 
2364
- rb_undef_alloc_func(cNokogiriXmlNode);
2396
+ rb_define_alloc_func(cNokogiriXmlNode, _xml_node_alloc);
2365
2397
 
2366
2398
  rb_define_singleton_method(cNokogiriXmlNode, "new", rb_xml_node_new, -1);
2367
2399
 
@@ -2375,8 +2407,8 @@ noko_init_xml_node(void)
2375
2407
  rb_define_method(cNokogiriXmlNode, "content", rb_xml_node_content, 0);
2376
2408
  rb_define_method(cNokogiriXmlNode, "create_external_subset", create_external_subset, 3);
2377
2409
  rb_define_method(cNokogiriXmlNode, "create_internal_subset", create_internal_subset, 3);
2410
+ rb_define_method(cNokogiriXmlNode, "data_ptr?", rb_xml_node_data_ptr_eh, 0);
2378
2411
  rb_define_method(cNokogiriXmlNode, "document", rb_xml_node_document, 0);
2379
- rb_define_method(cNokogiriXmlNode, "dup", duplicate_node, -1);
2380
2412
  rb_define_method(cNokogiriXmlNode, "element_children", rb_xml_node_element_children, 0);
2381
2413
  rb_define_method(cNokogiriXmlNode, "encode_special_chars", encode_special_chars, 1);
2382
2414
  rb_define_method(cNokogiriXmlNode, "external_subset", external_subset, 0);
@@ -2405,6 +2437,8 @@ noko_init_xml_node(void)
2405
2437
  rb_define_method(cNokogiriXmlNode, "previous_sibling", previous_sibling, 0);
2406
2438
  rb_define_method(cNokogiriXmlNode, "unlink", unlink_node, 0);
2407
2439
 
2440
+ rb_define_protected_method(cNokogiriXmlNode, "initialize_copy_with_args", rb_xml_node_initialize_copy_with_args, 3);
2441
+
2408
2442
  rb_define_private_method(cNokogiriXmlNode, "add_child_node", add_child, 1);
2409
2443
  rb_define_private_method(cNokogiriXmlNode, "add_next_sibling_node", add_next_sibling, 1);
2410
2444
  rb_define_private_method(cNokogiriXmlNode, "add_previous_sibling_node", add_previous_sibling, 1);
@@ -2415,7 +2449,7 @@ noko_init_xml_node(void)
2415
2449
  rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4);
2416
2450
  rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0);
2417
2451
  rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1);
2418
- rb_define_private_method(cNokogiriXmlNode, "process_xincludes", process_xincludes, 1);
2452
+ rb_define_private_method(cNokogiriXmlNode, "process_xincludes", noko_xml_node__process_xincludes, 1);
2419
2453
  rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1);
2420
2454
  rb_define_private_method(cNokogiriXmlNode, "set", set, 2);
2421
2455
  rb_define_private_method(cNokogiriXmlNode, "set_namespace", set_namespace, 1);
data/ext/nokogiri/xml_node_set.c
@@ -68,15 +68,8 @@ xml_node_set_deallocate(void *data)
68
68
  xmlFree(node_set);
69
69
  }
70
70
 
71
-
72
- static VALUE
73
- xml_node_set_allocate(VALUE klass)
74
- {
75
- return noko_xml_node_set_wrap(xmlXPathNodeSetCreate(NULL), Qnil);
76
- }
77
-
78
71
  static const rb_data_type_t xml_node_set_type = {
79
- .wrap_struct_name = "Nokogiri::XML::NodeSet",
72
+ .wrap_struct_name = "xmlNodeSet",
80
73
  .function = {
81
74
  .dmark = xml_node_set_mark,
82
75
  .dfree = xml_node_set_deallocate,
@@ -84,6 +77,33 @@ static const rb_data_type_t xml_node_set_type = {
84
77
  .flags = RUBY_TYPED_FREE_IMMEDIATELY,
85
78
  };
86
79
 
80
+ static VALUE
81
+ xml_node_set_allocate(VALUE klass)
82
+ {
83
+ return TypedData_Wrap_Struct(klass, &xml_node_set_type, xmlXPathNodeSetCreate(NULL));
84
+ }
85
+
86
+ /* :nodoc: */
87
+ static VALUE
88
+ rb_xml_node_set_initialize_copy(VALUE rb_self, VALUE rb_other)
89
+ {
90
+ xmlNodeSetPtr c_self, c_other;
91
+ VALUE rb_document;
92
+
93
+ TypedData_Get_Struct(rb_self, xmlNodeSet, &xml_node_set_type, c_self);
94
+ TypedData_Get_Struct(rb_other, xmlNodeSet, &xml_node_set_type, c_other);
95
+
96
+ xmlXPathNodeSetMerge(c_self, c_other);
97
+
98
+ rb_document = rb_iv_get(rb_other, "@document");
99
+ if (!NIL_P(rb_document)) {
100
+ rb_iv_set(rb_self, "@document", rb_document);
101
+ rb_funcall(rb_document, decorate, 1, rb_self);
102
+ }
103
+
104
+ return rb_self;
105
+ }
106
+
87
107
  static void
88
108
  xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val)
89
109
  {
@@ -112,27 +132,6 @@ xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val)
112
132
  cur->nodeTab[cur->nodeNr] = NULL;
113
133
  }
114
134
 
115
-
116
- /*
117
- * call-seq:
118
- * dup
119
- *
120
- * Duplicate this NodeSet. Note that the Nodes contained in the NodeSet are not
121
- * duplicated (similar to how Array and other Enumerable classes work).
122
- */
123
- static VALUE
124
- duplicate(VALUE rb_self)
125
- {
126
- xmlNodeSetPtr c_self;
127
- xmlNodeSetPtr dupl;
128
-
129
- TypedData_Get_Struct(rb_self, xmlNodeSet, &xml_node_set_type, c_self);
130
-
131
- dupl = xmlXPathNodeSetMerge(NULL, c_self);
132
-
133
- return noko_xml_node_set_wrap(dupl, rb_iv_get(rb_self, "@document"));
134
- }
135
-
136
135
  /*
137
136
  * call-seq:
138
137
  * length
@@ -453,19 +452,21 @@ noko_xml_node_set_wrap(xmlNodeSetPtr c_node_set, VALUE document)
453
452
  VALUE rb_node_set ;
454
453
 
455
454
  if (c_node_set == NULL) {
456
- c_node_set = xmlXPathNodeSetCreate(NULL);
455
+ rb_node_set = xml_node_set_allocate(cNokogiriXmlNodeSet);
456
+ } else {
457
+ rb_node_set = TypedData_Wrap_Struct(cNokogiriXmlNodeSet, &xml_node_set_type, c_node_set);
457
458
  }
458
459
 
459
- rb_node_set = TypedData_Wrap_Struct(cNokogiriXmlNodeSet, &xml_node_set_type, c_node_set);
460
-
461
460
  if (!NIL_P(document)) {
462
461
  rb_iv_set(rb_node_set, "@document", document);
463
462
  rb_funcall(document, decorate, 1, rb_node_set);
464
463
  }
465
464
 
466
- /* make sure we create ruby objects for all the results, so they'll be marked during the GC mark phase */
467
- for (j = 0 ; j < c_node_set->nodeNr ; j++) {
468
- noko_xml_node_wrap_node_set_result(c_node_set->nodeTab[j], rb_node_set);
465
+ if (c_node_set) {
466
+ /* create ruby objects for all the results, so they'll be marked during the GC mark phase */
467
+ for (j = 0 ; j < c_node_set->nodeNr ; j++) {
468
+ noko_xml_node_wrap_node_set_result(c_node_set->nodeTab[j], rb_node_set);
469
+ }
469
470
  }
470
471
 
471
472
  return rb_node_set ;
@@ -499,18 +500,19 @@ noko_init_xml_node_set(void)
499
500
 
500
501
  rb_define_alloc_func(cNokogiriXmlNodeSet, xml_node_set_allocate);
501
502
 
502
- rb_define_method(cNokogiriXmlNodeSet, "length", length, 0);
503
- rb_define_method(cNokogiriXmlNodeSet, "[]", slice, -1);
504
- rb_define_method(cNokogiriXmlNodeSet, "slice", slice, -1);
505
- rb_define_method(cNokogiriXmlNodeSet, "push", push, 1);
506
- rb_define_method(cNokogiriXmlNodeSet, "|", rb_xml_node_set_union, 1);
503
+ rb_define_method(cNokogiriXmlNodeSet, "&", intersection, 1);
507
504
  rb_define_method(cNokogiriXmlNodeSet, "-", minus, 1);
508
- rb_define_method(cNokogiriXmlNodeSet, "unlink", unlink_nodeset, 0);
509
- rb_define_method(cNokogiriXmlNodeSet, "to_a", to_array, 0);
510
- rb_define_method(cNokogiriXmlNodeSet, "dup", duplicate, 0);
505
+ rb_define_method(cNokogiriXmlNodeSet, "[]", slice, -1);
511
506
  rb_define_method(cNokogiriXmlNodeSet, "delete", delete, 1);
512
- rb_define_method(cNokogiriXmlNodeSet, "&", intersection, 1);
513
507
  rb_define_method(cNokogiriXmlNodeSet, "include?", include_eh, 1);
508
+ rb_define_method(cNokogiriXmlNodeSet, "length", length, 0);
509
+ rb_define_method(cNokogiriXmlNodeSet, "push", push, 1);
510
+ rb_define_method(cNokogiriXmlNodeSet, "slice", slice, -1);
511
+ rb_define_method(cNokogiriXmlNodeSet, "to_a", to_array, 0);
512
+ rb_define_method(cNokogiriXmlNodeSet, "unlink", unlink_nodeset, 0);
513
+ rb_define_method(cNokogiriXmlNodeSet, "|", rb_xml_node_set_union, 1);
514
+
515
+ rb_define_private_method(cNokogiriXmlNodeSet, "initialize_copy", rb_xml_node_set_initialize_copy, 1);
514
516
 
515
517
  decorate = rb_intern("decorate");
516
518
  }