nokogiri 1.15.4 → 1.17.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +12 -19
- data/README.md +8 -1
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +194 -141
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +26 -25
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +25 -33
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +3 -12
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +167 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -10
- data/ext/nokogiri/xml_node.c +142 -108
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +74 -100
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +214 -128
- data/ext/nokogiri/xml_sax_push_parser.c +69 -50
- data/ext/nokogiri/xml_schema.c +51 -87
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +3 -6
- data/ext/nokogiri/xml_xpath_context.c +4 -7
- data/ext/nokogiri/xslt_stylesheet.c +16 -11
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/error.c +76 -48
- data/gumbo-parser/src/error.h +5 -1
- data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
- data/gumbo-parser/src/parser.c +64 -23
- data/gumbo-parser/src/tokenizer.c +7 -6
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +43 -27
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +45 -24
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +2 -2
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -138
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +74 -31
- data/lib/nokogiri/xml/document_fragment.rb +86 -15
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node.rb +113 -35
- data/lib/nokogiri/xml/node_set.rb +12 -10
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +51 -17
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +9 -11
- data/lib/nokogiri/xml/syntax_error.rb +23 -1
- data/lib/nokogiri/xml.rb +14 -25
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +4 -10
- data/lib/nokogiri.rb +1 -1
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +15 -14
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
- data/ports/archives/libxml2-2.11.5.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
data/ext/nokogiri/xml_node.c
CHANGED
@@ -28,7 +28,6 @@ _xml_node_mark(void *ptr)
|
|
28
28
|
}
|
29
29
|
}
|
30
30
|
|
31
|
-
#ifdef HAVE_RB_GC_LOCATION
|
32
31
|
static void
|
33
32
|
_xml_node_update_references(void *ptr)
|
34
33
|
{
|
@@ -38,12 +37,9 @@ _xml_node_update_references(void *ptr)
|
|
38
37
|
node->_private = (void *)rb_gc_location((VALUE)node->_private);
|
39
38
|
}
|
40
39
|
}
|
41
|
-
#else
|
42
|
-
# define _xml_node_update_references 0
|
43
|
-
#endif
|
44
40
|
|
45
|
-
static const rb_data_type_t
|
46
|
-
.wrap_struct_name = "
|
41
|
+
static const rb_data_type_t xml_node_type = {
|
42
|
+
.wrap_struct_name = "xmlNode",
|
47
43
|
.function = {
|
48
44
|
.dmark = _xml_node_mark,
|
49
45
|
.dcompact = _xml_node_update_references,
|
@@ -51,6 +47,24 @@ static const rb_data_type_t nokogiri_node_type = {
|
|
51
47
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
52
48
|
};
|
53
49
|
|
50
|
+
static VALUE
|
51
|
+
_xml_node_alloc(VALUE klass)
|
52
|
+
{
|
53
|
+
return TypedData_Wrap_Struct(klass, &xml_node_type, NULL);
|
54
|
+
}
|
55
|
+
|
56
|
+
static void
|
57
|
+
_xml_node_data_ptr_set(VALUE rb_node, xmlNodePtr c_node)
|
58
|
+
{
|
59
|
+
assert(DATA_PTR(rb_node) == NULL);
|
60
|
+
assert(c_node->_private == NULL);
|
61
|
+
|
62
|
+
DATA_PTR(rb_node) = c_node;
|
63
|
+
c_node->_private = (void *)rb_node;
|
64
|
+
|
65
|
+
return;
|
66
|
+
}
|
67
|
+
|
54
68
|
static void
|
55
69
|
relink_namespace(xmlNodePtr reparented)
|
56
70
|
{
|
@@ -145,7 +159,7 @@ relink_namespace(xmlNodePtr reparented)
|
|
145
159
|
/* reparent. */
|
146
160
|
if (NULL == reparented->ns) { return; }
|
147
161
|
|
148
|
-
/* When a node gets reparented, walk
|
162
|
+
/* When a node gets reparented, walk its children to make sure that */
|
149
163
|
/* their namespaces are reparented as well. */
|
150
164
|
child = reparented->children;
|
151
165
|
while (NULL != child) {
|
@@ -948,51 +962,30 @@ internal_subset(VALUE self)
|
|
948
962
|
return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
|
949
963
|
}
|
950
964
|
|
951
|
-
/*
|
952
|
-
* :call-seq:
|
953
|
-
* dup → Nokogiri::XML::Node
|
954
|
-
* dup(depth) → Nokogiri::XML::Node
|
955
|
-
* dup(depth, new_parent_doc) → Nokogiri::XML::Node
|
956
|
-
*
|
957
|
-
* Copy this node.
|
958
|
-
*
|
959
|
-
* [Parameters]
|
960
|
-
* - +depth+ 0 is a shallow copy, 1 (the default) is a deep copy.
|
961
|
-
* - +new_parent_doc+
|
962
|
-
* The new node's parent Document. Defaults to the this node's document.
|
963
|
-
*
|
964
|
-
* [Returns] The new Nokgiri::XML::Node
|
965
|
-
*/
|
965
|
+
/* :nodoc: */
|
966
966
|
static VALUE
|
967
|
-
|
967
|
+
rb_xml_node_initialize_copy_with_args(VALUE rb_self, VALUE rb_other, VALUE rb_level, VALUE rb_new_parent_doc)
|
968
968
|
{
|
969
|
-
|
970
|
-
int
|
971
|
-
|
972
|
-
|
973
|
-
xmlNodePtr node, dup;
|
969
|
+
xmlNodePtr c_self, c_other;
|
970
|
+
int c_level;
|
971
|
+
xmlDocPtr c_new_parent_doc;
|
972
|
+
VALUE rb_node_cache;
|
974
973
|
|
975
|
-
Noko_Node_Get_Struct(
|
976
|
-
|
977
|
-
|
974
|
+
Noko_Node_Get_Struct(rb_other, xmlNode, c_other);
|
975
|
+
c_level = (int)NUM2INT(rb_level);
|
976
|
+
c_new_parent_doc = noko_xml_document_unwrap(rb_new_parent_doc);
|
978
977
|
|
979
|
-
|
980
|
-
|
981
|
-
}
|
982
|
-
level = (int)NUM2INT(r_level);
|
978
|
+
c_self = xmlDocCopyNode(c_other, c_new_parent_doc, c_level);
|
979
|
+
if (c_self == NULL) { return Qnil; }
|
983
980
|
|
984
|
-
|
985
|
-
|
986
|
-
} else {
|
987
|
-
new_parent_doc = noko_xml_document_unwrap(r_new_parent_doc);
|
988
|
-
}
|
981
|
+
_xml_node_data_ptr_set(rb_self, c_self);
|
982
|
+
noko_xml_document_pin_node(c_self);
|
989
983
|
|
990
|
-
|
991
|
-
|
984
|
+
rb_node_cache = DOC_NODE_CACHE(c_new_parent_doc);
|
985
|
+
rb_ary_push(rb_node_cache, rb_self);
|
986
|
+
rb_funcall(rb_new_parent_doc, id_decorate, 1, rb_self);
|
992
987
|
|
993
|
-
|
994
|
-
|
995
|
-
return noko_xml_node_wrap(rb_obj_class(self), dup);
|
988
|
+
return rb_self;
|
996
989
|
}
|
997
990
|
|
998
991
|
/*
|
@@ -1078,17 +1071,10 @@ previous_element(VALUE self)
|
|
1078
1071
|
xmlNodePtr node, sibling;
|
1079
1072
|
Noko_Node_Get_Struct(self, xmlNode, node);
|
1080
1073
|
|
1081
|
-
|
1082
|
-
* note that we don't use xmlPreviousElementSibling here because it's buggy pre-2.7.7.
|
1083
|
-
*/
|
1084
|
-
sibling = node->prev;
|
1074
|
+
sibling = xmlPreviousElementSibling(node);
|
1085
1075
|
if (!sibling) { return Qnil; }
|
1086
1076
|
|
1087
|
-
|
1088
|
-
sibling = sibling->prev;
|
1089
|
-
}
|
1090
|
-
|
1091
|
-
return sibling ? noko_xml_node_wrap(Qnil, sibling) : Qnil ;
|
1077
|
+
return noko_xml_node_wrap(Qnil, sibling);
|
1092
1078
|
}
|
1093
1079
|
|
1094
1080
|
/* :nodoc: */
|
@@ -1504,9 +1490,44 @@ node_type(VALUE self)
|
|
1504
1490
|
|
1505
1491
|
/*
|
1506
1492
|
* call-seq:
|
1507
|
-
*
|
1493
|
+
* native_content=(input)
|
1494
|
+
*
|
1495
|
+
* Set the content of this node to +input+.
|
1496
|
+
*
|
1497
|
+
* [Parameters]
|
1498
|
+
* - +input+ (String) The new content for this node.
|
1499
|
+
*
|
1500
|
+
* ⚠ This method behaves differently depending on the node type. For Text, CDATA, Comment, and
|
1501
|
+
* ProcessingInstruction nodes, it treats the input as raw content, which means that the final DOM
|
1502
|
+
* will contain the entity-escaped version of the input (see example below). For Element and Attr
|
1503
|
+
* nodes, it treats the input as parsed content and expects it to be valid markup that is already
|
1504
|
+
* entity-escaped.
|
1505
|
+
*
|
1506
|
+
* 💡 Use Node#content= for a more consistent API across node types.
|
1507
|
+
*
|
1508
|
+
* [Example]
|
1509
|
+
* Note the behavior differences of this method between Text and Element nodes:
|
1508
1510
|
*
|
1509
|
-
*
|
1511
|
+
* doc = Nokogiri::HTML::Document.parse(<<~HTML)
|
1512
|
+
* <html>
|
1513
|
+
* <body>
|
1514
|
+
* <div id="first">asdf</div>
|
1515
|
+
* <div id="second">asdf</div>
|
1516
|
+
* HTML
|
1517
|
+
*
|
1518
|
+
* text_node = doc.at_css("div#first").children.first
|
1519
|
+
* div_node = doc.at_css("div#second")
|
1520
|
+
*
|
1521
|
+
* value = "You & Me"
|
1522
|
+
*
|
1523
|
+
* text_node.native_content = value
|
1524
|
+
* div_node.native_content = value
|
1525
|
+
*
|
1526
|
+
* doc.css("div").to_html
|
1527
|
+
* # => "<div id=\"first\">You &amp; Me</div>
|
1528
|
+
* # <div id=\"second\">You & Me</div>"
|
1529
|
+
*
|
1530
|
+
* See also: #content=
|
1510
1531
|
*/
|
1511
1532
|
static VALUE
|
1512
1533
|
set_native_content(VALUE self, VALUE content)
|
@@ -1817,12 +1838,12 @@ output_escaped_string(VALUE out, xmlChar const *start, bool attr)
|
|
1817
1838
|
++next;
|
1818
1839
|
continue;
|
1819
1840
|
}
|
1820
|
-
output_partial_string(out, (char const *)start, next - start);
|
1841
|
+
output_partial_string(out, (char const *)start, (size_t)(next - start));
|
1821
1842
|
output_string(out, replacement);
|
1822
1843
|
next += replaced_bytes;
|
1823
1844
|
start = next;
|
1824
1845
|
}
|
1825
|
-
output_partial_string(out, (char const *)start, next - start);
|
1846
|
+
output_partial_string(out, (char const *)start, (size_t)(next - start));
|
1826
1847
|
}
|
1827
1848
|
|
1828
1849
|
static bool
|
@@ -1853,13 +1874,19 @@ is_one_of(xmlNodePtr node, char const *const *tagnames, size_t num_tagnames)
|
|
1853
1874
|
if (name == NULL) { // fragments don't have a name
|
1854
1875
|
return false;
|
1855
1876
|
}
|
1877
|
+
|
1878
|
+
if (node->ns != NULL) {
|
1879
|
+
// if the node has a namespace, it's in a foreign context and is not one of the HTML tags we're
|
1880
|
+
// matching against.
|
1881
|
+
return false;
|
1882
|
+
}
|
1883
|
+
|
1856
1884
|
for (size_t idx = 0; idx < num_tagnames; ++idx) {
|
1857
1885
|
if (!strcmp(name, tagnames[idx])) {
|
1858
1886
|
return true;
|
1859
1887
|
}
|
1860
1888
|
}
|
1861
1889
|
return false;
|
1862
|
-
|
1863
1890
|
}
|
1864
1891
|
|
1865
1892
|
static void
|
@@ -1887,17 +1914,7 @@ output_node(
|
|
1887
1914
|
// Add attributes.
|
1888
1915
|
for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
|
1889
1916
|
output_char(out, ' ');
|
1890
|
-
|
1891
|
-
if (attr->children) {
|
1892
|
-
output_string(out, "=\"");
|
1893
|
-
xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
|
1894
|
-
output_escaped_string(out, value, true);
|
1895
|
-
xmlFree(value);
|
1896
|
-
output_char(out, '"');
|
1897
|
-
} else {
|
1898
|
-
// Output name=""
|
1899
|
-
output_string(out, "=\"\"");
|
1900
|
-
}
|
1917
|
+
output_node(out, (xmlNodePtr)attr, preserve_newline);
|
1901
1918
|
}
|
1902
1919
|
output_char(out, '>');
|
1903
1920
|
|
@@ -1915,6 +1932,22 @@ output_node(
|
|
1915
1932
|
}
|
1916
1933
|
break;
|
1917
1934
|
|
1935
|
+
case XML_ATTRIBUTE_NODE: {
|
1936
|
+
xmlAttrPtr attr = (xmlAttrPtr)node;
|
1937
|
+
output_attr_name(out, attr);
|
1938
|
+
if (attr->children) {
|
1939
|
+
output_string(out, "=\"");
|
1940
|
+
xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
|
1941
|
+
output_escaped_string(out, value, true);
|
1942
|
+
xmlFree(value);
|
1943
|
+
output_char(out, '"');
|
1944
|
+
} else {
|
1945
|
+
// Output name=""
|
1946
|
+
output_string(out, "=\"\"");
|
1947
|
+
}
|
1948
|
+
}
|
1949
|
+
break;
|
1950
|
+
|
1918
1951
|
case XML_TEXT_NODE:
|
1919
1952
|
if (node->parent
|
1920
1953
|
&& is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS,
|
@@ -2030,11 +2063,11 @@ rb_xml_node_line_set(VALUE rb_node, VALUE rb_line_number)
|
|
2030
2063
|
// libxml2 optionally uses xmlNode.psvi to store longer line numbers, but only for text nodes.
|
2031
2064
|
// search for "psvi" in SAX2.c and tree.c to learn more.
|
2032
2065
|
if (line_number < 65535) {
|
2033
|
-
c_node->line = (short)
|
2066
|
+
c_node->line = (short unsigned)line_number;
|
2034
2067
|
} else {
|
2035
2068
|
c_node->line = 65535;
|
2036
2069
|
if (c_node->type == XML_TEXT_NODE) {
|
2037
|
-
c_node->psvi = (void *)(ptrdiff_t)
|
2070
|
+
c_node->psvi = (void *)(ptrdiff_t)line_number;
|
2038
2071
|
}
|
2039
2072
|
}
|
2040
2073
|
|
@@ -2058,8 +2091,7 @@ rb_xml_node_new(int argc, VALUE *argv, VALUE klass)
|
|
2058
2091
|
rb_raise(rb_eArgError, "document must be a Nokogiri::XML::Node");
|
2059
2092
|
}
|
2060
2093
|
if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) {
|
2061
|
-
// TODO:
|
2062
|
-
NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri.");
|
2094
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.13.0, remove in v1.17.0
|
2063
2095
|
}
|
2064
2096
|
Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node);
|
2065
2097
|
|
@@ -2095,7 +2127,7 @@ dump_html(VALUE self)
|
|
2095
2127
|
|
2096
2128
|
buf = xmlBufferCreate() ;
|
2097
2129
|
htmlNodeDump(buf, node->doc, node);
|
2098
|
-
html = NOKOGIRI_STR_NEW2(buf
|
2130
|
+
html = NOKOGIRI_STR_NEW2(xmlBufferContent(buf));
|
2099
2131
|
xmlBufferFree(buf);
|
2100
2132
|
return html ;
|
2101
2133
|
}
|
@@ -2119,36 +2151,38 @@ compare(VALUE self, VALUE _other)
|
|
2119
2151
|
|
2120
2152
|
/*
|
2121
2153
|
* call-seq:
|
2122
|
-
* process_xincludes(
|
2154
|
+
* process_xincludes(flags)
|
2123
2155
|
*
|
2124
2156
|
* Loads and substitutes all xinclude elements below the node. The
|
2125
|
-
* parser context will be initialized with +
|
2157
|
+
* parser context will be initialized with +flags+.
|
2126
2158
|
*/
|
2127
2159
|
static VALUE
|
2128
|
-
|
2160
|
+
noko_xml_node__process_xincludes(VALUE rb_node, VALUE rb_flags)
|
2129
2161
|
{
|
2130
|
-
int
|
2131
|
-
xmlNodePtr
|
2132
|
-
VALUE
|
2162
|
+
int status ;
|
2163
|
+
xmlNodePtr c_node;
|
2164
|
+
VALUE rb_errors = rb_ary_new();
|
2165
|
+
libxmlStructuredErrorHandlerState handler_state;
|
2133
2166
|
|
2134
|
-
Noko_Node_Get_Struct(
|
2167
|
+
Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
|
2135
2168
|
|
2136
|
-
|
2137
|
-
|
2138
|
-
|
2169
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
2170
|
+
|
2171
|
+
status = xmlXIncludeProcessTreeFlags(c_node, (int)NUM2INT(rb_flags));
|
2172
|
+
|
2173
|
+
noko__structured_error_func_restore(&handler_state);
|
2139
2174
|
|
2140
|
-
if (
|
2141
|
-
|
2175
|
+
if (status < 0) {
|
2176
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
2142
2177
|
|
2143
|
-
|
2144
|
-
|
2145
|
-
rb_exc_raise(Nokogiri_wrap_xml_syntax_error(error));
|
2178
|
+
if (RB_TEST(exception)) {
|
2179
|
+
rb_exc_raise(exception);
|
2146
2180
|
} else {
|
2147
2181
|
rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
|
2148
2182
|
}
|
2149
2183
|
}
|
2150
2184
|
|
2151
|
-
return
|
2185
|
+
return rb_node;
|
2152
2186
|
}
|
2153
2187
|
|
2154
2188
|
|
@@ -2170,14 +2204,7 @@ in_context(VALUE self, VALUE _str, VALUE _options)
|
|
2170
2204
|
node_children = node->children;
|
2171
2205
|
doc_children = node->doc->children;
|
2172
2206
|
|
2173
|
-
xmlSetStructuredErrorFunc((void *)err,
|
2174
|
-
|
2175
|
-
/* Twiddle global variable because of a bug in libxml2.
|
2176
|
-
* http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
|
2177
|
-
*/
|
2178
|
-
#ifndef HTML_PARSE_NOIMPLIED
|
2179
|
-
htmlHandleOmittedElem(0);
|
2180
|
-
#endif
|
2207
|
+
xmlSetStructuredErrorFunc((void *)err, noko__error_array_pusher);
|
2181
2208
|
|
2182
2209
|
/* This function adds a fake node to the child of +node+. If the parser
|
2183
2210
|
* does not exit cleanly with XML_ERR_OK, the list is freed. This can
|
@@ -2207,10 +2234,6 @@ in_context(VALUE self, VALUE _str, VALUE _options)
|
|
2207
2234
|
child_iter = child_iter->next;
|
2208
2235
|
}
|
2209
2236
|
|
2210
|
-
#ifndef HTML_PARSE_NOIMPLIED
|
2211
|
-
htmlHandleOmittedElem(1);
|
2212
|
-
#endif
|
2213
|
-
|
2214
2237
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
2215
2238
|
|
2216
2239
|
/*
|
@@ -2259,6 +2282,15 @@ in_context(VALUE self, VALUE _str, VALUE _options)
|
|
2259
2282
|
return noko_xml_node_set_wrap(set, doc);
|
2260
2283
|
}
|
2261
2284
|
|
2285
|
+
/* :nodoc: */
|
2286
|
+
VALUE
|
2287
|
+
rb_xml_node_data_ptr_eh(VALUE self)
|
2288
|
+
{
|
2289
|
+
xmlNodePtr c_node;
|
2290
|
+
Noko_Node_Get_Struct(self, xmlNode, c_node);
|
2291
|
+
return c_node ? Qtrue : Qfalse;
|
2292
|
+
}
|
2293
|
+
|
2262
2294
|
VALUE
|
2263
2295
|
noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
|
2264
2296
|
{
|
@@ -2324,8 +2356,8 @@ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
|
|
2324
2356
|
}
|
2325
2357
|
}
|
2326
2358
|
|
2327
|
-
rb_node =
|
2328
|
-
|
2359
|
+
rb_node = _xml_node_alloc(rb_class);
|
2360
|
+
_xml_node_data_ptr_set(rb_node, c_node);
|
2329
2361
|
|
2330
2362
|
if (node_has_a_document) {
|
2331
2363
|
rb_document = DOC_RUBY_OBJECT(c_doc);
|
@@ -2361,7 +2393,7 @@ noko_init_xml_node(void)
|
|
2361
2393
|
{
|
2362
2394
|
cNokogiriXmlNode = rb_define_class_under(mNokogiriXml, "Node", rb_cObject);
|
2363
2395
|
|
2364
|
-
|
2396
|
+
rb_define_alloc_func(cNokogiriXmlNode, _xml_node_alloc);
|
2365
2397
|
|
2366
2398
|
rb_define_singleton_method(cNokogiriXmlNode, "new", rb_xml_node_new, -1);
|
2367
2399
|
|
@@ -2375,8 +2407,8 @@ noko_init_xml_node(void)
|
|
2375
2407
|
rb_define_method(cNokogiriXmlNode, "content", rb_xml_node_content, 0);
|
2376
2408
|
rb_define_method(cNokogiriXmlNode, "create_external_subset", create_external_subset, 3);
|
2377
2409
|
rb_define_method(cNokogiriXmlNode, "create_internal_subset", create_internal_subset, 3);
|
2410
|
+
rb_define_method(cNokogiriXmlNode, "data_ptr?", rb_xml_node_data_ptr_eh, 0);
|
2378
2411
|
rb_define_method(cNokogiriXmlNode, "document", rb_xml_node_document, 0);
|
2379
|
-
rb_define_method(cNokogiriXmlNode, "dup", duplicate_node, -1);
|
2380
2412
|
rb_define_method(cNokogiriXmlNode, "element_children", rb_xml_node_element_children, 0);
|
2381
2413
|
rb_define_method(cNokogiriXmlNode, "encode_special_chars", encode_special_chars, 1);
|
2382
2414
|
rb_define_method(cNokogiriXmlNode, "external_subset", external_subset, 0);
|
@@ -2405,6 +2437,8 @@ noko_init_xml_node(void)
|
|
2405
2437
|
rb_define_method(cNokogiriXmlNode, "previous_sibling", previous_sibling, 0);
|
2406
2438
|
rb_define_method(cNokogiriXmlNode, "unlink", unlink_node, 0);
|
2407
2439
|
|
2440
|
+
rb_define_protected_method(cNokogiriXmlNode, "initialize_copy_with_args", rb_xml_node_initialize_copy_with_args, 3);
|
2441
|
+
|
2408
2442
|
rb_define_private_method(cNokogiriXmlNode, "add_child_node", add_child, 1);
|
2409
2443
|
rb_define_private_method(cNokogiriXmlNode, "add_next_sibling_node", add_next_sibling, 1);
|
2410
2444
|
rb_define_private_method(cNokogiriXmlNode, "add_previous_sibling_node", add_previous_sibling, 1);
|
@@ -2415,7 +2449,7 @@ noko_init_xml_node(void)
|
|
2415
2449
|
rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4);
|
2416
2450
|
rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0);
|
2417
2451
|
rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1);
|
2418
|
-
rb_define_private_method(cNokogiriXmlNode, "process_xincludes",
|
2452
|
+
rb_define_private_method(cNokogiriXmlNode, "process_xincludes", noko_xml_node__process_xincludes, 1);
|
2419
2453
|
rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1);
|
2420
2454
|
rb_define_private_method(cNokogiriXmlNode, "set", set, 2);
|
2421
2455
|
rb_define_private_method(cNokogiriXmlNode, "set_namespace", set_namespace, 1);
|
data/ext/nokogiri/xml_node_set.c
CHANGED
@@ -68,15 +68,8 @@ xml_node_set_deallocate(void *data)
|
|
68
68
|
xmlFree(node_set);
|
69
69
|
}
|
70
70
|
|
71
|
-
|
72
|
-
static VALUE
|
73
|
-
xml_node_set_allocate(VALUE klass)
|
74
|
-
{
|
75
|
-
return noko_xml_node_set_wrap(xmlXPathNodeSetCreate(NULL), Qnil);
|
76
|
-
}
|
77
|
-
|
78
71
|
static const rb_data_type_t xml_node_set_type = {
|
79
|
-
.wrap_struct_name = "
|
72
|
+
.wrap_struct_name = "xmlNodeSet",
|
80
73
|
.function = {
|
81
74
|
.dmark = xml_node_set_mark,
|
82
75
|
.dfree = xml_node_set_deallocate,
|
@@ -84,6 +77,33 @@ static const rb_data_type_t xml_node_set_type = {
|
|
84
77
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
85
78
|
};
|
86
79
|
|
80
|
+
static VALUE
|
81
|
+
xml_node_set_allocate(VALUE klass)
|
82
|
+
{
|
83
|
+
return TypedData_Wrap_Struct(klass, &xml_node_set_type, xmlXPathNodeSetCreate(NULL));
|
84
|
+
}
|
85
|
+
|
86
|
+
/* :nodoc: */
|
87
|
+
static VALUE
|
88
|
+
rb_xml_node_set_initialize_copy(VALUE rb_self, VALUE rb_other)
|
89
|
+
{
|
90
|
+
xmlNodeSetPtr c_self, c_other;
|
91
|
+
VALUE rb_document;
|
92
|
+
|
93
|
+
TypedData_Get_Struct(rb_self, xmlNodeSet, &xml_node_set_type, c_self);
|
94
|
+
TypedData_Get_Struct(rb_other, xmlNodeSet, &xml_node_set_type, c_other);
|
95
|
+
|
96
|
+
xmlXPathNodeSetMerge(c_self, c_other);
|
97
|
+
|
98
|
+
rb_document = rb_iv_get(rb_other, "@document");
|
99
|
+
if (!NIL_P(rb_document)) {
|
100
|
+
rb_iv_set(rb_self, "@document", rb_document);
|
101
|
+
rb_funcall(rb_document, decorate, 1, rb_self);
|
102
|
+
}
|
103
|
+
|
104
|
+
return rb_self;
|
105
|
+
}
|
106
|
+
|
87
107
|
static void
|
88
108
|
xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val)
|
89
109
|
{
|
@@ -112,27 +132,6 @@ xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val)
|
|
112
132
|
cur->nodeTab[cur->nodeNr] = NULL;
|
113
133
|
}
|
114
134
|
|
115
|
-
|
116
|
-
/*
|
117
|
-
* call-seq:
|
118
|
-
* dup
|
119
|
-
*
|
120
|
-
* Duplicate this NodeSet. Note that the Nodes contained in the NodeSet are not
|
121
|
-
* duplicated (similar to how Array and other Enumerable classes work).
|
122
|
-
*/
|
123
|
-
static VALUE
|
124
|
-
duplicate(VALUE rb_self)
|
125
|
-
{
|
126
|
-
xmlNodeSetPtr c_self;
|
127
|
-
xmlNodeSetPtr dupl;
|
128
|
-
|
129
|
-
TypedData_Get_Struct(rb_self, xmlNodeSet, &xml_node_set_type, c_self);
|
130
|
-
|
131
|
-
dupl = xmlXPathNodeSetMerge(NULL, c_self);
|
132
|
-
|
133
|
-
return noko_xml_node_set_wrap(dupl, rb_iv_get(rb_self, "@document"));
|
134
|
-
}
|
135
|
-
|
136
135
|
/*
|
137
136
|
* call-seq:
|
138
137
|
* length
|
@@ -453,19 +452,21 @@ noko_xml_node_set_wrap(xmlNodeSetPtr c_node_set, VALUE document)
|
|
453
452
|
VALUE rb_node_set ;
|
454
453
|
|
455
454
|
if (c_node_set == NULL) {
|
456
|
-
|
455
|
+
rb_node_set = xml_node_set_allocate(cNokogiriXmlNodeSet);
|
456
|
+
} else {
|
457
|
+
rb_node_set = TypedData_Wrap_Struct(cNokogiriXmlNodeSet, &xml_node_set_type, c_node_set);
|
457
458
|
}
|
458
459
|
|
459
|
-
rb_node_set = TypedData_Wrap_Struct(cNokogiriXmlNodeSet, &xml_node_set_type, c_node_set);
|
460
|
-
|
461
460
|
if (!NIL_P(document)) {
|
462
461
|
rb_iv_set(rb_node_set, "@document", document);
|
463
462
|
rb_funcall(document, decorate, 1, rb_node_set);
|
464
463
|
}
|
465
464
|
|
466
|
-
|
467
|
-
|
468
|
-
|
465
|
+
if (c_node_set) {
|
466
|
+
/* create ruby objects for all the results, so they'll be marked during the GC mark phase */
|
467
|
+
for (j = 0 ; j < c_node_set->nodeNr ; j++) {
|
468
|
+
noko_xml_node_wrap_node_set_result(c_node_set->nodeTab[j], rb_node_set);
|
469
|
+
}
|
469
470
|
}
|
470
471
|
|
471
472
|
return rb_node_set ;
|
@@ -499,18 +500,19 @@ noko_init_xml_node_set(void)
|
|
499
500
|
|
500
501
|
rb_define_alloc_func(cNokogiriXmlNodeSet, xml_node_set_allocate);
|
501
502
|
|
502
|
-
rb_define_method(cNokogiriXmlNodeSet, "
|
503
|
-
rb_define_method(cNokogiriXmlNodeSet, "[]", slice, -1);
|
504
|
-
rb_define_method(cNokogiriXmlNodeSet, "slice", slice, -1);
|
505
|
-
rb_define_method(cNokogiriXmlNodeSet, "push", push, 1);
|
506
|
-
rb_define_method(cNokogiriXmlNodeSet, "|", rb_xml_node_set_union, 1);
|
503
|
+
rb_define_method(cNokogiriXmlNodeSet, "&", intersection, 1);
|
507
504
|
rb_define_method(cNokogiriXmlNodeSet, "-", minus, 1);
|
508
|
-
rb_define_method(cNokogiriXmlNodeSet, "
|
509
|
-
rb_define_method(cNokogiriXmlNodeSet, "to_a", to_array, 0);
|
510
|
-
rb_define_method(cNokogiriXmlNodeSet, "dup", duplicate, 0);
|
505
|
+
rb_define_method(cNokogiriXmlNodeSet, "[]", slice, -1);
|
511
506
|
rb_define_method(cNokogiriXmlNodeSet, "delete", delete, 1);
|
512
|
-
rb_define_method(cNokogiriXmlNodeSet, "&", intersection, 1);
|
513
507
|
rb_define_method(cNokogiriXmlNodeSet, "include?", include_eh, 1);
|
508
|
+
rb_define_method(cNokogiriXmlNodeSet, "length", length, 0);
|
509
|
+
rb_define_method(cNokogiriXmlNodeSet, "push", push, 1);
|
510
|
+
rb_define_method(cNokogiriXmlNodeSet, "slice", slice, -1);
|
511
|
+
rb_define_method(cNokogiriXmlNodeSet, "to_a", to_array, 0);
|
512
|
+
rb_define_method(cNokogiriXmlNodeSet, "unlink", unlink_nodeset, 0);
|
513
|
+
rb_define_method(cNokogiriXmlNodeSet, "|", rb_xml_node_set_union, 1);
|
514
|
+
|
515
|
+
rb_define_private_method(cNokogiriXmlNodeSet, "initialize_copy", rb_xml_node_set_initialize_copy, 1);
|
514
516
|
|
515
517
|
decorate = rb_intern("decorate");
|
516
518
|
}
|