nokogiri 1.15.7 → 1.18.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +12 -17
- data/LICENSE-DEPENDENCIES.md +6 -6
- data/README.md +11 -5
- data/dependencies.yml +9 -8
- data/ext/nokogiri/extconf.rb +191 -154
- data/ext/nokogiri/gumbo.c +69 -53
- data/ext/nokogiri/html4_document.c +10 -4
- data/ext/nokogiri/html4_element_description.c +18 -18
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +48 -58
- data/ext/nokogiri/html4_sax_push_parser.c +26 -25
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +9 -2
- data/ext/nokogiri/nokogiri.h +25 -33
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +1 -1
- data/ext/nokogiri/xml_cdata.c +3 -12
- data/ext/nokogiri/xml_comment.c +3 -8
- data/ext/nokogiri/xml_document.c +167 -156
- data/ext/nokogiri/xml_document_fragment.c +10 -25
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_element_content.c +9 -9
- data/ext/nokogiri/xml_encoding_handler.c +4 -4
- data/ext/nokogiri/xml_namespace.c +6 -10
- data/ext/nokogiri/xml_node.c +135 -107
- data/ext/nokogiri/xml_node_set.c +46 -44
- data/ext/nokogiri/xml_reader.c +74 -100
- data/ext/nokogiri/xml_relax_ng.c +35 -56
- data/ext/nokogiri/xml_sax_parser.c +156 -88
- data/ext/nokogiri/xml_sax_parser_context.c +220 -128
- data/ext/nokogiri/xml_sax_push_parser.c +69 -50
- data/ext/nokogiri/xml_schema.c +51 -87
- data/ext/nokogiri/xml_syntax_error.c +19 -11
- data/ext/nokogiri/xml_text.c +3 -6
- data/ext/nokogiri/xml_xpath_context.c +104 -104
- data/ext/nokogiri/xslt_stylesheet.c +16 -11
- data/gumbo-parser/Makefile +18 -0
- data/gumbo-parser/src/ascii.c +2 -2
- data/gumbo-parser/src/error.c +76 -48
- data/gumbo-parser/src/error.h +5 -1
- data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
- data/gumbo-parser/src/parser.c +66 -25
- data/gumbo-parser/src/tokenizer.c +7 -6
- data/lib/nokogiri/class_resolver.rb +1 -1
- data/lib/nokogiri/css/node.rb +6 -2
- data/lib/nokogiri/css/parser.rb +6 -4
- data/lib/nokogiri/css/parser.y +2 -2
- data/lib/nokogiri/css/parser_extras.rb +6 -66
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -4
- data/lib/nokogiri/css/tokenizer.rex +9 -8
- data/lib/nokogiri/css/xpath_visitor.rb +44 -27
- data/lib/nokogiri/css.rb +86 -20
- data/lib/nokogiri/decorators/slop.rb +3 -5
- data/lib/nokogiri/encoding_handler.rb +2 -2
- data/lib/nokogiri/html4/document.rb +45 -24
- data/lib/nokogiri/html4/document_fragment.rb +124 -12
- data/lib/nokogiri/html4/encoding_reader.rb +2 -2
- data/lib/nokogiri/html4/sax/parser.rb +23 -38
- data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
- data/lib/nokogiri/html4.rb +9 -14
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +61 -30
- data/lib/nokogiri/html5/document_fragment.rb +130 -20
- data/lib/nokogiri/html5/node.rb +4 -4
- data/lib/nokogiri/html5.rb +114 -138
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +6 -5
- data/lib/nokogiri/xml/attr.rb +2 -2
- data/lib/nokogiri/xml/builder.rb +8 -1
- data/lib/nokogiri/xml/document.rb +73 -29
- data/lib/nokogiri/xml/document_fragment.rb +86 -15
- data/lib/nokogiri/xml/namespace.rb +1 -2
- data/lib/nokogiri/xml/node.rb +113 -35
- data/lib/nokogiri/xml/node_set.rb +12 -10
- data/lib/nokogiri/xml/parse_options.rb +1 -1
- data/lib/nokogiri/xml/pp/node.rb +6 -1
- data/lib/nokogiri/xml/reader.rb +51 -17
- data/lib/nokogiri/xml/relax_ng.rb +57 -20
- data/lib/nokogiri/xml/sax/document.rb +174 -83
- data/lib/nokogiri/xml/sax/parser.rb +115 -41
- data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
- data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
- data/lib/nokogiri/xml/sax.rb +48 -0
- data/lib/nokogiri/xml/schema.rb +112 -45
- data/lib/nokogiri/xml/searchable.rb +39 -43
- data/lib/nokogiri/xml/syntax_error.rb +23 -1
- data/lib/nokogiri/xml/xpath_context.rb +14 -3
- data/lib/nokogiri/xml.rb +14 -25
- data/lib/nokogiri/xslt/stylesheet.rb +29 -7
- data/lib/nokogiri/xslt.rb +4 -10
- data/lib/nokogiri.rb +1 -1
- data/lib/xsd/xmlparser/nokogiri.rb +3 -4
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/ports/archives/libxml2-2.13.7.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.43.tar.xz +0 -0
- metadata +13 -12
- data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
- data/ports/archives/libxml2-2.11.7.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
data/ext/nokogiri/xml_node.c
CHANGED
@@ -28,7 +28,6 @@ _xml_node_mark(void *ptr)
|
|
28
28
|
}
|
29
29
|
}
|
30
30
|
|
31
|
-
#ifdef HAVE_RB_GC_LOCATION
|
32
31
|
static void
|
33
32
|
_xml_node_update_references(void *ptr)
|
34
33
|
{
|
@@ -38,12 +37,9 @@ _xml_node_update_references(void *ptr)
|
|
38
37
|
node->_private = (void *)rb_gc_location((VALUE)node->_private);
|
39
38
|
}
|
40
39
|
}
|
41
|
-
#else
|
42
|
-
# define _xml_node_update_references 0
|
43
|
-
#endif
|
44
40
|
|
45
|
-
static const rb_data_type_t
|
46
|
-
.wrap_struct_name = "
|
41
|
+
static const rb_data_type_t xml_node_type = {
|
42
|
+
.wrap_struct_name = "xmlNode",
|
47
43
|
.function = {
|
48
44
|
.dmark = _xml_node_mark,
|
49
45
|
.dcompact = _xml_node_update_references,
|
@@ -51,6 +47,24 @@ static const rb_data_type_t nokogiri_node_type = {
|
|
51
47
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
52
48
|
};
|
53
49
|
|
50
|
+
static VALUE
|
51
|
+
_xml_node_alloc(VALUE klass)
|
52
|
+
{
|
53
|
+
return TypedData_Wrap_Struct(klass, &xml_node_type, NULL);
|
54
|
+
}
|
55
|
+
|
56
|
+
static void
|
57
|
+
_xml_node_data_ptr_set(VALUE rb_node, xmlNodePtr c_node)
|
58
|
+
{
|
59
|
+
assert(DATA_PTR(rb_node) == NULL);
|
60
|
+
assert(c_node->_private == NULL);
|
61
|
+
|
62
|
+
DATA_PTR(rb_node) = c_node;
|
63
|
+
c_node->_private = (void *)rb_node;
|
64
|
+
|
65
|
+
return;
|
66
|
+
}
|
67
|
+
|
54
68
|
static void
|
55
69
|
relink_namespace(xmlNodePtr reparented)
|
56
70
|
{
|
@@ -145,7 +159,7 @@ relink_namespace(xmlNodePtr reparented)
|
|
145
159
|
/* reparent. */
|
146
160
|
if (NULL == reparented->ns) { return; }
|
147
161
|
|
148
|
-
/* When a node gets reparented, walk
|
162
|
+
/* When a node gets reparented, walk its children to make sure that */
|
149
163
|
/* their namespaces are reparented as well. */
|
150
164
|
child = reparented->children;
|
151
165
|
while (NULL != child) {
|
@@ -948,51 +962,30 @@ internal_subset(VALUE self)
|
|
948
962
|
return noko_xml_node_wrap(Qnil, (xmlNodePtr)dtd);
|
949
963
|
}
|
950
964
|
|
951
|
-
/*
|
952
|
-
* :call-seq:
|
953
|
-
* dup → Nokogiri::XML::Node
|
954
|
-
* dup(depth) → Nokogiri::XML::Node
|
955
|
-
* dup(depth, new_parent_doc) → Nokogiri::XML::Node
|
956
|
-
*
|
957
|
-
* Copy this node.
|
958
|
-
*
|
959
|
-
* [Parameters]
|
960
|
-
* - +depth+ 0 is a shallow copy, 1 (the default) is a deep copy.
|
961
|
-
* - +new_parent_doc+
|
962
|
-
* The new node's parent Document. Defaults to the this node's document.
|
963
|
-
*
|
964
|
-
* [Returns] The new Nokgiri::XML::Node
|
965
|
-
*/
|
965
|
+
/* :nodoc: */
|
966
966
|
static VALUE
|
967
|
-
|
967
|
+
rb_xml_node_initialize_copy_with_args(VALUE rb_self, VALUE rb_other, VALUE rb_level, VALUE rb_new_parent_doc)
|
968
968
|
{
|
969
|
-
|
970
|
-
int
|
971
|
-
|
972
|
-
|
973
|
-
xmlNodePtr node, dup;
|
969
|
+
xmlNodePtr c_self, c_other;
|
970
|
+
int c_level;
|
971
|
+
xmlDocPtr c_new_parent_doc;
|
972
|
+
VALUE rb_node_cache;
|
974
973
|
|
975
|
-
Noko_Node_Get_Struct(
|
974
|
+
Noko_Node_Get_Struct(rb_other, xmlNode, c_other);
|
975
|
+
c_level = (int)NUM2INT(rb_level);
|
976
|
+
c_new_parent_doc = noko_xml_document_unwrap(rb_new_parent_doc);
|
976
977
|
|
977
|
-
|
978
|
+
c_self = xmlDocCopyNode(c_other, c_new_parent_doc, c_level);
|
979
|
+
if (c_self == NULL) { return Qnil; }
|
978
980
|
|
979
|
-
|
980
|
-
|
981
|
-
}
|
982
|
-
level = (int)NUM2INT(r_level);
|
981
|
+
_xml_node_data_ptr_set(rb_self, c_self);
|
982
|
+
noko_xml_document_pin_node(c_self);
|
983
983
|
|
984
|
-
|
985
|
-
|
986
|
-
|
987
|
-
new_parent_doc = noko_xml_document_unwrap(r_new_parent_doc);
|
988
|
-
}
|
989
|
-
|
990
|
-
dup = xmlDocCopyNode(node, new_parent_doc, level);
|
991
|
-
if (dup == NULL) { return Qnil; }
|
984
|
+
rb_node_cache = DOC_NODE_CACHE(c_new_parent_doc);
|
985
|
+
rb_ary_push(rb_node_cache, rb_self);
|
986
|
+
rb_funcall(rb_new_parent_doc, id_decorate, 1, rb_self);
|
992
987
|
|
993
|
-
|
994
|
-
|
995
|
-
return noko_xml_node_wrap(rb_obj_class(self), dup);
|
988
|
+
return rb_self;
|
996
989
|
}
|
997
990
|
|
998
991
|
/*
|
@@ -1078,17 +1071,10 @@ previous_element(VALUE self)
|
|
1078
1071
|
xmlNodePtr node, sibling;
|
1079
1072
|
Noko_Node_Get_Struct(self, xmlNode, node);
|
1080
1073
|
|
1081
|
-
|
1082
|
-
* note that we don't use xmlPreviousElementSibling here because it's buggy pre-2.7.7.
|
1083
|
-
*/
|
1084
|
-
sibling = node->prev;
|
1074
|
+
sibling = xmlPreviousElementSibling(node);
|
1085
1075
|
if (!sibling) { return Qnil; }
|
1086
1076
|
|
1087
|
-
|
1088
|
-
sibling = sibling->prev;
|
1089
|
-
}
|
1090
|
-
|
1091
|
-
return sibling ? noko_xml_node_wrap(Qnil, sibling) : Qnil ;
|
1077
|
+
return noko_xml_node_wrap(Qnil, sibling);
|
1092
1078
|
}
|
1093
1079
|
|
1094
1080
|
/* :nodoc: */
|
@@ -1504,9 +1490,44 @@ node_type(VALUE self)
|
|
1504
1490
|
|
1505
1491
|
/*
|
1506
1492
|
* call-seq:
|
1507
|
-
*
|
1493
|
+
* native_content=(input)
|
1494
|
+
*
|
1495
|
+
* Set the content of this node to +input+.
|
1496
|
+
*
|
1497
|
+
* [Parameters]
|
1498
|
+
* - +input+ (String) The new content for this node.
|
1508
1499
|
*
|
1509
|
-
*
|
1500
|
+
* ⚠ This method behaves differently depending on the node type. For Text, CDATA, Comment, and
|
1501
|
+
* ProcessingInstruction nodes, it treats the input as raw content, which means that the final DOM
|
1502
|
+
* will contain the entity-escaped version of the input (see example below). For Element and Attr
|
1503
|
+
* nodes, it treats the input as parsed content and expects it to be valid markup that is already
|
1504
|
+
* entity-escaped.
|
1505
|
+
*
|
1506
|
+
* 💡 Use Node#content= for a more consistent API across node types.
|
1507
|
+
*
|
1508
|
+
* [Example]
|
1509
|
+
* Note the behavior differences of this method between Text and Element nodes:
|
1510
|
+
*
|
1511
|
+
* doc = Nokogiri::HTML::Document.parse(<<~HTML)
|
1512
|
+
* <html>
|
1513
|
+
* <body>
|
1514
|
+
* <div id="first">asdf</div>
|
1515
|
+
* <div id="second">asdf</div>
|
1516
|
+
* HTML
|
1517
|
+
*
|
1518
|
+
* text_node = doc.at_css("div#first").children.first
|
1519
|
+
* div_node = doc.at_css("div#second")
|
1520
|
+
*
|
1521
|
+
* value = "You & Me"
|
1522
|
+
*
|
1523
|
+
* text_node.native_content = value
|
1524
|
+
* div_node.native_content = value
|
1525
|
+
*
|
1526
|
+
* doc.css("div").to_html
|
1527
|
+
* # => "<div id=\"first\">You &amp; Me</div>
|
1528
|
+
* # <div id=\"second\">You & Me</div>"
|
1529
|
+
*
|
1530
|
+
* See also: #content=
|
1510
1531
|
*/
|
1511
1532
|
static VALUE
|
1512
1533
|
set_native_content(VALUE self, VALUE content)
|
@@ -1817,12 +1838,12 @@ output_escaped_string(VALUE out, xmlChar const *start, bool attr)
|
|
1817
1838
|
++next;
|
1818
1839
|
continue;
|
1819
1840
|
}
|
1820
|
-
output_partial_string(out, (char const *)start, next - start);
|
1841
|
+
output_partial_string(out, (char const *)start, (size_t)(next - start));
|
1821
1842
|
output_string(out, replacement);
|
1822
1843
|
next += replaced_bytes;
|
1823
1844
|
start = next;
|
1824
1845
|
}
|
1825
|
-
output_partial_string(out, (char const *)start, next - start);
|
1846
|
+
output_partial_string(out, (char const *)start, (size_t)(next - start));
|
1826
1847
|
}
|
1827
1848
|
|
1828
1849
|
static bool
|
@@ -1893,17 +1914,7 @@ output_node(
|
|
1893
1914
|
// Add attributes.
|
1894
1915
|
for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
|
1895
1916
|
output_char(out, ' ');
|
1896
|
-
|
1897
|
-
if (attr->children) {
|
1898
|
-
output_string(out, "=\"");
|
1899
|
-
xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
|
1900
|
-
output_escaped_string(out, value, true);
|
1901
|
-
xmlFree(value);
|
1902
|
-
output_char(out, '"');
|
1903
|
-
} else {
|
1904
|
-
// Output name=""
|
1905
|
-
output_string(out, "=\"\"");
|
1906
|
-
}
|
1917
|
+
output_node(out, (xmlNodePtr)attr, preserve_newline);
|
1907
1918
|
}
|
1908
1919
|
output_char(out, '>');
|
1909
1920
|
|
@@ -1921,6 +1932,22 @@ output_node(
|
|
1921
1932
|
}
|
1922
1933
|
break;
|
1923
1934
|
|
1935
|
+
case XML_ATTRIBUTE_NODE: {
|
1936
|
+
xmlAttrPtr attr = (xmlAttrPtr)node;
|
1937
|
+
output_attr_name(out, attr);
|
1938
|
+
if (attr->children) {
|
1939
|
+
output_string(out, "=\"");
|
1940
|
+
xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
|
1941
|
+
output_escaped_string(out, value, true);
|
1942
|
+
xmlFree(value);
|
1943
|
+
output_char(out, '"');
|
1944
|
+
} else {
|
1945
|
+
// Output name=""
|
1946
|
+
output_string(out, "=\"\"");
|
1947
|
+
}
|
1948
|
+
}
|
1949
|
+
break;
|
1950
|
+
|
1924
1951
|
case XML_TEXT_NODE:
|
1925
1952
|
if (node->parent
|
1926
1953
|
&& is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS,
|
@@ -2036,11 +2063,11 @@ rb_xml_node_line_set(VALUE rb_node, VALUE rb_line_number)
|
|
2036
2063
|
// libxml2 optionally uses xmlNode.psvi to store longer line numbers, but only for text nodes.
|
2037
2064
|
// search for "psvi" in SAX2.c and tree.c to learn more.
|
2038
2065
|
if (line_number < 65535) {
|
2039
|
-
c_node->line = (short)
|
2066
|
+
c_node->line = (short unsigned)line_number;
|
2040
2067
|
} else {
|
2041
2068
|
c_node->line = 65535;
|
2042
2069
|
if (c_node->type == XML_TEXT_NODE) {
|
2043
|
-
c_node->psvi = (void *)(ptrdiff_t)
|
2070
|
+
c_node->psvi = (void *)(ptrdiff_t)line_number;
|
2044
2071
|
}
|
2045
2072
|
}
|
2046
2073
|
|
@@ -2064,8 +2091,7 @@ rb_xml_node_new(int argc, VALUE *argv, VALUE klass)
|
|
2064
2091
|
rb_raise(rb_eArgError, "document must be a Nokogiri::XML::Node");
|
2065
2092
|
}
|
2066
2093
|
if (!rb_obj_is_kind_of(rb_document_node, cNokogiriXmlDocument)) {
|
2067
|
-
// TODO:
|
2068
|
-
NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in a future release of Nokogiri.");
|
2094
|
+
NOKO_WARN_DEPRECATION("Passing a Node as the second parameter to Node.new is deprecated. Please pass a Document instead, or prefer an alternative constructor like Node#add_child. This will become an error in Nokogiri v1.17.0."); // TODO: deprecated in v1.13.0, remove in v1.17.0
|
2069
2095
|
}
|
2070
2096
|
Noko_Node_Get_Struct(rb_document_node, xmlNode, c_document_node);
|
2071
2097
|
|
@@ -2101,7 +2127,7 @@ dump_html(VALUE self)
|
|
2101
2127
|
|
2102
2128
|
buf = xmlBufferCreate() ;
|
2103
2129
|
htmlNodeDump(buf, node->doc, node);
|
2104
|
-
html = NOKOGIRI_STR_NEW2(buf
|
2130
|
+
html = NOKOGIRI_STR_NEW2(xmlBufferContent(buf));
|
2105
2131
|
xmlBufferFree(buf);
|
2106
2132
|
return html ;
|
2107
2133
|
}
|
@@ -2125,36 +2151,38 @@ compare(VALUE self, VALUE _other)
|
|
2125
2151
|
|
2126
2152
|
/*
|
2127
2153
|
* call-seq:
|
2128
|
-
* process_xincludes(
|
2154
|
+
* process_xincludes(flags)
|
2129
2155
|
*
|
2130
2156
|
* Loads and substitutes all xinclude elements below the node. The
|
2131
|
-
* parser context will be initialized with +
|
2157
|
+
* parser context will be initialized with +flags+.
|
2132
2158
|
*/
|
2133
2159
|
static VALUE
|
2134
|
-
|
2160
|
+
noko_xml_node__process_xincludes(VALUE rb_node, VALUE rb_flags)
|
2135
2161
|
{
|
2136
|
-
int
|
2137
|
-
xmlNodePtr
|
2138
|
-
VALUE
|
2162
|
+
int status ;
|
2163
|
+
xmlNodePtr c_node;
|
2164
|
+
VALUE rb_errors = rb_ary_new();
|
2165
|
+
libxmlStructuredErrorHandlerState handler_state;
|
2139
2166
|
|
2140
|
-
Noko_Node_Get_Struct(
|
2167
|
+
Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
|
2141
2168
|
|
2142
|
-
|
2143
|
-
|
2144
|
-
|
2169
|
+
noko__structured_error_func_save_and_set(&handler_state, (void *)rb_errors, noko__error_array_pusher);
|
2170
|
+
|
2171
|
+
status = xmlXIncludeProcessTreeFlags(c_node, (int)NUM2INT(rb_flags));
|
2145
2172
|
|
2146
|
-
|
2147
|
-
xmlErrorPtr error;
|
2173
|
+
noko__structured_error_func_restore(&handler_state);
|
2148
2174
|
|
2149
|
-
|
2150
|
-
|
2151
|
-
|
2175
|
+
if (status < 0) {
|
2176
|
+
VALUE exception = rb_funcall(cNokogiriXmlSyntaxError, rb_intern("aggregate"), 1, rb_errors);
|
2177
|
+
|
2178
|
+
if (RB_TEST(exception)) {
|
2179
|
+
rb_exc_raise(exception);
|
2152
2180
|
} else {
|
2153
2181
|
rb_raise(rb_eRuntimeError, "Could not perform xinclude substitution");
|
2154
2182
|
}
|
2155
2183
|
}
|
2156
2184
|
|
2157
|
-
return
|
2185
|
+
return rb_node;
|
2158
2186
|
}
|
2159
2187
|
|
2160
2188
|
|
@@ -2176,14 +2204,7 @@ in_context(VALUE self, VALUE _str, VALUE _options)
|
|
2176
2204
|
node_children = node->children;
|
2177
2205
|
doc_children = node->doc->children;
|
2178
2206
|
|
2179
|
-
xmlSetStructuredErrorFunc((void *)err,
|
2180
|
-
|
2181
|
-
/* Twiddle global variable because of a bug in libxml2.
|
2182
|
-
* http://git.gnome.org/browse/libxml2/commit/?id=e20fb5a72c83cbfc8e4a8aa3943c6be8febadab7
|
2183
|
-
*/
|
2184
|
-
#ifndef HTML_PARSE_NOIMPLIED
|
2185
|
-
htmlHandleOmittedElem(0);
|
2186
|
-
#endif
|
2207
|
+
xmlSetStructuredErrorFunc((void *)err, noko__error_array_pusher);
|
2187
2208
|
|
2188
2209
|
/* This function adds a fake node to the child of +node+. If the parser
|
2189
2210
|
* does not exit cleanly with XML_ERR_OK, the list is freed. This can
|
@@ -2213,10 +2234,6 @@ in_context(VALUE self, VALUE _str, VALUE _options)
|
|
2213
2234
|
child_iter = child_iter->next;
|
2214
2235
|
}
|
2215
2236
|
|
2216
|
-
#ifndef HTML_PARSE_NOIMPLIED
|
2217
|
-
htmlHandleOmittedElem(1);
|
2218
|
-
#endif
|
2219
|
-
|
2220
2237
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
2221
2238
|
|
2222
2239
|
/*
|
@@ -2265,6 +2282,15 @@ in_context(VALUE self, VALUE _str, VALUE _options)
|
|
2265
2282
|
return noko_xml_node_set_wrap(set, doc);
|
2266
2283
|
}
|
2267
2284
|
|
2285
|
+
/* :nodoc: */
|
2286
|
+
VALUE
|
2287
|
+
rb_xml_node_data_ptr_eh(VALUE self)
|
2288
|
+
{
|
2289
|
+
xmlNodePtr c_node;
|
2290
|
+
Noko_Node_Get_Struct(self, xmlNode, c_node);
|
2291
|
+
return c_node ? Qtrue : Qfalse;
|
2292
|
+
}
|
2293
|
+
|
2268
2294
|
VALUE
|
2269
2295
|
noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
|
2270
2296
|
{
|
@@ -2330,8 +2356,8 @@ noko_xml_node_wrap(VALUE rb_class, xmlNodePtr c_node)
|
|
2330
2356
|
}
|
2331
2357
|
}
|
2332
2358
|
|
2333
|
-
rb_node =
|
2334
|
-
|
2359
|
+
rb_node = _xml_node_alloc(rb_class);
|
2360
|
+
_xml_node_data_ptr_set(rb_node, c_node);
|
2335
2361
|
|
2336
2362
|
if (node_has_a_document) {
|
2337
2363
|
rb_document = DOC_RUBY_OBJECT(c_doc);
|
@@ -2367,7 +2393,7 @@ noko_init_xml_node(void)
|
|
2367
2393
|
{
|
2368
2394
|
cNokogiriXmlNode = rb_define_class_under(mNokogiriXml, "Node", rb_cObject);
|
2369
2395
|
|
2370
|
-
|
2396
|
+
rb_define_alloc_func(cNokogiriXmlNode, _xml_node_alloc);
|
2371
2397
|
|
2372
2398
|
rb_define_singleton_method(cNokogiriXmlNode, "new", rb_xml_node_new, -1);
|
2373
2399
|
|
@@ -2381,8 +2407,8 @@ noko_init_xml_node(void)
|
|
2381
2407
|
rb_define_method(cNokogiriXmlNode, "content", rb_xml_node_content, 0);
|
2382
2408
|
rb_define_method(cNokogiriXmlNode, "create_external_subset", create_external_subset, 3);
|
2383
2409
|
rb_define_method(cNokogiriXmlNode, "create_internal_subset", create_internal_subset, 3);
|
2410
|
+
rb_define_method(cNokogiriXmlNode, "data_ptr?", rb_xml_node_data_ptr_eh, 0);
|
2384
2411
|
rb_define_method(cNokogiriXmlNode, "document", rb_xml_node_document, 0);
|
2385
|
-
rb_define_method(cNokogiriXmlNode, "dup", duplicate_node, -1);
|
2386
2412
|
rb_define_method(cNokogiriXmlNode, "element_children", rb_xml_node_element_children, 0);
|
2387
2413
|
rb_define_method(cNokogiriXmlNode, "encode_special_chars", encode_special_chars, 1);
|
2388
2414
|
rb_define_method(cNokogiriXmlNode, "external_subset", external_subset, 0);
|
@@ -2411,6 +2437,8 @@ noko_init_xml_node(void)
|
|
2411
2437
|
rb_define_method(cNokogiriXmlNode, "previous_sibling", previous_sibling, 0);
|
2412
2438
|
rb_define_method(cNokogiriXmlNode, "unlink", unlink_node, 0);
|
2413
2439
|
|
2440
|
+
rb_define_protected_method(cNokogiriXmlNode, "initialize_copy_with_args", rb_xml_node_initialize_copy_with_args, 3);
|
2441
|
+
|
2414
2442
|
rb_define_private_method(cNokogiriXmlNode, "add_child_node", add_child, 1);
|
2415
2443
|
rb_define_private_method(cNokogiriXmlNode, "add_next_sibling_node", add_next_sibling, 1);
|
2416
2444
|
rb_define_private_method(cNokogiriXmlNode, "add_previous_sibling_node", add_previous_sibling, 1);
|
@@ -2421,7 +2449,7 @@ noko_init_xml_node(void)
|
|
2421
2449
|
rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4);
|
2422
2450
|
rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0);
|
2423
2451
|
rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1);
|
2424
|
-
rb_define_private_method(cNokogiriXmlNode, "process_xincludes",
|
2452
|
+
rb_define_private_method(cNokogiriXmlNode, "process_xincludes", noko_xml_node__process_xincludes, 1);
|
2425
2453
|
rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1);
|
2426
2454
|
rb_define_private_method(cNokogiriXmlNode, "set", set, 2);
|
2427
2455
|
rb_define_private_method(cNokogiriXmlNode, "set_namespace", set_namespace, 1);
|
data/ext/nokogiri/xml_node_set.c
CHANGED
@@ -68,15 +68,8 @@ xml_node_set_deallocate(void *data)
|
|
68
68
|
xmlFree(node_set);
|
69
69
|
}
|
70
70
|
|
71
|
-
|
72
|
-
static VALUE
|
73
|
-
xml_node_set_allocate(VALUE klass)
|
74
|
-
{
|
75
|
-
return noko_xml_node_set_wrap(xmlXPathNodeSetCreate(NULL), Qnil);
|
76
|
-
}
|
77
|
-
|
78
71
|
static const rb_data_type_t xml_node_set_type = {
|
79
|
-
.wrap_struct_name = "
|
72
|
+
.wrap_struct_name = "xmlNodeSet",
|
80
73
|
.function = {
|
81
74
|
.dmark = xml_node_set_mark,
|
82
75
|
.dfree = xml_node_set_deallocate,
|
@@ -84,6 +77,33 @@ static const rb_data_type_t xml_node_set_type = {
|
|
84
77
|
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
85
78
|
};
|
86
79
|
|
80
|
+
static VALUE
|
81
|
+
xml_node_set_allocate(VALUE klass)
|
82
|
+
{
|
83
|
+
return TypedData_Wrap_Struct(klass, &xml_node_set_type, xmlXPathNodeSetCreate(NULL));
|
84
|
+
}
|
85
|
+
|
86
|
+
/* :nodoc: */
|
87
|
+
static VALUE
|
88
|
+
rb_xml_node_set_initialize_copy(VALUE rb_self, VALUE rb_other)
|
89
|
+
{
|
90
|
+
xmlNodeSetPtr c_self, c_other;
|
91
|
+
VALUE rb_document;
|
92
|
+
|
93
|
+
TypedData_Get_Struct(rb_self, xmlNodeSet, &xml_node_set_type, c_self);
|
94
|
+
TypedData_Get_Struct(rb_other, xmlNodeSet, &xml_node_set_type, c_other);
|
95
|
+
|
96
|
+
xmlXPathNodeSetMerge(c_self, c_other);
|
97
|
+
|
98
|
+
rb_document = rb_iv_get(rb_other, "@document");
|
99
|
+
if (!NIL_P(rb_document)) {
|
100
|
+
rb_iv_set(rb_self, "@document", rb_document);
|
101
|
+
rb_funcall(rb_document, decorate, 1, rb_self);
|
102
|
+
}
|
103
|
+
|
104
|
+
return rb_self;
|
105
|
+
}
|
106
|
+
|
87
107
|
static void
|
88
108
|
xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val)
|
89
109
|
{
|
@@ -112,27 +132,6 @@ xpath_node_set_del(xmlNodeSetPtr cur, xmlNodePtr val)
|
|
112
132
|
cur->nodeTab[cur->nodeNr] = NULL;
|
113
133
|
}
|
114
134
|
|
115
|
-
|
116
|
-
/*
|
117
|
-
* call-seq:
|
118
|
-
* dup
|
119
|
-
*
|
120
|
-
* Duplicate this NodeSet. Note that the Nodes contained in the NodeSet are not
|
121
|
-
* duplicated (similar to how Array and other Enumerable classes work).
|
122
|
-
*/
|
123
|
-
static VALUE
|
124
|
-
duplicate(VALUE rb_self)
|
125
|
-
{
|
126
|
-
xmlNodeSetPtr c_self;
|
127
|
-
xmlNodeSetPtr dupl;
|
128
|
-
|
129
|
-
TypedData_Get_Struct(rb_self, xmlNodeSet, &xml_node_set_type, c_self);
|
130
|
-
|
131
|
-
dupl = xmlXPathNodeSetMerge(NULL, c_self);
|
132
|
-
|
133
|
-
return noko_xml_node_set_wrap(dupl, rb_iv_get(rb_self, "@document"));
|
134
|
-
}
|
135
|
-
|
136
135
|
/*
|
137
136
|
* call-seq:
|
138
137
|
* length
|
@@ -453,19 +452,21 @@ noko_xml_node_set_wrap(xmlNodeSetPtr c_node_set, VALUE document)
|
|
453
452
|
VALUE rb_node_set ;
|
454
453
|
|
455
454
|
if (c_node_set == NULL) {
|
456
|
-
|
455
|
+
rb_node_set = xml_node_set_allocate(cNokogiriXmlNodeSet);
|
456
|
+
} else {
|
457
|
+
rb_node_set = TypedData_Wrap_Struct(cNokogiriXmlNodeSet, &xml_node_set_type, c_node_set);
|
457
458
|
}
|
458
459
|
|
459
|
-
rb_node_set = TypedData_Wrap_Struct(cNokogiriXmlNodeSet, &xml_node_set_type, c_node_set);
|
460
|
-
|
461
460
|
if (!NIL_P(document)) {
|
462
461
|
rb_iv_set(rb_node_set, "@document", document);
|
463
462
|
rb_funcall(document, decorate, 1, rb_node_set);
|
464
463
|
}
|
465
464
|
|
466
|
-
|
467
|
-
|
468
|
-
|
465
|
+
if (c_node_set) {
|
466
|
+
/* create ruby objects for all the results, so they'll be marked during the GC mark phase */
|
467
|
+
for (j = 0 ; j < c_node_set->nodeNr ; j++) {
|
468
|
+
noko_xml_node_wrap_node_set_result(c_node_set->nodeTab[j], rb_node_set);
|
469
|
+
}
|
469
470
|
}
|
470
471
|
|
471
472
|
return rb_node_set ;
|
@@ -499,18 +500,19 @@ noko_init_xml_node_set(void)
|
|
499
500
|
|
500
501
|
rb_define_alloc_func(cNokogiriXmlNodeSet, xml_node_set_allocate);
|
501
502
|
|
502
|
-
rb_define_method(cNokogiriXmlNodeSet, "
|
503
|
-
rb_define_method(cNokogiriXmlNodeSet, "[]", slice, -1);
|
504
|
-
rb_define_method(cNokogiriXmlNodeSet, "slice", slice, -1);
|
505
|
-
rb_define_method(cNokogiriXmlNodeSet, "push", push, 1);
|
506
|
-
rb_define_method(cNokogiriXmlNodeSet, "|", rb_xml_node_set_union, 1);
|
503
|
+
rb_define_method(cNokogiriXmlNodeSet, "&", intersection, 1);
|
507
504
|
rb_define_method(cNokogiriXmlNodeSet, "-", minus, 1);
|
508
|
-
rb_define_method(cNokogiriXmlNodeSet, "
|
509
|
-
rb_define_method(cNokogiriXmlNodeSet, "to_a", to_array, 0);
|
510
|
-
rb_define_method(cNokogiriXmlNodeSet, "dup", duplicate, 0);
|
505
|
+
rb_define_method(cNokogiriXmlNodeSet, "[]", slice, -1);
|
511
506
|
rb_define_method(cNokogiriXmlNodeSet, "delete", delete, 1);
|
512
|
-
rb_define_method(cNokogiriXmlNodeSet, "&", intersection, 1);
|
513
507
|
rb_define_method(cNokogiriXmlNodeSet, "include?", include_eh, 1);
|
508
|
+
rb_define_method(cNokogiriXmlNodeSet, "length", length, 0);
|
509
|
+
rb_define_method(cNokogiriXmlNodeSet, "push", push, 1);
|
510
|
+
rb_define_method(cNokogiriXmlNodeSet, "slice", slice, -1);
|
511
|
+
rb_define_method(cNokogiriXmlNodeSet, "to_a", to_array, 0);
|
512
|
+
rb_define_method(cNokogiriXmlNodeSet, "unlink", unlink_nodeset, 0);
|
513
|
+
rb_define_method(cNokogiriXmlNodeSet, "|", rb_xml_node_set_union, 1);
|
514
|
+
|
515
|
+
rb_define_private_method(cNokogiriXmlNodeSet, "initialize_copy", rb_xml_node_set_initialize_copy, 1);
|
514
516
|
|
515
517
|
decorate = rb_intern("decorate");
|
516
518
|
}
|