nokogiri 1.13.8 → 1.15.7
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +40 -0
- data/LICENSE-DEPENDENCIES.md +830 -509
- data/LICENSE.md +1 -1
- data/README.md +18 -11
- data/dependencies.yml +33 -15
- data/ext/nokogiri/extconf.rb +164 -46
- data/ext/nokogiri/gumbo.c +20 -10
- data/ext/nokogiri/html4_document.c +3 -4
- data/ext/nokogiri/html4_element_description.c +20 -15
- data/ext/nokogiri/html4_entity_lookup.c +2 -2
- data/ext/nokogiri/html4_sax_parser_context.c +11 -22
- data/ext/nokogiri/html4_sax_push_parser.c +3 -3
- data/ext/nokogiri/nokogiri.c +84 -75
- data/ext/nokogiri/nokogiri.h +31 -16
- data/ext/nokogiri/test_global_handlers.c +1 -1
- data/ext/nokogiri/xml_attr.c +2 -2
- data/ext/nokogiri/xml_attribute_decl.c +2 -2
- data/ext/nokogiri/xml_cdata.c +32 -18
- data/ext/nokogiri/xml_comment.c +2 -2
- data/ext/nokogiri/xml_document.c +127 -34
- data/ext/nokogiri/xml_document_fragment.c +2 -2
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_element_content.c +34 -31
- data/ext/nokogiri/xml_element_decl.c +7 -7
- data/ext/nokogiri/xml_encoding_handler.c +15 -7
- data/ext/nokogiri/xml_entity_decl.c +1 -1
- data/ext/nokogiri/xml_entity_reference.c +2 -2
- data/ext/nokogiri/xml_namespace.c +79 -14
- data/ext/nokogiri/xml_node.c +300 -34
- data/ext/nokogiri/xml_node_set.c +125 -107
- data/ext/nokogiri/xml_processing_instruction.c +2 -2
- data/ext/nokogiri/xml_reader.c +81 -48
- data/ext/nokogiri/xml_relax_ng.c +66 -81
- data/ext/nokogiri/xml_sax_parser.c +45 -20
- data/ext/nokogiri/xml_sax_parser_context.c +46 -30
- data/ext/nokogiri/xml_sax_push_parser.c +30 -11
- data/ext/nokogiri/xml_schema.c +95 -117
- data/ext/nokogiri/xml_syntax_error.c +1 -1
- data/ext/nokogiri/xml_text.c +28 -14
- data/ext/nokogiri/xml_xpath_context.c +216 -136
- data/ext/nokogiri/xslt_stylesheet.c +118 -64
- data/gumbo-parser/Makefile +10 -0
- data/gumbo-parser/src/attribute.h +1 -1
- data/gumbo-parser/src/error.c +10 -6
- data/gumbo-parser/src/error.h +1 -1
- data/gumbo-parser/src/foreign_attrs.c +15 -16
- data/gumbo-parser/src/foreign_attrs.gperf +1 -1
- data/gumbo-parser/src/{gumbo.h → nokogiri_gumbo.h} +1 -0
- data/gumbo-parser/src/parser.c +21 -5
- data/gumbo-parser/src/replacement.h +1 -1
- data/gumbo-parser/src/string_buffer.h +1 -1
- data/gumbo-parser/src/string_piece.c +1 -1
- data/gumbo-parser/src/svg_attrs.c +2 -2
- data/gumbo-parser/src/svg_tags.c +2 -2
- data/gumbo-parser/src/tag.c +2 -1
- data/gumbo-parser/src/tag_lookup.c +7 -7
- data/gumbo-parser/src/tag_lookup.gperf +1 -0
- data/gumbo-parser/src/tag_lookup.h +1 -1
- data/gumbo-parser/src/token_buffer.h +1 -1
- data/gumbo-parser/src/tokenizer.c +1 -1
- data/gumbo-parser/src/tokenizer.h +1 -1
- data/gumbo-parser/src/utf8.c +1 -1
- data/gumbo-parser/src/utf8.h +1 -1
- data/gumbo-parser/src/util.c +1 -3
- data/gumbo-parser/src/util.h +4 -0
- data/gumbo-parser/src/vector.h +1 -1
- data/lib/nokogiri/css/node.rb +2 -2
- data/lib/nokogiri/css/xpath_visitor.rb +7 -5
- data/lib/nokogiri/css.rb +6 -0
- data/lib/nokogiri/decorators/slop.rb +1 -1
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +4 -3
- data/lib/nokogiri/html4/document.rb +2 -121
- data/lib/nokogiri/html4/document_fragment.rb +1 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +1827 -365
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4.rb +1 -0
- data/lib/nokogiri/html5/document.rb +113 -36
- data/lib/nokogiri/html5/document_fragment.rb +10 -3
- data/lib/nokogiri/html5/node.rb +8 -5
- data/lib/nokogiri/html5.rb +130 -216
- data/lib/nokogiri/jruby/dependencies.rb +1 -19
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +11 -10
- data/lib/nokogiri/xml/attr.rb +49 -0
- data/lib/nokogiri/xml/attribute_decl.rb +4 -2
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/document.rb +102 -55
- data/lib/nokogiri/xml/document_fragment.rb +50 -7
- data/lib/nokogiri/xml/element_content.rb +10 -2
- data/lib/nokogiri/xml/element_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/namespace.rb +42 -0
- data/lib/nokogiri/xml/node/save_options.rb +14 -4
- data/lib/nokogiri/xml/node.rb +212 -48
- data/lib/nokogiri/xml/node_set.rb +88 -9
- data/lib/nokogiri/xml/parse_options.rb +129 -50
- data/lib/nokogiri/xml/pp/node.rb +28 -15
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/sax/document.rb +1 -1
- data/lib/nokogiri/xml/sax/parser.rb +2 -3
- data/lib/nokogiri/xml/searchable.rb +18 -10
- data/lib/nokogiri/xslt.rb +74 -4
- data/lib/nokogiri.rb +15 -15
- data/lib/xsd/xmlparser/nokogiri.rb +4 -2
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.11.7.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
- metadata +19 -242
- data/patches/libxml2/0004-use-glibc-strlen.patch +0 -53
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +0 -81
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +0 -3040
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +0 -61
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +0 -3037
- data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
data/ext/nokogiri/xml_node.c
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
#include <nokogiri.h>
|
2
2
|
|
3
|
+
#include <stdbool.h>
|
4
|
+
|
3
5
|
// :stopdoc:
|
4
6
|
|
5
7
|
VALUE cNokogiriXmlNode ;
|
@@ -7,20 +9,11 @@ static ID id_decorate, id_decorate_bang;
|
|
7
9
|
|
8
10
|
typedef xmlNodePtr(*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr);
|
9
11
|
|
10
|
-
#ifdef DEBUG
|
11
12
|
static void
|
12
|
-
|
13
|
+
_xml_node_mark(void *ptr)
|
13
14
|
{
|
14
|
-
|
15
|
-
NOKOGIRI_DEBUG_END(x)
|
16
|
-
}
|
17
|
-
#else
|
18
|
-
# define _xml_node_dealloc 0
|
19
|
-
#endif
|
15
|
+
xmlNodePtr node = ptr;
|
20
16
|
|
21
|
-
static void
|
22
|
-
_xml_node_mark(xmlNodePtr node)
|
23
|
-
{
|
24
17
|
if (!DOC_RUBY_OBJECT_TEST(node->doc)) {
|
25
18
|
return;
|
26
19
|
}
|
@@ -37,28 +30,25 @@ _xml_node_mark(xmlNodePtr node)
|
|
37
30
|
|
38
31
|
#ifdef HAVE_RB_GC_LOCATION
|
39
32
|
static void
|
40
|
-
_xml_node_update_references(
|
33
|
+
_xml_node_update_references(void *ptr)
|
41
34
|
{
|
35
|
+
xmlNodePtr node = ptr;
|
36
|
+
|
42
37
|
if (node->_private) {
|
43
38
|
node->_private = (void *)rb_gc_location((VALUE)node->_private);
|
44
39
|
}
|
45
40
|
}
|
41
|
+
#else
|
42
|
+
# define _xml_node_update_references 0
|
46
43
|
#endif
|
47
44
|
|
48
|
-
typedef void (*gc_callback_t)(void *);
|
49
|
-
|
50
45
|
static const rb_data_type_t nokogiri_node_type = {
|
51
|
-
"Nokogiri
|
52
|
-
{
|
53
|
-
|
54
|
-
|
55
|
-
(gc_callback_t)_xml_node_update_references
|
56
|
-
#endif
|
46
|
+
.wrap_struct_name = "Nokogiri::XML::Node",
|
47
|
+
.function = {
|
48
|
+
.dmark = _xml_node_mark,
|
49
|
+
.dcompact = _xml_node_update_references,
|
57
50
|
},
|
58
|
-
|
59
|
-
#ifdef RUBY_TYPED_FREE_IMMEDIATELY
|
60
|
-
RUBY_TYPED_FREE_IMMEDIATELY,
|
61
|
-
#endif
|
51
|
+
.flags = RUBY_TYPED_FREE_IMMEDIATELY,
|
62
52
|
};
|
63
53
|
|
64
54
|
static void
|
@@ -360,7 +350,7 @@ ok:
|
|
360
350
|
|
361
351
|
xmlUnlinkNode(original_reparentee);
|
362
352
|
|
363
|
-
if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling
|
353
|
+
if (prf != xmlAddPrevSibling && prf != xmlAddNextSibling && prf != xmlAddChild
|
364
354
|
&& reparentee->type == XML_TEXT_NODE && pivot->next && pivot->next->type == XML_TEXT_NODE) {
|
365
355
|
/*
|
366
356
|
* libxml merges text nodes in a right-to-left fashion, meaning that if
|
@@ -809,7 +799,7 @@ rb_xml_node_pointer_id(VALUE self)
|
|
809
799
|
xmlNodePtr node;
|
810
800
|
Noko_Node_Get_Struct(self, xmlNode, node);
|
811
801
|
|
812
|
-
return
|
802
|
+
return rb_uint2inum((uintptr_t)(node));
|
813
803
|
}
|
814
804
|
|
815
805
|
/*
|
@@ -994,7 +984,7 @@ duplicate_node(int argc, VALUE *argv, VALUE self)
|
|
994
984
|
if (n_args < 2) {
|
995
985
|
new_parent_doc = node->doc;
|
996
986
|
} else {
|
997
|
-
|
987
|
+
new_parent_doc = noko_xml_document_unwrap(r_new_parent_doc);
|
998
988
|
}
|
999
989
|
|
1000
990
|
dup = xmlDocCopyNode(node, new_parent_doc, level);
|
@@ -1353,7 +1343,7 @@ set_namespace(VALUE self, VALUE namespace)
|
|
1353
1343
|
Noko_Node_Get_Struct(self, xmlNode, node);
|
1354
1344
|
|
1355
1345
|
if (!NIL_P(namespace)) {
|
1356
|
-
|
1346
|
+
Noko_Namespace_Get_Struct(namespace, xmlNs, ns);
|
1357
1347
|
}
|
1358
1348
|
|
1359
1349
|
xmlSetNs(node, ns);
|
@@ -1509,7 +1499,7 @@ node_type(VALUE self)
|
|
1509
1499
|
{
|
1510
1500
|
xmlNodePtr node;
|
1511
1501
|
Noko_Node_Get_Struct(self, xmlNode, node);
|
1512
|
-
return INT2NUM(
|
1502
|
+
return INT2NUM(node->type);
|
1513
1503
|
}
|
1514
1504
|
|
1515
1505
|
/*
|
@@ -1724,6 +1714,275 @@ native_write_to(
|
|
1724
1714
|
return io;
|
1725
1715
|
}
|
1726
1716
|
|
1717
|
+
|
1718
|
+
static inline void
|
1719
|
+
output_partial_string(VALUE out, char const *str, size_t length)
|
1720
|
+
{
|
1721
|
+
if (length) {
|
1722
|
+
rb_enc_str_buf_cat(out, str, (long)length, rb_utf8_encoding());
|
1723
|
+
}
|
1724
|
+
}
|
1725
|
+
|
1726
|
+
static inline void
|
1727
|
+
output_char(VALUE out, char ch)
|
1728
|
+
{
|
1729
|
+
output_partial_string(out, &ch, 1);
|
1730
|
+
}
|
1731
|
+
|
1732
|
+
static inline void
|
1733
|
+
output_string(VALUE out, char const *str)
|
1734
|
+
{
|
1735
|
+
output_partial_string(out, str, strlen(str));
|
1736
|
+
}
|
1737
|
+
|
1738
|
+
static inline void
|
1739
|
+
output_tagname(VALUE out, xmlNodePtr elem)
|
1740
|
+
{
|
1741
|
+
// Elements in the HTML, MathML, and SVG namespaces do not use a namespace
|
1742
|
+
// prefix in the HTML syntax.
|
1743
|
+
char const *name = (char const *)elem->name;
|
1744
|
+
xmlNsPtr ns = elem->ns;
|
1745
|
+
if (ns && ns->href && ns->prefix
|
1746
|
+
&& strcmp((char const *)ns->href, "http://www.w3.org/1999/xhtml")
|
1747
|
+
&& strcmp((char const *)ns->href, "http://www.w3.org/1998/Math/MathML")
|
1748
|
+
&& strcmp((char const *)ns->href, "http://www.w3.org/2000/svg")) {
|
1749
|
+
output_string(out, (char const *)elem->ns->prefix);
|
1750
|
+
output_char(out, ':');
|
1751
|
+
char const *colon = strchr(name, ':');
|
1752
|
+
if (colon) {
|
1753
|
+
name = colon + 1;
|
1754
|
+
}
|
1755
|
+
}
|
1756
|
+
output_string(out, name);
|
1757
|
+
}
|
1758
|
+
|
1759
|
+
static inline void
|
1760
|
+
output_attr_name(VALUE out, xmlAttrPtr attr)
|
1761
|
+
{
|
1762
|
+
xmlNsPtr ns = attr->ns;
|
1763
|
+
char const *name = (char const *)attr->name;
|
1764
|
+
if (ns && ns->href) {
|
1765
|
+
char const *uri = (char const *)ns->href;
|
1766
|
+
char const *localname = strchr(name, ':');
|
1767
|
+
if (localname) {
|
1768
|
+
++localname;
|
1769
|
+
} else {
|
1770
|
+
localname = name;
|
1771
|
+
}
|
1772
|
+
|
1773
|
+
if (!strcmp(uri, "http://www.w3.org/XML/1998/namespace")) {
|
1774
|
+
output_string(out, "xml:");
|
1775
|
+
name = localname;
|
1776
|
+
} else if (!strcmp(uri, "http://www.w3.org/2000/xmlns/")) {
|
1777
|
+
// xmlns:xmlns -> xmlns
|
1778
|
+
// xmlns:foo -> xmlns:foo
|
1779
|
+
if (strcmp(localname, "xmlns")) {
|
1780
|
+
output_string(out, "xmlns:");
|
1781
|
+
}
|
1782
|
+
name = localname;
|
1783
|
+
} else if (!strcmp(uri, "http://www.w3.org/1999/xlink")) {
|
1784
|
+
output_string(out, "xlink:");
|
1785
|
+
name = localname;
|
1786
|
+
} else if (ns->prefix) {
|
1787
|
+
output_string(out, (char const *)ns->prefix);
|
1788
|
+
output_char(out, ':');
|
1789
|
+
name = localname;
|
1790
|
+
}
|
1791
|
+
}
|
1792
|
+
output_string(out, name);
|
1793
|
+
}
|
1794
|
+
|
1795
|
+
static void
|
1796
|
+
output_escaped_string(VALUE out, xmlChar const *start, bool attr)
|
1797
|
+
{
|
1798
|
+
xmlChar const *next = start;
|
1799
|
+
int ch;
|
1800
|
+
|
1801
|
+
while ((ch = *next) != 0) {
|
1802
|
+
char const *replacement = NULL;
|
1803
|
+
size_t replaced_bytes = 1;
|
1804
|
+
if (ch == '&') {
|
1805
|
+
replacement = "&amp;";
|
1806
|
+
} else if (ch == 0xC2 && next[1] == 0xA0) {
|
1807
|
+
// U+00A0 NO-BREAK SPACE has the UTF-8 encoding C2 A0.
|
1808
|
+
replacement = "&nbsp;";
|
1809
|
+
replaced_bytes = 2;
|
1810
|
+
} else if (attr && ch == '"') {
|
1811
|
+
replacement = "&quot;";
|
1812
|
+
} else if (!attr && ch == '<') {
|
1813
|
+
replacement = "&lt;";
|
1814
|
+
} else if (!attr && ch == '>') {
|
1815
|
+
replacement = "&gt;";
|
1816
|
+
} else {
|
1817
|
+
++next;
|
1818
|
+
continue;
|
1819
|
+
}
|
1820
|
+
output_partial_string(out, (char const *)start, next - start);
|
1821
|
+
output_string(out, replacement);
|
1822
|
+
next += replaced_bytes;
|
1823
|
+
start = next;
|
1824
|
+
}
|
1825
|
+
output_partial_string(out, (char const *)start, next - start);
|
1826
|
+
}
|
1827
|
+
|
1828
|
+
static bool
|
1829
|
+
should_prepend_newline(xmlNodePtr node)
|
1830
|
+
{
|
1831
|
+
char const *name = (char const *)node->name;
|
1832
|
+
xmlNodePtr child = node->children;
|
1833
|
+
|
1834
|
+
if (!name || !child || (strcmp(name, "pre") && strcmp(name, "textarea") && strcmp(name, "listing"))) {
|
1835
|
+
return false;
|
1836
|
+
}
|
1837
|
+
|
1838
|
+
return child->type == XML_TEXT_NODE && child->content && child->content[0] == '\n';
|
1839
|
+
}
|
1840
|
+
|
1841
|
+
static VALUE
|
1842
|
+
rb_prepend_newline(VALUE self)
|
1843
|
+
{
|
1844
|
+
xmlNodePtr node;
|
1845
|
+
Noko_Node_Get_Struct(self, xmlNode, node);
|
1846
|
+
return should_prepend_newline(node) ? Qtrue : Qfalse;
|
1847
|
+
}
|
1848
|
+
|
1849
|
+
static bool
|
1850
|
+
is_one_of(xmlNodePtr node, char const *const *tagnames, size_t num_tagnames)
|
1851
|
+
{
|
1852
|
+
char const *name = (char const *)node->name;
|
1853
|
+
if (name == NULL) { // fragments don't have a name
|
1854
|
+
return false;
|
1855
|
+
}
|
1856
|
+
|
1857
|
+
if (node->ns != NULL) {
|
1858
|
+
// if the node has a namespace, it's in a foreign context and is not one of the HTML tags we're
|
1859
|
+
// matching against.
|
1860
|
+
return false;
|
1861
|
+
}
|
1862
|
+
|
1863
|
+
for (size_t idx = 0; idx < num_tagnames; ++idx) {
|
1864
|
+
if (!strcmp(name, tagnames[idx])) {
|
1865
|
+
return true;
|
1866
|
+
}
|
1867
|
+
}
|
1868
|
+
return false;
|
1869
|
+
}
|
1870
|
+
|
1871
|
+
static void
|
1872
|
+
output_node(
|
1873
|
+
VALUE out,
|
1874
|
+
xmlNodePtr node,
|
1875
|
+
bool preserve_newline
|
1876
|
+
)
|
1877
|
+
{
|
1878
|
+
static char const *const VOID_ELEMENTS[] = {
|
1879
|
+
"area", "base", "basefont", "bgsound", "br", "col", "embed", "frame", "hr",
|
1880
|
+
"img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr",
|
1881
|
+
};
|
1882
|
+
|
1883
|
+
static char const *const UNESCAPED_TEXT_ELEMENTS[] = {
|
1884
|
+
"style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript",
|
1885
|
+
};
|
1886
|
+
|
1887
|
+
switch (node->type) {
|
1888
|
+
case XML_ELEMENT_NODE:
|
1889
|
+
// Serialize the start tag.
|
1890
|
+
output_char(out, '<');
|
1891
|
+
output_tagname(out, node);
|
1892
|
+
|
1893
|
+
// Add attributes.
|
1894
|
+
for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
|
1895
|
+
output_char(out, ' ');
|
1896
|
+
output_attr_name(out, attr);
|
1897
|
+
if (attr->children) {
|
1898
|
+
output_string(out, "=\"");
|
1899
|
+
xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
|
1900
|
+
output_escaped_string(out, value, true);
|
1901
|
+
xmlFree(value);
|
1902
|
+
output_char(out, '"');
|
1903
|
+
} else {
|
1904
|
+
// Output name=""
|
1905
|
+
output_string(out, "=\"\"");
|
1906
|
+
}
|
1907
|
+
}
|
1908
|
+
output_char(out, '>');
|
1909
|
+
|
1910
|
+
// Add children and end tag if element is not void.
|
1911
|
+
if (!is_one_of(node, VOID_ELEMENTS, sizeof VOID_ELEMENTS / sizeof VOID_ELEMENTS[0])) {
|
1912
|
+
if (preserve_newline && should_prepend_newline(node)) {
|
1913
|
+
output_char(out, '\n');
|
1914
|
+
}
|
1915
|
+
for (xmlNodePtr child = node->children; child; child = child->next) {
|
1916
|
+
output_node(out, child, preserve_newline);
|
1917
|
+
}
|
1918
|
+
output_string(out, "</");
|
1919
|
+
output_tagname(out, node);
|
1920
|
+
output_char(out, '>');
|
1921
|
+
}
|
1922
|
+
break;
|
1923
|
+
|
1924
|
+
case XML_TEXT_NODE:
|
1925
|
+
if (node->parent
|
1926
|
+
&& is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS,
|
1927
|
+
sizeof UNESCAPED_TEXT_ELEMENTS / sizeof UNESCAPED_TEXT_ELEMENTS[0])) {
|
1928
|
+
output_string(out, (char const *)node->content);
|
1929
|
+
} else {
|
1930
|
+
output_escaped_string(out, node->content, false);
|
1931
|
+
}
|
1932
|
+
break;
|
1933
|
+
|
1934
|
+
case XML_CDATA_SECTION_NODE:
|
1935
|
+
output_string(out, "<![CDATA[");
|
1936
|
+
output_string(out, (char const *)node->content);
|
1937
|
+
output_string(out, "]]>");
|
1938
|
+
break;
|
1939
|
+
|
1940
|
+
case XML_COMMENT_NODE:
|
1941
|
+
output_string(out, "<!--");
|
1942
|
+
output_string(out, (char const *)node->content);
|
1943
|
+
output_string(out, "-->");
|
1944
|
+
break;
|
1945
|
+
|
1946
|
+
case XML_PI_NODE:
|
1947
|
+
output_string(out, "<?");
|
1948
|
+
output_string(out, (char const *)node->content);
|
1949
|
+
output_char(out, '>');
|
1950
|
+
break;
|
1951
|
+
|
1952
|
+
case XML_DOCUMENT_TYPE_NODE:
|
1953
|
+
case XML_DTD_NODE:
|
1954
|
+
output_string(out, "<!DOCTYPE ");
|
1955
|
+
output_string(out, (char const *)node->name);
|
1956
|
+
output_string(out, ">");
|
1957
|
+
break;
|
1958
|
+
|
1959
|
+
case XML_DOCUMENT_NODE:
|
1960
|
+
case XML_DOCUMENT_FRAG_NODE:
|
1961
|
+
case XML_HTML_DOCUMENT_NODE:
|
1962
|
+
for (xmlNodePtr child = node->children; child; child = child->next) {
|
1963
|
+
output_node(out, child, preserve_newline);
|
1964
|
+
}
|
1965
|
+
break;
|
1966
|
+
|
1967
|
+
default:
|
1968
|
+
rb_raise(rb_eRuntimeError, "Unsupported document node (%d); this is a bug in Nokogiri", node->type);
|
1969
|
+
break;
|
1970
|
+
}
|
1971
|
+
}
|
1972
|
+
|
1973
|
+
static VALUE
|
1974
|
+
html_standard_serialize(
|
1975
|
+
VALUE self,
|
1976
|
+
VALUE preserve_newline
|
1977
|
+
)
|
1978
|
+
{
|
1979
|
+
xmlNodePtr node;
|
1980
|
+
Noko_Node_Get_Struct(self, xmlNode, node);
|
1981
|
+
VALUE output = rb_str_buf_new(4096);
|
1982
|
+
output_node(output, node, RTEST(preserve_newline));
|
1983
|
+
return output;
|
1984
|
+
}
|
1985
|
+
|
1727
1986
|
/*
|
1728
1987
|
* :call-seq:
|
1729
1988
|
* line() → Integer
|
@@ -1757,7 +2016,7 @@ rb_xml_node_line(VALUE rb_node)
|
|
1757
2016
|
xmlNodePtr c_node;
|
1758
2017
|
Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
|
1759
2018
|
|
1760
|
-
return
|
2019
|
+
return LONG2NUM(xmlGetLineNo(c_node));
|
1761
2020
|
}
|
1762
2021
|
|
1763
2022
|
/*
|
@@ -1860,7 +2119,7 @@ compare(VALUE self, VALUE _other)
|
|
1860
2119
|
Noko_Node_Get_Struct(self, xmlNode, node);
|
1861
2120
|
Noko_Node_Get_Struct(_other, xmlNode, other);
|
1862
2121
|
|
1863
|
-
return INT2NUM(
|
2122
|
+
return INT2NUM(xmlXPathCmpNodes(other, node));
|
1864
2123
|
}
|
1865
2124
|
|
1866
2125
|
|
@@ -1960,12 +2219,17 @@ in_context(VALUE self, VALUE _str, VALUE _options)
|
|
1960
2219
|
|
1961
2220
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
1962
2221
|
|
1963
|
-
/*
|
2222
|
+
/*
|
2223
|
+
* Workaround for a libxml2 bug where a parsing error may leave a broken
|
1964
2224
|
* node reference in node->doc->children.
|
2225
|
+
*
|
2226
|
+
* https://bugzilla.gnome.org/show_bug.cgi?id=668155
|
2227
|
+
*
|
1965
2228
|
* This workaround is limited to when a parse error occurs, the document
|
1966
2229
|
* went from having no children to having children, and the context node is
|
1967
2230
|
* part of a document fragment.
|
1968
|
-
*
|
2231
|
+
*
|
2232
|
+
* TODO: This was fixed in libxml 2.8.0 by 71a243d
|
1969
2233
|
*/
|
1970
2234
|
if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) {
|
1971
2235
|
child_iter = node;
|
@@ -2099,7 +2363,7 @@ noko_xml_node_attrs(xmlNodePtr c_node)
|
|
2099
2363
|
}
|
2100
2364
|
|
2101
2365
|
void
|
2102
|
-
noko_init_xml_node()
|
2366
|
+
noko_init_xml_node(void)
|
2103
2367
|
{
|
2104
2368
|
cNokogiriXmlNode = rb_define_class_under(mNokogiriXml, "Node", rb_cObject);
|
2105
2369
|
|
@@ -2155,6 +2419,8 @@ noko_init_xml_node()
|
|
2155
2419
|
rb_define_private_method(cNokogiriXmlNode, "get", get, 1);
|
2156
2420
|
rb_define_private_method(cNokogiriXmlNode, "in_context", in_context, 2);
|
2157
2421
|
rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4);
|
2422
|
+
rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0);
|
2423
|
+
rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1);
|
2158
2424
|
rb_define_private_method(cNokogiriXmlNode, "process_xincludes", process_xincludes, 1);
|
2159
2425
|
rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1);
|
2160
2426
|
rb_define_private_method(cNokogiriXmlNode, "set", set, 2);
|