nokogiri 1.13.10-java → 1.14.0-java
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +33 -0
- data/LICENSE-DEPENDENCIES.md +830 -509
- data/LICENSE.md +1 -1
- data/README.md +18 -11
- data/dependencies.yml +25 -7
- data/ext/java/nokogiri/Html4Document.java +2 -0
- data/ext/java/nokogiri/Html4ElementDescription.java +9 -9
- data/ext/java/nokogiri/Html4EntityLookup.java +14 -3
- data/ext/java/nokogiri/Html4SaxParserContext.java +2 -2
- data/ext/java/nokogiri/Html4SaxPushParser.java +3 -0
- data/ext/java/nokogiri/NokogiriService.java +1 -24
- data/ext/java/nokogiri/XmlAttr.java +1 -1
- data/ext/java/nokogiri/XmlAttributeDecl.java +2 -1
- data/ext/java/nokogiri/XmlCdata.java +2 -1
- data/ext/java/nokogiri/XmlComment.java +2 -1
- data/ext/java/nokogiri/XmlDocument.java +5 -6
- data/ext/java/nokogiri/XmlDocumentFragment.java +2 -1
- data/ext/java/nokogiri/XmlDtd.java +4 -3
- data/ext/java/nokogiri/XmlElement.java +1 -0
- data/ext/java/nokogiri/XmlElementContent.java +4 -1
- data/ext/java/nokogiri/XmlElementDecl.java +3 -1
- data/ext/java/nokogiri/XmlEntityDecl.java +2 -0
- data/ext/java/nokogiri/XmlEntityReference.java +1 -0
- data/ext/java/nokogiri/XmlNamespace.java +2 -0
- data/ext/java/nokogiri/XmlNode.java +39 -24
- data/ext/java/nokogiri/XmlNodeSet.java +10 -7
- data/ext/java/nokogiri/XmlProcessingInstruction.java +1 -0
- data/ext/java/nokogiri/XmlReader.java +4 -3
- data/ext/java/nokogiri/XmlRelaxng.java +1 -0
- data/ext/java/nokogiri/XmlSaxParserContext.java +1 -0
- data/ext/java/nokogiri/XmlSaxPushParser.java +3 -0
- data/ext/java/nokogiri/XmlSchema.java +4 -2
- data/ext/java/nokogiri/XmlSyntaxError.java +1 -0
- data/ext/java/nokogiri/XmlText.java +1 -0
- data/ext/java/nokogiri/XmlXpathContext.java +2 -0
- data/ext/java/nokogiri/XsltStylesheet.java +16 -13
- data/ext/java/nokogiri/internals/HtmlDomParserContext.java +3 -2
- data/ext/java/nokogiri/internals/NokogiriHandler.java +2 -2
- data/ext/java/nokogiri/internals/NokogiriHelpers.java +4 -5
- data/ext/java/nokogiri/internals/NokogiriXPathFunction.java +3 -3
- data/ext/java/nokogiri/internals/ParserContext.java +2 -0
- data/ext/java/nokogiri/internals/ReaderNode.java +1 -1
- data/ext/java/nokogiri/internals/SaveContextVisitor.java +4 -2
- data/ext/java/nokogiri/internals/SchemaErrorHandler.java +2 -2
- data/ext/java/nokogiri/internals/XmlDomParserContext.java +2 -1
- data/ext/java/nokogiri/internals/c14n/Canonicalizer.java +1 -0
- data/ext/java/nokogiri/internals/dom2dtm/DOM2DTM.java +5 -4
- data/ext/nokogiri/extconf.rb +80 -21
- data/ext/nokogiri/gumbo.c +19 -9
- data/ext/nokogiri/html4_document.c +1 -1
- data/ext/nokogiri/html4_entity_lookup.c +1 -1
- data/ext/nokogiri/html4_sax_parser_context.c +0 -5
- data/ext/nokogiri/nokogiri.c +33 -51
- data/ext/nokogiri/xml_attribute_decl.c +1 -1
- data/ext/nokogiri/xml_cdata.c +1 -1
- data/ext/nokogiri/xml_document.c +16 -11
- data/ext/nokogiri/xml_element_content.c +2 -2
- data/ext/nokogiri/xml_element_decl.c +1 -1
- data/ext/nokogiri/xml_encoding_handler.c +2 -2
- data/ext/nokogiri/xml_namespace.c +38 -8
- data/ext/nokogiri/xml_node.c +286 -26
- data/ext/nokogiri/xml_node_set.c +0 -2
- data/ext/nokogiri/xml_reader.c +40 -20
- data/ext/nokogiri/xml_relax_ng.c +0 -2
- data/ext/nokogiri/xml_sax_parser.c +22 -16
- data/ext/nokogiri/xml_sax_parser_context.c +0 -5
- data/ext/nokogiri/xml_sax_push_parser.c +0 -2
- data/ext/nokogiri/xml_schema.c +0 -2
- data/ext/nokogiri/xml_xpath_context.c +87 -83
- data/ext/nokogiri/xslt_stylesheet.c +14 -13
- data/gumbo-parser/Makefile +10 -0
- data/lib/nokogiri/css/node.rb +2 -2
- data/lib/nokogiri/css/xpath_visitor.rb +5 -3
- data/lib/nokogiri/css.rb +6 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +3 -2
- data/lib/nokogiri/html4/document.rb +2 -121
- data/lib/nokogiri/html4/element_description_defaults.rb +6 -12
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4.rb +1 -0
- data/lib/nokogiri/html5/document.rb +113 -36
- data/lib/nokogiri/html5/document_fragment.rb +9 -2
- data/lib/nokogiri/html5/node.rb +3 -5
- data/lib/nokogiri/html5.rb +127 -216
- data/lib/nokogiri/jruby/dependencies.rb +1 -19
- data/lib/{isorelax.jar → nokogiri/jruby/isorelax/isorelax/20030108/isorelax-20030108.jar} +0 -0
- data/lib/nokogiri/jruby/net/sf/saxon/Saxon-HE/9.6.0-4/Saxon-HE-9.6.0-4.jar +0 -0
- data/lib/nokogiri/jruby/net/sourceforge/htmlunit/neko-htmlunit/2.63.0/neko-htmlunit-2.63.0.jar +0 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/jruby/nu/validator/jing/20200702VNU/jing-20200702VNU.jar +0 -0
- data/lib/nokogiri/jruby/org/nokogiri/nekodtd/0.1.11.noko1/nekodtd-0.1.11.noko1.jar +0 -0
- data/lib/{serializer.jar → nokogiri/jruby/xalan/serializer/2.7.2/serializer-2.7.2.jar} +0 -0
- data/lib/{xalan.jar → nokogiri/jruby/xalan/xalan/2.7.2/xalan-2.7.2.jar} +0 -0
- data/lib/{xercesImpl.jar → nokogiri/jruby/xerces/xercesImpl/2.12.2/xercesImpl-2.12.2.jar} +0 -0
- data/lib/{xml-apis.jar → nokogiri/jruby/xml-apis/xml-apis/1.4.01/xml-apis-1.4.01.jar} +0 -0
- data/lib/nokogiri/nokogiri.jar +0 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +11 -10
- data/lib/nokogiri/xml/attr.rb +49 -0
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/document.rb +102 -54
- data/lib/nokogiri/xml/document_fragment.rb +49 -6
- data/lib/nokogiri/xml/namespace.rb +42 -0
- data/lib/nokogiri/xml/node/save_options.rb +6 -4
- data/lib/nokogiri/xml/node.rb +190 -35
- data/lib/nokogiri/xml/node_set.rb +87 -9
- data/lib/nokogiri/xml/parse_options.rb +129 -50
- data/lib/nokogiri/xml/pp/node.rb +6 -4
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/sax/parser.rb +2 -3
- data/lib/nokogiri/xslt.rb +1 -1
- data/lib/nokogiri.rb +3 -11
- data/lib/xsd/xmlparser/nokogiri.rb +3 -1
- metadata +60 -272
- data/ext/java/nokogiri/EncodingHandler.java +0 -111
- data/lib/jing.jar +0 -0
- data/lib/nekodtd.jar +0 -0
- data/lib/nekohtml.jar +0 -0
@@ -32,7 +32,6 @@ _xml_namespace_dealloc(void *ptr)
|
|
32
32
|
* node set. see noko_xml_namespace_wrap().
|
33
33
|
*/
|
34
34
|
xmlNsPtr ns = ptr;
|
35
|
-
NOKOGIRI_DEBUG_START(ns) ;
|
36
35
|
|
37
36
|
if (ns->href) {
|
38
37
|
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->href));
|
@@ -41,7 +40,6 @@ _xml_namespace_dealloc(void *ptr)
|
|
41
40
|
xmlFree(DISCARD_CONST_QUAL_XMLCHAR(ns->prefix));
|
42
41
|
}
|
43
42
|
xmlFree(ns);
|
44
|
-
NOKOGIRI_DEBUG_END(ns) ;
|
45
43
|
}
|
46
44
|
|
47
45
|
#ifdef HAVE_RB_GC_LOCATION
|
@@ -76,10 +74,26 @@ static const rb_data_type_t nokogiri_xml_namespace_type_without_dealloc = {
|
|
76
74
|
};
|
77
75
|
|
78
76
|
/*
|
79
|
-
*
|
80
|
-
*
|
77
|
+
* :call-seq:
|
78
|
+
* prefix() → String or nil
|
81
79
|
*
|
82
|
-
*
|
80
|
+
* Return the prefix for this Namespace, or +nil+ if there is no prefix (e.g., default namespace).
|
81
|
+
*
|
82
|
+
* *Example*
|
83
|
+
*
|
84
|
+
* doc = Nokogiri::XML.parse(<<~XML)
|
85
|
+
* <?xml version="1.0"?>
|
86
|
+
* <root xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
|
87
|
+
* <child1 foo="abc" noko:bar="def"/>
|
88
|
+
* <noko:child2 foo="qwe" noko:bar="rty"/>
|
89
|
+
* </root>
|
90
|
+
* XML
|
91
|
+
*
|
92
|
+
* doc.root.elements.first.namespace.prefix
|
93
|
+
* # => nil
|
94
|
+
*
|
95
|
+
* doc.root.elements.last.namespace.prefix
|
96
|
+
* # => "noko"
|
83
97
|
*/
|
84
98
|
static VALUE
|
85
99
|
prefix(VALUE self)
|
@@ -93,10 +107,26 @@ prefix(VALUE self)
|
|
93
107
|
}
|
94
108
|
|
95
109
|
/*
|
96
|
-
*
|
97
|
-
*
|
110
|
+
* :call-seq:
|
111
|
+
* href() → String
|
112
|
+
*
|
113
|
+
* Returns the URI reference for this Namespace.
|
114
|
+
*
|
115
|
+
* *Example*
|
116
|
+
*
|
117
|
+
* doc = Nokogiri::XML.parse(<<~XML)
|
118
|
+
* <?xml version="1.0"?>
|
119
|
+
* <root xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
|
120
|
+
* <child1 foo="abc" noko:bar="def"/>
|
121
|
+
* <noko:child2 foo="qwe" noko:bar="rty"/>
|
122
|
+
* </root>
|
123
|
+
* XML
|
124
|
+
*
|
125
|
+
* doc.root.elements.first.namespace.href
|
126
|
+
* # => "http://nokogiri.org/ns/default"
|
98
127
|
*
|
99
|
-
*
|
128
|
+
* doc.root.elements.last.namespace.href
|
129
|
+
* # => "http://nokogiri.org/ns/noko"
|
100
130
|
*/
|
101
131
|
static VALUE
|
102
132
|
href(VALUE self)
|
data/ext/nokogiri/xml_node.c
CHANGED
@@ -1,5 +1,7 @@
|
|
1
1
|
#include <nokogiri.h>
|
2
2
|
|
3
|
+
#include <stdbool.h>
|
4
|
+
|
3
5
|
// :stopdoc:
|
4
6
|
|
5
7
|
VALUE cNokogiriXmlNode ;
|
@@ -7,20 +9,11 @@ static ID id_decorate, id_decorate_bang;
|
|
7
9
|
|
8
10
|
typedef xmlNodePtr(*pivot_reparentee_func)(xmlNodePtr, xmlNodePtr);
|
9
11
|
|
10
|
-
#ifdef DEBUG
|
11
12
|
static void
|
12
|
-
|
13
|
+
_xml_node_mark(void *ptr)
|
13
14
|
{
|
14
|
-
|
15
|
-
NOKOGIRI_DEBUG_END(x)
|
16
|
-
}
|
17
|
-
#else
|
18
|
-
# define _xml_node_dealloc 0
|
19
|
-
#endif
|
15
|
+
xmlNodePtr node = ptr;
|
20
16
|
|
21
|
-
static void
|
22
|
-
_xml_node_mark(xmlNodePtr node)
|
23
|
-
{
|
24
17
|
if (!DOC_RUBY_OBJECT_TEST(node->doc)) {
|
25
18
|
return;
|
26
19
|
}
|
@@ -37,24 +30,21 @@ _xml_node_mark(xmlNodePtr node)
|
|
37
30
|
|
38
31
|
#ifdef HAVE_RB_GC_LOCATION
|
39
32
|
static void
|
40
|
-
_xml_node_update_references(
|
33
|
+
_xml_node_update_references(void *ptr)
|
41
34
|
{
|
35
|
+
xmlNodePtr node = ptr;
|
36
|
+
|
42
37
|
if (node->_private) {
|
43
38
|
node->_private = (void *)rb_gc_location((VALUE)node->_private);
|
44
39
|
}
|
45
40
|
}
|
41
|
+
#else
|
42
|
+
# define _xml_node_update_references 0
|
46
43
|
#endif
|
47
44
|
|
48
|
-
typedef void (*gc_callback_t)(void *);
|
49
|
-
|
50
45
|
static const rb_data_type_t nokogiri_node_type = {
|
51
46
|
"Nokogiri/XMLNode",
|
52
|
-
{
|
53
|
-
(gc_callback_t)_xml_node_mark, (gc_callback_t)_xml_node_dealloc, 0,
|
54
|
-
#ifdef HAVE_RB_GC_LOCATION
|
55
|
-
(gc_callback_t)_xml_node_update_references
|
56
|
-
#endif
|
57
|
-
},
|
47
|
+
{_xml_node_mark, 0, 0, _xml_node_update_references},
|
58
48
|
0, 0,
|
59
49
|
#ifdef RUBY_TYPED_FREE_IMMEDIATELY
|
60
50
|
RUBY_TYPED_FREE_IMMEDIATELY,
|
@@ -809,7 +799,7 @@ rb_xml_node_pointer_id(VALUE self)
|
|
809
799
|
xmlNodePtr node;
|
810
800
|
Noko_Node_Get_Struct(self, xmlNode, node);
|
811
801
|
|
812
|
-
return
|
802
|
+
return rb_uint2inum((uintptr_t)(node));
|
813
803
|
}
|
814
804
|
|
815
805
|
/*
|
@@ -1509,7 +1499,7 @@ node_type(VALUE self)
|
|
1509
1499
|
{
|
1510
1500
|
xmlNodePtr node;
|
1511
1501
|
Noko_Node_Get_Struct(self, xmlNode, node);
|
1512
|
-
return INT2NUM(
|
1502
|
+
return INT2NUM(node->type);
|
1513
1503
|
}
|
1514
1504
|
|
1515
1505
|
/*
|
@@ -1724,6 +1714,269 @@ native_write_to(
|
|
1724
1714
|
return io;
|
1725
1715
|
}
|
1726
1716
|
|
1717
|
+
|
1718
|
+
static inline void
|
1719
|
+
output_partial_string(VALUE out, char const *str, size_t length)
|
1720
|
+
{
|
1721
|
+
if (length) {
|
1722
|
+
rb_enc_str_buf_cat(out, str, (long)length, rb_utf8_encoding());
|
1723
|
+
}
|
1724
|
+
}
|
1725
|
+
|
1726
|
+
static inline void
|
1727
|
+
output_char(VALUE out, char ch)
|
1728
|
+
{
|
1729
|
+
output_partial_string(out, &ch, 1);
|
1730
|
+
}
|
1731
|
+
|
1732
|
+
static inline void
|
1733
|
+
output_string(VALUE out, char const *str)
|
1734
|
+
{
|
1735
|
+
output_partial_string(out, str, strlen(str));
|
1736
|
+
}
|
1737
|
+
|
1738
|
+
static inline void
|
1739
|
+
output_tagname(VALUE out, xmlNodePtr elem)
|
1740
|
+
{
|
1741
|
+
// Elements in the HTML, MathML, and SVG namespaces do not use a namespace
|
1742
|
+
// prefix in the HTML syntax.
|
1743
|
+
char const *name = (char const *)elem->name;
|
1744
|
+
xmlNsPtr ns = elem->ns;
|
1745
|
+
if (ns && ns->href && ns->prefix
|
1746
|
+
&& strcmp((char const *)ns->href, "http://www.w3.org/1999/xhtml")
|
1747
|
+
&& strcmp((char const *)ns->href, "http://www.w3.org/1998/Math/MathML")
|
1748
|
+
&& strcmp((char const *)ns->href, "http://www.w3.org/2000/svg")) {
|
1749
|
+
output_string(out, (char const *)elem->ns->prefix);
|
1750
|
+
output_char(out, ':');
|
1751
|
+
char const *colon = strchr(name, ':');
|
1752
|
+
if (colon) {
|
1753
|
+
name = colon + 1;
|
1754
|
+
}
|
1755
|
+
}
|
1756
|
+
output_string(out, name);
|
1757
|
+
}
|
1758
|
+
|
1759
|
+
static inline void
|
1760
|
+
output_attr_name(VALUE out, xmlAttrPtr attr)
|
1761
|
+
{
|
1762
|
+
xmlNsPtr ns = attr->ns;
|
1763
|
+
char const *name = (char const *)attr->name;
|
1764
|
+
if (ns && ns->href) {
|
1765
|
+
char const *uri = (char const *)ns->href;
|
1766
|
+
char const *localname = strchr(name, ':');
|
1767
|
+
if (localname) {
|
1768
|
+
++localname;
|
1769
|
+
} else {
|
1770
|
+
localname = name;
|
1771
|
+
}
|
1772
|
+
|
1773
|
+
if (!strcmp(uri, "http://www.w3.org/XML/1998/namespace")) {
|
1774
|
+
output_string(out, "xml:");
|
1775
|
+
name = localname;
|
1776
|
+
} else if (!strcmp(uri, "http://www.w3.org/2000/xmlns/")) {
|
1777
|
+
// xmlns:xmlns -> xmlns
|
1778
|
+
// xmlns:foo -> xmlns:foo
|
1779
|
+
if (strcmp(localname, "xmlns")) {
|
1780
|
+
output_string(out, "xmlns:");
|
1781
|
+
}
|
1782
|
+
name = localname;
|
1783
|
+
} else if (!strcmp(uri, "http://www.w3.org/1999/xlink")) {
|
1784
|
+
output_string(out, "xlink:");
|
1785
|
+
name = localname;
|
1786
|
+
} else if (ns->prefix) {
|
1787
|
+
output_string(out, (char const *)ns->prefix);
|
1788
|
+
output_char(out, ':');
|
1789
|
+
name = localname;
|
1790
|
+
}
|
1791
|
+
}
|
1792
|
+
output_string(out, name);
|
1793
|
+
}
|
1794
|
+
|
1795
|
+
static void
|
1796
|
+
output_escaped_string(VALUE out, xmlChar const *start, bool attr)
|
1797
|
+
{
|
1798
|
+
xmlChar const *next = start;
|
1799
|
+
int ch;
|
1800
|
+
|
1801
|
+
while ((ch = *next) != 0) {
|
1802
|
+
char const *replacement = NULL;
|
1803
|
+
size_t replaced_bytes = 1;
|
1804
|
+
if (ch == '&') {
|
1805
|
+
replacement = "&amp;";
|
1806
|
+
} else if (ch == 0xC2 && next[1] == 0xA0) {
|
1807
|
+
// U+00A0 NO-BREAK SPACE has the UTF-8 encoding C2 A0.
|
1808
|
+
replacement = "&nbsp;";
|
1809
|
+
replaced_bytes = 2;
|
1810
|
+
} else if (attr && ch == '"') {
|
1811
|
+
replacement = "&quot;";
|
1812
|
+
} else if (!attr && ch == '<') {
|
1813
|
+
replacement = "&lt;";
|
1814
|
+
} else if (!attr && ch == '>') {
|
1815
|
+
replacement = "&gt;";
|
1816
|
+
} else {
|
1817
|
+
++next;
|
1818
|
+
continue;
|
1819
|
+
}
|
1820
|
+
output_partial_string(out, (char const *)start, next - start);
|
1821
|
+
output_string(out, replacement);
|
1822
|
+
next += replaced_bytes;
|
1823
|
+
start = next;
|
1824
|
+
}
|
1825
|
+
output_partial_string(out, (char const *)start, next - start);
|
1826
|
+
}
|
1827
|
+
|
1828
|
+
static bool
|
1829
|
+
should_prepend_newline(xmlNodePtr node)
|
1830
|
+
{
|
1831
|
+
char const *name = (char const *)node->name;
|
1832
|
+
xmlNodePtr child = node->children;
|
1833
|
+
|
1834
|
+
if (!name || !child || (strcmp(name, "pre") && strcmp(name, "textarea") && strcmp(name, "listing"))) {
|
1835
|
+
return false;
|
1836
|
+
}
|
1837
|
+
|
1838
|
+
return child->type == XML_TEXT_NODE && child->content && child->content[0] == '\n';
|
1839
|
+
}
|
1840
|
+
|
1841
|
+
static VALUE
|
1842
|
+
rb_prepend_newline(VALUE self)
|
1843
|
+
{
|
1844
|
+
xmlNodePtr node;
|
1845
|
+
Noko_Node_Get_Struct(self, xmlNode, node);
|
1846
|
+
return should_prepend_newline(node) ? Qtrue : Qfalse;
|
1847
|
+
}
|
1848
|
+
|
1849
|
+
static bool
|
1850
|
+
is_one_of(xmlNodePtr node, char const *const *tagnames, size_t num_tagnames)
|
1851
|
+
{
|
1852
|
+
char const *name = (char const *)node->name;
|
1853
|
+
if (name == NULL) { // fragments don't have a name
|
1854
|
+
return false;
|
1855
|
+
}
|
1856
|
+
for (size_t idx = 0; idx < num_tagnames; ++idx) {
|
1857
|
+
if (!strcmp(name, tagnames[idx])) {
|
1858
|
+
return true;
|
1859
|
+
}
|
1860
|
+
}
|
1861
|
+
return false;
|
1862
|
+
|
1863
|
+
}
|
1864
|
+
|
1865
|
+
static void
|
1866
|
+
output_node(
|
1867
|
+
VALUE out,
|
1868
|
+
xmlNodePtr node,
|
1869
|
+
bool preserve_newline
|
1870
|
+
)
|
1871
|
+
{
|
1872
|
+
static char const *const VOID_ELEMENTS[] = {
|
1873
|
+
"area", "base", "basefont", "bgsound", "br", "col", "embed", "frame", "hr",
|
1874
|
+
"img", "input", "keygen", "link", "meta", "param", "source", "track", "wbr",
|
1875
|
+
};
|
1876
|
+
|
1877
|
+
static char const *const UNESCAPED_TEXT_ELEMENTS[] = {
|
1878
|
+
"style", "script", "xmp", "iframe", "noembed", "noframes", "plaintext", "noscript",
|
1879
|
+
};
|
1880
|
+
|
1881
|
+
switch (node->type) {
|
1882
|
+
case XML_ELEMENT_NODE:
|
1883
|
+
// Serialize the start tag.
|
1884
|
+
output_char(out, '<');
|
1885
|
+
output_tagname(out, node);
|
1886
|
+
|
1887
|
+
// Add attributes.
|
1888
|
+
for (xmlAttrPtr attr = node->properties; attr; attr = attr->next) {
|
1889
|
+
output_char(out, ' ');
|
1890
|
+
output_attr_name(out, attr);
|
1891
|
+
if (attr->children) {
|
1892
|
+
output_string(out, "=\"");
|
1893
|
+
xmlChar *value = xmlNodeListGetString(attr->doc, attr->children, 1);
|
1894
|
+
output_escaped_string(out, value, true);
|
1895
|
+
xmlFree(value);
|
1896
|
+
output_char(out, '"');
|
1897
|
+
} else {
|
1898
|
+
// Output name=""
|
1899
|
+
output_string(out, "=\"\"");
|
1900
|
+
}
|
1901
|
+
}
|
1902
|
+
output_char(out, '>');
|
1903
|
+
|
1904
|
+
// Add children and end tag if element is not void.
|
1905
|
+
if (!is_one_of(node, VOID_ELEMENTS, sizeof VOID_ELEMENTS / sizeof VOID_ELEMENTS[0])) {
|
1906
|
+
if (preserve_newline && should_prepend_newline(node)) {
|
1907
|
+
output_char(out, '\n');
|
1908
|
+
}
|
1909
|
+
for (xmlNodePtr child = node->children; child; child = child->next) {
|
1910
|
+
output_node(out, child, preserve_newline);
|
1911
|
+
}
|
1912
|
+
output_string(out, "</");
|
1913
|
+
output_tagname(out, node);
|
1914
|
+
output_char(out, '>');
|
1915
|
+
}
|
1916
|
+
break;
|
1917
|
+
|
1918
|
+
case XML_TEXT_NODE:
|
1919
|
+
if (node->parent
|
1920
|
+
&& is_one_of(node->parent, UNESCAPED_TEXT_ELEMENTS,
|
1921
|
+
sizeof UNESCAPED_TEXT_ELEMENTS / sizeof UNESCAPED_TEXT_ELEMENTS[0])) {
|
1922
|
+
output_string(out, (char const *)node->content);
|
1923
|
+
} else {
|
1924
|
+
output_escaped_string(out, node->content, false);
|
1925
|
+
}
|
1926
|
+
break;
|
1927
|
+
|
1928
|
+
case XML_CDATA_SECTION_NODE:
|
1929
|
+
output_string(out, "<![CDATA[");
|
1930
|
+
output_string(out, (char const *)node->content);
|
1931
|
+
output_string(out, "]]>");
|
1932
|
+
break;
|
1933
|
+
|
1934
|
+
case XML_COMMENT_NODE:
|
1935
|
+
output_string(out, "<!--");
|
1936
|
+
output_string(out, (char const *)node->content);
|
1937
|
+
output_string(out, "-->");
|
1938
|
+
break;
|
1939
|
+
|
1940
|
+
case XML_PI_NODE:
|
1941
|
+
output_string(out, "<?");
|
1942
|
+
output_string(out, (char const *)node->content);
|
1943
|
+
output_char(out, '>');
|
1944
|
+
break;
|
1945
|
+
|
1946
|
+
case XML_DOCUMENT_TYPE_NODE:
|
1947
|
+
case XML_DTD_NODE:
|
1948
|
+
output_string(out, "<!DOCTYPE ");
|
1949
|
+
output_string(out, (char const *)node->name);
|
1950
|
+
output_string(out, ">");
|
1951
|
+
break;
|
1952
|
+
|
1953
|
+
case XML_DOCUMENT_NODE:
|
1954
|
+
case XML_DOCUMENT_FRAG_NODE:
|
1955
|
+
case XML_HTML_DOCUMENT_NODE:
|
1956
|
+
for (xmlNodePtr child = node->children; child; child = child->next) {
|
1957
|
+
output_node(out, child, preserve_newline);
|
1958
|
+
}
|
1959
|
+
break;
|
1960
|
+
|
1961
|
+
default:
|
1962
|
+
rb_raise(rb_eRuntimeError, "Unsupported document node (%d); this is a bug in Nokogiri", node->type);
|
1963
|
+
break;
|
1964
|
+
}
|
1965
|
+
}
|
1966
|
+
|
1967
|
+
static VALUE
|
1968
|
+
html_standard_serialize(
|
1969
|
+
VALUE self,
|
1970
|
+
VALUE preserve_newline
|
1971
|
+
)
|
1972
|
+
{
|
1973
|
+
xmlNodePtr node;
|
1974
|
+
Noko_Node_Get_Struct(self, xmlNode, node);
|
1975
|
+
VALUE output = rb_str_buf_new(4096);
|
1976
|
+
output_node(output, node, RTEST(preserve_newline));
|
1977
|
+
return output;
|
1978
|
+
}
|
1979
|
+
|
1727
1980
|
/*
|
1728
1981
|
* :call-seq:
|
1729
1982
|
* line() → Integer
|
@@ -1757,7 +2010,7 @@ rb_xml_node_line(VALUE rb_node)
|
|
1757
2010
|
xmlNodePtr c_node;
|
1758
2011
|
Noko_Node_Get_Struct(rb_node, xmlNode, c_node);
|
1759
2012
|
|
1760
|
-
return
|
2013
|
+
return LONG2NUM(xmlGetLineNo(c_node));
|
1761
2014
|
}
|
1762
2015
|
|
1763
2016
|
/*
|
@@ -1860,7 +2113,7 @@ compare(VALUE self, VALUE _other)
|
|
1860
2113
|
Noko_Node_Get_Struct(self, xmlNode, node);
|
1861
2114
|
Noko_Node_Get_Struct(_other, xmlNode, other);
|
1862
2115
|
|
1863
|
-
return INT2NUM(
|
2116
|
+
return INT2NUM(xmlXPathCmpNodes(other, node));
|
1864
2117
|
}
|
1865
2118
|
|
1866
2119
|
|
@@ -1960,12 +2213,17 @@ in_context(VALUE self, VALUE _str, VALUE _options)
|
|
1960
2213
|
|
1961
2214
|
xmlSetStructuredErrorFunc(NULL, NULL);
|
1962
2215
|
|
1963
|
-
/*
|
2216
|
+
/*
|
2217
|
+
* Workaround for a libxml2 bug where a parsing error may leave a broken
|
1964
2218
|
* node reference in node->doc->children.
|
2219
|
+
*
|
2220
|
+
* https://bugzilla.gnome.org/show_bug.cgi?id=668155
|
2221
|
+
*
|
1965
2222
|
* This workaround is limited to when a parse error occurs, the document
|
1966
2223
|
* went from having no children to having children, and the context node is
|
1967
2224
|
* part of a document fragment.
|
1968
|
-
*
|
2225
|
+
*
|
2226
|
+
* TODO: This was fixed in libxml 2.8.0 by 71a243d
|
1969
2227
|
*/
|
1970
2228
|
if (error != XML_ERR_OK && doc_is_empty && node->doc->children != NULL) {
|
1971
2229
|
child_iter = node;
|
@@ -2155,6 +2413,8 @@ noko_init_xml_node()
|
|
2155
2413
|
rb_define_private_method(cNokogiriXmlNode, "get", get, 1);
|
2156
2414
|
rb_define_private_method(cNokogiriXmlNode, "in_context", in_context, 2);
|
2157
2415
|
rb_define_private_method(cNokogiriXmlNode, "native_write_to", native_write_to, 4);
|
2416
|
+
rb_define_private_method(cNokogiriXmlNode, "prepend_newline?", rb_prepend_newline, 0);
|
2417
|
+
rb_define_private_method(cNokogiriXmlNode, "html_standard_serialize", html_standard_serialize, 1);
|
2158
2418
|
rb_define_private_method(cNokogiriXmlNode, "process_xincludes", process_xincludes, 1);
|
2159
2419
|
rb_define_private_method(cNokogiriXmlNode, "replace_node", replace, 1);
|
2160
2420
|
rb_define_private_method(cNokogiriXmlNode, "set", set, 2);
|
data/ext/nokogiri/xml_node_set.c
CHANGED
@@ -88,13 +88,11 @@ deallocate(xmlNodeSetPtr node_set)
|
|
88
88
|
* For reasons outlined in xml_namespace.c, here we reproduce xmlXPathFreeNodeSet() except for the
|
89
89
|
* offending call to xmlXPathNodeSetFreeNs().
|
90
90
|
*/
|
91
|
-
NOKOGIRI_DEBUG_START(node_set) ;
|
92
91
|
if (node_set->nodeTab != NULL) {
|
93
92
|
xmlFree(node_set->nodeTab);
|
94
93
|
}
|
95
94
|
|
96
95
|
xmlFree(node_set);
|
97
|
-
NOKOGIRI_DEBUG_END(node_set) ;
|
98
96
|
}
|
99
97
|
|
100
98
|
|
data/ext/nokogiri/xml_reader.c
CHANGED
@@ -5,9 +5,7 @@ VALUE cNokogiriXmlReader;
|
|
5
5
|
static void
|
6
6
|
dealloc(xmlTextReaderPtr reader)
|
7
7
|
{
|
8
|
-
NOKOGIRI_DEBUG_START(reader);
|
9
8
|
xmlFreeTextReader(reader);
|
10
|
-
NOKOGIRI_DEBUG_END(reader);
|
11
9
|
}
|
12
10
|
|
13
11
|
static int
|
@@ -126,26 +124,37 @@ attributes_eh(VALUE self)
|
|
126
124
|
* Get a hash of namespaces for this Node
|
127
125
|
*/
|
128
126
|
static VALUE
|
129
|
-
|
127
|
+
rb_xml_reader_namespaces(VALUE rb_reader)
|
130
128
|
{
|
131
|
-
|
132
|
-
|
133
|
-
|
134
|
-
|
135
|
-
Data_Get_Struct(self, xmlTextReader, reader);
|
129
|
+
VALUE rb_namespaces = rb_hash_new() ;
|
130
|
+
xmlTextReaderPtr c_reader;
|
131
|
+
xmlNodePtr c_node;
|
132
|
+
VALUE rb_errors;
|
136
133
|
|
137
|
-
|
134
|
+
Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
|
138
135
|
|
139
|
-
if (! has_attributes(
|
140
|
-
return
|
136
|
+
if (! has_attributes(c_reader)) {
|
137
|
+
return rb_namespaces ;
|
141
138
|
}
|
142
139
|
|
143
|
-
|
144
|
-
if (ptr == NULL) { return Qnil; }
|
140
|
+
rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
|
145
141
|
|
146
|
-
|
142
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
|
143
|
+
c_node = xmlTextReaderExpand(c_reader);
|
144
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
147
145
|
|
148
|
-
|
146
|
+
if (c_node == NULL) {
|
147
|
+
if (RARRAY_LEN(rb_errors) > 0) {
|
148
|
+
VALUE rb_error = rb_ary_entry(rb_errors, 0);
|
149
|
+
VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
|
150
|
+
rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
|
151
|
+
}
|
152
|
+
return Qnil;
|
153
|
+
}
|
154
|
+
|
155
|
+
Nokogiri_xml_node_namespaces(c_node, rb_namespaces);
|
156
|
+
|
157
|
+
return rb_namespaces ;
|
149
158
|
}
|
150
159
|
|
151
160
|
/*
|
@@ -204,6 +213,7 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
|
|
204
213
|
xmlTextReaderPtr c_reader;
|
205
214
|
xmlNodePtr c_node;
|
206
215
|
xmlAttrPtr c_property;
|
216
|
+
VALUE rb_errors;
|
207
217
|
|
208
218
|
Data_Get_Struct(rb_reader, xmlTextReader, c_reader);
|
209
219
|
|
@@ -211,8 +221,18 @@ rb_xml_reader_attribute_hash(VALUE rb_reader)
|
|
211
221
|
return rb_attributes;
|
212
222
|
}
|
213
223
|
|
224
|
+
rb_errors = rb_funcall(rb_reader, rb_intern("errors"), 0);
|
225
|
+
|
226
|
+
xmlSetStructuredErrorFunc((void *)rb_errors, Nokogiri_error_array_pusher);
|
214
227
|
c_node = xmlTextReaderExpand(c_reader);
|
228
|
+
xmlSetStructuredErrorFunc(NULL, NULL);
|
229
|
+
|
215
230
|
if (c_node == NULL) {
|
231
|
+
if (RARRAY_LEN(rb_errors) > 0) {
|
232
|
+
VALUE rb_error = rb_ary_entry(rb_errors, 0);
|
233
|
+
VALUE exception_message = rb_funcall(rb_error, rb_intern("to_s"), 0);
|
234
|
+
rb_exc_raise(rb_class_new_instance(1, &exception_message, cNokogiriXmlSyntaxError));
|
235
|
+
}
|
216
236
|
return Qnil;
|
217
237
|
}
|
218
238
|
|
@@ -306,7 +326,7 @@ attribute_count(VALUE self)
|
|
306
326
|
count = xmlTextReaderAttributeCount(reader);
|
307
327
|
if (count == -1) { return Qnil; }
|
308
328
|
|
309
|
-
return INT2NUM(
|
329
|
+
return INT2NUM(count);
|
310
330
|
}
|
311
331
|
|
312
332
|
/*
|
@@ -325,7 +345,7 @@ depth(VALUE self)
|
|
325
345
|
depth = xmlTextReaderDepth(reader);
|
326
346
|
if (depth == -1) { return Qnil; }
|
327
347
|
|
328
|
-
return INT2NUM(
|
348
|
+
return INT2NUM(depth);
|
329
349
|
}
|
330
350
|
|
331
351
|
/*
|
@@ -498,7 +518,7 @@ state(VALUE self)
|
|
498
518
|
{
|
499
519
|
xmlTextReaderPtr reader;
|
500
520
|
Data_Get_Struct(self, xmlTextReader, reader);
|
501
|
-
return INT2NUM(
|
521
|
+
return INT2NUM(xmlTextReaderReadState(reader));
|
502
522
|
}
|
503
523
|
|
504
524
|
/*
|
@@ -512,7 +532,7 @@ node_type(VALUE self)
|
|
512
532
|
{
|
513
533
|
xmlTextReaderPtr reader;
|
514
534
|
Data_Get_Struct(self, xmlTextReader, reader);
|
515
|
-
return INT2NUM(
|
535
|
+
return INT2NUM(xmlTextReaderNodeType(reader));
|
516
536
|
}
|
517
537
|
|
518
538
|
/*
|
@@ -762,7 +782,7 @@ noko_init_xml_reader()
|
|
762
782
|
rb_define_method(cNokogiriXmlReader, "local_name", local_name, 0);
|
763
783
|
rb_define_method(cNokogiriXmlReader, "name", name, 0);
|
764
784
|
rb_define_method(cNokogiriXmlReader, "namespace_uri", namespace_uri, 0);
|
765
|
-
rb_define_method(cNokogiriXmlReader, "namespaces",
|
785
|
+
rb_define_method(cNokogiriXmlReader, "namespaces", rb_xml_reader_namespaces, 0);
|
766
786
|
rb_define_method(cNokogiriXmlReader, "node_type", node_type, 0);
|
767
787
|
rb_define_method(cNokogiriXmlReader, "outer_xml", outer_xml, 0);
|
768
788
|
rb_define_method(cNokogiriXmlReader, "prefix", prefix, 0);
|
data/ext/nokogiri/xml_relax_ng.c
CHANGED
@@ -195,40 +195,48 @@ comment_func(void *ctx, const xmlChar *value)
|
|
195
195
|
rb_funcall(doc, id_comment, 1, str);
|
196
196
|
}
|
197
197
|
|
198
|
+
PRINTFLIKE_DECL(2, 3)
|
198
199
|
static void
|
199
200
|
warning_func(void *ctx, const char *msg, ...)
|
200
201
|
{
|
201
202
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
202
203
|
VALUE doc = rb_iv_get(self, "@document");
|
203
|
-
|
204
|
-
VALUE ruby_message;
|
204
|
+
VALUE rb_message;
|
205
205
|
|
206
|
+
#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
|
207
|
+
/* It is not currently possible to pass var args from native
|
208
|
+
functions to sulong, so we work around the issue here. */
|
209
|
+
rb_message = rb_sprintf("warning_func: %s", msg);
|
210
|
+
#else
|
206
211
|
va_list args;
|
207
212
|
va_start(args, msg);
|
208
|
-
|
213
|
+
rb_message = rb_vsprintf(msg, args);
|
209
214
|
va_end(args);
|
215
|
+
#endif
|
210
216
|
|
211
|
-
|
212
|
-
free(message);
|
213
|
-
rb_funcall(doc, id_warning, 1, ruby_message);
|
217
|
+
rb_funcall(doc, id_warning, 1, rb_message);
|
214
218
|
}
|
215
219
|
|
220
|
+
PRINTFLIKE_DECL(2, 3)
|
216
221
|
static void
|
217
222
|
error_func(void *ctx, const char *msg, ...)
|
218
223
|
{
|
219
224
|
VALUE self = NOKOGIRI_SAX_SELF(ctx);
|
220
225
|
VALUE doc = rb_iv_get(self, "@document");
|
221
|
-
|
222
|
-
VALUE ruby_message;
|
226
|
+
VALUE rb_message;
|
223
227
|
|
228
|
+
#ifdef TRUFFLERUBY_NOKOGIRI_SYSTEM_LIBRARIES
|
229
|
+
/* It is not currently possible to pass var args from native
|
230
|
+
functions to sulong, so we work around the issue here. */
|
231
|
+
rb_message = rb_sprintf("error_func: %s", msg);
|
232
|
+
#else
|
224
233
|
va_list args;
|
225
234
|
va_start(args, msg);
|
226
|
-
|
235
|
+
rb_message = rb_vsprintf(msg, args);
|
227
236
|
va_end(args);
|
237
|
+
#endif
|
228
238
|
|
229
|
-
|
230
|
-
free(message);
|
231
|
-
rb_funcall(doc, id_error, 1, ruby_message);
|
239
|
+
rb_funcall(doc, id_error, 1, rb_message);
|
232
240
|
}
|
233
241
|
|
234
242
|
static void
|
@@ -260,15 +268,13 @@ processing_instruction(void *ctx, const xmlChar *name, const xmlChar *content)
|
|
260
268
|
static void
|
261
269
|
deallocate(xmlSAXHandlerPtr handler)
|
262
270
|
{
|
263
|
-
|
264
|
-
free(handler);
|
265
|
-
NOKOGIRI_DEBUG_END(handler);
|
271
|
+
ruby_xfree(handler);
|
266
272
|
}
|
267
273
|
|
268
274
|
static VALUE
|
269
275
|
allocate(VALUE klass)
|
270
276
|
{
|
271
|
-
xmlSAXHandlerPtr handler =
|
277
|
+
xmlSAXHandlerPtr handler = ruby_xcalloc((size_t)1, sizeof(xmlSAXHandler));
|
272
278
|
|
273
279
|
handler->startDocument = start_document;
|
274
280
|
handler->endDocument = end_document;
|