nokogiri 1.12.0 → 1.13.3
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/README.md +9 -7
- data/bin/nokogiri +63 -50
- data/dependencies.yml +11 -62
- data/ext/nokogiri/extconf.rb +68 -37
- data/ext/nokogiri/gumbo.c +11 -11
- data/ext/nokogiri/html4_element_description.c +1 -1
- data/ext/nokogiri/html4_sax_parser_context.c +2 -1
- data/ext/nokogiri/nokogiri.c +1 -1
- data/ext/nokogiri/nokogiri.h +3 -0
- data/ext/nokogiri/xml_document.c +36 -36
- data/ext/nokogiri/xml_document_fragment.c +0 -2
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_encoding_handler.c +25 -11
- data/ext/nokogiri/xml_namespace.c +2 -2
- data/ext/nokogiri/xml_node.c +647 -335
- data/ext/nokogiri/xml_reader.c +37 -11
- data/ext/nokogiri/xml_xpath_context.c +72 -49
- data/ext/nokogiri/xslt_stylesheet.c +107 -9
- data/gumbo-parser/src/Makefile +20 -3
- data/gumbo-parser/src/parser.c +0 -11
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +9 -8
- data/lib/nokogiri/css/parser.rb +360 -341
- data/lib/nokogiri/css/parser.y +249 -244
- data/lib/nokogiri/css/parser_extras.rb +20 -20
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +179 -82
- data/lib/nokogiri/css.rb +38 -6
- data/lib/nokogiri/decorators/slop.rb +8 -7
- data/lib/nokogiri/extension.rb +7 -2
- data/lib/nokogiri/gumbo.rb +1 -0
- data/lib/nokogiri/html.rb +16 -10
- data/lib/nokogiri/html4/builder.rb +1 -0
- data/lib/nokogiri/html4/document.rb +84 -75
- data/lib/nokogiri/html4/document_fragment.rb +11 -7
- data/lib/nokogiri/html4/element_description.rb +1 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +426 -520
- data/lib/nokogiri/html4/entity_lookup.rb +2 -1
- data/lib/nokogiri/html4/sax/parser.rb +2 -1
- data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
- data/lib/nokogiri/html4.rb +11 -5
- data/lib/nokogiri/html5/document.rb +24 -10
- data/lib/nokogiri/html5/document_fragment.rb +5 -2
- data/lib/nokogiri/html5/node.rb +6 -3
- data/lib/nokogiri/html5.rb +68 -64
- data/lib/nokogiri/jruby/dependencies.rb +10 -9
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +2 -1
- data/lib/nokogiri/version/info.rb +19 -13
- data/lib/nokogiri/version.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +5 -3
- data/lib/nokogiri/xml/attribute_decl.rb +2 -1
- data/lib/nokogiri/xml/builder.rb +69 -31
- data/lib/nokogiri/xml/cdata.rb +2 -1
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +178 -96
- data/lib/nokogiri/xml/document_fragment.rb +41 -38
- data/lib/nokogiri/xml/dtd.rb +3 -2
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +2 -1
- data/lib/nokogiri/xml/entity_decl.rb +3 -2
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +2 -0
- data/lib/nokogiri/xml/node/save_options.rb +7 -4
- data/lib/nokogiri/xml/node.rb +512 -348
- data/lib/nokogiri/xml/node_set.rb +46 -54
- data/lib/nokogiri/xml/notation.rb +12 -0
- data/lib/nokogiri/xml/parse_options.rb +11 -7
- data/lib/nokogiri/xml/pp/character_data.rb +8 -6
- data/lib/nokogiri/xml/pp/node.rb +24 -26
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/reader.rb +17 -19
- data/lib/nokogiri/xml/relax_ng.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +20 -19
- data/lib/nokogiri/xml/sax/parser.rb +36 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +7 -6
- data/lib/nokogiri/xml/searchable.rb +93 -62
- data/lib/nokogiri/xml/syntax_error.rb +4 -4
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +12 -0
- data/lib/nokogiri/xml/xpath_context.rb +2 -3
- data/lib/nokogiri/xml.rb +3 -3
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/nokogiri/xslt.rb +21 -13
- data/lib/nokogiri.rb +19 -16
- data/lib/xsd/xmlparser/nokogiri.rb +25 -24
- data/patches/libxml2/0004-use-glibc-strlen.patch +3 -3
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2443 -1914
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-Revert-Different-approach-to-fix-quadratic-behavior.patch +45 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
- data/ports/archives/libxml2-2.9.13.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
- metadata +104 -31
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +0 -31
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -0,0 +1,61 @@
|
|
1
|
+
From 3ea8d08da310b645e37940eaae5cc28e251b155b Mon Sep 17 00:00:00 2001
|
2
|
+
From: Mike Dalessio <mike.dalessio@gmail.com>
|
3
|
+
Date: Sat, 17 Jul 2021 14:36:53 -0400
|
4
|
+
Subject: [PATCH] htmlParseComment: handle abruptly-closed comments
|
5
|
+
|
6
|
+
See guidance provided on abruptly-closed comments here:
|
7
|
+
|
8
|
+
https://html.spec.whatwg.org/multipage/parsing.html#parse-error-abrupt-closing-of-empty-comment
|
9
|
+
---
|
10
|
+
HTMLparser.c | 11 +++++++++++
|
11
|
+
include/libxml/xmlerror.h | 1 +
|
12
|
+
2 files changed, 12 insertions(+)
|
13
|
+
|
14
|
+
diff --git a/HTMLparser.c b/HTMLparser.c
|
15
|
+
index b56363a..f0bf294 100644
|
16
|
+
--- a/HTMLparser.c
|
17
|
+
+++ b/HTMLparser.c
|
18
|
+
@@ -3485,10 +3485,20 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
|
19
|
+
q = CUR_CHAR(ql);
|
20
|
+
if (q == 0)
|
21
|
+
goto unfinished;
|
22
|
+
+ if (q == '>') {
|
23
|
+
+ htmlParseErr(ctxt, XML_ERR_COMMENT_ABRUPTLY_ENDED, "Comment abruptly ended", NULL, NULL);
|
24
|
+
+ cur = '>';
|
25
|
+
+ goto finished;
|
26
|
+
+ }
|
27
|
+
NEXTL(ql);
|
28
|
+
r = CUR_CHAR(rl);
|
29
|
+
if (r == 0)
|
30
|
+
goto unfinished;
|
31
|
+
+ if (q == '-' && r == '>') {
|
32
|
+
+ htmlParseErr(ctxt, XML_ERR_COMMENT_ABRUPTLY_ENDED, "Comment abruptly ended", NULL, NULL);
|
33
|
+
+ cur = '>';
|
34
|
+
+ goto finished;
|
35
|
+
+ }
|
36
|
+
NEXTL(rl);
|
37
|
+
cur = CUR_CHAR(l);
|
38
|
+
while ((cur != 0) &&
|
39
|
+
@@ -3536,6 +3546,7 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
|
40
|
+
cur = next;
|
41
|
+
l = nl;
|
42
|
+
}
|
43
|
+
+finished:
|
44
|
+
buf[len] = 0;
|
45
|
+
if (cur == '>') {
|
46
|
+
NEXT;
|
47
|
+
diff --git a/include/libxml/xmlerror.h b/include/libxml/xmlerror.h
|
48
|
+
index c101997..7b68e40 100644
|
49
|
+
--- a/include/libxml/xmlerror.h
|
50
|
+
+++ b/include/libxml/xmlerror.h
|
51
|
+
@@ -209,6 +209,7 @@ typedef enum {
|
52
|
+
XML_ERR_VERSION_MISMATCH, /* 109 */
|
53
|
+
XML_ERR_NAME_TOO_LONG, /* 110 */
|
54
|
+
XML_ERR_USER_STOP, /* 111 */
|
55
|
+
+ XML_ERR_COMMENT_ABRUPTLY_ENDED, /* 112 */
|
56
|
+
XML_NS_ERR_XML_NAMESPACE = 200,
|
57
|
+
XML_NS_ERR_UNDEFINED_NAMESPACE, /* 201 */
|
58
|
+
XML_NS_ERR_QNAME, /* 202 */
|
59
|
+
--
|
60
|
+
2.31.0
|
61
|
+
|
@@ -0,0 +1,77 @@
|
|
1
|
+
From 74c95ec5932c737d4fcb06b8646b0017364ada14 Mon Sep 17 00:00:00 2001
|
2
|
+
From: Mike Dalessio <mike.dalessio@gmail.com>
|
3
|
+
Date: Fri, 24 Dec 2021 19:08:01 -0500
|
4
|
+
Subject: [PATCH] attempt to hack in wildcard namespaces to xpath
|
5
|
+
|
6
|
+
I'm not confident this is a bulletproof patch.
|
7
|
+
---
|
8
|
+
xpath.c | 24 ++++++++++++++++++------
|
9
|
+
1 file changed, 18 insertions(+), 6 deletions(-)
|
10
|
+
|
11
|
+
diff --git a/xpath.c b/xpath.c
|
12
|
+
index 1aa2f1a..c7f0885 100644
|
13
|
+
--- a/xpath.c
|
14
|
+
+++ b/xpath.c
|
15
|
+
@@ -146,6 +146,9 @@
|
16
|
+
#define XPATH_MAX_RECURSION_DEPTH 5000
|
17
|
+
#endif
|
18
|
+
|
19
|
+
+#define WILDCARD_PREFIX "*"
|
20
|
+
+#define IS_WILDCARD_PREFIX(p) xmlStrEqual((xmlChar*)WILDCARD_PREFIX, p)
|
21
|
+
+
|
22
|
+
/*
|
23
|
+
* TODO:
|
24
|
+
* There are a few spots where some tests are done which depend upon ascii
|
25
|
+
@@ -11073,12 +11076,15 @@ xmlXPathCompNodeTest(xmlXPathParserContextPtr ctxt, xmlXPathTestVal *test,
|
26
|
+
SKIP_BLANKS;
|
27
|
+
|
28
|
+
if ((name == NULL) && (CUR == '*')) {
|
29
|
+
- /*
|
30
|
+
- * All elements
|
31
|
+
- */
|
32
|
+
NEXT;
|
33
|
+
- *test = NODE_TEST_ALL;
|
34
|
+
- return(NULL);
|
35
|
+
+ if (CUR != ':') {
|
36
|
+
+ /*
|
37
|
+
+ * All elements
|
38
|
+
+ */
|
39
|
+
+ *test = NODE_TEST_ALL;
|
40
|
+
+ return(NULL);
|
41
|
+
+ }
|
42
|
+
+ name = xmlCharStrdup(WILDCARD_PREFIX);
|
43
|
+
}
|
44
|
+
|
45
|
+
if (name == NULL)
|
46
|
+
@@ -11327,6 +11333,10 @@ xmlXPathCompStep(xmlXPathParserContextPtr ctxt) {
|
47
|
+
}
|
48
|
+
#endif
|
49
|
+
if (CUR == '*') {
|
50
|
+
+ if (NXT(1) == ':') {
|
51
|
+
+ NEXT;
|
52
|
+
+ name = xmlCharStrdup(WILDCARD_PREFIX);
|
53
|
+
+ }
|
54
|
+
axis = AXIS_CHILD;
|
55
|
+
} else {
|
56
|
+
if (name == NULL)
|
57
|
+
@@ -12030,7 +12040,7 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt,
|
58
|
+
/*
|
59
|
+
* Setup namespaces.
|
60
|
+
*/
|
61
|
+
- if (prefix != NULL) {
|
62
|
+
+ if (prefix != NULL && !IS_WILDCARD_PREFIX(prefix)) {
|
63
|
+
URI = xmlXPathNsLookup(xpctxt, prefix);
|
64
|
+
if (URI == NULL) {
|
65
|
+
xmlXPathReleaseObject(xpctxt, obj);
|
66
|
+
@@ -12369,6 +12379,8 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt,
|
67
|
+
{
|
68
|
+
XP_TEST_HIT
|
69
|
+
}
|
70
|
+
+ } else if (IS_WILDCARD_PREFIX(prefix)) {
|
71
|
+
+ XP_TEST_HIT
|
72
|
+
} else {
|
73
|
+
if ((cur->ns != NULL) &&
|
74
|
+
(xmlStrEqual(URI, cur->ns->href)))
|
75
|
+
--
|
76
|
+
2.31.0
|
77
|
+
|
@@ -0,0 +1,45 @@
|
|
1
|
+
From ddc5f3d22644e0f6fbcc20541c86825757ffee62 Mon Sep 17 00:00:00 2001
|
2
|
+
From: Mike Dalessio <mike.dalessio@gmail.com>
|
3
|
+
Date: Mon, 21 Feb 2022 18:27:45 -0500
|
4
|
+
Subject: [PATCH] Revert "Different approach to fix quadratic behavior in HTML
|
5
|
+
push parser"
|
6
|
+
|
7
|
+
This reverts commit 798bdf13f6964a650b9a0b7b4b3a769f6f1d509a.
|
8
|
+
---
|
9
|
+
HTMLparser.c | 14 +-------------
|
10
|
+
1 file changed, 1 insertion(+), 13 deletions(-)
|
11
|
+
|
12
|
+
diff --git a/HTMLparser.c b/HTMLparser.c
|
13
|
+
index eba2d7c..c0b8119 100644
|
14
|
+
--- a/HTMLparser.c
|
15
|
+
+++ b/HTMLparser.c
|
16
|
+
@@ -3960,25 +3960,13 @@ htmlParseStartTag(htmlParserCtxtPtr ctxt) {
|
17
|
+
htmlParseErr(ctxt, XML_ERR_NAME_REQUIRED,
|
18
|
+
"htmlParseStartTag: invalid element name\n",
|
19
|
+
NULL, NULL);
|
20
|
+
- /*
|
21
|
+
- * The recovery code is disabled for now as it can result in
|
22
|
+
- * quadratic behavior with the push parser. htmlParseStartTag
|
23
|
+
- * must consume all content up to the final '>' in order to avoid
|
24
|
+
- * rescanning for this terminator.
|
25
|
+
- *
|
26
|
+
- * For a proper fix in line with HTML5, htmlParseStartTag and
|
27
|
+
- * htmlParseElement should only be called when there's an ASCII
|
28
|
+
- * alpha character following the initial '<'. Otherwise, the '<'
|
29
|
+
- * should be emitted as text (unless followed by '!', '/' or '?').
|
30
|
+
- */
|
31
|
+
-#if 0
|
32
|
+
/* if recover preserve text on classic misconstructs */
|
33
|
+
if ((ctxt->recovery) && ((IS_BLANK_CH(CUR)) || (CUR == '<') ||
|
34
|
+
(CUR == '=') || (CUR == '>') || (((CUR >= '0') && (CUR <= '9'))))) {
|
35
|
+
htmlParseCharDataInternal(ctxt, '<');
|
36
|
+
return(-1);
|
37
|
+
}
|
38
|
+
-#endif
|
39
|
+
+
|
40
|
+
|
41
|
+
/* Dump the bogus tag like browsers do */
|
42
|
+
while ((CUR != 0) && (CUR != '>') &&
|
43
|
+
--
|
44
|
+
2.31.0
|
45
|
+
|