nokogiri 1.11.4 → 1.13.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/LICENSE-DEPENDENCIES.md +243 -22
- data/LICENSE.md +1 -1
- data/README.md +14 -11
- data/bin/nokogiri +63 -50
- data/dependencies.yml +11 -62
- data/ext/nokogiri/depend +35 -34
- data/ext/nokogiri/extconf.rb +235 -126
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
- data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +21 -19
- data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
- data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +6 -5
- data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
- data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
- data/ext/nokogiri/nokogiri.c +70 -38
- data/ext/nokogiri/nokogiri.h +19 -9
- data/ext/nokogiri/xml_document.c +49 -49
- data/ext/nokogiri/xml_document_fragment.c +0 -2
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_element_content.c +2 -0
- data/ext/nokogiri/xml_encoding_handler.c +31 -12
- data/ext/nokogiri/xml_namespace.c +4 -2
- data/ext/nokogiri/xml_node.c +759 -439
- data/ext/nokogiri/xml_node_set.c +20 -20
- data/ext/nokogiri/xml_reader.c +39 -11
- data/ext/nokogiri/xml_sax_parser.c +6 -6
- data/ext/nokogiri/xml_sax_parser_context.c +2 -0
- data/ext/nokogiri/xml_schema.c +2 -0
- data/ext/nokogiri/xml_xpath_context.c +109 -84
- data/ext/nokogiri/xslt_stylesheet.c +109 -10
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4875 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +9 -8
- data/lib/nokogiri/css/parser.rb +361 -342
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +20 -20
- data/lib/nokogiri/css/syntax_error.rb +2 -1
- data/lib/nokogiri/css/tokenizer.rb +4 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +179 -82
- data/lib/nokogiri/css.rb +49 -17
- data/lib/nokogiri/decorators/slop.rb +8 -7
- data/lib/nokogiri/extension.rb +8 -3
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +37 -27
- data/lib/nokogiri/{html → html4}/builder.rb +3 -2
- data/lib/nokogiri/{html → html4}/document.rb +88 -79
- data/lib/nokogiri/{html → html4}/document_fragment.rb +13 -9
- data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +3 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +13 -15
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +11 -11
- data/lib/nokogiri/html4.rb +46 -0
- data/lib/nokogiri/html5/document.rb +88 -0
- data/lib/nokogiri/html5/document_fragment.rb +83 -0
- data/lib/nokogiri/html5/node.rb +96 -0
- data/lib/nokogiri/html5.rb +477 -0
- data/lib/nokogiri/jruby/dependencies.rb +10 -9
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +2 -1
- data/lib/nokogiri/version/info.rb +30 -14
- data/lib/nokogiri/version.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +5 -3
- data/lib/nokogiri/xml/attribute_decl.rb +2 -1
- data/lib/nokogiri/xml/builder.rb +69 -31
- data/lib/nokogiri/xml/cdata.rb +2 -1
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +178 -96
- data/lib/nokogiri/xml/document_fragment.rb +41 -38
- data/lib/nokogiri/xml/dtd.rb +3 -2
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +2 -1
- data/lib/nokogiri/xml/entity_decl.rb +3 -2
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +2 -0
- data/lib/nokogiri/xml/node/save_options.rb +7 -4
- data/lib/nokogiri/xml/node.rb +516 -351
- data/lib/nokogiri/xml/node_set.rb +46 -54
- data/lib/nokogiri/xml/notation.rb +12 -0
- data/lib/nokogiri/xml/parse_options.rb +12 -6
- data/lib/nokogiri/xml/pp/character_data.rb +8 -6
- data/lib/nokogiri/xml/pp/node.rb +24 -26
- data/lib/nokogiri/xml/pp.rb +3 -2
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/reader.rb +17 -19
- data/lib/nokogiri/xml/relax_ng.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +44 -49
- data/lib/nokogiri/xml/sax/parser.rb +36 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
- data/lib/nokogiri/xml/sax.rb +5 -4
- data/lib/nokogiri/xml/schema.rb +7 -6
- data/lib/nokogiri/xml/searchable.rb +93 -62
- data/lib/nokogiri/xml/syntax_error.rb +4 -4
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +13 -1
- data/lib/nokogiri/xml/xpath_context.rb +2 -3
- data/lib/nokogiri/xml.rb +36 -37
- data/lib/nokogiri/xslt/stylesheet.rb +2 -1
- data/lib/nokogiri/xslt.rb +28 -20
- data/lib/nokogiri.rb +48 -43
- data/lib/xsd/xmlparser/nokogiri.rb +25 -24
- data/patches/libxml2/0004-use-glibc-strlen.patch +3 -3
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2443 -1914
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
- data/ports/archives/libxml2-2.9.13.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
- metadata +198 -83
- data/lib/nokogiri/html/element_description_defaults.rb +0 -672
- data/lib/nokogiri/html/sax/parser_context.rb +0 -17
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -0,0 +1,61 @@
|
|
1
|
+
From 3ea8d08da310b645e37940eaae5cc28e251b155b Mon Sep 17 00:00:00 2001
|
2
|
+
From: Mike Dalessio <mike.dalessio@gmail.com>
|
3
|
+
Date: Sat, 17 Jul 2021 14:36:53 -0400
|
4
|
+
Subject: [PATCH] htmlParseComment: handle abruptly-closed comments
|
5
|
+
|
6
|
+
See guidance provided on abruptly-closed comments here:
|
7
|
+
|
8
|
+
https://html.spec.whatwg.org/multipage/parsing.html#parse-error-abrupt-closing-of-empty-comment
|
9
|
+
---
|
10
|
+
HTMLparser.c | 11 +++++++++++
|
11
|
+
include/libxml/xmlerror.h | 1 +
|
12
|
+
2 files changed, 12 insertions(+)
|
13
|
+
|
14
|
+
diff --git a/HTMLparser.c b/HTMLparser.c
|
15
|
+
index b56363a..f0bf294 100644
|
16
|
+
--- a/HTMLparser.c
|
17
|
+
+++ b/HTMLparser.c
|
18
|
+
@@ -3485,10 +3485,20 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
|
19
|
+
q = CUR_CHAR(ql);
|
20
|
+
if (q == 0)
|
21
|
+
goto unfinished;
|
22
|
+
+ if (q == '>') {
|
23
|
+
+ htmlParseErr(ctxt, XML_ERR_COMMENT_ABRUPTLY_ENDED, "Comment abruptly ended", NULL, NULL);
|
24
|
+
+ cur = '>';
|
25
|
+
+ goto finished;
|
26
|
+
+ }
|
27
|
+
NEXTL(ql);
|
28
|
+
r = CUR_CHAR(rl);
|
29
|
+
if (r == 0)
|
30
|
+
goto unfinished;
|
31
|
+
+ if (q == '-' && r == '>') {
|
32
|
+
+ htmlParseErr(ctxt, XML_ERR_COMMENT_ABRUPTLY_ENDED, "Comment abruptly ended", NULL, NULL);
|
33
|
+
+ cur = '>';
|
34
|
+
+ goto finished;
|
35
|
+
+ }
|
36
|
+
NEXTL(rl);
|
37
|
+
cur = CUR_CHAR(l);
|
38
|
+
while ((cur != 0) &&
|
39
|
+
@@ -3536,6 +3546,7 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
|
40
|
+
cur = next;
|
41
|
+
l = nl;
|
42
|
+
}
|
43
|
+
+finished:
|
44
|
+
buf[len] = 0;
|
45
|
+
if (cur == '>') {
|
46
|
+
NEXT;
|
47
|
+
diff --git a/include/libxml/xmlerror.h b/include/libxml/xmlerror.h
|
48
|
+
index c101997..7b68e40 100644
|
49
|
+
--- a/include/libxml/xmlerror.h
|
50
|
+
+++ b/include/libxml/xmlerror.h
|
51
|
+
@@ -209,6 +209,7 @@ typedef enum {
|
52
|
+
XML_ERR_VERSION_MISMATCH, /* 109 */
|
53
|
+
XML_ERR_NAME_TOO_LONG, /* 110 */
|
54
|
+
XML_ERR_USER_STOP, /* 111 */
|
55
|
+
+ XML_ERR_COMMENT_ABRUPTLY_ENDED, /* 112 */
|
56
|
+
XML_NS_ERR_XML_NAMESPACE = 200,
|
57
|
+
XML_NS_ERR_UNDEFINED_NAMESPACE, /* 201 */
|
58
|
+
XML_NS_ERR_QNAME, /* 202 */
|
59
|
+
--
|
60
|
+
2.31.0
|
61
|
+
|
@@ -0,0 +1,77 @@
|
|
1
|
+
From 74c95ec5932c737d4fcb06b8646b0017364ada14 Mon Sep 17 00:00:00 2001
|
2
|
+
From: Mike Dalessio <mike.dalessio@gmail.com>
|
3
|
+
Date: Fri, 24 Dec 2021 19:08:01 -0500
|
4
|
+
Subject: [PATCH] attempt to hack in wildcard namespaces to xpath
|
5
|
+
|
6
|
+
I'm not confident this is a bulletproof patch.
|
7
|
+
---
|
8
|
+
xpath.c | 24 ++++++++++++++++++------
|
9
|
+
1 file changed, 18 insertions(+), 6 deletions(-)
|
10
|
+
|
11
|
+
diff --git a/xpath.c b/xpath.c
|
12
|
+
index 1aa2f1a..c7f0885 100644
|
13
|
+
--- a/xpath.c
|
14
|
+
+++ b/xpath.c
|
15
|
+
@@ -146,6 +146,9 @@
|
16
|
+
#define XPATH_MAX_RECURSION_DEPTH 5000
|
17
|
+
#endif
|
18
|
+
|
19
|
+
+#define WILDCARD_PREFIX "*"
|
20
|
+
+#define IS_WILDCARD_PREFIX(p) xmlStrEqual((xmlChar*)WILDCARD_PREFIX, p)
|
21
|
+
+
|
22
|
+
/*
|
23
|
+
* TODO:
|
24
|
+
* There are a few spots where some tests are done which depend upon ascii
|
25
|
+
@@ -11073,12 +11076,15 @@ xmlXPathCompNodeTest(xmlXPathParserContextPtr ctxt, xmlXPathTestVal *test,
|
26
|
+
SKIP_BLANKS;
|
27
|
+
|
28
|
+
if ((name == NULL) && (CUR == '*')) {
|
29
|
+
- /*
|
30
|
+
- * All elements
|
31
|
+
- */
|
32
|
+
NEXT;
|
33
|
+
- *test = NODE_TEST_ALL;
|
34
|
+
- return(NULL);
|
35
|
+
+ if (CUR != ':') {
|
36
|
+
+ /*
|
37
|
+
+ * All elements
|
38
|
+
+ */
|
39
|
+
+ *test = NODE_TEST_ALL;
|
40
|
+
+ return(NULL);
|
41
|
+
+ }
|
42
|
+
+ name = xmlCharStrdup(WILDCARD_PREFIX);
|
43
|
+
}
|
44
|
+
|
45
|
+
if (name == NULL)
|
46
|
+
@@ -11327,6 +11333,10 @@ xmlXPathCompStep(xmlXPathParserContextPtr ctxt) {
|
47
|
+
}
|
48
|
+
#endif
|
49
|
+
if (CUR == '*') {
|
50
|
+
+ if (NXT(1) == ':') {
|
51
|
+
+ NEXT;
|
52
|
+
+ name = xmlCharStrdup(WILDCARD_PREFIX);
|
53
|
+
+ }
|
54
|
+
axis = AXIS_CHILD;
|
55
|
+
} else {
|
56
|
+
if (name == NULL)
|
57
|
+
@@ -12030,7 +12040,7 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt,
|
58
|
+
/*
|
59
|
+
* Setup namespaces.
|
60
|
+
*/
|
61
|
+
- if (prefix != NULL) {
|
62
|
+
+ if (prefix != NULL && !IS_WILDCARD_PREFIX(prefix)) {
|
63
|
+
URI = xmlXPathNsLookup(xpctxt, prefix);
|
64
|
+
if (URI == NULL) {
|
65
|
+
xmlXPathReleaseObject(xpctxt, obj);
|
66
|
+
@@ -12369,6 +12379,8 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt,
|
67
|
+
{
|
68
|
+
XP_TEST_HIT
|
69
|
+
}
|
70
|
+
+ } else if (IS_WILDCARD_PREFIX(prefix)) {
|
71
|
+
+ XP_TEST_HIT
|
72
|
+
} else {
|
73
|
+
if ((cur->ns != NULL) &&
|
74
|
+
(xmlStrEqual(URI, cur->ns->href)))
|
75
|
+
--
|
76
|
+
2.31.0
|
77
|
+
|