nokogiri 1.10.3 → 1.13.2
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +5 -0
- data/LICENSE-DEPENDENCIES.md +1173 -884
- data/LICENSE.md +1 -1
- data/README.md +178 -96
- data/bin/nokogiri +63 -50
- data/dependencies.yml +11 -60
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +752 -423
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +120 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +228 -91
- data/ext/nokogiri/nokogiri.h +191 -89
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +291 -219
- data/ext/nokogiri/xml_document_fragment.c +12 -16
- data/ext/nokogiri/xml_dtd.c +56 -50
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +43 -18
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +60 -51
- data/ext/nokogiri/xml_node.c +1001 -610
- data/ext/nokogiri/xml_node_set.c +174 -162
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +226 -175
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +112 -112
- data/ext/nokogiri/xml_sax_parser_context.c +105 -86
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +112 -33
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +223 -115
- data/ext/nokogiri/xslt_stylesheet.c +265 -173
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4875 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +52 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +107 -104
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +218 -91
- data/lib/nokogiri/css.rb +50 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/{html → html4}/document.rb +99 -103
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +14 -15
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +46 -0
- data/lib/nokogiri/html5/document.rb +88 -0
- data/lib/nokogiri/html5/document_fragment.rb +83 -0
- data/lib/nokogiri/html5/node.rb +96 -0
- data/lib/nokogiri/html5.rb +477 -0
- data/lib/nokogiri/jruby/dependencies.rb +21 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +221 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +6 -3
- data/lib/nokogiri/xml/attribute_decl.rb +3 -1
- data/lib/nokogiri/xml/builder.rb +95 -53
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +219 -86
- data/lib/nokogiri/xml/document_fragment.rb +46 -44
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +2 -0
- data/lib/nokogiri/xml/element_decl.rb +3 -1
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +3 -0
- data/lib/nokogiri/xml/node/save_options.rb +8 -4
- data/lib/nokogiri/xml/node.rb +876 -376
- data/lib/nokogiri/xml/node_set.rb +47 -54
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +21 -8
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +25 -26
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +3 -1
- data/lib/nokogiri/xml/reader.rb +23 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +37 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +112 -72
- data/lib/nokogiri/xml/syntax_error.rb +5 -4
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +37 -37
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +29 -20
- data/lib/nokogiri.rb +49 -65
- data/lib/xsd/xmlparser/nokogiri.rb +26 -24
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
- data/ports/archives/libxml2-2.9.13.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
- metadata +207 -137
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxslt/0001-Fix-security-framework-bypass.patch +0 -120
- data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
@@ -0,0 +1,61 @@
|
|
1
|
+
From 3ea8d08da310b645e37940eaae5cc28e251b155b Mon Sep 17 00:00:00 2001
|
2
|
+
From: Mike Dalessio <mike.dalessio@gmail.com>
|
3
|
+
Date: Sat, 17 Jul 2021 14:36:53 -0400
|
4
|
+
Subject: [PATCH] htmlParseComment: handle abruptly-closed comments
|
5
|
+
|
6
|
+
See guidance provided on abrutply-closed comments here:
|
7
|
+
|
8
|
+
https://html.spec.whatwg.org/multipage/parsing.html#parse-error-abrupt-closing-of-empty-comment
|
9
|
+
---
|
10
|
+
HTMLparser.c | 11 +++++++++++
|
11
|
+
include/libxml/xmlerror.h | 1 +
|
12
|
+
2 files changed, 12 insertions(+)
|
13
|
+
|
14
|
+
diff --git a/HTMLparser.c b/HTMLparser.c
|
15
|
+
index b56363a..f0bf294 100644
|
16
|
+
--- a/HTMLparser.c
|
17
|
+
+++ b/HTMLparser.c
|
18
|
+
@@ -3485,10 +3485,20 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
|
19
|
+
q = CUR_CHAR(ql);
|
20
|
+
if (q == 0)
|
21
|
+
goto unfinished;
|
22
|
+
+ if (q == '>') {
|
23
|
+
+ htmlParseErr(ctxt, XML_ERR_COMMENT_ABRUPTLY_ENDED, "Comment abruptly ended", NULL, NULL);
|
24
|
+
+ cur = '>';
|
25
|
+
+ goto finished;
|
26
|
+
+ }
|
27
|
+
NEXTL(ql);
|
28
|
+
r = CUR_CHAR(rl);
|
29
|
+
if (r == 0)
|
30
|
+
goto unfinished;
|
31
|
+
+ if (q == '-' && r == '>') {
|
32
|
+
+ htmlParseErr(ctxt, XML_ERR_COMMENT_ABRUPTLY_ENDED, "Comment abruptly ended", NULL, NULL);
|
33
|
+
+ cur = '>';
|
34
|
+
+ goto finished;
|
35
|
+
+ }
|
36
|
+
NEXTL(rl);
|
37
|
+
cur = CUR_CHAR(l);
|
38
|
+
while ((cur != 0) &&
|
39
|
+
@@ -3536,6 +3546,7 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
|
40
|
+
cur = next;
|
41
|
+
l = nl;
|
42
|
+
}
|
43
|
+
+finished:
|
44
|
+
buf[len] = 0;
|
45
|
+
if (cur == '>') {
|
46
|
+
NEXT;
|
47
|
+
diff --git a/include/libxml/xmlerror.h b/include/libxml/xmlerror.h
|
48
|
+
index c101997..7b68e40 100644
|
49
|
+
--- a/include/libxml/xmlerror.h
|
50
|
+
+++ b/include/libxml/xmlerror.h
|
51
|
+
@@ -209,6 +209,7 @@ typedef enum {
|
52
|
+
XML_ERR_VERSION_MISMATCH, /* 109 */
|
53
|
+
XML_ERR_NAME_TOO_LONG, /* 110 */
|
54
|
+
XML_ERR_USER_STOP, /* 111 */
|
55
|
+
+ XML_ERR_COMMENT_ABRUPTLY_ENDED, /* 112 */
|
56
|
+
XML_NS_ERR_XML_NAMESPACE = 200,
|
57
|
+
XML_NS_ERR_UNDEFINED_NAMESPACE, /* 201 */
|
58
|
+
XML_NS_ERR_QNAME, /* 202 */
|
59
|
+
--
|
60
|
+
2.31.0
|
61
|
+
|
@@ -0,0 +1,77 @@
|
|
1
|
+
From 74c95ec5932c737d4fcb06b8646b0017364ada14 Mon Sep 17 00:00:00 2001
|
2
|
+
From: Mike Dalessio <mike.dalessio@gmail.com>
|
3
|
+
Date: Fri, 24 Dec 2021 19:08:01 -0500
|
4
|
+
Subject: [PATCH] attempt to hack in wildcard namespaces to xpath
|
5
|
+
|
6
|
+
I'm not confident this is a bulletproof patch.
|
7
|
+
---
|
8
|
+
xpath.c | 24 ++++++++++++++++++------
|
9
|
+
1 file changed, 18 insertions(+), 6 deletions(-)
|
10
|
+
|
11
|
+
diff --git a/xpath.c b/xpath.c
|
12
|
+
index 1aa2f1a..c7f0885 100644
|
13
|
+
--- a/xpath.c
|
14
|
+
+++ b/xpath.c
|
15
|
+
@@ -146,6 +146,9 @@
|
16
|
+
#define XPATH_MAX_RECURSION_DEPTH 5000
|
17
|
+
#endif
|
18
|
+
|
19
|
+
+#define WILDCARD_PREFIX "*"
|
20
|
+
+#define IS_WILDCARD_PREFIX(p) xmlStrEqual((xmlChar*)WILDCARD_PREFIX, p)
|
21
|
+
+
|
22
|
+
/*
|
23
|
+
* TODO:
|
24
|
+
* There are a few spots where some tests are done which depend upon ascii
|
25
|
+
@@ -11073,12 +11076,15 @@ xmlXPathCompNodeTest(xmlXPathParserContextPtr ctxt, xmlXPathTestVal *test,
|
26
|
+
SKIP_BLANKS;
|
27
|
+
|
28
|
+
if ((name == NULL) && (CUR == '*')) {
|
29
|
+
- /*
|
30
|
+
- * All elements
|
31
|
+
- */
|
32
|
+
NEXT;
|
33
|
+
- *test = NODE_TEST_ALL;
|
34
|
+
- return(NULL);
|
35
|
+
+ if (CUR != ':') {
|
36
|
+
+ /*
|
37
|
+
+ * All elements
|
38
|
+
+ */
|
39
|
+
+ *test = NODE_TEST_ALL;
|
40
|
+
+ return(NULL);
|
41
|
+
+ }
|
42
|
+
+ name = xmlCharStrdup(WILDCARD_PREFIX);
|
43
|
+
}
|
44
|
+
|
45
|
+
if (name == NULL)
|
46
|
+
@@ -11327,6 +11333,10 @@ xmlXPathCompStep(xmlXPathParserContextPtr ctxt) {
|
47
|
+
}
|
48
|
+
#endif
|
49
|
+
if (CUR == '*') {
|
50
|
+
+ if (NXT(1) == ':') {
|
51
|
+
+ NEXT;
|
52
|
+
+ name = xmlCharStrdup(WILDCARD_PREFIX);
|
53
|
+
+ }
|
54
|
+
axis = AXIS_CHILD;
|
55
|
+
} else {
|
56
|
+
if (name == NULL)
|
57
|
+
@@ -12030,7 +12040,7 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt,
|
58
|
+
/*
|
59
|
+
* Setup namespaces.
|
60
|
+
*/
|
61
|
+
- if (prefix != NULL) {
|
62
|
+
+ if (prefix != NULL && !IS_WILDCARD_PREFIX(prefix)) {
|
63
|
+
URI = xmlXPathNsLookup(xpctxt, prefix);
|
64
|
+
if (URI == NULL) {
|
65
|
+
xmlXPathReleaseObject(xpctxt, obj);
|
66
|
+
@@ -12369,6 +12379,8 @@ xmlXPathNodeCollectAndTest(xmlXPathParserContextPtr ctxt,
|
67
|
+
{
|
68
|
+
XP_TEST_HIT
|
69
|
+
}
|
70
|
+
+ } else if (IS_WILDCARD_PREFIX(prefix)) {
|
71
|
+
+ XP_TEST_HIT
|
72
|
+
} else {
|
73
|
+
if ((cur->ns != NULL) &&
|
74
|
+
(xmlStrEqual(URI, cur->ns->href)))
|
75
|
+
--
|
76
|
+
2.31.0
|
77
|
+
|