nokogiri 1.11.0 → 1.11.5
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +12 -12
- data/LICENSE.md +1 -1
- data/README.md +21 -16
- data/dependencies.yml +12 -12
- data/ext/nokogiri/depend +34 -474
- data/ext/nokogiri/extconf.rb +253 -183
- data/ext/nokogiri/html_document.c +10 -15
- data/ext/nokogiri/html_element_description.c +84 -71
- data/ext/nokogiri/html_entity_lookup.c +21 -16
- data/ext/nokogiri/html_sax_parser_context.c +67 -64
- data/ext/nokogiri/html_sax_push_parser.c +42 -34
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +190 -60
- data/ext/nokogiri/nokogiri.h +158 -75
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +246 -188
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +30 -27
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +17 -11
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +56 -49
- data/ext/nokogiri/xml_node.c +371 -320
- data/ext/nokogiri/xml_node_set.c +168 -156
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +191 -157
- data/ext/nokogiri/xml_relax_ng.c +29 -23
- data/ext/nokogiri/xml_sax_parser.c +117 -112
- data/ext/nokogiri/xml_sax_parser_context.c +101 -84
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +48 -42
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +134 -127
- data/ext/nokogiri/xslt_stylesheet.c +157 -157
- data/lib/nokogiri.rb +1 -22
- data/lib/nokogiri/css/parser.rb +1 -1
- data/lib/nokogiri/extension.rb +26 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -15
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +32 -8
- data/lib/nokogiri/xml/document.rb +74 -28
- data/lib/nokogiri/xml/node.rb +39 -42
- data/lib/nokogiri/xml/reader.rb +2 -9
- data/lib/nokogiri/xml/xpath.rb +1 -3
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/{0008-use-glibc-strlen.patch → 0004-use-glibc-strlen.patch} +0 -0
- data/patches/libxml2/{0009-avoid-isnan-isinf.patch → 0005-avoid-isnan-isinf.patch} +4 -4
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- metadata +20 -53
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -63
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +0 -73
- data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +0 -103
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
@@ -1,73 +0,0 @@
|
|
1
|
-
From 4f51a6d2b1755ce5b36c524c215aad70d864ac1d Mon Sep 17 00:00:00 2001
|
2
|
-
From: Mike Dalessio <mike.dalessio@gmail.com>
|
3
|
-
Date: Mon, 3 Aug 2020 17:36:05 -0400
|
4
|
-
Subject: [PATCH 1/2] htmlParseComment: treat `--!>` as if it closed the
|
5
|
-
comment
|
6
|
-
|
7
|
-
See guidance provided on incorrectly-closed comments here:
|
8
|
-
|
9
|
-
https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment
|
10
|
-
---
|
11
|
-
HTMLparser.c | 28 ++++++++++++++++++++--------
|
12
|
-
1 file changed, 20 insertions(+), 8 deletions(-)
|
13
|
-
|
14
|
-
diff --git a/HTMLparser.c b/HTMLparser.c
|
15
|
-
index 7b6d689..4d43479 100644
|
16
|
-
--- a/HTMLparser.c
|
17
|
-
+++ b/HTMLparser.c
|
18
|
-
@@ -3300,6 +3300,7 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
|
19
|
-
int q, ql;
|
20
|
-
int r, rl;
|
21
|
-
int cur, l;
|
22
|
-
+ int next, nl;
|
23
|
-
xmlParserInputState state;
|
24
|
-
|
25
|
-
/*
|
26
|
-
@@ -3332,6 +3333,21 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
|
27
|
-
while (IS_CHAR(cur) &&
|
28
|
-
((cur != '>') ||
|
29
|
-
(r != '-') || (q != '-'))) {
|
30
|
-
+ NEXTL(l);
|
31
|
-
+ next = CUR_CHAR(nl);
|
32
|
-
+ if (next == 0) {
|
33
|
-
+ SHRINK;
|
34
|
-
+ GROW;
|
35
|
-
+ next = CUR_CHAR(nl);
|
36
|
-
+ }
|
37
|
-
+
|
38
|
-
+ if ((q == '-') && (r == '-') && (cur == '!') && (next == '>')) {
|
39
|
-
+ htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
|
40
|
-
+ "Comment incorrectly closed by '--!>'", NULL, NULL);
|
41
|
-
+ cur = '>';
|
42
|
-
+ break;
|
43
|
-
+ }
|
44
|
-
+
|
45
|
-
if (len + 5 >= size) {
|
46
|
-
xmlChar *tmp;
|
47
|
-
|
48
|
-
@@ -3345,18 +3361,14 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
|
49
|
-
}
|
50
|
-
buf = tmp;
|
51
|
-
}
|
52
|
-
- COPY_BUF(ql,buf,len,q);
|
53
|
-
+ COPY_BUF(ql,buf,len,q);
|
54
|
-
+
|
55
|
-
q = r;
|
56
|
-
ql = rl;
|
57
|
-
r = cur;
|
58
|
-
rl = l;
|
59
|
-
- NEXTL(l);
|
60
|
-
- cur = CUR_CHAR(l);
|
61
|
-
- if (cur == 0) {
|
62
|
-
- SHRINK;
|
63
|
-
- GROW;
|
64
|
-
- cur = CUR_CHAR(l);
|
65
|
-
- }
|
66
|
-
+ cur = next;
|
67
|
-
+ l = nl;
|
68
|
-
}
|
69
|
-
buf[len] = 0;
|
70
|
-
if (IS_CHAR(cur)) {
|
71
|
-
--
|
72
|
-
2.25.1
|
73
|
-
|
@@ -1,103 +0,0 @@
|
|
1
|
-
From b20d746fa7cbb74716171bc49d836af99927e41e Mon Sep 17 00:00:00 2001
|
2
|
-
From: Mike Dalessio <mike.dalessio@gmail.com>
|
3
|
-
Date: Sun, 11 Oct 2020 14:15:37 -0400
|
4
|
-
Subject: [PATCH 2/2] use new htmlParseLookupCommentEnd to find comment ends
|
5
|
-
|
6
|
-
Note that the caret in error messages generated during comment parsing
|
7
|
-
may have moved by one byte.
|
8
|
-
|
9
|
-
See guidance provided on incorrectly-closed comments here:
|
10
|
-
|
11
|
-
https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment
|
12
|
-
---
|
13
|
-
HTMLparser.c | 46 +++++++++++++++++++++++++++++++++++++---------
|
14
|
-
1 file changed, 37 insertions(+), 9 deletions(-)
|
15
|
-
|
16
|
-
diff --git a/HTMLparser.c b/HTMLparser.c
|
17
|
-
index 4d43479..000dc3d 100644
|
18
|
-
--- a/HTMLparser.c
|
19
|
-
+++ b/HTMLparser.c
|
20
|
-
@@ -5331,6 +5331,39 @@ htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,
|
21
|
-
return (-1);
|
22
|
-
}
|
23
|
-
|
24
|
-
+/**
|
25
|
-
+ * htmlParseLookupCommentEnd:
|
26
|
-
+ * @ctxt: an HTML parser context
|
27
|
-
+ *
|
28
|
-
+ * Try to find a comment end tag in the input stream
|
29
|
-
+ * The search includes "-->" as well as WHATWG-recommended incorrectly-closed tags.
|
30
|
-
+ * (See https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment)
|
31
|
-
+ * This function has a side effect of (possibly) incrementing ctxt->checkIndex
|
32
|
-
+ * to avoid rescanning sequences of bytes, it DOES change the state of the
|
33
|
-
+ * parser, do not use liberally.
|
34
|
-
+ * This wraps to htmlParseLookupSequence()
|
35
|
-
+ *
|
36
|
-
+ * Returns the index to the current parsing point if the full sequence is available, -1 otherwise.
|
37
|
-
+ */
|
38
|
-
+static int
|
39
|
-
+htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt)
|
40
|
-
+{
|
41
|
-
+ int mark = 0;
|
42
|
-
+ int cur = CUR_PTR - BASE_PTR;
|
43
|
-
+
|
44
|
-
+ while (mark >= 0) {
|
45
|
-
+ mark = htmlParseLookupSequence(ctxt, '-', '-', 0, 1, 1);
|
46
|
-
+ if ((mark < 0) ||
|
47
|
-
+ (NXT(mark+2) == '>') ||
|
48
|
-
+ ((NXT(mark+2) == '!') && (NXT(mark+3) == '>'))) {
|
49
|
-
+ return mark;
|
50
|
-
+ }
|
51
|
-
+ ctxt->checkIndex = cur + mark + 1;
|
52
|
-
+ }
|
53
|
-
+ return mark;
|
54
|
-
+}
|
55
|
-
+
|
56
|
-
+
|
57
|
-
/**
|
58
|
-
* htmlParseTryOrFinish:
|
59
|
-
* @ctxt: an HTML parser context
|
60
|
-
@@ -5507,8 +5540,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
61
|
-
cur = in->cur[0];
|
62
|
-
if ((cur == '<') && (next == '!') &&
|
63
|
-
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
64
|
-
- if ((!terminate) &&
|
65
|
-
- (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
|
66
|
-
+ if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
|
67
|
-
goto done;
|
68
|
-
#ifdef DEBUG_PUSH
|
69
|
-
xmlGenericError(xmlGenericErrorContext,
|
70
|
-
@@ -5567,8 +5599,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
71
|
-
next = in->cur[1];
|
72
|
-
if ((cur == '<') && (next == '!') &&
|
73
|
-
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
74
|
-
- if ((!terminate) &&
|
75
|
-
- (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
|
76
|
-
+ if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
|
77
|
-
goto done;
|
78
|
-
#ifdef DEBUG_PUSH
|
79
|
-
xmlGenericError(xmlGenericErrorContext,
|
80
|
-
@@ -5614,8 +5645,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
81
|
-
next = in->cur[1];
|
82
|
-
if ((cur == '<') && (next == '!') &&
|
83
|
-
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
84
|
-
- if ((!terminate) &&
|
85
|
-
- (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
|
86
|
-
+ if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
|
87
|
-
goto done;
|
88
|
-
#ifdef DEBUG_PUSH
|
89
|
-
xmlGenericError(xmlGenericErrorContext,
|
90
|
-
@@ -5871,9 +5901,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
|
91
|
-
htmlParseDocTypeDecl(ctxt);
|
92
|
-
} else if ((cur == '<') && (next == '!') &&
|
93
|
-
(in->cur[2] == '-') && (in->cur[3] == '-')) {
|
94
|
-
- if ((!terminate) &&
|
95
|
-
- (htmlParseLookupSequence(
|
96
|
-
- ctxt, '-', '-', '>', 1, 1) < 0))
|
97
|
-
+ if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
|
98
|
-
goto done;
|
99
|
-
#ifdef DEBUG_PUSH
|
100
|
-
xmlGenericError(xmlGenericErrorContext,
|
101
|
-
--
|
102
|
-
2.25.1
|
103
|
-
|
Binary file
|