nokogiri 1.11.3 → 1.13.8

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (179)
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -0
  3. data/LICENSE-DEPENDENCIES.md +243 -22
  4. data/LICENSE.md +1 -1
  5. data/README.md +14 -11
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +13 -64
  8. data/ext/nokogiri/depend +35 -34
  9. data/ext/nokogiri/extconf.rb +237 -133
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
  12. data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +21 -19
  13. data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
  14. data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +8 -8
  15. data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
  16. data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
  17. data/ext/nokogiri/nokogiri.c +70 -38
  18. data/ext/nokogiri/nokogiri.h +27 -9
  19. data/ext/nokogiri/xml_attr.c +2 -2
  20. data/ext/nokogiri/xml_attribute_decl.c +3 -3
  21. data/ext/nokogiri/xml_cdata.c +1 -1
  22. data/ext/nokogiri/xml_document.c +50 -50
  23. data/ext/nokogiri/xml_document_fragment.c +0 -2
  24. data/ext/nokogiri/xml_dtd.c +10 -10
  25. data/ext/nokogiri/xml_element_content.c +2 -0
  26. data/ext/nokogiri/xml_element_decl.c +3 -3
  27. data/ext/nokogiri/xml_encoding_handler.c +31 -12
  28. data/ext/nokogiri/xml_entity_decl.c +5 -5
  29. data/ext/nokogiri/xml_namespace.c +4 -2
  30. data/ext/nokogiri/xml_node.c +833 -492
  31. data/ext/nokogiri/xml_node_set.c +24 -24
  32. data/ext/nokogiri/xml_reader.c +90 -11
  33. data/ext/nokogiri/xml_sax_parser.c +6 -6
  34. data/ext/nokogiri/xml_sax_parser_context.c +12 -3
  35. data/ext/nokogiri/xml_schema.c +5 -3
  36. data/ext/nokogiri/xml_text.c +1 -1
  37. data/ext/nokogiri/xml_xpath_context.c +110 -85
  38. data/ext/nokogiri/xslt_stylesheet.c +109 -10
  39. data/gumbo-parser/CHANGES.md +63 -0
  40. data/gumbo-parser/Makefile +101 -0
  41. data/gumbo-parser/THANKS +27 -0
  42. data/gumbo-parser/src/Makefile +34 -0
  43. data/gumbo-parser/src/README.md +41 -0
  44. data/gumbo-parser/src/ascii.c +75 -0
  45. data/gumbo-parser/src/ascii.h +115 -0
  46. data/gumbo-parser/src/attribute.c +42 -0
  47. data/gumbo-parser/src/attribute.h +17 -0
  48. data/gumbo-parser/src/char_ref.c +22225 -0
  49. data/gumbo-parser/src/char_ref.h +29 -0
  50. data/gumbo-parser/src/char_ref.rl +2154 -0
  51. data/gumbo-parser/src/error.c +626 -0
  52. data/gumbo-parser/src/error.h +148 -0
  53. data/gumbo-parser/src/foreign_attrs.c +104 -0
  54. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  55. data/gumbo-parser/src/gumbo.h +943 -0
  56. data/gumbo-parser/src/insertion_mode.h +33 -0
  57. data/gumbo-parser/src/macros.h +91 -0
  58. data/gumbo-parser/src/parser.c +4875 -0
  59. data/gumbo-parser/src/parser.h +41 -0
  60. data/gumbo-parser/src/replacement.h +33 -0
  61. data/gumbo-parser/src/string_buffer.c +103 -0
  62. data/gumbo-parser/src/string_buffer.h +68 -0
  63. data/gumbo-parser/src/string_piece.c +48 -0
  64. data/gumbo-parser/src/svg_attrs.c +174 -0
  65. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  66. data/gumbo-parser/src/svg_tags.c +137 -0
  67. data/gumbo-parser/src/svg_tags.gperf +55 -0
  68. data/gumbo-parser/src/tag.c +222 -0
  69. data/gumbo-parser/src/tag_lookup.c +382 -0
  70. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  71. data/gumbo-parser/src/tag_lookup.h +13 -0
  72. data/gumbo-parser/src/token_buffer.c +79 -0
  73. data/gumbo-parser/src/token_buffer.h +71 -0
  74. data/gumbo-parser/src/token_type.h +17 -0
  75. data/gumbo-parser/src/tokenizer.c +3463 -0
  76. data/gumbo-parser/src/tokenizer.h +112 -0
  77. data/gumbo-parser/src/tokenizer_states.h +339 -0
  78. data/gumbo-parser/src/utf8.c +245 -0
  79. data/gumbo-parser/src/utf8.h +164 -0
  80. data/gumbo-parser/src/util.c +68 -0
  81. data/gumbo-parser/src/util.h +30 -0
  82. data/gumbo-parser/src/vector.c +111 -0
  83. data/gumbo-parser/src/vector.h +45 -0
  84. data/lib/nokogiri/class_resolver.rb +67 -0
  85. data/lib/nokogiri/css/node.rb +9 -8
  86. data/lib/nokogiri/css/parser.rb +361 -342
  87. data/lib/nokogiri/css/parser.y +250 -245
  88. data/lib/nokogiri/css/parser_extras.rb +22 -20
  89. data/lib/nokogiri/css/syntax_error.rb +2 -1
  90. data/lib/nokogiri/css/tokenizer.rb +4 -3
  91. data/lib/nokogiri/css/tokenizer.rex +3 -2
  92. data/lib/nokogiri/css/xpath_visitor.rb +179 -82
  93. data/lib/nokogiri/css.rb +49 -17
  94. data/lib/nokogiri/decorators/slop.rb +8 -7
  95. data/lib/nokogiri/extension.rb +8 -3
  96. data/lib/nokogiri/gumbo.rb +15 -0
  97. data/lib/nokogiri/html.rb +37 -27
  98. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  99. data/lib/nokogiri/{html → html4}/document.rb +92 -81
  100. data/lib/nokogiri/{html → html4}/document_fragment.rb +13 -9
  101. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  102. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  103. data/lib/nokogiri/{html → html4}/entity_lookup.rb +3 -2
  104. data/lib/nokogiri/{html → html4}/sax/parser.rb +16 -16
  105. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  106. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +11 -11
  107. data/lib/nokogiri/html4.rb +46 -0
  108. data/lib/nokogiri/html5/document.rb +91 -0
  109. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  110. data/lib/nokogiri/html5/node.rb +100 -0
  111. data/lib/nokogiri/html5.rb +478 -0
  112. data/lib/nokogiri/jruby/dependencies.rb +10 -9
  113. data/lib/nokogiri/syntax_error.rb +1 -0
  114. data/lib/nokogiri/version/constant.rb +2 -1
  115. data/lib/nokogiri/version/info.rb +31 -14
  116. data/lib/nokogiri/version.rb +1 -0
  117. data/lib/nokogiri/xml/attr.rb +5 -3
  118. data/lib/nokogiri/xml/attribute_decl.rb +2 -1
  119. data/lib/nokogiri/xml/builder.rb +71 -31
  120. data/lib/nokogiri/xml/cdata.rb +2 -1
  121. data/lib/nokogiri/xml/character_data.rb +1 -0
  122. data/lib/nokogiri/xml/document.rb +183 -96
  123. data/lib/nokogiri/xml/document_fragment.rb +41 -38
  124. data/lib/nokogiri/xml/dtd.rb +3 -2
  125. data/lib/nokogiri/xml/element_content.rb +1 -0
  126. data/lib/nokogiri/xml/element_decl.rb +2 -1
  127. data/lib/nokogiri/xml/entity_decl.rb +3 -2
  128. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  129. data/lib/nokogiri/xml/namespace.rb +2 -0
  130. data/lib/nokogiri/xml/node/save_options.rb +9 -5
  131. data/lib/nokogiri/xml/node.rb +525 -354
  132. data/lib/nokogiri/xml/node_set.rb +50 -54
  133. data/lib/nokogiri/xml/notation.rb +12 -0
  134. data/lib/nokogiri/xml/parse_options.rb +13 -6
  135. data/lib/nokogiri/xml/pp/character_data.rb +8 -6
  136. data/lib/nokogiri/xml/pp/node.rb +24 -26
  137. data/lib/nokogiri/xml/pp.rb +3 -2
  138. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  139. data/lib/nokogiri/xml/reader.rb +20 -24
  140. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  141. data/lib/nokogiri/xml/sax/document.rb +44 -49
  142. data/lib/nokogiri/xml/sax/parser.rb +37 -34
  143. data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
  144. data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
  145. data/lib/nokogiri/xml/sax.rb +5 -4
  146. data/lib/nokogiri/xml/schema.rb +7 -6
  147. data/lib/nokogiri/xml/searchable.rb +93 -62
  148. data/lib/nokogiri/xml/syntax_error.rb +5 -4
  149. data/lib/nokogiri/xml/text.rb +1 -0
  150. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  151. data/lib/nokogiri/xml/xpath.rb +13 -1
  152. data/lib/nokogiri/xml/xpath_context.rb +2 -3
  153. data/lib/nokogiri/xml.rb +37 -37
  154. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  155. data/lib/nokogiri/xslt.rb +28 -20
  156. data/lib/nokogiri.rb +48 -43
  157. data/lib/xsd/xmlparser/nokogiri.rb +25 -24
  158. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  159. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  160. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  161. data/patches/libxml2/{0008-use-glibc-strlen.patch → 0004-use-glibc-strlen.patch} +3 -3
  162. data/patches/libxml2/{0009-avoid-isnan-isinf.patch → 0005-avoid-isnan-isinf.patch} +4 -4
  163. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
  164. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  165. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  166. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
  167. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  168. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  169. metadata +204 -93
  170. data/lib/nokogiri/html/element_description_defaults.rb +0 -672
  171. data/lib/nokogiri/html/sax/parser_context.rb +0 -17
  172. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  173. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  174. data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +0 -73
  175. data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +0 -103
  176. data/patches/libxml2/0010-parser.c-shrink-the-input-buffer-when-appropriate.patch +0 -70
  177. data/patches/libxml2/0011-update-automake-files-for-arm64.patch +0 -2511
  178. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  179. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -1,78 +0,0 @@
1
- From c5538465c08a8ea248a370bf55bc39cd3385e4af Mon Sep 17 00:00:00 2001
2
- From: Mike Dalessio <mike.dalessio@gmail.com>
3
- Date: Thu, 29 Mar 2018 14:09:00 -0400
4
- Subject: [PATCH] Revert "Do not URI escape in server side includes"
5
-
6
- This reverts commit 960f0e275616cadc29671a218d7fb9b69eb35588.
7
- ---
8
- HTMLtree.c | 49 +++++++++++--------------------------------------
9
- 1 file changed, 11 insertions(+), 38 deletions(-)
10
-
11
- diff --git a/HTMLtree.c b/HTMLtree.c
12
- index 2fd0c9c..67160c5 100644
13
- --- a/HTMLtree.c
14
- +++ b/HTMLtree.c
15
- @@ -717,49 +717,22 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
16
- (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
17
- ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
18
- (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
19
- + xmlChar *escaped;
20
- xmlChar *tmp = value;
21
- - /* xmlURIEscapeStr() escapes '"' so it can be safely used. */
22
- - xmlBufCCat(buf->buffer, "\"");
23
-
24
- while (IS_BLANK_CH(*tmp)) tmp++;
25
-
26
- - /* URI Escape everything, except server side includes. */
27
- - for ( ; ; ) {
28
- - xmlChar *escaped;
29
- - xmlChar endChar;
30
- - xmlChar *end = NULL;
31
- - xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--");
32
- - if (start != NULL) {
33
- - end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->");
34
- - if (end != NULL) {
35
- - *start = '\0';
36
- - }
37
- - }
38
- -
39
- - /* Escape the whole string, or until start (set to '\0'). */
40
- - escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
41
- - if (escaped != NULL) {
42
- - xmlBufCat(buf->buffer, escaped);
43
- - xmlFree(escaped);
44
- - } else {
45
- - xmlBufCat(buf->buffer, tmp);
46
- - }
47
- -
48
- - if (end == NULL) { /* Everything has been written. */
49
- - break;
50
- - }
51
- -
52
- - /* Do not escape anything within server side includes. */
53
- - *start = '<'; /* Restore the first character of "<!--". */
54
- - end += 3; /* strlen("-->") */
55
- - endChar = *end;
56
- - *end = '\0';
57
- - xmlBufCat(buf->buffer, start);
58
- - *end = endChar;
59
- - tmp = end;
60
- + /*
61
- + * the < and > have already been escaped at the entity level
62
- + * And doing so here breaks server side includes
63
- + */
64
- + escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>");
65
- + if (escaped != NULL) {
66
- + xmlBufWriteQuotedString(buf->buffer, escaped);
67
- + xmlFree(escaped);
68
- + } else {
69
- + xmlBufWriteQuotedString(buf->buffer, value);
70
- }
71
- -
72
- - xmlBufCCat(buf->buffer, "\"");
73
- } else {
74
- xmlBufWriteQuotedString(buf->buffer, value);
75
- }
76
- --
77
- 2.9.5
78
-
@@ -1,32 +0,0 @@
1
- From 0e1a49c8907645d2e155f0d89d4d9895ac5112b5 Mon Sep 17 00:00:00 2001
2
- From: Zhipeng Xie <xiezhipeng1@huawei.com>
3
- Date: Thu, 12 Dec 2019 17:30:55 +0800
4
- Subject: [PATCH] Fix infinite loop in xmlStringLenDecodeEntities
5
-
6
- When ctxt->instate == XML_PARSER_EOF,xmlParseStringEntityRef
7
- return NULL which cause a infinite loop in xmlStringLenDecodeEntities
8
-
9
- Found with libFuzzer.
10
-
11
- Signed-off-by: Zhipeng Xie <xiezhipeng1@huawei.com>
12
- ---
13
- parser.c | 3 ++-
14
- 1 file changed, 2 insertions(+), 1 deletion(-)
15
-
16
- diff --git a/parser.c b/parser.c
17
- index d1c3196..a34bb6c 100644
18
- --- a/parser.c
19
- +++ b/parser.c
20
- @@ -2646,7 +2646,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
21
- else
22
- c = 0;
23
- while ((c != 0) && (c != end) && /* non input consuming loop */
24
- - (c != end2) && (c != end3)) {
25
- + (c != end2) && (c != end3) &&
26
- + (ctxt->instate != XML_PARSER_EOF)) {
27
-
28
- if (c == 0) break;
29
- if ((c == '&') && (str[1] == '#')) {
30
- --
31
- 2.17.1
32
-
@@ -1,73 +0,0 @@
1
- From 4f51a6d2b1755ce5b36c524c215aad70d864ac1d Mon Sep 17 00:00:00 2001
2
- From: Mike Dalessio <mike.dalessio@gmail.com>
3
- Date: Mon, 3 Aug 2020 17:36:05 -0400
4
- Subject: [PATCH 1/2] htmlParseComment: treat `--!>` as if it closed the
5
- comment
6
-
7
- See guidance provided on incorrectly-closed comments here:
8
-
9
- https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment
10
- ---
11
- HTMLparser.c | 28 ++++++++++++++++++++--------
12
- 1 file changed, 20 insertions(+), 8 deletions(-)
13
-
14
- diff --git a/HTMLparser.c b/HTMLparser.c
15
- index 7b6d689..4d43479 100644
16
- --- a/HTMLparser.c
17
- +++ b/HTMLparser.c
18
- @@ -3300,6 +3300,7 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
19
- int q, ql;
20
- int r, rl;
21
- int cur, l;
22
- + int next, nl;
23
- xmlParserInputState state;
24
-
25
- /*
26
- @@ -3332,6 +3333,21 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
27
- while (IS_CHAR(cur) &&
28
- ((cur != '>') ||
29
- (r != '-') || (q != '-'))) {
30
- + NEXTL(l);
31
- + next = CUR_CHAR(nl);
32
- + if (next == 0) {
33
- + SHRINK;
34
- + GROW;
35
- + next = CUR_CHAR(nl);
36
- + }
37
- +
38
- + if ((q == '-') && (r == '-') && (cur == '!') && (next == '>')) {
39
- + htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
40
- + "Comment incorrectly closed by '--!>'", NULL, NULL);
41
- + cur = '>';
42
- + break;
43
- + }
44
- +
45
- if (len + 5 >= size) {
46
- xmlChar *tmp;
47
-
48
- @@ -3345,18 +3361,14 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
49
- }
50
- buf = tmp;
51
- }
52
- - COPY_BUF(ql,buf,len,q);
53
- + COPY_BUF(ql,buf,len,q);
54
- +
55
- q = r;
56
- ql = rl;
57
- r = cur;
58
- rl = l;
59
- - NEXTL(l);
60
- - cur = CUR_CHAR(l);
61
- - if (cur == 0) {
62
- - SHRINK;
63
- - GROW;
64
- - cur = CUR_CHAR(l);
65
- - }
66
- + cur = next;
67
- + l = nl;
68
- }
69
- buf[len] = 0;
70
- if (IS_CHAR(cur)) {
71
- --
72
- 2.25.1
73
-
@@ -1,103 +0,0 @@
1
- From b20d746fa7cbb74716171bc49d836af99927e41e Mon Sep 17 00:00:00 2001
2
- From: Mike Dalessio <mike.dalessio@gmail.com>
3
- Date: Sun, 11 Oct 2020 14:15:37 -0400
4
- Subject: [PATCH 2/2] use new htmlParseLookupCommentEnd to find comment ends
5
-
6
- Note that the caret in error messages generated during comment parsing
7
- may have moved by one byte.
8
-
9
- See guidance provided on incorrectly-closed comments here:
10
-
11
- https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment
12
- ---
13
- HTMLparser.c | 46 +++++++++++++++++++++++++++++++++++++---------
14
- 1 file changed, 37 insertions(+), 9 deletions(-)
15
-
16
- diff --git a/HTMLparser.c b/HTMLparser.c
17
- index 4d43479..000dc3d 100644
18
- --- a/HTMLparser.c
19
- +++ b/HTMLparser.c
20
- @@ -5331,6 +5331,39 @@ htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,
21
- return (-1);
22
- }
23
-
24
- +/**
25
- + * htmlParseLookupCommentEnd:
26
- + * @ctxt: an HTML parser context
27
- + *
28
- + * Try to find a comment end tag in the input stream
29
- + * The search includes "-->" as well as WHATWG-recommended incorrectly-closed tags.
30
- + * (See https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment)
31
- + * This function has a side effect of (possibly) incrementing ctxt->checkIndex
32
- + * to avoid rescanning sequences of bytes, it DOES change the state of the
33
- + * parser, do not use liberally.
34
- + * This wraps to htmlParseLookupSequence()
35
- + *
36
- + * Returns the index to the current parsing point if the full sequence is available, -1 otherwise.
37
- + */
38
- +static int
39
- +htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt)
40
- +{
41
- + int mark = 0;
42
- + int cur = CUR_PTR - BASE_PTR;
43
- +
44
- + while (mark >= 0) {
45
- + mark = htmlParseLookupSequence(ctxt, '-', '-', 0, 1, 1);
46
- + if ((mark < 0) ||
47
- + (NXT(mark+2) == '>') ||
48
- + ((NXT(mark+2) == '!') && (NXT(mark+3) == '>'))) {
49
- + return mark;
50
- + }
51
- + ctxt->checkIndex = cur + mark + 1;
52
- + }
53
- + return mark;
54
- +}
55
- +
56
- +
57
- /**
58
- * htmlParseTryOrFinish:
59
- * @ctxt: an HTML parser context
60
- @@ -5507,8 +5540,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
61
- cur = in->cur[0];
62
- if ((cur == '<') && (next == '!') &&
63
- (in->cur[2] == '-') && (in->cur[3] == '-')) {
64
- - if ((!terminate) &&
65
- - (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
66
- + if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
67
- goto done;
68
- #ifdef DEBUG_PUSH
69
- xmlGenericError(xmlGenericErrorContext,
70
- @@ -5567,8 +5599,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
71
- next = in->cur[1];
72
- if ((cur == '<') && (next == '!') &&
73
- (in->cur[2] == '-') && (in->cur[3] == '-')) {
74
- - if ((!terminate) &&
75
- - (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
76
- + if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
77
- goto done;
78
- #ifdef DEBUG_PUSH
79
- xmlGenericError(xmlGenericErrorContext,
80
- @@ -5614,8 +5645,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
81
- next = in->cur[1];
82
- if ((cur == '<') && (next == '!') &&
83
- (in->cur[2] == '-') && (in->cur[3] == '-')) {
84
- - if ((!terminate) &&
85
- - (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
86
- + if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
87
- goto done;
88
- #ifdef DEBUG_PUSH
89
- xmlGenericError(xmlGenericErrorContext,
90
- @@ -5871,9 +5901,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
91
- htmlParseDocTypeDecl(ctxt);
92
- } else if ((cur == '<') && (next == '!') &&
93
- (in->cur[2] == '-') && (in->cur[3] == '-')) {
94
- - if ((!terminate) &&
95
- - (htmlParseLookupSequence(
96
- - ctxt, '-', '-', '>', 1, 1) < 0))
97
- + if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
98
- goto done;
99
- #ifdef DEBUG_PUSH
100
- xmlGenericError(xmlGenericErrorContext,
101
- --
102
- 2.25.1
103
-
@@ -1,70 +0,0 @@
1
- From ca565c1edef9a455453fa8564270cc9c5813e1b9 Mon Sep 17 00:00:00 2001
2
- From: Mike Dalessio <mike.dalessio@gmail.com>
3
- Date: Sun, 31 Jan 2021 09:53:56 -0500
4
- Subject: [PATCH] parser.c: shrink the input buffer when appropriate
5
-
6
- Fixes GNOME/libxml2#200
7
-
8
- Also see discussions at:
9
- - GNOME/libxml2#192
10
- - https://gitlab.gnome.org/nwellnhof/libxml2/-/commit/99bda1e
11
- - https://github.com/sparklemotion/nokogiri/issues/2132
12
- ---
13
- parser.c | 6 ++++++
14
- 1 file changed, 6 insertions(+)
15
-
16
- diff --git a/parser.c b/parser.c
17
- index a7bdc7f..efde672 100644
18
- --- a/parser.c
19
- +++ b/parser.c
20
- @@ -4204,6 +4204,7 @@ xmlParseSystemLiteral(xmlParserCtxtPtr ctxt) {
21
- }
22
- count++;
23
- if (count > 50) {
24
- + SHRINK;
25
- GROW;
26
- count = 0;
27
- if (ctxt->instate == XML_PARSER_EOF) {
28
- @@ -4291,6 +4292,7 @@ xmlParsePubidLiteral(xmlParserCtxtPtr ctxt) {
29
- buf[len++] = cur;
30
- count++;
31
- if (count > 50) {
32
- + SHRINK;
33
- GROW;
34
- count = 0;
35
- if (ctxt->instate == XML_PARSER_EOF) {
36
- @@ -4571,6 +4573,7 @@ xmlParseCharDataComplex(xmlParserCtxtPtr ctxt, int cdata) {
37
- }
38
- count++;
39
- if (count > 50) {
40
- + SHRINK;
41
- GROW;
42
- count = 0;
43
- if (ctxt->instate == XML_PARSER_EOF)
44
- @@ -4776,6 +4779,7 @@ xmlParseCommentComplex(xmlParserCtxtPtr ctxt, xmlChar *buf,
45
-
46
- count++;
47
- if (count > 50) {
48
- + SHRINK;
49
- GROW;
50
- count = 0;
51
- if (ctxt->instate == XML_PARSER_EOF) {
52
- @@ -5186,6 +5190,7 @@ xmlParsePI(xmlParserCtxtPtr ctxt) {
53
- }
54
- count++;
55
- if (count > 50) {
56
- + SHRINK;
57
- GROW;
58
- if (ctxt->instate == XML_PARSER_EOF) {
59
- xmlFree(buf);
60
- @@ -9783,6 +9788,7 @@ xmlParseCDSect(xmlParserCtxtPtr ctxt) {
61
- sl = l;
62
- count++;
63
- if (count > 50) {
64
- + SHRINK;
65
- GROW;
66
- if (ctxt->instate == XML_PARSER_EOF) {
67
- xmlFree(buf);
68
- --
69
- 2.25.1
70
-