nokogiri 1.11.1 → 1.12.0.rc1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (179)
  1. checksums.yaml +4 -4
  2. data/LICENSE-DEPENDENCIES.md +232 -11
  3. data/LICENSE.md +1 -1
  4. data/README.md +27 -21
  5. data/dependencies.yml +12 -12
  6. data/ext/nokogiri/depend +35 -474
  7. data/ext/nokogiri/extconf.rb +391 -243
  8. data/ext/nokogiri/gumbo.c +611 -0
  9. data/ext/nokogiri/{html_document.c → html4_document.c} +18 -23
  10. data/ext/nokogiri/html4_element_description.c +294 -0
  11. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  12. data/ext/nokogiri/html4_sax_parser_context.c +119 -0
  13. data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +29 -27
  14. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  15. data/ext/nokogiri/nokogiri.c +206 -66
  16. data/ext/nokogiri/nokogiri.h +166 -76
  17. data/ext/nokogiri/test_global_handlers.c +3 -4
  18. data/ext/nokogiri/xml_attr.c +15 -15
  19. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  20. data/ext/nokogiri/xml_cdata.c +13 -18
  21. data/ext/nokogiri/xml_comment.c +19 -26
  22. data/ext/nokogiri/xml_document.c +258 -200
  23. data/ext/nokogiri/xml_document_fragment.c +13 -15
  24. data/ext/nokogiri/xml_dtd.c +54 -48
  25. data/ext/nokogiri/xml_element_content.c +31 -26
  26. data/ext/nokogiri/xml_element_decl.c +22 -22
  27. data/ext/nokogiri/xml_encoding_handler.c +28 -17
  28. data/ext/nokogiri/xml_entity_decl.c +32 -30
  29. data/ext/nokogiri/xml_entity_reference.c +16 -18
  30. data/ext/nokogiri/xml_namespace.c +58 -49
  31. data/ext/nokogiri/xml_node.c +473 -414
  32. data/ext/nokogiri/xml_node_set.c +174 -162
  33. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  34. data/ext/nokogiri/xml_reader.c +193 -157
  35. data/ext/nokogiri/xml_relax_ng.c +29 -23
  36. data/ext/nokogiri/xml_sax_parser.c +111 -106
  37. data/ext/nokogiri/xml_sax_parser_context.c +102 -85
  38. data/ext/nokogiri/xml_sax_push_parser.c +34 -27
  39. data/ext/nokogiri/xml_schema.c +49 -41
  40. data/ext/nokogiri/xml_syntax_error.c +21 -23
  41. data/ext/nokogiri/xml_text.c +13 -17
  42. data/ext/nokogiri/xml_xpath_context.c +86 -77
  43. data/ext/nokogiri/xslt_stylesheet.c +157 -156
  44. data/gumbo-parser/CHANGES.md +63 -0
  45. data/gumbo-parser/Makefile +101 -0
  46. data/gumbo-parser/THANKS +27 -0
  47. data/gumbo-parser/src/Makefile +17 -0
  48. data/gumbo-parser/src/README.md +41 -0
  49. data/gumbo-parser/src/ascii.c +75 -0
  50. data/gumbo-parser/src/ascii.h +115 -0
  51. data/gumbo-parser/src/attribute.c +42 -0
  52. data/gumbo-parser/src/attribute.h +17 -0
  53. data/gumbo-parser/src/char_ref.c +22225 -0
  54. data/gumbo-parser/src/char_ref.h +29 -0
  55. data/gumbo-parser/src/char_ref.rl +2154 -0
  56. data/gumbo-parser/src/error.c +626 -0
  57. data/gumbo-parser/src/error.h +148 -0
  58. data/gumbo-parser/src/foreign_attrs.c +104 -0
  59. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  60. data/gumbo-parser/src/gumbo.h +943 -0
  61. data/gumbo-parser/src/insertion_mode.h +33 -0
  62. data/gumbo-parser/src/macros.h +91 -0
  63. data/gumbo-parser/src/parser.c +4886 -0
  64. data/gumbo-parser/src/parser.h +41 -0
  65. data/gumbo-parser/src/replacement.h +33 -0
  66. data/gumbo-parser/src/string_buffer.c +103 -0
  67. data/gumbo-parser/src/string_buffer.h +68 -0
  68. data/gumbo-parser/src/string_piece.c +48 -0
  69. data/gumbo-parser/src/svg_attrs.c +174 -0
  70. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  71. data/gumbo-parser/src/svg_tags.c +137 -0
  72. data/gumbo-parser/src/svg_tags.gperf +55 -0
  73. data/gumbo-parser/src/tag.c +222 -0
  74. data/gumbo-parser/src/tag_lookup.c +382 -0
  75. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  76. data/gumbo-parser/src/tag_lookup.h +13 -0
  77. data/gumbo-parser/src/token_buffer.c +79 -0
  78. data/gumbo-parser/src/token_buffer.h +71 -0
  79. data/gumbo-parser/src/token_type.h +17 -0
  80. data/gumbo-parser/src/tokenizer.c +3463 -0
  81. data/gumbo-parser/src/tokenizer.h +112 -0
  82. data/gumbo-parser/src/tokenizer_states.h +339 -0
  83. data/gumbo-parser/src/utf8.c +245 -0
  84. data/gumbo-parser/src/utf8.h +164 -0
  85. data/gumbo-parser/src/util.c +68 -0
  86. data/gumbo-parser/src/util.h +30 -0
  87. data/gumbo-parser/src/vector.c +111 -0
  88. data/gumbo-parser/src/vector.h +45 -0
  89. data/lib/nokogiri.rb +31 -50
  90. data/lib/nokogiri/css.rb +14 -14
  91. data/lib/nokogiri/css/parser.rb +2 -2
  92. data/lib/nokogiri/css/parser.y +1 -1
  93. data/lib/nokogiri/css/syntax_error.rb +1 -1
  94. data/lib/nokogiri/extension.rb +26 -0
  95. data/lib/nokogiri/gumbo.rb +14 -0
  96. data/lib/nokogiri/html.rb +31 -27
  97. data/lib/nokogiri/html4.rb +40 -0
  98. data/lib/nokogiri/{html → html4}/builder.rb +2 -2
  99. data/lib/nokogiri/{html → html4}/document.rb +4 -4
  100. data/lib/nokogiri/{html → html4}/document_fragment.rb +17 -17
  101. data/lib/nokogiri/{html → html4}/element_description.rb +1 -1
  102. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +1 -1
  103. data/lib/nokogiri/{html → html4}/entity_lookup.rb +1 -1
  104. data/lib/nokogiri/{html → html4}/sax/parser.rb +11 -14
  105. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  106. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +5 -5
  107. data/lib/nokogiri/html5.rb +473 -0
  108. data/lib/nokogiri/html5/document.rb +74 -0
  109. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  110. data/lib/nokogiri/html5/node.rb +93 -0
  111. data/lib/nokogiri/version/constant.rb +1 -1
  112. data/lib/nokogiri/version/info.rb +42 -9
  113. data/lib/nokogiri/xml.rb +35 -36
  114. data/lib/nokogiri/xml/document.rb +74 -28
  115. data/lib/nokogiri/xml/node.rb +45 -47
  116. data/lib/nokogiri/xml/parse_options.rb +2 -0
  117. data/lib/nokogiri/xml/pp.rb +2 -2
  118. data/lib/nokogiri/xml/reader.rb +2 -9
  119. data/lib/nokogiri/xml/sax.rb +4 -4
  120. data/lib/nokogiri/xml/sax/document.rb +24 -30
  121. data/lib/nokogiri/xml/xpath.rb +3 -5
  122. data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
  123. data/lib/nokogiri/xslt.rb +16 -16
  124. data/lib/nokogiri/xslt/stylesheet.rb +1 -1
  125. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  126. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  127. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  128. data/patches/libxml2/{0008-use-glibc-strlen.patch → 0004-use-glibc-strlen.patch} +0 -0
  129. data/patches/libxml2/{0009-avoid-isnan-isinf.patch → 0005-avoid-isnan-isinf.patch} +4 -4
  130. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  131. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  132. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  133. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  134. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  135. metadata +117 -109
  136. data/ext/nokogiri/html_document.h +0 -10
  137. data/ext/nokogiri/html_element_description.c +0 -279
  138. data/ext/nokogiri/html_element_description.h +0 -10
  139. data/ext/nokogiri/html_entity_lookup.c +0 -32
  140. data/ext/nokogiri/html_entity_lookup.h +0 -8
  141. data/ext/nokogiri/html_sax_parser_context.c +0 -118
  142. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  143. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  144. data/ext/nokogiri/xml_attr.h +0 -9
  145. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  146. data/ext/nokogiri/xml_cdata.h +0 -9
  147. data/ext/nokogiri/xml_comment.h +0 -9
  148. data/ext/nokogiri/xml_document.h +0 -23
  149. data/ext/nokogiri/xml_document_fragment.h +0 -10
  150. data/ext/nokogiri/xml_dtd.h +0 -10
  151. data/ext/nokogiri/xml_element_content.h +0 -10
  152. data/ext/nokogiri/xml_element_decl.h +0 -9
  153. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  154. data/ext/nokogiri/xml_entity_decl.h +0 -10
  155. data/ext/nokogiri/xml_entity_reference.h +0 -9
  156. data/ext/nokogiri/xml_io.c +0 -63
  157. data/ext/nokogiri/xml_io.h +0 -11
  158. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  159. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  160. data/ext/nokogiri/xml_namespace.h +0 -14
  161. data/ext/nokogiri/xml_node.h +0 -13
  162. data/ext/nokogiri/xml_node_set.h +0 -12
  163. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  164. data/ext/nokogiri/xml_reader.h +0 -10
  165. data/ext/nokogiri/xml_relax_ng.h +0 -9
  166. data/ext/nokogiri/xml_sax_parser.h +0 -39
  167. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  168. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  169. data/ext/nokogiri/xml_schema.h +0 -9
  170. data/ext/nokogiri/xml_syntax_error.h +0 -25
  171. data/ext/nokogiri/xml_text.h +0 -9
  172. data/ext/nokogiri/xml_xpath_context.h +0 -10
  173. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  174. data/lib/nokogiri/html/sax/parser_context.rb +0 -17
  175. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  176. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  177. data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +0 -73
  178. data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +0 -103
  179. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
@@ -1,12 +0,0 @@
1
- #ifndef HAVE_XMLFIRSTELEMENTCHILD
2
-
3
- #ifndef XML_LIBXML2_HACKS
4
- #define XML_LIBXML2_HACKS
5
-
6
- xmlNodePtr xmlFirstElementChild(xmlNodePtr parent);
7
- xmlNodePtr xmlNextElementSibling(xmlNodePtr node);
8
- xmlNodePtr xmlLastElementChild(xmlNodePtr parent);
9
-
10
- #endif
11
-
12
- #endif
@@ -1,14 +0,0 @@
1
- #ifndef NOKOGIRI_XML_NAMESPACE
2
- #define NOKOGIRI_XML_NAMESPACE
3
-
4
- #include <nokogiri.h>
5
-
6
- void init_xml_namespace();
7
-
8
- extern VALUE cNokogiriXmlNamespace ;
9
-
10
- VALUE Nokogiri_wrap_xml_namespace(xmlDocPtr doc, xmlNsPtr node);
11
-
12
- #define NOKOGIRI_NAMESPACE_EH(node) ((node)->type == XML_NAMESPACE_DECL)
13
-
14
- #endif
@@ -1,13 +0,0 @@
1
- #ifndef NOKOGIRI_XML_NODE
2
- #define NOKOGIRI_XML_NODE
3
-
4
- #include <nokogiri.h>
5
-
6
- void init_xml_node();
7
-
8
- extern VALUE cNokogiriXmlNode ;
9
- extern VALUE cNokogiriXmlElement ;
10
-
11
- VALUE Nokogiri_wrap_xml_node(VALUE klass, xmlNodePtr node) ;
12
- void Nokogiri_xml_node_properties(xmlNodePtr node, VALUE attr_hash) ;
13
- #endif
@@ -1,12 +0,0 @@
1
- #ifndef NOKOGIRI_XML_NODE_SET
2
- #define NOKOGIRI_XML_NODE_SET
3
-
4
- #include <nokogiri.h>
5
- void init_xml_node_set();
6
-
7
- extern VALUE cNokogiriXmlNodeSet ;
8
- VALUE Nokogiri_wrap_xml_node_set(xmlNodeSetPtr node_set, VALUE document) ;
9
- VALUE Nokogiri_wrap_xml_node_set_node(xmlNodePtr node, VALUE node_set) ;
10
- VALUE Nokogiri_wrap_xml_node_set_namespace(xmlNsPtr node, VALUE node_set) ;
11
-
12
- #endif
@@ -1,9 +0,0 @@
1
- #ifndef NOKOGIRI_XML_PROCESSING_INSTRUCTION
2
- #define NOKOGIRI_XML_PROCESSING_INSTRUCTION
3
-
4
- #include <nokogiri.h>
5
-
6
- void init_xml_processing_instruction();
7
-
8
- extern VALUE cNokogiriXmlProcessingInstruction;
9
- #endif
@@ -1,10 +0,0 @@
1
- #ifndef NOKOGIRI_XML_READER
2
- #define NOKOGIRI_XML_READER
3
-
4
- #include <nokogiri.h>
5
-
6
- void init_xml_reader();
7
-
8
- extern VALUE cNokogiriXmlReader;
9
-
10
- #endif
@@ -1,9 +0,0 @@
1
- #ifndef NOKOGIRI_XML_RELAX_NG
2
- #define NOKOGIRI_XML_RELAX_NG
3
-
4
- #include <nokogiri.h>
5
-
6
- void init_xml_relax_ng();
7
-
8
- extern VALUE cNokogiriXmlRelaxNG;
9
- #endif
@@ -1,39 +0,0 @@
1
- #ifndef NOKOGIRI_XML_SAX_PARSER
2
- #define NOKOGIRI_XML_SAX_PARSER
3
-
4
- #include <nokogiri.h>
5
-
6
- void init_xml_sax_parser();
7
-
8
- extern VALUE cNokogiriXmlSaxParser ;
9
-
10
- typedef struct _nokogiriSAXTuple {
11
- xmlParserCtxtPtr ctxt;
12
- VALUE self;
13
- } nokogiriSAXTuple;
14
-
15
- typedef nokogiriSAXTuple * nokogiriSAXTuplePtr;
16
-
17
- #define NOKOGIRI_SAX_SELF(_ctxt) \
18
- ((nokogiriSAXTuplePtr)(_ctxt))->self
19
-
20
- #define NOKOGIRI_SAX_CTXT(_ctxt) \
21
- ((nokogiriSAXTuplePtr)(_ctxt))->ctxt
22
-
23
- #define NOKOGIRI_SAX_TUPLE_NEW(_ctxt, _self) \
24
- nokogiri_sax_tuple_new(_ctxt, _self)
25
-
26
- static inline nokogiriSAXTuplePtr
27
- nokogiri_sax_tuple_new(xmlParserCtxtPtr ctxt, VALUE self)
28
- {
29
- nokogiriSAXTuplePtr tuple = malloc(sizeof(nokogiriSAXTuple));
30
- tuple->self = self;
31
- tuple->ctxt = ctxt;
32
- return tuple;
33
- }
34
-
35
- #define NOKOGIRI_SAX_TUPLE_DESTROY(_tuple) \
36
- free(_tuple) \
37
-
38
- #endif
39
-
@@ -1,10 +0,0 @@
1
- #ifndef NOKOGIRI_XML_SAX_PARSER_CONTEXT
2
- #define NOKOGIRI_XML_SAX_PARSER_CONTEXT
3
-
4
- #include <nokogiri.h>
5
-
6
- extern VALUE cNokogiriXmlSaxParserContext;
7
-
8
- void init_xml_sax_parser_context();
9
-
10
- #endif
@@ -1,9 +0,0 @@
1
- #ifndef NOKOGIRI_XML_SAX_PUSH_PARSER
2
- #define NOKOGIRI_XML_SAX_PUSH_PARSER
3
-
4
- #include <nokogiri.h>
5
-
6
- void init_xml_sax_push_parser();
7
-
8
- extern VALUE cNokogiriXmlSaxPushParser ;
9
- #endif
@@ -1,9 +0,0 @@
1
- #ifndef NOKOGIRI_XML_SCHEMA
2
- #define NOKOGIRI_XML_SCHEMA
3
-
4
- #include <nokogiri.h>
5
-
6
- void init_xml_schema();
7
-
8
- extern VALUE cNokogiriXmlSchema;
9
- #endif
@@ -1,25 +0,0 @@
1
- #ifndef NOKOGIRI_XML_SYNTAX_ERROR
2
- #define NOKOGIRI_XML_SYNTAX_ERROR
3
-
4
- #include <nokogiri.h>
5
-
6
- typedef struct _libxmlStructuredErrorHandlerState {
7
- void *user_data;
8
- xmlStructuredErrorFunc handler;
9
- } libxmlStructuredErrorHandlerState ;
10
-
11
- void init_xml_syntax_error();
12
-
13
- void Nokogiri_structured_error_func_save(libxmlStructuredErrorHandlerState *handler_state);
14
- void Nokogiri_structured_error_func_save_and_set(libxmlStructuredErrorHandlerState *handler_state,
15
- void *user_data,
16
- xmlStructuredErrorFunc handler);
17
- void Nokogiri_structured_error_func_restore(libxmlStructuredErrorHandlerState *handler_state);
18
-
19
- VALUE Nokogiri_wrap_xml_syntax_error(xmlErrorPtr error);
20
- void Nokogiri_error_array_pusher(void *ctx, xmlErrorPtr error);
21
- NORETURN(void Nokogiri_error_raise(void *ctx, xmlErrorPtr error));
22
-
23
- extern VALUE cNokogiriXmlSyntaxError;
24
-
25
- #endif /* NOKOGIRI_XML_SYNTAX_ERROR */
@@ -1,9 +0,0 @@
1
- #ifndef NOKOGIRI_XML_TEXT
2
- #define NOKOGIRI_XML_TEXT
3
-
4
- #include <nokogiri.h>
5
-
6
- void init_xml_text();
7
-
8
- extern VALUE cNokogiriXmlText ;
9
- #endif
@@ -1,10 +0,0 @@
1
- #ifndef NOKOGIRI_XML_XPATH_CONTEXT
2
- #define NOKOGIRI_XML_XPATH_CONTEXT
3
-
4
- #include <nokogiri.h>
5
-
6
- void init_xml_xpath_context();
7
- void Nokogiri_marshal_xpath_funcall_and_return_values(xmlXPathParserContextPtr ctx, int nargs, VALUE handler, const char* function_name) ;
8
-
9
- extern VALUE cNokogiriXmlXpathContext;
10
- #endif
@@ -1,14 +0,0 @@
1
- #ifndef NOKOGIRI_XSLT_STYLESHEET
2
- #define NOKOGIRI_XSLT_STYLESHEET
3
-
4
- #include <nokogiri.h>
5
-
6
- void init_xslt_stylesheet();
7
-
8
- extern VALUE cNokogiriXsltStylesheet ;
9
-
10
- typedef struct _nokogiriXsltStylesheetTuple {
11
- xsltStylesheetPtr ss;
12
- VALUE func_instances;
13
- } nokogiriXsltStylesheetTuple;
14
- #endif
@@ -1,17 +0,0 @@
1
- # frozen_string_literal: true
2
- module Nokogiri
3
- module HTML
4
- module SAX
5
- ###
6
- # Context for HTML SAX parsers. This class is usually not instantiated
7
- # by the user. Instead, you should be looking at
8
- # Nokogiri::HTML::SAX::Parser
9
- class ParserContext < Nokogiri::XML::SAX::ParserContext
10
- def self.new thing, encoding = 'UTF-8'
11
- [:read, :close].all? { |x| thing.respond_to?(x) } ? super :
12
- memory(thing, encoding)
13
- end
14
- end
15
- end
16
- end
17
- end
@@ -1,78 +0,0 @@
1
- From c5538465c08a8ea248a370bf55bc39cd3385e4af Mon Sep 17 00:00:00 2001
2
- From: Mike Dalessio <mike.dalessio@gmail.com>
3
- Date: Thu, 29 Mar 2018 14:09:00 -0400
4
- Subject: [PATCH] Revert "Do not URI escape in server side includes"
5
-
6
- This reverts commit 960f0e275616cadc29671a218d7fb9b69eb35588.
7
- ---
8
- HTMLtree.c | 49 +++++++++++--------------------------------------
9
- 1 file changed, 11 insertions(+), 38 deletions(-)
10
-
11
- diff --git a/HTMLtree.c b/HTMLtree.c
12
- index 2fd0c9c..67160c5 100644
13
- --- a/HTMLtree.c
14
- +++ b/HTMLtree.c
15
- @@ -717,49 +717,22 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
16
- (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
17
- ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
18
- (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
19
- + xmlChar *escaped;
20
- xmlChar *tmp = value;
21
- - /* xmlURIEscapeStr() escapes '"' so it can be safely used. */
22
- - xmlBufCCat(buf->buffer, "\"");
23
-
24
- while (IS_BLANK_CH(*tmp)) tmp++;
25
-
26
- - /* URI Escape everything, except server side includes. */
27
- - for ( ; ; ) {
28
- - xmlChar *escaped;
29
- - xmlChar endChar;
30
- - xmlChar *end = NULL;
31
- - xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--");
32
- - if (start != NULL) {
33
- - end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->");
34
- - if (end != NULL) {
35
- - *start = '\0';
36
- - }
37
- - }
38
- -
39
- - /* Escape the whole string, or until start (set to '\0'). */
40
- - escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
41
- - if (escaped != NULL) {
42
- - xmlBufCat(buf->buffer, escaped);
43
- - xmlFree(escaped);
44
- - } else {
45
- - xmlBufCat(buf->buffer, tmp);
46
- - }
47
- -
48
- - if (end == NULL) { /* Everything has been written. */
49
- - break;
50
- - }
51
- -
52
- - /* Do not escape anything within server side includes. */
53
- - *start = '<'; /* Restore the first character of "<!--". */
54
- - end += 3; /* strlen("-->") */
55
- - endChar = *end;
56
- - *end = '\0';
57
- - xmlBufCat(buf->buffer, start);
58
- - *end = endChar;
59
- - tmp = end;
60
- + /*
61
- + * the < and > have already been escaped at the entity level
62
- + * And doing so here breaks server side includes
63
- + */
64
- + escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>");
65
- + if (escaped != NULL) {
66
- + xmlBufWriteQuotedString(buf->buffer, escaped);
67
- + xmlFree(escaped);
68
- + } else {
69
- + xmlBufWriteQuotedString(buf->buffer, value);
70
- }
71
- -
72
- - xmlBufCCat(buf->buffer, "\"");
73
- } else {
74
- xmlBufWriteQuotedString(buf->buffer, value);
75
- }
76
- --
77
- 2.9.5
78
-
@@ -1,32 +0,0 @@
1
- From 0e1a49c8907645d2e155f0d89d4d9895ac5112b5 Mon Sep 17 00:00:00 2001
2
- From: Zhipeng Xie <xiezhipeng1@huawei.com>
3
- Date: Thu, 12 Dec 2019 17:30:55 +0800
4
- Subject: [PATCH] Fix infinite loop in xmlStringLenDecodeEntities
5
-
6
- When ctxt->instate == XML_PARSER_EOF,xmlParseStringEntityRef
7
- return NULL which cause a infinite loop in xmlStringLenDecodeEntities
8
-
9
- Found with libFuzzer.
10
-
11
- Signed-off-by: Zhipeng Xie <xiezhipeng1@huawei.com>
12
- ---
13
- parser.c | 3 ++-
14
- 1 file changed, 2 insertions(+), 1 deletion(-)
15
-
16
- diff --git a/parser.c b/parser.c
17
- index d1c3196..a34bb6c 100644
18
- --- a/parser.c
19
- +++ b/parser.c
20
- @@ -2646,7 +2646,8 @@ xmlStringLenDecodeEntities(xmlParserCtxtPtr ctxt, const xmlChar *str, int len,
21
- else
22
- c = 0;
23
- while ((c != 0) && (c != end) && /* non input consuming loop */
24
- - (c != end2) && (c != end3)) {
25
- + (c != end2) && (c != end3) &&
26
- + (ctxt->instate != XML_PARSER_EOF)) {
27
-
28
- if (c == 0) break;
29
- if ((c == '&') && (str[1] == '#')) {
30
- --
31
- 2.17.1
32
-
@@ -1,73 +0,0 @@
1
- From 4f51a6d2b1755ce5b36c524c215aad70d864ac1d Mon Sep 17 00:00:00 2001
2
- From: Mike Dalessio <mike.dalessio@gmail.com>
3
- Date: Mon, 3 Aug 2020 17:36:05 -0400
4
- Subject: [PATCH 1/2] htmlParseComment: treat `--!>` as if it closed the
5
- comment
6
-
7
- See guidance provided on incorrectly-closed comments here:
8
-
9
- https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment
10
- ---
11
- HTMLparser.c | 28 ++++++++++++++++++++--------
12
- 1 file changed, 20 insertions(+), 8 deletions(-)
13
-
14
- diff --git a/HTMLparser.c b/HTMLparser.c
15
- index 7b6d689..4d43479 100644
16
- --- a/HTMLparser.c
17
- +++ b/HTMLparser.c
18
- @@ -3300,6 +3300,7 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
19
- int q, ql;
20
- int r, rl;
21
- int cur, l;
22
- + int next, nl;
23
- xmlParserInputState state;
24
-
25
- /*
26
- @@ -3332,6 +3333,21 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
27
- while (IS_CHAR(cur) &&
28
- ((cur != '>') ||
29
- (r != '-') || (q != '-'))) {
30
- + NEXTL(l);
31
- + next = CUR_CHAR(nl);
32
- + if (next == 0) {
33
- + SHRINK;
34
- + GROW;
35
- + next = CUR_CHAR(nl);
36
- + }
37
- +
38
- + if ((q == '-') && (r == '-') && (cur == '!') && (next == '>')) {
39
- + htmlParseErr(ctxt, XML_ERR_COMMENT_NOT_FINISHED,
40
- + "Comment incorrectly closed by '--!>'", NULL, NULL);
41
- + cur = '>';
42
- + break;
43
- + }
44
- +
45
- if (len + 5 >= size) {
46
- xmlChar *tmp;
47
-
48
- @@ -3345,18 +3361,14 @@ htmlParseComment(htmlParserCtxtPtr ctxt) {
49
- }
50
- buf = tmp;
51
- }
52
- - COPY_BUF(ql,buf,len,q);
53
- + COPY_BUF(ql,buf,len,q);
54
- +
55
- q = r;
56
- ql = rl;
57
- r = cur;
58
- rl = l;
59
- - NEXTL(l);
60
- - cur = CUR_CHAR(l);
61
- - if (cur == 0) {
62
- - SHRINK;
63
- - GROW;
64
- - cur = CUR_CHAR(l);
65
- - }
66
- + cur = next;
67
- + l = nl;
68
- }
69
- buf[len] = 0;
70
- if (IS_CHAR(cur)) {
71
- --
72
- 2.25.1
73
-
@@ -1,103 +0,0 @@
1
- From b20d746fa7cbb74716171bc49d836af99927e41e Mon Sep 17 00:00:00 2001
2
- From: Mike Dalessio <mike.dalessio@gmail.com>
3
- Date: Sun, 11 Oct 2020 14:15:37 -0400
4
- Subject: [PATCH 2/2] use new htmlParseLookupCommentEnd to find comment ends
5
-
6
- Note that the caret in error messages generated during comment parsing
7
- may have moved by one byte.
8
-
9
- See guidance provided on incorrectly-closed comments here:
10
-
11
- https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment
12
- ---
13
- HTMLparser.c | 46 +++++++++++++++++++++++++++++++++++++---------
14
- 1 file changed, 37 insertions(+), 9 deletions(-)
15
-
16
- diff --git a/HTMLparser.c b/HTMLparser.c
17
- index 4d43479..000dc3d 100644
18
- --- a/HTMLparser.c
19
- +++ b/HTMLparser.c
20
- @@ -5331,6 +5331,39 @@ htmlParseLookupChars(htmlParserCtxtPtr ctxt, const xmlChar * stop,
21
- return (-1);
22
- }
23
-
24
- +/**
25
- + * htmlParseLookupCommentEnd:
26
- + * @ctxt: an HTML parser context
27
- + *
28
- + * Try to find a comment end tag in the input stream
29
- + * The search includes "-->" as well as WHATWG-recommended incorrectly-closed tags.
30
- + * (See https://html.spec.whatwg.org/multipage/parsing.html#parse-error-incorrectly-closed-comment)
31
- + * This function has a side effect of (possibly) incrementing ctxt->checkIndex
32
- + * to avoid rescanning sequences of bytes, it DOES change the state of the
33
- + * parser, do not use liberally.
34
- + * This wraps to htmlParseLookupSequence()
35
- + *
36
- + * Returns the index to the current parsing point if the full sequence is available, -1 otherwise.
37
- + */
38
- +static int
39
- +htmlParseLookupCommentEnd(htmlParserCtxtPtr ctxt)
40
- +{
41
- + int mark = 0;
42
- + int cur = CUR_PTR - BASE_PTR;
43
- +
44
- + while (mark >= 0) {
45
- + mark = htmlParseLookupSequence(ctxt, '-', '-', 0, 1, 1);
46
- + if ((mark < 0) ||
47
- + (NXT(mark+2) == '>') ||
48
- + ((NXT(mark+2) == '!') && (NXT(mark+3) == '>'))) {
49
- + return mark;
50
- + }
51
- + ctxt->checkIndex = cur + mark + 1;
52
- + }
53
- + return mark;
54
- +}
55
- +
56
- +
57
- /**
58
- * htmlParseTryOrFinish:
59
- * @ctxt: an HTML parser context
60
- @@ -5507,8 +5540,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
61
- cur = in->cur[0];
62
- if ((cur == '<') && (next == '!') &&
63
- (in->cur[2] == '-') && (in->cur[3] == '-')) {
64
- - if ((!terminate) &&
65
- - (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
66
- + if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
67
- goto done;
68
- #ifdef DEBUG_PUSH
69
- xmlGenericError(xmlGenericErrorContext,
70
- @@ -5567,8 +5599,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
71
- next = in->cur[1];
72
- if ((cur == '<') && (next == '!') &&
73
- (in->cur[2] == '-') && (in->cur[3] == '-')) {
74
- - if ((!terminate) &&
75
- - (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
76
- + if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
77
- goto done;
78
- #ifdef DEBUG_PUSH
79
- xmlGenericError(xmlGenericErrorContext,
80
- @@ -5614,8 +5645,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
81
- next = in->cur[1];
82
- if ((cur == '<') && (next == '!') &&
83
- (in->cur[2] == '-') && (in->cur[3] == '-')) {
84
- - if ((!terminate) &&
85
- - (htmlParseLookupSequence(ctxt, '-', '-', '>', 1, 1) < 0))
86
- + if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
87
- goto done;
88
- #ifdef DEBUG_PUSH
89
- xmlGenericError(xmlGenericErrorContext,
90
- @@ -5871,9 +5901,7 @@ htmlParseTryOrFinish(htmlParserCtxtPtr ctxt, int terminate) {
91
- htmlParseDocTypeDecl(ctxt);
92
- } else if ((cur == '<') && (next == '!') &&
93
- (in->cur[2] == '-') && (in->cur[3] == '-')) {
94
- - if ((!terminate) &&
95
- - (htmlParseLookupSequence(
96
- - ctxt, '-', '-', '>', 1, 1) < 0))
97
- + if ((!terminate) && (htmlParseLookupCommentEnd(ctxt) < 0))
98
- goto done;
99
- #ifdef DEBUG_PUSH
100
- xmlGenericError(xmlGenericErrorContext,
101
- --
102
- 2.25.1
103
-