nokogiri 1.10.3 → 1.12.5

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (218) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1173 -884
  4. data/LICENSE.md +1 -1
  5. data/README.md +176 -96
  6. data/dependencies.yml +28 -26
  7. data/ext/nokogiri/depend +38 -358
  8. data/ext/nokogiri/extconf.rb +716 -414
  9. data/ext/nokogiri/gumbo.c +584 -0
  10. data/ext/nokogiri/html4_document.c +166 -0
  11. data/ext/nokogiri/html4_element_description.c +294 -0
  12. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  13. data/ext/nokogiri/html4_sax_parser_context.c +120 -0
  14. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  15. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  16. data/ext/nokogiri/nokogiri.c +228 -91
  17. data/ext/nokogiri/nokogiri.h +191 -89
  18. data/ext/nokogiri/test_global_handlers.c +40 -0
  19. data/ext/nokogiri/xml_attr.c +15 -15
  20. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  21. data/ext/nokogiri/xml_cdata.c +13 -18
  22. data/ext/nokogiri/xml_comment.c +19 -26
  23. data/ext/nokogiri/xml_document.c +267 -195
  24. data/ext/nokogiri/xml_document_fragment.c +13 -15
  25. data/ext/nokogiri/xml_dtd.c +54 -48
  26. data/ext/nokogiri/xml_element_content.c +31 -26
  27. data/ext/nokogiri/xml_element_decl.c +22 -22
  28. data/ext/nokogiri/xml_encoding_handler.c +28 -17
  29. data/ext/nokogiri/xml_entity_decl.c +32 -30
  30. data/ext/nokogiri/xml_entity_reference.c +16 -18
  31. data/ext/nokogiri/xml_namespace.c +60 -51
  32. data/ext/nokogiri/xml_node.c +493 -407
  33. data/ext/nokogiri/xml_node_set.c +174 -162
  34. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  35. data/ext/nokogiri/xml_reader.c +197 -172
  36. data/ext/nokogiri/xml_relax_ng.c +52 -28
  37. data/ext/nokogiri/xml_sax_parser.c +112 -112
  38. data/ext/nokogiri/xml_sax_parser_context.c +105 -86
  39. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  40. data/ext/nokogiri/xml_schema.c +112 -33
  41. data/ext/nokogiri/xml_syntax_error.c +42 -21
  42. data/ext/nokogiri/xml_text.c +13 -17
  43. data/ext/nokogiri/xml_xpath_context.c +158 -73
  44. data/ext/nokogiri/xslt_stylesheet.c +158 -164
  45. data/gumbo-parser/CHANGES.md +63 -0
  46. data/gumbo-parser/Makefile +101 -0
  47. data/gumbo-parser/THANKS +27 -0
  48. data/gumbo-parser/src/Makefile +34 -0
  49. data/gumbo-parser/src/README.md +41 -0
  50. data/gumbo-parser/src/ascii.c +75 -0
  51. data/gumbo-parser/src/ascii.h +115 -0
  52. data/gumbo-parser/src/attribute.c +42 -0
  53. data/gumbo-parser/src/attribute.h +17 -0
  54. data/gumbo-parser/src/char_ref.c +22225 -0
  55. data/gumbo-parser/src/char_ref.h +29 -0
  56. data/gumbo-parser/src/char_ref.rl +2154 -0
  57. data/gumbo-parser/src/error.c +626 -0
  58. data/gumbo-parser/src/error.h +148 -0
  59. data/gumbo-parser/src/foreign_attrs.c +104 -0
  60. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  61. data/gumbo-parser/src/gumbo.h +943 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/parser.c +4886 -0
  65. data/gumbo-parser/src/parser.h +41 -0
  66. data/gumbo-parser/src/replacement.h +33 -0
  67. data/gumbo-parser/src/string_buffer.c +103 -0
  68. data/gumbo-parser/src/string_buffer.h +68 -0
  69. data/gumbo-parser/src/string_piece.c +48 -0
  70. data/gumbo-parser/src/svg_attrs.c +174 -0
  71. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  72. data/gumbo-parser/src/svg_tags.c +137 -0
  73. data/gumbo-parser/src/svg_tags.gperf +55 -0
  74. data/gumbo-parser/src/tag.c +222 -0
  75. data/gumbo-parser/src/tag_lookup.c +382 -0
  76. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  77. data/gumbo-parser/src/tag_lookup.h +13 -0
  78. data/gumbo-parser/src/token_buffer.c +79 -0
  79. data/gumbo-parser/src/token_buffer.h +71 -0
  80. data/gumbo-parser/src/token_type.h +17 -0
  81. data/gumbo-parser/src/tokenizer.c +3463 -0
  82. data/gumbo-parser/src/tokenizer.h +112 -0
  83. data/gumbo-parser/src/tokenizer_states.h +339 -0
  84. data/gumbo-parser/src/utf8.c +245 -0
  85. data/gumbo-parser/src/utf8.h +164 -0
  86. data/gumbo-parser/src/util.c +68 -0
  87. data/gumbo-parser/src/util.h +30 -0
  88. data/gumbo-parser/src/vector.c +111 -0
  89. data/gumbo-parser/src/vector.h +45 -0
  90. data/lib/nokogiri/css/node.rb +1 -0
  91. data/lib/nokogiri/css/parser.rb +64 -63
  92. data/lib/nokogiri/css/parser.y +3 -3
  93. data/lib/nokogiri/css/parser_extras.rb +39 -36
  94. data/lib/nokogiri/css/syntax_error.rb +2 -1
  95. data/lib/nokogiri/css/tokenizer.rb +105 -103
  96. data/lib/nokogiri/css/xpath_visitor.rb +73 -43
  97. data/lib/nokogiri/css.rb +15 -14
  98. data/lib/nokogiri/decorators/slop.rb +1 -0
  99. data/lib/nokogiri/extension.rb +31 -0
  100. data/lib/nokogiri/gumbo.rb +14 -0
  101. data/lib/nokogiri/html.rb +32 -27
  102. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  103. data/lib/nokogiri/{html → html4}/document.rb +17 -30
  104. data/lib/nokogiri/{html → html4}/document_fragment.rb +18 -17
  105. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  106. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
  107. data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
  108. data/lib/nokogiri/{html → html4}/sax/parser.rb +12 -14
  109. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  110. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +6 -5
  111. data/lib/nokogiri/html4.rb +40 -0
  112. data/lib/nokogiri/html5/document.rb +74 -0
  113. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  114. data/lib/nokogiri/html5/node.rb +93 -0
  115. data/lib/nokogiri/html5.rb +473 -0
  116. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  117. data/lib/nokogiri/syntax_error.rb +1 -0
  118. data/lib/nokogiri/version/constant.rb +5 -0
  119. data/lib/nokogiri/version/info.rb +215 -0
  120. data/lib/nokogiri/version.rb +3 -109
  121. data/lib/nokogiri/xml/attr.rb +1 -0
  122. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  123. data/lib/nokogiri/xml/builder.rb +74 -32
  124. data/lib/nokogiri/xml/cdata.rb +1 -0
  125. data/lib/nokogiri/xml/character_data.rb +1 -0
  126. data/lib/nokogiri/xml/document.rb +138 -41
  127. data/lib/nokogiri/xml/document_fragment.rb +5 -6
  128. data/lib/nokogiri/xml/dtd.rb +1 -0
  129. data/lib/nokogiri/xml/element_content.rb +1 -0
  130. data/lib/nokogiri/xml/element_decl.rb +1 -0
  131. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  132. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  133. data/lib/nokogiri/xml/namespace.rb +1 -0
  134. data/lib/nokogiri/xml/node/save_options.rb +2 -1
  135. data/lib/nokogiri/xml/node.rb +629 -293
  136. data/lib/nokogiri/xml/node_set.rb +1 -0
  137. data/lib/nokogiri/xml/notation.rb +1 -0
  138. data/lib/nokogiri/xml/parse_options.rb +12 -3
  139. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  140. data/lib/nokogiri/xml/pp/node.rb +1 -0
  141. data/lib/nokogiri/xml/pp.rb +3 -2
  142. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  143. data/lib/nokogiri/xml/reader.rb +9 -12
  144. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  145. data/lib/nokogiri/xml/sax/document.rb +25 -30
  146. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  147. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  148. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  149. data/lib/nokogiri/xml/sax.rb +5 -4
  150. data/lib/nokogiri/xml/schema.rb +13 -4
  151. data/lib/nokogiri/xml/searchable.rb +25 -16
  152. data/lib/nokogiri/xml/syntax_error.rb +1 -0
  153. data/lib/nokogiri/xml/text.rb +1 -0
  154. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  155. data/lib/nokogiri/xml/xpath.rb +4 -5
  156. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  157. data/lib/nokogiri/xml.rb +36 -36
  158. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  159. data/lib/nokogiri/xslt.rb +17 -16
  160. data/lib/nokogiri.rb +32 -51
  161. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  162. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  163. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  164. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  165. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  166. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  167. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  168. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  169. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  170. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  171. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  172. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  173. metadata +151 -153
  174. data/ext/nokogiri/html_document.c +0 -170
  175. data/ext/nokogiri/html_document.h +0 -10
  176. data/ext/nokogiri/html_element_description.c +0 -279
  177. data/ext/nokogiri/html_element_description.h +0 -10
  178. data/ext/nokogiri/html_entity_lookup.c +0 -32
  179. data/ext/nokogiri/html_entity_lookup.h +0 -8
  180. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  181. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  182. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  183. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  184. data/ext/nokogiri/xml_attr.h +0 -9
  185. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  186. data/ext/nokogiri/xml_cdata.h +0 -9
  187. data/ext/nokogiri/xml_comment.h +0 -9
  188. data/ext/nokogiri/xml_document.h +0 -23
  189. data/ext/nokogiri/xml_document_fragment.h +0 -10
  190. data/ext/nokogiri/xml_dtd.h +0 -10
  191. data/ext/nokogiri/xml_element_content.h +0 -10
  192. data/ext/nokogiri/xml_element_decl.h +0 -9
  193. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  194. data/ext/nokogiri/xml_entity_decl.h +0 -10
  195. data/ext/nokogiri/xml_entity_reference.h +0 -9
  196. data/ext/nokogiri/xml_io.c +0 -61
  197. data/ext/nokogiri/xml_io.h +0 -11
  198. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  199. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  200. data/ext/nokogiri/xml_namespace.h +0 -14
  201. data/ext/nokogiri/xml_node.h +0 -13
  202. data/ext/nokogiri/xml_node_set.h +0 -12
  203. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  204. data/ext/nokogiri/xml_reader.h +0 -10
  205. data/ext/nokogiri/xml_relax_ng.h +0 -9
  206. data/ext/nokogiri/xml_sax_parser.h +0 -39
  207. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  208. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  209. data/ext/nokogiri/xml_schema.h +0 -9
  210. data/ext/nokogiri/xml_syntax_error.h +0 -13
  211. data/ext/nokogiri/xml_text.h +0 -9
  212. data/ext/nokogiri/xml_xpath_context.h +0 -10
  213. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  214. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  215. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  216. data/patches/libxslt/0001-Fix-security-framework-bypass.patch +0 -120
  217. data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
  218. data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
@@ -1,78 +0,0 @@
1
- From c5538465c08a8ea248a370bf55bc39cd3385e4af Mon Sep 17 00:00:00 2001
2
- From: Mike Dalessio <mike.dalessio@gmail.com>
3
- Date: Thu, 29 Mar 2018 14:09:00 -0400
4
- Subject: [PATCH] Revert "Do not URI escape in server side includes"
5
-
6
- This reverts commit 960f0e275616cadc29671a218d7fb9b69eb35588.
7
- ---
8
- HTMLtree.c | 49 +++++++++++--------------------------------------
9
- 1 file changed, 11 insertions(+), 38 deletions(-)
10
-
11
- diff --git a/HTMLtree.c b/HTMLtree.c
12
- index 2fd0c9c..67160c5 100644
13
- --- a/HTMLtree.c
14
- +++ b/HTMLtree.c
15
- @@ -717,49 +717,22 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
16
- (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
17
- ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
18
- (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
19
- + xmlChar *escaped;
20
- xmlChar *tmp = value;
21
- - /* xmlURIEscapeStr() escapes '"' so it can be safely used. */
22
- - xmlBufCCat(buf->buffer, "\"");
23
-
24
- while (IS_BLANK_CH(*tmp)) tmp++;
25
-
26
- - /* URI Escape everything, except server side includes. */
27
- - for ( ; ; ) {
28
- - xmlChar *escaped;
29
- - xmlChar endChar;
30
- - xmlChar *end = NULL;
31
- - xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--");
32
- - if (start != NULL) {
33
- - end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->");
34
- - if (end != NULL) {
35
- - *start = '\0';
36
- - }
37
- - }
38
- -
39
- - /* Escape the whole string, or until start (set to '\0'). */
40
- - escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
41
- - if (escaped != NULL) {
42
- - xmlBufCat(buf->buffer, escaped);
43
- - xmlFree(escaped);
44
- - } else {
45
- - xmlBufCat(buf->buffer, tmp);
46
- - }
47
- -
48
- - if (end == NULL) { /* Everything has been written. */
49
- - break;
50
- - }
51
- -
52
- - /* Do not escape anything within server side includes. */
53
- - *start = '<'; /* Restore the first character of "<!--". */
54
- - end += 3; /* strlen("-->") */
55
- - endChar = *end;
56
- - *end = '\0';
57
- - xmlBufCat(buf->buffer, start);
58
- - *end = endChar;
59
- - tmp = end;
60
- + /*
61
- + * the < and > have already been escaped at the entity level
62
- + * And doing so here breaks server side includes
63
- + */
64
- + escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>");
65
- + if (escaped != NULL) {
66
- + xmlBufWriteQuotedString(buf->buffer, escaped);
67
- + xmlFree(escaped);
68
- + } else {
69
- + xmlBufWriteQuotedString(buf->buffer, value);
70
- }
71
- -
72
- - xmlBufCCat(buf->buffer, "\"");
73
- } else {
74
- xmlBufWriteQuotedString(buf->buffer, value);
75
- }
76
- --
77
- 2.9.5
78
-
@@ -1,120 +0,0 @@
1
- From e03553605b45c88f0b4b2980adfbbb8f6fca2fd6 Mon Sep 17 00:00:00 2001
2
- From: Nick Wellnhofer <wellnhofer@aevum.de>
3
- Date: Sun, 24 Mar 2019 09:51:39 +0100
4
- Subject: [PATCH] Fix security framework bypass
5
-
6
- xsltCheckRead and xsltCheckWrite return -1 in case of error but callers
7
- don't check for this condition and allow access. With a specially
8
- crafted URL, xsltCheckRead could be tricked into returning an error
9
- because of a supposedly invalid URL that would still be loaded
10
- succesfully later on.
11
-
12
- Fixes #12.
13
-
14
- Thanks to Felix Wilhelm for the report.
15
- ---
16
- libxslt/documents.c | 18 ++++++++++--------
17
- libxslt/imports.c | 9 +++++----
18
- libxslt/transform.c | 9 +++++----
19
- libxslt/xslt.c | 9 +++++----
20
- 4 files changed, 25 insertions(+), 20 deletions(-)
21
-
22
- diff --git a/libxslt/documents.c b/libxslt/documents.c
23
- index 3f3a731..4aad11b 100644
24
- --- a/libxslt/documents.c
25
- +++ b/libxslt/documents.c
26
- @@ -296,10 +296,11 @@ xsltLoadDocument(xsltTransformContextPtr ctxt, const xmlChar *URI) {
27
- int res;
28
-
29
- res = xsltCheckRead(ctxt->sec, ctxt, URI);
30
- - if (res == 0) {
31
- - xsltTransformError(ctxt, NULL, NULL,
32
- - "xsltLoadDocument: read rights for %s denied\n",
33
- - URI);
34
- + if (res <= 0) {
35
- + if (res == 0)
36
- + xsltTransformError(ctxt, NULL, NULL,
37
- + "xsltLoadDocument: read rights for %s denied\n",
38
- + URI);
39
- return(NULL);
40
- }
41
- }
42
- @@ -372,10 +373,11 @@ xsltLoadStyleDocument(xsltStylesheetPtr style, const xmlChar *URI) {
43
- int res;
44
-
45
- res = xsltCheckRead(sec, NULL, URI);
46
- - if (res == 0) {
47
- - xsltTransformError(NULL, NULL, NULL,
48
- - "xsltLoadStyleDocument: read rights for %s denied\n",
49
- - URI);
50
- + if (res <= 0) {
51
- + if (res == 0)
52
- + xsltTransformError(NULL, NULL, NULL,
53
- + "xsltLoadStyleDocument: read rights for %s denied\n",
54
- + URI);
55
- return(NULL);
56
- }
57
- }
58
- diff --git a/libxslt/imports.c b/libxslt/imports.c
59
- index 874870c..3783b24 100644
60
- --- a/libxslt/imports.c
61
- +++ b/libxslt/imports.c
62
- @@ -130,10 +130,11 @@ xsltParseStylesheetImport(xsltStylesheetPtr style, xmlNodePtr cur) {
63
- int secres;
64
-
65
- secres = xsltCheckRead(sec, NULL, URI);
66
- - if (secres == 0) {
67
- - xsltTransformError(NULL, NULL, NULL,
68
- - "xsl:import: read rights for %s denied\n",
69
- - URI);
70
- + if (secres <= 0) {
71
- + if (secres == 0)
72
- + xsltTransformError(NULL, NULL, NULL,
73
- + "xsl:import: read rights for %s denied\n",
74
- + URI);
75
- goto error;
76
- }
77
- }
78
- diff --git a/libxslt/transform.c b/libxslt/transform.c
79
- index 1379391..0636dbd 100644
80
- --- a/libxslt/transform.c
81
- +++ b/libxslt/transform.c
82
- @@ -3493,10 +3493,11 @@ xsltDocumentElem(xsltTransformContextPtr ctxt, xmlNodePtr node,
83
- */
84
- if (ctxt->sec != NULL) {
85
- ret = xsltCheckWrite(ctxt->sec, ctxt, filename);
86
- - if (ret == 0) {
87
- - xsltTransformError(ctxt, NULL, inst,
88
- - "xsltDocumentElem: write rights for %s denied\n",
89
- - filename);
90
- + if (ret <= 0) {
91
- + if (ret == 0)
92
- + xsltTransformError(ctxt, NULL, inst,
93
- + "xsltDocumentElem: write rights for %s denied\n",
94
- + filename);
95
- xmlFree(URL);
96
- xmlFree(filename);
97
- return;
98
- diff --git a/libxslt/xslt.c b/libxslt/xslt.c
99
- index 780a5ad..a234eb7 100644
100
- --- a/libxslt/xslt.c
101
- +++ b/libxslt/xslt.c
102
- @@ -6763,10 +6763,11 @@ xsltParseStylesheetFile(const xmlChar* filename) {
103
- int res;
104
-
105
- res = xsltCheckRead(sec, NULL, filename);
106
- - if (res == 0) {
107
- - xsltTransformError(NULL, NULL, NULL,
108
- - "xsltParseStylesheetFile: read rights for %s denied\n",
109
- - filename);
110
- + if (res <= 0) {
111
- + if (res == 0)
112
- + xsltTransformError(NULL, NULL, NULL,
113
- + "xsltParseStylesheetFile: read rights for %s denied\n",
114
- + filename);
115
- return(NULL);
116
- }
117
- }
118
- --
119
- 2.17.1
120
-
Binary file
Binary file