nokogiri 1.9.1 → 1.15.3

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (226) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +45 -0
  3. data/LICENSE-DEPENDENCIES.md +1636 -1024
  4. data/LICENSE.md +5 -28
  5. data/README.md +203 -89
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +33 -61
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +864 -418
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +215 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +17 -17
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +40 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +401 -240
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +135 -61
  33. data/ext/nokogiri/xml_node.c +1346 -677
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +347 -212
  37. data/ext/nokogiri/xml_relax_ng.c +86 -77
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +145 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +64 -36
  41. data/ext/nokogiri/xml_schema.c +138 -81
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +36 -26
  44. data/ext/nokogiri/xml_xpath_context.c +366 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +111 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +107 -104
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +224 -95
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +392 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +223 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +98 -54
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -126
  133. data/lib/nokogiri/xml/document_fragment.rb +93 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +45 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1088 -418
  142. data/lib/nokogiri/xml/node_set.rb +173 -63
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +38 -37
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/0001-Remove-script-macro-support.patch +40 -0
  169. data/patches/libxml2/0002-Update-entities-to-remove-handling-of-ssi.patch +44 -0
  170. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
  171. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  172. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  173. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  174. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  175. data/ports/archives/libxml2-2.11.4.tar.xz +0 -0
  176. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
  177. metadata +128 -265
  178. data/ext/nokogiri/html_document.c +0 -170
  179. data/ext/nokogiri/html_document.h +0 -10
  180. data/ext/nokogiri/html_element_description.c +0 -279
  181. data/ext/nokogiri/html_element_description.h +0 -10
  182. data/ext/nokogiri/html_entity_lookup.c +0 -32
  183. data/ext/nokogiri/html_entity_lookup.h +0 -8
  184. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  185. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  186. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  187. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  188. data/ext/nokogiri/xml_attr.h +0 -9
  189. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  190. data/ext/nokogiri/xml_cdata.h +0 -9
  191. data/ext/nokogiri/xml_comment.h +0 -9
  192. data/ext/nokogiri/xml_document.h +0 -23
  193. data/ext/nokogiri/xml_document_fragment.h +0 -10
  194. data/ext/nokogiri/xml_dtd.h +0 -10
  195. data/ext/nokogiri/xml_element_content.h +0 -10
  196. data/ext/nokogiri/xml_element_decl.h +0 -9
  197. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  198. data/ext/nokogiri/xml_entity_decl.h +0 -10
  199. data/ext/nokogiri/xml_entity_reference.h +0 -9
  200. data/ext/nokogiri/xml_io.c +0 -61
  201. data/ext/nokogiri/xml_io.h +0 -11
  202. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  203. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  204. data/ext/nokogiri/xml_namespace.h +0 -14
  205. data/ext/nokogiri/xml_node.h +0 -13
  206. data/ext/nokogiri/xml_node_set.h +0 -12
  207. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  208. data/ext/nokogiri/xml_reader.h +0 -10
  209. data/ext/nokogiri/xml_relax_ng.h +0 -9
  210. data/ext/nokogiri/xml_sax_parser.h +0 -39
  211. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  212. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  213. data/ext/nokogiri/xml_schema.h +0 -9
  214. data/ext/nokogiri/xml_syntax_error.h +0 -13
  215. data/ext/nokogiri/xml_text.h +0 -9
  216. data/ext/nokogiri/xml_xpath_context.h +0 -10
  217. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  218. data/lib/nokogiri/html/document.rb +0 -335
  219. data/lib/nokogiri/html/document_fragment.rb +0 -49
  220. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  221. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  222. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  223. data/patches/libxml2/0002-Fix-nullptr-deref-with-XPath-logic-ops.patch +0 -54
  224. data/patches/libxml2/0003-Fix-infinite-loop-in-LZMA-decompression.patch +0 -50
  225. data/ports/archives/libxml2-2.9.8.tar.gz +0 -0
  226. data/ports/archives/libxslt-1.1.32.tar.gz +0 -0
@@ -1,78 +0,0 @@
1
- From c5538465c08a8ea248a370bf55bc39cd3385e4af Mon Sep 17 00:00:00 2001
2
- From: Mike Dalessio <mike.dalessio@gmail.com>
3
- Date: Thu, 29 Mar 2018 14:09:00 -0400
4
- Subject: [PATCH] Revert "Do not URI escape in server side includes"
5
-
6
- This reverts commit 960f0e275616cadc29671a218d7fb9b69eb35588.
7
- ---
8
- HTMLtree.c | 49 +++++++++++--------------------------------------
9
- 1 file changed, 11 insertions(+), 38 deletions(-)
10
-
11
- diff --git a/HTMLtree.c b/HTMLtree.c
12
- index 2fd0c9c..67160c5 100644
13
- --- a/HTMLtree.c
14
- +++ b/HTMLtree.c
15
- @@ -717,49 +717,22 @@ htmlAttrDumpOutput(xmlOutputBufferPtr buf, xmlDocPtr doc, xmlAttrPtr cur,
16
- (!xmlStrcasecmp(cur->name, BAD_CAST "src")) ||
17
- ((!xmlStrcasecmp(cur->name, BAD_CAST "name")) &&
18
- (!xmlStrcasecmp(cur->parent->name, BAD_CAST "a"))))) {
19
- + xmlChar *escaped;
20
- xmlChar *tmp = value;
21
- - /* xmlURIEscapeStr() escapes '"' so it can be safely used. */
22
- - xmlBufCCat(buf->buffer, "\"");
23
-
24
- while (IS_BLANK_CH(*tmp)) tmp++;
25
-
26
- - /* URI Escape everything, except server side includes. */
27
- - for ( ; ; ) {
28
- - xmlChar *escaped;
29
- - xmlChar endChar;
30
- - xmlChar *end = NULL;
31
- - xmlChar *start = (xmlChar *)xmlStrstr(tmp, BAD_CAST "<!--");
32
- - if (start != NULL) {
33
- - end = (xmlChar *)xmlStrstr(tmp, BAD_CAST "-->");
34
- - if (end != NULL) {
35
- - *start = '\0';
36
- - }
37
- - }
38
- -
39
- - /* Escape the whole string, or until start (set to '\0'). */
40
- - escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+");
41
- - if (escaped != NULL) {
42
- - xmlBufCat(buf->buffer, escaped);
43
- - xmlFree(escaped);
44
- - } else {
45
- - xmlBufCat(buf->buffer, tmp);
46
- - }
47
- -
48
- - if (end == NULL) { /* Everything has been written. */
49
- - break;
50
- - }
51
- -
52
- - /* Do not escape anything within server side includes. */
53
- - *start = '<'; /* Restore the first character of "<!--". */
54
- - end += 3; /* strlen("-->") */
55
- - endChar = *end;
56
- - *end = '\0';
57
- - xmlBufCat(buf->buffer, start);
58
- - *end = endChar;
59
- - tmp = end;
60
- + /*
61
- + * the < and > have already been escaped at the entity level
62
- + * And doing so here breaks server side includes
63
- + */
64
- + escaped = xmlURIEscapeStr(tmp, BAD_CAST"@/:=?;#%&,+<>");
65
- + if (escaped != NULL) {
66
- + xmlBufWriteQuotedString(buf->buffer, escaped);
67
- + xmlFree(escaped);
68
- + } else {
69
- + xmlBufWriteQuotedString(buf->buffer, value);
70
- }
71
- -
72
- - xmlBufCCat(buf->buffer, "\"");
73
- } else {
74
- xmlBufWriteQuotedString(buf->buffer, value);
75
- }
76
- --
77
- 2.9.5
78
-
@@ -1,54 +0,0 @@
1
- From a436374994c47b12d5de1b8b1d191a098fa23594 Mon Sep 17 00:00:00 2001
2
- From: Nick Wellnhofer <wellnhofer@aevum.de>
3
- Date: Mon, 30 Jul 2018 12:54:38 +0200
4
- Subject: [PATCH] Fix nullptr deref with XPath logic ops
5
-
6
- If the XPath stack is corrupted, for example by a misbehaving extension
7
- function, the "and" and "or" XPath operators could dereference NULL
8
- pointers. Check that the XPath stack isn't empty and optimize the
9
- logic operators slightly.
10
-
11
- Closes: https://gitlab.gnome.org/GNOME/libxml2/issues/5
12
-
13
- Also see
14
- https://bugs.debian.org/cgi-bin/bugreport.cgi?bug=901817
15
- https://bugzilla.redhat.com/show_bug.cgi?id=1595985
16
-
17
- This is CVE-2018-14404.
18
-
19
- Thanks to Guy Inbar for the report.
20
- ---
21
- xpath.c | 10 ++++------
22
- 1 file changed, 4 insertions(+), 6 deletions(-)
23
-
24
- diff --git a/xpath.c b/xpath.c
25
- index 3fae0bf..5e3bb9f 100644
26
- --- a/xpath.c
27
- +++ b/xpath.c
28
- @@ -13234,9 +13234,8 @@ xmlXPathCompOpEval(xmlXPathParserContextPtr ctxt, xmlXPathStepOpPtr op)
29
- return(0);
30
- }
31
- xmlXPathBooleanFunction(ctxt, 1);
32
- - arg1 = valuePop(ctxt);
33
- - arg1->boolval &= arg2->boolval;
34
- - valuePush(ctxt, arg1);
35
- + if (ctxt->value != NULL)
36
- + ctxt->value->boolval &= arg2->boolval;
37
- xmlXPathReleaseObject(ctxt->context, arg2);
38
- return (total);
39
- case XPATH_OP_OR:
40
- @@ -13252,9 +13251,8 @@ xmlXPathCompOpEval(xmlXPathParserContextPtr ctxt, xmlXPathStepOpPtr op)
41
- return(0);
42
- }
43
- xmlXPathBooleanFunction(ctxt, 1);
44
- - arg1 = valuePop(ctxt);
45
- - arg1->boolval |= arg2->boolval;
46
- - valuePush(ctxt, arg1);
47
- + if (ctxt->value != NULL)
48
- + ctxt->value->boolval |= arg2->boolval;
49
- xmlXPathReleaseObject(ctxt->context, arg2);
50
- return (total);
51
- case XPATH_OP_EQUAL:
52
- --
53
- 2.17.1
54
-
@@ -1,50 +0,0 @@
1
- From 2240fbf5912054af025fb6e01e26375100275e74 Mon Sep 17 00:00:00 2001
2
- From: Nick Wellnhofer <wellnhofer@aevum.de>
3
- Date: Mon, 30 Jul 2018 13:14:11 +0200
4
- Subject: [PATCH] Fix infinite loop in LZMA decompression
5
- MIME-Version: 1.0
6
- Content-Type: text/plain; charset=UTF-8
7
- Content-Transfer-Encoding: 8bit
8
-
9
- Check the liblzma error code more thoroughly to avoid infinite loops.
10
-
11
- Closes: https://gitlab.gnome.org/GNOME/libxml2/issues/13
12
- Closes: https://bugzilla.gnome.org/show_bug.cgi?id=794914
13
-
14
- This is CVE-2018-9251 and CVE-2018-14567.
15
-
16
- Thanks to Dongliang Mu and Simon Wörner for the reports.
17
- ---
18
- xzlib.c | 9 +++++++++
19
- 1 file changed, 9 insertions(+)
20
-
21
- diff --git a/xzlib.c b/xzlib.c
22
- index a839169..0ba88cf 100644
23
- --- a/xzlib.c
24
- +++ b/xzlib.c
25
- @@ -562,6 +562,10 @@ xz_decomp(xz_statep state)
26
- "internal error: inflate stream corrupt");
27
- return -1;
28
- }
29
- + /*
30
- + * FIXME: Remapping a couple of error codes and falling through
31
- + * to the LZMA error handling looks fragile.
32
- + */
33
- if (ret == Z_MEM_ERROR)
34
- ret = LZMA_MEM_ERROR;
35
- if (ret == Z_DATA_ERROR)
36
- @@ -587,6 +591,11 @@ xz_decomp(xz_statep state)
37
- xz_error(state, LZMA_PROG_ERROR, "compression error");
38
- return -1;
39
- }
40
- + if ((state->how != GZIP) &&
41
- + (ret != LZMA_OK) && (ret != LZMA_STREAM_END)) {
42
- + xz_error(state, ret, "lzma error");
43
- + return -1;
44
- + }
45
- } while (strm->avail_out && ret != LZMA_STREAM_END);
46
-
47
- /* update available output and crc check value */
48
- --
49
- 2.17.1
50
-
Binary file
Binary file