nokogiri 1.10.10 → 1.12.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (216) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +3 -0
  3. data/LICENSE-DEPENDENCIES.md +1173 -884
  4. data/LICENSE.md +1 -1
  5. data/README.md +176 -96
  6. data/dependencies.yml +12 -12
  7. data/ext/nokogiri/depend +38 -358
  8. data/ext/nokogiri/extconf.rb +712 -414
  9. data/ext/nokogiri/gumbo.c +584 -0
  10. data/ext/nokogiri/html4_document.c +166 -0
  11. data/ext/nokogiri/html4_element_description.c +294 -0
  12. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  13. data/ext/nokogiri/html4_sax_parser_context.c +119 -0
  14. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  15. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  16. data/ext/nokogiri/nokogiri.c +228 -91
  17. data/ext/nokogiri/nokogiri.h +188 -89
  18. data/ext/nokogiri/test_global_handlers.c +40 -0
  19. data/ext/nokogiri/xml_attr.c +15 -15
  20. data/ext/nokogiri/xml_attribute_decl.c +18 -18
  21. data/ext/nokogiri/xml_cdata.c +13 -18
  22. data/ext/nokogiri/xml_comment.c +19 -26
  23. data/ext/nokogiri/xml_document.c +267 -195
  24. data/ext/nokogiri/xml_document_fragment.c +13 -15
  25. data/ext/nokogiri/xml_dtd.c +54 -48
  26. data/ext/nokogiri/xml_element_content.c +31 -26
  27. data/ext/nokogiri/xml_element_decl.c +22 -22
  28. data/ext/nokogiri/xml_encoding_handler.c +28 -17
  29. data/ext/nokogiri/xml_entity_decl.c +32 -30
  30. data/ext/nokogiri/xml_entity_reference.c +16 -18
  31. data/ext/nokogiri/xml_namespace.c +58 -49
  32. data/ext/nokogiri/xml_node.c +489 -410
  33. data/ext/nokogiri/xml_node_set.c +174 -162
  34. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  35. data/ext/nokogiri/xml_reader.c +197 -172
  36. data/ext/nokogiri/xml_relax_ng.c +52 -28
  37. data/ext/nokogiri/xml_sax_parser.c +112 -112
  38. data/ext/nokogiri/xml_sax_parser_context.c +105 -86
  39. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  40. data/ext/nokogiri/xml_schema.c +96 -46
  41. data/ext/nokogiri/xml_syntax_error.c +42 -21
  42. data/ext/nokogiri/xml_text.c +13 -17
  43. data/ext/nokogiri/xml_xpath_context.c +158 -73
  44. data/ext/nokogiri/xslt_stylesheet.c +158 -164
  45. data/gumbo-parser/CHANGES.md +63 -0
  46. data/gumbo-parser/Makefile +101 -0
  47. data/gumbo-parser/THANKS +27 -0
  48. data/gumbo-parser/src/Makefile +17 -0
  49. data/gumbo-parser/src/README.md +41 -0
  50. data/gumbo-parser/src/ascii.c +75 -0
  51. data/gumbo-parser/src/ascii.h +115 -0
  52. data/gumbo-parser/src/attribute.c +42 -0
  53. data/gumbo-parser/src/attribute.h +17 -0
  54. data/gumbo-parser/src/char_ref.c +22225 -0
  55. data/gumbo-parser/src/char_ref.h +29 -0
  56. data/gumbo-parser/src/char_ref.rl +2154 -0
  57. data/gumbo-parser/src/error.c +626 -0
  58. data/gumbo-parser/src/error.h +148 -0
  59. data/gumbo-parser/src/foreign_attrs.c +104 -0
  60. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  61. data/gumbo-parser/src/gumbo.h +943 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/parser.c +4886 -0
  65. data/gumbo-parser/src/parser.h +41 -0
  66. data/gumbo-parser/src/replacement.h +33 -0
  67. data/gumbo-parser/src/string_buffer.c +103 -0
  68. data/gumbo-parser/src/string_buffer.h +68 -0
  69. data/gumbo-parser/src/string_piece.c +48 -0
  70. data/gumbo-parser/src/svg_attrs.c +174 -0
  71. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  72. data/gumbo-parser/src/svg_tags.c +137 -0
  73. data/gumbo-parser/src/svg_tags.gperf +55 -0
  74. data/gumbo-parser/src/tag.c +222 -0
  75. data/gumbo-parser/src/tag_lookup.c +382 -0
  76. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  77. data/gumbo-parser/src/tag_lookup.h +13 -0
  78. data/gumbo-parser/src/token_buffer.c +79 -0
  79. data/gumbo-parser/src/token_buffer.h +71 -0
  80. data/gumbo-parser/src/token_type.h +17 -0
  81. data/gumbo-parser/src/tokenizer.c +3463 -0
  82. data/gumbo-parser/src/tokenizer.h +112 -0
  83. data/gumbo-parser/src/tokenizer_states.h +339 -0
  84. data/gumbo-parser/src/utf8.c +245 -0
  85. data/gumbo-parser/src/utf8.h +164 -0
  86. data/gumbo-parser/src/util.c +68 -0
  87. data/gumbo-parser/src/util.h +30 -0
  88. data/gumbo-parser/src/vector.c +111 -0
  89. data/gumbo-parser/src/vector.h +45 -0
  90. data/lib/nokogiri.rb +32 -51
  91. data/lib/nokogiri/css.rb +15 -14
  92. data/lib/nokogiri/css/node.rb +1 -0
  93. data/lib/nokogiri/css/parser.rb +64 -63
  94. data/lib/nokogiri/css/parser.y +3 -3
  95. data/lib/nokogiri/css/parser_extras.rb +39 -36
  96. data/lib/nokogiri/css/syntax_error.rb +2 -1
  97. data/lib/nokogiri/css/tokenizer.rb +1 -0
  98. data/lib/nokogiri/css/xpath_visitor.rb +73 -43
  99. data/lib/nokogiri/decorators/slop.rb +1 -0
  100. data/lib/nokogiri/extension.rb +26 -0
  101. data/lib/nokogiri/gumbo.rb +14 -0
  102. data/lib/nokogiri/html.rb +32 -27
  103. data/lib/nokogiri/html4.rb +40 -0
  104. data/lib/nokogiri/{html → html4}/builder.rb +3 -2
  105. data/lib/nokogiri/{html → html4}/document.rb +17 -30
  106. data/lib/nokogiri/{html → html4}/document_fragment.rb +18 -17
  107. data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
  108. data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
  109. data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
  110. data/lib/nokogiri/{html → html4}/sax/parser.rb +12 -14
  111. data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
  112. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +6 -5
  113. data/lib/nokogiri/html5.rb +473 -0
  114. data/lib/nokogiri/html5/document.rb +74 -0
  115. data/lib/nokogiri/html5/document_fragment.rb +80 -0
  116. data/lib/nokogiri/html5/node.rb +93 -0
  117. data/lib/nokogiri/jruby/dependencies.rb +20 -0
  118. data/lib/nokogiri/syntax_error.rb +1 -0
  119. data/lib/nokogiri/version.rb +3 -109
  120. data/lib/nokogiri/version/constant.rb +5 -0
  121. data/lib/nokogiri/version/info.rb +215 -0
  122. data/lib/nokogiri/xml.rb +36 -36
  123. data/lib/nokogiri/xml/attr.rb +1 -0
  124. data/lib/nokogiri/xml/attribute_decl.rb +1 -0
  125. data/lib/nokogiri/xml/builder.rb +3 -2
  126. data/lib/nokogiri/xml/cdata.rb +1 -0
  127. data/lib/nokogiri/xml/character_data.rb +1 -0
  128. data/lib/nokogiri/xml/document.rb +92 -41
  129. data/lib/nokogiri/xml/document_fragment.rb +5 -6
  130. data/lib/nokogiri/xml/dtd.rb +1 -0
  131. data/lib/nokogiri/xml/element_content.rb +1 -0
  132. data/lib/nokogiri/xml/element_decl.rb +1 -0
  133. data/lib/nokogiri/xml/entity_decl.rb +1 -0
  134. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  135. data/lib/nokogiri/xml/namespace.rb +1 -0
  136. data/lib/nokogiri/xml/node.rb +629 -293
  137. data/lib/nokogiri/xml/node/save_options.rb +1 -0
  138. data/lib/nokogiri/xml/node_set.rb +1 -0
  139. data/lib/nokogiri/xml/notation.rb +1 -0
  140. data/lib/nokogiri/xml/parse_options.rb +12 -3
  141. data/lib/nokogiri/xml/pp.rb +3 -2
  142. data/lib/nokogiri/xml/pp/character_data.rb +1 -0
  143. data/lib/nokogiri/xml/pp/node.rb +1 -0
  144. data/lib/nokogiri/xml/processing_instruction.rb +1 -0
  145. data/lib/nokogiri/xml/reader.rb +9 -12
  146. data/lib/nokogiri/xml/relax_ng.rb +7 -2
  147. data/lib/nokogiri/xml/sax.rb +5 -4
  148. data/lib/nokogiri/xml/sax/document.rb +25 -30
  149. data/lib/nokogiri/xml/sax/parser.rb +1 -0
  150. data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
  151. data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
  152. data/lib/nokogiri/xml/schema.rb +13 -4
  153. data/lib/nokogiri/xml/searchable.rb +25 -16
  154. data/lib/nokogiri/xml/syntax_error.rb +1 -0
  155. data/lib/nokogiri/xml/text.rb +1 -0
  156. data/lib/nokogiri/xml/xpath.rb +4 -5
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  158. data/lib/nokogiri/xml/xpath_context.rb +1 -0
  159. data/lib/nokogiri/xslt.rb +17 -16
  160. data/lib/nokogiri/xslt/stylesheet.rb +2 -1
  161. data/lib/xsd/xmlparser/nokogiri.rb +1 -0
  162. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  163. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  164. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  165. data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
  166. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  167. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
  168. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
  169. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
  170. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
  171. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  172. metadata +139 -161
  173. data/ext/nokogiri/html_document.c +0 -170
  174. data/ext/nokogiri/html_document.h +0 -10
  175. data/ext/nokogiri/html_element_description.c +0 -279
  176. data/ext/nokogiri/html_element_description.h +0 -10
  177. data/ext/nokogiri/html_entity_lookup.c +0 -32
  178. data/ext/nokogiri/html_entity_lookup.h +0 -8
  179. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  180. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  181. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  182. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  183. data/ext/nokogiri/xml_attr.h +0 -9
  184. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  185. data/ext/nokogiri/xml_cdata.h +0 -9
  186. data/ext/nokogiri/xml_comment.h +0 -9
  187. data/ext/nokogiri/xml_document.h +0 -23
  188. data/ext/nokogiri/xml_document_fragment.h +0 -10
  189. data/ext/nokogiri/xml_dtd.h +0 -10
  190. data/ext/nokogiri/xml_element_content.h +0 -10
  191. data/ext/nokogiri/xml_element_decl.h +0 -9
  192. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  193. data/ext/nokogiri/xml_entity_decl.h +0 -10
  194. data/ext/nokogiri/xml_entity_reference.h +0 -9
  195. data/ext/nokogiri/xml_io.c +0 -61
  196. data/ext/nokogiri/xml_io.h +0 -11
  197. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  198. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  199. data/ext/nokogiri/xml_namespace.h +0 -14
  200. data/ext/nokogiri/xml_node.h +0 -13
  201. data/ext/nokogiri/xml_node_set.h +0 -12
  202. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  203. data/ext/nokogiri/xml_reader.h +0 -10
  204. data/ext/nokogiri/xml_relax_ng.h +0 -9
  205. data/ext/nokogiri/xml_sax_parser.h +0 -39
  206. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  207. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  208. data/ext/nokogiri/xml_schema.h +0 -9
  209. data/ext/nokogiri/xml_syntax_error.h +0 -13
  210. data/ext/nokogiri/xml_text.h +0 -9
  211. data/ext/nokogiri/xml_xpath_context.h +0 -10
  212. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  213. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  214. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  215. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  216. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
@@ -0,0 +1,30 @@
1
+ #ifndef GUMBO_UTIL_H_
2
+ #define GUMBO_UTIL_H_
3
+
4
+ #include <stdbool.h>
5
+ #include <stddef.h>
6
+ #include "macros.h"
7
+
8
+ #ifdef __cplusplus
9
+ extern "C" {
10
+ #endif
11
+
12
+ // Utility function for allocating & copying a null-terminated string into a
13
+ // freshly-allocated buffer. This is necessary for proper memory management; we
14
+ // have the convention that all const char* in parse tree structures are
15
+ // freshly-allocated, so if we didn't copy, we'd try to delete a literal string
16
+ // when the parse tree is destroyed.
17
+ char* gumbo_strdup(const char* str) XMALLOC NONNULL_ARGS;
18
+
19
+ void* gumbo_alloc(size_t size) XMALLOC;
20
+ void* gumbo_realloc(void* ptr, size_t size) RETURNS_NONNULL;
21
+ void gumbo_free(void* ptr);
22
+
23
+ // Debug wrapper for printf
24
+ void gumbo_debug(const char* format, ...) PRINTF(1);
25
+
26
+ #ifdef __cplusplus
27
+ }
28
+ #endif
29
+
30
+ #endif // GUMBO_UTIL_H_
@@ -0,0 +1,111 @@
1
+ /*
2
+ Copyright 2018 Craig Barnes.
3
+ Copyright 2010 Google Inc.
4
+
5
+ Licensed under the Apache License, Version 2.0 (the "License");
6
+ you may not use this file except in compliance with the License.
7
+ You may obtain a copy of the License at
8
+
9
+ https://www.apache.org/licenses/LICENSE-2.0
10
+
11
+ Unless required by applicable law or agreed to in writing, software
12
+ distributed under the License is distributed on an "AS IS" BASIS,
13
+ WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
14
+ See the License for the specific language governing permissions and
15
+ limitations under the License.
16
+ */
17
+
18
+ #include <assert.h>
19
+ #include <stdlib.h>
20
+ #include <string.h>
21
+ #include "vector.h"
22
+ #include "util.h"
23
+
24
+ void gumbo_vector_init(unsigned int initial_capacity, GumboVector* vector) {
25
+ vector->length = 0;
26
+ vector->capacity = initial_capacity;
27
+ if (initial_capacity > 0) {
28
+ vector->data = gumbo_alloc(sizeof(void*) * initial_capacity);
29
+ } else {
30
+ vector->data = NULL;
31
+ }
32
+ }
33
+
34
+ void gumbo_vector_destroy(GumboVector* vector) {
35
+ if (vector->capacity > 0) {
36
+ gumbo_free(vector->data);
37
+ }
38
+ }
39
+
40
+ static void enlarge_vector_if_full(GumboVector* vector) {
41
+ if (vector->length >= vector->capacity) {
42
+ if (vector->capacity) {
43
+ vector->capacity *= 2;
44
+ size_t num_bytes = sizeof(void*) * vector->capacity;
45
+ vector->data = gumbo_realloc(vector->data, num_bytes);
46
+ } else {
47
+ // 0-capacity vector; no previous array to deallocate.
48
+ vector->capacity = 2;
49
+ vector->data = gumbo_alloc(sizeof(void*) * vector->capacity);
50
+ }
51
+ }
52
+ }
53
+
54
+ void gumbo_vector_add(void* element, GumboVector* vector) {
55
+ enlarge_vector_if_full(vector);
56
+ assert(vector->data);
57
+ assert(vector->length < vector->capacity);
58
+ vector->data[vector->length++] = element;
59
+ }
60
+
61
+ void* gumbo_vector_pop(GumboVector* vector) {
62
+ if (vector->length == 0) {
63
+ return NULL;
64
+ }
65
+ return vector->data[--vector->length];
66
+ }
67
+
68
+ int gumbo_vector_index_of(GumboVector* vector, const void* element) {
69
+ for (unsigned int i = 0; i < vector->length; ++i) {
70
+ if (vector->data[i] == element) {
71
+ return i;
72
+ }
73
+ }
74
+ return -1;
75
+ }
76
+
77
+ void gumbo_vector_insert_at (
78
+ void* element,
79
+ unsigned int index,
80
+ GumboVector* vector
81
+ ) {
82
+ assert(index <= vector->length);
83
+ enlarge_vector_if_full(vector);
84
+ ++vector->length;
85
+ memmove (
86
+ &vector->data[index + 1],
87
+ &vector->data[index],
88
+ sizeof(void*) * (vector->length - index - 1)
89
+ );
90
+ vector->data[index] = element;
91
+ }
92
+
93
+ void gumbo_vector_remove(void* node, GumboVector* vector) {
94
+ int index = gumbo_vector_index_of(vector, node);
95
+ if (index == -1) {
96
+ return;
97
+ }
98
+ gumbo_vector_remove_at(index, vector);
99
+ }
100
+
101
+ void* gumbo_vector_remove_at(unsigned int index, GumboVector* vector) {
102
+ assert(index < vector->length);
103
+ void* result = vector->data[index];
104
+ memmove (
105
+ &vector->data[index],
106
+ &vector->data[index + 1],
107
+ sizeof(void*) * (vector->length - index - 1)
108
+ );
109
+ --vector->length;
110
+ return result;
111
+ }
@@ -0,0 +1,45 @@
1
+ #ifndef GUMBO_VECTOR_H_
2
+ #define GUMBO_VECTOR_H_
3
+
4
+ #include "gumbo.h"
5
+
6
+ #ifdef __cplusplus
7
+ extern "C" {
8
+ #endif
9
+
10
+ // Initializes a new GumboVector with the specified initial capacity.
11
+ void gumbo_vector_init(unsigned int initial_capacity, GumboVector* vector);
12
+
13
+ // Frees the memory used by a GumboVector. Does not free the contained
14
+ // pointers.
15
+ void gumbo_vector_destroy(GumboVector* vector);
16
+
17
+ // Adds a new element to a GumboVector.
18
+ void gumbo_vector_add(void* element, GumboVector* vector);
19
+
20
+ // Removes and returns the element most recently added to the GumboVector.
21
+ // Ownership is transferred to caller. Capacity is unchanged. If the vector is
22
+ // empty, NULL is returned.
23
+ void* gumbo_vector_pop(GumboVector* vector);
24
+
25
+ // Inserts an element at a specific index. This is potentially O(N) time, but
26
+ // is necessary for some of the spec's behavior.
27
+ void gumbo_vector_insert_at (
28
+ void* element,
29
+ unsigned int index,
30
+ GumboVector* vector
31
+ );
32
+
33
+ // Removes an element from the vector, or does nothing if the element is not in
34
+ // the vector.
35
+ void gumbo_vector_remove(void* element, GumboVector* vector);
36
+
37
+ // Removes and returns an element at a specific index. Note that this is
38
+ // potentially O(N) time and should be used sparingly.
39
+ void* gumbo_vector_remove_at(unsigned int index, GumboVector* vector);
40
+
41
+ #ifdef __cplusplus
42
+ }
43
+ #endif
44
+
45
+ #endif // GUMBO_VECTOR_H_
data/lib/nokogiri.rb CHANGED
@@ -1,60 +1,30 @@
1
1
  # -*- coding: utf-8 -*-
2
+ # frozen_string_literal: true
2
3
  # Modify the PATH on windows so that the external DLLs will get loaded.
3
4
 
4
- require 'rbconfig'
5
+ require "rbconfig"
5
6
 
6
7
  if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
7
- # The line below caused a problem on non-GAE rack environment.
8
- # unless defined?(JRuby::Rack::VERSION) || defined?(AppEngine::ApiProxy)
9
- #
10
- # However, simply cutting defined?(JRuby::Rack::VERSION) off resulted in
11
- # an unable-to-load-nokogiri problem. Thus, now, Nokogiri checks the presense
12
- # of appengine-rack.jar in $LOAD_PATH. If Nokogiri is on GAE, Nokogiri
13
- # should skip loading xml jars. This is because those are in WEB-INF/lib and
14
- # already set in the classpath.
15
- unless $LOAD_PATH.to_s.include?("appengine-rack")
16
- require 'stringio'
17
- require 'isorelax.jar'
18
- require 'jing.jar'
19
- require 'nekohtml.jar'
20
- require 'nekodtd.jar'
21
- require 'xercesImpl.jar'
22
- require 'serializer.jar'
23
- require 'xalan.jar'
24
- require 'xml-apis.jar'
25
- end
8
+ require_relative "nokogiri/jruby/dependencies"
26
9
  end
27
10
 
28
- begin
29
- RUBY_VERSION =~ /(\d+\.\d+)/
30
- require "nokogiri/#{$1}/nokogiri"
31
- rescue LoadError
32
- require 'nokogiri/nokogiri'
33
- end
34
- require 'nokogiri/version'
35
- require 'nokogiri/syntax_error'
36
- require 'nokogiri/xml'
37
- require 'nokogiri/xslt'
38
- require 'nokogiri/html'
39
- require 'nokogiri/decorators/slop'
40
- require 'nokogiri/css'
41
- require 'nokogiri/html/builder'
11
+ require_relative "nokogiri/extension"
42
12
 
43
13
  # Nokogiri parses and searches XML/HTML very quickly, and also has
44
14
  # correctly implemented CSS3 selector support as well as XPath 1.0
45
15
  # support.
46
16
  #
47
17
  # Parsing a document returns either a Nokogiri::XML::Document, or a
48
- # Nokogiri::HTML::Document depending on the kind of document you parse.
18
+ # Nokogiri::HTML4::Document depending on the kind of document you parse.
49
19
  #
50
20
  # Here is an example:
51
21
  #
52
22
  # require 'nokogiri'
53
23
  # require 'open-uri'
54
24
  #
55
- # # Get a Nokogiri::HTML:Document for the page we’re interested in...
25
+ # # Get a Nokogiri::HTML4::Document for the page we’re interested in...
56
26
  #
57
- # doc = Nokogiri::HTML(open('http://www.google.com/search?q=tenderlove'))
27
+ # doc = Nokogiri::HTML4(URI.open('http://www.google.com/search?q=tenderlove'))
58
28
  #
59
29
  # # Do funky things with it using Nokogiri::XML::Node methods...
60
30
  #
@@ -70,27 +40,27 @@ module Nokogiri
70
40
  class << self
71
41
  ###
72
42
  # Parse an HTML or XML document. +string+ contains the document.
73
- def parse string, url = nil, encoding = nil, options = nil
43
+ def parse(string, url = nil, encoding = nil, options = nil)
74
44
  if string.respond_to?(:read) ||
75
45
  /^\s*<(?:!DOCTYPE\s+)?html[\s>]/i === string[0, 512]
76
46
  # Expect an HTML indicator to appear within the first 512
77
47
  # characters of a document. (<?xml ?> + <?xml-stylesheet ?>
78
48
  # shouldn't be that long)
79
- Nokogiri.HTML(string, url, encoding,
49
+ Nokogiri.HTML4(string, url, encoding,
80
50
  options || XML::ParseOptions::DEFAULT_HTML)
81
51
  else
82
52
  Nokogiri.XML(string, url, encoding,
83
53
  options || XML::ParseOptions::DEFAULT_XML)
84
- end.tap { |doc|
54
+ end.tap do |doc|
85
55
  yield doc if block_given?
86
- }
56
+ end
87
57
  end
88
58
 
89
59
  ###
90
60
  # Create a new Nokogiri::XML::DocumentFragment
91
- def make input = nil, opts = {}, &blk
61
+ def make(input = nil, opts = {}, &blk)
92
62
  if input
93
- Nokogiri::HTML.fragment(input).children.first
63
+ Nokogiri::HTML4.fragment(input).children.first
94
64
  else
95
65
  Nokogiri(&blk)
96
66
  end
@@ -119,10 +89,10 @@ module Nokogiri
119
89
  # Make sure to support some popular encoding aliases not known by
120
90
  # all iconv implementations.
121
91
  {
122
- 'Windows-31J' => 'CP932', # Windows-31J is the IANA registered name of CP932.
123
- }.each { |alias_name, name|
92
+ "Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
93
+ }.each do |alias_name, name|
124
94
  EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
125
- }
95
+ end
126
96
  end
127
97
  end
128
98
 
@@ -130,15 +100,26 @@ module Nokogiri
130
100
  end
131
101
 
132
102
  ###
133
- # Parser a document contained in +args+. Nokogiri will try to guess what
134
- # type of document you are attempting to parse. For more information, see
135
- # Nokogiri.parse
103
+ # Parse a document contained in +args+. Nokogiri will try to guess what type of document you are
104
+ # attempting to parse. For more information, see Nokogiri.parse
136
105
  #
137
- # To specify the type of document, use Nokogiri.XML or Nokogiri.HTML.
106
+ # To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.
138
107
  def Nokogiri(*args, &block)
139
108
  if block_given?
140
- Nokogiri::HTML::Builder.new(&block).doc.root
109
+ Nokogiri::HTML4::Builder.new(&block).doc.root
141
110
  else
142
111
  Nokogiri.parse(*args)
143
112
  end
144
113
  end
114
+
115
+ require_relative "nokogiri/version"
116
+ require_relative "nokogiri/syntax_error"
117
+ require_relative "nokogiri/xml"
118
+ require_relative "nokogiri/xslt"
119
+ require_relative "nokogiri/html4"
120
+ require_relative "nokogiri/html"
121
+ require_relative "nokogiri/decorators/slop"
122
+ require_relative "nokogiri/css"
123
+ require_relative "nokogiri/html4/builder"
124
+
125
+ require_relative "nokogiri/html5" if Nokogiri.uses_gumbo?
data/lib/nokogiri/css.rb CHANGED
@@ -1,27 +1,28 @@
1
- require 'nokogiri/css/node'
2
- require 'nokogiri/css/xpath_visitor'
3
- x = $-w
4
- $-w = false
5
- require 'nokogiri/css/parser'
6
- $-w = x
7
-
8
- require 'nokogiri/css/tokenizer'
9
- require 'nokogiri/css/syntax_error'
10
-
1
+ # frozen_string_literal: true
11
2
  module Nokogiri
12
3
  module CSS
13
4
  class << self
14
5
  ###
15
6
  # Parse this CSS selector in +selector+. Returns an AST.
16
- def parse selector
17
- Parser.new.parse selector
7
+ def parse(selector)
8
+ Parser.new.parse(selector)
18
9
  end
19
10
 
20
11
  ###
21
12
  # Get the XPath for +selector+.
22
- def xpath_for selector, options={}
23
- Parser.new(options[:ns] || {}).xpath_for selector, options
13
+ def xpath_for(selector, options = {})
14
+ Parser.new(options[:ns] || {}).xpath_for(selector, options)
24
15
  end
25
16
  end
26
17
  end
27
18
  end
19
+
20
+ require_relative "css/node"
21
+ require_relative "css/xpath_visitor"
22
+ x = $-w
23
+ $-w = false
24
+ require_relative "css/parser"
25
+ $-w = x
26
+
27
+ require_relative "css/tokenizer"
28
+ require_relative "css/syntax_error"
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  module Nokogiri
2
3
  module CSS
3
4
  class Node
@@ -1,13 +1,14 @@
1
+ # frozen_string_literal: true
1
2
  #
2
3
  # DO NOT MODIFY!!!!
3
- # This file is automatically generated by Racc 1.4.14
4
- # from Racc grammer file "".
4
+ # This file is automatically generated by Racc 1.5.2
5
+ # from Racc grammar file "".
5
6
  #
6
7
 
7
8
  require 'racc/parser.rb'
8
9
 
9
10
 
10
- require 'nokogiri/css/parser_extras'
11
+ require_relative "parser_extras"
11
12
 
12
13
  module Nokogiri
13
14
  module CSS
@@ -333,42 +334,42 @@ Racc_debug_parser = false
333
334
 
334
335
  def _reduce_1(val, _values, result)
335
336
  result = [val.first, val.last].flatten
336
-
337
+
337
338
  result
338
339
  end
339
340
 
340
341
  def _reduce_2(val, _values, result)
341
- result = val.flatten
342
+ result = val.flatten
342
343
  result
343
344
  end
344
345
 
345
346
  def _reduce_3(val, _values, result)
346
- result = [val.last].flatten
347
+ result = [val.last].flatten
347
348
  result
348
349
  end
349
350
 
350
351
  def _reduce_4(val, _values, result)
351
- result = :DIRECT_ADJACENT_SELECTOR
352
+ result = :DIRECT_ADJACENT_SELECTOR
352
353
  result
353
354
  end
354
355
 
355
356
  def _reduce_5(val, _values, result)
356
- result = :CHILD_SELECTOR
357
+ result = :CHILD_SELECTOR
357
358
  result
358
359
  end
359
360
 
360
361
  def _reduce_6(val, _values, result)
361
- result = :FOLLOWING_SELECTOR
362
+ result = :FOLLOWING_SELECTOR
362
363
  result
363
364
  end
364
365
 
365
366
  def _reduce_7(val, _values, result)
366
- result = :DESCENDANT_SELECTOR
367
+ result = :DESCENDANT_SELECTOR
367
368
  result
368
369
  end
369
370
 
370
371
  def _reduce_8(val, _values, result)
371
- result = :CHILD_SELECTOR
372
+ result = :CHILD_SELECTOR
372
373
  result
373
374
  end
374
375
 
@@ -378,7 +379,7 @@ def _reduce_9(val, _values, result)
378
379
  else
379
380
  Node.new(:CONDITIONAL_SELECTOR, [val.first, val[1]])
380
381
  end
381
-
382
+
382
383
  result
383
384
  end
384
385
 
@@ -386,13 +387,13 @@ end
386
387
 
387
388
  def _reduce_11(val, _values, result)
388
389
  result = Node.new(:CONDITIONAL_SELECTOR, val)
389
-
390
+
390
391
  result
391
392
  end
392
393
 
393
394
  def _reduce_12(val, _values, result)
394
395
  result = Node.new(:CONDITIONAL_SELECTOR, val)
395
-
396
+
396
397
  result
397
398
  end
398
399
 
@@ -400,39 +401,39 @@ def _reduce_13(val, _values, result)
400
401
  result = Node.new(:CONDITIONAL_SELECTOR,
401
402
  [Node.new(:ELEMENT_NAME, ['*']), val.first]
402
403
  )
403
-
404
+
404
405
  result
405
406
  end
406
407
 
407
408
  def _reduce_14(val, _values, result)
408
409
  result = Node.new(val.first, [nil, val.last])
409
-
410
+
410
411
  result
411
412
  end
412
413
 
413
414
  def _reduce_15(val, _values, result)
414
415
  result = Node.new(val[1], [val.first, val.last])
415
-
416
+
416
417
  result
417
418
  end
418
419
 
419
420
  def _reduce_16(val, _values, result)
420
421
  result = Node.new(:DESCENDANT_SELECTOR, [val.first, val.last])
421
-
422
+
422
423
  result
423
424
  end
424
425
 
425
426
  # reduce 17 omitted
426
427
 
427
428
  def _reduce_18(val, _values, result)
428
- result = Node.new(:CLASS_CONDITION, [unescape_css_identifier(val[1])])
429
+ result = Node.new(:CLASS_CONDITION, [unescape_css_identifier(val[1])])
429
430
  result
430
431
  end
431
432
 
432
433
  # reduce 19 omitted
433
434
 
434
435
  def _reduce_20(val, _values, result)
435
- result = Node.new(:ELEMENT_NAME, val)
436
+ result = Node.new(:ELEMENT_NAME, val)
436
437
  result
437
438
  end
438
439
 
@@ -440,19 +441,19 @@ def _reduce_21(val, _values, result)
440
441
  result = Node.new(:ELEMENT_NAME,
441
442
  [[val.first, val.last].compact.join(':')]
442
443
  )
443
-
444
+
444
445
  result
445
446
  end
446
447
 
447
448
  def _reduce_22(val, _values, result)
448
449
  name = @namespaces.key?('xmlns') ? "xmlns:#{val.first}" : val.first
449
450
  result = Node.new(:ELEMENT_NAME, [name])
450
-
451
+
451
452
  result
452
453
  end
453
454
 
454
455
  def _reduce_23(val, _values, result)
455
- result = val[0]
456
+ result = val[0]
456
457
  result
457
458
  end
458
459
 
@@ -462,7 +463,7 @@ def _reduce_25(val, _values, result)
462
463
  result = Node.new(:ATTRIBUTE_CONDITION,
463
464
  [val[1]] + (val[2] || [])
464
465
  )
465
-
466
+
466
467
  result
467
468
  end
468
469
 
@@ -470,16 +471,16 @@ def _reduce_26(val, _values, result)
470
471
  result = Node.new(:ATTRIBUTE_CONDITION,
471
472
  [val[1]] + (val[2] || [])
472
473
  )
473
-
474
+
474
475
  result
475
476
  end
476
477
 
477
478
  def _reduce_27(val, _values, result)
478
- # Non standard, but hpricot supports it.
479
+ # non-standard, from hpricot
479
480
  result = Node.new(:PSEUDO_CLASS,
480
481
  [Node.new(:FUNCTION, ['nth-child(', val[1]])]
481
482
  )
482
-
483
+
483
484
  result
484
485
  end
485
486
 
@@ -487,7 +488,7 @@ def _reduce_28(val, _values, result)
487
488
  result = Node.new(:ELEMENT_NAME,
488
489
  [[val.first, val.last].compact.join(':')]
489
490
  )
490
-
491
+
491
492
  result
492
493
  end
493
494
 
@@ -495,52 +496,52 @@ def _reduce_29(val, _values, result)
495
496
  # Default namespace is not applied to attributes.
496
497
  # So we don't add prefix "xmlns:" as in namespaced_ident.
497
498
  result = Node.new(:ELEMENT_NAME, [val.first])
498
-
499
+
499
500
  result
500
501
  end
501
502
 
502
503
  def _reduce_30(val, _values, result)
503
504
  result = Node.new(:FUNCTION, [val.first.strip])
504
-
505
+
505
506
  result
506
507
  end
507
508
 
508
509
  def _reduce_31(val, _values, result)
509
510
  result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
510
-
511
+
511
512
  result
512
513
  end
513
514
 
514
515
  def _reduce_32(val, _values, result)
515
516
  result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
516
-
517
+
517
518
  result
518
519
  end
519
520
 
520
521
  def _reduce_33(val, _values, result)
521
522
  result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
522
-
523
+
523
524
  result
524
525
  end
525
526
 
526
527
  def _reduce_34(val, _values, result)
527
528
  result = Node.new(:FUNCTION, [val.first.strip, val[1]].flatten)
528
-
529
+
529
530
  result
530
531
  end
531
532
 
532
533
  def _reduce_35(val, _values, result)
533
- result = [val.first, val.last]
534
+ result = [val.first, val.last]
534
535
  result
535
536
  end
536
537
 
537
538
  def _reduce_36(val, _values, result)
538
- result = [val.first, val.last]
539
+ result = [val.first, val.last]
539
540
  result
540
541
  end
541
542
 
542
543
  def _reduce_37(val, _values, result)
543
- result = [val.first, val.last]
544
+ result = [val.first, val.last]
544
545
  result
545
546
  end
546
547
 
@@ -557,13 +558,13 @@ def _reduce_40(val, _values, result)
557
558
  when 'n'
558
559
  result = Node.new(:NTH, ['1','n','+','0'])
559
560
  else
560
- # This is not CSS standard. It allows us to support this:
561
+ # non-standard to support custom functions:
561
562
  # assert_xpath("//a[foo(., @href)]", @parser.parse('a:foo(@href)'))
562
563
  # assert_xpath("//a[foo(., @a, b)]", @parser.parse('a:foo(@a, b)'))
563
564
  # assert_xpath("//a[foo(., a, 10)]", @parser.parse('a:foo(a, 10)'))
564
565
  result = val
565
566
  end
566
-
567
+
567
568
  result
568
569
  end
569
570
 
@@ -573,7 +574,7 @@ def _reduce_41(val, _values, result)
573
574
  else
574
575
  raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
575
576
  end
576
-
577
+
577
578
  result
578
579
  end
579
580
 
@@ -589,7 +590,7 @@ def _reduce_42(val, _values, result)
589
590
  else
590
591
  raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
591
592
  end
592
-
593
+
593
594
  result
594
595
  end
595
596
 
@@ -609,18 +610,18 @@ def _reduce_43(val, _values, result)
609
610
  else
610
611
  raise Racc::ParseError, "parse error on IDENT '#{val[1]}'"
611
612
  end
612
-
613
+
613
614
  result
614
615
  end
615
616
 
616
617
  def _reduce_44(val, _values, result)
617
618
  result = Node.new(:PSEUDO_CLASS, [val[1]])
618
-
619
+
619
620
  result
620
621
  end
621
622
 
622
623
  def _reduce_45(val, _values, result)
623
- result = Node.new(:PSEUDO_CLASS, [val[1]])
624
+ result = Node.new(:PSEUDO_CLASS, [val[1]])
624
625
  result
625
626
  end
626
627
 
@@ -630,31 +631,31 @@ end
630
631
 
631
632
  def _reduce_48(val, _values, result)
632
633
  result = Node.new(:COMBINATOR, val)
633
-
634
+
634
635
  result
635
636
  end
636
637
 
637
638
  def _reduce_49(val, _values, result)
638
639
  result = Node.new(:COMBINATOR, val)
639
-
640
+
640
641
  result
641
642
  end
642
643
 
643
644
  def _reduce_50(val, _values, result)
644
645
  result = Node.new(:COMBINATOR, val)
645
-
646
+
646
647
  result
647
648
  end
648
649
 
649
650
  def _reduce_51(val, _values, result)
650
651
  result = Node.new(:COMBINATOR, val)
651
-
652
+
652
653
  result
653
654
  end
654
655
 
655
656
  def _reduce_52(val, _values, result)
656
657
  result = Node.new(:COMBINATOR, val)
657
-
658
+
658
659
  result
659
660
  end
660
661
 
@@ -669,65 +670,65 @@ end
669
670
  # reduce 57 omitted
670
671
 
671
672
  def _reduce_58(val, _values, result)
672
- result = Node.new(:ID, [unescape_css_identifier(val.first)])
673
+ result = Node.new(:ID, [unescape_css_identifier(val.first)])
673
674
  result
674
675
  end
675
676
 
676
677
  def _reduce_59(val, _values, result)
677
- result = [val.first, unescape_css_identifier(val[1])]
678
+ result = [val.first, unescape_css_identifier(val[1])]
678
679
  result
679
680
  end
680
681
 
681
682
  def _reduce_60(val, _values, result)
682
- result = [val.first, unescape_css_string(val[1])]
683
+ result = [val.first, unescape_css_string(val[1])]
683
684
  result
684
685
  end
685
686
 
686
687
  def _reduce_61(val, _values, result)
687
- result = [val.first, val[1]]
688
+ result = [val.first, val[1]]
688
689
  result
689
690
  end
690
691
 
691
692
  # reduce 62 omitted
692
693
 
693
694
  def _reduce_63(val, _values, result)
694
- result = :equal
695
+ result = :equal
695
696
  result
696
697
  end
697
698
 
698
699
  def _reduce_64(val, _values, result)
699
- result = :prefix_match
700
+ result = :prefix_match
700
701
  result
701
702
  end
702
703
 
703
704
  def _reduce_65(val, _values, result)
704
- result = :suffix_match
705
+ result = :suffix_match
705
706
  result
706
707
  end
707
708
 
708
709
  def _reduce_66(val, _values, result)
709
- result = :substring_match
710
+ result = :substring_match
710
711
  result
711
712
  end
712
713
 
713
714
  def _reduce_67(val, _values, result)
714
- result = :not_equal
715
+ result = :not_equal
715
716
  result
716
717
  end
717
718
 
718
719
  def _reduce_68(val, _values, result)
719
- result = :includes
720
+ result = :includes
720
721
  result
721
722
  end
722
723
 
723
724
  def _reduce_69(val, _values, result)
724
- result = :dash_match
725
+ result = :dash_match
725
726
  result
726
727
  end
727
728
 
728
729
  def _reduce_70(val, _values, result)
729
730
  result = Node.new(:NOT, [val[1]])
730
-
731
+
731
732
  result
732
733
  end
733
734
 
@@ -746,5 +747,5 @@ def _reduce_none(val, _values, result)
746
747
  end
747
748
 
748
749
  end # class Parser
749
- end # module CSS
750
- end # module Nokogiri
750
+ end # module CSS
751
+ end # module Nokogiri