nokogiri 1.10.10 → 1.13.9

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (220) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +5 -0
  3. data/LICENSE-DEPENDENCIES.md +1173 -884
  4. data/LICENSE.md +1 -1
  5. data/README.md +178 -96
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +13 -64
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +761 -424
  10. data/ext/nokogiri/gumbo.c +584 -0
  11. data/ext/nokogiri/html4_document.c +166 -0
  12. data/ext/nokogiri/html4_element_description.c +294 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +119 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +228 -91
  18. data/ext/nokogiri/nokogiri.h +199 -88
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +17 -17
  21. data/ext/nokogiri/xml_attribute_decl.c +21 -21
  22. data/ext/nokogiri/xml_cdata.c +14 -19
  23. data/ext/nokogiri/xml_comment.c +19 -26
  24. data/ext/nokogiri/xml_document.c +296 -220
  25. data/ext/nokogiri/xml_document_fragment.c +12 -16
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +31 -26
  28. data/ext/nokogiri/xml_element_decl.c +25 -25
  29. data/ext/nokogiri/xml_encoding_handler.c +43 -18
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +16 -18
  32. data/ext/nokogiri/xml_namespace.c +98 -53
  33. data/ext/nokogiri/xml_node.c +1065 -653
  34. data/ext/nokogiri/xml_node_set.c +178 -166
  35. data/ext/nokogiri/xml_processing_instruction.c +17 -19
  36. data/ext/nokogiri/xml_reader.c +277 -175
  37. data/ext/nokogiri/xml_relax_ng.c +52 -28
  38. data/ext/nokogiri/xml_sax_parser.c +112 -112
  39. data/ext/nokogiri/xml_sax_parser_context.c +112 -86
  40. data/ext/nokogiri/xml_sax_push_parser.c +36 -27
  41. data/ext/nokogiri/xml_schema.c +98 -48
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +14 -18
  44. data/ext/nokogiri/xml_xpath_context.c +226 -115
  45. data/ext/nokogiri/xslt_stylesheet.c +265 -173
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +101 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +626 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +104 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/gumbo.h +943 -0
  63. data/gumbo-parser/src/insertion_mode.h +33 -0
  64. data/gumbo-parser/src/macros.h +91 -0
  65. data/gumbo-parser/src/parser.c +4875 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +222 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +169 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3463 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +68 -0
  88. data/gumbo-parser/src/util.h +30 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +5 -3
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +218 -91
  100. data/lib/nokogiri/css.rb +50 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/extension.rb +31 -0
  103. data/lib/nokogiri/gumbo.rb +15 -0
  104. data/lib/nokogiri/html.rb +38 -27
  105. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  106. data/lib/nokogiri/{html → html4}/document.rb +103 -105
  107. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  108. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  109. data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
  110. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  111. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  112. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  113. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  114. data/lib/nokogiri/html4.rb +46 -0
  115. data/lib/nokogiri/html5/document.rb +91 -0
  116. data/lib/nokogiri/html5/document_fragment.rb +83 -0
  117. data/lib/nokogiri/html5/node.rb +100 -0
  118. data/lib/nokogiri/html5.rb +478 -0
  119. data/lib/nokogiri/jruby/dependencies.rb +21 -0
  120. data/lib/nokogiri/syntax_error.rb +2 -0
  121. data/lib/nokogiri/version/constant.rb +6 -0
  122. data/lib/nokogiri/version/info.rb +222 -0
  123. data/lib/nokogiri/version.rb +3 -108
  124. data/lib/nokogiri/xml/attr.rb +6 -3
  125. data/lib/nokogiri/xml/attribute_decl.rb +3 -1
  126. data/lib/nokogiri/xml/builder.rb +74 -33
  127. data/lib/nokogiri/xml/cdata.rb +3 -1
  128. data/lib/nokogiri/xml/character_data.rb +2 -0
  129. data/lib/nokogiri/xml/document.rb +224 -86
  130. data/lib/nokogiri/xml/document_fragment.rb +46 -44
  131. data/lib/nokogiri/xml/dtd.rb +4 -2
  132. data/lib/nokogiri/xml/element_content.rb +2 -0
  133. data/lib/nokogiri/xml/element_decl.rb +3 -1
  134. data/lib/nokogiri/xml/entity_decl.rb +4 -2
  135. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  136. data/lib/nokogiri/xml/namespace.rb +3 -0
  137. data/lib/nokogiri/xml/node/save_options.rb +10 -5
  138. data/lib/nokogiri/xml/node.rb +884 -378
  139. data/lib/nokogiri/xml/node_set.rb +51 -54
  140. data/lib/nokogiri/xml/notation.rb +13 -0
  141. data/lib/nokogiri/xml/parse_options.rb +22 -8
  142. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  143. data/lib/nokogiri/xml/pp/node.rb +25 -26
  144. data/lib/nokogiri/xml/pp.rb +4 -2
  145. data/lib/nokogiri/xml/processing_instruction.rb +3 -1
  146. data/lib/nokogiri/xml/reader.rb +21 -28
  147. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  148. data/lib/nokogiri/xml/sax/document.rb +45 -49
  149. data/lib/nokogiri/xml/sax/parser.rb +38 -34
  150. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  151. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  152. data/lib/nokogiri/xml/sax.rb +6 -4
  153. data/lib/nokogiri/xml/schema.rb +19 -9
  154. data/lib/nokogiri/xml/searchable.rb +112 -72
  155. data/lib/nokogiri/xml/syntax_error.rb +6 -4
  156. data/lib/nokogiri/xml/text.rb +2 -0
  157. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  158. data/lib/nokogiri/xml/xpath.rb +15 -4
  159. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  160. data/lib/nokogiri/xml.rb +38 -37
  161. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  162. data/lib/nokogiri/xslt.rb +29 -20
  163. data/lib/nokogiri.rb +49 -65
  164. data/lib/xsd/xmlparser/nokogiri.rb +26 -24
  165. data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  166. data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
  167. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  168. data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
  169. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  170. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
  171. data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
  172. data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
  173. metadata +189 -142
  174. data/ext/nokogiri/html_document.c +0 -170
  175. data/ext/nokogiri/html_document.h +0 -10
  176. data/ext/nokogiri/html_element_description.c +0 -279
  177. data/ext/nokogiri/html_element_description.h +0 -10
  178. data/ext/nokogiri/html_entity_lookup.c +0 -32
  179. data/ext/nokogiri/html_entity_lookup.h +0 -8
  180. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  181. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  182. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  183. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  184. data/ext/nokogiri/xml_attr.h +0 -9
  185. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  186. data/ext/nokogiri/xml_cdata.h +0 -9
  187. data/ext/nokogiri/xml_comment.h +0 -9
  188. data/ext/nokogiri/xml_document.h +0 -23
  189. data/ext/nokogiri/xml_document_fragment.h +0 -10
  190. data/ext/nokogiri/xml_dtd.h +0 -10
  191. data/ext/nokogiri/xml_element_content.h +0 -10
  192. data/ext/nokogiri/xml_element_decl.h +0 -9
  193. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  194. data/ext/nokogiri/xml_entity_decl.h +0 -10
  195. data/ext/nokogiri/xml_entity_reference.h +0 -9
  196. data/ext/nokogiri/xml_io.c +0 -61
  197. data/ext/nokogiri/xml_io.h +0 -11
  198. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  199. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  200. data/ext/nokogiri/xml_namespace.h +0 -14
  201. data/ext/nokogiri/xml_node.h +0 -13
  202. data/ext/nokogiri/xml_node_set.h +0 -12
  203. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  204. data/ext/nokogiri/xml_reader.h +0 -10
  205. data/ext/nokogiri/xml_relax_ng.h +0 -9
  206. data/ext/nokogiri/xml_sax_parser.h +0 -39
  207. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  208. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  209. data/ext/nokogiri/xml_schema.h +0 -9
  210. data/ext/nokogiri/xml_syntax_error.h +0 -13
  211. data/ext/nokogiri/xml_text.h +0 -9
  212. data/ext/nokogiri/xml_xpath_context.h +0 -10
  213. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  214. data/lib/nokogiri/html/document_fragment.rb +0 -49
  215. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  216. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  217. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  218. data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
  219. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  220. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -0,0 +1,67 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ require "set"
5
+
6
+ module Nokogiri
7
+ #
8
+ # Some classes in Nokogiri are namespaced as a group, for example
9
+ # Document, DocumentFragment, and Builder.
10
+ #
11
+ # It's sometimes necessary to look up the related class, e.g.:
12
+ #
13
+ # XML::Builder → XML::Document
14
+ # HTML4::Builder → HTML4::Document
15
+ # HTML5::Document → HTML5::DocumentFragment
16
+ #
17
+ # This module is included into those key classes who need to do this.
18
+ #
19
+ module ClassResolver
20
+ # #related_class restricts matching namespaces to those matching this set.
21
+ VALID_NAMESPACES = Set.new(["HTML", "HTML4", "HTML5", "XML"])
22
+
23
+ # :call-seq:
24
+ # related_class(class_name) → Class
25
+ #
26
+ # Find a class constant within the
27
+ #
28
+ # Some examples:
29
+ #
30
+ # Nokogiri::XML::Document.new.related_class("DocumentFragment")
31
+ # # => Nokogiri::XML::DocumentFragment
32
+ # Nokogiri::HTML4::Document.new.related_class("DocumentFragment")
33
+ # # => Nokogiri::HTML4::DocumentFragment
34
+ #
35
+ # Note this will also work for subclasses that follow the same convention, e.g.:
36
+ #
37
+ # Loofah::HTML::Document.new.related_class("DocumentFragment")
38
+ # # => Loofah::HTML::DocumentFragment
39
+ #
40
+ # And even if it's a subclass, this will iterate through the superclasses:
41
+ #
42
+ # class ThisIsATopLevelClass < Nokogiri::HTML4::Builder ; end
43
+ # ThisIsATopLevelClass.new.related_class("Document")
44
+ # # => Nokogiri::HTML4::Document
45
+ #
46
+ def related_class(class_name)
47
+ klass = nil
48
+ inspecting = self.class
49
+
50
+ while inspecting
51
+ namespace_path = inspecting.name.split("::")[0..-2]
52
+ inspecting = inspecting.superclass
53
+
54
+ next unless VALID_NAMESPACES.include?(namespace_path.last)
55
+
56
+ related_class_name = (namespace_path << class_name).join("::")
57
+ klass = begin
58
+ Object.const_get(related_class_name)
59
+ rescue NameError
60
+ nil
61
+ end
62
+ break if klass
63
+ end
64
+ klass
65
+ end
66
+ end
67
+ end
@@ -1,6 +1,8 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module CSS
3
- class Node
5
+ class Node # :nodoc:
4
6
  ALLOW_COMBINATOR_ON_SELF = [:DIRECT_ADJACENT_SELECTOR, :FOLLOWING_SELECTOR, :CHILD_SELECTOR]
5
7
 
6
8
  # Get the type of this node
@@ -9,25 +11,25 @@ module Nokogiri
9
11
  attr_accessor :value
10
12
 
11
13
  # Create a new Node with +type+ and +value+
12
- def initialize type, value
14
+ def initialize(type, value)
13
15
  @type = type
14
16
  @value = value
15
17
  end
16
18
 
17
19
  # Accept +visitor+
18
- def accept visitor
20
+ def accept(visitor)
19
21
  visitor.send(:"visit_#{type.to_s.downcase}", self)
20
22
  end
21
23
 
22
24
  ###
23
25
  # Convert this CSS node to xpath with +prefix+ using +visitor+
24
- def to_xpath prefix = '//', visitor = XPathVisitor.new
25
- prefix = '.' if ALLOW_COMBINATOR_ON_SELF.include?(type) && value.first.nil?
26
+ def to_xpath(prefix, visitor)
27
+ prefix = "." if ALLOW_COMBINATOR_ON_SELF.include?(type) && value.first.nil?
26
28
  prefix + visitor.accept(self)
27
29
  end
28
30
 
29
31
  # Find a node by type using +types+
30
- def find_by_type types
32
+ def find_by_type(types)
31
33
  matches = []
32
34
  matches << self if to_type == types
33
35
  @value.each do |v|
@@ -38,9 +40,9 @@ module Nokogiri
38
40
 
39
41
  # Convert to_type
40
42
  def to_type
41
- [@type] + @value.map { |n|
43
+ [@type] + @value.map do |n|
42
44
  n.to_type if n.respond_to?(:to_type)
43
- }.compact
45
+ end.compact
44
46
  end
45
47
 
46
48
  # Convert to array