nokogiri 1.10.7 → 1.16.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (224) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +42 -0
  3. data/LICENSE-DEPENDENCIES.md +1632 -1022
  4. data/LICENSE.md +1 -1
  5. data/README.md +188 -96
  6. data/bin/nokogiri +63 -50
  7. data/dependencies.yml +34 -66
  8. data/ext/nokogiri/depend +38 -358
  9. data/ext/nokogiri/extconf.rb +862 -421
  10. data/ext/nokogiri/gumbo.c +594 -0
  11. data/ext/nokogiri/html4_document.c +165 -0
  12. data/ext/nokogiri/html4_element_description.c +299 -0
  13. data/ext/nokogiri/html4_entity_lookup.c +37 -0
  14. data/ext/nokogiri/html4_sax_parser_context.c +108 -0
  15. data/ext/nokogiri/html4_sax_push_parser.c +95 -0
  16. data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
  17. data/ext/nokogiri/nokogiri.c +251 -105
  18. data/ext/nokogiri/nokogiri.h +222 -90
  19. data/ext/nokogiri/test_global_handlers.c +40 -0
  20. data/ext/nokogiri/xml_attr.c +17 -17
  21. data/ext/nokogiri/xml_attribute_decl.c +22 -22
  22. data/ext/nokogiri/xml_cdata.c +39 -31
  23. data/ext/nokogiri/xml_comment.c +20 -27
  24. data/ext/nokogiri/xml_document.c +408 -243
  25. data/ext/nokogiri/xml_document_fragment.c +13 -17
  26. data/ext/nokogiri/xml_dtd.c +64 -58
  27. data/ext/nokogiri/xml_element_content.c +63 -55
  28. data/ext/nokogiri/xml_element_decl.c +31 -31
  29. data/ext/nokogiri/xml_encoding_handler.c +54 -21
  30. data/ext/nokogiri/xml_entity_decl.c +37 -35
  31. data/ext/nokogiri/xml_entity_reference.c +17 -19
  32. data/ext/nokogiri/xml_namespace.c +131 -61
  33. data/ext/nokogiri/xml_node.c +1343 -674
  34. data/ext/nokogiri/xml_node_set.c +246 -216
  35. data/ext/nokogiri/xml_processing_instruction.c +18 -20
  36. data/ext/nokogiri/xml_reader.c +305 -213
  37. data/ext/nokogiri/xml_relax_ng.c +87 -78
  38. data/ext/nokogiri/xml_sax_parser.c +149 -124
  39. data/ext/nokogiri/xml_sax_parser_context.c +149 -103
  40. data/ext/nokogiri/xml_sax_push_parser.c +65 -37
  41. data/ext/nokogiri/xml_schema.c +138 -82
  42. data/ext/nokogiri/xml_syntax_error.c +42 -21
  43. data/ext/nokogiri/xml_text.c +35 -26
  44. data/ext/nokogiri/xml_xpath_context.c +363 -178
  45. data/ext/nokogiri/xslt_stylesheet.c +335 -189
  46. data/gumbo-parser/CHANGES.md +63 -0
  47. data/gumbo-parser/Makefile +126 -0
  48. data/gumbo-parser/THANKS +27 -0
  49. data/gumbo-parser/src/Makefile +34 -0
  50. data/gumbo-parser/src/README.md +41 -0
  51. data/gumbo-parser/src/ascii.c +75 -0
  52. data/gumbo-parser/src/ascii.h +115 -0
  53. data/gumbo-parser/src/attribute.c +42 -0
  54. data/gumbo-parser/src/attribute.h +17 -0
  55. data/gumbo-parser/src/char_ref.c +22225 -0
  56. data/gumbo-parser/src/char_ref.h +29 -0
  57. data/gumbo-parser/src/char_ref.rl +2154 -0
  58. data/gumbo-parser/src/error.c +630 -0
  59. data/gumbo-parser/src/error.h +148 -0
  60. data/gumbo-parser/src/foreign_attrs.c +103 -0
  61. data/gumbo-parser/src/foreign_attrs.gperf +27 -0
  62. data/gumbo-parser/src/insertion_mode.h +33 -0
  63. data/gumbo-parser/src/macros.h +91 -0
  64. data/gumbo-parser/src/nokogiri_gumbo.h +944 -0
  65. data/gumbo-parser/src/parser.c +4891 -0
  66. data/gumbo-parser/src/parser.h +41 -0
  67. data/gumbo-parser/src/replacement.h +33 -0
  68. data/gumbo-parser/src/string_buffer.c +103 -0
  69. data/gumbo-parser/src/string_buffer.h +68 -0
  70. data/gumbo-parser/src/string_piece.c +48 -0
  71. data/gumbo-parser/src/svg_attrs.c +174 -0
  72. data/gumbo-parser/src/svg_attrs.gperf +77 -0
  73. data/gumbo-parser/src/svg_tags.c +137 -0
  74. data/gumbo-parser/src/svg_tags.gperf +55 -0
  75. data/gumbo-parser/src/tag.c +223 -0
  76. data/gumbo-parser/src/tag_lookup.c +382 -0
  77. data/gumbo-parser/src/tag_lookup.gperf +170 -0
  78. data/gumbo-parser/src/tag_lookup.h +13 -0
  79. data/gumbo-parser/src/token_buffer.c +79 -0
  80. data/gumbo-parser/src/token_buffer.h +71 -0
  81. data/gumbo-parser/src/token_type.h +17 -0
  82. data/gumbo-parser/src/tokenizer.c +3464 -0
  83. data/gumbo-parser/src/tokenizer.h +112 -0
  84. data/gumbo-parser/src/tokenizer_states.h +339 -0
  85. data/gumbo-parser/src/utf8.c +245 -0
  86. data/gumbo-parser/src/utf8.h +164 -0
  87. data/gumbo-parser/src/util.c +66 -0
  88. data/gumbo-parser/src/util.h +34 -0
  89. data/gumbo-parser/src/vector.c +111 -0
  90. data/gumbo-parser/src/vector.h +45 -0
  91. data/lib/nokogiri/class_resolver.rb +67 -0
  92. data/lib/nokogiri/css/node.rb +10 -8
  93. data/lib/nokogiri/css/parser.rb +397 -377
  94. data/lib/nokogiri/css/parser.y +250 -245
  95. data/lib/nokogiri/css/parser_extras.rb +54 -49
  96. data/lib/nokogiri/css/syntax_error.rb +3 -1
  97. data/lib/nokogiri/css/tokenizer.rb +5 -3
  98. data/lib/nokogiri/css/tokenizer.rex +3 -2
  99. data/lib/nokogiri/css/xpath_visitor.rb +205 -96
  100. data/lib/nokogiri/css.rb +56 -17
  101. data/lib/nokogiri/decorators/slop.rb +9 -7
  102. data/lib/nokogiri/encoding_handler.rb +57 -0
  103. data/lib/nokogiri/extension.rb +32 -0
  104. data/lib/nokogiri/gumbo.rb +15 -0
  105. data/lib/nokogiri/html.rb +38 -27
  106. data/lib/nokogiri/{html → html4}/builder.rb +4 -2
  107. data/lib/nokogiri/html4/document.rb +214 -0
  108. data/lib/nokogiri/html4/document_fragment.rb +54 -0
  109. data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
  110. data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
  111. data/lib/nokogiri/html4/encoding_reader.rb +121 -0
  112. data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
  113. data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
  114. data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
  115. data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
  116. data/lib/nokogiri/html4.rb +47 -0
  117. data/lib/nokogiri/html5/document.rb +168 -0
  118. data/lib/nokogiri/html5/document_fragment.rb +90 -0
  119. data/lib/nokogiri/html5/node.rb +103 -0
  120. data/lib/nokogiri/html5.rb +326 -0
  121. data/lib/nokogiri/jruby/dependencies.rb +3 -0
  122. data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
  123. data/lib/nokogiri/syntax_error.rb +2 -0
  124. data/lib/nokogiri/version/constant.rb +6 -0
  125. data/lib/nokogiri/version/info.rb +224 -0
  126. data/lib/nokogiri/version.rb +3 -108
  127. data/lib/nokogiri/xml/attr.rb +55 -3
  128. data/lib/nokogiri/xml/attribute_decl.rb +6 -2
  129. data/lib/nokogiri/xml/builder.rb +75 -34
  130. data/lib/nokogiri/xml/cdata.rb +3 -1
  131. data/lib/nokogiri/xml/character_data.rb +2 -0
  132. data/lib/nokogiri/xml/document.rb +312 -127
  133. data/lib/nokogiri/xml/document_fragment.rb +93 -48
  134. data/lib/nokogiri/xml/dtd.rb +4 -2
  135. data/lib/nokogiri/xml/element_content.rb +12 -2
  136. data/lib/nokogiri/xml/element_decl.rb +6 -2
  137. data/lib/nokogiri/xml/entity_decl.rb +7 -3
  138. data/lib/nokogiri/xml/entity_reference.rb +2 -0
  139. data/lib/nokogiri/xml/namespace.rb +44 -0
  140. data/lib/nokogiri/xml/node/save_options.rb +23 -8
  141. data/lib/nokogiri/xml/node.rb +1096 -419
  142. data/lib/nokogiri/xml/node_set.rb +137 -61
  143. data/lib/nokogiri/xml/notation.rb +13 -0
  144. data/lib/nokogiri/xml/parse_options.rb +145 -52
  145. data/lib/nokogiri/xml/pp/character_data.rb +9 -6
  146. data/lib/nokogiri/xml/pp/node.rb +42 -30
  147. data/lib/nokogiri/xml/pp.rb +4 -2
  148. data/lib/nokogiri/xml/processing_instruction.rb +4 -1
  149. data/lib/nokogiri/xml/reader.rb +21 -28
  150. data/lib/nokogiri/xml/relax_ng.rb +8 -2
  151. data/lib/nokogiri/xml/sax/document.rb +45 -49
  152. data/lib/nokogiri/xml/sax/parser.rb +39 -36
  153. data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
  154. data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
  155. data/lib/nokogiri/xml/sax.rb +6 -4
  156. data/lib/nokogiri/xml/schema.rb +19 -9
  157. data/lib/nokogiri/xml/searchable.rb +120 -72
  158. data/lib/nokogiri/xml/syntax_error.rb +7 -5
  159. data/lib/nokogiri/xml/text.rb +2 -0
  160. data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
  161. data/lib/nokogiri/xml/xpath.rb +15 -4
  162. data/lib/nokogiri/xml/xpath_context.rb +3 -3
  163. data/lib/nokogiri/xml.rb +39 -38
  164. data/lib/nokogiri/xslt/stylesheet.rb +3 -1
  165. data/lib/nokogiri/xslt.rb +101 -22
  166. data/lib/nokogiri.rb +59 -75
  167. data/lib/xsd/xmlparser/nokogiri.rb +29 -25
  168. data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
  169. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  170. data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
  171. data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
  172. data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
  173. data/ports/archives/libxml2-2.12.3.tar.xz +0 -0
  174. data/ports/archives/libxslt-1.1.39.tar.xz +0 -0
  175. metadata +121 -291
  176. data/ext/nokogiri/html_document.c +0 -170
  177. data/ext/nokogiri/html_document.h +0 -10
  178. data/ext/nokogiri/html_element_description.c +0 -279
  179. data/ext/nokogiri/html_element_description.h +0 -10
  180. data/ext/nokogiri/html_entity_lookup.c +0 -32
  181. data/ext/nokogiri/html_entity_lookup.h +0 -8
  182. data/ext/nokogiri/html_sax_parser_context.c +0 -116
  183. data/ext/nokogiri/html_sax_parser_context.h +0 -11
  184. data/ext/nokogiri/html_sax_push_parser.c +0 -87
  185. data/ext/nokogiri/html_sax_push_parser.h +0 -9
  186. data/ext/nokogiri/xml_attr.h +0 -9
  187. data/ext/nokogiri/xml_attribute_decl.h +0 -9
  188. data/ext/nokogiri/xml_cdata.h +0 -9
  189. data/ext/nokogiri/xml_comment.h +0 -9
  190. data/ext/nokogiri/xml_document.h +0 -23
  191. data/ext/nokogiri/xml_document_fragment.h +0 -10
  192. data/ext/nokogiri/xml_dtd.h +0 -10
  193. data/ext/nokogiri/xml_element_content.h +0 -10
  194. data/ext/nokogiri/xml_element_decl.h +0 -9
  195. data/ext/nokogiri/xml_encoding_handler.h +0 -8
  196. data/ext/nokogiri/xml_entity_decl.h +0 -10
  197. data/ext/nokogiri/xml_entity_reference.h +0 -9
  198. data/ext/nokogiri/xml_io.c +0 -61
  199. data/ext/nokogiri/xml_io.h +0 -11
  200. data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
  201. data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
  202. data/ext/nokogiri/xml_namespace.h +0 -14
  203. data/ext/nokogiri/xml_node.h +0 -13
  204. data/ext/nokogiri/xml_node_set.h +0 -12
  205. data/ext/nokogiri/xml_processing_instruction.h +0 -9
  206. data/ext/nokogiri/xml_reader.h +0 -10
  207. data/ext/nokogiri/xml_relax_ng.h +0 -9
  208. data/ext/nokogiri/xml_sax_parser.h +0 -39
  209. data/ext/nokogiri/xml_sax_parser_context.h +0 -10
  210. data/ext/nokogiri/xml_sax_push_parser.h +0 -9
  211. data/ext/nokogiri/xml_schema.h +0 -9
  212. data/ext/nokogiri/xml_syntax_error.h +0 -13
  213. data/ext/nokogiri/xml_text.h +0 -9
  214. data/ext/nokogiri/xml_xpath_context.h +0 -10
  215. data/ext/nokogiri/xslt_stylesheet.h +0 -14
  216. data/lib/nokogiri/html/document.rb +0 -335
  217. data/lib/nokogiri/html/document_fragment.rb +0 -49
  218. data/lib/nokogiri/html/element_description_defaults.rb +0 -671
  219. data/lib/nokogiri/html/sax/parser_context.rb +0 -16
  220. data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
  221. data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
  222. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
  223. /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
  224. /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,3 +1,4 @@
1
+ # frozen_string_literal: true
1
2
  #--
2
3
  # DO NOT MODIFY!!!!
3
4
  # This file is automatically generated by rex 1.0.7
@@ -6,7 +7,8 @@
6
7
 
7
8
  module Nokogiri
8
9
  module CSS
9
- class Tokenizer # :nodoc:
10
+ # :nodoc: all
11
+ class Tokenizer
10
12
  require 'strscan'
11
13
 
12
14
  class ScanError < StandardError ; end
@@ -61,10 +63,10 @@ class Tokenizer # :nodoc:
61
63
  when (text = @ss.scan(/has\([\s]*/))
62
64
  action { [:HAS, text] }
63
65
 
64
- when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
66
+ when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
65
67
  action { [:FUNCTION, text] }
66
68
 
67
- when (text = @ss.scan(/[-@]?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
69
+ when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
68
70
  action { [:IDENT, text] }
69
71
 
70
72
  when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
@@ -1,6 +1,7 @@
1
1
  module Nokogiri
2
2
  module CSS
3
- class Tokenizer # :nodoc:
3
+ # :nodoc: all
4
+ class Tokenizer
4
5
 
5
6
  macro
6
7
  nl \n|\r\n|\r|\f
@@ -12,7 +13,7 @@ macro
12
13
  escape {unicode}|\\[^\n\r\f0-9A-Fa-f]
13
14
  nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
14
15
  nmstart [_A-Za-z]|{nonascii}|{escape}
15
- ident [-@]?({nmstart})({nmchar})*
16
+ ident -?({nmstart})({nmchar})*
16
17
  name ({nmchar})+
17
18
  string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*"
18
19
  string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*'
@@ -1,64 +1,143 @@
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
3
+
1
4
  module Nokogiri
2
5
  module CSS
3
- class XPathVisitor # :nodoc:
4
- def visit_function node
6
+ # When translating CSS selectors to XPath queries with Nokogiri::CSS.xpath_for, the XPathVisitor
7
+ # class allows for changing some of the behaviors related to builtin xpath functions and quirks
8
+ # of HTML5.
9
+ class XPathVisitor
10
+ WILDCARD_NAMESPACES = Nokogiri.libxml2_patches.include?("0009-allow-wildcard-namespaces.patch") # :nodoc:
11
+
12
+ # Enum to direct XPathVisitor when to use Nokogiri builtin XPath functions.
13
+ module BuiltinsConfig
14
+ # Never use Nokogiri builtin functions, always generate vanilla XPath 1.0 queries. This is
15
+ # the default when calling Nokogiri::CSS.xpath_for directly.
16
+ NEVER = :never
17
+
18
+ # Always use Nokogiri builtin functions whenever possible. This is probably only useful for testing.
19
+ ALWAYS = :always
20
+
21
+ # Only use Nokogiri builtin functions when they will be faster than vanilla XPath. This is
22
+ # the behavior chosen when searching for CSS selectors on a Nokogiri document, fragment, or
23
+ # node.
24
+ OPTIMAL = :optimal
25
+
26
+ # :nodoc: array of values for validation
27
+ VALUES = [NEVER, ALWAYS, OPTIMAL]
28
+ end
29
+
30
+ # Enum to direct XPathVisitor when to tweak the XPath query to suit the nature of the document
31
+ # being searched. Note that searches for CSS selectors from a Nokogiri document, fragment, or
32
+ # node will choose the correct option automatically.
33
+ module DoctypeConfig
34
+ # The document being searched is an XML document. This is the default.
35
+ XML = :xml
36
+
37
+ # The document being searched is an HTML4 document.
38
+ HTML4 = :html4
39
+
40
+ # The document being searched is an HTML5 document.
41
+ HTML5 = :html5
42
+
43
+ # :nodoc: array of values for validation
44
+ VALUES = [XML, HTML4, HTML5]
45
+ end
46
+
47
+ # :call-seq:
48
+ # new() → XPathVisitor
49
+ # new(builtins:, doctype:) → XPathVisitor
50
+ #
51
+ # [Parameters]
52
+ # - +builtins:+ (BuiltinsConfig) Determine when to use Nokogiri's built-in xpath functions for performance improvements.
53
+ # - +doctype:+ (DoctypeConfig) Make document-type-specific accommodations for CSS queries.
54
+ #
55
+ # [Returns] XPathVisitor
56
+ #
57
+ def initialize(builtins: BuiltinsConfig::NEVER, doctype: DoctypeConfig::XML)
58
+ unless BuiltinsConfig::VALUES.include?(builtins)
59
+ raise(ArgumentError, "Invalid values #{builtins.inspect} for builtins: keyword parameter")
60
+ end
61
+ unless DoctypeConfig::VALUES.include?(doctype)
62
+ raise(ArgumentError, "Invalid values #{doctype.inspect} for doctype: keyword parameter")
63
+ end
64
+
65
+ @builtins = builtins
66
+ @doctype = doctype
67
+ end
68
+
69
+ # :call-seq: config() → Hash
70
+ #
71
+ # [Returns]
72
+ # a Hash representing the configuration of the XPathVisitor, suitable for use as
73
+ # part of the CSS cache key.
74
+ def config
75
+ { builtins: @builtins, doctype: @doctype }
76
+ end
5
77
 
6
- msg = :"visit_function_#{node.value.first.gsub(/[(]/, '')}"
7
- return self.send(msg, node) if self.respond_to?(msg)
78
+ # :stopdoc:
79
+ def visit_function(node)
80
+ msg = :"visit_function_#{node.value.first.gsub(/[(]/, "")}"
81
+ return send(msg, node) if respond_to?(msg)
8
82
 
9
83
  case node.value.first
10
84
  when /^text\(/
11
- 'child::text()'
85
+ "child::text()"
12
86
  when /^self\(/
13
87
  "self::#{node.value[1]}"
14
88
  when /^eq\(/
15
- "position() = #{node.value[1]}"
89
+ "position()=#{node.value[1]}"
16
90
  when /^(nth|nth-of-type)\(/
17
- if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
91
+ if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
18
92
  nth(node.value[1])
19
93
  else
20
- "position() = #{node.value[1]}"
94
+ "position()=#{node.value[1]}"
21
95
  end
22
96
  when /^nth-child\(/
23
- if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
24
- nth(node.value[1], :child => true)
97
+ if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
98
+ nth(node.value[1], child: true)
25
99
  else
26
- "count(preceding-sibling::*) = #{node.value[1].to_i-1}"
100
+ "count(preceding-sibling::*)=#{node.value[1].to_i - 1}"
27
101
  end
28
102
  when /^nth-last-of-type\(/
29
- if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
30
- nth(node.value[1], :last => true)
103
+ if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
104
+ nth(node.value[1], last: true)
31
105
  else
32
106
  index = node.value[1].to_i - 1
33
- index == 0 ? "position() = last()" : "position() = last() - #{index}"
107
+ index == 0 ? "position()=last()" : "position()=last()-#{index}"
34
108
  end
35
109
  when /^nth-last-child\(/
36
- if node.value[1].is_a?(Nokogiri::CSS::Node) and node.value[1].type == :NTH
37
- nth(node.value[1], :last => true, :child => true)
110
+ if node.value[1].is_a?(Nokogiri::CSS::Node) && (node.value[1].type == :NTH)
111
+ nth(node.value[1], last: true, child: true)
38
112
  else
39
- "count(following-sibling::*) = #{node.value[1].to_i-1}"
113
+ "count(following-sibling::*)=#{node.value[1].to_i - 1}"
40
114
  end
41
115
  when /^(first|first-of-type)\(/
42
- "position() = 1"
116
+ "position()=1"
43
117
  when /^(last|last-of-type)\(/
44
- "position() = last()"
118
+ "position()=last()"
45
119
  when /^contains\(/
46
- "contains(., #{node.value[1]})"
120
+ "contains(.,#{node.value[1]})"
47
121
  when /^gt\(/
48
- "position() > #{node.value[1]}"
122
+ "position()>#{node.value[1]}"
49
123
  when /^only-child\(/
50
- "last() = 1"
124
+ "last()=1"
51
125
  when /^comment\(/
52
126
  "comment()"
53
127
  when /^has\(/
54
- ".//#{node.value[1].accept(self)}"
128
+ is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
129
+ ".#{"//" unless is_direct}#{node.value[1].accept(self)}"
55
130
  else
56
- args = ['.'] + node.value[1..-1]
57
- "#{node.value.first}#{args.join(', ')})"
131
+ # xpath function call, let's marshal those arguments
132
+ args = ["."]
133
+ args += node.value[1..-1].map do |n|
134
+ n.is_a?(Nokogiri::CSS::Node) ? n.accept(self) : n
135
+ end
136
+ "nokogiri:#{node.value.first}#{args.join(",")})"
58
137
  end
59
138
  end
60
139
 
61
- def visit_not node
140
+ def visit_not(node)
62
141
  child = node.value.first
63
142
  if :ELEMENT_NAME == child.type
64
143
  "not(self::#{child.accept(self)})"
@@ -67,143 +146,163 @@ module Nokogiri
67
146
  end
68
147
  end
69
148
 
70
- def visit_id node
149
+ def visit_id(node)
71
150
  node.value.first =~ /^#(.*)$/
72
- "@id = '#{$1}'"
151
+ "@id='#{Regexp.last_match(1)}'"
73
152
  end
74
153
 
75
- def visit_attribute_condition node
76
- attribute = if (node.value.first.type == :FUNCTION) or (node.value.first.value.first =~ /::/)
77
- ''
78
- else
79
- '@'
80
- end
81
- attribute += node.value.first.accept(self)
82
-
83
- # Support non-standard css
84
- attribute.gsub!(/^@@/, '@')
85
-
86
- return attribute unless node.value.length == 3
154
+ def visit_attribute_condition(node)
155
+ attribute = node.value.first.accept(self)
156
+ return attribute if node.value.length == 1
87
157
 
88
158
  value = node.value.last
89
- value = "'#{value}'" if value !~ /^['"]/
159
+ value = "'#{value}'" unless /^['"]/.match?(value)
90
160
 
91
- if (value[0]==value[-1]) && %q{"'}.include?(value[0])
161
+ # quoted values - see test_attribute_value_with_quotes in test/css/test_parser.rb
162
+ if (value[0] == value[-1]) && %q{"'}.include?(value[0])
92
163
  str_value = value[1..-2]
93
164
  if str_value.include?(value[0])
94
- value = 'concat("' + str_value.split('"', -1).join(%q{", '"', "}) + '", "")'
165
+ value = 'concat("' + str_value.split('"', -1).join(%q{",'"',"}) + '","")'
95
166
  end
96
167
  end
97
168
 
98
169
  case node.value[1]
99
170
  when :equal
100
- attribute + " = " + "#{value}"
171
+ attribute + "=" + value.to_s
101
172
  when :not_equal
102
- attribute + " != " + "#{value}"
173
+ attribute + "!=" + value.to_s
103
174
  when :substring_match
104
- "contains(#{attribute}, #{value})"
175
+ "contains(#{attribute},#{value})"
105
176
  when :prefix_match
106
- "starts-with(#{attribute}, #{value})"
177
+ "starts-with(#{attribute},#{value})"
107
178
  when :dash_match
108
- "#{attribute} = #{value} or starts-with(#{attribute}, concat(#{value}, '-'))"
179
+ "#{attribute}=#{value} or starts-with(#{attribute},concat(#{value},'-'))"
109
180
  when :includes
110
- "contains(concat(\" \", #{attribute}, \" \"),concat(\" \", #{value}, \" \"))"
181
+ value = value[1..-2] # strip quotes
182
+ css_class(attribute, value)
111
183
  when :suffix_match
112
- "substring(#{attribute}, string-length(#{attribute}) - " +
113
- "string-length(#{value}) + 1, string-length(#{value})) = #{value}"
184
+ "substring(#{attribute},string-length(#{attribute})-string-length(#{value})+1,string-length(#{value}))=#{value}"
114
185
  else
115
- attribute + " #{node.value[1]} " + "#{value}"
186
+ attribute + " #{node.value[1]} " + value.to_s
116
187
  end
117
188
  end
118
189
 
119
- def visit_pseudo_class node
120
- if node.value.first.is_a?(Nokogiri::CSS::Node) and node.value.first.type == :FUNCTION
190
+ def visit_pseudo_class(node)
191
+ if node.value.first.is_a?(Nokogiri::CSS::Node) && (node.value.first.type == :FUNCTION)
121
192
  node.value.first.accept(self)
122
193
  else
123
- msg = :"visit_pseudo_class_#{node.value.first.gsub(/[(]/, '')}"
124
- return self.send(msg, node) if self.respond_to?(msg)
194
+ msg = :"visit_pseudo_class_#{node.value.first.gsub(/[(]/, "")}"
195
+ return send(msg, node) if respond_to?(msg)
125
196
 
126
197
  case node.value.first
127
- when "first" then "position() = 1"
128
- when "first-child" then "count(preceding-sibling::*) = 0"
129
- when "last" then "position() = last()"
130
- when "last-child" then "count(following-sibling::*) = 0"
131
- when "first-of-type" then "position() = 1"
132
- when "last-of-type" then "position() = last()"
133
- when "only-child" then "count(preceding-sibling::*) = 0 and count(following-sibling::*) = 0"
134
- when "only-of-type" then "last() = 1"
198
+ when "first" then "position()=1"
199
+ when "first-child" then "count(preceding-sibling::*)=0"
200
+ when "last" then "position()=last()"
201
+ when "last-child" then "count(following-sibling::*)=0"
202
+ when "first-of-type" then "position()=1"
203
+ when "last-of-type" then "position()=last()"
204
+ when "only-child" then "count(preceding-sibling::*)=0 and count(following-sibling::*)=0"
205
+ when "only-of-type" then "last()=1"
135
206
  when "empty" then "not(node())"
136
207
  when "parent" then "node()"
137
208
  when "root" then "not(parent::*)"
138
209
  else
139
- node.value.first + "(.)"
210
+ "nokogiri:#{node.value.first}(.)"
140
211
  end
141
212
  end
142
213
  end
143
214
 
144
- def visit_class_condition node
145
- "contains(concat(' ', normalize-space(@class), ' '), ' #{node.value.first} ')"
215
+ def visit_class_condition(node)
216
+ css_class("@class", node.value.first)
146
217
  end
147
218
 
148
- def visit_combinator node
219
+ def visit_combinator(node)
149
220
  if is_of_type_pseudo_class?(node.value.last)
150
- "#{node.value.first.accept(self) if node.value.first}][#{node.value.last.accept(self)}"
221
+ "#{node.value.first&.accept(self)}][#{node.value.last.accept(self)}"
151
222
  else
152
- "#{node.value.first.accept(self) if node.value.first} and #{node.value.last.accept(self)}"
223
+ "#{node.value.first&.accept(self)} and #{node.value.last.accept(self)}"
153
224
  end
154
225
  end
155
226
 
156
227
  {
157
- 'direct_adjacent_selector' => "/following-sibling::*[1]/self::",
158
- 'following_selector' => "/following-sibling::",
159
- 'descendant_selector' => '//',
160
- 'child_selector' => '/',
161
- }.each do |k,v|
162
- class_eval %{
228
+ "direct_adjacent_selector" => "/following-sibling::*[1]/self::",
229
+ "following_selector" => "/following-sibling::",
230
+ "descendant_selector" => "//",
231
+ "child_selector" => "/",
232
+ }.each do |k, v|
233
+ class_eval <<~RUBY, __FILE__, __LINE__ + 1
163
234
  def visit_#{k} node
164
235
  "\#{node.value.first.accept(self) if node.value.first}#{v}\#{node.value.last.accept(self)}"
165
236
  end
166
- }
237
+ RUBY
238
+ end
239
+
240
+ def visit_conditional_selector(node)
241
+ node.value.first.accept(self) + "[" +
242
+ node.value.last.accept(self) + "]"
167
243
  end
168
244
 
169
- def visit_conditional_selector node
170
- node.value.first.accept(self) + '[' +
171
- node.value.last.accept(self) + ']'
245
+ def visit_element_name(node)
246
+ if @doctype == DoctypeConfig::HTML5 && html5_element_name_needs_namespace_handling(node)
247
+ # HTML5 has namespaces that should be ignored in CSS queries
248
+ # https://github.com/sparklemotion/nokogiri/issues/2376
249
+ if @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
250
+ if WILDCARD_NAMESPACES
251
+ "*:#{node.value.first}"
252
+ else
253
+ "*[nokogiri-builtin:local-name-is('#{node.value.first}')]"
254
+ end
255
+ else
256
+ "*[local-name()='#{node.value.first}']"
257
+ end
258
+ else
259
+ node.value.first
260
+ end
172
261
  end
173
262
 
174
- def visit_element_name node
175
- node.value.first
263
+ def visit_attrib_name(node)
264
+ "@#{node.value.first}"
176
265
  end
177
266
 
178
- def accept node
267
+ def accept(node)
179
268
  node.accept(self)
180
269
  end
181
270
 
182
- private
183
- def nth node, options={}
184
- raise ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}" unless node.value.size == 4
271
+ private
185
272
 
186
- a, b = read_a_and_positive_b node.value
273
+ def html5_element_name_needs_namespace_handling(node)
274
+ # if this is the wildcard selector "*", use it as normal
275
+ node.value.first != "*" &&
276
+ # if there is already a namespace (i.e., it is a prefixed QName), use it as normal
277
+ !node.value.first.include?(":")
278
+ end
279
+
280
+ def nth(node, options = {})
281
+ unless node.value.size == 4
282
+ raise(ArgumentError, "expected an+b node to contain 4 tokens, but is #{node.value.inspect}")
283
+ end
284
+
285
+ a, b = read_a_and_positive_b(node.value)
187
286
  position = if options[:child]
188
- options[:last] ? "(count(following-sibling::*) + 1)" : "(count(preceding-sibling::*) + 1)"
287
+ options[:last] ? "(count(following-sibling::*)+1)" : "(count(preceding-sibling::*)+1)"
189
288
  else
190
289
  options[:last] ? "(last()-position()+1)" : "position()"
191
290
  end
192
291
 
193
292
  if b.zero?
194
- "(#{position} mod #{a}) = 0"
293
+ "(#{position} mod #{a})=0"
195
294
  else
196
295
  compare = a < 0 ? "<=" : ">="
197
296
  if a.abs == 1
198
- "#{position} #{compare} #{b}"
297
+ "#{position}#{compare}#{b}"
199
298
  else
200
- "(#{position} #{compare} #{b}) and (((#{position}-#{b}) mod #{a.abs}) = 0)"
299
+ "(#{position}#{compare}#{b}) and (((#{position}-#{b}) mod #{a.abs})=0)"
201
300
  end
202
301
  end
203
302
  end
204
303
 
205
- def read_a_and_positive_b values
206
- op = values[2]
304
+ def read_a_and_positive_b(values)
305
+ op = values[2].strip
207
306
  if op == "+"
208
307
  a = values[0].to_i
209
308
  b = values[3].to_i
@@ -216,15 +315,25 @@ module Nokogiri
216
315
  [a, b]
217
316
  end
218
317
 
219
- def is_of_type_pseudo_class? node
220
- if node.type==:PSEUDO_CLASS
221
- if node.value[0].is_a?(Nokogiri::CSS::Node) and node.value[0].type == :FUNCTION
318
+ def is_of_type_pseudo_class?(node) # rubocop:disable Naming/PredicateName
319
+ if node.type == :PSEUDO_CLASS
320
+ if node.value[0].is_a?(Nokogiri::CSS::Node) && (node.value[0].type == :FUNCTION)
222
321
  node.value[0].value[0]
223
322
  else
224
323
  node.value[0]
225
324
  end =~ /(nth|first|last|only)-of-type(\()?/
226
325
  end
227
326
  end
327
+
328
+ def css_class(hay, needle)
329
+ if @builtins == BuiltinsConfig::ALWAYS || (@builtins == BuiltinsConfig::OPTIMAL && Nokogiri.uses_libxml?)
330
+ # use the builtin implementation
331
+ "nokogiri-builtin:css-class(#{hay},'#{needle}')"
332
+ else
333
+ # use only ordinary xpath functions
334
+ "contains(concat(' ',normalize-space(#{hay}),' '),' #{needle} ')"
335
+ end
336
+ end
228
337
  end
229
338
  end
230
339
  end
data/lib/nokogiri/css.rb CHANGED
@@ -1,27 +1,66 @@
1
- require 'nokogiri/css/node'
2
- require 'nokogiri/css/xpath_visitor'
3
- x = $-w
4
- $-w = false
5
- require 'nokogiri/css/parser'
6
- $-w = x
7
-
8
- require 'nokogiri/css/tokenizer'
9
- require 'nokogiri/css/syntax_error'
1
+ # coding: utf-8
2
+ # frozen_string_literal: true
10
3
 
11
4
  module Nokogiri
5
+ # Translate a CSS selector into an XPath 1.0 query
12
6
  module CSS
13
7
  class << self
14
- ###
15
- # Parse this CSS selector in +selector+. Returns an AST.
16
- def parse selector
17
- Parser.new.parse selector
8
+ # TODO: Deprecate this method ahead of 2.0 and delete it in 2.0.
9
+ # It is not used by Nokogiri and shouldn't be part of the public API.
10
+ def parse(selector) # :nodoc:
11
+ Parser.new.parse(selector)
18
12
  end
19
13
 
20
- ###
21
- # Get the XPath for +selector+.
22
- def xpath_for selector, options={}
23
- Parser.new(options[:ns] || {}).xpath_for selector, options
14
+ # :call-seq:
15
+ # xpath_for(selector) → String
16
+ # xpath_for(selector [, prefix:] [, visitor:] [, ns:]) → String
17
+ #
18
+ # Translate a CSS selector to the equivalent XPath query.
19
+ #
20
+ # [Parameters]
21
+ # - +selector+ (String) The CSS selector to be translated into XPath
22
+ #
23
+ # - +prefix:+ (String)
24
+ #
25
+ # The XPath prefix for the query, see Nokogiri::XML::XPath for some options. Default is
26
+ # +XML::XPath::GLOBAL_SEARCH_PREFIX+.
27
+ #
28
+ # - +visitor:+ (Nokogiri::CSS::XPathVisitor)
29
+ #
30
+ # The visitor class to use to transform the AST into XPath. Default is
31
+ # +Nokogiri::CSS::XPathVisitor.new+.
32
+ #
33
+ # - +ns:+ (Hash<String ⇒ String>)
34
+ #
35
+ # The namespaces that are referenced in the query, if any. This is a hash where the keys are
36
+ # the namespace prefix and the values are the namespace URIs. Default is an empty Hash.
37
+ #
38
+ # [Returns] (String) The equivalent XPath query for +selector+
39
+ #
40
+ # 💡 Note that translated queries are cached for performance concerns.
41
+ #
42
+ def xpath_for(selector, options = {})
43
+ raise TypeError, "no implicit conversion of #{selector.inspect} to String" unless selector.respond_to?(:to_str)
44
+
45
+ selector = selector.to_str
46
+ raise Nokogiri::CSS::SyntaxError, "empty CSS selector" if selector.empty?
47
+
48
+ prefix = options.fetch(:prefix, Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX)
49
+ visitor = options.fetch(:visitor) { Nokogiri::CSS::XPathVisitor.new }
50
+ ns = options.fetch(:ns, {})
51
+
52
+ Parser.new(ns).xpath_for(selector, prefix, visitor)
24
53
  end
25
54
  end
26
55
  end
27
56
  end
57
+
58
+ require_relative "css/node"
59
+ require_relative "css/xpath_visitor"
60
+ x = $-w
61
+ $-w = false
62
+ require_relative "css/parser"
63
+ $-w = x
64
+
65
+ require_relative "css/tokenizer"
66
+ require_relative "css/syntax_error"
@@ -1,3 +1,5 @@
1
+ # frozen_string_literal: true
2
+
1
3
  module Nokogiri
2
4
  module Decorators
3
5
  ###
@@ -9,21 +11,21 @@ module Nokogiri
9
11
 
10
12
  ###
11
13
  # look for node with +name+. See Nokogiri.Slop
12
- def method_missing name, *args, &block
14
+ def method_missing(name, *args, &block)
13
15
  if args.empty?
14
- list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")
15
- elsif args.first.is_a? Hash
16
+ list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")
17
+ elsif args.first.is_a?(Hash)
16
18
  hash = args.first
17
19
  if hash[:css]
18
20
  list = css("#{name}#{hash[:css]}")
19
21
  elsif hash[:xpath]
20
- conds = Array(hash[:xpath]).join(' and ')
22
+ conds = Array(hash[:xpath]).join(" and ")
21
23
  list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
22
24
  end
23
25
  else
24
26
  CSS::Parser.without_cache do
25
27
  list = xpath(
26
- *CSS.xpath_for("#{name}#{args.first}", :prefix => XPATH_PREFIX)
28
+ *CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX),
27
29
  )
28
30
  end
29
31
  end
@@ -32,8 +34,8 @@ module Nokogiri
32
34
  list.length == 1 ? list.first : list
33
35
  end
34
36
 
35
- def respond_to_missing? name, include_private = false
36
- list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, '')}")
37
+ def respond_to_missing?(name, include_private = false)
38
+ list = xpath("#{XPATH_PREFIX}#{name.to_s.sub(/^_/, "")}")
37
39
 
38
40
  !list.empty?
39
41
  end
@@ -0,0 +1,57 @@
1
+ # encoding: utf-8
2
+ # frozen_string_literal: true
3
+
4
+ module Nokogiri
5
+ class EncodingHandler
6
+ # Popular encoding aliases not known by all iconv implementations that Nokogiri should support.
7
+ USEFUL_ALIASES = {
8
+ # alias_name => true_name
9
+ "NOKOGIRI-SENTINEL" => "UTF-8", # indicating that Nokogiri has installed aliases
10
+ "Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
11
+ "UTF-8" => "UTF-8", # for JRuby tests, this is a no-op in CRuby
12
+ }
13
+
14
+ class << self
15
+ def install_default_aliases
16
+ USEFUL_ALIASES.each do |alias_name, name|
17
+ EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
18
+ end
19
+ end
20
+ end
21
+
22
+ # :stopdoc:
23
+ if Nokogiri.jruby?
24
+ class << self
25
+ def [](name)
26
+ storage.key?(name) ? new(storage[name]) : nil
27
+ end
28
+
29
+ def alias(name, alias_name)
30
+ storage[alias_name] = name
31
+ end
32
+
33
+ def delete(name)
34
+ storage.delete(name)
35
+ end
36
+
37
+ def clear_aliases!
38
+ storage.clear
39
+ end
40
+
41
+ private
42
+
43
+ def storage
44
+ @storage ||= {}
45
+ end
46
+ end
47
+
48
+ def initialize(name)
49
+ @name = name
50
+ end
51
+
52
+ attr_reader :name
53
+ end
54
+ end
55
+ end
56
+
57
+ Nokogiri::EncodingHandler.install_default_aliases