nokogiri 1.16.8-x86-mingw32 → 1.17.0-x86-mingw32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (113)
  1. checksums.yaml +4 -4
  2. data/Gemfile +11 -21
  3. data/README.md +4 -0
  4. data/dependencies.yml +6 -6
  5. data/ext/nokogiri/extconf.rb +191 -137
  6. data/ext/nokogiri/gumbo.c +69 -53
  7. data/ext/nokogiri/html4_document.c +10 -4
  8. data/ext/nokogiri/html4_element_description.c +18 -18
  9. data/ext/nokogiri/html4_sax_parser.c +40 -0
  10. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  11. data/ext/nokogiri/html4_sax_push_parser.c +25 -24
  12. data/ext/nokogiri/include/libexslt/exsltconfig.h +3 -3
  13. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +12 -19
  14. data/ext/nokogiri/include/libxml2/libxml/c14n.h +1 -12
  15. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +1 -1
  16. data/ext/nokogiri/include/libxml2/libxml/encoding.h +9 -0
  17. data/ext/nokogiri/include/libxml2/libxml/entities.h +12 -1
  18. data/ext/nokogiri/include/libxml2/libxml/hash.h +19 -0
  19. data/ext/nokogiri/include/libxml2/libxml/list.h +2 -2
  20. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +17 -0
  21. data/ext/nokogiri/include/libxml2/libxml/parser.h +60 -54
  22. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +9 -1
  23. data/ext/nokogiri/include/libxml2/libxml/pattern.h +6 -0
  24. data/ext/nokogiri/include/libxml2/libxml/tree.h +32 -12
  25. data/ext/nokogiri/include/libxml2/libxml/uri.h +11 -0
  26. data/ext/nokogiri/include/libxml2/libxml/valid.h +29 -2
  27. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +7 -0
  28. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +21 -4
  29. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +14 -0
  30. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +111 -15
  31. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +8 -45
  32. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +2 -0
  33. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +5 -0
  34. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +165 -1
  35. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +7 -171
  36. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +1 -0
  37. data/ext/nokogiri/include/libxml2/libxml/xpath.h +4 -0
  38. data/ext/nokogiri/include/libxslt/xsltInternals.h +3 -0
  39. data/ext/nokogiri/include/libxslt/xsltconfig.h +4 -37
  40. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  41. data/ext/nokogiri/nokogiri.c +9 -2
  42. data/ext/nokogiri/nokogiri.h +18 -33
  43. data/ext/nokogiri/xml_attr.c +1 -1
  44. data/ext/nokogiri/xml_cdata.c +2 -10
  45. data/ext/nokogiri/xml_comment.c +3 -8
  46. data/ext/nokogiri/xml_document.c +163 -156
  47. data/ext/nokogiri/xml_document_fragment.c +10 -25
  48. data/ext/nokogiri/xml_dtd.c +1 -1
  49. data/ext/nokogiri/xml_element_content.c +9 -9
  50. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  51. data/ext/nokogiri/xml_namespace.c +6 -6
  52. data/ext/nokogiri/xml_node.c +130 -104
  53. data/ext/nokogiri/xml_node_set.c +46 -44
  54. data/ext/nokogiri/xml_reader.c +54 -58
  55. data/ext/nokogiri/xml_relax_ng.c +35 -56
  56. data/ext/nokogiri/xml_sax_parser.c +156 -88
  57. data/ext/nokogiri/xml_sax_parser_context.c +213 -131
  58. data/ext/nokogiri/xml_sax_push_parser.c +68 -49
  59. data/ext/nokogiri/xml_schema.c +50 -85
  60. data/ext/nokogiri/xml_syntax_error.c +19 -11
  61. data/ext/nokogiri/xml_text.c +2 -4
  62. data/ext/nokogiri/xml_xpath_context.c +2 -2
  63. data/ext/nokogiri/xslt_stylesheet.c +8 -8
  64. data/lib/nokogiri/3.0/nokogiri.so +0 -0
  65. data/lib/nokogiri/3.1/nokogiri.so +0 -0
  66. data/lib/nokogiri/3.2/nokogiri.so +0 -0
  67. data/lib/nokogiri/3.3/nokogiri.so +0 -0
  68. data/lib/nokogiri/class_resolver.rb +1 -1
  69. data/lib/nokogiri/css/node.rb +6 -2
  70. data/lib/nokogiri/css/parser.rb +6 -4
  71. data/lib/nokogiri/css/parser.y +2 -2
  72. data/lib/nokogiri/css/parser_extras.rb +6 -66
  73. data/lib/nokogiri/css/selector_cache.rb +38 -0
  74. data/lib/nokogiri/css/tokenizer.rb +4 -4
  75. data/lib/nokogiri/css/tokenizer.rex +9 -8
  76. data/lib/nokogiri/css/xpath_visitor.rb +42 -6
  77. data/lib/nokogiri/css.rb +86 -20
  78. data/lib/nokogiri/decorators/slop.rb +3 -5
  79. data/lib/nokogiri/encoding_handler.rb +2 -2
  80. data/lib/nokogiri/html4/document.rb +44 -23
  81. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  82. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  83. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  84. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  85. data/lib/nokogiri/html4.rb +9 -14
  86. data/lib/nokogiri/html5/builder.rb +40 -0
  87. data/lib/nokogiri/html5/document.rb +61 -30
  88. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  89. data/lib/nokogiri/html5/node.rb +4 -4
  90. data/lib/nokogiri/html5.rb +114 -72
  91. data/lib/nokogiri/version/constant.rb +1 -1
  92. data/lib/nokogiri/xml/builder.rb +8 -1
  93. data/lib/nokogiri/xml/document.rb +70 -26
  94. data/lib/nokogiri/xml/document_fragment.rb +84 -13
  95. data/lib/nokogiri/xml/node.rb +82 -11
  96. data/lib/nokogiri/xml/node_set.rb +9 -7
  97. data/lib/nokogiri/xml/parse_options.rb +1 -1
  98. data/lib/nokogiri/xml/pp/node.rb +6 -1
  99. data/lib/nokogiri/xml/reader.rb +46 -13
  100. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  101. data/lib/nokogiri/xml/sax/document.rb +174 -83
  102. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  103. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  104. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  105. data/lib/nokogiri/xml/sax.rb +48 -0
  106. data/lib/nokogiri/xml/schema.rb +112 -45
  107. data/lib/nokogiri/xml/searchable.rb +6 -8
  108. data/lib/nokogiri/xml/syntax_error.rb +22 -0
  109. data/lib/nokogiri/xml.rb +13 -24
  110. data/lib/nokogiri/xslt.rb +3 -9
  111. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  112. metadata +8 -4
  113. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
@@ -23,8 +23,12 @@ module Nokogiri
23
23
 
24
24
  ###
25
25
  # Convert this CSS node to xpath with +prefix+ using +visitor+
26
- def to_xpath(prefix, visitor)
27
- prefix = "." if ALLOW_COMBINATOR_ON_SELF.include?(type) && value.first.nil?
26
+ def to_xpath(visitor)
27
+ prefix = if ALLOW_COMBINATOR_ON_SELF.include?(type) && value.first.nil?
28
+ "."
29
+ else
30
+ visitor.prefix
31
+ end
28
32
  prefix + visitor.accept(self)
29
33
  end
30
34
 
@@ -1,8 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
  #
3
3
  # DO NOT MODIFY!!!!
4
- # This file is automatically generated by Racc 1.6.0
5
- # from Racc grammar file "".
4
+ # This file is automatically generated by Racc 1.8.0
5
+ # from Racc grammar file "parser.y".
6
6
  #
7
7
 
8
8
  require 'racc/parser.rb'
@@ -291,6 +291,7 @@ Racc_arg = [
291
291
  racc_shift_n,
292
292
  racc_reduce_n,
293
293
  racc_use_result_var ]
294
+ Ractor.make_shareable(Racc_arg) if defined?(Ractor)
294
295
 
295
296
  Racc_token_to_s_table = [
296
297
  "$end",
@@ -351,6 +352,7 @@ Racc_token_to_s_table = [
351
352
  "negation",
352
353
  "eql_incl_dash",
353
354
  "negation_arg" ]
355
+ Ractor.make_shareable(Racc_token_to_s_table) if defined?(Ractor)
354
356
 
355
357
  Racc_debug_parser = false
356
358
 
@@ -468,12 +470,12 @@ def _reduce_23(val, _values, result)
468
470
  end
469
471
 
470
472
  def _reduce_24(val, _values, result)
471
- result = Node.new(:ELEMENT_NAME, [[val[0], val[2]].compact.join(':')])
473
+ result = Node.new(:ELEMENT_NAME, [val[0], val[2]])
472
474
  result
473
475
  end
474
476
 
475
477
  def _reduce_25(val, _values, result)
476
- name = @namespaces.key?('xmlns') ? "xmlns:#{val[0]}" : val[0]
478
+ name = val[0]
477
479
  result = Node.new(:ELEMENT_NAME, [name])
478
480
 
479
481
  result
@@ -64,9 +64,9 @@ rule
64
64
  ;
65
65
 
66
66
  namespaced_ident:
67
- namespace '|' IDENT { result = Node.new(:ELEMENT_NAME, [[val[0], val[2]].compact.join(':')]) }
67
+ namespace '|' IDENT { result = Node.new(:ELEMENT_NAME, [val[0], val[2]]) }
68
68
  | IDENT {
69
- name = @namespaces.key?('xmlns') ? "xmlns:#{val[0]}" : val[0]
69
+ name = val[0]
70
70
  result = Node.new(:ELEMENT_NAME, [name])
71
71
  }
72
72
  ;
@@ -5,62 +5,9 @@ require "thread"
5
5
  module Nokogiri
6
6
  module CSS
7
7
  class Parser < Racc::Parser # :nodoc:
8
- CACHE_SWITCH_NAME = :nokogiri_css_parser_cache_is_off
9
-
10
- @cache = {}
11
- @mutex = Mutex.new
12
-
13
- class << self
14
- # Return a thread-local boolean indicating whether the CSS-to-XPath cache is active. (Default is `true`.)
15
- def cache_on?
16
- !Thread.current[CACHE_SWITCH_NAME]
17
- end
18
-
19
- # Set a thread-local boolean to turn cacheing on and off. Truthy values turn the cache on, falsey values turn the cache off.
20
- def set_cache(value) # rubocop:disable Naming/AccessorMethodName
21
- Thread.current[CACHE_SWITCH_NAME] = !value
22
- end
23
-
24
- # Get the css selector in +string+ from the cache
25
- def [](string)
26
- return unless cache_on?
27
-
28
- @mutex.synchronize { @cache[string] }
29
- end
30
-
31
- # Set the css selector in +string+ in the cache to +value+
32
- def []=(string, value)
33
- return value unless cache_on?
34
-
35
- @mutex.synchronize { @cache[string] = value }
36
- end
37
-
38
- # Clear the cache
39
- def clear_cache(create_new_object = false)
40
- @mutex.synchronize do
41
- if create_new_object
42
- @cache = {}
43
- else
44
- @cache.clear
45
- end
46
- end
47
- end
48
-
49
- # Execute +block+ without cache
50
- def without_cache(&block)
51
- original_cache_setting = cache_on?
52
- set_cache(false)
53
- yield
54
- ensure
55
- set_cache(original_cache_setting)
56
- end
57
- end
58
-
59
- # Create a new CSS parser with respect to +namespaces+
60
- def initialize(namespaces = {})
8
+ def initialize
61
9
  @tokenizer = Tokenizer.new
62
- @namespaces = namespaces
63
- super()
10
+ super
64
11
  end
65
12
 
66
13
  def parse(string)
@@ -72,11 +19,10 @@ module Nokogiri
72
19
  @tokenizer.next_token
73
20
  end
74
21
 
75
- # Get the xpath for +string+ using +options+
76
- def xpath_for(string, prefix, visitor)
77
- key = cache_key(string, prefix, visitor)
78
- self.class[key] ||= parse(string).map do |ast|
79
- ast.to_xpath(prefix, visitor)
22
+ # Get the xpath for +selector+ using +visitor+
23
+ def xpath_for(selector, visitor)
24
+ parse(selector).map do |ast|
25
+ ast.to_xpath(visitor)
80
26
  end
81
27
  end
82
28
 
@@ -85,12 +31,6 @@ module Nokogiri
85
31
  after = value_stack.compact.last
86
32
  raise SyntaxError, "unexpected '#{error_value}' after '#{after}'"
87
33
  end
88
-
89
- def cache_key(query, prefix, visitor)
90
- if self.class.cache_on?
91
- [query, prefix, @namespaces, visitor.config]
92
- end
93
- end
94
34
  end
95
35
  end
96
36
  end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module CSS
5
+ module SelectorCache # :nodoc:
6
+ @cache = {}
7
+ @mutex = Mutex.new
8
+
9
+ class << self
10
+ # Retrieve the cached XPath expressions for the key
11
+ def [](key)
12
+ @mutex.synchronize { @cache[key] }
13
+ end
14
+
15
+ # Insert the XPath expressions `value` at the cache key
16
+ def []=(key, value)
17
+ @mutex.synchronize { @cache[key] = value }
18
+ end
19
+
20
+ # Clear the cache
21
+ def clear_cache(create_new_object = false)
22
+ @mutex.synchronize do
23
+ if create_new_object # used in tests to avoid 'method redefined' warnings when injecting spies
24
+ @cache = {}
25
+ else
26
+ @cache.clear
27
+ end
28
+ end
29
+ end
30
+
31
+ # Construct a unique key cache key
32
+ def key(selector:, visitor:)
33
+ [selector, visitor.config]
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -63,13 +63,13 @@ class Tokenizer
63
63
  when (text = @ss.scan(/has\([\s]*/))
64
64
  action { [:HAS, text] }
65
65
 
66
- when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
66
+ when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*\([\s]*/))
67
67
  action { [:FUNCTION, text] }
68
68
 
69
- when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
69
+ when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*/))
70
70
  action { [:IDENT, text] }
71
71
 
72
- when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
72
+ when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))+/))
73
73
  action { [:HASH, text] }
74
74
 
75
75
  when (text = @ss.scan(/[\s]*~=[\s]*/))
@@ -132,7 +132,7 @@ class Tokenizer
132
132
  when (text = @ss.scan(/[\s]+/))
133
133
  action { [:S, text] }
134
134
 
135
- when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/))
135
+ when (text = @ss.scan(/("([^\n\r\f"]|(\n|\r\n|\r|\f)|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|(\n|\r\n|\r|\f)|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*(?<!\\)(?:\\{2})*')/))
136
136
  action { [:STRING, text] }
137
137
 
138
138
  when (text = @ss.scan(/./))
@@ -4,20 +4,21 @@ module CSS
4
4
  class Tokenizer
5
5
 
6
6
  macro
7
- nl \n|\r\n|\r|\f
7
+ nl (\n|\r\n|\r|\f)
8
8
  w [\s]*
9
9
  nonascii [^\0-\177]
10
10
  num -?([0-9]+|[0-9]*\.[0-9]+)
11
11
  unicode \\[0-9A-Fa-f]{1,6}(\r\n|[\s])?
12
12
 
13
- escape {unicode}|\\[^\n\r\f0-9A-Fa-f]
14
- nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
15
- nmstart [_A-Za-z]|{nonascii}|{escape}
16
- ident -?({nmstart})({nmchar})*
17
- name ({nmchar})+
13
+ escape ({unicode}|\\[^\n\r\f0-9A-Fa-f])
14
+ nmchar ([_A-Za-z0-9-]|{nonascii}|{escape})
15
+ nmstart ([_A-Za-z]|{nonascii}|{escape})
16
+ name {nmstart}{nmchar}*
17
+ ident -?{name}
18
+ charref {nmchar}+
18
19
  string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*"
19
20
  string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*'
20
- string {string1}|{string2}
21
+ string ({string1}|{string2})
21
22
 
22
23
  rule
23
24
 
@@ -26,7 +27,7 @@ rule
26
27
  has\({w} { [:HAS, text] }
27
28
  {ident}\({w} { [:FUNCTION, text] }
28
29
  {ident} { [:IDENT, text] }
29
- \#{name} { [:HASH, text] }
30
+ \#{charref} { [:HASH, text] }
30
31
  {w}~={w} { [:INCLUDES, text] }
31
32
  {w}\|={w} { [:DASHMATCH, text] }
32
33
  {w}\^={w} { [:PREFIXMATCH, text] }
@@ -44,6 +44,18 @@ module Nokogiri
44
44
  VALUES = [XML, HTML4, HTML5]
45
45
  end
46
46
 
47
+ # The visitor configuration set via the +builtins:+ keyword argument to XPathVisitor.new.
48
+ attr_reader :builtins
49
+
50
+ # The visitor configuration set via the +doctype:+ keyword argument to XPathVisitor.new.
51
+ attr_reader :doctype
52
+
53
+ # The visitor configuration set via the +prefix:+ keyword argument to XPathVisitor.new.
54
+ attr_reader :prefix
55
+
56
+ # The visitor configuration set via the +namespaces:+ keyword argument to XPathVisitor.new.
57
+ attr_reader :namespaces
58
+
47
59
  # :call-seq:
48
60
  # new() → XPathVisitor
49
61
  # new(builtins:, doctype:) → XPathVisitor
@@ -54,7 +66,12 @@ module Nokogiri
54
66
  #
55
67
  # [Returns] XPathVisitor
56
68
  #
57
- def initialize(builtins: BuiltinsConfig::NEVER, doctype: DoctypeConfig::XML)
69
+ def initialize(
70
+ builtins: BuiltinsConfig::NEVER,
71
+ doctype: DoctypeConfig::XML,
72
+ prefix: Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX,
73
+ namespaces: nil
74
+ )
58
75
  unless BuiltinsConfig::VALUES.include?(builtins)
59
76
  raise(ArgumentError, "Invalid values #{builtins.inspect} for builtins: keyword parameter")
60
77
  end
@@ -64,6 +81,8 @@ module Nokogiri
64
81
 
65
82
  @builtins = builtins
66
83
  @doctype = doctype
84
+ @prefix = prefix
85
+ @namespaces = namespaces
67
86
  end
68
87
 
69
88
  # :call-seq: config() → Hash
@@ -72,7 +91,7 @@ module Nokogiri
72
91
  # a Hash representing the configuration of the XPathVisitor, suitable for use as
73
92
  # part of the CSS cache key.
74
93
  def config
75
- { builtins: @builtins, doctype: @doctype }
94
+ { builtins: @builtins, doctype: @doctype, prefix: @prefix, namespaces: @namespaces }
76
95
  end
77
96
 
78
97
  # :stopdoc:
@@ -128,6 +147,8 @@ module Nokogiri
128
147
  is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
129
148
  ".#{"//" unless is_direct}#{node.value[1].accept(self)}"
130
149
  else
150
+ validate_xpath_function_name(node.value.first)
151
+
131
152
  # xpath function call, let's marshal those arguments
132
153
  args = ["."]
133
154
  args += node.value[1..-1].map do |n|
@@ -207,6 +228,7 @@ module Nokogiri
207
228
  when "parent" then "node()"
208
229
  when "root" then "not(parent::*)"
209
230
  else
231
+ validate_xpath_function_name(node.value.first)
210
232
  "nokogiri:#{node.value.first}(.)"
211
233
  end
212
234
  end
@@ -255,6 +277,14 @@ module Nokogiri
255
277
  else
256
278
  "*[local-name()='#{node.value.first}']"
257
279
  end
280
+ elsif node.value.length == 2 # has a namespace prefix
281
+ if node.value.first.nil? # namespace prefix is empty
282
+ node.value.last
283
+ else
284
+ node.value.join(":")
285
+ end
286
+ elsif @namespaces&.key?("xmlns") # apply the default namespace if it's declared
287
+ "xmlns:#{node.value.first}"
258
288
  else
259
289
  node.value.first
260
290
  end
@@ -270,11 +300,17 @@ module Nokogiri
270
300
 
271
301
  private
272
302
 
303
+ def validate_xpath_function_name(name)
304
+ if name.start_with?("-")
305
+ raise Nokogiri::CSS::SyntaxError, "Invalid XPath function name '#{name}'"
306
+ end
307
+ end
308
+
273
309
  def html5_element_name_needs_namespace_handling(node)
274
- # if this is the wildcard selector "*", use it as normal
275
- node.value.first != "*" &&
276
- # if there is already a namespace (i.e., it is a prefixed QName), use it as normal
277
- !node.value.first.include?(":")
310
+ # if there is already a namespace (i.e., it is a prefixed QName), use it as normal
311
+ node.value.length == 1 &&
312
+ # if this is the wildcard selector "*", use it as normal
313
+ node.value.first != "*"
278
314
  end
279
315
 
280
316
  def nth(node, options = {})
data/lib/nokogiri/css.rb CHANGED
@@ -8,53 +8,119 @@ module Nokogiri
8
8
  # TODO: Deprecate this method ahead of 2.0 and delete it in 2.0.
9
9
  # It is not used by Nokogiri and shouldn't be part of the public API.
10
10
  def parse(selector) # :nodoc:
11
+ warn("Nokogiri::CSS.parse is deprecated and will be removed in a future version of Nokogiri. Use Nokogiri::CSS::Parser#parse instead.", uplevel: 1, category: :deprecated)
11
12
  Parser.new.parse(selector)
12
13
  end
13
14
 
14
15
  # :call-seq:
15
- # xpath_for(selector) → String
16
- # xpath_for(selector [, prefix:] [, visitor:] [, ns:]) → String
16
+ # xpath_for(selector_list) → Array<String>
17
+ # xpath_for(selector_list [, prefix:] [, ns:] [, visitor:] [, cache:]) → Array<String>
17
18
  #
18
- # Translate a CSS selector to the equivalent XPath query.
19
+ # Translate a CSS selector list to the equivalent XPath expressions.
20
+ #
21
+ # 💡 Note that translated queries are cached by default for performance concerns.
22
+ #
23
+ # ⚠ Users should prefer Nokogiri::XML::Searchable#css, which is mixed into all document and
24
+ # node classes, for querying documents with CSS selectors. This method is the underlying
25
+ # mechanism used by XML::Searchable and is provided solely for advanced users to translate
26
+ # \CSS selectors to XPath directly.
27
+ #
28
+ # Also see Nokogiri::XML::Searchable#css for documentation on supported CSS selector features,
29
+ # some extended syntax that Nokogiri supports, and advanced CSS features like pseudo-class
30
+ # functions.
19
31
  #
20
32
  # [Parameters]
21
- # - +selector+ (String) The CSS selector to be translated into XPath
33
+ # - +selector_list+ (String)
22
34
  #
35
+ # The CSS selector to be translated into XPath. This is always a String, but that string
36
+ # value may be a {selector list}[https://www.w3.org/TR/selectors-4/#grouping] (see
37
+ # examples).
38
+ #
39
+ # [Keyword arguments]
23
40
  # - +prefix:+ (String)
24
41
  #
25
- # The XPath prefix for the query, see Nokogiri::XML::XPath for some options. Default is
26
- # +XML::XPath::GLOBAL_SEARCH_PREFIX+.
42
+ # The XPath expression prefix which determines the search context. See Nokogiri::XML::XPath
43
+ # for standard options. Default is +XPath::GLOBAL_SEARCH_PREFIX+.
44
+ #
45
+ # - +ns:+ (Hash<String ⇒ String>, nil)
46
+ #
47
+ # Namespaces that are referenced in the query, if any. This is a hash where the keys are the
48
+ # namespace prefix and the values are the namespace URIs. Default is +nil+ indicating an
49
+ # empty set of namespaces.
27
50
  #
28
51
  # - +visitor:+ (Nokogiri::CSS::XPathVisitor)
29
52
  #
30
- # The visitor class to use to transform the AST into XPath. Default is
31
- # +Nokogiri::CSS::XPathVisitor.new+.
53
+ # Use this XPathVisitor object to transform the CSS AST into XPath expressions. See
54
+ # Nokogiri::CSS::XPathVisitor for more information on some of the complex behavior that can
55
+ # be customized for your document type. Default is +Nokogiri::CSS::XPathVisitor.new+.
56
+ #
57
+ # ⚠ Note that this option is mutually exclusive with +prefix+ and +ns+. If +visitor+ is
58
+ # provided, +prefix+ and +ns+ must not be present.
59
+ #
60
+ # - +cache:+ (Boolean)
61
+ #
62
+ # Whether to use the SelectorCache for the translated query to ensure that repeated queries
63
+ # don't incur the overhead of re-parsing the selector. Default is +true+.
32
64
  #
33
- # - +ns:+ (Hash<String ⇒ String>)
65
+ # [Returns] (Array<String>) The equivalent set of XPath expressions for +selector_list+
34
66
  #
35
- # The namespaces that are referenced in the query, if any. This is a hash where the keys are
36
- # the namespace prefix and the values are the namespace URIs. Default is an empty Hash.
67
+ # *Example* with a simple selector:
37
68
  #
38
- # [Returns] (String) The equivalent XPath query for +selector+
69
+ # Nokogiri::CSS.xpath_for("div") # => ["//div"]
39
70
  #
40
- # 💡 Note that translated queries are cached for performance concerns.
71
+ # *Example* with a compound selector:
41
72
  #
42
- def xpath_for(selector, options = {})
43
- raise TypeError, "no implicit conversion of #{selector.inspect} to String" unless selector.respond_to?(:to_str)
73
+ # Nokogiri::CSS.xpath_for("div.xl") # => ["//div[contains(concat(' ',normalize-space(@class),' '),' xl ')]"]
74
+ #
75
+ # *Example* with a complex selector:
76
+ #
77
+ # Nokogiri::CSS.xpath_for("h1 + div") # => ["//h1/following-sibling::*[1]/self::div"]
78
+ #
79
+ # *Example* with a selector list:
80
+ #
81
+ # Nokogiri::CSS.xpath_for("h1, h2, h3") # => ["//h1", "//h2", "//h3"]
82
+ #
83
+ def xpath_for(
84
+ selector, options = nil,
85
+ prefix: options&.delete(:prefix),
86
+ visitor: options&.delete(:visitor),
87
+ ns: options&.delete(:ns),
88
+ cache: true
89
+ )
90
+ unless options.nil?
91
+ warn("Nokogiri::CSS.xpath_for: Passing options as an explicit hash is deprecated. Use keyword arguments instead. This will become an error in a future release.", uplevel: 1, category: :deprecated)
92
+ end
93
+
94
+ raise(TypeError, "no implicit conversion of #{selector.inspect} to String") unless selector.respond_to?(:to_str)
44
95
 
45
96
  selector = selector.to_str
46
- raise Nokogiri::CSS::SyntaxError, "empty CSS selector" if selector.empty?
97
+ raise(Nokogiri::CSS::SyntaxError, "empty CSS selector") if selector.empty?
98
+
99
+ if visitor
100
+ raise ArgumentError, "cannot provide both :prefix and :visitor" if prefix
101
+ raise ArgumentError, "cannot provide both :ns and :visitor" if ns
102
+ end
103
+
104
+ visitor ||= begin
105
+ visitor_kw = {}
106
+ visitor_kw[:prefix] = prefix if prefix
107
+ visitor_kw[:namespaces] = ns if ns
47
108
 
48
- prefix = options.fetch(:prefix, Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX)
49
- visitor = options.fetch(:visitor) { Nokogiri::CSS::XPathVisitor.new }
50
- ns = options.fetch(:ns, {})
109
+ Nokogiri::CSS::XPathVisitor.new(**visitor_kw)
110
+ end
51
111
 
52
- Parser.new(ns).xpath_for(selector, prefix, visitor)
112
+ if cache
113
+ key = SelectorCache.key(selector: selector, visitor: visitor)
114
+ SelectorCache[key] ||= Parser.new.xpath_for(selector, visitor)
115
+ else
116
+ Parser.new.xpath_for(selector, visitor)
117
+ end
53
118
  end
54
119
  end
55
120
  end
56
121
  end
57
122
 
123
+ require_relative "css/selector_cache"
58
124
  require_relative "css/node"
59
125
  require_relative "css/xpath_visitor"
60
126
  x = $-w
@@ -23,11 +23,9 @@ module Nokogiri
23
23
  list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
24
24
  end
25
25
  else
26
- CSS::Parser.without_cache do
27
- list = xpath(
28
- *CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX),
29
- )
30
- end
26
+ list = xpath(
27
+ *CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX, cache: false),
28
+ )
31
29
  end
32
30
 
33
31
  super if list.empty?
@@ -6,9 +6,9 @@ module Nokogiri
6
6
  # Popular encoding aliases not known by all iconv implementations that Nokogiri should support.
7
7
  USEFUL_ALIASES = {
8
8
  # alias_name => true_name
9
- "NOKOGIRI-SENTINEL" => "UTF-8", # indicating the Nokogiri has installed aliases
9
+ "ISO-2022-JP" => "ISO-2022-JP", # only for JRuby tests, this is a no-op in CRuby
10
+ "NOKOGIRI-SENTINEL" => "ISO-2022-JP", # indicating the Nokogiri has installed aliases
10
11
  "Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
11
- "UTF-8" => "UTF-8", # for JRuby tests, this is a no-op in CRuby
12
12
  }
13
13
 
14
14
  class << self
@@ -161,52 +161,73 @@ module Nokogiri
161
161
  end
162
162
 
163
163
  class << self
164
- ###
165
- # Parse HTML. +string_or_io+ may be a String, or any object that
166
- # responds to _read_ and _close_ such as an IO, or StringIO.
167
- # +url+ is resource where this document is located. +encoding+ is the
168
- # encoding that should be used when processing the document. +options+
169
- # is a number that sets options in the parser, such as
170
- # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
171
- # Nokogiri::XML::ParseOptions.
172
- def parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML)
164
+ # :call-seq:
165
+ # parse(input) { |options| ... } => Nokogiri::HTML4::Document
166
+ # parse(input, url:, encoding:, options:) => Nokogiri::HTML4::Document
167
+ #
168
+ # Parse \HTML4 input from a String or IO object, and return a new HTML4::Document.
169
+ #
170
+ # [Required Parameters]
171
+ # - +input+ (String | IO) The content to be parsed.
172
+ #
173
+ # [Optional Keyword Arguments]
174
+ # - +url:+ (String) The base URI for this document.
175
+ #
176
+ # - +encoding:+ (String) The name of the encoding that should be used when processing the
177
+ # document. When not provided, the encoding will be determined based on the document
178
+ # content.
179
+ #
180
+ # - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
181
+ # behaviors during parsing. See ParseOptions for more information. The default value is
182
+ # +ParseOptions::DEFAULT_HTML+.
183
+ #
184
+ # [Yields]
185
+ # If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
186
+ # can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
187
+ #
188
+ # [Returns] Nokogiri::HTML4::Document
189
+ def parse(
190
+ input,
191
+ url_ = nil, encoding_ = nil, options_ = XML::ParseOptions::DEFAULT_HTML,
192
+ url: url_, encoding: encoding_, options: options_
193
+ )
173
194
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
174
195
  yield options if block_given?
175
196
 
176
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
197
+ url ||= input.respond_to?(:path) ? input.path : nil
177
198
 
178
- if string_or_io.respond_to?(:encoding)
179
- unless string_or_io.encoding == Encoding::ASCII_8BIT
180
- encoding ||= string_or_io.encoding.name
199
+ if input.respond_to?(:encoding)
200
+ unless input.encoding == Encoding::ASCII_8BIT
201
+ encoding ||= input.encoding.name
181
202
  end
182
203
  end
183
204
 
184
- if string_or_io.respond_to?(:read)
185
- if string_or_io.is_a?(Pathname)
205
+ if input.respond_to?(:read)
206
+ if input.is_a?(Pathname)
186
207
  # resolve the Pathname to the file and open it as an IO object, see #2110
187
- string_or_io = string_or_io.expand_path.open
188
- url ||= string_or_io.path
208
+ input = input.expand_path.open
209
+ url ||= input.path
189
210
  end
190
211
 
191
212
  unless encoding
192
- string_or_io = EncodingReader.new(string_or_io)
213
+ input = EncodingReader.new(input)
193
214
  begin
194
- return read_io(string_or_io, url, encoding, options.to_i)
215
+ return read_io(input, url, encoding, options.to_i)
195
216
  rescue EncodingReader::EncodingFound => e
196
217
  encoding = e.found_encoding
197
218
  end
198
219
  end
199
- return read_io(string_or_io, url, encoding, options.to_i)
220
+ return read_io(input, url, encoding, options.to_i)
200
221
  end
201
222
 
202
223
  # read_memory pukes on empty docs
203
- if string_or_io.nil? || string_or_io.empty?
224
+ if input.nil? || input.empty?
204
225
  return encoding ? new.tap { |i| i.encoding = encoding } : new
205
226
  end
206
227
 
207
- encoding ||= EncodingReader.detect_encoding(string_or_io)
228
+ encoding ||= EncodingReader.detect_encoding(input)
208
229
 
209
- read_memory(string_or_io, url, encoding, options.to_i)
230
+ read_memory(input, url, encoding, options.to_i)
210
231
  end
211
232
  end
212
233
  end