nokogiri 1.16.8-x86_64-darwin → 1.17.1-x86_64-darwin

Sign up to get free protection for your applications and to get access to all the features.
Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +11 -21
  3. data/README.md +4 -0
  4. data/dependencies.yml +6 -6
  5. data/ext/nokogiri/extconf.rb +191 -137
  6. data/ext/nokogiri/gumbo.c +69 -53
  7. data/ext/nokogiri/html4_document.c +10 -4
  8. data/ext/nokogiri/html4_element_description.c +18 -18
  9. data/ext/nokogiri/html4_sax_parser.c +40 -0
  10. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  11. data/ext/nokogiri/html4_sax_push_parser.c +25 -24
  12. data/ext/nokogiri/include/libexslt/exsltconfig.h +3 -3
  13. data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +12 -19
  14. data/ext/nokogiri/include/libxml2/libxml/c14n.h +1 -12
  15. data/ext/nokogiri/include/libxml2/libxml/debugXML.h +1 -1
  16. data/ext/nokogiri/include/libxml2/libxml/encoding.h +9 -0
  17. data/ext/nokogiri/include/libxml2/libxml/entities.h +12 -1
  18. data/ext/nokogiri/include/libxml2/libxml/hash.h +19 -0
  19. data/ext/nokogiri/include/libxml2/libxml/list.h +2 -2
  20. data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +17 -0
  21. data/ext/nokogiri/include/libxml2/libxml/parser.h +60 -54
  22. data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +9 -1
  23. data/ext/nokogiri/include/libxml2/libxml/pattern.h +6 -0
  24. data/ext/nokogiri/include/libxml2/libxml/tree.h +32 -12
  25. data/ext/nokogiri/include/libxml2/libxml/uri.h +11 -0
  26. data/ext/nokogiri/include/libxml2/libxml/valid.h +29 -2
  27. data/ext/nokogiri/include/libxml2/libxml/xinclude.h +7 -0
  28. data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +21 -4
  29. data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +14 -0
  30. data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +111 -15
  31. data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +8 -45
  32. data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +2 -0
  33. data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +5 -0
  34. data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +165 -1
  35. data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +7 -171
  36. data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +1 -0
  37. data/ext/nokogiri/include/libxml2/libxml/xpath.h +4 -0
  38. data/ext/nokogiri/include/libxslt/xsltInternals.h +3 -0
  39. data/ext/nokogiri/include/libxslt/xsltconfig.h +4 -37
  40. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  41. data/ext/nokogiri/nokogiri.c +9 -2
  42. data/ext/nokogiri/nokogiri.h +18 -33
  43. data/ext/nokogiri/xml_attr.c +1 -1
  44. data/ext/nokogiri/xml_cdata.c +2 -10
  45. data/ext/nokogiri/xml_comment.c +3 -8
  46. data/ext/nokogiri/xml_document.c +163 -156
  47. data/ext/nokogiri/xml_document_fragment.c +10 -25
  48. data/ext/nokogiri/xml_dtd.c +1 -1
  49. data/ext/nokogiri/xml_element_content.c +9 -9
  50. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  51. data/ext/nokogiri/xml_namespace.c +6 -6
  52. data/ext/nokogiri/xml_node.c +134 -103
  53. data/ext/nokogiri/xml_node_set.c +46 -44
  54. data/ext/nokogiri/xml_reader.c +54 -58
  55. data/ext/nokogiri/xml_relax_ng.c +35 -56
  56. data/ext/nokogiri/xml_sax_parser.c +156 -88
  57. data/ext/nokogiri/xml_sax_parser_context.c +213 -131
  58. data/ext/nokogiri/xml_sax_push_parser.c +68 -49
  59. data/ext/nokogiri/xml_schema.c +50 -85
  60. data/ext/nokogiri/xml_syntax_error.c +19 -11
  61. data/ext/nokogiri/xml_text.c +2 -4
  62. data/ext/nokogiri/xml_xpath_context.c +2 -2
  63. data/ext/nokogiri/xslt_stylesheet.c +8 -8
  64. data/lib/nokogiri/3.0/nokogiri.bundle +0 -0
  65. data/lib/nokogiri/3.1/nokogiri.bundle +0 -0
  66. data/lib/nokogiri/3.2/nokogiri.bundle +0 -0
  67. data/lib/nokogiri/3.3/nokogiri.bundle +0 -0
  68. data/lib/nokogiri/class_resolver.rb +1 -1
  69. data/lib/nokogiri/css/node.rb +6 -2
  70. data/lib/nokogiri/css/parser.rb +6 -4
  71. data/lib/nokogiri/css/parser.y +2 -2
  72. data/lib/nokogiri/css/parser_extras.rb +6 -66
  73. data/lib/nokogiri/css/selector_cache.rb +38 -0
  74. data/lib/nokogiri/css/tokenizer.rb +4 -4
  75. data/lib/nokogiri/css/tokenizer.rex +9 -8
  76. data/lib/nokogiri/css/xpath_visitor.rb +42 -6
  77. data/lib/nokogiri/css.rb +86 -20
  78. data/lib/nokogiri/decorators/slop.rb +3 -5
  79. data/lib/nokogiri/encoding_handler.rb +2 -2
  80. data/lib/nokogiri/html4/document.rb +44 -23
  81. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  82. data/lib/nokogiri/html4/encoding_reader.rb +1 -1
  83. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  84. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  85. data/lib/nokogiri/html4.rb +9 -14
  86. data/lib/nokogiri/html5/builder.rb +40 -0
  87. data/lib/nokogiri/html5/document.rb +61 -30
  88. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  89. data/lib/nokogiri/html5/node.rb +4 -4
  90. data/lib/nokogiri/html5.rb +114 -72
  91. data/lib/nokogiri/version/constant.rb +1 -1
  92. data/lib/nokogiri/xml/builder.rb +8 -1
  93. data/lib/nokogiri/xml/document.rb +70 -26
  94. data/lib/nokogiri/xml/document_fragment.rb +84 -13
  95. data/lib/nokogiri/xml/node.rb +82 -11
  96. data/lib/nokogiri/xml/node_set.rb +9 -7
  97. data/lib/nokogiri/xml/parse_options.rb +1 -1
  98. data/lib/nokogiri/xml/pp/node.rb +6 -1
  99. data/lib/nokogiri/xml/reader.rb +46 -13
  100. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  101. data/lib/nokogiri/xml/sax/document.rb +174 -83
  102. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  103. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  104. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  105. data/lib/nokogiri/xml/sax.rb +48 -0
  106. data/lib/nokogiri/xml/schema.rb +112 -45
  107. data/lib/nokogiri/xml/searchable.rb +6 -8
  108. data/lib/nokogiri/xml/syntax_error.rb +22 -0
  109. data/lib/nokogiri/xml.rb +13 -24
  110. data/lib/nokogiri/xslt.rb +3 -9
  111. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  112. metadata +8 -4
  113. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
@@ -23,8 +23,12 @@ module Nokogiri
23
23
 
24
24
  ###
25
25
  # Convert this CSS node to xpath with +prefix+ using +visitor+
26
- def to_xpath(prefix, visitor)
27
- prefix = "." if ALLOW_COMBINATOR_ON_SELF.include?(type) && value.first.nil?
26
+ def to_xpath(visitor)
27
+ prefix = if ALLOW_COMBINATOR_ON_SELF.include?(type) && value.first.nil?
28
+ "."
29
+ else
30
+ visitor.prefix
31
+ end
28
32
  prefix + visitor.accept(self)
29
33
  end
30
34
 
@@ -1,8 +1,8 @@
1
1
  # frozen_string_literal: true
2
2
  #
3
3
  # DO NOT MODIFY!!!!
4
- # This file is automatically generated by Racc 1.6.0
5
- # from Racc grammar file "".
4
+ # This file is automatically generated by Racc 1.8.0
5
+ # from Racc grammar file "parser.y".
6
6
  #
7
7
 
8
8
  require 'racc/parser.rb'
@@ -291,6 +291,7 @@ Racc_arg = [
291
291
  racc_shift_n,
292
292
  racc_reduce_n,
293
293
  racc_use_result_var ]
294
+ Ractor.make_shareable(Racc_arg) if defined?(Ractor)
294
295
 
295
296
  Racc_token_to_s_table = [
296
297
  "$end",
@@ -351,6 +352,7 @@ Racc_token_to_s_table = [
351
352
  "negation",
352
353
  "eql_incl_dash",
353
354
  "negation_arg" ]
355
+ Ractor.make_shareable(Racc_token_to_s_table) if defined?(Ractor)
354
356
 
355
357
  Racc_debug_parser = false
356
358
 
@@ -468,12 +470,12 @@ def _reduce_23(val, _values, result)
468
470
  end
469
471
 
470
472
  def _reduce_24(val, _values, result)
471
- result = Node.new(:ELEMENT_NAME, [[val[0], val[2]].compact.join(':')])
473
+ result = Node.new(:ELEMENT_NAME, [val[0], val[2]])
472
474
  result
473
475
  end
474
476
 
475
477
  def _reduce_25(val, _values, result)
476
- name = @namespaces.key?('xmlns') ? "xmlns:#{val[0]}" : val[0]
478
+ name = val[0]
477
479
  result = Node.new(:ELEMENT_NAME, [name])
478
480
 
479
481
  result
@@ -64,9 +64,9 @@ rule
64
64
  ;
65
65
 
66
66
  namespaced_ident:
67
- namespace '|' IDENT { result = Node.new(:ELEMENT_NAME, [[val[0], val[2]].compact.join(':')]) }
67
+ namespace '|' IDENT { result = Node.new(:ELEMENT_NAME, [val[0], val[2]]) }
68
68
  | IDENT {
69
- name = @namespaces.key?('xmlns') ? "xmlns:#{val[0]}" : val[0]
69
+ name = val[0]
70
70
  result = Node.new(:ELEMENT_NAME, [name])
71
71
  }
72
72
  ;
@@ -5,62 +5,9 @@ require "thread"
5
5
  module Nokogiri
6
6
  module CSS
7
7
  class Parser < Racc::Parser # :nodoc:
8
- CACHE_SWITCH_NAME = :nokogiri_css_parser_cache_is_off
9
-
10
- @cache = {}
11
- @mutex = Mutex.new
12
-
13
- class << self
14
- # Return a thread-local boolean indicating whether the CSS-to-XPath cache is active. (Default is `true`.)
15
- def cache_on?
16
- !Thread.current[CACHE_SWITCH_NAME]
17
- end
18
-
19
- # Set a thread-local boolean to turn caching on and off. Truthy values turn the cache on, falsey values turn the cache off.
20
- def set_cache(value) # rubocop:disable Naming/AccessorMethodName
21
- Thread.current[CACHE_SWITCH_NAME] = !value
22
- end
23
-
24
- # Get the css selector in +string+ from the cache
25
- def [](string)
26
- return unless cache_on?
27
-
28
- @mutex.synchronize { @cache[string] }
29
- end
30
-
31
- # Set the css selector in +string+ in the cache to +value+
32
- def []=(string, value)
33
- return value unless cache_on?
34
-
35
- @mutex.synchronize { @cache[string] = value }
36
- end
37
-
38
- # Clear the cache
39
- def clear_cache(create_new_object = false)
40
- @mutex.synchronize do
41
- if create_new_object
42
- @cache = {}
43
- else
44
- @cache.clear
45
- end
46
- end
47
- end
48
-
49
- # Execute +block+ without cache
50
- def without_cache(&block)
51
- original_cache_setting = cache_on?
52
- set_cache(false)
53
- yield
54
- ensure
55
- set_cache(original_cache_setting)
56
- end
57
- end
58
-
59
- # Create a new CSS parser with respect to +namespaces+
60
- def initialize(namespaces = {})
8
+ def initialize
61
9
  @tokenizer = Tokenizer.new
62
- @namespaces = namespaces
63
- super()
10
+ super
64
11
  end
65
12
 
66
13
  def parse(string)
@@ -72,11 +19,10 @@ module Nokogiri
72
19
  @tokenizer.next_token
73
20
  end
74
21
 
75
- # Get the xpath for +string+ using +options+
76
- def xpath_for(string, prefix, visitor)
77
- key = cache_key(string, prefix, visitor)
78
- self.class[key] ||= parse(string).map do |ast|
79
- ast.to_xpath(prefix, visitor)
22
+ # Get the xpath for +selector+ using +visitor+
23
+ def xpath_for(selector, visitor)
24
+ parse(selector).map do |ast|
25
+ ast.to_xpath(visitor)
80
26
  end
81
27
  end
82
28
 
@@ -85,12 +31,6 @@ module Nokogiri
85
31
  after = value_stack.compact.last
86
32
  raise SyntaxError, "unexpected '#{error_value}' after '#{after}'"
87
33
  end
88
-
89
- def cache_key(query, prefix, visitor)
90
- if self.class.cache_on?
91
- [query, prefix, @namespaces, visitor.config]
92
- end
93
- end
94
34
  end
95
35
  end
96
36
  end
@@ -0,0 +1,38 @@
1
+ # frozen_string_literal: true
2
+
3
+ module Nokogiri
4
+ module CSS
5
+ module SelectorCache # :nodoc:
6
+ @cache = {}
7
+ @mutex = Mutex.new
8
+
9
+ class << self
10
+ # Retrieve the cached XPath expressions for the key
11
+ def [](key)
12
+ @mutex.synchronize { @cache[key] }
13
+ end
14
+
15
+ # Insert the XPath expressions `value` at the cache key
16
+ def []=(key, value)
17
+ @mutex.synchronize { @cache[key] = value }
18
+ end
19
+
20
+ # Clear the cache
21
+ def clear_cache(create_new_object = false)
22
+ @mutex.synchronize do
23
+ if create_new_object # used in tests to avoid 'method redefined' warnings when injecting spies
24
+ @cache = {}
25
+ else
26
+ @cache.clear
27
+ end
28
+ end
29
+ end
30
+
31
+ # Construct a unique cache key
32
+ def key(selector:, visitor:)
33
+ [selector, visitor.config]
34
+ end
35
+ end
36
+ end
37
+ end
38
+ end
@@ -63,13 +63,13 @@ class Tokenizer
63
63
  when (text = @ss.scan(/has\([\s]*/))
64
64
  action { [:HAS, text] }
65
65
 
66
- when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*\([\s]*/))
66
+ when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*\([\s]*/))
67
67
  action { [:FUNCTION, text] }
68
68
 
69
- when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*/))
69
+ when (text = @ss.scan(/-?([_A-Za-z]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*/))
70
70
  action { [:IDENT, text] }
71
71
 
72
- when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])+/))
72
+ when (text = @ss.scan(/\#([_A-Za-z0-9-]|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))+/))
73
73
  action { [:HASH, text] }
74
74
 
75
75
  when (text = @ss.scan(/[\s]*~=[\s]*/))
@@ -132,7 +132,7 @@ class Tokenizer
132
132
  when (text = @ss.scan(/[\s]+/))
133
133
  action { [:S, text] }
134
134
 
135
- when (text = @ss.scan(/"([^\n\r\f"]|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|\n|\r\n|\r|\f|[^\0-\177]|\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f])*(?<!\\)(?:\\{2})*'/))
135
+ when (text = @ss.scan(/("([^\n\r\f"]|(\n|\r\n|\r|\f)|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*(?<!\\)(?:\\{2})*"|'([^\n\r\f']|(\n|\r\n|\r|\f)|[^\0-\177]|(\\[0-9A-Fa-f]{1,6}(\r\n|[\s])?|\\[^\n\r\f0-9A-Fa-f]))*(?<!\\)(?:\\{2})*')/))
136
136
  action { [:STRING, text] }
137
137
 
138
138
  when (text = @ss.scan(/./))
@@ -4,20 +4,21 @@ module CSS
4
4
  class Tokenizer
5
5
 
6
6
  macro
7
- nl \n|\r\n|\r|\f
7
+ nl (\n|\r\n|\r|\f)
8
8
  w [\s]*
9
9
  nonascii [^\0-\177]
10
10
  num -?([0-9]+|[0-9]*\.[0-9]+)
11
11
  unicode \\[0-9A-Fa-f]{1,6}(\r\n|[\s])?
12
12
 
13
- escape {unicode}|\\[^\n\r\f0-9A-Fa-f]
14
- nmchar [_A-Za-z0-9-]|{nonascii}|{escape}
15
- nmstart [_A-Za-z]|{nonascii}|{escape}
16
- ident -?({nmstart})({nmchar})*
17
- name ({nmchar})+
13
+ escape ({unicode}|\\[^\n\r\f0-9A-Fa-f])
14
+ nmchar ([_A-Za-z0-9-]|{nonascii}|{escape})
15
+ nmstart ([_A-Za-z]|{nonascii}|{escape})
16
+ name {nmstart}{nmchar}*
17
+ ident -?{name}
18
+ charref {nmchar}+
18
19
  string1 "([^\n\r\f"]|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*"
19
20
  string2 '([^\n\r\f']|{nl}|{nonascii}|{escape})*(?<!\\)(?:\\{2})*'
20
- string {string1}|{string2}
21
+ string ({string1}|{string2})
21
22
 
22
23
  rule
23
24
 
@@ -26,7 +27,7 @@ rule
26
27
  has\({w} { [:HAS, text] }
27
28
  {ident}\({w} { [:FUNCTION, text] }
28
29
  {ident} { [:IDENT, text] }
29
- \#{name} { [:HASH, text] }
30
+ \#{charref} { [:HASH, text] }
30
31
  {w}~={w} { [:INCLUDES, text] }
31
32
  {w}\|={w} { [:DASHMATCH, text] }
32
33
  {w}\^={w} { [:PREFIXMATCH, text] }
@@ -44,6 +44,18 @@ module Nokogiri
44
44
  VALUES = [XML, HTML4, HTML5]
45
45
  end
46
46
 
47
+ # The visitor configuration set via the +builtins:+ keyword argument to XPathVisitor.new.
48
+ attr_reader :builtins
49
+
50
+ # The visitor configuration set via the +doctype:+ keyword argument to XPathVisitor.new.
51
+ attr_reader :doctype
52
+
53
+ # The visitor configuration set via the +prefix:+ keyword argument to XPathVisitor.new.
54
+ attr_reader :prefix
55
+
56
+ # The visitor configuration set via the +namespaces:+ keyword argument to XPathVisitor.new.
57
+ attr_reader :namespaces
58
+
47
59
  # :call-seq:
48
60
  # new() → XPathVisitor
49
61
  # new(builtins:, doctype:) → XPathVisitor
@@ -54,7 +66,12 @@ module Nokogiri
54
66
  #
55
67
  # [Returns] XPathVisitor
56
68
  #
57
- def initialize(builtins: BuiltinsConfig::NEVER, doctype: DoctypeConfig::XML)
69
+ def initialize(
70
+ builtins: BuiltinsConfig::NEVER,
71
+ doctype: DoctypeConfig::XML,
72
+ prefix: Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX,
73
+ namespaces: nil
74
+ )
58
75
  unless BuiltinsConfig::VALUES.include?(builtins)
59
76
  raise(ArgumentError, "Invalid values #{builtins.inspect} for builtins: keyword parameter")
60
77
  end
@@ -64,6 +81,8 @@ module Nokogiri
64
81
 
65
82
  @builtins = builtins
66
83
  @doctype = doctype
84
+ @prefix = prefix
85
+ @namespaces = namespaces
67
86
  end
68
87
 
69
88
  # :call-seq: config() → Hash
@@ -72,7 +91,7 @@ module Nokogiri
72
91
  # a Hash representing the configuration of the XPathVisitor, suitable for use as
73
92
  # part of the CSS cache key.
74
93
  def config
75
- { builtins: @builtins, doctype: @doctype }
94
+ { builtins: @builtins, doctype: @doctype, prefix: @prefix, namespaces: @namespaces }
76
95
  end
77
96
 
78
97
  # :stopdoc:
@@ -128,6 +147,8 @@ module Nokogiri
128
147
  is_direct = node.value[1].value[0].nil? # e.g. "has(> a)", "has(~ a)", "has(+ a)"
129
148
  ".#{"//" unless is_direct}#{node.value[1].accept(self)}"
130
149
  else
150
+ validate_xpath_function_name(node.value.first)
151
+
131
152
  # xpath function call, let's marshal those arguments
132
153
  args = ["."]
133
154
  args += node.value[1..-1].map do |n|
@@ -207,6 +228,7 @@ module Nokogiri
207
228
  when "parent" then "node()"
208
229
  when "root" then "not(parent::*)"
209
230
  else
231
+ validate_xpath_function_name(node.value.first)
210
232
  "nokogiri:#{node.value.first}(.)"
211
233
  end
212
234
  end
@@ -255,6 +277,14 @@ module Nokogiri
255
277
  else
256
278
  "*[local-name()='#{node.value.first}']"
257
279
  end
280
+ elsif node.value.length == 2 # has a namespace prefix
281
+ if node.value.first.nil? # namespace prefix is empty
282
+ node.value.last
283
+ else
284
+ node.value.join(":")
285
+ end
286
+ elsif @namespaces&.key?("xmlns") # apply the default namespace if it's declared
287
+ "xmlns:#{node.value.first}"
258
288
  else
259
289
  node.value.first
260
290
  end
@@ -270,11 +300,17 @@ module Nokogiri
270
300
 
271
301
  private
272
302
 
303
+ def validate_xpath_function_name(name)
304
+ if name.start_with?("-")
305
+ raise Nokogiri::CSS::SyntaxError, "Invalid XPath function name '#{name}'"
306
+ end
307
+ end
308
+
273
309
  def html5_element_name_needs_namespace_handling(node)
274
- # if this is the wildcard selector "*", use it as normal
275
- node.value.first != "*" &&
276
- # if there is already a namespace (i.e., it is a prefixed QName), use it as normal
277
- !node.value.first.include?(":")
310
+ # if there is already a namespace (i.e., it is a prefixed QName), use it as normal
311
+ node.value.length == 1 &&
312
+ # if this is the wildcard selector "*", use it as normal
313
+ node.value.first != "*"
278
314
  end
279
315
 
280
316
  def nth(node, options = {})
data/lib/nokogiri/css.rb CHANGED
@@ -8,53 +8,119 @@ module Nokogiri
8
8
  # TODO: Deprecate this method ahead of 2.0 and delete it in 2.0.
9
9
  # It is not used by Nokogiri and shouldn't be part of the public API.
10
10
  def parse(selector) # :nodoc:
11
+ warn("Nokogiri::CSS.parse is deprecated and will be removed in a future version of Nokogiri. Use Nokogiri::CSS::Parser#parse instead.", uplevel: 1, category: :deprecated)
11
12
  Parser.new.parse(selector)
12
13
  end
13
14
 
14
15
  # :call-seq:
15
- # xpath_for(selector) → String
16
- # xpath_for(selector [, prefix:] [, visitor:] [, ns:]) → String
16
+ # xpath_for(selector_list) → Array<String>
17
+ # xpath_for(selector_list [, prefix:] [, ns:] [, visitor:] [, cache:]) → Array<String>
17
18
  #
18
- # Translate a CSS selector to the equivalent XPath query.
19
+ # Translate a CSS selector list to the equivalent XPath expressions.
20
+ #
21
+ # 💡 Note that translated queries are cached by default for performance reasons.
22
+ #
23
+ # ⚠ Users should prefer Nokogiri::XML::Searchable#css, which is mixed into all document and
24
+ # node classes, for querying documents with CSS selectors. This method is the underlying
25
+ # mechanism used by XML::Searchable and is provided solely for advanced users to translate
26
+ # \CSS selectors to XPath directly.
27
+ #
28
+ # Also see Nokogiri::XML::Searchable#css for documentation on supported CSS selector features,
29
+ # some extended syntax that Nokogiri supports, and advanced CSS features like pseudo-class
30
+ # functions.
19
31
  #
20
32
  # [Parameters]
21
- # - +selector+ (String) The CSS selector to be translated into XPath
33
+ # - +selector_list+ (String)
22
34
  #
35
+ # The CSS selector to be translated into XPath. This is always a String, but that string
36
+ # value may be a {selector list}[https://www.w3.org/TR/selectors-4/#grouping] (see
37
+ # examples).
38
+ #
39
+ # [Keyword arguments]
23
40
  # - +prefix:+ (String)
24
41
  #
25
- # The XPath prefix for the query, see Nokogiri::XML::XPath for some options. Default is
26
- # +XML::XPath::GLOBAL_SEARCH_PREFIX+.
42
+ # The XPath expression prefix which determines the search context. See Nokogiri::XML::XPath
43
+ # for standard options. Default is +XPath::GLOBAL_SEARCH_PREFIX+.
44
+ #
45
+ # - +ns:+ (Hash<String ⇒ String>, nil)
46
+ #
47
+ # Namespaces that are referenced in the query, if any. This is a hash where the keys are the
48
+ # namespace prefix and the values are the namespace URIs. Default is +nil+ indicating an
49
+ # empty set of namespaces.
27
50
  #
28
51
  # - +visitor:+ (Nokogiri::CSS::XPathVisitor)
29
52
  #
30
- # The visitor class to use to transform the AST into XPath. Default is
31
- # +Nokogiri::CSS::XPathVisitor.new+.
53
+ # Use this XPathVisitor object to transform the CSS AST into XPath expressions. See
54
+ # Nokogiri::CSS::XPathVisitor for more information on some of the complex behavior that can
55
+ # be customized for your document type. Default is +Nokogiri::CSS::XPathVisitor.new+.
56
+ #
57
+ # ⚠ Note that this option is mutually exclusive with +prefix+ and +ns+. If +visitor+ is
58
+ # provided, +prefix+ and +ns+ must not be present.
59
+ #
60
+ # - +cache:+ (Boolean)
61
+ #
62
+ # Whether to use the SelectorCache for the translated query to ensure that repeated queries
63
+ # don't incur the overhead of re-parsing the selector. Default is +true+.
32
64
  #
33
- # - +ns:+ (Hash<String ⇒ String>)
65
+ # [Returns] (Array<String>) The equivalent set of XPath expressions for +selector_list+
34
66
  #
35
- # The namespaces that are referenced in the query, if any. This is a hash where the keys are
36
- # the namespace prefix and the values are the namespace URIs. Default is an empty Hash.
67
+ # *Example* with a simple selector:
37
68
  #
38
- # [Returns] (String) The equivalent XPath query for +selector+
69
+ # Nokogiri::CSS.xpath_for("div") # => ["//div"]
39
70
  #
40
- # 💡 Note that translated queries are cached for performance concerns.
71
+ # *Example* with a compound selector:
41
72
  #
42
- def xpath_for(selector, options = {})
43
- raise TypeError, "no implicit conversion of #{selector.inspect} to String" unless selector.respond_to?(:to_str)
73
+ # Nokogiri::CSS.xpath_for("div.xl") # => ["//div[contains(concat(' ',normalize-space(@class),' '),' xl ')]"]
74
+ #
75
+ # *Example* with a complex selector:
76
+ #
77
+ # Nokogiri::CSS.xpath_for("h1 + div") # => ["//h1/following-sibling::*[1]/self::div"]
78
+ #
79
+ # *Example* with a selector list:
80
+ #
81
+ # Nokogiri::CSS.xpath_for("h1, h2, h3") # => ["//h1", "//h2", "//h3"]
82
+ #
83
+ def xpath_for(
84
+ selector, options = nil,
85
+ prefix: options&.delete(:prefix),
86
+ visitor: options&.delete(:visitor),
87
+ ns: options&.delete(:ns),
88
+ cache: true
89
+ )
90
+ unless options.nil?
91
+ warn("Nokogiri::CSS.xpath_for: Passing options as an explicit hash is deprecated. Use keyword arguments instead. This will become an error in a future release.", uplevel: 1, category: :deprecated)
92
+ end
93
+
94
+ raise(TypeError, "no implicit conversion of #{selector.inspect} to String") unless selector.respond_to?(:to_str)
44
95
 
45
96
  selector = selector.to_str
46
- raise Nokogiri::CSS::SyntaxError, "empty CSS selector" if selector.empty?
97
+ raise(Nokogiri::CSS::SyntaxError, "empty CSS selector") if selector.empty?
98
+
99
+ if visitor
100
+ raise ArgumentError, "cannot provide both :prefix and :visitor" if prefix
101
+ raise ArgumentError, "cannot provide both :ns and :visitor" if ns
102
+ end
103
+
104
+ visitor ||= begin
105
+ visitor_kw = {}
106
+ visitor_kw[:prefix] = prefix if prefix
107
+ visitor_kw[:namespaces] = ns if ns
47
108
 
48
- prefix = options.fetch(:prefix, Nokogiri::XML::XPath::GLOBAL_SEARCH_PREFIX)
49
- visitor = options.fetch(:visitor) { Nokogiri::CSS::XPathVisitor.new }
50
- ns = options.fetch(:ns, {})
109
+ Nokogiri::CSS::XPathVisitor.new(**visitor_kw)
110
+ end
51
111
 
52
- Parser.new(ns).xpath_for(selector, prefix, visitor)
112
+ if cache
113
+ key = SelectorCache.key(selector: selector, visitor: visitor)
114
+ SelectorCache[key] ||= Parser.new.xpath_for(selector, visitor)
115
+ else
116
+ Parser.new.xpath_for(selector, visitor)
117
+ end
53
118
  end
54
119
  end
55
120
  end
56
121
  end
57
122
 
123
+ require_relative "css/selector_cache"
58
124
  require_relative "css/node"
59
125
  require_relative "css/xpath_visitor"
60
126
  x = $-w
@@ -23,11 +23,9 @@ module Nokogiri
23
23
  list = xpath("#{XPATH_PREFIX}#{name}[#{conds}]")
24
24
  end
25
25
  else
26
- CSS::Parser.without_cache do
27
- list = xpath(
28
- *CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX),
29
- )
30
- end
26
+ list = xpath(
27
+ *CSS.xpath_for("#{name}#{args.first}", prefix: XPATH_PREFIX, cache: false),
28
+ )
31
29
  end
32
30
 
33
31
  super if list.empty?
@@ -6,9 +6,9 @@ module Nokogiri
6
6
  # Popular encoding aliases not known by all iconv implementations that Nokogiri should support.
7
7
  USEFUL_ALIASES = {
8
8
  # alias_name => true_name
9
- "NOKOGIRI-SENTINEL" => "UTF-8", # indicating the Nokogiri has installed aliases
9
+ "ISO-2022-JP" => "ISO-2022-JP", # only for JRuby tests, this is a no-op in CRuby
10
+ "NOKOGIRI-SENTINEL" => "ISO-2022-JP", # indicating the Nokogiri has installed aliases
10
11
  "Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
11
- "UTF-8" => "UTF-8", # for JRuby tests, this is a no-op in CRuby
12
12
  }
13
13
 
14
14
  class << self
@@ -161,52 +161,73 @@ module Nokogiri
161
161
  end
162
162
 
163
163
  class << self
164
- ###
165
- # Parse HTML. +string_or_io+ may be a String, or any object that
166
- # responds to _read_ and _close_ such as an IO, or StringIO.
167
- # +url+ is the resource where this document is located. +encoding+ is the
168
- # encoding that should be used when processing the document. +options+
169
- # is a number that sets options in the parser, such as
170
- # Nokogiri::XML::ParseOptions::RECOVER. See the constants in
171
- # Nokogiri::XML::ParseOptions.
172
- def parse(string_or_io, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML)
164
+ # :call-seq:
165
+ # parse(input) { |options| ... } => Nokogiri::HTML4::Document
166
+ # parse(input, url:, encoding:, options:) => Nokogiri::HTML4::Document
167
+ #
168
+ # Parse \HTML4 input from a String or IO object, and return a new HTML4::Document.
169
+ #
170
+ # [Required Parameters]
171
+ # - +input+ (String | IO) The content to be parsed.
172
+ #
173
+ # [Optional Keyword Arguments]
174
+ # - +url:+ (String) The base URI for this document.
175
+ #
176
+ # - +encoding:+ (String) The name of the encoding that should be used when processing the
177
+ # document. When not provided, the encoding will be determined based on the document
178
+ # content.
179
+ #
180
+ # - +options:+ (Nokogiri::XML::ParseOptions) Configuration object that determines some
181
+ # behaviors during parsing. See ParseOptions for more information. The default value is
182
+ # +ParseOptions::DEFAULT_HTML+.
183
+ #
184
+ # [Yields]
185
+ # If a block is given, a Nokogiri::XML::ParseOptions object is yielded to the block which
186
+ # can be configured before parsing. See Nokogiri::XML::ParseOptions for more information.
187
+ #
188
+ # [Returns] Nokogiri::HTML4::Document
189
+ def parse(
190
+ input,
191
+ url_ = nil, encoding_ = nil, options_ = XML::ParseOptions::DEFAULT_HTML,
192
+ url: url_, encoding: encoding_, options: options_
193
+ )
173
194
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
174
195
  yield options if block_given?
175
196
 
176
- url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
197
+ url ||= input.respond_to?(:path) ? input.path : nil
177
198
 
178
- if string_or_io.respond_to?(:encoding)
179
- unless string_or_io.encoding == Encoding::ASCII_8BIT
180
- encoding ||= string_or_io.encoding.name
199
+ if input.respond_to?(:encoding)
200
+ unless input.encoding == Encoding::ASCII_8BIT
201
+ encoding ||= input.encoding.name
181
202
  end
182
203
  end
183
204
 
184
- if string_or_io.respond_to?(:read)
185
- if string_or_io.is_a?(Pathname)
205
+ if input.respond_to?(:read)
206
+ if input.is_a?(Pathname)
186
207
  # resolve the Pathname to the file and open it as an IO object, see #2110
187
- string_or_io = string_or_io.expand_path.open
188
- url ||= string_or_io.path
208
+ input = input.expand_path.open
209
+ url ||= input.path
189
210
  end
190
211
 
191
212
  unless encoding
192
- string_or_io = EncodingReader.new(string_or_io)
213
+ input = EncodingReader.new(input)
193
214
  begin
194
- return read_io(string_or_io, url, encoding, options.to_i)
215
+ return read_io(input, url, encoding, options.to_i)
195
216
  rescue EncodingReader::EncodingFound => e
196
217
  encoding = e.found_encoding
197
218
  end
198
219
  end
199
- return read_io(string_or_io, url, encoding, options.to_i)
220
+ return read_io(input, url, encoding, options.to_i)
200
221
  end
201
222
 
202
223
  # read_memory pukes on empty docs
203
- if string_or_io.nil? || string_or_io.empty?
224
+ if input.nil? || input.empty?
204
225
  return encoding ? new.tap { |i| i.encoding = encoding } : new
205
226
  end
206
227
 
207
- encoding ||= EncodingReader.detect_encoding(string_or_io)
228
+ encoding ||= EncodingReader.detect_encoding(input)
208
229
 
209
- read_memory(string_or_io, url, encoding, options.to_i)
230
+ read_memory(input, url, encoding, options.to_i)
210
231
  end
211
232
  end
212
233
  end