nokogiri 1.12.5 → 1.13.8

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (113) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +2 -0
  3. data/README.md +9 -7
  4. data/bin/nokogiri +63 -50
  5. data/dependencies.yml +13 -64
  6. data/ext/nokogiri/extconf.rb +66 -44
  7. data/ext/nokogiri/gumbo.c +1 -1
  8. data/ext/nokogiri/html4_sax_parser_context.c +2 -3
  9. data/ext/nokogiri/nokogiri.h +8 -0
  10. data/ext/nokogiri/xml_attr.c +2 -2
  11. data/ext/nokogiri/xml_attribute_decl.c +3 -3
  12. data/ext/nokogiri/xml_cdata.c +1 -1
  13. data/ext/nokogiri/xml_document.c +36 -36
  14. data/ext/nokogiri/xml_document_fragment.c +0 -2
  15. data/ext/nokogiri/xml_dtd.c +10 -10
  16. data/ext/nokogiri/xml_element_decl.c +3 -3
  17. data/ext/nokogiri/xml_encoding_handler.c +25 -11
  18. data/ext/nokogiri/xml_entity_decl.c +5 -5
  19. data/ext/nokogiri/xml_node.c +707 -381
  20. data/ext/nokogiri/xml_node_set.c +4 -4
  21. data/ext/nokogiri/xml_reader.c +88 -11
  22. data/ext/nokogiri/xml_sax_parser_context.c +10 -3
  23. data/ext/nokogiri/xml_schema.c +3 -3
  24. data/ext/nokogiri/xml_text.c +1 -1
  25. data/ext/nokogiri/xml_xpath_context.c +73 -50
  26. data/ext/nokogiri/xslt_stylesheet.c +107 -9
  27. data/gumbo-parser/src/parser.c +0 -11
  28. data/lib/nokogiri/class_resolver.rb +67 -0
  29. data/lib/nokogiri/css/node.rb +9 -8
  30. data/lib/nokogiri/css/parser.rb +360 -341
  31. data/lib/nokogiri/css/parser.y +249 -244
  32. data/lib/nokogiri/css/parser_extras.rb +22 -20
  33. data/lib/nokogiri/css/syntax_error.rb +1 -0
  34. data/lib/nokogiri/css/tokenizer.rb +4 -3
  35. data/lib/nokogiri/css/tokenizer.rex +3 -2
  36. data/lib/nokogiri/css/xpath_visitor.rb +179 -82
  37. data/lib/nokogiri/css.rb +38 -6
  38. data/lib/nokogiri/decorators/slop.rb +8 -7
  39. data/lib/nokogiri/extension.rb +1 -1
  40. data/lib/nokogiri/gumbo.rb +1 -0
  41. data/lib/nokogiri/html.rb +16 -10
  42. data/lib/nokogiri/html4/builder.rb +1 -0
  43. data/lib/nokogiri/html4/document.rb +88 -77
  44. data/lib/nokogiri/html4/document_fragment.rb +11 -7
  45. data/lib/nokogiri/html4/element_description.rb +1 -0
  46. data/lib/nokogiri/html4/element_description_defaults.rb +426 -520
  47. data/lib/nokogiri/html4/entity_lookup.rb +2 -1
  48. data/lib/nokogiri/html4/sax/parser.rb +5 -2
  49. data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
  50. data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
  51. data/lib/nokogiri/html4.rb +11 -5
  52. data/lib/nokogiri/html5/document.rb +27 -10
  53. data/lib/nokogiri/html5/document_fragment.rb +5 -2
  54. data/lib/nokogiri/html5/node.rb +10 -3
  55. data/lib/nokogiri/html5.rb +69 -64
  56. data/lib/nokogiri/jruby/dependencies.rb +10 -9
  57. data/lib/nokogiri/syntax_error.rb +1 -0
  58. data/lib/nokogiri/version/constant.rb +2 -1
  59. data/lib/nokogiri/version/info.rb +20 -13
  60. data/lib/nokogiri/version.rb +1 -0
  61. data/lib/nokogiri/xml/attr.rb +5 -3
  62. data/lib/nokogiri/xml/attribute_decl.rb +2 -1
  63. data/lib/nokogiri/xml/builder.rb +34 -32
  64. data/lib/nokogiri/xml/cdata.rb +2 -1
  65. data/lib/nokogiri/xml/character_data.rb +1 -0
  66. data/lib/nokogiri/xml/document.rb +144 -103
  67. data/lib/nokogiri/xml/document_fragment.rb +41 -38
  68. data/lib/nokogiri/xml/dtd.rb +3 -2
  69. data/lib/nokogiri/xml/element_content.rb +1 -0
  70. data/lib/nokogiri/xml/element_decl.rb +2 -1
  71. data/lib/nokogiri/xml/entity_decl.rb +3 -2
  72. data/lib/nokogiri/xml/entity_reference.rb +1 -0
  73. data/lib/nokogiri/xml/namespace.rb +2 -0
  74. data/lib/nokogiri/xml/node/save_options.rb +8 -4
  75. data/lib/nokogiri/xml/node.rb +521 -351
  76. data/lib/nokogiri/xml/node_set.rb +50 -54
  77. data/lib/nokogiri/xml/notation.rb +12 -0
  78. data/lib/nokogiri/xml/parse_options.rb +12 -7
  79. data/lib/nokogiri/xml/pp/character_data.rb +8 -6
  80. data/lib/nokogiri/xml/pp/node.rb +24 -26
  81. data/lib/nokogiri/xml/pp.rb +1 -0
  82. data/lib/nokogiri/xml/processing_instruction.rb +2 -1
  83. data/lib/nokogiri/xml/reader.rb +20 -24
  84. data/lib/nokogiri/xml/relax_ng.rb +1 -0
  85. data/lib/nokogiri/xml/sax/document.rb +20 -19
  86. data/lib/nokogiri/xml/sax/parser.rb +37 -34
  87. data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
  88. data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
  89. data/lib/nokogiri/xml/sax.rb +1 -0
  90. data/lib/nokogiri/xml/schema.rb +7 -6
  91. data/lib/nokogiri/xml/searchable.rb +93 -62
  92. data/lib/nokogiri/xml/syntax_error.rb +5 -4
  93. data/lib/nokogiri/xml/text.rb +1 -0
  94. data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
  95. data/lib/nokogiri/xml/xpath.rb +12 -0
  96. data/lib/nokogiri/xml/xpath_context.rb +2 -3
  97. data/lib/nokogiri/xml.rb +4 -3
  98. data/lib/nokogiri/xslt/stylesheet.rb +1 -0
  99. data/lib/nokogiri/xslt.rb +21 -13
  100. data/lib/nokogiri.rb +19 -16
  101. data/lib/xsd/xmlparser/nokogiri.rb +25 -24
  102. data/patches/libxml2/0004-use-glibc-strlen.patch +3 -3
  103. data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2443 -1914
  104. data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
  105. data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
  106. data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
  107. data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
  108. data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
  109. metadata +104 -32
  110. data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +0 -31
  111. data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
  112. data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
  113. data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  ###
@@ -65,7 +66,7 @@ module Nokogiri
65
66
  class Document
66
67
  ###
67
68
  # Called when an XML declaration is parsed
68
- def xmldecl version, encoding, standalone
69
+ def xmldecl(version, encoding, standalone)
69
70
  end
70
71
 
71
72
  ###
@@ -83,13 +84,13 @@ module Nokogiri
83
84
  # * +name+ is the name of the tag
84
85
  # * +attrs+ are an assoc list of namespaces and attributes, e.g.:
85
86
  # [ ["xmlns:foo", "http://sample.net"], ["size", "large"] ]
86
- def start_element name, attrs = []
87
+ def start_element(name, attrs = [])
87
88
  end
88
89
 
89
90
  ###
90
91
  # Called at the end of an element
91
92
  # +name+ is the tag name
92
- def end_element name
93
+ def end_element(name)
93
94
  end
94
95
 
95
96
  ###
@@ -99,16 +100,16 @@ module Nokogiri
99
100
  # +prefix+ is the namespace prefix for the element
100
101
  # +uri+ is the associated namespace URI
101
102
  # +ns+ is a hash of namespace prefix:urls associated with the element
102
- def start_element_namespace name, attrs = [], prefix = nil, uri = nil, ns = []
103
+ def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
103
104
  ###
104
105
  # Deal with SAX v1 interface
105
- name = [prefix, name].compact.join(':')
106
- attributes = ns.map { |ns_prefix,ns_uri|
107
- [['xmlns', ns_prefix].compact.join(':'), ns_uri]
108
- } + attrs.map { |attr|
109
- [[attr.prefix, attr.localname].compact.join(':'), attr.value]
110
- }
111
- start_element name, attributes
106
+ name = [prefix, name].compact.join(":")
107
+ attributes = ns.map do |ns_prefix, ns_uri|
108
+ [["xmlns", ns_prefix].compact.join(":"), ns_uri]
109
+ end + attrs.map do |attr|
110
+ [[attr.prefix, attr.localname].compact.join(":"), attr.value]
111
+ end
112
+ start_element(name, attributes)
112
113
  end
113
114
 
114
115
  ###
@@ -116,10 +117,10 @@ module Nokogiri
116
117
  # +name+ is the element's name
117
118
  # +prefix+ is the namespace prefix associated with the element
118
119
  # +uri+ is the associated namespace URI
119
- def end_element_namespace name, prefix = nil, uri = nil
120
+ def end_element_namespace(name, prefix = nil, uri = nil)
120
121
  ###
121
122
  # Deal with SAX v1 interface
122
- end_element [prefix, name].compact.join(':')
123
+ end_element([prefix, name].compact.join(":"))
123
124
  end
124
125
 
125
126
  ###
@@ -127,38 +128,38 @@ module Nokogiri
127
128
  # times given one contiguous string of characters.
128
129
  #
129
130
  # +string+ contains the character data
130
- def characters string
131
+ def characters(string)
131
132
  end
132
133
 
133
134
  ###
134
135
  # Called when comments are encountered
135
136
  # +string+ contains the comment data
136
- def comment string
137
+ def comment(string)
137
138
  end
138
139
 
139
140
  ###
140
141
  # Called on document warnings
141
142
  # +string+ contains the warning
142
- def warning string
143
+ def warning(string)
143
144
  end
144
145
 
145
146
  ###
146
147
  # Called on document errors
147
148
  # +string+ contains the error
148
- def error string
149
+ def error(string)
149
150
  end
150
151
 
151
152
  ###
152
153
  # Called when cdata blocks are found
153
154
  # +string+ contains the cdata content
154
- def cdata_block string
155
+ def cdata_block(string)
155
156
  end
156
157
 
157
158
  ###
158
159
  # Called when processing instructions are found
159
160
  # +name+ is the target of the instruction
160
161
  # +content+ is the value of the instruction
161
- def processing_instruction name, content
162
+ def processing_instruction(name, content)
162
163
  end
163
164
  end
164
165
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  module SAX
@@ -36,29 +37,29 @@ module Nokogiri
36
37
 
37
38
  # Encodings this parser supports
38
39
  ENCODINGS = {
39
- 'NONE' => 0, # No char encoding detected
40
- 'UTF-8' => 1, # UTF-8
41
- 'UTF16LE' => 2, # UTF-16 little endian
42
- 'UTF16BE' => 3, # UTF-16 big endian
43
- 'UCS4LE' => 4, # UCS-4 little endian
44
- 'UCS4BE' => 5, # UCS-4 big endian
45
- 'EBCDIC' => 6, # EBCDIC uh!
46
- 'UCS4-2143' => 7, # UCS-4 unusual ordering
47
- 'UCS4-3412' => 8, # UCS-4 unusual ordering
48
- 'UCS2' => 9, # UCS-2
49
- 'ISO-8859-1' => 10, # ISO-8859-1 ISO Latin 1
50
- 'ISO-8859-2' => 11, # ISO-8859-2 ISO Latin 2
51
- 'ISO-8859-3' => 12, # ISO-8859-3
52
- 'ISO-8859-4' => 13, # ISO-8859-4
53
- 'ISO-8859-5' => 14, # ISO-8859-5
54
- 'ISO-8859-6' => 15, # ISO-8859-6
55
- 'ISO-8859-7' => 16, # ISO-8859-7
56
- 'ISO-8859-8' => 17, # ISO-8859-8
57
- 'ISO-8859-9' => 18, # ISO-8859-9
58
- 'ISO-2022-JP' => 19, # ISO-2022-JP
59
- 'SHIFT-JIS' => 20, # Shift_JIS
60
- 'EUC-JP' => 21, # EUC-JP
61
- 'ASCII' => 22, # pure ASCII
40
+ "NONE" => 0, # No char encoding detected
41
+ "UTF-8" => 1, # UTF-8
42
+ "UTF16LE" => 2, # UTF-16 little endian
43
+ "UTF16BE" => 3, # UTF-16 big endian
44
+ "UCS4LE" => 4, # UCS-4 little endian
45
+ "UCS4BE" => 5, # UCS-4 big endian
46
+ "EBCDIC" => 6, # EBCDIC uh!
47
+ "UCS4-2143" => 7, # UCS-4 unusual ordering
48
+ "UCS4-3412" => 8, # UCS-4 unusual ordering
49
+ "UCS2" => 9, # UCS-2
50
+ "ISO-8859-1" => 10, # ISO-8859-1 ISO Latin 1
51
+ "ISO-8859-2" => 11, # ISO-8859-2 ISO Latin 2
52
+ "ISO-8859-3" => 12, # ISO-8859-3
53
+ "ISO-8859-4" => 13, # ISO-8859-4
54
+ "ISO-8859-5" => 14, # ISO-8859-5
55
+ "ISO-8859-6" => 15, # ISO-8859-6
56
+ "ISO-8859-7" => 16, # ISO-8859-7
57
+ "ISO-8859-8" => 17, # ISO-8859-8
58
+ "ISO-8859-9" => 18, # ISO-8859-9
59
+ "ISO-2022-JP" => 19, # ISO-2022-JP
60
+ "SHIFT-JIS" => 20, # Shift_JIS
61
+ "EUC-JP" => 21, # EUC-JP
62
+ "ASCII" => 22, # pure ASCII
62
63
  }
63
64
 
64
65
  # The Nokogiri::XML::SAX::Document where events will be sent.
@@ -68,7 +69,7 @@ module Nokogiri
68
69
  attr_accessor :encoding
69
70
 
70
71
  # Create a new Parser with +doc+ and +encoding+
71
- def initialize doc = Nokogiri::XML::SAX::Document.new, encoding = 'UTF-8'
72
+ def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = "UTF-8")
72
73
  @encoding = check_encoding(encoding)
73
74
  @document = doc
74
75
  @warned = false
@@ -77,7 +78,7 @@ module Nokogiri
77
78
  ###
78
79
  # Parse given +thing+ which may be a string containing xml, or an
79
80
  # IO object.
80
- def parse thing, &block
81
+ def parse(thing, &block)
81
82
  if thing.respond_to?(:read) && thing.respond_to?(:close)
82
83
  parse_io(thing, &block)
83
84
  else
@@ -87,34 +88,36 @@ module Nokogiri
87
88
 
88
89
  ###
89
90
  # Parse given +io+
90
- def parse_io io, encoding = 'ASCII'
91
+ def parse_io(io, encoding = "ASCII")
91
92
  @encoding = check_encoding(encoding)
92
93
  ctx = ParserContext.io(io, ENCODINGS[@encoding])
93
94
  yield ctx if block_given?
94
- ctx.parse_with self
95
+ ctx.parse_with(self)
95
96
  end
96
97
 
97
98
  ###
98
99
  # Parse a file with +filename+
99
- def parse_file filename
100
+ def parse_file(filename)
100
101
  raise ArgumentError unless filename
101
102
  raise Errno::ENOENT unless File.exist?(filename)
102
103
  raise Errno::EISDIR if File.directory?(filename)
103
- ctx = ParserContext.file filename
104
+
105
+ ctx = ParserContext.file(filename)
104
106
  yield ctx if block_given?
105
- ctx.parse_with self
107
+ ctx.parse_with(self)
106
108
  end
107
109
 
108
- def parse_memory data
109
- ctx = ParserContext.memory data
110
+ def parse_memory(data)
111
+ ctx = ParserContext.memory(data)
110
112
  yield ctx if block_given?
111
- ctx.parse_with self
113
+ ctx.parse_with(self)
112
114
  end
113
115
 
114
116
  private
117
+
115
118
  def check_encoding(encoding)
116
119
  encoding.upcase.tap do |enc|
117
- raise ArgumentError.new("'#{enc}' is not a valid encoding") unless ENCODINGS[enc]
120
+ raise ArgumentError, "'#{enc}' is not a valid encoding" unless ENCODINGS[enc]
118
121
  end
119
122
  end
120
123
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  module SAX
@@ -7,9 +8,12 @@ module Nokogiri
7
8
  # by the user. Instead, you should be looking at
8
9
  # Nokogiri::XML::SAX::Parser
9
10
  class ParserContext
10
- def self.new thing, encoding = 'UTF-8'
11
- [:read, :close].all? { |x| thing.respond_to?(x) } ?
12
- io(thing, Parser::ENCODINGS[encoding]) : memory(thing)
11
+ def self.new(thing, encoding = "UTF-8")
12
+ if [:read, :close].all? { |x| thing.respond_to?(x) }
13
+ io(thing, Parser::ENCODINGS[encoding])
14
+ else
15
+ memory(thing)
16
+ end
13
17
  end
14
18
  end
15
19
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  module SAX
@@ -24,7 +25,6 @@ module Nokogiri
24
25
  # parser << "/div>"
25
26
  # parser.finish
26
27
  class PushParser
27
-
28
28
  # The Nokogiri::XML::SAX::Document on which the PushParser will be
29
29
  # operating
30
30
  attr_accessor :document
@@ -32,7 +32,7 @@ module Nokogiri
32
32
  ###
33
33
  # Create a new PushParser with +doc+ as the SAX Document, providing
34
34
  # an optional +file_name+ and +encoding+
35
- def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = 'UTF-8')
35
+ def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8")
36
36
  @document = doc
37
37
  @encoding = encoding
38
38
  @sax_parser = XML::SAX::Parser.new(doc)
@@ -44,16 +44,16 @@ module Nokogiri
44
44
  ###
45
45
  # Write a +chunk+ of XML to the PushParser. Any callback methods
46
46
  # that can be called will be called immediately.
47
- def write chunk, last_chunk = false
47
+ def write(chunk, last_chunk = false)
48
48
  native_write(chunk, last_chunk)
49
49
  end
50
- alias :<< :write
50
+ alias_method :<<, :write
51
51
 
52
52
  ###
53
53
  # Finish the parsing. This method is only necessary for
54
54
  # Nokogiri::XML::SAX::Document#end_document to be called.
55
55
  def finish
56
- write '', true
56
+ write("", true)
57
57
  end
58
58
  end
59
59
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  require_relative "sax/document"
3
4
  require_relative "sax/parser_context"
4
5
  require_relative "sax/parser"
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  class << self
@@ -42,7 +43,7 @@ module Nokogiri
42
43
  ###
43
44
  # Create a new Nokogiri::XML::Schema object using a +string_or_io+
44
45
  # object.
45
- def self.new string_or_io, options = ParseOptions::DEFAULT_SCHEMA
46
+ def self.new(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
46
47
  from_document(Nokogiri::XML(string_or_io), options)
47
48
  end
48
49
 
@@ -51,9 +52,9 @@ module Nokogiri
51
52
  # Nokogiri::XML::Document object, or a filename. An Array of
52
53
  # Nokogiri::XML::SyntaxError objects found while validating the
53
54
  # +thing+ is returned.
54
- def validate thing
55
- if thing.is_a?(Nokogiri::XML::Document)
56
- validate_document(thing)
55
+ def validate(thing)
56
+ if thing.is_a?(Nokogiri::XML::Document)
57
+ validate_document(thing)
57
58
  elsif File.file?(thing)
58
59
  validate_file(thing)
59
60
  else
@@ -64,8 +65,8 @@ module Nokogiri
64
65
  ###
65
66
  # Returns true if +thing+ is a valid Nokogiri::XML::Document or
66
67
  # file.
67
- def valid? thing
68
- validate(thing).length == 0
68
+ def valid?(thing)
69
+ validate(thing).empty?
69
70
  end
70
71
  end
71
72
  end
@@ -1,22 +1,25 @@
1
+ # coding: utf-8
1
2
  # frozen_string_literal: true
3
+
2
4
  module Nokogiri
3
5
  module XML
4
6
  #
5
7
  # The Searchable module declares the interface used for searching your DOM.
6
8
  #
7
- # It implements the public methods `search`, `css`, and `xpath`,
9
+ # It implements the public methods #search, #css, and #xpath,
8
10
  # as well as allowing specific implementations to specialize some
9
11
  # of the important behaviors.
10
12
  #
11
13
  module Searchable
12
14
  # Regular expression used by Searchable#search to determine if a query
13
15
  # string is CSS or XPath
14
- LOOKS_LIKE_XPATH = /^(\.\/|\/|\.\.|\.$)/
16
+ LOOKS_LIKE_XPATH = %r{^(\./|/|\.\.|\.$)}
15
17
 
16
- # @!group Searching via XPath or CSS Queries
18
+ # :section: Searching via XPath or CSS Queries
17
19
 
18
20
  ###
19
- # call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
21
+ # call-seq:
22
+ # search(*paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class])
20
23
  #
21
24
  # Search this object for +paths+. +paths+ must be one or more XPath or CSS queries:
22
25
  #
@@ -27,41 +30,39 @@ module Nokogiri
27
30
  # node.search('.//bike:tire', {'bike' => 'http://schwinn.com/'})
28
31
  # node.search('bike|tire', {'bike' => 'http://schwinn.com/'})
29
32
  #
30
- # For XPath queries, a hash of variable bindings may also be
31
- # appended to the namespace bindings. For example:
33
+ # For XPath queries, a hash of variable bindings may also be appended to the namespace
34
+ # bindings. For example:
32
35
  #
33
36
  # node.search('.//address[@domestic=$value]', nil, {:value => 'Yes'})
34
37
  #
35
- # Custom XPath functions and CSS pseudo-selectors may also be
36
- # defined. To define custom functions create a class and
37
- # implement the function you want to define. The first argument
38
- # to the method will be the current matching NodeSet. Any other
39
- # arguments are ones that you pass in. Note that this class may
40
- # appear anywhere in the argument list. For example:
41
- #
42
- # node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")'
43
- # Class.new {
44
- # def regex node_set, regex
45
- # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
46
- # end
47
- # }.new
48
- # )
38
+ # 💡 Custom XPath functions and CSS pseudo-selectors may also be defined. To define custom
39
+ # functions create a class and implement the function you want to define. The first argument
40
+ # to the method will be the current matching NodeSet. Any other arguments are ones that you
41
+ # pass in. Note that this class may appear anywhere in the argument list. For example:
42
+ #
43
+ # handler = Class.new {
44
+ # def regex node_set, regex
45
+ # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
46
+ # end
47
+ # }.new
48
+ # node.search('.//title[regex(., "\w+")]', 'div.employee:regex("[0-9]+")', handler)
49
49
  #
50
50
  # See Searchable#xpath and Searchable#css for further usage help.
51
51
  def search(*args)
52
52
  paths, handler, ns, binds = extract_params(args)
53
53
 
54
54
  xpaths = paths.map(&:to_s).map do |path|
55
- (path =~ LOOKS_LIKE_XPATH) ? path : xpath_query_from_css_rule(path, ns)
55
+ LOOKS_LIKE_XPATH.match?(path) ? path : xpath_query_from_css_rule(path, ns)
56
56
  end.flatten.uniq
57
57
 
58
58
  xpath(*(xpaths + [ns, handler, binds].compact))
59
59
  end
60
60
 
61
- alias :/ :search
61
+ alias_method :/, :search
62
62
 
63
63
  ###
64
- # call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
64
+ # call-seq:
65
+ # at(*paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class])
65
66
  #
66
67
  # Search this object for +paths+, and return only the first
67
68
  # result. +paths+ must be one or more XPath or CSS queries.
@@ -71,10 +72,11 @@ module Nokogiri
71
72
  search(*args).first
72
73
  end
73
74
 
74
- alias :% :at
75
+ alias_method :%, :at
75
76
 
76
77
  ###
77
- # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
78
+ # call-seq:
79
+ # css(*rules, [namespace-bindings, custom-pseudo-class])
78
80
  #
79
81
  # Search this object for CSS +rules+. +rules+ must be one or more CSS
80
82
  # selectors. For example:
@@ -87,33 +89,49 @@ module Nokogiri
87
89
  #
88
90
  # node.css('bike|tire', {'bike' => 'http://schwinn.com/'})
89
91
  #
90
- # Custom CSS pseudo classes may also be defined. To define
91
- # custom pseudo classes, create a class and implement the custom
92
- # pseudo class you want defined. The first argument to the
93
- # method will be the current matching NodeSet. Any other
94
- # arguments are ones that you pass in. For example:
92
+ # 💡 Custom CSS pseudo classes may also be defined which are mapped to a custom XPath
93
+ # function. To define custom pseudo classes, create a class and implement the custom pseudo
94
+ # class you want defined. The first argument to the method will be the matching context
95
+ # NodeSet. Any other arguments are ones that you pass in. For example:
95
96
  #
96
- # node.css('title:regex("\w+")', Class.new {
97
- # def regex node_set, regex
97
+ # handler = Class.new {
98
+ # def regex(node_set, regex)
98
99
  # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
99
100
  # end
100
- # }.new)
101
+ # }.new
102
+ # node.css('title:regex("\w+")', handler)
103
+ #
104
+ # 💡 Some XPath syntax is supported in CSS queries. For example, to query for an attribute:
105
+ #
106
+ # node.css('img > @href') # returns all +href+ attributes on an +img+ element
107
+ # node.css('img / @href') # same
108
+ #
109
+ # # ⚠ this returns +class+ attributes from all +div+ elements AND THEIR CHILDREN!
110
+ # node.css('div @class')
101
111
  #
102
- # Note that the CSS query string is case-sensitive with regards
103
- # to your document type. That is, if you're looking for "H1" in
104
- # an HTML document, you'll never find anything, since HTML tags
105
- # will match only lowercase CSS queries. However, "H1" might be
106
- # found in an XML document, where tags names are case-sensitive
107
- # (e.g., "H1" is distinct from "h1").
112
+ # node.css
108
113
  #
114
+ # 💡 Array-like syntax is supported in CSS queries as an alternative to using +:nth-child()+.
115
+ #
116
+ # ⚠ NOTE that indices are 1-based like +:nth-child+ and not 0-based like Ruby Arrays. For
117
+ # example:
118
+ #
119
+ # # equivalent to 'li:nth-child(2)'
120
+ # node.css('li[2]') # retrieve the second li element in a list
121
+ #
122
+ # ⚠ NOTE that the CSS query string is case-sensitive with regards to your document type. HTML
123
+ # tags will match only lowercase CSS queries, so if you search for "H1" in an HTML document,
124
+ # you'll never find anything. However, "H1" might be found in an XML document, where tags
125
+ # names are case-sensitive (e.g., "H1" is distinct from "h1").
109
126
  def css(*args)
110
127
  rules, handler, ns, _ = extract_params(args)
111
128
 
112
- css_internal self, rules, handler, ns
129
+ css_internal(self, rules, handler, ns)
113
130
  end
114
131
 
115
132
  ##
116
- # call-seq: css *rules, [namespace-bindings, custom-pseudo-class]
133
+ # call-seq:
134
+ # at_css(*rules, [namespace-bindings, custom-pseudo-class])
117
135
  #
118
136
  # Search this object for CSS +rules+, and return only the first
119
137
  # match. +rules+ must be one or more CSS selectors.
@@ -124,7 +142,8 @@ module Nokogiri
124
142
  end
125
143
 
126
144
  ###
127
- # call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
145
+ # call-seq:
146
+ # xpath(*paths, [namespace-bindings, variable-bindings, custom-handler-class])
128
147
  #
129
148
  # Search this node for XPath +paths+. +paths+ must be one or more XPath
130
149
  # queries.
@@ -140,27 +159,27 @@ module Nokogiri
140
159
  #
141
160
  # node.xpath('.//address[@domestic=$value]', nil, {:value => 'Yes'})
142
161
  #
143
- # Custom XPath functions may also be defined. To define custom
144
- # functions create a class and implement the function you want
145
- # to define. The first argument to the method will be the
146
- # current matching NodeSet. Any other arguments are ones that
147
- # you pass in. Note that this class may appear anywhere in the
148
- # argument list. For example:
162
+ # 💡 Custom XPath functions may also be defined. To define custom functions create a class and
163
+ # implement the function you want to define. The first argument to the method will be the
164
+ # current matching NodeSet. Any other arguments are ones that you pass in. Note that this
165
+ # class may appear anywhere in the argument list. For example:
149
166
  #
150
- # node.xpath('.//title[regex(., "\w+")]', Class.new {
151
- # def regex node_set, regex
167
+ # handler = Class.new {
168
+ # def regex(node_set, regex)
152
169
  # node_set.find_all { |node| node['some_attribute'] =~ /#{regex}/ }
153
170
  # end
154
- # }.new)
171
+ # }.new
172
+ # node.xpath('.//title[regex(., "\w+")]', handler)
155
173
  #
156
174
  def xpath(*args)
157
175
  paths, handler, ns, binds = extract_params(args)
158
176
 
159
- xpath_internal self, paths, handler, ns, binds
177
+ xpath_internal(self, paths, handler, ns, binds)
160
178
  end
161
179
 
162
180
  ##
163
- # call-seq: xpath *paths, [namespace-bindings, variable-bindings, custom-handler-class]
181
+ # call-seq:
182
+ # at_xpath(*paths, [namespace-bindings, variable-bindings, custom-handler-class])
164
183
  #
165
184
  # Search this node for XPath +paths+, and return only the first
166
185
  # match. +paths+ must be one or more XPath queries.
@@ -170,12 +189,21 @@ module Nokogiri
170
189
  xpath(*args).first
171
190
  end
172
191
 
173
- # @!endgroup
192
+ # :call-seq:
193
+ # >(selector) → NodeSet
194
+ #
195
+ # Search this node's immediate children using CSS selector +selector+
196
+ def >(selector) # rubocop:disable Naming/BinaryOperatorParameterName
197
+ ns = (document.root&.namespaces || {})
198
+ xpath(CSS.xpath_for(selector, prefix: "./", ns: ns).first)
199
+ end
200
+
201
+ # :section:
174
202
 
175
203
  private
176
204
 
177
205
  def css_internal(node, rules, handler, ns)
178
- xpath_internal node, css_rules_to_xpath(rules, ns), handler, ns, nil
206
+ xpath_internal(node, css_rules_to_xpath(rules, ns), handler, ns, nil)
179
207
  end
180
208
 
181
209
  def xpath_internal(node, paths, handler, ns, binds)
@@ -198,9 +226,9 @@ module Nokogiri
198
226
  ctx.register_namespaces(ns)
199
227
  path = path.gsub(/xmlns:/, " :") unless Nokogiri.uses_libxml?
200
228
 
201
- binds.each do |key, value|
202
- ctx.register_variable key.to_s, value
203
- end if binds
229
+ binds&.each do |key, value|
230
+ ctx.register_variable(key.to_s, value)
231
+ end
204
232
 
205
233
  ctx.evaluate(path, handler)
206
234
  end
@@ -210,10 +238,13 @@ module Nokogiri
210
238
  end
211
239
 
212
240
  def xpath_query_from_css_rule(rule, ns)
213
- visitor = Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins.new
241
+ visitor = Nokogiri::CSS::XPathVisitor.new(
242
+ builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::OPTIMAL,
243
+ doctype: document.xpath_doctype,
244
+ )
214
245
  self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
215
- CSS.xpath_for(rule.to_s, {:prefix => implied_xpath_context, :ns => ns,
216
- :visitor => visitor})
246
+ CSS.xpath_for(rule.to_s, { prefix: implied_xpath_context, ns: ns,
247
+ visitor: visitor, })
217
248
  end.join(" | ")
218
249
  end
219
250
 
@@ -230,7 +261,7 @@ module Nokogiri
230
261
  end
231
262
  ns, binds = hashes.reverse
232
263
 
233
- ns ||= document.root ? document.root.namespaces : {}
264
+ ns ||= (document.root&.namespaces || {})
234
265
 
235
266
  [params, handler, ns, binds]
236
267
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  ###
@@ -42,9 +43,9 @@ module Nokogiri
42
43
 
43
44
  def to_s
44
45
  message = super.chomp
45
- [location_to_s, level_to_s, message].
46
- compact.join(": ").
47
- force_encoding(message.encoding)
46
+ [location_to_s, level_to_s, message]
47
+ .compact.join(": ")
48
+ .force_encoding(message.encoding)
48
49
  end
49
50
 
50
51
  private
@@ -54,7 +55,6 @@ module Nokogiri
54
55
  when 3 then "FATAL"
55
56
  when 2 then "ERROR"
56
57
  when 1 then "WARNING"
57
- else nil
58
58
  end
59
59
  end
60
60
 
@@ -64,6 +64,7 @@ module Nokogiri
64
64
 
65
65
  def location_to_s
66
66
  return nil if nil_or_zero?(line) && nil_or_zero?(column)
67
+
67
68
  "#{line}:#{column}"
68
69
  end
69
70
  end
@@ -1,4 +1,5 @@
1
1
  # frozen_string_literal: true
2
+
2
3
  module Nokogiri
3
4
  module XML
4
5
  class Text < Nokogiri::XML::CharacterData