nokogiri 1.15.4 → 1.17.2

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (98) hide show
  1. checksums.yaml +4 -4
  2. data/Gemfile +12 -19
  3. data/README.md +8 -1
  4. data/dependencies.yml +9 -8
  5. data/ext/nokogiri/extconf.rb +194 -141
  6. data/ext/nokogiri/gumbo.c +69 -53
  7. data/ext/nokogiri/html4_document.c +10 -4
  8. data/ext/nokogiri/html4_element_description.c +18 -18
  9. data/ext/nokogiri/html4_sax_parser.c +40 -0
  10. data/ext/nokogiri/html4_sax_parser_context.c +48 -58
  11. data/ext/nokogiri/html4_sax_push_parser.c +26 -25
  12. data/ext/nokogiri/libxml2_polyfill.c +114 -0
  13. data/ext/nokogiri/nokogiri.c +9 -2
  14. data/ext/nokogiri/nokogiri.h +25 -33
  15. data/ext/nokogiri/test_global_handlers.c +1 -1
  16. data/ext/nokogiri/xml_attr.c +1 -1
  17. data/ext/nokogiri/xml_cdata.c +3 -12
  18. data/ext/nokogiri/xml_comment.c +3 -8
  19. data/ext/nokogiri/xml_document.c +167 -156
  20. data/ext/nokogiri/xml_document_fragment.c +10 -25
  21. data/ext/nokogiri/xml_dtd.c +1 -1
  22. data/ext/nokogiri/xml_element_content.c +9 -9
  23. data/ext/nokogiri/xml_encoding_handler.c +4 -4
  24. data/ext/nokogiri/xml_namespace.c +6 -10
  25. data/ext/nokogiri/xml_node.c +142 -108
  26. data/ext/nokogiri/xml_node_set.c +46 -44
  27. data/ext/nokogiri/xml_reader.c +74 -100
  28. data/ext/nokogiri/xml_relax_ng.c +35 -56
  29. data/ext/nokogiri/xml_sax_parser.c +156 -88
  30. data/ext/nokogiri/xml_sax_parser_context.c +214 -128
  31. data/ext/nokogiri/xml_sax_push_parser.c +69 -50
  32. data/ext/nokogiri/xml_schema.c +51 -87
  33. data/ext/nokogiri/xml_syntax_error.c +19 -11
  34. data/ext/nokogiri/xml_text.c +3 -6
  35. data/ext/nokogiri/xml_xpath_context.c +4 -7
  36. data/ext/nokogiri/xslt_stylesheet.c +16 -11
  37. data/gumbo-parser/Makefile +18 -0
  38. data/gumbo-parser/src/error.c +76 -48
  39. data/gumbo-parser/src/error.h +5 -1
  40. data/gumbo-parser/src/nokogiri_gumbo.h +11 -2
  41. data/gumbo-parser/src/parser.c +64 -23
  42. data/gumbo-parser/src/tokenizer.c +7 -6
  43. data/lib/nokogiri/class_resolver.rb +1 -1
  44. data/lib/nokogiri/css/node.rb +6 -2
  45. data/lib/nokogiri/css/parser.rb +6 -4
  46. data/lib/nokogiri/css/parser.y +2 -2
  47. data/lib/nokogiri/css/parser_extras.rb +6 -66
  48. data/lib/nokogiri/css/selector_cache.rb +38 -0
  49. data/lib/nokogiri/css/tokenizer.rb +4 -4
  50. data/lib/nokogiri/css/tokenizer.rex +9 -8
  51. data/lib/nokogiri/css/xpath_visitor.rb +43 -27
  52. data/lib/nokogiri/css.rb +86 -20
  53. data/lib/nokogiri/decorators/slop.rb +3 -5
  54. data/lib/nokogiri/encoding_handler.rb +2 -2
  55. data/lib/nokogiri/html4/document.rb +45 -24
  56. data/lib/nokogiri/html4/document_fragment.rb +124 -12
  57. data/lib/nokogiri/html4/encoding_reader.rb +2 -2
  58. data/lib/nokogiri/html4/sax/parser.rb +23 -38
  59. data/lib/nokogiri/html4/sax/parser_context.rb +4 -9
  60. data/lib/nokogiri/html4.rb +9 -14
  61. data/lib/nokogiri/html5/builder.rb +40 -0
  62. data/lib/nokogiri/html5/document.rb +61 -30
  63. data/lib/nokogiri/html5/document_fragment.rb +130 -20
  64. data/lib/nokogiri/html5/node.rb +4 -4
  65. data/lib/nokogiri/html5.rb +114 -138
  66. data/lib/nokogiri/version/constant.rb +1 -1
  67. data/lib/nokogiri/version/info.rb +6 -5
  68. data/lib/nokogiri/xml/attr.rb +2 -2
  69. data/lib/nokogiri/xml/builder.rb +8 -1
  70. data/lib/nokogiri/xml/document.rb +74 -31
  71. data/lib/nokogiri/xml/document_fragment.rb +86 -15
  72. data/lib/nokogiri/xml/namespace.rb +1 -2
  73. data/lib/nokogiri/xml/node.rb +113 -35
  74. data/lib/nokogiri/xml/node_set.rb +12 -10
  75. data/lib/nokogiri/xml/parse_options.rb +1 -1
  76. data/lib/nokogiri/xml/pp/node.rb +6 -1
  77. data/lib/nokogiri/xml/reader.rb +51 -17
  78. data/lib/nokogiri/xml/relax_ng.rb +57 -20
  79. data/lib/nokogiri/xml/sax/document.rb +174 -83
  80. data/lib/nokogiri/xml/sax/parser.rb +115 -41
  81. data/lib/nokogiri/xml/sax/parser_context.rb +116 -8
  82. data/lib/nokogiri/xml/sax/push_parser.rb +3 -0
  83. data/lib/nokogiri/xml/sax.rb +48 -0
  84. data/lib/nokogiri/xml/schema.rb +112 -45
  85. data/lib/nokogiri/xml/searchable.rb +9 -11
  86. data/lib/nokogiri/xml/syntax_error.rb +23 -1
  87. data/lib/nokogiri/xml.rb +14 -25
  88. data/lib/nokogiri/xslt/stylesheet.rb +29 -7
  89. data/lib/nokogiri/xslt.rb +4 -10
  90. data/lib/nokogiri.rb +1 -1
  91. data/lib/xsd/xmlparser/nokogiri.rb +3 -4
  92. data/ports/archives/libxml2-2.13.5.tar.xz +0 -0
  93. data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
  94. metadata +15 -14
  95. data/ext/nokogiri/libxml2_backwards_compat.c +0 -121
  96. data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +0 -25
  97. data/ports/archives/libxml2-2.11.5.tar.xz +0 -0
  98. data/ports/archives/libxslt-1.1.38.tar.xz +0 -0
@@ -4,16 +4,15 @@ module Nokogiri
4
4
  module XML
5
5
  module SAX
6
6
  ###
7
- # This parser is a SAX style parser that reads it's input as it
8
- # deems necessary. The parser takes a Nokogiri::XML::SAX::Document,
9
- # an optional encoding, then given an XML input, sends messages to
10
- # the Nokogiri::XML::SAX::Document.
7
+ # This parser is a SAX style parser that reads its input as it deems necessary. The parser
8
+ # takes a Nokogiri::XML::SAX::Document, an optional encoding, then given an XML input, sends
9
+ # messages to the Nokogiri::XML::SAX::Document.
11
10
  #
12
11
  # Here is an example of using this parser:
13
12
  #
14
13
  # # Create a subclass of Nokogiri::XML::SAX::Document and implement
15
14
  # # the events we care about:
16
- # class MyDoc < Nokogiri::XML::SAX::Document
15
+ # class MyHandler < Nokogiri::XML::SAX::Document
17
16
  # def start_element name, attrs = []
18
17
  # puts "starting: #{name}"
19
18
  # end
@@ -23,20 +22,28 @@ module Nokogiri
23
22
  # end
24
23
  # end
25
24
  #
26
- # # Create our parser
27
- # parser = Nokogiri::XML::SAX::Parser.new(MyDoc.new)
25
+ # parser = Nokogiri::XML::SAX::Parser.new(MyHandler.new)
28
26
  #
29
- # # Send some XML to the parser
30
- # parser.parse(File.open(ARGV[0]))
27
+ # # Hand an IO object to the parser, which will read the XML from the IO.
28
+ # File.open(path_to_xml) do |f|
29
+ # parser.parse(f)
30
+ # end
31
+ #
32
+ # For more information about \SAX parsers, see Nokogiri::XML::SAX.
33
+ #
34
+ # Also see Nokogiri::XML::SAX::Document for the available events.
35
+ #
36
+ # For \HTML documents, use the subclass Nokogiri::HTML4::SAX::Parser.
31
37
  #
32
- # For more information about SAX parsers, see Nokogiri::XML::SAX. Also
33
- # see Nokogiri::XML::SAX::Document for the available events.
34
38
  class Parser
39
+ # to dynamically resolve ParserContext in inherited methods
40
+ include Nokogiri::ClassResolver
41
+
42
+ # Structure used for marshalling attributes for some callbacks in XML::SAX::Document.
35
43
  class Attribute < Struct.new(:localname, :prefix, :uri, :value)
36
44
  end
37
45
 
38
- # Encodinds this parser supports
39
- ENCODINGS = {
46
+ ENCODINGS = { # :nodoc:
40
47
  "NONE" => 0, # No char encoding detected
41
48
  "UTF-8" => 1, # UTF-8
42
49
  "UTF16LE" => 2, # UTF-16 little endian
@@ -61,6 +68,8 @@ module Nokogiri
61
68
  "EUC-JP" => 21, # EUC-JP
62
69
  "ASCII" => 22, # pure ASCII
63
70
  }
71
+ REVERSE_ENCODINGS = ENCODINGS.invert # :nodoc:
72
+ deprecate_constant :ENCODINGS
64
73
 
65
74
  # The Nokogiri::XML::SAX::Document where events will be sent.
66
75
  attr_accessor :document
@@ -68,57 +77,122 @@ module Nokogiri
68
77
  # The encoding being used for this document.
69
78
  attr_accessor :encoding
70
79
 
71
- # Create a new Parser with +doc+ and +encoding+
72
- def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = "UTF-8")
73
- @encoding = check_encoding(encoding)
80
+ ###
81
+ # :call-seq:
82
+ # new ⇒ SAX::Parser
83
+ # new(handler) ⇒ SAX::Parser
84
+ # new(handler, encoding) ⇒ SAX::Parser
85
+ #
86
+ # Create a new Parser.
87
+ #
88
+ # [Parameters]
89
+ # - +handler+ (optional Nokogiri::XML::SAX::Document) The document that will receive
90
+ # events. Will create a new Nokogiri::XML::SAX::Document if not given, which is accessible
91
+ # through the #document attribute.
92
+ # - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
93
+ # parsing the input. (default +nil+ for auto-detection)
94
+ #
95
+ def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = nil)
96
+ @encoding = encoding
74
97
  @document = doc
75
98
  @warned = false
99
+
100
+ initialize_native unless Nokogiri.jruby?
76
101
  end
77
102
 
78
103
  ###
79
- # Parse given +thing+ which may be a string containing xml, or an
80
- # IO object.
81
- def parse(thing, &block)
82
- if thing.respond_to?(:read) && thing.respond_to?(:close)
83
- parse_io(thing, &block)
104
+ # :call-seq:
105
+ # parse(input) { |parser_context| ... }
106
+ #
107
+ # Parse the input, sending events to the SAX::Document at #document.
108
+ #
109
+ # [Parameters]
110
+ # - +input+ (String, IO) The input to parse.
111
+ #
112
+ # If +input+ quacks like a readable IO object, this method forwards to Parser.parse_io,
113
+ # otherwise it forwards to Parser.parse_memory.
114
+ #
115
+ # [Yields]
116
+ # If a block is given, the underlying ParserContext object will be yielded. This can be used
117
+ # to set options on the parser context before parsing begins.
118
+ #
119
+ def parse(input, &block)
120
+ if input.respond_to?(:read) && input.respond_to?(:close)
121
+ parse_io(input, &block)
84
122
  else
85
- parse_memory(thing, &block)
123
+ parse_memory(input, &block)
86
124
  end
87
125
  end
88
126
 
89
127
  ###
90
- # Parse given +io+
128
+ # :call-seq:
129
+ # parse_io(io) { |parser_context| ... }
130
+ # parse_io(io, encoding) { |parser_context| ... }
131
+ #
132
+ # Parse an input stream.
133
+ #
134
+ # [Parameters]
135
+ # - +io+ (IO) The readable IO object from which to read input
136
+ # - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
137
+ # parsing the input, or +nil+ for auto-detection. (default #encoding)
138
+ #
139
+ # [Yields]
140
+ # If a block is given, the underlying ParserContext object will be yielded. This can be used
141
+ # to set options on the parser context before parsing begins.
142
+ #
91
143
  def parse_io(io, encoding = @encoding)
92
- ctx = ParserContext.io(io, ENCODINGS[check_encoding(encoding)])
144
+ ctx = related_class("ParserContext").io(io, encoding)
93
145
  yield ctx if block_given?
94
146
  ctx.parse_with(self)
95
147
  end
96
148
 
97
149
  ###
98
- # Parse a file with +filename+
99
- def parse_file(filename)
100
- raise ArgumentError unless filename
101
- raise Errno::ENOENT unless File.exist?(filename)
102
- raise Errno::EISDIR if File.directory?(filename)
103
-
104
- ctx = ParserContext.file(filename)
150
+ # :call-seq:
151
+ # parse_memory(input) { |parser_context| ... }
152
+ # parse_memory(input, encoding) { |parser_context| ... }
153
+ #
154
+ # Parse an input string.
155
+ #
156
+ # [Parameters]
157
+ # - +input+ (String) The input string to be parsed.
158
+ # - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
159
+ # parsing the input, or +nil+ for auto-detection. (default #encoding)
160
+ #
161
+ # [Yields]
162
+ # If a block is given, the underlying ParserContext object will be yielded. This can be used
163
+ # to set options on the parser context before parsing begins.
164
+ #
165
+ def parse_memory(input, encoding = @encoding)
166
+ ctx = related_class("ParserContext").memory(input, encoding)
105
167
  yield ctx if block_given?
106
168
  ctx.parse_with(self)
107
169
  end
108
170
 
109
- def parse_memory(data)
110
- ctx = ParserContext.memory(data)
171
+ ###
172
+ # :call-seq:
173
+ # parse_file(filename) { |parser_context| ... }
174
+ # parse_file(filename, encoding) { |parser_context| ... }
175
+ #
176
+ # Parse a file.
177
+ #
178
+ # [Parameters]
179
+ # - +filename+ (String) The path to the file to be parsed.
180
+ # - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
181
+ # parsing the input, or +nil+ for auto-detection. (default #encoding)
182
+ #
183
+ # [Yields]
184
+ # If a block is given, the underlying ParserContext object will be yielded. This can be used
185
+ # to set options on the parser context before parsing begins.
186
+ #
187
+ def parse_file(filename, encoding = @encoding)
188
+ raise ArgumentError, "no filename provided" unless filename
189
+ raise Errno::ENOENT unless File.exist?(filename)
190
+ raise Errno::EISDIR if File.directory?(filename)
191
+
192
+ ctx = related_class("ParserContext").file(filename, encoding)
111
193
  yield ctx if block_given?
112
194
  ctx.parse_with(self)
113
195
  end
114
-
115
- private
116
-
117
- def check_encoding(encoding)
118
- encoding.upcase.tap do |enc|
119
- raise ArgumentError, "'#{enc}' is not a valid encoding" unless ENCODINGS[enc]
120
- end
121
- end
122
196
  end
123
197
  end
124
198
  end
@@ -4,15 +4,123 @@ module Nokogiri
4
4
  module XML
5
5
  module SAX
6
6
  ###
7
- # Context for XML SAX parsers. This class is usually not instantiated
8
- # by the user. Instead, you should be looking at
9
- # Nokogiri::XML::SAX::Parser
7
+ # Context object to invoke the XML SAX parser on the SAX::Document handler.
8
+ #
9
+ # 💡 This class is usually not instantiated by the user. Use Nokogiri::XML::SAX::Parser
10
+ # instead.
10
11
  class ParserContext
11
- def self.new(thing, encoding = "UTF-8")
12
- if [:read, :close].all? { |x| thing.respond_to?(x) }
13
- io(thing, Parser::ENCODINGS[encoding])
14
- else
15
- memory(thing)
12
+ class << self
13
+ ###
14
+ # :call-seq:
15
+ # new(input)
16
+ # new(input, encoding)
17
+ #
18
+ # Create a parser context for an IO or a String. This is a shorthand method for
19
+ # ParserContext.io and ParserContext.memory.
20
+ #
21
+ # [Parameters]
22
+ # - +input+ (IO, String) A String or a readable IO object
23
+ # - +encoding+ (optional) (Encoding) The +Encoding+ to use, or the name of an
24
+ # encoding to use (default +nil+, encoding will be autodetected)
25
+ #
26
+ # If +input+ quacks like a readable IO object, this method forwards to ParserContext.io,
27
+ # otherwise it forwards to ParserContext.memory.
28
+ #
29
+ # [Returns] Nokogiri::XML::SAX::ParserContext
30
+ #
31
+ def new(input, encoding = nil)
32
+ if [:read, :close].all? { |x| input.respond_to?(x) }
33
+ io(input, encoding)
34
+ else
35
+ memory(input, encoding)
36
+ end
37
+ end
38
+
39
+ ###
40
+ # :call-seq:
41
+ # io(input)
42
+ # io(input, encoding)
43
+ #
44
+ # Create a parser context for an +input+ IO which will assume +encoding+
45
+ #
46
+ # [Parameters]
47
+ # - +input+ (IO) The readable IO object from which to read input
48
+ # - +encoding+ (optional) (Encoding) The +Encoding+ to use, or the name of an
49
+ # encoding to use (default +nil+, encoding will be autodetected)
50
+ #
51
+ # [Returns] Nokogiri::XML::SAX::ParserContext
52
+ #
53
+ # 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser parse
54
+ # methods which are more convenient for most use cases.
55
+ #
56
+ def io(input, encoding = nil)
57
+ native_io(input, resolve_encoding(encoding))
58
+ end
59
+
60
+ ###
61
+ # :call-seq:
62
+ # memory(input)
63
+ # memory(input, encoding)
64
+ #
65
+ # Create a parser context for the +input+ String.
66
+ #
67
+ # [Parameters]
68
+ # - +input+ (String) The input string to be parsed.
69
+ # - +encoding+ (optional) (Encoding, String) The +Encoding+ to use, or the name of an encoding to
70
+ # use (default +nil+, encoding will be autodetected)
71
+ #
72
+ # [Returns] Nokogiri::XML::SAX::ParserContext
73
+ #
74
+ # 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser parse methods
75
+ # which are more convenient for most use cases.
76
+ #
77
+ def memory(input, encoding = nil)
78
+ native_memory(input, resolve_encoding(encoding))
79
+ end
80
+
81
+ ###
82
+ # :call-seq:
83
+ # file(path)
84
+ # file(path, encoding)
85
+ #
86
+ # Create a parser context for the file at +path+.
87
+ #
88
+ # [Parameters]
89
+ # - +path+ (String) The path to the input file
90
+ # - +encoding+ (optional) (Encoding, String) The +Encoding+ to use, or the name of an encoding to
91
+ # use (default +nil+, encoding will be autodetected)
92
+ #
93
+ # [Returns] Nokogiri::XML::SAX::ParserContext
94
+ #
95
+ # 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser.parse_file which
96
+ # is more convenient for most use cases.
97
+ def file(input, encoding = nil)
98
+ native_file(input, resolve_encoding(encoding))
99
+ end
100
+
101
+ private def resolve_encoding(encoding)
102
+ case encoding
103
+ when Encoding
104
+ encoding
105
+
106
+ when nil
107
+ nil # totally fine, parser will guess encoding
108
+
109
+ when Integer
110
+ warn("Passing an integer to Nokogiri::XML::SAX::ParserContext.io is deprecated. Use an Encoding object instead. This will become an error in a future release.", uplevel: 2, category: :deprecated)
111
+
112
+ return nil if encoding == Parser::ENCODINGS["NONE"]
113
+
114
+ encoding = Parser::REVERSE_ENCODINGS[encoding]
115
+ raise ArgumentError, "Invalid libxml2 encoding id #{encoding}" if encoding.nil?
116
+ Encoding.find(encoding)
117
+
118
+ when String
119
+ Encoding.find(encoding)
120
+
121
+ else
122
+ raise ArgumentError, "Cannot resolve #{encoding.inspect} to an Encoding"
123
+ end
16
124
  end
17
125
  end
18
126
  end
@@ -52,6 +52,9 @@ module Nokogiri
52
52
  ###
53
53
  # Finish the parsing. This method is only necessary for
54
54
  # Nokogiri::XML::SAX::Document#end_document to be called.
55
+ #
56
+ # ⚠ Note that empty documents are treated as an error when using the libxml2-based
57
+ # implementation (CRuby), but are fine when using the Xerces-based implementation (JRuby).
55
58
  def finish
56
59
  write("", true)
57
60
  end
@@ -1,5 +1,53 @@
1
1
  # frozen_string_literal: true
2
2
 
3
+ module Nokogiri
4
+ module XML
5
+ ###
6
+ # SAX Parsers are event-driven parsers.
7
+ #
8
+ # Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
9
+ # feels necessary, and a parser that you explicitly feed XML in chunks. If you want to let
10
+ # Nokogiri deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have
11
+ # fine grain control over the XML input, use the Nokogiri::XML::SAX::PushParser.
12
+ #
13
+ # If you want to do SAX style parsing of HTML, check out Nokogiri::HTML4::SAX.
14
+ #
15
+ # The basic way a SAX style parser works is by creating a parser, telling the parser about the
16
+ # events we're interested in, then giving the parser some XML to process. The parser will notify
17
+ # you when it encounters events you said you would like to know about.
18
+ #
19
+ # To register for events, subclass Nokogiri::XML::SAX::Document and implement the methods for
20
+ # which you would like notification.
21
+ #
22
+ # For example, if I want to be notified when a document ends, and when an element starts, I
23
+ # would write a class like this:
24
+ #
25
+ # class MyHandler < Nokogiri::XML::SAX::Document
26
+ # def end_document
27
+ # puts "the document has ended"
28
+ # end
29
+ #
30
+ # def start_element name, attributes = []
31
+ # puts "#{name} started"
32
+ # end
33
+ # end
34
+ #
35
+ # Then I would instantiate a SAX parser with this document, and feed the parser some XML
36
+ #
37
+ # # Create a new parser
38
+ # parser = Nokogiri::XML::SAX::Parser.new(MyHandler.new)
39
+ #
40
+ # # Feed the parser some XML
41
+ # parser.parse(File.open(ARGV[0]))
42
+ #
43
+ # Now my document handler will be called when each node starts, and when the document ends. To
44
+ # see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
45
+ #
46
+ module SAX
47
+ end
48
+ end
49
+ end
50
+
3
51
  require_relative "sax/document"
4
52
  require_relative "sax/parser_context"
5
53
  require_relative "sax/parser"
@@ -3,70 +3,137 @@
3
3
  module Nokogiri
4
4
  module XML
5
5
  class << self
6
- ###
7
- # Create a new Nokogiri::XML::Schema object using a +string_or_io+
8
- # object.
9
- def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
10
- Schema.new(string_or_io, options)
6
+ # :call-seq:
7
+ # Schema(input) → Nokogiri::XML::Schema
8
+ # Schema(input, parse_options) → Nokogiri::XML::Schema
9
+ #
10
+ # Convenience method for Nokogiri::XML::Schema.new
11
+ def Schema(...)
12
+ Schema.new(...)
11
13
  end
12
14
  end
13
15
 
14
- ###
15
- # Nokogiri::XML::Schema is used for validating XML against a schema
16
- # (usually from an xsd file).
16
+ # Nokogiri::XML::Schema is used for validating \XML against an \XSD schema definition.
17
17
  #
18
- # == Synopsis
18
+ # Since v1.11.0, Schema treats inputs as *untrusted* by default, and so external entities are
19
+ # not resolved from the network (+http://+ or +ftp://+). When parsing a trusted document, the
20
+ # caller may turn off the +NONET+ option via the ParseOptions to (re-)enable external entity
21
+ # resolution over a network connection.
19
22
  #
20
- # Validate an XML document against a Schema. Loop over the errors that
21
- # are returned and print them out:
23
+ # 🛡 Before v1.11.0, documents were "trusted" by default during schema parsing which was counter
24
+ # to Nokogiri's "untrusted by default" security policy.
22
25
  #
23
- # xsd = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE))
24
- # doc = Nokogiri::XML(File.read(PO_XML_FILE))
26
+ # *Example:* Determine whether an \XML document is valid.
25
27
  #
26
- # xsd.validate(doc).each do |error|
27
- # puts error.message
28
- # end
28
+ # schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
29
+ # doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
30
+ # schema.valid?(doc) # Boolean
29
31
  #
30
- # The list of errors are Nokogiri::XML::SyntaxError objects.
32
+ # *Example:* Validate an \XML document against an \XSD schema, and capture any errors that are found.
33
+ #
34
+ # schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
35
+ # doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
36
+ # errors = schema.validate(doc) # Array<SyntaxError>
37
+ #
38
+ # *Example:* Validate an \XML document using a Document containing an \XSD schema definition.
39
+ #
40
+ # schema_doc = Nokogiri::XML::Document.parse(File.read(XSD_FILE))
41
+ # schema = Nokogiri::XML::Schema.from_document(schema_doc)
42
+ # doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
43
+ # schema.valid?(doc) # Boolean
31
44
  #
32
- # NOTE: As of v1.11.0, Schema treats inputs as UNTRUSTED by default, and so external entities
33
- # are not resolved from the network (`http://` or `ftp://`). Previously, parsing treated
34
- # documents as "trusted" by default which was counter to Nokogiri's "untrusted by default"
35
- # security policy. If a document is trusted, then the caller may turn off the NONET option via
36
- # the ParseOptions to re-enable external entity resolution over a network connection.
37
45
  class Schema
38
- # Errors while parsing the schema file
46
+ # The errors found while parsing the \XSD
47
+ #
48
+ # [Returns] Array<Nokogiri::XML::SyntaxError>
39
49
  attr_accessor :errors
40
- # The Nokogiri::XML::ParseOptions used to parse the schema
50
+
51
+ # The options used to parse the schema
52
+ #
53
+ # [Returns] Nokogiri::XML::ParseOptions
41
54
  attr_accessor :parse_options
42
55
 
43
- ###
44
- # Create a new Nokogiri::XML::Schema object using a +string_or_io+
45
- # object.
46
- def self.new(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
47
- from_document(Nokogiri::XML(string_or_io), options)
56
+ # :call-seq:
57
+ # new(input) → Nokogiri::XML::Schema
58
+ # new(input, parse_options) → Nokogiri::XML::Schema
59
+ #
60
+ # Parse an \XSD schema definition from a String or IO to create a new Nokogiri::XML::Schema
61
+ #
62
+ # [Parameters]
63
+ # - +input+ (String | IO) \XSD schema definition
64
+ # - +parse_options+ (Nokogiri::XML::ParseOptions)
65
+ # Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA
66
+ #
67
+ # [Returns] Nokogiri::XML::Schema
68
+ #
69
+ def self.new(input, parse_options_ = ParseOptions::DEFAULT_SCHEMA, parse_options: parse_options_)
70
+ from_document(Nokogiri::XML::Document.parse(input), parse_options)
71
+ end
72
+
73
+ # :call-seq:
74
+ # read_memory(input) → Nokogiri::XML::Schema
75
+ # read_memory(input, parse_options) → Nokogiri::XML::Schema
76
+ #
77
+ # Convenience method for Nokogiri::XML::Schema.new
78
+ def self.read_memory(...)
79
+ # TODO deprecate this method
80
+ new(...)
48
81
  end
49
82
 
50
- ###
51
- # Validate +thing+ against this schema. +thing+ can be a
52
- # Nokogiri::XML::Document object, or a filename. An Array of
53
- # Nokogiri::XML::SyntaxError objects found while validating the
54
- # +thing+ is returned.
55
- def validate(thing)
56
- if thing.is_a?(Nokogiri::XML::Document)
57
- validate_document(thing)
58
- elsif File.file?(thing)
59
- validate_file(thing)
83
+ #
84
+ # :call-seq: validate(input) → Array<SyntaxError>
85
+ #
86
+ # Validate +input+ and return any errors that are found.
87
+ #
88
+ # [Parameters]
89
+ # - +input+ (Nokogiri::XML::Document | String)
90
+ # A parsed document, or a string containing a local filename.
91
+ #
92
+ # [Returns] Array<SyntaxError>
93
+ #
94
+ # *Example:* Validate an existing XML::Document, and capture any errors that are found.
95
+ #
96
+ # schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
97
+ # errors = schema.validate(document)
98
+ #
99
+ # *Example:* Validate an \XML document on disk, and capture any errors that are found.
100
+ #
101
+ # schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
102
+ # errors = schema.validate("/path/to/file.xml")
103
+ #
104
+ def validate(input)
105
+ if input.is_a?(Nokogiri::XML::Document)
106
+ validate_document(input)
107
+ elsif File.file?(input)
108
+ validate_file(input)
60
109
  else
61
- raise ArgumentError, "Must provide Nokogiri::Xml::Document or the name of an existing file"
110
+ raise ArgumentError, "Must provide Nokogiri::XML::Document or the name of an existing file"
62
111
  end
63
112
  end
64
113
 
65
- ###
66
- # Returns true if +thing+ is a valid Nokogiri::XML::Document or
67
- # file.
68
- def valid?(thing)
69
- validate(thing).empty?
114
+ #
115
+ # :call-seq: valid?(input) → Boolean
116
+ #
117
+ # Validate +input+ and return a Boolean indicating whether the document is valid
118
+ #
119
+ # [Parameters]
120
+ # - +input+ (Nokogiri::XML::Document | String)
121
+ # A parsed document, or a string containing a local filename.
122
+ #
123
+ # [Returns] Boolean
124
+ #
125
+ # *Example:* Validate an existing XML::Document
126
+ #
127
+ # schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
128
+ # return unless schema.valid?(document)
129
+ #
130
+ # *Example:* Validate an \XML document on disk
131
+ #
132
+ # schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
133
+ # return unless schema.valid?("/path/to/file.xml")
134
+ #
135
+ def valid?(input)
136
+ validate(input).empty?
70
137
  end
71
138
  end
72
139
  end
@@ -199,7 +199,7 @@ module Nokogiri
199
199
  #
200
200
  # Search this node's immediate children using CSS selector +selector+
201
201
  def >(selector) # rubocop:disable Naming/BinaryOperatorParameterName
202
- ns = (document.root&.namespaces || {})
202
+ ns = document.root&.namespaces || {}
203
203
  xpath(CSS.xpath_for(selector, prefix: "./", ns: ns).first)
204
204
  end
205
205
 
@@ -229,7 +229,7 @@ module Nokogiri
229
229
  def xpath_impl(node, path, handler, ns, binds)
230
230
  ctx = XPathContext.new(node)
231
231
  ctx.register_namespaces(ns)
232
- path = path.gsub(/xmlns:/, " :") unless Nokogiri.uses_libxml?
232
+ path = path.gsub("xmlns:", " :") unless Nokogiri.uses_libxml?
233
233
 
234
234
  binds&.each do |key, value|
235
235
  ctx.register_variable(key.to_s, value)
@@ -243,16 +243,14 @@ module Nokogiri
243
243
  end
244
244
 
245
245
  def xpath_query_from_css_rule(rule, ns)
246
- visitor = Nokogiri::CSS::XPathVisitor.new(
247
- builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::OPTIMAL,
248
- doctype: document.xpath_doctype,
249
- )
250
246
  self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
251
- CSS.xpath_for(rule.to_s, {
247
+ visitor = Nokogiri::CSS::XPathVisitor.new(
248
+ builtins: Nokogiri::CSS::XPathVisitor::BuiltinsConfig::OPTIMAL,
249
+ doctype: document.xpath_doctype,
252
250
  prefix: implied_xpath_context,
253
- ns: ns,
254
- visitor: visitor,
255
- })
251
+ namespaces: ns,
252
+ )
253
+ CSS.xpath_for(rule.to_s, visitor: visitor)
256
254
  end.join(" | ")
257
255
  end
258
256
 
@@ -269,7 +267,7 @@ module Nokogiri
269
267
  end
270
268
  ns, binds = hashes.reverse
271
269
 
272
- ns ||= (document.root&.namespaces || {})
270
+ ns ||= document.root&.namespaces || {}
273
271
 
274
272
  [params, handler, ns, binds]
275
273
  end