nokogiri 1.13.10-x86_64-linux → 1.14.0-x86_64-linux
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +33 -0
- data/LICENSE-DEPENDENCIES.md +830 -509
- data/LICENSE.md +1 -1
- data/README.md +18 -11
- data/dependencies.yml +25 -7
- data/ext/nokogiri/extconf.rb +80 -21
- data/ext/nokogiri/gumbo.c +19 -9
- data/ext/nokogiri/html4_document.c +1 -1
- data/ext/nokogiri/html4_entity_lookup.c +1 -1
- data/ext/nokogiri/html4_sax_parser_context.c +0 -5
- data/ext/nokogiri/nokogiri.c +33 -51
- data/ext/nokogiri/nokogiri.h +17 -14
- data/ext/nokogiri/xml_attribute_decl.c +1 -1
- data/ext/nokogiri/xml_cdata.c +1 -1
- data/ext/nokogiri/xml_document.c +16 -11
- data/ext/nokogiri/xml_element_content.c +2 -2
- data/ext/nokogiri/xml_element_decl.c +1 -1
- data/ext/nokogiri/xml_encoding_handler.c +2 -2
- data/ext/nokogiri/xml_namespace.c +38 -8
- data/ext/nokogiri/xml_node.c +286 -26
- data/ext/nokogiri/xml_node_set.c +0 -2
- data/ext/nokogiri/xml_reader.c +40 -20
- data/ext/nokogiri/xml_relax_ng.c +0 -2
- data/ext/nokogiri/xml_sax_parser.c +22 -16
- data/ext/nokogiri/xml_sax_parser_context.c +0 -5
- data/ext/nokogiri/xml_sax_push_parser.c +0 -2
- data/ext/nokogiri/xml_schema.c +0 -2
- data/ext/nokogiri/xml_xpath_context.c +87 -83
- data/ext/nokogiri/xslt_stylesheet.c +14 -13
- data/gumbo-parser/Makefile +10 -0
- data/lib/nokogiri/2.7/nokogiri.so +0 -0
- data/lib/nokogiri/3.0/nokogiri.so +0 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/css/node.rb +2 -2
- data/lib/nokogiri/css/xpath_visitor.rb +5 -3
- data/lib/nokogiri/css.rb +6 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +3 -2
- data/lib/nokogiri/html4/document.rb +2 -121
- data/lib/nokogiri/html4/element_description_defaults.rb +6 -12
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4.rb +1 -0
- data/lib/nokogiri/html5/document.rb +113 -36
- data/lib/nokogiri/html5/document_fragment.rb +9 -2
- data/lib/nokogiri/html5/node.rb +3 -5
- data/lib/nokogiri/html5.rb +127 -216
- data/lib/nokogiri/jruby/dependencies.rb +1 -19
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +11 -10
- data/lib/nokogiri/xml/attr.rb +49 -0
- data/lib/nokogiri/xml/builder.rb +1 -1
- data/lib/nokogiri/xml/document.rb +102 -54
- data/lib/nokogiri/xml/document_fragment.rb +49 -6
- data/lib/nokogiri/xml/namespace.rb +42 -0
- data/lib/nokogiri/xml/node/save_options.rb +6 -4
- data/lib/nokogiri/xml/node.rb +190 -35
- data/lib/nokogiri/xml/node_set.rb +87 -9
- data/lib/nokogiri/xml/parse_options.rb +129 -50
- data/lib/nokogiri/xml/pp/node.rb +6 -4
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/sax/parser.rb +2 -3
- data/lib/nokogiri/xslt.rb +1 -1
- data/lib/nokogiri.rb +3 -11
- data/lib/xsd/xmlparser/nokogiri.rb +3 -1
- metadata +13 -248
- data/lib/nokogiri/2.6/nokogiri.so +0 -0
@@ -19,63 +19,72 @@ module Nokogiri
|
|
19
19
|
NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
|
20
20
|
NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
|
21
21
|
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
31
|
-
|
32
|
-
|
33
|
-
|
34
|
-
|
35
|
-
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
22
|
+
class << self
|
23
|
+
# Parse an XML file.
|
24
|
+
#
|
25
|
+
# +string_or_io+ may be a String, or any object that responds to
|
26
|
+
# _read_ and _close_ such as an IO, or StringIO.
|
27
|
+
#
|
28
|
+
# +url+ (optional) is the URI where this document is located.
|
29
|
+
#
|
30
|
+
# +encoding+ (optional) is the encoding that should be used when processing
|
31
|
+
# the document.
|
32
|
+
#
|
33
|
+
# +options+ (optional) is a configuration object that sets options during
|
34
|
+
# parsing, such as Nokogiri::XML::ParseOptions::RECOVER. See the
|
35
|
+
# Nokogiri::XML::ParseOptions for more information.
|
36
|
+
#
|
37
|
+
# +block+ (optional) is passed a configuration object on which
|
38
|
+
# parse options may be set.
|
39
|
+
#
|
40
|
+
# By default, Nokogiri treats documents as untrusted, and so
|
41
|
+
# does not attempt to load DTDs or access the network. See
|
42
|
+
# Nokogiri::XML::ParseOptions for a complete list of options;
|
43
|
+
# and that module's DEFAULT_XML constant for what's set (and not
|
44
|
+
# set) by default.
|
45
|
+
#
|
46
|
+
# Nokogiri.XML() is a convenience method which will call this method.
|
47
|
+
#
|
48
|
+
def parse(string_or_io, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML)
|
49
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
50
|
+
yield options if block_given?
|
51
|
+
|
52
|
+
url ||= string_or_io.respond_to?(:path) ? string_or_io.path : nil
|
53
|
+
|
54
|
+
if empty_doc?(string_or_io)
|
55
|
+
if options.strict?
|
56
|
+
raise Nokogiri::XML::SyntaxError, "Empty document"
|
57
|
+
else
|
58
|
+
return encoding ? new.tap { |i| i.encoding = encoding } : new
|
59
|
+
end
|
60
|
+
end
|
51
61
|
|
52
|
-
|
62
|
+
doc = if string_or_io.respond_to?(:read)
|
63
|
+
if string_or_io.is_a?(Pathname)
|
64
|
+
# resolve the Pathname to the file and open it as an IO object, see #2110
|
65
|
+
string_or_io = string_or_io.expand_path.open
|
66
|
+
url ||= string_or_io.path
|
67
|
+
end
|
53
68
|
|
54
|
-
|
55
|
-
if options.strict?
|
56
|
-
raise Nokogiri::XML::SyntaxError, "Empty document"
|
69
|
+
read_io(string_or_io, url, encoding, options.to_i)
|
57
70
|
else
|
58
|
-
|
71
|
+
# read_memory pukes on empty docs
|
72
|
+
read_memory(string_or_io, url, encoding, options.to_i)
|
59
73
|
end
|
60
|
-
end
|
61
74
|
|
62
|
-
|
63
|
-
if
|
64
|
-
# resolve the Pathname to the file and open it as an IO object, see #2110
|
65
|
-
string_or_io = string_or_io.expand_path.open
|
66
|
-
url ||= string_or_io.path
|
67
|
-
end
|
75
|
+
# do xinclude processing
|
76
|
+
doc.do_xinclude(options) if options.xinclude?
|
68
77
|
|
69
|
-
|
70
|
-
else
|
71
|
-
# read_memory pukes on empty docs
|
72
|
-
read_memory(string_or_io, url, encoding, options.to_i)
|
78
|
+
doc
|
73
79
|
end
|
74
80
|
|
75
|
-
|
76
|
-
doc.do_xinclude(options) if options.xinclude?
|
81
|
+
private
|
77
82
|
|
78
|
-
|
83
|
+
def empty_doc?(string_or_io)
|
84
|
+
string_or_io.nil? ||
|
85
|
+
(string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
|
86
|
+
(string_or_io.respond_to?(:eof?) && string_or_io.eof?)
|
87
|
+
end
|
79
88
|
end
|
80
89
|
|
81
90
|
##
|
@@ -165,6 +174,7 @@ module Nokogiri
|
|
165
174
|
# Since v1.12.4
|
166
175
|
attr_accessor :namespace_inheritance
|
167
176
|
|
177
|
+
# rubocop:disable Lint/MissingSuper
|
168
178
|
def initialize(*args) # :nodoc:
|
169
179
|
@errors = []
|
170
180
|
@decorators = nil
|
@@ -405,14 +415,52 @@ module Nokogiri
|
|
405
415
|
Nokogiri::CSS::XPathVisitor::DoctypeConfig::XML
|
406
416
|
end
|
407
417
|
|
408
|
-
|
409
|
-
|
410
|
-
|
411
|
-
|
412
|
-
|
413
|
-
|
418
|
+
#
|
419
|
+
# :call-seq: deconstruct_keys(array_of_names) → Hash
|
420
|
+
#
|
421
|
+
# Returns a hash describing the Document, to use in pattern matching.
|
422
|
+
#
|
423
|
+
# Valid keys and their values:
|
424
|
+
# - +root+ → (Node, nil) The root node of the Document, or +nil+ if the document is empty.
|
425
|
+
#
|
426
|
+
# In the future, other keys may allow accessing things like doctype and processing
|
427
|
+
# instructions. If you have a use case and would like this functionality, please let us know
|
428
|
+
# by opening an issue or a discussion on the github project.
|
429
|
+
#
|
430
|
+
# ⚡ This is an experimental feature, available since v1.14.0
|
431
|
+
#
|
432
|
+
# *Example*
|
433
|
+
#
|
434
|
+
# doc = Nokogiri::XML.parse(<<~XML)
|
435
|
+
# <?xml version="1.0"?>
|
436
|
+
# <root>
|
437
|
+
# <child>
|
438
|
+
# </root>
|
439
|
+
# XML
|
440
|
+
#
|
441
|
+
# doc.deconstruct_keys([:root])
|
442
|
+
# # => {:root=>
|
443
|
+
# # #(Element:0x35c {
|
444
|
+
# # name = "root",
|
445
|
+
# # children = [
|
446
|
+
# # #(Text "\n" + " "),
|
447
|
+
# # #(Element:0x370 { name = "child", children = [ #(Text "\n")] }),
|
448
|
+
# # #(Text "\n")]
|
449
|
+
# # })}
|
450
|
+
#
|
451
|
+
# *Example* of an empty document
|
452
|
+
#
|
453
|
+
# doc = Nokogiri::XML::Document.new
|
454
|
+
#
|
455
|
+
# doc.deconstruct_keys([:root])
|
456
|
+
# # => {:root=>nil}
|
457
|
+
#
|
458
|
+
def deconstruct_keys(keys)
|
459
|
+
{ root: root }
|
414
460
|
end
|
415
461
|
|
462
|
+
private
|
463
|
+
|
416
464
|
IMPLIED_XPATH_CONTEXTS = ["//"].freeze # :nodoc:
|
417
465
|
|
418
466
|
def inspect_attributes
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
# frozen_string_literal: true
|
2
3
|
|
3
4
|
module Nokogiri
|
@@ -66,9 +67,7 @@ module Nokogiri
|
|
66
67
|
def to_html(*args)
|
67
68
|
if Nokogiri.jruby?
|
68
69
|
options = args.first.is_a?(Hash) ? args.shift : {}
|
69
|
-
|
70
|
-
options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_HTML
|
71
|
-
end
|
70
|
+
options[:save_with] ||= Node::SaveOptions::DEFAULT_HTML
|
72
71
|
args.insert(0, options)
|
73
72
|
end
|
74
73
|
children.to_html(*args)
|
@@ -80,9 +79,7 @@ module Nokogiri
|
|
80
79
|
def to_xhtml(*args)
|
81
80
|
if Nokogiri.jruby?
|
82
81
|
options = args.first.is_a?(Hash) ? args.shift : {}
|
83
|
-
|
84
|
-
options[:save_with] = Node::SaveOptions::NO_DECLARATION | Node::SaveOptions::NO_EMPTY_TAGS | Node::SaveOptions::AS_XHTML
|
85
|
-
end
|
82
|
+
options[:save_with] ||= Node::SaveOptions::DEFAULT_XHTML
|
86
83
|
args.insert(0, options)
|
87
84
|
end
|
88
85
|
children.to_xhtml(*args)
|
@@ -148,6 +145,52 @@ module Nokogiri
|
|
148
145
|
document.fragment(data)
|
149
146
|
end
|
150
147
|
|
148
|
+
#
|
149
|
+
# :call-seq: deconstruct() → Array
|
150
|
+
#
|
151
|
+
# Returns the root nodes of this document fragment as an array, to use in pattern matching.
|
152
|
+
#
|
153
|
+
# 💡 Note that text nodes are returned as well as elements. If you wish to operate only on
|
154
|
+
# root elements, you should deconstruct the array returned by
|
155
|
+
# <tt>DocumentFragment#elements</tt>.
|
156
|
+
#
|
157
|
+
# ⚡ This is an experimental feature, available since v1.14.0
|
158
|
+
#
|
159
|
+
# *Example*
|
160
|
+
#
|
161
|
+
# frag = Nokogiri::HTML5.fragment(<<~HTML)
|
162
|
+
# <div>Start</div>
|
163
|
+
# This is a <a href="#jump">shortcut</a> for you.
|
164
|
+
# <div>End</div>
|
165
|
+
# HTML
|
166
|
+
#
|
167
|
+
# frag.deconstruct
|
168
|
+
# # => [#(Element:0x35c { name = "div", children = [ #(Text "Start")] }),
|
169
|
+
# # #(Text "\n" + "This is a "),
|
170
|
+
# # #(Element:0x370 {
|
171
|
+
# # name = "a",
|
172
|
+
# # attributes = [ #(Attr:0x384 { name = "href", value = "#jump" })],
|
173
|
+
# # children = [ #(Text "shortcut")]
|
174
|
+
# # }),
|
175
|
+
# # #(Text " for you.\n"),
|
176
|
+
# # #(Element:0x398 { name = "div", children = [ #(Text "End")] }),
|
177
|
+
# # #(Text "\n")]
|
178
|
+
#
|
179
|
+
# *Example* only the elements, not the text nodes.
|
180
|
+
#
|
181
|
+
# frag.elements.deconstruct
|
182
|
+
# # => [#(Element:0x35c { name = "div", children = [ #(Text "Start")] }),
|
183
|
+
# # #(Element:0x370 {
|
184
|
+
# # name = "a",
|
185
|
+
# # attributes = [ #(Attr:0x384 { name = "href", value = "#jump" })],
|
186
|
+
# # children = [ #(Text "shortcut")]
|
187
|
+
# # }),
|
188
|
+
# # #(Element:0x398 { name = "div", children = [ #(Text "End")] })]
|
189
|
+
#
|
190
|
+
def deconstruct
|
191
|
+
children.to_a
|
192
|
+
end
|
193
|
+
|
151
194
|
private
|
152
195
|
|
153
196
|
# fix for issue 770
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
# frozen_string_literal: true
|
2
3
|
|
3
4
|
module Nokogiri
|
@@ -6,6 +7,47 @@ module Nokogiri
|
|
6
7
|
include Nokogiri::XML::PP::Node
|
7
8
|
attr_reader :document
|
8
9
|
|
10
|
+
#
|
11
|
+
# :call-seq: deconstruct_keys(array_of_names) → Hash
|
12
|
+
#
|
13
|
+
# Returns a hash describing the Namespace, to use in pattern matching.
|
14
|
+
#
|
15
|
+
# Valid keys and their values:
|
16
|
+
# - +prefix+ → (String, nil) The namespace's prefix, or +nil+ if there is no prefix (e.g., default namespace).
|
17
|
+
# - +href+ → (String) The namespace's URI
|
18
|
+
#
|
19
|
+
# ⚡ This is an experimental feature, available since v1.14.0
|
20
|
+
#
|
21
|
+
# *Example*
|
22
|
+
#
|
23
|
+
# doc = Nokogiri::XML.parse(<<~XML)
|
24
|
+
# <?xml version="1.0"?>
|
25
|
+
# <root xmlns="http://nokogiri.org/ns/default" xmlns:noko="http://nokogiri.org/ns/noko">
|
26
|
+
# <child1 foo="abc" noko:bar="def"/>
|
27
|
+
# <noko:child2 foo="qwe" noko:bar="rty"/>
|
28
|
+
# </root>
|
29
|
+
# XML
|
30
|
+
#
|
31
|
+
# doc.root.elements.first.namespace
|
32
|
+
# # => #(Namespace:0x35c { href = "http://nokogiri.org/ns/default" })
|
33
|
+
#
|
34
|
+
# doc.root.elements.first.namespace.deconstruct_keys([:prefix, :href])
|
35
|
+
# # => {:prefix=>nil, :href=>"http://nokogiri.org/ns/default"}
|
36
|
+
#
|
37
|
+
# doc.root.elements.last.namespace
|
38
|
+
# # => #(Namespace:0x370 {
|
39
|
+
# # prefix = "noko",
|
40
|
+
# # href = "http://nokogiri.org/ns/noko"
|
41
|
+
# # })
|
42
|
+
#
|
43
|
+
# doc.root.elements.last.namespace.deconstruct_keys([:prefix, :href])
|
44
|
+
# # => {:prefix=>"noko", :href=>"http://nokogiri.org/ns/noko"}
|
45
|
+
#
|
46
|
+
#
|
47
|
+
def deconstruct_keys(keys)
|
48
|
+
{ prefix: prefix, href: href }
|
49
|
+
end
|
50
|
+
|
9
51
|
private
|
10
52
|
|
11
53
|
def inspect_attributes
|
@@ -29,14 +29,16 @@ module Nokogiri
|
|
29
29
|
DEFAULT_XML = AS_XML # https://github.com/sparklemotion/nokogiri/issues/#issue/415
|
30
30
|
# the default for HTML document
|
31
31
|
DEFAULT_HTML = NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML
|
32
|
+
# the default for XHTML document
|
33
|
+
DEFAULT_XHTML = NO_DECLARATION | AS_XHTML
|
32
34
|
else
|
33
35
|
# the default for XML documents
|
34
36
|
DEFAULT_XML = FORMAT | AS_XML
|
35
37
|
# the default for HTML document
|
36
38
|
DEFAULT_HTML = FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML
|
39
|
+
# the default for XHTML document
|
40
|
+
DEFAULT_XHTML = FORMAT | NO_DECLARATION | AS_XHTML
|
37
41
|
end
|
38
|
-
# the default for XHTML document
|
39
|
-
DEFAULT_XHTML = FORMAT | NO_DECLARATION | AS_XHTML
|
40
42
|
|
41
43
|
# Integer representation of the SaveOptions
|
42
44
|
attr_reader :options
|
@@ -47,7 +49,7 @@ module Nokogiri
|
|
47
49
|
end
|
48
50
|
|
49
51
|
constants.each do |constant|
|
50
|
-
class_eval
|
52
|
+
class_eval <<~RUBY, __FILE__, __LINE__ + 1
|
51
53
|
def #{constant.downcase}
|
52
54
|
@options |= #{constant}
|
53
55
|
self
|
@@ -56,7 +58,7 @@ module Nokogiri
|
|
56
58
|
def #{constant.downcase}?
|
57
59
|
#{constant} & @options == #{constant}
|
58
60
|
end
|
59
|
-
|
61
|
+
RUBY
|
60
62
|
end
|
61
63
|
|
62
64
|
alias_method :to_i, :options
|