nokogiri 1.12.5 → 1.13.6
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/README.md +9 -7
- data/bin/nokogiri +63 -50
- data/dependencies.yml +13 -64
- data/ext/nokogiri/extconf.rb +64 -44
- data/ext/nokogiri/html4_sax_parser_context.c +2 -3
- data/ext/nokogiri/xml_document.c +35 -35
- data/ext/nokogiri/xml_document_fragment.c +0 -2
- data/ext/nokogiri/xml_dtd.c +2 -2
- data/ext/nokogiri/xml_encoding_handler.c +25 -11
- data/ext/nokogiri/xml_node.c +638 -333
- data/ext/nokogiri/xml_reader.c +37 -11
- data/ext/nokogiri/xml_sax_parser_context.c +10 -3
- data/ext/nokogiri/xml_xpath_context.c +72 -49
- data/ext/nokogiri/xslt_stylesheet.c +107 -9
- data/gumbo-parser/src/parser.c +0 -11
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +9 -8
- data/lib/nokogiri/css/parser.rb +360 -341
- data/lib/nokogiri/css/parser.y +249 -244
- data/lib/nokogiri/css/parser_extras.rb +22 -20
- data/lib/nokogiri/css/syntax_error.rb +1 -0
- data/lib/nokogiri/css/tokenizer.rb +4 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +179 -82
- data/lib/nokogiri/css.rb +38 -6
- data/lib/nokogiri/decorators/slop.rb +8 -7
- data/lib/nokogiri/extension.rb +1 -1
- data/lib/nokogiri/gumbo.rb +1 -0
- data/lib/nokogiri/html.rb +16 -10
- data/lib/nokogiri/html4/builder.rb +1 -0
- data/lib/nokogiri/html4/document.rb +88 -77
- data/lib/nokogiri/html4/document_fragment.rb +11 -7
- data/lib/nokogiri/html4/element_description.rb +1 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +426 -520
- data/lib/nokogiri/html4/entity_lookup.rb +2 -1
- data/lib/nokogiri/html4/sax/parser.rb +5 -2
- data/lib/nokogiri/html4/sax/parser_context.rb +1 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +7 -7
- data/lib/nokogiri/html4.rb +11 -5
- data/lib/nokogiri/html5/document.rb +27 -10
- data/lib/nokogiri/html5/document_fragment.rb +5 -2
- data/lib/nokogiri/html5/node.rb +10 -3
- data/lib/nokogiri/html5.rb +69 -64
- data/lib/nokogiri/jruby/dependencies.rb +10 -9
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +2 -1
- data/lib/nokogiri/version/info.rb +20 -13
- data/lib/nokogiri/version.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +5 -3
- data/lib/nokogiri/xml/attribute_decl.rb +2 -1
- data/lib/nokogiri/xml/builder.rb +34 -32
- data/lib/nokogiri/xml/cdata.rb +2 -1
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +144 -103
- data/lib/nokogiri/xml/document_fragment.rb +41 -38
- data/lib/nokogiri/xml/dtd.rb +3 -2
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +2 -1
- data/lib/nokogiri/xml/entity_decl.rb +3 -2
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +2 -0
- data/lib/nokogiri/xml/node/save_options.rb +8 -4
- data/lib/nokogiri/xml/node.rb +521 -351
- data/lib/nokogiri/xml/node_set.rb +50 -54
- data/lib/nokogiri/xml/notation.rb +12 -0
- data/lib/nokogiri/xml/parse_options.rb +12 -7
- data/lib/nokogiri/xml/pp/character_data.rb +8 -6
- data/lib/nokogiri/xml/pp/node.rb +24 -26
- data/lib/nokogiri/xml/pp.rb +1 -0
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/reader.rb +17 -19
- data/lib/nokogiri/xml/relax_ng.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +20 -19
- data/lib/nokogiri/xml/sax/parser.rb +37 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
- data/lib/nokogiri/xml/sax.rb +1 -0
- data/lib/nokogiri/xml/schema.rb +7 -6
- data/lib/nokogiri/xml/searchable.rb +93 -62
- data/lib/nokogiri/xml/syntax_error.rb +5 -4
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +12 -0
- data/lib/nokogiri/xml/xpath_context.rb +2 -3
- data/lib/nokogiri/xml.rb +4 -3
- data/lib/nokogiri/xslt/stylesheet.rb +1 -0
- data/lib/nokogiri/xslt.rb +21 -13
- data/lib/nokogiri.rb +19 -16
- data/lib/xsd/xmlparser/nokogiri.rb +25 -24
- data/patches/libxml2/0004-use-glibc-strlen.patch +3 -3
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2443 -1914
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
- data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
- metadata +109 -31
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +0 -31
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +0 -19
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
data/lib/nokogiri/xml/xpath.rb
CHANGED
@@ -1,7 +1,19 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
module XPath
|
6
|
+
# The XPath search prefix to search globally, +//+
|
7
|
+
GLOBAL_SEARCH_PREFIX = "//"
|
8
|
+
|
9
|
+
# The XPath search prefix to search direct descendants of the root element, +/+
|
10
|
+
ROOT_SEARCH_PREFIX = "/"
|
11
|
+
|
12
|
+
# The XPath search prefix to search direct descendants of the current element, +./+
|
13
|
+
CURRENT_SEARCH_PREFIX = "./"
|
14
|
+
|
15
|
+
# The XPath search prefix to search anywhere in the current element's subtree, +.//+
|
16
|
+
SUBTREE_SEARCH_PREFIX = ".//"
|
5
17
|
end
|
6
18
|
end
|
7
19
|
end
|
@@ -1,17 +1,16 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
class XPathContext
|
5
|
-
|
6
6
|
###
|
7
7
|
# Register namespaces in +namespaces+
|
8
8
|
def register_namespaces(namespaces)
|
9
9
|
namespaces.each do |k, v|
|
10
|
-
k = k.to_s.gsub(/.*:/,
|
10
|
+
k = k.to_s.gsub(/.*:/, "") # strip off 'xmlns:' or 'xml:'
|
11
11
|
register_ns(k, v)
|
12
12
|
end
|
13
13
|
end
|
14
|
-
|
15
14
|
end
|
16
15
|
end
|
17
16
|
end
|
data/lib/nokogiri/xml.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
class << self
|
4
5
|
###
|
@@ -21,12 +22,12 @@ module Nokogiri
|
|
21
22
|
# Nokogiri::XML::Reader for mor information
|
22
23
|
def Reader(string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT)
|
23
24
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
24
|
-
# Give the options to the user
|
25
25
|
yield options if block_given?
|
26
26
|
|
27
27
|
if string_or_io.respond_to?(:read)
|
28
28
|
return Reader.from_io(string_or_io, url, encoding, options.to_i)
|
29
29
|
end
|
30
|
+
|
30
31
|
Reader.from_memory(string_or_io, url, encoding, options.to_i)
|
31
32
|
end
|
32
33
|
|
@@ -38,8 +39,8 @@ module Nokogiri
|
|
38
39
|
|
39
40
|
####
|
40
41
|
# Parse a fragment from +string+ in to a NodeSet.
|
41
|
-
def fragment(string)
|
42
|
-
XML::DocumentFragment.parse(string)
|
42
|
+
def fragment(string, options = ParseOptions::DEFAULT_XML, &block)
|
43
|
+
XML::DocumentFragment.parse(string, options, &block)
|
43
44
|
end
|
44
45
|
end
|
45
46
|
end
|
data/lib/nokogiri/xslt.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
# frozen_string_literal: true
|
3
|
+
|
2
4
|
module Nokogiri
|
3
5
|
class << self
|
4
6
|
###
|
@@ -8,7 +10,7 @@ module Nokogiri
|
|
8
10
|
#
|
9
11
|
# xslt = Nokogiri::XSLT(File.read(ARGV[0]))
|
10
12
|
#
|
11
|
-
def XSLT
|
13
|
+
def XSLT(stylesheet, modules = {})
|
12
14
|
XSLT.parse(stylesheet, modules)
|
13
15
|
end
|
14
16
|
end
|
@@ -33,22 +35,28 @@ module Nokogiri
|
|
33
35
|
end
|
34
36
|
end
|
35
37
|
|
36
|
-
|
37
|
-
#
|
38
|
+
# :call-seq:
|
39
|
+
# quote_params(params) → Array
|
40
|
+
#
|
41
|
+
# Quote parameters in +params+ for stylesheet safety.
|
42
|
+
# See Nokogiri::XSLT::Stylesheet.transform for example usage.
|
43
|
+
#
|
44
|
+
# [Parameters]
|
45
|
+
# - +params+ (Hash, Array) XSLT parameters (key->value, or tuples of [key, value])
|
46
|
+
#
|
47
|
+
# [Returns] Array of string parameters, with quotes correctly escaped for use with XSLT::Stylesheet.transform
|
48
|
+
#
|
38
49
|
def quote_params(params)
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
"concat('#{v.gsub(/'/, %q{', "'", '})}')"
|
44
|
-
else
|
45
|
-
"'#{v}'"
|
46
|
-
end
|
50
|
+
params.flatten.each_slice(2).each_with_object([]) do |kv, quoted_params|
|
51
|
+
key, value = kv.map(&:to_s)
|
52
|
+
value = if /'/.match?(value)
|
53
|
+
"concat('#{value.gsub(/'/, %q{', "'", '})}')"
|
47
54
|
else
|
48
|
-
|
55
|
+
"'#{value}'"
|
49
56
|
end
|
57
|
+
quoted_params << key
|
58
|
+
quoted_params << value
|
50
59
|
end
|
51
|
-
parray.flatten
|
52
60
|
end
|
53
61
|
end
|
54
62
|
end
|
data/lib/nokogiri.rb
CHANGED
@@ -1,6 +1,5 @@
|
|
1
|
-
#
|
1
|
+
# coding: utf-8
|
2
2
|
# frozen_string_literal: true
|
3
|
-
# Modify the PATH on windows so that the external DLLs will get loaded.
|
4
3
|
|
5
4
|
require "rbconfig"
|
6
5
|
|
@@ -19,30 +18,32 @@ require_relative "nokogiri/extension"
|
|
19
18
|
#
|
20
19
|
# Here is an example:
|
21
20
|
#
|
22
|
-
#
|
23
|
-
#
|
21
|
+
# require 'nokogiri'
|
22
|
+
# require 'open-uri'
|
24
23
|
#
|
25
|
-
#
|
24
|
+
# # Get a Nokogiri::HTML4::Document for the page we’re interested in...
|
26
25
|
#
|
27
|
-
#
|
26
|
+
# doc = Nokogiri::HTML4(URI.open('http://www.google.com/search?q=tenderlove'))
|
28
27
|
#
|
29
|
-
#
|
28
|
+
# # Do funky things with it using Nokogiri::XML::Node methods...
|
30
29
|
#
|
31
|
-
#
|
32
|
-
#
|
33
|
-
#
|
34
|
-
#
|
35
|
-
#
|
30
|
+
# ####
|
31
|
+
# # Search for nodes by css
|
32
|
+
# doc.css('h3.r a.l').each do |link|
|
33
|
+
# puts link.content
|
34
|
+
# end
|
36
35
|
#
|
37
|
-
# See
|
38
|
-
#
|
36
|
+
# See also:
|
37
|
+
#
|
38
|
+
# - Nokogiri::XML::Searchable#css for more information about CSS searching
|
39
|
+
# - Nokogiri::XML::Searchable#xpath for more information about XPath searching
|
39
40
|
module Nokogiri
|
40
41
|
class << self
|
41
42
|
###
|
42
43
|
# Parse an HTML or XML document. +string+ contains the document.
|
43
44
|
def parse(string, url = nil, encoding = nil, options = nil)
|
44
45
|
if string.respond_to?(:read) ||
|
45
|
-
/^\s*<(?:!DOCTYPE\s+)?html[\s>]/i
|
46
|
+
/^\s*<(?:!DOCTYPE\s+)?html[\s>]/i.match?(string[0, 512])
|
46
47
|
# Expect an HTML indicator to appear within the first 512
|
47
48
|
# characters of a document. (<?xml ?> + <?xml-stylesheet ?>
|
48
49
|
# shouldn't be that long)
|
@@ -85,6 +86,7 @@ module Nokogiri
|
|
85
86
|
Nokogiri(*args, &block).slop!
|
86
87
|
end
|
87
88
|
|
89
|
+
# :nodoc:
|
88
90
|
def install_default_aliases
|
89
91
|
# Make sure to support some popular encoding aliases not known by
|
90
92
|
# all iconv implementations.
|
@@ -105,7 +107,7 @@ end
|
|
105
107
|
#
|
106
108
|
# To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.
|
107
109
|
def Nokogiri(*args, &block)
|
108
|
-
if
|
110
|
+
if block
|
109
111
|
Nokogiri::HTML4::Builder.new(&block).doc.root
|
110
112
|
else
|
111
113
|
Nokogiri.parse(*args)
|
@@ -113,6 +115,7 @@ def Nokogiri(*args, &block)
|
|
113
115
|
end
|
114
116
|
|
115
117
|
require_relative "nokogiri/version"
|
118
|
+
require_relative "nokogiri/class_resolver"
|
116
119
|
require_relative "nokogiri/syntax_error"
|
117
120
|
require_relative "nokogiri/xml"
|
118
121
|
require_relative "nokogiri/xslt"
|
@@ -1,8 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require 'nokogiri'
|
3
2
|
|
4
|
-
|
5
|
-
|
3
|
+
require "nokogiri"
|
4
|
+
|
5
|
+
module XSD
|
6
|
+
module XMLParser
|
6
7
|
###
|
7
8
|
# Nokogiri XML parser for soap4r.
|
8
9
|
#
|
@@ -27,40 +28,40 @@ module XSD # :nodoc:
|
|
27
28
|
class Nokogiri < XSD::XMLParser::Parser
|
28
29
|
###
|
29
30
|
# Create a new XSD parser with +host+ and +opt+
|
30
|
-
def initialize
|
31
|
+
def initialize(host, opt = {})
|
31
32
|
super
|
32
|
-
@parser = ::Nokogiri::XML::SAX::Parser.new(self, @charset ||
|
33
|
+
@parser = ::Nokogiri::XML::SAX::Parser.new(self, @charset || "UTF-8")
|
33
34
|
end
|
34
35
|
|
35
36
|
###
|
36
37
|
# Start parsing +string_or_readable+
|
37
|
-
def do_parse
|
38
|
+
def do_parse(string_or_readable)
|
38
39
|
@parser.parse(string_or_readable)
|
39
40
|
end
|
40
41
|
|
41
42
|
###
|
42
43
|
# Handle the start_element event with +name+ and +attrs+
|
43
|
-
def start_element
|
44
|
+
def start_element(name, attrs = [])
|
44
45
|
super(name, Hash[*attrs.flatten])
|
45
46
|
end
|
46
47
|
|
47
48
|
###
|
48
49
|
# Handle the end_element event with +name+
|
49
|
-
def end_element
|
50
|
+
def end_element(name)
|
50
51
|
super
|
51
52
|
end
|
52
53
|
|
53
54
|
###
|
54
55
|
# Handle errors with message +msg+
|
55
|
-
def error
|
56
|
-
raise ParseError
|
56
|
+
def error(msg)
|
57
|
+
raise ParseError, msg
|
57
58
|
end
|
58
|
-
|
59
|
+
alias_method :warning, :error
|
59
60
|
|
60
61
|
###
|
61
62
|
# Handle cdata_blocks containing +string+
|
62
|
-
def cdata_block
|
63
|
-
characters
|
63
|
+
def cdata_block(string)
|
64
|
+
characters(string)
|
64
65
|
end
|
65
66
|
|
66
67
|
###
|
@@ -70,16 +71,16 @@ module XSD # :nodoc:
|
|
70
71
|
# +prefix+ is the namespace prefix for the element
|
71
72
|
# +uri+ is the associated namespace URI
|
72
73
|
# +ns+ is a hash of namespace prefix:urls associated with the element
|
73
|
-
def start_element_namespace
|
74
|
+
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
|
74
75
|
###
|
75
76
|
# Deal with SAX v1 interface
|
76
|
-
name = [prefix, name].compact.join(
|
77
|
-
attributes = ns.map
|
78
|
-
[[
|
79
|
-
|
80
|
-
[[attr.prefix, attr.localname].compact.join(
|
81
|
-
|
82
|
-
start_element
|
77
|
+
name = [prefix, name].compact.join(":")
|
78
|
+
attributes = ns.map do |ns_prefix, ns_uri|
|
79
|
+
[["xmlns", ns_prefix].compact.join(":"), ns_uri]
|
80
|
+
end + attrs.map do |attr|
|
81
|
+
[[attr.prefix, attr.localname].compact.join(":"), attr.value]
|
82
|
+
end.flatten
|
83
|
+
start_element(name, attributes)
|
83
84
|
end
|
84
85
|
|
85
86
|
###
|
@@ -87,13 +88,13 @@ module XSD # :nodoc:
|
|
87
88
|
# +name+ is the element's name
|
88
89
|
# +prefix+ is the namespace prefix associated with the element
|
89
90
|
# +uri+ is the associated namespace URI
|
90
|
-
def end_element_namespace
|
91
|
+
def end_element_namespace(name, prefix = nil, uri = nil)
|
91
92
|
###
|
92
93
|
# Deal with SAX v1 interface
|
93
|
-
end_element
|
94
|
+
end_element([prefix, name].compact.join(":"))
|
94
95
|
end
|
95
96
|
|
96
|
-
|
97
|
+
["xmldecl", "start_document", "end_document", "comment"].each do |name|
|
97
98
|
class_eval %{ def #{name}(*args); end }
|
98
99
|
end
|
99
100
|
|
@@ -31,18 +31,18 @@ diff --git a/xmlstring.c b/xmlstring.c
|
|
31
31
|
index e8a1e45d..df247dff 100644
|
32
32
|
--- a/xmlstring.c
|
33
33
|
+++ b/xmlstring.c
|
34
|
-
@@ -423,
|
34
|
+
@@ -423,12 +423,7 @@ xmlStrsub(const xmlChar *str, int start, int len) {
|
35
35
|
|
36
36
|
int
|
37
37
|
xmlStrlen(const xmlChar *str) {
|
38
|
-
-
|
38
|
+
- size_t len = 0;
|
39
39
|
-
|
40
40
|
if (str == NULL) return(0);
|
41
41
|
- while (*str != 0) { /* non input consuming */
|
42
42
|
- str++;
|
43
43
|
- len++;
|
44
44
|
- }
|
45
|
-
- return(len);
|
45
|
+
- return(len > INT_MAX ? 0 : len);
|
46
46
|
+
|
47
47
|
+ return strlen((const char*)str);
|
48
48
|
}
|