nokogiri 1.11.6-x86-linux → 1.12.1-x86-linux
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +243 -22
- data/LICENSE.md +1 -1
- data/README.md +6 -5
- data/ext/nokogiri/depend +35 -34
- data/ext/nokogiri/extconf.rb +181 -103
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
- data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +20 -18
- data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
- data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +5 -5
- data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
- data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
- data/ext/nokogiri/nokogiri.c +51 -38
- data/ext/nokogiri/nokogiri.h +16 -9
- data/ext/nokogiri/xml_document.c +13 -13
- data/ext/nokogiri/xml_element_content.c +2 -0
- data/ext/nokogiri/xml_encoding_handler.c +11 -6
- data/ext/nokogiri/xml_namespace.c +2 -0
- data/ext/nokogiri/xml_node.c +102 -102
- data/ext/nokogiri/xml_node_set.c +20 -20
- data/ext/nokogiri/xml_reader.c +2 -0
- data/ext/nokogiri/xml_sax_parser.c +6 -6
- data/ext/nokogiri/xml_sax_parser_context.c +2 -0
- data/ext/nokogiri/xml_schema.c +2 -0
- data/ext/nokogiri/xml_xpath_context.c +67 -65
- data/ext/nokogiri/xslt_stylesheet.c +2 -1
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri.rb +31 -29
- data/lib/nokogiri/2.5/nokogiri.so +0 -0
- data/lib/nokogiri/2.6/nokogiri.so +0 -0
- data/lib/nokogiri/2.7/nokogiri.so +0 -0
- data/lib/nokogiri/3.0/nokogiri.so +0 -0
- data/lib/nokogiri/css.rb +14 -14
- data/lib/nokogiri/css/parser.rb +1 -1
- data/lib/nokogiri/css/parser.y +1 -1
- data/lib/nokogiri/css/syntax_error.rb +1 -1
- data/lib/nokogiri/extension.rb +2 -2
- data/lib/nokogiri/gumbo.rb +14 -0
- data/lib/nokogiri/html.rb +31 -27
- data/lib/nokogiri/html4.rb +40 -0
- data/lib/nokogiri/{html → html4}/builder.rb +2 -2
- data/lib/nokogiri/{html → html4}/document.rb +4 -4
- data/lib/nokogiri/{html → html4}/document_fragment.rb +3 -3
- data/lib/nokogiri/{html → html4}/element_description.rb +1 -1
- data/lib/nokogiri/{html → html4}/element_description_defaults.rb +1 -1
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +1 -1
- data/lib/nokogiri/{html → html4}/sax/parser.rb +11 -14
- data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +5 -5
- data/lib/nokogiri/html5.rb +473 -0
- data/lib/nokogiri/html5/document.rb +74 -0
- data/lib/nokogiri/html5/document_fragment.rb +80 -0
- data/lib/nokogiri/html5/node.rb +93 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +11 -2
- data/lib/nokogiri/xml.rb +35 -36
- data/lib/nokogiri/xml/node.rb +6 -5
- data/lib/nokogiri/xml/parse_options.rb +2 -0
- data/lib/nokogiri/xml/pp.rb +2 -2
- data/lib/nokogiri/xml/sax.rb +4 -4
- data/lib/nokogiri/xml/sax/document.rb +24 -30
- data/lib/nokogiri/xml/xpath.rb +2 -2
- data/lib/nokogiri/xslt.rb +16 -16
- data/lib/nokogiri/xslt/stylesheet.rb +1 -1
- metadata +35 -35
- data/lib/nokogiri/html/sax/parser_context.rb +0 -17
Binary file
|
Binary file
|
Binary file
|
Binary file
|
data/lib/nokogiri/css.rb
CHANGED
@@ -1,28 +1,28 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require 'nokogiri/css/node'
|
3
|
-
require 'nokogiri/css/xpath_visitor'
|
4
|
-
x = $-w
|
5
|
-
$-w = false
|
6
|
-
require 'nokogiri/css/parser'
|
7
|
-
$-w = x
|
8
|
-
|
9
|
-
require 'nokogiri/css/tokenizer'
|
10
|
-
require 'nokogiri/css/syntax_error'
|
11
|
-
|
12
2
|
module Nokogiri
|
13
3
|
module CSS
|
14
4
|
class << self
|
15
5
|
###
|
16
6
|
# Parse this CSS selector in +selector+. Returns an AST.
|
17
|
-
def parse
|
18
|
-
Parser.new.parse
|
7
|
+
def parse(selector)
|
8
|
+
Parser.new.parse(selector)
|
19
9
|
end
|
20
10
|
|
21
11
|
###
|
22
12
|
# Get the XPath for +selector+.
|
23
|
-
def xpath_for
|
24
|
-
Parser.new(options[:ns] || {}).xpath_for
|
13
|
+
def xpath_for(selector, options = {})
|
14
|
+
Parser.new(options[:ns] || {}).xpath_for(selector, options)
|
25
15
|
end
|
26
16
|
end
|
27
17
|
end
|
28
18
|
end
|
19
|
+
|
20
|
+
require_relative "css/node"
|
21
|
+
require_relative "css/xpath_visitor"
|
22
|
+
x = $-w
|
23
|
+
$-w = false
|
24
|
+
require_relative "css/parser"
|
25
|
+
$-w = x
|
26
|
+
|
27
|
+
require_relative "css/tokenizer"
|
28
|
+
require_relative "css/syntax_error"
|
data/lib/nokogiri/css/parser.rb
CHANGED
data/lib/nokogiri/css/parser.y
CHANGED
data/lib/nokogiri/extension.rb
CHANGED
@@ -3,7 +3,7 @@
|
|
3
3
|
# load the C or Java extension
|
4
4
|
begin
|
5
5
|
::RUBY_VERSION =~ /(\d+\.\d+)/
|
6
|
-
|
6
|
+
require_relative "#{Regexp.last_match(1)}/nokogiri"
|
7
7
|
rescue LoadError => e
|
8
8
|
if e.message =~ /GLIBC/
|
9
9
|
warn(<<~EOM)
|
@@ -22,5 +22,5 @@ rescue LoadError => e
|
|
22
22
|
EOM
|
23
23
|
raise e
|
24
24
|
end
|
25
|
-
|
25
|
+
require_relative "nokogiri"
|
26
26
|
end
|
@@ -0,0 +1,14 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Nokogiri
|
3
|
+
module Gumbo
|
4
|
+
# The default maximum number of attributes per element.
|
5
|
+
DEFAULT_MAX_ATTRIBUTES = 400
|
6
|
+
|
7
|
+
# The default maximum number of errors for parsing a document or a fragment.
|
8
|
+
DEFAULT_MAX_ERRORS = 0
|
9
|
+
|
10
|
+
# The default maximum depth of the DOM tree produced by parsing a document
|
11
|
+
# or fragment.
|
12
|
+
DEFAULT_MAX_TREE_DEPTH = 400
|
13
|
+
end
|
14
|
+
end
|
data/lib/nokogiri/html.rb
CHANGED
@@ -1,38 +1,42 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
3
|
-
require 'nokogiri/html/document'
|
4
|
-
require 'nokogiri/html/document_fragment'
|
5
|
-
require 'nokogiri/html/sax/parser_context'
|
6
|
-
require 'nokogiri/html/sax/parser'
|
7
|
-
require 'nokogiri/html/sax/push_parser'
|
8
|
-
require 'nokogiri/html/element_description'
|
9
|
-
require 'nokogiri/html/element_description_defaults'
|
2
|
+
require_relative "html4"
|
10
3
|
|
11
4
|
module Nokogiri
|
12
|
-
|
13
|
-
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
end
|
5
|
+
HTML = Nokogiri::HTML4
|
6
|
+
|
7
|
+
# @!method HTML(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
|
8
|
+
# Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
|
9
|
+
# @!scope class
|
10
|
+
define_singleton_method(:HTML, Nokogiri.method(:HTML4))
|
19
11
|
|
12
|
+
# @note This module/namespace is an alias for {Nokogiri::HTML4} as of v1.12.0. Before v1.12.0,
|
13
|
+
# {Nokogiri::HTML4} did not exist, and this was the module/namespace for all HTML-related
|
14
|
+
# classes.
|
20
15
|
module HTML
|
21
|
-
class
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
16
|
+
# @note This class is an alias for {Nokogiri::HTML4::Document} as of v1.12.0.
|
17
|
+
class Document < Nokogiri::XML::Document
|
18
|
+
end
|
19
|
+
|
20
|
+
# @note This class is an alias for {Nokogiri::HTML4::DocumentFragment} as of v1.12.0.
|
21
|
+
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
22
|
+
end
|
23
|
+
|
24
|
+
# @note This class is an alias for {Nokogiri::HTML4::Builder} as of v1.12.0.
|
25
|
+
class Builder < Nokogiri::XML::Builder
|
26
|
+
end
|
27
|
+
|
28
|
+
module SAX
|
29
|
+
# @note This class is an alias for {Nokogiri::HTML4::SAX::Parser} as of v1.12.0.
|
30
|
+
class Parser < Nokogiri::XML::SAX::Parser
|
26
31
|
end
|
27
32
|
|
28
|
-
|
29
|
-
|
30
|
-
def fragment string, encoding = nil
|
31
|
-
HTML::DocumentFragment.parse string, encoding
|
33
|
+
# @note This class is an alias for {Nokogiri::HTML4::SAX::ParserContext} as of v1.12.0.
|
34
|
+
class ParserContext < Nokogiri::XML::SAX::ParserContext
|
32
35
|
end
|
33
|
-
end
|
34
36
|
|
35
|
-
|
36
|
-
|
37
|
+
# @note This class is an alias for {Nokogiri::HTML4::SAX::PushParser} as of v1.12.0.
|
38
|
+
class PushParser
|
39
|
+
end
|
40
|
+
end
|
37
41
|
end
|
38
42
|
end
|
@@ -0,0 +1,40 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Nokogiri
|
3
|
+
class << self
|
4
|
+
###
|
5
|
+
# Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
|
6
|
+
def HTML4(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
|
7
|
+
Nokogiri::HTML4::Document.parse(input, url, encoding, options, &block)
|
8
|
+
end
|
9
|
+
end
|
10
|
+
|
11
|
+
# @since v1.12.0
|
12
|
+
# @note Before v1.12.0, {Nokogiri::HTML4} did not exist, and {Nokogiri::HTML} was the module/namespace for parsing HTML.
|
13
|
+
module HTML4
|
14
|
+
class << self
|
15
|
+
###
|
16
|
+
# Parse HTML. Convenience method for Nokogiri::HTML4::Document.parse
|
17
|
+
def parse(input, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_HTML, &block)
|
18
|
+
Document.parse(input, url, encoding, options, &block)
|
19
|
+
end
|
20
|
+
|
21
|
+
####
|
22
|
+
# Parse a fragment from +string+ in to a NodeSet.
|
23
|
+
def fragment(string, encoding = nil)
|
24
|
+
HTML4::DocumentFragment.parse(string, encoding)
|
25
|
+
end
|
26
|
+
end
|
27
|
+
|
28
|
+
# Instance of Nokogiri::HTML4::EntityLookup
|
29
|
+
NamedCharacters = EntityLookup.new
|
30
|
+
end
|
31
|
+
end
|
32
|
+
|
33
|
+
require_relative "html4/entity_lookup"
|
34
|
+
require_relative "html4/document"
|
35
|
+
require_relative "html4/document_fragment"
|
36
|
+
require_relative "html4/sax/parser_context"
|
37
|
+
require_relative "html4/sax/parser"
|
38
|
+
require_relative "html4/sax/push_parser"
|
39
|
+
require_relative "html4/element_description"
|
40
|
+
require_relative "html4/element_description_defaults"
|
@@ -1,6 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
module Nokogiri
|
3
|
-
module
|
3
|
+
module HTML4
|
4
4
|
###
|
5
5
|
# Nokogiri HTML builder is used for building HTML documents. It is very
|
6
6
|
# similar to the Nokogiri::XML::Builder. In fact, you should go read the
|
@@ -12,7 +12,7 @@ module Nokogiri
|
|
12
12
|
# Create an HTML document with a body that has an onload attribute, and a
|
13
13
|
# span tag with a class of "bold" that has content of "Hello world".
|
14
14
|
#
|
15
|
-
# builder = Nokogiri::
|
15
|
+
# builder = Nokogiri::HTML4::Builder.new do |doc|
|
16
16
|
# doc.html {
|
17
17
|
# doc.body(:onload => 'some_func();') {
|
18
18
|
# doc.span.bold {
|
@@ -3,7 +3,7 @@
|
|
3
3
|
require 'pathname'
|
4
4
|
|
5
5
|
module Nokogiri
|
6
|
-
module
|
6
|
+
module HTML4
|
7
7
|
class Document < Nokogiri::XML::Document
|
8
8
|
###
|
9
9
|
# Get the meta tag encoding for this document. If there is no meta tag,
|
@@ -268,12 +268,12 @@ module Nokogiri
|
|
268
268
|
m = chunk.match(/(<meta\s)(.*)(charset\s*=\s*([\w-]+))(.*)/i) and
|
269
269
|
return m[4]
|
270
270
|
catch(:encoding_found) {
|
271
|
-
Nokogiri::
|
271
|
+
Nokogiri::HTML4::SAX::Parser.new(JumpSAXHandler.new(:encoding_found)).parse(chunk)
|
272
272
|
nil
|
273
273
|
}
|
274
274
|
else
|
275
275
|
handler = SAXHandler.new
|
276
|
-
parser = Nokogiri::
|
276
|
+
parser = Nokogiri::HTML4::SAX::PushParser.new(handler)
|
277
277
|
parser << chunk rescue Nokogiri::SyntaxError
|
278
278
|
handler.encoding
|
279
279
|
end
|
@@ -286,7 +286,7 @@ module Nokogiri
|
|
286
286
|
end
|
287
287
|
|
288
288
|
# This method is used by the C extension so that
|
289
|
-
# Nokogiri::
|
289
|
+
# Nokogiri::HTML4::Document#read_io() does not leak memory when
|
290
290
|
# EncodingFound is raised.
|
291
291
|
attr_reader :encoding_found
|
292
292
|
|
@@ -1,11 +1,11 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
module Nokogiri
|
3
|
-
module
|
3
|
+
module HTML4
|
4
4
|
class DocumentFragment < Nokogiri::XML::DocumentFragment
|
5
5
|
####
|
6
6
|
# Create a Nokogiri::XML::DocumentFragment from +tags+, using +encoding+
|
7
7
|
def self.parse(tags, encoding = nil)
|
8
|
-
doc =
|
8
|
+
doc = HTML4::Document.new
|
9
9
|
|
10
10
|
encoding ||= if tags.respond_to?(:encoding)
|
11
11
|
encoding = tags.encoding
|
@@ -39,7 +39,7 @@ module Nokogiri
|
|
39
39
|
"/html/body/node()"
|
40
40
|
end
|
41
41
|
|
42
|
-
temp_doc =
|
42
|
+
temp_doc = HTML4::Document.parse("<html><body>#{tags}", nil, document.encoding)
|
43
43
|
temp_doc.xpath(path).each { |child| child.parent = self }
|
44
44
|
self.errors = temp_doc.errors
|
45
45
|
end
|
@@ -1,18 +1,15 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
module Nokogiri
|
3
|
-
module
|
3
|
+
module HTML4
|
4
4
|
###
|
5
|
-
# Nokogiri lets you write a SAX parser to process HTML but get HTML
|
6
|
-
# correction features.
|
5
|
+
# Nokogiri lets you write a SAX parser to process HTML but get HTML correction features.
|
7
6
|
#
|
8
|
-
# See Nokogiri::
|
9
|
-
# SAX parser with HTML.
|
7
|
+
# See Nokogiri::HTML4::SAX::Parser for a basic example of using a SAX parser with HTML.
|
10
8
|
#
|
11
9
|
# For more information on SAX parsers, see Nokogiri::XML::SAX
|
12
10
|
module SAX
|
13
11
|
###
|
14
|
-
# This class lets you perform SAX style parsing on HTML with HTML
|
15
|
-
# error correction.
|
12
|
+
# This class lets you perform SAX style parsing on HTML with HTML error correction.
|
16
13
|
#
|
17
14
|
# Here is a basic usage example:
|
18
15
|
#
|
@@ -22,40 +19,40 @@ module Nokogiri
|
|
22
19
|
# end
|
23
20
|
# end
|
24
21
|
#
|
25
|
-
# parser = Nokogiri::
|
22
|
+
# parser = Nokogiri::HTML4::SAX::Parser.new(MyDoc.new)
|
26
23
|
# parser.parse(File.read(ARGV[0], mode: 'rb'))
|
27
24
|
#
|
28
25
|
# For more information on SAX parsers, see Nokogiri::XML::SAX
|
29
26
|
class Parser < Nokogiri::XML::SAX::Parser
|
30
27
|
###
|
31
28
|
# Parse html stored in +data+ using +encoding+
|
32
|
-
def parse_memory
|
29
|
+
def parse_memory(data, encoding = "UTF-8")
|
33
30
|
raise ArgumentError unless data
|
34
31
|
return unless data.length > 0
|
35
32
|
ctx = ParserContext.memory(data, encoding)
|
36
33
|
yield ctx if block_given?
|
37
|
-
ctx.parse_with
|
34
|
+
ctx.parse_with(self)
|
38
35
|
end
|
39
36
|
|
40
37
|
###
|
41
38
|
# Parse given +io+
|
42
|
-
def parse_io
|
39
|
+
def parse_io(io, encoding = "UTF-8")
|
43
40
|
check_encoding(encoding)
|
44
41
|
@encoding = encoding
|
45
42
|
ctx = ParserContext.io(io, ENCODINGS[encoding])
|
46
43
|
yield ctx if block_given?
|
47
|
-
ctx.parse_with
|
44
|
+
ctx.parse_with(self)
|
48
45
|
end
|
49
46
|
|
50
47
|
###
|
51
48
|
# Parse a file with +filename+
|
52
|
-
def parse_file
|
49
|
+
def parse_file(filename, encoding = "UTF-8")
|
53
50
|
raise ArgumentError unless filename
|
54
51
|
raise Errno::ENOENT unless File.exist?(filename)
|
55
52
|
raise Errno::EISDIR if File.directory?(filename)
|
56
53
|
ctx = ParserContext.file(filename, encoding)
|
57
54
|
yield ctx if block_given?
|
58
|
-
ctx.parse_with
|
55
|
+
ctx.parse_with(self)
|
59
56
|
end
|
60
57
|
end
|
61
58
|
end
|
@@ -0,0 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
module Nokogiri
|
3
|
+
module HTML4
|
4
|
+
module SAX
|
5
|
+
###
|
6
|
+
# Context for HTML SAX parsers. This class is usually not instantiated by the user. Instead,
|
7
|
+
# you should be looking at Nokogiri::HTML4::SAX::Parser
|
8
|
+
class ParserContext < Nokogiri::XML::SAX::ParserContext
|
9
|
+
def self.new(thing, encoding = "UTF-8")
|
10
|
+
if [:read, :close].all? { |x| thing.respond_to?(x) }
|
11
|
+
super
|
12
|
+
else
|
13
|
+
memory(thing, encoding)
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
19
|
+
end
|
@@ -1,17 +1,17 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
2
|
module Nokogiri
|
3
|
-
module
|
3
|
+
module HTML4
|
4
4
|
module SAX
|
5
5
|
class PushParser
|
6
6
|
|
7
|
-
# The Nokogiri::
|
7
|
+
# The Nokogiri::HTML4::SAX::Document on which the PushParser will be
|
8
8
|
# operating
|
9
9
|
attr_accessor :document
|
10
10
|
|
11
|
-
def initialize(doc =
|
11
|
+
def initialize(doc = HTML4::SAX::Document.new, file_name = nil, encoding = 'UTF-8')
|
12
12
|
@document = doc
|
13
13
|
@encoding = encoding
|
14
|
-
@sax_parser =
|
14
|
+
@sax_parser = HTML4::SAX::Parser.new(doc, @encoding)
|
15
15
|
|
16
16
|
## Create our push parser context
|
17
17
|
initialize_native(@sax_parser, file_name, encoding)
|
@@ -27,7 +27,7 @@ module Nokogiri
|
|
27
27
|
|
28
28
|
###
|
29
29
|
# Finish the parsing. This method is only necessary for
|
30
|
-
# Nokogiri::
|
30
|
+
# Nokogiri::HTML4::SAX::Document#end_document to be called.
|
31
31
|
def finish
|
32
32
|
write '', true
|
33
33
|
end
|