nokogiri 1.11.4 → 1.12.5
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +243 -22
- data/LICENSE.md +1 -1
- data/README.md +6 -5
- data/ext/nokogiri/depend +35 -34
- data/ext/nokogiri/extconf.rb +185 -103
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
- data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +21 -19
- data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
- data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +6 -5
- data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
- data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
- data/ext/nokogiri/nokogiri.c +70 -38
- data/ext/nokogiri/nokogiri.h +19 -9
- data/ext/nokogiri/xml_document.c +14 -14
- data/ext/nokogiri/xml_element_content.c +2 -0
- data/ext/nokogiri/xml_encoding_handler.c +11 -6
- data/ext/nokogiri/xml_namespace.c +4 -2
- data/ext/nokogiri/xml_node.c +123 -108
- data/ext/nokogiri/xml_node_set.c +20 -20
- data/ext/nokogiri/xml_reader.c +2 -0
- data/ext/nokogiri/xml_sax_parser.c +6 -6
- data/ext/nokogiri/xml_sax_parser_context.c +2 -0
- data/ext/nokogiri/xml_schema.c +2 -0
- data/ext/nokogiri/xml_xpath_context.c +67 -65
- data/ext/nokogiri/xslt_stylesheet.c +2 -1
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4886 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/css/parser.rb +1 -1
- data/lib/nokogiri/css/parser.y +1 -1
- data/lib/nokogiri/css/syntax_error.rb +1 -1
- data/lib/nokogiri/css.rb +14 -14
- data/lib/nokogiri/extension.rb +7 -2
- data/lib/nokogiri/gumbo.rb +14 -0
- data/lib/nokogiri/html.rb +31 -27
- data/lib/nokogiri/{html → html4}/builder.rb +2 -2
- data/lib/nokogiri/{html → html4}/document.rb +4 -4
- data/lib/nokogiri/{html → html4}/document_fragment.rb +3 -3
- data/lib/nokogiri/{html → html4}/element_description.rb +1 -1
- data/lib/nokogiri/{html → html4}/element_description_defaults.rb +1 -1
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +1 -1
- data/lib/nokogiri/{html → html4}/sax/parser.rb +11 -14
- data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +5 -5
- data/lib/nokogiri/html4.rb +40 -0
- data/lib/nokogiri/html5/document.rb +74 -0
- data/lib/nokogiri/html5/document_fragment.rb +80 -0
- data/lib/nokogiri/html5/node.rb +93 -0
- data/lib/nokogiri/html5.rb +473 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +12 -2
- data/lib/nokogiri/xml/builder.rb +38 -0
- data/lib/nokogiri/xml/document.rb +46 -0
- data/lib/nokogiri/xml/node/save_options.rb +1 -1
- data/lib/nokogiri/xml/node.rb +6 -5
- data/lib/nokogiri/xml/parse_options.rb +2 -0
- data/lib/nokogiri/xml/pp.rb +2 -2
- data/lib/nokogiri/xml/sax/document.rb +24 -30
- data/lib/nokogiri/xml/sax.rb +4 -4
- data/lib/nokogiri/xml/xpath.rb +2 -2
- data/lib/nokogiri/xml.rb +35 -36
- data/lib/nokogiri/xslt/stylesheet.rb +1 -1
- data/lib/nokogiri/xslt.rb +16 -16
- data/lib/nokogiri.rb +31 -29
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
- metadata +101 -58
- data/lib/nokogiri/html/sax/parser_context.rb +0 -17
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
# frozen_string_literal: true
|
3
3
|
require "stringio"
|
4
|
-
require "nokogiri/xml/node/save_options"
|
5
4
|
|
6
5
|
module Nokogiri
|
7
6
|
module XML
|
@@ -837,7 +836,7 @@ module Nokogiri
|
|
837
836
|
node_set = in_context(contents, options.to_i)
|
838
837
|
if (node_set.empty? && (document.errors.length > error_count))
|
839
838
|
if options.recover?
|
840
|
-
fragment = Nokogiri::
|
839
|
+
fragment = Nokogiri::HTML4::DocumentFragment.parse contents
|
841
840
|
node_set = fragment.children
|
842
841
|
else
|
843
842
|
raise document.errors[error_count]
|
@@ -883,7 +882,7 @@ module Nokogiri
|
|
883
882
|
type == DOCUMENT_NODE
|
884
883
|
end
|
885
884
|
|
886
|
-
# Returns true if this is an
|
885
|
+
# Returns true if this is an HTML4::Document node
|
887
886
|
def html?
|
888
887
|
type == HTML_DOCUMENT_NODE
|
889
888
|
end
|
@@ -909,11 +908,11 @@ module Nokogiri
|
|
909
908
|
end
|
910
909
|
|
911
910
|
###
|
912
|
-
# Fetch the Nokogiri::
|
911
|
+
# Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
|
913
912
|
# nil on XML documents and on unknown tags.
|
914
913
|
def description
|
915
914
|
return nil if document.xml?
|
916
|
-
Nokogiri::
|
915
|
+
Nokogiri::HTML4::ElementDescription[name]
|
917
916
|
end
|
918
917
|
|
919
918
|
###
|
@@ -1235,3 +1234,5 @@ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
|
1235
1234
|
end
|
1236
1235
|
end
|
1237
1236
|
end
|
1237
|
+
|
1238
|
+
require_relative "node/save_options"
|
@@ -71,6 +71,8 @@ module Nokogiri
|
|
71
71
|
|
72
72
|
# the default options used for parsing XML documents
|
73
73
|
DEFAULT_XML = RECOVER | NONET
|
74
|
+
# the default options used for parsing XSLT stylesheets
|
75
|
+
DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA
|
74
76
|
# the default options used for parsing HTML documents
|
75
77
|
DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
|
76
78
|
# the default options used for parsing XML schemas
|
data/lib/nokogiri/xml/pp.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
3
|
-
|
2
|
+
require_relative "pp/node"
|
3
|
+
require_relative "pp/character_data"
|
@@ -2,20 +2,19 @@
|
|
2
2
|
module Nokogiri
|
3
3
|
module XML
|
4
4
|
###
|
5
|
-
# SAX Parsers are event driven parsers.
|
6
|
-
#
|
7
|
-
#
|
5
|
+
# SAX Parsers are event driven parsers. Nokogiri provides two different event based parsers when
|
6
|
+
# dealing with XML. If you want to do SAX style parsing using HTML, check out
|
7
|
+
# Nokogiri::HTML4::SAX.
|
8
8
|
#
|
9
|
-
# The basic way a SAX style parser works is by creating a parser,
|
10
|
-
#
|
11
|
-
#
|
12
|
-
# it encounters events you said you would like to know about.
|
9
|
+
# The basic way a SAX style parser works is by creating a parser, telling the parser about the
|
10
|
+
# events we're interested in, then giving the parser some XML to process. The parser will notify
|
11
|
+
# you when it encounters events you said you would like to know about.
|
13
12
|
#
|
14
|
-
# To register for events, you simply subclass Nokogiri::XML::SAX::Document,
|
15
|
-
#
|
13
|
+
# To register for events, you simply subclass Nokogiri::XML::SAX::Document, and implement the
|
14
|
+
# methods for which you would like notification.
|
16
15
|
#
|
17
|
-
# For example, if I want to be notified when a document ends, and when an
|
18
|
-
#
|
16
|
+
# For example, if I want to be notified when a document ends, and when an element starts, I
|
17
|
+
# would write a class like this:
|
19
18
|
#
|
20
19
|
# class MyDocument < Nokogiri::XML::SAX::Document
|
21
20
|
# def end_document
|
@@ -27,8 +26,7 @@ module Nokogiri
|
|
27
26
|
# end
|
28
27
|
# end
|
29
28
|
#
|
30
|
-
# Then I would instantiate a SAX parser with this document, and feed the
|
31
|
-
# parser some XML
|
29
|
+
# Then I would instantiate a SAX parser with this document, and feed the parser some XML
|
32
30
|
#
|
33
31
|
# # Create a new parser
|
34
32
|
# parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
|
@@ -36,25 +34,21 @@ module Nokogiri
|
|
36
34
|
# # Feed the parser some XML
|
37
35
|
# parser.parse(File.open(ARGV[0]))
|
38
36
|
#
|
39
|
-
# Now my document handler will be called when each node starts, and when
|
40
|
-
#
|
41
|
-
# a look at Nokogiri::XML::SAX::Document.
|
37
|
+
# Now my document handler will be called when each node starts, and when the document ends. To
|
38
|
+
# see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
|
42
39
|
#
|
43
|
-
# Two SAX parsers for XML are available, a parser that reads from a string
|
44
|
-
#
|
45
|
-
#
|
46
|
-
# use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
|
40
|
+
# Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
|
41
|
+
# feels necessary, and a parser that lets you spoon feed it XML. If you want to let Nokogiri
|
42
|
+
# deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
|
47
43
|
# control over the XML input, use the Nokogiri::XML::SAX::PushParser.
|
48
44
|
module SAX
|
49
45
|
###
|
50
|
-
# This class is used for registering types of events you are interested
|
51
|
-
#
|
52
|
-
#
|
53
|
-
#
|
54
|
-
# you are interested in knowing about.
|
46
|
+
# This class is used for registering types of events you are interested in handling. All of
|
47
|
+
# the methods on this class are available as possible events while parsing an XML document. To
|
48
|
+
# register for any particular event, just subclass this class and implement the methods you
|
49
|
+
# are interested in knowing about.
|
55
50
|
#
|
56
|
-
# To only be notified about start and end element events, write a class
|
57
|
-
# like this:
|
51
|
+
# To only be notified about start and end element events, write a class like this:
|
58
52
|
#
|
59
53
|
# class MyDocument < Nokogiri::XML::SAX::Document
|
60
54
|
# def start_element name, attrs = []
|
@@ -66,8 +60,8 @@ module Nokogiri
|
|
66
60
|
# end
|
67
61
|
# end
|
68
62
|
#
|
69
|
-
# You can use this event handler for any SAX style parser included with
|
70
|
-
# Nokogiri
|
63
|
+
# You can use this event handler for any SAX style parser included with Nokogiri. See
|
64
|
+
# Nokogiri::XML::SAX, and Nokogiri::HTML4::SAX.
|
71
65
|
class Document
|
72
66
|
###
|
73
67
|
# Called when an XML declaration is parsed
|
@@ -129,7 +123,7 @@ module Nokogiri
|
|
129
123
|
end
|
130
124
|
|
131
125
|
###
|
132
|
-
# Characters read between a tag.
|
126
|
+
# Characters read between a tag. This method might be called multiple
|
133
127
|
# times given one contiguous string of characters.
|
134
128
|
#
|
135
129
|
# +string+ contains the character data
|
data/lib/nokogiri/xml/sax.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
2
|
+
require_relative "sax/document"
|
3
|
+
require_relative "sax/parser_context"
|
4
|
+
require_relative "sax/parser"
|
5
|
+
require_relative "sax/push_parser"
|
data/lib/nokogiri/xml/xpath.rb
CHANGED
data/lib/nokogiri/xml.rb
CHANGED
@@ -1,38 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require 'nokogiri/xml/pp'
|
3
|
-
require 'nokogiri/xml/parse_options'
|
4
|
-
require 'nokogiri/xml/sax'
|
5
|
-
require 'nokogiri/xml/searchable'
|
6
|
-
require 'nokogiri/xml/node'
|
7
|
-
require 'nokogiri/xml/attribute_decl'
|
8
|
-
require 'nokogiri/xml/element_decl'
|
9
|
-
require 'nokogiri/xml/element_content'
|
10
|
-
require 'nokogiri/xml/character_data'
|
11
|
-
require 'nokogiri/xml/namespace'
|
12
|
-
require 'nokogiri/xml/attr'
|
13
|
-
require 'nokogiri/xml/dtd'
|
14
|
-
require 'nokogiri/xml/cdata'
|
15
|
-
require 'nokogiri/xml/text'
|
16
|
-
require 'nokogiri/xml/document'
|
17
|
-
require 'nokogiri/xml/document_fragment'
|
18
|
-
require 'nokogiri/xml/processing_instruction'
|
19
|
-
require 'nokogiri/xml/node_set'
|
20
|
-
require 'nokogiri/xml/syntax_error'
|
21
|
-
require 'nokogiri/xml/xpath'
|
22
|
-
require 'nokogiri/xml/xpath_context'
|
23
|
-
require 'nokogiri/xml/builder'
|
24
|
-
require 'nokogiri/xml/reader'
|
25
|
-
require 'nokogiri/xml/notation'
|
26
|
-
require 'nokogiri/xml/entity_decl'
|
27
|
-
require 'nokogiri/xml/entity_reference'
|
28
|
-
require 'nokogiri/xml/schema'
|
29
|
-
require 'nokogiri/xml/relax_ng'
|
30
|
-
|
31
2
|
module Nokogiri
|
32
3
|
class << self
|
33
4
|
###
|
34
5
|
# Parse XML. Convenience method for Nokogiri::XML::Document.parse
|
35
|
-
def XML
|
6
|
+
def XML(thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_XML, &block)
|
36
7
|
Nokogiri::XML::Document.parse(thing, url, encoding, options, &block)
|
37
8
|
end
|
38
9
|
end
|
@@ -41,20 +12,19 @@ module Nokogiri
|
|
41
12
|
# Original C14N 1.0 spec canonicalization
|
42
13
|
XML_C14N_1_0 = 0
|
43
14
|
# Exclusive C14N 1.0 spec canonicalization
|
44
|
-
XML_C14N_EXCLUSIVE_1_0 =
|
15
|
+
XML_C14N_EXCLUSIVE_1_0 = 1
|
45
16
|
# C14N 1.1 spec canonicalization
|
46
17
|
XML_C14N_1_1 = 2
|
47
18
|
class << self
|
48
19
|
###
|
49
20
|
# Parse an XML document using the Nokogiri::XML::Reader API. See
|
50
21
|
# Nokogiri::XML::Reader for more information
|
51
|
-
def Reader
|
52
|
-
|
22
|
+
def Reader(string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT)
|
53
23
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
54
24
|
# Give the options to the user
|
55
25
|
yield options if block_given?
|
56
26
|
|
57
|
-
if string_or_io.respond_to?
|
27
|
+
if string_or_io.respond_to?(:read)
|
58
28
|
return Reader.from_io(string_or_io, url, encoding, options.to_i)
|
59
29
|
end
|
60
30
|
Reader.from_memory(string_or_io, url, encoding, options.to_i)
|
@@ -62,15 +32,44 @@ module Nokogiri
|
|
62
32
|
|
63
33
|
###
|
64
34
|
# Parse XML. Convenience method for Nokogiri::XML::Document.parse
|
65
|
-
def parse
|
35
|
+
def parse(thing, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block)
|
66
36
|
Document.parse(thing, url, encoding, options, &block)
|
67
37
|
end
|
68
38
|
|
69
39
|
####
|
70
40
|
# Parse a fragment from +string+ in to a NodeSet.
|
71
|
-
def fragment
|
41
|
+
def fragment(string)
|
72
42
|
XML::DocumentFragment.parse(string)
|
73
43
|
end
|
74
44
|
end
|
75
45
|
end
|
76
46
|
end
|
47
|
+
|
48
|
+
require_relative "xml/pp"
|
49
|
+
require_relative "xml/parse_options"
|
50
|
+
require_relative "xml/sax"
|
51
|
+
require_relative "xml/searchable"
|
52
|
+
require_relative "xml/node"
|
53
|
+
require_relative "xml/attribute_decl"
|
54
|
+
require_relative "xml/element_decl"
|
55
|
+
require_relative "xml/element_content"
|
56
|
+
require_relative "xml/character_data"
|
57
|
+
require_relative "xml/namespace"
|
58
|
+
require_relative "xml/attr"
|
59
|
+
require_relative "xml/dtd"
|
60
|
+
require_relative "xml/cdata"
|
61
|
+
require_relative "xml/text"
|
62
|
+
require_relative "xml/document"
|
63
|
+
require_relative "xml/document_fragment"
|
64
|
+
require_relative "xml/processing_instruction"
|
65
|
+
require_relative "xml/node_set"
|
66
|
+
require_relative "xml/syntax_error"
|
67
|
+
require_relative "xml/xpath"
|
68
|
+
require_relative "xml/xpath_context"
|
69
|
+
require_relative "xml/builder"
|
70
|
+
require_relative "xml/reader"
|
71
|
+
require_relative "xml/notation"
|
72
|
+
require_relative "xml/entity_decl"
|
73
|
+
require_relative "xml/entity_reference"
|
74
|
+
require_relative "xml/schema"
|
75
|
+
require_relative "xml/relax_ng"
|
@@ -18,7 +18,7 @@ module Nokogiri
|
|
18
18
|
# Apply an XSLT stylesheet to an XML::Document.
|
19
19
|
# +params+ is an array of strings used as XSLT parameters.
|
20
20
|
# returns serialized document
|
21
|
-
def apply_to
|
21
|
+
def apply_to(document, params = [])
|
22
22
|
serialize(transform(document, params))
|
23
23
|
end
|
24
24
|
end
|
data/lib/nokogiri/xslt.rb
CHANGED
@@ -1,6 +1,4 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require 'nokogiri/xslt/stylesheet'
|
3
|
-
|
4
2
|
module Nokogiri
|
5
3
|
class << self
|
6
4
|
###
|
@@ -22,32 +20,32 @@ module Nokogiri
|
|
22
20
|
class << self
|
23
21
|
###
|
24
22
|
# Parse the stylesheet in +string+, register any +modules+
|
25
|
-
def parse
|
23
|
+
def parse(string, modules = {})
|
26
24
|
modules.each do |url, klass|
|
27
|
-
XSLT.register
|
25
|
+
XSLT.register(url, klass)
|
28
26
|
end
|
29
27
|
|
28
|
+
doc = XML::Document.parse(string, nil, nil, XML::ParseOptions::DEFAULT_XSLT)
|
30
29
|
if Nokogiri.jruby?
|
31
|
-
Stylesheet.parse_stylesheet_doc(
|
30
|
+
Stylesheet.parse_stylesheet_doc(doc, string)
|
32
31
|
else
|
33
|
-
Stylesheet.parse_stylesheet_doc(
|
32
|
+
Stylesheet.parse_stylesheet_doc(doc)
|
34
33
|
end
|
35
34
|
end
|
36
35
|
|
37
36
|
###
|
38
37
|
# Quote parameters in +params+ for stylesheet safety
|
39
|
-
def quote_params
|
38
|
+
def quote_params(params)
|
40
39
|
parray = (params.instance_of?(Hash) ? params.to_a.flatten : params).dup
|
41
|
-
parray.each_with_index do |v,i|
|
42
|
-
if i % 2 > 0
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
end
|
40
|
+
parray.each_with_index do |v, i|
|
41
|
+
parray[i] = if i % 2 > 0
|
42
|
+
if v =~ /'/
|
43
|
+
"concat('#{v.gsub(/'/, %q{', "'", '})}')"
|
44
|
+
else
|
45
|
+
"'#{v}'"
|
46
|
+
end
|
49
47
|
else
|
50
|
-
|
48
|
+
v.to_s
|
51
49
|
end
|
52
50
|
end
|
53
51
|
parray.flatten
|
@@ -55,3 +53,5 @@ module Nokogiri
|
|
55
53
|
end
|
56
54
|
end
|
57
55
|
end
|
56
|
+
|
57
|
+
require_relative "xslt/stylesheet"
|
data/lib/nokogiri.rb
CHANGED
@@ -2,38 +2,29 @@
|
|
2
2
|
# frozen_string_literal: true
|
3
3
|
# Modify the PATH on windows so that the external DLLs will get loaded.
|
4
4
|
|
5
|
-
require
|
5
|
+
require "rbconfig"
|
6
6
|
|
7
7
|
if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
|
8
|
-
|
8
|
+
require_relative "nokogiri/jruby/dependencies"
|
9
9
|
end
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
require 'nokogiri/version'
|
14
|
-
require 'nokogiri/syntax_error'
|
15
|
-
require 'nokogiri/xml'
|
16
|
-
require 'nokogiri/xslt'
|
17
|
-
require 'nokogiri/html'
|
18
|
-
require 'nokogiri/decorators/slop'
|
19
|
-
require 'nokogiri/css'
|
20
|
-
require 'nokogiri/html/builder'
|
11
|
+
require_relative "nokogiri/extension"
|
21
12
|
|
22
13
|
# Nokogiri parses and searches XML/HTML very quickly, and also has
|
23
14
|
# correctly implemented CSS3 selector support as well as XPath 1.0
|
24
15
|
# support.
|
25
16
|
#
|
26
17
|
# Parsing a document returns either a Nokogiri::XML::Document, or a
|
27
|
-
# Nokogiri::
|
18
|
+
# Nokogiri::HTML4::Document depending on the kind of document you parse.
|
28
19
|
#
|
29
20
|
# Here is an example:
|
30
21
|
#
|
31
22
|
# require 'nokogiri'
|
32
23
|
# require 'open-uri'
|
33
24
|
#
|
34
|
-
# # Get a Nokogiri::
|
25
|
+
# # Get a Nokogiri::HTML4::Document for the page we’re interested in...
|
35
26
|
#
|
36
|
-
# doc = Nokogiri::
|
27
|
+
# doc = Nokogiri::HTML4(URI.open('http://www.google.com/search?q=tenderlove'))
|
37
28
|
#
|
38
29
|
# # Do funky things with it using Nokogiri::XML::Node methods...
|
39
30
|
#
|
@@ -49,27 +40,27 @@ module Nokogiri
|
|
49
40
|
class << self
|
50
41
|
###
|
51
42
|
# Parse an HTML or XML document. +string+ contains the document.
|
52
|
-
def parse
|
43
|
+
def parse(string, url = nil, encoding = nil, options = nil)
|
53
44
|
if string.respond_to?(:read) ||
|
54
45
|
/^\s*<(?:!DOCTYPE\s+)?html[\s>]/i === string[0, 512]
|
55
46
|
# Expect an HTML indicator to appear within the first 512
|
56
47
|
# characters of a document. (<?xml ?> + <?xml-stylesheet ?>
|
57
48
|
# shouldn't be that long)
|
58
|
-
Nokogiri.
|
49
|
+
Nokogiri.HTML4(string, url, encoding,
|
59
50
|
options || XML::ParseOptions::DEFAULT_HTML)
|
60
51
|
else
|
61
52
|
Nokogiri.XML(string, url, encoding,
|
62
53
|
options || XML::ParseOptions::DEFAULT_XML)
|
63
|
-
end.tap
|
54
|
+
end.tap do |doc|
|
64
55
|
yield doc if block_given?
|
65
|
-
|
56
|
+
end
|
66
57
|
end
|
67
58
|
|
68
59
|
###
|
69
60
|
# Create a new Nokogiri::XML::DocumentFragment
|
70
|
-
def make
|
61
|
+
def make(input = nil, opts = {}, &blk)
|
71
62
|
if input
|
72
|
-
Nokogiri::
|
63
|
+
Nokogiri::HTML4.fragment(input).children.first
|
73
64
|
else
|
74
65
|
Nokogiri(&blk)
|
75
66
|
end
|
@@ -98,10 +89,10 @@ module Nokogiri
|
|
98
89
|
# Make sure to support some popular encoding aliases not known by
|
99
90
|
# all iconv implementations.
|
100
91
|
{
|
101
|
-
|
102
|
-
}.each
|
92
|
+
"Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
|
93
|
+
}.each do |alias_name, name|
|
103
94
|
EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
|
104
|
-
|
95
|
+
end
|
105
96
|
end
|
106
97
|
end
|
107
98
|
|
@@ -109,15 +100,26 @@ module Nokogiri
|
|
109
100
|
end
|
110
101
|
|
111
102
|
###
|
112
|
-
# Parse a document contained in +args+. Nokogiri will try to guess what
|
113
|
-
#
|
114
|
-
# Nokogiri.parse
|
103
|
+
# Parse a document contained in +args+. Nokogiri will try to guess what type of document you are
|
104
|
+
# attempting to parse. For more information, see Nokogiri.parse
|
115
105
|
#
|
116
|
-
# To specify the type of document, use Nokogiri.XML or Nokogiri.
|
106
|
+
# To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.
|
117
107
|
def Nokogiri(*args, &block)
|
118
108
|
if block_given?
|
119
|
-
Nokogiri::
|
109
|
+
Nokogiri::HTML4::Builder.new(&block).doc.root
|
120
110
|
else
|
121
111
|
Nokogiri.parse(*args)
|
122
112
|
end
|
123
113
|
end
|
114
|
+
|
115
|
+
require_relative "nokogiri/version"
|
116
|
+
require_relative "nokogiri/syntax_error"
|
117
|
+
require_relative "nokogiri/xml"
|
118
|
+
require_relative "nokogiri/xslt"
|
119
|
+
require_relative "nokogiri/html4"
|
120
|
+
require_relative "nokogiri/html"
|
121
|
+
require_relative "nokogiri/decorators/slop"
|
122
|
+
require_relative "nokogiri/css"
|
123
|
+
require_relative "nokogiri/html4/builder"
|
124
|
+
|
125
|
+
require_relative "nokogiri/html5" if Nokogiri.uses_gumbo?
|
@@ -0,0 +1,31 @@
|
|
1
|
+
From 3e1aad4fe584747fd7d17cc7b2863a78e2d21a77 Mon Sep 17 00:00:00 2001
|
2
|
+
From: Nick Wellnhofer <wellnhofer@aevum.de>
|
3
|
+
Date: Wed, 2 Jun 2021 17:31:49 +0200
|
4
|
+
Subject: [PATCH] Fix XPath recursion limit
|
5
|
+
|
6
|
+
Fix accounting of recursion depth when parsing XPath expressions.
|
7
|
+
|
8
|
+
This silly bug introduced in commit 804c5297 could lead to spurious
|
9
|
+
errors when parsing larger expressions or XSLT documents.
|
10
|
+
|
11
|
+
Should fix #264.
|
12
|
+
---
|
13
|
+
xpath.c | 2 +-
|
14
|
+
1 file changed, 1 insertion(+), 1 deletion(-)
|
15
|
+
|
16
|
+
diff --git a/xpath.c b/xpath.c
|
17
|
+
index 7497ba0..1aa2f1a 100644
|
18
|
+
--- a/xpath.c
|
19
|
+
+++ b/xpath.c
|
20
|
+
@@ -10983,7 +10983,7 @@ xmlXPathCompileExpr(xmlXPathParserContextPtr ctxt, int sort) {
|
21
|
+
}
|
22
|
+
|
23
|
+
if (xpctxt != NULL)
|
24
|
+
- xpctxt->depth -= 1;
|
25
|
+
+ xpctxt->depth -= 10;
|
26
|
+
}
|
27
|
+
|
28
|
+
/**
|
29
|
+
--
|
30
|
+
2.31.0
|
31
|
+
|