nokogiri 1.11.2 → 1.12.5
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +243 -22
- data/LICENSE.md +1 -1
- data/README.md +7 -6
- data/dependencies.yml +12 -12
- data/ext/nokogiri/depend +35 -34
- data/ext/nokogiri/extconf.rb +185 -103
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
- data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +21 -19
- data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
- data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +6 -5
- data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
- data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
- data/ext/nokogiri/nokogiri.c +70 -38
- data/ext/nokogiri/nokogiri.h +19 -9
- data/ext/nokogiri/xml_document.c +50 -49
- data/ext/nokogiri/xml_element_content.c +2 -0
- data/ext/nokogiri/xml_encoding_handler.c +11 -6
- data/ext/nokogiri/xml_namespace.c +4 -2
- data/ext/nokogiri/xml_node.c +147 -133
- data/ext/nokogiri/xml_node_set.c +20 -20
- data/ext/nokogiri/xml_reader.c +2 -0
- data/ext/nokogiri/xml_sax_parser.c +6 -6
- data/ext/nokogiri/xml_sax_parser_context.c +2 -0
- data/ext/nokogiri/xml_schema.c +2 -0
- data/ext/nokogiri/xml_xpath_context.c +67 -65
- data/ext/nokogiri/xslt_stylesheet.c +2 -1
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4886 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/css/parser.rb +1 -1
- data/lib/nokogiri/css/parser.y +1 -1
- data/lib/nokogiri/css/syntax_error.rb +1 -1
- data/lib/nokogiri/css.rb +14 -14
- data/lib/nokogiri/extension.rb +7 -2
- data/lib/nokogiri/gumbo.rb +14 -0
- data/lib/nokogiri/html.rb +31 -27
- data/lib/nokogiri/{html → html4}/builder.rb +2 -2
- data/lib/nokogiri/{html → html4}/document.rb +4 -4
- data/lib/nokogiri/{html → html4}/document_fragment.rb +3 -3
- data/lib/nokogiri/{html → html4}/element_description.rb +1 -1
- data/lib/nokogiri/{html → html4}/element_description_defaults.rb +1 -1
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +1 -1
- data/lib/nokogiri/{html → html4}/sax/parser.rb +11 -14
- data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +5 -5
- data/lib/nokogiri/html4.rb +40 -0
- data/lib/nokogiri/html5/document.rb +74 -0
- data/lib/nokogiri/html5/document_fragment.rb +80 -0
- data/lib/nokogiri/html5/node.rb +93 -0
- data/lib/nokogiri/html5.rb +473 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +12 -2
- data/lib/nokogiri/xml/builder.rb +38 -0
- data/lib/nokogiri/xml/document.rb +89 -17
- data/lib/nokogiri/xml/node/save_options.rb +1 -1
- data/lib/nokogiri/xml/node.rb +7 -5
- data/lib/nokogiri/xml/parse_options.rb +2 -0
- data/lib/nokogiri/xml/pp.rb +2 -2
- data/lib/nokogiri/xml/sax/document.rb +24 -30
- data/lib/nokogiri/xml/sax.rb +4 -4
- data/lib/nokogiri/xml/xpath.rb +2 -2
- data/lib/nokogiri/xml.rb +35 -36
- data/lib/nokogiri/xslt/stylesheet.rb +1 -1
- data/lib/nokogiri/xslt.rb +16 -16
- data/lib/nokogiri.rb +31 -29
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/{0008-use-glibc-strlen.patch → 0004-use-glibc-strlen.patch} +0 -0
- data/patches/libxml2/{0009-avoid-isnan-isinf.patch → 0005-avoid-isnan-isinf.patch} +4 -4
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- metadata +110 -69
- data/lib/nokogiri/html/sax/parser_context.rb +0 -17
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +0 -73
- data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +0 -103
- data/patches/libxml2/0010-parser.c-shrink-the-input-buffer-when-appropriate.patch +0 -70
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
@@ -14,11 +14,12 @@ module Nokogiri
|
|
14
14
|
# Nokogiri::XML::Searchable#xpath
|
15
15
|
#
|
16
16
|
class Document < Nokogiri::XML::Node
|
17
|
-
#
|
18
|
-
#
|
17
|
+
# See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
|
18
|
+
# attempting to handle unicode characters partly because libxml2 doesn't handle unicode
|
19
|
+
# characters in NCNAMEs.
|
19
20
|
NCNAME_START_CHAR = "A-Za-z_"
|
20
|
-
NCNAME_CHAR = NCNAME_START_CHAR + "
|
21
|
-
NCNAME_RE = /^xmlns(
|
21
|
+
NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
|
22
|
+
NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
|
22
23
|
|
23
24
|
##
|
24
25
|
# Parse an XML file.
|
@@ -112,39 +113,110 @@ module Nokogiri
|
|
112
113
|
# A list of Nokogiri::XML::SyntaxError found when parsing a document
|
113
114
|
attr_accessor :errors
|
114
115
|
|
116
|
+
# When true, reparented elements without a namespace will inherit their new parent's
|
117
|
+
# namespace (if one exists). Defaults to +false+.
|
118
|
+
#
|
119
|
+
# @example Default behavior of namespace inheritance
|
120
|
+
# xml = <<~EOF
|
121
|
+
# <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
122
|
+
# <foo:parent>
|
123
|
+
# </foo:parent>
|
124
|
+
# </root>
|
125
|
+
# EOF
|
126
|
+
# doc = Nokogiri::XML(xml)
|
127
|
+
# parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
|
128
|
+
# parent.add_child("<child></child>")
|
129
|
+
# doc.to_xml
|
130
|
+
# # => <?xml version="1.0"?>
|
131
|
+
# # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
132
|
+
# # <foo:parent>
|
133
|
+
# # <child/>
|
134
|
+
# # </foo:parent>
|
135
|
+
# # </root>
|
136
|
+
#
|
137
|
+
# @example Setting namespace inheritance to +true+
|
138
|
+
# xml = <<~EOF
|
139
|
+
# <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
140
|
+
# <foo:parent>
|
141
|
+
# </foo:parent>
|
142
|
+
# </root>
|
143
|
+
# EOF
|
144
|
+
# doc = Nokogiri::XML(xml)
|
145
|
+
# doc.namespace_inheritance = true
|
146
|
+
# parent = doc.at_xpath("//foo:parent", "foo" => "http://nokogiri.org/default_ns/test/foo")
|
147
|
+
# parent.add_child("<child></child>")
|
148
|
+
# doc.to_xml
|
149
|
+
# # => <?xml version="1.0"?>
|
150
|
+
# # <root xmlns:foo="http://nokogiri.org/default_ns/test/foo">
|
151
|
+
# # <foo:parent>
|
152
|
+
# # <foo:child/>
|
153
|
+
# # </foo:parent>
|
154
|
+
# # </root>
|
155
|
+
#
|
156
|
+
# @return [Boolean]
|
157
|
+
#
|
158
|
+
# @since v1.12.4
|
159
|
+
attr_accessor :namespace_inheritance
|
160
|
+
|
115
161
|
def initialize *args # :nodoc:
|
116
162
|
@errors = []
|
117
163
|
@decorators = nil
|
164
|
+
@namespace_inheritance = false
|
118
165
|
end
|
119
166
|
|
120
167
|
##
|
121
|
-
# Create
|
168
|
+
# Create a new +Element+ with +name+ sharing GC lifecycle with the document, optionally
|
169
|
+
# setting contents or attributes.
|
170
|
+
#
|
171
|
+
# Arguments may be passed to initialize the element:
|
172
|
+
# - a +Hash+ argument will be used to set attributes
|
173
|
+
# - a non-Hash object that responds to +#to_s+ will be used to set the new node's contents
|
174
|
+
#
|
175
|
+
# A block may be passed to mutate the node.
|
176
|
+
#
|
177
|
+
# @param name [String]
|
178
|
+
# @param contents_or_attrs [#to_s,Hash]
|
179
|
+
# @yieldparam node [Nokogiri::XML::Element]
|
180
|
+
# @return [Nokogiri::XML::Element]
|
181
|
+
#
|
182
|
+
# @example An empty element without attributes
|
183
|
+
# doc.create_element("div")
|
184
|
+
# # => <div></div>
|
185
|
+
#
|
186
|
+
# @example An element with contents
|
187
|
+
# doc.create_element("div", "contents")
|
188
|
+
# # => <div>contents</div>
|
189
|
+
#
|
190
|
+
# @example An element with attributes
|
191
|
+
# doc.create_element("div", {"class" => "container"})
|
192
|
+
# # => <div class='container'></div>
|
193
|
+
#
|
194
|
+
# @example An element with contents and attributes
|
195
|
+
# doc.create_element("div", "contents", {"class" => "container"})
|
196
|
+
# # => <div class='container'>contents</div>
|
122
197
|
#
|
123
|
-
#
|
124
|
-
# doc.create_element
|
125
|
-
# doc.create_element "div", "contents" # <div>contents</div>
|
126
|
-
# doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
|
127
|
-
# doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
|
198
|
+
# @example Passing a block to mutate the element
|
199
|
+
# doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
|
128
200
|
#
|
129
|
-
def create_element
|
201
|
+
def create_element(name, *contents_or_attrs, &block)
|
130
202
|
elm = Nokogiri::XML::Element.new(name, self, &block)
|
131
|
-
|
203
|
+
contents_or_attrs.each do |arg|
|
132
204
|
case arg
|
133
205
|
when Hash
|
134
|
-
arg.each
|
206
|
+
arg.each do |k, v|
|
135
207
|
key = k.to_s
|
136
208
|
if key =~ NCNAME_RE
|
137
|
-
ns_name =
|
138
|
-
elm.add_namespace_definition
|
209
|
+
ns_name = Regexp.last_match(1)
|
210
|
+
elm.add_namespace_definition(ns_name, v)
|
139
211
|
else
|
140
212
|
elm[k.to_s] = v.to_s
|
141
213
|
end
|
142
|
-
|
214
|
+
end
|
143
215
|
else
|
144
216
|
elm.content = arg
|
145
217
|
end
|
146
218
|
end
|
147
|
-
if ns = elm.namespace_definitions.find { |n| n.prefix.nil?
|
219
|
+
if ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == '') }
|
148
220
|
elm.namespace = ns
|
149
221
|
end
|
150
222
|
elm
|
@@ -34,7 +34,7 @@ module Nokogiri
|
|
34
34
|
DEFAULT_HTML = FORMAT | NO_DECLARATION | NO_EMPTY_TAGS | AS_HTML
|
35
35
|
end
|
36
36
|
# the default for XHTML document
|
37
|
-
DEFAULT_XHTML = FORMAT | NO_DECLARATION |
|
37
|
+
DEFAULT_XHTML = FORMAT | NO_DECLARATION | AS_XHTML
|
38
38
|
|
39
39
|
# Integer representation of the SaveOptions
|
40
40
|
attr_reader :options
|
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -1,7 +1,6 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
# frozen_string_literal: true
|
3
3
|
require "stringio"
|
4
|
-
require "nokogiri/xml/node/save_options"
|
5
4
|
|
6
5
|
module Nokogiri
|
7
6
|
module XML
|
@@ -93,6 +92,7 @@ module Nokogiri
|
|
93
92
|
# Create a new node with +name+ sharing GC lifecycle with +document+.
|
94
93
|
# @param name [String]
|
95
94
|
# @param document [Nokogiri::XML::Document]
|
95
|
+
# @yieldparam node [Nokogiri::XML::Node]
|
96
96
|
# @return [Nokogiri::XML::Node]
|
97
97
|
# @see Nokogiri::XML::Node.new
|
98
98
|
def initialize(name, document)
|
@@ -836,7 +836,7 @@ module Nokogiri
|
|
836
836
|
node_set = in_context(contents, options.to_i)
|
837
837
|
if (node_set.empty? && (document.errors.length > error_count))
|
838
838
|
if options.recover?
|
839
|
-
fragment = Nokogiri::
|
839
|
+
fragment = Nokogiri::HTML4::DocumentFragment.parse contents
|
840
840
|
node_set = fragment.children
|
841
841
|
else
|
842
842
|
raise document.errors[error_count]
|
@@ -882,7 +882,7 @@ module Nokogiri
|
|
882
882
|
type == DOCUMENT_NODE
|
883
883
|
end
|
884
884
|
|
885
|
-
# Returns true if this is an
|
885
|
+
# Returns true if this is an HTML4::Document node
|
886
886
|
def html?
|
887
887
|
type == HTML_DOCUMENT_NODE
|
888
888
|
end
|
@@ -908,11 +908,11 @@ module Nokogiri
|
|
908
908
|
end
|
909
909
|
|
910
910
|
###
|
911
|
-
# Fetch the Nokogiri::
|
911
|
+
# Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
|
912
912
|
# nil on XML documents and on unknown tags.
|
913
913
|
def description
|
914
914
|
return nil if document.xml?
|
915
|
-
Nokogiri::
|
915
|
+
Nokogiri::HTML4::ElementDescription[name]
|
916
916
|
end
|
917
917
|
|
918
918
|
###
|
@@ -1234,3 +1234,5 @@ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
|
1234
1234
|
end
|
1235
1235
|
end
|
1236
1236
|
end
|
1237
|
+
|
1238
|
+
require_relative "node/save_options"
|
@@ -71,6 +71,8 @@ module Nokogiri
|
|
71
71
|
|
72
72
|
# the default options used for parsing XML documents
|
73
73
|
DEFAULT_XML = RECOVER | NONET
|
74
|
+
# the default options used for parsing XSLT stylesheets
|
75
|
+
DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA
|
74
76
|
# the default options used for parsing HTML documents
|
75
77
|
DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
|
76
78
|
# the default options used for parsing XML schemas
|
data/lib/nokogiri/xml/pp.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
3
|
-
|
2
|
+
require_relative "pp/node"
|
3
|
+
require_relative "pp/character_data"
|
@@ -2,20 +2,19 @@
|
|
2
2
|
module Nokogiri
|
3
3
|
module XML
|
4
4
|
###
|
5
|
-
# SAX Parsers are event driven parsers.
|
6
|
-
#
|
7
|
-
#
|
5
|
+
# SAX Parsers are event driven parsers. Nokogiri provides two different event based parsers when
|
6
|
+
# dealing with XML. If you want to do SAX style parsing using HTML, check out
|
7
|
+
# Nokogiri::HTML4::SAX.
|
8
8
|
#
|
9
|
-
# The basic way a SAX style parser works is by creating a parser,
|
10
|
-
#
|
11
|
-
#
|
12
|
-
# it encounters events you said you would like to know about.
|
9
|
+
# The basic way a SAX style parser works is by creating a parser, telling the parser about the
|
10
|
+
# events we're interested in, then giving the parser some XML to process. The parser will notify
|
11
|
+
# you when it encounters events you said you would like to know about.
|
13
12
|
#
|
14
|
-
# To register for events, you simply subclass Nokogiri::XML::SAX::Document,
|
15
|
-
#
|
13
|
+
# To register for events, you simply subclass Nokogiri::XML::SAX::Document, and implement the
|
14
|
+
# methods for which you would like notification.
|
16
15
|
#
|
17
|
-
# For example, if I want to be notified when a document ends, and when an
|
18
|
-
#
|
16
|
+
# For example, if I want to be notified when a document ends, and when an element starts, I
|
17
|
+
# would write a class like this:
|
19
18
|
#
|
20
19
|
# class MyDocument < Nokogiri::XML::SAX::Document
|
21
20
|
# def end_document
|
@@ -27,8 +26,7 @@ module Nokogiri
|
|
27
26
|
# end
|
28
27
|
# end
|
29
28
|
#
|
30
|
-
# Then I would instantiate a SAX parser with this document, and feed the
|
31
|
-
# parser some XML
|
29
|
+
# Then I would instantiate a SAX parser with this document, and feed the parser some XML
|
32
30
|
#
|
33
31
|
# # Create a new parser
|
34
32
|
# parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
|
@@ -36,25 +34,21 @@ module Nokogiri
|
|
36
34
|
# # Feed the parser some XML
|
37
35
|
# parser.parse(File.open(ARGV[0]))
|
38
36
|
#
|
39
|
-
# Now my document handler will be called when each node starts, and when
|
40
|
-
#
|
41
|
-
# a look at Nokogiri::XML::SAX::Document.
|
37
|
+
# Now my document handler will be called when each node starts, and when the document ends. To
|
38
|
+
# see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
|
42
39
|
#
|
43
|
-
# Two SAX parsers for XML are available, a parser that reads from a string
|
44
|
-
#
|
45
|
-
#
|
46
|
-
# use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
|
40
|
+
# Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
|
41
|
+
# feels necessary, and a parser that lets you spoon feed it XML. If you want to let Nokogiri
|
42
|
+
# deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
|
47
43
|
# control over the XML input, use the Nokogiri::XML::SAX::PushParser.
|
48
44
|
module SAX
|
49
45
|
###
|
50
|
-
# This class is used for registering types of events you are interested
|
51
|
-
#
|
52
|
-
#
|
53
|
-
#
|
54
|
-
# you are interested in knowing about.
|
46
|
+
# This class is used for registering types of events you are interested in handling. All of
|
47
|
+
# the methods on this class are available as possible events while parsing an XML document. To
|
48
|
+
# register for any particular event, just subclass this class and implement the methods you
|
49
|
+
# are interested in knowing about.
|
55
50
|
#
|
56
|
-
# To only be notified about start and end element events, write a class
|
57
|
-
# like this:
|
51
|
+
# To only be notified about start and end element events, write a class like this:
|
58
52
|
#
|
59
53
|
# class MyDocument < Nokogiri::XML::SAX::Document
|
60
54
|
# def start_element name, attrs = []
|
@@ -66,8 +60,8 @@ module Nokogiri
|
|
66
60
|
# end
|
67
61
|
# end
|
68
62
|
#
|
69
|
-
# You can use this event handler for any SAX style parser included with
|
70
|
-
# Nokogiri
|
63
|
+
# You can use this event handler for any SAX style parser included with Nokogiri. See
|
64
|
+
# Nokogiri::XML::SAX, and Nokogiri::HTML4::SAX.
|
71
65
|
class Document
|
72
66
|
###
|
73
67
|
# Called when an XML declaration is parsed
|
@@ -129,7 +123,7 @@ module Nokogiri
|
|
129
123
|
end
|
130
124
|
|
131
125
|
###
|
132
|
-
# Characters read between a tag.
|
126
|
+
# Characters read between a tag. This method might be called multiple
|
133
127
|
# times given one contiguous string of characters.
|
134
128
|
#
|
135
129
|
# +string+ contains the character data
|
data/lib/nokogiri/xml/sax.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
2
|
+
require_relative "sax/document"
|
3
|
+
require_relative "sax/parser_context"
|
4
|
+
require_relative "sax/parser"
|
5
|
+
require_relative "sax/push_parser"
|
data/lib/nokogiri/xml/xpath.rb
CHANGED
data/lib/nokogiri/xml.rb
CHANGED
@@ -1,38 +1,9 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require 'nokogiri/xml/pp'
|
3
|
-
require 'nokogiri/xml/parse_options'
|
4
|
-
require 'nokogiri/xml/sax'
|
5
|
-
require 'nokogiri/xml/searchable'
|
6
|
-
require 'nokogiri/xml/node'
|
7
|
-
require 'nokogiri/xml/attribute_decl'
|
8
|
-
require 'nokogiri/xml/element_decl'
|
9
|
-
require 'nokogiri/xml/element_content'
|
10
|
-
require 'nokogiri/xml/character_data'
|
11
|
-
require 'nokogiri/xml/namespace'
|
12
|
-
require 'nokogiri/xml/attr'
|
13
|
-
require 'nokogiri/xml/dtd'
|
14
|
-
require 'nokogiri/xml/cdata'
|
15
|
-
require 'nokogiri/xml/text'
|
16
|
-
require 'nokogiri/xml/document'
|
17
|
-
require 'nokogiri/xml/document_fragment'
|
18
|
-
require 'nokogiri/xml/processing_instruction'
|
19
|
-
require 'nokogiri/xml/node_set'
|
20
|
-
require 'nokogiri/xml/syntax_error'
|
21
|
-
require 'nokogiri/xml/xpath'
|
22
|
-
require 'nokogiri/xml/xpath_context'
|
23
|
-
require 'nokogiri/xml/builder'
|
24
|
-
require 'nokogiri/xml/reader'
|
25
|
-
require 'nokogiri/xml/notation'
|
26
|
-
require 'nokogiri/xml/entity_decl'
|
27
|
-
require 'nokogiri/xml/entity_reference'
|
28
|
-
require 'nokogiri/xml/schema'
|
29
|
-
require 'nokogiri/xml/relax_ng'
|
30
|
-
|
31
2
|
module Nokogiri
|
32
3
|
class << self
|
33
4
|
###
|
34
5
|
# Parse XML. Convenience method for Nokogiri::XML::Document.parse
|
35
|
-
def XML
|
6
|
+
def XML(thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_XML, &block)
|
36
7
|
Nokogiri::XML::Document.parse(thing, url, encoding, options, &block)
|
37
8
|
end
|
38
9
|
end
|
@@ -41,20 +12,19 @@ module Nokogiri
|
|
41
12
|
# Original C14N 1.0 spec canonicalization
|
42
13
|
XML_C14N_1_0 = 0
|
43
14
|
# Exclusive C14N 1.0 spec canonicalization
|
44
|
-
XML_C14N_EXCLUSIVE_1_0 =
|
15
|
+
XML_C14N_EXCLUSIVE_1_0 = 1
|
45
16
|
# C14N 1.1 spec canonicalization
|
46
17
|
XML_C14N_1_1 = 2
|
47
18
|
class << self
|
48
19
|
###
|
49
20
|
# Parse an XML document using the Nokogiri::XML::Reader API. See
|
50
21
|
# Nokogiri::XML::Reader for more information
|
51
|
-
def Reader
|
52
|
-
|
22
|
+
def Reader(string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT)
|
53
23
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
54
24
|
# Give the options to the user
|
55
25
|
yield options if block_given?
|
56
26
|
|
57
|
-
if string_or_io.respond_to?
|
27
|
+
if string_or_io.respond_to?(:read)
|
58
28
|
return Reader.from_io(string_or_io, url, encoding, options.to_i)
|
59
29
|
end
|
60
30
|
Reader.from_memory(string_or_io, url, encoding, options.to_i)
|
@@ -62,15 +32,44 @@ module Nokogiri
|
|
62
32
|
|
63
33
|
###
|
64
34
|
# Parse XML. Convenience method for Nokogiri::XML::Document.parse
|
65
|
-
def parse
|
35
|
+
def parse(thing, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block)
|
66
36
|
Document.parse(thing, url, encoding, options, &block)
|
67
37
|
end
|
68
38
|
|
69
39
|
####
|
70
40
|
# Parse a fragment from +string+ in to a NodeSet.
|
71
|
-
def fragment
|
41
|
+
def fragment(string)
|
72
42
|
XML::DocumentFragment.parse(string)
|
73
43
|
end
|
74
44
|
end
|
75
45
|
end
|
76
46
|
end
|
47
|
+
|
48
|
+
require_relative "xml/pp"
|
49
|
+
require_relative "xml/parse_options"
|
50
|
+
require_relative "xml/sax"
|
51
|
+
require_relative "xml/searchable"
|
52
|
+
require_relative "xml/node"
|
53
|
+
require_relative "xml/attribute_decl"
|
54
|
+
require_relative "xml/element_decl"
|
55
|
+
require_relative "xml/element_content"
|
56
|
+
require_relative "xml/character_data"
|
57
|
+
require_relative "xml/namespace"
|
58
|
+
require_relative "xml/attr"
|
59
|
+
require_relative "xml/dtd"
|
60
|
+
require_relative "xml/cdata"
|
61
|
+
require_relative "xml/text"
|
62
|
+
require_relative "xml/document"
|
63
|
+
require_relative "xml/document_fragment"
|
64
|
+
require_relative "xml/processing_instruction"
|
65
|
+
require_relative "xml/node_set"
|
66
|
+
require_relative "xml/syntax_error"
|
67
|
+
require_relative "xml/xpath"
|
68
|
+
require_relative "xml/xpath_context"
|
69
|
+
require_relative "xml/builder"
|
70
|
+
require_relative "xml/reader"
|
71
|
+
require_relative "xml/notation"
|
72
|
+
require_relative "xml/entity_decl"
|
73
|
+
require_relative "xml/entity_reference"
|
74
|
+
require_relative "xml/schema"
|
75
|
+
require_relative "xml/relax_ng"
|
@@ -18,7 +18,7 @@ module Nokogiri
|
|
18
18
|
# Apply an XSLT stylesheet to an XML::Document.
|
19
19
|
# +params+ is an array of strings used as XSLT parameters.
|
20
20
|
# returns serialized document
|
21
|
-
def apply_to
|
21
|
+
def apply_to(document, params = [])
|
22
22
|
serialize(transform(document, params))
|
23
23
|
end
|
24
24
|
end
|
data/lib/nokogiri/xslt.rb
CHANGED
@@ -1,6 +1,4 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
require 'nokogiri/xslt/stylesheet'
|
3
|
-
|
4
2
|
module Nokogiri
|
5
3
|
class << self
|
6
4
|
###
|
@@ -22,32 +20,32 @@ module Nokogiri
|
|
22
20
|
class << self
|
23
21
|
###
|
24
22
|
# Parse the stylesheet in +string+, register any +modules+
|
25
|
-
def parse
|
23
|
+
def parse(string, modules = {})
|
26
24
|
modules.each do |url, klass|
|
27
|
-
XSLT.register
|
25
|
+
XSLT.register(url, klass)
|
28
26
|
end
|
29
27
|
|
28
|
+
doc = XML::Document.parse(string, nil, nil, XML::ParseOptions::DEFAULT_XSLT)
|
30
29
|
if Nokogiri.jruby?
|
31
|
-
Stylesheet.parse_stylesheet_doc(
|
30
|
+
Stylesheet.parse_stylesheet_doc(doc, string)
|
32
31
|
else
|
33
|
-
Stylesheet.parse_stylesheet_doc(
|
32
|
+
Stylesheet.parse_stylesheet_doc(doc)
|
34
33
|
end
|
35
34
|
end
|
36
35
|
|
37
36
|
###
|
38
37
|
# Quote parameters in +params+ for stylesheet safety
|
39
|
-
def quote_params
|
38
|
+
def quote_params(params)
|
40
39
|
parray = (params.instance_of?(Hash) ? params.to_a.flatten : params).dup
|
41
|
-
parray.each_with_index do |v,i|
|
42
|
-
if i % 2 > 0
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
end
|
40
|
+
parray.each_with_index do |v, i|
|
41
|
+
parray[i] = if i % 2 > 0
|
42
|
+
if v =~ /'/
|
43
|
+
"concat('#{v.gsub(/'/, %q{', "'", '})}')"
|
44
|
+
else
|
45
|
+
"'#{v}'"
|
46
|
+
end
|
49
47
|
else
|
50
|
-
|
48
|
+
v.to_s
|
51
49
|
end
|
52
50
|
end
|
53
51
|
parray.flatten
|
@@ -55,3 +53,5 @@ module Nokogiri
|
|
55
53
|
end
|
56
54
|
end
|
57
55
|
end
|
56
|
+
|
57
|
+
require_relative "xslt/stylesheet"
|
data/lib/nokogiri.rb
CHANGED
@@ -2,38 +2,29 @@
|
|
2
2
|
# frozen_string_literal: true
|
3
3
|
# Modify the PATH on windows so that the external DLLs will get loaded.
|
4
4
|
|
5
|
-
require
|
5
|
+
require "rbconfig"
|
6
6
|
|
7
7
|
if defined?(RUBY_ENGINE) && RUBY_ENGINE == "jruby"
|
8
|
-
|
8
|
+
require_relative "nokogiri/jruby/dependencies"
|
9
9
|
end
|
10
10
|
|
11
|
-
|
12
|
-
|
13
|
-
require 'nokogiri/version'
|
14
|
-
require 'nokogiri/syntax_error'
|
15
|
-
require 'nokogiri/xml'
|
16
|
-
require 'nokogiri/xslt'
|
17
|
-
require 'nokogiri/html'
|
18
|
-
require 'nokogiri/decorators/slop'
|
19
|
-
require 'nokogiri/css'
|
20
|
-
require 'nokogiri/html/builder'
|
11
|
+
require_relative "nokogiri/extension"
|
21
12
|
|
22
13
|
# Nokogiri parses and searches XML/HTML very quickly, and also has
|
23
14
|
# correctly implemented CSS3 selector support as well as XPath 1.0
|
24
15
|
# support.
|
25
16
|
#
|
26
17
|
# Parsing a document returns either a Nokogiri::XML::Document, or a
|
27
|
-
# Nokogiri::
|
18
|
+
# Nokogiri::HTML4::Document depending on the kind of document you parse.
|
28
19
|
#
|
29
20
|
# Here is an example:
|
30
21
|
#
|
31
22
|
# require 'nokogiri'
|
32
23
|
# require 'open-uri'
|
33
24
|
#
|
34
|
-
# # Get a Nokogiri::
|
25
|
+
# # Get a Nokogiri::HTML4::Document for the page we’re interested in...
|
35
26
|
#
|
36
|
-
# doc = Nokogiri::
|
27
|
+
# doc = Nokogiri::HTML4(URI.open('http://www.google.com/search?q=tenderlove'))
|
37
28
|
#
|
38
29
|
# # Do funky things with it using Nokogiri::XML::Node methods...
|
39
30
|
#
|
@@ -49,27 +40,27 @@ module Nokogiri
|
|
49
40
|
class << self
|
50
41
|
###
|
51
42
|
# Parse an HTML or XML document. +string+ contains the document.
|
52
|
-
def parse
|
43
|
+
def parse(string, url = nil, encoding = nil, options = nil)
|
53
44
|
if string.respond_to?(:read) ||
|
54
45
|
/^\s*<(?:!DOCTYPE\s+)?html[\s>]/i === string[0, 512]
|
55
46
|
# Expect an HTML indicator to appear within the first 512
|
56
47
|
# characters of a document. (<?xml ?> + <?xml-stylesheet ?>
|
57
48
|
# shouldn't be that long)
|
58
|
-
Nokogiri.
|
49
|
+
Nokogiri.HTML4(string, url, encoding,
|
59
50
|
options || XML::ParseOptions::DEFAULT_HTML)
|
60
51
|
else
|
61
52
|
Nokogiri.XML(string, url, encoding,
|
62
53
|
options || XML::ParseOptions::DEFAULT_XML)
|
63
|
-
end.tap
|
54
|
+
end.tap do |doc|
|
64
55
|
yield doc if block_given?
|
65
|
-
|
56
|
+
end
|
66
57
|
end
|
67
58
|
|
68
59
|
###
|
69
60
|
# Create a new Nokogiri::XML::DocumentFragment
|
70
|
-
def make
|
61
|
+
def make(input = nil, opts = {}, &blk)
|
71
62
|
if input
|
72
|
-
Nokogiri::
|
63
|
+
Nokogiri::HTML4.fragment(input).children.first
|
73
64
|
else
|
74
65
|
Nokogiri(&blk)
|
75
66
|
end
|
@@ -98,10 +89,10 @@ module Nokogiri
|
|
98
89
|
# Make sure to support some popular encoding aliases not known by
|
99
90
|
# all iconv implementations.
|
100
91
|
{
|
101
|
-
|
102
|
-
}.each
|
92
|
+
"Windows-31J" => "CP932", # Windows-31J is the IANA registered name of CP932.
|
93
|
+
}.each do |alias_name, name|
|
103
94
|
EncodingHandler.alias(name, alias_name) if EncodingHandler[alias_name].nil?
|
104
|
-
|
95
|
+
end
|
105
96
|
end
|
106
97
|
end
|
107
98
|
|
@@ -109,15 +100,26 @@ module Nokogiri
|
|
109
100
|
end
|
110
101
|
|
111
102
|
###
|
112
|
-
# Parse a document contained in +args+. Nokogiri will try to guess what
|
113
|
-
#
|
114
|
-
# Nokogiri.parse
|
103
|
+
# Parse a document contained in +args+. Nokogiri will try to guess what type of document you are
|
104
|
+
# attempting to parse. For more information, see Nokogiri.parse
|
115
105
|
#
|
116
|
-
# To specify the type of document, use Nokogiri.XML or Nokogiri.
|
106
|
+
# To specify the type of document, use {Nokogiri.XML}, {Nokogiri.HTML4}, or {Nokogiri.HTML5}.
|
117
107
|
def Nokogiri(*args, &block)
|
118
108
|
if block_given?
|
119
|
-
Nokogiri::
|
109
|
+
Nokogiri::HTML4::Builder.new(&block).doc.root
|
120
110
|
else
|
121
111
|
Nokogiri.parse(*args)
|
122
112
|
end
|
123
113
|
end
|
114
|
+
|
115
|
+
require_relative "nokogiri/version"
|
116
|
+
require_relative "nokogiri/syntax_error"
|
117
|
+
require_relative "nokogiri/xml"
|
118
|
+
require_relative "nokogiri/xslt"
|
119
|
+
require_relative "nokogiri/html4"
|
120
|
+
require_relative "nokogiri/html"
|
121
|
+
require_relative "nokogiri/decorators/slop"
|
122
|
+
require_relative "nokogiri/css"
|
123
|
+
require_relative "nokogiri/html4/builder"
|
124
|
+
|
125
|
+
require_relative "nokogiri/html5" if Nokogiri.uses_gumbo?
|
File without changes
|
File without changes
|
@@ -16,7 +16,7 @@ index cf96d41..1372d8b 100644
|
|
16
16
|
}
|
17
17
|
|
18
18
|
-libxml2.la: $(libxml2_la_OBJECTS) $(libxml2_la_DEPENDENCIES) $(EXTRA_libxml2_la_DEPENDENCIES)
|
19
|
-
+$(top_builddir)/libxml2.la: $(libxml2_la_OBJECTS) $(libxml2_la_DEPENDENCIES) $(EXTRA_libxml2_la_DEPENDENCIES)
|
19
|
+
+$(top_builddir)/libxml2.la: $(libxml2_la_OBJECTS) $(libxml2_la_DEPENDENCIES) $(EXTRA_libxml2_la_DEPENDENCIES)
|
20
20
|
$(AM_V_CCLD)$(libxml2_la_LINK) -rpath $(libdir) $(libxml2_la_OBJECTS) $(libxml2_la_LIBADD) $(LIBS)
|
21
21
|
|
22
22
|
testdso.la: $(testdso_la_OBJECTS) $(testdso_la_DEPENDENCIES) $(EXTRA_testdso_la_DEPENDENCIES)
|