nokogiri 1.11.1 → 1.12.0.rc1
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/LICENSE-DEPENDENCIES.md +232 -11
- data/LICENSE.md +1 -1
- data/README.md +27 -21
- data/dependencies.yml +12 -12
- data/ext/nokogiri/depend +35 -474
- data/ext/nokogiri/extconf.rb +391 -243
- data/ext/nokogiri/gumbo.c +611 -0
- data/ext/nokogiri/{html_document.c → html4_document.c} +18 -23
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +119 -0
- data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +29 -27
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +206 -66
- data/ext/nokogiri/nokogiri.h +166 -76
- data/ext/nokogiri/test_global_handlers.c +3 -4
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +258 -200
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +28 -17
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +58 -49
- data/ext/nokogiri/xml_node.c +473 -414
- data/ext/nokogiri/xml_node_set.c +174 -162
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +193 -157
- data/ext/nokogiri/xml_relax_ng.c +29 -23
- data/ext/nokogiri/xml_sax_parser.c +111 -106
- data/ext/nokogiri/xml_sax_parser_context.c +102 -85
- data/ext/nokogiri/xml_sax_push_parser.c +34 -27
- data/ext/nokogiri/xml_schema.c +49 -41
- data/ext/nokogiri/xml_syntax_error.c +21 -23
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +86 -77
- data/ext/nokogiri/xslt_stylesheet.c +157 -156
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +17 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4886 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri.rb +31 -50
- data/lib/nokogiri/css.rb +14 -14
- data/lib/nokogiri/css/parser.rb +2 -2
- data/lib/nokogiri/css/parser.y +1 -1
- data/lib/nokogiri/css/syntax_error.rb +1 -1
- data/lib/nokogiri/extension.rb +26 -0
- data/lib/nokogiri/gumbo.rb +14 -0
- data/lib/nokogiri/html.rb +31 -27
- data/lib/nokogiri/html4.rb +40 -0
- data/lib/nokogiri/{html → html4}/builder.rb +2 -2
- data/lib/nokogiri/{html → html4}/document.rb +4 -4
- data/lib/nokogiri/{html → html4}/document_fragment.rb +17 -17
- data/lib/nokogiri/{html → html4}/element_description.rb +1 -1
- data/lib/nokogiri/{html → html4}/element_description_defaults.rb +1 -1
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +1 -1
- data/lib/nokogiri/{html → html4}/sax/parser.rb +11 -14
- data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +5 -5
- data/lib/nokogiri/html5.rb +473 -0
- data/lib/nokogiri/html5/document.rb +74 -0
- data/lib/nokogiri/html5/document_fragment.rb +80 -0
- data/lib/nokogiri/html5/node.rb +93 -0
- data/lib/nokogiri/version/constant.rb +1 -1
- data/lib/nokogiri/version/info.rb +42 -9
- data/lib/nokogiri/xml.rb +35 -36
- data/lib/nokogiri/xml/document.rb +74 -28
- data/lib/nokogiri/xml/node.rb +45 -47
- data/lib/nokogiri/xml/parse_options.rb +2 -0
- data/lib/nokogiri/xml/pp.rb +2 -2
- data/lib/nokogiri/xml/reader.rb +2 -9
- data/lib/nokogiri/xml/sax.rb +4 -4
- data/lib/nokogiri/xml/sax/document.rb +24 -30
- data/lib/nokogiri/xml/xpath.rb +3 -5
- data/lib/nokogiri/xml/xpath/syntax_error.rb +1 -1
- data/lib/nokogiri/xslt.rb +16 -16
- data/lib/nokogiri/xslt/stylesheet.rb +1 -1
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/{0008-use-glibc-strlen.patch → 0004-use-glibc-strlen.patch} +0 -0
- data/patches/libxml2/{0009-avoid-isnan-isinf.patch → 0005-avoid-isnan-isinf.patch} +4 -4
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- metadata +117 -109
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -118
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -63
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -25
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/sax/parser_context.rb +0 -17
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +0 -73
- data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +0 -103
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
@@ -1,3 +1,4 @@
|
|
1
|
+
# coding: utf-8
|
1
2
|
# frozen_string_literal: true
|
2
3
|
|
3
4
|
require 'pathname'
|
@@ -13,11 +14,12 @@ module Nokogiri
|
|
13
14
|
# Nokogiri::XML::Searchable#xpath
|
14
15
|
#
|
15
16
|
class Document < Nokogiri::XML::Node
|
16
|
-
#
|
17
|
-
#
|
17
|
+
# See http://www.w3.org/TR/REC-xml-names/#ns-decl for more details. Note that we're not
|
18
|
+
# attempting to handle unicode characters partly because libxml2 doesn't handle unicode
|
19
|
+
# characters in NCNAMEs.
|
18
20
|
NCNAME_START_CHAR = "A-Za-z_"
|
19
|
-
NCNAME_CHAR = NCNAME_START_CHAR + "
|
20
|
-
NCNAME_RE = /^xmlns(
|
21
|
+
NCNAME_CHAR = NCNAME_START_CHAR + "\\-\\.0-9"
|
22
|
+
NCNAME_RE = /^xmlns(?::([#{NCNAME_START_CHAR}][#{NCNAME_CHAR}]*))?$/
|
21
23
|
|
22
24
|
##
|
23
25
|
# Parse an XML file.
|
@@ -79,6 +81,35 @@ module Nokogiri
|
|
79
81
|
return doc
|
80
82
|
end
|
81
83
|
|
84
|
+
##
|
85
|
+
# @!method wrap(java_document)
|
86
|
+
# @!scope class
|
87
|
+
#
|
88
|
+
# Create a {Document} using an existing Java DOM document object.
|
89
|
+
#
|
90
|
+
# The returned {Document} shares the same underlying data structure as the Java object, so
|
91
|
+
# changes in one are reflected in the other.
|
92
|
+
#
|
93
|
+
# @param java_document [Java::OrgW3cDom::Document]
|
94
|
+
# @return [Nokogiri::XML::Document]
|
95
|
+
# @note This method is only available when running JRuby.
|
96
|
+
# @note The class +Java::OrgW3cDom::Document+ is also accessible as +org.w3c.dom.Document+.
|
97
|
+
# @see #to_java
|
98
|
+
|
99
|
+
##
|
100
|
+
# @!method to_java()
|
101
|
+
#
|
102
|
+
# Returns the underlying Java DOM document object for the {Document}.
|
103
|
+
#
|
104
|
+
# The returned Java object shares the same underlying data structure as the {Document}, so
|
105
|
+
# changes in one are reflected in the other.
|
106
|
+
#
|
107
|
+
# @return [Java::OrgW3cDom::Document]
|
108
|
+
# @note This method is only available when running JRuby.
|
109
|
+
# @note The class +Java::OrgW3cDom::Document+ is also accessible as +org.w3c.dom.Document+.
|
110
|
+
# @see .wrap
|
111
|
+
|
112
|
+
|
82
113
|
# A list of Nokogiri::XML::SyntaxError found when parsing a document
|
83
114
|
attr_accessor :errors
|
84
115
|
|
@@ -88,33 +119,58 @@ module Nokogiri
|
|
88
119
|
end
|
89
120
|
|
90
121
|
##
|
91
|
-
# Create
|
122
|
+
# Create a new +Element+ with +name+ sharing GC lifecycle with the document, optionally
|
123
|
+
# setting contents or attributes.
|
124
|
+
#
|
125
|
+
# Arguments may be passed to initialize the element:
|
126
|
+
# - a +Hash+ argument will be used to set attributes
|
127
|
+
# - a non-Hash object that responds to +#to_s+ will be used to set the new node's contents
|
92
128
|
#
|
93
|
-
#
|
94
|
-
# doc.create_element "div", :class => "container" # <div class='container'></div>
|
95
|
-
# doc.create_element "div", "contents" # <div>contents</div>
|
96
|
-
# doc.create_element "div", "contents", :class => "container" # <div class='container'>contents</div>
|
97
|
-
# doc.create_element "div" { |node| node['class'] = "container" } # <div class='container'></div>
|
129
|
+
# A block may be passed to mutate the node.
|
98
130
|
#
|
99
|
-
|
131
|
+
# @param name [String]
|
132
|
+
# @param contents_or_attrs [#to_s,Hash]
|
133
|
+
# @yieldparam node [Nokogiri::XML::Element]
|
134
|
+
# @return [Nokogiri::XML::Element]
|
135
|
+
#
|
136
|
+
# @example An empty element without attributes
|
137
|
+
# doc.create_element("div")
|
138
|
+
# # => <div></div>
|
139
|
+
#
|
140
|
+
# @example An element with contents
|
141
|
+
# doc.create_element("div", "contents")
|
142
|
+
# # => <div>contents</div>
|
143
|
+
#
|
144
|
+
# @example An element with attributes
|
145
|
+
# doc.create_element("div", {"class" => "container"})
|
146
|
+
# # => <div class='container'></div>
|
147
|
+
#
|
148
|
+
# @example An element with contents and attributes
|
149
|
+
# doc.create_element("div", "contents", {"class" => "container"})
|
150
|
+
# # => <div class='container'>contents</div>
|
151
|
+
#
|
152
|
+
# @example Passing a block to mutate the element
|
153
|
+
# doc.create_element("div") { |node| node["class"] = "blue" if before_noon? }
|
154
|
+
#
|
155
|
+
def create_element(name, *contents_or_attrs, &block)
|
100
156
|
elm = Nokogiri::XML::Element.new(name, self, &block)
|
101
|
-
|
157
|
+
contents_or_attrs.each do |arg|
|
102
158
|
case arg
|
103
159
|
when Hash
|
104
|
-
arg.each
|
160
|
+
arg.each do |k, v|
|
105
161
|
key = k.to_s
|
106
162
|
if key =~ NCNAME_RE
|
107
|
-
ns_name =
|
108
|
-
elm.add_namespace_definition
|
163
|
+
ns_name = Regexp.last_match(1)
|
164
|
+
elm.add_namespace_definition(ns_name, v)
|
109
165
|
else
|
110
166
|
elm[k.to_s] = v.to_s
|
111
167
|
end
|
112
|
-
|
168
|
+
end
|
113
169
|
else
|
114
170
|
elm.content = arg
|
115
171
|
end
|
116
172
|
end
|
117
|
-
if ns = elm.namespace_definitions.find { |n| n.prefix.nil?
|
173
|
+
if ns = elm.namespace_definitions.find { |n| n.prefix.nil? || (n.prefix == '') }
|
118
174
|
elm.namespace = ns
|
119
175
|
end
|
120
176
|
elm
|
@@ -262,24 +318,14 @@ module Nokogiri
|
|
262
318
|
end
|
263
319
|
alias :<< :add_child
|
264
320
|
|
265
|
-
##
|
266
|
-
# +JRuby+
|
267
|
-
# Wraps Java's org.w3c.dom.document and returns Nokogiri::XML::Document
|
268
|
-
def self.wrap(document) end if false # native-ext provides Document.wrap
|
269
|
-
|
270
|
-
##
|
271
|
-
# +JRuby+
|
272
|
-
# Returns Java's org.w3c.dom.document of this Document.
|
273
|
-
def to_java; end if false # JRuby provides #to_java
|
274
|
-
|
275
321
|
private
|
322
|
+
|
276
323
|
def self.empty_doc? string_or_io
|
277
324
|
string_or_io.nil? ||
|
278
325
|
(string_or_io.respond_to?(:empty?) && string_or_io.empty?) ||
|
279
326
|
(string_or_io.respond_to?(:eof?) && string_or_io.eof?)
|
280
327
|
end
|
281
328
|
|
282
|
-
# @private
|
283
329
|
IMPLIED_XPATH_CONTEXTS = [ '//'.freeze ].freeze # :nodoc:
|
284
330
|
|
285
331
|
def inspect_attributes
|
data/lib/nokogiri/xml/node.rb
CHANGED
@@ -1,68 +1,57 @@
|
|
1
1
|
# encoding: UTF-8
|
2
2
|
# frozen_string_literal: true
|
3
3
|
require "stringio"
|
4
|
-
require "nokogiri/xml/node/save_options"
|
5
4
|
|
6
5
|
module Nokogiri
|
7
6
|
module XML
|
8
|
-
|
9
|
-
# Nokogiri::XML::Node is your window to the fun filled world of dealing
|
10
|
-
#
|
11
|
-
#
|
7
|
+
##
|
8
|
+
# {Nokogiri::XML::Node} is your window to the fun filled world of dealing with XML and HTML
|
9
|
+
# tags. A {Nokogiri::XML::Node} may be treated similarly to a hash with regard to attributes. For
|
10
|
+
# example:
|
12
11
|
#
|
13
|
-
#
|
14
|
-
# => <a href
|
15
|
-
#
|
16
|
-
# => "
|
17
|
-
#
|
18
|
-
# =>
|
19
|
-
#
|
20
|
-
# => ["#foo", "link"]
|
21
|
-
# irb(main):008:0> node['class'] = 'green'
|
22
|
-
# => "green"
|
23
|
-
# irb(main):009:0> node
|
24
|
-
# => <a href="#foo" id="link" class="green">link</a>
|
25
|
-
# irb(main):010:0>
|
12
|
+
# node = Nokogiri::XML::DocumentFragment.parse("<a href='#foo' id='link'>link</a>").at_css("a")
|
13
|
+
# node.to_html # => "<a href=\"#foo\" id=\"link\">link</a>"
|
14
|
+
# node['href'] # => "#foo"
|
15
|
+
# node.keys # => ["href", "id"]
|
16
|
+
# node.values # => ["#foo", "link"]
|
17
|
+
# node['class'] = 'green' # => "green"
|
18
|
+
# node.to_html # => "<a href=\"#foo\" id=\"link\" class=\"green\">link</a>"
|
26
19
|
#
|
27
|
-
# See
|
20
|
+
# See the method group entitled "Working With Node Attributes" for the full set of methods.
|
28
21
|
#
|
29
|
-
# Nokogiri::XML::Node also has methods that let you move around your
|
22
|
+
# {Nokogiri::XML::Node} also has methods that let you move around your
|
30
23
|
# tree. For navigating your tree, see:
|
31
24
|
#
|
32
|
-
# *
|
33
|
-
# *
|
34
|
-
# *
|
35
|
-
# *
|
36
|
-
#
|
25
|
+
# * {#parent}
|
26
|
+
# * {#children}
|
27
|
+
# * {#next}
|
28
|
+
# * {#previous}
|
37
29
|
#
|
38
30
|
# When printing or otherwise emitting a document or a node (and
|
39
31
|
# its subtree), there are a few methods you might want to use:
|
40
32
|
#
|
41
|
-
# * content, text, inner_text, to_str: emit plaintext
|
42
|
-
#
|
43
|
-
#
|
44
|
-
# document, meaning that entities will be replaced (e.g., "&lt;"
|
45
|
-
# will be replaced with "<"), meaning that any sanitizing will
|
46
|
-
# likely be un-done in the output.
|
33
|
+
# * {#content}, {#text}, {#inner_text}, {#to_str}: These methods will all <b>emit plaintext</b>,
|
34
|
+
# meaning that entities will be replaced (e.g., "&lt;" will be replaced with "<"), meaning
|
35
|
+
# that any sanitizing will likely be un-done in the output.
|
47
36
|
#
|
48
|
-
# * to_s, to_xml, to_html, inner_html:
|
37
|
+
# * {#to_s}, {#to_xml}, {#to_html}, {#inner_html}: These methods will all <b>emit
|
38
|
+
# properly-escaped markup</b>, meaning that it's suitable for consumption by browsers,
|
39
|
+
# parsers, etc.
|
49
40
|
#
|
50
|
-
#
|
51
|
-
# that it's suitable for consumption by browsers, parsers, etc.
|
41
|
+
# You may search this node's subtree using {#xpath} and {#css}
|
52
42
|
#
|
53
|
-
# You may search this node's subtree using Searchable#xpath and Searchable#css
|
54
43
|
class Node
|
55
44
|
include Nokogiri::XML::PP::Node
|
56
45
|
include Nokogiri::XML::Searchable
|
57
46
|
include Enumerable
|
58
47
|
|
59
|
-
# Element node type, see Nokogiri::XML::Node#element?
|
48
|
+
# Element node type, see {Nokogiri::XML::Node#element?}
|
60
49
|
ELEMENT_NODE = 1
|
61
50
|
# Attribute node type
|
62
51
|
ATTRIBUTE_NODE = 2
|
63
|
-
# Text node type, see Nokogiri::XML::Node#text?
|
52
|
+
# Text node type, see {Nokogiri::XML::Node#text?}
|
64
53
|
TEXT_NODE = 3
|
65
|
-
# CDATA node type, see Nokogiri::XML::Node#cdata?
|
54
|
+
# CDATA node type, see {Nokogiri::XML::Node#cdata?}
|
66
55
|
CDATA_SECTION_NODE = 4
|
67
56
|
# Entity reference node type
|
68
57
|
ENTITY_REF_NODE = 5
|
@@ -70,9 +59,9 @@ module Nokogiri
|
|
70
59
|
ENTITY_NODE = 6
|
71
60
|
# PI node type
|
72
61
|
PI_NODE = 7
|
73
|
-
# Comment node type, see Nokogiri::XML::Node#comment?
|
62
|
+
# Comment node type, see {Nokogiri::XML::Node#comment?}
|
74
63
|
COMMENT_NODE = 8
|
75
|
-
# Document node type, see Nokogiri::XML::Node#xml?
|
64
|
+
# Document node type, see {Nokogiri::XML::Node#xml?}
|
76
65
|
DOCUMENT_NODE = 9
|
77
66
|
# Document type node type
|
78
67
|
DOCUMENT_TYPE_NODE = 10
|
@@ -80,7 +69,7 @@ module Nokogiri
|
|
80
69
|
DOCUMENT_FRAG_NODE = 11
|
81
70
|
# Notation node type
|
82
71
|
NOTATION_NODE = 12
|
83
|
-
# HTML document node type, see Nokogiri::XML::Node#html?
|
72
|
+
# HTML document node type, see {Nokogiri::XML::Node#html?}
|
84
73
|
HTML_DOCUMENT_NODE = 13
|
85
74
|
# DTD node type
|
86
75
|
DTD_NODE = 14
|
@@ -99,8 +88,15 @@ module Nokogiri
|
|
99
88
|
# DOCB document node type
|
100
89
|
DOCB_DOCUMENT_NODE = 21
|
101
90
|
|
102
|
-
|
103
|
-
|
91
|
+
##
|
92
|
+
# Create a new node with +name+ sharing GC lifecycle with +document+.
|
93
|
+
# @param name [String]
|
94
|
+
# @param document [Nokogiri::XML::Document]
|
95
|
+
# @yieldparam node [Nokogiri::XML::Node]
|
96
|
+
# @return [Nokogiri::XML::Node]
|
97
|
+
# @see Nokogiri::XML::Node.new
|
98
|
+
def initialize(name, document)
|
99
|
+
# This is intentionally empty.
|
104
100
|
end
|
105
101
|
|
106
102
|
###
|
@@ -840,7 +836,7 @@ module Nokogiri
|
|
840
836
|
node_set = in_context(contents, options.to_i)
|
841
837
|
if (node_set.empty? && (document.errors.length > error_count))
|
842
838
|
if options.recover?
|
843
|
-
fragment = Nokogiri::
|
839
|
+
fragment = Nokogiri::HTML4::DocumentFragment.parse contents
|
844
840
|
node_set = fragment.children
|
845
841
|
else
|
846
842
|
raise document.errors[error_count]
|
@@ -886,7 +882,7 @@ module Nokogiri
|
|
886
882
|
type == DOCUMENT_NODE
|
887
883
|
end
|
888
884
|
|
889
|
-
# Returns true if this is an
|
885
|
+
# Returns true if this is an HTML4::Document node
|
890
886
|
def html?
|
891
887
|
type == HTML_DOCUMENT_NODE
|
892
888
|
end
|
@@ -912,11 +908,11 @@ module Nokogiri
|
|
912
908
|
end
|
913
909
|
|
914
910
|
###
|
915
|
-
# Fetch the Nokogiri::
|
911
|
+
# Fetch the Nokogiri::HTML4::ElementDescription for this node. Returns
|
916
912
|
# nil on XML documents and on unknown tags.
|
917
913
|
def description
|
918
914
|
return nil if document.xml?
|
919
|
-
Nokogiri::
|
915
|
+
Nokogiri::HTML4::ElementDescription[name]
|
920
916
|
end
|
921
917
|
|
922
918
|
###
|
@@ -1238,3 +1234,5 @@ Requires a Node, NodeSet or String argument, and cannot accept a #{data.class}.
|
|
1238
1234
|
end
|
1239
1235
|
end
|
1240
1236
|
end
|
1237
|
+
|
1238
|
+
require_relative "node/save_options"
|
@@ -71,6 +71,8 @@ module Nokogiri
|
|
71
71
|
|
72
72
|
# the default options used for parsing XML documents
|
73
73
|
DEFAULT_XML = RECOVER | NONET
|
74
|
+
# the default options used for parsing XSLT stylesheets
|
75
|
+
DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA
|
74
76
|
# the default options used for parsing HTML documents
|
75
77
|
DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
|
76
78
|
# the default options used for parsing XML schemas
|
data/lib/nokogiri/xml/pp.rb
CHANGED
@@ -1,3 +1,3 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
3
|
-
|
2
|
+
require_relative "pp/node"
|
3
|
+
require_relative "pp/character_data"
|
data/lib/nokogiri/xml/reader.rb
CHANGED
@@ -86,7 +86,8 @@ module Nokogiri
|
|
86
86
|
private :initialize
|
87
87
|
|
88
88
|
###
|
89
|
-
# Get
|
89
|
+
# Get the attributes of the current node as a Hash
|
90
|
+
# @return [Hash<String, String>] Attribute names and values
|
90
91
|
def attributes
|
91
92
|
attrs_hash = attribute_nodes.each_with_object({}) do |node, hash|
|
92
93
|
hash[node.name] = node.to_s
|
@@ -96,14 +97,6 @@ module Nokogiri
|
|
96
97
|
attrs_hash
|
97
98
|
end
|
98
99
|
|
99
|
-
###
|
100
|
-
# Get a list of attributes for the current node
|
101
|
-
def attribute_nodes
|
102
|
-
nodes = attr_nodes
|
103
|
-
nodes.each { |v| v.instance_variable_set(:@_r, self) }
|
104
|
-
nodes
|
105
|
-
end
|
106
|
-
|
107
100
|
###
|
108
101
|
# Move the cursor through the document yielding the cursor to the block
|
109
102
|
def each
|
data/lib/nokogiri/xml/sax.rb
CHANGED
@@ -1,5 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
2
|
+
require_relative "sax/document"
|
3
|
+
require_relative "sax/parser_context"
|
4
|
+
require_relative "sax/parser"
|
5
|
+
require_relative "sax/push_parser"
|
@@ -2,20 +2,19 @@
|
|
2
2
|
module Nokogiri
|
3
3
|
module XML
|
4
4
|
###
|
5
|
-
# SAX Parsers are event driven parsers.
|
6
|
-
#
|
7
|
-
#
|
5
|
+
# SAX Parsers are event driven parsers. Nokogiri provides two different event based parsers when
|
6
|
+
# dealing with XML. If you want to do SAX style parsing using HTML, check out
|
7
|
+
# Nokogiri::HTML4::SAX.
|
8
8
|
#
|
9
|
-
# The basic way a SAX style parser works is by creating a parser,
|
10
|
-
#
|
11
|
-
#
|
12
|
-
# it encounters events you said you would like to know about.
|
9
|
+
# The basic way a SAX style parser works is by creating a parser, telling the parser about the
|
10
|
+
# events we're interested in, then giving the parser some XML to process. The parser will notify
|
11
|
+
# you when it encounters events you said you would like to know about.
|
13
12
|
#
|
14
|
-
# To register for events, you simply subclass Nokogiri::XML::SAX::Document,
|
15
|
-
#
|
13
|
+
# To register for events, you simply subclass Nokogiri::XML::SAX::Document, and implement the
|
14
|
+
# methods for which you would like notification.
|
16
15
|
#
|
17
|
-
# For example, if I want to be notified when a document ends, and when an
|
18
|
-
#
|
16
|
+
# For example, if I want to be notified when a document ends, and when an element starts, I
|
17
|
+
# would write a class like this:
|
19
18
|
#
|
20
19
|
# class MyDocument < Nokogiri::XML::SAX::Document
|
21
20
|
# def end_document
|
@@ -27,8 +26,7 @@ module Nokogiri
|
|
27
26
|
# end
|
28
27
|
# end
|
29
28
|
#
|
30
|
-
# Then I would instantiate a SAX parser with this document, and feed the
|
31
|
-
# parser some XML
|
29
|
+
# Then I would instantiate a SAX parser with this document, and feed the parser some XML
|
32
30
|
#
|
33
31
|
# # Create a new parser
|
34
32
|
# parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
|
@@ -36,25 +34,21 @@ module Nokogiri
|
|
36
34
|
# # Feed the parser some XML
|
37
35
|
# parser.parse(File.open(ARGV[0]))
|
38
36
|
#
|
39
|
-
# Now my document handler will be called when each node starts, and when
|
40
|
-
#
|
41
|
-
# a look at Nokogiri::XML::SAX::Document.
|
37
|
+
# Now my document handler will be called when each node starts, and when the document ends. To
|
38
|
+
# see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
|
42
39
|
#
|
43
|
-
# Two SAX parsers for XML are available, a parser that reads from a string
|
44
|
-
#
|
45
|
-
#
|
46
|
-
# use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
|
40
|
+
# Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
|
41
|
+
# feels necessary, and a parser that lets you spoon feed it XML. If you want to let Nokogiri
|
42
|
+
# deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
|
47
43
|
# control over the XML input, use the Nokogiri::XML::SAX::PushParser.
|
48
44
|
module SAX
|
49
45
|
###
|
50
|
-
# This class is used for registering types of events you are interested
|
51
|
-
#
|
52
|
-
#
|
53
|
-
#
|
54
|
-
# you are interested in knowing about.
|
46
|
+
# This class is used for registering types of events you are interested in handling. All of
|
47
|
+
# the methods on this class are available as possible events while parsing an XML document. To
|
48
|
+
# register for any particular event, just subclass this class and implement the methods you
|
49
|
+
# are interested in knowing about.
|
55
50
|
#
|
56
|
-
# To only be notified about start and end element events, write a class
|
57
|
-
# like this:
|
51
|
+
# To only be notified about start and end element events, write a class like this:
|
58
52
|
#
|
59
53
|
# class MyDocument < Nokogiri::XML::SAX::Document
|
60
54
|
# def start_element name, attrs = []
|
@@ -66,8 +60,8 @@ module Nokogiri
|
|
66
60
|
# end
|
67
61
|
# end
|
68
62
|
#
|
69
|
-
# You can use this event handler for any SAX style parser included with
|
70
|
-
# Nokogiri
|
63
|
+
# You can use this event handler for any SAX style parser included with Nokogiri. See
|
64
|
+
# Nokogiri::XML::SAX, and Nokogiri::HTML4::SAX.
|
71
65
|
class Document
|
72
66
|
###
|
73
67
|
# Called when an XML declaration is parsed
|
@@ -129,7 +123,7 @@ module Nokogiri
|
|
129
123
|
end
|
130
124
|
|
131
125
|
###
|
132
|
-
# Characters read between a tag.
|
126
|
+
# Characters read between a tag. This method might be called multiple
|
133
127
|
# times given one contiguous string of characters.
|
134
128
|
#
|
135
129
|
# +string+ contains the character data
|