nokogiri 1.10.3 → 1.12.5
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +3 -0
- data/LICENSE-DEPENDENCIES.md +1173 -884
- data/LICENSE.md +1 -1
- data/README.md +176 -96
- data/dependencies.yml +28 -26
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +716 -414
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +120 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +228 -91
- data/ext/nokogiri/nokogiri.h +191 -89
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +15 -15
- data/ext/nokogiri/xml_attribute_decl.c +18 -18
- data/ext/nokogiri/xml_cdata.c +13 -18
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +267 -195
- data/ext/nokogiri/xml_document_fragment.c +13 -15
- data/ext/nokogiri/xml_dtd.c +54 -48
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +22 -22
- data/ext/nokogiri/xml_encoding_handler.c +28 -17
- data/ext/nokogiri/xml_entity_decl.c +32 -30
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +60 -51
- data/ext/nokogiri/xml_node.c +493 -407
- data/ext/nokogiri/xml_node_set.c +174 -162
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +197 -172
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +112 -112
- data/ext/nokogiri/xml_sax_parser_context.c +105 -86
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +112 -33
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +13 -17
- data/ext/nokogiri/xml_xpath_context.c +158 -73
- data/ext/nokogiri/xslt_stylesheet.c +158 -164
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4886 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/css/node.rb +1 -0
- data/lib/nokogiri/css/parser.rb +64 -63
- data/lib/nokogiri/css/parser.y +3 -3
- data/lib/nokogiri/css/parser_extras.rb +39 -36
- data/lib/nokogiri/css/syntax_error.rb +2 -1
- data/lib/nokogiri/css/tokenizer.rb +105 -103
- data/lib/nokogiri/css/xpath_visitor.rb +73 -43
- data/lib/nokogiri/css.rb +15 -14
- data/lib/nokogiri/decorators/slop.rb +1 -0
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +14 -0
- data/lib/nokogiri/html.rb +32 -27
- data/lib/nokogiri/{html → html4}/builder.rb +3 -2
- data/lib/nokogiri/{html → html4}/document.rb +17 -30
- data/lib/nokogiri/{html → html4}/document_fragment.rb +18 -17
- data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
- data/lib/nokogiri/{html → html4}/element_description_defaults.rb +2 -1
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +2 -1
- data/lib/nokogiri/{html → html4}/sax/parser.rb +12 -14
- data/lib/nokogiri/html4/sax/parser_context.rb +19 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +6 -5
- data/lib/nokogiri/html4.rb +40 -0
- data/lib/nokogiri/html5/document.rb +74 -0
- data/lib/nokogiri/html5/document_fragment.rb +80 -0
- data/lib/nokogiri/html5/node.rb +93 -0
- data/lib/nokogiri/html5.rb +473 -0
- data/lib/nokogiri/jruby/dependencies.rb +20 -0
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +5 -0
- data/lib/nokogiri/version/info.rb +215 -0
- data/lib/nokogiri/version.rb +3 -109
- data/lib/nokogiri/xml/attr.rb +1 -0
- data/lib/nokogiri/xml/attribute_decl.rb +1 -0
- data/lib/nokogiri/xml/builder.rb +74 -32
- data/lib/nokogiri/xml/cdata.rb +1 -0
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +138 -41
- data/lib/nokogiri/xml/document_fragment.rb +5 -6
- data/lib/nokogiri/xml/dtd.rb +1 -0
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_decl.rb +1 -0
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +1 -0
- data/lib/nokogiri/xml/node/save_options.rb +2 -1
- data/lib/nokogiri/xml/node.rb +629 -293
- data/lib/nokogiri/xml/node_set.rb +1 -0
- data/lib/nokogiri/xml/notation.rb +1 -0
- data/lib/nokogiri/xml/parse_options.rb +12 -3
- data/lib/nokogiri/xml/pp/character_data.rb +1 -0
- data/lib/nokogiri/xml/pp/node.rb +1 -0
- data/lib/nokogiri/xml/pp.rb +3 -2
- data/lib/nokogiri/xml/processing_instruction.rb +1 -0
- data/lib/nokogiri/xml/reader.rb +9 -12
- data/lib/nokogiri/xml/relax_ng.rb +7 -2
- data/lib/nokogiri/xml/sax/document.rb +25 -30
- data/lib/nokogiri/xml/sax/parser.rb +1 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +1 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +1 -0
- data/lib/nokogiri/xml/sax.rb +5 -4
- data/lib/nokogiri/xml/schema.rb +13 -4
- data/lib/nokogiri/xml/searchable.rb +25 -16
- data/lib/nokogiri/xml/syntax_error.rb +1 -0
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +4 -5
- data/lib/nokogiri/xml/xpath_context.rb +1 -0
- data/lib/nokogiri/xml.rb +36 -36
- data/lib/nokogiri/xslt/stylesheet.rb +2 -1
- data/lib/nokogiri/xslt.rb +17 -16
- data/lib/nokogiri.rb +32 -51
- data/lib/xsd/xmlparser/nokogiri.rb +1 -0
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0004-use-glibc-strlen.patch +53 -0
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxml2/0007-Fix-XPath-recursion-limit.patch +31 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2511 -0
- data/patches/libxslt/0002-Fix-xml2-config-check-in-configure-script.patch +19 -0
- data/ports/archives/libxml2-2.9.12.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- metadata +151 -153
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxslt/0001-Fix-security-framework-bypass.patch +0 -120
- data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module XML
|
3
4
|
###
|
@@ -5,11 +6,11 @@ module Nokogiri
|
|
5
6
|
#
|
6
7
|
# == Building combinations of parse options
|
7
8
|
# You can build your own combinations of these parse options by using any of the following methods:
|
8
|
-
# *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options.
|
9
|
+
# *Note*: All examples attempt to set the +RECOVER+ & +NOENT+ options.
|
9
10
|
# [Ruby's bitwise operators] You can use the Ruby bitwise operators to set various combinations.
|
10
|
-
# Nokogiri.XML('<content>Chapter 1</content',
|
11
|
+
# Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new((1 << 0) | (1 << 1)))
|
11
12
|
# [Method chaining] Every option has an equivalent method in lowercase. You can chain these methods together to set various combinations.
|
12
|
-
# Nokogiri.XML('<content>Chapter 1</content',
|
13
|
+
# Nokogiri.XML('<content>Chapter 1</content', nil, nil, Nokogiri::XML::ParseOptions.new.recover.noent)
|
13
14
|
# [Using Ruby Blocks] You can also set up parse combinations in the block passed to Nokogiri.XML or Nokogiri.HTML
|
14
15
|
# Nokogiri.XML('<content>Chapter 1</content') {|config| config.recover.noent}
|
15
16
|
#
|
@@ -70,8 +71,12 @@ module Nokogiri
|
|
70
71
|
|
71
72
|
# the default options used for parsing XML documents
|
72
73
|
DEFAULT_XML = RECOVER | NONET
|
74
|
+
# the default options used for parsing XSLT stylesheets
|
75
|
+
DEFAULT_XSLT = RECOVER | NONET | NOENT | DTDLOAD | DTDATTR | NOCDATA
|
73
76
|
# the default options used for parsing HTML documents
|
74
77
|
DEFAULT_HTML = RECOVER | NOERROR | NOWARNING | NONET
|
78
|
+
# the default options used for parsing XML schemas
|
79
|
+
DEFAULT_SCHEMA = NONET
|
75
80
|
|
76
81
|
attr_accessor :options
|
77
82
|
def initialize options = STRICT
|
@@ -106,6 +111,10 @@ module Nokogiri
|
|
106
111
|
@options & RECOVER == STRICT
|
107
112
|
end
|
108
113
|
|
114
|
+
def ==(other)
|
115
|
+
other.to_i == to_i
|
116
|
+
end
|
117
|
+
|
109
118
|
alias :to_i :options
|
110
119
|
|
111
120
|
def inspect
|
data/lib/nokogiri/xml/pp/node.rb
CHANGED
data/lib/nokogiri/xml/pp.rb
CHANGED
@@ -1,2 +1,3 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require_relative "pp/node"
|
3
|
+
require_relative "pp/character_data"
|
data/lib/nokogiri/xml/reader.rb
CHANGED
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module XML
|
3
4
|
###
|
@@ -85,19 +86,15 @@ module Nokogiri
|
|
85
86
|
private :initialize
|
86
87
|
|
87
88
|
###
|
88
|
-
# Get
|
89
|
+
# Get the attributes of the current node as a Hash
|
90
|
+
# @return [Hash<String, String>] Attribute names and values
|
89
91
|
def attributes
|
90
|
-
|
91
|
-
|
92
|
-
|
93
|
-
|
94
|
-
|
95
|
-
|
96
|
-
# Get a list of attributes for the current node
|
97
|
-
def attribute_nodes
|
98
|
-
nodes = attr_nodes
|
99
|
-
nodes.each { |v| v.instance_variable_set(:@_r, self) }
|
100
|
-
nodes
|
92
|
+
attrs_hash = attribute_nodes.each_with_object({}) do |node, hash|
|
93
|
+
hash[node.name] = node.to_s
|
94
|
+
end
|
95
|
+
ns = namespaces
|
96
|
+
attrs_hash.merge!(ns) if ns
|
97
|
+
attrs_hash
|
101
98
|
end
|
102
99
|
|
103
100
|
###
|
@@ -1,11 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module XML
|
3
4
|
class << self
|
4
5
|
###
|
5
6
|
# Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
|
6
7
|
# See Nokogiri::XML::RelaxNG for an example.
|
7
|
-
def RelaxNG
|
8
|
-
RelaxNG.new(string_or_io)
|
8
|
+
def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
|
9
|
+
RelaxNG.new(string_or_io, options)
|
9
10
|
end
|
10
11
|
end
|
11
12
|
|
@@ -26,6 +27,10 @@ module Nokogiri
|
|
26
27
|
# end
|
27
28
|
#
|
28
29
|
# The list of errors are Nokogiri::XML::SyntaxError objects.
|
30
|
+
#
|
31
|
+
# NOTE: RelaxNG input is always treated as TRUSTED documents, meaning that they will cause the
|
32
|
+
# underlying parsing libraries to access network resources. This is counter to Nokogiri's
|
33
|
+
# "untrusted by default" security policy, but is a limitation of the underlying libraries.
|
29
34
|
class RelaxNG < Nokogiri::XML::Schema
|
30
35
|
end
|
31
36
|
end
|
@@ -1,20 +1,20 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module XML
|
3
4
|
###
|
4
|
-
# SAX Parsers are event driven parsers.
|
5
|
-
#
|
6
|
-
#
|
5
|
+
# SAX Parsers are event driven parsers. Nokogiri provides two different event based parsers when
|
6
|
+
# dealing with XML. If you want to do SAX style parsing using HTML, check out
|
7
|
+
# Nokogiri::HTML4::SAX.
|
7
8
|
#
|
8
|
-
# The basic way a SAX style parser works is by creating a parser,
|
9
|
-
#
|
10
|
-
#
|
11
|
-
# it encounters events you said you would like to know about.
|
9
|
+
# The basic way a SAX style parser works is by creating a parser, telling the parser about the
|
10
|
+
# events we're interested in, then giving the parser some XML to process. The parser will notify
|
11
|
+
# you when it encounters events you said you would like to know about.
|
12
12
|
#
|
13
|
-
# To register for events, you simply subclass Nokogiri::XML::SAX::Document,
|
14
|
-
#
|
13
|
+
# To register for events, you simply subclass Nokogiri::XML::SAX::Document, and implement the
|
14
|
+
# methods for which you would like notification.
|
15
15
|
#
|
16
|
-
# For example, if I want to be notified when a document ends, and when an
|
17
|
-
#
|
16
|
+
# For example, if I want to be notified when a document ends, and when an element starts, I
|
17
|
+
# would write a class like this:
|
18
18
|
#
|
19
19
|
# class MyDocument < Nokogiri::XML::SAX::Document
|
20
20
|
# def end_document
|
@@ -26,8 +26,7 @@ module Nokogiri
|
|
26
26
|
# end
|
27
27
|
# end
|
28
28
|
#
|
29
|
-
# Then I would instantiate a SAX parser with this document, and feed the
|
30
|
-
# parser some XML
|
29
|
+
# Then I would instantiate a SAX parser with this document, and feed the parser some XML
|
31
30
|
#
|
32
31
|
# # Create a new parser
|
33
32
|
# parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
|
@@ -35,25 +34,21 @@ module Nokogiri
|
|
35
34
|
# # Feed the parser some XML
|
36
35
|
# parser.parse(File.open(ARGV[0]))
|
37
36
|
#
|
38
|
-
# Now my document handler will be called when each node starts, and when
|
39
|
-
#
|
40
|
-
# a look at Nokogiri::XML::SAX::Document.
|
37
|
+
# Now my document handler will be called when each node starts, and when the document ends. To
|
38
|
+
# see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
|
41
39
|
#
|
42
|
-
# Two SAX parsers for XML are available, a parser that reads from a string
|
43
|
-
#
|
44
|
-
#
|
45
|
-
# use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
|
40
|
+
# Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
|
41
|
+
# feels necessary, and a parser that lets you spoon feed it XML. If you want to let Nokogiri
|
42
|
+
# deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
|
46
43
|
# control over the XML input, use the Nokogiri::XML::SAX::PushParser.
|
47
44
|
module SAX
|
48
45
|
###
|
49
|
-
# This class is used for registering types of events you are interested
|
50
|
-
#
|
51
|
-
#
|
52
|
-
#
|
53
|
-
# you are interested in knowing about.
|
46
|
+
# This class is used for registering types of events you are interested in handling. All of
|
47
|
+
# the methods on this class are available as possible events while parsing an XML document. To
|
48
|
+
# register for any particular event, just subclass this class and implement the methods you
|
49
|
+
# are interested in knowing about.
|
54
50
|
#
|
55
|
-
# To only be notified about start and end element events, write a class
|
56
|
-
# like this:
|
51
|
+
# To only be notified about start and end element events, write a class like this:
|
57
52
|
#
|
58
53
|
# class MyDocument < Nokogiri::XML::SAX::Document
|
59
54
|
# def start_element name, attrs = []
|
@@ -65,8 +60,8 @@ module Nokogiri
|
|
65
60
|
# end
|
66
61
|
# end
|
67
62
|
#
|
68
|
-
# You can use this event handler for any SAX style parser included with
|
69
|
-
# Nokogiri
|
63
|
+
# You can use this event handler for any SAX style parser included with Nokogiri. See
|
64
|
+
# Nokogiri::XML::SAX, and Nokogiri::HTML4::SAX.
|
70
65
|
class Document
|
71
66
|
###
|
72
67
|
# Called when an XML declaration is parsed
|
@@ -128,7 +123,7 @@ module Nokogiri
|
|
128
123
|
end
|
129
124
|
|
130
125
|
###
|
131
|
-
# Characters read between a tag.
|
126
|
+
# Characters read between a tag. This method might be called multiple
|
132
127
|
# times given one contiguous string of characters.
|
133
128
|
#
|
134
129
|
# +string+ contains the character data
|
data/lib/nokogiri/xml/sax.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
require_relative "sax/document"
|
3
|
+
require_relative "sax/parser_context"
|
4
|
+
require_relative "sax/parser"
|
5
|
+
require_relative "sax/push_parser"
|
data/lib/nokogiri/xml/schema.rb
CHANGED
@@ -1,11 +1,12 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module XML
|
3
4
|
class << self
|
4
5
|
###
|
5
6
|
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
|
6
7
|
# object.
|
7
|
-
def Schema
|
8
|
-
Schema.new(string_or_io)
|
8
|
+
def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
|
9
|
+
Schema.new(string_or_io, options)
|
9
10
|
end
|
10
11
|
end
|
11
12
|
|
@@ -26,15 +27,23 @@ module Nokogiri
|
|
26
27
|
# end
|
27
28
|
#
|
28
29
|
# The list of errors are Nokogiri::XML::SyntaxError objects.
|
30
|
+
#
|
31
|
+
# NOTE: As of v1.11.0, Schema treats inputs as UNTRUSTED by default, and so external entities
|
32
|
+
# are not resolved from the network (`http://` or `ftp://`). Previously, parsing treated
|
33
|
+
# documents as "trusted" by default which was counter to Nokogiri's "untrusted by default"
|
34
|
+
# security policy. If a document is trusted, then the caller may turn off the NONET option via
|
35
|
+
# the ParseOptions to re-enable external entity resolution over a network connection.
|
29
36
|
class Schema
|
30
37
|
# Errors while parsing the schema file
|
31
38
|
attr_accessor :errors
|
39
|
+
# The Nokogiri::XML::ParseOptions used to parse the schema
|
40
|
+
attr_accessor :parse_options
|
32
41
|
|
33
42
|
###
|
34
43
|
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
|
35
44
|
# object.
|
36
|
-
def self.new string_or_io
|
37
|
-
from_document
|
45
|
+
def self.new string_or_io, options = ParseOptions::DEFAULT_SCHEMA
|
46
|
+
from_document(Nokogiri::XML(string_or_io), options)
|
38
47
|
end
|
39
48
|
|
40
49
|
###
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module XML
|
3
4
|
#
|
@@ -11,7 +12,9 @@ module Nokogiri
|
|
11
12
|
# Regular expression used by Searchable#search to determine if a query
|
12
13
|
# string is CSS or XPath
|
13
14
|
LOOKS_LIKE_XPATH = /^(\.\/|\/|\.\.|\.$)/
|
14
|
-
|
15
|
+
|
16
|
+
# @!group Searching via XPath or CSS Queries
|
17
|
+
|
15
18
|
###
|
16
19
|
# call-seq: search *paths, [namespace-bindings, xpath-variable-bindings, custom-handler-class]
|
17
20
|
#
|
@@ -45,7 +48,7 @@ module Nokogiri
|
|
45
48
|
# )
|
46
49
|
#
|
47
50
|
# See Searchable#xpath and Searchable#css for further usage help.
|
48
|
-
def search
|
51
|
+
def search(*args)
|
49
52
|
paths, handler, ns, binds = extract_params(args)
|
50
53
|
|
51
54
|
xpaths = paths.map(&:to_s).map do |path|
|
@@ -54,6 +57,7 @@ module Nokogiri
|
|
54
57
|
|
55
58
|
xpath(*(xpaths + [ns, handler, binds].compact))
|
56
59
|
end
|
60
|
+
|
57
61
|
alias :/ :search
|
58
62
|
|
59
63
|
###
|
@@ -63,9 +67,10 @@ module Nokogiri
|
|
63
67
|
# result. +paths+ must be one or more XPath or CSS queries.
|
64
68
|
#
|
65
69
|
# See Searchable#search for more information.
|
66
|
-
def at
|
70
|
+
def at(*args)
|
67
71
|
search(*args).first
|
68
72
|
end
|
73
|
+
|
69
74
|
alias :% :at
|
70
75
|
|
71
76
|
###
|
@@ -101,7 +106,7 @@ module Nokogiri
|
|
101
106
|
# found in an XML document, where tags names are case-sensitive
|
102
107
|
# (e.g., "H1" is distinct from "h1").
|
103
108
|
#
|
104
|
-
def css
|
109
|
+
def css(*args)
|
105
110
|
rules, handler, ns, _ = extract_params(args)
|
106
111
|
|
107
112
|
css_internal self, rules, handler, ns
|
@@ -114,7 +119,7 @@ module Nokogiri
|
|
114
119
|
# match. +rules+ must be one or more CSS selectors.
|
115
120
|
#
|
116
121
|
# See Searchable#css for more information.
|
117
|
-
def at_css
|
122
|
+
def at_css(*args)
|
118
123
|
css(*args).first
|
119
124
|
end
|
120
125
|
|
@@ -148,7 +153,7 @@ module Nokogiri
|
|
148
153
|
# end
|
149
154
|
# }.new)
|
150
155
|
#
|
151
|
-
def xpath
|
156
|
+
def xpath(*args)
|
152
157
|
paths, handler, ns, binds = extract_params(args)
|
153
158
|
|
154
159
|
xpath_internal self, paths, handler, ns, binds
|
@@ -161,17 +166,19 @@ module Nokogiri
|
|
161
166
|
# match. +paths+ must be one or more XPath queries.
|
162
167
|
#
|
163
168
|
# See Searchable#xpath for more information.
|
164
|
-
def at_xpath
|
169
|
+
def at_xpath(*args)
|
165
170
|
xpath(*args).first
|
166
171
|
end
|
167
172
|
|
173
|
+
# @!endgroup
|
174
|
+
|
168
175
|
private
|
169
176
|
|
170
|
-
def css_internal
|
177
|
+
def css_internal(node, rules, handler, ns)
|
171
178
|
xpath_internal node, css_rules_to_xpath(rules, ns), handler, ns, nil
|
172
179
|
end
|
173
180
|
|
174
|
-
def xpath_internal
|
181
|
+
def xpath_internal(node, paths, handler, ns, binds)
|
175
182
|
document = node.document
|
176
183
|
return NodeSet.new(document) unless document
|
177
184
|
|
@@ -186,12 +193,12 @@ module Nokogiri
|
|
186
193
|
end
|
187
194
|
end
|
188
195
|
|
189
|
-
def xpath_impl
|
196
|
+
def xpath_impl(node, path, handler, ns, binds)
|
190
197
|
ctx = XPathContext.new(node)
|
191
198
|
ctx.register_namespaces(ns)
|
192
|
-
path = path.gsub(/xmlns:/,
|
199
|
+
path = path.gsub(/xmlns:/, " :") unless Nokogiri.uses_libxml?
|
193
200
|
|
194
|
-
binds.each do |key,value|
|
201
|
+
binds.each do |key, value|
|
195
202
|
ctx.register_variable key.to_s, value
|
196
203
|
end if binds
|
197
204
|
|
@@ -202,13 +209,15 @@ module Nokogiri
|
|
202
209
|
rules.map { |rule| xpath_query_from_css_rule(rule, ns) }
|
203
210
|
end
|
204
211
|
|
205
|
-
def xpath_query_from_css_rule
|
212
|
+
def xpath_query_from_css_rule(rule, ns)
|
213
|
+
visitor = Nokogiri::CSS::XPathVisitorOptimallyUseBuiltins.new
|
206
214
|
self.class::IMPLIED_XPATH_CONTEXTS.map do |implied_xpath_context|
|
207
|
-
CSS.xpath_for(rule.to_s, :prefix => implied_xpath_context, :ns => ns
|
208
|
-
|
215
|
+
CSS.xpath_for(rule.to_s, {:prefix => implied_xpath_context, :ns => ns,
|
216
|
+
:visitor => visitor})
|
217
|
+
end.join(" | ")
|
209
218
|
end
|
210
219
|
|
211
|
-
def extract_params
|
220
|
+
def extract_params(params) # :nodoc:
|
212
221
|
handler = params.find do |param|
|
213
222
|
![Hash, String, Symbol].include?(param.class)
|
214
223
|
end
|
data/lib/nokogiri/xml/text.rb
CHANGED
data/lib/nokogiri/xml/xpath.rb
CHANGED
@@ -1,10 +1,9 @@
|
|
1
|
-
|
2
|
-
|
1
|
+
# frozen_string_literal: true
|
3
2
|
module Nokogiri
|
4
3
|
module XML
|
5
|
-
|
6
|
-
# The Nokogiri::XML::Document tied to this XPath instance
|
7
|
-
attr_accessor :document
|
4
|
+
module XPath
|
8
5
|
end
|
9
6
|
end
|
10
7
|
end
|
8
|
+
|
9
|
+
require_relative "xpath/syntax_error"
|
data/lib/nokogiri/xml.rb
CHANGED
@@ -1,37 +1,9 @@
|
|
1
|
-
|
2
|
-
require 'nokogiri/xml/parse_options'
|
3
|
-
require 'nokogiri/xml/sax'
|
4
|
-
require 'nokogiri/xml/searchable'
|
5
|
-
require 'nokogiri/xml/node'
|
6
|
-
require 'nokogiri/xml/attribute_decl'
|
7
|
-
require 'nokogiri/xml/element_decl'
|
8
|
-
require 'nokogiri/xml/element_content'
|
9
|
-
require 'nokogiri/xml/character_data'
|
10
|
-
require 'nokogiri/xml/namespace'
|
11
|
-
require 'nokogiri/xml/attr'
|
12
|
-
require 'nokogiri/xml/dtd'
|
13
|
-
require 'nokogiri/xml/cdata'
|
14
|
-
require 'nokogiri/xml/text'
|
15
|
-
require 'nokogiri/xml/document'
|
16
|
-
require 'nokogiri/xml/document_fragment'
|
17
|
-
require 'nokogiri/xml/processing_instruction'
|
18
|
-
require 'nokogiri/xml/node_set'
|
19
|
-
require 'nokogiri/xml/syntax_error'
|
20
|
-
require 'nokogiri/xml/xpath'
|
21
|
-
require 'nokogiri/xml/xpath_context'
|
22
|
-
require 'nokogiri/xml/builder'
|
23
|
-
require 'nokogiri/xml/reader'
|
24
|
-
require 'nokogiri/xml/notation'
|
25
|
-
require 'nokogiri/xml/entity_decl'
|
26
|
-
require 'nokogiri/xml/entity_reference'
|
27
|
-
require 'nokogiri/xml/schema'
|
28
|
-
require 'nokogiri/xml/relax_ng'
|
29
|
-
|
1
|
+
# frozen_string_literal: true
|
30
2
|
module Nokogiri
|
31
3
|
class << self
|
32
4
|
###
|
33
5
|
# Parse XML. Convenience method for Nokogiri::XML::Document.parse
|
34
|
-
def XML
|
6
|
+
def XML(thing, url = nil, encoding = nil, options = XML::ParseOptions::DEFAULT_XML, &block)
|
35
7
|
Nokogiri::XML::Document.parse(thing, url, encoding, options, &block)
|
36
8
|
end
|
37
9
|
end
|
@@ -40,20 +12,19 @@ module Nokogiri
|
|
40
12
|
# Original C14N 1.0 spec canonicalization
|
41
13
|
XML_C14N_1_0 = 0
|
42
14
|
# Exclusive C14N 1.0 spec canonicalization
|
43
|
-
XML_C14N_EXCLUSIVE_1_0 =
|
15
|
+
XML_C14N_EXCLUSIVE_1_0 = 1
|
44
16
|
# C14N 1.1 spec canonicalization
|
45
17
|
XML_C14N_1_1 = 2
|
46
18
|
class << self
|
47
19
|
###
|
48
20
|
# Parse an XML document using the Nokogiri::XML::Reader API. See
|
49
21
|
# Nokogiri::XML::Reader for more information
|
50
|
-
def Reader
|
51
|
-
|
22
|
+
def Reader(string_or_io, url = nil, encoding = nil, options = ParseOptions::STRICT)
|
52
23
|
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
53
24
|
# Give the options to the user
|
54
25
|
yield options if block_given?
|
55
26
|
|
56
|
-
if string_or_io.respond_to?
|
27
|
+
if string_or_io.respond_to?(:read)
|
57
28
|
return Reader.from_io(string_or_io, url, encoding, options.to_i)
|
58
29
|
end
|
59
30
|
Reader.from_memory(string_or_io, url, encoding, options.to_i)
|
@@ -61,15 +32,44 @@ module Nokogiri
|
|
61
32
|
|
62
33
|
###
|
63
34
|
# Parse XML. Convenience method for Nokogiri::XML::Document.parse
|
64
|
-
def parse
|
35
|
+
def parse(thing, url = nil, encoding = nil, options = ParseOptions::DEFAULT_XML, &block)
|
65
36
|
Document.parse(thing, url, encoding, options, &block)
|
66
37
|
end
|
67
38
|
|
68
39
|
####
|
69
40
|
# Parse a fragment from +string+ into a NodeSet.
|
70
|
-
def fragment
|
41
|
+
def fragment(string)
|
71
42
|
XML::DocumentFragment.parse(string)
|
72
43
|
end
|
73
44
|
end
|
74
45
|
end
|
75
46
|
end
|
47
|
+
|
48
|
+
require_relative "xml/pp"
|
49
|
+
require_relative "xml/parse_options"
|
50
|
+
require_relative "xml/sax"
|
51
|
+
require_relative "xml/searchable"
|
52
|
+
require_relative "xml/node"
|
53
|
+
require_relative "xml/attribute_decl"
|
54
|
+
require_relative "xml/element_decl"
|
55
|
+
require_relative "xml/element_content"
|
56
|
+
require_relative "xml/character_data"
|
57
|
+
require_relative "xml/namespace"
|
58
|
+
require_relative "xml/attr"
|
59
|
+
require_relative "xml/dtd"
|
60
|
+
require_relative "xml/cdata"
|
61
|
+
require_relative "xml/text"
|
62
|
+
require_relative "xml/document"
|
63
|
+
require_relative "xml/document_fragment"
|
64
|
+
require_relative "xml/processing_instruction"
|
65
|
+
require_relative "xml/node_set"
|
66
|
+
require_relative "xml/syntax_error"
|
67
|
+
require_relative "xml/xpath"
|
68
|
+
require_relative "xml/xpath_context"
|
69
|
+
require_relative "xml/builder"
|
70
|
+
require_relative "xml/reader"
|
71
|
+
require_relative "xml/notation"
|
72
|
+
require_relative "xml/entity_decl"
|
73
|
+
require_relative "xml/entity_reference"
|
74
|
+
require_relative "xml/schema"
|
75
|
+
require_relative "xml/relax_ng"
|
@@ -1,3 +1,4 @@
|
|
1
|
+
# frozen_string_literal: true
|
1
2
|
module Nokogiri
|
2
3
|
module XSLT
|
3
4
|
###
|
@@ -17,7 +18,7 @@ module Nokogiri
|
|
17
18
|
# Apply an XSLT stylesheet to an XML::Document.
|
18
19
|
# +params+ is an array of strings used as XSLT parameters.
|
19
20
|
# returns serialized document
|
20
|
-
def apply_to
|
21
|
+
def apply_to(document, params = [])
|
21
22
|
serialize(transform(document, params))
|
22
23
|
end
|
23
24
|
end
|