nokogiri 1.11.3 → 1.13.8
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +2 -0
- data/LICENSE-DEPENDENCIES.md +243 -22
- data/LICENSE.md +1 -1
- data/README.md +14 -11
- data/bin/nokogiri +63 -50
- data/dependencies.yml +13 -64
- data/ext/nokogiri/depend +35 -34
- data/ext/nokogiri/extconf.rb +237 -133
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/{html_document.c → html4_document.c} +8 -8
- data/ext/nokogiri/{html_element_description.c → html4_element_description.c} +21 -19
- data/ext/nokogiri/{html_entity_lookup.c → html4_entity_lookup.c} +7 -7
- data/ext/nokogiri/{html_sax_parser_context.c → html4_sax_parser_context.c} +8 -8
- data/ext/nokogiri/{html_sax_push_parser.c → html4_sax_push_parser.c} +4 -4
- data/ext/nokogiri/libxml2_backwards_compat.c +30 -30
- data/ext/nokogiri/nokogiri.c +70 -38
- data/ext/nokogiri/nokogiri.h +27 -9
- data/ext/nokogiri/xml_attr.c +2 -2
- data/ext/nokogiri/xml_attribute_decl.c +3 -3
- data/ext/nokogiri/xml_cdata.c +1 -1
- data/ext/nokogiri/xml_document.c +50 -50
- data/ext/nokogiri/xml_document_fragment.c +0 -2
- data/ext/nokogiri/xml_dtd.c +10 -10
- data/ext/nokogiri/xml_element_content.c +2 -0
- data/ext/nokogiri/xml_element_decl.c +3 -3
- data/ext/nokogiri/xml_encoding_handler.c +31 -12
- data/ext/nokogiri/xml_entity_decl.c +5 -5
- data/ext/nokogiri/xml_namespace.c +4 -2
- data/ext/nokogiri/xml_node.c +833 -492
- data/ext/nokogiri/xml_node_set.c +24 -24
- data/ext/nokogiri/xml_reader.c +90 -11
- data/ext/nokogiri/xml_sax_parser.c +6 -6
- data/ext/nokogiri/xml_sax_parser_context.c +12 -3
- data/ext/nokogiri/xml_schema.c +5 -3
- data/ext/nokogiri/xml_text.c +1 -1
- data/ext/nokogiri/xml_xpath_context.c +110 -85
- data/ext/nokogiri/xslt_stylesheet.c +109 -10
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4875 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +9 -8
- data/lib/nokogiri/css/parser.rb +361 -342
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +22 -20
- data/lib/nokogiri/css/syntax_error.rb +2 -1
- data/lib/nokogiri/css/tokenizer.rb +4 -3
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +179 -82
- data/lib/nokogiri/css.rb +49 -17
- data/lib/nokogiri/decorators/slop.rb +8 -7
- data/lib/nokogiri/extension.rb +8 -3
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +37 -27
- data/lib/nokogiri/{html → html4}/builder.rb +3 -2
- data/lib/nokogiri/{html → html4}/document.rb +92 -81
- data/lib/nokogiri/{html → html4}/document_fragment.rb +13 -9
- data/lib/nokogiri/{html → html4}/element_description.rb +2 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +3 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +16 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +11 -11
- data/lib/nokogiri/html4.rb +46 -0
- data/lib/nokogiri/html5/document.rb +91 -0
- data/lib/nokogiri/html5/document_fragment.rb +83 -0
- data/lib/nokogiri/html5/node.rb +100 -0
- data/lib/nokogiri/html5.rb +478 -0
- data/lib/nokogiri/jruby/dependencies.rb +10 -9
- data/lib/nokogiri/syntax_error.rb +1 -0
- data/lib/nokogiri/version/constant.rb +2 -1
- data/lib/nokogiri/version/info.rb +31 -14
- data/lib/nokogiri/version.rb +1 -0
- data/lib/nokogiri/xml/attr.rb +5 -3
- data/lib/nokogiri/xml/attribute_decl.rb +2 -1
- data/lib/nokogiri/xml/builder.rb +71 -31
- data/lib/nokogiri/xml/cdata.rb +2 -1
- data/lib/nokogiri/xml/character_data.rb +1 -0
- data/lib/nokogiri/xml/document.rb +183 -96
- data/lib/nokogiri/xml/document_fragment.rb +41 -38
- data/lib/nokogiri/xml/dtd.rb +3 -2
- data/lib/nokogiri/xml/element_content.rb +1 -0
- data/lib/nokogiri/xml/element_decl.rb +2 -1
- data/lib/nokogiri/xml/entity_decl.rb +3 -2
- data/lib/nokogiri/xml/entity_reference.rb +1 -0
- data/lib/nokogiri/xml/namespace.rb +2 -0
- data/lib/nokogiri/xml/node/save_options.rb +9 -5
- data/lib/nokogiri/xml/node.rb +525 -354
- data/lib/nokogiri/xml/node_set.rb +50 -54
- data/lib/nokogiri/xml/notation.rb +12 -0
- data/lib/nokogiri/xml/parse_options.rb +13 -6
- data/lib/nokogiri/xml/pp/character_data.rb +8 -6
- data/lib/nokogiri/xml/pp/node.rb +24 -26
- data/lib/nokogiri/xml/pp.rb +3 -2
- data/lib/nokogiri/xml/processing_instruction.rb +2 -1
- data/lib/nokogiri/xml/reader.rb +20 -24
- data/lib/nokogiri/xml/relax_ng.rb +1 -0
- data/lib/nokogiri/xml/sax/document.rb +44 -49
- data/lib/nokogiri/xml/sax/parser.rb +37 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +7 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +5 -5
- data/lib/nokogiri/xml/sax.rb +5 -4
- data/lib/nokogiri/xml/schema.rb +7 -6
- data/lib/nokogiri/xml/searchable.rb +93 -62
- data/lib/nokogiri/xml/syntax_error.rb +5 -4
- data/lib/nokogiri/xml/text.rb +1 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +2 -1
- data/lib/nokogiri/xml/xpath.rb +13 -1
- data/lib/nokogiri/xml/xpath_context.rb +2 -3
- data/lib/nokogiri/xml.rb +37 -37
- data/lib/nokogiri/xslt/stylesheet.rb +2 -1
- data/lib/nokogiri/xslt.rb +28 -20
- data/lib/nokogiri.rb +48 -43
- data/lib/xsd/xmlparser/nokogiri.rb +25 -24
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/{0004-libxml2.la-is-in-top_builddir.patch → 0003-libxml2.la-is-in-top_builddir.patch} +1 -1
- data/patches/libxml2/{0008-use-glibc-strlen.patch → 0004-use-glibc-strlen.patch} +3 -3
- data/patches/libxml2/{0009-avoid-isnan-isinf.patch → 0005-avoid-isnan-isinf.patch} +4 -4
- data/patches/libxml2/0006-update-automake-files-for-arm64.patch +3040 -0
- data/patches/libxml2/0008-htmlParseComment-handle-abruptly-closed-comments.patch +61 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +2445 -1919
- data/ports/archives/libxml2-2.9.14.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.35.tar.xz +0 -0
- metadata +204 -93
- data/lib/nokogiri/html/element_description_defaults.rb +0 -672
- data/lib/nokogiri/html/sax/parser_context.rb +0 -17
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/patches/libxml2/0006-htmlParseComment-treat-as-if-it-closed-the-comment.patch +0 -73
- data/patches/libxml2/0007-use-new-htmlParseLookupCommentEnd-to-find-comment-en.patch +0 -103
- data/patches/libxml2/0010-parser.c-shrink-the-input-buffer-when-appropriate.patch +0 -70
- data/patches/libxml2/0011-update-automake-files-for-arm64.patch +0 -2511
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
@@ -1,21 +1,21 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
###
|
5
|
-
# SAX Parsers are event driven parsers.
|
6
|
-
#
|
7
|
-
#
|
6
|
+
# SAX Parsers are event driven parsers. Nokogiri provides two different event based parsers when
|
7
|
+
# dealing with XML. If you want to do SAX style parsing using HTML, check out
|
8
|
+
# Nokogiri::HTML4::SAX.
|
8
9
|
#
|
9
|
-
# The basic way a SAX style parser works is by creating a parser,
|
10
|
-
#
|
11
|
-
#
|
12
|
-
# it encounters events you said you would like to know about.
|
10
|
+
# The basic way a SAX style parser works is by creating a parser, telling the parser about the
|
11
|
+
# events we're interested in, then giving the parser some XML to process. The parser will notify
|
12
|
+
# you when it encounters events you said you would like to know about.
|
13
13
|
#
|
14
|
-
# To register for events, you simply subclass Nokogiri::XML::SAX::Document,
|
15
|
-
#
|
14
|
+
# To register for events, you simply subclass Nokogiri::XML::SAX::Document, and implement the
|
15
|
+
# methods for which you would like notification.
|
16
16
|
#
|
17
|
-
# For example, if I want to be notified when a document ends, and when an
|
18
|
-
#
|
17
|
+
# For example, if I want to be notified when a document ends, and when an element starts, I
|
18
|
+
# would write a class like this:
|
19
19
|
#
|
20
20
|
# class MyDocument < Nokogiri::XML::SAX::Document
|
21
21
|
# def end_document
|
@@ -27,8 +27,7 @@ module Nokogiri
|
|
27
27
|
# end
|
28
28
|
# end
|
29
29
|
#
|
30
|
-
# Then I would instantiate a SAX parser with this document, and feed the
|
31
|
-
# parser some XML
|
30
|
+
# Then I would instantiate a SAX parser with this document, and feed the parser some XML
|
32
31
|
#
|
33
32
|
# # Create a new parser
|
34
33
|
# parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
|
@@ -36,25 +35,21 @@ module Nokogiri
|
|
36
35
|
# # Feed the parser some XML
|
37
36
|
# parser.parse(File.open(ARGV[0]))
|
38
37
|
#
|
39
|
-
# Now my document handler will be called when each node starts, and when
|
40
|
-
#
|
41
|
-
# a look at Nokogiri::XML::SAX::Document.
|
38
|
+
# Now my document handler will be called when each node starts, and when the document ends. To
|
39
|
+
# see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
|
42
40
|
#
|
43
|
-
# Two SAX parsers for XML are available, a parser that reads from a string
|
44
|
-
#
|
45
|
-
#
|
46
|
-
# use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
|
41
|
+
# Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
|
42
|
+
# feels necessary, and a parser that lets you spoon feed it XML. If you want to let Nokogiri
|
43
|
+
# deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
|
47
44
|
# control over the XML input, use the Nokogiri::XML::SAX::PushParser.
|
48
45
|
module SAX
|
49
46
|
###
|
50
|
-
# This class is used for registering types of events you are interested
|
51
|
-
#
|
52
|
-
#
|
53
|
-
#
|
54
|
-
# you are interested in knowing about.
|
47
|
+
# This class is used for registering types of events you are interested in handling. All of
|
48
|
+
# the methods on this class are available as possible events while parsing an XML document. To
|
49
|
+
# register for any particular event, just subclass this class and implement the methods you
|
50
|
+
# are interested in knowing about.
|
55
51
|
#
|
56
|
-
# To only be notified about start and end element events, write a class
|
57
|
-
# like this:
|
52
|
+
# To only be notified about start and end element events, write a class like this:
|
58
53
|
#
|
59
54
|
# class MyDocument < Nokogiri::XML::SAX::Document
|
60
55
|
# def start_element name, attrs = []
|
@@ -66,12 +61,12 @@ module Nokogiri
|
|
66
61
|
# end
|
67
62
|
# end
|
68
63
|
#
|
69
|
-
# You can use this event handler for any SAX style parser included with
|
70
|
-
# Nokogiri
|
64
|
+
# You can use this event handler for any SAX style parser included with Nokogiri. See
|
65
|
+
# Nokogiri::XML::SAX, and Nokogiri::HTML4::SAX.
|
71
66
|
class Document
|
72
67
|
###
|
73
68
|
# Called when an XML declaration is parsed
|
74
|
-
def xmldecl
|
69
|
+
def xmldecl(version, encoding, standalone)
|
75
70
|
end
|
76
71
|
|
77
72
|
###
|
@@ -89,13 +84,13 @@ module Nokogiri
|
|
89
84
|
# * +name+ is the name of the tag
|
90
85
|
# * +attrs+ are an assoc list of namespaces and attributes, e.g.:
|
91
86
|
# [ ["xmlns:foo", "http://sample.net"], ["size", "large"] ]
|
92
|
-
def start_element
|
87
|
+
def start_element(name, attrs = [])
|
93
88
|
end
|
94
89
|
|
95
90
|
###
|
96
91
|
# Called at the end of an element
|
97
92
|
# +name+ is the tag name
|
98
|
-
def end_element
|
93
|
+
def end_element(name)
|
99
94
|
end
|
100
95
|
|
101
96
|
###
|
@@ -105,16 +100,16 @@ module Nokogiri
|
|
105
100
|
# +prefix+ is the namespace prefix for the element
|
106
101
|
# +uri+ is the associated namespace URI
|
107
102
|
# +ns+ is a hash of namespace prefix:urls associated with the element
|
108
|
-
def start_element_namespace
|
103
|
+
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
|
109
104
|
###
|
110
105
|
# Deal with SAX v1 interface
|
111
|
-
name = [prefix, name].compact.join(
|
112
|
-
attributes = ns.map
|
113
|
-
[[
|
114
|
-
|
115
|
-
[[attr.prefix, attr.localname].compact.join(
|
116
|
-
|
117
|
-
start_element
|
106
|
+
name = [prefix, name].compact.join(":")
|
107
|
+
attributes = ns.map do |ns_prefix, ns_uri|
|
108
|
+
[["xmlns", ns_prefix].compact.join(":"), ns_uri]
|
109
|
+
end + attrs.map do |attr|
|
110
|
+
[[attr.prefix, attr.localname].compact.join(":"), attr.value]
|
111
|
+
end
|
112
|
+
start_element(name, attributes)
|
118
113
|
end
|
119
114
|
|
120
115
|
###
|
@@ -122,49 +117,49 @@ module Nokogiri
|
|
122
117
|
# +name+ is the element's name
|
123
118
|
# +prefix+ is the namespace prefix associated with the element
|
124
119
|
# +uri+ is the associated namespace URI
|
125
|
-
def end_element_namespace
|
120
|
+
def end_element_namespace(name, prefix = nil, uri = nil)
|
126
121
|
###
|
127
122
|
# Deal with SAX v1 interface
|
128
|
-
end_element
|
123
|
+
end_element([prefix, name].compact.join(":"))
|
129
124
|
end
|
130
125
|
|
131
126
|
###
|
132
|
-
# Characters read between a tag.
|
127
|
+
# Characters read between a tag. This method might be called multiple
|
133
128
|
# times given one contiguous string of characters.
|
134
129
|
#
|
135
130
|
# +string+ contains the character data
|
136
|
-
def characters
|
131
|
+
def characters(string)
|
137
132
|
end
|
138
133
|
|
139
134
|
###
|
140
135
|
# Called when comments are encountered
|
141
136
|
# +string+ contains the comment data
|
142
|
-
def comment
|
137
|
+
def comment(string)
|
143
138
|
end
|
144
139
|
|
145
140
|
###
|
146
141
|
# Called on document warnings
|
147
142
|
# +string+ contains the warning
|
148
|
-
def warning
|
143
|
+
def warning(string)
|
149
144
|
end
|
150
145
|
|
151
146
|
###
|
152
147
|
# Called on document errors
|
153
148
|
# +string+ contains the error
|
154
|
-
def error
|
149
|
+
def error(string)
|
155
150
|
end
|
156
151
|
|
157
152
|
###
|
158
153
|
# Called when cdata blocks are found
|
159
154
|
# +string+ contains the cdata content
|
160
|
-
def cdata_block
|
155
|
+
def cdata_block(string)
|
161
156
|
end
|
162
157
|
|
163
158
|
###
|
164
159
|
# Called when processing instructions are found
|
165
160
|
# +name+ is the target of the instruction
|
166
161
|
# +content+ is the value of the instruction
|
167
|
-
def processing_instruction
|
162
|
+
def processing_instruction(name, content)
|
168
163
|
end
|
169
164
|
end
|
170
165
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
module SAX
|
@@ -36,29 +37,29 @@ module Nokogiri
|
|
36
37
|
|
37
38
|
# Encodings this parser supports
|
38
39
|
ENCODINGS = {
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
61
|
-
|
40
|
+
"NONE" => 0, # No char encoding detected
|
41
|
+
"UTF-8" => 1, # UTF-8
|
42
|
+
"UTF16LE" => 2, # UTF-16 little endian
|
43
|
+
"UTF16BE" => 3, # UTF-16 big endian
|
44
|
+
"UCS4LE" => 4, # UCS-4 little endian
|
45
|
+
"UCS4BE" => 5, # UCS-4 big endian
|
46
|
+
"EBCDIC" => 6, # EBCDIC uh!
|
47
|
+
"UCS4-2143" => 7, # UCS-4 unusual ordering
|
48
|
+
"UCS4-3412" => 8, # UCS-4 unusual ordering
|
49
|
+
"UCS2" => 9, # UCS-2
|
50
|
+
"ISO-8859-1" => 10, # ISO-8859-1 ISO Latin 1
|
51
|
+
"ISO-8859-2" => 11, # ISO-8859-2 ISO Latin 2
|
52
|
+
"ISO-8859-3" => 12, # ISO-8859-3
|
53
|
+
"ISO-8859-4" => 13, # ISO-8859-4
|
54
|
+
"ISO-8859-5" => 14, # ISO-8859-5
|
55
|
+
"ISO-8859-6" => 15, # ISO-8859-6
|
56
|
+
"ISO-8859-7" => 16, # ISO-8859-7
|
57
|
+
"ISO-8859-8" => 17, # ISO-8859-8
|
58
|
+
"ISO-8859-9" => 18, # ISO-8859-9
|
59
|
+
"ISO-2022-JP" => 19, # ISO-2022-JP
|
60
|
+
"SHIFT-JIS" => 20, # Shift_JIS
|
61
|
+
"EUC-JP" => 21, # EUC-JP
|
62
|
+
"ASCII" => 22, # pure ASCII
|
62
63
|
}
|
63
64
|
|
64
65
|
# The Nokogiri::XML::SAX::Document where events will be sent.
|
@@ -68,7 +69,7 @@ module Nokogiri
|
|
68
69
|
attr_accessor :encoding
|
69
70
|
|
70
71
|
# Create a new Parser with +doc+ and +encoding+
|
71
|
-
def initialize
|
72
|
+
def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = "UTF-8")
|
72
73
|
@encoding = check_encoding(encoding)
|
73
74
|
@document = doc
|
74
75
|
@warned = false
|
@@ -77,7 +78,7 @@ module Nokogiri
|
|
77
78
|
###
|
78
79
|
# Parse given +thing+ which may be a string containing xml, or an
|
79
80
|
# IO object.
|
80
|
-
def parse
|
81
|
+
def parse(thing, &block)
|
81
82
|
if thing.respond_to?(:read) && thing.respond_to?(:close)
|
82
83
|
parse_io(thing, &block)
|
83
84
|
else
|
@@ -87,34 +88,36 @@ module Nokogiri
|
|
87
88
|
|
88
89
|
###
|
89
90
|
# Parse given +io+
|
90
|
-
def parse_io
|
91
|
+
def parse_io(io, encoding = "ASCII")
|
91
92
|
@encoding = check_encoding(encoding)
|
92
93
|
ctx = ParserContext.io(io, ENCODINGS[@encoding])
|
93
94
|
yield ctx if block_given?
|
94
|
-
ctx.parse_with
|
95
|
+
ctx.parse_with(self)
|
95
96
|
end
|
96
97
|
|
97
98
|
###
|
98
99
|
# Parse a file with +filename+
|
99
|
-
def parse_file
|
100
|
+
def parse_file(filename)
|
100
101
|
raise ArgumentError unless filename
|
101
102
|
raise Errno::ENOENT unless File.exist?(filename)
|
102
103
|
raise Errno::EISDIR if File.directory?(filename)
|
103
|
-
|
104
|
+
|
105
|
+
ctx = ParserContext.file(filename)
|
104
106
|
yield ctx if block_given?
|
105
|
-
ctx.parse_with
|
107
|
+
ctx.parse_with(self)
|
106
108
|
end
|
107
109
|
|
108
|
-
def parse_memory
|
109
|
-
ctx = ParserContext.memory
|
110
|
+
def parse_memory(data)
|
111
|
+
ctx = ParserContext.memory(data)
|
110
112
|
yield ctx if block_given?
|
111
|
-
ctx.parse_with
|
113
|
+
ctx.parse_with(self)
|
112
114
|
end
|
113
115
|
|
114
116
|
private
|
117
|
+
|
115
118
|
def check_encoding(encoding)
|
116
119
|
encoding.upcase.tap do |enc|
|
117
|
-
raise ArgumentError
|
120
|
+
raise ArgumentError, "'#{enc}' is not a valid encoding" unless ENCODINGS[enc]
|
118
121
|
end
|
119
122
|
end
|
120
123
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
module SAX
|
@@ -7,9 +8,12 @@ module Nokogiri
|
|
7
8
|
# by the user. Instead, you should be looking at
|
8
9
|
# Nokogiri::XML::SAX::Parser
|
9
10
|
class ParserContext
|
10
|
-
def self.new
|
11
|
-
[:read, :close].all? { |x| thing.respond_to?(x) }
|
12
|
-
io(thing, Parser::ENCODINGS[encoding])
|
11
|
+
def self.new(thing, encoding = "UTF-8")
|
12
|
+
if [:read, :close].all? { |x| thing.respond_to?(x) }
|
13
|
+
io(thing, Parser::ENCODINGS[encoding])
|
14
|
+
else
|
15
|
+
memory(thing)
|
16
|
+
end
|
13
17
|
end
|
14
18
|
end
|
15
19
|
end
|
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
module SAX
|
@@ -24,7 +25,6 @@ module Nokogiri
|
|
24
25
|
# parser << "/div>"
|
25
26
|
# parser.finish
|
26
27
|
class PushParser
|
27
|
-
|
28
28
|
# The Nokogiri::XML::SAX::Document on which the PushParser will be
|
29
29
|
# operating
|
30
30
|
attr_accessor :document
|
@@ -32,7 +32,7 @@ module Nokogiri
|
|
32
32
|
###
|
33
33
|
# Create a new PushParser with +doc+ as the SAX Document, providing
|
34
34
|
# an optional +file_name+ and +encoding+
|
35
|
-
def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding =
|
35
|
+
def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8")
|
36
36
|
@document = doc
|
37
37
|
@encoding = encoding
|
38
38
|
@sax_parser = XML::SAX::Parser.new(doc)
|
@@ -44,16 +44,16 @@ module Nokogiri
|
|
44
44
|
###
|
45
45
|
# Write a +chunk+ of XML to the PushParser. Any callback methods
|
46
46
|
# that can be called will be called immediately.
|
47
|
-
def write
|
47
|
+
def write(chunk, last_chunk = false)
|
48
48
|
native_write(chunk, last_chunk)
|
49
49
|
end
|
50
|
-
|
50
|
+
alias_method :<<, :write
|
51
51
|
|
52
52
|
###
|
53
53
|
# Finish the parsing. This method is only necessary for
|
54
54
|
# Nokogiri::XML::SAX::Document#end_document to be called.
|
55
55
|
def finish
|
56
|
-
write
|
56
|
+
write("", true)
|
57
57
|
end
|
58
58
|
end
|
59
59
|
end
|
data/lib/nokogiri/xml/sax.rb
CHANGED
@@ -1,5 +1,6 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
-
|
3
|
-
|
4
|
-
|
5
|
-
|
2
|
+
|
3
|
+
require_relative "sax/document"
|
4
|
+
require_relative "sax/parser_context"
|
5
|
+
require_relative "sax/parser"
|
6
|
+
require_relative "sax/push_parser"
|
data/lib/nokogiri/xml/schema.rb
CHANGED
@@ -1,4 +1,5 @@
|
|
1
1
|
# frozen_string_literal: true
|
2
|
+
|
2
3
|
module Nokogiri
|
3
4
|
module XML
|
4
5
|
class << self
|
@@ -42,7 +43,7 @@ module Nokogiri
|
|
42
43
|
###
|
43
44
|
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
|
44
45
|
# object.
|
45
|
-
def self.new
|
46
|
+
def self.new(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
|
46
47
|
from_document(Nokogiri::XML(string_or_io), options)
|
47
48
|
end
|
48
49
|
|
@@ -51,9 +52,9 @@ module Nokogiri
|
|
51
52
|
# Nokogiri::XML::Document object, or a filename. An Array of
|
52
53
|
# Nokogiri::XML::SyntaxError objects found while validating the
|
53
54
|
# +thing+ is returned.
|
54
|
-
def validate
|
55
|
-
if thing.is_a?(Nokogiri::XML::Document)
|
56
|
-
validate_document(thing)
|
55
|
+
def validate(thing)
|
56
|
+
if thing.is_a?(Nokogiri::XML::Document)
|
57
|
+
validate_document(thing)
|
57
58
|
elsif File.file?(thing)
|
58
59
|
validate_file(thing)
|
59
60
|
else
|
@@ -64,8 +65,8 @@ module Nokogiri
|
|
64
65
|
###
|
65
66
|
# Returns true if +thing+ is a valid Nokogiri::XML::Document or
|
66
67
|
# file.
|
67
|
-
def valid?
|
68
|
-
validate(thing).
|
68
|
+
def valid?(thing)
|
69
|
+
validate(thing).empty?
|
69
70
|
end
|
70
71
|
end
|
71
72
|
end
|