nokogiri 1.10.3 → 1.13.9
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +5 -0
- data/LICENSE-DEPENDENCIES.md +1173 -884
- data/LICENSE.md +1 -1
- data/README.md +178 -96
- data/bin/nokogiri +63 -50
- data/dependencies.yml +13 -62
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +761 -424
- data/ext/nokogiri/gumbo.c +584 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +119 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +228 -91
- data/ext/nokogiri/nokogiri.h +199 -88
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +17 -17
- data/ext/nokogiri/xml_attribute_decl.c +21 -21
- data/ext/nokogiri/xml_cdata.c +14 -19
- data/ext/nokogiri/xml_comment.c +19 -26
- data/ext/nokogiri/xml_document.c +296 -220
- data/ext/nokogiri/xml_document_fragment.c +12 -16
- data/ext/nokogiri/xml_dtd.c +64 -58
- data/ext/nokogiri/xml_element_content.c +31 -26
- data/ext/nokogiri/xml_element_decl.c +25 -25
- data/ext/nokogiri/xml_encoding_handler.c +43 -18
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +16 -18
- data/ext/nokogiri/xml_namespace.c +98 -53
- data/ext/nokogiri/xml_node.c +1065 -653
- data/ext/nokogiri/xml_node_set.c +178 -166
- data/ext/nokogiri/xml_processing_instruction.c +17 -19
- data/ext/nokogiri/xml_reader.c +277 -175
- data/ext/nokogiri/xml_relax_ng.c +52 -28
- data/ext/nokogiri/xml_sax_parser.c +112 -112
- data/ext/nokogiri/xml_sax_parser_context.c +112 -86
- data/ext/nokogiri/xml_sax_push_parser.c +36 -27
- data/ext/nokogiri/xml_schema.c +114 -35
- data/ext/nokogiri/xml_syntax_error.c +42 -21
- data/ext/nokogiri/xml_text.c +14 -18
- data/ext/nokogiri/xml_xpath_context.c +226 -115
- data/ext/nokogiri/xslt_stylesheet.c +265 -173
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +101 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +626 -0
- data/gumbo-parser/src/error.h +148 -0
- data/gumbo-parser/src/foreign_attrs.c +104 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/gumbo.h +943 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/parser.c +4875 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +222 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +169 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3463 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +68 -0
- data/gumbo-parser/src/util.h +30 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +10 -8
- data/lib/nokogiri/css/parser.rb +397 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +54 -49
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +107 -104
- data/lib/nokogiri/css/tokenizer.rex +3 -2
- data/lib/nokogiri/css/xpath_visitor.rb +218 -91
- data/lib/nokogiri/css.rb +50 -17
- data/lib/nokogiri/decorators/slop.rb +9 -7
- data/lib/nokogiri/extension.rb +31 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/{html → html4}/document.rb +103 -105
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +578 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/{html → html4}/sax/parser.rb +17 -16
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +46 -0
- data/lib/nokogiri/html5/document.rb +91 -0
- data/lib/nokogiri/html5/document_fragment.rb +83 -0
- data/lib/nokogiri/html5/node.rb +100 -0
- data/lib/nokogiri/html5.rb +478 -0
- data/lib/nokogiri/jruby/dependencies.rb +21 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +222 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +6 -3
- data/lib/nokogiri/xml/attribute_decl.rb +3 -1
- data/lib/nokogiri/xml/builder.rb +97 -53
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +224 -86
- data/lib/nokogiri/xml/document_fragment.rb +46 -44
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +2 -0
- data/lib/nokogiri/xml/element_decl.rb +3 -1
- data/lib/nokogiri/xml/entity_decl.rb +4 -2
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +3 -0
- data/lib/nokogiri/xml/node/save_options.rb +10 -5
- data/lib/nokogiri/xml/node.rb +884 -378
- data/lib/nokogiri/xml/node_set.rb +51 -54
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +22 -8
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +25 -26
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +3 -1
- data/lib/nokogiri/xml/reader.rb +21 -28
- data/lib/nokogiri/xml/relax_ng.rb +8 -2
- data/lib/nokogiri/xml/sax/document.rb +45 -49
- data/lib/nokogiri/xml/sax/parser.rb +38 -34
- data/lib/nokogiri/xml/sax/parser_context.rb +8 -3
- data/lib/nokogiri/xml/sax/push_parser.rb +6 -5
- data/lib/nokogiri/xml/sax.rb +6 -4
- data/lib/nokogiri/xml/schema.rb +19 -9
- data/lib/nokogiri/xml/searchable.rb +112 -72
- data/lib/nokogiri/xml/syntax_error.rb +6 -4
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +3 -3
- data/lib/nokogiri/xml.rb +38 -37
- data/lib/nokogiri/xslt/stylesheet.rb +3 -1
- data/lib/nokogiri/xslt.rb +29 -20
- data/lib/nokogiri.rb +49 -65
- data/lib/xsd/xmlparser/nokogiri.rb +26 -24
- data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
- data/patches/libxml2/0003-libxml2.la-is-in-top_builddir.patch +25 -0
- data/patches/libxml2/0005-avoid-isnan-isinf.patch +81 -0
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxslt/0001-update-automake-files-for-arm64.patch +3037 -0
- data/ports/archives/libxml2-2.10.3.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.37.tar.xz +0 -0
- metadata +205 -138
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxslt/0001-Fix-security-framework-bypass.patch +0 -120
- data/ports/archives/libxml2-2.9.9.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.33.tar.gz +0 -0
@@ -1,20 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
4
|
module XML
|
3
5
|
###
|
4
|
-
# SAX Parsers are event driven parsers.
|
5
|
-
#
|
6
|
-
#
|
6
|
+
# SAX Parsers are event driven parsers. Nokogiri provides two different event based parsers when
|
7
|
+
# dealing with XML. If you want to do SAX style parsing using HTML, check out
|
8
|
+
# Nokogiri::HTML4::SAX.
|
7
9
|
#
|
8
|
-
# The basic way a SAX style parser works is by creating a parser,
|
9
|
-
#
|
10
|
-
#
|
11
|
-
# it encounters events you said you would like to know about.
|
10
|
+
# The basic way a SAX style parser works is by creating a parser, telling the parser about the
|
11
|
+
# events we're interested in, then giving the parser some XML to process. The parser will notify
|
12
|
+
# you when it encounters events you said you would like to know about.
|
12
13
|
#
|
13
|
-
# To register for events, you simply subclass Nokogiri::XML::SAX::Document,
|
14
|
-
#
|
14
|
+
# To register for events, you simply subclass Nokogiri::XML::SAX::Document, and implement the
|
15
|
+
# methods for which you would like notification.
|
15
16
|
#
|
16
|
-
# For example, if I want to be notified when a document ends, and when an
|
17
|
-
#
|
17
|
+
# For example, if I want to be notified when a document ends, and when an element starts, I
|
18
|
+
# would write a class like this:
|
18
19
|
#
|
19
20
|
# class MyDocument < Nokogiri::XML::SAX::Document
|
20
21
|
# def end_document
|
@@ -26,8 +27,7 @@ module Nokogiri
|
|
26
27
|
# end
|
27
28
|
# end
|
28
29
|
#
|
29
|
-
# Then I would instantiate a SAX parser with this document, and feed the
|
30
|
-
# parser some XML
|
30
|
+
# Then I would instantiate a SAX parser with this document, and feed the parser some XML
|
31
31
|
#
|
32
32
|
# # Create a new parser
|
33
33
|
# parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
|
@@ -35,25 +35,21 @@ module Nokogiri
|
|
35
35
|
# # Feed the parser some XML
|
36
36
|
# parser.parse(File.open(ARGV[0]))
|
37
37
|
#
|
38
|
-
# Now my document handler will be called when each node starts, and when
|
39
|
-
#
|
40
|
-
# a look at Nokogiri::XML::SAX::Document.
|
38
|
+
# Now my document handler will be called when each node starts, and when the document ends. To
|
39
|
+
# see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
|
41
40
|
#
|
42
|
-
# Two SAX parsers for XML are available, a parser that reads from a string
|
43
|
-
#
|
44
|
-
#
|
45
|
-
# use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
|
41
|
+
# Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
|
42
|
+
# feels necessary, and a parser that lets you spoon feed it XML. If you want to let Nokogiri
|
43
|
+
# deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
|
46
44
|
# control over the XML input, use the Nokogiri::XML::SAX::PushParser.
|
47
45
|
module SAX
|
48
46
|
###
|
49
|
-
# This class is used for registering types of events you are interested
|
50
|
-
#
|
51
|
-
#
|
52
|
-
#
|
53
|
-
# you are interested in knowing about.
|
47
|
+
# This class is used for registering types of events you are interested in handling. All of
|
48
|
+
# the methods on this class are available as possible events while parsing an XML document. To
|
49
|
+
# register for any particular event, just subclass this class and implement the methods you
|
50
|
+
# are interested in knowing about.
|
54
51
|
#
|
55
|
-
# To only be notified about start and end element events, write a class
|
56
|
-
# like this:
|
52
|
+
# To only be notified about start and end element events, write a class like this:
|
57
53
|
#
|
58
54
|
# class MyDocument < Nokogiri::XML::SAX::Document
|
59
55
|
# def start_element name, attrs = []
|
@@ -65,12 +61,12 @@ module Nokogiri
|
|
65
61
|
# end
|
66
62
|
# end
|
67
63
|
#
|
68
|
-
# You can use this event handler for any SAX style parser included with
|
69
|
-
# Nokogiri
|
64
|
+
# You can use this event handler for any SAX style parser included with Nokogiri. See
|
65
|
+
# Nokogiri::XML::SAX, and Nokogiri::HTML4::SAX.
|
70
66
|
class Document
|
71
67
|
###
|
72
68
|
# Called when an XML declaration is parsed
|
73
|
-
def xmldecl
|
69
|
+
def xmldecl(version, encoding, standalone)
|
74
70
|
end
|
75
71
|
|
76
72
|
###
|
@@ -88,13 +84,13 @@ module Nokogiri
|
|
88
84
|
# * +name+ is the name of the tag
|
89
85
|
# * +attrs+ are an assoc list of namespaces and attributes, e.g.:
|
90
86
|
# [ ["xmlns:foo", "http://sample.net"], ["size", "large"] ]
|
91
|
-
def start_element
|
87
|
+
def start_element(name, attrs = [])
|
92
88
|
end
|
93
89
|
|
94
90
|
###
|
95
91
|
# Called at the end of an element
|
96
92
|
# +name+ is the tag name
|
97
|
-
def end_element
|
93
|
+
def end_element(name)
|
98
94
|
end
|
99
95
|
|
100
96
|
###
|
@@ -104,16 +100,16 @@ module Nokogiri
|
|
104
100
|
# +prefix+ is the namespace prefix for the element
|
105
101
|
# +uri+ is the associated namespace URI
|
106
102
|
# +ns+ is a hash of namespace prefix:urls associated with the element
|
107
|
-
def start_element_namespace
|
103
|
+
def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
|
108
104
|
###
|
109
105
|
# Deal with SAX v1 interface
|
110
|
-
name = [prefix, name].compact.join(
|
111
|
-
attributes = ns.map
|
112
|
-
[[
|
113
|
-
|
114
|
-
[[attr.prefix, attr.localname].compact.join(
|
115
|
-
|
116
|
-
start_element
|
106
|
+
name = [prefix, name].compact.join(":")
|
107
|
+
attributes = ns.map do |ns_prefix, ns_uri|
|
108
|
+
[["xmlns", ns_prefix].compact.join(":"), ns_uri]
|
109
|
+
end + attrs.map do |attr|
|
110
|
+
[[attr.prefix, attr.localname].compact.join(":"), attr.value]
|
111
|
+
end
|
112
|
+
start_element(name, attributes)
|
117
113
|
end
|
118
114
|
|
119
115
|
###
|
@@ -121,49 +117,49 @@ module Nokogiri
|
|
121
117
|
# +name+ is the element's name
|
122
118
|
# +prefix+ is the namespace prefix associated with the element
|
123
119
|
# +uri+ is the associated namespace URI
|
124
|
-
def end_element_namespace
|
120
|
+
def end_element_namespace(name, prefix = nil, uri = nil)
|
125
121
|
###
|
126
122
|
# Deal with SAX v1 interface
|
127
|
-
end_element
|
123
|
+
end_element([prefix, name].compact.join(":"))
|
128
124
|
end
|
129
125
|
|
130
126
|
###
|
131
|
-
# Characters read between a tag.
|
127
|
+
# Characters read between a tag. This method might be called multiple
|
132
128
|
# times given one contiguous string of characters.
|
133
129
|
#
|
134
130
|
# +string+ contains the character data
|
135
|
-
def characters
|
131
|
+
def characters(string)
|
136
132
|
end
|
137
133
|
|
138
134
|
###
|
139
135
|
# Called when comments are encountered
|
140
136
|
# +string+ contains the comment data
|
141
|
-
def comment
|
137
|
+
def comment(string)
|
142
138
|
end
|
143
139
|
|
144
140
|
###
|
145
141
|
# Called on document warnings
|
146
142
|
# +string+ contains the warning
|
147
|
-
def warning
|
143
|
+
def warning(string)
|
148
144
|
end
|
149
145
|
|
150
146
|
###
|
151
147
|
# Called on document errors
|
152
148
|
# +string+ contains the error
|
153
|
-
def error
|
149
|
+
def error(string)
|
154
150
|
end
|
155
151
|
|
156
152
|
###
|
157
153
|
# Called when cdata blocks are found
|
158
154
|
# +string+ contains the cdata content
|
159
|
-
def cdata_block
|
155
|
+
def cdata_block(string)
|
160
156
|
end
|
161
157
|
|
162
158
|
###
|
163
159
|
# Called when processing instructions are found
|
164
160
|
# +name+ is the target of the instruction
|
165
161
|
# +content+ is the value of the instruction
|
166
|
-
def processing_instruction
|
162
|
+
def processing_instruction(name, content)
|
167
163
|
end
|
168
164
|
end
|
169
165
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
4
|
module XML
|
3
5
|
module SAX
|
@@ -35,29 +37,29 @@ module Nokogiri
|
|
35
37
|
|
36
38
|
# Encodings this parser supports
|
37
39
|
ENCODINGS = {
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
42
|
-
|
43
|
-
|
44
|
-
|
45
|
-
|
46
|
-
|
47
|
-
|
48
|
-
|
49
|
-
|
50
|
-
|
51
|
-
|
52
|
-
|
53
|
-
|
54
|
-
|
55
|
-
|
56
|
-
|
57
|
-
|
58
|
-
|
59
|
-
|
60
|
-
|
40
|
+
"NONE" => 0, # No char encoding detected
|
41
|
+
"UTF-8" => 1, # UTF-8
|
42
|
+
"UTF16LE" => 2, # UTF-16 little endian
|
43
|
+
"UTF16BE" => 3, # UTF-16 big endian
|
44
|
+
"UCS4LE" => 4, # UCS-4 little endian
|
45
|
+
"UCS4BE" => 5, # UCS-4 big endian
|
46
|
+
"EBCDIC" => 6, # EBCDIC uh!
|
47
|
+
"UCS4-2143" => 7, # UCS-4 unusual ordering
|
48
|
+
"UCS4-3412" => 8, # UCS-4 unusual ordering
|
49
|
+
"UCS2" => 9, # UCS-2
|
50
|
+
"ISO-8859-1" => 10, # ISO-8859-1 ISO Latin 1
|
51
|
+
"ISO-8859-2" => 11, # ISO-8859-2 ISO Latin 2
|
52
|
+
"ISO-8859-3" => 12, # ISO-8859-3
|
53
|
+
"ISO-8859-4" => 13, # ISO-8859-4
|
54
|
+
"ISO-8859-5" => 14, # ISO-8859-5
|
55
|
+
"ISO-8859-6" => 15, # ISO-8859-6
|
56
|
+
"ISO-8859-7" => 16, # ISO-8859-7
|
57
|
+
"ISO-8859-8" => 17, # ISO-8859-8
|
58
|
+
"ISO-8859-9" => 18, # ISO-8859-9
|
59
|
+
"ISO-2022-JP" => 19, # ISO-2022-JP
|
60
|
+
"SHIFT-JIS" => 20, # Shift_JIS
|
61
|
+
"EUC-JP" => 21, # EUC-JP
|
62
|
+
"ASCII" => 22, # pure ASCII
|
61
63
|
}
|
62
64
|
|
63
65
|
# The Nokogiri::XML::SAX::Document where events will be sent.
|
@@ -67,7 +69,7 @@ module Nokogiri
|
|
67
69
|
attr_accessor :encoding
|
68
70
|
|
69
71
|
# Create a new Parser with +doc+ and +encoding+
|
70
|
-
def initialize
|
72
|
+
def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = "UTF-8")
|
71
73
|
@encoding = check_encoding(encoding)
|
72
74
|
@document = doc
|
73
75
|
@warned = false
|
@@ -76,7 +78,7 @@ module Nokogiri
|
|
76
78
|
###
|
77
79
|
# Parse given +thing+ which may be a string containing xml, or an
|
78
80
|
# IO object.
|
79
|
-
def parse
|
81
|
+
def parse(thing, &block)
|
80
82
|
if thing.respond_to?(:read) && thing.respond_to?(:close)
|
81
83
|
parse_io(thing, &block)
|
82
84
|
else
|
@@ -86,34 +88,36 @@ module Nokogiri
|
|
86
88
|
|
87
89
|
###
|
88
90
|
# Parse given +io+
|
89
|
-
def parse_io
|
91
|
+
def parse_io(io, encoding = "ASCII")
|
90
92
|
@encoding = check_encoding(encoding)
|
91
93
|
ctx = ParserContext.io(io, ENCODINGS[@encoding])
|
92
94
|
yield ctx if block_given?
|
93
|
-
ctx.parse_with
|
95
|
+
ctx.parse_with(self)
|
94
96
|
end
|
95
97
|
|
96
98
|
###
|
97
99
|
# Parse a file with +filename+
|
98
|
-
def parse_file
|
100
|
+
def parse_file(filename)
|
99
101
|
raise ArgumentError unless filename
|
100
102
|
raise Errno::ENOENT unless File.exist?(filename)
|
101
103
|
raise Errno::EISDIR if File.directory?(filename)
|
102
|
-
|
104
|
+
|
105
|
+
ctx = ParserContext.file(filename)
|
103
106
|
yield ctx if block_given?
|
104
|
-
ctx.parse_with
|
107
|
+
ctx.parse_with(self)
|
105
108
|
end
|
106
109
|
|
107
|
-
def parse_memory
|
108
|
-
ctx = ParserContext.memory
|
110
|
+
def parse_memory(data)
|
111
|
+
ctx = ParserContext.memory(data)
|
109
112
|
yield ctx if block_given?
|
110
|
-
ctx.parse_with
|
113
|
+
ctx.parse_with(self)
|
111
114
|
end
|
112
115
|
|
113
116
|
private
|
117
|
+
|
114
118
|
def check_encoding(encoding)
|
115
119
|
encoding.upcase.tap do |enc|
|
116
|
-
raise ArgumentError
|
120
|
+
raise ArgumentError, "'#{enc}' is not a valid encoding" unless ENCODINGS[enc]
|
117
121
|
end
|
118
122
|
end
|
119
123
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
4
|
module XML
|
3
5
|
module SAX
|
@@ -6,9 +8,12 @@ module Nokogiri
|
|
6
8
|
# by the user. Instead, you should be looking at
|
7
9
|
# Nokogiri::XML::SAX::Parser
|
8
10
|
class ParserContext
|
9
|
-
def self.new
|
10
|
-
[:read, :close].all? { |x| thing.respond_to?(x) }
|
11
|
-
io(thing, Parser::ENCODINGS[encoding])
|
11
|
+
def self.new(thing, encoding = "UTF-8")
|
12
|
+
if [:read, :close].all? { |x| thing.respond_to?(x) }
|
13
|
+
io(thing, Parser::ENCODINGS[encoding])
|
14
|
+
else
|
15
|
+
memory(thing)
|
16
|
+
end
|
12
17
|
end
|
13
18
|
end
|
14
19
|
end
|
@@ -1,3 +1,5 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
4
|
module XML
|
3
5
|
module SAX
|
@@ -23,7 +25,6 @@ module Nokogiri
|
|
23
25
|
# parser << "/div>"
|
24
26
|
# parser.finish
|
25
27
|
class PushParser
|
26
|
-
|
27
28
|
# The Nokogiri::XML::SAX::Document on which the PushParser will be
|
28
29
|
# operating
|
29
30
|
attr_accessor :document
|
@@ -31,7 +32,7 @@ module Nokogiri
|
|
31
32
|
###
|
32
33
|
# Create a new PushParser with +doc+ as the SAX Document, providing
|
33
34
|
# an optional +file_name+ and +encoding+
|
34
|
-
def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding =
|
35
|
+
def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8")
|
35
36
|
@document = doc
|
36
37
|
@encoding = encoding
|
37
38
|
@sax_parser = XML::SAX::Parser.new(doc)
|
@@ -43,16 +44,16 @@ module Nokogiri
|
|
43
44
|
###
|
44
45
|
# Write a +chunk+ of XML to the PushParser. Any callback methods
|
45
46
|
# that can be called will be called immediately.
|
46
|
-
def write
|
47
|
+
def write(chunk, last_chunk = false)
|
47
48
|
native_write(chunk, last_chunk)
|
48
49
|
end
|
49
|
-
|
50
|
+
alias_method :<<, :write
|
50
51
|
|
51
52
|
###
|
52
53
|
# Finish the parsing. This method is only necessary for
|
53
54
|
# Nokogiri::XML::SAX::Document#end_document to be called.
|
54
55
|
def finish
|
55
|
-
write
|
56
|
+
write("", true)
|
56
57
|
end
|
57
58
|
end
|
58
59
|
end
|
data/lib/nokogiri/xml/sax.rb
CHANGED
@@ -1,4 +1,6 @@
|
|
1
|
-
|
2
|
-
|
3
|
-
|
4
|
-
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
require_relative "sax/document"
|
4
|
+
require_relative "sax/parser_context"
|
5
|
+
require_relative "sax/parser"
|
6
|
+
require_relative "sax/push_parser"
|
data/lib/nokogiri/xml/schema.rb
CHANGED
@@ -1,11 +1,13 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
4
|
module XML
|
3
5
|
class << self
|
4
6
|
###
|
5
7
|
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
|
6
8
|
# object.
|
7
|
-
def Schema
|
8
|
-
Schema.new(string_or_io)
|
9
|
+
def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
|
10
|
+
Schema.new(string_or_io, options)
|
9
11
|
end
|
10
12
|
end
|
11
13
|
|
@@ -26,15 +28,23 @@ module Nokogiri
|
|
26
28
|
# end
|
27
29
|
#
|
28
30
|
# The list of errors are Nokogiri::XML::SyntaxError objects.
|
31
|
+
#
|
32
|
+
# NOTE: As of v1.11.0, Schema treats inputs as UNTRUSTED by default, and so external entities
|
33
|
+
# are not resolved from the network (`http://` or `ftp://`). Previously, parsing treated
|
34
|
+
# documents as "trusted" by default which was counter to Nokogiri's "untrusted by default"
|
35
|
+
# security policy. If a document is trusted, then the caller may turn off the NONET option via
|
36
|
+
# the ParseOptions to re-enable external entity resolution over a network connection.
|
29
37
|
class Schema
|
30
38
|
# Errors while parsing the schema file
|
31
39
|
attr_accessor :errors
|
40
|
+
# The Nokogiri::XML::ParseOptions used to parse the schema
|
41
|
+
attr_accessor :parse_options
|
32
42
|
|
33
43
|
###
|
34
44
|
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
|
35
45
|
# object.
|
36
|
-
def self.new
|
37
|
-
from_document
|
46
|
+
def self.new(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
|
47
|
+
from_document(Nokogiri::XML(string_or_io), options)
|
38
48
|
end
|
39
49
|
|
40
50
|
###
|
@@ -42,9 +52,9 @@ module Nokogiri
|
|
42
52
|
# Nokogiri::XML::Document object, or a filename. An Array of
|
43
53
|
# Nokogiri::XML::SyntaxError objects found while validating the
|
44
54
|
# +thing+ is returned.
|
45
|
-
def validate
|
46
|
-
if thing.is_a?(Nokogiri::XML::Document)
|
47
|
-
validate_document(thing)
|
55
|
+
def validate(thing)
|
56
|
+
if thing.is_a?(Nokogiri::XML::Document)
|
57
|
+
validate_document(thing)
|
48
58
|
elsif File.file?(thing)
|
49
59
|
validate_file(thing)
|
50
60
|
else
|
@@ -55,8 +65,8 @@ module Nokogiri
|
|
55
65
|
###
|
56
66
|
# Returns true if +thing+ is a valid Nokogiri::XML::Document or
|
57
67
|
# file.
|
58
|
-
def valid?
|
59
|
-
validate(thing).
|
68
|
+
def valid?(thing)
|
69
|
+
validate(thing).empty?
|
60
70
|
end
|
61
71
|
end
|
62
72
|
end
|