nokogiri 1.18.0-aarch64-linux-gnu
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +39 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +293 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +42 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1173 -0
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
- data/ext/nokogiri/include/libxslt/attributes.h +39 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +294 -0
- data/ext/nokogiri/nokogiri.h +238 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +62 -0
- data/ext/nokogiri/xml_comment.c +57 -0
- data/ext/nokogiri/xml_document.c +784 -0
- data/ext/nokogiri/xml_document_fragment.c +29 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +131 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +112 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +181 -0
- data/ext/nokogiri/xml_node.c +2459 -0
- data/ext/nokogiri/xml_node_set.c +518 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +777 -0
- data/ext/nokogiri/xml_relax_ng.c +149 -0
- data/ext/nokogiri/xml_sax_parser.c +403 -0
- data/ext/nokogiri/xml_sax_parser_context.c +390 -0
- data/ext/nokogiri/xml_sax_push_parser.c +206 -0
- data/ext/nokogiri/xml_schema.c +226 -0
- data/ext/nokogiri/xml_syntax_error.c +93 -0
- data/ext/nokogiri/xml_text.c +59 -0
- data/ext/nokogiri/xml_xpath_context.c +486 -0
- data/ext/nokogiri/xslt_stylesheet.c +421 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/3.3/nokogiri.so +0 -0
- data/lib/nokogiri/3.4/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +58 -0
- data/lib/nokogiri/css/parser.rb +772 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +36 -0
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +57 -0
- data/lib/nokogiri/css/xpath_visitor.rb +375 -0
- data/lib/nokogiri/css.rb +132 -0
- data/lib/nokogiri/decorators/slop.rb +42 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +22 -0
- data/lib/nokogiri/xml/builder.rb +494 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +514 -0
- data/lib/nokogiri/xml/document_fragment.rb +276 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +46 -0
- data/lib/nokogiri/xml/element_decl.rb +17 -0
- data/lib/nokogiri/xml/entity_decl.rb +23 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +57 -0
- data/lib/nokogiri/xml/node/save_options.rb +76 -0
- data/lib/nokogiri/xml/node.rb +1650 -0
- data/lib/nokogiri/xml/node_set.rb +449 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +73 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +139 -0
- data/lib/nokogiri/xml/relax_ng.rb +75 -0
- data/lib/nokogiri/xml/sax/document.rb +258 -0
- data/lib/nokogiri/xml/sax/parser.rb +199 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
- data/lib/nokogiri/xml/sax.rb +54 -0
- data/lib/nokogiri/xml/schema.rb +140 -0
- data/lib/nokogiri/xml/searchable.rb +274 -0
- data/lib/nokogiri/xml/syntax_error.rb +94 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +27 -0
- data/lib/nokogiri/xml.rb +65 -0
- data/lib/nokogiri/xslt/stylesheet.rb +49 -0
- data/lib/nokogiri/xslt.rb +129 -0
- data/lib/nokogiri.rb +128 -0
- data/lib/xsd/xmlparser/nokogiri.rb +105 -0
- metadata +321 -0
@@ -0,0 +1,199 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
module SAX
|
6
|
+
###
|
7
|
+
# This parser is a SAX style parser that reads its input as it deems necessary. The parser
|
8
|
+
# takes a Nokogiri::XML::SAX::Document, an optional encoding, then given an XML input, sends
|
9
|
+
# messages to the Nokogiri::XML::SAX::Document.
|
10
|
+
#
|
11
|
+
# Here is an example of using this parser:
|
12
|
+
#
|
13
|
+
# # Create a subclass of Nokogiri::XML::SAX::Document and implement
|
14
|
+
# # the events we care about:
|
15
|
+
# class MyHandler < Nokogiri::XML::SAX::Document
|
16
|
+
# def start_element name, attrs = []
|
17
|
+
# puts "starting: #{name}"
|
18
|
+
# end
|
19
|
+
#
|
20
|
+
# def end_element name
|
21
|
+
# puts "ending: #{name}"
|
22
|
+
# end
|
23
|
+
# end
|
24
|
+
#
|
25
|
+
# parser = Nokogiri::XML::SAX::Parser.new(MyHandler.new)
|
26
|
+
#
|
27
|
+
# # Hand an IO object to the parser, which will read the XML from the IO.
|
28
|
+
# File.open(path_to_xml) do |f|
|
29
|
+
# parser.parse(f)
|
30
|
+
# end
|
31
|
+
#
|
32
|
+
# For more information about \SAX parsers, see Nokogiri::XML::SAX.
|
33
|
+
#
|
34
|
+
# Also see Nokogiri::XML::SAX::Document for the available events.
|
35
|
+
#
|
36
|
+
# For \HTML documents, use the subclass Nokogiri::HTML4::SAX::Parser.
|
37
|
+
#
|
38
|
+
class Parser
|
39
|
+
# to dynamically resolve ParserContext in inherited methods
|
40
|
+
include Nokogiri::ClassResolver
|
41
|
+
|
42
|
+
# Structure used for marshalling attributes for some callbacks in XML::SAX::Document.
|
43
|
+
# Plain value holder with four positional fields (+localname+, +prefix+, +uri+, +value+),
# used to hand attribute data to some XML::SAX::Document callbacks.
class Attribute < Struct.new(:localname, :prefix, :uri, :value); end
|
45
|
+
|
46
|
+
# Encoding name => integer id, listed in ascending id order.
ENCODINGS = [ # :nodoc:
  ["NONE", 0], # No char encoding detected
  ["UTF-8", 1], # UTF-8
  ["UTF16LE", 2], # UTF-16 little endian
  ["UTF16BE", 3], # UTF-16 big endian
  ["UCS4LE", 4], # UCS-4 little endian
  ["UCS4BE", 5], # UCS-4 big endian
  ["EBCDIC", 6], # EBCDIC uh!
  ["UCS4-2143", 7], # UCS-4 unusual ordering
  ["UCS4-3412", 8], # UCS-4 unusual ordering
  ["UCS2", 9], # UCS-2
  ["ISO-8859-1", 10], # ISO-8859-1 ISO Latin 1
  ["ISO-8859-2", 11], # ISO-8859-2 ISO Latin 2
  ["ISO-8859-3", 12], # ISO-8859-3
  ["ISO-8859-4", 13], # ISO-8859-4
  ["ISO-8859-5", 14], # ISO-8859-5
  ["ISO-8859-6", 15], # ISO-8859-6
  ["ISO-8859-7", 16], # ISO-8859-7
  ["ISO-8859-8", 17], # ISO-8859-8
  ["ISO-8859-9", 18], # ISO-8859-9
  ["ISO-2022-JP", 19], # ISO-2022-JP
  ["SHIFT-JIS", 20], # Shift_JIS
  ["EUC-JP", 21], # EUC-JP
  ["ASCII", 22], # pure ASCII
].to_h
# Integer id => encoding name; the inverse of the table above.
REVERSE_ENCODINGS = ENCODINGS.invert # :nodoc:
|
72
|
+
deprecate_constant :ENCODINGS
|
73
|
+
|
74
|
+
# The Nokogiri::XML::SAX::Document where events will be sent.
|
75
|
+
attr_accessor :document
|
76
|
+
|
77
|
+
# The encoding being used for this document.
|
78
|
+
attr_accessor :encoding
|
79
|
+
|
80
|
+
###
|
81
|
+
# :call-seq:
|
82
|
+
# new ⇒ SAX::Parser
|
83
|
+
# new(handler) ⇒ SAX::Parser
|
84
|
+
# new(handler, encoding) ⇒ SAX::Parser
|
85
|
+
#
|
86
|
+
# Create a new Parser.
|
87
|
+
#
|
88
|
+
# [Parameters]
|
89
|
+
# - +handler+ (optional Nokogiri::XML::SAX::Document) The document that will receive
|
90
|
+
# events. Will create a new Nokogiri::XML::SAX::Document if not given, which is accessible
|
91
|
+
# through the #document attribute.
|
92
|
+
# - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
|
93
|
+
# parsing the input. (default +nil+ for auto-detection)
|
94
|
+
#
|
95
|
+
def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = nil)
  # Keep the event handler and the requested encoding (nil by default).
  @document = doc
  @encoding = encoding
  @warned = false

  # Native setup is skipped on JRuby.
  return if Nokogiri.jruby?

  initialize_native
end
|
102
|
+
|
103
|
+
###
|
104
|
+
# :call-seq:
|
105
|
+
# parse(input) { |parser_context| ... }
|
106
|
+
#
|
107
|
+
# Parse the input, sending events to the SAX::Document at #document.
|
108
|
+
#
|
109
|
+
# [Parameters]
|
110
|
+
# - +input+ (String, IO) The input to parse.
|
111
|
+
#
|
112
|
+
# If +input+ quacks like a readable IO object, this method forwards to Parser.parse_io,
|
113
|
+
# otherwise it forwards to Parser.parse_memory.
|
114
|
+
#
|
115
|
+
# [Yields]
|
116
|
+
# If a block is given, the underlying ParserContext object will be yielded. This can be used
|
117
|
+
# to set options on the parser context before parsing begins.
|
118
|
+
#
|
119
|
+
def parse(input, &block)
  # Anything answering both #read and #close is treated as a stream.
  io_like = input.respond_to?(:read) && input.respond_to?(:close)
  return parse_io(input, &block) if io_like

  # Everything else is handed to the in-memory parser.
  parse_memory(input, &block)
end
|
126
|
+
|
127
|
+
###
|
128
|
+
# :call-seq:
|
129
|
+
# parse_io(io) { |parser_context| ... }
|
130
|
+
# parse_io(io, encoding) { |parser_context| ... }
|
131
|
+
#
|
132
|
+
# Parse an input stream.
|
133
|
+
#
|
134
|
+
# [Parameters]
|
135
|
+
# - +io+ (IO) The readable IO object from which to read input
|
136
|
+
# - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
|
137
|
+
# parsing the input, or +nil+ for auto-detection. (default #encoding)
|
138
|
+
#
|
139
|
+
# [Yields]
|
140
|
+
# If a block is given, the underlying ParserContext object will be yielded. This can be used
|
141
|
+
# to set options on the parser context before parsing begins.
|
142
|
+
#
|
143
|
+
def parse_io(io, encoding = @encoding)
  # Build the context from the resolved ParserContext class for this parser.
  context = related_class("ParserContext").io(io, encoding)

  # Let the caller adjust the context before parsing starts.
  yield(context) if block_given?

  context.parse_with(self)
end
|
148
|
+
|
149
|
+
###
|
150
|
+
# :call-seq:
|
151
|
+
# parse_memory(input) { |parser_context| ... }
|
152
|
+
# parse_memory(input, encoding) { |parser_context| ... }
|
153
|
+
#
|
154
|
+
# Parse an input string.
|
155
|
+
#
|
156
|
+
# [Parameters]
|
157
|
+
# - +input+ (String) The input string to be parsed.
|
158
|
+
# - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
|
159
|
+
# parsing the input, or +nil+ for auto-detection. (default #encoding)
|
160
|
+
#
|
161
|
+
# [Yields]
|
162
|
+
# If a block is given, the underlying ParserContext object will be yielded. This can be used
|
163
|
+
# to set options on the parser context before parsing begins.
|
164
|
+
#
|
165
|
+
def parse_memory(input, encoding = @encoding)
  # Build the context from the resolved ParserContext class for this parser.
  context = related_class("ParserContext").memory(input, encoding)

  # Let the caller adjust the context before parsing starts.
  yield(context) if block_given?

  context.parse_with(self)
end
|
170
|
+
|
171
|
+
###
|
172
|
+
# :call-seq:
|
173
|
+
# parse_file(filename) { |parser_context| ... }
|
174
|
+
# parse_file(filename, encoding) { |parser_context| ... }
|
175
|
+
#
|
176
|
+
# Parse a file.
|
177
|
+
#
|
178
|
+
# [Parameters]
|
179
|
+
# - +filename+ (String) The path to the file to be parsed.
|
180
|
+
# - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
|
181
|
+
# parsing the input, or +nil+ for auto-detection. (default #encoding)
|
182
|
+
#
|
183
|
+
# [Yields]
|
184
|
+
# If a block is given, the underlying ParserContext object will be yielded. This can be used
|
185
|
+
# to set options on the parser context before parsing begins.
|
186
|
+
#
|
187
|
+
def parse_file(filename, encoding = @encoding)
  raise ArgumentError, "no filename provided" unless filename

  # Validate the path up front and include it in the error, so the caller can tell
  # which file was rejected. +to_s+ keeps non-String paths usable as a message.
  raise Errno::ENOENT, filename.to_s unless File.exist?(filename)
  raise Errno::EISDIR, filename.to_s if File.directory?(filename)

  ctx = related_class("ParserContext").file(filename, encoding)

  # Let the caller adjust the context before parsing starts.
  yield ctx if block_given?
  ctx.parse_with(self)
end
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
module SAX
|
6
|
+
###
|
7
|
+
# Context object to invoke the XML SAX parser on the SAX::Document handler.
|
8
|
+
#
|
9
|
+
# 💡 This class is usually not instantiated by the user. Use Nokogiri::XML::SAX::Parser
|
10
|
+
# instead.
|
11
|
+
class ParserContext
|
12
|
+
class << self
|
13
|
+
###
|
14
|
+
# :call-seq:
|
15
|
+
# new(input)
|
16
|
+
# new(input, encoding)
|
17
|
+
#
|
18
|
+
# Create a parser context for an IO or a String. This is a shorthand method for
|
19
|
+
# ParserContext.io and ParserContext.memory.
|
20
|
+
#
|
21
|
+
# [Parameters]
|
22
|
+
# - +input+ (IO, String) A String or a readable IO object
|
23
|
+
# - +encoding+ (optional) (Encoding, String) The +Encoding+ to use, or the name of an
|
24
|
+
# encoding to use (default +nil+, encoding will be autodetected)
|
25
|
+
#
|
26
|
+
# If +input+ quacks like a readable IO object, this method forwards to ParserContext.io,
|
27
|
+
# otherwise it forwards to ParserContext.memory.
|
28
|
+
#
|
29
|
+
# [Returns] Nokogiri::XML::SAX::ParserContext
|
30
|
+
#
|
31
|
+
def new(input, encoding = nil)
  # An object answering both #read and #close goes to .io; anything else to .memory.
  io_like = input.respond_to?(:read) && input.respond_to?(:close)
  io_like ? io(input, encoding) : memory(input, encoding)
end
|
38
|
+
|
39
|
+
###
|
40
|
+
# :call-seq:
|
41
|
+
# io(input)
|
42
|
+
# io(input, encoding)
|
43
|
+
#
|
44
|
+
# Create a parser context for an +input+ IO which will assume +encoding+
|
45
|
+
#
|
46
|
+
# [Parameters]
|
47
|
+
# - +io+ (IO) The readable IO object from which to read input
|
48
|
+
# - +encoding+ (optional) (Encoding, String) The +Encoding+ to use, or the name of an
|
49
|
+
# encoding to use (default +nil+, encoding will be autodetected)
|
50
|
+
#
|
51
|
+
# [Returns] Nokogiri::XML::SAX::ParserContext
|
52
|
+
#
|
53
|
+
# 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser parse
|
54
|
+
# methods which are more convenient for most use cases.
|
55
|
+
#
|
56
|
+
def io(input, encoding = nil)
  # Normalize the encoding argument before handing off to the native constructor.
  resolved = resolve_encoding(encoding)
  native_io(input, resolved)
end
|
59
|
+
|
60
|
+
###
|
61
|
+
# :call-seq:
|
62
|
+
# memory(input)
|
63
|
+
# memory(input, encoding)
|
64
|
+
#
|
65
|
+
# Create a parser context for the +input+ String.
|
66
|
+
#
|
67
|
+
# [Parameters]
|
68
|
+
# - +input+ (String) The input string to be parsed.
|
69
|
+
# - +encoding+ (optional) (Encoding, String) The +Encoding+ to use, or the name of an encoding to
|
70
|
+
# use (default +nil+, encoding will be autodetected)
|
71
|
+
#
|
72
|
+
# [Returns] Nokogiri::XML::SAX::ParserContext
|
73
|
+
#
|
74
|
+
# 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser parse methods
|
75
|
+
# which are more convenient for most use cases.
|
76
|
+
#
|
77
|
+
def memory(input, encoding = nil)
  # Normalize the encoding argument before handing off to the native constructor.
  resolved = resolve_encoding(encoding)
  native_memory(input, resolved)
end
|
80
|
+
|
81
|
+
###
|
82
|
+
# :call-seq:
|
83
|
+
# file(path)
|
84
|
+
# file(path, encoding)
|
85
|
+
#
|
86
|
+
# Create a parser context for the file at +path+.
|
87
|
+
#
|
88
|
+
# [Parameters]
|
89
|
+
# - +path+ (String) The path to the input file
|
90
|
+
# - +encoding+ (optional) (Encoding, String) The +Encoding+ to use, or the name of an encoding to
|
91
|
+
# use (default +nil+, encoding will be autodetected)
|
92
|
+
#
|
93
|
+
# [Returns] Nokogiri::XML::SAX::ParserContext
|
94
|
+
#
|
95
|
+
# 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser.parse_file which
|
96
|
+
# is more convenient for most use cases.
|
97
|
+
def file(input, encoding = nil)
  # Normalize the encoding argument before handing off to the native constructor.
  resolved = resolve_encoding(encoding)
  native_file(input, resolved)
end
|
100
|
+
|
101
|
+
# Normalize the +encoding+ argument accepted by .io, .memory and .file.
#
# [Parameters]
# - +encoding+ (Encoding, String, Integer, nil)
#
# [Returns] an Encoding, or +nil+ when no encoding was requested (+nil+, or the integer id
# mapped to "NONE").
#
# [Raises] ArgumentError for an integer id missing from Parser::REVERSE_ENCODINGS, and for
# any argument that is not an Encoding, String, Integer or +nil+.
private def resolve_encoding(encoding)
  case encoding
  when Encoding
    encoding

  when nil
    nil # totally fine, parser will guess encoding

  when Integer
    warn("Passing an integer to Nokogiri::XML::SAX::ParserContext.io is deprecated. Use an Encoding object instead. This will become an error in a future release.", uplevel: 2, category: :deprecated)

    # Keep the caller's id in +encoding+ so the error message can report it, and look the
    # name up in REVERSE_ENCODINGS only, so the deprecated Parser::ENCODINGS is not read here.
    name = Parser::REVERSE_ENCODINGS[encoding]
    raise ArgumentError, "Invalid libxml2 encoding id #{encoding}" if name.nil?
    return nil if name == "NONE"

    Encoding.find(name)

  when String
    Encoding.find(encoding)

  else
    raise ArgumentError, "Cannot resolve #{encoding.inspect} to an Encoding"
  end
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
module SAX
|
6
|
+
###
|
7
|
+
# PushParser can parse a document that is fed to it manually. It
|
8
|
+
# must be given a SAX::Document object which will be called with
|
9
|
+
# SAX events as the document is being parsed.
|
10
|
+
#
|
11
|
+
# Calling PushParser#<< writes XML to the parser, calling any SAX
|
12
|
+
# callbacks it can.
|
13
|
+
#
|
14
|
+
# PushParser#finish tells the parser that the document is finished
|
15
|
+
# and calls the end_document SAX method.
|
16
|
+
#
|
17
|
+
# Example:
|
18
|
+
#
|
19
|
+
# parser = PushParser.new(Class.new(XML::SAX::Document) {
|
20
|
+
# def start_document
|
21
|
+
# puts "start document called"
|
22
|
+
# end
|
23
|
+
# }.new)
|
24
|
+
# parser << "<div>hello<"
|
25
|
+
# parser << "/div>"
|
26
|
+
# parser.finish
|
27
|
+
class PushParser
|
28
|
+
# The Nokogiri::XML::SAX::Document on which the PushParser will be
|
29
|
+
# operating
|
30
|
+
attr_accessor :document
|
31
|
+
|
32
|
+
###
|
33
|
+
# Create a new PushParser with +doc+ as the SAX Document, providing
|
34
|
+
# an optional +file_name+ and +encoding+
|
35
|
+
def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8")
  @encoding = encoding
  @document = doc

  # A regular SAX parser wrapping the same handler is handed to the native context.
  @sax_parser = XML::SAX::Parser.new(@document)

  ## Create our push parser context
  initialize_native(@sax_parser, file_name)
end
|
43
|
+
|
44
|
+
###
|
45
|
+
# Write a +chunk+ of XML to the PushParser. Any callback methods
|
46
|
+
# that can be called will be called immediately.
|
47
|
+
def write(chunk, last_chunk = false)
  # Both arguments are passed through unchanged to the native writer.
  is_last = last_chunk
  native_write(chunk, is_last)
end
|
50
|
+
alias_method :<<, :write
|
51
|
+
|
52
|
+
###
|
53
|
+
# Finish the parsing. This method is only necessary for
|
54
|
+
# Nokogiri::XML::SAX::Document#end_document to be called.
|
55
|
+
#
|
56
|
+
# ⚠ Note that empty documents are treated as an error when using the libxml2-based
|
57
|
+
# implementation (CRuby), but are fine when using the Xerces-based implementation (JRuby).
|
58
|
+
def finish
  # Send an empty chunk flagged as the last one.
  last_chunk = true
  write("", last_chunk)
end
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
###
|
6
|
+
# SAX Parsers are event-driven parsers.
|
7
|
+
#
|
8
|
+
# Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
|
9
|
+
# feels necessary, and a parser that you explicitly feed XML in chunks. If you want to let
|
10
|
+
# Nokogiri deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have
|
11
|
+
# fine grain control over the XML input, use the Nokogiri::XML::SAX::PushParser.
|
12
|
+
#
|
13
|
+
# If you want to do SAX style parsing of HTML, check out Nokogiri::HTML4::SAX.
|
14
|
+
#
|
15
|
+
# The basic way a SAX style parser works is by creating a parser, telling the parser about the
|
16
|
+
# events we're interested in, then giving the parser some XML to process. The parser will notify
|
17
|
+
# you when it encounters events you said you would like to know about.
|
18
|
+
#
|
19
|
+
# To register for events, subclass Nokogiri::XML::SAX::Document and implement the methods for
|
20
|
+
# which you would like notification.
|
21
|
+
#
|
22
|
+
# For example, if I want to be notified when a document ends, and when an element starts, I
|
23
|
+
# would write a class like this:
|
24
|
+
#
|
25
|
+
# class MyHandler < Nokogiri::XML::SAX::Document
|
26
|
+
# def end_document
|
27
|
+
# puts "the document has ended"
|
28
|
+
# end
|
29
|
+
#
|
30
|
+
# def start_element name, attributes = []
|
31
|
+
# puts "#{name} started"
|
32
|
+
# end
|
33
|
+
# end
|
34
|
+
#
|
35
|
+
# Then I would instantiate a SAX parser with this document, and feed the parser some XML
|
36
|
+
#
|
37
|
+
# # Create a new parser
|
38
|
+
# parser = Nokogiri::XML::SAX::Parser.new(MyHandler.new)
|
39
|
+
#
|
40
|
+
# # Feed the parser some XML
|
41
|
+
# parser.parse(File.open(ARGV[0]))
|
42
|
+
#
|
43
|
+
# Now my document handler will be called when each node starts, and when the document ends. To
|
44
|
+
# see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
|
45
|
+
#
|
46
|
+
module SAX
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
require_relative "sax/document"
|
52
|
+
require_relative "sax/parser_context"
|
53
|
+
require_relative "sax/parser"
|
54
|
+
require_relative "sax/push_parser"
|
@@ -0,0 +1,140 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
class << self
|
6
|
+
# :call-seq:
|
7
|
+
# Schema(input) → Nokogiri::XML::Schema
|
8
|
+
# Schema(input, parse_options) → Nokogiri::XML::Schema
|
9
|
+
#
|
10
|
+
# Convenience method for Nokogiri::XML::Schema.new
|
11
|
+
# Forwards every argument untouched to Schema.new.
def Schema(...) = Schema.new(...)
|
14
|
+
end
|
15
|
+
|
16
|
+
# Nokogiri::XML::Schema is used for validating \XML against an \XSD schema definition.
|
17
|
+
#
|
18
|
+
# ⚠ Since v1.11.0, Schema treats inputs as *untrusted* by default, and so external entities are
|
19
|
+
# not resolved from the network (+http://+ or +ftp://+). When parsing a trusted document, the
|
20
|
+
# caller may turn off the +NONET+ option via the ParseOptions to (re-)enable external entity
|
21
|
+
# resolution over a network connection.
|
22
|
+
#
|
23
|
+
# 🛡 Before v1.11.0, documents were "trusted" by default during schema parsing which was counter
|
24
|
+
# to Nokogiri's "untrusted by default" security policy.
|
25
|
+
#
|
26
|
+
# *Example:* Determine whether an \XML document is valid.
|
27
|
+
#
|
28
|
+
# schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
|
29
|
+
# doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
|
30
|
+
# schema.valid?(doc) # Boolean
|
31
|
+
#
|
32
|
+
# *Example:* Validate an \XML document against an \XSD schema, and capture any errors that are found.
|
33
|
+
#
|
34
|
+
# schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
|
35
|
+
# doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
|
36
|
+
# errors = schema.validate(doc) # Array<SyntaxError>
|
37
|
+
#
|
38
|
+
# *Example:* Validate an \XML document using a Document containing an \XSD schema definition.
|
39
|
+
#
|
40
|
+
# schema_doc = Nokogiri::XML::Document.parse(File.read(XSD_FILE))
|
41
|
+
# schema = Nokogiri::XML::Schema.from_document(schema_doc)
|
42
|
+
# doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
|
43
|
+
# schema.valid?(doc) # Boolean
|
44
|
+
#
|
45
|
+
class Schema
|
46
|
+
# The errors found while parsing the \XSD
|
47
|
+
#
|
48
|
+
# [Returns] Array<Nokogiri::XML::SyntaxError>
|
49
|
+
attr_accessor :errors
|
50
|
+
|
51
|
+
# The options used to parse the schema
|
52
|
+
#
|
53
|
+
# [Returns] Nokogiri::XML::ParseOptions
|
54
|
+
attr_accessor :parse_options
|
55
|
+
|
56
|
+
# :call-seq:
|
57
|
+
# new(input) → Nokogiri::XML::Schema
|
58
|
+
# new(input, parse_options) → Nokogiri::XML::Schema
|
59
|
+
#
|
60
|
+
# Parse an \XSD schema definition from a String or IO to create a new Nokogiri::XML::Schema
|
61
|
+
#
|
62
|
+
# [Parameters]
|
63
|
+
# - +input+ (String | IO) \XSD schema definition
|
64
|
+
# - +parse_options+ (Nokogiri::XML::ParseOptions)
|
65
|
+
# Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA
|
66
|
+
#
|
67
|
+
# [Returns] Nokogiri::XML::Schema
|
68
|
+
#
|
69
|
+
def self.new(input, parse_options_ = ParseOptions::DEFAULT_SCHEMA, parse_options: parse_options_)
  # The keyword defaults to the positional value, so either calling style supplies the options.
  schema_document = Nokogiri::XML::Document.parse(input)
  from_document(schema_document, parse_options)
end
|
72
|
+
|
73
|
+
# :call-seq:
|
74
|
+
# read_memory(input) → Nokogiri::XML::Schema
|
75
|
+
# read_memory(input, parse_options) → Nokogiri::XML::Schema
|
76
|
+
#
|
77
|
+
# Convenience method for Nokogiri::XML::Schema.new
|
78
|
+
# TODO deprecate this method
# Forwards every argument untouched to Schema.new.
def self.read_memory(...) = new(...)
|
82
|
+
|
83
|
+
#
|
84
|
+
# :call-seq: validate(input) → Array<SyntaxError>
|
85
|
+
#
|
86
|
+
# Validate +input+ and return any errors that are found.
|
87
|
+
#
|
88
|
+
# [Parameters]
|
89
|
+
# - +input+ (Nokogiri::XML::Document | String)
|
90
|
+
# A parsed document, or a string containing a local filename.
|
91
|
+
#
|
92
|
+
# [Returns] Array<SyntaxError>
|
93
|
+
#
|
94
|
+
# *Example:* Validate an existing XML::Document, and capture any errors that are found.
|
95
|
+
#
|
96
|
+
# schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
|
97
|
+
# errors = schema.validate(document)
|
98
|
+
#
|
99
|
+
# *Example:* Validate an \XML document on disk, and capture any errors that are found.
|
100
|
+
#
|
101
|
+
# schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
|
102
|
+
# errors = schema.validate("/path/to/file.xml")
|
103
|
+
#
|
104
|
+
def validate(input)
  # An already-parsed document is validated directly.
  return validate_document(input) if input.is_a?(Nokogiri::XML::Document)

  # Otherwise the argument must name an existing regular file.
  return validate_file(input) if File.file?(input)

  raise ArgumentError, "Must provide Nokogiri::XML::Document or the name of an existing file"
end
|
113
|
+
|
114
|
+
#
|
115
|
+
# :call-seq: valid?(input) → Boolean
|
116
|
+
#
|
117
|
+
# Validate +input+ and return a Boolean indicating whether the document is valid
|
118
|
+
#
|
119
|
+
# [Parameters]
|
120
|
+
# - +input+ (Nokogiri::XML::Document | String)
|
121
|
+
# A parsed document, or a string containing a local filename.
|
122
|
+
#
|
123
|
+
# [Returns] Boolean
|
124
|
+
#
|
125
|
+
# *Example:* Validate an existing XML::Document
|
126
|
+
#
|
127
|
+
# schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
|
128
|
+
# return unless schema.valid?(document)
|
129
|
+
#
|
130
|
+
# *Example:* Validate an \XML document on disk
|
131
|
+
#
|
132
|
+
# schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
|
133
|
+
# return unless schema.valid?("/path/to/file.xml")
|
134
|
+
#
|
135
|
+
def valid?(input)
  # Valid means #validate reported nothing.
  problems = validate(input)
  problems.empty?
end
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|