nokogiri 1.18.0.rc1-aarch64-linux-musl
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +293 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +42 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1173 -0
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
- data/ext/nokogiri/include/libxslt/attributes.h +39 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +294 -0
- data/ext/nokogiri/nokogiri.h +238 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +62 -0
- data/ext/nokogiri/xml_comment.c +57 -0
- data/ext/nokogiri/xml_document.c +784 -0
- data/ext/nokogiri/xml_document_fragment.c +29 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +131 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +112 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +181 -0
- data/ext/nokogiri/xml_node.c +2459 -0
- data/ext/nokogiri/xml_node_set.c +518 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +777 -0
- data/ext/nokogiri/xml_relax_ng.c +149 -0
- data/ext/nokogiri/xml_sax_parser.c +403 -0
- data/ext/nokogiri/xml_sax_parser_context.c +390 -0
- data/ext/nokogiri/xml_sax_push_parser.c +206 -0
- data/ext/nokogiri/xml_schema.c +226 -0
- data/ext/nokogiri/xml_syntax_error.c +93 -0
- data/ext/nokogiri/xml_text.c +59 -0
- data/ext/nokogiri/xml_xpath_context.c +502 -0
- data/ext/nokogiri/xslt_stylesheet.c +421 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/3.3/nokogiri.so +0 -0
- data/lib/nokogiri/3.4/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +58 -0
- data/lib/nokogiri/css/parser.rb +772 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +36 -0
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +57 -0
- data/lib/nokogiri/css/xpath_visitor.rb +375 -0
- data/lib/nokogiri/css.rb +132 -0
- data/lib/nokogiri/decorators/slop.rb +42 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +22 -0
- data/lib/nokogiri/xml/builder.rb +494 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +514 -0
- data/lib/nokogiri/xml/document_fragment.rb +276 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +46 -0
- data/lib/nokogiri/xml/element_decl.rb +17 -0
- data/lib/nokogiri/xml/entity_decl.rb +23 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +57 -0
- data/lib/nokogiri/xml/node/save_options.rb +76 -0
- data/lib/nokogiri/xml/node.rb +1650 -0
- data/lib/nokogiri/xml/node_set.rb +449 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +73 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +139 -0
- data/lib/nokogiri/xml/relax_ng.rb +75 -0
- data/lib/nokogiri/xml/sax/document.rb +258 -0
- data/lib/nokogiri/xml/sax/parser.rb +199 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
- data/lib/nokogiri/xml/sax.rb +54 -0
- data/lib/nokogiri/xml/schema.rb +140 -0
- data/lib/nokogiri/xml/searchable.rb +297 -0
- data/lib/nokogiri/xml/syntax_error.rb +94 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +49 -0
- data/lib/nokogiri/xml.rb +65 -0
- data/lib/nokogiri/xslt/stylesheet.rb +49 -0
- data/lib/nokogiri/xslt.rb +129 -0
- data/lib/nokogiri.rb +128 -0
- data/lib/xsd/xmlparser/nokogiri.rb +105 -0
- metadata +324 -0
@@ -0,0 +1,199 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
module SAX
|
6
|
+
###
|
7
|
+
# This parser is a SAX style parser that reads its input as it deems necessary. The parser
|
8
|
+
# takes a Nokogiri::XML::SAX::Document, an optional encoding, then given an XML input, sends
|
9
|
+
# messages to the Nokogiri::XML::SAX::Document.
|
10
|
+
#
|
11
|
+
# Here is an example of using this parser:
|
12
|
+
#
|
13
|
+
# # Create a subclass of Nokogiri::XML::SAX::Document and implement
|
14
|
+
# # the events we care about:
|
15
|
+
# class MyHandler < Nokogiri::XML::SAX::Document
|
16
|
+
# def start_element name, attrs = []
|
17
|
+
# puts "starting: #{name}"
|
18
|
+
# end
|
19
|
+
#
|
20
|
+
# def end_element name
|
21
|
+
# puts "ending: #{name}"
|
22
|
+
# end
|
23
|
+
# end
|
24
|
+
#
|
25
|
+
# parser = Nokogiri::XML::SAX::Parser.new(MyHandler.new)
|
26
|
+
#
|
27
|
+
# # Hand an IO object to the parser, which will read the XML from the IO.
|
28
|
+
# File.open(path_to_xml) do |f|
|
29
|
+
# parser.parse(f)
|
30
|
+
# end
|
31
|
+
#
|
32
|
+
# For more information about \SAX parsers, see Nokogiri::XML::SAX.
|
33
|
+
#
|
34
|
+
# Also see Nokogiri::XML::SAX::Document for the available events.
|
35
|
+
#
|
36
|
+
# For \HTML documents, use the subclass Nokogiri::HTML4::SAX::Parser.
|
37
|
+
#
|
38
|
+
class Parser
|
39
|
+
# to dynamically resolve ParserContext in inherited methods
|
40
|
+
include Nokogiri::ClassResolver
|
41
|
+
|
42
|
+
# Structure used for marshalling attributes for some callbacks in XML::SAX::Document.
|
43
|
+
# Members, in positional order: localname, prefix, uri, value.
class Attribute < Struct.new(*%i[localname prefix uri value]); end
|
45
|
+
|
46
|
+
# Encoding names keyed to the integer ids that ParserContext still accepts (with a deprecation
# warning) in place of an Encoding. Both tables are frozen so that a stray write cannot corrupt
# the id -> name lookup.
ENCODINGS = { # :nodoc:
  "NONE" => 0, # No char encoding detected
  "UTF-8" => 1, # UTF-8
  "UTF16LE" => 2, # UTF-16 little endian
  "UTF16BE" => 3, # UTF-16 big endian
  "UCS4LE" => 4, # UCS-4 little endian
  "UCS4BE" => 5, # UCS-4 big endian
  "EBCDIC" => 6, # EBCDIC uh!
  "UCS4-2143" => 7, # UCS-4 unusual ordering
  "UCS4-3412" => 8, # UCS-4 unusual ordering
  "UCS2" => 9, # UCS-2
  "ISO-8859-1" => 10, # ISO-8859-1 ISO Latin 1
  "ISO-8859-2" => 11, # ISO-8859-2 ISO Latin 2
  "ISO-8859-3" => 12, # ISO-8859-3
  "ISO-8859-4" => 13, # ISO-8859-4
  "ISO-8859-5" => 14, # ISO-8859-5
  "ISO-8859-6" => 15, # ISO-8859-6
  "ISO-8859-7" => 16, # ISO-8859-7
  "ISO-8859-8" => 17, # ISO-8859-8
  "ISO-8859-9" => 18, # ISO-8859-9
  "ISO-2022-JP" => 19, # ISO-2022-JP
  "SHIFT-JIS" => 20, # Shift_JIS
  "EUC-JP" => 21, # EUC-JP
  "ASCII" => 22, # pure ASCII
}.freeze
# Inverse table (id => name); Hash#invert returns a new hash, so it is frozen separately.
REVERSE_ENCODINGS = ENCODINGS.invert.freeze # :nodoc:
deprecate_constant :ENCODINGS
|
73
|
+
|
74
|
+
# The Nokogiri::XML::SAX::Document where events will be sent.
|
75
|
+
attr_accessor :document
|
76
|
+
|
77
|
+
# The encoding being used for this document.
|
78
|
+
attr_accessor :encoding
|
79
|
+
|
80
|
+
###
|
81
|
+
# :call-seq:
|
82
|
+
# new ⇒ SAX::Parser
|
83
|
+
# new(handler) ⇒ SAX::Parser
|
84
|
+
# new(handler, encoding) ⇒ SAX::Parser
|
85
|
+
#
|
86
|
+
# Create a new Parser.
|
87
|
+
#
|
88
|
+
# [Parameters]
|
89
|
+
# - +handler+ (optional Nokogiri::XML::SAX::Document) The document that will receive
|
90
|
+
# events. Will create a new Nokogiri::XML::SAX::Document if not given, which is accessible
|
91
|
+
# through the #document attribute.
|
92
|
+
# - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
|
93
|
+
# parsing the input. (default +nil+ for auto-detection)
|
94
|
+
#
|
95
|
+
def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = nil)
  # Assigned left to right: requested encoding, event handler, then @warned starts out false.
  @encoding, @document, @warned = encoding, doc, false

  # initialize_native is skipped when Nokogiri reports that it is running on JRuby.
  return if Nokogiri.jruby?

  initialize_native
end
|
102
|
+
|
103
|
+
###
|
104
|
+
# :call-seq:
|
105
|
+
# parse(input) { |parser_context| ... }
|
106
|
+
#
|
107
|
+
# Parse the input, sending events to the SAX::Document at #document.
|
108
|
+
#
|
109
|
+
# [Parameters]
|
110
|
+
# - +input+ (String, IO) The input to parse.
|
111
|
+
#
|
112
|
+
# If +input+ quacks like a readable IO object, this method forwards to Parser.parse_io,
|
113
|
+
# otherwise it forwards to Parser.parse_memory.
|
114
|
+
#
|
115
|
+
# [Yields]
|
116
|
+
# If a block is given, the underlying ParserContext object will be yielded. This can be used
|
117
|
+
# to set options on the parser context before parsing begins.
|
118
|
+
#
|
119
|
+
def parse(input, &block)
  # "IO-like" means the input answers to both #read and #close; everything else is
  # treated as an in-memory document.
  io_like = [:read, :close].all? { |message| input.respond_to?(message) }
  return parse_io(input, &block) if io_like

  parse_memory(input, &block)
end
|
126
|
+
|
127
|
+
###
|
128
|
+
# :call-seq:
|
129
|
+
# parse_io(io) { |parser_context| ... }
|
130
|
+
# parse_io(io, encoding) { |parser_context| ... }
|
131
|
+
#
|
132
|
+
# Parse an input stream.
|
133
|
+
#
|
134
|
+
# [Parameters]
|
135
|
+
# - +io+ (IO) The readable IO object from which to read input
|
136
|
+
# - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
|
137
|
+
# parsing the input, or +nil+ for auto-detection. (default #encoding)
|
138
|
+
#
|
139
|
+
# [Yields]
|
140
|
+
# If a block is given, the underlying ParserContext object will be yielded. This can be used
|
141
|
+
# to set options on the parser context before parsing begins.
|
142
|
+
#
|
143
|
+
def parse_io(io, encoding = @encoding)
  # Build the context, let the caller's block (if any) see it first, then run the parse.
  related_class("ParserContext")
    .io(io, encoding)
    .tap { |context| yield(context) if block_given? }
    .parse_with(self)
end
|
148
|
+
|
149
|
+
###
|
150
|
+
# :call-seq:
|
151
|
+
# parse_memory(input) { |parser_context| ... }
|
152
|
+
# parse_memory(input, encoding) { |parser_context| ... }
|
153
|
+
#
|
154
|
+
# Parse an input string.
|
155
|
+
#
|
156
|
+
# [Parameters]
|
157
|
+
# - +input+ (String) The input string to be parsed.
|
158
|
+
# - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
|
159
|
+
# parsing the input, or +nil+ for auto-detection. (default #encoding)
|
160
|
+
#
|
161
|
+
# [Yields]
|
162
|
+
# If a block is given, the underlying ParserContext object will be yielded. This can be used
|
163
|
+
# to set options on the parser context before parsing begins.
|
164
|
+
#
|
165
|
+
def parse_memory(input, encoding = @encoding)
  # Build the context, let the caller's block (if any) see it first, then run the parse.
  related_class("ParserContext")
    .memory(input, encoding)
    .tap { |context| yield(context) if block_given? }
    .parse_with(self)
end
|
170
|
+
|
171
|
+
###
|
172
|
+
# :call-seq:
|
173
|
+
# parse_file(filename) { |parser_context| ... }
|
174
|
+
# parse_file(filename, encoding) { |parser_context| ... }
|
175
|
+
#
|
176
|
+
# Parse a file.
|
177
|
+
#
|
178
|
+
# [Parameters]
|
179
|
+
# - +filename+ (String) The path to the file to be parsed.
|
180
|
+
# - +encoding+ (optional Encoding, String, nil) An Encoding or encoding name to use when
|
181
|
+
# parsing the input, or +nil+ for auto-detection. (default #encoding)
|
182
|
+
#
|
183
|
+
# [Yields]
|
184
|
+
# If a block is given, the underlying ParserContext object will be yielded. This can be used
|
185
|
+
# to set options on the parser context before parsing begins.
|
186
|
+
#
|
187
|
+
def parse_file(filename, encoding = @encoding)
  # Reject unusable paths up front, before a parser context is created:
  # - ArgumentError when +filename+ is nil or false
  # - Errno::ENOENT when nothing exists at the path
  # - Errno::EISDIR when the path is a directory
  # The path is passed to the Errno exceptions so the message says which file was rejected.
  # to_s is used because the Errno constructors need a String message.
  raise ArgumentError, "no filename provided" unless filename
  raise Errno::ENOENT, filename.to_s unless File.exist?(filename)
  raise Errno::EISDIR, filename.to_s if File.directory?(filename)

  ctx = related_class("ParserContext").file(filename, encoding)
  yield ctx if block_given?
  ctx.parse_with(self)
end
|
196
|
+
end
|
197
|
+
end
|
198
|
+
end
|
199
|
+
end
|
@@ -0,0 +1,129 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
module SAX
|
6
|
+
###
|
7
|
+
# Context object to invoke the XML SAX parser on the SAX::Document handler.
|
8
|
+
#
|
9
|
+
# 💡 This class is usually not instantiated by the user. Use Nokogiri::XML::SAX::Parser
|
10
|
+
# instead.
|
11
|
+
class ParserContext
|
12
|
+
class << self
|
13
|
+
###
|
14
|
+
# :call-seq:
|
15
|
+
# new(input)
|
16
|
+
# new(input, encoding)
|
17
|
+
#
|
18
|
+
# Create a parser context for an IO or a String. This is a shorthand method for
|
19
|
+
# ParserContext.io and ParserContext.memory.
|
20
|
+
#
|
21
|
+
# [Parameters]
|
22
|
+
# - +input+ (IO, String) A String or a readable IO object
|
23
|
+
# - +encoding+ (optional) (Encoding, String) The +Encoding+ to use, or the name of an
|
24
|
+
# encoding to use (default +nil+, encoding will be autodetected)
|
25
|
+
#
|
26
|
+
# If +input+ quacks like a readable IO object, this method forwards to ParserContext.io,
|
27
|
+
# otherwise it forwards to ParserContext.memory.
|
28
|
+
#
|
29
|
+
# [Returns] Nokogiri::XML::SAX::ParserContext
|
30
|
+
#
|
31
|
+
def new(input, encoding = nil)
  # An input that answers to both #read and #close is treated as an IO; anything else
  # is handed to .memory.
  readable_io = input.respond_to?(:read) && input.respond_to?(:close)
  readable_io ? io(input, encoding) : memory(input, encoding)
end
|
38
|
+
|
39
|
+
###
|
40
|
+
# :call-seq:
|
41
|
+
# io(input)
|
42
|
+
# io(input, encoding)
|
43
|
+
#
|
44
|
+
# Create a parser context for an +input+ IO which will assume +encoding+
|
45
|
+
#
|
46
|
+
# [Parameters]
|
47
|
+
# - +input+ (IO) The readable IO object from which to read input
|
48
|
+
# - +encoding+ (optional) (Encoding, String) The +Encoding+ to use, or the name of an
|
49
|
+
# encoding to use (default +nil+, encoding will be autodetected)
|
50
|
+
#
|
51
|
+
# [Returns] Nokogiri::XML::SAX::ParserContext
|
52
|
+
#
|
53
|
+
# 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser parse
|
54
|
+
# methods which are more convenient for most use cases.
|
55
|
+
#
|
56
|
+
# The encoding argument is normalized by resolve_encoding before the native constructor sees it.
def io(input, encoding = nil) = native_io(input, resolve_encoding(encoding))
|
59
|
+
|
60
|
+
###
|
61
|
+
# :call-seq:
|
62
|
+
# memory(input)
|
63
|
+
# memory(input, encoding)
|
64
|
+
#
|
65
|
+
# Create a parser context for the +input+ String.
|
66
|
+
#
|
67
|
+
# [Parameters]
|
68
|
+
# - +input+ (String) The input string to be parsed.
|
69
|
+
# - +encoding+ (optional) (Encoding, String) The +Encoding+ to use, or the name of an encoding to
|
70
|
+
# use (default +nil+, encoding will be autodetected)
|
71
|
+
#
|
72
|
+
# [Returns] Nokogiri::XML::SAX::ParserContext
|
73
|
+
#
|
74
|
+
# 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser parse methods
|
75
|
+
# which are more convenient for most use cases.
|
76
|
+
#
|
77
|
+
# The encoding argument is normalized by resolve_encoding before the native constructor sees it.
def memory(input, encoding = nil) = native_memory(input, resolve_encoding(encoding))
|
80
|
+
|
81
|
+
###
|
82
|
+
# :call-seq:
|
83
|
+
# file(path)
|
84
|
+
# file(path, encoding)
|
85
|
+
#
|
86
|
+
# Create a parser context for the file at +path+.
|
87
|
+
#
|
88
|
+
# [Parameters]
|
89
|
+
# - +path+ (String) The path to the input file
|
90
|
+
# - +encoding+ (optional) (Encoding, String) The +Encoding+ to use, or the name of an encoding to
|
91
|
+
# use (default +nil+, encoding will be autodetected)
|
92
|
+
#
|
93
|
+
# [Returns] Nokogiri::XML::SAX::ParserContext
|
94
|
+
#
|
95
|
+
# 💡 Calling this method directly is discouraged. Use Nokogiri::XML::SAX::Parser.parse_file which
|
96
|
+
# is more convenient for most use cases.
|
97
|
+
# The encoding argument is normalized by resolve_encoding before the native constructor sees it.
def file(input, encoding = nil) = native_file(input, resolve_encoding(encoding))
|
100
|
+
|
101
|
+
# Normalize +encoding+ into the value handed to the native constructors: an Encoding, or +nil+
# to let the parser guess.
#
# Accepts an Encoding (returned as-is), +nil+, a String name (looked up with Encoding.find), or
# a deprecated Integer id that is translated through Parser::REVERSE_ENCODINGS.
#
# Raises ArgumentError for an unknown Integer id or an unsupported argument type. Encoding.find
# raises ArgumentError itself when a name is not known to Ruby.
private def resolve_encoding(encoding)
  case encoding
  when Encoding
    encoding

  when nil
    nil # totally fine, parser will guess encoding

  when Integer
    warn("Passing an integer to Nokogiri::XML::SAX::ParserContext.io is deprecated. Use an Encoding object instead. This will become an error in a future release.", uplevel: 2, category: :deprecated)

    # Keep the caller's id in +encoding+ and put the lookup result in its own variable, so the
    # error message can report the id that was rejected. Only REVERSE_ENCODINGS is consulted,
    # which avoids reading the deprecated ENCODINGS constant from inside the library.
    name = Parser::REVERSE_ENCODINGS[encoding]
    raise ArgumentError, "Invalid libxml2 encoding id #{encoding}" if name.nil?
    return nil if name == "NONE" # id 0: no encoding specified, let the parser guess

    Encoding.find(name)

  when String
    Encoding.find(encoding)

  else
    raise ArgumentError, "Cannot resolve #{encoding.inspect} to an Encoding"
  end
end
|
125
|
+
end
|
126
|
+
end
|
127
|
+
end
|
128
|
+
end
|
129
|
+
end
|
@@ -0,0 +1,64 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
module SAX
|
6
|
+
###
|
7
|
+
# PushParser can parse a document that is fed to it manually. It
|
8
|
+
# must be given a SAX::Document object which will be called with
|
9
|
+
# SAX events as the document is being parsed.
|
10
|
+
#
|
11
|
+
# Calling PushParser#<< writes XML to the parser, calling any SAX
|
12
|
+
# callbacks it can.
|
13
|
+
#
|
14
|
+
# PushParser#finish tells the parser that the document is finished
|
15
|
+
# and calls the end_document SAX method.
|
16
|
+
#
|
17
|
+
# Example:
|
18
|
+
#
|
19
|
+
# parser = PushParser.new(Class.new(XML::SAX::Document) {
|
20
|
+
# def start_document
|
21
|
+
# puts "start document called"
|
22
|
+
# end
|
23
|
+
# }.new)
|
24
|
+
# parser << "<div>hello<"
|
25
|
+
# parser << "/div>"
|
26
|
+
# parser.finish
|
27
|
+
class PushParser
|
28
|
+
# The Nokogiri::XML::SAX::Document on which the PushParser will be
|
29
|
+
# operating
|
30
|
+
attr_accessor :document
|
31
|
+
|
32
|
+
###
|
33
|
+
# Create a new PushParser with +doc+ as the SAX Document, providing
|
34
|
+
# an optional +file_name+ and +encoding+
|
35
|
+
def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8")
  @document, @encoding = doc, encoding
  @sax_parser = XML::SAX::Parser.new(doc)

  # Hand the wrapping SAX parser and the optional file name to the native push-parser setup.
  initialize_native(@sax_parser, file_name)
end
|
43
|
+
|
44
|
+
###
|
45
|
+
# Write a +chunk+ of XML to the PushParser. Any callback methods
|
46
|
+
# that can be called will be called immediately.
|
47
|
+
def write(chunk, last_chunk = false)
  # +last_chunk+ is passed through untouched; #finish sends true with an empty chunk.
  native_write(chunk, last_chunk)
end
# parser << chunk is the same method as parser.write(chunk)
alias :<< :write
|
51
|
+
|
52
|
+
###
|
53
|
+
# Finish the parsing. This method is only necessary for
|
54
|
+
# Nokogiri::XML::SAX::Document#end_document to be called.
|
55
|
+
#
|
56
|
+
# ⚠ Note that empty documents are treated as an error when using the libxml2-based
|
57
|
+
# implementation (CRuby), but are fine when using the Xerces-based implementation (JRuby).
|
58
|
+
# Sends an empty chunk flagged as the last one.
def finish = write("", true)
|
61
|
+
end
|
62
|
+
end
|
63
|
+
end
|
64
|
+
end
|
@@ -0,0 +1,54 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
###
|
6
|
+
# SAX Parsers are event-driven parsers.
|
7
|
+
#
|
8
|
+
# Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
|
9
|
+
# feels necessary, and a parser that you explicitly feed XML in chunks. If you want to let
|
10
|
+
# Nokogiri deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have
|
11
|
+
# fine-grained control over the XML input, use the Nokogiri::XML::SAX::PushParser.
|
12
|
+
#
|
13
|
+
# If you want to do SAX style parsing of HTML, check out Nokogiri::HTML4::SAX.
|
14
|
+
#
|
15
|
+
# The basic way a SAX style parser works is by creating a parser, telling the parser about the
|
16
|
+
# events we're interested in, then giving the parser some XML to process. The parser will notify
|
17
|
+
# you when it encounters events you said you would like to know about.
|
18
|
+
#
|
19
|
+
# To register for events, subclass Nokogiri::XML::SAX::Document and implement the methods for
|
20
|
+
# which you would like notification.
|
21
|
+
#
|
22
|
+
# For example, if I want to be notified when a document ends, and when an element starts, I
|
23
|
+
# would write a class like this:
|
24
|
+
#
|
25
|
+
# class MyHandler < Nokogiri::XML::SAX::Document
|
26
|
+
# def end_document
|
27
|
+
# puts "the document has ended"
|
28
|
+
# end
|
29
|
+
#
|
30
|
+
# def start_element name, attributes = []
|
31
|
+
# puts "#{name} started"
|
32
|
+
# end
|
33
|
+
# end
|
34
|
+
#
|
35
|
+
# Then I would instantiate a SAX parser with this document, and feed the parser some XML
|
36
|
+
#
|
37
|
+
# # Create a new parser
|
38
|
+
# parser = Nokogiri::XML::SAX::Parser.new(MyHandler.new)
|
39
|
+
#
|
40
|
+
# # Feed the parser some XML
|
41
|
+
# parser.parse(File.open(ARGV[0]))
|
42
|
+
#
|
43
|
+
# Now my document handler will be called when each node starts, and when the document ends. To
|
44
|
+
# see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
|
45
|
+
#
|
46
|
+
module SAX
|
47
|
+
end
|
48
|
+
end
|
49
|
+
end
|
50
|
+
|
51
|
+
require_relative "sax/document"
|
52
|
+
require_relative "sax/parser_context"
|
53
|
+
require_relative "sax/parser"
|
54
|
+
require_relative "sax/push_parser"
|
@@ -0,0 +1,140 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
class << self
|
6
|
+
# :call-seq:
|
7
|
+
# Schema(input) → Nokogiri::XML::Schema
|
8
|
+
# Schema(input, parse_options) → Nokogiri::XML::Schema
|
9
|
+
#
|
10
|
+
# Convenience method for Nokogiri::XML::Schema.new
|
11
|
+
# Every positional, keyword and block argument is forwarded unchanged.
def Schema(...) = Schema.new(...)
|
14
|
+
end
|
15
|
+
|
16
|
+
# Nokogiri::XML::Schema is used for validating \XML against an \XSD schema definition.
|
17
|
+
#
|
18
|
+
# ⚠ Since v1.11.0, Schema treats inputs as *untrusted* by default, and so external entities are
|
19
|
+
# not resolved from the network (+http://+ or +ftp://+). When parsing a trusted document, the
|
20
|
+
# caller may turn off the +NONET+ option via the ParseOptions to (re-)enable external entity
|
21
|
+
# resolution over a network connection.
|
22
|
+
#
|
23
|
+
# 🛡 Before v1.11.0, documents were "trusted" by default during schema parsing which was counter
|
24
|
+
# to Nokogiri's "untrusted by default" security policy.
|
25
|
+
#
|
26
|
+
# *Example:* Determine whether an \XML document is valid.
|
27
|
+
#
|
28
|
+
# schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
|
29
|
+
# doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
|
30
|
+
# schema.valid?(doc) # Boolean
|
31
|
+
#
|
32
|
+
# *Example:* Validate an \XML document against an \XSD schema, and capture any errors that are found.
|
33
|
+
#
|
34
|
+
# schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
|
35
|
+
# doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
|
36
|
+
# errors = schema.validate(doc) # Array<SyntaxError>
|
37
|
+
#
|
38
|
+
# *Example:* Validate an \XML document using a Document containing an \XSD schema definition.
|
39
|
+
#
|
40
|
+
# schema_doc = Nokogiri::XML::Document.parse(File.read(XSD_FILE))
|
41
|
+
# schema = Nokogiri::XML::Schema.from_document(schema_doc)
|
42
|
+
# doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
|
43
|
+
# schema.valid?(doc) # Boolean
|
44
|
+
#
|
45
|
+
class Schema
|
46
|
+
# The errors found while parsing the \XSD
|
47
|
+
#
|
48
|
+
# [Returns] Array<Nokogiri::XML::SyntaxError>
|
49
|
+
attr_accessor :errors
|
50
|
+
|
51
|
+
# The options used to parse the schema
|
52
|
+
#
|
53
|
+
# [Returns] Nokogiri::XML::ParseOptions
|
54
|
+
attr_accessor :parse_options
|
55
|
+
|
56
|
+
# :call-seq:
|
57
|
+
# new(input) → Nokogiri::XML::Schema
|
58
|
+
# new(input, parse_options) → Nokogiri::XML::Schema
|
59
|
+
#
|
60
|
+
# Parse an \XSD schema definition from a String or IO to create a new Nokogiri::XML::Schema
|
61
|
+
#
|
62
|
+
# [Parameters]
|
63
|
+
# - +input+ (String | IO) \XSD schema definition
|
64
|
+
# - +parse_options+ (Nokogiri::XML::ParseOptions)
|
65
|
+
# Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA
|
66
|
+
#
|
67
|
+
# [Returns] Nokogiri::XML::Schema
|
68
|
+
#
|
69
|
+
def self.new(input, parse_options_ = ParseOptions::DEFAULT_SCHEMA, parse_options: parse_options_)
  # Options may arrive positionally or as the +parse_options:+ keyword; the keyword defaults
  # to the positional value, so it is the one consulted here.
  schema_document = Nokogiri::XML::Document.parse(input)
  from_document(schema_document, parse_options)
end
|
72
|
+
|
73
|
+
# :call-seq:
|
74
|
+
# read_memory(input) → Nokogiri::XML::Schema
|
75
|
+
# read_memory(input, parse_options) → Nokogiri::XML::Schema
|
76
|
+
#
|
77
|
+
# Convenience method for Nokogiri::XML::Schema.new
|
78
|
+
# TODO deprecate this method
# Forwards every argument to .new unchanged.
def self.read_memory(...) = new(...)
|
82
|
+
|
83
|
+
#
|
84
|
+
# :call-seq: validate(input) → Array<SyntaxError>
|
85
|
+
#
|
86
|
+
# Validate +input+ and return any errors that are found.
|
87
|
+
#
|
88
|
+
# [Parameters]
|
89
|
+
# - +input+ (Nokogiri::XML::Document | String)
|
90
|
+
# A parsed document, or a string containing a local filename.
|
91
|
+
#
|
92
|
+
# [Returns] Array<SyntaxError>
|
93
|
+
#
|
94
|
+
# *Example:* Validate an existing XML::Document, and capture any errors that are found.
|
95
|
+
#
|
96
|
+
# schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
|
97
|
+
# errors = schema.validate(document)
|
98
|
+
#
|
99
|
+
# *Example:* Validate an \XML document on disk, and capture any errors that are found.
|
100
|
+
#
|
101
|
+
# schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
|
102
|
+
# errors = schema.validate("/path/to/file.xml")
|
103
|
+
#
|
104
|
+
def validate(input)
  # A parsed document is validated directly; otherwise +input+ must name an existing
  # regular file, which is validated from disk.
  return validate_document(input) if input.is_a?(Nokogiri::XML::Document)
  return validate_file(input) if File.file?(input)

  raise ArgumentError, "Must provide Nokogiri::XML::Document or the name of an existing file"
end
|
113
|
+
|
114
|
+
#
|
115
|
+
# :call-seq: valid?(input) → Boolean
|
116
|
+
#
|
117
|
+
# Validate +input+ and return a Boolean indicating whether the document is valid
|
118
|
+
#
|
119
|
+
# [Parameters]
|
120
|
+
# - +input+ (Nokogiri::XML::Document | String)
|
121
|
+
# A parsed document, or a string containing a local filename.
|
122
|
+
#
|
123
|
+
# [Returns] Boolean
|
124
|
+
#
|
125
|
+
# *Example:* Validate an existing XML::Document
|
126
|
+
#
|
127
|
+
# schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
|
128
|
+
# return unless schema.valid?(document)
|
129
|
+
#
|
130
|
+
# *Example:* Validate an \XML document on disk
|
131
|
+
#
|
132
|
+
# schema = Nokogiri::XML::Schema.new(File.read(XSD_FILE))
|
133
|
+
# return unless schema.valid?("/path/to/file.xml")
|
134
|
+
#
|
135
|
+
# True exactly when #validate reports no errors for +input+.
def valid?(input) = validate(input).empty?
|
138
|
+
end
|
139
|
+
end
|
140
|
+
end
|