nokogiri 1.18.0-arm-linux-musl
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +39 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +293 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +42 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1173 -0
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
- data/ext/nokogiri/include/libxslt/attributes.h +39 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +294 -0
- data/ext/nokogiri/nokogiri.h +238 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +62 -0
- data/ext/nokogiri/xml_comment.c +57 -0
- data/ext/nokogiri/xml_document.c +784 -0
- data/ext/nokogiri/xml_document_fragment.c +29 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +131 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +112 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +181 -0
- data/ext/nokogiri/xml_node.c +2459 -0
- data/ext/nokogiri/xml_node_set.c +518 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +777 -0
- data/ext/nokogiri/xml_relax_ng.c +149 -0
- data/ext/nokogiri/xml_sax_parser.c +403 -0
- data/ext/nokogiri/xml_sax_parser_context.c +390 -0
- data/ext/nokogiri/xml_sax_push_parser.c +206 -0
- data/ext/nokogiri/xml_schema.c +226 -0
- data/ext/nokogiri/xml_syntax_error.c +93 -0
- data/ext/nokogiri/xml_text.c +59 -0
- data/ext/nokogiri/xml_xpath_context.c +486 -0
- data/ext/nokogiri/xslt_stylesheet.c +421 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/3.3/nokogiri.so +0 -0
- data/lib/nokogiri/3.4/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +58 -0
- data/lib/nokogiri/css/parser.rb +772 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +36 -0
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +57 -0
- data/lib/nokogiri/css/xpath_visitor.rb +375 -0
- data/lib/nokogiri/css.rb +132 -0
- data/lib/nokogiri/decorators/slop.rb +42 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +22 -0
- data/lib/nokogiri/xml/builder.rb +494 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +514 -0
- data/lib/nokogiri/xml/document_fragment.rb +276 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +46 -0
- data/lib/nokogiri/xml/element_decl.rb +17 -0
- data/lib/nokogiri/xml/entity_decl.rb +23 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +57 -0
- data/lib/nokogiri/xml/node/save_options.rb +76 -0
- data/lib/nokogiri/xml/node.rb +1650 -0
- data/lib/nokogiri/xml/node_set.rb +449 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +73 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +139 -0
- data/lib/nokogiri/xml/relax_ng.rb +75 -0
- data/lib/nokogiri/xml/sax/document.rb +258 -0
- data/lib/nokogiri/xml/sax/parser.rb +199 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
- data/lib/nokogiri/xml/sax.rb +54 -0
- data/lib/nokogiri/xml/schema.rb +140 -0
- data/lib/nokogiri/xml/searchable.rb +274 -0
- data/lib/nokogiri/xml/syntax_error.rb +94 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +27 -0
- data/lib/nokogiri/xml.rb +65 -0
- data/lib/nokogiri/xslt/stylesheet.rb +49 -0
- data/lib/nokogiri/xslt.rb +129 -0
- data/lib/nokogiri.rb +128 -0
- data/lib/xsd/xmlparser/nokogiri.rb +105 -0
- metadata +321 -0
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
  module XML
    # :nodoc: all
    module PP
      # Mixin that supplies #inspect and #pretty_print.
      #
      # The including class must provide +inspect_attributes+, an Array of method names (Symbols)
      # whose return values are displayed.
      module Node
        # Attribute names that are pretty-printed as a bracketed list and hidden when empty.
        COLLECTIONS = [:attribute_nodes, :children]

        # Build a single-line description of the object.
        #
        # Attributes whose value is nil/false, empty, or whose reader raises NoMethodError are
        # left out. When the class declares exactly one inspect attribute, its value is shown
        # bare (without a "name=" prefix).
        #
        # [Returns] (String)
        def inspect
          # handle the case where an exception is thrown during object construction
          if respond_to?(:data_ptr?) && !data_ptr?
            return "#<#{self.class}:#{format("0x%x", object_id)} (no data)>"
          end

          visible = inspect_attributes.reject do |name|
            value = send(name)
            !value || (value.respond_to?(:empty?) && value.empty?)
          rescue NoMethodError
            true
          end

          summary = if inspect_attributes.length == 1
            # The lone attribute may have been filtered out above; calling send(nil) in that
            # case would raise TypeError, so show nothing instead.
            visible.empty? ? "" : send(visible.first).inspect
          else
            visible.map { |name| "#{name}=#{send(name).inspect}" }.join(" ")
          end

          "#<#{self.class}:#{format("0x%x", object_id)} #{summary}>"
        end

        # Pretty-print the object through the given printer.
        #
        # [Parameters]
        # - +pp+ the pretty-printer; it must respond to +group+, +breakable+, +seplist+, +text+
        #   and +pp+
        def pretty_print(pp)
          nice_name = self.class.name.split("::").last
          pp.group(2, "#(#{nice_name}:#{format("0x%x", object_id)} {", "})") do
            pp.breakable

            # Keep [name, value] pairs that have a truthy value; collections must be non-empty.
            attrs = inspect_attributes.filter_map do |name|
              [name, send(name)] if respond_to?(name)
            end.select do |name, value|
              value && !(COLLECTIONS.include?(name) && value.empty?)
            end

            if inspect_attributes.length == 1
              # Nothing to print when the lone attribute was filtered out (attrs.first is nil).
              pp.pp(attrs.first.last) unless attrs.empty?
            else
              pp.seplist(attrs) do |pair|
                name, value = pair
                if COLLECTIONS.include?(name)
                  pp.group(2, "#{name} = [", "]") do
                    pp.breakable
                    pp.seplist(value) do |item|
                      pp.pp(item)
                    end
                  end
                else
                  pp.text("#{name} = ")
                  pp.pp(value)
                end
              end
            end

            pp.breakable
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,139 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
###
|
6
|
+
# The Reader parser allows you to effectively pull parse an \XML document. Once instantiated,
|
7
|
+
# call Nokogiri::XML::Reader#each to iterate over each node.
|
8
|
+
#
|
9
|
+
# Nokogiri::XML::Reader parses an \XML document similar to the way a cursor would move. The
|
10
|
+
# Reader is given an \XML document, and yields nodes to an each block.
|
11
|
+
#
|
12
|
+
# The Reader parser might be good for when you need the speed and low memory usage of a \SAX
|
13
|
+
# parser, but do not want to write a SAX::Document handler.
|
14
|
+
#
|
15
|
+
# Here is an example of usage:
|
16
|
+
#
|
17
|
+
# reader = Nokogiri::XML::Reader.new <<~XML
|
18
|
+
# <x xmlns:tenderlove='http://tenderlovemaking.com/'>
|
19
|
+
# <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
|
20
|
+
# </x>
|
21
|
+
# XML
|
22
|
+
#
|
23
|
+
# reader.each do |node|
|
24
|
+
# # node is an instance of Nokogiri::XML::Reader
|
25
|
+
# puts node.name
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# ⚠ Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
|
29
|
+
# document, you must parse the document again. It may be better to capture all information you
|
30
|
+
# need during a single iteration.
|
31
|
+
#
|
32
|
+
# ⚠ libxml2 does not support error recovery in the Reader parser. The +RECOVER+ ParseOption is
|
33
|
+
# ignored. If a syntax error is encountered during parsing, an exception will be raised.
|
34
|
+
class Reader
|
35
|
+
include Enumerable
|
36
|
+
|
37
|
+
TYPE_NONE = 0
|
38
|
+
# Element node type
|
39
|
+
TYPE_ELEMENT = 1
|
40
|
+
# Attribute node type
|
41
|
+
TYPE_ATTRIBUTE = 2
|
42
|
+
# Text node type
|
43
|
+
TYPE_TEXT = 3
|
44
|
+
# CDATA node type
|
45
|
+
TYPE_CDATA = 4
|
46
|
+
# Entity Reference node type
|
47
|
+
TYPE_ENTITY_REFERENCE = 5
|
48
|
+
# Entity node type
|
49
|
+
TYPE_ENTITY = 6
|
50
|
+
# PI node type
|
51
|
+
TYPE_PROCESSING_INSTRUCTION = 7
|
52
|
+
# Comment node type
|
53
|
+
TYPE_COMMENT = 8
|
54
|
+
# Document node type
|
55
|
+
TYPE_DOCUMENT = 9
|
56
|
+
# Document Type node type
|
57
|
+
TYPE_DOCUMENT_TYPE = 10
|
58
|
+
# Document Fragment node type
|
59
|
+
TYPE_DOCUMENT_FRAGMENT = 11
|
60
|
+
# Notation node type
|
61
|
+
TYPE_NOTATION = 12
|
62
|
+
# Whitespace node type
|
63
|
+
TYPE_WHITESPACE = 13
|
64
|
+
# Significant Whitespace node type
|
65
|
+
TYPE_SIGNIFICANT_WHITESPACE = 14
|
66
|
+
# Element end node type
|
67
|
+
TYPE_END_ELEMENT = 15
|
68
|
+
# Entity end node type
|
69
|
+
TYPE_END_ENTITY = 16
|
70
|
+
# \XML Declaration node type
|
71
|
+
TYPE_XML_DECLARATION = 17
|
72
|
+
|
73
|
+
# A list of errors encountered while parsing
|
74
|
+
attr_accessor :errors
|
75
|
+
|
76
|
+
# The \XML source
|
77
|
+
attr_reader :source
|
78
|
+
|
79
|
+
alias_method :self_closing?, :empty_element?
|
80
|
+
|
81
|
+
# :call-seq:
|
82
|
+
# Reader.new(input) { |options| ... } → Reader
|
83
|
+
# Reader.new(input, url:, encoding:, options:) { |options| ... } → Reader
|
84
|
+
#
|
85
|
+
# Create a new Reader to parse an \XML document.
|
86
|
+
#
|
87
|
+
# [Required Parameters]
|
88
|
+
# - +input+ (String | IO): The \XML document to parse.
|
89
|
+
#
|
90
|
+
# [Optional Parameters]
|
91
|
+
# - +url:+ (String) The base URL of the document.
|
92
|
+
# - +encoding:+ (String) The name of the encoding of the document.
|
93
|
+
# - +options:+ (Integer | ParseOptions) Options to control the parser behavior.
|
94
|
+
# Defaults to +ParseOptions::STRICT+.
|
95
|
+
#
|
96
|
+
# [Yields]
|
97
|
+
# If present, the block will be passed a Nokogiri::XML::ParseOptions object to modify before
|
98
|
+
# the fragment is parsed. See Nokogiri::XML::ParseOptions for more information.
|
99
|
+
def self.new(
  string_or_io,
  url_ = nil, encoding_ = nil, options_ = ParseOptions::STRICT,
  url: url_, encoding: encoding_, options: options_
)
  # Positional url/encoding/options act only as defaults for the keyword arguments, so a
  # keyword always wins when both forms are given.
  #
  # Wrap a bare Integer so the block always receives a ParseOptions object to modify.
  options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
  yield options if block_given?

  # Anything that responds to #read is treated as an IO; everything else is parsed from memory.
  if string_or_io.respond_to?(:read)
    Reader.from_io(string_or_io, url, encoding, options.to_i)
  else
    Reader.from_memory(string_or_io, url, encoding, options.to_i)
  end
end
|
113
|
+
|
114
|
+
private def initialize(source, url = nil, encoding = nil) # :nodoc:
  # +url+ is accepted for signature compatibility but is not stored by this method.
  @source = source
  @errors = []
  @encoding = encoding
end
|
119
|
+
|
120
|
+
# Get the attributes and namespaces of the current node as a Hash.
|
121
|
+
#
|
122
|
+
# This is the union of Reader#attribute_hash and Reader#namespaces
|
123
|
+
#
|
124
|
+
# [Returns]
|
125
|
+
# (Hash<String, String>) Attribute names and values, and namespace prefixes and hrefs.
|
126
|
+
def attributes
  # Hash#merge returns a new Hash; on a key collision the entry from +namespaces+ wins.
  attribute_hash.merge(namespaces)
end
|
129
|
+
|
130
|
+
###
|
131
|
+
# Move the cursor through the document yielding the cursor to the block
|
132
|
+
def each
  # Without a block, return an Enumerator instead of consuming a node and then failing
  # with LocalJumpError at the first `yield`.
  return enum_for(:each) unless block_given?

  # The loop stops as soon as #read returns a falsy value.
  while (cursor = read)
    yield cursor
  end
end
|
137
|
+
end
|
138
|
+
end
|
139
|
+
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
|
4
|
+
module XML
|
5
|
+
class << self
|
6
|
+
# :call-seq:
|
7
|
+
# RelaxNG(input) → Nokogiri::XML::RelaxNG
|
8
|
+
# RelaxNG(input, options:) → Nokogiri::XML::RelaxNG
|
9
|
+
#
|
10
|
+
# Convenience method for Nokogiri::XML::RelaxNG.new
|
11
|
+
def RelaxNG(...)
  # Forward every argument, keyword and block unchanged to the class constructor.
  RelaxNG.new(...)
end
|
14
|
+
end
|
15
|
+
|
16
|
+
# Nokogiri::XML::RelaxNG is used for validating \XML against a RELAX NG schema definition.
|
17
|
+
#
|
18
|
+
# 🛡 <b>Do not use this class for untrusted schema documents.</b> RELAX NG input is always
|
19
|
+
# treated as *trusted*, meaning that the underlying parsing libraries <b>will access network
|
20
|
+
# resources</b>. This is counter to Nokogiri's "untrusted by default" security policy, but is an
|
21
|
+
# unfortunate limitation of the underlying libraries.
|
22
|
+
#
|
23
|
+
# *Example:* Determine whether an \XML document is valid.
|
24
|
+
#
|
25
|
+
# schema = Nokogiri::XML::RelaxNG.new(File.read(RELAX_NG_FILE))
|
26
|
+
# doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
|
27
|
+
# schema.valid?(doc) # Boolean
|
28
|
+
#
|
29
|
+
# *Example:* Validate an \XML document against a \RelaxNG schema, and capture any errors that are found.
|
30
|
+
#
|
31
|
+
# schema = Nokogiri::XML::RelaxNG.new(File.open(RELAX_NG_FILE))
|
32
|
+
# doc = Nokogiri::XML::Document.parse(File.open(XML_FILE))
|
33
|
+
# errors = schema.validate(doc) # Array<SyntaxError>
|
34
|
+
#
|
35
|
+
# *Example:* Validate an \XML document using a Document containing a RELAX NG schema definition.
|
36
|
+
#
|
37
|
+
# schema_doc = Nokogiri::XML::Document.parse(File.read(RELAX_NG_FILE))
|
38
|
+
# schema = Nokogiri::XML::RelaxNG.from_document(schema_doc)
|
39
|
+
# doc = Nokogiri::XML::Document.parse(File.open(XML_FILE))
|
40
|
+
# schema.valid?(doc) # Boolean
|
41
|
+
#
|
42
|
+
class RelaxNG < Nokogiri::XML::Schema
|
43
|
+
# :call-seq:
|
44
|
+
# new(input) → Nokogiri::XML::RelaxNG
|
45
|
+
# new(input, options:) → Nokogiri::XML::RelaxNG
|
46
|
+
#
|
47
|
+
# Parse a RELAX NG schema definition from a String or IO to create a new Nokogiri::XML::RelaxNG.
|
48
|
+
#
|
49
|
+
# [Parameters]
|
50
|
+
# - +input+ (String | IO) RELAX NG schema definition
|
51
|
+
# - +options:+ (Nokogiri::XML::ParseOptions)
|
52
|
+
# Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA ⚠ Unused
|
53
|
+
#
|
54
|
+
# [Returns] Nokogiri::XML::RelaxNG
|
55
|
+
#
|
56
|
+
# ⚠ +parse_options+ is currently unused by this method and is present only as a placeholder for
|
57
|
+
# future functionality.
|
58
|
+
#
|
59
|
+
# Also see convenience method Nokogiri::XML::RelaxNG()
|
60
|
+
def self.new(input, parse_options_ = ParseOptions::DEFAULT_SCHEMA, options: parse_options_)
  # The positional parse options act only as the default for the +options:+ keyword.
  # The input is parsed into a Document first; +options+ is forwarded to from_document untouched.
  from_document(Nokogiri::XML::Document.parse(input), options)
end
|
63
|
+
|
64
|
+
# :call-seq:
|
65
|
+
# read_memory(input) → Nokogiri::XML::RelaxNG
|
66
|
+
# read_memory(input, options:) → Nokogiri::XML::RelaxNG
|
67
|
+
#
|
68
|
+
# Convenience method for Nokogiri::XML::RelaxNG.new.
|
69
|
+
def self.read_memory(...)
  # TODO deprecate this method
  # Pure alias: every argument is forwarded unchanged to .new.
  new(...)
end
|
73
|
+
end
|
74
|
+
end
|
75
|
+
end
|
@@ -0,0 +1,258 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
  module XML
    module SAX
      # :markup: markdown
      #
      # The SAX::Document class is used for registering types of events you are interested in
      # handling. All of the methods on this class are available as possible events while parsing an
      # \XML document. To register for any particular event, subclass this class and implement the
      # methods you are interested in knowing about.
      #
      # To only be notified about start and end element events, write a class like this:
      #
      #     class MyHandler < Nokogiri::XML::SAX::Document
      #       def start_element name, attrs = []
      #         puts "#{name} started!"
      #       end
      #
      #       def end_element name
      #         puts "#{name} ended"
      #       end
      #     end
      #
      # You can use this event handler for any SAX-style parser included with Nokogiri.
      #
      # See also:
      #
      # - Nokogiri::XML::SAX
      # - Nokogiri::HTML4::SAX
      #
      # ### Entity Handling
      #
      # ⚠ Entity handling is complicated in a SAX parser! Please read this section carefully if
      # you're not getting the behavior you expect.
      #
      # Entities will be reported to the user via callbacks to #characters, to #reference, or
      # possibly to both. The behavior is determined by a combination of _entity type_ and the value
      # of ParserContext#replace_entities. (Recall that the default value of
      # ParserContext#replace_entities is `false`.)
      #
      # ⚠ <b>It is UNSAFE to set ParserContext#replace_entities to `true`</b> when parsing untrusted
      # documents.
      #
      # 💡 For more information on entity types, see [Wikipedia's page on
      # DTDs](https://en.wikipedia.org/wiki/Document_type_definition#Entity_declarations).
      #
      # | Entity type                          | #characters                        | #reference                          |
      # |--------------------------------------|------------------------------------|-------------------------------------|
      # | Char ref (e.g., <tt>&#146;</tt>)     | always                             | never                               |
      # | Predefined (e.g., <tt>&amp;</tt>)    | always                             | never                               |
      # | Undeclared †                         | never                              | <tt>#replace_entities == false</tt> |
      # | Internal                             | always                             | <tt>#replace_entities == false</tt> |
      # | External †                           | <tt>#replace_entities == true</tt> | <tt>#replace_entities == false</tt> |
      #
      #
      #
      # † In the case where the replacement text for the entity is unknown (e.g., an undeclared entity
      # or an external entity that could not be resolved because of network issues), then the
      # replacement text will not be reported. If ParserContext#replace_entities is `true`, this
      # means the #characters callback will not be invoked. If ParserContext#replace_entities is
      # `false`, then the #reference callback will be invoked, but with `nil` for the `content`
      # argument.
      #
      class Document
        ###
        # Called when an \XML declaration is parsed.
        #
        # [Parameters]
        # - +version+ (String) the version attribute
        # - +encoding+ (String, nil) the encoding of the document if present, else +nil+
        # - +standalone+ ("yes", "no", nil) the standalone attribute if present, else +nil+
        def xmldecl(version, encoding, standalone)
        end

        ###
        # Called when document starts parsing.
        def start_document
        end

        ###
        # Called when document ends parsing.
        def end_document
        end

        ###
        # Called at the beginning of an element.
        #
        # [Parameters]
        # - +name+ (String) the name of the element
        # - +attrs+ (Array<Array<String>>) an assoc list of namespace declarations and attributes, e.g.:
        #     [ ["xmlns:foo", "http://sample.net"], ["size", "large"] ]
        #
        # 💡If you're dealing with XML and need to handle namespaces, use the
        # #start_element_namespace method instead.
        #
        # Note that the element namespace and any attribute namespaces are not provided, and so any
        # namespaced elements or attributes will be returned as strings including the prefix:
        #
        #     parser.parse(<<~XML)
        #       <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
        #         <foo:bar foo:quux="xxx">hello world</foo:bar>
        #       </root>
        #     XML
        #
        #     assert_pattern do
        #       parser.document.start_elements => [
        #         ["root", [["xmlns:foo", "http://foo.example.com/"], ["xmlns", "http://example.com/"]]],
        #         ["foo:bar", [["foo:quux", "xxx"]]],
        #       ]
        #     end
        #
        def start_element(name, attrs = [])
        end

        ###
        # Called at the end of an element.
        #
        # [Parameters]
        # - +name+ (String) the name of the element being closed
        #
        def end_element(name)
        end

        ###
        # Called at the beginning of an element.
        #
        # [Parameters]
        # - +name+ (String) is the name of the element
        # - +attrs+ (Array<Attribute>) is an array of structs with the following properties:
        #   - +localname+ (String) the local name of the attribute
        #   - +value+ (String) the value of the attribute
        #   - +prefix+ (String, nil) the namespace prefix of the attribute
        #   - +uri+ (String, nil) the namespace URI of the attribute
        # - +prefix+ (String, nil) is the namespace prefix for the element
        # - +uri+ (String, nil) is the associated URI for the element's namespace
        # - +ns+ (Array<Array<String, String>>) is an assoc list of namespace declarations on the element
        #
        # 💡If you're dealing with HTML or don't care about namespaces, try #start_element instead.
        #
        # [Example]
        #     it "start_elements_namespace is called with namespaced attributes" do
        #       parser.parse(<<~XML)
        #         <root xmlns:foo='http://foo.example.com/'>
        #           <foo:a foo:bar='hello' />
        #         </root>
        #       XML
        #
        #       assert_pattern do
        #         parser.document.start_elements_namespace => [
        #           [
        #             "root",
        #             [],
        #             nil, nil,
        #             [["foo", "http://foo.example.com/"]], # namespace declarations
        #           ], [
        #             "a",
        #             [Nokogiri::XML::SAX::Parser::Attribute(localname: "bar", prefix: "foo", uri: "http://foo.example.com/", value: "hello")], # prefixed attribute
        #             "foo", "http://foo.example.com/", # prefix and uri for the "a" element
        #             [],
        #           ]
        #         ]
        #       end
        #     end
        #
        def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) # rubocop:disable Metrics/ParameterLists
          # Deal with SAX v1 interface: rebuild prefixed names and forward to #start_element.
          qualified_name = [prefix, name].compact.join(":")

          # A nil prefix (default namespace) becomes a plain "xmlns" declaration.
          namespace_pairs = ns.map do |ns_prefix, ns_uri|
            [["xmlns", ns_prefix].compact.join(":"), ns_uri]
          end
          attribute_pairs = attrs.map do |attr|
            [[attr.prefix, attr.localname].compact.join(":"), attr.value]
          end

          # Namespace declarations come first, followed by the attributes.
          start_element(qualified_name, namespace_pairs + attribute_pairs)
        end

        ###
        # Called at the end of an element.
        #
        # [Parameters]
        # - +name+ (String) is the name of the element
        # - +prefix+ (String, nil) is the namespace prefix for the element
        # - +uri+ (String, nil) is the associated URI for the element's namespace
        #
        def end_element_namespace(name, prefix = nil, uri = nil)
          # Deal with SAX v1 interface
          end_element([prefix, name].compact.join(":"))
        end

        ###
        # Called when character data is parsed, and for parsed entities when
        # ParserContext#replace_entities is +true+.
        #
        # [Parameters]
        # - +string+ contains the character data or entity replacement text
        #
        # ⚠ Please see Document@Entity+Handling for important information about how entities are handled.
        #
        # ⚠ This method might be called multiple times for a contiguous string of characters.
        #
        def characters(string)
        end

        ###
        # Called when a parsed entity is referenced and not replaced.
        #
        # [Parameters]
        # - +name+ (String) is the name of the entity
        # - +content+ (String, nil) is the replacement text for the entity, if known
        #
        # ⚠ Please see Document@Entity+Handling for important information about how entities are handled.
        #
        # ⚠ An internal entity may result in a call to both #characters and #reference.
        #
        # Since v1.17.0
        #
        def reference(name, content)
        end

        ###
        # Called when comments are encountered
        # [Parameters]
        # - +string+ contains the comment data
        def comment(string)
        end

        ###
        # Called on document warnings
        # [Parameters]
        # - +string+ contains the warning
        def warning(string)
        end

        ###
        # Called on document errors
        # [Parameters]
        # - +string+ contains the error
        def error(string)
        end

        ###
        # Called when cdata blocks are found
        # [Parameters]
        # - +string+ contains the cdata content
        def cdata_block(string)
        end

        ###
        # Called when processing instructions are found
        # [Parameters]
        # - +name+ is the target of the instruction
        # - +content+ is the value of the instruction
        def processing_instruction(name, content)
        end
      end
    end
  end
end
|