nokogiri 1.18.0.rc1-x86_64-linux-gnu
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +7 -0
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +293 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +42 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1173 -0
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +336 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +171 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +115 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +82 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +244 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +166 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +41 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +251 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +98 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1390 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +671 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +219 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +959 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +143 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +87 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1382 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +106 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +477 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +136 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +438 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +962 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +188 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +436 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +215 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +102 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +249 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +366 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +347 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +489 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +579 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +633 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +138 -0
- data/ext/nokogiri/include/libxslt/attributes.h +39 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1995 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +146 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +44 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +343 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +294 -0
- data/ext/nokogiri/nokogiri.h +238 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +62 -0
- data/ext/nokogiri/xml_comment.c +57 -0
- data/ext/nokogiri/xml_document.c +784 -0
- data/ext/nokogiri/xml_document_fragment.c +29 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +131 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +112 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +181 -0
- data/ext/nokogiri/xml_node.c +2459 -0
- data/ext/nokogiri/xml_node_set.c +518 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +777 -0
- data/ext/nokogiri/xml_relax_ng.c +149 -0
- data/ext/nokogiri/xml_sax_parser.c +403 -0
- data/ext/nokogiri/xml_sax_parser_context.c +390 -0
- data/ext/nokogiri/xml_sax_push_parser.c +206 -0
- data/ext/nokogiri/xml_schema.c +226 -0
- data/ext/nokogiri/xml_syntax_error.c +93 -0
- data/ext/nokogiri/xml_text.c +59 -0
- data/ext/nokogiri/xml_xpath_context.c +502 -0
- data/ext/nokogiri/xslt_stylesheet.c +421 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/3.3/nokogiri.so +0 -0
- data/lib/nokogiri/3.4/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +58 -0
- data/lib/nokogiri/css/parser.rb +772 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +36 -0
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +57 -0
- data/lib/nokogiri/css/xpath_visitor.rb +375 -0
- data/lib/nokogiri/css.rb +132 -0
- data/lib/nokogiri/decorators/slop.rb +42 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +22 -0
- data/lib/nokogiri/xml/builder.rb +494 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +514 -0
- data/lib/nokogiri/xml/document_fragment.rb +276 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +46 -0
- data/lib/nokogiri/xml/element_decl.rb +17 -0
- data/lib/nokogiri/xml/entity_decl.rb +23 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +57 -0
- data/lib/nokogiri/xml/node/save_options.rb +76 -0
- data/lib/nokogiri/xml/node.rb +1650 -0
- data/lib/nokogiri/xml/node_set.rb +449 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +73 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +139 -0
- data/lib/nokogiri/xml/relax_ng.rb +75 -0
- data/lib/nokogiri/xml/sax/document.rb +258 -0
- data/lib/nokogiri/xml/sax/parser.rb +199 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +129 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +64 -0
- data/lib/nokogiri/xml/sax.rb +54 -0
- data/lib/nokogiri/xml/schema.rb +140 -0
- data/lib/nokogiri/xml/searchable.rb +297 -0
- data/lib/nokogiri/xml/syntax_error.rb +94 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +49 -0
- data/lib/nokogiri/xml.rb +65 -0
- data/lib/nokogiri/xslt/stylesheet.rb +49 -0
- data/lib/nokogiri/xslt.rb +129 -0
- data/lib/nokogiri.rb +128 -0
- data/lib/xsd/xmlparser/nokogiri.rb +105 -0
- metadata +324 -0
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
  module XML
    # :nodoc: all
    module PP
      # Mixin that builds +#inspect+ and +#pretty_print+ output from the list of attribute names
      # returned by +inspect_attributes+, which the including class is expected to define.
      module Node
        # Attribute names whose values are collections; these are pretty-printed as bracketed
        # lists and are omitted when empty.
        COLLECTIONS = [:attribute_nodes, :children]

        # Build a one-line description of the object.
        #
        # Attributes that are +nil+/+false+, that respond to +empty?+ and are empty, or whose
        # reader raises NoMethodError are left out.
        #
        # When the object declares exactly one inspect attribute, only that attribute's value is
        # shown (without a "name=" prefix); otherwise each attribute is shown as "name=value".
        #
        # [Returns] (String)
        def inspect
          # handle the case where an exception is thrown during object construction
          if respond_to?(:data_ptr?) && !data_ptr?
            return "#<#{self.class}:#{format("0x%x", object_id)} (no data)>"
          end

          attributes = inspect_attributes.reject do |x|
            attribute = send(x)
            !attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
          rescue NoMethodError
            true
          end
          attributes = if inspect_attributes.length == 1
            # The single attribute may have been filtered out above; calling send(nil) would
            # raise a TypeError, so describe the object with no attributes instead.
            attributes.empty? ? "" : send(attributes.first).inspect
          else
            attributes.map do |attribute|
              "#{attribute}=#{send(attribute).inspect}"
            end.join(" ")
          end
          "#<#{self.class}:#{format("0x%x", object_id)} #{attributes}>"
        end

        # Pretty-print the object using the given printer.
        #
        # [Parameters]
        # - +pp+ the pretty-printer; this method calls +group+, +breakable+, +seplist+, +text+
        #   and +pp+ on it.
        #
        # Attributes the object does not respond to, attributes with a +nil+/+false+ value, and
        # empty COLLECTIONS are left out.
        def pretty_print(pp)
          # Anonymous classes have no name; fall back to their default string form.
          nice_name = (self.class.name || self.class.to_s).split("::").last
          pp.group(2, "#(#{nice_name}:#{format("0x%x", object_id)} {", "})") do
            pp.breakable

            attrs = inspect_attributes.filter_map do |attr_name|
              next unless respond_to?(attr_name)

              value = send(attr_name)
              next unless value
              next if COLLECTIONS.include?(attr_name) && value.empty?

              [attr_name, value]
            end

            if inspect_attributes.length == 1
              # The single attribute may have been filtered out; print nothing in that case
              # rather than calling #last on nil.
              pp.pp(attrs.first.last) unless attrs.empty?
            else
              pp.seplist(attrs) do |v|
                if COLLECTIONS.include?(v.first)
                  pp.group(2, "#{v.first} = [", "]") do
                    pp.breakable
                    pp.seplist(v.last) do |item|
                      pp.pp(item)
                    end
                  end
                else
                  pp.text("#{v.first} = ")
                  pp.pp(v.last)
                end
              end
            end

            pp.breakable
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,139 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
  module XML
    ###
    # The Reader parser allows you to effectively pull parse an \XML document. Once instantiated,
    # call Nokogiri::XML::Reader#each to iterate over each node.
    #
    # Nokogiri::XML::Reader parses an \XML document similar to the way a cursor would move. The
    # Reader is given an \XML document, and yields nodes to an each block.
    #
    # The Reader parser might be good for when you need the speed and low memory usage of a \SAX
    # parser, but do not want to write a SAX::Document handler.
    #
    # Here is an example of usage:
    #
    #     reader = Nokogiri::XML::Reader.new <<~XML
    #       <x xmlns:tenderlove='http://tenderlovemaking.com/'>
    #         <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
    #       </x>
    #     XML
    #
    #     reader.each do |node|
    #       # node is an instance of Nokogiri::XML::Reader
    #       puts node.name
    #     end
    #
    # ⚠ Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
    # document, you must parse the document again. It may be better to capture all information you
    # need during a single iteration.
    #
    # ⚠ libxml2 does not support error recovery in the Reader parser. The +RECOVER+ ParseOption is
    # ignored. If a syntax error is encountered during parsing, an exception will be raised.
    class Reader
      include Enumerable

      TYPE_NONE = 0
      # Element node type
      TYPE_ELEMENT = 1
      # Attribute node type
      TYPE_ATTRIBUTE = 2
      # Text node type
      TYPE_TEXT = 3
      # CDATA node type
      TYPE_CDATA = 4
      # Entity Reference node type
      TYPE_ENTITY_REFERENCE = 5
      # Entity node type
      TYPE_ENTITY = 6
      # PI node type
      TYPE_PROCESSING_INSTRUCTION = 7
      # Comment node type
      TYPE_COMMENT = 8
      # Document node type
      TYPE_DOCUMENT = 9
      # Document Type node type
      TYPE_DOCUMENT_TYPE = 10
      # Document Fragment node type
      TYPE_DOCUMENT_FRAGMENT = 11
      # Notation node type
      TYPE_NOTATION = 12
      # Whitespace node type
      TYPE_WHITESPACE = 13
      # Significant Whitespace node type
      TYPE_SIGNIFICANT_WHITESPACE = 14
      # Element end node type
      TYPE_END_ELEMENT = 15
      # Entity end node type
      TYPE_END_ENTITY = 16
      # \XML Declaration node type
      TYPE_XML_DECLARATION = 17

      # A list of errors encountered while parsing
      attr_accessor :errors

      # The \XML source
      attr_reader :source

      alias_method :self_closing?, :empty_element?

      # :call-seq:
      #   Reader.new(input) { |options| ... } → Reader
      #   Reader.new(input, url:, encoding:, options:) { |options| ... } → Reader
      #
      # Create a new Reader to parse an \XML document.
      #
      # [Required Parameters]
      # - +input+ (String | IO): The \XML document to parse. Anything that responds to +read+ is
      #   treated as an IO; everything else is parsed from memory.
      #
      # [Optional Parameters]
      # - +url:+ (String) The base URL of the document.
      # - +encoding:+ (String) The name of the encoding of the document.
      # - +options:+ (Integer | ParseOptions) Options to control the parser behavior.
      #   Defaults to +ParseOptions::STRICT+.
      #
      # The optional parameters may also be passed positionally, in the order +url+, +encoding+,
      # +options+; a keyword argument takes precedence over its positional counterpart.
      #
      # [Yields]
      #   If present, the block will be passed a Nokogiri::XML::ParseOptions object to modify before
      #   the document is parsed. See Nokogiri::XML::ParseOptions for more information.
      def self.new(
        string_or_io,
        url_ = nil, encoding_ = nil, options_ = ParseOptions::STRICT,
        url: url_, encoding: encoding_, options: options_
      )
        # Wrap a bare bitmask so the block receives a ParseOptions object it can modify.
        options = Nokogiri::XML::ParseOptions.new(options) if options.is_a?(Integer)
        yield options if block_given?

        if string_or_io.respond_to?(:read)
          return Reader.from_io(string_or_io, url, encoding, options.to_i)
        end

        Reader.from_memory(string_or_io, url, encoding, options.to_i)
      end

      # Record the source and encoding and start with an empty error list. The +url+ argument is
      # accepted but not used here.
      private def initialize(source, url = nil, encoding = nil) # :nodoc:
        @source = source
        @errors = []
        @encoding = encoding
      end

      # Get the attributes and namespaces of the current node as a Hash.
      #
      # This is the union of Reader#attribute_hash and Reader#namespaces
      #
      # [Returns]
      #   (Hash<String, String>) Attribute names and values, and namespace prefixes and hrefs.
      def attributes
        attribute_hash.merge(namespaces)
      end

      ###
      # Move the cursor through the document yielding the cursor to the block.
      #
      # When called without a block, returns an Enumerator instead of advancing the cursor and
      # then failing with LocalJumpError.
      def each
        return enum_for(:each) unless block_given?

        while (cursor = read)
          yield cursor
        end
      end
    end
  end
end
|
@@ -0,0 +1,75 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
  module XML
    # :call-seq:
    #   RelaxNG(input) → Nokogiri::XML::RelaxNG
    #   RelaxNG(input, options:) → Nokogiri::XML::RelaxNG
    #
    # Shorthand for Nokogiri::XML::RelaxNG.new; all arguments are forwarded unchanged.
    def self.RelaxNG(...)
      RelaxNG.new(...)
    end

    # Nokogiri::XML::RelaxNG validates \XML documents against a RELAX NG schema definition.
    #
    # 🛡 <b>Do not use this class for untrusted schema documents.</b> RELAX NG input is always
    # treated as *trusted*, meaning that the underlying parsing libraries <b>will access network
    # resources</b>. This is counter to Nokogiri's "untrusted by default" security policy, but is an
    # unfortunate limitation of the underlying libraries.
    #
    # *Example:* Determine whether an \XML document is valid.
    #
    #   schema = Nokogiri::XML::RelaxNG.new(File.read(RELAX_NG_FILE))
    #   doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
    #   schema.valid?(doc) # Boolean
    #
    # *Example:* Validate an \XML document against a \RelaxNG schema, and capture any errors that are found.
    #
    #   schema = Nokogiri::XML::RelaxNG.new(File.open(RELAX_NG_FILE))
    #   doc = Nokogiri::XML::Document.parse(File.open(XML_FILE))
    #   errors = schema.validate(doc) # Array<SyntaxError>
    #
    # *Example:* Validate an \XML document using a Document containing a RELAX NG schema definition.
    #
    #   schema_doc = Nokogiri::XML::Document.parse(File.read(RELAX_NG_FILE))
    #   schema = Nokogiri::XML::RelaxNG.from_document(schema_doc)
    #   doc = Nokogiri::XML::Document.parse(File.open(XML_FILE))
    #   schema.valid?(doc) # Boolean
    #
    class RelaxNG < Nokogiri::XML::Schema
      class << self
        # :call-seq:
        #   new(input) → Nokogiri::XML::RelaxNG
        #   new(input, options:) → Nokogiri::XML::RelaxNG
        #
        # Build a Nokogiri::XML::RelaxNG from a RELAX NG schema definition given as a String or IO.
        # The input is parsed into a Document, which is then handed to +from_document+.
        #
        # [Parameters]
        # - +input+ (String | IO) RELAX NG schema definition
        # - +options:+ (Nokogiri::XML::ParseOptions)
        #   Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA ⚠ Unused
        #
        # [Returns] Nokogiri::XML::RelaxNG
        #
        # ⚠ +options+ is currently unused by this method and is present only as a placeholder for
        # future functionality.
        #
        # Also see convenience method Nokogiri::XML::RelaxNG()
        def new(input, parse_options_ = ParseOptions::DEFAULT_SCHEMA, options: parse_options_)
          schema_document = Nokogiri::XML::Document.parse(input)
          from_document(schema_document, options)
        end

        # :call-seq:
        #   read_memory(input) → Nokogiri::XML::RelaxNG
        #   read_memory(input, options:) → Nokogiri::XML::RelaxNG
        #
        # Convenience method for Nokogiri::XML::RelaxNG.new.
        def read_memory(...)
          # TODO deprecate this method
          new(...)
        end
      end
    end
  end
end
|
@@ -0,0 +1,258 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
  module XML
    module SAX
      # :markup: markdown
      #
      # SAX::Document is the base class for SAX event handlers. Every method defined here is an
      # event that may be fired while parsing an \XML document. To handle a particular event,
      # subclass this class and override the corresponding method.
      #
      # To only be notified about start and end element events, write a class like this:
      #
      #     class MyHandler < Nokogiri::XML::SAX::Document
      #       def start_element name, attrs = []
      #         puts "#{name} started!"
      #       end
      #
      #       def end_element name
      #         puts "#{name} ended"
      #       end
      #     end
      #
      # You can use this event handler for any SAX-style parser included with Nokogiri.
      #
      # See also:
      #
      # - Nokogiri::XML::SAX
      # - Nokogiri::HTML4::SAX
      #
      # ### Entity Handling
      #
      # ⚠ Entity handling is complicated in a SAX parser! Please read this section carefully if
      # you're not getting the behavior you expect.
      #
      # Entities will be reported to the user via callbacks to #characters, to #reference, or
      # possibly to both. The behavior is determined by a combination of _entity type_ and the value
      # of ParserContext#replace_entities. (Recall that the default value of
      # ParserContext#replace_entities is `false`.)
      #
      # ⚠ <b>It is UNSAFE to set ParserContext#replace_entities to `true`</b> when parsing untrusted
      # documents.
      #
      # 💡 For more information on entity types, see [Wikipedia's page on
      # DTDs](https://en.wikipedia.org/wiki/Document_type_definition#Entity_declarations).
      #
      # | Entity type                          | #characters                        | #reference                          |
      # |--------------------------------------|------------------------------------|-------------------------------------|
      # | Char ref (e.g., <tt>&#146;</tt>)     | always                             | never                               |
      # | Predefined (e.g., <tt>&amp;</tt>)    | always                             | never                               |
      # | Undeclared †                         | never                              | <tt>#replace_entities == false</tt> |
      # | Internal                             | always                             | <tt>#replace_entities == false</tt> |
      # | External †                           | <tt>#replace_entities == true</tt> | <tt>#replace_entities == false</tt> |
      #
      # &nbsp;
      #
      # † In the case where the replacement text for the entity is unknown (e.g., an undeclared entity
      # or an external entity that could not be resolved because of network issues), then the
      # replacement text will not be reported. If ParserContext#replace_entities is `true`, this
      # means the #characters callback will not be invoked. If ParserContext#replace_entities is
      # `false`, then the #reference callback will be invoked, but with `nil` for the `content`
      # argument.
      #
      class Document
        ###
        # Called when an \XML declaration is parsed.
        #
        # [Parameters]
        # - +version+ (String) the version attribute
        # - +encoding+ (String, nil) the encoding of the document if present, else +nil+
        # - +standalone+ ("yes", "no", nil) the standalone attribute if present, else +nil+
        def xmldecl(version, encoding, standalone); end

        ###
        # Called when document starts parsing.
        def start_document; end

        ###
        # Called when document ends parsing.
        def end_document; end

        ###
        # Called at the beginning of an element.
        #
        # [Parameters]
        # - +name+ (String) the name of the element
        # - +attrs+ (Array<Array<String>>) an assoc list of namespace declarations and attributes, e.g.:
        #     [ ["xmlns:foo", "http://sample.net"], ["size", "large"] ]
        #
        # 💡If you're dealing with XML and need to handle namespaces, use the
        # #start_element_namespace method instead.
        #
        # Note that the element namespace and any attribute namespaces are not provided, and so any
        # namespaced elements or attributes will be returned as strings including the prefix:
        #
        #     parser.parse(<<~XML)
        #       <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
        #         <foo:bar foo:quux="xxx">hello world</foo:bar>
        #       </root>
        #     XML
        #
        #     assert_pattern do
        #       parser.document.start_elements => [
        #         ["root", [["xmlns:foo", "http://foo.example.com/"], ["xmlns", "http://example.com/"]]],
        #         ["foo:bar", [["foo:quux", "xxx"]]],
        #       ]
        #     end
        #
        def start_element(name, attrs = []); end

        ###
        # Called at the end of an element.
        #
        # [Parameters]
        # - +name+ (String) the name of the element being closed
        #
        def end_element(name); end

        ###
        # Called at the beginning of an element, with namespace information.
        #
        # The default implementation translates the event into the SAX v1 form and calls
        # #start_element with the prefixed element name and an assoc list made of the namespace
        # declarations followed by the prefixed attributes.
        #
        # [Parameters]
        # - +name+ (String) is the name of the element
        # - +attrs+ (Array<Attribute>) is an array of structs with the following properties:
        #   - +localname+ (String) the local name of the attribute
        #   - +value+ (String) the value of the attribute
        #   - +prefix+ (String, nil) the namespace prefix of the attribute
        #   - +uri+ (String, nil) the namespace URI of the attribute
        # - +prefix+ (String, nil) is the namespace prefix for the element
        # - +uri+ (String, nil) is the associated URI for the element's namespace
        # - +ns+ (Array<Array<String, String>>) is an assoc list of namespace declarations on the element
        #
        # 💡If you're dealing with HTML or don't care about namespaces, try #start_element instead.
        #
        # [Example]
        #     it "start_elements_namespace is called with namespaced attributes" do
        #       parser.parse(<<~XML)
        #         <root xmlns:foo='http://foo.example.com/'>
        #           <foo:a foo:bar='hello' />
        #         </root>
        #       XML
        #
        #       assert_pattern do
        #         parser.document.start_elements_namespace => [
        #           [
        #             "root",
        #             [],
        #             nil, nil,
        #             [["foo", "http://foo.example.com/"]], # namespace declarations
        #           ], [
        #             "a",
        #             [Nokogiri::XML::SAX::Parser::Attribute(localname: "bar", prefix: "foo", uri: "http://foo.example.com/", value: "hello")], # prefixed attribute
        #             "foo", "http://foo.example.com/", # prefix and uri for the "a" element
        #             [],
        #           ]
        #         ]
        #       end
        #     end
        #
        def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = []) # rubocop:disable Metrics/ParameterLists
          # Deal with SAX v1 interface
          # Joins the non-nil parts with ":" so that a missing prefix yields the bare name.
          qualify = ->(*parts) { parts.compact.join(":") }

          qualified_name = qualify.call(prefix, name)
          declarations = ns.map { |ns_prefix, ns_uri| [qualify.call("xmlns", ns_prefix), ns_uri] }
          prefixed_attrs = attrs.map { |attr| [qualify.call(attr.prefix, attr.localname), attr.value] }

          start_element(qualified_name, declarations + prefixed_attrs)
        end

        ###
        # Called at the end of an element, with namespace information.
        #
        # The default implementation calls #end_element with the prefixed element name.
        #
        # [Parameters]
        # - +name+ (String) is the name of the element
        # - +prefix+ (String, nil) is the namespace prefix for the element
        # - +uri+ (String, nil) is the associated URI for the element's namespace
        #
        def end_element_namespace(name, prefix = nil, uri = nil)
          # Deal with SAX v1 interface
          qualified_name = [prefix, name].compact.join(":")
          end_element(qualified_name)
        end

        ###
        # Called when character data is parsed, and for parsed entities when
        # ParserContext#replace_entities is +true+.
        #
        # [Parameters]
        # - +string+ contains the character data or entity replacement text
        #
        # ⚠ Please see Document@Entity+Handling for important information about how entities are handled.
        #
        # ⚠ This method might be called multiple times for a contiguous string of characters.
        #
        def characters(string); end

        ###
        # Called when a parsed entity is referenced and not replaced.
        #
        # [Parameters]
        # - +name+ (String) is the name of the entity
        # - +content+ (String, nil) is the replacement text for the entity, if known
        #
        # ⚠ Please see Document@Entity+Handling for important information about how entities are handled.
        #
        # ⚠ An internal entity may result in a call to both #characters and #reference.
        #
        # Since v1.17.0
        #
        def reference(name, content); end

        ###
        # Called when comments are encountered
        # [Parameters]
        # - +string+ contains the comment data
        def comment(string); end

        ###
        # Called on document warnings
        # [Parameters]
        # - +string+ contains the warning
        def warning(string); end

        ###
        # Called on document errors
        # [Parameters]
        # - +string+ contains the error
        def error(string); end

        ###
        # Called when cdata blocks are found
        # [Parameters]
        # - +string+ contains the cdata content
        def cdata_block(string); end

        ###
        # Called when processing instructions are found
        # [Parameters]
        # - +name+ is the target of the instruction
        # - +content+ is the value of the instruction
        def processing_instruction(name, content); end
      end
    end
  end
end
|