nokogiri 1.10.9 → 1.18.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +4 -4
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +1632 -1022
- data/LICENSE.md +1 -1
- data/README.md +190 -95
- data/bin/nokogiri +63 -50
- data/dependencies.yml +34 -66
- data/ext/nokogiri/depend +38 -358
- data/ext/nokogiri/extconf.rb +909 -422
- data/ext/nokogiri/gumbo.c +610 -0
- data/ext/nokogiri/html4_document.c +171 -0
- data/ext/nokogiri/html4_element_description.c +299 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser.c +40 -0
- data/ext/nokogiri/html4_sax_parser_context.c +98 -0
- data/ext/nokogiri/html4_sax_push_parser.c +96 -0
- data/ext/nokogiri/libxml2_polyfill.c +114 -0
- data/ext/nokogiri/nokogiri.c +258 -105
- data/ext/nokogiri/nokogiri.h +207 -90
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +18 -18
- data/ext/nokogiri/xml_attribute_decl.c +22 -22
- data/ext/nokogiri/xml_cdata.c +33 -33
- data/ext/nokogiri/xml_comment.c +19 -31
- data/ext/nokogiri/xml_document.c +499 -323
- data/ext/nokogiri/xml_document_fragment.c +17 -36
- data/ext/nokogiri/xml_dtd.c +65 -59
- data/ext/nokogiri/xml_element_content.c +63 -55
- data/ext/nokogiri/xml_element_decl.c +31 -31
- data/ext/nokogiri/xml_encoding_handler.c +54 -21
- data/ext/nokogiri/xml_entity_decl.c +37 -35
- data/ext/nokogiri/xml_entity_reference.c +17 -19
- data/ext/nokogiri/xml_namespace.c +131 -61
- data/ext/nokogiri/xml_node.c +1429 -723
- data/ext/nokogiri/xml_node_set.c +257 -225
- data/ext/nokogiri/xml_processing_instruction.c +18 -20
- data/ext/nokogiri/xml_reader.c +340 -231
- data/ext/nokogiri/xml_relax_ng.c +87 -99
- data/ext/nokogiri/xml_sax_parser.c +269 -176
- data/ext/nokogiri/xml_sax_parser_context.c +286 -152
- data/ext/nokogiri/xml_sax_push_parser.c +111 -64
- data/ext/nokogiri/xml_schema.c +132 -140
- data/ext/nokogiri/xml_syntax_error.c +52 -23
- data/ext/nokogiri/xml_text.c +37 -30
- data/ext/nokogiri/xml_xpath_context.c +373 -185
- data/ext/nokogiri/xslt_stylesheet.c +342 -191
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +129 -0
- data/gumbo-parser/THANKS +27 -0
- data/gumbo-parser/src/Makefile +34 -0
- data/gumbo-parser/src/README.md +41 -0
- data/gumbo-parser/src/ascii.c +75 -0
- data/gumbo-parser/src/ascii.h +115 -0
- data/gumbo-parser/src/attribute.c +42 -0
- data/gumbo-parser/src/attribute.h +17 -0
- data/gumbo-parser/src/char_ref.c +22225 -0
- data/gumbo-parser/src/char_ref.h +29 -0
- data/gumbo-parser/src/char_ref.rl +2154 -0
- data/gumbo-parser/src/error.c +658 -0
- data/gumbo-parser/src/error.h +152 -0
- data/gumbo-parser/src/foreign_attrs.c +103 -0
- data/gumbo-parser/src/foreign_attrs.gperf +27 -0
- data/gumbo-parser/src/insertion_mode.h +33 -0
- data/gumbo-parser/src/macros.h +91 -0
- data/gumbo-parser/src/nokogiri_gumbo.h +953 -0
- data/gumbo-parser/src/parser.c +4932 -0
- data/gumbo-parser/src/parser.h +41 -0
- data/gumbo-parser/src/replacement.h +33 -0
- data/gumbo-parser/src/string_buffer.c +103 -0
- data/gumbo-parser/src/string_buffer.h +68 -0
- data/gumbo-parser/src/string_piece.c +48 -0
- data/gumbo-parser/src/svg_attrs.c +174 -0
- data/gumbo-parser/src/svg_attrs.gperf +77 -0
- data/gumbo-parser/src/svg_tags.c +137 -0
- data/gumbo-parser/src/svg_tags.gperf +55 -0
- data/gumbo-parser/src/tag.c +223 -0
- data/gumbo-parser/src/tag_lookup.c +382 -0
- data/gumbo-parser/src/tag_lookup.gperf +170 -0
- data/gumbo-parser/src/tag_lookup.h +13 -0
- data/gumbo-parser/src/token_buffer.c +79 -0
- data/gumbo-parser/src/token_buffer.h +71 -0
- data/gumbo-parser/src/token_type.h +17 -0
- data/gumbo-parser/src/tokenizer.c +3464 -0
- data/gumbo-parser/src/tokenizer.h +112 -0
- data/gumbo-parser/src/tokenizer_states.h +339 -0
- data/gumbo-parser/src/utf8.c +245 -0
- data/gumbo-parser/src/utf8.h +164 -0
- data/gumbo-parser/src/util.c +66 -0
- data/gumbo-parser/src/util.h +34 -0
- data/gumbo-parser/src/vector.c +111 -0
- data/gumbo-parser/src/vector.h +45 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +14 -8
- data/lib/nokogiri/css/parser.rb +399 -377
- data/lib/nokogiri/css/parser.y +250 -245
- data/lib/nokogiri/css/parser_extras.rb +16 -71
- data/lib/nokogiri/css/selector_cache.rb +38 -0
- data/lib/nokogiri/css/syntax_error.rb +3 -1
- data/lib/nokogiri/css/tokenizer.rb +7 -5
- data/lib/nokogiri/css/tokenizer.rex +11 -9
- data/lib/nokogiri/css/xpath_visitor.rb +242 -96
- data/lib/nokogiri/css.rb +122 -17
- data/lib/nokogiri/decorators/slop.rb +11 -11
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +38 -27
- data/lib/nokogiri/{html → html4}/builder.rb +4 -2
- data/lib/nokogiri/html4/document.rb +235 -0
- data/lib/nokogiri/html4/document_fragment.rb +166 -0
- data/lib/nokogiri/{html → html4}/element_description.rb +3 -1
- data/lib/nokogiri/html4/element_description_defaults.rb +2040 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/{html → html4}/entity_lookup.rb +4 -2
- data/lib/nokogiri/html4/sax/parser.rb +48 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +15 -0
- data/lib/nokogiri/{html → html4}/sax/push_parser.rb +12 -11
- data/lib/nokogiri/html4.rb +42 -0
- data/lib/nokogiri/html5/builder.rb +40 -0
- data/lib/nokogiri/html5/document.rb +199 -0
- data/lib/nokogiri/html5/document_fragment.rb +200 -0
- data/lib/nokogiri/html5/node.rb +103 -0
- data/lib/nokogiri/html5.rb +368 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +2 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +224 -0
- data/lib/nokogiri/version.rb +3 -108
- data/lib/nokogiri/xml/attr.rb +55 -3
- data/lib/nokogiri/xml/attribute_decl.rb +6 -2
- data/lib/nokogiri/xml/builder.rb +83 -35
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/character_data.rb +2 -0
- data/lib/nokogiri/xml/document.rb +359 -130
- data/lib/nokogiri/xml/document_fragment.rb +170 -54
- data/lib/nokogiri/xml/dtd.rb +4 -2
- data/lib/nokogiri/xml/element_content.rb +12 -2
- data/lib/nokogiri/xml/element_decl.rb +6 -2
- data/lib/nokogiri/xml/entity_decl.rb +7 -3
- data/lib/nokogiri/xml/entity_reference.rb +2 -0
- data/lib/nokogiri/xml/namespace.rb +44 -0
- data/lib/nokogiri/xml/node/save_options.rb +23 -8
- data/lib/nokogiri/xml/node.rb +1168 -420
- data/lib/nokogiri/xml/node_set.rb +145 -67
- data/lib/nokogiri/xml/notation.rb +13 -0
- data/lib/nokogiri/xml/parse_options.rb +145 -52
- data/lib/nokogiri/xml/pp/character_data.rb +9 -6
- data/lib/nokogiri/xml/pp/node.rb +47 -30
- data/lib/nokogiri/xml/pp.rb +4 -2
- data/lib/nokogiri/xml/processing_instruction.rb +4 -1
- data/lib/nokogiri/xml/reader.rb +68 -41
- data/lib/nokogiri/xml/relax_ng.rb +60 -17
- data/lib/nokogiri/xml/sax/document.rb +198 -111
- data/lib/nokogiri/xml/sax/parser.rb +144 -67
- data/lib/nokogiri/xml/sax/parser_context.rb +119 -6
- data/lib/nokogiri/xml/sax/push_parser.rb +9 -5
- data/lib/nokogiri/xml/sax.rb +54 -4
- data/lib/nokogiri/xml/schema.rb +116 -39
- data/lib/nokogiri/xml/searchable.rb +139 -95
- data/lib/nokogiri/xml/syntax_error.rb +29 -5
- data/lib/nokogiri/xml/text.rb +2 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +4 -2
- data/lib/nokogiri/xml/xpath.rb +15 -4
- data/lib/nokogiri/xml/xpath_context.rb +15 -4
- data/lib/nokogiri/xml.rb +45 -55
- data/lib/nokogiri/xslt/stylesheet.rb +32 -8
- data/lib/nokogiri/xslt.rb +103 -30
- data/lib/nokogiri.rb +59 -75
- data/lib/xsd/xmlparser/nokogiri.rb +32 -29
- data/patches/libxml2/0009-allow-wildcard-namespaces.patch +77 -0
- data/patches/libxml2/0010-update-config.guess-and-config.sub-for-libxml2.patch +224 -0
- data/patches/libxml2/0011-rip-out-libxml2-s-libc_single_threaded-support.patch +30 -0
- data/patches/libxml2/0019-xpath-Use-separate-static-hash-table-for-standard-fu.patch +244 -0
- data/patches/libxslt/0001-update-config.guess-and-config.sub-for-libxslt.patch +224 -0
- data/ports/archives/libxml2-2.13.6.tar.xz +0 -0
- data/ports/archives/libxslt-1.1.42.tar.xz +0 -0
- metadata +123 -295
- data/ext/nokogiri/html_document.c +0 -170
- data/ext/nokogiri/html_document.h +0 -10
- data/ext/nokogiri/html_element_description.c +0 -279
- data/ext/nokogiri/html_element_description.h +0 -10
- data/ext/nokogiri/html_entity_lookup.c +0 -32
- data/ext/nokogiri/html_entity_lookup.h +0 -8
- data/ext/nokogiri/html_sax_parser_context.c +0 -116
- data/ext/nokogiri/html_sax_parser_context.h +0 -11
- data/ext/nokogiri/html_sax_push_parser.c +0 -87
- data/ext/nokogiri/html_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_attr.h +0 -9
- data/ext/nokogiri/xml_attribute_decl.h +0 -9
- data/ext/nokogiri/xml_cdata.h +0 -9
- data/ext/nokogiri/xml_comment.h +0 -9
- data/ext/nokogiri/xml_document.h +0 -23
- data/ext/nokogiri/xml_document_fragment.h +0 -10
- data/ext/nokogiri/xml_dtd.h +0 -10
- data/ext/nokogiri/xml_element_content.h +0 -10
- data/ext/nokogiri/xml_element_decl.h +0 -9
- data/ext/nokogiri/xml_encoding_handler.h +0 -8
- data/ext/nokogiri/xml_entity_decl.h +0 -10
- data/ext/nokogiri/xml_entity_reference.h +0 -9
- data/ext/nokogiri/xml_io.c +0 -61
- data/ext/nokogiri/xml_io.h +0 -11
- data/ext/nokogiri/xml_libxml2_hacks.c +0 -112
- data/ext/nokogiri/xml_libxml2_hacks.h +0 -12
- data/ext/nokogiri/xml_namespace.h +0 -14
- data/ext/nokogiri/xml_node.h +0 -13
- data/ext/nokogiri/xml_node_set.h +0 -12
- data/ext/nokogiri/xml_processing_instruction.h +0 -9
- data/ext/nokogiri/xml_reader.h +0 -10
- data/ext/nokogiri/xml_relax_ng.h +0 -9
- data/ext/nokogiri/xml_sax_parser.h +0 -39
- data/ext/nokogiri/xml_sax_parser_context.h +0 -10
- data/ext/nokogiri/xml_sax_push_parser.h +0 -9
- data/ext/nokogiri/xml_schema.h +0 -9
- data/ext/nokogiri/xml_syntax_error.h +0 -13
- data/ext/nokogiri/xml_text.h +0 -9
- data/ext/nokogiri/xml_xpath_context.h +0 -10
- data/ext/nokogiri/xslt_stylesheet.h +0 -14
- data/lib/nokogiri/html/document.rb +0 -335
- data/lib/nokogiri/html/document_fragment.rb +0 -49
- data/lib/nokogiri/html/element_description_defaults.rb +0 -671
- data/lib/nokogiri/html/sax/parser.rb +0 -62
- data/lib/nokogiri/html/sax/parser_context.rb +0 -16
- data/patches/libxml2/0001-Revert-Do-not-URI-escape-in-server-side-includes.patch +0 -78
- data/patches/libxml2/0004-libxml2.la-is-in-top_builddir.patch +0 -25
- data/patches/libxml2/0005-Fix-infinite-loop-in-xmlStringLenDecodeEntities.patch +0 -32
- data/ports/archives/libxml2-2.9.10.tar.gz +0 -0
- data/ports/archives/libxslt-1.1.34.tar.gz +0 -0
- /data/patches/libxml2/{0002-Remove-script-macro-support.patch → 0001-Remove-script-macro-support.patch} +0 -0
- /data/patches/libxml2/{0003-Update-entities-to-remove-handling-of-ssi.patch → 0002-Update-entities-to-remove-handling-of-ssi.patch} +0 -0
@@ -1,16 +1,19 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
4
|
module XML
|
5
|
+
# :nodoc: all
|
3
6
|
module PP
|
4
7
|
module CharacterData
|
5
|
-
def pretty_print
|
6
|
-
nice_name = self.class.name.split(
|
7
|
-
pp.group(2, "#(#{nice_name} ",
|
8
|
-
pp.pp
|
8
|
+
def pretty_print(pp)
|
9
|
+
nice_name = self.class.name.split("::").last
|
10
|
+
pp.group(2, "#(#{nice_name} ", ")") do
|
11
|
+
pp.pp(text)
|
9
12
|
end
|
10
13
|
end
|
11
14
|
|
12
|
-
def inspect
|
13
|
-
"#<#{self.class.name}:#{
|
15
|
+
def inspect
|
16
|
+
"#<#{self.class.name}:#{format("0x%x", object_id)} #{text.inspect}>"
|
14
17
|
end
|
15
18
|
end
|
16
19
|
end
|
data/lib/nokogiri/xml/pp/node.rb
CHANGED
@@ -1,53 +1,70 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
4
|
module XML
|
5
|
+
# :nodoc: all
|
3
6
|
module PP
|
4
7
|
module Node
|
5
|
-
|
6
|
-
attributes = inspect_attributes.reject { |x|
|
7
|
-
begin
|
8
|
-
attribute = send x
|
9
|
-
!attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
|
10
|
-
rescue NoMethodError
|
11
|
-
true
|
12
|
-
end
|
13
|
-
}.map { |attribute|
|
14
|
-
"#{attribute.to_s.sub(/_\w+/, 's')}=#{send(attribute).inspect}"
|
15
|
-
}.join ' '
|
16
|
-
"#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{attributes}>"
|
17
|
-
end
|
8
|
+
COLLECTIONS = [:attribute_nodes, :children]
|
18
9
|
|
19
|
-
def
|
20
|
-
|
21
|
-
|
10
|
+
def inspect
|
11
|
+
# handle the case where an exception is thrown during object construction
|
12
|
+
if respond_to?(:data_ptr?) && !data_ptr?
|
13
|
+
return "#<#{self.class}:#{format("0x%x", object_id)} (no data)>"
|
14
|
+
end
|
22
15
|
|
16
|
+
attributes = inspect_attributes.reject do |x|
|
17
|
+
attribute = send(x)
|
18
|
+
!attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
|
19
|
+
rescue NoMethodError
|
20
|
+
true
|
21
|
+
end
|
22
|
+
attributes = if inspect_attributes.length == 1
|
23
|
+
send(attributes.first).inspect
|
24
|
+
else
|
25
|
+
attributes.map do |attribute|
|
26
|
+
"#{attribute}=#{send(attribute).inspect}"
|
27
|
+
end.join(" ")
|
28
|
+
end
|
29
|
+
"#<#{self.class}:#{format("0x%x", object_id)} #{attributes}>"
|
30
|
+
end
|
31
|
+
|
32
|
+
def pretty_print(pp)
|
33
|
+
nice_name = self.class.name.split("::").last
|
34
|
+
pp.group(2, "#(#{nice_name}:#{format("0x%x", object_id)} {", "})") do
|
23
35
|
pp.breakable
|
24
|
-
|
36
|
+
|
37
|
+
attrs = inspect_attributes.filter_map do |t|
|
25
38
|
[t, send(t)] if respond_to?(t)
|
26
|
-
|
39
|
+
end.find_all do |x|
|
27
40
|
if x.last
|
28
|
-
if
|
41
|
+
if COLLECTIONS.include?(x.first)
|
29
42
|
!x.last.empty?
|
30
43
|
else
|
31
44
|
true
|
32
45
|
end
|
33
46
|
end
|
34
|
-
|
47
|
+
end
|
35
48
|
|
36
|
-
|
37
|
-
|
38
|
-
|
39
|
-
|
40
|
-
|
41
|
-
|
49
|
+
if inspect_attributes.length == 1
|
50
|
+
pp.pp(attrs.first.last)
|
51
|
+
else
|
52
|
+
pp.seplist(attrs) do |v|
|
53
|
+
if COLLECTIONS.include?(v.first)
|
54
|
+
pp.group(2, "#{v.first} = [", "]") do
|
55
|
+
pp.breakable
|
56
|
+
pp.seplist(v.last) do |item|
|
57
|
+
pp.pp(item)
|
58
|
+
end
|
42
59
|
end
|
60
|
+
else
|
61
|
+
pp.text("#{v.first} = ")
|
62
|
+
pp.pp(v.last)
|
43
63
|
end
|
44
|
-
else
|
45
|
-
pp.text "#{v.first} = "
|
46
|
-
pp.pp v.last
|
47
64
|
end
|
48
65
|
end
|
49
|
-
pp.breakable
|
50
66
|
|
67
|
+
pp.breakable
|
51
68
|
end
|
52
69
|
end
|
53
70
|
end
|
data/lib/nokogiri/xml/pp.rb
CHANGED
data/lib/nokogiri/xml/reader.rb
CHANGED
@@ -1,32 +1,36 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
4
|
module XML
|
3
5
|
###
|
4
|
-
#
|
5
|
-
#
|
6
|
-
# to an each block.
|
6
|
+
# The Reader parser allows you to effectively pull parse an \XML document. Once instantiated,
|
7
|
+
# call Nokogiri::XML::Reader#each to iterate over each node.
|
7
8
|
#
|
8
|
-
#
|
9
|
+
# Nokogiri::XML::Reader parses an \XML document similar to the way a cursor would move. The
|
10
|
+
# Reader is given an \XML document, and yields nodes to an each block.
|
9
11
|
#
|
10
|
-
#
|
11
|
-
#
|
12
|
-
# <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
|
13
|
-
# </x>
|
14
|
-
# eoxml
|
12
|
+
# The Reader parser might be good for when you need the speed and low memory usage of a \SAX
|
13
|
+
# parser, but do not want to write a SAX::Document handler.
|
15
14
|
#
|
16
|
-
#
|
15
|
+
# Here is an example of usage:
|
17
16
|
#
|
18
|
-
#
|
19
|
-
#
|
17
|
+
# reader = Nokogiri::XML::Reader.new <<~XML
|
18
|
+
# <x xmlns:tenderlove='http://tenderlovemaking.com/'>
|
19
|
+
# <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
|
20
|
+
# </x>
|
21
|
+
# XML
|
20
22
|
#
|
21
|
-
#
|
23
|
+
# reader.each do |node|
|
24
|
+
# # node is an instance of Nokogiri::XML::Reader
|
25
|
+
# puts node.name
|
26
|
+
# end
|
22
27
|
#
|
23
|
-
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
# need during the first iteration.
|
28
|
+
# ⚠ Nokogiri::XML::Reader#each can only be called once! Once the cursor moves through the entire
|
29
|
+
# document, you must parse the document again. It may be better to capture all information you
|
30
|
+
# need during a single iteration.
|
27
31
|
#
|
28
|
-
#
|
29
|
-
#
|
32
|
+
# ⚠ libxml2 does not support error recovery in the Reader parser. The +RECOVER+ ParseOption is
|
33
|
+
# ignored. If a syntax error is encountered during parsing, an exception will be raised.
|
30
34
|
class Reader
|
31
35
|
include Enumerable
|
32
36
|
|
@@ -63,47 +67,70 @@ module Nokogiri
|
|
63
67
|
TYPE_END_ELEMENT = 15
|
64
68
|
# Entity end node type
|
65
69
|
TYPE_END_ENTITY = 16
|
66
|
-
# XML Declaration node type
|
70
|
+
# \XML Declaration node type
|
67
71
|
TYPE_XML_DECLARATION = 17
|
68
72
|
|
69
73
|
# A list of errors encountered while parsing
|
70
74
|
attr_accessor :errors
|
71
75
|
|
72
|
-
# The
|
73
|
-
attr_reader :encoding
|
74
|
-
|
75
|
-
# The XML source
|
76
|
+
# The \XML source
|
76
77
|
attr_reader :source
|
77
78
|
|
78
|
-
|
79
|
+
alias_method :self_closing?, :empty_element?
|
80
|
+
|
81
|
+
# :call-seq:
|
82
|
+
# Reader.new(input) { |options| ... } → Reader
|
83
|
+
# Reader.new(input, url:, encoding:, options:) { |options| ... } → Reader
|
84
|
+
#
|
85
|
+
# Create a new Reader to parse an \XML document.
|
86
|
+
#
|
87
|
+
# [Required Parameters]
|
88
|
+
# - +input+ (String | IO): The \XML document to parse.
|
89
|
+
#
|
90
|
+
# [Optional Parameters]
|
91
|
+
# - +url:+ (String) The base URL of the document.
|
92
|
+
# - +encoding:+ (String) The name of the encoding of the document.
|
93
|
+
# - +options:+ (Integer | ParseOptions) Options to control the parser behavior.
|
94
|
+
# Defaults to +ParseOptions::STRICT+.
|
95
|
+
#
|
96
|
+
# [Yields]
|
97
|
+
# If present, the block will be passed a Nokogiri::XML::ParseOptions object to modify before
|
98
|
+
# the fragment is parsed. See Nokogiri::XML::ParseOptions for more information.
|
99
|
+
def self.new(
|
100
|
+
string_or_io,
|
101
|
+
url_ = nil, encoding_ = nil, options_ = ParseOptions::STRICT,
|
102
|
+
url: url_, encoding: encoding_, options: options_
|
103
|
+
)
|
104
|
+
options = Nokogiri::XML::ParseOptions.new(options) if Integer === options
|
105
|
+
yield options if block_given?
|
106
|
+
|
107
|
+
if string_or_io.respond_to?(:read)
|
108
|
+
return Reader.from_io(string_or_io, url, encoding, options.to_i)
|
109
|
+
end
|
79
110
|
|
80
|
-
|
111
|
+
Reader.from_memory(string_or_io, url, encoding, options.to_i)
|
112
|
+
end
|
113
|
+
|
114
|
+
private def initialize(source, url = nil, encoding = nil) # :nodoc:
|
81
115
|
@source = source
|
82
116
|
@errors = []
|
83
117
|
@encoding = encoding
|
84
118
|
end
|
85
|
-
private :initialize
|
86
119
|
|
87
|
-
|
88
|
-
#
|
120
|
+
# Get the attributes and namespaces of the current node as a Hash.
|
121
|
+
#
|
122
|
+
# This is the union of Reader#attribute_hash and Reader#namespaces
|
123
|
+
#
|
124
|
+
# [Returns]
|
125
|
+
# (Hash<String, String>) Attribute names and values, and namespace prefixes and hrefs.
|
89
126
|
def attributes
|
90
|
-
|
91
|
-
[node.name, node.to_s]
|
92
|
-
}].merge(namespaces || {})
|
93
|
-
end
|
94
|
-
|
95
|
-
###
|
96
|
-
# Get a list of attributes for the current node
|
97
|
-
def attribute_nodes
|
98
|
-
nodes = attr_nodes
|
99
|
-
nodes.each { |v| v.instance_variable_set(:@_r, self) }
|
100
|
-
nodes
|
127
|
+
attribute_hash.merge(namespaces)
|
101
128
|
end
|
102
129
|
|
103
130
|
###
|
104
131
|
# Move the cursor through the document yielding the cursor to the block
|
105
132
|
def each
|
106
|
-
while cursor =
|
133
|
+
while (cursor = read)
|
107
134
|
yield cursor
|
108
135
|
end
|
109
136
|
end
|
@@ -1,32 +1,75 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
1
3
|
module Nokogiri
|
2
4
|
module XML
|
3
5
|
class << self
|
4
|
-
|
5
|
-
#
|
6
|
-
#
|
7
|
-
|
8
|
-
|
6
|
+
# :call-seq:
|
7
|
+
# RelaxNG(input) → Nokogiri::XML::RelaxNG
|
8
|
+
# RelaxNG(input, options:) → Nokogiri::XML::RelaxNG
|
9
|
+
#
|
10
|
+
# Convenience method for Nokogiri::XML::RelaxNG.new
|
11
|
+
def RelaxNG(...)
|
12
|
+
RelaxNG.new(...)
|
9
13
|
end
|
10
14
|
end
|
11
15
|
|
12
|
-
|
13
|
-
#
|
14
|
-
#
|
16
|
+
# Nokogiri::XML::RelaxNG is used for validating \XML against a RELAX NG schema definition.
|
17
|
+
#
|
18
|
+
# 🛡 <b>Do not use this class for untrusted schema documents.</b> RELAX NG input is always
|
19
|
+
# treated as *trusted*, meaning that the underlying parsing libraries <b>will access network
|
20
|
+
# resources</b>. This is counter to Nokogiri's "untrusted by default" security policy, but is an
|
21
|
+
# unfortunate limitation of the underlying libraries.
|
22
|
+
#
|
23
|
+
# *Example:* Determine whether an \XML document is valid.
|
24
|
+
#
|
25
|
+
# schema = Nokogiri::XML::RelaxNG.new(File.read(RELAX_NG_FILE))
|
26
|
+
# doc = Nokogiri::XML::Document.parse(File.read(XML_FILE))
|
27
|
+
# schema.valid?(doc) # Boolean
|
15
28
|
#
|
16
|
-
#
|
29
|
+
# *Example:* Validate an \XML document against a \RelaxNG schema, and capture any errors that are found.
|
17
30
|
#
|
18
|
-
#
|
19
|
-
#
|
31
|
+
# schema = Nokogiri::XML::RelaxNG.new(File.open(RELAX_NG_FILE))
|
32
|
+
# doc = Nokogiri::XML::Document.parse(File.open(XML_FILE))
|
33
|
+
# errors = schema.validate(doc) # Array<SyntaxError>
|
20
34
|
#
|
21
|
-
#
|
22
|
-
# doc = Nokogiri::XML(File.open(ADDRESS_XML_FILE))
|
35
|
+
# *Example:* Validate an \XML document using a Document containing a RELAX NG schema definition.
|
23
36
|
#
|
24
|
-
#
|
25
|
-
#
|
26
|
-
#
|
37
|
+
# schema_doc = Nokogiri::XML::Document.parse(File.read(RELAX_NG_FILE))
|
38
|
+
# schema = Nokogiri::XML::RelaxNG.from_document(schema_doc)
|
39
|
+
# doc = Nokogiri::XML::Document.parse(File.open(XML_FILE))
|
40
|
+
# schema.valid?(doc) # Boolean
|
27
41
|
#
|
28
|
-
# The list of errors are Nokogiri::XML::SyntaxError objects.
|
29
42
|
class RelaxNG < Nokogiri::XML::Schema
|
43
|
+
# :call-seq:
|
44
|
+
# new(input) → Nokogiri::XML::RelaxNG
|
45
|
+
# new(input, options:) → Nokogiri::XML::RelaxNG
|
46
|
+
#
|
47
|
+
# Parse a RELAX NG schema definition from a String or IO to create a new Nokogiri::XML::RelaxNG.
|
48
|
+
#
|
49
|
+
# [Parameters]
|
50
|
+
# - +input+ (String | IO) RELAX NG schema definition
|
51
|
+
# - +options:+ (Nokogiri::XML::ParseOptions)
|
52
|
+
# Defaults to Nokogiri::XML::ParseOptions::DEFAULT_SCHEMA ⚠ Unused
|
53
|
+
#
|
54
|
+
# [Returns] Nokogiri::XML::RelaxNG
|
55
|
+
#
|
56
|
+
# ⚠ +parse_options+ is currently unused by this method and is present only as a placeholder for
|
57
|
+
# future functionality.
|
58
|
+
#
|
59
|
+
# Also see convenience method Nokogiri::XML::RelaxNG()
|
60
|
+
def self.new(input, parse_options_ = ParseOptions::DEFAULT_SCHEMA, options: parse_options_)
|
61
|
+
from_document(Nokogiri::XML::Document.parse(input), options)
|
62
|
+
end
|
63
|
+
|
64
|
+
# :call-seq:
|
65
|
+
# read_memory(input) → Nokogiri::XML::RelaxNG
|
66
|
+
# read_memory(input, options:) → Nokogiri::XML::RelaxNG
|
67
|
+
#
|
68
|
+
# Convenience method for Nokogiri::XML::RelaxNG.new.
|
69
|
+
def self.read_memory(...)
|
70
|
+
# TODO deprecate this method
|
71
|
+
new(...)
|
72
|
+
end
|
30
73
|
end
|
31
74
|
end
|
32
75
|
end
|