nokogiri 1.14.0.rc1-arm-linux
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/Gemfile +38 -0
- data/LICENSE-DEPENDENCIES.md +2224 -0
- data/LICENSE.md +9 -0
- data/README.md +287 -0
- data/bin/nokogiri +131 -0
- data/dependencies.yml +41 -0
- data/ext/nokogiri/depend +38 -0
- data/ext/nokogiri/extconf.rb +1082 -0
- data/ext/nokogiri/gumbo.c +594 -0
- data/ext/nokogiri/html4_document.c +166 -0
- data/ext/nokogiri/html4_element_description.c +294 -0
- data/ext/nokogiri/html4_entity_lookup.c +37 -0
- data/ext/nokogiri/html4_sax_parser_context.c +114 -0
- data/ext/nokogiri/html4_sax_push_parser.c +95 -0
- data/ext/nokogiri/include/libexslt/exslt.h +108 -0
- data/ext/nokogiri/include/libexslt/exsltconfig.h +70 -0
- data/ext/nokogiri/include/libexslt/exsltexports.h +63 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLparser.h +306 -0
- data/ext/nokogiri/include/libxml2/libxml/HTMLtree.h +147 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX.h +204 -0
- data/ext/nokogiri/include/libxml2/libxml/SAX2.h +172 -0
- data/ext/nokogiri/include/libxml2/libxml/c14n.h +128 -0
- data/ext/nokogiri/include/libxml2/libxml/catalog.h +182 -0
- data/ext/nokogiri/include/libxml2/libxml/chvalid.h +230 -0
- data/ext/nokogiri/include/libxml2/libxml/debugXML.h +217 -0
- data/ext/nokogiri/include/libxml2/libxml/dict.h +81 -0
- data/ext/nokogiri/include/libxml2/libxml/encoding.h +232 -0
- data/ext/nokogiri/include/libxml2/libxml/entities.h +153 -0
- data/ext/nokogiri/include/libxml2/libxml/globals.h +499 -0
- data/ext/nokogiri/include/libxml2/libxml/hash.h +236 -0
- data/ext/nokogiri/include/libxml2/libxml/list.h +137 -0
- data/ext/nokogiri/include/libxml2/libxml/nanoftp.h +186 -0
- data/ext/nokogiri/include/libxml2/libxml/nanohttp.h +81 -0
- data/ext/nokogiri/include/libxml2/libxml/parser.h +1244 -0
- data/ext/nokogiri/include/libxml2/libxml/parserInternals.h +656 -0
- data/ext/nokogiri/include/libxml2/libxml/pattern.h +100 -0
- data/ext/nokogiri/include/libxml2/libxml/relaxng.h +218 -0
- data/ext/nokogiri/include/libxml2/libxml/schemasInternals.h +958 -0
- data/ext/nokogiri/include/libxml2/libxml/schematron.h +142 -0
- data/ext/nokogiri/include/libxml2/libxml/threads.h +91 -0
- data/ext/nokogiri/include/libxml2/libxml/tree.h +1312 -0
- data/ext/nokogiri/include/libxml2/libxml/uri.h +94 -0
- data/ext/nokogiri/include/libxml2/libxml/valid.h +463 -0
- data/ext/nokogiri/include/libxml2/libxml/xinclude.h +129 -0
- data/ext/nokogiri/include/libxml2/libxml/xlink.h +189 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlIO.h +368 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlautomata.h +146 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlerror.h +947 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlexports.h +77 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmemory.h +226 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlmodule.h +57 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlreader.h +428 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlregexp.h +222 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlsave.h +88 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemas.h +246 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlschemastypes.h +152 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlstring.h +140 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlunicode.h +202 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlversion.h +503 -0
- data/ext/nokogiri/include/libxml2/libxml/xmlwriter.h +488 -0
- data/ext/nokogiri/include/libxml2/libxml/xpath.h +575 -0
- data/ext/nokogiri/include/libxml2/libxml/xpathInternals.h +632 -0
- data/ext/nokogiri/include/libxml2/libxml/xpointer.h +137 -0
- data/ext/nokogiri/include/libxslt/attributes.h +38 -0
- data/ext/nokogiri/include/libxslt/documents.h +93 -0
- data/ext/nokogiri/include/libxslt/extensions.h +262 -0
- data/ext/nokogiri/include/libxslt/extra.h +72 -0
- data/ext/nokogiri/include/libxslt/functions.h +78 -0
- data/ext/nokogiri/include/libxslt/imports.h +75 -0
- data/ext/nokogiri/include/libxslt/keys.h +53 -0
- data/ext/nokogiri/include/libxslt/namespaces.h +68 -0
- data/ext/nokogiri/include/libxslt/numbersInternals.h +73 -0
- data/ext/nokogiri/include/libxslt/pattern.h +84 -0
- data/ext/nokogiri/include/libxslt/preproc.h +43 -0
- data/ext/nokogiri/include/libxslt/security.h +104 -0
- data/ext/nokogiri/include/libxslt/templates.h +77 -0
- data/ext/nokogiri/include/libxslt/transform.h +207 -0
- data/ext/nokogiri/include/libxslt/variables.h +118 -0
- data/ext/nokogiri/include/libxslt/xslt.h +110 -0
- data/ext/nokogiri/include/libxslt/xsltInternals.h +1982 -0
- data/ext/nokogiri/include/libxslt/xsltconfig.h +179 -0
- data/ext/nokogiri/include/libxslt/xsltexports.h +64 -0
- data/ext/nokogiri/include/libxslt/xsltlocale.h +76 -0
- data/ext/nokogiri/include/libxslt/xsltutils.h +310 -0
- data/ext/nokogiri/libxml2_backwards_compat.c +121 -0
- data/ext/nokogiri/nokogiri.c +259 -0
- data/ext/nokogiri/nokogiri.h +235 -0
- data/ext/nokogiri/test_global_handlers.c +40 -0
- data/ext/nokogiri/xml_attr.c +103 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_cdata.c +57 -0
- data/ext/nokogiri/xml_comment.c +62 -0
- data/ext/nokogiri/xml_document.c +689 -0
- data/ext/nokogiri/xml_document_fragment.c +44 -0
- data/ext/nokogiri/xml_dtd.c +208 -0
- data/ext/nokogiri/xml_element_content.c +128 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_encoding_handler.c +104 -0
- data/ext/nokogiri/xml_entity_decl.c +112 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_namespace.c +186 -0
- data/ext/nokogiri/xml_node.c +2425 -0
- data/ext/nokogiri/xml_node_set.c +496 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_reader.c +794 -0
- data/ext/nokogiri/xml_relax_ng.c +183 -0
- data/ext/nokogiri/xml_sax_parser.c +316 -0
- data/ext/nokogiri/xml_sax_parser_context.c +283 -0
- data/ext/nokogiri/xml_sax_push_parser.c +166 -0
- data/ext/nokogiri/xml_schema.c +282 -0
- data/ext/nokogiri/xml_syntax_error.c +85 -0
- data/ext/nokogiri/xml_text.c +48 -0
- data/ext/nokogiri/xml_xpath_context.c +413 -0
- data/ext/nokogiri/xslt_stylesheet.c +363 -0
- data/gumbo-parser/CHANGES.md +63 -0
- data/gumbo-parser/Makefile +111 -0
- data/gumbo-parser/THANKS +27 -0
- data/lib/nokogiri/2.7/nokogiri.so +0 -0
- data/lib/nokogiri/3.0/nokogiri.so +0 -0
- data/lib/nokogiri/3.1/nokogiri.so +0 -0
- data/lib/nokogiri/3.2/nokogiri.so +0 -0
- data/lib/nokogiri/class_resolver.rb +67 -0
- data/lib/nokogiri/css/node.rb +54 -0
- data/lib/nokogiri/css/parser.rb +770 -0
- data/lib/nokogiri/css/parser.y +277 -0
- data/lib/nokogiri/css/parser_extras.rb +96 -0
- data/lib/nokogiri/css/syntax_error.rb +9 -0
- data/lib/nokogiri/css/tokenizer.rb +155 -0
- data/lib/nokogiri/css/tokenizer.rex +56 -0
- data/lib/nokogiri/css/xpath_visitor.rb +359 -0
- data/lib/nokogiri/css.rb +66 -0
- data/lib/nokogiri/decorators/slop.rb +44 -0
- data/lib/nokogiri/encoding_handler.rb +57 -0
- data/lib/nokogiri/extension.rb +32 -0
- data/lib/nokogiri/gumbo.rb +15 -0
- data/lib/nokogiri/html.rb +48 -0
- data/lib/nokogiri/html4/builder.rb +37 -0
- data/lib/nokogiri/html4/document.rb +214 -0
- data/lib/nokogiri/html4/document_fragment.rb +54 -0
- data/lib/nokogiri/html4/element_description.rb +25 -0
- data/lib/nokogiri/html4/element_description_defaults.rb +572 -0
- data/lib/nokogiri/html4/encoding_reader.rb +121 -0
- data/lib/nokogiri/html4/entity_lookup.rb +15 -0
- data/lib/nokogiri/html4/sax/parser.rb +63 -0
- data/lib/nokogiri/html4/sax/parser_context.rb +20 -0
- data/lib/nokogiri/html4/sax/push_parser.rb +37 -0
- data/lib/nokogiri/html4.rb +47 -0
- data/lib/nokogiri/html5/document.rb +168 -0
- data/lib/nokogiri/html5/document_fragment.rb +90 -0
- data/lib/nokogiri/html5/node.rb +98 -0
- data/lib/nokogiri/html5.rb +389 -0
- data/lib/nokogiri/jruby/dependencies.rb +3 -0
- data/lib/nokogiri/jruby/nokogiri_jars.rb +43 -0
- data/lib/nokogiri/syntax_error.rb +6 -0
- data/lib/nokogiri/version/constant.rb +6 -0
- data/lib/nokogiri/version/info.rb +223 -0
- data/lib/nokogiri/version.rb +4 -0
- data/lib/nokogiri/xml/attr.rb +66 -0
- data/lib/nokogiri/xml/attribute_decl.rb +20 -0
- data/lib/nokogiri/xml/builder.rb +487 -0
- data/lib/nokogiri/xml/cdata.rb +13 -0
- data/lib/nokogiri/xml/character_data.rb +9 -0
- data/lib/nokogiri/xml/document.rb +471 -0
- data/lib/nokogiri/xml/document_fragment.rb +205 -0
- data/lib/nokogiri/xml/dtd.rb +34 -0
- data/lib/nokogiri/xml/element_content.rb +38 -0
- data/lib/nokogiri/xml/element_decl.rb +15 -0
- data/lib/nokogiri/xml/entity_decl.rb +21 -0
- data/lib/nokogiri/xml/entity_reference.rb +20 -0
- data/lib/nokogiri/xml/namespace.rb +58 -0
- data/lib/nokogiri/xml/node/save_options.rb +68 -0
- data/lib/nokogiri/xml/node.rb +1563 -0
- data/lib/nokogiri/xml/node_set.rb +446 -0
- data/lib/nokogiri/xml/notation.rb +19 -0
- data/lib/nokogiri/xml/parse_options.rb +213 -0
- data/lib/nokogiri/xml/pp/character_data.rb +21 -0
- data/lib/nokogiri/xml/pp/node.rb +57 -0
- data/lib/nokogiri/xml/pp.rb +4 -0
- data/lib/nokogiri/xml/processing_instruction.rb +11 -0
- data/lib/nokogiri/xml/reader.rb +105 -0
- data/lib/nokogiri/xml/relax_ng.rb +38 -0
- data/lib/nokogiri/xml/sax/document.rb +167 -0
- data/lib/nokogiri/xml/sax/parser.rb +125 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +21 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +61 -0
- data/lib/nokogiri/xml/sax.rb +6 -0
- data/lib/nokogiri/xml/schema.rb +73 -0
- data/lib/nokogiri/xml/searchable.rb +270 -0
- data/lib/nokogiri/xml/syntax_error.rb +72 -0
- data/lib/nokogiri/xml/text.rb +11 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +13 -0
- data/lib/nokogiri/xml/xpath.rb +21 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xml.rb +76 -0
- data/lib/nokogiri/xslt/stylesheet.rb +27 -0
- data/lib/nokogiri/xslt.rb +65 -0
- data/lib/nokogiri.rb +120 -0
- data/lib/xsd/xmlparser/nokogiri.rb +104 -0
- metadata +317 -0
@@ -0,0 +1,57 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
  module XML
    # :nodoc: all
    module PP
      # Mixin that supplies +inspect+ and +pretty_print+ for node-like objects.
      #
      # The including object must respond to +inspect_attributes+, returning
      # the method names (Symbols) whose values should be displayed.
      module Node
        # Attribute names treated as collections by +pretty_print+: they are
        # hidden when empty and rendered as a bracketed list otherwise.
        COLLECTIONS = [:attribute_nodes, :children].freeze

        # Build a single-line description of the receiver.
        #
        # Each name from +inspect_attributes+ is sent to the receiver once.
        # Names that raise NoMethodError, or whose value is nil/false or an
        # empty collection, are left out.
        #
        # [Returns] (String) e.g. <tt>#<Klass:0x1f4 name="div"></tt>
        def inspect
          described = inspect_attributes.filter_map do |attr_name|
            value = begin
              send(attr_name)
            rescue NoMethodError
              nil
            end
            next if !value || (value.respond_to?(:empty?) && value.empty?)

            # The label drops everything from the first underscore onward and
            # appends "s", so :attribute_nodes is shown as "attributes".
            "#{attr_name.to_s.sub(/_\w+/, "s")}=#{value.inspect}"
          end.join(" ")

          "#<#{self.class.name}:#{format("0x%x", object_id)} #{described}>"
        end

        # Pretty-printer hook: describe the receiver on the printer +pp+.
        #
        # Only names the receiver responds to are shown. Nil/false values are
        # skipped, as are empty values for the names listed in COLLECTIONS.
        def pretty_print(pp)
          nice_name = self.class.name.split("::").last
          pp.group(2, "#(#{nice_name}:#{format("0x%x", object_id)} {", "})") do
            pp.breakable
            attrs = inspect_attributes.filter_map do |attr_name|
              next unless respond_to?(attr_name)

              value = send(attr_name)
              next unless value
              next if COLLECTIONS.include?(attr_name) && value.empty?

              [attr_name, value]
            end

            pp.seplist(attrs) do |(attr_name, value)|
              if COLLECTIONS.include?(attr_name)
                # Collections get their own nested group, one item per slot.
                pp.group(2, "#{attr_name.to_s.sub(/_\w+$/, "s")} = [", "]") do
                  pp.breakable
                  pp.seplist(value) do |item|
                    pp.pp(item)
                  end
                end
              else
                pp.text("#{attr_name} = ")
                pp.pp(value)
              end
            end
            pp.breakable
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,105 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
  module XML
    ###
    # Nokogiri::XML::Reader parses an XML document similar to the way a cursor
    # would move. The Reader is given an XML document, and yields nodes
    # to an each block.
    #
    # Here is an example of usage:
    #
    #   reader = Nokogiri::XML::Reader(<<-eoxml)
    #     <x xmlns:tenderlove='http://tenderlovemaking.com/'>
    #       <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
    #     </x>
    #   eoxml
    #
    #   reader.each do |node|
    #
    #     # node is an instance of Nokogiri::XML::Reader
    #     puts node.name
    #
    #   end
    #
    # Note that Nokogiri::XML::Reader#each can only be called once!! Once
    # the cursor moves through the entire document, you must parse the
    # document again. So make sure that you capture any information you
    # need during the first iteration.
    #
    # The Reader parser is good for when you need the speed of a SAX parser,
    # but do not want to write a Document handler.
    class Reader
      include Enumerable

      TYPE_NONE = 0
      # Element node type
      TYPE_ELEMENT = 1
      # Attribute node type
      TYPE_ATTRIBUTE = 2
      # Text node type
      TYPE_TEXT = 3
      # CDATA node type
      TYPE_CDATA = 4
      # Entity Reference node type
      TYPE_ENTITY_REFERENCE = 5
      # Entity node type
      TYPE_ENTITY = 6
      # PI node type
      TYPE_PROCESSING_INSTRUCTION = 7
      # Comment node type
      TYPE_COMMENT = 8
      # Document node type
      TYPE_DOCUMENT = 9
      # Document Type node type
      TYPE_DOCUMENT_TYPE = 10
      # Document Fragment node type
      TYPE_DOCUMENT_FRAGMENT = 11
      # Notation node type
      TYPE_NOTATION = 12
      # Whitespace node type
      TYPE_WHITESPACE = 13
      # Significant Whitespace node type
      TYPE_SIGNIFICANT_WHITESPACE = 14
      # Element end node type
      TYPE_END_ELEMENT = 15
      # Entity end node type
      TYPE_END_ENTITY = 16
      # XML Declaration node type
      TYPE_XML_DECLARATION = 17

      # A list of errors encountered while parsing
      attr_accessor :errors

      # The XML source
      attr_reader :source

      # +empty_element?+ is not defined in this file; it must already exist on
      # the class when this line runs (presumably provided by the native
      # extension).
      alias_method :self_closing?, :empty_element?

      # Record the +source+ and +encoding+ and start with an empty error list.
      # +url+ is accepted but not used by this method.
      def initialize(source, url = nil, encoding = nil) # :nodoc:
        @source = source
        @errors = []
        @encoding = encoding
      end
      private :initialize

      # Get the attributes and namespaces of the current node as a Hash.
      #
      # This is the union of Reader#attribute_hash and Reader#namespaces;
      # on a key collision the value from Reader#namespaces wins.
      #
      # [Returns]
      #   (Hash<String, String>) Attribute names and values, and namespace prefixes and hrefs.
      def attributes
        attribute_hash.merge(namespaces)
      end

      ###
      # Move the cursor through the document yielding the cursor to the block.
      #
      # Iteration continues until +read+ returns nil or false. When called
      # without a block, an Enumerator over the same sequence is returned
      # instead of raising LocalJumpError.
      def each
        return enum_for(:each) unless block_given?

        while (cursor = read)
          yield cursor
        end
      end
    end
  end
end
|
@@ -0,0 +1,38 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
  module XML
    class << self
      ###
      # Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
      # See Nokogiri::XML::RelaxNG for an example.
      #
      # +options+ is passed through to RelaxNG.new unchanged and defaults to
      # ParseOptions::DEFAULT_SCHEMA.
      def RelaxNG(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
        RelaxNG.new(string_or_io, options)
      end
    end

    ###
    # Nokogiri::XML::RelaxNG is used for validating XML against a
    # RelaxNG schema.
    #
    # == Synopsis
    #
    # Validate an XML document against a RelaxNG schema. Loop over the errors
    # that are returned and print them out:
    #
    #   schema  = Nokogiri::XML::RelaxNG(File.open(ADDRESS_SCHEMA_FILE))
    #   doc     = Nokogiri::XML(File.open(ADDRESS_XML_FILE))
    #
    #   schema.validate(doc).each do |error|
    #     puts error.message
    #   end
    #
    # The errors returned are Nokogiri::XML::SyntaxError objects.
    #
    # NOTE: RelaxNG input is always treated as TRUSTED documents, meaning that they will cause the
    # underlying parsing libraries to access network resources. This is counter to Nokogiri's
    # "untrusted by default" security policy, but is a limitation of the underlying libraries.
    class RelaxNG < Nokogiri::XML::Schema
    end
  end
end
|
@@ -0,0 +1,167 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
  module XML
    ###
    # SAX Parsers are event driven parsers. Nokogiri provides two different event based parsers when
    # dealing with XML. If you want to do SAX style parsing using HTML, check out
    # Nokogiri::HTML4::SAX.
    #
    # The basic way a SAX style parser works is by creating a parser, telling the parser about the
    # events we're interested in, then giving the parser some XML to process. The parser will notify
    # you when it encounters events you said you would like to know about.
    #
    # To register for events, you simply subclass Nokogiri::XML::SAX::Document, and implement the
    # methods for which you would like notification.
    #
    # For example, if I want to be notified when a document ends, and when an element starts, I
    # would write a class like this:
    #
    #   class MyDocument < Nokogiri::XML::SAX::Document
    #     def end_document
    #       puts "the document has ended"
    #     end
    #
    #     def start_element name, attributes = []
    #       puts "#{name} started"
    #     end
    #   end
    #
    # Then I would instantiate a SAX parser with this document, and feed the parser some XML
    #
    #   # Create a new parser
    #   parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
    #
    #   # Feed the parser some XML
    #   parser.parse(File.open(ARGV[0]))
    #
    # Now my document handler will be called when each node starts, and when the document ends. To
    # see what kinds of events are available, take a look at Nokogiri::XML::SAX::Document.
    #
    # Two SAX parsers for XML are available, a parser that reads from a string or IO object as it
    # feels necessary, and a parser that lets you spoon feed it XML. If you want to let Nokogiri
    # deal with reading your XML, use the Nokogiri::XML::SAX::Parser. If you want to have
    # fine-grained control over the XML input, use the Nokogiri::XML::SAX::PushParser.
    module SAX
      ###
      # This class is used for registering types of events you are interested in handling. All of
      # the methods on this class are available as possible events while parsing an XML document. To
      # register for any particular event, just subclass this class and implement the methods you
      # are interested in knowing about.
      #
      # To only be notified about start and end element events, write a class like this:
      #
      #   class MyDocument < Nokogiri::XML::SAX::Document
      #     def start_element name, attrs = []
      #       puts "#{name} started!"
      #     end
      #
      #     def end_element name
      #       puts "#{name} ended"
      #     end
      #   end
      #
      # You can use this event handler for any SAX style parser included with Nokogiri. See
      # Nokogiri::XML::SAX, and Nokogiri::HTML4::SAX.
      class Document
        ###
        # Called when an XML declaration is parsed
        def xmldecl(version, encoding, standalone)
        end

        ###
        # Called when document starts parsing
        def start_document
        end

        ###
        # Called when document ends parsing
        def end_document
        end

        ###
        # Called at the beginning of an element
        # * +name+ is the name of the tag
        # * +attrs+ are an assoc list of namespaces and attributes, e.g.:
        #     [ ["xmlns:foo", "http://sample.net"], ["size", "large"] ]
        def start_element(name, attrs = [])
        end

        ###
        # Called at the end of an element
        # +name+ is the tag name
        def end_element(name)
        end

        ###
        # Called at the beginning of an element
        # +name+ is the element name
        # +attrs+ is a list of attributes; each must respond to +prefix+,
        # +localname+ and +value+
        # +prefix+ is the namespace prefix for the element
        # +uri+ is the associated namespace URI
        # +ns+ is a hash of namespace prefix:urls associated with the element
        # (any enumerable of [prefix, uri] pairs works; the default is empty)
        #
        # The default implementation adapts the event to the SAX v1 interface:
        # it builds the qualified element name and a flat assoc list of
        # namespace declarations followed by attributes, then calls
        # #start_element. +uri+ is not used by this adaptation.
        def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
          qualified_name = [prefix, name].compact.join(":")

          # A nil prefix yields the default-namespace declaration "xmlns".
          namespace_pairs = ns.map do |ns_prefix, ns_uri|
            [["xmlns", ns_prefix].compact.join(":"), ns_uri]
          end
          attribute_pairs = attrs.map do |attr|
            [[attr.prefix, attr.localname].compact.join(":"), attr.value]
          end

          start_element(qualified_name, namespace_pairs + attribute_pairs)
        end

        ###
        # Called at the end of an element
        # +name+ is the element's name
        # +prefix+ is the namespace prefix associated with the element
        # +uri+ is the associated namespace URI
        #
        # The default implementation adapts the event to the SAX v1 interface
        # by calling #end_element with the qualified name.
        def end_element_namespace(name, prefix = nil, uri = nil)
          end_element([prefix, name].compact.join(":"))
        end

        ###
        # Characters read between a tag.  This method might be called multiple
        # times given one contiguous string of characters.
        #
        # +string+ contains the character data
        def characters(string)
        end

        ###
        # Called when comments are encountered
        # +string+ contains the comment data
        def comment(string)
        end

        ###
        # Called on document warnings
        # +string+ contains the warning
        def warning(string)
        end

        ###
        # Called on document errors
        # +string+ contains the error
        def error(string)
        end

        ###
        # Called when cdata blocks are found
        # +string+ contains the cdata content
        def cdata_block(string)
        end

        ###
        # Called when processing instructions are found
        # +name+ is the target of the instruction
        # +content+ is the value of the instruction
        def processing_instruction(name, content)
        end
      end
    end
  end
end
|
@@ -0,0 +1,125 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
  module XML
    module SAX
      ###
      # This parser is a SAX style parser that reads its input as it
      # deems necessary. The parser takes a Nokogiri::XML::SAX::Document,
      # an optional encoding, then given an XML input, sends messages to
      # the Nokogiri::XML::SAX::Document.
      #
      # Here is an example of using this parser:
      #
      #   # Create a subclass of Nokogiri::XML::SAX::Document and implement
      #   # the events we care about:
      #   class MyDoc < Nokogiri::XML::SAX::Document
      #     def start_element name, attrs = []
      #       puts "starting: #{name}"
      #     end
      #
      #     def end_element name
      #       puts "ending: #{name}"
      #     end
      #   end
      #
      #   # Create our parser
      #   parser = Nokogiri::XML::SAX::Parser.new(MyDoc.new)
      #
      #   # Send some XML to the parser
      #   parser.parse(File.open(ARGV[0]))
      #
      # For more information about SAX parsers, see Nokogiri::XML::SAX. Also
      # see Nokogiri::XML::SAX::Document for the available events.
      class Parser
        # Value object with +localname+, +prefix+, +uri+ and +value+ members.
        class Attribute < Struct.new(:localname, :prefix, :uri, :value)
        end

        # Encodings this parser supports
        ENCODINGS = {
          "NONE" => 0, # No char encoding detected
          "UTF-8" => 1, # UTF-8
          "UTF16LE" => 2, # UTF-16 little endian
          "UTF16BE" => 3, # UTF-16 big endian
          "UCS4LE" => 4, # UCS-4 little endian
          "UCS4BE" => 5, # UCS-4 big endian
          "EBCDIC" => 6, # EBCDIC uh!
          "UCS4-2143" => 7, # UCS-4 unusual ordering
          "UCS4-3412" => 8, # UCS-4 unusual ordering
          "UCS2" => 9, # UCS-2
          "ISO-8859-1" => 10, # ISO-8859-1 ISO Latin 1
          "ISO-8859-2" => 11, # ISO-8859-2 ISO Latin 2
          "ISO-8859-3" => 12, # ISO-8859-3
          "ISO-8859-4" => 13, # ISO-8859-4
          "ISO-8859-5" => 14, # ISO-8859-5
          "ISO-8859-6" => 15, # ISO-8859-6
          "ISO-8859-7" => 16, # ISO-8859-7
          "ISO-8859-8" => 17, # ISO-8859-8
          "ISO-8859-9" => 18, # ISO-8859-9
          "ISO-2022-JP" => 19, # ISO-2022-JP
          "SHIFT-JIS" => 20, # Shift_JIS
          "EUC-JP" => 21, # EUC-JP
          "ASCII" => 22, # pure ASCII
        }

        # The Nokogiri::XML::SAX::Document where events will be sent.
        attr_accessor :document

        # The encoding being used for this document.
        attr_accessor :encoding

        # Create a new Parser with +doc+ and +encoding+
        #
        # +encoding+ is upcased and must be a key of ENCODINGS, otherwise
        # ArgumentError is raised.
        def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = "UTF-8")
          @encoding = check_encoding(encoding)
          @document = doc
          @warned = false
        end

        ###
        # Parse given +thing+ which may be a string containing xml, or an
        # IO object.
        #
        # Anything responding to both +read+ and +close+ is handled by
        # #parse_io; everything else by #parse_memory. The block, if given,
        # is forwarded to whichever of the two is used.
        def parse(thing, &block)
          if thing.respond_to?(:read) && thing.respond_to?(:close)
            parse_io(thing, &block)
          else
            parse_memory(thing, &block)
          end
        end

        ###
        # Parse given +io+
        #
        # +encoding+ defaults to the parser's encoding and is validated the
        # same way as in #initialize. If a block is given, the parser context
        # is yielded to it before parsing starts.
        def parse_io(io, encoding = @encoding)
          ctx = ParserContext.io(io, ENCODINGS[check_encoding(encoding)])
          yield ctx if block_given?
          ctx.parse_with(self)
        end

        ###
        # Parse a file with +filename+
        #
        # Raises ArgumentError when +filename+ is nil or false,
        # Errno::ENOENT when it does not exist, and Errno::EISDIR when it is
        # a directory. If a block is given, the parser context is yielded to
        # it before parsing starts.
        def parse_file(filename)
          raise ArgumentError, "a filename is required" unless filename
          # to_s so that path-like objects without #to_str still produce a
          # readable system-call error message.
          raise Errno::ENOENT, filename.to_s unless File.exist?(filename)
          raise Errno::EISDIR, filename.to_s if File.directory?(filename)

          ctx = ParserContext.file(filename)
          yield ctx if block_given?
          ctx.parse_with(self)
        end

        ###
        # Parse the in-memory +data+
        #
        # If a block is given, the parser context is yielded to it before
        # parsing starts. The parser's encoding is not passed to the context
        # here.
        def parse_memory(data)
          ctx = ParserContext.memory(data)
          yield ctx if block_given?
          ctx.parse_with(self)
        end

        private

        # Normalize +encoding+ to an upper-case String and make sure it is a
        # key of ENCODINGS.
        #
        # Returns the normalized name; raises ArgumentError otherwise
        # (including for nil, which normalizes to an empty name).
        def check_encoding(encoding)
          encoding.to_s.upcase.tap do |enc|
            raise ArgumentError, "'#{enc}' is not a valid encoding" unless ENCODINGS[enc]
          end
        end
      end
    end
  end
end
|
@@ -0,0 +1,21 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
  module XML
    module SAX
      ###
      # Context for XML SAX parsers. This class is usually not instantiated
      # by the user. Instead, you should be looking at
      # Nokogiri::XML::SAX::Parser
      class ParserContext
        # Build a context for +thing+.
        #
        # If +thing+ responds to both +read+ and +close+ it is handed to
        # ParserContext.io together with the entry of Parser::ENCODINGS for
        # +encoding+ (looked up as given, so an unknown name yields nil).
        # Otherwise +thing+ is handed to ParserContext.memory and +encoding+
        # is not used.
        def self.new(thing, encoding = "UTF-8")
          return memory(thing) unless thing.respond_to?(:read) && thing.respond_to?(:close)

          io(thing, Parser::ENCODINGS[encoding])
        end
      end
    end
  end
end
|
@@ -0,0 +1,61 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
  module XML
    module SAX
      ###
      # PushParser can parse a document that is fed to it manually.  It
      # must be given a SAX::Document object which will be called with
      # SAX events as the document is being parsed.
      #
      # Calling PushParser#<< writes XML to the parser, calling any SAX
      # callbacks it can.
      #
      # PushParser#finish tells the parser that the document is finished
      # and calls the end_document SAX method.
      #
      # Example:
      #
      #   parser = PushParser.new(Class.new(XML::SAX::Document) {
      #     def start_document
      #       puts "start document called"
      #     end
      #   }.new)
      #   parser << "<div>hello<"
      #   parser << "/div>"
      #   parser.finish
      class PushParser
        # The Nokogiri::XML::SAX::Document on which the PushParser will be
        # operating
        attr_accessor :document

        ###
        # Create a new PushParser with +doc+ as the SAX Document, providing
        # an optional +file_name+ and +encoding+
        def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = "UTF-8")
          @document = doc
          @encoding = encoding
          # NOTE(review): +encoding+ is stored above but not forwarded here, so
          # the inner parser is built with its own default encoding. Confirm
          # this is intended before changing it.
          @sax_parser = XML::SAX::Parser.new(doc)

          ## Create our push parser context
          initialize_native(@sax_parser, file_name)
        end

        ###
        # Write a +chunk+ of XML to the PushParser.  Any callback methods
        # that can be called will be called immediately.
        #
        # +last_chunk+ is passed through to +native_write+ along with the
        # chunk; #finish sets it to true.
        def write(chunk, last_chunk = false)
          native_write(chunk, last_chunk)
        end
        alias_method :<<, :write

        ###
        # Finish the parsing.  This method is only necessary for
        # Nokogiri::XML::SAX::Document#end_document to be called.
        def finish
          write("", true)
        end
      end
    end
  end
end
|
@@ -0,0 +1,73 @@
|
|
1
|
+
# frozen_string_literal: true
|
2
|
+
|
3
|
+
module Nokogiri
  module XML
    class << self
      ###
      # Create a new Nokogiri::XML::Schema object using a +string_or_io+
      # object.
      #
      # +options+ is passed through to Schema.new unchanged and defaults to
      # ParseOptions::DEFAULT_SCHEMA.
      def Schema(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
        Schema.new(string_or_io, options)
      end
    end

    ###
    # Nokogiri::XML::Schema is used for validating XML against a schema
    # (usually from an xsd file).
    #
    # == Synopsis
    #
    # Validate an XML document against a Schema.  Loop over the errors that
    # are returned and print them out:
    #
    #   xsd = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE))
    #   doc = Nokogiri::XML(File.read(PO_XML_FILE))
    #
    #   xsd.validate(doc).each do |error|
    #     puts error.message
    #   end
    #
    # The errors returned are Nokogiri::XML::SyntaxError objects.
    #
    # NOTE: As of v1.11.0, Schema treats inputs as UNTRUSTED by default, and so external entities
    # are not resolved from the network (`http://` or `ftp://`). Previously, parsing treated
    # documents as "trusted" by default which was counter to Nokogiri's "untrusted by default"
    # security policy. If a document is trusted, then the caller may turn off the NONET option via
    # the ParseOptions to re-enable external entity resolution over a network connection.
    class Schema
      # Errors while parsing the schema file
      attr_accessor :errors
      # The Nokogiri::XML::ParseOptions used to parse the schema
      attr_accessor :parse_options

      ###
      # Create a new Nokogiri::XML::Schema object using a +string_or_io+
      # object.
      #
      # The input is first parsed with Nokogiri::XML and the resulting
      # document is handed, together with +options+, to +from_document+.
      def self.new(string_or_io, options = ParseOptions::DEFAULT_SCHEMA)
        from_document(Nokogiri::XML(string_or_io), options)
      end

      ###
      # Validate +thing+ against this schema.  +thing+ can be a
      # Nokogiri::XML::Document object, or a filename.  An Array of
      # Nokogiri::XML::SyntaxError objects found while validating the
      # +thing+ is returned.
      #
      # Raises ArgumentError when +thing+ is neither a document nor the name
      # of an existing regular file.
      def validate(thing)
        if thing.is_a?(Nokogiri::XML::Document)
          validate_document(thing)
        elsif File.file?(thing)
          validate_file(thing)
        else
          raise ArgumentError, "Must provide Nokogiri::XML::Document or the name of an existing file"
        end
      end

      ###
      # Returns true if +thing+ is a valid Nokogiri::XML::Document or
      # file.
      def valid?(thing)
        validate(thing).empty?
      end
    end
  end
end
|