nokogiri 1.6.2.rc1-x64-mingw32
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- checksums.yaml +7 -0
- data/.autotest +26 -0
- data/.editorconfig +17 -0
- data/.gemtest +0 -0
- data/.travis.yml +25 -0
- data/CHANGELOG.ja.rdoc +857 -0
- data/CHANGELOG.rdoc +880 -0
- data/C_CODING_STYLE.rdoc +33 -0
- data/Gemfile +21 -0
- data/Manifest.txt +371 -0
- data/README.ja.rdoc +112 -0
- data/README.rdoc +180 -0
- data/ROADMAP.md +89 -0
- data/Rakefile +351 -0
- data/STANDARD_RESPONSES.md +47 -0
- data/Y_U_NO_GEMSPEC.md +155 -0
- data/bin/nokogiri +78 -0
- data/build_all +130 -0
- data/dependencies.yml +4 -0
- data/ext/nokogiri/depend +358 -0
- data/ext/nokogiri/extconf.rb +453 -0
- data/ext/nokogiri/html_document.c +170 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +279 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +32 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser_context.c +116 -0
- data/ext/nokogiri/html_sax_parser_context.h +11 -0
- data/ext/nokogiri/html_sax_push_parser.c +87 -0
- data/ext/nokogiri/html_sax_push_parser.h +9 -0
- data/ext/nokogiri/nokogiri.c +148 -0
- data/ext/nokogiri/nokogiri.h +164 -0
- data/ext/nokogiri/xml_attr.c +94 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_attribute_decl.c +70 -0
- data/ext/nokogiri/xml_attribute_decl.h +9 -0
- data/ext/nokogiri/xml_cdata.c +56 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +54 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +577 -0
- data/ext/nokogiri/xml_document.h +23 -0
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +202 -0
- data/ext/nokogiri/xml_dtd.h +10 -0
- data/ext/nokogiri/xml_element_content.c +123 -0
- data/ext/nokogiri/xml_element_content.h +10 -0
- data/ext/nokogiri/xml_element_decl.c +69 -0
- data/ext/nokogiri/xml_element_decl.h +9 -0
- data/ext/nokogiri/xml_encoding_handler.c +79 -0
- data/ext/nokogiri/xml_encoding_handler.h +8 -0
- data/ext/nokogiri/xml_entity_decl.c +110 -0
- data/ext/nokogiri/xml_entity_decl.h +10 -0
- data/ext/nokogiri/xml_entity_reference.c +52 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +56 -0
- data/ext/nokogiri/xml_io.h +11 -0
- data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
- data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
- data/ext/nokogiri/xml_namespace.c +78 -0
- data/ext/nokogiri/xml_namespace.h +13 -0
- data/ext/nokogiri/xml_node.c +1541 -0
- data/ext/nokogiri/xml_node.h +13 -0
- data/ext/nokogiri/xml_node_set.c +467 -0
- data/ext/nokogiri/xml_node_set.h +14 -0
- data/ext/nokogiri/xml_processing_instruction.c +56 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +681 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +161 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +312 -0
- data/ext/nokogiri/xml_sax_parser.h +39 -0
- data/ext/nokogiri/xml_sax_parser_context.c +262 -0
- data/ext/nokogiri/xml_sax_parser_context.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +115 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +205 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +63 -0
- data/ext/nokogiri/xml_syntax_error.h +13 -0
- data/ext/nokogiri/xml_text.c +52 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath_context.c +307 -0
- data/ext/nokogiri/xml_xpath_context.h +10 -0
- data/ext/nokogiri/xslt_stylesheet.c +270 -0
- data/ext/nokogiri/xslt_stylesheet.h +14 -0
- data/lib/nokogiri.rb +137 -0
- data/lib/nokogiri/2.0/nokogiri.so +0 -0
- data/lib/nokogiri/2.1/nokogiri.so +0 -0
- data/lib/nokogiri/css.rb +27 -0
- data/lib/nokogiri/css/node.rb +52 -0
- data/lib/nokogiri/css/parser.rb +715 -0
- data/lib/nokogiri/css/parser.y +249 -0
- data/lib/nokogiri/css/parser_extras.rb +91 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +152 -0
- data/lib/nokogiri/css/tokenizer.rex +55 -0
- data/lib/nokogiri/css/xpath_visitor.rb +219 -0
- data/lib/nokogiri/decorators/slop.rb +35 -0
- data/lib/nokogiri/html.rb +37 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +333 -0
- data/lib/nokogiri/html/document_fragment.rb +41 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/element_description_defaults.rb +671 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +52 -0
- data/lib/nokogiri/html/sax/parser_context.rb +16 -0
- data/lib/nokogiri/html/sax/push_parser.rb +16 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +106 -0
- data/lib/nokogiri/xml.rb +73 -0
- data/lib/nokogiri/xml/attr.rb +14 -0
- data/lib/nokogiri/xml/attribute_decl.rb +18 -0
- data/lib/nokogiri/xml/builder.rb +443 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/character_data.rb +7 -0
- data/lib/nokogiri/xml/document.rb +279 -0
- data/lib/nokogiri/xml/document_fragment.rb +112 -0
- data/lib/nokogiri/xml/dtd.rb +32 -0
- data/lib/nokogiri/xml/element_content.rb +36 -0
- data/lib/nokogiri/xml/element_decl.rb +13 -0
- data/lib/nokogiri/xml/entity_decl.rb +19 -0
- data/lib/nokogiri/xml/namespace.rb +13 -0
- data/lib/nokogiri/xml/node.rb +982 -0
- data/lib/nokogiri/xml/node/save_options.rb +61 -0
- data/lib/nokogiri/xml/node_set.rb +355 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +98 -0
- data/lib/nokogiri/xml/pp.rb +2 -0
- data/lib/nokogiri/xml/pp/character_data.rb +18 -0
- data/lib/nokogiri/xml/pp/node.rb +56 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +112 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +4 -0
- data/lib/nokogiri/xml/sax/document.rb +171 -0
- data/lib/nokogiri/xml/sax/parser.rb +123 -0
- data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +63 -0
- data/lib/nokogiri/xml/syntax_error.rb +47 -0
- data/lib/nokogiri/xml/text.rb +9 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +56 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +102 -0
- data/suppressions/README.txt +1 -0
- data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
- data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
- data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
- data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
- data/tasks/nokogiri.org.rb +24 -0
- data/tasks/test.rb +95 -0
- data/test/css/test_nthiness.rb +222 -0
- data/test/css/test_parser.rb +358 -0
- data/test/css/test_tokenizer.rb +198 -0
- data/test/css/test_xpath_visitor.rb +96 -0
- data/test/decorators/test_slop.rb +16 -0
- data/test/files/2ch.html +108 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/atom.xml +344 -0
- data/test/files/bar/bar.xsd +4 -0
- data/test/files/bogus.xml +0 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/encoding.html +82 -0
- data/test/files/encoding.xhtml +84 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/foo/foo.xsd +4 -0
- data/test/files/metacharset.html +10 -0
- data/test/files/noencoding.html +47 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/saml/saml20assertion_schema.xsd +283 -0
- data/test/files/saml/saml20protocol_schema.xsd +302 -0
- data/test/files/saml/xenc_schema.xsd +146 -0
- data/test/files/saml/xmldsig_schema.xsd +318 -0
- data/test/files/shift_jis.html +10 -0
- data/test/files/shift_jis.xml +5 -0
- data/test/files/shift_jis_no_charset.html +9 -0
- data/test/files/snuggles.xml +3 -0
- data/test/files/staff.dtd +10 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/test_document_url/bar.xml +2 -0
- data/test/files/test_document_url/document.dtd +4 -0
- data/test/files/test_document_url/document.xml +6 -0
- data/test/files/tlm.html +850 -0
- data/test/files/to_be_xincluded.xml +2 -0
- data/test/files/valid_bar.xml +2 -0
- data/test/files/xinclude.xml +4 -0
- data/test/helper.rb +164 -0
- data/test/html/sax/test_parser.rb +141 -0
- data/test/html/sax/test_parser_context.rb +46 -0
- data/test/html/test_builder.rb +164 -0
- data/test/html/test_document.rb +619 -0
- data/test/html/test_document_encoding.rb +148 -0
- data/test/html/test_document_fragment.rb +261 -0
- data/test/html/test_element_description.rb +105 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +196 -0
- data/test/html/test_node_encoding.rb +27 -0
- data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
- data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
- data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
- data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
- data/test/namespaces/test_namespaces_in_parsed_doc.rb +66 -0
- data/test/test_convert_xpath.rb +135 -0
- data/test/test_css_cache.rb +45 -0
- data/test/test_encoding_handler.rb +46 -0
- data/test/test_memory_leak.rb +156 -0
- data/test/test_nokogiri.rb +138 -0
- data/test/test_reader.rb +558 -0
- data/test/test_soap4r_sax.rb +52 -0
- data/test/test_xslt_transforms.rb +279 -0
- data/test/xml/node/test_save_options.rb +28 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +382 -0
- data/test/xml/sax/test_parser_context.rb +115 -0
- data/test/xml/sax/test_push_parser.rb +157 -0
- data/test/xml/test_attr.rb +64 -0
- data/test/xml/test_attribute_decl.rb +86 -0
- data/test/xml/test_builder.rb +315 -0
- data/test/xml/test_c14n.rb +161 -0
- data/test/xml/test_cdata.rb +48 -0
- data/test/xml/test_comment.rb +29 -0
- data/test/xml/test_document.rb +934 -0
- data/test/xml/test_document_encoding.rb +28 -0
- data/test/xml/test_document_fragment.rb +228 -0
- data/test/xml/test_dtd.rb +187 -0
- data/test/xml/test_dtd_encoding.rb +33 -0
- data/test/xml/test_element_content.rb +56 -0
- data/test/xml/test_element_decl.rb +73 -0
- data/test/xml/test_entity_decl.rb +122 -0
- data/test/xml/test_entity_reference.rb +245 -0
- data/test/xml/test_namespace.rb +95 -0
- data/test/xml/test_node.rb +1155 -0
- data/test/xml/test_node_attributes.rb +113 -0
- data/test/xml/test_node_encoding.rb +107 -0
- data/test/xml/test_node_inheritance.rb +32 -0
- data/test/xml/test_node_reparenting.rb +374 -0
- data/test/xml/test_node_set.rb +755 -0
- data/test/xml/test_parse_options.rb +64 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +142 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +129 -0
- data/test/xml/test_syntax_error.rb +12 -0
- data/test/xml/test_text.rb +45 -0
- data/test/xml/test_unparented_node.rb +422 -0
- data/test/xml/test_xinclude.rb +83 -0
- data/test/xml/test_xpath.rb +376 -0
- data/test/xslt/test_custom_functions.rb +133 -0
- data/test/xslt/test_exception_handling.rb +37 -0
- data/test_all +81 -0
- metadata +601 -0
@@ -0,0 +1,18 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
module PP
|
4
|
+
module CharacterData
|
5
|
+
def pretty_print pp # :nodoc:
|
6
|
+
nice_name = self.class.name.split('::').last
|
7
|
+
pp.group(2, "#(#{nice_name} ", ')') do
|
8
|
+
pp.pp text
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
def inspect # :nodoc:
|
13
|
+
"#<#{self.class.name}:#{sprintf("0x%x",object_id)} #{text.inspect}>"
|
14
|
+
end
|
15
|
+
end
|
16
|
+
end
|
17
|
+
end
|
18
|
+
end
|
@@ -0,0 +1,56 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
module PP
|
4
|
+
module Node
|
5
|
+
def inspect # :nodoc:
|
6
|
+
attributes = inspect_attributes.reject { |x|
|
7
|
+
begin
|
8
|
+
attribute = send x
|
9
|
+
!attribute || (attribute.respond_to?(:empty?) && attribute.empty?)
|
10
|
+
rescue NoMethodError
|
11
|
+
true
|
12
|
+
end
|
13
|
+
}.map { |attribute|
|
14
|
+
"#{attribute.to_s.sub(/_\w+/, 's')}=#{send(attribute).inspect}"
|
15
|
+
}.join ' '
|
16
|
+
"#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{attributes}>"
|
17
|
+
end
|
18
|
+
|
19
|
+
def pretty_print pp # :nodoc:
|
20
|
+
nice_name = self.class.name.split('::').last
|
21
|
+
pp.group(2, "#(#{nice_name}:#{sprintf("0x%x", object_id)} {", '})') do
|
22
|
+
|
23
|
+
pp.breakable
|
24
|
+
attrs = inspect_attributes.map { |t|
|
25
|
+
[t, send(t)] if respond_to?(t)
|
26
|
+
}.compact.find_all { |x|
|
27
|
+
if x.last
|
28
|
+
if [:attribute_nodes, :children].include? x.first
|
29
|
+
!x.last.empty?
|
30
|
+
else
|
31
|
+
true
|
32
|
+
end
|
33
|
+
end
|
34
|
+
}
|
35
|
+
|
36
|
+
pp.seplist(attrs) do |v|
|
37
|
+
if [:attribute_nodes, :children].include? v.first
|
38
|
+
pp.group(2, "#{v.first.to_s.sub(/_\w+$/, 's')} = [", "]") do
|
39
|
+
pp.breakable
|
40
|
+
pp.seplist(v.last) do |item|
|
41
|
+
pp.pp item
|
42
|
+
end
|
43
|
+
end
|
44
|
+
else
|
45
|
+
pp.text "#{v.first} = "
|
46
|
+
pp.pp v.last
|
47
|
+
end
|
48
|
+
end
|
49
|
+
pp.breakable
|
50
|
+
|
51
|
+
end
|
52
|
+
end
|
53
|
+
end
|
54
|
+
end
|
55
|
+
end
|
56
|
+
end
|
@@ -0,0 +1,112 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
###
|
4
|
+
# Nokogiri::XML::Reader parses an XML document similar to the way a cursor
|
5
|
+
# would move. The Reader is given an XML document, and yields nodes
|
6
|
+
# to an each block.
|
7
|
+
#
|
8
|
+
# Here is an example of usage:
|
9
|
+
#
|
10
|
+
# reader = Nokogiri::XML::Reader(<<-eoxml)
|
11
|
+
# <x xmlns:tenderlove='http://tenderlovemaking.com/'>
|
12
|
+
# <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
|
13
|
+
# </x>
|
14
|
+
# eoxml
|
15
|
+
#
|
16
|
+
# reader.each do |node|
|
17
|
+
#
|
18
|
+
# # node is an instance of Nokogiri::XML::Reader
|
19
|
+
# puts node.name
|
20
|
+
#
|
21
|
+
# end
|
22
|
+
#
|
23
|
+
# Note that Nokogiri::XML::Reader#each can only be called once!! Once
|
24
|
+
# the cursor moves through the entire document, you must parse the
|
25
|
+
# document again. So make sure that you capture any information you
|
26
|
+
# need during the first iteration.
|
27
|
+
#
|
28
|
+
# The Reader parser is good for when you need the speed of a SAX parser,
|
29
|
+
# but do not want to write a Document handler.
|
30
|
+
class Reader
|
31
|
+
include Enumerable
|
32
|
+
|
33
|
+
TYPE_NONE = 0
|
34
|
+
# Element node type
|
35
|
+
TYPE_ELEMENT = 1
|
36
|
+
# Attribute node type
|
37
|
+
TYPE_ATTRIBUTE = 2
|
38
|
+
# Text node type
|
39
|
+
TYPE_TEXT = 3
|
40
|
+
# CDATA node type
|
41
|
+
TYPE_CDATA = 4
|
42
|
+
# Entity Reference node type
|
43
|
+
TYPE_ENTITY_REFERENCE = 5
|
44
|
+
# Entity node type
|
45
|
+
TYPE_ENTITY = 6
|
46
|
+
# PI node type
|
47
|
+
TYPE_PROCESSING_INSTRUCTION = 7
|
48
|
+
# Comment node type
|
49
|
+
TYPE_COMMENT = 8
|
50
|
+
# Document node type
|
51
|
+
TYPE_DOCUMENT = 9
|
52
|
+
# Document Type node type
|
53
|
+
TYPE_DOCUMENT_TYPE = 10
|
54
|
+
# Document Fragment node type
|
55
|
+
TYPE_DOCUMENT_FRAGMENT = 11
|
56
|
+
# Notation node type
|
57
|
+
TYPE_NOTATION = 12
|
58
|
+
# Whitespace node type
|
59
|
+
TYPE_WHITESPACE = 13
|
60
|
+
# Significant Whitespace node type
|
61
|
+
TYPE_SIGNIFICANT_WHITESPACE = 14
|
62
|
+
# Element end node type
|
63
|
+
TYPE_END_ELEMENT = 15
|
64
|
+
# Entity end node type
|
65
|
+
TYPE_END_ENTITY = 16
|
66
|
+
# XML Declaration node type
|
67
|
+
TYPE_XML_DECLARATION = 17
|
68
|
+
|
69
|
+
# A list of errors encountered while parsing
|
70
|
+
attr_accessor :errors
|
71
|
+
|
72
|
+
# The encoding for the document
|
73
|
+
attr_reader :encoding
|
74
|
+
|
75
|
+
# The XML source
|
76
|
+
attr_reader :source
|
77
|
+
|
78
|
+
alias :self_closing? :empty_element?
|
79
|
+
|
80
|
+
def initialize source, url = nil, encoding = nil # :nodoc:
|
81
|
+
@source = source
|
82
|
+
@errors = []
|
83
|
+
@encoding = encoding
|
84
|
+
end
|
85
|
+
private :initialize
|
86
|
+
|
87
|
+
###
|
88
|
+
# Get a hash of attribute names to values (merged with any namespaces) for the current node.
|
89
|
+
def attributes
|
90
|
+
Hash[attribute_nodes.map { |node|
|
91
|
+
[node.name, node.to_s]
|
92
|
+
}].merge(namespaces || {})
|
93
|
+
end
|
94
|
+
|
95
|
+
###
|
96
|
+
# Get the list of attribute nodes for the current node
|
97
|
+
def attribute_nodes
|
98
|
+
nodes = attr_nodes
|
99
|
+
nodes.each { |v| v.instance_variable_set(:@_r, self) }
|
100
|
+
nodes
|
101
|
+
end
|
102
|
+
|
103
|
+
###
|
104
|
+
# Move the cursor through the document yielding the cursor to the block
|
105
|
+
def each
|
106
|
+
while cursor = self.read
|
107
|
+
yield cursor
|
108
|
+
end
|
109
|
+
end
|
110
|
+
end
|
111
|
+
end
|
112
|
+
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
class << self
|
4
|
+
###
|
5
|
+
# Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
|
6
|
+
# See Nokogiri::XML::RelaxNG for an example.
|
7
|
+
def RelaxNG string_or_io
|
8
|
+
RelaxNG.new(string_or_io)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
###
|
13
|
+
# Nokogiri::XML::RelaxNG is used for validating XML against a
|
14
|
+
# RelaxNG schema.
|
15
|
+
#
|
16
|
+
# == Synopsis
|
17
|
+
#
|
18
|
+
# Validate an XML document against a RelaxNG schema. Loop over the errors
|
19
|
+
# that are returned and print them out:
|
20
|
+
#
|
21
|
+
# schema = Nokogiri::XML::RelaxNG(File.open(ADDRESS_SCHEMA_FILE))
|
22
|
+
# doc = Nokogiri::XML(File.open(ADDRESS_XML_FILE))
|
23
|
+
#
|
24
|
+
# schema.validate(doc).each do |error|
|
25
|
+
# puts error.message
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# The list of errors are Nokogiri::XML::SyntaxError objects.
|
29
|
+
class RelaxNG < Nokogiri::XML::Schema
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,171 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
###
|
4
|
+
# SAX Parsers are event driven parsers. Nokogiri provides two different
|
5
|
+
# event based parsers when dealing with XML. If you want to do SAX style
|
6
|
+
# parsing using HTML, check out Nokogiri::HTML::SAX.
|
7
|
+
#
|
8
|
+
# The basic way a SAX style parser works is by creating a parser,
|
9
|
+
# telling the parser about the events we're interested in, then giving
|
10
|
+
# the parser some XML to process. The parser will notify you when
|
11
|
+
# it encounters events you said you would like to know about.
|
12
|
+
#
|
13
|
+
# To register for events, you simply subclass Nokogiri::XML::SAX::Document,
|
14
|
+
# and implement the methods for which you would like notification.
|
15
|
+
#
|
16
|
+
# For example, if I want to be notified when a document ends, and when an
|
17
|
+
# element starts, I would write a class like this:
|
18
|
+
#
|
19
|
+
# class MyDocument < Nokogiri::XML::SAX::Document
|
20
|
+
# def end_document
|
21
|
+
# puts "the document has ended"
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# def start_element name, attributes = []
|
25
|
+
# puts "#{name} started"
|
26
|
+
# end
|
27
|
+
# end
|
28
|
+
#
|
29
|
+
# Then I would instantiate a SAX parser with this document, and feed the
|
30
|
+
# parser some XML
|
31
|
+
#
|
32
|
+
# # Create a new parser
|
33
|
+
# parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
|
34
|
+
#
|
35
|
+
# # Feed the parser some XML
|
36
|
+
# parser.parse(File.open(ARGV[0]))
|
37
|
+
#
|
38
|
+
# Now my document handler will be called when each node starts, and when
|
39
|
+
# the document ends. To see what kinds of events are available, take
|
40
|
+
# a look at Nokogiri::XML::SAX::Document.
|
41
|
+
#
|
42
|
+
# Two SAX parsers for XML are available, a parser that reads from a string
|
43
|
+
# or IO object as it feels necessary, and a parser that lets you spoon
|
44
|
+
# feed it XML. If you want to let Nokogiri deal with reading your XML,
|
45
|
+
# use the Nokogiri::XML::SAX::Parser. If you want to have fine-grained
|
46
|
+
# control over the XML input, use the Nokogiri::XML::SAX::PushParser.
|
47
|
+
module SAX
|
48
|
+
###
|
49
|
+
# This class is used for registering types of events you are interested
|
50
|
+
# in handling. All of the methods on this class are available as
|
51
|
+
# possible events while parsing an XML document. To register for any
|
52
|
+
# particular event, just subclass this class and implement the methods
|
53
|
+
# you are interested in knowing about.
|
54
|
+
#
|
55
|
+
# To only be notified about start and end element events, write a class
|
56
|
+
# like this:
|
57
|
+
#
|
58
|
+
# class MyDocument < Nokogiri::XML::SAX::Document
|
59
|
+
# def start_element name, attrs = []
|
60
|
+
# puts "#{name} started!"
|
61
|
+
# end
|
62
|
+
#
|
63
|
+
# def end_element name
|
64
|
+
# puts "#{name} ended"
|
65
|
+
# end
|
66
|
+
# end
|
67
|
+
#
|
68
|
+
# You can use this event handler for any SAX style parser included with
|
69
|
+
# Nokogiri. See Nokogiri::XML::SAX, and Nokogiri::HTML::SAX.
|
70
|
+
class Document
|
71
|
+
###
|
72
|
+
# Called when an XML declaration is parsed
|
73
|
+
def xmldecl version, encoding, standalone
|
74
|
+
end
|
75
|
+
|
76
|
+
###
|
77
|
+
# Called when document starts parsing
|
78
|
+
def start_document
|
79
|
+
end
|
80
|
+
|
81
|
+
###
|
82
|
+
# Called when document ends parsing
|
83
|
+
def end_document
|
84
|
+
end
|
85
|
+
|
86
|
+
###
|
87
|
+
# Called at the beginning of an element
|
88
|
+
# * +name+ is the name of the tag
|
89
|
+
# * +attrs+ are an assoc list of namespaces and attributes, e.g.:
|
90
|
+
# [ ["xmlns:foo", "http://sample.net"], ["size", "large"] ]
|
91
|
+
def start_element name, attrs = []
|
92
|
+
end
|
93
|
+
|
94
|
+
###
|
95
|
+
# Called at the end of an element
|
96
|
+
# +name+ is the tag name
|
97
|
+
def end_element name
|
98
|
+
end
|
99
|
+
|
100
|
+
###
|
101
|
+
# Called at the beginning of an element
|
102
|
+
# +name+ is the element name
|
103
|
+
# +attrs+ is a list of attributes
|
104
|
+
# +prefix+ is the namespace prefix for the element
|
105
|
+
# +uri+ is the associated namespace URI
|
106
|
+
# +ns+ is a hash of namespace prefix:urls associated with the element
|
107
|
+
def start_element_namespace name, attrs = [], prefix = nil, uri = nil, ns = []
|
108
|
+
###
|
109
|
+
# Deal with SAX v1 interface
|
110
|
+
name = [prefix, name].compact.join(':')
|
111
|
+
attributes = ns.map { |ns_prefix,ns_uri|
|
112
|
+
[['xmlns', ns_prefix].compact.join(':'), ns_uri]
|
113
|
+
} + attrs.map { |attr|
|
114
|
+
[[attr.prefix, attr.localname].compact.join(':'), attr.value]
|
115
|
+
}
|
116
|
+
start_element name, attributes
|
117
|
+
end
|
118
|
+
|
119
|
+
###
|
120
|
+
# Called at the end of an element
|
121
|
+
# +name+ is the element's name
|
122
|
+
# +prefix+ is the namespace prefix associated with the element
|
123
|
+
# +uri+ is the associated namespace URI
|
124
|
+
def end_element_namespace name, prefix = nil, uri = nil
|
125
|
+
###
|
126
|
+
# Deal with SAX v1 interface
|
127
|
+
end_element [prefix, name].compact.join(':')
|
128
|
+
end
|
129
|
+
|
130
|
+
###
|
131
|
+
# Characters read between tags. This method might be called multiple
|
132
|
+
# times given one contiguous string of characters.
|
133
|
+
#
|
134
|
+
# +string+ contains the character data
|
135
|
+
def characters string
|
136
|
+
end
|
137
|
+
|
138
|
+
###
|
139
|
+
# Called when comments are encountered
|
140
|
+
# +string+ contains the comment data
|
141
|
+
def comment string
|
142
|
+
end
|
143
|
+
|
144
|
+
###
|
145
|
+
# Called on document warnings
|
146
|
+
# +string+ contains the warning
|
147
|
+
def warning string
|
148
|
+
end
|
149
|
+
|
150
|
+
###
|
151
|
+
# Called on document errors
|
152
|
+
# +string+ contains the error
|
153
|
+
def error string
|
154
|
+
end
|
155
|
+
|
156
|
+
###
|
157
|
+
# Called when cdata blocks are found
|
158
|
+
# +string+ contains the cdata content
|
159
|
+
def cdata_block string
|
160
|
+
end
|
161
|
+
|
162
|
+
###
|
163
|
+
# Called when processing instructions are found
|
164
|
+
# +name+ is the target of the instruction
|
165
|
+
# +content+ is the value of the instruction
|
166
|
+
def processing_instruction name, content
|
167
|
+
end
|
168
|
+
end
|
169
|
+
end
|
170
|
+
end
|
171
|
+
end
|
@@ -0,0 +1,123 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
module SAX
|
4
|
+
###
|
5
|
+
# This parser is a SAX style parser that reads its input as it
|
6
|
+
# deems necessary. The parser takes a Nokogiri::XML::SAX::Document,
|
7
|
+
# an optional encoding, then given an XML input, sends messages to
|
8
|
+
# the Nokogiri::XML::SAX::Document.
|
9
|
+
#
|
10
|
+
# Here is an example of using this parser:
|
11
|
+
#
|
12
|
+
# # Create a subclass of Nokogiri::XML::SAX::Document and implement
|
13
|
+
# # the events we care about:
|
14
|
+
# class MyDoc < Nokogiri::XML::SAX::Document
|
15
|
+
# def start_element name, attrs = []
|
16
|
+
# puts "starting: #{name}"
|
17
|
+
# end
|
18
|
+
#
|
19
|
+
# def end_element name
|
20
|
+
# puts "ending: #{name}"
|
21
|
+
# end
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# # Create our parser
|
25
|
+
# parser = Nokogiri::XML::SAX::Parser.new(MyDoc.new)
|
26
|
+
#
|
27
|
+
# # Send some XML to the parser
|
28
|
+
# parser.parse(File.open(ARGV[0]))
|
29
|
+
#
|
30
|
+
# For more information about SAX parsers, see Nokogiri::XML::SAX. Also
|
31
|
+
# see Nokogiri::XML::SAX::Document for the available events.
|
32
|
+
class Parser
|
33
|
+
class Attribute < Struct.new(:localname, :prefix, :uri, :value)
|
34
|
+
end
|
35
|
+
|
36
|
+
# Encodings this parser supports
|
37
|
+
ENCODINGS = {
|
38
|
+
'NONE' => 0, # No char encoding detected
|
39
|
+
'UTF-8' => 1, # UTF-8
|
40
|
+
'UTF16LE' => 2, # UTF-16 little endian
|
41
|
+
'UTF16BE' => 3, # UTF-16 big endian
|
42
|
+
'UCS4LE' => 4, # UCS-4 little endian
|
43
|
+
'UCS4BE' => 5, # UCS-4 big endian
|
44
|
+
'EBCDIC' => 6, # EBCDIC uh!
|
45
|
+
'UCS4-2143' => 7, # UCS-4 unusual ordering
|
46
|
+
'UCS4-3412' => 8, # UCS-4 unusual ordering
|
47
|
+
'UCS2' => 9, # UCS-2
|
48
|
+
'ISO-8859-1' => 10, # ISO-8859-1 ISO Latin 1
|
49
|
+
'ISO-8859-2' => 11, # ISO-8859-2 ISO Latin 2
|
50
|
+
'ISO-8859-3' => 12, # ISO-8859-3
|
51
|
+
'ISO-8859-4' => 13, # ISO-8859-4
|
52
|
+
'ISO-8859-5' => 14, # ISO-8859-5
|
53
|
+
'ISO-8859-6' => 15, # ISO-8859-6
|
54
|
+
'ISO-8859-7' => 16, # ISO-8859-7
|
55
|
+
'ISO-8859-8' => 17, # ISO-8859-8
|
56
|
+
'ISO-8859-9' => 18, # ISO-8859-9
|
57
|
+
'ISO-2022-JP' => 19, # ISO-2022-JP
|
58
|
+
'SHIFT-JIS' => 20, # Shift_JIS
|
59
|
+
'EUC-JP' => 21, # EUC-JP
|
60
|
+
'ASCII' => 22, # pure ASCII
|
61
|
+
}
|
62
|
+
|
63
|
+
# The Nokogiri::XML::SAX::Document where events will be sent.
|
64
|
+
attr_accessor :document
|
65
|
+
|
66
|
+
# The encoding being used for this document.
|
67
|
+
attr_accessor :encoding
|
68
|
+
|
69
|
+
# Create a new Parser with +doc+ and +encoding+
|
70
|
+
def initialize doc = Nokogiri::XML::SAX::Document.new, encoding = 'UTF-8'
|
71
|
+
check_encoding(encoding)
|
72
|
+
@encoding = encoding
|
73
|
+
@document = doc
|
74
|
+
@warned = false
|
75
|
+
end
|
76
|
+
|
77
|
+
###
|
78
|
+
# Parse given +thing+ which may be a string containing xml, or an
|
79
|
+
# IO object.
|
80
|
+
def parse thing, &block
|
81
|
+
if thing.respond_to?(:read) && thing.respond_to?(:close)
|
82
|
+
parse_io(thing, &block)
|
83
|
+
else
|
84
|
+
parse_memory(thing, &block)
|
85
|
+
end
|
86
|
+
end
|
87
|
+
|
88
|
+
###
|
89
|
+
# Parse given +io+
|
90
|
+
def parse_io io, encoding = 'ASCII'
|
91
|
+
check_encoding(encoding)
|
92
|
+
@encoding = encoding
|
93
|
+
ctx = ParserContext.io(io, ENCODINGS[encoding])
|
94
|
+
yield ctx if block_given?
|
95
|
+
ctx.parse_with self
|
96
|
+
end
|
97
|
+
|
98
|
+
###
|
99
|
+
# Parse a file with +filename+
|
100
|
+
def parse_file filename
|
101
|
+
raise ArgumentError unless filename
|
102
|
+
raise Errno::ENOENT unless File.exist?(filename)
|
103
|
+
raise Errno::EISDIR if File.directory?(filename)
|
104
|
+
ctx = ParserContext.file filename
|
105
|
+
yield ctx if block_given?
|
106
|
+
ctx.parse_with self
|
107
|
+
end
|
108
|
+
|
109
|
+
def parse_memory data
|
110
|
+
ctx = ParserContext.memory data
|
111
|
+
yield ctx if block_given?
|
112
|
+
ctx.parse_with self
|
113
|
+
end
|
114
|
+
|
115
|
+
private
|
116
|
+
def check_encoding(encoding)
|
117
|
+
encoding.upcase!
|
118
|
+
raise ArgumentError.new("'#{encoding}' is not a valid encoding") unless ENCODINGS[encoding]
|
119
|
+
end
|
120
|
+
end
|
121
|
+
end
|
122
|
+
end
|
123
|
+
end
|