nokogiri 1.3.0-x86-mswin32
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/.autotest +27 -0
- data/CHANGELOG.ja.rdoc +233 -0
- data/CHANGELOG.rdoc +222 -0
- data/Manifest.txt +247 -0
- data/README.ja.rdoc +103 -0
- data/README.rdoc +117 -0
- data/Rakefile +205 -0
- data/bin/nokogiri +47 -0
- data/ext/nokogiri/extconf.rb +89 -0
- data/ext/nokogiri/html_document.c +183 -0
- data/ext/nokogiri/html_document.h +10 -0
- data/ext/nokogiri/html_element_description.c +272 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.c +30 -0
- data/ext/nokogiri/html_entity_lookup.h +8 -0
- data/ext/nokogiri/html_sax_parser.c +57 -0
- data/ext/nokogiri/html_sax_parser.h +11 -0
- data/ext/nokogiri/iconv.dll +0 -0
- data/ext/nokogiri/libexslt.dll +0 -0
- data/ext/nokogiri/libxml2.dll +0 -0
- data/ext/nokogiri/libxslt.dll +0 -0
- data/ext/nokogiri/nokogiri.c +81 -0
- data/ext/nokogiri/nokogiri.h +149 -0
- data/ext/nokogiri/xml_attr.c +92 -0
- data/ext/nokogiri/xml_attr.h +9 -0
- data/ext/nokogiri/xml_cdata.c +53 -0
- data/ext/nokogiri/xml_cdata.h +9 -0
- data/ext/nokogiri/xml_comment.c +51 -0
- data/ext/nokogiri/xml_comment.h +9 -0
- data/ext/nokogiri/xml_document.c +308 -0
- data/ext/nokogiri/xml_document.h +21 -0
- data/ext/nokogiri/xml_document_fragment.c +48 -0
- data/ext/nokogiri/xml_document_fragment.h +10 -0
- data/ext/nokogiri/xml_dtd.c +102 -0
- data/ext/nokogiri/xml_dtd.h +8 -0
- data/ext/nokogiri/xml_entity_reference.c +50 -0
- data/ext/nokogiri/xml_entity_reference.h +9 -0
- data/ext/nokogiri/xml_io.c +24 -0
- data/ext/nokogiri/xml_io.h +10 -0
- data/ext/nokogiri/xml_namespace.c +69 -0
- data/ext/nokogiri/xml_namespace.h +12 -0
- data/ext/nokogiri/xml_node.c +928 -0
- data/ext/nokogiri/xml_node.h +14 -0
- data/ext/nokogiri/xml_node_set.c +386 -0
- data/ext/nokogiri/xml_node_set.h +9 -0
- data/ext/nokogiri/xml_processing_instruction.c +54 -0
- data/ext/nokogiri/xml_processing_instruction.h +9 -0
- data/ext/nokogiri/xml_reader.c +572 -0
- data/ext/nokogiri/xml_reader.h +10 -0
- data/ext/nokogiri/xml_relax_ng.c +106 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +336 -0
- data/ext/nokogiri/xml_sax_parser.h +10 -0
- data/ext/nokogiri/xml_sax_push_parser.c +86 -0
- data/ext/nokogiri/xml_sax_push_parser.h +9 -0
- data/ext/nokogiri/xml_schema.c +107 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.c +203 -0
- data/ext/nokogiri/xml_syntax_error.h +12 -0
- data/ext/nokogiri/xml_text.c +47 -0
- data/ext/nokogiri/xml_text.h +9 -0
- data/ext/nokogiri/xml_xpath.c +53 -0
- data/ext/nokogiri/xml_xpath.h +11 -0
- data/ext/nokogiri/xml_xpath_context.c +252 -0
- data/ext/nokogiri/xml_xpath_context.h +9 -0
- data/ext/nokogiri/xslt_stylesheet.c +131 -0
- data/ext/nokogiri/xslt_stylesheet.h +9 -0
- data/ext/nokogiri/zlib1.dll +0 -0
- data/lib/action-nokogiri.rb +36 -0
- data/lib/nokogiri.rb +110 -0
- data/lib/nokogiri/1.8/nokogiri.so +0 -0
- data/lib/nokogiri/1.9/nokogiri.so +0 -0
- data/lib/nokogiri/css.rb +25 -0
- data/lib/nokogiri/css/generated_parser.rb +748 -0
- data/lib/nokogiri/css/generated_tokenizer.rb +144 -0
- data/lib/nokogiri/css/node.rb +107 -0
- data/lib/nokogiri/css/parser.rb +82 -0
- data/lib/nokogiri/css/parser.y +227 -0
- data/lib/nokogiri/css/syntax_error.rb +7 -0
- data/lib/nokogiri/css/tokenizer.rb +11 -0
- data/lib/nokogiri/css/tokenizer.rex +54 -0
- data/lib/nokogiri/css/xpath_visitor.rb +172 -0
- data/lib/nokogiri/decorators.rb +2 -0
- data/lib/nokogiri/decorators/hpricot.rb +3 -0
- data/lib/nokogiri/decorators/hpricot/node.rb +56 -0
- data/lib/nokogiri/decorators/hpricot/node_set.rb +54 -0
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +30 -0
- data/lib/nokogiri/decorators/slop.rb +33 -0
- data/lib/nokogiri/ffi/html/document.rb +37 -0
- data/lib/nokogiri/ffi/html/element_description.rb +85 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser.rb +21 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +32 -0
- data/lib/nokogiri/ffi/libxml.rb +314 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +107 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +26 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +42 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +38 -0
- data/lib/nokogiri/ffi/xml/node.rb +380 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +130 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +217 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +51 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +148 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +38 -0
- data/lib/nokogiri/ffi/xml/schema.rb +55 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +76 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
- data/lib/nokogiri/hpricot.rb +62 -0
- data/lib/nokogiri/html.rb +34 -0
- data/lib/nokogiri/html/builder.rb +35 -0
- data/lib/nokogiri/html/document.rb +71 -0
- data/lib/nokogiri/html/document_fragment.rb +15 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +13 -0
- data/lib/nokogiri/html/sax/parser.rb +47 -0
- data/lib/nokogiri/nokogiri.rb +1 -0
- data/lib/nokogiri/syntax_error.rb +4 -0
- data/lib/nokogiri/version.rb +29 -0
- data/lib/nokogiri/version_warning.rb +11 -0
- data/lib/nokogiri/xml.rb +62 -0
- data/lib/nokogiri/xml/attr.rb +9 -0
- data/lib/nokogiri/xml/builder.rb +254 -0
- data/lib/nokogiri/xml/cdata.rb +11 -0
- data/lib/nokogiri/xml/document.rb +100 -0
- data/lib/nokogiri/xml/document_fragment.rb +49 -0
- data/lib/nokogiri/xml/dtd.rb +11 -0
- data/lib/nokogiri/xml/entity_declaration.rb +11 -0
- data/lib/nokogiri/xml/fragment_handler.rb +55 -0
- data/lib/nokogiri/xml/namespace.rb +7 -0
- data/lib/nokogiri/xml/node.rb +745 -0
- data/lib/nokogiri/xml/node/save_options.rb +42 -0
- data/lib/nokogiri/xml/node_set.rb +238 -0
- data/lib/nokogiri/xml/notation.rb +6 -0
- data/lib/nokogiri/xml/parse_options.rb +80 -0
- data/lib/nokogiri/xml/processing_instruction.rb +8 -0
- data/lib/nokogiri/xml/reader.rb +66 -0
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +3 -0
- data/lib/nokogiri/xml/sax/document.rb +143 -0
- data/lib/nokogiri/xml/sax/parser.rb +101 -0
- data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
- data/lib/nokogiri/xml/schema.rb +65 -0
- data/lib/nokogiri/xml/syntax_error.rb +34 -0
- data/lib/nokogiri/xml/xpath.rb +10 -0
- data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
- data/lib/nokogiri/xml/xpath_context.rb +16 -0
- data/lib/nokogiri/xslt.rb +48 -0
- data/lib/nokogiri/xslt/stylesheet.rb +25 -0
- data/lib/xsd/xmlparser/nokogiri.rb +64 -0
- data/tasks/test.rb +161 -0
- data/test/css/test_nthiness.rb +160 -0
- data/test/css/test_parser.rb +277 -0
- data/test/css/test_tokenizer.rb +176 -0
- data/test/css/test_xpath_visitor.rb +76 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/dont_hurt_em_why.xml +422 -0
- data/test/files/exslt.xml +8 -0
- data/test/files/exslt.xslt +35 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/files/staff.xml +59 -0
- data/test/files/staff.xslt +32 -0
- data/test/files/tlm.html +850 -0
- data/test/helper.rb +123 -0
- data/test/hpricot/files/basic.xhtml +17 -0
- data/test/hpricot/files/boingboing.html +2266 -0
- data/test/hpricot/files/cy0.html +3653 -0
- data/test/hpricot/files/immob.html +400 -0
- data/test/hpricot/files/pace_application.html +1320 -0
- data/test/hpricot/files/tenderlove.html +16 -0
- data/test/hpricot/files/uswebgen.html +220 -0
- data/test/hpricot/files/utf8.html +1054 -0
- data/test/hpricot/files/week9.html +1723 -0
- data/test/hpricot/files/why.xml +19 -0
- data/test/hpricot/load_files.rb +11 -0
- data/test/hpricot/test_alter.rb +68 -0
- data/test/hpricot/test_builder.rb +20 -0
- data/test/hpricot/test_parser.rb +426 -0
- data/test/hpricot/test_paths.rb +15 -0
- data/test/hpricot/test_preserved.rb +77 -0
- data/test/hpricot/test_xml.rb +30 -0
- data/test/html/sax/test_parser.rb +52 -0
- data/test/html/test_builder.rb +156 -0
- data/test/html/test_document.rb +361 -0
- data/test/html/test_document_encoding.rb +46 -0
- data/test/html/test_document_fragment.rb +97 -0
- data/test/html/test_element_description.rb +95 -0
- data/test/html/test_named_characters.rb +14 -0
- data/test/html/test_node.rb +165 -0
- data/test/test_convert_xpath.rb +186 -0
- data/test/test_css_cache.rb +56 -0
- data/test/test_gc.rb +15 -0
- data/test/test_memory_leak.rb +77 -0
- data/test/test_nokogiri.rb +127 -0
- data/test/test_reader.rb +316 -0
- data/test/test_xslt_transforms.rb +131 -0
- data/test/xml/node/test_save_options.rb +20 -0
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +169 -0
- data/test/xml/sax/test_push_parser.rb +92 -0
- data/test/xml/test_attr.rb +38 -0
- data/test/xml/test_builder.rb +73 -0
- data/test/xml/test_cdata.rb +38 -0
- data/test/xml/test_comment.rb +23 -0
- data/test/xml/test_document.rb +397 -0
- data/test/xml/test_document_encoding.rb +26 -0
- data/test/xml/test_document_fragment.rb +76 -0
- data/test/xml/test_dtd.rb +42 -0
- data/test/xml/test_dtd_encoding.rb +31 -0
- data/test/xml/test_entity_reference.rb +21 -0
- data/test/xml/test_namespace.rb +43 -0
- data/test/xml/test_node.rb +808 -0
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +84 -0
- data/test/xml/test_node_set.rb +368 -0
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +30 -0
- data/test/xml/test_reader_encoding.rb +126 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +65 -0
- data/test/xml/test_text.rb +18 -0
- data/test/xml/test_unparented_node.rb +381 -0
- data/test/xml/test_xpath.rb +106 -0
- metadata +409 -0
@@ -0,0 +1,32 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
class << self
|
4
|
+
###
|
5
|
+
# Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
|
6
|
+
# See Nokogiri::XML::RelaxNG for an example.
|
7
|
+
def RelaxNG string_or_io
|
8
|
+
RelaxNG.new(string_or_io)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
###
|
13
|
+
# Nokogiri::XML::RelaxNG is used for validating XML against a
|
14
|
+
# RelaxNG schema.
|
15
|
+
#
|
16
|
+
# == Synopsis
|
17
|
+
#
|
18
|
+
# Validate an XML document against a RelaxNG schema. Loop over the errors
|
19
|
+
# that are returned and print them out:
|
20
|
+
#
|
21
|
+
# schema = Nokogiri::XML::RelaxNG(File.read(ADDRESS_SCHEMA_FILE))
|
22
|
+
# doc = Nokogiri::XML(File.read(ADDRESS_XML_FILE))
|
23
|
+
#
|
24
|
+
# schema.validate(doc).each do |error|
|
25
|
+
# puts error.message
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# The errors in the list are Nokogiri::XML::SyntaxError objects.
|
29
|
+
class RelaxNG < Nokogiri::XML::Schema
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
@@ -0,0 +1,143 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
###
|
4
|
+
# SAX Parsers are event driven parsers. Nokogiri provides two different
|
5
|
+
# event based parsers when dealing with XML. If you want to do SAX style
|
6
|
+
# parsing using HTML, check out Nokogiri::HTML::SAX.
|
7
|
+
#
|
8
|
+
# The basic way a SAX style parser works is by creating a parser,
|
9
|
+
# telling the parser about the events we're interested in, then giving
|
10
|
+
# the parser some XML to process. The parser will notify you when
|
11
|
+
# it encounters events you said you would like to know about.
|
12
|
+
#
|
13
|
+
# To register for events, you simply subclass Nokogiri::XML::SAX::Document,
|
14
|
+
# and implement the methods for which you would like notification.
|
15
|
+
#
|
16
|
+
# For example, if I want to be notified when a document ends, and when an
|
17
|
+
# element starts, I would write a class like this:
|
18
|
+
#
|
19
|
+
# class MyDocument < Nokogiri::XML::SAX::Document
|
20
|
+
# def end_document
|
21
|
+
# puts "the document has ended"
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# def start_element name, attributes = []
|
25
|
+
# puts "#{name} started"
|
26
|
+
# end
|
27
|
+
# end
|
28
|
+
#
|
29
|
+
# Then I would instantiate a SAX parser with this document, and feed the
|
30
|
+
# parser some XML
|
31
|
+
#
|
32
|
+
# # Create a new parser
|
33
|
+
# parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
|
34
|
+
#
|
35
|
+
# # Feed the parser some XML
|
36
|
+
# parser.parse(File.open(ARGV[0], 'rb'))
|
37
|
+
#
|
38
|
+
# Now my document handler will be called when each node starts, and when
|
39
|
+
# the document ends. To see what kinds of events are available, take
|
40
|
+
# a look at Nokogiri::XML::SAX::Document.
|
41
|
+
#
|
42
|
+
# Two SAX parsers for XML are available, a parser that reads from a string
|
43
|
+
# or IO object as it feels necessary, and a parser that lets you spoon
|
44
|
+
# feed it XML. If you want to let Nokogiri deal with reading your XML,
|
45
|
+
# use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
|
46
|
+
# control over the XML input, use the Nokogiri::XML::SAX::PushParser.
|
47
|
+
module SAX
|
48
|
+
###
|
49
|
+
# This class is used for registering types of events you are interested
|
50
|
+
# in handling. All of the methods on this class are available as
|
51
|
+
# possible events while parsing an XML document. To register for any
|
52
|
+
# particular event, just subclass this class and implement the methods
|
53
|
+
# you are interested in knowing about.
|
54
|
+
#
|
55
|
+
# To only be notified about start and end element events, write a class
|
56
|
+
# like this:
|
57
|
+
#
|
58
|
+
# class MyDocument < Nokogiri::XML::SAX::Document
|
59
|
+
# def start_element name, attrs = []
|
60
|
+
# puts "#{name} started!"
|
61
|
+
# end
|
62
|
+
#
|
63
|
+
# def end_element name
|
64
|
+
# puts "#{name} ended"
|
65
|
+
# end
|
66
|
+
# end
|
67
|
+
#
|
68
|
+
# You can use this event handler for any SAX style parser included with
|
69
|
+
# Nokogiri. See Nokogiri::XML::SAX, and Nokogiri::HTML::SAX.
|
70
|
+
class Document
|
71
|
+
###
|
72
|
+
# Called when document starts parsing
|
73
|
+
def start_document
|
74
|
+
end
|
75
|
+
|
76
|
+
###
|
77
|
+
# Called when document ends parsing
|
78
|
+
def end_document
|
79
|
+
end
|
80
|
+
|
81
|
+
###
|
82
|
+
# Called at the beginning of an element
|
83
|
+
# +name+ is the name of the tag with +attrs+ as attributes
|
84
|
+
def start_element name, attrs = []
|
85
|
+
end
|
86
|
+
|
87
|
+
###
|
88
|
+
# Called at the end of an element
|
89
|
+
# +name+ is the tag name
|
90
|
+
def end_element name
|
91
|
+
end
|
92
|
+
|
93
|
+
###
|
94
|
+
# Called at the beginning of an element
|
95
|
+
# +name+ is the element name
|
96
|
+
# +attrs+ is a hash of attributes
|
97
|
+
# +prefix+ is the namespace prefix for the element
|
98
|
+
# +uri+ is the associated namespace URI
|
99
|
+
# +namespaces+ is a hash of namespace prefix:urls associated with the element
|
100
|
+
def start_element_ns(name, attrs = {}, prefix = nil, uri = nil, namespaces = {})
|
101
|
+
end
|
102
|
+
|
103
|
+
###
|
104
|
+
# Called at the end of an element
|
105
|
+
# +name+ is the element's name
|
106
|
+
# +prefix+ is the namespace prefix associated with the element
|
107
|
+
# +uri+ is the associated namespace URI
|
108
|
+
def end_element_ns(name, prefix = nil, uri = nil)
|
109
|
+
end
|
110
|
+
|
111
|
+
###
|
112
|
+
# Characters read between tags
|
113
|
+
# +string+ contains the character data
|
114
|
+
def characters string
|
115
|
+
end
|
116
|
+
|
117
|
+
###
|
118
|
+
# Called when comments are encountered
|
119
|
+
# +string+ contains the comment data
|
120
|
+
def comment string
|
121
|
+
end
|
122
|
+
|
123
|
+
###
|
124
|
+
# Called on document warnings
|
125
|
+
# +string+ contains the warning
|
126
|
+
def warning string
|
127
|
+
end
|
128
|
+
|
129
|
+
###
|
130
|
+
# Called on document errors
|
131
|
+
# +string+ contains the error
|
132
|
+
def error string
|
133
|
+
end
|
134
|
+
|
135
|
+
###
|
136
|
+
# Called when cdata blocks are found
|
137
|
+
# +string+ contains the cdata content
|
138
|
+
def cdata_block string
|
139
|
+
end
|
140
|
+
end
|
141
|
+
end
|
142
|
+
end
|
143
|
+
end
|
@@ -0,0 +1,101 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
module SAX
|
4
|
+
###
|
5
|
+
# This parser is a SAX style parser that reads its input as it
|
6
|
+
# deems necessary. The parser takes a Nokogiri::XML::SAX::Document,
|
7
|
+
# an optional encoding, then given an XML input, sends messages to
|
8
|
+
# the Nokogiri::XML::SAX::Document.
|
9
|
+
#
|
10
|
+
# Here is an example of using this parser:
|
11
|
+
#
|
12
|
+
# # Create a subclass of Nokogiri::XML::SAX::Document and implement
|
13
|
+
# # the events we care about:
|
14
|
+
# class MyDoc < Nokogiri::XML::SAX::Document
|
15
|
+
# def start_element name, attrs = []
|
16
|
+
# puts "starting: #{name}"
|
17
|
+
# end
|
18
|
+
#
|
19
|
+
# def end_element name
|
20
|
+
# puts "ending: #{name}"
|
21
|
+
# end
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# # Create our parser
|
25
|
+
# parser = Nokogiri::XML::SAX::Parser.new(MyDoc.new)
|
26
|
+
#
|
27
|
+
# # Send some XML to the parser
|
28
|
+
# parser.parse(File.read(ARGV[0]))
|
29
|
+
#
|
30
|
+
# For more information about SAX parsers, see Nokogiri::XML::SAX. Also
|
31
|
+
# see Nokogiri::XML::SAX::Document for the available events.
|
32
|
+
class Parser
|
33
|
+
# Encodings this parser supports
|
34
|
+
ENCODINGS = {
|
35
|
+
'NONE' => 0, # No char encoding detected
|
36
|
+
'UTF-8' => 1, # UTF-8
|
37
|
+
'UTF16LE' => 2, # UTF-16 little endian
|
38
|
+
'UTF16BE' => 3, # UTF-16 big endian
|
39
|
+
'UCS4LE' => 4, # UCS-4 little endian
|
40
|
+
'UCS4BE' => 5, # UCS-4 big endian
|
41
|
+
'EBCDIC' => 6, # EBCDIC uh!
|
42
|
+
'UCS4-2143' => 7, # UCS-4 unusual ordering
|
43
|
+
'UCS4-3412' => 8, # UCS-4 unusual ordering
|
44
|
+
'UCS2' => 9, # UCS-2
|
45
|
+
'ISO-8859-1' => 10, # ISO-8859-1 ISO Latin 1
|
46
|
+
'ISO-8859-2' => 11, # ISO-8859-2 ISO Latin 2
|
47
|
+
'ISO-8859-3' => 12, # ISO-8859-3
|
48
|
+
'ISO-8859-4' => 13, # ISO-8859-4
|
49
|
+
'ISO-8859-5' => 14, # ISO-8859-5
|
50
|
+
'ISO-8859-6' => 15, # ISO-8859-6
|
51
|
+
'ISO-8859-7' => 16, # ISO-8859-7
|
52
|
+
'ISO-8859-8' => 17, # ISO-8859-8
|
53
|
+
'ISO-8859-9' => 18, # ISO-8859-9
|
54
|
+
'ISO-2022-JP' => 19, # ISO-2022-JP
|
55
|
+
'SHIFT-JIS' => 20, # Shift_JIS
|
56
|
+
'EUC-JP' => 21, # EUC-JP
|
57
|
+
'ASCII' => 22, # pure ASCII
|
58
|
+
}
|
59
|
+
|
60
|
+
# The Nokogiri::XML::SAX::Document where events will be sent.
|
61
|
+
attr_accessor :document
|
62
|
+
|
63
|
+
# The encoding being used for this document.
|
64
|
+
attr_accessor :encoding
|
65
|
+
|
66
|
+
# Create a new Parser with +doc+ and +encoding+
|
67
|
+
def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = 'ASCII')
|
68
|
+
@encoding = encoding
|
69
|
+
@document = doc
|
70
|
+
end
|
71
|
+
|
72
|
+
###
# Parse given +thing+, which may be a String containing XML or an IO
# object (anything that responds to both +read+ and +close+).
#
# IO input is handed to parse_io together with the encoding currently
# configured on this parser, so the encoding given to Parser.new (or set
# through #encoding=) is the one that gets used.  Anything else is handed
# to parse_memory.
def parse(thing)
  if thing.respond_to?(:read) && thing.respond_to?(:close)
    # Pass the encoding explicitly instead of relying on parse_io's default.
    parse_io(thing, @encoding)
  else
    parse_memory(thing)
  end
end
|
82
|
+
|
83
|
+
###
# Parse given +io+ using +encoding+, the name of an entry in ENCODINGS.
#
# +encoding+ defaults to the encoding this parser is currently configured
# with, so calling parse_io(io) no longer discards the value given to
# Parser.new.  The encoding used is stored on the parser.  A name that is
# not a key of ENCODINGS (including nil) is parsed with the 'ASCII' entry.
def parse_io(io, encoding = @encoding)
  @encoding = encoding
  native_parse_io(io, ENCODINGS[@encoding] || ENCODINGS['ASCII'])
end
|
89
|
+
|
90
|
+
###
# Parse a file with +filename+.
#
# Raises ArgumentError when +filename+ is nil or false, Errno::ENOENT when
# no file exists at +filename+, and Errno::EISDIR when +filename+ names a
# directory.  Otherwise the file is handed to native_parse_file.
def parse_file(filename)
  raise ArgumentError, 'filename must not be nil' unless filename
  # File.exist? rather than File.exists?: the latter is deprecated and has
  # been removed from current Ruby versions.
  raise Errno::ENOENT, filename.to_s unless File.exist?(filename)
  raise Errno::EISDIR, filename.to_s if File.directory?(filename)
  native_parse_file(filename)
end
|
98
|
+
end
|
99
|
+
end
|
100
|
+
end
|
101
|
+
end
|
@@ -0,0 +1,60 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
module SAX
|
4
|
+
###
|
5
|
+
# PushParser can parse a document that is fed to it manually. It
|
6
|
+
# must be given a SAX::Document object which will be called with
|
7
|
+
# SAX events as the document is being parsed.
|
8
|
+
#
|
9
|
+
# Calling PushParser#<< writes XML to the parser, calling any SAX
|
10
|
+
# callbacks it can.
|
11
|
+
#
|
12
|
+
# PushParser#finish tells the parser that the document is finished
|
13
|
+
# and calls the end_document SAX method.
|
14
|
+
#
|
15
|
+
# Example:
|
16
|
+
#
|
17
|
+
# parser = PushParser.new(Class.new(XML::SAX::Document) {
|
18
|
+
# def start_document
|
19
|
+
# puts "start document called"
|
20
|
+
# end
|
21
|
+
# }.new)
|
22
|
+
# parser << "<div>hello<"
|
23
|
+
# parser << "/div>"
|
24
|
+
# parser.finish
|
25
|
+
class PushParser
|
26
|
+
|
27
|
+
# The Nokogiri::XML::SAX::Document on which the PushParser will be
|
28
|
+
# operating
|
29
|
+
attr_accessor :document
|
30
|
+
|
31
|
+
###
|
32
|
+
# Create a new PushParser with +doc+ as the SAX Document, providing
|
33
|
+
# an optional +file_name+ and +encoding+
|
34
|
+
def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = 'ASCII')
|
35
|
+
@document = doc
|
36
|
+
@encoding = encoding
|
37
|
+
@sax_parser = XML::SAX::Parser.new(doc)
|
38
|
+
|
39
|
+
## Create our push parser context
|
40
|
+
initialize_native(@sax_parser, file_name)
|
41
|
+
end
|
42
|
+
|
43
|
+
###
|
44
|
+
# Write a +chunk+ of XML to the PushParser. Any callback methods
|
45
|
+
# that can be called will be called immediately.
|
46
|
+
def write chunk, last_chunk = false
|
47
|
+
native_write(chunk, last_chunk)
|
48
|
+
end
|
49
|
+
alias :<< :write
|
50
|
+
|
51
|
+
###
|
52
|
+
# Finish the parsing. This method is only necessary for
|
53
|
+
# Nokogiri::XML::SAX::Document#end_document to be called.
|
54
|
+
def finish
|
55
|
+
write '', true
|
56
|
+
end
|
57
|
+
end
|
58
|
+
end
|
59
|
+
end
|
60
|
+
end
|
@@ -0,0 +1,65 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
class << self
|
4
|
+
###
|
5
|
+
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
|
6
|
+
# object.
|
7
|
+
def Schema string_or_io
|
8
|
+
Schema.new(string_or_io)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
###
|
13
|
+
# Nokogiri::XML::Schema is used for validating XML against a schema
|
14
|
+
# (usually from an xsd file).
|
15
|
+
#
|
16
|
+
# == Synopsis
|
17
|
+
#
|
18
|
+
# Validate an XML document against a Schema. Loop over the errors that
|
19
|
+
# are returned and print them out:
|
20
|
+
#
|
21
|
+
# xsd = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE))
|
22
|
+
# doc = Nokogiri::XML(File.read(PO_XML_FILE))
|
23
|
+
#
|
24
|
+
# xsd.validate(doc).each do |error|
|
25
|
+
# puts error.message
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# The errors in the list are Nokogiri::XML::SyntaxError objects.
|
29
|
+
class Schema
|
30
|
+
# Errors while parsing the schema file
|
31
|
+
attr_accessor :errors
|
32
|
+
|
33
|
+
###
|
34
|
+
# Create a new Nokogiri::XML::Schema object using a +string_or_io+
|
35
|
+
# object.
|
36
|
+
def self.new string_or_io
|
37
|
+
if string_or_io.respond_to?(:read)
|
38
|
+
string_or_io = string_or_io.read
|
39
|
+
end
|
40
|
+
|
41
|
+
read_memory(string_or_io)
|
42
|
+
end
|
43
|
+
|
44
|
+
###
|
45
|
+
# Validate +thing+ against this schema. +thing+ can be a
|
46
|
+
# Nokogiri::XML::Document object, or a filename. An Array of
|
47
|
+
# Nokogiri::XML::SyntaxError objects found while validating the
|
48
|
+
# +thing+ is returned.
|
49
|
+
def validate thing
|
50
|
+
return validate_document(thing) if thing.is_a?(Nokogiri::XML::Document)
|
51
|
+
|
52
|
+
# FIXME libxml2 has an api for validating files. We should switch
|
53
|
+
# to that because it will probably save memory.
|
54
|
+
validate_document(Nokogiri::XML(File.read(thing)))
|
55
|
+
end
|
56
|
+
|
57
|
+
###
# Returns true if +thing+ is a valid Nokogiri::XML::Document or
# file, that is, when #validate returns no errors for it.
def valid?(thing)
  validate(thing).empty?
end
|
63
|
+
end
|
64
|
+
end
|
65
|
+
end
|
@@ -0,0 +1,34 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
###
|
4
|
+
# This class provides information about XML SyntaxErrors. These
|
5
|
+
# exceptions are typically stored on Nokogiri::XML::Document#errors.
|
6
|
+
class SyntaxError < ::Nokogiri::SyntaxError
|
7
|
+
###
|
8
|
+
# return true if this is a non error
|
9
|
+
def none?
|
10
|
+
level == 0
|
11
|
+
end
|
12
|
+
|
13
|
+
###
|
14
|
+
# return true if this is a warning
|
15
|
+
def warning?
|
16
|
+
level == 1
|
17
|
+
end
|
18
|
+
|
19
|
+
###
|
20
|
+
# return true if this is an error
|
21
|
+
def error?
|
22
|
+
level == 2
|
23
|
+
end
|
24
|
+
|
25
|
+
###
|
26
|
+
# return true if this error is fatal
|
27
|
+
def fatal?
|
28
|
+
level == 3
|
29
|
+
end
|
30
|
+
|
31
|
+
alias :to_s :message
|
32
|
+
end
|
33
|
+
end
|
34
|
+
end
|