nokogiri 1.2.3 → 1.3.0
Sign up to get free protection for your applications and to get access to all the features.
Potentially problematic release.
This version of nokogiri might be problematic. Click here for more details.
- data/.autotest +14 -2
- data/CHANGELOG.ja.rdoc +38 -0
- data/CHANGELOG.rdoc +43 -0
- data/Manifest.txt +80 -5
- data/README.ja.rdoc +12 -11
- data/README.rdoc +4 -2
- data/Rakefile +103 -173
- data/bin/nokogiri +47 -0
- data/ext/nokogiri/extconf.rb +19 -13
- data/ext/nokogiri/html_document.c +39 -3
- data/ext/nokogiri/html_document.h +1 -1
- data/ext/nokogiri/html_element_description.c +272 -0
- data/ext/nokogiri/html_element_description.h +10 -0
- data/ext/nokogiri/html_entity_lookup.h +1 -1
- data/ext/nokogiri/html_sax_parser.h +1 -1
- data/ext/nokogiri/{native.c → nokogiri.c} +11 -3
- data/ext/nokogiri/{native.h → nokogiri.h} +18 -4
- data/ext/nokogiri/xml_attr.c +14 -5
- data/ext/nokogiri/xml_attr.h +1 -1
- data/ext/nokogiri/xml_cdata.c +15 -6
- data/ext/nokogiri/xml_cdata.h +1 -1
- data/ext/nokogiri/xml_comment.c +13 -4
- data/ext/nokogiri/xml_comment.h +1 -1
- data/ext/nokogiri/xml_document.c +50 -41
- data/ext/nokogiri/xml_document.h +1 -1
- data/ext/nokogiri/xml_document_fragment.c +12 -4
- data/ext/nokogiri/xml_document_fragment.h +1 -1
- data/ext/nokogiri/xml_dtd.c +1 -1
- data/ext/nokogiri/xml_dtd.h +1 -1
- data/ext/nokogiri/xml_entity_reference.c +13 -4
- data/ext/nokogiri/xml_entity_reference.h +1 -1
- data/ext/nokogiri/xml_io.h +1 -1
- data/ext/nokogiri/xml_namespace.c +69 -0
- data/ext/nokogiri/xml_namespace.h +12 -0
- data/ext/nokogiri/xml_node.c +232 -124
- data/ext/nokogiri/xml_node.h +3 -4
- data/ext/nokogiri/xml_node_set.c +206 -19
- data/ext/nokogiri/xml_node_set.h +1 -1
- data/ext/nokogiri/xml_processing_instruction.c +14 -4
- data/ext/nokogiri/xml_processing_instruction.h +1 -1
- data/ext/nokogiri/xml_reader.c +87 -7
- data/ext/nokogiri/xml_reader.h +1 -1
- data/ext/nokogiri/xml_relax_ng.c +106 -0
- data/ext/nokogiri/xml_relax_ng.h +9 -0
- data/ext/nokogiri/xml_sax_parser.c +122 -2
- data/ext/nokogiri/xml_sax_parser.h +1 -1
- data/ext/nokogiri/xml_sax_push_parser.c +1 -0
- data/ext/nokogiri/xml_sax_push_parser.h +1 -1
- data/ext/nokogiri/xml_schema.c +107 -0
- data/ext/nokogiri/xml_schema.h +9 -0
- data/ext/nokogiri/xml_syntax_error.h +1 -1
- data/ext/nokogiri/xml_text.c +10 -3
- data/ext/nokogiri/xml_text.h +1 -1
- data/ext/nokogiri/xml_xpath.h +1 -1
- data/ext/nokogiri/xml_xpath_context.h +1 -1
- data/ext/nokogiri/xslt_stylesheet.c +29 -16
- data/ext/nokogiri/xslt_stylesheet.h +1 -1
- data/lib/action-nokogiri.rb +7 -1
- data/lib/nokogiri.rb +21 -5
- data/lib/nokogiri/css/generated_parser.rb +49 -14
- data/lib/nokogiri/css/generated_tokenizer.rb +2 -2
- data/lib/nokogiri/css/node.rb +13 -3
- data/lib/nokogiri/css/parser.rb +8 -0
- data/lib/nokogiri/css/parser.y +7 -7
- data/lib/nokogiri/css/tokenizer.rb +2 -0
- data/lib/nokogiri/css/xpath_visitor.rb +10 -6
- data/lib/nokogiri/decorators/hpricot/node.rb +1 -1
- data/lib/nokogiri/decorators/hpricot/node_set.rb +2 -2
- data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +2 -0
- data/lib/nokogiri/decorators/slop.rb +3 -1
- data/lib/nokogiri/ffi/html/document.rb +37 -0
- data/lib/nokogiri/ffi/html/element_description.rb +85 -0
- data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
- data/lib/nokogiri/ffi/html/sax/parser.rb +21 -0
- data/lib/nokogiri/ffi/io_callbacks.rb +32 -0
- data/lib/nokogiri/ffi/libxml.rb +314 -0
- data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
- data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
- data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
- data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
- data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
- data/lib/nokogiri/ffi/structs/xml_dtd.rb +26 -0
- data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
- data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
- data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
- data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
- data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
- data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +14 -0
- data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
- data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
- data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
- data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
- data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
- data/lib/nokogiri/ffi/xml/attr.rb +41 -0
- data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
- data/lib/nokogiri/ffi/xml/comment.rb +18 -0
- data/lib/nokogiri/ffi/xml/document.rb +107 -0
- data/lib/nokogiri/ffi/xml/document_fragment.rb +26 -0
- data/lib/nokogiri/ffi/xml/dtd.rb +42 -0
- data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
- data/lib/nokogiri/ffi/xml/namespace.rb +38 -0
- data/lib/nokogiri/ffi/xml/node.rb +380 -0
- data/lib/nokogiri/ffi/xml/node_set.rb +130 -0
- data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
- data/lib/nokogiri/ffi/xml/reader.rb +217 -0
- data/lib/nokogiri/ffi/xml/relax_ng.rb +51 -0
- data/lib/nokogiri/ffi/xml/sax/parser.rb +148 -0
- data/lib/nokogiri/ffi/xml/sax/push_parser.rb +38 -0
- data/lib/nokogiri/ffi/xml/schema.rb +55 -0
- data/lib/nokogiri/ffi/xml/syntax_error.rb +76 -0
- data/lib/nokogiri/ffi/xml/text.rb +18 -0
- data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
- data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
- data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
- data/lib/nokogiri/hpricot.rb +14 -3
- data/lib/nokogiri/html.rb +11 -46
- data/lib/nokogiri/html/builder.rb +27 -1
- data/lib/nokogiri/html/document.rb +62 -6
- data/lib/nokogiri/html/document_fragment.rb +15 -0
- data/lib/nokogiri/html/element_description.rb +23 -0
- data/lib/nokogiri/html/entity_lookup.rb +2 -0
- data/lib/nokogiri/html/sax/parser.rb +27 -1
- data/lib/nokogiri/version.rb +26 -1
- data/lib/nokogiri/version_warning.rb +11 -0
- data/lib/nokogiri/xml.rb +25 -51
- data/lib/nokogiri/xml/builder.rb +166 -10
- data/lib/nokogiri/xml/cdata.rb +3 -1
- data/lib/nokogiri/xml/document.rb +39 -6
- data/lib/nokogiri/xml/document_fragment.rb +41 -1
- data/lib/nokogiri/xml/dtd.rb +3 -1
- data/lib/nokogiri/xml/entity_declaration.rb +3 -1
- data/lib/nokogiri/xml/fragment_handler.rb +24 -3
- data/lib/nokogiri/xml/namespace.rb +7 -0
- data/lib/nokogiri/xml/node.rb +314 -65
- data/lib/nokogiri/xml/node/save_options.rb +12 -2
- data/lib/nokogiri/xml/node_set.rb +58 -8
- data/lib/nokogiri/xml/parse_options.rb +80 -0
- data/lib/nokogiri/xml/processing_instruction.rb +2 -0
- data/lib/nokogiri/xml/reader.rb +42 -3
- data/lib/nokogiri/xml/relax_ng.rb +32 -0
- data/lib/nokogiri/xml/sax.rb +0 -7
- data/lib/nokogiri/xml/sax/document.rb +84 -0
- data/lib/nokogiri/xml/sax/parser.rb +38 -2
- data/lib/nokogiri/xml/sax/push_parser.rb +12 -0
- data/lib/nokogiri/xml/schema.rb +65 -0
- data/lib/nokogiri/xml/syntax_error.rb +11 -0
- data/lib/nokogiri/xml/xpath.rb +1 -1
- data/lib/nokogiri/xml/xpath_context.rb +2 -0
- data/lib/nokogiri/xslt.rb +21 -1
- data/lib/nokogiri/xslt/stylesheet.rb +19 -0
- data/lib/xsd/xmlparser/nokogiri.rb +12 -2
- data/tasks/test.rb +42 -19
- data/test/css/test_parser.rb +29 -0
- data/test/ffi/test_document.rb +35 -0
- data/test/files/address_book.rlx +12 -0
- data/test/files/address_book.xml +10 -0
- data/test/files/po.xml +32 -0
- data/test/files/po.xsd +66 -0
- data/test/helper.rb +38 -8
- data/test/html/sax/test_parser.rb +12 -0
- data/test/html/test_builder.rb +25 -2
- data/test/html/test_document.rb +91 -20
- data/test/html/test_document_fragment.rb +97 -0
- data/test/html/test_element_description.rb +95 -0
- data/test/html/test_node.rb +66 -3
- data/test/test_convert_xpath.rb +1 -1
- data/test/test_memory_leak.rb +57 -18
- data/test/test_nokogiri.rb +24 -2
- data/test/test_reader.rb +77 -0
- data/test/test_xslt_transforms.rb +120 -82
- data/test/xml/node/test_subclass.rb +44 -0
- data/test/xml/sax/test_parser.rb +9 -0
- data/test/xml/sax/test_push_parser.rb +24 -0
- data/test/xml/test_attr.rb +7 -0
- data/test/xml/test_builder.rb +48 -0
- data/test/xml/test_cdata.rb +19 -0
- data/test/xml/test_comment.rb +6 -0
- data/test/xml/test_document.rb +101 -2
- data/test/xml/test_document_fragment.rb +55 -3
- data/test/xml/test_entity_reference.rb +4 -0
- data/test/xml/test_namespace.rb +43 -0
- data/test/xml/test_node.rb +255 -8
- data/test/xml/test_node_attributes.rb +34 -0
- data/test/xml/test_node_encoding.rb +9 -2
- data/test/xml/test_node_set.rb +197 -1
- data/test/xml/test_parse_options.rb +52 -0
- data/test/xml/test_processing_instruction.rb +5 -0
- data/test/xml/test_relax_ng.rb +60 -0
- data/test/xml/test_schema.rb +65 -0
- data/test/xml/test_text.rb +5 -0
- data/test/xml/test_unparented_node.rb +3 -3
- metadata +128 -12
- data/lib/nokogiri/xml/comment.rb +0 -6
- data/lib/nokogiri/xml/element.rb +0 -6
- data/lib/nokogiri/xml/text.rb +0 -6
@@ -4,15 +4,25 @@ module Nokogiri
|
|
4
4
|
###
|
5
5
|
# Save options for serializing nodes
|
6
6
|
class SaveOptions
|
7
|
-
|
8
|
-
|
7
|
+
# Format serialized xml
|
8
|
+
FORMAT = 1
|
9
|
+
# Do not include declarations
|
10
|
+
NO_DECLARATION = 2
|
11
|
+
# Do not include empty tags
|
9
12
|
NO_EMPTY_TAGS = 4
|
13
|
+
# Do not save XHTML
|
10
14
|
NO_XHTML = 8
|
15
|
+
# Save as XHTML
|
11
16
|
AS_XHTML = 16
|
17
|
+
# Save as XML
|
12
18
|
AS_XML = 32
|
19
|
+
# Save as HTML
|
13
20
|
AS_HTML = 64
|
14
21
|
|
22
|
+
# Integer representation of the SaveOptions
|
15
23
|
attr_reader :options
|
24
|
+
|
25
|
+
# Create a new SaveOptions object with +options+
|
16
26
|
def initialize options = 0; @options = options; end
|
17
27
|
constants.each do |constant|
|
18
28
|
class_eval %{
|
@@ -19,8 +19,13 @@ module Nokogiri
|
|
19
19
|
|
20
20
|
###
|
21
21
|
# Get the first element of the NodeSet.
|
22
|
-
def first
|
23
|
-
self[0]
|
22
|
+
def first n = nil
|
23
|
+
return self[0] unless n
|
24
|
+
list = []
|
25
|
+
0.upto(n - 1) do |i|
|
26
|
+
list << self[i]
|
27
|
+
end
|
28
|
+
list
|
24
29
|
end
|
25
30
|
|
26
31
|
###
|
@@ -35,6 +40,13 @@ module Nokogiri
|
|
35
40
|
length == 0
|
36
41
|
end
|
37
42
|
|
43
|
+
###
|
44
|
+
# Returns the index of the first node in self that is == to +node+. Returns nil if no match is found.
|
45
|
+
def index(node)
|
46
|
+
each_with_index { |member, j| return j if member == node }
|
47
|
+
nil
|
48
|
+
end
|
49
|
+
|
38
50
|
###
|
39
51
|
# Insert +datum+ before the first Node in this NodeSet
|
40
52
|
def before datum
|
@@ -77,6 +89,7 @@ module Nokogiri
|
|
77
89
|
return self[path] if path.is_a?(Numeric)
|
78
90
|
search(path, ns).first
|
79
91
|
end
|
92
|
+
alias :% :at
|
80
93
|
|
81
94
|
###
|
82
95
|
# Append the class attribute +name+ to all Node objects in the NodeSet.
|
@@ -112,8 +125,8 @@ module Nokogiri
|
|
112
125
|
each do |el|
|
113
126
|
el.set_attribute(key, value || blk[el])
|
114
127
|
end
|
115
|
-
return self
|
116
|
-
end
|
128
|
+
return self
|
129
|
+
end
|
117
130
|
if key.is_a? Hash
|
118
131
|
key.each { |k,v| self.attr(k,v) }
|
119
132
|
return self
|
@@ -130,7 +143,7 @@ module Nokogiri
|
|
130
143
|
next unless el.respond_to? :remove_attribute
|
131
144
|
el.remove_attribute(name)
|
132
145
|
end
|
133
|
-
self
|
146
|
+
self
|
134
147
|
end
|
135
148
|
|
136
149
|
###
|
@@ -165,24 +178,61 @@ module Nokogiri
|
|
165
178
|
self
|
166
179
|
end
|
167
180
|
|
181
|
+
###
|
182
|
+
# Convert this NodeSet to a string.
|
168
183
|
def to_s
|
169
184
|
map { |x| x.to_s }.join
|
170
185
|
end
|
171
186
|
|
187
|
+
###
|
188
|
+
# Convert this NodeSet to HTML
|
172
189
|
def to_html *args
|
173
|
-
map { |x| x.to_html(*args) }.join
|
190
|
+
map { |x| x.to_html(*args) }.join
|
174
191
|
end
|
175
192
|
|
193
|
+
###
|
194
|
+
# Convert this NodeSet to XHTML
|
176
195
|
def to_xhtml *args
|
177
|
-
map { |x| x.to_xhtml(*args) }.join
|
196
|
+
map { |x| x.to_xhtml(*args) }.join
|
178
197
|
end
|
179
198
|
|
199
|
+
###
|
200
|
+
# Convert this NodeSet to XML
|
180
201
|
def to_xml *args
|
181
|
-
map { |x| x.to_xml(*args) }.join
|
202
|
+
map { |x| x.to_xml(*args) }.join
|
182
203
|
end
|
183
204
|
|
184
205
|
alias :size :length
|
185
206
|
alias :to_ary :to_a
|
207
|
+
|
208
|
+
###
|
209
|
+
# Removes the last element from set and returns it, or +nil+ if
|
210
|
+
# the set is empty
|
211
|
+
def pop
|
212
|
+
return nil if length == 0
|
213
|
+
delete last
|
214
|
+
end
|
215
|
+
|
216
|
+
###
|
217
|
+
# Returns the first element of the NodeSet and removes it. Returns
|
218
|
+
# +nil+ if the set is empty.
|
219
|
+
def shift
|
220
|
+
return nil if length == 0
|
221
|
+
delete first
|
222
|
+
end
|
223
|
+
|
224
|
+
###
|
225
|
+
# Equality -- Two NodeSets are equal if they contain the same number
|
226
|
+
# of elements and if each element is equal to the corresponding
|
227
|
+
# element in the other NodeSet
|
228
|
+
def == other
|
229
|
+
return false unless other.is_a?(Nokogiri::XML::NodeSet)
|
230
|
+
return false unless length == other.length
|
231
|
+
each_with_index do |node, i|
|
232
|
+
return false unless node == other[i]
|
233
|
+
end
|
234
|
+
true
|
235
|
+
end
|
186
236
|
end
|
187
237
|
end
|
188
238
|
end
|
@@ -0,0 +1,80 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
###
|
4
|
+
# Parse options for passing to Nokogiri.XML or Nokogiri.HTML
|
5
|
+
class ParseOptions
|
6
|
+
# Strict parsing
|
7
|
+
STRICT = 0
|
8
|
+
# Recover from errors
|
9
|
+
RECOVER = 1 << 0
|
10
|
+
# Substitute entities
|
11
|
+
NOENT = 1 << 1
|
12
|
+
# Load external subsets
|
13
|
+
DTDLOAD = 1 << 2
|
14
|
+
# Default DTD attributes
|
15
|
+
DTDATTR = 1 << 3
|
16
|
+
# validate with the DTD
|
17
|
+
DTDVALID = 1 << 4
|
18
|
+
# suppress error reports
|
19
|
+
NOERROR = 1 << 5
|
20
|
+
# suppress warning reports
|
21
|
+
NOWARNING = 1 << 6
|
22
|
+
# pedantic error reporting
|
23
|
+
PEDANTIC = 1 << 7
|
24
|
+
# remove blank nodes
|
25
|
+
NOBLANKS = 1 << 8
|
26
|
+
# use the SAX1 interface internally
|
27
|
+
SAX1 = 1 << 9
|
28
|
+
# Implement XInclude substitution
|
29
|
+
XINCLUDE = 1 << 10
|
30
|
+
# Forbid network access
|
31
|
+
NONET = 1 << 11
|
32
|
+
# Do not reuse the context dictionary
|
33
|
+
NODICT = 1 << 12
|
34
|
+
# remove redundant namespace declarations
|
35
|
+
NSCLEAN = 1 << 13
|
36
|
+
# merge CDATA as text nodes
|
37
|
+
NOCDATA = 1 << 14
|
38
|
+
# do not generate XINCLUDE START/END nodes
|
39
|
+
NOXINCNODE = 1 << 15
|
40
|
+
|
41
|
+
attr_accessor :options
|
42
|
+
def initialize options = 0
|
43
|
+
@options = options
|
44
|
+
end
|
45
|
+
|
46
|
+
constants.each do |constant|
|
47
|
+
next if constant == 'STRICT'
|
48
|
+
class_eval %{
|
49
|
+
def #{constant.downcase}
|
50
|
+
@options |= #{constant}
|
51
|
+
self
|
52
|
+
end
|
53
|
+
|
54
|
+
def #{constant.downcase}?
|
55
|
+
#{constant} & @options == #{constant}
|
56
|
+
end
|
57
|
+
}
|
58
|
+
end
|
59
|
+
|
60
|
+
def strict
|
61
|
+
@options |= STRICT
|
62
|
+
self
|
63
|
+
end
|
64
|
+
|
65
|
+
def strict?
|
66
|
+
@options & RECOVER == STRICT
|
67
|
+
end
|
68
|
+
|
69
|
+
alias :to_i :options
|
70
|
+
|
71
|
+
def inspect
|
72
|
+
options = []
|
73
|
+
self.class.constants.each do |k|
|
74
|
+
options << k.downcase if send(:"#{k.downcase}?")
|
75
|
+
end
|
76
|
+
super.sub(/>$/, " " + options.join(', ') + ">")
|
77
|
+
end
|
78
|
+
end
|
79
|
+
end
|
80
|
+
end
|
data/lib/nokogiri/xml/reader.rb
CHANGED
@@ -1,27 +1,66 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
module XML
|
3
|
+
###
|
4
|
+
# Nokogiri::XML::Reader parses an XML document similar to the way a cursor
|
5
|
+
# would move. The Reader is given an XML document, and yields nodes
|
6
|
+
# to an each block.
|
7
|
+
#
|
8
|
+
# Here is an example of usage:
|
9
|
+
#
|
10
|
+
# reader = Nokogiri::XML::Reader(<<-eoxml)
|
11
|
+
# <x xmlns:tenderlove='http://tenderlovemaking.com/'>
|
12
|
+
# <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
|
13
|
+
# </x>
|
14
|
+
# eoxml
|
15
|
+
#
|
16
|
+
# reader.each do |node|
|
17
|
+
#
|
18
|
+
# # node is an instance of Nokogiri::XML::Reader
|
19
|
+
# puts node.name
|
20
|
+
#
|
21
|
+
# end
|
22
|
+
#
|
23
|
+
# Note that Nokogiri::XML::Reader#each can only be called once!! Once
|
24
|
+
# the cursor moves through the entire document, you must parse the
|
25
|
+
# document again. So make sure that you capture any information you
|
26
|
+
# need during the first iteration.
|
27
|
+
#
|
28
|
+
# The Reader parser is good for when you need the speed of a SAX parser,
|
29
|
+
# but do not want to write a Document handler.
|
3
30
|
class Reader
|
4
31
|
include Enumerable
|
32
|
+
|
33
|
+
# A list of errors encountered while parsing
|
5
34
|
attr_accessor :errors
|
35
|
+
|
36
|
+
# The encoding for the document
|
6
37
|
attr_reader :encoding
|
7
38
|
|
8
|
-
|
9
|
-
|
39
|
+
# The XML source
|
40
|
+
attr_reader :source
|
41
|
+
|
42
|
+
def initialize source, url = nil, encoding = nil # :nodoc:
|
43
|
+
@source = source
|
44
|
+
@errors = []
|
10
45
|
@encoding = encoding
|
11
46
|
end
|
47
|
+
private :initialize
|
12
48
|
|
49
|
+
###
|
50
|
+
# Get a list of attributes for the current node.
|
13
51
|
def attributes
|
14
52
|
Hash[*(attribute_nodes.map { |node|
|
15
53
|
[node.name, node.to_s]
|
16
54
|
}.flatten)].merge(namespaces || {})
|
17
55
|
end
|
18
56
|
|
57
|
+
###
|
58
|
+
# Move the cursor through the document yielding each node to the block
|
19
59
|
def each(&block)
|
20
60
|
while node = self.read
|
21
61
|
block.call(node)
|
22
62
|
end
|
23
63
|
end
|
24
|
-
private :initialize
|
25
64
|
end
|
26
65
|
end
|
27
66
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
module Nokogiri
|
2
|
+
module XML
|
3
|
+
class << self
|
4
|
+
###
|
5
|
+
# Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
|
6
|
+
# See Nokogiri::XML::RelaxNG for an example.
|
7
|
+
def RelaxNG string_or_io
|
8
|
+
RelaxNG.new(string_or_io)
|
9
|
+
end
|
10
|
+
end
|
11
|
+
|
12
|
+
###
|
13
|
+
# Nokogiri::XML::RelaxNG is used for validating XML against a
|
14
|
+
# RelaxNG schema.
|
15
|
+
#
|
16
|
+
# == Synopsis
|
17
|
+
#
|
18
|
+
# Validate an XML document against a RelaxNG schema. Loop over the errors
|
19
|
+
# that are returned and print them out:
|
20
|
+
#
|
21
|
+
# schema = Nokogiri::XML::RelaxNG(File.read(ADDRESS_SCHEMA_FILE))
|
22
|
+
# doc = Nokogiri::XML(File.read(ADDRESS_XML_FILE))
|
23
|
+
#
|
24
|
+
# schema.validate(doc).each do |error|
|
25
|
+
# puts error.message
|
26
|
+
# end
|
27
|
+
#
|
28
|
+
# Each error in the list is a Nokogiri::XML::SyntaxError object.
|
29
|
+
class RelaxNG < Nokogiri::XML::Schema
|
30
|
+
end
|
31
|
+
end
|
32
|
+
end
|
data/lib/nokogiri/xml/sax.rb
CHANGED
@@ -1,6 +1,72 @@
|
|
1
1
|
module Nokogiri
|
2
2
|
module XML
|
3
|
+
###
|
4
|
+
# SAX Parsers are event driven parsers. Nokogiri provides two different
|
5
|
+
# event based parsers when dealing with XML. If you want to do SAX style
|
6
|
+
# parsing using HTML, check out Nokogiri::HTML::SAX.
|
7
|
+
#
|
8
|
+
# The basic way a SAX style parser works is by creating a parser,
|
9
|
+
# telling the parser about the events we're interested in, then giving
|
10
|
+
# the parser some XML to process. The parser will notify you when
|
11
|
+
# it encounters events you said you would like to know about.
|
12
|
+
#
|
13
|
+
# To register for events, you simply subclass Nokogiri::XML::SAX::Document,
|
14
|
+
# and implement the methods for which you would like notification.
|
15
|
+
#
|
16
|
+
# For example, if I want to be notified when a document ends, and when an
|
17
|
+
# element starts, I would write a class like this:
|
18
|
+
#
|
19
|
+
# class MyDocument < Nokogiri::XML::SAX::Document
|
20
|
+
# def end_document
|
21
|
+
# puts "the document has ended"
|
22
|
+
# end
|
23
|
+
#
|
24
|
+
# def start_element name, attributes = []
|
25
|
+
# puts "#{name} started"
|
26
|
+
# end
|
27
|
+
# end
|
28
|
+
#
|
29
|
+
# Then I would instantiate a SAX parser with this document, and feed the
|
30
|
+
# parser some XML
|
31
|
+
#
|
32
|
+
# # Create a new parser
|
33
|
+
# parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
|
34
|
+
#
|
35
|
+
# # Feed the parser some XML
|
36
|
+
# parser.parse(File.open(ARGV[0], 'rb'))
|
37
|
+
#
|
38
|
+
# Now my document handler will be called when each node starts, and when
|
39
|
+
# the document ends. To see what kinds of events are available, take
|
40
|
+
# a look at Nokogiri::XML::SAX::Document.
|
41
|
+
#
|
42
|
+
# Two SAX parsers for XML are available, a parser that reads from a string
|
43
|
+
# or IO object as it feels necessary, and a parser that lets you spoon
|
44
|
+
# feed it XML. If you want to let Nokogiri deal with reading your XML,
|
45
|
+
# use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
|
46
|
+
# control over the XML input, use the Nokogiri::XML::SAX::PushParser.
|
3
47
|
module SAX
|
48
|
+
###
|
49
|
+
# This class is used for registering types of events you are interested
|
50
|
+
# in handling. All of the methods on this class are available as
|
51
|
+
# possible events while parsing an XML document. To register for any
|
52
|
+
# particular event, just subclass this class and implement the methods
|
53
|
+
# you are interested in knowing about.
|
54
|
+
#
|
55
|
+
# To only be notified about start and end element events, write a class
|
56
|
+
# like this:
|
57
|
+
#
|
58
|
+
# class MyDocument < Nokogiri::XML::SAX::Document
|
59
|
+
# def start_element name, attrs = []
|
60
|
+
# puts "#{name} started!"
|
61
|
+
# end
|
62
|
+
#
|
63
|
+
# def end_element name
|
64
|
+
# puts "#{name} ended"
|
65
|
+
# end
|
66
|
+
# end
|
67
|
+
#
|
68
|
+
# You can use this event handler for any SAX style parser included with
|
69
|
+
# Nokogiri. See Nokogiri::XML::SAX, and Nokogiri::HTML::SAX.
|
4
70
|
class Document
|
5
71
|
###
|
6
72
|
# Called when document starts parsing
|
@@ -24,6 +90,24 @@ module Nokogiri
|
|
24
90
|
def end_element name
|
25
91
|
end
|
26
92
|
|
93
|
+
###
|
94
|
+
# Called at the beginning of an element
|
95
|
+
# +name+ is the element name
|
96
|
+
# +attrs+ is a hash of attributes
|
97
|
+
# +prefix+ is the namespace prefix for the element
|
98
|
+
# +uri+ is the associated namespace URI
|
99
|
+
# +namespaces+ is a hash of namespace prefix:urls associated with the element
|
100
|
+
def start_element_ns(name, attrs = {}, prefix = nil, uri = nil, namespaces = {})
|
101
|
+
end
|
102
|
+
|
103
|
+
###
|
104
|
+
# Called at the end of an element
|
105
|
+
# +name+ is the element's name
|
106
|
+
# +prefix+ is the namespace prefix associated with the element
|
107
|
+
# +uri+ is the associated namespace URI
|
108
|
+
def end_element_ns(name, prefix = nil, uri = nil)
|
109
|
+
end
|
110
|
+
|
27
111
|
###
|
28
112
|
# Characters read between a tag
|
29
113
|
# +string+ contains the character data
|