nokogiri 1.2.3 → 1.3.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (200) hide show
  1. data/.autotest +14 -2
  2. data/CHANGELOG.ja.rdoc +38 -0
  3. data/CHANGELOG.rdoc +43 -0
  4. data/Manifest.txt +80 -5
  5. data/README.ja.rdoc +12 -11
  6. data/README.rdoc +4 -2
  7. data/Rakefile +103 -173
  8. data/bin/nokogiri +47 -0
  9. data/ext/nokogiri/extconf.rb +19 -13
  10. data/ext/nokogiri/html_document.c +39 -3
  11. data/ext/nokogiri/html_document.h +1 -1
  12. data/ext/nokogiri/html_element_description.c +272 -0
  13. data/ext/nokogiri/html_element_description.h +10 -0
  14. data/ext/nokogiri/html_entity_lookup.h +1 -1
  15. data/ext/nokogiri/html_sax_parser.h +1 -1
  16. data/ext/nokogiri/{native.c → nokogiri.c} +11 -3
  17. data/ext/nokogiri/{native.h → nokogiri.h} +18 -4
  18. data/ext/nokogiri/xml_attr.c +14 -5
  19. data/ext/nokogiri/xml_attr.h +1 -1
  20. data/ext/nokogiri/xml_cdata.c +15 -6
  21. data/ext/nokogiri/xml_cdata.h +1 -1
  22. data/ext/nokogiri/xml_comment.c +13 -4
  23. data/ext/nokogiri/xml_comment.h +1 -1
  24. data/ext/nokogiri/xml_document.c +50 -41
  25. data/ext/nokogiri/xml_document.h +1 -1
  26. data/ext/nokogiri/xml_document_fragment.c +12 -4
  27. data/ext/nokogiri/xml_document_fragment.h +1 -1
  28. data/ext/nokogiri/xml_dtd.c +1 -1
  29. data/ext/nokogiri/xml_dtd.h +1 -1
  30. data/ext/nokogiri/xml_entity_reference.c +13 -4
  31. data/ext/nokogiri/xml_entity_reference.h +1 -1
  32. data/ext/nokogiri/xml_io.h +1 -1
  33. data/ext/nokogiri/xml_namespace.c +69 -0
  34. data/ext/nokogiri/xml_namespace.h +12 -0
  35. data/ext/nokogiri/xml_node.c +232 -124
  36. data/ext/nokogiri/xml_node.h +3 -4
  37. data/ext/nokogiri/xml_node_set.c +206 -19
  38. data/ext/nokogiri/xml_node_set.h +1 -1
  39. data/ext/nokogiri/xml_processing_instruction.c +14 -4
  40. data/ext/nokogiri/xml_processing_instruction.h +1 -1
  41. data/ext/nokogiri/xml_reader.c +87 -7
  42. data/ext/nokogiri/xml_reader.h +1 -1
  43. data/ext/nokogiri/xml_relax_ng.c +106 -0
  44. data/ext/nokogiri/xml_relax_ng.h +9 -0
  45. data/ext/nokogiri/xml_sax_parser.c +122 -2
  46. data/ext/nokogiri/xml_sax_parser.h +1 -1
  47. data/ext/nokogiri/xml_sax_push_parser.c +1 -0
  48. data/ext/nokogiri/xml_sax_push_parser.h +1 -1
  49. data/ext/nokogiri/xml_schema.c +107 -0
  50. data/ext/nokogiri/xml_schema.h +9 -0
  51. data/ext/nokogiri/xml_syntax_error.h +1 -1
  52. data/ext/nokogiri/xml_text.c +10 -3
  53. data/ext/nokogiri/xml_text.h +1 -1
  54. data/ext/nokogiri/xml_xpath.h +1 -1
  55. data/ext/nokogiri/xml_xpath_context.h +1 -1
  56. data/ext/nokogiri/xslt_stylesheet.c +29 -16
  57. data/ext/nokogiri/xslt_stylesheet.h +1 -1
  58. data/lib/action-nokogiri.rb +7 -1
  59. data/lib/nokogiri.rb +21 -5
  60. data/lib/nokogiri/css/generated_parser.rb +49 -14
  61. data/lib/nokogiri/css/generated_tokenizer.rb +2 -2
  62. data/lib/nokogiri/css/node.rb +13 -3
  63. data/lib/nokogiri/css/parser.rb +8 -0
  64. data/lib/nokogiri/css/parser.y +7 -7
  65. data/lib/nokogiri/css/tokenizer.rb +2 -0
  66. data/lib/nokogiri/css/xpath_visitor.rb +10 -6
  67. data/lib/nokogiri/decorators/hpricot/node.rb +1 -1
  68. data/lib/nokogiri/decorators/hpricot/node_set.rb +2 -2
  69. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +2 -0
  70. data/lib/nokogiri/decorators/slop.rb +3 -1
  71. data/lib/nokogiri/ffi/html/document.rb +37 -0
  72. data/lib/nokogiri/ffi/html/element_description.rb +85 -0
  73. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  74. data/lib/nokogiri/ffi/html/sax/parser.rb +21 -0
  75. data/lib/nokogiri/ffi/io_callbacks.rb +32 -0
  76. data/lib/nokogiri/ffi/libxml.rb +314 -0
  77. data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
  78. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  79. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  80. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  81. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  82. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  83. data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
  84. data/lib/nokogiri/ffi/structs/xml_dtd.rb +26 -0
  85. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  86. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  87. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  88. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  89. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  90. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  91. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +14 -0
  92. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  93. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  94. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  95. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
  96. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  97. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  98. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  99. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  100. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  101. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  102. data/lib/nokogiri/ffi/xml/document.rb +107 -0
  103. data/lib/nokogiri/ffi/xml/document_fragment.rb +26 -0
  104. data/lib/nokogiri/ffi/xml/dtd.rb +42 -0
  105. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  106. data/lib/nokogiri/ffi/xml/namespace.rb +38 -0
  107. data/lib/nokogiri/ffi/xml/node.rb +380 -0
  108. data/lib/nokogiri/ffi/xml/node_set.rb +130 -0
  109. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  110. data/lib/nokogiri/ffi/xml/reader.rb +217 -0
  111. data/lib/nokogiri/ffi/xml/relax_ng.rb +51 -0
  112. data/lib/nokogiri/ffi/xml/sax/parser.rb +148 -0
  113. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +38 -0
  114. data/lib/nokogiri/ffi/xml/schema.rb +55 -0
  115. data/lib/nokogiri/ffi/xml/syntax_error.rb +76 -0
  116. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  117. data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
  118. data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
  119. data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
  120. data/lib/nokogiri/hpricot.rb +14 -3
  121. data/lib/nokogiri/html.rb +11 -46
  122. data/lib/nokogiri/html/builder.rb +27 -1
  123. data/lib/nokogiri/html/document.rb +62 -6
  124. data/lib/nokogiri/html/document_fragment.rb +15 -0
  125. data/lib/nokogiri/html/element_description.rb +23 -0
  126. data/lib/nokogiri/html/entity_lookup.rb +2 -0
  127. data/lib/nokogiri/html/sax/parser.rb +27 -1
  128. data/lib/nokogiri/version.rb +26 -1
  129. data/lib/nokogiri/version_warning.rb +11 -0
  130. data/lib/nokogiri/xml.rb +25 -51
  131. data/lib/nokogiri/xml/builder.rb +166 -10
  132. data/lib/nokogiri/xml/cdata.rb +3 -1
  133. data/lib/nokogiri/xml/document.rb +39 -6
  134. data/lib/nokogiri/xml/document_fragment.rb +41 -1
  135. data/lib/nokogiri/xml/dtd.rb +3 -1
  136. data/lib/nokogiri/xml/entity_declaration.rb +3 -1
  137. data/lib/nokogiri/xml/fragment_handler.rb +24 -3
  138. data/lib/nokogiri/xml/namespace.rb +7 -0
  139. data/lib/nokogiri/xml/node.rb +314 -65
  140. data/lib/nokogiri/xml/node/save_options.rb +12 -2
  141. data/lib/nokogiri/xml/node_set.rb +58 -8
  142. data/lib/nokogiri/xml/parse_options.rb +80 -0
  143. data/lib/nokogiri/xml/processing_instruction.rb +2 -0
  144. data/lib/nokogiri/xml/reader.rb +42 -3
  145. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  146. data/lib/nokogiri/xml/sax.rb +0 -7
  147. data/lib/nokogiri/xml/sax/document.rb +84 -0
  148. data/lib/nokogiri/xml/sax/parser.rb +38 -2
  149. data/lib/nokogiri/xml/sax/push_parser.rb +12 -0
  150. data/lib/nokogiri/xml/schema.rb +65 -0
  151. data/lib/nokogiri/xml/syntax_error.rb +11 -0
  152. data/lib/nokogiri/xml/xpath.rb +1 -1
  153. data/lib/nokogiri/xml/xpath_context.rb +2 -0
  154. data/lib/nokogiri/xslt.rb +21 -1
  155. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  156. data/lib/xsd/xmlparser/nokogiri.rb +12 -2
  157. data/tasks/test.rb +42 -19
  158. data/test/css/test_parser.rb +29 -0
  159. data/test/ffi/test_document.rb +35 -0
  160. data/test/files/address_book.rlx +12 -0
  161. data/test/files/address_book.xml +10 -0
  162. data/test/files/po.xml +32 -0
  163. data/test/files/po.xsd +66 -0
  164. data/test/helper.rb +38 -8
  165. data/test/html/sax/test_parser.rb +12 -0
  166. data/test/html/test_builder.rb +25 -2
  167. data/test/html/test_document.rb +91 -20
  168. data/test/html/test_document_fragment.rb +97 -0
  169. data/test/html/test_element_description.rb +95 -0
  170. data/test/html/test_node.rb +66 -3
  171. data/test/test_convert_xpath.rb +1 -1
  172. data/test/test_memory_leak.rb +57 -18
  173. data/test/test_nokogiri.rb +24 -2
  174. data/test/test_reader.rb +77 -0
  175. data/test/test_xslt_transforms.rb +120 -82
  176. data/test/xml/node/test_subclass.rb +44 -0
  177. data/test/xml/sax/test_parser.rb +9 -0
  178. data/test/xml/sax/test_push_parser.rb +24 -0
  179. data/test/xml/test_attr.rb +7 -0
  180. data/test/xml/test_builder.rb +48 -0
  181. data/test/xml/test_cdata.rb +19 -0
  182. data/test/xml/test_comment.rb +6 -0
  183. data/test/xml/test_document.rb +101 -2
  184. data/test/xml/test_document_fragment.rb +55 -3
  185. data/test/xml/test_entity_reference.rb +4 -0
  186. data/test/xml/test_namespace.rb +43 -0
  187. data/test/xml/test_node.rb +255 -8
  188. data/test/xml/test_node_attributes.rb +34 -0
  189. data/test/xml/test_node_encoding.rb +9 -2
  190. data/test/xml/test_node_set.rb +197 -1
  191. data/test/xml/test_parse_options.rb +52 -0
  192. data/test/xml/test_processing_instruction.rb +5 -0
  193. data/test/xml/test_relax_ng.rb +60 -0
  194. data/test/xml/test_schema.rb +65 -0
  195. data/test/xml/test_text.rb +5 -0
  196. data/test/xml/test_unparented_node.rb +3 -3
  197. metadata +128 -12
  198. data/lib/nokogiri/xml/comment.rb +0 -6
  199. data/lib/nokogiri/xml/element.rb +0 -6
  200. data/lib/nokogiri/xml/text.rb +0 -6
@@ -4,15 +4,25 @@ module Nokogiri
4
4
  ###
5
5
  # Save options for serializing nodes
6
6
  class SaveOptions
7
- FORMAT = 1 # Format serialized xml
8
- NO_DECLARATION = 2 # Do not include delcarations
7
+ # Format serialized xml
8
+ FORMAT = 1
9
+ # Do not include declarations
10
+ NO_DECLARATION = 2
11
+ # Do not include empty tags
9
12
  NO_EMPTY_TAGS = 4
13
+ # Do not save XHTML
10
14
  NO_XHTML = 8
15
+ # Save as XHTML
11
16
  AS_XHTML = 16
17
+ # Save as XML
12
18
  AS_XML = 32
19
+ # Save as HTML
13
20
  AS_HTML = 64
14
21
 
22
+ # Integer representation of the SaveOptions
15
23
  attr_reader :options
24
+
25
+ # Create a new SaveOptions object with +options+
16
26
  def initialize options = 0; @options = options; end
17
27
  constants.each do |constant|
18
28
  class_eval %{
@@ -19,8 +19,13 @@ module Nokogiri
19
19
 
20
20
  ###
21
21
  # Get the first element of the NodeSet.
22
- def first
23
- self[0]
22
+ def first n = nil
23
+ return self[0] unless n
24
+ list = []
25
+ 0.upto(n - 1) do |i|
26
+ list << self[i]
27
+ end
28
+ list
24
29
  end
25
30
 
26
31
  ###
@@ -35,6 +40,13 @@ module Nokogiri
35
40
  length == 0
36
41
  end
37
42
 
43
+ ###
44
+ # Returns the index of the first node in self that is == to +node+. Returns nil if no match is found.
45
+ def index(node)
46
+ each_with_index { |member, j| return j if member == node }
47
+ nil
48
+ end
49
+
38
50
  ###
39
51
  # Insert +datum+ before the first Node in this NodeSet
40
52
  def before datum
@@ -77,6 +89,7 @@ module Nokogiri
77
89
  return self[path] if path.is_a?(Numeric)
78
90
  search(path, ns).first
79
91
  end
92
+ alias :% :at
80
93
 
81
94
  ###
82
95
  # Append the class attribute +name+ to all Node objects in the NodeSet.
@@ -112,8 +125,8 @@ module Nokogiri
112
125
  each do |el|
113
126
  el.set_attribute(key, value || blk[el])
114
127
  end
115
- return self
116
- end
128
+ return self
129
+ end
117
130
  if key.is_a? Hash
118
131
  key.each { |k,v| self.attr(k,v) }
119
132
  return self
@@ -130,7 +143,7 @@ module Nokogiri
130
143
  next unless el.respond_to? :remove_attribute
131
144
  el.remove_attribute(name)
132
145
  end
133
- self
146
+ self
134
147
  end
135
148
 
136
149
  ###
@@ -165,24 +178,61 @@ module Nokogiri
165
178
  self
166
179
  end
167
180
 
181
+ ###
182
+ # Convert this NodeSet to a string.
168
183
  def to_s
169
184
  map { |x| x.to_s }.join
170
185
  end
171
186
 
187
+ ###
188
+ # Convert this NodeSet to HTML
172
189
  def to_html *args
173
- map { |x| x.to_html(*args) }.join('')
190
+ map { |x| x.to_html(*args) }.join
174
191
  end
175
192
 
193
+ ###
194
+ # Convert this NodeSet to XHTML
176
195
  def to_xhtml *args
177
- map { |x| x.to_xhtml(*args) }.join('')
196
+ map { |x| x.to_xhtml(*args) }.join
178
197
  end
179
198
 
199
+ ###
200
+ # Convert this NodeSet to XML
180
201
  def to_xml *args
181
- map { |x| x.to_xml(*args) }.join('')
202
+ map { |x| x.to_xml(*args) }.join
182
203
  end
183
204
 
184
205
  alias :size :length
185
206
  alias :to_ary :to_a
207
+
208
+ ###
209
+ # Removes the last element from set and returns it, or +nil+ if
210
+ # the set is empty
211
+ def pop
212
+ return nil if length == 0
213
+ delete last
214
+ end
215
+
216
+ ###
217
+ # Returns the first element of the NodeSet and removes it. Returns
218
+ # +nil+ if the set is empty.
219
+ def shift
220
+ return nil if length == 0
221
+ delete first
222
+ end
223
+
224
+ ###
225
+ # Equality -- Two NodeSets are equal if they contain the same number
226
+ # of elements and if each element is equal to the corresponding
227
+ # element in the other NodeSet
228
+ def == other
229
+ return false unless other.is_a?(Nokogiri::XML::NodeSet)
230
+ return false unless length == other.length
231
+ each_with_index do |node, i|
232
+ return false unless node == other[i]
233
+ end
234
+ true
235
+ end
186
236
  end
187
237
  end
188
238
  end
@@ -0,0 +1,80 @@
1
+ module Nokogiri
2
+ module XML
3
+ ###
4
+ # Parse options for passing to Nokogiri.XML or Nokogiri.HTML
5
+ class ParseOptions
6
+ # Strict parsing
7
+ STRICT = 0
8
+ # Recover from errors
9
+ RECOVER = 1 << 0
10
+ # Substitute entities
11
+ NOENT = 1 << 1
12
+ # Load external subsets
13
+ DTDLOAD = 1 << 2
14
+ # Default DTD attributes
15
+ DTDATTR = 1 << 3
16
+ # validate with the DTD
17
+ DTDVALID = 1 << 4
18
+ # suppress error reports
19
+ NOERROR = 1 << 5
20
+ # suppress warning reports
21
+ NOWARNING = 1 << 6
22
+ # pedantic error reporting
23
+ PEDANTIC = 1 << 7
24
+ # remove blank nodes
25
+ NOBLANKS = 1 << 8
26
+ # use the SAX1 interface internally
27
+ SAX1 = 1 << 9
28
+ # Implement XInclude substitution
29
+ XINCLUDE = 1 << 10
30
+ # Forbid network access
31
+ NONET = 1 << 11
32
+ # Do not reuse the context dictionary
33
+ NODICT = 1 << 12
34
+ # remove redundant namespaces declarations
35
+ NSCLEAN = 1 << 13
36
+ # merge CDATA as text nodes
37
+ NOCDATA = 1 << 14
38
+ # do not generate XINCLUDE START/END nodes
39
+ NOXINCNODE = 1 << 15
40
+
41
+ attr_accessor :options
42
+ def initialize options = 0
43
+ @options = options
44
+ end
45
+
46
+ constants.each do |constant|
47
+ next if constant == 'STRICT'
48
+ class_eval %{
49
+ def #{constant.downcase}
50
+ @options |= #{constant}
51
+ self
52
+ end
53
+
54
+ def #{constant.downcase}?
55
+ #{constant} & @options == #{constant}
56
+ end
57
+ }
58
+ end
59
+
60
+ def strict
61
+ @options |= STRICT
62
+ self
63
+ end
64
+
65
+ def strict?
66
+ @options & RECOVER == STRICT
67
+ end
68
+
69
+ alias :to_i :options
70
+
71
+ def inspect
72
+ options = []
73
+ self.class.constants.each do |k|
74
+ options << k.downcase if send(:"#{k.downcase}?")
75
+ end
76
+ super.sub(/>$/, " " + options.join(', ') + ">")
77
+ end
78
+ end
79
+ end
80
+ end
@@ -1,6 +1,8 @@
1
1
  module Nokogiri
2
2
  module XML
3
3
  class ProcessingInstruction < Node
4
+ def initialize document, name, content
5
+ end
4
6
  end
5
7
  end
6
8
  end
@@ -1,27 +1,66 @@
1
1
  module Nokogiri
2
2
  module XML
3
+ ###
4
+ # Nokogiri::XML::Reader parses an XML document similar to the way a cursor
5
+ # would move. The Reader is given an XML document, and yields nodes
6
+ # to an each block.
7
+ #
8
+ # Here is an example of usage:
9
+ #
10
+ # reader = Nokogiri::XML::Reader(<<-eoxml)
11
+ # <x xmlns:tenderlove='http://tenderlovemaking.com/'>
12
+ # <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
13
+ # </x>
14
+ # eoxml
15
+ #
16
+ # reader.each do |node|
17
+ #
18
+ # # node is an instance of Nokogiri::XML::Reader
19
+ # puts node.name
20
+ #
21
+ # end
22
+ #
23
+ # Note that Nokogiri::XML::Reader#each can only be called once!! Once
24
+ # the cursor moves through the entire document, you must parse the
25
+ # document again. So make sure that you capture any information you
26
+ # need during the first iteration.
27
+ #
28
+ # The Reader parser is good for when you need the speed of a SAX parser,
29
+ # but do not want to write a Document handler.
3
30
  class Reader
4
31
  include Enumerable
32
+
33
+ # A list of errors encountered while parsing
5
34
  attr_accessor :errors
35
+
36
+ # The encoding for the document
6
37
  attr_reader :encoding
7
38
 
8
- def initialize url = nil, encoding = nil
9
- @errors = []
39
+ # The XML source
40
+ attr_reader :source
41
+
42
+ def initialize source, url = nil, encoding = nil # :nodoc:
43
+ @source = source
44
+ @errors = []
10
45
  @encoding = encoding
11
46
  end
47
+ private :initialize
12
48
 
49
+ ###
50
+ # Get a list of attributes for the current node.
13
51
  def attributes
14
52
  Hash[*(attribute_nodes.map { |node|
15
53
  [node.name, node.to_s]
16
54
  }.flatten)].merge(namespaces || {})
17
55
  end
18
56
 
57
+ ###
58
+ # Move the cursor through the document yielding each node to the block
19
59
  def each(&block)
20
60
  while node = self.read
21
61
  block.call(node)
22
62
  end
23
63
  end
24
- private :initialize
25
64
  end
26
65
  end
27
66
  end
@@ -0,0 +1,32 @@
1
+ module Nokogiri
2
+ module XML
3
+ class << self
4
+ ###
5
+ # Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
6
+ # See Nokogiri::XML::RelaxNG for an example.
7
+ def RelaxNG string_or_io
8
+ RelaxNG.new(string_or_io)
9
+ end
10
+ end
11
+
12
+ ###
13
+ # Nokogiri::XML::RelaxNG is used for validating XML against a
14
+ # RelaxNG schema.
15
+ #
16
+ # == Synopsis
17
+ #
18
+ # Validate an XML document against a RelaxNG schema. Loop over the errors
19
+ # that are returned and print them out:
20
+ #
21
+ # schema = Nokogiri::XML::RelaxNG(File.read(ADDRESS_SCHEMA_FILE))
22
+ # doc = Nokogiri::XML(File.read(ADDRESS_XML_FILE))
23
+ #
24
+ # schema.validate(doc).each do |error|
25
+ # puts error.message
26
+ # end
27
+ #
28
+ # The list of errors are Nokogiri::XML::SyntaxError objects.
29
+ class RelaxNG < Nokogiri::XML::Schema
30
+ end
31
+ end
32
+ end
@@ -1,10 +1,3 @@
1
1
  require 'nokogiri/xml/sax/document'
2
2
  require 'nokogiri/xml/sax/parser'
3
3
  require 'nokogiri/xml/sax/push_parser'
4
-
5
- module Nokogiri
6
- module XML
7
- module SAX
8
- end
9
- end
10
- end
@@ -1,6 +1,72 @@
1
1
  module Nokogiri
2
2
  module XML
3
+ ###
4
+ # SAX Parsers are event driven parsers. Nokogiri provides two different
5
+ # event based parsers when dealing with XML. If you want to do SAX style
6
+ # parsing using HTML, check out Nokogiri::HTML::SAX.
7
+ #
8
+ # The basic way a SAX style parser works is by creating a parser,
9
+ # telling the parser about the events we're interested in, then giving
10
+ # the parser some XML to process. The parser will notify you when
11
+ # it encounters events you said you would like to know about.
12
+ #
13
+ # To register for events, you simply subclass Nokogiri::XML::SAX::Document,
14
+ # and implement the methods for which you would like notification.
15
+ #
16
+ # For example, if I want to be notified when a document ends, and when an
17
+ # element starts, I would write a class like this:
18
+ #
19
+ # class MyDocument < Nokogiri::XML::SAX::Document
20
+ # def end_document
21
+ # puts "the document has ended"
22
+ # end
23
+ #
24
+ # def start_element name, attributes = []
25
+ # puts "#{name} started"
26
+ # end
27
+ # end
28
+ #
29
+ # Then I would instantiate a SAX parser with this document, and feed the
30
+ # parser some XML
31
+ #
32
+ # # Create a new parser
33
+ # parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
34
+ #
35
+ # # Feed the parser some XML
36
+ # parser.parse(File.open(ARGV[0], 'rb'))
37
+ #
38
+ # Now my document handler will be called when each node starts, and when
39
+ # the document ends. To see what kinds of events are available, take
40
+ # a look at Nokogiri::XML::SAX::Document.
41
+ #
42
+ # Two SAX parsers for XML are available, a parser that reads from a string
43
+ # or IO object as it feels necessary, and a parser that lets you spoon
44
+ # feed it XML. If you want to let Nokogiri deal with reading your XML,
45
+ # use the Nokogiri::XML::SAX::Parser. If you want to have fine grain
46
+ # control over the XML input, use the Nokogiri::XML::SAX::PushParser.
3
47
  module SAX
48
+ ###
49
+ # This class is used for registering types of events you are interested
50
+ # in handling. All of the methods on this class are available as
51
+ # possible events while parsing an XML document. To register for any
52
+ # particular event, just subclass this class and implement the methods
53
+ # you are interested in knowing about.
54
+ #
55
+ # To only be notified about start and end element events, write a class
56
+ # like this:
57
+ #
58
+ # class MyDocument < Nokogiri::XML::SAX::Document
59
+ # def start_element name, attrs = []
60
+ # puts "#{name} started!"
61
+ # end
62
+ #
63
+ # def end_element name
64
+ # puts "#{name} ended"
65
+ # end
66
+ # end
67
+ #
68
+ # You can use this event handler for any SAX style parser included with
69
+ # Nokogiri. See Nokogiri::XML::SAX, and Nokogiri::HTML::SAX.
4
70
  class Document
5
71
  ###
6
72
  # Called when document starts parsing
@@ -24,6 +90,24 @@ module Nokogiri
24
90
  def end_element name
25
91
  end
26
92
 
93
+ ###
94
+ # Called at the beginning of an element
95
+ # +name+ is the element name
96
+ # +attrs+ is a hash of attributes
97
+ # +prefix+ is the namespace prefix for the element
98
+ # +uri+ is the associated namespace URI
99
+ # +namespaces+ is a hash of namespace prefix:urls associated with the element
100
+ def start_element_ns(name, attrs = {}, prefix = nil, uri = nil, namespaces = {})
101
+ end
102
+
103
+ ###
104
+ # Called at the end of an element
105
+ # +name+ is the element's name
106
+ # +prefix+ is the namespace prefix associated with the element
107
+ # +uri+ is the associated namespace URI
108
+ def end_element_ns(name, prefix = nil, uri = nil)
109
+ end
110
+
27
111
  ###
28
112
  # Characters read between a tag
29
113
  # +string+ contains the character data