superfeedr-nokogiri 1.4.0.20091116183308

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (270) hide show
  1. data/.autotest +27 -0
  2. data/CHANGELOG.ja.rdoc +330 -0
  3. data/CHANGELOG.rdoc +314 -0
  4. data/Manifest.txt +269 -0
  5. data/README.ja.rdoc +105 -0
  6. data/README.rdoc +118 -0
  7. data/Rakefile +244 -0
  8. data/bin/nokogiri +49 -0
  9. data/ext/nokogiri/extconf.rb +145 -0
  10. data/ext/nokogiri/html_document.c +145 -0
  11. data/ext/nokogiri/html_document.h +10 -0
  12. data/ext/nokogiri/html_element_description.c +272 -0
  13. data/ext/nokogiri/html_element_description.h +10 -0
  14. data/ext/nokogiri/html_entity_lookup.c +32 -0
  15. data/ext/nokogiri/html_entity_lookup.h +8 -0
  16. data/ext/nokogiri/html_sax_parser_context.c +92 -0
  17. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  18. data/ext/nokogiri/nokogiri.c +89 -0
  19. data/ext/nokogiri/nokogiri.h +145 -0
  20. data/ext/nokogiri/xml_attr.c +92 -0
  21. data/ext/nokogiri/xml_attr.h +9 -0
  22. data/ext/nokogiri/xml_attribute_decl.c +67 -0
  23. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  24. data/ext/nokogiri/xml_cdata.c +54 -0
  25. data/ext/nokogiri/xml_cdata.h +9 -0
  26. data/ext/nokogiri/xml_comment.c +52 -0
  27. data/ext/nokogiri/xml_comment.h +9 -0
  28. data/ext/nokogiri/xml_document.c +388 -0
  29. data/ext/nokogiri/xml_document.h +24 -0
  30. data/ext/nokogiri/xml_document_fragment.c +46 -0
  31. data/ext/nokogiri/xml_document_fragment.h +10 -0
  32. data/ext/nokogiri/xml_dtd.c +192 -0
  33. data/ext/nokogiri/xml_dtd.h +10 -0
  34. data/ext/nokogiri/xml_element_content.c +123 -0
  35. data/ext/nokogiri/xml_element_content.h +10 -0
  36. data/ext/nokogiri/xml_element_decl.c +69 -0
  37. data/ext/nokogiri/xml_element_decl.h +9 -0
  38. data/ext/nokogiri/xml_entity_decl.c +97 -0
  39. data/ext/nokogiri/xml_entity_decl.h +10 -0
  40. data/ext/nokogiri/xml_entity_reference.c +50 -0
  41. data/ext/nokogiri/xml_entity_reference.h +9 -0
  42. data/ext/nokogiri/xml_io.c +31 -0
  43. data/ext/nokogiri/xml_io.h +11 -0
  44. data/ext/nokogiri/xml_namespace.c +74 -0
  45. data/ext/nokogiri/xml_namespace.h +12 -0
  46. data/ext/nokogiri/xml_node.c +1060 -0
  47. data/ext/nokogiri/xml_node.h +13 -0
  48. data/ext/nokogiri/xml_node_set.c +397 -0
  49. data/ext/nokogiri/xml_node_set.h +9 -0
  50. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  51. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  52. data/ext/nokogiri/xml_reader.c +593 -0
  53. data/ext/nokogiri/xml_reader.h +10 -0
  54. data/ext/nokogiri/xml_relax_ng.c +159 -0
  55. data/ext/nokogiri/xml_relax_ng.h +9 -0
  56. data/ext/nokogiri/xml_sax_parser.c +286 -0
  57. data/ext/nokogiri/xml_sax_parser.h +43 -0
  58. data/ext/nokogiri/xml_sax_parser_context.c +155 -0
  59. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  60. data/ext/nokogiri/xml_sax_push_parser.c +114 -0
  61. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  62. data/ext/nokogiri/xml_schema.c +156 -0
  63. data/ext/nokogiri/xml_schema.h +9 -0
  64. data/ext/nokogiri/xml_syntax_error.c +261 -0
  65. data/ext/nokogiri/xml_syntax_error.h +13 -0
  66. data/ext/nokogiri/xml_text.c +48 -0
  67. data/ext/nokogiri/xml_text.h +9 -0
  68. data/ext/nokogiri/xml_xpath.c +53 -0
  69. data/ext/nokogiri/xml_xpath.h +11 -0
  70. data/ext/nokogiri/xml_xpath_context.c +239 -0
  71. data/ext/nokogiri/xml_xpath_context.h +9 -0
  72. data/ext/nokogiri/xslt_stylesheet.c +131 -0
  73. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  74. data/lib/nokogiri.rb +116 -0
  75. data/lib/nokogiri/css.rb +25 -0
  76. data/lib/nokogiri/css/generated_parser.rb +646 -0
  77. data/lib/nokogiri/css/generated_tokenizer.rb +142 -0
  78. data/lib/nokogiri/css/node.rb +99 -0
  79. data/lib/nokogiri/css/parser.rb +82 -0
  80. data/lib/nokogiri/css/parser.y +227 -0
  81. data/lib/nokogiri/css/syntax_error.rb +7 -0
  82. data/lib/nokogiri/css/tokenizer.rb +7 -0
  83. data/lib/nokogiri/css/tokenizer.rex +54 -0
  84. data/lib/nokogiri/css/xpath_visitor.rb +162 -0
  85. data/lib/nokogiri/decorators/slop.rb +33 -0
  86. data/lib/nokogiri/ffi/html/document.rb +28 -0
  87. data/lib/nokogiri/ffi/html/element_description.rb +85 -0
  88. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  89. data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
  90. data/lib/nokogiri/ffi/io_callbacks.rb +42 -0
  91. data/lib/nokogiri/ffi/libxml.rb +356 -0
  92. data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
  93. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  94. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  95. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  96. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  97. data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
  98. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  99. data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
  100. data/lib/nokogiri/ffi/structs/xml_dtd.rb +28 -0
  101. data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
  102. data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
  103. data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
  104. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
  105. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  106. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  107. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  108. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  109. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
  110. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  111. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  112. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +15 -0
  113. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  114. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  115. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  116. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
  117. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  118. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  119. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  120. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  121. data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
  122. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  123. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  124. data/lib/nokogiri/ffi/xml/document.rb +135 -0
  125. data/lib/nokogiri/ffi/xml/document_fragment.rb +21 -0
  126. data/lib/nokogiri/ffi/xml/dtd.rb +69 -0
  127. data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
  128. data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
  129. data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
  130. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  131. data/lib/nokogiri/ffi/xml/namespace.rb +44 -0
  132. data/lib/nokogiri/ffi/xml/node.rb +444 -0
  133. data/lib/nokogiri/ffi/xml/node_set.rb +133 -0
  134. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  135. data/lib/nokogiri/ffi/xml/reader.rb +227 -0
  136. data/lib/nokogiri/ffi/xml/relax_ng.rb +85 -0
  137. data/lib/nokogiri/ffi/xml/sax/parser.rb +142 -0
  138. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
  139. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +39 -0
  140. data/lib/nokogiri/ffi/xml/schema.rb +92 -0
  141. data/lib/nokogiri/ffi/xml/syntax_error.rb +91 -0
  142. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  143. data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
  144. data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
  145. data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
  146. data/lib/nokogiri/html.rb +35 -0
  147. data/lib/nokogiri/html/builder.rb +35 -0
  148. data/lib/nokogiri/html/document.rb +88 -0
  149. data/lib/nokogiri/html/document_fragment.rb +15 -0
  150. data/lib/nokogiri/html/element_description.rb +23 -0
  151. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  152. data/lib/nokogiri/html/sax/parser.rb +48 -0
  153. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  154. data/lib/nokogiri/syntax_error.rb +4 -0
  155. data/lib/nokogiri/version.rb +33 -0
  156. data/lib/nokogiri/version_warning.rb +11 -0
  157. data/lib/nokogiri/xml.rb +67 -0
  158. data/lib/nokogiri/xml/attr.rb +14 -0
  159. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  160. data/lib/nokogiri/xml/builder.rb +405 -0
  161. data/lib/nokogiri/xml/cdata.rb +11 -0
  162. data/lib/nokogiri/xml/character_data.rb +7 -0
  163. data/lib/nokogiri/xml/document.rb +131 -0
  164. data/lib/nokogiri/xml/document_fragment.rb +69 -0
  165. data/lib/nokogiri/xml/dtd.rb +11 -0
  166. data/lib/nokogiri/xml/element_content.rb +36 -0
  167. data/lib/nokogiri/xml/element_decl.rb +13 -0
  168. data/lib/nokogiri/xml/entity_decl.rb +15 -0
  169. data/lib/nokogiri/xml/fragment_handler.rb +71 -0
  170. data/lib/nokogiri/xml/namespace.rb +13 -0
  171. data/lib/nokogiri/xml/node.rb +665 -0
  172. data/lib/nokogiri/xml/node/save_options.rb +42 -0
  173. data/lib/nokogiri/xml/node_set.rb +307 -0
  174. data/lib/nokogiri/xml/notation.rb +6 -0
  175. data/lib/nokogiri/xml/parse_options.rb +85 -0
  176. data/lib/nokogiri/xml/pp.rb +2 -0
  177. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  178. data/lib/nokogiri/xml/pp/node.rb +56 -0
  179. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  180. data/lib/nokogiri/xml/reader.rb +74 -0
  181. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  182. data/lib/nokogiri/xml/sax.rb +4 -0
  183. data/lib/nokogiri/xml/sax/document.rb +160 -0
  184. data/lib/nokogiri/xml/sax/parser.rb +115 -0
  185. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  186. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  187. data/lib/nokogiri/xml/schema.rb +61 -0
  188. data/lib/nokogiri/xml/syntax_error.rb +38 -0
  189. data/lib/nokogiri/xml/xpath.rb +10 -0
  190. data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
  191. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  192. data/lib/nokogiri/xslt.rb +48 -0
  193. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  194. data/lib/xsd/xmlparser/nokogiri.rb +71 -0
  195. data/tasks/test.rb +100 -0
  196. data/test/css/test_nthiness.rb +159 -0
  197. data/test/css/test_parser.rb +277 -0
  198. data/test/css/test_tokenizer.rb +183 -0
  199. data/test/css/test_xpath_visitor.rb +76 -0
  200. data/test/ffi/test_document.rb +35 -0
  201. data/test/files/2ch.html +108 -0
  202. data/test/files/address_book.rlx +12 -0
  203. data/test/files/address_book.xml +10 -0
  204. data/test/files/bar/bar.xsd +4 -0
  205. data/test/files/dont_hurt_em_why.xml +422 -0
  206. data/test/files/exslt.xml +8 -0
  207. data/test/files/exslt.xslt +35 -0
  208. data/test/files/foo/foo.xsd +4 -0
  209. data/test/files/po.xml +32 -0
  210. data/test/files/po.xsd +66 -0
  211. data/test/files/shift_jis.html +10 -0
  212. data/test/files/shift_jis.xml +5 -0
  213. data/test/files/snuggles.xml +3 -0
  214. data/test/files/staff.dtd +10 -0
  215. data/test/files/staff.xml +59 -0
  216. data/test/files/staff.xslt +32 -0
  217. data/test/files/tlm.html +850 -0
  218. data/test/files/valid_bar.xml +2 -0
  219. data/test/helper.rb +136 -0
  220. data/test/html/sax/test_parser.rb +64 -0
  221. data/test/html/sax/test_parser_context.rb +48 -0
  222. data/test/html/test_builder.rb +164 -0
  223. data/test/html/test_document.rb +390 -0
  224. data/test/html/test_document_encoding.rb +77 -0
  225. data/test/html/test_document_fragment.rb +132 -0
  226. data/test/html/test_element_description.rb +94 -0
  227. data/test/html/test_named_characters.rb +14 -0
  228. data/test/html/test_node.rb +228 -0
  229. data/test/html/test_node_encoding.rb +27 -0
  230. data/test/test_convert_xpath.rb +135 -0
  231. data/test/test_css_cache.rb +45 -0
  232. data/test/test_gc.rb +15 -0
  233. data/test/test_memory_leak.rb +77 -0
  234. data/test/test_nokogiri.rb +134 -0
  235. data/test/test_reader.rb +358 -0
  236. data/test/test_xslt_transforms.rb +131 -0
  237. data/test/xml/node/test_save_options.rb +20 -0
  238. data/test/xml/node/test_subclass.rb +44 -0
  239. data/test/xml/sax/test_parser.rb +307 -0
  240. data/test/xml/sax/test_parser_context.rb +56 -0
  241. data/test/xml/sax/test_push_parser.rb +131 -0
  242. data/test/xml/test_attr.rb +38 -0
  243. data/test/xml/test_attribute_decl.rb +82 -0
  244. data/test/xml/test_builder.rb +167 -0
  245. data/test/xml/test_cdata.rb +38 -0
  246. data/test/xml/test_comment.rb +29 -0
  247. data/test/xml/test_document.rb +607 -0
  248. data/test/xml/test_document_encoding.rb +26 -0
  249. data/test/xml/test_document_fragment.rb +138 -0
  250. data/test/xml/test_dtd.rb +82 -0
  251. data/test/xml/test_dtd_encoding.rb +33 -0
  252. data/test/xml/test_element_content.rb +56 -0
  253. data/test/xml/test_element_decl.rb +73 -0
  254. data/test/xml/test_entity_decl.rb +83 -0
  255. data/test/xml/test_entity_reference.rb +21 -0
  256. data/test/xml/test_namespace.rb +68 -0
  257. data/test/xml/test_node.rb +889 -0
  258. data/test/xml/test_node_attributes.rb +34 -0
  259. data/test/xml/test_node_encoding.rb +107 -0
  260. data/test/xml/test_node_set.rb +531 -0
  261. data/test/xml/test_parse_options.rb +52 -0
  262. data/test/xml/test_processing_instruction.rb +30 -0
  263. data/test/xml/test_reader_encoding.rb +126 -0
  264. data/test/xml/test_relax_ng.rb +60 -0
  265. data/test/xml/test_schema.rb +89 -0
  266. data/test/xml/test_syntax_error.rb +27 -0
  267. data/test/xml/test_text.rb +30 -0
  268. data/test/xml/test_unparented_node.rb +381 -0
  269. data/test/xml/test_xpath.rb +106 -0
  270. metadata +430 -0
@@ -0,0 +1,56 @@
module Nokogiri
  module XML
    module PP
      ###
      # Mixin that supplies +inspect+ and +pretty_print+ implementations.
      # The including class must provide +inspect_attributes+, a list of
      # method names (symbols) whose values should be displayed.
      module Node
        ###
        # Build a one line description of the form
        #
        #   #<ClassName:0xOBJECT_ID name=value ...>
        #
        # An attribute is left out when its value is nil, false or empty, or
        # when reading it raises NoMethodError.  Every attribute reader is
        # invoked exactly once, so values that are costly to build are not
        # computed a second time just for formatting.
        def inspect # :nodoc:
          shown = inspect_attributes.map { |attr_name|
            value = begin
              candidate = send attr_name
              blank = !candidate ||
                (candidate.respond_to?(:empty?) && candidate.empty?)
              blank ? nil : candidate
            rescue NoMethodError
              # Best effort: an includer may not implement every attribute.
              nil
            end
            next unless value

            # Readers such as +attribute_nodes+ are labelled "attributes".
            "#{attr_name.to_s.sub(/_\w+/, 's')}=#{value.inspect}"
          }.compact.join ' '
          "#<#{self.class.name}:#{sprintf("0x%x", object_id)} #{shown}>"
        end

        ###
        # Pretty printer hook.  +pp+ is the printer object; it must respond
        # to +group+, +breakable+, +seplist+, +text+ and +pp+.
        #
        # Only attributes the receiver responds to and whose value is truthy
        # are printed.  +attribute_nodes+ and +children+ are additionally
        # skipped when empty, and are rendered as a bracketed list with one
        # entry per item.
        def pretty_print pp # :nodoc:
          short_name = self.class.name.split('::').last
          list_like  = [:attribute_nodes, :children]

          pp.group(2, "#(#{short_name}:#{sprintf("0x%x", object_id)} {", '})') do
            pp.breakable

            readable = inspect_attributes.select { |attr_name| respond_to?(attr_name) }
            pairs    = readable.map { |attr_name| [attr_name, send(attr_name)] }
            visible  = pairs.select { |attr_name, value|
              value && !(list_like.include?(attr_name) && value.empty?)
            }

            pp.seplist(visible) do |pair|
              attr_name, value = pair
              if list_like.include? attr_name
                pp.group(2, "#{attr_name.to_s.sub(/_\w+$/, 's')} = [", "]") do
                  pp.breakable
                  pp.seplist(value) do |item|
                    pp.pp item
                  end
                end
              else
                pp.text "#{attr_name} = "
                pp.pp value
              end
            end
            pp.breakable
          end
        end
      end
    end
  end
end
@@ -0,0 +1,8 @@
module Nokogiri
  module XML
    ###
    # Node subclass representing an XML processing instruction.
    class ProcessingInstruction < Node
      ###
      # Accepts +document+, +name+ and +content+ and deliberately does
      # nothing with them.
      # NOTE(review): presumably the real construction happens in the
      # native/FFI +new+ and this only fixes the arity -- confirm.
      def initialize(document, name, content); end
    end
  end
end
@@ -0,0 +1,74 @@
module Nokogiri
  module XML
    ###
    # Nokogiri::XML::Reader parses an XML document similar to the way a cursor
    # would move.  The Reader is given an XML document, and yields nodes
    # to an each block.
    #
    # Here is an example of usage:
    #
    #   reader = Nokogiri::XML::Reader(<<-eoxml)
    #     <x xmlns:tenderlove='http://tenderlovemaking.com/'>
    #       <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
    #     </x>
    #   eoxml
    #
    #   reader.each do |node|
    #
    #     # node is an instance of Nokogiri::XML::Reader
    #     puts node.name
    #
    #   end
    #
    # Note that Nokogiri::XML::Reader#each can only be called once!!  Once
    # the cursor moves through the entire document, you must parse the
    # document again.  So make sure that you capture any information you
    # need during the first iteration.
    #
    # The Reader parser is good for when you need the speed of a SAX parser,
    # but do not want to write a Document handler.
    class Reader
      include Enumerable

      # A list of errors encountered while parsing
      attr_accessor :errors

      # The encoding for the document
      attr_reader :encoding

      # The XML source
      attr_reader :source

      ###
      # Store +source+ and +encoding+ and start with an empty error list.
      # +url+ is accepted but not used here.
      def initialize source, url = nil, encoding = nil # :nodoc:
        @source   = source
        @errors   = []
        @encoding = encoding
      end
      private :initialize

      ###
      # Get the attributes of the current node as a Hash of name => value
      # (the value is the attribute node's +to_s+).  The entries returned by
      # +namespaces+, when it returns anything, are merged in and win on key
      # collisions.
      def attributes
        by_name = {}
        attribute_nodes.each { |node| by_name[node.name] = node.to_s }
        by_name.merge(namespaces || {})
      end

      ###
      # Get the list of attribute nodes for the current node.  Each node is
      # tagged with a reference back to this reader in its +@_r+ instance
      # variable.
      # NOTE(review): presumably this keeps the reader alive while the
      # attribute nodes are in use -- confirm against the native code.
      def attribute_nodes
        nodes = attr_nodes
        nodes.each { |attr_node| attr_node.instance_variable_set(:@_r, self) }
        nodes
      end

      ###
      # Move the cursor through the document yielding each node to the block.
      # Iteration stops as soon as +read+ returns nil or false.
      #
      # When no block is given an Enumerator is returned instead of failing
      # with a NoMethodError on the missing block.
      def each(&block)
        return enum_for(:each) unless block

        while node = self.read
          block.call(node)
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
module Nokogiri
  module XML
    ###
    # Build a Nokogiri::XML::RelaxNG schema from +string_or_io+.
    # The Nokogiri::XML::RelaxNG class documentation has a usage example.
    def self.RelaxNG(string_or_io)
      RelaxNG.new(string_or_io)
    end

    ###
    # Nokogiri::XML::RelaxNG validates XML documents against a RelaxNG
    # schema.
    #
    # == Synopsis
    #
    # Validate an XML document against a RelaxNG schema, then loop over the
    # returned errors and print each one:
    #
    #   schema  = Nokogiri::XML::RelaxNG(File.open(ADDRESS_SCHEMA_FILE))
    #   doc     = Nokogiri::XML(File.open(ADDRESS_XML_FILE))
    #
    #   schema.validate(doc).each do |error|
    #     puts error.message
    #   end
    #
    # The errors in the list are Nokogiri::XML::SyntaxError objects.
    class RelaxNG < Nokogiri::XML::Schema
    end
  end
end
@@ -0,0 +1,4 @@
1
+ require 'nokogiri/xml/sax/document'
2
+ require 'nokogiri/xml/sax/parser_context'
3
+ require 'nokogiri/xml/sax/parser'
4
+ require 'nokogiri/xml/sax/push_parser'
@@ -0,0 +1,160 @@
module Nokogiri
  module XML
    ###
    # SAX Parsers are event driven parsers.  Nokogiri provides two different
    # event based parsers when dealing with XML.  If you want to do SAX style
    # parsing using HTML, check out Nokogiri::HTML::SAX.
    #
    # The basic way a SAX style parser works is by creating a parser,
    # telling the parser about the events we're interested in, then giving
    # the parser some XML to process.  The parser will notify you when
    # it encounters events you said you would like to know about.
    #
    # To register for events, you simply subclass Nokogiri::XML::SAX::Document,
    # and implement the methods for which you would like notification.
    #
    # For example, if I want to be notified when a document ends, and when an
    # element starts, I would write a class like this:
    #
    #   class MyDocument < Nokogiri::XML::SAX::Document
    #     def end_document
    #       puts "the document has ended"
    #     end
    #
    #     def start_element name, attributes = []
    #       puts "#{name} started"
    #     end
    #   end
    #
    # Then I would instantiate a SAX parser with this document, and feed the
    # parser some XML
    #
    #   # Create a new parser
    #   parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
    #
    #   # Feed the parser some XML
    #   parser.parse(File.open(ARGV[0], 'rb'))
    #
    # Now my document handler will be called when each node starts, and when
    # the document ends.  To see what kinds of events are available, take
    # a look at Nokogiri::XML::SAX::Document.
    #
    # Two SAX parsers for XML are available, a parser that reads from a string
    # or IO object as it feels necessary, and a parser that lets you spoon
    # feed it XML.  If you want to let Nokogiri deal with reading your XML,
    # use the Nokogiri::XML::SAX::Parser.  If you want to have fine-grained
    # control over the XML input, use the Nokogiri::XML::SAX::PushParser.
    module SAX
      ###
      # This class is used for registering types of events you are interested
      # in handling.  All of the methods on this class are available as
      # possible events while parsing an XML document.  To register for any
      # particular event, just subclass this class and implement the methods
      # you are interested in knowing about.
      #
      # To only be notified about start and end element events, write a class
      # like this:
      #
      #   class MyDocument < Nokogiri::XML::SAX::Document
      #     def start_element name, attrs = []
      #       puts "#{name} started!"
      #     end
      #
      #     def end_element name
      #       puts "#{name} ended"
      #     end
      #   end
      #
      # You can use this event handler for any SAX style parser included with
      # Nokogiri.  See Nokogiri::XML::SAX, and Nokogiri::HTML::SAX.
      class Document
        ###
        # Called when an XML declaration is parsed
        def xmldecl(version, encoding, standalone)
        end

        ###
        # Called when document starts parsing
        def start_document
        end

        ###
        # Called when document ends parsing
        def end_document
        end

        ###
        # Called at the beginning of an element
        # +name+ is the name of the tag with +attrs+ as attributes
        def start_element(name, attrs = [])
        end

        ###
        # Called at the end of an element
        # +name+ is the tag name
        def end_element(name)
        end

        ###
        # Called at the beginning of an element
        # +name+ is the element name
        # +attrs+ is a list of attributes; each one must respond to +prefix+,
        # +localname+ and +value+
        # +prefix+ is the namespace prefix for the element
        # +uri+ is the associated namespace URI
        # +ns+ is a collection of [prefix, uri] namespace pairs associated
        # with the element (a Hash of prefix => uri works as well)
        #
        # The default implementation adapts the event to the SAX v1
        # interface: it calls +start_element+ with the prefixed element name
        # and one flat list of alternating names and values, namespace
        # declarations (as xmlns / xmlns:prefix) first, attributes after.
        def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
          ###
          # Deal with SAX v1 interface
          qualified_name = [prefix, name].compact.join(':')

          namespace_pairs = ns.map { |ns_prefix, ns_uri|
            [['xmlns', ns_prefix].compact.join(':'), ns_uri]
          }
          attribute_pairs = attrs.map { |attr|
            [[attr.prefix, attr.localname].compact.join(':'), attr.value]
          }

          # Flatten the combined list: flattening only the attribute pairs
          # would hand namespace declarations over as nested pairs while the
          # attributes are flat.
          start_element qualified_name, (namespace_pairs + attribute_pairs).flatten
        end

        ###
        # Called at the end of an element
        # +name+ is the element's name
        # +prefix+ is the namespace prefix associated with the element
        # +uri+ is the associated namespace URI
        #
        # The default implementation calls +end_element+ with the prefixed
        # element name.
        def end_element_namespace(name, prefix = nil, uri = nil)
          ###
          # Deal with SAX v1 interface
          end_element [prefix, name].compact.join(':')
        end

        ###
        # Characters read between a tag
        # +string+ contains the character data
        def characters(string)
        end

        ###
        # Called when comments are encountered
        # +string+ contains the comment data
        def comment(string)
        end

        ###
        # Called on document warnings
        # +string+ contains the warning
        def warning(string)
        end

        ###
        # Called on document errors
        # +string+ contains the error
        def error(string)
        end

        ###
        # Called when cdata blocks are found
        # +string+ contains the cdata content
        def cdata_block(string)
        end
      end
    end
  end
end
@@ -0,0 +1,115 @@
module Nokogiri
  module XML
    module SAX
      ###
      # This parser is a SAX style parser that reads its input as it
      # deems necessary.  The parser takes a Nokogiri::XML::SAX::Document,
      # an optional encoding, then given an XML input, sends messages to
      # the Nokogiri::XML::SAX::Document.
      #
      # Here is an example of using this parser:
      #
      #   # Create a subclass of Nokogiri::XML::SAX::Document and implement
      #   # the events we care about:
      #   class MyDoc < Nokogiri::XML::SAX::Document
      #     def start_element name, attrs = []
      #       puts "starting: #{name}"
      #     end
      #
      #     def end_element name
      #       puts "ending: #{name}"
      #     end
      #   end
      #
      #   # Create our parser
      #   parser = Nokogiri::XML::SAX::Parser.new(MyDoc.new)
      #
      #   # Send some XML to the parser
      #   parser.parse(File.read(ARGV[0]))
      #
      # For more information about SAX parsers, see Nokogiri::XML::SAX.  Also
      # see Nokogiri::XML::SAX::Document for the available events.
      class Parser
        # Simple record with the fields +localname+, +prefix+, +uri+ and
        # +value+.
        class Attribute < Struct.new(:localname, :prefix, :uri, :value)
        end

        # Encodings this parser supports, mapped to the integer code that is
        # handed to ParserContext.io.
        # NOTE(review): the codes look like libxml2's xmlCharEncoding values
        # -- confirm before changing any of them.
        ENCODINGS = {
          'NONE'        => 0,  # No char encoding detected
          'UTF-8'       => 1,  # UTF-8
          'UTF16LE'     => 2,  # UTF-16 little endian
          'UTF16BE'     => 3,  # UTF-16 big endian
          'UCS4LE'      => 4,  # UCS-4 little endian
          'UCS4BE'      => 5,  # UCS-4 big endian
          'EBCDIC'      => 6,  # EBCDIC uh!
          'UCS4-2143'   => 7,  # UCS-4 unusual ordering
          'UCS4-3412'   => 8,  # UCS-4 unusual ordering
          'UCS2'        => 9,  # UCS-2
          'ISO-8859-1'  => 10, # ISO-8859-1 ISO Latin 1
          'ISO-8859-2'  => 11, # ISO-8859-2 ISO Latin 2
          'ISO-8859-3'  => 12, # ISO-8859-3
          'ISO-8859-4'  => 13, # ISO-8859-4
          'ISO-8859-5'  => 14, # ISO-8859-5
          'ISO-8859-6'  => 15, # ISO-8859-6
          'ISO-8859-7'  => 16, # ISO-8859-7
          'ISO-8859-8'  => 17, # ISO-8859-8
          'ISO-8859-9'  => 18, # ISO-8859-9
          'ISO-2022-JP' => 19, # ISO-2022-JP
          'SHIFT-JIS'   => 20, # Shift_JIS
          'EUC-JP'      => 21, # EUC-JP
          'ASCII'       => 22, # pure ASCII
        }

        # The Nokogiri::XML::SAX::Document where events will be sent.
        attr_accessor :document

        # The encoding being used for this document.
        attr_accessor :encoding

        # Create a new Parser with +doc+ and +encoding+
        def initialize doc = Nokogiri::XML::SAX::Document.new, encoding = 'UTF-8'
          @encoding = encoding
          @document = doc
          @warned   = false
        end

        ###
        # Parse given +thing+ which may be a string containing xml, or an
        # IO object.  Anything that responds to both +read+ and +close+ is
        # treated as an IO and is parsed with the encoding this parser was
        # configured with; everything else is handed to +parse_memory+.
        # An optional block receives the parser context before parsing starts.
        def parse thing, &block
          if thing.respond_to?(:read) && thing.respond_to?(:close)
            # Pass the configured encoding along; otherwise parse_io would
            # fall back to its 'ASCII' default and overwrite @encoding.
            parse_io(thing, @encoding, &block)
          else
            parse_memory(thing, &block)
          end
        end

        ###
        # Parse given +io+ using +encoding+, which must be a key of
        # ENCODINGS.  +encoding+ also becomes this parser's current encoding.
        # If a block is given, it is yielded the parser context before
        # parsing starts.
        def parse_io io, encoding = 'ASCII'
          @encoding = encoding
          ctx = ParserContext.io(io, ENCODINGS[encoding])
          yield ctx if block_given?
          ctx.parse_with self
        end

        ###
        # Parse a file with +filename+.  If a block is given, it is yielded
        # the parser context before parsing starts.
        #
        # Raises ArgumentError when +filename+ is nil, Errno::ENOENT when the
        # file does not exist and Errno::EISDIR when it is a directory.
        def parse_file filename
          raise ArgumentError unless filename
          # File.exist? rather than File.exists?: the latter is deprecated
          # and no longer available on current Ruby versions.
          raise Errno::ENOENT unless File.exist?(filename)
          raise Errno::EISDIR if File.directory?(filename)
          ctx = ParserContext.file filename
          yield ctx if block_given?
          ctx.parse_with self
        end

        ###
        # Parse the XML held in the string +data+.  If a block is given, it
        # is yielded the parser context before parsing starts.
        def parse_memory data
          ctx = ParserContext.memory data
          yield ctx if block_given?
          ctx.parse_with self
        end
      end
    end
  end
end