nokogiri 1.6.2.rc1-x64-mingw32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (263) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +26 -0
  3. data/.editorconfig +17 -0
  4. data/.gemtest +0 -0
  5. data/.travis.yml +25 -0
  6. data/CHANGELOG.ja.rdoc +857 -0
  7. data/CHANGELOG.rdoc +880 -0
  8. data/C_CODING_STYLE.rdoc +33 -0
  9. data/Gemfile +21 -0
  10. data/Manifest.txt +371 -0
  11. data/README.ja.rdoc +112 -0
  12. data/README.rdoc +180 -0
  13. data/ROADMAP.md +89 -0
  14. data/Rakefile +351 -0
  15. data/STANDARD_RESPONSES.md +47 -0
  16. data/Y_U_NO_GEMSPEC.md +155 -0
  17. data/bin/nokogiri +78 -0
  18. data/build_all +130 -0
  19. data/dependencies.yml +4 -0
  20. data/ext/nokogiri/depend +358 -0
  21. data/ext/nokogiri/extconf.rb +453 -0
  22. data/ext/nokogiri/html_document.c +170 -0
  23. data/ext/nokogiri/html_document.h +10 -0
  24. data/ext/nokogiri/html_element_description.c +279 -0
  25. data/ext/nokogiri/html_element_description.h +10 -0
  26. data/ext/nokogiri/html_entity_lookup.c +32 -0
  27. data/ext/nokogiri/html_entity_lookup.h +8 -0
  28. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  29. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  30. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  31. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  32. data/ext/nokogiri/nokogiri.c +148 -0
  33. data/ext/nokogiri/nokogiri.h +164 -0
  34. data/ext/nokogiri/xml_attr.c +94 -0
  35. data/ext/nokogiri/xml_attr.h +9 -0
  36. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  37. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  38. data/ext/nokogiri/xml_cdata.c +56 -0
  39. data/ext/nokogiri/xml_cdata.h +9 -0
  40. data/ext/nokogiri/xml_comment.c +54 -0
  41. data/ext/nokogiri/xml_comment.h +9 -0
  42. data/ext/nokogiri/xml_document.c +577 -0
  43. data/ext/nokogiri/xml_document.h +23 -0
  44. data/ext/nokogiri/xml_document_fragment.c +48 -0
  45. data/ext/nokogiri/xml_document_fragment.h +10 -0
  46. data/ext/nokogiri/xml_dtd.c +202 -0
  47. data/ext/nokogiri/xml_dtd.h +10 -0
  48. data/ext/nokogiri/xml_element_content.c +123 -0
  49. data/ext/nokogiri/xml_element_content.h +10 -0
  50. data/ext/nokogiri/xml_element_decl.c +69 -0
  51. data/ext/nokogiri/xml_element_decl.h +9 -0
  52. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  53. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  54. data/ext/nokogiri/xml_entity_decl.c +110 -0
  55. data/ext/nokogiri/xml_entity_decl.h +10 -0
  56. data/ext/nokogiri/xml_entity_reference.c +52 -0
  57. data/ext/nokogiri/xml_entity_reference.h +9 -0
  58. data/ext/nokogiri/xml_io.c +56 -0
  59. data/ext/nokogiri/xml_io.h +11 -0
  60. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  61. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  62. data/ext/nokogiri/xml_namespace.c +78 -0
  63. data/ext/nokogiri/xml_namespace.h +13 -0
  64. data/ext/nokogiri/xml_node.c +1541 -0
  65. data/ext/nokogiri/xml_node.h +13 -0
  66. data/ext/nokogiri/xml_node_set.c +467 -0
  67. data/ext/nokogiri/xml_node_set.h +14 -0
  68. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  69. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  70. data/ext/nokogiri/xml_reader.c +681 -0
  71. data/ext/nokogiri/xml_reader.h +10 -0
  72. data/ext/nokogiri/xml_relax_ng.c +161 -0
  73. data/ext/nokogiri/xml_relax_ng.h +9 -0
  74. data/ext/nokogiri/xml_sax_parser.c +312 -0
  75. data/ext/nokogiri/xml_sax_parser.h +39 -0
  76. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  77. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  78. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  79. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  80. data/ext/nokogiri/xml_schema.c +205 -0
  81. data/ext/nokogiri/xml_schema.h +9 -0
  82. data/ext/nokogiri/xml_syntax_error.c +63 -0
  83. data/ext/nokogiri/xml_syntax_error.h +13 -0
  84. data/ext/nokogiri/xml_text.c +52 -0
  85. data/ext/nokogiri/xml_text.h +9 -0
  86. data/ext/nokogiri/xml_xpath_context.c +307 -0
  87. data/ext/nokogiri/xml_xpath_context.h +10 -0
  88. data/ext/nokogiri/xslt_stylesheet.c +270 -0
  89. data/ext/nokogiri/xslt_stylesheet.h +14 -0
  90. data/lib/nokogiri.rb +137 -0
  91. data/lib/nokogiri/2.0/nokogiri.so +0 -0
  92. data/lib/nokogiri/2.1/nokogiri.so +0 -0
  93. data/lib/nokogiri/css.rb +27 -0
  94. data/lib/nokogiri/css/node.rb +52 -0
  95. data/lib/nokogiri/css/parser.rb +715 -0
  96. data/lib/nokogiri/css/parser.y +249 -0
  97. data/lib/nokogiri/css/parser_extras.rb +91 -0
  98. data/lib/nokogiri/css/syntax_error.rb +7 -0
  99. data/lib/nokogiri/css/tokenizer.rb +152 -0
  100. data/lib/nokogiri/css/tokenizer.rex +55 -0
  101. data/lib/nokogiri/css/xpath_visitor.rb +219 -0
  102. data/lib/nokogiri/decorators/slop.rb +35 -0
  103. data/lib/nokogiri/html.rb +37 -0
  104. data/lib/nokogiri/html/builder.rb +35 -0
  105. data/lib/nokogiri/html/document.rb +333 -0
  106. data/lib/nokogiri/html/document_fragment.rb +41 -0
  107. data/lib/nokogiri/html/element_description.rb +23 -0
  108. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  109. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  110. data/lib/nokogiri/html/sax/parser.rb +52 -0
  111. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  112. data/lib/nokogiri/html/sax/push_parser.rb +16 -0
  113. data/lib/nokogiri/syntax_error.rb +4 -0
  114. data/lib/nokogiri/version.rb +106 -0
  115. data/lib/nokogiri/xml.rb +73 -0
  116. data/lib/nokogiri/xml/attr.rb +14 -0
  117. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  118. data/lib/nokogiri/xml/builder.rb +443 -0
  119. data/lib/nokogiri/xml/cdata.rb +11 -0
  120. data/lib/nokogiri/xml/character_data.rb +7 -0
  121. data/lib/nokogiri/xml/document.rb +279 -0
  122. data/lib/nokogiri/xml/document_fragment.rb +112 -0
  123. data/lib/nokogiri/xml/dtd.rb +32 -0
  124. data/lib/nokogiri/xml/element_content.rb +36 -0
  125. data/lib/nokogiri/xml/element_decl.rb +13 -0
  126. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  127. data/lib/nokogiri/xml/namespace.rb +13 -0
  128. data/lib/nokogiri/xml/node.rb +982 -0
  129. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  130. data/lib/nokogiri/xml/node_set.rb +355 -0
  131. data/lib/nokogiri/xml/notation.rb +6 -0
  132. data/lib/nokogiri/xml/parse_options.rb +98 -0
  133. data/lib/nokogiri/xml/pp.rb +2 -0
  134. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  135. data/lib/nokogiri/xml/pp/node.rb +56 -0
  136. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  137. data/lib/nokogiri/xml/reader.rb +112 -0
  138. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  139. data/lib/nokogiri/xml/sax.rb +4 -0
  140. data/lib/nokogiri/xml/sax/document.rb +171 -0
  141. data/lib/nokogiri/xml/sax/parser.rb +123 -0
  142. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  143. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  144. data/lib/nokogiri/xml/schema.rb +63 -0
  145. data/lib/nokogiri/xml/syntax_error.rb +47 -0
  146. data/lib/nokogiri/xml/text.rb +9 -0
  147. data/lib/nokogiri/xml/xpath.rb +10 -0
  148. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  149. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  150. data/lib/nokogiri/xslt.rb +56 -0
  151. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  152. data/lib/xsd/xmlparser/nokogiri.rb +102 -0
  153. data/suppressions/README.txt +1 -0
  154. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  155. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  156. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  157. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  158. data/tasks/nokogiri.org.rb +24 -0
  159. data/tasks/test.rb +95 -0
  160. data/test/css/test_nthiness.rb +222 -0
  161. data/test/css/test_parser.rb +358 -0
  162. data/test/css/test_tokenizer.rb +198 -0
  163. data/test/css/test_xpath_visitor.rb +96 -0
  164. data/test/decorators/test_slop.rb +16 -0
  165. data/test/files/2ch.html +108 -0
  166. data/test/files/address_book.rlx +12 -0
  167. data/test/files/address_book.xml +10 -0
  168. data/test/files/atom.xml +344 -0
  169. data/test/files/bar/bar.xsd +4 -0
  170. data/test/files/bogus.xml +0 -0
  171. data/test/files/dont_hurt_em_why.xml +422 -0
  172. data/test/files/encoding.html +82 -0
  173. data/test/files/encoding.xhtml +84 -0
  174. data/test/files/exslt.xml +8 -0
  175. data/test/files/exslt.xslt +35 -0
  176. data/test/files/foo/foo.xsd +4 -0
  177. data/test/files/metacharset.html +10 -0
  178. data/test/files/noencoding.html +47 -0
  179. data/test/files/po.xml +32 -0
  180. data/test/files/po.xsd +66 -0
  181. data/test/files/saml/saml20assertion_schema.xsd +283 -0
  182. data/test/files/saml/saml20protocol_schema.xsd +302 -0
  183. data/test/files/saml/xenc_schema.xsd +146 -0
  184. data/test/files/saml/xmldsig_schema.xsd +318 -0
  185. data/test/files/shift_jis.html +10 -0
  186. data/test/files/shift_jis.xml +5 -0
  187. data/test/files/shift_jis_no_charset.html +9 -0
  188. data/test/files/snuggles.xml +3 -0
  189. data/test/files/staff.dtd +10 -0
  190. data/test/files/staff.xml +59 -0
  191. data/test/files/staff.xslt +32 -0
  192. data/test/files/test_document_url/bar.xml +2 -0
  193. data/test/files/test_document_url/document.dtd +4 -0
  194. data/test/files/test_document_url/document.xml +6 -0
  195. data/test/files/tlm.html +850 -0
  196. data/test/files/to_be_xincluded.xml +2 -0
  197. data/test/files/valid_bar.xml +2 -0
  198. data/test/files/xinclude.xml +4 -0
  199. data/test/helper.rb +164 -0
  200. data/test/html/sax/test_parser.rb +141 -0
  201. data/test/html/sax/test_parser_context.rb +46 -0
  202. data/test/html/test_builder.rb +164 -0
  203. data/test/html/test_document.rb +619 -0
  204. data/test/html/test_document_encoding.rb +148 -0
  205. data/test/html/test_document_fragment.rb +261 -0
  206. data/test/html/test_element_description.rb +105 -0
  207. data/test/html/test_named_characters.rb +14 -0
  208. data/test/html/test_node.rb +196 -0
  209. data/test/html/test_node_encoding.rb +27 -0
  210. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
  211. data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
  212. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  213. data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
  214. data/test/namespaces/test_namespaces_in_parsed_doc.rb +66 -0
  215. data/test/test_convert_xpath.rb +135 -0
  216. data/test/test_css_cache.rb +45 -0
  217. data/test/test_encoding_handler.rb +46 -0
  218. data/test/test_memory_leak.rb +156 -0
  219. data/test/test_nokogiri.rb +138 -0
  220. data/test/test_reader.rb +558 -0
  221. data/test/test_soap4r_sax.rb +52 -0
  222. data/test/test_xslt_transforms.rb +279 -0
  223. data/test/xml/node/test_save_options.rb +28 -0
  224. data/test/xml/node/test_subclass.rb +44 -0
  225. data/test/xml/sax/test_parser.rb +382 -0
  226. data/test/xml/sax/test_parser_context.rb +115 -0
  227. data/test/xml/sax/test_push_parser.rb +157 -0
  228. data/test/xml/test_attr.rb +64 -0
  229. data/test/xml/test_attribute_decl.rb +86 -0
  230. data/test/xml/test_builder.rb +315 -0
  231. data/test/xml/test_c14n.rb +161 -0
  232. data/test/xml/test_cdata.rb +48 -0
  233. data/test/xml/test_comment.rb +29 -0
  234. data/test/xml/test_document.rb +934 -0
  235. data/test/xml/test_document_encoding.rb +28 -0
  236. data/test/xml/test_document_fragment.rb +228 -0
  237. data/test/xml/test_dtd.rb +187 -0
  238. data/test/xml/test_dtd_encoding.rb +33 -0
  239. data/test/xml/test_element_content.rb +56 -0
  240. data/test/xml/test_element_decl.rb +73 -0
  241. data/test/xml/test_entity_decl.rb +122 -0
  242. data/test/xml/test_entity_reference.rb +245 -0
  243. data/test/xml/test_namespace.rb +95 -0
  244. data/test/xml/test_node.rb +1155 -0
  245. data/test/xml/test_node_attributes.rb +113 -0
  246. data/test/xml/test_node_encoding.rb +107 -0
  247. data/test/xml/test_node_inheritance.rb +32 -0
  248. data/test/xml/test_node_reparenting.rb +374 -0
  249. data/test/xml/test_node_set.rb +755 -0
  250. data/test/xml/test_parse_options.rb +64 -0
  251. data/test/xml/test_processing_instruction.rb +30 -0
  252. data/test/xml/test_reader_encoding.rb +142 -0
  253. data/test/xml/test_relax_ng.rb +60 -0
  254. data/test/xml/test_schema.rb +129 -0
  255. data/test/xml/test_syntax_error.rb +12 -0
  256. data/test/xml/test_text.rb +45 -0
  257. data/test/xml/test_unparented_node.rb +422 -0
  258. data/test/xml/test_xinclude.rb +83 -0
  259. data/test/xml/test_xpath.rb +376 -0
  260. data/test/xslt/test_custom_functions.rb +133 -0
  261. data/test/xslt/test_exception_handling.rb +37 -0
  262. data/test_all +81 -0
  263. metadata +601 -0
@@ -0,0 +1,2 @@
1
+ require 'nokogiri/xml/pp/node'
2
+ require 'nokogiri/xml/pp/character_data'
@@ -0,0 +1,18 @@
1
module Nokogiri
  module XML
    module PP
      # Inspection helpers for objects that expose their content via +text+.
      module CharacterData
        # Pretty-prints the receiver as "#(ShortClassName <text>)", where the
        # short name is the last "::"-separated segment of the class name.
        def pretty_print(pp) # :nodoc:
          short_name = self.class.name.split('::')[-1]
          pp.group(2, "#(#{short_name} ", ')') { pp.pp(text) }
        end

        # Returns "#<ClassName:0x<object_id in hex> <text.inspect>>".
        def inspect # :nodoc:
          hex_id = sprintf('0x%x', object_id)
          "#<#{self.class.name}:#{hex_id} #{text.inspect}>"
        end
      end
    end
  end
end
@@ -0,0 +1,56 @@
1
module Nokogiri
  module XML
    module PP
      # Inspection helpers for objects that list the attribute names worth
      # showing via +inspect_attributes+.
      module Node
        # Builds "#<ClassName:0x<id> name=value ...>". An attribute is left
        # out when its value is nil/false, when it responds to +empty?+ and is
        # empty, or when reading it raises NoMethodError.
        def inspect # :nodoc:
          shown = inspect_attributes.reject do |attr_name|
            begin
              value = send(attr_name)
              value ? (value.respond_to?(:empty?) && value.empty?) : true
            rescue NoMethodError
              true
            end
          end

          rendered = shown.map do |attr_name|
            # "attribute_nodes" is displayed as "attributes".
            label = attr_name.to_s.sub(/_\w+/, 's')
            "#{label}=#{send(attr_name).inspect}"
          end

          "#<#{self.class.name}:#{sprintf('0x%x', object_id)} #{rendered.join(' ')}>"
        end

        # Pretty-prints the receiver as "#(ShortName:0x<id> { ... })".
        # Only attributes the receiver responds to and whose value is truthy
        # are printed; :attribute_nodes and :children are additionally
        # skipped when empty and otherwise printed as a bracketed list.
        def pretty_print(pp) # :nodoc:
          list_like = [:attribute_nodes, :children]
          short_name = self.class.name.split('::').last

          pp.group(2, "#(#{short_name}:#{sprintf('0x%x', object_id)} {", '})') do
            pp.breakable

            pairs = []
            inspect_attributes.each do |attr_name|
              next unless respond_to?(attr_name)
              pairs << [attr_name, send(attr_name)]
            end

            printable = pairs.select do |pair|
              attr_name, value = pair
              if value
                list_like.include?(attr_name) ? !value.empty? : true
              end
            end

            pp.seplist(printable) do |pair|
              attr_name, value = pair
              if list_like.include?(attr_name)
                pp.group(2, "#{attr_name.to_s.sub(/_\w+$/, 's')} = [", "]") do
                  pp.breakable
                  pp.seplist(value) { |item| pp.pp(item) }
                end
              else
                pp.text "#{attr_name} = "
                pp.pp value
              end
            end

            pp.breakable
          end
        end
      end
    end
  end
end
@@ -0,0 +1,8 @@
1
module Nokogiri
  module XML
    # A processing-instruction node; a subclass of Node.
    class ProcessingInstruction < Node
      # Accepts the owning +document+, the instruction +name+ and its
      # +content+. The Ruby-level body is intentionally empty.
      # NOTE(review): presumably the real construction happens outside this
      # file (e.g. in the native extension) - confirm.
      def initialize(document, name, content); end
    end
  end
end
@@ -0,0 +1,112 @@
1
module Nokogiri
  module XML
    ###
    # Nokogiri::XML::Reader walks an XML document the way a cursor would:
    # it is handed a document and yields nodes, one at a time, to the block
    # passed to #each.
    #
    # Example:
    #
    #   reader = Nokogiri::XML::Reader(<<-eoxml)
    #     <x xmlns:tenderlove='http://tenderlovemaking.com/'>
    #       <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
    #     </x>
    #   eoxml
    #
    #   reader.each do |node|
    #
    #     # node is an instance of Nokogiri::XML::Reader
    #     puts node.name
    #
    #   end
    #
    # Nokogiri::XML::Reader#each can only be called once! After the cursor
    # has moved through the whole document, the document must be parsed
    # again, so capture everything you need during the first iteration.
    #
    # The Reader is a good fit when you want the speed of a SAX parser but
    # do not want to write a Document handler.
    class Reader
      include Enumerable

      TYPE_NONE                   = 0
      TYPE_ELEMENT                = 1  # Element node type
      TYPE_ATTRIBUTE              = 2  # Attribute node type
      TYPE_TEXT                   = 3  # Text node type
      TYPE_CDATA                  = 4  # CDATA node type
      TYPE_ENTITY_REFERENCE       = 5  # Entity Reference node type
      TYPE_ENTITY                 = 6  # Entity node type
      TYPE_PROCESSING_INSTRUCTION = 7  # PI node type
      TYPE_COMMENT                = 8  # Comment node type
      TYPE_DOCUMENT               = 9  # Document node type
      TYPE_DOCUMENT_TYPE          = 10 # Document Type node type
      TYPE_DOCUMENT_FRAGMENT      = 11 # Document Fragment node type
      TYPE_NOTATION               = 12 # Notation node type
      TYPE_WHITESPACE             = 13 # Whitespace node type
      TYPE_SIGNIFICANT_WHITESPACE = 14 # Significant Whitespace node type
      TYPE_END_ELEMENT            = 15 # Element end node type
      TYPE_END_ENTITY             = 16 # Entity end node type
      TYPE_XML_DECLARATION        = 17 # XML Declaration node type

      # Errors encountered while parsing (starts out as an empty list).
      attr_accessor :errors

      # The document encoding and the XML source handed to the constructor.
      attr_reader :encoding, :source

      # +empty_element?+ is not defined in this file; it has to exist on the
      # class already when this line is evaluated.
      alias_method :self_closing?, :empty_element?

      # Stores the source and encoding and resets the error list. The +url+
      # argument is accepted but not used here.
      def initialize(source, url = nil, encoding = nil) # :nodoc:
        @source   = source
        @errors   = []
        @encoding = encoding
      end
      private :initialize

      ###
      # Returns a Hash of attribute name => attribute string for the current
      # node, merged with the node's namespaces (when there are any).
      def attributes
        by_name = {}
        attribute_nodes.each { |node| by_name[node.name] = node.to_s }
        by_name.merge(namespaces || {})
      end

      ###
      # Returns the attribute nodes of the current node. Each node gets this
      # reader stored in its @_r instance variable.
      # NOTE(review): presumably this keeps the reader referenced for as long
      # as the attribute nodes are alive - confirm.
      def attribute_nodes
        attr_nodes.tap do |nodes|
          nodes.each { |node| node.instance_variable_set(:@_r, self) }
        end
      end

      ###
      # Advances the cursor through the document, yielding it to the block
      # after every successful read, until +read+ returns nil or false.
      def each
        cursor = read
        while cursor
          yield cursor
          cursor = read
        end
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
module Nokogiri
  module XML
    ###
    # Builds a Nokogiri::XML::RelaxNG document from +string_or_io+.
    # Nokogiri::XML::RelaxNG has a usage example.
    def self.RelaxNG(string_or_io)
      RelaxNG.new(string_or_io)
    end

    ###
    # Nokogiri::XML::RelaxNG validates XML against a RelaxNG schema.
    #
    # == Synopsis
    #
    # Validate an XML document against a RelaxNG schema, then loop over the
    # returned errors and print each one:
    #
    #   schema = Nokogiri::XML::RelaxNG(File.open(ADDRESS_SCHEMA_FILE))
    #   doc    = Nokogiri::XML(File.open(ADDRESS_XML_FILE))
    #
    #   schema.validate(doc).each do |error|
    #     puts error.message
    #   end
    #
    # The returned errors are Nokogiri::XML::SyntaxError objects.
    class RelaxNG < Nokogiri::XML::Schema
    end
  end
end
@@ -0,0 +1,4 @@
1
+ require 'nokogiri/xml/sax/document'
2
+ require 'nokogiri/xml/sax/parser_context'
3
+ require 'nokogiri/xml/sax/parser'
4
+ require 'nokogiri/xml/sax/push_parser'
@@ -0,0 +1,171 @@
1
module Nokogiri
  module XML
    ###
    # SAX parsers are event-driven parsers. Nokogiri provides two different
    # event-based parsers for XML. For SAX-style parsing of HTML, see
    # Nokogiri::HTML::SAX.
    #
    # A SAX-style parser is used by creating a parser, telling it which
    # events we are interested in, and then handing it some XML to process.
    # The parser notifies you whenever it encounters an event you said you
    # would like to know about.
    #
    # To register for events, subclass Nokogiri::XML::SAX::Document and
    # implement the methods for which you would like notification.
    #
    # For example, to be notified when a document ends and when an element
    # starts, write a class like this:
    #
    #   class MyDocument < Nokogiri::XML::SAX::Document
    #     def end_document
    #       puts "the document has ended"
    #     end
    #
    #     def start_element name, attributes = []
    #       puts "#{name} started"
    #     end
    #   end
    #
    # Then instantiate a SAX parser with this document and feed the parser
    # some XML:
    #
    #   # Create a new parser
    #   parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
    #
    #   # Feed the parser some XML
    #   parser.parse(File.open(ARGV[0]))
    #
    # The document handler is now called when each node starts and when the
    # document ends. Nokogiri::XML::SAX::Document lists the available events.
    #
    # Two SAX parsers for XML are available: one that reads from a string or
    # IO object as it sees fit, and one that lets you spoon-feed it XML. To
    # let Nokogiri deal with reading your XML, use
    # Nokogiri::XML::SAX::Parser. For fine-grained control over the XML
    # input, use Nokogiri::XML::SAX::PushParser.
    module SAX
      ###
      # This class registers the kinds of events you want to handle. Every
      # method on it is a possible event while parsing an XML document. To
      # receive a particular event, subclass this class and implement the
      # corresponding method.
      #
      # To be notified only about start and end element events, write a
      # class like this:
      #
      #   class MyDocument < Nokogiri::XML::SAX::Document
      #     def start_element name, attrs = []
      #       puts "#{name} started!"
      #     end
      #
      #     def end_element name
      #       puts "#{name} ended"
      #     end
      #   end
      #
      # This event handler works with any SAX-style parser included with
      # Nokogiri. See Nokogiri::XML::SAX and Nokogiri::HTML::SAX.
      class Document
        ###
        # Called when an XML declaration is parsed.
        def xmldecl(version, encoding, standalone)
        end

        ###
        # Called when the document starts parsing.
        def start_document
        end

        ###
        # Called when the document ends parsing.
        def end_document
        end

        ###
        # Called at the beginning of an element.
        # * +name+ is the name of the tag
        # * +attrs+ is an assoc list of namespaces and attributes, e.g.:
        #     [ ["xmlns:foo", "http://sample.net"], ["size", "large"] ]
        def start_element(name, attrs = [])
        end

        ###
        # Called at the end of an element.
        # +name+ is the tag name
        def end_element(name)
        end

        ###
        # Called at the beginning of an element.
        # +name+ is the element name
        # +attrs+ is a list of attributes (each responding to +prefix+,
        # +localname+ and +value+)
        # +prefix+ is the namespace prefix for the element
        # +uri+ is the associated namespace URI
        # +ns+ holds the namespace prefix/URI pairs associated with the
        # element
        #
        # The default implementation bridges to the SAX v1 interface: it
        # builds the qualified element name plus a flat list of
        # [qualified_name, value] pairs (namespace declarations first, then
        # attributes) and forwards both to #start_element.
        def start_element_namespace(name, attrs = [], prefix = nil, uri = nil, ns = [])
          qualified_name = [prefix, name].compact.join(':')

          namespace_pairs = ns.map do |ns_prefix, ns_uri|
            [['xmlns', ns_prefix].compact.join(':'), ns_uri]
          end
          attribute_pairs = attrs.map do |attr|
            [[attr.prefix, attr.localname].compact.join(':'), attr.value]
          end

          start_element(qualified_name, namespace_pairs + attribute_pairs)
        end

        ###
        # Called at the end of an element.
        # +name+ is the element's name
        # +prefix+ is the namespace prefix associated with the element
        # +uri+ is the associated namespace URI
        #
        # The default implementation bridges to the SAX v1 interface by
        # forwarding the qualified name to #end_element.
        def end_element_namespace(name, prefix = nil, uri = nil)
          qualified_name = [prefix, name].compact.join(':')
          end_element(qualified_name)
        end

        ###
        # Characters read between a tag. This method might be called several
        # times for one contiguous string of characters.
        #
        # +string+ contains the character data
        def characters(string)
        end

        ###
        # Called when comments are encountered.
        # +string+ contains the comment data
        def comment(string)
        end

        ###
        # Called on document warnings.
        # +string+ contains the warning
        def warning(string)
        end

        ###
        # Called on document errors.
        # +string+ contains the error
        def error(string)
        end

        ###
        # Called when cdata blocks are found.
        # +string+ contains the cdata content
        def cdata_block(string)
        end

        ###
        # Called when processing instructions are found.
        # +name+ is the target of the instruction
        # +content+ is the value of the instruction
        def processing_instruction(name, content)
        end
      end
    end
  end
end
@@ -0,0 +1,123 @@
1
module Nokogiri
  module XML
    module SAX
      ###
      # This parser is a SAX style parser that reads its input as it
      # deems necessary. The parser takes a Nokogiri::XML::SAX::Document,
      # an optional encoding, then given an XML input, sends messages to
      # the Nokogiri::XML::SAX::Document.
      #
      # Here is an example of using this parser:
      #
      #   # Create a subclass of Nokogiri::XML::SAX::Document and implement
      #   # the events we care about:
      #   class MyDoc < Nokogiri::XML::SAX::Document
      #     def start_element name, attrs = []
      #       puts "starting: #{name}"
      #     end
      #
      #     def end_element name
      #       puts "ending: #{name}"
      #     end
      #   end
      #
      #   # Create our parser
      #   parser = Nokogiri::XML::SAX::Parser.new(MyDoc.new)
      #
      #   # Send some XML to the parser
      #   parser.parse(File.open(ARGV[0]))
      #
      # For more information about SAX parsers, see Nokogiri::XML::SAX.  Also
      # see Nokogiri::XML::SAX::Document for the available events.
      class Parser
        # Value object describing one attribute: local name, namespace
        # prefix, namespace URI and value.
        class Attribute < Struct.new(:localname, :prefix, :uri, :value)
        end

        # Encodings this parser supports, keyed by upper-case name.
        # NOTE(review): the integer values look like libxml2's
        # xmlCharEncoding constants - confirm before changing any of them.
        ENCODINGS = {
          'NONE'        => 0,  # No char encoding detected
          'UTF-8'       => 1,  # UTF-8
          'UTF16LE'     => 2,  # UTF-16 little endian
          'UTF16BE'     => 3,  # UTF-16 big endian
          'UCS4LE'      => 4,  # UCS-4 little endian
          'UCS4BE'      => 5,  # UCS-4 big endian
          'EBCDIC'      => 6,  # EBCDIC uh!
          'UCS4-2143'   => 7,  # UCS-4 unusual ordering
          'UCS4-3412'   => 8,  # UCS-4 unusual ordering
          'UCS2'        => 9,  # UCS-2
          'ISO-8859-1'  => 10, # ISO-8859-1 ISO Latin 1
          'ISO-8859-2'  => 11, # ISO-8859-2 ISO Latin 2
          'ISO-8859-3'  => 12, # ISO-8859-3
          'ISO-8859-4'  => 13, # ISO-8859-4
          'ISO-8859-5'  => 14, # ISO-8859-5
          'ISO-8859-6'  => 15, # ISO-8859-6
          'ISO-8859-7'  => 16, # ISO-8859-7
          'ISO-8859-8'  => 17, # ISO-8859-8
          'ISO-8859-9'  => 18, # ISO-8859-9
          'ISO-2022-JP' => 19, # ISO-2022-JP
          'SHIFT-JIS'   => 20, # Shift_JIS
          'EUC-JP'      => 21, # EUC-JP
          'ASCII'       => 22, # pure ASCII
        }

        # The Nokogiri::XML::SAX::Document where events will be sent.
        attr_accessor :document

        # The encoding being used for this document.
        attr_accessor :encoding

        # Create a new Parser with +doc+ and +encoding+.
        #
        # +encoding+ is matched case-insensitively against ENCODINGS and
        # stored in its upper-case form; the caller's string is left
        # untouched. Raises ArgumentError for an unknown encoding.
        def initialize doc = Nokogiri::XML::SAX::Document.new, encoding = 'UTF-8'
          @encoding = check_encoding(encoding)
          @document = doc
          @warned   = false
        end

        ###
        # Parse given +thing+ which may be a string containing xml, or an
        # IO object. Anything responding to both +read+ and +close+ is
        # treated as an IO; everything else is parsed as in-memory data.
        # An optional block is handed the parser context before parsing.
        def parse thing, &block
          if thing.respond_to?(:read) && thing.respond_to?(:close)
            parse_io(thing, &block)
          else
            parse_memory(thing, &block)
          end
        end

        ###
        # Parse given +io+ using +encoding+, which replaces the parser's
        # current encoding. Raises ArgumentError for an unknown encoding.
        # If a block is given, it receives the parser context before
        # parsing starts.
        #
        # NOTE(review): #parse calls this without an encoding, so IO input
        # always switches the parser to 'ASCII' regardless of the encoding
        # given to the constructor - confirm this is intended.
        def parse_io io, encoding = 'ASCII'
          @encoding = check_encoding(encoding)
          ctx = ParserContext.io(io, ENCODINGS[@encoding])
          yield ctx if block_given?
          ctx.parse_with self
        end

        ###
        # Parse a file with +filename+.
        #
        # Raises ArgumentError when +filename+ is nil/false, Errno::ENOENT
        # when the file does not exist and Errno::EISDIR when it is a
        # directory. If a block is given, it receives the parser context
        # before parsing starts.
        def parse_file filename
          raise ArgumentError unless filename
          raise Errno::ENOENT unless File.exist?(filename)
          raise Errno::EISDIR if File.directory?(filename)
          ctx = ParserContext.file filename
          yield ctx if block_given?
          ctx.parse_with self
        end

        ###
        # Parse the in-memory +data+. If a block is given, it receives the
        # parser context before parsing starts.
        def parse_memory data
          ctx = ParserContext.memory data
          yield ctx if block_given?
          ctx.parse_with self
        end

        private

        # Returns the upper-cased form of +encoding+ after verifying that it
        # is a key of ENCODINGS; raises ArgumentError otherwise.
        #
        # Works on a copy (+upcase+, not +upcase!+) so the caller's string is
        # never mutated and frozen strings are accepted.
        def check_encoding(encoding)
          normalized = encoding.upcase
          unless ENCODINGS[normalized]
            raise ArgumentError, "'#{normalized}' is not a valid encoding"
          end
          normalized
        end
      end
    end
  end
end