nokogiri 1.3.0-x86-mswin32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (256) hide show
  1. data/.autotest +27 -0
  2. data/CHANGELOG.ja.rdoc +233 -0
  3. data/CHANGELOG.rdoc +222 -0
  4. data/Manifest.txt +247 -0
  5. data/README.ja.rdoc +103 -0
  6. data/README.rdoc +117 -0
  7. data/Rakefile +205 -0
  8. data/bin/nokogiri +47 -0
  9. data/ext/nokogiri/extconf.rb +89 -0
  10. data/ext/nokogiri/html_document.c +183 -0
  11. data/ext/nokogiri/html_document.h +10 -0
  12. data/ext/nokogiri/html_element_description.c +272 -0
  13. data/ext/nokogiri/html_element_description.h +10 -0
  14. data/ext/nokogiri/html_entity_lookup.c +30 -0
  15. data/ext/nokogiri/html_entity_lookup.h +8 -0
  16. data/ext/nokogiri/html_sax_parser.c +57 -0
  17. data/ext/nokogiri/html_sax_parser.h +11 -0
  18. data/ext/nokogiri/iconv.dll +0 -0
  19. data/ext/nokogiri/libexslt.dll +0 -0
  20. data/ext/nokogiri/libxml2.dll +0 -0
  21. data/ext/nokogiri/libxslt.dll +0 -0
  22. data/ext/nokogiri/nokogiri.c +81 -0
  23. data/ext/nokogiri/nokogiri.h +149 -0
  24. data/ext/nokogiri/xml_attr.c +92 -0
  25. data/ext/nokogiri/xml_attr.h +9 -0
  26. data/ext/nokogiri/xml_cdata.c +53 -0
  27. data/ext/nokogiri/xml_cdata.h +9 -0
  28. data/ext/nokogiri/xml_comment.c +51 -0
  29. data/ext/nokogiri/xml_comment.h +9 -0
  30. data/ext/nokogiri/xml_document.c +308 -0
  31. data/ext/nokogiri/xml_document.h +21 -0
  32. data/ext/nokogiri/xml_document_fragment.c +48 -0
  33. data/ext/nokogiri/xml_document_fragment.h +10 -0
  34. data/ext/nokogiri/xml_dtd.c +102 -0
  35. data/ext/nokogiri/xml_dtd.h +8 -0
  36. data/ext/nokogiri/xml_entity_reference.c +50 -0
  37. data/ext/nokogiri/xml_entity_reference.h +9 -0
  38. data/ext/nokogiri/xml_io.c +24 -0
  39. data/ext/nokogiri/xml_io.h +10 -0
  40. data/ext/nokogiri/xml_namespace.c +69 -0
  41. data/ext/nokogiri/xml_namespace.h +12 -0
  42. data/ext/nokogiri/xml_node.c +928 -0
  43. data/ext/nokogiri/xml_node.h +14 -0
  44. data/ext/nokogiri/xml_node_set.c +386 -0
  45. data/ext/nokogiri/xml_node_set.h +9 -0
  46. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  47. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  48. data/ext/nokogiri/xml_reader.c +572 -0
  49. data/ext/nokogiri/xml_reader.h +10 -0
  50. data/ext/nokogiri/xml_relax_ng.c +106 -0
  51. data/ext/nokogiri/xml_relax_ng.h +9 -0
  52. data/ext/nokogiri/xml_sax_parser.c +336 -0
  53. data/ext/nokogiri/xml_sax_parser.h +10 -0
  54. data/ext/nokogiri/xml_sax_push_parser.c +86 -0
  55. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  56. data/ext/nokogiri/xml_schema.c +107 -0
  57. data/ext/nokogiri/xml_schema.h +9 -0
  58. data/ext/nokogiri/xml_syntax_error.c +203 -0
  59. data/ext/nokogiri/xml_syntax_error.h +12 -0
  60. data/ext/nokogiri/xml_text.c +47 -0
  61. data/ext/nokogiri/xml_text.h +9 -0
  62. data/ext/nokogiri/xml_xpath.c +53 -0
  63. data/ext/nokogiri/xml_xpath.h +11 -0
  64. data/ext/nokogiri/xml_xpath_context.c +252 -0
  65. data/ext/nokogiri/xml_xpath_context.h +9 -0
  66. data/ext/nokogiri/xslt_stylesheet.c +131 -0
  67. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  68. data/ext/nokogiri/zlib1.dll +0 -0
  69. data/lib/action-nokogiri.rb +36 -0
  70. data/lib/nokogiri.rb +110 -0
  71. data/lib/nokogiri/1.8/nokogiri.so +0 -0
  72. data/lib/nokogiri/1.9/nokogiri.so +0 -0
  73. data/lib/nokogiri/css.rb +25 -0
  74. data/lib/nokogiri/css/generated_parser.rb +748 -0
  75. data/lib/nokogiri/css/generated_tokenizer.rb +144 -0
  76. data/lib/nokogiri/css/node.rb +107 -0
  77. data/lib/nokogiri/css/parser.rb +82 -0
  78. data/lib/nokogiri/css/parser.y +227 -0
  79. data/lib/nokogiri/css/syntax_error.rb +7 -0
  80. data/lib/nokogiri/css/tokenizer.rb +11 -0
  81. data/lib/nokogiri/css/tokenizer.rex +54 -0
  82. data/lib/nokogiri/css/xpath_visitor.rb +172 -0
  83. data/lib/nokogiri/decorators.rb +2 -0
  84. data/lib/nokogiri/decorators/hpricot.rb +3 -0
  85. data/lib/nokogiri/decorators/hpricot/node.rb +56 -0
  86. data/lib/nokogiri/decorators/hpricot/node_set.rb +54 -0
  87. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +30 -0
  88. data/lib/nokogiri/decorators/slop.rb +33 -0
  89. data/lib/nokogiri/ffi/html/document.rb +37 -0
  90. data/lib/nokogiri/ffi/html/element_description.rb +85 -0
  91. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  92. data/lib/nokogiri/ffi/html/sax/parser.rb +21 -0
  93. data/lib/nokogiri/ffi/io_callbacks.rb +32 -0
  94. data/lib/nokogiri/ffi/libxml.rb +314 -0
  95. data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
  96. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  97. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  98. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  99. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  100. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  101. data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
  102. data/lib/nokogiri/ffi/structs/xml_dtd.rb +26 -0
  103. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  104. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  105. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  106. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  107. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  108. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  109. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +14 -0
  110. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  111. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  112. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  113. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
  114. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  115. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  116. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  117. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  118. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  119. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  120. data/lib/nokogiri/ffi/xml/document.rb +107 -0
  121. data/lib/nokogiri/ffi/xml/document_fragment.rb +26 -0
  122. data/lib/nokogiri/ffi/xml/dtd.rb +42 -0
  123. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  124. data/lib/nokogiri/ffi/xml/namespace.rb +38 -0
  125. data/lib/nokogiri/ffi/xml/node.rb +380 -0
  126. data/lib/nokogiri/ffi/xml/node_set.rb +130 -0
  127. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  128. data/lib/nokogiri/ffi/xml/reader.rb +217 -0
  129. data/lib/nokogiri/ffi/xml/relax_ng.rb +51 -0
  130. data/lib/nokogiri/ffi/xml/sax/parser.rb +148 -0
  131. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +38 -0
  132. data/lib/nokogiri/ffi/xml/schema.rb +55 -0
  133. data/lib/nokogiri/ffi/xml/syntax_error.rb +76 -0
  134. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  135. data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
  136. data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
  137. data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
  138. data/lib/nokogiri/hpricot.rb +62 -0
  139. data/lib/nokogiri/html.rb +34 -0
  140. data/lib/nokogiri/html/builder.rb +35 -0
  141. data/lib/nokogiri/html/document.rb +71 -0
  142. data/lib/nokogiri/html/document_fragment.rb +15 -0
  143. data/lib/nokogiri/html/element_description.rb +23 -0
  144. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  145. data/lib/nokogiri/html/sax/parser.rb +47 -0
  146. data/lib/nokogiri/nokogiri.rb +1 -0
  147. data/lib/nokogiri/syntax_error.rb +4 -0
  148. data/lib/nokogiri/version.rb +29 -0
  149. data/lib/nokogiri/version_warning.rb +11 -0
  150. data/lib/nokogiri/xml.rb +62 -0
  151. data/lib/nokogiri/xml/attr.rb +9 -0
  152. data/lib/nokogiri/xml/builder.rb +254 -0
  153. data/lib/nokogiri/xml/cdata.rb +11 -0
  154. data/lib/nokogiri/xml/document.rb +100 -0
  155. data/lib/nokogiri/xml/document_fragment.rb +49 -0
  156. data/lib/nokogiri/xml/dtd.rb +11 -0
  157. data/lib/nokogiri/xml/entity_declaration.rb +11 -0
  158. data/lib/nokogiri/xml/fragment_handler.rb +55 -0
  159. data/lib/nokogiri/xml/namespace.rb +7 -0
  160. data/lib/nokogiri/xml/node.rb +745 -0
  161. data/lib/nokogiri/xml/node/save_options.rb +42 -0
  162. data/lib/nokogiri/xml/node_set.rb +238 -0
  163. data/lib/nokogiri/xml/notation.rb +6 -0
  164. data/lib/nokogiri/xml/parse_options.rb +80 -0
  165. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  166. data/lib/nokogiri/xml/reader.rb +66 -0
  167. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  168. data/lib/nokogiri/xml/sax.rb +3 -0
  169. data/lib/nokogiri/xml/sax/document.rb +143 -0
  170. data/lib/nokogiri/xml/sax/parser.rb +101 -0
  171. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  172. data/lib/nokogiri/xml/schema.rb +65 -0
  173. data/lib/nokogiri/xml/syntax_error.rb +34 -0
  174. data/lib/nokogiri/xml/xpath.rb +10 -0
  175. data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
  176. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  177. data/lib/nokogiri/xslt.rb +48 -0
  178. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  179. data/lib/xsd/xmlparser/nokogiri.rb +64 -0
  180. data/tasks/test.rb +161 -0
  181. data/test/css/test_nthiness.rb +160 -0
  182. data/test/css/test_parser.rb +277 -0
  183. data/test/css/test_tokenizer.rb +176 -0
  184. data/test/css/test_xpath_visitor.rb +76 -0
  185. data/test/ffi/test_document.rb +35 -0
  186. data/test/files/address_book.rlx +12 -0
  187. data/test/files/address_book.xml +10 -0
  188. data/test/files/dont_hurt_em_why.xml +422 -0
  189. data/test/files/exslt.xml +8 -0
  190. data/test/files/exslt.xslt +35 -0
  191. data/test/files/po.xml +32 -0
  192. data/test/files/po.xsd +66 -0
  193. data/test/files/staff.xml +59 -0
  194. data/test/files/staff.xslt +32 -0
  195. data/test/files/tlm.html +850 -0
  196. data/test/helper.rb +123 -0
  197. data/test/hpricot/files/basic.xhtml +17 -0
  198. data/test/hpricot/files/boingboing.html +2266 -0
  199. data/test/hpricot/files/cy0.html +3653 -0
  200. data/test/hpricot/files/immob.html +400 -0
  201. data/test/hpricot/files/pace_application.html +1320 -0
  202. data/test/hpricot/files/tenderlove.html +16 -0
  203. data/test/hpricot/files/uswebgen.html +220 -0
  204. data/test/hpricot/files/utf8.html +1054 -0
  205. data/test/hpricot/files/week9.html +1723 -0
  206. data/test/hpricot/files/why.xml +19 -0
  207. data/test/hpricot/load_files.rb +11 -0
  208. data/test/hpricot/test_alter.rb +68 -0
  209. data/test/hpricot/test_builder.rb +20 -0
  210. data/test/hpricot/test_parser.rb +426 -0
  211. data/test/hpricot/test_paths.rb +15 -0
  212. data/test/hpricot/test_preserved.rb +77 -0
  213. data/test/hpricot/test_xml.rb +30 -0
  214. data/test/html/sax/test_parser.rb +52 -0
  215. data/test/html/test_builder.rb +156 -0
  216. data/test/html/test_document.rb +361 -0
  217. data/test/html/test_document_encoding.rb +46 -0
  218. data/test/html/test_document_fragment.rb +97 -0
  219. data/test/html/test_element_description.rb +95 -0
  220. data/test/html/test_named_characters.rb +14 -0
  221. data/test/html/test_node.rb +165 -0
  222. data/test/test_convert_xpath.rb +186 -0
  223. data/test/test_css_cache.rb +56 -0
  224. data/test/test_gc.rb +15 -0
  225. data/test/test_memory_leak.rb +77 -0
  226. data/test/test_nokogiri.rb +127 -0
  227. data/test/test_reader.rb +316 -0
  228. data/test/test_xslt_transforms.rb +131 -0
  229. data/test/xml/node/test_save_options.rb +20 -0
  230. data/test/xml/node/test_subclass.rb +44 -0
  231. data/test/xml/sax/test_parser.rb +169 -0
  232. data/test/xml/sax/test_push_parser.rb +92 -0
  233. data/test/xml/test_attr.rb +38 -0
  234. data/test/xml/test_builder.rb +73 -0
  235. data/test/xml/test_cdata.rb +38 -0
  236. data/test/xml/test_comment.rb +23 -0
  237. data/test/xml/test_document.rb +397 -0
  238. data/test/xml/test_document_encoding.rb +26 -0
  239. data/test/xml/test_document_fragment.rb +76 -0
  240. data/test/xml/test_dtd.rb +42 -0
  241. data/test/xml/test_dtd_encoding.rb +31 -0
  242. data/test/xml/test_entity_reference.rb +21 -0
  243. data/test/xml/test_namespace.rb +43 -0
  244. data/test/xml/test_node.rb +808 -0
  245. data/test/xml/test_node_attributes.rb +34 -0
  246. data/test/xml/test_node_encoding.rb +84 -0
  247. data/test/xml/test_node_set.rb +368 -0
  248. data/test/xml/test_parse_options.rb +52 -0
  249. data/test/xml/test_processing_instruction.rb +30 -0
  250. data/test/xml/test_reader_encoding.rb +126 -0
  251. data/test/xml/test_relax_ng.rb +60 -0
  252. data/test/xml/test_schema.rb +65 -0
  253. data/test/xml/test_text.rb +18 -0
  254. data/test/xml/test_unparented_node.rb +381 -0
  255. data/test/xml/test_xpath.rb +106 -0
  256. metadata +409 -0
@@ -0,0 +1,32 @@
1
+ module Nokogiri
2
+ module XML
3
+ class << self
4
+ ###
5
+ # Create a new Nokogiri::XML::RelaxNG document from +string_or_io+.
6
+ # See Nokogiri::XML::RelaxNG for an example.
7
+ def RelaxNG string_or_io
8
+ RelaxNG.new(string_or_io)
9
+ end
10
+ end
11
+
12
+ ###
13
+ # Nokogiri::XML::RelaxNG is used for validating XML against a
14
+ # RelaxNG schema.
15
+ #
16
+ # == Synopsis
17
+ #
18
+ # Validate an XML document against a RelaxNG schema. Loop over the errors
19
+ # that are returned and print them out:
20
+ #
21
+ # schema = Nokogiri::XML::RelaxNG(File.read(ADDRESS_SCHEMA_FILE))
22
+ # doc = Nokogiri::XML(File.read(ADDRESS_XML_FILE))
23
+ #
24
+ # schema.validate(doc).each do |error|
25
+ # puts error.message
26
+ # end
27
+ #
28
+ # The list of errors are Nokogiri::XML::SyntaxError objects.
29
+ class RelaxNG < Nokogiri::XML::Schema
30
+ end
31
+ end
32
+ end
@@ -0,0 +1,3 @@
1
+ require 'nokogiri/xml/sax/document'
2
+ require 'nokogiri/xml/sax/parser'
3
+ require 'nokogiri/xml/sax/push_parser'
@@ -0,0 +1,143 @@
1
+ module Nokogiri
2
+ module XML
3
+ ###
4
+ # SAX Parsers are event driven parsers. Nokogiri provides two different
5
+ # event based parsers when dealing with XML. If you want to do SAX style
6
+ # parsing using HTML, check out Nokogiri::HTML::SAX.
7
+ #
8
+ # The basic way a SAX style parser works is by creating a parser,
9
+ # telling the parser about the events we're interested in, then giving
10
+ # the parser some XML to process. The parser will notify you when
11
+ # it encounters events you said you would like to know about.
12
+ #
13
+ # To register for events, you simply subclass Nokogiri::XML::SAX::Document,
14
+ # and implement the methods for which you would like notification.
15
+ #
16
+ # For example, if I want to be notified when a document ends, and when an
17
+ # element starts, I would write a class like this:
18
+ #
19
+ # class MyDocument < Nokogiri::XML::SAX::Document
20
+ # def end_document
21
+ # puts "the document has ended"
22
+ # end
23
+ #
24
+ # def start_element name, attributes = []
25
+ # puts "#{name} started"
26
+ # end
27
+ # end
28
+ #
29
+ # Then I would instantiate a SAX parser with this document, and feed the
30
+ # parser some XML
31
+ #
32
+ # # Create a new parser
33
+ # parser = Nokogiri::XML::SAX::Parser.new(MyDocument.new)
34
+ #
35
+ # # Feed the parser some XML
36
+ # parser.parse(File.open(ARGV[0], 'rb'))
37
+ #
38
+ # Now my document handler will be called when each node starts, and when
39
+ # the document ends. To see what kinds of events are available, take
40
+ # a look at Nokogiri::XML::SAX::Document.
41
+ #
42
+ # Two SAX parsers for XML are available, a parser that reads from a string
43
+ # or IO object as it feels necessary, and a parser that lets you spoon
44
+ # feed it XML. If you want to let Nokogiri deal with reading your XML,
45
+ # use the Nokogiri::XML::SAX::Parser. If you want to have fine-grained
46
+ # control over the XML input, use the Nokogiri::XML::SAX::PushParser.
47
+ module SAX
48
+ ###
49
+ # This class is used for registering types of events you are interested
50
+ # in handling. All of the methods on this class are available as
51
+ # possible events while parsing an XML document. To register for any
52
+ # particular event, just subclass this class and implement the methods
53
+ # you are interested in knowing about.
54
+ #
55
+ # To only be notified about start and end element events, write a class
56
+ # like this:
57
+ #
58
+ # class MyDocument < Nokogiri::XML::SAX::Document
59
+ # def start_element name, attrs = []
60
+ # puts "#{name} started!"
61
+ # end
62
+ #
63
+ # def end_element name
64
+ # puts "#{name} ended"
65
+ # end
66
+ # end
67
+ #
68
+ # You can use this event handler for any SAX style parser included with
69
+ # Nokogiri. See Nokogiri::XML::SAX, and Nokogiri::HTML::SAX.
70
+ class Document
71
+ ###
72
+ # Called when document starts parsing
73
+ def start_document
74
+ end
75
+
76
+ ###
77
+ # Called when document ends parsing
78
+ def end_document
79
+ end
80
+
81
+ ###
82
+ # Called at the beginning of an element
83
+ # +name+ is the name of the tag with +attrs+ as attributes
84
+ def start_element name, attrs = []
85
+ end
86
+
87
+ ###
88
+ # Called at the end of an element
89
+ # +name+ is the tag name
90
+ def end_element name
91
+ end
92
+
93
+ ###
94
+ # Called at the beginning of an element
95
+ # +name+ is the element name
96
+ # +attrs+ is a hash of attributes
97
+ # +prefix+ is the namespace prefix for the element
98
+ # +uri+ is the associated namespace URI
99
+ # +namespaces+ is a hash of namespace prefix:urls associated with the element
100
+ def start_element_ns(name, attrs = {}, prefix = nil, uri = nil, namespaces = {})
101
+ end
102
+
103
+ ###
104
+ # Called at the end of an element
105
+ # +name+ is the element's name
106
+ # +prefix+ is the namespace prefix associated with the element
107
+ # +uri+ is the associated namespace URI
108
+ def end_element_ns(name, prefix = nil, uri = nil)
109
+ end
110
+
111
+ ###
112
+ # Characters read between tags
113
+ # +string+ contains the character data
114
+ def characters string
115
+ end
116
+
117
+ ###
118
+ # Called when comments are encountered
119
+ # +string+ contains the comment data
120
+ def comment string
121
+ end
122
+
123
+ ###
124
+ # Called on document warnings
125
+ # +string+ contains the warning
126
+ def warning string
127
+ end
128
+
129
+ ###
130
+ # Called on document errors
131
+ # +string+ contains the error
132
+ def error string
133
+ end
134
+
135
+ ###
136
+ # Called when cdata blocks are found
137
+ # +string+ contains the cdata content
138
+ def cdata_block string
139
+ end
140
+ end
141
+ end
142
+ end
143
+ end
@@ -0,0 +1,101 @@
1
+ module Nokogiri
2
+ module XML
3
+ module SAX
4
+ ###
5
+ # This parser is a SAX style parser that reads its input as it
6
+ # deems necessary. The parser takes a Nokogiri::XML::SAX::Document,
7
+ # an optional encoding, then given an XML input, sends messages to
8
+ # the Nokogiri::XML::SAX::Document.
9
+ #
10
+ # Here is an example of using this parser:
11
+ #
12
+ # # Create a subclass of Nokogiri::XML::SAX::Document and implement
13
+ # # the events we care about:
14
+ # class MyDoc < Nokogiri::XML::SAX::Document
15
+ # def start_element name, attrs = []
16
+ # puts "starting: #{name}"
17
+ # end
18
+ #
19
+ # def end_element name
20
+ # puts "ending: #{name}"
21
+ # end
22
+ # end
23
+ #
24
+ # # Create our parser
25
+ # parser = Nokogiri::XML::SAX::Parser.new(MyDoc.new)
26
+ #
27
+ # # Send some XML to the parser
28
+ # parser.parse(File.read(ARGV[0]))
29
+ #
30
+ # For more information about SAX parsers, see Nokogiri::XML::SAX. Also
31
+ # see Nokogiri::XML::SAX::Document for the available events.
32
+ class Parser
33
+ # Encodings this parser supports
34
+ ENCODINGS = {
35
+ 'NONE' => 0, # No char encoding detected
36
+ 'UTF-8' => 1, # UTF-8
37
+ 'UTF16LE' => 2, # UTF-16 little endian
38
+ 'UTF16BE' => 3, # UTF-16 big endian
39
+ 'UCS4LE' => 4, # UCS-4 little endian
40
+ 'UCS4BE' => 5, # UCS-4 big endian
41
+ 'EBCDIC' => 6, # EBCDIC uh!
42
+ 'UCS4-2143' => 7, # UCS-4 unusual ordering
43
+ 'UCS4-3412' => 8, # UCS-4 unusual ordering
44
+ 'UCS2' => 9, # UCS-2
45
+ 'ISO-8859-1' => 10, # ISO-8859-1 ISO Latin 1
46
+ 'ISO-8859-2' => 11, # ISO-8859-2 ISO Latin 2
47
+ 'ISO-8859-3' => 12, # ISO-8859-3
48
+ 'ISO-8859-4' => 13, # ISO-8859-4
49
+ 'ISO-8859-5' => 14, # ISO-8859-5
50
+ 'ISO-8859-6' => 15, # ISO-8859-6
51
+ 'ISO-8859-7' => 16, # ISO-8859-7
52
+ 'ISO-8859-8' => 17, # ISO-8859-8
53
+ 'ISO-8859-9' => 18, # ISO-8859-9
54
+ 'ISO-2022-JP' => 19, # ISO-2022-JP
55
+ 'SHIFT-JIS' => 20, # Shift_JIS
56
+ 'EUC-JP' => 21, # EUC-JP
57
+ 'ASCII' => 22, # pure ASCII
58
+ }
59
+
60
+ # The Nokogiri::XML::SAX::Document where events will be sent.
61
+ attr_accessor :document
62
+
63
+ # The encoding being used for this document.
64
+ attr_accessor :encoding
65
+
66
+ # Create a new Parser with +doc+ and +encoding+
67
+ def initialize(doc = Nokogiri::XML::SAX::Document.new, encoding = 'ASCII')
68
+ @encoding = encoding
69
+ @document = doc
70
+ end
71
+
72
+ ###
73
+ # Parse given +thing+ which may be a string containing xml, or an
74
+ # IO object.
75
+ def parse thing
76
+ if thing.respond_to?(:read) && thing.respond_to?(:close)
77
+ parse_io(thing)
78
+ else
79
+ parse_memory(thing)
80
+ end
81
+ end
82
+
83
+ ###
84
+ # Parse given +io+
85
+ def parse_io io, encoding = 'ASCII'
86
+ @encoding = encoding
87
+ native_parse_io io, ENCODINGS[@encoding] || ENCODINGS['ASCII']
88
+ end
89
+
90
+ ###
91
+ # Parse a file with +filename+
92
+ def parse_file filename
93
+ raise ArgumentError unless filename
94
+ raise Errno::ENOENT unless File.exists?(filename)
95
+ raise Errno::EISDIR if File.directory?(filename)
96
+ native_parse_file filename
97
+ end
98
+ end
99
+ end
100
+ end
101
+ end
@@ -0,0 +1,60 @@
1
+ module Nokogiri
2
+ module XML
3
+ module SAX
4
+ ###
5
+ # PushParser can parse a document that is fed to it manually. It
6
+ # must be given a SAX::Document object which will be called with
7
+ # SAX events as the document is being parsed.
8
+ #
9
+ # Calling PushParser#<< writes XML to the parser, calling any SAX
10
+ # callbacks it can.
11
+ #
12
+ # PushParser#finish tells the parser that the document is finished
13
+ # and calls the end_document SAX method.
14
+ #
15
+ # Example:
16
+ #
17
+ # parser = PushParser.new(Class.new(XML::SAX::Document) {
18
+ # def start_document
19
+ # puts "start document called"
20
+ # end
21
+ # }.new)
22
+ # parser << "<div>hello<"
23
+ # parser << "/div>"
24
+ # parser.finish
25
+ class PushParser
26
+
27
+ # The Nokogiri::XML::SAX::Document on which the PushParser will be
28
+ # operating
29
+ attr_accessor :document
30
+
31
+ ###
32
+ # Create a new PushParser with +doc+ as the SAX Document, providing
33
+ # an optional +file_name+ and +encoding+
34
+ def initialize(doc = XML::SAX::Document.new, file_name = nil, encoding = 'ASCII')
35
+ @document = doc
36
+ @encoding = encoding
37
+ @sax_parser = XML::SAX::Parser.new(doc)
38
+
39
+ ## Create our push parser context
40
+ initialize_native(@sax_parser, file_name)
41
+ end
42
+
43
+ ###
44
+ # Write a +chunk+ of XML to the PushParser. Any callback methods
45
+ # that can be called will be called immediately.
46
+ def write chunk, last_chunk = false
47
+ native_write(chunk, last_chunk)
48
+ end
49
+ alias :<< :write
50
+
51
+ ###
52
+ # Finish the parsing. This method is only necessary for
53
+ # Nokogiri::XML::SAX::Document#end_document to be called.
54
+ def finish
55
+ write '', true
56
+ end
57
+ end
58
+ end
59
+ end
60
+ end
@@ -0,0 +1,65 @@
1
+ module Nokogiri
2
+ module XML
3
+ class << self
4
+ ###
5
+ # Create a new Nokogiri::XML::Schema object using a +string_or_io+
6
+ # object.
7
+ def Schema string_or_io
8
+ Schema.new(string_or_io)
9
+ end
10
+ end
11
+
12
+ ###
13
+ # Nokogiri::XML::Schema is used for validating XML against a schema
14
+ # (usually from an xsd file).
15
+ #
16
+ # == Synopsis
17
+ #
18
+ # Validate an XML document against a Schema. Loop over the errors that
19
+ # are returned and print them out:
20
+ #
21
+ # xsd = Nokogiri::XML::Schema(File.read(PO_SCHEMA_FILE))
22
+ # doc = Nokogiri::XML(File.read(PO_XML_FILE))
23
+ #
24
+ # xsd.validate(doc).each do |error|
25
+ # puts error.message
26
+ # end
27
+ #
28
+ # The list of errors are Nokogiri::XML::SyntaxError objects.
29
+ class Schema
30
+ # Errors while parsing the schema file
31
+ attr_accessor :errors
32
+
33
+ ###
34
+ # Create a new Nokogiri::XML::Schema object using a +string_or_io+
35
+ # object.
36
+ def self.new string_or_io
37
+ if string_or_io.respond_to?(:read)
38
+ string_or_io = string_or_io.read
39
+ end
40
+
41
+ read_memory(string_or_io)
42
+ end
43
+
44
+ ###
45
+ # Validate +thing+ against this schema. +thing+ can be a
46
+ # Nokogiri::XML::Document object, or a filename. An Array of
47
+ # Nokogiri::XML::SyntaxError objects found while validating the
48
+ # +thing+ is returned.
49
+ def validate thing
50
+ return validate_document(thing) if thing.is_a?(Nokogiri::XML::Document)
51
+
52
+ # FIXME libxml2 has an api for validating files. We should switch
53
+ # to that because it will probably save memory.
54
+ validate_document(Nokogiri::XML(File.read(thing)))
55
+ end
56
+
57
+ ###
58
+ # Returns true if +thing+ is a valid Nokogiri::XML::Document or
59
+ # file.
60
+ def valid? thing
61
+ validate(thing).length == 0
62
+ end
63
+ end
64
+ end
65
+ end
@@ -0,0 +1,34 @@
1
+ module Nokogiri
2
+ module XML
3
+ ###
4
+ # This class provides information about XML SyntaxErrors. These
5
+ # exceptions are typically stored on Nokogiri::XML::Document#errors.
6
+ class SyntaxError < ::Nokogiri::SyntaxError
7
+ ###
8
+ # return true if this is not an error
9
+ def none?
10
+ level == 0
11
+ end
12
+
13
+ ###
14
+ # return true if this is a warning
15
+ def warning?
16
+ level == 1
17
+ end
18
+
19
+ ###
20
+ # return true if this is an error
21
+ def error?
22
+ level == 2
23
+ end
24
+
25
+ ###
26
+ # return true if this error is fatal
27
+ def fatal?
28
+ level == 3
29
+ end
30
+
31
+ alias :to_s :message
32
+ end
33
+ end
34
+ end