nokogiri 1.0.0 → 1.6.8.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (309)
  1. checksums.yaml +7 -0
  2. data/.autotest +26 -0
  3. data/.cross_rubies +9 -0
  4. data/.editorconfig +17 -0
  5. data/.gemtest +0 -0
  6. data/.travis.yml +51 -0
  7. data/CHANGELOG.rdoc +1160 -0
  8. data/CONTRIBUTING.md +42 -0
  9. data/C_CODING_STYLE.rdoc +33 -0
  10. data/Gemfile +22 -0
  11. data/LICENSE.txt +31 -0
  12. data/Manifest.txt +284 -40
  13. data/README.md +166 -0
  14. data/ROADMAP.md +111 -0
  15. data/Rakefile +310 -199
  16. data/STANDARD_RESPONSES.md +47 -0
  17. data/Y_U_NO_GEMSPEC.md +155 -0
  18. data/appveyor.yml +22 -0
  19. data/bin/nokogiri +118 -0
  20. data/build_all +45 -0
  21. data/dependencies.yml +29 -0
  22. data/ext/nokogiri/depend +358 -0
  23. data/ext/nokogiri/extconf.rb +664 -34
  24. data/ext/nokogiri/html_document.c +120 -33
  25. data/ext/nokogiri/html_document.h +1 -1
  26. data/ext/nokogiri/html_element_description.c +279 -0
  27. data/ext/nokogiri/html_element_description.h +10 -0
  28. data/ext/nokogiri/html_entity_lookup.c +32 -0
  29. data/ext/nokogiri/html_entity_lookup.h +8 -0
  30. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  31. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  32. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  33. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  34. data/ext/nokogiri/nokogiri.c +145 -0
  35. data/ext/nokogiri/nokogiri.h +131 -0
  36. data/ext/nokogiri/xml_attr.c +94 -0
  37. data/ext/nokogiri/xml_attr.h +9 -0
  38. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  39. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  40. data/ext/nokogiri/xml_cdata.c +23 -19
  41. data/ext/nokogiri/xml_cdata.h +1 -1
  42. data/ext/nokogiri/xml_comment.c +69 -0
  43. data/ext/nokogiri/xml_comment.h +9 -0
  44. data/ext/nokogiri/xml_document.c +501 -54
  45. data/ext/nokogiri/xml_document.h +14 -1
  46. data/ext/nokogiri/xml_document_fragment.c +48 -0
  47. data/ext/nokogiri/xml_document_fragment.h +10 -0
  48. data/ext/nokogiri/xml_dtd.c +109 -24
  49. data/ext/nokogiri/xml_dtd.h +3 -1
  50. data/ext/nokogiri/xml_element_content.c +123 -0
  51. data/ext/nokogiri/xml_element_content.h +10 -0
  52. data/ext/nokogiri/xml_element_decl.c +69 -0
  53. data/ext/nokogiri/xml_element_decl.h +9 -0
  54. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  55. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  56. data/ext/nokogiri/xml_entity_decl.c +110 -0
  57. data/ext/nokogiri/xml_entity_decl.h +10 -0
  58. data/ext/nokogiri/xml_entity_reference.c +52 -0
  59. data/ext/nokogiri/xml_entity_reference.h +9 -0
  60. data/ext/nokogiri/xml_io.c +60 -0
  61. data/ext/nokogiri/xml_io.h +11 -0
  62. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  63. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  64. data/ext/nokogiri/xml_namespace.c +117 -0
  65. data/ext/nokogiri/xml_namespace.h +13 -0
  66. data/ext/nokogiri/xml_node.c +1285 -315
  67. data/ext/nokogiri/xml_node.h +4 -6
  68. data/ext/nokogiri/xml_node_set.c +415 -54
  69. data/ext/nokogiri/xml_node_set.h +6 -2
  70. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  71. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  72. data/ext/nokogiri/xml_reader.c +316 -77
  73. data/ext/nokogiri/xml_reader.h +1 -1
  74. data/ext/nokogiri/xml_relax_ng.c +161 -0
  75. data/ext/nokogiri/xml_relax_ng.h +9 -0
  76. data/ext/nokogiri/xml_sax_parser.c +215 -80
  77. data/ext/nokogiri/xml_sax_parser.h +30 -1
  78. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  79. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  80. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  81. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  82. data/ext/nokogiri/xml_schema.c +205 -0
  83. data/ext/nokogiri/xml_schema.h +9 -0
  84. data/ext/nokogiri/xml_syntax_error.c +45 -175
  85. data/ext/nokogiri/xml_syntax_error.h +4 -2
  86. data/ext/nokogiri/xml_text.c +37 -14
  87. data/ext/nokogiri/xml_text.h +1 -1
  88. data/ext/nokogiri/xml_xpath_context.c +230 -13
  89. data/ext/nokogiri/xml_xpath_context.h +2 -1
  90. data/ext/nokogiri/xslt_stylesheet.c +196 -34
  91. data/ext/nokogiri/xslt_stylesheet.h +6 -1
  92. data/lib/nokogiri/css/node.rb +18 -61
  93. data/lib/nokogiri/css/parser.rb +725 -17
  94. data/lib/nokogiri/css/parser.y +126 -63
  95. data/lib/nokogiri/css/parser_extras.rb +91 -0
  96. data/lib/nokogiri/css/syntax_error.rb +7 -0
  97. data/lib/nokogiri/css/tokenizer.rb +148 -5
  98. data/lib/nokogiri/css/tokenizer.rex +31 -39
  99. data/lib/nokogiri/css/xpath_visitor.rb +109 -51
  100. data/lib/nokogiri/css.rb +24 -3
  101. data/lib/nokogiri/decorators/slop.rb +42 -0
  102. data/lib/nokogiri/html/builder.rb +27 -1
  103. data/lib/nokogiri/html/document.rb +329 -3
  104. data/lib/nokogiri/html/document_fragment.rb +39 -0
  105. data/lib/nokogiri/html/element_description.rb +23 -0
  106. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  107. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  108. data/lib/nokogiri/html/sax/parser.rb +35 -4
  109. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  110. data/lib/nokogiri/html/sax/push_parser.rb +36 -0
  111. data/lib/nokogiri/html.rb +18 -76
  112. data/lib/nokogiri/syntax_error.rb +4 -0
  113. data/lib/nokogiri/version.rb +106 -1
  114. data/lib/nokogiri/xml/attr.rb +14 -0
  115. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  116. data/lib/nokogiri/xml/builder.rb +395 -31
  117. data/lib/nokogiri/xml/cdata.rb +4 -2
  118. data/lib/nokogiri/xml/character_data.rb +7 -0
  119. data/lib/nokogiri/xml/document.rb +267 -12
  120. data/lib/nokogiri/xml/document_fragment.rb +149 -0
  121. data/lib/nokogiri/xml/dtd.rb +27 -1
  122. data/lib/nokogiri/xml/element_content.rb +36 -0
  123. data/lib/nokogiri/xml/element_decl.rb +13 -0
  124. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  125. data/lib/nokogiri/xml/namespace.rb +13 -0
  126. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  127. data/lib/nokogiri/xml/node.rb +748 -109
  128. data/lib/nokogiri/xml/node_set.rb +200 -72
  129. data/lib/nokogiri/xml/parse_options.rb +120 -0
  130. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  131. data/lib/nokogiri/xml/pp/node.rb +56 -0
  132. data/lib/nokogiri/xml/pp.rb +2 -0
  133. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  134. data/lib/nokogiri/xml/reader.rb +102 -4
  135. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  136. data/lib/nokogiri/xml/sax/document.rb +114 -2
  137. data/lib/nokogiri/xml/sax/parser.rb +97 -7
  138. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  139. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  140. data/lib/nokogiri/xml/sax.rb +2 -7
  141. data/lib/nokogiri/xml/schema.rb +63 -0
  142. data/lib/nokogiri/xml/searchable.rb +221 -0
  143. data/lib/nokogiri/xml/syntax_error.rb +27 -1
  144. data/lib/nokogiri/xml/text.rb +4 -1
  145. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  146. data/lib/nokogiri/xml/xpath.rb +4 -0
  147. data/lib/nokogiri/xml/xpath_context.rb +3 -1
  148. data/lib/nokogiri/xml.rb +45 -38
  149. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  150. data/lib/nokogiri/xslt.rb +47 -2
  151. data/lib/nokogiri.rb +117 -24
  152. data/lib/xsd/xmlparser/nokogiri.rb +102 -0
  153. data/patches/sort-patches-by-date +25 -0
  154. data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
  155. data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
  156. data/suppressions/README.txt +1 -0
  157. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  158. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  159. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  160. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  161. data/tasks/test.rb +100 -0
  162. data/test/css/test_nthiness.rb +73 -6
  163. data/test/css/test_parser.rb +184 -39
  164. data/test/css/test_tokenizer.rb +72 -19
  165. data/test/css/test_xpath_visitor.rb +44 -2
  166. data/test/decorators/test_slop.rb +20 -0
  167. data/test/files/2ch.html +108 -0
  168. data/test/files/GH_1042.html +18 -0
  169. data/test/files/address_book.rlx +12 -0
  170. data/test/files/address_book.xml +10 -0
  171. data/test/files/atom.xml +344 -0
  172. data/test/files/bar/bar.xsd +4 -0
  173. data/test/files/bogus.xml +0 -0
  174. data/test/files/dont_hurt_em_why.xml +422 -0
  175. data/test/files/encoding.html +82 -0
  176. data/test/files/encoding.xhtml +84 -0
  177. data/test/files/exslt.xml +8 -0
  178. data/test/files/exslt.xslt +35 -0
  179. data/test/files/foo/foo.xsd +4 -0
  180. data/test/files/metacharset.html +10 -0
  181. data/test/files/namespace_pressure_test.xml +1684 -0
  182. data/test/files/noencoding.html +47 -0
  183. data/test/files/po.xml +32 -0
  184. data/test/files/po.xsd +66 -0
  185. data/test/files/saml/saml20assertion_schema.xsd +283 -0
  186. data/test/files/saml/saml20protocol_schema.xsd +302 -0
  187. data/test/files/saml/xenc_schema.xsd +146 -0
  188. data/test/files/saml/xmldsig_schema.xsd +318 -0
  189. data/test/files/shift_jis.html +10 -0
  190. data/test/files/shift_jis.xml +5 -0
  191. data/test/files/shift_jis_no_charset.html +9 -0
  192. data/test/files/slow-xpath.xml +25509 -0
  193. data/test/files/snuggles.xml +3 -0
  194. data/test/files/staff.dtd +10 -0
  195. data/test/files/test_document_url/bar.xml +2 -0
  196. data/test/files/test_document_url/document.dtd +4 -0
  197. data/test/files/test_document_url/document.xml +6 -0
  198. data/test/files/tlm.html +2 -1
  199. data/test/files/to_be_xincluded.xml +2 -0
  200. data/test/files/valid_bar.xml +2 -0
  201. data/test/files/xinclude.xml +4 -0
  202. data/test/helper.rb +124 -13
  203. data/test/html/sax/test_parser.rb +118 -4
  204. data/test/html/sax/test_parser_context.rb +46 -0
  205. data/test/html/sax/test_push_parser.rb +87 -0
  206. data/test/html/test_builder.rb +94 -8
  207. data/test/html/test_document.rb +626 -11
  208. data/test/html/test_document_encoding.rb +145 -0
  209. data/test/html/test_document_fragment.rb +301 -0
  210. data/test/html/test_element_description.rb +105 -0
  211. data/test/html/test_named_characters.rb +14 -0
  212. data/test/html/test_node.rb +212 -0
  213. data/test/html/test_node_encoding.rb +85 -0
  214. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
  215. data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
  216. data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
  217. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  218. data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
  219. data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
  220. data/test/namespaces/test_namespaces_preservation.rb +31 -0
  221. data/test/test_convert_xpath.rb +2 -47
  222. data/test/test_css_cache.rb +45 -0
  223. data/test/test_encoding_handler.rb +48 -0
  224. data/test/test_memory_leak.rb +156 -0
  225. data/test/test_nokogiri.rb +103 -1
  226. data/test/test_soap4r_sax.rb +52 -0
  227. data/test/test_xslt_transforms.rb +293 -8
  228. data/test/xml/node/test_save_options.rb +28 -0
  229. data/test/xml/node/test_subclass.rb +44 -0
  230. data/test/xml/sax/test_parser.rb +309 -8
  231. data/test/xml/sax/test_parser_context.rb +115 -0
  232. data/test/xml/sax/test_push_parser.rb +157 -0
  233. data/test/xml/test_attr.rb +67 -0
  234. data/test/xml/test_attribute_decl.rb +86 -0
  235. data/test/xml/test_builder.rb +327 -2
  236. data/test/xml/test_c14n.rb +180 -0
  237. data/test/xml/test_cdata.rb +32 -2
  238. data/test/xml/test_comment.rb +40 -0
  239. data/test/xml/test_document.rb +846 -35
  240. data/test/xml/test_document_encoding.rb +31 -0
  241. data/test/xml/test_document_fragment.rb +271 -0
  242. data/test/xml/test_dtd.rb +153 -9
  243. data/test/xml/test_dtd_encoding.rb +31 -0
  244. data/test/xml/test_element_content.rb +56 -0
  245. data/test/xml/test_element_decl.rb +73 -0
  246. data/test/xml/test_entity_decl.rb +122 -0
  247. data/test/xml/test_entity_reference.rb +251 -0
  248. data/test/xml/test_namespace.rb +96 -0
  249. data/test/xml/test_node.rb +1126 -105
  250. data/test/xml/test_node_attributes.rb +115 -0
  251. data/test/xml/test_node_encoding.rb +69 -0
  252. data/test/xml/test_node_inheritance.rb +32 -0
  253. data/test/xml/test_node_reparenting.rb +549 -0
  254. data/test/xml/test_node_set.rb +668 -9
  255. data/test/xml/test_parse_options.rb +64 -0
  256. data/test/xml/test_processing_instruction.rb +30 -0
  257. data/test/xml/test_reader.rb +589 -0
  258. data/test/xml/test_reader_encoding.rb +134 -0
  259. data/test/xml/test_relax_ng.rb +60 -0
  260. data/test/xml/test_schema.rb +142 -0
  261. data/test/xml/test_syntax_error.rb +30 -0
  262. data/test/xml/test_text.rb +49 -2
  263. data/test/xml/test_unparented_node.rb +440 -0
  264. data/test/xml/test_xinclude.rb +83 -0
  265. data/test/xml/test_xpath.rb +445 -0
  266. data/test/xslt/test_custom_functions.rb +133 -0
  267. data/test/xslt/test_exception_handling.rb +37 -0
  268. data/test_all +107 -0
  269. metadata +459 -115
  270. data/History.txt +0 -6
  271. data/README.ja.txt +0 -86
  272. data/README.txt +0 -87
  273. data/ext/nokogiri/html_sax_parser.c +0 -32
  274. data/ext/nokogiri/html_sax_parser.h +0 -11
  275. data/ext/nokogiri/native.c +0 -40
  276. data/ext/nokogiri/native.h +0 -51
  277. data/ext/nokogiri/xml_xpath.c +0 -46
  278. data/ext/nokogiri/xml_xpath.h +0 -11
  279. data/lib/nokogiri/css/generated_parser.rb +0 -653
  280. data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
  281. data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
  282. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
  283. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
  284. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  285. data/lib/nokogiri/decorators.rb +0 -1
  286. data/lib/nokogiri/hpricot.rb +0 -47
  287. data/lib/nokogiri/xml/after_handler.rb +0 -18
  288. data/lib/nokogiri/xml/before_handler.rb +0 -32
  289. data/lib/nokogiri/xml/element.rb +0 -6
  290. data/lib/nokogiri/xml/entity_declaration.rb +0 -9
  291. data/nokogiri.gemspec +0 -34
  292. data/test/hpricot/files/basic.xhtml +0 -17
  293. data/test/hpricot/files/boingboing.html +0 -2266
  294. data/test/hpricot/files/cy0.html +0 -3653
  295. data/test/hpricot/files/immob.html +0 -400
  296. data/test/hpricot/files/pace_application.html +0 -1320
  297. data/test/hpricot/files/tenderlove.html +0 -16
  298. data/test/hpricot/files/uswebgen.html +0 -220
  299. data/test/hpricot/files/utf8.html +0 -1054
  300. data/test/hpricot/files/week9.html +0 -1723
  301. data/test/hpricot/files/why.xml +0 -19
  302. data/test/hpricot/load_files.rb +0 -7
  303. data/test/hpricot/test_alter.rb +0 -67
  304. data/test/hpricot/test_builder.rb +0 -27
  305. data/test/hpricot/test_parser.rb +0 -423
  306. data/test/hpricot/test_paths.rb +0 -15
  307. data/test/hpricot/test_preserved.rb +0 -78
  308. data/test/hpricot/test_xml.rb +0 -30
  309. data/test/test_reader.rb +0 -222
@@ -1,28 +1,258 @@
1
- require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', "helper"))
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require "helper"
2
4
 
3
5
  module Nokogiri
4
6
  module XML
5
7
  module SAX
6
8
  class TestParser < Nokogiri::SAX::TestCase
7
9
  def setup
10
+ super
8
11
  @parser = XML::SAX::Parser.new(Doc.new)
9
12
  end
10
13
 
11
- def test_parse
14
+ def test_parser_context_yielded_io
15
+ doc = Doc.new
16
+ parser = XML::SAX::Parser.new doc
17
+ xml = "<foo a='&amp;b'/>"
18
+
19
+ block_called = false
20
+ parser.parse(StringIO.new(xml)) { |ctx|
21
+ block_called = true
22
+ ctx.replace_entities = true
23
+ }
24
+
25
+ assert block_called
26
+
27
+ assert_equal [['foo', [['a', '&b']]]], doc.start_elements
28
+ end
29
+
30
+ def test_parser_context_yielded_in_memory
31
+ doc = Doc.new
32
+ parser = XML::SAX::Parser.new doc
33
+ xml = "<foo a='&amp;b'/>"
34
+
35
+ block_called = false
36
+ parser.parse(xml) { |ctx|
37
+ block_called = true
38
+ ctx.replace_entities = true
39
+ }
40
+
41
+ assert block_called
42
+
43
+ assert_equal [['foo', [['a', '&b']]]], doc.start_elements
44
+ end
45
+
46
+ def test_xml_decl
47
+ [
48
+ ['', nil],
49
+ ['<?xml version="1.0" ?>',
50
+ ['1.0']],
51
+ ['<?xml version="1.0" encoding="UTF-8" ?>',
52
+ ['1.0', 'UTF-8']],
53
+ ['<?xml version="1.0" standalone="yes"?>',
54
+ ['1.0', 'yes']],
55
+ ['<?xml version="1.0" standalone="no"?>',
56
+ ['1.0', 'no']],
57
+ ['<?xml version="1.0" encoding="UTF-8" standalone="no"?>',
58
+ ['1.0', "UTF-8", 'no']],
59
+ ['<?xml version="1.0" encoding="ISO-8859-1" standalone="yes"?>',
60
+ ['1.0', "ISO-8859-1", 'yes']]
61
+ ].each do |decl, value|
62
+ parser = XML::SAX::Parser.new(Doc.new)
63
+
64
+ xml = "#{decl}\n<root />"
65
+ parser.parse xml
66
+ assert parser.document.start_document_called, xml
67
+ assert_equal value, parser.document.xmldecls, xml
68
+ end
69
+ end
70
+
71
+ def test_parse_empty
72
+ assert_raises RuntimeError do
73
+ @parser.parse('')
74
+ end
75
+ end
76
+
77
+ def test_namespace_declaration_order_is_saved
78
+ @parser.parse <<-eoxml
79
+ <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
80
+ <a foo:bar='hello' />
81
+ </root>
82
+ eoxml
83
+ assert_equal 2, @parser.document.start_elements_namespace.length
84
+ el = @parser.document.start_elements_namespace.first
85
+ namespaces = el.last
86
+ assert_equal ['foo', 'http://foo.example.com/'], namespaces.first
87
+ assert_equal [nil, 'http://example.com/'], namespaces.last
88
+ end
89
+
90
+ def test_bad_document_calls_error_handler
91
+ @parser.parse('<foo><bar></foo>')
92
+ assert @parser.document.errors
93
+ assert @parser.document.errors.length > 0
94
+ end
95
+
96
+ def test_namespace_are_super_fun_to_parse
97
+ @parser.parse <<-eoxml
98
+ <root xmlns:foo='http://foo.example.com/'>
99
+ <a foo:bar='hello' />
100
+ <b xmlns:foo='http://bar.example.com/'>
101
+ <a foo:bar='hello' />
102
+ </b>
103
+ <foo:bar>hello world</foo:bar>
104
+ </root>
105
+ eoxml
106
+
107
+ assert @parser.document.start_elements_namespace.length > 0
108
+ el = @parser.document.start_elements_namespace[1]
109
+ assert_equal 'a', el.first
110
+ assert_equal 1, el[1].length
111
+
112
+ attribute = el[1].first
113
+ assert_equal 'bar', attribute.localname
114
+ assert_equal 'foo', attribute.prefix
115
+ assert_equal 'hello', attribute.value
116
+ assert_equal 'http://foo.example.com/', attribute.uri
117
+ end
118
+
119
+ def test_sax_v1_namespace_attribute_declarations
120
+ @parser.parse <<-eoxml
121
+ <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
122
+ <a foo:bar='hello' />
123
+ <b xmlns:foo='http://bar.example.com/'>
124
+ <a foo:bar='hello' />
125
+ </b>
126
+ <foo:bar>hello world</foo:bar>
127
+ </root>
128
+ eoxml
129
+ assert @parser.document.start_elements.length > 0
130
+ elm = @parser.document.start_elements.first
131
+ assert_equal 'root', elm.first
132
+ assert elm[1].include?(['xmlns:foo', 'http://foo.example.com/'])
133
+ assert elm[1].include?(['xmlns', 'http://example.com/'])
134
+ end
135
+
136
+ def test_sax_v1_namespace_nodes
137
+ @parser.parse <<-eoxml
138
+ <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
139
+ <a foo:bar='hello' />
140
+ <b xmlns:foo='http://bar.example.com/'>
141
+ <a foo:bar='hello' />
142
+ </b>
143
+ <foo:bar>hello world</foo:bar>
144
+ </root>
145
+ eoxml
146
+ assert_equal 5, @parser.document.start_elements.length
147
+ assert @parser.document.start_elements.map(&:first).include?('foo:bar')
148
+ assert @parser.document.end_elements.map(&:first).include?('foo:bar')
149
+ end
150
+
151
+ def test_start_is_called_without_namespace
152
+ @parser.parse(<<-eoxml)
153
+ <root xmlns:foo='http://foo.example.com/' xmlns='http://example.com/'>
154
+ <foo:f><bar></foo:f>
155
+ </root>
156
+ eoxml
157
+ assert_equal ['root', 'foo:f', 'bar'],
158
+ @parser.document.start_elements.map(&:first)
159
+ end
160
+
161
+ def test_parser_sets_encoding
162
+ parser = XML::SAX::Parser.new(Doc.new, 'UTF-8')
163
+ assert_equal 'UTF-8', parser.encoding
164
+ end
165
+
166
+ def test_errors_set_after_parsing_bad_dom
167
+ doc = Nokogiri::XML('<foo><bar></foo>')
168
+ assert doc.errors
169
+
170
+ @parser.parse('<foo><bar></foo>')
171
+ assert @parser.document.errors
172
+ assert @parser.document.errors.length > 0
173
+
174
+ doc.errors.each do |error|
175
+ assert_equal 'UTF-8', error.message.encoding.name
176
+ end
177
+
178
+ # when using JRuby Nokogiri, more errors will be generated as the DOM
179
+ # parser continues to parse an ill-formed document, while the sax parser
180
+ # will stop at the first error
181
+ unless Nokogiri.jruby?
182
+ assert_equal doc.errors.length, @parser.document.errors.length
183
+ end
184
+ end
185
+
186
+ def test_parse_with_memory_argument
187
+ @parser.parse(File.read(XML_FILE))
188
+ assert(@parser.document.cdata_blocks.length > 0)
189
+ end
190
+
191
+ def test_parse_with_io_argument
12
192
  File.open(XML_FILE, 'rb') { |f|
13
193
  @parser.parse(f)
14
194
  }
15
- @parser.parse(File.read(XML_FILE))
195
+ assert(@parser.document.cdata_blocks.length > 0)
16
196
  end
17
197
 
18
198
  def test_parse_io
199
+ call_parse_io_with_encoding 'UTF-8'
200
+ end
201
+
202
+ # issue #828
203
+ def test_parse_io_lower_case_encoding
204
+ call_parse_io_with_encoding 'utf-8'
205
+ end
206
+
207
+ def call_parse_io_with_encoding encoding
19
208
  File.open(XML_FILE, 'rb') { |f|
20
- @parser.parse_io(f)
209
+ @parser.parse_io(f, encoding)
21
210
  }
211
+ assert(@parser.document.cdata_blocks.length > 0)
212
+
213
+ called = false
214
+ @parser.document.start_elements.flatten.each do |thing|
215
+ assert_equal 'UTF-8', thing.encoding.name
216
+ called = true
217
+ end
218
+ assert called
219
+
220
+ called = false
221
+ @parser.document.end_elements.flatten.each do |thing|
222
+ assert_equal 'UTF-8', thing.encoding.name
223
+ called = true
224
+ end
225
+ assert called
226
+
227
+ called = false
228
+ @parser.document.data.each do |thing|
229
+ assert_equal 'UTF-8', thing.encoding.name
230
+ called = true
231
+ end
232
+ assert called
233
+
234
+ called = false
235
+ @parser.document.comments.flatten.each do |thing|
236
+ assert_equal 'UTF-8', thing.encoding.name
237
+ called = true
238
+ end
239
+ assert called
240
+
241
+ called = false
242
+ @parser.document.cdata_blocks.flatten.each do |thing|
243
+ assert_equal 'UTF-8', thing.encoding.name
244
+ called = true
245
+ end
246
+ assert called
22
247
  end
23
248
 
24
249
  def test_parse_file
25
250
  @parser.parse_file(XML_FILE)
251
+
252
+ assert_raises(ArgumentError) {
253
+ @parser.parse_file(nil)
254
+ }
255
+
26
256
  assert_raises(Errno::ENOENT) {
27
257
  @parser.parse_file('')
28
258
  }
@@ -31,6 +261,15 @@ module Nokogiri
31
261
  }
32
262
  end
33
263
 
264
+ def test_render_parse_nil_param
265
+ assert_raises(ArgumentError) { @parser.parse_memory(nil) }
266
+ end
267
+
268
+ def test_bad_encoding_args
269
+ assert_raises(ArgumentError) { XML::SAX::Parser.new(Doc.new, 'not an encoding') }
270
+ assert_raises(ArgumentError) { @parser.parse_io(StringIO.new('<root/>'), 'not an encoding')}
271
+ end
272
+
34
273
  def test_ctag
35
274
  @parser.parse_memory(<<-eoxml)
36
275
  <p id="asdfasdf">
@@ -77,15 +316,77 @@ module Nokogiri
77
316
  @parser.parse_memory(<<-eoxml)
78
317
  <p id="asdfasdf">Paragraph 1</p>
79
318
  eoxml
80
- assert_equal [["p", ["id", "asdfasdf"]]],
319
+ assert_equal [["p", [["id", "asdfasdf"]]]],
320
+ @parser.document.start_elements
321
+ end
322
+
323
+ def test_start_element_attrs_include_namespaces
324
+ @parser.parse_memory(<<-eoxml)
325
+ <p xmlns:foo='http://foo.example.com/'>Paragraph 1</p>
326
+ eoxml
327
+ assert_equal [["p", [['xmlns:foo', 'http://foo.example.com/']]]],
81
328
  @parser.document.start_elements
82
329
  end
83
330
 
84
- def test_parse_document
331
+ def test_processing_instruction
85
332
  @parser.parse_memory(<<-eoxml)
86
- <p>Paragraph 1</p>
87
- <p>Paragraph 2</p>
333
+ <?xml-stylesheet href="a.xsl" type="text/xsl"?>
334
+ <?xml version="1.0"?>
335
+ eoxml
336
+ assert_equal [['xml-stylesheet', 'href="a.xsl" type="text/xsl"']],
337
+ @parser.document.processing_instructions
338
+ end
339
+
340
+ if Nokogiri.uses_libxml? # JRuby SAXParser only parses well-formed XML documents
341
+ def test_parse_document
342
+ @parser.parse_memory(<<-eoxml)
343
+ <p>Paragraph 1</p>
344
+ <p>Paragraph 2</p>
345
+ eoxml
346
+ end
347
+ end
348
+
349
+ def test_parser_attributes
350
+ xml = <<-eoxml
351
+ <?xml version="1.0" ?><root><foo a="&amp;b" c="&gt;d" /></root>
352
+ eoxml
353
+
354
+ block_called = false
355
+ @parser.parse(xml) { |ctx|
356
+ block_called = true
357
+ ctx.replace_entities = true
358
+ }
359
+
360
+ assert block_called
361
+
362
+ assert_equal [['root', []], ['foo', [['a', '&b'], ['c', '>d']]]], @parser.document.start_elements
363
+ end
364
+
365
+ def test_recovery_from_incorrect_xml
366
+ xml = <<-eoxml
367
+ <?xml version="1.0" ?><Root><Data><?xml version='1.0'?><Item>hey</Item></Data><Data><Item>hey yourself</Item></Data></Root>
368
+ eoxml
369
+
370
+ block_called = false
371
+ @parser.parse(xml) { |ctx|
372
+ block_called = true
373
+ ctx.recovery = true
374
+ }
375
+
376
+ assert block_called
377
+
378
+ assert_equal [['Root', []], ['Data', []], ['Item', []], ['Data', []], ['Item', []]], @parser.document.start_elements
379
+ end
380
+
381
+ def test_square_bracket_in_text # issue 1261
382
+ xml = <<-eoxml
383
+ <tu tuid="87dea04cf60af103ff09d1dba36ae820" segtype="block">
384
+ <prop type="x-smartling-string-variant">en:#:home_page:#:stories:#:[6]:#:name</prop>
385
+ <tuv xml:lang="en-US"><seg>Sandy S.</seg></tuv>
386
+ </tu>
88
387
  eoxml
388
+ @parser.parse(xml)
389
+ assert @parser.document.data.must_include "en:#:home_page:#:stories:#:[6]:#:name"
89
390
  end
90
391
  end
91
392
  end
@@ -0,0 +1,115 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require "helper"
4
+
5
+ module Nokogiri
6
+ module XML
7
+ module SAX
8
+ class TestParserContext < Nokogiri::SAX::TestCase
9
+ def setup
10
+ @xml = '<hello>
11
+
12
+ world
13
+ <inter>
14
+ <net>
15
+ </net>
16
+ </inter>
17
+
18
+ </hello>'
19
+ end
20
+
21
+ class Counter < Nokogiri::XML::SAX::Document
22
+ attr_accessor :context, :lines, :columns
23
+ def initialize
24
+ @context = nil
25
+ @lines = []
26
+ @columns = []
27
+ end
28
+
29
+ def start_element name, attrs = []
30
+ @lines << [name, context.line]
31
+ @columns << [name, context.column]
32
+ end
33
+ end
34
+
35
+ def test_line_numbers
36
+ sax_handler = Counter.new
37
+
38
+ parser = Nokogiri::XML::SAX::Parser.new(sax_handler)
39
+ parser.parse(@xml) do |ctx|
40
+ sax_handler.context = ctx
41
+ end
42
+
43
+ assert_equal [["hello", 1], ["inter", 4], ["net", 5]],
44
+ sax_handler.lines
45
+ end
46
+
47
+ def test_column_numbers
48
+ sax_handler = Counter.new
49
+
50
+ parser = Nokogiri::XML::SAX::Parser.new(sax_handler)
51
+ parser.parse(@xml) do |ctx|
52
+ sax_handler.context = ctx
53
+ end
54
+
55
+ assert_equal [["hello", 7], ["inter", 7], ["net", 9]],
56
+ sax_handler.columns
57
+ end
58
+
59
+ def test_replace_entities
60
+ pc = ParserContext.new StringIO.new('<root />'), 'UTF-8'
61
+ pc.replace_entities = false
62
+ assert_equal false, pc.replace_entities
63
+
64
+ pc.replace_entities = true
65
+ assert_equal true, pc.replace_entities
66
+ end
67
+
68
+ def test_recovery
69
+ pc = ParserContext.new StringIO.new('<root />'), 'UTF-8'
70
+ pc.recovery = false
71
+ assert_equal false, pc.recovery
72
+
73
+ pc.recovery = true
74
+ assert_equal true, pc.recovery
75
+ end
76
+
77
+ def test_from_io
78
+ ctx = ParserContext.new StringIO.new('fo'), 'UTF-8'
79
+ assert ctx
80
+ end
81
+
82
+ def test_from_string
83
+ assert ParserContext.new 'blah blah'
84
+ end
85
+
86
+ def test_parse_with
87
+ ctx = ParserContext.new 'blah'
88
+ assert_raises ArgumentError do
89
+ ctx.parse_with nil
90
+ end
91
+ end
92
+
93
+ def test_parse_with_sax_parser
94
+ xml = "<root />"
95
+ ctx = ParserContext.new xml
96
+ parser = Parser.new Doc.new
97
+ assert_nil ctx.parse_with parser
98
+ end
99
+
100
+ def test_from_file
101
+ ctx = ParserContext.file XML_FILE
102
+ parser = Parser.new Doc.new
103
+ assert_nil ctx.parse_with parser
104
+ end
105
+
106
+ def test_parse_with_returns_nil
107
+ xml = "<root />"
108
+ ctx = ParserContext.new xml
109
+ parser = Parser.new Doc.new
110
+ assert_nil ctx.parse_with(parser)
111
+ end
112
+ end
113
+ end
114
+ end
115
+ end
@@ -0,0 +1,157 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require "helper"
4
+
5
+ module Nokogiri
6
+ module XML
7
+ module SAX
8
+ class TestPushParser < Nokogiri::SAX::TestCase
9
+ def setup
10
+ super
11
+ @parser = XML::SAX::PushParser.new(Doc.new)
12
+ end
13
+
14
+ def test_exception
15
+ assert_raises(SyntaxError) do
16
+ @parser << "<foo /><foo />"
17
+ end
18
+
19
+ assert_raises(SyntaxError) do
20
+ @parser << nil
21
+ end
22
+ end
23
+
24
+ def test_end_document_called
25
+ @parser.<<(<<-eoxml)
26
+ <p id="asdfasdf">
27
+ <!-- This is a comment -->
28
+ Paragraph 1
29
+ </p>
30
+ eoxml
31
+ assert ! @parser.document.end_document_called
32
+ @parser.finish
33
+ assert @parser.document.end_document_called
34
+ end
35
+
36
+ def test_start_element
37
+ @parser.<<(<<-eoxml)
38
+ <p id="asdfasdf">
39
+ eoxml
40
+
41
+ assert_equal [["p", [["id", "asdfasdf"]]]],
42
+ @parser.document.start_elements
43
+
44
+ @parser.<<(<<-eoxml)
45
+ <!-- This is a comment -->
46
+ Paragraph 1
47
+ </p>
48
+ eoxml
49
+ assert_equal [' This is a comment '], @parser.document.comments
50
+ @parser.finish
51
+ end
52
+
53
+ def test_start_element_with_namespaces
54
+ @parser.<<(<<-eoxml)
55
+ <p xmlns:foo="http://foo.example.com/">
56
+ eoxml
57
+
58
+ assert_equal [["p", [["xmlns:foo", "http://foo.example.com/"]]]],
59
+ @parser.document.start_elements
60
+
61
+ @parser.<<(<<-eoxml)
62
+ <!-- This is a comment -->
63
+ Paragraph 1
64
+ </p>
65
+ eoxml
66
+ assert_equal [' This is a comment '], @parser.document.comments
67
+ @parser.finish
68
+ end
69
+
70
+ def test_start_element_ns
71
+ @parser.<<(<<-eoxml)
72
+ <stream:stream xmlns='jabber:client' xmlns:stream='http://etherx.jabber.org/streams' version='1.0' size='large'></stream:stream>
73
+ eoxml
74
+
75
+ assert_equal 1, @parser.document.start_elements_namespace.length
76
+ el = @parser.document.start_elements_namespace.first
77
+
78
+ assert_equal 'stream', el.first
79
+ assert_equal 2, el[1].length
80
+ assert_equal [['version', '1.0'], ['size', 'large']],
81
+ el[1].map { |x| [x.localname, x.value] }
82
+
83
+ assert_equal 'stream', el[2]
84
+ assert_equal 'http://etherx.jabber.org/streams', el[3]
85
+ @parser.finish
86
+ end
87
+
88
+ def test_end_element_ns
89
+ @parser.<<(<<-eoxml)
90
+ <stream:stream xmlns='jabber:client' xmlns:stream='http://etherx.jabber.org/streams' version='1.0'></stream:stream>
91
+ eoxml
92
+
93
+ assert_equal [['stream', 'stream', 'http://etherx.jabber.org/streams']],
94
+ @parser.document.end_elements_namespace
95
+ @parser.finish
96
+ end
97
+
98
+ def test_chevron_partial_xml
99
+ @parser.<<(<<-eoxml)
100
+ <p id="asdfasdf">
101
+ eoxml
102
+
103
+ @parser.<<(<<-eoxml)
104
+ <!-- This is a comment -->
105
+ Paragraph 1
106
+ </p>
107
+ eoxml
108
+ assert_equal [' This is a comment '], @parser.document.comments
109
+ @parser.finish
110
+ end
111
+
112
+ def test_chevron
113
+ @parser.<<(<<-eoxml)
114
+ <p id="asdfasdf">
115
+ <!-- This is a comment -->
116
+ Paragraph 1
117
+ </p>
118
+ eoxml
119
+ @parser.finish
120
+ assert_equal [' This is a comment '], @parser.document.comments
121
+ end
122
+
123
+ def test_default_options
124
+ assert_equal 0, @parser.options
125
+ end
126
+
127
+ def test_recover
128
+ @parser.options |= XML::ParseOptions::RECOVER
129
+ @parser.<<(<<-eoxml)
130
+ <p>
131
+ Foo
132
+ <bar>
133
+ Bar
134
+ </p>
135
+ eoxml
136
+ @parser.finish
137
+ assert(@parser.document.errors.size >= 1)
138
+ assert_equal [["p", []], ["bar", []]], @parser.document.start_elements
139
+ assert_equal "FooBar", @parser.document.data.map { |x|
140
+ x.gsub(/\s/, '')
141
+ }.join
142
+ end
143
+
144
+ def test_broken_encoding
145
+ skip("ultra hard to fix for pure Java version") if Nokogiri.jruby?
146
+ @parser.options |= XML::ParseOptions::RECOVER
147
+ # This is ISO_8859-1:
148
+ @parser.<< "<?xml version='1.0' encoding='UTF-8'?><r>Gau\337</r>"
149
+ @parser.finish
150
+ assert(@parser.document.errors.size >= 1)
151
+ assert_equal "Gau\337", @parser.document.data.join
152
+ assert_equal [["r"]], @parser.document.end_elements
153
+ end
154
+ end
155
+ end
156
+ end
157
+ end
@@ -0,0 +1,67 @@
1
+ require "helper"
2
+
3
+ module Nokogiri
4
+ module XML
5
+ class TestAttr < Nokogiri::TestCase
6
+ def test_new
7
+ 100.times {
8
+ doc = Nokogiri::XML::Document.new
9
+ assert doc
10
+ assert Nokogiri::XML::Attr.new(doc, 'foo')
11
+ }
12
+ end
13
+
14
+ def test_content=
15
+ xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE)
16
+ address = xml.xpath('//address')[3]
17
+ street = address.attributes['street']
18
+ street.content = "Y&ent1;"
19
+ assert_equal "Y&ent1;", street.value
20
+ end
21
+
22
+ def test_value=
23
+ xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE)
24
+ address = xml.xpath('//address')[3]
25
+ street = address.attributes['street']
26
+ street.value = "Y&ent1;"
27
+ assert_equal "Y&ent1;", street.value
28
+ end
29
+
30
+ def test_unlink # aliased as :remove
31
+ xml = Nokogiri::XML.parse(File.read(XML_FILE), XML_FILE)
32
+ address = xml.xpath('/staff/employee/address').first
33
+ assert_equal 'Yes', address['domestic']
34
+
35
+ attr = address.attribute_nodes.first
36
+ return_val = attr.unlink
37
+ assert_nil address['domestic']
38
+ assert_equal attr, return_val
39
+ end
40
+
41
+ def test_parsing_attribute_namespace
42
+ doc = Nokogiri::XML <<-EOXML
43
+ <root xmlns='http://google.com/' xmlns:f='http://flavorjon.es/'>
44
+ <div f:myattr='foo'></div>
45
+ </root>
46
+ EOXML
47
+
48
+ node = doc.at_css "div"
49
+ attr = node.attributes["myattr"]
50
+ assert_equal "http://flavorjon.es/", attr.namespace.href
51
+ end
52
+
53
+ def test_setting_attribute_namespace
54
+ doc = Nokogiri::XML <<-EOXML
55
+ <root xmlns='http://google.com/' xmlns:f='http://flavorjon.es/'>
56
+ <div f:myattr='foo'></div>
57
+ </root>
58
+ EOXML
59
+
60
+ node = doc.at_css "div"
61
+ attr = node.attributes["myattr"]
62
+ attr.add_namespace("fizzle", "http://fizzle.com/")
63
+ assert_equal "http://fizzle.com/", attr.namespace.href
64
+ end
65
+ end
66
+ end
67
+ end