nokogiri 1.0.0 → 1.6.8.1

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (309) hide show
  1. checksums.yaml +7 -0
  2. data/.autotest +26 -0
  3. data/.cross_rubies +9 -0
  4. data/.editorconfig +17 -0
  5. data/.gemtest +0 -0
  6. data/.travis.yml +51 -0
  7. data/CHANGELOG.rdoc +1160 -0
  8. data/CONTRIBUTING.md +42 -0
  9. data/C_CODING_STYLE.rdoc +33 -0
  10. data/Gemfile +22 -0
  11. data/LICENSE.txt +31 -0
  12. data/Manifest.txt +284 -40
  13. data/README.md +166 -0
  14. data/ROADMAP.md +111 -0
  15. data/Rakefile +310 -199
  16. data/STANDARD_RESPONSES.md +47 -0
  17. data/Y_U_NO_GEMSPEC.md +155 -0
  18. data/appveyor.yml +22 -0
  19. data/bin/nokogiri +118 -0
  20. data/build_all +45 -0
  21. data/dependencies.yml +29 -0
  22. data/ext/nokogiri/depend +358 -0
  23. data/ext/nokogiri/extconf.rb +664 -34
  24. data/ext/nokogiri/html_document.c +120 -33
  25. data/ext/nokogiri/html_document.h +1 -1
  26. data/ext/nokogiri/html_element_description.c +279 -0
  27. data/ext/nokogiri/html_element_description.h +10 -0
  28. data/ext/nokogiri/html_entity_lookup.c +32 -0
  29. data/ext/nokogiri/html_entity_lookup.h +8 -0
  30. data/ext/nokogiri/html_sax_parser_context.c +116 -0
  31. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  32. data/ext/nokogiri/html_sax_push_parser.c +87 -0
  33. data/ext/nokogiri/html_sax_push_parser.h +9 -0
  34. data/ext/nokogiri/nokogiri.c +145 -0
  35. data/ext/nokogiri/nokogiri.h +131 -0
  36. data/ext/nokogiri/xml_attr.c +94 -0
  37. data/ext/nokogiri/xml_attr.h +9 -0
  38. data/ext/nokogiri/xml_attribute_decl.c +70 -0
  39. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  40. data/ext/nokogiri/xml_cdata.c +23 -19
  41. data/ext/nokogiri/xml_cdata.h +1 -1
  42. data/ext/nokogiri/xml_comment.c +69 -0
  43. data/ext/nokogiri/xml_comment.h +9 -0
  44. data/ext/nokogiri/xml_document.c +501 -54
  45. data/ext/nokogiri/xml_document.h +14 -1
  46. data/ext/nokogiri/xml_document_fragment.c +48 -0
  47. data/ext/nokogiri/xml_document_fragment.h +10 -0
  48. data/ext/nokogiri/xml_dtd.c +109 -24
  49. data/ext/nokogiri/xml_dtd.h +3 -1
  50. data/ext/nokogiri/xml_element_content.c +123 -0
  51. data/ext/nokogiri/xml_element_content.h +10 -0
  52. data/ext/nokogiri/xml_element_decl.c +69 -0
  53. data/ext/nokogiri/xml_element_decl.h +9 -0
  54. data/ext/nokogiri/xml_encoding_handler.c +79 -0
  55. data/ext/nokogiri/xml_encoding_handler.h +8 -0
  56. data/ext/nokogiri/xml_entity_decl.c +110 -0
  57. data/ext/nokogiri/xml_entity_decl.h +10 -0
  58. data/ext/nokogiri/xml_entity_reference.c +52 -0
  59. data/ext/nokogiri/xml_entity_reference.h +9 -0
  60. data/ext/nokogiri/xml_io.c +60 -0
  61. data/ext/nokogiri/xml_io.h +11 -0
  62. data/ext/nokogiri/xml_libxml2_hacks.c +112 -0
  63. data/ext/nokogiri/xml_libxml2_hacks.h +12 -0
  64. data/ext/nokogiri/xml_namespace.c +117 -0
  65. data/ext/nokogiri/xml_namespace.h +13 -0
  66. data/ext/nokogiri/xml_node.c +1285 -315
  67. data/ext/nokogiri/xml_node.h +4 -6
  68. data/ext/nokogiri/xml_node_set.c +415 -54
  69. data/ext/nokogiri/xml_node_set.h +6 -2
  70. data/ext/nokogiri/xml_processing_instruction.c +56 -0
  71. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  72. data/ext/nokogiri/xml_reader.c +316 -77
  73. data/ext/nokogiri/xml_reader.h +1 -1
  74. data/ext/nokogiri/xml_relax_ng.c +161 -0
  75. data/ext/nokogiri/xml_relax_ng.h +9 -0
  76. data/ext/nokogiri/xml_sax_parser.c +215 -80
  77. data/ext/nokogiri/xml_sax_parser.h +30 -1
  78. data/ext/nokogiri/xml_sax_parser_context.c +262 -0
  79. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  80. data/ext/nokogiri/xml_sax_push_parser.c +115 -0
  81. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  82. data/ext/nokogiri/xml_schema.c +205 -0
  83. data/ext/nokogiri/xml_schema.h +9 -0
  84. data/ext/nokogiri/xml_syntax_error.c +45 -175
  85. data/ext/nokogiri/xml_syntax_error.h +4 -2
  86. data/ext/nokogiri/xml_text.c +37 -14
  87. data/ext/nokogiri/xml_text.h +1 -1
  88. data/ext/nokogiri/xml_xpath_context.c +230 -13
  89. data/ext/nokogiri/xml_xpath_context.h +2 -1
  90. data/ext/nokogiri/xslt_stylesheet.c +196 -34
  91. data/ext/nokogiri/xslt_stylesheet.h +6 -1
  92. data/lib/nokogiri/css/node.rb +18 -61
  93. data/lib/nokogiri/css/parser.rb +725 -17
  94. data/lib/nokogiri/css/parser.y +126 -63
  95. data/lib/nokogiri/css/parser_extras.rb +91 -0
  96. data/lib/nokogiri/css/syntax_error.rb +7 -0
  97. data/lib/nokogiri/css/tokenizer.rb +148 -5
  98. data/lib/nokogiri/css/tokenizer.rex +31 -39
  99. data/lib/nokogiri/css/xpath_visitor.rb +109 -51
  100. data/lib/nokogiri/css.rb +24 -3
  101. data/lib/nokogiri/decorators/slop.rb +42 -0
  102. data/lib/nokogiri/html/builder.rb +27 -1
  103. data/lib/nokogiri/html/document.rb +329 -3
  104. data/lib/nokogiri/html/document_fragment.rb +39 -0
  105. data/lib/nokogiri/html/element_description.rb +23 -0
  106. data/lib/nokogiri/html/element_description_defaults.rb +671 -0
  107. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  108. data/lib/nokogiri/html/sax/parser.rb +35 -4
  109. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  110. data/lib/nokogiri/html/sax/push_parser.rb +36 -0
  111. data/lib/nokogiri/html.rb +18 -76
  112. data/lib/nokogiri/syntax_error.rb +4 -0
  113. data/lib/nokogiri/version.rb +106 -1
  114. data/lib/nokogiri/xml/attr.rb +14 -0
  115. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  116. data/lib/nokogiri/xml/builder.rb +395 -31
  117. data/lib/nokogiri/xml/cdata.rb +4 -2
  118. data/lib/nokogiri/xml/character_data.rb +7 -0
  119. data/lib/nokogiri/xml/document.rb +267 -12
  120. data/lib/nokogiri/xml/document_fragment.rb +149 -0
  121. data/lib/nokogiri/xml/dtd.rb +27 -1
  122. data/lib/nokogiri/xml/element_content.rb +36 -0
  123. data/lib/nokogiri/xml/element_decl.rb +13 -0
  124. data/lib/nokogiri/xml/entity_decl.rb +19 -0
  125. data/lib/nokogiri/xml/namespace.rb +13 -0
  126. data/lib/nokogiri/xml/node/save_options.rb +61 -0
  127. data/lib/nokogiri/xml/node.rb +748 -109
  128. data/lib/nokogiri/xml/node_set.rb +200 -72
  129. data/lib/nokogiri/xml/parse_options.rb +120 -0
  130. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  131. data/lib/nokogiri/xml/pp/node.rb +56 -0
  132. data/lib/nokogiri/xml/pp.rb +2 -0
  133. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  134. data/lib/nokogiri/xml/reader.rb +102 -4
  135. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  136. data/lib/nokogiri/xml/sax/document.rb +114 -2
  137. data/lib/nokogiri/xml/sax/parser.rb +97 -7
  138. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  139. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  140. data/lib/nokogiri/xml/sax.rb +2 -7
  141. data/lib/nokogiri/xml/schema.rb +63 -0
  142. data/lib/nokogiri/xml/searchable.rb +221 -0
  143. data/lib/nokogiri/xml/syntax_error.rb +27 -1
  144. data/lib/nokogiri/xml/text.rb +4 -1
  145. data/lib/nokogiri/xml/xpath/syntax_error.rb +11 -0
  146. data/lib/nokogiri/xml/xpath.rb +4 -0
  147. data/lib/nokogiri/xml/xpath_context.rb +3 -1
  148. data/lib/nokogiri/xml.rb +45 -38
  149. data/lib/nokogiri/xslt/stylesheet.rb +19 -0
  150. data/lib/nokogiri/xslt.rb +47 -2
  151. data/lib/nokogiri.rb +117 -24
  152. data/lib/xsd/xmlparser/nokogiri.rb +102 -0
  153. data/patches/sort-patches-by-date +25 -0
  154. data/ports/archives/libxml2-2.9.4.tar.gz +0 -0
  155. data/ports/archives/libxslt-1.1.29.tar.gz +0 -0
  156. data/suppressions/README.txt +1 -0
  157. data/suppressions/nokogiri_ree-1.8.7.358.supp +61 -0
  158. data/suppressions/nokogiri_ruby-1.8.7.370.supp +0 -0
  159. data/suppressions/nokogiri_ruby-1.9.2.320.supp +28 -0
  160. data/suppressions/nokogiri_ruby-1.9.3.327.supp +28 -0
  161. data/tasks/test.rb +100 -0
  162. data/test/css/test_nthiness.rb +73 -6
  163. data/test/css/test_parser.rb +184 -39
  164. data/test/css/test_tokenizer.rb +72 -19
  165. data/test/css/test_xpath_visitor.rb +44 -2
  166. data/test/decorators/test_slop.rb +20 -0
  167. data/test/files/2ch.html +108 -0
  168. data/test/files/GH_1042.html +18 -0
  169. data/test/files/address_book.rlx +12 -0
  170. data/test/files/address_book.xml +10 -0
  171. data/test/files/atom.xml +344 -0
  172. data/test/files/bar/bar.xsd +4 -0
  173. data/test/files/bogus.xml +0 -0
  174. data/test/files/dont_hurt_em_why.xml +422 -0
  175. data/test/files/encoding.html +82 -0
  176. data/test/files/encoding.xhtml +84 -0
  177. data/test/files/exslt.xml +8 -0
  178. data/test/files/exslt.xslt +35 -0
  179. data/test/files/foo/foo.xsd +4 -0
  180. data/test/files/metacharset.html +10 -0
  181. data/test/files/namespace_pressure_test.xml +1684 -0
  182. data/test/files/noencoding.html +47 -0
  183. data/test/files/po.xml +32 -0
  184. data/test/files/po.xsd +66 -0
  185. data/test/files/saml/saml20assertion_schema.xsd +283 -0
  186. data/test/files/saml/saml20protocol_schema.xsd +302 -0
  187. data/test/files/saml/xenc_schema.xsd +146 -0
  188. data/test/files/saml/xmldsig_schema.xsd +318 -0
  189. data/test/files/shift_jis.html +10 -0
  190. data/test/files/shift_jis.xml +5 -0
  191. data/test/files/shift_jis_no_charset.html +9 -0
  192. data/test/files/slow-xpath.xml +25509 -0
  193. data/test/files/snuggles.xml +3 -0
  194. data/test/files/staff.dtd +10 -0
  195. data/test/files/test_document_url/bar.xml +2 -0
  196. data/test/files/test_document_url/document.dtd +4 -0
  197. data/test/files/test_document_url/document.xml +6 -0
  198. data/test/files/tlm.html +2 -1
  199. data/test/files/to_be_xincluded.xml +2 -0
  200. data/test/files/valid_bar.xml +2 -0
  201. data/test/files/xinclude.xml +4 -0
  202. data/test/helper.rb +124 -13
  203. data/test/html/sax/test_parser.rb +118 -4
  204. data/test/html/sax/test_parser_context.rb +46 -0
  205. data/test/html/sax/test_push_parser.rb +87 -0
  206. data/test/html/test_builder.rb +94 -8
  207. data/test/html/test_document.rb +626 -11
  208. data/test/html/test_document_encoding.rb +145 -0
  209. data/test/html/test_document_fragment.rb +301 -0
  210. data/test/html/test_element_description.rb +105 -0
  211. data/test/html/test_named_characters.rb +14 -0
  212. data/test/html/test_node.rb +212 -0
  213. data/test/html/test_node_encoding.rb +85 -0
  214. data/test/namespaces/test_additional_namespaces_in_builder_doc.rb +14 -0
  215. data/test/namespaces/test_namespaces_aliased_default.rb +24 -0
  216. data/test/namespaces/test_namespaces_in_builder_doc.rb +75 -0
  217. data/test/namespaces/test_namespaces_in_cloned_doc.rb +31 -0
  218. data/test/namespaces/test_namespaces_in_created_doc.rb +75 -0
  219. data/test/namespaces/test_namespaces_in_parsed_doc.rb +80 -0
  220. data/test/namespaces/test_namespaces_preservation.rb +31 -0
  221. data/test/test_convert_xpath.rb +2 -47
  222. data/test/test_css_cache.rb +45 -0
  223. data/test/test_encoding_handler.rb +48 -0
  224. data/test/test_memory_leak.rb +156 -0
  225. data/test/test_nokogiri.rb +103 -1
  226. data/test/test_soap4r_sax.rb +52 -0
  227. data/test/test_xslt_transforms.rb +293 -8
  228. data/test/xml/node/test_save_options.rb +28 -0
  229. data/test/xml/node/test_subclass.rb +44 -0
  230. data/test/xml/sax/test_parser.rb +309 -8
  231. data/test/xml/sax/test_parser_context.rb +115 -0
  232. data/test/xml/sax/test_push_parser.rb +157 -0
  233. data/test/xml/test_attr.rb +67 -0
  234. data/test/xml/test_attribute_decl.rb +86 -0
  235. data/test/xml/test_builder.rb +327 -2
  236. data/test/xml/test_c14n.rb +180 -0
  237. data/test/xml/test_cdata.rb +32 -2
  238. data/test/xml/test_comment.rb +40 -0
  239. data/test/xml/test_document.rb +846 -35
  240. data/test/xml/test_document_encoding.rb +31 -0
  241. data/test/xml/test_document_fragment.rb +271 -0
  242. data/test/xml/test_dtd.rb +153 -9
  243. data/test/xml/test_dtd_encoding.rb +31 -0
  244. data/test/xml/test_element_content.rb +56 -0
  245. data/test/xml/test_element_decl.rb +73 -0
  246. data/test/xml/test_entity_decl.rb +122 -0
  247. data/test/xml/test_entity_reference.rb +251 -0
  248. data/test/xml/test_namespace.rb +96 -0
  249. data/test/xml/test_node.rb +1126 -105
  250. data/test/xml/test_node_attributes.rb +115 -0
  251. data/test/xml/test_node_encoding.rb +69 -0
  252. data/test/xml/test_node_inheritance.rb +32 -0
  253. data/test/xml/test_node_reparenting.rb +549 -0
  254. data/test/xml/test_node_set.rb +668 -9
  255. data/test/xml/test_parse_options.rb +64 -0
  256. data/test/xml/test_processing_instruction.rb +30 -0
  257. data/test/xml/test_reader.rb +589 -0
  258. data/test/xml/test_reader_encoding.rb +134 -0
  259. data/test/xml/test_relax_ng.rb +60 -0
  260. data/test/xml/test_schema.rb +142 -0
  261. data/test/xml/test_syntax_error.rb +30 -0
  262. data/test/xml/test_text.rb +49 -2
  263. data/test/xml/test_unparented_node.rb +440 -0
  264. data/test/xml/test_xinclude.rb +83 -0
  265. data/test/xml/test_xpath.rb +445 -0
  266. data/test/xslt/test_custom_functions.rb +133 -0
  267. data/test/xslt/test_exception_handling.rb +37 -0
  268. data/test_all +107 -0
  269. metadata +459 -115
  270. data/History.txt +0 -6
  271. data/README.ja.txt +0 -86
  272. data/README.txt +0 -87
  273. data/ext/nokogiri/html_sax_parser.c +0 -32
  274. data/ext/nokogiri/html_sax_parser.h +0 -11
  275. data/ext/nokogiri/native.c +0 -40
  276. data/ext/nokogiri/native.h +0 -51
  277. data/ext/nokogiri/xml_xpath.c +0 -46
  278. data/ext/nokogiri/xml_xpath.h +0 -11
  279. data/lib/nokogiri/css/generated_parser.rb +0 -653
  280. data/lib/nokogiri/css/generated_tokenizer.rb +0 -159
  281. data/lib/nokogiri/decorators/hpricot/node.rb +0 -58
  282. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -14
  283. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -17
  284. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  285. data/lib/nokogiri/decorators.rb +0 -1
  286. data/lib/nokogiri/hpricot.rb +0 -47
  287. data/lib/nokogiri/xml/after_handler.rb +0 -18
  288. data/lib/nokogiri/xml/before_handler.rb +0 -32
  289. data/lib/nokogiri/xml/element.rb +0 -6
  290. data/lib/nokogiri/xml/entity_declaration.rb +0 -9
  291. data/nokogiri.gemspec +0 -34
  292. data/test/hpricot/files/basic.xhtml +0 -17
  293. data/test/hpricot/files/boingboing.html +0 -2266
  294. data/test/hpricot/files/cy0.html +0 -3653
  295. data/test/hpricot/files/immob.html +0 -400
  296. data/test/hpricot/files/pace_application.html +0 -1320
  297. data/test/hpricot/files/tenderlove.html +0 -16
  298. data/test/hpricot/files/uswebgen.html +0 -220
  299. data/test/hpricot/files/utf8.html +0 -1054
  300. data/test/hpricot/files/week9.html +0 -1723
  301. data/test/hpricot/files/why.xml +0 -19
  302. data/test/hpricot/load_files.rb +0 -7
  303. data/test/hpricot/test_alter.rb +0 -67
  304. data/test/hpricot/test_builder.rb +0 -27
  305. data/test/hpricot/test_parser.rb +0 -423
  306. data/test/hpricot/test_paths.rb +0 -15
  307. data/test/hpricot/test_preserved.rb +0 -78
  308. data/test/hpricot/test_xml.rb +0 -30
  309. data/test/test_reader.rb +0 -222
@@ -0,0 +1,3 @@
1
+ <x xmlns:tenderlove='http://tenderlovemaking.com/'>
2
+ <tenderlove:foo awesome='true'>snuggles!</tenderlove:foo>
3
+ </x>
@@ -0,0 +1,10 @@
1
+ <!ENTITY ent1 "es">
2
+ <!ENTITY ent2 "1900 Dallas Road">
3
+ <!ENTITY ent3 "Texas">
4
+ <!ENTITY ent4 "<entElement domestic='Yes'>Element data</entElement><?PItarget PIdata?>">
5
+ <!ENTITY ent5 PUBLIC "entityURI" "entityFile" NDATA notation1>
6
+ <!ENTITY ent1 "This entity should be discarded">
7
+ <!ELEMENT br EMPTY>
8
+ <!ATTLIST br width CDATA "0">
9
+ <!NOTATION notation1 PUBLIC "notation1File">
10
+ <!NOTATION notation2 SYSTEM "notation2File">
@@ -0,0 +1,2 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <blah>foobar</blah>
@@ -0,0 +1,4 @@
1
+ <!ENTITY bar SYSTEM "bar.xml">
2
+ <!ELEMENT document (body)>
3
+ <!ELEMENT blah ANY>
4
+ <!ELEMENT body ANY>
@@ -0,0 +1,6 @@
1
+ <?xml version="1.0" encoding="UTF-8" ?>
2
+ <!DOCTYPE document SYSTEM "document.dtd">
3
+
4
+ <document>
5
+ <body>&bar;</body>
6
+ </document>
data/test/files/tlm.html CHANGED
@@ -46,7 +46,7 @@
46
46
  .codesnip-container {border:1px solid #ccc; background:#eee; padding: 5px;margin:10px;}
47
47
  </style>
48
48
  <link rel="EditURI" type="application/rsd+xml" title="RSD" href="http://tenderlovemaking.com/xmlrpc.php?rsd" />
49
- <link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://tenderlovemaking.com/wp-includes/wlwmanifest.xml" />
49
+ <link rel="wlwmanifest" type="application/wlwmanifest+xml" href="http://tenderlovemaking.com/wp-includes/wlwmanifest.xml" />
50
50
  <meta name="generator" content="WordPress 2.6" />
51
51
 
52
52
  <link rel="stylesheet" type="text/css" href="http://tenderlovemaking.com/wp-content/plugins/spell_checker/spell_checker.css" />
@@ -826,6 +826,7 @@ page.<span class="me1">body</span> =~ /&lt;textarea<span class="br0">&#91;</span
826
826
  </ul>
827
827
  </div>
828
828
 
829
+ <div id="abc.123" class='special.character'>Special character div</div>
829
830
  <div id="footer">
830
831
  A design by <a href="http://blog.geminigeek.com/wordpress-theme">GeminiGeek</a> &bull; Powered by <a href="http://wordpress.org">Wordpress</a><!--&bull; <a href="#">CSS</a> &bull; <a href="#">xHTML 1.0</a>-->
831
832
  </div>
@@ -0,0 +1,2 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <included>this snippet is to be included from xinclude.xml</included>
@@ -0,0 +1,2 @@
1
+ <?xml version="1.0" encoding="UTF-8"?>
2
+ <bar />
@@ -0,0 +1,4 @@
1
+ <?xml version="1.0" encoding="utf-8"?>
2
+ <test xmlns:xi="http://www.w3.org/2001/XInclude">
3
+ <xi:include href="to_be_xincluded.xml"/>
4
+ </test>
data/test/helper.rb CHANGED
@@ -1,23 +1,101 @@
1
- require 'test/unit'
1
+ #Process.setrlimit(Process::RLIMIT_CORE, Process::RLIM_INFINITY) unless RUBY_PLATFORM =~ /(java|mswin|mingw)/i
2
+ $VERBOSE = true
3
+ require 'minitest/autorun'
4
+ require 'minitest/pride'
5
+ require 'fileutils'
6
+ require 'tempfile'
7
+ require 'pp'
2
8
 
3
- %w(../lib ../ext).each do |path|
4
- $LOAD_PATH.unshift(File.expand_path(File.join(File.dirname(__FILE__), path)))
9
+ require 'nokogiri'
10
+ if ENV['TEST_NOKOGIRI_WITH_LIBXML_RUBY']
11
+ require 'libxml'
12
+ warn "#{__FILE__}:#{__LINE__}: loaded libxml-ruby '#{LibXML::XML::VERSION}'"
5
13
  end
6
14
 
7
- require 'nokogiri'
15
+ warn "#{__FILE__}:#{__LINE__}: version info: #{Nokogiri::VERSION_INFO.inspect}"
8
16
 
9
17
  module Nokogiri
10
- class TestCase < Test::Unit::TestCase
11
- ASSETS_DIR = File.join(File.dirname(__FILE__), 'files')
12
- XML_FILE = File.join(ASSETS_DIR, 'staff.xml')
13
- XSLT_FILE = File.join(ASSETS_DIR, 'staff.xslt')
14
- HTML_FILE = File.join(ASSETS_DIR, 'tlm.html')
15
-
16
- undef :default_test
18
+ class TestCase < MiniTest::Spec
19
+ ASSETS_DIR = File.expand_path File.join(File.dirname(__FILE__), 'files')
20
+ ADDRESS_SCHEMA_FILE = File.join(ASSETS_DIR, 'address_book.rlx')
21
+ ADDRESS_XML_FILE = File.join(ASSETS_DIR, 'address_book.xml')
22
+ ENCODING_HTML_FILE = File.join(ASSETS_DIR, 'encoding.html')
23
+ ENCODING_XHTML_FILE = File.join(ASSETS_DIR, 'encoding.xhtml')
24
+ EXML_FILE = File.join(ASSETS_DIR, 'exslt.xml')
25
+ EXSLT_FILE = File.join(ASSETS_DIR, 'exslt.xslt')
26
+ HTML_FILE = File.join(ASSETS_DIR, 'tlm.html')
27
+ METACHARSET_FILE = File.join(ASSETS_DIR, 'metacharset.html')
28
+ NICH_FILE = File.join(ASSETS_DIR, '2ch.html')
29
+ NOENCODING_FILE = File.join(ASSETS_DIR, 'noencoding.html')
30
+ PO_SCHEMA_FILE = File.join(ASSETS_DIR, 'po.xsd')
31
+ PO_XML_FILE = File.join(ASSETS_DIR, 'po.xml')
32
+ SHIFT_JIS_HTML = File.join(ASSETS_DIR, 'shift_jis.html')
33
+ SHIFT_JIS_NO_CHARSET= File.join(ASSETS_DIR, 'shift_jis_no_charset.html')
34
+ SHIFT_JIS_XML = File.join(ASSETS_DIR, 'shift_jis.xml')
35
+ SNUGGLES_FILE = File.join(ASSETS_DIR, 'snuggles.xml')
36
+ XML_FILE = File.join(ASSETS_DIR, 'staff.xml')
37
+ XML_XINCLUDE_FILE = File.join(ASSETS_DIR, 'xinclude.xml')
38
+ XML_ATOM_FILE = File.join(ASSETS_DIR, 'atom.xml')
39
+ XSLT_FILE = File.join(ASSETS_DIR, 'staff.xslt')
40
+ XPATH_FILE = File.join(ASSETS_DIR, 'slow-xpath.xml')
17
41
 
18
42
  def teardown
19
- GC.start if ENV['NOKOGIRI_GC']
43
+ if ENV['NOKOGIRI_GC']
44
+ STDOUT.putc '!'
45
+ if RUBY_PLATFORM =~ /java/
46
+ require 'java'
47
+ java.lang.System.gc
48
+ else
49
+ GC.start
50
+ end
51
+ end
52
+ end
53
+
54
+ def stress_memory_while &block
55
+ # force the test to explicitly declare a skip
56
+ raise "JRuby doesn't do GC" if Nokogiri.jruby?
57
+
58
+ old_stress = GC.stress
59
+ begin
60
+ GC.stress = true
61
+ yield
62
+ ensure
63
+ GC.stress = old_stress
64
+ end
20
65
  end
66
+
67
+ def assert_indent amount, doc, message = nil
68
+ nodes = []
69
+ doc.traverse do |node|
70
+ nodes << node if node.text? && node.blank?
71
+ end
72
+ assert nodes.length > 0
73
+ nodes.each do |node|
74
+ len = node.content.gsub(/[\r\n]/, '').length
75
+ assert_equal(0, len % amount, message)
76
+ end
77
+ end
78
+
79
+ def util_decorate(document, decorator_module)
80
+ document.decorators(XML::Node) << decorator_module
81
+ document.decorators(XML::NodeSet) << decorator_module
82
+ document.decorate!
83
+ end
84
+
85
+ #
86
+ # Test::Unit backwards compatibility section
87
+ #
88
+ alias :assert_no_match :refute_match
89
+ alias :assert_not_nil :refute_nil
90
+ alias :assert_raise :assert_raises
91
+ alias :assert_not_equal :refute_equal
92
+
93
+ def assert_not_send send_ary, m = nil
94
+ recv, msg, *args = send_ary
95
+ m = message(m) {
96
+ "Expected #{mu_pp(recv)}.#{msg}(*#{mu_pp(args)}) to return false" }
97
+ assert !recv.__send__(msg, *args), m
98
+ end unless method_defined?(:assert_not_send)
21
99
  end
22
100
 
23
101
  module SAX
@@ -25,7 +103,15 @@ module Nokogiri
25
103
  class Doc < XML::SAX::Document
26
104
  attr_reader :start_elements, :start_document_called
27
105
  attr_reader :end_elements, :end_document_called
28
- attr_reader :data, :comments, :cdata_blocks
106
+ attr_reader :data, :comments, :cdata_blocks, :start_elements_namespace
107
+ attr_reader :errors, :warnings, :end_elements_namespace
108
+ attr_reader :xmldecls
109
+ attr_reader :processing_instructions
110
+
111
+ def xmldecl version, encoding, standalone
112
+ @xmldecls = [version, encoding, standalone].compact
113
+ super
114
+ end
29
115
 
30
116
  def start_document
31
117
  @start_document_called = true
@@ -37,16 +123,36 @@ module Nokogiri
37
123
  super
38
124
  end
39
125
 
126
+ def error error
127
+ (@errors ||= []) << error
128
+ super
129
+ end
130
+
131
+ def warning warning
132
+ (@warning ||= []) << warning
133
+ super
134
+ end
135
+
40
136
  def start_element *args
41
137
  (@start_elements ||= []) << args
42
138
  super
43
139
  end
44
140
 
141
+ def start_element_namespace *args
142
+ (@start_elements_namespace ||= []) << args
143
+ super
144
+ end
145
+
45
146
  def end_element *args
46
147
  (@end_elements ||= []) << args
47
148
  super
48
149
  end
49
150
 
151
+ def end_element_namespace *args
152
+ (@end_elements_namespace ||= []) << args
153
+ super
154
+ end
155
+
50
156
  def characters string
51
157
  @data ||= []
52
158
  @data += [string]
@@ -64,6 +170,11 @@ module Nokogiri
64
170
  @cdata_blocks += [string]
65
171
  super
66
172
  end
173
+
174
+ def processing_instruction name, content
175
+ @processing_instructions ||= []
176
+ @processing_instructions << [name, content]
177
+ end
67
178
  end
68
179
  end
69
180
  end
@@ -1,16 +1,71 @@
1
- require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', "helper"))
1
+ # -*- coding: utf-8 -*-
2
+ require "helper"
2
3
 
3
4
  module Nokogiri
4
5
  module HTML
5
6
  module SAX
6
7
  class TestParser < Nokogiri::SAX::TestCase
7
8
  def setup
9
+ super
8
10
  @parser = HTML::SAX::Parser.new(Doc.new)
9
11
  end
10
12
 
13
+ def test_parse_empty_document
14
+ # This caused a segfault in libxml 2.6.x
15
+ assert_nil @parser.parse ''
16
+ end
17
+
18
+ def test_parse_empty_file
19
+ # Make sure empty files don't break stuff
20
+ empty_file_name = File.join(ASSETS_DIR, 'bogus.xml')
21
+ # assert_nothing_raised do
22
+ @parser.parse_file empty_file_name
23
+ # end
24
+ end
25
+
11
26
  def test_parse_file
12
27
  @parser.parse_file(HTML_FILE)
13
- assert_equal 1110, @parser.document.end_elements.length
28
+
29
+ # Take a look at the comment in test_parse_document to know
30
+ # a possible reason for this difference.
31
+ if Nokogiri.uses_libxml?
32
+ assert_equal 1111, @parser.document.end_elements.length
33
+ else
34
+ assert_equal 1120, @parser.document.end_elements.length
35
+ end
36
+ end
37
+
38
+ def test_parse_file_nil_argument
39
+ assert_raises(ArgumentError) {
40
+ @parser.parse_file(nil)
41
+ }
42
+ end
43
+
44
+ def test_parse_file_non_existant
45
+ assert_raise Errno::ENOENT do
46
+ @parser.parse_file('there_is_no_reasonable_way_this_file_exists')
47
+ end
48
+ end
49
+
50
+ def test_parse_file_with_dir
51
+ assert_raise Errno::EISDIR do
52
+ @parser.parse_file(File.dirname(__FILE__))
53
+ end
54
+ end
55
+
56
+ def test_parse_memory_nil
57
+ assert_raise ArgumentError do
58
+ @parser.parse_memory(nil)
59
+ end
60
+ end
61
+
62
+ def test_parse_force_encoding
63
+ @parser.parse_memory(<<-HTML, 'UTF-8')
64
+ <meta http-equiv="Content-Type" content="text/html; charset=windows-1251">
65
+ Информация
66
+ HTML
67
+ assert_equal("Информация",
68
+ @parser.document.data.join.strip)
14
69
  end
15
70
 
16
71
  def test_parse_document
@@ -18,8 +73,67 @@ module Nokogiri
18
73
  <p>Paragraph 1</p>
19
74
  <p>Paragraph 2</p>
20
75
  eoxml
21
- assert_equal([["html", []], ["body", []], ["p", []], ["p", []]],
22
- @parser.document.start_elements)
76
+
77
+ # JRuby version is different because of the internal implementation
78
+ # JRuby version uses NekoHTML which inserts empty "head" elements.
79
+ #
80
+ # Currently the following features are set:
81
+ # "http://cyberneko.org/html/properties/names/elems" => "lower"
82
+ # "http://cyberneko.org/html/properties/names/attrs" => "lower"
83
+ if Nokogiri.uses_libxml?
84
+ assert_equal([["html", []], ["body", []], ["p", []], ["p", []]],
85
+ @parser.document.start_elements)
86
+ else
87
+ assert_equal([["html", []], ["head", []], ["body", []], ["p", []], ["p", []]],
88
+ @parser.document.start_elements)
89
+ end
90
+ end
91
+
92
+ def test_parser_attributes
93
+ html = <<-eohtml
94
+ <html>
95
+ <head>
96
+ <title>hello</title>
97
+ </head>
98
+ <body>
99
+ <img src="face.jpg" title="daddy &amp; me">
100
+ <hr noshade size="2">
101
+ </body>
102
+ </html>
103
+ eohtml
104
+
105
+ block_called = false
106
+ @parser.parse(html) { |ctx|
107
+ block_called = true
108
+ ctx.replace_entities = true
109
+ }
110
+
111
+ assert block_called
112
+
113
+ noshade_value = if Nokogiri.uses_libxml? && Nokogiri::VERSION_INFO['libxml']['loaded'] < '2.7.7'
114
+ ['noshade', 'noshade']
115
+ else
116
+ ['noshade', nil]
117
+ end
118
+
119
+ assert_equal [
120
+ ['html', []],
121
+ ['head', []],
122
+ ['title', []],
123
+ ['body', []],
124
+ ['img', [
125
+ ['src', 'face.jpg'],
126
+ ['title', 'daddy & me']
127
+ ]],
128
+ ['hr', [
129
+ noshade_value,
130
+ ['size', '2']
131
+ ]]
132
+ ], @parser.document.start_elements
133
+ end
134
+
135
+ def test_empty_processing_instruction
136
+ @parser.parse_memory("<strong>this will segfault<?strong>")
23
137
  end
24
138
  end
25
139
  end
@@ -0,0 +1,46 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require "helper"
4
+
5
+ module Nokogiri
6
+ module HTML
7
+ module SAX
8
+ class TestParserContext < Nokogiri::SAX::TestCase
9
+ def test_from_io
10
+ ctx = ParserContext.new StringIO.new('fo'), 'UTF-8'
11
+ assert ctx
12
+ end
13
+
14
+ def test_from_string
15
+ ctx = ParserContext.new 'blah blah'
16
+ assert ctx
17
+ end
18
+
19
+ def test_parse_with
20
+ ctx = ParserContext.new 'blah'
21
+ assert_raises ArgumentError do
22
+ ctx.parse_with nil
23
+ end
24
+ end
25
+
26
+ def test_parse_with_sax_parser
27
+ # assert_nothing_raised do
28
+ xml = "<root />"
29
+ ctx = ParserContext.new xml
30
+ parser = Parser.new Doc.new
31
+ ctx.parse_with parser
32
+ # end
33
+ end
34
+
35
+ def test_from_file
36
+ # assert_nothing_raised do
37
+ ctx = ParserContext.file HTML_FILE, 'UTF-8'
38
+ parser = Parser.new Doc.new
39
+ ctx.parse_with parser
40
+ # end
41
+ end
42
+ end
43
+ end
44
+ end
45
+ end
46
+
@@ -0,0 +1,87 @@
1
+ # -*- coding: utf-8 -*-
2
+
3
+ require "helper"
4
+
5
+ module Nokogiri
6
+ module HTML
7
+ module SAX
8
+ class TestPushParser < Nokogiri::SAX::TestCase
9
+ def setup
10
+ super
11
+ @parser = HTML::SAX::PushParser.new(Doc.new)
12
+ end
13
+
14
+ def test_end_document_called
15
+ @parser.<<(<<-eoxml)
16
+ <p id="asdfasdf">
17
+ <!-- This is a comment -->
18
+ Paragraph 1
19
+ </p>
20
+ eoxml
21
+ assert ! @parser.document.end_document_called
22
+ @parser.finish
23
+ assert @parser.document.end_document_called
24
+ end
25
+
26
+ def test_start_element
27
+ @parser.<<(<<-eoxml)
28
+ <!DOCTYPE html PUBLIC "-//W3C//DTD HTML 4.0 Transitional//EN" "http://www.w3.org/TR/REC-html40/loose.dtd">
29
+ <html><head><body><p id="asdfasdf">
30
+ eoxml
31
+
32
+ assert_equal [["html", []], ["head", []], ["body", []], ["p", [["id", "asdfasdf"]]]],
33
+ @parser.document.start_elements
34
+
35
+ @parser.<<(<<-eoxml)
36
+ <!-- This is a comment -->
37
+ Paragraph 1
38
+ </p></body></html>
39
+ eoxml
40
+ assert_equal [' This is a comment '], @parser.document.comments
41
+ @parser.finish
42
+ end
43
+
44
+
45
+ def test_chevron_partial_html
46
+ @parser.<<(<<-eoxml)
47
+ <p id="asdfasdf">
48
+ eoxml
49
+
50
+ @parser.<<(<<-eoxml)
51
+ <!-- This is a comment -->
52
+ Paragraph 1
53
+ </p>
54
+ eoxml
55
+ assert_equal [' This is a comment '], @parser.document.comments
56
+ @parser.finish
57
+ end
58
+
59
+ def test_chevron
60
+ @parser.<<(<<-eoxml)
61
+ <p id="asdfasdf">
62
+ <!-- This is a comment -->
63
+ Paragraph 1
64
+ </p>
65
+ eoxml
66
+ @parser.finish
67
+ assert_equal [' This is a comment '], @parser.document.comments
68
+ end
69
+
70
+ def test_default_options
71
+ assert_equal 0, @parser.options
72
+ end
73
+
74
+ def test_broken_encoding
75
+ skip("ultra hard to fix for pure Java version") if Nokogiri.jruby?
76
+ @parser.options |= XML::ParseOptions::RECOVER
77
+ # This is ISO_8859-1:
78
+ @parser.<< "<?xml version='1.0' encoding='UTF-8'?><r>Gau\337</r>"
79
+ @parser.finish
80
+ assert(@parser.document.errors.size >= 1)
81
+ assert_equal "Gau\337", @parser.document.data.join
82
+ assert_equal [["r"], ["body"], ["html"]], @parser.document.end_elements
83
+ end
84
+ end
85
+ end
86
+ end
87
+ end
@@ -1,8 +1,35 @@
1
- require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
1
+ require "helper"
2
2
 
3
3
  module Nokogiri
4
4
  module HTML
5
5
  class TestBuilder < Nokogiri::TestCase
6
+ def test_top_level_function_builds
7
+ foo = nil
8
+ Nokogiri() { |xml| foo = xml }
9
+ assert_instance_of Nokogiri::HTML::Builder, foo
10
+ end
11
+
12
+ def test_builder_with_explicit_tags
13
+ html_doc = Nokogiri::HTML::Builder.new {
14
+ div.slide(:class => 'another_class') {
15
+ node = Nokogiri::XML::Node.new("id", doc)
16
+ node.content = "hello"
17
+ insert(node)
18
+ }
19
+ }.doc
20
+ assert_equal 1, html_doc.css('div.slide > id').length
21
+ assert_equal 'hello', html_doc.at('div.slide > id').content
22
+ end
23
+
24
+ def test_hash_as_attributes_for_attribute_method
25
+ html = Nokogiri::HTML::Builder.new { ||
26
+ div.slide(:class => 'another_class') {
27
+ span 'Slide 1'
28
+ }
29
+ }.to_html
30
+ assert_match 'class="slide another_class"', html
31
+ end
32
+
6
33
  def test_hash_as_attributes
7
34
  builder = Nokogiri::HTML::Builder.new do
8
35
  div(:id => 'awesome') {
@@ -10,7 +37,32 @@ module Nokogiri
10
37
  }
11
38
  end
12
39
  assert_equal('<div id="awesome"><h1>america</h1></div>',
13
- builder.to_html.gsub(/\n/, ''))
40
+ builder.doc.root.to_html.gsub(/\n/, '').gsub(/>\s*</, '><'))
41
+ end
42
+
43
+ def test_href_with_attributes
44
+ uri = 'http://tenderlovemaking.com/'
45
+ built = Nokogiri::XML::Builder.new {
46
+ div {
47
+ a('King Khan & The Shrines', :href => uri)
48
+ }
49
+ }
50
+ assert_equal 'http://tenderlovemaking.com/',
51
+ built.doc.at('a')[:href]
52
+ end
53
+
54
+ def test_tag_nesting
55
+ builder = Nokogiri::HTML::Builder.new do
56
+ body {
57
+ span.left ''
58
+ span.middle {
59
+ div.icon ''
60
+ }
61
+ span.right ''
62
+ }
63
+ end
64
+ assert node = builder.doc.css('span.right').first
65
+ assert_equal 'middle', node.previous_sibling['class']
14
66
  end
15
67
 
16
68
  def test_has_ampersand
@@ -22,7 +74,7 @@ module Nokogiri
22
74
  end
23
75
  assert_equal(
24
76
  '<div class="rad" id="thing">&lt;awe&amp;some&gt;<b>hello &amp; world</b></div>',
25
- builder.to_html.gsub(/\n/, ''))
77
+ builder.doc.root.to_html.gsub(/\n/, ''))
26
78
  end
27
79
 
28
80
  def test_multi_tags
@@ -34,7 +86,7 @@ module Nokogiri
34
86
  end
35
87
  assert_equal(
36
88
  '<div class="rad" id="thing">&lt;awesome&gt;<b>hello</b></div>',
37
- builder.doc.to_html.gsub(/\n/, ''))
89
+ builder.doc.root.to_html.gsub(/\n/, ''))
38
90
  end
39
91
 
40
92
  def test_attributes_plus_block
@@ -44,7 +96,7 @@ module Nokogiri
44
96
  }
45
97
  end
46
98
  assert_equal('<div class="rad" id="thing">&lt;awesome&gt;</div>',
47
- builder.doc.to_html.chomp)
99
+ builder.doc.root.to_html.chomp)
48
100
  end
49
101
 
50
102
  def test_builder_adds_attributes
@@ -52,14 +104,14 @@ module Nokogiri
52
104
  div.rad.thing! "tender div"
53
105
  end
54
106
  assert_equal('<div class="rad" id="thing">tender div</div>',
55
- builder.doc.to_html.chomp)
107
+ builder.doc.root.to_html.chomp)
56
108
  end
57
109
 
58
110
  def test_bold_tag
59
111
  builder = Nokogiri::HTML::Builder.new do
60
112
  b "bold tag"
61
113
  end
62
- assert_equal('<b>bold tag</b>', builder.doc.to_html.chomp)
114
+ assert_equal('<b>bold tag</b>', builder.doc.root.to_html.chomp)
63
115
  end
64
116
 
65
117
  def test_html_then_body_tag
@@ -71,7 +123,41 @@ module Nokogiri
71
123
  }
72
124
  end
73
125
  assert_equal('<html><body><b>bold tag</b></body></html>',
74
- builder.doc.to_html.chomp)
126
+ builder.doc.root.to_html.chomp.gsub(/>\s*</, '><'))
127
+ end
128
+
129
+ def test_instance_eval_with_delegation_to_block_context
130
+ class << self
131
+ def foo
132
+ "foo!"
133
+ end
134
+ end
135
+
136
+ builder = Nokogiri::HTML::Builder.new { text foo }
137
+ assert builder.to_html.include?("foo!")
138
+ end
139
+
140
+ def test_builder_with_param
141
+ doc = Nokogiri::HTML::Builder.new { |html|
142
+ html.body {
143
+ html.p "hello world"
144
+ }
145
+ }.doc
146
+
147
+ assert node = doc.xpath('//body/p').first
148
+ assert_equal 'hello world', node.content
149
+ end
150
+
151
+ def test_builder_with_id
152
+ text = "hello world"
153
+ doc = Nokogiri::HTML::Builder.new { |html|
154
+ html.body {
155
+ html.id_ text
156
+ }
157
+ }.doc
158
+
159
+ assert node = doc.xpath('//body/id').first
160
+ assert_equal text, node.content
75
161
  end
76
162
  end
77
163
  end