nokogiri 1.3.0-x86-mswin32

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (256) hide show
  1. data/.autotest +27 -0
  2. data/CHANGELOG.ja.rdoc +233 -0
  3. data/CHANGELOG.rdoc +222 -0
  4. data/Manifest.txt +247 -0
  5. data/README.ja.rdoc +103 -0
  6. data/README.rdoc +117 -0
  7. data/Rakefile +205 -0
  8. data/bin/nokogiri +47 -0
  9. data/ext/nokogiri/extconf.rb +89 -0
  10. data/ext/nokogiri/html_document.c +183 -0
  11. data/ext/nokogiri/html_document.h +10 -0
  12. data/ext/nokogiri/html_element_description.c +272 -0
  13. data/ext/nokogiri/html_element_description.h +10 -0
  14. data/ext/nokogiri/html_entity_lookup.c +30 -0
  15. data/ext/nokogiri/html_entity_lookup.h +8 -0
  16. data/ext/nokogiri/html_sax_parser.c +57 -0
  17. data/ext/nokogiri/html_sax_parser.h +11 -0
  18. data/ext/nokogiri/iconv.dll +0 -0
  19. data/ext/nokogiri/libexslt.dll +0 -0
  20. data/ext/nokogiri/libxml2.dll +0 -0
  21. data/ext/nokogiri/libxslt.dll +0 -0
  22. data/ext/nokogiri/nokogiri.c +81 -0
  23. data/ext/nokogiri/nokogiri.h +149 -0
  24. data/ext/nokogiri/xml_attr.c +92 -0
  25. data/ext/nokogiri/xml_attr.h +9 -0
  26. data/ext/nokogiri/xml_cdata.c +53 -0
  27. data/ext/nokogiri/xml_cdata.h +9 -0
  28. data/ext/nokogiri/xml_comment.c +51 -0
  29. data/ext/nokogiri/xml_comment.h +9 -0
  30. data/ext/nokogiri/xml_document.c +308 -0
  31. data/ext/nokogiri/xml_document.h +21 -0
  32. data/ext/nokogiri/xml_document_fragment.c +48 -0
  33. data/ext/nokogiri/xml_document_fragment.h +10 -0
  34. data/ext/nokogiri/xml_dtd.c +102 -0
  35. data/ext/nokogiri/xml_dtd.h +8 -0
  36. data/ext/nokogiri/xml_entity_reference.c +50 -0
  37. data/ext/nokogiri/xml_entity_reference.h +9 -0
  38. data/ext/nokogiri/xml_io.c +24 -0
  39. data/ext/nokogiri/xml_io.h +10 -0
  40. data/ext/nokogiri/xml_namespace.c +69 -0
  41. data/ext/nokogiri/xml_namespace.h +12 -0
  42. data/ext/nokogiri/xml_node.c +928 -0
  43. data/ext/nokogiri/xml_node.h +14 -0
  44. data/ext/nokogiri/xml_node_set.c +386 -0
  45. data/ext/nokogiri/xml_node_set.h +9 -0
  46. data/ext/nokogiri/xml_processing_instruction.c +54 -0
  47. data/ext/nokogiri/xml_processing_instruction.h +9 -0
  48. data/ext/nokogiri/xml_reader.c +572 -0
  49. data/ext/nokogiri/xml_reader.h +10 -0
  50. data/ext/nokogiri/xml_relax_ng.c +106 -0
  51. data/ext/nokogiri/xml_relax_ng.h +9 -0
  52. data/ext/nokogiri/xml_sax_parser.c +336 -0
  53. data/ext/nokogiri/xml_sax_parser.h +10 -0
  54. data/ext/nokogiri/xml_sax_push_parser.c +86 -0
  55. data/ext/nokogiri/xml_sax_push_parser.h +9 -0
  56. data/ext/nokogiri/xml_schema.c +107 -0
  57. data/ext/nokogiri/xml_schema.h +9 -0
  58. data/ext/nokogiri/xml_syntax_error.c +203 -0
  59. data/ext/nokogiri/xml_syntax_error.h +12 -0
  60. data/ext/nokogiri/xml_text.c +47 -0
  61. data/ext/nokogiri/xml_text.h +9 -0
  62. data/ext/nokogiri/xml_xpath.c +53 -0
  63. data/ext/nokogiri/xml_xpath.h +11 -0
  64. data/ext/nokogiri/xml_xpath_context.c +252 -0
  65. data/ext/nokogiri/xml_xpath_context.h +9 -0
  66. data/ext/nokogiri/xslt_stylesheet.c +131 -0
  67. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  68. data/ext/nokogiri/zlib1.dll +0 -0
  69. data/lib/action-nokogiri.rb +36 -0
  70. data/lib/nokogiri.rb +110 -0
  71. data/lib/nokogiri/1.8/nokogiri.so +0 -0
  72. data/lib/nokogiri/1.9/nokogiri.so +0 -0
  73. data/lib/nokogiri/css.rb +25 -0
  74. data/lib/nokogiri/css/generated_parser.rb +748 -0
  75. data/lib/nokogiri/css/generated_tokenizer.rb +144 -0
  76. data/lib/nokogiri/css/node.rb +107 -0
  77. data/lib/nokogiri/css/parser.rb +82 -0
  78. data/lib/nokogiri/css/parser.y +227 -0
  79. data/lib/nokogiri/css/syntax_error.rb +7 -0
  80. data/lib/nokogiri/css/tokenizer.rb +11 -0
  81. data/lib/nokogiri/css/tokenizer.rex +54 -0
  82. data/lib/nokogiri/css/xpath_visitor.rb +172 -0
  83. data/lib/nokogiri/decorators.rb +2 -0
  84. data/lib/nokogiri/decorators/hpricot.rb +3 -0
  85. data/lib/nokogiri/decorators/hpricot/node.rb +56 -0
  86. data/lib/nokogiri/decorators/hpricot/node_set.rb +54 -0
  87. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +30 -0
  88. data/lib/nokogiri/decorators/slop.rb +33 -0
  89. data/lib/nokogiri/ffi/html/document.rb +37 -0
  90. data/lib/nokogiri/ffi/html/element_description.rb +85 -0
  91. data/lib/nokogiri/ffi/html/entity_lookup.rb +16 -0
  92. data/lib/nokogiri/ffi/html/sax/parser.rb +21 -0
  93. data/lib/nokogiri/ffi/io_callbacks.rb +32 -0
  94. data/lib/nokogiri/ffi/libxml.rb +314 -0
  95. data/lib/nokogiri/ffi/structs/common_node.rb +26 -0
  96. data/lib/nokogiri/ffi/structs/html_elem_desc.rb +24 -0
  97. data/lib/nokogiri/ffi/structs/html_entity_desc.rb +13 -0
  98. data/lib/nokogiri/ffi/structs/xml_alloc.rb +16 -0
  99. data/lib/nokogiri/ffi/structs/xml_attr.rb +19 -0
  100. data/lib/nokogiri/ffi/structs/xml_buffer.rb +16 -0
  101. data/lib/nokogiri/ffi/structs/xml_document.rb +108 -0
  102. data/lib/nokogiri/ffi/structs/xml_dtd.rb +26 -0
  103. data/lib/nokogiri/ffi/structs/xml_node.rb +28 -0
  104. data/lib/nokogiri/ffi/structs/xml_node_set.rb +53 -0
  105. data/lib/nokogiri/ffi/structs/xml_notation.rb +11 -0
  106. data/lib/nokogiri/ffi/structs/xml_ns.rb +15 -0
  107. data/lib/nokogiri/ffi/structs/xml_relax_ng.rb +14 -0
  108. data/lib/nokogiri/ffi/structs/xml_sax_handler.rb +51 -0
  109. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +14 -0
  110. data/lib/nokogiri/ffi/structs/xml_schema.rb +13 -0
  111. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +31 -0
  112. data/lib/nokogiri/ffi/structs/xml_text_reader.rb +12 -0
  113. data/lib/nokogiri/ffi/structs/xml_xpath_context.rb +37 -0
  114. data/lib/nokogiri/ffi/structs/xml_xpath_object.rb +35 -0
  115. data/lib/nokogiri/ffi/structs/xml_xpath_parser_context.rb +20 -0
  116. data/lib/nokogiri/ffi/structs/xslt_stylesheet.rb +13 -0
  117. data/lib/nokogiri/ffi/xml/attr.rb +41 -0
  118. data/lib/nokogiri/ffi/xml/cdata.rb +19 -0
  119. data/lib/nokogiri/ffi/xml/comment.rb +18 -0
  120. data/lib/nokogiri/ffi/xml/document.rb +107 -0
  121. data/lib/nokogiri/ffi/xml/document_fragment.rb +26 -0
  122. data/lib/nokogiri/ffi/xml/dtd.rb +42 -0
  123. data/lib/nokogiri/ffi/xml/entity_reference.rb +19 -0
  124. data/lib/nokogiri/ffi/xml/namespace.rb +38 -0
  125. data/lib/nokogiri/ffi/xml/node.rb +380 -0
  126. data/lib/nokogiri/ffi/xml/node_set.rb +130 -0
  127. data/lib/nokogiri/ffi/xml/processing_instruction.rb +20 -0
  128. data/lib/nokogiri/ffi/xml/reader.rb +217 -0
  129. data/lib/nokogiri/ffi/xml/relax_ng.rb +51 -0
  130. data/lib/nokogiri/ffi/xml/sax/parser.rb +148 -0
  131. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +38 -0
  132. data/lib/nokogiri/ffi/xml/schema.rb +55 -0
  133. data/lib/nokogiri/ffi/xml/syntax_error.rb +76 -0
  134. data/lib/nokogiri/ffi/xml/text.rb +18 -0
  135. data/lib/nokogiri/ffi/xml/xpath.rb +19 -0
  136. data/lib/nokogiri/ffi/xml/xpath_context.rb +135 -0
  137. data/lib/nokogiri/ffi/xslt/stylesheet.rb +47 -0
  138. data/lib/nokogiri/hpricot.rb +62 -0
  139. data/lib/nokogiri/html.rb +34 -0
  140. data/lib/nokogiri/html/builder.rb +35 -0
  141. data/lib/nokogiri/html/document.rb +71 -0
  142. data/lib/nokogiri/html/document_fragment.rb +15 -0
  143. data/lib/nokogiri/html/element_description.rb +23 -0
  144. data/lib/nokogiri/html/entity_lookup.rb +13 -0
  145. data/lib/nokogiri/html/sax/parser.rb +47 -0
  146. data/lib/nokogiri/nokogiri.rb +1 -0
  147. data/lib/nokogiri/syntax_error.rb +4 -0
  148. data/lib/nokogiri/version.rb +29 -0
  149. data/lib/nokogiri/version_warning.rb +11 -0
  150. data/lib/nokogiri/xml.rb +62 -0
  151. data/lib/nokogiri/xml/attr.rb +9 -0
  152. data/lib/nokogiri/xml/builder.rb +254 -0
  153. data/lib/nokogiri/xml/cdata.rb +11 -0
  154. data/lib/nokogiri/xml/document.rb +100 -0
  155. data/lib/nokogiri/xml/document_fragment.rb +49 -0
  156. data/lib/nokogiri/xml/dtd.rb +11 -0
  157. data/lib/nokogiri/xml/entity_declaration.rb +11 -0
  158. data/lib/nokogiri/xml/fragment_handler.rb +55 -0
  159. data/lib/nokogiri/xml/namespace.rb +7 -0
  160. data/lib/nokogiri/xml/node.rb +745 -0
  161. data/lib/nokogiri/xml/node/save_options.rb +42 -0
  162. data/lib/nokogiri/xml/node_set.rb +238 -0
  163. data/lib/nokogiri/xml/notation.rb +6 -0
  164. data/lib/nokogiri/xml/parse_options.rb +80 -0
  165. data/lib/nokogiri/xml/processing_instruction.rb +8 -0
  166. data/lib/nokogiri/xml/reader.rb +66 -0
  167. data/lib/nokogiri/xml/relax_ng.rb +32 -0
  168. data/lib/nokogiri/xml/sax.rb +3 -0
  169. data/lib/nokogiri/xml/sax/document.rb +143 -0
  170. data/lib/nokogiri/xml/sax/parser.rb +101 -0
  171. data/lib/nokogiri/xml/sax/push_parser.rb +60 -0
  172. data/lib/nokogiri/xml/schema.rb +65 -0
  173. data/lib/nokogiri/xml/syntax_error.rb +34 -0
  174. data/lib/nokogiri/xml/xpath.rb +10 -0
  175. data/lib/nokogiri/xml/xpath/syntax_error.rb +8 -0
  176. data/lib/nokogiri/xml/xpath_context.rb +16 -0
  177. data/lib/nokogiri/xslt.rb +48 -0
  178. data/lib/nokogiri/xslt/stylesheet.rb +25 -0
  179. data/lib/xsd/xmlparser/nokogiri.rb +64 -0
  180. data/tasks/test.rb +161 -0
  181. data/test/css/test_nthiness.rb +160 -0
  182. data/test/css/test_parser.rb +277 -0
  183. data/test/css/test_tokenizer.rb +176 -0
  184. data/test/css/test_xpath_visitor.rb +76 -0
  185. data/test/ffi/test_document.rb +35 -0
  186. data/test/files/address_book.rlx +12 -0
  187. data/test/files/address_book.xml +10 -0
  188. data/test/files/dont_hurt_em_why.xml +422 -0
  189. data/test/files/exslt.xml +8 -0
  190. data/test/files/exslt.xslt +35 -0
  191. data/test/files/po.xml +32 -0
  192. data/test/files/po.xsd +66 -0
  193. data/test/files/staff.xml +59 -0
  194. data/test/files/staff.xslt +32 -0
  195. data/test/files/tlm.html +850 -0
  196. data/test/helper.rb +123 -0
  197. data/test/hpricot/files/basic.xhtml +17 -0
  198. data/test/hpricot/files/boingboing.html +2266 -0
  199. data/test/hpricot/files/cy0.html +3653 -0
  200. data/test/hpricot/files/immob.html +400 -0
  201. data/test/hpricot/files/pace_application.html +1320 -0
  202. data/test/hpricot/files/tenderlove.html +16 -0
  203. data/test/hpricot/files/uswebgen.html +220 -0
  204. data/test/hpricot/files/utf8.html +1054 -0
  205. data/test/hpricot/files/week9.html +1723 -0
  206. data/test/hpricot/files/why.xml +19 -0
  207. data/test/hpricot/load_files.rb +11 -0
  208. data/test/hpricot/test_alter.rb +68 -0
  209. data/test/hpricot/test_builder.rb +20 -0
  210. data/test/hpricot/test_parser.rb +426 -0
  211. data/test/hpricot/test_paths.rb +15 -0
  212. data/test/hpricot/test_preserved.rb +77 -0
  213. data/test/hpricot/test_xml.rb +30 -0
  214. data/test/html/sax/test_parser.rb +52 -0
  215. data/test/html/test_builder.rb +156 -0
  216. data/test/html/test_document.rb +361 -0
  217. data/test/html/test_document_encoding.rb +46 -0
  218. data/test/html/test_document_fragment.rb +97 -0
  219. data/test/html/test_element_description.rb +95 -0
  220. data/test/html/test_named_characters.rb +14 -0
  221. data/test/html/test_node.rb +165 -0
  222. data/test/test_convert_xpath.rb +186 -0
  223. data/test/test_css_cache.rb +56 -0
  224. data/test/test_gc.rb +15 -0
  225. data/test/test_memory_leak.rb +77 -0
  226. data/test/test_nokogiri.rb +127 -0
  227. data/test/test_reader.rb +316 -0
  228. data/test/test_xslt_transforms.rb +131 -0
  229. data/test/xml/node/test_save_options.rb +20 -0
  230. data/test/xml/node/test_subclass.rb +44 -0
  231. data/test/xml/sax/test_parser.rb +169 -0
  232. data/test/xml/sax/test_push_parser.rb +92 -0
  233. data/test/xml/test_attr.rb +38 -0
  234. data/test/xml/test_builder.rb +73 -0
  235. data/test/xml/test_cdata.rb +38 -0
  236. data/test/xml/test_comment.rb +23 -0
  237. data/test/xml/test_document.rb +397 -0
  238. data/test/xml/test_document_encoding.rb +26 -0
  239. data/test/xml/test_document_fragment.rb +76 -0
  240. data/test/xml/test_dtd.rb +42 -0
  241. data/test/xml/test_dtd_encoding.rb +31 -0
  242. data/test/xml/test_entity_reference.rb +21 -0
  243. data/test/xml/test_namespace.rb +43 -0
  244. data/test/xml/test_node.rb +808 -0
  245. data/test/xml/test_node_attributes.rb +34 -0
  246. data/test/xml/test_node_encoding.rb +84 -0
  247. data/test/xml/test_node_set.rb +368 -0
  248. data/test/xml/test_parse_options.rb +52 -0
  249. data/test/xml/test_processing_instruction.rb +30 -0
  250. data/test/xml/test_reader_encoding.rb +126 -0
  251. data/test/xml/test_relax_ng.rb +60 -0
  252. data/test/xml/test_schema.rb +65 -0
  253. data/test/xml/test_text.rb +18 -0
  254. data/test/xml/test_unparented_node.rb +381 -0
  255. data/test/xml/test_xpath.rb +106 -0
  256. metadata +409 -0
@@ -0,0 +1,15 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+ require File.join(File.dirname(__FILE__),"load_files")
3
+
4
+ class TestParser < Nokogiri::TestCase
5
+ include Nokogiri
6
+
7
+ def test_roundtrip
8
+ @basic = Hpricot.parse(TestFiles::BASIC)
9
+ %w[link link[2] body #link1 a p.ohmy].each do |css_sel|
10
+ ele = @basic.at(css_sel)
11
+ assert_equal ele, @basic.at(ele.css_path), ele.css_path
12
+ assert_equal ele, @basic.at(ele.xpath), ele.xpath
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,77 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+ require File.join(File.dirname(__FILE__),"load_files")
3
+
4
+ class TestPreserved < Nokogiri::TestCase
5
+ def assert_roundtrip str
6
+ doc = Nokogiri.Hpricot(str)
7
+ yield doc if block_given?
8
+ str2 = doc.to_original_html
9
+ [*str].zip([*str2]).each do |s1, s2|
10
+ assert_equal s1, s2
11
+ end
12
+ end
13
+
14
+ def assert_html str1, str2
15
+ doc = Nokogiri.Hpricot(str2)
16
+ yield doc if block_given?
17
+ assert_equal str1, doc.to_original_html
18
+ end
19
+
20
+ ####
21
+ # Not supporting to_original_html
22
+ #def test_simple
23
+ # str = "<p>Hpricot is a <b>you know <i>uh</b> fine thing.</p>"
24
+ # assert_html str, str
25
+ # assert_html "<p class=\"new\">Hpricot is a <b>you know <i>uh</b> fine thing.</p>", str do |doc|
26
+ # (doc/:p).set('class', 'new')
27
+ # end
28
+ #end
29
+
30
+ ####
31
+ # Not supporting to_original_html
32
+ #def test_parent
33
+ # str = "<html><base href='/'><head><title>Test</title></head><body><div id='wrap'><p>Paragraph one.</p><p>Paragraph two.</p></div></body></html>"
34
+ # assert_html str, str
35
+ # assert_html "<html><base href='/'><body><div id=\"all\"><div><p>Paragraph one.</p></div><div><p>Paragraph two.</p></div></div></body></html>", str do |doc|
36
+ # (doc/:head).remove
37
+ # (doc/:div).set('id', 'all')
38
+ # (doc/:p).wrap('<div></div>')
39
+ # end
40
+ #end
41
+
42
+ # Not really a valid test. If libxml can figure out the encoding of the file,
43
+ # it will use that encoding; otherwise it emits &#x...; character references
44
+ # so that no data is lost.
45
+ #
46
+ # libxml on OSX can't figure out the encoding, so this test passes. On Linux
47
+ # it can figure out the encoding, so the test fails.
48
+ #def test_escaping_of_contents
49
+ # doc = Nokogiri.Hpricot(TestFiles::BOINGBOING)
50
+ # assert_equal "Fukuda&#x2019;s Automatic Door opens around your body as you pass through it. The idea is to save energy and keep the room clean.", doc.at("img[@alt='200606131240']").next.to_s.strip
51
+ #end
52
+
53
+ ####
54
+ # Modified. No.
55
+ #def test_files
56
+ # assert_roundtrip TestFiles::BASIC
57
+ # assert_roundtrip TestFiles::BOINGBOING
58
+ # assert_roundtrip TestFiles::CY0
59
+ #end
60
+
61
+ ####
62
+ # Modified. When calling "to_html" on the document, proper html/doc tags
63
+ # are produced too.
64
+ def test_escaping_of_attrs
65
+ # ampersands in URLs
66
+ str = %{<a href="http://google.com/search?q=nokogiri&amp;l=en">Google</a>}
67
+ link = (doc = Nokogiri.Hpricot(str)).at(:a)
68
+ assert_equal "http://google.com/search?q=nokogiri&l=en", link['href']
69
+ assert_equal "http://google.com/search?q=nokogiri&l=en", link.get_attribute('href')
70
+ assert_equal "http://google.com/search?q=nokogiri&l=en", link.raw_attributes['href']
71
+ assert_equal str, link.to_html
72
+
73
+ # alter the url
74
+ link['href'] = "javascript:alert(\"AGGA-KA-BOO!\")"
75
+ assert_equal %{<a href="javascript:alert(&quot;AGGA-KA-BOO!&quot;)">Google</a>}, link.to_html.gsub(/%22/, '&quot;')
76
+ end
77
+ end
@@ -0,0 +1,30 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+ require File.join(File.dirname(__FILE__),"load_files")
3
+
4
+ class TestParser < Nokogiri::TestCase
5
+ include Nokogiri
6
+ # normally, the link tags are empty HTML tags.
7
+ # contributed by laudney.
8
+ def test_normally_empty
9
+ doc = Hpricot::XML("<rss><channel><title>this is title</title><link>http://fake.com</link></channel></rss>")
10
+ assert_equal "this is title", (doc/:rss/:channel/:title).text
11
+ assert_equal "http://fake.com", (doc/:rss/:channel/:link).text
12
+ end
13
+
14
+ # make sure XML doesn't get downcased
15
+ def test_casing
16
+ doc = Hpricot::XML(TestFiles::WHY)
17
+
18
+ ### Modified.
19
+ # I don't want to differentiate pseudo classes from namespaces. If
20
+ # you're parsing xml, use XPath. That's what it's for. :-P
21
+ assert_equal "hourly", (doc.at "//sy:updatePeriod").content
22
+ assert_equal 1, (doc/"guid[@isPermaLink]").length
23
+ end
24
+
25
+ # be sure tags named "text" are ok
26
+ def test_text_tags
27
+ doc = Hpricot::XML("<feed><title>City Poisoned</title><text>Rita Lee has poisoned Brazil.</text></feed>")
28
+ assert_equal "City Poisoned", (doc/"title").text
29
+ end
30
+ end
@@ -0,0 +1,52 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', '..', "helper"))
2
+
3
+ module Nokogiri
4
+ module HTML
5
+ module SAX
6
+ class TestParser < Nokogiri::SAX::TestCase
7
+ def setup
8
+ super
9
+ @parser = HTML::SAX::Parser.new(Doc.new)
10
+ end
11
+
12
+ def test_parse_file
13
+ @parser.parse_file(HTML_FILE)
14
+ assert_equal 1110, @parser.document.end_elements.length
15
+ end
16
+
17
+ def test_parse_file_nil_argument
18
+ assert_raises(ArgumentError) {
19
+ @parser.parse_file(nil)
20
+ }
21
+ end
22
+
23
+ def test_parse_file_non_existant
24
+ assert_raise Errno::ENOENT do
25
+ @parser.parse_file('foo')
26
+ end
27
+ end
28
+
29
+ def test_parse_file_with_dir
30
+ assert_raise Errno::EISDIR do
31
+ @parser.parse_file(File.dirname(__FILE__))
32
+ end
33
+ end
34
+
35
+ def test_parse_memory_nil
36
+ assert_raise ArgumentError do
37
+ @parser.parse_memory(nil)
38
+ end
39
+ end
40
+
41
+ def test_parse_document
42
+ @parser.parse_memory(<<-eoxml)
43
+ <p>Paragraph 1</p>
44
+ <p>Paragraph 2</p>
45
+ eoxml
46
+ assert_equal([["html", []], ["body", []], ["p", []], ["p", []]],
47
+ @parser.document.start_elements)
48
+ end
49
+ end
50
+ end
51
+ end
52
+ end
@@ -0,0 +1,156 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+
3
+ module Nokogiri
4
+ module HTML
5
+ class TestBuilder < Nokogiri::TestCase
6
+ def test_builder_with_explicit_tags
7
+ html_doc = Nokogiri::HTML::Builder.new {
8
+ div.slide(:class => 'another_class') {
9
+ node = Nokogiri::XML::Node.new("id", doc)
10
+ node.content = "hello"
11
+ insert(node)
12
+ }
13
+ }.doc
14
+ assert_equal 1, html_doc.css('div.slide > id').length
15
+ assert_equal 'hello', html_doc.at('div.slide > id').content
16
+ end
17
+
18
+ def test_hash_as_attributes_for_attribute_method
19
+ html = Nokogiri::HTML::Builder.new { ||
20
+ div.slide(:class => 'another_class') {
21
+ span 'Slide 1'
22
+ }
23
+ }.to_html
24
+ assert_match 'class="slide another_class"', html
25
+ end
26
+
27
+ def test_hash_as_attributes
28
+ builder = Nokogiri::HTML::Builder.new do
29
+ div(:id => 'awesome') {
30
+ h1 "america"
31
+ }
32
+ end
33
+ assert_equal('<div id="awesome"><h1>america</h1></div>',
34
+ builder.doc.root.to_html.gsub(/\n/, '').gsub(/>\s*</, '><'))
35
+ end
36
+
37
+ def test_href_with_attributes
38
+ uri = 'http://tenderlovemaking.com/'
39
+ built = Nokogiri::XML::Builder.new {
40
+ div {
41
+ a('King Khan & The Shrines', :href => uri)
42
+ }
43
+ }
44
+ assert_equal 'http://tenderlovemaking.com/',
45
+ built.doc.at('a')[:href]
46
+ end
47
+
48
+ def test_tag_nesting
49
+ builder = Nokogiri::HTML::Builder.new do
50
+ span.left ''
51
+ span.middle {
52
+ div.icon ''
53
+ }
54
+ span.right ''
55
+ end
56
+ assert node = builder.doc.css('span.right').first
57
+ assert_equal 'middle', node.previous_sibling['class']
58
+ end
59
+
60
+ def test_has_ampersand
61
+ builder = Nokogiri::HTML::Builder.new do
62
+ div.rad.thing! {
63
+ text "<awe&some>"
64
+ b "hello & world"
65
+ }
66
+ end
67
+ assert_equal(
68
+ '<div class="rad" id="thing">&lt;awe&amp;some&gt;<b>hello &amp; world</b></div>',
69
+ builder.doc.root.to_html.gsub(/\n/, ''))
70
+ end
71
+
72
+ def test_multi_tags
73
+ builder = Nokogiri::HTML::Builder.new do
74
+ div.rad.thing! {
75
+ text "<awesome>"
76
+ b "hello"
77
+ }
78
+ end
79
+ assert_equal(
80
+ '<div class="rad" id="thing">&lt;awesome&gt;<b>hello</b></div>',
81
+ builder.doc.root.to_html.gsub(/\n/, ''))
82
+ end
83
+
84
+ def test_attributes_plus_block
85
+ builder = Nokogiri::HTML::Builder.new do
86
+ div.rad.thing! {
87
+ text "<awesome>"
88
+ }
89
+ end
90
+ assert_equal('<div class="rad" id="thing">&lt;awesome&gt;</div>',
91
+ builder.doc.root.to_html.chomp)
92
+ end
93
+
94
+ def test_builder_adds_attributes
95
+ builder = Nokogiri::HTML::Builder.new do
96
+ div.rad.thing! "tender div"
97
+ end
98
+ assert_equal('<div class="rad" id="thing">tender div</div>',
99
+ builder.doc.root.to_html.chomp)
100
+ end
101
+
102
+ def test_bold_tag
103
+ builder = Nokogiri::HTML::Builder.new do
104
+ b "bold tag"
105
+ end
106
+ assert_equal('<b>bold tag</b>', builder.doc.root.to_html.chomp)
107
+ end
108
+
109
+ def test_html_then_body_tag
110
+ builder = Nokogiri::HTML::Builder.new do
111
+ html {
112
+ body {
113
+ b "bold tag"
114
+ }
115
+ }
116
+ end
117
+ assert_equal('<html><body><b>bold tag</b></body></html>',
118
+ builder.doc.root.to_html.chomp.gsub(/>\s*</, '><'))
119
+ end
120
+
121
+ def test_instance_eval_with_delegation_to_block_context
122
+ class << self
123
+ def foo
124
+ "foo!"
125
+ end
126
+ end
127
+
128
+ builder = Nokogiri::HTML::Builder.new { text foo }
129
+ assert builder.to_html.include?("foo!")
130
+ end
131
+
132
+ def test_builder_with_param
133
+ doc = Nokogiri::HTML::Builder.new { |html|
134
+ html.body {
135
+ html.p "hello world"
136
+ }
137
+ }.doc
138
+
139
+ assert node = doc.xpath('//body/p').first
140
+ assert_equal 'hello world', node.content
141
+ end
142
+
143
+ def test_builder_with_id
144
+ text = "hello world"
145
+ doc = Nokogiri::HTML::Builder.new { |html|
146
+ html.body {
147
+ html.id_ text
148
+ }
149
+ }.doc
150
+
151
+ assert node = doc.xpath('//body/id').first
152
+ assert_equal text, node.content
153
+ end
154
+ end
155
+ end
156
+ end
@@ -0,0 +1,361 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+
3
+ module Nokogiri
4
+ module HTML
5
+ class TestDocument < Nokogiri::TestCase
6
+ def setup
7
+ super
8
+ @html = Nokogiri::HTML.parse(File.read(HTML_FILE))
9
+ end
10
+
11
+ def test_document_takes_config_block
12
+ options = nil
13
+ Nokogiri::HTML(File.read(HTML_FILE), HTML_FILE) do |cfg|
14
+ options = cfg
15
+ options.nonet.nowarning.dtdattr
16
+ end
17
+ assert options.nonet?
18
+ assert options.nowarning?
19
+ assert options.dtdattr?
20
+ end
21
+
22
+ def test_parse_takes_config_block
23
+ options = nil
24
+ Nokogiri::HTML.parse(File.read(HTML_FILE), HTML_FILE) do |cfg|
25
+ options = cfg
26
+ options.nonet.nowarning.dtdattr
27
+ end
28
+ assert options.nonet?
29
+ assert options.nowarning?
30
+ assert options.dtdattr?
31
+ end
32
+
33
+ def test_subclass
34
+ klass = Class.new(Nokogiri::HTML::Document)
35
+ doc = klass.new
36
+ assert_instance_of klass, doc
37
+ end
38
+
39
+ def test_subclass_initialize
40
+ klass = Class.new(Nokogiri::HTML::Document) do
41
+ attr_accessor :initialized_with
42
+
43
+ def initialize(*args)
44
+ @initialized_with = args
45
+ end
46
+ end
47
+ doc = klass.new("uri", "external_id", 1)
48
+ assert_equal ["uri", "external_id", 1], doc.initialized_with
49
+ end
50
+
51
+ def test_subclass_dup
52
+ klass = Class.new(Nokogiri::HTML::Document)
53
+ doc = klass.new.dup
54
+ assert_instance_of klass, doc
55
+ end
56
+
57
+ def test_subclass_parse
58
+ klass = Class.new(Nokogiri::HTML::Document)
59
+ doc = klass.parse(File.read(HTML_FILE))
60
+ assert_equal @html.to_s, doc.to_s
61
+ assert_instance_of klass, doc
62
+ end
63
+
64
+ def test_document_parse_method
65
+ html = Nokogiri::HTML::Document.parse(File.read(HTML_FILE))
66
+ assert_equal @html.to_s, html.to_s
67
+ end
68
+
69
+ def test_empty_string_returns_empty_doc
70
+ doc = Nokogiri::HTML('')
71
+ end
72
+
73
+ unless %w[2 6] === LIBXML_VERSION.split('.')[0..1]
74
+ # FIXME: this is a hack around broken libxml versions
75
+ def test_to_xhtml_with_indent
76
+ doc = Nokogiri::HTML('<html><body><a>foo</a></body></html>')
77
+ doc = Nokogiri::HTML(doc.to_xhtml(:indent => 2))
78
+ assert_indent 2, doc
79
+ end
80
+
81
+ def test_write_to_xhtml_with_indent
82
+ io = StringIO.new
83
+ doc = Nokogiri::HTML('<html><body><a>foo</a></body></html>')
84
+ doc.write_xhtml_to io, :indent => 5
85
+ io.rewind
86
+ doc = Nokogiri::HTML(io.read)
87
+ assert_indent 5, doc
88
+ end
89
+ end
90
+
91
+ def test_swap_should_not_exist
92
+ assert_raises(NoMethodError) {
93
+ @html.swap
94
+ }
95
+ end
96
+
97
+ def test_namespace_should_not_exist
98
+ assert_raises(NoMethodError) {
99
+ @html.namespace
100
+ }
101
+ end
102
+
103
+ def test_meta_encoding
104
+ assert_equal 'UTF-8', @html.meta_encoding
105
+ end
106
+
107
+ def test_meta_encoding=
108
+ @html.meta_encoding = 'EUC-JP'
109
+ assert_equal 'EUC-JP', @html.meta_encoding
110
+ end
111
+
112
+ def test_root_node_parent_is_document
113
+ parent = @html.root.parent
114
+ assert_equal @html, parent
115
+ assert_instance_of Nokogiri::HTML::Document, parent
116
+ end
117
+
118
+ def test_parse_empty_document
119
+ doc = Nokogiri::HTML("\n")
120
+ assert_equal 0, doc.css('a').length
121
+ assert_equal 0, doc.xpath('//a').length
122
+ assert_equal 0, doc.search('//a').length
123
+ end
124
+
125
+ def test_HTML_function
126
+ html = Nokogiri::HTML(File.read(HTML_FILE))
127
+ assert html.html?
128
+ end
129
+
130
+ def test_parse_io
131
+ assert doc = File.open(HTML_FILE, 'rb') { |f|
132
+ Document.read_io(f, nil, 'UTF-8',
133
+ XML::ParseOptions::NOERROR | XML::ParseOptions::NOWARNING
134
+ )
135
+ }
136
+ end
137
+
138
+ def test_to_xhtml
139
+ assert_match 'XHTML', @html.to_xhtml
140
+ assert_match 'XHTML', @html.to_xhtml(:encoding => 'UTF-8')
141
+ assert_match 'UTF-8', @html.to_xhtml(:encoding => 'UTF-8')
142
+ end
143
+
144
+ def test_no_xml_header
145
+ html = Nokogiri::HTML(<<-eohtml)
146
+ <html>
147
+ </html>
148
+ eohtml
149
+ assert html.to_html.length > 0, 'html length is too short'
150
+ assert_no_match(/^<\?xml/, html.to_html)
151
+ end
152
+
153
+ def test_document_has_error
154
+ html = Nokogiri::HTML(<<-eohtml)
155
+ <html>
156
+ <body>
157
+ <div awesome="asdf>
158
+ <p>inside div tag</p>
159
+ </div>
160
+ <p>outside div tag</p>
161
+ </body>
162
+ </html>
163
+ eohtml
164
+ assert html.errors.length > 0
165
+ end
166
+
167
+ def test_relative_css
168
+ html = Nokogiri::HTML(<<-eohtml)
169
+ <html>
170
+ <body>
171
+ <div>
172
+ <p>inside div tag</p>
173
+ </div>
174
+ <p>outside div tag</p>
175
+ </body>
176
+ </html>
177
+ eohtml
178
+ set = html.search('div').search('p')
179
+ assert_equal(1, set.length)
180
+ assert_equal('inside div tag', set.first.inner_text)
181
+ end
182
+
183
+ def test_multi_css
184
+ html = Nokogiri::HTML(<<-eohtml)
185
+ <html>
186
+ <body>
187
+ <div>
188
+ <p>p tag</p>
189
+ <a>a tag</a>
190
+ </div>
191
+ </body>
192
+ </html>
193
+ eohtml
194
+ set = html.css('p, a')
195
+ assert_equal(2, set.length)
196
+ assert_equal ['a tag', 'p tag'].sort, set.map { |x| x.content }.sort
197
+ end
198
+
199
+ def test_inner_text
200
+ html = Nokogiri::HTML(<<-eohtml)
201
+ <html>
202
+ <body>
203
+ <div>
204
+ <p>
205
+ Hello world!
206
+ </p>
207
+ </div>
208
+ </body>
209
+ </html>
210
+ eohtml
211
+ node = html.xpath('//div').first
212
+ assert_equal('Hello world!', node.inner_text.strip)
213
+ end
214
+
215
+ def test_find_by_xpath
216
+ found = @html.xpath('//div/a')
217
+ assert_equal 3, found.length
218
+ end
219
+
220
+ def test_find_by_css
221
+ found = @html.css('div > a')
222
+ assert_equal 3, found.length
223
+ end
224
+
225
+ def test_find_by_css_with_square_brackets
226
+ found = @html.css("div[@id='header'] > h1")
227
+ found = @html.css("div[@id='header'] h1") # this blows up on commit 6fa0f6d329d9dbf1cc21c0ac72f7e627bb4c05fc
228
+ assert_equal 1, found.length
229
+ end
230
+
231
+ def test_find_with_function
232
+ found = @html.css("div:awesome() h1", Class.new {
233
+ def awesome divs
234
+ [divs.first]
235
+ end
236
+ }.new)
237
+ end
238
+
239
+ def test_dup_shallow
240
+ found = @html.search('//div/a').first
241
+ dup = found.dup(0)
242
+ assert dup
243
+ assert_equal '', dup.content
244
+ end
245
+
246
+ def test_search_can_handle_xpath_and_css
247
+ found = @html.search('//div/a', 'div > p')
248
+ length = @html.xpath('//div/a').length +
249
+ @html.css('div > p').length
250
+ assert_equal length, found.length
251
+ end
252
+
253
+ def test_dup_document
254
+ assert dup = @html.dup
255
+ assert_not_equal dup, @html
256
+ assert @html.html?
257
+ assert_instance_of Nokogiri::HTML::Document, dup
258
+ assert dup.html?, 'duplicate should be html'
259
+ assert_equal @html.to_s, dup.to_s
260
+ end
261
+
262
+ def test_dup_document_shallow
263
+ assert dup = @html.dup(0)
264
+ assert_not_equal dup, @html
265
+ end
266
+
267
+ def test_dup
268
+ found = @html.search('//div/a').first
269
+ dup = found.dup
270
+ assert dup
271
+ assert_equal found.content, dup.content
272
+ assert_equal found.document, dup.document
273
+ end
274
+
275
+ def test_inner_html
276
+ html = Nokogiri::HTML(<<-eohtml)
277
+ <html>
278
+ <body>
279
+ <div>
280
+ <p>
281
+ Hello world!
282
+ </p>
283
+ </div>
284
+ </body>
285
+ </html>
286
+ eohtml
287
+ node = html.xpath('//div').first
288
+ assert_equal('<p>Helloworld!</p>', node.inner_html.gsub(/\s/, ''))
289
+ end
290
+
291
+ def test_fragment_contains_text_node
292
+ fragment = Nokogiri::HTML.fragment('fooo')
293
+ assert_equal 1, fragment.children.length
294
+ assert_equal 'fooo', fragment.inner_text
295
+ end
296
+
297
+ def test_fragment_includes_two_tags
298
+ assert_equal 2, Nokogiri::HTML.fragment("<br/><hr/>").children.length
299
+ end
300
+
301
+ def test_relative_css_finder
302
+ doc = Nokogiri::HTML(<<-eohtml)
303
+ <html>
304
+ <body>
305
+ <div class="red">
306
+ <p>
307
+ inside red
308
+ </p>
309
+ </div>
310
+ <div class="green">
311
+ <p>
312
+ inside green
313
+ </p>
314
+ </div>
315
+ </body>
316
+ </html>
317
+ eohtml
318
+ red_divs = doc.css('div.red')
319
+ assert_equal 1, red_divs.length
320
+ p_tags = red_divs.first.css('p')
321
+ assert_equal 1, p_tags.length
322
+ assert_equal 'inside red', p_tags.first.text.strip
323
+ end
324
+
325
+ def test_find_classes
326
+ doc = Nokogiri::HTML(<<-eohtml)
327
+ <html>
328
+ <body>
329
+ <p class="red">RED</p>
330
+ <p class="awesome red">RED</p>
331
+ <p class="notred">GREEN</p>
332
+ <p class="green notred">GREEN</p>
333
+ </body>
334
+ </html>
335
+ eohtml
336
+ list = doc.css('.red')
337
+ assert_equal 2, list.length
338
+ assert_equal %w{ RED RED }, list.map { |x| x.text }
339
+ end
340
+
341
+ def test_parse_can_take_io
342
+ html = nil
343
+ File.open(HTML_FILE, 'rb') { |f|
344
+ html = Nokogiri::HTML(f)
345
+ }
346
+ assert html.html?
347
+ end
348
+
349
+ def test_html?
350
+ assert !@html.xml?
351
+ assert @html.html?
352
+ end
353
+
354
+ def test_serialize
355
+ assert @html.serialize
356
+ assert @html.to_html
357
+ end
358
+ end
359
+ end
360
+ end
361
+