nokogiri 1.3.3 → 1.4.0

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (201) hide show
  1. data/CHANGELOG.ja.rdoc +48 -3
  2. data/CHANGELOG.rdoc +42 -0
  3. data/Manifest.txt +44 -29
  4. data/README.ja.rdoc +0 -2
  5. data/README.rdoc +4 -7
  6. data/Rakefile +42 -6
  7. data/bin/nokogiri +7 -5
  8. data/ext/nokogiri/extconf.rb +5 -21
  9. data/ext/nokogiri/html_document.c +14 -50
  10. data/ext/nokogiri/html_element_description.c +7 -7
  11. data/ext/nokogiri/html_entity_lookup.c +6 -4
  12. data/ext/nokogiri/html_sax_parser_context.c +92 -0
  13. data/ext/nokogiri/html_sax_parser_context.h +11 -0
  14. data/ext/nokogiri/nokogiri.c +9 -3
  15. data/ext/nokogiri/nokogiri.h +16 -20
  16. data/ext/nokogiri/xml_attr.c +1 -1
  17. data/ext/nokogiri/xml_attribute_decl.c +67 -0
  18. data/ext/nokogiri/xml_attribute_decl.h +9 -0
  19. data/ext/nokogiri/xml_cdata.c +6 -5
  20. data/ext/nokogiri/xml_comment.c +3 -2
  21. data/ext/nokogiri/xml_document.c +93 -23
  22. data/ext/nokogiri/xml_document_fragment.c +1 -3
  23. data/ext/nokogiri/xml_dtd.c +63 -6
  24. data/ext/nokogiri/xml_element_content.c +123 -0
  25. data/ext/nokogiri/xml_element_content.h +10 -0
  26. data/ext/nokogiri/xml_element_decl.c +69 -0
  27. data/ext/nokogiri/xml_element_decl.h +9 -0
  28. data/ext/nokogiri/xml_entity_decl.c +97 -0
  29. data/ext/nokogiri/xml_entity_decl.h +10 -0
  30. data/ext/nokogiri/xml_entity_reference.c +1 -1
  31. data/ext/nokogiri/xml_io.c +10 -3
  32. data/ext/nokogiri/xml_io.h +1 -0
  33. data/ext/nokogiri/xml_namespace.c +2 -2
  34. data/ext/nokogiri/xml_node.c +139 -34
  35. data/ext/nokogiri/xml_node.h +0 -1
  36. data/ext/nokogiri/xml_node_set.c +23 -16
  37. data/ext/nokogiri/xml_processing_instruction.c +1 -1
  38. data/ext/nokogiri/xml_reader.c +78 -50
  39. data/ext/nokogiri/xml_sax_parser.c +109 -168
  40. data/ext/nokogiri/xml_sax_parser.h +33 -0
  41. data/ext/nokogiri/xml_sax_parser_context.c +155 -0
  42. data/ext/nokogiri/xml_sax_parser_context.h +10 -0
  43. data/ext/nokogiri/xml_sax_push_parser.c +11 -6
  44. data/ext/nokogiri/xml_syntax_error.c +63 -12
  45. data/ext/nokogiri/xml_text.c +4 -3
  46. data/ext/nokogiri/xml_xpath.c +1 -1
  47. data/ext/nokogiri/xml_xpath_context.c +12 -25
  48. data/ext/nokogiri/xslt_stylesheet.c +3 -3
  49. data/lib/nokogiri.rb +4 -4
  50. data/lib/nokogiri/css/generated_tokenizer.rb +1 -0
  51. data/lib/nokogiri/css/node.rb +1 -9
  52. data/lib/nokogiri/css/xpath_visitor.rb +11 -21
  53. data/lib/nokogiri/ffi/html/document.rb +0 -9
  54. data/lib/nokogiri/ffi/html/sax/parser_context.rb +38 -0
  55. data/lib/nokogiri/ffi/io_callbacks.rb +4 -2
  56. data/lib/nokogiri/ffi/libxml.rb +44 -10
  57. data/lib/nokogiri/ffi/structs/common_node.rb +1 -1
  58. data/lib/nokogiri/ffi/structs/xml_attribute.rb +27 -0
  59. data/lib/nokogiri/ffi/structs/xml_dtd.rb +3 -1
  60. data/lib/nokogiri/ffi/structs/xml_element.rb +26 -0
  61. data/lib/nokogiri/ffi/structs/xml_element_content.rb +17 -0
  62. data/lib/nokogiri/ffi/structs/xml_entity.rb +32 -0
  63. data/lib/nokogiri/ffi/structs/xml_enumeration.rb +12 -0
  64. data/lib/nokogiri/ffi/structs/xml_parser_context.rb +19 -0
  65. data/lib/nokogiri/ffi/structs/xml_sax_push_parser_context.rb +4 -3
  66. data/lib/nokogiri/ffi/structs/xml_syntax_error.rb +1 -1
  67. data/lib/nokogiri/ffi/xml/attribute_decl.rb +27 -0
  68. data/lib/nokogiri/ffi/xml/comment.rb +2 -2
  69. data/lib/nokogiri/ffi/xml/document.rb +29 -12
  70. data/lib/nokogiri/ffi/xml/document_fragment.rb +0 -5
  71. data/lib/nokogiri/ffi/xml/dtd.rb +14 -3
  72. data/lib/nokogiri/ffi/xml/element_content.rb +43 -0
  73. data/lib/nokogiri/ffi/xml/element_decl.rb +19 -0
  74. data/lib/nokogiri/ffi/xml/entity_decl.rb +27 -0
  75. data/lib/nokogiri/ffi/xml/node.rb +45 -5
  76. data/lib/nokogiri/ffi/xml/node_set.rb +1 -1
  77. data/lib/nokogiri/ffi/xml/reader.rb +45 -24
  78. data/lib/nokogiri/ffi/xml/sax/parser.rb +27 -34
  79. data/lib/nokogiri/ffi/xml/sax/parser_context.rb +67 -0
  80. data/lib/nokogiri/ffi/xml/sax/push_parser.rb +5 -4
  81. data/lib/nokogiri/ffi/xml/syntax_error.rb +31 -16
  82. data/lib/nokogiri/ffi/xml/text.rb +2 -2
  83. data/lib/nokogiri/html.rb +1 -0
  84. data/lib/nokogiri/html/document.rb +39 -24
  85. data/lib/nokogiri/html/sax/parser.rb +2 -2
  86. data/lib/nokogiri/html/sax/parser_context.rb +16 -0
  87. data/lib/nokogiri/version.rb +1 -1
  88. data/lib/nokogiri/xml.rb +6 -1
  89. data/lib/nokogiri/xml/attr.rb +5 -0
  90. data/lib/nokogiri/xml/attribute_decl.rb +18 -0
  91. data/lib/nokogiri/xml/builder.rb +121 -13
  92. data/lib/nokogiri/xml/character_data.rb +7 -0
  93. data/lib/nokogiri/xml/document.rb +43 -29
  94. data/lib/nokogiri/xml/document_fragment.rb +26 -6
  95. data/lib/nokogiri/xml/dtd.rb +5 -5
  96. data/lib/nokogiri/xml/element_content.rb +36 -0
  97. data/lib/nokogiri/xml/element_decl.rb +13 -0
  98. data/lib/nokogiri/xml/entity_decl.rb +15 -0
  99. data/lib/nokogiri/xml/fragment_handler.rb +22 -11
  100. data/lib/nokogiri/xml/namespace.rb +6 -0
  101. data/lib/nokogiri/xml/node.rb +33 -15
  102. data/lib/nokogiri/xml/node_set.rb +66 -44
  103. data/lib/nokogiri/xml/pp.rb +2 -0
  104. data/lib/nokogiri/xml/pp/character_data.rb +18 -0
  105. data/lib/nokogiri/xml/pp/node.rb +56 -0
  106. data/lib/nokogiri/xml/reader.rb +8 -0
  107. data/lib/nokogiri/xml/sax.rb +1 -1
  108. data/lib/nokogiri/xml/sax/document.rb +18 -1
  109. data/lib/nokogiri/xml/sax/parser.rb +15 -8
  110. data/lib/nokogiri/xml/sax/parser_context.rb +16 -0
  111. data/lib/nokogiri/xml/sax/push_parser.rb +0 -3
  112. data/lib/nokogiri/xml/syntax_error.rb +4 -0
  113. data/lib/nokogiri/xslt/stylesheet.rb +1 -1
  114. data/test/css/test_nthiness.rb +1 -1
  115. data/test/css/test_parser.rb +1 -1
  116. data/test/css/test_tokenizer.rb +1 -1
  117. data/test/css/test_xpath_visitor.rb +1 -1
  118. data/test/ffi/test_document.rb +1 -1
  119. data/test/files/shift_jis.html +10 -0
  120. data/test/files/staff.dtd +10 -0
  121. data/test/helper.rb +12 -3
  122. data/test/html/sax/test_parser.rb +1 -1
  123. data/test/html/sax/test_parser_context.rb +48 -0
  124. data/test/html/test_builder.rb +8 -2
  125. data/test/html/test_document.rb +23 -1
  126. data/test/html/test_document_encoding.rb +15 -1
  127. data/test/html/test_document_fragment.rb +10 -1
  128. data/test/html/test_element_description.rb +1 -2
  129. data/test/html/test_named_characters.rb +1 -1
  130. data/test/html/test_node.rb +61 -1
  131. data/test/html/test_node_encoding.rb +27 -0
  132. data/test/test_convert_xpath.rb +1 -3
  133. data/test/test_css_cache.rb +1 -1
  134. data/test/test_gc.rb +1 -1
  135. data/test/test_memory_leak.rb +1 -1
  136. data/test/test_nokogiri.rb +3 -3
  137. data/test/test_reader.rb +29 -1
  138. data/test/test_xslt_transforms.rb +1 -1
  139. data/test/xml/node/test_save_options.rb +1 -1
  140. data/test/xml/node/test_subclass.rb +1 -1
  141. data/test/xml/sax/test_parser.rb +64 -3
  142. data/test/xml/sax/test_parser_context.rb +56 -0
  143. data/test/xml/sax/test_push_parser.rb +11 -1
  144. data/test/xml/test_attr.rb +1 -1
  145. data/test/xml/test_attribute_decl.rb +82 -0
  146. data/test/xml/test_builder.rb +95 -1
  147. data/test/xml/test_cdata.rb +1 -1
  148. data/test/xml/test_comment.rb +7 -1
  149. data/test/xml/test_document.rb +147 -6
  150. data/test/xml/test_document_encoding.rb +1 -1
  151. data/test/xml/test_document_fragment.rb +55 -5
  152. data/test/xml/test_dtd.rb +40 -5
  153. data/test/xml/test_dtd_encoding.rb +3 -1
  154. data/test/xml/test_element_content.rb +56 -0
  155. data/test/xml/test_element_decl.rb +73 -0
  156. data/test/xml/test_entity_decl.rb +83 -0
  157. data/test/xml/test_entity_reference.rb +1 -1
  158. data/test/xml/test_namespace.rb +21 -1
  159. data/test/xml/test_node.rb +70 -4
  160. data/test/xml/test_node_attributes.rb +1 -1
  161. data/test/xml/test_node_encoding.rb +1 -1
  162. data/test/xml/test_node_set.rb +136 -2
  163. data/test/xml/test_parse_options.rb +1 -1
  164. data/test/xml/test_processing_instruction.rb +1 -1
  165. data/test/xml/test_reader_encoding.rb +1 -1
  166. data/test/xml/test_relax_ng.rb +1 -1
  167. data/test/xml/test_schema.rb +1 -1
  168. data/test/xml/test_syntax_error.rb +27 -0
  169. data/test/xml/test_text.rb +13 -1
  170. data/test/xml/test_unparented_node.rb +1 -1
  171. data/test/xml/test_xpath.rb +1 -1
  172. metadata +57 -40
  173. data/ext/nokogiri/html_sax_parser.c +0 -57
  174. data/ext/nokogiri/html_sax_parser.h +0 -11
  175. data/lib/action-nokogiri.rb +0 -38
  176. data/lib/nokogiri/decorators.rb +0 -2
  177. data/lib/nokogiri/decorators/hpricot.rb +0 -3
  178. data/lib/nokogiri/decorators/hpricot/node.rb +0 -56
  179. data/lib/nokogiri/decorators/hpricot/node_set.rb +0 -54
  180. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +0 -30
  181. data/lib/nokogiri/ffi/html/sax/parser.rb +0 -21
  182. data/lib/nokogiri/hpricot.rb +0 -92
  183. data/lib/nokogiri/xml/entity_declaration.rb +0 -11
  184. data/lib/nokogiri/xml/sax/legacy_handlers.rb +0 -65
  185. data/test/hpricot/files/basic.xhtml +0 -17
  186. data/test/hpricot/files/boingboing.html +0 -2266
  187. data/test/hpricot/files/cy0.html +0 -3653
  188. data/test/hpricot/files/immob.html +0 -400
  189. data/test/hpricot/files/pace_application.html +0 -1320
  190. data/test/hpricot/files/tenderlove.html +0 -16
  191. data/test/hpricot/files/uswebgen.html +0 -220
  192. data/test/hpricot/files/utf8.html +0 -1054
  193. data/test/hpricot/files/week9.html +0 -1723
  194. data/test/hpricot/files/why.xml +0 -19
  195. data/test/hpricot/load_files.rb +0 -11
  196. data/test/hpricot/test_alter.rb +0 -68
  197. data/test/hpricot/test_builder.rb +0 -20
  198. data/test/hpricot/test_parser.rb +0 -350
  199. data/test/hpricot/test_paths.rb +0 -15
  200. data/test/hpricot/test_preserved.rb +0 -77
  201. data/test/hpricot/test_xml.rb +0 -30
@@ -1,19 +0,0 @@
1
- <?xml version='1.0'?><rss xmlns:admin='http://webns.net/mvcb/' version='2.0' xmlns:sy='http://purl.org/rss/1.0/modules/syndication/' xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
2
- <channel>
3
- <title>why the lucky stiff</title>
4
- <link>http://whytheluckystiff.net</link>
5
- <description>hex-editing reality to give us infinite grenades!!</description>
6
- <dc:language>en-us</dc:language>
7
- <dc:creator/>
8
- <dc:date>2007-01-16T22:39:04+00:00</dc:date>
9
- <admin:generatorAgent rdf:resource='http://hobix.com/?v=0.4'/>
10
- <sy:updatePeriod>hourly</sy:updatePeriod>
11
- <sy:updateFrequency>1</sy:updateFrequency>
12
- <sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
13
- <item><title>1.3</title><link>http://whytheluckystiff.net/quatrains/1.3.html</link><guid isPermaLink='false'>quatrains/1.3@http://whytheluckystiff.net</guid><dc:subject>quatrains</dc:subject><dc:subject>quatrains</dc:subject><dc:creator>why the lucky stiff</dc:creator><dc:date>2007-01-14T08:47:05+00:00</dc:date><description>&lt;blockquote&gt;
14
- &lt;p&gt;That cadillac of yours and that driver of yours!&lt;br /&gt;You and your teacups rattling away in the back seat!&lt;br /&gt;You always took the mike, oh, and all those cowboys you shot!&lt;br /&gt;I held your hand! And I&amp;#8217;ll shoot a cowboy one day!&lt;/p&gt;
15
- &lt;/blockquote&gt;
16
- &lt;blockquote&gt;
17
- &lt;p&gt;You said, &amp;#8220;Let&amp;#8217;s run into the woods like kids!&amp;#8221; &lt;br /&gt;You said, &amp;#8220;Let&amp;#8217;s rub our hands together super-hot!&amp;#8221; &lt;br /&gt;And we scalded the trees and left octagons, I think that was you and&lt;br /&gt;You threw parties on the roof!&lt;/p&gt;
18
- &lt;/blockquote&gt;</description></item></channel>
19
- </rss>
@@ -1,11 +0,0 @@
1
- module TestFiles
2
- Dir.chdir(File.dirname(__FILE__)) do
3
- Dir['files/*.{html,xhtml,xml}'].each do |fname|
4
- if RUBY_VERSION >= '1.9'
5
- const_set fname[%r!/(\w+)\.\w+$!, 1].upcase, File.open(fname, 'r:ascii-8bit') { |f| f.read }
6
- else
7
- const_set fname[%r!/(\w+)\.\w+$!, 1].upcase, File.read(fname)
8
- end
9
- end
10
- end
11
- end
@@ -1,68 +0,0 @@
1
- require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
- require File.join(File.dirname(__FILE__),"load_files")
3
-
4
- class TestAlter < Nokogiri::TestCase
5
- include Nokogiri
6
-
7
- def setup
8
- super
9
- @basic = Nokogiri::HTML.parse(TestFiles::BASIC)
10
- end
11
-
12
- def test_before
13
- test0 = "<link rel='stylesheet' href='test0.css' />"
14
- @basic.at("link").before(test0)
15
- assert_equal 'test0.css', @basic.at("link").attributes['href'].to_s
16
- end
17
-
18
- def test_after
19
- test_inf = "<link rel='stylesheet' href='test_inf.css' />"
20
- @basic.search("link")[-1].after(test_inf)
21
- assert_equal 'test_inf.css', @basic.search("link")[-1]['href']
22
- end
23
-
24
- def test_wrap
25
- ohmy = (@basic/"p.ohmy").wrap("<div id='wrapper'></div>")
26
- assert_equal 'wrapper', ohmy[0].parent['id']
27
- assert_equal 'ohmy', Nokogiri(@basic.to_html).at("#wrapper").children[0]['class']
28
- end
29
-
30
- def test_add_class
31
- first_p = (@basic/"p:first").add_class("testing123")
32
- assert first_p[0].get_attribute("class").split(" ").include?("testing123")
33
- assert((Nokogiri(@basic.to_html)/"p:first")[0]["class"].split(" ").include?("testing123"))
34
- ####
35
- # Modified. We do not support OB1 bug.
36
- assert !(Nokogiri(@basic.to_html)/"p:gt(1)")[0]["class"].split(" ").include?("testing123")
37
- end
38
-
39
- def test_change_attributes
40
- all_ps = (@basic/"p").attr("title", "Some Title")
41
- all_as = (@basic/"a").attr("href", "http://my_new_href.com")
42
- all_lb = (@basic/"link").attr("href") { |e| e.name }
43
- GC.start # try to shake out GC bugs with xpath and node sets.
44
- assert_changed(@basic, "p", all_ps) {|p| p.attributes["title"].to_s == "Some Title"}
45
- assert_changed(@basic, "a", all_as) {|a| a.attributes["href"].to_s == "http://my_new_href.com"}
46
- assert_changed(@basic, "link", all_lb) {|a| a.attributes["href"].to_s == "link" }
47
- end
48
-
49
- def test_remove_attr
50
- all_rl = (@basic/"link").remove_attr("href")
51
- assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? }
52
- end
53
-
54
- def test_remove_class
55
- all_c1 = (@basic/"p[@class*='last']").remove_class("last")
56
- assert_changed(@basic, "p[@class*='last']", all_c1) { |p| p['class'] == 'final' }
57
- end
58
-
59
- def test_remove_all_classes
60
- all_c2 = (@basic/"p[@class]").remove_class
61
- assert_changed(@basic, "p[@class]", all_c2) { |p| p['class'].nil? }
62
- end
63
-
64
- def assert_changed original, selector, set, &block
65
- assert set.all?(&block)
66
- assert Nokogiri(original.to_html).search(selector).all?(&block)
67
- end
68
- end
@@ -1,20 +0,0 @@
1
- require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
-
3
- class TestBuilder < Nokogiri::TestCase
4
- ####
5
- # Modified
6
- def test_escaping_text
7
- doc = Nokogiri() { b "<a\"b>" }
8
- assert_match "<b>&lt;a\"b&gt;</b>", doc.to_html.chomp
9
- assert_equal %{&lt;a\"b&gt;}, doc.at("text()").to_s
10
- end
11
-
12
- ####
13
- # Modified
14
- def test_no_escaping_text
15
- doc = Nokogiri() { div.test.me! { text "<a\"b>" } }
16
- assert_match %{<div class="test" id="me">&lt;a"b&gt;</div>},
17
- doc.to_html.chomp
18
- assert_equal %{&lt;a\"b&gt;}, doc.at("text()").to_s
19
- end
20
- end
@@ -1,350 +0,0 @@
1
- require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
- require File.join(File.dirname(__FILE__),"load_files")
3
-
4
- class TestParser < Nokogiri::TestCase
5
- include Nokogiri
6
-
7
- def test_set_attr
8
- @basic = Nokogiri.parse(TestFiles::BASIC)
9
- @basic.search('//p').set('class', 'para')
10
- assert_equal 4, @basic.search('//p').length
11
- assert_equal 4, @basic.search('//p').find_all { |x| x['class'] == 'para' }.length
12
- end
13
-
14
- def test_filter_by_attr
15
- @boingboing = Nokogiri.parse(TestFiles::BOINGBOING)
16
-
17
- # this link is escaped in the doc
18
- link = 'http://www.youtube.com/watch?v=TvSNXyNw26g&search=chris%20ware'
19
- assert_equal link, @boingboing.at("a[@href='#{link}']")['href']
20
- end
21
-
22
- def test_filter_contains
23
- @basic = Nokogiri.parse(TestFiles::BASIC)
24
- assert_equal '<title>Sample XHTML</title>', @basic.search("title:contains('Sample')").to_s.chomp
25
- end
26
-
27
- def test_get_element_by_id
28
- @basic = Nokogiri.parse(TestFiles::BASIC)
29
- assert_equal 'link1', @basic.at('#link1')['id']
30
- assert_equal 'link1', @basic.at('#body1').at('#link1')['id']
31
- end
32
-
33
- def test_get_element_by_tag_name
34
- @basic = Nokogiri.parse(TestFiles::BASIC)
35
- assert_equal 'link1', @basic.at('a')['id']
36
- assert_equal 'link1', @basic.at('body').at('#link1')['id']
37
- end
38
-
39
- def test_output_basic
40
- @basic = Nokogiri.parse(TestFiles::BASIC)
41
- @basic2 = Nokogiri.parse(@basic.inner_html)
42
- scan_basic @basic2
43
- end
44
-
45
- def test_scan_basic
46
- @basic = Nokogiri.parse(TestFiles::BASIC)
47
- scan_basic @basic
48
- end
49
-
50
- def scan_basic doc
51
- assert_not_equal doc.children.first.to_s, doc.children[1].to_s
52
- assert_equal 'link1', doc.at('#link1')['id']
53
- assert_equal 'link1', doc.at("p a")['id']
54
- assert_equal 'link1', (doc/:p/:a).first['id']
55
- assert_equal 'link1', doc.search('p').at('a')['id']
56
-
57
- assert_equal 'link2', (doc/'p').css('.ohmy').search('a').first['id']
58
- assert_equal((doc/'p')[2], (doc/'p').css('[text()="The third paragraph"]')[0])
59
- assert_equal 3, (doc/'p:not(.ohmy)').length
60
-
61
- assert_equal 'last final', (doc/'p[@class~="final"]').first.get_attribute('class')
62
- assert_equal 2, (doc/'p > a').length
63
- assert_equal 1, (doc/'p.ohmy > a').length
64
- assert_equal 2, (doc/'p / a').length
65
- assert_equal 2, (doc/'link ~ link').length
66
- assert_equal 3, (doc/'title ~ link').length
67
- assert_equal 5, (doc/"//p/text()").length
68
- assert_equal 6, (doc/"//p[a]//text()").length
69
- assert_equal 2, (doc/"//p/a/text()").length
70
- end
71
-
72
- def test_positional
73
- h = Nokogiri( "<div><br/><p>one</p><p>two</p></div>" )
74
- assert_equal "<p>one</p>", h.search("div/p:eq(1)").to_s.chomp # MODIFIED: eq(0) -> eq(1), and removed initial '//'
75
- assert_equal "<p>one</p>", h.search("div/p:first").to_s.chomp # MODIFIED: removed initial '//'
76
- assert_equal "<p>one</p>", h.search("div/p:first()").to_s.chomp # MODIFIED: removed initial '//'
77
- end
78
-
79
- def test_pace
80
- doc = Nokogiri(TestFiles::PACE_APPLICATION)
81
- assert_equal 'get', doc.at('form[@name=frmSect11]')['method']
82
- end
83
-
84
- def test_scan_boingboing
85
- @boingboing = Nokogiri.HTML(TestFiles::BOINGBOING)
86
- assert_equal 60, (@boingboing/'p.posted').length
87
- assert_equal 1, @boingboing.search("//a[@name='027906']").length
88
- assert_equal 3, @boingboing.search("a[text()*='Boing']").length
89
- assert_equal 1, @boingboing.search(
90
- "//h3[normalize-space(text())='College kids reportedly taking more smart drugs']"
91
- ).length
92
- assert_equal 0, @boingboing.search("h3[text()='College']").length
93
- assert_equal 60, @boingboing.search("h3").length
94
- assert_equal 59, @boingboing.search("//h3[normalize-space(text())!='College kids reportedly taking more smart drugs']").length
95
- assert_equal 211, @boingboing.search("p").length
96
- end
97
-
98
- def test_reparent
99
- doc = Nokogiri(%{<div id="blurb_1"></div>})
100
- div1 = doc.search('#blurb_1')
101
- div1.before('<div id="blurb_0"></div>')
102
-
103
- div0 = doc.search('#blurb_0')
104
- div0.before('<div id="blurb_a"></div>')
105
-
106
- assert_equal 'div', doc.at('#blurb_1').name
107
- end
108
-
109
- def test_siblings
110
- @basic = Nokogiri.parse(TestFiles::BASIC)
111
- t = @basic.at(:title)
112
- e = t.next_sibling
113
- assert_equal 'test1.css', e['href']
114
- assert_equal 'title', e.previous_sibling.name
115
- end
116
-
117
- def test_css_negation
118
- @basic = Nokogiri.parse(TestFiles::BASIC)
119
- assert_equal 3, (@basic/'p:not(.final)').length
120
- end
121
-
122
- def test_remove_attribute
123
- @basic = Nokogiri.parse(TestFiles::BASIC)
124
- (@basic/:p).each { |ele| ele.remove_attribute('class') }
125
- assert_equal 0, (@basic/'p[@class]').length
126
- end
127
-
128
- def test_abs_xpath
129
- @boingboing = Nokogiri.parse(TestFiles::BOINGBOING)
130
- assert_equal 60, @boingboing.search("/html/body//p[@class='posted']").length
131
- assert_equal 60, @boingboing.search("/*/body//p[@class='posted']").length
132
- assert_equal 18, @boingboing.search("//script").length
133
- divs = @boingboing.search("//script/../div")
134
- assert_equal 2, divs.length
135
- imgs = @boingboing.search('//div/p/a/img')
136
- assert_equal 12, imgs.length
137
- assert_equal 16, @boingboing.search('//div').search('p/a/img').length
138
- assert imgs.all? { |x| x.name == 'img' }
139
- end
140
-
141
- def test_predicates
142
- @boingboing = Nokogiri.parse(TestFiles::BOINGBOING)
143
- assert_equal 2, @boingboing.search('//link[@rel="alternate"]').length
144
- p_imgs = @boingboing.search('//div/p[/a/img]')
145
- #assert_equal 15, p_imgs.length
146
- assert p_imgs.all? { |x| x.name == 'p' }
147
- p_imgs = @boingboing.search('//div/p[a/img]')
148
- assert_equal 12, p_imgs.length
149
- assert p_imgs.all? { |x| x.name == 'p' }
150
- assert_equal 1, @boingboing.search('//input[@checked]').length
151
- end
152
-
153
- def test_tag_case
154
- @tenderlove = Nokogiri.parse(TestFiles::TENDERLOVE)
155
- assert_equal 2, @tenderlove.search('//a').length
156
- assert_equal 3, @tenderlove.search('//area').length
157
- assert_equal 2, @tenderlove.search('//meta').length
158
- end
159
-
160
- def test_alt_predicates
161
- @boingboing = Nokogiri.parse(TestFiles::BOINGBOING)
162
- assert_equal 2, @boingboing.search('table/tr:last').length
163
-
164
- @basic = Nokogiri.parse(TestFiles::BASIC)
165
- assert_equal "<p>The third paragraph</p>",
166
- @basic.search('p:eq(3)').to_html.chomp
167
- @basic.search('p:last').to_html.gsub(/\s+/,' ').gsub(/>\s*</, '><')
168
- assert_equal 'last final', @basic.search('p:last-of-type').first.get_attribute('class')
169
- end
170
-
171
- def test_insert_after # ticket #63
172
- doc = Nokogiri('<html><body><div id="a-div"></div></body></html>')
173
- (doc/'div').each do |element|
174
- element.after('<p>Paragraph 1</p><p>Paragraph 2</p>')
175
- end
176
- assert_match '<div id="a-div"></div><p>Paragraph 1</p><p>Paragraph 2</p>',
177
- doc.to_html.gsub(/\n/, '').gsub(/>\s*</, '><')
178
- end
179
-
180
- def test_insert_before # ticket #61
181
- doc = Nokogiri.HTML('<html><body><div id="a-div"></div></body></html>')
182
- (doc/'div').each do |element|
183
- element.before('<p>Paragraph 1</p><p>Paragraph 2</p>')
184
- end
185
- assert_match '<p>Paragraph 1</p><p>Paragraph 2</p><div id="a-div"></div>',
186
- doc.to_html.gsub(/\n/, '').gsub(/>\s*</, '><')
187
- end
188
-
189
- def test_many_paths
190
- @boingboing = Nokogiri.parse(TestFiles::BOINGBOING)
191
- assert_equal 62, @boingboing.search('p.posted, link[@rel="alternate"]').length
192
- end
193
-
194
- def test_class_search
195
- doc = Nokogiri.HTML("<div class=xyz '>abc</div>")
196
- assert_equal 1, doc.search(".xyz").length
197
-
198
- doc = Nokogiri.HTML("<div class=xyz>abc</div><div class=abc>xyz</div>")
199
- assert_equal 1, doc.search(".xyz").length
200
- assert_equal 4, doc.search("*").length
201
- end
202
-
203
- def test_kleene_star
204
- # bug noticed by raja bhatia
205
- doc = Nokogiri.HTML("<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>")
206
- assert_equal 2, doc.search("*[@class*='small']").length
207
- assert_equal 2, doc.search("*.small").length
208
- assert_equal 2, doc.search(".small").length
209
- assert_equal 2, doc.search(".large").length
210
- end
211
-
212
- def test_empty_comment
213
- doc = Nokogiri.HTML("<p><!----></p>")
214
- doc = doc.search('//body').first
215
- assert doc.children[0].children[0].comment?
216
-
217
- doc = Nokogiri.HTML("<p><!-- --></p>")
218
- doc = doc.search('//body').first
219
- assert doc.children[0].children[0].comment?
220
- end
221
-
222
- def test_body_newlines
223
- @immob = Nokogiri.parse(TestFiles::IMMOB)
224
- body = @immob.at(:body)
225
- {'background' => '', 'bgcolor' => '#ffffff', 'text' => '#000000', 'marginheight' => '10',
226
- 'marginwidth' => '10', 'leftmargin' => '10', 'topmargin' => '10', 'link' => '#000066',
227
- 'alink' => '#ff6600', 'hlink' => "#ff6600", 'vlink' => "#000000"}.each do |k, v|
228
- assert_equal v, body[k]
229
- end
230
- end
231
-
232
- def test_nested_twins
233
- @doc = Nokogiri("<div>Hi<div>there</div></div>")
234
- assert_equal 1, (@doc/"div div").length
235
- end
236
-
237
- def test_wildcard
238
- @basic = Nokogiri::HTML.parse(TestFiles::BASIC)
239
- assert_equal 3, (@basic/"*[@id]").length
240
- assert_equal 3, (@basic/"//*[@id]").length
241
- end
242
-
243
- def test_javascripts
244
- @immob = Nokogiri::HTML.parse(TestFiles::IMMOB)
245
- assert_equal 3, (@immob/:script)[0].inner_html.scan(/<LINK/).length
246
- end
247
-
248
- ####
249
- # Modified. This test passes with later versions of libxml
250
- def test_nested_scripts
251
- @week9 = Nokogiri.parse(TestFiles::WEEK9)
252
- unless Nokogiri::LIBXML_VERSION == '2.6.16'
253
- assert_equal 14, (@week9/"a").find_all { |x| x.inner_html.include? "GameCenter" }.length
254
- end
255
- end
256
-
257
- def test_uswebgen
258
- @uswebgen = HTML.parse(TestFiles::USWEBGEN)
259
- # sent by brent beardsley, nokogiri 0.3 had problems with all the links.
260
- assert_equal 67, (@uswebgen/:a).length
261
- end
262
-
263
- def test_mangled_tags
264
- [%{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
265
- %{<html><form name='loginForm' ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'></form></html>},
266
- %{<html><form name='loginForm'?URL= ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
267
- %{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html' ?URL=></form></html>}].
268
- each do |str|
269
- doc = Nokogiri(str)
270
- assert_equal 1, (doc/:form).length
271
- assert_equal '/units/a/login/1,13088,779-1,00.html', doc.at("form")['action']
272
- end
273
- end
274
-
275
- ####
276
- # Modified. Added question. Don't care.
277
- def test_procins
278
- doc = Nokogiri.HTML("<?php print('hello') ?>\n<?xml blah='blah'?>")
279
- assert_equal "php", doc.children[1].name
280
- assert_equal "blah='blah'?", doc.children[2].content #"# quote added so emacs ruby-mode parser doesn't barf
281
- end
282
-
283
- ####
284
- # Altered... libxml does not get a buffer error
285
- def test_buffer_error
286
- assert_nothing_raised {
287
- Nokogiri(%{<p>\n\n<input type="hidden" name="__VIEWSTATE" value="#{(("X" * 2000) + "\n") * 22}" />\n\n</p>})
288
- }
289
- end
290
-
291
- def test_youtube_attr
292
- str = <<-edoc
293
- <html><body>
294
- Lorem ipsum. Jolly roger, ding-dong sing-a-long
295
- <object width="425" height="350">
296
- <param name="movie" value="http://www.youtube.com/v/NbDQ4M_cuwA"></param>
297
- <param name="wmode" value="transparent"></param>
298
- <embed src="http://www.youtube.com/v/NbDQ4M_cuwA"
299
- type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
300
- </embed>
301
- </object>
302
- Check out my posting, I have bright mice in large clown cars.
303
- <object width="425" height="350">
304
- <param name="movie" value="http://www.youtube.com/v/foobar"></param>
305
- <param name="wmode" value="transparent"></param>
306
- <embed src="http://www.youtube.com/v/foobar"
307
- type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
308
- </embed>
309
- </object>
310
- </body></html?
311
- edoc
312
- doc = Nokogiri(str)
313
- assert_equal "http://www.youtube.com/v/NbDQ4M_cuwA",
314
- doc.at("//object/param[@value='http://www.youtube.com/v/NbDQ4M_cuwA']")['value']
315
- end
316
-
317
- # ticket #84 by jamezilla
318
- def test_screwed_xmlns
319
- doc = Nokogiri(<<-edoc)
320
- <?xml:namespace prefix = cwi />
321
- <html><body>HAI</body></html>
322
- edoc
323
- assert_equal "HAI", doc.at("body").inner_text
324
- end
325
-
326
- def test_filters
327
- @basic = Nokogiri.parse(TestFiles::BASIC)
328
- assert_equal 1, (@basic/"title:parent").size
329
- assert_equal 4, (@basic/"p:parent").size
330
- assert_equal 0, (@basic/"title:empty").size
331
- assert_equal 3, (@basic/"link:empty").size
332
- end
333
-
334
- def test_keep_cdata
335
- str = %{<script> /*<![CDATA[*/
336
- /*]]>*/ </script>}
337
- # MODIFIED: if you want the cdata, to_xml it
338
- assert_match str, Nokogiri(str).to_xml
339
- end
340
-
341
- def test_namespace
342
- chunk = <<-END
343
- <a xmlns:t="http://www.nexopia.com/dev/template">
344
- <t:sam>hi </t:sam>
345
- </a>
346
- END
347
- doc = Nokogiri::XML(chunk)
348
- assert((doc/"//t:sam").size > 0)
349
- end
350
- end