tenderlove-nokogiri 0.0.0.20081001111445

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (104) hide show
  1. data/History.txt +6 -0
  2. data/Manifest.txt +105 -0
  3. data/README.txt +51 -0
  4. data/Rakefile +70 -0
  5. data/ext/nokogiri/extconf.rb +24 -0
  6. data/ext/nokogiri/html_document.c +85 -0
  7. data/ext/nokogiri/html_document.h +10 -0
  8. data/ext/nokogiri/html_sax_parser.c +32 -0
  9. data/ext/nokogiri/html_sax_parser.h +11 -0
  10. data/ext/nokogiri/native.c +35 -0
  11. data/ext/nokogiri/native.h +32 -0
  12. data/ext/nokogiri/xml_cdata.c +36 -0
  13. data/ext/nokogiri/xml_cdata.h +9 -0
  14. data/ext/nokogiri/xml_document.c +159 -0
  15. data/ext/nokogiri/xml_document.h +10 -0
  16. data/ext/nokogiri/xml_node.c +573 -0
  17. data/ext/nokogiri/xml_node.h +13 -0
  18. data/ext/nokogiri/xml_node_set.c +90 -0
  19. data/ext/nokogiri/xml_node_set.h +9 -0
  20. data/ext/nokogiri/xml_reader.c +420 -0
  21. data/ext/nokogiri/xml_reader.h +10 -0
  22. data/ext/nokogiri/xml_sax_parser.c +161 -0
  23. data/ext/nokogiri/xml_sax_parser.h +10 -0
  24. data/ext/nokogiri/xml_text.c +25 -0
  25. data/ext/nokogiri/xml_text.h +9 -0
  26. data/ext/nokogiri/xml_xpath.c +39 -0
  27. data/ext/nokogiri/xml_xpath.h +11 -0
  28. data/ext/nokogiri/xml_xpath_context.c +69 -0
  29. data/ext/nokogiri/xml_xpath_context.h +9 -0
  30. data/ext/nokogiri/xslt_stylesheet.c +83 -0
  31. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  32. data/lib/nokogiri.rb +45 -0
  33. data/lib/nokogiri/css.rb +6 -0
  34. data/lib/nokogiri/css/node.rb +95 -0
  35. data/lib/nokogiri/css/parser.rb +24 -0
  36. data/lib/nokogiri/css/parser.y +198 -0
  37. data/lib/nokogiri/css/tokenizer.rb +9 -0
  38. data/lib/nokogiri/css/tokenizer.rex +63 -0
  39. data/lib/nokogiri/css/xpath_visitor.rb +153 -0
  40. data/lib/nokogiri/decorators.rb +1 -0
  41. data/lib/nokogiri/decorators/hpricot.rb +3 -0
  42. data/lib/nokogiri/decorators/hpricot/node.rb +47 -0
  43. data/lib/nokogiri/decorators/hpricot/node_set.rb +14 -0
  44. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +13 -0
  45. data/lib/nokogiri/hpricot.rb +46 -0
  46. data/lib/nokogiri/html.rb +64 -0
  47. data/lib/nokogiri/html/builder.rb +9 -0
  48. data/lib/nokogiri/html/document.rb +9 -0
  49. data/lib/nokogiri/html/sax/parser.rb +21 -0
  50. data/lib/nokogiri/version.rb +3 -0
  51. data/lib/nokogiri/xml.rb +29 -0
  52. data/lib/nokogiri/xml/after_handler.rb +18 -0
  53. data/lib/nokogiri/xml/before_handler.rb +32 -0
  54. data/lib/nokogiri/xml/builder.rb +79 -0
  55. data/lib/nokogiri/xml/document.rb +22 -0
  56. data/lib/nokogiri/xml/node.rb +162 -0
  57. data/lib/nokogiri/xml/node_set.rb +136 -0
  58. data/lib/nokogiri/xml/reader.rb +14 -0
  59. data/lib/nokogiri/xml/sax.rb +9 -0
  60. data/lib/nokogiri/xml/sax/document.rb +59 -0
  61. data/lib/nokogiri/xml/sax/parser.rb +33 -0
  62. data/lib/nokogiri/xml/text.rb +6 -0
  63. data/lib/nokogiri/xml/xpath.rb +6 -0
  64. data/lib/nokogiri/xslt.rb +11 -0
  65. data/lib/nokogiri/xslt/stylesheet.rb +6 -0
  66. data/nokogiri.gemspec +33 -0
  67. data/test/css/test_nthiness.rb +141 -0
  68. data/test/css/test_parser.rb +214 -0
  69. data/test/css/test_tokenizer.rb +162 -0
  70. data/test/files/staff.xml +57 -0
  71. data/test/files/staff.xslt +32 -0
  72. data/test/files/tlm.html +850 -0
  73. data/test/helper.rb +70 -0
  74. data/test/hpricot/files/basic.xhtml +17 -0
  75. data/test/hpricot/files/boingboing.html +2266 -0
  76. data/test/hpricot/files/cy0.html +3653 -0
  77. data/test/hpricot/files/immob.html +400 -0
  78. data/test/hpricot/files/pace_application.html +1320 -0
  79. data/test/hpricot/files/tenderlove.html +16 -0
  80. data/test/hpricot/files/uswebgen.html +220 -0
  81. data/test/hpricot/files/utf8.html +1054 -0
  82. data/test/hpricot/files/week9.html +1723 -0
  83. data/test/hpricot/files/why.xml +19 -0
  84. data/test/hpricot/load_files.rb +7 -0
  85. data/test/hpricot/test_alter.rb +67 -0
  86. data/test/hpricot/test_builder.rb +27 -0
  87. data/test/hpricot/test_parser.rb +412 -0
  88. data/test/hpricot/test_paths.rb +15 -0
  89. data/test/hpricot/test_preserved.rb +72 -0
  90. data/test/hpricot/test_xml.rb +26 -0
  91. data/test/html/sax/test_parser.rb +27 -0
  92. data/test/html/test_builder.rb +78 -0
  93. data/test/html/test_document.rb +22 -0
  94. data/test/test_convert_xpath.rb +173 -0
  95. data/test/test_nokogiri.rb +36 -0
  96. data/test/test_reader.rb +222 -0
  97. data/test/test_xslt_transforms.rb +29 -0
  98. data/test/xml/sax/test_parser.rb +93 -0
  99. data/test/xml/test_builder.rb +16 -0
  100. data/test/xml/test_document.rb +141 -0
  101. data/test/xml/test_node.rb +148 -0
  102. data/test/xml/test_node_set.rb +54 -0
  103. data/test/xml/test_text.rb +13 -0
  104. metadata +191 -0
@@ -0,0 +1,19 @@
1
+ <?xml version='1.0'?><rss xmlns:admin='http://webns.net/mvcb/' version='2.0' xmlns:sy='http://purl.org/rss/1.0/modules/syndication/' xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
2
+ <channel>
3
+ <title>why the lucky stiff</title>
4
+ <link>http://whytheluckystiff.net</link>
5
+ <description>hex-editing reality to give us infinite grenades!!</description>
6
+ <dc:language>en-us</dc:language>
7
+ <dc:creator/>
8
+ <dc:date>2007-01-16T22:39:04+00:00</dc:date>
9
+ <admin:generatorAgent rdf:resource='http://hobix.com/?v=0.4'/>
10
+ <sy:updatePeriod>hourly</sy:updatePeriod>
11
+ <sy:updateFrequency>1</sy:updateFrequency>
12
+ <sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
13
+ <item><title>1.3</title><link>http://whytheluckystiff.net/quatrains/1.3.html</link><guid isPermaLink='false'>quatrains/1.3@http://whytheluckystiff.net</guid><dc:subject>quatrains</dc:subject><dc:subject>quatrains</dc:subject><dc:creator>why the lucky stiff</dc:creator><dc:date>2007-01-14T08:47:05+00:00</dc:date><description>&lt;blockquote&gt;
14
+ &lt;p&gt;That cadillac of yours and that driver of yours!&lt;br /&gt;You and your teacups rattling away in the back seat!&lt;br /&gt;You always took the mike, oh, and all those cowboys you shot!&lt;br /&gt;I held your hand! And I&amp;#8217;ll shoot a cowboy one day!&lt;/p&gt;
15
+ &lt;/blockquote&gt;
16
+ &lt;blockquote&gt;
17
+ &lt;p&gt;You said, &amp;#8220;Let&amp;#8217;s run into the woods like kids!&amp;#8221; &lt;br /&gt;You said, &amp;#8220;Let&amp;#8217;s rub our hands together super-hot!&amp;#8221; &lt;br /&gt;And we scalded the trees and left octagons, I think that was you and&lt;br /&gt;You threw parties on the roof!&lt;/p&gt;
18
+ &lt;/blockquote&gt;</description></item></channel>
19
+ </rss>
@@ -0,0 +1,7 @@
1
+ module TestFiles
2
+ Dir.chdir(File.dirname(__FILE__)) do
3
+ Dir['files/*.{html,xhtml,xml}'].each do |fname|
4
+ const_set fname[%r!/(\w+)\.\w+$!, 1].upcase, IO.read(fname)
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,67 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+ require File.join(File.dirname(__FILE__),"load_files")
3
+
4
+ class TestAlter < Nokogiri::TestCase
5
+ include Nokogiri
6
+
7
+ def setup
8
+ @basic = Hpricot.parse(TestFiles::BASIC)
9
+ end
10
+
11
+ def test_before
12
+ test0 = "<link rel='stylesheet' href='test0.css' />"
13
+ @basic.at("link").before(test0)
14
+ assert_equal 'test0.css', @basic.at("link").attributes['href']
15
+ end
16
+
17
+ def test_after
18
+ test_inf = "<link rel='stylesheet' href='test_inf.css' />"
19
+ @basic.search("link")[-1].after(test_inf)
20
+ assert_equal 'test_inf.css', @basic.search("link")[-1].attributes['href']
21
+ end
22
+
23
+ def test_wrap
24
+ ohmy = (@basic/"p.ohmy").wrap("<div id='wrapper'></div>")
25
+ assert_equal 'wrapper', ohmy[0].parent['id']
26
+ assert_equal 'ohmy', Nokogiri.Hpricot(@basic.to_html).at("#wrapper").children[0]['class']
27
+ end
28
+
29
+ def test_add_class
30
+ first_p = (@basic/"p:first").add_class("testing123")
31
+ assert first_p[0].get_attribute("class").split(" ").include?("testing123")
32
+ assert (Nokogiri.Hpricot(@basic.to_html)/"p:first")[0].attributes["class"].split(" ").include?("testing123")
33
+ ####
34
+ # Modified. We do not support OB1 bug.
35
+ assert !(Nokogiri.Hpricot(@basic.to_html)/"p:gt(1)")[0].attributes["class"].split(" ").include?("testing123")
36
+ end
37
+
38
+ def test_change_attributes
39
+ all_ps = (@basic/"p").attr("title", "Some Title")
40
+ all_as = (@basic/"a").attr("href", "http://my_new_href.com")
41
+ all_lb = (@basic/"link").attr("href") { |e| e.name }
42
+ GC.start # try to shake out GC bugs with xpath and node sets.
43
+ assert_changed(@basic, "p", all_ps) {|p| p.attributes["title"] == "Some Title"}
44
+ assert_changed(@basic, "a", all_as) {|a| a.attributes["href"] == "http://my_new_href.com"}
45
+ assert_changed(@basic, "link", all_lb) {|a| a.attributes["href"] == "link" }
46
+ end
47
+
48
+ def test_remove_attr
49
+ all_rl = (@basic/"link").remove_attr("href")
50
+ assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? }
51
+ end
52
+
53
+ def test_remove_class
54
+ all_c1 = (@basic/"p[@class*='last']").remove_class("last")
55
+ assert_changed(@basic, "p[@class*='last']", all_c1) { |p| p['class'] == 'final' }
56
+ end
57
+
58
+ def test_remove_all_classes
59
+ all_c2 = (@basic/"p[@class]").remove_class
60
+ assert_changed(@basic, "p[@class]", all_c2) { |p| p['class'].nil? }
61
+ end
62
+
63
+ def assert_changed original, selector, set, &block
64
+ assert set.all?(&block)
65
+ assert Nokogiri.Hpricot(original.to_html).search(selector).all?(&block)
66
+ end
67
+ end
@@ -0,0 +1,27 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+
3
+ class TestBuilder < Nokogiri::TestCase
4
+ ####
5
+ # Modified
6
+ def test_escaping_text
7
+ doc = Nokogiri.Hpricot() { b "<a\"b>" }
8
+ assert_equal "<b>&lt;a\"b&gt;</b>", doc.to_html.chomp
9
+ assert_equal %{&lt;a\"b&gt;}, doc.at("text()").to_s
10
+ end
11
+
12
+ ####
13
+ # Modified
14
+ def test_no_escaping_text
15
+ doc = Nokogiri.Hpricot() { div.test.me! { text "<a\"b>" } }
16
+ assert_equal %{<div class="test" id="me">&lt;a"b&gt;</div>},
17
+ doc.to_html.chomp
18
+ assert_equal %{&lt;a\"b&gt;}, doc.at("text()").to_s
19
+ end
20
+
21
+ ####
22
+ # Modified
23
+ def test_latin1_entities
24
+ doc = Nokogiri.Hpricot() { b "\200\225" }
25
+ assert_equal "<b>&#21;</b>", doc.to_html.chomp
26
+ end
27
+ end
@@ -0,0 +1,412 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+ require File.join(File.dirname(__FILE__),"load_files")
3
+
4
+ class TestParser < Nokogiri::TestCase
5
+ include Nokogiri
6
+
7
+ def test_set_attr
8
+ @basic = Hpricot.parse(TestFiles::BASIC)
9
+ @basic.search('//p').set('class', 'para')
10
+ assert_equal 4, @basic.search('//p').length
11
+ assert_equal 4, @basic.search('//p').find_all { |x| x['class'] == 'para' }.length
12
+ end
13
+
14
+ # Test creating a new element
15
+ def test_new_element
16
+ elem = Hpricot::Elem.new(Hpricot::STag.new('form'))
17
+ assert_not_nil(elem)
18
+ assert_not_nil(elem.attributes)
19
+ end
20
+
21
+ def test_scan_text
22
+ assert_equal 'FOO', Hpricot.make("FOO").first.content
23
+ end
24
+
25
+ def test_filter_by_attr
26
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
27
+
28
+ # this link is escaped in the doc
29
+ link = 'http://www.youtube.com/watch?v=TvSNXyNw26g&search=chris%20ware'
30
+ assert_equal link, @boingboing.at("a[@href='#{link}']")['href']
31
+ end
32
+
33
+ def test_filter_contains
34
+ @basic = Hpricot.parse(TestFiles::BASIC)
35
+ assert_equal '<title>Sample XHTML</title>', @basic.search("title:contains('Sample')").to_s
36
+ end
37
+
38
+ def test_get_element_by_id
39
+ @basic = Hpricot.parse(TestFiles::BASIC)
40
+ assert_equal 'link1', @basic.get_element_by_id('link1')['id']
41
+ assert_equal 'link1', @basic.get_element_by_id('body1').get_element_by_id('link1').get_attribute('id')
42
+ end
43
+
44
+ def test_get_element_by_tag_name
45
+ @basic = Hpricot.parse(TestFiles::BASIC)
46
+ assert_equal 'link1', @basic.get_elements_by_tag_name('a')[0].get_attribute('id')
47
+ assert_equal 'link1', @basic.get_elements_by_tag_name('body')[0].get_element_by_id('link1').get_attribute('id')
48
+ end
49
+
50
+ def test_output_basic
51
+ @basic = Hpricot.parse(TestFiles::BASIC)
52
+ @basic2 = Hpricot.parse(@basic.inner_html)
53
+ scan_basic @basic2
54
+ end
55
+
56
+ def test_scan_basic
57
+ @basic = Hpricot.parse(TestFiles::BASIC)
58
+ scan_basic @basic
59
+ end
60
+
61
+ def scan_basic doc
62
+ ####
63
+ # Modified: asserting kind is not duck typey
64
+ #assert_kind_of Hpricot::XMLDecl, doc.children.first
65
+ assert_not_equal doc.children.first.to_s, doc.children[1].to_s
66
+ assert_equal 'link1', doc.at('#link1')['id']
67
+ assert_equal 'link1', doc.at("p a")['id']
68
+ assert_equal 'link1', (doc/:p/:a).first['id']
69
+ assert_equal 'link1', doc.search('p').at('a').get_attribute('id')
70
+ assert_equal 'link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id')
71
+ assert_equal (doc/'p')[2], (doc/'p').filter(':nth(2)')[0]
72
+ assert_equal (doc/'p')[2], (doc/'p').filter(':nth(2)')[0]
73
+ assert_equal (doc/'p')[2], (doc/'p').filter('[3]')[0]
74
+ assert_equal 4, (doc/'p').filter('*').length
75
+ assert_equal 4, (doc/'p').filter('* *').length
76
+ eles = (doc/'p').filter('.ohmy')
77
+ assert_equal 1, eles.length
78
+ assert_equal 'ohmy', eles.first.get_attribute('class')
79
+ assert_equal 3, (doc/'p:not(.ohmy)').length
80
+ assert_equal 3, (doc/'p').not('.ohmy').length
81
+ assert_equal 3, (doc/'p').not(eles.first).length
82
+ assert_equal 2, (doc/'p').filter('[@class]').length
83
+ assert_equal 'last final', (doc/'p[@class~="final"]').first.get_attribute('class')
84
+ assert_equal 1, (doc/'p').filter('[@class~="final"]').length
85
+ assert_equal 2, (doc/'p > a').length
86
+ assert_equal 1, (doc/'p.ohmy > a').length
87
+ assert_equal 2, (doc/'p / a').length
88
+ assert_equal 2, (doc/'link ~ link').length
89
+ assert_equal 3, (doc/'title ~ link').length
90
+ assert_equal 5, (doc/"//p/text()").length
91
+ assert_equal 6, (doc/"//p[a]//text()").length
92
+ assert_equal 2, (doc/"//p/a/text()").length
93
+ end
94
+
95
+ def test_positional
96
+ h = Nokogiri.Hpricot( "<div><br/><p>one</p><p>two</p></div>" )
97
+ assert_equal "<p>one</p>", h.search("div/p:eq(1)").to_s # MODIFIED: eq(0) -> eq(1), and removed initial '//'
98
+ assert_equal "<p>one</p>", h.search("div/p:first").to_s # MODIFIED: removed initial '//'
99
+ assert_equal "<p>one</p>", h.search("div/p:first()").to_s # MODIFIED: removed initial '//'
100
+ end
101
+
102
+ def test_pace
103
+ doc = Nokogiri.Hpricot(TestFiles::PACE_APPLICATION)
104
+ assert_equal 'get', doc.at('form[@name=frmSect11]')['method']
105
+ # assert_equal '2', doc.at('#hdnSpouse')['value']
106
+ end
107
+
108
+ def test_scan_boingboing
109
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
110
+ assert_equal 60, (@boingboing/'p.posted').length
111
+ assert_equal 1, @boingboing.search("//a[@name='027906']").length
112
+ ### MODIFIED: libxml wraps the contents of <script> in a CDATA tag, so we won't be able to parse comments.
113
+ # assert_equal 10, @boingboing.search("script comment()").length
114
+ assert_equal 3, @boingboing.search("a[text()*='Boing']").length
115
+ assert_equal 1, @boingboing.search("h3[text()='College kids reportedly taking more smart drugs']").length
116
+ assert_equal 0, @boingboing.search("h3[text()='College']").length
117
+ assert_equal 60, @boingboing.search("h3").length
118
+ assert_equal 59, @boingboing.search("h3[text()!='College kids reportedly taking more smart drugs']").length
119
+ assert_equal 17, @boingboing.search("h3[text()$='s']").length
120
+ ### Modified. Hpricot is wrong
121
+ #assert_equal 129, @boingboing.search("p[text()]").length
122
+ if Nokogiri::LIBXML_VERSION == '2.6.16'
123
+ assert_equal 111, @boingboing.search("p[text()]").length
124
+ else
125
+ assert_equal 110, @boingboing.search("p[text()]").length
126
+ end
127
+ assert_equal 211, @boingboing.search("p").length
128
+ end
129
+
130
+ def test_reparent
131
+ doc = Nokogiri.Hpricot(%{<div id="blurb_1"></div>})
132
+ div1 = doc.search('#blurb_1')
133
+ div1.before('<div id="blurb_0"></div>')
134
+
135
+ div0 = doc.search('#blurb_0')
136
+ div0.before('<div id="blurb_a"></div>')
137
+
138
+ assert_equal 'div', doc.at('#blurb_1').name
139
+ end
140
+
141
+ def test_siblings
142
+ @basic = Hpricot.parse(TestFiles::BASIC)
143
+ t = @basic.at(:title)
144
+ e = t.next_sibling
145
+ assert_equal 'test1.css', e['href']
146
+ assert_equal 'title', e.previous_sibling.name
147
+ end
148
+
149
+ def test_css_negation
150
+ @basic = Hpricot.parse(TestFiles::BASIC)
151
+ assert_equal 3, (@basic/'p:not(.final)').length
152
+ end
153
+
154
+ def test_remove_attribute
155
+ @basic = Hpricot.parse(TestFiles::BASIC)
156
+ (@basic/:p).each { |ele| ele.remove_attribute('class') }
157
+ assert_equal 0, (@basic/'p[@class]').length
158
+ end
159
+
160
+ ##
161
+ # Modified: hpricot is giving incorrect counts. Libxml gets it right.
162
+ def test_abs_xpath
163
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
164
+ assert_equal 60, @boingboing.search("/html/body//p[@class='posted']").length
165
+ assert_equal 60, @boingboing.search("/*/body//p[@class='posted']").length
166
+ assert_equal 18, @boingboing.search("//script").length
167
+ divs = @boingboing.search("//script/../div")
168
+ assert_equal 2, divs.length # hpricot says this is 1, but that's wrong.
169
+ imgs = @boingboing.search('//div/p/a/img')
170
+ assert_equal 12, imgs.length # hpricot says this is 15, but that's wrong.
171
+ assert_equal 16, @boingboing.search('//div').search('p/a/img').length
172
+ assert imgs.all? { |x| x.name == 'img' }
173
+ end
174
+
175
+ def test_predicates
176
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
177
+ assert_equal 2, @boingboing.search('//link[@rel="alternate"]').length
178
+ p_imgs = @boingboing.search('//div/p[/a/img]')
179
+ assert_equal 15, p_imgs.length
180
+ assert p_imgs.all? { |x| x.name == 'p' }
181
+ p_imgs = @boingboing.search('//div/p[a/img]')
182
+ assert_equal 18, p_imgs.length
183
+ assert p_imgs.all? { |x| x.name == 'p' }
184
+ assert_equal 1, @boingboing.search('//input[@checked]').length
185
+ end
186
+
187
+ def test_tag_case
188
+ @tenderlove = Hpricot.parse(TestFiles::TENDERLOVE)
189
+ assert_equal 2, @tenderlove.search('//a').length
190
+ assert_equal 3, @tenderlove.search('//area').length
191
+ assert_equal 2, @tenderlove.search('//meta').length
192
+ end
193
+
194
+ def test_alt_predicates
195
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
196
+ assert_equal 2, @boingboing.search('table/tr:last').length # MODIFIED to not have '//' prefix
197
+
198
+ @basic = Hpricot.parse(TestFiles::BASIC)
199
+ ##
200
+ # MODIFIED:
201
+ # hpricot has an off-by-one bug with eq-and-friends.
202
+ assert_equal "<p>The third paragraph</p>",
203
+ @basic.search('p:eq(3)').to_html # under Hpricot this was eq(2)
204
+ ##
205
+ # MODIFIED: to be blank-agnostic, because Nokogiri's to_html is slightly different compared to Hpricot.
206
+ assert_equal '<p class="last final"> <b>THE FINAL PARAGRAPH</b> </p>',
207
+ @basic.search('p:last').to_html.gsub(/\s+/,' ')
208
+ assert_equal 'last final', @basic.search('p:last-of-type').first.get_attribute('class') # MODIFIED to not have '//' prefix
209
+ end
210
+
211
+ def test_insert_after # ticket #63
212
+ doc = Nokogiri.Hpricot('<html><body><div id="a-div"></div></body></html>')
213
+ (doc/'div').each do |element|
214
+ element.after('<p>Paragraph 1</p><p>Paragraph 2</p>')
215
+ end
216
+ assert_match '<div id="a-div"></div><p>Paragraph 1</p><p>Paragraph 2</p>',
217
+ doc.to_html.gsub(/\n/, '')
218
+ end
219
+
220
+ def test_insert_before # ticket #61
221
+ doc = Nokogiri.Hpricot('<html><body><div id="a-div"></div></body></html>')
222
+ (doc/'div').each do |element|
223
+ element.before('<p>Paragraph 1</p><p>Paragraph 2</p>')
224
+ end
225
+ assert_match '<p>Paragraph 1</p><p>Paragraph 2</p><div id="a-div"></div>',
226
+ doc.to_html.gsub(/\n/, '')
227
+ end
228
+
229
+ def test_many_paths
230
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
231
+ assert_equal 62, @boingboing.search('p.posted, link[@rel="alternate"]').length
232
+ ###
233
+ # Modified. I don't want to support this syntax. Just use a comma.
234
+ #assert_equal 20, @boingboing.search('//div/p[a/img]|//link[@rel="alternate"]').length
235
+ end
236
+
237
+ ####
238
+ # Modified. Epic Fail. We're on the duck type train folks.
239
+ #def test_stacked_search
240
+ # @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
241
+ # assert_kind_of Hpricot::Elements, @boingboing.search('//div/p').search('a img')
242
+ #end
243
+
244
+ def test_class_search
245
+ # test case sent by Chih-Chao Lam
246
+ doc = Nokogiri.Hpricot("<div class=xyz'>abc</div>")
247
+ assert_equal 1, doc.search(".xyz").length
248
+ doc = Nokogiri.Hpricot("<div class=xyz>abc</div><div class=abc>xyz</div>")
249
+ assert_equal 1, doc.search(".xyz").length
250
+ assert_equal 4, doc.search("*").length
251
+ end
252
+
253
+ def test_kleene_star
254
+ # bug noticed by raja bhatia
255
+ doc = Nokogiri.Hpricot("<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>")
256
+ assert_equal 2, doc.search("*[@class*='small']").length
257
+ assert_equal 2, doc.search("*.small").length
258
+ assert_equal 2, doc.search(".small").length
259
+ assert_equal 2, doc.search(".large").length
260
+ end
261
+
262
+ def test_empty_comment
263
+ doc = Nokogiri.Hpricot("<p><!----></p>")
264
+ doc = doc.search('//body').first
265
+ assert doc.children[0].children[0].comment?
266
+
267
+ doc = Nokogiri.Hpricot("<p><!-- --></p>")
268
+ doc = doc.search('//body').first
269
+ assert doc.children[0].children[0].comment?
270
+ end
271
+
272
+ def test_body_newlines
273
+ @immob = Hpricot.parse(TestFiles::IMMOB)
274
+ body = @immob.at(:body)
275
+ {'background' => '', 'bgcolor' => '#ffffff', 'text' => '#000000', 'marginheight' => '10',
276
+ 'marginwidth' => '10', 'leftmargin' => '10', 'topmargin' => '10', 'link' => '#000066',
277
+ 'alink' => '#ff6600', 'hlink' => "#ff6600", 'vlink' => "#000000"}.each do |k, v|
278
+ assert_equal v, body[k]
279
+ end
280
+ end
281
+
282
+ def test_nested_twins
283
+ @doc = Nokogiri.Hpricot("<div>Hi<div>there</div></div>")
284
+ assert_equal 1, (@doc/"div div").length
285
+ end
286
+
287
+ def test_wildcard
288
+ @basic = Hpricot.parse(TestFiles::BASIC)
289
+ assert_equal 3, (@basic/"*[@id]").length
290
+ assert_equal 3, (@basic/"//*[@id]").length
291
+ end
292
+
293
+ def test_javascripts
294
+ @immob = Hpricot.parse(TestFiles::IMMOB)
295
+ assert_equal 3, (@immob/:script)[0].inner_html.scan(/<LINK/).length
296
+ end
297
+
298
+ def test_nested_scripts
299
+ @week9 = Hpricot.parse(TestFiles::WEEK9)
300
+ assert_equal 14, (@week9/"a").find_all { |x| x.inner_html.include? "GameCenter" }.length
301
+ end
302
+
303
+ def test_uswebgen
304
+ @uswebgen = Hpricot.parse(TestFiles::USWEBGEN)
305
+ # sent by brent beardsley, nokogiri 0.3 had problems with all the links.
306
+ assert_equal 67, (@uswebgen/:a).length
307
+ end
308
+
309
+ def test_mangled_tags
310
+ [%{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
311
+ %{<html><form name='loginForm' ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'></form></html>},
312
+ %{<html><form name='loginForm'?URL= ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
313
+ %{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html' ?URL=></form></html>}].
314
+ each do |str|
315
+ doc = Nokogiri.Hpricot(str)
316
+ assert_equal 1, (doc/:form).length
317
+ assert_equal '/units/a/login/1,13088,779-1,00.html', doc.at("form")['action']
318
+ end
319
+ end
320
+
321
+ ####
322
+ # Modified. Added question. Don't care.
323
+ def test_procins
324
+ doc = Nokogiri.Hpricot("<?php print('hello') ?>\n<?xml blah='blah'?>")
325
+ assert_equal "php", doc.children[1].target
326
+ assert_equal "blah='blah'?", doc.children[2].content
327
+ end
328
+
329
+ ####
330
+ # Altered... libxml does not get a buffer error
331
+ def test_buffer_error
332
+ assert_nothing_raised {
333
+ Nokogiri.Hpricot(%{<p>\n\n<input type="hidden" name="__VIEWSTATE" value="#{(("X" * 2000) + "\n") * 22}" />\n\n</p>})
334
+ }
335
+ end
336
+
337
+ def test_youtube_attr
338
+ str = <<-edoc
339
+ <html><body>
340
+ Lorem ipsum. Jolly roger, ding-dong sing-a-long
341
+ <object width="425" height="350">
342
+ <param name="movie" value="http://www.youtube.com/v/NbDQ4M_cuwA"></param>
343
+ <param name="wmode" value="transparent"></param>
344
+ <embed src="http://www.youtube.com/v/NbDQ4M_cuwA"
345
+ type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
346
+ </embed>
347
+ </object>
348
+ Check out my posting, I have bright mice in large clown cars.
349
+ <object width="425" height="350">
350
+ <param name="movie" value="http://www.youtube.com/v/foobar"></param>
351
+ <param name="wmode" value="transparent"></param>
352
+ <embed src="http://www.youtube.com/v/foobar"
353
+ type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
354
+ </embed>
355
+ </object>
356
+ </body></html?
357
+ edoc
358
+ doc = Nokogiri.Hpricot(str)
359
+ assert_equal "http://www.youtube.com/v/NbDQ4M_cuwA",
360
+ doc.at("//object/param[@value='http://www.youtube.com/v/NbDQ4M_cuwA']")['value']
361
+ end
362
+
363
+ # ticket #84 by jamezilla
364
+ def test_screwed_xmlns
365
+ doc = Nokogiri.Hpricot(<<-edoc)
366
+ <?xml:namespace prefix = cwi />
367
+ <html><body>HAI</body></html>
368
+ edoc
369
+ assert_equal "HAI", doc.at("body").inner_text
370
+ end
371
+
372
+ # Reported by Jonathan Nichols on the Hpricot list (24 May 2007)
373
+ def test_self_closed_form
374
+ doc = Nokogiri.Hpricot(<<-edoc)
375
+ <body>
376
+ <form action="/loginRegForm" name="regForm" method="POST" />
377
+ <input type="button">
378
+ </form>
379
+ </body>
380
+ edoc
381
+ assert_equal "button", doc.at("//form/input")['type']
382
+ end
383
+
384
+ def test_filters
385
+ @basic = Hpricot.parse(TestFiles::BASIC)
386
+ ##
387
+ # MODIFIED:
388
+ # Hpricot considers nodes with text-only (but no child tags) to be empty.
389
+ # Nokogiri considers that any content makes a parent.
390
+ assert_equal 1, (@basic/"title:parent").size # so this was 0 under Hpricot
391
+ assert_equal 4, (@basic/"p:parent").size
392
+ assert_equal 0, (@basic/"title:empty").size
393
+ assert_equal 3, (@basic/"link:empty").size
394
+ end
395
+
396
+ def test_keep_cdata
397
+ str = %{<script> /*<![CDATA[*/
398
+ /*]]>*/ </script>}
399
+ assert_match str, Nokogiri.Hpricot(str).to_html
400
+ end
401
+
402
+ def test_namespace
403
+ chunk = <<-END
404
+ <a xmlns:t="http://www.nexopia.com/dev/template">
405
+ <t:sam>hi </t:sam>
406
+ </a>
407
+ END
408
+ doc = Hpricot::XML(chunk)
409
+ assert (doc/"//t:sam").size > 0 # at least this should probably work
410
+ # assert (doc/"//sam").size > 0 # this would be nice
411
+ end
412
+ end