nokogiri 1.0.0-x86-mswin32-60

Sign up to get free protection for your applications and to get access to all the features.

Potentially problematic release.


This version of nokogiri might be problematic. Click here for more details.

Files changed (127) hide show
  1. data/History.txt +6 -0
  2. data/Manifest.txt +120 -0
  3. data/README.ja.txt +86 -0
  4. data/README.txt +87 -0
  5. data/Rakefile +264 -0
  6. data/ext/nokogiri/extconf.rb +59 -0
  7. data/ext/nokogiri/html_document.c +83 -0
  8. data/ext/nokogiri/html_document.h +10 -0
  9. data/ext/nokogiri/html_sax_parser.c +32 -0
  10. data/ext/nokogiri/html_sax_parser.h +11 -0
  11. data/ext/nokogiri/iconv.dll +0 -0
  12. data/ext/nokogiri/libexslt.dll +0 -0
  13. data/ext/nokogiri/libxml2.dll +0 -0
  14. data/ext/nokogiri/libxslt.dll +0 -0
  15. data/ext/nokogiri/native.c +40 -0
  16. data/ext/nokogiri/native.h +51 -0
  17. data/ext/nokogiri/native.so +0 -0
  18. data/ext/nokogiri/xml_cdata.c +52 -0
  19. data/ext/nokogiri/xml_cdata.h +9 -0
  20. data/ext/nokogiri/xml_document.c +159 -0
  21. data/ext/nokogiri/xml_document.h +10 -0
  22. data/ext/nokogiri/xml_dtd.c +117 -0
  23. data/ext/nokogiri/xml_dtd.h +8 -0
  24. data/ext/nokogiri/xml_node.c +709 -0
  25. data/ext/nokogiri/xml_node.h +15 -0
  26. data/ext/nokogiri/xml_node_set.c +124 -0
  27. data/ext/nokogiri/xml_node_set.h +9 -0
  28. data/ext/nokogiri/xml_reader.c +429 -0
  29. data/ext/nokogiri/xml_reader.h +10 -0
  30. data/ext/nokogiri/xml_sax_parser.c +174 -0
  31. data/ext/nokogiri/xml_sax_parser.h +10 -0
  32. data/ext/nokogiri/xml_syntax_error.c +194 -0
  33. data/ext/nokogiri/xml_syntax_error.h +11 -0
  34. data/ext/nokogiri/xml_text.c +29 -0
  35. data/ext/nokogiri/xml_text.h +9 -0
  36. data/ext/nokogiri/xml_xpath.c +46 -0
  37. data/ext/nokogiri/xml_xpath.h +11 -0
  38. data/ext/nokogiri/xml_xpath_context.c +81 -0
  39. data/ext/nokogiri/xml_xpath_context.h +9 -0
  40. data/ext/nokogiri/xslt_stylesheet.c +108 -0
  41. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  42. data/ext/nokogiri/zlib1.dll +0 -0
  43. data/lib/nokogiri.rb +51 -0
  44. data/lib/nokogiri/css.rb +6 -0
  45. data/lib/nokogiri/css/generated_parser.rb +653 -0
  46. data/lib/nokogiri/css/generated_tokenizer.rb +159 -0
  47. data/lib/nokogiri/css/node.rb +95 -0
  48. data/lib/nokogiri/css/parser.rb +24 -0
  49. data/lib/nokogiri/css/parser.y +198 -0
  50. data/lib/nokogiri/css/tokenizer.rb +9 -0
  51. data/lib/nokogiri/css/tokenizer.rex +63 -0
  52. data/lib/nokogiri/css/xpath_visitor.rb +165 -0
  53. data/lib/nokogiri/decorators.rb +1 -0
  54. data/lib/nokogiri/decorators/hpricot.rb +3 -0
  55. data/lib/nokogiri/decorators/hpricot/node.rb +58 -0
  56. data/lib/nokogiri/decorators/hpricot/node_set.rb +14 -0
  57. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +17 -0
  58. data/lib/nokogiri/hpricot.rb +47 -0
  59. data/lib/nokogiri/html.rb +95 -0
  60. data/lib/nokogiri/html/builder.rb +9 -0
  61. data/lib/nokogiri/html/document.rb +9 -0
  62. data/lib/nokogiri/html/sax/parser.rb +21 -0
  63. data/lib/nokogiri/version.rb +3 -0
  64. data/lib/nokogiri/xml.rb +67 -0
  65. data/lib/nokogiri/xml/after_handler.rb +18 -0
  66. data/lib/nokogiri/xml/before_handler.rb +32 -0
  67. data/lib/nokogiri/xml/builder.rb +79 -0
  68. data/lib/nokogiri/xml/cdata.rb +9 -0
  69. data/lib/nokogiri/xml/document.rb +30 -0
  70. data/lib/nokogiri/xml/dtd.rb +6 -0
  71. data/lib/nokogiri/xml/element.rb +6 -0
  72. data/lib/nokogiri/xml/entity_declaration.rb +9 -0
  73. data/lib/nokogiri/xml/node.rb +195 -0
  74. data/lib/nokogiri/xml/node_set.rb +183 -0
  75. data/lib/nokogiri/xml/notation.rb +6 -0
  76. data/lib/nokogiri/xml/reader.rb +14 -0
  77. data/lib/nokogiri/xml/sax.rb +9 -0
  78. data/lib/nokogiri/xml/sax/document.rb +59 -0
  79. data/lib/nokogiri/xml/sax/parser.rb +33 -0
  80. data/lib/nokogiri/xml/syntax_error.rb +21 -0
  81. data/lib/nokogiri/xml/text.rb +6 -0
  82. data/lib/nokogiri/xml/xpath.rb +6 -0
  83. data/lib/nokogiri/xml/xpath_context.rb +14 -0
  84. data/lib/nokogiri/xslt.rb +11 -0
  85. data/lib/nokogiri/xslt/stylesheet.rb +6 -0
  86. data/nokogiri.gemspec +34 -0
  87. data/test/css/test_nthiness.rb +159 -0
  88. data/test/css/test_parser.rb +224 -0
  89. data/test/css/test_tokenizer.rb +162 -0
  90. data/test/css/test_xpath_visitor.rb +54 -0
  91. data/test/files/staff.xml +59 -0
  92. data/test/files/staff.xslt +32 -0
  93. data/test/files/tlm.html +850 -0
  94. data/test/helper.rb +70 -0
  95. data/test/hpricot/files/basic.xhtml +17 -0
  96. data/test/hpricot/files/boingboing.html +2266 -0
  97. data/test/hpricot/files/cy0.html +3653 -0
  98. data/test/hpricot/files/immob.html +400 -0
  99. data/test/hpricot/files/pace_application.html +1320 -0
  100. data/test/hpricot/files/tenderlove.html +16 -0
  101. data/test/hpricot/files/uswebgen.html +220 -0
  102. data/test/hpricot/files/utf8.html +1054 -0
  103. data/test/hpricot/files/week9.html +1723 -0
  104. data/test/hpricot/files/why.xml +19 -0
  105. data/test/hpricot/load_files.rb +7 -0
  106. data/test/hpricot/test_alter.rb +67 -0
  107. data/test/hpricot/test_builder.rb +27 -0
  108. data/test/hpricot/test_parser.rb +423 -0
  109. data/test/hpricot/test_paths.rb +15 -0
  110. data/test/hpricot/test_preserved.rb +78 -0
  111. data/test/hpricot/test_xml.rb +30 -0
  112. data/test/html/sax/test_parser.rb +27 -0
  113. data/test/html/test_builder.rb +78 -0
  114. data/test/html/test_document.rb +86 -0
  115. data/test/test_convert_xpath.rb +180 -0
  116. data/test/test_nokogiri.rb +36 -0
  117. data/test/test_reader.rb +222 -0
  118. data/test/test_xslt_transforms.rb +29 -0
  119. data/test/xml/sax/test_parser.rb +93 -0
  120. data/test/xml/test_builder.rb +16 -0
  121. data/test/xml/test_cdata.rb +18 -0
  122. data/test/xml/test_document.rb +171 -0
  123. data/test/xml/test_dtd.rb +43 -0
  124. data/test/xml/test_node.rb +223 -0
  125. data/test/xml/test_node_set.rb +116 -0
  126. data/test/xml/test_text.rb +13 -0
  127. metadata +217 -0
@@ -0,0 +1,19 @@
1
+ <?xml version='1.0'?><rss xmlns:admin='http://webns.net/mvcb/' version='2.0' xmlns:sy='http://purl.org/rss/1.0/modules/syndication/' xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
2
+ <channel>
3
+ <title>why the lucky stiff</title>
4
+ <link>http://whytheluckystiff.net</link>
5
+ <description>hex-editing reality to give us infinite grenades!!</description>
6
+ <dc:language>en-us</dc:language>
7
+ <dc:creator/>
8
+ <dc:date>2007-01-16T22:39:04+00:00</dc:date>
9
+ <admin:generatorAgent rdf:resource='http://hobix.com/?v=0.4'/>
10
+ <sy:updatePeriod>hourly</sy:updatePeriod>
11
+ <sy:updateFrequency>1</sy:updateFrequency>
12
+ <sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
13
+ <item><title>1.3</title><link>http://whytheluckystiff.net/quatrains/1.3.html</link><guid isPermaLink='false'>quatrains/1.3@http://whytheluckystiff.net</guid><dc:subject>quatrains</dc:subject><dc:subject>quatrains</dc:subject><dc:creator>why the lucky stiff</dc:creator><dc:date>2007-01-14T08:47:05+00:00</dc:date><description>&lt;blockquote&gt;
14
+ &lt;p&gt;That cadillac of yours and that driver of yours!&lt;br /&gt;You and your teacups rattling away in the back seat!&lt;br /&gt;You always took the mike, oh, and all those cowboys you shot!&lt;br /&gt;I held your hand! And I&amp;#8217;ll shoot a cowboy one day!&lt;/p&gt;
15
+ &lt;/blockquote&gt;
16
+ &lt;blockquote&gt;
17
+ &lt;p&gt;You said, &amp;#8220;Let&amp;#8217;s run into the woods like kids!&amp;#8221; &lt;br /&gt;You said, &amp;#8220;Let&amp;#8217;s rub our hands together super-hot!&amp;#8221; &lt;br /&gt;And we scalded the trees and left octagons, I think that was you and&lt;br /&gt;You threw parties on the roof!&lt;/p&gt;
18
+ &lt;/blockquote&gt;</description></item></channel>
19
+ </rss>
@@ -0,0 +1,7 @@
1
+ module TestFiles
2
+ Dir.chdir(File.dirname(__FILE__)) do
3
+ Dir['files/*.{html,xhtml,xml}'].each do |fname|
4
+ const_set fname[%r!/(\w+)\.\w+$!, 1].upcase, IO.read(fname)
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,67 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+ require File.join(File.dirname(__FILE__),"load_files")
3
+
4
+ class TestAlter < Nokogiri::TestCase
5
+ include Nokogiri
6
+
7
+ def setup
8
+ @basic = Hpricot.parse(TestFiles::BASIC)
9
+ end
10
+
11
+ def test_before
12
+ test0 = "<link rel='stylesheet' href='test0.css' />"
13
+ @basic.at("link").before(test0)
14
+ assert_equal 'test0.css', @basic.at("link").attributes['href']
15
+ end
16
+
17
+ def test_after
18
+ test_inf = "<link rel='stylesheet' href='test_inf.css' />"
19
+ @basic.search("link")[-1].after(test_inf)
20
+ assert_equal 'test_inf.css', @basic.search("link")[-1].attributes['href']
21
+ end
22
+
23
+ def test_wrap
24
+ ohmy = (@basic/"p.ohmy").wrap("<div id='wrapper'></div>")
25
+ assert_equal 'wrapper', ohmy[0].parent['id']
26
+ assert_equal 'ohmy', Nokogiri.Hpricot(@basic.to_html).at("#wrapper").children[0]['class']
27
+ end
28
+
29
+ def test_add_class
30
+ first_p = (@basic/"p:first").add_class("testing123")
31
+ assert first_p[0].get_attribute("class").split(" ").include?("testing123")
32
+ assert((Nokogiri.Hpricot(@basic.to_html)/"p:first")[0].attributes["class"].split(" ").include?("testing123"))
33
+ ####
34
+ # Modified. We do not support Hpricot's off-by-one (OB1) bug.
35
+ assert !(Nokogiri.Hpricot(@basic.to_html)/"p:gt(1)")[0].attributes["class"].split(" ").include?("testing123")
36
+ end
37
+
38
+ def test_change_attributes
39
+ all_ps = (@basic/"p").attr("title", "Some Title")
40
+ all_as = (@basic/"a").attr("href", "http://my_new_href.com")
41
+ all_lb = (@basic/"link").attr("href") { |e| e.name }
42
+ GC.start # try to shake out GC bugs with xpath and node sets.
43
+ assert_changed(@basic, "p", all_ps) {|p| p.attributes["title"] == "Some Title"}
44
+ assert_changed(@basic, "a", all_as) {|a| a.attributes["href"] == "http://my_new_href.com"}
45
+ assert_changed(@basic, "link", all_lb) {|a| a.attributes["href"] == "link" }
46
+ end
47
+
48
+ def test_remove_attr
49
+ all_rl = (@basic/"link").remove_attr("href")
50
+ assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? }
51
+ end
52
+
53
+ def test_remove_class
54
+ all_c1 = (@basic/"p[@class*='last']").remove_class("last")
55
+ assert_changed(@basic, "p[@class*='last']", all_c1) { |p| p['class'] == 'final' }
56
+ end
57
+
58
+ def test_remove_all_classes
59
+ all_c2 = (@basic/"p[@class]").remove_class
60
+ assert_changed(@basic, "p[@class]", all_c2) { |p| p['class'].nil? }
61
+ end
62
+
63
+ def assert_changed original, selector, set, &block
64
+ assert set.all?(&block)
65
+ assert Nokogiri.Hpricot(original.to_html).search(selector).all?(&block)
66
+ end
67
+ end
@@ -0,0 +1,27 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+
3
+ class TestBuilder < Nokogiri::TestCase
4
+ ####
5
+ # Modified
6
+ def test_escaping_text
7
+ doc = Nokogiri.Hpricot() { b "<a\"b>" }
8
+ assert_equal "<b>&lt;a\"b&gt;</b>", doc.to_html.chomp
9
+ assert_equal %{&lt;a\"b&gt;}, doc.at("text()").to_s
10
+ end
11
+
12
+ ####
13
+ # Modified
14
+ def test_no_escaping_text
15
+ doc = Nokogiri.Hpricot() { div.test.me! { text "<a\"b>" } }
16
+ assert_equal %{<div class="test" id="me">&lt;a"b&gt;</div>},
17
+ doc.to_html.chomp
18
+ assert_equal %{&lt;a\"b&gt;}, doc.at("text()").to_s
19
+ end
20
+
21
+ ####
22
+ # Modified
23
+ def test_latin1_entities
24
+ doc = Nokogiri.Hpricot() { b "\200\225" }
25
+ assert_equal "<b>&#21;</b>", doc.to_html.chomp
26
+ end
27
+ end
@@ -0,0 +1,423 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+ require File.join(File.dirname(__FILE__),"load_files")
3
+
4
+ class TestParser < Nokogiri::TestCase
5
+ include Nokogiri
6
+
7
+ def test_set_attr
8
+ @basic = Hpricot.parse(TestFiles::BASIC)
9
+ @basic.search('//p').set('class', 'para')
10
+ assert_equal 4, @basic.search('//p').length
11
+ assert_equal 4, @basic.search('//p').find_all { |x| x['class'] == 'para' }.length
12
+ end
13
+
14
+ # Test creating a new element
15
+ def test_new_element
16
+ elem = Hpricot::Elem.new(Hpricot::STag.new('form'))
17
+ assert_not_nil(elem)
18
+ assert_not_nil(elem.attributes)
19
+ end
20
+
21
+ def test_scan_text
22
+ assert_equal 'FOO', Hpricot.make("FOO").first.content
23
+ end
24
+
25
+ def test_filter_by_attr
26
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
27
+
28
+ # this link is escaped in the doc
29
+ link = 'http://www.youtube.com/watch?v=TvSNXyNw26g&search=chris%20ware'
30
+ assert_equal link, @boingboing.at("a[@href='#{link}']")['href']
31
+ end
32
+
33
+ def test_filter_contains
34
+ @basic = Hpricot.parse(TestFiles::BASIC)
35
+ assert_equal '<title>Sample XHTML</title>', @basic.search("title:contains('Sample')").to_s
36
+ end
37
+
38
+ def test_get_element_by_id
39
+ @basic = Hpricot.parse(TestFiles::BASIC)
40
+ assert_equal 'link1', @basic.get_element_by_id('link1')['id']
41
+ assert_equal 'link1', @basic.get_element_by_id('body1').get_element_by_id('link1').get_attribute('id')
42
+ end
43
+
44
+ def test_get_element_by_tag_name
45
+ @basic = Hpricot.parse(TestFiles::BASIC)
46
+ assert_equal 'link1', @basic.get_elements_by_tag_name('a')[0].get_attribute('id')
47
+ assert_equal 'link1', @basic.get_elements_by_tag_name('body')[0].get_element_by_id('link1').get_attribute('id')
48
+ end
49
+
50
+ def test_output_basic
51
+ @basic = Hpricot.parse(TestFiles::BASIC)
52
+ @basic2 = Hpricot.parse(@basic.inner_html)
53
+ scan_basic @basic2
54
+ end
55
+
56
+ def test_scan_basic
57
+ @basic = Hpricot.parse(TestFiles::BASIC)
58
+ scan_basic @basic
59
+ end
60
+
61
+ def scan_basic doc
62
+ ####
63
+ # Modified: asserting kind is not duck typey
64
+ #assert_kind_of Hpricot::XMLDecl, doc.children.first
65
+ assert_not_equal doc.children.first.to_s, doc.children[1].to_s
66
+ assert_equal 'link1', doc.at('#link1')['id']
67
+ assert_equal 'link1', doc.at("p a")['id']
68
+ assert_equal 'link1', (doc/:p/:a).first['id']
69
+ assert_equal 'link1', doc.search('p').at('a').get_attribute('id')
70
+
71
+ ### Modified: We're not supporting the filter() function
72
+ #assert_equal 'link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id')
73
+ #assert_equal (doc/'p')[2], (doc/'p').filter(':nth(2)')[0]
74
+ #assert_equal (doc/'p')[2], (doc/'p').filter(':nth(2)')[0]
75
+ #assert_equal (doc/'p')[2], (doc/'p').filter('[3]')[0]
76
+ #assert_equal 4, (doc/'p').filter('*').length
77
+ #assert_equal 4, (doc/'p').filter('* *').length
78
+ #eles = (doc/'p').filter('.ohmy')
79
+ #assert_equal 1, eles.length
80
+ #assert_equal 'ohmy', eles.first.get_attribute('class')
81
+ assert_equal 3, (doc/'p:not(.ohmy)').length
82
+
83
+ ### Modified: We're not supporting the not() function
84
+ #assert_equal 3, (doc/'p').not('.ohmy').length
85
+ #assert_equal 3, (doc/'p').not(eles.first).length
86
+ #assert_equal 2, (doc/'p').filter('[@class]').length
87
+ assert_equal 'last final', (doc/'p[@class~="final"]').first.get_attribute('class')
88
+ #assert_equal 1, (doc/'p').filter('[@class~="final"]').length
89
+ assert_equal 2, (doc/'p > a').length
90
+ assert_equal 1, (doc/'p.ohmy > a').length
91
+ assert_equal 2, (doc/'p / a').length
92
+ assert_equal 2, (doc/'link ~ link').length
93
+ assert_equal 3, (doc/'title ~ link').length
94
+ assert_equal 5, (doc/"//p/text()").length
95
+ assert_equal 6, (doc/"//p[a]//text()").length
96
+ assert_equal 2, (doc/"//p/a/text()").length
97
+ end
98
+
99
+ def test_positional
100
+ h = Nokogiri.Hpricot( "<div><br/><p>one</p><p>two</p></div>" )
101
+ assert_equal "<p>one</p>", h.search("div/p:eq(1)").to_s # MODIFIED: eq(0) -> eq(1), and removed initial '//'
102
+ assert_equal "<p>one</p>", h.search("div/p:first").to_s # MODIFIED: removed initial '//'
103
+ assert_equal "<p>one</p>", h.search("div/p:first()").to_s # MODIFIED: removed initial '//'
104
+ end
105
+
106
+ def test_pace
107
+ doc = Nokogiri.Hpricot(TestFiles::PACE_APPLICATION)
108
+ assert_equal 'get', doc.at('form[@name=frmSect11]')['method']
109
+ # assert_equal '2', doc.at('#hdnSpouse')['value']
110
+ end
111
+
112
+ def test_scan_boingboing
113
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
114
+ assert_equal 60, (@boingboing/'p.posted').length
115
+ assert_equal 1, @boingboing.search("//a[@name='027906']").length
116
+ ### MODIFIED: libxml wraps the contents of <script> in a CDATA tag, so we won't be able to parse comments.
117
+ # assert_equal 10, @boingboing.search("script comment()").length
118
+ assert_equal 3, @boingboing.search("a[text()*='Boing']").length
119
+ assert_equal 1, @boingboing.search("h3[text()='College kids reportedly taking more smart drugs']").length
120
+ assert_equal 0, @boingboing.search("h3[text()='College']").length
121
+ assert_equal 60, @boingboing.search("h3").length
122
+ assert_equal 59, @boingboing.search("h3[text()!='College kids reportedly taking more smart drugs']").length
123
+ assert_equal 17, @boingboing.search("h3[text()$='s']").length
124
+ ### Modified. Hpricot is wrong
125
+ #assert_equal 129, @boingboing.search("p[text()]").length
126
+ if Nokogiri::LIBXML_VERSION == '2.6.16'
127
+ assert_equal 111, @boingboing.search("p[text()]").length
128
+ else
129
+ assert_equal 110, @boingboing.search("p[text()]").length
130
+ end
131
+ assert_equal 211, @boingboing.search("p").length
132
+ end
133
+
134
+ def test_reparent
135
+ doc = Nokogiri.Hpricot(%{<div id="blurb_1"></div>})
136
+ div1 = doc.search('#blurb_1')
137
+ div1.before('<div id="blurb_0"></div>')
138
+
139
+ div0 = doc.search('#blurb_0')
140
+ div0.before('<div id="blurb_a"></div>')
141
+
142
+ assert_equal 'div', doc.at('#blurb_1').name
143
+ end
144
+
145
+ def test_siblings
146
+ @basic = Hpricot.parse(TestFiles::BASIC)
147
+ t = @basic.at(:title)
148
+ e = t.next_sibling
149
+ assert_equal 'test1.css', e['href']
150
+ assert_equal 'title', e.previous_sibling.name
151
+ end
152
+
153
+ def test_css_negation
154
+ @basic = Hpricot.parse(TestFiles::BASIC)
155
+ assert_equal 3, (@basic/'p:not(.final)').length
156
+ end
157
+
158
+ def test_remove_attribute
159
+ @basic = Hpricot.parse(TestFiles::BASIC)
160
+ (@basic/:p).each { |ele| ele.remove_attribute('class') }
161
+ assert_equal 0, (@basic/'p[@class]').length
162
+ end
163
+
164
+ ##
165
+ # Modified: hpricot is giving incorrect counts. Libxml gets it right.
166
+ def test_abs_xpath
167
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
168
+ assert_equal 60, @boingboing.search("/html/body//p[@class='posted']").length
169
+ assert_equal 60, @boingboing.search("/*/body//p[@class='posted']").length
170
+ assert_equal 18, @boingboing.search("//script").length
171
+ divs = @boingboing.search("//script/../div")
172
+ assert_equal 2, divs.length # hpricot says this is 1, but that's wrong.
173
+ imgs = @boingboing.search('//div/p/a/img')
174
+ assert_equal 12, imgs.length # hpricot says this is 15, but that's wrong.
175
+ assert_equal 16, @boingboing.search('//div').search('p/a/img').length
176
+ assert imgs.all? { |x| x.name == 'img' }
177
+ end
178
+
179
+ def test_predicates
180
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
181
+ assert_equal 2, @boingboing.search('//link[@rel="alternate"]').length
182
+ p_imgs = @boingboing.search('//div/p[/a/img]')
183
+ #assert_equal 15, p_imgs.length
184
+ assert p_imgs.all? { |x| x.name == 'p' }
185
+ p_imgs = @boingboing.search('//div/p[a/img]')
186
+ assert_equal 12, p_imgs.length
187
+ assert p_imgs.all? { |x| x.name == 'p' }
188
+ assert_equal 1, @boingboing.search('//input[@checked]').length
189
+ end
190
+
191
+ def test_tag_case
192
+ @tenderlove = Hpricot.parse(TestFiles::TENDERLOVE)
193
+ assert_equal 2, @tenderlove.search('//a').length
194
+ assert_equal 3, @tenderlove.search('//area').length
195
+ assert_equal 2, @tenderlove.search('//meta').length
196
+ end
197
+
198
+ def test_alt_predicates
199
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
200
+ assert_equal 2, @boingboing.search('table/tr:last').length # MODIFIED to not have '//' prefix
201
+
202
+ @basic = Hpricot.parse(TestFiles::BASIC)
203
+ ##
204
+ # MODIFIED:
205
+ # hpricot has an off-by-one bug with eq-and-friends.
206
+ assert_equal "<p>The third paragraph</p>",
207
+ @basic.search('p:eq(3)').to_html # under Hpricot this was eq(2)
208
+ ##
209
+ # MODIFIED: to be blank-agnostic, because Nokogiri's to_html is slightly different compared to Hpricot.
210
+ assert_equal '<p class="last final"> <b>THE FINAL PARAGRAPH</b> </p>',
211
+ @basic.search('p:last').to_html.gsub(/\s+/,' ')
212
+ assert_equal 'last final', @basic.search('p:last-of-type').first.get_attribute('class') # MODIFIED to not have '//' prefix
213
+ end
214
+
215
+ def test_insert_after # ticket #63
216
+ doc = Nokogiri.Hpricot('<html><body><div id="a-div"></div></body></html>')
217
+ (doc/'div').each do |element|
218
+ element.after('<p>Paragraph 1</p><p>Paragraph 2</p>')
219
+ end
220
+ assert_match '<div id="a-div"></div><p>Paragraph 1</p><p>Paragraph 2</p>',
221
+ doc.to_html.gsub(/\n/, '')
222
+ end
223
+
224
+ def test_insert_before # ticket #61
225
+ doc = Nokogiri.Hpricot('<html><body><div id="a-div"></div></body></html>')
226
+ (doc/'div').each do |element|
227
+ element.before('<p>Paragraph 1</p><p>Paragraph 2</p>')
228
+ end
229
+ assert_match '<p>Paragraph 1</p><p>Paragraph 2</p><div id="a-div"></div>',
230
+ doc.to_html.gsub(/\n/, '')
231
+ end
232
+
233
+ def test_many_paths
234
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
235
+ assert_equal 62, @boingboing.search('p.posted, link[@rel="alternate"]').length
236
+ ###
237
+ # Modified. I don't want to support this syntax. Just use a comma.
238
+ #assert_equal 20, @boingboing.search('//div/p[a/img]|//link[@rel="alternate"]').length
239
+ end
240
+
241
+ ####
242
+ # Modified. Epic Fail. We're on the duck type train folks.
243
+ #def test_stacked_search
244
+ # @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
245
+ # assert_kind_of Hpricot::Elements, @boingboing.search('//div/p').search('a img')
246
+ #end
247
+
248
+ def test_class_search
249
+ # test case sent by Chih-Chao Lam
250
+ # Modified. libxml corrects this differently than hpricot
251
+ doc = Nokogiri.Hpricot("<div class=xyz '>abc</div>")
252
+ assert_equal 1, doc.search(".xyz").length
253
+
254
+ doc = Nokogiri.Hpricot("<div class=xyz>abc</div><div class=abc>xyz</div>")
255
+ assert_equal 1, doc.search(".xyz").length
256
+ assert_equal 4, doc.search("*").length
257
+ end
258
+
259
+ def test_kleene_star
260
+ # bug noticed by raja bhatia
261
+ doc = Nokogiri.Hpricot("<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>")
262
+ assert_equal 2, doc.search("*[@class*='small']").length
263
+ assert_equal 2, doc.search("*.small").length
264
+ assert_equal 2, doc.search(".small").length
265
+ assert_equal 2, doc.search(".large").length
266
+ end
267
+
268
+ def test_empty_comment
269
+ doc = Nokogiri.Hpricot("<p><!----></p>")
270
+ doc = doc.search('//body').first
271
+ assert doc.children[0].children[0].comment?
272
+
273
+ doc = Nokogiri.Hpricot("<p><!-- --></p>")
274
+ doc = doc.search('//body').first
275
+ assert doc.children[0].children[0].comment?
276
+ end
277
+
278
+ def test_body_newlines
279
+ @immob = Hpricot.parse(TestFiles::IMMOB)
280
+ body = @immob.at(:body)
281
+ {'background' => '', 'bgcolor' => '#ffffff', 'text' => '#000000', 'marginheight' => '10',
282
+ 'marginwidth' => '10', 'leftmargin' => '10', 'topmargin' => '10', 'link' => '#000066',
283
+ 'alink' => '#ff6600', 'hlink' => "#ff6600", 'vlink' => "#000000"}.each do |k, v|
284
+ assert_equal v, body[k]
285
+ end
286
+ end
287
+
288
+ def test_nested_twins
289
+ @doc = Nokogiri.Hpricot("<div>Hi<div>there</div></div>")
290
+ assert_equal 1, (@doc/"div div").length
291
+ end
292
+
293
+ def test_wildcard
294
+ @basic = Hpricot.parse(TestFiles::BASIC)
295
+ assert_equal 3, (@basic/"*[@id]").length
296
+ assert_equal 3, (@basic/"//*[@id]").length
297
+ end
298
+
299
+ def test_javascripts
300
+ @immob = Hpricot.parse(TestFiles::IMMOB)
301
+ assert_equal 3, (@immob/:script)[0].inner_html.scan(/<LINK/).length
302
+ end
303
+
304
+ ####
305
+ # Modified. This test passes with later versions of libxml
306
+ def test_nested_scripts
307
+ @week9 = Hpricot.parse(TestFiles::WEEK9)
308
+ unless Nokogiri::LIBXML_VERSION == '2.6.16'
309
+ assert_equal 14, (@week9/"a").find_all { |x| x.inner_html.include? "GameCenter" }.length
310
+ end
311
+ end
312
+
313
+ def test_uswebgen
314
+ @uswebgen = Hpricot.parse(TestFiles::USWEBGEN)
315
+ # sent by brent beardsley, nokogiri 0.3 had problems with all the links.
316
+ assert_equal 67, (@uswebgen/:a).length
317
+ end
318
+
319
+ def test_mangled_tags
320
+ [%{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
321
+ %{<html><form name='loginForm' ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'></form></html>},
322
+ %{<html><form name='loginForm'?URL= ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
323
+ %{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html' ?URL=></form></html>}].
324
+ each do |str|
325
+ doc = Nokogiri.Hpricot(str)
326
+ assert_equal 1, (doc/:form).length
327
+ assert_equal '/units/a/login/1,13088,779-1,00.html', doc.at("form")['action']
328
+ end
329
+ end
330
+
331
+ ####
332
+ # Modified. Added the trailing question mark to the expected content. Don't care.
333
+ def test_procins
334
+ doc = Nokogiri.Hpricot("<?php print('hello') ?>\n<?xml blah='blah'?>")
335
+ assert_equal "php", doc.children[1].target
336
+ assert_equal "blah='blah'?", doc.children[2].content
337
+ end
338
+
339
+ ####
340
+ # Altered... libxml does not get a buffer error
341
+ def test_buffer_error
342
+ assert_nothing_raised {
343
+ Nokogiri.Hpricot(%{<p>\n\n<input type="hidden" name="__VIEWSTATE" value="#{(("X" * 2000) + "\n") * 22}" />\n\n</p>})
344
+ }
345
+ end
346
+
347
+ def test_youtube_attr
348
+ str = <<-edoc
349
+ <html><body>
350
+ Lorem ipsum. Jolly roger, ding-dong sing-a-long
351
+ <object width="425" height="350">
352
+ <param name="movie" value="http://www.youtube.com/v/NbDQ4M_cuwA"></param>
353
+ <param name="wmode" value="transparent"></param>
354
+ <embed src="http://www.youtube.com/v/NbDQ4M_cuwA"
355
+ type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
356
+ </embed>
357
+ </object>
358
+ Check out my posting, I have bright mice in large clown cars.
359
+ <object width="425" height="350">
360
+ <param name="movie" value="http://www.youtube.com/v/foobar"></param>
361
+ <param name="wmode" value="transparent"></param>
362
+ <embed src="http://www.youtube.com/v/foobar"
363
+ type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
364
+ </embed>
365
+ </object>
366
+ </body></html?
367
+ edoc
368
+ doc = Nokogiri.Hpricot(str)
369
+ assert_equal "http://www.youtube.com/v/NbDQ4M_cuwA",
370
+ doc.at("//object/param[@value='http://www.youtube.com/v/NbDQ4M_cuwA']")['value']
371
+ end
372
+
373
+ # ticket #84 by jamezilla
374
+ def test_screwed_xmlns
375
+ doc = Nokogiri.Hpricot(<<-edoc)
376
+ <?xml:namespace prefix = cwi />
377
+ <html><body>HAI</body></html>
378
+ edoc
379
+ assert_equal "HAI", doc.at("body").inner_text
380
+ end
381
+
382
+ # Reported by Jonathan Nichols on the Hpricot list (24 May 2007)
383
+ # MODIFIED: This is an issue with libxml which we cannot deal with....
384
+ #def test_self_closed_form
385
+ # doc = Nokogiri.Hpricot(<<-edoc)
386
+ # <body>
387
+ # <form action="/loginRegForm" name="regForm" method="POST" />
388
+ # <input type="button">
389
+ # </form>
390
+ # </body>
391
+ # edoc
392
+ # assert_equal "button", doc.at("//form/input")['type']
393
+ #end
394
+
395
+ def test_filters
396
+ @basic = Hpricot.parse(TestFiles::BASIC)
397
+ ##
398
+ # MODIFIED:
399
+ # Hpricot considers nodes with text-only (but no child tags) to be empty.
400
+ # Nokogiri considers that any content makes a parent.
401
+ assert_equal 1, (@basic/"title:parent").size # so this was 0 under Hpricot
402
+ assert_equal 4, (@basic/"p:parent").size
403
+ assert_equal 0, (@basic/"title:empty").size
404
+ assert_equal 3, (@basic/"link:empty").size
405
+ end
406
+
407
+ def test_keep_cdata
408
+ str = %{<script> /*<![CDATA[*/
409
+ /*]]>*/ </script>}
410
+ assert_match str, Nokogiri.Hpricot(str).to_html
411
+ end
412
+
413
+ def test_namespace
414
+ chunk = <<-END
415
+ <a xmlns:t="http://www.nexopia.com/dev/template">
416
+ <t:sam>hi </t:sam>
417
+ </a>
418
+ END
419
+ doc = Hpricot::XML(chunk)
420
+ assert((doc/"//t:sam").size > 0) # at least this should probably work
421
+ # assert (doc/"//sam").size > 0 # this would be nice
422
+ end
423
+ end