tenderlove-nokogiri 0.0.0-x86-mswin32-60

Sign up to get free protection for your applications and to get access to all the features.
Files changed (117) hide show
  1. data/History.txt +6 -0
  2. data/Manifest.txt +120 -0
  3. data/README.ja.txt +86 -0
  4. data/README.txt +87 -0
  5. data/Rakefile +264 -0
  6. data/ext/nokogiri/extconf.rb +59 -0
  7. data/ext/nokogiri/html_document.c +83 -0
  8. data/ext/nokogiri/html_document.h +10 -0
  9. data/ext/nokogiri/html_sax_parser.c +32 -0
  10. data/ext/nokogiri/html_sax_parser.h +11 -0
  11. data/ext/nokogiri/native.c +40 -0
  12. data/ext/nokogiri/native.h +51 -0
  13. data/ext/nokogiri/xml_cdata.c +52 -0
  14. data/ext/nokogiri/xml_cdata.h +9 -0
  15. data/ext/nokogiri/xml_document.c +159 -0
  16. data/ext/nokogiri/xml_document.h +10 -0
  17. data/ext/nokogiri/xml_dtd.c +117 -0
  18. data/ext/nokogiri/xml_dtd.h +8 -0
  19. data/ext/nokogiri/xml_node.c +709 -0
  20. data/ext/nokogiri/xml_node.h +15 -0
  21. data/ext/nokogiri/xml_node_set.c +124 -0
  22. data/ext/nokogiri/xml_node_set.h +9 -0
  23. data/ext/nokogiri/xml_reader.c +429 -0
  24. data/ext/nokogiri/xml_reader.h +10 -0
  25. data/ext/nokogiri/xml_sax_parser.c +174 -0
  26. data/ext/nokogiri/xml_sax_parser.h +10 -0
  27. data/ext/nokogiri/xml_syntax_error.c +194 -0
  28. data/ext/nokogiri/xml_syntax_error.h +11 -0
  29. data/ext/nokogiri/xml_text.c +29 -0
  30. data/ext/nokogiri/xml_text.h +9 -0
  31. data/ext/nokogiri/xml_xpath.c +46 -0
  32. data/ext/nokogiri/xml_xpath.h +11 -0
  33. data/ext/nokogiri/xml_xpath_context.c +81 -0
  34. data/ext/nokogiri/xml_xpath_context.h +9 -0
  35. data/ext/nokogiri/xslt_stylesheet.c +108 -0
  36. data/ext/nokogiri/xslt_stylesheet.h +9 -0
  37. data/lib/nokogiri/css/node.rb +95 -0
  38. data/lib/nokogiri/css/parser.rb +24 -0
  39. data/lib/nokogiri/css/parser.y +198 -0
  40. data/lib/nokogiri/css/tokenizer.rb +9 -0
  41. data/lib/nokogiri/css/tokenizer.rex +63 -0
  42. data/lib/nokogiri/css/xpath_visitor.rb +165 -0
  43. data/lib/nokogiri/css.rb +6 -0
  44. data/lib/nokogiri/decorators/hpricot/node.rb +58 -0
  45. data/lib/nokogiri/decorators/hpricot/node_set.rb +14 -0
  46. data/lib/nokogiri/decorators/hpricot/xpath_visitor.rb +17 -0
  47. data/lib/nokogiri/decorators/hpricot.rb +3 -0
  48. data/lib/nokogiri/decorators.rb +1 -0
  49. data/lib/nokogiri/hpricot.rb +47 -0
  50. data/lib/nokogiri/html/builder.rb +9 -0
  51. data/lib/nokogiri/html/document.rb +9 -0
  52. data/lib/nokogiri/html/sax/parser.rb +21 -0
  53. data/lib/nokogiri/html.rb +95 -0
  54. data/lib/nokogiri/version.rb +3 -0
  55. data/lib/nokogiri/xml/after_handler.rb +18 -0
  56. data/lib/nokogiri/xml/before_handler.rb +32 -0
  57. data/lib/nokogiri/xml/builder.rb +79 -0
  58. data/lib/nokogiri/xml/cdata.rb +9 -0
  59. data/lib/nokogiri/xml/document.rb +30 -0
  60. data/lib/nokogiri/xml/dtd.rb +6 -0
  61. data/lib/nokogiri/xml/node.rb +195 -0
  62. data/lib/nokogiri/xml/node_set.rb +183 -0
  63. data/lib/nokogiri/xml/notation.rb +6 -0
  64. data/lib/nokogiri/xml/reader.rb +14 -0
  65. data/lib/nokogiri/xml/sax/document.rb +59 -0
  66. data/lib/nokogiri/xml/sax/parser.rb +33 -0
  67. data/lib/nokogiri/xml/sax.rb +9 -0
  68. data/lib/nokogiri/xml/syntax_error.rb +21 -0
  69. data/lib/nokogiri/xml/text.rb +6 -0
  70. data/lib/nokogiri/xml/xpath.rb +6 -0
  71. data/lib/nokogiri/xml/xpath_context.rb +14 -0
  72. data/lib/nokogiri/xml.rb +67 -0
  73. data/lib/nokogiri/xslt/stylesheet.rb +6 -0
  74. data/lib/nokogiri/xslt.rb +11 -0
  75. data/lib/nokogiri.rb +51 -0
  76. data/nokogiri.gemspec +34 -0
  77. data/test/css/test_nthiness.rb +159 -0
  78. data/test/css/test_parser.rb +224 -0
  79. data/test/css/test_tokenizer.rb +162 -0
  80. data/test/css/test_xpath_visitor.rb +54 -0
  81. data/test/files/staff.xml +59 -0
  82. data/test/files/staff.xslt +32 -0
  83. data/test/files/tlm.html +850 -0
  84. data/test/helper.rb +70 -0
  85. data/test/hpricot/files/basic.xhtml +17 -0
  86. data/test/hpricot/files/boingboing.html +2266 -0
  87. data/test/hpricot/files/cy0.html +3653 -0
  88. data/test/hpricot/files/immob.html +400 -0
  89. data/test/hpricot/files/pace_application.html +1320 -0
  90. data/test/hpricot/files/tenderlove.html +16 -0
  91. data/test/hpricot/files/uswebgen.html +220 -0
  92. data/test/hpricot/files/utf8.html +1054 -0
  93. data/test/hpricot/files/week9.html +1723 -0
  94. data/test/hpricot/files/why.xml +19 -0
  95. data/test/hpricot/load_files.rb +7 -0
  96. data/test/hpricot/test_alter.rb +67 -0
  97. data/test/hpricot/test_builder.rb +27 -0
  98. data/test/hpricot/test_parser.rb +423 -0
  99. data/test/hpricot/test_paths.rb +15 -0
  100. data/test/hpricot/test_preserved.rb +78 -0
  101. data/test/hpricot/test_xml.rb +30 -0
  102. data/test/html/sax/test_parser.rb +27 -0
  103. data/test/html/test_builder.rb +78 -0
  104. data/test/html/test_document.rb +86 -0
  105. data/test/test_convert_xpath.rb +180 -0
  106. data/test/test_nokogiri.rb +36 -0
  107. data/test/test_reader.rb +222 -0
  108. data/test/test_xslt_transforms.rb +29 -0
  109. data/test/xml/sax/test_parser.rb +93 -0
  110. data/test/xml/test_builder.rb +16 -0
  111. data/test/xml/test_cdata.rb +18 -0
  112. data/test/xml/test_document.rb +171 -0
  113. data/test/xml/test_dtd.rb +43 -0
  114. data/test/xml/test_node.rb +223 -0
  115. data/test/xml/test_node_set.rb +116 -0
  116. data/test/xml/test_text.rb +13 -0
  117. metadata +214 -0
@@ -0,0 +1,19 @@
1
+ <?xml version='1.0'?><rss xmlns:admin='http://webns.net/mvcb/' version='2.0' xmlns:sy='http://purl.org/rss/1.0/modules/syndication/' xmlns:dc='http://purl.org/dc/elements/1.1/' xmlns:rdf='http://www.w3.org/1999/02/22-rdf-syntax-ns#'>
2
+ <channel>
3
+ <title>why the lucky stiff</title>
4
+ <link>http://whytheluckystiff.net</link>
5
+ <description>hex-editing reality to give us infinite grenades!!</description>
6
+ <dc:language>en-us</dc:language>
7
+ <dc:creator/>
8
+ <dc:date>2007-01-16T22:39:04+00:00</dc:date>
9
+ <admin:generatorAgent rdf:resource='http://hobix.com/?v=0.4'/>
10
+ <sy:updatePeriod>hourly</sy:updatePeriod>
11
+ <sy:updateFrequency>1</sy:updateFrequency>
12
+ <sy:updateBase>2000-01-01T12:00+00:00</sy:updateBase>
13
+ <item><title>1.3</title><link>http://whytheluckystiff.net/quatrains/1.3.html</link><guid isPermaLink='false'>quatrains/1.3@http://whytheluckystiff.net</guid><dc:subject>quatrains</dc:subject><dc:subject>quatrains</dc:subject><dc:creator>why the lucky stiff</dc:creator><dc:date>2007-01-14T08:47:05+00:00</dc:date><description>&lt;blockquote&gt;
14
+ &lt;p&gt;That cadillac of yours and that driver of yours!&lt;br /&gt;You and your teacups rattling away in the back seat!&lt;br /&gt;You always took the mike, oh, and all those cowboys you shot!&lt;br /&gt;I held your hand! And I&amp;#8217;ll shoot a cowboy one day!&lt;/p&gt;
15
+ &lt;/blockquote&gt;
16
+ &lt;blockquote&gt;
17
+ &lt;p&gt;You said, &amp;#8220;Let&amp;#8217;s run into the woods like kids!&amp;#8221; &lt;br /&gt;You said, &amp;#8220;Let&amp;#8217;s rub our hands together super-hot!&amp;#8221; &lt;br /&gt;And we scalded the trees and left octagons, I think that was you and&lt;br /&gt;You threw parties on the roof!&lt;/p&gt;
18
+ &lt;/blockquote&gt;</description></item></channel>
19
+ </rss>
@@ -0,0 +1,7 @@
1
+ module TestFiles
2
+ Dir.chdir(File.dirname(__FILE__)) do
3
+ Dir['files/*.{html,xhtml,xml}'].each do |fname|
4
+ const_set fname[%r!/(\w+)\.\w+$!, 1].upcase, IO.read(fname)
5
+ end
6
+ end
7
+ end
@@ -0,0 +1,67 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+ require File.join(File.dirname(__FILE__),"load_files")
3
+
4
+ class TestAlter < Nokogiri::TestCase
5
+ include Nokogiri
6
+
7
+ def setup
8
+ @basic = Hpricot.parse(TestFiles::BASIC)
9
+ end
10
+
11
+ def test_before
12
+ test0 = "<link rel='stylesheet' href='test0.css' />"
13
+ @basic.at("link").before(test0)
14
+ assert_equal 'test0.css', @basic.at("link").attributes['href']
15
+ end
16
+
17
+ def test_after
18
+ test_inf = "<link rel='stylesheet' href='test_inf.css' />"
19
+ @basic.search("link")[-1].after(test_inf)
20
+ assert_equal 'test_inf.css', @basic.search("link")[-1].attributes['href']
21
+ end
22
+
23
+ def test_wrap
24
+ ohmy = (@basic/"p.ohmy").wrap("<div id='wrapper'></div>")
25
+ assert_equal 'wrapper', ohmy[0].parent['id']
26
+ assert_equal 'ohmy', Nokogiri.Hpricot(@basic.to_html).at("#wrapper").children[0]['class']
27
+ end
28
+
29
+ def test_add_class
30
+ first_p = (@basic/"p:first").add_class("testing123")
31
+ assert first_p[0].get_attribute("class").split(" ").include?("testing123")
32
+ assert((Nokogiri.Hpricot(@basic.to_html)/"p:first")[0].attributes["class"].split(" ").include?("testing123"))
33
+ ####
34
+ # Modified. We do not support the OB1 (off-by-one) bug.
35
+ assert !(Nokogiri.Hpricot(@basic.to_html)/"p:gt(1)")[0].attributes["class"].split(" ").include?("testing123")
36
+ end
37
+
38
+ def test_change_attributes
39
+ all_ps = (@basic/"p").attr("title", "Some Title")
40
+ all_as = (@basic/"a").attr("href", "http://my_new_href.com")
41
+ all_lb = (@basic/"link").attr("href") { |e| e.name }
42
+ GC.start # try to shake out GC bugs with xpath and node sets.
43
+ assert_changed(@basic, "p", all_ps) {|p| p.attributes["title"] == "Some Title"}
44
+ assert_changed(@basic, "a", all_as) {|a| a.attributes["href"] == "http://my_new_href.com"}
45
+ assert_changed(@basic, "link", all_lb) {|a| a.attributes["href"] == "link" }
46
+ end
47
+
48
+ def test_remove_attr
49
+ all_rl = (@basic/"link").remove_attr("href")
50
+ assert_changed(@basic, "link", all_rl) { |link| link['href'].nil? }
51
+ end
52
+
53
+ def test_remove_class
54
+ all_c1 = (@basic/"p[@class*='last']").remove_class("last")
55
+ assert_changed(@basic, "p[@class*='last']", all_c1) { |p| p['class'] == 'final' }
56
+ end
57
+
58
+ def test_remove_all_classes
59
+ all_c2 = (@basic/"p[@class]").remove_class
60
+ assert_changed(@basic, "p[@class]", all_c2) { |p| p['class'].nil? }
61
+ end
62
+
63
+ def assert_changed original, selector, set, &block
64
+ assert set.all?(&block)
65
+ assert Nokogiri.Hpricot(original.to_html).search(selector).all?(&block)
66
+ end
67
+ end
@@ -0,0 +1,27 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+
3
+ class TestBuilder < Nokogiri::TestCase
4
+ ####
5
+ # Modified
6
+ def test_escaping_text
7
+ doc = Nokogiri.Hpricot() { b "<a\"b>" }
8
+ assert_equal "<b>&lt;a\"b&gt;</b>", doc.to_html.chomp
9
+ assert_equal %{&lt;a\"b&gt;}, doc.at("text()").to_s
10
+ end
11
+
12
+ ####
13
+ # Modified
14
+ def test_no_escaping_text
15
+ doc = Nokogiri.Hpricot() { div.test.me! { text "<a\"b>" } }
16
+ assert_equal %{<div class="test" id="me">&lt;a"b&gt;</div>},
17
+ doc.to_html.chomp
18
+ assert_equal %{&lt;a\"b&gt;}, doc.at("text()").to_s
19
+ end
20
+
21
+ ####
22
+ # Modified
23
+ def test_latin1_entities
24
+ doc = Nokogiri.Hpricot() { b "\200\225" }
25
+ assert_equal "<b>&#21;</b>", doc.to_html.chomp
26
+ end
27
+ end
@@ -0,0 +1,423 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+ require File.join(File.dirname(__FILE__),"load_files")
3
+
4
+ class TestParser < Nokogiri::TestCase
5
+ include Nokogiri
6
+
7
+ def test_set_attr
8
+ @basic = Hpricot.parse(TestFiles::BASIC)
9
+ @basic.search('//p').set('class', 'para')
10
+ assert_equal 4, @basic.search('//p').length
11
+ assert_equal 4, @basic.search('//p').find_all { |x| x['class'] == 'para' }.length
12
+ end
13
+
14
+ # Test creating a new element
15
+ def test_new_element
16
+ elem = Hpricot::Elem.new(Hpricot::STag.new('form'))
17
+ assert_not_nil(elem)
18
+ assert_not_nil(elem.attributes)
19
+ end
20
+
21
+ def test_scan_text
22
+ assert_equal 'FOO', Hpricot.make("FOO").first.content
23
+ end
24
+
25
+ def test_filter_by_attr
26
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
27
+
28
+ # this link is escaped in the doc
29
+ link = 'http://www.youtube.com/watch?v=TvSNXyNw26g&search=chris%20ware'
30
+ assert_equal link, @boingboing.at("a[@href='#{link}']")['href']
31
+ end
32
+
33
+ def test_filter_contains
34
+ @basic = Hpricot.parse(TestFiles::BASIC)
35
+ assert_equal '<title>Sample XHTML</title>', @basic.search("title:contains('Sample')").to_s
36
+ end
37
+
38
+ def test_get_element_by_id
39
+ @basic = Hpricot.parse(TestFiles::BASIC)
40
+ assert_equal 'link1', @basic.get_element_by_id('link1')['id']
41
+ assert_equal 'link1', @basic.get_element_by_id('body1').get_element_by_id('link1').get_attribute('id')
42
+ end
43
+
44
+ def test_get_element_by_tag_name
45
+ @basic = Hpricot.parse(TestFiles::BASIC)
46
+ assert_equal 'link1', @basic.get_elements_by_tag_name('a')[0].get_attribute('id')
47
+ assert_equal 'link1', @basic.get_elements_by_tag_name('body')[0].get_element_by_id('link1').get_attribute('id')
48
+ end
49
+
50
+ def test_output_basic
51
+ @basic = Hpricot.parse(TestFiles::BASIC)
52
+ @basic2 = Hpricot.parse(@basic.inner_html)
53
+ scan_basic @basic2
54
+ end
55
+
56
+ def test_scan_basic
57
+ @basic = Hpricot.parse(TestFiles::BASIC)
58
+ scan_basic @basic
59
+ end
60
+
61
+ def scan_basic doc
62
+ ####
63
+ # Modified: asserting kind is not duck typey
64
+ #assert_kind_of Hpricot::XMLDecl, doc.children.first
65
+ assert_not_equal doc.children.first.to_s, doc.children[1].to_s
66
+ assert_equal 'link1', doc.at('#link1')['id']
67
+ assert_equal 'link1', doc.at("p a")['id']
68
+ assert_equal 'link1', (doc/:p/:a).first['id']
69
+ assert_equal 'link1', doc.search('p').at('a').get_attribute('id')
70
+
71
+ ### Modified: We're not supporting the filter() function
72
+ #assert_equal 'link2', (doc/'p').filter('.ohmy').search('a').first.get_attribute('id')
73
+ #assert_equal (doc/'p')[2], (doc/'p').filter(':nth(2)')[0]
74
+ #assert_equal (doc/'p')[2], (doc/'p').filter(':nth(2)')[0]
75
+ #assert_equal (doc/'p')[2], (doc/'p').filter('[3]')[0]
76
+ #assert_equal 4, (doc/'p').filter('*').length
77
+ #assert_equal 4, (doc/'p').filter('* *').length
78
+ #eles = (doc/'p').filter('.ohmy')
79
+ #assert_equal 1, eles.length
80
+ #assert_equal 'ohmy', eles.first.get_attribute('class')
81
+ assert_equal 3, (doc/'p:not(.ohmy)').length
82
+
83
+ ### Modified: We're not supporting the not() function
84
+ #assert_equal 3, (doc/'p').not('.ohmy').length
85
+ #assert_equal 3, (doc/'p').not(eles.first).length
86
+ #assert_equal 2, (doc/'p').filter('[@class]').length
87
+ assert_equal 'last final', (doc/'p[@class~="final"]').first.get_attribute('class')
88
+ #assert_equal 1, (doc/'p').filter('[@class~="final"]').length
89
+ assert_equal 2, (doc/'p > a').length
90
+ assert_equal 1, (doc/'p.ohmy > a').length
91
+ assert_equal 2, (doc/'p / a').length
92
+ assert_equal 2, (doc/'link ~ link').length
93
+ assert_equal 3, (doc/'title ~ link').length
94
+ assert_equal 5, (doc/"//p/text()").length
95
+ assert_equal 6, (doc/"//p[a]//text()").length
96
+ assert_equal 2, (doc/"//p/a/text()").length
97
+ end
98
+
99
+ def test_positional
100
+ h = Nokogiri.Hpricot( "<div><br/><p>one</p><p>two</p></div>" )
101
+ assert_equal "<p>one</p>", h.search("div/p:eq(1)").to_s # MODIFIED: eq(0) -> eq(1), and removed initial '//'
102
+ assert_equal "<p>one</p>", h.search("div/p:first").to_s # MODIFIED: removed initial '//'
103
+ assert_equal "<p>one</p>", h.search("div/p:first()").to_s # MODIFIED: removed initial '//'
104
+ end
105
+
106
+ def test_pace
107
+ doc = Nokogiri.Hpricot(TestFiles::PACE_APPLICATION)
108
+ assert_equal 'get', doc.at('form[@name=frmSect11]')['method']
109
+ # assert_equal '2', doc.at('#hdnSpouse')['value']
110
+ end
111
+
112
+ def test_scan_boingboing
113
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
114
+ assert_equal 60, (@boingboing/'p.posted').length
115
+ assert_equal 1, @boingboing.search("//a[@name='027906']").length
116
+ ### MODIFIED: libxml wraps the contents of <script> in a CDATA tag, so we won't be able to parse comments.
117
+ # assert_equal 10, @boingboing.search("script comment()").length
118
+ assert_equal 3, @boingboing.search("a[text()*='Boing']").length
119
+ assert_equal 1, @boingboing.search("h3[text()='College kids reportedly taking more smart drugs']").length
120
+ assert_equal 0, @boingboing.search("h3[text()='College']").length
121
+ assert_equal 60, @boingboing.search("h3").length
122
+ assert_equal 59, @boingboing.search("h3[text()!='College kids reportedly taking more smart drugs']").length
123
+ assert_equal 17, @boingboing.search("h3[text()$='s']").length
124
+ ### Modified. Hpricot is wrong
125
+ #assert_equal 129, @boingboing.search("p[text()]").length
126
+ if Nokogiri::LIBXML_VERSION == '2.6.16'
127
+ assert_equal 111, @boingboing.search("p[text()]").length
128
+ else
129
+ assert_equal 110, @boingboing.search("p[text()]").length
130
+ end
131
+ assert_equal 211, @boingboing.search("p").length
132
+ end
133
+
134
+ def test_reparent
135
+ doc = Nokogiri.Hpricot(%{<div id="blurb_1"></div>})
136
+ div1 = doc.search('#blurb_1')
137
+ div1.before('<div id="blurb_0"></div>')
138
+
139
+ div0 = doc.search('#blurb_0')
140
+ div0.before('<div id="blurb_a"></div>')
141
+
142
+ assert_equal 'div', doc.at('#blurb_1').name
143
+ end
144
+
145
+ def test_siblings
146
+ @basic = Hpricot.parse(TestFiles::BASIC)
147
+ t = @basic.at(:title)
148
+ e = t.next_sibling
149
+ assert_equal 'test1.css', e['href']
150
+ assert_equal 'title', e.previous_sibling.name
151
+ end
152
+
153
+ def test_css_negation
154
+ @basic = Hpricot.parse(TestFiles::BASIC)
155
+ assert_equal 3, (@basic/'p:not(.final)').length
156
+ end
157
+
158
+ def test_remove_attribute
159
+ @basic = Hpricot.parse(TestFiles::BASIC)
160
+ (@basic/:p).each { |ele| ele.remove_attribute('class') }
161
+ assert_equal 0, (@basic/'p[@class]').length
162
+ end
163
+
164
+ ##
165
+ # Modified: hpricot is giving incorrect counts. Libxml gets it right.
166
+ def test_abs_xpath
167
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
168
+ assert_equal 60, @boingboing.search("/html/body//p[@class='posted']").length
169
+ assert_equal 60, @boingboing.search("/*/body//p[@class='posted']").length
170
+ assert_equal 18, @boingboing.search("//script").length
171
+ divs = @boingboing.search("//script/../div")
172
+ assert_equal 2, divs.length # hpricot says this is 1, but that's wrong.
173
+ imgs = @boingboing.search('//div/p/a/img')
174
+ assert_equal 12, imgs.length # hpricot says this is 15, but that's wrong.
175
+ assert_equal 16, @boingboing.search('//div').search('p/a/img').length
176
+ assert imgs.all? { |x| x.name == 'img' }
177
+ end
178
+
179
+ def test_predicates
180
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
181
+ assert_equal 2, @boingboing.search('//link[@rel="alternate"]').length
182
+ p_imgs = @boingboing.search('//div/p[/a/img]')
183
+ #assert_equal 15, p_imgs.length
184
+ assert p_imgs.all? { |x| x.name == 'p' }
185
+ p_imgs = @boingboing.search('//div/p[a/img]')
186
+ assert_equal 12, p_imgs.length
187
+ assert p_imgs.all? { |x| x.name == 'p' }
188
+ assert_equal 1, @boingboing.search('//input[@checked]').length
189
+ end
190
+
191
+ def test_tag_case
192
+ @tenderlove = Hpricot.parse(TestFiles::TENDERLOVE)
193
+ assert_equal 2, @tenderlove.search('//a').length
194
+ assert_equal 3, @tenderlove.search('//area').length
195
+ assert_equal 2, @tenderlove.search('//meta').length
196
+ end
197
+
198
+ def test_alt_predicates
199
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
200
+ assert_equal 2, @boingboing.search('table/tr:last').length # MODIFIED to not have '//' prefix
201
+
202
+ @basic = Hpricot.parse(TestFiles::BASIC)
203
+ ##
204
+ # MODIFIED:
205
+ # hpricot has an off-by-one bug with eq-and-friends.
206
+ assert_equal "<p>The third paragraph</p>",
207
+ @basic.search('p:eq(3)').to_html # under Hpricot this was eq(2)
208
+ ##
209
+ # MODIFIED: to be blank-agnostic, because Nokogiri's to_html is slightly different compared to Hpricot.
210
+ assert_equal '<p class="last final"> <b>THE FINAL PARAGRAPH</b> </p>',
211
+ @basic.search('p:last').to_html.gsub(/\s+/,' ')
212
+ assert_equal 'last final', @basic.search('p:last-of-type').first.get_attribute('class') # MODIFIED to not have '//' prefix
213
+ end
214
+
215
+ def test_insert_after # ticket #63
216
+ doc = Nokogiri.Hpricot('<html><body><div id="a-div"></div></body></html>')
217
+ (doc/'div').each do |element|
218
+ element.after('<p>Paragraph 1</p><p>Paragraph 2</p>')
219
+ end
220
+ assert_match '<div id="a-div"></div><p>Paragraph 1</p><p>Paragraph 2</p>',
221
+ doc.to_html.gsub(/\n/, '')
222
+ end
223
+
224
+ def test_insert_before # ticket #61
225
+ doc = Nokogiri.Hpricot('<html><body><div id="a-div"></div></body></html>')
226
+ (doc/'div').each do |element|
227
+ element.before('<p>Paragraph 1</p><p>Paragraph 2</p>')
228
+ end
229
+ assert_match '<p>Paragraph 1</p><p>Paragraph 2</p><div id="a-div"></div>',
230
+ doc.to_html.gsub(/\n/, '')
231
+ end
232
+
233
+ def test_many_paths
234
+ @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
235
+ assert_equal 62, @boingboing.search('p.posted, link[@rel="alternate"]').length
236
+ ###
237
+ # Modified. I don't want to support this syntax. Just use a comma.
238
+ #assert_equal 20, @boingboing.search('//div/p[a/img]|//link[@rel="alternate"]').length
239
+ end
240
+
241
+ ####
242
+ # Modified. Epic Fail. We're on the duck type train folks.
243
+ #def test_stacked_search
244
+ # @boingboing = Hpricot.parse(TestFiles::BOINGBOING)
245
+ # assert_kind_of Hpricot::Elements, @boingboing.search('//div/p').search('a img')
246
+ #end
247
+
248
+ def test_class_search
249
+ # test case sent by Chih-Chao Lam
250
+ # Modified. libxml corrects this differently than hpricot
251
+ doc = Nokogiri.Hpricot("<div class=xyz '>abc</div>")
252
+ assert_equal 1, doc.search(".xyz").length
253
+
254
+ doc = Nokogiri.Hpricot("<div class=xyz>abc</div><div class=abc>xyz</div>")
255
+ assert_equal 1, doc.search(".xyz").length
256
+ assert_equal 4, doc.search("*").length
257
+ end
258
+
259
+ def test_kleene_star
260
+ # bug noticed by raja bhatia
261
+ doc = Nokogiri.Hpricot("<span class='small'>1</span><div class='large'>2</div><div class='small'>3</div><span class='blue large'>4</span>")
262
+ assert_equal 2, doc.search("*[@class*='small']").length
263
+ assert_equal 2, doc.search("*.small").length
264
+ assert_equal 2, doc.search(".small").length
265
+ assert_equal 2, doc.search(".large").length
266
+ end
267
+
268
+ def test_empty_comment
269
+ doc = Nokogiri.Hpricot("<p><!----></p>")
270
+ doc = doc.search('//body').first
271
+ assert doc.children[0].children[0].comment?
272
+
273
+ doc = Nokogiri.Hpricot("<p><!-- --></p>")
274
+ doc = doc.search('//body').first
275
+ assert doc.children[0].children[0].comment?
276
+ end
277
+
278
+ def test_body_newlines
279
+ @immob = Hpricot.parse(TestFiles::IMMOB)
280
+ body = @immob.at(:body)
281
+ {'background' => '', 'bgcolor' => '#ffffff', 'text' => '#000000', 'marginheight' => '10',
282
+ 'marginwidth' => '10', 'leftmargin' => '10', 'topmargin' => '10', 'link' => '#000066',
283
+ 'alink' => '#ff6600', 'hlink' => "#ff6600", 'vlink' => "#000000"}.each do |k, v|
284
+ assert_equal v, body[k]
285
+ end
286
+ end
287
+
288
+ def test_nested_twins
289
+ @doc = Nokogiri.Hpricot("<div>Hi<div>there</div></div>")
290
+ assert_equal 1, (@doc/"div div").length
291
+ end
292
+
293
+ def test_wildcard
294
+ @basic = Hpricot.parse(TestFiles::BASIC)
295
+ assert_equal 3, (@basic/"*[@id]").length
296
+ assert_equal 3, (@basic/"//*[@id]").length
297
+ end
298
+
299
+ def test_javascripts
300
+ @immob = Hpricot.parse(TestFiles::IMMOB)
301
+ assert_equal 3, (@immob/:script)[0].inner_html.scan(/<LINK/).length
302
+ end
303
+
304
+ ####
305
+ # Modified. This test passes with later versions of libxml
306
+ def test_nested_scripts
307
+ @week9 = Hpricot.parse(TestFiles::WEEK9)
308
+ unless Nokogiri::LIBXML_VERSION == '2.6.16'
309
+ assert_equal 14, (@week9/"a").find_all { |x| x.inner_html.include? "GameCenter" }.length
310
+ end
311
+ end
312
+
313
+ def test_uswebgen
314
+ @uswebgen = Hpricot.parse(TestFiles::USWEBGEN)
315
+ # sent by brent beardsley, nokogiri 0.3 had problems with all the links.
316
+ assert_equal 67, (@uswebgen/:a).length
317
+ end
318
+
319
+ def test_mangled_tags
320
+ [%{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
321
+ %{<html><form name='loginForm' ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'></form></html>},
322
+ %{<html><form name='loginForm'?URL= ?URL= method='post' action='/units/a/login/1,13088,779-1,00.html'?URL=></form></html>},
323
+ %{<html><form name='loginForm' method='post' action='/units/a/login/1,13088,779-1,00.html' ?URL=></form></html>}].
324
+ each do |str|
325
+ doc = Nokogiri.Hpricot(str)
326
+ assert_equal 1, (doc/:form).length
327
+ assert_equal '/units/a/login/1,13088,779-1,00.html', doc.at("form")['action']
328
+ end
329
+ end
330
+
331
+ ####
332
+ # Modified. Added question. Don't care.
333
+ def test_procins
334
+ doc = Nokogiri.Hpricot("<?php print('hello') ?>\n<?xml blah='blah'?>")
335
+ assert_equal "php", doc.children[1].target
336
+ assert_equal "blah='blah'?", doc.children[2].content
337
+ end
338
+
339
+ ####
340
+ # Altered... libxml does not get a buffer error
341
+ def test_buffer_error
342
+ assert_nothing_raised {
343
+ Nokogiri.Hpricot(%{<p>\n\n<input type="hidden" name="__VIEWSTATE" value="#{(("X" * 2000) + "\n") * 22}" />\n\n</p>})
344
+ }
345
+ end
346
+
347
+ def test_youtube_attr
348
+ str = <<-edoc
349
+ <html><body>
350
+ Lorem ipsum. Jolly roger, ding-dong sing-a-long
351
+ <object width="425" height="350">
352
+ <param name="movie" value="http://www.youtube.com/v/NbDQ4M_cuwA"></param>
353
+ <param name="wmode" value="transparent"></param>
354
+ <embed src="http://www.youtube.com/v/NbDQ4M_cuwA"
355
+ type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
356
+ </embed>
357
+ </object>
358
+ Check out my posting, I have bright mice in large clown cars.
359
+ <object width="425" height="350">
360
+ <param name="movie" value="http://www.youtube.com/v/foobar"></param>
361
+ <param name="wmode" value="transparent"></param>
362
+ <embed src="http://www.youtube.com/v/foobar"
363
+ type="application/x-shockwave-flash" wmode="transparent" width="425" height="350">
364
+ </embed>
365
+ </object>
366
+ </body></html?
367
+ edoc
368
+ doc = Nokogiri.Hpricot(str)
369
+ assert_equal "http://www.youtube.com/v/NbDQ4M_cuwA",
370
+ doc.at("//object/param[@value='http://www.youtube.com/v/NbDQ4M_cuwA']")['value']
371
+ end
372
+
373
+ # ticket #84 by jamezilla
374
+ def test_screwed_xmlns
375
+ doc = Nokogiri.Hpricot(<<-edoc)
376
+ <?xml:namespace prefix = cwi />
377
+ <html><body>HAI</body></html>
378
+ edoc
379
+ assert_equal "HAI", doc.at("body").inner_text
380
+ end
381
+
382
+ # Reported by Jonathan Nichols on the Hpricot list (24 May 2007)
383
+ # MODIFIED: This is an issue with libxml which we cannot deal with....
384
+ #def test_self_closed_form
385
+ # doc = Nokogiri.Hpricot(<<-edoc)
386
+ # <body>
387
+ # <form action="/loginRegForm" name="regForm" method="POST" />
388
+ # <input type="button">
389
+ # </form>
390
+ # </body>
391
+ # edoc
392
+ # assert_equal "button", doc.at("//form/input")['type']
393
+ #end
394
+
395
+ def test_filters
396
+ @basic = Hpricot.parse(TestFiles::BASIC)
397
+ ##
398
+ # MODIFIED:
399
+ # Hpricot considers nodes with text-only (but no child tags) to be empty.
400
+ # Nokogiri considers that any content makes a parent.
401
+ assert_equal 1, (@basic/"title:parent").size # so this was 0 under Hpricot
402
+ assert_equal 4, (@basic/"p:parent").size
403
+ assert_equal 0, (@basic/"title:empty").size
404
+ assert_equal 3, (@basic/"link:empty").size
405
+ end
406
+
407
+ def test_keep_cdata
408
+ str = %{<script> /*<![CDATA[*/
409
+ /*]]>*/ </script>}
410
+ assert_match str, Nokogiri.Hpricot(str).to_html
411
+ end
412
+
413
+ def test_namespace
414
+ chunk = <<-END
415
+ <a xmlns:t="http://www.nexopia.com/dev/template">
416
+ <t:sam>hi </t:sam>
417
+ </a>
418
+ END
419
+ doc = Hpricot::XML(chunk)
420
+ assert((doc/"//t:sam").size > 0) # at least this should probably work
421
+ # assert (doc/"//sam").size > 0 # this would be nice
422
+ end
423
+ end
@@ -0,0 +1,15 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+ require File.join(File.dirname(__FILE__),"load_files")
3
+
4
+ class TestParser < Nokogiri::TestCase
5
+ include Nokogiri
6
+
7
+ def test_roundtrip
8
+ @basic = Hpricot.parse(TestFiles::BASIC)
9
+ %w[link link[2] body #link1 a p.ohmy].each do |css_sel|
10
+ ele = @basic.at(css_sel)
11
+ assert_equal ele, @basic.at(ele.css_path), ele.css_path
12
+ assert_equal ele, @basic.at(ele.xpath), ele.xpath
13
+ end
14
+ end
15
+ end
@@ -0,0 +1,78 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+ require File.join(File.dirname(__FILE__),"load_files")
3
+
4
+ class TestPreserved < Nokogiri::TestCase
5
+ def assert_roundtrip str
6
+ doc = Nokogiri.Hpricot(str)
7
+ yield doc if block_given?
8
+ str2 = doc.to_original_html
9
+ [*str].zip([*str2]).each do |s1, s2|
10
+ assert_equal s1, s2
11
+ end
12
+ end
13
+
14
+ def assert_html str1, str2
15
+ doc = Nokogiri.Hpricot(str2)
16
+ yield doc if block_given?
17
+ assert_equal str1, doc.to_original_html
18
+ end
19
+
20
+ ####
21
+ # Not supporting to_original_html
22
+ #def test_simple
23
+ # str = "<p>Hpricot is a <b>you know <i>uh</b> fine thing.</p>"
24
+ # assert_html str, str
25
+ # assert_html "<p class=\"new\">Hpricot is a <b>you know <i>uh</b> fine thing.</p>", str do |doc|
26
+ # (doc/:p).set('class', 'new')
27
+ # end
28
+ #end
29
+
30
+ ####
31
+ # Not supporting to_original_html
32
+ #def test_parent
33
+ # str = "<html><base href='/'><head><title>Test</title></head><body><div id='wrap'><p>Paragraph one.</p><p>Paragraph two.</p></div></body></html>"
34
+ # assert_html str, str
35
+ # assert_html "<html><base href='/'><body><div id=\"all\"><div><p>Paragraph one.</p></div><div><p>Paragraph two.</p></div></div></body></html>", str do |doc|
36
+ # (doc/:head).remove
37
+ # (doc/:div).set('id', 'all')
38
+ # (doc/:p).wrap('<div></div>')
39
+ # end
40
+ #end
41
+
42
+ # Not really a valid test. If libxml can figure out the encoding of the file,
43
+ # it will use that encoding, otherwise it uses the &#xwhatever so that no data
44
+ # is lost.
45
+ #
46
+ # libxml on OSX can't figure out the encoding, so this test passes. Linux
47
+ # can figure out the encoding, so it fails.
48
+ #def test_escaping_of_contents
49
+ # doc = Nokogiri.Hpricot(TestFiles::BOINGBOING)
50
+ # assert_equal "Fukuda&#x2019;s Automatic Door opens around your body as you pass through it. The idea is to save energy and keep the room clean.", doc.at("img[@alt='200606131240']").next.to_s.strip
51
+ #end
52
+
53
+ ####
54
+ # Modified. No.
55
+ #def test_files
56
+ # assert_roundtrip TestFiles::BASIC
57
+ # assert_roundtrip TestFiles::BOINGBOING
58
+ # assert_roundtrip TestFiles::CY0
59
+ #end
60
+
61
+ ####
62
+ # Modified. When calling "to_html" on the document, proper html/doc tags
63
+ # are produced too.
64
+ def test_escaping_of_attrs
65
+ # ampersands in URLs
66
+ str = %{<a href="http://google.com/search?q=nokogiri&amp;l=en">Google</a>}
67
+ link = (doc = Nokogiri.Hpricot(str)).at(:a)
68
+ assert_equal "http://google.com/search?q=nokogiri&l=en", link['href']
69
+ assert_equal "http://google.com/search?q=nokogiri&l=en", link.attributes['href']
70
+ assert_equal "http://google.com/search?q=nokogiri&l=en", link.get_attribute('href')
71
+ assert_equal "http://google.com/search?q=nokogiri&l=en", link.raw_attributes['href']
72
+ assert_equal str, link.to_html
73
+
74
+ # alter the url
75
+ link['href'] = "javascript:alert(\"AGGA-KA-BOO!\")"
76
+ assert_equal %{<a href="javascript:alert(&quot;AGGA-KA-BOO!&quot;)">Google</a>}, link.to_html.gsub(/%22/, '&quot;')
77
+ end
78
+ end
@@ -0,0 +1,30 @@
1
+ require File.expand_path(File.join(File.dirname(__FILE__), '..', "helper"))
2
+ require File.join(File.dirname(__FILE__),"load_files")
3
+
4
+ class TestParser < Nokogiri::TestCase
5
+ include Nokogiri
6
+ # normally, the link tags are empty HTML tags.
7
+ # contributed by laudney.
8
+ def test_normally_empty
9
+ doc = Hpricot::XML("<rss><channel><title>this is title</title><link>http://fake.com</link></channel></rss>")
10
+ assert_equal "this is title", (doc/:rss/:channel/:title).text
11
+ assert_equal "http://fake.com", (doc/:rss/:channel/:link).text
12
+ end
13
+
14
+ # make sure XML doesn't get downcased
15
+ def test_casing
16
+ doc = Hpricot::XML(TestFiles::WHY)
17
+
18
+ ### Modified.
19
+ # I don't want to differentiate pseudo classes from namespaces. If
20
+ # you're parsing xml, use XPath. That's what its for. :-P
21
+ assert_equal "hourly", (doc.at "//sy:updatePeriod").content
22
+ assert_equal 1, (doc/"guid[@isPermaLink]").length
23
+ end
24
+
25
+ # be sure tags named "text" are ok
26
+ def test_text_tags
27
+ doc = Hpricot::XML("<feed><title>City Poisoned</title><text>Rita Lee has poisoned Brazil.</text></feed>")
28
+ assert_equal "City Poisoned", (doc/"title").text
29
+ end
30
+ end