rubyful_soup_2011 0.1.5

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,441 @@
1
+ #Unit tests for Rubyful Soup.
2
+ #
3
+ #These tests make sure that Rubyful Soup works as it should. If you
4
+ #find a bug in Rubyful Soup, the best way to express it is as a test
5
+ #case like this that fails.
6
+
7
+ require 'test/unit'
8
+ require 'rubygems'
9
+ require 'rubyful_soup'
10
+
11
# Base class for the Rubyful Soup test cases; it supplies a shared
# parse-and-compare assertion.
class SoupTest < Test::Unit::TestCase

  # Parses +toParse+ with the parser class +c+ and asserts that the document,
  # rendered with +to_s(false)+, equals +rep+.
  #
  # toParse:: markup handed to the parser.
  # rep::     expected rendering; when nil, the markup is expected to come
  #           back unchanged.
  # c::       parser class to instantiate (BeautifulStoneSoup by default).
  def assert_soup_equals(toParse, rep=nil, c=BeautifulStoneSoup)
    rep = toParse if rep.nil?
    # The expected value goes first so that a failure report labels the
    # expected and actual sides correctly.
    assert_equal(rep, c.new(toParse).to_s(false))
  end

  # Intentionally empty; the original author describes it as a null test
  # "to shut the compiler up".
  def test_null
  end

end
27
+
28
#Tests the various ways of fetching tags from a soup.
class ToteThatTag < SoupTest

  # Builds the fixture used by every test: two <a> tags, two <b> tags and one
  # <abc:d> tag. The id "x" is carried by both an <a> and a <b>; the ids "a"
  # and "b" equal the names of the tags that carry them.
  def setup
    ml = %{
<a id="x">1</a>
<a id="a">2</a>
<b id="b">3</b>
<b id="x">4</b>
<abc:d width="100">5</abc:d>}
    @soup = BeautifulStoneSoup.new(ml)
  end

  # Lookup by tag name through find_all, find and find_next.
  def test_fetch_by_name
    matching = @soup.find_all('a')
    assert_equal(matching.length, 2)
    assert_equal(matching[0].name, 'a')
    assert_equal(matching[0], @soup.find('a'))
    assert_equal(@soup.find('abc:d').contents.length, 1)

    firstB = @soup.find('b')
    nextB = firstB.find_next('b')
    assert_equal(nextB.contents[0], '4')
    assert_equal(nextB['id'], 'x')
  end

  # A block passed to find_all / find_text must select the same elements as
  # the equivalent name or text argument.
  def test_fetch_by_block
    by_name = @soup.find_all('a')
    by_block = @soup.find_all do |x|
      # Parentheses are required here: `x.is_a? Tag && ...` would pass the
      # whole && expression as the argument to is_a?.
      x.is_a?(Tag) && x.name == 'a'
    end
    assert_equal(by_name, by_block)

    by_text = @soup.find_text('3')
    by_text_block = @soup.find_text do |x|
      x.is_a?(NavigableString) && x == '3'
    end
    assert_equal(by_text, by_text_block)

    # Elements whose tag name equals their own id: <a id="a"> and <b id="b">.
    matching = @soup.find_all do |x|
      x.respond_to?('name') && x.name == x['id']
    end
    assert_equal(matching.length, 2)
    assert_equal(matching[0].name, 'a')
  end

  # Lookup by attribute value with no tag-name restriction.
  def test_fetch_by_attribute
    matching = @soup.find_all(nil, :attrs=>{'id' => 'x'})
    assert_equal(matching.length, 2)
    assert_equal(matching[0].name, 'a')
    assert_equal(matching[1].name, 'b')

    # Only <abc:d> in the fixture has no id attribute.
    assert_equal(@soup.find_all(nil, :attrs=>{'id' => nil}).length, 1)

    assert_equal(@soup.find_all(nil, :attrs=>{'width' => 100}).length, 1)
  end

  # soup.b and soup.b_tag must both return what find('b') returns.
  def test_tag_name_as_method
    firstB = @soup.find('b')
    assert_equal(firstB, @soup.b)
    assert_equal(firstB, @soup.b_tag)
  end

  # An array of names: two <a> tags plus <abc:d>.
  def test_fetch_by_list
    matching = @soup.find_all(['a', 'abc:d'])
    assert_equal(matching.length, 3)
  end

  # A hash of names: two <a> tags plus two <b> tags.
  def test_fetch_by_hash
    matching = @soup.find_all({'a' => true, 'b' => true})
    assert_equal(matching.length, 4)
  end

  # A regular expression applied to tag names.
  def test_fetch_by_re
    r = /a.*/
    assert_equal(@soup.find_all(r).length, 3)
  end

  # A Proc used as the matcher.
  def test_fetch_by_method
    # No `return` inside the block: a `return` in a Proc.new block exits the
    # enclosing method, which ended this test on the first call and skipped
    # the assertions below.
    matcher = Proc.new { |x| x.name == x['id'] }
    matching = @soup.find_all(matcher)
    assert_equal(matching.length, 2)
    assert_equal(matching[0].name, 'a')
  end

end
117
+
118
#Testing the integrity of the parse tree.
class FollowThatTag < SoupTest

  # Four <b> tags opened one after another and never closed. No test visible
  # in this class reads this fixture.
  @@PROXIMITY_TEST = BeautifulStoneSoup.new('<b id="1"><b id="2"><b id="3"><b id="4">')

  # Blockquotes 1, 2 and 3, each written with one nested blockquote (1.1,
  # 2.1, 3.1), followed by an unclosed blockquote 4.
  @@SIBLING_TEST = BeautifulStoneSoup.new('<blockquote id="1"><blockquote id="1.1"></blockquote></blockquote><blockquote id="2"><blockquote id="2.1"></blockquote></blockquote><blockquote id="3"><blockquote id="3.1"></blockquote></blockquote><blockquote id="4">')

  # find_parents / find_parent starting from a deeply nested <b>.
  def test_parents
    markup = '<ul id="foo"></ul><ul id="foo"><ul><ul id="foo" a="b"><b>Blah</b></ul></ul></ul>'
    bold = BeautifulSoup.new(markup).find('b')
    foo_lists = bold.find_parents('ul', :attrs => {'id' => 'foo'})
    assert_equal(foo_lists.length, 2)
    assert_equal(bold.find_parent('ul')['a'], 'b')
  end

  # Forward navigation starting from blockquote 2.
  def test_next_sibling
    name = 'blockquote'
    second = @@SIBLING_TEST.find(name, :attrs => {'id' => 2})
    assert_equal(second.find_next(name)['id'], '2.1')
    assert_equal(second.find_next_sibling(name)['id'], '3')
    assert_equal(second.find_next_sibling(name)['id'], '3')
    assert_equal(second.find_next_siblings(name).length, 2)
    only_fourth = second.find_next_siblings(name, :attrs => {'id' => 4})
    assert_equal(only_fourth.length, 1)
  end

  # Backward navigation starting from blockquote 3.
  def test_previous_sibling
    name = 'blockquote'
    third = @@SIBLING_TEST.find(name, :attrs => {'id' => 3})
    assert_equal(third.find_previous(name)['id'], '2.1')
    assert_equal(third.find_previous_sibling(name)['id'], '2')
    assert_equal(third.find_previous_sibling(name)['id'], '2')
    assert_equal(third.find_previous_siblings(name).length, 2)
    only_first = third.find_previous_siblings(name, :attrs => {'id' => 1})
    assert_equal(only_first.length, 1)
  end

  # Navigation that starts from a text node rather than from a tag.
  def test_text_navigation
    markup = 'Foo<b>Bar</b><i id="1"><b>Baz<br />Blee<hr id="1"/></b></i>Blargh'
    baz = BeautifulSoup.new(markup).find_text('Baz')
    assert_equal(baz.find_parent("i")['id'], '1')
    assert_equal(baz.find_next(nil, :text => 'Blee'), 'Blee')
    assert_equal(baz.find_next_sibling(nil, :text => 'Blee'), 'Blee')
    assert_equal(baz.find_next_sibling(nil, :text => 'Blargh'), nil)
    assert_equal(baz.find_next_sibling('hr')['id'], '1')
  end

end
165
+
166
#Tests the next_sibling and previous_sibling navigation.
class SiblingRivalry < SoupTest

  # Walks between neighbouring nodes of a three-item list whose first item
  # mixes text with a <p> tag.
  def test_siblings
    doc = BeautifulSoup.new("<ul><li>1<p>A</p>B</li><li>2</li><li>3</li></ul>")

    item_two = doc.find('li').next_sibling
    assert_equal(item_two.name, 'li')
    assert_equal(item_two.string, '2')

    assert_equal(doc.find_text('1').next_sibling.name, 'p')
    assert_equal(doc.find('p').next_sibling, 'B')

    # Going forward, back, then forward again must land on the same node.
    round_trip = doc.find('p').next_sibling.previous_sibling.next_sibling
    assert_equal(round_trip, 'B')
  end
end
180
+
181
#Tests the various built-in functions of Tag objects.
class TagsAreObjectsToo < SoupTest

  # A <top> tag written with three children: text "1", a <b> tag, text "3".
  @@SOUP = BeautifulSoup.new('<top id="1">1<b>2</b>3</top>')

  # length counts the tag's children.
  def test_length
    assert_equal(@@SOUP.top.length, 3)
  end

  # Attributes are read with hash-style indexing.
  def test_hash_lookup
    assert_equal(@@SOUP.top['id'], "1")
  end

  # each yields every child in document order.
  def test_iterator
    children = []
    @@SOUP.top.each { |child| children << child }
    assert_equal(children.length, 3)
    assert_equal(children[2], "3")
  end

end
204
+
205
#Tests the use of 'string' as an alias for a tag's only content.
class StringEmUp < SoupTest

  # A tag holding nothing but text exposes that text through string.
  def test_string
    doc = BeautifulSoup.new('<b>foo</b>')
    assert_equal(doc.b.string, 'foo')
  end

  # A tag with mixed content (text and a nested tag) has no string.
  def test_lack_of_string
    doc = BeautifulSoup.new("<b>f<i>e</i>o</b>")
    assert_equal(doc.b.string, nil)
  end
end
218
+
219
#Tests the limit argument.
class ThatsMyLimit < SoupTest

  # find_all returns all four <br> tags unless :limit caps the result.
  def test_basic_limits
    doc = BeautifulSoup.new('<br id="1" /><br id="1" /><br id="1" /><br id="1" />')
    everything = doc.find_all('br')
    assert_equal(everything.length, 4)
    capped = doc.find_all('br', :limit => 2)
    assert_equal(capped.length, 2)
  end
end
228
+
229
#Testing the modification of the tree.
class WriteOnlyCode < SoupTest

  # Replacing the first child of <a> with a new NavigableString must be
  # visible in the rendered document.
  def test_replace_contents
    doc = BeautifulSoup.new('<a>foo</a>')
    replacement = NavigableString.new('bar')
    doc.a.contents[0] = replacement
    assert_equal(doc.render_contents, '<a>bar</a>')
  end

  # Setting, clearing, adding and deleting attributes, checking the rendered
  # document after each step.
  def test_modify_attributes
    doc = BeautifulSoup.new('<a id="1"></a>')
    anchor = doc.find('a')

    anchor['id'] = 2
    assert_equal(doc.render_contents, '<a id="2"></a>')
    # Assigning nil is expected to drop the attribute from the output.
    anchor['id'] = nil
    assert_equal(doc.render_contents, '<a></a>')

    anchor['id2'] = 'foo'
    assert_equal(doc.render_contents, '<a id2="foo"></a>')
    anchor.delete('id2')
    assert_equal(doc.render_contents, '<a></a>')
  end

  #Makes sure tags don't step on each others' toes.
  def test_new_tag_
    doc = BeautifulSoup.new('')
    link = Tag.new(doc, 'a')
    list = Tag.new(doc, 'ol')
    # An attribute set on one new tag must not show up on the other.
    link["href"] = "http://foo.com/"
    assert_equal(list["href"], nil)
  end
end
262
+
263
#Our operators do it all! Call now!
class OperatorOverload < SoupTest

  # Tests that referencing a tag name as a member delegates to find.
  def test_tag_name_as_find
    doc = BeautifulSoup.new('<b id="1">foo<i>bar</i></b><b>Red herring</b>')
    found_italic = doc.find('b').find('i')

    assert_equal(doc.b.i, found_italic)
    assert_equal(doc.b.i.string, 'bar')
    assert_equal(doc.b['id'], '1')
    assert_equal(doc.b.contents[0], 'foo')
    # A tag name that does not occur in the document yields nil.
    assert(doc.a == nil)

    #Test the .foo_tag variant of .foo.
    assert_equal(doc.b_tag.i_tag.string, 'bar')
    assert_equal(doc.b.i_tag.string, 'bar')
    assert_equal(doc.find('b').find('i'), doc.b_tag.i_tag)
  end
end
281
+
282
#Here we test tag nesting. TEST THE NEST, DUDE! X-TREME!
class NestableEgg < SoupTest

  # A <p> inside a <blockquote> stays nested there; the unclosed <p> after
  # the blockquote is found by a non-recursive search from the document.
  def test_para_inside_blockquote
    doc = BeautifulSoup.new('<blockquote><p><b>Foo</b></p></blockquote><p>Bar')
    quote = doc.blockquote
    assert_equal(quote.p.b.string, 'Foo')
    assert_equal(quote.b.string, 'Foo')
    assert_equal(doc.find('p', :recursive => false).string, 'Bar')
  end

  # A table written inside a cell of another table must stay inside it.
  def test_nested_tables
    text = %{<table id="1"><tr><td>Here's another table:
<table id="2"><tr><td>Juicy text</td></tr></table></td></tr></table>}
    doc = BeautifulSoup.new(text)
    assert_equal(doc.table.table.td.string, 'Juicy text')
    assert_equal(doc.find_all('table').length, 2)
    assert_equal(doc.table.find_all('table').length, 1)
    # Three parent steps up from the inner table: expected to be a <table>.
    inner = doc.find('table', :attrs => {'id' => 2})
    assert_equal(inner.parent.parent.parent.name, 'table')
  end

  # A table placed directly inside a <tr>, with no <td> in between, is still
  # expected to nest where it was written.
  def test_bad_nested_tables
    doc = BeautifulSoup.new("<table><tr><table><tr id='nested'></tr></table></tr></table>")
    assert_equal(doc.table.tr.table.tr['id'], 'nested')
  end
end
308
+
309
+
310
#Here we test cleanup of text that breaks an unaltered parser or is just
#obnoxious.
class CleanupOnAisleFour < SoupTest

  # <br/> is expected to be rendered as <br />, both on its own and inside
  # a paragraph.
  def test_self_closing_tag
    br = BeautifulStoneSoup.new("Foo<br/>Bar").find('br')
    assert_equal(br.to_s, '<br />')
    assert_soup_equals('<p>test1<br/>test2</p>', '<p>test1<br />test2</p>')
  end

  # No assertion: the parse only has to finish without raising.
  def test_bad_closing_tags
    BeautifulStoneSoup.new("<a>Foo<b>Bar</a>")
  end

  # No assertion: the parse only has to finish without raising.
  def test_premature_closing_tag
    BeautifulStoneSoup.new("</b><a>Foo<b>Bar</a>")
  end

  # A doctype containing an attribute-like token is expected to come back
  # unchanged.
  def test_bad_doctype
    assert_soup_equals("<!DOCTYPE foo='bar'>")
  end

  # The space after "<!" is expected to be dropped.
  def test_whitespace_in_declaration
    assert_soup_equals('<! DOCTYPE>', '<!DOCTYPE>')
  end

  # Same expectation for a declaration with arbitrary content.
  def test_JunkInDeclaration
    assert_soup_equals('<! Foo = -8>a', '<!Foo = -8>a')
  end

  # An unterminated declaration followed by a tag; the <p> is expected to be
  # closed in the output.
  def test_incomplete_declaration
    assert_soup_equals('a<!b <p>c', 'a<!b <p>c</p>')
  end

  # Same expectation as test_whitespace_in_declaration, with a trailing
  # space before ">".
  def test_valid_but_bogus_declaration
    assert_soup_equals('<! Foo >a', '<!Foo >a')
  end

  #This fails for a totally bogus reason! I can't figure it out.
  #def test_smart_quotes_not_so_smart_anymore_FAILS
  #  assert_soup_equals("\x91Foo\x92", '&lsquo;Foo&rsquo;')
  #end

  #def test_incomplete_declaration_at_endFAILS
  #  assert_soup_equals('a<!b')
  #end

end
359
+
360
#Verifies that the parser treats multiple feed calls the same as one
#big feed call only if constructed with
#:initial_text_is_everything => false.
class KeepOnParsing < SoupTest

  # Feeds one document in two pieces and compares the result with parsing
  # the whole document at once.
  def test_multiple_parse_calls
    head = '<foo>bah<bar>'
    tail = 'blee</bar></foo>'

    whole = BeautifulSoup.new(head + tail)

    # Default construction, then a second feed: expected to differ.
    default_mode = BeautifulSoup.new(head)
    default_mode.feed(tail)

    # Constructed for incremental input: expected to equal the one-shot parse.
    incremental = BeautifulSoup.new(head, :initial_text_is_everything => false)
    incremental.feed(tail)

    assert_not_equal(whole, default_mode)
    assert_equal(whole, incremental)
  end
end
378
+
379
#Verifies that the BeautifulSOAP parser works.
class SOAPMeUp < SoupTest

  # A child tag holding only text is expected to be copied onto its parent
  # as an attribute.
  def test_basic_soap
    markup = "<foo><bar>baz</bar></foo>"
    rendered = BeautifulSOAP.new(markup).to_s
    assert_equal(rendered, %{<foo bar="baz"><bar>baz</bar></foo>})
  end

  # An attribute already present on the parent must be left as it is.
  def test_dont_overwrite_existing_attr
    markup = %{<foo bar="don't kill me!"><bar>baz</bar></foo>}
    rendered = BeautifulSOAP.new(markup).to_s
    assert_equal(rendered, markup)
  end
end
393
+
394
# Verifies that you can decide not to parse certain tags.
class OnlyTheLonely < SoupTest

  # With :parse_only_these => 'b', the rendered document is expected to hold
  # only the <b> tags and whatever they contain.
  def test_parse_only_these
    html = "<a>1<b>2</b>3</a><b>4<a>5</a>6</b>"
    soup = BeautifulStoneSoup.new(html, :parse_only_these=>'b')
    # The leftover debugging `puts soup` was removed; it wrote the document
    # to stdout on every test run.
    assert_equal(soup.to_s, "<b>2</b><b>4<a>5</a>6</b>")
  end
end
403
+
404
+ #The Unicode test suite has not yet been ported because I haven't
405
+ #figured out how Ruby does Unicode.
406
+
407
+ # class UnicodeRed < SoupTest
408
+ # "Makes sure Unicode works."
409
+
410
+ # def setUp
411
+ # text = 'foo<b>bar</b>'
412
+ # @soup = BeautifulStoneSoup
413
+ # @soup.feed(text)
414
+
415
+ # def test_BasicUnicode
416
+ # import types
417
+ # sType = types.StringType
418
+ # uType = types.UnicodeType
419
+
420
+ # u = u'\3100'
421
+ # #It starts out ASCII...
422
+ # assert_equal(type(@soup.renderContents), sType)
423
+ # assert_equal(type(@soup.prettify), sType)
424
+ # #But you can have unicode if you want.
425
+ # assert_equal(type(unicode(@soup)), uType)
426
+
427
+ # #Add a Unicode character and it's Unicode.
428
+ # @soup.feed(u)
429
+ # assert_equal(type(@soup.renderContents), uType)
430
+ # assert_equal(type(@soup.prettify), uType)
431
+ # #But you can have ASCII if you want.
432
+ # assert_equal(type(str(@soup)), sType)
433
+
434
+ # #The part without any Unicode is still ASCII.
435
+ # assert_equal(type(@soup.b.prettify), sType)
436
+
437
+ # #But if you add a Unicode character it'll become Unicode.
438
+ # @soup.b['foo'] = u'\3100'
439
+ # assert_equal(type(@soup.b.prettify), uType)
440
+
441
+
metadata ADDED
@@ -0,0 +1,57 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rubyful_soup_2011
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.1.5
6
+ platform: ruby
7
+ authors:
8
+ - Drew Baumann
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-03-11 00:00:00 -07:00
14
+ default_executable:
15
+ dependencies: []
16
+
17
+ description: Finally updated this to work 1.9.2 so we can use it with our flybymiles app
18
+ email: db@30robots.com
19
+ executables: []
20
+
21
+ extensions: []
22
+
23
+ extra_rdoc_files: []
24
+
25
+ files:
26
+ - lib/rubyful_soup.rb
27
+ - tests/rubyful_soup_tests.rb
28
+ has_rdoc: true
29
+ homepage: http://xxx.rubyforge.org/
30
+ licenses: []
31
+
32
+ post_install_message:
33
+ rdoc_options: []
34
+
35
+ require_paths:
36
+ - lib
37
+ required_ruby_version: !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: "0"
43
+ required_rubygems_version: !ruby/object:Gem::Requirement
44
+ none: false
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: "0"
49
+ requirements: []
50
+
51
+ rubyforge_project: rubyful_soup_2011
52
+ rubygems_version: 1.5.0
53
+ signing_key:
54
+ specification_version: 3
55
+ summary: Updated rubyful_soup to work with 1.9.2
56
+ test_files:
57
+ - tests/rubyful_soup_tests.rb