rubyful_soup_2011 0.1.5

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,441 @@
1
+ #Unit tests for Rubyful Soup.
2
+ #
3
+ #These tests make sure Rubyful Soup works as it should. If you
4
+ #find a bug in Rubyful Soup, the best way to express it is as a test
5
+ #case like this that fails.
6
+
7
+ require 'test/unit'
8
+ require 'rubygems'
9
+ require 'rubyful_soup'
10
+
11
class SoupTest < Test::Unit::TestCase

  # Parses +toParse+ with the parser class +c+ and asserts that the string
  # form of the resulting soup (+to_s(false)+) equals +rep+. When +rep+ is
  # omitted (nil), the input text itself is the expected output.
  def assert_soup_equals(toParse, rep=nil, c=BeautifulStoneSoup)
    expected = rep.nil? ? toParse : rep
    assert_equal(c.new(toParse).to_s(false), expected)
  end

  # Deliberately empty test; per the original note it exists only to keep
  # the test runner quiet about this base class.
  def test_null
  end

end
27
+
28
# Tests the various ways of fetching tags from a soup.
class ToteThatTag < SoupTest

  # Builds the shared fixture: two <a> tags, two <b> tags and one
  # <abc:d> tag. The id "x" is used by both the first <a> and the second <b>.
  def setup
    ml = %{
<a id="x">1</a>
<a id="a">2</a>
<b id="b">3</b>
<b id="x">4</b>
<abc:d width="100">5</abc:d>}
    @soup = BeautifulStoneSoup.new(ml)
  end

  # Fetching by tag name, including a name containing a colon, and
  # stepping from one match to the next with find_next.
  def test_fetch_by_name
    matching = @soup.find_all('a')
    assert_equal(matching.length, 2)
    assert_equal(matching[0].name, 'a')
    assert_equal(matching[0], @soup.find('a'))
    assert_equal(@soup.find('abc:d').contents.length, 1)

    firstB = @soup.find('b')
    nextB = firstB.find_next('b')
    assert_equal(nextB.contents[0], '4')
    assert_equal(nextB['id'], 'x')
  end

  # A block passed to find_all / find_text must select the same nodes as
  # the equivalent name or text argument.
  def test_fetch_by_block
    by_name = @soup.find_all('a')
    by_block = @soup.find_all do |x|
      x.is_a?(Tag) && x.name == 'a'
    end
    assert_equal(by_name, by_block)

    by_text = @soup.find_text('3')
    by_text_block = @soup.find_text do |x|
      x.is_a?(NavigableString) && x == '3'
    end
    assert_equal(by_text, by_text_block)

    # Nodes whose tag name equals their own id attribute.
    matching = @soup.find_all do |x|
      x.respond_to?('name') && x.name == x['id']
    end
    assert_equal(matching.length, 2)
    assert_equal(matching[0].name, 'a')
  end

  # Fetching by attribute value, with no tag name given.
  def test_fetch_by_attribute
    matching = @soup.find_all(nil, :attrs => {'id' => 'x'})
    assert_equal(matching.length, 2)
    assert_equal(matching[0].name, 'a')
    assert_equal(matching[1].name, 'b')

    assert_equal(@soup.find_all(nil, :attrs => {'id' => nil}).length, 1)
    assert_equal(@soup.find_all(nil, :attrs => {'id' => nil}).length, 1)

    assert_equal(@soup.find_all(nil, :attrs => {'width' => 100}).length, 1)
  end

  # soup.b and soup.b_tag must both return the same tag as find('b').
  def test_tag_name_as_method
    firstB = @soup.find('b')
    assert_equal(firstB, @soup.b)
    assert_equal(firstB, @soup.b_tag)
  end

  # An array of names matches tags having any of those names.
  def test_fetch_by_list
    matching = @soup.find_all(['a', 'abc:d'])
    assert_equal(matching.length, 3)
  end

  # A hash of name => true pairs matches tags with any of the key names.
  def test_fetch_by_hash
    matching = @soup.find_all({'a' => true, 'b' => true})
    assert_equal(matching.length, 4)
  end

  # A regular expression is matched against tag names.
  def test_fetch_by_re
    r = /a.*/
    assert_equal(@soup.find_all(r).length, 3)
  end

  # A Proc object can be passed in place of a tag name.
  def test_fetch_by_method
    # No explicit `return` inside the proc: in a Proc.new body, `return`
    # returns from test_fetch_by_method itself the first time the proc is
    # called, so the assertions below would never run. The block's last
    # expression is its value.
    name_equals_id = Proc.new { |x| x.name == x['id'] }
    matching = @soup.find_all(name_equals_id)
    assert_equal(matching.length, 2)
    assert_equal(matching[0].name, 'a')
  end

end
117
+
118
# Testing the integrity of the parse tree.
class FollowThatTag < SoupTest

  # Four unclosed, nested <b> tags. Not referenced by any test in this
  # class, but still parsed when the class body is evaluated.
  @@PROXIMITY_TEST = BeautifulStoneSoup.new('<b id="1"><b id="2"><b id="3"><b id="4">')

  # Four top-level blockquotes (ids 1-4); the first three each contain one
  # nested blockquote (ids 1.1, 2.1, 3.1).
  @@SIBLING_TEST = BeautifulStoneSoup.new(
    '<blockquote id="1"><blockquote id="1.1"></blockquote></blockquote>' \
    '<blockquote id="2"><blockquote id="2.1"></blockquote></blockquote>' \
    '<blockquote id="3"><blockquote id="3.1"></blockquote></blockquote>' \
    '<blockquote id="4">'
  )

  # find_parents / find_parent walk up through enclosing tags.
  def test_parents
    markup = '<ul id="foo"></ul><ul id="foo"><ul><ul id="foo" a="b"><b>Blah</b></ul></ul></ul>'
    bold = BeautifulSoup.new(markup).find('b')

    foo_lists = bold.find_parents('ul', :attrs => { 'id' => 'foo' })
    assert_equal(foo_lists.length, 2)

    nearest_list = bold.find_parent('ul')
    assert_equal(nearest_list['a'], 'b')
  end

  # Forward navigation starting from the blockquote with id 2.
  def test_next_sibling
    name = 'blockquote'
    start = @@SIBLING_TEST.find(name, :attrs => { 'id' => 2 })

    assert_equal(start.find_next(name)['id'], '2.1')
    assert_equal(start.find_next_sibling(name)['id'], '3')
    assert_equal(start.find_next_sibling(name)['id'], '3')
    assert_equal(start.find_next_siblings(name).length, 2)
    assert_equal(start.find_next_siblings(name, :attrs => { 'id' => 4 }).length, 1)
  end

  # Backward navigation starting from the blockquote with id 3.
  def test_previous_sibling
    name = 'blockquote'
    start = @@SIBLING_TEST.find(name, :attrs => { 'id' => 3 })

    assert_equal(start.find_previous(name)['id'], '2.1')
    assert_equal(start.find_previous_sibling(name)['id'], '2')
    assert_equal(start.find_previous_sibling(name)['id'], '2')
    assert_equal(start.find_previous_siblings(name).length, 2)
    assert_equal(start.find_previous_siblings(name, :attrs => { 'id' => 1 }).length, 1)
  end

  # Navigation that starts from a text node rather than a tag.
  def test_text_navigation
    markup = 'Foo<b>Bar</b><i id="1"><b>Baz<br />Blee<hr id="1"/></b></i>Blargh'
    baz = BeautifulSoup.new(markup).find_text('Baz')

    assert_equal(baz.find_parent("i")['id'], '1')
    assert_equal(baz.find_next(nil, :text => 'Blee'), 'Blee')
    assert_equal(baz.find_next_sibling(nil, :text => 'Blee'), 'Blee')
    # 'Blargh' lies outside the <b> that holds 'Baz', so it is not a sibling.
    assert_equal(baz.find_next_sibling(nil, :text => 'Blargh'), nil)
    assert_equal(baz.find_next_sibling('hr')['id'], '1')
  end

end
165
+
166
# Tests the next_sibling and previous_sibling navigation.
class SiblingRivalry < SoupTest

  # Siblings may be tags or text nodes, and stepping back then forward
  # again must land on the same node.
  def test_siblings
    soup = BeautifulSoup.new('<ul><li>1<p>A</p>B</li><li>2</li><li>3</li></ul>')

    li_after_first = soup.find('li').next_sibling
    assert_equal(li_after_first.name, 'li')
    assert_equal(li_after_first.string, '2')

    assert_equal(soup.find_text('1').next_sibling.name, 'p')
    assert_equal(soup.find('p').next_sibling, 'B')

    round_trip = soup.find('p').next_sibling.previous_sibling.next_sibling
    assert_equal(round_trip, 'B')
  end
end
180
+
181
# Tests the various built-in functions of Tag objects.
class TagsAreObjectsToo < SoupTest

  # <top> has three children: the text "1", a <b> tag, and the text "3".
  @@SOUP = BeautifulSoup.new('<top id="1">1<b>2</b>3</top>')

  # length counts the tag's direct children.
  def test_length
    assert_equal(@@SOUP.top.length, 3)
  end

  # Indexing a tag with a string reads the attribute of that name.
  def test_hash_lookup
    assert_equal(@@SOUP.top['id'], "1")
  end

  # each yields every direct child, in document order.
  def test_iterator
    seen = []
    @@SOUP.top.each { |child| seen << child }
    assert_equal(seen.length, 3)
    assert_equal(seen[2], "3")
  end

end
204
+
205
# Tests the use of 'string' as an alias for a tag's only content.
class StringEmUp < SoupTest

  # A tag whose single child is text exposes that text through +string+.
  def test_string
    soup = BeautifulSoup.new('<b>foo</b>')
    assert_equal(soup.b.string, 'foo')
  end

  # A tag with more than one child has no +string+.
  def test_lack_of_string
    soup = BeautifulSoup.new('<b>f<i>e</i>o</b>')
    assert_equal(soup.b.string, nil)
  end
end
218
+
219
# Tests the limit argument.
class ThatsMyLimit < SoupTest

  # Without :limit every match is returned; with :limit => 2 only two are.
  def test_basic_limits
    markup = '<br id="1" /><br id="1" /><br id="1" /><br id="1" />'
    soup = BeautifulSoup.new(markup)

    assert_equal(soup.find_all('br').length, 4)
    assert_equal(soup.find_all('br', :limit => 2).length, 2)
  end
end
228
+
229
# Testing the modification of the tree.
class WriteOnlyCode < SoupTest

  # Overwriting an entry of +contents+ changes what the soup renders.
  def test_replace_contents
    soup = BeautifulSoup.new('<a>foo</a>')
    replacement = NavigableString.new('bar')
    soup.a.contents[0] = replacement
    assert_equal(soup.render_contents, '<a>bar</a>')
  end

  # Attributes can be changed, removed by assigning nil, added, and
  # removed again with +delete+; the rendering follows each step.
  def test_modify_attributes
    soup = BeautifulSoup.new('<a id="1"></a>')
    anchor = soup.find('a')

    anchor['id'] = 2
    assert_equal(soup.render_contents, '<a id="2"></a>')

    anchor['id'] = nil
    assert_equal(soup.render_contents, '<a></a>')

    anchor['id2'] = 'foo'
    assert_equal(soup.render_contents, '<a id2="foo"></a>')

    anchor.delete('id2')
    assert_equal(soup.render_contents, '<a></a>')
  end

  # Makes sure tags don't step on each others' toes: an attribute set on
  # one freshly built tag must not show up on another.
  def test_new_tag_
    soup = BeautifulSoup.new('')
    anchor = Tag.new(soup, 'a')
    list = Tag.new(soup, 'ol')

    anchor["href"] = "http://foo.com/"
    assert_equal(list["href"], nil)
  end
end
262
+
263
# Our operators do it all! Call now!
class OperatorOverload < SoupTest

  # Referencing a tag name as a member must give the same result as the
  # corresponding chain of find calls.
  def test_tag_name_as_find
    markup = '<b id="1">foo<i>bar</i></b><b>Red herring</b>'
    soup = BeautifulSoup.new(markup)

    assert_equal(soup.b.i, soup.find('b').find('i'))
    assert_equal(soup.b.i.string, 'bar')
    assert_equal(soup.b['id'], '1')
    assert_equal(soup.b.contents[0], 'foo')
    # There is no <a> in the document, so the lookup yields nil.
    assert(soup.a == nil)

    # The .foo_tag spelling must behave like .foo.
    assert_equal(soup.b_tag.i_tag.string, 'bar')
    assert_equal(soup.b.i_tag.string, 'bar')
    assert_equal(soup.find('b').find('i'), soup.b_tag.i_tag)
  end
end
281
+
282
# Here we test tag nesting. TEST THE NEST, DUDE! X-TREME!
class NestableEgg < SoupTest

  # The <p> inside the blockquote stays inside it; the trailing unclosed
  # <p> is found by a non-recursive search from the top of the soup.
  def test_para_inside_blockquote
    markup = '<blockquote><p><b>Foo</b></p></blockquote><p>Bar'
    soup = BeautifulSoup.new(markup)

    assert_equal(soup.blockquote.p.b.string, 'Foo')
    assert_equal(soup.blockquote.b.string, 'Foo')
    assert_equal(soup.find('p', :recursive => false).string, 'Bar')
  end

  # A table inside a <td> is kept nested inside the outer table.
  def test_nested_tables
    text = %{<table id="1"><tr><td>Here's another table:\n<table id="2"><tr><td>Juicy text</td></tr></table></td></tr></table>}
    soup = BeautifulSoup.new(text)

    assert_equal(soup.table.table.td.string, 'Juicy text')
    assert_equal(soup.find_all('table').length, 2)
    assert_equal(soup.table.find_all('table').length, 1)

    # Three levels up from the inner table (td, tr, table) is the outer one.
    inner = soup.find('table', :attrs => { 'id' => 2 })
    assert_equal(inner.parent.parent.parent.name, 'table')
  end

  # A table placed directly inside a <tr> (no <td>) is still nested there.
  def test_bad_nested_tables
    markup = "<table><tr><table><tr id='nested'></tr></table></tr></table>"
    soup = BeautifulSoup.new(markup)
    assert_equal(soup.table.tr.table.tr['id'], 'nested')
  end
end
308
+
309
+
310
# Here we test cleanup of text that breaks an unaltered parser or is just
# obnoxious.
class CleanupOnAisleFour < SoupTest

  # "<br/>" is rendered back as "<br />".
  def test_self_closing_tag
    br = BeautifulStoneSoup.new("Foo<br/>Bar").find('br')
    assert_equal(br.to_s, '<br />')
    assert_soup_equals('<p>test1<br/>test2</p>', '<p>test1<br />test2</p>')
  end

  # No assertion: parsing mismatched closing tags just has to not raise.
  def test_bad_closing_tags
    BeautifulStoneSoup.new("<a>Foo<b>Bar</a>")
  end

  # No assertion: a closing tag before any opening tag must not raise.
  def test_premature_closing_tag
    BeautifulStoneSoup.new("</b><a>Foo<b>Bar</a>")
  end

  # A malformed DOCTYPE comes back unchanged.
  def test_bad_doctype
    assert_soup_equals("<!DOCTYPE foo='bar'>")
  end

  # Whitespace directly after "<!" is dropped.
  def test_whitespace_in_declaration
    assert_soup_equals('<! DOCTYPE>', '<!DOCTYPE>')
  end

  # Junk inside a declaration is kept, minus the space after "<!".
  def test_JunkInDeclaration
    assert_soup_equals('<! Foo = -8>a', '<!Foo = -8>a')
  end

  # An unterminated declaration is kept as text; the <p> after it is
  # still parsed and gets closed in the output.
  def test_incomplete_declaration
    assert_soup_equals('a<!b <p>c', 'a<!b <p>c</p>')
  end

  # Only the space after "<!" is removed; the one before ">" stays.
  def test_valid_but_bogus_declaration
    assert_soup_equals('<! Foo >a', '<!Foo >a')
  end

  # This fails for a totally bogus reason! I can't figure it out.
  #def test_smart_quotes_not_so_smart_anymore_FAILS
  #  assert_soup_equals("\x91Foo\x92", '&lsquo;Foo&rsquo;')
  #end

  #def test_incomplete_declaration_at_endFAILS
  #  assert_soup_equals('a<!b')
  #end

end
359
+
360
# Verifies that the parser treats multiple feed calls the same as one
# big feed call only if constructed with
# :initial_text_is_everything => false.
class KeepOnParsing < SoupTest

  # Feeding a document in two pieces equals parsing it in one go only
  # when the soup was built with :initial_text_is_everything => false.
  def test_multiple_parse_calls
    head = '<foo>bah<bar>'
    tail = 'blee</bar></foo>'

    whole = BeautifulSoup.new(head + tail)

    default_soup = BeautifulSoup.new(head)
    default_soup.feed(tail)

    incremental_soup = BeautifulSoup.new(head, :initial_text_is_everything => false)
    incremental_soup.feed(tail)

    assert_not_equal(whole, default_soup)
    assert_equal(whole, incremental_soup)
  end
end
378
+
379
# Verifies that BeautifulSOAP parser works.
class SOAPMeUp < SoupTest

  # A child tag holding only text is also exposed as an attribute of its
  # parent.
  def test_basic_soap
    markup = '<foo><bar>baz</bar></foo>'
    soup = BeautifulSOAP.new(markup)
    assert_equal(soup.to_s, %{<foo bar="baz"><bar>baz</bar></foo>})
  end

  # An attribute already present on the parent is left untouched.
  def test_dont_overwrite_existing_attr
    markup = %{<foo bar="don't kill me!"><bar>baz</bar></foo>}
    soup = BeautifulSOAP.new(markup)
    assert_equal(soup.to_s, markup)
  end
end
393
+
394
# Verifies that you can decide not to parse certain tags.
class OnlyTheLonely < SoupTest

  # With :parse_only_these => 'b', only the <b> elements (and whatever is
  # nested inside them) are expected to survive in the output.
  def test_parse_only_these
    html = "<a>1<b>2</b>3</a><b>4<a>5</a>6</b>"
    soup = BeautifulStoneSoup.new(html, :parse_only_these => 'b')
    # The leftover `puts soup` debug print was removed so the test run
    # produces no stray output.
    assert_equal(soup.to_s, "<b>2</b><b>4<a>5</a>6</b>")
  end
end
403
+
404
+ #The Unicode test suite has not yet been ported because I haven't
405
+ #figured out how Ruby does Unicode.
406
+
407
+ # class UnicodeRed < SoupTest
408
+ # "Makes sure Unicode works."
409
+
410
+ # def setUp
411
+ # text = 'foo<b>bar</b>'
412
+ # @soup = BeautifulStoneSoup
413
+ # @soup.feed(text)
414
+
415
+ # def test_BasicUnicode
416
+ # import types
417
+ # sType = types.StringType
418
+ # uType = types.UnicodeType
419
+
420
+ # u = u'\3100'
421
+ # #It starts out ASCII...
422
+ # assert_equal(type(@soup.renderContents), sType)
423
+ # assert_equal(type(@soup.prettify), sType)
424
+ # #But you can have unicode if you want.
425
+ # assert_equal(type(unicode(@soup)), uType)
426
+
427
+ # #Add a Unicode character and it's Unicode.
428
+ # @soup.feed(u)
429
+ # assert_equal(type(@soup.renderContents), uType)
430
+ # assert_equal(type(@soup.prettify), uType)
431
+ # #But you can have ASCII if you want.
432
+ # assert_equal(type(str(@soup)), sType)
433
+
434
+ # #The part without any Unicode is still ASCII.
435
+ # assert_equal(type(@soup.b.prettify), sType)
436
+
437
+ # #But if you add a Unicode character it'll become Unicode.
438
+ # @soup.b['foo'] = u'\3100'
439
+ # assert_equal(type(@soup.b.prettify), uType)
440
+
441
+
metadata ADDED
@@ -0,0 +1,57 @@
1
+ --- !ruby/object:Gem::Specification
2
+ name: rubyful_soup_2011
3
+ version: !ruby/object:Gem::Version
4
+ prerelease:
5
+ version: 0.1.5
6
+ platform: ruby
7
+ authors:
8
+ - Drew Baumann
9
+ autorequire:
10
+ bindir: bin
11
+ cert_chain: []
12
+
13
+ date: 2011-03-11 00:00:00 -07:00
14
+ default_executable:
15
+ dependencies: []
16
+
17
+ description: Finally updated this to work 1.9.2 so we can use it with our flybymiles app
18
+ email: db@30robots.com
19
+ executables: []
20
+
21
+ extensions: []
22
+
23
+ extra_rdoc_files: []
24
+
25
+ files:
26
+ - lib/rubyful_soup.rb
27
+ - tests/rubyful_soup_tests.rb
28
+ has_rdoc: true
29
+ homepage: http://xxx.rubyforge.org/
30
+ licenses: []
31
+
32
+ post_install_message:
33
+ rdoc_options: []
34
+
35
+ require_paths:
36
+ - lib
37
+ required_ruby_version: !ruby/object:Gem::Requirement
38
+ none: false
39
+ requirements:
40
+ - - ">="
41
+ - !ruby/object:Gem::Version
42
+ version: "0"
43
+ required_rubygems_version: !ruby/object:Gem::Requirement
44
+ none: false
45
+ requirements:
46
+ - - ">="
47
+ - !ruby/object:Gem::Version
48
+ version: "0"
49
+ requirements: []
50
+
51
+ rubyforge_project: rubyful_soup_2011
52
+ rubygems_version: 1.5.0
53
+ signing_key:
54
+ specification_version: 3
55
+ summary: Updated rubyful_soup to work with 1.9.2
56
+ test_files:
57
+ - tests/rubyful_soup_tests.rb