rubylibre-feedzirra 0.0.14 → 0.0.23
Sign up to get free protection for your applications and to get access to all the features.
- data/README.rdoc +169 -0
- data/README.textile +9 -0
- data/lib/feedzirra/feed.rb +32 -37
- data/lib/feedzirra/parser/atom.rb +9 -0
- data/lib/feedzirra/parser/atom_entry.rb +6 -0
- data/lib/feedzirra/parser/atom_feed_burner_entry.rb +1 -1
- data/lib/feedzirra/parser/itunes_category.rb +12 -0
- data/lib/feedzirra/parser/mrss_category.rb +11 -0
- data/lib/feedzirra/parser/mrss_content.rb +48 -0
- data/lib/feedzirra/parser/mrss_copyright.rb +10 -0
- data/lib/feedzirra/parser/mrss_credit.rb +11 -0
- data/lib/feedzirra/parser/mrss_group.rb +37 -0
- data/lib/feedzirra/parser/mrss_hash.rb +10 -0
- data/lib/feedzirra/parser/mrss_player.rb +11 -0
- data/lib/feedzirra/parser/mrss_rating.rb +10 -0
- data/lib/feedzirra/parser/mrss_restriction.rb +11 -0
- data/lib/feedzirra/parser/mrss_text.rb +13 -0
- data/lib/feedzirra/parser/mrss_thumbnail.rb +11 -0
- data/lib/feedzirra/parser/rss.rb +64 -9
- data/lib/feedzirra/parser/rss_entry.rb +54 -14
- data/lib/feedzirra/parser/rss_image.rb +15 -0
- data/lib/feedzirra.rb +17 -5
- data/spec/benchmarks/feed_benchmarks.rb +98 -0
- data/spec/benchmarks/feedzirra_benchmarks.rb +40 -0
- data/spec/benchmarks/fetching_benchmarks.rb +28 -0
- data/spec/benchmarks/parsing_benchmark.rb +30 -0
- data/spec/benchmarks/updating_benchmarks.rb +33 -0
- data/spec/feedzirra/feed_spec.rb +35 -53
- data/spec/feedzirra/parser/atom_entry_spec.rb +4 -0
- data/spec/feedzirra/parser/atom_spec.rb +8 -0
- data/spec/feedzirra/parser/mrss_content_spec.rb +32 -0
- data/spec/feedzirra/parser/rss_entry_spec.rb +121 -8
- data/spec/feedzirra/parser/rss_spec.rb +66 -14
- data/spec/sample_feeds/run_against_sample.rb +20 -0
- data/spec/spec_helper.rb +3 -3
- metadata +37 -22
- data/lib/feedzirra/parser/itunes_rss.rb +0 -50
- data/lib/feedzirra/parser/itunes_rss_item.rb +0 -31
- data/lib/feedzirra/parser/itunes_rss_owner.rb +0 -12
- data/spec/feedzirra/parser/itunes_rss_item_spec.rb +0 -48
- data/spec/feedzirra/parser/itunes_rss_owner_spec.rb +0 -18
- data/spec/feedzirra/parser/itunes_rss_spec.rb +0 -50
- data/spec/spec.opts +0 -2
data/spec/feedzirra/feed_spec.rb
CHANGED
@@ -5,38 +5,20 @@ describe Feedzirra::Feed do
|
|
5
5
|
before(:all) do
|
6
6
|
Feedzirra::Feed.add_common_feed_entry_element("wfw:commentRss", :as => :comment_rss)
|
7
7
|
end
|
8
|
-
|
9
|
-
it "should parse the added element out of Atom feed entries" do
|
10
|
-
Feedzirra::Feed.parse(sample_wfw_feed).entries.first.comment_rss.should == "this is the new val"
|
11
|
-
end
|
12
|
-
|
13
|
-
it "should parse the added element out of Atom Feedburner feed entries" do
|
14
|
-
Feedzirra::Parser::AtomEntry.new.should respond_to(:comment_rss)
|
15
|
-
end
|
16
|
-
|
17
|
-
it "should parse the added element out of RSS feed entries" do
|
18
|
-
Feedzirra::Parser::RSSEntry.new.should respond_to(:comment_rss)
|
19
|
-
end
|
20
|
-
end
|
21
|
-
|
22
|
-
describe "#add_common_feed_element" do
|
23
|
-
before(:all) do
|
24
|
-
Feedzirra::Feed.add_common_feed_element("description", :as => :description)
|
25
|
-
end
|
26
|
-
|
8
|
+
|
27
9
|
it "should parse the added element out of Atom feeds" do
|
28
|
-
Feedzirra::Feed.parse(
|
10
|
+
Feedzirra::Feed.parse(sample_wfw_feed).entries.first.comment_rss.should == "this is the new val"
|
29
11
|
end
|
30
|
-
|
12
|
+
|
31
13
|
it "should parse the added element out of Atom Feedburner feeds" do
|
32
|
-
Feedzirra::Parser::
|
14
|
+
Feedzirra::Parser::AtomEntry.new.should respond_to(:comment_rss)
|
33
15
|
end
|
34
|
-
|
16
|
+
|
35
17
|
it "should parse the added element out of RSS feeds" do
|
36
|
-
Feedzirra::Parser::
|
18
|
+
Feedzirra::Parser::RSSEntry.new.should respond_to(:comment_rss)
|
37
19
|
end
|
38
20
|
end
|
39
|
-
|
21
|
+
|
40
22
|
describe "#parse" do # many of these tests are redundant with the specific feed type tests, but I put them here for completeness
|
41
23
|
context "when there's an available parser" do
|
42
24
|
it "should parse an rdf feed" do
|
@@ -65,17 +47,17 @@ describe Feedzirra::Feed do
|
|
65
47
|
feed.title.should == "Paul Dix Explains Nothing"
|
66
48
|
feed.entries.first.published.to_s.should == "Thu Jan 22 15:50:22 UTC 2009"
|
67
49
|
feed.entries.size.should == 5
|
68
|
-
end
|
50
|
+
end
|
69
51
|
|
70
52
|
it "should parse an itunes feed as a standard RSS feed" do
|
71
53
|
feed = Feedzirra::Feed.parse(sample_itunes_feed)
|
72
54
|
feed.title.should == "All About Everything"
|
73
55
|
feed.entries.first.published.should == Time.parse("Wed, 15 Jun 2005 19:00:00 GMT")
|
74
|
-
|
56
|
+
|
75
57
|
# Since the commit 621957879, iTunes feeds will be parsed as standard RSS, so this
|
76
58
|
# entry should now not have a method for itunes_author.
|
77
59
|
feed.entries.first.should_not respond_to(:itunes_author)
|
78
|
-
feed.entries.size.should ==
|
60
|
+
feed.entries.size.should == 4
|
79
61
|
end
|
80
62
|
end
|
81
63
|
|
@@ -131,7 +113,7 @@ describe Feedzirra::Feed do
|
|
131
113
|
true
|
132
114
|
end
|
133
115
|
end
|
134
|
-
|
116
|
+
|
135
117
|
new_feed_type.should be_able_to_parse(feed_text)
|
136
118
|
Feedzirra::Feed.add_feed_class(new_feed_type)
|
137
119
|
Feedzirra::Feed.determine_feed_parser_for_xml(feed_text).should == new_feed_type
|
@@ -183,7 +165,7 @@ describe Feedzirra::Feed do
|
|
183
165
|
@curl_easy = stub('curl_easy')
|
184
166
|
@curl = stub('curl', :headers => {}, :follow_location= => true, :on_failure => true)
|
185
167
|
@curl.stub!(:on_success).and_yield(@cmock)
|
186
|
-
|
168
|
+
|
187
169
|
Curl::Multi.stub!(:new).and_return(@multi)
|
188
170
|
Curl::Easy.stub!(:new).and_yield(@curl).and_return(@curl_easy)
|
189
171
|
end
|
@@ -197,7 +179,7 @@ describe Feedzirra::Feed do
|
|
197
179
|
Feedzirra::Feed.fetch_raw(@paul_feed[:url])
|
198
180
|
@curl.headers['User-Agent'].should == Feedzirra::Feed::USER_AGENT
|
199
181
|
end
|
200
|
-
|
182
|
+
|
201
183
|
it "should set if modified since as an option if passed" do
|
202
184
|
Feedzirra::Feed.fetch_raw(@paul_feed[:url], :if_modified_since => Time.parse("Wed, 28 Jan 2009 04:10:32 GMT"))
|
203
185
|
@curl.headers["If-Modified-Since"].should == 'Wed, 28 Jan 2009 04:10:32 GMT'
|
@@ -207,7 +189,7 @@ describe Feedzirra::Feed do
|
|
207
189
|
Feedzirra::Feed.fetch_raw(@paul_feed[:url], :if_none_match => 'ziEyTl4q9GH04BR4jgkImd0GvSE')
|
208
190
|
@curl.headers["If-None-Match"].should == 'ziEyTl4q9GH04BR4jgkImd0GvSE'
|
209
191
|
end
|
210
|
-
|
192
|
+
|
211
193
|
it 'should set userpwd for http basic authentication if :http_authentication is passed' do
|
212
194
|
@curl.should_receive(:userpwd=).with('username:password')
|
213
195
|
Feedzirra::Feed.fetch_raw(@paul_feed[:url], :http_authentication => ['username', 'password'])
|
@@ -225,7 +207,7 @@ describe Feedzirra::Feed do
|
|
225
207
|
it "should take multiple feed urls and return a hash of urls and response xml" do
|
226
208
|
multi = stub('curl_multi', :add => true, :perform => true)
|
227
209
|
Curl::Multi.stub!(:new).and_return(multi)
|
228
|
-
|
210
|
+
|
229
211
|
paul_response = stub('paul_response', :header_str => '', :body_str => @paul_feed[:xml] )
|
230
212
|
trotter_response = stub('trotter_response', :header_str => '', :body_str => @trotter_feed[:xml] )
|
231
213
|
|
@@ -234,10 +216,10 @@ describe Feedzirra::Feed do
|
|
234
216
|
|
235
217
|
trotter_curl = stub('trotter_curl', :headers => {}, :follow_location= => true, :on_failure => true)
|
236
218
|
trotter_curl.stub!(:on_success).and_yield(trotter_response)
|
237
|
-
|
219
|
+
|
238
220
|
Curl::Easy.should_receive(:new).with(@paul_feed[:url]).ordered.and_yield(paul_curl)
|
239
221
|
Curl::Easy.should_receive(:new).with(@trotter_feed[:url]).ordered.and_yield(trotter_curl)
|
240
|
-
|
222
|
+
|
241
223
|
results = Feedzirra::Feed.fetch_raw([@paul_feed[:url], @trotter_feed[:url]])
|
242
224
|
results.keys.should include(@paul_feed[:url])
|
243
225
|
results.keys.should include(@trotter_feed[:url])
|
@@ -256,7 +238,7 @@ describe Feedzirra::Feed do
|
|
256
238
|
@multi = Curl::Multi.new(@paul_feed[:url])
|
257
239
|
@multi.stub!(:add)
|
258
240
|
@easy_curl = Curl::Easy.new(@paul_feed[:url])
|
259
|
-
|
241
|
+
|
260
242
|
Curl::Easy.should_receive(:new).and_yield(@easy_curl)
|
261
243
|
end
|
262
244
|
|
@@ -264,12 +246,12 @@ describe Feedzirra::Feed do
|
|
264
246
|
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :user_agent => 'My cool application')
|
265
247
|
@easy_curl.headers["User-Agent"].should == 'My cool application'
|
266
248
|
end
|
267
|
-
|
249
|
+
|
268
250
|
it "should set user agent to default if it's not passed as an option" do
|
269
251
|
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
270
252
|
@easy_curl.headers["User-Agent"].should == Feedzirra::Feed::USER_AGENT
|
271
253
|
end
|
272
|
-
|
254
|
+
|
273
255
|
it "should set if modified since as an option if passed" do
|
274
256
|
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :if_modified_since => Time.parse("Jan 25 2009 04:10:32 GMT"))
|
275
257
|
@easy_curl.headers["If-Modified-Since"].should == 'Sun, 25 Jan 2009 04:10:32 GMT'
|
@@ -279,12 +261,12 @@ describe Feedzirra::Feed do
|
|
279
261
|
@easy_curl.should_receive(:follow_location=).with(true)
|
280
262
|
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
281
263
|
end
|
282
|
-
|
264
|
+
|
283
265
|
it 'should set userpwd for http basic authentication if :http_authentication is passed' do
|
284
266
|
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :http_authentication => ['myusername', 'mypassword'])
|
285
267
|
@easy_curl.userpwd.should == 'myusername:mypassword'
|
286
268
|
end
|
287
|
-
|
269
|
+
|
288
270
|
it 'should set accepted encodings' do
|
289
271
|
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {:compress => true})
|
290
272
|
@easy_curl.headers["Accept-encoding"].should == 'gzip, deflate'
|
@@ -294,7 +276,7 @@ describe Feedzirra::Feed do
|
|
294
276
|
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, :if_none_match => 'ziEyTl4q9GH04BR4jgkImd0GvSE')
|
295
277
|
@easy_curl.headers["If-None-Match"].should == 'ziEyTl4q9GH04BR4jgkImd0GvSE'
|
296
278
|
end
|
297
|
-
|
279
|
+
|
298
280
|
describe 'on success' do
|
299
281
|
before(:each) do
|
300
282
|
@feed = mock('feed', :feed_url= => true, :etag= => true, :last_modified= => true)
|
@@ -310,7 +292,7 @@ describe Feedzirra::Feed do
|
|
310
292
|
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
311
293
|
@easy_curl.on_success.call(@easy_curl)
|
312
294
|
end
|
313
|
-
|
295
|
+
|
314
296
|
it 'should determine the xml parser class' do
|
315
297
|
Feedzirra::Feed.should_receive(:determine_feed_parser_for_xml).with(@paul_feed[:xml]).and_return(Feedzirra::Parser::AtomFeedBurner)
|
316
298
|
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
@@ -322,7 +304,7 @@ describe Feedzirra::Feed do
|
|
322
304
|
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, {})
|
323
305
|
@easy_curl.on_success.call(@easy_curl)
|
324
306
|
end
|
325
|
-
|
307
|
+
|
326
308
|
describe 'when a compatible xml parser class is found' do
|
327
309
|
it 'should set the last effective url to the feed url' do
|
328
310
|
@easy_curl.should_receive(:last_effective_url).and_return(@paul_feed[:url])
|
@@ -347,11 +329,11 @@ describe Feedzirra::Feed do
|
|
347
329
|
responses = {}
|
348
330
|
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], responses, {})
|
349
331
|
@easy_curl.on_success.call(@easy_curl)
|
350
|
-
|
332
|
+
|
351
333
|
responses.length.should == 1
|
352
334
|
responses['http://feeds.feedburner.com/PaulDixExplainsNothing'].should == @feed
|
353
335
|
end
|
354
|
-
|
336
|
+
|
355
337
|
it 'should call proc if :on_success option is passed' do
|
356
338
|
success = lambda { |url, feed| }
|
357
339
|
success.should_receive(:call).with(@paul_feed[:url], @feed)
|
@@ -381,7 +363,7 @@ describe Feedzirra::Feed do
|
|
381
363
|
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], {}, { :on_failure => failure })
|
382
364
|
@easy_curl.on_failure.call(@easy_curl)
|
383
365
|
end
|
384
|
-
|
366
|
+
|
385
367
|
it 'should return the http code in the responses' do
|
386
368
|
responses = {}
|
387
369
|
Feedzirra::Feed.add_url_to_multi(@multi, @paul_feed[:url], [], responses, {})
|
@@ -407,14 +389,14 @@ describe Feedzirra::Feed do
|
|
407
389
|
Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, :user_agent => 'My cool application')
|
408
390
|
@easy_curl.headers["User-Agent"].should == 'My cool application'
|
409
391
|
end
|
410
|
-
|
392
|
+
|
411
393
|
it "should set user agent to default if it's not passed as an option" do
|
412
394
|
Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
413
395
|
@easy_curl.headers["User-Agent"].should == Feedzirra::Feed::USER_AGENT
|
414
396
|
end
|
415
397
|
|
416
398
|
it "should set if modified since as an option if passed"
|
417
|
-
|
399
|
+
|
418
400
|
it 'should set follow location to true' do
|
419
401
|
@easy_curl.should_receive(:follow_location=).with(true)
|
420
402
|
Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
@@ -443,7 +425,7 @@ describe Feedzirra::Feed do
|
|
443
425
|
end
|
444
426
|
|
445
427
|
it 'should process the next feed in the queue'
|
446
|
-
|
428
|
+
|
447
429
|
it 'should parse the updated feed' do
|
448
430
|
Feedzirra::Parser::AtomFeedBurner.should_receive(:parse).and_return(@new_feed)
|
449
431
|
Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
@@ -484,7 +466,7 @@ describe Feedzirra::Feed do
|
|
484
466
|
Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, { :on_success => success })
|
485
467
|
@easy_curl.on_success.call(@easy_curl)
|
486
468
|
end
|
487
|
-
|
469
|
+
|
488
470
|
it 'should call update from feed on the old feed with the updated feed' do
|
489
471
|
@feed.should_receive(:update_from_feed).with(@new_feed)
|
490
472
|
Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, {})
|
@@ -509,7 +491,7 @@ describe Feedzirra::Feed do
|
|
509
491
|
Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], {}, { :on_success => success })
|
510
492
|
@easy_curl.on_failure.call(@easy_curl)
|
511
493
|
end
|
512
|
-
|
494
|
+
|
513
495
|
it 'should return the http code in the responses' do
|
514
496
|
responses = {}
|
515
497
|
Feedzirra::Feed.add_feed_to_multi(@multi, @feed, [], responses, {})
|
@@ -541,7 +523,7 @@ describe Feedzirra::Feed do
|
|
541
523
|
Zlib::GzipReader.should_receive(:new).with(string_io).and_return(string_io)
|
542
524
|
Feedzirra::Feed.decode_content(@curl_easy)
|
543
525
|
end
|
544
|
-
|
526
|
+
|
545
527
|
it 'should deflate the response body using inflate if the Content-Encoding: is deflate' do
|
546
528
|
@curl_easy.stub!(:header_str).and_return('Content-Encoding: deflate')
|
547
529
|
Zlib::Inflate.should_receive(:inflate).with(@curl_easy.body_str)
|
@@ -558,7 +540,7 @@ describe Feedzirra::Feed do
|
|
558
540
|
it 'should perform the updating using multicurl'
|
559
541
|
it "should pass any request options through to add_feed_to_multi"
|
560
542
|
it "should return a feed object if a single feed is passed in"
|
561
|
-
it "should return an return an array of feed objects if multiple feeds are passed in"
|
543
|
+
it "should return an return an array of feed objects if multiple feeds are passed in"
|
562
544
|
end
|
563
545
|
end
|
564
546
|
end
|
@@ -15,6 +15,10 @@ describe Feedzirra::Parser::AtomEntry do
|
|
15
15
|
@entry.url.should == "http://aws.typepad.com/aws/2009/01/aws-job-architect-designer-position-in-turkey.html"
|
16
16
|
end
|
17
17
|
|
18
|
+
it "should parse the url even when" do
|
19
|
+
Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).entries.first.url.should == "http://www.innoq.com/blog/phaus/2009/07/ja.html"
|
20
|
+
end
|
21
|
+
|
18
22
|
it "should parse the author" do
|
19
23
|
@entry.author.should == "AWS Editor"
|
20
24
|
end
|
@@ -24,6 +24,14 @@ describe Feedzirra::Parser::Atom do
|
|
24
24
|
@feed.url.should == "http://aws.typepad.com/aws/"
|
25
25
|
end
|
26
26
|
|
27
|
+
it "should parse the url even when it doesn't have the type='text/html' attribute" do
|
28
|
+
Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).url.should == "http://www.innoq.com/planet/"
|
29
|
+
end
|
30
|
+
|
31
|
+
it "should parse the feed_url even when it doesn't have the type='application/atom+xml' attribute" do
|
32
|
+
Feedzirra::Parser::Atom.parse(load_sample("atom_with_link_tag_for_url_unmarked.xml")).feed_url.should == "http://www.innoq.com/planet/atom.xml"
|
33
|
+
end
|
34
|
+
|
27
35
|
it "should parse the feed_url" do
|
28
36
|
@feed.feed_url.should == "http://aws.typepad.com/aws/atom.xml"
|
29
37
|
end
|
@@ -0,0 +1,32 @@
|
|
1
|
+
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
|
2
|
+
|
3
|
+
describe Feedzirra::Parser::RSSEntry::MRSSContent do
|
4
|
+
before(:each) do
|
5
|
+
# I don't really like doing it this way because these unit test should only rely on RSSEntry,
|
6
|
+
# but this is actually how it should work. You would never just pass entry xml straight to the AtomEnry
|
7
|
+
@entries = Feedzirra::Parser::RSS.parse(sample_mrss_feed).entries
|
8
|
+
end
|
9
|
+
|
10
|
+
it "should parse the media" do
|
11
|
+
entry = @entries.first
|
12
|
+
entry.media_content.size.should == 1
|
13
|
+
entry.media_description.should == 'The story began with a July 23 article in a local newspaper, The Independent. Jenna Hewitt, 26, of Montauk, and three friends said they found the ...'
|
14
|
+
entry.media_thumbnail.should == 'http://3.gvt0.com/vi/Y3rNEu4A8WM/default.jpg'
|
15
|
+
entry.media_thumbnail_width.should == '320'
|
16
|
+
entry.media_thumbnail_height.should == '240'
|
17
|
+
end
|
18
|
+
|
19
|
+
it "should handle multiple pieces of content" do
|
20
|
+
media = @entries[1].media_content
|
21
|
+
media.size.should == 2
|
22
|
+
media[0].url.should == 'http://www.youtube.com/v/pvaM6sjLbuA&fs=1'
|
23
|
+
media[0].content_type.should == 'application/x-shockwave-flash'
|
24
|
+
media[0].medium.should == 'video'
|
25
|
+
media[0].duration.should == '575'
|
26
|
+
|
27
|
+
media[1].url.should == 'http://www.youtube.com/v/pvaM6sjLbuA&fs=2'
|
28
|
+
media[1].content_type.should == 'video/mp4'
|
29
|
+
media[1].medium.should == 'video'
|
30
|
+
media[1].duration.should == '576'
|
31
|
+
end
|
32
|
+
end
|
@@ -1,32 +1,32 @@
|
|
1
1
|
require File.join(File.dirname(__FILE__), %w[.. .. spec_helper])
|
2
2
|
|
3
3
|
describe Feedzirra::Parser::RSSEntry do
|
4
|
-
before
|
4
|
+
before do
|
5
5
|
# I don't really like doing it this way because these unit test should only rely on RSSEntry,
|
6
6
|
# but this is actually how it should work. You would never just pass entry xml straight to the AtomEnry
|
7
7
|
@entry = Feedzirra::Parser::RSS.parse(sample_rss_feed).entries.first
|
8
8
|
end
|
9
|
-
|
9
|
+
|
10
10
|
it "should parse the title" do
|
11
11
|
@entry.title.should == "Nokogiri’s Slop Feature"
|
12
12
|
end
|
13
|
-
|
13
|
+
|
14
14
|
it "should parse the url" do
|
15
15
|
@entry.url.should == "http://tenderlovemaking.com/2008/12/04/nokogiris-slop-feature/"
|
16
16
|
end
|
17
|
-
|
17
|
+
|
18
18
|
it "should parse the author" do
|
19
19
|
@entry.author.should == "Aaron Patterson"
|
20
20
|
end
|
21
|
-
|
21
|
+
|
22
22
|
it "should parse the content" do
|
23
23
|
@entry.content.should == sample_rss_entry_content
|
24
24
|
end
|
25
|
-
|
25
|
+
|
26
26
|
it "should provide a summary" do
|
27
27
|
@entry.summary.should == "Oops! When I released nokogiri version 1.0.7, I totally forgot to talk about Nokogiri::Slop() feature that was added. Why is it called \"slop\"? It lets you sloppily explore documents. Basically, it decorates your document with method_missing() that allows you to search your document via method calls.\nGiven this document:\n\ndoc = Nokogiri::Slop(<<-eohtml)\n<html>\n  <body>\n  [...]"
|
28
28
|
end
|
29
|
-
|
29
|
+
|
30
30
|
it "should parse the published date" do
|
31
31
|
@entry.published.to_s.should == "Thu Dec 04 17:17:49 UTC 2008"
|
32
32
|
end
|
@@ -34,8 +34,121 @@ describe Feedzirra::Parser::RSSEntry do
|
|
34
34
|
it "should parse the categories" do
|
35
35
|
@entry.categories.should == ['computadora', 'nokogiri', 'rails']
|
36
36
|
end
|
37
|
-
|
37
|
+
|
38
38
|
it "should parse the guid as id" do
|
39
39
|
@entry.id.should == "http://tenderlovemaking.com/?p=198"
|
40
40
|
end
|
41
|
+
|
42
|
+
describe "parsing an iTunes feed" do
|
43
|
+
before do
|
44
|
+
@item = Feedzirra::Parser::RSS.parse(sample_itunes_feed).entries.first
|
45
|
+
end
|
46
|
+
|
47
|
+
it "should parse the title" do
|
48
|
+
@item.title.should == "Shake Shake Shake Your Spices"
|
49
|
+
end
|
50
|
+
|
51
|
+
it "should parse the author" do
|
52
|
+
@item.author.should == "John Doe"
|
53
|
+
end
|
54
|
+
|
55
|
+
it "should parse the subtitle" do
|
56
|
+
@item.subtitle.should == "A short primer on table spices"
|
57
|
+
end
|
58
|
+
|
59
|
+
it "should parse the summary" do
|
60
|
+
@item.summary.should == "This week we talk about salt and pepper shakers, comparing and contrasting pour rates, construction materials, and overall aesthetics. Come and join the party!"
|
61
|
+
end
|
62
|
+
|
63
|
+
it "should parse the enclosure" do
|
64
|
+
@item.enclosure_length.should == "8727310"
|
65
|
+
@item.enclosure_type.should == "audio/x-m4a"
|
66
|
+
@item.enclosure_url.should == "http://example.com/podcasts/everything/AllAboutEverythingEpisode3.m4a"
|
67
|
+
end
|
68
|
+
|
69
|
+
it "should parse the id" do
|
70
|
+
@item.id.should == "http://example.com/podcasts/archive/aae20050615.m4a"
|
71
|
+
end
|
72
|
+
|
73
|
+
it "should parse the published date" do
|
74
|
+
@item.published.should == Time.parse('Wed Jun 15 19:00:00 UTC 2005')
|
75
|
+
end
|
76
|
+
|
77
|
+
it "should parse the duration" do
|
78
|
+
@item.duration.should == "7:04"
|
79
|
+
end
|
80
|
+
|
81
|
+
it "should parse the keywords" do
|
82
|
+
@item.keywords.should == "salt, pepper, shaker, exciting"
|
83
|
+
end
|
84
|
+
end
|
85
|
+
|
86
|
+
describe "parsing Media RSS" do
|
87
|
+
before do
|
88
|
+
@item = Feedzirra::Parser::RSS.parse(sample_mrss_feed).entries.first
|
89
|
+
end
|
90
|
+
|
91
|
+
it "should parse media:rating" do
|
92
|
+
@item.rating.should == 'adult'
|
93
|
+
@item.rating_scheme.should == 'urn:simple'
|
94
|
+
end
|
95
|
+
|
96
|
+
it "should parse media:title" do
|
97
|
+
@item.media_title.should == 'The Montauk Monster-Hells Visits New York!'
|
98
|
+
end
|
99
|
+
|
100
|
+
it "should parse media:description" do
|
101
|
+
@item.media_description.should == 'The story began with a July 23 article in a local newspaper, The Independent. Jenna Hewitt, 26, of Montauk, and three friends said they found the ...'
|
102
|
+
end
|
103
|
+
|
104
|
+
it "should parse media:keywords" do
|
105
|
+
@item.media_keywords.should == 'kitty, cat, big dog, yarn, fluffy'
|
106
|
+
end
|
107
|
+
|
108
|
+
it "should parse media:tumbnail" do
|
109
|
+
@item.media_content.size.should == 1
|
110
|
+
@item.media_description.should == 'The story began with a July 23 article in a local newspaper, The Independent. Jenna Hewitt, 26, of Montauk, and three friends said they found the ...'
|
111
|
+
@item.media_thumbnail.should == 'http://3.gvt0.com/vi/Y3rNEu4A8WM/default.jpg'
|
112
|
+
@item.media_thumbnail_width.should == '320'
|
113
|
+
@item.media_thumbnail_height.should == '240'
|
114
|
+
end
|
115
|
+
|
116
|
+
it "should parse media:category" do
|
117
|
+
@item.media_category.should == 'Arts/Movies/Titles/A/Ace_Ventura_Series/Ace_Ventura_-_Pet_Detective'
|
118
|
+
@item.media_category_scheme.should == 'http://dmoz.org'
|
119
|
+
@item.media_category_label.should == 'Ace Ventura - Pet Detective'
|
120
|
+
end
|
121
|
+
|
122
|
+
it "should parse media:hash" do
|
123
|
+
@item.media_hash.should == 'dfdec888b72151965a34b4b59031290a'
|
124
|
+
@item.media_hash_algo.should == 'md5'
|
125
|
+
end
|
126
|
+
|
127
|
+
it "should parse media:player" do
|
128
|
+
@item.media_player_url.should == 'http://www.example.com/player?id=1111'
|
129
|
+
@item.media_player_width.should == '400'
|
130
|
+
@item.media_player_height.should == '200'
|
131
|
+
end
|
132
|
+
|
133
|
+
it "should parse media:credit" do
|
134
|
+
@item.credits.size.should == 2
|
135
|
+
@item.credits.first.role.should == 'producer'
|
136
|
+
@item.credits.first.scheme.should == 'urn:ebu'
|
137
|
+
pending 'not sure why the name isn\'t getting set'
|
138
|
+
@item.credits.first.name.should == 'John Doe'
|
139
|
+
end
|
140
|
+
|
141
|
+
it "should parse media:copyright" do
|
142
|
+
@item.copyright.should == '2009 Example Co.'
|
143
|
+
@item.copyright_url.should == 'http://example.com/copyright.html'
|
144
|
+
end
|
145
|
+
|
146
|
+
it "should parse media:restriction" do
|
147
|
+
pending 'need to figure out why this is getting String'
|
148
|
+
@item.media_restriction.type.should == 'MRSSRestriction'
|
149
|
+
@item.media_restriction.value.should == 'au us'
|
150
|
+
@item.media_restriction.scope.should == 'country'
|
151
|
+
@item.media_restriction.relationship.should == 'allow'
|
152
|
+
end
|
153
|
+
end
|
41
154
|
end
|
@@ -6,36 +6,88 @@ describe Feedzirra::Parser::RSS do
|
|
6
6
|
Feedzirra::Parser::RSS.should be_able_to_parse(sample_rss_feed)
|
7
7
|
end
|
8
8
|
|
9
|
-
|
10
|
-
|
11
|
-
|
12
|
-
|
13
|
-
|
9
|
+
it "should return true for an rdf feed" do
|
10
|
+
Feedzirra::Parser::RSS.should be_able_to_parse(sample_rdf_feed)
|
11
|
+
end
|
12
|
+
|
13
|
+
it "should return true for an iTunes feed" do
|
14
|
+
Feedzirra::Parser::RSS.should be_able_to_parse(sample_itunes_feed)
|
15
|
+
end
|
16
|
+
|
14
17
|
it "should return fase for an atom feed" do
|
15
18
|
Feedzirra::Parser::RSS.should_not be_able_to_parse(sample_atom_feed)
|
16
19
|
end
|
17
20
|
end
|
18
21
|
|
19
22
|
describe "parsing" do
|
20
|
-
before
|
21
|
-
@feed = Feedzirra::Parser::RSS.parse(
|
23
|
+
before do
|
24
|
+
@feed = Feedzirra::Parser::RSS.parse(sample_mrss_feed)
|
22
25
|
end
|
23
|
-
|
26
|
+
|
24
27
|
it "should parse the title" do
|
25
|
-
@feed.title.should == "
|
28
|
+
@feed.title.should == "Google Video - Hot videos"
|
26
29
|
end
|
27
|
-
|
30
|
+
|
28
31
|
it "should parse the url" do
|
29
|
-
@feed.url.should == "http://
|
32
|
+
@feed.url.should == "http://video.google.com/"
|
30
33
|
end
|
31
|
-
|
34
|
+
|
32
35
|
it "should provide an accessor for the feed_url" do
|
33
36
|
@feed.respond_to?(:feed_url).should == true
|
34
37
|
@feed.respond_to?(:feed_url=).should == true
|
35
38
|
end
|
36
|
-
|
39
|
+
|
37
40
|
it "should parse entries" do
|
38
|
-
@feed.entries.size.should ==
|
41
|
+
@feed.entries.size.should == 20
|
42
|
+
end
|
43
|
+
|
44
|
+
it "should parse the image" do
|
45
|
+
pending 'setting NilClass for some reason'
|
46
|
+
@feed.image.class.should == 'RSSImage'
|
47
|
+
@feed.image.title.should == 'Google Video - Hot videos'
|
48
|
+
@feed.image.link.should == 'http://video.google.com/'
|
49
|
+
@feed.image.url.should == 'http://video.google.com/common/google_logo_small.jpg'
|
50
|
+
@feed.image.width.should == '100'
|
51
|
+
@feed.image.height.should == '37'
|
52
|
+
end
|
53
|
+
|
54
|
+
describe "parsing an iTunes feed" do
|
55
|
+
before do
|
56
|
+
@feed = Feedzirra::Parser::RSS.parse(sample_itunes_feed)
|
57
|
+
end
|
58
|
+
|
59
|
+
it "should parse an image" do
|
60
|
+
@feed.image.should == "http://example.com/podcasts/everything/AllAboutEverything.jpg"
|
61
|
+
end
|
62
|
+
|
63
|
+
it "should parse categories" do
|
64
|
+
@feed.categories.size == 2
|
65
|
+
@feed.categories[0].should == "Technology"
|
66
|
+
@feed.categories[1].should == "Gadgets"
|
67
|
+
@feed.categories[2].should == "TV & Film"
|
68
|
+
|
69
|
+
# @feed.categories[0].name.should == "Technology"
|
70
|
+
# @feed.categories[0].sub_categories.size.should == 1
|
71
|
+
# @feed.categories[0].sub_categories[0].should == "Gadgets"
|
72
|
+
# @feed.categories[1].name.should == "TV & Film"
|
73
|
+
# @feed.categories[1].sub_categories.size.should == 0
|
74
|
+
end
|
75
|
+
|
76
|
+
it "should parse the summary" do
|
77
|
+
@feed.summary.should == "All About Everything is a show about everything. Each week we dive into any subject known to man and talk about it as much as we can. Look for our Podcast in the iTunes Music Store"
|
78
|
+
end
|
79
|
+
|
80
|
+
it "should parse entries" do
|
81
|
+
@feed.entries.size.should == 4
|
82
|
+
end
|
83
|
+
|
84
|
+
it "should parse the owner name" do
|
85
|
+
@feed.owner_name.should == 'John Doe'
|
86
|
+
end
|
87
|
+
|
88
|
+
it "should parse the owner email" do
|
89
|
+
@feed.owner_email.should == 'john.doe@example.com'
|
90
|
+
end
|
39
91
|
end
|
40
92
|
end
|
41
93
|
end
|
@@ -0,0 +1,20 @@
|
|
1
|
+
require 'rubygems'
|
2
|
+
require File.dirname(__FILE__) + "/../../lib/feedzirra.rb"
|
3
|
+
|
4
|
+
feed_urls = File.readlines(File.dirname(__FILE__) + "/top5kfeeds.dat").collect {|line| line.split.first}
|
5
|
+
|
6
|
+
success = lambda do |url, feed|
|
7
|
+
puts "SUCCESS - #{feed.title} - #{url}"
|
8
|
+
end
|
9
|
+
|
10
|
+
failed_feeds = []
|
11
|
+
failure = lambda do |url, response_code, header, body|
|
12
|
+
failed_feeds << url if response_code == 200
|
13
|
+
puts "*********** FAILED with #{response_code} on #{url}"
|
14
|
+
end
|
15
|
+
|
16
|
+
Feedzirra::Feed.fetch_and_parse(feed_urls, :on_success => success, :on_failure => failure)
|
17
|
+
|
18
|
+
File.open("./failed_urls.txt", "w") do |f|
|
19
|
+
f.write failed_feeds.join("\n")
|
20
|
+
end
|
data/spec/spec_helper.rb
CHANGED
@@ -57,6 +57,6 @@ def sample_wfw_feed
|
|
57
57
|
load_sample("PaulDixExplainsNothingWFW.xml")
|
58
58
|
end
|
59
59
|
|
60
|
-
def
|
61
|
-
load_sample("
|
62
|
-
end
|
60
|
+
def sample_mrss_feed
|
61
|
+
load_sample("GoogleVideoSample.xml")
|
62
|
+
end
|