rubylibre-feedzirra 0.0.14 → 0.0.23

Sign up to get free protection for your applications and to get access to all the features.
Files changed (43) hide show
  1. data/README.rdoc +169 -0
  2. data/README.textile +9 -0
  3. data/lib/feedzirra/feed.rb +32 -37
  4. data/lib/feedzirra/parser/atom.rb +9 -0
  5. data/lib/feedzirra/parser/atom_entry.rb +6 -0
  6. data/lib/feedzirra/parser/atom_feed_burner_entry.rb +1 -1
  7. data/lib/feedzirra/parser/itunes_category.rb +12 -0
  8. data/lib/feedzirra/parser/mrss_category.rb +11 -0
  9. data/lib/feedzirra/parser/mrss_content.rb +48 -0
  10. data/lib/feedzirra/parser/mrss_copyright.rb +10 -0
  11. data/lib/feedzirra/parser/mrss_credit.rb +11 -0
  12. data/lib/feedzirra/parser/mrss_group.rb +37 -0
  13. data/lib/feedzirra/parser/mrss_hash.rb +10 -0
  14. data/lib/feedzirra/parser/mrss_player.rb +11 -0
  15. data/lib/feedzirra/parser/mrss_rating.rb +10 -0
  16. data/lib/feedzirra/parser/mrss_restriction.rb +11 -0
  17. data/lib/feedzirra/parser/mrss_text.rb +13 -0
  18. data/lib/feedzirra/parser/mrss_thumbnail.rb +11 -0
  19. data/lib/feedzirra/parser/rss.rb +64 -9
  20. data/lib/feedzirra/parser/rss_entry.rb +54 -14
  21. data/lib/feedzirra/parser/rss_image.rb +15 -0
  22. data/lib/feedzirra.rb +17 -5
  23. data/spec/benchmarks/feed_benchmarks.rb +98 -0
  24. data/spec/benchmarks/feedzirra_benchmarks.rb +40 -0
  25. data/spec/benchmarks/fetching_benchmarks.rb +28 -0
  26. data/spec/benchmarks/parsing_benchmark.rb +30 -0
  27. data/spec/benchmarks/updating_benchmarks.rb +33 -0
  28. data/spec/feedzirra/feed_spec.rb +35 -53
  29. data/spec/feedzirra/parser/atom_entry_spec.rb +4 -0
  30. data/spec/feedzirra/parser/atom_spec.rb +8 -0
  31. data/spec/feedzirra/parser/mrss_content_spec.rb +32 -0
  32. data/spec/feedzirra/parser/rss_entry_spec.rb +121 -8
  33. data/spec/feedzirra/parser/rss_spec.rb +66 -14
  34. data/spec/sample_feeds/run_against_sample.rb +20 -0
  35. data/spec/spec_helper.rb +3 -3
  36. metadata +37 -22
  37. data/lib/feedzirra/parser/itunes_rss.rb +0 -50
  38. data/lib/feedzirra/parser/itunes_rss_item.rb +0 -31
  39. data/lib/feedzirra/parser/itunes_rss_owner.rb +0 -12
  40. data/spec/feedzirra/parser/itunes_rss_item_spec.rb +0 -48
  41. data/spec/feedzirra/parser/itunes_rss_owner_spec.rb +0 -18
  42. data/spec/feedzirra/parser/itunes_rss_spec.rb +0 -50
  43. data/spec/spec.opts +0 -2
@@ -1,28 +1,83 @@
1
+ require File.dirname(__FILE__) + '/mrss_credit'
2
+ require File.dirname(__FILE__) + '/mrss_restriction'
3
+ require File.dirname(__FILE__) + '/mrss_category'
4
+ require File.dirname(__FILE__) + '/mrss_copyright'
5
+ require File.dirname(__FILE__) + '/mrss_hash'
6
+ require File.dirname(__FILE__) + '/mrss_player'
7
+ require File.dirname(__FILE__) + '/mrss_rating'
8
+ require File.dirname(__FILE__) + '/mrss_restriction'
9
+ require File.dirname(__FILE__) + '/mrss_text'
10
+ require File.dirname(__FILE__) + '/mrss_thumbnail'
11
+
1
12
  module Feedzirra
2
-
3
13
  module Parser
4
14
  # == Summary
5
15
  # Parser for dealing with RSS feeds.
6
16
  #
7
- # == Attributes
8
- # * title
9
- # * feed_url
10
- # * url
11
- # * entries
12
17
  class RSS
13
18
  include SAXMachine
14
19
  include FeedUtilities
20
+
21
+ attr_accessor :feed_url
22
+
23
+ # RSS 2.0 required elements
15
24
  element :title
16
25
  element :link, :as => :url
26
+ element :description
17
27
  elements :item, :as => :entries, :class => RSSEntry
18
28
 
19
- attr_accessor :feed_url
29
+ # RSS 2.0 optional elements
30
+ element :language
31
+ element :copyright
32
+ element :managingEditor
33
+ element :webMaster
34
+ element :pubDate
35
+ element :lastBuildDate
36
+ element :category
37
+ element :generator
38
+ element :docs
39
+ element :cloud
40
+ element :ttl
41
+ element :image, :class => RSSImage
42
+ element :rating
43
+ element :textInput
44
+ element :skipHours
45
+ element :skipDays
46
+
47
+ # iTunes
48
+ element :'itunes:author', :as => :author
49
+ element :'itunes:block', :as => :itunes_block
50
+ element :'itunes:image', :as => :image, :value => :href
51
+ element :'itunes:explicit', :as => :explicit
52
+ element :'itunes:keywords', :as => :keywords
53
+ element :'itunes:new-feed-url', :as => :feed_url
54
+ element :'itunes:name', :as => :owner_name
55
+ element :'itunes:email', :as => :owner_email
56
+ element :'itunes:subtitle', :as => :subtitle
57
+ element :'itunes:summary', :as => :summary
58
+
59
+ elements :'itunes:category', :as => :categories, :value => :text
60
+ # elements :'itunes:category', :as => :itunes_categories,
61
+ # :class => ITunesCategory
62
+
63
+ # MediaRSS support
64
+ element :'media:title', :as => :media_title
65
+ element :'media:keywords', :as => :media_keywords
66
+ element :'media:description', :as => :media_description
67
+
68
+ element :'media:thumbnail', :as => :media_thumbnail, :class => MRSSThumbnail
69
+ element :'media:rating', :as => :rating, :class => MRSSRating
70
+ element :'media:category', :as => :media_category, :class => MRSSCategory
71
+ element :'media:hash', :as => :media_hash, :class => MRSSHash
72
+ element :'media:player', :as => :media_player, :class => MRSSPlayer
73
+ elements :'media:credit', :as => :credits, :class => MRSSCredit
74
+ element :'media:copyright', :as => :copyright, :class => MRSSCopyright
75
+ element :'media:restriction', :as => :media_restriction, :class => MRSSRestriction
76
+ element :'media:text', :as => :text, :class => MRSSText
20
77
 
21
78
  def self.able_to_parse?(xml) #:nodoc:
22
79
  xml =~ /\<rss|rdf/
23
80
  end
24
81
  end
25
-
26
82
  end
27
-
28
83
  end
@@ -1,5 +1,17 @@
1
+ require File.dirname(__FILE__) + '/mrss_content'
2
+ require File.dirname(__FILE__) + '/mrss_credit'
3
+ require File.dirname(__FILE__) + '/mrss_restriction'
4
+ require File.dirname(__FILE__) + '/mrss_group'
5
+ require File.dirname(__FILE__) + '/mrss_category'
6
+ require File.dirname(__FILE__) + '/mrss_copyright'
7
+ require File.dirname(__FILE__) + '/mrss_hash'
8
+ require File.dirname(__FILE__) + '/mrss_player'
9
+ require File.dirname(__FILE__) + '/mrss_rating'
10
+ require File.dirname(__FILE__) + '/mrss_restriction'
11
+ require File.dirname(__FILE__) + '/mrss_text'
12
+ require File.dirname(__FILE__) + '/mrss_thumbnail'
13
+
1
14
  module Feedzirra
2
-
3
15
  module Parser
4
16
  # == Summary
5
17
  # Parser for dealing with RDF feed entries.
@@ -15,28 +27,56 @@ module Feedzirra
15
27
  class RSSEntry
16
28
  include SAXMachine
17
29
  include FeedEntryUtilities
30
+
31
+ # RSS 2.0 elements
18
32
  element :title
19
33
  element :link, :as => :url
20
-
21
- element :"dc:creator", :as => :author
22
- element :author, :as => :author
23
- element :"content:encoded", :as => :content
24
34
  element :description, :as => :summary
25
-
35
+ element :author
36
+ elements :category, :as => :categories
37
+ element :comments
38
+ element :guid, :as => :id
26
39
  element :pubDate, :as => :published
27
- element :pubdate, :as => :published
40
+ element :source
41
+ element :enclosure, :value => :length, :as => :enclosure_length
42
+ element :enclosure, :value => :type, :as => :enclosure_type
43
+ element :enclosure, :value => :url, :as => :enclosure_url
44
+
45
+
46
+ # RDF elements
28
47
  element :"dc:date", :as => :published
29
48
  element :"dc:Date", :as => :published
30
49
  element :"dcterms:created", :as => :published
50
+ element :issued, :as => :published
51
+ element :"content:encoded", :as => :content
52
+ element :"dc:creator", :as => :author
53
+ element :"dcterms:modified", :as => :updated
31
54
 
55
+ # MediaRSS support, optional elements
56
+ element :'media:title', :as => :media_title
57
+ element :'media:keywords', :as => :media_keywords
58
+ element :'media:description', :as => :media_description
32
59
 
33
- element :"dcterms:modified", :as => :updated
34
- element :issued, :as => :published
35
- elements :category, :as => :categories
60
+ element :'media:thumbnail', :as => :media_thumbnail, :class => MRSSThumbnail
61
+ element :'media:rating', :as => :rating, :class => MRSSRating
62
+ element :'media:category', :as => :media_category, :class => MRSSCategory
63
+ element :'media:hash', :as => :media_hash, :class => MRSSHash
64
+ element :'media:player', :as => :media_player, :class => MRSSPlayer
65
+ elements :'media:credit', :as => :credits, :class => MRSSCredit
66
+ element :'media:copyright', :as => :copyright, :class => MRSSCopyright
67
+ element :'media:restriction', :as => :media_restriction, :class => MRSSRestriction
68
+ element :'media:text', :as => :text, :class => MRSSText
69
+ elements :'media:content', :as => :media_content, :class => MRSSContent
70
+ elements :'media:group', :as => :media_groups, :class => MRSSGroup
36
71
 
37
- element :guid, :as => :id
72
+ # iTunes
73
+ element :'itunes:author', :as => :author
74
+ element :'itunes:block', :as => :itunes_block
75
+ element :'itunes:duration', :as => :duration
76
+ element :'itunes:explicit', :as => :explicit
77
+ element :'itunes:keywords', :as => :keywords
78
+ element :'itunes:subtitle', :as => :subtitle
79
+ element :'itunes:summary', :as => :summary
38
80
  end
39
-
40
81
  end
41
-
42
- end
82
+ end
@@ -0,0 +1,15 @@
1
+ module Feedzirra
2
+ module Parser
3
+ class RSS
4
+ class RSSImage
5
+ include SAXMachine
6
+
7
+ element :title
8
+ element :link
9
+ element :url
10
+ element :width
11
+ element :height
12
+ end
13
+ end
14
+ end
15
+ end
data/lib/feedzirra.rb CHANGED
@@ -11,6 +11,7 @@ require 'active_support/basic_object'
11
11
  require 'active_support/core_ext/object'
12
12
  require 'active_support/core_ext/time'
13
13
 
14
+
14
15
  require 'core_ext/date'
15
16
  require 'core_ext/string'
16
17
 
@@ -18,17 +19,28 @@ require 'feedzirra/feed_utilities'
18
19
  require 'feedzirra/feed_entry_utilities'
19
20
  require 'feedzirra/feed'
20
21
 
22
+ require 'feedzirra/parser/mrss_content'
23
+ require 'feedzirra/parser/mrss_credit'
24
+ require 'feedzirra/parser/mrss_restriction'
25
+ require 'feedzirra/parser/mrss_group'
26
+ require 'feedzirra/parser/mrss_category'
27
+ require 'feedzirra/parser/mrss_copyright'
28
+ require 'feedzirra/parser/mrss_hash'
29
+ require 'feedzirra/parser/mrss_player'
30
+ require 'feedzirra/parser/mrss_rating'
31
+ require 'feedzirra/parser/mrss_restriction'
32
+ require 'feedzirra/parser/mrss_text'
33
+ require 'feedzirra/parser/mrss_thumbnail'
21
34
  require 'feedzirra/parser/rss_entry'
22
- require 'feedzirra/parser/itunes_rss_owner'
23
- require 'feedzirra/parser/itunes_rss_item'
35
+ require 'feedzirra/parser/rss_image'
36
+ require 'feedzirra/parser/itunes_category'
24
37
  require 'feedzirra/parser/atom_entry'
25
38
  require 'feedzirra/parser/atom_feed_burner_entry'
26
39
 
27
40
  require 'feedzirra/parser/rss'
28
- require 'feedzirra/parser/itunes_rss'
29
41
  require 'feedzirra/parser/atom'
30
42
  require 'feedzirra/parser/atom_feed_burner'
31
43
 
32
44
  module Feedzirra
33
- VERSION = "0.0.14"
34
- end
45
+ VERSION = "0.0.21"
46
+ end
@@ -0,0 +1,98 @@
1
+ # this is some spike code to compare the speed of different methods for performing
2
+ # multiple feed fetches
3
+ require 'rubygems'
4
+ require 'curb'
5
+ require 'activesupport'
6
+
7
+ require 'net/http'
8
+ require 'uri'
9
+
10
+ require 'benchmark'
11
+ include Benchmark
12
+
13
+ GET_COUNT = 1
14
+ urls = ["http://www.pauldix.net"] * GET_COUNT
15
+
16
+
17
+ benchmark do |t|
18
+ t.report("taf2-curb") do
19
+ multi = Curl::Multi.new
20
+ urls.each do |url|
21
+ easy = Curl::Easy.new(url) do |curl|
22
+ curl.headers["User-Agent"] = "feedzirra"
23
+ # curl.headers["If-Modified-Since"] = Time.now.httpdate
24
+ # curl.headers["If-None-Match"] = "ziEyTl4q9GH04BR4jgkImd0GvSE"
25
+ curl.follow_location = true
26
+ curl.on_success do |c|
27
+ # puts c.header_str.inspect
28
+ # puts c.response_code
29
+ # puts c.body_str.slice(0, 500)
30
+ end
31
+ curl.on_failure do |c|
32
+ puts "**** #{c.response_code}"
33
+ end
34
+ end
35
+ multi.add(easy)
36
+ end
37
+
38
+ multi.perform
39
+ end
40
+
41
+ t.report("nethttp") do
42
+ urls.each do |url|
43
+ res = Net::HTTP.get(URI.parse(url))
44
+ # puts res.slice(0, 500)
45
+ end
46
+ end
47
+
48
+ require 'rfuzz/session'
49
+ include RFuzz
50
+ t.report("rfuzz") do
51
+ GET_COUNT.times do
52
+ http = HttpClient.new("www.pauldix.net", 80)
53
+ response = http.get("/")
54
+ if response.http_status != "200"
55
+ puts "***** #{response.http_status}"
56
+ else
57
+ # puts response.http_status
58
+ # puts response.http_body.slice(0, 500)
59
+ end
60
+ end
61
+ end
62
+
63
+ require 'eventmachine'
64
+ t.report("eventmachine") do
65
+ counter = GET_COUNT
66
+ EM.run do
67
+ GET_COUNT.times do
68
+ http = EM::Protocols::HttpClient2.connect("www.pauldix.net", 80)
69
+ request = http.get("/")
70
+ request.callback do
71
+ # puts request.status
72
+ # puts request.content.slice(0, 500)
73
+ counter -= 1
74
+ EM.stop if counter == 0
75
+ end
76
+ end
77
+ end
78
+ end
79
+
80
+
81
+ require 'curl-multi'
82
+ t.report("curl multi") do
83
+ multi = Curl::Multi.new
84
+ urls.each do |url|
85
+ on_failure = lambda do |ex|
86
+ puts "****** Failed to retrieve #{url}"
87
+ end
88
+
89
+ on_success = lambda do |body|
90
+ # puts "got #{url}"
91
+ # puts body.slice(0, 500)
92
+ end
93
+ multi.get(url, on_success, on_failure)
94
+ end
95
+
96
+ multi.select([], []) while multi.size > 0
97
+ end
98
+ end
@@ -0,0 +1,40 @@
1
+ require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'
2
+ require 'rfeedparser'
3
+ require 'feed-normalizer'
4
+ require 'open-uri'
5
+
6
+ require 'benchmark'
7
+ include Benchmark
8
+
9
+ iterations = 10
10
+ urls = File.readlines(File.dirname(__FILE__) + "/../sample_feeds/successful_feed_urls.txt").slice(0, 20)
11
+ puts "benchmarks on #{urls.size} feeds"
12
+ puts "************************************"
13
+ benchmark do |t|
14
+ t.report("feedzirra") do
15
+ iterations.times do
16
+ Feedzirra::Feed.fetch_and_parse(urls, :on_success => lambda { |url, feed| $stdout.print '.'; $stdout.flush })
17
+ end
18
+ end
19
+
20
+ t.report("rfeedparser") do
21
+ iterations.times do
22
+ urls.each do |url|
23
+ feed = FeedParser.parse(url)
24
+ $stdout.print '.'
25
+ $stdout.flush
26
+ end
27
+ end
28
+ end
29
+
30
+ t.report("feed-normalizer") do
31
+ iterations.times do
32
+ urls.each do |url|
33
+ # have to use the :force_parser option to make feed-normalizer parse an Atom feed
34
+ feed = FeedNormalizer::FeedNormalizer.parse(open(url), :force_parser => FeedNormalizer::SimpleRssParser)
35
+ $stdout.print '.'
36
+ $stdout.flush
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,28 @@
1
+ require 'rubygems'
2
+ require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'
3
+
4
+ require 'open-uri'
5
+
6
+ require 'benchmark'
7
+ include Benchmark
8
+
9
+ iterations = 10
10
+ urls = File.readlines(File.dirname(__FILE__) + "/../sample_feeds/successful_feed_urls.txt").slice(0, 20)
11
+ puts "benchmarks on #{urls.size} feeds"
12
+ puts "************************************"
13
+ benchmark do |t|
14
+ t.report("feedzirra open uri") do
15
+ iterations.times do
16
+ urls.each do |url|
17
+ Feedzirra::Feed.parse(open(url, "User-Agent" => "feedzirra http://github.com/pauldix/feedzirra/tree/master").read)
18
+ $stdout.print '.'; $stdout.flush
19
+ end
20
+ end
21
+ end
22
+
23
+ t.report("feedzirra fetch and parse") do
24
+ iterations.times do
25
+ Feedzirra::Feed.fetch_and_parse(urls, :on_success => lambda { |url, feed| $stdout.print '.'; $stdout.flush })
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,30 @@
1
+ require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'
2
+ require 'rfeedparser'
3
+ require 'feed-normalizer'
4
+
5
+ require 'benchmark'
6
+ include Benchmark
7
+
8
+ iterations = 50
9
+ xml = File.read(File.dirname(__FILE__) + '/../sample_feeds/PaulDixExplainsNothing.xml')
10
+
11
+ benchmark do |t|
12
+ t.report("feedzirra") do
13
+ iterations.times do
14
+ Feedzirra::Feed.parse(xml)
15
+ end
16
+ end
17
+
18
+ t.report("rfeedparser") do
19
+ iterations.times do
20
+ FeedParser.parse(xml)
21
+ end
22
+ end
23
+
24
+ t.report("feed-normalizer") do
25
+ iterations.times do
26
+ # have to use the :force_parser option to make feed-normalizer parse an Atom feed
27
+ FeedNormalizer::FeedNormalizer.parse(xml, :force_parser => FeedNormalizer::SimpleRssParser)
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,33 @@
1
+ require 'rubygems'
2
+ require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'
3
+
4
+ require 'benchmark'
5
+ include Benchmark
6
+
7
+ urls = File.readlines(File.dirname(__FILE__) + "/../sample_feeds/successful_feed_urls.txt")
8
+ puts "benchmarks on #{urls.size} feeds"
9
+ puts "************************************"
10
+ benchmark do |t|
11
+ feeds = {}
12
+ t.report("feedzirra fetch and parse") do
13
+ feeds = Feedzirra::Feed.fetch_and_parse(urls,
14
+ :on_success => lambda { |url, feed| $stdout.print '.'; $stdout.flush },
15
+ :on_failure => lambda {|url, response_code, header, body| puts "#{response_code} ERROR on #{url}"})
16
+ end
17
+
18
+ # curb caches DNS lookups for 60 seconds, so to keep the comparison fair we have to wait for the cache to expire
19
+ puts "sleeping to wait for dns cache to clear"
20
+ 65.times {$stdout.print('.'); sleep(1)}
21
+ puts "done"
22
+
23
+ updated_feeds = []
24
+ t.report("feedzirra update") do
25
+ updated_feeds = Feedzirra::Feed.update(feeds.values.reject {|f| f.class == Fixnum},
26
+ :on_success => lambda {|feed| $stdout.print '.'; $stdout.flush},
27
+ :on_failure => lambda {|feed, response_code, header, body| puts "#{response_code} ERROR on #{feed.feed_url}"})
28
+ end
29
+
30
+ updated_feeds.each do |feed|
31
+ puts feed.feed_url if feed.updated?
32
+ end
33
+ end