rubylibre-feedzirra 0.0.14 → 0.0.23

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (43)
  1. data/README.rdoc +169 -0
  2. data/README.textile +9 -0
  3. data/lib/feedzirra/feed.rb +32 -37
  4. data/lib/feedzirra/parser/atom.rb +9 -0
  5. data/lib/feedzirra/parser/atom_entry.rb +6 -0
  6. data/lib/feedzirra/parser/atom_feed_burner_entry.rb +1 -1
  7. data/lib/feedzirra/parser/itunes_category.rb +12 -0
  8. data/lib/feedzirra/parser/mrss_category.rb +11 -0
  9. data/lib/feedzirra/parser/mrss_content.rb +48 -0
  10. data/lib/feedzirra/parser/mrss_copyright.rb +10 -0
  11. data/lib/feedzirra/parser/mrss_credit.rb +11 -0
  12. data/lib/feedzirra/parser/mrss_group.rb +37 -0
  13. data/lib/feedzirra/parser/mrss_hash.rb +10 -0
  14. data/lib/feedzirra/parser/mrss_player.rb +11 -0
  15. data/lib/feedzirra/parser/mrss_rating.rb +10 -0
  16. data/lib/feedzirra/parser/mrss_restriction.rb +11 -0
  17. data/lib/feedzirra/parser/mrss_text.rb +13 -0
  18. data/lib/feedzirra/parser/mrss_thumbnail.rb +11 -0
  19. data/lib/feedzirra/parser/rss.rb +64 -9
  20. data/lib/feedzirra/parser/rss_entry.rb +54 -14
  21. data/lib/feedzirra/parser/rss_image.rb +15 -0
  22. data/lib/feedzirra.rb +17 -5
  23. data/spec/benchmarks/feed_benchmarks.rb +98 -0
  24. data/spec/benchmarks/feedzirra_benchmarks.rb +40 -0
  25. data/spec/benchmarks/fetching_benchmarks.rb +28 -0
  26. data/spec/benchmarks/parsing_benchmark.rb +30 -0
  27. data/spec/benchmarks/updating_benchmarks.rb +33 -0
  28. data/spec/feedzirra/feed_spec.rb +35 -53
  29. data/spec/feedzirra/parser/atom_entry_spec.rb +4 -0
  30. data/spec/feedzirra/parser/atom_spec.rb +8 -0
  31. data/spec/feedzirra/parser/mrss_content_spec.rb +32 -0
  32. data/spec/feedzirra/parser/rss_entry_spec.rb +121 -8
  33. data/spec/feedzirra/parser/rss_spec.rb +66 -14
  34. data/spec/sample_feeds/run_against_sample.rb +20 -0
  35. data/spec/spec_helper.rb +3 -3
  36. metadata +37 -22
  37. data/lib/feedzirra/parser/itunes_rss.rb +0 -50
  38. data/lib/feedzirra/parser/itunes_rss_item.rb +0 -31
  39. data/lib/feedzirra/parser/itunes_rss_owner.rb +0 -12
  40. data/spec/feedzirra/parser/itunes_rss_item_spec.rb +0 -48
  41. data/spec/feedzirra/parser/itunes_rss_owner_spec.rb +0 -18
  42. data/spec/feedzirra/parser/itunes_rss_spec.rb +0 -50
  43. data/spec/spec.opts +0 -2
@@ -1,28 +1,83 @@
1
+ require File.dirname(__FILE__) + '/mrss_credit'
2
+ require File.dirname(__FILE__) + '/mrss_restriction'
3
+ require File.dirname(__FILE__) + '/mrss_category'
4
+ require File.dirname(__FILE__) + '/mrss_copyright'
5
+ require File.dirname(__FILE__) + '/mrss_hash'
6
+ require File.dirname(__FILE__) + '/mrss_player'
7
+ require File.dirname(__FILE__) + '/mrss_rating'
8
+ require File.dirname(__FILE__) + '/mrss_restriction'
9
+ require File.dirname(__FILE__) + '/mrss_text'
10
+ require File.dirname(__FILE__) + '/mrss_thumbnail'
11
+
1
12
  module Feedzirra
2
-
3
13
  module Parser
4
14
  # == Summary
5
15
  # Parser for dealing with RSS feeds.
6
16
  #
7
- # == Attributes
8
- # * title
9
- # * feed_url
10
- # * url
11
- # * entries
12
17
  class RSS
13
18
  include SAXMachine
14
19
  include FeedUtilities
20
+
21
+ attr_accessor :feed_url
22
+
23
+ # RSS 2.0 required elements
15
24
  element :title
16
25
  element :link, :as => :url
26
+ element :description
17
27
  elements :item, :as => :entries, :class => RSSEntry
18
28
 
19
- attr_accessor :feed_url
29
+ # RSS 2.0 optional elements
30
+ element :language
31
+ element :copyright
32
+ element :managingEditor
33
+ element :webMaster
34
+ element :pubDate
35
+ element :lastBuildDate
36
+ element :category
37
+ element :generator
38
+ element :docs
39
+ element :cloud
40
+ element :ttl
41
+ element :image, :class => RSSImage
42
+ element :rating
43
+ element :textInput
44
+ element :skipHours
45
+ element :skipDays
46
+
47
+ # iTunes
48
+ element :'itunes:author', :as => :author
49
+ element :'itunes:block', :as => :itunes_block
50
+ element :'itunes:image', :as => :image, :value => :href
51
+ element :'itunes:explicit', :as => :explicit
52
+ element :'itunes:keywords', :as => :keywords
53
+ element :'itunes:new-feed-url', :as => :feed_url
54
+ element :'itunes:name', :as => :owner_name
55
+ element :'itunes:email', :as => :owner_email
56
+ element :'itunes:subtitle', :as => :subtitle
57
+ element :'itunes:summary', :as => :summary
58
+
59
+ elements :'itunes:category', :as => :categories, :value => :text
60
+ # elements :'itunes:category', :as => :itunes_categories,
61
+ # :class => ITunesCategory
62
+
63
+ # MediaRSS support
64
+ element :'media:title', :as => :media_title
65
+ element :'media:keywords', :as => :media_keywords
66
+ element :'media:description', :as => :media_description
67
+
68
+ element :'media:thumbnail', :as => :media_thumbnail, :class => MRSSThumbnail
69
+ element :'media:rating', :as => :rating, :class => MRSSRating
70
+ element :'media:category', :as => :media_category, :class => MRSSCategory
71
+ element :'media:hash', :as => :media_hash, :class => MRSSHash
72
+ element :'media:player', :as => :media_player, :class => MRSSPlayer
73
+ elements :'media:credit', :as => :credits, :class => MRSSCredit
74
+ element :'media:copyright', :as => :copyright, :class => MRSSCopyright
75
+ element :'media:restriction', :as => :media_restriction, :class => MRSSRestriction
76
+ element :'media:text', :as => :text, :class => MRSSText
20
77
 
21
78
  def self.able_to_parse?(xml) #:nodoc:
22
79
  xml =~ /\<rss|rdf/
23
80
  end
24
81
  end
25
-
26
82
  end
27
-
28
83
  end
@@ -1,5 +1,17 @@
1
+ require File.dirname(__FILE__) + '/mrss_content'
2
+ require File.dirname(__FILE__) + '/mrss_credit'
3
+ require File.dirname(__FILE__) + '/mrss_restriction'
4
+ require File.dirname(__FILE__) + '/mrss_group'
5
+ require File.dirname(__FILE__) + '/mrss_category'
6
+ require File.dirname(__FILE__) + '/mrss_copyright'
7
+ require File.dirname(__FILE__) + '/mrss_hash'
8
+ require File.dirname(__FILE__) + '/mrss_player'
9
+ require File.dirname(__FILE__) + '/mrss_rating'
10
+ require File.dirname(__FILE__) + '/mrss_restriction'
11
+ require File.dirname(__FILE__) + '/mrss_text'
12
+ require File.dirname(__FILE__) + '/mrss_thumbnail'
13
+
1
14
  module Feedzirra
2
-
3
15
  module Parser
4
16
  # == Summary
5
17
  # Parser for dealing with RDF feed entries.
@@ -15,28 +27,56 @@ module Feedzirra
15
27
  class RSSEntry
16
28
  include SAXMachine
17
29
  include FeedEntryUtilities
30
+
31
+ # RSS 2.0 elements
18
32
  element :title
19
33
  element :link, :as => :url
20
-
21
- element :"dc:creator", :as => :author
22
- element :author, :as => :author
23
- element :"content:encoded", :as => :content
24
34
  element :description, :as => :summary
25
-
35
+ element :author
36
+ elements :category, :as => :categories
37
+ element :comments
38
+ element :guid, :as => :id
26
39
  element :pubDate, :as => :published
27
- element :pubdate, :as => :published
40
+ element :source
41
+ element :enclosure, :value => :length, :as => :enclosure_length
42
+ element :enclosure, :value => :type, :as => :enclosure_type
43
+ element :enclosure, :value => :url, :as => :enclosure_url
44
+
45
+
46
+ # RDF elements
28
47
  element :"dc:date", :as => :published
29
48
  element :"dc:Date", :as => :published
30
49
  element :"dcterms:created", :as => :published
50
+ element :issued, :as => :published
51
+ element :"content:encoded", :as => :content
52
+ element :"dc:creator", :as => :author
53
+ element :"dcterms:modified", :as => :updated
31
54
 
55
+ # MediaRSS support, optional elements
56
+ element :'media:title', :as => :media_title
57
+ element :'media:keywords', :as => :media_keywords
58
+ element :'media:description', :as => :media_description
32
59
 
33
- element :"dcterms:modified", :as => :updated
34
- element :issued, :as => :published
35
- elements :category, :as => :categories
60
+ element :'media:thumbnail', :as => :media_thumbnail, :class => MRSSThumbnail
61
+ element :'media:rating', :as => :rating, :class => MRSSRating
62
+ element :'media:category', :as => :media_category, :class => MRSSCategory
63
+ element :'media:hash', :as => :media_hash, :class => MRSSHash
64
+ element :'media:player', :as => :media_player, :class => MRSSPlayer
65
+ elements :'media:credit', :as => :credits, :class => MRSSCredit
66
+ element :'media:copyright', :as => :copyright, :class => MRSSCopyright
67
+ element :'media:restriction', :as => :media_restriction, :class => MRSSRestriction
68
+ element :'media:text', :as => :text, :class => MRSSText
69
+ elements :'media:content', :as => :media_content, :class => MRSSContent
70
+ elements :'media:group', :as => :media_groups, :class => MRSSGroup
36
71
 
37
- element :guid, :as => :id
72
+ # iTunes
73
+ element :'itunes:author', :as => :author
74
+ element :'itunes:block', :as => :itunes_block
75
+ element :'itunes:duration', :as => :duration
76
+ element :'itunes:explicit', :as => :explicit
77
+ element :'itunes:keywords', :as => :keywords
78
+ element :'itunes:subtitle', :as => :subtitle
79
+ element :'itunes:summary', :as => :summary
38
80
  end
39
-
40
81
  end
41
-
42
- end
82
+ end
@@ -0,0 +1,15 @@
1
+ module Feedzirra
2
+ module Parser
3
+ class RSS
4
+ class RSSImage
5
+ include SAXMachine
6
+
7
+ element :title
8
+ element :link
9
+ element :url
10
+ element :width
11
+ element :height
12
+ end
13
+ end
14
+ end
15
+ end
data/lib/feedzirra.rb CHANGED
@@ -11,6 +11,7 @@ require 'active_support/basic_object'
11
11
  require 'active_support/core_ext/object'
12
12
  require 'active_support/core_ext/time'
13
13
 
14
+
14
15
  require 'core_ext/date'
15
16
  require 'core_ext/string'
16
17
 
@@ -18,17 +19,28 @@ require 'feedzirra/feed_utilities'
18
19
  require 'feedzirra/feed_entry_utilities'
19
20
  require 'feedzirra/feed'
20
21
 
22
+ require 'feedzirra/parser/mrss_content'
23
+ require 'feedzirra/parser/mrss_credit'
24
+ require 'feedzirra/parser/mrss_restriction'
25
+ require 'feedzirra/parser/mrss_group'
26
+ require 'feedzirra/parser/mrss_category'
27
+ require 'feedzirra/parser/mrss_copyright'
28
+ require 'feedzirra/parser/mrss_hash'
29
+ require 'feedzirra/parser/mrss_player'
30
+ require 'feedzirra/parser/mrss_rating'
31
+ require 'feedzirra/parser/mrss_restriction'
32
+ require 'feedzirra/parser/mrss_text'
33
+ require 'feedzirra/parser/mrss_thumbnail'
21
34
  require 'feedzirra/parser/rss_entry'
22
- require 'feedzirra/parser/itunes_rss_owner'
23
- require 'feedzirra/parser/itunes_rss_item'
35
+ require 'feedzirra/parser/rss_image'
36
+ require 'feedzirra/parser/itunes_category'
24
37
  require 'feedzirra/parser/atom_entry'
25
38
  require 'feedzirra/parser/atom_feed_burner_entry'
26
39
 
27
40
  require 'feedzirra/parser/rss'
28
- require 'feedzirra/parser/itunes_rss'
29
41
  require 'feedzirra/parser/atom'
30
42
  require 'feedzirra/parser/atom_feed_burner'
31
43
 
32
44
  module Feedzirra
33
- VERSION = "0.0.14"
34
- end
45
+ VERSION = "0.0.21"
46
+ end
@@ -0,0 +1,98 @@
1
+ # this is some spike code to compare the speed of different methods for performing
2
+ # multiple feed fetches
3
+ require 'rubygems'
4
+ require 'curb'
5
+ require 'activesupport'
6
+
7
+ require 'net/http'
8
+ require 'uri'
9
+
10
+ require 'benchmark'
11
+ include Benchmark
12
+
13
+ GET_COUNT = 1
14
+ urls = ["http://www.pauldix.net"] * GET_COUNT
15
+
16
+
17
+ benchmark do |t|
18
+ t.report("taf2-curb") do
19
+ multi = Curl::Multi.new
20
+ urls.each do |url|
21
+ easy = Curl::Easy.new(url) do |curl|
22
+ curl.headers["User-Agent"] = "feedzirra"
23
+ # curl.headers["If-Modified-Since"] = Time.now.httpdate
24
+ # curl.headers["If-None-Match"] = "ziEyTl4q9GH04BR4jgkImd0GvSE"
25
+ curl.follow_location = true
26
+ curl.on_success do |c|
27
+ # puts c.header_str.inspect
28
+ # puts c.response_code
29
+ # puts c.body_str.slice(0, 500)
30
+ end
31
+ curl.on_failure do |c|
32
+ puts "**** #{c.response_code}"
33
+ end
34
+ end
35
+ multi.add(easy)
36
+ end
37
+
38
+ multi.perform
39
+ end
40
+
41
+ t.report("nethttp") do
42
+ urls.each do |url|
43
+ res = Net::HTTP.get(URI.parse(url))
44
+ # puts res.slice(0, 500)
45
+ end
46
+ end
47
+
48
+ require 'rfuzz/session'
49
+ include RFuzz
50
+ t.report("rfuzz") do
51
+ GET_COUNT.times do
52
+ http = HttpClient.new("www.pauldix.net", 80)
53
+ response = http.get("/")
54
+ if response.http_status != "200"
55
+ puts "***** #{response.http_status}"
56
+ else
57
+ # puts response.http_status
58
+ # puts response.http_body.slice(0, 500)
59
+ end
60
+ end
61
+ end
62
+
63
+ require 'eventmachine'
64
+ t.report("eventmachine") do
65
+ counter = GET_COUNT
66
+ EM.run do
67
+ GET_COUNT.times do
68
+ http = EM::Protocols::HttpClient2.connect("www.pauldix.net", 80)
69
+ request = http.get("/")
70
+ request.callback do
71
+ # puts request.status
72
+ # puts request.content.slice(0, 500)
73
+ counter -= 1
74
+ EM.stop if counter == 0
75
+ end
76
+ end
77
+ end
78
+ end
79
+
80
+
81
+ require 'curl-multi'
82
+ t.report("curl multi") do
83
+ multi = Curl::Multi.new
84
+ urls.each do |url|
85
+ on_failure = lambda do |ex|
86
+ puts "****** Failed to retrieve #{url}"
87
+ end
88
+
89
+ on_success = lambda do |body|
90
+ # puts "got #{url}"
91
+ # puts body.slice(0, 500)
92
+ end
93
+ multi.get(url, on_success, on_failure)
94
+ end
95
+
96
+ multi.select([], []) while multi.size > 0
97
+ end
98
+ end
@@ -0,0 +1,40 @@
1
+ require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'
2
+ require 'rfeedparser'
3
+ require 'feed-normalizer'
4
+ require 'open-uri'
5
+
6
+ require 'benchmark'
7
+ include Benchmark
8
+
9
+ iterations = 10
10
+ urls = File.readlines(File.dirname(__FILE__) + "/../sample_feeds/successful_feed_urls.txt").slice(0, 20)
11
+ puts "benchmarks on #{urls.size} feeds"
12
+ puts "************************************"
13
+ benchmark do |t|
14
+ t.report("feedzirra") do
15
+ iterations.times do
16
+ Feedzirra::Feed.fetch_and_parse(urls, :on_success => lambda { |url, feed| $stdout.print '.'; $stdout.flush })
17
+ end
18
+ end
19
+
20
+ t.report("rfeedparser") do
21
+ iterations.times do
22
+ urls.each do |url|
23
+ feed = FeedParser.parse(url)
24
+ $stdout.print '.'
25
+ $stdout.flush
26
+ end
27
+ end
28
+ end
29
+
30
+ t.report("feed-normalizer") do
31
+ iterations.times do
32
+ urls.each do |url|
33
+ # have to use the :force option to make feed-normalizer parse an atom feed
34
+ feed = FeedNormalizer::FeedNormalizer.parse(open(url), :force_parser => FeedNormalizer::SimpleRssParser)
35
+ $stdout.print '.'
36
+ $stdout.flush
37
+ end
38
+ end
39
+ end
40
+ end
@@ -0,0 +1,28 @@
1
+ require 'rubygems'
2
+ require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'
3
+
4
+ require 'open-uri'
5
+
6
+ require 'benchmark'
7
+ include Benchmark
8
+
9
+ iterations = 10
10
+ urls = File.readlines(File.dirname(__FILE__) + "/../sample_feeds/successful_feed_urls.txt").slice(0, 20)
11
+ puts "benchmarks on #{urls.size} feeds"
12
+ puts "************************************"
13
+ benchmark do |t|
14
+ t.report("feedzirra open uri") do
15
+ iterations.times do
16
+ urls.each do |url|
17
+ Feedzirra::Feed.parse(open(url, "User-Agent" => "feedzirra http://github.com/pauldix/feedzirra/tree/master").read)
18
+ $stdout.print '.'; $stdout.flush
19
+ end
20
+ end
21
+ end
22
+
23
+ t.report("feedzirra fetch and parse") do
24
+ iterations.times do
25
+ Feedzirra::Feed.fetch_and_parse(urls, :on_success => lambda { |url, feed| $stdout.print '.'; $stdout.flush })
26
+ end
27
+ end
28
+ end
@@ -0,0 +1,30 @@
1
+ require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'
2
+ require 'rfeedparser'
3
+ require 'feed-normalizer'
4
+
5
+ require 'benchmark'
6
+ include Benchmark
7
+
8
+ iterations = 50
9
+ xml = File.read(File.dirname(__FILE__) + '/../sample_feeds/PaulDixExplainsNothing.xml')
10
+
11
+ benchmark do |t|
12
+ t.report("feedzirra") do
13
+ iterations.times do
14
+ Feedzirra::Feed.parse(xml)
15
+ end
16
+ end
17
+
18
+ t.report("rfeedparser") do
19
+ iterations.times do
20
+ FeedParser.parse(xml)
21
+ end
22
+ end
23
+
24
+ t.report("feed-normalizer") do
25
+ iterations.times do
26
+ # have to use the :force option to make feed-normalizer parse an atom feed
27
+ FeedNormalizer::FeedNormalizer.parse(xml, :force_parser => FeedNormalizer::SimpleRssParser)
28
+ end
29
+ end
30
+ end
@@ -0,0 +1,33 @@
1
+ require 'rubygems'
2
+ require File.dirname(__FILE__) + '/../../lib/feedzirra.rb'
3
+
4
+ require 'benchmark'
5
+ include Benchmark
6
+
7
+ urls = File.readlines(File.dirname(__FILE__) + "/../sample_feeds/successful_feed_urls.txt")
8
+ puts "benchmarks on #{urls.size} feeds"
9
+ puts "************************************"
10
+ benchmark do |t|
11
+ feeds = {}
12
+ t.report("feedzirra fetch and parse") do
13
+ feeds = Feedzirra::Feed.fetch_and_parse(urls,
14
+ :on_success => lambda { |url, feed| $stdout.print '.'; $stdout.flush },
15
+ :on_failure => lambda {|url, response_code, header, body| puts "#{response_code} ERROR on #{url}"})
16
+ end
17
+
18
+ # curb caches the dns lookups for 60 seconds. to make things fair we have to wait for the cache to expire
19
+ puts "sleeping to wait for dns cache to clear"
20
+ 65.times {$stdout.print('.'); sleep(1)}
21
+ puts "done"
22
+
23
+ updated_feeds = []
24
+ t.report("feedzirra update") do
25
+ updated_feeds = Feedzirra::Feed.update(feeds.values.reject {|f| f.class == Fixnum},
26
+ :on_success => lambda {|feed| $stdout.print '.'; $stdout.flush},
27
+ :on_failure => lambda {|feed, response_code, header, body| puts "#{response_code} ERROR on #{feed.feed_url}"})
28
+ end
29
+
30
+ updated_feeds.each do |feed|
31
+ puts feed.feed_url if feed.updated?
32
+ end
33
+ end