Spectives-logophobia-feedzirra 0.0.31

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44)
  1. data/README.rdoc +169 -0
  2. data/README.textile +205 -0
  3. data/Rakefile +56 -0
  4. data/lib/core_ext/date.rb +21 -0
  5. data/lib/core_ext/string.rb +9 -0
  6. data/lib/feedzirra.rb +44 -0
  7. data/lib/feedzirra/feed.rb +333 -0
  8. data/lib/feedzirra/feed_entry_utilities.rb +45 -0
  9. data/lib/feedzirra/feed_utilities.rb +71 -0
  10. data/lib/feedzirra/parser/atom.rb +35 -0
  11. data/lib/feedzirra/parser/atom_entry.rb +41 -0
  12. data/lib/feedzirra/parser/itunes_category.rb +12 -0
  13. data/lib/feedzirra/parser/mrss_category.rb +11 -0
  14. data/lib/feedzirra/parser/mrss_content.rb +48 -0
  15. data/lib/feedzirra/parser/mrss_copyright.rb +10 -0
  16. data/lib/feedzirra/parser/mrss_credit.rb +11 -0
  17. data/lib/feedzirra/parser/mrss_group.rb +37 -0
  18. data/lib/feedzirra/parser/mrss_hash.rb +10 -0
  19. data/lib/feedzirra/parser/mrss_player.rb +11 -0
  20. data/lib/feedzirra/parser/mrss_rating.rb +10 -0
  21. data/lib/feedzirra/parser/mrss_restriction.rb +11 -0
  22. data/lib/feedzirra/parser/mrss_text.rb +13 -0
  23. data/lib/feedzirra/parser/mrss_thumbnail.rb +11 -0
  24. data/lib/feedzirra/parser/rss.rb +83 -0
  25. data/lib/feedzirra/parser/rss_entry.rb +83 -0
  26. data/lib/feedzirra/parser/rss_image.rb +15 -0
  27. data/spec/benchmarks/feed_benchmarks.rb +98 -0
  28. data/spec/benchmarks/feedzirra_benchmarks.rb +40 -0
  29. data/spec/benchmarks/fetching_benchmarks.rb +28 -0
  30. data/spec/benchmarks/parsing_benchmark.rb +30 -0
  31. data/spec/benchmarks/updating_benchmarks.rb +33 -0
  32. data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
  33. data/spec/feedzirra/feed_spec.rb +546 -0
  34. data/spec/feedzirra/feed_utilities_spec.rb +149 -0
  35. data/spec/feedzirra/parser/atom_entry_spec.rb +49 -0
  36. data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +42 -0
  37. data/spec/feedzirra/parser/atom_feed_burner_spec.rb +39 -0
  38. data/spec/feedzirra/parser/atom_spec.rb +43 -0
  39. data/spec/feedzirra/parser/mrss_content_spec.rb +32 -0
  40. data/spec/feedzirra/parser/rss_entry_spec.rb +154 -0
  41. data/spec/feedzirra/parser/rss_spec.rb +93 -0
  42. data/spec/sample_feeds/run_against_sample.rb +20 -0
  43. data/spec/spec_helper.rb +62 -0
  44. metadata +155 -0
@@ -0,0 +1,35 @@
1
module Feedzirra
  module Parser
    # == Summary
    # SAXMachine-backed parser for Atom feed documents.
    #
    # == Attributes
    # * title
    # * feed_url
    # * url
    # * entries
    class Atom
      include SAXMachine
      include FeedUtilities

      element :title

      # <link> elements are declared three times: the text/html one feeds
      # +url+, the application/atom+xml one feeds +feed_url+, and every href
      # is gathered into +links+ as a fallback for the readers below.
      element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
      element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
      elements :link, :as => :links, :value => :href

      elements :entry, :as => :entries, :class => AtomEntry

      # Tells whether +xml+ looks like an Atom document. The result is the
      # match offset when +xml+ contains "Atom" or the purl.org Atom URL,
      # and nil otherwise.
      def self.able_to_parse?(xml) #:nodoc:
        xml =~ /(Atom)|(#{Regexp.escape("http://purl.org/atom")})/
      end

      # Site URL: the typed text/html link when present, else the last link.
      def url
        return @url if @url
        links.last
      end

      # Feed URL: the typed application/atom+xml link when present, else the
      # first link.
      def feed_url
        return @feed_url if @feed_url
        links.first
      end
    end
  end
end
@@ -0,0 +1,41 @@
1
module Feedzirra
  module Parser
    # == Summary
    # SAXMachine-backed parser for a single entry of an Atom feed.
    #
    # == Attributes
    # * title
    # * url
    # * author
    # * content
    # * summary
    # * published
    # * categories
    class AtomEntry
      include SAXMachine
      include FeedEntryUtilities

      element :title

      # FeedBurner does odd things with the links of some feeds, so the
      # original link is captured separately to let callers distinguish them.
      element :"feedburner:origLink", :as => :orig_url

      element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
      element :name, :as => :author
      element :content
      element :summary

      # +published+ and +updated+ are each declared for several element names.
      element :published
      element :id
      element :created, :as => :published
      element :issued, :as => :published
      element :updated
      element :modified, :as => :updated

      elements :category, :as => :categories, :value => :term
      elements :link, :as => :links, :value => :href
      elements :link, :as => :enclosure_links, :value => :href, :with => {:rel => "enclosure"}

      # Entry URL: the alternate text/html link when present, else the first
      # collected link.
      def url
        return @url if @url
        links.first
      end
    end
  end
end
@@ -0,0 +1,12 @@
1
module Feedzirra
  module Parser
    class RSS
      # Mapping for <itunes:category> elements.
      #
      # Both declarations read the +text+ attribute of <itunes:category>:
      # +name+ as a single value and +sub_categories+ as a collection.
      # NOTE(review): how SAXMachine splits parent and nested categories
      # between the two is not visible here -- confirm against its handler.
      class ITunesCategory
        include SAXMachine

        element  :'itunes:category', :as => :name,           :value => :text
        elements :'itunes:category', :as => :sub_categories, :value => :text
      end
    end
  end
end
@@ -0,0 +1,11 @@
1
module Feedzirra
  module Parser
    # Mapping for a MediaRSS <media:category> element: the element itself as
    # +category+, plus its +scheme+ and +label+ attributes.
    class MRSSCategory
      include SAXMachine

      element :'media:category', :as => :category
      element :'media:category', :as => :scheme, :value => :scheme
      element :'media:category', :as => :label,  :value => :label
    end
  end
end
@@ -0,0 +1,48 @@
1
+ require File.dirname(__FILE__) + '/mrss_credit'
2
+ require File.dirname(__FILE__) + '/mrss_restriction'
3
+ require File.dirname(__FILE__) + '/mrss_category'
4
+ require File.dirname(__FILE__) + '/mrss_copyright'
5
+ require File.dirname(__FILE__) + '/mrss_hash'
6
+ require File.dirname(__FILE__) + '/mrss_player'
7
+ require File.dirname(__FILE__) + '/mrss_rating'
8
+ require File.dirname(__FILE__) + '/mrss_restriction'
9
+ require File.dirname(__FILE__) + '/mrss_text'
10
+ require File.dirname(__FILE__) + '/mrss_thumbnail'
11
+
12
module Feedzirra
  module Parser
    # == Summary
    # Mapping for a MediaRSS <media:content> element: one accessor per
    # attribute of the element, plus the optional MediaRSS child elements
    # that may be nested inside it.
    class MRSSContent
      include SAXMachine

      # Attributes of <media:content>. Each accessor (:as) reads the
      # attribute named by :value.
      element :'media:content', :as => :url, :value => :url
      element :'media:content', :as => :content_type, :value => :type
      element :'media:content', :as => :medium, :value => :medium
      element :'media:content', :as => :duration, :value => :duration
      element :'media:content', :as => :isDefault, :value => :isDefault
      element :'media:content', :as => :expression, :value => :expression
      element :'media:content', :as => :bitrate, :value => :bitrate
      element :'media:content', :as => :framerate, :value => :framerate
      # Previously read a "sampling" attribute, which MediaRSS does not
      # define; the attribute is called "samplingrate".
      element :'media:content', :as => :samplingrate, :value => :samplingrate
      # Previously read the "duration" attribute (copy/paste slip), so
      # #channels returned the duration instead of the channel count.
      element :'media:content', :as => :channels, :value => :channels
      element :'media:content', :as => :height, :value => :height
      element :'media:content', :as => :width, :value => :width
      element :'media:content', :as => :lang, :value => :lang
      element :'media:content', :as => :fileSize, :value => :fileSize

      # optional elements
      element :'media:title', :as => :media_title
      element :'media:keywords', :as => :media_keywords
      element :'media:description', :as => :media_description

      # Optional children parsed into their own mapping classes.
      element :'media:thumbnail', :as => :media_thumbnail, :class => MRSSThumbnail
      element :'media:rating', :as => :rating, :class => MRSSRating
      element :'media:category', :as => :media_category, :class => MRSSCategory
      element :'media:hash', :as => :media_hash, :class => MRSSHash
      element :'media:player', :as => :media_player, :class => MRSSPlayer
      elements :'media:credit', :as => :credits, :class => MRSSCredit
      element :'media:copyright', :as => :copyright, :class => MRSSCopyright
      element :'media:restriction', :as => :media_restriction, :class => MRSSRestriction
      element :'media:text', :as => :text, :class => MRSSText
    end
  end
end
@@ -0,0 +1,10 @@
1
module Feedzirra
  module Parser
    # Mapping for a MediaRSS <media:copyright> element: the element itself as
    # +copyright+ and its +url+ attribute as +url+.
    class MRSSCopyright
      include SAXMachine

      element :'media:copyright', :as => :copyright
      element :'media:copyright', :as => :url, :value => :url
    end
  end
end
@@ -0,0 +1,11 @@
1
module Feedzirra
  module Parser
    # Mapping for a MediaRSS <media:credit> element: its +role+ and +scheme+
    # attributes, and the element itself as +name+.
    class MRSSCredit
      include SAXMachine

      element :'media:credit', :as => :role,   :value => :role
      element :'media:credit', :as => :scheme, :value => :scheme
      element :'media:credit', :as => :name
    end
  end
end
@@ -0,0 +1,37 @@
1
+ require File.dirname(__FILE__) + '/mrss_content'
2
+ require File.dirname(__FILE__) + '/mrss_credit'
3
+ require File.dirname(__FILE__) + '/mrss_restriction'
4
+ require File.dirname(__FILE__) + '/mrss_group'
5
+ require File.dirname(__FILE__) + '/mrss_category'
6
+ require File.dirname(__FILE__) + '/mrss_copyright'
7
+ require File.dirname(__FILE__) + '/mrss_hash'
8
+ require File.dirname(__FILE__) + '/mrss_player'
9
+ require File.dirname(__FILE__) + '/mrss_rating'
10
+ require File.dirname(__FILE__) + '/mrss_restriction'
11
+ require File.dirname(__FILE__) + '/mrss_text'
12
+ require File.dirname(__FILE__) + '/mrss_thumbnail'
13
+
14
module Feedzirra
  module Parser
    # == Summary
    # Mapping for a MediaRSS <media:group> element: the <media:content>
    # items it contains, plus the optional MediaRSS elements that may
    # accompany them.
    class MRSSGroup
      include SAXMachine

      # Every <media:content> child, each parsed as an MRSSContent.
      elements :'media:content', :as => :media_content, :class => MRSSContent

      # optional elements
      element :'media:title',       :as => :media_title
      element :'media:keywords',    :as => :media_keywords
      element :'media:description', :as => :media_description

      # Optional children parsed into their own mapping classes.
      element  :'media:thumbnail',   :as => :media_thumbnail,   :class => MRSSThumbnail
      element  :'media:rating',      :as => :rating,            :class => MRSSRating
      element  :'media:category',    :as => :media_category,    :class => MRSSCategory
      element  :'media:hash',        :as => :media_hash,        :class => MRSSHash
      element  :'media:player',      :as => :media_player,      :class => MRSSPlayer
      elements :'media:credit',      :as => :credits,           :class => MRSSCredit
      element  :'media:copyright',   :as => :copyright,         :class => MRSSCopyright
      element  :'media:restriction', :as => :media_restriction, :class => MRSSRestriction
      element  :'media:text',        :as => :text,              :class => MRSSText
    end
  end
end
@@ -0,0 +1,10 @@
1
module Feedzirra
  module Parser
    # Mapping for a MediaRSS <media:hash> element: the element itself as
    # +hash+ and its +algo+ attribute as +algo+.
    #
    # NOTE(review): `:as => :hash` presumably makes SAXMachine define a #hash
    # accessor on this class, which would shadow Object#hash and could
    # misbehave if instances are used as Hash keys or passed to uniq --
    # confirm before relying on either.
    class MRSSHash
      include SAXMachine

      element :'media:hash', :as => :hash
      element :'media:hash', :as => :algo, :value => :algo
    end
  end
end
@@ -0,0 +1,11 @@
1
module Feedzirra
  module Parser
    # Mapping for a MediaRSS <media:player> element: its +url+, +width+ and
    # +height+ attributes, each exposed under the same name.
    class MRSSPlayer
      include SAXMachine

      element :'media:player', :as => :url,    :value => :url
      element :'media:player', :as => :width,  :value => :width
      element :'media:player', :as => :height, :value => :height
    end
  end
end
@@ -0,0 +1,10 @@
1
module Feedzirra
  module Parser
    # Mapping for a MediaRSS <media:rating> element: the element itself as
    # +rating+ and its +scheme+ attribute as +scheme+.
    class MRSSRating
      include SAXMachine

      element :'media:rating', :as => :rating
      element :'media:rating', :as => :scheme, :value => :scheme
    end
  end
end
@@ -0,0 +1,11 @@
1
module Feedzirra
  module Parser
    # Mapping for a MediaRSS <media:restriction> element: the element itself
    # as +value+, its +type+ attribute as +scope+, and its +relationship+
    # attribute as +relationship+.
    class MRSSRestriction
      include SAXMachine

      element :'media:restriction', :as => :value
      element :'media:restriction', :as => :scope,        :value => :type
      element :'media:restriction', :as => :relationship, :value => :relationship
    end
  end
end
@@ -0,0 +1,13 @@
1
module Feedzirra
  module Parser
    # Mapping for a MediaRSS <media:text> element: its +type+, +lang+,
    # +start+ and +end+ attributes under the same names, and the element
    # itself as +text+.
    class MRSSText
      include SAXMachine

      element :'media:text', :as => :type,  :value => :type
      element :'media:text', :as => :lang,  :value => :lang
      element :'media:text', :as => :start, :value => :start
      element :'media:text', :as => :end,   :value => :end
      element :'media:text', :as => :text
    end
  end
end
@@ -0,0 +1,11 @@
1
module Feedzirra
  module Parser
    # Mapping for a MediaRSS <media:thumbnail> element: its +url+, +width+
    # and +height+ attributes, each exposed under the same name.
    class MRSSThumbnail
      include SAXMachine

      element :'media:thumbnail', :as => :url, :value => :url
      # The accessor used to be declared as :with (a typo for :width).
      element :'media:thumbnail', :as => :width, :value => :width
      element :'media:thumbnail', :as => :height, :value => :height

      # Deprecated misspelling of #width, kept so existing callers still work.
      # Relies on the #width reader that the declaration above is expected to
      # generate.
      def with
        width
      end

      # Deprecated misspelling of #width=, kept so existing callers still work.
      def with=(value)
        self.width = value
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
+ require File.dirname(__FILE__) + '/mrss_credit'
2
+ require File.dirname(__FILE__) + '/mrss_restriction'
3
+ require File.dirname(__FILE__) + '/mrss_category'
4
+ require File.dirname(__FILE__) + '/mrss_copyright'
5
+ require File.dirname(__FILE__) + '/mrss_hash'
6
+ require File.dirname(__FILE__) + '/mrss_player'
7
+ require File.dirname(__FILE__) + '/mrss_rating'
8
+ require File.dirname(__FILE__) + '/mrss_restriction'
9
+ require File.dirname(__FILE__) + '/mrss_text'
10
+ require File.dirname(__FILE__) + '/mrss_thumbnail'
11
+
12
module Feedzirra
  module Parser
    # == Summary
    # SAXMachine-backed parser for RSS feed documents, including the iTunes
    # and MediaRSS extension elements declared below.
    #
    # NOTE(review): several declarations share one accessor name (+image+,
    # +rating+, +copyright+ and +feed_url+). Which source wins when a feed
    # carries both is decided inside SAXMachine -- confirm before relying on
    # either.
    class RSS
      include SAXMachine
      include FeedUtilities

      attr_accessor :feed_url

      # RSS 2.0 required elements
      element :title
      element :link, :as => :url
      element :description
      elements :item, :as => :entries, :class => RSSEntry

      # RSS 2.0 optional elements, declared in their original order; only
      # <image> is parsed into its own class.
      [:language, :copyright, :managingEditor, :webMaster, :pubDate,
       :lastBuildDate, :category, :generator, :docs, :cloud, :ttl].each do |tag|
        element tag
      end
      element :image, :class => RSSImage
      [:rating, :textInput, :skipHours, :skipDays].each do |tag|
        element tag
      end

      # iTunes
      element :'itunes:author',       :as => :author
      element :'itunes:block',        :as => :itunes_block
      element :'itunes:image',        :as => :image, :value => :href
      element :'itunes:explicit',     :as => :explicit
      element :'itunes:keywords',     :as => :keywords
      element :'itunes:new-feed-url', :as => :feed_url
      element :'itunes:name',         :as => :owner_name
      element :'itunes:email',        :as => :owner_email
      element :'itunes:subtitle',     :as => :subtitle
      element :'itunes:summary',      :as => :summary

      elements :'itunes:category', :as => :categories, :value => :text
      # elements :'itunes:category', :as => :itunes_categories,
      #          :class => ITunesCategory

      # MediaRSS support
      element :'media:title',       :as => :media_title
      element :'media:keywords',    :as => :media_keywords
      element :'media:description', :as => :media_description

      element  :'media:thumbnail',   :as => :media_thumbnail,   :class => MRSSThumbnail
      element  :'media:rating',      :as => :rating,            :class => MRSSRating
      element  :'media:category',    :as => :media_category,    :class => MRSSCategory
      element  :'media:hash',        :as => :media_hash,        :class => MRSSHash
      element  :'media:player',      :as => :media_player,      :class => MRSSPlayer
      elements :'media:credit',      :as => :credits,           :class => MRSSCredit
      element  :'media:copyright',   :as => :copyright,         :class => MRSSCopyright
      element  :'media:restriction', :as => :media_restriction, :class => MRSSRestriction
      element  :'media:text',        :as => :text,              :class => MRSSText

      # Tells whether +xml+ looks like an RSS or RDF document. The result is
      # the match offset when +xml+ contains "<rss", or "rdf" anywhere, and
      # nil otherwise.
      def self.able_to_parse?(xml) #:nodoc:
        xml =~ /\<rss|rdf/
      end
    end
  end
end
@@ -0,0 +1,83 @@
1
+ require File.dirname(__FILE__) + '/mrss_content'
2
+ require File.dirname(__FILE__) + '/mrss_credit'
3
+ require File.dirname(__FILE__) + '/mrss_restriction'
4
+ require File.dirname(__FILE__) + '/mrss_group'
5
+ require File.dirname(__FILE__) + '/mrss_category'
6
+ require File.dirname(__FILE__) + '/mrss_copyright'
7
+ require File.dirname(__FILE__) + '/mrss_hash'
8
+ require File.dirname(__FILE__) + '/mrss_player'
9
+ require File.dirname(__FILE__) + '/mrss_rating'
10
+ require File.dirname(__FILE__) + '/mrss_restriction'
11
+ require File.dirname(__FILE__) + '/mrss_text'
12
+ require File.dirname(__FILE__) + '/mrss_thumbnail'
13
+
14
module Feedzirra
  module Parser
    # == Summary
    # SAXMachine-backed parser for a single RSS 2.0 or RDF feed entry,
    # including the MediaRSS and iTunes extension elements declared below.
    #
    # == Attributes
    # * title
    # * url
    # * author
    # * content
    # * summary
    # * published
    # * categories
    class RSSEntry
      include SAXMachine
      include FeedEntryUtilities

      # RSS 2.0 elements
      element :title
      # FeedBurner does odd things with the links of some feeds, so the
      # original link is captured separately to let callers distinguish them.
      element :"feedburner:origLink", :as => :orig_url
      element :link, :as => :url
      element :description, :as => :summary
      element :author
      elements :category, :as => :categories
      element :comments
      element :guid, :as => :id
      element :pubDate, :as => :published
      element :source
      # One accessor per <enclosure> attribute.
      element :enclosure, :as => :enclosure_length, :value => :length
      element :enclosure, :as => :enclosure_type,   :value => :type
      element :enclosure, :as => :enclosure_url,    :value => :url

      # RDF elements
      element :"dc:date",          :as => :published
      element :"dc:Date",          :as => :published
      element :"dcterms:created",  :as => :published
      element :issued,             :as => :published
      element :"content:encoded",  :as => :content
      element :"dc:creator",       :as => :author
      element :"dcterms:modified", :as => :updated

      # MediaRSS support, optional elements
      element :'media:title',       :as => :media_title
      element :'media:keywords',    :as => :media_keywords
      element :'media:description', :as => :media_description

      element  :'media:thumbnail',   :as => :media_thumbnail,   :class => MRSSThumbnail
      element  :'media:rating',      :as => :rating,            :class => MRSSRating
      element  :'media:category',    :as => :media_category,    :class => MRSSCategory
      element  :'media:hash',        :as => :media_hash,        :class => MRSSHash
      element  :'media:player',      :as => :media_player,      :class => MRSSPlayer
      elements :'media:credit',      :as => :credits,           :class => MRSSCredit
      element  :'media:copyright',   :as => :copyright,         :class => MRSSCopyright
      element  :'media:restriction', :as => :media_restriction, :class => MRSSRestriction
      element  :'media:text',        :as => :text,              :class => MRSSText
      elements :'media:content',     :as => :media_content,     :class => MRSSContent
      elements :'media:group',       :as => :media_groups,      :class => MRSSGroup

      # iTunes
      element :'itunes:author',   :as => :author
      element :'itunes:block',    :as => :itunes_block
      element :'itunes:duration', :as => :duration
      element :'itunes:explicit', :as => :explicit
      element :'itunes:keywords', :as => :keywords
      element :'itunes:subtitle', :as => :subtitle
      element :'itunes:summary',  :as => :summary
    end
  end
end