UnderpantsGnome-feedzirra 0.0.14

Sign up to get free protection for your applications and to get access to all the features.
Files changed (33) hide show
  1. data/README.textile +198 -0
  2. data/Rakefile +56 -0
  3. data/lib/core_ext/date.rb +21 -0
  4. data/lib/core_ext/string.rb +9 -0
  5. data/lib/feedzirra.rb +35 -0
  6. data/lib/feedzirra/feed.rb +315 -0
  7. data/lib/feedzirra/feed_entry_utilities.rb +45 -0
  8. data/lib/feedzirra/feed_utilities.rb +71 -0
  9. data/lib/feedzirra/parser/atom.rb +26 -0
  10. data/lib/feedzirra/parser/atom_entry.rb +34 -0
  11. data/lib/feedzirra/parser/atom_feed_burner.rb +27 -0
  12. data/lib/feedzirra/parser/atom_feed_burner_entry.rb +35 -0
  13. data/lib/feedzirra/parser/itunes_rss.rb +50 -0
  14. data/lib/feedzirra/parser/itunes_rss_item.rb +31 -0
  15. data/lib/feedzirra/parser/itunes_rss_owner.rb +12 -0
  16. data/lib/feedzirra/parser/mrss_content.rb +23 -0
  17. data/lib/feedzirra/parser/rss.rb +28 -0
  18. data/lib/feedzirra/parser/rss_entry.rb +54 -0
  19. data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
  20. data/spec/feedzirra/feed_spec.rb +546 -0
  21. data/spec/feedzirra/feed_utilities_spec.rb +149 -0
  22. data/spec/feedzirra/parser/atom_entry_spec.rb +45 -0
  23. data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +42 -0
  24. data/spec/feedzirra/parser/atom_feed_burner_spec.rb +39 -0
  25. data/spec/feedzirra/parser/atom_spec.rb +35 -0
  26. data/spec/feedzirra/parser/itunes_rss_item_spec.rb +48 -0
  27. data/spec/feedzirra/parser/itunes_rss_owner_spec.rb +18 -0
  28. data/spec/feedzirra/parser/itunes_rss_spec.rb +50 -0
  29. data/spec/feedzirra/parser/rss_entry_spec.rb +41 -0
  30. data/spec/feedzirra/parser/rss_spec.rb +41 -0
  31. data/spec/spec.opts +2 -0
  32. data/spec/spec_helper.rb +62 -0
  33. metadata +143 -0
@@ -0,0 +1,45 @@
module Feedzirra
  # Helpers shared by the entry parsers that include this module: date
  # parsing, published/updated bookkeeping, an id fallback and in-place
  # sanitizing of the textual fields.
  module FeedEntryUtilities
    # Returns the stored publish time, falling back to the stored update
    # time when no publish time has been recorded.
    def published
      @published || @updated
    end

    # Parses +string+ with DateTime.parse and converts the result with
    # DateTime#feed_utils_to_gm_time (a core extension defined outside this
    # module).
    #
    # Returns the converted time, or nil after printing a notice to stdout
    # when any StandardError is raised while parsing/converting.
    def parse_datetime(string)
      DateTime.parse(string).feed_utils_to_gm_time
    rescue
      puts "DATE CAN'T BE PARSED: #{string}"
      nil
    end

    ##
    # Returns the id of the entry, or its url if no id is present, as some
    # formats don't support ids.
    def id
      @id || @url
    end

    ##
    # Writer for published. By default, we keep the "oldest" publish time found.
    # Values that cannot be parsed are ignored, so an unparseable date seen
    # after a valid one no longer raises NoMethodError on nil.
    def published=(val)
      parsed = parse_datetime(val)
      return unless parsed

      @published = parsed if !@published || parsed < @published
    end

    ##
    # Writer for updated. By default, we keep the most recent update time found.
    # Values that cannot be parsed are ignored (see published=).
    def updated=(val)
      parsed = parse_datetime(val)
      return unless parsed

      @updated = parsed if !@updated || parsed > @updated
    end

    # Calls sanitize! on each of title, author, summary and content that the
    # including class actually defines and that is currently non-nil.
    # Attributes the includer does not respond to are skipped instead of
    # raising NoMethodError.
    def sanitize!
      %w(title author summary content).each do |name|
        next unless respond_to?(name)

        value = send(name)
        value.sanitize! if value
      end
    end

    alias_method :last_modified, :published
  end
end
@@ -0,0 +1,71 @@
module Feedzirra
  # Helpers shared by the feed-level parsers that include this module.
  # Includers are expected to provide +entries+ plus readers/writers for
  # every name in UPDATABLE_ATTRIBUTES.
  module FeedUtilities
    # Attributes copied from a freshly fetched feed by update_from_feed.
    UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified)

    attr_writer   :new_entries, :updated, :last_modified
    attr_accessor :etag

    # Returns the memoized last-modified time, computing it on first use as
    # the latest non-nil +published+ value among the entries (nil when no
    # entry has one).
    def last_modified
      @last_modified ||= entries.map { |entry| entry.published }.compact.max
    end

    # Truthy once update_from_feed has detected a changed attribute (or the
    # +updated+ writer was used).
    def updated?
      @updated
    end

    # Entries discovered by update_from_feed; an empty array until then.
    def new_entries
      @new_entries ||= []
    end

    def has_new_entries?
      new_entries.size > 0
    end

    # Merges a freshly parsed +feed+ into this one: entries that precede the
    # current newest entry are recorded in new_entries and prepended to
    # entries, then every UPDATABLE_ATTRIBUTES value that differs is copied
    # over and the feed is flagged as updated.
    def update_from_feed(feed)
      self.new_entries += find_new_entries_for(feed)
      self.entries.unshift(*self.new_entries)

      # Visit every attribute: the previous `any?` stopped at the first
      # change and left the remaining attributes stale.
      changed = UPDATABLE_ATTRIBUTES.select do |name|
        differs = send(name) != feed.send(name)
        update_attribute(feed, name)
        differs
      end
      updated! unless changed.empty?
    end

    # Copies attribute +name+ from +feed+ when the two values differ.
    # Returns whatever the writer returns in that case, nil otherwise.
    def update_attribute(feed, name)
      old_value, new_value = send(name), feed.send(name)

      if old_value != new_value
        send("#{name}=", new_value)
      end
    end

    def sanitize_entries!
      entries.each { |entry| entry.sanitize! }
    end

    private

    def updated!
      @updated = true
    end

    # Not all feeds carry a published date, but entries are ordered newest
    # first, so everything in +feed+ that comes before the entry whose url
    # matches our current newest entry is treated as new. When this feed has
    # no entries yet, every entry of +feed+ is new.
    def find_new_entries_for(feed)
      latest_entry = entries.first
      feed.entries.take_while do |entry|
        latest_entry.nil? || entry.url != latest_entry.url
      end
    end

    def existing_entry?(test_entry)
      entries.any? { |entry| entry.url == test_entry.url }
    end
  end
end
@@ -0,0 +1,26 @@
1
+ module Feedzirra
2
+
3
+ module Parser
4
+ # == Summary
5
+ # Parser for dealing with Atom feeds. Fields are declared with the SAXMachine DSL; FeedUtilities is mixed in. able_to_parse? is truthy (a match index) when the xml contains "Atom" or "http://purl.org/atom" (case-sensitive), nil otherwise.
6
+ #
7
+ # == Attributes
8
+ # * title
9
+ # * feed_url - declared from the href of a <link> with type "application/atom+xml"
10
+ # * url - declared from the href of a <link> with type "text/html"
11
+ # * entries - <entry> elements, parsed with AtomEntry
12
+ class Atom
13
+ include SAXMachine
14
+ include FeedUtilities
15
+ element :title
16
+ element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
17
+ element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
18
+ elements :entry, :as => :entries, :class => AtomEntry
19
+
20
+ def self.able_to_parse?(xml) #:nodoc:
21
+ xml =~ /(Atom)|(#{Regexp.escape("http://purl.org/atom")})/
22
+ end
23
+ end
24
+ end
25
+
26
+ end
@@ -0,0 +1,34 @@
1
+ module Feedzirra
2
+
3
+ module Parser
4
+ # == Summary
5
+ # Parser for dealing with Atom feed entries. Fields are declared with the SAXMachine DSL; FeedEntryUtilities is mixed in.
6
+ #
7
+ # == Attributes
8
+ # * title
9
+ # * url - declared from the href of a <link> with type "text/html" and rel "alternate"
10
+ # * author - declared from <name>
11
+ # * content
12
+ # * summary
13
+ # * published - declared from <published>, <created> and <issued>; presumably assigned through FeedEntryUtilities#published= (confirm against SAXMachine)
14
+ # * categories - term attribute of each <category>; id and updated (from <updated> and <modified>) are declared below as well
15
+ class AtomEntry
16
+ include SAXMachine
17
+ include FeedEntryUtilities
18
+ element :title
19
+ element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
20
+ element :name, :as => :author
21
+ element :content
22
+ element :summary
23
+ element :published
24
+ element :id
25
+ element :created, :as => :published
26
+ element :issued, :as => :published
27
+ element :updated
28
+ element :modified, :as => :updated
29
+ elements :category, :as => :categories, :value => :term
30
+ end
31
+
32
+ end
33
+
34
+ end
@@ -0,0 +1,27 @@
1
+ module Feedzirra
2
+
3
+ module Parser
4
+ # == Summary
5
+ # Parser for dealing with Feedburner Atom feeds. Same declarations as the plain Atom parser except that entries use AtomFeedBurnerEntry. able_to_parse? requires both "Atom" and "feedburner" to appear in the xml (case-sensitive) and returns false, not nil, when they do not.
6
+ #
7
+ # == Attributes
8
+ # * title
9
+ # * feed_url - declared from the href of a <link> with type "application/atom+xml"
10
+ # * url - declared from the href of a <link> with type "text/html"
11
+ # * entries - <entry> elements, parsed with AtomFeedBurnerEntry
12
+ class AtomFeedBurner
13
+ include SAXMachine
14
+ include FeedUtilities
15
+ element :title
16
+ element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
17
+ element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
18
+ elements :entry, :as => :entries, :class => AtomFeedBurnerEntry
19
+
20
+ def self.able_to_parse?(xml) #:nodoc:
21
+ (xml =~ /Atom/ && xml =~ /feedburner/) || false
22
+ end
23
+ end
24
+
25
+ end
26
+
27
+ end
@@ -0,0 +1,35 @@
1
+ module Feedzirra
2
+
3
+ module Parser
4
+ # == Summary
5
+ # Parser for dealing with Feedburner Atom feed entries. Fields are declared with the SAXMachine DSL; FeedEntryUtilities is mixed in.
6
+ #
7
+ # == Attributes
8
+ # * title
9
+ # * url - declared twice: from the alternate text/html <link> href and from <feedburner:origLink>. NOTE(review): which one wins when both are present depends on SAXMachine - confirm.
10
+ # * author - declared from <name>
11
+ # * content
12
+ # * summary
13
+ # * published - declared from <published>, <issued> and <created>
14
+ # * categories - term attribute of each <category>; id and updated (from <updated> and <modified>) are declared below as well
15
+ class AtomFeedBurnerEntry
16
+ include SAXMachine
17
+ include FeedEntryUtilities
18
+ element :title
19
+ element :name, :as => :author
20
+ element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
21
+ element :"feedburner:origLink", :as => :url
22
+ element :summary
23
+ element :content
24
+ element :published
25
+ element :id
26
+ element :issued, :as => :published
27
+ element :created, :as => :published
28
+ element :updated
29
+ element :modified, :as => :updated
30
+ elements :category, :as => :categories, :value => :term
31
+ end
32
+
33
+ end
34
+
35
+ end
@@ -0,0 +1,50 @@
1
+ module Feedzirra
2
+
3
+ module Parser
4
+ # iTunes is RSS 2.0 + some Apple extensions. able_to_parse? is truthy when the xml contains the double-quoted xmlns:itunes podcast-1.0 DTD declaration. NOTE(review): the dots in that regex are unescaped, so each matches any character.
5
+ # Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
6
+ class ITunesRSS
7
+ include SAXMachine
8
+ include FeedUtilities
9
+
10
+ attr_accessor :feed_url
11
+
12
+ # RSS 2.0 elements that need including
13
+ element :copyright
14
+ element :description
15
+ element :language
16
+ element :managingEditor
17
+ element :title
18
+ element :link, :as => :url
19
+
20
+ # If author is not present use managingEditor on the channel (left to the caller; no fallback is implemented in this class)
21
+ element :"itunes:author", :as => :itunes_author
22
+ element :"itunes:block", :as => :itunes_block
23
+ element :"itunes:image", :value => :href, :as => :itunes_image
24
+ element :"itunes:explicit", :as => :itunes_explicit
25
+ element :"itunes:keywords", :as => :itunes_keywords
26
+ # New URL for the podcast feed
27
+ element :"itunes:new-feed-url", :as => :itunes_new_feed_url
28
+ element :"itunes:subtitle", :as => :itunes_subtitle
29
+ # If summary is not present, use the description tag (left to the caller; no fallback is implemented in this class)
30
+ element :"itunes:summary", :as => :itunes_summary
31
+
32
+ # iTunes RSS feeds can have multiple main categories...
33
+ # ...and multiple sub-categories per category
34
+ # TODO: subcategories are not supported correctly - they are at the same level
35
+ # as the main categories
36
+ elements :"itunes:category", :as => :itunes_categories, :value => :text
37
+
38
+ elements :"itunes:owner", :as => :itunes_owners, :class => ITunesRSSOwner
39
+
40
+ elements :item, :as => :entries, :class => ITunesRSSItem
41
+
42
+ def self.able_to_parse?(xml)
43
+ xml =~ /xmlns:itunes=\"http:\/\/www.itunes.com\/dtds\/podcast-1.0.dtd\"/
44
+ end
45
+
46
+ end
47
+
48
+ end
49
+
50
+ end
@@ -0,0 +1,31 @@
1
+ module Feedzirra
2
+
3
+ module Parser
4
+ # iTunes extensions to the standard RSS 2.0 item. NOTE(review): this class mixes in the feed-level FeedUtilities, not FeedEntryUtilities, so it has no sanitize! (which FeedUtilities#sanitize_entries! calls on each entry) and no date-parsing published= writer - confirm whether FeedEntryUtilities was intended.
5
+ # Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
6
+ class ITunesRSSItem
7
+ include SAXMachine
8
+ include FeedUtilities
9
+ element :author
10
+ element :guid
11
+ element :title
12
+ element :link, :as => :url
13
+ element :description, :as => :summary
14
+ element :pubDate, :as => :published
15
+
16
+ # If author is not present use author tag on the item (left to the caller; no fallback is implemented in this class)
17
+ element :"itunes:author", :as => :itunes_author
18
+ element :"itunes:block", :as => :itunes_block
19
+ element :"itunes:duration", :as => :itunes_duration
20
+ element :"itunes:explicit", :as => :itunes_explicit
21
+ element :"itunes:keywords", :as => :itunes_keywords
22
+ element :"itunes:subtitle", :as => :itunes_subtitle
23
+ # If summary is not present, use the description tag (left to the caller; no fallback is implemented in this class)
24
+ element :"itunes:summary", :as => :itunes_summary
25
+ element :enclosure, :value => :length, :as => :enclosure_length
26
+ element :enclosure, :value => :type, :as => :enclosure_type
27
+ element :enclosure, :value => :url, :as => :enclosure_url
28
+ end
29
+ end
30
+
31
+ end
@@ -0,0 +1,12 @@
1
+ module Feedzirra
2
+
3
+ module Parser
4
+ class ITunesRSSOwner # Used by ITunesRSS for each <itunes:owner>; exposes name and email. NOTE(review): mixes in the feed-level FeedUtilities although no entries are declared here - confirm it is needed.
5
+ include SAXMachine
6
+ include FeedUtilities
7
+ element :"itunes:name", :as => :name
8
+ element :"itunes:email", :as => :email
9
+ end
10
+ end
11
+
12
+ end
@@ -0,0 +1,23 @@
1
+ module Feedzirra
2
+ module Parser
3
+ class RSSEntry
4
+ # == Summary
5
+ # Parser for dealing with multiple media:content entries. Nested inside RSSEntry; every attribute below is declared from an XML attribute of the <media:content> element itself. NOTE(review): FeedEntryUtilities#sanitize! reads title/author/summary/content, none of which are declared here - confirm the mixin is wanted.
6
+ #
7
+ # == Attributes
8
+ # * url
9
+ # * content_type - from the type attribute
10
+ # * medium
11
+ # * duration
12
+ class MRSSContent
13
+ include SAXMachine
14
+ include FeedEntryUtilities
15
+
16
+ element :'media:content', :as => :url, :value => :url
17
+ element :'media:content', :as => :content_type, :value => :type
18
+ element :'media:content', :as => :medium, :value => :medium
19
+ element :'media:content', :as => :duration, :value => :duration
20
+ end
21
+ end
22
+ end
23
+ end
@@ -0,0 +1,28 @@
1
+ module Feedzirra
2
+
3
+ module Parser
4
+ # == Summary
5
+ # Parser for dealing with RSS feeds. Fields are declared with the SAXMachine DSL; FeedUtilities is mixed in. able_to_parse? is truthy when the xml contains "<rss" or, because the alternation splits the whole pattern, the bare text "rdf" anywhere (case-sensitive).
6
+ #
7
+ # == Attributes
8
+ # * title
9
+ # * feed_url - plain accessor; not declared from any element of the document
10
+ # * url - declared from <link>
11
+ # * entries - <item> elements, parsed with RSSEntry
12
+ class RSS
13
+ include SAXMachine
14
+ include FeedUtilities
15
+ element :title
16
+ element :link, :as => :url
17
+ elements :item, :as => :entries, :class => RSSEntry
18
+
19
+ attr_accessor :feed_url
20
+
21
+ def self.able_to_parse?(xml) #:nodoc:
22
+ xml =~ /\<rss|rdf/
23
+ end
24
+ end
25
+
26
+ end
27
+
28
+ end
@@ -0,0 +1,54 @@
1
+ require File.dirname(__FILE__) + '/mrss_content'
2
+
3
+ module Feedzirra
4
+
5
+ module Parser
6
+ # == Summary
7
+ # Parser for dealing with RSS/RDF feed entries (the <item> elements of the RSS parser). Fields are declared with the SAXMachine DSL; FeedEntryUtilities is mixed in. MRSSContent is loaded by the require at the top of this file.
8
+ #
9
+ # == Attributes
10
+ # * title
11
+ # * url - declared from <link>
12
+ # * author - declared from <dc:creator>
13
+ # * content - declared from <content:encoded>
14
+ # * summary - declared from <description>
15
+ # * published - declared from <pubDate>, <dc:date>, <dc:Date>, <dcterms:created> and <issued>
16
+ # * categories - also declared below: updated, id (from <guid>), enclosure_type/enclosure_url, media_thumbnail (+ width/height), media_description, media_content
17
+ class RSSEntry
18
+ include SAXMachine
19
+ include FeedEntryUtilities
20
+ element :title
21
+ element :link, :as => :url
22
+
23
+ element :"dc:creator", :as => :author
24
+ element :"content:encoded", :as => :content
25
+ element :description, :as => :summary
26
+
27
+ element :pubDate, :as => :published
28
+ element :"dc:date", :as => :published
29
+ element :"dc:Date", :as => :published
30
+ element :"dcterms:created", :as => :published
31
+
32
+
33
+ element :"dcterms:modified", :as => :updated
34
+ element :issued, :as => :published
35
+ elements :category, :as => :categories
36
+
37
+ element :guid, :as => :id
38
+
39
+ # TODO: uncomment this when the bug is resolved (the bug is not identified here; enclosure_length stays undeclared until then)
40
+ # element :enclosure, :value => :length, :as => :enclosure_length
41
+ element :enclosure, :value => :type, :as => :enclosure_type
42
+ element :enclosure, :value => :url, :as => :enclosure_url
43
+
44
+ element :'media:thumbnail', :as => :media_thumbnail, :value => :url
45
+ element :'media:thumbnail', :as => :media_thumbnail_width, :value => :width
46
+ element :'media:thumbnail', :as => :media_thumbnail_height, :value => :height
47
+ element :'media:description', :as => :media_description
48
+ elements :'media:content', :as => :media_content, :class => MRSSContent
49
+
50
+ end
51
+
52
+ end
53
+
54
+ end