feedzirra 0.0.18.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,45 @@
1
module Feedzirra
  # Helpers shared by the feed entry classes: date parsing, fallbacks for
  # +id+ and +published+, and in-place sanitizing of the textual fields.
  module FeedEntryUtilities
    # Returns the publication time, falling back to the update time when no
    # publication time has been stored.
    def published
      @published || @updated
    end

    # Parses +string+ with DateTime.parse and converts the result with
    # +feed_utils_to_gm_time+.
    #
    # Any StandardError raised on the way is rescued: a notice is printed and
    # nil is returned, so callers must be prepared for a nil result.
    def parse_datetime(string)
      DateTime.parse(string).feed_utils_to_gm_time
    rescue
      puts "DATE CAN'T BE PARSED: #{string}"
      nil
    end

    ##
    # Returns the id of the entry, or its url if no id is present, as some formats don't support it
    def id
      @id || @url
    end

    ##
    # Writer for published. By default, we keep the "oldest" publish time found.
    # A value that cannot be parsed is ignored, so it can neither raise on the
    # comparison nor wipe a previously stored time.
    def published=(val)
      parsed = parse_datetime(val)
      return if parsed.nil?

      @published = parsed if !@published || parsed < @published
    end

    ##
    # Writer for updated. By default, we keep the most recent update time found.
    # A value that cannot be parsed is ignored, so it can neither raise on the
    # comparison nor wipe a previously stored time.
    def updated=(val)
      parsed = parse_datetime(val)
      return if parsed.nil?

      @updated = parsed if !@updated || parsed > @updated
    end

    # Calls +sanitize!+ on each of title, author, summary and content that is
    # present (nil/false values are skipped).
    def sanitize!
      self.title.sanitize! if self.title
      self.author.sanitize! if self.author
      self.summary.sanitize! if self.summary
      self.content.sanitize! if self.content
    end

    alias_method :last_modified, :published
  end
end
@@ -0,0 +1,71 @@
1
module Feedzirra
  # Helpers shared by the feed classes: tracking of new entries, the
  # "updated" flag, and merging a freshly parsed feed into an existing one.
  # The including class is expected to provide +entries+ plus readers and
  # writers for the attributes named in UPDATABLE_ATTRIBUTES.
  module FeedUtilities
    # Attributes copied from the freshly parsed feed by #update_from_feed.
    UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified)

    attr_writer :new_entries, :updated, :last_modified
    attr_accessor :etag

    # Returns the explicitly assigned value or, failing that, the greatest
    # +published+ value among the entries (nil when no entry has one).
    # The computed value is memoized.
    def last_modified
      @last_modified ||= entries.map { |entry| entry.published }.compact.max
    end

    # Truthy once #update_from_feed has changed at least one attribute
    # (or after +updated=+ was assigned a truthy value).
    def updated?
      @updated
    end

    # Entries discovered by #update_from_feed so far (empty array by default).
    def new_entries
      @new_entries ||= []
    end

    def has_new_entries?
      !new_entries.empty?
    end

    # Merges +feed+ (a freshly parsed copy of this feed) into the receiver:
    # entries not seen before are recorded in +new_entries+ and put in front
    # of +entries+, then every updatable attribute that differs is copied.
    def update_from_feed(feed)
      found = find_new_entries_for(feed)
      self.new_entries += found
      # Only the entries found by this call are prepended; prepending the
      # accumulated new_entries would duplicate earlier finds on every call.
      self.entries.unshift(*found)

      # select (unlike any?) visits every attribute, so a change in the first
      # attribute does not prevent the remaining ones from being copied.
      changed = UPDATABLE_ATTRIBUTES.select { |name| update_attribute(feed, name) }
      updated! unless changed.empty?
    end

    # Copies attribute +name+ from +feed+ when it differs from the receiver's
    # value. Returns the result of the writer call, or nil when nothing changed.
    def update_attribute(feed, name)
      old_value, new_value = send(name), feed.send(name)

      if old_value != new_value
        send("#{name}=", new_value)
      end
    end

    def sanitize_entries!
      entries.each { |entry| entry.sanitize! }
    end

    private

    def updated!
      @updated = true
    end

    # Not all feeds have a published date, but they are always ordered with
    # the newest entry first. So the leading entries of +feed+ are treated as
    # new until one has the same url as the newest entry already known.
    # When the receiver has no entries at all, every entry of +feed+ is new.
    def find_new_entries_for(feed)
      latest_entry = self.entries.first
      return Array.new(feed.entries) unless latest_entry

      feed.entries.take_while { |entry| entry.url != latest_entry.url }
    end

    def existing_entry?(test_entry)
      entries.any? { |entry| entry.url == test_entry.url }
    end
  end
end
@@ -0,0 +1,47 @@
1
module Feedzirra

  module Parser
    # == Summary
    # Parser for dealing with Atom feeds.
    #
    # == Attributes
    # * prev_page
    # * next_page
    # * last_page
    # * title
    # * subtitle
    # * updated
    # * feed_url
    # * url
    # * related
    # * links
    # * entries
    class Atom
      include SAXMachine
      include FeedUtilities

      # Paging links (declaration order is kept as-is).
      element :"atom:link", :as => :prev_page, :value => :href, :with => {:rel => 'prev'}
      element :"atom:link", :as => :next_page, :value => :href, :with => {:rel => 'next'}
      element :"atom:link", :as => :last_page, :value => :href, :with => {:rel => 'last'}

      # Feed-level metadata.
      element :title
      element :subtitle
      element :updated

      # Link-derived attributes; +links+ collects every link href.
      element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
      element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
      elements :link, :as => :related, :value => :href, :with => {:rel => "related"}
      elements :link, :as => :links, :value => :href

      elements :entry, :as => :entries, :class => AtomEntry

      class << self
        # Matches when +xml+ contains "Atom" or the purl.org Atom URL.
        # Returns the match position, or nil when there is no match.
        def able_to_parse?(xml) #:nodoc:
          xml =~ /(Atom)|(#{Regexp.escape("http://purl.org/atom")})/
        end
      end

      # The stored url, or the last collected link when none is stored.
      def url
        return @url if @url

        links.last
      end

      # The stored feed url, or the first collected link when none is stored.
      def feed_url
        return @feed_url if @feed_url

        links.first
      end
    end
  end

end
@@ -0,0 +1,51 @@
1
module Feedzirra

  module Parser
    # == Summary
    # Parser for dealing with Atom feed entries.
    #
    # == Attributes
    # * title
    # * url
    # * related
    # * author
    # * content
    # * summary
    # * published
    # * id
    # * updated
    # * categories
    # * media_content
    # * media_description
    # * media_thumbnail
    # * enclosure
    # * links
    class AtomEntry
      include SAXMachine
      include FeedEntryUtilities

      # Core entry fields.
      element :title
      element :link, :as => :url, :value => :href, :with => {:rel => "alternate"}
      elements :link, :as => :related, :value => :href, :with => {:rel => "related"}
      element :name, :as => :author
      element :content
      element :summary

      # Dates: several element names feed the same published/updated writers.
      element :published
      element :id
      element :created, :as => :published
      element :issued, :as => :published
      element :updated
      element :modified, :as => :updated
      elements :category, :as => :categories, :value => :term

      # Media extensions and enclosure.
      element :"media:content", :as => :media_content, :value => :url
      element :"media:description", :as => :media_description
      element :"media:thumbnail", :as => :media_thumbnail, :value => :url
      element :enclosure, :value => :url

      elements :link, :as => :links, :value => :href

      # The stored url, or the first collected link when none is stored.
      def url
        return @url if @url

        links.first
      end
    end

  end

end
@@ -0,0 +1,27 @@
1
module Feedzirra

  module Parser
    # == Summary
    # Parser for dealing with Feedburner Atom feeds.
    #
    # == Attributes
    # * title
    # * feed_url
    # * url
    # * entries
    class AtomFeedBurner
      include SAXMachine
      include FeedUtilities

      element :title
      element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
      element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
      elements :entry, :as => :entries, :class => AtomFeedBurnerEntry

      class << self
        # Requires both "Atom" and "feedburner" to appear in +xml+.
        # Returns the position of "feedburner" when both match, false otherwise.
        def able_to_parse?(xml) #:nodoc:
          return false unless xml =~ /Atom/

          (xml =~ /feedburner/) || false
        end
      end
    end

  end

end
@@ -0,0 +1,35 @@
1
module Feedzirra

  module Parser
    # == Summary
    # Parser for dealing with Feedburner Atom feed entries.
    #
    # == Attributes
    # * title
    # * url
    # * author
    # * content
    # * summary
    # * published
    # * id
    # * updated
    # * categories
    class AtomFeedBurnerEntry
      include SAXMachine
      include FeedEntryUtilities

      # Core entry fields. Two declarations map to :url (the alternate link
      # and feedburner:origLink); their order is kept exactly as declared.
      element :title
      element :name, :as => :author
      element :link, :as => :url, :value => :href, :with => {:rel => "alternate"}
      element :"feedburner:origLink", :as => :url
      element :summary
      element :content

      # Dates: several element names feed the same published/updated writers.
      element :published
      element :id
      element :issued, :as => :published
      element :created, :as => :published
      element :updated
      element :modified, :as => :updated

      elements :category, :as => :categories, :value => :term
    end

  end

end
@@ -0,0 +1,50 @@
1
module Feedzirra

  module Parser
    # iTunes is RSS 2.0 + some apple extensions
    # Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
    class ITunesRSS
      include SAXMachine
      include FeedUtilities

      attr_accessor :feed_url

      # RSS 2.0 elements that need including
      element :copyright
      element :description
      element :language
      element :managingEditor
      element :title
      element :link, :as => :url

      # If author is not present use managingEditor on the channel
      element :"itunes:author", :as => :itunes_author
      element :"itunes:block", :as => :itunes_block
      element :"itunes:image", :value => :href, :as => :itunes_image
      element :"itunes:explicit", :as => :itunes_explicit
      element :"itunes:keywords", :as => :itunes_keywords
      # New URL for the podcast feed
      element :"itunes:new-feed-url", :as => :itunes_new_feed_url
      element :"itunes:subtitle", :as => :itunes_subtitle
      # If summary is not present, use the description tag
      element :"itunes:summary", :as => :itunes_summary

      # iTunes RSS feeds can have multiple main categories...
      # ...and multiple sub-categories per category
      # TODO subcategories not supported correctly - they are at the same level
      # as the main categories
      elements :"itunes:category", :as => :itunes_categories, :value => :text

      elements :"itunes:owner", :as => :itunes_owners, :class => ITunesRSSOwner

      elements :item, :as => :entries, :class => ITunesRSSItem

      # Detects the iTunes podcast namespace declaration in +xml+.
      # Returns the match position, or nil when the declaration is absent.
      #
      # The dots are escaped so they only match literal dots, either quote
      # style is accepted for the attribute value, optional whitespace around
      # "=" is tolerated, and matching is case-insensitive because feeds
      # spell the DTD path with varying capitalization.
      def self.able_to_parse?(xml)
        xml =~ /xmlns:itunes\s?=\s?["']http:\/\/www\.itunes\.com\/dtds\/podcast-1\.0\.dtd["']/i
      end

    end

  end

end
@@ -0,0 +1,31 @@
1
module Feedzirra

  module Parser
    # iTunes extensions to the standard RSS2.0 item
    # Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
    class ITunesRSSItem
      include SAXMachine
      # An item is a feed entry, so it mixes in the entry helpers (date
      # parsing for +published+, +id+, +sanitize!+) rather than the
      # feed-level FeedUtilities. FeedUtilities#sanitize_entries! calls
      # +sanitize!+ on every entry, which only FeedEntryUtilities defines.
      include FeedEntryUtilities
      element :author
      element :guid
      element :title
      element :link, :as => :url
      element :description, :as => :summary
      # FeedEntryUtilities#sanitize! reads +content+, so the item exposes it.
      element :"content:encoded", :as => :content
      element :pubDate, :as => :published

      # If author is not present use author tag on the item
      element :"itunes:author", :as => :itunes_author
      element :"itunes:block", :as => :itunes_block
      element :"itunes:duration", :as => :itunes_duration
      element :"itunes:explicit", :as => :itunes_explicit
      element :"itunes:keywords", :as => :itunes_keywords
      element :"itunes:subtitle", :as => :itunes_subtitle
      # If summary is not present, use the description tag
      element :"itunes:summary", :as => :itunes_summary
      element :enclosure, :value => :length, :as => :enclosure_length
      element :enclosure, :value => :type, :as => :enclosure_type
      element :enclosure, :value => :url, :as => :enclosure_url
    end
  end

end
@@ -0,0 +1,12 @@
1
module Feedzirra

  module Parser
    # Owner block of an iTunes feed (itunes:owner), exposing the owner's
    # name and email.
    class ITunesRSSOwner
      include SAXMachine
      include FeedUtilities

      element :"itunes:name", :as => :name
      element :"itunes:email", :as => :email
    end
  end

end
@@ -0,0 +1,40 @@
1
module Feedzirra

  module Parser
    # == Summary
    # Parser for dealing with RSS feeds.
    #
    # == Attributes
    # * prev_page
    # * next_page
    # * last_page
    # * title
    # * feed_url
    # * url
    # * related
    # * description
    # * language
    # * entries
    class RSS
      include SAXMachine
      include FeedUtilities

      # Paging links (declaration order is kept as-is).
      element :"atom:link", :as => :prev_page, :value => :href, :with => {:rel => 'prev'}
      element :"atom:link", :as => :next_page, :value => :href, :with => {:rel => 'next'}
      element :"atom:link", :as => :last_page, :value => :href, :with => {:rel => 'last'}

      # Feed-level metadata.
      element :title
      element :link, :as => :url
      elements :link, :as => :related, :value => :href, :with => {:rel => "related"}
      element :description
      element :language

      elements :item, :as => :entries, :class => RSSEntry

      # feed_url is a plain accessor; no element declaration fills it here.
      attr_accessor :feed_url

      class << self
        # Matches when +xml+ contains "<rss" or "<rdf".
        # Returns the match position, or nil when there is no match.
        def able_to_parse?(xml) #:nodoc:
          xml =~ /\<rss|\<rdf/
        end
      end
    end

  end

end
@@ -0,0 +1,55 @@
1
module Feedzirra

  module Parser
    # == Summary
    # Parser for dealing with RSS/RDF feed entries.
    #
    # == Attributes
    # * title
    # * url
    # * related
    # * author
    # * content
    # * summary
    # * published
    # * updated
    # * categories
    # * media_content
    # * media_description
    # * media_thumbnail
    # * enclosure
    # * id
    class RSSEntry
      include SAXMachine
      include FeedEntryUtilities

      # Core entry fields.
      element :title
      element :link, :as => :url
      elements :link, :as => :related, :value => :href, :with => {:rel => "related"}

      # Author sources.
      # NOTE(review): :author is declared twice (plain and with :as => :author);
      # the second looks redundant - confirm before removing.
      element :author
      element :"dc:creator", :as => :author
      element :author, :as => :author
      element :"content:encoded", :as => :content
      element :description, :as => :summary

      # Publication date: several element spellings feed the same writer.
      element :pubDate, :as => :published
      element :pubdate, :as => :published
      element :"dc:date", :as => :published
      element :"dc:Date", :as => :published
      element :"dcterms:created", :as => :published

      element :"dcterms:modified", :as => :updated
      element :issued, :as => :published
      elements :category, :as => :categories

      # Media extensions and enclosure.
      element :"media:content", :as => :media_content, :value => :url
      element :"media:description", :as => :media_description
      element :"media:thumbnail", :as => :media_thumbnail, :value => :url
      element :enclosure, :value => :url

      element :guid, :as => :id
    end

  end

end