julien51-feedzirra 0.0.13

Sign up to get free protection for your applications and to get access to all the features.
Files changed (35) hide show
  1. data/README.rdoc +169 -0
  2. data/README.textile +194 -0
  3. data/Rakefile +56 -0
  4. data/lib/core_ext/date.rb +21 -0
  5. data/lib/core_ext/string.rb +9 -0
  6. data/lib/feedzirra/feed.rb +317 -0
  7. data/lib/feedzirra/feed_entry_utilities.rb +51 -0
  8. data/lib/feedzirra/feed_utilities.rb +71 -0
  9. data/lib/feedzirra/parser/atom.rb +26 -0
  10. data/lib/feedzirra/parser/atom_entry.rb +34 -0
  11. data/lib/feedzirra/parser/atom_feed_burner.rb +27 -0
  12. data/lib/feedzirra/parser/atom_feed_burner_entry.rb +35 -0
  13. data/lib/feedzirra/parser/itunes_rss.rb +50 -0
  14. data/lib/feedzirra/parser/itunes_rss_item.rb +32 -0
  15. data/lib/feedzirra/parser/itunes_rss_owner.rb +12 -0
  16. data/lib/feedzirra/parser/rss.rb +28 -0
  17. data/lib/feedzirra/parser/rss_entry.rb +40 -0
  18. data/lib/feedzirra/push_parser.rb +56 -0
  19. data/lib/feedzirra.rb +37 -0
  20. data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
  21. data/spec/feedzirra/feed_spec.rb +551 -0
  22. data/spec/feedzirra/feed_utilities_spec.rb +149 -0
  23. data/spec/feedzirra/parser/atom_entry_spec.rb +45 -0
  24. data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +42 -0
  25. data/spec/feedzirra/parser/atom_feed_burner_spec.rb +39 -0
  26. data/spec/feedzirra/parser/atom_spec.rb +35 -0
  27. data/spec/feedzirra/parser/itunes_rss_item_spec.rb +52 -0
  28. data/spec/feedzirra/parser/itunes_rss_owner_spec.rb +18 -0
  29. data/spec/feedzirra/parser/itunes_rss_spec.rb +50 -0
  30. data/spec/feedzirra/parser/rss_entry_spec.rb +41 -0
  31. data/spec/feedzirra/parser/rss_spec.rb +41 -0
  32. data/spec/feedzirra/push_parser_spec.rb +16 -0
  33. data/spec/spec.opts +2 -0
  34. data/spec/spec_helper.rb +58 -0
  35. metadata +145 -0
@@ -0,0 +1,317 @@
1
module Feedzirra
  # Raised when no registered parser recognizes a feed's XML.
  class NoParserAvailable < StandardError; end

  # Class-level API for parsing raw feed XML and for fetching/updating feeds
  # over HTTP using curb's Curl::Multi machinery.
  class Feed
    USER_AGENT = "feedzirra http://github.com/pauldix/feedzirra/tree/master"
    TIMEOUT = 30

    # Takes a raw XML feed and attempts to parse it. If no parser is available a Feedzirra::NoParserAvailable exception is raised.
    #
    # === Parameters
    # [xml<String>] The XML that you would like parsed.
    # === Returns
    # An instance of the determined feed type. By default a Feedzirra::Parser::Atom,
    # Feedzirra::Parser::AtomFeedBurner, or Feedzirra::Parser::RSS object.
    # === Raises
    # Feedzirra::NoParserAvailable : If no valid parser classes could be found for the feed.
    def self.parse(xml)
      if parser = determine_feed_parser_for_xml(xml)
        parser.parse(xml)
      else
        raise NoParserAvailable.new("No valid parser for XML.")
      end
    end

    # Determines the correct parser class to use for parsing the feed.
    #
    # === Parameters
    # [xml<String>] The XML that you would like determine the parser for.
    # === Returns
    # The class name of the parser that can handle the XML, or nil.
    def self.determine_feed_parser_for_xml(xml)
      # only the start of the document is needed to sniff the format
      start_of_doc = xml.slice(0, 1000)
      feed_classes.detect {|klass| klass.able_to_parse?(start_of_doc)}
    end

    # Adds a new feed parsing class that will be used for parsing.
    #
    # === Parameters
    # [klass<Constant>] The class/constant that you want to register.
    # === Returns
    # An updated array of feed parser class names.
    def self.add_feed_class(klass)
      # unshift so user-registered parsers take precedence over the defaults
      feed_classes.unshift klass
    end

    # Provides a list of registered feed parsing classes.
    #
    # === Returns
    # An array of class names.
    def self.feed_classes
      @feed_classes ||= [Feedzirra::Parser::RSS, Feedzirra::Parser::AtomFeedBurner, Feedzirra::Parser::Atom]
    end

    # Makes all entry types look for the passed in element to parse. This is actually just a call to
    # element (a SAXMachine call) in the class.
    #
    # === Parameters
    # [element_tag<String>]
    # [options<Hash>] Valid keys are same as with SAXMachine
    def self.add_common_feed_entry_element(element_tag, options = {})
      # need to think of a better way to do this. will break for people who want this behavior
      # across their added classes -- it relies on the "<FeedClass>Entry" naming convention.
      feed_classes.map{|k| eval("#{k}Entry") }.each do |klass|
        klass.send(:element, element_tag, options)
      end
    end

    # Fetches and returns the raw XML for each URL provided.
    #
    # === Parameters
    # [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
    # [options<Hash>] Valid keys for this argument as as followed:
    #                 :user_agent - String that overrides the default user agent.
    #                 :if_modified_since - Time object representing when the feed was last updated.
    #                 :if_none_match - String that's normally an etag for the request that was stored previously.
    #                 :on_success - Block that gets executed after a successful request.
    #                 :on_failure - Block that gets executed after a failed request.
    # === Returns
    # A String of XML if a single URL is passed.
    #
    # A Hash if multiple URL's are passed. The key will be the URL, and the value the XML.
    def self.fetch_raw(urls, options = {})
      url_queue = [*urls]
      multi = Curl::Multi.new
      responses = {}
      url_queue.each do |url|
        easy = Curl::Easy.new(url) do |curl|
          curl.headers["User-Agent"]        = (options[:user_agent] || USER_AGENT)
          curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
          curl.headers["If-None-Match"]     = options[:if_none_match] if options.has_key?(:if_none_match)
          curl.headers["Accept-encoding"]   = 'gzip, deflate' if options.has_key?(:compress)
          curl.follow_location = true
          curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)

          curl.on_success do |c|
            responses[url] = decode_content(c)
          end
          curl.on_failure do |c|
            # on failure the HTTP status code is stored instead of a body
            responses[url] = c.response_code
          end
        end
        multi.add(easy)
      end

      multi.perform
      urls.is_a?(String) ? responses.values.first : responses
    end

    # Fetches and returns the parsed XML for each URL provided.
    #
    # === Parameters
    # [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
    # [options<Hash>] Valid keys for this argument as as followed:
    #                 * :user_agent - String that overrides the default user agent.
    #                 * :if_modified_since - Time object representing when the feed was last updated.
    #                 * :if_none_match - String, an etag for the request that was stored previously.
    #                 * :on_success - Block that gets executed after a successful request.
    #                 * :on_failure - Block that gets executed after a failed request.
    # === Returns
    # A Feed object if a single URL is passed.
    #
    # A Hash if multiple URL's are passed. The key will be the URL, and the value the Feed object.
    def self.fetch_and_parse(urls, options = {})
      url_queue = [*urls]
      multi = Curl::Multi.new
      responses = {}

      # I broke these down so I would only try to do 30 simultaneously because
      # I was getting weird errors when doing a lot. As one finishes it pops another off the queue.
      url_queue.slice!(0, 30).each do |url|
        add_url_to_multi(multi, url, url_queue, responses, options)
      end

      multi.perform
      return urls.is_a?(String) ? responses.values.first : responses
    end

    # Decodes the XML document if it was compressed.
    #
    # === Parameters
    # [c<Curl::Easy>] The Curl::Easy response object from the request.
    # === Returns
    # A decoded string of XML.
    def self.decode_content(c)
      if c.header_str.match(/Content-Encoding: gzip/)
        begin
          gz  = Zlib::GzipReader.new(StringIO.new(c.body_str))
          xml = gz.read
          gz.close
        rescue Zlib::GzipFile::Error
          # Maybe this is not gzipped? Fall back to the raw body.
          xml = c.body_str
        end
      elsif c.header_str.match(/Content-Encoding: deflate/)
        xml = Zlib::Inflate.inflate(c.body_str)
      else
        xml = c.body_str
      end

      xml
    end

    # Updates each feed for each Feed object provided.
    #
    # === Parameters
    # [feeds<Feed> or <Array>] A single feed object, or an array of feed objects.
    # [options<Hash>] Valid keys for this argument as as followed:
    #                 * :user_agent - String that overrides the default user agent.
    #                 * :on_success - Block that gets executed after a successful request.
    #                 * :on_failure - Block that gets executed after a failed request.
    # === Returns
    # An updated Feed object if a single URL is passed.
    #
    # A Hash if multiple Feeds are passed. The key will be the URL, and the value the updated Feed object.
    def self.update(feeds, options = {})
      feed_queue = [*feeds]
      multi = Curl::Multi.new
      responses = {}

      # same 30-at-a-time throttling as fetch_and_parse
      feed_queue.slice!(0, 30).each do |feed|
        add_feed_to_multi(multi, feed, feed_queue, responses, options)
      end

      multi.perform
      responses.size == 1 ? responses.values.first : responses.values
    end

    # An abstraction for adding a feed by URL to the passed Curb::multi stack.
    #
    # === Parameters
    # [multi<Curl::Multi>] The Curl::Multi object that the request should be added too.
    # [url<String>] The URL of the feed that you would like to be fetched.
    # [url_queue<Array>] An array of URLs that are queued for request.
    # [responses<Hash>] Existing responses that you want the response from the request added to.
    # [options<Hash>] Valid keys for this argument as as followed:
    #                 * :user_agent - String that overrides the default user agent.
    #                 * :on_success - Block that gets executed after a successful request.
    #                 * :on_failure - Block that gets executed after a failed request.
    # === Returns
    # The updated Curl::Multi object with the request details added to it's stack.
    def self.add_url_to_multi(multi, url, url_queue, responses, options)
      easy = Curl::Easy.new(url) do |curl|
        curl.headers["User-Agent"]        = (options[:user_agent] || USER_AGENT)
        curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
        curl.headers["If-None-Match"]     = options[:if_none_match] if options.has_key?(:if_none_match)
        curl.headers["Accept-encoding"]   = 'gzip, deflate' if options.has_key?(:compress)
        curl.follow_location = true
        curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)

        curl.on_success do |c|
          # as one request finishes, pull the next URL off the queue
          add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
          xml = decode_content(c)
          klass = determine_feed_parser_for_xml(xml)

          if klass
            begin
              feed = klass.parse(xml)
              feed.feed_url = c.last_effective_url
              feed.etag = etag_from_header(c.header_str)
              feed.last_modified = last_modified_from_header(c.header_str)
              responses[url] = feed
              options[:on_success].call(url, feed) if options.has_key?(:on_success)
            rescue StandardError => e
              # NOTE: was `rescue Exception`, which also swallowed SignalException,
              # SystemExit, etc. StandardError is the correct net for parse failures.
              options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
            end
          else
            # puts "Error determining parser for #{url} - #{c.last_effective_url}"
            # raise NoParserAvailable.new("no valid parser for content.") (this would unfortunately fail the whole 'multi', so it's not really useable)
            options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
          end
        end

        curl.on_failure do |c|
          add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
          responses[url] = c.response_code
          options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
        end
      end
      multi.add(easy)
    end

    # An abstraction for adding a feed by a Feed object to the passed Curb::multi stack.
    #
    # === Parameters
    # [multi<Curl::Multi>] The Curl::Multi object that the request should be added too.
    # [feed<Feed>] A feed object that you would like to be fetched.
    # [feed_queue<Array>] An array of feed objects that are queued for request.
    # [responses<Hash>] Existing responses that you want the response from the request added to.
    # [options<Hash>] Valid keys for this argument as as followed:
    #                 * :user_agent - String that overrides the default user agent.
    #                 * :on_success - Block that gets executed after a successful request.
    #                 * :on_failure - Block that gets executed after a failed request.
    # === Returns
    # The updated Curl::Multi object with the request details added to it's stack.
    def self.add_feed_to_multi(multi, feed, feed_queue, responses, options)
      easy = Curl::Easy.new(feed.feed_url) do |curl|
        curl.headers["User-Agent"]        = (options[:user_agent] || USER_AGENT)
        # conditional GET based on what we saw when the feed was last fetched
        curl.headers["If-Modified-Since"] = feed.last_modified.httpdate if feed.last_modified
        curl.headers["If-None-Match"]     = feed.etag if feed.etag
        curl.timeout = (options[:timeout] || TIMEOUT)
        curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
        curl.follow_location = true

        curl.on_success do |c|
          begin
            add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
            updated_feed = Feed.parse(c.body_str)
            updated_feed.feed_url = c.last_effective_url
            updated_feed.etag = etag_from_header(c.header_str)
            updated_feed.last_modified = last_modified_from_header(c.header_str)
            feed.update_from_feed(updated_feed)
            responses[feed.feed_url] = feed
            options[:on_success].call(feed) if options.has_key?(:on_success)
          rescue StandardError => e
            # NOTE: was `rescue Exception` -- see add_url_to_multi
            options[:on_failure].call(feed, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
          end
        end

        curl.on_failure do |c|
          add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
          response_code = c.response_code
          if response_code == 304 # it's not modified. this isn't an error condition
            responses[feed.feed_url] = feed
            options[:on_success].call(feed) if options.has_key?(:on_success)
          else
            # bug fix: was responses[feed.url] -- every other path keys this
            # hash by feed_url, and RSS feeds expose feed_url, not url, reliably
            responses[feed.feed_url] = c.response_code
            options[:on_failure].call(feed, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
          end
        end
      end
      multi.add(easy)
    end

    # Determines the etag from the request headers.
    #
    # === Parameters
    # [header<String>] Raw request header returned from the request
    # === Returns
    # A string of the etag or nil if it cannot be found in the headers.
    def self.etag_from_header(header)
      header =~ /.*ETag:\s(.*)\r/
      $1
    end

    # Determines the last modified date from the request headers.
    #
    # === Parameters
    # [header<String>] Raw request header returned from the request
    # === Returns
    # A Time object of the last modified date or nil if it cannot be found in the headers.
    def self.last_modified_from_header(header)
      header =~ /.*Last-Modified:\s(.*)\r/
      Time.parse($1) if $1
    end
  end
end
@@ -0,0 +1,51 @@
1
module Feedzirra
  # Shared behavior mixed into the entry (item) parser classes: date
  # normalization, sensible fallbacks for missing fields, and sanitization.
  module FeedEntryUtilities
    # Published time of the entry, falling back to the updated time when the
    # feed supplied no publish time.
    def published
      @published || @updated
    end

    # Parses a datetime string and normalizes it to GMT (via the
    # DateTime#feed_utils_to_gm_time core extension defined in core_ext/date.rb).
    #
    # === Returns
    # A time object, or nil when the string cannot be parsed.
    def parse_datetime(string)
      begin
        DateTime.parse(string).feed_utils_to_gm_time
      rescue
        puts "DATE CAN'T BE PARSED: #{string}"
        nil
      end
    end

    ##
    # Returns the id of the entry or its url if no id is present, as some formats don't support it
    def id
      @id || @url
    end

    ##
    # Summary is @summary, or @content, or nil.
    def summary
      @summary || @content
    end

    ##
    # Writer for published. By default, we keep the "oldest" publish time found.
    # Bug fix: unparseable values are now ignored -- previously a nil parse
    # result crashed the `parsed < @published` comparison with NoMethodError
    # when a publish time was already set.
    def published=(val)
      parsed = parse_datetime(val)
      @published = parsed if parsed && (!@published || parsed < @published)
    end

    ##
    # Writer for updated. By default, we keep the most recent update time found.
    # Unparseable values are ignored (see published= above for rationale).
    def updated=(val)
      parsed = parse_datetime(val)
      @updated = parsed if parsed && (!@updated || parsed > @updated)
    end

    # Sanitizes (in place) each text field present on the entry, using the
    # String#sanitize! core extension.
    def sanitize!
      self.title.sanitize!   if self.title
      self.author.sanitize!  if self.author
      self.summary.sanitize! if self.summary
      self.content.sanitize! if self.content
    end

    alias_method :last_modified, :published
  end
end
@@ -0,0 +1,71 @@
1
module Feedzirra
  # Shared behavior mixed into the feed parser classes: change tracking,
  # merging a freshly fetched copy of a feed into an existing one, and
  # entry sanitization. Including classes must provide an +entries+ accessor
  # plus accessors for each UPDATABLE_ATTRIBUTES name.
  module FeedUtilities
    # Attributes that update_from_feed will copy from the fetched feed.
    UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified)

    attr_writer   :new_entries, :updated, :last_modified
    attr_accessor :etag

    # Last-modified time of the feed, derived lazily from the most recently
    # published entry when it hasn't been set explicitly (e.g. from an HTTP
    # Last-Modified header).
    def last_modified
      @last_modified ||= begin
        entry = entries.reject {|e| e.published.nil? }.sort_by { |entry| entry.published if entry.published }.last
        entry ? entry.published : nil
      end
    end

    # True when the last update_from_feed changed any updatable attribute.
    def updated?
      @updated
    end

    # Entries discovered by the most recent update_from_feed call.
    def new_entries
      @new_entries ||= []
    end

    def has_new_entries?
      new_entries.size > 0
    end

    # Merges a freshly fetched copy of this feed into the receiver: prepends
    # any new entries and syncs each of the UPDATABLE_ATTRIBUTES.
    def update_from_feed(feed)
      self.new_entries += find_new_entries_for(feed)
      self.entries.unshift(*self.new_entries)

      # Bug fix: this used to be UPDATABLE_ATTRIBUTES.any? { ... }, which
      # short-circuits after the first changed attribute and silently skips
      # syncing the rest. select evaluates every attribute.
      changed = UPDATABLE_ATTRIBUTES.select { |name| update_attribute(feed, name) }
      updated! unless changed.empty?
    end

    # Copies attribute +name+ from +feed+ when it differs from ours.
    # Returns the newly assigned value when a change was made, nil otherwise.
    def update_attribute(feed, name)
      old_value, new_value = send(name), feed.send(name)

      if old_value != new_value
        send("#{name}=", new_value)
      end
    end

    # Sanitizes every entry in place (see FeedEntryUtilities#sanitize!).
    def sanitize_entries!
      entries.each {|entry| entry.sanitize!}
    end

    private

    def updated!
      @updated = true
    end

    # this implementation is a hack, which is why it's so ugly.
    # it's to get around the fact that not all feeds have a published date.
    # however, they're always ordered with the newest one first.
    # So we go through the entries just parsed and insert each one as a new entry
    # until we get to one that has the same url as the newest for the feed.
    def find_new_entries_for(feed)
      # Bug fix: when we have no entries yet, everything in the fetched feed
      # is new. Previously entries.first was nil here and latest_entry.url
      # raised NoMethodError.
      return feed.entries if entries.empty?

      latest_entry = self.entries.first
      found_new_entries = []
      feed.entries.each do |entry|
        break if entry.url == latest_entry.url
        found_new_entries << entry
      end
      found_new_entries
    end

    def existing_entry?(test_entry)
      entries.any? { |entry| entry.url == test_entry.url }
    end
  end
end
@@ -0,0 +1,26 @@
1
module Feedzirra

  module Parser
    # == Summary
    # Parser for dealing with Atom feeds.
    #
    # == Attributes
    # * title
    # * feed_url
    # * url
    # * entries
    class Atom
      include SAXMachine
      include FeedUtilities
      element :title
      # the alternate (text/html) link is exposed as the feed's site url
      element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
      # the application/atom+xml link is the feed's own (self) url
      element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
      elements :entry, :as => :entries, :class => AtomEntry

      # Cheap sniff over the start of the document: any mention of "Atom" or
      # of the legacy purl.org atom namespace claims the feed for this parser.
      def self.able_to_parse?(xml) #:nodoc:
        xml =~ /(Atom)|(#{Regexp.escape("http://purl.org/atom")})/
      end
    end
  end

end
@@ -0,0 +1,34 @@
1
module Feedzirra

  module Parser
    # == Summary
    # Parser for dealing with Atom feed entries.
    #
    # == Attributes
    # * title
    # * url
    # * author
    # * content
    # * summary
    # * published
    # * categories
    class AtomEntry
      include SAXMachine
      include FeedEntryUtilities
      element :title
      # the entry's permalink: the alternate text/html link
      element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
      element :name, :as => :author
      element :content
      element :summary
      element :published
      element :id
      # older Atom drafts used <created>/<issued> for the publish time;
      # FeedEntryUtilities#published= keeps the oldest value seen
      element :created, :as => :published
      element :issued, :as => :published
      element :updated
      element :modified, :as => :updated
      elements :category, :as => :categories, :value => :term
    end

  end

end
@@ -0,0 +1,27 @@
1
module Feedzirra

  module Parser
    # == Summary
    # Parser for dealing with Feedburner Atom feeds.
    #
    # == Attributes
    # * title
    # * feed_url
    # * url
    # * entries
    class AtomFeedBurner
      include SAXMachine
      include FeedUtilities
      element :title
      # the alternate (text/html) link is the site url
      element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
      # the application/atom+xml link is the feed's own (self) url
      element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
      elements :entry, :as => :entries, :class => AtomFeedBurnerEntry

      # Claims the feed when it both looks like Atom and mentions feedburner.
      # Feed.feed_classes checks this parser before the plain Atom one.
      def self.able_to_parse?(xml) #:nodoc:
        (xml =~ /Atom/ && xml =~ /feedburner/) || false
      end
    end

  end

end
@@ -0,0 +1,35 @@
1
module Feedzirra

  module Parser
    # == Summary
    # Parser for dealing with Feedburner Atom feed entries.
    #
    # == Attributes
    # * title
    # * url
    # * author
    # * content
    # * summary
    # * published
    # * categories
    class AtomFeedBurnerEntry
      include SAXMachine
      include FeedEntryUtilities
      element :title
      element :name, :as => :author
      element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
      # feedburner:origLink carries the original article url (instead of the
      # feedburner tracking redirect); both map to :url
      # NOTE(review): relies on SAXMachine's handling when both elements are
      # present -- confirm origLink wins over the tracking link
      element :"feedburner:origLink", :as => :url
      element :summary
      element :content
      element :published
      element :id
      # legacy Atom draft names for the publish time
      element :issued, :as => :published
      element :created, :as => :published
      element :updated
      element :modified, :as => :updated
      elements :category, :as => :categories, :value => :term
    end

  end

end
@@ -0,0 +1,50 @@
1
module Feedzirra

  module Parser
    # iTunes is RSS 2.0 + some apple extensions
    # Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
    class ITunesRSS
      include SAXMachine
      include FeedUtilities

      # RSS has no self-referencing link, so the fetcher assigns the
      # requested url here
      attr_accessor :feed_url

      # RSS 2.0 elements that need including
      element :copyright
      element :description
      element :language
      element :managingEditor
      element :title
      element :link, :as => :url

      # If author is not present use managingEditor on the channel
      element :"itunes:author", :as => :itunes_author
      element :"itunes:block", :as => :itunes_block
      element :"itunes:image", :value => :href, :as => :itunes_image
      element :"itunes:explicit", :as => :itunes_explicit
      element :"itunes:keywords", :as => :itunes_keywords
      # New URL for the podcast feed
      element :"itunes:new-feed-url", :as => :itunes_new_feed_url
      element :"itunes:subtitle", :as => :itunes_subtitle
      # If summary is not present, use the description tag
      element :"itunes:summary", :as => :itunes_summary

      # iTunes RSS feeds can have multiple main categories...
      # ...and multiple sub-categories per category
      # TODO subcategories not supported correctly - they are at the same level
      # as the main categories
      elements :"itunes:category", :as => :itunes_categories, :value => :text

      elements :"itunes:owner", :as => :itunes_owners, :class => ITunesRSSOwner

      elements :item, :as => :entries, :class => ITunesRSSItem

      # Claims the feed when the iTunes podcast DTD namespace is declared.
      def self.able_to_parse?(xml)
        xml =~ /xmlns:itunes=\"http:\/\/www.itunes.com\/dtds\/podcast-1.0.dtd\"/
      end

    end

  end

end
@@ -0,0 +1,32 @@
1
module Feedzirra

  module Parser
    # iTunes extensions to the standard RSS2.0 item
    # Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
    class ITunesRSSItem
      include SAXMachine
      include FeedEntryUtilities
      element :author
      element :guid
      element :title
      element :link, :as => :url
      element :description, :as => :summary
      element :pubDate, :as => :published
      element :"content:encoded", :as => :content

      # If author is not present use author tag on the item
      element :"itunes:author", :as => :itunes_author
      element :"itunes:block", :as => :itunes_block
      element :"itunes:duration", :as => :itunes_duration
      element :"itunes:explicit", :as => :itunes_explicit
      element :"itunes:keywords", :as => :itunes_keywords
      element :"itunes:subtitle", :as => :itunes_subtitle
      # If summary is not present, use the description tag
      element :"itunes:summary", :as => :itunes_summary
      # the enclosure element is mapped three times, once per attribute,
      # since each mapping extracts a single attribute value
      element :enclosure, :value => :length, :as => :enclosure_length
      element :enclosure, :value => :type, :as => :enclosure_type
      element :enclosure, :value => :url, :as => :enclosure_url
    end
  end

end
@@ -0,0 +1,12 @@
1
module Feedzirra

  module Parser
    # Parses the <itunes:owner> element of an iTunes RSS feed (name + email).
    class ITunesRSSOwner
      include SAXMachine
      # NOTE(review): includes FeedUtilities although an owner is not a feed;
      # looks copied from the feed parsers -- harmless but worth confirming
      include FeedUtilities
      element :"itunes:name", :as => :name
      element :"itunes:email", :as => :email
    end
  end

end
@@ -0,0 +1,28 @@
1
module Feedzirra

  module Parser
    # == Summary
    # Parser for dealing with RSS feeds.
    #
    # == Attributes
    # * title
    # * feed_url
    # * url
    # * entries
    class RSS
      include SAXMachine
      include FeedUtilities
      element :title
      element :link, :as => :url
      elements :item, :as => :entries, :class => RSSEntry

      # RSS has no self-referencing link element, so the fetcher assigns the
      # url it actually requested here.
      attr_accessor :feed_url

      # Sniffs the start of the document for an <rss ...> or <rdf ...>
      # opening tag.
      #
      # Bug fix: the alternation was previously ungrouped (/\<rss|rdf/), so a
      # bare "rdf" substring anywhere -- e.g. an rdf namespace declaration
      # inside an Atom feed -- falsely claimed the feed for this parser
      # (which is checked first in Feed.feed_classes).
      def self.able_to_parse?(xml) #:nodoc:
        xml =~ /\<(rss|rdf)/
      end
    end

  end

end