julien51-feedzirra 0.0.13

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (35) hide show
  1. data/README.rdoc +169 -0
  2. data/README.textile +194 -0
  3. data/Rakefile +56 -0
  4. data/lib/core_ext/date.rb +21 -0
  5. data/lib/core_ext/string.rb +9 -0
  6. data/lib/feedzirra/feed.rb +317 -0
  7. data/lib/feedzirra/feed_entry_utilities.rb +51 -0
  8. data/lib/feedzirra/feed_utilities.rb +71 -0
  9. data/lib/feedzirra/parser/atom.rb +26 -0
  10. data/lib/feedzirra/parser/atom_entry.rb +34 -0
  11. data/lib/feedzirra/parser/atom_feed_burner.rb +27 -0
  12. data/lib/feedzirra/parser/atom_feed_burner_entry.rb +35 -0
  13. data/lib/feedzirra/parser/itunes_rss.rb +50 -0
  14. data/lib/feedzirra/parser/itunes_rss_item.rb +32 -0
  15. data/lib/feedzirra/parser/itunes_rss_owner.rb +12 -0
  16. data/lib/feedzirra/parser/rss.rb +28 -0
  17. data/lib/feedzirra/parser/rss_entry.rb +40 -0
  18. data/lib/feedzirra/push_parser.rb +56 -0
  19. data/lib/feedzirra.rb +37 -0
  20. data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
  21. data/spec/feedzirra/feed_spec.rb +551 -0
  22. data/spec/feedzirra/feed_utilities_spec.rb +149 -0
  23. data/spec/feedzirra/parser/atom_entry_spec.rb +45 -0
  24. data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +42 -0
  25. data/spec/feedzirra/parser/atom_feed_burner_spec.rb +39 -0
  26. data/spec/feedzirra/parser/atom_spec.rb +35 -0
  27. data/spec/feedzirra/parser/itunes_rss_item_spec.rb +52 -0
  28. data/spec/feedzirra/parser/itunes_rss_owner_spec.rb +18 -0
  29. data/spec/feedzirra/parser/itunes_rss_spec.rb +50 -0
  30. data/spec/feedzirra/parser/rss_entry_spec.rb +41 -0
  31. data/spec/feedzirra/parser/rss_spec.rb +41 -0
  32. data/spec/feedzirra/push_parser_spec.rb +16 -0
  33. data/spec/spec.opts +2 -0
  34. data/spec/spec_helper.rb +58 -0
  35. metadata +145 -0
@@ -0,0 +1,317 @@
1
+ module Feedzirra
2
+ class NoParserAvailable < StandardError; end
3
+
4
+ class Feed
5
+ USER_AGENT = "feedzirra http://github.com/pauldix/feedzirra/tree/master"
6
+ TIMEOUT = 30
7
+
8
# Parses a raw XML feed with the first registered parser class that accepts it.
#
# === Parameters
# [xml<String>] The raw XML document to parse.
# === Returns
# Whatever the selected parser class returns from its own +parse+ method.
# === Raises
# Feedzirra::NoParserAvailable : If no registered parser class accepts the XML.
def self.parse(xml)
  parser = determine_feed_parser_for_xml(xml)
  raise NoParserAvailable.new("No valid parser for XML.") unless parser

  parser.parse(xml)
end
23
+
24
# Picks the parser class that should handle the given feed.
#
# Only the first 1000 characters of the document are handed to each
# registered class's +able_to_parse?+ check.
#
# === Parameters
# [xml<String>] The XML that you would like to determine the parser for.
# === Returns
# The first class in +feed_classes+ whose +able_to_parse?+ is truthy, or nil if none is.
def self.determine_feed_parser_for_xml(xml)
  head = xml[0, 1000]
  feed_classes.find { |candidate| candidate.able_to_parse?(head) }
end
34
+
35
# Registers an additional feed parsing class. The class is placed at the
# front of the list, so it is consulted before the ones already registered.
#
# === Parameters
# [klass<Constant>] The class/constant that you want to register.
# === Returns
# The updated array of feed parser classes.
def self.add_feed_class(klass)
  feed_classes.insert(0, klass)
end
44
+
45
# Lists the registered feed parsing classes, in the order they are tried.
# The list is built on first use and the same array is returned afterwards.
#
# === Returns
# An array of parser classes.
def self.feed_classes
  return @feed_classes if @feed_classes

  @feed_classes = [Feedzirra::Parser::RSS, Feedzirra::Parser::AtomFeedBurner, Feedzirra::Parser::Atom]
end
52
+
53
# Makes all entry types look for the passed in element to parse. This is
# just a call to +element+ (a SAXMachine call) on each entry class.
#
# The entry class for a feed class is found by naming convention: the feed
# class name with "Entry" appended (e.g. Foo::Bar -> Foo::BarEntry).
#
# === Parameters
# [element_tag<String>] The element to parse on every entry class.
# [options<Hash>] Valid keys are same as with SAXMachine
# === Returns
# The array of entry classes the element was added to.
# === Raises
# NameError : If a registered feed class has no matching "...Entry" constant.
def self.add_common_feed_entry_element(element_tag, options = {})
  # need to think of a better way to do this. will break for people who want this behavior
  # across their added classes
  entry_classes = feed_classes.map do |feed_class|
    # Resolve the constant path segment by segment instead of eval'ing a
    # string built from the class name.
    "#{feed_class}Entry".split("::").inject(Object) do |scope, const_name|
      scope.const_get(const_name)
    end
  end

  # send is used because +element+ may not be publicly callable.
  entry_classes.each do |entry_class|
    entry_class.send(:element, element_tag, options)
  end
end
66
+
67
# Fetches and returns the raw XML for each URL provided.
#
# === Parameters
# [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
# [options<Hash>] Keys read by this method:
#                 :user_agent - String that overrides the default user agent.
#                 :if_modified_since - Time object representing when the feed was last updated.
#                 :if_none_match - String that's normally an etag for the request that was stored previously.
#                 :compress - When present, gzip/deflate responses are requested.
#                 :http_authentication - Array that is joined with ':' and used as the user/password.
# === Returns
# A String of XML if a single URL (a String) is passed.
#
# A Hash if multiple URL's are passed. The key will be the URL, and the value the XML.
#
# For a request that fails, the value is the HTTP response code instead of XML.
def self.fetch_raw(urls, options = {})
  responses = {}
  multi = Curl::Multi.new

  Array(urls).each do |url|
    request = Curl::Easy.new(url) do |curl|
      curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
      if options.has_key?(:if_modified_since)
        curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate
      end
      if options.has_key?(:if_none_match)
        curl.headers["If-None-Match"] = options[:if_none_match]
      end
      if options.has_key?(:compress)
        curl.headers["Accept-encoding"] = 'gzip, deflate'
      end
      curl.follow_location = true
      if options.has_key?(:http_authentication)
        curl.userpwd = options[:http_authentication].join(':')
      end

      curl.on_success { |c| responses[url] = decode_content(c) }
      curl.on_failure { |c| responses[url] = c.response_code }
    end
    multi.add(request)
  end

  multi.perform

  if urls.is_a?(String)
    responses.values.first
  else
    responses
  end
end
107
+
108
# Fetches and returns the parsed XML for each URL provided.
#
# === Parameters
# [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
# [options<Hash>] Passed through unchanged to add_url_to_multi, which reads
#                 keys such as:
#                 * :user_agent - String that overrides the default user agent.
#                 * :if_modified_since - Time object representing when the feed was last updated.
#                 * :if_none_match - String, an etag for the request that was stored previously.
#                 * :on_success - Block that gets executed after a successful request.
#                 * :on_failure - Block that gets executed after a failed request.
# === Returns
# A Feed object if a single URL (a String) is passed.
#
# A Hash if multiple URL's are passed. The key will be the URL, and the value the Feed object.
def self.fetch_and_parse(urls, options = {})
  pending = Array(urls)
  responses = {}
  multi = Curl::Multi.new

  # Only the first 30 requests are started up front (doing a lot at once
  # produced weird errors). Each request that finishes pulls the next URL
  # off +pending+.
  first_batch = pending.shift(30)
  first_batch.each do |url|
    add_url_to_multi(multi, url, pending, responses, options)
  end

  multi.perform

  if urls.is_a?(String)
    responses.values.first
  else
    responses
  end
end
136
+
137
# Decodes the XML document if it was compressed.
#
# The Content-Encoding header is matched case-insensitively. A body that
# is announced as compressed but cannot be decompressed is returned as-is.
#
# === Parameters
# [c<Curl::Easy>] The response object from the request; only +header_str+
#                 and +body_str+ are read.
# === Returns
# A decoded string of XML.
def self.decode_content(c)
  body = c.body_str

  case c.header_str
  when /Content-Encoding:[ \t]*gzip/i
    begin
      gz = Zlib::GzipReader.new(StringIO.new(body))
      begin
        gz.read
      ensure
        # Close the reader even when reading fails part-way through.
        gz.close
      end
    rescue Zlib::GzipFile::Error
      # Maybe this is not gzipped?
      body
    end
  when /Content-Encoding:[ \t]*deflate/i
    begin
      Zlib::Inflate.inflate(body)
    rescue Zlib::DataError
      # Some servers send a raw deflate stream without the zlib wrapper;
      # a negative window size tells zlib not to expect the header.
      inflater = Zlib::Inflate.new(-Zlib::MAX_WBITS)
      begin
        inflater.inflate(body)
      rescue Zlib::Error
        # Maybe this is not deflated at all?
        body
      ensure
        inflater.close
      end
    end
  else
    body
  end
end
161
+
162
# Updates each Feed object provided by re-fetching its feed_url.
#
# === Parameters
# [feeds<Feed> or <Array>] A single feed object, or an array of feed objects.
# [options<Hash>] Passed through unchanged to add_feed_to_multi, which reads
#                 keys such as:
#                 * :user_agent - String that overrides the default user agent.
#                 * :on_success - Block that gets executed after a successful request.
#                 * :on_failure - Block that gets executed after a failed request.
# === Returns
# The single recorded response when exactly one response was recorded.
#
# Otherwise an Array of the recorded responses. A response is the updated
# Feed object, or the HTTP response code for a request that failed.
def self.update(feeds, options = {})
  pending = Array(feeds)
  responses = {}
  multi = Curl::Multi.new

  # Start at most 30 requests; each finished request pulls the next feed
  # off +pending+.
  first_batch = pending.shift(30)
  first_batch.each do |feed|
    add_feed_to_multi(multi, feed, pending, responses, options)
  end

  multi.perform

  results = responses.values
  if responses.size == 1
    results.first
  else
    results
  end
end
186
+
187
# An abstraction for adding a feed by URL to the passed Curl::Multi stack.
#
# When the request finishes (success or failure) the next URL, if any, is
# taken off +url_queue+ and added to the same multi.
#
# === Parameters
# [multi<Curl::Multi>] The Curl::Multi object that the request should be added to.
# [url<String>] The URL of the feed that you would like to be fetched.
# [url_queue<Array>] An array of URLs that are queued for request.
# [responses<Hash>] Existing responses that you want the response from the request added to.
# [options<Hash>] Valid keys for this argument are as follows:
#                 * :user_agent - String that overrides the default user agent.
#                 * :if_modified_since - Time object sent as If-Modified-Since.
#                 * :if_none_match - String sent as If-None-Match.
#                 * :compress - When present, gzip/deflate responses are requested.
#                 * :http_authentication - Array joined with ':' and used as user/password.
#                 * :timeout - Request timeout; only applied when given.
#                 * :on_success - Block called with (url, feed) after a feed was fetched and parsed.
#                 * :on_failure - Block called with (url, response_code, header, body) when the
#                   request fails, no parser matches, or parsing raises.
# === Returns
# The result of adding the request to the Curl::Multi object.
def self.add_url_to_multi(multi, url, url_queue, responses, options)
  easy = Curl::Easy.new(url) do |curl|
    curl.headers["User-Agent"]        = (options[:user_agent] || USER_AGENT)
    curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
    curl.headers["If-None-Match"]     = options[:if_none_match] if options.has_key?(:if_none_match)
    curl.headers["Accept-encoding"]   = 'gzip, deflate' if options.has_key?(:compress)
    curl.follow_location = true
    curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
    # Opt-in only, so callers that never passed :timeout see no change.
    curl.timeout = options[:timeout] if options[:timeout]

    curl.on_success do |c|
      add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
      xml = decode_content(c)
      klass = determine_feed_parser_for_xml(xml)

      if klass
        begin
          feed = klass.parse(xml)
          feed.feed_url = c.last_effective_url
          feed.etag = etag_from_header(c.header_str)
          feed.last_modified = last_modified_from_header(c.header_str)
          responses[url] = feed
          options[:on_success].call(url, feed) if options.has_key?(:on_success)
        rescue StandardError
          # Only ordinary errors are turned into an on_failure notification;
          # interrupts and exit requests are allowed to propagate.
          options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
        end
      else
        # Raising NoParserAvailable here would fail the whole multi, so the
        # problem is only reported through on_failure.
        options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
      end
    end

    curl.on_failure do |c|
      add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
      responses[url] = c.response_code
      options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
    end
  end
  multi.add(easy)
end
241
+
242
# An abstraction for adding a feed by a Feed object to the passed Curl::Multi stack.
#
# When the request finishes (success or failure) the next feed, if any, is
# taken off +feed_queue+ and added to the same multi. All results are
# stored in +responses+ under the feed's feed_url.
#
# === Parameters
# [multi<Curl::Multi>] The Curl::Multi object that the request should be added to.
# [feed<Feed>] A feed object that you would like to be fetched.
# [feed_queue<Array>] An array of feed objects that are queued for request.
# [responses<Hash>] Existing responses that you want the response from the request added to.
# [options<Hash>] Valid keys for this argument are as follows:
#                 * :user_agent - String that overrides the default user agent.
#                 * :timeout - Request timeout; defaults to TIMEOUT.
#                 * :http_authentication - Array joined with ':' and used as user/password.
#                 * :on_success - Block called with (feed) after a successful update or a 304.
#                 * :on_failure - Block called with (feed, response_code, header, body) when the
#                   request fails or the response cannot be parsed.
# === Returns
# The result of adding the request to the Curl::Multi object.
def self.add_feed_to_multi(multi, feed, feed_queue, responses, options)
  easy = Curl::Easy.new(feed.feed_url) do |curl|
    curl.headers["User-Agent"]        = (options[:user_agent] || USER_AGENT)
    curl.headers["If-Modified-Since"] = feed.last_modified.httpdate if feed.last_modified
    curl.headers["If-None-Match"]     = feed.etag if feed.etag
    curl.timeout = (options[:timeout] || TIMEOUT)
    curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
    curl.follow_location = true

    curl.on_success do |c|
      begin
        add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
        updated_feed = Feed.parse(c.body_str)
        updated_feed.feed_url = c.last_effective_url
        updated_feed.etag = etag_from_header(c.header_str)
        updated_feed.last_modified = last_modified_from_header(c.header_str)
        feed.update_from_feed(updated_feed)
        responses[feed.feed_url] = feed
        options[:on_success].call(feed) if options.has_key?(:on_success)
      rescue StandardError
        # Only ordinary errors are turned into an on_failure notification;
        # interrupts and exit requests are allowed to propagate.
        options[:on_failure].call(feed, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
      end
    end

    curl.on_failure do |c|
      add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
      response_code = c.response_code
      if response_code == 304 # it's not modified. this isn't an error condition
        responses[feed.feed_url] = feed
        options[:on_success].call(feed) if options.has_key?(:on_success)
      else
        # Keyed by feed_url like every other branch, so two feeds that share
        # a site url do not overwrite each other's result.
        responses[feed.feed_url] = response_code
        options[:on_failure].call(feed, response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
      end
    end
  end
  multi.add(easy)
end
294
+
295
# Determines the etag from the response headers.
#
# The header name is matched case-insensitively at the start of a line,
# and the value runs to the end of that line.
#
# === Parameters
# [header<String>] Raw response header returned from the request
# === Returns
# A string of the etag or nil if it cannot be found in the headers.
def self.etag_from_header(header)
  match = /^ETag:[ \t]*([^\r\n]*)/i.match(header)
  return nil unless match

  match[1].strip
end
305
+
306
# Determines the last modified date from the response headers.
#
# The header name is matched case-insensitively at the start of a line.
#
# === Parameters
# [header<String>] Raw response header returned from the request
# === Returns
# A Time object of the last modified date, or nil if the header cannot be
# found or its value cannot be parsed as a time.
def self.last_modified_from_header(header)
  match = /^Last-Modified:[ \t]*([^\r\n]*)/i.match(header)
  return nil unless match

  Time.parse(match[1])
rescue ArgumentError
  # Time.parse raises ArgumentError for a value it cannot parse.
  nil
end
316
+ end
317
+ end
@@ -0,0 +1,51 @@
1
module Feedzirra
  # Helpers shared by the feed entry parser classes: date handling,
  # fallbacks between related fields, and sanitizing.
  module FeedEntryUtilities
    # The publication time of the entry, falling back to its update time.
    def published
      @published || @updated
    end

    # Parses a date string into a GMT time.
    #
    # NOTE(review): feed_utils_to_gm_time is not defined in this file; it
    # presumably comes from the gem's Date core extension - confirm.
    #
    # Returns nil (after writing a diagnostic to stderr) when the string
    # cannot be parsed.
    def parse_datetime(string)
      DateTime.parse(string).feed_utils_to_gm_time
    rescue StandardError
      # Diagnostics belong on stderr so they never mix with program output.
      warn "DATE CAN'T BE PARSED: #{string}"
      nil
    end

    ##
    # Returns the id of the entry or its url if no id is present, as some formats don't support it
    def id
      @id || @url
    end

    ##
    # Summary is @summary, or @content, or nil.
    def summary
      @summary || @content
    end

    ##
    # Writer for published. By default, we keep the "oldest" publish time found.
    # A value that cannot be parsed leaves the current time untouched.
    def published=(val)
      parsed = parse_datetime(val)
      return if parsed.nil?

      @published = parsed if @published.nil? || parsed < @published
    end

    ##
    # Writer for updated. By default, we keep the most recent update time found.
    # A value that cannot be parsed leaves the current time untouched.
    def updated=(val)
      parsed = parse_datetime(val)
      return if parsed.nil?

      @updated = parsed if @updated.nil? || parsed > @updated
    end

    # Calls sanitize! on the title, author, summary and content, skipping
    # the ones that are not set.
    def sanitize!
      self.title.sanitize! if self.title
      self.author.sanitize! if self.author
      self.summary.sanitize! if self.summary
      self.content.sanitize! if self.content
    end

    alias_method :last_modified, :published
  end
end
@@ -0,0 +1,71 @@
1
module Feedzirra
  # Helpers shared by the feed parser classes for tracking new entries and
  # merging a freshly fetched copy of a feed into an existing one.
  #
  # The including class is expected to provide +entries+ and accessors for
  # the attributes named in UPDATABLE_ATTRIBUTES.
  module FeedUtilities
    UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified)

    attr_writer   :new_entries, :updated, :last_modified
    attr_accessor :etag

    # The explicitly assigned last-modified time, or else the most recent
    # +published+ time among the entries (nil when no entry has one).
    def last_modified
      @last_modified ||= entries.map { |entry| entry.published }.compact.max
    end

    # Whether update_from_feed changed any of the updatable attributes.
    def updated?
      @updated
    end

    # Entries collected by update_from_feed; an empty array by default.
    def new_entries
      @new_entries ||= []
    end

    def has_new_entries?
      new_entries.size > 0
    end

    # Merges a freshly parsed copy of this feed into the receiver: entries
    # not seen before are added to new_entries and put at the front of
    # entries, then every updatable attribute that differs is copied over.
    def update_from_feed(feed)
      self.new_entries += find_new_entries_for(feed)
      self.entries.unshift(*self.new_entries)

      # map (not any?) so that every attribute is visited; any? would stop
      # at the first change and leave the remaining attributes stale.
      changes = UPDATABLE_ATTRIBUTES.map { |name| update_attribute(feed, name) }
      updated! if changes.any?
    end

    # Copies one attribute from +feed+ when its value differs.
    #
    # Returns true when the attribute was changed, false otherwise.
    def update_attribute(feed, name)
      old_value, new_value = send(name), feed.send(name)
      return false if old_value == new_value

      send("#{name}=", new_value)
      true
    end

    def sanitize_entries!
      entries.each { |entry| entry.sanitize! }
    end

    private

    def updated!
      @updated = true
    end

    # Not all feeds carry a published date, but they are ordered newest
    # first. So the freshly parsed entries are taken from the top until one
    # has the same url as the newest entry already known. When no entry is
    # known yet, every entry of +feed+ is new.
    def find_new_entries_for(feed)
      latest_entry = self.entries.first
      feed.entries.take_while do |entry|
        latest_entry.nil? || entry.url != latest_entry.url
      end
    end

    def existing_entry?(test_entry)
      entries.any? { |entry| entry.url == test_entry.url }
    end
  end
end
@@ -0,0 +1,26 @@
1
module Feedzirra
  module Parser
    # == Summary
    # SAXMachine-based parser for Atom feeds.
    #
    # == Attributes
    # * title
    # * feed_url
    # * url
    # * entries
    class Atom
      include SAXMachine
      include FeedUtilities

      element  :title
      element  :link,  :as => :url,      :value => :href, :with => {:type => "text/html"}
      element  :link,  :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
      elements :entry, :as => :entries,  :class => AtomEntry

      # Truthy when the text mentions "Atom" or the purl.org Atom namespace.
      def self.able_to_parse?(xml) #:nodoc:
        xml =~ /(Atom)|(#{Regexp.escape("http://purl.org/atom")})/
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
module Feedzirra
  module Parser
    # == Summary
    # SAXMachine-based parser for the entries of an Atom feed.
    #
    # == Attributes
    # * title
    # * url
    # * author
    # * content
    # * summary
    # * published
    # * categories
    class AtomEntry
      include SAXMachine
      include FeedEntryUtilities

      element  :title
      element  :link,     :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
      element  :name,     :as => :author
      element  :content
      element  :summary
      element  :published
      element  :id
      # <created> and <issued> both feed the published attribute.
      element  :created,  :as => :published
      element  :issued,   :as => :published
      element  :updated
      # <modified> feeds the updated attribute.
      element  :modified, :as => :updated
      elements :category, :as => :categories, :value => :term
    end
  end
end
@@ -0,0 +1,27 @@
1
module Feedzirra
  module Parser
    # == Summary
    # SAXMachine-based parser for Feedburner Atom feeds.
    #
    # == Attributes
    # * title
    # * feed_url
    # * url
    # * entries
    class AtomFeedBurner
      include SAXMachine
      include FeedUtilities

      element  :title
      element  :link,  :as => :url,      :value => :href, :with => {:type => "text/html"}
      element  :link,  :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
      elements :entry, :as => :entries,  :class => AtomFeedBurnerEntry

      # Truthy only when the text mentions both "Atom" and "feedburner";
      # false otherwise.
      def self.able_to_parse?(xml) #:nodoc:
        return false unless xml =~ /Atom/

        (xml =~ /feedburner/) || false
      end
    end
  end
end
@@ -0,0 +1,35 @@
1
module Feedzirra
  module Parser
    # == Summary
    # SAXMachine-based parser for the entries of a Feedburner Atom feed.
    #
    # == Attributes
    # * title
    # * url
    # * author
    # * content
    # * summary
    # * published
    # * categories
    class AtomFeedBurnerEntry
      include SAXMachine
      include FeedEntryUtilities

      element  :title
      element  :name,                  :as => :author
      element  :link,                  :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
      # <feedburner:origLink> is mapped onto url as well.
      element  :"feedburner:origLink", :as => :url
      element  :summary
      element  :content
      element  :published
      element  :id
      # <issued> and <created> both feed the published attribute.
      element  :issued,                :as => :published
      element  :created,               :as => :published
      element  :updated
      # <modified> feeds the updated attribute.
      element  :modified,              :as => :updated
      elements :category,              :as => :categories, :value => :term
    end
  end
end
@@ -0,0 +1,50 @@
1
module Feedzirra
  module Parser
    # Parser for iTunes podcast feeds: RSS 2.0 plus some Apple extensions.
    # Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
    class ITunesRSS
      include SAXMachine
      include FeedUtilities

      attr_accessor :feed_url

      # Plain RSS 2.0 channel elements
      element :copyright
      element :description
      element :language
      element :managingEditor
      element :title
      element :link, :as => :url

      # iTunes extensions
      # (if author is not present use managingEditor on the channel)
      element :"itunes:author",       :as => :itunes_author
      element :"itunes:block",        :as => :itunes_block
      element :"itunes:image",        :value => :href, :as => :itunes_image
      element :"itunes:explicit",     :as => :itunes_explicit
      element :"itunes:keywords",     :as => :itunes_keywords
      # New URL for the podcast feed
      element :"itunes:new-feed-url", :as => :itunes_new_feed_url
      element :"itunes:subtitle",     :as => :itunes_subtitle
      # If summary is not present, use the description tag
      element :"itunes:summary",      :as => :itunes_summary

      # iTunes RSS feeds can have multiple main categories, and multiple
      # sub-categories per category.
      # TODO subcategories not supported correctly - they are at the same level
      # as the main categories
      elements :"itunes:category", :as => :itunes_categories, :value => :text

      elements :"itunes:owner", :as => :itunes_owners, :class => ITunesRSSOwner

      elements :item, :as => :entries, :class => ITunesRSSItem

      # Truthy when the text declares the iTunes podcast XML namespace.
      def self.able_to_parse?(xml)
        xml =~ /xmlns:itunes=\"http:\/\/www.itunes.com\/dtds\/podcast-1.0.dtd\"/
      end
    end
  end
end
@@ -0,0 +1,32 @@
1
module Feedzirra
  module Parser
    # Parser for the items of an iTunes podcast feed: the standard RSS 2.0
    # item plus the iTunes extensions.
    # Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
    class ITunesRSSItem
      include SAXMachine
      include FeedEntryUtilities

      # Plain RSS 2.0 item elements
      element :author
      element :guid
      element :title
      element :link,              :as => :url
      element :description,       :as => :summary
      element :pubDate,           :as => :published
      element :"content:encoded", :as => :content

      # iTunes extensions
      # (if author is not present use author tag on the item)
      element :"itunes:author",   :as => :itunes_author
      element :"itunes:block",    :as => :itunes_block
      element :"itunes:duration", :as => :itunes_duration
      element :"itunes:explicit", :as => :itunes_explicit
      element :"itunes:keywords", :as => :itunes_keywords
      element :"itunes:subtitle", :as => :itunes_subtitle
      # If summary is not present, use the description tag
      element :"itunes:summary",  :as => :itunes_summary

      # The three attributes of <enclosure> are exposed separately.
      element :enclosure, :value => :length, :as => :enclosure_length
      element :enclosure, :value => :type,   :as => :enclosure_type
      element :enclosure, :value => :url,    :as => :enclosure_url
    end
  end
end
@@ -0,0 +1,12 @@
1
module Feedzirra
  module Parser
    # Parser for the <itunes:owner> element of an iTunes podcast feed,
    # exposing the owner's name and email.
    class ITunesRSSOwner
      include SAXMachine
      include FeedUtilities

      element :"itunes:name",  :as => :name
      element :"itunes:email", :as => :email
    end
  end
end
@@ -0,0 +1,28 @@
1
module Feedzirra
  module Parser
    # == Summary
    # SAXMachine-based parser for RSS feeds.
    #
    # == Attributes
    # * title
    # * feed_url
    # * url
    # * entries
    class RSS
      include SAXMachine
      include FeedUtilities

      element  :title
      element  :link, :as => :url
      elements :item, :as => :entries, :class => RSSEntry

      attr_accessor :feed_url

      # Truthy when the text contains the start of an <rss ...> or
      # <rdf...> tag. Both alternatives require the opening "<", so a
      # document that merely mentions "rdf" (for example in a namespace
      # URL) is not claimed by this parser.
      def self.able_to_parse?(xml) #:nodoc:
        xml =~ /<rss|<rdf/
      end
    end
  end
end