Spectives-feedzirra 0.0.28

Sign up to get free protection for your applications and to get access to all the features.
Files changed (44) hide show
  1. data/README.rdoc +169 -0
  2. data/README.textile +205 -0
  3. data/Rakefile +56 -0
  4. data/lib/core_ext/date.rb +21 -0
  5. data/lib/core_ext/string.rb +9 -0
  6. data/lib/feedzirra/feed.rb +334 -0
  7. data/lib/feedzirra/feed_entry_utilities.rb +45 -0
  8. data/lib/feedzirra/feed_utilities.rb +71 -0
  9. data/lib/feedzirra/parser/atom.rb +35 -0
  10. data/lib/feedzirra/parser/atom_entry.rb +41 -0
  11. data/lib/feedzirra/parser/itunes_category.rb +12 -0
  12. data/lib/feedzirra/parser/mrss_category.rb +11 -0
  13. data/lib/feedzirra/parser/mrss_content.rb +48 -0
  14. data/lib/feedzirra/parser/mrss_copyright.rb +10 -0
  15. data/lib/feedzirra/parser/mrss_credit.rb +11 -0
  16. data/lib/feedzirra/parser/mrss_group.rb +37 -0
  17. data/lib/feedzirra/parser/mrss_hash.rb +10 -0
  18. data/lib/feedzirra/parser/mrss_player.rb +11 -0
  19. data/lib/feedzirra/parser/mrss_rating.rb +10 -0
  20. data/lib/feedzirra/parser/mrss_restriction.rb +11 -0
  21. data/lib/feedzirra/parser/mrss_text.rb +13 -0
  22. data/lib/feedzirra/parser/mrss_thumbnail.rb +11 -0
  23. data/lib/feedzirra/parser/rss.rb +83 -0
  24. data/lib/feedzirra/parser/rss_entry.rb +83 -0
  25. data/lib/feedzirra/parser/rss_image.rb +15 -0
  26. data/lib/feedzirra.rb +44 -0
  27. data/spec/benchmarks/feed_benchmarks.rb +98 -0
  28. data/spec/benchmarks/feedzirra_benchmarks.rb +40 -0
  29. data/spec/benchmarks/fetching_benchmarks.rb +28 -0
  30. data/spec/benchmarks/parsing_benchmark.rb +30 -0
  31. data/spec/benchmarks/updating_benchmarks.rb +33 -0
  32. data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
  33. data/spec/feedzirra/feed_spec.rb +546 -0
  34. data/spec/feedzirra/feed_utilities_spec.rb +149 -0
  35. data/spec/feedzirra/parser/atom_entry_spec.rb +49 -0
  36. data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +42 -0
  37. data/spec/feedzirra/parser/atom_feed_burner_spec.rb +39 -0
  38. data/spec/feedzirra/parser/atom_spec.rb +43 -0
  39. data/spec/feedzirra/parser/mrss_content_spec.rb +32 -0
  40. data/spec/feedzirra/parser/rss_entry_spec.rb +154 -0
  41. data/spec/feedzirra/parser/rss_spec.rb +93 -0
  42. data/spec/sample_feeds/run_against_sample.rb +20 -0
  43. data/spec/spec_helper.rb +62 -0
  44. metadata +154 -0
@@ -0,0 +1,334 @@
1
+ module Feedzirra
2
+ class NoParserAvailable < StandardError; end
3
+
4
+ class Feed
5
+ USER_AGENT = "feedzirra http://github.com/pauldix/feedzirra/tree/master"
6
+
7
+ # Takes a raw XML feed and attempts to parse it. If no parser is available a Feedzirra::NoParserAvailable exception is raised.
8
+ #
9
+ # === Parameters
10
+ # [xml<String>] The XML that you would like parsed.
11
+ # === Returns
12
+ # An instance of the determined feed type. By default a Feedzirra::Atom, Feedzirra::AtomFeedBurner, Feedzirra::RDF, or Feedzirra::RSS object.
13
+ # === Raises
14
+ # Feedzirra::NoParserAvailable : If no valid parser classes could be found for the feed.
15
+ def self.parse(xml)
16
+ if parser = determine_feed_parser_for_xml(xml)
17
+ parser.parse(xml)
18
+ else
19
+ raise NoParserAvailable.new("No valid parser for XML.")
20
+ end
21
+ end
22
+
23
+ # Determines the correct parser class to use for parsing the feed.
24
+ #
25
+ # === Parameters
26
+ # [xml<String>] The XML that you would like determine the parser for.
27
+ # === Returns
28
+ # The class name of the parser that can handle the XML.
29
+ def self.determine_feed_parser_for_xml(xml)
30
+ start_of_doc = xml.slice(0, 2000)
31
+ feed_classes.detect {|klass| klass.able_to_parse?(start_of_doc)}
32
+ end
33
+
34
+ # Adds a new feed parsing class that will be used for parsing.
35
+ #
36
+ # === Parameters
37
+ # [klass<Constant>] The class/constant that you want to register.
38
+ # === Returns
39
+ # An updated array of feed parser class names.
40
+ def self.add_feed_class(klass)
41
+ feed_classes.unshift klass
42
+ end
43
+
44
+ # Provides a list of registered feed parsing classes.
45
+ #
46
+ # === Returns
47
+ # An array of class names.
48
+ def self.feed_classes
49
+ @feed_classes ||= [
50
+ Feedzirra::Parser::RSS,
51
+ Feedzirra::Parser::AtomFeedBurner,
52
+ Feedzirra::Parser::Atom
53
+ ]
54
+ end
55
+
56
+ # Makes all entry types look for the passed in element to parse. This is actually just a call to
57
+ # element (a SAXMachine call) in the class
58
+ #
59
+ # === Parameters
60
+ # [element_tag<String>]
61
+ # [options<Hash>] Valid keys are same as with SAXMachine
62
+ def self.add_common_feed_entry_element(element_tag, options = {})
63
+ # need to think of a better way to do this. will break for people who want this behavior
64
+ # across their added classes
65
+ feed_classes.map{|k| eval("#{k}Entry") }.each do |klass|
66
+ klass.send(:element, element_tag, options)
67
+ end
68
+ end
69
+
70
+ # Fetches and returns the raw XML for each URL provided.
71
+ #
72
+ # === Parameters
73
+ # [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
74
+ # [options<Hash>] Valid keys for this argument are as follows:
75
+ # :user_agent - String that overrides the default user agent.
76
+ # :if_modified_since - Time object representing when the feed was last updated.
77
+ # :if_none_match - String that's normally an etag for the request that was stored previously.
78
+ # :on_success - Block that gets executed after a successful request.
79
+ # :on_failure - Block that gets executed after a failed request.
80
+ # === Returns
81
+ # A String of XML if a single URL is passed.
82
+ #
83
+ # A Hash if multiple URL's are passed. The key will be the URL, and the value the XML.
84
+ def self.fetch_raw(urls, options = {})
85
+ url_queue = [*urls]
86
+ multi = Curl::Multi.new
87
+ responses = {}
88
+ url_queue.each do |url|
89
+ easy = Curl::Easy.new(url) do |curl|
90
+ curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
91
+ curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
92
+ curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
93
+ curl.headers["Accept-encoding"] = 'gzip, deflate' if options.has_key?(:compress)
94
+ curl.follow_location = true
95
+ curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
96
+
97
+ curl.max_redirects = options[:max_redirects] if options[:max_redirects]
98
+ curl.timeout = options[:timeout] if options[:timeout]
99
+
100
+ curl.on_success do |c|
101
+ c = c.select { |e| e.kind_of? Curl::Easy }.first if(c.kind_of? Array)
102
+ responses[url] = decode_content(c)
103
+ end
104
+ curl.on_failure do |c|
105
+ c = c.select { |e| e.kind_of? Curl::Easy }.first if(c.kind_of? Array)
106
+ responses[url] = c.response_code
107
+ end
108
+ end
109
+ multi.add(easy)
110
+ end
111
+
112
+ multi.perform
113
+ urls.is_a?(String) ? responses.values.first : responses
114
+ end
115
+
116
+ # Fetches and returns the parsed XML for each URL provided.
117
+ #
118
+ # === Parameters
119
+ # [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
120
+ # [options<Hash>] Valid keys for this argument are as follows:
121
+ # * :user_agent - String that overrides the default user agent.
122
+ # * :if_modified_since - Time object representing when the feed was last updated.
123
+ # * :if_none_match - String, an etag for the request that was stored previously.
124
+ # * :on_success - Block that gets executed after a successful request.
125
+ # * :on_failure - Block that gets executed after a failed request.
126
+ # === Returns
127
+ # A Feed object if a single URL is passed.
128
+ #
129
+ # A Hash if multiple URL's are passed. The key will be the URL, and the value the Feed object.
130
+ def self.fetch_and_parse(urls, options = {})
131
+ url_queue = [*urls]
132
+ multi = Curl::Multi.new
133
+ responses = {}
134
+
135
+ # I broke these down so I would only try to do 30 simultaneously because
136
+ # I was getting weird errors when doing a lot. As one finishes it pops another off the queue.
137
+ url_queue.slice!(0, 30).each do |url|
138
+ add_url_to_multi(multi, url, url_queue, responses, options)
139
+ end
140
+
141
+ multi.perform
142
+ return urls.is_a?(String) ? responses.values.first : responses
143
+ end
144
+
145
+ # Decodes the XML document if it was compressed.
146
+ #
147
+ # === Parameters
148
+ # [curl_request<Curl::Easy>] The Curl::Easy response object from the request.
149
+ # === Returns
150
+ # A decoded string of XML.
151
+ def self.decode_content(c)
152
+ if c.header_str.match(/Content-Encoding: gzip/)
153
+ begin
154
+ gz = Zlib::GzipReader.new(StringIO.new(c.body_str))
155
+ xml = gz.read
156
+ gz.close
157
+ rescue Zlib::GzipFile::Error
158
+ # Maybe this is not gzipped?
159
+ xml = c.body_str
160
+ end
161
+ elsif c.header_str.match(/Content-Encoding: deflate/)
162
+ xml = Zlib::Inflate.inflate(c.body_str)
163
+ else
164
+ xml = c.body_str
165
+ end
166
+
167
+ xml
168
+ end
169
+
170
+ # Updates each feed for each Feed object provided.
171
+ #
172
+ # === Parameters
173
+ # [feeds<Feed> or <Array>] A single feed object, or an array of feed objects.
174
+ # [options<Hash>] Valid keys for this argument are as follows:
175
+ # * :user_agent - String that overrides the default user agent.
176
+ # * :on_success - Block that gets executed after a successful request.
177
+ # * :on_failure - Block that gets executed after a failed request.
178
+ # === Returns
179
+ # An updated Feed object if a single URL is passed.
180
+ #
181
+ # A Hash if multiple Feeds are passed. The key will be the URL, and the value the updated Feed object.
182
+ def self.update(feeds, options = {})
183
+ feed_queue = [*feeds]
184
+ multi = Curl::Multi.new
185
+ responses = {}
186
+
187
+ feed_queue.slice!(0, 30).each do |feed|
188
+ add_feed_to_multi(multi, feed, feed_queue, responses, options)
189
+ end
190
+
191
+ multi.perform
192
+ responses.size == 1 ? responses.values.first : responses.values
193
+ end
194
+
195
+ # An abstraction for adding a feed by URL to the passed Curb::multi stack.
196
+ #
197
+ # === Parameters
198
+ # [multi<Curl::Multi>] The Curl::Multi object that the request should be added to.
199
+ # [url<String>] The URL of the feed that you would like to be fetched.
200
+ # [url_queue<Array>] An array of URLs that are queued for request.
201
+ # [responses<Hash>] Existing responses that you want the response from the request added to.
202
+ # [feeds<String> or <Array>] A single feed object, or an array of feed objects.
203
+ # [options<Hash>] Valid keys for this argument are as follows:
204
+ # * :user_agent - String that overrides the default user agent.
205
+ # * :on_success - Block that gets executed after a successful request.
206
+ # * :on_failure - Block that gets executed after a failed request.
207
+ # === Returns
208
+ # The updated Curl::Multi object with the request details added to its stack.
209
+ def self.add_url_to_multi(multi, url, url_queue, responses, options)
210
+ easy = Curl::Easy.new(url) do |curl|
211
+ curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
212
+ curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
213
+ curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
214
+ curl.headers["Accept-encoding"] = 'gzip, deflate' if options.has_key?(:compress)
215
+ curl.follow_location = true
216
+ curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
217
+
218
+ curl.max_redirects = options[:max_redirects] if options[:max_redirects]
219
+ curl.timeout = options[:timeout] if options[:timeout]
220
+
221
+ curl.on_success do |c|
222
+ c = c.select { |e| e.kind_of? Curl::Easy }.first if(c.kind_of? Array)
223
+ add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
224
+ xml = decode_content(c)
225
+ klass = determine_feed_parser_for_xml(xml)
226
+
227
+ if klass
228
+ begin
229
+ feed = klass.parse(xml)
230
+ feed.feed_url = c.last_effective_url
231
+ feed.etag = etag_from_header(c.header_str)
232
+ feed.last_modified = last_modified_from_header(c.header_str)
233
+ responses[url] = feed
234
+ options[:on_success].call(url, feed) if options.has_key?(:on_success)
235
+ rescue Exception => e
236
+ options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
237
+ end
238
+ else
239
+ # puts "Error determining parser for #{url} - #{c.last_effective_url}"
240
+ # raise NoParserAvailable.new("no valid parser for content.") (this would unfortunately fail the whole 'multi', so it's not really usable)
241
+ options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
242
+ end
243
+ end
244
+
245
+ curl.on_failure do |c|
246
+ c = c.select { |e| e.kind_of? Curl::Easy }.first if(c.kind_of? Array)
247
+ add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
248
+ responses[url] = c.response_code
249
+ options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
250
+ end
251
+ end
252
+ multi.add(easy)
253
+ end
254
+
255
+ # An abstraction for adding a feed by a Feed object to the passed Curb::multi stack.
256
+ #
257
+ # === Parameters
258
+ # [multi<Curl::Multi>] The Curl::Multi object that the request should be added to.
259
+ # [feed<Feed>] A feed object that you would like to be fetched.
260
+ # [url_queue<Array>] An array of feed objects that are queued for request.
261
+ # [responses<Hash>] Existing responses that you want the response from the request added to.
262
+ # [feeds<String>] or <Array> A single feed object, or an array of feed objects.
263
+ # [options<Hash>] Valid keys for this argument are as follows:
264
+ # * :user_agent - String that overrides the default user agent.
265
+ # * :on_success - Block that gets executed after a successful request.
266
+ # * :on_failure - Block that gets executed after a failed request.
267
+ # === Returns
268
+ # The updated Curl::Multi object with the request details added to its stack.
269
+ def self.add_feed_to_multi(multi, feed, feed_queue, responses, options)
270
+ easy = Curl::Easy.new(feed.feed_url) do |curl|
271
+ curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
272
+ curl.headers["If-Modified-Since"] = feed.last_modified.httpdate if feed.last_modified
273
+ curl.headers["If-None-Match"] = feed.etag if feed.etag
274
+ curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
275
+ curl.follow_location = true
276
+
277
+ curl.max_redirects = options[:max_redirects] if options[:max_redirects]
278
+ curl.timeout = options[:timeout] if options[:timeout]
279
+
280
+ curl.on_success do |c|
281
+ c = c.select { |e| e.kind_of? Curl::Easy }.first if(c.kind_of? Array)
282
+ begin
283
+ add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
284
+ updated_feed = Feed.parse(c.body_str)
285
+ updated_feed.feed_url = c.last_effective_url
286
+ updated_feed.etag = etag_from_header(c.header_str)
287
+ updated_feed.last_modified = last_modified_from_header(c.header_str)
288
+ feed.update_from_feed(updated_feed)
289
+ responses[feed.feed_url] = feed
290
+ options[:on_success].call(feed) if options.has_key?(:on_success)
291
+ rescue Exception => e
292
+ options[:on_failure].call(feed, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
293
+ end
294
+ end
295
+
296
+ curl.on_failure do |c|
297
+ c = c.select { |e| e.kind_of? Curl::Easy }.first if(c.kind_of? Array)
298
+ add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
299
+ response_code = c.response_code
300
+ if response_code == 304 # it's not modified. this isn't an error condition
301
+ responses[feed.feed_url] = feed
302
+ options[:on_success].call(feed) if options.has_key?(:on_success)
303
+ else
304
+ responses[feed.url] = c.response_code
305
+ options[:on_failure].call(feed, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
306
+ end
307
+ end
308
+ end
309
+ multi.add(easy)
310
+ end
311
+
312
+ # Determines the etag from the request headers.
313
+ #
314
+ # === Parameters
315
+ # [header<String>] Raw request header returned from the request
316
+ # === Returns
317
+ # A string of the etag or nil if it cannot be found in the headers.
318
+ def self.etag_from_header(header)
319
+ header =~ /.*ETag:\s(.*)\r/
320
+ $1
321
+ end
322
+
323
+ # Determines the last modified date from the request headers.
324
+ #
325
+ # === Parameters
326
+ # [header<String>] Raw request header returned from the request
327
+ # === Returns
328
+ # A Time object of the last modified date or nil if it cannot be found in the headers.
329
+ def self.last_modified_from_header(header)
330
+ header =~ /.*Last-Modified:\s(.*)\r/
331
+ Time.parse($1) if $1
332
+ end
333
+ end
334
+ end
@@ -0,0 +1,45 @@
1
+ module Feedzirra
2
+ module FeedEntryUtilities
3
+ def published
4
+ @published || @updated
5
+ end
6
+
7
+ def parse_datetime(string)
8
+ begin
9
+ DateTime.parse(string).feed_utils_to_gm_time
10
+ rescue
11
+ puts "DATE CAN'T BE PARSED: #{string}"
12
+ nil
13
+ end
14
+ end
15
+
16
+ ##
17
+ # Returns the id of the entry or its url if no id is present, as some formats don't support it
18
+ def id
19
+ @id || @url
20
+ end
21
+
22
+ ##
23
+ # Writer for published. By default, we keep the "oldest" publish time found.
24
+ def published=(val)
25
+ parsed = parse_datetime(val)
26
+ @published = parsed if !@published || parsed < @published
27
+ end
28
+
29
+ ##
30
+ # Writer for updated. By default, we keep the most recent update time found.
31
+ def updated=(val)
32
+ parsed = parse_datetime(val)
33
+ @updated = parsed if !@updated || parsed > @updated
34
+ end
35
+
36
+ def sanitize!
37
+ self.title.sanitize! if self.title
38
+ self.author.sanitize! if self.author
39
+ self.summary.sanitize! if self.summary
40
+ self.content.sanitize! if self.content
41
+ end
42
+
43
+ alias_method :last_modified, :published
44
+ end
45
+ end
@@ -0,0 +1,71 @@
1
+ module Feedzirra
2
+ module FeedUtilities
3
+ UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified)
4
+
5
+ attr_writer :new_entries, :updated, :last_modified
6
+ attr_accessor :etag
7
+
8
+ def last_modified
9
+ @last_modified ||= begin
10
+ entry = entries.reject {|e| e.published.nil? }.sort_by { |entry| entry.published if entry.published }.last
11
+ entry ? entry.published : nil
12
+ end
13
+ end
14
+
15
+ def updated?
16
+ @updated
17
+ end
18
+
19
+ def new_entries
20
+ @new_entries ||= []
21
+ end
22
+
23
+ def has_new_entries?
24
+ new_entries.size > 0
25
+ end
26
+
27
+ def update_from_feed(feed)
28
+ self.new_entries += find_new_entries_for(feed)
29
+ self.entries.unshift(*self.new_entries)
30
+
31
+ updated! if UPDATABLE_ATTRIBUTES.any? { |name| update_attribute(feed, name) }
32
+ end
33
+
34
+ def update_attribute(feed, name)
35
+ old_value, new_value = send(name), feed.send(name)
36
+
37
+ if old_value != new_value
38
+ send("#{name}=", new_value)
39
+ end
40
+ end
41
+
42
+ def sanitize_entries!
43
+ entries.each {|entry| entry.sanitize!}
44
+ end
45
+
46
+ private
47
+
48
+ def updated!
49
+ @updated = true
50
+ end
51
+
52
+ def find_new_entries_for(feed)
53
+ # this implementation is a hack, which is why it's so ugly.
54
+ # it's to get around the fact that not all feeds have a published date.
55
+ # however, they're always ordered with the newest one first.
56
+ # So we go through the entries just parsed and insert each one as a new entry
57
+ # until we get to one that has the same url as the newest for the feed
58
+ latest_entry = self.entries.first
59
+ found_new_entries = []
60
+ feed.entries.each do |entry|
61
+ break if entry.url == latest_entry.url
62
+ found_new_entries << entry
63
+ end
64
+ found_new_entries
65
+ end
66
+
67
+ def existing_entry?(test_entry)
68
+ entries.any? { |entry| entry.url == test_entry.url }
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,35 @@
1
+ module Feedzirra
2
+
3
+ module Parser
4
+ # == Summary
5
+ # Parser for dealing with Atom feeds.
6
+ #
7
+ # == Attributes
8
+ # * title
9
+ # * feed_url
10
+ # * url
11
+ # * entries
12
+ class Atom
13
+ include SAXMachine
14
+ include FeedUtilities
15
+ element :title
16
+ element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
17
+ element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
18
+ elements :link, :as => :links, :value => :href
19
+ elements :entry, :as => :entries, :class => AtomEntry
20
+
21
+ def self.able_to_parse?(xml) #:nodoc:
22
+ xml =~ /(Atom)|(#{Regexp.escape("http://purl.org/atom")})/
23
+ end
24
+
25
+ def url
26
+ @url || links.last
27
+ end
28
+
29
+ def feed_url
30
+ @feed_url || links.first
31
+ end
32
+ end
33
+ end
34
+
35
+ end
@@ -0,0 +1,41 @@
1
+ module Feedzirra
2
+
3
+ module Parser
4
+ # == Summary
5
+ # Parser for dealing with Atom feed entries.
6
+ #
7
+ # == Attributes
8
+ # * title
9
+ # * url
10
+ # * author
11
+ # * content
12
+ # * summary
13
+ # * published
14
+ # * categories
15
+ class AtomEntry
16
+ include SAXMachine
17
+ include FeedEntryUtilities
18
+ element :title
19
+ element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
20
+ element :"feedburner:origLink", :as => :url
21
+ element :name, :as => :author
22
+ element :content
23
+ element :summary
24
+ element :published
25
+ element :id
26
+ element :created, :as => :published
27
+ element :issued, :as => :published
28
+ element :updated
29
+ element :modified, :as => :updated
30
+ elements :category, :as => :categories, :value => :term
31
+ elements :link, :as => :links, :value => :href
32
+ elements :link, :as => :enclosure_links, :value => :href, :with => {:rel => "enclosure"}
33
+
34
+ def url
35
+ @url || links.first
36
+ end
37
+ end
38
+
39
+ end
40
+
41
+ end
@@ -0,0 +1,12 @@
1
+ module Feedzirra
2
+ module Parser
3
+ class RSS
4
+ class ITunesCategory
5
+ include SAXMachine
6
+
7
+ element :'itunes:category', :as => :name, :value => :text
8
+ elements :'itunes:category', :as => :sub_categories, :value => :text
9
+ end
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,11 @@
1
+ module Feedzirra
2
+ module Parser
3
+ class MRSSCategory
4
+ include SAXMachine
5
+
6
+ element :'media:category', :as => :category
7
+ element :'media:category', :value => :scheme, :as => :scheme
8
+ element :'media:category', :value => :label, :as => :label
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,48 @@
1
+ require File.dirname(__FILE__) + '/mrss_credit'
2
+ require File.dirname(__FILE__) + '/mrss_restriction'
3
+ require File.dirname(__FILE__) + '/mrss_category'
4
+ require File.dirname(__FILE__) + '/mrss_copyright'
5
+ require File.dirname(__FILE__) + '/mrss_hash'
6
+ require File.dirname(__FILE__) + '/mrss_player'
7
+ require File.dirname(__FILE__) + '/mrss_rating'
8
+ require File.dirname(__FILE__) + '/mrss_restriction'
9
+ require File.dirname(__FILE__) + '/mrss_text'
10
+ require File.dirname(__FILE__) + '/mrss_thumbnail'
11
+
12
+ module Feedzirra
13
+ module Parser
14
+ class MRSSContent
15
+ include SAXMachine
16
+
17
+ element :'media:content', :as => :url, :value => :url
18
+ element :'media:content', :as => :content_type, :value => :type
19
+ element :'media:content', :as => :medium, :value => :medium
20
+ element :'media:content', :as => :duration, :value => :duration
21
+ element :'media:content', :as => :isDefault, :value => :isDefault
22
+ element :'media:content', :as => :expression, :value => :expression
23
+ element :'media:content', :as => :bitrate, :value => :bitrate
24
+ element :'media:content', :as => :framerate, :value => :framerate
25
+ element :'media:content', :as => :samplingrate, :value => :sampling
26
+ element :'media:content', :as => :channels, :value => :duration
27
+ element :'media:content', :as => :height, :value => :height
28
+ element :'media:content', :as => :width, :value => :width
29
+ element :'media:content', :as => :lang, :value => :lang
30
+ element :'media:content', :as => :fileSize, :value => :fileSize
31
+
32
+ # optional elements
33
+ element :'media:title', :as => :media_title
34
+ element :'media:keywords', :as => :media_keywords
35
+ element :'media:description', :as => :media_description
36
+
37
+ element :'media:thumbnail', :as => :media_thumbnail, :class => MRSSThumbnail
38
+ element :'media:rating', :as => :rating, :class => MRSSRating
39
+ element :'media:category', :as => :media_category, :class => MRSSCategory
40
+ element :'media:hash', :as => :media_hash, :class => MRSSHash
41
+ element :'media:player', :as => :media_player, :class => MRSSPlayer
42
+ elements :'media:credit', :as => :credits, :class => MRSSCredit
43
+ element :'media:copyright', :as => :copyright, :class => MRSSCopyright
44
+ element :'media:restriction', :as => :media_restriction, :class => MRSSRestriction
45
+ element :'media:text', :as => :text, :class => MRSSText
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,10 @@
1
+ module Feedzirra
2
+ module Parser
3
+ class MRSSCopyright
4
+ include SAXMachine
5
+
6
+ element :'media:copyright', :as => :copyright
7
+ element :'media:copyright', :as => :url, :value => :url
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,11 @@
1
+ module Feedzirra
2
+ module Parser
3
+ class MRSSCredit
4
+ include SAXMachine
5
+
6
+ element :'media:credit', :as => :role, :value => :role
7
+ element :'media:credit', :as => :scheme, :value => :scheme
8
+ element :'media:credit', :as => :name
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,37 @@
1
+ require File.dirname(__FILE__) + '/mrss_content'
2
+ require File.dirname(__FILE__) + '/mrss_credit'
3
+ require File.dirname(__FILE__) + '/mrss_restriction'
4
+ require File.dirname(__FILE__) + '/mrss_group'
5
+ require File.dirname(__FILE__) + '/mrss_category'
6
+ require File.dirname(__FILE__) + '/mrss_copyright'
7
+ require File.dirname(__FILE__) + '/mrss_hash'
8
+ require File.dirname(__FILE__) + '/mrss_player'
9
+ require File.dirname(__FILE__) + '/mrss_rating'
10
+ require File.dirname(__FILE__) + '/mrss_restriction'
11
+ require File.dirname(__FILE__) + '/mrss_text'
12
+ require File.dirname(__FILE__) + '/mrss_thumbnail'
13
+
14
+ module Feedzirra
15
+ module Parser
16
+ class MRSSGroup
17
+ include SAXMachine
18
+
19
+ elements :'media:content', :as => :media_content, :class => MRSSContent
20
+
21
+ # optional elements
22
+ element :'media:title', :as => :media_title
23
+ element :'media:keywords', :as => :media_keywords
24
+ element :'media:description', :as => :media_description
25
+
26
+ element :'media:thumbnail', :as => :media_thumbnail, :class => MRSSThumbnail
27
+ element :'media:rating', :as => :rating, :class => MRSSRating
28
+ element :'media:category', :as => :media_category, :class => MRSSCategory
29
+ element :'media:hash', :as => :media_hash, :class => MRSSHash
30
+ element :'media:player', :as => :media_player, :class => MRSSPlayer
31
+ elements :'media:credit', :as => :credits, :class => MRSSCredit
32
+ element :'media:copyright', :as => :copyright, :class => MRSSCopyright
33
+ element :'media:restriction', :as => :media_restriction, :class => MRSSRestriction
34
+ element :'media:text', :as => :text, :class => MRSSText
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,10 @@
1
+ module Feedzirra
2
+ module Parser
3
+ class MRSSHash
4
+ include SAXMachine
5
+
6
+ element :'media:hash', :as => :hash
7
+ element :'media:hash', :value => :algo, :as => :algo
8
+ end
9
+ end
10
+ end