Spectives-feedzirra 0.0.28

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (44) hide show
  1. data/README.rdoc +169 -0
  2. data/README.textile +205 -0
  3. data/Rakefile +56 -0
  4. data/lib/core_ext/date.rb +21 -0
  5. data/lib/core_ext/string.rb +9 -0
  6. data/lib/feedzirra/feed.rb +334 -0
  7. data/lib/feedzirra/feed_entry_utilities.rb +45 -0
  8. data/lib/feedzirra/feed_utilities.rb +71 -0
  9. data/lib/feedzirra/parser/atom.rb +35 -0
  10. data/lib/feedzirra/parser/atom_entry.rb +41 -0
  11. data/lib/feedzirra/parser/itunes_category.rb +12 -0
  12. data/lib/feedzirra/parser/mrss_category.rb +11 -0
  13. data/lib/feedzirra/parser/mrss_content.rb +48 -0
  14. data/lib/feedzirra/parser/mrss_copyright.rb +10 -0
  15. data/lib/feedzirra/parser/mrss_credit.rb +11 -0
  16. data/lib/feedzirra/parser/mrss_group.rb +37 -0
  17. data/lib/feedzirra/parser/mrss_hash.rb +10 -0
  18. data/lib/feedzirra/parser/mrss_player.rb +11 -0
  19. data/lib/feedzirra/parser/mrss_rating.rb +10 -0
  20. data/lib/feedzirra/parser/mrss_restriction.rb +11 -0
  21. data/lib/feedzirra/parser/mrss_text.rb +13 -0
  22. data/lib/feedzirra/parser/mrss_thumbnail.rb +11 -0
  23. data/lib/feedzirra/parser/rss.rb +83 -0
  24. data/lib/feedzirra/parser/rss_entry.rb +83 -0
  25. data/lib/feedzirra/parser/rss_image.rb +15 -0
  26. data/lib/feedzirra.rb +44 -0
  27. data/spec/benchmarks/feed_benchmarks.rb +98 -0
  28. data/spec/benchmarks/feedzirra_benchmarks.rb +40 -0
  29. data/spec/benchmarks/fetching_benchmarks.rb +28 -0
  30. data/spec/benchmarks/parsing_benchmark.rb +30 -0
  31. data/spec/benchmarks/updating_benchmarks.rb +33 -0
  32. data/spec/feedzirra/feed_entry_utilities_spec.rb +52 -0
  33. data/spec/feedzirra/feed_spec.rb +546 -0
  34. data/spec/feedzirra/feed_utilities_spec.rb +149 -0
  35. data/spec/feedzirra/parser/atom_entry_spec.rb +49 -0
  36. data/spec/feedzirra/parser/atom_feed_burner_entry_spec.rb +42 -0
  37. data/spec/feedzirra/parser/atom_feed_burner_spec.rb +39 -0
  38. data/spec/feedzirra/parser/atom_spec.rb +43 -0
  39. data/spec/feedzirra/parser/mrss_content_spec.rb +32 -0
  40. data/spec/feedzirra/parser/rss_entry_spec.rb +154 -0
  41. data/spec/feedzirra/parser/rss_spec.rb +93 -0
  42. data/spec/sample_feeds/run_against_sample.rb +20 -0
  43. data/spec/spec_helper.rb +62 -0
  44. metadata +154 -0
@@ -0,0 +1,334 @@
1
+ module Feedzirra
2
+ class NoParserAvailable < StandardError; end
3
+
4
+ class Feed
5
+ USER_AGENT = "feedzirra http://github.com/pauldix/feedzirra/tree/master"
6
+
7
+ # Takes a raw XML feed and attempts to parse it. If no parser is available a Feedzirra::NoParserAvailable exception is raised.
8
+ #
9
+ # === Parameters
10
+ # [xml<String>] The XML that you would like parsed.
11
+ # === Returns
12
+ # An instance of the determined feed type. By default a Feedzirra::Atom, Feedzirra::AtomFeedBurner, Feedzirra::RDF, or Feedzirra::RSS object.
13
+ # === Raises
14
+ # Feedzirra::NoParserAvailable : If no valid parser classes could be found for the feed.
15
+ def self.parse(xml)
16
+ if parser = determine_feed_parser_for_xml(xml)
17
+ parser.parse(xml)
18
+ else
19
+ raise NoParserAvailable.new("No valid parser for XML.")
20
+ end
21
+ end
22
+
23
+ # Determines the correct parser class to use for parsing the feed.
24
+ #
25
+ # === Parameters
26
+ # [xml<String>] The XML that you would like determine the parser for.
27
+ # === Returns
28
+ # The class name of the parser that can handle the XML.
29
+ def self.determine_feed_parser_for_xml(xml)
30
+ start_of_doc = xml.slice(0, 2000)
31
+ feed_classes.detect {|klass| klass.able_to_parse?(start_of_doc)}
32
+ end
33
+
34
+ # Adds a new feed parsing class that will be used for parsing.
35
+ #
36
+ # === Parameters
37
+ # [klass<Constant>] The class/constant that you want to register.
38
+ # === Returns
39
+ # An updated array of feed parser class names.
40
+ def self.add_feed_class(klass)
41
+ feed_classes.unshift klass
42
+ end
43
+
44
+ # Provides a list of registered feed parsing classes.
45
+ #
46
+ # === Returns
47
+ # An array of class names.
48
+ def self.feed_classes
49
+ @feed_classes ||= [
50
+ Feedzirra::Parser::RSS,
51
+ Feedzirra::Parser::AtomFeedBurner,
52
+ Feedzirra::Parser::Atom
53
+ ]
54
+ end
55
+
56
+ # Makes all entry types look for the passed in element to parse. This is actually just a call to
57
+ # element (a SAXMachine call) in the class
58
+ #
59
+ # === Parameters
60
+ # [element_tag<String>]
61
+ # [options<Hash>] Valid keys are same as with SAXMachine
62
+ def self.add_common_feed_entry_element(element_tag, options = {})
63
+ # need to think of a better way to do this. will break for people who want this behavior
64
+ # across their added classes
65
+ feed_classes.map{|k| eval("#{k}Entry") }.each do |klass|
66
+ klass.send(:element, element_tag, options)
67
+ end
68
+ end
69
+
70
+ # Fetches and returns the raw XML for each URL provided.
71
+ #
72
+ # === Parameters
73
+ # [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
74
+ # [options<Hash>] Valid keys for this argument are as follows:
75
+ # :user_agent - String that overrides the default user agent.
76
+ # :if_modified_since - Time object representing when the feed was last updated.
77
+ # :if_none_match - String that's normally an etag for the request that was stored previously.
78
+ # :on_success - Block that gets executed after a successful request.
79
+ # :on_failure - Block that gets executed after a failed request.
80
+ # === Returns
81
+ # A String of XML if a single URL is passed.
82
+ #
83
+ # A Hash if multiple URL's are passed. The key will be the URL, and the value the XML.
84
+ def self.fetch_raw(urls, options = {})
85
+ url_queue = [*urls]
86
+ multi = Curl::Multi.new
87
+ responses = {}
88
+ url_queue.each do |url|
89
+ easy = Curl::Easy.new(url) do |curl|
90
+ curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
91
+ curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
92
+ curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
93
+ curl.headers["Accept-encoding"] = 'gzip, deflate' if options.has_key?(:compress)
94
+ curl.follow_location = true
95
+ curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
96
+
97
+ curl.max_redirects = options[:max_redirects] if options[:max_redirects]
98
+ curl.timeout = options[:timeout] if options[:timeout]
99
+
100
+ curl.on_success do |c|
101
+ c = c.select { |e| e.kind_of? Curl::Easy }.first if(c.kind_of? Array)
102
+ responses[url] = decode_content(c)
103
+ end
104
+ curl.on_failure do |c|
105
+ c = c.select { |e| e.kind_of? Curl::Easy }.first if(c.kind_of? Array)
106
+ responses[url] = c.response_code
107
+ end
108
+ end
109
+ multi.add(easy)
110
+ end
111
+
112
+ multi.perform
113
+ urls.is_a?(String) ? responses.values.first : responses
114
+ end
115
+
116
+ # Fetches and returns the parsed XML for each URL provided.
117
+ #
118
+ # === Parameters
119
+ # [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
120
+ # [options<Hash>] Valid keys for this argument are as follows:
121
+ # * :user_agent - String that overrides the default user agent.
122
+ # * :if_modified_since - Time object representing when the feed was last updated.
123
+ # * :if_none_match - String, an etag for the request that was stored previously.
124
+ # * :on_success - Block that gets executed after a successful request.
125
+ # * :on_failure - Block that gets executed after a failed request.
126
+ # === Returns
127
+ # A Feed object if a single URL is passed.
128
+ #
129
+ # A Hash if multiple URL's are passed. The key will be the URL, and the value the Feed object.
130
+ def self.fetch_and_parse(urls, options = {})
131
+ url_queue = [*urls]
132
+ multi = Curl::Multi.new
133
+ responses = {}
134
+
135
+ # I broke these down so I would only try to do 30 simultaneously because
136
+ # I was getting weird errors when doing a lot. As one finishes it pops another off the queue.
137
+ url_queue.slice!(0, 30).each do |url|
138
+ add_url_to_multi(multi, url, url_queue, responses, options)
139
+ end
140
+
141
+ multi.perform
142
+ return urls.is_a?(String) ? responses.values.first : responses
143
+ end
144
+
145
+ # Decodes the XML document if it was compressed.
146
+ #
147
+ # === Parameters
148
+ # [curl_request<Curl::Easy>] The Curl::Easy response object from the request.
149
+ # === Returns
150
+ # A decoded string of XML.
151
+ def self.decode_content(c)
152
+ if c.header_str.match(/Content-Encoding: gzip/)
153
+ begin
154
+ gz = Zlib::GzipReader.new(StringIO.new(c.body_str))
155
+ xml = gz.read
156
+ gz.close
157
+ rescue Zlib::GzipFile::Error
158
+ # Maybe this is not gzipped?
159
+ xml = c.body_str
160
+ end
161
+ elsif c.header_str.match(/Content-Encoding: deflate/)
162
+ xml = Zlib::Inflate.inflate(c.body_str)
163
+ else
164
+ xml = c.body_str
165
+ end
166
+
167
+ xml
168
+ end
169
+
170
+ # Updates each feed for each Feed object provided.
171
+ #
172
+ # === Parameters
173
+ # [feeds<Feed> or <Array>] A single feed object, or an array of feed objects.
174
+ # [options<Hash>] Valid keys for this argument are as follows:
175
+ # * :user_agent - String that overrides the default user agent.
176
+ # * :on_success - Block that gets executed after a successful request.
177
+ # * :on_failure - Block that gets executed after a failed request.
178
+ # === Returns
179
+ # An updated Feed object if a single URL is passed.
180
+ #
181
+ # A Hash if multiple Feeds are passed. The key will be the URL, and the value the updated Feed object.
182
+ def self.update(feeds, options = {})
183
+ feed_queue = [*feeds]
184
+ multi = Curl::Multi.new
185
+ responses = {}
186
+
187
+ feed_queue.slice!(0, 30).each do |feed|
188
+ add_feed_to_multi(multi, feed, feed_queue, responses, options)
189
+ end
190
+
191
+ multi.perform
192
+ responses.size == 1 ? responses.values.first : responses.values
193
+ end
194
+
195
+ # An abstraction for adding a feed by URL to the passed Curb::multi stack.
196
+ #
197
+ # === Parameters
198
+ # [multi<Curl::Multi>] The Curl::Multi object that the request should be added to.
199
+ # [url<String>] The URL of the feed that you would like to be fetched.
200
+ # [url_queue<Array>] An array of URLs that are queued for request.
201
+ # [responses<Hash>] Existing responses that you want the response from the request added to.
202
+ # [feeds<String> or <Array>] A single feed object, or an array of feed objects.
203
+ # [options<Hash>] Valid keys for this argument are as follows:
204
+ # * :user_agent - String that overrides the default user agent.
205
+ # * :on_success - Block that gets executed after a successful request.
206
+ # * :on_failure - Block that gets executed after a failed request.
207
+ # === Returns
208
+ # The updated Curl::Multi object with the request details added to its stack.
209
+ def self.add_url_to_multi(multi, url, url_queue, responses, options)
210
+ easy = Curl::Easy.new(url) do |curl|
211
+ curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
212
+ curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
213
+ curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
214
+ curl.headers["Accept-encoding"] = 'gzip, deflate' if options.has_key?(:compress)
215
+ curl.follow_location = true
216
+ curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
217
+
218
+ curl.max_redirects = options[:max_redirects] if options[:max_redirects]
219
+ curl.timeout = options[:timeout] if options[:timeout]
220
+
221
+ curl.on_success do |c|
222
+ c = c.select { |e| e.kind_of? Curl::Easy }.first if(c.kind_of? Array)
223
+ add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
224
+ xml = decode_content(c)
225
+ klass = determine_feed_parser_for_xml(xml)
226
+
227
+ if klass
228
+ begin
229
+ feed = klass.parse(xml)
230
+ feed.feed_url = c.last_effective_url
231
+ feed.etag = etag_from_header(c.header_str)
232
+ feed.last_modified = last_modified_from_header(c.header_str)
233
+ responses[url] = feed
234
+ options[:on_success].call(url, feed) if options.has_key?(:on_success)
235
+ rescue Exception => e
236
+ options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
237
+ end
238
+ else
239
+ # puts "Error determining parser for #{url} - #{c.last_effective_url}"
240
+ # raise NoParserAvailable.new("no valid parser for content.") (this would unfortunately fail the whole 'multi', so it's not really usable)
241
+ options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
242
+ end
243
+ end
244
+
245
+ curl.on_failure do |c|
246
+ c = c.select { |e| e.kind_of? Curl::Easy }.first if(c.kind_of? Array)
247
+ add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
248
+ responses[url] = c.response_code
249
+ options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
250
+ end
251
+ end
252
+ multi.add(easy)
253
+ end
254
+
255
+ # An abstraction for adding a feed by a Feed object to the passed Curb::multi stack.
256
+ #
257
+ # === Parameters
258
+ # [multi<Curl::Multi>] The Curl::Multi object that the request should be added to.
259
+ # [feed<Feed>] A feed object that you would like to be fetched.
260
+ # [url_queue<Array>] An array of feed objects that are queued for request.
261
+ # [responses<Hash>] Existing responses that you want the response from the request added to.
262
+ # [feeds<String>] or <Array> A single feed object, or an array of feed objects.
263
+ # [options<Hash>] Valid keys for this argument are as follows:
264
+ # * :user_agent - String that overrides the default user agent.
265
+ # * :on_success - Block that gets executed after a successful request.
266
+ # * :on_failure - Block that gets executed after a failed request.
267
+ # === Returns
268
+ # The updated Curl::Multi object with the request details added to its stack.
269
+ def self.add_feed_to_multi(multi, feed, feed_queue, responses, options)
270
+ easy = Curl::Easy.new(feed.feed_url) do |curl|
271
+ curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
272
+ curl.headers["If-Modified-Since"] = feed.last_modified.httpdate if feed.last_modified
273
+ curl.headers["If-None-Match"] = feed.etag if feed.etag
274
+ curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
275
+ curl.follow_location = true
276
+
277
+ curl.max_redirects = options[:max_redirects] if options[:max_redirects]
278
+ curl.timeout = options[:timeout] if options[:timeout]
279
+
280
+ curl.on_success do |c|
281
+ c = c.select { |e| e.kind_of? Curl::Easy }.first if(c.kind_of? Array)
282
+ begin
283
+ add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
284
+ updated_feed = Feed.parse(c.body_str)
285
+ updated_feed.feed_url = c.last_effective_url
286
+ updated_feed.etag = etag_from_header(c.header_str)
287
+ updated_feed.last_modified = last_modified_from_header(c.header_str)
288
+ feed.update_from_feed(updated_feed)
289
+ responses[feed.feed_url] = feed
290
+ options[:on_success].call(feed) if options.has_key?(:on_success)
291
+ rescue Exception => e
292
+ options[:on_failure].call(feed, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
293
+ end
294
+ end
295
+
296
+ curl.on_failure do |c|
297
+ c = c.select { |e| e.kind_of? Curl::Easy }.first if(c.kind_of? Array)
298
+ add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
299
+ response_code = c.response_code
300
+ if response_code == 304 # it's not modified. this isn't an error condition
301
+ responses[feed.feed_url] = feed
302
+ options[:on_success].call(feed) if options.has_key?(:on_success)
303
+ else
304
+ responses[feed.url] = c.response_code
305
+ options[:on_failure].call(feed, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
306
+ end
307
+ end
308
+ end
309
+ multi.add(easy)
310
+ end
311
+
312
+ # Determines the etag from the request headers.
313
+ #
314
+ # === Parameters
315
+ # [header<String>] Raw request header returned from the request
316
+ # === Returns
317
+ # A string of the etag or nil if it cannot be found in the headers.
318
+ def self.etag_from_header(header)
319
+ header =~ /.*ETag:\s(.*)\r/
320
+ $1
321
+ end
322
+
323
+ # Determines the last modified date from the request headers.
324
+ #
325
+ # === Parameters
326
+ # [header<String>] Raw request header returned from the request
327
+ # === Returns
328
+ # A Time object of the last modified date or nil if it cannot be found in the headers.
329
+ def self.last_modified_from_header(header)
330
+ header =~ /.*Last-Modified:\s(.*)\r/
331
+ Time.parse($1) if $1
332
+ end
333
+ end
334
+ end
@@ -0,0 +1,45 @@
1
+ module Feedzirra
2
+ module FeedEntryUtilities
3
+ def published
4
+ @published || @updated
5
+ end
6
+
7
+ def parse_datetime(string)
8
+ begin
9
+ DateTime.parse(string).feed_utils_to_gm_time
10
+ rescue
11
+ puts "DATE CAN'T BE PARSED: #{string}"
12
+ nil
13
+ end
14
+ end
15
+
16
+ ##
17
+ # Returns the id of the entry or its url if not id is present, as some formats don't support it
18
+ def id
19
+ @id || @url
20
+ end
21
+
22
+ ##
23
+ # Writter for published. By default, we keep the "oldest" publish time found.
24
+ def published=(val)
25
+ parsed = parse_datetime(val)
26
+ @published = parsed if !@published || parsed < @published
27
+ end
28
+
29
+ ##
30
+ # Writter for udapted. By default, we keep the most recenet update time found.
31
+ def updated=(val)
32
+ parsed = parse_datetime(val)
33
+ @updated = parsed if !@updated || parsed > @updated
34
+ end
35
+
36
+ def sanitize!
37
+ self.title.sanitize! if self.title
38
+ self.author.sanitize! if self.author
39
+ self.summary.sanitize! if self.summary
40
+ self.content.sanitize! if self.content
41
+ end
42
+
43
+ alias_method :last_modified, :published
44
+ end
45
+ end
@@ -0,0 +1,71 @@
1
+ module Feedzirra
2
+ module FeedUtilities
3
+ UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified)
4
+
5
+ attr_writer :new_entries, :updated, :last_modified
6
+ attr_accessor :etag
7
+
8
+ def last_modified
9
+ @last_modified ||= begin
10
+ entry = entries.reject {|e| e.published.nil? }.sort_by { |entry| entry.published if entry.published }.last
11
+ entry ? entry.published : nil
12
+ end
13
+ end
14
+
15
+ def updated?
16
+ @updated
17
+ end
18
+
19
+ def new_entries
20
+ @new_entries ||= []
21
+ end
22
+
23
+ def has_new_entries?
24
+ new_entries.size > 0
25
+ end
26
+
27
+ def update_from_feed(feed)
28
+ self.new_entries += find_new_entries_for(feed)
29
+ self.entries.unshift(*self.new_entries)
30
+
31
+ updated! if UPDATABLE_ATTRIBUTES.any? { |name| update_attribute(feed, name) }
32
+ end
33
+
34
+ def update_attribute(feed, name)
35
+ old_value, new_value = send(name), feed.send(name)
36
+
37
+ if old_value != new_value
38
+ send("#{name}=", new_value)
39
+ end
40
+ end
41
+
42
+ def sanitize_entries!
43
+ entries.each {|entry| entry.sanitize!}
44
+ end
45
+
46
+ private
47
+
48
+ def updated!
49
+ @updated = true
50
+ end
51
+
52
+ def find_new_entries_for(feed)
53
+ # this implementation is a hack, which is why it's so ugly.
54
+ # it's to get around the fact that not all feeds have a published date.
55
+ # however, they're always ordered with the newest one first.
56
+ # So we go through the entries just parsed and insert each one as a new entry
57
+ # until we get to one that has the same url as the newest for the feed
58
+ latest_entry = self.entries.first
59
+ found_new_entries = []
60
+ feed.entries.each do |entry|
61
+ break if entry.url == latest_entry.url
62
+ found_new_entries << entry
63
+ end
64
+ found_new_entries
65
+ end
66
+
67
+ def existing_entry?(test_entry)
68
+ entries.any? { |entry| entry.url == test_entry.url }
69
+ end
70
+ end
71
+ end
@@ -0,0 +1,35 @@
1
+ module Feedzirra
2
+
3
+ module Parser
4
+ # == Summary
5
+ # Parser for dealing with Atom feeds.
6
+ #
7
+ # == Attributes
8
+ # * title
9
+ # * feed_url
10
+ # * url
11
+ # * entries
12
+ class Atom
13
+ include SAXMachine
14
+ include FeedUtilities
15
+ element :title
16
+ element :link, :as => :url, :value => :href, :with => {:type => "text/html"}
17
+ element :link, :as => :feed_url, :value => :href, :with => {:type => "application/atom+xml"}
18
+ elements :link, :as => :links, :value => :href
19
+ elements :entry, :as => :entries, :class => AtomEntry
20
+
21
+ def self.able_to_parse?(xml) #:nodoc:
22
+ xml =~ /(Atom)|(#{Regexp.escape("http://purl.org/atom")})/
23
+ end
24
+
25
+ def url
26
+ @url || links.last
27
+ end
28
+
29
+ def feed_url
30
+ @feed_url || links.first
31
+ end
32
+ end
33
+ end
34
+
35
+ end
@@ -0,0 +1,41 @@
1
+ module Feedzirra
2
+
3
+ module Parser
4
+ # == Summary
5
+ # Parser for dealing with Atom feed entries.
6
+ #
7
+ # == Attributes
8
+ # * title
9
+ # * url
10
+ # * author
11
+ # * content
12
+ # * summary
13
+ # * published
14
+ # * categories
15
+ class AtomEntry
16
+ include SAXMachine
17
+ include FeedEntryUtilities
18
+ element :title
19
+ element :link, :as => :url, :value => :href, :with => {:type => "text/html", :rel => "alternate"}
20
+ element :"feedburner:origLink", :as => :url
21
+ element :name, :as => :author
22
+ element :content
23
+ element :summary
24
+ element :published
25
+ element :id
26
+ element :created, :as => :published
27
+ element :issued, :as => :published
28
+ element :updated
29
+ element :modified, :as => :updated
30
+ elements :category, :as => :categories, :value => :term
31
+ elements :link, :as => :links, :value => :href
32
+ elements :link, :as => :enclosure_links, :value => :href, :with => {:rel => "enclosure"}
33
+
34
+ def url
35
+ @url || links.first
36
+ end
37
+ end
38
+
39
+ end
40
+
41
+ end
@@ -0,0 +1,12 @@
1
+ module Feedzirra
2
+ module Parser
3
+ class RSS
4
+ class ITunesCategory
5
+ include SAXMachine
6
+
7
+ element :'itunes:category', :as => :name, :value => :text
8
+ elements :'itunes:category', :as => :sub_categories, :value => :text
9
+ end
10
+ end
11
+ end
12
+ end
@@ -0,0 +1,11 @@
1
+ module Feedzirra
2
+ module Parser
3
+ class MRSSCategory
4
+ include SAXMachine
5
+
6
+ element :'media:category', :as => :category
7
+ element :'media:category', :value => :scheme, :as => :scheme
8
+ element :'media:category', :value => :label, :as => :label
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,48 @@
1
+ require File.dirname(__FILE__) + '/mrss_credit'
2
+ require File.dirname(__FILE__) + '/mrss_restriction'
3
+ require File.dirname(__FILE__) + '/mrss_category'
4
+ require File.dirname(__FILE__) + '/mrss_copyright'
5
+ require File.dirname(__FILE__) + '/mrss_hash'
6
+ require File.dirname(__FILE__) + '/mrss_player'
7
+ require File.dirname(__FILE__) + '/mrss_rating'
8
+ require File.dirname(__FILE__) + '/mrss_restriction'
9
+ require File.dirname(__FILE__) + '/mrss_text'
10
+ require File.dirname(__FILE__) + '/mrss_thumbnail'
11
+
12
+ module Feedzirra
13
+ module Parser
14
+ class MRSSContent
15
+ include SAXMachine
16
+
17
+ element :'media:content', :as => :url, :value => :url
18
+ element :'media:content', :as => :content_type, :value => :type
19
+ element :'media:content', :as => :medium, :value => :medium
20
+ element :'media:content', :as => :duration, :value => :duration
21
+ element :'media:content', :as => :isDefault, :value => :isDefault
22
+ element :'media:content', :as => :expression, :value => :expression
23
+ element :'media:content', :as => :bitrate, :value => :bitrate
24
+ element :'media:content', :as => :framerate, :value => :framerate
25
+ element :'media:content', :as => :samplingrate, :value => :sampling
26
+ element :'media:content', :as => :channels, :value => :duration
27
+ element :'media:content', :as => :height, :value => :height
28
+ element :'media:content', :as => :width, :value => :width
29
+ element :'media:content', :as => :lang, :value => :lang
30
+ element :'media:content', :as => :fileSize, :value => :fileSize
31
+
32
+ # optional elements
33
+ element :'media:title', :as => :media_title
34
+ element :'media:keywords', :as => :media_keywords
35
+ element :'media:description', :as => :media_description
36
+
37
+ element :'media:thumbnail', :as => :media_thumbnail, :class => MRSSThumbnail
38
+ element :'media:rating', :as => :rating, :class => MRSSRating
39
+ element :'media:category', :as => :media_category, :class => MRSSCategory
40
+ element :'media:hash', :as => :media_hash, :class => MRSSHash
41
+ element :'media:player', :as => :media_player, :class => MRSSPlayer
42
+ elements :'media:credit', :as => :credits, :class => MRSSCredit
43
+ element :'media:copyright', :as => :copyright, :class => MRSSCopyright
44
+ element :'media:restriction', :as => :media_restriction, :class => MRSSRestriction
45
+ element :'media:text', :as => :text, :class => MRSSText
46
+ end
47
+ end
48
+ end
@@ -0,0 +1,10 @@
1
+ module Feedzirra
2
+ module Parser
3
+ class MRSSCopyright
4
+ include SAXMachine
5
+
6
+ element :'media:copyright', :as => :copyright
7
+ element :'media:copyright', :as => :url, :value => :url
8
+ end
9
+ end
10
+ end
@@ -0,0 +1,11 @@
1
+ module Feedzirra
2
+ module Parser
3
+ class MRSSCredit
4
+ include SAXMachine
5
+
6
+ element :'media:credit', :as => :role, :value => :role
7
+ element :'media:credit', :as => :scheme, :value => :scheme
8
+ element :'media:credit', :as => :name
9
+ end
10
+ end
11
+ end
@@ -0,0 +1,37 @@
1
+ require File.dirname(__FILE__) + '/mrss_content'
2
+ require File.dirname(__FILE__) + '/mrss_credit'
3
+ require File.dirname(__FILE__) + '/mrss_restriction'
4
+ require File.dirname(__FILE__) + '/mrss_group'
5
+ require File.dirname(__FILE__) + '/mrss_category'
6
+ require File.dirname(__FILE__) + '/mrss_copyright'
7
+ require File.dirname(__FILE__) + '/mrss_hash'
8
+ require File.dirname(__FILE__) + '/mrss_player'
9
+ require File.dirname(__FILE__) + '/mrss_rating'
10
+ require File.dirname(__FILE__) + '/mrss_restriction'
11
+ require File.dirname(__FILE__) + '/mrss_text'
12
+ require File.dirname(__FILE__) + '/mrss_thumbnail'
13
+
14
+ module Feedzirra
15
+ module Parser
16
+ class MRSSGroup
17
+ include SAXMachine
18
+
19
+ elements :'media:content', :as => :media_content, :class => MRSSContent
20
+
21
+ # optional elements
22
+ element :'media:title', :as => :media_title
23
+ element :'media:keywords', :as => :media_keywords
24
+ element :'media:description', :as => :media_description
25
+
26
+ element :'media:thumbnail', :as => :media_thumbnail, :class => MRSSThumbnail
27
+ element :'media:rating', :as => :rating, :class => MRSSRating
28
+ element :'media:category', :as => :media_category, :class => MRSSCategory
29
+ element :'media:hash', :as => :media_hash, :class => MRSSHash
30
+ element :'media:player', :as => :media_player, :class => MRSSPlayer
31
+ elements :'media:credit', :as => :credits, :class => MRSSCredit
32
+ element :'media:copyright', :as => :copyright, :class => MRSSCopyright
33
+ element :'media:restriction', :as => :media_restriction, :class => MRSSRestriction
34
+ element :'media:text', :as => :text, :class => MRSSText
35
+ end
36
+ end
37
+ end
@@ -0,0 +1,10 @@
1
+ module Feedzirra
2
+ module Parser
3
+ class MRSSHash
4
+ include SAXMachine
5
+
6
+ element :'media:hash', :as => :hash
7
+ element :'media:hash', :value => :algo, :as => :algo
8
+ end
9
+ end
10
+ end