dinsley-feedzirra 0.0.8

data/lib/feedzirra/feed.rb ADDED
@@ -0,0 +1,374 @@
+ require 'ruby-debug'
+
+ module Feedzirra
+   class NoParserAvailable < StandardError; end
+
+   class Feed
+     USER_AGENT = "feedzirra http://github.com/pauldix/feedzirra/tree/master"
+
+     # Takes a raw XML feed and attempts to parse it. If no parser is available a Feedzirra::NoParserAvailable exception is raised.
+     #
+     # === Parameters
+     # [xml<String>] The XML that you would like parsed.
+     # === Returns
+     # An instance of the determined feed type. By default a Feedzirra::Atom, Feedzirra::AtomFeedBurner, Feedzirra::RDF, or Feedzirra::RSS object.
+     # === Raises
+     # Feedzirra::NoParserAvailable : If no valid parser classes could be found for the feed.
+     def self.parse(xml)
+       if parser = determine_feed_parser_for_xml(xml)
+         parser.parse(xml)
+       else
+         raise NoParserAvailable.new("No valid parser for XML.")
+       end
+     end
+
+     # Discovers feeds specified for auto-discovery if a webpage URL is passed, or returns
+     # the URL itself if it points to a parsable feed.
+     #
+     # === Parameters
+     # [url<String>] A website or feed URL.
+     # [options<Hash>] Valid keys for this argument are as follows:
+     #                 :user_agent - String that overrides the default user agent.
+     # === Returns
+     # Array of URL strings.
+     def self.discover(url, options = {})
+       feeds = []
+       content_types = ['application/x.atom+xml', 'application/atom+xml', 'application/rss+xml', 'application/rdf+xml', 'application/xml', 'text/xml']
+
+       request = Curl::Easy.new(url) do |curl|
+         curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
+         curl.headers["Accept-encoding"] = 'gzip, deflate'
+
+         curl.timeout = options[:timeout] if options[:timeout]
+         curl.follow_location = true
+
+         curl.on_success do |response|
+           if response.content_type =~ /text\/html/
+             html_document = Nokogiri.parse(decode_content(response))
+             selectors = content_types.collect { |content_type| "link[@type='#{content_type}'][@rel='alternate']" }
+
+             html_document.search(*selectors).each do |feed|
+               if feed['href'] =~ /\Ahttps?:\/\//
+                 feeds << feed['href']
+               else
+                 feeds << "#{response.url.gsub(/\/$/, '')}/#{feed['href'].gsub(/^(\/)/, '')}"
+               end
+             end
+           else
+             feeds << response.url
+           end
+         end
+       end
+
+       begin
+         request.perform
+       rescue Curl::Err::HostResolutionError, Curl::Err::TimeoutError
+       end
+
+       return feeds
+     end
+
+     # Determines the correct parser class to use for parsing the feed.
+     #
+     # === Parameters
+     # [xml<String>] The XML that you would like to determine the parser for.
+     # === Returns
+     # The class name of the parser that can handle the XML.
+     def self.determine_feed_parser_for_xml(xml)
+       start_of_doc = xml.slice(0, 1000)
+       feed_classes.detect { |klass| klass.able_to_parse?(start_of_doc) }
+     end
+
+     # Adds a new feed parsing class that will be used for parsing.
+     #
+     # === Parameters
+     # [klass<Constant>] The class/constant that you want to register.
+     # === Returns
+     # An updated array of feed parser class names.
+     def self.add_feed_class(klass)
+       feed_classes.unshift klass
+     end
+
+     # Provides a list of registered feed parsing classes.
+     #
+     # === Returns
+     # An array of class names.
+     def self.feed_classes
+       @feed_classes ||= [ITunesRSS, RSS, AtomFeedBurner, Atom]
+     end
+
+     # Makes all entry types look for the passed-in element when parsing. This is actually just a call to
+     # element (a SAXMachine method) on each entry class.
+     #
+     # === Parameters
+     # [element_tag<String>]
+     # [options<Hash>] Valid keys are the same as with SAXMachine
+     def self.add_common_feed_entry_element(element_tag, options = {})
+       # need to think of a better way to do this. will break for people who want this behavior
+       # across their added classes
+       [RSSEntry, AtomFeedBurnerEntry, AtomEntry].each do |klass|
+         klass.send(:element, element_tag, options)
+       end
+     end
+
+     # Makes all entry types look for the passed-in elements when parsing. This is actually just a call to
+     # elements (a SAXMachine method) on each entry class.
+     #
+     # === Parameters
+     # [element_tag<String>]
+     # [options<Hash>] Valid keys are the same as with SAXMachine
+     def self.add_common_feed_entry_elements(element_tag, options = {})
+       # need to think of a better way to do this. will break for people who want this behavior
+       # across their added classes
+       [RSSEntry, AtomFeedBurnerEntry, AtomEntry].each do |klass|
+         klass.send(:elements, element_tag, options)
+       end
+     end
+
+     # Fetches and returns the raw XML for each URL provided.
+     #
+     # === Parameters
+     # [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
+     # [options<Hash>] Valid keys for this argument are as follows:
+     #                 :user_agent - String that overrides the default user agent.
+     #                 :if_modified_since - Time object representing when the feed was last updated.
+     #                 :if_none_match - String that's normally an etag for the request that was stored previously.
+     # === Returns
+     # A String of XML if a single URL is passed.
+     #
+     # A Hash if multiple URLs are passed. The key will be the URL, and the value the XML.
+     def self.fetch_raw(urls, options = {})
+       url_queue = [*urls]
+       multi = Curl::Multi.new
+       responses = {}
+       url_queue.each do |url|
+         easy = Curl::Easy.new(url) do |curl|
+           curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
+           curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
+           curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
+           curl.headers["Accept-encoding"] = 'gzip, deflate'
+           curl.follow_location = true
+           curl.timeout = options[:timeout] if options[:timeout]
+           curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
+
+           curl.on_success do |c|
+             responses[url] = decode_content(c)
+           end
+           curl.on_failure do |c|
+             responses[url] = c.response_code
+           end
+         end
+         multi.add(easy)
+       end
+
+       multi.perform
+       return urls.is_a?(String) ? responses.values.first : responses
+     end
+
+     # Fetches and returns the parsed XML for each URL provided.
+     #
+     # === Parameters
+     # [urls<String> or <Array>] A single feed URL, or an array of feed URLs.
+     # [options<Hash>] Valid keys for this argument are as follows:
+     #                 * :user_agent - String that overrides the default user agent.
+     #                 * :if_modified_since - Time object representing when the feed was last updated.
+     #                 * :if_none_match - String, an etag for the request that was stored previously.
+     #                 * :on_success - Block that gets executed after a successful request.
+     #                 * :on_failure - Block that gets executed after a failed request.
+     # === Returns
+     # A Feed object if a single URL is passed.
+     #
+     # A Hash if multiple URLs are passed. The key will be the URL, and the value the Feed object.
+     def self.fetch_and_parse(urls, options = {})
+       url_queue = [*urls]
+       multi = Curl::Multi.new
+       responses = {}
+
+       # I broke these down so I would only try to do 30 simultaneously because
+       # I was getting weird errors when doing a lot. As one finishes it pops another off the queue.
+       url_queue.slice!(0, 30).each do |url|
+         add_url_to_multi(multi, url, url_queue, responses, options)
+       end
+
+       multi.perform
+       return urls.is_a?(String) ? responses.values.first : responses
+     end
+
+     # Decodes the XML document if it was compressed.
+     #
+     # === Parameters
+     # [curl_request<Curl::Easy>] The Curl::Easy response object from the request.
+     # === Returns
+     # A decoded string of XML.
+     def self.decode_content(c)
+       if c.header_str.match(/Content-Encoding: gzip/)
+         begin
+           gz = Zlib::GzipReader.new(StringIO.new(c.body_str))
+           xml = gz.read
+           gz.close
+         rescue Zlib::GzipFile::Error
+           # Maybe this is not gzipped?
+           xml = c.body_str
+         end
+       elsif c.header_str.match(/Content-Encoding: deflate/)
+         begin
+           xml = Zlib::Inflate.inflate(c.body_str)
+         rescue Zlib::DataError
+           # Some servers (such as .NET's DeflateStream) send raw deflate data, but Ruby's
+           # Zlib defaults to expecting a zlib wrapper; this decodes the stream as raw deflate.
+           xml = Zlib::Inflate.new(-Zlib::MAX_WBITS).inflate(c.body_str)
+         end
+       else
+         xml = c.body_str
+       end
+
+       xml
+     end
+
+     # Fetches and updates each Feed object provided.
+     #
+     # === Parameters
+     # [feeds<Feed> or <Array>] A single feed object, or an array of feed objects.
+     # [options<Hash>] Valid keys for this argument are as follows:
+     #                 * :user_agent - String that overrides the default user agent.
+     #                 * :on_success - Block that gets executed after a successful request.
+     #                 * :on_failure - Block that gets executed after a failed request.
+     # === Returns
+     # An updated Feed object if a single feed is passed.
+     #
+     # An array of updated Feed objects if multiple feeds are passed.
+     def self.update(feeds, options = {})
+       feed_queue = [*feeds]
+       multi = Curl::Multi.new
+       responses = {}
+
+       feed_queue.slice!(0, 30).each do |feed|
+         add_feed_to_multi(multi, feed, feed_queue, responses, options)
+       end
+
+       multi.perform
+       return responses.size == 1 ? responses.values.first : responses.values
+     end
+
+     # An abstraction for adding a feed by URL to the passed Curl::Multi stack.
+     #
+     # === Parameters
+     # [multi<Curl::Multi>] The Curl::Multi object that the request should be added to.
+     # [url<String>] The URL of the feed that you would like to be fetched.
+     # [url_queue<Array>] An array of URLs that are queued for request.
+     # [responses<Hash>] Existing responses that you want the response from the request added to.
+     # [options<Hash>] Valid keys for this argument are as follows:
+     #                 * :user_agent - String that overrides the default user agent.
+     #                 * :on_success - Block that gets executed after a successful request.
+     #                 * :on_failure - Block that gets executed after a failed request.
+     # === Returns
+     # The updated Curl::Multi object with the request details added to its stack.
+     def self.add_url_to_multi(multi, url, url_queue, responses, options)
+       easy = Curl::Easy.new(url) do |curl|
+         curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
+         curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
+         curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
+         curl.headers["Accept-encoding"] = 'gzip, deflate'
+         curl.timeout = options[:timeout] if options[:timeout]
+         curl.follow_location = true
+         curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
+
+         curl.on_success do |c|
+           add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
+           xml = decode_content(c)
+           klass = determine_feed_parser_for_xml(xml)
+
+           if klass
+             feed = klass.parse(xml)
+             feed.feed_url = c.last_effective_url
+             feed.etag = etag_from_header(c.header_str)
+             feed.last_modified = last_modified_from_header(c.header_str)
+             responses[url] = feed
+             options[:on_success].call(url, feed) if options.has_key?(:on_success)
+           else
+             # puts "Error determining parser for #{url} - #{c.last_effective_url}"
+             # raise NoParserAvailable.new("no valid parser for content.") (this would unfortunately fail the whole 'multi', so it's not really usable)
+             options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
+           end
+         end
+
+         curl.on_failure do |c|
+           add_url_to_multi(multi, url_queue.shift, url_queue, responses, options) unless url_queue.empty?
+           responses[url] = c.response_code
+           options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
+         end
+       end
+       multi.add(easy)
+     end
+
+     # An abstraction for adding a feed by a Feed object to the passed Curl::Multi stack.
+     #
+     # === Parameters
+     # [multi<Curl::Multi>] The Curl::Multi object that the request should be added to.
+     # [feed<Feed>] A feed object that you would like to be fetched.
+     # [feed_queue<Array>] An array of feed objects that are queued for request.
+     # [responses<Hash>] Existing responses that you want the response from the request added to.
+     # [options<Hash>] Valid keys for this argument are as follows:
+     #                 * :user_agent - String that overrides the default user agent.
+     #                 * :on_success - Block that gets executed after a successful request.
+     #                 * :on_failure - Block that gets executed after a failed request.
+     # === Returns
+     # The updated Curl::Multi object with the request details added to its stack.
+     def self.add_feed_to_multi(multi, feed, feed_queue, responses, options)
+       easy = Curl::Easy.new(feed.feed_url) do |curl|
+         curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
+         curl.headers["If-Modified-Since"] = feed.last_modified.httpdate if feed.last_modified
+         curl.headers["If-None-Match"] = feed.etag if feed.etag
+         curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
+         curl.timeout = options[:timeout] if options[:timeout]
+         curl.follow_location = true
+
+         curl.on_success do |c|
+           add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
+           updated_feed = Feed.parse(c.body_str)
+           updated_feed.feed_url = c.last_effective_url
+           updated_feed.etag = etag_from_header(c.header_str)
+           updated_feed.last_modified = last_modified_from_header(c.header_str)
+           feed.update_from_feed(updated_feed)
+           responses[feed.feed_url] = feed
+           options[:on_success].call(feed) if options.has_key?(:on_success)
+         end
+
+         curl.on_failure do |c|
+           add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
+           response_code = c.response_code
+           if response_code == 304 # it's not modified. this isn't an error condition
+             responses[feed.feed_url] = feed
+             options[:on_success].call(feed) if options.has_key?(:on_success)
+           else
+             responses[feed.feed_url] = c.response_code
+             options[:on_failure].call(feed, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
+           end
+         end
+       end
+       multi.add(easy)
+     end
+
+     # Determines the etag from the request headers.
+     #
+     # === Parameters
+     # [header<String>] Raw request header returned from the request
+     # === Returns
+     # A string of the etag or nil if it cannot be found in the headers.
+     def self.etag_from_header(header)
+       header =~ /.*ETag:\s(.*)\r/
+       $1
+     end
+
+     # Determines the last modified date from the request headers.
+     #
+     # === Parameters
+     # [header<String>] Raw request header returned from the request
+     # === Returns
+     # A Time object of the last modified date or nil if it cannot be found in the headers.
+     def self.last_modified_from_header(header)
+       header =~ /.*Last-Modified:\s(.*)\r/
+       Time.parse($1) if $1
+     end
+   end
+ end
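
A minimal usage sketch of the Feed API above (not part of the diff; the feed URL, file name, and callbacks are illustrative placeholders):

    require 'feedzirra'

    # Parse raw XML directly; raises Feedzirra::NoParserAvailable if nothing matches.
    feed = Feedzirra::Feed.parse(File.read("feed.xml"))

    # Fetch and parse a single URL; on_success/on_failure receive the arguments
    # shown in add_url_to_multi above.
    feed = Feedzirra::Feed.fetch_and_parse("http://example.com/feed.xml",
      :on_success => lambda { |url, feed| puts "fetched #{url}" },
      :on_failure => lambda { |url, code, header, body| puts "#{url} failed: #{code}" })
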
data/lib/feedzirra/feed_entry_utilities.rb ADDED
@@ -0,0 +1,45 @@
+ module Feedzirra
+   module FeedEntryUtilities
+     def published
+       @published || @updated
+     end
+
+     def parse_datetime(string)
+       begin
+         DateTime.parse(string).feed_utils_to_gm_time
+       rescue
+         puts "DATE CAN'T BE PARSED: #{string}"
+         nil
+       end
+     end
+
+     ##
+     # Returns the id of the entry, or its url if no id is present, as some formats don't support ids.
+     def id
+       @id || @url
+     end
+
+     ##
+     # Writer for published. By default, we keep the oldest publish time found.
+     def published=(val)
+       parsed = parse_datetime(val)
+       @published = parsed if parsed && (!@published || parsed < @published)
+     end
+
+     ##
+     # Writer for updated. By default, we keep the most recent update time found.
+     def updated=(val)
+       parsed = parse_datetime(val)
+       @updated = parsed if parsed && (!@updated || parsed > @updated)
+     end
+
+     def sanitize!
+       self.title.sanitize!   if self.title
+       self.author.sanitize!  if self.author
+       self.summary.sanitize! if self.summary
+       self.content.sanitize! if self.content
+     end
+
+     alias_method :last_modified, :published
+   end
+ end
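
A sketch of the writer semantics defined above, using the RSSEntry class from later in this diff (the date strings are made up):

    entry = Feedzirra::RSSEntry.new
    entry.published = "Thu, 08 Jan 2009 12:00:00 GMT"
    entry.published = "Wed, 07 Jan 2009 12:00:00 GMT"   # older value wins for published
    entry.published                                     # => Time for 2009-01-07 12:00:00 GMT

    entry.updated = "Wed, 07 Jan 2009 12:00:00 GMT"
    entry.updated = "Thu, 08 Jan 2009 12:00:00 GMT"     # newer value wins for updated
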
data/lib/feedzirra/feed_utilities.rb ADDED
@@ -0,0 +1,71 @@
+ module Feedzirra
+   module FeedUtilities
+     UPDATABLE_ATTRIBUTES = %w(title feed_url url last_modified)
+
+     attr_writer   :new_entries, :updated, :last_modified
+     attr_accessor :etag
+
+     def last_modified
+       @last_modified ||= begin
+         entry = entries.reject { |e| e.published.nil? }.sort_by { |e| e.published }.last
+         entry ? entry.published : nil
+       end
+     end
+
+     def updated?
+       @updated
+     end
+
+     def new_entries
+       @new_entries ||= []
+     end
+
+     def has_new_entries?
+       new_entries.size > 0
+     end
+
+     def update_from_feed(feed)
+       self.new_entries += find_new_entries_for(feed)
+       self.entries.unshift(*self.new_entries)
+
+       updated! if UPDATABLE_ATTRIBUTES.any? { |name| update_attribute(feed, name) }
+     end
+
+     def update_attribute(feed, name)
+       old_value, new_value = send(name), feed.send(name)
+
+       if old_value != new_value
+         send("#{name}=", new_value)
+       end
+     end
+
+     def sanitize_entries!
+       entries.each { |entry| entry.sanitize! }
+     end
+
+     private
+
+     def updated!
+       @updated = true
+     end
+
+     def find_new_entries_for(feed)
+       # this implementation is a hack, which is why it's so ugly.
+       # it's to get around the fact that not all feeds have a published date.
+       # however, they're always ordered with the newest one first.
+       # So we go through the entries just parsed and insert each one as a new entry
+       # until we get to one that has the same url as the newest for the feed
+       latest_entry = self.entries.first
+       found_new_entries = []
+       feed.entries.each do |entry|
+         break if entry.url == latest_entry.url
+         found_new_entries << entry
+       end
+       found_new_entries
+     end
+
+     def existing_entry?(test_entry)
+       entries.any? { |entry| entry.url == test_entry.url }
+     end
+   end
+ end
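
A sketch of the update cycle these helpers support (the URL is a placeholder):

    feed = Feedzirra::Feed.fetch_and_parse("http://example.com/feed.xml")

    # Sometime later: re-fetch using the stored etag/last-modified and merge.
    updated = Feedzirra::Feed.update(feed)
    updated.updated?          # => true only if an UPDATABLE_ATTRIBUTES value changed
    updated.has_new_entries?  # => true if entries were prepended by update_from_feed
    updated.new_entries       # => just the entries that weren't seen before
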
data/lib/feedzirra/itunes_rss.rb ADDED
@@ -0,0 +1,46 @@
+ module Feedzirra
+   # iTunes is RSS 2.0 + some Apple extensions
+   # Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
+   class ITunesRSS
+     include SAXMachine
+     include FeedUtilities
+
+     attr_accessor :feed_url
+
+     # RSS 2.0 elements that need including
+     element :copyright
+     element :description
+     element :language
+     element :managingEditor
+     element :title
+     element :link, :as => :url
+
+     # If author is not present use managingEditor on the channel
+     element :"itunes:author", :as => :itunes_author
+     element :"itunes:block", :as => :itunes_block
+     element :"itunes:image", :value => :href, :as => :itunes_image
+     element :"itunes:explicit", :as => :itunes_explicit
+     element :"itunes:keywords", :as => :itunes_keywords
+     # New URL for the podcast feed
+     element :"itunes:new-feed-url", :as => :itunes_new_feed_url
+     element :"itunes:subtitle", :as => :itunes_subtitle
+     # If summary is not present, use the description tag
+     element :"itunes:summary", :as => :itunes_summary
+
+     # iTunes RSS feeds can have multiple main categories...
+     # ...and multiple sub-categories per category
+     # TODO: subcategories are not supported correctly - they are parsed at the same level
+     # as the main categories
+     elements :"itunes:category", :as => :itunes_categories, :value => :text
+
+     elements :"itunes:owner", :as => :itunes_owners, :class => ITunesRSSOwner
+
+     elements :item, :as => :entries, :class => ITunesRSSItem
+
+     def self.able_to_parse?(xml)
+       xml =~ /xmlns:itunes="http:\/\/www.itunes.com\/dtds\/podcast-1.0.dtd"/
+     end
+
+   end
+
+ end
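
A sketch of parsing an iTunes feed with the class above (the URL is a placeholder):

    xml = Feedzirra::Feed.fetch_raw("http://example.com/podcast.xml")
    Feedzirra::Feed.determine_feed_parser_for_xml(xml)  # => Feedzirra::ITunesRSS when the itunes namespace is declared
    podcast = Feedzirra::Feed.parse(xml)
    podcast.itunes_author
    podcast.itunes_categories     # flat list; see the TODO about subcategories
    podcast.entries.first.title
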
data/lib/feedzirra/itunes_rss_item.rb ADDED
@@ -0,0 +1,28 @@
+ module Feedzirra
+   # iTunes extensions to the standard RSS 2.0 item
+   # Source: http://www.apple.com/itunes/whatson/podcasts/specs.html
+   class ITunesRSSItem
+     include SAXMachine
+     include FeedEntryUtilities
+     element :author
+     element :guid
+     element :title
+     element :link, :as => :url
+     element :description, :as => :summary
+     element :pubDate, :as => :published
+
+     # If author is not present use author tag on the item
+     element :"itunes:author", :as => :itunes_author
+     element :"itunes:block", :as => :itunes_block
+     element :"itunes:duration", :as => :itunes_duration
+     element :"itunes:explicit", :as => :itunes_explicit
+     element :"itunes:keywords", :as => :itunes_keywords
+     element :"itunes:subtitle", :as => :itunes_subtitle
+     # If summary is not present, use the description tag
+     element :"itunes:summary", :as => :itunes_summary
+     element :enclosure, :value => :length, :as => :enclosure_length
+     element :enclosure, :value => :type, :as => :enclosure_type
+     element :enclosure, :value => :url, :as => :enclosure_url
+   end
+
+ end
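
The three enclosure lines above pull different attributes off the same tag via SAXMachine's :value option. A sketch of what an item exposes (the values are illustrative, continuing the podcast sketch above):

    item = podcast.entries.first   # an ITunesRSSItem
    item.enclosure_url             # e.g. "http://example.com/episodes/1.mp3"
    item.enclosure_type            # e.g. "audio/mpeg"
    item.enclosure_length          # e.g. "4834743"
    item.itunes_duration           # e.g. "28:45"
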
data/lib/feedzirra/itunes_rss_owner.rb ADDED
@@ -0,0 +1,8 @@
+ module Feedzirra
+   class ITunesRSSOwner
+     include SAXMachine
+     include FeedUtilities
+     element :"itunes:name", :as => :name
+     element :"itunes:email", :as => :email
+   end
+ end
data/lib/feedzirra/rss.rb ADDED
@@ -0,0 +1,23 @@
+ module Feedzirra
+   # == Summary
+   # Parser for dealing with RSS feeds.
+   #
+   # == Attributes
+   # * title
+   # * feed_url
+   # * url
+   # * entries
+   class RSS
+     include SAXMachine
+     include FeedUtilities
+     element :title
+     element :link, :as => :url
+     elements :item, :as => :entries, :class => RSSEntry
+
+     attr_accessor :feed_url
+
+     def self.able_to_parse?(xml) #:nodoc:
+       xml =~ /\<(rss|rdf)/
+     end
+   end
+ end
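
A self-contained sketch showing what the RSS parser extracts (the XML is made up):

    xml = <<-XML
      <rss version="2.0">
        <channel>
          <title>Example Blog</title>
          <link>http://example.com/</link>
          <item>
            <title>First post</title>
            <link>http://example.com/posts/1</link>
            <pubDate>Wed, 07 Jan 2009 12:00:00 GMT</pubDate>
          </item>
        </channel>
      </rss>
    XML

    feed = Feedzirra::Feed.parse(xml)  # able_to_parse? matches the <rss tag
    feed.title                     # => "Example Blog"
    feed.url                       # => "http://example.com/"
    feed.entries.first.title       # => "First post"
    feed.entries.first.published   # => Time for 2009-01-07 12:00:00 GMT
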
data/lib/feedzirra/rss_entry.rb ADDED
@@ -0,0 +1,34 @@
+ module Feedzirra
+   # == Summary
+   # Parser for dealing with RSS and RDF feed entries.
+   #
+   # == Attributes
+   # * title
+   # * url
+   # * author
+   # * content
+   # * summary
+   # * published
+   # * categories
+   class RSSEntry
+     include SAXMachine
+     include FeedEntryUtilities
+     element :title
+     element :link, :as => :url
+
+     element :"dc:creator", :as => :author
+     element :"content:encoded", :as => :content
+     element :description, :as => :summary
+
+     element :pubDate, :as => :published
+     element :"dc:date", :as => :published
+     element :"dc:Date", :as => :published
+     element :"dcterms:created", :as => :published
+
+     element :"dcterms:modified", :as => :updated
+     element :issued, :as => :published
+     elements :category, :as => :categories
+
+     element :guid, :as => :id
+   end
+ end
data/lib/feedzirra.rb ADDED
@@ -0,0 +1,34 @@
+ $LOAD_PATH.unshift(File.dirname(__FILE__)) unless $LOAD_PATH.include?(File.dirname(__FILE__))
+
+ gem 'activesupport'
+
+ require 'zlib'
+ require 'curb'
+ require 'sax-machine'
+ require 'dryopteris'
+ require 'uri'
+ require 'active_support/basic_object'
+ require 'active_support/core_ext/object'
+ require 'active_support/core_ext/time'
+
+ require 'core_ext/date'
+ require 'core_ext/string'
+
+ require 'feedzirra/feed_utilities'
+ require 'feedzirra/feed_entry_utilities'
+ require 'feedzirra/feed'
+
+ require 'feedzirra/rss_entry'
+ require 'feedzirra/itunes_rss_owner'
+ require 'feedzirra/itunes_rss_item'
+ require 'feedzirra/atom_entry'
+ require 'feedzirra/atom_feed_burner_entry'
+
+ require 'feedzirra/rss'
+ require 'feedzirra/itunes_rss'
+ require 'feedzirra/atom'
+ require 'feedzirra/atom_feed_burner'
+
+ module Feedzirra
+   VERSION = "0.0.8"
+ end
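
Since feed_classes is consulted through able_to_parse?, a custom format can be plugged in with add_feed_class. A hypothetical sketch (the class name, marker regex, and reuse of RSSEntry are all illustrative, not part of the gem):

    class MyFeedFormat
      include SAXMachine
      include Feedzirra::FeedUtilities
      attr_accessor :feed_url   # needed by fetch_and_parse, which assigns it

      element  :title
      elements :item, :as => :entries, :class => Feedzirra::RSSEntry

      # Only the first 1000 characters of the document are passed in here.
      def self.able_to_parse?(xml)
        xml =~ /my-feed-format/
      end
    end

    # unshifts onto feed_classes, so this parser is checked before the defaults
    Feedzirra::Feed.add_feed_class(MyFeedFormat)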