pauldix-feedzirra 0.0.8 → 0.0.9
Sign up to get free protection for your applications and to get access to all the features.
- data/README.textile +11 -3
- data/lib/feedzirra.rb +1 -1
- data/lib/feedzirra/feed.rb +26 -18
- metadata +1 -1
data/README.textile
CHANGED
@@ -116,6 +116,12 @@ Feedzirra::Feed.add_common_feed_entry_element("wfw:commentRss", :as => :comment_
|
|
116
116
|
# AtomEntry classes. Now you can access those in an atom feed:
|
117
117
|
Feedzirra::Feed.parse(some_atom_xml).entries.first.comment_rss_ # => wfw:commentRss is now parsed!
|
118
118
|
|
119
|
+
|
120
|
+
# You can also define your own parsers and add them to the ones Feedzirra knows about. Here's an example that adds
|
121
|
+
# ITunesRSS parsing. It's included in the library, but not part of Feedzirra by default because some of the field names
|
122
|
+
# differ from other classes, thus breaking normalization.
|
123
|
+
Feedzirra::Feed.add_feed_class(ITunesRSS) # now all feeds will be checked to see if they match ITunesRSS before others
|
124
|
+
|
119
125
|
# You can also access http basic auth feeds. Unfortunately, you can't get to these inside of a bulk get of a bunch of feeds.
|
120
126
|
# You'll have to do it on its own like so:
|
121
127
|
Feedzirra::Feed.fetch_and_parse(some_url, :http_authentication => ["myusername", "mypassword"])
|
@@ -149,13 +155,15 @@ h2. Next Steps
|
|
149
155
|
This thing needs to hammer on many different feeds in the wild. I'm sure there will be bugs. I want to find them and crush them. I didn't bother using the test suite for feedparser. I wanted to start fresh.
|
150
156
|
|
151
157
|
Here are some more specific TODOs.
|
158
|
+
* Fix the iTunes parser so things are normalized again
|
159
|
+
* Fix the Zlib deflate error
|
160
|
+
* Fix this error: http://github.com/inbox/70508
|
161
|
+
* Convert to use Typhoeus instead of taf2-curb
|
162
|
+
* Make the entries parse all link fields
|
152
163
|
* Make a feedzirra-rails gem to integrate feedzirra seamlessly with Rails and ActiveRecord.
|
153
|
-
* Add support for authenticated feeds.
|
154
164
|
* Create a super sweet DSL for defining new parsers.
|
155
165
|
* Test against Ruby 1.9.1 and fix any bugs.
|
156
|
-
* I'm not keeping track of modified on entries. Should I add this?
|
157
166
|
* Clean up the fetching code inside feed.rb so it doesn't suck so hard.
|
158
|
-
* Make the feed_spec actually mock stuff out so it doesn't hit the net.
|
159
167
|
* Readdress how feeds determine if they can parse a document. Maybe I should use namespaces instead?
|
160
168
|
|
161
169
|
h2. LICENSE
|
data/lib/feedzirra.rb
CHANGED
data/lib/feedzirra/feed.rb
CHANGED
@@ -46,7 +46,7 @@ module Feedzirra
|
|
46
46
|
# === Returns
|
47
47
|
# An array of class names.
|
48
48
|
def self.feed_classes
|
49
|
-
@feed_classes ||= [
|
49
|
+
@feed_classes ||= [RSS, AtomFeedBurner, Atom]
|
50
50
|
end
|
51
51
|
|
52
52
|
# Makes all entry types look for the passed in element to parse. This is actually just a call to
|
@@ -58,7 +58,7 @@ module Feedzirra
|
|
58
58
|
def self.add_common_feed_entry_element(element_tag, options = {})
|
59
59
|
# need to think of a better way to do this. will break for people who want this behavior
|
60
60
|
# across their added classes
|
61
|
-
|
61
|
+
feed_classes.each do |klass|
|
62
62
|
klass.send(:element, element_tag, options)
|
63
63
|
end
|
64
64
|
end
|
@@ -100,7 +100,7 @@ module Feedzirra
|
|
100
100
|
curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
|
101
101
|
curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
|
102
102
|
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
|
103
|
-
curl.headers["Accept-encoding"] = 'gzip, deflate'
|
103
|
+
# curl.headers["Accept-encoding"] = 'gzip, deflate'
|
104
104
|
curl.follow_location = true
|
105
105
|
curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
|
106
106
|
|
@@ -216,7 +216,7 @@ module Feedzirra
|
|
216
216
|
curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
|
217
217
|
curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
|
218
218
|
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
|
219
|
-
curl.headers["Accept-encoding"] = 'gzip, deflate'
|
219
|
+
# curl.headers["Accept-encoding"] = 'gzip, deflate'
|
220
220
|
curl.follow_location = true
|
221
221
|
curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
|
222
222
|
|
@@ -226,12 +226,16 @@ module Feedzirra
|
|
226
226
|
klass = determine_feed_parser_for_xml(xml)
|
227
227
|
|
228
228
|
if klass
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
229
|
+
begin
|
230
|
+
feed = klass.parse(xml)
|
231
|
+
feed.feed_url = c.last_effective_url
|
232
|
+
feed.etag = etag_from_header(c.header_str)
|
233
|
+
feed.last_modified = last_modified_from_header(c.header_str)
|
234
|
+
responses[url] = feed
|
235
|
+
options[:on_success].call(url, feed) if options.has_key?(:on_success)
|
236
|
+
rescue Exception => e
|
237
|
+
options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
|
238
|
+
end
|
235
239
|
else
|
236
240
|
# puts "Error determining parser for #{url} - #{c.last_effective_url}"
|
237
241
|
# raise NoParserAvailable.new("no valid parser for content.") (this would unfortunately fail the whole 'multi', so it's not really useable)
|
@@ -271,14 +275,18 @@ module Feedzirra
|
|
271
275
|
curl.follow_location = true
|
272
276
|
|
273
277
|
curl.on_success do |c|
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
278
|
+
begin
|
279
|
+
add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
|
280
|
+
updated_feed = Feed.parse(c.body_str)
|
281
|
+
updated_feed.feed_url = c.last_effective_url
|
282
|
+
updated_feed.etag = etag_from_header(c.header_str)
|
283
|
+
updated_feed.last_modified = last_modified_from_header(c.header_str)
|
284
|
+
feed.update_from_feed(updated_feed)
|
285
|
+
responses[feed.feed_url] = feed
|
286
|
+
options[:on_success].call(feed) if options.has_key?(:on_success)
|
287
|
+
rescue Exception => e
|
288
|
+
options[:on_failure].call(feed, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
|
289
|
+
end
|
282
290
|
end
|
283
291
|
|
284
292
|
curl.on_failure do |c|
|