pauldix-feedzirra 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.textile +11 -3
- data/lib/feedzirra.rb +1 -1
- data/lib/feedzirra/feed.rb +26 -18
- metadata +1 -1
data/README.textile
CHANGED
@@ -116,6 +116,12 @@ Feedzirra::Feed.add_common_feed_entry_element("wfw:commentRss", :as => :comment_
|
|
116
116
|
# AtomEntry classes. Now you can access those in an atom feed:
|
117
117
|
Feedzirra::Feed.parse(some_atom_xml).entries.first.comment_rss_ # => wfw:commentRss is now parsed!
|
118
118
|
|
119
|
+
|
120
|
+
# You can also define your own parsers and add them to the ones Feedzirra knows about. Here's an example that adds
|
121
|
+
# ITunesRSS parsing. It's included in the library, but not part of Feedzirra by default because some of the field names
|
122
|
+
# differ from other classes, thus breaking normalization.
|
123
|
+
Feedzirra::Feed.add_feed_class(ITunesRSS) # now all feeds will be checked to see if they match ITunesRSS before others
|
124
|
+
|
119
125
|
# You can also access http basic auth feeds. Unfortunately, you can't get to these inside of a bulk get of a bunch of feeds.
|
120
126
|
# You'll have to do it on its own like so:
|
121
127
|
Feedzirra::Feed.fetch_and_parse(some_url, :http_authentication => ["myusername", "mypassword"])
|
@@ -149,13 +155,15 @@ h2. Next Steps
|
|
149
155
|
This thing needs to hammer on many different feeds in the wild. I'm sure there will be bugs. I want to find them and crush them. I didn't bother using the test suite for feedparser. I wanted to start fresh.
|
150
156
|
|
151
157
|
Here are some more specific TODOs.
|
158
|
+
* Fix the iTunes parser so things are normalized again
|
159
|
+
* Fix the Zlib deflate error
|
160
|
+
* Fix this error: http://github.com/inbox/70508
|
161
|
+
* Convert to use Typhoeus instead of taf2-curb
|
162
|
+
* Make the entries parse all link fields
|
152
163
|
* Make a feedzirra-rails gem to integrate feedzirra seamlessly with Rails and ActiveRecord.
|
153
|
-
* Add support for authenticated feeds.
|
154
164
|
* Create a super sweet DSL for defining new parsers.
|
155
165
|
* Test against Ruby 1.9.1 and fix any bugs.
|
156
|
-
* I'm not keeping track of modified on entries. Should I add this?
|
157
166
|
* Clean up the fetching code inside feed.rb so it doesn't suck so hard.
|
158
|
-
* Make the feed_spec actually mock stuff out so it doesn't hit the net.
|
159
167
|
* Readdress how feeds determine if they can parse a document. Maybe I should use namespaces instead?
|
160
168
|
|
161
169
|
h2. LICENSE
|
data/lib/feedzirra.rb
CHANGED
data/lib/feedzirra/feed.rb
CHANGED
@@ -46,7 +46,7 @@ module Feedzirra
|
|
46
46
|
# === Returns
|
47
47
|
# An array of class names.
|
48
48
|
def self.feed_classes
|
49
|
-
@feed_classes ||= [
|
49
|
+
@feed_classes ||= [RSS, AtomFeedBurner, Atom]
|
50
50
|
end
|
51
51
|
|
52
52
|
# Makes all entry types look for the passed in element to parse. This is actually just a call to
|
@@ -58,7 +58,7 @@ module Feedzirra
|
|
58
58
|
def self.add_common_feed_entry_element(element_tag, options = {})
|
59
59
|
# need to think of a better way to do this. will break for people who want this behavior
|
60
60
|
# across their added classes
|
61
|
-
|
61
|
+
feed_classes.each do |klass|
|
62
62
|
klass.send(:element, element_tag, options)
|
63
63
|
end
|
64
64
|
end
|
@@ -100,7 +100,7 @@ module Feedzirra
|
|
100
100
|
curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
|
101
101
|
curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
|
102
102
|
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
|
103
|
-
curl.headers["Accept-encoding"] = 'gzip, deflate'
|
103
|
+
# curl.headers["Accept-encoding"] = 'gzip, deflate'
|
104
104
|
curl.follow_location = true
|
105
105
|
curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
|
106
106
|
|
@@ -216,7 +216,7 @@ module Feedzirra
|
|
216
216
|
curl.headers["User-Agent"] = (options[:user_agent] || USER_AGENT)
|
217
217
|
curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
|
218
218
|
curl.headers["If-None-Match"] = options[:if_none_match] if options.has_key?(:if_none_match)
|
219
|
-
curl.headers["Accept-encoding"] = 'gzip, deflate'
|
219
|
+
# curl.headers["Accept-encoding"] = 'gzip, deflate'
|
220
220
|
curl.follow_location = true
|
221
221
|
curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
|
222
222
|
|
@@ -226,12 +226,16 @@ module Feedzirra
|
|
226
226
|
klass = determine_feed_parser_for_xml(xml)
|
227
227
|
|
228
228
|
if klass
|
229
|
-
|
230
|
-
|
231
|
-
|
232
|
-
|
233
|
-
|
234
|
-
|
229
|
+
begin
|
230
|
+
feed = klass.parse(xml)
|
231
|
+
feed.feed_url = c.last_effective_url
|
232
|
+
feed.etag = etag_from_header(c.header_str)
|
233
|
+
feed.last_modified = last_modified_from_header(c.header_str)
|
234
|
+
responses[url] = feed
|
235
|
+
options[:on_success].call(url, feed) if options.has_key?(:on_success)
|
236
|
+
rescue Exception => e
|
237
|
+
options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
|
238
|
+
end
|
235
239
|
else
|
236
240
|
# puts "Error determining parser for #{url} - #{c.last_effective_url}"
|
237
241
|
# raise NoParserAvailable.new("no valid parser for content.") (this would unfortunately fail the whole 'multi', so it's not really usable)
|
@@ -271,14 +275,18 @@ module Feedzirra
|
|
271
275
|
curl.follow_location = true
|
272
276
|
|
273
277
|
curl.on_success do |c|
|
274
|
-
|
275
|
-
|
276
|
-
|
277
|
-
|
278
|
-
|
279
|
-
|
280
|
-
|
281
|
-
|
278
|
+
begin
|
279
|
+
add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
|
280
|
+
updated_feed = Feed.parse(c.body_str)
|
281
|
+
updated_feed.feed_url = c.last_effective_url
|
282
|
+
updated_feed.etag = etag_from_header(c.header_str)
|
283
|
+
updated_feed.last_modified = last_modified_from_header(c.header_str)
|
284
|
+
feed.update_from_feed(updated_feed)
|
285
|
+
responses[feed.feed_url] = feed
|
286
|
+
options[:on_success].call(feed) if options.has_key?(:on_success)
|
287
|
+
rescue Exception => e
|
288
|
+
options[:on_failure].call(feed, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
|
289
|
+
end
|
282
290
|
end
|
283
291
|
|
284
292
|
curl.on_failure do |c|
|