pauldix-feedzirra 0.0.8 → 0.0.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.textile +11 -3
- data/lib/feedzirra.rb +1 -1
- data/lib/feedzirra/feed.rb +26 -18
- metadata +1 -1
    
        data/README.textile
    CHANGED
    
    | @@ -116,6 +116,12 @@ Feedzirra::Feed.add_common_feed_entry_element("wfw:commentRss", :as => :comment_ | |
| 116 116 | 
             
            # AtomEntry classes. Now you can access those in an atom feed:
         | 
| 117 117 | 
             
            Feedzirra::Feed.parse(some_atom_xml).entries.first.comment_rss_ # => wfw:commentRss is now parsed!
         | 
| 118 118 |  | 
| 119 | 
            +
             | 
| 120 | 
            +
            # You can also define your own parsers and add them to the ones Feedzirra knows about. Here's an example that adds 
         | 
| 121 | 
            +
            # ITunesRSS parsing. It's included in the library, but not part of Feedzirra by default because some of the field names 
         | 
| 122 | 
            +
            # differ from other classes, thus breaking normalization.
         | 
| 123 | 
            +
            Feedzirra::Feed.add_feed_class(ITunesRSS) # now all feeds will be checked to see if they match ITunesRSS before others
         | 
| 124 | 
            +
             | 
| 119 125 | 
             
            # You can also access http basic auth feeds. Unfortunately, you can't get to these inside of a bulk get of a bunch of feeds.
         | 
| 120 126 | 
             
            # You'll have to do it on its own like so:
         | 
| 121 127 | 
             
            Feedzirra::Feed.fetch_and_parse(some_url, :http_authentication => ["myusername", "mypassword"])
         | 
| @@ -149,13 +155,15 @@ h2. Next Steps | |
| 149 155 | 
             
            This thing needs to hammer on many different feeds in the wild. I'm sure there will be bugs. I want to find them and crush them. I didn't bother using the test suite for feedparser. I wanted to start fresh.
         | 
| 150 156 |  | 
| 151 157 | 
             
            Here are some more specific TODOs.
         | 
| 158 | 
            +
            * Fix the iTunes parser so things are normalized again
         | 
| 159 | 
            +
            * Fix the Zlib deflate error
         | 
| 160 | 
            +
            * Fix this error: http://github.com/inbox/70508
         | 
| 161 | 
            +
            * Convert to use Typhoeus instead of taf2-curb
         | 
| 162 | 
            +
            * Make the entries parse all link fields
         | 
| 152 163 | 
             
            * Make a feedzirra-rails gem to integrate feedzirra seamlessly with Rails and ActiveRecord.
         | 
| 153 | 
            -
            * Add support for authenticated feeds.
         | 
| 154 164 | 
             
            * Create a super sweet DSL for defining new parsers.
         | 
| 155 165 | 
             
            * Test against Ruby 1.9.1 and fix any bugs.
         | 
| 156 | 
            -
            * I'm not keeping track of modified on entries. Should I add this?
         | 
| 157 166 | 
             
            * Clean up the fetching code inside feed.rb so it doesn't suck so hard.
         | 
| 158 | 
            -
            * Make the feed_spec actually mock stuff out so it doesn't hit the net.
         | 
| 159 167 | 
             
            * Readdress how feeds determine if they can parse a document. Maybe I should use namespaces instead?
         | 
| 160 168 |  | 
| 161 169 | 
             
            h2. LICENSE
         | 
    
        data/lib/feedzirra.rb
    CHANGED
    
    
    
        data/lib/feedzirra/feed.rb
    CHANGED
    
    | @@ -46,7 +46,7 @@ module Feedzirra | |
| 46 46 | 
             
                # === Returns
         | 
| 47 47 | 
             
                # An array of class names.
         | 
| 48 48 | 
             
                def self.feed_classes
         | 
| 49 | 
            -
                  @feed_classes ||= [ | 
| 49 | 
            +
                  @feed_classes ||= [RSS, AtomFeedBurner, Atom]
         | 
| 50 50 | 
             
                end
         | 
| 51 51 |  | 
| 52 52 | 
             
                # Makes all entry types look for the passed in element to parse. This is actually just a call to 
         | 
| @@ -58,7 +58,7 @@ module Feedzirra | |
| 58 58 | 
             
                def self.add_common_feed_entry_element(element_tag, options = {})
         | 
| 59 59 | 
             
                  # need to think of a better way to do this. will break for people who want this behavior
         | 
| 60 60 | 
             
                  # across their added classes
         | 
| 61 | 
            -
                   | 
| 61 | 
            +
                  feed_classes.each do |klass|
         | 
| 62 62 | 
             
                    klass.send(:element, element_tag, options)
         | 
| 63 63 | 
             
                  end
         | 
| 64 64 | 
             
                end
         | 
| @@ -100,7 +100,7 @@ module Feedzirra | |
| 100 100 | 
             
                      curl.headers["User-Agent"]        = (options[:user_agent] || USER_AGENT)
         | 
| 101 101 | 
             
                      curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
         | 
| 102 102 | 
             
                      curl.headers["If-None-Match"]     = options[:if_none_match] if options.has_key?(:if_none_match)
         | 
| 103 | 
            -
                      curl.headers["Accept-encoding"]   = 'gzip, deflate'
         | 
| 103 | 
            +
            #          curl.headers["Accept-encoding"]   = 'gzip, deflate'
         | 
| 104 104 | 
             
                      curl.follow_location = true
         | 
| 105 105 | 
             
                      curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
         | 
| 106 106 |  | 
| @@ -216,7 +216,7 @@ module Feedzirra | |
| 216 216 | 
             
                    curl.headers["User-Agent"]        = (options[:user_agent] || USER_AGENT)
         | 
| 217 217 | 
             
                    curl.headers["If-Modified-Since"] = options[:if_modified_since].httpdate if options.has_key?(:if_modified_since)
         | 
| 218 218 | 
             
                    curl.headers["If-None-Match"]     = options[:if_none_match] if options.has_key?(:if_none_match)
         | 
| 219 | 
            -
                    curl.headers["Accept-encoding"]   = 'gzip, deflate'
         | 
| 219 | 
            +
            #        curl.headers["Accept-encoding"]   = 'gzip, deflate'
         | 
| 220 220 | 
             
                    curl.follow_location = true
         | 
| 221 221 | 
             
                    curl.userpwd = options[:http_authentication].join(':') if options.has_key?(:http_authentication)
         | 
| 222 222 |  | 
| @@ -226,12 +226,16 @@ module Feedzirra | |
| 226 226 | 
             
                      klass = determine_feed_parser_for_xml(xml)
         | 
| 227 227 |  | 
| 228 228 | 
             
                      if klass
         | 
| 229 | 
            -
                         | 
| 230 | 
            -
             | 
| 231 | 
            -
             | 
| 232 | 
            -
             | 
| 233 | 
            -
             | 
| 234 | 
            -
             | 
| 229 | 
            +
                        begin
         | 
| 230 | 
            +
                          feed = klass.parse(xml)
         | 
| 231 | 
            +
                          feed.feed_url = c.last_effective_url
         | 
| 232 | 
            +
                          feed.etag = etag_from_header(c.header_str)
         | 
| 233 | 
            +
                          feed.last_modified = last_modified_from_header(c.header_str)
         | 
| 234 | 
            +
                          responses[url] = feed
         | 
| 235 | 
            +
                          options[:on_success].call(url, feed) if options.has_key?(:on_success)
         | 
| 236 | 
            +
                        rescue Exception => e
         | 
| 237 | 
            +
                          options[:on_failure].call(url, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
         | 
| 238 | 
            +
                        end
         | 
| 235 239 | 
             
                      else
         | 
| 236 240 | 
             
                        # puts "Error determining parser for #{url} - #{c.last_effective_url}"
         | 
| 237 241 | 
             
                      # raise NoParserAvailable.new("no valid parser for content.") (this would unfortunately fail the whole 'multi', so it's not really usable)
         | 
| @@ -271,14 +275,18 @@ module Feedzirra | |
| 271 275 | 
             
                    curl.follow_location = true
         | 
| 272 276 |  | 
| 273 277 | 
             
                    curl.on_success do |c|
         | 
| 274 | 
            -
                       | 
| 275 | 
            -
             | 
| 276 | 
            -
             | 
| 277 | 
            -
             | 
| 278 | 
            -
             | 
| 279 | 
            -
             | 
| 280 | 
            -
             | 
| 281 | 
            -
             | 
| 278 | 
            +
                      begin
         | 
| 279 | 
            +
                        add_feed_to_multi(multi, feed_queue.shift, feed_queue, responses, options) unless feed_queue.empty?
         | 
| 280 | 
            +
                        updated_feed = Feed.parse(c.body_str)
         | 
| 281 | 
            +
                        updated_feed.feed_url = c.last_effective_url
         | 
| 282 | 
            +
                        updated_feed.etag = etag_from_header(c.header_str)
         | 
| 283 | 
            +
                        updated_feed.last_modified = last_modified_from_header(c.header_str)
         | 
| 284 | 
            +
                        feed.update_from_feed(updated_feed)
         | 
| 285 | 
            +
                        responses[feed.feed_url] = feed
         | 
| 286 | 
            +
                        options[:on_success].call(feed) if options.has_key?(:on_success)
         | 
| 287 | 
            +
                      rescue Exception => e
         | 
| 288 | 
            +
                        options[:on_failure].call(feed, c.response_code, c.header_str, c.body_str) if options.has_key?(:on_failure)
         | 
| 289 | 
            +
                      end
         | 
| 282 290 | 
             
                    end
         | 
| 283 291 |  | 
| 284 292 | 
             
                    curl.on_failure do |c|
         |