spidr 0.1.3 → 0.1.4
This diff shows the changes between two publicly available versions of a package, each of which has been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects the changes between the package versions as they appear in their respective public registries.
- data/History.txt +4 -0
- data/README.txt +1 -1
- data/Rakefile +1 -1
- data/lib/spidr/agent.rb +0 -1
- data/lib/spidr/page.rb +11 -6
- data/lib/spidr/version.rb +1 -1
- metadata +3 -3
    
        data/History.txt
    CHANGED
    
    
    
        data/README.txt
    CHANGED
    
    
    
        data/Rakefile
    CHANGED
    
    
    
        data/lib/spidr/agent.rb
    CHANGED
    
    
    
        data/lib/spidr/page.rb
    CHANGED
    
    | @@ -1,5 +1,5 @@ | |
| 1 1 | 
             
            require 'uri'
         | 
| 2 | 
            -
            require ' | 
| 2 | 
            +
            require 'nokogiri'
         | 
| 3 3 |  | 
| 4 4 | 
             
            module Spidr
         | 
| 5 5 | 
             
              class Page
         | 
| @@ -185,12 +185,17 @@ module Spidr | |
| 185 185 | 
             
                end
         | 
| 186 186 |  | 
| 187 187 | 
             
                #
         | 
| 188 | 
            -
                #  | 
| 189 | 
            -
                #  | 
| 188 | 
            +
                # If the page has a <tt>text/html</tt> content-type, a
         | 
| 189 | 
            +
                # Nokogiri::HTML::Document object will be returned. If the page has a
         | 
| 190 | 
            +
                # <tt>text/xml</tt> content-type, a Nokogiri::XML::Document object
         | 
| 191 | 
            +
                # will be returned. Other content-types will cause +nil+ to be
         | 
| 192 | 
            +
                # returned.
         | 
| 190 193 | 
             
                #
         | 
| 191 194 | 
             
                def doc
         | 
| 192 195 | 
             
                  if html?
         | 
| 193 | 
            -
                    return @doc ||=  | 
| 196 | 
            +
                    return @doc ||= Nokogiri::HTML(body)
         | 
| 197 | 
            +
                  elsif xml?
         | 
| 198 | 
            +
                    return @doc ||= Nokogiri::XML(body)
         | 
| 194 199 | 
             
                  end
         | 
| 195 200 | 
             
                end
         | 
| 196 201 |  | 
| @@ -201,8 +206,8 @@ module Spidr | |
| 201 206 | 
             
                  urls = []
         | 
| 202 207 |  | 
| 203 208 | 
             
                  if html?
         | 
| 204 | 
            -
                    doc.search('a[@href]') do |a|
         | 
| 205 | 
            -
                      url = a. | 
| 209 | 
            +
                    self.doc.search('a[@href]').each do |a|
         | 
| 210 | 
            +
                      url = a.get_attribute('href')
         | 
| 206 211 |  | 
| 207 212 | 
             
                      urls << url unless url.empty?
         | 
| 208 213 | 
             
                    end
         | 
    
        data/lib/spidr/version.rb
    CHANGED
    
    
    
        metadata
    CHANGED
    
    | @@ -1,7 +1,7 @@ | |
| 1 1 | 
             
            --- !ruby/object:Gem::Specification 
         | 
| 2 2 | 
             
            name: spidr
         | 
| 3 3 | 
             
            version: !ruby/object:Gem::Version 
         | 
| 4 | 
            -
              version: 0.1. | 
| 4 | 
            +
              version: 0.1.4
         | 
| 5 5 | 
             
            platform: ruby
         | 
| 6 6 | 
             
            authors: 
         | 
| 7 7 | 
             
            - Postmodern
         | 
| @@ -9,11 +9,11 @@ autorequire: | |
| 9 9 | 
             
            bindir: bin
         | 
| 10 10 | 
             
            cert_chain: []
         | 
| 11 11 |  | 
| 12 | 
            -
            date: 2009-01- | 
| 12 | 
            +
            date: 2009-01-15 00:00:00 -08:00
         | 
| 13 13 | 
             
            default_executable: 
         | 
| 14 14 | 
             
            dependencies: 
         | 
| 15 15 | 
             
            - !ruby/object:Gem::Dependency 
         | 
| 16 | 
            -
              name:  | 
| 16 | 
            +
              name: nokogiri
         | 
| 17 17 | 
             
              type: :runtime
         | 
| 18 18 | 
             
              version_requirement: 
         | 
| 19 19 | 
             
              version_requirements: !ruby/object:Gem::Requirement 
         |