cobweb 1.0.6 → 1.0.8
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- data/README.textile +5 -1
 - data/lib/cobweb.rb +2 -2
 - data/lib/cobweb_crawler.rb +89 -65
 - data/lib/cobweb_version.rb +1 -1
 - data/lib/stats.rb +95 -93
 - data/spec/spec_helper.rb +8 -7
 - metadata +24 -24
 
    
        data/README.textile
    CHANGED
    
    | 
         @@ -1,5 +1,5 @@ 
     | 
|
| 
       1 
1 
     | 
    
         | 
| 
       2 
     | 
    
         
            -
            h1. Cobweb v1.0. 
     | 
| 
      
 2 
     | 
    
         
            +
            h1. Cobweb v1.0.8
         
     | 
| 
       3 
3 
     | 
    
         | 
| 
       4 
4 
     | 
    
         
             
            "@cobweb_gem":https://twitter.com/cobweb_gem
         
     | 
| 
       5 
5 
     | 
    
         | 
| 
         @@ -152,6 +152,10 @@ bc. statistics = CobwebCrawler.new(:cache => 600).crawl("http://www.pepsico.com" 
     | 
|
| 
       152 
152 
     | 
    
         
             
            end
         
     | 
| 
       153 
153 
     | 
    
         
             
            puts "Finished Crawl with #{statistics[:page_count]} pages and #{statistics[:asset_count]} assets."
         
     | 
| 
       154 
154 
     | 
    
         | 
| 
      
 155 
     | 
    
         
            +
            There are some specific options for CobwebCrawler in addition to the normal cobweb options
         
     | 
| 
      
 156 
     | 
    
         
            +
             
     | 
| 
      
 157 
     | 
    
         
            +
              * thread_count - specifies the number of threads used by the crawler, defaults to 1
         
     | 
| 
      
 158 
     | 
    
         
            +
             
     | 
| 
       155 
159 
     | 
    
         
             
            h3. CobwebCrawlHelper
         
     | 
| 
       156 
160 
     | 
    
         | 
| 
       157 
161 
     | 
    
         
             
            The CobwebCrawlHelper class is a helper class to assist in getting information about a crawl and to perform functions against the crawl
         
     | 
    
        data/lib/cobweb.rb
    CHANGED
    
    | 
         @@ -127,10 +127,10 @@ class Cobweb 
     | 
|
| 
       127 
127 
     | 
    
         
             
                  content = HashUtil.deep_symbolize_keys(Marshal.load(redis.get(unique_id)))
         
     | 
| 
       128 
128 
     | 
    
         
             
                else
         
     | 
| 
       129 
129 
     | 
    
         
             
                  # retrieve data
         
     | 
| 
       130 
     | 
    
         
            -
                  unless @http && @http.address == uri.host && @http.port == uri.inferred_port
         
     | 
| 
      
 130 
     | 
    
         
            +
                  #unless @http && @http.address == uri.host && @http.port == uri.inferred_port
         
     | 
| 
       131 
131 
     | 
    
         
             
                    puts "Creating connection to #{uri.host}..." if @options[:debug]
         
     | 
| 
       132 
132 
     | 
    
         
             
                    @http = Net::HTTP.new(uri.host, uri.inferred_port)
         
     | 
| 
       133 
     | 
    
         
            -
                  end
         
     | 
| 
      
 133 
     | 
    
         
            +
                  #end
         
     | 
| 
       134 
134 
     | 
    
         
             
                  if uri.scheme == "https"
         
     | 
| 
       135 
135 
     | 
    
         
             
                    @http.use_ssl = true
         
     | 
| 
       136 
136 
     | 
    
         
             
                    @http.verify_mode = OpenSSL::SSL::VERIFY_NONE
         
     | 
    
        data/lib/cobweb_crawler.rb
    CHANGED
    
    | 
         @@ -39,6 +39,7 @@ class CobwebCrawler 
     | 
|
| 
       39 
39 
     | 
    
         
             
              # Initiates a crawl starting at the base_url and applying the options supplied. Can also take a block that is executed and passed content hash and statistic hash'
         
     | 
| 
       40 
40 
     | 
    
         
             
              def crawl(base_url, crawl_options = {}, &block)
         
     | 
| 
       41 
41 
     | 
    
         
             
                @options[:base_url] = base_url unless @options.has_key? :base_url
         
     | 
| 
      
 42 
     | 
    
         
            +
                @options[:thread_count] = 1 unless @options.has_key? :thread_count
         
     | 
| 
       42 
43 
     | 
    
         | 
| 
       43 
44 
     | 
    
         
             
                @options[:internal_urls] << base_url if @options[:internal_urls].empty?
         
     | 
| 
       44 
45 
     | 
    
         
             
                @redis.sadd("internal_urls", base_url) if @options[:internal_urls].empty?
         
     | 
| 
         @@ -46,87 +47,110 @@ class CobwebCrawler 
     | 
|
| 
       46 
47 
     | 
    
         
             
                @crawl_options = crawl_options
         
     | 
| 
       47 
48 
     | 
    
         | 
| 
       48 
49 
     | 
    
         
             
                @redis.sadd("queued", base_url) unless base_url.nil? || @redis.sismember("crawled", base_url) || @redis.sismember("queued", base_url)
         
     | 
| 
       49 
     | 
    
         
            -
                crawl_counter = @redis.scard("crawled").to_i
         
     | 
| 
       50 
     | 
    
         
            -
                queue_counter = @redis.scard("queued").to_i
         
     | 
| 
      
 50 
     | 
    
         
            +
                @crawl_counter = @redis.scard("crawled").to_i
         
     | 
| 
      
 51 
     | 
    
         
            +
                @queue_counter = @redis.scard("queued").to_i
         
     | 
| 
       51 
52 
     | 
    
         | 
| 
      
 53 
     | 
    
         
            +
                @threads = []
         
     | 
| 
       52 
54 
     | 
    
         
             
                begin
         
     | 
| 
       53 
55 
     | 
    
         
             
                  @stats.start_crawl(@options)
         
     | 
| 
       54 
     | 
    
         
            -
                   
     | 
| 
       55 
     | 
    
         
            -
             
     | 
| 
       56 
     | 
    
         
            -
                    
         
     | 
| 
      
 56 
     | 
    
         
            +
                  
         
     | 
| 
      
 57 
     | 
    
         
            +
                  @threads << Thread.new do
         
     | 
| 
      
 58 
     | 
    
         
            +
                    Thread.abort_on_exception = true
         
     | 
| 
      
 59 
     | 
    
         
            +
                    spawn_thread(&block)
         
     | 
| 
      
 60 
     | 
    
         
            +
                  end
         
     | 
| 
       57 
61 
     | 
    
         | 
| 
       58 
     | 
    
         
            -
             
     | 
| 
       59 
     | 
    
         
            -
             
     | 
| 
      
 62 
     | 
    
         
            +
                  sleep 5
         
     | 
| 
      
 63 
     | 
    
         
            +
                  while running_thread_count > 0
         
     | 
| 
      
 64 
     | 
    
         
            +
                    if @queue_counter > 0
         
     | 
| 
      
 65 
     | 
    
         
            +
                      (@options[:thread_count]-running_thread_count).times.each do
         
     | 
| 
      
 66 
     | 
    
         
            +
                        @threads << Thread.new do
         
     | 
| 
      
 67 
     | 
    
         
            +
                          Thread.abort_on_exception = true
         
     | 
| 
      
 68 
     | 
    
         
            +
                          spawn_thread(&block)
         
     | 
| 
      
 69 
     | 
    
         
            +
                        end
         
     | 
| 
      
 70 
     | 
    
         
            +
                      end
         
     | 
| 
      
 71 
     | 
    
         
            +
                    end
         
     | 
| 
      
 72 
     | 
    
         
            +
                    sleep 1
         
     | 
| 
      
 73 
     | 
    
         
            +
                  end
         
     | 
| 
      
 74 
     | 
    
         
            +
                  
         
     | 
| 
      
 75 
     | 
    
         
            +
                ensure
         
     | 
| 
      
 76 
     | 
    
         
            +
                  @stats.end_crawl(@options)
         
     | 
| 
      
 77 
     | 
    
         
            +
                end
         
     | 
| 
      
 78 
     | 
    
         
            +
                @stats
         
     | 
| 
      
 79 
     | 
    
         
            +
              end
         
     | 
| 
       60 
80 
     | 
    
         | 
| 
       61 
     | 
    
         
            -
             
     | 
| 
       62 
     | 
    
         
            -
             
     | 
| 
       63 
     | 
    
         
            -
             
     | 
| 
       64 
     | 
    
         
            -
             
     | 
| 
       65 
     | 
    
         
            -
                          content = @cobweb.get(url) unless url.nil?
         
     | 
| 
       66 
     | 
    
         
            -
                          if content.nil?
         
     | 
| 
       67 
     | 
    
         
            -
                            queue_counter = queue_counter - 1 #@redis.scard("queued").to_i
         
     | 
| 
       68 
     | 
    
         
            -
                          else
         
     | 
| 
       69 
     | 
    
         
            -
                            @stats.update_status("Processing #{url}...")
         
     | 
| 
      
 81 
     | 
    
         
            +
              def spawn_thread(&block)
         
     | 
| 
      
 82 
     | 
    
         
            +
                  while @queue_counter>0 && (@options[:crawl_limit].to_i == 0 || @options[:crawl_limit].to_i > @crawl_counter)
         
     | 
| 
      
 83 
     | 
    
         
            +
                    url = @redis.spop "queued"
         
     | 
| 
      
 84 
     | 
    
         
            +
                  @queue_counter = 0 if url.nil?
         
     | 
| 
       70 
85 
     | 
    
         | 
| 
       71 
     | 
    
         
            -
             
     | 
| 
       72 
     | 
    
         
            -
             
     | 
| 
       73 
     | 
    
         
            -
             
     | 
| 
       74 
     | 
    
         
            -
             
     | 
| 
      
 86 
     | 
    
         
            +
                  @options[:url] = url
         
     | 
| 
      
 87 
     | 
    
         
            +
                  unless @redis.sismember("crawled", url.to_s)
         
     | 
| 
      
 88 
     | 
    
         
            +
                    begin
         
     | 
| 
      
 89 
     | 
    
         
            +
                      @stats.update_status("Requesting #{url}...")
         
     | 
| 
      
 90 
     | 
    
         
            +
                      content = @cobweb.get(url) unless url.nil?
         
     | 
| 
      
 91 
     | 
    
         
            +
                      if content.nil?
         
     | 
| 
      
 92 
     | 
    
         
            +
                        @queue_counter = @queue_counter - 1 #@redis.scard("queued").to_i
         
     | 
| 
      
 93 
     | 
    
         
            +
                      else
         
     | 
| 
      
 94 
     | 
    
         
            +
                        @stats.update_status("Processing #{url}...")
         
     | 
| 
      
 95 
     | 
    
         
            +
             
     | 
| 
      
 96 
     | 
    
         
            +
                        @redis.sadd "crawled", url.to_s
         
     | 
| 
      
 97 
     | 
    
         
            +
                        @redis.incr "crawl-counter" 
         
     | 
| 
      
 98 
     | 
    
         
            +
                      
         
     | 
| 
      
 99 
     | 
    
         
            +
                        internal_links = ContentLinkParser.new(url, content[:body]).all_links(:valid_schemes => [:http, :https])
         
     | 
| 
       75 
100 
     | 
    
         | 
| 
       76 
     | 
    
         
            -
             
     | 
| 
       77 
     | 
    
         
            -
             
     | 
| 
      
 101 
     | 
    
         
            +
                        # select the link if its internal (eliminate external before expensive lookups in queued and crawled)
         
     | 
| 
      
 102 
     | 
    
         
            +
                        cobweb_links = CobwebLinks.new(@options)
         
     | 
| 
       78 
103 
     | 
    
         | 
| 
       79 
     | 
    
         
            -
             
     | 
| 
      
 104 
     | 
    
         
            +
                        internal_links = internal_links.select{|link| cobweb_links.internal?(link) || (@options[:crawl_linked_external] && cobweb_links.internal?(url.to_s))}
         
     | 
| 
       80 
105 
     | 
    
         | 
| 
       81 
     | 
    
         
            -
             
     | 
| 
       82 
     | 
    
         
            -
             
     | 
| 
       83 
     | 
    
         
            -
             
     | 
| 
       84 
     | 
    
         
            -
             
     | 
| 
       85 
     | 
    
         
            -
             
     | 
| 
       86 
     | 
    
         
            -
             
     | 
| 
       87 
     | 
    
         
            -
             
     | 
| 
       88 
     | 
    
         
            -
             
     | 
| 
       89 
     | 
    
         
            -
             
     | 
| 
       90 
     | 
    
         
            -
             
     | 
| 
       91 
     | 
    
         
            -
             
     | 
| 
       92 
     | 
    
         
            -
             
     | 
| 
       93 
     | 
    
         
            -
             
     | 
| 
       94 
     | 
    
         
            -
             
     | 
| 
       95 
     | 
    
         
            -
             
     | 
| 
       96 
     | 
    
         
            -
             
     | 
| 
      
 106 
     | 
    
         
            +
                        all_internal_links = internal_links
         
     | 
| 
      
 107 
     | 
    
         
            +
                        
         
     | 
| 
      
 108 
     | 
    
         
            +
                        # reject the link if we've crawled it or queued it
         
     | 
| 
      
 109 
     | 
    
         
            +
                        internal_links.reject!{|link| @redis.sismember("crawled", link)}
         
     | 
| 
      
 110 
     | 
    
         
            +
                        internal_links.reject!{|link| @redis.sismember("queued", link)}
         
     | 
| 
      
 111 
     | 
    
         
            +
                        internal_links.reject!{|link| link.nil? || link.empty?}
         
     | 
| 
      
 112 
     | 
    
         
            +
                      
         
     | 
| 
      
 113 
     | 
    
         
            +
                        internal_links.each do |link|
         
     | 
| 
      
 114 
     | 
    
         
            +
                          puts "Added #{link.to_s} to queue" if @debug
         
     | 
| 
      
 115 
     | 
    
         
            +
                          @redis.sadd "queued", link unless link.nil?
         
     | 
| 
      
 116 
     | 
    
         
            +
                          children = @redis.hget("navigation", url)
         
     | 
| 
      
 117 
     | 
    
         
            +
                          children = [] if children.nil?
         
     | 
| 
      
 118 
     | 
    
         
            +
                          children << link
         
     | 
| 
      
 119 
     | 
    
         
            +
                          @redis.hset "navigation", url, children
         
     | 
| 
      
 120 
     | 
    
         
            +
                          @queue_counter += 1
         
     | 
| 
      
 121 
     | 
    
         
            +
                        end
         
     | 
| 
       97 
122 
     | 
    
         | 
| 
       98 
     | 
    
         
            -
             
     | 
| 
       99 
     | 
    
         
            -
             
     | 
| 
       100 
     | 
    
         
            -
             
     | 
| 
       101 
     | 
    
         
            -
                              end
         
     | 
| 
       102 
     | 
    
         
            -
                            end
         
     | 
| 
       103 
     | 
    
         
            -
                          
         
     | 
| 
       104 
     | 
    
         
            -
                            crawl_counter = @redis.scard("crawled").to_i
         
     | 
| 
       105 
     | 
    
         
            -
                            queue_counter = @redis.scard("queued").to_i
         
     | 
| 
       106 
     | 
    
         
            -
                          
         
     | 
| 
       107 
     | 
    
         
            -
                            @stats.update_statistics(content, crawl_counter, queue_counter)
         
     | 
| 
       108 
     | 
    
         
            -
                            @stats.update_status("Completed #{url}.")
         
     | 
| 
       109 
     | 
    
         
            -
                            yield content, @stats.get_statistics if block_given?
         
     | 
| 
      
 123 
     | 
    
         
            +
                        if @options[:store_refered_url]
         
     | 
| 
      
 124 
     | 
    
         
            +
                          all_internal_links.each do |link|
         
     | 
| 
      
 125 
     | 
    
         
            +
                            @redis.sadd("inbound_links_#{Digest::MD5.hexdigest(link)}", url)
         
     | 
| 
       110 
126 
     | 
    
         
             
                          end
         
     | 
| 
       111 
     | 
    
         
            -
                        rescue => e
         
     | 
| 
       112 
     | 
    
         
            -
                          puts "Error loading #{url}: #{e}"
         
     | 
| 
       113 
     | 
    
         
            -
                          #puts "!!!!!!!!!!!! ERROR !!!!!!!!!!!!!!!!"
         
     | 
| 
       114 
     | 
    
         
            -
                          #ap e
         
     | 
| 
       115 
     | 
    
         
            -
                          #ap e.backtrace
         
     | 
| 
       116 
     | 
    
         
            -
                        ensure
         
     | 
| 
       117 
     | 
    
         
            -
                          crawl_counter = @redis.scard("crawled").to_i
         
     | 
| 
       118 
     | 
    
         
            -
                          queue_counter = @redis.scard("queued").to_i
         
     | 
| 
       119 
127 
     | 
    
         
             
                        end
         
     | 
| 
       120 
     | 
    
         
            -
                       
     | 
| 
       121 
     | 
    
         
            -
                         
     | 
| 
      
 128 
     | 
    
         
            +
                      
         
     | 
| 
      
 129 
     | 
    
         
            +
                        @crawl_counter = @redis.scard("crawled").to_i
         
     | 
| 
      
 130 
     | 
    
         
            +
                        @queue_counter = @redis.scard("queued").to_i
         
     | 
| 
      
 131 
     | 
    
         
            +
                      
         
     | 
| 
      
 132 
     | 
    
         
            +
                        @stats.update_statistics(content, @crawl_counter, @queue_counter)
         
     | 
| 
      
 133 
     | 
    
         
            +
                        @stats.update_status("Completed #{url}.")
         
     | 
| 
      
 134 
     | 
    
         
            +
                        yield content, @stats.get_statistics if block_given?
         
     | 
| 
       122 
135 
     | 
    
         
             
                      end
         
     | 
| 
      
 136 
     | 
    
         
            +
                    rescue => e
         
     | 
| 
      
 137 
     | 
    
         
            +
                      puts "Error loading #{url}: #{e}"
         
     | 
| 
      
 138 
     | 
    
         
            +
                      #puts "!!!!!!!!!!!! ERROR !!!!!!!!!!!!!!!!"
         
     | 
| 
      
 139 
     | 
    
         
            +
                      #ap e
         
     | 
| 
      
 140 
     | 
    
         
            +
                      #ap e.backtrace
         
     | 
| 
      
 141 
     | 
    
         
            +
                    ensure
         
     | 
| 
      
 142 
     | 
    
         
            +
                      @crawl_counter = @redis.scard("crawled").to_i
         
     | 
| 
      
 143 
     | 
    
         
            +
                      @queue_counter = @redis.scard("queued").to_i
         
     | 
| 
       123 
144 
     | 
    
         
             
                    end
         
     | 
| 
       124 
     | 
    
         
            -
             
     | 
| 
      
 145 
     | 
    
         
            +
                  else
         
     | 
| 
      
 146 
     | 
    
         
            +
                    puts "Already crawled #{@options[:url]}" if @debug
         
     | 
| 
       125 
147 
     | 
    
         
             
                  end
         
     | 
| 
       126 
     | 
    
         
            -
                ensure
         
     | 
| 
       127 
     | 
    
         
            -
                  @stats.end_crawl(@options)
         
     | 
| 
       128 
148 
     | 
    
         
             
                end
         
     | 
| 
       129 
     | 
    
         
            -
                 
     | 
| 
      
 149 
     | 
    
         
            +
                Thread.exit
         
     | 
| 
      
 150 
     | 
    
         
            +
              end
         
     | 
| 
      
 151 
     | 
    
         
            +
             
     | 
| 
      
 152 
     | 
    
         
            +
              def running_thread_count
         
     | 
| 
      
 153 
     | 
    
         
            +
                @threads.map{|t| t.status}.select{|status| status=="run" || status == "sleep"}.count
         
     | 
| 
       130 
154 
     | 
    
         
             
              end
         
     | 
| 
       131 
155 
     | 
    
         | 
| 
       132 
156 
     | 
    
         
             
            end
         
     | 
    
        data/lib/cobweb_version.rb
    CHANGED
    
    
    
        data/lib/stats.rb
    CHANGED
    
    | 
         @@ -8,6 +8,7 @@ class Stats 
     | 
|
| 
       8 
8 
     | 
    
         
             
              def initialize(options)
         
     | 
| 
       9 
9 
     | 
    
         
             
                options[:redis_options] = {} unless options.has_key? :redis_options
         
     | 
| 
       10 
10 
     | 
    
         
             
                @full_redis = Redis.new(options[:redis_options])
         
     | 
| 
      
 11 
     | 
    
         
            +
                @lock = Mutex.new
         
     | 
| 
       11 
12 
     | 
    
         
             
                @redis = Redis::Namespace.new("cobweb-#{Cobweb.version}-#{options[:crawl_id]}", :redis => @full_redis)
         
     | 
| 
       12 
13 
     | 
    
         
             
              end
         
     | 
| 
       13 
14 
     | 
    
         | 
| 
         @@ -44,105 +45,106 @@ class Stats 
     | 
|
| 
       44 
45 
     | 
    
         
             
              # Returns statistics hash.  update_statistics takes the content hash, extracts statistics from it and updates redis with the data.  
         
     | 
| 
       45 
46 
     | 
    
         
             
              def update_statistics(content, crawl_counter=@redis.scard("crawled").to_i, queue_counter=@redis.scard("queued").to_i)
         
     | 
| 
       46 
47 
     | 
    
         | 
| 
       47 
     | 
    
         
            -
                @ 
     | 
| 
       48 
     | 
    
         
            -
             
     | 
| 
       49 
     | 
    
         
            -
             
     | 
| 
       50 
     | 
    
         
            -
                   
     | 
| 
       51 
     | 
    
         
            -
             
     | 
| 
       52 
     | 
    
         
            -
                   
     | 
| 
       53 
     | 
    
         
            -
             
     | 
| 
       54 
     | 
    
         
            -
             
     | 
| 
       55 
     | 
    
         
            -
             
     | 
| 
       56 
     | 
    
         
            -
             
     | 
| 
       57 
     | 
    
         
            -
                   
     | 
| 
       58 
     | 
    
         
            -
             
     | 
| 
       59 
     | 
    
         
            -
                   
     | 
| 
       60 
     | 
    
         
            -
             
     | 
| 
       61 
     | 
    
         
            -
             
     | 
| 
       62 
     | 
    
         
            -
             
     | 
| 
       63 
     | 
    
         
            -
             
     | 
| 
       64 
     | 
    
         
            -
             
     | 
| 
       65 
     | 
    
         
            -
                   
     | 
| 
       66 
     | 
    
         
            -
             
     | 
| 
       67 
     | 
    
         
            -
             
     | 
| 
       68 
     | 
    
         
            -
             
     | 
| 
       69 
     | 
    
         
            -
                   
     | 
| 
       70 
     | 
    
         
            -
             
     | 
| 
       71 
     | 
    
         
            -
             
     | 
| 
       72 
     | 
    
         
            -
             
     | 
| 
       73 
     | 
    
         
            -
             
     | 
| 
       74 
     | 
    
         
            -
             
     | 
| 
       75 
     | 
    
         
            -
             
     | 
| 
       76 
     | 
    
         
            -
             
     | 
| 
      
 48 
     | 
    
         
            +
                @lock.synchronize {
         
     | 
| 
      
 49 
     | 
    
         
            +
                  @statistics = get_statistics
         
     | 
| 
      
 50 
     | 
    
         
            +
                  
         
     | 
| 
      
 51 
     | 
    
         
            +
                  if @statistics.has_key? :average_response_time
         
     | 
| 
      
 52 
     | 
    
         
            +
                    @statistics[:average_response_time] = (((@redis.hget("statistics", "average_response_time").to_f*crawl_counter) + content[:response_time].to_f) / (crawl_counter + 1))
         
     | 
| 
      
 53 
     | 
    
         
            +
                  else
         
     | 
| 
      
 54 
     | 
    
         
            +
                    @statistics[:average_response_time] = content[:response_time].to_f
         
     | 
| 
      
 55 
     | 
    
         
            +
                  end
         
     | 
| 
      
 56 
     | 
    
         
            +
                  @statistics[:maximum_response_time] = content[:response_time].to_f if @statistics[:maximum_response_time].nil? or content[:response_time].to_f > @statistics[:maximum_response_time].to_f
         
     | 
| 
      
 57 
     | 
    
         
            +
                  @statistics[:minimum_response_time] = content[:response_time].to_f if @statistics[:minimum_response_time].nil? or content[:response_time].to_f < @statistics[:minimum_response_time].to_f
         
     | 
| 
      
 58 
     | 
    
         
            +
                  if @statistics.has_key? :average_length
         
     | 
| 
      
 59 
     | 
    
         
            +
                    @statistics[:average_length] = (((@redis.hget("statistics", "average_length").to_i*crawl_counter) + content[:length].to_i) / (crawl_counter + 1))
         
     | 
| 
      
 60 
     | 
    
         
            +
                  else
         
     | 
| 
      
 61 
     | 
    
         
            +
                    @statistics[:average_length] = content[:length].to_i
         
     | 
| 
      
 62 
     | 
    
         
            +
                  end
         
     | 
| 
      
 63 
     | 
    
         
            +
                  @statistics[:maximum_length] = content[:length].to_i if @redis.hget("statistics", "maximum_length").nil? or content[:length].to_i > @statistics[:maximum_length].to_i
         
     | 
| 
      
 64 
     | 
    
         
            +
                  @statistics[:minimum_length] = content[:length].to_i if @redis.hget("statistics", "minimum_length").nil? or content[:length].to_i < @statistics[:minimum_length].to_i
         
     | 
| 
      
 65 
     | 
    
         
            +
                  
         
     | 
| 
      
 66 
     | 
    
         
            +
                  if content[:mime_type].include?("text/html") or content[:mime_type].include?("application/xhtml+xml")
         
     | 
| 
      
 67 
     | 
    
         
            +
                    @statistics[:page_count] = @statistics[:page_count].to_i + 1
         
     | 
| 
      
 68 
     | 
    
         
            +
                    @statistics[:page_size] = @statistics[:page_size].to_i + content[:length].to_i
         
     | 
| 
      
 69 
     | 
    
         
            +
                    increment_time_stat("pages_count")
         
     | 
| 
      
 70 
     | 
    
         
            +
                  else
         
     | 
| 
      
 71 
     | 
    
         
            +
                    @statistics[:asset_count] = @statistics[:asset_count].to_i + 1
         
     | 
| 
      
 72 
     | 
    
         
            +
                    @statistics[:asset_size] = @statistics[:asset_size].to_i + content[:length].to_i
         
     | 
| 
      
 73 
     | 
    
         
            +
                    increment_time_stat("assets_count")
         
     | 
| 
      
 74 
     | 
    
         
            +
                  end
         
     | 
| 
      
 75 
     | 
    
         
            +
                  
         
     | 
| 
      
 76 
     | 
    
         
            +
                  total_redirects = @statistics[:total_redirects].to_i
         
     | 
| 
      
 77 
     | 
    
         
            +
                  @statistics[:total_redirects] = 0 if total_redirects.nil?
         
     | 
| 
      
 78 
     | 
    
         
            +
                  @statistics[:total_redirects] = total_redirects += content[:redirect_through].count unless content[:redirect_through].nil?
         
     | 
| 
       77 
79 
     | 
    
         | 
| 
       78 
     | 
    
         
            -
             
     | 
| 
       79 
     | 
    
         
            -
             
     | 
| 
       80 
     | 
    
         
            -
             
     | 
| 
       81 
     | 
    
         
            -
             
     | 
| 
       82 
     | 
    
         
            -
             
     | 
| 
      
 80 
     | 
    
         
            +
                  @statistics[:crawl_counter] = crawl_counter
         
     | 
| 
      
 81 
     | 
    
         
            +
                  @statistics[:queue_counter] = queue_counter
         
     | 
| 
      
 82 
     | 
    
         
            +
                  
         
     | 
| 
      
 83 
     | 
    
         
            +
                  total_length = @statistics[:total_length].to_i
         
     | 
| 
      
 84 
     | 
    
         
            +
                  @statistics[:total_length] = total_length + content[:length].to_i
         
     | 
| 
       83 
85 
     | 
    
         | 
| 
       84 
     | 
    
         
            -
             
     | 
| 
       85 
     | 
    
         
            -
             
     | 
| 
       86 
     | 
    
         
            -
             
     | 
| 
       87 
     | 
    
         
            -
             
     | 
| 
       88 
     | 
    
         
            -
             
     | 
| 
      
 86 
     | 
    
         
            +
                  mime_counts = {}
         
     | 
| 
      
 87 
     | 
    
         
            +
                  if @statistics.has_key? :mime_counts
         
     | 
| 
      
 88 
     | 
    
         
            +
                    mime_counts = @statistics[:mime_counts]
         
     | 
| 
      
 89 
     | 
    
         
            +
                    if mime_counts.has_key? content[:mime_type]
         
     | 
| 
      
 90 
     | 
    
         
            +
                      mime_counts[content[:mime_type]] += 1
         
     | 
| 
      
 91 
     | 
    
         
            +
                    else
         
     | 
| 
      
 92 
     | 
    
         
            +
                      mime_counts[content[:mime_type]] = 1
         
     | 
| 
      
 93 
     | 
    
         
            +
                    end
         
     | 
| 
       89 
94 
     | 
    
         
             
                  else
         
     | 
| 
       90 
     | 
    
         
            -
                    mime_counts 
     | 
| 
      
 95 
     | 
    
         
            +
                    mime_counts = {content[:mime_type] => 1}
         
     | 
| 
       91 
96 
     | 
    
         
             
                  end
         
     | 
| 
       92 
     | 
    
         
            -
             
     | 
| 
       93 
     | 
    
         
            -
                  mime_counts = {content[:mime_type] => 1}
         
     | 
| 
       94 
     | 
    
         
            -
                end
         
     | 
| 
       95 
     | 
    
         
            -
                @statistics[:mime_counts] = mime_counts.to_json
         
     | 
| 
      
 97 
     | 
    
         
            +
                  @statistics[:mime_counts] = mime_counts.to_json
         
     | 
| 
       96 
98 
     | 
    
         | 
| 
       97 
     | 
    
         
            -
             
     | 
| 
       98 
     | 
    
         
            -
             
     | 
| 
       99 
     | 
    
         
            -
             
     | 
| 
       100 
     | 
    
         
            -
             
     | 
| 
       101 
     | 
    
         
            -
             
     | 
| 
       102 
     | 
    
         
            -
             
     | 
| 
       103 
     | 
    
         
            -
             
     | 
| 
       104 
     | 
    
         
            -
             
     | 
| 
       105 
     | 
    
         
            -
             
     | 
| 
       106 
     | 
    
         
            -
             
     | 
| 
       107 
     | 
    
         
            -
             
     | 
| 
       108 
     | 
    
         
            -
             
     | 
| 
       109 
     | 
    
         
            -
             
     | 
| 
       110 
     | 
    
         
            -
             
     | 
| 
       111 
     | 
    
         
            -
             
     | 
| 
       112 
     | 
    
         
            -
             
     | 
| 
       113 
     | 
    
         
            -
             
     | 
| 
       114 
     | 
    
         
            -
             
     | 
| 
       115 
     | 
    
         
            -
             
     | 
| 
       116 
     | 
    
         
            -
             
     | 
| 
       117 
     | 
    
         
            -
             
     | 
| 
       118 
     | 
    
         
            -
             
     | 
| 
       119 
     | 
    
         
            -
             
     | 
| 
       120 
     | 
    
         
            -
             
     | 
| 
       121 
     | 
    
         
            -
             
     | 
| 
      
 99 
     | 
    
         
            +
                  # record mime categories stats
         
     | 
| 
      
 100 
     | 
    
         
            +
                  if content[:mime_type].cobweb_starts_with? "text"
         
     | 
| 
      
 101 
     | 
    
         
            +
                    increment_time_stat("mime_text_count")
         
     | 
| 
      
 102 
     | 
    
         
            +
                  elsif content[:mime_type].cobweb_starts_with? "application"
         
     | 
| 
      
 103 
     | 
    
         
            +
                    increment_time_stat("mime_application_count")
         
     | 
| 
      
 104 
     | 
    
         
            +
                  elsif content[:mime_type].cobweb_starts_with? "audio"
         
     | 
| 
      
 105 
     | 
    
         
            +
                    increment_time_stat("mime_audio_count")
         
     | 
| 
      
 106 
     | 
    
         
            +
                  elsif content[:mime_type].cobweb_starts_with? "image"
         
     | 
| 
      
 107 
     | 
    
         
            +
                    increment_time_stat("mime_image_count")
         
     | 
| 
      
 108 
     | 
    
         
            +
                  elsif content[:mime_type].cobweb_starts_with? "message"
         
     | 
| 
      
 109 
     | 
    
         
            +
                    increment_time_stat("mime_message_count")
         
     | 
| 
      
 110 
     | 
    
         
            +
                  elsif content[:mime_type].cobweb_starts_with? "model"
         
     | 
| 
      
 111 
     | 
    
         
            +
                    increment_time_stat("mime_model_count")
         
     | 
| 
      
 112 
     | 
    
         
            +
                  elsif content[:mime_type].cobweb_starts_with? "multipart"
         
     | 
| 
      
 113 
     | 
    
         
            +
                    increment_time_stat("mime_multipart_count")
         
     | 
| 
      
 114 
     | 
    
         
            +
                  elsif content[:mime_type].cobweb_starts_with? "video"
         
     | 
| 
      
 115 
     | 
    
         
            +
                    increment_time_stat("mime_video_count")
         
     | 
| 
      
 116 
     | 
    
         
            +
                  end
         
     | 
| 
      
 117 
     | 
    
         
            +
                  
         
     | 
| 
      
 118 
     | 
    
         
            +
                  status_counts = {}
         
     | 
| 
      
 119 
     | 
    
         
            +
                  if @statistics.has_key? :status_counts
         
     | 
| 
      
 120 
     | 
    
         
            +
                    status_counts = @statistics[:status_counts]
         
     | 
| 
      
 121 
     | 
    
         
            +
                    status_code = content[:status_code].to_i.to_s.to_sym
         
     | 
| 
      
 122 
     | 
    
         
            +
                    if status_counts.has_key? status_code
         
     | 
| 
      
 123 
     | 
    
         
            +
                      status_counts[status_code] += 1
         
     | 
| 
      
 124 
     | 
    
         
            +
                    else
         
     | 
| 
      
 125 
     | 
    
         
            +
                      status_counts[status_code] = 1
         
     | 
| 
      
 126 
     | 
    
         
            +
                    end      
         
     | 
| 
       122 
127 
     | 
    
         
             
                  else
         
     | 
| 
       123 
     | 
    
         
            -
                    status_counts 
     | 
| 
       124 
     | 
    
         
            -
                  end 
     | 
| 
       125 
     | 
    
         
            -
             
     | 
| 
       126 
     | 
    
         
            -
                   
     | 
| 
       127 
     | 
    
         
            -
             
     | 
| 
       128 
     | 
    
         
            -
             
     | 
| 
       129 
     | 
    
         
            -
             
     | 
| 
       130 
     | 
    
         
            -
             
     | 
| 
       131 
     | 
    
         
            -
                   
     | 
| 
       132 
     | 
    
         
            -
             
     | 
| 
       133 
     | 
    
         
            -
                   
     | 
| 
       134 
     | 
    
         
            -
             
     | 
| 
       135 
     | 
    
         
            -
                   
     | 
| 
       136 
     | 
    
         
            -
             
     | 
| 
       137 
     | 
    
         
            -
             
     | 
| 
       138 
     | 
    
         
            -
             
     | 
| 
       139 
     | 
    
         
            -
             
     | 
| 
       140 
     | 
    
         
            -
             
     | 
| 
       141 
     | 
    
         
            -
             
     | 
| 
       142 
     | 
    
         
            -
                
         
     | 
| 
       143 
     | 
    
         
            -
                redis_command = "@redis.hmset 'statistics', #{@statistics.keys.map{|key| "'#{key}', '#{@statistics[key].to_s.gsub("'","''")}'"}.join(", ")}"
         
     | 
| 
       144 
     | 
    
         
            -
                instance_eval redis_command
         
     | 
| 
       145 
     | 
    
         
            -
                
         
     | 
| 
      
 128 
     | 
    
         
            +
                    status_counts = {status_code => 1}
         
     | 
| 
      
 129 
     | 
    
         
            +
                  end
         
     | 
| 
      
 130 
     | 
    
         
            +
                  
         
     | 
| 
      
 131 
     | 
    
         
            +
                  # record statistics by status type
         
     | 
| 
      
 132 
     | 
    
         
            +
                  if content[:status_code] >= 200 && content[:status_code] < 300
         
     | 
| 
      
 133 
     | 
    
         
            +
                    increment_time_stat("status_200_count")
         
     | 
| 
      
 134 
     | 
    
         
            +
                  elsif content[:status_code] >= 400 && content[:status_code] < 500
         
     | 
| 
      
 135 
     | 
    
         
            +
                    increment_time_stat("status|_400_count")
         
     | 
| 
      
 136 
     | 
    
         
            +
                  elsif content[:status_code] >= 500 && content[:status_code] < 600
         
     | 
| 
      
 137 
     | 
    
         
            +
                    increment_time_stat("status|_500_count")
         
     | 
| 
      
 138 
     | 
    
         
            +
                  end
         
     | 
| 
      
 139 
     | 
    
         
            +
                  
         
     | 
| 
      
 140 
     | 
    
         
            +
                  @statistics[:status_counts] = status_counts.to_json
         
     | 
| 
      
 141 
     | 
    
         
            +
                  
         
     | 
| 
      
 142 
     | 
    
         
            +
                  ## time based statistics
         
     | 
| 
      
 143 
     | 
    
         
            +
                  increment_time_stat("minute_totals", "minute", 60)
         
     | 
| 
      
 144 
     | 
    
         
            +
                  
         
     | 
| 
      
 145 
     | 
    
         
            +
                  redis_command = "@redis.hmset 'statistics', #{@statistics.keys.map{|key| "'#{key}', '#{@statistics[key].to_s.gsub("'","''")}'"}.join(", ")}"
         
     | 
| 
      
 146 
     | 
    
         
            +
                  instance_eval redis_command
         
     | 
| 
      
 147 
     | 
    
         
            +
                }
         
     | 
| 
       146 
148 
     | 
    
         
             
                @statistics
         
     | 
| 
       147 
149 
     | 
    
         
             
              end
         
     | 
| 
       148 
150 
     | 
    
         | 
    
        data/spec/spec_helper.rb
    CHANGED
    
    | 
         @@ -12,16 +12,17 @@ RSpec.configure do |config| 
     | 
|
| 
       12 
12 
     | 
    
         
             
              if ENV["TRAVIS_RUBY_VERSION"] || ENV['CI']
         
     | 
| 
       13 
13 
     | 
    
         
             
                config.filter_run_excluding :local_only => true
         
     | 
| 
       14 
14 
     | 
    
         
             
              end
         
     | 
| 
      
 15 
     | 
    
         
            +
             
     | 
| 
      
 16 
     | 
    
         
            +
              Thread.new do
         
     | 
| 
      
 17 
     | 
    
         
            +
                @thin ||= Thin::Server.start("0.0.0.0", 3532, SampleServer.app)
         
     | 
| 
      
 18 
     | 
    
         
            +
              end
         
     | 
| 
      
 19 
     | 
    
         
            +
             
     | 
| 
      
 20 
     | 
    
         
            +
              # WAIT FOR START TO COMPLETE
         
     | 
| 
      
 21 
     | 
    
         
            +
              sleep 1
         
     | 
| 
      
 22 
     | 
    
         
            +
             
     | 
| 
       15 
23 
     | 
    
         | 
| 
       16 
24 
     | 
    
         
             
              config.before(:all) {
         
     | 
| 
       17 
25 
     | 
    
         
             
                # START THIN SERVER TO HOST THE SAMPLE SITE FOR CRAWLING
         
     | 
| 
       18 
     | 
    
         
            -
                @thin = nil
         
     | 
| 
       19 
     | 
    
         
            -
                Thread.new do
         
     | 
| 
       20 
     | 
    
         
            -
                  @thin = Thin::Server.start("0.0.0.0", 3532, SampleServer.app)
         
     | 
| 
       21 
     | 
    
         
            -
                end
         
     | 
| 
       22 
     | 
    
         
            -
              
         
     | 
| 
       23 
     | 
    
         
            -
                # WAIT FOR START TO COMPLETE
         
     | 
| 
       24 
     | 
    
         
            -
                sleep 1
         
     | 
| 
       25 
26 
     | 
    
         
             
              }
         
     | 
| 
       26 
27 
     | 
    
         | 
| 
       27 
28 
     | 
    
         
             
              config.before(:each) {
         
     | 
    
        metadata
    CHANGED
    
    | 
         @@ -1,7 +1,7 @@ 
     | 
|
| 
       1 
1 
     | 
    
         
             
            --- !ruby/object:Gem::Specification
         
     | 
| 
       2 
2 
     | 
    
         
             
            name: cobweb
         
     | 
| 
       3 
3 
     | 
    
         
             
            version: !ruby/object:Gem::Version
         
     | 
| 
       4 
     | 
    
         
            -
              version: 1.0. 
     | 
| 
      
 4 
     | 
    
         
            +
              version: 1.0.8
         
     | 
| 
       5 
5 
     | 
    
         
             
              prerelease: 
         
     | 
| 
       6 
6 
     | 
    
         
             
            platform: ruby
         
     | 
| 
       7 
7 
     | 
    
         
             
            authors:
         
     | 
| 
         @@ -9,11 +9,11 @@ authors: 
     | 
|
| 
       9 
9 
     | 
    
         
             
            autorequire: 
         
     | 
| 
       10 
10 
     | 
    
         
             
            bindir: bin
         
     | 
| 
       11 
11 
     | 
    
         
             
            cert_chain: []
         
     | 
| 
       12 
     | 
    
         
            -
            date: 2013-02- 
     | 
| 
      
 12 
     | 
    
         
            +
            date: 2013-02-12 00:00:00.000000000 Z
         
     | 
| 
       13 
13 
     | 
    
         
             
            dependencies:
         
     | 
| 
       14 
14 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       15 
15 
     | 
    
         
             
              name: resque
         
     | 
| 
       16 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 16 
     | 
    
         
            +
              requirement: &70128767187740 !ruby/object:Gem::Requirement
         
     | 
| 
       17 
17 
     | 
    
         
             
                none: false
         
     | 
| 
       18 
18 
     | 
    
         
             
                requirements:
         
     | 
| 
       19 
19 
     | 
    
         
             
                - - ! '>='
         
     | 
| 
         @@ -21,10 +21,10 @@ dependencies: 
     | 
|
| 
       21 
21 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       22 
22 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       23 
23 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       24 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 24 
     | 
    
         
            +
              version_requirements: *70128767187740
         
     | 
| 
       25 
25 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       26 
26 
     | 
    
         
             
              name: redis
         
     | 
| 
       27 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 27 
     | 
    
         
            +
              requirement: &70128767183580 !ruby/object:Gem::Requirement
         
     | 
| 
       28 
28 
     | 
    
         
             
                none: false
         
     | 
| 
       29 
29 
     | 
    
         
             
                requirements:
         
     | 
| 
       30 
30 
     | 
    
         
             
                - - ! '>='
         
     | 
| 
         @@ -32,10 +32,10 @@ dependencies: 
     | 
|
| 
       32 
32 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       33 
33 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       34 
34 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       35 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 35 
     | 
    
         
            +
              version_requirements: *70128767183580
         
     | 
| 
       36 
36 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       37 
37 
     | 
    
         
             
              name: nokogiri
         
     | 
| 
       38 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 38 
     | 
    
         
            +
              requirement: &70128767182220 !ruby/object:Gem::Requirement
         
     | 
| 
       39 
39 
     | 
    
         
             
                none: false
         
     | 
| 
       40 
40 
     | 
    
         
             
                requirements:
         
     | 
| 
       41 
41 
     | 
    
         
             
                - - ! '>='
         
     | 
| 
         @@ -43,10 +43,10 @@ dependencies: 
     | 
|
| 
       43 
43 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       44 
44 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       45 
45 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       46 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 46 
     | 
    
         
            +
              version_requirements: *70128767182220
         
     | 
| 
       47 
47 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       48 
48 
     | 
    
         
             
              name: addressable
         
     | 
| 
       49 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 49 
     | 
    
         
            +
              requirement: &70128767175380 !ruby/object:Gem::Requirement
         
     | 
| 
       50 
50 
     | 
    
         
             
                none: false
         
     | 
| 
       51 
51 
     | 
    
         
             
                requirements:
         
     | 
| 
       52 
52 
     | 
    
         
             
                - - ! '>='
         
     | 
| 
         @@ -54,10 +54,10 @@ dependencies: 
     | 
|
| 
       54 
54 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       55 
55 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       56 
56 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       57 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 57 
     | 
    
         
            +
              version_requirements: *70128767175380
         
     | 
| 
       58 
58 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       59 
59 
     | 
    
         
             
              name: rspec
         
     | 
| 
       60 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 60 
     | 
    
         
            +
              requirement: &70128767172200 !ruby/object:Gem::Requirement
         
     | 
| 
       61 
61 
     | 
    
         
             
                none: false
         
     | 
| 
       62 
62 
     | 
    
         
             
                requirements:
         
     | 
| 
       63 
63 
     | 
    
         
             
                - - ! '>='
         
     | 
| 
         @@ -65,10 +65,10 @@ dependencies: 
     | 
|
| 
       65 
65 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       66 
66 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       67 
67 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       68 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 68 
     | 
    
         
            +
              version_requirements: *70128767172200
         
     | 
| 
       69 
69 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       70 
70 
     | 
    
         
             
              name: awesome_print
         
     | 
| 
       71 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 71 
     | 
    
         
            +
              requirement: &70128767170580 !ruby/object:Gem::Requirement
         
     | 
| 
       72 
72 
     | 
    
         
             
                none: false
         
     | 
| 
       73 
73 
     | 
    
         
             
                requirements:
         
     | 
| 
       74 
74 
     | 
    
         
             
                - - ! '>='
         
     | 
| 
         @@ -76,10 +76,10 @@ dependencies: 
     | 
|
| 
       76 
76 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       77 
77 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       78 
78 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       79 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 79 
     | 
    
         
            +
              version_requirements: *70128767170580
         
     | 
| 
       80 
80 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       81 
81 
     | 
    
         
             
              name: sinatra
         
     | 
| 
       82 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 82 
     | 
    
         
            +
              requirement: &70128767167800 !ruby/object:Gem::Requirement
         
     | 
| 
       83 
83 
     | 
    
         
             
                none: false
         
     | 
| 
       84 
84 
     | 
    
         
             
                requirements:
         
     | 
| 
       85 
85 
     | 
    
         
             
                - - ! '>='
         
     | 
| 
         @@ -87,10 +87,10 @@ dependencies: 
     | 
|
| 
       87 
87 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       88 
88 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       89 
89 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       90 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 90 
     | 
    
         
            +
              version_requirements: *70128767167800
         
     | 
| 
       91 
91 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       92 
92 
     | 
    
         
             
              name: thin
         
     | 
| 
       93 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 93 
     | 
    
         
            +
              requirement: &70128767161460 !ruby/object:Gem::Requirement
         
     | 
| 
       94 
94 
     | 
    
         
             
                none: false
         
     | 
| 
       95 
95 
     | 
    
         
             
                requirements:
         
     | 
| 
       96 
96 
     | 
    
         
             
                - - ! '>='
         
     | 
| 
         @@ -98,10 +98,10 @@ dependencies: 
     | 
|
| 
       98 
98 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       99 
99 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       100 
100 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       101 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 101 
     | 
    
         
            +
              version_requirements: *70128767161460
         
     | 
| 
       102 
102 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       103 
103 
     | 
    
         
             
              name: haml
         
     | 
| 
       104 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 104 
     | 
    
         
            +
              requirement: &70128767157640 !ruby/object:Gem::Requirement
         
     | 
| 
       105 
105 
     | 
    
         
             
                none: false
         
     | 
| 
       106 
106 
     | 
    
         
             
                requirements:
         
     | 
| 
       107 
107 
     | 
    
         
             
                - - ! '>='
         
     | 
| 
         @@ -109,10 +109,10 @@ dependencies: 
     | 
|
| 
       109 
109 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       110 
110 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       111 
111 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       112 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 112 
     | 
    
         
            +
              version_requirements: *70128767157640
         
     | 
| 
       113 
113 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       114 
114 
     | 
    
         
             
              name: namespaced_redis
         
     | 
| 
       115 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 115 
     | 
    
         
            +
              requirement: &70128767152300 !ruby/object:Gem::Requirement
         
     | 
| 
       116 
116 
     | 
    
         
             
                none: false
         
     | 
| 
       117 
117 
     | 
    
         
             
                requirements:
         
     | 
| 
       118 
118 
     | 
    
         
             
                - - ! '>='
         
     | 
| 
         @@ -120,10 +120,10 @@ dependencies: 
     | 
|
| 
       120 
120 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       121 
121 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       122 
122 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       123 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 123 
     | 
    
         
            +
              version_requirements: *70128767152300
         
     | 
| 
       124 
124 
     | 
    
         
             
            - !ruby/object:Gem::Dependency
         
     | 
| 
       125 
125 
     | 
    
         
             
              name: json
         
     | 
| 
       126 
     | 
    
         
            -
              requirement: & 
     | 
| 
      
 126 
     | 
    
         
            +
              requirement: &70128767148180 !ruby/object:Gem::Requirement
         
     | 
| 
       127 
127 
     | 
    
         
             
                none: false
         
     | 
| 
       128 
128 
     | 
    
         
             
                requirements:
         
     | 
| 
       129 
129 
     | 
    
         
             
                - - ! '>='
         
     | 
| 
         @@ -131,7 +131,7 @@ dependencies: 
     | 
|
| 
       131 
131 
     | 
    
         
             
                    version: '0'
         
     | 
| 
       132 
132 
     | 
    
         
             
              type: :runtime
         
     | 
| 
       133 
133 
     | 
    
         
             
              prerelease: false
         
     | 
| 
       134 
     | 
    
         
            -
              version_requirements: * 
     | 
| 
      
 134 
     | 
    
         
            +
              version_requirements: *70128767148180
         
     | 
| 
       135 
135 
     | 
    
         
             
            description: Cobweb is a web crawler that can use resque to cluster crawls to quickly
         
     | 
| 
       136 
136 
     | 
    
         
             
              crawl extremely large sites which is much more performant than multi-threaded crawlers.  It
         
     | 
| 
       137 
137 
     | 
    
         
             
              is also a standalone crawler that has a sophisticated statistics monitoring interface
         
     |