arachnid 0.2.0 → 0.2.1

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/arachnid.rb +3 -3
  2. metadata +1 -1
data/lib/arachnid.rb CHANGED
@@ -24,7 +24,7 @@ class Arachnid
24
24
  #defaults to 1 thread so people don't do a stupid amount of crawling on unsuspecting domains
25
25
  threads = options[:threads] ? options[:threads] : 1
26
26
  #defaults to -1 so it will always keep running until it runs out of urls
27
- max_urls = options[:max_urls] ? options[:max_urls] : -1
27
+ max_urls = options[:max_urls] ? options[:max_urls] : nil
28
28
 
29
29
  @hydra = Typhoeus::Hydra.new(:max_concurrency => threads)
30
30
  @global_visited = BloomFilter::Native.new(:size => 1000000, :hashes => 5, :seed => 1, :bucket => 8, :raise => false)
@@ -32,13 +32,13 @@ class Arachnid
32
32
 
33
33
  @global_queue << @start_url
34
34
 
35
- while(@global_queue.size > 0 && @global_visited.size != max_urls)
35
+ while(@global_queue.size > 0 && (max_urls && @global_visited.size.to_i < max_urls))
36
36
  temp_queue = @global_queue
37
37
 
38
38
  temp_queue.each do |q|
39
39
 
40
40
  begin
41
- request = Typhoeus::Request.new(q, :timeout => 10000)
41
+ request = Typhoeus::Request.new(q, :timeout => 10000, :follow_location => true)
42
42
 
43
43
  request.on_complete do |response|
44
44
 
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: arachnid
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.2.0
5
+ version: 0.2.1
6
6
  platform: ruby
7
7
  authors:
8
8
  - dchuk