arachnid 0.2.0 → 0.2.1
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/arachnid.rb +3 -3
- metadata +1 -1
data/lib/arachnid.rb
CHANGED
@@ -24,7 +24,7 @@ class Arachnid
|
|
24
24
|
#defaults to 1 thread so people don't do a stupid amount of crawling on unsuspecting domains
|
25
25
|
threads = options[:threads] ? options[:threads] : 1
|
26
26
|
#defaults to -1 so it will always keep running until it runs out of urls
|
27
|
-
max_urls = options[:max_urls] ? options[:max_urls] : -1
|
27
|
+
max_urls = options[:max_urls] ? options[:max_urls] : nil
|
28
28
|
|
29
29
|
@hydra = Typhoeus::Hydra.new(:max_concurrency => threads)
|
30
30
|
@global_visited = BloomFilter::Native.new(:size => 1000000, :hashes => 5, :seed => 1, :bucket => 8, :raise => false)
|
@@ -32,13 +32,13 @@ class Arachnid
|
|
32
32
|
|
33
33
|
@global_queue << @start_url
|
34
34
|
|
35
|
-
while(@global_queue.size > 0 && @global_visited.size
|
35
|
+
while(@global_queue.size > 0 && (max_urls && @global_visited.size.to_i < max_urls))
|
36
36
|
temp_queue = @global_queue
|
37
37
|
|
38
38
|
temp_queue.each do |q|
|
39
39
|
|
40
40
|
begin
|
41
|
-
request = Typhoeus::Request.new(q, :timeout => 10000)
|
41
|
+
request = Typhoeus::Request.new(q, :timeout => 10000, :follow_location => true)
|
42
42
|
|
43
43
|
request.on_complete do |response|
|
44
44
|
|