arachnid 0.1.3 → 0.2.0
Sign up to get free protection for your applications and to get access to all the features.
- data/lib/arachnid.rb +4 -1
- metadata +1 -1
data/lib/arachnid.rb
CHANGED
@@ -21,7 +21,10 @@ class Arachnid
|
|
21
21
|
|
22
22
|
def crawl(options = {})
|
23
23
|
|
24
|
+
# Defaults to 1 thread so that callers don't accidentally flood unsuspecting domains with concurrent requests
|
24
25
|
threads = options[:threads] ? options[:threads] : 1
|
26
|
+
# Defaults to -1 (no limit), so the crawl keeps running until it runs out of URLs
|
27
|
+
max_urls = options[:max_urls] ? options[:max_urls] : -1
|
25
28
|
|
26
29
|
@hydra = Typhoeus::Hydra.new(:max_concurrency => threads)
|
27
30
|
@global_visited = BloomFilter::Native.new(:size => 1000000, :hashes => 5, :seed => 1, :bucket => 8, :raise => false)
|
@@ -29,7 +32,7 @@ class Arachnid
|
|
29
32
|
|
30
33
|
@global_queue << @start_url
|
31
34
|
|
32
|
-
while(@global_queue.size > 0)
|
35
|
+
while(@global_queue.size > 0 && @global_visited.size != max_urls)
|
33
36
|
temp_queue = @global_queue
|
34
37
|
|
35
38
|
temp_queue.each do |q|
|