arachnid 0.1.3 → 0.2.0

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2)
  1. data/lib/arachnid.rb +4 -1
  2. metadata +1 -1
data/lib/arachnid.rb CHANGED
@@ -21,7 +21,10 @@ class Arachnid
21
21
 
22
22
  def crawl(options = {})
23
23
 
24
+ #defaults to 1 thread so people don't do a stupid amount of crawling on unsuspecting domains
24
25
  threads = options[:threads] ? options[:threads] : 1
26
+ #defaults to -1 so it will always keep running until it runs out of urls
27
+ max_urls = options[:max_urls] ? options[:max_urls] : -1
25
28
 
26
29
  @hydra = Typhoeus::Hydra.new(:max_concurrency => threads)
27
30
  @global_visited = BloomFilter::Native.new(:size => 1000000, :hashes => 5, :seed => 1, :bucket => 8, :raise => false)
@@ -29,7 +32,7 @@ class Arachnid
29
32
 
30
33
  @global_queue << @start_url
31
34
 
32
- while(@global_queue.size > 0)
35
+ while(@global_queue.size > 0 && @global_visited.size != max_urls)
33
36
  temp_queue = @global_queue
34
37
 
35
38
  temp_queue.each do |q|
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: arachnid
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.1.3
5
+ version: 0.2.0
6
6
  platform: ruby
7
7
  authors:
8
8
  - dchuk