arachnid 0.1.3 → 0.2.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (2)
  1. data/lib/arachnid.rb +4 -1
  2. metadata +1 -1
data/lib/arachnid.rb CHANGED
@@ -21,7 +21,10 @@ class Arachnid
21
21
 
22
22
  def crawl(options = {})
23
23
 
24
+ #defaults to 1 thread so people don't do a stupid amount of crawling on unsuspecting domains
24
25
  threads = options[:threads] ? options[:threads] : 1
26
+ #defaults to -1 so it will always keep running until it runs out of urls
27
+ max_urls = options[:max_urls] ? options[:max_urls] : -1
25
28
 
26
29
  @hydra = Typhoeus::Hydra.new(:max_concurrency => threads)
27
30
  @global_visited = BloomFilter::Native.new(:size => 1000000, :hashes => 5, :seed => 1, :bucket => 8, :raise => false)
@@ -29,7 +32,7 @@ class Arachnid
29
32
 
30
33
  @global_queue << @start_url
31
34
 
32
- while(@global_queue.size > 0)
35
+ while(@global_queue.size > 0 && @global_visited.size != max_urls)
33
36
  temp_queue = @global_queue
34
37
 
35
38
  temp_queue.each do |q|
metadata CHANGED
@@ -2,7 +2,7 @@
2
2
  name: arachnid
3
3
  version: !ruby/object:Gem::Version
4
4
  prerelease:
5
- version: 0.1.3
5
+ version: 0.2.0
6
6
  platform: ruby
7
7
  authors:
8
8
  - dchuk