arachnid 0.3.0 → 0.3.1

Sign up to get free protection for your applications and to get access to all the features.
Files changed (2) hide show
  1. data/lib/arachnid.rb +22 -1
  2. metadata +1 -1
data/lib/arachnid.rb CHANGED
@@ -15,6 +15,7 @@ class Arachnid
15
15
  @split_url_at_hash = options[:split_url_at_hash] ? options[:split_url_at_hash] : false
16
16
  @exclude_urls_with_hash = options[:exclude_urls_with_hash] ? options[:exclude_urls_with_hash] : false
17
17
  @exclude_urls_with_images = options[:exclude_urls_with_images] ? options[:exclude_urls_with_images] : false
18
+ @proxy_list = options[:proxy_list] ? options[:proxy_list] : nil
18
19
 
19
20
  @debug = options[:debug] ? options[:debug] : false
20
21
  end
@@ -26,6 +27,8 @@ class Arachnid
26
27
  #defaults to nil (no limit) so it will always keep running until it runs out of urls
27
28
  max_urls = options[:max_urls] ? options[:max_urls] : nil
28
29
 
30
+
31
+
29
32
  @hydra = Typhoeus::Hydra.new(:max_concurrency => threads)
30
33
  @global_visited = BloomFilter::Native.new(:size => 1000000, :hashes => 5, :seed => 1, :bucket => 8, :raise => false)
31
34
  @global_queue = []
@@ -38,7 +41,11 @@ class Arachnid
38
41
  temp_queue.each do |q|
39
42
 
40
43
  begin
41
- request = Typhoeus::Request.new(q, :timeout => 10000, :follow_location => true)
44
+ ip,port,user,pass = grab_proxy
45
+
46
+ request = Typhoeus::Request.new(q, :timeout => 10000, :follow_location => true) if ip == nil
47
+ request = Typhoeus::Request.new(q, :timeout => 10000, :follow_location => true, :proxy => "#{ip}:#{port}") if ip != nil && user == nil
48
+ request = Typhoeus::Request.new(q, :timeout => 10000, :follow_location => true, :proxy => "#{ip}:#{port}", :proxy_username => user, :proxy_password => pass) if user != nil
42
49
 
43
50
  request.on_complete do |response|
44
51
 
@@ -95,6 +102,20 @@ class Arachnid
95
102
  end
96
103
  end
97
104
 
105
+ def internal_link?(url, effective_url)
106
+
107
+ absolute_url = make_absolute(url, effective_url)
108
+
109
+ parsed_url = parse_domain(absolute_url)
110
+
111
+ def grab_proxy
112
+
113
+ return nil unless @proxy_list
114
+
115
+ return @proxy_list.sample.split(':')
116
+
117
+ end
118
+
98
119
  def internal_link?(url, effective_url)
99
120
 
100
121
  absolute_url = make_absolute(url, effective_url)
metadata CHANGED
@@ -1,7 +1,7 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: arachnid
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.3.0
4
+ version: 0.3.1
5
5
  prerelease:
6
6
  platform: ruby
7
7
  authors: