requestmanager 0.0.2 → 0.0.3
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/lib/requestmanager.rb +76 -22
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 48391942056d9ec893ed2e68d7ea93e07d1095e1
|
4
|
+
data.tar.gz: ac26cce67927dcd1e9fad3ca2991852d48f97d7b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 63c284f54b0ce6e223918d6f054c9727991ca46d12ef4bd7dfc8fd65a52bc50897449348c261ed4976420c5a19b6b0c285b4d755860479b7943b1b6584850004
|
7
|
+
data.tar.gz: a2013b4535a16bd29a88aa650152106a5e5b8e73b73a015bf4b58950316a39e78d8a0e98bb9293db20b25952315e0e21c2aa2c0f08c2a2862ec36722b81e8d69
|
data/lib/requestmanager.rb
CHANGED
@@ -4,17 +4,78 @@ require 'pry'
|
|
4
4
|
|
5
5
|
|
6
6
|
class RequestManager
|
7
|
-
def initialize(proxy_list, request_interval)
|
7
|
+
# Set up the manager and open the initial pool of browsers.
#
# proxy_list       - passed to parse_proxy_list; the parsed result is kept
#                    in @proxy_list (nil is treated as "no proxies" by
#                    open_browser).
# request_interval - two-element [min, max] pair; get_page sleeps for a
#                    random duration in min..max after each request.
# browser_num      - how many browsers to open up front. Defaults to 1 so
#                    callers written against the previous two-argument
#                    signature keep working.
def initialize(proxy_list, request_interval, browser_num = 1)
  @proxy_list = parse_proxy_list(proxy_list)
  @request_interval = request_interval
  @used_proxies = []   # "host:port" strings currently assigned to a browser
  @browser_num = browser_num
  @browsers = {}       # key => [driver, last-used Time]
  open_n_browsers
end
|
15
|
+
|
16
|
+
# Open the specified number of browsers
|
17
|
+
# Open @browser_num browsers, one open_browser call per browser.
def open_n_browsers
  # The index is irrelevant here, so use Integer#times rather than a range
  # with an unused block variable.
  @browser_num.times { open_browser }
end
|
22
|
+
|
23
|
+
# Open the browser with a random proxy
|
24
|
+
# Open one browser and register it in @browsers as key => [driver, Time].
#
# When a proxy list is configured, the browser is given a random unused
# proxy and is keyed by that proxy string. Without a proxy list the driver
# is created with a nil proxy.
def open_browser
  chosen_proxy = @proxy_list.nil? ? nil : get_random_proxy

  # Proxyless browsers used to all be stored under the nil key, so every new
  # browser overwrote the previous entry and the earlier drivers were never
  # tracked or closed. Give each one its own unique key instead; the other
  # methods only treat the key as an opaque handle.
  browser_key = chosen_proxy || Object.new
  @browsers[browser_key] = [gen_driver(chosen_proxy), Time.now]
end
|
28
|
+
|
29
|
+
# Get the most recently used browser
|
30
|
+
# Find the browser whose last-used timestamp is the latest.
#
# Returns the [key, [driver, last_used]] pair from @browsers, or nil when no
# browsers are open. On equal timestamps the first entry encountered wins.
def get_most_recent_browser
  @browsers.max_by { |_key, (_driver, last_used)| last_used }
end
|
40
|
+
|
41
|
+
# Get the least recently used browser
|
42
|
+
# Pick the browser that has gone longest without being used, mark it as
# used now, and hand back its driver.
#
# Returns the driver object, or nil when no browsers are open (previously
# this case raised NoMethodError on nil).
def get_least_recent_browser
  key, (driver, _last_used) = @browsers.min_by { |_key, (_driver, last_used)| last_used }
  return nil if driver.nil?

  # Refresh the usage time so the next call rotates to a different browser.
  @browsers[key] = [driver, Time.now]
  driver
end
|
54
|
+
|
55
|
+
# Restart the browser and open new one
|
56
|
+
# Close the most recently used browser and open a replacement.
#
# Does nothing (returns nil) when no browsers are open; previously that case
# raised NoMethodError on nil.
def restart_browser
  stale = get_most_recent_browser
  return nil if stale.nil?

  key, (driver, _last_used) = stale
  driver.quit

  # Drop the closed browser from the pool, then start a new one.
  @browsers.delete(key)
  open_browser

  # The old proxy is released only after the replacement is open, so the new
  # browser cannot be handed the same proxy again.
  # NOTE(review): this ordering looks deliberate -- confirm.
  @used_proxies.delete(key)
end
|
66
|
+
|
67
|
+
# Close all the browsers
|
68
|
+
# Quit every open browser and reset the bookkeeping.
#
# The pool and the used-proxy list are emptied afterwards. Leaving them
# populated would leave already-quit drivers in @browsers and keep their
# proxies marked as in use.
def close_all_browsers
  @browsers.each_value { |driver, _last_used| driver.quit }
  @browsers.clear
  @used_proxies.clear
end
|
12
73
|
|
13
74
|
# Get the page requested
|
14
75
|
def get_page(url, form_input = nil)
|
15
|
-
|
16
|
-
|
17
|
-
|
76
|
+
# Get the page
|
77
|
+
browser = get_least_recent_browser
|
78
|
+
browser.navigate.to url
|
18
79
|
puts "Getting page " + url
|
19
80
|
|
20
81
|
# Handle form input if there is any
|
@@ -24,10 +85,9 @@ class RequestManager
|
|
24
85
|
element.submit
|
25
86
|
end
|
26
87
|
|
27
|
-
# Sleep while things load then save
|
28
|
-
sleep(
|
29
|
-
page_html =
|
30
|
-
driver.quit
|
88
|
+
# Sleep while things load then save output
|
89
|
+
sleep(rand(@request_interval[0]..@request_interval[1]))
|
90
|
+
page_html = browser.page_source
|
31
91
|
return page_html
|
32
92
|
end
|
33
93
|
|
@@ -49,27 +109,21 @@ class RequestManager
|
|
49
109
|
end
|
50
110
|
|
51
111
|
# Choose a random proxy that hasn't been used recently
|
52
|
-
def get_random_proxy
|
112
|
+
# Choose a random proxy that is not currently assigned to a browser.
#
# Each @proxy_list entry contributes "host:port" built from its first two
# fields. The chosen proxy is recorded in @used_proxies and returned.
#
# Raises RuntimeError when every proxy is already in use or the list is
# empty. The previous retry-by-recursion could never succeed in that case
# and ended in a stack overflow.
def get_random_proxy
  available = @proxy_list.map { |entry| "#{entry[0]}:#{entry[1]}" } - @used_proxies
  raise "No unused proxies available" if available.empty?

  chosen_proxy = available.sample
  @used_proxies.push(chosen_proxy)
  chosen_proxy
end
|
65
126
|
|
66
|
-
# Checks if a proxy has been used on domain in the last 20 seconds
|
67
|
-
def is_not_used?(chosen, url)
|
68
|
-
return (!@used_proxies[chosen] ||
|
69
|
-
@used_proxies[chosen][0] <= Time.now-@request_interval[0] ||
|
70
|
-
@used_proxies[chosen][1] != URI.parse(url).host)
|
71
|
-
end
|
72
|
-
|
73
127
|
# Parse the proxy list
|
74
128
|
def parse_proxy_list(proxy_file)
|
75
129
|
if proxy_file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: requestmanager
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- M. C. McGrath
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-03 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Manages proxies, wait intervals, etc
|
14
14
|
email: shidash@shidash.com
|