requestmanager 0.0.2 → 0.0.3
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/lib/requestmanager.rb +76 -22
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA1:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 48391942056d9ec893ed2e68d7ea93e07d1095e1
|
4
|
+
data.tar.gz: ac26cce67927dcd1e9fad3ca2991852d48f97d7b
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 63c284f54b0ce6e223918d6f054c9727991ca46d12ef4bd7dfc8fd65a52bc50897449348c261ed4976420c5a19b6b0c285b4d755860479b7943b1b6584850004
|
7
|
+
data.tar.gz: a2013b4535a16bd29a88aa650152106a5e5b8e73b73a015bf4b58950316a39e78d8a0e98bb9293db20b25952315e0e21c2aa2c0f08c2a2862ec36722b81e8d69
|
data/lib/requestmanager.rb
CHANGED
@@ -4,17 +4,78 @@ require 'pry'
|
|
4
4
|
|
5
5
|
|
6
6
|
class RequestManager
|
7
|
-
def initialize(proxy_list, request_interval)
|
7
|
+
def initialize(proxy_list, request_interval, browser_num)
|
8
8
|
@proxy_list = parse_proxy_list(proxy_list)
|
9
9
|
@request_interval = request_interval
|
10
|
-
@used_proxies =
|
10
|
+
@used_proxies = Array.new
|
11
|
+
@browser_num = browser_num
|
12
|
+
@browsers = Hash.new
|
13
|
+
open_n_browsers
|
14
|
+
end
|
15
|
+
|
16
|
+
# Open the specified number of browsers
|
17
|
+
def open_n_browsers
|
18
|
+
(1..@browser_num).each do |i|
|
19
|
+
open_browser
|
20
|
+
end
|
21
|
+
end
|
22
|
+
|
23
|
+
# Open the browser with a random proxy
|
24
|
+
def open_browser
|
25
|
+
chosen_proxy = @proxy_list != nil ? get_random_proxy : nil
|
26
|
+
@browsers[chosen_proxy] = [gen_driver(chosen_proxy), Time.now]
|
27
|
+
end
|
28
|
+
|
29
|
+
# Get the most recently used browser
|
30
|
+
def get_most_recent_browser
|
31
|
+
most_recent = @browsers.first
|
32
|
+
@browsers.each do |browser|
|
33
|
+
if browser[1][1] > most_recent[1][1]
|
34
|
+
most_recent = browser
|
35
|
+
end
|
36
|
+
end
|
37
|
+
|
38
|
+
return most_recent
|
39
|
+
end
|
40
|
+
|
41
|
+
# Get the least recently used browser
|
42
|
+
def get_least_recent_browser
|
43
|
+
least_recent = @browsers.first
|
44
|
+
@browsers.each do |browser|
|
45
|
+
if browser[1][1] < least_recent[1][1]
|
46
|
+
least_recent = browser
|
47
|
+
end
|
48
|
+
end
|
49
|
+
|
50
|
+
# Update the usage time
|
51
|
+
@browsers[least_recent[0]] = [least_recent[1][0], Time.now]
|
52
|
+
return least_recent[1][0]
|
53
|
+
end
|
54
|
+
|
55
|
+
# Restart the browser and open new one
|
56
|
+
def restart_browser
|
57
|
+
# Get most recently used browser and close it
|
58
|
+
close_browser = get_most_recent_browser
|
59
|
+
close_browser[1][0].quit
|
60
|
+
|
61
|
+
# Remove it from lists of used browsers and start new
|
62
|
+
@browsers.delete(close_browser[0])
|
63
|
+
open_browser
|
64
|
+
@used_proxies.delete(close_browser[0])
|
65
|
+
end
|
66
|
+
|
67
|
+
# Close all the browsers
|
68
|
+
def close_all_browsers
|
69
|
+
@browsers.each do |browser|
|
70
|
+
browser[1][0].quit
|
71
|
+
end
|
11
72
|
end
|
12
73
|
|
13
74
|
# Get the page requested
|
14
75
|
def get_page(url, form_input = nil)
|
15
|
-
|
16
|
-
|
17
|
-
|
76
|
+
# Get the page
|
77
|
+
browser = get_least_recent_browser
|
78
|
+
browser.navigate.to url
|
18
79
|
puts "Getting page " + url
|
19
80
|
|
20
81
|
# Handle form input if there is any
|
@@ -24,10 +85,9 @@ class RequestManager
|
|
24
85
|
element.submit
|
25
86
|
end
|
26
87
|
|
27
|
-
# Sleep while things load then save
|
28
|
-
sleep(
|
29
|
-
page_html =
|
30
|
-
driver.quit
|
88
|
+
# Sleep while things load then save output
|
89
|
+
sleep(rand(@request_interval[0]..@request_interval[1]))
|
90
|
+
page_html = browser.page_source
|
31
91
|
return page_html
|
32
92
|
end
|
33
93
|
|
@@ -49,27 +109,21 @@ class RequestManager
|
|
49
109
|
end
|
50
110
|
|
51
111
|
# Choose a random proxy that hasn't been used recently
|
52
|
-
def get_random_proxy
|
112
|
+
def get_random_proxy
|
53
113
|
max = @proxy_list.length
|
54
114
|
chosen = @proxy_list[Random.rand(max)]
|
55
|
-
|
115
|
+
chosen_proxy = chosen[0]+":"+chosen[1]
|
116
|
+
|
56
117
|
# Only use proxy if it hasn't been used in last n seconds on same host
|
57
|
-
if
|
58
|
-
@used_proxies
|
59
|
-
return
|
118
|
+
if !@used_proxies.include?(chosen_proxy)
|
119
|
+
@used_proxies.push(chosen_proxy)
|
120
|
+
return chosen_proxy
|
60
121
|
else
|
61
122
|
sleep(0.005)
|
62
|
-
get_random_proxy
|
123
|
+
get_random_proxy
|
63
124
|
end
|
64
125
|
end
|
65
126
|
|
66
|
-
# Checks if a proxy has been used on domain in the last 20 seconds
|
67
|
-
def is_not_used?(chosen, url)
|
68
|
-
return (!@used_proxies[chosen] ||
|
69
|
-
@used_proxies[chosen][0] <= Time.now-@request_interval[0] ||
|
70
|
-
@used_proxies[chosen][1] != URI.parse(url).host)
|
71
|
-
end
|
72
|
-
|
73
127
|
# Parse the proxy list
|
74
128
|
def parse_proxy_list(proxy_file)
|
75
129
|
if proxy_file
|
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: requestmanager
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.0.2
|
4
|
+
version: 0.0.3
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- M. C. McGrath
|
8
8
|
autorequire:
|
9
9
|
bindir: bin
|
10
10
|
cert_chain: []
|
11
|
-
date: 2015-11-
|
11
|
+
date: 2015-11-03 00:00:00.000000000 Z
|
12
12
|
dependencies: []
|
13
13
|
description: Manages proxies, wait intervals, etc
|
14
14
|
email: shidash@shidash.com
|