requestmanager 0.0.2 → 0.0.3

Sign up to get free protection for your applications and to get access to all the features.
Files changed (3)
  1. checksums.yaml +4 -4
  2. data/lib/requestmanager.rb +76 -22
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3f659f37925e9e2719610b340be45956537bd03b
4
- data.tar.gz: 42a0ec24a2c767b540494f2ee61c1cbf59a777f8
3
+ metadata.gz: 48391942056d9ec893ed2e68d7ea93e07d1095e1
4
+ data.tar.gz: ac26cce67927dcd1e9fad3ca2991852d48f97d7b
5
5
  SHA512:
6
- metadata.gz: 6b5207c4531a5dd3b717d5e12ce37d3c9e41296c156a2d40a2249c0baf46b4cf0e35d5f328783917868bcd5f8f9ba11585e38ab4001b290a67c5f802af7d30d0
7
- data.tar.gz: c748d9ab381d76ce132f51eb317829d23cd7d51267c9b273ff25386c4adb0660bb473156bbc5bddb1c6dfe8542cd4c663b04dde88b53f6fa4ad02e531d01e21a
6
+ metadata.gz: 63c284f54b0ce6e223918d6f054c9727991ca46d12ef4bd7dfc8fd65a52bc50897449348c261ed4976420c5a19b6b0c285b4d755860479b7943b1b6584850004
7
+ data.tar.gz: a2013b4535a16bd29a88aa650152106a5e5b8e73b73a015bf4b58950316a39e78d8a0e98bb9293db20b25952315e0e21c2aa2c0f08c2a2862ec36722b81e8d69
data/lib/requestmanager.rb CHANGED
@@ -4,17 +4,78 @@ require 'pry'
4
4
 
5
5
 
6
6
  class RequestManager
7
- def initialize(proxy_list, request_interval)
7
+ def initialize(proxy_list, request_interval, browser_num)
8
8
  @proxy_list = parse_proxy_list(proxy_list)
9
9
  @request_interval = request_interval
10
- @used_proxies = Hash.new
10
+ @used_proxies = Array.new
11
+ @browser_num = browser_num
12
+ @browsers = Hash.new
13
+ open_n_browsers
14
+ end
15
+
16
+ # Open the specified number of browsers
17
+ def open_n_browsers
18
+ (1..@browser_num).each do |i|
19
+ open_browser
20
+ end
21
+ end
22
+
23
+ # Open the browser with a random proxy
24
+ def open_browser
25
+ chosen_proxy = @proxy_list != nil ? get_random_proxy : nil
26
+ @browsers[chosen_proxy] = [gen_driver(chosen_proxy), Time.now]
27
+ end
28
+
29
+ # Get the most recently used browser
30
+ def get_most_recent_browser
31
+ most_recent = @browsers.first
32
+ @browsers.each do |browser|
33
+ if browser[1][1] > most_recent[1][1]
34
+ most_recent = browser
35
+ end
36
+ end
37
+
38
+ return most_recent
39
+ end
40
+
41
+ # Get the least recently used browser
42
+ def get_least_recent_browser
43
+ least_recent = @browsers.first
44
+ @browsers.each do |browser|
45
+ if browser[1][1] < least_recent[1][1]
46
+ least_recent = browser
47
+ end
48
+ end
49
+
50
+ # Update the usage time
51
+ @browsers[least_recent[0]] = [least_recent[1][0], Time.now]
52
+ return least_recent[1][0]
53
+ end
54
+
55
+ # Restart the browser and open new one
56
+ def restart_browser
57
+ # Get most recently used browser and close it
58
+ close_browser = get_most_recent_browser
59
+ close_browser[1][0].quit
60
+
61
+ # Remove it from lists of used browsers and start new
62
+ @browsers.delete(close_browser[0])
63
+ open_browser
64
+ @used_proxies.delete(close_browser[0])
65
+ end
66
+
67
+ # Close all the browsers
68
+ def close_all_browsers
69
+ @browsers.each do |browser|
70
+ browser[1][0].quit
71
+ end
11
72
  end
12
73
 
13
74
  # Get the page requested
14
75
  def get_page(url, form_input = nil)
15
- chosen_proxy = @proxy_list != nil ? get_random_proxy(url) : nil
16
- driver = gen_driver(chosen_proxy)
17
- driver.navigate.to url
76
+ # Get the page
77
+ browser = get_least_recent_browser
78
+ browser.navigate.to url
18
79
  puts "Getting page " + url
19
80
 
20
81
  # Handle form input if there is any
@@ -24,10 +85,9 @@ class RequestManager
24
85
  element.submit
25
86
  end
26
87
 
27
- # Sleep while things load then save
28
- sleep(7)
29
- page_html = driver.page_source
30
- driver.quit
88
+ # Sleep while things load then save output
89
+ sleep(rand(@request_interval[0]..@request_interval[1]))
90
+ page_html = browser.page_source
31
91
  return page_html
32
92
  end
33
93
 
@@ -49,27 +109,21 @@ class RequestManager
49
109
  end
50
110
 
51
111
  # Choose a random proxy that hasn't been used recently
52
- def get_random_proxy(url)
112
+ def get_random_proxy
53
113
  max = @proxy_list.length
54
114
  chosen = @proxy_list[Random.rand(max)]
55
-
115
+ chosen_proxy = chosen[0]+":"+chosen[1]
116
+
56
117
  # Only use proxy if it hasn't been used in last n seconds on same host
57
- if is_not_used?(chosen, url)
58
- @used_proxies[chosen] = [Time.now, URI.parse(url).host]
59
- return chosen[0]+":"+chosen[1]
118
+ if !@used_proxies.include?(chosen_proxy)
119
+ @used_proxies.push(chosen_proxy)
120
+ return chosen_proxy
60
121
  else
61
122
  sleep(0.005)
62
- get_random_proxy(url)
123
+ get_random_proxy
63
124
  end
64
125
  end
65
126
 
66
- # Checks if a proxy has been used on domain in the last 20 seconds
67
- def is_not_used?(chosen, url)
68
- return (!@used_proxies[chosen] ||
69
- @used_proxies[chosen][0] <= Time.now-@request_interval[0] ||
70
- @used_proxies[chosen][1] != URI.parse(url).host)
71
- end
72
-
73
127
  # Parse the proxy list
74
128
  def parse_proxy_list(proxy_file)
75
129
  if proxy_file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: requestmanager
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-01 00:00:00.000000000 Z
11
+ date: 2015-11-03 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Manages proxies, wait intervals, etc
14
14
  email: shidash@shidash.com