requestmanager 0.0.2 → 0.0.3

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (3):
  1. checksums.yaml +4 -4
  2. data/lib/requestmanager.rb +76 -22
  3. metadata +2 -2
checksums.yaml CHANGED
@@ -1,7 +1,7 @@
1
1
  ---
2
2
  SHA1:
3
- metadata.gz: 3f659f37925e9e2719610b340be45956537bd03b
4
- data.tar.gz: 42a0ec24a2c767b540494f2ee61c1cbf59a777f8
3
+ metadata.gz: 48391942056d9ec893ed2e68d7ea93e07d1095e1
4
+ data.tar.gz: ac26cce67927dcd1e9fad3ca2991852d48f97d7b
5
5
  SHA512:
6
- metadata.gz: 6b5207c4531a5dd3b717d5e12ce37d3c9e41296c156a2d40a2249c0baf46b4cf0e35d5f328783917868bcd5f8f9ba11585e38ab4001b290a67c5f802af7d30d0
7
- data.tar.gz: c748d9ab381d76ce132f51eb317829d23cd7d51267c9b273ff25386c4adb0660bb473156bbc5bddb1c6dfe8542cd4c663b04dde88b53f6fa4ad02e531d01e21a
6
+ metadata.gz: 63c284f54b0ce6e223918d6f054c9727991ca46d12ef4bd7dfc8fd65a52bc50897449348c261ed4976420c5a19b6b0c285b4d755860479b7943b1b6584850004
7
+ data.tar.gz: a2013b4535a16bd29a88aa650152106a5e5b8e73b73a015bf4b58950316a39e78d8a0e98bb9293db20b25952315e0e21c2aa2c0f08c2a2862ec36722b81e8d69
@@ -4,17 +4,78 @@ require 'pry'
4
4
 
5
5
 
6
6
  class RequestManager
7
- def initialize(proxy_list, request_interval)
7
+ def initialize(proxy_list, request_interval, browser_num)
8
8
  @proxy_list = parse_proxy_list(proxy_list)
9
9
  @request_interval = request_interval
10
- @used_proxies = Hash.new
10
+ @used_proxies = Array.new
11
+ @browser_num = browser_num
12
+ @browsers = Hash.new
13
+ open_n_browsers
14
+ end
15
+
16
+ # Open the specified number of browsers
17
+ def open_n_browsers
18
+ (1..@browser_num).each do |i|
19
+ open_browser
20
+ end
21
+ end
22
+
23
+ # Open the browser with a random proxy
24
+ def open_browser
25
+ chosen_proxy = @proxy_list != nil ? get_random_proxy : nil
26
+ @browsers[chosen_proxy] = [gen_driver(chosen_proxy), Time.now]
27
+ end
28
+
29
+ # Get the most recently used browser
30
+ def get_most_recent_browser
31
+ most_recent = @browsers.first
32
+ @browsers.each do |browser|
33
+ if browser[1][1] > most_recent[1][1]
34
+ most_recent = browser
35
+ end
36
+ end
37
+
38
+ return most_recent
39
+ end
40
+
41
+ # Get the least recently used browser
42
+ def get_least_recent_browser
43
+ least_recent = @browsers.first
44
+ @browsers.each do |browser|
45
+ if browser[1][1] < least_recent[1][1]
46
+ least_recent = browser
47
+ end
48
+ end
49
+
50
+ # Update the usage time
51
+ @browsers[least_recent[0]] = [least_recent[1][0], Time.now]
52
+ return least_recent[1][0]
53
+ end
54
+
55
+ # Restart the browser and open new one
56
+ def restart_browser
57
+ # Get most recently used browser and close it
58
+ close_browser = get_most_recent_browser
59
+ close_browser[1][0].quit
60
+
61
+ # Remove it from lists of used browsers and start new
62
+ @browsers.delete(close_browser[0])
63
+ open_browser
64
+ @used_proxies.delete(close_browser[0])
65
+ end
66
+
67
+ # Close all the browsers
68
+ def close_all_browsers
69
+ @browsers.each do |browser|
70
+ browser[1][0].quit
71
+ end
11
72
  end
12
73
 
13
74
  # Get the page requested
14
75
  def get_page(url, form_input = nil)
15
- chosen_proxy = @proxy_list != nil ? get_random_proxy(url) : nil
16
- driver = gen_driver(chosen_proxy)
17
- driver.navigate.to url
76
+ # Get the page
77
+ browser = get_least_recent_browser
78
+ browser.navigate.to url
18
79
  puts "Getting page " + url
19
80
 
20
81
  # Handle form input if there is any
@@ -24,10 +85,9 @@ class RequestManager
24
85
  element.submit
25
86
  end
26
87
 
27
- # Sleep while things load then save
28
- sleep(7)
29
- page_html = driver.page_source
30
- driver.quit
88
+ # Sleep while things load then save output
89
+ sleep(rand(@request_interval[0]..@request_interval[1]))
90
+ page_html = browser.page_source
31
91
  return page_html
32
92
  end
33
93
 
@@ -49,27 +109,21 @@ class RequestManager
49
109
  end
50
110
 
51
111
  # Choose a random proxy that hasn't been used recently
52
- def get_random_proxy(url)
112
+ def get_random_proxy
53
113
  max = @proxy_list.length
54
114
  chosen = @proxy_list[Random.rand(max)]
55
-
115
+ chosen_proxy = chosen[0]+":"+chosen[1]
116
+
56
117
  # Only use proxy if it hasn't been used in last n seconds on same host
57
- if is_not_used?(chosen, url)
58
- @used_proxies[chosen] = [Time.now, URI.parse(url).host]
59
- return chosen[0]+":"+chosen[1]
118
+ if !@used_proxies.include?(chosen_proxy)
119
+ @used_proxies.push(chosen_proxy)
120
+ return chosen_proxy
60
121
  else
61
122
  sleep(0.005)
62
- get_random_proxy(url)
123
+ get_random_proxy
63
124
  end
64
125
  end
65
126
 
66
- # Checks if a proxy has been used on domain in the last 20 seconds
67
- def is_not_used?(chosen, url)
68
- return (!@used_proxies[chosen] ||
69
- @used_proxies[chosen][0] <= Time.now-@request_interval[0] ||
70
- @used_proxies[chosen][1] != URI.parse(url).host)
71
- end
72
-
73
127
  # Parse the proxy list
74
128
  def parse_proxy_list(proxy_file)
75
129
  if proxy_file
metadata CHANGED
@@ -1,14 +1,14 @@
1
1
  --- !ruby/object:Gem::Specification
2
2
  name: requestmanager
3
3
  version: !ruby/object:Gem::Version
4
- version: 0.0.2
4
+ version: 0.0.3
5
5
  platform: ruby
6
6
  authors:
7
7
  - M. C. McGrath
8
8
  autorequire:
9
9
  bindir: bin
10
10
  cert_chain: []
11
- date: 2015-11-01 00:00:00.000000000 Z
11
+ date: 2015-11-03 00:00:00.000000000 Z
12
12
  dependencies: []
13
13
  description: Manages proxies, wait intervals, etc
14
14
  email: shidash@shidash.com