browsed 0.1.8 → 0.1.9
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/browsed/chrome.rb +1 -1
- data/lib/browsed/client.rb +34 -21
- data/lib/browsed/firefox.rb +12 -5
- data/lib/browsed/manager.rb +69 -61
- data/lib/browsed/poltergeist.rb +2 -2
- data/lib/browsed/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1ac593ddda39f90bf445f8e858cd4a05a3b08cf997f33b1ee448f9791127148c
|
4
|
+
data.tar.gz: e79c1d0d006db49e81b872706868ca00d5ddc087b52738e19a863df33c598cf3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7bc94644afe6938c6df5ce3e3ad53a7c99b93acc52334fab62a1a03c0913551071a07aa3525ff400d97180bc508ac7cbdff645ca2c9e41ff66ba0913769240aa
|
7
|
+
data.tar.gz: 80db44c4e2bae419e34e5aba0c28d597a55d1204ce3d2c0cce1fbda40204bfd959fb0180e4b8b74a6284a4f5bd2fcad2d5536c7f24405018a6c5229e2f3c1dde
|
data/Gemfile.lock
CHANGED
data/lib/browsed/chrome.rb
CHANGED
@@ -2,7 +2,7 @@ module Browsed
|
|
2
2
|
module Chrome
|
3
3
|
|
4
4
|
private
|
5
|
-
def register_chrome_driver(
|
5
|
+
def register_chrome_driver(options: {}, timeout: 60, debug: false)
|
6
6
|
profile = Selenium::WebDriver::Chrome::Profile.new
|
7
7
|
|
8
8
|
profile["user-agent"] = self.user_agent unless self.user_agent.to_s.empty?
|
data/lib/browsed/client.rb
CHANGED
@@ -1,12 +1,10 @@
|
|
1
1
|
module Browsed
|
2
2
|
class Client
|
3
3
|
attr_accessor :configuration
|
4
|
+
attr_accessor :driver, :browser, :browser_id, :environment
|
5
|
+
attr_accessor :session, :proxy
|
6
|
+
attr_accessor :device, :user_agent, :resolution
|
4
7
|
attr_accessor :manager, :maximum_processes
|
5
|
-
attr_accessor :driver, :browser, :environment
|
6
|
-
attr_accessor :session
|
7
|
-
attr_accessor :device, :proxy
|
8
|
-
attr_accessor :user_agent
|
9
|
-
attr_accessor :resolution
|
10
8
|
|
11
9
|
include Capybara::DSL
|
12
10
|
|
@@ -18,7 +16,7 @@ module Browsed
|
|
18
16
|
user_agent: nil,
|
19
17
|
resolution: nil,
|
20
18
|
environment: :production,
|
21
|
-
|
19
|
+
options: {},
|
22
20
|
maximum_processes: nil)
|
23
21
|
|
24
22
|
self.configuration = configuration
|
@@ -27,34 +25,36 @@ module Browsed
|
|
27
25
|
self.browser = browser || self.configuration.browser
|
28
26
|
self.environment = environment || self.configuration.environment
|
29
27
|
|
28
|
+
self.browser_id = generate_browser_id
|
30
29
|
self.device = device
|
31
30
|
self.proxy = proxy
|
32
31
|
|
33
|
-
self.manager = Browsed::Manager.new(browser: self.browser)
|
32
|
+
self.manager = ::Browsed::Manager.new(browser: self.browser)
|
34
33
|
self.maximum_processes = maximum_processes || self.configuration.maximum_processes
|
35
34
|
|
36
35
|
set_user_agent(user_agent)
|
37
36
|
set_resolution(resolution)
|
38
37
|
|
39
|
-
|
38
|
+
options.merge!(browser_id: self.browser_id)
|
39
|
+
setup_capybara(options: options)
|
40
40
|
end
|
41
41
|
|
42
42
|
include ::Browsed::Poltergeist
|
43
43
|
include ::Browsed::Firefox
|
44
44
|
include ::Browsed::Chrome
|
45
45
|
|
46
|
-
def setup_capybara(
|
46
|
+
def setup_capybara(options: {}, retries: 3)
|
47
47
|
if can_start_new_process?
|
48
|
-
register_driver!(
|
48
|
+
register_driver!(options)
|
49
49
|
|
50
50
|
Capybara.default_driver = self.driver
|
51
51
|
Capybara.javascript_driver = self.driver
|
52
52
|
|
53
|
-
Capybara.default_max_wait_time =
|
53
|
+
Capybara.default_max_wait_time = options.fetch(:wait_time, 30) #seconds
|
54
54
|
|
55
55
|
self.session = Capybara::Session.new(self.driver)
|
56
56
|
else
|
57
|
-
raise Browsed::TooManyProcessesError, "Too many
|
57
|
+
raise Browsed::TooManyProcessesError, "Too many #{self.browser} processes running, reached maximum allowed number of #{self.maximum_processes} processes."
|
58
58
|
end
|
59
59
|
end
|
60
60
|
|
@@ -78,25 +78,38 @@ module Browsed
|
|
78
78
|
def reset_session!
|
79
79
|
self.session.reset_session!
|
80
80
|
end
|
81
|
+
|
82
|
+
def generate_browser_id
|
83
|
+
SecureRandom.hex[0..15]
|
84
|
+
end
|
81
85
|
|
82
|
-
def
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
+
def quit!(retries: 3)
|
87
|
+
begin
|
88
|
+
self.session.driver.quit
|
89
|
+
rescue Exception
|
90
|
+
retries -= 1
|
91
|
+
retry if retries > 0
|
92
|
+
end
|
93
|
+
|
94
|
+
# If Selenium/Phantom somehow isn't able to shut down the browser, force a shutdown using kill -9
|
95
|
+
self.manager.set_command(browser_id: self.browser_id)
|
96
|
+
self.manager.kill_processes!
|
97
|
+
|
98
|
+
self.session = nil
|
86
99
|
end
|
87
100
|
|
88
101
|
private
|
89
|
-
def register_driver!(
|
102
|
+
def register_driver!(options = {})
|
90
103
|
if poltergeist?
|
91
|
-
register_poltergeist_driver(
|
104
|
+
register_poltergeist_driver(options: options)
|
92
105
|
elsif selenium?
|
93
106
|
if firefox_browser?
|
94
|
-
register_firefox_driver(
|
107
|
+
register_firefox_driver(options: options)
|
95
108
|
elsif firefox_headless_browser?
|
96
|
-
register_firefox_driver(
|
109
|
+
register_firefox_driver(options: options.merge(headless: true))
|
97
110
|
elsif chrome_browser?
|
98
111
|
self.driver = :selenium_chrome
|
99
|
-
register_chrome_driver(
|
112
|
+
register_chrome_driver(options: options)
|
100
113
|
end
|
101
114
|
end
|
102
115
|
end
|
data/lib/browsed/firefox.rb
CHANGED
@@ -2,10 +2,11 @@ module Browsed
|
|
2
2
|
module Firefox
|
3
3
|
|
4
4
|
private
|
5
|
-
def register_firefox_driver(
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
def register_firefox_driver(options: {}, timeout: 60, debug: false)
|
6
|
+
browser_id = options.fetch(:browser_id, nil)
|
7
|
+
headless = options.fetch(:headless, false)
|
8
|
+
download_path = options.fetch(:download_path, self.configuration.download_path)
|
9
|
+
private_browsing = options.fetch(:private_browsing, false)
|
9
10
|
|
10
11
|
profile = Selenium::WebDriver::Firefox::Profile.new
|
11
12
|
|
@@ -22,15 +23,21 @@ module Browsed
|
|
22
23
|
profile["pdfjs.disabled"] = true
|
23
24
|
end
|
24
25
|
|
26
|
+
profile["browser.tabs.warnOnClose"] = false
|
27
|
+
profile["browser.tabs.warnOnCloseOtherTabs"] = false
|
25
28
|
profile["general.useragent.override"] = self.user_agent unless self.user_agent.to_s.empty?
|
26
29
|
|
30
|
+
id = SecureRandom.hex[0..15]
|
27
31
|
profile = firefox_proxy_options(profile)
|
28
32
|
options = Selenium::WebDriver::Firefox::Options.new(profile: profile)
|
29
|
-
options.args << "--
|
33
|
+
options.args << "--browser_id=#{id}" unless browser_id.to_s.empty?
|
34
|
+
options.args << "--headless" if headless
|
30
35
|
|
31
36
|
Capybara.register_driver self.driver do |app|
|
32
37
|
Capybara::Selenium::Driver.new(app, browser: :firefox, options: options)
|
33
38
|
end
|
39
|
+
|
40
|
+
return id
|
34
41
|
end
|
35
42
|
|
36
43
|
def firefox_proxy_options(profile)
|
data/lib/browsed/manager.rb
CHANGED
@@ -1,46 +1,35 @@
|
|
1
1
|
module Browsed
|
2
2
|
class Manager
|
3
|
-
attr_accessor :command, :kill_signal
|
3
|
+
attr_accessor :command, :kill_signal, :logging
|
4
4
|
|
5
|
-
def initialize(browser: :phantomjs, kill_signal: 9)
|
6
|
-
|
7
|
-
when :phantomjs
|
8
|
-
self.command = "ps -ef | grep /[p]hantomjs"
|
9
|
-
when :firefox
|
10
|
-
self.command = "ps -ef | grep /[g]eckodriver"
|
11
|
-
when :chrome
|
12
|
-
self.command = "ps -ef | grep /[c]hromedriver"
|
13
|
-
else
|
14
|
-
self.command = "ps -ef | grep /[p]hantomjs"
|
15
|
-
end
|
5
|
+
def initialize(command: nil, browser_id: nil, browser: :phantomjs, kill_signal: 9, logging: false)
|
6
|
+
set_command(command: command, browser_id: browser_id, browser: browser)
|
16
7
|
|
17
8
|
self.kill_signal = kill_signal
|
9
|
+
self.logging = logging
|
18
10
|
end
|
19
11
|
|
20
|
-
def
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
12
|
+
def set_command(command: nil, browser_id: nil, browser: :phantomjs)
|
13
|
+
if !command.to_s.empty?
|
14
|
+
self.command = command
|
15
|
+
elsif !browser_id.to_s.empty?
|
16
|
+
self.command = "ps aux | awk '/--browser_id=#{browser_id}/'"
|
17
|
+
else
|
18
|
+
case browser
|
19
|
+
when :phantomjs
|
20
|
+
self.command = "ps -ef | grep /[p]hantomjs"
|
21
|
+
when :firefox
|
22
|
+
self.command = "ps -ef | grep /[f]irefox-bin"
|
23
|
+
when :chrome
|
24
|
+
self.command = "ps -ef | grep /[c]hromedriver"
|
25
|
+
else
|
26
|
+
self.command = "ps -ef | grep /[p]hantomjs"
|
31
27
|
end
|
32
|
-
end
|
28
|
+
end
|
33
29
|
end
|
34
30
|
|
35
|
-
def
|
36
|
-
|
37
|
-
|
38
|
-
begin
|
39
|
-
::Process.kill(self.kill_signal, process[:pid])
|
40
|
-
|
41
|
-
rescue StandardError => e
|
42
|
-
info "[Browsed::Manager] - #{Time.now.to_s(:db)}: Failed to kill process with pid '#{process[:pid]}'. Error Class: #{e.class.name}. Error Message: #{e.message}"
|
43
|
-
end
|
31
|
+
def can_start_more_processes?(max_count: nil)
|
32
|
+
return max_count.nil? || get_current_processes.size < max_count
|
44
33
|
end
|
45
34
|
|
46
35
|
def get_current_processes
|
@@ -55,44 +44,63 @@ module Browsed
|
|
55
44
|
return processes
|
56
45
|
end
|
57
46
|
|
58
|
-
|
47
|
+
def kill_processes!(started_after: nil)
|
48
|
+
processes = get_current_processes
|
49
|
+
|
50
|
+
processes.each do |process|
|
51
|
+
killable = started_after.nil? || (process[:date] && process[:date] < (Time.now - started_after))
|
52
|
+
kill_process!(process) if killable
|
53
|
+
end if processes && processes.any?
|
54
|
+
end
|
59
55
|
|
60
|
-
def
|
61
|
-
process
|
56
|
+
def kill_process!(process)
|
57
|
+
info "[Browsed::Manager] - #{Time.now.to_s}: Killing process with PID #{process[:pid]} matching command #{self.command}."
|
62
58
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
59
|
+
begin
|
60
|
+
::Process.kill(self.kill_signal, process[:pid])
|
61
|
+
|
62
|
+
rescue StandardError => e
|
63
|
+
info "[Browsed::Manager] - #{Time.now.to_s}: Failed to kill process with pid '#{process[:pid]}'. Error Class: #{e.class.name}. Error Message: #{e.message}"
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
private
|
68
|
+
def parse_process(process_data)
|
69
|
+
process = {}
|
67
70
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
+
parts = process_data.split(' ')
|
72
|
+
pid = parts[1].to_i
|
73
|
+
started = parts[4].to_s
|
74
|
+
date = parse_date(started)
|
71
75
|
|
72
|
-
|
76
|
+
process[:pid] = pid
|
77
|
+
process[:started] = started
|
78
|
+
process[:date] = date
|
73
79
|
|
74
|
-
|
75
|
-
|
80
|
+
info "[Browsed::Manager] - #{Time.now.to_s}: Pid: #{pid}. Started: #{started}. Date: #{date}.\n"
|
81
|
+
|
82
|
+
return process
|
83
|
+
end
|
76
84
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
85
|
+
def parse_date(date, retries = 3)
|
86
|
+
begin
|
87
|
+
if (!(date =~ /^[a-z]{3,4}\d*/i).nil?) #Sep16
|
88
|
+
parsed_date = DateTime.strptime(date, "%b%d")
|
81
89
|
|
82
|
-
|
83
|
-
|
84
|
-
|
90
|
+
elsif (!(date =~ /^\d*:\d*/i).nil?) #11:34
|
91
|
+
parsed_date = Time.strptime(date, "%H:%M").to_datetime
|
92
|
+
end
|
85
93
|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
94
|
+
rescue StandardError => e
|
95
|
+
info "[Browsed::Manager] - #{Time.now.to_s}: Exception occurred while trying to parse date/time string '#{date}'. Error Class: #{e.class.name}. Error: #{e.message}."
|
96
|
+
retries -= 1
|
97
|
+
retry if retries > 0
|
98
|
+
end
|
90
99
|
end
|
91
|
-
end
|
92
100
|
|
93
|
-
|
94
|
-
|
95
|
-
|
101
|
+
def info(message)
|
102
|
+
puts message if self.logging
|
103
|
+
end
|
96
104
|
|
97
105
|
end
|
98
106
|
end
|
data/lib/browsed/poltergeist.rb
CHANGED
@@ -2,9 +2,9 @@ module Browsed
|
|
2
2
|
module Poltergeist
|
3
3
|
|
4
4
|
private
|
5
|
-
def register_poltergeist_driver(
|
5
|
+
def register_poltergeist_driver(options: {}, timeout: 60, debug: false)
|
6
6
|
phantom_opts = ['--ignore-ssl-errors=true', '--ssl-protocol=any']
|
7
|
-
disable_images =
|
7
|
+
disable_images = options.fetch(:disable_images, false)
|
8
8
|
|
9
9
|
if disable_images
|
10
10
|
phantom_opts << "--load-images=false"
|
data/lib/browsed/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: browsed
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sebastian
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-10-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|