browsed 0.1.8 → 0.1.9
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/Gemfile.lock +1 -1
- data/lib/browsed/chrome.rb +1 -1
- data/lib/browsed/client.rb +34 -21
- data/lib/browsed/firefox.rb +12 -5
- data/lib/browsed/manager.rb +69 -61
- data/lib/browsed/poltergeist.rb +2 -2
- data/lib/browsed/version.rb +1 -1
- metadata +2 -2
checksums.yaml
CHANGED
@@ -1,7 +1,7 @@
|
|
1
1
|
---
|
2
2
|
SHA256:
|
3
|
-
metadata.gz:
|
4
|
-
data.tar.gz:
|
3
|
+
metadata.gz: 1ac593ddda39f90bf445f8e858cd4a05a3b08cf997f33b1ee448f9791127148c
|
4
|
+
data.tar.gz: e79c1d0d006db49e81b872706868ca00d5ddc087b52738e19a863df33c598cf3
|
5
5
|
SHA512:
|
6
|
-
metadata.gz:
|
7
|
-
data.tar.gz:
|
6
|
+
metadata.gz: 7bc94644afe6938c6df5ce3e3ad53a7c99b93acc52334fab62a1a03c0913551071a07aa3525ff400d97180bc508ac7cbdff645ca2c9e41ff66ba0913769240aa
|
7
|
+
data.tar.gz: 80db44c4e2bae419e34e5aba0c28d597a55d1204ce3d2c0cce1fbda40204bfd959fb0180e4b8b74a6284a4f5bd2fcad2d5536c7f24405018a6c5229e2f3c1dde
|
data/Gemfile.lock
CHANGED
data/lib/browsed/chrome.rb
CHANGED
@@ -2,7 +2,7 @@ module Browsed
|
|
2
2
|
module Chrome
|
3
3
|
|
4
4
|
private
|
5
|
-
def register_chrome_driver(
|
5
|
+
def register_chrome_driver(options: {}, timeout: 60, debug: false)
|
6
6
|
profile = Selenium::WebDriver::Chrome::Profile.new
|
7
7
|
|
8
8
|
profile["user-agent"] = self.user_agent unless self.user_agent.to_s.empty?
|
data/lib/browsed/client.rb
CHANGED
@@ -1,12 +1,10 @@
|
|
1
1
|
module Browsed
|
2
2
|
class Client
|
3
3
|
attr_accessor :configuration
|
4
|
+
attr_accessor :driver, :browser, :browser_id, :environment
|
5
|
+
attr_accessor :session, :proxy
|
6
|
+
attr_accessor :device, :user_agent, :resolution
|
4
7
|
attr_accessor :manager, :maximum_processes
|
5
|
-
attr_accessor :driver, :browser, :environment
|
6
|
-
attr_accessor :session
|
7
|
-
attr_accessor :device, :proxy
|
8
|
-
attr_accessor :user_agent
|
9
|
-
attr_accessor :resolution
|
10
8
|
|
11
9
|
include Capybara::DSL
|
12
10
|
|
@@ -18,7 +16,7 @@ module Browsed
|
|
18
16
|
user_agent: nil,
|
19
17
|
resolution: nil,
|
20
18
|
environment: :production,
|
21
|
-
|
19
|
+
options: {},
|
22
20
|
maximum_processes: nil)
|
23
21
|
|
24
22
|
self.configuration = configuration
|
@@ -27,34 +25,36 @@ module Browsed
|
|
27
25
|
self.browser = browser || self.configuration.browser
|
28
26
|
self.environment = environment || self.configuration.environment
|
29
27
|
|
28
|
+
self.browser_id = generate_browser_id
|
30
29
|
self.device = device
|
31
30
|
self.proxy = proxy
|
32
31
|
|
33
|
-
self.manager = Browsed::Manager.new(browser: self.browser)
|
32
|
+
self.manager = ::Browsed::Manager.new(browser: self.browser)
|
34
33
|
self.maximum_processes = maximum_processes || self.configuration.maximum_processes
|
35
34
|
|
36
35
|
set_user_agent(user_agent)
|
37
36
|
set_resolution(resolution)
|
38
37
|
|
39
|
-
|
38
|
+
options.merge!(browser_id: self.browser_id)
|
39
|
+
setup_capybara(options: options)
|
40
40
|
end
|
41
41
|
|
42
42
|
include ::Browsed::Poltergeist
|
43
43
|
include ::Browsed::Firefox
|
44
44
|
include ::Browsed::Chrome
|
45
45
|
|
46
|
-
def setup_capybara(
|
46
|
+
def setup_capybara(options: {}, retries: 3)
|
47
47
|
if can_start_new_process?
|
48
|
-
register_driver!(
|
48
|
+
register_driver!(options)
|
49
49
|
|
50
50
|
Capybara.default_driver = self.driver
|
51
51
|
Capybara.javascript_driver = self.driver
|
52
52
|
|
53
|
-
Capybara.default_max_wait_time =
|
53
|
+
Capybara.default_max_wait_time = options.fetch(:wait_time, 30) #seconds
|
54
54
|
|
55
55
|
self.session = Capybara::Session.new(self.driver)
|
56
56
|
else
|
57
|
-
raise Browsed::TooManyProcessesError, "Too many
|
57
|
+
raise Browsed::TooManyProcessesError, "Too many #{self.browser} processes running, reached maximum allowed number of #{self.maximum_processes} processes."
|
58
58
|
end
|
59
59
|
end
|
60
60
|
|
@@ -78,25 +78,38 @@ module Browsed
|
|
78
78
|
def reset_session!
|
79
79
|
self.session.reset_session!
|
80
80
|
end
|
81
|
+
|
82
|
+
def generate_browser_id
|
83
|
+
SecureRandom.hex[0..15]
|
84
|
+
end
|
81
85
|
|
82
|
-
def
|
83
|
-
|
84
|
-
|
85
|
-
|
86
|
+
def quit!(retries: 3)
|
87
|
+
begin
|
88
|
+
self.session.driver.quit
|
89
|
+
rescue Exception
|
90
|
+
retries -= 1
|
91
|
+
retry if retries > 0
|
92
|
+
end
|
93
|
+
|
94
|
+
# If Selenium/Phantom somehow isn't able to shut down the browser, force a shutdown using kill -9
|
95
|
+
self.manager.set_command(browser_id: self.browser_id)
|
96
|
+
self.manager.kill_processes!
|
97
|
+
|
98
|
+
self.session = nil
|
86
99
|
end
|
87
100
|
|
88
101
|
private
|
89
|
-
def register_driver!(
|
102
|
+
def register_driver!(options = {})
|
90
103
|
if poltergeist?
|
91
|
-
register_poltergeist_driver(
|
104
|
+
register_poltergeist_driver(options: options)
|
92
105
|
elsif selenium?
|
93
106
|
if firefox_browser?
|
94
|
-
register_firefox_driver(
|
107
|
+
register_firefox_driver(options: options)
|
95
108
|
elsif firefox_headless_browser?
|
96
|
-
register_firefox_driver(
|
109
|
+
register_firefox_driver(options: options.merge(headless: true))
|
97
110
|
elsif chrome_browser?
|
98
111
|
self.driver = :selenium_chrome
|
99
|
-
register_chrome_driver(
|
112
|
+
register_chrome_driver(options: options)
|
100
113
|
end
|
101
114
|
end
|
102
115
|
end
|
data/lib/browsed/firefox.rb
CHANGED
@@ -2,10 +2,11 @@ module Browsed
|
|
2
2
|
module Firefox
|
3
3
|
|
4
4
|
private
|
5
|
-
def register_firefox_driver(
|
6
|
-
|
7
|
-
|
8
|
-
|
5
|
+
def register_firefox_driver(options: {}, timeout: 60, debug: false)
|
6
|
+
browser_id = options.fetch(:browser_id, nil)
|
7
|
+
headless = options.fetch(:headless, false)
|
8
|
+
download_path = options.fetch(:download_path, self.configuration.download_path)
|
9
|
+
private_browsing = options.fetch(:private_browsing, false)
|
9
10
|
|
10
11
|
profile = Selenium::WebDriver::Firefox::Profile.new
|
11
12
|
|
@@ -22,15 +23,21 @@ module Browsed
|
|
22
23
|
profile["pdfjs.disabled"] = true
|
23
24
|
end
|
24
25
|
|
26
|
+
profile["browser.tabs.warnOnClose"] = false
|
27
|
+
profile["browser.tabs.warnOnCloseOtherTabs"] = false
|
25
28
|
profile["general.useragent.override"] = self.user_agent unless self.user_agent.to_s.empty?
|
26
29
|
|
30
|
+
id = SecureRandom.hex[0..15]
|
27
31
|
profile = firefox_proxy_options(profile)
|
28
32
|
options = Selenium::WebDriver::Firefox::Options.new(profile: profile)
|
29
|
-
options.args << "--
|
33
|
+
options.args << "--browser_id=#{id}" unless browser_id.to_s.empty?
|
34
|
+
options.args << "--headless" if headless
|
30
35
|
|
31
36
|
Capybara.register_driver self.driver do |app|
|
32
37
|
Capybara::Selenium::Driver.new(app, browser: :firefox, options: options)
|
33
38
|
end
|
39
|
+
|
40
|
+
return id
|
34
41
|
end
|
35
42
|
|
36
43
|
def firefox_proxy_options(profile)
|
data/lib/browsed/manager.rb
CHANGED
@@ -1,46 +1,35 @@
|
|
1
1
|
module Browsed
|
2
2
|
class Manager
|
3
|
-
attr_accessor :command, :kill_signal
|
3
|
+
attr_accessor :command, :kill_signal, :logging
|
4
4
|
|
5
|
-
def initialize(browser: :phantomjs, kill_signal: 9)
|
6
|
-
|
7
|
-
when :phantomjs
|
8
|
-
self.command = "ps -ef | grep /[p]hantomjs"
|
9
|
-
when :firefox
|
10
|
-
self.command = "ps -ef | grep /[g]eckodriver"
|
11
|
-
when :chrome
|
12
|
-
self.command = "ps -ef | grep /[c]hromedriver"
|
13
|
-
else
|
14
|
-
self.command = "ps -ef | grep /[p]hantomjs"
|
15
|
-
end
|
5
|
+
def initialize(command: nil, browser_id: nil, browser: :phantomjs, kill_signal: 9, logging: false)
|
6
|
+
set_command(command: command, browser_id: browser_id, browser: browser)
|
16
7
|
|
17
8
|
self.kill_signal = kill_signal
|
9
|
+
self.logging = logging
|
18
10
|
end
|
19
11
|
|
20
|
-
def
|
21
|
-
|
22
|
-
|
23
|
-
|
24
|
-
|
25
|
-
|
26
|
-
|
27
|
-
|
28
|
-
|
29
|
-
|
30
|
-
|
12
|
+
def set_command(command: nil, browser_id: nil, browser: :phantomjs)
|
13
|
+
if !command.to_s.empty?
|
14
|
+
self.command = command
|
15
|
+
elsif !browser_id.to_s.empty?
|
16
|
+
self.command = "ps aux | awk '/--browser_id=#{browser_id}/'"
|
17
|
+
else
|
18
|
+
case browser
|
19
|
+
when :phantomjs
|
20
|
+
self.command = "ps -ef | grep /[p]hantomjs"
|
21
|
+
when :firefox
|
22
|
+
self.command = "ps -ef | grep /[f]irefox-bin"
|
23
|
+
when :chrome
|
24
|
+
self.command = "ps -ef | grep /[c]hromedriver"
|
25
|
+
else
|
26
|
+
self.command = "ps -ef | grep /[p]hantomjs"
|
31
27
|
end
|
32
|
-
end
|
28
|
+
end
|
33
29
|
end
|
34
30
|
|
35
|
-
def
|
36
|
-
|
37
|
-
|
38
|
-
begin
|
39
|
-
::Process.kill(self.kill_signal, process[:pid])
|
40
|
-
|
41
|
-
rescue StandardError => e
|
42
|
-
info "[Browsed::Manager] - #{Time.now.to_s(:db)}: Failed to kill process with pid '#{process[:pid]}'. Error Class: #{e.class.name}. Error Message: #{e.message}"
|
43
|
-
end
|
31
|
+
def can_start_more_processes?(max_count: nil)
|
32
|
+
return max_count.nil? || get_current_processes.size < max_count
|
44
33
|
end
|
45
34
|
|
46
35
|
def get_current_processes
|
@@ -55,44 +44,63 @@ module Browsed
|
|
55
44
|
return processes
|
56
45
|
end
|
57
46
|
|
58
|
-
|
47
|
+
def kill_processes!(started_after: nil)
|
48
|
+
processes = get_current_processes
|
49
|
+
|
50
|
+
processes.each do |process|
|
51
|
+
killable = started_after.nil? || (process[:date] && process[:date] < (Time.now - started_after))
|
52
|
+
kill_process!(process) if killable
|
53
|
+
end if processes && processes.any?
|
54
|
+
end
|
59
55
|
|
60
|
-
def
|
61
|
-
process
|
56
|
+
def kill_process!(process)
|
57
|
+
info "[Browsed::Manager] - #{Time.now.to_s}: Killing process with PID #{process[:pid]} matching command #{self.command}."
|
62
58
|
|
63
|
-
|
64
|
-
|
65
|
-
|
66
|
-
|
59
|
+
begin
|
60
|
+
::Process.kill(self.kill_signal, process[:pid])
|
61
|
+
|
62
|
+
rescue StandardError => e
|
63
|
+
info "[Browsed::Manager] - #{Time.now.to_s}: Failed to kill process with pid '#{process[:pid]}'. Error Class: #{e.class.name}. Error Message: #{e.message}"
|
64
|
+
end
|
65
|
+
end
|
66
|
+
|
67
|
+
private
|
68
|
+
def parse_process(process_data)
|
69
|
+
process = {}
|
67
70
|
|
68
|
-
|
69
|
-
|
70
|
-
|
71
|
+
parts = process_data.split(' ')
|
72
|
+
pid = parts[1].to_i
|
73
|
+
started = parts[4].to_s
|
74
|
+
date = parse_date(started)
|
71
75
|
|
72
|
-
|
76
|
+
process[:pid] = pid
|
77
|
+
process[:started] = started
|
78
|
+
process[:date] = date
|
73
79
|
|
74
|
-
|
75
|
-
|
80
|
+
info "[Browsed::Manager] - #{Time.now.to_s}: Pid: #{pid}. Started: #{started}. Date: #{date}.\n"
|
81
|
+
|
82
|
+
return process
|
83
|
+
end
|
76
84
|
|
77
|
-
|
78
|
-
|
79
|
-
|
80
|
-
|
85
|
+
def parse_date(date, retries = 3)
|
86
|
+
begin
|
87
|
+
if (!(date =~ /^[a-z]{3,4}\d*/i).nil?) #Sep16
|
88
|
+
parsed_date = DateTime.strptime(date, "%b%d")
|
81
89
|
|
82
|
-
|
83
|
-
|
84
|
-
|
90
|
+
elsif (!(date =~ /^\d*:\d*/i).nil?) #11:34
|
91
|
+
parsed_date = Time.strptime(date, "%H:%M").to_datetime
|
92
|
+
end
|
85
93
|
|
86
|
-
|
87
|
-
|
88
|
-
|
89
|
-
|
94
|
+
rescue StandardError => e
|
95
|
+
info "[Browsed::Manager] - #{Time.now.to_s}: Exception occurred while trying to parse date/time string '#{date}'. Error Class: #{e.class.name}. Error: #{e.message}."
|
96
|
+
retries -= 1
|
97
|
+
retry if retries > 0
|
98
|
+
end
|
90
99
|
end
|
91
|
-
end
|
92
100
|
|
93
|
-
|
94
|
-
|
95
|
-
|
101
|
+
def info(message)
|
102
|
+
puts message if self.logging
|
103
|
+
end
|
96
104
|
|
97
105
|
end
|
98
106
|
end
|
data/lib/browsed/poltergeist.rb
CHANGED
@@ -2,9 +2,9 @@ module Browsed
|
|
2
2
|
module Poltergeist
|
3
3
|
|
4
4
|
private
|
5
|
-
def register_poltergeist_driver(
|
5
|
+
def register_poltergeist_driver(options: {}, timeout: 60, debug: false)
|
6
6
|
phantom_opts = ['--ignore-ssl-errors=true', '--ssl-protocol=any']
|
7
|
-
disable_images =
|
7
|
+
disable_images = options.fetch(:disable_images, false)
|
8
8
|
|
9
9
|
if disable_images
|
10
10
|
phantom_opts << "--load-images=false"
|
data/lib/browsed/version.rb
CHANGED
metadata
CHANGED
@@ -1,14 +1,14 @@
|
|
1
1
|
--- !ruby/object:Gem::Specification
|
2
2
|
name: browsed
|
3
3
|
version: !ruby/object:Gem::Version
|
4
|
-
version: 0.1.
|
4
|
+
version: 0.1.9
|
5
5
|
platform: ruby
|
6
6
|
authors:
|
7
7
|
- Sebastian
|
8
8
|
autorequire:
|
9
9
|
bindir: exe
|
10
10
|
cert_chain: []
|
11
|
-
date: 2018-
|
11
|
+
date: 2018-10-10 00:00:00.000000000 Z
|
12
12
|
dependencies:
|
13
13
|
- !ruby/object:Gem::Dependency
|
14
14
|
name: capybara
|