kimurai 1.4.0 → 2.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.rubocop.yml +9 -0
- data/CHANGELOG.md +21 -0
- data/Gemfile +2 -2
- data/README.md +476 -648
- data/Rakefile +6 -6
- data/bin/console +3 -4
- data/exe/kimurai +0 -1
- data/kimurai.gemspec +38 -37
- data/lib/kimurai/base/saver.rb +15 -19
- data/lib/kimurai/base/storage.rb +1 -1
- data/lib/kimurai/base.rb +38 -38
- data/lib/kimurai/base_helper.rb +5 -4
- data/lib/kimurai/browser_builder/mechanize_builder.rb +121 -119
- data/lib/kimurai/browser_builder/selenium_chrome_builder.rb +160 -152
- data/lib/kimurai/browser_builder/selenium_firefox_builder.rb +162 -160
- data/lib/kimurai/browser_builder.rb +1 -7
- data/lib/kimurai/capybara_configuration.rb +1 -1
- data/lib/kimurai/capybara_ext/driver/base.rb +50 -46
- data/lib/kimurai/capybara_ext/mechanize/driver.rb +51 -50
- data/lib/kimurai/capybara_ext/selenium/driver.rb +33 -29
- data/lib/kimurai/capybara_ext/session.rb +31 -38
- data/lib/kimurai/cli/generator.rb +15 -15
- data/lib/kimurai/cli.rb +49 -86
- data/lib/kimurai/core_ext/array.rb +2 -2
- data/lib/kimurai/core_ext/hash.rb +1 -1
- data/lib/kimurai/core_ext/numeric.rb +4 -4
- data/lib/kimurai/pipeline.rb +2 -1
- data/lib/kimurai/runner.rb +6 -6
- data/lib/kimurai/template/Gemfile +2 -2
- data/lib/kimurai/template/config/boot.rb +4 -4
- data/lib/kimurai/template/config/schedule.rb +15 -15
- data/lib/kimurai/template/spiders/application_spider.rb +8 -14
- data/lib/kimurai/version.rb +1 -1
- data/lib/kimurai.rb +7 -3
- metadata +58 -65
- data/.travis.yml +0 -5
- data/lib/kimurai/automation/deploy.yml +0 -54
- data/lib/kimurai/automation/setup/chromium_chromedriver.yml +0 -26
- data/lib/kimurai/automation/setup/firefox_geckodriver.yml +0 -20
- data/lib/kimurai/automation/setup/phantomjs.yml +0 -33
- data/lib/kimurai/automation/setup/ruby_environment.yml +0 -124
- data/lib/kimurai/automation/setup.yml +0 -44
- data/lib/kimurai/browser_builder/poltergeist_phantomjs_builder.rb +0 -175
- data/lib/kimurai/capybara_ext/poltergeist/driver.rb +0 -13
- data/lib/kimurai/cli/ansible_command_builder.rb +0 -71
- data/lib/kimurai/template/config/automation.yml +0 -13
|
@@ -4,151 +4,153 @@ require_relative '../capybara_configuration'
|
|
|
4
4
|
require_relative '../capybara_ext/mechanize/driver'
|
|
5
5
|
require_relative '../capybara_ext/session'
|
|
6
6
|
|
|
7
|
-
module Kimurai
|
|
8
|
-
|
|
9
|
-
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
|
|
15
|
-
|
|
16
|
-
|
|
17
|
-
|
|
18
|
-
|
|
19
|
-
|
|
20
|
-
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
|
|
7
|
+
module Kimurai
|
|
8
|
+
module BrowserBuilder
|
|
9
|
+
class MechanizeBuilder
|
|
10
|
+
attr_reader :logger, :spider
|
|
11
|
+
|
|
12
|
+
def initialize(config, spider:)
|
|
13
|
+
@config = config
|
|
14
|
+
@spider = spider
|
|
15
|
+
@logger = spider.logger
|
|
16
|
+
end
|
|
17
|
+
|
|
18
|
+
def build
|
|
19
|
+
# Register driver
|
|
20
|
+
Capybara.register_driver :mechanize do |_app|
|
|
21
|
+
driver = Capybara::Mechanize::Driver.new('app')
|
|
22
|
+
# keep the history as small as possible (by default it's unlimited)
|
|
23
|
+
driver.configure { |a| a.history.max_size = 2 }
|
|
24
|
+
driver
|
|
25
|
+
end
|
|
25
26
|
|
|
26
|
-
|
|
27
|
-
|
|
28
|
-
|
|
29
|
-
|
|
27
|
+
# Create browser instance (Capybara session)
|
|
28
|
+
@browser = Capybara::Session.new(:mechanize)
|
|
29
|
+
@browser.spider = spider
|
|
30
|
+
logger.debug 'BrowserBuilder (mechanize): created browser instance'
|
|
30
31
|
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
|
|
32
|
+
if @config[:extensions].present?
|
|
33
|
+
logger.error 'BrowserBuilder (mechanize): `extensions` option not supported, skipped'
|
|
34
|
+
end
|
|
34
35
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
36
|
+
# Proxy
|
|
37
|
+
if (proxy = @config[:proxy].presence)
|
|
38
|
+
proxy_string = (proxy.instance_of?(Proc) ? proxy.call : proxy).strip
|
|
39
|
+
ip, port, type = proxy_string.split(':')
|
|
40
|
+
|
|
41
|
+
if type == 'http'
|
|
42
|
+
@browser.driver.set_proxy(*proxy_string.split(':'))
|
|
43
|
+
logger.debug "BrowserBuilder (mechanize): enabled http proxy, ip: #{ip}, port: #{port}"
|
|
44
|
+
else
|
|
45
|
+
logger.error "BrowserBuilder (mechanize): can't set #{type} proxy (not supported), skipped"
|
|
46
|
+
end
|
|
47
|
+
end
|
|
39
48
|
|
|
40
|
-
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
logger.error "BrowserBuilder (mechanize): can't set #{type} proxy (not supported), skipped"
|
|
49
|
+
# SSL
|
|
50
|
+
if (ssl_cert_path = @config[:ssl_cert_path].presence)
|
|
51
|
+
@browser.driver.browser.agent.http.ca_file = ssl_cert_path
|
|
52
|
+
logger.debug 'BrowserBuilder (mechanize): enabled custom ssl_cert'
|
|
45
53
|
end
|
|
46
|
-
end
|
|
47
54
|
|
|
48
|
-
|
|
49
|
-
|
|
50
|
-
|
|
51
|
-
|
|
52
|
-
end
|
|
55
|
+
if @config[:ignore_ssl_errors].present?
|
|
56
|
+
@browser.driver.browser.agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
|
|
57
|
+
logger.debug 'BrowserBuilder (mechanize): enabled ignore_ssl_errors'
|
|
58
|
+
end
|
|
53
59
|
|
|
54
|
-
|
|
55
|
-
|
|
56
|
-
|
|
57
|
-
|
|
60
|
+
# Headers
|
|
61
|
+
if (headers = @config[:headers].presence)
|
|
62
|
+
@browser.driver.headers = headers
|
|
63
|
+
logger.debug 'BrowserBuilder (mechanize): enabled custom headers'
|
|
64
|
+
end
|
|
58
65
|
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
@browser.driver.headers = headers
|
|
62
|
-
logger.debug "BrowserBuilder (mechanize): enabled custom headers"
|
|
63
|
-
end
|
|
66
|
+
if (user_agent = @config[:user_agent].presence)
|
|
67
|
+
user_agent_string = (user_agent.instance_of?(Proc) ? user_agent.call : user_agent).strip
|
|
64
68
|
|
|
65
|
-
|
|
66
|
-
|
|
69
|
+
@browser.driver.add_header('User-Agent', user_agent_string)
|
|
70
|
+
logger.debug 'BrowserBuilder (mechanize): enabled custom user_agent'
|
|
71
|
+
end
|
|
67
72
|
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
73
|
+
# Cookies
|
|
74
|
+
if (cookies = @config[:cookies].presence)
|
|
75
|
+
cookies.each do |cookie|
|
|
76
|
+
@browser.driver.set_cookie(cookie[:name], cookie[:value], cookie)
|
|
77
|
+
end
|
|
71
78
|
|
|
72
|
-
|
|
73
|
-
if cookies = @config[:cookies].presence
|
|
74
|
-
cookies.each do |cookie|
|
|
75
|
-
@browser.driver.set_cookie(cookie[:name], cookie[:value], cookie)
|
|
79
|
+
logger.debug 'BrowserBuilder (mechanize): enabled custom cookies'
|
|
76
80
|
end
|
|
77
81
|
|
|
78
|
-
|
|
79
|
-
|
|
82
|
+
# Browser instance options
|
|
83
|
+
# skip_request_errors
|
|
84
|
+
if (skip_errors = @config[:skip_request_errors].presence)
|
|
85
|
+
@browser.config.skip_request_errors = skip_errors
|
|
86
|
+
logger.debug 'BrowserBuilder (mechanize): enabled skip_request_errors'
|
|
87
|
+
end
|
|
80
88
|
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
|
|
85
|
-
|
|
86
|
-
end
|
|
89
|
+
# retry_request_errors
|
|
90
|
+
if (retry_errors = @config[:retry_request_errors].presence)
|
|
91
|
+
@browser.config.retry_request_errors = retry_errors
|
|
92
|
+
logger.debug 'BrowserBuilder (mechanize): enabled retry_request_errors'
|
|
93
|
+
end
|
|
87
94
|
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
|
|
91
|
-
|
|
92
|
-
end
|
|
95
|
+
# restart_if
|
|
96
|
+
if @config[:restart_if].present?
|
|
97
|
+
logger.warn 'BrowserBuilder (mechanize): restart_if options not supported by Mechanize, skipped'
|
|
98
|
+
end
|
|
93
99
|
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
100
|
+
# before_request clear_cookies
|
|
101
|
+
if @config.dig(:before_request, :clear_cookies)
|
|
102
|
+
@browser.config.before_request[:clear_cookies] = true
|
|
103
|
+
logger.debug 'BrowserBuilder (mechanize): enabled before_request.clear_cookies'
|
|
104
|
+
end
|
|
98
105
|
|
|
99
|
-
|
|
100
|
-
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
|
|
106
|
+
# before_request clear_and_set_cookies
|
|
107
|
+
if @config.dig(:before_request, :clear_and_set_cookies)
|
|
108
|
+
if (cookies = @config[:cookies].presence)
|
|
109
|
+
@browser.config.cookies = cookies
|
|
110
|
+
@browser.config.before_request[:clear_and_set_cookies] = true
|
|
111
|
+
logger.debug 'BrowserBuilder (mechanize): enabled before_request.clear_and_set_cookies'
|
|
112
|
+
else
|
|
113
|
+
logger.error 'BrowserBuilder (mechanize): cookies should be present to enable before_request.clear_and_set_cookies, skipped'
|
|
114
|
+
end
|
|
115
|
+
end
|
|
104
116
|
|
|
105
|
-
|
|
106
|
-
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
117
|
+
# before_request change_user_agent
|
|
118
|
+
if @config.dig(:before_request, :change_user_agent)
|
|
119
|
+
if @config[:user_agent].present? && @config[:user_agent].instance_of?(Proc)
|
|
120
|
+
@browser.config.user_agent = @config[:user_agent]
|
|
121
|
+
@browser.config.before_request[:change_user_agent] = true
|
|
122
|
+
logger.debug 'BrowserBuilder (mechanize): enabled before_request.change_user_agent'
|
|
123
|
+
else
|
|
124
|
+
logger.error 'BrowserBuilder (mechanize): user_agent should be present and has lambda format to enable before_request.change_user_agent, skipped'
|
|
125
|
+
end
|
|
113
126
|
end
|
|
114
|
-
end
|
|
115
127
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
128
|
+
# before_request change_proxy
|
|
129
|
+
if @config.dig(:before_request, :change_proxy)
|
|
130
|
+
if @config[:proxy].present? && @config[:proxy].instance_of?(Proc)
|
|
131
|
+
@browser.config.proxy = @config[:proxy]
|
|
132
|
+
@browser.config.before_request[:change_proxy] = true
|
|
133
|
+
logger.debug 'BrowserBuilder (mechanize): enabled before_request.change_proxy'
|
|
134
|
+
else
|
|
135
|
+
logger.error 'BrowserBuilder (mechanize): proxy should be present and has lambda format to enable before_request.change_proxy, skipped'
|
|
136
|
+
end
|
|
124
137
|
end
|
|
125
|
-
end
|
|
126
138
|
|
|
127
|
-
|
|
128
|
-
|
|
129
|
-
|
|
130
|
-
|
|
131
|
-
@browser.config.before_request[:change_proxy] = true
|
|
132
|
-
logger.debug "BrowserBuilder (mechanize): enabled before_request.change_proxy"
|
|
133
|
-
else
|
|
134
|
-
logger.error "BrowserBuilder (mechanize): proxy should be present and has lambda format to enable before_request.change_proxy, skipped"
|
|
139
|
+
# before_request delay
|
|
140
|
+
if (delay = @config.dig(:before_request, :delay).presence)
|
|
141
|
+
@browser.config.before_request[:delay] = delay
|
|
142
|
+
logger.debug 'BrowserBuilder (mechanize): enabled before_request.delay'
|
|
135
143
|
end
|
|
136
|
-
end
|
|
137
144
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
145
|
+
# encoding
|
|
146
|
+
if (encoding = @config[:encoding])
|
|
147
|
+
@browser.config.encoding = encoding
|
|
148
|
+
logger.debug "BrowserBuilder (mechanize): enabled encoding: #{encoding}"
|
|
149
|
+
end
|
|
143
150
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
@browser.config.encoding = encoding
|
|
147
|
-
logger.debug "BrowserBuilder (mechanize): enabled encoding: #{encoding}"
|
|
151
|
+
# return Capybara session instance
|
|
152
|
+
@browser
|
|
148
153
|
end
|
|
149
|
-
|
|
150
|
-
# return Capybara session instance
|
|
151
|
-
@browser
|
|
152
154
|
end
|
|
153
155
|
end
|
|
154
156
|
end
|
|
@@ -4,195 +4,203 @@ require_relative '../capybara_configuration'
|
|
|
4
4
|
require_relative '../capybara_ext/selenium/driver'
|
|
5
5
|
require_relative '../capybara_ext/session'
|
|
6
6
|
|
|
7
|
-
module Kimurai
|
|
8
|
-
|
|
9
|
-
class
|
|
10
|
-
|
|
11
|
-
|
|
12
|
-
|
|
13
|
-
attr_reader :logger, :spider
|
|
14
|
-
|
|
15
|
-
def initialize(config, spider:)
|
|
16
|
-
@config = config
|
|
17
|
-
@spider = spider
|
|
18
|
-
@logger = spider.logger
|
|
19
|
-
end
|
|
20
|
-
|
|
21
|
-
def build
|
|
22
|
-
# Register driver
|
|
23
|
-
Capybara.register_driver :selenium_chrome do |app|
|
|
24
|
-
# Create driver options
|
|
25
|
-
opts = { args: %w[--disable-gpu --no-sandbox --disable-translate] }
|
|
7
|
+
module Kimurai
|
|
8
|
+
module BrowserBuilder
|
|
9
|
+
class SeleniumChromeBuilder
|
|
10
|
+
class << self
|
|
11
|
+
attr_accessor :virtual_display
|
|
12
|
+
end
|
|
26
13
|
|
|
27
|
-
|
|
28
|
-
if chrome_path = Kimurai.configuration.selenium_chrome_path
|
|
29
|
-
opts.merge!(binary: chrome_path)
|
|
30
|
-
end
|
|
14
|
+
attr_reader :logger, :spider
|
|
31
15
|
|
|
32
|
-
|
|
33
|
-
|
|
16
|
+
def initialize(config, spider:)
|
|
17
|
+
@config = config
|
|
18
|
+
@spider = spider
|
|
19
|
+
@logger = spider.logger
|
|
20
|
+
end
|
|
34
21
|
|
|
35
|
-
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
|
|
39
|
-
|
|
22
|
+
def build
|
|
23
|
+
# Register driver
|
|
24
|
+
Capybara.register_driver :selenium_chrome do |app|
|
|
25
|
+
# Create driver options
|
|
26
|
+
# See all options here: https://seleniumhq.github.io/selenium/docs/api/rb/Selenium/WebDriver/Chrome/Options.html
|
|
27
|
+
driver_options = Selenium::WebDriver::Chrome::Options.new
|
|
28
|
+
driver_options.args += %w[--disable-gpu --no-sandbox --disable-translate
|
|
29
|
+
--disable-blink-features=AutomationControlled]
|
|
30
|
+
|
|
31
|
+
# Provide custom chrome browser path:
|
|
32
|
+
if (chrome_path = Kimurai.configuration.selenium_chrome_path)
|
|
33
|
+
driver_options.binary = chrome_path
|
|
34
|
+
end
|
|
40
35
|
|
|
41
|
-
|
|
42
|
-
|
|
43
|
-
|
|
44
|
-
|
|
36
|
+
# Window size
|
|
37
|
+
if (size = @config[:window_size].presence)
|
|
38
|
+
driver_options.args << "--window-size=#{size.join(',')}"
|
|
39
|
+
logger.debug 'BrowserBuilder (selenium_chrome): enabled window_size'
|
|
40
|
+
end
|
|
45
41
|
|
|
46
|
-
|
|
47
|
-
|
|
48
|
-
|
|
49
|
-
|
|
42
|
+
# Proxy
|
|
43
|
+
if (proxy = @config[:proxy].presence)
|
|
44
|
+
proxy_string = (proxy.instance_of?(Proc) ? proxy.call : proxy).strip
|
|
45
|
+
ip, port, type, user, password = proxy_string.split(':')
|
|
46
|
+
|
|
47
|
+
if %w[http socks5].include?(type)
|
|
48
|
+
if user.nil? && password.nil?
|
|
49
|
+
driver_options.args << "--proxy-server=#{type}://#{ip}:#{port}"
|
|
50
|
+
logger.debug "BrowserBuilder (selenium_chrome): enabled #{type} proxy, ip: #{ip}, port: #{port}"
|
|
51
|
+
else
|
|
52
|
+
logger.error "BrowserBuilder (selenium_chrome): proxy with authentication doesn't supported by selenium, skipped"
|
|
53
|
+
end
|
|
50
54
|
else
|
|
51
|
-
logger.error "BrowserBuilder (selenium_chrome):
|
|
55
|
+
logger.error "BrowserBuilder (selenium_chrome): wrong type of proxy: #{type}, skipped"
|
|
52
56
|
end
|
|
53
|
-
else
|
|
54
|
-
logger.error "BrowserBuilder (selenium_chrome): wrong type of proxy: #{type}, skipped"
|
|
55
57
|
end
|
|
56
|
-
end
|
|
57
58
|
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
|
|
61
|
-
|
|
62
|
-
|
|
63
|
-
|
|
59
|
+
if (proxy_bypass_list = @config[:proxy_bypass_list].presence)
|
|
60
|
+
if proxy
|
|
61
|
+
driver_options.args << "--proxy-bypass-list=#{proxy_bypass_list.join(';')}"
|
|
62
|
+
logger.debug 'BrowserBuilder (selenium_chrome): enabled proxy_bypass_list'
|
|
63
|
+
else
|
|
64
|
+
logger.error 'BrowserBuilder (selenium_chrome): provide `proxy` to set proxy_bypass_list, skipped'
|
|
65
|
+
end
|
|
64
66
|
end
|
|
65
|
-
end
|
|
66
67
|
|
|
67
|
-
|
|
68
|
-
|
|
69
|
-
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
|
|
68
|
+
# SSL
|
|
69
|
+
if @config[:ignore_ssl_errors].present?
|
|
70
|
+
driver_options.args << '--ignore-certificate-errors'
|
|
71
|
+
driver_options.args << '--allow-insecure-localhost'
|
|
72
|
+
logger.debug 'BrowserBuilder (selenium_chrome): enabled ignore_ssl_errors'
|
|
73
|
+
end
|
|
73
74
|
|
|
74
|
-
|
|
75
|
-
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
|
|
75
|
+
# Disable images
|
|
76
|
+
if @config[:disable_images].present?
|
|
77
|
+
driver_options.prefs['profile.managed_default_content_settings.images'] = 2
|
|
78
|
+
logger.debug 'BrowserBuilder (selenium_chrome): enabled disable_images'
|
|
79
|
+
end
|
|
79
80
|
|
|
80
|
-
|
|
81
|
-
|
|
82
|
-
|
|
83
|
-
|
|
81
|
+
# Headers
|
|
82
|
+
if @config[:headers].present?
|
|
83
|
+
logger.warn "BrowserBuilder: (selenium_chrome): custom headers doesn't supported by selenium, skipped"
|
|
84
|
+
end
|
|
84
85
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
|
|
86
|
+
if (user_agent = @config[:user_agent].presence)
|
|
87
|
+
user_agent_string = (user_agent.instance_of?(Proc) ? user_agent.call : user_agent).strip
|
|
88
|
+
driver_options.args << "--user-agent='#{user_agent_string}'"
|
|
89
|
+
logger.debug 'BrowserBuilder (selenium_chrome): enabled custom user_agent'
|
|
90
|
+
end
|
|
90
91
|
|
|
91
|
-
|
|
92
|
-
|
|
93
|
-
|
|
94
|
-
|
|
95
|
-
|
|
96
|
-
|
|
97
|
-
|
|
98
|
-
|
|
92
|
+
# Headless mode
|
|
93
|
+
if ENV['HEADLESS'] != 'false'
|
|
94
|
+
if @config[:headless_mode] == :virtual_display
|
|
95
|
+
if Gem::Platform.local.os == 'linux'
|
|
96
|
+
unless self.class.virtual_display
|
|
97
|
+
require 'headless'
|
|
98
|
+
self.class.virtual_display = Headless.new(reuse: true, destroy_at_exit: false)
|
|
99
|
+
self.class.virtual_display.start
|
|
100
|
+
end
|
|
101
|
+
|
|
102
|
+
logger.debug 'BrowserBuilder (selenium_chrome): enabled virtual_display headless_mode'
|
|
103
|
+
else
|
|
104
|
+
logger.error 'BrowserBuilder (selenium_chrome): virtual_display headless_mode works only ' \
|
|
105
|
+
'on Linux platform. Browser will run in normal mode. Set `native` mode instead.'
|
|
99
106
|
end
|
|
100
|
-
|
|
101
|
-
logger.debug "BrowserBuilder (selenium_chrome): enabled virtual_display headless_mode"
|
|
102
107
|
else
|
|
103
|
-
|
|
104
|
-
|
|
108
|
+
driver_options.args << '--headless'
|
|
109
|
+
logger.debug 'BrowserBuilder (selenium_chrome): enabled native headless_mode'
|
|
105
110
|
end
|
|
111
|
+
end
|
|
112
|
+
|
|
113
|
+
# Use Selenium Manager by default (auto-downloads driver), or custom path if configured
|
|
114
|
+
if (chromedriver_path = Kimurai.configuration.chromedriver_path)
|
|
115
|
+
service = Selenium::WebDriver::Service.chrome(path: chromedriver_path)
|
|
116
|
+
Capybara::Selenium::Driver.new(app, browser: :chrome, options: driver_options, service: service)
|
|
106
117
|
else
|
|
107
|
-
|
|
108
|
-
|
|
118
|
+
# Let Selenium Manager handle driver automatically
|
|
119
|
+
Capybara::Selenium::Driver.new(app, browser: :chrome, options: driver_options)
|
|
109
120
|
end
|
|
110
121
|
end
|
|
111
122
|
|
|
112
|
-
|
|
113
|
-
Capybara::
|
|
114
|
-
|
|
123
|
+
# Create browser instance (Capybara session)
|
|
124
|
+
@browser = Capybara::Session.new(:selenium_chrome)
|
|
125
|
+
@browser.spider = spider
|
|
126
|
+
logger.debug 'BrowserBuilder (selenium_chrome): created browser instance'
|
|
115
127
|
|
|
116
|
-
|
|
117
|
-
|
|
118
|
-
|
|
119
|
-
logger.debug "BrowserBuilder (selenium_chrome): created browser instance"
|
|
128
|
+
if @config[:extensions].present?
|
|
129
|
+
logger.error 'BrowserBuilder (selenium_chrome): `extensions` option not supported by Selenium, skipped'
|
|
130
|
+
end
|
|
120
131
|
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
132
|
+
# Cookies
|
|
133
|
+
if (cookies = @config[:cookies].presence)
|
|
134
|
+
@browser.config.cookies = cookies
|
|
135
|
+
logger.debug 'BrowserBuilder (selenium_chrome): enabled custom cookies'
|
|
136
|
+
end
|
|
124
137
|
|
|
125
|
-
|
|
126
|
-
|
|
127
|
-
@
|
|
128
|
-
|
|
129
|
-
|
|
138
|
+
# Browser instance options
|
|
139
|
+
# skip_request_errors
|
|
140
|
+
if (skip_errors = @config[:skip_request_errors].presence)
|
|
141
|
+
@browser.config.skip_request_errors = skip_errors
|
|
142
|
+
logger.debug 'BrowserBuilder (selenium_chrome): enabled skip_request_errors'
|
|
143
|
+
end
|
|
130
144
|
|
|
131
|
-
|
|
132
|
-
|
|
133
|
-
|
|
134
|
-
|
|
135
|
-
|
|
136
|
-
end
|
|
145
|
+
# retry_request_errors
|
|
146
|
+
if (retry_errors = @config[:retry_request_errors].presence)
|
|
147
|
+
@browser.config.retry_request_errors = retry_errors
|
|
148
|
+
logger.debug 'BrowserBuilder (selenium_chrome): enabled retry_request_errors'
|
|
149
|
+
end
|
|
137
150
|
|
|
138
|
-
|
|
139
|
-
|
|
140
|
-
|
|
141
|
-
|
|
142
|
-
|
|
151
|
+
# restart_if
|
|
152
|
+
if (requests_limit = @config.dig(:restart_if, :requests_limit).presence)
|
|
153
|
+
@browser.config.restart_if[:requests_limit] = requests_limit
|
|
154
|
+
logger.debug "BrowserBuilder (selenium_chrome): enabled restart_if.requests_limit >= #{requests_limit}"
|
|
155
|
+
end
|
|
143
156
|
|
|
144
|
-
|
|
145
|
-
|
|
146
|
-
|
|
147
|
-
|
|
148
|
-
end
|
|
157
|
+
if (memory_limit = @config.dig(:restart_if, :memory_limit).presence)
|
|
158
|
+
@browser.config.restart_if[:memory_limit] = memory_limit
|
|
159
|
+
logger.debug "BrowserBuilder (selenium_chrome): enabled restart_if.memory_limit >= #{memory_limit}"
|
|
160
|
+
end
|
|
149
161
|
|
|
150
|
-
|
|
151
|
-
@
|
|
152
|
-
|
|
153
|
-
|
|
162
|
+
# before_request clear_cookies
|
|
163
|
+
if @config.dig(:before_request, :clear_cookies)
|
|
164
|
+
@browser.config.before_request[:clear_cookies] = true
|
|
165
|
+
logger.debug 'BrowserBuilder (selenium_chrome): enabled before_request.clear_cookies'
|
|
166
|
+
end
|
|
154
167
|
|
|
155
|
-
|
|
156
|
-
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
168
|
+
# before_request clear_and_set_cookies
|
|
169
|
+
if @config.dig(:before_request, :clear_and_set_cookies)
|
|
170
|
+
if (cookies = @config[:cookies].presence)
|
|
171
|
+
@browser.config.cookies = cookies
|
|
172
|
+
@browser.config.before_request[:clear_and_set_cookies] = true
|
|
173
|
+
logger.debug 'BrowserBuilder (selenium_chrome): enabled before_request.clear_and_set_cookies'
|
|
174
|
+
else
|
|
175
|
+
logger.error 'BrowserBuilder (selenium_chrome): cookies should be present to enable before_request.clear_and_set_cookies, skipped'
|
|
176
|
+
end
|
|
177
|
+
end
|
|
160
178
|
|
|
161
|
-
|
|
162
|
-
|
|
163
|
-
|
|
164
|
-
@browser.config.cookies = cookies
|
|
165
|
-
@browser.config.before_request[:clear_and_set_cookies] = true
|
|
166
|
-
logger.debug "BrowserBuilder (selenium_chrome): enabled before_request.clear_and_set_cookies"
|
|
167
|
-
else
|
|
168
|
-
logger.error "BrowserBuilder (selenium_chrome): cookies should be present to enable before_request.clear_and_set_cookies, skipped"
|
|
179
|
+
# before_request change_user_agent
|
|
180
|
+
if @config.dig(:before_request, :change_user_agent)
|
|
181
|
+
logger.error 'BrowserBuilder (selenium_chrome): before_request.change_user_agent option not supported by Selenium, skipped'
|
|
169
182
|
end
|
|
170
|
-
end
|
|
171
183
|
|
|
172
|
-
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
|
|
184
|
+
# before_request change_proxy
|
|
185
|
+
if @config.dig(:before_request, :change_proxy)
|
|
186
|
+
logger.error 'BrowserBuilder (selenium_chrome): before_request.change_proxy option not supported by Selenium, skipped'
|
|
187
|
+
end
|
|
176
188
|
|
|
177
|
-
|
|
178
|
-
|
|
179
|
-
|
|
180
|
-
|
|
189
|
+
# before_request delay
|
|
190
|
+
if (delay = @config.dig(:before_request, :delay).presence)
|
|
191
|
+
@browser.config.before_request[:delay] = delay
|
|
192
|
+
logger.debug 'BrowserBuilder (selenium_chrome): enabled before_request.delay'
|
|
193
|
+
end
|
|
181
194
|
|
|
182
|
-
|
|
183
|
-
|
|
184
|
-
|
|
185
|
-
|
|
186
|
-
|
|
195
|
+
# encoding
|
|
196
|
+
if (encoding = @config[:encoding])
|
|
197
|
+
@browser.config.encoding = encoding
|
|
198
|
+
logger.debug "BrowserBuilder (selenium_chrome): enabled encoding: #{encoding}"
|
|
199
|
+
end
|
|
187
200
|
|
|
188
|
-
|
|
189
|
-
|
|
190
|
-
@browser.config.encoding = encoding
|
|
191
|
-
logger.debug "BrowserBuilder (selenium_chrome): enabled encoding: #{encoding}"
|
|
201
|
+
# return Capybara session instance
|
|
202
|
+
@browser
|
|
192
203
|
end
|
|
193
|
-
|
|
194
|
-
# return Capybara session instance
|
|
195
|
-
@browser
|
|
196
204
|
end
|
|
197
205
|
end
|
|
198
206
|
end
|