kimurai 1.4.0 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (47) hide show
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -0
  3. data/CHANGELOG.md +21 -0
  4. data/Gemfile +2 -2
  5. data/README.md +476 -648
  6. data/Rakefile +6 -6
  7. data/bin/console +3 -4
  8. data/exe/kimurai +0 -1
  9. data/kimurai.gemspec +38 -37
  10. data/lib/kimurai/base/saver.rb +15 -19
  11. data/lib/kimurai/base/storage.rb +1 -1
  12. data/lib/kimurai/base.rb +38 -38
  13. data/lib/kimurai/base_helper.rb +5 -4
  14. data/lib/kimurai/browser_builder/mechanize_builder.rb +121 -119
  15. data/lib/kimurai/browser_builder/selenium_chrome_builder.rb +160 -152
  16. data/lib/kimurai/browser_builder/selenium_firefox_builder.rb +162 -160
  17. data/lib/kimurai/browser_builder.rb +1 -7
  18. data/lib/kimurai/capybara_configuration.rb +1 -1
  19. data/lib/kimurai/capybara_ext/driver/base.rb +50 -46
  20. data/lib/kimurai/capybara_ext/mechanize/driver.rb +51 -50
  21. data/lib/kimurai/capybara_ext/selenium/driver.rb +33 -29
  22. data/lib/kimurai/capybara_ext/session.rb +31 -38
  23. data/lib/kimurai/cli/generator.rb +15 -15
  24. data/lib/kimurai/cli.rb +49 -86
  25. data/lib/kimurai/core_ext/array.rb +2 -2
  26. data/lib/kimurai/core_ext/hash.rb +1 -1
  27. data/lib/kimurai/core_ext/numeric.rb +4 -4
  28. data/lib/kimurai/pipeline.rb +2 -1
  29. data/lib/kimurai/runner.rb +6 -6
  30. data/lib/kimurai/template/Gemfile +2 -2
  31. data/lib/kimurai/template/config/boot.rb +4 -4
  32. data/lib/kimurai/template/config/schedule.rb +15 -15
  33. data/lib/kimurai/template/spiders/application_spider.rb +8 -14
  34. data/lib/kimurai/version.rb +1 -1
  35. data/lib/kimurai.rb +7 -3
  36. metadata +58 -65
  37. data/.travis.yml +0 -5
  38. data/lib/kimurai/automation/deploy.yml +0 -54
  39. data/lib/kimurai/automation/setup/chromium_chromedriver.yml +0 -26
  40. data/lib/kimurai/automation/setup/firefox_geckodriver.yml +0 -20
  41. data/lib/kimurai/automation/setup/phantomjs.yml +0 -33
  42. data/lib/kimurai/automation/setup/ruby_environment.yml +0 -124
  43. data/lib/kimurai/automation/setup.yml +0 -44
  44. data/lib/kimurai/browser_builder/poltergeist_phantomjs_builder.rb +0 -175
  45. data/lib/kimurai/capybara_ext/poltergeist/driver.rb +0 -13
  46. data/lib/kimurai/cli/ansible_command_builder.rb +0 -71
  47. data/lib/kimurai/template/config/automation.yml +0 -13
@@ -4,151 +4,153 @@ require_relative '../capybara_configuration'
4
4
  require_relative '../capybara_ext/mechanize/driver'
5
5
  require_relative '../capybara_ext/session'
6
6
 
7
- module Kimurai::BrowserBuilder
8
- class MechanizeBuilder
9
- attr_reader :logger, :spider
10
-
11
- def initialize(config, spider:)
12
- @config = config
13
- @spider = spider
14
- @logger = spider.logger
15
- end
16
-
17
- def build
18
- # Register driver
19
- Capybara.register_driver :mechanize do |app|
20
- driver = Capybara::Mechanize::Driver.new("app")
21
- # keep the history as small as possible (by default it's unlimited)
22
- driver.configure { |a| a.history.max_size = 2 }
23
- driver
24
- end
7
+ module Kimurai
8
+ module BrowserBuilder
9
+ class MechanizeBuilder
10
+ attr_reader :logger, :spider
11
+
12
+ def initialize(config, spider:)
13
+ @config = config
14
+ @spider = spider
15
+ @logger = spider.logger
16
+ end
17
+
18
+ def build
19
+ # Register driver
20
+ Capybara.register_driver :mechanize do |_app|
21
+ driver = Capybara::Mechanize::Driver.new('app')
22
+ # keep the history as small as possible (by default it's unlimited)
23
+ driver.configure { |a| a.history.max_size = 2 }
24
+ driver
25
+ end
25
26
 
26
- # Create browser instance (Capybara session)
27
- @browser = Capybara::Session.new(:mechanize)
28
- @browser.spider = spider
29
- logger.debug "BrowserBuilder (mechanize): created browser instance"
27
+ # Create browser instance (Capybara session)
28
+ @browser = Capybara::Session.new(:mechanize)
29
+ @browser.spider = spider
30
+ logger.debug 'BrowserBuilder (mechanize): created browser instance'
30
31
 
31
- if @config[:extensions].present?
32
- logger.error "BrowserBuilder (mechanize): `extensions` option not supported, skipped"
33
- end
32
+ if @config[:extensions].present?
33
+ logger.error 'BrowserBuilder (mechanize): `extensions` option not supported, skipped'
34
+ end
34
35
 
35
- # Proxy
36
- if proxy = @config[:proxy].presence
37
- proxy_string = (proxy.class == Proc ? proxy.call : proxy).strip
38
- ip, port, type = proxy_string.split(":")
36
+ # Proxy
37
+ if (proxy = @config[:proxy].presence)
38
+ proxy_string = (proxy.instance_of?(Proc) ? proxy.call : proxy).strip
39
+ ip, port, type = proxy_string.split(':')
40
+
41
+ if type == 'http'
42
+ @browser.driver.set_proxy(*proxy_string.split(':'))
43
+ logger.debug "BrowserBuilder (mechanize): enabled http proxy, ip: #{ip}, port: #{port}"
44
+ else
45
+ logger.error "BrowserBuilder (mechanize): can't set #{type} proxy (not supported), skipped"
46
+ end
47
+ end
39
48
 
40
- if type == "http"
41
- @browser.driver.set_proxy(*proxy_string.split(":"))
42
- logger.debug "BrowserBuilder (mechanize): enabled http proxy, ip: #{ip}, port: #{port}"
43
- else
44
- logger.error "BrowserBuilder (mechanize): can't set #{type} proxy (not supported), skipped"
49
+ # SSL
50
+ if (ssl_cert_path = @config[:ssl_cert_path].presence)
51
+ @browser.driver.browser.agent.http.ca_file = ssl_cert_path
52
+ logger.debug 'BrowserBuilder (mechanize): enabled custom ssl_cert'
45
53
  end
46
- end
47
54
 
48
- # SSL
49
- if ssl_cert_path = @config[:ssl_cert_path].presence
50
- @browser.driver.browser.agent.http.ca_file = ssl_cert_path
51
- logger.debug "BrowserBuilder (mechanize): enabled custom ssl_cert"
52
- end
55
+ if @config[:ignore_ssl_errors].present?
56
+ @browser.driver.browser.agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
57
+ logger.debug 'BrowserBuilder (mechanize): enabled ignore_ssl_errors'
58
+ end
53
59
 
54
- if @config[:ignore_ssl_errors].present?
55
- @browser.driver.browser.agent.verify_mode = OpenSSL::SSL::VERIFY_NONE
56
- logger.debug "BrowserBuilder (mechanize): enabled ignore_ssl_errors"
57
- end
60
+ # Headers
61
+ if (headers = @config[:headers].presence)
62
+ @browser.driver.headers = headers
63
+ logger.debug 'BrowserBuilder (mechanize): enabled custom headers'
64
+ end
58
65
 
59
- # Headers
60
- if headers = @config[:headers].presence
61
- @browser.driver.headers = headers
62
- logger.debug "BrowserBuilder (mechanize): enabled custom headers"
63
- end
66
+ if (user_agent = @config[:user_agent].presence)
67
+ user_agent_string = (user_agent.instance_of?(Proc) ? user_agent.call : user_agent).strip
64
68
 
65
- if user_agent = @config[:user_agent].presence
66
- user_agent_string = (user_agent.class == Proc ? user_agent.call : user_agent).strip
69
+ @browser.driver.add_header('User-Agent', user_agent_string)
70
+ logger.debug 'BrowserBuilder (mechanize): enabled custom user_agent'
71
+ end
67
72
 
68
- @browser.driver.add_header("User-Agent", user_agent_string)
69
- logger.debug "BrowserBuilder (mechanize): enabled custom user_agent"
70
- end
73
+ # Cookies
74
+ if (cookies = @config[:cookies].presence)
75
+ cookies.each do |cookie|
76
+ @browser.driver.set_cookie(cookie[:name], cookie[:value], cookie)
77
+ end
71
78
 
72
- # Cookies
73
- if cookies = @config[:cookies].presence
74
- cookies.each do |cookie|
75
- @browser.driver.set_cookie(cookie[:name], cookie[:value], cookie)
79
+ logger.debug 'BrowserBuilder (mechanize): enabled custom cookies'
76
80
  end
77
81
 
78
- logger.debug "BrowserBuilder (mechanize): enabled custom cookies"
79
- end
82
+ # Browser instance options
83
+ # skip_request_errors
84
+ if (skip_errors = @config[:skip_request_errors].presence)
85
+ @browser.config.skip_request_errors = skip_errors
86
+ logger.debug 'BrowserBuilder (mechanize): enabled skip_request_errors'
87
+ end
80
88
 
81
- # Browser instance options
82
- # skip_request_errors
83
- if skip_errors = @config[:skip_request_errors].presence
84
- @browser.config.skip_request_errors = skip_errors
85
- logger.debug "BrowserBuilder (mechanize): enabled skip_request_errors"
86
- end
89
+ # retry_request_errors
90
+ if (retry_errors = @config[:retry_request_errors].presence)
91
+ @browser.config.retry_request_errors = retry_errors
92
+ logger.debug 'BrowserBuilder (mechanize): enabled retry_request_errors'
93
+ end
87
94
 
88
- # retry_request_errors
89
- if retry_errors = @config[:retry_request_errors].presence
90
- @browser.config.retry_request_errors = retry_errors
91
- logger.debug "BrowserBuilder (mechanize): enabled retry_request_errors"
92
- end
95
+ # restart_if
96
+ if @config[:restart_if].present?
97
+ logger.warn 'BrowserBuilder (mechanize): restart_if options not supported by Mechanize, skipped'
98
+ end
93
99
 
94
- # restart_if
95
- if @config[:restart_if].present?
96
- logger.warn "BrowserBuilder (mechanize): restart_if options not supported by Mechanize, skipped"
97
- end
100
+ # before_request clear_cookies
101
+ if @config.dig(:before_request, :clear_cookies)
102
+ @browser.config.before_request[:clear_cookies] = true
103
+ logger.debug 'BrowserBuilder (mechanize): enabled before_request.clear_cookies'
104
+ end
98
105
 
99
- # before_request clear_cookies
100
- if @config.dig(:before_request, :clear_cookies)
101
- @browser.config.before_request[:clear_cookies] = true
102
- logger.debug "BrowserBuilder (mechanize): enabled before_request.clear_cookies"
103
- end
106
+ # before_request clear_and_set_cookies
107
+ if @config.dig(:before_request, :clear_and_set_cookies)
108
+ if (cookies = @config[:cookies].presence)
109
+ @browser.config.cookies = cookies
110
+ @browser.config.before_request[:clear_and_set_cookies] = true
111
+ logger.debug 'BrowserBuilder (mechanize): enabled before_request.clear_and_set_cookies'
112
+ else
113
+ logger.error 'BrowserBuilder (mechanize): cookies should be present to enable before_request.clear_and_set_cookies, skipped'
114
+ end
115
+ end
104
116
 
105
- # before_request clear_and_set_cookies
106
- if @config.dig(:before_request, :clear_and_set_cookies)
107
- if cookies = @config[:cookies].presence
108
- @browser.config.cookies = cookies
109
- @browser.config.before_request[:clear_and_set_cookies] = true
110
- logger.debug "BrowserBuilder (mechanize): enabled before_request.clear_and_set_cookies"
111
- else
112
- logger.error "BrowserBuilder (mechanize): cookies should be present to enable before_request.clear_and_set_cookies, skipped"
117
+ # before_request change_user_agent
118
+ if @config.dig(:before_request, :change_user_agent)
119
+ if @config[:user_agent].present? && @config[:user_agent].instance_of?(Proc)
120
+ @browser.config.user_agent = @config[:user_agent]
121
+ @browser.config.before_request[:change_user_agent] = true
122
+ logger.debug 'BrowserBuilder (mechanize): enabled before_request.change_user_agent'
123
+ else
124
+ logger.error 'BrowserBuilder (mechanize): user_agent should be present and has lambda format to enable before_request.change_user_agent, skipped'
125
+ end
113
126
  end
114
- end
115
127
 
116
- # before_request change_user_agent
117
- if @config.dig(:before_request, :change_user_agent)
118
- if @config[:user_agent].present? && @config[:user_agent].class == Proc
119
- @browser.config.user_agent = @config[:user_agent]
120
- @browser.config.before_request[:change_user_agent] = true
121
- logger.debug "BrowserBuilder (mechanize): enabled before_request.change_user_agent"
122
- else
123
- logger.error "BrowserBuilder (mechanize): user_agent should be present and has lambda format to enable before_request.change_user_agent, skipped"
128
+ # before_request change_proxy
129
+ if @config.dig(:before_request, :change_proxy)
130
+ if @config[:proxy].present? && @config[:proxy].instance_of?(Proc)
131
+ @browser.config.proxy = @config[:proxy]
132
+ @browser.config.before_request[:change_proxy] = true
133
+ logger.debug 'BrowserBuilder (mechanize): enabled before_request.change_proxy'
134
+ else
135
+ logger.error 'BrowserBuilder (mechanize): proxy should be present and has lambda format to enable before_request.change_proxy, skipped'
136
+ end
124
137
  end
125
- end
126
138
 
127
- # before_request change_proxy
128
- if @config.dig(:before_request, :change_proxy)
129
- if @config[:proxy].present? && @config[:proxy].class == Proc
130
- @browser.config.proxy = @config[:proxy]
131
- @browser.config.before_request[:change_proxy] = true
132
- logger.debug "BrowserBuilder (mechanize): enabled before_request.change_proxy"
133
- else
134
- logger.error "BrowserBuilder (mechanize): proxy should be present and has lambda format to enable before_request.change_proxy, skipped"
139
+ # before_request delay
140
+ if (delay = @config.dig(:before_request, :delay).presence)
141
+ @browser.config.before_request[:delay] = delay
142
+ logger.debug 'BrowserBuilder (mechanize): enabled before_request.delay'
135
143
  end
136
- end
137
144
 
138
- # before_request delay
139
- if delay = @config.dig(:before_request, :delay).presence
140
- @browser.config.before_request[:delay] = delay
141
- logger.debug "BrowserBuilder (mechanize): enabled before_request.delay"
142
- end
145
+ # encoding
146
+ if (encoding = @config[:encoding])
147
+ @browser.config.encoding = encoding
148
+ logger.debug "BrowserBuilder (mechanize): enabled encoding: #{encoding}"
149
+ end
143
150
 
144
- # encoding
145
- if encoding = @config[:encoding]
146
- @browser.config.encoding = encoding
147
- logger.debug "BrowserBuilder (mechanize): enabled encoding: #{encoding}"
151
+ # return Capybara session instance
152
+ @browser
148
153
  end
149
-
150
- # return Capybara session instance
151
- @browser
152
154
  end
153
155
  end
154
156
  end
@@ -4,195 +4,203 @@ require_relative '../capybara_configuration'
4
4
  require_relative '../capybara_ext/selenium/driver'
5
5
  require_relative '../capybara_ext/session'
6
6
 
7
- module Kimurai::BrowserBuilder
8
- class SeleniumChromeBuilder
9
- class << self
10
- attr_accessor :virtual_display
11
- end
12
-
13
- attr_reader :logger, :spider
14
-
15
- def initialize(config, spider:)
16
- @config = config
17
- @spider = spider
18
- @logger = spider.logger
19
- end
20
-
21
- def build
22
- # Register driver
23
- Capybara.register_driver :selenium_chrome do |app|
24
- # Create driver options
25
- opts = { args: %w[--disable-gpu --no-sandbox --disable-translate] }
7
+ module Kimurai
8
+ module BrowserBuilder
9
+ class SeleniumChromeBuilder
10
+ class << self
11
+ attr_accessor :virtual_display
12
+ end
26
13
 
27
- # Provide custom chrome browser path:
28
- if chrome_path = Kimurai.configuration.selenium_chrome_path
29
- opts.merge!(binary: chrome_path)
30
- end
14
+ attr_reader :logger, :spider
31
15
 
32
- # See all options here: https://seleniumhq.github.io/selenium/docs/api/rb/Selenium/WebDriver/Chrome/Options.html
33
- driver_options = Selenium::WebDriver::Chrome::Options.new(opts)
16
+ def initialize(config, spider:)
17
+ @config = config
18
+ @spider = spider
19
+ @logger = spider.logger
20
+ end
34
21
 
35
- # Window size
36
- if size = @config[:window_size].presence
37
- driver_options.args << "--window-size=#{size.join(',')}"
38
- logger.debug "BrowserBuilder (selenium_chrome): enabled window_size"
39
- end
22
+ def build
23
+ # Register driver
24
+ Capybara.register_driver :selenium_chrome do |app|
25
+ # Create driver options
26
+ # See all options here: https://seleniumhq.github.io/selenium/docs/api/rb/Selenium/WebDriver/Chrome/Options.html
27
+ driver_options = Selenium::WebDriver::Chrome::Options.new
28
+ driver_options.args += %w[--disable-gpu --no-sandbox --disable-translate
29
+ --disable-blink-features=AutomationControlled]
30
+
31
+ # Provide custom chrome browser path:
32
+ if (chrome_path = Kimurai.configuration.selenium_chrome_path)
33
+ driver_options.binary = chrome_path
34
+ end
40
35
 
41
- # Proxy
42
- if proxy = @config[:proxy].presence
43
- proxy_string = (proxy.class == Proc ? proxy.call : proxy).strip
44
- ip, port, type, user, password = proxy_string.split(":")
36
+ # Window size
37
+ if (size = @config[:window_size].presence)
38
+ driver_options.args << "--window-size=#{size.join(',')}"
39
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled window_size'
40
+ end
45
41
 
46
- if %w(http socks5).include?(type)
47
- if user.nil? && password.nil?
48
- driver_options.args << "--proxy-server=#{type}://#{ip}:#{port}"
49
- logger.debug "BrowserBuilder (selenium_chrome): enabled #{type} proxy, ip: #{ip}, port: #{port}"
42
+ # Proxy
43
+ if (proxy = @config[:proxy].presence)
44
+ proxy_string = (proxy.instance_of?(Proc) ? proxy.call : proxy).strip
45
+ ip, port, type, user, password = proxy_string.split(':')
46
+
47
+ if %w[http socks5].include?(type)
48
+ if user.nil? && password.nil?
49
+ driver_options.args << "--proxy-server=#{type}://#{ip}:#{port}"
50
+ logger.debug "BrowserBuilder (selenium_chrome): enabled #{type} proxy, ip: #{ip}, port: #{port}"
51
+ else
52
+ logger.error "BrowserBuilder (selenium_chrome): proxy with authentication doesn't supported by selenium, skipped"
53
+ end
50
54
  else
51
- logger.error "BrowserBuilder (selenium_chrome): proxy with authentication doesn't supported by selenium, skipped"
55
+ logger.error "BrowserBuilder (selenium_chrome): wrong type of proxy: #{type}, skipped"
52
56
  end
53
- else
54
- logger.error "BrowserBuilder (selenium_chrome): wrong type of proxy: #{type}, skipped"
55
57
  end
56
- end
57
58
 
58
- if proxy_bypass_list = @config[:proxy_bypass_list].presence
59
- if proxy
60
- driver_options.args << "--proxy-bypass-list=#{proxy_bypass_list.join(';')}"
61
- logger.debug "BrowserBuilder (selenium_chrome): enabled proxy_bypass_list"
62
- else
63
- logger.error "BrowserBuilder (selenium_chrome): provide `proxy` to set proxy_bypass_list, skipped"
59
+ if (proxy_bypass_list = @config[:proxy_bypass_list].presence)
60
+ if proxy
61
+ driver_options.args << "--proxy-bypass-list=#{proxy_bypass_list.join(';')}"
62
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled proxy_bypass_list'
63
+ else
64
+ logger.error 'BrowserBuilder (selenium_chrome): provide `proxy` to set proxy_bypass_list, skipped'
65
+ end
64
66
  end
65
- end
66
67
 
67
- # SSL
68
- if @config[:ignore_ssl_errors].present?
69
- driver_options.args << "--ignore-certificate-errors"
70
- driver_options.args << "--allow-insecure-localhost"
71
- logger.debug "BrowserBuilder (selenium_chrome): enabled ignore_ssl_errors"
72
- end
68
+ # SSL
69
+ if @config[:ignore_ssl_errors].present?
70
+ driver_options.args << '--ignore-certificate-errors'
71
+ driver_options.args << '--allow-insecure-localhost'
72
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled ignore_ssl_errors'
73
+ end
73
74
 
74
- # Disable images
75
- if @config[:disable_images].present?
76
- driver_options.prefs["profile.managed_default_content_settings.images"] = 2
77
- logger.debug "BrowserBuilder (selenium_chrome): enabled disable_images"
78
- end
75
+ # Disable images
76
+ if @config[:disable_images].present?
77
+ driver_options.prefs['profile.managed_default_content_settings.images'] = 2
78
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled disable_images'
79
+ end
79
80
 
80
- # Headers
81
- if @config[:headers].present?
82
- logger.warn "BrowserBuilder: (selenium_chrome): custom headers doesn't supported by selenium, skipped"
83
- end
81
+ # Headers
82
+ if @config[:headers].present?
83
+ logger.warn "BrowserBuilder: (selenium_chrome): custom headers doesn't supported by selenium, skipped"
84
+ end
84
85
 
85
- if user_agent = @config[:user_agent].presence
86
- user_agent_string = (user_agent.class == Proc ? user_agent.call : user_agent).strip
87
- driver_options.args << "--user-agent='#{user_agent_string}'"
88
- logger.debug "BrowserBuilder (selenium_chrome): enabled custom user_agent"
89
- end
86
+ if (user_agent = @config[:user_agent].presence)
87
+ user_agent_string = (user_agent.instance_of?(Proc) ? user_agent.call : user_agent).strip
88
+ driver_options.args << "--user-agent='#{user_agent_string}'"
89
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled custom user_agent'
90
+ end
90
91
 
91
- # Headless mode
92
- if ENV["HEADLESS"] != "false"
93
- if @config[:headless_mode] == :virtual_display
94
- if Gem::Platform.local.os == "linux"
95
- unless self.class.virtual_display
96
- require 'headless'
97
- self.class.virtual_display = Headless.new(reuse: true, destroy_at_exit: false)
98
- self.class.virtual_display.start
92
+ # Headless mode
93
+ if ENV['HEADLESS'] != 'false'
94
+ if @config[:headless_mode] == :virtual_display
95
+ if Gem::Platform.local.os == 'linux'
96
+ unless self.class.virtual_display
97
+ require 'headless'
98
+ self.class.virtual_display = Headless.new(reuse: true, destroy_at_exit: false)
99
+ self.class.virtual_display.start
100
+ end
101
+
102
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled virtual_display headless_mode'
103
+ else
104
+ logger.error 'BrowserBuilder (selenium_chrome): virtual_display headless_mode works only ' \
105
+ 'on Linux platform. Browser will run in normal mode. Set `native` mode instead.'
99
106
  end
100
-
101
- logger.debug "BrowserBuilder (selenium_chrome): enabled virtual_display headless_mode"
102
107
  else
103
- logger.error "BrowserBuilder (selenium_chrome): virtual_display headless_mode works only " \
104
- "on Linux platform. Browser will run in normal mode. Set `native` mode instead."
108
+ driver_options.args << '--headless'
109
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled native headless_mode'
105
110
  end
111
+ end
112
+
113
+ # Use Selenium Manager by default (auto-downloads driver), or custom path if configured
114
+ if (chromedriver_path = Kimurai.configuration.chromedriver_path)
115
+ service = Selenium::WebDriver::Service.chrome(path: chromedriver_path)
116
+ Capybara::Selenium::Driver.new(app, browser: :chrome, options: driver_options, service: service)
106
117
  else
107
- driver_options.args << "--headless"
108
- logger.debug "BrowserBuilder (selenium_chrome): enabled native headless_mode"
118
+ # Let Selenium Manager handle driver automatically
119
+ Capybara::Selenium::Driver.new(app, browser: :chrome, options: driver_options)
109
120
  end
110
121
  end
111
122
 
112
- chromedriver_path = Kimurai.configuration.chromedriver_path || "/usr/local/bin/chromedriver"
113
- Capybara::Selenium::Driver.new(app, browser: :chrome, options: driver_options, driver_path: chromedriver_path)
114
- end
123
+ # Create browser instance (Capybara session)
124
+ @browser = Capybara::Session.new(:selenium_chrome)
125
+ @browser.spider = spider
126
+ logger.debug 'BrowserBuilder (selenium_chrome): created browser instance'
115
127
 
116
- # Create browser instance (Capybara session)
117
- @browser = Capybara::Session.new(:selenium_chrome)
118
- @browser.spider = spider
119
- logger.debug "BrowserBuilder (selenium_chrome): created browser instance"
128
+ if @config[:extensions].present?
129
+ logger.error 'BrowserBuilder (selenium_chrome): `extensions` option not supported by Selenium, skipped'
130
+ end
120
131
 
121
- if @config[:extensions].present?
122
- logger.error "BrowserBuilder (selenium_chrome): `extensions` option not supported by Selenium, skipped"
123
- end
132
+ # Cookies
133
+ if (cookies = @config[:cookies].presence)
134
+ @browser.config.cookies = cookies
135
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled custom cookies'
136
+ end
124
137
 
125
- # Cookies
126
- if cookies = @config[:cookies].presence
127
- @browser.config.cookies = cookies
128
- logger.debug "BrowserBuilder (selenium_chrome): enabled custom cookies"
129
- end
138
+ # Browser instance options
139
+ # skip_request_errors
140
+ if (skip_errors = @config[:skip_request_errors].presence)
141
+ @browser.config.skip_request_errors = skip_errors
142
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled skip_request_errors'
143
+ end
130
144
 
131
- # Browser instance options
132
- # skip_request_errors
133
- if skip_errors = @config[:skip_request_errors].presence
134
- @browser.config.skip_request_errors = skip_errors
135
- logger.debug "BrowserBuilder (selenium_chrome): enabled skip_request_errors"
136
- end
145
+ # retry_request_errors
146
+ if (retry_errors = @config[:retry_request_errors].presence)
147
+ @browser.config.retry_request_errors = retry_errors
148
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled retry_request_errors'
149
+ end
137
150
 
138
- # retry_request_errors
139
- if retry_errors = @config[:retry_request_errors].presence
140
- @browser.config.retry_request_errors = retry_errors
141
- logger.debug "BrowserBuilder (selenium_chrome): enabled retry_request_errors"
142
- end
151
+ # restart_if
152
+ if (requests_limit = @config.dig(:restart_if, :requests_limit).presence)
153
+ @browser.config.restart_if[:requests_limit] = requests_limit
154
+ logger.debug "BrowserBuilder (selenium_chrome): enabled restart_if.requests_limit >= #{requests_limit}"
155
+ end
143
156
 
144
- # restart_if
145
- if requests_limit = @config.dig(:restart_if, :requests_limit).presence
146
- @browser.config.restart_if[:requests_limit] = requests_limit
147
- logger.debug "BrowserBuilder (selenium_chrome): enabled restart_if.requests_limit >= #{requests_limit}"
148
- end
157
+ if (memory_limit = @config.dig(:restart_if, :memory_limit).presence)
158
+ @browser.config.restart_if[:memory_limit] = memory_limit
159
+ logger.debug "BrowserBuilder (selenium_chrome): enabled restart_if.memory_limit >= #{memory_limit}"
160
+ end
149
161
 
150
- if memory_limit = @config.dig(:restart_if, :memory_limit).presence
151
- @browser.config.restart_if[:memory_limit] = memory_limit
152
- logger.debug "BrowserBuilder (selenium_chrome): enabled restart_if.memory_limit >= #{memory_limit}"
153
- end
162
+ # before_request clear_cookies
163
+ if @config.dig(:before_request, :clear_cookies)
164
+ @browser.config.before_request[:clear_cookies] = true
165
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled before_request.clear_cookies'
166
+ end
154
167
 
155
- # before_request clear_cookies
156
- if @config.dig(:before_request, :clear_cookies)
157
- @browser.config.before_request[:clear_cookies] = true
158
- logger.debug "BrowserBuilder (selenium_chrome): enabled before_request.clear_cookies"
159
- end
168
+ # before_request clear_and_set_cookies
169
+ if @config.dig(:before_request, :clear_and_set_cookies)
170
+ if (cookies = @config[:cookies].presence)
171
+ @browser.config.cookies = cookies
172
+ @browser.config.before_request[:clear_and_set_cookies] = true
173
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled before_request.clear_and_set_cookies'
174
+ else
175
+ logger.error 'BrowserBuilder (selenium_chrome): cookies should be present to enable before_request.clear_and_set_cookies, skipped'
176
+ end
177
+ end
160
178
 
161
- # before_request clear_and_set_cookies
162
- if @config.dig(:before_request, :clear_and_set_cookies)
163
- if cookies = @config[:cookies].presence
164
- @browser.config.cookies = cookies
165
- @browser.config.before_request[:clear_and_set_cookies] = true
166
- logger.debug "BrowserBuilder (selenium_chrome): enabled before_request.clear_and_set_cookies"
167
- else
168
- logger.error "BrowserBuilder (selenium_chrome): cookies should be present to enable before_request.clear_and_set_cookies, skipped"
179
+ # before_request change_user_agent
180
+ if @config.dig(:before_request, :change_user_agent)
181
+ logger.error 'BrowserBuilder (selenium_chrome): before_request.change_user_agent option not supported by Selenium, skipped'
169
182
  end
170
- end
171
183
 
172
- # before_request change_user_agent
173
- if @config.dig(:before_request, :change_user_agent)
174
- logger.error "BrowserBuilder (selenium_chrome): before_request.change_user_agent option not supported by Selenium, skipped"
175
- end
184
+ # before_request change_proxy
185
+ if @config.dig(:before_request, :change_proxy)
186
+ logger.error 'BrowserBuilder (selenium_chrome): before_request.change_proxy option not supported by Selenium, skipped'
187
+ end
176
188
 
177
- # before_request change_proxy
178
- if @config.dig(:before_request, :change_proxy)
179
- logger.error "BrowserBuilder (selenium_chrome): before_request.change_proxy option not supported by Selenium, skipped"
180
- end
189
+ # before_request delay
190
+ if (delay = @config.dig(:before_request, :delay).presence)
191
+ @browser.config.before_request[:delay] = delay
192
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled before_request.delay'
193
+ end
181
194
 
182
- # before_request delay
183
- if delay = @config.dig(:before_request, :delay).presence
184
- @browser.config.before_request[:delay] = delay
185
- logger.debug "BrowserBuilder (selenium_chrome): enabled before_request.delay"
186
- end
195
+ # encoding
196
+ if (encoding = @config[:encoding])
197
+ @browser.config.encoding = encoding
198
+ logger.debug "BrowserBuilder (selenium_chrome): enabled encoding: #{encoding}"
199
+ end
187
200
 
188
- # encoding
189
- if encoding = @config[:encoding]
190
- @browser.config.encoding = encoding
191
- logger.debug "BrowserBuilder (selenium_chrome): enabled encoding: #{encoding}"
201
+ # return Capybara session instance
202
+ @browser
192
203
  end
193
-
194
- # return Capybara session instance
195
- @browser
196
204
  end
197
205
  end
198
206
  end