kimurai 1.0.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +7 -0
- data/.gitignore +11 -0
- data/.travis.yml +5 -0
- data/CODE_OF_CONDUCT.md +74 -0
- data/Gemfile +6 -0
- data/LICENSE.txt +21 -0
- data/README.md +1923 -0
- data/Rakefile +10 -0
- data/bin/console +14 -0
- data/bin/setup +8 -0
- data/exe/kimurai +6 -0
- data/kimurai.gemspec +48 -0
- data/lib/kimurai.rb +53 -0
- data/lib/kimurai/automation/deploy.yml +54 -0
- data/lib/kimurai/automation/setup.yml +44 -0
- data/lib/kimurai/automation/setup/chromium_chromedriver.yml +26 -0
- data/lib/kimurai/automation/setup/firefox_geckodriver.yml +20 -0
- data/lib/kimurai/automation/setup/phantomjs.yml +33 -0
- data/lib/kimurai/automation/setup/ruby_environment.yml +124 -0
- data/lib/kimurai/base.rb +249 -0
- data/lib/kimurai/base/simple_saver.rb +98 -0
- data/lib/kimurai/base/uniq_checker.rb +22 -0
- data/lib/kimurai/base_helper.rb +22 -0
- data/lib/kimurai/browser_builder.rb +32 -0
- data/lib/kimurai/browser_builder/mechanize_builder.rb +140 -0
- data/lib/kimurai/browser_builder/poltergeist_phantomjs_builder.rb +156 -0
- data/lib/kimurai/browser_builder/selenium_chrome_builder.rb +178 -0
- data/lib/kimurai/browser_builder/selenium_firefox_builder.rb +185 -0
- data/lib/kimurai/capybara_configuration.rb +10 -0
- data/lib/kimurai/capybara_ext/driver/base.rb +62 -0
- data/lib/kimurai/capybara_ext/mechanize/driver.rb +55 -0
- data/lib/kimurai/capybara_ext/poltergeist/driver.rb +13 -0
- data/lib/kimurai/capybara_ext/selenium/driver.rb +24 -0
- data/lib/kimurai/capybara_ext/session.rb +150 -0
- data/lib/kimurai/capybara_ext/session/config.rb +18 -0
- data/lib/kimurai/cli.rb +157 -0
- data/lib/kimurai/cli/ansible_command_builder.rb +71 -0
- data/lib/kimurai/cli/generator.rb +57 -0
- data/lib/kimurai/core_ext/array.rb +14 -0
- data/lib/kimurai/core_ext/numeric.rb +19 -0
- data/lib/kimurai/core_ext/string.rb +7 -0
- data/lib/kimurai/pipeline.rb +25 -0
- data/lib/kimurai/runner.rb +72 -0
- data/lib/kimurai/template/.gitignore +18 -0
- data/lib/kimurai/template/.ruby-version +1 -0
- data/lib/kimurai/template/Gemfile +20 -0
- data/lib/kimurai/template/README.md +3 -0
- data/lib/kimurai/template/config/application.rb +32 -0
- data/lib/kimurai/template/config/automation.yml +13 -0
- data/lib/kimurai/template/config/boot.rb +22 -0
- data/lib/kimurai/template/config/initializers/.keep +0 -0
- data/lib/kimurai/template/config/schedule.rb +57 -0
- data/lib/kimurai/template/db/.keep +0 -0
- data/lib/kimurai/template/helpers/application_helper.rb +3 -0
- data/lib/kimurai/template/lib/.keep +0 -0
- data/lib/kimurai/template/log/.keep +0 -0
- data/lib/kimurai/template/pipelines/saver.rb +11 -0
- data/lib/kimurai/template/pipelines/validator.rb +24 -0
- data/lib/kimurai/template/spiders/application_spider.rb +104 -0
- data/lib/kimurai/template/tmp/.keep +0 -0
- data/lib/kimurai/version.rb +3 -0
- metadata +349 -0
@@ -0,0 +1,156 @@
|
|
1
|
+
require 'capybara'
|
2
|
+
require 'capybara/poltergeist'
|
3
|
+
require_relative '../capybara_configuration'
|
4
|
+
require_relative '../capybara_ext/poltergeist/driver'
|
5
|
+
require_relative '../capybara_ext/session'
|
6
|
+
|
7
|
+
module Kimurai
  class BrowserBuilder
    # Assembles a Capybara session that uses the :poltergeist_phantomjs driver,
    # configured from the engine config hash handed to the constructor.
    class PoltergeistPhantomJSBuilder
      # Prefix shared by every log line this builder writes.
      LOG_TAG = "BrowserBuilder (poltergeist_phantomjs):"
      private_constant :LOG_TAG

      attr_reader :logger, :spider

      # config - Hash of settings. Keys read by this class: :window_size,
      #          :ssl_cert_path, :ignore_ssl_errors, :disable_images, :proxy,
      #          :headers, :user_agent, :cookies and the nested :browser hash.
      # spider - object responding to #logger; it is also attached to the session.
      def initialize(config, spider:)
        @config = config
        @spider = spider
        @logger = spider.logger
      end

      # Registers the :poltergeist_phantomjs Capybara driver, creates a session
      # for it and applies every session-level option found in the config.
      #
      # Returns the configured Capybara::Session.
      def build
        Capybara.register_driver(:poltergeist_phantomjs) do |app|
          Capybara::Poltergeist::Driver.new(app, driver_options)
        end

        @browser = Capybara::Session.new(:poltergeist_phantomjs)
        @browser.spider = spider
        log_debug "created browser instance"

        # The order below mirrors the order in which options have always been applied.
        apply_proxy
        apply_headers
        apply_user_agent
        apply_cookies
        apply_retry_request_errors
        apply_restart_limits
        apply_clear_cookies
        apply_clear_and_set_cookies
        apply_change_user_agent
        apply_change_proxy
        apply_delay

        @browser
      end

      private

      def log_debug(message)
        logger.debug "#{LOG_TAG} #{message}"
      end

      def log_error(message)
        logger.error "#{LOG_TAG} #{message}"
      end

      # A setting may be given either as a plain value or as a Proc producing one.
      def resolve(setting)
        (setting.instance_of?(Proc) ? setting.call : setting).strip
      end

      # Options hash handed to Capybara::Poltergeist::Driver.new.
      def driver_options
        options = { js_errors: false, debug: false, inspector: false, phantomjs_options: [] }
        phantomjs_flags = options[:phantomjs_options]

        window_size = @config[:window_size].presence
        if window_size
          options[:window_size] = window_size
          log_debug "enabled window_size"
        end

        cert_path = @config[:ssl_cert_path].presence
        if cert_path
          phantomjs_flags << "--ssl-certificates-path=#{cert_path}"
          log_debug "enabled custom ssl_cert"
        end

        if @config[:ignore_ssl_errors].present?
          phantomjs_flags.push("--ignore-ssl-errors=yes", "--ssl-protocol=any")
          log_debug "enabled ignore_ssl_errors"
        end

        if @config[:disable_images].present?
          phantomjs_flags << "--load-images=no"
          log_debug "enabled disable_images"
        end

        options
      end

      # Proxy string is colon separated; every segment is forwarded to set_proxy.
      def apply_proxy
        proxy = @config[:proxy].presence
        return unless proxy

        segments = resolve(proxy).split(":")
        ip, port, type = segments

        @browser.set_proxy(*segments)
        log_debug "enabled #{type} proxy, ip: #{ip}, port: #{port}"
      end

      def apply_headers
        headers = @config[:headers].presence
        return unless headers

        @browser.driver.headers = headers
        log_debug "enabled custom headers"
      end

      def apply_user_agent
        user_agent = @config[:user_agent].presence
        return unless user_agent

        @browser.driver.add_header("User-Agent", resolve(user_agent))
        log_debug "enabled custom user-agent"
      end

      def apply_cookies
        cookies = @config[:cookies].presence
        return unless cookies

        cookies.each do |cookie|
          @browser.driver.set_cookie(cookie[:name], cookie[:value], cookie)
        end
        log_debug "enabled custom cookies"
      end

      def apply_retry_request_errors
        errors = @config.dig(:browser, :retry_request_errors).presence
        return unless errors

        @browser.config.retry_request_errors = errors
        log_debug "enabled `browser retry_request_errors`"
      end

      def apply_restart_limits
        requests_limit = @config.dig(:browser, :restart_if, :requests_limit).presence
        if requests_limit
          @browser.config.restart_if[:requests_limit] = requests_limit
          log_debug "enabled `browser restart_if requests_limit` >= #{requests_limit}"
        end

        memory_limit = @config.dig(:browser, :restart_if, :memory_limit).presence
        if memory_limit
          @browser.config.restart_if[:memory_limit] = memory_limit
          log_debug "enabled `browser restart_if memory_limit` >= #{memory_limit}"
        end
      end

      def apply_clear_cookies
        return unless @config.dig(:browser, :before_request, :clear_cookies)

        @browser.config.before_request[:clear_cookies] = true
        log_debug "enabled `browser before_request clear_cookies`"
      end

      # Needs :cookies in the config; otherwise the option is skipped with an error.
      def apply_clear_and_set_cookies
        return unless @config.dig(:browser, :before_request, :clear_and_set_cookies)

        cookies = @config[:cookies].presence
        if cookies
          @browser.config.cookies = cookies
          @browser.config.before_request[:clear_and_set_cookies] = true
          log_debug "enabled `browser before_request clear_and_set_cookies`"
        else
          log_error "`cookies` should be present to enable `browser before_request clear_and_set_cookies`, skipped"
        end
      end

      # Only enabled when :user_agent is a Proc, which is stored on the session config.
      def apply_change_user_agent
        return unless @config.dig(:browser, :before_request, :change_user_agent)

        user_agent = @config[:user_agent]
        if user_agent.present? && user_agent.instance_of?(Proc)
          @browser.config.user_agent = user_agent
          @browser.config.before_request[:change_user_agent] = true
          log_debug "enabled `browser before_request change_user_agent`"
        else
          log_error "`user_agent` should be present and has lambda format to enable `browser before_request change_user_agent`, skipped"
        end
      end

      # Only enabled when :proxy is a Proc, which is stored on the session config.
      def apply_change_proxy
        return unless @config.dig(:browser, :before_request, :change_proxy)

        proxy = @config[:proxy]
        if proxy.present? && proxy.instance_of?(Proc)
          @browser.config.proxy = proxy
          @browser.config.before_request[:change_proxy] = true
          log_debug "enabled `browser before_request change_proxy`"
        else
          log_error "`proxy` should be present and has lambda format to enable `browser before_request change_proxy`, skipped"
        end
      end

      def apply_delay
        delay = @config.dig(:browser, :before_request, :delay).presence
        return unless delay

        @browser.config.before_request[:delay] = delay
        log_debug "enabled `browser before_request delay`"
      end
    end
  end
end
|
@@ -0,0 +1,178 @@
|
|
1
|
+
require 'capybara'
|
2
|
+
require 'selenium-webdriver'
|
3
|
+
require_relative '../capybara_configuration'
|
4
|
+
require_relative '../capybara_ext/selenium/driver'
|
5
|
+
require_relative '../capybara_ext/session'
|
6
|
+
|
7
|
+
module Kimurai
  class BrowserBuilder
    # Builds a Capybara session that uses the :selenium_chrome driver,
    # configured from the engine config hash handed to the constructor.
    class SeleniumChromeBuilder
      class << self
        # Headless (virtual display) instance kept at class level so that it is
        # created and started only once, however many builders are used.
        attr_accessor :virtual_display
      end

      attr_reader :logger, :spider

      # config - Hash of settings. Keys read by this class: :window_size, :proxy,
      #          :proxy_bypass_list, :ignore_ssl_errors, :disable_images, :headers,
      #          :user_agent, :headless_mode, :cookies and the nested :browser hash.
      # spider - object responding to #logger; it is also attached to the session.
      def initialize(config, spider:)
        @config = config
        @spider = spider
        @logger = spider.logger
      end

      # Registers the :selenium_chrome Capybara driver, creates a session for it
      # and applies every session-level option found in the config.
      #
      # Returns the configured Capybara::Session.
      def build
        Capybara.register_driver :selenium_chrome do |app|
          driver_options = build_driver_options
          # NOTE(review): the chromedriver location is hard-coded; confirm this
          # matches every environment the gem is expected to run in.
          Capybara::Selenium::Driver.new(app, browser: :chrome, options: driver_options, driver_path: "/usr/local/bin/chromedriver")
        end

        @browser = Capybara::Session.new(:selenium_chrome)
        @browser.spider = spider
        logger.debug "BrowserBuilder (selenium_chrome): created browser instance"

        # Window size is not applied on the session: it is passed to Chrome
        # through the --window-size switch in build_driver_options.
        configure_session
        @browser
      end

      private

      # Creates the Selenium::WebDriver::Chrome::Options used to start Chrome.
      def build_driver_options
        default_args = %w[--disable-gpu --no-sandbox --disable-translate]
        driver_options = Selenium::WebDriver::Chrome::Options.new(args: default_args)

        if size = @config[:window_size].presence
          driver_options.args << "--window-size=#{size.join(',')}"
          logger.debug "BrowserBuilder (selenium_chrome): enabled window_size"
        end

        proxy = @config[:proxy].presence
        add_proxy(driver_options, proxy) if proxy

        if proxy_bypass_list = @config[:proxy_bypass_list].presence
          if proxy
            driver_options.args << "--proxy-bypass-list=#{proxy_bypass_list.join(';')}"
            logger.debug "BrowserBuilder (selenium_chrome): enabled proxy_bypass_list"
          else
            logger.error "BrowserBuilder (selenium_chrome): provide `proxy` to set proxy_bypass_list, skipped"
          end
        end

        if @config[:ignore_ssl_errors].present?
          driver_options.args << "--ignore-certificate-errors"
          driver_options.args << "--allow-insecure-localhost"
          logger.debug "BrowserBuilder (selenium_chrome): enabled ignore_ssl_errors"
        end

        if @config[:disable_images].present?
          driver_options.prefs["profile.managed_default_content_settings.images"] = 2
          logger.debug "BrowserBuilder (selenium_chrome): enabled disable_images"
        end

        if @config[:headers].present?
          logger.warn "BrowserBuilder: (selenium_chrome): custom headers doesn't supported by selenium, skipped"
        end

        if user_agent = @config[:user_agent].presence
          user_agent_string = (user_agent.class == Proc ? user_agent.call : user_agent).strip
          # NOTE(review): the single quotes are part of the switch value sent to
          # Chrome - confirm they do not end up inside the reported User-Agent.
          driver_options.args << "--user-agent='#{user_agent_string}'"
          logger.debug "BrowserBuilder (selenium_chrome): enabled custom user-agent"
        end

        add_headless_mode(driver_options)
        driver_options
      end

      # Adds the --proxy-server switch for a "ip:port[:type[:user:password]]"
      # proxy (plain string or Proc returning one). Proxies that carry
      # credentials are skipped with an error.
      def add_proxy(driver_options, proxy)
        proxy_string = (proxy.class == Proc ? proxy.call : proxy).strip
        ip, port, type, user, password = proxy_string.split(":")

        unless user.nil? && password.nil?
          logger.error "BrowserBuilder (selenium_chrome): proxy with authentication doesn't supported by selenium, skipped"
          return
        end

        # Without a type the scheme is left out entirely (Chrome then treats
        # the proxy as HTTP) instead of emitting a malformed "://ip:port" value.
        type = type.presence
        proxy_server = type ? "#{type}://#{ip}:#{port}" : "#{ip}:#{port}"

        driver_options.args << "--proxy-server=#{proxy_server}"
        logger.debug "BrowserBuilder (selenium_chrome): enabled #{type || 'http'} proxy, ip: #{ip}, port: #{port}"
      end

      # Headless handling, disabled altogether when ENV["HEADLESS"] is "false".
      # :virtual_display mode starts a shared Headless display (Linux only);
      # any other mode uses Chrome's own --headless switch.
      def add_headless_mode(driver_options)
        return if ENV["HEADLESS"] == "false"

        unless @config[:headless_mode] == :virtual_display
          driver_options.args << "--headless"
          logger.debug "BrowserBuilder (selenium_chrome): enabled native headless_mode"
          return
        end

        if Gem::Platform.local.os == "linux"
          unless self.class.virtual_display
            # Loaded lazily so the `headless` gem is only needed in this mode.
            require 'headless'
            self.class.virtual_display = Headless.new(reuse: true, destroy_at_exit: false)
            self.class.virtual_display.start
          end

          logger.debug "BrowserBuilder (selenium_chrome): enabled virtual_display headless_mode"
        else
          logger.error "BrowserBuilder (selenium_chrome): virtual_display headless_mode works only " \
            "on Linux platform. Browser will run in normal mode. Set `native` mode instead."
        end
      end

      # Copies session-level settings from the config onto @browser.config.
      def configure_session
        if cookies = @config[:cookies].presence
          @browser.config.cookies = cookies
          logger.debug "BrowserBuilder (selenium_chrome): enabled custom cookies"
        end

        if errors = @config.dig(:browser, :retry_request_errors).presence
          @browser.config.retry_request_errors = errors
          logger.debug "BrowserBuilder (selenium_chrome): enabled `browser retry_request_errors`"
        end

        if requests_limit = @config.dig(:browser, :restart_if, :requests_limit).presence
          @browser.config.restart_if[:requests_limit] = requests_limit
          logger.debug "BrowserBuilder (selenium_chrome): enabled `browser restart_if requests_limit` >= #{requests_limit}"
        end

        if memory_limit = @config.dig(:browser, :restart_if, :memory_limit).presence
          @browser.config.restart_if[:memory_limit] = memory_limit
          logger.debug "BrowserBuilder (selenium_chrome): enabled `browser restart_if memory_limit` >= #{memory_limit}"
        end

        if @config.dig(:browser, :before_request, :clear_cookies)
          @browser.config.before_request[:clear_cookies] = true
          logger.debug "BrowserBuilder (selenium_chrome): enabled `browser before_request clear_cookies`"
        end

        if @config.dig(:browser, :before_request, :clear_and_set_cookies)
          if cookies = @config[:cookies].presence
            @browser.config.cookies = cookies
            @browser.config.before_request[:clear_and_set_cookies] = true
            logger.debug "BrowserBuilder (selenium_chrome): enabled `browser before_request clear_and_set_cookies`"
          else
            logger.error "BrowserBuilder (selenium_chrome): `cookies` should be present to enable `browser before_request clear_and_set_cookies`, skipped"
          end
        end

        # These two options are only reported as unsupported for this driver.
        if @config.dig(:browser, :before_request, :change_user_agent)
          logger.error "BrowserBuilder (selenium_chrome): `browser before_request change_user_agent` option not supported by Selenium, skipped"
        end

        if @config.dig(:browser, :before_request, :change_proxy)
          logger.error "BrowserBuilder (selenium_chrome): `browser before_request change_proxy` option not supported by Selenium, skipped"
        end

        if delay = @config.dig(:browser, :before_request, :delay).presence
          @browser.config.before_request[:delay] = delay
          logger.debug "BrowserBuilder (selenium_chrome): enabled `browser before_request delay`"
        end
      end
    end
  end
end
|
@@ -0,0 +1,185 @@
|
|
1
|
+
require 'capybara'
|
2
|
+
require 'selenium-webdriver'
|
3
|
+
require_relative '../capybara_configuration'
|
4
|
+
require_relative '../capybara_ext/selenium/driver'
|
5
|
+
require_relative '../capybara_ext/session'
|
6
|
+
|
7
|
+
module Kimurai
  class BrowserBuilder
    # Builds a Capybara session that uses the :selenium_firefox driver,
    # configured from the engine config hash handed to the constructor.
    class SeleniumFirefoxBuilder
      class << self
        # Headless (virtual display) instance kept at class level so that it is
        # created and started only once, however many builders are used.
        attr_accessor :virtual_display
      end

      attr_reader :logger, :spider

      # config - Hash of settings. Keys read by this class: :proxy,
      #          :proxy_bypass_list, :ignore_ssl_errors, :disable_images, :headers,
      #          :user_agent, :headless_mode, :window_size, :cookies and the
      #          nested :browser hash.
      # spider - object responding to #logger; it is also attached to the session.
      def initialize(config, spider:)
        @config = config
        @spider = spider
        @logger = spider.logger
      end

      # Registers the :selenium_firefox Capybara driver, creates a session for it
      # and applies every session-level option found in the config.
      #
      # Returns the configured Capybara::Session.
      def build
        Capybara.register_driver :selenium_firefox do |app|
          driver_options = build_driver_options
          Capybara::Selenium::Driver.new(app, browser: :firefox, options: driver_options)
        end

        @browser = Capybara::Session.new(:selenium_firefox)
        @browser.spider = spider
        logger.debug "BrowserBuilder (selenium_firefox): created browser instance"

        configure_session
        @browser
      end

      private

      # Creates the Selenium::WebDriver::Firefox::Options (with a fresh profile)
      # used to start Firefox.
      def build_driver_options
        driver_options = Selenium::WebDriver::Firefox::Options.new
        driver_options.profile = Selenium::WebDriver::Firefox::Profile.new
        driver_options.profile["browser.link.open_newwindow"] = 3 # open windows in tabs
        driver_options.profile["media.peerconnection.enabled"] = false # disable web rtc

        proxy = @config[:proxy].presence
        add_proxy(driver_options, proxy) if proxy

        if proxy_bypass_list = @config[:proxy_bypass_list].presence
          if proxy
            driver_options.profile["network.proxy.no_proxies_on"] = proxy_bypass_list.join(", ")
            logger.debug "BrowserBuilder (selenium_firefox): enabled proxy_bypass_list"
          else
            logger.error "BrowserBuilder (selenium_firefox): provide `proxy` to set proxy_bypass_list, skipped"
          end
        end

        if @config[:ignore_ssl_errors].present?
          driver_options.profile.secure_ssl = false
          driver_options.profile.assume_untrusted_certificate_issuer = true
          logger.debug "BrowserBuilder (selenium_firefox): enabled ignore_ssl_errors"
        end

        if @config[:disable_images].present?
          driver_options.profile["permissions.default.image"] = 2
          logger.debug "BrowserBuilder (selenium_firefox): enabled disable_images"
        end

        if @config[:headers].present?
          logger.warn "BrowserBuilder: (selenium_firefox): custom headers doesn't supported by selenium, skipped"
        end

        if user_agent = @config[:user_agent].presence
          user_agent_string = (user_agent.class == Proc ? user_agent.call : user_agent).strip
          driver_options.profile["general.useragent.override"] = user_agent_string
          logger.debug "BrowserBuilder (selenium_firefox): enabled custom user-agent"
        end

        add_headless_mode(driver_options)
        driver_options
      end

      # Writes the proxy preferences for a "ip:port[:type[:user:password]]"
      # proxy (plain string or Proc returning one) into the profile.
      #
      # A missing type is treated as "http". Proxies with credentials and
      # types other than http/socks5 are skipped with an error, leaving the
      # profile untouched, so the log never claims a proxy that is not set.
      def add_proxy(driver_options, proxy)
        proxy_string = (proxy.class == Proc ? proxy.call : proxy).strip
        ip, port, type, user, password = proxy_string.split(":")

        unless user.nil? && password.nil?
          logger.error "BrowserBuilder (selenium_firefox): proxy with authentication doesn't supported by selenium, skipped"
          return
        end

        type = type.presence || "http"
        prefs = proxy_prefs(type, ip, port.to_i)

        if prefs.nil?
          logger.error "BrowserBuilder (selenium_firefox): unsupported proxy type `#{type}` (expected http or socks5), skipped"
          return
        end

        driver_options.profile["network.proxy.type"] = 1
        prefs.each { |name, value| driver_options.profile[name] = value }
        logger.debug "BrowserBuilder (selenium_firefox): enabled #{type} proxy, ip: #{ip}, port: #{port}"
      end

      # Profile preferences for a supported proxy type, or nil when the type
      # is not one this builder knows how to configure.
      def proxy_prefs(type, ip, port)
        case type
        when "http"
          {
            "network.proxy.http" => ip,
            "network.proxy.http_port" => port,
            "network.proxy.ssl" => ip,
            "network.proxy.ssl_port" => port
          }
        when "socks5"
          {
            "network.proxy.socks" => ip,
            "network.proxy.socks_port" => port,
            "network.proxy.socks_version" => 5,
            "network.proxy.socks_remote_dns" => true
          }
        end
      end

      # Headless handling, disabled altogether when ENV["HEADLESS"] is "false".
      # :virtual_display mode starts a shared Headless display (Linux only);
      # any other mode passes --headless to Firefox.
      def add_headless_mode(driver_options)
        return if ENV["HEADLESS"] == "false"

        unless @config[:headless_mode] == :virtual_display
          driver_options.args << "--headless"
          logger.debug "BrowserBuilder (selenium_firefox): enabled native headless_mode"
          return
        end

        if Gem::Platform.local.os == "linux"
          unless self.class.virtual_display
            # Loaded lazily so the `headless` gem is only needed in this mode.
            require 'headless'
            self.class.virtual_display = Headless.new(reuse: true, destroy_at_exit: false)
            self.class.virtual_display.start
          end

          logger.debug "BrowserBuilder (selenium_firefox): enabled virtual_display headless_mode"
        else
          logger.error "BrowserBuilder (selenium_firefox): virtual_display headless_mode works only " \
            "on Linux platform. Browser will run in normal mode. Set `native` mode instead."
        end
      end

      # Applies window size and copies session-level settings from the config
      # onto @browser.config.
      def configure_session
        if size = @config[:window_size].presence
          @browser.current_window.resize_to(*size)
          logger.debug "BrowserBuilder (selenium_firefox): enabled window_size"
        end

        if cookies = @config[:cookies].presence
          @browser.config.cookies = cookies
          logger.debug "BrowserBuilder (selenium_firefox): enabled custom cookies"
        end

        if errors = @config.dig(:browser, :retry_request_errors).presence
          @browser.config.retry_request_errors = errors
          logger.debug "BrowserBuilder (selenium_firefox): enabled `browser retry_request_errors`"
        end

        if requests_limit = @config.dig(:browser, :restart_if, :requests_limit).presence
          @browser.config.restart_if[:requests_limit] = requests_limit
          logger.debug "BrowserBuilder (selenium_firefox): enabled `browser restart_if requests_limit` >= #{requests_limit}"
        end

        if memory_limit = @config.dig(:browser, :restart_if, :memory_limit).presence
          @browser.config.restart_if[:memory_limit] = memory_limit
          logger.debug "BrowserBuilder (selenium_firefox): enabled `browser restart_if memory_limit` >= #{memory_limit}"
        end

        if @config.dig(:browser, :before_request, :clear_cookies)
          @browser.config.before_request[:clear_cookies] = true
          logger.debug "BrowserBuilder (selenium_firefox): enabled `browser before_request clear_cookies`"
        end

        if @config.dig(:browser, :before_request, :clear_and_set_cookies)
          if cookies = @config[:cookies].presence
            @browser.config.cookies = cookies
            @browser.config.before_request[:clear_and_set_cookies] = true
            logger.debug "BrowserBuilder (selenium_firefox): enabled `browser before_request clear_and_set_cookies`"
          else
            logger.error "BrowserBuilder (selenium_firefox): `cookies` should be present to enable `browser before_request clear_and_set_cookies`, skipped"
          end
        end

        # These two options are only reported as unsupported for this driver.
        if @config.dig(:browser, :before_request, :change_user_agent)
          logger.error "BrowserBuilder (selenium_firefox): `browser before_request change_user_agent` option not supported by Selenium, skipped"
        end

        if @config.dig(:browser, :before_request, :change_proxy)
          logger.error "BrowserBuilder (selenium_firefox): `browser before_request change_proxy` option not supported by Selenium, skipped"
        end

        if delay = @config.dig(:browser, :before_request, :delay).presence
          @browser.config.before_request[:delay] = delay
          logger.debug "BrowserBuilder (selenium_firefox): enabled `browser before_request delay`"
        end
      end
    end
  end
end
|