kimurai 1.3.2 → 2.0.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Files changed (48)
  1. checksums.yaml +4 -4
  2. data/.rubocop.yml +9 -0
  3. data/CHANGELOG.md +29 -0
  4. data/Gemfile +2 -2
  5. data/README.md +478 -649
  6. data/Rakefile +6 -6
  7. data/bin/console +3 -4
  8. data/exe/kimurai +0 -1
  9. data/kimurai.gemspec +38 -37
  10. data/lib/kimurai/base/saver.rb +15 -19
  11. data/lib/kimurai/base/storage.rb +1 -1
  12. data/lib/kimurai/base.rb +42 -38
  13. data/lib/kimurai/base_helper.rb +5 -4
  14. data/lib/kimurai/browser_builder/mechanize_builder.rb +44 -38
  15. data/lib/kimurai/browser_builder/selenium_chrome_builder.rb +63 -51
  16. data/lib/kimurai/browser_builder/selenium_firefox_builder.rb +61 -55
  17. data/lib/kimurai/browser_builder.rb +7 -31
  18. data/lib/kimurai/capybara_configuration.rb +1 -1
  19. data/lib/kimurai/capybara_ext/driver/base.rb +50 -46
  20. data/lib/kimurai/capybara_ext/mechanize/driver.rb +51 -50
  21. data/lib/kimurai/capybara_ext/selenium/driver.rb +33 -29
  22. data/lib/kimurai/capybara_ext/session/config.rb +1 -1
  23. data/lib/kimurai/capybara_ext/session.rb +40 -38
  24. data/lib/kimurai/cli/generator.rb +15 -15
  25. data/lib/kimurai/cli.rb +52 -85
  26. data/lib/kimurai/core_ext/array.rb +2 -2
  27. data/lib/kimurai/core_ext/hash.rb +1 -1
  28. data/lib/kimurai/core_ext/numeric.rb +4 -4
  29. data/lib/kimurai/pipeline.rb +2 -1
  30. data/lib/kimurai/runner.rb +6 -6
  31. data/lib/kimurai/template/Gemfile +2 -2
  32. data/lib/kimurai/template/config/boot.rb +4 -4
  33. data/lib/kimurai/template/config/schedule.rb +15 -15
  34. data/lib/kimurai/template/spiders/application_spider.rb +14 -14
  35. data/lib/kimurai/version.rb +1 -1
  36. data/lib/kimurai.rb +7 -3
  37. metadata +58 -65
  38. data/.travis.yml +0 -5
  39. data/lib/kimurai/automation/deploy.yml +0 -54
  40. data/lib/kimurai/automation/setup/chromium_chromedriver.yml +0 -26
  41. data/lib/kimurai/automation/setup/firefox_geckodriver.yml +0 -20
  42. data/lib/kimurai/automation/setup/phantomjs.yml +0 -33
  43. data/lib/kimurai/automation/setup/ruby_environment.yml +0 -124
  44. data/lib/kimurai/automation/setup.yml +0 -44
  45. data/lib/kimurai/browser_builder/poltergeist_phantomjs_builder.rb +0 -171
  46. data/lib/kimurai/capybara_ext/poltergeist/driver.rb +0 -13
  47. data/lib/kimurai/cli/ansible_command_builder.rb +0 -71
  48. data/lib/kimurai/template/config/automation.yml +0 -13
@@ -5,7 +5,7 @@ require_relative '../capybara_ext/selenium/driver'
5
5
  require_relative '../capybara_ext/session'
6
6
 
7
7
  module Kimurai
8
- class BrowserBuilder
8
+ module BrowserBuilder
9
9
  class SeleniumChromeBuilder
10
10
  class << self
11
11
  attr_accessor :virtual_display
@@ -23,28 +23,28 @@ module Kimurai
23
23
  # Register driver
24
24
  Capybara.register_driver :selenium_chrome do |app|
25
25
  # Create driver options
26
- opts = { args: %w[--disable-gpu --no-sandbox --disable-translate] }
26
+ # See all options here: https://seleniumhq.github.io/selenium/docs/api/rb/Selenium/WebDriver/Chrome/Options.html
27
+ driver_options = Selenium::WebDriver::Chrome::Options.new
28
+ driver_options.args += %w[--disable-gpu --no-sandbox --disable-translate
29
+ --disable-blink-features=AutomationControlled]
27
30
 
28
31
  # Provide custom chrome browser path:
29
- if chrome_path = Kimurai.configuration.selenium_chrome_path
30
- opts.merge!(binary: chrome_path)
32
+ if (chrome_path = Kimurai.configuration.selenium_chrome_path)
33
+ driver_options.binary = chrome_path
31
34
  end
32
35
 
33
- # See all options here: https://seleniumhq.github.io/selenium/docs/api/rb/Selenium/WebDriver/Chrome/Options.html
34
- driver_options = Selenium::WebDriver::Chrome::Options.new(opts)
35
-
36
36
  # Window size
37
- if size = @config[:window_size].presence
37
+ if (size = @config[:window_size].presence)
38
38
  driver_options.args << "--window-size=#{size.join(',')}"
39
- logger.debug "BrowserBuilder (selenium_chrome): enabled window_size"
39
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled window_size'
40
40
  end
41
41
 
42
42
  # Proxy
43
- if proxy = @config[:proxy].presence
44
- proxy_string = (proxy.class == Proc ? proxy.call : proxy).strip
45
- ip, port, type, user, password = proxy_string.split(":")
43
+ if (proxy = @config[:proxy].presence)
44
+ proxy_string = (proxy.instance_of?(Proc) ? proxy.call : proxy).strip
45
+ ip, port, type, user, password = proxy_string.split(':')
46
46
 
47
- if %w(http socks5).include?(type)
47
+ if %w[http socks5].include?(type)
48
48
  if user.nil? && password.nil?
49
49
  driver_options.args << "--proxy-server=#{type}://#{ip}:#{port}"
50
50
  logger.debug "BrowserBuilder (selenium_chrome): enabled #{type} proxy, ip: #{ip}, port: #{port}"
@@ -56,26 +56,26 @@ module Kimurai
56
56
  end
57
57
  end
58
58
 
59
- if proxy_bypass_list = @config[:proxy_bypass_list].presence
59
+ if (proxy_bypass_list = @config[:proxy_bypass_list].presence)
60
60
  if proxy
61
61
  driver_options.args << "--proxy-bypass-list=#{proxy_bypass_list.join(';')}"
62
- logger.debug "BrowserBuilder (selenium_chrome): enabled proxy_bypass_list"
62
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled proxy_bypass_list'
63
63
  else
64
- logger.error "BrowserBuilder (selenium_chrome): provide `proxy` to set proxy_bypass_list, skipped"
64
+ logger.error 'BrowserBuilder (selenium_chrome): provide `proxy` to set proxy_bypass_list, skipped'
65
65
  end
66
66
  end
67
67
 
68
68
  # SSL
69
69
  if @config[:ignore_ssl_errors].present?
70
- driver_options.args << "--ignore-certificate-errors"
71
- driver_options.args << "--allow-insecure-localhost"
72
- logger.debug "BrowserBuilder (selenium_chrome): enabled ignore_ssl_errors"
70
+ driver_options.args << '--ignore-certificate-errors'
71
+ driver_options.args << '--allow-insecure-localhost'
72
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled ignore_ssl_errors'
73
73
  end
74
74
 
75
75
  # Disable images
76
76
  if @config[:disable_images].present?
77
- driver_options.prefs["profile.managed_default_content_settings.images"] = 2
78
- logger.debug "BrowserBuilder (selenium_chrome): enabled disable_images"
77
+ driver_options.prefs['profile.managed_default_content_settings.images'] = 2
78
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled disable_images'
79
79
  end
80
80
 
81
81
  # Headers
@@ -83,72 +83,78 @@ module Kimurai
83
83
  logger.warn "BrowserBuilder: (selenium_chrome): custom headers doesn't supported by selenium, skipped"
84
84
  end
85
85
 
86
- if user_agent = @config[:user_agent].presence
87
- user_agent_string = (user_agent.class == Proc ? user_agent.call : user_agent).strip
86
+ if (user_agent = @config[:user_agent].presence)
87
+ user_agent_string = (user_agent.instance_of?(Proc) ? user_agent.call : user_agent).strip
88
88
  driver_options.args << "--user-agent='#{user_agent_string}'"
89
- logger.debug "BrowserBuilder (selenium_chrome): enabled custom user_agent"
89
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled custom user_agent'
90
90
  end
91
91
 
92
92
  # Headless mode
93
- if ENV["HEADLESS"] != "false"
93
+ if ENV['HEADLESS'] != 'false'
94
94
  if @config[:headless_mode] == :virtual_display
95
- if Gem::Platform.local.os == "linux"
95
+ if Gem::Platform.local.os == 'linux'
96
96
  unless self.class.virtual_display
97
97
  require 'headless'
98
98
  self.class.virtual_display = Headless.new(reuse: true, destroy_at_exit: false)
99
99
  self.class.virtual_display.start
100
100
  end
101
101
 
102
- logger.debug "BrowserBuilder (selenium_chrome): enabled virtual_display headless_mode"
102
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled virtual_display headless_mode'
103
103
  else
104
- logger.error "BrowserBuilder (selenium_chrome): virtual_display headless_mode works only " \
105
- "on Linux platform. Browser will run in normal mode. Set `native` mode instead."
104
+ logger.error 'BrowserBuilder (selenium_chrome): virtual_display headless_mode works only ' \
105
+ 'on Linux platform. Browser will run in normal mode. Set `native` mode instead.'
106
106
  end
107
107
  else
108
- driver_options.args << "--headless"
109
- logger.debug "BrowserBuilder (selenium_chrome): enabled native headless_mode"
108
+ driver_options.args << '--headless'
109
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled native headless_mode'
110
110
  end
111
111
  end
112
112
 
113
- chromedriver_path = Kimurai.configuration.chromedriver_path || "/usr/local/bin/chromedriver"
114
- Capybara::Selenium::Driver.new(app, browser: :chrome, options: driver_options, driver_path: chromedriver_path)
113
+ # Use Selenium Manager by default (auto-downloads driver), or custom path if configured
114
+ if (chromedriver_path = Kimurai.configuration.chromedriver_path)
115
+ service = Selenium::WebDriver::Service.chrome(path: chromedriver_path)
116
+ Capybara::Selenium::Driver.new(app, browser: :chrome, options: driver_options, service: service)
117
+ else
118
+ # Let Selenium Manager handle driver automatically
119
+ Capybara::Selenium::Driver.new(app, browser: :chrome, options: driver_options)
120
+ end
115
121
  end
116
122
 
117
123
  # Create browser instance (Capybara session)
118
124
  @browser = Capybara::Session.new(:selenium_chrome)
119
125
  @browser.spider = spider
120
- logger.debug "BrowserBuilder (selenium_chrome): created browser instance"
126
+ logger.debug 'BrowserBuilder (selenium_chrome): created browser instance'
121
127
 
122
128
  if @config[:extensions].present?
123
- logger.error "BrowserBuilder (selenium_chrome): `extensions` option not supported by Selenium, skipped"
129
+ logger.error 'BrowserBuilder (selenium_chrome): `extensions` option not supported by Selenium, skipped'
124
130
  end
125
131
 
126
132
  # Cookies
127
- if cookies = @config[:cookies].presence
133
+ if (cookies = @config[:cookies].presence)
128
134
  @browser.config.cookies = cookies
129
- logger.debug "BrowserBuilder (selenium_chrome): enabled custom cookies"
135
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled custom cookies'
130
136
  end
131
137
 
132
138
  # Browser instance options
133
139
  # skip_request_errors
134
- if skip_errors = @config[:skip_request_errors].presence
140
+ if (skip_errors = @config[:skip_request_errors].presence)
135
141
  @browser.config.skip_request_errors = skip_errors
136
- logger.debug "BrowserBuilder (selenium_chrome): enabled skip_request_errors"
142
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled skip_request_errors'
137
143
  end
138
144
 
139
145
  # retry_request_errors
140
- if retry_errors = @config[:retry_request_errors].presence
146
+ if (retry_errors = @config[:retry_request_errors].presence)
141
147
  @browser.config.retry_request_errors = retry_errors
142
- logger.debug "BrowserBuilder (selenium_chrome): enabled retry_request_errors"
148
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled retry_request_errors'
143
149
  end
144
150
 
145
151
  # restart_if
146
- if requests_limit = @config.dig(:restart_if, :requests_limit).presence
152
+ if (requests_limit = @config.dig(:restart_if, :requests_limit).presence)
147
153
  @browser.config.restart_if[:requests_limit] = requests_limit
148
154
  logger.debug "BrowserBuilder (selenium_chrome): enabled restart_if.requests_limit >= #{requests_limit}"
149
155
  end
150
156
 
151
- if memory_limit = @config.dig(:restart_if, :memory_limit).presence
157
+ if (memory_limit = @config.dig(:restart_if, :memory_limit).presence)
152
158
  @browser.config.restart_if[:memory_limit] = memory_limit
153
159
  logger.debug "BrowserBuilder (selenium_chrome): enabled restart_if.memory_limit >= #{memory_limit}"
154
160
  end
@@ -156,34 +162,40 @@ module Kimurai
156
162
  # before_request clear_cookies
157
163
  if @config.dig(:before_request, :clear_cookies)
158
164
  @browser.config.before_request[:clear_cookies] = true
159
- logger.debug "BrowserBuilder (selenium_chrome): enabled before_request.clear_cookies"
165
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled before_request.clear_cookies'
160
166
  end
161
167
 
162
168
  # before_request clear_and_set_cookies
163
169
  if @config.dig(:before_request, :clear_and_set_cookies)
164
- if cookies = @config[:cookies].presence
170
+ if (cookies = @config[:cookies].presence)
165
171
  @browser.config.cookies = cookies
166
172
  @browser.config.before_request[:clear_and_set_cookies] = true
167
- logger.debug "BrowserBuilder (selenium_chrome): enabled before_request.clear_and_set_cookies"
173
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled before_request.clear_and_set_cookies'
168
174
  else
169
- logger.error "BrowserBuilder (selenium_chrome): cookies should be present to enable before_request.clear_and_set_cookies, skipped"
175
+ logger.error 'BrowserBuilder (selenium_chrome): cookies should be present to enable before_request.clear_and_set_cookies, skipped'
170
176
  end
171
177
  end
172
178
 
173
179
  # before_request change_user_agent
174
180
  if @config.dig(:before_request, :change_user_agent)
175
- logger.error "BrowserBuilder (selenium_chrome): before_request.change_user_agent option not supported by Selenium, skipped"
181
+ logger.error 'BrowserBuilder (selenium_chrome): before_request.change_user_agent option not supported by Selenium, skipped'
176
182
  end
177
183
 
178
184
  # before_request change_proxy
179
185
  if @config.dig(:before_request, :change_proxy)
180
- logger.error "BrowserBuilder (selenium_chrome): before_request.change_proxy option not supported by Selenium, skipped"
186
+ logger.error 'BrowserBuilder (selenium_chrome): before_request.change_proxy option not supported by Selenium, skipped'
181
187
  end
182
188
 
183
189
  # before_request delay
184
- if delay = @config.dig(:before_request, :delay).presence
190
+ if (delay = @config.dig(:before_request, :delay).presence)
185
191
  @browser.config.before_request[:delay] = delay
186
- logger.debug "BrowserBuilder (selenium_chrome): enabled before_request.delay"
192
+ logger.debug 'BrowserBuilder (selenium_chrome): enabled before_request.delay'
193
+ end
194
+
195
+ # encoding
196
+ if (encoding = @config[:encoding])
197
+ @browser.config.encoding = encoding
198
+ logger.debug "BrowserBuilder (selenium_chrome): enabled encoding: #{encoding}"
187
199
  end
188
200
 
189
201
  # return Capybara session instance
@@ -5,7 +5,7 @@ require_relative '../capybara_ext/selenium/driver'
5
5
  require_relative '../capybara_ext/session'
6
6
 
7
7
  module Kimurai
8
- class BrowserBuilder
8
+ module BrowserBuilder
9
9
  class SeleniumFirefoxBuilder
10
10
  class << self
11
11
  attr_accessor :virtual_display
@@ -25,28 +25,28 @@ module Kimurai
25
25
  # Create driver options
26
26
  driver_options = Selenium::WebDriver::Firefox::Options.new
27
27
  driver_options.profile = Selenium::WebDriver::Firefox::Profile.new
28
- driver_options.profile["browser.link.open_newwindow"] = 3 # open windows in tabs
29
- driver_options.profile["media.peerconnection.enabled"] = false # disable web rtc
28
+ driver_options.profile['browser.link.open_newwindow'] = 3 # open windows in tabs
29
+ driver_options.profile['media.peerconnection.enabled'] = false # disable web rtc
30
30
 
31
31
  # Proxy
32
- if proxy = @config[:proxy].presence
33
- proxy_string = (proxy.class == Proc ? proxy.call : proxy).strip
34
- ip, port, type, user, password = proxy_string.split(":")
32
+ if (proxy = @config[:proxy].presence)
33
+ proxy_string = (proxy.instance_of?(Proc) ? proxy.call : proxy).strip
34
+ ip, port, type, user, password = proxy_string.split(':')
35
35
 
36
36
  if user.nil? && password.nil?
37
- driver_options.profile["network.proxy.type"] = 1
38
- if type == "http"
39
- driver_options.profile["network.proxy.http"] = ip
40
- driver_options.profile["network.proxy.http_port"] = port.to_i
41
- driver_options.profile["network.proxy.ssl"] = ip
42
- driver_options.profile["network.proxy.ssl_port"] = port.to_i
37
+ driver_options.profile['network.proxy.type'] = 1
38
+ if type == 'http'
39
+ driver_options.profile['network.proxy.http'] = ip
40
+ driver_options.profile['network.proxy.http_port'] = port.to_i
41
+ driver_options.profile['network.proxy.ssl'] = ip
42
+ driver_options.profile['network.proxy.ssl_port'] = port.to_i
43
43
 
44
44
  logger.debug "BrowserBuilder (selenium_firefox): enabled http proxy, ip: #{ip}, port: #{port}"
45
- elsif type == "socks5"
46
- driver_options.profile["network.proxy.socks"] = ip
47
- driver_options.profile["network.proxy.socks_port"] = port.to_i
48
- driver_options.profile["network.proxy.socks_version"] = 5
49
- driver_options.profile["network.proxy.socks_remote_dns"] = true
45
+ elsif type == 'socks5'
46
+ driver_options.profile['network.proxy.socks'] = ip
47
+ driver_options.profile['network.proxy.socks_port'] = port.to_i
48
+ driver_options.profile['network.proxy.socks_version'] = 5
49
+ driver_options.profile['network.proxy.socks_remote_dns'] = true
50
50
 
51
51
  logger.debug "BrowserBuilder (selenium_firefox): enabled socks5 proxy, ip: #{ip}, port: #{port}"
52
52
  else
@@ -57,12 +57,12 @@ module Kimurai
57
57
  end
58
58
  end
59
59
 
60
- if proxy_bypass_list = @config[:proxy_bypass_list].presence
60
+ if (proxy_bypass_list = @config[:proxy_bypass_list].presence)
61
61
  if proxy
62
- driver_options.profile["network.proxy.no_proxies_on"] = proxy_bypass_list.join(", ")
63
- logger.debug "BrowserBuilder (selenium_firefox): enabled proxy_bypass_list"
62
+ driver_options.profile['network.proxy.no_proxies_on'] = proxy_bypass_list.join(', ')
63
+ logger.debug 'BrowserBuilder (selenium_firefox): enabled proxy_bypass_list'
64
64
  else
65
- logger.error "BrowserBuilder (selenium_firefox): provide `proxy` to set proxy_bypass_list, skipped"
65
+ logger.error 'BrowserBuilder (selenium_firefox): provide `proxy` to set proxy_bypass_list, skipped'
66
66
  end
67
67
  end
68
68
 
@@ -70,13 +70,13 @@ module Kimurai
70
70
  if @config[:ignore_ssl_errors].present?
71
71
  driver_options.profile.secure_ssl = false
72
72
  driver_options.profile.assume_untrusted_certificate_issuer = true
73
- logger.debug "BrowserBuilder (selenium_firefox): enabled ignore_ssl_errors"
73
+ logger.debug 'BrowserBuilder (selenium_firefox): enabled ignore_ssl_errors'
74
74
  end
75
75
 
76
76
  # Disable images
77
77
  if @config[:disable_images].present?
78
- driver_options.profile["permissions.default.image"] = 2
79
- logger.debug "BrowserBuilder (selenium_firefox): enabled disable_images"
78
+ driver_options.profile['permissions.default.image'] = 2
79
+ logger.debug 'BrowserBuilder (selenium_firefox): enabled disable_images'
80
80
  end
81
81
 
82
82
  # Headers
@@ -84,30 +84,30 @@ module Kimurai
84
84
  logger.warn "BrowserBuilder: (selenium_firefox): custom headers doesn't supported by selenium, skipped"
85
85
  end
86
86
 
87
- if user_agent = @config[:user_agent].presence
88
- user_agent_string = (user_agent.class == Proc ? user_agent.call : user_agent).strip
89
- driver_options.profile["general.useragent.override"] = user_agent_string
90
- logger.debug "BrowserBuilder (selenium_firefox): enabled custom user_agent"
87
+ if (user_agent = @config[:user_agent].presence)
88
+ user_agent_string = (user_agent.instance_of?(Proc) ? user_agent.call : user_agent).strip
89
+ driver_options.profile['general.useragent.override'] = user_agent_string
90
+ logger.debug 'BrowserBuilder (selenium_firefox): enabled custom user_agent'
91
91
  end
92
92
 
93
93
  # Headless mode
94
- if ENV["HEADLESS"] != "false"
94
+ if ENV['HEADLESS'] != 'false'
95
95
  if @config[:headless_mode] == :virtual_display
96
- if Gem::Platform.local.os == "linux"
96
+ if Gem::Platform.local.os == 'linux'
97
97
  unless self.class.virtual_display
98
98
  require 'headless'
99
99
  self.class.virtual_display = Headless.new(reuse: true, destroy_at_exit: false)
100
100
  self.class.virtual_display.start
101
101
  end
102
102
 
103
- logger.debug "BrowserBuilder (selenium_firefox): enabled virtual_display headless_mode"
103
+ logger.debug 'BrowserBuilder (selenium_firefox): enabled virtual_display headless_mode'
104
104
  else
105
- logger.error "BrowserBuilder (selenium_firefox): virtual_display headless_mode works only " \
106
- "on Linux platform. Browser will run in normal mode. Set `native` mode instead."
105
+ logger.error 'BrowserBuilder (selenium_firefox): virtual_display headless_mode works only ' \
106
+ 'on Linux platform. Browser will run in normal mode. Set `native` mode instead.'
107
107
  end
108
108
  else
109
- driver_options.args << "--headless"
110
- logger.debug "BrowserBuilder (selenium_firefox): enabled native headless_mode"
109
+ driver_options.args << '--headless'
110
+ logger.debug 'BrowserBuilder (selenium_firefox): enabled native headless_mode'
111
111
  end
112
112
  end
113
113
 
@@ -117,44 +117,44 @@ module Kimurai
117
117
  # Create browser instance (Capybara session)
118
118
  @browser = Capybara::Session.new(:selenium_firefox)
119
119
  @browser.spider = spider
120
- logger.debug "BrowserBuilder (selenium_firefox): created browser instance"
120
+ logger.debug 'BrowserBuilder (selenium_firefox): created browser instance'
121
121
 
122
122
  if @config[:extensions].present?
123
- logger.error "BrowserBuilder (selenium_firefox): `extensions` option not supported by Selenium, skipped"
123
+ logger.error 'BrowserBuilder (selenium_firefox): `extensions` option not supported by Selenium, skipped'
124
124
  end
125
125
 
126
126
  # Window size
127
- if size = @config[:window_size].presence
127
+ if (size = @config[:window_size].presence)
128
128
  @browser.current_window.resize_to(*size)
129
- logger.debug "BrowserBuilder (selenium_firefox): enabled window_size"
129
+ logger.debug 'BrowserBuilder (selenium_firefox): enabled window_size'
130
130
  end
131
131
 
132
132
  # Cookies
133
- if cookies = @config[:cookies].presence
133
+ if (cookies = @config[:cookies].presence)
134
134
  @browser.config.cookies = cookies
135
- logger.debug "BrowserBuilder (selenium_firefox): enabled custom cookies"
135
+ logger.debug 'BrowserBuilder (selenium_firefox): enabled custom cookies'
136
136
  end
137
137
 
138
138
  # Browser instance options
139
139
  # skip_request_errors
140
- if skip_errors = @config[:skip_request_errors].presence
140
+ if (skip_errors = @config[:skip_request_errors].presence)
141
141
  @browser.config.skip_request_errors = skip_errors
142
- logger.debug "BrowserBuilder (selenium_firefox): enabled skip_request_errors"
142
+ logger.debug 'BrowserBuilder (selenium_firefox): enabled skip_request_errors'
143
143
  end
144
144
 
145
145
  # retry_request_errors
146
- if retry_errors = @config[:retry_request_errors].presence
146
+ if (retry_errors = @config[:retry_request_errors].presence)
147
147
  @browser.config.retry_request_errors = retry_errors
148
- logger.debug "BrowserBuilder (selenium_firefox): enabled retry_request_errors"
148
+ logger.debug 'BrowserBuilder (selenium_firefox): enabled retry_request_errors'
149
149
  end
150
150
 
151
151
  # restart_if
152
- if requests_limit = @config.dig(:restart_if, :requests_limit).presence
152
+ if (requests_limit = @config.dig(:restart_if, :requests_limit).presence)
153
153
  @browser.config.restart_if[:requests_limit] = requests_limit
154
154
  logger.debug "BrowserBuilder (selenium_firefox): enabled restart_if.requests_limit >= #{requests_limit}"
155
155
  end
156
156
 
157
- if memory_limit = @config.dig(:restart_if, :memory_limit).presence
157
+ if (memory_limit = @config.dig(:restart_if, :memory_limit).presence)
158
158
  @browser.config.restart_if[:memory_limit] = memory_limit
159
159
  logger.debug "BrowserBuilder (selenium_firefox): enabled restart_if.memory_limit >= #{memory_limit}"
160
160
  end
@@ -162,34 +162,40 @@ module Kimurai
162
162
  # before_request clear_cookies
163
163
  if @config.dig(:before_request, :clear_cookies)
164
164
  @browser.config.before_request[:clear_cookies] = true
165
- logger.debug "BrowserBuilder (selenium_firefox): enabled before_request.clear_cookies"
165
+ logger.debug 'BrowserBuilder (selenium_firefox): enabled before_request.clear_cookies'
166
166
  end
167
167
 
168
168
  # before_request clear_and_set_cookies
169
169
  if @config.dig(:before_request, :clear_and_set_cookies)
170
- if cookies = @config[:cookies].presence
170
+ if (cookies = @config[:cookies].presence)
171
171
  @browser.config.cookies = cookies
172
172
  @browser.config.before_request[:clear_and_set_cookies] = true
173
- logger.debug "BrowserBuilder (selenium_firefox): enabled before_request.clear_and_set_cookies"
173
+ logger.debug 'BrowserBuilder (selenium_firefox): enabled before_request.clear_and_set_cookies'
174
174
  else
175
- logger.error "BrowserBuilder (selenium_firefox): cookies should be present to enable before_request.clear_and_set_cookies, skipped"
175
+ logger.error 'BrowserBuilder (selenium_firefox): cookies should be present to enable before_request.clear_and_set_cookies, skipped'
176
176
  end
177
177
  end
178
178
 
179
179
  # before_request change_user_agent
180
180
  if @config.dig(:before_request, :change_user_agent)
181
- logger.error "BrowserBuilder (selenium_firefox): before_request.change_user_agent option not supported by Selenium, skipped"
181
+ logger.error 'BrowserBuilder (selenium_firefox): before_request.change_user_agent option not supported by Selenium, skipped'
182
182
  end
183
183
 
184
184
  # before_request change_proxy
185
185
  if @config.dig(:before_request, :change_proxy)
186
- logger.error "BrowserBuilder (selenium_firefox): before_request.change_proxy option not supported by Selenium, skipped"
186
+ logger.error 'BrowserBuilder (selenium_firefox): before_request.change_proxy option not supported by Selenium, skipped'
187
187
  end
188
188
 
189
189
  # before_request delay
190
- if delay = @config.dig(:before_request, :delay).presence
190
+ if (delay = @config.dig(:before_request, :delay).presence)
191
191
  @browser.config.before_request[:delay] = delay
192
- logger.debug "BrowserBuilder (selenium_firefox): enabled before_request.delay"
192
+ logger.debug 'BrowserBuilder (selenium_firefox): enabled before_request.delay'
193
+ end
194
+
195
+ # encoding
196
+ if (encoding = @config[:encoding])
197
+ @browser.config.encoding = encoding
198
+ logger.debug "BrowserBuilder (selenium_firefox): enabled encoding: #{encoding}"
193
199
  end
194
200
 
195
201
  # return Capybara session instance
@@ -1,38 +1,14 @@
1
1
  module Kimurai
2
- class BrowserBuilder
3
- AVAILABLE_ENGINES = [
4
- :mechanize,
5
- :mechanize_standalone,
6
- :poltergeist_phantomjs,
7
- :selenium_firefox,
8
- :selenium_chrome
9
- ]
10
-
2
+ module BrowserBuilder
11
3
  def self.build(engine, config = {}, spider:)
12
- unless AVAILABLE_ENGINES.include? engine
13
- raise "BrowserBuilder: wrong name of engine, available engines: #{AVAILABLE_ENGINES.join(', ')}"
14
- end
15
-
16
- if config[:browser].present?
17
- raise "++++++ BrowserBuilder: browser option is depricated. Now all sub-options inside " \
18
- "`browser` should be placed right into `@config` hash, without `browser` parent key.\n" \
19
- "See more here: https://github.com/vifreefly/kimuraframework/blob/master/CHANGELOG.md#breaking-changes-110 ++++++"
4
+ begin
5
+ require "kimurai/browser_builder/#{engine}_builder"
6
+ rescue LoadError
20
7
  end
21
8
 
22
- case engine
23
- when :mechanize
24
- require_relative 'browser_builder/mechanize_builder'
25
- MechanizeBuilder.new(config, spider: spider).build
26
- when :selenium_chrome
27
- require_relative 'browser_builder/selenium_chrome_builder'
28
- SeleniumChromeBuilder.new(config, spider: spider).build
29
- when :poltergeist_phantomjs
30
- require_relative 'browser_builder/poltergeist_phantomjs_builder'
31
- PoltergeistPhantomJSBuilder.new(config, spider: spider).build
32
- when :selenium_firefox
33
- require_relative 'browser_builder/selenium_firefox_builder'
34
- SeleniumFirefoxBuilder.new(config, spider: spider).build
35
- end
9
+ builder_class_name = "#{engine}_builder".classify
10
+ builder = "Kimurai::BrowserBuilder::#{builder_class_name}".constantize
11
+ builder.new(config, spider: spider).build
36
12
  end
37
13
  end
38
14
  end
@@ -3,7 +3,7 @@ require 'capybara'
3
3
  Capybara.configure do |config|
4
4
  config.run_server = false
5
5
  config.default_selector = :xpath
6
- config.save_path = "tmp"
6
+ config.save_path = 'tmp'
7
7
  config.default_max_wait_time = 10
8
8
  config.ignore_hidden_elements = false
9
9
  config.threadsafe = true
@@ -1,62 +1,66 @@
1
1
  require 'pathname'
2
2
 
3
- class Capybara::Driver::Base
4
- attr_accessor :visited
5
- attr_writer :requests, :responses
3
+ module Capybara
4
+ module Driver
5
+ class Base
6
+ attr_accessor :visited
7
+ attr_writer :requests, :responses
6
8
 
7
- def requests
8
- @requests ||= 0
9
- end
9
+ def requests
10
+ @requests ||= 0
11
+ end
10
12
 
11
- def responses
12
- @responses ||= 0
13
- end
13
+ def responses
14
+ @responses ||= 0
15
+ end
14
16
 
15
- def current_memory
16
- driver_pid = pid
17
+ def current_memory
18
+ driver_pid = pid
17
19
 
18
- all = (get_descendant_processes(driver_pid) << driver_pid).uniq
19
- all.map { |pid| get_process_memory(pid) }.sum
20
- end
20
+ all = (get_descendant_processes(driver_pid) << driver_pid).uniq
21
+ all.map { |pid| get_process_memory(pid) }.sum
22
+ end
21
23
 
22
- private
24
+ private
23
25
 
24
- def get_descendant_processes(base)
25
- descendants = Hash.new { |ht, k| ht[k] = [k] }
26
- Hash[*`ps -eo pid,ppid`.scan(/\d+/).map(&:to_i)].each do |pid, ppid|
27
- descendants[ppid] << descendants[pid]
28
- end
26
+ def get_descendant_processes(base)
27
+ descendants = Hash.new { |ht, k| ht[k] = [k] }
28
+ Hash[*`ps -eo pid,ppid`.scan(/\d+/).map(&:to_i)].each do |pid, ppid|
29
+ descendants[ppid] << descendants[pid]
30
+ end
29
31
 
30
- descendants[base].flatten - [base]
31
- end
32
+ descendants[base].flatten - [base]
33
+ end
32
34
 
33
- # https://github.com/schneems/get_process_mem
34
- # Note: for Linux takes PSS (not RSS) memory (I think PSS better fits in this case)
35
- def get_process_memory(pid)
36
- case @platform ||= Gem::Platform.local.os
37
- when "linux"
38
- begin
39
- file = Pathname.new "/proc/#{pid}/smaps"
40
- return 0 unless file.exist?
41
-
42
- lines = file.each_line.select { |line| line.match(/^Pss/) }
43
- return 0 if lines.empty?
44
-
45
- lines.reduce(0) do |sum, line|
46
- line.match(/(?<value>(\d*\.{0,1}\d+))\s+(?<unit>\w\w)/) do |m|
47
- sum += m[:value].to_i
48
- end
35
+ # https://github.com/schneems/get_process_mem
36
+ # Note: for Linux takes PSS (not RSS) memory (I think PSS better fits in this case)
37
+ def get_process_memory(pid)
38
+ case @platform ||= Gem::Platform.local.os
39
+ when 'linux'
40
+ begin
41
+ file = Pathname.new "/proc/#{pid}/smaps"
42
+ return 0 unless file.exist?
49
43
 
50
- sum
44
+ lines = file.each_line.select { |line| line.match(/^Pss/) }
45
+ return 0 if lines.empty?
46
+
47
+ lines.reduce(0) do |sum, line|
48
+ line.match(/(?<value>(\d*\.{0,1}\d+))\s+(?<unit>\w\w)/) do |m|
49
+ sum += m[:value].to_i
50
+ end
51
+
52
+ sum
53
+ end
54
+ rescue Errno::EACCES
55
+ 0
56
+ end
57
+ when 'darwin'
58
+ mem = `ps -o rss= -p #{pid}`.strip
59
+ mem.empty? ? 0 : mem.to_i
60
+ else
61
+ raise "Can't check process memory, wrong type of platform: #{@platform}"
51
62
  end
52
- rescue Errno::EACCES
53
- 0
54
63
  end
55
- when "darwin"
56
- mem = `ps -o rss= -p #{pid}`.strip
57
- mem.empty? ? 0 : mem.to_i
58
- else
59
- raise "Can't check process memory, wrong type of platform: #{@platform}"
60
64
  end
61
65
  end
62
66
  end