ferrum 0.6 → 0.9

Sign up to get free protection for your applications and to get access to all the features.
@@ -4,6 +4,7 @@ require "base64"
4
4
  require "forwardable"
5
5
  require "ferrum/page"
6
6
  require "ferrum/contexts"
7
+ require "ferrum/browser/xvfb"
7
8
  require "ferrum/browser/process"
8
9
  require "ferrum/browser/client"
9
10
 
@@ -16,18 +17,20 @@ module Ferrum
16
17
  extend Forwardable
17
18
  delegate %i[default_context] => :contexts
18
19
  delegate %i[targets create_target create_page page pages windows] => :default_context
19
- delegate %i[goto back forward refresh
20
- at_css at_xpath css xpath current_url title body doctype
20
+ delegate %i[goto back forward refresh reload stop wait_for_reload
21
+ at_css at_xpath css xpath current_url current_title url title
22
+ body doctype set_content
21
23
  headers cookies network
22
24
  mouse keyboard
23
25
  screenshot pdf viewport_size
24
26
  frames frame_by main_frame
25
27
  evaluate evaluate_on evaluate_async execute
26
- add_script_tag add_style_tag
28
+ add_script_tag add_style_tag bypass_csp
27
29
  on] => :page
30
+ delegate %i[default_user_agent] => :process
28
31
 
29
32
  attr_reader :client, :process, :contexts, :logger, :js_errors,
30
- :slowmo, :base_url, :options, :window_size
33
+ :slowmo, :base_url, :options, :window_size, :ws_max_receive_size
31
34
  attr_writer :timeout
32
35
 
33
36
  def initialize(options = nil)
@@ -38,9 +41,10 @@ module Ferrum
38
41
  @original_window_size = @window_size
39
42
 
40
43
  @options = Hash(options.merge(window_size: @window_size))
41
- @logger, @timeout = @options.values_at(:logger, :timeout)
44
+ @logger, @timeout, @ws_max_receive_size =
45
+ @options.values_at(:logger, :timeout, :ws_max_receive_size)
42
46
  @js_errors = @options.fetch(:js_errors, false)
43
- @slowmo = @options[:slowmo].to_i
47
+ @slowmo = @options[:slowmo].to_f
44
48
 
45
49
  if @options.key?(:base_url)
46
50
  self.base_url = @options[:base_url]
@@ -67,7 +71,9 @@ module Ferrum
67
71
  end
68
72
 
69
73
  def extensions
70
- @extensions ||= Array(@options[:extensions]).map { |p| File.read(p) }
74
+ @extensions ||= Array(@options[:extensions]).map do |ext|
75
+ (ext.is_a?(Hash) && ext[:source]) || File.read(ext)
76
+ end
71
77
  end
72
78
 
73
79
  def timeout
@@ -111,7 +117,7 @@ module Ferrum
111
117
  def start
112
118
  Ferrum.started
113
119
  @process = Process.start(@options)
114
- @client = Client.new(self, @process.ws_url, 0, false)
120
+ @client = Client.new(self, @process.ws_url)
115
121
  @contexts = Contexts.new(self)
116
122
  end
117
123
  end
@@ -7,20 +7,25 @@ require "ferrum/browser/web_socket"
7
7
  module Ferrum
8
8
  class Browser
9
9
  class Client
10
- def initialize(browser, ws_url, start_id = 0, allow_slowmo = true)
11
- @command_id = start_id
12
- @pendings = Concurrent::Hash.new
10
+ INTERRUPTIONS = %w[Fetch.requestPaused Fetch.authRequired].freeze
11
+
12
+ def initialize(browser, ws_url, id_starts_with: 0)
13
13
  @browser = browser
14
- @slowmo = @browser.slowmo if allow_slowmo && @browser.slowmo > 0
15
- @ws = WebSocket.new(ws_url, @browser.logger)
16
- @subscriber = Subscriber.new
14
+ @command_id = id_starts_with
15
+ @pendings = Concurrent::Hash.new
16
+ @ws = WebSocket.new(ws_url, @browser.ws_max_receive_size, @browser.logger)
17
+ @subscriber, @interruptor = Subscriber.build(2)
17
18
 
18
19
  @thread = Thread.new do
19
20
  Thread.current.abort_on_exception = true
20
- Thread.current.report_on_exception = true if Thread.current.respond_to?(:report_on_exception=)
21
+ if Thread.current.respond_to?(:report_on_exception=)
22
+ Thread.current.report_on_exception = true
23
+ end
21
24
 
22
25
  while message = @ws.messages.pop
23
- if message.key?("method")
26
+ if INTERRUPTIONS.include?(message["method"])
27
+ @interruptor.async.call(message)
28
+ elsif message.key?("method")
24
29
  @subscriber.async.call(message)
25
30
  else
26
31
  @pendings[message["id"]]&.set(message)
@@ -33,7 +38,6 @@ module Ferrum
33
38
  pending = Concurrent::IVar.new
34
39
  message = build_message(method, params)
35
40
  @pendings[message[:id]] = pending
36
- sleep(@slowmo) if @slowmo
37
41
  @ws.send_message(message)
38
42
  data = pending.value!(@browser.timeout)
39
43
  @pendings.delete(message[:id])
@@ -46,7 +50,12 @@ module Ferrum
46
50
  end
47
51
 
48
52
  def on(event, &block)
49
- @subscriber.on(event, &block)
53
+ case event
54
+ when *INTERRUPTIONS
55
+ @interruptor.on(event, &block)
56
+ else
57
+ @subscriber.on(event, &block)
58
+ end
50
59
  end
51
60
 
52
61
  def close
@@ -0,0 +1,57 @@
1
+ # frozen_string_literal: true
2
+
3
module Ferrum
  class Browser
    # Assembles the command line for launching a browser: the executable path
    # followed by "--flag" / "--flag=value" switches. The switches are built
    # from the per-browser defaults object and the user supplied options.
    class Command
      NOT_FOUND = "Could not find an executable for the browser. Try to make " \
                  "it available on the PATH or set environment variable for " \
                  "example BROWSER_PATH=\"/usr/bin/chrome\"".freeze

      # Picks the defaults object matching options[:browser_name] and builds a
      # command with it.
      #
      # Currently only these browsers support CDP:
      # https://github.com/cyrus-and/chrome-remote-interface#implementations
      #
      # @param options [Hash] browser options
      # @param user_data_dir [String] profile directory handed to the browser
      # @return [Command]
      # @raise [NotImplementedError] for an unknown :browser_name
      def self.build(options, user_data_dir)
        defaults = case options[:browser_name]
                   when :firefox
                     Options::Firefox.options
                   when :chrome, :opera, :edge, nil
                     Options::Chrome.options
                   else
                     raise NotImplementedError, "not supported browser"
                   end

        new(defaults, options, user_data_dir)
      end

      attr_reader :defaults, :path, :options

      # @param defaults [#detect_path, #merge_required, #merge_default]
      # @param options [Hash] browser options
      # @param user_data_dir [String] profile directory handed to the browser
      # @raise [Cliver::Dependency::NotFound] when no executable path is given
      #   via options[:browser_path] or ENV["BROWSER_PATH"] and none is detected
      def initialize(defaults, options, user_data_dir)
        @flags = {}
        @defaults = defaults
        @options = options
        @user_data_dir = user_data_dir
        @path = options[:browser_path] || ENV["BROWSER_PATH"] || defaults.detect_path
        raise Cliver::Dependency::NotFound, NOT_FOUND unless @path

        merge_options
      end

      # @return [Boolean] whether the :xvfb option is set to a truthy value
      def xvfb?
        !!options[:xvfb]
      end

      # @return [Array<String>] executable path followed by the switches; a
      #   flag with a nil value is rendered without "=value"
      def to_a
        [path] + @flags.map { |k, v| v.nil? ? "--#{k}" : "--#{k}=#{v}" }
      end

      private

      # Layers the flags: required ones first, then the browser defaults
      # (unless :ignore_default_browser_options is set), then the user's
      # :browser_options which override everything else.
      def merge_options
        @flags = defaults.merge_required(@flags, options, @user_data_dir)

        unless options[:ignore_default_browser_options]
          @flags = defaults.merge_default(@flags, options)
        end

        # Hash() turns an explicit `browser_options: nil` into {} instead of
        # letting merge! fail on nil.
        @flags.merge!(Hash(options[:browser_options]))
      end
    end
  end
end
@@ -0,0 +1,46 @@
1
+ # frozen_string_literal: true
2
+
3
+ require "singleton"
4
+
5
module Ferrum
  class Browser
    module Options
      # Common behaviour of the per-browser option sets. Subclasses supply
      # the DEFAULT_OPTIONS, MAC_BIN_PATH and LINUX_BIN_PATH constants and
      # implement merge_required / merge_default.
      class Base
        BROWSER_HOST = "127.0.0.1"
        BROWSER_PORT = "0"

        include Singleton

        class << self
          # Returns the singleton instance of the receiving class.
          def options
            instance
          end
        end

        # The DEFAULT_OPTIONS constant of the concrete class.
        def to_h
          self.class::DEFAULT_OPTIONS
        end

        # Default options without the entries named in +keys+.
        def except(*keys)
          to_h.select { |name, _value| !keys.include?(name) }
        end

        # Looks for a browser executable: the first existing file from
        # MAC_BIN_PATH when Ferrum.mac? is true, otherwise the first
        # LINUX_BIN_PATH name that Cliver.detect resolves. Returns nil when
        # nothing is found.
        def detect_path
          if Ferrum.mac?
            return self.class::MAC_BIN_PATH.find { |candidate| File.exist?(candidate) }
          end

          self.class::LINUX_BIN_PATH.each do |executable|
            located = Cliver.detect(executable)
            return located if located
          end

          nil
        end

        # Must be implemented by subclasses.
        def merge_required(flags, options, user_data_dir)
          raise NotImplementedError
        end

        # Must be implemented by subclasses.
        def merge_default(flags, options)
          raise NotImplementedError
        end
      end
    end
  end
end
@@ -0,0 +1,73 @@
1
+ # frozen_string_literal: true
2
+
3
module Ferrum
  class Browser
    module Options
      # Command line switches and executable locations for Chrome/Chromium.
      class Chrome < Base
        DEFAULT_OPTIONS = {
          "headless" => nil,
          "disable-gpu" => nil,
          "hide-scrollbars" => nil,
          "mute-audio" => nil,
          "enable-automation" => nil,
          "disable-web-security" => nil,
          "disable-session-crashed-bubble" => nil,
          "disable-breakpad" => nil,
          "disable-sync" => nil,
          "no-first-run" => nil,
          "use-mock-keychain" => nil,
          "keep-alive-for-test" => nil,
          "disable-popup-blocking" => nil,
          "disable-extensions" => nil,
          "disable-hang-monitor" => nil,
          "disable-features" => "site-per-process,TranslateUI",
          "disable-translate" => nil,
          "disable-background-networking" => nil,
          "enable-features" => "NetworkService,NetworkServiceInProcess",
          "disable-background-timer-throttling" => nil,
          "disable-backgrounding-occluded-windows" => nil,
          "disable-client-side-phishing-detection" => nil,
          "disable-default-apps" => nil,
          "disable-dev-shm-usage" => nil,
          "disable-ipc-flooding-protection" => nil,
          "disable-prompt-on-repost" => nil,
          "disable-renderer-backgrounding" => nil,
          "force-color-profile" => "srgb",
          "metrics-recording-only" => nil,
          "safebrowsing-disable-auto-update" => nil,
          "password-store" => "basic",
          # Note: --no-sandbox is not needed if you properly set up a user in the container.
          # https://github.com/ebidel/lighthouse-ci/blob/master/builder/Dockerfile#L35-L40
          # "no-sandbox" => nil,
        }.freeze

        MAC_BIN_PATH = [
          "/Applications/Chromium.app/Contents/MacOS/Chromium",
          "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
        ].freeze
        LINUX_BIN_PATH = %w[chromium google-chrome-unstable google-chrome-beta
                            google-chrome chrome chromium-browser
                            google-chrome-stable].freeze

        # Returns +flags+ extended with the switches Ferrum always passes:
        # the remote debugging port and address (falling back to BROWSER_PORT
        # and BROWSER_HOST), the window size and the user data directory.
        def merge_required(flags, options, user_data_dir)
          required = {
            "remote-debugging-port" => options.fetch(:port, BROWSER_PORT),
            "remote-debugging-address" => options.fetch(:host, BROWSER_HOST),
            # Doesn't work on MacOS, so we need to set it by CDP
            "window-size" => options[:window_size].join(","),
            "user-data-dir" => user_data_dir
          }

          flags.merge(required)
        end

        # Returns the default switches overlaid with +flags+. When the
        # :headless option is falsy, "headless" and "disable-gpu" are left
        # out of the defaults.
        def merge_default(flags, options)
          headless = options.fetch(:headless, true)
          base = headless ? DEFAULT_OPTIONS : except("headless", "disable-gpu")
          base.merge(flags)
        end
      end
    end
  end
end
@@ -0,0 +1,34 @@
1
+ # frozen_string_literal: true
2
+
3
module Ferrum
  class Browser
    module Options
      # Command line switches and executable locations for Firefox.
      class Firefox < Base
        DEFAULT_OPTIONS = {
          "headless" => nil,
        }.freeze

        MAC_BIN_PATH = [
          "/Applications/Firefox.app/Contents/MacOS/firefox-bin"
        ].freeze
        LINUX_BIN_PATH = %w[firefox].freeze

        # Returns +flags+ extended with the "remote-debugger" address
        # ("host:port", falling back to BROWSER_HOST and BROWSER_PORT) and the
        # "profile" directory.
        def merge_required(flags, options, user_data_dir)
          port = options.fetch(:port, BROWSER_PORT)
          host = options.fetch(:host, BROWSER_HOST)
          required = { "remote-debugger" => "#{host}:#{port}",
                       "profile" => user_data_dir }

          flags.merge(required)
        end

        # Returns the default switches overlaid with +flags+. When the
        # :headless option is falsy, "headless" is left out of the defaults.
        def merge_default(flags, options)
          headless = options.fetch(:headless, true)
          base = headless ? DEFAULT_OPTIONS : except("headless")
          base.merge(flags)
        end
      end
    end
  end
end
@@ -5,6 +5,11 @@ require "net/http"
5
5
  require "json"
6
6
  require "addressable"
7
7
  require "tmpdir"
8
+ require "forwardable"
9
+ require "ferrum/browser/options/base"
10
+ require "ferrum/browser/options/chrome"
11
+ require "ferrum/browser/options/firefox"
12
+ require "ferrum/browser/command"
8
13
 
9
14
  module Ferrum
10
15
  class Browser
@@ -12,52 +17,14 @@ module Ferrum
12
17
  KILL_TIMEOUT = 2
13
18
  WAIT_KILLED = 0.05
14
19
  PROCESS_TIMEOUT = ENV.fetch("FERRUM_PROCESS_TIMEOUT", 2).to_i
15
- BROWSER_PATH = ENV["BROWSER_PATH"]
16
- BROWSER_HOST = "127.0.0.1"
17
- BROWSER_PORT = "0"
18
- DEFAULT_OPTIONS = {
19
- "headless" => nil,
20
- "disable-gpu" => nil,
21
- "hide-scrollbars" => nil,
22
- "mute-audio" => nil,
23
- "enable-automation" => nil,
24
- "disable-web-security" => nil,
25
- "disable-session-crashed-bubble" => nil,
26
- "disable-breakpad" => nil,
27
- "disable-sync" => nil,
28
- "no-first-run" => nil,
29
- "use-mock-keychain" => nil,
30
- "keep-alive-for-test" => nil,
31
- "disable-popup-blocking" => nil,
32
- "disable-extensions" => nil,
33
- "disable-hang-monitor" => nil,
34
- "disable-features" => "site-per-process,TranslateUI",
35
- "disable-translate" => nil,
36
- "disable-background-networking" => nil,
37
- "enable-features" => "NetworkService,NetworkServiceInProcess",
38
- "disable-background-timer-throttling" => nil,
39
- "disable-backgrounding-occluded-windows" => nil,
40
- "disable-client-side-phishing-detection" => nil,
41
- "disable-default-apps" => nil,
42
- "disable-dev-shm-usage" => nil,
43
- "disable-ipc-flooding-protection" => nil,
44
- "disable-prompt-on-repost" => nil,
45
- "disable-renderer-backgrounding" => nil,
46
- "force-color-profile" => "srgb",
47
- "metrics-recording-only" => nil,
48
- "safebrowsing-disable-auto-update" => nil,
49
- "password-store" => "basic",
50
- # Note: --no-sandbox is not needed if you properly setup a user in the container.
51
- # https://github.com/ebidel/lighthouse-ci/blob/master/builder/Dockerfile#L35-L40
52
- # "no-sandbox" => nil,
53
- }.freeze
54
-
55
- NOT_FOUND = "Could not find an executable for chrome. Try to make it " \
56
- "available on the PATH or set environment varible for " \
57
- "example BROWSER_PATH=\"/Applications/Chromium.app/Contents/MacOS/Chromium\""
58
-
59
-
60
- attr_reader :host, :port, :ws_url, :pid, :path, :options, :cmd
20
+
21
+ attr_reader :host, :port, :ws_url, :pid, :command,
22
+ :default_user_agent, :browser_version, :protocol_version,
23
+ :v8_version, :webkit_version, :xvfb
24
+
25
+
26
+ extend Forwardable
27
+ delegate path: :command
61
28
 
62
29
  def self.start(*args)
63
30
  new(*args).tap(&:start)
@@ -85,65 +52,24 @@ module Ferrum
85
52
  end
86
53
 
87
54
  def self.directory_remover(path)
88
- proc do
89
- begin
90
- FileUtils.remove_entry(path)
91
- rescue Errno::ENOENT
92
- end
93
- end
94
- end
95
-
96
- def self.detect_browser_path
97
- if RUBY_PLATFORM.include?("darwin")
98
- [
99
- "/Applications/Chromium.app/Contents/MacOS/Chromium",
100
- "/Applications/Google Chrome.app/Contents/MacOS/Google Chrome"
101
- ].find { |path| File.exist?(path) }
102
- else
103
- %w[chromium google-chrome-unstable google-chrome-beta google-chrome chrome chromium-browser google-chrome-stable].reduce(nil) do |path, exe|
104
- path = Cliver.detect(exe)
105
- break path if path
106
- end
107
- end
55
+ proc { FileUtils.remove_entry(path) rescue Errno::ENOENT }
108
56
  end
109
57
 
110
58
  def initialize(options)
111
- @options = {}
112
-
113
- @path = options[:browser_path] || BROWSER_PATH || self.class.detect_browser_path
114
-
115
59
  if options[:url]
116
60
  url = URI.join(options[:url].to_s, "/json/version")
117
61
  response = JSON.parse(::Net::HTTP.get(url))
118
62
  set_ws_url(response["webSocketDebuggerUrl"])
63
+ parse_browser_versions
119
64
  return
120
65
  end
121
66
 
122
- # Doesn't work on MacOS, so we need to set it by CDP as well
123
- @options.merge!("window-size" => options[:window_size].join(","))
124
-
125
- port = options.fetch(:port, BROWSER_PORT)
126
- @options.merge!("remote-debugging-port" => port)
127
-
128
- host = options.fetch(:host, BROWSER_HOST)
129
- @options.merge!("remote-debugging-address" => host)
130
-
131
- @temp_user_data_dir = Dir.mktmpdir
132
- ObjectSpace.define_finalizer(self, self.class.directory_remover(@temp_user_data_dir))
133
- @options.merge!("user-data-dir" => @temp_user_data_dir)
134
-
135
- @options = DEFAULT_OPTIONS.merge(@options)
136
-
137
- unless options.fetch(:headless, true)
138
- @options.delete("headless")
139
- @options.delete("disable-gpu")
140
- end
141
-
67
+ @logger = options[:logger]
142
68
  @process_timeout = options.fetch(:process_timeout, PROCESS_TIMEOUT)
143
69
 
144
- @options.merge!(options.fetch(:browser_options, {}))
145
-
146
- @logger = options[:logger]
70
+ tmpdir = Dir.mktmpdir
71
+ ObjectSpace.define_finalizer(self, self.class.directory_remover(tmpdir))
72
+ @command = Command.build(options, tmpdir)
147
73
  end
148
74
 
149
75
  def start
@@ -156,21 +82,29 @@ module Ferrum
156
82
  process_options[:pgroup] = true unless Ferrum.windows?
157
83
  process_options[:out] = process_options[:err] = write_io
158
84
 
159
- raise Cliver::Dependency::NotFound.new(NOT_FOUND) unless @path
85
+ if @command.xvfb?
86
+ @xvfb = Xvfb.start(@command.options)
87
+ ObjectSpace.define_finalizer(self, self.class.process_killer(@xvfb.pid))
88
+ end
160
89
 
161
- @cmd = [@path] + @options.map { |k, v| v.nil? ? "--#{k}" : "--#{k}=#{v}" }
162
- @pid = ::Process.spawn(*@cmd, process_options)
90
+ @pid = ::Process.spawn(Hash(@xvfb&.to_env), *@command.to_a, process_options)
163
91
  ObjectSpace.define_finalizer(self, self.class.process_killer(@pid))
164
92
 
165
93
  parse_ws_url(read_io, @process_timeout)
94
+ parse_browser_versions
166
95
  ensure
167
96
  close_io(read_io, write_io)
168
97
  end
169
98
  end
170
99
 
171
100
  def stop
172
- kill if @pid
173
- remove_temp_user_data_dir if @temp_user_data_dir
101
+ if @pid
102
+ kill(@pid)
103
+ kill(@xvfb.pid) if @xvfb&.pid
104
+ @pid = nil
105
+ end
106
+
107
+ remove_user_data_dir if @user_data_dir
174
108
  ObjectSpace.undefine_finalizer(self)
175
109
  end
176
110
 
@@ -181,14 +115,13 @@ module Ferrum
181
115
 
182
116
  private
183
117
 
184
- def kill
185
- self.class.process_killer(@pid).call
186
- @pid = nil
118
+ def kill(pid)
119
+ self.class.process_killer(pid).call
187
120
  end
188
121
 
189
- def remove_temp_user_data_dir
190
- self.class.directory_remover(@temp_user_data_dir).call
191
- @temp_user_data_dir = nil
122
+ def remove_user_data_dir
123
+ self.class.directory_remover(@user_data_dir).call
124
+ @user_data_dir = nil
192
125
  end
193
126
 
194
127
  def parse_ws_url(read_io, timeout)
@@ -210,8 +143,8 @@ module Ferrum
210
143
  end
211
144
 
212
145
  unless ws_url
213
- @logger.puts output if @logger
214
- raise "Chrome process did not produce websocket url within #{timeout} seconds"
146
+ @logger.puts(output) if @logger
147
+ raise ProcessTimeoutError.new(timeout)
215
148
  end
216
149
  end
217
150
 
@@ -221,12 +154,25 @@ module Ferrum
221
154
  @port = @ws_url.port
222
155
  end
223
156
 
157
+ def parse_browser_versions
158
+ return unless ws_url.is_a?(Addressable::URI)
159
+
160
+ version_url = URI.parse(ws_url.merge(scheme: "http", path: "/json/version"))
161
+ response = JSON.parse(::Net::HTTP.get(version_url))
162
+
163
+ @v8_version = response["V8-Version"]
164
+ @browser_version = response["Browser"]
165
+ @webkit_version = response["WebKit-Version"]
166
+ @default_user_agent = response["User-Agent"]
167
+ @protocol_version = response["Protocol-Version"]
168
+ end
169
+
224
170
  def close_io(*ios)
225
171
  ios.each do |io|
226
172
  begin
227
173
  io.close unless io.closed?
228
174
  rescue IOError
229
- raise unless RUBY_ENGINE == 'jruby'
175
+ raise unless RUBY_ENGINE == "jruby"
230
176
  end
231
177
  end
232
178
  end