puppeteer-ruby 0.37.4 → 0.40.1

Sign up to get free protection for your applications and to get access to all the features.
@@ -27,50 +27,67 @@ class Puppeteer::FrameManager
27
27
  # @type {!Set<string>}
28
28
  @isolated_worlds = Set.new
29
29
 
30
- @client.on_event('Page.frameAttached') do |event|
31
- handle_frame_attached(event['frameId'], event['parentFrameId'])
30
+ setup_listeners(@client)
31
+ end
32
+
33
+ private def setup_listeners(client)
34
+ client.on_event('Page.frameAttached') do |event|
35
+ handle_frame_attached(client, event['frameId'], event['parentFrameId'])
32
36
  end
33
- @client.on_event('Page.frameNavigated') do |event|
37
+ client.on_event('Page.frameNavigated') do |event|
34
38
  handle_frame_navigated(event['frame'])
35
39
  end
36
- @client.on_event('Page.navigatedWithinDocument') do |event|
40
+ client.on_event('Page.navigatedWithinDocument') do |event|
37
41
  handle_frame_navigated_within_document(event['frameId'], event['url'])
38
42
  end
39
- @client.on_event('Page.frameDetached') do |event|
40
- handle_frame_detached(event['frameId'])
43
+ client.on_event('Page.frameDetached') do |event|
44
+ handle_frame_detached(event['frameId'], event['reason'])
41
45
  end
42
- @client.on_event('Page.frameStoppedLoading') do |event|
46
+ client.on_event('Page.frameStoppedLoading') do |event|
43
47
  handle_frame_stopped_loading(event['frameId'])
44
48
  end
45
- @client.on_event('Runtime.executionContextCreated') do |event|
46
- handle_execution_context_created(event['context'])
49
+ client.on_event('Runtime.executionContextCreated') do |event|
50
+ handle_execution_context_created(event['context'], client)
47
51
  end
48
- @client.on_event('Runtime.executionContextDestroyed') do |event|
49
- handle_execution_context_destroyed(event['executionContextId'])
52
+ client.on_event('Runtime.executionContextDestroyed') do |event|
53
+ handle_execution_context_destroyed(event['executionContextId'], client)
50
54
  end
51
- @client.on_event('Runtime.executionContextsCleared') do |event|
52
- handle_execution_contexts_cleared
55
+ client.on_event('Runtime.executionContextsCleared') do |event|
56
+ handle_execution_contexts_cleared(client)
53
57
  end
54
- @client.on_event('Page.lifecycleEvent') do |event|
58
+ client.on_event('Page.lifecycleEvent') do |event|
55
59
  handle_lifecycle_event(event)
56
60
  end
61
+ client.on_event('Target.attachedToTarget') do |event|
62
+ handle_attached_to_target(event)
63
+ end
64
+ client.on_event('Target.detachedFromTarget') do |event|
65
+ handle_detached_from_target(event)
66
+ end
57
67
  end
58
68
 
59
69
  attr_reader :client, :timeout_settings
60
70
 
61
- private def init
71
+ private def init(cdp_session = nil)
72
+ client = cdp_session || @client
73
+
62
74
  results = await_all(
63
- @client.async_send_message('Page.enable'),
64
- @client.async_send_message('Page.getFrameTree'),
75
+ client.async_send_message('Page.enable'),
76
+ client.async_send_message('Page.getFrameTree'),
65
77
  )
66
78
  frame_tree = results.last['frameTree']
67
- handle_frame_tree(frame_tree)
79
+ handle_frame_tree(client, frame_tree)
68
80
  await_all(
69
- @client.async_send_message('Page.setLifecycleEventsEnabled', enabled: true),
70
- @client.async_send_message('Runtime.enable'),
81
+ client.async_send_message('Page.setLifecycleEventsEnabled', enabled: true),
82
+ client.async_send_message('Runtime.enable'),
71
83
  )
72
- ensure_isolated_world(UTILITY_WORLD_NAME)
73
- @network_manager.init
84
+ ensure_isolated_world(client, UTILITY_WORLD_NAME)
85
+ @network_manager.init unless cdp_session
86
+ rescue => err
87
+ # The target might have been closed before the initialization finished.
88
+ return if err.message.include?('Target closed') || err.message.include?('Session closed')
89
+
90
+ raise
74
91
  end
75
92
 
76
93
  define_async_method :async_init
@@ -154,6 +171,28 @@ class Puppeteer::FrameManager
154
171
  watcher.navigation_response
155
172
  end
156
173
 
174
+ # @param event [Hash]
175
+ def handle_attached_to_target(event)
176
+ return if event['targetInfo']['type'] != 'iframe'
177
+
178
+ frame = @frames[event['targetInfo']['targetId']]
179
+ session = Puppeteer::Connection.from_session(@client).session(event['sessionId'])
180
+
181
+ frame.send(:update_client, session)
182
+ setup_listeners(session)
183
+ async_init(session)
184
+ end
185
+
186
+ # @param event [Hash]
187
+ def handle_detached_from_target(event)
188
+ frame = @frames[event['targetId']]
189
+ if frame && frame.oop_frame?
190
+ # When an OOP iframe is removed from the page, it
191
+ # will only get a Target.detachedFromTarget event.
192
+ remove_frame_recursively(frame)
193
+ end
194
+ end
195
+
157
196
  # @param event [Hash]
158
197
  def handle_lifecycle_event(event)
159
198
  frame = @frames[event['frameId']]
@@ -170,16 +209,17 @@ class Puppeteer::FrameManager
170
209
  emit_event(FrameManagerEmittedEvents::LifecycleEvent, frame)
171
210
  end
172
211
 
212
+ # @param session [Puppeteer::CDPSession]
173
213
  # @param frame_tree [Hash]
174
- def handle_frame_tree(frame_tree)
214
+ def handle_frame_tree(session, frame_tree)
175
215
  if frame_tree['frame']['parentId']
176
- handle_frame_attached(frame_tree['frame']['id'], frame_tree['frame']['parentId'])
216
+ handle_frame_attached(session, frame_tree['frame']['id'], frame_tree['frame']['parentId'])
177
217
  end
178
218
  handle_frame_navigated(frame_tree['frame'])
179
219
  return if !frame_tree['childFrames']
180
220
 
181
221
  frame_tree['childFrames'].each do |child|
182
- handle_frame_tree(child)
222
+ handle_frame_tree(session, child)
183
223
  end
184
224
  end
185
225
 
@@ -204,15 +244,25 @@ class Puppeteer::FrameManager
204
244
  @frames[frame_id]
205
245
  end
206
246
 
207
- # @param {string} frameId
208
- # @param {?string} parentFrameId
209
- def handle_frame_attached(frame_id, parent_frame_id)
210
- return if @frames.has_key?(frame_id)
247
+ # @param session [Puppeteer::CDPSession]
248
+ # @param frame_id [String]
249
+ # @param parent_frame_id [String|nil]
250
+ def handle_frame_attached(session, frame_id, parent_frame_id)
251
+ if @frames.has_key?(frame_id)
252
+ frame = @frames[frame_id]
253
+ if session && frame.oop_frame?
254
+ # If an OOP iframe becomes a normal iframe again
255
+ # it is first attached to the parent page before
256
+ # the target is removed.
257
+ frame.send(:update_client, session)
258
+ end
259
+ return
260
+ end
211
261
  if !parent_frame_id
212
262
  raise ArgymentError.new('parent_frame_id must not be nil')
213
263
  end
214
264
  parent_frame = @frames[parent_frame_id]
215
- frame = Puppeteer::Frame.new(self, @client, parent_frame, frame_id)
265
+ frame = Puppeteer::Frame.new(self, parent_frame, frame_id, session)
216
266
  @frames[frame_id] = frame
217
267
 
218
268
  emit_event(FrameManagerEmittedEvents::FrameAttached, frame)
@@ -247,7 +297,7 @@ class Puppeteer::FrameManager
247
297
  frame.id = frame_payload['id']
248
298
  else
249
299
  # Initial main frame navigation.
250
- frame = Puppeteer::Frame.new(self, @client, nil, frame_payload['id'])
300
+ frame = Puppeteer::Frame.new(self, nil, frame_payload['id'], @client)
251
301
  end
252
302
  @frames[frame_payload['id']] = frame
253
303
  @main_frame = frame
@@ -259,22 +309,26 @@ class Puppeteer::FrameManager
259
309
  emit_event(FrameManagerEmittedEvents::FrameNavigated, frame)
260
310
  end
261
311
 
312
+ # @param session [Puppeteer::CDPSession]
262
313
  # @param name [String]
263
- def ensure_isolated_world(name)
264
- return if @isolated_worlds.include?(name)
265
- @isolated_worlds << name
314
+ private def ensure_isolated_world(session, name)
315
+ key = "#{session.id}:#{name}"
316
+ return if @isolated_worlds.include?(key)
317
+ @isolated_worlds << key
266
318
 
267
- @client.send_message('Page.addScriptToEvaluateOnNewDocument',
319
+ session.send_message('Page.addScriptToEvaluateOnNewDocument',
268
320
  source: "//# sourceURL=#{Puppeteer::ExecutionContext::EVALUATION_SCRIPT_URL}",
269
321
  worldName: name,
270
322
  )
271
- create_isolated_worlds_promises = frames.map do |frame|
272
- @client.async_send_message('Page.createIsolatedWorld',
273
- frameId: frame.id,
274
- grantUniveralAccess: true,
275
- worldName: name,
276
- )
277
- end
323
+ create_isolated_worlds_promises = frames.
324
+ select { |frame| frame._client == session }.
325
+ map do |frame|
326
+ session.async_send_message('Page.createIsolatedWorld',
327
+ frameId: frame.id,
328
+ grantUniveralAccess: true,
329
+ worldName: name,
330
+ )
331
+ end
278
332
  await_all(*create_isolated_worlds_promises)
279
333
  end
280
334
 
@@ -289,19 +343,31 @@ class Puppeteer::FrameManager
289
343
  end
290
344
 
291
345
  # @param frame_id [String]
292
- def handle_frame_detached(frame_id)
346
+ # @param reason [String]
347
+ def handle_frame_detached(frame_id, reason)
293
348
  frame = @frames[frame_id]
294
- if frame
295
- remove_frame_recursively(frame)
349
+ if reason == 'remove'
350
+ # Only remove the frame if the reason for the detached event is
351
+ # an actual removal of the frame.
352
+ # For frames that become OOP iframes, the reason would be 'swap'.
353
+ if frame
354
+ remove_frame_recursively(frame)
355
+ end
296
356
  end
297
357
  end
298
358
 
299
359
  # @param context_payload [Hash]
300
- def handle_execution_context_created(context_payload)
360
+ # @param session [Puppeteer::CDPSession]
361
+ def handle_execution_context_created(context_payload, session)
301
362
  frame = if_present(context_payload.dig('auxData', 'frameId')) { |frame_id| @frames[frame_id] }
302
363
 
303
364
  world = nil
304
365
  if frame
366
+ # The original implementation below is commented out so that Frame#evaluate can be used on OOP iframes.
367
+ #
368
+ # # Only care about execution contexts created for the current session.
369
+ # return if @client != session
370
+
305
371
  if context_payload.dig('auxData', 'isDefault')
306
372
  world = frame.main_world
307
373
  elsif context_payload['name'] == UTILITY_WORLD_NAME && !frame.secondary_world.has_context?
@@ -316,34 +382,45 @@ class Puppeteer::FrameManager
316
382
  @isolated_worlds << context_payload['name']
317
383
  end
318
384
 
319
- context = Puppeteer::ExecutionContext.new(@client, context_payload, world)
385
+ context = Puppeteer::ExecutionContext.new(frame&._client || @client, context_payload, world)
320
386
  if world
321
387
  world.context = context
322
388
  end
323
- @context_id_to_context[context_payload['id']] = context
389
+ key = "#{session.id}:#{context_payload['id']}"
390
+ @context_id_to_context[key] = context
324
391
  end
325
392
 
326
- # @param {number} executionContextId
327
- def handle_execution_context_destroyed(execution_context_id)
328
- context = @context_id_to_context[execution_context_id]
393
+ # @param execution_context_id [Integer]
394
+ # @param session [Puppeteer::CDPSession]
395
+ def handle_execution_context_destroyed(execution_context_id, session)
396
+ key = "#{session.id}:#{execution_context_id}"
397
+ context = @context_id_to_context[key]
329
398
  return unless context
330
- @context_id_to_context.delete(execution_context_id)
399
+ @context_id_to_context.delete(key)
331
400
  if context.world
332
401
  context.world.delete_context(execution_context_id)
333
402
  end
334
403
  end
335
404
 
336
- def handle_execution_contexts_cleared
337
- @context_id_to_context.each do |execution_context_id, context|
338
- if context.world
339
- context.world.delete_context(execution_context_id)
405
+ # @param session [Puppeteer::CDPSession]
406
+ def handle_execution_contexts_cleared(session)
407
+ @context_id_to_context.select! do |execution_context_id, context|
408
+ # Make sure to only clear execution contexts that belong
409
+ # to the current session.
410
+ if context.client != session
411
+ true # keep
412
+ else
413
+ if context.world
414
+ context.world.delete_context(execution_context_id)
415
+ end
416
+ false # remove
340
417
  end
341
418
  end
342
- @context_id_to_context.clear
343
419
  end
344
420
 
345
- def execution_context_by_id(context_id)
346
- @context_id_to_context[context_id] or raise "INTERNAL ERROR: missing context with id = #{context_id}"
421
+ def execution_context_by_id(context_id, session)
422
+ key = "#{session.id}:#{context_id}"
423
+ @context_id_to_context[key] or raise "INTERNAL ERROR: missing context with id = #{context_id}"
347
424
  end
348
425
 
349
426
  # @param {!Frame} frame
@@ -72,7 +72,15 @@ class Puppeteer::HTTPRequest
72
72
  end
73
73
 
74
74
  attr_reader :internal
75
- attr_reader :url, :resource_type, :method, :post_data, :headers, :response, :frame
75
+ attr_reader :url, :resource_type, :method, :post_data, :headers, :response, :frame, :initiator
76
+
77
+ def inspect
78
+ values = %i[request_id method url].map do |sym|
79
+ value = instance_variable_get(:"@#{sym}")
80
+ "@#{sym}=#{value}"
81
+ end
82
+ "#<Puppeteer::HTTPRequest #{values.join(' ')}>"
83
+ end
76
84
 
77
85
  private def assert_interception_allowed
78
86
  unless @allow_interception
@@ -31,7 +31,8 @@ class Puppeteer::HTTPResponse
31
31
  # @param client [Puppeteer::CDPSession]
32
32
  # @param request [Puppeteer::HTTPRequest]
33
33
  # @param response_payload [Hash]
34
- def initialize(client, request, response_payload)
34
+ # @param extra_info [Hash|nil]
35
+ def initialize(client, request, response_payload, extra_info)
35
36
  @client = client
36
37
  @request = request
37
38
 
@@ -41,14 +42,15 @@ class Puppeteer::HTTPResponse
41
42
  port: response_payload['remotePort'],
42
43
  )
43
44
 
44
- @status = response_payload['status']
45
- @status_text = response_payload['statusText']
45
+ @status_text = parse_štatus_text_from_extra_info(extra_info) || response_payload['statusText']
46
46
  @url = request.url
47
47
  @from_disk_cache = !!response_payload['fromDiskCache']
48
48
  @from_service_worker = !!response_payload['fromServiceWorker']
49
49
 
50
+ @status = extra_info ? extra_info['statusCode'] : response_payload['status']
50
51
  @headers = {}
51
- response_payload['headers'].each do |key, value|
52
+ headers = extra_info ? extra_info['headers'] : response_payload['headers']
53
+ headers.each do |key, value|
52
54
  @headers[key.downcase] = value
53
55
  end
54
56
  @security_details = if_present(response_payload['securityDetails']) do |security_payload|
@@ -62,6 +64,25 @@ class Puppeteer::HTTPResponse
62
64
 
63
65
  attr_reader :remote_address, :url, :status, :status_text, :headers, :security_details, :request
64
66
 
67
+ def inspect
68
+ values = %i[remote_address url status status_text headers security_details request].map do |sym|
69
+ value = instance_variable_get(:"@#{sym}")
70
+ "@#{sym}=#{value}"
71
+ end
72
+ "#<Puppeteer::HTTPRequest #{values.join(' ')}>"
73
+ end
74
+
75
+ private def parse_štatus_text_from_extra_info(extra_info)
76
+ return nil if !extra_info || !extra_info['headersText']
77
+ first_line = extra_info['headersText'].split("\r").first
78
+ return nil unless first_line
79
+ /[^ ]* [^ ]* (.*)/.match(first_line) do |m|
80
+ return m[1]
81
+ end
82
+
83
+ nil
84
+ end
85
+
65
86
  # @return [Boolean]
66
87
  def ok?
67
88
  @status == 0 || (@status >= 200 && @status <= 299)
@@ -36,6 +36,14 @@ class Puppeteer::JSHandle
36
36
 
37
37
  attr_reader :context, :remote_object
38
38
 
39
+ def inspect
40
+ values = %i[context remote_object disposed].map do |sym|
41
+ value = instance_variable_get(:"@#{sym}")
42
+ "@#{sym}=#{value}"
43
+ end
44
+ "#<Puppeteer::JSHandle #{values.join(' ')}>"
45
+ end
46
+
39
47
  # @return [Puppeteer::ExecutionContext]
40
48
  def execution_context
41
49
  @context
@@ -27,21 +27,25 @@ module Puppeteer::Launcher
27
27
  @chrome_arg_options.args.dup
28
28
  end
29
29
 
30
- #
31
- # let temporaryUserDataDir = null;
32
-
33
30
  if chrome_arguments.none? { |arg| arg.start_with?('--remote-debugging-') }
34
31
  if @launch_options.pipe?
35
32
  chrome_arguments << '--remote-debugging-pipe'
36
33
  else
37
- chrome_arguments << '--remote-debugging-port=0'
34
+ chrome_arguments << "--remote-debugging-port=#{@chrome_arg_options.debugging_port}"
38
35
  end
39
36
  end
40
37
 
41
- temporary_user_data_dir = nil
42
- if chrome_arguments.none? { |arg| arg.start_with?('--user-data-dir') }
43
- temporary_user_data_dir = Dir.mktmpdir('puppeteer_dev_chrome_profile-')
44
- chrome_arguments << "--user-data-dir=#{temporary_user_data_dir}"
38
+ user_data_dir = chrome_arguments.find { |arg| arg.start_with?('--user-data-dir') }
39
+ if user_data_dir
40
+ user_data_dir = user_data_dir.split('=').last
41
+ unless File.exist?(user_data_dir)
42
+ raise ArgumentError.new("Chrome user data dir not found at '#{user_data_dir}'")
43
+ end
44
+ using_temp_user_data_dir = false
45
+ else
46
+ user_data_dir = Dir.mktmpdir('puppeteer_dev_chrome_profile-', ENV['PUPPETEER_TMP_DIR'])
47
+ chrome_arguments << "--user-data-dir=#{user_data_dir}"
48
+ using_temp_user_data_dir = true
45
49
  end
46
50
 
47
51
  chrome_executable =
@@ -51,7 +55,13 @@ module Puppeteer::Launcher
51
55
  @launch_options.executable_path || fallback_executable_path
52
56
  end
53
57
  use_pipe = chrome_arguments.include?('--remote-debugging-pipe')
54
- runner = Puppeteer::BrowserRunner.new(chrome_executable, chrome_arguments, temporary_user_data_dir)
58
+ runner = Puppeteer::BrowserRunner.new(
59
+ false,
60
+ chrome_executable,
61
+ chrome_arguments,
62
+ user_data_dir,
63
+ using_temp_user_data_dir,
64
+ )
55
65
  runner.start(
56
66
  handle_SIGHUP: @launch_options.handle_SIGHUP?,
57
67
  handle_SIGTERM: @launch_options.handle_SIGTERM?,
@@ -61,30 +71,39 @@ module Puppeteer::Launcher
61
71
  pipe: use_pipe,
62
72
  )
63
73
 
74
+ browser =
75
+ begin
76
+ connection = runner.setup_connection(
77
+ use_pipe: use_pipe,
78
+ timeout: @launch_options.timeout,
79
+ slow_mo: @browser_options.slow_mo,
80
+ preferred_revision: @preferred_revision,
81
+ )
82
+
83
+ Puppeteer::Browser.create(
84
+ connection: connection,
85
+ context_ids: [],
86
+ ignore_https_errors: @browser_options.ignore_https_errors?,
87
+ default_viewport: @browser_options.default_viewport,
88
+ process: runner.proc,
89
+ close_callback: -> { runner.close },
90
+ )
91
+ rescue
92
+ runner.kill
93
+ raise
94
+ end
95
+
64
96
  begin
65
- connection = runner.setup_connection(
66
- use_pipe: use_pipe,
97
+ browser.wait_for_target(
98
+ predicate: ->(target) { target.type == 'page' },
67
99
  timeout: @launch_options.timeout,
68
- slow_mo: @browser_options.slow_mo,
69
- preferred_revision: @preferred_revision,
70
- )
71
-
72
- browser = Puppeteer::Browser.create(
73
- connection: connection,
74
- context_ids: [],
75
- ignore_https_errors: @browser_options.ignore_https_errors?,
76
- default_viewport: @browser_options.default_viewport,
77
- process: runner.proc,
78
- close_callback: -> { runner.close },
79
100
  )
80
-
81
- browser.wait_for_target(predicate: ->(target) { target.type == 'page' })
82
-
83
- browser
84
101
  rescue
85
- runner.kill
102
+ browser.close
86
103
  raise
87
104
  end
105
+
106
+ browser
88
107
  end
89
108
 
90
109
  class DefaultArgs
@@ -267,15 +286,15 @@ module Puppeteer::Launcher
267
286
  end
268
287
 
269
288
  chrome_path = chrome_path_map[channel]
270
- if chrome_path.is_a?(Proc)
271
- chrome_path = chrome_path.call
272
- end
273
-
274
289
  unless chrome_path
275
290
  raise ArgumentError.new("Invalid channel: '#{channel}'. Allowed channel is #{chrome_path_map.keys}")
276
291
  end
277
292
 
278
- unless File.exist?(chrome_path)
293
+ if chrome_path.is_a?(Proc)
294
+ chrome_path = chrome_path.call
295
+ end
296
+
297
+ if !chrome_path || !File.exist?(chrome_path)
279
298
  raise "#{channel} is not installed on this system.\nExpected path: #{chrome_path}"
280
299
  end
281
300
 
@@ -34,9 +34,10 @@ module Puppeteer::Launcher
34
34
  if @headless.nil?
35
35
  @headless = !@devtools
36
36
  end
37
+ @debugging_port = options[:debugging_port] || 0
37
38
  end
38
39
 
39
- attr_reader :args, :user_data_dir
40
+ attr_reader :args, :user_data_dir, :debugging_port
40
41
 
41
42
  def headless?
42
43
  @headless