puppeteer-ruby 0.0.23 → 0.29.0
Sign up to get free protection for your applications and to get access to all the features.
- checksums.yaml +4 -4
- data/.circleci/config.yml +32 -22
- data/.github/ISSUE_TEMPLATE/bug_report.md +17 -0
- data/.github/ISSUE_TEMPLATE/feature_request.md +15 -0
- data/.github/workflows/docs.yml +2 -2
- data/.github/workflows/reviewdog.yml +1 -1
- data/CHANGELOG.md +88 -0
- data/Dockerfile +1 -1
- data/README.md +18 -2
- data/lib/puppeteer.rb +3 -0
- data/lib/puppeteer/aria_query_handler.rb +71 -0
- data/lib/puppeteer/browser.rb +0 -2
- data/lib/puppeteer/concurrent_ruby_utils.rb +6 -3
- data/lib/puppeteer/custom_query_handler.rb +51 -0
- data/lib/puppeteer/define_async_method.rb +15 -6
- data/lib/puppeteer/dom_world.rb +231 -74
- data/lib/puppeteer/element_handle.rb +13 -22
- data/lib/puppeteer/execution_context.rb +12 -0
- data/lib/puppeteer/launcher/chrome.rb +4 -1
- data/lib/puppeteer/page.rb +66 -52
- data/lib/puppeteer/page/screenshot_options.rb +2 -2
- data/lib/puppeteer/page/screenshot_task_queue.rb +13 -0
- data/lib/puppeteer/query_handler_manager.rb +65 -0
- data/lib/puppeteer/remote_object.rb +12 -0
- data/lib/puppeteer/target.rb +2 -4
- data/lib/puppeteer/version.rb +1 -1
- data/lib/puppeteer/wait_task.rb +16 -4
- data/puppeteer-ruby.gemspec +6 -4
- metadata +45 -10
@@ -10,12 +10,21 @@ module Puppeteer::DefineAsyncMethod
|
|
10
10
|
end
|
11
11
|
|
12
12
|
original_method = instance_method(async_method_name[6..-1])
|
13
|
-
define_method(async_method_name) do |*args|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
13
|
+
define_method(async_method_name) do |*args, **kwargs|
|
14
|
+
if kwargs.empty? # for Ruby < 2.7
|
15
|
+
Concurrent::Promises.future do
|
16
|
+
original_method.bind(self).call(*args)
|
17
|
+
rescue => err
|
18
|
+
Logger.new($stderr).warn(err)
|
19
|
+
raise err
|
20
|
+
end
|
21
|
+
else
|
22
|
+
Concurrent::Promises.future do
|
23
|
+
original_method.bind(self).call(*args, **kwargs)
|
24
|
+
rescue => err
|
25
|
+
Logger.new($stderr).warn(err)
|
26
|
+
raise err
|
27
|
+
end
|
19
28
|
end
|
20
29
|
end
|
21
30
|
end
|
data/lib/puppeteer/dom_world.rb
CHANGED
@@ -4,6 +4,47 @@ require 'thread'
|
|
4
4
|
class Puppeteer::DOMWorld
|
5
5
|
using Puppeteer::DefineAsyncMethod
|
6
6
|
|
7
|
+
class BindingFunction
|
8
|
+
def initialize(name:, proc:)
|
9
|
+
@name = name
|
10
|
+
@proc = proc
|
11
|
+
end
|
12
|
+
|
13
|
+
def call(*args)
|
14
|
+
@proc.call(*args)
|
15
|
+
end
|
16
|
+
|
17
|
+
attr_reader :name
|
18
|
+
|
19
|
+
def page_binding_init_string
|
20
|
+
<<~JAVASCRIPT
|
21
|
+
(type, bindingName) => {
|
22
|
+
/* Cast window to any here as we're about to add properties to it
|
23
|
+
* via win[bindingName] which TypeScript doesn't like.
|
24
|
+
*/
|
25
|
+
const win = window;
|
26
|
+
const binding = win[bindingName];
|
27
|
+
|
28
|
+
win[bindingName] = (...args) => {
|
29
|
+
const me = window[bindingName];
|
30
|
+
let callbacks = me.callbacks;
|
31
|
+
if (!callbacks) {
|
32
|
+
callbacks = new Map();
|
33
|
+
me.callbacks = callbacks;
|
34
|
+
}
|
35
|
+
const seq = (me.lastSeq || 0) + 1;
|
36
|
+
me.lastSeq = seq;
|
37
|
+
const promise = new Promise((resolve, reject) =>
|
38
|
+
callbacks.set(seq, { resolve, reject })
|
39
|
+
);
|
40
|
+
binding(JSON.stringify({ type, name: bindingName, seq, args }));
|
41
|
+
return promise;
|
42
|
+
};
|
43
|
+
}
|
44
|
+
JAVASCRIPT
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
7
48
|
# @param {!Puppeteer.FrameManager} frameManager
|
8
49
|
# @param {!Puppeteer.Frame} frame
|
9
50
|
# @param {!Puppeteer.TimeoutSettings} timeoutSettings
|
@@ -13,19 +54,29 @@ class Puppeteer::DOMWorld
|
|
13
54
|
@timeout_settings = timeout_settings
|
14
55
|
@context_promise = resolvable_future
|
15
56
|
@wait_tasks = Set.new
|
57
|
+
@bound_functions = {}
|
58
|
+
@ctx_bindings = Set.new
|
16
59
|
@detached = false
|
60
|
+
|
61
|
+
frame_manager.client.on_event('Runtime.bindingCalled', &method(:handle_binding_called))
|
17
62
|
end
|
18
63
|
|
19
64
|
attr_reader :frame
|
20
65
|
|
21
66
|
# only used in Puppeteer::WaitTask#initialize
|
22
|
-
def _wait_tasks
|
67
|
+
private def _wait_tasks
|
23
68
|
@wait_tasks
|
24
69
|
end
|
25
70
|
|
71
|
+
# only used in Puppeteer::WaitTask#initialize
|
72
|
+
private def _bound_functions
|
73
|
+
@bound_functions
|
74
|
+
end
|
75
|
+
|
26
76
|
# @param context [Puppeteer::ExecutionContext]
|
27
77
|
def context=(context)
|
28
78
|
if context
|
79
|
+
@ctx_bindings.clear
|
29
80
|
unless @context_promise.resolved?
|
30
81
|
@context_promise.fulfill(context)
|
31
82
|
end
|
@@ -378,61 +429,127 @@ class Puppeteer::DOMWorld
|
|
378
429
|
# @param hidden [Boolean] Wait for the element to be invisible ('display: none' or 'visibility: hidden') when true. Defaults to false.
|
379
430
|
# @param timeout [Integer]
|
380
431
|
def wait_for_selector(selector, visible: nil, hidden: nil, timeout: nil)
|
381
|
-
|
432
|
+
# call wait_for_selector_in_page with custom query selector.
|
433
|
+
query_selector_manager = Puppeteer::QueryHandlerManager.instance
|
434
|
+
query_selector_manager.detect_query_handler(selector).wait_for(self, visible: visible, hidden: hidden, timeout: timeout)
|
382
435
|
end
|
383
436
|
|
384
|
-
|
385
|
-
|
386
|
-
# @param hidden [Boolean] Wait for element invisible ('display: none' nor 'visibility: hidden') on true. default to false.
|
387
|
-
# @param timeout [Integer]
|
388
|
-
def wait_for_xpath(xpath, visible: nil, hidden: nil, timeout: nil)
|
389
|
-
wait_for_selector_or_xpath(xpath, true, visible: visible, hidden: hidden, timeout: timeout)
|
437
|
+
private def binding_identifier(name, context)
|
438
|
+
"#{name}_#{context.send(:_context_id)}"
|
390
439
|
end
|
391
440
|
|
392
|
-
# /**
|
393
|
-
# * @param {Function|string} pageFunction
|
394
|
-
# * @param {!{polling?: string|number, timeout?: number}=} options
|
395
|
-
# * @return {!Promise<!Puppeteer.JSHandle>}
|
396
|
-
# */
|
397
|
-
# waitForFunction(pageFunction, options = {}, ...args) {
|
398
|
-
# const {
|
399
|
-
# polling = 'raf',
|
400
|
-
# timeout = this._timeoutSettings.timeout(),
|
401
|
-
# } = options;
|
402
|
-
# return new WaitTask(this, pageFunction, 'function', polling, timeout, ...args).promise;
|
403
|
-
# }
|
404
441
|
|
405
|
-
|
406
|
-
|
407
|
-
|
442
|
+
def add_binding_to_context(context, binding_function)
|
443
|
+
return if @ctx_bindings.include?(binding_identifier(binding_function.name, context))
|
444
|
+
|
445
|
+
expression = binding_function.page_binding_init_string
|
446
|
+
begin
|
447
|
+
context.client.send_message('Runtime.addBinding',
|
448
|
+
name: binding_function.name,
|
449
|
+
executionContextName: context.send(:_context_name))
|
450
|
+
context.evaluate(expression, 'internal', binding_function.name)
|
451
|
+
rescue => err
|
452
|
+
# We could have tried to evaluate in a context which was already
|
453
|
+
# destroyed. This happens, for example, if the page is navigated while
|
454
|
+
# we are trying to add the binding
|
455
|
+
allowed = [
|
456
|
+
'Execution context was destroyed',
|
457
|
+
'Cannot find context with specified id',
|
458
|
+
]
|
459
|
+
if allowed.any? { |msg| err.message.include?(msg) }
|
460
|
+
# ignore
|
461
|
+
else
|
462
|
+
raise
|
463
|
+
end
|
464
|
+
end
|
465
|
+
@ctx_bindings << binding_identifier(binding_function.name, context)
|
466
|
+
end
|
467
|
+
|
468
|
+
private def handle_binding_called(event)
|
469
|
+
return unless has_context?
|
470
|
+
payload = JSON.parse(event['payload']) rescue nil
|
471
|
+
name = payload['name']
|
472
|
+
args = payload['args']
|
473
|
+
|
474
|
+
# The binding was either called by something in the page or it was
|
475
|
+
# called before our wrapper was initialized.
|
476
|
+
return unless payload
|
477
|
+
return unless payload['type'] == 'internal'
|
478
|
+
context = execution_context
|
479
|
+
return unless @ctx_bindings.include?(binding_identifier(name, context))
|
480
|
+
return unless context.send(:_context_id) == event['executionContextId']
|
481
|
+
|
482
|
+
result = @bound_functions[name].call(*args)
|
483
|
+
deliver_result_js = <<~JAVASCRIPT
|
484
|
+
(name, seq, result) => {
|
485
|
+
globalThis[name].callbacks.get(seq).resolve(result);
|
486
|
+
globalThis[name].callbacks.delete(seq);
|
487
|
+
}
|
488
|
+
JAVASCRIPT
|
489
|
+
|
490
|
+
begin
|
491
|
+
context.evaluate(deliver_result_js, name, payload['seq'], result)
|
492
|
+
rescue => err
|
493
|
+
# The WaitTask may already have been resolved by timing out, or the
|
494
|
+
# execution context may have been destroyed.
|
495
|
+
# In both cases, the promises above are rejected with a protocol error.
|
496
|
+
# We can safely ignore these, as the WaitTask is re-installed in
|
497
|
+
# the next execution context if needed.
|
498
|
+
return if err.message.include?('Protocol error')
|
499
|
+
raise
|
500
|
+
end
|
501
|
+
end
|
502
|
+
|
503
|
+
# @param query_one [String] JS function (element: Element | Document, selector: string) => Element | null;
|
504
|
+
# @param selector [String]
|
505
|
+
# @param visible [Boolean] Wait for the element to be visible (neither 'display: none' nor 'visibility: hidden') when true. Defaults to false.
|
506
|
+
# @param hidden [Boolean] Wait for the element to be invisible ('display: none' or 'visibility: hidden') when true. Defaults to false.
|
408
507
|
# @param timeout [Integer]
|
409
|
-
|
410
|
-
|
411
|
-
|
508
|
+
private def wait_for_selector_in_page(query_one, selector, visible: nil, hidden: nil, timeout: nil, binding_function: nil)
|
509
|
+
option_wait_for_visible = visible || false
|
510
|
+
option_wait_for_hidden = hidden || false
|
412
511
|
option_timeout = timeout || @timeout_settings.timeout
|
413
512
|
|
414
|
-
|
513
|
+
polling =
|
514
|
+
if option_wait_for_visible || option_wait_for_hidden
|
515
|
+
'raf'
|
516
|
+
else
|
517
|
+
'mutation'
|
518
|
+
end
|
519
|
+
title = "selector #{selector}#{option_wait_for_hidden ? 'to be hidden' : ''}"
|
520
|
+
|
521
|
+
selector_predicate = make_predicate_string(
|
522
|
+
predicate_arg_def: '(selector, waitForVisible, waitForHidden)',
|
523
|
+
predicate_query_handler: query_one,
|
524
|
+
async: true,
|
525
|
+
predicate_body: <<~JAVASCRIPT
|
526
|
+
const node = await predicateQueryHandler(document, selector)
|
527
|
+
return checkWaitForOptions(node, waitForVisible, waitForHidden);
|
528
|
+
JAVASCRIPT
|
529
|
+
)
|
530
|
+
|
531
|
+
wait_task = Puppeteer::WaitTask.new(
|
415
532
|
dom_world: self,
|
416
|
-
predicate_body:
|
417
|
-
title:
|
418
|
-
polling:
|
533
|
+
predicate_body: selector_predicate,
|
534
|
+
title: title,
|
535
|
+
polling: polling,
|
419
536
|
timeout: option_timeout,
|
420
|
-
args:
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
537
|
+
args: [selector, option_wait_for_visible, option_wait_for_hidden],
|
538
|
+
binding_function: binding_function,
|
539
|
+
)
|
540
|
+
handle = wait_task.await_promise
|
541
|
+
unless handle.as_element
|
542
|
+
handle.dispose
|
543
|
+
return nil
|
544
|
+
end
|
545
|
+
handle.as_element
|
428
546
|
end
|
429
547
|
|
430
|
-
# @param
|
431
|
-
# @param is_xpath [Boolean]
|
548
|
+
# @param xpath [String]
|
432
549
|
# @param visible [Boolean] Wait for the element to be visible (neither 'display: none' nor 'visibility: hidden') when true. Defaults to false.
|
433
550
|
# @param hidden [Boolean] Wait for the element to be invisible ('display: none' or 'visibility: hidden') when true. Defaults to false.
|
434
551
|
# @param timeout [Integer]
|
435
|
-
|
552
|
+
def wait_for_xpath(xpath, visible: nil, hidden: nil, timeout: nil)
|
436
553
|
option_wait_for_visible = visible || false
|
437
554
|
option_wait_for_hidden = hidden || false
|
438
555
|
option_timeout = timeout || @timeout_settings.timeout
|
@@ -443,15 +560,23 @@ class Puppeteer::DOMWorld
|
|
443
560
|
else
|
444
561
|
'mutation'
|
445
562
|
end
|
446
|
-
title = "
|
563
|
+
title = "XPath #{xpath}#{option_wait_for_hidden ? 'to be hidden' : ''}"
|
564
|
+
|
565
|
+
xpath_predicate = make_predicate_string(
|
566
|
+
predicate_arg_def: '(selector, waitForVisible, waitForHidden)',
|
567
|
+
predicate_body: <<~JAVASCRIPT
|
568
|
+
const node = document.evaluate(selector, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
|
569
|
+
return checkWaitForOptions(node, waitForVisible, waitForHidden);
|
570
|
+
JAVASCRIPT
|
571
|
+
)
|
447
572
|
|
448
573
|
wait_task = Puppeteer::WaitTask.new(
|
449
574
|
dom_world: self,
|
450
|
-
predicate_body:
|
575
|
+
predicate_body: xpath_predicate,
|
451
576
|
title: title,
|
452
577
|
polling: polling,
|
453
578
|
timeout: option_timeout,
|
454
|
-
args: [
|
579
|
+
args: [xpath, option_wait_for_visible, option_wait_for_hidden],
|
455
580
|
)
|
456
581
|
handle = wait_task.await_promise
|
457
582
|
unless handle.as_element
|
@@ -461,34 +586,66 @@ class Puppeteer::DOMWorld
|
|
461
586
|
handle.as_element
|
462
587
|
end
|
463
588
|
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
589
|
+
# @param page_function [String]
|
590
|
+
# @param args [Array]
|
591
|
+
# @param polling [Integer|String]
|
592
|
+
# @param timeout [Integer]
|
593
|
+
# @return [Puppeteer::JSHandle]
|
594
|
+
def wait_for_function(page_function, args: [], polling: nil, timeout: nil)
|
595
|
+
option_polling = polling || 'raf'
|
596
|
+
option_timeout = timeout || @timeout_settings.timeout
|
597
|
+
|
598
|
+
Puppeteer::WaitTask.new(
|
599
|
+
dom_world: self,
|
600
|
+
predicate_body: page_function,
|
601
|
+
title: 'function',
|
602
|
+
polling: option_polling,
|
603
|
+
timeout: option_timeout,
|
604
|
+
args: args,
|
605
|
+
).await_promise
|
606
|
+
end
|
607
|
+
|
608
|
+
|
609
|
+
# @return [String]
|
610
|
+
def title
|
611
|
+
evaluate('() => document.title')
|
612
|
+
end
|
613
|
+
|
614
|
+
private def make_predicate_string(predicate_arg_def:, predicate_body:, predicate_query_handler: nil, async: false)
|
615
|
+
predicate_query_handler_string =
|
616
|
+
if predicate_query_handler
|
617
|
+
"const predicateQueryHandler = #{predicate_query_handler}"
|
618
|
+
else
|
619
|
+
""
|
620
|
+
end
|
621
|
+
|
622
|
+
<<~JAVASCRIPT
|
623
|
+
#{async ? 'async ' : ''}function _#{predicate_arg_def} {
|
624
|
+
#{predicate_query_handler_string}
|
625
|
+
#{predicate_body}
|
626
|
+
|
627
|
+
function checkWaitForOptions(node, waitForVisible, waitForHidden) {
|
628
|
+
if (!node) return waitForHidden;
|
629
|
+
if (!waitForVisible && !waitForHidden) return node;
|
630
|
+
const element =
|
631
|
+
node.nodeType === Node.TEXT_NODE ? node.parentElement : node;
|
632
|
+
|
633
|
+
const style = window.getComputedStyle(element);
|
634
|
+
const isVisible =
|
635
|
+
style && style.visibility !== 'hidden' && hasVisibleBoundingBox();
|
636
|
+
const success =
|
637
|
+
waitForVisible === isVisible || waitForHidden === !isVisible;
|
638
|
+
return success ? node : null;
|
639
|
+
|
640
|
+
/**
|
641
|
+
* @return {boolean}
|
642
|
+
*/
|
643
|
+
function hasVisibleBoundingBox() {
|
644
|
+
const rect = element.getBoundingClientRect();
|
645
|
+
return !!(rect.top || rect.bottom || rect.width || rect.height);
|
646
|
+
}
|
647
|
+
}
|
648
|
+
}
|
649
|
+
JAVASCRIPT
|
650
|
+
end
|
494
651
|
end
|
@@ -314,32 +314,20 @@ class Puppeteer::ElementHandle < Puppeteer::JSHandle
|
|
314
314
|
end
|
315
315
|
end
|
316
316
|
|
317
|
+
private def query_handler_manager
|
318
|
+
Puppeteer::QueryHandlerManager.instance
|
319
|
+
end
|
320
|
+
|
317
321
|
# `$()` in JavaScript. `$` cannot be used as a method name in Ruby.
|
318
322
|
# @param selector [String]
|
319
323
|
def S(selector)
|
320
|
-
|
321
|
-
'(element, selector) => element.querySelector(selector)',
|
322
|
-
selector,
|
323
|
-
)
|
324
|
-
element = handle.as_element
|
325
|
-
|
326
|
-
if element
|
327
|
-
return element
|
328
|
-
end
|
329
|
-
handle.dispose
|
330
|
-
nil
|
324
|
+
query_handler_manager.detect_query_handler(selector).query_one(self)
|
331
325
|
end
|
332
326
|
|
333
327
|
# `$$()` in JavaScript. `$` cannot be used as a method name in Ruby.
|
334
328
|
# @param selector [String]
|
335
329
|
def SS(selector)
|
336
|
-
|
337
|
-
'(element, selector) => element.querySelectorAll(selector)',
|
338
|
-
selector,
|
339
|
-
)
|
340
|
-
properties = handles.properties
|
341
|
-
handles.dispose
|
342
|
-
properties.values.map(&:as_element).compact
|
330
|
+
query_handler_manager.detect_query_handler(selector).query_all(self)
|
343
331
|
end
|
344
332
|
|
345
333
|
class ElementNotFoundError < StandardError
|
@@ -370,10 +358,7 @@ class Puppeteer::ElementHandle < Puppeteer::JSHandle
|
|
370
358
|
# @param page_function [String]
|
371
359
|
# @return [Object]
|
372
360
|
def SSeval(selector, page_function, *args)
|
373
|
-
handles =
|
374
|
-
'(element, selector) => Array.from(element.querySelectorAll(selector))',
|
375
|
-
selector,
|
376
|
-
)
|
361
|
+
handles = query_handler_manager.detect_query_handler(selector).query_all_array(self)
|
377
362
|
result = handles.evaluate(page_function, *args)
|
378
363
|
handles.dispose
|
379
364
|
|
@@ -430,4 +415,10 @@ class Puppeteer::ElementHandle < Puppeteer::JSHandle
|
|
430
415
|
# https://en.wikipedia.org/wiki/Polygon#Simple_polygons
|
431
416
|
quad.zip(quad.rotate).map { |p1, p2| (p1.x * p2.y - p2.x * p1.y) / 2 }.reduce(:+).abs
|
432
417
|
end
|
418
|
+
|
419
|
+
# used in AriaQueryHandler
|
420
|
+
def query_ax_tree(accessible_name: nil, role: nil)
|
421
|
+
@remote_object.query_ax_tree(@client,
|
422
|
+
accessible_name: accessible_name, role: role)
|
423
|
+
end
|
433
424
|
end
|