puppeteer-ruby 0.0.23 → 0.29.0
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- checksums.yaml +4 -4
- data/.circleci/config.yml +32 -22
- data/.github/ISSUE_TEMPLATE/bug_report.md +17 -0
- data/.github/ISSUE_TEMPLATE/feature_request.md +15 -0
- data/.github/workflows/docs.yml +2 -2
- data/.github/workflows/reviewdog.yml +1 -1
- data/CHANGELOG.md +88 -0
- data/Dockerfile +1 -1
- data/README.md +18 -2
- data/lib/puppeteer.rb +3 -0
- data/lib/puppeteer/aria_query_handler.rb +71 -0
- data/lib/puppeteer/browser.rb +0 -2
- data/lib/puppeteer/concurrent_ruby_utils.rb +6 -3
- data/lib/puppeteer/custom_query_handler.rb +51 -0
- data/lib/puppeteer/define_async_method.rb +15 -6
- data/lib/puppeteer/dom_world.rb +231 -74
- data/lib/puppeteer/element_handle.rb +13 -22
- data/lib/puppeteer/execution_context.rb +12 -0
- data/lib/puppeteer/launcher/chrome.rb +4 -1
- data/lib/puppeteer/page.rb +66 -52
- data/lib/puppeteer/page/screenshot_options.rb +2 -2
- data/lib/puppeteer/page/screenshot_task_queue.rb +13 -0
- data/lib/puppeteer/query_handler_manager.rb +65 -0
- data/lib/puppeteer/remote_object.rb +12 -0
- data/lib/puppeteer/target.rb +2 -4
- data/lib/puppeteer/version.rb +1 -1
- data/lib/puppeteer/wait_task.rb +16 -4
- data/puppeteer-ruby.gemspec +6 -4
- metadata +45 -10
@@ -10,12 +10,21 @@ module Puppeteer::DefineAsyncMethod
|
|
10
10
|
end
|
11
11
|
|
12
12
|
original_method = instance_method(async_method_name[6..-1])
|
13
|
-
define_method(async_method_name) do |*args|
|
14
|
-
|
15
|
-
|
16
|
-
|
17
|
-
|
18
|
-
|
13
|
+
define_method(async_method_name) do |*args, **kwargs|
|
14
|
+
if kwargs.empty? # for Ruby < 2.7
|
15
|
+
Concurrent::Promises.future do
|
16
|
+
original_method.bind(self).call(*args)
|
17
|
+
rescue => err
|
18
|
+
Logger.new($stderr).warn(err)
|
19
|
+
raise err
|
20
|
+
end
|
21
|
+
else
|
22
|
+
Concurrent::Promises.future do
|
23
|
+
original_method.bind(self).call(*args, **kwargs)
|
24
|
+
rescue => err
|
25
|
+
Logger.new($stderr).warn(err)
|
26
|
+
raise err
|
27
|
+
end
|
19
28
|
end
|
20
29
|
end
|
21
30
|
end
|
data/lib/puppeteer/dom_world.rb
CHANGED
@@ -4,6 +4,47 @@ require 'thread'
|
|
4
4
|
class Puppeteer::DOMWorld
|
5
5
|
using Puppeteer::DefineAsyncMethod
|
6
6
|
|
7
|
+
class BindingFunction
|
8
|
+
def initialize(name:, proc:)
|
9
|
+
@name = name
|
10
|
+
@proc = proc
|
11
|
+
end
|
12
|
+
|
13
|
+
def call(*args)
|
14
|
+
@proc.call(*args)
|
15
|
+
end
|
16
|
+
|
17
|
+
attr_reader :name
|
18
|
+
|
19
|
+
def page_binding_init_string
|
20
|
+
<<~JAVASCRIPT
|
21
|
+
(type, bindingName) => {
|
22
|
+
/* Cast window to any here as we're about to add properties to it
|
23
|
+
* via win[bindingName] which TypeScript doesn't like.
|
24
|
+
*/
|
25
|
+
const win = window;
|
26
|
+
const binding = win[bindingName];
|
27
|
+
|
28
|
+
win[bindingName] = (...args) => {
|
29
|
+
const me = window[bindingName];
|
30
|
+
let callbacks = me.callbacks;
|
31
|
+
if (!callbacks) {
|
32
|
+
callbacks = new Map();
|
33
|
+
me.callbacks = callbacks;
|
34
|
+
}
|
35
|
+
const seq = (me.lastSeq || 0) + 1;
|
36
|
+
me.lastSeq = seq;
|
37
|
+
const promise = new Promise((resolve, reject) =>
|
38
|
+
callbacks.set(seq, { resolve, reject })
|
39
|
+
);
|
40
|
+
binding(JSON.stringify({ type, name: bindingName, seq, args }));
|
41
|
+
return promise;
|
42
|
+
};
|
43
|
+
}
|
44
|
+
JAVASCRIPT
|
45
|
+
end
|
46
|
+
end
|
47
|
+
|
7
48
|
# @param {!Puppeteer.FrameManager} frameManager
|
8
49
|
# @param {!Puppeteer.Frame} frame
|
9
50
|
# @param {!Puppeteer.TimeoutSettings} timeoutSettings
|
@@ -13,19 +54,29 @@ class Puppeteer::DOMWorld
|
|
13
54
|
@timeout_settings = timeout_settings
|
14
55
|
@context_promise = resolvable_future
|
15
56
|
@wait_tasks = Set.new
|
57
|
+
@bound_functions = {}
|
58
|
+
@ctx_bindings = Set.new
|
16
59
|
@detached = false
|
60
|
+
|
61
|
+
frame_manager.client.on_event('Runtime.bindingCalled', &method(:handle_binding_called))
|
17
62
|
end
|
18
63
|
|
19
64
|
attr_reader :frame
|
20
65
|
|
21
66
|
# only used in Puppeteer::WaitTask#initialize
|
22
|
-
def _wait_tasks
|
67
|
+
private def _wait_tasks
|
23
68
|
@wait_tasks
|
24
69
|
end
|
25
70
|
|
71
|
+
# only used in Puppeteer::WaitTask#initialize
|
72
|
+
private def _bound_functions
|
73
|
+
@bound_functions
|
74
|
+
end
|
75
|
+
|
26
76
|
# @param context [Puppeteer::ExecutionContext]
|
27
77
|
def context=(context)
|
28
78
|
if context
|
79
|
+
@ctx_bindings.clear
|
29
80
|
unless @context_promise.resolved?
|
30
81
|
@context_promise.fulfill(context)
|
31
82
|
end
|
@@ -378,61 +429,127 @@ class Puppeteer::DOMWorld
|
|
378
429
|
# @param hidden [Boolean] Wait for element invisible ('display: none' nor 'visibility: hidden') on true. default to false.
|
379
430
|
# @param timeout [Integer]
|
380
431
|
def wait_for_selector(selector, visible: nil, hidden: nil, timeout: nil)
|
381
|
-
|
432
|
+
# call wait_for_selector_in_page with custom query selector.
|
433
|
+
query_selector_manager = Puppeteer::QueryHandlerManager.instance
|
434
|
+
query_selector_manager.detect_query_handler(selector).wait_for(self, visible: visible, hidden: hidden, timeout: timeout)
|
382
435
|
end
|
383
436
|
|
384
|
-
|
385
|
-
|
386
|
-
# @param hidden [Boolean] Wait for element invisible ('display: none' nor 'visibility: hidden') on true. default to false.
|
387
|
-
# @param timeout [Integer]
|
388
|
-
def wait_for_xpath(xpath, visible: nil, hidden: nil, timeout: nil)
|
389
|
-
wait_for_selector_or_xpath(xpath, true, visible: visible, hidden: hidden, timeout: timeout)
|
437
|
+
private def binding_identifier(name, context)
|
438
|
+
"#{name}_#{context.send(:_context_id)}"
|
390
439
|
end
|
391
440
|
|
392
|
-
# /**
|
393
|
-
# * @param {Function|string} pageFunction
|
394
|
-
# * @param {!{polling?: string|number, timeout?: number}=} options
|
395
|
-
# * @return {!Promise<!Puppeteer.JSHandle>}
|
396
|
-
# */
|
397
|
-
# waitForFunction(pageFunction, options = {}, ...args) {
|
398
|
-
# const {
|
399
|
-
# polling = 'raf',
|
400
|
-
# timeout = this._timeoutSettings.timeout(),
|
401
|
-
# } = options;
|
402
|
-
# return new WaitTask(this, pageFunction, 'function', polling, timeout, ...args).promise;
|
403
|
-
# }
|
404
441
|
|
405
|
-
|
406
|
-
|
407
|
-
|
442
|
+
def add_binding_to_context(context, binding_function)
|
443
|
+
return if @ctx_bindings.include?(binding_identifier(binding_function.name, context))
|
444
|
+
|
445
|
+
expression = binding_function.page_binding_init_string
|
446
|
+
begin
|
447
|
+
context.client.send_message('Runtime.addBinding',
|
448
|
+
name: binding_function.name,
|
449
|
+
executionContextName: context.send(:_context_name))
|
450
|
+
context.evaluate(expression, 'internal', binding_function.name)
|
451
|
+
rescue => err
|
452
|
+
# We could have tried to evaluate in a context which was already
|
453
|
+
# destroyed. This happens, for example, if the page is navigated while
|
454
|
+
# we are trying to add the binding
|
455
|
+
allowed = [
|
456
|
+
'Execution context was destroyed',
|
457
|
+
'Cannot find context with specified id',
|
458
|
+
]
|
459
|
+
if allowed.any? { |msg| err.message.include?(msg) }
|
460
|
+
# ignore
|
461
|
+
else
|
462
|
+
raise
|
463
|
+
end
|
464
|
+
end
|
465
|
+
@ctx_bindings << binding_identifier(binding_function.name, context)
|
466
|
+
end
|
467
|
+
|
468
|
+
private def handle_binding_called(event)
|
469
|
+
return unless has_context?
|
470
|
+
payload = JSON.parse(event['payload']) rescue nil
|
471
|
+
name = payload['name']
|
472
|
+
args = payload['args']
|
473
|
+
|
474
|
+
# The binding was either called by something in the page or it was
|
475
|
+
# called before our wrapper was initialized.
|
476
|
+
return unless payload
|
477
|
+
return unless payload['type'] == 'internal'
|
478
|
+
context = execution_context
|
479
|
+
return unless @ctx_bindings.include?(binding_identifier(name, context))
|
480
|
+
return unless context.send(:_context_id) == event['executionContextId']
|
481
|
+
|
482
|
+
result = @bound_functions[name].call(*args)
|
483
|
+
deliver_result_js = <<~JAVASCRIPT
|
484
|
+
(name, seq, result) => {
|
485
|
+
globalThis[name].callbacks.get(seq).resolve(result);
|
486
|
+
globalThis[name].callbacks.delete(seq);
|
487
|
+
}
|
488
|
+
JAVASCRIPT
|
489
|
+
|
490
|
+
begin
|
491
|
+
context.evaluate(deliver_result_js, name, payload['seq'], result)
|
492
|
+
rescue => err
|
493
|
+
# The WaitTask may already have been resolved by timing out, or the
|
494
|
+
# execution context may have been destroyed.
|
495
|
+
# In both cases, the promises above are rejected with a protocol error.
|
496
|
+
# We can safely ignore these, as the WaitTask is re-installed in
|
497
|
+
# the next execution context if needed.
|
498
|
+
return if err.message.include?('Protocol error')
|
499
|
+
raise
|
500
|
+
end
|
501
|
+
end
|
502
|
+
|
503
|
+
# @param query_one [String] JS function (element: Element | Document, selector: string) => Element | null;
|
504
|
+
# @param selector [String]
|
505
|
+
# @param visible [Boolean] Wait for element visible (not 'display: none' nor 'visibility: hidden') on true. default to false.
|
506
|
+
# @param hidden [Boolean] Wait for element invisible ('display: none' nor 'visibility: hidden') on true. default to false.
|
408
507
|
# @param timeout [Integer]
|
409
|
-
|
410
|
-
|
411
|
-
|
508
|
+
private def wait_for_selector_in_page(query_one, selector, visible: nil, hidden: nil, timeout: nil, binding_function: nil)
|
509
|
+
option_wait_for_visible = visible || false
|
510
|
+
option_wait_for_hidden = hidden || false
|
412
511
|
option_timeout = timeout || @timeout_settings.timeout
|
413
512
|
|
414
|
-
|
513
|
+
polling =
|
514
|
+
if option_wait_for_visible || option_wait_for_hidden
|
515
|
+
'raf'
|
516
|
+
else
|
517
|
+
'mutation'
|
518
|
+
end
|
519
|
+
title = "selector #{selector}#{option_wait_for_hidden ? 'to be hidden' : ''}"
|
520
|
+
|
521
|
+
selector_predicate = make_predicate_string(
|
522
|
+
predicate_arg_def: '(selector, waitForVisible, waitForHidden)',
|
523
|
+
predicate_query_handler: query_one,
|
524
|
+
async: true,
|
525
|
+
predicate_body: <<~JAVASCRIPT
|
526
|
+
const node = await predicateQueryHandler(document, selector)
|
527
|
+
return checkWaitForOptions(node, waitForVisible, waitForHidden);
|
528
|
+
JAVASCRIPT
|
529
|
+
)
|
530
|
+
|
531
|
+
wait_task = Puppeteer::WaitTask.new(
|
415
532
|
dom_world: self,
|
416
|
-
predicate_body:
|
417
|
-
title:
|
418
|
-
polling:
|
533
|
+
predicate_body: selector_predicate,
|
534
|
+
title: title,
|
535
|
+
polling: polling,
|
419
536
|
timeout: option_timeout,
|
420
|
-
args:
|
421
|
-
|
422
|
-
|
423
|
-
|
424
|
-
|
425
|
-
|
426
|
-
|
427
|
-
|
537
|
+
args: [selector, option_wait_for_visible, option_wait_for_hidden],
|
538
|
+
binding_function: binding_function,
|
539
|
+
)
|
540
|
+
handle = wait_task.await_promise
|
541
|
+
unless handle.as_element
|
542
|
+
handle.dispose
|
543
|
+
return nil
|
544
|
+
end
|
545
|
+
handle.as_element
|
428
546
|
end
|
429
547
|
|
430
|
-
# @param
|
431
|
-
# @param is_xpath [Boolean]
|
548
|
+
# @param xpath [String]
|
432
549
|
# @param visible [Boolean] Wait for element visible (not 'display: none' nor 'visibility: hidden') on true. default to false.
|
433
550
|
# @param hidden [Boolean] Wait for element invisible ('display: none' nor 'visibility: hidden') on true. default to false.
|
434
551
|
# @param timeout [Integer]
|
435
|
-
|
552
|
+
def wait_for_xpath(xpath, visible: nil, hidden: nil, timeout: nil)
|
436
553
|
option_wait_for_visible = visible || false
|
437
554
|
option_wait_for_hidden = hidden || false
|
438
555
|
option_timeout = timeout || @timeout_settings.timeout
|
@@ -443,15 +560,23 @@ class Puppeteer::DOMWorld
|
|
443
560
|
else
|
444
561
|
'mutation'
|
445
562
|
end
|
446
|
-
title = "
|
563
|
+
title = "XPath #{xpath}#{option_wait_for_hidden ? 'to be hidden' : ''}"
|
564
|
+
|
565
|
+
xpath_predicate = make_predicate_string(
|
566
|
+
predicate_arg_def: '(selector, waitForVisible, waitForHidden)',
|
567
|
+
predicate_body: <<~JAVASCRIPT
|
568
|
+
const node = document.evaluate(selector, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
|
569
|
+
return checkWaitForOptions(node, waitForVisible, waitForHidden);
|
570
|
+
JAVASCRIPT
|
571
|
+
)
|
447
572
|
|
448
573
|
wait_task = Puppeteer::WaitTask.new(
|
449
574
|
dom_world: self,
|
450
|
-
predicate_body:
|
575
|
+
predicate_body: xpath_predicate,
|
451
576
|
title: title,
|
452
577
|
polling: polling,
|
453
578
|
timeout: option_timeout,
|
454
|
-
args: [
|
579
|
+
args: [xpath, option_wait_for_visible, option_wait_for_hidden],
|
455
580
|
)
|
456
581
|
handle = wait_task.await_promise
|
457
582
|
unless handle.as_element
|
@@ -461,34 +586,66 @@ class Puppeteer::DOMWorld
|
|
461
586
|
handle.as_element
|
462
587
|
end
|
463
588
|
|
464
|
-
|
465
|
-
|
466
|
-
|
467
|
-
|
468
|
-
|
469
|
-
|
470
|
-
|
471
|
-
|
472
|
-
|
473
|
-
|
474
|
-
|
475
|
-
|
476
|
-
|
477
|
-
|
478
|
-
|
479
|
-
|
480
|
-
|
481
|
-
|
482
|
-
|
483
|
-
|
484
|
-
|
485
|
-
|
486
|
-
|
487
|
-
|
488
|
-
|
489
|
-
|
490
|
-
|
491
|
-
|
492
|
-
|
493
|
-
|
589
|
+
# @param page_function [String]
|
590
|
+
# @param args [Array]
|
591
|
+
# @param polling [Integer|String]
|
592
|
+
# @param timeout [Integer]
|
593
|
+
# @return [Puppeteer::JSHandle]
|
594
|
+
def wait_for_function(page_function, args: [], polling: nil, timeout: nil)
|
595
|
+
option_polling = polling || 'raf'
|
596
|
+
option_timeout = timeout || @timeout_settings.timeout
|
597
|
+
|
598
|
+
Puppeteer::WaitTask.new(
|
599
|
+
dom_world: self,
|
600
|
+
predicate_body: page_function,
|
601
|
+
title: 'function',
|
602
|
+
polling: option_polling,
|
603
|
+
timeout: option_timeout,
|
604
|
+
args: args,
|
605
|
+
).await_promise
|
606
|
+
end
|
607
|
+
|
608
|
+
|
609
|
+
# @return [String]
|
610
|
+
def title
|
611
|
+
evaluate('() => document.title')
|
612
|
+
end
|
613
|
+
|
614
|
+
private def make_predicate_string(predicate_arg_def:, predicate_body:, predicate_query_handler: nil, async: false)
|
615
|
+
predicate_query_handler_string =
|
616
|
+
if predicate_query_handler
|
617
|
+
"const predicateQueryHandler = #{predicate_query_handler}"
|
618
|
+
else
|
619
|
+
""
|
620
|
+
end
|
621
|
+
|
622
|
+
<<~JAVASCRIPT
|
623
|
+
#{async ? 'async ' : ''}function _#{predicate_arg_def} {
|
624
|
+
#{predicate_query_handler_string}
|
625
|
+
#{predicate_body}
|
626
|
+
|
627
|
+
function checkWaitForOptions(node, waitForVisible, waitForHidden) {
|
628
|
+
if (!node) return waitForHidden;
|
629
|
+
if (!waitForVisible && !waitForHidden) return node;
|
630
|
+
const element =
|
631
|
+
node.nodeType === Node.TEXT_NODE ? node.parentElement : node;
|
632
|
+
|
633
|
+
const style = window.getComputedStyle(element);
|
634
|
+
const isVisible =
|
635
|
+
style && style.visibility !== 'hidden' && hasVisibleBoundingBox();
|
636
|
+
const success =
|
637
|
+
waitForVisible === isVisible || waitForHidden === !isVisible;
|
638
|
+
return success ? node : null;
|
639
|
+
|
640
|
+
/**
|
641
|
+
* @return {boolean}
|
642
|
+
*/
|
643
|
+
function hasVisibleBoundingBox() {
|
644
|
+
const rect = element.getBoundingClientRect();
|
645
|
+
return !!(rect.top || rect.bottom || rect.width || rect.height);
|
646
|
+
}
|
647
|
+
}
|
648
|
+
}
|
649
|
+
JAVASCRIPT
|
650
|
+
end
|
494
651
|
end
|
@@ -314,32 +314,20 @@ class Puppeteer::ElementHandle < Puppeteer::JSHandle
|
|
314
314
|
end
|
315
315
|
end
|
316
316
|
|
317
|
+
private def query_handler_manager
|
318
|
+
Puppeteer::QueryHandlerManager.instance
|
319
|
+
end
|
320
|
+
|
317
321
|
# `$()` in JavaScript. $ is not allowed to use as a method name in Ruby.
|
318
322
|
# @param selector [String]
|
319
323
|
def S(selector)
|
320
|
-
|
321
|
-
'(element, selector) => element.querySelector(selector)',
|
322
|
-
selector,
|
323
|
-
)
|
324
|
-
element = handle.as_element
|
325
|
-
|
326
|
-
if element
|
327
|
-
return element
|
328
|
-
end
|
329
|
-
handle.dispose
|
330
|
-
nil
|
324
|
+
query_handler_manager.detect_query_handler(selector).query_one(self)
|
331
325
|
end
|
332
326
|
|
333
327
|
# `$$()` in JavaScript. $ is not allowed to use as a method name in Ruby.
|
334
328
|
# @param selector [String]
|
335
329
|
def SS(selector)
|
336
|
-
|
337
|
-
'(element, selector) => element.querySelectorAll(selector)',
|
338
|
-
selector,
|
339
|
-
)
|
340
|
-
properties = handles.properties
|
341
|
-
handles.dispose
|
342
|
-
properties.values.map(&:as_element).compact
|
330
|
+
query_handler_manager.detect_query_handler(selector).query_all(self)
|
343
331
|
end
|
344
332
|
|
345
333
|
class ElementNotFoundError < StandardError
|
@@ -370,10 +358,7 @@ class Puppeteer::ElementHandle < Puppeteer::JSHandle
|
|
370
358
|
# @param page_function [String]
|
371
359
|
# @return [Object]
|
372
360
|
def SSeval(selector, page_function, *args)
|
373
|
-
handles =
|
374
|
-
'(element, selector) => Array.from(element.querySelectorAll(selector))',
|
375
|
-
selector,
|
376
|
-
)
|
361
|
+
handles = query_handler_manager.detect_query_handler(selector).query_all_array(self)
|
377
362
|
result = handles.evaluate(page_function, *args)
|
378
363
|
handles.dispose
|
379
364
|
|
@@ -430,4 +415,10 @@ class Puppeteer::ElementHandle < Puppeteer::JSHandle
|
|
430
415
|
# https://en.wikipedia.org/wiki/Polygon#Simple_polygons
|
431
416
|
quad.zip(quad.rotate).map { |p1, p2| (p1.x * p2.y - p2.x * p1.y) / 2 }.reduce(:+).abs
|
432
417
|
end
|
418
|
+
|
419
|
+
# used in AriaQueryHandler
|
420
|
+
def query_ax_tree(accessible_name: nil, role: nil)
|
421
|
+
@remote_object.query_ax_tree(@client,
|
422
|
+
accessible_name: accessible_name, role: role)
|
423
|
+
end
|
433
424
|
end
|