puppeteer-ruby 0.0.25 → 0.30.0

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,51 @@
1
+ class Puppeteer::CustomQueryHandler
2
+ # @param query_one [String] JS function (element: Element | Document, selector: string) => Element | null;
3
+ # @param query_all [String] JS function (element: Element | Document, selector: string) => Element[] | NodeListOf<Element>;
4
+ def initialize(query_one: nil, query_all: nil)
5
+ @query_one = query_one
6
+ @query_all = query_all
7
+ end
8
+
9
+ def query_one(element, selector)
10
+ unless @query_one
11
+ raise NotImplementedError.new("#{self.class}##{__method__} is not implemented.")
12
+ end
13
+
14
+ handle = element.evaluate_handle(@query_one, selector)
15
+ element = handle.as_element
16
+
17
+ if element
18
+ return element
19
+ end
20
+ handle.dispose
21
+ nil
22
+ end
23
+
24
+ def wait_for(dom_world, selector, visible: nil, hidden: nil, timeout: nil)
25
+ unless @query_one
26
+ raise NotImplementedError.new("#{self.class}##{__method__} is not implemented.")
27
+ end
28
+
29
+ dom_world.send(:wait_for_selector_in_page, @query_one, selector, visible: visible, hidden: hidden, timeout: timeout)
30
+ end
31
+
32
+ def query_all(element, selector)
33
+ unless @query_all
34
+ raise NotImplementedError.new("#{self.class}##{__method__} is not implemented.")
35
+ end
36
+
37
+ handles = element.evaluate_handle(@query_all, selector)
38
+ properties = handles.properties
39
+ handles.dispose
40
+ properties.values.map(&:as_element).compact
41
+ end
42
+
43
+ def query_all_array(element, selector)
44
+ unless @query_all
45
+ raise NotImplementedError.new("#{self.class}##{__method__} is not implemented.")
46
+ end
47
+
48
+ handles = element.evaluate_handle(@query_all, selector)
49
+ handles.evaluate_handle('(res) => Array.from(res)')
50
+ end
51
+ end
@@ -10,12 +10,21 @@ module Puppeteer::DefineAsyncMethod
10
10
  end
11
11
 
12
12
  original_method = instance_method(async_method_name[6..-1])
13
- define_method(async_method_name) do |*args|
14
- Concurrent::Promises.future do
15
- original_method.bind(self).call(*args)
16
- rescue => err
17
- Logger.new($stderr).warn(err)
18
- raise err
13
+ define_method(async_method_name) do |*args, **kwargs|
14
+ if kwargs.empty? # for Ruby < 2.7
15
+ Concurrent::Promises.future do
16
+ original_method.bind(self).call(*args)
17
+ rescue => err
18
+ Logger.new($stderr).warn(err)
19
+ raise err
20
+ end
21
+ else
22
+ Concurrent::Promises.future do
23
+ original_method.bind(self).call(*args, **kwargs)
24
+ rescue => err
25
+ Logger.new($stderr).warn(err)
26
+ raise err
27
+ end
19
28
  end
20
29
  end
21
30
  end
@@ -4,6 +4,47 @@ require 'thread'
4
4
  class Puppeteer::DOMWorld
5
5
  using Puppeteer::DefineAsyncMethod
6
6
 
7
+ class BindingFunction
8
+ def initialize(name:, proc:)
9
+ @name = name
10
+ @proc = proc
11
+ end
12
+
13
+ def call(*args)
14
+ @proc.call(*args)
15
+ end
16
+
17
+ attr_reader :name
18
+
19
+ def page_binding_init_string
20
+ <<~JAVASCRIPT
21
+ (type, bindingName) => {
22
+ /* Cast window to any here as we're about to add properties to it
23
+ * via win[bindingName] which TypeScript doesn't like.
24
+ */
25
+ const win = window;
26
+ const binding = win[bindingName];
27
+
28
+ win[bindingName] = (...args) => {
29
+ const me = window[bindingName];
30
+ let callbacks = me.callbacks;
31
+ if (!callbacks) {
32
+ callbacks = new Map();
33
+ me.callbacks = callbacks;
34
+ }
35
+ const seq = (me.lastSeq || 0) + 1;
36
+ me.lastSeq = seq;
37
+ const promise = new Promise((resolve, reject) =>
38
+ callbacks.set(seq, { resolve, reject })
39
+ );
40
+ binding(JSON.stringify({ type, name: bindingName, seq, args }));
41
+ return promise;
42
+ };
43
+ }
44
+ JAVASCRIPT
45
+ end
46
+ end
47
+
7
48
  # @param {!Puppeteer.FrameManager} frameManager
8
49
  # @param {!Puppeteer.Frame} frame
9
50
  # @param {!Puppeteer.TimeoutSettings} timeoutSettings
@@ -13,19 +54,29 @@ class Puppeteer::DOMWorld
13
54
  @timeout_settings = timeout_settings
14
55
  @context_promise = resolvable_future
15
56
  @wait_tasks = Set.new
57
+ @bound_functions = {}
58
+ @ctx_bindings = Set.new
16
59
  @detached = false
60
+
61
+ frame_manager.client.on_event('Runtime.bindingCalled', &method(:handle_binding_called))
17
62
  end
18
63
 
19
64
  attr_reader :frame
20
65
 
21
66
  # only used in Puppeteer::WaitTask#initialize
22
- def _wait_tasks
67
+ private def _wait_tasks
23
68
  @wait_tasks
24
69
  end
25
70
 
71
+ # Returns @bound_functions, which handle_binding_called indexes by binding name. Reportedly only used in Puppeteer::WaitTask#initialize (not visible here).
72
+ private def _bound_functions
73
+ @bound_functions
74
+ end
75
+
26
76
  # @param context [Puppeteer::ExecutionContext]
27
77
  def context=(context)
28
78
  if context
79
+ @ctx_bindings.clear
29
80
  unless @context_promise.resolved?
30
81
  @context_promise.fulfill(context)
31
82
  end
@@ -75,12 +126,13 @@ class Puppeteer::DOMWorld
75
126
  execution_context.evaluate(page_function, *args)
76
127
  end
77
128
 
78
- # `$()` in JavaScript. $ is not allowed to use as a method name in Ruby.
129
+ # `$()` in JavaScript.
79
130
  # @param {string} selector
80
131
  # @return {!Promise<?Puppeteer.ElementHandle>}
81
- def S(selector)
82
- document.S(selector)
132
+ def query_selector(selector)
133
+ document.query_selector(selector)
83
134
  end
135
+ alias_method :S, :query_selector
84
136
 
85
137
  private def evaluate_document
86
138
  # sometimes execution_context.evaluate_handle('document') returns null object.
@@ -107,30 +159,33 @@ class Puppeteer::DOMWorld
107
159
  document.Sx(expression)
108
160
  end
109
161
 
110
- # `$eval()` in JavaScript. $ is not allowed to use as a method name in Ruby.
162
+ # `$eval()` in JavaScript.
111
163
  # @param {string} selector
112
164
  # @param {Function|string} pageFunction
113
165
  # @param {!Array<*>} args
114
166
  # @return {!Promise<(!Object|undefined)>}
115
- def Seval(selector, page_function, *args)
116
- document.Seval(selector, page_function, *args)
167
+ def eval_on_selector(selector, page_function, *args)
168
+ document.eval_on_selector(selector, page_function, *args)
117
169
  end
170
+ alias_method :Seval, :eval_on_selector
118
171
 
119
- # `$$eval()` in JavaScript. $ is not allowed to use as a method name in Ruby.
172
+ # `$$eval()` in JavaScript.
120
173
  # @param {string} selector
121
174
  # @param {Function|string} pageFunction
122
175
  # @param {!Array<*>} args
123
176
  # @return {!Promise<(!Object|undefined)>}
124
- def SSeval(selector, page_function, *args)
125
- document.SSeval(selector, page_function, *args)
177
+ def eval_on_selector_all(selector, page_function, *args)
178
+ document.eval_on_selector_all(selector, page_function, *args)
126
179
  end
180
+ alias_method :SSeval, :eval_on_selector_all
127
181
 
128
- # `$$()` in JavaScript. $ is not allowed to use as a method name in Ruby.
182
+ # `$$()` in JavaScript.
129
183
  # @param {string} selector
130
184
  # @return {!Promise<!Array<!Puppeteer.ElementHandle>>}
131
- def SS(selector)
132
- document.SS(selector)
185
+ def query_selector_all(selector)
186
+ document.query_selector_all(selector)
133
187
  end
188
+ alias_method :SS, :query_selector_all
134
189
 
135
190
  # @return [String]
136
191
  def content
@@ -325,14 +380,14 @@ class Puppeteer::DOMWorld
325
380
  # @param button [String] "left"|"right"|"middle"
326
381
  # @param click_count [Number]
327
382
  def click(selector, delay: nil, button: nil, click_count: nil)
328
- handle = S(selector) or raise ElementNotFoundError.new(selector)
383
+ handle = query_selector(selector) or raise ElementNotFoundError.new(selector)
329
384
  handle.click(delay: delay, button: button, click_count: click_count)
330
385
  handle.dispose
331
386
  end
332
387
 
333
388
  # @param selector [String]
334
389
  def focus(selector)
335
- handle = S(selector) or raise ElementNotFoundError.new(selector)
390
+ handle = query_selector(selector) or raise ElementNotFoundError.new(selector)
336
391
  handle.focus
337
392
  handle.dispose
338
393
  end
@@ -350,7 +405,7 @@ class Puppeteer::DOMWorld
350
405
  # @param selector [String]
351
406
  # @return [Array<String>]
352
407
  def select(selector, *values)
353
- handle = S(selector) or raise ElementNotFoundError.new(selector)
408
+ handle = query_selector(selector) or raise ElementNotFoundError.new(selector)
354
409
  result = handle.select(*values)
355
410
  handle.dispose
356
411
 
@@ -359,7 +414,7 @@ class Puppeteer::DOMWorld
359
414
 
360
415
  # @param selector [String]
361
416
  def tap(selector)
362
- handle = S(selector) or raise ElementNotFoundError.new(selector)
417
+ handle = query_selector(selector) or raise ElementNotFoundError.new(selector)
363
418
  handle.tap
364
419
  handle.dispose
365
420
  end
@@ -368,7 +423,7 @@ class Puppeteer::DOMWorld
368
423
  # @param text [String]
369
424
  # @param delay [Number]
370
425
  def type_text(selector, text, delay: nil)
371
- handle = S(selector) or raise ElementNotFoundError.new(selector)
426
+ handle = query_selector(selector) or raise ElementNotFoundError.new(selector)
372
427
  handle.type_text(text, delay: delay)
373
428
  handle.dispose
374
429
  end
@@ -378,61 +433,127 @@ class Puppeteer::DOMWorld
378
433
  # @param hidden [Boolean] Wait for element invisible ('display: none' nor 'visibility: hidden') on true. default to false.
379
434
  # @param timeout [Integer]
380
435
  def wait_for_selector(selector, visible: nil, hidden: nil, timeout: nil)
381
- wait_for_selector_or_xpath(selector, false, visible: visible, hidden: hidden, timeout: timeout)
436
+ # call wait_for_selector_in_page with custom query selector.
437
+ query_selector_manager = Puppeteer::QueryHandlerManager.instance
438
+ query_selector_manager.detect_query_handler(selector).wait_for(self, visible: visible, hidden: hidden, timeout: timeout)
382
439
  end
383
440
 
384
- # @param xpath [String]
385
- # @param visible [Boolean] Wait for element visible (not 'display: none' nor 'visibility: hidden') on true. default to false.
386
- # @param hidden [Boolean] Wait for element invisible ('display: none' nor 'visibility: hidden') on true. default to false.
387
- # @param timeout [Integer]
388
- def wait_for_xpath(xpath, visible: nil, hidden: nil, timeout: nil)
389
- wait_for_selector_or_xpath(xpath, true, visible: visible, hidden: hidden, timeout: timeout)
441
+ private def binding_identifier(name, context)
442
+ "#{name}_#{context.send(:_context_id)}"
390
443
  end
391
444
 
392
- # /**
393
- # * @param {Function|string} pageFunction
394
- # * @param {!{polling?: string|number, timeout?: number}=} options
395
- # * @return {!Promise<!Puppeteer.JSHandle>}
396
- # */
397
- # waitForFunction(pageFunction, options = {}, ...args) {
398
- # const {
399
- # polling = 'raf',
400
- # timeout = this._timeoutSettings.timeout(),
401
- # } = options;
402
- # return new WaitTask(this, pageFunction, 'function', polling, timeout, ...args).promise;
403
- # }
404
445
 
405
- # @param page_function [String]
406
- # @param args [Array]
407
- # @param polling [Integer|String]
446
+ # Installs `binding_function` into `context` at most once per (binding name, context id) pair.
447
+ def add_binding_to_context(context, binding_function)
448
+ binding_name = binding_function.name
449
+ identifier = binding_identifier(binding_name, context)
450
+ return if @ctx_bindings.include?(identifier)
451
+
452
+ init_script = binding_function.page_binding_init_string
453
+ begin
454
+ context.client.send_message('Runtime.addBinding',
455
+ name: binding_name,
456
+ executionContextName: context.send(:_context_name))
457
+ context.evaluate(init_script, 'internal', binding_name)
458
+ rescue => err
459
+ # We could have tried to evaluate in a context which was already
460
+ # destroyed. This happens, for example, if the page is navigated while
461
+ # we are trying to add the binding. Only those failures are tolerated.
462
+ tolerated = [
463
+ 'Execution context was destroyed',
464
+ 'Cannot find context with specified id',
465
+ ]
466
+ raise if tolerated.none? { |msg| err.message.include?(msg) }
467
+ end
468
+ # Recorded even after a tolerated failure, so the same context is not retried.
469
+ @ctx_bindings << identifier
470
+ end
471
+
472
+ private def handle_binding_called(event)
473
+ return unless has_context?
474
+ payload = JSON.parse(event['payload']) rescue nil
475
+ name = payload['name']
476
+ args = payload['args']
477
+
478
+ # The binding was either called by something in the page or it was
479
+ # called before our wrapper was initialized.
480
+ return unless payload
481
+ return unless payload['type'] == 'internal'
482
+ context = execution_context
483
+ return unless @ctx_bindings.include?(binding_identifier(name, context))
484
+ return unless context.send(:_context_id) == event['executionContextId']
485
+
486
+ result = @bound_functions[name].call(*args)
487
+ deliver_result_js = <<~JAVASCRIPT
488
+ (name, seq, result) => {
489
+ globalThis[name].callbacks.get(seq).resolve(result);
490
+ globalThis[name].callbacks.delete(seq);
491
+ }
492
+ JAVASCRIPT
493
+
494
+ begin
495
+ context.evaluate(deliver_result_js, name, payload['seq'], result)
496
+ rescue => err
497
+ # The WaitTask may already have been resolved by timing out, or the
498
+ # exection context may have been destroyed.
499
+ # In both caes, the promises above are rejected with a protocol error.
500
+ # We can safely ignores these, as the WaitTask is re-installed in
501
+ # the next execution context if needed.
502
+ return if err.message.include?('Protocol error')
503
+ raise
504
+ end
505
+ end
506
+
507
+ # @param query_one [String] JS function (element: Element | Document, selector: string) => Element | null;
508
+ # @param selector [String]
509
+ # @param visible [Boolean] Wait for element visible (not 'display: none' nor 'visibility: hidden') on true. default to false.
510
+ # @param hidden [Boolean] Wait for element invisible ('display: none' nor 'visibility: hidden') on true. default to false.
408
511
  # @param timeout [Integer]
409
- # @return [Puppeteer::JSHandle]
410
- def wait_for_function(page_function, args: [], polling: nil, timeout: nil)
411
- option_polling = polling || 'raf'
512
+ private def wait_for_selector_in_page(query_one, selector, visible: nil, hidden: nil, timeout: nil, binding_function: nil)
513
+ option_wait_for_visible = visible || false
514
+ option_wait_for_hidden = hidden || false
412
515
  option_timeout = timeout || @timeout_settings.timeout
413
516
 
414
- Puppeteer::WaitTask.new(
517
+ polling =
518
+ if option_wait_for_visible || option_wait_for_hidden
519
+ 'raf'
520
+ else
521
+ 'mutation'
522
+ end
523
+ title = "selector #{selector}#{option_wait_for_hidden ? 'to be hidden' : ''}"
524
+
525
+ selector_predicate = make_predicate_string(
526
+ predicate_arg_def: '(selector, waitForVisible, waitForHidden)',
527
+ predicate_query_handler: query_one,
528
+ async: true,
529
+ predicate_body: <<~JAVASCRIPT
530
+ const node = await predicateQueryHandler(document, selector)
531
+ return checkWaitForOptions(node, waitForVisible, waitForHidden);
532
+ JAVASCRIPT
533
+ )
534
+
535
+ wait_task = Puppeteer::WaitTask.new(
415
536
  dom_world: self,
416
- predicate_body: page_function,
417
- title: 'function',
418
- polling: option_polling,
537
+ predicate_body: selector_predicate,
538
+ title: title,
539
+ polling: polling,
419
540
  timeout: option_timeout,
420
- args: args,
421
- ).await_promise
422
- end
423
-
424
-
425
- # @return [String]
426
- def title
427
- evaluate('() => document.title')
541
+ args: [selector, option_wait_for_visible, option_wait_for_hidden],
542
+ binding_function: binding_function,
543
+ )
544
+ handle = wait_task.await_promise
545
+ unless handle.as_element
546
+ handle.dispose
547
+ return nil
548
+ end
549
+ handle.as_element
428
550
  end
429
551
 
430
- # @param selector_or_xpath [String]
431
- # @param is_xpath [Boolean]
552
+ # @param xpath [String]
432
553
  # @param visible [Boolean] Wait for element visible (not 'display: none' nor 'visibility: hidden') on true. default to false.
433
554
  # @param hidden [Boolean] Wait for element invisible ('display: none' nor 'visibility: hidden') on true. default to false.
434
555
  # @param timeout [Integer]
435
- private def wait_for_selector_or_xpath(selector_or_xpath, is_xpath, visible: nil, hidden: nil, timeout: nil)
556
+ def wait_for_xpath(xpath, visible: nil, hidden: nil, timeout: nil)
436
557
  option_wait_for_visible = visible || false
437
558
  option_wait_for_hidden = hidden || false
438
559
  option_timeout = timeout || @timeout_settings.timeout
@@ -443,15 +564,23 @@ class Puppeteer::DOMWorld
443
564
  else
444
565
  'mutation'
445
566
  end
446
- title = "#{is_xpath ? :XPath : :selector} #{selector_or_xpath}#{option_wait_for_hidden ? 'to be hidden' : ''}"
567
+ title = "XPath #{xpath}#{option_wait_for_hidden ? 'to be hidden' : ''}"
568
+
569
+ xpath_predicate = make_predicate_string(
570
+ predicate_arg_def: '(selector, waitForVisible, waitForHidden)',
571
+ predicate_body: <<~JAVASCRIPT
572
+ const node = document.evaluate(selector, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
573
+ return checkWaitForOptions(node, waitForVisible, waitForHidden);
574
+ JAVASCRIPT
575
+ )
447
576
 
448
577
  wait_task = Puppeteer::WaitTask.new(
449
578
  dom_world: self,
450
- predicate_body: PREDICATE,
579
+ predicate_body: xpath_predicate,
451
580
  title: title,
452
581
  polling: polling,
453
582
  timeout: option_timeout,
454
- args: [selector_or_xpath, is_xpath, option_wait_for_visible, option_wait_for_hidden],
583
+ args: [xpath, option_wait_for_visible, option_wait_for_hidden],
455
584
  )
456
585
  handle = wait_task.await_promise
457
586
  unless handle.as_element
@@ -461,34 +590,66 @@ class Puppeteer::DOMWorld
461
590
  handle.as_element
462
591
  end
463
592
 
464
- PREDICATE = <<~JAVASCRIPT
465
- /**
466
- * @param {string} selectorOrXPath
467
- * @param {boolean} isXPath
468
- * @param {boolean} waitForVisible
469
- * @param {boolean} waitForHidden
470
- * @return {?Node|boolean}
471
- */
472
- function _(selectorOrXPath, isXPath, waitForVisible, waitForHidden) {
473
- const node = isXPath
474
- ? document.evaluate(selectorOrXPath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue
475
- : document.querySelector(selectorOrXPath);
476
- if (!node)
477
- return waitForHidden;
478
- if (!waitForVisible && !waitForHidden)
479
- return node;
480
- const element = /** @type {Element} */ (node.nodeType === Node.TEXT_NODE ? node.parentElement : node);
481
- const style = window.getComputedStyle(element);
482
- const isVisible = style && style.visibility !== 'hidden' && hasVisibleBoundingBox();
483
- const success = (waitForVisible === isVisible || waitForHidden === !isVisible);
484
- return success ? node : null;
485
- /**
486
- * @return {boolean}
487
- */
488
- function hasVisibleBoundingBox() {
489
- const rect = element.getBoundingClientRect();
490
- return !!(rect.top || rect.bottom || rect.width || rect.height);
491
- }
492
- }
493
- JAVASCRIPT
593
+ # @param page_function [String]
594
+ # @param args [Array]
595
+ # @param polling [Integer|String]
596
+ # @param timeout [Integer]
597
+ # @return [Puppeteer::JSHandle]
598
+ def wait_for_function(page_function, args: [], polling: nil, timeout: nil)
599
+ option_polling = polling || 'raf'
600
+ option_timeout = timeout || @timeout_settings.timeout
601
+
602
+ Puppeteer::WaitTask.new(
603
+ dom_world: self,
604
+ predicate_body: page_function,
605
+ title: 'function',
606
+ polling: option_polling,
607
+ timeout: option_timeout,
608
+ args: args,
609
+ ).await_promise
610
+ end
611
+
612
+
613
+ # @return [String] result of evaluating `() => document.title` via #evaluate
614
+ def title
615
+ evaluate('() => document.title')
616
+ end
617
+
618
+ private def make_predicate_string(predicate_arg_def:, predicate_body:, predicate_query_handler: nil, async: false)
619
+ predicate_query_handler_string =
620
+ if predicate_query_handler
621
+ "const predicateQueryHandler = #{predicate_query_handler}"
622
+ else
623
+ ""
624
+ end
625
+
626
+ <<~JAVASCRIPT
627
+ #{async ? 'async ' : ''}function _#{predicate_arg_def} {
628
+ #{predicate_query_handler_string}
629
+ #{predicate_body}
630
+
631
+ function checkWaitForOptions(node, waitForVisible, waitForHidden) {
632
+ if (!node) return waitForHidden;
633
+ if (!waitForVisible && !waitForHidden) return node;
634
+ const element =
635
+ node.nodeType === Node.TEXT_NODE ? node.parentElement : node;
636
+
637
+ const style = window.getComputedStyle(element);
638
+ const isVisible =
639
+ style && style.visibility !== 'hidden' && hasVisibleBoundingBox();
640
+ const success =
641
+ waitForVisible === isVisible || waitForHidden === !isVisible;
642
+ return success ? node : null;
643
+
644
+ /**
645
+ * @return {boolean}
646
+ */
647
+ function hasVisibleBoundingBox() {
648
+ const rect = element.getBoundingClientRect();
649
+ return !!(rect.top || rect.bottom || rect.width || rect.height);
650
+ }
651
+ }
652
+ }
653
+ JAVASCRIPT
654
+ end
494
655
  end