puppeteer-ruby 0.0.25 → 0.30.0

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,51 @@
1
+ class Puppeteer::CustomQueryHandler
2
+ # @param query_one [String] JS function (element: Element | Document, selector: string) => Element | null;
3
+ # @param query_all [String] JS function (element: Element | Document, selector: string) => Element[] | NodeListOf<Element>;
4
+ def initialize(query_one: nil, query_all: nil)
5
+ @query_one = query_one
6
+ @query_all = query_all
7
+ end
8
+
9
+ def query_one(element, selector)
10
+ unless @query_one
11
+ raise NotImplementedError.new("#{self.class}##{__method__} is not implemented.")
12
+ end
13
+
14
+ handle = element.evaluate_handle(@query_one, selector)
15
+ element = handle.as_element
16
+
17
+ if element
18
+ return element
19
+ end
20
+ handle.dispose
21
+ nil
22
+ end
23
+
24
+ def wait_for(dom_world, selector, visible: nil, hidden: nil, timeout: nil)
25
+ unless @query_one
26
+ raise NotImplementedError.new("#{self.class}##{__method__} is not implemented.")
27
+ end
28
+
29
+ dom_world.send(:wait_for_selector_in_page, @query_one, selector, visible: visible, hidden: hidden, timeout: timeout)
30
+ end
31
+
32
+ def query_all(element, selector)
33
+ unless @query_all
34
+ raise NotImplementedError.new("#{self.class}##{__method__} is not implemented.")
35
+ end
36
+
37
+ handles = element.evaluate_handle(@query_all, selector)
38
+ properties = handles.properties
39
+ handles.dispose
40
+ properties.values.map(&:as_element).compact
41
+ end
42
+
43
+ def query_all_array(element, selector)
44
+ unless @query_all
45
+ raise NotImplementedError.new("#{self.class}##{__method__} is not implemented.")
46
+ end
47
+
48
+ handles = element.evaluate_handle(@query_all, selector)
49
+ handles.evaluate_handle('(res) => Array.from(res)')
50
+ end
51
+ end
@@ -10,12 +10,21 @@ module Puppeteer::DefineAsyncMethod
10
10
  end
11
11
 
12
12
  original_method = instance_method(async_method_name[6..-1])
13
- define_method(async_method_name) do |*args|
14
- Concurrent::Promises.future do
15
- original_method.bind(self).call(*args)
16
- rescue => err
17
- Logger.new($stderr).warn(err)
18
- raise err
13
+ define_method(async_method_name) do |*args, **kwargs|
14
+ if kwargs.empty? # for Ruby < 2.7
15
+ Concurrent::Promises.future do
16
+ original_method.bind(self).call(*args)
17
+ rescue => err
18
+ Logger.new($stderr).warn(err)
19
+ raise err
20
+ end
21
+ else
22
+ Concurrent::Promises.future do
23
+ original_method.bind(self).call(*args, **kwargs)
24
+ rescue => err
25
+ Logger.new($stderr).warn(err)
26
+ raise err
27
+ end
19
28
  end
20
29
  end
21
30
  end
@@ -4,6 +4,47 @@ require 'thread'
4
4
  class Puppeteer::DOMWorld
5
5
  using Puppeteer::DefineAsyncMethod
6
6
 
7
+ class BindingFunction
8
+ def initialize(name:, proc:)
9
+ @name = name
10
+ @proc = proc
11
+ end
12
+
13
+ def call(*args)
14
+ @proc.call(*args)
15
+ end
16
+
17
+ attr_reader :name
18
+
19
+ def page_binding_init_string
20
+ <<~JAVASCRIPT
21
+ (type, bindingName) => {
22
+ /* Cast window to any here as we're about to add properties to it
23
+ * via win[bindingName] which TypeScript doesn't like.
24
+ */
25
+ const win = window;
26
+ const binding = win[bindingName];
27
+
28
+ win[bindingName] = (...args) => {
29
+ const me = window[bindingName];
30
+ let callbacks = me.callbacks;
31
+ if (!callbacks) {
32
+ callbacks = new Map();
33
+ me.callbacks = callbacks;
34
+ }
35
+ const seq = (me.lastSeq || 0) + 1;
36
+ me.lastSeq = seq;
37
+ const promise = new Promise((resolve, reject) =>
38
+ callbacks.set(seq, { resolve, reject })
39
+ );
40
+ binding(JSON.stringify({ type, name: bindingName, seq, args }));
41
+ return promise;
42
+ };
43
+ }
44
+ JAVASCRIPT
45
+ end
46
+ end
47
+
7
48
  # @param {!Puppeteer.FrameManager} frameManager
8
49
  # @param {!Puppeteer.Frame} frame
9
50
  # @param {!Puppeteer.TimeoutSettings} timeoutSettings
@@ -13,19 +54,29 @@ class Puppeteer::DOMWorld
13
54
  @timeout_settings = timeout_settings
14
55
  @context_promise = resolvable_future
15
56
  @wait_tasks = Set.new
57
+ @bound_functions = {}
58
+ @ctx_bindings = Set.new
16
59
  @detached = false
60
+
61
+ frame_manager.client.on_event('Runtime.bindingCalled', &method(:handle_binding_called))
17
62
  end
18
63
 
19
64
  attr_reader :frame
20
65
 
21
66
  # only used in Puppeteer::WaitTask#initialize
22
- def _wait_tasks
67
+ private def _wait_tasks
23
68
  @wait_tasks
24
69
  end
25
70
 
71
+ # only used in Puppeteer::WaitTask#initialize
72
+ private def _bound_functions
73
+ @bound_functions
74
+ end
75
+
26
76
  # @param context [Puppeteer::ExecutionContext]
27
77
  def context=(context)
28
78
  if context
79
+ @ctx_bindings.clear
29
80
  unless @context_promise.resolved?
30
81
  @context_promise.fulfill(context)
31
82
  end
@@ -75,12 +126,13 @@ class Puppeteer::DOMWorld
75
126
  execution_context.evaluate(page_function, *args)
76
127
  end
77
128
 
78
- # `$()` in JavaScript. $ is not allowed to use as a method name in Ruby.
129
+ # `$()` in JavaScript.
79
130
  # @param {string} selector
80
131
  # @return {!Promise<?Puppeteer.ElementHandle>}
81
- def S(selector)
82
- document.S(selector)
132
+ def query_selector(selector)
133
+ document.query_selector(selector)
83
134
  end
135
+ alias_method :S, :query_selector
84
136
 
85
137
  private def evaluate_document
86
138
  # sometimes execution_context.evaluate_handle('document') returns null object.
@@ -107,30 +159,33 @@ class Puppeteer::DOMWorld
107
159
  document.Sx(expression)
108
160
  end
109
161
 
110
- # `$eval()` in JavaScript. $ is not allowed to use as a method name in Ruby.
162
+ # `$eval()` in JavaScript.
111
163
  # @param {string} selector
112
164
  # @param {Function|string} pageFunction
113
165
  # @param {!Array<*>} args
114
166
  # @return {!Promise<(!Object|undefined)>}
115
- def Seval(selector, page_function, *args)
116
- document.Seval(selector, page_function, *args)
167
+ def eval_on_selector(selector, page_function, *args)
168
+ document.eval_on_selector(selector, page_function, *args)
117
169
  end
170
+ alias_method :Seval, :eval_on_selector
118
171
 
119
- # `$$eval()` in JavaScript. $ is not allowed to use as a method name in Ruby.
172
+ # `$$eval()` in JavaScript.
120
173
  # @param {string} selector
121
174
  # @param {Function|string} pageFunction
122
175
  # @param {!Array<*>} args
123
176
  # @return {!Promise<(!Object|undefined)>}
124
- def SSeval(selector, page_function, *args)
125
- document.SSeval(selector, page_function, *args)
177
+ def eval_on_selector_all(selector, page_function, *args)
178
+ document.eval_on_selector_all(selector, page_function, *args)
126
179
  end
180
+ alias_method :SSeval, :eval_on_selector_all
127
181
 
128
- # `$$()` in JavaScript. $ is not allowed to use as a method name in Ruby.
182
+ # `$$()` in JavaScript.
129
183
  # @param {string} selector
130
184
  # @return {!Promise<!Array<!Puppeteer.ElementHandle>>}
131
- def SS(selector)
132
- document.SS(selector)
185
+ def query_selector_all(selector)
186
+ document.query_selector_all(selector)
133
187
  end
188
+ alias_method :SS, :query_selector_all
134
189
 
135
190
  # @return [String]
136
191
  def content
@@ -325,14 +380,14 @@ class Puppeteer::DOMWorld
325
380
  # @param button [String] "left"|"right"|"middle"
326
381
  # @param click_count [Number]
327
382
  def click(selector, delay: nil, button: nil, click_count: nil)
328
- handle = S(selector) or raise ElementNotFoundError.new(selector)
383
+ handle = query_selector(selector) or raise ElementNotFoundError.new(selector)
329
384
  handle.click(delay: delay, button: button, click_count: click_count)
330
385
  handle.dispose
331
386
  end
332
387
 
333
388
  # @param selector [String]
334
389
  def focus(selector)
335
- handle = S(selector) or raise ElementNotFoundError.new(selector)
390
+ handle = query_selector(selector) or raise ElementNotFoundError.new(selector)
336
391
  handle.focus
337
392
  handle.dispose
338
393
  end
@@ -350,7 +405,7 @@ class Puppeteer::DOMWorld
350
405
  # @param selector [String]
351
406
  # @return [Array<String>]
352
407
  def select(selector, *values)
353
- handle = S(selector) or raise ElementNotFoundError.new(selector)
408
+ handle = query_selector(selector) or raise ElementNotFoundError.new(selector)
354
409
  result = handle.select(*values)
355
410
  handle.dispose
356
411
 
@@ -359,7 +414,7 @@ class Puppeteer::DOMWorld
359
414
 
360
415
  # @param selector [String]
361
416
  def tap(selector)
362
- handle = S(selector) or raise ElementNotFoundError.new(selector)
417
+ handle = query_selector(selector) or raise ElementNotFoundError.new(selector)
363
418
  handle.tap
364
419
  handle.dispose
365
420
  end
@@ -368,7 +423,7 @@ class Puppeteer::DOMWorld
368
423
  # @param text [String]
369
424
  # @param delay [Number]
370
425
  def type_text(selector, text, delay: nil)
371
- handle = S(selector) or raise ElementNotFoundError.new(selector)
426
+ handle = query_selector(selector) or raise ElementNotFoundError.new(selector)
372
427
  handle.type_text(text, delay: delay)
373
428
  handle.dispose
374
429
  end
@@ -378,61 +433,127 @@ class Puppeteer::DOMWorld
378
433
  # @param hidden [Boolean] When true, wait until the element is hidden ('display: none' or 'visibility: hidden'). Defaults to false.
379
434
  # @param timeout [Integer]
380
435
  def wait_for_selector(selector, visible: nil, hidden: nil, timeout: nil)
381
- wait_for_selector_or_xpath(selector, false, visible: visible, hidden: hidden, timeout: timeout)
436
+ # call wait_for_selector_in_page with custom query selector.
437
+ query_selector_manager = Puppeteer::QueryHandlerManager.instance
438
+ query_selector_manager.detect_query_handler(selector).wait_for(self, visible: visible, hidden: hidden, timeout: timeout)
382
439
  end
383
440
 
384
- # @param xpath [String]
385
- # @param visible [Boolean] Wait for element visible (not 'display: none' nor 'visibility: hidden') on true. default to false.
386
- # @param hidden [Boolean] Wait for element invisible ('display: none' nor 'visibility: hidden') on true. default to false.
387
- # @param timeout [Integer]
388
- def wait_for_xpath(xpath, visible: nil, hidden: nil, timeout: nil)
389
- wait_for_selector_or_xpath(xpath, true, visible: visible, hidden: hidden, timeout: timeout)
441
+ private def binding_identifier(name, context)
442
+ "#{name}_#{context.send(:_context_id)}"
390
443
  end
391
444
 
392
- # /**
393
- # * @param {Function|string} pageFunction
394
- # * @param {!{polling?: string|number, timeout?: number}=} options
395
- # * @return {!Promise<!Puppeteer.JSHandle>}
396
- # */
397
- # waitForFunction(pageFunction, options = {}, ...args) {
398
- # const {
399
- # polling = 'raf',
400
- # timeout = this._timeoutSettings.timeout(),
401
- # } = options;
402
- # return new WaitTask(this, pageFunction, 'function', polling, timeout, ...args).promise;
403
- # }
404
445
 
405
- # @param page_function [String]
406
- # @param args [Array]
407
- # @param polling [Integer|String]
446
+ def add_binding_to_context(context, binding_function)
447
+ return if @ctx_bindings.include?(binding_identifier(binding_function.name, context))
448
+
449
+ expression = binding_function.page_binding_init_string
450
+ begin
451
+ context.client.send_message('Runtime.addBinding',
452
+ name: binding_function.name,
453
+ executionContextName: context.send(:_context_name))
454
+ context.evaluate(expression, 'internal', binding_function.name)
455
+ rescue => err
456
+ # We could have tried to evaluate in a context which was already
457
+ # destroyed. This happens, for example, if the page is navigated while
458
+ # we are trying to add the binding
459
+ allowed = [
460
+ 'Execution context was destroyed',
461
+ 'Cannot find context with specified id',
462
+ ]
463
+ if allowed.any? { |msg| err.message.include?(msg) }
464
+ # ignore
465
+ else
466
+ raise
467
+ end
468
+ end
469
+ @ctx_bindings << binding_identifier(binding_function.name, context)
470
+ end
471
+
472
+ private def handle_binding_called(event)
473
+ return unless has_context?
474
+ payload = JSON.parse(event['payload']) rescue nil
475
+ name = payload['name']
476
+ args = payload['args']
477
+
478
+ # The binding was either called by something in the page or it was
479
+ # called before our wrapper was initialized.
480
+ return unless payload
481
+ return unless payload['type'] == 'internal'
482
+ context = execution_context
483
+ return unless @ctx_bindings.include?(binding_identifier(name, context))
484
+ return unless context.send(:_context_id) == event['executionContextId']
485
+
486
+ result = @bound_functions[name].call(*args)
487
+ deliver_result_js = <<~JAVASCRIPT
488
+ (name, seq, result) => {
489
+ globalThis[name].callbacks.get(seq).resolve(result);
490
+ globalThis[name].callbacks.delete(seq);
491
+ }
492
+ JAVASCRIPT
493
+
494
+ begin
495
+ context.evaluate(deliver_result_js, name, payload['seq'], result)
496
+ rescue => err
497
+ # The WaitTask may already have been resolved by timing out, or the
498
+ # execution context may have been destroyed.
499
+ # In both cases, the promises above are rejected with a protocol error.
500
+ # We can safely ignore these, as the WaitTask is re-installed in
501
+ # the next execution context if needed.
502
+ return if err.message.include?('Protocol error')
503
+ raise
504
+ end
505
+ end
506
+
507
+ # @param query_one [String] JS function (element: Element | Document, selector: string) => Element | null;
508
+ # @param selector [String]
509
+ # @param visible [Boolean] Wait for element visible (not 'display: none' nor 'visibility: hidden') on true. default to false.
510
+ # @param hidden [Boolean] Wait for element invisible ('display: none' nor 'visibility: hidden') on true. default to false.
408
511
  # @param timeout [Integer]
409
- # @return [Puppeteer::JSHandle]
410
- def wait_for_function(page_function, args: [], polling: nil, timeout: nil)
411
- option_polling = polling || 'raf'
512
+ private def wait_for_selector_in_page(query_one, selector, visible: nil, hidden: nil, timeout: nil, binding_function: nil)
513
+ option_wait_for_visible = visible || false
514
+ option_wait_for_hidden = hidden || false
412
515
  option_timeout = timeout || @timeout_settings.timeout
413
516
 
414
- Puppeteer::WaitTask.new(
517
+ polling =
518
+ if option_wait_for_visible || option_wait_for_hidden
519
+ 'raf'
520
+ else
521
+ 'mutation'
522
+ end
523
+ title = "selector #{selector}#{option_wait_for_hidden ? 'to be hidden' : ''}"
524
+
525
+ selector_predicate = make_predicate_string(
526
+ predicate_arg_def: '(selector, waitForVisible, waitForHidden)',
527
+ predicate_query_handler: query_one,
528
+ async: true,
529
+ predicate_body: <<~JAVASCRIPT
530
+ const node = await predicateQueryHandler(document, selector)
531
+ return checkWaitForOptions(node, waitForVisible, waitForHidden);
532
+ JAVASCRIPT
533
+ )
534
+
535
+ wait_task = Puppeteer::WaitTask.new(
415
536
  dom_world: self,
416
- predicate_body: page_function,
417
- title: 'function',
418
- polling: option_polling,
537
+ predicate_body: selector_predicate,
538
+ title: title,
539
+ polling: polling,
419
540
  timeout: option_timeout,
420
- args: args,
421
- ).await_promise
422
- end
423
-
424
-
425
- # @return [String]
426
- def title
427
- evaluate('() => document.title')
541
+ args: [selector, option_wait_for_visible, option_wait_for_hidden],
542
+ binding_function: binding_function,
543
+ )
544
+ handle = wait_task.await_promise
545
+ unless handle.as_element
546
+ handle.dispose
547
+ return nil
548
+ end
549
+ handle.as_element
428
550
  end
429
551
 
430
- # @param selector_or_xpath [String]
431
- # @param is_xpath [Boolean]
552
+ # @param xpath [String]
432
553
  # @param visible [Boolean] Wait for element visible (not 'display: none' nor 'visibility: hidden') on true. default to false.
433
554
  # @param hidden [Boolean] When true, wait until the element is hidden ('display: none' or 'visibility: hidden'). Defaults to false.
434
555
  # @param timeout [Integer]
435
- private def wait_for_selector_or_xpath(selector_or_xpath, is_xpath, visible: nil, hidden: nil, timeout: nil)
556
+ def wait_for_xpath(xpath, visible: nil, hidden: nil, timeout: nil)
436
557
  option_wait_for_visible = visible || false
437
558
  option_wait_for_hidden = hidden || false
438
559
  option_timeout = timeout || @timeout_settings.timeout
@@ -443,15 +564,23 @@ class Puppeteer::DOMWorld
443
564
  else
444
565
  'mutation'
445
566
  end
446
- title = "#{is_xpath ? :XPath : :selector} #{selector_or_xpath}#{option_wait_for_hidden ? 'to be hidden' : ''}"
567
+ title = "XPath #{xpath}#{option_wait_for_hidden ? 'to be hidden' : ''}"
568
+
569
+ xpath_predicate = make_predicate_string(
570
+ predicate_arg_def: '(selector, waitForVisible, waitForHidden)',
571
+ predicate_body: <<~JAVASCRIPT
572
+ const node = document.evaluate(selector, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue;
573
+ return checkWaitForOptions(node, waitForVisible, waitForHidden);
574
+ JAVASCRIPT
575
+ )
447
576
 
448
577
  wait_task = Puppeteer::WaitTask.new(
449
578
  dom_world: self,
450
- predicate_body: PREDICATE,
579
+ predicate_body: xpath_predicate,
451
580
  title: title,
452
581
  polling: polling,
453
582
  timeout: option_timeout,
454
- args: [selector_or_xpath, is_xpath, option_wait_for_visible, option_wait_for_hidden],
583
+ args: [xpath, option_wait_for_visible, option_wait_for_hidden],
455
584
  )
456
585
  handle = wait_task.await_promise
457
586
  unless handle.as_element
@@ -461,34 +590,66 @@ class Puppeteer::DOMWorld
461
590
  handle.as_element
462
591
  end
463
592
 
464
- PREDICATE = <<~JAVASCRIPT
465
- /**
466
- * @param {string} selectorOrXPath
467
- * @param {boolean} isXPath
468
- * @param {boolean} waitForVisible
469
- * @param {boolean} waitForHidden
470
- * @return {?Node|boolean}
471
- */
472
- function _(selectorOrXPath, isXPath, waitForVisible, waitForHidden) {
473
- const node = isXPath
474
- ? document.evaluate(selectorOrXPath, document, null, XPathResult.FIRST_ORDERED_NODE_TYPE, null).singleNodeValue
475
- : document.querySelector(selectorOrXPath);
476
- if (!node)
477
- return waitForHidden;
478
- if (!waitForVisible && !waitForHidden)
479
- return node;
480
- const element = /** @type {Element} */ (node.nodeType === Node.TEXT_NODE ? node.parentElement : node);
481
- const style = window.getComputedStyle(element);
482
- const isVisible = style && style.visibility !== 'hidden' && hasVisibleBoundingBox();
483
- const success = (waitForVisible === isVisible || waitForHidden === !isVisible);
484
- return success ? node : null;
485
- /**
486
- * @return {boolean}
487
- */
488
- function hasVisibleBoundingBox() {
489
- const rect = element.getBoundingClientRect();
490
- return !!(rect.top || rect.bottom || rect.width || rect.height);
491
- }
492
- }
493
- JAVASCRIPT
593
+ # @param page_function [String]
594
+ # @param args [Array]
595
+ # @param polling [Integer|String]
596
+ # @param timeout [Integer]
597
+ # @return [Puppeteer::JSHandle]
598
+ def wait_for_function(page_function, args: [], polling: nil, timeout: nil)
599
+ option_polling = polling || 'raf'
600
+ option_timeout = timeout || @timeout_settings.timeout
601
+
602
+ Puppeteer::WaitTask.new(
603
+ dom_world: self,
604
+ predicate_body: page_function,
605
+ title: 'function',
606
+ polling: option_polling,
607
+ timeout: option_timeout,
608
+ args: args,
609
+ ).await_promise
610
+ end
611
+
612
+
613
+ # @return [String]
614
+ def title
615
+ evaluate('() => document.title')
616
+ end
617
+
618
+ private def make_predicate_string(predicate_arg_def:, predicate_body:, predicate_query_handler: nil, async: false)
619
+ predicate_query_handler_string =
620
+ if predicate_query_handler
621
+ "const predicateQueryHandler = #{predicate_query_handler}"
622
+ else
623
+ ""
624
+ end
625
+
626
+ <<~JAVASCRIPT
627
+ #{async ? 'async ' : ''}function _#{predicate_arg_def} {
628
+ #{predicate_query_handler_string}
629
+ #{predicate_body}
630
+
631
+ function checkWaitForOptions(node, waitForVisible, waitForHidden) {
632
+ if (!node) return waitForHidden;
633
+ if (!waitForVisible && !waitForHidden) return node;
634
+ const element =
635
+ node.nodeType === Node.TEXT_NODE ? node.parentElement : node;
636
+
637
+ const style = window.getComputedStyle(element);
638
+ const isVisible =
639
+ style && style.visibility !== 'hidden' && hasVisibleBoundingBox();
640
+ const success =
641
+ waitForVisible === isVisible || waitForHidden === !isVisible;
642
+ return success ? node : null;
643
+
644
+ /**
645
+ * @return {boolean}
646
+ */
647
+ function hasVisibleBoundingBox() {
648
+ const rect = element.getBoundingClientRect();
649
+ return !!(rect.top || rect.bottom || rect.width || rect.height);
650
+ }
651
+ }
652
+ }
653
+ JAVASCRIPT
654
+ end
494
655
  end