puppeteer-ruby 0.50.0.alpha5 → 0.50.0.alpha6

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,367 @@
1
+ # rbs_inline: enabled
2
+
3
+ class Puppeteer::PQueryHandler
4
+ POLLING_DEFAULT_SECONDS = 0.1
5
+
6
+ CSS_QUERY_SELECTOR_JS = <<~'JAVASCRIPT'
7
+ (element, selector) => {
8
+ const results = [];
9
+ const isQueryableNode = node => {
10
+ return node && typeof node.querySelectorAll === 'function';
11
+ };
12
+ if (!isQueryableNode(element)) {
13
+ return results;
14
+ }
15
+ const trimmed = selector.trimStart();
16
+ if (!trimmed) {
17
+ return results;
18
+ }
19
+ const IDENT_TOKEN_START = /[-\w\P{ASCII}*]/u;
20
+ if (IDENT_TOKEN_START.test(trimmed[0])) {
21
+ results.push(...element.querySelectorAll(selector));
22
+ return results;
23
+ }
24
+ if (!element.parentElement) {
25
+ results.push(...element.querySelectorAll(selector));
26
+ return results;
27
+ }
28
+ let index = 0;
29
+ for (const child of element.parentElement.children) {
30
+ ++index;
31
+ if (child === element) {
32
+ break;
33
+ }
34
+ }
35
+ results.push(
36
+ ...element.parentElement.querySelectorAll(
37
+ `:scope>:nth-child(${index})${selector}`
38
+ )
39
+ );
40
+ return results;
41
+ }
42
+ JAVASCRIPT
43
+
44
+ PIERCE_JS = <<~JAVASCRIPT
45
+ node => {
46
+ if (!node) {
47
+ return [];
48
+ }
49
+ if (node.shadowRoot) {
50
+ return [node.shadowRoot];
51
+ }
52
+ return [node];
53
+ }
54
+ JAVASCRIPT
55
+
56
+ PIERCE_ALL_JS = <<~JAVASCRIPT
57
+ root => {
58
+ if (!root) {
59
+ return [];
60
+ }
61
+ const results = [];
62
+ const rootNode = root.shadowRoot ? root.shadowRoot : root;
63
+ results.push(rootNode);
64
+ const walkers = [document.createTreeWalker(rootNode, NodeFilter.SHOW_ELEMENT)];
65
+ for (const walker of walkers) {
66
+ let node;
67
+ while ((node = walker.nextNode())) {
68
+ if (!node.shadowRoot) {
69
+ continue;
70
+ }
71
+ results.push(node.shadowRoot);
72
+ walkers.push(document.createTreeWalker(node.shadowRoot, NodeFilter.SHOW_ELEMENT));
73
+ }
74
+ }
75
+ return results;
76
+ }
77
+ JAVASCRIPT
78
+
79
+ DOM_SORT_JS = <<~JAVASCRIPT
80
+ (...elements) => {
81
+ const results = new Set(elements);
82
+ class DepthCalculator {
83
+ constructor() {
84
+ this.cache = new WeakMap();
85
+ }
86
+ calculate(node, depth = []) {
87
+ if (node === null) {
88
+ return depth;
89
+ }
90
+ if (node instanceof ShadowRoot) {
91
+ node = node.host;
92
+ }
93
+ const cached = this.cache.get(node);
94
+ if (cached) {
95
+ return [...cached, ...depth];
96
+ }
97
+ let index = 0;
98
+ for (let prev = node.previousSibling; prev; prev = prev.previousSibling) {
99
+ ++index;
100
+ }
101
+ const value = this.calculate(node.parentNode, [index]);
102
+ this.cache.set(node, value);
103
+ return [...value, ...depth];
104
+ }
105
+ }
106
+ const compareDepths = (a, b) => {
107
+ if (a.length + b.length === 0) {
108
+ return 0;
109
+ }
110
+ const [i = -1, ...restA] = a;
111
+ const [j = -1, ...restB] = b;
112
+ if (i === j) {
113
+ return compareDepths(restA, restB);
114
+ }
115
+ return i < j ? -1 : 1;
116
+ };
117
+ const calculator = new DepthCalculator();
118
+ return [...results]
119
+ .map(result => [result, calculator.calculate(result)])
120
+ .sort(([, a], [, b]) => compareDepths(a, b))
121
+ .map(([result]) => result);
122
+ }
123
+ JAVASCRIPT
124
+
125
+ # @rbs query_handler_manager: untyped -- Query handler manager
126
+ # @rbs return: void -- No return value
127
+ def initialize(query_handler_manager: Puppeteer::QueryHandlerManager.instance)
128
+ @query_handler_manager = query_handler_manager
129
+ end
130
+
131
+ # @rbs element: Puppeteer::ElementHandle -- Root element
132
+ # @rbs selector: String -- PSelector string
133
+ # @rbs return: Puppeteer::ElementHandle? -- First matching element
134
+ def query_one(element, selector)
135
+ matches = query_all(element, selector)
136
+ return nil if matches.empty?
137
+
138
+ matches.first
139
+ end
140
+
141
+ # @rbs element: Puppeteer::ElementHandle -- Root element
142
+ # @rbs selector: String -- PSelector string
143
+ # @rbs return: Array[Puppeteer::ElementHandle] -- Matching elements
144
+ def query_all(element, selector)
145
+ selectors, = Puppeteer::PSelectorParser.parse(selector)
146
+ return [] if selectors.empty?
147
+
148
+ results = selectors.flat_map do |complex|
149
+ run_complex_selector(element, complex)
150
+ end
151
+
152
+ return [] if results.empty?
153
+
154
+ dom_sort(results)
155
+ end
156
+
157
+ # @rbs element: Puppeteer::ElementHandle -- Root element
158
+ # @rbs selector: String -- PSelector string
159
+ # @rbs return: Puppeteer::JSHandle -- Handle to array of elements
160
+ def query_all_array(element, selector)
161
+ elements = query_all(element, selector)
162
+ return element.execution_context.evaluate_handle('() => []') if elements.empty?
163
+
164
+ element.execution_context.evaluate_handle('(...elements) => elements', *elements)
165
+ ensure
166
+ elements&.each(&:dispose)
167
+ end
168
+
169
+ # @rbs element_or_frame: Puppeteer::ElementHandle | Puppeteer::Frame -- Root element or frame
170
+ # @rbs selector: String -- PSelector string
171
+ # @rbs visible: bool? -- Wait for element to be visible
172
+ # @rbs hidden: bool? -- Wait for element to be hidden
173
+ # @rbs timeout: Numeric? -- Timeout in milliseconds
174
+ # @rbs polling: (String | Numeric | nil) -- Polling strategy
175
+ # @rbs return: Puppeteer::ElementHandle? -- Matching element
176
+ def wait_for(element_or_frame, selector, visible:, hidden:, timeout:, polling: nil)
177
+ case element_or_frame
178
+ when Puppeteer::Frame
179
+ frame = element_or_frame
180
+ root = nil
181
+ when Puppeteer::ElementHandle
182
+ frame = element_or_frame.frame
183
+ root = frame.puppeteer_world.adopt_handle(element_or_frame)
184
+ else
185
+ raise ArgumentError.new("element_or_frame must be a Frame or ElementHandle. #{element_or_frame.inspect}")
186
+ end
187
+
188
+ timeout_ms = timeout.nil? ? frame.default_timeout : timeout
189
+ deadline = timeout_ms && timeout_ms > 0 ? monotonic_time + (timeout_ms / 1000.0) : nil
190
+ polling_interval = polling_interval_seconds(polling, visible: visible, hidden: hidden)
191
+
192
+ begin
193
+ loop do
194
+ resolved_root = root || frame.puppeteer_world.document
195
+ handle = query_one(resolved_root, selector)
196
+
197
+ if handle
198
+ if visible
199
+ return transfer_handle(frame, handle) if handle.visible?
200
+ elsif hidden
201
+ return transfer_handle(frame, handle) if handle.hidden?
202
+ else
203
+ return transfer_handle(frame, handle)
204
+ end
205
+ handle.dispose
206
+ elsif hidden
207
+ return nil
208
+ end
209
+
210
+ raise_timeout(selector) if deadline && monotonic_time >= deadline
211
+ Puppeteer::AsyncUtils.sleep_seconds(polling_interval)
212
+ end
213
+ rescue => err
214
+ if err.is_a?(Puppeteer::TimeoutError)
215
+ raise
216
+ end
217
+
218
+ wait_for_selector_error =
219
+ if err.is_a?(Puppeteer::TimeoutError)
220
+ Puppeteer::TimeoutError.new("Waiting for selector `#{selector}` failed")
221
+ else
222
+ Puppeteer::Error.new("Waiting for selector `#{selector}` failed")
223
+ end
224
+ wait_for_selector_error.cause = err
225
+ raise wait_for_selector_error
226
+ ensure
227
+ root&.dispose
228
+ end
229
+ end
230
+
231
+ private
232
+ def run_complex_selector(root, complex)
233
+ elements = [root]
234
+
235
+ complex.each do |part|
236
+ case part
237
+ when '>>>'
238
+ elements = pierce_all(elements)
239
+ when '>>>>'
240
+ elements = pierce(elements)
241
+ else
242
+ elements = apply_compound(elements, part)
243
+ end
244
+ return [] if elements.empty?
245
+ end
246
+
247
+ elements
248
+ end
249
+
250
+ def apply_compound(elements, compound)
251
+ compound.reduce(elements) do |current, selector|
252
+ next [] if current.empty?
253
+
254
+ if selector.is_a?(String)
255
+ apply_css_selector(current, selector)
256
+ else
257
+ apply_pseudo_selector(current, selector)
258
+ end
259
+ end
260
+ end
261
+
262
+ def apply_css_selector(elements, selector)
263
+ elements.flat_map do |element|
264
+ query_css_selector(element, selector)
265
+ end
266
+ end
267
+
268
+ def query_css_selector(element, selector)
269
+ return [] if selector.strip.empty?
270
+
271
+ array_handle = element.evaluate_handle(CSS_QUERY_SELECTOR_JS, selector)
272
+ array_handle_to_elements(array_handle)
273
+ ensure
274
+ array_handle&.dispose
275
+ end
276
+
277
+ def apply_pseudo_selector(elements, pseudo)
278
+ handler = query_handler_for_pseudo(pseudo.name)
279
+ target_context = elements.first&.execution_context
280
+
281
+ elements.flat_map do |element|
282
+ handles = handler.query_all(element, pseudo.value) || []
283
+ handles.map do |handle|
284
+ ensure_context(handle, target_context)
285
+ end
286
+ end
287
+ end
288
+
289
+ def query_handler_for_pseudo(name)
290
+ handler = @query_handler_manager.query_handlers[name.to_sym]
291
+ return handler if handler
292
+
293
+ raise Puppeteer::Error.new("Unknown selector type: #{name}")
294
+ end
295
+
296
+ def pierce(elements)
297
+ elements.flat_map do |element|
298
+ array_handle = element.evaluate_handle(PIERCE_JS)
299
+ array_handle_to_elements(array_handle)
300
+ ensure
301
+ array_handle&.dispose
302
+ end
303
+ end
304
+
305
+ def pierce_all(elements)
306
+ elements.flat_map do |element|
307
+ array_handle = element.evaluate_handle(PIERCE_ALL_JS)
308
+ array_handle_to_elements(array_handle)
309
+ ensure
310
+ array_handle&.dispose
311
+ end
312
+ end
313
+
314
+ def dom_sort(elements)
315
+ return [] if elements.empty?
316
+
317
+ context = elements.first.execution_context
318
+ sorted_handle = context.evaluate_handle(DOM_SORT_JS, *elements)
319
+ sorted = array_handle_to_elements(sorted_handle)
320
+ elements.each(&:dispose)
321
+ sorted
322
+ ensure
323
+ sorted_handle&.dispose
324
+ end
325
+
326
+ def array_handle_to_elements(array_handle)
327
+ properties = array_handle.properties
328
+ properties.keys.sort_by(&:to_i).filter_map do |key|
329
+ properties[key].as_element
330
+ end
331
+ end
332
+
333
+ def ensure_context(handle, context)
334
+ return handle if context.nil? || handle.execution_context == context
335
+
336
+ world = context.world
337
+ return handle unless world
338
+
339
+ world.transfer_handle(handle)
340
+ end
341
+
342
+ def transfer_handle(frame, handle)
343
+ if handle.execution_context == frame.main_world.execution_context
344
+ handle
345
+ else
346
+ frame.main_world.transfer_handle(handle)
347
+ end
348
+ end
349
+
350
+ def polling_interval_seconds(polling, visible:, hidden:)
351
+ return polling / 1000.0 if polling.is_a?(Numeric)
352
+
353
+ if visible || hidden
354
+ return 0.016
355
+ end
356
+
357
+ POLLING_DEFAULT_SECONDS
358
+ end
359
+
360
+ def monotonic_time
361
+ Process.clock_gettime(Process::CLOCK_MONOTONIC)
362
+ end
363
+
364
+ def raise_timeout(selector)
365
+ raise Puppeteer::TimeoutError.new("Waiting for selector `#{selector}` failed")
366
+ end
367
+ end
@@ -0,0 +1,241 @@
1
+ # rbs_inline: enabled
2
+
3
+ module Puppeteer
4
+ module PSelectorParser
5
+ class PseudoSelector
6
+ attr_reader :name, :value
7
+
8
+ def initialize(name:, value:)
9
+ @name = name
10
+ @value = value
11
+ end
12
+ end
13
+
14
+ PSEUDO_PREFIX = '::-p-'
15
+
16
+ # @rbs selector: String -- Selector to parse
17
+ # @rbs return: [Array[untyped], bool, bool, bool] -- Parsed selectors and flags
18
+ def self.parse(selector)
19
+ is_pure_css = true
20
+ has_pseudo_classes = false
21
+ has_aria = false
22
+
23
+ selectors = []
24
+ compound = []
25
+ complex = [compound]
26
+ storage = +''
27
+
28
+ i = 0
29
+ length = selector.length
30
+ in_quote = nil
31
+ escaped = false
32
+ paren_depth = 0
33
+
34
+ while i < length
35
+ char = selector[i]
36
+
37
+ if escaped
38
+ storage << char
39
+ escaped = false
40
+ i += 1
41
+ next
42
+ end
43
+
44
+ if char == '\\'
45
+ storage << char
46
+ escaped = true
47
+ i += 1
48
+ next
49
+ end
50
+
51
+ if in_quote
52
+ if char == in_quote
53
+ in_quote = nil
54
+ end
55
+ storage << char
56
+ i += 1
57
+ next
58
+ end
59
+
60
+ if char == '"' || char == "'"
61
+ in_quote = char
62
+ storage << char
63
+ i += 1
64
+ next
65
+ end
66
+
67
+ if char == '('
68
+ paren_depth += 1
69
+ storage << char
70
+ i += 1
71
+ next
72
+ end
73
+
74
+ if char == ')'
75
+ paren_depth = [paren_depth - 1, 0].max
76
+ storage << char
77
+ i += 1
78
+ next
79
+ end
80
+
81
+ if paren_depth == 0
82
+ if starts_with_at?(selector, i, '>>>>')
83
+ flush_storage(storage, compound)
84
+ complex << '>>>>'
85
+ compound = []
86
+ complex << compound
87
+ is_pure_css = false
88
+ i += 4
89
+ next
90
+ end
91
+
92
+ if starts_with_at?(selector, i, '>>>')
93
+ flush_storage(storage, compound)
94
+ complex << '>>>'
95
+ compound = []
96
+ complex << compound
97
+ is_pure_css = false
98
+ i += 3
99
+ next
100
+ end
101
+
102
+ if starts_with_at?(selector, i, PSEUDO_PREFIX)
103
+ flush_storage(storage, compound)
104
+ i += PSEUDO_PREFIX.length
105
+ name, i = parse_name(selector, i)
106
+ raise ArgumentError.new('Invalid PSelector name') if name.empty?
107
+
108
+ i = skip_spaces(selector, i)
109
+ value = ''
110
+ if selector[i] == '('
111
+ value, i = parse_argument(selector, i)
112
+ end
113
+ value = unquote(value.strip)
114
+
115
+ compound << PseudoSelector.new(name: name, value: value)
116
+ is_pure_css = false
117
+ has_aria = true if name == 'aria'
118
+ next
119
+ end
120
+
121
+ if char == ','
122
+ flush_storage(storage, compound)
123
+ selectors << complex
124
+ compound = []
125
+ complex = [compound]
126
+ i += 1
127
+ next
128
+ end
129
+
130
+ if char == ':' && selector[i, PSEUDO_PREFIX.length] != PSEUDO_PREFIX
131
+ if selector[i + 1] != ':'
132
+ has_pseudo_classes = true
133
+ end
134
+ end
135
+ end
136
+
137
+ storage << char
138
+ i += 1
139
+ end
140
+
141
+ raise ArgumentError.new('Unterminated string in PSelector') if in_quote
142
+ raise ArgumentError.new('Unterminated parentheses in PSelector') if paren_depth != 0
143
+
144
+ flush_storage(storage, compound)
145
+ selectors << complex if complex.any?
146
+
147
+ [selectors, is_pure_css, has_pseudo_classes, has_aria]
148
+ end
149
+
150
+ def self.flush_storage(storage, compound)
151
+ css = storage.strip
152
+ compound << css unless css.empty?
153
+ storage.clear
154
+ end
155
+ private_class_method :flush_storage
156
+
157
+ def self.parse_name(selector, index)
158
+ name = +''
159
+ while index < selector.length && selector[index] =~ /[A-Za-z0-9_-]/
160
+ name << selector[index]
161
+ index += 1
162
+ end
163
+ [name, index]
164
+ end
165
+ private_class_method :parse_name
166
+
167
+ def self.skip_spaces(selector, index)
168
+ index += 1 while index < selector.length && selector[index] =~ /\s/
169
+ index
170
+ end
171
+ private_class_method :skip_spaces
172
+
173
+ def self.parse_argument(selector, index)
174
+ raise ArgumentError.new('Expected opening parenthesis') unless selector[index] == '('
175
+
176
+ index += 1
177
+ start = index
178
+ depth = 1
179
+ in_quote = nil
180
+ escaped = false
181
+
182
+ while index < selector.length
183
+ char = selector[index]
184
+
185
+ if escaped
186
+ escaped = false
187
+ index += 1
188
+ next
189
+ end
190
+
191
+ if char == '\\'
192
+ escaped = true
193
+ index += 1
194
+ next
195
+ end
196
+
197
+ if in_quote
198
+ if char == in_quote
199
+ in_quote = nil
200
+ end
201
+ index += 1
202
+ next
203
+ end
204
+
205
+ if char == '"' || char == "'"
206
+ in_quote = char
207
+ index += 1
208
+ next
209
+ end
210
+
211
+ if char == '('
212
+ depth += 1
213
+ elsif char == ')'
214
+ depth -= 1
215
+ if depth == 0
216
+ value = selector[start...index]
217
+ return [value, index + 1]
218
+ end
219
+ end
220
+
221
+ index += 1
222
+ end
223
+
224
+ raise ArgumentError.new('Unterminated PSelector argument')
225
+ end
226
+ private_class_method :parse_argument
227
+
228
+ def self.starts_with_at?(selector, index, token)
229
+ selector[index, token.length] == token
230
+ end
231
+ private_class_method :starts_with_at?
232
+
233
+ def self.unquote(text)
234
+ if text.length > 1 && (text.start_with?('"') || text.start_with?("'")) && text.end_with?(text[0])
235
+ text = text[1...-1]
236
+ end
237
+ text.gsub(/\\([\s\S])/m, '\\1')
238
+ end
239
+ private_class_method :unquote
240
+ end
241
+ end
@@ -459,6 +459,21 @@ class Puppeteer::Page
459
459
  @timeout_settings.default_timeout = timeout
460
460
  end
461
461
 
462
+ # @rbs return: Numeric -- Default timeout in milliseconds
463
+ def default_timeout
464
+ @timeout_settings.timeout
465
+ end
466
+
467
+ # @rbs selector_or_function: String -- Selector or JS function
468
+ # @rbs return: Puppeteer::Locator -- Locator for selector or function
469
+ def locator(selector_or_function)
470
+ if Puppeteer::Locator.function_string?(selector_or_function)
471
+ Puppeteer::FunctionLocator.create(self, selector_or_function)
472
+ else
473
+ Puppeteer::NodeLocator.create(self, selector_or_function)
474
+ end
475
+ end
476
+
462
477
  # `$()` in JavaScript.
463
478
  # @rbs selector: String -- CSS selector
464
479
  # @rbs return: Puppeteer::ElementHandle? -- Matching element or nil
@@ -45,62 +45,7 @@ class Puppeteer::QueryHandlerManager
45
45
  end
46
46
 
47
47
  private def p_query_handler
48
- @p_query_handler ||= Puppeteer::CustomQueryHandler.new(
49
- query_one: <<~JAVASCRIPT,
50
- (element, selector) => {
51
- const parts = selector.split('>>>').map((part) => part.trim()).filter(Boolean);
52
- let roots = [element];
53
- if (parts.length === 0) return null;
54
- for (let i = 0; i < parts.length; i++) {
55
- const part = parts[i];
56
- const next = [];
57
- for (const root of roots) {
58
- const scope = root;
59
- const elements = scope.querySelectorAll(part);
60
- for (const node of elements) {
61
- if (i === parts.length - 1) {
62
- next.push(node);
63
- } else if (node.shadowRoot) {
64
- next.push(node.shadowRoot);
65
- }
66
- }
67
- }
68
- if (i === parts.length - 1) {
69
- return next[0] || null;
70
- }
71
- roots = next;
72
- }
73
- return null;
74
- }
75
- JAVASCRIPT
76
- query_all: <<~JAVASCRIPT,
77
- (element, selector) => {
78
- const parts = selector.split('>>>').map((part) => part.trim()).filter(Boolean);
79
- let roots = [element];
80
- if (parts.length === 0) return [];
81
- for (let i = 0; i < parts.length; i++) {
82
- const part = parts[i];
83
- const next = [];
84
- for (const root of roots) {
85
- const scope = root;
86
- const elements = scope.querySelectorAll(part);
87
- for (const node of elements) {
88
- if (i === parts.length - 1) {
89
- next.push(node);
90
- } else if (node.shadowRoot) {
91
- next.push(node.shadowRoot);
92
- }
93
- }
94
- }
95
- if (i === parts.length - 1) {
96
- return next;
97
- }
98
- roots = next;
99
- }
100
- return [];
101
- }
102
- JAVASCRIPT
103
- )
48
+ @p_query_handler ||= Puppeteer::PQueryHandler.new(query_handler_manager: self)
104
49
  end
105
50
 
106
51
  private def xpath_handler
@@ -331,11 +276,20 @@ class Puppeteer::QueryHandlerManager
331
276
  raise ArgumentError.new("Query set to use \"#{name}\", but no query handler of that name was found")
332
277
  end
333
278
  polling = name == 'aria' ? 'raf' : 'mutation'
334
- elsif selector.include?('>>>')
335
- query_handler = p_query_handler
336
- polling = 'raf'
337
279
  else
338
- query_handler = default_handler
280
+ begin
281
+ _selectors, is_pure_css, has_pseudo_classes, has_aria =
282
+ Puppeteer::PSelectorParser.parse(selector)
283
+ if is_pure_css
284
+ polling = has_pseudo_classes ? 'raf' : 'mutation'
285
+ query_handler = default_handler
286
+ else
287
+ polling = has_aria ? 'raf' : 'mutation'
288
+ query_handler = p_query_handler
289
+ end
290
+ rescue StandardError
291
+ query_handler = default_handler
292
+ end
339
293
  end
340
294
 
341
295
  Result.new(