seleniumbase 4.32.1__py3-none-any.whl → 4.32.2__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -0,0 +1,625 @@
1
+ from __future__ import annotations
2
+ import asyncio
3
+ import collections
4
+ import inspect
5
+ import itertools
6
+ import json
7
+ import logging
8
+ import sys
9
+ import types
10
+ from asyncio import iscoroutine, iscoroutinefunction
11
+ from typing import (
12
+ Generator,
13
+ Union,
14
+ Awaitable,
15
+ Callable,
16
+ Any,
17
+ TypeVar,
18
+ )
19
+ import websockets
20
+ from . import cdp_util as util
21
+ import mycdp as cdp
22
+ import mycdp.network
23
+ import mycdp.page
24
+ import mycdp.storage
25
+ import mycdp.runtime
26
+ import mycdp.target
27
+ import mycdp.util
28
+
29
# Generic type variable available for annotations in this module.
T = TypeVar("T")

# Small delay constant, in seconds.
GLOBAL_DELAY = 5e-3

# Passed as ``max_size`` when the websocket is opened (2**28 bytes).
MAX_SIZE: int = 1 << 28

# Passed as ``ping_timeout`` when the websocket is opened.
PING_TIMEOUT: int = 30 * 60  # 30 minutes

# A target may be referenced by its info object or by its id.
TargetType = Union[cdp.target.TargetInfo, cdp.target.TargetID]

logger = logging.getLogger("uc.connection")
35
+
36
+
37
class ProtocolException(Exception):
    """Error raised for a failed or unparseable CDP exchange.

    The first positional argument decides how ``message`` and ``code``
    are populated:

    * a ``dict``: its ``"message"`` and ``"code"`` entries are used;
    * an object exposing ``to_json()``: the returned mapping is rendered
      as an indented ``key: value`` listing;
    * anything else, or no argument at all: every argument is converted
      with ``str`` and joined with ``"| "``.

    Keyword arguments are accepted but ignored.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args)
        self.message = None
        self.code = None
        # Guard against an empty ``args`` tuple: indexing it directly
        # would raise IndexError while constructing the exception.
        first = args[0] if args else None
        if isinstance(first, dict):
            self.message = first.get("message", None)
            self.code = first.get("code", None)
        elif hasattr(first, "to_json"):
            self.message = self._serialize(first.to_json())
        else:
            self.message = "| ".join(str(x) for x in args)

    @staticmethod
    def _serialize(obj, _d=0):
        """Render a (possibly nested) mapping, one tab per nesting level."""
        res = "\n"
        space = "\t" * _d
        for k, v in obj.items():
            if isinstance(v, dict):
                rendered = ProtocolException._serialize(v, _d + 1)
                res += f"{space}{k}: {rendered}\n"
            else:
                res += f"{space}{k}: {v}\n"
        return res

    def __str__(self):
        if self.code:
            return f"{self.message} [code: {self.code}]"
        return f"{self.message}"
61
+
62
+
63
class SettingClassVarNotAllowedException(PermissionError):
    """Raised by ``CantTouchThis`` when a class-level attribute is assigned."""
65
+
66
+
67
class Transaction(asyncio.Future):
    """A single CDP command as an awaitable future.

    The transaction wraps the generator produced by a ``cdp.<domain>``
    command function. The first value yielded by that generator is the
    request; once the response is fed back in (see ``__call__``), the
    generator's return value becomes the result of this future.
    """

    __cdp_obj__: Generator = None
    method: str = None
    params: dict = None
    id: int = None

    def __init__(self, cdp_obj: Generator):
        """
        :param cdp_obj: Generator whose first yielded value is a mapping
            holding the method name followed (optionally) by the params.
        """
        super().__init__()
        self.__cdp_obj__ = cdp_obj
        self.connection = None
        # First value of the request mapping is the method; whatever
        # follows is collected in ``params`` (an empty list when absent).
        self.method, *params = next(self.__cdp_obj__).values()
        if params:
            params = params.pop()
        self.params = params

    @property
    def message(self):
        """The JSON-encoded request, ready to be sent over the websocket."""
        return json.dumps(
            {"method": self.method, "params": self.params, "id": self.id}
        )

    @property
    def has_exception(self):
        """True when an exception is set.

        Also True when querying the exception itself fails, which is the
        case for a future that is still pending or has been cancelled.
        """
        try:
            if self.exception():
                return True
        except BaseException:
            return True
        return False

    def __call__(self, **response: dict):
        """
        Parses the response message and marks the future complete.

        A response that arrives after the future is already done (for
        example because the awaiting task was cancelled) is ignored;
        completing a finished future would raise ``InvalidStateError``.

        :param response: The decoded response message.
        :raises ProtocolException: If the generator does not finish after
            receiving the result.
        """
        if self.done():
            return None
        if "error" in response:
            # Set exception and bail out
            return self.set_exception(ProtocolException(response["error"]))
        try:
            # Try to parse the result according to the PyCDP docs.
            self.__cdp_obj__.send(response["result"])
        except StopIteration as e:
            # Exception value holds the parsed response
            return self.set_result(e.value)
        raise ProtocolException(
            "Could not parse the cdp response:\n%s" % response
        )

    def __repr__(self):
        finished = self.done()
        success = not (finished and self.has_exception)
        status = "finished" if finished else "pending"
        return (
            f"<{self.__class__.__name__}\n\t"
            f"method: {self.method}\n\t"
            f"status: {status}\n\t"
            f"success: {success}>"
        )
131
+
132
+
133
class EventTransaction(Transaction):
    """An already-completed transaction that carries a received event.

    The event object is both the future's result and available through
    the ``event`` and ``value`` attributes.
    """

    event = None
    value = None

    def __init__(self, event_object):
        """
        :param event_object: The parsed event to wrap.
        """
        # There is no command generator for an event, so the parent
        # constructor (which reads the first request from one) cannot be
        # used. Initialise the future directly and set the same
        # attributes the parent sets before it touches the generator.
        asyncio.Future.__init__(self)
        self.__cdp_obj__ = None
        self.connection = None
        self.set_result(event_object)
        self.event = self.value = self.result()

    def __repr__(self):
        status = "finished"
        success = False if self.exception() else True
        event_object = self.result()
        event_cls = event_object.__class__
        return (
            f"<{self.__class__.__name__}\n\t"
            f"event: {event_cls.__module__}.{event_cls.__name__}\n\t"
            f"status: {status}\n\t"
            f"success: {success}>"
        )
156
+
157
+
158
class CantTouchThis(type):
    """Metaclass that rejects attribute assignment on the class object.

    Only ``__annotations__`` may be set; any other assignment raises
    ``SettingClassVarNotAllowedException``.
    """

    def __setattr__(cls, attr, value):
        """:meta private:"""
        if attr != "__annotations__":
            template = "\n".join(
                (
                    "don't set '%s' on the %s class directly, "
                    "as those are shared with other objects.",
                    "use `my_object.%s = %s` instead",
                )
            )
            raise SettingClassVarNotAllowedException(
                template % (attr, cls.__name__, attr, value)
            )
        # Fix autodoc
        return super().__setattr__(attr, value)
174
+
175
+
176
class Connection(metaclass=CantTouchThis):
    """Websocket connection to a single CDP target.

    Commands are issued with :meth:`send`; incoming responses and events
    are read by a :class:`Listener` task. Attribute lookups that fail on
    the connection itself are forwarded to the ``target`` object.
    """

    attached: bool = None
    websocket: websockets.WebSocketClientProtocol
    _target: cdp.target.TargetInfo

    def __init__(
        self,
        websocket_url=None,
        target=None,
        _owner=None,
        **kwargs,
    ):
        """
        :param websocket_url: URL that :meth:`aopen` connects to.
        :param target: Initial target object (stored without validation).
        :param _owner: Owning object; its ``config`` is consulted by
            :meth:`send`.
        :param kwargs: Extra attributes copied onto the instance.
        """
        super().__init__()
        self._target = target
        self.__count__ = itertools.count(0)
        self._owner = _owner
        self.websocket_url: str = websocket_url
        self.websocket = None
        # Maps message id -> Transaction awaiting (or holding) a result.
        self.mapper = {}
        # Maps event type -> list of callbacks.
        self.handlers = collections.defaultdict(list)
        self.recv_task = None
        self.enabled_domains = []
        self._last_result = []
        self.listener: Listener = None
        self.__dict__.update(**kwargs)

    @property
    def target(self) -> cdp.target.TargetInfo:
        return self._target

    @target.setter
    def target(self, target: cdp.target.TargetInfo):
        if not isinstance(target, cdp.target.TargetInfo):
            raise TypeError(
                "target must be set to a '%s' but got '%s'"
                % (cdp.target.TargetInfo.__name__, type(target).__name__)
            )
        self._target = target

    @property
    def closed(self):
        """True when there is no websocket or the websocket is closed."""
        if not self.websocket:
            return True
        return self.websocket.closed

    def add_handler(
        self,
        event_type_or_domain: Union[type, types.ModuleType],
        handler: Union[Callable, Awaitable],
    ):
        """
        Add a handler for given event.
        If event_type_or_domain is a module instead of a type,
        it will find all available events and add the handler.
        If you want to receive event updates (eg. network traffic),
        you can add handlers for those events.
        Handlers can be regular callback functions
        or async coroutine functions (and also just lambdas).
        For example, if you want to check the network traffic:

        .. code-block::

            page.add_handler(
                cdp.network.RequestWillBeSent, lambda event: print(
                    'network event => %s' % event.request
                )
            )

        Next time there's network traffic, you'll see lots of console output.

        :param event_type_or_domain:
        :param handler:
        """
        if isinstance(event_type_or_domain, types.ModuleType):
            for name, obj in inspect.getmembers_static(event_type_or_domain):
                # Keep only CamelCase-looking classes: skip ALL_CAPS
                # names, names not starting with a capital, non-types
                # and builtins.
                if name.isupper():
                    continue
                if not name[0].isupper():
                    continue
                if not isinstance(obj, type):
                    continue
                if inspect.isbuiltin(obj):
                    continue
                self.handlers[obj].append(handler)
            return
        self.handlers[event_type_or_domain].append(handler)

    async def aopen(self, **kw):
        """
        Opens the websocket connection. Shouldn't be called manually by users.
        """
        if not self.websocket or self.websocket.closed:
            try:
                self.websocket = await websockets.connect(
                    self.websocket_url,
                    ping_timeout=PING_TIMEOUT,
                    max_size=MAX_SIZE,
                )
                self.listener = Listener(self)
            except (Exception,) as e:
                logger.debug("Exception during opening of websocket: %s", e)
                if self.listener:
                    self.listener.cancel()
                raise
        if not self.listener or not self.listener.running:
            self.listener = Listener(self)
            logger.debug(
                "\n✅ Opened websocket connection to %s", self.websocket_url
            )
        # When a websocket connection is closed (either by error or on purpose)
        # and reconnected, the registered event listeners (if any), should be
        # registered again, so the browser sends those events.
        await self._register_handlers()

    async def aclose(self):
        """
        Closes the websocket connection. Shouldn't be called manually by users.
        """
        if self.websocket and not self.websocket.closed:
            if self.listener and self.listener.running:
                self.listener.cancel()
                self.enabled_domains.clear()
            await self.websocket.close()
            logger.debug(
                "\n❌ Closed websocket connection to %s", self.websocket_url
            )

    async def sleep(self, t: Union[int, float] = 0.25):
        """Refresh the target info, then sleep for ``t`` seconds."""
        await self.update_target()
        await asyncio.sleep(t)

    def feed_cdp(self, cdp_obj):
        """
        Used in specific cases, mostly during cdp.fetch.RequestPaused events,
        in which the browser literally blocks.
        By using feed_cdp, you can issue a response without a blocking "await".
        Note: This method won't cause a response.
        Note: This is not an async method, just a regular method!
        :param cdp_obj:
        """
        asyncio.ensure_future(self.send(cdp_obj))

    async def wait(self, t: Union[int, float] = None):
        """
        Waits until the event listener reports idle
        (no new events received in certain timespan).
        When `t` is provided, ensures waiting for `t` seconds, no matter what.
        :param t:
        """
        await self.update_target()
        loop = asyncio.get_running_loop()
        start_time = loop.time()
        try:
            if isinstance(t, (int, float)):
                await asyncio.wait_for(self.listener.idle.wait(), timeout=t)
                # Idle was reported early: pad out the remaining time.
                while (loop.time() - start_time) < t:
                    await asyncio.sleep(0.1)
            else:
                await self.listener.idle.wait()
        except asyncio.TimeoutError:
            if isinstance(t, (int, float)):
                # Explicit time is given, which is now passed, so leave now.
                return
        except AttributeError:
            # No listener created yet.
            pass

    def __getattr__(self, item):
        """:meta private:"""
        # Only invoked when normal lookup fails. If ``_target`` itself is
        # missing (e.g. before __init__ ran), forwarding through
        # ``self.target`` would re-enter this method without end, so stop
        # here for the two names involved in that lookup.
        if item in ("_target", "target"):
            raise AttributeError(
                f"{type(self).__name__!r} object has no attribute {item!r}"
            )
        return getattr(self.target, item)

    async def __aenter__(self):
        """:meta private:"""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """:meta private:"""
        await self.aclose()
        # A falsy return lets an in-flight exception propagate unchanged,
        # keeping its original instance and traceback.
        return False

    def __await__(self):
        """
        Updates targets and wait for event listener to report idle.
        Idle is reported when no new message arrives within the
        listener's ``time_before_considered_idle``.
        """
        return self.wait().__await__()

    async def update_target(self):
        """Fetch the current target info and store it as ``target``."""
        # NOTE(review): send() can return None (closed websocket, or an
        # exception it swallowed); the target setter then raises TypeError.
        target_info: cdp.target.TargetInfo = await self.send(
            cdp.target.get_target_info(self.target_id), _is_update=True
        )
        self.target = target_info

    async def send(
        self,
        cdp_obj: Generator[dict[str, Any], dict[str, Any], Any],
        _is_update=False,
    ) -> Any:
        """
        Send a protocol command.
        The commands are made using any of the cdp.<domain>.<method>()'s
        and is used to send custom cdp commands as well.
        :param cdp_obj: The generator object created by a cdp method
        :param _is_update: Internal flag
            Prevents infinite loop by skipping the registration of handlers
            when multiple calls to connection.send() are made.
        :return: The parsed response, or None when the websocket is closed
            or an exception occurred while sending/awaiting.
        """
        await self.aopen()
        if not self.websocket or self.closed:
            return
        if self._owner:
            browser = self._owner
            if browser.config:
                if browser.config.expert:
                    await self._prepare_expert()
                if browser.config.headless:
                    await self._prepare_headless()
        if not self.listener or not self.listener.running:
            self.listener = Listener(self)
        try:
            tx = Transaction(cdp_obj)
            tx.connection = self
            if not self.mapper:
                # Nothing outstanding: restart message ids from zero.
                self.__count__ = itertools.count(0)
            tx.id = next(self.__count__)
            self.mapper.update({tx.id: tx})
            if not _is_update:
                await self._register_handlers()
            await self.websocket.send(tx.message)
            try:
                return await tx
            except ProtocolException as e:
                e.message += f"\ncommand:{tx.method}\nparams:{tx.params}"
                raise e
        except Exception:
            # NOTE(review): this also catches the ProtocolException
            # re-raised just above; the connection is closed and the
            # call returns None instead of raising.
            await self.aclose()

    async def _register_handlers(self):
        """
        Ensure that for current (event) handlers, the corresponding
        domain is enabled in the protocol.
        """
        # Start from a copy of the enabled domains and strike off every
        # domain still used by a handler; what remains can be removed.
        unused_domains = self.enabled_domains.copy()
        for event_type in self.handlers.copy():
            domain_mod = None
            if len(self.handlers[event_type]) == 0:
                self.handlers.pop(event_type)
                continue
            if isinstance(event_type, type):
                domain_mod = util.cdp_get_module(event_type.__module__)
            if domain_mod is None:
                # No domain module to enable for this handler key.
                continue
            if domain_mod in self.enabled_domains:
                # The domain is in use by a handler, so it is not unused.
                if domain_mod in unused_domains:
                    unused_domains.remove(domain_mod)
                continue
            if domain_mod in (cdp.target, cdp.storage):
                continue
            try:
                # Record the domain before sending to prevent infinite
                # loops through nested send() calls.
                logger.debug("Registered %s", domain_mod)
                self.enabled_domains.append(domain_mod)
                await self.send(domain_mod.enable(), _is_update=True)
            except BaseException as exc:  # Don't error before request is sent
                logger.debug("", exc_info=True)
                try:
                    self.enabled_domains.remove(domain_mod)
                except ValueError:
                    logger.debug("NOT GOOD", exc_info=True)
                if not isinstance(exc, Exception):
                    # Never swallow task cancellation or interpreter
                    # exit requests; ordinary errors stay best-effort.
                    raise
        for ed in unused_domains:
            # Items still present at this point are unused and need removal.
            self.enabled_domains.remove(ed)

    async def _prepare_headless(self):
        return  # (This functionality has moved to a new location!)

    async def _prepare_expert(self):
        """Install the shadow-root override script once per connection."""
        if getattr(self, "_prep_expert_done", None):
            return
        if self._owner:
            part1 = "Element.prototype._attachShadow = "
            part2 = "Element.prototype.attachShadow"
            parts = part1 + part2
            await self._send_oneshot(
                cdp.page.add_script_to_evaluate_on_new_document(
                    """
                    %s;
                    Element.prototype.attachShadow = function () {
                        return this._attachShadow( { mode: "open" } );
                    };
                    """ % parts
                )
            )
            await self._send_oneshot(cdp.page.enable())
        setattr(self, "_prep_expert_done", True)

    async def _send_oneshot(self, cdp_obj):
        """Send a command under the fixed id -2, ignoring protocol errors."""
        tx = Transaction(cdp_obj)
        tx.connection = self
        tx.id = -2
        self.mapper.update({tx.id: tx})
        await self.websocket.send(tx.message)
        try:
            # In try/except since if browser connection sends this,
            # then it raises an exception.
            return await tx
        except ProtocolException:
            pass
491
+
492
+
493
class Listener:
    """Background task that reads the websocket of one connection.

    Responses complete the matching transaction in the connection's
    ``mapper``; events are dispatched to the connection's handlers.
    ``idle`` is set whenever no message arrives within
    ``time_before_considered_idle`` seconds and cleared on every message.
    """

    def __init__(self, connection: "Connection"):
        self.connection = connection
        self.history = collections.deque()
        self.max_history = 1000
        self.task: asyncio.Future = None
        is_interactive = getattr(sys, "ps1", sys.flags.interactive)
        # Interactive sessions get a longer idle window.
        self._time_before_considered_idle = 0.10 if not is_interactive else 0.75  # noqa
        self.idle = asyncio.Event()
        self.run()

    def run(self):
        """Start the listener loop as a task on the running event loop."""
        self.task = asyncio.create_task(self.listener_loop())

    @property
    def time_before_considered_idle(self):
        return self._time_before_considered_idle

    @time_before_considered_idle.setter
    def time_before_considered_idle(self, seconds: Union[int, float]):
        self._time_before_considered_idle = seconds

    def cancel(self):
        """Cancel the listener task, if there is one not yet cancelled."""
        if self.task and not self.task.cancelled():
            self.task.cancel()

    @property
    def running(self):
        """True while a listener task exists and has not finished."""
        if not self.task:
            return False
        if self.task.done():
            return False
        return True

    async def listener_loop(self):
        """Read messages until the websocket fails or the task stops."""
        while True:
            try:
                msg = await asyncio.wait_for(
                    self.connection.websocket.recv(),
                    self.time_before_considered_idle,
                )
            except asyncio.TimeoutError:
                self.idle.set()
                continue
            except (Exception,) as e:
                logger.debug(
                    "Connection listener exception "
                    "while reading websocket:\n%s", e
                )
                break
            if not self.running:
                # If we have been cancelled or otherwise stopped running,
                # then break this loop.
                break
            self.idle.clear()  # Not "idle" anymore.
            message = json.loads(msg)
            if "id" in message:
                # A response to one of our commands. Ids that are not in
                # the mapper have nobody waiting for them and are dropped.
                if message["id"] in self.connection.mapper:
                    tx = self.connection.mapper.pop(message["id"])
                    logger.debug(
                        "Got answer for %s (message_id:%d)", tx, message["id"]
                    )
                    if not tx.done():
                        # A transaction that was cancelled in the
                        # meantime cannot be completed again.
                        tx(**message)
                continue
            # Probably an event
            try:
                event = cdp.util.parse_json_event(message)
                event_tx = EventTransaction(event)
                if not self.connection.mapper:
                    self.connection.__count__ = itertools.count(0)
                event_tx.id = next(self.connection.__count__)
                self.connection.mapper[event_tx.id] = event_tx
            except Exception as e:
                logger.info(
                    "%s: %s during parsing of json from event : %s",
                    type(e).__name__,
                    e.args,
                    message,
                    exc_info=True,
                )
                continue
            if type(event) not in self.connection.handlers:
                continue
            callbacks = self.connection.handlers[type(event)]
            if not callbacks:
                continue
            try:
                for callback in callbacks:
                    try:
                        if (
                            iscoroutinefunction(callback)
                            or iscoroutine(callback)
                        ):
                            # Prefer the (event, connection) signature and
                            # fall back to (event) on TypeError.
                            try:
                                await callback(event, self.connection)
                            except TypeError:
                                await callback(event)
                        else:
                            try:
                                callback(event, self.connection)
                            except TypeError:
                                callback(event)
                    except Exception as e:
                        logger.warning(
                            "Exception in callback %s for event %s => %s",
                            callback,
                            event.__class__.__name__,
                            e,
                            exc_info=True,
                        )
                        # A failing callback ends the listener loop.
                        raise
            except asyncio.CancelledError:
                break

    def __repr__(self):
        s_idle = "[idle]" if self.idle.is_set() else "[busy]"
        s_cache_length = f"[cache size: {len(self.history)}]"
        s_running = f"[running: {self.running}]"
        return (
            f"<{self.__class__.__name__} "
            f"{s_running} {s_idle} {s_cache_length}>"
        )