seleniumbase 4.32.1__py3-none-any.whl → 4.32.2__py3-none-any.whl

Sign up to get free protection for your applications and to get access to all the features.
@@ -0,0 +1,625 @@
1
+ from __future__ import annotations
2
+ import asyncio
3
+ import collections
4
+ import inspect
5
+ import itertools
6
+ import json
7
+ import logging
8
+ import sys
9
+ import types
10
+ from asyncio import iscoroutine, iscoroutinefunction
11
+ from typing import (
12
+ Generator,
13
+ Union,
14
+ Awaitable,
15
+ Callable,
16
+ Any,
17
+ TypeVar,
18
+ )
19
+ import websockets
20
+ from . import cdp_util as util
21
+ import mycdp as cdp
22
+ import mycdp.network
23
+ import mycdp.page
24
+ import mycdp.storage
25
+ import mycdp.runtime
26
+ import mycdp.target
27
+ import mycdp.util
28
+
29
# Generic type variable available for annotations in this module.
T = TypeVar("T")
# NOTE(review): not referenced by the code visible in this module;
# presumably a pacing delay expressed in seconds - confirm with callers.
GLOBAL_DELAY = 0.005
# Passed as ``max_size`` to ``websockets.connect`` (2**28 == 268435456).
MAX_SIZE: int = 1 << 28
# Passed as ``ping_timeout`` to ``websockets.connect``: 30 minutes.
PING_TIMEOUT: int = 30 * 60
# Either a full target description or just its identifier.
TargetType = Union[cdp.target.TargetInfo, cdp.target.TargetID]
logger = logging.getLogger("uc.connection")
35
+
36
+
37
class ProtocolException(Exception):
    """
    Error raised for a failed or unparsable CDP exchange.

    The first positional argument decides how ``message`` and ``code``
    are populated:

    * a ``dict``: ``message`` and ``code`` are read from its keys;
    * an object with a ``to_json()`` method: the returned mapping is
      rendered as an indented, human readable ``message``;
    * anything else (including no arguments at all): ``message`` is all
      arguments converted to ``str`` and joined with ``"| "``.
    """

    def __init__(self, *args, **kwargs):
        super().__init__(*args)
        self.message = None
        self.code = None
        # Guard against construction without arguments, which used to
        # fail with an IndexError on ``args[0]``.
        first = args[0] if args else None
        if isinstance(first, dict):
            self.message = first.get("message", None)
            self.code = first.get("code", None)
        elif hasattr(first, "to_json"):
            self.message = self._serialize(first.to_json())
        else:
            self.message = "| ".join(str(x) for x in args)

    @staticmethod
    def _serialize(obj, _d=0):
        """Render the (possibly nested) mapping ``obj`` as indented text."""
        space = "\t" * _d
        res = "\n"
        for k, v in obj.items():
            if isinstance(v, dict):
                v = ProtocolException._serialize(v, _d + 1)
            res += f"{space}{k}: {v}\n"
        return res

    def __str__(self):
        if self.code:
            return f"{self.message} [code: {self.code}]"
        return f"{self.message}"
61
+
62
+
63
class SettingClassVarNotAllowedException(PermissionError):
    """Raised by ``CantTouchThis.__setattr__`` when a class attribute is set."""
65
+
66
+
67
class Transaction(asyncio.Future):
    """
    Future that represents one CDP command and its eventual response.

    The command is described by a generator: its first yielded value is a
    mapping holding the method name followed (optionally) by the
    parameters, and the value it returns after receiving the raw result
    becomes the result of this future.
    """

    __cdp_obj__: Generator = None
    method: str = None
    params: dict = None
    id: int = None

    def __init__(self, cdp_obj: Generator):
        """
        :param cdp_obj: The generator describing the command.
        """
        super().__init__()
        self.__cdp_obj__ = cdp_obj
        self.connection = None
        # The first value is the method name; whatever follows (at most
        # the params mapping) is collected into ``remaining``.
        method_name, *remaining = next(self.__cdp_obj__).values()
        self.method = method_name
        # Without params the (empty) list itself is stored.
        self.params = remaining.pop() if remaining else remaining

    @property
    def message(self):
        """The JSON text to put on the wire for this command."""
        payload = dict(method=self.method, params=self.params, id=self.id)
        return json.dumps(payload)

    @property
    def has_exception(self):
        """
        True when an exception is set, and also when asking for it
        raises (for example because the future is pending or cancelled).
        """
        try:
            failed = bool(self.exception())
        except BaseException:
            failed = True
        return failed

    def __call__(self, **response: dict):
        """
        Parses the response message and marks the future complete.

        :param response: The decoded response message.
        :raises ProtocolException: When the generator does not finish
            after being fed the result.
        """
        if "error" in response:
            # Set exception and bail out
            failure = ProtocolException(response["error"])
            return self.set_exception(failure)
        try:
            # Try to parse the result according to the PyCDP docs.
            self.__cdp_obj__.send(response["result"])
        except StopIteration as finished:
            # Exception value holds the parsed response
            return self.set_result(finished.value)
        else:
            raise ProtocolException(
                "Could not parse the cdp response:\n%s" % response
            )

    def __repr__(self):
        success = not (self.done() and self.has_exception)
        status = "finished" if self.done() else "pending"
        return (
            f"<{self.__class__.__name__}\n\t"
            f"method: {self.method}\n\t"
            f"status: {status}\n\t"
            f"success: {success}>"
        )
131
+
132
+
133
class EventTransaction(Transaction):
    """
    An already completed transaction that wraps a received event object.

    The event object is stored as the result of the future and is also
    exposed through the ``event`` and ``value`` attributes.
    """

    event = None
    value = None

    def __init__(self, event_object):
        """
        :param event_object: The parsed event to wrap.
        """
        # An event has no command generator, so the parent initialiser
        # (which consumes one) cannot be used. Set up the future and the
        # attributes the parent would have set explicitly, instead of
        # calling it with ``None`` and swallowing the resulting error.
        asyncio.Future.__init__(self)
        self.__cdp_obj__ = None
        self.connection = None
        self.set_result(event_object)
        self.event = self.value = self.result()

    def __repr__(self):
        status = "finished"
        success = False if self.exception() else True
        event_type = type(self.result())
        return (
            f"<{self.__class__.__name__}\n\t"
            f"event: {event_type.__module__}.{event_type.__name__}\n\t"
            f"status: {status}\n\t"
            f"success: {success}>"
        )
156
+
157
+
158
class CantTouchThis(type):
    """
    Metaclass that rejects assignments to attributes of the class itself.

    Only ``__annotations__`` may be assigned; any other assignment
    raises ``SettingClassVarNotAllowedException``.
    """

    def __setattr__(cls, attr, value):
        """:meta private:"""
        if attr != "__annotations__":
            template = (
                "don't set '%s' on the %s class directly, "
                "as those are shared with other objects."
                "\n"
                "use `my_object.%s = %s` instead"
            )
            raise SettingClassVarNotAllowedException(
                template % (attr, cls.__name__, attr, value)
            )
        # Fix autodoc
        return super().__setattr__(attr, value)
174
+
175
+
176
class Connection(metaclass=CantTouchThis):
    """
    A websocket connection to a single CDP target.

    Commands are sent with :meth:`send`; responses and events are read
    by a :class:`Listener` task. Attributes that are not found on the
    connection itself are looked up on the current ``target``.
    """

    attached: bool = None
    websocket: websockets.WebSocketClientProtocol
    _target: cdp.target.TargetInfo

    def __init__(
        self,
        websocket_url=None,
        target=None,
        _owner=None,
        **kwargs,
    ):
        """
        :param websocket_url: The url the websocket connects to.
        :param target: The target info this connection belongs to.
        :param _owner: The owning object; ``send`` reads its ``config``.
        :param kwargs: Extra attributes that are set on the instance.
        """
        super().__init__()
        self._target = target
        self.__count__ = itertools.count(0)
        self._owner = _owner
        self.websocket_url: str = websocket_url
        self.websocket = None
        # Maps a transaction id to its (pending) transaction.
        self.mapper = {}
        # Maps an event type to the list of registered callbacks.
        self.handlers = collections.defaultdict(list)
        self.recv_task = None
        self.enabled_domains = []
        self._last_result = []
        self.listener: Listener = None
        self.__dict__.update(**kwargs)

    @property
    def target(self) -> cdp.target.TargetInfo:
        return self._target

    @target.setter
    def target(self, target: cdp.target.TargetInfo):
        if not isinstance(target, cdp.target.TargetInfo):
            raise TypeError(
                "target must be set to a '%s' but got '%s'"
                % (cdp.target.TargetInfo.__name__, type(target).__name__)
            )
        self._target = target

    @property
    def closed(self):
        """True when there is no websocket or the websocket is closed."""
        if not self.websocket:
            return True
        return self.websocket.closed

    def add_handler(
        self,
        event_type_or_domain: Union[type, types.ModuleType],
        handler: Union[Callable, Awaitable],
    ):
        """
        Add a handler for given event.
        If event_type_or_domain is a module instead of a type,
        it will find all available events and add the handler.
        If you want to receive event updates (eg. network traffic),
        you can add handlers for those events.
        Handlers can be regular callback functions
        or async coroutine functions (and also just lambdas).
        For example, if you want to check the network traffic:

        .. code-block::

            page.add_handler(
                cdp.network.RequestWillBeSent, lambda event: print(
                    'network event => %s' % event.request
                )
            )

        Next time there's network traffic, you'll see lots of console output.

        :param event_type_or_domain:
        :param handler:
        """
        if not isinstance(event_type_or_domain, types.ModuleType):
            self.handlers[event_type_or_domain].append(handler)
            return
        # ``inspect.getmembers_static`` only exists on Python 3.11+.
        list_members = getattr(
            inspect, "getmembers_static", inspect.getmembers
        )
        for name, obj in list_members(event_type_or_domain):
            # Keep classes whose name starts with a capital letter but
            # is not written entirely in capitals.
            if name.isupper() or not name[0].isupper():
                continue
            if not isinstance(obj, type):
                continue
            if inspect.isbuiltin(obj):
                continue
            self.handlers[obj].append(handler)

    async def aopen(self, **kw):
        """
        Opens the websocket connection. Shouldn't be called manually by users.
        """
        if not self.websocket or self.websocket.closed:
            try:
                self.websocket = await websockets.connect(
                    self.websocket_url,
                    ping_timeout=PING_TIMEOUT,
                    max_size=MAX_SIZE,
                )
                self.listener = Listener(self)
            except (Exception,) as e:
                logger.debug("Exception during opening of websocket: %s", e)
                if self.listener:
                    self.listener.cancel()
                raise
        if not self.listener or not self.listener.running:
            self.listener = Listener(self)
            logger.debug(
                "\n✅ Opened websocket connection to %s", self.websocket_url
            )
        # When a websocket connection is closed (either by error or on purpose)
        # and reconnected, the registered event listeners (if any), should be
        # registered again, so the browser sends those events.
        await self._register_handlers()

    async def aclose(self):
        """
        Closes the websocket connection. Shouldn't be called manually by users.
        """
        if self.websocket and not self.websocket.closed:
            if self.listener and self.listener.running:
                self.listener.cancel()
                self.enabled_domains.clear()
            await self.websocket.close()
            logger.debug(
                "\n❌ Closed websocket connection to %s", self.websocket_url
            )

    async def sleep(self, t: Union[int, float] = 0.25):
        """Refresh the target info, then sleep for ``t`` seconds."""
        await self.update_target()
        await asyncio.sleep(t)

    def feed_cdp(self, cdp_obj):
        """
        Used in specific cases, mostly during cdp.fetch.RequestPaused events,
        in which the browser literally blocks.
        By using feed_cdp, you can issue a response without a blocking "await".
        Note: This method won't cause a response.
        Note: This is not an async method, just a regular method!

        :param cdp_obj:
        """
        asyncio.ensure_future(self.send(cdp_obj))

    async def wait(self, t: Union[int, float] = None):
        """
        Waits until the event listener reports idle
        (no new events received in certain timespan).
        When `t` is provided, ensures waiting for `t` seconds, no matter what.

        :param t:
        """
        await self.update_target()
        loop = asyncio.get_running_loop()
        start_time = loop.time()
        try:
            if isinstance(t, (int, float)):
                await asyncio.wait_for(self.listener.idle.wait(), timeout=t)
                while (loop.time() - start_time) < t:
                    await asyncio.sleep(0.1)
            else:
                await self.listener.idle.wait()
        except asyncio.TimeoutError:
            if isinstance(t, (int, float)):
                # Explicit time is given, which is now passed, so leave now.
                return
        except AttributeError:
            # No listener created yet.
            pass

    def __getattr__(self, item):
        """:meta private:"""
        # Only called when normal lookup fails. If the target itself is
        # what is missing, delegating to ``self.target`` would recurse
        # without end, so fail right away instead.
        if item in ("_target", "target"):
            raise AttributeError(
                "%r object has no attribute %r"
                % (type(self).__name__, item)
            )
        return getattr(self.target, item)

    async def __aenter__(self):
        """:meta private:"""
        return self

    async def __aexit__(self, exc_type, exc_val, exc_tb):
        """:meta private:"""
        await self.aclose()
        # Returning nothing lets an in-flight exception propagate as is,
        # with its original arguments and traceback.

    def __await__(self):
        """
        Updates targets and wait for event listener to report idle.
        Idle is reported when no new events are received for 1 second.
        """
        return self.wait().__await__()

    async def update_target(self):
        """Fetch the current target info and store it as ``target``."""
        target_info: cdp.target.TargetInfo = await self.send(
            cdp.target.get_target_info(self.target_id), _is_update=True
        )
        self.target = target_info

    async def send(
        self,
        cdp_obj: Generator[dict[str, Any], dict[str, Any], Any],
        _is_update=False,
    ) -> Any:
        """
        Send a protocol command.
        The commands are made using any of the cdp.<domain>.<method>()'s
        and is used to send custom cdp commands as well.

        Returns ``None`` when the websocket is not open. Any exception
        raised while sending or awaiting the answer (a protocol error
        included) closes the connection and also results in ``None``.

        :param cdp_obj: The generator object created by a cdp method
        :param _is_update: Internal flag
            Prevents infinite loop by skipping the registration of handlers
            when multiple calls to connection.send() are made.
        """
        await self.aopen()
        if not self.websocket or self.closed:
            return
        if self._owner:
            browser = self._owner
            if browser.config:
                if browser.config.expert:
                    await self._prepare_expert()
                if browser.config.headless:
                    await self._prepare_headless()
        if not self.listener or not self.listener.running:
            self.listener = Listener(self)
        try:
            tx = Transaction(cdp_obj)
            tx.connection = self
            if not self.mapper:
                # Nothing is pending, so numbering can start over.
                self.__count__ = itertools.count(0)
            tx.id = next(self.__count__)
            self.mapper.update({tx.id: tx})
            if not _is_update:
                await self._register_handlers()
            await self.websocket.send(tx.message)
            try:
                return await tx
            except ProtocolException as e:
                e.message += f"\ncommand:{tx.method}\nparams:{tx.params}"
                raise e
        except Exception:
            # NOTE(review): this also catches the ProtocolException that
            # was re-raised just above, so callers never see it.
            await self.aclose()

    async def _register_handlers(self):
        """
        Ensure that for current (event) handlers, the corresponding
        domain is enabled in the protocol.
        """
        # Save a copy of current enabled domains in a variable.
        # At the end, this variable will hold the domains that
        # are not represented by handlers, and can be removed.
        enabled_domains = self.enabled_domains.copy()
        for event_type in self.handlers.copy():
            domain_mod = None
            if len(self.handlers[event_type]) == 0:
                self.handlers.pop(event_type)
                continue
            if isinstance(event_type, type):
                domain_mod = util.cdp_get_module(event_type.__module__)
            if domain_mod in self.enabled_domains:
                # At this point, the domain is being used by a handler, so
                # remove that domain from temp variable 'enabled_domains'.
                if domain_mod in enabled_domains:
                    enabled_domains.remove(domain_mod)
                continue
            if domain_mod in (cdp.target, cdp.storage):
                continue
            try:
                # Prevent infinite loops: the domain is recorded before
                # the request is sent.
                logger.debug("Registered %s", domain_mod)
                self.enabled_domains.append(domain_mod)
                await self.send(domain_mod.enable(), _is_update=True)
            except BaseException:  # Don't error before request is sent
                logger.debug("", exc_info=True)
                try:
                    self.enabled_domains.remove(domain_mod)
                except BaseException:
                    logger.debug("NOT GOOD", exc_info=True)
        for ed in enabled_domains:
            # Items still present at this point are unused and need removal.
            self.enabled_domains.remove(ed)

    async def _prepare_headless(self):
        return  # (This functionality has moved to a new location!)

    async def _prepare_expert(self):
        """Install the shadow-root script once; later calls do nothing."""
        if getattr(self, "_prep_expert_done", None):
            return
        if self._owner:
            keep_original = (
                "Element.prototype._attachShadow = "
                "Element.prototype.attachShadow"
            )
            # Keeps a reference to the original ``attachShadow`` and
            # replaces it with one that always passes ``mode: "open"``.
            script = (
                "\n"
                "%s;\n"
                "Element.prototype.attachShadow = function () {\n"
                "return this._attachShadow( { mode: \"open\" } );\n"
                "};\n"
            ) % keep_original
            await self._send_oneshot(
                cdp.page.add_script_to_evaluate_on_new_document(script)
            )
            await self._send_oneshot(cdp.page.enable())
        setattr(self, "_prep_expert_done", True)

    async def _send_oneshot(self, cdp_obj):
        """
        Send a command under the fixed id ``-2`` and await its answer.
        A ProtocolException is ignored, in which case ``None`` is returned.
        """
        tx = Transaction(cdp_obj)
        tx.connection = self
        tx.id = -2
        self.mapper.update({tx.id: tx})
        await self.websocket.send(tx.message)
        try:
            # In try/except since if browser connection sends this,
            # then it raises an exception.
            return await tx
        except ProtocolException:
            pass
491
+
492
+
493
class Listener:
    """
    Background task that reads messages from a connection's websocket.

    Responses complete the matching transaction in ``connection.mapper``;
    events are parsed and passed to the callbacks registered in
    ``connection.handlers``. The ``idle`` event is set whenever no message
    arrived within ``time_before_considered_idle`` seconds.
    """

    def __init__(self, connection: Connection):
        """
        Create the listener and start its task right away.

        :param connection: The connection whose websocket is read.
        """
        self.connection = connection
        self.history = collections.deque()
        self.max_history = 1000
        self.task: asyncio.Future = None
        is_interactive = getattr(sys, "ps1", sys.flags.interactive)
        self._time_before_considered_idle = (
            0.10 if not is_interactive else 0.75
        )
        self.idle = asyncio.Event()
        self.run()

    def run(self):
        """Schedule :meth:`listener_loop` as a task."""
        self.task = asyncio.create_task(self.listener_loop())

    @property
    def time_before_considered_idle(self):
        return self._time_before_considered_idle

    @time_before_considered_idle.setter
    def time_before_considered_idle(self, seconds: Union[int, float]):
        self._time_before_considered_idle = seconds

    def cancel(self):
        """Cancel the task, unless there is none or it was cancelled."""
        if self.task and not self.task.cancelled():
            self.task.cancel()

    @property
    def running(self):
        """True while a task exists that is not done yet."""
        if not self.task:
            return False
        if self.task.done():
            return False
        return True

    async def listener_loop(self):
        """
        Read and dispatch messages until reading fails, the listener
        stops running, or a callback is cancelled. An exception raised
        by a callback is logged and then ends the loop by propagating.
        """
        while True:
            try:
                msg = await asyncio.wait_for(
                    self.connection.websocket.recv(),
                    self.time_before_considered_idle,
                )
            except asyncio.TimeoutError:
                self.idle.set()
                continue
            except (Exception,) as e:
                logger.debug(
                    "Connection listener exception "
                    "while reading websocket:\n%s", e
                )
                break
            if not self.running:
                # If we have been cancelled or otherwise stopped running,
                # then break this loop.
                break
            self.idle.clear()  # Not "idle" anymore.
            message = json.loads(msg)
            if "id" in message:
                # A response to one of our commands.
                if message["id"] in self.connection.mapper:
                    tx = self.connection.mapper.pop(message["id"])
                    logger.debug(
                        "Got answer for %s (message_id:%d)", tx, message["id"]
                    )
                    tx(**message)
                elif message["id"] == -2:
                    tx = self.connection.mapper.get(-2)
                    if tx:
                        tx(**message)
                continue
            # Probably an event
            try:
                event = cdp.util.parse_json_event(message)
                event_tx = EventTransaction(event)
                if not self.connection.mapper:
                    self.connection.__count__ = itertools.count(0)
                event_tx.id = next(self.connection.__count__)
                # NOTE(review): nothing in this module removes event
                # transactions from the mapper again - confirm whether
                # this unbounded growth is intended.
                self.connection.mapper[event_tx.id] = event_tx
            except Exception as e:
                # KeyError is a subclass of Exception, so it is handled
                # here as well; a separate handler could never run.
                logger.info(
                    "%s: %s during parsing of json from event : %s",
                    type(e).__name__,
                    e.args,
                    message,
                    exc_info=True,
                )
                continue
            callbacks = self.connection.handlers.get(type(event))
            if not callbacks:
                continue
            try:
                for callback in callbacks:
                    try:
                        if (
                            iscoroutinefunction(callback)
                            or iscoroutine(callback)
                        ):
                            # Try the two-argument form first and fall
                            # back to passing only the event.
                            try:
                                await callback(event, self.connection)
                            except TypeError:
                                await callback(event)
                        else:
                            try:
                                callback(event, self.connection)
                            except TypeError:
                                callback(event)
                    except Exception as e:
                        logger.warning(
                            "Exception in callback %s for event %s => %s",
                            callback,
                            event.__class__.__name__,
                            e,
                            exc_info=True,
                        )
                        raise
            except asyncio.CancelledError:
                break

    def __repr__(self):
        s_idle = "[idle]" if self.idle.is_set() else "[busy]"
        s_cache_length = f"[cache size: {len(self.history)}]"
        s_running = f"[running: {self.running}]"
        name = self.__class__.__name__
        return f"<{name} {s_running} {s_idle} {s_cache_length}>"