scrapling 0.3.6__py3-none-any.whl → 0.3.7__py3-none-any.whl

This diff shows the changes between two publicly available versions of this package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
@@ -1,3 +1,4 @@
1
+ from random import randint
1
2
  from re import compile as re_compile
2
3
 
3
4
  from playwright.sync_api import (
@@ -20,10 +21,12 @@ from ._validators import validate_fetch as _validate
20
21
  from ._base import SyncSession, AsyncSession, StealthySessionMixin
21
22
  from scrapling.core.utils import log
22
23
  from scrapling.core._types import (
24
+ Any,
23
25
  Dict,
24
26
  List,
25
27
  Optional,
26
28
  Callable,
29
+ TYPE_CHECKING,
27
30
  SelectorWaitStates,
28
31
  )
29
32
  from scrapling.engines.toolbelt.convertor import (
@@ -33,7 +36,7 @@ from scrapling.engines.toolbelt.convertor import (
33
36
  from scrapling.engines.toolbelt.fingerprints import generate_convincing_referer
34
37
 
35
38
  __CF_PATTERN__ = re_compile("challenges.cloudflare.com/cdn-cgi/challenge-platform/.*")
36
- _UNSET = object()
39
+ _UNSET: Any = object()
37
40
 
38
41
 
39
42
  class StealthySession(StealthySessionMixin, SyncSession):
@@ -101,6 +104,7 @@ class StealthySession(StealthySessionMixin, SyncSession):
101
104
  os_randomize: bool = False,
102
105
  disable_ads: bool = False,
103
106
  geoip: bool = False,
107
+ user_data_dir: str = "",
104
108
  selector_config: Optional[Dict] = None,
105
109
  additional_args: Optional[Dict] = None,
106
110
  ):
@@ -133,6 +137,7 @@ class StealthySession(StealthySessionMixin, SyncSession):
133
137
  :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
134
138
  :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
135
139
  :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
140
+ :param user_data_dir: Path to a User Data Directory, which stores browser session data like cookies and local storage. The default is to create a temporary directory.
136
141
  :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
137
142
  :param additional_args: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
138
143
  """
@@ -156,6 +161,7 @@ class StealthySession(StealthySessionMixin, SyncSession):
156
161
  block_images=block_images,
157
162
  block_webrtc=block_webrtc,
158
163
  os_randomize=os_randomize,
164
+ user_data_dir=user_data_dir,
159
165
  wait_selector=wait_selector,
160
166
  google_search=google_search,
161
167
  extra_headers=extra_headers,
@@ -170,9 +176,7 @@ class StealthySession(StealthySessionMixin, SyncSession):
170
176
  def __create__(self):
171
177
  """Create a browser for this instance and context."""
172
178
  self.playwright = sync_playwright().start()
173
- self.context = self.playwright.firefox.launch_persistent_context( # pragma: no cover
174
- **self.launch_options
175
- )
179
+ self.context = self.playwright.firefox.launch_persistent_context(**self.launch_options)
176
180
 
177
181
  if self.init_script: # pragma: no cover
178
182
  self.context.add_init_script(path=self.init_script)
@@ -203,9 +207,9 @@ class StealthySession(StealthySessionMixin, SyncSession):
203
207
  self._closed = True
204
208
 
205
209
  @staticmethod
206
- def _get_page_content(page: Page) -> str | None:
210
+ def _get_page_content(page: Page) -> str:
207
211
  """
208
- A workaround for Playwright issue with `page.content()` on Windows. Ref.: https://github.com/microsoft/playwright/issues/16108
212
+ A workaround for the Playwright issue with `page.content()` on Windows. Ref.: https://github.com/microsoft/playwright/issues/16108
209
213
  :param page: The page to extract content from.
210
214
  :return:
211
215
  """
@@ -215,6 +219,7 @@ class StealthySession(StealthySessionMixin, SyncSession):
215
219
  except PlaywrightError:
216
220
  page.wait_for_timeout(1000)
217
221
  continue
222
+ return "" # pyright: ignore
218
223
 
219
224
  def _solve_cloudflare(self, page: Page) -> None: # pragma: no cover
220
225
  """Solve the cloudflare challenge displayed on the playwright page passed
@@ -222,6 +227,10 @@ class StealthySession(StealthySessionMixin, SyncSession):
222
227
  :param page: The targeted page
223
228
  :return:
224
229
  """
230
+ try:
231
+ page.wait_for_load_state("networkidle", timeout=5000)
232
+ except PlaywrightError:
233
+ pass
225
234
  challenge_type = self._detect_cloudflare(self._get_page_content(page))
226
235
  if not challenge_type:
227
236
  log.error("No Cloudflare challenge found.")
@@ -244,26 +253,35 @@ class StealthySession(StealthySessionMixin, SyncSession):
244
253
  # Waiting for the verify spinner to disappear, checking every 1s if it disappeared
245
254
  page.wait_for_timeout(500)
246
255
 
256
+ outer_box = {}
247
257
  iframe = page.frame(url=__CF_PATTERN__)
248
- if iframe is None:
249
- log.error("Didn't find Cloudflare iframe!")
250
- return
258
+ if iframe is not None:
259
+ iframe.wait_for_load_state(state="domcontentloaded")
260
+ iframe.wait_for_load_state("networkidle")
251
261
 
252
- if challenge_type != "embedded":
253
- while not iframe.frame_element().is_visible():
254
- # Double-checking that the iframe is loaded
255
- page.wait_for_timeout(500)
262
+ if challenge_type != "embedded":
263
+ while not iframe.frame_element().is_visible():
264
+ # Double-checking that the iframe is loaded
265
+ page.wait_for_timeout(500)
266
+ outer_box: Any = iframe.frame_element().bounding_box()
267
+
268
+ if not iframe or not outer_box:
269
+ outer_box: Any = page.locator(box_selector).last.bounding_box()
256
270
 
257
- iframe.wait_for_load_state(state="domcontentloaded")
258
- iframe.wait_for_load_state("networkidle")
259
271
  # Calculate the Captcha coordinates for any viewport
260
- outer_box = page.locator(box_selector).last.bounding_box()
261
- captcha_x, captcha_y = outer_box["x"] + 26, outer_box["y"] + 25
272
+ captcha_x, captcha_y = outer_box["x"] + randint(26, 28), outer_box["y"] + randint(25, 27)
262
273
 
263
274
  # Move the mouse to the center of the window, then press and hold the left mouse button
264
275
  page.mouse.click(captcha_x, captcha_y, delay=60, button="left")
276
+ page.wait_for_load_state("networkidle")
277
+ if iframe is not None:
278
+ # Wait for the frame to be removed from the page
279
+ while iframe in page.frames:
280
+ page.wait_for_timeout(100)
265
281
  if challenge_type != "embedded":
282
+ page.locator(box_selector).last.wait_for(state="detached")
266
283
  page.locator(".zone-name-title").wait_for(state="hidden")
284
+ page.wait_for_load_state(state="load")
267
285
  page.wait_for_load_state(state="domcontentloaded")
268
286
 
269
287
  log.info("Cloudflare captcha is solved")
@@ -335,6 +353,7 @@ class StealthySession(StealthySessionMixin, SyncSession):
335
353
  if (
336
354
  finished_response.request.resource_type == "document"
337
355
  and finished_response.request.is_navigation_request()
356
+ and finished_response.request.frame == page_info.page.main_frame
338
357
  ):
339
358
  final_response = finished_response
340
359
 
@@ -387,7 +406,7 @@ class StealthySession(StealthySessionMixin, SyncSession):
387
406
  page_info.page, first_response, final_response, params.selector_config
388
407
  )
389
408
 
390
- # Close the page, to free up resources
409
+ # Close the page to free up resources
391
410
  page_info.page.close()
392
411
  self.page_pool.pages.remove(page_info)
393
412
 
@@ -427,6 +446,7 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
427
446
  os_randomize: bool = False,
428
447
  disable_ads: bool = False,
429
448
  geoip: bool = False,
449
+ user_data_dir: str = "",
430
450
  selector_config: Optional[Dict] = None,
431
451
  additional_args: Optional[Dict] = None,
432
452
  ):
@@ -460,6 +480,7 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
460
480
  :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
461
481
  :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
462
482
  :param max_pages: The maximum number of tabs to be opened at the same time. It will be used in rotation through a PagePool.
483
+ :param user_data_dir: Path to a User Data Directory, which stores browser session data like cookies and local storage. The default is to create a temporary directory.
463
484
  :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
464
485
  :param additional_args: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
465
486
  """
@@ -485,6 +506,7 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
485
506
  wait_selector=wait_selector,
486
507
  google_search=google_search,
487
508
  extra_headers=extra_headers,
509
+ user_data_dir=user_data_dir,
488
510
  additional_args=additional_args,
489
511
  selector_config=selector_config,
490
512
  solve_cloudflare=solve_cloudflare,
@@ -504,7 +526,7 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
504
526
  await self.context.add_init_script(path=self.init_script)
505
527
 
506
528
  if self.cookies:
507
- await self.context.add_cookies(self.cookies)
529
+ await self.context.add_cookies(self.cookies) # pyright: ignore [reportArgumentType]
508
530
 
509
531
  async def __aenter__(self):
510
532
  await self.__create__()
@@ -520,18 +542,18 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
520
542
 
521
543
  if self.context:
522
544
  await self.context.close()
523
- self.context = None
545
+ self.context = None # pyright: ignore
524
546
 
525
547
  if self.playwright:
526
548
  await self.playwright.stop()
527
- self.playwright = None
549
+ self.playwright = None # pyright: ignore
528
550
 
529
551
  self._closed = True
530
552
 
531
553
  @staticmethod
532
- async def _get_page_content(page: async_Page) -> str | None:
554
+ async def _get_page_content(page: async_Page) -> str:
533
555
  """
534
- A workaround for Playwright issue with `page.content()` on Windows. Ref.: https://github.com/microsoft/playwright/issues/16108
556
+ A workaround for the Playwright issue with `page.content()` on Windows. Ref.: https://github.com/microsoft/playwright/issues/16108
535
557
  :param page: The page to extract content from.
536
558
  :return:
537
559
  """
@@ -541,6 +563,7 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
541
563
  except PlaywrightError:
542
564
  await page.wait_for_timeout(1000)
543
565
  continue
566
+ return "" # pyright: ignore
544
567
 
545
568
  async def _solve_cloudflare(self, page: async_Page):
546
569
  """Solve the cloudflare challenge displayed on the playwright page passed. The async version
@@ -548,6 +571,10 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
548
571
  :param page: The async targeted page
549
572
  :return:
550
573
  """
574
+ try:
575
+ await page.wait_for_load_state("networkidle", timeout=5000)
576
+ except PlaywrightError:
577
+ pass
551
578
  challenge_type = self._detect_cloudflare(await self._get_page_content(page))
552
579
  if not challenge_type:
553
580
  log.error("No Cloudflare challenge found.")
@@ -570,26 +597,35 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
570
597
  # Waiting for the verify spinner to disappear, checking every 1s if it disappeared
571
598
  await page.wait_for_timeout(500)
572
599
 
600
+ outer_box = {}
573
601
  iframe = page.frame(url=__CF_PATTERN__)
574
- if iframe is None:
575
- log.error("Didn't find Cloudflare iframe!")
576
- return
602
+ if iframe is not None:
603
+ await iframe.wait_for_load_state(state="domcontentloaded")
604
+ await iframe.wait_for_load_state("networkidle")
577
605
 
578
- if challenge_type != "embedded":
579
- while not await (await iframe.frame_element()).is_visible():
580
- # Double-checking that the iframe is loaded
581
- await page.wait_for_timeout(500)
606
+ if challenge_type != "embedded":
607
+ while not await (await iframe.frame_element()).is_visible():
608
+ # Double-checking that the iframe is loaded
609
+ await page.wait_for_timeout(500)
610
+ outer_box: Any = await (await iframe.frame_element()).bounding_box()
611
+
612
+ if not iframe or not outer_box:
613
+ outer_box: Any = await page.locator(box_selector).last.bounding_box()
582
614
 
583
- await iframe.wait_for_load_state(state="domcontentloaded")
584
- await iframe.wait_for_load_state("networkidle")
585
615
  # Calculate the Captcha coordinates for any viewport
586
- outer_box = await page.locator(box_selector).last.bounding_box()
587
- captcha_x, captcha_y = outer_box["x"] + 26, outer_box["y"] + 25
616
+ captcha_x, captcha_y = outer_box["x"] + randint(26, 28), outer_box["y"] + randint(25, 27)
588
617
 
589
618
  # Move the mouse to the center of the window, then press and hold the left mouse button
590
619
  await page.mouse.click(captcha_x, captcha_y, delay=60, button="left")
620
+ await page.wait_for_load_state("networkidle")
621
+ if iframe is not None:
622
+ # Wait for the frame to be removed from the page
623
+ while iframe in page.frames:
624
+ await page.wait_for_timeout(100)
591
625
  if challenge_type != "embedded":
626
+ await page.locator(box_selector).wait_for(state="detached")
592
627
  await page.locator(".zone-name-title").wait_for(state="hidden")
628
+ await page.wait_for_load_state(state="load")
593
629
  await page.wait_for_load_state(state="domcontentloaded")
594
630
 
595
631
  log.info("Cloudflare captcha is solved")
@@ -661,12 +697,17 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
661
697
  if (
662
698
  finished_response.request.resource_type == "document"
663
699
  and finished_response.request.is_navigation_request()
700
+ and finished_response.request.frame == page_info.page.main_frame
664
701
  ):
665
702
  final_response = finished_response
666
703
 
667
704
  page_info = await self._get_page(params.timeout, params.extra_headers, params.disable_resources)
668
705
  page_info.mark_busy(url=url)
669
706
 
707
+ if TYPE_CHECKING:
708
+ if not isinstance(page_info.page, async_Page):
709
+ raise TypeError
710
+
670
711
  try:
671
712
  # Navigate to URL and wait for a specified state
672
713
  page_info.page.on("response", handle_response)
@@ -715,7 +756,7 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
715
756
  page_info.page, first_response, final_response, params.selector_config
716
757
  )
717
758
 
718
- # Close the page, to free up resources
759
+ # Close the page to free up resources
719
760
  await page_info.page.close()
720
761
  self.page_pool.pages.remove(page_info)
721
762
 
@@ -62,7 +62,7 @@ def _set_flags(hide_canvas, disable_webgl): # pragma: no cover
62
62
  @lru_cache(2, typed=True)
63
63
  def _launch_kwargs(
64
64
  headless,
65
- proxy,
65
+ proxy: Tuple,
66
66
  locale,
67
67
  extra_headers,
68
68
  useragent,
@@ -10,6 +10,7 @@ from playwright.async_api import (
10
10
  BrowserContext as AsyncBrowserContext,
11
11
  Playwright as AsyncPlaywright,
12
12
  Locator as AsyncLocator,
13
+ Page as async_Page,
13
14
  )
14
15
  from patchright.sync_api import sync_playwright as sync_patchright
15
16
  from patchright.async_api import async_playwright as async_patchright
@@ -18,10 +19,12 @@ from scrapling.core.utils import log
18
19
  from ._base import SyncSession, AsyncSession, DynamicSessionMixin
19
20
  from ._validators import validate_fetch as _validate
20
21
  from scrapling.core._types import (
22
+ Any,
21
23
  Dict,
22
24
  List,
23
25
  Optional,
24
26
  Callable,
27
+ TYPE_CHECKING,
25
28
  SelectorWaitStates,
26
29
  )
27
30
  from scrapling.engines.toolbelt.convertor import (
@@ -30,7 +33,7 @@ from scrapling.engines.toolbelt.convertor import (
30
33
  )
31
34
  from scrapling.engines.toolbelt.fingerprints import generate_convincing_referer
32
35
 
33
- _UNSET = object()
36
+ _UNSET: Any = object()
34
37
 
35
38
 
36
39
  class DynamicSession(DynamicSessionMixin, SyncSession):
@@ -94,7 +97,9 @@ class DynamicSession(DynamicSessionMixin, SyncSession):
94
97
  network_idle: bool = False,
95
98
  load_dom: bool = True,
96
99
  wait_selector_state: SelectorWaitStates = "attached",
100
+ user_data_dir: str = "",
97
101
  selector_config: Optional[Dict] = None,
102
+ additional_args: Optional[Dict] = None,
98
103
  ):
99
104
  """A Browser session manager with page pooling, it's using a persistent browser Context by default with a temporary user profile directory.
100
105
 
@@ -121,7 +126,9 @@ class DynamicSession(DynamicSessionMixin, SyncSession):
121
126
  :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
122
127
  :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
123
128
  :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
129
+ :param user_data_dir: Path to a User Data Directory, which stores browser session data like cookies and local storage. The default is to create a temporary directory.
124
130
  :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
131
+ :param additional_args: Additional arguments to be passed to Playwright's context as additional settings, and it takes higher priority than Scrapling's settings.
125
132
  """
126
133
  self.__validate__(
127
134
  wait=wait,
@@ -140,11 +147,13 @@ class DynamicSession(DynamicSessionMixin, SyncSession):
140
147
  hide_canvas=hide_canvas,
141
148
  init_script=init_script,
142
149
  network_idle=network_idle,
150
+ user_data_dir=user_data_dir,
143
151
  google_search=google_search,
144
152
  extra_headers=extra_headers,
145
153
  wait_selector=wait_selector,
146
154
  disable_webgl=disable_webgl,
147
155
  selector_config=selector_config,
156
+ additional_args=additional_args,
148
157
  disable_resources=disable_resources,
149
158
  wait_selector_state=wait_selector_state,
150
159
  )
@@ -154,14 +163,14 @@ class DynamicSession(DynamicSessionMixin, SyncSession):
154
163
  """Create a browser for this instance and context."""
155
164
  sync_context = sync_patchright if self.stealth else sync_playwright
156
165
 
157
- self.playwright: Playwright = sync_context().start()
166
+ self.playwright: Playwright = sync_context().start() # pyright: ignore [reportAttributeAccessIssue]
158
167
 
159
168
  if self.cdp_url: # pragma: no cover
160
169
  self.context = self.playwright.chromium.connect_over_cdp(endpoint_url=self.cdp_url).new_context(
161
170
  **self.context_options
162
171
  )
163
172
  else:
164
- self.context = self.playwright.chromium.launch_persistent_context(user_data_dir="", **self.launch_options)
173
+ self.context = self.playwright.chromium.launch_persistent_context(**self.launch_options)
165
174
 
166
175
  if self.init_script: # pragma: no cover
167
176
  self.context.add_init_script(path=self.init_script)
@@ -187,7 +196,7 @@ class DynamicSession(DynamicSessionMixin, SyncSession):
187
196
 
188
197
  if self.playwright:
189
198
  self.playwright.stop()
190
- self.playwright = None
199
+ self.playwright = None # pyright: ignore
191
200
 
192
201
  self._closed = True
193
202
 
@@ -254,6 +263,7 @@ class DynamicSession(DynamicSessionMixin, SyncSession):
254
263
  if (
255
264
  finished_response.request.resource_type == "document"
256
265
  and finished_response.request.is_navigation_request()
266
+ and finished_response.request.frame == page_info.page.main_frame
257
267
  ):
258
268
  final_response = finished_response
259
269
 
@@ -299,7 +309,7 @@ class DynamicSession(DynamicSessionMixin, SyncSession):
299
309
  page_info.page, first_response, final_response, params.selector_config
300
310
  )
301
311
 
302
- # Close the page, to free up resources
312
+ # Close the page to free up resources
303
313
  page_info.page.close()
304
314
  self.page_pool.pages.remove(page_info)
305
315
 
@@ -337,7 +347,9 @@ class AsyncDynamicSession(DynamicSessionMixin, AsyncSession):
337
347
  network_idle: bool = False,
338
348
  load_dom: bool = True,
339
349
  wait_selector_state: SelectorWaitStates = "attached",
350
+ user_data_dir: str = "",
340
351
  selector_config: Optional[Dict] = None,
352
+ additional_args: Optional[Dict] = None,
341
353
  ):
342
354
  """A Browser session manager with page pooling
343
355
 
@@ -365,7 +377,9 @@ class AsyncDynamicSession(DynamicSessionMixin, AsyncSession):
365
377
  :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
366
378
  :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
367
379
  :param max_pages: The maximum number of tabs to be opened at the same time. It will be used in rotation through a PagePool.
380
+ :param user_data_dir: Path to a User Data Directory, which stores browser session data like cookies and local storage. The default is to create a temporary directory.
368
381
  :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
382
+ :param additional_args: Additional arguments to be passed to Playwright's context as additional settings, and it takes higher priority than Scrapling's settings.
369
383
  """
370
384
 
371
385
  self.__validate__(
@@ -385,11 +399,13 @@ class AsyncDynamicSession(DynamicSessionMixin, AsyncSession):
385
399
  hide_canvas=hide_canvas,
386
400
  init_script=init_script,
387
401
  network_idle=network_idle,
402
+ user_data_dir=user_data_dir,
388
403
  google_search=google_search,
389
404
  extra_headers=extra_headers,
390
405
  wait_selector=wait_selector,
391
406
  disable_webgl=disable_webgl,
392
407
  selector_config=selector_config,
408
+ additional_args=additional_args,
393
409
  disable_resources=disable_resources,
394
410
  wait_selector_state=wait_selector_state,
395
411
  )
@@ -399,21 +415,21 @@ class AsyncDynamicSession(DynamicSessionMixin, AsyncSession):
399
415
  """Create a browser for this instance and context."""
400
416
  async_context = async_patchright if self.stealth else async_playwright
401
417
 
402
- self.playwright: AsyncPlaywright = await async_context().start()
418
+ self.playwright: AsyncPlaywright = await async_context().start() # pyright: ignore [reportAttributeAccessIssue]
403
419
 
404
420
  if self.cdp_url:
405
421
  browser = await self.playwright.chromium.connect_over_cdp(endpoint_url=self.cdp_url)
406
422
  self.context: AsyncBrowserContext = await browser.new_context(**self.context_options)
407
423
  else:
408
424
  self.context: AsyncBrowserContext = await self.playwright.chromium.launch_persistent_context(
409
- user_data_dir="", **self.launch_options
425
+ **self.launch_options
410
426
  )
411
427
 
412
428
  if self.init_script: # pragma: no cover
413
429
  await self.context.add_init_script(path=self.init_script)
414
430
 
415
431
  if self.cookies:
416
- await self.context.add_cookies(self.cookies)
432
+ await self.context.add_cookies(self.cookies) # pyright: ignore
417
433
 
418
434
  async def __aenter__(self):
419
435
  await self.__create__()
@@ -429,11 +445,11 @@ class AsyncDynamicSession(DynamicSessionMixin, AsyncSession):
429
445
 
430
446
  if self.context:
431
447
  await self.context.close()
432
- self.context = None
448
+ self.context = None # pyright: ignore
433
449
 
434
450
  if self.playwright:
435
451
  await self.playwright.stop()
436
- self.playwright = None
452
+ self.playwright = None # pyright: ignore
437
453
 
438
454
  self._closed = True
439
455
 
@@ -500,12 +516,17 @@ class AsyncDynamicSession(DynamicSessionMixin, AsyncSession):
500
516
  if (
501
517
  finished_response.request.resource_type == "document"
502
518
  and finished_response.request.is_navigation_request()
519
+ and finished_response.request.frame == page_info.page.main_frame
503
520
  ):
504
521
  final_response = finished_response
505
522
 
506
523
  page_info = await self._get_page(params.timeout, params.extra_headers, params.disable_resources)
507
524
  page_info.mark_busy(url=url)
508
525
 
526
+ if TYPE_CHECKING:
527
+ if not isinstance(page_info.page, async_Page):
528
+ raise TypeError
529
+
509
530
  try:
510
531
  # Navigate to URL and wait for a specified state
511
532
  page_info.page.on("response", handle_response)
@@ -545,7 +566,7 @@ class AsyncDynamicSession(DynamicSessionMixin, AsyncSession):
545
566
  page_info.page, first_response, final_response, params.selector_config
546
567
  )
547
568
 
548
- # Close the page, to free up resources
569
+ # Close the page to free up resources
549
570
  await page_info.page.close()
550
571
  self.page_pool.pages.remove(page_info)
551
572
  return response
@@ -11,7 +11,9 @@ from scrapling.core._types import (
11
11
  Tuple,
12
12
  Optional,
13
13
  Callable,
14
+ Iterable,
14
15
  SelectorWaitStates,
16
+ overload,
15
17
  )
16
18
  from scrapling.engines.toolbelt.navigation import construct_proxy_dict
17
19
 
@@ -73,7 +75,7 @@ class PlaywrightConfig(Struct, kw_only=True, frozen=False):
73
75
  stealth: bool = False
74
76
  wait: Seconds = 0
75
77
  page_action: Optional[Callable] = None
76
- proxy: Optional[str | Dict[str, str]] = None # The default value for proxy in Playwright's source is `None`
78
+ proxy: Optional[str | Dict[str, str] | Tuple] = None # The default value for proxy in Playwright's source is `None`
77
79
  locale: str = "en-US"
78
80
  extra_headers: Optional[Dict[str, str]] = None
79
81
  useragent: Optional[str] = None
@@ -81,11 +83,13 @@ class PlaywrightConfig(Struct, kw_only=True, frozen=False):
81
83
  init_script: Optional[str] = None
82
84
  disable_resources: bool = False
83
85
  wait_selector: Optional[str] = None
84
- cookies: Optional[List[Dict]] = None
86
+ cookies: Optional[Iterable[Dict]] = None
85
87
  network_idle: bool = False
86
88
  load_dom: bool = True
87
89
  wait_selector_state: SelectorWaitStates = "attached"
88
- selector_config: Optional[Dict] = None
90
+ user_data_dir: str = ""
91
+ selector_config: Optional[Dict] = {}
92
+ additional_args: Optional[Dict] = {}
89
93
 
90
94
  def __post_init__(self):
91
95
  """Custom validation after msgspec validation"""
@@ -100,6 +104,8 @@ class PlaywrightConfig(Struct, kw_only=True, frozen=False):
100
104
  self.cookies = []
101
105
  if not self.selector_config:
102
106
  self.selector_config = {}
107
+ if not self.additional_args:
108
+ self.additional_args = {}
103
109
 
104
110
  if self.init_script is not None:
105
111
  _validate_file_path(self.init_script)
@@ -125,15 +131,16 @@ class CamoufoxConfig(Struct, kw_only=True, frozen=False):
125
131
  wait_selector: Optional[str] = None
126
132
  addons: Optional[List[str]] = None
127
133
  wait_selector_state: SelectorWaitStates = "attached"
128
- cookies: Optional[List[Dict]] = None
134
+ cookies: Optional[Iterable[Dict]] = None
129
135
  google_search: bool = True
130
136
  extra_headers: Optional[Dict[str, str]] = None
131
- proxy: Optional[str | Dict[str, str]] = None # The default value for proxy in Playwright's source is `None`
137
+ proxy: Optional[str | Dict[str, str] | Tuple] = None # The default value for proxy in Playwright's source is `None`
132
138
  os_randomize: bool = False
133
139
  disable_ads: bool = False
134
140
  geoip: bool = False
135
- selector_config: Optional[Dict] = None
136
- additional_args: Optional[Dict] = None
141
+ user_data_dir: str = ""
142
+ selector_config: Optional[Dict] = {}
143
+ additional_args: Optional[Dict] = {}
137
144
 
138
145
  def __post_init__(self):
139
146
  """Custom validation after msgspec validation"""
@@ -177,7 +184,7 @@ class FetchConfig(Struct, kw_only=True):
177
184
  network_idle: bool = False
178
185
  load_dom: bool = True
179
186
  solve_cloudflare: bool = False
180
- selector_config: Optional[Dict] = {}
187
+ selector_config: Dict = {}
181
188
 
182
189
  def to_dict(self):
183
190
  return {f: getattr(self, f) for f in self.__struct_fields__}
@@ -198,7 +205,7 @@ class _fetch_params:
198
205
  network_idle: bool
199
206
  load_dom: bool
200
207
  solve_cloudflare: bool
201
- selector_config: Optional[Dict]
208
+ selector_config: Dict
202
209
 
203
210
 
204
211
  def validate_fetch(params: List[Tuple], sentinel=None) -> _fetch_params:
@@ -222,7 +229,21 @@ def validate_fetch(params: List[Tuple], sentinel=None) -> _fetch_params:
222
229
  return _fetch_params(**result)
223
230
 
224
231
 
225
- def validate(params: Dict, model) -> PlaywrightConfig | CamoufoxConfig | FetchConfig:
232
+ @overload
233
+ def validate(params: Dict, model: type[PlaywrightConfig]) -> PlaywrightConfig: ...
234
+
235
+
236
+ @overload
237
+ def validate(params: Dict, model: type[CamoufoxConfig]) -> CamoufoxConfig: ...
238
+
239
+
240
+ @overload
241
+ def validate(params: Dict, model: type[FetchConfig]) -> FetchConfig: ...
242
+
243
+
244
+ def validate(
245
+ params: Dict, model: type[PlaywrightConfig] | type[CamoufoxConfig] | type[FetchConfig]
246
+ ) -> PlaywrightConfig | CamoufoxConfig | FetchConfig:
226
247
  try:
227
248
  return convert(params, model)
228
249
  except ValidationError as e: