scrapling 0.3.6__py3-none-any.whl → 0.3.8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -1,29 +1,29 @@
1
+ from random import randint
1
2
  from re import compile as re_compile
2
3
 
3
4
  from playwright.sync_api import (
4
- Response as SyncPlaywrightResponse,
5
- sync_playwright,
6
- Locator,
7
5
  Page,
6
+ Locator,
7
+ sync_playwright,
8
8
  )
9
9
  from playwright.async_api import (
10
10
  async_playwright,
11
- Response as AsyncPlaywrightResponse,
12
- BrowserContext as AsyncBrowserContext,
13
- Playwright as AsyncPlaywright,
14
- Locator as AsyncLocator,
15
11
  Page as async_Page,
12
+ Locator as AsyncLocator,
13
+ Playwright as AsyncPlaywright,
14
+ BrowserContext as AsyncBrowserContext,
16
15
  )
17
- from playwright._impl._errors import Error as PlaywrightError
18
16
 
19
- from ._validators import validate_fetch as _validate
17
+ from ._validators import validate_fetch as _validate, CamoufoxConfig
20
18
  from ._base import SyncSession, AsyncSession, StealthySessionMixin
21
19
  from scrapling.core.utils import log
22
20
  from scrapling.core._types import (
21
+ Any,
23
22
  Dict,
24
23
  List,
25
24
  Optional,
26
25
  Callable,
26
+ TYPE_CHECKING,
27
27
  SelectorWaitStates,
28
28
  )
29
29
  from scrapling.engines.toolbelt.convertor import (
@@ -33,7 +33,7 @@ from scrapling.engines.toolbelt.convertor import (
33
33
  from scrapling.engines.toolbelt.fingerprints import generate_convincing_referer
34
34
 
35
35
  __CF_PATTERN__ = re_compile("challenges.cloudflare.com/cdn-cgi/challenge-platform/.*")
36
- _UNSET = object()
36
+ _UNSET: Any = object()
37
37
 
38
38
 
39
39
  class StealthySession(StealthySessionMixin, SyncSession):
@@ -101,6 +101,7 @@ class StealthySession(StealthySessionMixin, SyncSession):
101
101
  os_randomize: bool = False,
102
102
  disable_ads: bool = False,
103
103
  geoip: bool = False,
104
+ user_data_dir: str = "",
104
105
  selector_config: Optional[Dict] = None,
105
106
  additional_args: Optional[Dict] = None,
106
107
  ):
@@ -133,6 +134,7 @@ class StealthySession(StealthySessionMixin, SyncSession):
133
134
  :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
134
135
  :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
135
136
  :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
137
+ :param user_data_dir: Path to a User Data Directory, which stores browser session data like cookies and local storage. The default is to create a temporary directory.
136
138
  :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
137
139
  :param additional_args: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
138
140
  """
@@ -156,6 +158,7 @@ class StealthySession(StealthySessionMixin, SyncSession):
156
158
  block_images=block_images,
157
159
  block_webrtc=block_webrtc,
158
160
  os_randomize=os_randomize,
161
+ user_data_dir=user_data_dir,
159
162
  wait_selector=wait_selector,
160
163
  google_search=google_search,
161
164
  extra_headers=extra_headers,
@@ -170,9 +173,7 @@ class StealthySession(StealthySessionMixin, SyncSession):
170
173
  def __create__(self):
171
174
  """Create a browser for this instance and context."""
172
175
  self.playwright = sync_playwright().start()
173
- self.context = self.playwright.firefox.launch_persistent_context( # pragma: no cover
174
- **self.launch_options
175
- )
176
+ self.context = self.playwright.firefox.launch_persistent_context(**self.launch_options)
176
177
 
177
178
  if self.init_script: # pragma: no cover
178
179
  self.context.add_init_script(path=self.init_script)
@@ -180,56 +181,21 @@ class StealthySession(StealthySessionMixin, SyncSession):
180
181
  if self.cookies: # pragma: no cover
181
182
  self.context.add_cookies(self.cookies)
182
183
 
183
- def __enter__(self): # pragma: no cover
184
- self.__create__()
185
- return self
186
-
187
- def __exit__(self, exc_type, exc_val, exc_tb):
188
- self.close()
189
-
190
- def close(self): # pragma: no cover
191
- """Close all resources"""
192
- if self._closed: # pragma: no cover
193
- return
194
-
195
- if self.context:
196
- self.context.close()
197
- self.context = None
198
-
199
- if self.playwright:
200
- self.playwright.stop()
201
- self.playwright = None
202
-
203
- self._closed = True
204
-
205
- @staticmethod
206
- def _get_page_content(page: Page) -> str | None:
207
- """
208
- A workaround for Playwright issue with `page.content()` on Windows. Ref.: https://github.com/microsoft/playwright/issues/16108
209
- :param page: The page to extract content from.
210
- :return:
211
- """
212
- while True:
213
- try:
214
- return page.content() or ""
215
- except PlaywrightError:
216
- page.wait_for_timeout(1000)
217
- continue
218
-
219
184
  def _solve_cloudflare(self, page: Page) -> None: # pragma: no cover
220
185
  """Solve the cloudflare challenge displayed on the playwright page passed
221
186
 
222
187
  :param page: The targeted page
223
188
  :return:
224
189
  """
225
- challenge_type = self._detect_cloudflare(self._get_page_content(page))
190
+ self._wait_for_networkidle(page, timeout=5000)
191
+ challenge_type = self._detect_cloudflare(ResponseFactory._get_page_content(page))
226
192
  if not challenge_type:
227
193
  log.error("No Cloudflare challenge found.")
228
194
  return
229
195
  else:
230
196
  log.info(f'The turnstile version discovered is "{challenge_type}"')
231
197
  if challenge_type == "non-interactive":
232
- while "<title>Just a moment...</title>" in (self._get_page_content(page)):
198
+ while "<title>Just a moment...</title>" in (ResponseFactory._get_page_content(page)):
233
199
  log.info("Waiting for Cloudflare wait page to disappear.")
234
200
  page.wait_for_timeout(1000)
235
201
  page.wait_for_load_state()
@@ -240,31 +206,43 @@ class StealthySession(StealthySessionMixin, SyncSession):
240
206
  box_selector = "#cf_turnstile div, #cf-turnstile div, .turnstile>div>div"
241
207
  if challenge_type != "embedded":
242
208
  box_selector = ".main-content p+div>div>div"
243
- while "Verifying you are human." in self._get_page_content(page):
209
+ while "Verifying you are human." in ResponseFactory._get_page_content(page):
244
210
  # Waiting for the verify spinner to disappear, re-checking the page content every 500 ms
245
211
  page.wait_for_timeout(500)
246
212
 
213
+ outer_box = {}
247
214
  iframe = page.frame(url=__CF_PATTERN__)
248
- if iframe is None:
249
- log.error("Didn't find Cloudflare iframe!")
250
- return
215
+ if iframe is not None:
216
+ self._wait_for_page_stability(iframe, True, True)
251
217
 
252
- if challenge_type != "embedded":
253
- while not iframe.frame_element().is_visible():
254
- # Double-checking that the iframe is loaded
255
- page.wait_for_timeout(500)
218
+ if challenge_type != "embedded":
219
+ while not iframe.frame_element().is_visible():
220
+ # Double-checking that the iframe is loaded
221
+ page.wait_for_timeout(500)
222
+ outer_box: Any = iframe.frame_element().bounding_box()
223
+
224
+ if not iframe or not outer_box:
225
+ outer_box: Any = page.locator(box_selector).last.bounding_box()
256
226
 
257
- iframe.wait_for_load_state(state="domcontentloaded")
258
- iframe.wait_for_load_state("networkidle")
259
227
  # Calculate the Captcha coordinates for any viewport
260
- outer_box = page.locator(box_selector).last.bounding_box()
261
- captcha_x, captcha_y = outer_box["x"] + 26, outer_box["y"] + 25
228
+ captcha_x, captcha_y = outer_box["x"] + randint(26, 28), outer_box["y"] + randint(25, 27)
262
229
 
263
230
  # Left-click at the captcha coordinates computed above (60 ms between mouse down and mouse up)
264
231
  page.mouse.click(captcha_x, captcha_y, delay=60, button="left")
232
+ self._wait_for_networkidle(page)
233
+ if iframe is not None:
234
+ # Wait for the frame to be removed from the page (with 30s timeout = 300 iterations * 100 ms)
235
+ attempts = 0
236
+ while iframe in page.frames:
237
+ if attempts >= 300:
238
+ log.info("Cloudflare iframe didn't disappear after 30s, continuing...")
239
+ break
240
+ page.wait_for_timeout(100)
241
+ attempts += 1
265
242
  if challenge_type != "embedded":
243
+ page.locator(box_selector).last.wait_for(state="detached")
266
244
  page.locator(".zone-name-title").wait_for(state="hidden")
267
- page.wait_for_load_state(state="domcontentloaded")
245
+ self._wait_for_page_stability(page, True, False)
268
246
 
269
247
  log.info("Cloudflare captcha is solved")
270
248
  return
@@ -319,37 +297,26 @@ class StealthySession(StealthySessionMixin, SyncSession):
319
297
  ("solve_cloudflare", solve_cloudflare, self.solve_cloudflare),
320
298
  ("selector_config", selector_config, self.selector_config),
321
299
  ],
300
+ CamoufoxConfig,
322
301
  _UNSET,
323
302
  )
324
303
 
325
304
  if self._closed: # pragma: no cover
326
305
  raise RuntimeError("Context manager has been closed")
327
306
 
328
- final_response = None
329
307
  referer = (
330
308
  generate_convincing_referer(url) if (params.google_search and "referer" not in self._headers_keys) else None
331
309
  )
332
310
 
333
- def handle_response(finished_response: SyncPlaywrightResponse):
334
- nonlocal final_response
335
- if (
336
- finished_response.request.resource_type == "document"
337
- and finished_response.request.is_navigation_request()
338
- ):
339
- final_response = finished_response
340
-
341
311
  page_info = self._get_page(params.timeout, params.extra_headers, params.disable_resources)
342
- page_info.mark_busy(url=url)
312
+ final_response = [None]
313
+ handle_response = self._create_response_handler(page_info, final_response)
343
314
 
344
315
  try: # pragma: no cover
345
316
  # Navigate to URL and wait for a specified state
346
317
  page_info.page.on("response", handle_response)
347
318
  first_response = page_info.page.goto(url, referer=referer)
348
- if params.load_dom:
349
- page_info.page.wait_for_load_state(state="domcontentloaded")
350
-
351
- if params.network_idle:
352
- page_info.page.wait_for_load_state("networkidle")
319
+ self._wait_for_page_stability(page_info.page, params.load_dom, params.network_idle)
353
320
 
354
321
  if not first_response:
355
322
  raise RuntimeError(f"Failed to get response for {url}")
@@ -357,11 +324,7 @@ class StealthySession(StealthySessionMixin, SyncSession):
357
324
  if params.solve_cloudflare:
358
325
  self._solve_cloudflare(page_info.page)
359
326
  # Make sure the page is fully loaded after the captcha
360
- page_info.page.wait_for_load_state(state="load")
361
- if params.load_dom:
362
- page_info.page.wait_for_load_state(state="domcontentloaded")
363
- if params.network_idle:
364
- page_info.page.wait_for_load_state("networkidle")
327
+ self._wait_for_page_stability(page_info.page, params.load_dom, params.network_idle)
365
328
 
366
329
  if params.page_action:
367
330
  try:
@@ -374,20 +337,16 @@ class StealthySession(StealthySessionMixin, SyncSession):
374
337
  waiter: Locator = page_info.page.locator(params.wait_selector)
375
338
  waiter.first.wait_for(state=params.wait_selector_state)
376
339
  # Wait again after waiting for the selector, helpful with protections like Cloudflare
377
- page_info.page.wait_for_load_state(state="load")
378
- if params.load_dom:
379
- page_info.page.wait_for_load_state(state="domcontentloaded")
380
- if params.network_idle:
381
- page_info.page.wait_for_load_state("networkidle")
340
+ self._wait_for_page_stability(page_info.page, params.load_dom, params.network_idle)
382
341
  except Exception as e:
383
342
  log.error(f"Error waiting for selector {params.wait_selector}: {e}")
384
343
 
385
344
  page_info.page.wait_for_timeout(params.wait)
386
345
  response = ResponseFactory.from_playwright_response(
387
- page_info.page, first_response, final_response, params.selector_config
346
+ page_info.page, first_response, final_response[0], params.selector_config, bool(params.page_action)
388
347
  )
389
348
 
390
- # Close the page, to free up resources
349
+ # Close the page to free up resources
391
350
  page_info.page.close()
392
351
  self.page_pool.pages.remove(page_info)
393
352
 
@@ -427,6 +386,7 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
427
386
  os_randomize: bool = False,
428
387
  disable_ads: bool = False,
429
388
  geoip: bool = False,
389
+ user_data_dir: str = "",
430
390
  selector_config: Optional[Dict] = None,
431
391
  additional_args: Optional[Dict] = None,
432
392
  ):
@@ -460,6 +420,7 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
460
420
  :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
461
421
  :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
462
422
  :param max_pages: The maximum number of tabs to be opened at the same time. It will be used in rotation through a PagePool.
423
+ :param user_data_dir: Path to a User Data Directory, which stores browser session data like cookies and local storage. The default is to create a temporary directory.
463
424
  :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
464
425
  :param additional_args: Additional arguments to be passed to Camoufox as additional settings, and it takes higher priority than Scrapling's settings.
465
426
  """
@@ -485,6 +446,7 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
485
446
  wait_selector=wait_selector,
486
447
  google_search=google_search,
487
448
  extra_headers=extra_headers,
449
+ user_data_dir=user_data_dir,
488
450
  additional_args=additional_args,
489
451
  selector_config=selector_config,
490
452
  solve_cloudflare=solve_cloudflare,
@@ -504,58 +466,23 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
504
466
  await self.context.add_init_script(path=self.init_script)
505
467
 
506
468
  if self.cookies:
507
- await self.context.add_cookies(self.cookies)
508
-
509
- async def __aenter__(self):
510
- await self.__create__()
511
- return self
512
-
513
- async def __aexit__(self, exc_type, exc_val, exc_tb):
514
- await self.close()
515
-
516
- async def close(self):
517
- """Close all resources"""
518
- if self._closed: # pragma: no cover
519
- return
520
-
521
- if self.context:
522
- await self.context.close()
523
- self.context = None
469
+ await self.context.add_cookies(self.cookies) # pyright: ignore [reportArgumentType]
524
470
 
525
- if self.playwright:
526
- await self.playwright.stop()
527
- self.playwright = None
528
-
529
- self._closed = True
530
-
531
- @staticmethod
532
- async def _get_page_content(page: async_Page) -> str | None:
533
- """
534
- A workaround for Playwright issue with `page.content()` on Windows. Ref.: https://github.com/microsoft/playwright/issues/16108
535
- :param page: The page to extract content from.
536
- :return:
537
- """
538
- while True:
539
- try:
540
- return (await page.content()) or ""
541
- except PlaywrightError:
542
- await page.wait_for_timeout(1000)
543
- continue
544
-
545
- async def _solve_cloudflare(self, page: async_Page):
471
+ async def _solve_cloudflare(self, page: async_Page): # pragma: no cover
546
472
  """Solve the cloudflare challenge displayed on the playwright page passed. The async version
547
473
 
548
474
  :param page: The async targeted page
549
475
  :return:
550
476
  """
551
- challenge_type = self._detect_cloudflare(await self._get_page_content(page))
477
+ await self._wait_for_networkidle(page, timeout=5000)
478
+ challenge_type = self._detect_cloudflare(await ResponseFactory._get_async_page_content(page))
552
479
  if not challenge_type:
553
480
  log.error("No Cloudflare challenge found.")
554
481
  return
555
482
  else:
556
483
  log.info(f'The turnstile version discovered is "{challenge_type}"')
557
484
  if challenge_type == "non-interactive": # pragma: no cover
558
- while "<title>Just a moment...</title>" in (await self._get_page_content(page)):
485
+ while "<title>Just a moment...</title>" in (await ResponseFactory._get_async_page_content(page)):
559
486
  log.info("Waiting for Cloudflare wait page to disappear.")
560
487
  await page.wait_for_timeout(1000)
561
488
  await page.wait_for_load_state()
@@ -566,31 +493,43 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
566
493
  box_selector = "#cf_turnstile div, #cf-turnstile div, .turnstile>div>div"
567
494
  if challenge_type != "embedded":
568
495
  box_selector = ".main-content p+div>div>div"
569
- while "Verifying you are human." in (await self._get_page_content(page)):
496
+ while "Verifying you are human." in (await ResponseFactory._get_async_page_content(page)):
570
497
  # Waiting for the verify spinner to disappear, re-checking the page content every 500 ms
571
498
  await page.wait_for_timeout(500)
572
499
 
500
+ outer_box = {}
573
501
  iframe = page.frame(url=__CF_PATTERN__)
574
- if iframe is None:
575
- log.error("Didn't find Cloudflare iframe!")
576
- return
502
+ if iframe is not None:
503
+ await self._wait_for_page_stability(iframe, True, True)
577
504
 
578
- if challenge_type != "embedded":
579
- while not await (await iframe.frame_element()).is_visible():
580
- # Double-checking that the iframe is loaded
581
- await page.wait_for_timeout(500)
505
+ if challenge_type != "embedded":
506
+ while not await (await iframe.frame_element()).is_visible():
507
+ # Double-checking that the iframe is loaded
508
+ await page.wait_for_timeout(500)
509
+ outer_box: Any = await (await iframe.frame_element()).bounding_box()
510
+
511
+ if not iframe or not outer_box:
512
+ outer_box: Any = await page.locator(box_selector).last.bounding_box()
582
513
 
583
- await iframe.wait_for_load_state(state="domcontentloaded")
584
- await iframe.wait_for_load_state("networkidle")
585
514
  # Calculate the Captcha coordinates for any viewport
586
- outer_box = await page.locator(box_selector).last.bounding_box()
587
- captcha_x, captcha_y = outer_box["x"] + 26, outer_box["y"] + 25
515
+ captcha_x, captcha_y = outer_box["x"] + randint(26, 28), outer_box["y"] + randint(25, 27)
588
516
 
589
517
  # Left-click at the captcha coordinates computed above (60 ms between mouse down and mouse up)
590
518
  await page.mouse.click(captcha_x, captcha_y, delay=60, button="left")
519
+ await self._wait_for_networkidle(page)
520
+ if iframe is not None:
521
+ # Wait for the frame to be removed from the page (with 30s timeout = 300 iterations * 100 ms)
522
+ attempts = 0
523
+ while iframe in page.frames:
524
+ if attempts >= 300:
525
+ log.info("Cloudflare iframe didn't disappear after 30s, continuing...")
526
+ break
527
+ await page.wait_for_timeout(100)
528
+ attempts += 1
591
529
  if challenge_type != "embedded":
530
+ await page.locator(box_selector).wait_for(state="detached")
592
531
  await page.locator(".zone-name-title").wait_for(state="hidden")
593
- await page.wait_for_load_state(state="domcontentloaded")
532
+ await self._wait_for_page_stability(page, True, False)
594
533
 
595
534
  log.info("Cloudflare captcha is solved")
596
535
  return
@@ -645,37 +584,30 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
645
584
  ("solve_cloudflare", solve_cloudflare, self.solve_cloudflare),
646
585
  ("selector_config", selector_config, self.selector_config),
647
586
  ],
587
+ CamoufoxConfig,
648
588
  _UNSET,
649
589
  )
650
590
 
651
591
  if self._closed: # pragma: no cover
652
592
  raise RuntimeError("Context manager has been closed")
653
593
 
654
- final_response = None
655
594
  referer = (
656
595
  generate_convincing_referer(url) if (params.google_search and "referer" not in self._headers_keys) else None
657
596
  )
658
597
 
659
- async def handle_response(finished_response: AsyncPlaywrightResponse):
660
- nonlocal final_response
661
- if (
662
- finished_response.request.resource_type == "document"
663
- and finished_response.request.is_navigation_request()
664
- ):
665
- final_response = finished_response
666
-
667
598
  page_info = await self._get_page(params.timeout, params.extra_headers, params.disable_resources)
668
- page_info.mark_busy(url=url)
599
+ final_response = [None]
600
+ handle_response = self._create_response_handler(page_info, final_response)
601
+
602
+ if TYPE_CHECKING:
603
+ if not isinstance(page_info.page, async_Page):
604
+ raise TypeError
669
605
 
670
606
  try:
671
607
  # Navigate to URL and wait for a specified state
672
608
  page_info.page.on("response", handle_response)
673
609
  first_response = await page_info.page.goto(url, referer=referer)
674
- if params.load_dom:
675
- await page_info.page.wait_for_load_state(state="domcontentloaded")
676
-
677
- if params.network_idle:
678
- await page_info.page.wait_for_load_state("networkidle")
610
+ await self._wait_for_page_stability(page_info.page, params.load_dom, params.network_idle)
679
611
 
680
612
  if not first_response:
681
613
  raise RuntimeError(f"Failed to get response for {url}")
@@ -683,11 +615,7 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
683
615
  if params.solve_cloudflare:
684
616
  await self._solve_cloudflare(page_info.page)
685
617
  # Make sure the page is fully loaded after the captcha
686
- await page_info.page.wait_for_load_state(state="load")
687
- if params.load_dom:
688
- await page_info.page.wait_for_load_state(state="domcontentloaded")
689
- if params.network_idle:
690
- await page_info.page.wait_for_load_state("networkidle")
618
+ await self._wait_for_page_stability(page_info.page, params.load_dom, params.network_idle)
691
619
 
692
620
  if params.page_action:
693
621
  try:
@@ -700,11 +628,7 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
700
628
  waiter: AsyncLocator = page_info.page.locator(params.wait_selector)
701
629
  await waiter.first.wait_for(state=params.wait_selector_state)
702
630
  # Wait again after waiting for the selector, helpful with protections like Cloudflare
703
- await page_info.page.wait_for_load_state(state="load")
704
- if params.load_dom:
705
- await page_info.page.wait_for_load_state(state="domcontentloaded")
706
- if params.network_idle:
707
- await page_info.page.wait_for_load_state("networkidle")
631
+ await self._wait_for_page_stability(page_info.page, params.load_dom, params.network_idle)
708
632
  except Exception as e:
709
633
  log.error(f"Error waiting for selector {params.wait_selector}: {e}")
710
634
 
@@ -712,10 +636,10 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
712
636
 
713
637
  # Create response object
714
638
  response = await ResponseFactory.from_async_playwright_response(
715
- page_info.page, first_response, final_response, params.selector_config
639
+ page_info.page, first_response, final_response[0], params.selector_config, bool(params.page_action)
716
640
  )
717
641
 
718
- # Close the page, to free up resources
642
+ # Close the page to free up resources
719
643
  await page_info.page.close()
720
644
  self.page_pool.pages.remove(page_info)
721
645
 
@@ -62,7 +62,7 @@ def _set_flags(hide_canvas, disable_webgl): # pragma: no cover
62
62
  @lru_cache(2, typed=True)
63
63
  def _launch_kwargs(
64
64
  headless,
65
- proxy,
65
+ proxy: Tuple,
66
66
  locale,
67
67
  extra_headers,
68
68
  useragent,
@@ -70,12 +70,17 @@ def _launch_kwargs(
70
70
  stealth,
71
71
  hide_canvas,
72
72
  disable_webgl,
73
+ extra_flags: Tuple,
73
74
  ) -> Tuple:
74
75
  """Creates the arguments we will use while launching playwright's browser"""
76
+ base_args = DEFAULT_FLAGS
77
+ if extra_flags:
78
+ base_args = base_args + extra_flags
79
+
75
80
  launch_kwargs = {
76
81
  "locale": locale,
77
82
  "headless": headless,
78
- "args": DEFAULT_FLAGS,
83
+ "args": base_args,
79
84
  "color_scheme": "dark", # Bypasses the 'prefersLightColor' check in creepjs
80
85
  "proxy": proxy or tuple(),
81
86
  "device_scale_factor": 2,
@@ -85,9 +90,10 @@ def _launch_kwargs(
85
90
  "user_agent": useragent or __default_useragent__,
86
91
  }
87
92
  if stealth:
93
+ stealth_args = base_args + _set_flags(hide_canvas, disable_webgl)
88
94
  launch_kwargs.update(
89
95
  {
90
- "args": DEFAULT_FLAGS + _set_flags(hide_canvas, disable_webgl),
96
+ "args": stealth_args,
91
97
  "chromium_sandbox": True,
92
98
  "is_mobile": False,
93
99
  "has_touch": False,