scrapling 0.3.4__py3-none-any.whl → 0.3.6__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- scrapling/__init__.py +29 -19
- scrapling/cli.py +25 -8
- scrapling/core/_types.py +0 -2
- scrapling/core/ai.py +22 -14
- scrapling/core/custom_types.py +2 -2
- scrapling/core/shell.py +6 -5
- scrapling/core/storage.py +2 -1
- scrapling/core/utils/__init__.py +0 -1
- scrapling/engines/_browsers/__init__.py +0 -2
- scrapling/engines/_browsers/_base.py +11 -36
- scrapling/engines/_browsers/_camoufox.py +75 -60
- scrapling/engines/_browsers/_controllers.py +43 -52
- scrapling/engines/_browsers/_page.py +1 -42
- scrapling/engines/_browsers/_validators.py +130 -65
- scrapling/engines/constants.py +0 -15
- scrapling/engines/static.py +417 -16
- scrapling/engines/toolbelt/navigation.py +1 -1
- scrapling/fetchers/__init__.py +36 -0
- scrapling/fetchers/chrome.py +205 -0
- scrapling/fetchers/firefox.py +216 -0
- scrapling/fetchers/requests.py +28 -0
- scrapling/parser.py +7 -7
- {scrapling-0.3.4.dist-info → scrapling-0.3.6.dist-info}/METADATA +25 -23
- scrapling-0.3.6.dist-info/RECORD +47 -0
- scrapling/fetchers.py +0 -444
- scrapling-0.3.4.dist-info/RECORD +0 -44
- {scrapling-0.3.4.dist-info → scrapling-0.3.6.dist-info}/WHEEL +0 -0
- {scrapling-0.3.4.dist-info → scrapling-0.3.6.dist-info}/entry_points.txt +0 -0
- {scrapling-0.3.4.dist-info → scrapling-0.3.6.dist-info}/licenses/LICENSE +0 -0
- {scrapling-0.3.4.dist-info → scrapling-0.3.6.dist-info}/top_level.txt +0 -0
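
The most visible structural change in this release is that the single `scrapling/fetchers.py` module has been replaced by a `scrapling/fetchers/` package (`chrome.py`, `firefox.py`, `requests.py`, plus a new `__init__.py`), alongside a much larger `engines/static.py`. A minimal usage sketch, assuming the package `__init__.py` keeps re-exporting the public fetcher classes under their documented names (the class names and the file-to-fetcher mapping in the comments come from Scrapling's docs and the file list above, not from the diff body):

# Hedged sketch: imports are assumed to resolve through the new scrapling/fetchers/
# package exactly as they did with the old single-module fetchers.py.
from scrapling.fetchers import Fetcher, StealthyFetcher, DynamicFetcher

plain = Fetcher.get("https://example.com")               # HTTP client path (likely requests.py)
stealthy = StealthyFetcher.fetch("https://example.com")  # Camoufox/Firefox path (likely firefox.py)
dynamic = DynamicFetcher.fetch("https://example.com")    # Playwright/Chrome path (likely chrome.py)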
scrapling/engines/_browsers/_camoufox.py
@@ -16,7 +16,7 @@ from playwright.async_api import (
 )
 from playwright._impl._errors import Error as PlaywrightError

-from ._validators import
+from ._validators import validate_fetch as _validate
 from ._base import SyncSession, AsyncSession, StealthySessionMixin
 from scrapling.core.utils import log
 from scrapling.core._types import (
@@ -116,7 +116,7 @@ class StealthySession(StealthySessionMixin, SyncSession):
         :param cookies: Set cookies for the next request.
         :param addons: List of Firefox addons to use. Must be paths to extracted addons.
         :param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
-        :param solve_cloudflare: Solves all
+        :param solve_cloudflare: Solves all types of the Cloudflare's Turnstile/Interstitial challenges before returning the response to you.
         :param allow_webgl: Enabled by default. Disabling WebGL is not recommended as many WAFs now check if WebGL is enabled.
         :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
         :param load_dom: Enabled by default, wait for all JavaScript on page(s) to fully load and execute.
@@ -237,26 +237,33 @@ class StealthySession(StealthySessionMixin, SyncSession):
             return

         else:
-
-
-
+            box_selector = "#cf_turnstile div, #cf-turnstile div, .turnstile>div>div"
+            if challenge_type != "embedded":
+                box_selector = ".main-content p+div>div>div"
+                while "Verifying you are human." in self._get_page_content(page):
+                    # Waiting for the verify spinner to disappear, checking every 1s if it disappeared
+                    page.wait_for_timeout(500)

             iframe = page.frame(url=__CF_PATTERN__)
             if iframe is None:
-                log.
+                log.error("Didn't find Cloudflare iframe!")
                 return

-
-
-
+            if challenge_type != "embedded":
+                while not iframe.frame_element().is_visible():
+                    # Double-checking that the iframe is loaded
+                    page.wait_for_timeout(500)

+                iframe.wait_for_load_state(state="domcontentloaded")
+                iframe.wait_for_load_state("networkidle")
             # Calculate the Captcha coordinates for any viewport
-            outer_box = page.locator(
+            outer_box = page.locator(box_selector).last.bounding_box()
             captcha_x, captcha_y = outer_box["x"] + 26, outer_box["y"] + 25

             # Move the mouse to the center of the window, then press and hold the left mouse button
             page.mouse.click(captcha_x, captcha_y, delay=60, button="left")
-
+            if challenge_type != "embedded":
+                page.locator(".zone-name-title").wait_for(state="hidden")
             page.wait_for_load_state(state="domcontentloaded")

             log.info("Cloudflare captcha is solved")
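
Taken together, the added lines give the sync session a click-through for Cloudflare's Turnstile/Interstitial widget: wait for the "Verifying you are human." spinner to clear, locate the challenge iframe, take the bounding box of a wrapper element, and click at a small fixed offset inside it. Below is a condensed, standalone sketch of the same flow against plain Playwright; the selectors and the +26/+25 offsets are copied from the hunk above, while `CF_IFRAME_PATTERN`, the function name, and the page setup are stand-ins for Scrapling's internals rather than its actual code.

# Standalone sketch of the Turnstile click flow shown in the hunk above.
# CF_IFRAME_PATTERN is a stand-in for scrapling's __CF_PATTERN__ constant.
import re
from playwright.sync_api import Page

CF_IFRAME_PATTERN = re.compile(r"challenges\.cloudflare\.com")  # assumption, not taken from the diff

def click_turnstile(page: Page, challenge_type: str = "embedded") -> None:
    box_selector = "#cf_turnstile div, #cf-turnstile div, .turnstile>div>div"
    if challenge_type != "embedded":
        box_selector = ".main-content p+div>div>div"
        while "Verifying you are human." in page.content():
            page.wait_for_timeout(500)  # let the verify spinner settle

    iframe = page.frame(url=CF_IFRAME_PATTERN)
    if iframe is None:
        return  # no challenge iframe found

    if challenge_type != "embedded":
        while not iframe.frame_element().is_visible():
            page.wait_for_timeout(500)  # double-check the iframe is attached and visible
        iframe.wait_for_load_state(state="domcontentloaded")
        iframe.wait_for_load_state("networkidle")

    # The checkbox sits at a roughly fixed offset inside the wrapper element.
    outer_box = page.locator(box_selector).last.bounding_box()
    page.mouse.click(outer_box["x"] + 26, outer_box["y"] + 25, delay=60, button="left")

    if challenge_type != "embedded":
        page.locator(".zone-name-title").wait_for(state="hidden")
    page.wait_for_load_state(state="domcontentloaded")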
@@ -293,27 +300,26 @@ class StealthySession(StealthySessionMixin, SyncSession):
         :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
         :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
         :param load_dom: Enabled by default, wait for all JavaScript on page(s) to fully load and execute.
-        :param solve_cloudflare: Solves all
+        :param solve_cloudflare: Solves all types of the Cloudflare's Turnstile/Interstitial challenges before returning the response to you.
         :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
         :return: A `Response` object.
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            CamoufoxConfig,
+        params = _validate(
+            [
+                ("google_search", google_search, self.google_search),
+                ("timeout", timeout, self.timeout),
+                ("wait", wait, self.wait),
+                ("page_action", page_action, self.page_action),
+                ("extra_headers", extra_headers, self.extra_headers),
+                ("disable_resources", disable_resources, self.disable_resources),
+                ("wait_selector", wait_selector, self.wait_selector),
+                ("wait_selector_state", wait_selector_state, self.wait_selector_state),
+                ("network_idle", network_idle, self.network_idle),
+                ("load_dom", load_dom, self.load_dom),
+                ("solve_cloudflare", solve_cloudflare, self.solve_cloudflare),
+                ("selector_config", selector_config, self.selector_config),
+            ],
+            _UNSET,
         )

         if self._closed:  # pragma: no cover
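
The rewritten fetch body now hands `validate_fetch` a list of `(name, per-call value, session default)` triples plus an `_UNSET` sentinel, which lets the validator distinguish "argument omitted" from "argument explicitly set to a falsy value". The real helper lives in `_validators.py` and is not shown in this diff; the sketch below is only one plausible shape for such a helper, written to illustrate the sentinel pattern, not the package's actual implementation.

# Illustrative only: a plausible shape for a validate_fetch-style helper.
# The real scrapling.engines._browsers._validators.validate_fetch is not shown in this diff.
from types import SimpleNamespace
from typing import Any, Iterable, Tuple

_UNSET = object()  # sentinel meaning "caller did not pass this argument"

def validate_fetch(params: Iterable[Tuple[str, Any, Any]], sentinel: Any = _UNSET) -> SimpleNamespace:
    resolved = {}
    for name, per_call_value, session_default in params:
        # Fall back to the session-level default only when the argument was omitted,
        # so an explicit False/0/"" still overrides the session configuration.
        resolved[name] = session_default if per_call_value is sentinel else per_call_value
    return SimpleNamespace(**resolved)

# Usage mirroring the call site in the hunk above:
params = validate_fetch(
    [
        ("timeout", _UNSET, 30_000),    # omitted -> session default wins
        ("network_idle", False, True),  # explicit False -> overrides the default
    ],
    _UNSET,
)
assert params.timeout == 30_000 and params.network_idle is False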
@@ -381,8 +387,9 @@ class StealthySession(StealthySessionMixin, SyncSession):
                 page_info.page, first_response, final_response, params.selector_config
             )

-            #
-            page_info.
+            # Close the page, to free up resources
+            page_info.page.close()
+            self.page_pool.pages.remove(page_info)

             return response

@@ -435,7 +442,7 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
         :param cookies: Set cookies for the next request.
         :param addons: List of Firefox addons to use. Must be paths to extracted addons.
         :param humanize: Humanize the cursor movement. Takes either True or the MAX duration in seconds of the cursor movement. The cursor typically takes up to 1.5 seconds to move across the window.
-        :param solve_cloudflare: Solves all
+        :param solve_cloudflare: Solves all types of the Cloudflare's Turnstile/Interstitial challenges before returning the response to you.
         :param allow_webgl: Enabled by default. Disabling WebGL is not recommended as many WAFs now check if WebGL is enabled.
         :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
         :param load_dom: Enabled by default, wait for all JavaScript on page(s) to fully load and execute.
@@ -556,26 +563,33 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
             return

         else:
-
-
-
+            box_selector = "#cf_turnstile div, #cf-turnstile div, .turnstile>div>div"
+            if challenge_type != "embedded":
+                box_selector = ".main-content p+div>div>div"
+                while "Verifying you are human." in (await self._get_page_content(page)):
+                    # Waiting for the verify spinner to disappear, checking every 1s if it disappeared
+                    await page.wait_for_timeout(500)

             iframe = page.frame(url=__CF_PATTERN__)
             if iframe is None:
-                log.
+                log.error("Didn't find Cloudflare iframe!")
                 return

-
-
-
+            if challenge_type != "embedded":
+                while not await (await iframe.frame_element()).is_visible():
+                    # Double-checking that the iframe is loaded
+                    await page.wait_for_timeout(500)

+                await iframe.wait_for_load_state(state="domcontentloaded")
+                await iframe.wait_for_load_state("networkidle")
             # Calculate the Captcha coordinates for any viewport
-            outer_box = await page.locator(
+            outer_box = await page.locator(box_selector).last.bounding_box()
             captcha_x, captcha_y = outer_box["x"] + 26, outer_box["y"] + 25

             # Move the mouse to the center of the window, then press and hold the left mouse button
             await page.mouse.click(captcha_x, captcha_y, delay=60, button="left")
-
+            if challenge_type != "embedded":
+                await page.locator(".zone-name-title").wait_for(state="hidden")
             await page.wait_for_load_state(state="domcontentloaded")

             log.info("Cloudflare captcha is solved")
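
The async solver mirrors the sync flow line for line, awaiting each Playwright call (note the double await around `iframe.frame_element()`, since the async API returns the element handle from a coroutine). From the caller's side, the new behaviour is switched on by the `solve_cloudflare` parameter documented in the docstring hunks. A usage sketch follows; the import path matches the `_camoufox.py` file listed above, but the constructor options and context-manager support shown here are assumptions, not something this diff confirms.

# Hedged usage sketch (assumed import path, headless option, and async context-manager support;
# only the solve_cloudflare/google_search fetch parameters are confirmed by the docstrings above).
import asyncio
from scrapling.engines._browsers._camoufox import AsyncStealthySession

async def main() -> None:
    async with AsyncStealthySession(headless=True) as session:
        response = await session.fetch(
            "https://example.com",
            solve_cloudflare=True,   # solve Turnstile/Interstitial before returning
            google_search=True,      # referer spoofed as a Google search (default)
        )
        print(response.status)

asyncio.run(main())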
@@ -612,26 +626,26 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
         :param wait_selector_state: The state to wait for the selector given with `wait_selector`. The default state is `attached`.
         :param network_idle: Wait for the page until there are no network connections for at least 500 ms.
         :param load_dom: Enabled by default, wait for all JavaScript on page(s) to fully load and execute.
-        :param solve_cloudflare: Solves all
+        :param solve_cloudflare: Solves all types of the Cloudflare's Turnstile/Interstitial challenges before returning the response to you.
         :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
         :return: A `Response` object.
         """
-        params =
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
+        params = _validate(
+            [
+                ("google_search", google_search, self.google_search),
+                ("timeout", timeout, self.timeout),
+                ("wait", wait, self.wait),
+                ("page_action", page_action, self.page_action),
+                ("extra_headers", extra_headers, self.extra_headers),
+                ("disable_resources", disable_resources, self.disable_resources),
+                ("wait_selector", wait_selector, self.wait_selector),
+                ("wait_selector_state", wait_selector_state, self.wait_selector_state),
+                ("network_idle", network_idle, self.network_idle),
+                ("load_dom", load_dom, self.load_dom),
+                ("solve_cloudflare", solve_cloudflare, self.solve_cloudflare),
+                ("selector_config", selector_config, self.selector_config),
+            ],
+            _UNSET,
         )

         if self._closed:  # pragma: no cover
@@ -701,8 +715,9 @@ class AsyncStealthySession(StealthySessionMixin, AsyncSession):
                 page_info.page, first_response, final_response, params.selector_config
             )

-            #
-            page_info.
+            # Close the page, to free up resources
+            await page_info.page.close()
+            self.page_pool.pages.remove(page_info)

             return response

scrapling/engines/_browsers/_controllers.py
@@ -11,14 +11,12 @@ from playwright.async_api import (
     Playwright as AsyncPlaywright,
     Locator as AsyncLocator,
 )
-from
-from
-    async_playwright as async_rebrowser_playwright,
-)
+from patchright.sync_api import sync_playwright as sync_patchright
+from patchright.async_api import async_playwright as async_patchright

 from scrapling.core.utils import log
 from ._base import SyncSession, AsyncSession, DynamicSessionMixin
-from ._validators import
+from ._validators import validate_fetch as _validate
 from scrapling.core._types import (
     Dict,
     List,
@@ -119,7 +117,7 @@ class DynamicSession(DynamicSessionMixin, SyncSession):
         :param hide_canvas: Add random noise to canvas operations to prevent fingerprinting.
         :param disable_webgl: Disables WebGL and WebGL 2.0 support entirely.
         :param load_dom: Enabled by default, wait for all JavaScript on page(s) to fully load and execute.
-        :param cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers
+        :param cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers through CDP.
         :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
         :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
         :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
@@ -154,10 +152,7 @@ class DynamicSession(DynamicSessionMixin, SyncSession):

     def __create__(self):
         """Create a browser for this instance and context."""
-        sync_context =
-        if not self.stealth or self.real_chrome:
-            # Because rebrowser_playwright doesn't play well with real browsers
-            sync_context = sync_playwright
+        sync_context = sync_patchright if self.stealth else sync_playwright

         self.playwright: Playwright = sync_context().start()

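
Both `__create__` implementations collapse the old three-branch setup into one conditional: patchright (a patched Playwright fork exposing the same API) drives stealth mode, and vanilla Playwright drives everything else, dropping the `real_chrome` special case from the driver choice. The sketch below mirrors that selection pattern; the try/except fallback is an addition of the sketch for illustration, since the package itself imports patchright unconditionally.

# Driver-selection sketch mirroring the new __create__ logic.
# The ImportError guard is an assumption of this sketch, not something the diff adds.
from playwright.sync_api import sync_playwright

try:
    from patchright.sync_api import sync_playwright as sync_patchright
except ImportError:  # patchright is a hard dependency of scrapling; this guard is illustrative
    sync_patchright = None

def pick_driver(stealth: bool):
    """Return the Playwright-compatible context factory for the requested mode."""
    if stealth and sync_patchright is not None:
        return sync_patchright
    return sync_playwright

# Usage: start the chosen driver, then launch browsers through it as usual.
playwright = pick_driver(stealth=True)().start()
playwright.stop()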
@@ -229,22 +224,21 @@ class DynamicSession(DynamicSessionMixin, SyncSession):
         :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
         :return: A `Response` object.
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            PlaywrightConfig,
+        params = _validate(
+            [
+                ("google_search", google_search, self.google_search),
+                ("timeout", timeout, self.timeout),
+                ("wait", wait, self.wait),
+                ("page_action", page_action, self.page_action),
+                ("extra_headers", extra_headers, self.extra_headers),
+                ("disable_resources", disable_resources, self.disable_resources),
+                ("wait_selector", wait_selector, self.wait_selector),
+                ("wait_selector_state", wait_selector_state, self.wait_selector_state),
+                ("network_idle", network_idle, self.network_idle),
+                ("load_dom", load_dom, self.load_dom),
+                ("selector_config", selector_config, self.selector_config),
+            ],
+            _UNSET,
         )

         if self._closed:  # pragma: no cover
@@ -305,8 +299,9 @@ class DynamicSession(DynamicSessionMixin, SyncSession):
                 page_info.page, first_response, final_response, params.selector_config
             )

-            #
-            page_info.
+            # Close the page, to free up resources
+            page_info.page.close()
+            self.page_pool.pages.remove(page_info)

             return response

@@ -365,7 +360,7 @@ class AsyncDynamicSession(DynamicSessionMixin, AsyncSession):
         :param real_chrome: If you have a Chrome browser installed on your device, enable this, and the Fetcher will launch an instance of your browser and use it.
         :param hide_canvas: Add random noise to canvas operations to prevent fingerprinting.
         :param disable_webgl: Disables WebGL and WebGL 2.0 support entirely.
-        :param cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers
+        :param cdp_url: Instead of launching a new browser instance, connect to this CDP URL to control real browsers through CDP.
         :param google_search: Enabled by default, Scrapling will set the referer header to be as if this request came from a Google search of this website's domain name.
         :param extra_headers: A dictionary of extra headers to add to the request. _The referer set by the `google_search` argument takes priority over the referer set here if used together._
         :param proxy: The proxy to be used with requests, it can be a string or a dictionary with the keys 'server', 'username', and 'password' only.
@@ -402,10 +397,7 @@ class AsyncDynamicSession(DynamicSessionMixin, AsyncSession):

     async def __create__(self):
         """Create a browser for this instance and context."""
-        async_context =
-        if not self.stealth or self.real_chrome:
-            # Because rebrowser_playwright doesn't play well with real browsers
-            async_context = async_playwright
+        async_context = async_patchright if self.stealth else async_playwright

         self.playwright: AsyncPlaywright = await async_context().start()

@@ -478,22 +470,21 @@ class AsyncDynamicSession(DynamicSessionMixin, AsyncSession):
         :param selector_config: The arguments that will be passed in the end while creating the final Selector's class.
         :return: A `Response` object.
         """
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-            PlaywrightConfig,
+        params = _validate(
+            [
+                ("google_search", google_search, self.google_search),
+                ("timeout", timeout, self.timeout),
+                ("wait", wait, self.wait),
+                ("page_action", page_action, self.page_action),
+                ("extra_headers", extra_headers, self.extra_headers),
+                ("disable_resources", disable_resources, self.disable_resources),
+                ("wait_selector", wait_selector, self.wait_selector),
+                ("wait_selector_state", wait_selector_state, self.wait_selector_state),
+                ("network_idle", network_idle, self.network_idle),
+                ("load_dom", load_dom, self.load_dom),
+                ("selector_config", selector_config, self.selector_config),
+            ],
+            _UNSET,
         )

         if self._closed:  # pragma: no cover
@@ -554,9 +545,9 @@ class AsyncDynamicSession(DynamicSessionMixin, AsyncSession):
                 page_info.page, first_response, final_response, params.selector_config
            )

-            #
-            page_info.
-
+            # Close the page, to free up resources
+            await page_info.page.close()
+            self.page_pool.pages.remove(page_info)
             return response

         except Exception as e:  # pragma: no cover
scrapling/engines/_browsers/_page.py
@@ -6,7 +6,7 @@ from playwright.async_api import Page as AsyncPage

 from scrapling.core._types import Optional, List, Literal

-PageState = Literal["
+PageState = Literal["ready", "busy", "error"]  # States that a page can be in


 @dataclass
@@ -23,11 +23,6 @@ class PageInfo:
         self.state = "busy"
         self.url = url

-    def mark_finished(self):
-        """Mark the page as finished for new requests"""
-        self.state = "finished"
-        self.url = ""
-
     def mark_error(self):
         """Mark the page as having an error"""
         self.state = "error"
@@ -67,12 +62,6 @@ class PagePool:
         """Get the total number of pages"""
         return len(self.pages)

-    @property
-    def finished_count(self) -> int:
-        """Get the number of finished pages"""
-        with self._lock:
-            return sum(1 for p in self.pages if p.state == "finished")
-
     @property
     def busy_count(self) -> int:
         """Get the number of busy pages"""
@@ -83,33 +72,3 @@ class PagePool:
         """Remove pages in error state"""
         with self._lock:
             self.pages = [p for p in self.pages if p.state != "error"]
-
-    def close_all_finished_pages(self):
-        """Close all pages in finished state and remove them from the pool"""
-        with self._lock:
-            pages_to_remove = []
-            for page_info in self.pages:
-                if page_info.state == "finished":
-                    try:
-                        page_info.page.close()
-                    except Exception:
-                        pass
-                    pages_to_remove.append(page_info)
-
-            for page_info in pages_to_remove:
-                self.pages.remove(page_info)
-
-    async def aclose_all_finished_pages(self):
-        """Async version: Close all pages in finished state and remove them from the pool"""
-        with self._lock:
-            pages_to_remove = []
-            for page_info in self.pages:
-                if page_info.state == "finished":
-                    try:
-                        await page_info.page.close()
-                    except Exception:
-                        pass
-                    pages_to_remove.append(page_info)
-
-            for page_info in pages_to_remove:
-                self.pages.remove(page_info)
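
With the `finished` state, `finished_count`, and both `close_all_finished_pages` helpers removed, a page can now only be `ready`, `busy`, or `error` (the new `PageState` literal above), and the fetch methods close each page and drop it from the pool as soon as its response has been built, as the `page_info.page.close()` hunks show. The sketch below models that simplified lifecycle with stand-in classes; the method names the diff does not show (`mark_busy`, `cleanup_error_pages`, `finish_request`) are hypothetical.

# Minimal stand-in for the simplified page lifecycle after this diff.
# Not the package's real classes; fields are modelled on the hunks above.
from dataclasses import dataclass, field
from threading import RLock
from typing import Any, List, Literal

PageState = Literal["ready", "busy", "error"]

@dataclass
class PageInfo:
    page: Any                    # the underlying Playwright page
    state: PageState = "ready"
    url: str = ""

    def mark_busy(self, url: str) -> None:
        self.state, self.url = "busy", url

    def mark_error(self) -> None:
        self.state = "error"

@dataclass
class PagePool:
    pages: List[PageInfo] = field(default_factory=list)
    _lock: Any = field(default_factory=RLock, repr=False)

    @property
    def busy_count(self) -> int:
        with self._lock:
            return sum(1 for p in self.pages if p.state == "busy")

    def cleanup_error_pages(self) -> None:
        with self._lock:
            self.pages = [p for p in self.pages if p.state != "error"]

# Per-request flow after this release: no "finished" bookkeeping; the page is
# closed and removed from the pool as soon as the response has been constructed.
def finish_request(pool: PagePool, info: PageInfo) -> None:
    info.page.close()
    pool.pages.remove(info)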