crawlee 1.0.1b5__py3-none-any.whl → 1.0.1b6__py3-none-any.whl
This diff shows the changes between two publicly available versions of the package, as released to one of the supported registries. It is provided for informational purposes only and reflects the package contents as they appear in their respective public registries.
- crawlee/browsers/_playwright_browser_controller.py +20 -14
- {crawlee-1.0.1b5.dist-info → crawlee-1.0.1b6.dist-info}/METADATA +1 -1
- {crawlee-1.0.1b5.dist-info → crawlee-1.0.1b6.dist-info}/RECORD +6 -6
- {crawlee-1.0.1b5.dist-info → crawlee-1.0.1b6.dist-info}/WHEEL +0 -0
- {crawlee-1.0.1b5.dist-info → crawlee-1.0.1b6.dist-info}/entry_points.txt +0 -0
- {crawlee-1.0.1b5.dist-info → crawlee-1.0.1b6.dist-info}/licenses/LICENSE +0 -0
|
@@ -2,6 +2,7 @@
|
|
|
2
2
|
|
|
3
3
|
from __future__ import annotations
|
|
4
4
|
|
|
5
|
+
from asyncio import Lock
|
|
5
6
|
from datetime import datetime, timedelta, timezone
|
|
6
7
|
from typing import TYPE_CHECKING, Any, cast
|
|
7
8
|
|
|
@@ -77,6 +78,19 @@ class PlaywrightBrowserController(BrowserController):
|
|
|
77
78
|
|
|
78
79
|
self._total_opened_pages = 0
|
|
79
80
|
|
|
81
|
+
self._context_creation_lock: Lock | None = None
|
|
82
|
+
|
|
83
|
+
async def _get_context_creation_lock(self) -> Lock:
|
|
84
|
+
"""Get context checking and creation lock.
|
|
85
|
+
|
|
86
|
+
It should be done with lock to prevent multiple concurrent attempts to create context, which could lead to
|
|
87
|
+
memory leak as one of the two concurrently created contexts will become orphaned and not properly closed.
|
|
88
|
+
"""
|
|
89
|
+
if self._context_creation_lock:
|
|
90
|
+
return self._context_creation_lock
|
|
91
|
+
self._context_creation_lock = Lock()
|
|
92
|
+
return self._context_creation_lock
|
|
93
|
+
|
|
80
94
|
@property
|
|
81
95
|
@override
|
|
82
96
|
def pages(self) -> list[Page]:
|
|
@@ -137,12 +151,6 @@ class PlaywrightBrowserController(BrowserController):
|
|
|
137
151
|
Raises:
|
|
138
152
|
ValueError: If the browser has reached the maximum number of open pages.
|
|
139
153
|
"""
|
|
140
|
-
if not self._browser_context:
|
|
141
|
-
self._browser_context = await self._create_browser_context(
|
|
142
|
-
browser_new_context_options=browser_new_context_options,
|
|
143
|
-
proxy_info=proxy_info,
|
|
144
|
-
)
|
|
145
|
-
|
|
146
154
|
if not self.has_free_capacity:
|
|
147
155
|
raise ValueError('Cannot open more pages in this browser.')
|
|
148
156
|
|
|
@@ -154,11 +162,12 @@ class PlaywrightBrowserController(BrowserController):
|
|
|
154
162
|
)
|
|
155
163
|
page = await new_context.new_page()
|
|
156
164
|
else:
|
|
157
|
-
|
|
158
|
-
|
|
159
|
-
|
|
160
|
-
|
|
161
|
-
|
|
165
|
+
async with await self._get_context_creation_lock():
|
|
166
|
+
if not self._browser_context:
|
|
167
|
+
self._browser_context = await self._create_browser_context(
|
|
168
|
+
browser_new_context_options=browser_new_context_options,
|
|
169
|
+
proxy_info=proxy_info,
|
|
170
|
+
)
|
|
162
171
|
page = await self._browser_context.new_page()
|
|
163
172
|
|
|
164
173
|
# Handle page close event
|
|
@@ -169,7 +178,6 @@ class PlaywrightBrowserController(BrowserController):
|
|
|
169
178
|
self._last_page_opened_at = datetime.now(timezone.utc)
|
|
170
179
|
|
|
171
180
|
self._total_opened_pages += 1
|
|
172
|
-
|
|
173
181
|
return page
|
|
174
182
|
|
|
175
183
|
@override
|
|
@@ -206,7 +214,6 @@ class PlaywrightBrowserController(BrowserController):
|
|
|
206
214
|
`self._fingerprint_generator` is available.
|
|
207
215
|
"""
|
|
208
216
|
browser_new_context_options = dict(browser_new_context_options) if browser_new_context_options else {}
|
|
209
|
-
|
|
210
217
|
if proxy_info:
|
|
211
218
|
if browser_new_context_options.get('proxy'):
|
|
212
219
|
logger.warning("browser_new_context_options['proxy'] overriden by explicit `proxy_info` argument.")
|
|
@@ -244,5 +251,4 @@ class PlaywrightBrowserController(BrowserController):
|
|
|
244
251
|
browser_new_context_options['extra_http_headers'] = browser_new_context_options.get(
|
|
245
252
|
'extra_http_headers', extra_http_headers
|
|
246
253
|
)
|
|
247
|
-
|
|
248
254
|
return await self._browser.new_context(**browser_new_context_options)
|
|
@@ -45,7 +45,7 @@ crawlee/browsers/_browser_controller.py,sha256=-g0pB5Nx5q67eMZVka49x-HMfQqJYoI6k
|
|
|
45
45
|
crawlee/browsers/_browser_plugin.py,sha256=Wuojop___8ZO9eDoMs4JFmwMAFe5mZaTl0-Vz1PjkD8,3057
|
|
46
46
|
crawlee/browsers/_browser_pool.py,sha256=2pT4m_g0DfopjTHYXb-piN6GqxvkayOeb4gmOtn1QNM,15634
|
|
47
47
|
crawlee/browsers/_playwright_browser.py,sha256=1yXD6cXuLefZZGUG1m0CT38xXYSwIC7n95bJBdMOxbo,3820
|
|
48
|
-
crawlee/browsers/_playwright_browser_controller.py,sha256=
|
|
48
|
+
crawlee/browsers/_playwright_browser_controller.py,sha256=YaY19slRj8gIKrZy0M8rzF_zy2Z1Ym6d0S_vXcMX108,10215
|
|
49
49
|
crawlee/browsers/_playwright_browser_plugin.py,sha256=axZa_yZNCPHyM3Ijx9jW4CzzRXQTVzYAswcGAZHP3Hk,8106
|
|
50
50
|
crawlee/browsers/_types.py,sha256=eWgpoLMWu103hMQQTObkA01sVc_7hdPESl-TCyDMMV0,426
|
|
51
51
|
crawlee/browsers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -186,8 +186,8 @@ crawlee/storages/_key_value_store.py,sha256=3oI5hVoM_NpTQVKXCbQCmb0sZhW7vN2oXQo-
|
|
|
186
186
|
crawlee/storages/_request_queue.py,sha256=jt-d-NkI9lAorLssoI2r_lZjeEipe-5Cn6z9bfQqY3k,13154
|
|
187
187
|
crawlee/storages/_storage_instance_manager.py,sha256=iFX3ymsIXyTg8tMHtx5Wn9XyaC77dIf15GpuggsJPDM,7821
|
|
188
188
|
crawlee/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
189
|
-
crawlee-1.0.
|
|
190
|
-
crawlee-1.0.
|
|
191
|
-
crawlee-1.0.
|
|
192
|
-
crawlee-1.0.
|
|
193
|
-
crawlee-1.0.
|
|
189
|
+
crawlee-1.0.1b6.dist-info/METADATA,sha256=cjGXdw9ooazFDIzQ_Yj5wJon0rBiEqUtGJ-_AecYKjs,29314
|
|
190
|
+
crawlee-1.0.1b6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
191
|
+
crawlee-1.0.1b6.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
|
|
192
|
+
crawlee-1.0.1b6.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
|
|
193
|
+
crawlee-1.0.1b6.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|
|
File without changes
|