crawlee 1.0.1b5__py3-none-any.whl → 1.0.1b6__py3-none-any.whl

This diff shows the changes between two publicly available versions of a package, as released to one of the supported registries. It is provided for informational purposes only and reflects the differences between the package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ from asyncio import Lock
5
6
  from datetime import datetime, timedelta, timezone
6
7
  from typing import TYPE_CHECKING, Any, cast
7
8
 
@@ -77,6 +78,19 @@ class PlaywrightBrowserController(BrowserController):
77
78
 
78
79
  self._total_opened_pages = 0
79
80
 
81
+ self._context_creation_lock: Lock | None = None
82
+
83
+ async def _get_context_creation_lock(self) -> Lock:
84
+ """Get context checking and creation lock.
85
+
86
+ It should be done with lock to prevent multiple concurrent attempts to create context, which could lead to
87
+ memory leak as one of the two concurrently created contexts will become orphaned and not properly closed.
88
+ """
89
+ if self._context_creation_lock:
90
+ return self._context_creation_lock
91
+ self._context_creation_lock = Lock()
92
+ return self._context_creation_lock
93
+
80
94
  @property
81
95
  @override
82
96
  def pages(self) -> list[Page]:
@@ -137,12 +151,6 @@ class PlaywrightBrowserController(BrowserController):
137
151
  Raises:
138
152
  ValueError: If the browser has reached the maximum number of open pages.
139
153
  """
140
- if not self._browser_context:
141
- self._browser_context = await self._create_browser_context(
142
- browser_new_context_options=browser_new_context_options,
143
- proxy_info=proxy_info,
144
- )
145
-
146
154
  if not self.has_free_capacity:
147
155
  raise ValueError('Cannot open more pages in this browser.')
148
156
 
@@ -154,11 +162,12 @@ class PlaywrightBrowserController(BrowserController):
154
162
  )
155
163
  page = await new_context.new_page()
156
164
  else:
157
- if not self._browser_context:
158
- self._browser_context = await self._create_browser_context(
159
- browser_new_context_options=browser_new_context_options,
160
- proxy_info=proxy_info,
161
- )
165
+ async with await self._get_context_creation_lock():
166
+ if not self._browser_context:
167
+ self._browser_context = await self._create_browser_context(
168
+ browser_new_context_options=browser_new_context_options,
169
+ proxy_info=proxy_info,
170
+ )
162
171
  page = await self._browser_context.new_page()
163
172
 
164
173
  # Handle page close event
@@ -169,7 +178,6 @@ class PlaywrightBrowserController(BrowserController):
169
178
  self._last_page_opened_at = datetime.now(timezone.utc)
170
179
 
171
180
  self._total_opened_pages += 1
172
-
173
181
  return page
174
182
 
175
183
  @override
@@ -206,7 +214,6 @@ class PlaywrightBrowserController(BrowserController):
206
214
  `self._fingerprint_generator` is available.
207
215
  """
208
216
  browser_new_context_options = dict(browser_new_context_options) if browser_new_context_options else {}
209
-
210
217
  if proxy_info:
211
218
  if browser_new_context_options.get('proxy'):
212
219
  logger.warning("browser_new_context_options['proxy'] overriden by explicit `proxy_info` argument.")
@@ -244,5 +251,4 @@ class PlaywrightBrowserController(BrowserController):
244
251
  browser_new_context_options['extra_http_headers'] = browser_new_context_options.get(
245
252
  'extra_http_headers', extra_http_headers
246
253
  )
247
-
248
254
  return await self._browser.new_context(**browser_new_context_options)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: crawlee
3
- Version: 1.0.1b5
3
+ Version: 1.0.1b6
4
4
  Summary: Crawlee for Python
5
5
  Project-URL: Apify Homepage, https://apify.com
6
6
  Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
@@ -45,7 +45,7 @@ crawlee/browsers/_browser_controller.py,sha256=-g0pB5Nx5q67eMZVka49x-HMfQqJYoI6k
45
45
  crawlee/browsers/_browser_plugin.py,sha256=Wuojop___8ZO9eDoMs4JFmwMAFe5mZaTl0-Vz1PjkD8,3057
46
46
  crawlee/browsers/_browser_pool.py,sha256=2pT4m_g0DfopjTHYXb-piN6GqxvkayOeb4gmOtn1QNM,15634
47
47
  crawlee/browsers/_playwright_browser.py,sha256=1yXD6cXuLefZZGUG1m0CT38xXYSwIC7n95bJBdMOxbo,3820
48
- crawlee/browsers/_playwright_browser_controller.py,sha256=IJ6UGD7n99WmyvDCc4RbjURfdAay_R9ymqPbcI2f4Q8,9764
48
+ crawlee/browsers/_playwright_browser_controller.py,sha256=YaY19slRj8gIKrZy0M8rzF_zy2Z1Ym6d0S_vXcMX108,10215
49
49
  crawlee/browsers/_playwright_browser_plugin.py,sha256=axZa_yZNCPHyM3Ijx9jW4CzzRXQTVzYAswcGAZHP3Hk,8106
50
50
  crawlee/browsers/_types.py,sha256=eWgpoLMWu103hMQQTObkA01sVc_7hdPESl-TCyDMMV0,426
51
51
  crawlee/browsers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -186,8 +186,8 @@ crawlee/storages/_key_value_store.py,sha256=3oI5hVoM_NpTQVKXCbQCmb0sZhW7vN2oXQo-
186
186
  crawlee/storages/_request_queue.py,sha256=jt-d-NkI9lAorLssoI2r_lZjeEipe-5Cn6z9bfQqY3k,13154
187
187
  crawlee/storages/_storage_instance_manager.py,sha256=iFX3ymsIXyTg8tMHtx5Wn9XyaC77dIf15GpuggsJPDM,7821
188
188
  crawlee/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
189
- crawlee-1.0.1b5.dist-info/METADATA,sha256=HbXaga93niEC3vxL0zIv9dvMafyGFSqawpT2lOLoTr4,29314
190
- crawlee-1.0.1b5.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
191
- crawlee-1.0.1b5.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
192
- crawlee-1.0.1b5.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
193
- crawlee-1.0.1b5.dist-info/RECORD,,
189
+ crawlee-1.0.1b6.dist-info/METADATA,sha256=cjGXdw9ooazFDIzQ_Yj5wJon0rBiEqUtGJ-_AecYKjs,29314
190
+ crawlee-1.0.1b6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
191
+ crawlee-1.0.1b6.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
192
+ crawlee-1.0.1b6.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
193
+ crawlee-1.0.1b6.dist-info/RECORD,,