crawlee 1.0.0__py3-none-any.whl → 1.0.1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
@@ -2,6 +2,7 @@
2
2
 
3
3
  from __future__ import annotations
4
4
 
5
+ from asyncio import Lock
5
6
  from datetime import datetime, timedelta, timezone
6
7
  from typing import TYPE_CHECKING, Any, cast
7
8
 
@@ -77,6 +78,19 @@ class PlaywrightBrowserController(BrowserController):
77
78
 
78
79
  self._total_opened_pages = 0
79
80
 
81
+ self._context_creation_lock: Lock | None = None
82
+
83
+ async def _get_context_creation_lock(self) -> Lock:
84
+ """Get context checking and creation lock.
85
+
86
+ It should be done with lock to prevent multiple concurrent attempts to create context, which could lead to
87
+ memory leak as one of the two concurrently created contexts will become orphaned and not properly closed.
88
+ """
89
+ if self._context_creation_lock:
90
+ return self._context_creation_lock
91
+ self._context_creation_lock = Lock()
92
+ return self._context_creation_lock
93
+
80
94
  @property
81
95
  @override
82
96
  def pages(self) -> list[Page]:
@@ -137,12 +151,6 @@ class PlaywrightBrowserController(BrowserController):
137
151
  Raises:
138
152
  ValueError: If the browser has reached the maximum number of open pages.
139
153
  """
140
- if not self._browser_context:
141
- self._browser_context = await self._create_browser_context(
142
- browser_new_context_options=browser_new_context_options,
143
- proxy_info=proxy_info,
144
- )
145
-
146
154
  if not self.has_free_capacity:
147
155
  raise ValueError('Cannot open more pages in this browser.')
148
156
 
@@ -154,11 +162,12 @@ class PlaywrightBrowserController(BrowserController):
154
162
  )
155
163
  page = await new_context.new_page()
156
164
  else:
157
- if not self._browser_context:
158
- self._browser_context = await self._create_browser_context(
159
- browser_new_context_options=browser_new_context_options,
160
- proxy_info=proxy_info,
161
- )
165
+ async with await self._get_context_creation_lock():
166
+ if not self._browser_context:
167
+ self._browser_context = await self._create_browser_context(
168
+ browser_new_context_options=browser_new_context_options,
169
+ proxy_info=proxy_info,
170
+ )
162
171
  page = await self._browser_context.new_page()
163
172
 
164
173
  # Handle page close event
@@ -169,7 +178,6 @@ class PlaywrightBrowserController(BrowserController):
169
178
  self._last_page_opened_at = datetime.now(timezone.utc)
170
179
 
171
180
  self._total_opened_pages += 1
172
-
173
181
  return page
174
182
 
175
183
  @override
@@ -206,7 +214,6 @@ class PlaywrightBrowserController(BrowserController):
206
214
  `self._fingerprint_generator` is available.
207
215
  """
208
216
  browser_new_context_options = dict(browser_new_context_options) if browser_new_context_options else {}
209
-
210
217
  if proxy_info:
211
218
  if browser_new_context_options.get('proxy'):
212
219
  logger.warning("browser_new_context_options['proxy'] overriden by explicit `proxy_info` argument.")
@@ -244,5 +251,4 @@ class PlaywrightBrowserController(BrowserController):
244
251
  browser_new_context_options['extra_http_headers'] = browser_new_context_options.get(
245
252
  'extra_http_headers', extra_http_headers
246
253
  )
247
-
248
254
  return await self._browser.new_context(**browser_new_context_options)
@@ -5,8 +5,8 @@
5
5
  # % endif
6
6
  # % if cookiecutter.http_client == 'curl-impersonate'
7
7
  # % do extras.append('curl-impersonate')
8
- # % elif cookiecutter.http_client == 'impit'
9
- # % do extras.append('impit')
8
+ # % elif cookiecutter.http_client == 'httpx'
9
+ # % do extras.append('httpx')
10
10
  # % endif
11
11
 
12
12
  [project]
@@ -10,4 +10,7 @@ apify
10
10
  # % if cookiecutter.http_client == 'curl-impersonate'
11
11
  # % do extras.append('curl-impersonate')
12
12
  # % endif
13
+ # % if cookiecutter.http_client == 'httpx'
14
+ # % do extras.append('httpx')
15
+ # % endif
13
16
  crawlee[{{ extras | join(',') }}]
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: crawlee
3
- Version: 1.0.0
3
+ Version: 1.0.1
4
4
  Summary: Crawlee for Python
5
5
  Project-URL: Apify Homepage, https://apify.com
6
6
  Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
@@ -336,8 +336,6 @@ Description-Content-Type: text/markdown
336
336
 
337
337
  Crawlee covers your crawling and scraping end-to-end and **helps you build reliable scrapers. Fast.**
338
338
 
339
- > 🚀 Crawlee for Python is open to early adopters!
340
-
341
339
  Your crawlers will appear almost human-like and fly under the radar of modern bot protections even with the default configuration. Crawlee gives you the tools to crawl the web for links, scrape data and persistently store it in machine-readable formats, without having to worry about the technical details. And thanks to rich configuration options, you can tweak almost any aspect of Crawlee to suit your project's needs if the default settings don't cut it.
342
340
 
343
341
  > 👉 **View full documentation, guides and examples on the [Crawlee project website](https://crawlee.dev/python/)** 👈
@@ -45,7 +45,7 @@ crawlee/browsers/_browser_controller.py,sha256=-g0pB5Nx5q67eMZVka49x-HMfQqJYoI6k
45
45
  crawlee/browsers/_browser_plugin.py,sha256=Wuojop___8ZO9eDoMs4JFmwMAFe5mZaTl0-Vz1PjkD8,3057
46
46
  crawlee/browsers/_browser_pool.py,sha256=2pT4m_g0DfopjTHYXb-piN6GqxvkayOeb4gmOtn1QNM,15634
47
47
  crawlee/browsers/_playwright_browser.py,sha256=1yXD6cXuLefZZGUG1m0CT38xXYSwIC7n95bJBdMOxbo,3820
48
- crawlee/browsers/_playwright_browser_controller.py,sha256=IJ6UGD7n99WmyvDCc4RbjURfdAay_R9ymqPbcI2f4Q8,9764
48
+ crawlee/browsers/_playwright_browser_controller.py,sha256=YaY19slRj8gIKrZy0M8rzF_zy2Z1Ym6d0S_vXcMX108,10215
49
49
  crawlee/browsers/_playwright_browser_plugin.py,sha256=axZa_yZNCPHyM3Ijx9jW4CzzRXQTVzYAswcGAZHP3Hk,8106
50
50
  crawlee/browsers/_types.py,sha256=eWgpoLMWu103hMQQTObkA01sVc_7hdPESl-TCyDMMV0,426
51
51
  crawlee/browsers/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -126,8 +126,8 @@ crawlee/project_template/templates/routes_playwright_camoufox.py,sha256=XtXWbPZ4
126
126
  crawlee/project_template/{{cookiecutter.project_name}}/.dockerignore,sha256=PCDXvENlrMmYleuQULduBiw8ipXmE_iYJtCmeZVuz1I,6
127
127
  crawlee/project_template/{{cookiecutter.project_name}}/Dockerfile,sha256=NRSdHgEnCjBWE0lU3y-qHNEUJg_OH3zhoo0fPzkIs58,4138
128
128
  crawlee/project_template/{{cookiecutter.project_name}}/README.md,sha256=kEwhjWKqnSbg3gtGsuEiqWFGoqMdf4W7TZ0Lu0omwHk,1753
129
- crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml,sha256=E89kToOVOEclt5OB8YOg7fJM5o4TAEYlwz3-O1kpPi4,970
130
- crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt,sha256=xOeHqX5Bi2Iw1nkmBqbawX5A0qHo4CjQLn6JB-BNeUw,377
129
+ crawlee/project_template/{{cookiecutter.project_name}}/pyproject.toml,sha256=rfzarAQB8H93clog9xnqVThCIR7ltKqasMHX0-9PIMw,970
130
+ crawlee/project_template/{{cookiecutter.project_name}}/requirements.txt,sha256=HTiM50HxLguBgqKKLFR9DjsfrWgo13n8cAAJ9xhEmw8,460
131
131
  crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__init__.py,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
132
132
  crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/__main__.py,sha256=7YQVjE3HdCnoU055kLcKUcqXvbp3C2rtAY2TSJaItts,867
133
133
  crawlee/project_template/{{cookiecutter.project_name}}/{{cookiecutter.__package_name}}/main.py,sha256=mb3Wo_FpXGdIzWABJ0Y6CE-eXKxqSM_k__tnYYM1rF4,55
@@ -186,8 +186,8 @@ crawlee/storages/_key_value_store.py,sha256=3oI5hVoM_NpTQVKXCbQCmb0sZhW7vN2oXQo-
186
186
  crawlee/storages/_request_queue.py,sha256=jt-d-NkI9lAorLssoI2r_lZjeEipe-5Cn6z9bfQqY3k,13154
187
187
  crawlee/storages/_storage_instance_manager.py,sha256=iFX3ymsIXyTg8tMHtx5Wn9XyaC77dIf15GpuggsJPDM,7821
188
188
  crawlee/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
189
- crawlee-1.0.0.dist-info/METADATA,sha256=w6JIDE37hFDroKUw6Kc5KjhH93KLk4jK3I1aeHwrHBI,29366
190
- crawlee-1.0.0.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
191
- crawlee-1.0.0.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
192
- crawlee-1.0.0.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
193
- crawlee-1.0.0.dist-info/RECORD,,
189
+ crawlee-1.0.1.dist-info/METADATA,sha256=TUu48Ck-aNjxG2Kyn7Dampj6RZ_rGWV0MZcIUSf7rf0,29312
190
+ crawlee-1.0.1.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
191
+ crawlee-1.0.1.dist-info/entry_points.txt,sha256=1p65X3dA-cYvzjtlxLL6Kn1wpY-3uEDVqJLp53uNPeo,45
192
+ crawlee-1.0.1.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
193
+ crawlee-1.0.1.dist-info/RECORD,,