crawlee 1.1.1b1__py3-none-any.whl → 1.1.2b4__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.


@@ -3,19 +3,25 @@ from __future__ import annotations
  import asyncio
  import logging
  import warnings
+ from datetime import timedelta
  from functools import partial
  from typing import TYPE_CHECKING, Any, Generic, Literal
 
+ import playwright.async_api
  from more_itertools import partition
  from pydantic import ValidationError
  from typing_extensions import NotRequired, TypedDict, TypeVar
 
  from crawlee import service_locator
  from crawlee._request import Request, RequestOptions
- from crawlee._types import ConcurrencySettings
+ from crawlee._types import (
+     BasicCrawlingContext,
+     ConcurrencySettings,
+ )
  from crawlee._utils.blocked import RETRY_CSS_SELECTORS
  from crawlee._utils.docs import docs_group
  from crawlee._utils.robots import RobotsTxtFile
+ from crawlee._utils.time import SharedTimeout
  from crawlee._utils.urls import to_absolute_url_iterator
  from crawlee.browsers import BrowserPool
  from crawlee.crawlers._basic import BasicCrawler, BasicCrawlerOptions, ContextPipeline
@@ -44,7 +50,6 @@ if TYPE_CHECKING:
 
      from crawlee import RequestTransformAction
      from crawlee._types import (
-         BasicCrawlingContext,
          EnqueueLinksKwargs,
          ExtractLinksFunction,
          HttpHeaders,
@@ -106,6 +111,7 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
          fingerprint_generator: FingerprintGenerator | None | Literal['default'] = 'default',
          headless: bool | None = None,
          use_incognito_pages: bool | None = None,
+         navigation_timeout: timedelta | None = None,
          **kwargs: Unpack[BasicCrawlerOptions[PlaywrightCrawlingContext, StatisticsState]],
      ) -> None:
          """Initialize a new instance.
@@ -134,12 +140,16 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
              use_incognito_pages: By default pages share the same browser context. If set to True each page uses its
                  own context that is destroyed once the page is closed or crashes.
                  This option should not be used if `browser_pool` is provided.
+             navigation_timeout: Timeout for navigation (the process between opening a Playwright page and calling
+                 the request handler)
              kwargs: Additional keyword arguments to pass to the underlying `BasicCrawler`.
          """
          configuration = kwargs.pop('configuration', None)
          if configuration is not None:
              service_locator.set_configuration(configuration)
 
+         self._shared_navigation_timeouts: dict[int, SharedTimeout] = {}
+
          if browser_pool:
              # Raise an exception if browser_pool is provided together with other browser-related arguments.
              if any(
@@ -202,6 +212,8 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
          if 'concurrency_settings' not in kwargs or kwargs['concurrency_settings'] is None:
              kwargs['concurrency_settings'] = ConcurrencySettings(desired_concurrency=1)
 
+         self._navigation_timeout = navigation_timeout or timedelta(minutes=1)
+
          super().__init__(**kwargs)
 
      async def _open_page(
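Note: the new `navigation_timeout` option caps the whole navigation phase (pre-navigation hooks plus `page.goto`) and defaults to one minute when not set. A minimal usage sketch against the public `PlaywrightCrawler` API (the handler below is hypothetical, not part of this diff):

    from datetime import timedelta

    from crawlee.crawlers import PlaywrightCrawler, PlaywrightCrawlingContext

    # Allow 30 seconds per request for navigation instead of the 1-minute default.
    crawler = PlaywrightCrawler(navigation_timeout=timedelta(seconds=30))

    @crawler.router.default_handler
    async def handler(context: PlaywrightCrawlingContext) -> None:
        context.log.info(f'Loaded {context.request.url}')

    # crawler.run(['https://example.com']) would start the crawl.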
@@ -228,10 +240,18 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
              block_requests=partial(block_requests, page=crawlee_page.page),
          )
 
-         async with browser_page_context(crawlee_page.page):
-             for hook in self._pre_navigation_hooks:
-                 await hook(pre_navigation_context)
-             yield pre_navigation_context
+         context_id = id(pre_navigation_context)
+         self._shared_navigation_timeouts[context_id] = SharedTimeout(self._navigation_timeout)
+
+         try:
+             async with browser_page_context(crawlee_page.page):
+                 for hook in self._pre_navigation_hooks:
+                     async with self._shared_navigation_timeouts[context_id]:
+                         await hook(pre_navigation_context)
+
+                 yield pre_navigation_context
+         finally:
+             self._shared_navigation_timeouts.pop(context_id, None)
 
      def _prepare_request_interceptor(
          self,
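The pre-navigation hooks and the later `page.goto` call draw from a single per-request budget keyed by `id(pre_navigation_context)`. The `SharedTimeout` implementation itself is not part of this diff; the sketch below only illustrates the behaviour implied here, assuming enforcement is delegated to the async-timeout package that this release adds as a dependency (the class name `SharedTimeoutSketch` is hypothetical):

    from datetime import datetime, timedelta, timezone

    from async_timeout import timeout  # dependency newly declared in this release

    class SharedTimeoutSketch:
        """Illustrative stand-in for crawlee._utils.time.SharedTimeout, not the shipped code.

        One time budget is shared by consecutive `async with` blocks: each block is limited
        to whatever remains, and the time it consumes is subtracted afterwards.
        """

        def __init__(self, total: timedelta) -> None:
            self._remaining = total

        async def __aenter__(self) -> timedelta:
            self._started_at = datetime.now(timezone.utc)
            self._cm = timeout(self._remaining.total_seconds())
            await self._cm.__aenter__()
            return self._remaining

        async def __aexit__(self, *exc_info: object) -> None:
            self._remaining -= datetime.now(timezone.utc) - self._started_at
            await self._cm.__aexit__(*exc_info)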
@@ -266,6 +286,7 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
          Raises:
              ValueError: If the browser pool is not initialized.
              SessionError: If the URL cannot be loaded by the browser.
+             TimeoutError: If navigation does not succeed within the navigation timeout.
 
          Yields:
              The enhanced crawling context with the Playwright-specific features (page, response, enqueue_links,
@@ -297,7 +318,13 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
          # Set route_handler only for current request
          await context.page.route(context.request.url, route_handler)
 
-         response = await context.page.goto(context.request.url)
+         try:
+             async with self._shared_navigation_timeouts[id(context)] as remaining_timeout:
+                 response = await context.page.goto(
+                     context.request.url, timeout=remaining_timeout.total_seconds() * 1000
+                 )
+         except playwright.async_api.TimeoutError as exc:
+             raise asyncio.TimeoutError from exc
 
          if response is None:
              raise SessionError(f'Failed to load the URL: {context.request.url}')
@@ -369,9 +396,12 @@ class PlaywrightCrawler(BasicCrawler[PlaywrightCrawlingContext, StatisticsState]
              links_iterator: Iterator[str] = iter(
                  [url for element in elements if (url := await element.get_attribute('href')) is not None]
              )
-             links_iterator = to_absolute_url_iterator(
-                 context.request.loaded_url or context.request.url, links_iterator, logger=context.log
-             )
+
+             # Get base URL from <base> tag if present
+             extracted_base_url = await context.page.evaluate('document.baseURI')
+             base_url: str = extracted_base_url or context.request.loaded_url or context.request.url
+
+             links_iterator = to_absolute_url_iterator(base_url, links_iterator, logger=context.log)
 
              if robots_txt_file:
                  skipped, links_iterator = partition(lambda url: robots_txt_file.is_allowed(url), links_iterator)
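Relative hrefs are now resolved against `document.baseURI`, which honours a `<base>` tag when present and falls back to the loaded or requested URL otherwise. A small illustration of the difference, using a hypothetical page:

    from urllib.parse import urljoin

    # Page served from https://example.com/listing?page=2 that contains:
    #   <base href="https://example.com/products/">
    #   <a href="item-1">Item 1</a>
    page_url = 'https://example.com/listing?page=2'
    base_uri = 'https://example.com/products/'  # what document.baseURI reports

    print(urljoin(page_url, 'item-1'))  # https://example.com/item-1 (old behaviour)
    print(urljoin(base_uri, 'item-1'))  # https://example.com/products/item-1 (new behaviour)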
@@ -59,6 +59,7 @@ class PlaywrightHttpClient(HttpClient):
          session: Session | None = None,
          proxy_info: ProxyInfo | None = None,
          statistics: Statistics | None = None,
+         timeout: timedelta | None = None,
      ) -> HttpCrawlingResult:
          raise NotImplementedError('The `crawl` method should not be used for `PlaywrightHttpClient`')
 
@@ -72,6 +73,7 @@ class PlaywrightHttpClient(HttpClient):
          payload: HttpPayload | None = None,
          session: Session | None = None,
          proxy_info: ProxyInfo | None = None,
+         timeout: timedelta | None = None,
      ) -> HttpResponse:
          # `proxy_info` are not used because `APIRequestContext` inherits the proxy from `BrowserContext`
          # TODO: Use `session` to restore all the fingerprint headers according to the `BrowserContext`, after resolved
@@ -87,7 +89,11 @@ class PlaywrightHttpClient(HttpClient):
 
          # Proxies appropriate to the browser context are used
          response = await browser_context.request.fetch(
-             url_or_request=url, method=method.lower(), headers=dict(headers) if headers else None, data=payload
+             url_or_request=url,
+             method=method.lower(),
+             headers=dict(headers) if headers else None,
+             data=payload,
+             timeout=timeout.total_seconds() if timeout else None,
          )
 
          return await PlaywrightHttpResponse.from_playwright_response(response, protocol='')
@@ -104,6 +104,7 @@ class HttpClient(ABC):
          session: Session | None = None,
          proxy_info: ProxyInfo | None = None,
          statistics: Statistics | None = None,
+         timeout: timedelta | None = None,
      ) -> HttpCrawlingResult:
          """Perform the crawling for a given request.
 
@@ -114,6 +115,7 @@ class HttpClient(ABC):
              session: The session associated with the request.
              proxy_info: The information about the proxy to be used.
              statistics: The statistics object to register status codes.
+             timeout: Maximum time allowed to process the request.
 
          Raises:
              ProxyError: Raised if a proxy-related error occurs.
@@ -132,6 +134,7 @@ class HttpClient(ABC):
          payload: HttpPayload | None = None,
          session: Session | None = None,
          proxy_info: ProxyInfo | None = None,
+         timeout: timedelta | None = None,
      ) -> HttpResponse:
          """Send an HTTP request via the client.
 
@@ -144,6 +147,7 @@ class HttpClient(ABC):
              payload: The data to be sent as the request body.
              session: The session associated with the request.
              proxy_info: The information about the proxy to be used.
+             timeout: Maximum time allowed to process the request.
 
          Raises:
              ProxyError: Raised if a proxy-related error occurs.
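All `HttpClient` implementations now accept an optional per-call `timeout`; on expiry they raise `asyncio.TimeoutError`, wrapping the backend-specific exception, as the hunks below show. A minimal sketch of the call site:

    import asyncio
    from datetime import timedelta

    from crawlee.http_clients import HttpxHttpClient

    async def fetch_with_deadline() -> None:
        client = HttpxHttpClient()
        # Give this single request at most 10 seconds end to end.
        response = await client.send_request('https://example.com', timeout=timedelta(seconds=10))
        print(response.status_code)

    asyncio.run(fetch_with_deadline())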
@@ -1,5 +1,6 @@
  from __future__ import annotations
 
+ import asyncio
  from contextlib import asynccontextmanager
  from typing import TYPE_CHECKING, Any
 
@@ -10,6 +11,7 @@ from curl_cffi.requests.cookies import Cookies as CurlCookies
  from curl_cffi.requests.cookies import CurlMorsel
  from curl_cffi.requests.exceptions import ProxyError as CurlProxyError
  from curl_cffi.requests.exceptions import RequestException as CurlRequestError
+ from curl_cffi.requests.exceptions import Timeout
  from curl_cffi.requests.impersonate import DEFAULT_CHROME as CURL_DEFAULT_CHROME
  from typing_extensions import override
 
@@ -147,6 +149,7 @@ class CurlImpersonateHttpClient(HttpClient):
          session: Session | None = None,
          proxy_info: ProxyInfo | None = None,
          statistics: Statistics | None = None,
+         timeout: timedelta | None = None,
      ) -> HttpCrawlingResult:
          client = self._get_client(proxy_info.url if proxy_info else None)
 
@@ -157,7 +160,10 @@ class CurlImpersonateHttpClient(HttpClient):
                  headers=request.headers,
                  data=request.payload,
                  cookies=session.cookies.jar if session else None,
+                 timeout=timeout.total_seconds() if timeout else None,
              )
+         except Timeout as exc:
+             raise asyncio.TimeoutError from exc
          except CurlRequestError as exc:
              if self._is_proxy_error(exc):
                  raise ProxyError from exc
@@ -186,6 +192,7 @@ class CurlImpersonateHttpClient(HttpClient):
          payload: HttpPayload | None = None,
          session: Session | None = None,
          proxy_info: ProxyInfo | None = None,
+         timeout: timedelta | None = None,
      ) -> HttpResponse:
          if isinstance(headers, dict) or headers is None:
              headers = HttpHeaders(headers or {})
@@ -200,7 +207,10 @@ class CurlImpersonateHttpClient(HttpClient):
                  headers=dict(headers) if headers else None,
                  data=payload,
                  cookies=session.cookies.jar if session else None,
+                 timeout=timeout.total_seconds() if timeout else None,
              )
+         except Timeout as exc:
+             raise asyncio.TimeoutError from exc
          except CurlRequestError as exc:
              if self._is_proxy_error(exc):
                  raise ProxyError from exc
@@ -241,6 +251,8 @@ class CurlImpersonateHttpClient(HttpClient):
                  stream=True,
                  timeout=timeout.total_seconds() if timeout else None,
              )
+         except Timeout as exc:
+             raise asyncio.TimeoutError from exc
          except CurlRequestError as exc:
              if self._is_proxy_error(exc):
                  raise ProxyError from exc
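Because each backend re-raises its own timeout type (here curl_cffi's `Timeout`) as `asyncio.TimeoutError`, callers can handle expiry uniformly regardless of the client in use. A brief sketch (requires the curl-impersonate extra to be installed):

    import asyncio
    from datetime import timedelta

    from crawlee.http_clients import CurlImpersonateHttpClient

    async def is_reachable(url: str) -> bool:
        client = CurlImpersonateHttpClient()
        try:
            await client.send_request(url, timeout=timedelta(seconds=5))
        except asyncio.TimeoutError:
            # Same exception type as the httpx and impit clients raise on timeout.
            return False
        return True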
@@ -1,5 +1,6 @@
  from __future__ import annotations
 
+ import asyncio
  from contextlib import asynccontextmanager
  from logging import getLogger
  from typing import TYPE_CHECKING, Any, cast
@@ -146,6 +147,7 @@ class HttpxHttpClient(HttpClient):
          session: Session | None = None,
          proxy_info: ProxyInfo | None = None,
          statistics: Statistics | None = None,
+         timeout: timedelta | None = None,
      ) -> HttpCrawlingResult:
          client = self._get_client(proxy_info.url if proxy_info else None)
          headers = self._combine_headers(request.headers)
@@ -157,10 +159,13 @@ class HttpxHttpClient(HttpClient):
              content=request.payload,
              cookies=session.cookies.jar if session else None,
              extensions={'crawlee_session': session if self._persist_cookies_per_session else None},
+             timeout=timeout.total_seconds() if timeout is not None else httpx.USE_CLIENT_DEFAULT,
          )
 
          try:
              response = await client.send(http_request)
+         except httpx.TimeoutException as exc:
+             raise asyncio.TimeoutError from exc
          except httpx.TransportError as exc:
              if self._is_proxy_error(exc):
                  raise ProxyError from exc
@@ -185,6 +190,7 @@ class HttpxHttpClient(HttpClient):
          payload: HttpPayload | None = None,
          session: Session | None = None,
          proxy_info: ProxyInfo | None = None,
+         timeout: timedelta | None = None,
      ) -> HttpResponse:
          client = self._get_client(proxy_info.url if proxy_info else None)
 
@@ -195,10 +201,13 @@ class HttpxHttpClient(HttpClient):
              headers=headers,
              payload=payload,
              session=session,
+             timeout=httpx.Timeout(timeout.total_seconds()) if timeout is not None else None,
          )
 
          try:
              response = await client.send(http_request)
+         except httpx.TimeoutException as exc:
+             raise asyncio.TimeoutError from exc
          except httpx.TransportError as exc:
              if self._is_proxy_error(exc):
                  raise ProxyError from exc
@@ -228,10 +237,13 @@ class HttpxHttpClient(HttpClient):
              headers=headers,
              payload=payload,
              session=session,
-             timeout=timeout,
+             timeout=httpx.Timeout(None, connect=timeout.total_seconds()) if timeout else None,
          )
 
-         response = await client.send(http_request, stream=True)
+         try:
+             response = await client.send(http_request, stream=True)
+         except httpx.TimeoutException as exc:
+             raise asyncio.TimeoutError from exc
 
          try:
              yield _HttpxResponse(response)
@@ -246,7 +258,7 @@ class HttpxHttpClient(HttpClient):
          headers: HttpHeaders | dict[str, str] | None,
          payload: HttpPayload | None,
          session: Session | None = None,
-         timeout: timedelta | None = None,
+         timeout: httpx.Timeout | None = None,
      ) -> httpx.Request:
          """Build an `httpx.Request` using the provided parameters."""
          if isinstance(headers, dict) or headers is None:
@@ -254,15 +266,13 @@ class HttpxHttpClient(HttpClient):
 
          headers = self._combine_headers(headers)
 
-         httpx_timeout = httpx.Timeout(None, connect=timeout.total_seconds()) if timeout else None
-
          return client.build_request(
              url=url,
              method=method,
              headers=dict(headers) if headers else None,
              content=payload,
              extensions={'crawlee_session': session if self._persist_cookies_per_session else None},
-             timeout=httpx_timeout,
+             timeout=timeout if timeout else httpx.USE_CLIENT_DEFAULT,
          )
 
      def _get_client(self, proxy_url: str | None) -> httpx.AsyncClient:
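The httpx client maps the single `timedelta` onto different `httpx.Timeout` shapes: a total budget for `crawl`/`send_request`, a connect-only budget for the streaming path, and `httpx.USE_CLIENT_DEFAULT` when nothing is supplied. Plain httpx API, nothing crawlee-specific assumed:

    import httpx

    total = httpx.Timeout(10.0)                       # one limit applied to connect/read/write/pool
    connect_only = httpx.Timeout(None, connect=10.0)  # cap connecting, leave the rest unlimited
    fallback = httpx.USE_CLIENT_DEFAULT               # defer to the client's configured timeouts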
@@ -6,7 +6,7 @@ from logging import getLogger
  from typing import TYPE_CHECKING, Any, TypedDict
 
  from cachetools import LRUCache
- from impit import AsyncClient, Browser, HTTPError, Response, TransportError
+ from impit import AsyncClient, Browser, HTTPError, Response, TimeoutException, TransportError
  from impit import ProxyError as ImpitProxyError
  from typing_extensions import override
 
@@ -125,6 +125,7 @@ class ImpitHttpClient(HttpClient):
          session: Session | None = None,
          proxy_info: ProxyInfo | None = None,
          statistics: Statistics | None = None,
+         timeout: timedelta | None = None,
      ) -> HttpCrawlingResult:
          client = self._get_client(proxy_info.url if proxy_info else None, session.cookies.jar if session else None)
 
@@ -134,7 +135,10 @@ class ImpitHttpClient(HttpClient):
                  method=request.method,
                  content=request.payload,
                  headers=dict(request.headers) if request.headers else None,
+                 timeout=timeout.total_seconds() if timeout else None,
              )
+         except TimeoutException as exc:
+             raise asyncio.TimeoutError from exc
          except (TransportError, HTTPError) as exc:
              if self._is_proxy_error(exc):
                  raise ProxyError from exc
@@ -157,6 +161,7 @@ class ImpitHttpClient(HttpClient):
          payload: HttpPayload | None = None,
          session: Session | None = None,
          proxy_info: ProxyInfo | None = None,
+         timeout: timedelta | None = None,
      ) -> HttpResponse:
          if isinstance(headers, dict) or headers is None:
              headers = HttpHeaders(headers or {})
@@ -165,8 +170,14 @@ class ImpitHttpClient(HttpClient):
 
          try:
              response = await client.request(
-                 method=method, url=url, content=payload, headers=dict(headers) if headers else None
+                 method=method,
+                 url=url,
+                 content=payload,
+                 headers=dict(headers) if headers else None,
+                 timeout=timeout.total_seconds() if timeout else None,
              )
+         except TimeoutException as exc:
+             raise asyncio.TimeoutError from exc
          except (TransportError, HTTPError) as exc:
              if self._is_proxy_error(exc):
                  raise ProxyError from exc
@@ -189,14 +200,18 @@ class ImpitHttpClient(HttpClient):
      ) -> AsyncGenerator[HttpResponse]:
          client = self._get_client(proxy_info.url if proxy_info else None, session.cookies.jar if session else None)
 
-         response = await client.request(
-             method=method,
-             url=url,
-             content=payload,
-             headers=dict(headers) if headers else None,
-             timeout=timeout.total_seconds() if timeout else None,
-             stream=True,
-         )
+         try:
+             response = await client.request(
+                 method=method,
+                 url=url,
+                 content=payload,
+                 headers=dict(headers) if headers else None,
+                 timeout=timeout.total_seconds() if timeout else None,
+                 stream=True,
+             )
+         except TimeoutException as exc:
+             raise asyncio.TimeoutError from exc
+
          try:
              yield _ImpitResponse(response)
          finally:
@@ -134,7 +134,7 @@ class FileSystemDatasetClient(DatasetClient):
                  continue
 
              try:
-                 file = await asyncio.to_thread(path_to_metadata.open)
+                 file = await asyncio.to_thread(path_to_metadata.open, 'r', encoding='utf-8')
                  try:
                      file_content = json.load(file)
                      metadata = DatasetMetadata(**file_content)
@@ -163,7 +163,7 @@ class FileSystemDatasetClient(DatasetClient):
 
          # If the dataset directory exists, reconstruct the client from the metadata file.
          if path_to_dataset.exists() and path_to_metadata.exists():
-             file = await asyncio.to_thread(open, path_to_metadata)
+             file = await asyncio.to_thread(open, path_to_metadata, 'r', encoding='utf-8')
              try:
                  file_content = json.load(file)
              finally:
@@ -133,7 +133,7 @@ class FileSystemKeyValueStoreClient(KeyValueStoreClient):
                  continue
 
              try:
-                 file = await asyncio.to_thread(path_to_metadata.open)
+                 file = await asyncio.to_thread(path_to_metadata.open, 'r', encoding='utf-8')
                  try:
                      file_content = json.load(file)
                      metadata = KeyValueStoreMetadata(**file_content)
@@ -162,7 +162,7 @@ class FileSystemKeyValueStoreClient(KeyValueStoreClient):
 
          # If the key-value store directory exists, reconstruct the client from the metadata file.
          if path_to_kvs.exists() and path_to_metadata.exists():
-             file = await asyncio.to_thread(open, path_to_metadata)
+             file = await asyncio.to_thread(open, path_to_metadata, 'r', encoding='utf-8')
              try:
                  file_content = json.load(file)
              finally:
@@ -239,7 +239,7 @@ class FileSystemKeyValueStoreClient(KeyValueStoreClient):
          # Read the metadata file
          async with self._lock:
              try:
-                 file = await asyncio.to_thread(open, record_metadata_filepath)
+                 file = await asyncio.to_thread(open, record_metadata_filepath, 'r', encoding='utf-8')
              except FileNotFoundError:
                  logger.warning(f'Metadata file disappeared for key "{key}", aborting get_value')
                  return None
@@ -197,7 +197,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
                  continue
 
              try:
-                 file = await asyncio.to_thread(path_to_metadata.open)
+                 file = await asyncio.to_thread(path_to_metadata.open, 'r', encoding='utf-8')
                  try:
                      file_content = json.load(file)
                      metadata = RequestQueueMetadata(**file_content)
@@ -232,7 +232,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
 
          # If the RQ directory exists, reconstruct the client from the metadata file.
          if path_to_rq.exists() and path_to_metadata.exists():
-             file = await asyncio.to_thread(open, path_to_metadata)
+             file = await asyncio.to_thread(open, path_to_metadata, 'r', encoding='utf-8')
              try:
                  file_content = json.load(file)
              finally:
@@ -775,7 +775,7 @@ class FileSystemRequestQueueClient(RequestQueueClient):
          """
          # Open the request file.
          try:
-             file = await asyncio.to_thread(open, file_path)
+             file = await asyncio.to_thread(open, file_path, 'r', encoding='utf-8')
          except FileNotFoundError:
              logger.warning(f'Request file "{file_path}" not found.')
              return None
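The file-system storage clients now open their JSON metadata explicitly as UTF-8 text rather than with the platform's locale encoding. A small illustration of what that guards against (the file name is hypothetical):

    import json
    from pathlib import Path

    path = Path('metadata.json')
    path.write_text(json.dumps({'name': 'café-dataset'}), encoding='utf-8')

    # Without encoding='utf-8', open() falls back to locale.getpreferredencoding(),
    # which is not UTF-8 everywhere (e.g. cp1252 on some Windows setups) and can
    # mis-decode or reject this file.
    with open(path, 'r', encoding='utf-8') as f:
        print(json.load(f)['name'])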
@@ -1,6 +1,5 @@
  from __future__ import annotations
 
- import sys
  import warnings
  from datetime import timedelta
  from pathlib import Path
@@ -269,14 +268,6 @@ class SqlStorageClient(StorageClient):
                  'Unsupported database. Supported: sqlite, postgresql. Consider using a different database.'
              )
 
-         # TODO: https://github.com/apify/crawlee-python/issues/1555
-         if 'postgresql' in connection_string and sys.version_info >= (3, 14):
-             raise ValueError(
-                 'SqlStorageClient cannot use PostgreSQL with Python 3.14 '
-                 'due to asyncpg compatibility limitations. '
-                 'Please use Python 3.13 or earlier, or switch to SQLite.'
-             )
-
          self._engine = create_async_engine(
              connection_string,
              future=True,
@@ -1,6 +1,6 @@
  Metadata-Version: 2.4
  Name: crawlee
- Version: 1.1.1b1
+ Version: 1.1.2b4
  Summary: Crawlee for Python
  Project-URL: Apify Homepage, https://apify.com
  Project-URL: Changelog, https://crawlee.dev/python/docs/changelog
@@ -226,6 +226,7 @@ Classifier: Programming Language :: Python :: 3.13
  Classifier: Programming Language :: Python :: 3.14
  Classifier: Topic :: Software Development :: Libraries
  Requires-Python: >=3.10
+ Requires-Dist: async-timeout>=5.0.1
  Requires-Dist: cachetools>=5.5.0
  Requires-Dist: colorama>=0.4.0
  Requires-Dist: impit>=0.8.0
@@ -247,7 +248,7 @@ Requires-Dist: scikit-learn>=1.6.0; extra == 'adaptive-crawler'
  Provides-Extra: all
  Requires-Dist: aiosqlite>=0.21.0; extra == 'all'
  Requires-Dist: apify-fingerprint-datapoints>=0.0.2; extra == 'all'
- Requires-Dist: asyncpg>=0.24.0; (python_version < '3.14') and extra == 'all'
+ Requires-Dist: asyncpg>=0.24.0; extra == 'all'
  Requires-Dist: beautifulsoup4[lxml]>=4.12.0; extra == 'all'
  Requires-Dist: browserforge>=1.2.3; extra == 'all'
  Requires-Dist: cookiecutter>=2.6.0; extra == 'all'
@@ -301,7 +302,7 @@ Requires-Dist: playwright>=1.27.0; extra == 'playwright'
  Provides-Extra: redis
  Requires-Dist: redis[hiredis]>=7.0.0; extra == 'redis'
  Provides-Extra: sql-postgres
- Requires-Dist: asyncpg>=0.24.0; (python_version < '3.14') and extra == 'sql-postgres'
+ Requires-Dist: asyncpg>=0.24.0; extra == 'sql-postgres'
  Requires-Dist: sqlalchemy[asyncio]<3.0.0,>=2.0.0; extra == 'sql-postgres'
  Provides-Extra: sql-sqlite
  Requires-Dist: aiosqlite>=0.21.0; extra == 'sql-sqlite'