apify 2.7.1b19__py3-none-any.whl → 2.7.1b21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release: this version of apify might be problematic.

@@ -0,0 +1,399 @@
+from __future__ import annotations
+
+from collections import deque
+from datetime import datetime, timezone
+from logging import getLogger
+from typing import TYPE_CHECKING, Final
+
+from cachetools import LRUCache
+
+from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata
+
+from apify import Request
+from apify.storage_clients._apify._utils import unique_key_to_request_id
+
+if TYPE_CHECKING:
+    from collections.abc import Sequence
+
+    from apify_client.clients import RequestQueueClientAsync
+
+
+logger = getLogger(__name__)
+
+
+class _ApifyRequestQueueSingleClient:
+    """An Apify platform implementation of the request queue client with limited capability.
+
+    This client is designed to use as few resources as possible, but it must be used in a constrained context.
+    Constraints:
+    - Only one client consumes the request queue at a time.
+    - Multiple producers can put requests into the queue, but their forefront requests are not guaranteed to be
+      handled as quickly, because this client does not aggressively fetch the forefront and relies on local head estimation.
+    - Requests are only added to the queue, never deleted. (Marking them as handled is ok.)
+    - Other producers can add new requests, but not modify existing ones (otherwise the cache can miss the updates).
+
+    If the constraints are not met, the client might behave in an unpredictable way.
+    """
+
+    _MAX_HEAD_ITEMS: Final[int] = 1000
+    """The maximum number of head items that can be read at once, a limit imposed by the API."""
+
+    def __init__(
+        self,
+        *,
+        api_client: RequestQueueClientAsync,
+        metadata: RequestQueueMetadata,
+        cache_size: int,
+    ) -> None:
+        """Initialize a new instance.
+
+        Preferably use the `ApifyRequestQueueClient.open` class method to create a new instance.
+        """
+        self.metadata = metadata
+        """Additional data related to the RequestQueue."""
+
+        self._api_client = api_client
+        """The Apify request queue client for API operations."""
+
+        self._requests_cache: LRUCache[str, Request] = LRUCache(maxsize=cache_size)
+        """A cache to store request objects. The request unique key is used as the cache key."""
+
+        self._head_requests: deque[str] = deque()
+        """Ordered unique keys of requests that represent the queue head."""
+
+        self._requests_already_handled: set[str] = set()
+        """Local estimation of the unique keys of requests that are already present and handled on the platform.
+
+        - To enhance local deduplication.
+        - To reduce the _requests_cache size. Already handled requests are most likely not going to be needed again,
+          so there is no need to cache more than their unique_key.
+        """
+
+        self._requests_in_progress: set[str] = set()
+        """Set of the unique keys of requests that are being processed locally.
+
+        - To help decide whether the RQ is finished. This is the only consumer, so it can be tracked locally.
+        """
+
+        self._initialized_caches = False
+        """This flag indicates whether the local caches were already initialized.
+
+        Initialization is done lazily, and only if deduplication is needed (when calling add_batch_of_requests).
+        """
+
+    async def add_batch_of_requests(
+        self,
+        requests: Sequence[Request],
+        *,
+        forefront: bool = False,
+    ) -> AddRequestsResponse:
+        """Add a batch of requests to the queue.
+
+        Args:
+            requests: The requests to add.
+            forefront: Whether to add the requests to the beginning of the queue.
+
+        Returns:
+            Response containing information about the added requests.
+        """
+        if not self._initialized_caches:
+            # One-time process to initialize the local caches for existing request queues.
+            await self._init_caches()
+            self._initialized_caches = True
+
+        new_requests: list[Request] = []
+        already_present_requests: list[ProcessedRequest] = []
+
+        for request in requests:
+            # Check if the request is known to be already handled (it has to be present as well).
+            if request.unique_key in self._requests_already_handled:
+                already_present_requests.append(
+                    ProcessedRequest.model_validate(
+                        {
+                            'uniqueKey': request.unique_key,
+                            'wasAlreadyPresent': True,
+                            'wasAlreadyHandled': True,
+                        }
+                    )
+                )
+            # Check if the request is known to be already present, but unhandled.
+            elif self._requests_cache.get(request.unique_key):
+                already_present_requests.append(
+                    ProcessedRequest.model_validate(
+                        {
+                            'uniqueKey': request.unique_key,
+                            'wasAlreadyPresent': True,
+                            'wasAlreadyHandled': request.was_already_handled,
+                        }
+                    )
+                )
+            else:
+                # Push the request to the platform. It is probably not there, or we are not aware of it.
+                new_requests.append(request)
+
+                # Update the local caches.
+                self._requests_cache[request.unique_key] = request
+                if forefront:
+                    self._head_requests.append(request.unique_key)
+                else:
+                    self._head_requests.appendleft(request.unique_key)
+
+        if new_requests:
+            # Prepare the requests for the API by converting them to dictionaries.
+            requests_dict = [
+                request.model_dump(
+                    by_alias=True,
+                )
+                for request in new_requests
+            ]
+
+            # Send the requests to the API.
+            api_response = AddRequestsResponse.model_validate(
+                await self._api_client.batch_add_requests(requests=requests_dict, forefront=forefront)
+            )
+            # Add the locally known, already present, processed requests based on the local cache.
+            api_response.processed_requests.extend(already_present_requests)
+            # Remove unprocessed requests from the cache.
+            for unprocessed_request in api_response.unprocessed_requests:
+                self._requests_cache.pop(unprocessed_request.unique_key, None)
+
+        else:
+            api_response = AddRequestsResponse.model_validate(
+                {'unprocessedRequests': [], 'processedRequests': already_present_requests}
+            )
+
+        # Update the assumed total count for newly added requests.
+        new_request_count = 0
+        for processed_request in api_response.processed_requests:
+            if not processed_request.was_already_present and not processed_request.was_already_handled:
+                new_request_count += 1
+        self.metadata.total_request_count += new_request_count
+        self.metadata.pending_request_count += new_request_count
+
+        return api_response
+
+    async def get_request(self, unique_key: str) -> Request | None:
+        """Get a request by its unique key.
+
+        Args:
+            unique_key: Unique key of the request to get.
+
+        Returns:
+            The request, or None if not found.
+        """
+        if unique_key in self._requests_cache:
+            return self._requests_cache[unique_key]
+
+        response = await self._api_client.get_request(unique_key_to_request_id(unique_key))
+
+        if response is None:
+            return None
+
+        return Request.model_validate(response)
+
+    async def fetch_next_request(self) -> Request | None:
+        """Return the next request in the queue to be processed.
+
+        Once you successfully finish processing the request, you need to call `mark_request_as_handled`
+        to mark the request as handled in the queue. If there was an error while processing the request, call
+        `reclaim_request` instead, so that the queue will give the request to some other consumer
+        in another call to the `fetch_next_request` method.
+
+        Returns:
+            The request, or `None` if there are no more pending requests.
+        """
+        await self._ensure_head_is_non_empty()
+
+        while self._head_requests:
+            request_unique_key = self._head_requests.pop()
+            if (
+                request_unique_key not in self._requests_in_progress
+                and request_unique_key not in self._requests_already_handled
+            ):
+                self._requests_in_progress.add(request_unique_key)
+                return await self.get_request(request_unique_key)
+        # No request available locally, and the ones returned from the platform are already in progress.
+        return None
+
+    async def _ensure_head_is_non_empty(self) -> None:
+        """Ensure that the queue head has requests if they are available in the queue."""
+        if len(self._head_requests) <= 1:
+            await self._list_head()
+
+    async def _list_head(self) -> None:
+        desired_new_head_items = 200
+        # The head will contain in-progress requests as well, so we need to fetch more to get some new ones.
+        requested_head_items = min(self._MAX_HEAD_ITEMS, desired_new_head_items + len(self._requests_in_progress))
+        response = await self._api_client.list_head(limit=requested_head_items)
+
+        # Update the metadata.
+        # Check if there is another client working with the RequestQueue.
+        self.metadata.had_multiple_clients = response.get('hadMultipleClients', False)
+        # Should this warn once? This might be outside the expected context if other consumers consume at the same time.
+
+        if modified_at := response.get('queueModifiedAt'):
+            self.metadata.modified_at = max(self.metadata.modified_at, modified_at)
+
+        # Update the cached data.
+        for request_data in response.get('items', []):
+            request = Request.model_validate(request_data)
+
+            if request.unique_key in self._requests_in_progress:
+                # Ignore requests that are already in progress; we will not process them again.
+                continue
+            if request.was_already_handled:
+                # Do not cache fully handled requests; we do not need them. Just cache their unique_key.
+                self._requests_already_handled.add(request.unique_key)
+            else:
+                # Only fetch the request if we do not know it yet.
+                if request.unique_key not in self._requests_cache:
+                    request = Request.model_validate(
+                        await self._api_client.get_request(unique_key_to_request_id(request.unique_key))
+                    )
+                    self._requests_cache[request.unique_key] = request
+
+                # Add new requests to the end of the head, unless they are already present in the head.
+                if request.unique_key not in self._head_requests:
+                    self._head_requests.appendleft(request.unique_key)
+
+    async def mark_request_as_handled(self, request: Request) -> ProcessedRequest | None:
+        """Mark a request as handled after successful processing.
+
+        Handled requests will never again be returned by the `fetch_next_request` method.
+
+        Args:
+            request: The request to mark as handled.
+
+        Returns:
+            Information about the queue operation. `None` if the given request was not in progress.
+        """
+        # Set the handled_at timestamp if it is not already set.
+        if request.handled_at is None:
+            request.handled_at = datetime.now(tz=timezone.utc)
+            self.metadata.handled_request_count += 1
+            self.metadata.pending_request_count -= 1
+
+        if cached_request := self._requests_cache.get(request.unique_key):
+            cached_request.handled_at = request.handled_at
+
+        try:
+            # Update the request in the API. This works as an upsert - it adds the request if it does not exist
+            # yet. (E.g. a local request that was handled before being added to the queue.)
+            processed_request = await self._update_request(request)
+            # Remember that we handled this request, to optimize local deduplication.
+            self._requests_already_handled.add(request.unique_key)
+            # Remove the request from the cache. It will most likely not be needed again.
+            self._requests_cache.pop(request.unique_key)
+            self._requests_in_progress.discard(request.unique_key)
+
+        except Exception as exc:
+            logger.debug(f'Error marking request {request.unique_key} as handled: {exc!s}')
+            return None
+        else:
+            return processed_request
+
+    async def reclaim_request(
+        self,
+        request: Request,
+        *,
+        forefront: bool = False,
+    ) -> ProcessedRequest | None:
+        """Reclaim a failed request back to the queue.
+
+        The request will be returned for processing later again by another call to `fetch_next_request`.
+
+        Args:
+            request: The request to return to the queue.
+            forefront: Whether to add the request to the head or the end of the queue.
+
+        Returns:
+            Information about the queue operation. `None` if the given request was not in progress.
+        """
+        # Check if the request was marked as handled and clear it. When reclaiming,
+        # we want to put the request back for processing.
+        if request.was_already_handled:
+            request.handled_at = None
+
+        try:
+            # Make sure the request is in the local cache. We might need it.
+            self._requests_cache[request.unique_key] = request
+
+            # No longer in progress.
+            self._requests_in_progress.discard(request.unique_key)
+            # No longer handled.
+            self._requests_already_handled.discard(request.unique_key)
+
+            if forefront:
+                # Append to the top of the local head estimation.
+                self._head_requests.append(request.unique_key)
+
+            processed_request = await self._update_request(request, forefront=forefront)
+            processed_request.unique_key = request.unique_key
+            # If the request was previously handled, decrement our handled count since
+            # we're putting it back for processing.
+            if request.was_already_handled and not processed_request.was_already_handled:
+                self.metadata.handled_request_count -= 1
+                self.metadata.pending_request_count += 1
+
+        except Exception as exc:
+            logger.debug(f'Error reclaiming request {request.unique_key}: {exc!s}')
+            return None
+        else:
+            return processed_request
+
+    async def is_empty(self) -> bool:
+        """Check if the queue is empty.
+
+        Returns:
+            True if the queue is empty, False otherwise.
+        """
+        # Without a lock, `is_empty` can falsely report True due to a low-probability race condition.
+        await self._ensure_head_is_non_empty()
+        return not self._head_requests and not self._requests_in_progress
+
+    async def _update_request(
+        self,
+        request: Request,
+        *,
+        forefront: bool = False,
+    ) -> ProcessedRequest:
+        """Update a request in the queue.
+
+        Args:
+            request: The updated request.
+            forefront: Whether to put the updated request at the beginning or the end of the queue.
+
+        Returns:
+            The updated request.
+        """
+        request_dict = request.model_dump(by_alias=True)
+        request_dict['id'] = unique_key_to_request_id(request.unique_key)
+        response = await self._api_client.update_request(
+            request=request_dict,
+            forefront=forefront,
+        )
+
+        return ProcessedRequest.model_validate(
+            {'uniqueKey': request.unique_key} | response,
+        )
+
+    async def _init_caches(self) -> None:
+        """Initialize the local caches by getting requests from the existing queue.
+
+        This is mainly done to improve the local deduplication capability. Listing requests can return up to 10k
+        requests, but their order is an implementation detail and respects neither head order nor insertion order.
+
+        Deduplication on the platform is expensive: it takes one API call and one write operation per request.
+        Local deduplication is cheaper: one API call for the whole cache and one read operation per request.
+        """
+        response = await self._api_client.list_requests(limit=10_000)
+        for request_data in response.get('items', []):
+            request = Request.model_validate(request_data)
+            if request.was_already_handled:
+                # Cache just the unique_key for deduplication.
+                self._requests_already_handled.add(request.unique_key)
+            else:
+                # Cache the full request.
+                self._requests_cache[request.unique_key] = request
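
The local head estimation above hinges on a single `collections.deque` invariant: `fetch_next_request` pops from the right end, so forefront requests are appended on the right and ordinary requests on the left. A minimal standalone sketch of that ordering (illustration only, not code from the package):

```python
from collections import deque

head: deque[str] = deque()

def enqueue(unique_key: str, *, forefront: bool = False) -> None:
    # Mirrors add_batch_of_requests: the right end of the deque acts as the queue head.
    if forefront:
        head.append(unique_key)
    else:
        head.appendleft(unique_key)

enqueue('a')
enqueue('b')
enqueue('priority', forefront=True)

# Mirrors fetch_next_request: pop() consumes from the right end first,
# so forefront requests win and the rest keep FIFO order.
assert [head.pop() for _ in range(len(head))] == ['priority', 'a', 'b']
```
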
@@ -1,6 +1,6 @@
 from __future__ import annotations
 
-from typing import TYPE_CHECKING
+from typing import TYPE_CHECKING, Literal
 
 from typing_extensions import override
 
@@ -23,15 +23,35 @@ if TYPE_CHECKING:
 class ApifyStorageClient(StorageClient):
     """Apify storage client."""
 
+    def __init__(self, *, request_queue_access: Literal['single', 'shared'] = 'single') -> None:
+        """Initialize the Apify storage client.
+
+        Args:
+            request_queue_access: Controls the implementation of the request queue client based on the expected
+                scenario:
+                - 'single' is suitable for single-consumer scenarios. It makes fewer API calls and is cheaper
+                  and faster.
+                - 'shared' is suitable for multiple-consumer scenarios, at the cost of higher API usage.
+                Detailed constraints for the 'single' access type:
+                - Only one client consumes the request queue at a time.
+                - Multiple producers can put requests into the queue, but their forefront requests are not
+                  guaranteed to be handled as quickly, because this client does not aggressively fetch the
+                  forefront and relies on local head estimation.
+                - Requests are only added to the queue, never deleted by other clients. (Marking them as handled
+                  is ok.)
+                - Other producers can add new requests, but not modify existing ones.
+                  (Modifications would not be reflected in the local cache.)
+        """
+        self._request_queue_access = request_queue_access
+
     # This class breaches the Liskov Substitution Principle. It requires a specialized Configuration compared to its parent.
     _lsp_violation_error_message_template = (
         'Expected "configuration" to be an instance of "apify.Configuration", but got {} instead.'
     )
 
     @override
-    def get_additional_cache_key(self, configuration: CrawleeConfiguration) -> Hashable:
+    def get_storage_client_cache_key(self, configuration: CrawleeConfiguration) -> Hashable:
         if isinstance(configuration, ApifyConfiguration):
-            return hash_api_base_url_and_token(configuration)
+            # It is not supported to open exactly the same queue with both a 'single' and a 'shared' client at
+            # the same time. Whichever client variation gets used first wins.
+            return super().get_storage_client_cache_key(configuration), hash_api_base_url_and_token(configuration)
 
         config_class = type(configuration)
         raise TypeError(
@@ -79,6 +99,8 @@ class ApifyStorageClient(StorageClient):
     ) -> ApifyRequestQueueClient:
         configuration = configuration or ApifyConfiguration.get_global_configuration()
         if isinstance(configuration, ApifyConfiguration):
-            return await ApifyRequestQueueClient.open(id=id, name=name, alias=alias, configuration=configuration)
+            return await ApifyRequestQueueClient.open(
+                id=id, name=name, alias=alias, configuration=configuration, access=self._request_queue_access
+            )
 
         raise TypeError(self._lsp_violation_error_message_template.format(type(configuration).__name__))
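
Downstream, choosing the access mode is a one-argument decision when constructing the storage client. A minimal sketch (the `'shared'` value is shown purely for contrast; `'single'` is the default):

```python
from apify.storage_clients import ApifyStorageClient

# Default: the cheaper single-consumer request queue client.
single_client = ApifyStorageClient()

# Opt in to the shared client when several consumers read the same queue,
# trading extra API calls for multi-consumer safety.
shared_client = ApifyStorageClient(request_queue_access='shared')
```

Note that, per `get_storage_client_cache_key` above, opening exactly the same queue with both variants at the same time is unsupported; whichever client variation is used first wins.
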
@@ -1,7 +1,10 @@
 from __future__ import annotations
 
 import logging
+import re
 from asyncio import Lock
+from base64 import b64encode
+from hashlib import sha256
 from logging import getLogger
 from typing import TYPE_CHECKING, ClassVar
 
@@ -76,7 +79,7 @@ class AliasResolver:
         Returns:
             Map of aliases and storage ids.
         """
-        if not cls._alias_map:
+        if not cls._alias_map and Configuration.get_global_configuration().is_at_home:
             default_kvs_client = await _get_default_kvs_client()
 
             record = await default_kvs_client.get_record(cls._ALIAS_MAPPING_KEY)
@@ -107,7 +110,7 @@ class AliasResolver:
         # Update in-memory mapping
         (await self._get_alias_map())[self._storage_key] = storage_id
         if not Configuration.get_global_configuration().is_at_home:
-            logging.getLogger(__name__).warning(
+            logging.getLogger(__name__).debug(
                 'AliasResolver storage limited retention is only supported on Apify platform. Storage is not exported.'
             )
             return
@@ -156,7 +159,8 @@ async def _get_default_kvs_client() -> KeyValueStoreClientAsync:
         min_delay_between_retries_millis=500,
         timeout_secs=360,
     )
-
+    if not configuration.default_key_value_store_id:
+        raise ValueError("'Configuration.default_key_value_store_id' must be set.")
     return apify_client_async.key_value_store(key_value_store_id=configuration.default_key_value_store_id)
 
 
@@ -165,3 +169,26 @@ def hash_api_base_url_and_token(configuration: Configuration) -> str:
     if configuration.api_public_base_url is None or configuration.token is None:
         raise ValueError("'Configuration.api_public_base_url' and 'Configuration.token' must be set.")
     return compute_short_hash(f'{configuration.api_public_base_url}{configuration.token}'.encode())
+
+
+def unique_key_to_request_id(unique_key: str, *, request_id_length: int = 15) -> str:
+    """Generate a deterministic request ID based on a unique key.
+
+    Args:
+        unique_key: The unique key to convert into a request ID.
+        request_id_length: The length of the request ID.
+
+    Returns:
+        A URL-safe, truncated request ID based on the unique key.
+    """
+    # Encode the unique key and compute its SHA-256 hash.
+    hashed_key = sha256(unique_key.encode('utf-8')).digest()
+
+    # Encode the hash in base64 and decode it to get a string.
+    base64_encoded = b64encode(hashed_key).decode('utf-8')
+
+    # Remove characters that are not URL-safe ('+', '/', or '=').
+    url_safe_key = re.sub(r'(\+|\/|=)', '', base64_encoded)
+
+    # Truncate the key to the desired length.
+    return url_safe_key[:request_id_length]
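
Because the derivation is deterministic, request IDs can be recomputed locally from unique keys instead of being looked up on the platform. A quick sketch of the function's contract (the example key is arbitrary):

```python
from apify.storage_clients._apify._utils import unique_key_to_request_id

request_id = unique_key_to_request_id('https://example.com/page')

# Deterministic: the same unique key always yields the same ID.
assert request_id == unique_key_to_request_id('https://example.com/page')

# URL-safe and truncated to request_id_length (15 by default).
assert len(request_id) <= 15
assert not set(request_id) & {'+', '/', '='}
```
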
@@ -0,0 +1 @@
+from ._storage_client import SmartApifyStorageClient
@@ -0,0 +1,117 @@
+from __future__ import annotations
+
+from typing import TYPE_CHECKING
+
+from typing_extensions import override
+
+from crawlee.storage_clients._base import DatasetClient, KeyValueStoreClient, RequestQueueClient, StorageClient
+
+from apify._configuration import Configuration as ApifyConfiguration
+from apify._utils import docs_group
+from apify.storage_clients import ApifyStorageClient
+from apify.storage_clients._file_system import ApifyFileSystemStorageClient
+
+if TYPE_CHECKING:
+    from collections.abc import Hashable
+
+    from crawlee.configuration import Configuration as CrawleeConfiguration
+
+
+@docs_group('Storage clients')
+class SmartApifyStorageClient(StorageClient):
+    """A storage client that delegates to either the cloud_storage_client or the local_storage_client.
+
+    When running on the Apify platform, the cloud_storage_client is used; otherwise the local_storage_client is
+    used. This storage client is designed to work specifically in the Actor context.
+    """
+
+    def __init__(
+        self,
+        *,
+        cloud_storage_client: ApifyStorageClient | None = None,
+        local_storage_client: StorageClient | None = None,
+    ) -> None:
+        """Initialize the Apify storage client.
+
+        Args:
+            cloud_storage_client: Client used to communicate with the Apify platform storage. Used either through
+                the `force_cloud` argument when opening storages, or automatically when running on the Apify
+                platform.
+            local_storage_client: Client used to communicate with the storage when not running on the Apify
+                platform and not using the `force_cloud` argument when opening storages.
+        """
+        self._cloud_storage_client = cloud_storage_client or ApifyStorageClient(request_queue_access='single')
+        self._local_storage_client = local_storage_client or ApifyFileSystemStorageClient()
+
+    def __str__(self) -> str:
+        return (
+            f'{self.__class__.__name__}(cloud_storage_client={self._cloud_storage_client.__class__.__name__},'
+            f' local_storage_client={self._local_storage_client.__class__.__name__})'
+        )
+
+    def get_suitable_storage_client(self, *, force_cloud: bool = False) -> StorageClient:
+        """Get a suitable storage client based on the global configuration and the value of the force_cloud flag.
+
+        Args:
+            force_cloud: If True, return `cloud_storage_client`.
+        """
+        if ApifyConfiguration.get_global_configuration().is_at_home:
+            return self._cloud_storage_client
+
+        configuration = ApifyConfiguration.get_global_configuration()
+        if force_cloud:
+            if configuration.token is None:
+                raise RuntimeError(
+                    'In order to use the Apify cloud storage from your computer, '
+                    'you need to provide an Apify token using the APIFY_TOKEN environment variable.'
+                )
+            return self._cloud_storage_client
+
+        return self._local_storage_client
+
+    @override
+    def get_storage_client_cache_key(self, configuration: CrawleeConfiguration) -> Hashable:
+        if ApifyConfiguration.get_global_configuration().is_at_home:
+            if isinstance(configuration, ApifyConfiguration):
+                return self._cloud_storage_client.get_storage_client_cache_key(configuration)
+            raise TypeError('Expecting ApifyConfiguration')
+
+        return self._local_storage_client.get_storage_client_cache_key(configuration)
+
+    @override
+    async def create_dataset_client(
+        self,
+        *,
+        id: str | None = None,
+        name: str | None = None,
+        alias: str | None = None,
+        configuration: CrawleeConfiguration | None = None,
+    ) -> DatasetClient:
+        return await self.get_suitable_storage_client().create_dataset_client(
+            id=id, name=name, alias=alias, configuration=configuration
+        )
+
+    @override
+    async def create_kvs_client(
+        self,
+        *,
+        id: str | None = None,
+        name: str | None = None,
+        alias: str | None = None,
+        configuration: CrawleeConfiguration | None = None,
+    ) -> KeyValueStoreClient:
+        return await self.get_suitable_storage_client().create_kvs_client(
+            id=id, name=name, alias=alias, configuration=configuration
+        )
+
+    @override
+    async def create_rq_client(
+        self,
+        *,
+        id: str | None = None,
+        name: str | None = None,
+        alias: str | None = None,
+        configuration: CrawleeConfiguration | None = None,
+    ) -> RequestQueueClient:
+        return await self.get_suitable_storage_client().create_rq_client(
+            id=id, name=name, alias=alias, configuration=configuration
+        )
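
To show the delegation in one place, a hedged sketch of wiring the smart client together (the public import path for `SmartApifyStorageClient` is an assumption; the diff only shows the package-internal re-export):

```python
# Assumed import location - only `from ._storage_client import SmartApifyStorageClient`
# is visible in the diff, not where the package exposes it publicly.
from apify.storage_clients import ApifyStorageClient, SmartApifyStorageClient

storage_client = SmartApifyStorageClient(
    # Both arguments are optional; the defaults are
    # ApifyStorageClient(request_queue_access='single') and the local
    # file-system client.
    cloud_storage_client=ApifyStorageClient(request_queue_access='shared'),
)

# Off the platform this returns the local client; force_cloud=True returns the
# cloud client but requires the APIFY_TOKEN environment variable to be set.
client = storage_client.get_suitable_storage_client(force_cloud=False)
print(storage_client)  # reports which concrete clients are wired in
```
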
@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: apify
-Version: 2.7.1b19
+Version: 2.7.1b21
 Summary: Apify SDK for Python
 Project-URL: Apify Homepage, https://apify.com
 Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
@@ -228,7 +228,7 @@ Requires-Python: >=3.10
 Requires-Dist: apify-client<3.0.0,>=2.0.0
 Requires-Dist: apify-shared<3.0.0,>=2.0.0
 Requires-Dist: cachetools>=5.5.0
-Requires-Dist: crawlee==0.6.13b42
+Requires-Dist: crawlee==0.6.13b46
 Requires-Dist: cryptography>=42.0.0
 Requires-Dist: impit>=0.6.1
 Requires-Dist: lazy-object-proxy>=1.11.0