apify 3.0.2b7__py3-none-any.whl → 3.0.3__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
- apify/request_loaders/_apify_request_list.py +2 -2
- apify/storage_clients/_apify/_dataset_client.py +12 -29
- apify/storage_clients/_apify/_key_value_store_client.py +13 -43
- apify/storage_clients/_apify/_models.py +2 -2
- apify/storage_clients/_apify/_request_queue_client.py +2 -25
- apify/storage_clients/_apify/_request_queue_shared_client.py +45 -35
- apify/storage_clients/_apify/_request_queue_single_client.py +73 -64
- apify/storage_clients/_apify/_utils.py +27 -0
- {apify-3.0.2b7.dist-info → apify-3.0.3.dist-info}/METADATA +2 -2
- {apify-3.0.2b7.dist-info → apify-3.0.3.dist-info}/RECORD +12 -12
- {apify-3.0.2b7.dist-info → apify-3.0.3.dist-info}/WHEEL +0 -0
- {apify-3.0.2b7.dist-info → apify-3.0.3.dist-info}/licenses/LICENSE +0 -0
apify/request_loaders/_apify_request_list.py

@@ -22,8 +22,8 @@ URL_NO_COMMAS_REGEX = re.compile(
 class _RequestDetails(BaseModel):
     method: HttpMethod = 'GET'
     payload: str = ''
-    headers: Annotated[dict[str, str], Field(default_factory=dict)]
-    user_data: Annotated[dict[str, str], Field(default_factory=dict, alias='userData')]
+    headers: Annotated[dict[str, str], Field(default_factory=dict)]
+    user_data: Annotated[dict[str, str], Field(default_factory=dict, alias='userData')]


 class _RequestsFromUrlInput(_RequestDetails):
apify/storage_clients/_apify/_dataset_client.py

@@ -1,19 +1,19 @@
 from __future__ import annotations

 import asyncio
+import warnings
 from logging import getLogger
 from typing import TYPE_CHECKING, Any

 from typing_extensions import override

-from apify_client import ApifyClientAsync
 from crawlee._utils.byte_size import ByteSize
 from crawlee._utils.file import json_dumps
 from crawlee.storage_clients._base import DatasetClient
 from crawlee.storage_clients.models import DatasetItemsListPage, DatasetMetadata
 from crawlee.storages import Dataset

-from ._utils import AliasResolver
+from ._utils import AliasResolver, create_apify_client

 if TYPE_CHECKING:
     from collections.abc import AsyncIterator
@@ -52,12 +52,17 @@ class ApifyDatasetClient(DatasetClient):
         self._api_client = api_client
         """The Apify dataset client for API operations."""

-        self._api_public_base_url = api_public_base_url
-        """The public base URL for accessing the key-value store records."""
-
         self._lock = lock
         """A lock to ensure that only one operation is performed at a time."""

+        if api_public_base_url:
+            # Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
+            warnings.warn(
+                'api_public_base_url argument is deprecated and will be removed in version 4.0.0',
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
     @override
     async def get_metadata(self) -> DatasetMetadata:
         metadata = await self._api_client.get()
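The constructor keeps accepting `api_public_base_url` for backward compatibility and only warns when a truthy value is passed. A self-contained sketch of the same pattern (the `deprecated_init` function below is illustrative, not part of the SDK):

import warnings

def deprecated_init(api_public_base_url: str | None = None) -> None:
    # Mirror of the pattern added above: warn only when the deprecated
    # argument is actually supplied with a truthy value.
    if api_public_base_url:
        warnings.warn(
            'api_public_base_url argument is deprecated and will be removed in version 4.0.0',
            DeprecationWarning,
            stacklevel=2,  # attribute the warning to the caller, not to this function
        )

deprecated_init(api_public_base_url='https://api.apify.com')  # warns at this line
deprecated_init()  # no warning: argument omitted
deprecated_init(api_public_base_url='')  # no warning: falsy values count as "not passed"

This is also why the factory below can keep calling the constructor with `api_public_base_url=''` without tripping the warning it just introduced.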
@@ -99,29 +104,7 @@ class ApifyDatasetClient(DatasetClient):
         if sum(1 for param in [id, name, alias] if param is not None) > 1:
             raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')

-
-        if not token:
-            raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
-
-        api_url = configuration.api_base_url
-        if not api_url:
-            raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
-
-        api_public_base_url = configuration.api_public_base_url
-        if not api_public_base_url:
-            raise ValueError(
-                'Apify storage client requires a valid API public base URL in Configuration '
-                f'(api_public_base_url={api_public_base_url}).'
-            )
-
-        # Create Apify client with the provided token and API URL.
-        apify_client_async = ApifyClientAsync(
-            token=token,
-            api_url=api_url,
-            max_retries=8,
-            min_delay_between_retries_millis=500,
-            timeout_secs=360,
-        )
+        apify_client_async = create_apify_client(configuration)
         apify_datasets_client = apify_client_async.datasets()

         # Normalize unnamed default storage in cases where not defined in `configuration.default_dataset_id` to unnamed
@@ -178,7 +161,7 @@ class ApifyDatasetClient(DatasetClient):

         return cls(
             api_client=apify_dataset_client,
-            api_public_base_url=
+            api_public_base_url='',  # Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
             lock=asyncio.Lock(),
         )

apify/storage_clients/_apify/_key_value_store_client.py

@@ -1,20 +1,18 @@
 from __future__ import annotations

 import asyncio
+import warnings
 from logging import getLogger
 from typing import TYPE_CHECKING, Any

 from typing_extensions import override
-from yarl import URL

-from apify_client import ApifyClientAsync
 from crawlee.storage_clients._base import KeyValueStoreClient
 from crawlee.storage_clients.models import KeyValueStoreRecord, KeyValueStoreRecordMetadata
 from crawlee.storages import KeyValueStore

 from ._models import ApifyKeyValueStoreMetadata, KeyValueStoreListKeysPage
-from ._utils import AliasResolver
-from apify._crypto import create_hmac_signature
+from ._utils import AliasResolver, create_apify_client

 if TYPE_CHECKING:
     from collections.abc import AsyncIterator
@@ -43,12 +41,17 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
         self._api_client = api_client
         """The Apify KVS client for API operations."""

-        self._api_public_base_url = api_public_base_url
-        """The public base URL for accessing the key-value store records."""
-
         self._lock = lock
         """A lock to ensure that only one operation is performed at a time."""

+        if api_public_base_url:
+            # Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
+            warnings.warn(
+                'api_public_base_url argument is deprecated and will be removed in version 4.0.0',
+                DeprecationWarning,
+                stacklevel=2,
+            )
+
     @override
     async def get_metadata(self) -> ApifyKeyValueStoreMetadata:
         metadata = await self._api_client.get()
@@ -90,29 +93,7 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
         if sum(1 for param in [id, name, alias] if param is not None) > 1:
             raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')

-
-        if not token:
-            raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
-
-        api_url = configuration.api_base_url
-        if not api_url:
-            raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
-
-        api_public_base_url = configuration.api_public_base_url
-        if not api_public_base_url:
-            raise ValueError(
-                'Apify storage client requires a valid API public base URL in Configuration '
-                f'(api_public_base_url={api_public_base_url}).'
-            )
-
-        # Create Apify client with the provided token and API URL.
-        apify_client_async = ApifyClientAsync(
-            token=token,
-            api_url=api_url,
-            max_retries=8,
-            min_delay_between_retries_millis=500,
-            timeout_secs=360,
-        )
+        apify_client_async = create_apify_client(configuration)
         apify_kvss_client = apify_client_async.key_value_stores()

         # Normalize unnamed default storage in cases where not defined in `configuration.default_key_value_store_id` to
@@ -170,7 +151,7 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):

         return cls(
             api_client=apify_kvs_client,
-            api_public_base_url=
+            api_public_base_url='',  # Remove in version 4.0, https://github.com/apify/apify-sdk-python/issues/635
             lock=asyncio.Lock(),
         )

@@ -251,15 +232,4 @@ class ApifyKeyValueStoreClient(KeyValueStoreClient):
         Returns:
             A public URL that can be used to access the value of the given key in the KVS.
         """
-
-            raise ValueError('resource_id cannot be None when generating a public URL')
-
-        public_url = (
-            URL(self._api_public_base_url) / 'v2' / 'key-value-stores' / self._api_client.resource_id / 'records' / key
-        )
-        metadata = await self.get_metadata()
-
-        if metadata.url_signing_secret_key is not None:
-            public_url = public_url.with_query(signature=create_hmac_signature(metadata.url_signing_secret_key, key))
-
-        return str(public_url)
+        return await self._api_client.get_record_public_url(key=key)
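The removed URL assembly and HMAC signing now live in the API client library, which matches the `apify-client<3.0.0,>=2.2.0` pin later in this diff. A usage sketch, assuming a `kvs_client` instance of the `ApifyKeyValueStoreClient` above and the `set_value`/`get_public_url` methods of the base `KeyValueStoreClient` interface:

import asyncio

async def share_output(kvs_client) -> None:
    # Store a record, then ask the platform for a shareable link to it.
    await kvs_client.set_value(key='OUTPUT', value={'ok': True})
    url = await kvs_client.get_public_url(key='OUTPUT')
    # With apify-client >= 2.2.0 the URL is built by the client library and,
    # when the store has a url_signing_secret_key, comes back HMAC-signed.
    print(url)

# asyncio.run(share_output(kvs_client)) once a client instance exists.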
apify/storage_clients/_apify/_request_queue_client.py

@@ -5,7 +5,6 @@ from typing import TYPE_CHECKING, Final, Literal

 from typing_extensions import override

-from apify_client import ApifyClientAsync
 from crawlee._utils.crypto import crypto_random_object_id
 from crawlee.storage_clients._base import RequestQueueClient
 from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata

@@ -14,7 +13,7 @@ from crawlee.storages import RequestQueue
 from ._models import ApifyRequestQueueMetadata, RequestQueueStats
 from ._request_queue_shared_client import ApifyRequestQueueSharedClient
 from ._request_queue_single_client import ApifyRequestQueueSingleClient
-from ._utils import AliasResolver
+from ._utils import AliasResolver, create_apify_client

 if TYPE_CHECKING:
     from collections.abc import Sequence
@@ -228,29 +227,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
         if sum(1 for param in [id, name, alias] if param is not None) > 1:
             raise ValueError('Only one of "id", "name", or "alias" can be specified, not multiple.')

-
-        if not token:
-            raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
-
-        api_url = configuration.api_base_url
-        if not api_url:
-            raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
-
-        api_public_base_url = configuration.api_public_base_url
-        if not api_public_base_url:
-            raise ValueError(
-                'Apify storage client requires a valid API public base URL in Configuration '
-                f'(api_public_base_url={api_public_base_url}).'
-            )
-
-        # Create Apify client with the provided token and API URL.
-        apify_client_async = ApifyClientAsync(
-            token=token,
-            api_url=api_url,
-            max_retries=8,
-            min_delay_between_retries_millis=500,
-            timeout_secs=360,
-        )
+        apify_client_async = create_apify_client(configuration)
         apify_rqs_client = apify_client_async.request_queues()

         # Normalize unnamed default storage in cases where not defined in `configuration.default_request_queue_id` to
apify/storage_clients/_apify/_request_queue_shared_client.py

@@ -54,10 +54,10 @@ class ApifyRequestQueueSharedClient:
         """The Apify request queue client for API operations."""

         self._queue_head = deque[str]()
-        """A deque to store request
+        """A deque to store request ids in the queue head."""

         self._requests_cache: LRUCache[str, CachedRequest] = LRUCache(maxsize=cache_size)
-        """A cache to store request objects. Request
+        """A cache to store request objects. Request id is used as the cache key."""

         self._queue_has_locked_requests: bool | None = None
         """Whether the queue has requests locked by another client."""
@@ -101,12 +101,14 @@ class ApifyRequestQueueSharedClient:
         already_present_requests: list[ProcessedRequest] = []

         for request in requests:
-
+            request_id = unique_key_to_request_id(request.unique_key)
+            if self._requests_cache.get(request_id):
                 # We are not sure if it was already handled at this point, and it is not worth calling API for it.
                 # It could have been handled by another client in the meantime, so cached information about
                 # `request.was_already_handled` is not reliable.
                 already_present_requests.append(
                     ProcessedRequest(
+                        id=request_id,
                         unique_key=request.unique_key,
                         was_already_present=True,
                         was_already_handled=request.was_already_handled,
@@ -116,12 +118,13 @@ class ApifyRequestQueueSharedClient:
             else:
                 # Add new request to the cache.
                 processed_request = ProcessedRequest(
+                    id=request_id,
                     unique_key=request.unique_key,
                     was_already_present=True,
                     was_already_handled=request.was_already_handled,
                 )
                 self._cache_request(
-
+                    request_id,
                     processed_request,
                 )
                 new_requests.append(request)
@@ -131,7 +134,6 @@ class ApifyRequestQueueSharedClient:
         requests_dict = [
             request.model_dump(
                 by_alias=True,
-                exclude={'id'},  # Exclude ID fields from requests since the API doesn't accept them.
             )
             for request in new_requests
         ]
@@ -146,7 +148,8 @@ class ApifyRequestQueueSharedClient:

             # Remove unprocessed requests from the cache
             for unprocessed_request in api_response.unprocessed_requests:
-
+                unprocessed_request_id = unique_key_to_request_id(unprocessed_request.unique_key)
+                self._requests_cache.pop(unprocessed_request_id, None)

         else:
             api_response = AddRequestsResponse.model_validate(
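From here on, both request queue clients key their caches by the deterministic request id derived from the unique key via `unique_key_to_request_id`, whose tail is visible in the `_utils.py` hunk near the end of this diff. A sketch of a derivation consistent with those context lines; the hashing and encoding steps are an assumption, only the truncation to 15 characters is actually shown in this diff:

import hashlib
from base64 import urlsafe_b64encode

def unique_key_to_request_id(unique_key: str, *, request_id_length: int = 15) -> str:
    # Assumed steps: hash the unique key, then url-safe-base64 it without padding.
    digest = hashlib.sha256(unique_key.encode('utf-8')).digest()
    url_safe_key = urlsafe_b64encode(digest).decode('ascii').rstrip('=')

    # Truncate the key to the desired length (the step visible in _utils.py).
    return url_safe_key[:request_id_length]

# The same unique key always maps to the same id, so ids computed locally
# agree with the ids the platform reports for deduplicated requests.
assert unique_key_to_request_id('https://example.com') == unique_key_to_request_id('https://example.com')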
@@ -179,7 +182,10 @@ class ApifyRequestQueueSharedClient:
        Returns:
            The request or None if not found.
        """
-
+        return await self._get_request_by_id(unique_key_to_request_id(unique_key))
+
+    async def _get_request_by_id(self, request_id: str) -> Request | None:
+        response = await self._api_client.get_request(request_id)

        if response is None:
            return None
@@ -206,15 +212,15 @@ class ApifyRequestQueueSharedClient:
            return None

        # Get the next request ID from the queue head
-
+        next_request_id = self._queue_head.popleft()

-        request = await self._get_or_hydrate_request(
+        request = await self._get_or_hydrate_request(next_request_id)

        # Handle potential inconsistency where request might not be in the main table yet
        if request is None:
            logger.debug(
                'Cannot find a request from the beginning of queue, will be retried later',
-                extra={'
+                extra={'next_request_id': next_request_id},
            )
            return None

@@ -222,16 +228,16 @@ class ApifyRequestQueueSharedClient:
        if request.handled_at is not None:
            logger.debug(
                'Request fetched from the beginning of queue was already handled',
-                extra={'
+                extra={'next_request_id': next_request_id},
            )
            return None

        # Use get request to ensure we have the full request object.
-        request = await self.
+        request = await self._get_request_by_id(next_request_id)
        if request is None:
            logger.debug(
                'Request fetched from the beginning of queue was not found in the RQ',
-                extra={'
+                extra={'next_request_id': next_request_id},
            )
            return None

@@ -248,15 +254,17 @@ class ApifyRequestQueueSharedClient:
        Returns:
            Information about the queue operation. `None` if the given request was not in progress.
        """
+        request_id = unique_key_to_request_id(request.unique_key)
        # Set the handled_at timestamp if not already set
        if request.handled_at is None:
            request.handled_at = datetime.now(tz=timezone.utc)

-        if cached_request := self._requests_cache[
+        if cached_request := self._requests_cache[request_id]:
            cached_request.was_already_handled = request.was_already_handled
        try:
            # Update the request in the API
            processed_request = await self._update_request(request)
+            processed_request.id = request_id
            processed_request.unique_key = request.unique_key

            # Update assumed handled count if this wasn't already handled
@@ -265,10 +273,9 @@ class ApifyRequestQueueSharedClient:
                self.metadata.pending_request_count -= 1

            # Update the cache with the handled request
-            cache_key = request.unique_key
            self._cache_request(
-                cache_key,
-                processed_request,
+                cache_key=request_id,
+                processed_request=processed_request,
                hydrated_request=request,
            )
        except Exception as exc:
@@ -352,17 +359,17 @@ class ApifyRequestQueueSharedClient:
        # Fetch requests from the API and populate the queue head
        await self._list_head()

-    async def _get_or_hydrate_request(self,
-        """Get a request by
+    async def _get_or_hydrate_request(self, request_id: str) -> Request | None:
+        """Get a request by id, either from cache or by fetching from API.

        Args:
-
+            request_id: Id of the request to get.

        Returns:
            The request if found and valid, otherwise None.
        """
        # First check if the request is in our cache
-        cached_entry = self._requests_cache.get(
+        cached_entry = self._requests_cache.get(request_id)

        if cached_entry and cached_entry.hydrated:
            # If we have the request hydrated in cache, return it
@@ -371,17 +378,17 @@ class ApifyRequestQueueSharedClient:
        # If not in cache or not hydrated, fetch the request
        try:
            # Fetch the request data
-            request = await self.
+            request = await self._get_request_by_id(request_id)

            # If request is not found and return None
            if not request:
                return None

            # Update cache with hydrated request
-            cache_key = request.unique_key
            self._cache_request(
-                cache_key,
-                ProcessedRequest(
+                cache_key=request_id,
+                processed_request=ProcessedRequest(
+                    id=request_id,
                    unique_key=request.unique_key,
                    was_already_present=True,
                    was_already_handled=request.handled_at is not None,
@@ -389,7 +396,7 @@ class ApifyRequestQueueSharedClient:
                hydrated_request=request,
            )
        except Exception as exc:
-            logger.debug(f'Error fetching request {
+            logger.debug(f'Error fetching request {request_id}: {exc!s}')
            return None
        else:
            return request
@@ -438,8 +445,8 @@ class ApifyRequestQueueSharedClient:
            logger.debug(f'Using cached queue head with {len(self._queue_head)} requests')
            # Create a list of requests from the cached queue head
            items = []
-            for
-                cached_request = self._requests_cache.get(
+            for request_id in list(self._queue_head)[:limit]:
+                cached_request = self._requests_cache.get(request_id)
                if cached_request and cached_request.hydrated:
                    items.append(cached_request.hydrated)

@@ -472,32 +479,35 @@ class ApifyRequestQueueSharedClient:

        for request_data in response.get('items', []):
            request = Request.model_validate(request_data)
+            request_id = request_data.get('id')

            # Skip requests without ID or unique key
-            if not request.unique_key:
+            if not request.unique_key or not request_id:
                logger.debug(
-                    'Skipping request from queue head, missing unique key',
+                    'Skipping request from queue head, missing unique key or id',
                    extra={
                        'unique_key': request.unique_key,
+                        'id': request_id,
                    },
                )
                continue

            # Cache the request
            self._cache_request(
-
+                request_id,
                ProcessedRequest(
+                    id=request_id,
                    unique_key=request.unique_key,
                    was_already_present=True,
                    was_already_handled=False,
                ),
                hydrated_request=request,
            )
-            self._queue_head.append(
+            self._queue_head.append(request_id)

-        for
+        for leftover_id in leftover_buffer:
            # After adding new requests to the forefront, any existing leftover locked request is kept in the end.
-            self._queue_head.append(
+            self._queue_head.append(leftover_id)
        return RequestQueueHead.model_validate(response)

    def _cache_request(
@@ -516,7 +526,7 @@ class ApifyRequestQueueSharedClient:
            hydrated_request: The hydrated request object, if available.
        """
        self._requests_cache[cache_key] = CachedRequest(
-
+            id=processed_request.id,
            was_already_handled=processed_request.was_already_handled,
            hydrated=hydrated_request,
            lock_expires_at=None,
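`_cache_request` writes into a `cachetools.LRUCache` (hence the `cachetools>=5.5.0` pin in METADATA), so switching the cache key from unique key to request id also determines what LRU eviction drops. A small sketch of the relevant semantics, independent of the SDK:

from cachetools import LRUCache

# Least-recently-used entries are evicted once maxsize is reached, which is
# how `_requests_cache` bounds memory on large queues.
cache: LRUCache = LRUCache(maxsize=2)
cache['id-a'] = {'unique_key': 'https://a.example'}
cache['id-b'] = {'unique_key': 'https://b.example'}
cache.get('id-a')  # touching 'id-a' makes it most recently used
cache['id-c'] = {'unique_key': 'https://c.example'}  # evicts 'id-b'

assert 'id-a' in cache and 'id-b' not in cache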
apify/storage_clients/_apify/_request_queue_single_client.py

@@ -56,21 +56,21 @@ class ApifyRequestQueueSingleClient:
        """The Apify request queue client for API operations."""

        self._requests_cache: LRUCache[str, Request] = LRUCache(maxsize=cache_size)
-        """A cache to store request objects. Request
+        """A cache to store request objects. Request id is used as the cache key."""

        self._head_requests: deque[str] = deque()
-        """Ordered
+        """Ordered ids of requests that represent queue head."""

        self._requests_already_handled: set[str] = set()
        """Local estimation of requests unique keys that are already present and handled on the platform.

        - To enhance local deduplication.
        - To reduce the _requests_cache size. Already handled requests are most likely not going to be needed again,
-          so no need to cache more than their
+          so no need to cache more than their id.
        """

        self._requests_in_progress: set[str] = set()
-        """Set of requests
+        """Set of requests ids that are being processed locally.

        - To help decide if the RQ is finished or not. This is the only consumer, so it can be tracked locally.
        """
@@ -105,19 +105,24 @@ class ApifyRequestQueueSingleClient:
        already_present_requests: list[ProcessedRequest] = []

        for request in requests:
+            # Calculate id for request
+            request_id = unique_key_to_request_id(request.unique_key)
+
            # Check if request is known to be already handled (it has to be present as well.)
-            if
+            if request_id in self._requests_already_handled:
                already_present_requests.append(
                    ProcessedRequest(
+                        id=request_id,
                        unique_key=request.unique_key,
                        was_already_present=True,
                        was_already_handled=True,
                    )
                )
            # Check if request is known to be already present, but unhandled
-            elif self._requests_cache.get(
+            elif self._requests_cache.get(request_id):
                already_present_requests.append(
                    ProcessedRequest(
+                        id=request_id,
                        unique_key=request.unique_key,
                        was_already_present=True,
                        was_already_handled=request.was_already_handled,
@@ -128,11 +133,11 @@ class ApifyRequestQueueSingleClient:
                new_requests.append(request)

                # Update local caches
-                self._requests_cache[
+                self._requests_cache[request_id] = request
                if forefront:
-                    self._head_requests.append(
+                    self._head_requests.append(request_id)
                else:
-                    self._head_requests.appendleft(
+                    self._head_requests.appendleft(request_id)

        if new_requests:
            # Prepare requests for API by converting to dictionaries.
@@ -151,7 +156,7 @@ class ApifyRequestQueueSingleClient:
            api_response.processed_requests.extend(already_present_requests)
            # Remove unprocessed requests from the cache
            for unprocessed_request in api_response.unprocessed_requests:
-                self._requests_cache.pop(unprocessed_request.unique_key, None)
+                self._requests_cache.pop(unique_key_to_request_id(unprocessed_request.unique_key), None)

        else:
            api_response = AddRequestsResponse(
@@ -178,15 +183,39 @@ class ApifyRequestQueueSingleClient:
        Returns:
            The request or None if not found.
        """
-
-
+        return await self._get_request(id=unique_key_to_request_id(unique_key))
+
+    async def _get_request(self, id: str) -> Request | None:
+        """Get a request by id.
+
+        Args:
+            id: Id of request to get.
+
+        Returns:
+            The request or None if not found.
+        """
+        if id in self._requests_cache:
+            return self._requests_cache[id]

-
+        # Requests that were not added by this client are not in local cache. Fetch them from platform.
+        response = await self._api_client.get_request(id)

        if response is None:
            return None

-
+        request = Request.model_validate(response)
+
+        # Updated local caches
+        if id in self._requests_in_progress:
+            # No caching of requests that are already in progress, client is already aware of them.
+            pass
+        elif request.was_already_handled:
+            # Cache only id for already handled requests
+            self._requests_already_handled.add(id)
+        else:
+            # Cache full request for unhandled requests that are not yet in progress and are not yet handled.
+            self._requests_cache[id] = request
+        return request

    async def fetch_next_request(self) -> Request | None:
        """Return the next request in the queue to be processed.
@@ -202,13 +231,10 @@ class ApifyRequestQueueSingleClient:
        await self._ensure_head_is_non_empty()

        while self._head_requests:
-
-            if
-
-
-            ):
-                self._requests_in_progress.add(request_unique_key)
-                return await self.get_request(request_unique_key)
+            request_id = self._head_requests.pop()
+            if request_id not in self._requests_in_progress and request_id not in self._requests_already_handled:
+                self._requests_in_progress.add(request_id)
+                return await self._get_request(request_id)
        # No request locally and the ones returned from the platform are already in progress.
        return None

@@ -233,44 +259,19 @@ class ApifyRequestQueueSingleClient:

        # Update the cached data
        for request_data in response.get('items', []):
-            # Due to https://github.com/apify/apify-core/blob/v0.1377.0/src/api/src/lib/request_queues/request_queue.ts#L53,
-            # the list_head endpoint may return truncated fields for long requests (e.g., long URLs or unique keys).
-            # If truncation is detected, fetch the full request data by its ID from the API.
-            # This is a temporary workaround - the caching will be refactored to use request IDs instead of unique keys.
-            # See https://github.com/apify/apify-sdk-python/issues/630 for details.
-            if '[truncated]' in request_data['uniqueKey'] or '[truncated]' in request_data['url']:
-                request_data = await self._api_client.get_request(request_id=request_data['id'])  # noqa: PLW2901
-
            request = Request.model_validate(request_data)
+            request_id = request_data['id']

-            if
+            if request_id in self._requests_in_progress:
                # Ignore requests that are already in progress, we will not process them again.
                continue

            if request.was_already_handled:
-                # Do not cache fully handled requests, we do not need them. Just cache their
-                self._requests_already_handled.add(
-
-
-
-                request_id = unique_key_to_request_id(request.unique_key)
-
-                if request_data is not None and request_id != request_data['id']:
-                    logger.warning(
-                        f'Request ID mismatch: {request_id} != {request_data["id"]}, '
-                        'this may cause unexpected behavior.'
-                    )
-
-                # See https://github.com/apify/apify-sdk-python/issues/630 for details.
-                if '[truncated]' not in request.unique_key:
-                    request_data = await self._api_client.get_request(request_id=request_id)  # noqa: PLW2901
-                    request = Request.model_validate(request_data)
-
-                self._requests_cache[request.unique_key] = request
-
-                # Add new requests to the end of the head, unless already present in head
-                if request.unique_key not in self._head_requests:
-                    self._head_requests.appendleft(request.unique_key)
+                # Do not cache fully handled requests, we do not need them. Just cache their id.
+                self._requests_already_handled.add(request_id)
+            # Add new requests to the end of the head, unless already present in head
+            elif request_id not in self._head_requests:
+                self._head_requests.appendleft(request_id)

    async def mark_request_as_handled(self, request: Request) -> ProcessedRequest | None:
        """Mark a request as handled after successful processing.
@@ -285,12 +286,14 @@ class ApifyRequestQueueSingleClient:
        """
        # Set the handled_at timestamp if not already set

+        request_id = unique_key_to_request_id(request.unique_key)
+
        if request.handled_at is None:
            request.handled_at = datetime.now(tz=timezone.utc)
            self.metadata.handled_request_count += 1
            self.metadata.pending_request_count -= 1

-        if cached_request := self._requests_cache.get(
+        if cached_request := self._requests_cache.get(request_id):
            cached_request.handled_at = request.handled_at

        try:
@@ -299,10 +302,10 @@ class ApifyRequestQueueSingleClient:
            # adding to the queue.)
            processed_request = await self._update_request(request)
            # Remember that we handled this request, to optimize local deduplication.
-            self._requests_already_handled.add(
+            self._requests_already_handled.add(request_id)
            # Remove request from cache. It will most likely not be needed.
-            self._requests_cache.pop(
-            self._requests_in_progress.discard(
+            self._requests_cache.pop(request_id)
+            self._requests_in_progress.discard(request_id)

        except Exception as exc:
            logger.debug(f'Error marking request {request.unique_key} as handled: {exc!s}')
@@ -329,23 +332,27 @@ class ApifyRequestQueueSingleClient:
        """
        # Check if the request was marked as handled and clear it. When reclaiming,
        # we want to put the request back for processing.
+
+        request_id = unique_key_to_request_id(request.unique_key)
+
        if request.was_already_handled:
            request.handled_at = None

        try:
            # Make sure request is in the local cache. We might need it.
-            self._requests_cache[
+            self._requests_cache[request_id] = request

            # No longer in progress
-            self._requests_in_progress.discard(
+            self._requests_in_progress.discard(request_id)
            # No longer handled
-            self._requests_already_handled.discard(
+            self._requests_already_handled.discard(request_id)

            if forefront:
                # Append to top of the local head estimation
-                self._head_requests.append(
+                self._head_requests.append(request_id)

            processed_request = await self._update_request(request, forefront=forefront)
+            processed_request.id = request_id
            processed_request.unique_key = request.unique_key
            # If the request was previously handled, decrement our handled count since
            # we're putting it back for processing.
@@ -407,9 +414,11 @@ class ApifyRequestQueueSingleClient:
        response = await self._api_client.list_requests(limit=10_000)
        for request_data in response.get('items', []):
            request = Request.model_validate(request_data)
+            request_id = request_data['id']
+
            if request.was_already_handled:
-                # Cache just
-                self._requests_already_handled.add(
+                # Cache just id for deduplication
+                self._requests_already_handled.add(request_id)
            else:
                # Cache full request
-                self._requests_cache[
+                self._requests_cache[request_id] = request
apify/storage_clients/_apify/_utils.py

@@ -192,3 +192,30 @@ def unique_key_to_request_id(unique_key: str, *, request_id_length: int = 15) ->

     # Truncate the key to the desired length
     return url_safe_key[:request_id_length]
+
+
+def create_apify_client(configuration: Configuration) -> ApifyClientAsync:
+    """Create and return an ApifyClientAsync instance using the provided configuration."""
+    if not configuration.token:
+        raise ValueError(f'Apify storage client requires a valid token in Configuration (token={configuration.token}).')
+
+    api_url = configuration.api_base_url
+    if not api_url:
+        raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
+
+    api_public_base_url = configuration.api_public_base_url
+    if not api_public_base_url:
+        raise ValueError(
+            'Apify storage client requires a valid API public base URL in Configuration '
+            f'(api_public_base_url={api_public_base_url}).'
+        )
+
+    # Create Apify client with the provided token and API URL.
+    return ApifyClientAsync(
+        token=configuration.token,
+        api_url=api_url,
+        api_public_url=api_public_base_url,
+        max_retries=8,
+        min_delay_between_retries_millis=500,
+        timeout_secs=360,
+    )
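With the helper in place, each storage client factory shrinks its copy-pasted validation to a single `create_apify_client(configuration)` call. A usage sketch, assuming the `token`, `api_base_url`, and `api_public_base_url` fields of `apify.Configuration` behave as the validation above implies (values are illustrative; on the platform they normally come from environment variables):

from apify import Configuration

configuration = Configuration(
    token='apify_api_...',  # placeholder, not a real token
    api_base_url='https://api.apify.com',
    api_public_base_url='https://api.apify.com',
)

client = create_apify_client(configuration)
dataset_collection = client.datasets()  # same entry point the factory uses

# Missing fields now fail in one place instead of three:
try:
    create_apify_client(Configuration(token=None))
except ValueError as err:
    print(err)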
{apify-3.0.2b7.dist-info → apify-3.0.3.dist-info}/METADATA

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: apify
-Version: 3.0.2b7
+Version: 3.0.3
 Summary: Apify SDK for Python
 Project-URL: Apify Homepage, https://apify.com
 Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
@@ -225,7 +225,7 @@ Classifier: Programming Language :: Python :: 3.12
 Classifier: Programming Language :: Python :: 3.13
 Classifier: Topic :: Software Development :: Libraries
 Requires-Python: >=3.10
-Requires-Dist: apify-client<3.0.0,>=2.
+Requires-Dist: apify-client<3.0.0,>=2.2.0
 Requires-Dist: apify-shared<3.0.0,>=2.0.0
 Requires-Dist: cachetools>=5.5.0
 Requires-Dist: crawlee<2.0.0,>=1.0.2
{apify-3.0.2b7.dist-info → apify-3.0.3.dist-info}/RECORD

@@ -14,7 +14,7 @@ apify/events/_apify_event_manager.py,sha256=yArFrKa4wWDZo32iwaA3F_w36VSJf1Yaj_L1
 apify/events/_types.py,sha256=F0BHgACqnRfmdQ9GUcpnZvPxzw2bdRr8BqbGSA4cHeQ,3050
 apify/events/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 apify/request_loaders/__init__.py,sha256=SJqW0FbdZKEtAMB5kBLgqROzh3KmQc2CNEIhJpTGdPQ,356
-apify/request_loaders/_apify_request_list.py,sha256=
+apify/request_loaders/_apify_request_list.py,sha256=jbZTHK3ACbh4YauYVJgXHXxB3rPcots5JMcr3GdIMz8,6210
 apify/request_loaders/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 apify/scrapy/__init__.py,sha256=m2a0ts_JY9xJkBy4JU5mV8PJqjA3GGKLXBFu4nl-n-A,1048
 apify/scrapy/_actor_runner.py,sha256=rXWSnlQWGskDUH8PtLCv5SkOIx4AiVa4QbCYeCett5c,938

@@ -35,14 +35,14 @@ apify/scrapy/pipelines/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSu
 apify/storage_clients/__init__.py,sha256=JheTvNpVD_luQXC1KTEgtr6yVnuMEC9ajBNLCX3HuSo,358
 apify/storage_clients/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 apify/storage_clients/_apify/__init__.py,sha256=mtbVDsxqWL3kx30elnh0kAn2kZ4s3BBsWa15Y5e7RMU,347
-apify/storage_clients/_apify/_dataset_client.py,sha256=
-apify/storage_clients/_apify/_key_value_store_client.py,sha256=
-apify/storage_clients/_apify/_models.py,sha256=
-apify/storage_clients/_apify/_request_queue_client.py,sha256=
-apify/storage_clients/_apify/_request_queue_shared_client.py,sha256=
-apify/storage_clients/_apify/_request_queue_single_client.py,sha256=
+apify/storage_clients/_apify/_dataset_client.py,sha256=qmCJyL1MN83tYRXmc31P6yMIXVZMyRrGjr7R6-86FSE,11869
+apify/storage_clients/_apify/_key_value_store_client.py,sha256=994a5bM_BGHIeirnny6QlXjy5CzMU2I9SmMksCbHCUY,9357
+apify/storage_clients/_apify/_models.py,sha256=XxBru5XFdj0jqX6V-uVahT-pMQU3pZ501aTNzXCuoMU,4556
+apify/storage_clients/_apify/_request_queue_client.py,sha256=tAyap34gpxvPiQ0McDjX5ojq1ZIZc4EI3PrW8VQqS4k,13292
+apify/storage_clients/_apify/_request_queue_shared_client.py,sha256=pWmd6aPxM-eZ6PC1MfsfTcjD2mGGpCDS3ZZ3cG_2MEA,20971
+apify/storage_clients/_apify/_request_queue_single_client.py,sha256=d2txMwxW1nlYnvjdOH8xpxhcOYNeyc1ousGHRE7jsPg,17468
 apify/storage_clients/_apify/_storage_client.py,sha256=hFl_PuX1UgOydBD6pieZ0u2NWbDmZV-i0qygKdsuHt4,4873
-apify/storage_clients/_apify/_utils.py,sha256=
+apify/storage_clients/_apify/_utils.py,sha256=375gk_TJyMWIIgRbE9SS0hQup0h6sA3mzpTG53XIjkM,8769
 apify/storage_clients/_apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 apify/storage_clients/_file_system/__init__.py,sha256=rDbXatXV9wHKPhKTrXDzWnexhTm7sIJQWucMi-P-SD4,130
 apify/storage_clients/_file_system/_key_value_store_client.py,sha256=gxM3ap67PnY80Rd7P3onPAf2pksYpU0LoAlJdayEMdU,4179

@@ -51,7 +51,7 @@ apify/storage_clients/_smart_apify/__init__.py,sha256=614B2AaWY-dx6RQ6mod7VVR8gF
 apify/storage_clients/_smart_apify/_storage_client.py,sha256=GCPmVe_xWAFcO2Cuej4su4i97_d33Q9Ih_Sc5xW2Wa4,4674
 apify/storages/__init__.py,sha256=-9tEYJVabVs_eRVhUehxN58GH0UG8OfuGjGwuDieP2M,122
 apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-apify-3.0.2b7.dist-info/METADATA,sha256=
-apify-3.0.2b7.dist-info/WHEEL,sha256=
-apify-3.0.2b7.dist-info/licenses/LICENSE,sha256=
-apify-3.0.2b7.dist-info/RECORD,,
+apify-3.0.3.dist-info/METADATA,sha256=BghvJ18oGZT-KqyoV3wlC6WdJ-rVupfzSUaWXLix2-k,22580
+apify-3.0.3.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+apify-3.0.3.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
+apify-3.0.3.dist-info/RECORD,,

{apify-3.0.2b7.dist-info → apify-3.0.3.dist-info}/WHEEL: file without changes
{apify-3.0.2b7.dist-info → apify-3.0.3.dist-info}/licenses/LICENSE: file without changes