PyPI - apify - Versions diffs - 2.7.1b8__py3-none-any.whl → 2.7.1b9__py3-none-any.whl - Mend

apify 2.7.1b8__py3-none-any.whl → 2.7.1b9__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of apify might be problematic. Click here for more details.

Files changed (5) hide show

apify/storage_clients/_apify/_request_queue_client.py CHANGED Viewed

@@ -13,6 +13,7 @@ from cachetools import LRUCache
 from typing_extensions import override
 from apify_client import ApifyClientAsync
+from crawlee._utils.crypto import crypto_random_object_id
 from crawlee.storage_clients._base import RequestQueueClient
 from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata
@@ -65,10 +66,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
         self,
         *,
         api_client: RequestQueueClientAsync,
-        id: str,
-        name: str | None,
-        total_request_count: int,
-        handled_request_count: int,
+        metadata: RequestQueueMetadata,
     ) -> None:
         """Initialize a new instance.
@@ -77,11 +75,8 @@ class ApifyRequestQueueClient(RequestQueueClient):
         self._api_client = api_client
         """The Apify request queue client for API operations."""
-        self._id = id
-        """The ID of the request queue."""
-        self._name = name
-        """The name of the request queue."""
+        self._metadata = metadata
+        """Additional data related to the RequestQueue."""
         self._queue_head = deque[str]()
         """A deque to store request unique keys in the queue head."""
@@ -95,40 +90,43 @@ class ApifyRequestQueueClient(RequestQueueClient):
         self._should_check_for_forefront_requests = False
         """Whether to check for forefront requests in the next list_head call."""
-        self._had_multiple_clients = False
-        """Whether the request queue has been accessed by multiple clients."""
-        self._initial_total_count = total_request_count
-        """The initial total request count (from the API) when the queue was opened."""
-        self._initial_handled_count = handled_request_count
-        """The initial handled request count (from the API) when the queue was opened."""
-        self._assumed_total_count = 0
-        """The number of requests we assume are in the queue (tracked manually for this instance)."""
-        self._assumed_handled_count = 0
-        """The number of requests we assume have been handled (tracked manually for this instance)."""
         self._fetch_lock = asyncio.Lock()
         """Fetch lock to minimize race conditions when communicating with API."""
+    async def _get_metadata_estimate(self) -> RequestQueueMetadata:
+        """Try to get cached metadata first. If multiple clients, fuse with global metadata.
+        This method is used internally to avoid unnecessary API call unless needed (multiple clients).
+        Local estimation of metadata is without delay, unlike metadata from API. In situation where there is only one
+        client, it is the better choice.
+        """
+        if self._metadata.had_multiple_clients:
+            return await self.get_metadata()
+        # Get local estimation (will not include changes done by another client)
+        return self._metadata
     @override
     async def get_metadata(self) -> RequestQueueMetadata:
-        total_count = self._initial_total_count + self._assumed_total_count
-        handled_count = self._initial_handled_count + self._assumed_handled_count
-        pending_count = total_count - handled_count
+        """Get metadata about the request queue.
+        Returns:
+            Metadata from the API, merged with local estimation, because in some cases, the data from the API can
+            be delayed.
+        """
+        response = await self._api_client.get()
+        if response is None:
+            raise ValueError('Failed to fetch request queue metadata from the API.')
+        # Enhance the API response with local estimations (the API can lag by a few seconds; the local estimation does not).
         return RequestQueueMetadata(
-            id=self._id,
-            name=self._name,
-            total_request_count=total_count,
-            handled_request_count=handled_count,
-            pending_request_count=pending_count,
-            created_at=datetime.now(timezone.utc),
-            modified_at=datetime.now(timezone.utc),
-            accessed_at=datetime.now(timezone.utc),
-            had_multiple_clients=self._had_multiple_clients,
+            id=response['id'],
+            name=response['name'],
+            total_request_count=max(response['totalRequestCount'], self._metadata.total_request_count),
+            handled_request_count=max(response['handledRequestCount'], self._metadata.handled_request_count),
+            pending_request_count=response['pendingRequestCount'],
+            created_at=min(response['createdAt'], self._metadata.created_at),
+            modified_at=max(response['modifiedAt'], self._metadata.modified_at),
+            accessed_at=max(response['accessedAt'], self._metadata.accessed_at),
+            had_multiple_clients=response['hadMultipleClients'] or self._metadata.had_multiple_clients,
         )
     @classmethod
@@ -187,27 +185,34 @@ class ApifyRequestQueueClient(RequestQueueClient):
         )
         apify_rqs_client = apify_client_async.request_queues()
-        # If both id and name are provided, raise an error.
-        if id and name:
-            raise ValueError('Only one of "id" or "name" can be specified, not both.')
-        # If id is provided, get the storage by ID.
-        if id and name is None:
-            apify_rq_client = apify_client_async.request_queue(request_queue_id=id)
+        match (id, name):
+            case (None, None):
+                # If both id and name are None, try to get the default storage ID from environment variables.
+                # The default storage ID environment variable is set by the Apify platform. It also contains
+                # a new storage ID after Actor's reboot or migration.
+                id = configuration.default_request_queue_id
+            case (None, name):
+                # If only name is provided, get or create the storage by name.
+                id = RequestQueueMetadata.model_validate(
+                    await apify_rqs_client.get_or_create(name=name),
+                ).id
+            case (_, None):
+                # If only id is provided, use it.
+                pass
+            case (_, _):
+                # If both id and name are provided, raise an error.
+                raise ValueError('Only one of "id" or "name" can be specified, not both.')
+        if id is None:
+            raise RuntimeError('Unreachable code')
-        # If name is provided, get or create the storage by name.
-        if name and id is None:
-            id = RequestQueueMetadata.model_validate(
-                await apify_rqs_client.get_or_create(name=name),
-            ).id
-            apify_rq_client = apify_client_async.request_queue(request_queue_id=id)
+        # Use suitable client_key to make `hadMultipleClients` response of Apify API useful.
+        # It should persist across migrated or resurrected Actor runs on the Apify platform.
+        _api_max_client_key_length = 32
+        client_key = (configuration.actor_run_id or crypto_random_object_id(length=_api_max_client_key_length))[
+            :_api_max_client_key_length
+        ]
-        # If both id and name are None, try to get the default storage ID from environment variables.
-        # The default storage ID environment variable is set by the Apify platform. It also contains
-        # a new storage ID after Actor's reboot or migration.
-        if id is None and name is None:
-            id = configuration.default_request_queue_id
-            apify_rq_client = apify_client_async.request_queue(request_queue_id=id)
+        apify_rq_client = apify_client_async.request_queue(request_queue_id=id, client_key=client_key)
         # Fetch its metadata.
         metadata = await apify_rq_client.get()
@@ -217,27 +222,18 @@ class ApifyRequestQueueClient(RequestQueueClient):
             id = RequestQueueMetadata.model_validate(
                 await apify_rqs_client.get_or_create(),
             ).id
-            apify_rq_client = apify_client_async.request_queue(request_queue_id=id)
+            apify_rq_client = apify_client_async.request_queue(request_queue_id=id, client_key=client_key)
         # Verify that the storage exists by fetching its metadata again.
         metadata = await apify_rq_client.get()
         if metadata is None:
             raise ValueError(f'Opening request queue with id={id} and name={name} failed.')
-        metadata_model = RequestQueueMetadata.model_validate(
-            await apify_rqs_client.get_or_create(),
-        )
-        # Ensure we have a valid ID.
-        if id is None:
-            raise ValueError('Request queue ID cannot be None.')
+        metadata_model = RequestQueueMetadata.model_validate(metadata)
         return cls(
             api_client=apify_rq_client,
-            id=id,
-            name=name,
-            total_request_count=metadata_model.total_request_count,
-            handled_request_count=metadata_model.handled_request_count,
+            metadata=metadata_model,
         )
     @override
@@ -341,7 +337,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
             if not processed_request.was_already_present and not processed_request.was_already_handled:
                 new_request_count += 1
-        self._assumed_total_count += new_request_count
+        self._metadata.total_request_count += new_request_count
         return api_response
@@ -439,7 +435,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
             # Update assumed handled count if this wasn't already handled
             if not processed_request.was_already_handled:
-                self._assumed_handled_count += 1
+                self._metadata.handled_request_count += 1
             # Update the cache with the handled request
             cache_key = request.unique_key
@@ -487,7 +483,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
                 # If the request was previously handled, decrement our handled count since
                 # we're putting it back for processing.
                 if request.was_already_handled and not processed_request.was_already_handled:
-                    self._assumed_handled_count -= 1
+                    self._metadata.handled_request_count -= 1
                 # Update the cache
                 cache_key = request.unique_key
@@ -539,7 +535,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
         """Get a request by unique key, either from cache or by fetching from API.
         Args:
-            unique_key: Unique keu of the request to get.
+            unique_key: Unique key of the request to get.
         Returns:
             The request if found and valid, otherwise None.
@@ -645,7 +641,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
                 if cached_request and cached_request.hydrated:
                     items.append(cached_request.hydrated)
-            metadata = await self.get_metadata()
+            metadata = await self._get_metadata_estimate()
             return RequestQueueHead(
                 limit=limit,
@@ -672,6 +668,8 @@ class ApifyRequestQueueClient(RequestQueueClient):
         # Update the queue head cache
         self._queue_has_locked_requests = response.get('queueHasLockedRequests', False)
+        # Check if there is another client working with the RequestQueue
+        self._metadata.had_multiple_clients = response.get('hadMultipleClients', False)
         for request_data in response.get('items', []):
             request = Request.model_validate(request_data)

{apify-2.7.1b8.dist-info → apify-2.7.1b9.dist-info}/METADATA RENAMED Viewed

@@ -1,6 +1,6 @@
 Metadata-Version: 2.4
 Name: apify
-Version: 2.7.1b8
+Version: 2.7.1b9
 Summary: Apify SDK for Python
 Project-URL: Apify Homepage, https://apify.com
 Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog

{apify-2.7.1b8.dist-info → apify-2.7.1b9.dist-info}/RECORD RENAMED Viewed

@@ -38,7 +38,7 @@ apify/storage_clients/_apify/__init__.py,sha256=mtbVDsxqWL3kx30elnh0kAn2kZ4s3BBs
 apify/storage_clients/_apify/_dataset_client.py,sha256=8ZQvbtXZm54-V0Ukio0Z4jVI2gGkfqzZ59GlBQJXGUU,11485
 apify/storage_clients/_apify/_key_value_store_client.py,sha256=WbyzDCFmJS2hd_7ddYL3JEO9zvjUAAE1D_F4kohiim4,9455
 apify/storage_clients/_apify/_models.py,sha256=C6FpXswtO6kXE5RUumazm_conzJJS6PrXAGF9XBuDb8,3651
-apify/storage_clients/_apify/_request_queue_client.py,sha256=sMinB02V9236PH7fQTQ0AIwH6oObnZv2ivzKBEgaLOk,31372
+apify/storage_clients/_apify/_request_queue_client.py,sha256=LuKH_7Y9TMU1qtSagWRPsrb5aKcAIp3dkupS9W4615o,32117
 apify/storage_clients/_apify/_storage_client.py,sha256=5me6gHOeNAG3JaHxKRdzsZaa3FsqLDbObjhECGGWrr4,2890
 apify/storage_clients/_apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
 apify/storage_clients/_file_system/__init__.py,sha256=rDbXatXV9wHKPhKTrXDzWnexhTm7sIJQWucMi-P-SD4,130
@@ -46,7 +46,7 @@ apify/storage_clients/_file_system/_key_value_store_client.py,sha256=DHDv_e0kFwh
 apify/storage_clients/_file_system/_storage_client.py,sha256=UwxuSvhbyQ7zR1db1hTmZ1h38yH7btHNp82X7e8MWWE,1290
 apify/storages/__init__.py,sha256=-9tEYJVabVs_eRVhUehxN58GH0UG8OfuGjGwuDieP2M,122
 apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
-apify-2.7.1b8.dist-info/METADATA,sha256=9OY09tUOf00U-K6_kvD63Vs8DKsDEGJs4SPTaPhwzq0,21800
-apify-2.7.1b8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
-apify-2.7.1b8.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
-apify-2.7.1b8.dist-info/RECORD,,
+apify-2.7.1b9.dist-info/METADATA,sha256=bPUGmcC2s7uUbJoz7SGfiMRCpYoiuc16FPNBlD-5f2k,21800
+apify-2.7.1b9.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
+apify-2.7.1b9.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
+apify-2.7.1b9.dist-info/RECORD,,

{apify-2.7.1b8.dist-info → apify-2.7.1b9.dist-info}/WHEEL RENAMED Viewed

File without changes

{apify-2.7.1b8.dist-info → apify-2.7.1b9.dist-info}/licenses/LICENSE RENAMED Viewed

File without changes

apify 2.7.1b8__py3-none-any.whl → 2.7.1b9__py3-none-any.whl

Potentially problematic release.

apify 2.7.1b8__py3-none-any.whl → 2.7.1b9__py3-none-any.whl