apify 3.0.2b6__py3-none-any.whl → 3.0.2b7__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apify might be problematic. Click here for more details.

@@ -12,8 +12,8 @@ from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest
12
12
  from crawlee.storages import RequestQueue
13
13
 
14
14
  from ._models import ApifyRequestQueueMetadata, RequestQueueStats
15
- from ._request_queue_shared_client import _ApifyRequestQueueSharedClient
16
- from ._request_queue_single_client import _ApifyRequestQueueSingleClient
15
+ from ._request_queue_shared_client import ApifyRequestQueueSharedClient
16
+ from ._request_queue_single_client import ApifyRequestQueueSingleClient
17
17
  from ._utils import AliasResolver
18
18
 
19
19
  if TYPE_CHECKING:
@@ -47,14 +47,14 @@ class ApifyRequestQueueClient(RequestQueueClient):
47
47
  self._api_client = api_client
48
48
  """The Apify request queue client for API operations."""
49
49
 
50
- self._implementation: _ApifyRequestQueueSingleClient | _ApifyRequestQueueSharedClient
50
+ self._implementation: ApifyRequestQueueSingleClient | ApifyRequestQueueSharedClient
51
51
  """Internal implementation used to communicate with the Apify platform based Request Queue."""
52
52
  if access == 'single':
53
- self._implementation = _ApifyRequestQueueSingleClient(
53
+ self._implementation = ApifyRequestQueueSingleClient(
54
54
  api_client=self._api_client, metadata=metadata, cache_size=self._MAX_CACHED_REQUESTS
55
55
  )
56
56
  elif access == 'shared':
57
- self._implementation = _ApifyRequestQueueSharedClient(
57
+ self._implementation = ApifyRequestQueueSharedClient(
58
58
  api_client=self._api_client,
59
59
  metadata=metadata,
60
60
  cache_size=self._MAX_CACHED_REQUESTS,
@@ -23,7 +23,7 @@ if TYPE_CHECKING:
23
23
  logger = getLogger(__name__)
24
24
 
25
25
 
26
- class _ApifyRequestQueueSharedClient:
26
+ class ApifyRequestQueueSharedClient:
27
27
  """An Apify platform implementation of the request queue client.
28
28
 
29
29
  This implementation supports scenarios with multiple producers and multiple consumers.
@@ -106,23 +106,19 @@ class _ApifyRequestQueueSharedClient:
106
106
  # It could have been handled by another client in the meantime, so cached information about
107
107
  # `request.was_already_handled` is not reliable.
108
108
  already_present_requests.append(
109
- ProcessedRequest.model_validate(
110
- {
111
- 'uniqueKey': request.unique_key,
112
- 'wasAlreadyPresent': True,
113
- 'wasAlreadyHandled': request.was_already_handled,
114
- }
109
+ ProcessedRequest(
110
+ unique_key=request.unique_key,
111
+ was_already_present=True,
112
+ was_already_handled=request.was_already_handled,
115
113
  )
116
114
  )
117
115
 
118
116
  else:
119
117
  # Add new request to the cache.
120
- processed_request = ProcessedRequest.model_validate(
121
- {
122
- 'uniqueKey': request.unique_key,
123
- 'wasAlreadyPresent': True,
124
- 'wasAlreadyHandled': request.was_already_handled,
125
- }
118
+ processed_request = ProcessedRequest(
119
+ unique_key=request.unique_key,
120
+ was_already_present=True,
121
+ was_already_handled=request.was_already_handled,
126
122
  )
127
123
  self._cache_request(
128
124
  request.unique_key,
@@ -21,7 +21,7 @@ if TYPE_CHECKING:
21
21
  logger = getLogger(__name__)
22
22
 
23
23
 
24
- class _ApifyRequestQueueSingleClient:
24
+ class ApifyRequestQueueSingleClient:
25
25
  """An Apify platform implementation of the request queue client with limited capability.
26
26
 
27
27
  This client is designed to use as few resources as possible, but it has to be used in a constrained context.
@@ -108,23 +108,19 @@ class _ApifyRequestQueueSingleClient:
108
108
  # Check if the request is known to be already handled (in which case it has to be present as well).
109
109
  if request.unique_key in self._requests_already_handled:
110
110
  already_present_requests.append(
111
- ProcessedRequest.model_validate(
112
- {
113
- 'uniqueKey': request.unique_key,
114
- 'wasAlreadyPresent': True,
115
- 'wasAlreadyHandled': True,
116
- }
111
+ ProcessedRequest(
112
+ unique_key=request.unique_key,
113
+ was_already_present=True,
114
+ was_already_handled=True,
117
115
  )
118
116
  )
119
117
  # Check if request is known to be already present, but unhandled
120
118
  elif self._requests_cache.get(request.unique_key):
121
119
  already_present_requests.append(
122
- ProcessedRequest.model_validate(
123
- {
124
- 'uniqueKey': request.unique_key,
125
- 'wasAlreadyPresent': True,
126
- 'wasAlreadyHandled': request.was_already_handled,
127
- }
120
+ ProcessedRequest(
121
+ unique_key=request.unique_key,
122
+ was_already_present=True,
123
+ was_already_handled=request.was_already_handled,
128
124
  )
129
125
  )
130
126
  else:
@@ -158,8 +154,9 @@ class _ApifyRequestQueueSingleClient:
158
154
  self._requests_cache.pop(unprocessed_request.unique_key, None)
159
155
 
160
156
  else:
161
- api_response = AddRequestsResponse.model_validate(
162
- {'unprocessedRequests': [], 'processedRequests': already_present_requests}
157
+ api_response = AddRequestsResponse(
158
+ unprocessed_requests=[],
159
+ processed_requests=already_present_requests,
163
160
  )
164
161
 
165
162
  # Update assumed total count for newly added requests.
@@ -236,11 +233,20 @@ class _ApifyRequestQueueSingleClient:
236
233
 
237
234
  # Update the cached data
238
235
  for request_data in response.get('items', []):
236
+ # Due to https://github.com/apify/apify-core/blob/v0.1377.0/src/api/src/lib/request_queues/request_queue.ts#L53,
237
+ # the list_head endpoint may return truncated fields for long requests (e.g., long URLs or unique keys).
238
+ # If truncation is detected, fetch the full request data by its ID from the API.
239
+ # This is a temporary workaround - the caching will be refactored to use request IDs instead of unique keys.
240
+ # See https://github.com/apify/apify-sdk-python/issues/630 for details.
241
+ if '[truncated]' in request_data['uniqueKey'] or '[truncated]' in request_data['url']:
242
+ request_data = await self._api_client.get_request(request_id=request_data['id']) # noqa: PLW2901
243
+
239
244
  request = Request.model_validate(request_data)
240
245
 
241
246
  if request.unique_key in self._requests_in_progress:
242
247
  # Ignore requests that are already in progress, we will not process them again.
243
248
  continue
249
+
244
250
  if request.was_already_handled:
245
251
  # Do not cache fully handled requests, we do not need them. Just cache their unique_key.
246
252
  self._requests_already_handled.add(request.unique_key)
@@ -248,16 +254,20 @@ class _ApifyRequestQueueSingleClient:
248
254
  # Only fetch the request if we do not know it yet.
249
255
  if request.unique_key not in self._requests_cache:
250
256
  request_id = unique_key_to_request_id(request.unique_key)
251
- complete_request_data = await self._api_client.get_request(request_id)
252
257
 
253
- if complete_request_data is not None:
254
- request = Request.model_validate(complete_request_data)
255
- self._requests_cache[request.unique_key] = request
256
- else:
258
+ if request_data is not None and request_id != request_data['id']:
257
259
  logger.warning(
258
- f'Could not fetch request data for unique_key=`{request.unique_key}` (id=`{request_id}`)'
260
+ f'Request ID mismatch: {request_id} != {request_data["id"]}, '
261
+ 'this may cause unexpected behavior.'
259
262
  )
260
263
 
264
+ # See https://github.com/apify/apify-sdk-python/issues/630 for details.
265
+ if '[truncated]' not in request.unique_key:
266
+ request_data = await self._api_client.get_request(request_id=request_id) # noqa: PLW2901
267
+ request = Request.model_validate(request_data)
268
+
269
+ self._requests_cache[request.unique_key] = request
270
+
261
271
  # Add new requests to the end of the head, unless already present in head
262
272
  if request.unique_key not in self._head_requests:
263
273
  self._head_requests.appendleft(request.unique_key)
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: apify
3
- Version: 3.0.2b6
3
+ Version: 3.0.2b7
4
4
  Summary: Apify SDK for Python
5
5
  Project-URL: Apify Homepage, https://apify.com
6
6
  Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
@@ -38,9 +38,9 @@ apify/storage_clients/_apify/__init__.py,sha256=mtbVDsxqWL3kx30elnh0kAn2kZ4s3BBs
38
38
  apify/storage_clients/_apify/_dataset_client.py,sha256=Bb3UwOaFkyuEY7tuBf8K46R4ZP_b1EaAkDOXOqwSoW8,12498
39
39
  apify/storage_clients/_apify/_key_value_store_client.py,sha256=42dARbLX2oeOW7uYYKkDyQbEriMuh55Mxh0SqvkOEGg,10529
40
40
  apify/storage_clients/_apify/_models.py,sha256=GEaN7Got1zIg42QPH36obHRWRDVNtzOkRuOWYRf9bFU,4572
41
- apify/storage_clients/_apify/_request_queue_client.py,sha256=QXCLdTBeNW8RKWnxQOE71KOpZ_lqvqisa89eeiWwZ38,14200
42
- apify/storage_clients/_apify/_request_queue_shared_client.py,sha256=CbvwcXRvfuBoy3wrQEdLX9_vKELPH_WhHQARP14audM,20709
43
- apify/storage_clients/_apify/_request_queue_single_client.py,sha256=bQMebZKyeN_gYF1ZcHcaWng_q0m41KsiNvV1YRyzD3M,17299
41
+ apify/storage_clients/_apify/_request_queue_client.py,sha256=PUIVmGQxqFTkRxW9FIFWjT0OeDyAGt-ULlW-rdQDTyc,14194
42
+ apify/storage_clients/_apify/_request_queue_shared_client.py,sha256=uxkuIG1rgCArgs6agldC9vmB2bgrIlNnm1I214Gf6WA,20550
43
+ apify/storage_clients/_apify/_request_queue_single_client.py,sha256=EuORHJnFLC1YAT6ZfQj-ayrfSJNpU4_61r_7uDyvwgA,18092
44
44
  apify/storage_clients/_apify/_storage_client.py,sha256=hFl_PuX1UgOydBD6pieZ0u2NWbDmZV-i0qygKdsuHt4,4873
45
45
  apify/storage_clients/_apify/_utils.py,sha256=ywXoSM69amRokUZcshbAvQLIcSZq4L-bpYIGyeFxCGQ,7696
46
46
  apify/storage_clients/_apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
@@ -51,7 +51,7 @@ apify/storage_clients/_smart_apify/__init__.py,sha256=614B2AaWY-dx6RQ6mod7VVR8gF
51
51
  apify/storage_clients/_smart_apify/_storage_client.py,sha256=GCPmVe_xWAFcO2Cuej4su4i97_d33Q9Ih_Sc5xW2Wa4,4674
52
52
  apify/storages/__init__.py,sha256=-9tEYJVabVs_eRVhUehxN58GH0UG8OfuGjGwuDieP2M,122
53
53
  apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
54
- apify-3.0.2b6.dist-info/METADATA,sha256=VvY5YhIVBaPQf2fk7f62zq8RW0ss9R8mwqJZV2Wwchk,22582
55
- apify-3.0.2b6.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
56
- apify-3.0.2b6.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
57
- apify-3.0.2b6.dist-info/RECORD,,
54
+ apify-3.0.2b7.dist-info/METADATA,sha256=nwfRWA3Q2QIjiKSLd-jAX3Y8j9ksLx3XA3NBeZ4MZnY,22582
55
+ apify-3.0.2b7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
56
+ apify-3.0.2b7.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
57
+ apify-3.0.2b7.dist-info/RECORD,,