apify-3.0.2b6-py3-none-any.whl → apify-3.0.2b7-py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- apify/storage_clients/_apify/_request_queue_client.py +5 -5
- apify/storage_clients/_apify/_request_queue_shared_client.py +9 -13
- apify/storage_clients/_apify/_request_queue_single_client.py +31 -21
- {apify-3.0.2b6.dist-info → apify-3.0.2b7.dist-info}/METADATA +1 -1
- {apify-3.0.2b6.dist-info → apify-3.0.2b7.dist-info}/RECORD +7 -7
- {apify-3.0.2b6.dist-info → apify-3.0.2b7.dist-info}/WHEEL +0 -0
- {apify-3.0.2b6.dist-info → apify-3.0.2b7.dist-info}/licenses/LICENSE +0 -0
|
@@ -12,8 +12,8 @@ from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest
|
|
|
12
12
|
from crawlee.storages import RequestQueue
|
|
13
13
|
|
|
14
14
|
from ._models import ApifyRequestQueueMetadata, RequestQueueStats
|
|
15
|
-
from ._request_queue_shared_client import _ApifyRequestQueueSharedClient
|
|
16
|
-
from ._request_queue_single_client import _ApifyRequestQueueSingleClient
|
|
15
|
+
from ._request_queue_shared_client import ApifyRequestQueueSharedClient
|
|
16
|
+
from ._request_queue_single_client import ApifyRequestQueueSingleClient
|
|
17
17
|
from ._utils import AliasResolver
|
|
18
18
|
|
|
19
19
|
if TYPE_CHECKING:
|
|
@@ -47,14 +47,14 @@ class ApifyRequestQueueClient(RequestQueueClient):
|
|
|
47
47
|
self._api_client = api_client
|
|
48
48
|
"""The Apify request queue client for API operations."""
|
|
49
49
|
|
|
50
|
-
self._implementation:
|
|
50
|
+
self._implementation: ApifyRequestQueueSingleClient | ApifyRequestQueueSharedClient
|
|
51
51
|
"""Internal implementation used to communicate with the Apify platform based Request Queue."""
|
|
52
52
|
if access == 'single':
|
|
53
|
-
self._implementation =
|
|
53
|
+
self._implementation = ApifyRequestQueueSingleClient(
|
|
54
54
|
api_client=self._api_client, metadata=metadata, cache_size=self._MAX_CACHED_REQUESTS
|
|
55
55
|
)
|
|
56
56
|
elif access == 'shared':
|
|
57
|
-
self._implementation =
|
|
57
|
+
self._implementation = ApifyRequestQueueSharedClient(
|
|
58
58
|
api_client=self._api_client,
|
|
59
59
|
metadata=metadata,
|
|
60
60
|
cache_size=self._MAX_CACHED_REQUESTS,
|
|
@@ -23,7 +23,7 @@ if TYPE_CHECKING:
|
|
|
23
23
|
logger = getLogger(__name__)
|
|
24
24
|
|
|
25
25
|
|
|
26
|
-
class _ApifyRequestQueueSharedClient:
|
|
26
|
+
class ApifyRequestQueueSharedClient:
|
|
27
27
|
"""An Apify platform implementation of the request queue client.
|
|
28
28
|
|
|
29
29
|
This implementation supports multiple producers and multiple consumers scenario.
|
|
@@ -106,23 +106,19 @@ class _ApifyRequestQueueSharedClient:
|
|
|
106
106
|
# It could have been handled by another client in the meantime, so cached information about
|
|
107
107
|
# `request.was_already_handled` is not reliable.
|
|
108
108
|
already_present_requests.append(
|
|
109
|
-
ProcessedRequest
|
|
110
|
-
|
|
111
|
-
|
|
112
|
-
|
|
113
|
-
'wasAlreadyHandled': request.was_already_handled,
|
|
114
|
-
}
|
|
109
|
+
ProcessedRequest(
|
|
110
|
+
unique_key=request.unique_key,
|
|
111
|
+
was_already_present=True,
|
|
112
|
+
was_already_handled=request.was_already_handled,
|
|
115
113
|
)
|
|
116
114
|
)
|
|
117
115
|
|
|
118
116
|
else:
|
|
119
117
|
# Add new request to the cache.
|
|
120
|
-
processed_request = ProcessedRequest
|
|
121
|
-
|
|
122
|
-
|
|
123
|
-
|
|
124
|
-
'wasAlreadyHandled': request.was_already_handled,
|
|
125
|
-
}
|
|
118
|
+
processed_request = ProcessedRequest(
|
|
119
|
+
unique_key=request.unique_key,
|
|
120
|
+
was_already_present=True,
|
|
121
|
+
was_already_handled=request.was_already_handled,
|
|
126
122
|
)
|
|
127
123
|
self._cache_request(
|
|
128
124
|
request.unique_key,
|
|
@@ -21,7 +21,7 @@ if TYPE_CHECKING:
|
|
|
21
21
|
logger = getLogger(__name__)
|
|
22
22
|
|
|
23
23
|
|
|
24
|
-
class _ApifyRequestQueueSingleClient:
|
|
24
|
+
class ApifyRequestQueueSingleClient:
|
|
25
25
|
"""An Apify platform implementation of the request queue client with limited capability.
|
|
26
26
|
|
|
27
27
|
This client is designed to use as little resources as possible, but has to be used in constrained context.
|
|
@@ -108,23 +108,19 @@ class _ApifyRequestQueueSingleClient:
|
|
|
108
108
|
# Check if request is known to be already handled (it has to be present as well.)
|
|
109
109
|
if request.unique_key in self._requests_already_handled:
|
|
110
110
|
already_present_requests.append(
|
|
111
|
-
ProcessedRequest
|
|
112
|
-
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
'wasAlreadyHandled': True,
|
|
116
|
-
}
|
|
111
|
+
ProcessedRequest(
|
|
112
|
+
unique_key=request.unique_key,
|
|
113
|
+
was_already_present=True,
|
|
114
|
+
was_already_handled=True,
|
|
117
115
|
)
|
|
118
116
|
)
|
|
119
117
|
# Check if request is known to be already present, but unhandled
|
|
120
118
|
elif self._requests_cache.get(request.unique_key):
|
|
121
119
|
already_present_requests.append(
|
|
122
|
-
ProcessedRequest
|
|
123
|
-
|
|
124
|
-
|
|
125
|
-
|
|
126
|
-
'wasAlreadyHandled': request.was_already_handled,
|
|
127
|
-
}
|
|
120
|
+
ProcessedRequest(
|
|
121
|
+
unique_key=request.unique_key,
|
|
122
|
+
was_already_present=True,
|
|
123
|
+
was_already_handled=request.was_already_handled,
|
|
128
124
|
)
|
|
129
125
|
)
|
|
130
126
|
else:
|
|
@@ -158,8 +154,9 @@ class _ApifyRequestQueueSingleClient:
|
|
|
158
154
|
self._requests_cache.pop(unprocessed_request.unique_key, None)
|
|
159
155
|
|
|
160
156
|
else:
|
|
161
|
-
api_response = AddRequestsResponse
|
|
162
|
-
|
|
157
|
+
api_response = AddRequestsResponse(
|
|
158
|
+
unprocessed_requests=[],
|
|
159
|
+
processed_requests=already_present_requests,
|
|
163
160
|
)
|
|
164
161
|
|
|
165
162
|
# Update assumed total count for newly added requests.
|
|
@@ -236,11 +233,20 @@ class _ApifyRequestQueueSingleClient:
|
|
|
236
233
|
|
|
237
234
|
# Update the cached data
|
|
238
235
|
for request_data in response.get('items', []):
|
|
236
|
+
# Due to https://github.com/apify/apify-core/blob/v0.1377.0/src/api/src/lib/request_queues/request_queue.ts#L53,
|
|
237
|
+
# the list_head endpoint may return truncated fields for long requests (e.g., long URLs or unique keys).
|
|
238
|
+
# If truncation is detected, fetch the full request data by its ID from the API.
|
|
239
|
+
# This is a temporary workaround - the caching will be refactored to use request IDs instead of unique keys.
|
|
240
|
+
# See https://github.com/apify/apify-sdk-python/issues/630 for details.
|
|
241
|
+
if '[truncated]' in request_data['uniqueKey'] or '[truncated]' in request_data['url']:
|
|
242
|
+
request_data = await self._api_client.get_request(request_id=request_data['id']) # noqa: PLW2901
|
|
243
|
+
|
|
239
244
|
request = Request.model_validate(request_data)
|
|
240
245
|
|
|
241
246
|
if request.unique_key in self._requests_in_progress:
|
|
242
247
|
# Ignore requests that are already in progress, we will not process them again.
|
|
243
248
|
continue
|
|
249
|
+
|
|
244
250
|
if request.was_already_handled:
|
|
245
251
|
# Do not cache fully handled requests, we do not need them. Just cache their unique_key.
|
|
246
252
|
self._requests_already_handled.add(request.unique_key)
|
|
@@ -248,16 +254,20 @@ class _ApifyRequestQueueSingleClient:
|
|
|
248
254
|
# Only fetch the request if we do not know it yet.
|
|
249
255
|
if request.unique_key not in self._requests_cache:
|
|
250
256
|
request_id = unique_key_to_request_id(request.unique_key)
|
|
251
|
-
complete_request_data = await self._api_client.get_request(request_id)
|
|
252
257
|
|
|
253
|
-
if
|
|
254
|
-
request = Request.model_validate(complete_request_data)
|
|
255
|
-
self._requests_cache[request.unique_key] = request
|
|
256
|
-
else:
|
|
258
|
+
if request_data is not None and request_id != request_data['id']:
|
|
257
259
|
logger.warning(
|
|
258
|
-
f'
|
|
260
|
+
f'Request ID mismatch: {request_id} != {request_data["id"]}, '
|
|
261
|
+
'this may cause unexpected behavior.'
|
|
259
262
|
)
|
|
260
263
|
|
|
264
|
+
# See https://github.com/apify/apify-sdk-python/issues/630 for details.
|
|
265
|
+
if '[truncated]' not in request.unique_key:
|
|
266
|
+
request_data = await self._api_client.get_request(request_id=request_id) # noqa: PLW2901
|
|
267
|
+
request = Request.model_validate(request_data)
|
|
268
|
+
|
|
269
|
+
self._requests_cache[request.unique_key] = request
|
|
270
|
+
|
|
261
271
|
# Add new requests to the end of the head, unless already present in head
|
|
262
272
|
if request.unique_key not in self._head_requests:
|
|
263
273
|
self._head_requests.appendleft(request.unique_key)
|
|
@@ -38,9 +38,9 @@ apify/storage_clients/_apify/__init__.py,sha256=mtbVDsxqWL3kx30elnh0kAn2kZ4s3BBs
|
|
|
38
38
|
apify/storage_clients/_apify/_dataset_client.py,sha256=Bb3UwOaFkyuEY7tuBf8K46R4ZP_b1EaAkDOXOqwSoW8,12498
|
|
39
39
|
apify/storage_clients/_apify/_key_value_store_client.py,sha256=42dARbLX2oeOW7uYYKkDyQbEriMuh55Mxh0SqvkOEGg,10529
|
|
40
40
|
apify/storage_clients/_apify/_models.py,sha256=GEaN7Got1zIg42QPH36obHRWRDVNtzOkRuOWYRf9bFU,4572
|
|
41
|
-
apify/storage_clients/_apify/_request_queue_client.py,sha256=
|
|
42
|
-
apify/storage_clients/_apify/_request_queue_shared_client.py,sha256=
|
|
43
|
-
apify/storage_clients/_apify/_request_queue_single_client.py,sha256=
|
|
41
|
+
apify/storage_clients/_apify/_request_queue_client.py,sha256=PUIVmGQxqFTkRxW9FIFWjT0OeDyAGt-ULlW-rdQDTyc,14194
|
|
42
|
+
apify/storage_clients/_apify/_request_queue_shared_client.py,sha256=uxkuIG1rgCArgs6agldC9vmB2bgrIlNnm1I214Gf6WA,20550
|
|
43
|
+
apify/storage_clients/_apify/_request_queue_single_client.py,sha256=EuORHJnFLC1YAT6ZfQj-ayrfSJNpU4_61r_7uDyvwgA,18092
|
|
44
44
|
apify/storage_clients/_apify/_storage_client.py,sha256=hFl_PuX1UgOydBD6pieZ0u2NWbDmZV-i0qygKdsuHt4,4873
|
|
45
45
|
apify/storage_clients/_apify/_utils.py,sha256=ywXoSM69amRokUZcshbAvQLIcSZq4L-bpYIGyeFxCGQ,7696
|
|
46
46
|
apify/storage_clients/_apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
@@ -51,7 +51,7 @@ apify/storage_clients/_smart_apify/__init__.py,sha256=614B2AaWY-dx6RQ6mod7VVR8gF
|
|
|
51
51
|
apify/storage_clients/_smart_apify/_storage_client.py,sha256=GCPmVe_xWAFcO2Cuej4su4i97_d33Q9Ih_Sc5xW2Wa4,4674
|
|
52
52
|
apify/storages/__init__.py,sha256=-9tEYJVabVs_eRVhUehxN58GH0UG8OfuGjGwuDieP2M,122
|
|
53
53
|
apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
54
|
-
apify-3.0.
|
|
55
|
-
apify-3.0.
|
|
56
|
-
apify-3.0.
|
|
57
|
-
apify-3.0.
|
|
54
|
+
apify-3.0.2b7.dist-info/METADATA,sha256=nwfRWA3Q2QIjiKSLd-jAX3Y8j9ksLx3XA3NBeZ4MZnY,22582
|
|
55
|
+
apify-3.0.2b7.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
56
|
+
apify-3.0.2b7.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
|
|
57
|
+
apify-3.0.2b7.dist-info/RECORD,,
|
|
File without changes
|
|
File without changes
|