apify 2.7.2__py3-none-any.whl → 3.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apify might be problematic. Click here for more details.

Files changed (46) hide show
  1. apify/_actor.py +47 -12
  2. apify/_charging.py +15 -9
  3. apify/_configuration.py +43 -1
  4. apify/_crypto.py +0 -6
  5. apify/_models.py +7 -7
  6. apify/_proxy_configuration.py +10 -10
  7. apify/_utils.py +25 -2
  8. apify/events/__init__.py +5 -0
  9. apify/events/_apify_event_manager.py +140 -0
  10. apify/events/_types.py +102 -0
  11. apify/log.py +0 -9
  12. apify/request_loaders/__init__.py +18 -0
  13. apify/{storages/_request_list.py → request_loaders/_apify_request_list.py} +25 -18
  14. apify/request_loaders/py.typed +0 -0
  15. apify/scrapy/_logging_config.py +1 -4
  16. apify/scrapy/extensions/_httpcache.py +9 -5
  17. apify/scrapy/requests.py +3 -3
  18. apify/scrapy/scheduler.py +8 -5
  19. apify/storage_clients/__init__.py +10 -0
  20. apify/storage_clients/_apify/__init__.py +11 -0
  21. apify/storage_clients/_apify/_dataset_client.py +304 -0
  22. apify/storage_clients/_apify/_key_value_store_client.py +241 -0
  23. apify/storage_clients/_apify/_models.py +107 -0
  24. apify/storage_clients/_apify/_request_queue_client.py +787 -0
  25. apify/storage_clients/_apify/_storage_client.py +80 -0
  26. apify/storage_clients/_apify/py.typed +0 -0
  27. apify/storage_clients/_file_system/__init__.py +2 -0
  28. apify/storage_clients/_file_system/_key_value_store_client.py +36 -0
  29. apify/storage_clients/_file_system/_storage_client.py +35 -0
  30. apify/storage_clients/py.typed +0 -0
  31. apify/storages/__init__.py +1 -3
  32. {apify-2.7.2.dist-info → apify-3.0.0rc1.dist-info}/METADATA +8 -7
  33. apify-3.0.0rc1.dist-info/RECORD +52 -0
  34. apify/_platform_event_manager.py +0 -231
  35. apify/apify_storage_client/__init__.py +0 -3
  36. apify/apify_storage_client/_apify_storage_client.py +0 -72
  37. apify/apify_storage_client/_dataset_client.py +0 -190
  38. apify/apify_storage_client/_dataset_collection_client.py +0 -51
  39. apify/apify_storage_client/_key_value_store_client.py +0 -109
  40. apify/apify_storage_client/_key_value_store_collection_client.py +0 -51
  41. apify/apify_storage_client/_request_queue_client.py +0 -176
  42. apify/apify_storage_client/_request_queue_collection_client.py +0 -51
  43. apify-2.7.2.dist-info/RECORD +0 -44
  44. /apify/{apify_storage_client → events}/py.typed +0 -0
  45. {apify-2.7.2.dist-info → apify-3.0.0rc1.dist-info}/WHEEL +0 -0
  46. {apify-2.7.2.dist-info → apify-3.0.0rc1.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,787 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import re
5
+ from base64 import b64encode
6
+ from collections import deque
7
+ from datetime import datetime, timedelta, timezone
8
+ from hashlib import sha256
9
+ from logging import getLogger
10
+ from typing import TYPE_CHECKING, Final
11
+
12
+ from cachetools import LRUCache
13
+ from typing_extensions import override
14
+
15
+ from apify_client import ApifyClientAsync
16
+ from crawlee.storage_clients._base import RequestQueueClient
17
+ from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata
18
+
19
+ from ._models import CachedRequest, ProlongRequestLockResponse, RequestQueueHead
20
+ from apify import Request
21
+
22
+ if TYPE_CHECKING:
23
+ from collections.abc import Sequence
24
+
25
+ from apify_client.clients import RequestQueueClientAsync
26
+
27
+ from apify import Configuration
28
+
29
+ logger = getLogger(__name__)
30
+
31
+
32
def unique_key_to_request_id(unique_key: str, *, request_id_length: int = 15) -> str:
    """Generate a deterministic request ID based on a unique key.

    Args:
        unique_key: The unique key to convert into a request ID.
        request_id_length: The length of the request ID.

    Returns:
        A URL-safe, truncated request ID based on the unique key.
    """
    # Hash the unique key and express the digest as base64 text.
    digest = sha256(unique_key.encode('utf-8')).digest()
    encoded = b64encode(digest).decode('utf-8')

    # Drop characters that are not URL-safe ('+', '/', '=') and trim to the requested length.
    return re.sub(r'(\+|\/|=)', '', encoded)[:request_id_length]
53
+
54
+
55
class ApifyRequestQueueClient(RequestQueueClient):
    """Request queue client backed by the Apify platform API."""

    _DEFAULT_LOCK_TIME: Final[timedelta] = timedelta(minutes=3)
    """How long a fetched request stays locked unless the lock is prolonged or released."""

    _MAX_CACHED_REQUESTS: Final[int] = 1_000_000
    """Upper bound on the number of request entries kept in the local LRU cache."""
63
+
64
+ def __init__(
65
+ self,
66
+ *,
67
+ api_client: RequestQueueClientAsync,
68
+ id: str,
69
+ name: str | None,
70
+ total_request_count: int,
71
+ handled_request_count: int,
72
+ ) -> None:
73
+ """Initialize a new instance.
74
+
75
+ Preferably use the `ApifyRequestQueueClient.open` class method to create a new instance.
76
+ """
77
+ self._api_client = api_client
78
+ """The Apify request queue client for API operations."""
79
+
80
+ self._id = id
81
+ """The ID of the request queue."""
82
+
83
+ self._name = name
84
+ """The name of the request queue."""
85
+
86
+ self._queue_head = deque[str]()
87
+ """A deque to store request unique keys in the queue head."""
88
+
89
+ self._requests_cache: LRUCache[str, CachedRequest] = LRUCache(maxsize=self._MAX_CACHED_REQUESTS)
90
+ """A cache to store request objects. Request unique key is used as the cache key."""
91
+
92
+ self._queue_has_locked_requests: bool | None = None
93
+ """Whether the queue has requests locked by another client."""
94
+
95
+ self._should_check_for_forefront_requests = False
96
+ """Whether to check for forefront requests in the next list_head call."""
97
+
98
+ self._had_multiple_clients = False
99
+ """Whether the request queue has been accessed by multiple clients."""
100
+
101
+ self._initial_total_count = total_request_count
102
+ """The initial total request count (from the API) when the queue was opened."""
103
+
104
+ self._initial_handled_count = handled_request_count
105
+ """The initial handled request count (from the API) when the queue was opened."""
106
+
107
+ self._assumed_total_count = 0
108
+ """The number of requests we assume are in the queue (tracked manually for this instance)."""
109
+
110
+ self._assumed_handled_count = 0
111
+ """The number of requests we assume have been handled (tracked manually for this instance)."""
112
+
113
+ self._fetch_lock = asyncio.Lock()
114
+ """Fetch lock to minimize race conditions when communicating with API."""
115
+
116
+ @override
117
+ async def get_metadata(self) -> RequestQueueMetadata:
118
+ total_count = self._initial_total_count + self._assumed_total_count
119
+ handled_count = self._initial_handled_count + self._assumed_handled_count
120
+ pending_count = total_count - handled_count
121
+
122
+ return RequestQueueMetadata(
123
+ id=self._id,
124
+ name=self._name,
125
+ total_request_count=total_count,
126
+ handled_request_count=handled_count,
127
+ pending_request_count=pending_count,
128
+ created_at=datetime.now(timezone.utc),
129
+ modified_at=datetime.now(timezone.utc),
130
+ accessed_at=datetime.now(timezone.utc),
131
+ had_multiple_clients=self._had_multiple_clients,
132
+ )
133
+
134
+ @classmethod
135
+ async def open(
136
+ cls,
137
+ *,
138
+ id: str | None,
139
+ name: str | None,
140
+ configuration: Configuration,
141
+ ) -> ApifyRequestQueueClient:
142
+ """Open an Apify request queue client.
143
+
144
+ This method creates and initializes a new instance of the Apify request queue client. It handles
145
+ authentication, storage lookup/creation, and metadata retrieval, and sets up internal caching and queue
146
+ management structures.
147
+
148
+ Args:
149
+ id: The ID of an existing request queue to open. If provided, the client will connect to this specific
150
+ storage. Cannot be used together with `name`.
151
+ name: The name of a request queue to get or create. If a storage with this name exists, it will be opened;
152
+ otherwise, a new one will be created. Cannot be used together with `id`.
153
+ configuration: The configuration object containing API credentials and settings. Must include a valid
154
+ `token` and `api_base_url`. May also contain a `default_request_queue_id` for fallback when neither
155
+ `id` nor `name` is provided.
156
+
157
+ Returns:
158
+ An instance for the opened or created storage client.
159
+
160
+ Raises:
161
+ ValueError: If the configuration is missing required fields (token, api_base_url), if both `id` and `name`
162
+ are provided, or if neither `id` nor `name` is provided and no default storage ID is available
163
+ in the configuration.
164
+ """
165
+ token = configuration.token
166
+ if not token:
167
+ raise ValueError(f'Apify storage client requires a valid token in Configuration (token={token}).')
168
+
169
+ api_url = configuration.api_base_url
170
+ if not api_url:
171
+ raise ValueError(f'Apify storage client requires a valid API URL in Configuration (api_url={api_url}).')
172
+
173
+ api_public_base_url = configuration.api_public_base_url
174
+ if not api_public_base_url:
175
+ raise ValueError(
176
+ 'Apify storage client requires a valid API public base URL in Configuration '
177
+ f'(api_public_base_url={api_public_base_url}).'
178
+ )
179
+
180
+ # Create Apify client with the provided token and API URL.
181
+ apify_client_async = ApifyClientAsync(
182
+ token=token,
183
+ api_url=api_url,
184
+ max_retries=8,
185
+ min_delay_between_retries_millis=500,
186
+ timeout_secs=360,
187
+ )
188
+ apify_rqs_client = apify_client_async.request_queues()
189
+
190
+ # If both id and name are provided, raise an error.
191
+ if id and name:
192
+ raise ValueError('Only one of "id" or "name" can be specified, not both.')
193
+
194
+ # If id is provided, get the storage by ID.
195
+ if id and name is None:
196
+ apify_rq_client = apify_client_async.request_queue(request_queue_id=id)
197
+
198
+ # If name is provided, get or create the storage by name.
199
+ if name and id is None:
200
+ id = RequestQueueMetadata.model_validate(
201
+ await apify_rqs_client.get_or_create(name=name),
202
+ ).id
203
+ apify_rq_client = apify_client_async.request_queue(request_queue_id=id)
204
+
205
+ # If both id and name are None, try to get the default storage ID from environment variables.
206
+ # The default storage ID environment variable is set by the Apify platform. It also contains
207
+ # a new storage ID after Actor's reboot or migration.
208
+ if id is None and name is None:
209
+ id = configuration.default_request_queue_id
210
+ apify_rq_client = apify_client_async.request_queue(request_queue_id=id)
211
+
212
+ # Fetch its metadata.
213
+ metadata = await apify_rq_client.get()
214
+
215
+ # If metadata is None, it means the storage does not exist, so we create it.
216
+ if metadata is None:
217
+ id = RequestQueueMetadata.model_validate(
218
+ await apify_rqs_client.get_or_create(),
219
+ ).id
220
+ apify_rq_client = apify_client_async.request_queue(request_queue_id=id)
221
+
222
+ # Verify that the storage exists by fetching its metadata again.
223
+ metadata = await apify_rq_client.get()
224
+ if metadata is None:
225
+ raise ValueError(f'Opening request queue with id={id} and name={name} failed.')
226
+
227
+ metadata_model = RequestQueueMetadata.model_validate(
228
+ await apify_rqs_client.get_or_create(),
229
+ )
230
+
231
+ # Ensure we have a valid ID.
232
+ if id is None:
233
+ raise ValueError('Request queue ID cannot be None.')
234
+
235
+ return cls(
236
+ api_client=apify_rq_client,
237
+ id=id,
238
+ name=name,
239
+ total_request_count=metadata_model.total_request_count,
240
+ handled_request_count=metadata_model.handled_request_count,
241
+ )
242
+
243
+ @override
244
+ async def purge(self) -> None:
245
+ raise NotImplementedError(
246
+ 'Purging the request queue is not supported in the Apify platform. '
247
+ 'Use the `drop` method to delete the request queue instead.'
248
+ )
249
+
250
+ @override
251
+ async def drop(self) -> None:
252
+ await self._api_client.delete()
253
+
254
+ @override
255
+ async def add_batch_of_requests(
256
+ self,
257
+ requests: Sequence[Request],
258
+ *,
259
+ forefront: bool = False,
260
+ ) -> AddRequestsResponse:
261
+ """Add a batch of requests to the queue.
262
+
263
+ Args:
264
+ requests: The requests to add.
265
+ forefront: Whether to add the requests to the beginning of the queue.
266
+
267
+ Returns:
268
+ Response containing information about the added requests.
269
+ """
270
+ # Do not try to add previously added requests to avoid pointless expensive calls to API
271
+
272
+ new_requests: list[Request] = []
273
+ already_present_requests: list[ProcessedRequest] = []
274
+
275
+ for request in requests:
276
+ if self._requests_cache.get(request.unique_key):
277
+ # We are not sure if it was already handled at this point, and it is not worth calling API for it.
278
+ # It could have been handled by another client in the meantime, so cached information about
279
+ # `request.was_already_handled` is not reliable.
280
+ already_present_requests.append(
281
+ ProcessedRequest.model_validate(
282
+ {
283
+ 'uniqueKey': request.unique_key,
284
+ 'wasAlreadyPresent': True,
285
+ 'wasAlreadyHandled': request.was_already_handled,
286
+ }
287
+ )
288
+ )
289
+
290
+ else:
291
+ # Add new request to the cache.
292
+ processed_request = ProcessedRequest.model_validate(
293
+ {
294
+ 'uniqueKey': request.unique_key,
295
+ 'wasAlreadyPresent': True,
296
+ 'wasAlreadyHandled': request.was_already_handled,
297
+ }
298
+ )
299
+ self._cache_request(
300
+ request.unique_key,
301
+ processed_request,
302
+ )
303
+ new_requests.append(request)
304
+
305
+ if new_requests:
306
+ # Prepare requests for API by converting to dictionaries.
307
+ requests_dict = [
308
+ request.model_dump(
309
+ by_alias=True,
310
+ exclude={'id'}, # Exclude ID fields from requests since the API doesn't accept them.
311
+ )
312
+ for request in new_requests
313
+ ]
314
+
315
+ # Send requests to API.
316
+ api_response = AddRequestsResponse.model_validate(
317
+ await self._api_client.batch_add_requests(requests=requests_dict, forefront=forefront)
318
+ )
319
+
320
+ # Add the locally known already present processed requests based on the local cache.
321
+ api_response.processed_requests.extend(already_present_requests)
322
+
323
+ # Remove unprocessed requests from the cache
324
+ for unprocessed_request in api_response.unprocessed_requests:
325
+ self._requests_cache.pop(unprocessed_request.unique_key, None)
326
+
327
+ else:
328
+ api_response = AddRequestsResponse.model_validate(
329
+ {'unprocessedRequests': [], 'processedRequests': already_present_requests}
330
+ )
331
+
332
+ logger.debug(
333
+ f'Tried to add new requests: {len(new_requests)}, '
334
+ f'succeeded to add new requests: {len(api_response.processed_requests) - len(already_present_requests)}, '
335
+ f'skipped already present requests: {len(already_present_requests)}'
336
+ )
337
+
338
+ # Update assumed total count for newly added requests.
339
+ new_request_count = 0
340
+ for processed_request in api_response.processed_requests:
341
+ if not processed_request.was_already_present and not processed_request.was_already_handled:
342
+ new_request_count += 1
343
+
344
+ self._assumed_total_count += new_request_count
345
+
346
+ return api_response
347
+
348
+ @override
349
+ async def get_request(self, unique_key: str) -> Request | None:
350
+ """Get a request by unique key.
351
+
352
+ Args:
353
+ unique_key: Unique key of the request to get.
354
+
355
+ Returns:
356
+ The request or None if not found.
357
+ """
358
+ response = await self._api_client.get_request(unique_key_to_request_id(unique_key))
359
+
360
+ if response is None:
361
+ return None
362
+
363
+ return Request.model_validate(response)
364
+
365
+ @override
366
+ async def fetch_next_request(self) -> Request | None:
367
+ """Return the next request in the queue to be processed.
368
+
369
+ Once you successfully finish processing of the request, you need to call `mark_request_as_handled`
370
+ to mark the request as handled in the queue. If there was some error in processing the request, call
371
+ `reclaim_request` instead, so that the queue will give the request to some other consumer
372
+ in another call to the `fetch_next_request` method.
373
+
374
+ Returns:
375
+ The request or `None` if there are no more pending requests.
376
+ """
377
+ # Ensure the queue head has requests if available. Fetching the head with lock to prevent race conditions.
378
+ async with self._fetch_lock:
379
+ await self._ensure_head_is_non_empty()
380
+
381
+ # If queue head is empty after ensuring, there are no requests
382
+ if not self._queue_head:
383
+ return None
384
+
385
+ # Get the next request ID from the queue head
386
+ next_unique_key = self._queue_head.popleft()
387
+
388
+ request = await self._get_or_hydrate_request(next_unique_key)
389
+
390
+ # Handle potential inconsistency where request might not be in the main table yet
391
+ if request is None:
392
+ logger.debug(
393
+ 'Cannot find a request from the beginning of queue, will be retried later',
394
+ extra={'nextRequestUniqueKey': next_unique_key},
395
+ )
396
+ return None
397
+
398
+ # If the request was already handled, skip it
399
+ if request.handled_at is not None:
400
+ logger.debug(
401
+ 'Request fetched from the beginning of queue was already handled',
402
+ extra={'nextRequestUniqueKey': next_unique_key},
403
+ )
404
+ return None
405
+
406
+ # Use get request to ensure we have the full request object.
407
+ request = await self.get_request(request.unique_key)
408
+ if request is None:
409
+ logger.debug(
410
+ 'Request fetched from the beginning of queue was not found in the RQ',
411
+ extra={'nextRequestUniqueKey': next_unique_key},
412
+ )
413
+ return None
414
+
415
+ return request
416
+
417
+ @override
418
+ async def mark_request_as_handled(self, request: Request) -> ProcessedRequest | None:
419
+ """Mark a request as handled after successful processing.
420
+
421
+ Handled requests will never again be returned by the `fetch_next_request` method.
422
+
423
+ Args:
424
+ request: The request to mark as handled.
425
+
426
+ Returns:
427
+ Information about the queue operation. `None` if the given request was not in progress.
428
+ """
429
+ # Set the handled_at timestamp if not already set
430
+ if request.handled_at is None:
431
+ request.handled_at = datetime.now(tz=timezone.utc)
432
+
433
+ if cached_request := self._requests_cache[request.unique_key]:
434
+ cached_request.was_already_handled = request.was_already_handled
435
+ try:
436
+ # Update the request in the API
437
+ processed_request = await self._update_request(request)
438
+ processed_request.unique_key = request.unique_key
439
+
440
+ # Update assumed handled count if this wasn't already handled
441
+ if not processed_request.was_already_handled:
442
+ self._assumed_handled_count += 1
443
+
444
+ # Update the cache with the handled request
445
+ cache_key = request.unique_key
446
+ self._cache_request(
447
+ cache_key,
448
+ processed_request,
449
+ hydrated_request=request,
450
+ )
451
+ except Exception as exc:
452
+ logger.debug(f'Error marking request {request.unique_key} as handled: {exc!s}')
453
+ return None
454
+ else:
455
+ return processed_request
456
+
457
+ @override
458
+ async def reclaim_request(
459
+ self,
460
+ request: Request,
461
+ *,
462
+ forefront: bool = False,
463
+ ) -> ProcessedRequest | None:
464
+ """Reclaim a failed request back to the queue.
465
+
466
+ The request will be returned for processing later again by another call to `fetch_next_request`.
467
+
468
+ Args:
469
+ request: The request to return to the queue.
470
+ forefront: Whether to add the request to the head or the end of the queue.
471
+
472
+ Returns:
473
+ Information about the queue operation. `None` if the given request was not in progress.
474
+ """
475
+ # Check if the request was marked as handled and clear it. When reclaiming,
476
+ # we want to put the request back for processing.
477
+ if request.was_already_handled:
478
+ request.handled_at = None
479
+
480
+ # Reclaim with lock to prevent race conditions that could lead to double processing of the same request.
481
+ async with self._fetch_lock:
482
+ try:
483
+ # Update the request in the API.
484
+ processed_request = await self._update_request(request, forefront=forefront)
485
+ processed_request.unique_key = request.unique_key
486
+
487
+ # If the request was previously handled, decrement our handled count since
488
+ # we're putting it back for processing.
489
+ if request.was_already_handled and not processed_request.was_already_handled:
490
+ self._assumed_handled_count -= 1
491
+
492
+ # Update the cache
493
+ cache_key = request.unique_key
494
+ self._cache_request(
495
+ cache_key,
496
+ processed_request,
497
+ hydrated_request=request,
498
+ )
499
+
500
+ # If we're adding to the forefront, we need to check for forefront requests
501
+ # in the next list_head call
502
+ if forefront:
503
+ self._should_check_for_forefront_requests = True
504
+
505
+ # Try to release the lock on the request
506
+ try:
507
+ await self._delete_request_lock(request.unique_key, forefront=forefront)
508
+ except Exception as err:
509
+ logger.debug(f'Failed to delete request lock for request {request.unique_key}', exc_info=err)
510
+ except Exception as exc:
511
+ logger.debug(f'Error reclaiming request {request.unique_key}: {exc!s}')
512
+ return None
513
+ else:
514
+ return processed_request
515
+
516
+ @override
517
+ async def is_empty(self) -> bool:
518
+ """Check if the queue is empty.
519
+
520
+ Returns:
521
+ True if the queue is empty, False otherwise.
522
+ """
523
+ # Check _list_head and self._queue_has_locked_requests with lock to make sure they are consistent.
524
+ # Without the lock the `is_empty` is prone to falsely report True with some low probability race condition.
525
+ async with self._fetch_lock:
526
+ head = await self._list_head(limit=1, lock_time=None)
527
+ return len(head.items) == 0 and not self._queue_has_locked_requests
528
+
529
+ async def _ensure_head_is_non_empty(self) -> None:
530
+ """Ensure that the queue head has requests if they are available in the queue."""
531
+ # If queue head has adequate requests, skip fetching more
532
+ if len(self._queue_head) > 1 and not self._should_check_for_forefront_requests:
533
+ return
534
+
535
+ # Fetch requests from the API and populate the queue head
536
+ await self._list_head(lock_time=self._DEFAULT_LOCK_TIME)
537
+
538
+ async def _get_or_hydrate_request(self, unique_key: str) -> Request | None:
539
+ """Get a request by unique key, either from cache or by fetching from API.
540
+
541
+ Args:
542
+ unique_key: Unique keu of the request to get.
543
+
544
+ Returns:
545
+ The request if found and valid, otherwise None.
546
+ """
547
+ # First check if the request is in our cache
548
+ cached_entry = self._requests_cache.get(unique_key)
549
+
550
+ if cached_entry and cached_entry.hydrated:
551
+ # If we have the request hydrated in cache, check if lock is expired
552
+ if cached_entry.lock_expires_at and cached_entry.lock_expires_at < datetime.now(tz=timezone.utc):
553
+ # Try to prolong the lock if it's expired
554
+ try:
555
+ lock_secs = int(self._DEFAULT_LOCK_TIME.total_seconds())
556
+ response = await self._prolong_request_lock(unique_key, lock_secs=lock_secs)
557
+ cached_entry.lock_expires_at = response.lock_expires_at
558
+ except Exception:
559
+ # If prolonging the lock fails, we lost the request
560
+ logger.debug(f'Failed to prolong lock for request {unique_key}, returning None')
561
+ return None
562
+
563
+ return cached_entry.hydrated
564
+
565
+ # If not in cache or not hydrated, fetch the request
566
+ try:
567
+ # Try to acquire or prolong the lock
568
+ lock_secs = int(self._DEFAULT_LOCK_TIME.total_seconds())
569
+ await self._prolong_request_lock(unique_key, lock_secs=lock_secs)
570
+
571
+ # Fetch the request data
572
+ request = await self.get_request(unique_key)
573
+
574
+ # If request is not found, release lock and return None
575
+ if not request:
576
+ await self._delete_request_lock(unique_key)
577
+ return None
578
+
579
+ # Update cache with hydrated request
580
+ cache_key = request.unique_key
581
+ self._cache_request(
582
+ cache_key,
583
+ ProcessedRequest(
584
+ unique_key=request.unique_key,
585
+ was_already_present=True,
586
+ was_already_handled=request.handled_at is not None,
587
+ ),
588
+ hydrated_request=request,
589
+ )
590
+ except Exception as exc:
591
+ logger.debug(f'Error fetching or locking request {unique_key}: {exc!s}')
592
+ return None
593
+ else:
594
+ return request
595
+
596
+ async def _update_request(
597
+ self,
598
+ request: Request,
599
+ *,
600
+ forefront: bool = False,
601
+ ) -> ProcessedRequest:
602
+ """Update a request in the queue.
603
+
604
+ Args:
605
+ request: The updated request.
606
+ forefront: Whether to put the updated request in the beginning or the end of the queue.
607
+
608
+ Returns:
609
+ The updated request
610
+ """
611
+ request_dict = request.model_dump(by_alias=True)
612
+ request_dict['id'] = unique_key_to_request_id(request.unique_key)
613
+ response = await self._api_client.update_request(
614
+ request=request_dict,
615
+ forefront=forefront,
616
+ )
617
+
618
+ return ProcessedRequest.model_validate(
619
+ {'uniqueKey': request.unique_key} | response,
620
+ )
621
+
622
+ async def _list_head(
623
+ self,
624
+ *,
625
+ lock_time: timedelta | None = None,
626
+ limit: int = 25,
627
+ ) -> RequestQueueHead:
628
+ """Retrieve requests from the beginning of the queue.
629
+
630
+ Args:
631
+ lock_time: Duration for which to lock the retrieved requests.
632
+ If None, requests will not be locked.
633
+ limit: Maximum number of requests to retrieve.
634
+
635
+ Returns:
636
+ A collection of requests from the beginning of the queue.
637
+ """
638
+ # Return from cache if available and we're not checking for new forefront requests
639
+ if self._queue_head and not self._should_check_for_forefront_requests:
640
+ logger.debug(f'Using cached queue head with {len(self._queue_head)} requests')
641
+ # Create a list of requests from the cached queue head
642
+ items = []
643
+ for unique_key in list(self._queue_head)[:limit]:
644
+ cached_request = self._requests_cache.get(unique_key)
645
+ if cached_request and cached_request.hydrated:
646
+ items.append(cached_request.hydrated)
647
+
648
+ metadata = await self.get_metadata()
649
+
650
+ return RequestQueueHead(
651
+ limit=limit,
652
+ had_multiple_clients=metadata.had_multiple_clients,
653
+ queue_modified_at=metadata.modified_at,
654
+ items=items,
655
+ queue_has_locked_requests=self._queue_has_locked_requests,
656
+ lock_time=lock_time,
657
+ )
658
+ leftover_buffer = list[str]()
659
+ if self._should_check_for_forefront_requests:
660
+ leftover_buffer = list(self._queue_head)
661
+ self._queue_head.clear()
662
+ self._should_check_for_forefront_requests = False
663
+
664
+ # Otherwise fetch from API
665
+ lock_time = lock_time or self._DEFAULT_LOCK_TIME
666
+ lock_secs = int(lock_time.total_seconds())
667
+
668
+ response = await self._api_client.list_and_lock_head(
669
+ lock_secs=lock_secs,
670
+ limit=limit,
671
+ )
672
+
673
+ # Update the queue head cache
674
+ self._queue_has_locked_requests = response.get('queueHasLockedRequests', False)
675
+
676
+ for request_data in response.get('items', []):
677
+ request = Request.model_validate(request_data)
678
+
679
+ # Skip requests without ID or unique key
680
+ if not request.unique_key:
681
+ logger.debug(
682
+ 'Skipping request from queue head, missing ID or unique key',
683
+ extra={
684
+ 'unique_key': request.unique_key,
685
+ },
686
+ )
687
+ continue
688
+
689
+ # Cache the request
690
+ self._cache_request(
691
+ request.unique_key,
692
+ ProcessedRequest(
693
+ unique_key=request.unique_key,
694
+ was_already_present=True,
695
+ was_already_handled=False,
696
+ ),
697
+ hydrated_request=request,
698
+ )
699
+ self._queue_head.append(request.unique_key)
700
+
701
+ for leftover_unique_key in leftover_buffer:
702
+ # After adding new requests to the forefront, any existing leftover locked request is kept in the end.
703
+ self._queue_head.append(leftover_unique_key)
704
+ return RequestQueueHead.model_validate(response)
705
+
706
+ async def _prolong_request_lock(
707
+ self,
708
+ unique_key: str,
709
+ *,
710
+ lock_secs: int,
711
+ ) -> ProlongRequestLockResponse:
712
+ """Prolong the lock on a specific request in the queue.
713
+
714
+ Args:
715
+ unique_key: Unique key of the request whose lock is to be prolonged.
716
+ lock_secs: The additional amount of time, in seconds, that the request will remain locked.
717
+
718
+ Returns:
719
+ A response containing the time at which the lock will expire.
720
+ """
721
+ response = await self._api_client.prolong_request_lock(
722
+ request_id=unique_key_to_request_id(unique_key),
723
+ # All requests reaching this code were the tip of the queue at the moment when they were fetched,
724
+ # so if their lock expires, they should be put back to the forefront as their handling is long overdue.
725
+ forefront=True,
726
+ lock_secs=lock_secs,
727
+ )
728
+
729
+ result = ProlongRequestLockResponse(
730
+ lock_expires_at=datetime.fromisoformat(response['lockExpiresAt'].replace('Z', '+00:00'))
731
+ )
732
+
733
+ # Update the cache with the new lock expiration
734
+ for cached_request in self._requests_cache.values():
735
+ if cached_request.unique_key == unique_key:
736
+ cached_request.lock_expires_at = result.lock_expires_at
737
+ break
738
+
739
+ return result
740
+
741
+ async def _delete_request_lock(
742
+ self,
743
+ unique_key: str,
744
+ *,
745
+ forefront: bool = False,
746
+ ) -> None:
747
+ """Delete the lock on a specific request in the queue.
748
+
749
+ Args:
750
+ unique_key: Unique key of the request to delete the lock.
751
+ forefront: Whether to put the request in the beginning or the end of the queue after the lock is deleted.
752
+ """
753
+ try:
754
+ await self._api_client.delete_request_lock(
755
+ request_id=unique_key_to_request_id(unique_key),
756
+ forefront=forefront,
757
+ )
758
+
759
+ # Update the cache to remove the lock
760
+ for cached_request in self._requests_cache.values():
761
+ if cached_request.unique_key == unique_key:
762
+ cached_request.lock_expires_at = None
763
+ break
764
+ except Exception as err:
765
+ logger.debug(f'Failed to delete request lock for request {unique_key}', exc_info=err)
766
+
767
+ def _cache_request(
768
+ self,
769
+ cache_key: str,
770
+ processed_request: ProcessedRequest,
771
+ *,
772
+ hydrated_request: Request | None = None,
773
+ ) -> None:
774
+ """Cache a request for future use.
775
+
776
+ Args:
777
+ cache_key: The key to use for caching the request. It should be request ID.
778
+ processed_request: The processed request information.
779
+ forefront: Whether the request was added to the forefront of the queue.
780
+ hydrated_request: The hydrated request object, if available.
781
+ """
782
+ self._requests_cache[cache_key] = CachedRequest(
783
+ unique_key=processed_request.unique_key,
784
+ was_already_handled=processed_request.was_already_handled,
785
+ hydrated=hydrated_request,
786
+ lock_expires_at=None,
787
+ )