apify 2.7.1b19__py3-none-any.whl → 2.7.1b21__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apify might be problematic. Click here for more details.

@@ -1,15 +1,8 @@
1
1
  from __future__ import annotations
2
2
 
3
- import asyncio
4
- import re
5
- from base64 import b64encode
6
- from collections import deque
7
- from datetime import datetime, timedelta, timezone
8
- from hashlib import sha256
9
3
  from logging import getLogger
10
- from typing import TYPE_CHECKING, Final
4
+ from typing import TYPE_CHECKING, Final, Literal
11
5
 
12
- from cachetools import LRUCache
13
6
  from typing_extensions import override
14
7
 
15
8
  from apify_client import ApifyClientAsync
@@ -18,54 +11,24 @@ from crawlee.storage_clients._base import RequestQueueClient
18
11
  from crawlee.storage_clients.models import AddRequestsResponse, ProcessedRequest, RequestQueueMetadata
19
12
  from crawlee.storages import RequestQueue
20
13
 
21
- from ._models import (
22
- ApifyRequestQueueMetadata,
23
- CachedRequest,
24
- ProlongRequestLockResponse,
25
- RequestQueueHead,
26
- RequestQueueStats,
27
- )
14
+ from ._models import ApifyRequestQueueMetadata, RequestQueueStats
15
+ from ._request_queue_shared_client import _ApifyRequestQueueSharedClient
16
+ from ._request_queue_single_client import _ApifyRequestQueueSingleClient
28
17
  from ._utils import AliasResolver
29
- from apify import Request
30
18
 
31
19
  if TYPE_CHECKING:
32
20
  from collections.abc import Sequence
33
21
 
34
22
  from apify_client.clients import RequestQueueClientAsync
23
+ from crawlee import Request
35
24
 
36
25
  from apify import Configuration
37
26
 
38
27
  logger = getLogger(__name__)
39
28
 
40
29
 
41
- def unique_key_to_request_id(unique_key: str, *, request_id_length: int = 15) -> str:
42
- """Generate a deterministic request ID based on a unique key.
43
-
44
- Args:
45
- unique_key: The unique key to convert into a request ID.
46
- request_id_length: The length of the request ID.
47
-
48
- Returns:
49
- A URL-safe, truncated request ID based on the unique key.
50
- """
51
- # Encode the unique key and compute its SHA-256 hash
52
- hashed_key = sha256(unique_key.encode('utf-8')).digest()
53
-
54
- # Encode the hash in base64 and decode it to get a string
55
- base64_encoded = b64encode(hashed_key).decode('utf-8')
56
-
57
- # Remove characters that are not URL-safe ('+', '/', or '=')
58
- url_safe_key = re.sub(r'(\+|\/|=)', '', base64_encoded)
59
-
60
- # Truncate the key to the desired length
61
- return url_safe_key[:request_id_length]
62
-
63
-
64
30
  class ApifyRequestQueueClient(RequestQueueClient):
65
- """An Apify platform implementation of the request queue client."""
66
-
67
- _DEFAULT_LOCK_TIME: Final[timedelta] = timedelta(minutes=3)
68
- """The default lock time for requests in the queue."""
31
+ """Base class for Apify platform implementations of the request queue client."""
69
32
 
70
33
  _MAX_CACHED_REQUESTS: Final[int] = 1_000_000
71
34
  """Maximum number of requests that can be cached."""
@@ -75,6 +38,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
75
38
  *,
76
39
  api_client: RequestQueueClientAsync,
77
40
  metadata: RequestQueueMetadata,
41
+ access: Literal['single', 'shared'] = 'single',
78
42
  ) -> None:
79
43
  """Initialize a new instance.
80
44
 
@@ -83,35 +47,112 @@ class ApifyRequestQueueClient(RequestQueueClient):
83
47
  self._api_client = api_client
84
48
  """The Apify request queue client for API operations."""
85
49
 
86
- self._metadata = metadata
87
- """Additional data related to the RequestQueue."""
50
+ self._implementation: _ApifyRequestQueueSingleClient | _ApifyRequestQueueSharedClient
51
+ """Internal implementation used to communicate with the Apify platform based Request Queue."""
52
+ if access == 'single':
53
+ self._implementation = _ApifyRequestQueueSingleClient(
54
+ api_client=self._api_client, metadata=metadata, cache_size=self._MAX_CACHED_REQUESTS
55
+ )
56
+ elif access == 'shared':
57
+ self._implementation = _ApifyRequestQueueSharedClient(
58
+ api_client=self._api_client,
59
+ metadata=metadata,
60
+ cache_size=self._MAX_CACHED_REQUESTS,
61
+ metadata_getter=self.get_metadata,
62
+ )
63
+ else:
64
+ raise RuntimeError(f"Unsupported access type: {access}. Allowed values are 'single' or 'shared'.")
65
+
66
+ @property
67
+ def _metadata(self) -> RequestQueueMetadata:
68
+ return self._implementation.metadata
69
+
70
+ @override
71
+ async def add_batch_of_requests(
72
+ self,
73
+ requests: Sequence[Request],
74
+ *,
75
+ forefront: bool = False,
76
+ ) -> AddRequestsResponse:
77
+ """Add a batch of requests to the queue.
78
+
79
+ Args:
80
+ requests: The requests to add.
81
+ forefront: Whether to add the requests to the beginning of the queue.
82
+
83
+ Returns:
84
+ Response containing information about the added requests.
85
+ """
86
+ return await self._implementation.add_batch_of_requests(requests, forefront=forefront)
87
+
88
+ @override
89
+ async def fetch_next_request(self) -> Request | None:
90
+ """Return the next request in the queue to be processed.
91
+
92
+ Once you successfully finish processing of the request, you need to call `mark_request_as_handled`
93
+ to mark the request as handled in the queue. If there was some error in processing the request, call
94
+ `reclaim_request` instead, so that the queue will give the request to some other consumer
95
+ in another call to the `fetch_next_request` method.
96
+
97
+ Returns:
98
+ The request or `None` if there are no more pending requests.
99
+ """
100
+ return await self._implementation.fetch_next_request()
101
+
102
+ @override
103
+ async def mark_request_as_handled(self, request: Request) -> ProcessedRequest | None:
104
+ """Mark a request as handled after successful processing.
105
+
106
+ Handled requests will never again be returned by the `fetch_next_request` method.
107
+
108
+ Args:
109
+ request: The request to mark as handled.
88
110
 
89
- self._queue_head = deque[str]()
90
- """A deque to store request unique keys in the queue head."""
111
+ Returns:
112
+ Information about the queue operation. `None` if the given request was not in progress.
113
+ """
114
+ return await self._implementation.mark_request_as_handled(request)
91
115
 
92
- self._requests_cache: LRUCache[str, CachedRequest] = LRUCache(maxsize=self._MAX_CACHED_REQUESTS)
93
- """A cache to store request objects. Request unique key is used as the cache key."""
116
+ @override
117
+ async def get_request(self, unique_key: str) -> Request | None:
118
+ """Get a request by unique key.
119
+
120
+ Args:
121
+ unique_key: Unique key of the request to get.
122
+
123
+ Returns:
124
+ The request or None if not found.
125
+ """
126
+ return await self._implementation.get_request(unique_key)
127
+
128
+ @override
129
+ async def reclaim_request(
130
+ self,
131
+ request: Request,
132
+ *,
133
+ forefront: bool = False,
134
+ ) -> ProcessedRequest | None:
135
+ """Reclaim a failed request back to the queue.
94
136
 
95
- self._queue_has_locked_requests: bool | None = None
96
- """Whether the queue has requests locked by another client."""
137
+ The request will be returned for processing later again by another call to `fetch_next_request`.
97
138
 
98
- self._should_check_for_forefront_requests = False
99
- """Whether to check for forefront requests in the next list_head call."""
139
+ Args:
140
+ request: The request to return to the queue.
141
+ forefront: Whether to add the request to the head or the end of the queue.
100
142
 
101
- self._fetch_lock = asyncio.Lock()
102
- """Fetch lock to minimize race conditions when communicating with API."""
143
+ Returns:
144
+ Information about the queue operation. `None` if the given request was not in progress.
145
+ """
146
+ return await self._implementation.reclaim_request(request, forefront=forefront)
103
147
 
104
- async def _get_metadata_estimate(self) -> RequestQueueMetadata:
105
- """Try to get cached metadata first. If multiple clients, fuse with global metadata.
148
+ @override
149
+ async def is_empty(self) -> bool:
150
+ """Check if the queue is empty.
106
151
 
107
- This method is used internally to avoid unnecessary API call unless needed (multiple clients).
108
- Local estimation of metadata is without delay, unlike metadata from API. In situation where there is only one
109
- client, it is the better choice.
152
+ Returns:
153
+ True if the queue is empty, False otherwise.
110
154
  """
111
- if self._metadata.had_multiple_clients:
112
- return await self.get_metadata()
113
- # Get local estimation (will not include changes done by another client)
114
- return self._metadata
155
+ return await self._implementation.is_empty()
115
156
 
116
157
  @override
117
158
  async def get_metadata(self) -> ApifyRequestQueueMetadata:
@@ -146,6 +187,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
146
187
  name: str | None,
147
188
  alias: str | None,
148
189
  configuration: Configuration,
190
+ access: Literal['single', 'shared'] = 'single',
149
191
  ) -> ApifyRequestQueueClient:
150
192
  """Open an Apify request queue client.
151
193
 
@@ -163,6 +205,17 @@ class ApifyRequestQueueClient(RequestQueueClient):
163
205
  configuration: The configuration object containing API credentials and settings. Must include a valid
164
206
  `token` and `api_base_url`. May also contain a `default_request_queue_id` for fallback when neither
165
207
  `id`, `name`, nor `alias` is provided.
208
+ access: Controls the implementation of the request queue client based on expected scenario:
209
+ - 'single' is suitable for single consumer scenarios. It makes fewer API calls, is cheaper and faster.
210
+ - 'shared' is suitable for multiple consumers scenarios at the cost of higher API usage.
211
+ Detailed constraints for the 'single' access type:
212
+ - Only one client is consuming the request queue at a time.
213
+ - Multiple producers can put requests to the queue, but their forefront requests are not guaranteed to
214
+ be handled promptly, as this client does not aggressively fetch the forefront and relies on local
215
+ head estimation.
216
+ - Requests are only added to the queue, never deleted by other clients. (Marking as handled is ok.)
217
+ - Other producers can add new requests, but not modify existing ones.
218
+ (Modifications would not be included in local cache)
166
219
 
167
220
  Returns:
168
221
  An instance for the opened or created storage client.
@@ -200,8 +253,10 @@ class ApifyRequestQueueClient(RequestQueueClient):
200
253
  )
201
254
  apify_rqs_client = apify_client_async.request_queues()
202
255
 
203
- # Normalize 'default' alias to None
204
- alias = None if alias == 'default' else alias
256
+ # Normalize unnamed default storage in cases where not defined in `configuration.default_request_queue_id` to
257
+ # unnamed storage aliased as `__default__`
258
+ if not any([alias, name, id, configuration.default_request_queue_id]):
259
+ alias = '__default__'
205
260
 
206
261
  if alias:
207
262
  # Check if there is pre-existing alias mapping in the default KVS.
@@ -226,6 +281,11 @@ class ApifyRequestQueueClient(RequestQueueClient):
226
281
  # If none are provided, try to get the default storage ID from environment variables.
227
282
  elif id is None:
228
283
  id = configuration.default_request_queue_id
284
+ if not id:
285
+ raise ValueError(
286
+ 'RequestQueue "id", "name", or "alias" must be specified, '
287
+ 'or a default default_request_queue_id ID must be set in the configuration.'
288
+ )
229
289
 
230
290
  # Use suitable client_key to make `hadMultipleClients` response of Apify API useful.
231
291
  # It should persist across migrated or resurrected Actor runs on the Apify platform.
@@ -253,10 +313,7 @@ class ApifyRequestQueueClient(RequestQueueClient):
253
313
 
254
314
  metadata_model = RequestQueueMetadata.model_validate(metadata)
255
315
 
256
- return cls(
257
- api_client=apify_rq_client,
258
- metadata=metadata_model,
259
- )
316
+ return cls(api_client=apify_rq_client, metadata=metadata_model, access=access)
260
317
 
261
318
  @override
262
319
  async def purge(self) -> None:
@@ -268,540 +325,3 @@ class ApifyRequestQueueClient(RequestQueueClient):
268
325
  @override
269
326
  async def drop(self) -> None:
270
327
  await self._api_client.delete()
271
-
272
- @override
273
- async def add_batch_of_requests(
274
- self,
275
- requests: Sequence[Request],
276
- *,
277
- forefront: bool = False,
278
- ) -> AddRequestsResponse:
279
- """Add a batch of requests to the queue.
280
-
281
- Args:
282
- requests: The requests to add.
283
- forefront: Whether to add the requests to the beginning of the queue.
284
-
285
- Returns:
286
- Response containing information about the added requests.
287
- """
288
- # Do not try to add previously added requests to avoid pointless expensive calls to API
289
-
290
- new_requests: list[Request] = []
291
- already_present_requests: list[ProcessedRequest] = []
292
-
293
- for request in requests:
294
- if self._requests_cache.get(request.unique_key):
295
- # We are not sure if it was already handled at this point, and it is not worth calling API for it.
296
- # It could have been handled by another client in the meantime, so cached information about
297
- # `request.was_already_handled` is not reliable.
298
- already_present_requests.append(
299
- ProcessedRequest.model_validate(
300
- {
301
- 'uniqueKey': request.unique_key,
302
- 'wasAlreadyPresent': True,
303
- 'wasAlreadyHandled': request.was_already_handled,
304
- }
305
- )
306
- )
307
-
308
- else:
309
- # Add new request to the cache.
310
- processed_request = ProcessedRequest.model_validate(
311
- {
312
- 'uniqueKey': request.unique_key,
313
- 'wasAlreadyPresent': True,
314
- 'wasAlreadyHandled': request.was_already_handled,
315
- }
316
- )
317
- self._cache_request(
318
- request.unique_key,
319
- processed_request,
320
- )
321
- new_requests.append(request)
322
-
323
- if new_requests:
324
- # Prepare requests for API by converting to dictionaries.
325
- requests_dict = [
326
- request.model_dump(
327
- by_alias=True,
328
- exclude={'id'}, # Exclude ID fields from requests since the API doesn't accept them.
329
- )
330
- for request in new_requests
331
- ]
332
-
333
- # Send requests to API.
334
- api_response = AddRequestsResponse.model_validate(
335
- await self._api_client.batch_add_requests(requests=requests_dict, forefront=forefront)
336
- )
337
-
338
- # Add the locally known already present processed requests based on the local cache.
339
- api_response.processed_requests.extend(already_present_requests)
340
-
341
- # Remove unprocessed requests from the cache
342
- for unprocessed_request in api_response.unprocessed_requests:
343
- self._requests_cache.pop(unprocessed_request.unique_key, None)
344
-
345
- else:
346
- api_response = AddRequestsResponse.model_validate(
347
- {'unprocessedRequests': [], 'processedRequests': already_present_requests}
348
- )
349
-
350
- logger.debug(
351
- f'Tried to add new requests: {len(new_requests)}, '
352
- f'succeeded to add new requests: {len(api_response.processed_requests) - len(already_present_requests)}, '
353
- f'skipped already present requests: {len(already_present_requests)}'
354
- )
355
-
356
- # Update assumed total count for newly added requests.
357
- new_request_count = 0
358
- for processed_request in api_response.processed_requests:
359
- if not processed_request.was_already_present and not processed_request.was_already_handled:
360
- new_request_count += 1
361
-
362
- self._metadata.total_request_count += new_request_count
363
-
364
- return api_response
365
-
366
- @override
367
- async def get_request(self, unique_key: str) -> Request | None:
368
- """Get a request by unique key.
369
-
370
- Args:
371
- unique_key: Unique key of the request to get.
372
-
373
- Returns:
374
- The request or None if not found.
375
- """
376
- response = await self._api_client.get_request(unique_key_to_request_id(unique_key))
377
-
378
- if response is None:
379
- return None
380
-
381
- return Request.model_validate(response)
382
-
383
- @override
384
- async def fetch_next_request(self) -> Request | None:
385
- """Return the next request in the queue to be processed.
386
-
387
- Once you successfully finish processing of the request, you need to call `mark_request_as_handled`
388
- to mark the request as handled in the queue. If there was some error in processing the request, call
389
- `reclaim_request` instead, so that the queue will give the request to some other consumer
390
- in another call to the `fetch_next_request` method.
391
-
392
- Returns:
393
- The request or `None` if there are no more pending requests.
394
- """
395
- # Ensure the queue head has requests if available. Fetching the head with lock to prevent race conditions.
396
- async with self._fetch_lock:
397
- await self._ensure_head_is_non_empty()
398
-
399
- # If queue head is empty after ensuring, there are no requests
400
- if not self._queue_head:
401
- return None
402
-
403
- # Get the next request ID from the queue head
404
- next_unique_key = self._queue_head.popleft()
405
-
406
- request = await self._get_or_hydrate_request(next_unique_key)
407
-
408
- # Handle potential inconsistency where request might not be in the main table yet
409
- if request is None:
410
- logger.debug(
411
- 'Cannot find a request from the beginning of queue, will be retried later',
412
- extra={'nextRequestUniqueKey': next_unique_key},
413
- )
414
- return None
415
-
416
- # If the request was already handled, skip it
417
- if request.handled_at is not None:
418
- logger.debug(
419
- 'Request fetched from the beginning of queue was already handled',
420
- extra={'nextRequestUniqueKey': next_unique_key},
421
- )
422
- return None
423
-
424
- # Use get request to ensure we have the full request object.
425
- request = await self.get_request(request.unique_key)
426
- if request is None:
427
- logger.debug(
428
- 'Request fetched from the beginning of queue was not found in the RQ',
429
- extra={'nextRequestUniqueKey': next_unique_key},
430
- )
431
- return None
432
-
433
- return request
434
-
435
- @override
436
- async def mark_request_as_handled(self, request: Request) -> ProcessedRequest | None:
437
- """Mark a request as handled after successful processing.
438
-
439
- Handled requests will never again be returned by the `fetch_next_request` method.
440
-
441
- Args:
442
- request: The request to mark as handled.
443
-
444
- Returns:
445
- Information about the queue operation. `None` if the given request was not in progress.
446
- """
447
- # Set the handled_at timestamp if not already set
448
- if request.handled_at is None:
449
- request.handled_at = datetime.now(tz=timezone.utc)
450
-
451
- if cached_request := self._requests_cache[request.unique_key]:
452
- cached_request.was_already_handled = request.was_already_handled
453
- try:
454
- # Update the request in the API
455
- processed_request = await self._update_request(request)
456
- processed_request.unique_key = request.unique_key
457
-
458
- # Update assumed handled count if this wasn't already handled
459
- if not processed_request.was_already_handled:
460
- self._metadata.handled_request_count += 1
461
-
462
- # Update the cache with the handled request
463
- cache_key = request.unique_key
464
- self._cache_request(
465
- cache_key,
466
- processed_request,
467
- hydrated_request=request,
468
- )
469
- except Exception as exc:
470
- logger.debug(f'Error marking request {request.unique_key} as handled: {exc!s}')
471
- return None
472
- else:
473
- return processed_request
474
-
475
- @override
476
- async def reclaim_request(
477
- self,
478
- request: Request,
479
- *,
480
- forefront: bool = False,
481
- ) -> ProcessedRequest | None:
482
- """Reclaim a failed request back to the queue.
483
-
484
- The request will be returned for processing later again by another call to `fetch_next_request`.
485
-
486
- Args:
487
- request: The request to return to the queue.
488
- forefront: Whether to add the request to the head or the end of the queue.
489
-
490
- Returns:
491
- Information about the queue operation. `None` if the given request was not in progress.
492
- """
493
- # Check if the request was marked as handled and clear it. When reclaiming,
494
- # we want to put the request back for processing.
495
- if request.was_already_handled:
496
- request.handled_at = None
497
-
498
- # Reclaim with lock to prevent race conditions that could lead to double processing of the same request.
499
- async with self._fetch_lock:
500
- try:
501
- # Update the request in the API.
502
- processed_request = await self._update_request(request, forefront=forefront)
503
- processed_request.unique_key = request.unique_key
504
-
505
- # If the request was previously handled, decrement our handled count since
506
- # we're putting it back for processing.
507
- if request.was_already_handled and not processed_request.was_already_handled:
508
- self._metadata.handled_request_count -= 1
509
-
510
- # Update the cache
511
- cache_key = request.unique_key
512
- self._cache_request(
513
- cache_key,
514
- processed_request,
515
- hydrated_request=request,
516
- )
517
-
518
- # If we're adding to the forefront, we need to check for forefront requests
519
- # in the next list_head call
520
- if forefront:
521
- self._should_check_for_forefront_requests = True
522
-
523
- # Try to release the lock on the request
524
- try:
525
- await self._delete_request_lock(request.unique_key, forefront=forefront)
526
- except Exception as err:
527
- logger.debug(f'Failed to delete request lock for request {request.unique_key}', exc_info=err)
528
- except Exception as exc:
529
- logger.debug(f'Error reclaiming request {request.unique_key}: {exc!s}')
530
- return None
531
- else:
532
- return processed_request
533
-
534
- @override
535
- async def is_empty(self) -> bool:
536
- """Check if the queue is empty.
537
-
538
- Returns:
539
- True if the queue is empty, False otherwise.
540
- """
541
- # Check _list_head and self._queue_has_locked_requests with lock to make sure they are consistent.
542
- # Without the lock the `is_empty` is prone to falsely report True with some low probability race condition.
543
- async with self._fetch_lock:
544
- head = await self._list_head(limit=1, lock_time=None)
545
- return len(head.items) == 0 and not self._queue_has_locked_requests
546
-
547
- async def _ensure_head_is_non_empty(self) -> None:
548
- """Ensure that the queue head has requests if they are available in the queue."""
549
- # If queue head has adequate requests, skip fetching more
550
- if len(self._queue_head) > 1 and not self._should_check_for_forefront_requests:
551
- return
552
-
553
- # Fetch requests from the API and populate the queue head
554
- await self._list_head(lock_time=self._DEFAULT_LOCK_TIME)
555
-
556
- async def _get_or_hydrate_request(self, unique_key: str) -> Request | None:
557
- """Get a request by unique key, either from cache or by fetching from API.
558
-
559
- Args:
560
- unique_key: Unique key of the request to get.
561
-
562
- Returns:
563
- The request if found and valid, otherwise None.
564
- """
565
- # First check if the request is in our cache
566
- cached_entry = self._requests_cache.get(unique_key)
567
-
568
- if cached_entry and cached_entry.hydrated:
569
- # If we have the request hydrated in cache, check if lock is expired
570
- if cached_entry.lock_expires_at and cached_entry.lock_expires_at < datetime.now(tz=timezone.utc):
571
- # Try to prolong the lock if it's expired
572
- try:
573
- lock_secs = int(self._DEFAULT_LOCK_TIME.total_seconds())
574
- response = await self._prolong_request_lock(unique_key, lock_secs=lock_secs)
575
- cached_entry.lock_expires_at = response.lock_expires_at
576
- except Exception:
577
- # If prolonging the lock fails, we lost the request
578
- logger.debug(f'Failed to prolong lock for request {unique_key}, returning None')
579
- return None
580
-
581
- return cached_entry.hydrated
582
-
583
- # If not in cache or not hydrated, fetch the request
584
- try:
585
- # Try to acquire or prolong the lock
586
- lock_secs = int(self._DEFAULT_LOCK_TIME.total_seconds())
587
- await self._prolong_request_lock(unique_key, lock_secs=lock_secs)
588
-
589
- # Fetch the request data
590
- request = await self.get_request(unique_key)
591
-
592
- # If request is not found, release lock and return None
593
- if not request:
594
- await self._delete_request_lock(unique_key)
595
- return None
596
-
597
- # Update cache with hydrated request
598
- cache_key = request.unique_key
599
- self._cache_request(
600
- cache_key,
601
- ProcessedRequest(
602
- unique_key=request.unique_key,
603
- was_already_present=True,
604
- was_already_handled=request.handled_at is not None,
605
- ),
606
- hydrated_request=request,
607
- )
608
- except Exception as exc:
609
- logger.debug(f'Error fetching or locking request {unique_key}: {exc!s}')
610
- return None
611
- else:
612
- return request
613
-
614
- async def _update_request(
615
- self,
616
- request: Request,
617
- *,
618
- forefront: bool = False,
619
- ) -> ProcessedRequest:
620
- """Update a request in the queue.
621
-
622
- Args:
623
- request: The updated request.
624
- forefront: Whether to put the updated request in the beginning or the end of the queue.
625
-
626
- Returns:
627
- The updated request
628
- """
629
- request_dict = request.model_dump(by_alias=True)
630
- request_dict['id'] = unique_key_to_request_id(request.unique_key)
631
- response = await self._api_client.update_request(
632
- request=request_dict,
633
- forefront=forefront,
634
- )
635
-
636
- return ProcessedRequest.model_validate(
637
- {'uniqueKey': request.unique_key} | response,
638
- )
639
-
640
- async def _list_head(
641
- self,
642
- *,
643
- lock_time: timedelta | None = None,
644
- limit: int = 25,
645
- ) -> RequestQueueHead:
646
- """Retrieve requests from the beginning of the queue.
647
-
648
- Args:
649
- lock_time: Duration for which to lock the retrieved requests.
650
- If None, requests will not be locked.
651
- limit: Maximum number of requests to retrieve.
652
-
653
- Returns:
654
- A collection of requests from the beginning of the queue.
655
- """
656
- # Return from cache if available and we're not checking for new forefront requests
657
- if self._queue_head and not self._should_check_for_forefront_requests:
658
- logger.debug(f'Using cached queue head with {len(self._queue_head)} requests')
659
- # Create a list of requests from the cached queue head
660
- items = []
661
- for unique_key in list(self._queue_head)[:limit]:
662
- cached_request = self._requests_cache.get(unique_key)
663
- if cached_request and cached_request.hydrated:
664
- items.append(cached_request.hydrated)
665
-
666
- metadata = await self._get_metadata_estimate()
667
-
668
- return RequestQueueHead(
669
- limit=limit,
670
- had_multiple_clients=metadata.had_multiple_clients,
671
- queue_modified_at=metadata.modified_at,
672
- items=items,
673
- queue_has_locked_requests=self._queue_has_locked_requests,
674
- lock_time=lock_time,
675
- )
676
- leftover_buffer = list[str]()
677
- if self._should_check_for_forefront_requests:
678
- leftover_buffer = list(self._queue_head)
679
- self._queue_head.clear()
680
- self._should_check_for_forefront_requests = False
681
-
682
- # Otherwise fetch from API
683
- lock_time = lock_time or self._DEFAULT_LOCK_TIME
684
- lock_secs = int(lock_time.total_seconds())
685
-
686
- response = await self._api_client.list_and_lock_head(
687
- lock_secs=lock_secs,
688
- limit=limit,
689
- )
690
-
691
- # Update the queue head cache
692
- self._queue_has_locked_requests = response.get('queueHasLockedRequests', False)
693
- # Check if there is another client working with the RequestQueue
694
- self._metadata.had_multiple_clients = response.get('hadMultipleClients', False)
695
-
696
- for request_data in response.get('items', []):
697
- request = Request.model_validate(request_data)
698
-
699
- # Skip requests without ID or unique key
700
- if not request.unique_key:
701
- logger.debug(
702
- 'Skipping request from queue head, missing ID or unique key',
703
- extra={
704
- 'unique_key': request.unique_key,
705
- },
706
- )
707
- continue
708
-
709
- # Cache the request
710
- self._cache_request(
711
- request.unique_key,
712
- ProcessedRequest(
713
- unique_key=request.unique_key,
714
- was_already_present=True,
715
- was_already_handled=False,
716
- ),
717
- hydrated_request=request,
718
- )
719
- self._queue_head.append(request.unique_key)
720
-
721
- for leftover_unique_key in leftover_buffer:
722
- # After adding new requests to the forefront, any existing leftover locked request is kept in the end.
723
- self._queue_head.append(leftover_unique_key)
724
- return RequestQueueHead.model_validate(response)
725
-
726
- async def _prolong_request_lock(
727
- self,
728
- unique_key: str,
729
- *,
730
- lock_secs: int,
731
- ) -> ProlongRequestLockResponse:
732
- """Prolong the lock on a specific request in the queue.
733
-
734
- Args:
735
- unique_key: Unique key of the request whose lock is to be prolonged.
736
- lock_secs: The additional amount of time, in seconds, that the request will remain locked.
737
-
738
- Returns:
739
- A response containing the time at which the lock will expire.
740
- """
741
- response = await self._api_client.prolong_request_lock(
742
- request_id=unique_key_to_request_id(unique_key),
743
- # All requests reaching this code were the tip of the queue at the moment when they were fetched,
744
- # so if their lock expires, they should be put back to the forefront as their handling is long overdue.
745
- forefront=True,
746
- lock_secs=lock_secs,
747
- )
748
-
749
- result = ProlongRequestLockResponse(
750
- lock_expires_at=datetime.fromisoformat(response['lockExpiresAt'].replace('Z', '+00:00'))
751
- )
752
-
753
- # Update the cache with the new lock expiration
754
- for cached_request in self._requests_cache.values():
755
- if cached_request.unique_key == unique_key:
756
- cached_request.lock_expires_at = result.lock_expires_at
757
- break
758
-
759
- return result
760
-
761
- async def _delete_request_lock(
762
- self,
763
- unique_key: str,
764
- *,
765
- forefront: bool = False,
766
- ) -> None:
767
- """Delete the lock on a specific request in the queue.
768
-
769
- Args:
770
- unique_key: Unique key of the request to delete the lock.
771
- forefront: Whether to put the request in the beginning or the end of the queue after the lock is deleted.
772
- """
773
- try:
774
- await self._api_client.delete_request_lock(
775
- request_id=unique_key_to_request_id(unique_key),
776
- forefront=forefront,
777
- )
778
-
779
- # Update the cache to remove the lock
780
- for cached_request in self._requests_cache.values():
781
- if cached_request.unique_key == unique_key:
782
- cached_request.lock_expires_at = None
783
- break
784
- except Exception as err:
785
- logger.debug(f'Failed to delete request lock for request {unique_key}', exc_info=err)
786
-
787
- def _cache_request(
788
- self,
789
- cache_key: str,
790
- processed_request: ProcessedRequest,
791
- *,
792
- hydrated_request: Request | None = None,
793
- ) -> None:
794
- """Cache a request for future use.
795
-
796
- Args:
797
- cache_key: The key to use for caching the request. It should be request ID.
798
- processed_request: The processed request information.
799
- forefront: Whether the request was added to the forefront of the queue.
800
- hydrated_request: The hydrated request object, if available.
801
- """
802
- self._requests_cache[cache_key] = CachedRequest(
803
- unique_key=processed_request.unique_key,
804
- was_already_handled=processed_request.was_already_handled,
805
- hydrated=hydrated_request,
806
- lock_expires_at=None,
807
- )