apify 1.7.1b1__py3-none-any.whl → 2.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- apify/__init__.py +33 -4
- apify/_actor.py +1074 -0
- apify/_configuration.py +370 -0
- apify/_consts.py +10 -0
- apify/_crypto.py +31 -27
- apify/_models.py +117 -0
- apify/_platform_event_manager.py +231 -0
- apify/_proxy_configuration.py +320 -0
- apify/_utils.py +18 -484
- apify/apify_storage_client/__init__.py +3 -0
- apify/apify_storage_client/_apify_storage_client.py +68 -0
- apify/apify_storage_client/_dataset_client.py +190 -0
- apify/apify_storage_client/_dataset_collection_client.py +51 -0
- apify/apify_storage_client/_key_value_store_client.py +94 -0
- apify/apify_storage_client/_key_value_store_collection_client.py +51 -0
- apify/apify_storage_client/_request_queue_client.py +176 -0
- apify/apify_storage_client/_request_queue_collection_client.py +51 -0
- apify/apify_storage_client/py.typed +0 -0
- apify/log.py +22 -105
- apify/scrapy/__init__.py +11 -3
- apify/scrapy/middlewares/__init__.py +3 -1
- apify/scrapy/middlewares/apify_proxy.py +29 -27
- apify/scrapy/middlewares/py.typed +0 -0
- apify/scrapy/pipelines/__init__.py +3 -1
- apify/scrapy/pipelines/actor_dataset_push.py +6 -3
- apify/scrapy/pipelines/py.typed +0 -0
- apify/scrapy/py.typed +0 -0
- apify/scrapy/requests.py +60 -58
- apify/scrapy/scheduler.py +28 -19
- apify/scrapy/utils.py +10 -32
- apify/storages/__init__.py +4 -10
- apify/storages/_request_list.py +150 -0
- apify/storages/py.typed +0 -0
- apify-2.2.1.dist-info/METADATA +211 -0
- apify-2.2.1.dist-info/RECORD +38 -0
- {apify-1.7.1b1.dist-info → apify-2.2.1.dist-info}/WHEEL +1 -2
- apify/_memory_storage/__init__.py +0 -3
- apify/_memory_storage/file_storage_utils.py +0 -71
- apify/_memory_storage/memory_storage_client.py +0 -219
- apify/_memory_storage/resource_clients/__init__.py +0 -19
- apify/_memory_storage/resource_clients/base_resource_client.py +0 -141
- apify/_memory_storage/resource_clients/base_resource_collection_client.py +0 -114
- apify/_memory_storage/resource_clients/dataset.py +0 -452
- apify/_memory_storage/resource_clients/dataset_collection.py +0 -48
- apify/_memory_storage/resource_clients/key_value_store.py +0 -533
- apify/_memory_storage/resource_clients/key_value_store_collection.py +0 -48
- apify/_memory_storage/resource_clients/request_queue.py +0 -466
- apify/_memory_storage/resource_clients/request_queue_collection.py +0 -48
- apify/actor.py +0 -1351
- apify/config.py +0 -127
- apify/consts.py +0 -67
- apify/event_manager.py +0 -236
- apify/proxy_configuration.py +0 -365
- apify/storages/base_storage.py +0 -181
- apify/storages/dataset.py +0 -494
- apify/storages/key_value_store.py +0 -257
- apify/storages/request_queue.py +0 -602
- apify/storages/storage_client_manager.py +0 -72
- apify-1.7.1b1.dist-info/METADATA +0 -149
- apify-1.7.1b1.dist-info/RECORD +0 -41
- apify-1.7.1b1.dist-info/top_level.txt +0 -1
- {apify-1.7.1b1.dist-info → apify-2.2.1.dist-info}/LICENSE +0 -0
|
@@ -1,466 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
import json
|
|
5
|
-
import os
|
|
6
|
-
from datetime import datetime, timezone
|
|
7
|
-
from decimal import Decimal
|
|
8
|
-
from typing import TYPE_CHECKING
|
|
9
|
-
|
|
10
|
-
import aioshutil
|
|
11
|
-
from apify_shared.utils import filter_out_none_values_recursively, ignore_docs, json_dumps
|
|
12
|
-
from sortedcollections import ValueSortedDict
|
|
13
|
-
|
|
14
|
-
from apify._crypto import crypto_random_object_id
|
|
15
|
-
from apify._memory_storage.file_storage_utils import delete_request, update_metadata, update_request_queue_item
|
|
16
|
-
from apify._memory_storage.resource_clients.base_resource_client import BaseResourceClient
|
|
17
|
-
from apify._utils import force_rename, raise_on_duplicate_storage, raise_on_non_existing_storage, unique_key_to_request_id
|
|
18
|
-
from apify.consts import StorageTypes
|
|
19
|
-
|
|
20
|
-
if TYPE_CHECKING:
|
|
21
|
-
from apify._memory_storage.memory_storage_client import MemoryStorageClient
|
|
22
|
-
|
|
23
|
-
|
|
24
|
-
@ignore_docs
class RequestQueueClient(BaseResourceClient):
    """Sub-client for manipulating a single request queue.

    Requests are held in memory in a `ValueSortedDict` keyed by request ID and sorted by `orderNo`.
    A request whose `orderNo` is `None` is treated as handled; pending requests carry a unique `Decimal`
    order number, which is negated when the request was added to the forefront of the queue.
    """

    _id: str
    _resource_directory: str
    _memory_storage_client: MemoryStorageClient
    _name: str | None
    # Maps request ID -> internal request dict; handled requests (orderNo is None) sort first.
    _requests: ValueSortedDict
    _created_at: datetime
    _accessed_at: datetime
    _modified_at: datetime
    _handled_request_count = 0
    _pending_request_count = 0
    # Last timestamp (in milliseconds) handed out by `_calculate_order_no`, used to keep order numbers unique.
    _last_used_timestamp = Decimal(0)
    _file_operation_lock: asyncio.Lock

    def __init__(
        self: RequestQueueClient,
        *,
        base_storage_directory: str,
        memory_storage_client: MemoryStorageClient,
        id: str | None = None,  # noqa: A002
        name: str | None = None,
    ) -> None:
        """Initialize the RequestQueueClient.

        Args:
            base_storage_directory (str): Directory under which this queue's own directory is placed
            memory_storage_client (MemoryStorageClient): The storage client this queue belongs to
            id (str, optional): ID of the queue; a random object ID is generated when omitted
            name (str, optional): Name of the queue; when given, it is used as the directory name instead of the ID
        """
        self._id = id or crypto_random_object_id()
        self._resource_directory = os.path.join(base_storage_directory, name or self._id)
        self._memory_storage_client = memory_storage_client
        self._name = name
        # Handled requests have no `orderNo`, so they are keyed as negative infinity and sort before all pending ones.
        self._requests = ValueSortedDict(lambda req: req.get('orderNo') or -float('inf'))
        self._created_at = datetime.now(timezone.utc)
        self._accessed_at = datetime.now(timezone.utc)
        self._modified_at = datetime.now(timezone.utc)
        self._file_operation_lock = asyncio.Lock()

    async def get(self: RequestQueueClient) -> dict | None:
        """Retrieve the request queue.

        Returns:
            dict, optional: The retrieved request queue, or None, if it does not exist
        """
        found = self._find_or_create_client_by_id_or_name(memory_storage_client=self._memory_storage_client, id=self._id, name=self._name)

        if found:
            async with found._file_operation_lock:
                await found._update_timestamps(has_been_modified=False)
                return found._to_resource_info()

        return None

    async def update(self: RequestQueueClient, *, name: str | None = None) -> dict:
        """Update the request queue with specified fields.

        Args:
            name (str, optional): The new name for the request queue

        Returns:
            dict: The updated request queue
        """
        # Check by id
        existing_queue_by_id = self._find_or_create_client_by_id_or_name(
            memory_storage_client=self._memory_storage_client, id=self._id, name=self._name
        )

        if existing_queue_by_id is None:
            raise_on_non_existing_storage(StorageTypes.REQUEST_QUEUE, self._id)

        # Skip if no changes
        if name is None:
            return existing_queue_by_id._to_resource_info()

        async with existing_queue_by_id._file_operation_lock:
            # Check that name is not in use already (names are compared case-insensitively)
            requested_name = name.lower()
            existing_queue_by_name = next(
                (queue for queue in self._memory_storage_client._request_queues_handled if queue._name and queue._name.lower() == requested_name),
                None,
            )

            if existing_queue_by_name is not None:
                raise_on_duplicate_storage(StorageTypes.REQUEST_QUEUE, 'name', name)

            previous_dir = existing_queue_by_id._resource_directory
            new_dir = os.path.join(self._memory_storage_client._request_queues_directory, name)

            # Rename on disk first, so that a failed rename does not leave the in-memory
            # name and directory pointing at a location which does not exist.
            await force_rename(previous_dir, new_dir)

            existing_queue_by_id._name = name
            existing_queue_by_id._resource_directory = new_dir

            # Update timestamps
            await existing_queue_by_id._update_timestamps(has_been_modified=True)

        return existing_queue_by_id._to_resource_info()

    async def delete(self: RequestQueueClient) -> None:
        """Delete the request queue.

        Only queues already present in the storage client's cache are considered; nothing happens otherwise.
        """
        queue = next((queue for queue in self._memory_storage_client._request_queues_handled if queue._id == self._id), None)

        if queue is not None:
            async with queue._file_operation_lock:
                self._memory_storage_client._request_queues_handled.remove(queue)
                queue._pending_request_count = 0
                queue._handled_request_count = 0
                queue._requests.clear()

                if os.path.exists(queue._resource_directory):
                    await aioshutil.rmtree(queue._resource_directory)

    async def list_head(self: RequestQueueClient, *, limit: int | None = None) -> dict:
        """Retrieve a given number of requests from the beginning of the queue.

        Args:
            limit (int, optional): How many requests to retrieve

        Returns:
            dict: The desired number of requests from the beginning of the queue.
        """
        existing_queue_by_id = self._find_or_create_client_by_id_or_name(
            memory_storage_client=self._memory_storage_client, id=self._id, name=self._name
        )

        if existing_queue_by_id is None:
            raise_on_non_existing_storage(StorageTypes.REQUEST_QUEUE, self._id)

        async with existing_queue_by_id._file_operation_lock:
            await existing_queue_by_id._update_timestamps(has_been_modified=False)

            items: list[dict] = []

            # Iterate all requests in the queue which have sorted key larger than negative infinity,
            # which means `orderNo` is not `None`. This will iterate them in order of `orderNo`.
            for request_key in existing_queue_by_id._requests.irange_key(min_key=-float('inf'), inclusive=(False, True)):
                if len(items) == limit:
                    break

                request = existing_queue_by_id._requests.get(request_key)

                # Check that the request still exists and was not handled,
                # in case something deleted it or marked it as handled concurrently
                if request and request['orderNo']:
                    items.append(request)

            return {
                'limit': limit,
                'hadMultipleClients': False,
                'queueModifiedAt': existing_queue_by_id._modified_at,
                'items': [self._json_to_request(item['json']) for item in items],
            }

    async def add_request(self: RequestQueueClient, request: dict, *, forefront: bool | None = None) -> dict:
        """Add a request to the queue.

        Args:
            request (dict): The request to add to the queue
            forefront (bool, optional): Whether to add the request to the head or the end of the queue

        Returns:
            dict: The added request.
        """
        existing_queue_by_id = self._find_or_create_client_by_id_or_name(
            memory_storage_client=self._memory_storage_client, id=self._id, name=self._name
        )

        if existing_queue_by_id is None:
            raise_on_non_existing_storage(StorageTypes.REQUEST_QUEUE, self._id)

        request_model = self._create_internal_request(request, forefront)

        async with existing_queue_by_id._file_operation_lock:
            existing_request_with_id = existing_queue_by_id._requests.get(request_model['id'])

            # We already have the request present, so we return information about it
            if existing_request_with_id is not None:
                await existing_queue_by_id._update_timestamps(has_been_modified=False)

                return {
                    'requestId': existing_request_with_id['id'],
                    'wasAlreadyHandled': existing_request_with_id['orderNo'] is None,
                    'wasAlreadyPresent': True,
                }

            existing_queue_by_id._requests[request_model['id']] = request_model
            if request_model['orderNo'] is None:
                existing_queue_by_id._handled_request_count += 1
            else:
                existing_queue_by_id._pending_request_count += 1
            await existing_queue_by_id._update_timestamps(has_been_modified=True)
            await update_request_queue_item(
                request=request_model,
                request_id=request_model['id'],
                entity_directory=existing_queue_by_id._resource_directory,
                persist_storage=self._memory_storage_client._persist_storage,
            )

            return {
                'requestId': request_model['id'],
                # We return wasAlreadyHandled: false even though the request may
                # have been added as handled, because that's how API behaves.
                'wasAlreadyHandled': False,
                'wasAlreadyPresent': False,
            }

    async def get_request(self: RequestQueueClient, request_id: str) -> dict | None:
        """Retrieve a request from the queue.

        Args:
            request_id (str): ID of the request to retrieve

        Returns:
            dict, optional: The retrieved request, or None, if it did not exist.
        """
        existing_queue_by_id = self._find_or_create_client_by_id_or_name(
            memory_storage_client=self._memory_storage_client, id=self._id, name=self._name
        )

        if existing_queue_by_id is None:
            raise_on_non_existing_storage(StorageTypes.REQUEST_QUEUE, self._id)

        async with existing_queue_by_id._file_operation_lock:
            await existing_queue_by_id._update_timestamps(has_been_modified=False)

            request = existing_queue_by_id._requests.get(request_id)
            return self._json_to_request(request['json'] if request is not None else None)

    async def update_request(self: RequestQueueClient, request: dict, *, forefront: bool | None = None) -> dict:
        """Update a request in the queue.

        Args:
            request (dict): The updated request
            forefront (bool, optional): Whether to put the updated request in the beginning or the end of the queue

        Returns:
            dict: The updated request
        """
        existing_queue_by_id = self._find_or_create_client_by_id_or_name(
            memory_storage_client=self._memory_storage_client, id=self._id, name=self._name
        )

        if existing_queue_by_id is None:
            raise_on_non_existing_storage(StorageTypes.REQUEST_QUEUE, self._id)

        request_model = self._create_internal_request(request, forefront)

        # First we need to check the existing request to be
        # able to return information about its handled state.
        existing_request = existing_queue_by_id._requests.get(request_model['id'])

        # None means that the request is not present in the queue.
        # We need to insert it, to behave the same as API.
        # This must happen before taking the lock, because `add_request` acquires the same lock itself.
        if existing_request is None:
            return await self.add_request(request, forefront=forefront)

        async with existing_queue_by_id._file_operation_lock:
            # When updating the request, we need to make sure that
            # the handled counts are updated correctly in all cases.
            existing_queue_by_id._requests[request_model['id']] = request_model

            # A request is handled exactly when its `orderNo` is None; compare that state directly
            # instead of comparing the types of the two order numbers.
            request_was_handled_before_update = existing_request['orderNo'] is None
            request_is_handled_after_update = request_model['orderNo'] is None

            # We add 1 pending request if previous state was handled, and remove 1 if it has just become handled
            pending_count_adjustment = 0
            if request_was_handled_before_update != request_is_handled_after_update:
                pending_count_adjustment = 1 if request_was_handled_before_update else -1

            existing_queue_by_id._pending_request_count += pending_count_adjustment
            existing_queue_by_id._handled_request_count -= pending_count_adjustment
            await existing_queue_by_id._update_timestamps(has_been_modified=True)
            await update_request_queue_item(
                request=request_model,
                request_id=request_model['id'],
                entity_directory=existing_queue_by_id._resource_directory,
                persist_storage=self._memory_storage_client._persist_storage,
            )

            return {
                'requestId': request_model['id'],
                'wasAlreadyHandled': request_was_handled_before_update,
                'wasAlreadyPresent': True,
            }

    async def delete_request(self: RequestQueueClient, request_id: str) -> None:
        """Delete a request from the queue.

        Args:
            request_id (str): ID of the request to delete.
        """
        existing_queue_by_id = self._find_or_create_client_by_id_or_name(
            memory_storage_client=self._memory_storage_client, id=self._id, name=self._name
        )

        if existing_queue_by_id is None:
            raise_on_non_existing_storage(StorageTypes.REQUEST_QUEUE, self._id)

        async with existing_queue_by_id._file_operation_lock:
            request = existing_queue_by_id._requests.get(request_id)

            if request:
                del existing_queue_by_id._requests[request_id]
                if request['orderNo'] is None:
                    existing_queue_by_id._handled_request_count -= 1
                else:
                    existing_queue_by_id._pending_request_count -= 1
                await existing_queue_by_id._update_timestamps(has_been_modified=True)
                # Inside the class body this name resolves to the module-level `delete_request` helper, not to this method.
                await delete_request(entity_directory=existing_queue_by_id._resource_directory, request_id=request_id)

    def _to_resource_info(self: RequestQueueClient) -> dict:
        """Retrieve the request queue store info."""
        return {
            'accessedAt': self._accessed_at,
            'createdAt': self._created_at,
            'hadMultipleClients': False,
            'handledRequestCount': self._handled_request_count,
            'id': self._id,
            'modifiedAt': self._modified_at,
            'name': self._name,
            'pendingRequestCount': self._pending_request_count,
            'stats': {},
            'totalRequestCount': len(self._requests),
            'userId': '1',
        }

    async def _update_timestamps(self: RequestQueueClient, has_been_modified: bool) -> None:  # noqa: FBT001
        """Refresh the access (and optionally modification) timestamp and pass the queue info to `update_metadata`.

        Args:
            has_been_modified (bool): Whether the modification timestamp should be refreshed as well
        """
        self._accessed_at = datetime.now(timezone.utc)

        if has_been_modified:
            self._modified_at = datetime.now(timezone.utc)

        request_queue_info = self._to_resource_info()
        await update_metadata(
            data=request_queue_info,
            entity_directory=self._resource_directory,
            write_metadata=self._memory_storage_client._write_metadata,
        )

    def _json_to_request(self: RequestQueueClient, request_json: str | None) -> dict | None:
        """Parse a stored request JSON string and filter out its None values; None is passed through."""
        if request_json is None:
            return None
        request = json.loads(request_json)
        return filter_out_none_values_recursively(request)

    def _create_internal_request(self: RequestQueueClient, request: dict, forefront: bool | None) -> dict:
        """Build the internal representation of a request, as stored in `_requests`.

        Args:
            request (dict): The request as passed by the caller; must contain `uniqueKey` and `url`
            forefront (bool, optional): Whether the request should be placed at the head of the queue

        Returns:
            dict: The internal request, including its derived `id`, its `orderNo` and its serialized `json` form

        Raises:
            ValueError: If the request carries an `id` which does not match the ID derived from its `uniqueKey`
        """
        order_no = self._calculate_order_no(request, forefront)
        id = unique_key_to_request_id(request['uniqueKey'])  # noqa: A001

        if request.get('id') is not None and request['id'] != id:
            raise ValueError('Request ID does not match its unique_key.')

        json_request = json_dumps({**request, 'id': id})
        return {
            'id': id,
            'json': json_request,
            'method': request.get('method'),
            'orderNo': order_no,
            'retryCount': request.get('retryCount', 0),
            'uniqueKey': request['uniqueKey'],
            'url': request['url'],
        }

    def _calculate_order_no(self: RequestQueueClient, request: dict, forefront: bool | None) -> Decimal | None:
        """Compute the order number of a request.

        Args:
            request (dict): The request to compute the order number for
            forefront (bool, optional): Whether the request should be placed at the head of the queue

        Returns:
            Decimal, optional: None when the request has `handledAt` set, otherwise a unique millisecond
                timestamp, negated when `forefront` is truthy.
        """
        if request.get('handledAt') is not None:
            return None

        # Get the current timestamp in milliseconds
        timestamp = Decimal(datetime.now(timezone.utc).timestamp()) * 1000
        timestamp = round(timestamp, 6)

        # Make sure that this timestamp was not used yet, so that we have unique orderNos.
        # The step is built from a string so that it is exactly 1e-6, not the nearest binary float.
        if timestamp <= self._last_used_timestamp:
            timestamp = self._last_used_timestamp + Decimal('0.000001')

        self._last_used_timestamp = timestamp

        return -timestamp if forefront else timestamp

    @classmethod
    def _get_storages_dir(cls: type[RequestQueueClient], memory_storage_client: MemoryStorageClient) -> str:
        """Return the directory in which the given storage client keeps its request queues."""
        return memory_storage_client._request_queues_directory

    @classmethod
    def _get_storage_client_cache(
        cls: type[RequestQueueClient],
        memory_storage_client: MemoryStorageClient,
    ) -> list[RequestQueueClient]:
        """Return the given storage client's list of request queue clients."""
        return memory_storage_client._request_queues_handled

    @classmethod
    def _create_from_directory(
        cls: type[RequestQueueClient],
        storage_directory: str,
        memory_storage_client: MemoryStorageClient,
        id: str | None = None,  # noqa: A002
        name: str | None = None,
    ) -> RequestQueueClient:
        """Create a request queue client from the files found in a storage directory.

        Args:
            storage_directory (str): Directory holding the queue's request files and, optionally, `__metadata__.json`
            memory_storage_client (MemoryStorageClient): The storage client the new queue client belongs to
            id (str, optional): ID of the queue; overridden by the metadata file when one is present
            name (str, optional): Name of the queue; overridden by the metadata file when one is present

        Returns:
            RequestQueueClient: The client populated with the loaded requests and metadata
        """
        created_at = datetime.now(timezone.utc)
        accessed_at = datetime.now(timezone.utc)
        modified_at = datetime.now(timezone.utc)
        handled_request_count = 0
        pending_request_count = 0
        entries: list[dict] = []

        # Access the request queue folder; the context manager closes the directory iterator even if a file fails to load
        with os.scandir(storage_directory) as directory_entries:
            for entry in directory_entries:
                if entry.is_file():
                    if entry.name == '__metadata__.json':
                        # We have found the queue's metadata file, build out information based on it
                        with open(os.path.join(storage_directory, entry.name), encoding='utf-8') as f:
                            metadata = json.load(f)
                        id = metadata['id']  # noqa: A001
                        name = metadata['name']
                        created_at = datetime.fromisoformat(metadata['createdAt'])
                        accessed_at = datetime.fromisoformat(metadata['accessedAt'])
                        modified_at = datetime.fromisoformat(metadata['modifiedAt'])
                        handled_request_count = metadata['handledRequestCount']
                        pending_request_count = metadata['pendingRequestCount']

                        continue

                    with open(os.path.join(storage_directory, entry.name), encoding='utf-8') as f:
                        request = json.load(f)
                    # Order numbers are compared as Decimals in memory, so convert the persisted value back
                    if request.get('orderNo'):
                        request['orderNo'] = Decimal(request.get('orderNo'))
                    entries.append(request)

        new_client = cls(
            base_storage_directory=memory_storage_client._request_queues_directory,
            memory_storage_client=memory_storage_client,
            id=id,
            name=name,
        )

        # Overwrite properties
        new_client._accessed_at = accessed_at
        new_client._created_at = created_at
        new_client._modified_at = modified_at
        new_client._handled_request_count = handled_request_count
        new_client._pending_request_count = pending_request_count

        for request in entries:
            new_client._requests[request['id']] = request

        return new_client
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from typing import TYPE_CHECKING
|
|
4
|
-
|
|
5
|
-
from apify_shared.utils import ignore_docs
|
|
6
|
-
|
|
7
|
-
from apify._memory_storage.resource_clients.base_resource_collection_client import BaseResourceCollectionClient
|
|
8
|
-
from apify._memory_storage.resource_clients.request_queue import RequestQueueClient
|
|
9
|
-
|
|
10
|
-
if TYPE_CHECKING:
|
|
11
|
-
from apify_shared.models import ListPage
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@ignore_docs
class RequestQueueCollectionClient(BaseResourceCollectionClient):
    """Sub-client for working with the collection of request queues."""

    def _get_storage_client_cache(self: RequestQueueCollectionClient) -> list[RequestQueueClient]:
        """Return the memory storage client's list of request queue clients."""
        storage_client = self._memory_storage_client
        return storage_client._request_queues_handled

    def _get_resource_client_class(self: RequestQueueCollectionClient) -> type[RequestQueueClient]:
        """Return the resource client class which this collection client works with."""
        return RequestQueueClient

    async def list(self: RequestQueueCollectionClient) -> ListPage:
        """List the request queues which are available.

        Returns:
            ListPage: The list of available request queues matching the specified filters.
        """
        page = await super().list()
        return page

    async def get_or_create(
        self: RequestQueueCollectionClient,
        *,
        name: str | None = None,
        schema: dict | None = None,
        _id: str | None = None,
    ) -> dict:
        """Fetch the request queue with the given name, creating it first if it does not exist yet.

        Args:
            name (str, optional): The name of the request queue to retrieve or create.
            schema (dict, optional): The schema of the request queue
            _id (str, optional): Forwarded unchanged to the base implementation

        Returns:
            dict: The retrieved or newly-created request queue.
        """
        queue_info = await super().get_or_create(name=name, schema=schema, _id=_id)
        return queue_info
|