apify 1.7.1b1__py3-none-any.whl → 2.2.0b14__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- apify/__init__.py +19 -4
- apify/_actor.py +1030 -0
- apify/_configuration.py +370 -0
- apify/_consts.py +10 -0
- apify/_crypto.py +31 -27
- apify/_models.py +117 -0
- apify/_platform_event_manager.py +231 -0
- apify/_proxy_configuration.py +320 -0
- apify/_utils.py +18 -484
- apify/apify_storage_client/__init__.py +3 -0
- apify/apify_storage_client/_apify_storage_client.py +68 -0
- apify/apify_storage_client/_dataset_client.py +190 -0
- apify/apify_storage_client/_dataset_collection_client.py +51 -0
- apify/apify_storage_client/_key_value_store_client.py +94 -0
- apify/apify_storage_client/_key_value_store_collection_client.py +51 -0
- apify/apify_storage_client/_request_queue_client.py +176 -0
- apify/apify_storage_client/_request_queue_collection_client.py +51 -0
- apify/apify_storage_client/py.typed +0 -0
- apify/log.py +22 -105
- apify/scrapy/__init__.py +11 -3
- apify/scrapy/middlewares/__init__.py +3 -1
- apify/scrapy/middlewares/apify_proxy.py +29 -27
- apify/scrapy/middlewares/py.typed +0 -0
- apify/scrapy/pipelines/__init__.py +3 -1
- apify/scrapy/pipelines/actor_dataset_push.py +6 -3
- apify/scrapy/pipelines/py.typed +0 -0
- apify/scrapy/py.typed +0 -0
- apify/scrapy/requests.py +60 -58
- apify/scrapy/scheduler.py +28 -19
- apify/scrapy/utils.py +10 -32
- apify/storages/__init__.py +4 -10
- apify/storages/_request_list.py +150 -0
- apify/storages/py.typed +0 -0
- apify-2.2.0b14.dist-info/METADATA +211 -0
- apify-2.2.0b14.dist-info/RECORD +38 -0
- {apify-1.7.1b1.dist-info → apify-2.2.0b14.dist-info}/WHEEL +1 -2
- apify/_memory_storage/__init__.py +0 -3
- apify/_memory_storage/file_storage_utils.py +0 -71
- apify/_memory_storage/memory_storage_client.py +0 -219
- apify/_memory_storage/resource_clients/__init__.py +0 -19
- apify/_memory_storage/resource_clients/base_resource_client.py +0 -141
- apify/_memory_storage/resource_clients/base_resource_collection_client.py +0 -114
- apify/_memory_storage/resource_clients/dataset.py +0 -452
- apify/_memory_storage/resource_clients/dataset_collection.py +0 -48
- apify/_memory_storage/resource_clients/key_value_store.py +0 -533
- apify/_memory_storage/resource_clients/key_value_store_collection.py +0 -48
- apify/_memory_storage/resource_clients/request_queue.py +0 -466
- apify/_memory_storage/resource_clients/request_queue_collection.py +0 -48
- apify/actor.py +0 -1351
- apify/config.py +0 -127
- apify/consts.py +0 -67
- apify/event_manager.py +0 -236
- apify/proxy_configuration.py +0 -365
- apify/storages/base_storage.py +0 -181
- apify/storages/dataset.py +0 -494
- apify/storages/key_value_store.py +0 -257
- apify/storages/request_queue.py +0 -602
- apify/storages/storage_client_manager.py +0 -72
- apify-1.7.1b1.dist-info/METADATA +0 -149
- apify-1.7.1b1.dist-info/RECORD +0 -41
- apify-1.7.1b1.dist-info/top_level.txt +0 -1
- {apify-1.7.1b1.dist-info → apify-2.2.0b14.dist-info}/LICENSE +0 -0
|
@@ -1,533 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
import io
|
|
5
|
-
import json
|
|
6
|
-
import mimetypes
|
|
7
|
-
import os
|
|
8
|
-
import pathlib
|
|
9
|
-
from datetime import datetime, timezone
|
|
10
|
-
from operator import itemgetter
|
|
11
|
-
from typing import TYPE_CHECKING, Any, AsyncIterator, TypedDict
|
|
12
|
-
|
|
13
|
-
import aiofiles
|
|
14
|
-
import aioshutil
|
|
15
|
-
from aiofiles.os import makedirs
|
|
16
|
-
from apify_shared.utils import ignore_docs, is_file_or_bytes, json_dumps
|
|
17
|
-
|
|
18
|
-
from apify._crypto import crypto_random_object_id
|
|
19
|
-
from apify._memory_storage.file_storage_utils import update_metadata
|
|
20
|
-
from apify._memory_storage.resource_clients.base_resource_client import BaseResourceClient
|
|
21
|
-
from apify._utils import (
|
|
22
|
-
force_remove,
|
|
23
|
-
force_rename,
|
|
24
|
-
guess_file_extension,
|
|
25
|
-
maybe_parse_body,
|
|
26
|
-
raise_on_duplicate_storage,
|
|
27
|
-
raise_on_non_existing_storage,
|
|
28
|
-
)
|
|
29
|
-
from apify.consts import DEFAULT_API_PARAM_LIMIT, StorageTypes
|
|
30
|
-
from apify.log import logger
|
|
31
|
-
|
|
32
|
-
if TYPE_CHECKING:
|
|
33
|
-
from typing_extensions import NotRequired
|
|
34
|
-
|
|
35
|
-
from apify._memory_storage.memory_storage_client import MemoryStorageClient
|
|
36
|
-
|
|
37
|
-
|
|
38
|
-
class KeyValueStoreRecord(TypedDict):
    """Shape of a single record held by the in-memory key-value store.

    `filename` is optional: it is filled in when the record is persisted to disk or
    loaded from an existing storage directory.
    """

    # Key under which the record is stored.
    key: str
    # Stored payload (raw bytes, text, or any other value given by the caller).
    value: Any
    # MIME type describing `value`, if known.
    contentType: str | None
    # Name of the file backing the record on disk, when there is one.
    filename: NotRequired[str]
|
|
43
|
-
|
|
44
|
-
|
|
45
|
-
def _filename_from_record(record: KeyValueStoreRecord) -> str:
|
|
46
|
-
if record.get('filename') is not None:
|
|
47
|
-
return record['filename']
|
|
48
|
-
|
|
49
|
-
content_type = record.get('contentType')
|
|
50
|
-
if not content_type or content_type == 'application/octet-stream':
|
|
51
|
-
return record['key']
|
|
52
|
-
|
|
53
|
-
extension = guess_file_extension(content_type)
|
|
54
|
-
if record['key'].endswith(f'.{extension}'):
|
|
55
|
-
return record['key']
|
|
56
|
-
|
|
57
|
-
return f'{record["key"]}.{extension}'
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
@ignore_docs
class KeyValueStoreClient(BaseResourceClient):
    """Sub-client for manipulating a single key-value store.

    Records are held in memory in `_records`. When the owning memory storage client has
    `_persist_storage` enabled, every record is also mirrored into a file inside
    `_resource_directory`.
    """

    _id: str
    _resource_directory: str
    _memory_storage_client: MemoryStorageClient
    _name: str | None
    _records: dict[str, KeyValueStoreRecord]
    _created_at: datetime
    _accessed_at: datetime
    _modified_at: datetime
    _file_operation_lock: asyncio.Lock

    def __init__(
        self: KeyValueStoreClient,
        *,
        base_storage_directory: str,
        memory_storage_client: MemoryStorageClient,
        id: str | None = None,  # noqa: A002
        name: str | None = None,
    ) -> None:
        """Initialize the KeyValueStoreClient.

        Args:
            base_storage_directory (str): Directory in which the store's own directory lives
            memory_storage_client (MemoryStorageClient): The memory storage client owning this store
            id (str, optional): ID of the store; a random one is generated when omitted
            name (str, optional): Name of the store; used as the directory name when given
        """
        self._id = id or crypto_random_object_id()
        self._resource_directory = os.path.join(base_storage_directory, name or self._id)
        self._memory_storage_client = memory_storage_client
        self._name = name
        self._records = {}

        # A single instant is used so that a fresh store has identical timestamps.
        now = datetime.now(timezone.utc)
        self._created_at = now
        self._accessed_at = now
        self._modified_at = now

        self._file_operation_lock = asyncio.Lock()

    async def get(self: KeyValueStoreClient) -> dict | None:
        """Retrieve the key-value store.

        Returns:
            dict, optional: The retrieved key-value store, or None if it does not exist
        """
        found = self._find_or_create_client_by_id_or_name(
            memory_storage_client=self._memory_storage_client, id=self._id, name=self._name
        )

        if found:
            async with found._file_operation_lock:
                await found._update_timestamps(has_been_modified=False)
                return found._to_resource_info()

        return None

    async def update(self: KeyValueStoreClient, *, name: str | None = None) -> dict:
        """Update the key-value store with specified fields.

        Args:
            name (str, optional): The new name for key-value store

        Returns:
            dict: The updated key-value store
        """
        # Check by id
        existing_store_by_id = self._find_or_create_client_by_id_or_name(
            memory_storage_client=self._memory_storage_client, id=self._id, name=self._name
        )

        if existing_store_by_id is None:
            raise_on_non_existing_storage(StorageTypes.KEY_VALUE_STORE, self._id)

        # Skip if no changes
        if name is None:
            return existing_store_by_id._to_resource_info()

        async with existing_store_by_id._file_operation_lock:
            # Check that name is not in use already (names are compared case-insensitively)
            existing_store_by_name = next(
                (
                    store
                    for store in self._memory_storage_client._key_value_stores_handled
                    if store._name and store._name.lower() == name.lower()
                ),
                None,
            )

            if existing_store_by_name is not None:
                raise_on_duplicate_storage(StorageTypes.KEY_VALUE_STORE, 'name', name)

            existing_store_by_id._name = name

            # The store directory is named after the store, so it has to move along with the rename
            previous_dir = existing_store_by_id._resource_directory
            existing_store_by_id._resource_directory = os.path.join(
                self._memory_storage_client._key_value_stores_directory, name
            )
            await force_rename(previous_dir, existing_store_by_id._resource_directory)

            # Update timestamps
            await existing_store_by_id._update_timestamps(has_been_modified=True)

        return existing_store_by_id._to_resource_info()

    async def delete(self: KeyValueStoreClient) -> None:
        """Delete the key-value store.

        Removes the store from the memory storage client's list of handled stores, drops its
        in-memory records and removes its directory if it exists. Does nothing when no handled
        store has this client's ID.
        """
        store = next(
            (store for store in self._memory_storage_client._key_value_stores_handled if store._id == self._id),
            None,
        )

        if store is not None:
            async with store._file_operation_lock:
                self._memory_storage_client._key_value_stores_handled.remove(store)
                store._records.clear()

                if os.path.exists(store._resource_directory):
                    await aioshutil.rmtree(store._resource_directory)

    async def list_keys(
        self: KeyValueStoreClient,
        *,
        limit: int = DEFAULT_API_PARAM_LIMIT,
        exclusive_start_key: str | None = None,
    ) -> dict:
        """List the keys in the key-value store.

        Args:
            limit (int, optional): Number of keys to be returned. Maximum value is 1000
            exclusive_start_key (str, optional): All keys up to this one (including) are skipped from the result

        Returns:
            dict: The list of keys in the key-value store matching the given arguments
        """
        # Check by id
        existing_store_by_id = self._find_or_create_client_by_id_or_name(
            memory_storage_client=self._memory_storage_client, id=self._id, name=self._name
        )

        if existing_store_by_id is None:
            raise_on_non_existing_storage(StorageTypes.KEY_VALUE_STORE, self._id)

        # Lexically sort to emulate the API
        items = sorted(
            ({'key': record['key'], 'size': len(record['value'])} for record in existing_store_by_id._records.values()),
            key=itemgetter('key'),
        )

        if not items:
            return {
                'count': 0,
                'limit': limit,
                'exclusiveStartKey': exclusive_start_key,
                'isTruncated': False,
                'nextExclusiveStartKey': None,
                'items': items,
            }

        # Skip everything up to and including the exclusive start key; an unknown key skips nothing
        remaining_items = items
        if exclusive_start_key is not None:
            key_pos = next((idx for idx, item in enumerate(items) if item['key'] == exclusive_start_key), None)
            if key_pos is not None:
                remaining_items = items[(key_pos + 1) :]

        limited_items = remaining_items[:limit]

        # The page is truncated when some of the remaining items did not fit into it. The page
        # itself may be empty (e.g. `limit=0`, or the start key was the last key), so the last
        # selected item must not be accessed unconditionally.
        is_truncated = len(limited_items) < len(remaining_items)
        if not is_truncated:
            next_exclusive_start_key = None
        elif limited_items:
            next_exclusive_start_key = limited_items[-1]['key']
        else:
            # Nothing was consumed, so the next page starts where this one started
            next_exclusive_start_key = exclusive_start_key

        async with existing_store_by_id._file_operation_lock:
            await existing_store_by_id._update_timestamps(has_been_modified=False)

        return {
            'count': len(items),
            'limit': limit,
            'exclusiveStartKey': exclusive_start_key,
            'isTruncated': is_truncated,
            'nextExclusiveStartKey': next_exclusive_start_key,
            'items': limited_items,
        }

    async def _get_record_internal(
        self: KeyValueStoreClient,
        key: str,
        as_bytes: bool = False,  # noqa: FBT001, FBT002
    ) -> dict | None:
        """Look up a record and return a copy of it, optionally parsing its value.

        Args:
            key (str): Key of the record to retrieve
            as_bytes (bool, optional): When True, the stored value is returned as is, without parsing

        Returns:
            dict, optional: Dict with `key`, `value` and `contentType`, or None if the record does not exist
        """
        # Check by id
        existing_store_by_id = self._find_or_create_client_by_id_or_name(
            memory_storage_client=self._memory_storage_client, id=self._id, name=self._name
        )

        if existing_store_by_id is None:
            raise_on_non_existing_storage(StorageTypes.KEY_VALUE_STORE, self._id)

        stored_record = existing_store_by_id._records.get(key)

        if stored_record is None:
            return None

        record = {
            'key': stored_record['key'],
            'value': stored_record['value'],
            'contentType': stored_record.get('contentType'),
        }

        if not as_bytes:
            try:
                record['value'] = maybe_parse_body(record['value'], record['contentType'])
            except ValueError:
                # Best effort: on a parsing failure the unparsed value is returned
                logger.exception('Error parsing key-value store record')

        async with existing_store_by_id._file_operation_lock:
            await existing_store_by_id._update_timestamps(has_been_modified=False)

        return record

    async def get_record(self: KeyValueStoreClient, key: str) -> dict | None:
        """Retrieve the given record from the key-value store.

        Args:
            key (str): Key of the record to retrieve

        Returns:
            dict, optional: The requested record, or None, if the record does not exist
        """
        return await self._get_record_internal(key)

    async def get_record_as_bytes(self: KeyValueStoreClient, key: str) -> dict | None:
        """Retrieve the given record from the key-value store, without parsing it.

        Args:
            key (str): Key of the record to retrieve

        Returns:
            dict, optional: The requested record, or None, if the record does not exist
        """
        return await self._get_record_internal(key, as_bytes=True)

    async def stream_record(self: KeyValueStoreClient, _key: str) -> AsyncIterator[dict | None]:
        """Streaming of records is not available in the local memory storage.

        Raises:
            NotImplementedError: Always
        """
        raise NotImplementedError('This method is not supported in local memory storage.')

    async def set_record(self: KeyValueStoreClient, key: str, value: Any, content_type: str | None = None) -> None:
        """Set a value to the given record in the key-value store.

        Args:
            key (str): The key of the record to save the value to
            value (Any): The value to save into the record
            content_type (str, optional): The content type of the saved value

        Raises:
            NotImplementedError: If `value` is a file-like object
        """
        # Check by id
        existing_store_by_id = self._find_or_create_client_by_id_or_name(
            memory_storage_client=self._memory_storage_client, id=self._id, name=self._name
        )

        if existing_store_by_id is None:
            raise_on_non_existing_storage(StorageTypes.KEY_VALUE_STORE, self._id)

        if isinstance(value, io.IOBase):
            raise NotImplementedError('File-like values are not supported in local memory storage')

        # Infer the content type from the value when the caller did not provide one
        if content_type is None:
            if is_file_or_bytes(value):
                content_type = 'application/octet-stream'
            elif isinstance(value, str):
                content_type = 'text/plain; charset=utf-8'
            else:
                content_type = 'application/json; charset=utf-8'

        # JSON records holding plain Python objects are stored serialized
        if 'application/json' in content_type and not is_file_or_bytes(value) and not isinstance(value, str):
            value = json_dumps(value).encode('utf-8')

        async with existing_store_by_id._file_operation_lock:
            await existing_store_by_id._update_timestamps(has_been_modified=True)
            record: KeyValueStoreRecord = {
                'key': key,
                'value': value,
                'contentType': content_type,
            }

            old_record = existing_store_by_id._records.get(key)
            existing_store_by_id._records[key] = record

            if self._memory_storage_client._persist_storage:
                # A changed content type may change the file name; remove the stale file in that case
                if old_record is not None and _filename_from_record(old_record) != _filename_from_record(record):
                    await existing_store_by_id._delete_persisted_record(old_record)

                await existing_store_by_id._persist_record(record)

    async def _persist_record(self: KeyValueStoreClient, record: KeyValueStoreRecord) -> None:
        """Write the record's value (and optionally its metadata) into the store directory.

        The record is updated in place: its `filename` is set and a `str` value is replaced
        by its UTF-8 encoded bytes.

        Args:
            record (KeyValueStoreRecord): The record to persist
        """
        store_directory = self._resource_directory
        record_filename = _filename_from_record(record)
        record['filename'] = record_filename

        # Ensure the directory for the entity exists
        await makedirs(store_directory, exist_ok=True)

        # Create files for the record
        record_path = os.path.join(store_directory, record_filename)
        record_metadata_path = os.path.join(store_directory, record_filename + '.__metadata__.json')

        # Convert to bytes if string
        if isinstance(record['value'], str):
            record['value'] = record['value'].encode('utf-8')

        async with aiofiles.open(record_path, mode='wb') as f:
            await f.write(record['value'])

        if self._memory_storage_client._write_metadata:
            async with aiofiles.open(record_metadata_path, mode='wb') as f:
                await f.write(
                    json_dumps(
                        {
                            'key': record['key'],
                            'contentType': record['contentType'],
                        }
                    ).encode('utf-8')
                )

    async def delete_record(self: KeyValueStoreClient, key: str) -> None:
        """Delete the specified record from the key-value store.

        Args:
            key (str): The key of the record which to delete
        """
        # Check by id
        existing_store_by_id = self._find_or_create_client_by_id_or_name(
            memory_storage_client=self._memory_storage_client, id=self._id, name=self._name
        )

        if existing_store_by_id is None:
            raise_on_non_existing_storage(StorageTypes.KEY_VALUE_STORE, self._id)

        record = existing_store_by_id._records.get(key)

        if record is not None:
            async with existing_store_by_id._file_operation_lock:
                del existing_store_by_id._records[key]
                await existing_store_by_id._update_timestamps(has_been_modified=True)
                if self._memory_storage_client._persist_storage:
                    await existing_store_by_id._delete_persisted_record(record)

    async def _delete_persisted_record(self: KeyValueStoreClient, record: KeyValueStoreRecord) -> None:
        """Remove the record's value file and metadata file from the store directory.

        Args:
            record (KeyValueStoreRecord): The record whose files should be removed
        """
        store_directory = self._resource_directory
        record_filename = _filename_from_record(record)

        # Ensure the directory for the entity exists
        await makedirs(store_directory, exist_ok=True)

        # Paths of the files backing the record
        record_path = os.path.join(store_directory, record_filename)
        record_metadata_path = os.path.join(store_directory, record_filename + '.__metadata__.json')

        await force_remove(record_path)
        await force_remove(record_metadata_path)

    def _to_resource_info(self: KeyValueStoreClient) -> dict:
        """Retrieve the key-value store info."""
        return {
            'id': self._id,
            'name': self._name,
            'accessedAt': self._accessed_at,
            'createdAt': self._created_at,
            'modifiedAt': self._modified_at,
            # Hard-coded user ID; the local storage has no notion of users in this class
            'userId': '1',
        }

    async def _update_timestamps(self: KeyValueStoreClient, has_been_modified: bool) -> None:  # noqa: FBT001
        """Refresh the access (and optionally modification) timestamp and write the store metadata.

        Args:
            has_been_modified (bool): Whether the modification timestamp should be refreshed as well
        """
        self._accessed_at = datetime.now(timezone.utc)

        if has_been_modified:
            self._modified_at = datetime.now(timezone.utc)

        kv_store_info = self._to_resource_info()
        await update_metadata(
            data=kv_store_info,
            entity_directory=self._resource_directory,
            write_metadata=self._memory_storage_client._write_metadata,
        )

    @classmethod
    def _get_storages_dir(cls: type[KeyValueStoreClient], memory_storage_client: MemoryStorageClient) -> str:
        """Return the directory in which the memory storage client keeps its key-value stores."""
        return memory_storage_client._key_value_stores_directory

    @classmethod
    def _get_storage_client_cache(
        cls: type[KeyValueStoreClient],
        memory_storage_client: MemoryStorageClient,
    ) -> list[KeyValueStoreClient]:
        """Return the memory storage client's list of handled key-value stores."""
        return memory_storage_client._key_value_stores_handled

    @classmethod
    def _create_from_directory(
        cls: type[KeyValueStoreClient],
        storage_directory: str,
        memory_storage_client: MemoryStorageClient,
        id: str | None = None,  # noqa: A002
        name: str | None = None,
    ) -> KeyValueStoreClient:
        """Create a store client from the contents of an existing storage directory.

        The store's `__metadata__.json`, when present, overrides `id`, `name` and the timestamps.
        Every other regular file in the directory is loaded as a record; a per-record
        `<file>.__metadata__.json` supplies its key and content type, otherwise both are guessed
        from the file name.

        Args:
            storage_directory (str): The directory to load the store from
            memory_storage_client (MemoryStorageClient): The memory storage client owning the store
            id (str, optional): ID of the store, used when the store has no metadata file
            name (str, optional): Name of the store, used when the store has no metadata file

        Returns:
            KeyValueStoreClient: The client populated with the records found in the directory
        """
        now = datetime.now(timezone.utc)
        created_at = now
        accessed_at = now
        modified_at = now

        store_metadata_path = os.path.join(storage_directory, '__metadata__.json')
        if os.path.exists(store_metadata_path):
            with open(store_metadata_path, encoding='utf-8') as f:
                store_metadata = json.load(f)
            id = store_metadata['id']  # noqa: A001
            name = store_metadata['name']
            created_at = datetime.fromisoformat(store_metadata['createdAt'])
            accessed_at = datetime.fromisoformat(store_metadata['accessedAt'])
            modified_at = datetime.fromisoformat(store_metadata['modifiedAt'])

        new_client = KeyValueStoreClient(
            base_storage_directory=memory_storage_client._key_value_stores_directory,
            memory_storage_client=memory_storage_client,
            id=id,
            name=name,
        )

        # Overwrite internal properties
        new_client._accessed_at = accessed_at
        new_client._created_at = created_at
        new_client._modified_at = modified_at

        # Scan the key value store folder, check each entry in there and parse it as a store record
        for entry in os.scandir(storage_directory):
            if not entry.is_file():
                continue

            # Ignore metadata files on their own
            if entry.name.endswith('__metadata__.json'):
                continue

            with open(os.path.join(storage_directory, entry.name), 'rb') as f:
                file_content = f.read()

            # Try checking if this file has a metadata file associated with it
            record_metadata = None
            record_metadata_path = os.path.join(storage_directory, entry.name + '.__metadata__.json')
            if os.path.exists(record_metadata_path):
                with open(record_metadata_path, encoding='utf-8') as metadata_file:
                    try:
                        loaded_metadata = json.load(metadata_file)
                        # Validate explicitly (asserts are stripped under `python -O`) and only
                        # accept the metadata once it is known to be usable, so that an invalid
                        # file really is ignored instead of failing with a KeyError later on.
                        if (
                            not isinstance(loaded_metadata, dict)
                            or loaded_metadata.get('key') is None
                            or loaded_metadata.get('contentType') is None
                        ):
                            raise ValueError('Record metadata must contain both "key" and "contentType".')  # noqa: TRY301
                        record_metadata = loaded_metadata
                    except Exception:
                        logger.warning(
                            f'Metadata of key-value store entry "{entry.name}" for store {name or id} could not be parsed. '
                            'The metadata file will be ignored.',
                            exc_info=True,
                        )

            if not record_metadata:
                content_type, _ = mimetypes.guess_type(entry.name)
                if content_type is None:
                    content_type = 'application/octet-stream'

                record_metadata = {
                    'key': pathlib.Path(entry.name).stem,
                    'contentType': content_type,
                }

            record_key = record_metadata['key']

            # Verify that the content can be parsed as its declared type; otherwise treat it as binary
            try:
                maybe_parse_body(file_content, record_metadata['contentType'])
            except Exception:
                record_metadata['contentType'] = 'application/octet-stream'
                logger.warning(
                    f'Key-value store entry "{record_key}" for store {name or id} could not be parsed. '
                    'The entry will be assumed as binary.',
                    exc_info=True,
                )

            new_client._records[record_key] = {
                'key': record_key,
                'contentType': record_metadata['contentType'],
                'filename': entry.name,
                'value': file_content,
            }

        return new_client
|
|
@@ -1,48 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from typing import TYPE_CHECKING
|
|
4
|
-
|
|
5
|
-
from apify_shared.utils import ignore_docs
|
|
6
|
-
|
|
7
|
-
from apify._memory_storage.resource_clients.base_resource_collection_client import BaseResourceCollectionClient
|
|
8
|
-
from apify._memory_storage.resource_clients.key_value_store import KeyValueStoreClient
|
|
9
|
-
|
|
10
|
-
if TYPE_CHECKING:
|
|
11
|
-
from apify_shared.models import ListPage
|
|
12
|
-
|
|
13
|
-
|
|
14
|
-
@ignore_docs
class KeyValueStoreCollectionClient(BaseResourceCollectionClient):
    """Sub-client for manipulating key-value stores."""

    def _get_storage_client_cache(self: KeyValueStoreCollectionClient) -> list[KeyValueStoreClient]:
        """Return the memory storage client's list of handled key-value stores."""
        storage_client = self._memory_storage_client
        return storage_client._key_value_stores_handled

    def _get_resource_client_class(self: KeyValueStoreCollectionClient) -> type[KeyValueStoreClient]:
        """Return the resource client class managed by this collection client."""
        return KeyValueStoreClient

    async def list(self: KeyValueStoreCollectionClient) -> ListPage:
        """List the available key-value stores.

        Delegates to the base collection client.

        Returns:
            ListPage: The list of available key-value stores matching the specified filters.
        """
        stores_page = await super().list()
        return stores_page

    async def get_or_create(
        self: KeyValueStoreCollectionClient,
        *,
        name: str | None = None,
        schema: dict | None = None,
        _id: str | None = None,
    ) -> dict:
        """Retrieve a named key-value store, or create a new one when it doesn't exist.

        Delegates to the base collection client.

        Args:
            name (str, optional): The name of the key-value store to retrieve or create.
            schema (Dict, optional): The schema of the key-value store
            _id (str, optional): ID passed through unchanged to the base collection client

        Returns:
            dict: The retrieved or newly-created key-value store.
        """
        store_info = await super().get_or_create(name=name, schema=schema, _id=_id)
        return store_info
|