apify 1.7.0b1 → 2.2.0b14 (py3-none-any.whl)

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

This release has been flagged as potentially problematic.


This version of apify might be problematic; see the release's advisory page for more details.

Files changed (62)
  1. apify/__init__.py +19 -4
  2. apify/_actor.py +1030 -0
  3. apify/_configuration.py +370 -0
  4. apify/_consts.py +10 -0
  5. apify/_crypto.py +31 -27
  6. apify/_models.py +117 -0
  7. apify/_platform_event_manager.py +231 -0
  8. apify/_proxy_configuration.py +320 -0
  9. apify/_utils.py +18 -484
  10. apify/apify_storage_client/__init__.py +3 -0
  11. apify/apify_storage_client/_apify_storage_client.py +68 -0
  12. apify/apify_storage_client/_dataset_client.py +190 -0
  13. apify/apify_storage_client/_dataset_collection_client.py +51 -0
  14. apify/apify_storage_client/_key_value_store_client.py +94 -0
  15. apify/apify_storage_client/_key_value_store_collection_client.py +51 -0
  16. apify/apify_storage_client/_request_queue_client.py +176 -0
  17. apify/apify_storage_client/_request_queue_collection_client.py +51 -0
  18. apify/apify_storage_client/py.typed +0 -0
  19. apify/log.py +22 -105
  20. apify/scrapy/__init__.py +11 -3
  21. apify/scrapy/middlewares/__init__.py +3 -1
  22. apify/scrapy/middlewares/apify_proxy.py +29 -27
  23. apify/scrapy/middlewares/py.typed +0 -0
  24. apify/scrapy/pipelines/__init__.py +3 -1
  25. apify/scrapy/pipelines/actor_dataset_push.py +6 -3
  26. apify/scrapy/pipelines/py.typed +0 -0
  27. apify/scrapy/py.typed +0 -0
  28. apify/scrapy/requests.py +60 -58
  29. apify/scrapy/scheduler.py +28 -19
  30. apify/scrapy/utils.py +10 -32
  31. apify/storages/__init__.py +4 -10
  32. apify/storages/_request_list.py +150 -0
  33. apify/storages/py.typed +0 -0
  34. apify-2.2.0b14.dist-info/METADATA +211 -0
  35. apify-2.2.0b14.dist-info/RECORD +38 -0
  36. {apify-1.7.0b1.dist-info → apify-2.2.0b14.dist-info}/WHEEL +1 -2
  37. apify/_memory_storage/__init__.py +0 -3
  38. apify/_memory_storage/file_storage_utils.py +0 -71
  39. apify/_memory_storage/memory_storage_client.py +0 -219
  40. apify/_memory_storage/resource_clients/__init__.py +0 -19
  41. apify/_memory_storage/resource_clients/base_resource_client.py +0 -141
  42. apify/_memory_storage/resource_clients/base_resource_collection_client.py +0 -114
  43. apify/_memory_storage/resource_clients/dataset.py +0 -452
  44. apify/_memory_storage/resource_clients/dataset_collection.py +0 -48
  45. apify/_memory_storage/resource_clients/key_value_store.py +0 -533
  46. apify/_memory_storage/resource_clients/key_value_store_collection.py +0 -48
  47. apify/_memory_storage/resource_clients/request_queue.py +0 -466
  48. apify/_memory_storage/resource_clients/request_queue_collection.py +0 -48
  49. apify/actor.py +0 -1351
  50. apify/config.py +0 -127
  51. apify/consts.py +0 -67
  52. apify/event_manager.py +0 -236
  53. apify/proxy_configuration.py +0 -365
  54. apify/storages/base_storage.py +0 -181
  55. apify/storages/dataset.py +0 -494
  56. apify/storages/key_value_store.py +0 -257
  57. apify/storages/request_queue.py +0 -602
  58. apify/storages/storage_client_manager.py +0 -72
  59. apify-1.7.0b1.dist-info/METADATA +0 -149
  60. apify-1.7.0b1.dist-info/RECORD +0 -41
  61. apify-1.7.0b1.dist-info/top_level.txt +0 -1
  62. {apify-1.7.0b1.dist-info → apify-2.2.0b14.dist-info}/LICENSE +0 -0
@@ -1,71 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import os
4
-
5
- import aiofiles
6
- from aiofiles.os import makedirs
7
- from apify_shared.utils import json_dumps
8
-
9
- from apify._utils import force_remove
10
-
11
-
12
- async def update_metadata(*, data: dict, entity_directory: str, write_metadata: bool) -> None:
13
- # Skip writing the actual metadata file. This is done after ensuring the directory exists so we have the directory present
14
- if not write_metadata:
15
- return
16
-
17
- # Ensure the directory for the entity exists
18
- await makedirs(entity_directory, exist_ok=True)
19
-
20
- # Write the metadata to the file
21
- file_path = os.path.join(entity_directory, '__metadata__.json')
22
- async with aiofiles.open(file_path, mode='wb') as f:
23
- await f.write(json_dumps(data).encode('utf-8'))
24
-
25
-
26
- async def _update_dataset_items(
27
- *,
28
- data: list[tuple[str, dict]],
29
- entity_directory: str,
30
- persist_storage: bool,
31
- ) -> None:
32
- # Skip writing files to the disk if the client has the option set to false
33
- if not persist_storage:
34
- return
35
-
36
- # Ensure the directory for the entity exists
37
- await makedirs(entity_directory, exist_ok=True)
38
-
39
- # Save all the new items to the disk
40
- for idx, item in data:
41
- file_path = os.path.join(entity_directory, f'{idx}.json')
42
- async with aiofiles.open(file_path, mode='wb') as f:
43
- await f.write(json_dumps(item).encode('utf-8'))
44
-
45
-
46
- async def update_request_queue_item(
47
- *,
48
- request_id: str,
49
- request: dict,
50
- entity_directory: str,
51
- persist_storage: bool,
52
- ) -> None:
53
- # Skip writing files to the disk if the client has the option set to false
54
- if not persist_storage:
55
- return
56
-
57
- # Ensure the directory for the entity exists
58
- await makedirs(entity_directory, exist_ok=True)
59
-
60
- # Write the request to the file
61
- file_path = os.path.join(entity_directory, f'{request_id}.json')
62
- async with aiofiles.open(file_path, mode='wb') as f:
63
- await f.write(json_dumps(request).encode('utf-8'))
64
-
65
-
66
- async def delete_request(*, request_id: str, entity_directory: str) -> None:
67
- # Ensure the directory for the entity exists
68
- await makedirs(entity_directory, exist_ok=True)
69
-
70
- file_path = os.path.join(entity_directory, f'{request_id}.json')
71
- await force_remove(file_path)
@@ -1,219 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import asyncio
4
- import contextlib
5
- import os
6
- from pathlib import Path
7
-
8
- import aioshutil
9
- from aiofiles import ospath
10
- from aiofiles.os import rename, scandir
11
- from apify_shared.consts import ApifyEnvVars
12
- from apify_shared.utils import ignore_docs
13
-
14
- from apify._memory_storage.resource_clients.dataset import DatasetClient
15
- from apify._memory_storage.resource_clients.dataset_collection import DatasetCollectionClient
16
- from apify._memory_storage.resource_clients.key_value_store import KeyValueStoreClient
17
- from apify._memory_storage.resource_clients.key_value_store_collection import KeyValueStoreCollectionClient
18
- from apify._memory_storage.resource_clients.request_queue import RequestQueueClient
19
- from apify._memory_storage.resource_clients.request_queue_collection import RequestQueueCollectionClient
20
- from apify._utils import maybe_parse_bool
21
-
22
- """
23
- Memory storage emulates data storages that are available on the Apify platform.
24
- Specifically, it emulates clients for datasets, key-value stores and request queues.
25
- The data are held in-memory and persisted locally if `persist_storage` is True.
26
- The metadata of the storages is also persisted if `write_metadata` is True.
27
- """
28
-
29
-
30
- @ignore_docs
31
- class MemoryStorageClient:
32
- """Class representing an in-memory storage."""
33
-
34
- _local_data_directory: str
35
- _datasets_directory: str
36
- _key_value_stores_directory: str
37
- _request_queues_directory: str
38
- _write_metadata: bool
39
- _persist_storage: bool
40
- _datasets_handled: list[DatasetClient]
41
- _key_value_stores_handled: list[KeyValueStoreClient]
42
- _request_queues_handled: list[RequestQueueClient]
43
-
44
- _purged_on_start: bool = False
45
- _purge_lock: asyncio.Lock
46
-
47
- """Indicates whether a purge was already performed on this instance"""
48
-
49
- def __init__(
50
- self: MemoryStorageClient,
51
- *,
52
- local_data_directory: str | None = None,
53
- write_metadata: bool | None = None,
54
- persist_storage: bool | None = None,
55
- ) -> None:
56
- """Initialize the MemoryStorageClient.
57
-
58
- Args:
59
- local_data_directory (str, optional): A local directory where all data will be persisted
60
- persist_storage (bool, optional): Whether to persist the data to the `local_data_directory` or just keep them in memory
61
- write_metadata (bool, optional): Whether to persist metadata of the storages as well
62
- """
63
- self._local_data_directory = local_data_directory or os.getenv(ApifyEnvVars.LOCAL_STORAGE_DIR) or './storage'
64
- self._datasets_directory = os.path.join(self._local_data_directory, 'datasets')
65
- self._key_value_stores_directory = os.path.join(self._local_data_directory, 'key_value_stores')
66
- self._request_queues_directory = os.path.join(self._local_data_directory, 'request_queues')
67
- self._write_metadata = write_metadata if write_metadata is not None else '*' in os.getenv('DEBUG', '')
68
- self._persist_storage = persist_storage if persist_storage is not None else maybe_parse_bool(os.getenv(ApifyEnvVars.PERSIST_STORAGE, 'true'))
69
- self._datasets_handled = []
70
- self._key_value_stores_handled = []
71
- self._request_queues_handled = []
72
- self._purge_lock = asyncio.Lock()
73
-
74
- def datasets(self: MemoryStorageClient) -> DatasetCollectionClient:
75
- """Retrieve the sub-client for manipulating datasets."""
76
- return DatasetCollectionClient(base_storage_directory=self._datasets_directory, memory_storage_client=self)
77
-
78
- def dataset(self: MemoryStorageClient, dataset_id: str) -> DatasetClient:
79
- """Retrieve the sub-client for manipulating a single dataset.
80
-
81
- Args:
82
- dataset_id (str): ID of the dataset to be manipulated
83
- """
84
- return DatasetClient(base_storage_directory=self._datasets_directory, memory_storage_client=self, id=dataset_id)
85
-
86
- def key_value_stores(self: MemoryStorageClient) -> KeyValueStoreCollectionClient:
87
- """Retrieve the sub-client for manipulating key-value stores."""
88
- return KeyValueStoreCollectionClient(base_storage_directory=self._key_value_stores_directory, memory_storage_client=self)
89
-
90
- def key_value_store(self: MemoryStorageClient, key_value_store_id: str) -> KeyValueStoreClient:
91
- """Retrieve the sub-client for manipulating a single key-value store.
92
-
93
- Args:
94
- key_value_store_id (str): ID of the key-value store to be manipulated
95
- """
96
- return KeyValueStoreClient(base_storage_directory=self._key_value_stores_directory, memory_storage_client=self, id=key_value_store_id)
97
-
98
- def request_queues(self: MemoryStorageClient) -> RequestQueueCollectionClient:
99
- """Retrieve the sub-client for manipulating request queues."""
100
- return RequestQueueCollectionClient(base_storage_directory=self._request_queues_directory, memory_storage_client=self)
101
-
102
- def request_queue(
103
- self: MemoryStorageClient,
104
- request_queue_id: str,
105
- *,
106
- client_key: str | None = None, # noqa: ARG002
107
- ) -> RequestQueueClient:
108
- """Retrieve the sub-client for manipulating a single request queue.
109
-
110
- Args:
111
- request_queue_id (str): ID of the request queue to be manipulated
112
- client_key (str): A unique identifier of the client accessing the request queue
113
- """
114
- return RequestQueueClient(base_storage_directory=self._request_queues_directory, memory_storage_client=self, id=request_queue_id)
115
-
116
- async def _purge_on_start(self: MemoryStorageClient) -> None:
117
- # Optimistic, non-blocking check
118
- if self._purged_on_start is True:
119
- return
120
-
121
- async with self._purge_lock:
122
- # Another check under the lock just to be sure
123
- if self._purged_on_start is True:
124
- return # type: ignore[unreachable] # Mypy doesn't understand that the _purged_on_start can change while we're getting the async lock
125
-
126
- await self._purge()
127
- self._purged_on_start = True
128
-
129
- async def _purge(self: MemoryStorageClient) -> None:
130
- """Clean up the default storage directories before the run starts.
131
-
132
- Specifically, `purge` cleans up:
133
- - local directory containing the default dataset;
134
- - all records from the default key-value store in the local directory, except for the "INPUT" key;
135
- - local directory containing the default request queue.
136
- """
137
- # Key-value stores
138
- if await ospath.exists(self._key_value_stores_directory):
139
- key_value_store_folders = await scandir(self._key_value_stores_directory)
140
- for key_value_store_folder in key_value_store_folders:
141
- if key_value_store_folder.name.startswith('__APIFY_TEMPORARY') or key_value_store_folder.name.startswith('__OLD'):
142
- await self._batch_remove_files(key_value_store_folder.path)
143
- elif key_value_store_folder.name == 'default':
144
- await self._handle_default_key_value_store(key_value_store_folder.path)
145
-
146
- # Datasets
147
- if await ospath.exists(self._datasets_directory):
148
- dataset_folders = await scandir(self._datasets_directory)
149
- for dataset_folder in dataset_folders:
150
- if dataset_folder.name == 'default' or dataset_folder.name.startswith('__APIFY_TEMPORARY'):
151
- await self._batch_remove_files(dataset_folder.path)
152
- # Request queues
153
- if await ospath.exists(self._request_queues_directory):
154
- request_queue_folders = await scandir(self._request_queues_directory)
155
- for request_queue_folder in request_queue_folders:
156
- if request_queue_folder.name == 'default' or request_queue_folder.name.startswith('__APIFY_TEMPORARY'):
157
- await self._batch_remove_files(request_queue_folder.path)
158
-
159
- async def _handle_default_key_value_store(self: MemoryStorageClient, folder: str) -> None:
160
- """Remove everything from the default key-value store folder except `possible_input_keys`."""
161
- folder_exists = await ospath.exists(folder)
162
- temporary_path = os.path.normpath(os.path.join(folder, '../__APIFY_MIGRATING_KEY_VALUE_STORE__'))
163
-
164
- # For optimization, we want to only attempt to copy a few files from the default key-value store
165
- possible_input_keys = [
166
- 'INPUT',
167
- 'INPUT.json',
168
- 'INPUT.bin',
169
- 'INPUT.txt',
170
- ]
171
-
172
- if folder_exists:
173
- # Create a temporary folder to save important files in
174
- Path(temporary_path).mkdir(parents=True, exist_ok=True)
175
-
176
- # Go through each file and save the ones that are important
177
- for entity in possible_input_keys:
178
- original_file_path = os.path.join(folder, entity)
179
- temp_file_path = os.path.join(temporary_path, entity)
180
- with contextlib.suppress(Exception):
181
- await rename(original_file_path, temp_file_path)
182
-
183
- # Remove the original folder and all its content
184
- counter = 0
185
- temp_path_for_old_folder = os.path.normpath(os.path.join(folder, f'../__OLD_DEFAULT_{counter}__'))
186
- done = False
187
- try:
188
- while not done:
189
- await rename(folder, temp_path_for_old_folder)
190
- done = True
191
- except Exception:
192
- counter += 1
193
- temp_path_for_old_folder = os.path.normpath(os.path.join(folder, f'../__OLD_DEFAULT_{counter}__'))
194
-
195
- # Replace the temporary folder with the original folder
196
- await rename(temporary_path, folder)
197
-
198
- # Remove the old folder
199
- await self._batch_remove_files(temp_path_for_old_folder)
200
-
201
- async def _batch_remove_files(self: MemoryStorageClient, folder: str, counter: int = 0) -> None:
202
- folder_exists = await ospath.exists(folder)
203
-
204
- if folder_exists:
205
- temporary_folder = (
206
- folder
207
- if os.path.basename(folder).startswith('__APIFY_TEMPORARY_')
208
- else os.path.normpath(os.path.join(folder, f'../__APIFY_TEMPORARY_{counter}__'))
209
- )
210
-
211
- try:
212
- # Rename the old folder to the new one to allow background deletions
213
- await rename(folder, temporary_folder)
214
- except Exception:
215
- # Folder exists already, try again with an incremented counter
216
- return await self._batch_remove_files(folder, counter + 1)
217
-
218
- await aioshutil.rmtree(temporary_folder, ignore_errors=True)
219
- return None
@@ -1,19 +0,0 @@
1
- from .base_resource_client import BaseResourceClient
2
- from .base_resource_collection_client import BaseResourceCollectionClient
3
- from .dataset import DatasetClient
4
- from .dataset_collection import DatasetCollectionClient
5
- from .key_value_store import KeyValueStoreClient
6
- from .key_value_store_collection import KeyValueStoreCollectionClient
7
- from .request_queue import RequestQueueClient
8
- from .request_queue_collection import RequestQueueCollectionClient
9
-
10
- __all__ = [
11
- 'BaseResourceClient',
12
- 'BaseResourceCollectionClient',
13
- 'DatasetClient',
14
- 'DatasetCollectionClient',
15
- 'KeyValueStoreClient',
16
- 'KeyValueStoreCollectionClient',
17
- 'RequestQueueClient',
18
- 'RequestQueueCollectionClient',
19
- ]
@@ -1,141 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import json
4
- import os
5
- from abc import ABC, abstractmethod
6
- from typing import TYPE_CHECKING
7
-
8
- from apify_shared.utils import ignore_docs
9
-
10
- if TYPE_CHECKING:
11
- from typing_extensions import Self
12
-
13
- from apify._memory_storage.memory_storage_client import MemoryStorageClient
14
-
15
-
16
- @ignore_docs
17
- class BaseResourceClient(ABC):
18
- """Base class for resource clients."""
19
-
20
- _id: str
21
- _name: str | None
22
- _resource_directory: str
23
-
24
- @abstractmethod
25
- def __init__(
26
- self: BaseResourceClient,
27
- *,
28
- base_storage_directory: str,
29
- memory_storage_client: MemoryStorageClient,
30
- id: str | None = None, # noqa: A002
31
- name: str | None = None,
32
- ) -> None:
33
- """Initialize the BaseResourceClient."""
34
- raise NotImplementedError('You must override this method in the subclass!')
35
-
36
- @abstractmethod
37
- async def get(self: BaseResourceClient) -> dict | None:
38
- """Retrieve the storage.
39
-
40
- Returns:
41
- dict, optional: The retrieved storage, or None, if it does not exist
42
- """
43
- raise NotImplementedError('You must override this method in the subclass!')
44
-
45
- @classmethod
46
- @abstractmethod
47
- def _get_storages_dir(cls: type[BaseResourceClient], memory_storage_client: MemoryStorageClient) -> str:
48
- raise NotImplementedError('You must override this method in the subclass!')
49
-
50
- @classmethod
51
- @abstractmethod
52
- def _get_storage_client_cache(
53
- cls, # noqa: ANN102 # type annotated cls does not work with Self as a return type
54
- memory_storage_client: MemoryStorageClient,
55
- ) -> list[Self]:
56
- raise NotImplementedError('You must override this method in the subclass!')
57
-
58
- @abstractmethod
59
- def _to_resource_info(self: BaseResourceClient) -> dict:
60
- raise NotImplementedError('You must override this method in the subclass!')
61
-
62
- @classmethod
63
- @abstractmethod
64
- def _create_from_directory(
65
- cls, # noqa: ANN102 # type annotated cls does not work with Self as a return type
66
- storage_directory: str,
67
- memory_storage_client: MemoryStorageClient,
68
- id: str | None = None, # noqa: A002
69
- name: str | None = None,
70
- ) -> Self:
71
- raise NotImplementedError('You must override this method in the subclass!')
72
-
73
- @classmethod
74
- def _find_or_create_client_by_id_or_name(
75
- cls, # noqa: ANN102 # type annotated cls does not work with Self as a return type
76
- memory_storage_client: MemoryStorageClient,
77
- id: str | None = None, # noqa: A002
78
- name: str | None = None,
79
- ) -> Self | None:
80
- assert id is not None or name is not None # noqa: S101
81
-
82
- storage_client_cache = cls._get_storage_client_cache(memory_storage_client)
83
- storages_dir = cls._get_storages_dir(memory_storage_client)
84
-
85
- # First check memory cache
86
- found = next(
87
- (
88
- storage_client
89
- for storage_client in storage_client_cache
90
- if storage_client._id == id or (storage_client._name and name and storage_client._name.lower() == name.lower())
91
- ),
92
- None,
93
- )
94
-
95
- if found is not None:
96
- return found
97
-
98
- storage_path = None
99
-
100
- # First try to find the storage by looking up the directory by name
101
- if name:
102
- possible_storage_path = os.path.join(storages_dir, name)
103
- if os.access(possible_storage_path, os.F_OK):
104
- storage_path = possible_storage_path
105
-
106
- # If it's not found, try going through the storages dir and finding it by metadata
107
- if not storage_path and os.access(storages_dir, os.F_OK):
108
- for entry in os.scandir(storages_dir):
109
- if not entry.is_dir():
110
- continue
111
- metadata_path = os.path.join(entry.path, '__metadata__.json')
112
- if not os.access(metadata_path, os.F_OK):
113
- continue
114
- with open(metadata_path, encoding='utf-8') as metadata_file:
115
- metadata = json.load(metadata_file)
116
- if id and id == metadata.get('id'):
117
- storage_path = entry.path
118
- name = metadata.get(name)
119
- break
120
- if name and name == metadata.get('name'):
121
- storage_path = entry.path
122
- id = metadata.get(id) # noqa: A001
123
- break
124
-
125
- # As a last resort, try to check if the accessed storage is the default one,
126
- # and the folder has no metadata
127
- # TODO: make this respect the APIFY_DEFAULT_XXX_ID env var
128
- # https://github.com/apify/apify-sdk-python/issues/149
129
- if id == 'default':
130
- possible_storage_path = os.path.join(storages_dir, id)
131
- if os.access(possible_storage_path, os.F_OK):
132
- storage_path = possible_storage_path
133
-
134
- if not storage_path:
135
- return None
136
-
137
- resource_client = cls._create_from_directory(storage_path, memory_storage_client, id, name)
138
-
139
- storage_client_cache.append(resource_client)
140
-
141
- return resource_client
@@ -1,114 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from abc import ABC, abstractmethod
4
- from operator import itemgetter
5
- from typing import TYPE_CHECKING, Generic, TypeVar, cast
6
-
7
- from apify_shared.models import ListPage
8
- from apify_shared.utils import ignore_docs
9
-
10
- from apify._memory_storage.file_storage_utils import update_metadata
11
- from apify._memory_storage.resource_clients.base_resource_client import BaseResourceClient
12
-
13
- if TYPE_CHECKING:
14
- from apify._memory_storage.memory_storage_client import MemoryStorageClient
15
-
16
-
17
- ResourceClientType = TypeVar('ResourceClientType', bound=BaseResourceClient, contravariant=True) # noqa: PLC0105
18
-
19
-
20
- @ignore_docs
21
- class BaseResourceCollectionClient(ABC, Generic[ResourceClientType]):
22
- """Base class for resource collection clients."""
23
-
24
- _base_storage_directory: str
25
- _memory_storage_client: MemoryStorageClient
26
-
27
- def __init__(
28
- self: BaseResourceCollectionClient,
29
- *,
30
- base_storage_directory: str,
31
- memory_storage_client: MemoryStorageClient,
32
- ) -> None:
33
- """Initialize the DatasetCollectionClient with the passed arguments."""
34
- self._base_storage_directory = base_storage_directory
35
- self._memory_storage_client = memory_storage_client
36
-
37
- @abstractmethod
38
- def _get_storage_client_cache(self: BaseResourceCollectionClient) -> list[ResourceClientType]:
39
- raise NotImplementedError('You must override this method in the subclass!')
40
-
41
- @abstractmethod
42
- def _get_resource_client_class(self: BaseResourceCollectionClient) -> type[ResourceClientType]:
43
- raise NotImplementedError('You must override this method in the subclass!')
44
-
45
- @abstractmethod
46
- async def list(self: BaseResourceCollectionClient) -> ListPage:
47
- """List the available storages.
48
-
49
- Returns:
50
- ListPage: The list of available storages matching the specified filters.
51
- """
52
- storage_client_cache = self._get_storage_client_cache()
53
-
54
- items = [storage._to_resource_info() for storage in storage_client_cache]
55
-
56
- return ListPage(
57
- {
58
- 'total': len(items),
59
- 'count': len(items),
60
- 'offset': 0,
61
- 'limit': len(items),
62
- 'desc': False,
63
- 'items': sorted(items, key=itemgetter('createdAt')),
64
- }
65
- )
66
-
67
- @abstractmethod
68
- async def get_or_create(
69
- self: BaseResourceCollectionClient,
70
- *,
71
- name: str | None = None,
72
- schema: dict | None = None,
73
- _id: str | None = None,
74
- ) -> dict:
75
- """Retrieve a named storage, or create a new one when it doesn't exist.
76
-
77
- Args:
78
- name (str, optional): The name of the storage to retrieve or create.
79
- schema (Dict, optional): The schema of the storage
80
-
81
- Returns:
82
- dict: The retrieved or newly-created storage.
83
- """
84
- resource_client_class = self._get_resource_client_class()
85
- storage_client_cache = self._get_storage_client_cache()
86
-
87
- if name or _id:
88
- found = resource_client_class._find_or_create_client_by_id_or_name(
89
- memory_storage_client=self._memory_storage_client,
90
- name=name,
91
- id=_id,
92
- )
93
- if found:
94
- resource_info = found._to_resource_info()
95
- return cast(dict, resource_info)
96
-
97
- new_resource = resource_client_class(
98
- id=_id,
99
- name=name,
100
- base_storage_directory=self._base_storage_directory,
101
- memory_storage_client=self._memory_storage_client,
102
- )
103
- storage_client_cache.append(new_resource)
104
-
105
- resource_info = new_resource._to_resource_info()
106
-
107
- # Write to the disk
108
- await update_metadata(
109
- data=resource_info,
110
- entity_directory=new_resource._resource_directory,
111
- write_metadata=self._memory_storage_client._write_metadata,
112
- )
113
-
114
- return cast(dict, resource_info)