apify 1.7.1b1__py3-none-any.whl → 2.2.1b1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apify might be problematic. Click here for more details.

Files changed (62) hide show
  1. apify/__init__.py +33 -4
  2. apify/_actor.py +1074 -0
  3. apify/_configuration.py +370 -0
  4. apify/_consts.py +10 -0
  5. apify/_crypto.py +31 -27
  6. apify/_models.py +117 -0
  7. apify/_platform_event_manager.py +231 -0
  8. apify/_proxy_configuration.py +320 -0
  9. apify/_utils.py +18 -484
  10. apify/apify_storage_client/__init__.py +3 -0
  11. apify/apify_storage_client/_apify_storage_client.py +68 -0
  12. apify/apify_storage_client/_dataset_client.py +190 -0
  13. apify/apify_storage_client/_dataset_collection_client.py +51 -0
  14. apify/apify_storage_client/_key_value_store_client.py +94 -0
  15. apify/apify_storage_client/_key_value_store_collection_client.py +51 -0
  16. apify/apify_storage_client/_request_queue_client.py +176 -0
  17. apify/apify_storage_client/_request_queue_collection_client.py +51 -0
  18. apify/apify_storage_client/py.typed +0 -0
  19. apify/log.py +22 -105
  20. apify/scrapy/__init__.py +11 -3
  21. apify/scrapy/middlewares/__init__.py +3 -1
  22. apify/scrapy/middlewares/apify_proxy.py +29 -27
  23. apify/scrapy/middlewares/py.typed +0 -0
  24. apify/scrapy/pipelines/__init__.py +3 -1
  25. apify/scrapy/pipelines/actor_dataset_push.py +6 -3
  26. apify/scrapy/pipelines/py.typed +0 -0
  27. apify/scrapy/py.typed +0 -0
  28. apify/scrapy/requests.py +60 -58
  29. apify/scrapy/scheduler.py +28 -19
  30. apify/scrapy/utils.py +10 -32
  31. apify/storages/__init__.py +4 -10
  32. apify/storages/_request_list.py +150 -0
  33. apify/storages/py.typed +0 -0
  34. apify-2.2.1b1.dist-info/METADATA +211 -0
  35. apify-2.2.1b1.dist-info/RECORD +38 -0
  36. {apify-1.7.1b1.dist-info → apify-2.2.1b1.dist-info}/WHEEL +1 -2
  37. apify/_memory_storage/__init__.py +0 -3
  38. apify/_memory_storage/file_storage_utils.py +0 -71
  39. apify/_memory_storage/memory_storage_client.py +0 -219
  40. apify/_memory_storage/resource_clients/__init__.py +0 -19
  41. apify/_memory_storage/resource_clients/base_resource_client.py +0 -141
  42. apify/_memory_storage/resource_clients/base_resource_collection_client.py +0 -114
  43. apify/_memory_storage/resource_clients/dataset.py +0 -452
  44. apify/_memory_storage/resource_clients/dataset_collection.py +0 -48
  45. apify/_memory_storage/resource_clients/key_value_store.py +0 -533
  46. apify/_memory_storage/resource_clients/key_value_store_collection.py +0 -48
  47. apify/_memory_storage/resource_clients/request_queue.py +0 -466
  48. apify/_memory_storage/resource_clients/request_queue_collection.py +0 -48
  49. apify/actor.py +0 -1351
  50. apify/config.py +0 -127
  51. apify/consts.py +0 -67
  52. apify/event_manager.py +0 -236
  53. apify/proxy_configuration.py +0 -365
  54. apify/storages/base_storage.py +0 -181
  55. apify/storages/dataset.py +0 -494
  56. apify/storages/key_value_store.py +0 -257
  57. apify/storages/request_queue.py +0 -602
  58. apify/storages/storage_client_manager.py +0 -72
  59. apify-1.7.1b1.dist-info/METADATA +0 -149
  60. apify-1.7.1b1.dist-info/RECORD +0 -41
  61. apify-1.7.1b1.dist-info/top_level.txt +0 -1
  62. {apify-1.7.1b1.dist-info → apify-2.2.1b1.dist-info}/LICENSE +0 -0
@@ -1,365 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import inspect
4
- import ipaddress
5
- import re
6
- from typing import TYPE_CHECKING, Any, Awaitable, Callable, Pattern, TypedDict
7
- from urllib.parse import urljoin, urlparse
8
-
9
- import httpx
10
- from apify_shared.consts import ApifyEnvVars
11
- from apify_shared.utils import ignore_docs
12
-
13
- from apify.config import Configuration
14
- from apify.log import logger
15
-
16
- if TYPE_CHECKING:
17
- from apify_client import ApifyClientAsync
18
- from typing_extensions import NotRequired
19
-
20
# Pattern that proxy group names and session IDs must fully match:
# one or more word characters, ".", "_" or "~".
APIFY_PROXY_VALUE_REGEX = re.compile(r'^[\w._~]+$')
# Pattern for country codes: exactly two upper-case ASCII letters.
COUNTRY_CODE_REGEX = re.compile(r'^[A-Z]{2}$')
# Maximum number of characters allowed in a proxy session ID.
SESSION_ID_MAX_LENGTH = 50
23
-
24
-
25
def is_url(url: str) -> bool:
    """Check if the given string is a valid URL.

    A URL is accepted when it has a scheme and a network location, and its host is
    a dotted domain name, `localhost`, or a literal IPv4/IPv6 address. The host is
    read from the parsed `hostname`, so credentials and an explicit port
    (e.g. `http://user:pass@localhost:8000`) do not influence the result.

    Args:
        url: The string to validate.

    Returns:
        True if `url` looks like a valid URL, False otherwise (including when it cannot be parsed).
    """
    try:
        # Joining with '/' guarantees a non-empty path for bare origins such as 'http://host'.
        parsed_url = urlparse(urljoin(url, '/'))
        has_all_parts = all([parsed_url.scheme, parsed_url.netloc, parsed_url.path])

        # Unlike `netloc`, `hostname` has the userinfo, port and IPv6 brackets stripped.
        hostname = parsed_url.hostname or ''
        is_domain = '.' in hostname
        is_localhost = hostname == 'localhost'
        try:
            ipaddress.ip_address(hostname)
            is_ip_address = True
        except ValueError:
            is_ip_address = False

        return has_all_parts and any([is_domain, is_localhost, is_ip_address])
    except Exception:
        # Anything that cannot be parsed is simply not a valid URL.
        return False
41
-
42
-
43
def _check(
    value: Any,
    *,
    label: str | None,
    pattern: Pattern | None = None,
    min_length: int | None = None,
    max_length: int | None = None,
) -> None:
    """Validate `value` against optional length bounds and a regular expression.

    Args:
        value: The value to validate; it must support `len()` when a length bound is given
            and be a string when `pattern` is given.
        label: Name of the argument being validated, included in the error message if truthy.
        pattern: Compiled pattern which the whole value must match.
        min_length: Minimum allowed length of the value.
        max_length: Maximum allowed length of the value.

    Raises:
        ValueError: If the value is too short, too long, or does not match the pattern.
    """
    error_str = f'Value {value}'
    if label:
        error_str += f' of argument {label}'

    # Compare against None rather than relying on truthiness, so that a bound of 0 is still enforced.
    if min_length is not None and len(value) < min_length:
        raise ValueError(f'{error_str} is shorter than minimum allowed length {min_length}')

    if max_length is not None and len(value) > max_length:
        raise ValueError(f'{error_str} is longer than maximum allowed length {max_length}')

    if pattern is not None and not re.fullmatch(pattern, value):
        raise ValueError(f'{error_str} does not match pattern {pattern.pattern!r}')
63
-
64
-
65
class ProxyInfo(TypedDict):
    """Describes a single proxy connection used for requests.

    The `url`, `hostname`, `port` and `password` keys are always present; the remaining keys are optional.
    """

    url: str
    """Full URL of the proxy."""

    hostname: str
    """Hostname of the proxy server."""

    port: int
    """Port of the proxy server."""

    username: NotRequired[str]
    """Username used to authenticate with the proxy."""

    password: str
    """Password used to authenticate with the proxy."""

    groups: NotRequired[list[str]]
    """Proxy groups to be used by the [Apify Proxy](https://docs.apify.com/proxy).
    When not provided, the proxy selects the groups automatically.
    """

    country_code: NotRequired[str]
    """Two-letter ISO country code (for example `GB` for Great Britain) restricting proxied requests
    to IP addresses geolocated in that country, provided relevant proxies are available in your
    Apify account. Online services often have their own geolocation rules, so the selection is
    a best attempt rather than a guaranteed hit. When omitted, each proxied request gets an IP address
    from a random country, using all available proxy servers.
    See the [full list of available country codes](https://en.wikipedia.org/wiki/ISO_3166-1_alpha-2#Officially_assigned_code_elements).
    """

    session_id: NotRequired[str]
    """Identifier of the proxy session in use, if any. The same session ID always yields the same proxy URL."""
101
-
102
-
103
class ProxyConfiguration:
    """Configures a connection to a proxy server with the provided options.

    Proxy servers are used to prevent target websites from blocking your crawlers based on IP address rate limits or blacklists.
    The default servers used by this class are managed by [Apify Proxy](https://docs.apify.com/proxy).
    To be able to use Apify Proxy, you need an Apify account and access to the selected proxies. If you provide no configuration option,
    the proxies will be managed automatically using a smart algorithm.

    If you want to use your own proxies, use the `proxy_urls` or `new_url_function` constructor options.
    Your list of proxy URLs will be rotated by the configuration, if this option is provided.
    """

    # Updated by `_check_access` from the proxy status endpoint response.
    is_man_in_the_middle = False

    # Position of the next URL to hand out from `_proxy_urls` (round-robin).
    _next_custom_url_index = 0
    _proxy_urls: list[str]
    # Maps a session ID to the custom proxy URL that was assigned to it.
    _used_proxy_urls: dict[str, str]
    _new_url_function: Callable[[str | None], str] | Callable[[str | None], Awaitable[str]] | None = None
    _groups: list[str]
    _country_code: str | None = None
    _password: str | None = None
    _hostname: str
    _port: int
    _uses_apify_proxy: bool | None = None
    _actor_config: Configuration
    _apify_client: ApifyClientAsync | None = None

    @ignore_docs
    def __init__(
        self: ProxyConfiguration,
        *,
        password: str | None = None,
        groups: list[str] | None = None,
        country_code: str | None = None,
        proxy_urls: list[str] | None = None,
        new_url_function: Callable[[str | None], str] | Callable[[str | None], Awaitable[str]] | None = None,
        _actor_config: Configuration | None = None,
        _apify_client: ApifyClientAsync | None = None,
    ) -> None:
        """Create a ProxyConfiguration instance. It is highly recommended to use `Actor.create_proxy_configuration()` instead of this.

        Args:
            password (str, optional): Password for the Apify Proxy. If not provided, will use os.environ['APIFY_PROXY_PASSWORD'], if available.
            groups (list of str, optional): Proxy groups which the Apify Proxy should use, if provided.
            country_code (str, optional): Country which the Apify Proxy should use, if provided.
            proxy_urls (list of str, optional): Custom proxy server URLs which should be rotated through.
            new_url_function (Callable, optional): Function which returns a custom proxy URL to be used.

        Raises:
            ValueError: If any option has an invalid value, or if custom proxies are combined
                with each other or with Apify Proxy options.
        """
        # Validate the individual values first.
        if groups:
            groups = [str(group) for group in groups]
            for group in groups:
                _check(group, label='groups', pattern=APIFY_PROXY_VALUE_REGEX)
        if country_code:
            country_code = str(country_code)
            _check(country_code, label='country_code', pattern=COUNTRY_CODE_REGEX)
        if proxy_urls:
            for i, url in enumerate(proxy_urls):
                if not is_url(url):
                    raise ValueError(f'proxy_urls[{i}] ("{url}") is not a valid URL')

        # Then validate that the options are not combined in an unsupported way.
        if proxy_urls and new_url_function:
            raise ValueError('Cannot combine custom proxies in "proxy_urls" with custom generating function in "new_url_function".')

        if (proxy_urls or new_url_function) and (groups or country_code):
            raise ValueError(
                'Cannot combine custom proxies with Apify Proxy!'
                ' It is not allowed to set "proxy_urls" or "new_url_function" combined with'
                ' "groups" or "country_code".'
            )

        if proxy_urls and any('apify.com' in url for url in proxy_urls):
            logger.warning(
                'Some Apify proxy features may work incorrectly. Please consider setting up Apify properties instead of `proxy_urls`.\n'
                'See https://sdk.apify.com/docs/guides/proxy-management#apify-proxy-configuration'
            )

        self._actor_config = _actor_config or Configuration._get_default_instance()
        self._apify_client = _apify_client

        self._hostname = self._actor_config.proxy_hostname
        self._port = self._actor_config.proxy_port
        self._password = password or self._actor_config.proxy_password

        self._proxy_urls = list(proxy_urls) if proxy_urls else []
        self._used_proxy_urls = {}
        self._new_url_function = new_url_function
        self._groups = list(groups) if groups else []
        self._country_code = country_code
        # Apify Proxy is used whenever no custom proxy source was supplied.
        self._uses_apify_proxy = not (proxy_urls or new_url_function)

    async def initialize(self: ProxyConfiguration) -> None:
        """Load the Apify Proxy password if the API token is provided and check access to Apify Proxy and provided proxy groups.

        Only called if Apify Proxy configuration is used.
        Also checks if country has access to Apify Proxy groups if the country code is provided.

        You should use the Actor.create_proxy_configuration function
        to create a pre-initialized `ProxyConfiguration` instance instead of calling this manually.
        """
        if self._uses_apify_proxy:
            await self._maybe_fetch_password()
            await self._check_access()

    async def new_url(self: ProxyConfiguration, session_id: int | str | None = None) -> str:
        """Return a new proxy URL based on provided configuration options and the `sessionId` parameter.

        Args:
            session_id (int or str, optional): Represents the identifier of a proxy session (https://docs.apify.com/proxy#sessions).
                All the HTTP requests going through the proxy with the same session identifier
                will use the same target proxy server (i.e. the same IP address).
                The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.

        Returns:
            str: A string with a proxy URL, including authentication credentials and port number.
                For example, `http://bob:password123@proxy.example.com:8000`

        Raises:
            ValueError: If `session_id` is invalid, or if the custom `new_url_function` fails.
        """
        if session_id is not None:
            session_id = f'{session_id}'
            _check(session_id, label='session_id', max_length=SESSION_ID_MAX_LENGTH, pattern=APIFY_PROXY_VALUE_REGEX)

        if self._new_url_function:
            try:
                res = self._new_url_function(session_id)
                # The custom function may be either synchronous or asynchronous.
                if inspect.isawaitable(res):
                    res = await res
                return str(res)
            except Exception as exc:
                raise ValueError('The provided "new_url_function" did not return a valid URL') from exc

        if self._proxy_urls:
            if not session_id:
                return self._rotate_custom_url()

            # A session keeps the URL it was given the first time it was seen.
            if session_id not in self._used_proxy_urls:
                self._used_proxy_urls[session_id] = self._rotate_custom_url()

            return self._used_proxy_urls[session_id]

        username = self._get_username(session_id)

        return f'http://{username}:{self._password}@{self._hostname}:{self._port}'

    async def new_proxy_info(self: ProxyConfiguration, session_id: int | str | None = None) -> ProxyInfo:
        """Create a new ProxyInfo object.

        Use it if you want to work with a rich representation of a proxy URL.
        If you need the URL string only, use `ProxyConfiguration.new_url`.

        Args:
            session_id (int or str, optional): Represents the identifier of a proxy session (https://docs.apify.com/proxy#sessions).
                All the HTTP requests going through the proxy with the same session identifier
                will use the same target proxy server (i.e. the same IP address).
                The identifier must not be longer than 50 characters and include only the following: `0-9`, `a-z`, `A-Z`, `"."`, `"_"` and `"~"`.

        Returns:
            ProxyInfo: Dictionary that represents information about the proxy and its configuration.

        Raises:
            ValueError: If `session_id` is invalid, or if a custom proxy URL lacks a hostname or an explicit port.
        """
        if session_id is not None:
            session_id = f'{session_id}'
            _check(session_id, label='session_id', max_length=SESSION_ID_MAX_LENGTH, pattern=APIFY_PROXY_VALUE_REGEX)

        url = await self.new_url(session_id)
        res: ProxyInfo
        if self._uses_apify_proxy:
            res = {
                'url': url,
                'hostname': self._hostname,
                'port': self._port,
                'username': self._get_username(session_id),
                'password': self._password or '',
                'groups': self._groups,
            }
            if self._country_code:
                res['country_code'] = self._country_code
            if session_id is not None:
                res['session_id'] = session_id
            return res

        parsed_url = urlparse(url)
        # Raise explicitly instead of asserting: assertions are stripped under `python -O`,
        # which would let a `None` hostname or port leak into the returned ProxyInfo.
        if parsed_url.hostname is None or parsed_url.port is None:
            raise ValueError('The custom proxy URL must contain both a hostname and an explicit port.')
        res = {
            'url': url,
            'hostname': parsed_url.hostname,
            'port': parsed_url.port,
            'password': parsed_url.password or '',
        }
        if parsed_url.username:
            res['username'] = parsed_url.username
        return res

    def _rotate_custom_url(self: ProxyConfiguration) -> str:
        """Return the next URL from `_proxy_urls` and advance the round-robin index."""
        index = self._next_custom_url_index
        self._next_custom_url_index = (index + 1) % len(self._proxy_urls)
        return self._proxy_urls[index]

    async def _maybe_fetch_password(self: ProxyConfiguration) -> None:
        """Fetch the proxy password of the token's user via the API client, if possible.

        When a password was already configured, it is only compared with the fetched one
        and a warning is logged on mismatch.

        Raises:
            ValueError: If no proxy password is available after the lookup.
        """
        token = self._actor_config.token

        if token and self._apify_client:
            user_info = await self._apify_client.user().get()
            if user_info:
                password = user_info['proxy']['password']

                if self._password:
                    if self._password != password:
                        logger.warning(
                            'The Apify Proxy password you provided belongs to'
                            ' a different user than the Apify token you are using. Are you sure this is correct?'
                        )
                else:
                    self._password = password

        if not self._password:
            raise ValueError(
                'Apify Proxy password must be provided using the "password" constructor argument'
                f' or the "{ApifyEnvVars.PROXY_PASSWORD}" environment variable.'
                f' If you add the "{ApifyEnvVars.TOKEN}" environment variable, the password will be automatically inferred.'
            )

    async def _check_access(self: ProxyConfiguration) -> None:
        """Query the proxy status endpoint through the proxy and record the result.

        Raises:
            ConnectionError: If the status endpoint reports that the proxy is not connected.
        """
        proxy_status_url = f'{self._actor_config.proxy_status_url}/?format=json'

        status = None
        # NOTE(review): the `proxies=` keyword is deprecated in newer httpx releases in favour of `proxy=`
        # - confirm against the httpx version this package pins before changing it.
        async with httpx.AsyncClient(proxies=await self.new_url()) as client:
            # Two attempts at most; the first successful response wins.
            for _ in range(2):
                try:
                    response = await client.get(proxy_status_url)
                    status = response.json()
                    break
                except Exception:  # noqa: S110
                    # retry on connection errors
                    pass

        if status:
            if not status['connected']:
                raise ConnectionError(status['connectionError'])

            self.is_man_in_the_middle = status['isManInTheMiddle']
        else:
            logger.warning(
                'Apify Proxy access check timed out. Watch out for errors with status code 407. '
                "If you see some, it most likely means you don't have access to either all or some of the proxies you're trying to use."
            )

    def _get_username(self: ProxyConfiguration, session_id: int | str | None = None) -> str:
        """Build the proxy username from the configured groups, session ID and country code.

        Returns:
            str: The comma-separated parts, or `'auto'` when none of them is set.
        """
        parts: list[str] = []

        if self._groups:
            parts.append(f'groups-{"+".join(self._groups)}')
        if session_id is not None:
            parts.append(f'session-{session_id}')
        if self._country_code:
            parts.append(f'country-{self._country_code}')

        if not parts:
            return 'auto'

        return ','.join(parts)
@@ -1,181 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import asyncio
4
- from abc import ABC, abstractmethod
5
- from typing import TYPE_CHECKING, Generic, TypeVar, cast
6
-
7
- from apify_shared.utils import ignore_docs
8
-
9
- from apify._memory_storage import MemoryStorageClient
10
- from apify._memory_storage.resource_clients import BaseResourceClient, BaseResourceCollectionClient
11
- from apify.config import Configuration
12
- from apify.storages.storage_client_manager import StorageClientManager
13
-
14
- if TYPE_CHECKING:
15
- from apify_client import ApifyClientAsync
16
-
17
# Type variables used to parametrize `BaseStorage` by the resource client
# and resource collection client classes a concrete storage works with.
BaseResourceClientType = TypeVar('BaseResourceClientType', bound=BaseResourceClient)
BaseResourceCollectionClientType = TypeVar('BaseResourceCollectionClientType', bound=BaseResourceCollectionClient)
19
-
20
-
21
@ignore_docs
class BaseStorage(ABC, Generic[BaseResourceClientType, BaseResourceCollectionClientType]):
    """A class for managing storages.

    Opened storages are cached per class, by ID and by name, so that repeated `open()` calls
    return the same instance.
    """

    _id: str
    _name: str | None
    _storage_client: ApifyClientAsync | MemoryStorageClient
    _config: Configuration

    # Created lazily by `_ensure_class_initialized` on the class that `open()` is called on.
    _cache_by_id: dict | None = None
    _cache_by_name: dict | None = None
    _storage_creating_lock: asyncio.Lock | None = None

    def __init__(
        self: BaseStorage,
        id: str,  # noqa: A002
        name: str | None,
        client: ApifyClientAsync | MemoryStorageClient,
        config: Configuration,
    ) -> None:
        """Initialize the storage.

        Do not use this method directly, but use `Actor.open_<STORAGE>()` instead.

        Args:
            id (str): The storage id
            name (str, optional): The storage name
            client (ApifyClientAsync or MemoryStorageClient): The storage client
            config (Configuration): The configuration
        """
        self._id = id
        self._name = name
        self._storage_client = client
        self._config = config

    @classmethod
    @abstractmethod
    def _get_human_friendly_label(cls: type[BaseStorage]) -> str:
        """Return the storage label used in error messages."""
        raise NotImplementedError('You must override this method in the subclass!')

    @classmethod
    @abstractmethod
    def _get_default_id(cls: type[BaseStorage], config: Configuration) -> str:
        """Return the ID of the default storage for the given configuration."""
        raise NotImplementedError('You must override this method in the subclass!')

    @classmethod
    @abstractmethod
    def _get_single_storage_client(
        cls: type[BaseStorage],
        id: str,  # noqa: A002
        client: ApifyClientAsync | MemoryStorageClient,
    ) -> BaseResourceClientType:
        """Return the resource client for the storage with the given ID."""
        raise NotImplementedError('You must override this method in the subclass!')

    @classmethod
    @abstractmethod
    def _get_storage_collection_client(
        cls: type[BaseStorage],
        client: ApifyClientAsync | MemoryStorageClient,
    ) -> BaseResourceCollectionClientType:
        """Return the resource collection client for this kind of storage."""
        raise NotImplementedError('You must override this method in the subclass!')

    @classmethod
    def _ensure_class_initialized(cls: type[BaseStorage]) -> None:
        """Create the caches and the creation lock if they do not exist yet."""
        if cls._cache_by_id is None:
            cls._cache_by_id = {}
        if cls._cache_by_name is None:
            cls._cache_by_name = {}
        if cls._storage_creating_lock is None:
            cls._storage_creating_lock = asyncio.Lock()

    @classmethod
    def _lookup_cached_storage(
        cls: type[BaseStorage],
        id: str | None,  # noqa: A002
        name: str | None,
    ) -> BaseStorage | None:
        """Return the cached storage for the given ID (preferred) or name, or None if there is none."""
        assert cls._cache_by_id is not None  # noqa: S101
        assert cls._cache_by_name is not None  # noqa: S101

        if id:
            return cls._cache_by_id.get(id)
        if name:
            return cls._cache_by_name.get(name)
        return None

    @classmethod
    @abstractmethod
    async def open(
        cls: type[BaseStorage],
        *,
        id: str | None = None,  # noqa: A002
        name: str | None = None,
        force_cloud: bool = False,
        config: Configuration | None = None,
    ) -> BaseStorage:
        """Open a storage, or return a cached storage object if it was opened before.

        Opens a storage with the given ID or name.
        Returns the cached storage object if the storage was opened before.

        Args:
            id (str, optional): ID of the storage to be opened.
                If neither `id` nor `name` are provided, the method returns the default storage associated with the actor run.
                If the storage with the given ID does not exist, it raises an error.
            name (str, optional): Name of the storage to be opened.
                If neither `id` nor `name` are provided, the method returns the default storage associated with the actor run.
                If the storage with the given name does not exist, it is created.
            force_cloud (bool, optional): If set to True, it will open a storage on the Apify Platform even when running the actor locally.
                Defaults to False.
            config (Configuration, optional): A `Configuration` instance, uses global configuration if omitted.

        Returns:
            An instance of the storage.

        Raises:
            RuntimeError: If a storage with the given ID does not exist.
        """
        cls._ensure_class_initialized()
        assert cls._cache_by_id is not None  # noqa: S101
        assert cls._cache_by_name is not None  # noqa: S101
        assert not (id and name)  # noqa: S101

        used_config = config or Configuration.get_global_configuration()
        used_client = StorageClientManager.get_storage_client(force_cloud=force_cloud)

        is_default_storage_on_local = False
        # Fetch default ID if no ID or name was passed
        if not id and not name:
            if isinstance(used_client, MemoryStorageClient):
                is_default_storage_on_local = True
            id = cls._get_default_id(used_config)  # noqa: A001

        # Try to get the storage instance from cache
        cached_storage = cls._lookup_cached_storage(id, name)
        if cached_storage is not None:
            # This cast is needed since MyPy doesn't understand very well that Self and Storage are the same
            return cast(BaseStorage, cached_storage)

        # Purge default storages if configured
        if used_config.purge_on_start and isinstance(used_client, MemoryStorageClient):
            await used_client._purge_on_start()

        assert cls._storage_creating_lock is not None  # noqa: S101
        async with cls._storage_creating_lock:
            # Another `open()` call may have created and cached this storage while we were awaiting
            # the purge or the lock; check again so that we never create a second instance for it.
            cached_storage = cls._lookup_cached_storage(id, name)
            if cached_storage is not None:
                return cast(BaseStorage, cached_storage)

            # Create the storage
            if id and not is_default_storage_on_local:
                single_storage_client = cls._get_single_storage_client(id, used_client)
                storage_info = await single_storage_client.get()
                if not storage_info:
                    storage_label = cls._get_human_friendly_label()
                    raise RuntimeError(f'{storage_label} with id "{id}" does not exist!')
            elif is_default_storage_on_local:
                storage_collection_client = cls._get_storage_collection_client(used_client)
                storage_info = await storage_collection_client.get_or_create(name=name, _id=id)
            else:
                storage_collection_client = cls._get_storage_collection_client(used_client)
                storage_info = await storage_collection_client.get_or_create(name=name)

            storage = cls(storage_info['id'], storage_info.get('name'), used_client, used_config)

            # Cache by id and name
            cls._cache_by_id[storage._id] = storage
            if storage._name is not None:
                cls._cache_by_name[storage._name] = storage

        return storage

    def _remove_from_cache(self: BaseStorage) -> None:
        """Remove this storage from the class-level caches; a no-op for entries that are not cached."""
        if self.__class__._cache_by_id is not None:
            self.__class__._cache_by_id.pop(self._id, None)

        if self._name and self.__class__._cache_by_name is not None:
            self.__class__._cache_by_name.pop(self._name, None)