apify 2.7.1b6__py3-none-any.whl → 2.7.1b8__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- apify/_actor.py +6 -7
- apify/_configuration.py +42 -0
- apify/_proxy_configuration.py +8 -5
- apify/_utils.py +9 -1
- apify/events/__init__.py +5 -0
- apify/events/_apify_event_manager.py +140 -0
- apify/events/_types.py +102 -0
- apify/log.py +0 -7
- apify/request_loaders/__init__.py +18 -0
- apify/{storages/_request_list.py → request_loaders/_apify_request_list.py} +22 -15
- apify/request_loaders/py.typed +0 -0
- apify/scrapy/_logging_config.py +1 -4
- apify/scrapy/extensions/_httpcache.py +9 -5
- apify/scrapy/requests.py +3 -3
- apify/scrapy/scheduler.py +8 -5
- apify/storage_clients/__init__.py +10 -0
- apify/storage_clients/_apify/__init__.py +11 -0
- apify/storage_clients/_apify/_dataset_client.py +304 -0
- apify/storage_clients/_apify/_key_value_store_client.py +241 -0
- apify/storage_clients/_apify/_models.py +107 -0
- apify/storage_clients/_apify/_request_queue_client.py +787 -0
- apify/storage_clients/_apify/_storage_client.py +80 -0
- apify/storage_clients/_apify/py.typed +0 -0
- apify/storage_clients/_file_system/__init__.py +2 -0
- apify/storage_clients/_file_system/_key_value_store_client.py +36 -0
- apify/storage_clients/_file_system/_storage_client.py +35 -0
- apify/storage_clients/py.typed +0 -0
- apify/storages/__init__.py +1 -3
- {apify-2.7.1b6.dist-info → apify-2.7.1b8.dist-info}/METADATA +7 -5
- apify-2.7.1b8.dist-info/RECORD +52 -0
- apify/_platform_event_manager.py +0 -215
- apify/apify_storage_client/__init__.py +0 -3
- apify/apify_storage_client/_apify_storage_client.py +0 -72
- apify/apify_storage_client/_dataset_client.py +0 -190
- apify/apify_storage_client/_dataset_collection_client.py +0 -51
- apify/apify_storage_client/_key_value_store_client.py +0 -109
- apify/apify_storage_client/_key_value_store_collection_client.py +0 -51
- apify/apify_storage_client/_request_queue_client.py +0 -176
- apify/apify_storage_client/_request_queue_collection_client.py +0 -51
- apify-2.7.1b6.dist-info/RECORD +0 -44
- /apify/{apify_storage_client → events}/py.typed +0 -0
- {apify-2.7.1b6.dist-info → apify-2.7.1b8.dist-info}/WHEEL +0 -0
- {apify-2.7.1b6.dist-info → apify-2.7.1b8.dist-info}/licenses/LICENSE +0 -0
|
@@ -0,0 +1,80 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from typing_extensions import override
|
|
6
|
+
|
|
7
|
+
from crawlee.storage_clients._base import StorageClient
|
|
8
|
+
|
|
9
|
+
from ._dataset_client import ApifyDatasetClient
|
|
10
|
+
from ._key_value_store_client import ApifyKeyValueStoreClient
|
|
11
|
+
from ._request_queue_client import ApifyRequestQueueClient
|
|
12
|
+
from apify._utils import docs_group
|
|
13
|
+
|
|
14
|
+
if TYPE_CHECKING:
|
|
15
|
+
from crawlee.configuration import Configuration
|
|
16
|
+
|
|
17
|
+
|
|
18
|
+
@docs_group('Storage clients')
|
|
19
|
+
class ApifyStorageClient(StorageClient):
|
|
20
|
+
"""Apify storage client."""
|
|
21
|
+
|
|
22
|
+
@override
|
|
23
|
+
async def create_dataset_client(
|
|
24
|
+
self,
|
|
25
|
+
*,
|
|
26
|
+
id: str | None = None,
|
|
27
|
+
name: str | None = None,
|
|
28
|
+
configuration: Configuration | None = None,
|
|
29
|
+
) -> ApifyDatasetClient:
|
|
30
|
+
# Import here to avoid circular imports.
|
|
31
|
+
from apify import Configuration as ApifyConfiguration # noqa: PLC0415
|
|
32
|
+
|
|
33
|
+
configuration = configuration or ApifyConfiguration.get_global_configuration()
|
|
34
|
+
if isinstance(configuration, ApifyConfiguration):
|
|
35
|
+
return await ApifyDatasetClient.open(id=id, name=name, configuration=configuration)
|
|
36
|
+
|
|
37
|
+
raise TypeError(
|
|
38
|
+
f'Expected "configuration" to be an instance of "apify.Configuration", '
|
|
39
|
+
f'but got {type(configuration).__name__} instead.'
|
|
40
|
+
)
|
|
41
|
+
|
|
42
|
+
@override
|
|
43
|
+
async def create_kvs_client(
|
|
44
|
+
self,
|
|
45
|
+
*,
|
|
46
|
+
id: str | None = None,
|
|
47
|
+
name: str | None = None,
|
|
48
|
+
configuration: Configuration | None = None,
|
|
49
|
+
) -> ApifyKeyValueStoreClient:
|
|
50
|
+
# Import here to avoid circular imports.
|
|
51
|
+
from apify import Configuration as ApifyConfiguration # noqa: PLC0415
|
|
52
|
+
|
|
53
|
+
configuration = configuration or ApifyConfiguration.get_global_configuration()
|
|
54
|
+
if isinstance(configuration, ApifyConfiguration):
|
|
55
|
+
return await ApifyKeyValueStoreClient.open(id=id, name=name, configuration=configuration)
|
|
56
|
+
|
|
57
|
+
raise TypeError(
|
|
58
|
+
f'Expected "configuration" to be an instance of "apify.Configuration", '
|
|
59
|
+
f'but got {type(configuration).__name__} instead.'
|
|
60
|
+
)
|
|
61
|
+
|
|
62
|
+
@override
|
|
63
|
+
async def create_rq_client(
|
|
64
|
+
self,
|
|
65
|
+
*,
|
|
66
|
+
id: str | None = None,
|
|
67
|
+
name: str | None = None,
|
|
68
|
+
configuration: Configuration | None = None,
|
|
69
|
+
) -> ApifyRequestQueueClient:
|
|
70
|
+
# Import here to avoid circular imports.
|
|
71
|
+
from apify import Configuration as ApifyConfiguration # noqa: PLC0415
|
|
72
|
+
|
|
73
|
+
configuration = configuration or ApifyConfiguration.get_global_configuration()
|
|
74
|
+
if isinstance(configuration, ApifyConfiguration):
|
|
75
|
+
return await ApifyRequestQueueClient.open(id=id, name=name, configuration=configuration)
|
|
76
|
+
|
|
77
|
+
raise TypeError(
|
|
78
|
+
f'Expected "configuration" to be an instance of "apify.Configuration", '
|
|
79
|
+
f'but got {type(configuration).__name__} instead.'
|
|
80
|
+
)
|
|
File without changes
|
|
@@ -0,0 +1,36 @@
|
|
|
1
|
+
import asyncio
|
|
2
|
+
|
|
3
|
+
from typing_extensions import override
|
|
4
|
+
|
|
5
|
+
from crawlee._consts import METADATA_FILENAME
|
|
6
|
+
from crawlee.storage_clients._file_system import FileSystemKeyValueStoreClient
|
|
7
|
+
|
|
8
|
+
from apify._configuration import Configuration
|
|
9
|
+
|
|
10
|
+
|
|
11
|
+
class ApifyFileSystemKeyValueStoreClient(FileSystemKeyValueStoreClient):
|
|
12
|
+
"""Apify-specific implementation of the `FileSystemKeyValueStoreClient`.
|
|
13
|
+
|
|
14
|
+
The only difference is that it overrides the `purge` method to delete all files in the key-value store
|
|
15
|
+
directory, except for the metadata file and the `INPUT.json` file.
|
|
16
|
+
"""
|
|
17
|
+
|
|
18
|
+
@override
|
|
19
|
+
async def purge(self) -> None:
|
|
20
|
+
"""Purges the key-value store by deleting all its contents.
|
|
21
|
+
|
|
22
|
+
It deletes all files in the key-value store directory, except for the metadata file and
|
|
23
|
+
the `INPUT.json` file. It also updates the metadata to reflect that the store has been purged.
|
|
24
|
+
"""
|
|
25
|
+
kvs_input_key = Configuration.get_global_configuration().input_key
|
|
26
|
+
async with self._lock:
|
|
27
|
+
for file_path in self.path_to_kvs.glob('*'):
|
|
28
|
+
if file_path.name in {METADATA_FILENAME, f'{kvs_input_key}.json'}:
|
|
29
|
+
continue
|
|
30
|
+
if file_path.is_file():
|
|
31
|
+
await asyncio.to_thread(file_path.unlink, missing_ok=True)
|
|
32
|
+
|
|
33
|
+
await self._update_metadata(
|
|
34
|
+
update_accessed_at=True,
|
|
35
|
+
update_modified_at=True,
|
|
36
|
+
)
|
|
@@ -0,0 +1,35 @@
|
|
|
1
|
+
from __future__ import annotations
|
|
2
|
+
|
|
3
|
+
from typing import TYPE_CHECKING
|
|
4
|
+
|
|
5
|
+
from typing_extensions import override
|
|
6
|
+
|
|
7
|
+
from crawlee.configuration import Configuration
|
|
8
|
+
from crawlee.storage_clients import FileSystemStorageClient
|
|
9
|
+
|
|
10
|
+
from ._key_value_store_client import ApifyFileSystemKeyValueStoreClient
|
|
11
|
+
|
|
12
|
+
if TYPE_CHECKING:
|
|
13
|
+
from crawlee.storage_clients._file_system import FileSystemKeyValueStoreClient
|
|
14
|
+
|
|
15
|
+
|
|
16
|
+
class ApifyFileSystemStorageClient(FileSystemStorageClient):
|
|
17
|
+
"""Apify-specific implementation of the file system storage client.
|
|
18
|
+
|
|
19
|
+
The only difference is that it uses `ApifyFileSystemKeyValueStoreClient` for key-value stores,
|
|
20
|
+
which overrides the `purge` method to delete all files in the key-value store directory
|
|
21
|
+
except for the metadata file and the `INPUT.json` file.
|
|
22
|
+
"""
|
|
23
|
+
|
|
24
|
+
@override
|
|
25
|
+
async def create_kvs_client(
|
|
26
|
+
self,
|
|
27
|
+
*,
|
|
28
|
+
id: str | None = None,
|
|
29
|
+
name: str | None = None,
|
|
30
|
+
configuration: Configuration | None = None,
|
|
31
|
+
) -> FileSystemKeyValueStoreClient:
|
|
32
|
+
configuration = configuration or Configuration.get_global_configuration()
|
|
33
|
+
client = await ApifyFileSystemKeyValueStoreClient.open(id=id, name=name, configuration=configuration)
|
|
34
|
+
await self._purge_if_needed(client, configuration)
|
|
35
|
+
return client
|
|
File without changes
|
apify/storages/__init__.py
CHANGED
|
@@ -1,6 +1,6 @@
|
|
|
1
1
|
Metadata-Version: 2.4
|
|
2
2
|
Name: apify
|
|
3
|
-
Version: 2.7.1b6
|
|
3
|
+
Version: 2.7.1b8
|
|
4
4
|
Summary: Apify SDK for Python
|
|
5
5
|
Project-URL: Apify Homepage, https://apify.com
|
|
6
6
|
Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
|
|
@@ -225,15 +225,17 @@ Classifier: Programming Language :: Python :: 3.12
|
|
|
225
225
|
Classifier: Programming Language :: Python :: 3.13
|
|
226
226
|
Classifier: Topic :: Software Development :: Libraries
|
|
227
227
|
Requires-Python: >=3.10
|
|
228
|
-
Requires-Dist: apify-client<2.0.0
|
|
229
|
-
Requires-Dist: apify-shared<2.0.0
|
|
230
|
-
Requires-Dist:
|
|
228
|
+
Requires-Dist: apify-client<3.0.0,>=2.0.0
|
|
229
|
+
Requires-Dist: apify-shared<3.0.0,>=2.0.0
|
|
230
|
+
Requires-Dist: cachetools>=5.5.0
|
|
231
|
+
Requires-Dist: crawlee==1.0.0rc1
|
|
231
232
|
Requires-Dist: cryptography>=42.0.0
|
|
232
|
-
Requires-Dist:
|
|
233
|
+
Requires-Dist: impit>=0.5.3
|
|
233
234
|
Requires-Dist: lazy-object-proxy<1.11.0
|
|
234
235
|
Requires-Dist: more-itertools>=10.2.0
|
|
235
236
|
Requires-Dist: typing-extensions>=4.1.0
|
|
236
237
|
Requires-Dist: websockets>=14.0
|
|
238
|
+
Requires-Dist: yarl>=1.18.0
|
|
237
239
|
Provides-Extra: scrapy
|
|
238
240
|
Requires-Dist: scrapy>=2.11.0; extra == 'scrapy'
|
|
239
241
|
Description-Content-Type: text/markdown
|
|
@@ -0,0 +1,52 @@
|
|
|
1
|
+
apify/__init__.py,sha256=HpgKg2FZWJuSPfDygzJ62psylhw4NN4tKFnoYUIhcd4,838
|
|
2
|
+
apify/_actor.py,sha256=kOEvs_3TxjZ1PlwLEnIEL5nAD12oKs0ULldYyI-9irw,52985
|
|
3
|
+
apify/_charging.py,sha256=mJ-BueULWZxqvbdM_WGbsb-V3vTJ8Gw38k81eGwJhVY,12481
|
|
4
|
+
apify/_configuration.py,sha256=Ta-qPlKenLaI0IOlODg_A4ZwmPtnuS7OWLSJyexcqmA,13283
|
|
5
|
+
apify/_consts.py,sha256=CjhyEJ4Mi0lcIrzfqz8dN7nPJWGjCeBrrXQy1PZ6zRI,440
|
|
6
|
+
apify/_crypto.py,sha256=tqUs13QkemDtGzvU41pIA2HUEawpDlgzqbwKjm4I8kM,6852
|
|
7
|
+
apify/_models.py,sha256=EzU-inWeJ7T5HNVYEwnYb79W-q4OAPhtrYctfRYzpTE,7848
|
|
8
|
+
apify/_proxy_configuration.py,sha256=K9V4vG9-bAx7_a4l0zHhfbzvzopJeHek-qUJ05aQ6wI,13066
|
|
9
|
+
apify/_utils.py,sha256=og_zzRXELQmirklJWp1kSV7pwQPFfAE81UO3IZ4xBNs,2414
|
|
10
|
+
apify/log.py,sha256=Ry251sK4qPRGiFGe3DKcOZazg5OzC6RXS546Zzk8H2M,1003
|
|
11
|
+
apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
12
|
+
apify/events/__init__.py,sha256=daZzFNJlJFnm0HZZQVeT26pvRWufjHzKZYERANgfX4o,182
|
|
13
|
+
apify/events/_apify_event_manager.py,sha256=yArFrKa4wWDZo32iwaA3F_w36VSJf1Yaj_L1opo8ncU,5917
|
|
14
|
+
apify/events/_types.py,sha256=F0BHgACqnRfmdQ9GUcpnZvPxzw2bdRr8BqbGSA4cHeQ,3050
|
|
15
|
+
apify/events/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
16
|
+
apify/request_loaders/__init__.py,sha256=SJqW0FbdZKEtAMB5kBLgqROzh3KmQc2CNEIhJpTGdPQ,356
|
|
17
|
+
apify/request_loaders/_apify_request_list.py,sha256=kurCxX2jAKzHJ5N1Co6KjIgptqgVmjR0WpT8bd6uK9A,6220
|
|
18
|
+
apify/request_loaders/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
19
|
+
apify/scrapy/__init__.py,sha256=m2a0ts_JY9xJkBy4JU5mV8PJqjA3GGKLXBFu4nl-n-A,1048
|
|
20
|
+
apify/scrapy/_actor_runner.py,sha256=rXWSnlQWGskDUH8PtLCv5SkOIx4AiVa4QbCYeCett5c,938
|
|
21
|
+
apify/scrapy/_async_thread.py,sha256=8xif_fWce7vaMLuDc-XuDzZlHbCI-NY61YXdP2P27QY,4753
|
|
22
|
+
apify/scrapy/_logging_config.py,sha256=KZttIiMXcfu_isYcvXQnWdoYFdYI2wK5AAdbhd57cp4,1819
|
|
23
|
+
apify/scrapy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
24
|
+
apify/scrapy/requests.py,sha256=JNVsITB-4xtT8LnkQc8z_G4OcEE9CmkMUvLGqbva5ZI,6460
|
|
25
|
+
apify/scrapy/scheduler.py,sha256=hvTJ9AxLoifqZi7C8MtLcCP0ujGk4D4dgq4qbCD_zco,6098
|
|
26
|
+
apify/scrapy/utils.py,sha256=Ssfa-P9-g9XYP1suDce6dQ8ta7PfijiPoMl2iplE6Ow,2126
|
|
27
|
+
apify/scrapy/extensions/__init__.py,sha256=cVQ8CCtOsJsRP28YKZWSUsi4FBwxI-yPJRNSXPFSa_o,98
|
|
28
|
+
apify/scrapy/extensions/_httpcache.py,sha256=XIS9vFCcUtdSfeKAKnxRnI9dX_GMmX2Od8OPnOaDhQ0,8870
|
|
29
|
+
apify/scrapy/middlewares/__init__.py,sha256=tfW-d3WFWLeNEjL8fTmon6NwgD-OXx1Bw2fBdU-wPy4,114
|
|
30
|
+
apify/scrapy/middlewares/apify_proxy.py,sha256=CDAOXS3bcVDZHM3B0GvhXbxEikMIadLF_0P73WL_nI4,5550
|
|
31
|
+
apify/scrapy/middlewares/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
32
|
+
apify/scrapy/pipelines/__init__.py,sha256=GWPeLN_Zwj8vRBWtXW6DaxdB7mvyQ7Jw5Tz1ccgWlZI,119
|
|
33
|
+
apify/scrapy/pipelines/actor_dataset_push.py,sha256=XUUyznQTD-E3wYUUFt2WAOnWhbnRrY0WuedlfYfYhDI,846
|
|
34
|
+
apify/scrapy/pipelines/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
35
|
+
apify/storage_clients/__init__.py,sha256=9WLAKs2GnnP0yyKR0mc3AfJ1IqXF48V3KPMp6KaB8kU,277
|
|
36
|
+
apify/storage_clients/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
37
|
+
apify/storage_clients/_apify/__init__.py,sha256=mtbVDsxqWL3kx30elnh0kAn2kZ4s3BBsWa15Y5e7RMU,347
|
|
38
|
+
apify/storage_clients/_apify/_dataset_client.py,sha256=8ZQvbtXZm54-V0Ukio0Z4jVI2gGkfqzZ59GlBQJXGUU,11485
|
|
39
|
+
apify/storage_clients/_apify/_key_value_store_client.py,sha256=WbyzDCFmJS2hd_7ddYL3JEO9zvjUAAE1D_F4kohiim4,9455
|
|
40
|
+
apify/storage_clients/_apify/_models.py,sha256=C6FpXswtO6kXE5RUumazm_conzJJS6PrXAGF9XBuDb8,3651
|
|
41
|
+
apify/storage_clients/_apify/_request_queue_client.py,sha256=sMinB02V9236PH7fQTQ0AIwH6oObnZv2ivzKBEgaLOk,31372
|
|
42
|
+
apify/storage_clients/_apify/_storage_client.py,sha256=5me6gHOeNAG3JaHxKRdzsZaa3FsqLDbObjhECGGWrr4,2890
|
|
43
|
+
apify/storage_clients/_apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
44
|
+
apify/storage_clients/_file_system/__init__.py,sha256=rDbXatXV9wHKPhKTrXDzWnexhTm7sIJQWucMi-P-SD4,130
|
|
45
|
+
apify/storage_clients/_file_system/_key_value_store_client.py,sha256=DHDv_e0kFwhBCIXUadZAamHhUao3NsSniUEXgnttnVY,1430
|
|
46
|
+
apify/storage_clients/_file_system/_storage_client.py,sha256=UwxuSvhbyQ7zR1db1hTmZ1h38yH7btHNp82X7e8MWWE,1290
|
|
47
|
+
apify/storages/__init__.py,sha256=-9tEYJVabVs_eRVhUehxN58GH0UG8OfuGjGwuDieP2M,122
|
|
48
|
+
apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
|
|
49
|
+
apify-2.7.1b8.dist-info/METADATA,sha256=9OY09tUOf00U-K6_kvD63Vs8DKsDEGJs4SPTaPhwzq0,21800
|
|
50
|
+
apify-2.7.1b8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
|
|
51
|
+
apify-2.7.1b8.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
|
|
52
|
+
apify-2.7.1b8.dist-info/RECORD,,
|
apify/_platform_event_manager.py
DELETED
|
@@ -1,215 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
from datetime import datetime
|
|
5
|
-
from typing import TYPE_CHECKING, Annotated, Any, Literal
|
|
6
|
-
|
|
7
|
-
import websockets.asyncio.client
|
|
8
|
-
from pydantic import BaseModel, Discriminator, Field, TypeAdapter
|
|
9
|
-
from typing_extensions import Self, Unpack, override
|
|
10
|
-
|
|
11
|
-
from crawlee.events._event_manager import EventManager, EventManagerOptions
|
|
12
|
-
from crawlee.events._local_event_manager import LocalEventManager
|
|
13
|
-
from crawlee.events._types import (
|
|
14
|
-
Event,
|
|
15
|
-
EventAbortingData,
|
|
16
|
-
EventExitData,
|
|
17
|
-
EventMigratingData,
|
|
18
|
-
EventPersistStateData,
|
|
19
|
-
EventSystemInfoData,
|
|
20
|
-
)
|
|
21
|
-
|
|
22
|
-
from apify._utils import docs_group
|
|
23
|
-
from apify.log import logger
|
|
24
|
-
|
|
25
|
-
if TYPE_CHECKING:
|
|
26
|
-
from types import TracebackType
|
|
27
|
-
|
|
28
|
-
from apify._configuration import Configuration
|
|
29
|
-
|
|
30
|
-
__all__ = ['EventManager', 'LocalEventManager', 'PlatformEventManager']
|
|
31
|
-
|
|
32
|
-
|
|
33
|
-
@docs_group('Event data')
|
|
34
|
-
class SystemInfoEventData(BaseModel):
|
|
35
|
-
mem_avg_bytes: Annotated[float, Field(alias='memAvgBytes')]
|
|
36
|
-
mem_current_bytes: Annotated[float, Field(alias='memCurrentBytes')]
|
|
37
|
-
mem_max_bytes: Annotated[float, Field(alias='memMaxBytes')]
|
|
38
|
-
cpu_avg_usage: Annotated[float, Field(alias='cpuAvgUsage')]
|
|
39
|
-
cpu_max_usage: Annotated[float, Field(alias='cpuMaxUsage')]
|
|
40
|
-
cpu_current_usage: Annotated[float, Field(alias='cpuCurrentUsage')]
|
|
41
|
-
is_cpu_overloaded: Annotated[bool, Field(alias='isCpuOverloaded')]
|
|
42
|
-
created_at: Annotated[datetime, Field(alias='createdAt')]
|
|
43
|
-
|
|
44
|
-
def to_crawlee_format(self, dedicated_cpus: float) -> EventSystemInfoData:
|
|
45
|
-
return EventSystemInfoData.model_validate(
|
|
46
|
-
{
|
|
47
|
-
'cpu_info': {
|
|
48
|
-
'used_ratio': (self.cpu_current_usage / 100) / dedicated_cpus,
|
|
49
|
-
'created_at': self.created_at,
|
|
50
|
-
},
|
|
51
|
-
'memory_info': {
|
|
52
|
-
'total_size': self.mem_max_bytes,
|
|
53
|
-
'current_size': self.mem_current_bytes,
|
|
54
|
-
'created_at': self.created_at,
|
|
55
|
-
},
|
|
56
|
-
}
|
|
57
|
-
)
|
|
58
|
-
|
|
59
|
-
|
|
60
|
-
@docs_group('Events')
|
|
61
|
-
class PersistStateEvent(BaseModel):
|
|
62
|
-
name: Literal[Event.PERSIST_STATE]
|
|
63
|
-
data: Annotated[EventPersistStateData, Field(default_factory=lambda: EventPersistStateData(is_migrating=False))]
|
|
64
|
-
|
|
65
|
-
|
|
66
|
-
@docs_group('Events')
|
|
67
|
-
class SystemInfoEvent(BaseModel):
|
|
68
|
-
name: Literal[Event.SYSTEM_INFO]
|
|
69
|
-
data: SystemInfoEventData
|
|
70
|
-
|
|
71
|
-
|
|
72
|
-
@docs_group('Events')
|
|
73
|
-
class MigratingEvent(BaseModel):
|
|
74
|
-
name: Literal[Event.MIGRATING]
|
|
75
|
-
data: Annotated[EventMigratingData, Field(default_factory=EventMigratingData)]
|
|
76
|
-
|
|
77
|
-
|
|
78
|
-
@docs_group('Events')
|
|
79
|
-
class AbortingEvent(BaseModel):
|
|
80
|
-
name: Literal[Event.ABORTING]
|
|
81
|
-
data: Annotated[EventAbortingData, Field(default_factory=EventAbortingData)]
|
|
82
|
-
|
|
83
|
-
|
|
84
|
-
@docs_group('Events')
|
|
85
|
-
class ExitEvent(BaseModel):
|
|
86
|
-
name: Literal[Event.EXIT]
|
|
87
|
-
data: Annotated[EventExitData, Field(default_factory=EventExitData)]
|
|
88
|
-
|
|
89
|
-
|
|
90
|
-
@docs_group('Events')
|
|
91
|
-
class EventWithoutData(BaseModel):
|
|
92
|
-
name: Literal[
|
|
93
|
-
Event.SESSION_RETIRED,
|
|
94
|
-
Event.BROWSER_LAUNCHED,
|
|
95
|
-
Event.BROWSER_RETIRED,
|
|
96
|
-
Event.BROWSER_CLOSED,
|
|
97
|
-
Event.PAGE_CREATED,
|
|
98
|
-
Event.PAGE_CLOSED,
|
|
99
|
-
]
|
|
100
|
-
data: Any = None
|
|
101
|
-
|
|
102
|
-
|
|
103
|
-
@docs_group('Events')
|
|
104
|
-
class DeprecatedEvent(BaseModel):
|
|
105
|
-
name: Literal['cpuInfo']
|
|
106
|
-
data: Annotated[dict[str, Any], Field(default_factory=dict)]
|
|
107
|
-
|
|
108
|
-
|
|
109
|
-
@docs_group('Events')
|
|
110
|
-
class UnknownEvent(BaseModel):
|
|
111
|
-
name: str
|
|
112
|
-
data: Annotated[dict[str, Any], Field(default_factory=dict)]
|
|
113
|
-
|
|
114
|
-
|
|
115
|
-
EventMessage = PersistStateEvent | SystemInfoEvent | MigratingEvent | AbortingEvent | ExitEvent | EventWithoutData
|
|
116
|
-
|
|
117
|
-
event_data_adapter = TypeAdapter[EventMessage | DeprecatedEvent | UnknownEvent](
|
|
118
|
-
Annotated[EventMessage, Discriminator('name')] | DeprecatedEvent | UnknownEvent
|
|
119
|
-
)
|
|
120
|
-
|
|
121
|
-
|
|
122
|
-
@docs_group('Event managers')
|
|
123
|
-
class PlatformEventManager(EventManager):
|
|
124
|
-
"""A class for managing Actor events.
|
|
125
|
-
|
|
126
|
-
You shouldn't use this class directly,
|
|
127
|
-
but instead use it via the `Actor.on()` and `Actor.off()` methods.
|
|
128
|
-
"""
|
|
129
|
-
|
|
130
|
-
_platform_events_websocket: websockets.asyncio.client.ClientConnection | None = None
|
|
131
|
-
_process_platform_messages_task: asyncio.Task | None = None
|
|
132
|
-
_send_system_info_interval_task: asyncio.Task | None = None
|
|
133
|
-
_connected_to_platform_websocket: asyncio.Future = asyncio.Future()
|
|
134
|
-
|
|
135
|
-
def __init__(self, config: Configuration, **kwargs: Unpack[EventManagerOptions]) -> None:
|
|
136
|
-
"""Create an instance of the EventManager.
|
|
137
|
-
|
|
138
|
-
Args:
|
|
139
|
-
config: The Actor configuration to be used in this event manager.
|
|
140
|
-
kwargs: Event manager options - forwarded to the base class
|
|
141
|
-
"""
|
|
142
|
-
super().__init__(**kwargs)
|
|
143
|
-
|
|
144
|
-
self._config = config
|
|
145
|
-
self._listener_tasks = set()
|
|
146
|
-
self._connected_to_platform_websocket = asyncio.Future[bool]()
|
|
147
|
-
|
|
148
|
-
@override
|
|
149
|
-
async def __aenter__(self) -> Self:
|
|
150
|
-
await super().__aenter__()
|
|
151
|
-
self._connected_to_platform_websocket = asyncio.Future()
|
|
152
|
-
|
|
153
|
-
# Run tasks but don't await them
|
|
154
|
-
if self._config.actor_events_ws_url:
|
|
155
|
-
self._process_platform_messages_task = asyncio.create_task(
|
|
156
|
-
self._process_platform_messages(self._config.actor_events_ws_url)
|
|
157
|
-
)
|
|
158
|
-
is_connected = await self._connected_to_platform_websocket
|
|
159
|
-
if not is_connected:
|
|
160
|
-
raise RuntimeError('Error connecting to platform events websocket!')
|
|
161
|
-
else:
|
|
162
|
-
logger.debug('APIFY_ACTOR_EVENTS_WS_URL env var not set, no events from Apify platform will be emitted.')
|
|
163
|
-
|
|
164
|
-
return self
|
|
165
|
-
|
|
166
|
-
@override
|
|
167
|
-
async def __aexit__(
|
|
168
|
-
self,
|
|
169
|
-
exc_type: type[BaseException] | None,
|
|
170
|
-
exc_value: BaseException | None,
|
|
171
|
-
exc_traceback: TracebackType | None,
|
|
172
|
-
) -> None:
|
|
173
|
-
if self._platform_events_websocket:
|
|
174
|
-
await self._platform_events_websocket.close()
|
|
175
|
-
|
|
176
|
-
if self._process_platform_messages_task:
|
|
177
|
-
await self._process_platform_messages_task
|
|
178
|
-
|
|
179
|
-
await super().__aexit__(exc_type, exc_value, exc_traceback)
|
|
180
|
-
|
|
181
|
-
async def _process_platform_messages(self, ws_url: str) -> None:
|
|
182
|
-
try:
|
|
183
|
-
async with websockets.asyncio.client.connect(ws_url) as websocket:
|
|
184
|
-
self._platform_events_websocket = websocket
|
|
185
|
-
self._connected_to_platform_websocket.set_result(True)
|
|
186
|
-
|
|
187
|
-
async for message in websocket:
|
|
188
|
-
try:
|
|
189
|
-
parsed_message = event_data_adapter.validate_json(message)
|
|
190
|
-
|
|
191
|
-
if isinstance(parsed_message, DeprecatedEvent):
|
|
192
|
-
continue
|
|
193
|
-
|
|
194
|
-
if isinstance(parsed_message, UnknownEvent):
|
|
195
|
-
logger.info(
|
|
196
|
-
f'Unknown message received: event_name={parsed_message.name}, '
|
|
197
|
-
f'event_data={parsed_message.data}'
|
|
198
|
-
)
|
|
199
|
-
continue
|
|
200
|
-
|
|
201
|
-
self.emit(
|
|
202
|
-
event=parsed_message.name,
|
|
203
|
-
event_data=parsed_message.data
|
|
204
|
-
if not isinstance(parsed_message.data, SystemInfoEventData)
|
|
205
|
-
else parsed_message.data.to_crawlee_format(self._config.dedicated_cpus or 1),
|
|
206
|
-
)
|
|
207
|
-
|
|
208
|
-
if parsed_message.name == Event.MIGRATING:
|
|
209
|
-
await self._emit_persist_state_event_rec_task.stop()
|
|
210
|
-
self.emit(event=Event.PERSIST_STATE, event_data=EventPersistStateData(is_migrating=True))
|
|
211
|
-
except Exception:
|
|
212
|
-
logger.exception('Cannot parse Actor event', extra={'message': message})
|
|
213
|
-
except Exception:
|
|
214
|
-
logger.exception('Error in websocket connection')
|
|
215
|
-
self._connected_to_platform_websocket.set_result(False)
|
|
@@ -1,72 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
from typing import TYPE_CHECKING
|
|
4
|
-
|
|
5
|
-
from typing_extensions import override
|
|
6
|
-
|
|
7
|
-
from apify_client import ApifyClientAsync
|
|
8
|
-
from crawlee._utils.crypto import crypto_random_object_id
|
|
9
|
-
from crawlee.storage_clients import StorageClient
|
|
10
|
-
|
|
11
|
-
from apify._utils import docs_group
|
|
12
|
-
from apify.apify_storage_client._dataset_client import DatasetClient
|
|
13
|
-
from apify.apify_storage_client._dataset_collection_client import DatasetCollectionClient
|
|
14
|
-
from apify.apify_storage_client._key_value_store_client import KeyValueStoreClient
|
|
15
|
-
from apify.apify_storage_client._key_value_store_collection_client import KeyValueStoreCollectionClient
|
|
16
|
-
from apify.apify_storage_client._request_queue_client import RequestQueueClient
|
|
17
|
-
from apify.apify_storage_client._request_queue_collection_client import RequestQueueCollectionClient
|
|
18
|
-
|
|
19
|
-
if TYPE_CHECKING:
|
|
20
|
-
from apify._configuration import Configuration
|
|
21
|
-
|
|
22
|
-
|
|
23
|
-
@docs_group('Storage clients')
|
|
24
|
-
class ApifyStorageClient(StorageClient):
|
|
25
|
-
"""A storage client implementation based on the Apify platform storage."""
|
|
26
|
-
|
|
27
|
-
def __init__(self, *, configuration: Configuration) -> None:
|
|
28
|
-
self._client_key = crypto_random_object_id()
|
|
29
|
-
self._apify_client = ApifyClientAsync(
|
|
30
|
-
token=configuration.token,
|
|
31
|
-
api_url=configuration.api_base_url,
|
|
32
|
-
max_retries=8,
|
|
33
|
-
min_delay_between_retries_millis=500,
|
|
34
|
-
timeout_secs=360,
|
|
35
|
-
)
|
|
36
|
-
self._configuration = configuration
|
|
37
|
-
|
|
38
|
-
@classmethod
|
|
39
|
-
def from_config(cls, config: Configuration) -> ApifyStorageClient:
|
|
40
|
-
return cls(configuration=config)
|
|
41
|
-
|
|
42
|
-
@override
|
|
43
|
-
def dataset(self, id: str) -> DatasetClient:
|
|
44
|
-
return DatasetClient(self._apify_client.dataset(id))
|
|
45
|
-
|
|
46
|
-
@override
|
|
47
|
-
def datasets(self) -> DatasetCollectionClient:
|
|
48
|
-
return DatasetCollectionClient(self._apify_client.datasets())
|
|
49
|
-
|
|
50
|
-
@override
|
|
51
|
-
def key_value_store(self, id: str) -> KeyValueStoreClient:
|
|
52
|
-
return KeyValueStoreClient(self._apify_client.key_value_store(id), self._configuration.api_public_base_url)
|
|
53
|
-
|
|
54
|
-
@override
|
|
55
|
-
def key_value_stores(self) -> KeyValueStoreCollectionClient:
|
|
56
|
-
return KeyValueStoreCollectionClient(self._apify_client.key_value_stores())
|
|
57
|
-
|
|
58
|
-
@override
|
|
59
|
-
def request_queue(self, id: str) -> RequestQueueClient:
|
|
60
|
-
return RequestQueueClient(self._apify_client.request_queue(id, client_key=self._client_key))
|
|
61
|
-
|
|
62
|
-
@override
|
|
63
|
-
def request_queues(self) -> RequestQueueCollectionClient:
|
|
64
|
-
return RequestQueueCollectionClient(self._apify_client.request_queues())
|
|
65
|
-
|
|
66
|
-
@override
|
|
67
|
-
async def purge_on_start(self) -> None:
|
|
68
|
-
pass
|
|
69
|
-
|
|
70
|
-
@override
|
|
71
|
-
def get_rate_limit_errors(self) -> dict[int, int]:
|
|
72
|
-
return self._apify_client.stats.rate_limit_errors
|