apify 2.7.1b6__py3-none-any.whl → 2.7.1b8__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.


This version of apify might be problematic. Click here for more details.

Files changed (43) hide show
  1. apify/_actor.py +6 -7
  2. apify/_configuration.py +42 -0
  3. apify/_proxy_configuration.py +8 -5
  4. apify/_utils.py +9 -1
  5. apify/events/__init__.py +5 -0
  6. apify/events/_apify_event_manager.py +140 -0
  7. apify/events/_types.py +102 -0
  8. apify/log.py +0 -7
  9. apify/request_loaders/__init__.py +18 -0
  10. apify/{storages/_request_list.py → request_loaders/_apify_request_list.py} +22 -15
  11. apify/request_loaders/py.typed +0 -0
  12. apify/scrapy/_logging_config.py +1 -4
  13. apify/scrapy/extensions/_httpcache.py +9 -5
  14. apify/scrapy/requests.py +3 -3
  15. apify/scrapy/scheduler.py +8 -5
  16. apify/storage_clients/__init__.py +10 -0
  17. apify/storage_clients/_apify/__init__.py +11 -0
  18. apify/storage_clients/_apify/_dataset_client.py +304 -0
  19. apify/storage_clients/_apify/_key_value_store_client.py +241 -0
  20. apify/storage_clients/_apify/_models.py +107 -0
  21. apify/storage_clients/_apify/_request_queue_client.py +787 -0
  22. apify/storage_clients/_apify/_storage_client.py +80 -0
  23. apify/storage_clients/_apify/py.typed +0 -0
  24. apify/storage_clients/_file_system/__init__.py +2 -0
  25. apify/storage_clients/_file_system/_key_value_store_client.py +36 -0
  26. apify/storage_clients/_file_system/_storage_client.py +35 -0
  27. apify/storage_clients/py.typed +0 -0
  28. apify/storages/__init__.py +1 -3
  29. {apify-2.7.1b6.dist-info → apify-2.7.1b8.dist-info}/METADATA +7 -5
  30. apify-2.7.1b8.dist-info/RECORD +52 -0
  31. apify/_platform_event_manager.py +0 -215
  32. apify/apify_storage_client/__init__.py +0 -3
  33. apify/apify_storage_client/_apify_storage_client.py +0 -72
  34. apify/apify_storage_client/_dataset_client.py +0 -190
  35. apify/apify_storage_client/_dataset_collection_client.py +0 -51
  36. apify/apify_storage_client/_key_value_store_client.py +0 -109
  37. apify/apify_storage_client/_key_value_store_collection_client.py +0 -51
  38. apify/apify_storage_client/_request_queue_client.py +0 -176
  39. apify/apify_storage_client/_request_queue_collection_client.py +0 -51
  40. apify-2.7.1b6.dist-info/RECORD +0 -44
  41. /apify/{apify_storage_client → events}/py.typed +0 -0
  42. {apify-2.7.1b6.dist-info → apify-2.7.1b8.dist-info}/WHEEL +0 -0
  43. {apify-2.7.1b6.dist-info → apify-2.7.1b8.dist-info}/licenses/LICENSE +0 -0
@@ -0,0 +1,80 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ from typing_extensions import override
6
+
7
+ from crawlee.storage_clients._base import StorageClient
8
+
9
+ from ._dataset_client import ApifyDatasetClient
10
+ from ._key_value_store_client import ApifyKeyValueStoreClient
11
+ from ._request_queue_client import ApifyRequestQueueClient
12
+ from apify._utils import docs_group
13
+
14
+ if TYPE_CHECKING:
15
+ from crawlee.configuration import Configuration
16
+
17
+
18
+ @docs_group('Storage clients')
19
+ class ApifyStorageClient(StorageClient):
20
+ """Apify storage client."""
21
+
22
+ @override
23
+ async def create_dataset_client(
24
+ self,
25
+ *,
26
+ id: str | None = None,
27
+ name: str | None = None,
28
+ configuration: Configuration | None = None,
29
+ ) -> ApifyDatasetClient:
30
+ # Import here to avoid circular imports.
31
+ from apify import Configuration as ApifyConfiguration # noqa: PLC0415
32
+
33
+ configuration = configuration or ApifyConfiguration.get_global_configuration()
34
+ if isinstance(configuration, ApifyConfiguration):
35
+ return await ApifyDatasetClient.open(id=id, name=name, configuration=configuration)
36
+
37
+ raise TypeError(
38
+ f'Expected "configuration" to be an instance of "apify.Configuration", '
39
+ f'but got {type(configuration).__name__} instead.'
40
+ )
41
+
42
+ @override
43
+ async def create_kvs_client(
44
+ self,
45
+ *,
46
+ id: str | None = None,
47
+ name: str | None = None,
48
+ configuration: Configuration | None = None,
49
+ ) -> ApifyKeyValueStoreClient:
50
+ # Import here to avoid circular imports.
51
+ from apify import Configuration as ApifyConfiguration # noqa: PLC0415
52
+
53
+ configuration = configuration or ApifyConfiguration.get_global_configuration()
54
+ if isinstance(configuration, ApifyConfiguration):
55
+ return await ApifyKeyValueStoreClient.open(id=id, name=name, configuration=configuration)
56
+
57
+ raise TypeError(
58
+ f'Expected "configuration" to be an instance of "apify.Configuration", '
59
+ f'but got {type(configuration).__name__} instead.'
60
+ )
61
+
62
+ @override
63
+ async def create_rq_client(
64
+ self,
65
+ *,
66
+ id: str | None = None,
67
+ name: str | None = None,
68
+ configuration: Configuration | None = None,
69
+ ) -> ApifyRequestQueueClient:
70
+ # Import here to avoid circular imports.
71
+ from apify import Configuration as ApifyConfiguration # noqa: PLC0415
72
+
73
+ configuration = configuration or ApifyConfiguration.get_global_configuration()
74
+ if isinstance(configuration, ApifyConfiguration):
75
+ return await ApifyRequestQueueClient.open(id=id, name=name, configuration=configuration)
76
+
77
+ raise TypeError(
78
+ f'Expected "configuration" to be an instance of "apify.Configuration", '
79
+ f'but got {type(configuration).__name__} instead.'
80
+ )
File without changes
@@ -0,0 +1,2 @@
1
+ from ._key_value_store_client import ApifyFileSystemKeyValueStoreClient
2
+ from ._storage_client import ApifyFileSystemStorageClient
@@ -0,0 +1,36 @@
1
+ import asyncio
2
+
3
+ from typing_extensions import override
4
+
5
+ from crawlee._consts import METADATA_FILENAME
6
+ from crawlee.storage_clients._file_system import FileSystemKeyValueStoreClient
7
+
8
+ from apify._configuration import Configuration
9
+
10
+
11
+ class ApifyFileSystemKeyValueStoreClient(FileSystemKeyValueStoreClient):
12
+ """Apify-specific implementation of the `FileSystemKeyValueStoreClient`.
13
+
14
+ The only difference is that it overrides the `purge` method to delete all files in the key-value store
15
+ directory, except for the metadata file and the `INPUT.json` file.
16
+ """
17
+
18
+ @override
19
+ async def purge(self) -> None:
20
+ """Purges the key-value store by deleting all its contents.
21
+
22
+ It deletes all files in the key-value store directory, except for the metadata file and
23
+ the `INPUT.json` file. It also updates the metadata to reflect that the store has been purged.
24
+ """
25
+ kvs_input_key = Configuration.get_global_configuration().input_key
26
+ async with self._lock:
27
+ for file_path in self.path_to_kvs.glob('*'):
28
+ if file_path.name in {METADATA_FILENAME, f'{kvs_input_key}.json'}:
29
+ continue
30
+ if file_path.is_file():
31
+ await asyncio.to_thread(file_path.unlink, missing_ok=True)
32
+
33
+ await self._update_metadata(
34
+ update_accessed_at=True,
35
+ update_modified_at=True,
36
+ )
@@ -0,0 +1,35 @@
1
+ from __future__ import annotations
2
+
3
+ from typing import TYPE_CHECKING
4
+
5
+ from typing_extensions import override
6
+
7
+ from crawlee.configuration import Configuration
8
+ from crawlee.storage_clients import FileSystemStorageClient
9
+
10
+ from ._key_value_store_client import ApifyFileSystemKeyValueStoreClient
11
+
12
+ if TYPE_CHECKING:
13
+ from crawlee.storage_clients._file_system import FileSystemKeyValueStoreClient
14
+
15
+
16
+ class ApifyFileSystemStorageClient(FileSystemStorageClient):
17
+ """Apify-specific implementation of the file system storage client.
18
+
19
+ The only difference is that it uses `ApifyFileSystemKeyValueStoreClient` for key-value stores,
20
+ which overrides the `purge` method to delete all files in the key-value store directory
21
+ except for the metadata file and the `INPUT.json` file.
22
+ """
23
+
24
+ @override
25
+ async def create_kvs_client(
26
+ self,
27
+ *,
28
+ id: str | None = None,
29
+ name: str | None = None,
30
+ configuration: Configuration | None = None,
31
+ ) -> FileSystemKeyValueStoreClient:
32
+ configuration = configuration or Configuration.get_global_configuration()
33
+ client = await ApifyFileSystemKeyValueStoreClient.open(id=id, name=name, configuration=configuration)
34
+ await self._purge_if_needed(client, configuration)
35
+ return client
File without changes
@@ -1,5 +1,3 @@
1
1
  from crawlee.storages import Dataset, KeyValueStore, RequestQueue
2
2
 
3
- from ._request_list import RequestList
4
-
5
- __all__ = ['Dataset', 'KeyValueStore', 'RequestList', 'RequestQueue']
3
+ __all__ = ['Dataset', 'KeyValueStore', 'RequestQueue']
@@ -1,6 +1,6 @@
1
1
  Metadata-Version: 2.4
2
2
  Name: apify
3
- Version: 2.7.1b6
3
+ Version: 2.7.1b8
4
4
  Summary: Apify SDK for Python
5
5
  Project-URL: Apify Homepage, https://apify.com
6
6
  Project-URL: Changelog, https://docs.apify.com/sdk/python/docs/changelog
@@ -225,15 +225,17 @@ Classifier: Programming Language :: Python :: 3.12
225
225
  Classifier: Programming Language :: Python :: 3.13
226
226
  Classifier: Topic :: Software Development :: Libraries
227
227
  Requires-Python: >=3.10
228
- Requires-Dist: apify-client<2.0.0
229
- Requires-Dist: apify-shared<2.0.0
230
- Requires-Dist: crawlee~=0.6.0
228
+ Requires-Dist: apify-client<3.0.0,>=2.0.0
229
+ Requires-Dist: apify-shared<3.0.0,>=2.0.0
230
+ Requires-Dist: cachetools>=5.5.0
231
+ Requires-Dist: crawlee==1.0.0rc1
231
232
  Requires-Dist: cryptography>=42.0.0
232
- Requires-Dist: httpx>=0.27.0
233
+ Requires-Dist: impit>=0.5.3
233
234
  Requires-Dist: lazy-object-proxy<1.11.0
234
235
  Requires-Dist: more-itertools>=10.2.0
235
236
  Requires-Dist: typing-extensions>=4.1.0
236
237
  Requires-Dist: websockets>=14.0
238
+ Requires-Dist: yarl>=1.18.0
237
239
  Provides-Extra: scrapy
238
240
  Requires-Dist: scrapy>=2.11.0; extra == 'scrapy'
239
241
  Description-Content-Type: text/markdown
@@ -0,0 +1,52 @@
1
+ apify/__init__.py,sha256=HpgKg2FZWJuSPfDygzJ62psylhw4NN4tKFnoYUIhcd4,838
2
+ apify/_actor.py,sha256=kOEvs_3TxjZ1PlwLEnIEL5nAD12oKs0ULldYyI-9irw,52985
3
+ apify/_charging.py,sha256=mJ-BueULWZxqvbdM_WGbsb-V3vTJ8Gw38k81eGwJhVY,12481
4
+ apify/_configuration.py,sha256=Ta-qPlKenLaI0IOlODg_A4ZwmPtnuS7OWLSJyexcqmA,13283
5
+ apify/_consts.py,sha256=CjhyEJ4Mi0lcIrzfqz8dN7nPJWGjCeBrrXQy1PZ6zRI,440
6
+ apify/_crypto.py,sha256=tqUs13QkemDtGzvU41pIA2HUEawpDlgzqbwKjm4I8kM,6852
7
+ apify/_models.py,sha256=EzU-inWeJ7T5HNVYEwnYb79W-q4OAPhtrYctfRYzpTE,7848
8
+ apify/_proxy_configuration.py,sha256=K9V4vG9-bAx7_a4l0zHhfbzvzopJeHek-qUJ05aQ6wI,13066
9
+ apify/_utils.py,sha256=og_zzRXELQmirklJWp1kSV7pwQPFfAE81UO3IZ4xBNs,2414
10
+ apify/log.py,sha256=Ry251sK4qPRGiFGe3DKcOZazg5OzC6RXS546Zzk8H2M,1003
11
+ apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
12
+ apify/events/__init__.py,sha256=daZzFNJlJFnm0HZZQVeT26pvRWufjHzKZYERANgfX4o,182
13
+ apify/events/_apify_event_manager.py,sha256=yArFrKa4wWDZo32iwaA3F_w36VSJf1Yaj_L1opo8ncU,5917
14
+ apify/events/_types.py,sha256=F0BHgACqnRfmdQ9GUcpnZvPxzw2bdRr8BqbGSA4cHeQ,3050
15
+ apify/events/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
16
+ apify/request_loaders/__init__.py,sha256=SJqW0FbdZKEtAMB5kBLgqROzh3KmQc2CNEIhJpTGdPQ,356
17
+ apify/request_loaders/_apify_request_list.py,sha256=kurCxX2jAKzHJ5N1Co6KjIgptqgVmjR0WpT8bd6uK9A,6220
18
+ apify/request_loaders/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
19
+ apify/scrapy/__init__.py,sha256=m2a0ts_JY9xJkBy4JU5mV8PJqjA3GGKLXBFu4nl-n-A,1048
20
+ apify/scrapy/_actor_runner.py,sha256=rXWSnlQWGskDUH8PtLCv5SkOIx4AiVa4QbCYeCett5c,938
21
+ apify/scrapy/_async_thread.py,sha256=8xif_fWce7vaMLuDc-XuDzZlHbCI-NY61YXdP2P27QY,4753
22
+ apify/scrapy/_logging_config.py,sha256=KZttIiMXcfu_isYcvXQnWdoYFdYI2wK5AAdbhd57cp4,1819
23
+ apify/scrapy/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
24
+ apify/scrapy/requests.py,sha256=JNVsITB-4xtT8LnkQc8z_G4OcEE9CmkMUvLGqbva5ZI,6460
25
+ apify/scrapy/scheduler.py,sha256=hvTJ9AxLoifqZi7C8MtLcCP0ujGk4D4dgq4qbCD_zco,6098
26
+ apify/scrapy/utils.py,sha256=Ssfa-P9-g9XYP1suDce6dQ8ta7PfijiPoMl2iplE6Ow,2126
27
+ apify/scrapy/extensions/__init__.py,sha256=cVQ8CCtOsJsRP28YKZWSUsi4FBwxI-yPJRNSXPFSa_o,98
28
+ apify/scrapy/extensions/_httpcache.py,sha256=XIS9vFCcUtdSfeKAKnxRnI9dX_GMmX2Od8OPnOaDhQ0,8870
29
+ apify/scrapy/middlewares/__init__.py,sha256=tfW-d3WFWLeNEjL8fTmon6NwgD-OXx1Bw2fBdU-wPy4,114
30
+ apify/scrapy/middlewares/apify_proxy.py,sha256=CDAOXS3bcVDZHM3B0GvhXbxEikMIadLF_0P73WL_nI4,5550
31
+ apify/scrapy/middlewares/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
32
+ apify/scrapy/pipelines/__init__.py,sha256=GWPeLN_Zwj8vRBWtXW6DaxdB7mvyQ7Jw5Tz1ccgWlZI,119
33
+ apify/scrapy/pipelines/actor_dataset_push.py,sha256=XUUyznQTD-E3wYUUFt2WAOnWhbnRrY0WuedlfYfYhDI,846
34
+ apify/scrapy/pipelines/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
35
+ apify/storage_clients/__init__.py,sha256=9WLAKs2GnnP0yyKR0mc3AfJ1IqXF48V3KPMp6KaB8kU,277
36
+ apify/storage_clients/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
37
+ apify/storage_clients/_apify/__init__.py,sha256=mtbVDsxqWL3kx30elnh0kAn2kZ4s3BBsWa15Y5e7RMU,347
38
+ apify/storage_clients/_apify/_dataset_client.py,sha256=8ZQvbtXZm54-V0Ukio0Z4jVI2gGkfqzZ59GlBQJXGUU,11485
39
+ apify/storage_clients/_apify/_key_value_store_client.py,sha256=WbyzDCFmJS2hd_7ddYL3JEO9zvjUAAE1D_F4kohiim4,9455
40
+ apify/storage_clients/_apify/_models.py,sha256=C6FpXswtO6kXE5RUumazm_conzJJS6PrXAGF9XBuDb8,3651
41
+ apify/storage_clients/_apify/_request_queue_client.py,sha256=sMinB02V9236PH7fQTQ0AIwH6oObnZv2ivzKBEgaLOk,31372
42
+ apify/storage_clients/_apify/_storage_client.py,sha256=5me6gHOeNAG3JaHxKRdzsZaa3FsqLDbObjhECGGWrr4,2890
43
+ apify/storage_clients/_apify/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
44
+ apify/storage_clients/_file_system/__init__.py,sha256=rDbXatXV9wHKPhKTrXDzWnexhTm7sIJQWucMi-P-SD4,130
45
+ apify/storage_clients/_file_system/_key_value_store_client.py,sha256=DHDv_e0kFwhBCIXUadZAamHhUao3NsSniUEXgnttnVY,1430
46
+ apify/storage_clients/_file_system/_storage_client.py,sha256=UwxuSvhbyQ7zR1db1hTmZ1h38yH7btHNp82X7e8MWWE,1290
47
+ apify/storages/__init__.py,sha256=-9tEYJVabVs_eRVhUehxN58GH0UG8OfuGjGwuDieP2M,122
48
+ apify/storages/py.typed,sha256=47DEQpj8HBSa-_TImW-5JCeuQeRkm5NMpJWZG3hSuFU,0
49
+ apify-2.7.1b8.dist-info/METADATA,sha256=9OY09tUOf00U-K6_kvD63Vs8DKsDEGJs4SPTaPhwzq0,21800
50
+ apify-2.7.1b8.dist-info/WHEEL,sha256=qtCwoSJWgHk21S1Kb4ihdzI2rlJ1ZKaIurTj_ngOhyQ,87
51
+ apify-2.7.1b8.dist-info/licenses/LICENSE,sha256=AsFjHssKjj4LGd2ZCqXn6FBzMqcWdjQre1byPPSypVw,11355
52
+ apify-2.7.1b8.dist-info/RECORD,,
@@ -1,215 +0,0 @@
1
- from __future__ import annotations
2
-
3
- import asyncio
4
- from datetime import datetime
5
- from typing import TYPE_CHECKING, Annotated, Any, Literal
6
-
7
- import websockets.asyncio.client
8
- from pydantic import BaseModel, Discriminator, Field, TypeAdapter
9
- from typing_extensions import Self, Unpack, override
10
-
11
- from crawlee.events._event_manager import EventManager, EventManagerOptions
12
- from crawlee.events._local_event_manager import LocalEventManager
13
- from crawlee.events._types import (
14
- Event,
15
- EventAbortingData,
16
- EventExitData,
17
- EventMigratingData,
18
- EventPersistStateData,
19
- EventSystemInfoData,
20
- )
21
-
22
- from apify._utils import docs_group
23
- from apify.log import logger
24
-
25
- if TYPE_CHECKING:
26
- from types import TracebackType
27
-
28
- from apify._configuration import Configuration
29
-
30
- __all__ = ['EventManager', 'LocalEventManager', 'PlatformEventManager']
31
-
32
-
33
- @docs_group('Event data')
34
- class SystemInfoEventData(BaseModel):
35
- mem_avg_bytes: Annotated[float, Field(alias='memAvgBytes')]
36
- mem_current_bytes: Annotated[float, Field(alias='memCurrentBytes')]
37
- mem_max_bytes: Annotated[float, Field(alias='memMaxBytes')]
38
- cpu_avg_usage: Annotated[float, Field(alias='cpuAvgUsage')]
39
- cpu_max_usage: Annotated[float, Field(alias='cpuMaxUsage')]
40
- cpu_current_usage: Annotated[float, Field(alias='cpuCurrentUsage')]
41
- is_cpu_overloaded: Annotated[bool, Field(alias='isCpuOverloaded')]
42
- created_at: Annotated[datetime, Field(alias='createdAt')]
43
-
44
- def to_crawlee_format(self, dedicated_cpus: float) -> EventSystemInfoData:
45
- return EventSystemInfoData.model_validate(
46
- {
47
- 'cpu_info': {
48
- 'used_ratio': (self.cpu_current_usage / 100) / dedicated_cpus,
49
- 'created_at': self.created_at,
50
- },
51
- 'memory_info': {
52
- 'total_size': self.mem_max_bytes,
53
- 'current_size': self.mem_current_bytes,
54
- 'created_at': self.created_at,
55
- },
56
- }
57
- )
58
-
59
-
60
- @docs_group('Events')
61
- class PersistStateEvent(BaseModel):
62
- name: Literal[Event.PERSIST_STATE]
63
- data: Annotated[EventPersistStateData, Field(default_factory=lambda: EventPersistStateData(is_migrating=False))]
64
-
65
-
66
- @docs_group('Events')
67
- class SystemInfoEvent(BaseModel):
68
- name: Literal[Event.SYSTEM_INFO]
69
- data: SystemInfoEventData
70
-
71
-
72
- @docs_group('Events')
73
- class MigratingEvent(BaseModel):
74
- name: Literal[Event.MIGRATING]
75
- data: Annotated[EventMigratingData, Field(default_factory=EventMigratingData)]
76
-
77
-
78
- @docs_group('Events')
79
- class AbortingEvent(BaseModel):
80
- name: Literal[Event.ABORTING]
81
- data: Annotated[EventAbortingData, Field(default_factory=EventAbortingData)]
82
-
83
-
84
- @docs_group('Events')
85
- class ExitEvent(BaseModel):
86
- name: Literal[Event.EXIT]
87
- data: Annotated[EventExitData, Field(default_factory=EventExitData)]
88
-
89
-
90
- @docs_group('Events')
91
- class EventWithoutData(BaseModel):
92
- name: Literal[
93
- Event.SESSION_RETIRED,
94
- Event.BROWSER_LAUNCHED,
95
- Event.BROWSER_RETIRED,
96
- Event.BROWSER_CLOSED,
97
- Event.PAGE_CREATED,
98
- Event.PAGE_CLOSED,
99
- ]
100
- data: Any = None
101
-
102
-
103
- @docs_group('Events')
104
- class DeprecatedEvent(BaseModel):
105
- name: Literal['cpuInfo']
106
- data: Annotated[dict[str, Any], Field(default_factory=dict)]
107
-
108
-
109
- @docs_group('Events')
110
- class UnknownEvent(BaseModel):
111
- name: str
112
- data: Annotated[dict[str, Any], Field(default_factory=dict)]
113
-
114
-
115
- EventMessage = PersistStateEvent | SystemInfoEvent | MigratingEvent | AbortingEvent | ExitEvent | EventWithoutData
116
-
117
- event_data_adapter = TypeAdapter[EventMessage | DeprecatedEvent | UnknownEvent](
118
- Annotated[EventMessage, Discriminator('name')] | DeprecatedEvent | UnknownEvent
119
- )
120
-
121
-
122
- @docs_group('Event managers')
123
- class PlatformEventManager(EventManager):
124
- """A class for managing Actor events.
125
-
126
- You shouldn't use this class directly,
127
- but instead use it via the `Actor.on()` and `Actor.off()` methods.
128
- """
129
-
130
- _platform_events_websocket: websockets.asyncio.client.ClientConnection | None = None
131
- _process_platform_messages_task: asyncio.Task | None = None
132
- _send_system_info_interval_task: asyncio.Task | None = None
133
- _connected_to_platform_websocket: asyncio.Future = asyncio.Future()
134
-
135
- def __init__(self, config: Configuration, **kwargs: Unpack[EventManagerOptions]) -> None:
136
- """Create an instance of the EventManager.
137
-
138
- Args:
139
- config: The Actor configuration to be used in this event manager.
140
- kwargs: Event manager options - forwarded to the base class
141
- """
142
- super().__init__(**kwargs)
143
-
144
- self._config = config
145
- self._listener_tasks = set()
146
- self._connected_to_platform_websocket = asyncio.Future[bool]()
147
-
148
- @override
149
- async def __aenter__(self) -> Self:
150
- await super().__aenter__()
151
- self._connected_to_platform_websocket = asyncio.Future()
152
-
153
- # Run tasks but don't await them
154
- if self._config.actor_events_ws_url:
155
- self._process_platform_messages_task = asyncio.create_task(
156
- self._process_platform_messages(self._config.actor_events_ws_url)
157
- )
158
- is_connected = await self._connected_to_platform_websocket
159
- if not is_connected:
160
- raise RuntimeError('Error connecting to platform events websocket!')
161
- else:
162
- logger.debug('APIFY_ACTOR_EVENTS_WS_URL env var not set, no events from Apify platform will be emitted.')
163
-
164
- return self
165
-
166
- @override
167
- async def __aexit__(
168
- self,
169
- exc_type: type[BaseException] | None,
170
- exc_value: BaseException | None,
171
- exc_traceback: TracebackType | None,
172
- ) -> None:
173
- if self._platform_events_websocket:
174
- await self._platform_events_websocket.close()
175
-
176
- if self._process_platform_messages_task:
177
- await self._process_platform_messages_task
178
-
179
- await super().__aexit__(exc_type, exc_value, exc_traceback)
180
-
181
- async def _process_platform_messages(self, ws_url: str) -> None:
182
- try:
183
- async with websockets.asyncio.client.connect(ws_url) as websocket:
184
- self._platform_events_websocket = websocket
185
- self._connected_to_platform_websocket.set_result(True)
186
-
187
- async for message in websocket:
188
- try:
189
- parsed_message = event_data_adapter.validate_json(message)
190
-
191
- if isinstance(parsed_message, DeprecatedEvent):
192
- continue
193
-
194
- if isinstance(parsed_message, UnknownEvent):
195
- logger.info(
196
- f'Unknown message received: event_name={parsed_message.name}, '
197
- f'event_data={parsed_message.data}'
198
- )
199
- continue
200
-
201
- self.emit(
202
- event=parsed_message.name,
203
- event_data=parsed_message.data
204
- if not isinstance(parsed_message.data, SystemInfoEventData)
205
- else parsed_message.data.to_crawlee_format(self._config.dedicated_cpus or 1),
206
- )
207
-
208
- if parsed_message.name == Event.MIGRATING:
209
- await self._emit_persist_state_event_rec_task.stop()
210
- self.emit(event=Event.PERSIST_STATE, event_data=EventPersistStateData(is_migrating=True))
211
- except Exception:
212
- logger.exception('Cannot parse Actor event', extra={'message': message})
213
- except Exception:
214
- logger.exception('Error in websocket connection')
215
- self._connected_to_platform_websocket.set_result(False)
@@ -1,3 +0,0 @@
1
- from apify.apify_storage_client._apify_storage_client import ApifyStorageClient
2
-
3
- __all__ = ['ApifyStorageClient']
@@ -1,72 +0,0 @@
1
- from __future__ import annotations
2
-
3
- from typing import TYPE_CHECKING
4
-
5
- from typing_extensions import override
6
-
7
- from apify_client import ApifyClientAsync
8
- from crawlee._utils.crypto import crypto_random_object_id
9
- from crawlee.storage_clients import StorageClient
10
-
11
- from apify._utils import docs_group
12
- from apify.apify_storage_client._dataset_client import DatasetClient
13
- from apify.apify_storage_client._dataset_collection_client import DatasetCollectionClient
14
- from apify.apify_storage_client._key_value_store_client import KeyValueStoreClient
15
- from apify.apify_storage_client._key_value_store_collection_client import KeyValueStoreCollectionClient
16
- from apify.apify_storage_client._request_queue_client import RequestQueueClient
17
- from apify.apify_storage_client._request_queue_collection_client import RequestQueueCollectionClient
18
-
19
- if TYPE_CHECKING:
20
- from apify._configuration import Configuration
21
-
22
-
23
- @docs_group('Storage clients')
24
- class ApifyStorageClient(StorageClient):
25
- """A storage client implementation based on the Apify platform storage."""
26
-
27
- def __init__(self, *, configuration: Configuration) -> None:
28
- self._client_key = crypto_random_object_id()
29
- self._apify_client = ApifyClientAsync(
30
- token=configuration.token,
31
- api_url=configuration.api_base_url,
32
- max_retries=8,
33
- min_delay_between_retries_millis=500,
34
- timeout_secs=360,
35
- )
36
- self._configuration = configuration
37
-
38
- @classmethod
39
- def from_config(cls, config: Configuration) -> ApifyStorageClient:
40
- return cls(configuration=config)
41
-
42
- @override
43
- def dataset(self, id: str) -> DatasetClient:
44
- return DatasetClient(self._apify_client.dataset(id))
45
-
46
- @override
47
- def datasets(self) -> DatasetCollectionClient:
48
- return DatasetCollectionClient(self._apify_client.datasets())
49
-
50
- @override
51
- def key_value_store(self, id: str) -> KeyValueStoreClient:
52
- return KeyValueStoreClient(self._apify_client.key_value_store(id), self._configuration.api_public_base_url)
53
-
54
- @override
55
- def key_value_stores(self) -> KeyValueStoreCollectionClient:
56
- return KeyValueStoreCollectionClient(self._apify_client.key_value_stores())
57
-
58
- @override
59
- def request_queue(self, id: str) -> RequestQueueClient:
60
- return RequestQueueClient(self._apify_client.request_queue(id, client_key=self._client_key))
61
-
62
- @override
63
- def request_queues(self) -> RequestQueueCollectionClient:
64
- return RequestQueueCollectionClient(self._apify_client.request_queues())
65
-
66
- @override
67
- async def purge_on_start(self) -> None:
68
- pass
69
-
70
- @override
71
- def get_rate_limit_errors(self) -> dict[int, int]:
72
- return self._apify_client.stats.rate_limit_errors