apify 2.7.3__py3-none-any.whl → 3.0.0rc1__py3-none-any.whl

This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.

Potentially problematic release.

This version of apify has been flagged as potentially problematic. See the package's registry page for more details.

Files changed (46)
  1. apify/_actor.py +47 -12
  2. apify/_charging.py +15 -9
  3. apify/_configuration.py +34 -1
  4. apify/_crypto.py +0 -6
  5. apify/_models.py +7 -7
  6. apify/_proxy_configuration.py +10 -10
  7. apify/_utils.py +25 -2
  8. apify/events/__init__.py +5 -0
  9. apify/events/_apify_event_manager.py +140 -0
  10. apify/events/_types.py +102 -0
  11. apify/log.py +0 -9
  12. apify/request_loaders/__init__.py +18 -0
  13. apify/{storages/_request_list.py → request_loaders/_apify_request_list.py} +25 -18
  14. apify/request_loaders/py.typed +0 -0
  15. apify/scrapy/_logging_config.py +1 -4
  16. apify/scrapy/extensions/_httpcache.py +9 -5
  17. apify/scrapy/requests.py +3 -3
  18. apify/scrapy/scheduler.py +8 -5
  19. apify/storage_clients/__init__.py +10 -0
  20. apify/storage_clients/_apify/__init__.py +11 -0
  21. apify/storage_clients/_apify/_dataset_client.py +304 -0
  22. apify/storage_clients/_apify/_key_value_store_client.py +241 -0
  23. apify/storage_clients/_apify/_models.py +107 -0
  24. apify/storage_clients/_apify/_request_queue_client.py +787 -0
  25. apify/storage_clients/_apify/_storage_client.py +80 -0
  26. apify/storage_clients/_apify/py.typed +0 -0
  27. apify/storage_clients/_file_system/__init__.py +2 -0
  28. apify/storage_clients/_file_system/_key_value_store_client.py +36 -0
  29. apify/storage_clients/_file_system/_storage_client.py +35 -0
  30. apify/storage_clients/py.typed +0 -0
  31. apify/storages/__init__.py +1 -3
  32. {apify-2.7.3.dist-info → apify-3.0.0rc1.dist-info}/METADATA +8 -7
  33. apify-3.0.0rc1.dist-info/RECORD +52 -0
  34. apify/_platform_event_manager.py +0 -231
  35. apify/apify_storage_client/__init__.py +0 -3
  36. apify/apify_storage_client/_apify_storage_client.py +0 -72
  37. apify/apify_storage_client/_dataset_client.py +0 -190
  38. apify/apify_storage_client/_dataset_collection_client.py +0 -51
  39. apify/apify_storage_client/_key_value_store_client.py +0 -109
  40. apify/apify_storage_client/_key_value_store_collection_client.py +0 -51
  41. apify/apify_storage_client/_request_queue_client.py +0 -176
  42. apify/apify_storage_client/_request_queue_collection_client.py +0 -51
  43. apify-2.7.3.dist-info/RECORD +0 -44
  44. /apify/{apify_storage_client → events}/py.typed +0 -0
  45. {apify-2.7.3.dist-info → apify-3.0.0rc1.dist-info}/WHEEL +0 -0
  46. {apify-2.7.3.dist-info → apify-3.0.0rc1.dist-info}/licenses/LICENSE +0 -0
apify/_actor.py CHANGED
@@ -5,7 +5,7 @@ import os
5
5
  import sys
6
6
  from contextlib import suppress
7
7
  from datetime import datetime, timedelta, timezone
8
- from typing import TYPE_CHECKING, Any, Callable, Literal, TypeVar, cast, overload
8
+ from typing import TYPE_CHECKING, Any, Literal, TypeVar, cast, overload
9
9
 
10
10
  from lazy_object_proxy import Proxy
11
11
  from more_itertools import flatten
@@ -13,7 +13,6 @@ from pydantic import AliasChoices
13
13
 
14
14
  from apify_client import ApifyClientAsync
15
15
  from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars
16
- from apify_shared.utils import ignore_docs, maybe_extract_enum_member_value
17
16
  from crawlee import service_locator
18
17
  from crawlee.events import (
19
18
  Event,
@@ -30,15 +29,16 @@ from apify._configuration import Configuration
30
29
  from apify._consts import EVENT_LISTENERS_TIMEOUT
31
30
  from apify._crypto import decrypt_input_secrets, load_private_key
32
31
  from apify._models import ActorRun
33
- from apify._platform_event_manager import EventManager, LocalEventManager, PlatformEventManager
34
32
  from apify._proxy_configuration import ProxyConfiguration
35
- from apify._utils import docs_group, docs_name, get_system_info, is_running_in_ipython
36
- from apify.apify_storage_client import ApifyStorageClient
33
+ from apify._utils import docs_group, docs_name, get_system_info, is_running_in_ipython, maybe_extract_enum_member_value
34
+ from apify.events import ApifyEventManager, EventManager, LocalEventManager
37
35
  from apify.log import _configure_logging, logger
36
+ from apify.storage_clients import ApifyStorageClient
38
37
  from apify.storages import Dataset, KeyValueStore, RequestQueue
39
38
 
40
39
  if TYPE_CHECKING:
41
40
  import logging
41
+ from collections.abc import Callable
42
42
  from types import TracebackType
43
43
 
44
44
  from typing_extensions import Self
@@ -53,9 +53,46 @@ MainReturnType = TypeVar('MainReturnType')
53
53
 
54
54
 
55
55
  @docs_name('Actor')
56
- @docs_group('Classes')
56
+ @docs_group('Actor')
57
57
  class _ActorType:
58
- """The class of `Actor`. Only make a new instance if you're absolutely sure you need to."""
58
+ """The core class for building Actors on the Apify platform.
59
+
60
+ Actors are serverless programs running in the cloud that can perform anything from simple actions
61
+ (such as filling out a web form or sending an email) to complex operations (such as crawling an
62
+ entire website or removing duplicates from a large dataset). They are packaged as Docker containers
63
+ which accept well-defined JSON input, perform an action, and optionally produce well-defined output.
64
+
65
+ ### References
66
+
67
+ - Apify platform documentation: https://docs.apify.com/platform/actors
68
+ - Actor whitepaper: https://whitepaper.actor/
69
+
70
+ ### Usage
71
+
72
+ ```python
73
+ import asyncio
74
+
75
+ import httpx
76
+ from apify import Actor
77
+ from bs4 import BeautifulSoup
78
+
79
+
80
+ async def main() -> None:
81
+ async with Actor:
82
+ actor_input = await Actor.get_input()
83
+ async with httpx.AsyncClient() as client:
84
+ response = await client.get(actor_input['url'])
85
+ soup = BeautifulSoup(response.content, 'html.parser')
86
+ data = {
87
+ 'url': actor_input['url'],
88
+ 'title': soup.title.string if soup.title else None,
89
+ }
90
+ await Actor.push_data(data)
91
+
92
+ if __name__ == '__main__':
93
+ asyncio.run(main())
94
+ ```
95
+ """
59
96
 
60
97
  _is_rebooting = False
61
98
  _is_any_instance_initialized = False
@@ -88,12 +125,12 @@ class _ActorType:
88
125
 
89
126
  # Create an instance of the cloud storage client, the local storage client is obtained
90
127
  # from the service locator.
91
- self._cloud_storage_client = ApifyStorageClient.from_config(config=self._configuration)
128
+ self._cloud_storage_client = ApifyStorageClient()
92
129
 
93
130
  # Set the event manager based on whether the Actor is running on the platform or locally.
94
131
  self._event_manager = (
95
- PlatformEventManager(
96
- config=self._configuration,
132
+ ApifyEventManager(
133
+ configuration=self._configuration,
97
134
  persist_state_interval=self._configuration.persist_state_interval,
98
135
  )
99
136
  if self.is_at_home()
@@ -107,7 +144,6 @@ class _ActorType:
107
144
 
108
145
  self._is_initialized = False
109
146
 
110
- @ignore_docs
111
147
  async def __aenter__(self) -> Self:
112
148
  """Initialize the Actor.
113
149
 
@@ -119,7 +155,6 @@ class _ActorType:
119
155
  await self.init()
120
156
  return self
121
157
 
122
- @ignore_docs
123
158
  async def __aexit__(
124
159
  self,
125
160
  _exc_type: type[BaseException] | None,
apify/_charging.py CHANGED
@@ -4,11 +4,10 @@ import math
4
4
  from dataclasses import dataclass
5
5
  from datetime import datetime, timezone
6
6
  from decimal import Decimal
7
- from typing import TYPE_CHECKING, Protocol, Union
7
+ from typing import TYPE_CHECKING, Protocol
8
8
 
9
9
  from pydantic import TypeAdapter
10
10
 
11
- from apify_shared.utils import ignore_docs
12
11
  from crawlee._utils.context import ensure_context
13
12
 
14
13
  from apify._models import ActorRun, PricingModel
@@ -23,13 +22,21 @@ if TYPE_CHECKING:
23
22
 
24
23
  from apify._configuration import Configuration
25
24
 
25
+ run_validator = TypeAdapter[ActorRun | None](ActorRun | None)
26
26
 
27
- run_validator: TypeAdapter[ActorRun | None] = TypeAdapter(Union[ActorRun, None])
28
27
 
29
-
30
- @docs_group('Interfaces')
28
+ @docs_group('Charging')
31
29
  class ChargingManager(Protocol):
32
- """Provides fine-grained access to pay-per-event functionality."""
30
+ """Provides fine-grained access to pay-per-event functionality.
31
+
32
+ The ChargingManager allows you to charge for specific events in your Actor when using
33
+ the pay-per-event pricing model. This enables precise cost control and transparent
34
+ billing for different operations within your Actor.
35
+
36
+ ### References
37
+
38
+ - Apify platform documentation: https://docs.apify.com/platform/actors/publishing/monetize
39
+ """
33
40
 
34
41
  async def charge(self, event_name: str, count: int = 1) -> ChargeResult:
35
42
  """Charge for a specified number of events - sub-operations of the Actor.
@@ -58,7 +65,7 @@ class ChargingManager(Protocol):
58
65
  """
59
66
 
60
67
 
61
- @docs_group('Data structures')
68
+ @docs_group('Charging')
62
69
  @dataclass(frozen=True)
63
70
  class ChargeResult:
64
71
  """Result of the `ChargingManager.charge` method."""
@@ -73,7 +80,7 @@ class ChargeResult:
73
80
  """How many events of each known type can still be charged within the limit."""
74
81
 
75
82
 
76
- @docs_group('Data structures')
83
+ @docs_group('Charging')
77
84
  @dataclass
78
85
  class ActorPricingInfo:
79
86
  """Result of the `ChargingManager.get_pricing_info` method."""
@@ -91,7 +98,6 @@ class ActorPricingInfo:
91
98
  """Price of every known event type."""
92
99
 
93
100
 
94
- @ignore_docs
95
101
  class ChargingManagerImplementation(ChargingManager):
96
102
  """Implementation of the `ChargingManager` Protocol - this is only meant to be instantiated internally."""
97
103
 
apify/_configuration.py CHANGED
@@ -25,7 +25,7 @@ def _transform_to_list(value: Any) -> list[str] | None:
25
25
  return value if isinstance(value, list) else str(value).split(',')
26
26
 
27
27
 
28
- @docs_group('Classes')
28
+ @docs_group('Configuration')
29
29
  class Configuration(CrawleeConfiguration):
30
30
  """A class for specifying the configuration of an Actor.
31
31
 
@@ -140,6 +140,39 @@ class Configuration(CrawleeConfiguration):
140
140
  ),
141
141
  ] = None
142
142
 
143
+ default_dataset_id: Annotated[
144
+ str,
145
+ Field(
146
+ validation_alias=AliasChoices(
147
+ 'actor_default_dataset_id',
148
+ 'apify_default_dataset_id',
149
+ ),
150
+ description='Default dataset ID used by the Apify storage client when no ID or name is provided.',
151
+ ),
152
+ ] = 'default'
153
+
154
+ default_key_value_store_id: Annotated[
155
+ str,
156
+ Field(
157
+ validation_alias=AliasChoices(
158
+ 'actor_default_key_value_store_id',
159
+ 'apify_default_key_value_store_id',
160
+ ),
161
+ description='Default key-value store ID for the Apify storage client when no ID or name is provided.',
162
+ ),
163
+ ] = 'default'
164
+
165
+ default_request_queue_id: Annotated[
166
+ str,
167
+ Field(
168
+ validation_alias=AliasChoices(
169
+ 'actor_default_request_queue_id',
170
+ 'apify_default_request_queue_id',
171
+ ),
172
+ description='Default request queue ID for the Apify storage client when no ID or name is provided.',
173
+ ),
174
+ ] = 'default'
175
+
143
176
  disable_outdated_warning: Annotated[
144
177
  bool,
145
178
  Field(
apify/_crypto.py CHANGED
@@ -12,7 +12,6 @@ from cryptography.hazmat.primitives import hashes, serialization
12
12
  from cryptography.hazmat.primitives.asymmetric import padding, rsa
13
13
  from cryptography.hazmat.primitives.ciphers import Cipher, algorithms, modes
14
14
 
15
- from apify_shared.utils import ignore_docs
16
15
  from crawlee._utils.crypto import crypto_random_object_id
17
16
 
18
17
  from apify._consts import ENCRYPTED_INPUT_VALUE_REGEXP, ENCRYPTED_JSON_VALUE_PREFIX, ENCRYPTED_STRING_VALUE_PREFIX
@@ -22,7 +21,6 @@ ENCRYPTION_IV_LENGTH = 16
22
21
  ENCRYPTION_AUTH_TAG_LENGTH = 16
23
22
 
24
23
 
25
- @ignore_docs
26
24
  def public_encrypt(value: str, *, public_key: rsa.RSAPublicKey) -> dict:
27
25
  """Encrypts the given value using AES cipher and the password for encryption using the public key.
28
26
 
@@ -66,7 +64,6 @@ def public_encrypt(value: str, *, public_key: rsa.RSAPublicKey) -> dict:
66
64
  }
67
65
 
68
66
 
69
- @ignore_docs
70
67
  def private_decrypt(
71
68
  encrypted_password: str,
72
69
  encrypted_value: str,
@@ -118,7 +115,6 @@ def private_decrypt(
118
115
  return decipher_bytes.decode('utf-8')
119
116
 
120
117
 
121
- @ignore_docs
122
118
  def load_private_key(private_key_file_base64: str, private_key_password: str) -> rsa.RSAPrivateKey:
123
119
  private_key = serialization.load_pem_private_key(
124
120
  base64.b64decode(private_key_file_base64.encode('utf-8')),
@@ -138,7 +134,6 @@ def _load_public_key(public_key_file_base64: str) -> rsa.RSAPublicKey:
138
134
  return public_key
139
135
 
140
136
 
141
- @ignore_docs
142
137
  def decrypt_input_secrets(private_key: rsa.RSAPrivateKey, input_data: Any) -> Any:
143
138
  """Decrypt input secrets."""
144
139
  if not isinstance(input_data, dict):
@@ -180,7 +175,6 @@ def encode_base62(num: int) -> str:
180
175
  return res
181
176
 
182
177
 
183
- @ignore_docs
184
178
  def create_hmac_signature(secret_key: str, message: str) -> str:
185
179
  """Generate an HMAC signature and encodes it using Base62. Base62 encoding reduces the signature length.
186
180
 
apify/_models.py CHANGED
@@ -13,10 +13,10 @@ from crawlee._utils.urls import validate_http_url
13
13
  from apify._utils import docs_group
14
14
 
15
15
  if TYPE_CHECKING:
16
- from typing_extensions import TypeAlias
16
+ from typing import TypeAlias
17
17
 
18
18
 
19
- @docs_group('Data structures')
19
+ @docs_group('Actor')
20
20
  class Webhook(BaseModel):
21
21
  __model_config__ = ConfigDict(populate_by_name=True)
22
22
 
@@ -35,14 +35,14 @@ class Webhook(BaseModel):
35
35
  ] = None
36
36
 
37
37
 
38
- @docs_group('Data structures')
38
+ @docs_group('Actor')
39
39
  class ActorRunMeta(BaseModel):
40
40
  __model_config__ = ConfigDict(populate_by_name=True)
41
41
 
42
42
  origin: Annotated[MetaOrigin, Field()]
43
43
 
44
44
 
45
- @docs_group('Data structures')
45
+ @docs_group('Actor')
46
46
  class ActorRunStats(BaseModel):
47
47
  __model_config__ = ConfigDict(populate_by_name=True)
48
48
 
@@ -63,7 +63,7 @@ class ActorRunStats(BaseModel):
63
63
  compute_units: Annotated[float, Field(alias='computeUnits')]
64
64
 
65
65
 
66
- @docs_group('Data structures')
66
+ @docs_group('Actor')
67
67
  class ActorRunOptions(BaseModel):
68
68
  __model_config__ = ConfigDict(populate_by_name=True)
69
69
 
@@ -74,7 +74,7 @@ class ActorRunOptions(BaseModel):
74
74
  max_total_charge_usd: Annotated[Decimal | None, Field(alias='maxTotalChargeUsd')] = None
75
75
 
76
76
 
77
- @docs_group('Data structures')
77
+ @docs_group('Actor')
78
78
  class ActorRunUsage(BaseModel):
79
79
  __model_config__ = ConfigDict(populate_by_name=True)
80
80
 
@@ -92,7 +92,7 @@ class ActorRunUsage(BaseModel):
92
92
  proxy_serps: Annotated[float | None, Field(alias='PROXY_SERPS')] = None
93
93
 
94
94
 
95
- @docs_group('Data structures')
95
+ @docs_group('Actor')
96
96
  class ActorRun(BaseModel):
97
97
  __model_config__ = ConfigDict(populate_by_name=True)
98
98
 
@@ -1,16 +1,17 @@
1
1
  from __future__ import annotations
2
2
 
3
3
  import ipaddress
4
+ import json
4
5
  import re
5
6
  from dataclasses import dataclass, field
6
7
  from re import Pattern
7
8
  from typing import TYPE_CHECKING, Any
8
9
  from urllib.parse import urljoin, urlparse
9
10
 
10
- import httpx
11
+ import impit
12
+ from yarl import URL
11
13
 
12
14
  from apify_shared.consts import ApifyEnvVars
13
- from apify_shared.utils import ignore_docs
14
15
  from crawlee.proxy_configuration import ProxyConfiguration as CrawleeProxyConfiguration
15
16
  from crawlee.proxy_configuration import ProxyInfo as CrawleeProxyInfo
16
17
  from crawlee.proxy_configuration import _NewUrlFunction
@@ -21,14 +22,14 @@ from apify.log import logger
21
22
 
22
23
  if TYPE_CHECKING:
23
24
  from apify_client import ApifyClientAsync
24
- from crawlee import Request
25
+
26
+ from apify import Request
25
27
 
26
28
  APIFY_PROXY_VALUE_REGEX = re.compile(r'^[\w._~]+$')
27
29
  COUNTRY_CODE_REGEX = re.compile(r'^[A-Z]{2}$')
28
30
  SESSION_ID_MAX_LENGTH = 50
29
31
 
30
32
 
31
- @ignore_docs
32
33
  def is_url(url: str) -> bool:
33
34
  """Check if the given string is a valid URL."""
34
35
  try:
@@ -69,7 +70,7 @@ def _check(
69
70
  raise ValueError(f'{error_str} does not match pattern {pattern.pattern!r}')
70
71
 
71
72
 
72
- @docs_group('Classes')
73
+ @docs_group('Configuration')
73
74
  @dataclass
74
75
  class ProxyInfo(CrawleeProxyInfo):
75
76
  """Provides information about a proxy connection that is used for requests."""
@@ -89,7 +90,7 @@ class ProxyInfo(CrawleeProxyInfo):
89
90
  """
90
91
 
91
92
 
92
- @docs_group('Classes')
93
+ @docs_group('Configuration')
93
94
  class ProxyConfiguration(CrawleeProxyConfiguration):
94
95
  """Configures a connection to a proxy server with the provided options.
95
96
 
@@ -104,7 +105,6 @@ class ProxyConfiguration(CrawleeProxyConfiguration):
104
105
 
105
106
  _configuration: Configuration
106
107
 
107
- @ignore_docs
108
108
  def __init__(
109
109
  self,
110
110
  *,
@@ -233,7 +233,7 @@ class ProxyConfiguration(CrawleeProxyConfiguration):
233
233
  return None
234
234
 
235
235
  if self._uses_apify_proxy:
236
- parsed_url = httpx.URL(proxy_info.url)
236
+ parsed_url = URL(proxy_info.url)
237
237
  username = self._get_username(session_id)
238
238
 
239
239
  return ProxyInfo(
@@ -277,11 +277,11 @@ class ProxyConfiguration(CrawleeProxyConfiguration):
277
277
  return
278
278
 
279
279
  status = None
280
- async with httpx.AsyncClient(proxy=proxy_info.url, timeout=10) as client:
280
+ async with impit.AsyncClient(proxy=proxy_info.url, timeout=10) as client:
281
281
  for _ in range(2):
282
282
  try:
283
283
  response = await client.get(proxy_status_url)
284
- status = response.json()
284
+ status = json.loads(response.text)
285
285
  break
286
286
  except Exception: # noqa: S110
287
287
  # retry on connection errors
apify/_utils.py CHANGED
@@ -2,8 +2,12 @@ from __future__ import annotations
2
2
 
3
3
  import builtins
4
4
  import sys
5
+ from enum import Enum
5
6
  from importlib import metadata
6
- from typing import Callable, Literal
7
+ from typing import TYPE_CHECKING, Any, Literal
8
+
9
+ if TYPE_CHECKING:
10
+ from collections.abc import Callable
7
11
 
8
12
 
9
13
  def get_system_info() -> dict:
@@ -27,7 +31,19 @@ def is_running_in_ipython() -> bool:
27
31
  return getattr(builtins, '__IPYTHON__', False)
28
32
 
29
33
 
30
- GroupName = Literal['Classes', 'Abstract classes', 'Interfaces', 'Data structures', 'Errors', 'Functions']
34
+ # The order of the rendered API groups is defined in the website/docusaurus.config.js file.
35
+ GroupName = Literal[
36
+ 'Actor',
37
+ 'Charging',
38
+ 'Configuration',
39
+ 'Event data',
40
+ 'Event managers',
41
+ 'Events',
42
+ 'Request loaders',
43
+ 'Storage clients',
44
+ 'Storage data',
45
+ 'Storages',
46
+ ]
31
47
 
32
48
 
33
49
  def docs_group(group_name: GroupName) -> Callable: # noqa: ARG001
@@ -66,3 +82,10 @@ def docs_name(symbol_name: str) -> Callable: # noqa: ARG001
66
82
  return func
67
83
 
68
84
  return wrapper
85
+
86
+
87
+ def maybe_extract_enum_member_value(maybe_enum_member: Any) -> Any:
88
+ """Extract the value of an enumeration member if it is an Enum, otherwise return the original value."""
89
+ if isinstance(maybe_enum_member, Enum):
90
+ return maybe_enum_member.value
91
+ return maybe_enum_member
@@ -0,0 +1,5 @@
1
+ from crawlee.events import EventManager, LocalEventManager
2
+
3
+ from ._apify_event_manager import ApifyEventManager
4
+
5
+ __all__ = ['ApifyEventManager', 'EventManager', 'LocalEventManager']
@@ -0,0 +1,140 @@
1
+ from __future__ import annotations
2
+
3
+ import asyncio
4
+ import contextlib
5
+ from typing import TYPE_CHECKING, Annotated
6
+
7
+ import websockets.asyncio.client
8
+ from pydantic import Discriminator, TypeAdapter
9
+ from typing_extensions import Self, Unpack, override
10
+
11
+ from crawlee.events import EventManager
12
+ from crawlee.events._types import Event, EventPersistStateData
13
+
14
+ from apify._utils import docs_group
15
+ from apify.events._types import DeprecatedEvent, EventMessage, SystemInfoEventData, UnknownEvent
16
+ from apify.log import logger
17
+
18
+ if TYPE_CHECKING:
19
+ from types import TracebackType
20
+
21
+ from crawlee.events._event_manager import EventManagerOptions
22
+
23
+ from apify._configuration import Configuration
24
+
25
+
26
+ event_data_adapter = TypeAdapter[EventMessage | DeprecatedEvent | UnknownEvent](
27
+ Annotated[EventMessage, Discriminator('name')] | DeprecatedEvent | UnknownEvent
28
+ )
29
+
30
+
31
+ @docs_group('Event managers')
32
+ class ApifyEventManager(EventManager):
33
+ """Event manager for the Apify platform.
34
+
35
+ This class extends Crawlee's `EventManager` to provide Apify-specific functionality, including websocket
36
+ connectivity to the Apify platform for receiving platform events.
37
+
38
+ The event manager handles:
39
+ - Registration and emission of events and their listeners.
40
+ - Websocket connection to Apify platform events.
41
+ - Processing and validation of platform messages.
42
+ - Automatic event forwarding from the platform to local event listeners.
43
+
44
+ This class should not be used directly. Use the `Actor.on` and `Actor.off` methods to interact
45
+ with the event system.
46
+ """
47
+
48
+ def __init__(self, configuration: Configuration, **kwargs: Unpack[EventManagerOptions]) -> None:
49
+ """Initialize a new instance.
50
+
51
+ Args:
52
+ configuration: The Actor configuration for the event manager.
53
+ **kwargs: Additional event manager options passed to the parent class.
54
+ """
55
+ super().__init__(**kwargs)
56
+
57
+ self._configuration = configuration
58
+ """The Actor configuration for the event manager."""
59
+
60
+ self._platform_events_websocket: websockets.asyncio.client.ClientConnection | None = None
61
+ """WebSocket connection to the platform events."""
62
+
63
+ self._process_platform_messages_task: asyncio.Task | None = None
64
+ """Task for processing messages from the platform websocket."""
65
+
66
+ self._connected_to_platform_websocket: asyncio.Future[bool] | None = None
67
+ """Future that resolves when the connection to the platform websocket is established."""
68
+
69
+ @override
70
+ async def __aenter__(self) -> Self:
71
+ await super().__aenter__()
72
+ self._connected_to_platform_websocket = asyncio.Future()
73
+
74
+ # Run tasks but don't await them
75
+ if self._configuration.actor_events_ws_url:
76
+ self._process_platform_messages_task = asyncio.create_task(
77
+ self._process_platform_messages(self._configuration.actor_events_ws_url)
78
+ )
79
+ is_connected = await self._connected_to_platform_websocket
80
+ if not is_connected:
81
+ raise RuntimeError('Error connecting to platform events websocket!')
82
+ else:
83
+ logger.debug('APIFY_ACTOR_EVENTS_WS_URL env var not set, no events from Apify platform will be emitted.')
84
+
85
+ return self
86
+
87
+ @override
88
+ async def __aexit__(
89
+ self,
90
+ exc_type: type[BaseException] | None,
91
+ exc_value: BaseException | None,
92
+ exc_traceback: TracebackType | None,
93
+ ) -> None:
94
+ if self._platform_events_websocket:
95
+ await self._platform_events_websocket.close()
96
+
97
+ if self._process_platform_messages_task and not self._process_platform_messages_task.done():
98
+ self._process_platform_messages_task.cancel()
99
+ with contextlib.suppress(asyncio.CancelledError):
100
+ await self._process_platform_messages_task
101
+
102
+ await super().__aexit__(exc_type, exc_value, exc_traceback)
103
+
104
+ async def _process_platform_messages(self, ws_url: str) -> None:
105
+ try:
106
+ async with websockets.asyncio.client.connect(ws_url) as websocket:
107
+ self._platform_events_websocket = websocket
108
+ if self._connected_to_platform_websocket is not None:
109
+ self._connected_to_platform_websocket.set_result(True)
110
+
111
+ async for message in websocket:
112
+ try:
113
+ parsed_message = event_data_adapter.validate_json(message)
114
+
115
+ if isinstance(parsed_message, DeprecatedEvent):
116
+ continue
117
+
118
+ if isinstance(parsed_message, UnknownEvent):
119
+ logger.info(
120
+ f'Unknown message received: event_name={parsed_message.name}, '
121
+ f'event_data={parsed_message.data}'
122
+ )
123
+ continue
124
+
125
+ self.emit(
126
+ event=parsed_message.name,
127
+ event_data=parsed_message.data
128
+ if not isinstance(parsed_message.data, SystemInfoEventData)
129
+ else parsed_message.data.to_crawlee_format(self._configuration.dedicated_cpus or 1),
130
+ )
131
+
132
+ if parsed_message.name == Event.MIGRATING:
133
+ await self._emit_persist_state_event_rec_task.stop()
134
+ self.emit(event=Event.PERSIST_STATE, event_data=EventPersistStateData(is_migrating=True))
135
+ except Exception:
136
+ logger.exception('Cannot parse Actor event', extra={'message': message})
137
+ except Exception:
138
+ logger.exception('Error in websocket connection')
139
+ if self._connected_to_platform_websocket is not None:
140
+ self._connected_to_platform_websocket.set_result(False)