apify 1.7.1b1__py3-none-any.whl → 2.2.1__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- apify/__init__.py +33 -4
- apify/_actor.py +1074 -0
- apify/_configuration.py +370 -0
- apify/_consts.py +10 -0
- apify/_crypto.py +31 -27
- apify/_models.py +117 -0
- apify/_platform_event_manager.py +231 -0
- apify/_proxy_configuration.py +320 -0
- apify/_utils.py +18 -484
- apify/apify_storage_client/__init__.py +3 -0
- apify/apify_storage_client/_apify_storage_client.py +68 -0
- apify/apify_storage_client/_dataset_client.py +190 -0
- apify/apify_storage_client/_dataset_collection_client.py +51 -0
- apify/apify_storage_client/_key_value_store_client.py +94 -0
- apify/apify_storage_client/_key_value_store_collection_client.py +51 -0
- apify/apify_storage_client/_request_queue_client.py +176 -0
- apify/apify_storage_client/_request_queue_collection_client.py +51 -0
- apify/apify_storage_client/py.typed +0 -0
- apify/log.py +22 -105
- apify/scrapy/__init__.py +11 -3
- apify/scrapy/middlewares/__init__.py +3 -1
- apify/scrapy/middlewares/apify_proxy.py +29 -27
- apify/scrapy/middlewares/py.typed +0 -0
- apify/scrapy/pipelines/__init__.py +3 -1
- apify/scrapy/pipelines/actor_dataset_push.py +6 -3
- apify/scrapy/pipelines/py.typed +0 -0
- apify/scrapy/py.typed +0 -0
- apify/scrapy/requests.py +60 -58
- apify/scrapy/scheduler.py +28 -19
- apify/scrapy/utils.py +10 -32
- apify/storages/__init__.py +4 -10
- apify/storages/_request_list.py +150 -0
- apify/storages/py.typed +0 -0
- apify-2.2.1.dist-info/METADATA +211 -0
- apify-2.2.1.dist-info/RECORD +38 -0
- {apify-1.7.1b1.dist-info → apify-2.2.1.dist-info}/WHEEL +1 -2
- apify/_memory_storage/__init__.py +0 -3
- apify/_memory_storage/file_storage_utils.py +0 -71
- apify/_memory_storage/memory_storage_client.py +0 -219
- apify/_memory_storage/resource_clients/__init__.py +0 -19
- apify/_memory_storage/resource_clients/base_resource_client.py +0 -141
- apify/_memory_storage/resource_clients/base_resource_collection_client.py +0 -114
- apify/_memory_storage/resource_clients/dataset.py +0 -452
- apify/_memory_storage/resource_clients/dataset_collection.py +0 -48
- apify/_memory_storage/resource_clients/key_value_store.py +0 -533
- apify/_memory_storage/resource_clients/key_value_store_collection.py +0 -48
- apify/_memory_storage/resource_clients/request_queue.py +0 -466
- apify/_memory_storage/resource_clients/request_queue_collection.py +0 -48
- apify/actor.py +0 -1351
- apify/config.py +0 -127
- apify/consts.py +0 -67
- apify/event_manager.py +0 -236
- apify/proxy_configuration.py +0 -365
- apify/storages/base_storage.py +0 -181
- apify/storages/dataset.py +0 -494
- apify/storages/key_value_store.py +0 -257
- apify/storages/request_queue.py +0 -602
- apify/storages/storage_client_manager.py +0 -72
- apify-1.7.1b1.dist-info/METADATA +0 -149
- apify-1.7.1b1.dist-info/RECORD +0 -41
- apify-1.7.1b1.dist-info/top_level.txt +0 -1
- {apify-1.7.1b1.dist-info → apify-2.2.1.dist-info}/LICENSE +0 -0
apify/actor.py
DELETED
|
@@ -1,1351 +0,0 @@
|
|
|
1
|
-
from __future__ import annotations
|
|
2
|
-
|
|
3
|
-
import asyncio
|
|
4
|
-
import contextlib
|
|
5
|
-
import inspect
|
|
6
|
-
import os
|
|
7
|
-
import sys
|
|
8
|
-
from datetime import datetime, timezone
|
|
9
|
-
from typing import TYPE_CHECKING, Any, Awaitable, Callable, TypeVar, cast
|
|
10
|
-
|
|
11
|
-
from apify_client import ApifyClientAsync
|
|
12
|
-
from apify_shared.consts import ActorEnvVars, ActorEventTypes, ActorExitCodes, ApifyEnvVars, WebhookEventType
|
|
13
|
-
from apify_shared.utils import ignore_docs, maybe_extract_enum_member_value
|
|
14
|
-
|
|
15
|
-
from apify._crypto import decrypt_input_secrets, load_private_key
|
|
16
|
-
from apify._utils import (
|
|
17
|
-
dualproperty,
|
|
18
|
-
fetch_and_parse_env_var,
|
|
19
|
-
get_cpu_usage_percent,
|
|
20
|
-
get_memory_usage_bytes,
|
|
21
|
-
get_system_info,
|
|
22
|
-
is_running_in_ipython,
|
|
23
|
-
run_func_at_interval_async,
|
|
24
|
-
wrap_internal,
|
|
25
|
-
)
|
|
26
|
-
from apify.config import Configuration
|
|
27
|
-
from apify.consts import EVENT_LISTENERS_TIMEOUT_SECS
|
|
28
|
-
from apify.event_manager import EventManager
|
|
29
|
-
from apify.log import logger
|
|
30
|
-
from apify.proxy_configuration import ProxyConfiguration
|
|
31
|
-
from apify.storages import Dataset, KeyValueStore, RequestQueue, StorageClientManager
|
|
32
|
-
|
|
33
|
-
if TYPE_CHECKING:
|
|
34
|
-
import logging
|
|
35
|
-
from types import TracebackType
|
|
36
|
-
|
|
37
|
-
from apify._memory_storage import MemoryStorageClient
|
|
38
|
-
|
|
39
|
-
T = TypeVar('T')
|
|
40
|
-
MainReturnType = TypeVar('MainReturnType')
|
|
41
|
-
|
|
42
|
-
# This metaclass is needed so you can do `async with Actor: ...` instead of `async with Actor() as a: ...`
|
|
43
|
-
# and have automatic `Actor.init()` and `Actor.exit()`
|
|
44
|
-
|
|
45
|
-
|
|
46
|
-
class _ActorContextManager(type):
|
|
47
|
-
@staticmethod
|
|
48
|
-
async def __aenter__() -> type[Actor]:
|
|
49
|
-
await Actor.init()
|
|
50
|
-
return Actor
|
|
51
|
-
|
|
52
|
-
@staticmethod
|
|
53
|
-
async def __aexit__(
|
|
54
|
-
_exc_type: type[BaseException] | None,
|
|
55
|
-
exc_value: BaseException | None,
|
|
56
|
-
_exc_traceback: TracebackType | None,
|
|
57
|
-
) -> None:
|
|
58
|
-
if not Actor._get_default_instance()._is_exiting:
|
|
59
|
-
if exc_value:
|
|
60
|
-
await Actor.fail(
|
|
61
|
-
exit_code=ActorExitCodes.ERROR_USER_FUNCTION_THREW.value,
|
|
62
|
-
exception=exc_value,
|
|
63
|
-
)
|
|
64
|
-
else:
|
|
65
|
-
await Actor.exit()
|
|
66
|
-
|
|
67
|
-
|
|
68
|
-
class Actor(metaclass=_ActorContextManager):
|
|
69
|
-
"""The main class of the SDK, through which all the actor operations should be done."""
|
|
70
|
-
|
|
71
|
-
_default_instance: Actor | None = None
|
|
72
|
-
_apify_client: ApifyClientAsync
|
|
73
|
-
_memory_storage_client: MemoryStorageClient
|
|
74
|
-
_config: Configuration
|
|
75
|
-
_event_manager: EventManager
|
|
76
|
-
_send_system_info_interval_task: asyncio.Task | None = None
|
|
77
|
-
_send_persist_state_interval_task: asyncio.Task | None = None
|
|
78
|
-
_is_exiting = False
|
|
79
|
-
_was_final_persist_state_emitted = False
|
|
80
|
-
|
|
81
|
-
def __init__(self: Actor, config: Configuration | None = None) -> None:
|
|
82
|
-
"""Create an Actor instance.
|
|
83
|
-
|
|
84
|
-
Note that you don't have to do this, all the methods on this class function as classmethods too,
|
|
85
|
-
and that is their preferred usage.
|
|
86
|
-
|
|
87
|
-
Args:
|
|
88
|
-
config (Configuration, optional): The actor configuration to be used. If not passed, a new Configuration instance will be created.
|
|
89
|
-
"""
|
|
90
|
-
# To have methods which work the same as classmethods and instance methods,
|
|
91
|
-
# so you can do both Actor.xxx() and Actor().xxx(),
|
|
92
|
-
# we need to have an `_xxx_internal` instance method which contains the actual implementation of the method,
|
|
93
|
-
# and then in the instance constructor overwrite the `xxx` classmethod with the `_xxx_internal` instance method,
|
|
94
|
-
# while copying the annotations, types and so on.
|
|
95
|
-
self.init = wrap_internal(self._init_internal, self.init) # type: ignore
|
|
96
|
-
self.exit = wrap_internal(self._exit_internal, self.exit) # type: ignore
|
|
97
|
-
self.fail = wrap_internal(self._fail_internal, self.fail) # type: ignore
|
|
98
|
-
self.main = wrap_internal(self._main_internal, self.main) # type: ignore
|
|
99
|
-
self.new_client = wrap_internal(self._new_client_internal, self.new_client) # type: ignore
|
|
100
|
-
|
|
101
|
-
self.open_dataset = wrap_internal(self._open_dataset_internal, self.open_dataset) # type: ignore
|
|
102
|
-
self.open_key_value_store = wrap_internal(self._open_key_value_store_internal, self.open_key_value_store) # type: ignore
|
|
103
|
-
self.open_request_queue = wrap_internal(self._open_request_queue_internal, self.open_request_queue) # type: ignore
|
|
104
|
-
self.push_data = wrap_internal(self._push_data_internal, self.push_data) # type: ignore
|
|
105
|
-
self.get_input = wrap_internal(self._get_input_internal, self.get_input) # type: ignore
|
|
106
|
-
self.get_value = wrap_internal(self._get_value_internal, self.get_value) # type: ignore
|
|
107
|
-
self.set_value = wrap_internal(self._set_value_internal, self.set_value) # type: ignore
|
|
108
|
-
|
|
109
|
-
self.on = wrap_internal(self._on_internal, self.on) # type: ignore
|
|
110
|
-
self.off = wrap_internal(self._off_internal, self.off) # type: ignore
|
|
111
|
-
|
|
112
|
-
self.is_at_home = wrap_internal(self._is_at_home_internal, self.is_at_home) # type: ignore
|
|
113
|
-
self.get_env = wrap_internal(self._get_env_internal, self.get_env) # type: ignore
|
|
114
|
-
|
|
115
|
-
self.start = wrap_internal(self._start_internal, self.start) # type: ignore
|
|
116
|
-
self.call = wrap_internal(self._call_internal, self.call) # type: ignore
|
|
117
|
-
self.call_task = wrap_internal(self._call_task_internal, self.call_task) # type: ignore
|
|
118
|
-
self.abort = wrap_internal(self._abort_internal, self.abort) # type: ignore
|
|
119
|
-
self.metamorph = wrap_internal(self._metamorph_internal, self.metamorph) # type: ignore
|
|
120
|
-
self.reboot = wrap_internal(self._reboot_internal, self.reboot) # type: ignore
|
|
121
|
-
self.add_webhook = wrap_internal(self._add_webhook_internal, self.add_webhook) # type: ignore
|
|
122
|
-
self.set_status_message = wrap_internal(self._set_status_message_internal, self.set_status_message) # type: ignore
|
|
123
|
-
self.create_proxy_configuration = wrap_internal(self._create_proxy_configuration_internal, self.create_proxy_configuration) # type: ignore
|
|
124
|
-
|
|
125
|
-
self._config: Configuration = config or Configuration()
|
|
126
|
-
self._apify_client = self.new_client()
|
|
127
|
-
self._event_manager = EventManager(config=self._config)
|
|
128
|
-
|
|
129
|
-
self._is_initialized = False
|
|
130
|
-
|
|
131
|
-
@ignore_docs
|
|
132
|
-
async def __aenter__(self: Actor) -> Actor:
|
|
133
|
-
"""Initialize the Actor.
|
|
134
|
-
|
|
135
|
-
Automatically initializes the Actor instance when you use it in an `async with ...` statement.
|
|
136
|
-
|
|
137
|
-
When you exit the `async with` block, the `Actor.exit()` method is called,
|
|
138
|
-
and if any exception happens while executing the block code,
|
|
139
|
-
the `Actor.fail` method is called.
|
|
140
|
-
"""
|
|
141
|
-
await self.init()
|
|
142
|
-
return self
|
|
143
|
-
|
|
144
|
-
@ignore_docs
|
|
145
|
-
async def __aexit__(
|
|
146
|
-
self: Actor,
|
|
147
|
-
_exc_type: type[BaseException] | None,
|
|
148
|
-
exc_value: BaseException | None,
|
|
149
|
-
_exc_traceback: TracebackType | None,
|
|
150
|
-
) -> None:
|
|
151
|
-
"""Exit the Actor, handling any exceptions properly.
|
|
152
|
-
|
|
153
|
-
When you exit the `async with` block, the `Actor.exit()` method is called,
|
|
154
|
-
and if any exception happens while executing the block code,
|
|
155
|
-
the `Actor.fail` method is called.
|
|
156
|
-
"""
|
|
157
|
-
if not self._is_exiting:
|
|
158
|
-
if exc_value:
|
|
159
|
-
await self.fail(
|
|
160
|
-
exit_code=ActorExitCodes.ERROR_USER_FUNCTION_THREW.value,
|
|
161
|
-
exception=exc_value,
|
|
162
|
-
)
|
|
163
|
-
else:
|
|
164
|
-
await self.exit()
|
|
165
|
-
|
|
166
|
-
@classmethod
|
|
167
|
-
def _get_default_instance(cls: type[Actor]) -> Actor:
|
|
168
|
-
if not cls._default_instance:
|
|
169
|
-
cls._default_instance = cls(config=Configuration.get_global_configuration())
|
|
170
|
-
|
|
171
|
-
return cls._default_instance
|
|
172
|
-
|
|
173
|
-
@dualproperty
|
|
174
|
-
def apify_client(self_or_cls: type[Actor] | Actor) -> ApifyClientAsync: # noqa: N805
|
|
175
|
-
"""The ApifyClientAsync instance the Actor instance uses."""
|
|
176
|
-
if isinstance(self_or_cls, type):
|
|
177
|
-
return self_or_cls._get_default_instance()._apify_client
|
|
178
|
-
return self_or_cls._apify_client
|
|
179
|
-
|
|
180
|
-
@dualproperty
|
|
181
|
-
def config(self_or_cls: type[Actor] | Actor) -> Configuration: # noqa: N805
|
|
182
|
-
"""The Configuration instance the Actor instance uses."""
|
|
183
|
-
if isinstance(self_or_cls, type):
|
|
184
|
-
return self_or_cls._get_default_instance()._config
|
|
185
|
-
return self_or_cls._config
|
|
186
|
-
|
|
187
|
-
@dualproperty
|
|
188
|
-
def event_manager(self_or_cls: type[Actor] | Actor) -> EventManager: # noqa: N805
|
|
189
|
-
"""The EventManager instance the Actor instance uses."""
|
|
190
|
-
if isinstance(self_or_cls, type):
|
|
191
|
-
return self_or_cls._get_default_instance()._event_manager
|
|
192
|
-
|
|
193
|
-
return self_or_cls._event_manager
|
|
194
|
-
|
|
195
|
-
@dualproperty
|
|
196
|
-
def log(_self_or_cls: type[Actor] | Actor) -> logging.Logger: # noqa: N805
|
|
197
|
-
"""The logging.Logger instance the Actor uses."""
|
|
198
|
-
return logger
|
|
199
|
-
|
|
200
|
-
def _raise_if_not_initialized(self: Actor) -> None:
|
|
201
|
-
if not self._is_initialized:
|
|
202
|
-
raise RuntimeError('The actor was not initialized!')
|
|
203
|
-
|
|
204
|
-
@classmethod
|
|
205
|
-
async def init(cls: type[Actor]) -> None:
|
|
206
|
-
"""Initialize the actor instance.
|
|
207
|
-
|
|
208
|
-
This initializes the Actor instance.
|
|
209
|
-
It configures the right storage client based on whether the actor is running locally or on the Apify platform,
|
|
210
|
-
it initializes the event manager for processing actor events,
|
|
211
|
-
and starts an interval for regularly sending `PERSIST_STATE` events,
|
|
212
|
-
so that the actor can regularly persist its state in response to these events.
|
|
213
|
-
|
|
214
|
-
This method should be called immediately before performing any additional actor actions,
|
|
215
|
-
and it should be called only once.
|
|
216
|
-
"""
|
|
217
|
-
return await cls._get_default_instance().init()
|
|
218
|
-
|
|
219
|
-
async def _init_internal(self: Actor) -> None:
|
|
220
|
-
if self._is_initialized:
|
|
221
|
-
raise RuntimeError('The actor was already initialized!')
|
|
222
|
-
|
|
223
|
-
self._is_exiting = False
|
|
224
|
-
self._was_final_persist_state_emitted = False
|
|
225
|
-
|
|
226
|
-
self.log.info('Initializing actor...')
|
|
227
|
-
self.log.info('System info', extra=get_system_info())
|
|
228
|
-
|
|
229
|
-
# TODO: Print outdated SDK version warning (we need a new env var for this)
|
|
230
|
-
# https://github.com/apify/apify-sdk-python/issues/146
|
|
231
|
-
|
|
232
|
-
StorageClientManager.set_config(self._config)
|
|
233
|
-
if self._config.token:
|
|
234
|
-
StorageClientManager.set_cloud_client(self._apify_client)
|
|
235
|
-
|
|
236
|
-
await self._event_manager.init()
|
|
237
|
-
|
|
238
|
-
self._send_persist_state_interval_task = asyncio.create_task(
|
|
239
|
-
run_func_at_interval_async(
|
|
240
|
-
lambda: self._event_manager.emit(ActorEventTypes.PERSIST_STATE, {'isMigrating': False}),
|
|
241
|
-
self._config.persist_state_interval_millis / 1000,
|
|
242
|
-
),
|
|
243
|
-
)
|
|
244
|
-
|
|
245
|
-
if not self.is_at_home():
|
|
246
|
-
self._send_system_info_interval_task = asyncio.create_task(
|
|
247
|
-
run_func_at_interval_async(
|
|
248
|
-
lambda: self._event_manager.emit(ActorEventTypes.SYSTEM_INFO, self.get_system_info()),
|
|
249
|
-
self._config.system_info_interval_millis / 1000,
|
|
250
|
-
),
|
|
251
|
-
)
|
|
252
|
-
|
|
253
|
-
self._event_manager.on(ActorEventTypes.MIGRATING, self._respond_to_migrating_event)
|
|
254
|
-
|
|
255
|
-
# The CPU usage is calculated as an average between two last calls to psutil
|
|
256
|
-
# We need to make a first, dummy call, so the next calls have something to compare itself agains
|
|
257
|
-
get_cpu_usage_percent()
|
|
258
|
-
|
|
259
|
-
self._is_initialized = True
|
|
260
|
-
|
|
261
|
-
def get_system_info(self: Actor) -> dict:
|
|
262
|
-
"""Get the current system info."""
|
|
263
|
-
cpu_usage_percent = get_cpu_usage_percent()
|
|
264
|
-
memory_usage_bytes = get_memory_usage_bytes()
|
|
265
|
-
# This is in camel case to be compatible with the events from the platform
|
|
266
|
-
result = {
|
|
267
|
-
'createdAt': datetime.now(timezone.utc),
|
|
268
|
-
'cpuCurrentUsage': cpu_usage_percent,
|
|
269
|
-
'memCurrentBytes': memory_usage_bytes,
|
|
270
|
-
}
|
|
271
|
-
if self._config.max_used_cpu_ratio:
|
|
272
|
-
result['isCpuOverloaded'] = cpu_usage_percent > 100 * self._config.max_used_cpu_ratio
|
|
273
|
-
|
|
274
|
-
return result
|
|
275
|
-
|
|
276
|
-
async def _respond_to_migrating_event(self: Actor, _event_data: Any) -> None:
|
|
277
|
-
# Don't emit any more regular persist state events
|
|
278
|
-
if self._send_persist_state_interval_task and not self._send_persist_state_interval_task.cancelled():
|
|
279
|
-
self._send_persist_state_interval_task.cancel()
|
|
280
|
-
with contextlib.suppress(asyncio.CancelledError):
|
|
281
|
-
await self._send_persist_state_interval_task
|
|
282
|
-
|
|
283
|
-
self._event_manager.emit(ActorEventTypes.PERSIST_STATE, {'isMigrating': True})
|
|
284
|
-
self._was_final_persist_state_emitted = True
|
|
285
|
-
|
|
286
|
-
async def _cancel_event_emitting_intervals(self: Actor) -> None:
|
|
287
|
-
if self._send_persist_state_interval_task and not self._send_persist_state_interval_task.cancelled():
|
|
288
|
-
self._send_persist_state_interval_task.cancel()
|
|
289
|
-
with contextlib.suppress(asyncio.CancelledError):
|
|
290
|
-
await self._send_persist_state_interval_task
|
|
291
|
-
|
|
292
|
-
if self._send_system_info_interval_task and not self._send_system_info_interval_task.cancelled():
|
|
293
|
-
self._send_system_info_interval_task.cancel()
|
|
294
|
-
with contextlib.suppress(asyncio.CancelledError):
|
|
295
|
-
await self._send_system_info_interval_task
|
|
296
|
-
|
|
297
|
-
@classmethod
|
|
298
|
-
async def exit(
|
|
299
|
-
cls: type[Actor],
|
|
300
|
-
*,
|
|
301
|
-
exit_code: int = 0,
|
|
302
|
-
event_listeners_timeout_secs: float | None = EVENT_LISTENERS_TIMEOUT_SECS,
|
|
303
|
-
status_message: str | None = None,
|
|
304
|
-
) -> None:
|
|
305
|
-
"""Exit the actor instance.
|
|
306
|
-
|
|
307
|
-
This stops the Actor instance.
|
|
308
|
-
It cancels all the intervals for regularly sending `PERSIST_STATE` events,
|
|
309
|
-
sends a final `PERSIST_STATE` event,
|
|
310
|
-
waits for all the event listeners to finish,
|
|
311
|
-
and stops the event manager.
|
|
312
|
-
|
|
313
|
-
Args:
|
|
314
|
-
exit_code (int, optional): The exit code with which the actor should fail (defaults to `0`).
|
|
315
|
-
event_listeners_timeout_secs (float, optional): How long should the actor wait for actor event listeners to finish before exiting.
|
|
316
|
-
status_message (str, optional): The final status message that the actor should display.
|
|
317
|
-
"""
|
|
318
|
-
return await cls._get_default_instance().exit(
|
|
319
|
-
exit_code=exit_code,
|
|
320
|
-
event_listeners_timeout_secs=event_listeners_timeout_secs,
|
|
321
|
-
status_message=status_message,
|
|
322
|
-
)
|
|
323
|
-
|
|
324
|
-
async def _exit_internal(
|
|
325
|
-
self: Actor,
|
|
326
|
-
*,
|
|
327
|
-
exit_code: int = 0,
|
|
328
|
-
event_listeners_timeout_secs: float | None = EVENT_LISTENERS_TIMEOUT_SECS,
|
|
329
|
-
status_message: str | None = None,
|
|
330
|
-
) -> None:
|
|
331
|
-
self._raise_if_not_initialized()
|
|
332
|
-
|
|
333
|
-
self._is_exiting = True
|
|
334
|
-
|
|
335
|
-
exit_code = maybe_extract_enum_member_value(exit_code)
|
|
336
|
-
|
|
337
|
-
self.log.info('Exiting actor', extra={'exit_code': exit_code})
|
|
338
|
-
|
|
339
|
-
await self._cancel_event_emitting_intervals()
|
|
340
|
-
|
|
341
|
-
# Send final persist state event
|
|
342
|
-
if not self._was_final_persist_state_emitted:
|
|
343
|
-
self._event_manager.emit(ActorEventTypes.PERSIST_STATE, {'isMigrating': False})
|
|
344
|
-
self._was_final_persist_state_emitted = True
|
|
345
|
-
|
|
346
|
-
if status_message is not None:
|
|
347
|
-
await self.set_status_message(status_message, is_terminal=True)
|
|
348
|
-
|
|
349
|
-
# Sleep for a bit so that the listeners have a chance to trigger
|
|
350
|
-
await asyncio.sleep(0.1)
|
|
351
|
-
|
|
352
|
-
await self._event_manager.close(event_listeners_timeout_secs=event_listeners_timeout_secs)
|
|
353
|
-
|
|
354
|
-
self._is_initialized = False
|
|
355
|
-
|
|
356
|
-
if is_running_in_ipython():
|
|
357
|
-
self.log.debug(f'Not calling sys.exit({exit_code}) because actor is running in IPython')
|
|
358
|
-
elif os.getenv('PYTEST_CURRENT_TEST', default=False): # noqa: PLW1508
|
|
359
|
-
self.log.debug(f'Not calling sys.exit({exit_code}) because actor is running in an unit test')
|
|
360
|
-
elif hasattr(asyncio, '_nest_patched'):
|
|
361
|
-
self.log.debug(f'Not calling sys.exit({exit_code}) because actor is running in a nested event loop')
|
|
362
|
-
else:
|
|
363
|
-
sys.exit(exit_code)
|
|
364
|
-
|
|
365
|
-
@classmethod
|
|
366
|
-
async def fail(
|
|
367
|
-
cls: type[Actor],
|
|
368
|
-
*,
|
|
369
|
-
exit_code: int = 1,
|
|
370
|
-
exception: BaseException | None = None,
|
|
371
|
-
status_message: str | None = None,
|
|
372
|
-
) -> None:
|
|
373
|
-
"""Fail the actor instance.
|
|
374
|
-
|
|
375
|
-
This performs all the same steps as Actor.exit(),
|
|
376
|
-
but it additionally sets the exit code to `1` (by default).
|
|
377
|
-
|
|
378
|
-
Args:
|
|
379
|
-
exit_code (int, optional): The exit code with which the actor should fail (defaults to `1`).
|
|
380
|
-
exception (BaseException, optional): The exception with which the actor failed.
|
|
381
|
-
status_message (str, optional): The final status message that the actor should display.
|
|
382
|
-
"""
|
|
383
|
-
return await cls._get_default_instance().fail(
|
|
384
|
-
exit_code=exit_code,
|
|
385
|
-
exception=exception,
|
|
386
|
-
status_message=status_message,
|
|
387
|
-
)
|
|
388
|
-
|
|
389
|
-
async def _fail_internal(
|
|
390
|
-
self: Actor,
|
|
391
|
-
*,
|
|
392
|
-
exit_code: int = 1,
|
|
393
|
-
exception: BaseException | None = None,
|
|
394
|
-
status_message: str | None = None,
|
|
395
|
-
) -> None:
|
|
396
|
-
self._raise_if_not_initialized()
|
|
397
|
-
|
|
398
|
-
# In IPython, we don't run `sys.exit()` during actor exits,
|
|
399
|
-
# so the exception traceback will be printed on its own
|
|
400
|
-
if exception and not is_running_in_ipython():
|
|
401
|
-
self.log.exception('Actor failed with an exception', exc_info=exception)
|
|
402
|
-
|
|
403
|
-
await self.exit(exit_code=exit_code, status_message=status_message)
|
|
404
|
-
|
|
405
|
-
@classmethod
|
|
406
|
-
async def main(cls: type[Actor], main_actor_function: Callable[[], MainReturnType]) -> MainReturnType | None:
|
|
407
|
-
"""Initialize the actor, run the passed function and finish the actor cleanly.
|
|
408
|
-
|
|
409
|
-
**The `Actor.main()` function is optional** and is provided merely for your convenience.
|
|
410
|
-
It is mainly useful when you're running your code as an actor on the [Apify platform](https://apify.com/actors).
|
|
411
|
-
|
|
412
|
-
The `Actor.main()` function performs the following actions:
|
|
413
|
-
|
|
414
|
-
- When running on the Apify platform (i.e. `APIFY_IS_AT_HOME` environment variable is set),
|
|
415
|
-
it sets up a connection to listen for platform events.
|
|
416
|
-
For example, to get a notification about an imminent migration to another server.
|
|
417
|
-
- It invokes the user function passed as the `main_actor_function` parameter.
|
|
418
|
-
- If the user function was an async function, it awaits it.
|
|
419
|
-
- If the user function throws an exception or some other error is encountered,
|
|
420
|
-
it prints error details to console so that they are stored to the log,
|
|
421
|
-
and finishes the actor cleanly.
|
|
422
|
-
- Finally, it exits the Python process, with zero exit code on success and non-zero on errors.
|
|
423
|
-
|
|
424
|
-
Args:
|
|
425
|
-
main_actor_function (Callable): The user function which should be run in the actor
|
|
426
|
-
"""
|
|
427
|
-
return await cls._get_default_instance().main(
|
|
428
|
-
main_actor_function=main_actor_function,
|
|
429
|
-
)
|
|
430
|
-
|
|
431
|
-
async def _main_internal(self: Actor, main_actor_function: Callable[[], MainReturnType]) -> MainReturnType | None:
|
|
432
|
-
if not inspect.isfunction(main_actor_function):
|
|
433
|
-
raise TypeError(f'First argument passed to Actor.main() must be a function, but instead it was {type(main_actor_function)}')
|
|
434
|
-
|
|
435
|
-
await self.init()
|
|
436
|
-
try:
|
|
437
|
-
if inspect.iscoroutinefunction(main_actor_function):
|
|
438
|
-
res = await main_actor_function()
|
|
439
|
-
else:
|
|
440
|
-
res = main_actor_function()
|
|
441
|
-
await self.exit()
|
|
442
|
-
return cast(MainReturnType, res)
|
|
443
|
-
except Exception as exc:
|
|
444
|
-
await self.fail(
|
|
445
|
-
exit_code=ActorExitCodes.ERROR_USER_FUNCTION_THREW.value,
|
|
446
|
-
exception=exc,
|
|
447
|
-
)
|
|
448
|
-
return None
|
|
449
|
-
|
|
450
|
-
@classmethod
|
|
451
|
-
def new_client(
|
|
452
|
-
cls: type[Actor],
|
|
453
|
-
*,
|
|
454
|
-
token: str | None = None,
|
|
455
|
-
api_url: str | None = None,
|
|
456
|
-
max_retries: int | None = None,
|
|
457
|
-
min_delay_between_retries_millis: int | None = None,
|
|
458
|
-
timeout_secs: int | None = None,
|
|
459
|
-
) -> ApifyClientAsync:
|
|
460
|
-
"""Return a new instance of the Apify API client.
|
|
461
|
-
|
|
462
|
-
The `ApifyClientAsync` class is provided by the [apify-client](https://github.com/apify/apify-client-python) package,
|
|
463
|
-
and it is automatically configured using the `APIFY_API_BASE_URL` and `APIFY_TOKEN` environment variables.
|
|
464
|
-
|
|
465
|
-
You can override the token via the available options.
|
|
466
|
-
That's useful if you want to use the client as a different Apify user than the SDK internals are using.
|
|
467
|
-
|
|
468
|
-
Args:
|
|
469
|
-
token (str, optional): The Apify API token
|
|
470
|
-
api_url (str, optional): The URL of the Apify API server to which to connect to. Defaults to https://api.apify.com
|
|
471
|
-
max_retries (int, optional): How many times to retry a failed request at most
|
|
472
|
-
min_delay_between_retries_millis (int, optional): How long will the client wait between retrying requests
|
|
473
|
-
(increases exponentially from this value)
|
|
474
|
-
timeout_secs (int, optional): The socket timeout of the HTTP requests sent to the Apify API
|
|
475
|
-
"""
|
|
476
|
-
return cls._get_default_instance().new_client(
|
|
477
|
-
token=token,
|
|
478
|
-
api_url=api_url,
|
|
479
|
-
max_retries=max_retries,
|
|
480
|
-
min_delay_between_retries_millis=min_delay_between_retries_millis,
|
|
481
|
-
timeout_secs=timeout_secs,
|
|
482
|
-
)
|
|
483
|
-
|
|
484
|
-
def _new_client_internal(
|
|
485
|
-
self: Actor,
|
|
486
|
-
*,
|
|
487
|
-
token: str | None = None,
|
|
488
|
-
api_url: str | None = None,
|
|
489
|
-
max_retries: int | None = None,
|
|
490
|
-
min_delay_between_retries_millis: int | None = None,
|
|
491
|
-
timeout_secs: int | None = None,
|
|
492
|
-
) -> ApifyClientAsync:
|
|
493
|
-
token = token or self._config.token
|
|
494
|
-
api_url = api_url or self._config.api_base_url
|
|
495
|
-
return ApifyClientAsync(
|
|
496
|
-
token=token,
|
|
497
|
-
api_url=api_url,
|
|
498
|
-
max_retries=max_retries,
|
|
499
|
-
min_delay_between_retries_millis=min_delay_between_retries_millis,
|
|
500
|
-
timeout_secs=timeout_secs,
|
|
501
|
-
)
|
|
502
|
-
|
|
503
|
-
def _get_storage_client(self: Actor, force_cloud: bool) -> ApifyClientAsync | None: # noqa: FBT001
|
|
504
|
-
return self._apify_client if force_cloud else None
|
|
505
|
-
|
|
506
|
-
@classmethod
|
|
507
|
-
async def open_dataset(
|
|
508
|
-
cls: type[Actor],
|
|
509
|
-
*,
|
|
510
|
-
id: str | None = None, # noqa: A002
|
|
511
|
-
name: str | None = None,
|
|
512
|
-
force_cloud: bool = False,
|
|
513
|
-
) -> Dataset:
|
|
514
|
-
"""Open a dataset.
|
|
515
|
-
|
|
516
|
-
Datasets are used to store structured data where each object stored has the same attributes,
|
|
517
|
-
such as online store products or real estate offers.
|
|
518
|
-
The actual data is stored either on the local filesystem or in the Apify cloud.
|
|
519
|
-
|
|
520
|
-
Args:
|
|
521
|
-
id (str, optional): ID of the dataset to be opened.
|
|
522
|
-
If neither `id` nor `name` are provided, the method returns the default dataset associated with the actor run.
|
|
523
|
-
name (str, optional): Name of the dataset to be opened.
|
|
524
|
-
If neither `id` nor `name` are provided, the method returns the default dataset associated with the actor run.
|
|
525
|
-
force_cloud (bool, optional): If set to `True` then the Apify cloud storage is always used.
|
|
526
|
-
This way it is possible to combine local and cloud storage.
|
|
527
|
-
|
|
528
|
-
Returns:
|
|
529
|
-
Dataset: An instance of the `Dataset` class for the given ID or name.
|
|
530
|
-
|
|
531
|
-
"""
|
|
532
|
-
return await cls._get_default_instance().open_dataset(id=id, name=name, force_cloud=force_cloud)
|
|
533
|
-
|
|
534
|
-
async def _open_dataset_internal(
|
|
535
|
-
self: Actor,
|
|
536
|
-
*,
|
|
537
|
-
id: str | None = None, # noqa: A002
|
|
538
|
-
name: str | None = None,
|
|
539
|
-
force_cloud: bool = False,
|
|
540
|
-
) -> Dataset:
|
|
541
|
-
self._raise_if_not_initialized()
|
|
542
|
-
|
|
543
|
-
return await Dataset.open(id=id, name=name, force_cloud=force_cloud, config=self._config)
|
|
544
|
-
|
|
545
|
-
@classmethod
|
|
546
|
-
async def open_key_value_store(
|
|
547
|
-
cls: type[Actor],
|
|
548
|
-
*,
|
|
549
|
-
id: str | None = None, # noqa: A002
|
|
550
|
-
name: str | None = None,
|
|
551
|
-
force_cloud: bool = False,
|
|
552
|
-
) -> KeyValueStore:
|
|
553
|
-
"""Open a key-value store.
|
|
554
|
-
|
|
555
|
-
Key-value stores are used to store records or files, along with their MIME content type.
|
|
556
|
-
The records are stored and retrieved using a unique key.
|
|
557
|
-
The actual data is stored either on a local filesystem or in the Apify cloud.
|
|
558
|
-
|
|
559
|
-
Args:
|
|
560
|
-
id (str, optional): ID of the key-value store to be opened.
|
|
561
|
-
If neither `id` nor `name` are provided, the method returns the default key-value store associated with the actor run.
|
|
562
|
-
name (str, optional): Name of the key-value store to be opened.
|
|
563
|
-
If neither `id` nor `name` are provided, the method returns the default key-value store associated with the actor run.
|
|
564
|
-
force_cloud (bool, optional): If set to `True` then the Apify cloud storage is always used.
|
|
565
|
-
This way it is possible to combine local and cloud storage.
|
|
566
|
-
|
|
567
|
-
Returns:
|
|
568
|
-
KeyValueStore: An instance of the `KeyValueStore` class for the given ID or name.
|
|
569
|
-
"""
|
|
570
|
-
return await cls._get_default_instance().open_key_value_store(id=id, name=name, force_cloud=force_cloud)
|
|
571
|
-
|
|
572
|
-
async def _open_key_value_store_internal(
|
|
573
|
-
self: Actor,
|
|
574
|
-
*,
|
|
575
|
-
id: str | None = None, # noqa: A002
|
|
576
|
-
name: str | None = None,
|
|
577
|
-
force_cloud: bool = False,
|
|
578
|
-
) -> KeyValueStore:
|
|
579
|
-
self._raise_if_not_initialized()
|
|
580
|
-
|
|
581
|
-
return await KeyValueStore.open(id=id, name=name, force_cloud=force_cloud, config=self._config)
|
|
582
|
-
|
|
583
|
-
@classmethod
async def open_request_queue(
    cls: type[Actor],
    *,
    id: str | None = None,  # noqa: A002
    name: str | None = None,
    force_cloud: bool = False,
) -> RequestQueue:
    """Open a request queue through the default actor instance.

    A request queue is a queue of URLs to crawl, stored either on the local filesystem
    or in the Apify cloud. It serves deep crawling of websites, where you begin with several
    URLs and recursively follow links to further pages. Both breadth-first and depth-first
    crawling orders are supported by the data structure.

    Args:
        id (str, optional): ID of the request queue to open.
            When neither `id` nor `name` is given, the default request queue of the actor run is returned.
        name (str, optional): Name of the request queue to open.
            When neither `id` nor `name` is given, the default request queue of the actor run is returned.
        force_cloud (bool, optional): When `True`, the Apify cloud storage is always used,
            which makes it possible to combine local and cloud storage.

    Returns:
        RequestQueue: The `RequestQueue` instance for the given ID or name.
    """
    actor = cls._get_default_instance()
    return await actor.open_request_queue(id=id, name=name, force_cloud=force_cloud)
|
|
610
|
-
|
|
611
|
-
async def _open_request_queue_internal(
    self: Actor,
    *,
    id: str | None = None,  # noqa: A002
    name: str | None = None,
    force_cloud: bool = False,
) -> RequestQueue:
    """Instance-level implementation behind `open_request_queue`.

    Checks that the actor is initialized and then delegates to `RequestQueue.open`,
    passing along this actor's configuration.
    """
    self._raise_if_not_initialized()

    queue = await RequestQueue.open(id=id, name=name, force_cloud=force_cloud, config=self._config)
    return queue
|
|
621
|
-
|
|
622
|
-
@classmethod
async def push_data(cls: type[Actor], data: Any) -> None:
    """Save an object, or a list of objects, into the default dataset of the current actor run.

    Args:
        data (object or list of objects, optional): The data to push to the default dataset.
    """
    actor = cls._get_default_instance()
    return await actor.push_data(data=data)
|
|
630
|
-
|
|
631
|
-
async def _push_data_internal(self: Actor, data: Any) -> None:
    """Instance-level implementation behind `push_data`.

    Falsy `data` (e.g. `None` or an empty list) is ignored and the dataset is not opened at all.
    """
    self._raise_if_not_initialized()

    if data:
        default_dataset = await self.open_dataset()
        await default_dataset.push_data(data)
|
|
639
|
-
|
|
640
|
-
@classmethod
async def get_input(cls: type[Actor]) -> Any:
    """Return the actor input stored in the default key-value store of the current actor run."""
    actor = cls._get_default_instance()
    return await actor.get_input()
|
|
644
|
-
|
|
645
|
-
async def _get_input_internal(self: Actor) -> Any:
    """Instance-level implementation behind `get_input`.

    Reads the record stored under the configured input key. If the configuration provides both
    a private key file and its passphrase, the input is passed through `decrypt_input_secrets`
    before being returned; otherwise it is returned as read.
    """
    self._raise_if_not_initialized()

    actor_input = await self.get_value(self._config.input_key)
    key_file = self._config.input_secrets_private_key_file
    passphrase = self._config.input_secrets_private_key_passphrase

    # Decryption is attempted only when both pieces of key material are available.
    if not (key_file and passphrase):
        return actor_input

    return decrypt_input_secrets(load_private_key(key_file, passphrase), actor_input)
|
|
659
|
-
|
|
660
|
-
@classmethod
async def get_value(cls: type[Actor], key: str, default_value: Any = None) -> Any:
    """Read a value from the default key-value store of the current actor run.

    Args:
        key (str): Key of the record to retrieve.
        default_value (Any, optional): Value returned when the record does not exist.
    """
    actor = cls._get_default_instance()
    return await actor.get_value(key=key, default_value=default_value)
|
|
669
|
-
|
|
670
|
-
async def _get_value_internal(self: Actor, key: str, default_value: Any = None) -> Any:
    """Instance-level implementation behind `get_value`: open the default store and read `key` from it."""
    self._raise_if_not_initialized()

    default_store = await self.open_key_value_store()
    stored = await default_store.get_value(key, default_value)
    return stored
|
|
675
|
-
|
|
676
|
-
@classmethod
async def set_value(
    cls: type[Actor],
    key: str,
    value: Any,
    *,
    content_type: str | None = None,
) -> None:
    """Write or delete a value in the default key-value store of the current actor run.

    Args:
        key (str): Key of the record to set.
        value (any): Value to store under the key, or None if the record should be deleted.
        content_type (str, optional): Content type to associate with the value.
    """
    actor = cls._get_default_instance()
    return await actor.set_value(key=key, value=value, content_type=content_type)
|
|
696
|
-
|
|
697
|
-
async def _set_value_internal(
    self: Actor,
    key: str,
    value: Any,
    *,
    content_type: str | None = None,
) -> None:
    """Instance-level implementation behind `set_value`: open the default store and write `key` into it."""
    self._raise_if_not_initialized()

    default_store = await self.open_key_value_store()
    outcome = await default_store.set_value(key, value, content_type=content_type)
    return outcome
|
|
708
|
-
|
|
709
|
-
@classmethod
def on(cls: type[Actor], event_name: ActorEventTypes, listener: Callable) -> Callable:
    """Register an event listener with the actor's event manager.

    These events can be emitted:
    - `ActorEventTypes.SYSTEM_INFO`:
        Emitted every minute; the event data carries information about the actor's resource usage.
    - `ActorEventTypes.MIGRATING`:
        Emitted when the actor running on the Apify platform is about to be migrated to another worker server.
        Use it to persist the actor state and gracefully stop in-progress tasks,
        so that the migration does not interrupt them.
    - `ActorEventTypes.PERSIST_STATE`:
        Emitted at regular intervals (60 seconds by default) to tell the actor that it should persist its state,
        so that not all work has to be repeated when the actor restarts.
        The event is also emitted automatically together with the migrating event,
        in which case the `isMigrating` flag in the event data is True; otherwise the flag is False.
        This event exists merely for convenience,
        the same effect can be achieved with an interval and a listener for the migrating event.
    - `ActorEventTypes.ABORTING`:
        When a user aborts an actor run on the Apify platform,
        they may choose a graceful abort, which gives the actor some time before it is terminated.
        A graceful abort emits the aborting event, which can be used to clean up the actor state.

    Args:
        event_name (ActorEventTypes): The actor event to listen for.
        listener (Callable): The function to call when the event is emitted (may be async).
    """
    actor = cls._get_default_instance()
    return actor.on(event_name, listener)
|
|
737
|
-
|
|
738
|
-
def _on_internal(self: Actor, event_name: ActorEventTypes, listener: Callable) -> Callable:
    """Instance-level implementation behind `on`: register `listener` with this actor's event manager."""
    self._raise_if_not_initialized()

    registered = self._event_manager.on(event_name, listener)
    return registered
|
|
742
|
-
|
|
743
|
-
@classmethod
def off(cls: type[Actor], event_name: ActorEventTypes, listener: Callable | None = None) -> None:
    """Unregister one listener, or every listener, of an actor event.

    Args:
        event_name (ActorEventTypes): The actor event whose listeners should be removed.
        listener (Callable, optional): The listener to remove. When omitted, all listeners of this event are removed.
    """
    actor = cls._get_default_instance()
    return actor.off(event_name, listener)
|
|
752
|
-
|
|
753
|
-
def _off_internal(self: Actor, event_name: ActorEventTypes, listener: Callable | None = None) -> None:
    """Instance-level implementation behind `off`: remove `listener` from this actor's event manager."""
    self._raise_if_not_initialized()

    outcome = self._event_manager.off(event_name, listener)
    return outcome
|
|
757
|
-
|
|
758
|
-
@classmethod
def is_at_home(cls: type[Actor]) -> bool:
    """Tell whether the actor runs on the Apify platform (`True`) or elsewhere, e.g. locally (`False`)."""
    actor = cls._get_default_instance()
    return actor.is_at_home()
|
|
762
|
-
|
|
763
|
-
def _is_at_home_internal(self: Actor) -> bool:
    """Instance-level implementation behind `is_at_home`: report the `is_at_home` flag of the configuration."""
    at_home = self._config.is_at_home
    return at_home
|
|
765
|
-
|
|
766
|
-
@classmethod
def get_env(cls: type[Actor]) -> dict:
    """Return a dictionary of values parsed from all the `APIFY_XXX` environment variables.

    The full list of environment variables is available
    in the [Actor documentation](https://docs.apify.com/actors/development/environment-variables).
    Variables that are undefined or invalid are represented by None in the resulting dictionary.
    """
    actor = cls._get_default_instance()
    return actor.get_env()
|
|
775
|
-
|
|
776
|
-
def _get_env_internal(self: Actor) -> dict:
    """Instance-level implementation behind `get_env`.

    Builds a dictionary keyed by the lower-cased member name of every `ActorEnvVars` and
    `ApifyEnvVars` entry, with the value produced by `fetch_and_parse_env_var`.
    """
    self._raise_if_not_initialized()

    parsed_env: dict = {}
    # ActorEnvVars first, then ApifyEnvVars, so a later duplicate name overrides an earlier one.
    for env_var in (*ActorEnvVars, *ApifyEnvVars):
        parsed_env[env_var.name.lower()] = fetch_and_parse_env_var(env_var)
    return parsed_env
|
|
780
|
-
|
|
781
|
-
@classmethod
async def start(
    cls: type[Actor],
    actor_id: str,
    run_input: Any = None,
    *,
    token: str | None = None,
    content_type: str | None = None,
    build: str | None = None,
    memory_mbytes: int | None = None,
    timeout_secs: int | None = None,
    wait_for_finish: int | None = None,
    webhooks: list[dict] | None = None,
) -> dict:
    """Start an actor run on the Apify platform.

    In contrast to `Actor.call`, the run is only started; this method does not wait for it to finish.

    Args:
        actor_id (str): ID of the actor to run.
        run_input (Any, optional): Input to pass to the actor run.
        token (str, optional): Apify API token to use for this request (defaults to the `APIFY_TOKEN` environment variable).
        content_type (str, optional): Content type of the input.
        build (str, optional): Actor build to run, given either as a build tag or a build number.
            By default, the build from the actor's default run configuration is used (typically latest).
        memory_mbytes (int, optional): Memory limit for the run, in megabytes.
            By default, the memory limit from the actor's default run configuration is used.
        timeout_secs (int, optional): Timeout for the run, in seconds.
            By default, the timeout from the actor's default run configuration is used.
        wait_for_finish (int, optional): Maximum number of seconds the server waits for the run to finish.
            The default is 0, the maximum is 300.
        webhooks (list of dict, optional): Ad-hoc webhooks (https://docs.apify.com/webhooks/ad-hoc-webhooks)
            associated with the actor run, which can be used to receive a notification,
            e.g. when the actor finished or failed.
            A webhook already set up for the actor or task does not have to be added again here.
            Each webhook is a dictionary with these items:
            * ``event_types``: list of ``WebhookEventType`` values which trigger the webhook
            * ``request_url``: URL to which to send the webhook HTTP request
            * ``payload_template`` (optional): Optional template for the request payload

    Returns:
        dict: Info about the started actor run
    """
    actor = cls._get_default_instance()
    return await actor.start(
        actor_id=actor_id,
        run_input=run_input,
        token=token,
        content_type=content_type,
        build=build,
        memory_mbytes=memory_mbytes,
        timeout_secs=timeout_secs,
        wait_for_finish=wait_for_finish,
        webhooks=webhooks,
    )
|
|
835
|
-
|
|
836
|
-
async def _start_internal(
    self: Actor,
    actor_id: str,
    run_input: Any = None,
    *,
    token: str | None = None,
    content_type: str | None = None,
    build: str | None = None,
    memory_mbytes: int | None = None,
    timeout_secs: int | None = None,
    wait_for_finish: int | None = None,
    webhooks: list[dict] | None = None,
) -> dict:
    """Instance-level implementation behind `start`.

    Uses a fresh client created with `token` when one is given, otherwise the actor's own API client,
    and forwards all run options to the actor client's `start` call.
    """
    self._raise_if_not_initialized()

    if token:
        client = self.new_client(token=token)
    else:
        client = self._apify_client

    actor_client = client.actor(actor_id)
    return await actor_client.start(
        run_input=run_input,
        content_type=content_type,
        build=build,
        memory_mbytes=memory_mbytes,
        timeout_secs=timeout_secs,
        wait_for_finish=wait_for_finish,
        webhooks=webhooks,
    )
|
|
862
|
-
|
|
863
|
-
@classmethod
async def abort(
    cls: type[Actor],
    run_id: str,
    *,
    token: str | None = None,
    status_message: str | None = None,
    gracefully: bool | None = None,
) -> dict:
    """Abort given actor run on the Apify platform using the current user account (determined by the `APIFY_TOKEN` environment variable).

    Args:
        run_id (str): The ID of the actor run to be aborted.
        token (str, optional): The Apify API token to use for this request (defaults to the `APIFY_TOKEN` environment variable).
        status_message (str, optional): Status message to set on the run before it is aborted.
            The instance-level implementation already supports this option; it is exposed here
            so that it can also be used through the class-level API. When omitted, the status message is left untouched.
        gracefully (bool, optional): If True, the actor run will abort gracefully.
            It will send ``aborting`` and ``persistState`` events into the run and force-stop the run after 30 seconds.
            It is helpful in cases where you plan to resurrect the run later.

    Returns:
        dict: Info about the aborted actor run
    """
    return await cls._get_default_instance().abort(
        run_id=run_id,
        token=token,
        status_message=status_message,
        gracefully=gracefully,
    )
|
|
888
|
-
|
|
889
|
-
async def _abort_internal(
    self: Actor,
    run_id: str,
    *,
    token: str | None = None,
    status_message: str | None = None,
    gracefully: bool | None = None,
) -> dict:
    """Instance-level implementation behind `abort`.

    Uses a fresh client created with `token` when one is given, otherwise the actor's own API client.
    When a non-empty `status_message` is provided, the run is updated with it before the abort request is sent.
    """
    self._raise_if_not_initialized()

    if token:
        client = self.new_client(token=token)
    else:
        client = self._apify_client

    # The status message is written first, in a separate request, and only when it is truthy.
    if status_message:
        await client.run(run_id).update(status_message=status_message)

    aborted_run = await client.run(run_id).abort(gracefully=gracefully)
    return aborted_run
|
|
905
|
-
|
|
906
|
-
@classmethod
async def call(
    cls: type[Actor],
    actor_id: str,
    run_input: Any = None,
    *,
    token: str | None = None,
    content_type: str | None = None,
    build: str | None = None,
    memory_mbytes: int | None = None,
    timeout_secs: int | None = None,
    webhooks: list[dict] | None = None,
    wait_secs: int | None = None,
) -> dict | None:
    """Start an actor on the Apify Platform and return only after it has finished.

    The wait is indefinite unless the wait_secs argument is given.

    Args:
        actor_id (str): ID of the actor to run.
        run_input (Any, optional): Input to pass to the actor run.
        token (str, optional): Apify API token to use for this request (defaults to the `APIFY_TOKEN` environment variable).
        content_type (str, optional): Content type of the input.
        build (str, optional): Actor build to run, given either as a build tag or a build number.
            By default, the build from the actor's default run configuration is used (typically latest).
        memory_mbytes (int, optional): Memory limit for the run, in megabytes.
            By default, the memory limit from the actor's default run configuration is used.
        timeout_secs (int, optional): Timeout for the run, in seconds.
            By default, the timeout from the actor's default run configuration is used.
        webhooks (list, optional): Webhooks (https://docs.apify.com/webhooks) associated with the actor run,
            which can be used to receive a notification, e.g. when the actor finished or failed.
            A webhook already set up for the actor does not have to be added again here.
        wait_secs (int, optional): Maximum number of seconds the server waits for the run to finish. When omitted, waits indefinitely.

    Returns:
        dict: Info about the started actor run
    """
    actor = cls._get_default_instance()
    return await actor.call(
        actor_id=actor_id,
        token=token,
        run_input=run_input,
        content_type=content_type,
        build=build,
        memory_mbytes=memory_mbytes,
        timeout_secs=timeout_secs,
        webhooks=webhooks,
        wait_secs=wait_secs,
    )
|
|
954
|
-
|
|
955
|
-
async def _call_internal(
    self: Actor,
    actor_id: str,
    run_input: Any = None,
    *,
    token: str | None = None,
    content_type: str | None = None,
    build: str | None = None,
    memory_mbytes: int | None = None,
    timeout_secs: int | None = None,
    webhooks: list[dict] | None = None,
    wait_secs: int | None = None,
) -> dict | None:
    """Instance-level implementation behind `call`.

    Uses a fresh client created with `token` when one is given, otherwise the actor's own API client,
    and forwards all run options to the actor client's `call` method.
    """
    self._raise_if_not_initialized()

    if token:
        client = self.new_client(token=token)
    else:
        client = self._apify_client

    actor_client = client.actor(actor_id)
    return await actor_client.call(
        run_input=run_input,
        content_type=content_type,
        build=build,
        memory_mbytes=memory_mbytes,
        timeout_secs=timeout_secs,
        webhooks=webhooks,
        wait_secs=wait_secs,
    )
|
|
981
|
-
|
|
982
|
-
@classmethod
async def call_task(
    cls: type[Actor],
    task_id: str,
    task_input: dict | None = None,
    *,
    build: str | None = None,
    memory_mbytes: int | None = None,
    timeout_secs: int | None = None,
    webhooks: list[dict] | None = None,
    wait_secs: int | None = None,
    token: str | None = None,
) -> dict | None:
    """Start an actor task on the Apify Platform and wait for it to finish before returning.

    It waits indefinitely, unless the wait_secs argument is provided.

    Note that an actor task is a saved input configuration and options for an actor.
    If you want to run an actor directly rather than an actor task, please use the `Actor.call` method.

    Args:
        task_id (str): The ID of the actor task to be run.
        task_input (dict, optional): Overrides the input to pass to the actor run.
        build (str, optional): Specifies the actor build to run. It can be either a build tag or build number.
            By default, the run uses the build specified in the default run configuration for the actor (typically latest).
        memory_mbytes (int, optional): Memory limit for the run, in megabytes.
            By default, the run uses a memory limit specified in the default run configuration for the actor.
        timeout_secs (int, optional): Optional timeout for the run, in seconds.
            By default, the run uses timeout specified in the default run configuration for the actor.
        webhooks (list, optional): Optional webhooks (https://docs.apify.com/webhooks) associated with the actor run,
            which can be used to receive a notification, e.g. when the actor finished or failed.
            If you already have a webhook set up for the actor, you do not have to add it again here.
        wait_secs (int, optional): The maximum number of seconds the server waits for the run to finish. If not provided, waits indefinitely.
        token (str, optional): The Apify API token to use for this request (defaults to the `APIFY_TOKEN` environment variable).

    Returns:
        dict: Info about the started actor run
    """
    return await cls._get_default_instance().call_task(
        task_id=task_id,
        task_input=task_input,
        token=token,
        build=build,
        memory_mbytes=memory_mbytes,
        timeout_secs=timeout_secs,
        webhooks=webhooks,
        wait_secs=wait_secs,
    )
|
|
1031
|
-
|
|
1032
|
-
async def _call_task_internal(
    self: Actor,
    task_id: str,
    task_input: dict | None = None,
    *,
    build: str | None = None,
    memory_mbytes: int | None = None,
    timeout_secs: int | None = None,
    webhooks: list[dict] | None = None,
    wait_secs: int | None = None,
    token: str | None = None,
) -> dict | None:
    """Instance-level implementation behind `call_task`.

    Uses a fresh client created with `token` when one is given, otherwise the actor's own API client,
    and forwards all run options to the task client's `call` method.
    """
    self._raise_if_not_initialized()

    if token:
        client = self.new_client(token=token)
    else:
        client = self._apify_client

    task_client = client.task(task_id)
    return await task_client.call(
        task_input=task_input,
        build=build,
        memory_mbytes=memory_mbytes,
        timeout_secs=timeout_secs,
        webhooks=webhooks,
        wait_secs=wait_secs,
    )
|
|
1056
|
-
|
|
1057
|
-
@classmethod
async def metamorph(
    cls: type[Actor],
    target_actor_id: str,
    run_input: Any = None,
    *,
    target_actor_build: str | None = None,
    content_type: str | None = None,
    custom_after_sleep_millis: int | None = None,
) -> None:
    """Transform this actor run to an actor run of a different actor.

    The platform stops the current actor container and starts a new container with the new actor instead.
    All the default storages are preserved,
    and the new input is stored under the `INPUT-METAMORPH-1` key in the same default key-value store.

    This method does not return any value.

    Args:
        target_actor_id (str): ID of the target actor that the run should be transformed into
        run_input (Any, optional): The input to pass to the new run.
        target_actor_build (str, optional): The build of the target actor. It can be either a build tag or build number.
            By default, the run uses the build specified in the default run configuration for the target actor (typically the latest build).
        content_type (str, optional): The content type of the input.
        custom_after_sleep_millis (int, optional): How long to sleep for after the metamorph, to wait for the container to be stopped.
    """
    return await cls._get_default_instance().metamorph(
        target_actor_id=target_actor_id,
        target_actor_build=target_actor_build,
        run_input=run_input,
        content_type=content_type,
        custom_after_sleep_millis=custom_after_sleep_millis,
    )
|
|
1091
|
-
|
|
1092
|
-
async def _metamorph_internal(
    self: Actor,
    target_actor_id: str,
    run_input: Any = None,
    *,
    target_actor_build: str | None = None,
    content_type: str | None = None,
    custom_after_sleep_millis: int | None = None,
) -> None:
    """Instance-level implementation behind `metamorph`.

    Outside the Apify platform only an error is logged and nothing else happens.
    On the platform the current run is metamorphed through the API client and the coroutine
    then sleeps for the requested (or configured) number of milliseconds.
    """
    self._raise_if_not_initialized()

    if not self.is_at_home():
        self.log.error('Actor.metamorph() is only supported when running on the Apify platform.')
        return

    # A falsy argument (None or 0) falls back to the configured sleep time.
    sleep_millis = custom_after_sleep_millis or self._config.metamorph_after_sleep_millis

    # If is_at_home() is True, config.actor_run_id is always set
    assert self._config.actor_run_id is not None  # noqa: S101

    run_client = self._apify_client.run(self._config.actor_run_id)
    await run_client.metamorph(
        target_actor_id=target_actor_id,
        run_input=run_input,
        target_actor_build=target_actor_build,
        content_type=content_type,
    )

    if sleep_millis:
        await asyncio.sleep(sleep_millis / 1000)
|
|
1122
|
-
|
|
1123
|
-
@classmethod
async def reboot(
    cls: type[Actor],
    *,
    event_listeners_timeout_secs: int | None = EVENT_LISTENERS_TIMEOUT_SECS,
    custom_after_sleep_millis: int | None = None,
) -> None:
    """Reboot this actor internally.

    The current container is stopped by the system and a new one is started, keeping the same run ID and default storages.

    Args:
        event_listeners_timeout_secs (int, optional): How long the actor should wait for actor event listeners to finish before exiting
        custom_after_sleep_millis (int, optional): How long to sleep after the reboot, to wait for the container to be stopped.
    """
    actor = cls._get_default_instance()
    return await actor.reboot(
        event_listeners_timeout_secs=event_listeners_timeout_secs,
        custom_after_sleep_millis=custom_after_sleep_millis,
    )
|
|
1142
|
-
|
|
1143
|
-
async def _reboot_internal(
    self: Actor,
    *,
    event_listeners_timeout_secs: int | None = EVENT_LISTENERS_TIMEOUT_SECS,
    custom_after_sleep_millis: int | None = None,
) -> None:
    """Instance-level implementation behind `reboot`.

    Outside the Apify platform only an error is logged. On the platform, the periodic event
    emitting is cancelled, a last `PERSIST_STATE` event is emitted, the event manager is closed,
    the reboot of the current run is requested through the API client, and finally the coroutine sleeps.

    Args:
        event_listeners_timeout_secs (int, optional): Passed to the event manager's `close` call.
        custom_after_sleep_millis (int, optional): Milliseconds to sleep after the reboot request.
    """
    self._raise_if_not_initialized()

    if not self.is_at_home():
        self.log.error('Actor.reboot() is only supported when running on the Apify platform.')
        return

    # A falsy value (None or 0) falls back to the configured metamorph sleep time,
    # i.e. reboot shares the `metamorph_after_sleep_millis` setting.
    if not custom_after_sleep_millis:
        custom_after_sleep_millis = self._config.metamorph_after_sleep_millis

    await self._cancel_event_emitting_intervals()

    # Emit one last PERSIST_STATE event, flagged as migrating, and remember that it was sent.
    # NOTE(review): presumably the flag prevents another final emit during exit; confirm against its readers.
    self._event_manager.emit(ActorEventTypes.PERSIST_STATE, {'isMigrating': True})
    self._was_final_persist_state_emitted = True

    await self._event_manager.close(event_listeners_timeout_secs=event_listeners_timeout_secs)

    # NOTE(review): presumably always set when is_at_home() is True, as the sibling methods state; confirm.
    assert self._config.actor_run_id is not None  # noqa: S101
    await self._apify_client.run(self._config.actor_run_id).reboot()

    if custom_after_sleep_millis:
        await asyncio.sleep(custom_after_sleep_millis / 1000)
|
|
1170
|
-
|
|
1171
|
-
@classmethod
async def add_webhook(
    cls: type[Actor],
    *,
    event_types: list[WebhookEventType],
    request_url: str,
    payload_template: str | None = None,
    ignore_ssl_errors: bool | None = None,
    do_not_retry: bool | None = None,
    idempotency_key: str | None = None,
) -> dict | None:
    """Create an ad-hoc webhook for the current actor run.

    This webhook lets you receive a notification when the actor run finished or failed.

    Note that webhooks are only supported for actors running on the Apify platform.
    When running the actor locally, the function will log an error, have no effect and return None.

    For more information about Apify actor webhooks, please see the [documentation](https://docs.apify.com/webhooks).

    Args:
        event_types (list of WebhookEventType): List of event types that should trigger the webhook. At least one is required.
        request_url (str): URL that will be invoked once the webhook is triggered.
        payload_template (str, optional): Specification of the payload that will be sent to request_url
        ignore_ssl_errors (bool, optional): Whether the webhook should ignore SSL errors returned by request_url
        do_not_retry (bool, optional): Controls retrying of the payload delivery to request_url upon failure.
            Judging by its name, setting it to True disables the retries; the value is forwarded unchanged
            to the API client, so verify the exact semantics against the Apify API reference.
        idempotency_key (str, optional): A unique identifier of a webhook. You can use it to ensure that you won't
            create the same webhook multiple times.

    Returns:
        dict | None: The created webhook, or None when the actor is not running on the Apify platform.
    """
    return await cls._get_default_instance().add_webhook(
        event_types=event_types,
        request_url=request_url,
        payload_template=payload_template,
        ignore_ssl_errors=ignore_ssl_errors,
        do_not_retry=do_not_retry,
        idempotency_key=idempotency_key,
    )
|
|
1212
|
-
|
|
1213
|
-
async def _add_webhook_internal(
    self: Actor,
    *,
    event_types: list[WebhookEventType],
    request_url: str,
    payload_template: str | None = None,
    ignore_ssl_errors: bool | None = None,
    do_not_retry: bool | None = None,
    idempotency_key: str | None = None,
) -> dict | None:
    """Instance-level implementation behind `add_webhook`.

    Outside the Apify platform an error is logged and None is returned.
    On the platform the webhook is created for the current run through the API client.
    """
    self._raise_if_not_initialized()

    if not self.is_at_home():
        self.log.error('Actor.add_webhook() is only supported when running on the Apify platform.')
        return None

    # If is_at_home() is True, config.actor_run_id is always set
    assert self._config.actor_run_id is not None  # noqa: S101

    webhooks_client = self._apify_client.webhooks()
    created_webhook = await webhooks_client.create(
        actor_run_id=self._config.actor_run_id,
        event_types=event_types,
        request_url=request_url,
        payload_template=payload_template,
        ignore_ssl_errors=ignore_ssl_errors,
        do_not_retry=do_not_retry,
        idempotency_key=idempotency_key,
    )
    return created_webhook
|
|
1241
|
-
|
|
1242
|
-
@classmethod
async def set_status_message(
    cls: type[Actor],
    status_message: str,
    *,
    is_terminal: bool | None = None,
) -> dict | None:
    """Set the status message of the current actor run.

    Args:
        status_message (str): Status message to set on the run.
        is_terminal (bool, optional): Pass True when this is the final status message of the Actor run.

    Returns:
        dict: The updated actor run object
    """
    actor = cls._get_default_instance()
    return await actor.set_status_message(status_message=status_message, is_terminal=is_terminal)
|
|
1259
|
-
|
|
1260
|
-
async def _set_status_message_internal(
    self: Actor,
    status_message: str,
    *,
    is_terminal: bool | None = None,
) -> dict | None:
    """Update the status message of the current run, or just log it when not on the platform.

    `_raise_if_not_initialized()` is called first. When `is_at_home()` is truthy, the run identified by
    `self._config.actor_run_id` is updated through the Apify API client. Otherwise the message is only
    written to the log at info level and `None` is returned.

    Args:
        status_message (str): The status message to set to the run.
        is_terminal (bool, optional): Forwarded as `is_status_message_terminal` to the run update.
            Off the platform it only selects the log prefix.

    Returns:
        dict, optional: Whatever the run update call returns, or None when not running on the Apify platform.
    """
    self._raise_if_not_initialized()

    if self.is_at_home():
        run_id = self._config.actor_run_id
        # If is_at_home() is True, config.actor_run_id is always set
        assert run_id is not None  # noqa: S101

        run_client = self._apify_client.run(run_id)
        return await run_client.update(status_message=status_message, is_status_message_terminal=is_terminal)

    # Local run: there is no platform run to update, so surface the message in the log instead.
    if is_terminal:
        title = 'Terminal status message'
    else:
        title = 'Status message'
    self.log.info(f'[{title}]: {status_message}')
    return None
|
|
1277
|
-
|
|
1278
|
-
@classmethod
async def create_proxy_configuration(
    cls: type[Actor],
    *,
    actor_proxy_input: dict | None = None,  # this is the raw proxy input from the actor run input, it is not spread or snake_cased in here
    password: str | None = None,
    groups: list[str] | None = None,
    country_code: str | None = None,
    proxy_urls: list[str] | None = None,
    new_url_function: Callable[[str | None], str] | Callable[[str | None], Awaitable[str]] | None = None,
) -> ProxyConfiguration | None:
    """Create a ProxyConfiguration object with the passed proxy configuration.

    Configures connection to a proxy server with the provided options.
    Proxy servers are used to prevent target websites from blocking your crawlers based on IP address rate limits or blacklists.

    This is a class-level convenience entry point: it looks up the default instance via
    `_get_default_instance()` and forwards every argument to that instance's `create_proxy_configuration`.

    For more details and code examples, see the `ProxyConfiguration` class.

    Args:
        actor_proxy_input (dict, optional): Proxy configuration field from the actor input, if actor has such input field.
            If you pass this argument, all the other arguments will be inferred from it.
        password (str, optional): Password for the Apify Proxy. If not provided, will use os.environ['APIFY_PROXY_PASSWORD'], if available.
        groups (list of str, optional): Proxy groups which the Apify Proxy should use, if provided.
        country_code (str, optional): Country which the Apify Proxy should use, if provided.
        proxy_urls (list of str, optional): Custom proxy server URLs which should be rotated through.
        new_url_function (Callable, optional): Function which returns a custom proxy URL to be used.

    Returns:
        ProxyConfiguration, optional: ProxyConfiguration object with the passed configuration,
            or None, if no proxy should be used based on the configuration.
    """
    default_actor = cls._get_default_instance()
    return await default_actor.create_proxy_configuration(
        actor_proxy_input=actor_proxy_input,
        password=password,
        groups=groups,
        country_code=country_code,
        proxy_urls=proxy_urls,
        new_url_function=new_url_function,
    )
|
|
1317
|
-
|
|
1318
|
-
async def _create_proxy_configuration_internal(
    self: Actor,
    *,
    actor_proxy_input: dict | None = None,  # this is the raw proxy input from the actor run input, it is not spread or snake_cased in here
    password: str | None = None,
    groups: list[str] | None = None,
    country_code: str | None = None,
    proxy_urls: list[str] | None = None,
    new_url_function: Callable[[str | None], str] | Callable[[str | None], Awaitable[str]] | None = None,
) -> ProxyConfiguration | None:
    """Build and initialize a ProxyConfiguration from explicit options and/or the raw actor proxy input.

    `_raise_if_not_initialized()` is called first. When `actor_proxy_input` is given:

    - if its `useApifyProxy` key is truthy, `apifyProxyCountry` and `apifyProxyGroups` fill in
      `country_code` and `groups` wherever the explicit argument is falsy;
    - otherwise its `proxyUrls` value replaces `proxy_urls`, and when that value is missing or empty
      the method returns `None` without creating anything.

    Args:
        actor_proxy_input (dict, optional): Raw proxy input from the actor run input (camelCase keys).
        password (str, optional): Passed through to `ProxyConfiguration`.
        groups (list of str, optional): Passed through to `ProxyConfiguration`, possibly filled from `actor_proxy_input`.
        country_code (str, optional): Passed through to `ProxyConfiguration`, possibly filled from `actor_proxy_input`.
        proxy_urls (list of str, optional): Passed through to `ProxyConfiguration`, possibly replaced from `actor_proxy_input`.
        new_url_function (Callable, optional): Passed through to `ProxyConfiguration`.

    Returns:
        ProxyConfiguration, optional: The configuration after `initialize()` has been awaited,
            or None when `actor_proxy_input` neither enables the Apify Proxy nor lists any proxy URLs.
    """
    self._raise_if_not_initialized()

    if actor_proxy_input is not None:
        if not actor_proxy_input.get('useApifyProxy', False):
            # Custom proxies requested: the input's URL list wins over the explicit argument.
            proxy_urls = actor_proxy_input.get('proxyUrls', [])
            if not proxy_urls:
                return None
        else:
            # Apify Proxy requested: explicit arguments take precedence over the input's values.
            country_code = country_code or actor_proxy_input.get('apifyProxyCountry')
            groups = groups or actor_proxy_input.get('apifyProxyGroups')

    configuration = ProxyConfiguration(
        password=password,
        groups=groups,
        country_code=country_code,
        proxy_urls=proxy_urls,
        new_url_function=new_url_function,
        _actor_config=self._config,
        _apify_client=self._apify_client,
    )
    await configuration.initialize()
    return configuration
|