apify 2.7.3__py3-none-any.whl → 3.0.0__py3-none-any.whl
This diff represents the content of publicly available package versions that have been released to one of the supported registries. The information contained in this diff is provided for informational purposes only and reflects changes between package versions as they appear in their respective public registries.
Potentially problematic release.
This version of apify might be problematic. Click here for more details.
- apify/_actor.py +194 -126
- apify/_charging.py +34 -9
- apify/_configuration.py +70 -6
- apify/_crypto.py +0 -6
- apify/_models.py +7 -7
- apify/_proxy_configuration.py +10 -10
- apify/_utils.py +25 -2
- apify/events/__init__.py +5 -0
- apify/events/_apify_event_manager.py +140 -0
- apify/events/_types.py +102 -0
- apify/log.py +0 -9
- apify/request_loaders/__init__.py +18 -0
- apify/{storages/_request_list.py → request_loaders/_apify_request_list.py} +25 -18
- apify/request_loaders/py.typed +0 -0
- apify/scrapy/_logging_config.py +1 -4
- apify/scrapy/extensions/_httpcache.py +9 -5
- apify/scrapy/requests.py +3 -3
- apify/scrapy/scheduler.py +8 -5
- apify/storage_clients/__init__.py +12 -0
- apify/storage_clients/_apify/__init__.py +11 -0
- apify/storage_clients/_apify/_dataset_client.py +328 -0
- apify/storage_clients/_apify/_key_value_store_client.py +265 -0
- apify/storage_clients/_apify/_models.py +131 -0
- apify/storage_clients/_apify/_request_queue_client.py +327 -0
- apify/storage_clients/_apify/_request_queue_shared_client.py +527 -0
- apify/storage_clients/_apify/_request_queue_single_client.py +399 -0
- apify/storage_clients/_apify/_storage_client.py +106 -0
- apify/storage_clients/_apify/_utils.py +194 -0
- apify/storage_clients/_apify/py.typed +0 -0
- apify/storage_clients/_file_system/__init__.py +2 -0
- apify/storage_clients/_file_system/_key_value_store_client.py +57 -0
- apify/storage_clients/_file_system/_storage_client.py +41 -0
- apify/storage_clients/_smart_apify/__init__.py +1 -0
- apify/storage_clients/_smart_apify/_storage_client.py +117 -0
- apify/storage_clients/py.typed +0 -0
- apify/storages/__init__.py +1 -3
- {apify-2.7.3.dist-info → apify-3.0.0.dist-info}/METADATA +25 -9
- apify-3.0.0.dist-info/RECORD +57 -0
- apify/_platform_event_manager.py +0 -231
- apify/apify_storage_client/__init__.py +0 -3
- apify/apify_storage_client/_apify_storage_client.py +0 -72
- apify/apify_storage_client/_dataset_client.py +0 -190
- apify/apify_storage_client/_dataset_collection_client.py +0 -51
- apify/apify_storage_client/_key_value_store_client.py +0 -109
- apify/apify_storage_client/_key_value_store_collection_client.py +0 -51
- apify/apify_storage_client/_request_queue_client.py +0 -176
- apify/apify_storage_client/_request_queue_collection_client.py +0 -51
- apify-2.7.3.dist-info/RECORD +0 -44
- /apify/{apify_storage_client → events}/py.typed +0 -0
- {apify-2.7.3.dist-info → apify-3.0.0.dist-info}/WHEEL +0 -0
- {apify-2.7.3.dist-info → apify-3.0.0.dist-info}/licenses/LICENSE +0 -0
apify/_actor.py
CHANGED
|
@@ -5,7 +5,8 @@ import os
|
|
|
5
5
|
import sys
|
|
6
6
|
from contextlib import suppress
|
|
7
7
|
from datetime import datetime, timedelta, timezone
|
|
8
|
-
from
|
|
8
|
+
from functools import cached_property
|
|
9
|
+
from typing import TYPE_CHECKING, Any, Literal, TypeVar, cast, overload
|
|
9
10
|
|
|
10
11
|
from lazy_object_proxy import Proxy
|
|
11
12
|
from more_itertools import flatten
|
|
@@ -13,8 +14,8 @@ from pydantic import AliasChoices
|
|
|
13
14
|
|
|
14
15
|
from apify_client import ApifyClientAsync
|
|
15
16
|
from apify_shared.consts import ActorEnvVars, ActorExitCodes, ApifyEnvVars
|
|
16
|
-
from apify_shared.utils import ignore_docs, maybe_extract_enum_member_value
|
|
17
17
|
from crawlee import service_locator
|
|
18
|
+
from crawlee.errors import ServiceConflictError
|
|
18
19
|
from crawlee.events import (
|
|
19
20
|
Event,
|
|
20
21
|
EventAbortingData,
|
|
@@ -30,21 +31,23 @@ from apify._configuration import Configuration
|
|
|
30
31
|
from apify._consts import EVENT_LISTENERS_TIMEOUT
|
|
31
32
|
from apify._crypto import decrypt_input_secrets, load_private_key
|
|
32
33
|
from apify._models import ActorRun
|
|
33
|
-
from apify._platform_event_manager import EventManager, LocalEventManager, PlatformEventManager
|
|
34
34
|
from apify._proxy_configuration import ProxyConfiguration
|
|
35
|
-
from apify._utils import docs_group, docs_name, get_system_info, is_running_in_ipython
|
|
36
|
-
from apify.
|
|
35
|
+
from apify._utils import docs_group, docs_name, get_system_info, is_running_in_ipython, maybe_extract_enum_member_value
|
|
36
|
+
from apify.events import ApifyEventManager, EventManager, LocalEventManager
|
|
37
37
|
from apify.log import _configure_logging, logger
|
|
38
|
+
from apify.storage_clients import ApifyStorageClient
|
|
39
|
+
from apify.storage_clients._file_system import ApifyFileSystemStorageClient
|
|
40
|
+
from apify.storage_clients._smart_apify._storage_client import SmartApifyStorageClient
|
|
38
41
|
from apify.storages import Dataset, KeyValueStore, RequestQueue
|
|
39
42
|
|
|
40
43
|
if TYPE_CHECKING:
|
|
41
44
|
import logging
|
|
45
|
+
from collections.abc import Callable
|
|
42
46
|
from types import TracebackType
|
|
43
47
|
|
|
44
48
|
from typing_extensions import Self
|
|
45
49
|
|
|
46
50
|
from crawlee.proxy_configuration import _NewUrlFunction
|
|
47
|
-
from crawlee.storage_clients import StorageClient
|
|
48
51
|
|
|
49
52
|
from apify._models import Webhook
|
|
50
53
|
|
|
@@ -53,9 +56,46 @@ MainReturnType = TypeVar('MainReturnType')
|
|
|
53
56
|
|
|
54
57
|
|
|
55
58
|
@docs_name('Actor')
|
|
56
|
-
@docs_group('
|
|
59
|
+
@docs_group('Actor')
|
|
57
60
|
class _ActorType:
|
|
58
|
-
"""The class
|
|
61
|
+
"""The core class for building Actors on the Apify platform.
|
|
62
|
+
|
|
63
|
+
Actors are serverless programs running in the cloud that can perform anything from simple actions
|
|
64
|
+
(such as filling out a web form or sending an email) to complex operations (such as crawling an
|
|
65
|
+
entire website or removing duplicates from a large dataset). They are packaged as Docker containers
|
|
66
|
+
which accept well-defined JSON input, perform an action, and optionally produce well-defined output.
|
|
67
|
+
|
|
68
|
+
### References
|
|
69
|
+
|
|
70
|
+
- Apify platform documentation: https://docs.apify.com/platform/actors
|
|
71
|
+
- Actor whitepaper: https://whitepaper.actor/
|
|
72
|
+
|
|
73
|
+
### Usage
|
|
74
|
+
|
|
75
|
+
```python
|
|
76
|
+
import asyncio
|
|
77
|
+
|
|
78
|
+
import httpx
|
|
79
|
+
from apify import Actor
|
|
80
|
+
from bs4 import BeautifulSoup
|
|
81
|
+
|
|
82
|
+
|
|
83
|
+
async def main() -> None:
|
|
84
|
+
async with Actor:
|
|
85
|
+
actor_input = await Actor.get_input()
|
|
86
|
+
async with httpx.AsyncClient() as client:
|
|
87
|
+
response = await client.get(actor_input['url'])
|
|
88
|
+
soup = BeautifulSoup(response.content, 'html.parser')
|
|
89
|
+
data = {
|
|
90
|
+
'url': actor_input['url'],
|
|
91
|
+
'title': soup.title.string if soup.title else None,
|
|
92
|
+
}
|
|
93
|
+
await Actor.push_data(data)
|
|
94
|
+
|
|
95
|
+
if __name__ == '__main__':
|
|
96
|
+
asyncio.run(main())
|
|
97
|
+
```
|
|
98
|
+
"""
|
|
59
99
|
|
|
60
100
|
_is_rebooting = False
|
|
61
101
|
_is_any_instance_initialized = False
|
|
@@ -82,32 +122,17 @@ class _ActorType:
|
|
|
82
122
|
self._exit_process = self._get_default_exit_process() if exit_process is None else exit_process
|
|
83
123
|
self._is_exiting = False
|
|
84
124
|
|
|
85
|
-
|
|
86
|
-
|
|
87
|
-
|
|
88
|
-
|
|
89
|
-
# Create an instance of the cloud storage client, the local storage client is obtained
|
|
90
|
-
# from the service locator.
|
|
91
|
-
self._cloud_storage_client = ApifyStorageClient.from_config(config=self._configuration)
|
|
92
|
-
|
|
93
|
-
# Set the event manager based on whether the Actor is running on the platform or locally.
|
|
94
|
-
self._event_manager = (
|
|
95
|
-
PlatformEventManager(
|
|
96
|
-
config=self._configuration,
|
|
97
|
-
persist_state_interval=self._configuration.persist_state_interval,
|
|
98
|
-
)
|
|
99
|
-
if self.is_at_home()
|
|
100
|
-
else LocalEventManager(
|
|
101
|
-
system_info_interval=self._configuration.system_info_interval,
|
|
102
|
-
persist_state_interval=self._configuration.persist_state_interval,
|
|
103
|
-
)
|
|
104
|
-
)
|
|
125
|
+
# Actor state when this method is being executed is unpredictable.
|
|
126
|
+
# Actor can be initialized by lazy object proxy or by user directly, or by both.
|
|
127
|
+
# Until `init` method is run, this state of uncertainty remains. This is the reason why any setting done here in
|
|
128
|
+
# `__init__` method should not be considered final.
|
|
105
129
|
|
|
106
|
-
self.
|
|
130
|
+
self._configuration = configuration
|
|
131
|
+
self._configure_logging = configure_logging
|
|
132
|
+
self._apify_client: ApifyClientAsync | None = None
|
|
107
133
|
|
|
108
134
|
self._is_initialized = False
|
|
109
135
|
|
|
110
|
-
@ignore_docs
|
|
111
136
|
async def __aenter__(self) -> Self:
|
|
112
137
|
"""Initialize the Actor.
|
|
113
138
|
|
|
@@ -119,7 +144,6 @@ class _ActorType:
|
|
|
119
144
|
await self.init()
|
|
120
145
|
return self
|
|
121
146
|
|
|
122
|
-
@ignore_docs
|
|
123
147
|
async def __aexit__(
|
|
124
148
|
self,
|
|
125
149
|
_exc_type: type[BaseException] | None,
|
|
@@ -163,46 +187,87 @@ class _ActorType:
|
|
|
163
187
|
@property
|
|
164
188
|
def apify_client(self) -> ApifyClientAsync:
|
|
165
189
|
"""The ApifyClientAsync instance the Actor instance uses."""
|
|
190
|
+
if not self._apify_client:
|
|
191
|
+
self._apify_client = self.new_client()
|
|
166
192
|
return self._apify_client
|
|
167
193
|
|
|
168
|
-
@
|
|
194
|
+
@cached_property
|
|
169
195
|
def configuration(self) -> Configuration:
|
|
170
196
|
"""The Configuration instance the Actor instance uses."""
|
|
171
|
-
|
|
197
|
+
if self._configuration:
|
|
198
|
+
return self._configuration
|
|
199
|
+
|
|
200
|
+
try:
|
|
201
|
+
# Set implicit default Apify configuration, unless configuration was already set.
|
|
202
|
+
implicit_configuration = Configuration()
|
|
203
|
+
service_locator.set_configuration(implicit_configuration)
|
|
204
|
+
self._configuration = implicit_configuration
|
|
205
|
+
except ServiceConflictError:
|
|
206
|
+
self.log.debug(
|
|
207
|
+
'Configuration in service locator was set explicitly before Actor.init was called.'
|
|
208
|
+
'Using the existing configuration as implicit configuration for the Actor.'
|
|
209
|
+
)
|
|
172
210
|
|
|
173
|
-
|
|
174
|
-
|
|
175
|
-
"""The Configuration instance the Actor instance uses."""
|
|
211
|
+
# Use the configuration from the service locator
|
|
212
|
+
self._configuration = Configuration.get_global_configuration()
|
|
176
213
|
return self._configuration
|
|
177
214
|
|
|
178
|
-
@
|
|
215
|
+
@cached_property
|
|
179
216
|
def event_manager(self) -> EventManager:
|
|
180
217
|
"""The EventManager instance the Actor instance uses."""
|
|
181
|
-
return
|
|
218
|
+
return (
|
|
219
|
+
ApifyEventManager(
|
|
220
|
+
configuration=self.configuration,
|
|
221
|
+
persist_state_interval=self.configuration.persist_state_interval,
|
|
222
|
+
)
|
|
223
|
+
if self.is_at_home()
|
|
224
|
+
else LocalEventManager(
|
|
225
|
+
system_info_interval=self.configuration.system_info_interval,
|
|
226
|
+
persist_state_interval=self.configuration.persist_state_interval,
|
|
227
|
+
)
|
|
228
|
+
)
|
|
182
229
|
|
|
183
230
|
@property
|
|
184
231
|
def log(self) -> logging.Logger:
|
|
185
232
|
"""The logging.Logger instance the Actor uses."""
|
|
186
233
|
return logger
|
|
187
234
|
|
|
188
|
-
@property
|
|
189
|
-
def _local_storage_client(self) -> StorageClient:
|
|
190
|
-
"""The local storage client the Actor instance uses."""
|
|
191
|
-
return service_locator.get_storage_client()
|
|
192
|
-
|
|
193
235
|
def _raise_if_not_initialized(self) -> None:
|
|
194
236
|
if not self._is_initialized:
|
|
195
237
|
raise RuntimeError('The Actor was not initialized!')
|
|
196
238
|
|
|
197
|
-
|
|
198
|
-
|
|
199
|
-
|
|
239
|
+
@cached_property
|
|
240
|
+
def _storage_client(self) -> SmartApifyStorageClient:
|
|
241
|
+
"""Storage client used by the actor.
|
|
200
242
|
|
|
201
|
-
|
|
202
|
-
|
|
203
|
-
|
|
204
|
-
|
|
243
|
+
Depending on the initialization of the service locator the client can be created in different ways.
|
|
244
|
+
"""
|
|
245
|
+
try:
|
|
246
|
+
# Nothing was set by the user.
|
|
247
|
+
implicit_storage_client = SmartApifyStorageClient(
|
|
248
|
+
local_storage_client=ApifyFileSystemStorageClient(), cloud_storage_client=ApifyStorageClient()
|
|
205
249
|
)
|
|
250
|
+
service_locator.set_storage_client(implicit_storage_client)
|
|
251
|
+
except ServiceConflictError:
|
|
252
|
+
self.log.debug(
|
|
253
|
+
'Storage client in service locator was set explicitly before Actor.init was called.'
|
|
254
|
+
'Using the existing storage client as implicit storage client for the Actor.'
|
|
255
|
+
)
|
|
256
|
+
else:
|
|
257
|
+
return implicit_storage_client
|
|
258
|
+
|
|
259
|
+
# User set something in the service locator.
|
|
260
|
+
explicit_storage_client = service_locator.get_storage_client()
|
|
261
|
+
if isinstance(explicit_storage_client, SmartApifyStorageClient):
|
|
262
|
+
# The client was manually set to the right type in the service locator. This is the explicit way.
|
|
263
|
+
return explicit_storage_client
|
|
264
|
+
|
|
265
|
+
raise RuntimeError(
|
|
266
|
+
'The storage client in the service locator has to be instance of SmartApifyStorageClient. If you want to '
|
|
267
|
+
'set the storage client manually you have to call '
|
|
268
|
+
'`service_locator.set_storage_client(SmartApifyStorageClient(...))` before entering Actor context or '
|
|
269
|
+
'awaiting `Actor.init`.'
|
|
270
|
+
)
|
|
206
271
|
|
|
207
272
|
async def init(self) -> None:
|
|
208
273
|
"""Initialize the Actor instance.
|
|
@@ -215,6 +280,14 @@ class _ActorType:
|
|
|
215
280
|
This method should be called immediately before performing any additional Actor actions, and it should be
|
|
216
281
|
called only once.
|
|
217
282
|
"""
|
|
283
|
+
self.log.info('Initializing Actor...')
|
|
284
|
+
if self._configuration:
|
|
285
|
+
# Set explicitly the configuration in the service locator
|
|
286
|
+
service_locator.set_configuration(self.configuration)
|
|
287
|
+
else:
|
|
288
|
+
# Ensure that the configuration (cached property) is set
|
|
289
|
+
_ = self.configuration
|
|
290
|
+
|
|
218
291
|
if self._is_initialized:
|
|
219
292
|
raise RuntimeError('The Actor was already initialized!')
|
|
220
293
|
|
|
@@ -227,27 +300,20 @@ class _ActorType:
|
|
|
227
300
|
self._is_exiting = False
|
|
228
301
|
self._was_final_persist_state_emitted = False
|
|
229
302
|
|
|
230
|
-
|
|
231
|
-
if self.is_at_home():
|
|
232
|
-
service_locator.set_storage_client(self._cloud_storage_client)
|
|
303
|
+
self.log.debug(f'Storage client set to {self._storage_client}')
|
|
233
304
|
|
|
234
305
|
service_locator.set_event_manager(self.event_manager)
|
|
235
|
-
service_locator.set_configuration(self.configuration)
|
|
236
306
|
|
|
237
307
|
# The logging configuration has to be called after all service_locator set methods.
|
|
238
308
|
if self._configure_logging:
|
|
239
309
|
_configure_logging()
|
|
240
310
|
|
|
241
|
-
self.log.info('Initializing Actor...')
|
|
242
311
|
self.log.info('System info', extra=get_system_info())
|
|
243
312
|
|
|
244
|
-
|
|
245
|
-
# https://github.com/apify/apify-sdk-python/issues/146
|
|
246
|
-
|
|
247
|
-
await self._event_manager.__aenter__()
|
|
313
|
+
await self.event_manager.__aenter__()
|
|
248
314
|
self.log.debug('Event manager initialized')
|
|
249
315
|
|
|
250
|
-
await self.
|
|
316
|
+
await self._charging_manager_implementation.__aenter__()
|
|
251
317
|
self.log.debug('Charging manager initialized')
|
|
252
318
|
|
|
253
319
|
self._is_initialized = True
|
|
@@ -288,10 +354,10 @@ class _ActorType:
|
|
|
288
354
|
await asyncio.sleep(0.1)
|
|
289
355
|
|
|
290
356
|
if event_listeners_timeout:
|
|
291
|
-
await self.
|
|
357
|
+
await self.event_manager.wait_for_all_listeners_to_complete(timeout=event_listeners_timeout)
|
|
292
358
|
|
|
293
|
-
await self.
|
|
294
|
-
await self.
|
|
359
|
+
await self.event_manager.__aexit__(None, None, None)
|
|
360
|
+
await self._charging_manager_implementation.__aexit__(None, None, None)
|
|
295
361
|
|
|
296
362
|
await asyncio.wait_for(finalize(), cleanup_timeout.total_seconds())
|
|
297
363
|
self._is_initialized = False
|
|
@@ -350,8 +416,8 @@ class _ActorType:
|
|
|
350
416
|
(increases exponentially from this value).
|
|
351
417
|
timeout: The socket timeout of the HTTP requests sent to the Apify API.
|
|
352
418
|
"""
|
|
353
|
-
token = token or self.
|
|
354
|
-
api_url = api_url or self.
|
|
419
|
+
token = token or self.configuration.token
|
|
420
|
+
api_url = api_url or self.configuration.api_base_url
|
|
355
421
|
return ApifyClientAsync(
|
|
356
422
|
token=token,
|
|
357
423
|
api_url=api_url,
|
|
@@ -366,6 +432,7 @@ class _ActorType:
|
|
|
366
432
|
self,
|
|
367
433
|
*,
|
|
368
434
|
id: str | None = None,
|
|
435
|
+
alias: str | None = None,
|
|
369
436
|
name: str | None = None,
|
|
370
437
|
force_cloud: bool = False,
|
|
371
438
|
) -> Dataset:
|
|
@@ -376,10 +443,12 @@ class _ActorType:
|
|
|
376
443
|
the Apify cloud.
|
|
377
444
|
|
|
378
445
|
Args:
|
|
379
|
-
id: ID of the dataset to
|
|
380
|
-
|
|
381
|
-
name:
|
|
382
|
-
|
|
446
|
+
id: The ID of the dataset to open. If provided, searches for existing dataset by ID.
|
|
447
|
+
Mutually exclusive with name and alias.
|
|
448
|
+
name: The name of the dataset to open (global scope, persists across runs).
|
|
449
|
+
Mutually exclusive with id and alias.
|
|
450
|
+
alias: The alias of the dataset to open (run scope, creates unnamed storage).
|
|
451
|
+
Mutually exclusive with id and name.
|
|
383
452
|
force_cloud: If set to `True` then the Apify cloud storage is always used. This way it is possible
|
|
384
453
|
to combine local and cloud storage.
|
|
385
454
|
|
|
@@ -387,21 +456,18 @@ class _ActorType:
|
|
|
387
456
|
An instance of the `Dataset` class for the given ID or name.
|
|
388
457
|
"""
|
|
389
458
|
self._raise_if_not_initialized()
|
|
390
|
-
self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
|
|
391
|
-
|
|
392
|
-
storage_client = self._cloud_storage_client if force_cloud else self._local_storage_client
|
|
393
|
-
|
|
394
459
|
return await Dataset.open(
|
|
395
460
|
id=id,
|
|
396
461
|
name=name,
|
|
397
|
-
|
|
398
|
-
storage_client=
|
|
462
|
+
alias=alias,
|
|
463
|
+
storage_client=self._storage_client.get_suitable_storage_client(force_cloud=force_cloud),
|
|
399
464
|
)
|
|
400
465
|
|
|
401
466
|
async def open_key_value_store(
|
|
402
467
|
self,
|
|
403
468
|
*,
|
|
404
469
|
id: str | None = None,
|
|
470
|
+
alias: str | None = None,
|
|
405
471
|
name: str | None = None,
|
|
406
472
|
force_cloud: bool = False,
|
|
407
473
|
) -> KeyValueStore:
|
|
@@ -411,10 +477,12 @@ class _ActorType:
|
|
|
411
477
|
and retrieved using a unique key. The actual data is stored either on a local filesystem or in the Apify cloud.
|
|
412
478
|
|
|
413
479
|
Args:
|
|
414
|
-
id: ID of the
|
|
415
|
-
|
|
416
|
-
name:
|
|
417
|
-
|
|
480
|
+
id: The ID of the KVS to open. If provided, searches for existing KVS by ID.
|
|
481
|
+
Mutually exclusive with name and alias.
|
|
482
|
+
name: The name of the KVS to open (global scope, persists across runs).
|
|
483
|
+
Mutually exclusive with id and alias.
|
|
484
|
+
alias: The alias of the KVS to open (run scope, creates unnamed storage).
|
|
485
|
+
Mutually exclusive with id and name.
|
|
418
486
|
force_cloud: If set to `True` then the Apify cloud storage is always used. This way it is possible
|
|
419
487
|
to combine local and cloud storage.
|
|
420
488
|
|
|
@@ -422,20 +490,18 @@ class _ActorType:
|
|
|
422
490
|
An instance of the `KeyValueStore` class for the given ID or name.
|
|
423
491
|
"""
|
|
424
492
|
self._raise_if_not_initialized()
|
|
425
|
-
self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
|
|
426
|
-
storage_client = self._cloud_storage_client if force_cloud else self._local_storage_client
|
|
427
|
-
|
|
428
493
|
return await KeyValueStore.open(
|
|
429
494
|
id=id,
|
|
430
495
|
name=name,
|
|
431
|
-
|
|
432
|
-
storage_client=
|
|
496
|
+
alias=alias,
|
|
497
|
+
storage_client=self._storage_client.get_suitable_storage_client(force_cloud=force_cloud),
|
|
433
498
|
)
|
|
434
499
|
|
|
435
500
|
async def open_request_queue(
|
|
436
501
|
self,
|
|
437
502
|
*,
|
|
438
503
|
id: str | None = None,
|
|
504
|
+
alias: str | None = None,
|
|
439
505
|
name: str | None = None,
|
|
440
506
|
force_cloud: bool = False,
|
|
441
507
|
) -> RequestQueue:
|
|
@@ -447,10 +513,12 @@ class _ActorType:
|
|
|
447
513
|
crawling orders.
|
|
448
514
|
|
|
449
515
|
Args:
|
|
450
|
-
id: ID of the
|
|
451
|
-
|
|
452
|
-
name:
|
|
453
|
-
|
|
516
|
+
id: The ID of the RQ to open. If provided, searches for existing RQ by ID.
|
|
517
|
+
Mutually exclusive with name and alias.
|
|
518
|
+
name: The name of the RQ to open (global scope, persists across runs).
|
|
519
|
+
Mutually exclusive with id and alias.
|
|
520
|
+
alias: The alias of the RQ to open (run scope, creates unnamed storage).
|
|
521
|
+
Mutually exclusive with id and name.
|
|
454
522
|
force_cloud: If set to `True` then the Apify cloud storage is always used. This way it is possible
|
|
455
523
|
to combine local and cloud storage.
|
|
456
524
|
|
|
@@ -458,15 +526,11 @@ class _ActorType:
|
|
|
458
526
|
An instance of the `RequestQueue` class for the given ID or name.
|
|
459
527
|
"""
|
|
460
528
|
self._raise_if_not_initialized()
|
|
461
|
-
self._raise_if_cloud_requested_but_not_configured(force_cloud=force_cloud)
|
|
462
|
-
|
|
463
|
-
storage_client = self._cloud_storage_client if force_cloud else self._local_storage_client
|
|
464
|
-
|
|
465
529
|
return await RequestQueue.open(
|
|
466
530
|
id=id,
|
|
467
531
|
name=name,
|
|
468
|
-
|
|
469
|
-
storage_client=
|
|
532
|
+
alias=alias,
|
|
533
|
+
storage_client=self._storage_client.get_suitable_storage_client(force_cloud=force_cloud),
|
|
470
534
|
)
|
|
471
535
|
|
|
472
536
|
@overload
|
|
@@ -489,7 +553,7 @@ class _ActorType:
|
|
|
489
553
|
data = data if isinstance(data, list) else [data]
|
|
490
554
|
|
|
491
555
|
max_charged_count = (
|
|
492
|
-
self.
|
|
556
|
+
self.get_charging_manager().calculate_max_event_charge_count_within_limit(charged_event_name)
|
|
493
557
|
if charged_event_name is not None
|
|
494
558
|
else None
|
|
495
559
|
)
|
|
@@ -503,7 +567,7 @@ class _ActorType:
|
|
|
503
567
|
await dataset.push_data(data)
|
|
504
568
|
|
|
505
569
|
if charged_event_name:
|
|
506
|
-
return await self.
|
|
570
|
+
return await self.get_charging_manager().charge(
|
|
507
571
|
event_name=charged_event_name,
|
|
508
572
|
count=min(max_charged_count, len(data)) if max_charged_count is not None else len(data),
|
|
509
573
|
)
|
|
@@ -514,9 +578,9 @@ class _ActorType:
|
|
|
514
578
|
"""Get the Actor input value from the default key-value store associated with the current Actor run."""
|
|
515
579
|
self._raise_if_not_initialized()
|
|
516
580
|
|
|
517
|
-
input_value = await self.get_value(self.
|
|
518
|
-
input_secrets_private_key = self.
|
|
519
|
-
input_secrets_key_passphrase = self.
|
|
581
|
+
input_value = await self.get_value(self.configuration.input_key)
|
|
582
|
+
input_secrets_private_key = self.configuration.input_secrets_private_key_file
|
|
583
|
+
input_secrets_key_passphrase = self.configuration.input_secrets_private_key_passphrase
|
|
520
584
|
if input_secrets_private_key and input_secrets_key_passphrase:
|
|
521
585
|
private_key = load_private_key(
|
|
522
586
|
input_secrets_private_key,
|
|
@@ -560,7 +624,11 @@ class _ActorType:
|
|
|
560
624
|
def get_charging_manager(self) -> ChargingManager:
|
|
561
625
|
"""Retrieve the charging manager to access granular pricing information."""
|
|
562
626
|
self._raise_if_not_initialized()
|
|
563
|
-
return self.
|
|
627
|
+
return self._charging_manager_implementation
|
|
628
|
+
|
|
629
|
+
@cached_property
|
|
630
|
+
def _charging_manager_implementation(self) -> ChargingManagerImplementation:
|
|
631
|
+
return ChargingManagerImplementation(self.configuration, self.apify_client)
|
|
564
632
|
|
|
565
633
|
async def charge(self, event_name: str, count: int = 1) -> ChargeResult:
|
|
566
634
|
"""Charge for a specified number of events - sub-operations of the Actor.
|
|
@@ -572,7 +640,7 @@ class _ActorType:
|
|
|
572
640
|
count: Number of events to charge for.
|
|
573
641
|
"""
|
|
574
642
|
self._raise_if_not_initialized()
|
|
575
|
-
return await self.
|
|
643
|
+
return await self.get_charging_manager().charge(event_name, count)
|
|
576
644
|
|
|
577
645
|
@overload
|
|
578
646
|
def on(
|
|
@@ -623,7 +691,7 @@ class _ActorType:
|
|
|
623
691
|
"""
|
|
624
692
|
self._raise_if_not_initialized()
|
|
625
693
|
|
|
626
|
-
self.
|
|
694
|
+
self.event_manager.on(event=event_name, listener=listener)
|
|
627
695
|
return listener
|
|
628
696
|
|
|
629
697
|
@overload
|
|
@@ -649,11 +717,11 @@ class _ActorType:
|
|
|
649
717
|
"""
|
|
650
718
|
self._raise_if_not_initialized()
|
|
651
719
|
|
|
652
|
-
self.
|
|
720
|
+
self.event_manager.off(event=event_name, listener=listener)
|
|
653
721
|
|
|
654
722
|
def is_at_home(self) -> bool:
|
|
655
723
|
"""Return `True` when the Actor is running on the Apify platform, and `False` otherwise (e.g. local run)."""
|
|
656
|
-
return self.
|
|
724
|
+
return self.configuration.is_at_home
|
|
657
725
|
|
|
658
726
|
def get_env(self) -> dict:
|
|
659
727
|
"""Return a dictionary with information parsed from all the `APIFY_XXX` environment variables.
|
|
@@ -679,7 +747,7 @@ class _ActorType:
|
|
|
679
747
|
aliases = [field_name]
|
|
680
748
|
|
|
681
749
|
for alias in aliases:
|
|
682
|
-
config[alias] = getattr(self.
|
|
750
|
+
config[alias] = getattr(self.configuration, field_name)
|
|
683
751
|
|
|
684
752
|
env_vars = {env_var.value.lower(): env_var.name.lower() for env_var in [*ActorEnvVars, *ApifyEnvVars]}
|
|
685
753
|
return {option_name: config[env_var] for env_var, option_name in env_vars.items() if env_var in config}
|
|
@@ -724,7 +792,7 @@ class _ActorType:
|
|
|
724
792
|
"""
|
|
725
793
|
self._raise_if_not_initialized()
|
|
726
794
|
|
|
727
|
-
client = self.new_client(token=token) if token else self.
|
|
795
|
+
client = self.new_client(token=token) if token else self.apify_client
|
|
728
796
|
|
|
729
797
|
if webhooks:
|
|
730
798
|
serialized_webhooks = [
|
|
@@ -755,7 +823,7 @@ class _ActorType:
|
|
|
755
823
|
return ActorRun.model_validate(api_result)
|
|
756
824
|
|
|
757
825
|
def _get_remaining_time(self) -> timedelta | None:
|
|
758
|
-
"""Get time remaining from the
|
|
826
|
+
"""Get time remaining from the Actor timeout. Returns `None` if not on an Apify platform."""
|
|
759
827
|
if self.is_at_home() and self.configuration.timeout_at:
|
|
760
828
|
return self.configuration.timeout_at - datetime.now(tz=timezone.utc)
|
|
761
829
|
|
|
@@ -791,7 +859,7 @@ class _ActorType:
|
|
|
791
859
|
"""
|
|
792
860
|
self._raise_if_not_initialized()
|
|
793
861
|
|
|
794
|
-
client = self.new_client(token=token) if token else self.
|
|
862
|
+
client = self.new_client(token=token) if token else self.apify_client
|
|
795
863
|
|
|
796
864
|
if status_message:
|
|
797
865
|
await client.run(run_id).update(status_message=status_message)
|
|
@@ -844,7 +912,7 @@ class _ActorType:
|
|
|
844
912
|
"""
|
|
845
913
|
self._raise_if_not_initialized()
|
|
846
914
|
|
|
847
|
-
client = self.new_client(token=token) if token else self.
|
|
915
|
+
client = self.new_client(token=token) if token else self.apify_client
|
|
848
916
|
|
|
849
917
|
if webhooks:
|
|
850
918
|
serialized_webhooks = [
|
|
@@ -916,7 +984,7 @@ class _ActorType:
|
|
|
916
984
|
"""
|
|
917
985
|
self._raise_if_not_initialized()
|
|
918
986
|
|
|
919
|
-
client = self.new_client(token=token) if token else self.
|
|
987
|
+
client = self.new_client(token=token) if token else self.apify_client
|
|
920
988
|
|
|
921
989
|
if webhooks:
|
|
922
990
|
serialized_webhooks = [
|
|
@@ -967,13 +1035,13 @@ class _ActorType:
|
|
|
967
1035
|
return
|
|
968
1036
|
|
|
969
1037
|
if not custom_after_sleep:
|
|
970
|
-
custom_after_sleep = self.
|
|
1038
|
+
custom_after_sleep = self.configuration.metamorph_after_sleep
|
|
971
1039
|
|
|
972
|
-
# If is_at_home() is True,
|
|
973
|
-
if not self.
|
|
1040
|
+
# If is_at_home() is True, configuration.actor_run_id is always set
|
|
1041
|
+
if not self.configuration.actor_run_id:
|
|
974
1042
|
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
|
|
975
1043
|
|
|
976
|
-
await self.
|
|
1044
|
+
await self.apify_client.run(self.configuration.actor_run_id).metamorph(
|
|
977
1045
|
target_actor_id=target_actor_id,
|
|
978
1046
|
run_input=run_input,
|
|
979
1047
|
target_actor_build=target_actor_build,
|
|
@@ -1010,7 +1078,7 @@ class _ActorType:
|
|
|
1010
1078
|
_ActorType._is_rebooting = True
|
|
1011
1079
|
|
|
1012
1080
|
if not custom_after_sleep:
|
|
1013
|
-
custom_after_sleep = self.
|
|
1081
|
+
custom_after_sleep = self.configuration.metamorph_after_sleep
|
|
1014
1082
|
|
|
1015
1083
|
# Call all the listeners for the PERSIST_STATE and MIGRATING events, and wait for them to finish.
|
|
1016
1084
|
# PERSIST_STATE listeners are called to allow the Actor to persist its state before the reboot.
|
|
@@ -1019,10 +1087,10 @@ class _ActorType:
|
|
|
1019
1087
|
# We can't just emit the events and wait for all listeners to finish,
|
|
1020
1088
|
# because this method might be called from an event listener itself, and we would deadlock.
|
|
1021
1089
|
persist_state_listeners = flatten(
|
|
1022
|
-
(self.
|
|
1090
|
+
(self.event_manager._listeners_to_wrappers[Event.PERSIST_STATE] or {}).values() # noqa: SLF001
|
|
1023
1091
|
)
|
|
1024
1092
|
migrating_listeners = flatten(
|
|
1025
|
-
(self.
|
|
1093
|
+
(self.event_manager._listeners_to_wrappers[Event.MIGRATING] or {}).values() # noqa: SLF001
|
|
1026
1094
|
)
|
|
1027
1095
|
|
|
1028
1096
|
await asyncio.gather(
|
|
@@ -1030,10 +1098,10 @@ class _ActorType:
|
|
|
1030
1098
|
*[listener(EventMigratingData()) for listener in migrating_listeners],
|
|
1031
1099
|
)
|
|
1032
1100
|
|
|
1033
|
-
if not self.
|
|
1101
|
+
if not self.configuration.actor_run_id:
|
|
1034
1102
|
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
|
|
1035
1103
|
|
|
1036
|
-
await self.
|
|
1104
|
+
await self.apify_client.run(self.configuration.actor_run_id).reboot()
|
|
1037
1105
|
|
|
1038
1106
|
if custom_after_sleep:
|
|
1039
1107
|
await asyncio.sleep(custom_after_sleep.total_seconds())
|
|
@@ -1072,11 +1140,11 @@ class _ActorType:
|
|
|
1072
1140
|
return
|
|
1073
1141
|
|
|
1074
1142
|
# If is_at_home() is True, config.actor_run_id is always set
|
|
1075
|
-
if not self.
|
|
1143
|
+
if not self.configuration.actor_run_id:
|
|
1076
1144
|
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
|
|
1077
1145
|
|
|
1078
|
-
await self.
|
|
1079
|
-
actor_run_id=self.
|
|
1146
|
+
await self.apify_client.webhooks().create(
|
|
1147
|
+
actor_run_id=self.configuration.actor_run_id,
|
|
1080
1148
|
event_types=webhook.event_types,
|
|
1081
1149
|
request_url=webhook.request_url,
|
|
1082
1150
|
payload_template=webhook.payload_template,
|
|
@@ -1108,10 +1176,10 @@ class _ActorType:
|
|
|
1108
1176
|
return None
|
|
1109
1177
|
|
|
1110
1178
|
# If is_at_home() is True, config.actor_run_id is always set
|
|
1111
|
-
if not self.
|
|
1179
|
+
if not self.configuration.actor_run_id:
|
|
1112
1180
|
raise RuntimeError('actor_run_id cannot be None when running on the Apify platform.')
|
|
1113
1181
|
|
|
1114
|
-
api_result = await self.
|
|
1182
|
+
api_result = await self.apify_client.run(self.configuration.actor_run_id).update(
|
|
1115
1183
|
status_message=status_message, is_status_message_terminal=is_terminal
|
|
1116
1184
|
)
|
|
1117
1185
|
|
|
@@ -1121,7 +1189,7 @@ class _ActorType:
|
|
|
1121
1189
|
self,
|
|
1122
1190
|
*,
|
|
1123
1191
|
actor_proxy_input: dict
|
|
1124
|
-
| None = None, # this is the raw proxy input from the
|
|
1192
|
+
| None = None, # this is the raw proxy input from the Actor run input, it is not spread or snake_cased in here
|
|
1125
1193
|
password: str | None = None,
|
|
1126
1194
|
groups: list[str] | None = None,
|
|
1127
1195
|
country_code: str | None = None,
|
|
@@ -1166,7 +1234,7 @@ class _ActorType:
|
|
|
1166
1234
|
country_code=country_code,
|
|
1167
1235
|
proxy_urls=proxy_urls,
|
|
1168
1236
|
new_url_function=new_url_function,
|
|
1169
|
-
_actor_config=self.
|
|
1237
|
+
_actor_config=self.configuration,
|
|
1170
1238
|
_apify_client=self._apify_client,
|
|
1171
1239
|
)
|
|
1172
1240
|
|